PromptEnhancer_32B-FlashPack-QwenTrain

Sleeping

App Files Files Community

rahul7star committed on Oct 16

Commit

139a0fa

verified ·

1 Parent(s): e725ad5

Update app_cpu.py

Browse files

Files changed (1) hide show

app_cpu.py +62 -175

app_cpu.py CHANGED Viewed

@@ -1,184 +1,71 @@
-import os
-import time
-import logging
-import re
 import gradio as gr
-from huggingface_hub import snapshot_download
-# ============================================================
-# 1️⃣ Pre-download model during app startup
-# ============================================================
-DEFAULT_MODEL_REPO = os.environ.get("MODEL_OUTPUT_PATH", "rubricreward/mR3-Qwen3-14B-en-prompt-en-thinking")
-print(f"🔄 Checking and downloading model repo: {DEFAULT_MODEL_REPO}")
-local_model_dir = snapshot_download(repo_id=DEFAULT_MODEL_REPO)
-print(f"✅ Model cached locally at: {local_model_dir}")
-# ============================================================
-# 2️⃣ Utilities
-# ============================================================
-try:
-    from qwen_vl_utils import process_vision_info
-except Exception:
-    def process_vision_info(messages):
-        return None, None
-def replace_single_quotes(text):
-    """Replace single quotes inside words with double quotes for consistency."""
-    pattern = r"\B'([^']*)'\B"
-    replaced_text = re.sub(pattern, r'"\1"', text)
-    return replaced_text.replace("’", "”").replace("‘", "“")
-def _str_to_dtype(dtype_str):
-    """Normalize torch dtype string."""
-    return dtype_str if dtype_str in ("bfloat16", "float16", "float32") else "float32"
-# ============================================================
-# 3️⃣ Load model once (from local snapshot)
-# ============================================================
-import torch
-from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("PromptEnhancerCPU")
-dtype = torch.float32  # Default for CPU
-logger.info("🔧 Loading pre-downloaded model from local path...")
-model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    local_model_dir,
-    torch_dtype=dtype,
-    device_map={"": "cpu"},  # Force CPU only
-    attn_implementation="sdpa",
-)
-processor = AutoProcessor.from_pretrained(local_model_dir)
-logger.info("✅ Model loaded and ready on CPU.")
-# ============================================================
-# 4️⃣ Inference (uses already-loaded model)
-# ============================================================
-def cpu_predict(prompt_cot, sys_prompt, temperature, max_new_tokens, torch_dtype):
-    """Generate rewritten prompt using preloaded model on CPU."""
-    dtype = {
-        "bfloat16": torch.bfloat16,
-        "float16": torch.float16,
-        "float32": torch.float32,
-    }.get(torch_dtype, torch.float32)
-    device = "cpu"
-    org_prompt_cot = prompt_cot
-    user_prompt_format = sys_prompt + "\n" + org_prompt_cot
-    messages = [{"role": "user", "content": [{"type": "text", "text": user_prompt_format}]}]
-    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    image_inputs, video_inputs = process_vision_info(messages)
-    inputs = processor(
-        text=[text],
-        images=image_inputs,
-        videos=video_inputs,
-        padding=True,
-        return_tensors="pt",
-    ).to(device)
-    logger.info("🧠 Running generation (CPU)...")
-    generated_ids = model.generate(
-        **inputs,
-        max_new_tokens=int(max_new_tokens),
-        temperature=float(temperature),
-        do_sample=False,
-        top_k=5,
-        top_p=0.9,
-    )
-    generated_ids_trimmed = [
-        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
     ]
-    output_text = processor.batch_decode(
-        generated_ids_trimmed,
-        skip_special_tokens=True,
-        clean_up_tokenization_spaces=False,
     )
-    output_res = output_text[0]
-    try:
-        # Extract part after "think>" if present
-        assert output_res.count("think>") == 2
-        new_prompt = output_res.split("think>")[-1].lstrip("\n")
-        new_prompt = replace_single_quotes(new_prompt)
-    except Exception:
-        new_prompt = org_prompt_cot
-    return new_prompt, ""
-# ============================================================
-# 5️⃣ Gradio Logic
-# ============================================================
-def run_single(prompt, sys_prompt, temperature, max_new_tokens, torch_dtype, state):
-    """Handle one user query from Gradio."""
-    if not prompt.strip():
-        return "", "Please enter a prompt first.", state
-    t0 = time.time()
-    try:
-        new_prompt, err = cpu_predict(prompt, sys_prompt, temperature, max_new_tokens, torch_dtype)
-        dt = time.time() - t0
-        msg = f"Time taken: {dt:.2f}s"
-        if err:
-            msg = f"{err} ({msg})"
-        return new_prompt, msg, state
-    except Exception as e:
-        return "", f"Error: {e}", state
-# ============================================================
-# 6️⃣ Gradio UI
-# ============================================================
-example_prompts = [
-    "Third-person view: a race car speeding through a city track, with a mini-map in the top-left corner and a speedometer in the bottom-right.",
-    "Anime-style portrait of a girl with short purple hair and soft lighting.",
-    "Pointillism painting: two fishermen carrying crates by the seaside, with boats docked nearby.",
-    "A Van Gogh-inspired wheat field tangled with swirling blue nebulae and fiery sunflowers.",
-    "Create a painting depicting a 30-year-old businesswoman on a plane trip.",
-]
-with gr.Blocks(title="Prompt Enhancer (CPU Preload)") as demo:
-    gr.Markdown("## 🧩 Prompt Enhancer (CPU Mode — Model Preloaded via `snapshot_download`)")
     with gr.Row():
-        sys_prompt = gr.Textbox(
-            label="System Prompt",
-            value="Please think step-by-step and rewrite the user’s prompt in a more refined, creative, and detailed way:",
-            lines=3
-        )
-        temperature = gr.Slider(0, 1, value=0.1, step=0.05, label="Temperature")
-        max_new_tokens = gr.Slider(16, 4096, value=2048, step=16, label="Max New Tokens")
-        torch_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="Torch Dtype")
-    state = gr.State(value=None)
-    with gr.Tab("Inference"):
-        with gr.Row():
-            with gr.Column(scale=2):
-                prompt = gr.Textbox(label="Input Prompt", lines=6, placeholder="Paste the prompt to rewrite here...")
-                run_btn = gr.Button("Generate Rewrite", variant="primary")
-                gr.Examples(examples=example_prompts, inputs=prompt)
-            with gr.Column(scale=3):
-                out_text = gr.Textbox(label="Rewritten Prompt", lines=10)
-                out_info = gr.Markdown("✅ Model loaded on CPU (from `snapshot_download` cache).")
-        run_btn.click(
-            run_single,
-            inputs=[prompt, sys_prompt, temperature, max_new_tokens, torch_dtype, state],
-            outputs=[out_text, out_info, state]
-        )
 if __name__ == "__main__":
-    demo.launch(show_error=True, share=True)

 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+# =========================================================
+# Load model and tokenizer
+# =========================================================
+# Identifier of the checkpoint handed to both from_pretrained() calls below.
+MODEL_ID = "gokaygokay/prompt-enhancer-gemma-3-270m-it"
+# Tokenizer and model are created once at module import, so every request reuses them.
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
+# Text-generation pipeline built from the objects above; enhance_prompt() calls it per request.
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+# =========================================================
+# Define enhancer function
+# =========================================================
+def enhance_prompt(prompt: str) -> str:
+    """Expand a short user prompt into a more detailed one.
+
+    Builds a two-message chat (fixed system instruction + the user's text),
+    renders it with the tokenizer's chat template and samples a completion
+    from the module-level text-generation pipeline.
+
+    Args:
+        prompt: Raw text from the input box. ``None``, empty, or
+            whitespace-only input is rejected with a warning message.
+
+    Returns:
+        The generated enhancement with surrounding whitespace removed, or a
+        warning string when no usable prompt was supplied.
+    """
+    cleaned = (prompt or "").strip()
+    if not cleaned:
+        return "⚠️ Please enter a prompt to enhance."
+    messages = [
+        {"role": "system", "content": "Enhance and expand the following prompt with more details and context:"},
+        {"role": "user", "content": cleaned},
     ]
+    chat_input = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    # return_full_text=False: by default the pipeline returns prompt + completion,
+    # which would echo the rendered chat template (system instruction included)
+    # back into the "Enhanced Prompt" box. Only the newly generated text is wanted.
+    output = pipe(
+        chat_input,
+        max_new_tokens=256,
+        do_sample=True,
+        temperature=0.8,
+        return_full_text=False,
+    )
+    result = output[0]["generated_text"]
+    return result.strip()
+# =========================================================
+# Gradio UI
+# =========================================================
+# Page layout: heading, a two-column row (input + button | output), then a tips footer.
+with gr.Blocks(theme=gr.themes.Soft(), title="Prompt Enhancer ✨") as demo:
+    gr.Markdown(
+        """
+        # ✨ Prompt Enhancer — Gemma 3 270M IT
+        Enhance and expand your prompts with creative detail and context using a small, efficient language model.
+        """
     )
     with gr.Row():
+        # Left column: prompt entry and the button that triggers enhancement.
+        with gr.Column(scale=1):
+            input_text = gr.Textbox(
+                label="Enter your prompt",
+                placeholder="e.g. a cat sitting on a chair",
+                lines=4,
+            )
+            enhance_button = gr.Button("🚀 Enhance Prompt", variant="primary")
+        # Right column: textbox that receives the value returned by enhance_prompt.
+        with gr.Column(scale=1):
+            output_text = gr.Textbox(
+                label="Enhanced Prompt",
+                placeholder="Your enhanced prompt will appear here...",
+                lines=8,
+            )
+    # Wire the button: input_text is passed to enhance_prompt, its return value goes to output_text.
+    enhance_button.click(enhance_prompt, inputs=input_text, outputs=output_text)
+    gr.Markdown(
+        """
+        ---
+        🧠 **Tip:** Try short creative prompts like
+        - “a futuristic city at sunset”
+        - “a woman reading under a tree”
+        - “a magical forest with glowing mushrooms”
+        """
+    )
+# =========================================================
+# Launch the app
+# =========================================================
 if __name__ == "__main__":
+    # Start the Gradio app with default launch settings when run as a script.
+    demo.launch()