Nymbo committed
Commit 5a5ca47 · verified · 1 Parent(s): f17b0fe

Update app.py

Files changed (1):
  1. app.py +101 -237
app.py CHANGED
@@ -1,8 +1,3 @@
-# File: app.py
-# Purpose: Gradio UI + MCP server for FLUX.1 Kontext-dev with two MCP tools:
-#   1) edit_image -> edits an uploaded image based on a prompt
-#   2) text_to_image -> generates a brand-new image from a prompt (no input image)
-
 import gradio as gr
 import numpy as np
 import spaces
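
The next hunk collapses the commented model bootstrap into a one-liner. For local testing outside a ZeroGPU Space, the same bootstrap can be sketched with a CPU fallback; the fallback logic below is an assumption for illustration, not part of this commit:

    import torch
    from diffusers import FluxKontextPipeline

    # Same checkpoint app.py loads; bfloat16 roughly halves GPU memory vs. float32.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.bfloat16 if device == "cuda" else torch.float32
    pipe = FluxKontextPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-Kontext-dev",
        torch_dtype=dtype,
    ).to(device)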
@@ -13,65 +8,60 @@ from PIL import Image
 from diffusers import FluxKontextPipeline
 from diffusers.utils import load_image
 
-# -----------------------------
-# Constants & model bootstrap
-# -----------------------------
-
-# MAX_SEED is the highest 32-bit signed int; many generators expect this bound
-MAX_SEED = np.iinfo(np.int32).max  # <-- (layman's) the biggest safe random seed we'll allow
-
-# Load the FLUX.1 Kontext-dev pipeline once and keep it on GPU for speed
-# (layman's) this downloads the model and prepares it to run on your graphics card
-pipe = FluxKontextPipeline.from_pretrained(
-    "black-forest-labs/FLUX.1-Kontext-dev",
-    torch_dtype=torch.bfloat16
-).to("cuda")
+MAX_SEED = np.iinfo(np.int32).max
 
-# ---------------------------------------------------------
-# Core editing function (works WITH or WITHOUT input image)
-# ---------------------------------------------------------
+pipe = FluxKontextPipeline.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16).to("cuda")
 
 @spaces.GPU
-def infer(
-    input_image: Image.Image | None,
-    prompt: str,
-    seed: int = 42,
-    randomize_seed: bool = False,
-    guidance_scale: float = 2.5,
-    steps: int = 20,
-    progress: gr.Progress = gr.Progress(track_tqdm=True),
-) -> tuple[Image.Image, int, gr.Button]:
+def infer(input_image, prompt, seed=42, randomize_seed=False, guidance_scale=2.5, steps=28, progress=gr.Progress(track_tqdm=True)):
     """
-    Perform image editing or generation using the FLUX.1 Kontext pipeline.
-
-    If an input image is provided, the model performs contextual editing.
-    If no image is provided, the model generates a new image from the prompt.
+    Perform image editing using the FLUX.1 Kontext pipeline.
+
+    This function takes an input image and a text prompt to generate a modified version
+    of the image based on the provided instructions. It uses the FLUX.1 Kontext model
+    for contextual image editing tasks.
 
     Args:
-        input_image: Optional image to edit. If None, we do text-to-image instead.
-        prompt: What you want to change/create (e.g., "Remove glasses", "Neon cyberpunk cityscape").
-        seed: Random seed for reproducibility (0..2^31-1).
-        randomize_seed: If True, ignore `seed` and pick a random one.
-        guidance_scale: How strongly to follow the prompt (higher = more literal, but can reduce quality).
-        steps: Number of diffusion steps (1..30). More steps = slower but usually better.
-        progress: (Gradio) Used to stream progress updates.
-
+        input_image (PIL.Image.Image): The input image to be edited. Will be converted
+            to RGB format if not already in that format.
+        prompt (str): Text description of the desired edit to apply to the image.
+            Examples: "Remove glasses", "Add a hat", "Change background to beach".
+        seed (int, optional): Random seed for reproducible generation. Defaults to 42.
+            Must be between 0 and MAX_SEED (2^31 - 1).
+        randomize_seed (bool, optional): If True, generates a random seed instead of
+            using the provided seed value. Defaults to False.
+        guidance_scale (float, optional): Controls how closely the model follows the
+            prompt. Higher values mean stronger adherence to the prompt but may reduce
+            image quality. Range: 1.0-10.0. Defaults to 2.5.
+        steps (int, optional): Controls how many steps to run the diffusion model for.
+            Range: 1-30. Defaults to 28.
+        progress (gr.Progress, optional): Gradio progress tracker for monitoring
+            generation progress. Defaults to gr.Progress(track_tqdm=True).
+
     Returns:
-        (image, seed, reuse_button_visibility): The resulting image, the actual seed used, and a visible "reuse" button.
+        tuple: A 3-tuple containing:
+            - PIL.Image.Image: The generated/edited image
+            - int: The seed value used for generation (useful when randomize_seed=True)
+            - gr.update: Gradio update object to make the reuse button visible
+
+    Example:
+        >>> edited_image, used_seed, button_update = infer(
+        ...     input_image=my_image,
+        ...     prompt="Add sunglasses",
+        ...     seed=123,
+        ...     randomize_seed=False,
+        ...     guidance_scale=2.5
+        ... )
     """
-    # (layman's) pick a new seed if user asked for randomness
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
-
-    # (layman's) if you gave us an image, we edit it; if not, we create from scratch
+
     if input_image:
         input_image = input_image.convert("RGB")
         image = pipe(
-            image=input_image,
+            image=input_image,
             prompt=prompt,
             guidance_scale=guidance_scale,
-            width=input_image.size[0],
-            height=input_image.size[1],
             num_inference_steps=steps,
             generator=torch.Generator().manual_seed(seed),
         ).images[0]
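
Both the old and new versions of `infer` share the seed handling shown above: an optional re-roll, then a fresh generator seeded with the final value. A minimal self-contained sketch of that logic; `resolve_seed` is a hypothetical helper name, not something the commit defines:

    import random

    import numpy as np
    import torch

    MAX_SEED = np.iinfo(np.int32).max  # 2**31 - 1, the same bound app.py uses

    def resolve_seed(seed=42, randomize_seed=False):
        # Mirrors infer(): optionally replace the seed, then seed a fresh CPU generator.
        if randomize_seed:
            seed = random.randint(0, MAX_SEED)
        return torch.Generator().manual_seed(seed), seed

    # infer() returns the seed it actually used, so a randomized run can be
    # reproduced later by passing that value back with randomize_seed=False.
    generator, used_seed = resolve_seed(randomize_seed=True)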
@@ -82,202 +72,76 @@ def infer(
             num_inference_steps=steps,
             generator=torch.Generator().manual_seed(seed),
         ).images[0]
+    return image, seed, gr.update(visible=True)
 
-    # (layman's) return the finished picture, the seed, and show a "reuse" button
-    return image, seed, gr.Button(visible=True)
-
-# ------------------------------------------------------------
-# NEW: Dedicated text-to-image function (separate MCP tool)
-# ------------------------------------------------------------
-
-@spaces.GPU  # (layman's) make sure we run on the GPU so it's fast
-def text_to_image(
-    prompt: str,
-    seed: int = 42,
-    randomize_seed: bool = False,
-    guidance_scale: float = 2.5,
-    steps: int = 20,
-    width: int = 1024,
-    height: int = 1024,
-    progress: gr.Progress = gr.Progress(track_tqdm=True),
-) -> tuple[Image.Image, int]:
-    """
-    Generate a brand-new image from text only (no input image required).
-
-    This calls FLUX.1 Kontext-dev in "text-to-image" mode.
-    Great for creating images from scratch with a clean, separate MCP tool.
-
-    Args:
-        prompt: The scene or edit you want to create (e.g., "cozy cabin at dusk, cinematic lighting").
-        seed: Random seed for reproducibility (0..2^31-1).
-        randomize_seed: If True, ignore `seed` and pick a random one.
-        guidance_scale: How strongly to follow the prompt (higher = more literal, can reduce quality).
-        steps: Number of diffusion steps (1..30). 20 is a good speed/quality balance.
-        width: Output image width in pixels.
-        height: Output image height in pixels.
-        progress: (Gradio) Used to stream progress updates.
-
-    Returns:
-        (image, seed): The generated image and the seed actually used.
-    """
-    # (layman's) pick a new seed if requested
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-
-    # (layman's) run the model in pure text-to-image mode
-    image = pipe(
-        prompt=prompt,
-        guidance_scale=guidance_scale,
-        width=width,
-        height=height,
-        num_inference_steps=steps,
-        generator=torch.Generator().manual_seed(seed),
-    ).images[0]
-
-    return image, seed
-
-# -------------------------------------
-# Lightweight helper for the Examples
-# -------------------------------------
-
-@spaces.GPU(duration=25)
-def infer_example(input_image: Image.Image | None, prompt: str) -> tuple[Image.Image, int]:
-    # (layman's) small wrapper used by the clickable examples
-    image, seed, _ = infer(input_image, prompt)
-    return image, seed
-
-# -------------
-# Minimal CSS
-# -------------
-css = """
+css="""
 #col-container {
     margin: 0 auto;
    max-width: 960px;
 }
 """
 
-# --------------------------
-# UI (Gradio Blocks layout)
-# --------------------------
 with gr.Blocks(css=css) as demo:
-    # (layman's) top caption & links
-    gr.Markdown(
-        """# FLUX.1 Kontext [dev]
-        Image editing and manipulation model guidance-distilled from FLUX.1 Kontext [pro],
-        [[blog]](https://bfl.ai/announcements/flux-1-kontext-dev) [[model]](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev)
-        """
-    )
-
-    with gr.Row():
-        # -------------------------------
-        # Left column: inputs & settings
-        # -------------------------------
-        with gr.Column():
-            # (layman's) you can upload an image to edit or leave it blank to generate from text
-            input_image = gr.Image(label="Upload the image for editing (optional)", type="pil")
-            with gr.Row():
-                prompt = gr.Text(
-                    label="Prompt",
-                    show_label=False,
-                    max_lines=1,
-                    placeholder="Describe what to create/edit (e.g., 'Neon skyline at night')",
-                    container=False,
-                )
-                run_button = gr.Button("Run", scale=0)
-
-            # (layman's) extra knobs if you want finer control
-            with gr.Accordion("Advanced Settings", open=False):
-                seed = gr.Slider(
-                    label="Seed",
-                    minimum=0,
-                    maximum=MAX_SEED,
-                    step=1,
-                    value=42,
-                )
-
-                randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
-
-                guidance_scale = gr.Slider(
-                    label="Guidance Scale",
-                    minimum=1.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=2.5,
-                )
-
-                steps = gr.Slider(
-                    label="Steps",
-                    minimum=1,
-                    maximum=30,
-                    value=20,
-                    step=1,
-                )
-
-        # -------------------------
-        # Right column: the output
-        # -------------------------
-        with gr.Column():
-            result = gr.Image(label="Result", show_label=False, interactive=False)
-            reuse_button = gr.Button("Reuse this image", visible=False)
-
-    # (layman's) a few quick examples for testing
-    examples = gr.Examples(
-        examples=[
-            ["flowers.png", "turn the flowers into sunflowers"],
-            ["monster.png", "make this monster ride a skateboard on the beach"],
-            ["cat.png", "make this cat happy"],
-        ],
-        inputs=[input_image, prompt],
-        outputs=[result, seed],
-        fn=infer_example,
-        cache_examples="lazy",
-    )
-
-    # (layman's) wire the "Run" button and Enter key to call our main function
+
+    with gr.Column(elem_id="col-container"):
+        gr.Markdown(f"""# FLUX.1 Kontext [dev]
+Image editing and manipulation model.
+""")
+        with gr.Row():
+            with gr.Column():
+                input_image = gr.Image(label="Upload the image for editing", type="pil")
+                with gr.Row():
+                    prompt = gr.Text(
+                        label="Prompt",
+                        show_label=False,
+                        max_lines=1,
+                        placeholder="Enter your prompt for editing (e.g., 'Remove glasses', 'Add a hat')",
+                        container=False,
+                    )
+                    run_button = gr.Button("Run", scale=0)
+                with gr.Accordion("Advanced Settings", open=False):
+
+                    seed = gr.Slider(
+                        label="Seed",
+                        minimum=0,
+                        maximum=MAX_SEED,
+                        step=1,
+                        value=0,
+                    )
+
+                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+
+                    guidance_scale = gr.Slider(
+                        label="Guidance Scale",
+                        minimum=1,
+                        maximum=10,
+                        step=0.1,
+                        value=2.5,
+                    )
+
+                    steps = gr.Slider(
+                        label="Steps",
+                        minimum=1,
+                        maximum=30,
+                        value=28,
+                        step=1
+                    )
+
+            with gr.Column():
+                result = gr.Image(label="Result", show_label=False, interactive=False)
+                reuse_button = gr.Button("Reuse this image", visible=False)
+
+
     gr.on(
         triggers=[run_button.click, prompt.submit],
-        fn=infer,
-        inputs=[input_image, prompt, seed, randomize_seed, guidance_scale, steps],
-        outputs=[result, seed, reuse_button],
-        api_name="edit_image",  # <-- MCP tool name for UI-based edit/generate
-        api_description="Edit an uploaded image with a prompt (or generate from text if no image is provided) using FLUX.1 Kontext-dev.",
+        fn = infer,
+        inputs = [input_image, prompt, seed, randomize_seed, guidance_scale, steps],
+        outputs = [result, seed, reuse_button]
+    )
+    reuse_button.click(
+        fn = lambda image: image,
+        inputs = [result],
+        outputs = [input_image]
     )
 
-    # (Optional) If you want a 1-click "reuse image" flow in the UI later:
-    # reuse_button.click(fn=lambda image: image, inputs=[result], outputs=[input_image])
-
-# ------------------------------------------------------------------
-# NEW: Create a dedicated Interface for text-to-image MCP tool
-# This ensures better compatibility with MCP clients
-# ------------------------------------------------------------------
-
-# Create a separate interface for the text-to-image tool
-text_to_image_interface = gr.Interface(
-    fn=text_to_image,
-    inputs=[
-        gr.Text(label="Prompt", placeholder="Describe the image you want to generate", value=""),
-        gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, value=42, step=1),
-        gr.Checkbox(label="Randomize seed", value=True),
-        gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, value=2.5, step=0.1),
-        gr.Slider(label="Steps", minimum=1, maximum=30, value=20, step=1),
-        gr.Slider(label="Width", minimum=256, maximum=2048, value=1024, step=64),
-        gr.Slider(label="Height", minimum=256, maximum=2048, value=1024, step=64),
-    ],
-    outputs=[
-        gr.Image(label="Generated Image"),
-        gr.Number(label="Seed Used")
-    ],
-    title="FLUX.1 Text-to-Image Generator",
-    description="Generate high-quality images from text descriptions using FLUX.1 Kontext-dev",
-    api_name="text_to_image"
-)
-
-# Mount both interfaces using gr.TabbedInterface for better organization
-combined_demo = gr.TabbedInterface(
-    [demo, text_to_image_interface],
-    ["Image Editor", "Text-to-Image Generator"],
-    title="FLUX.1 Kontext Tools"
-)
-
-# (layman's) start the app with MCP enabled so tools show up to agents (e.g., Claude/Cursor)
-combined_demo.launch(mcp_server=True)
+demo.launch(mcp_server=True)
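
With `api_name="edit_image"` and `api_description` removed, Gradio falls back to naming the endpoint after the function, and `mcp_server=True` should expose it as an MCP tool under that default name as well. A quick smoke test through the regular client API; the Space ID below is hypothetical, and the `/infer` endpoint name assumes Gradio's default for an event with no explicit `api_name`:

    from gradio_client import Client, handle_file

    client = Client("Nymbo/FLUX.1-Kontext-dev")  # hypothetical Space ID

    # Positional args follow the gr.on(...) inputs list:
    # [input_image, prompt, seed, randomize_seed, guidance_scale, steps]
    image_path, used_seed, _reuse_update = client.predict(
        handle_file("cat.png"),   # input_image
        "make this cat happy",    # prompt
        0,                        # seed (ignored because randomize_seed=True)
        True,                     # randomize_seed
        2.5,                      # guidance_scale
        28,                       # steps
        api_name="/infer",
    )
    # The three outputs mirror [result, seed, reuse_button]; the last is the
    # button-visibility update and can usually be ignored client-side.
    print(image_path, used_seed)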