VirtualOasis committed
Commit 70b0d1a · 1 Parent(s): 934e5c4
app.py CHANGED
@@ -1,653 +1,191 @@
- import json
  import os
- import tempfile
- import textwrap
- import time
- from dataclasses import dataclass, field, asdict
- from pathlib import Path
- from typing import Any, Dict, List, Optional, Tuple
 
  import gradio as gr
- import requests
- try:
-     from google import genai
- except ImportError:  # pragma: no cover - dependency is optional at import time
-     genai = None
-
-
- # -----------------------------
- # Domain data representations
- # -----------------------------
-
-
- @dataclass
- class CharacterProfile:
-     character_id: str
-     name: str
-     description: str
-     visual_tags: List[str] = field(default_factory=list)
-     image_path: Optional[str] = None
-
-
- @dataclass
- class ScenePlan:
-     scene_id: str
-     title: str
-     summary: str
-     visual_prompt: str
-     characters: List[str] = field(default_factory=list)
-
-
- @dataclass
- class StoryboardPlan:
-     title: str
-     logline: str
-     style: str
-     runtime_hint: str
-     tone: str
-     characters: List[CharacterProfile]
-     scenes: List[ScenePlan]
-
-     def to_dict(self) -> Dict[str, Any]:
-         return {
-             "title": self.title,
-             "logline": self.logline,
-             "style": self.style,
-             "runtime_hint": self.runtime_hint,
-             "tone": self.tone,
-             "characters": [asdict(c) for c in self.characters],
-             "scenes": [asdict(s) for s in self.scenes],
-         }
-
-
- # -----------------------------
- # Helper utilities
- # -----------------------------
-
-
- def resolve_token(user_supplied: str, env_key: str) -> Optional[str]:
-     candidate = (user_supplied or "").strip()
-     if candidate:
-         return candidate
-     env_candidate = (os.getenv(env_key) or "").strip()
-     return env_candidate or None
-
-
- def extract_json_block(text: str) -> str:
-     """Return the first JSON object found inside the text."""
-     stack = []
-     start_index = None
-     for index, char in enumerate(text):
-         if char == "{":
-             if not stack:
-                 start_index = index
-             stack.append(char)
-         elif char == "}" and stack:
-             stack.pop()
-             if not stack and start_index is not None:
-                 return text[start_index : index + 1]
-     return text
-
-
- def format_character_markdown(characters: List[CharacterProfile]) -> str:
-     lines = []
-     for character in characters:
-         tags = ", ".join(character.visual_tags) if character.visual_tags else "n/a"
-         lines.append(f"- **{character.name}** ({character.character_id}): {character.description} \n  Visual tags: {tags}")
-     return "\n".join(lines) if lines else "No characters were generated yet."
-
-
- def ensure_module_available(module_ref, friendly_name: str) -> None:
-     if module_ref is None:
-         raise gr.Error(
-             f"{friendly_name} is not installed. Install it via `pip install google-genai` and try again."
-         )
-
-
- # -----------------------------
- # Gemini services
- # -----------------------------
-
-
- class GeminiService:
-     def __init__(
-         self,
-         api_key: str,
-         story_model: str = "gemini-2.5-flash",
-         image_model: str = "gemini-2.5-flash-image",
-     ) -> None:
-         ensure_module_available(genai, "google-genai")
-         if not api_key:
-             raise gr.Error("Google API key is required.")
-         self.client = genai.Client(api_key=api_key)
-         self.story_model = story_model
-         self.image_model = image_model
-
-     def generate_storyboard(
-         self,
-         movie_idea: str,
-         visual_style: str,
-         scene_count: int,
-         runtime_hint: str,
-         tone: str,
-     ) -> StoryboardPlan:
-         prompt = textwrap.dedent(
-             f"""
-             You are CineGen, an AI creative director. Given the following idea, craft a production-ready storyboard.
-
-             Idea: {movie_idea}
-             Target visual style: {visual_style}
-             Desired runtime: {runtime_hint}
-             Tone keywords: {tone}
-             Scene count: exactly {scene_count}
-
-             Respond with valid JSON using this schema:
-             {{
-               "title": "...",
-               "logline": "...",
-               "style": "...",
-               "runtime_hint": "...",
-               "tone": "...",
-               "characters": [
-                 {{"id": "char_1", "name": "...", "description": "...", "visual_tags": ["tag1", "tag2"]}}
-               ],
-               "scenes": [
-                 {{
-                   "id": "scene_1",
-                   "title": "...",
-                   "summary": "...",
-                   "visual_prompt": "...",
-                   "characters": ["char_1", "char_2"]
-                 }}
-               ]
-             }}
-
-             Ensure each scene references character IDs from the characters array and highlight cinematic camera or lighting cues inside "visual_prompt".
-             """
-         ).strip()
-
-         response = self.client.models.generate_content(
-             model=self.story_model,
-             contents=prompt,
-         )
-         raw_text = getattr(response, "text", None) or "".join(
-             [getattr(part, "text", "") for part in getattr(response, "parts", [])]
-         )
-         if not raw_text:
-             raise gr.Error("Gemini did not return any content for the storyboard.")
-
-         serialized = extract_json_block(raw_text)
-         payload = json.loads(serialized)
-
-         characters = [
-             CharacterProfile(
-                 character_id=entry.get("id", f"char_{idx+1}"),
-                 name=entry.get("name", f"Character {idx+1}"),
-                 description=entry.get("description", ""),
-                 visual_tags=entry.get("visual_tags") or [],
-             )
-             for idx, entry in enumerate(payload.get("characters", []))
-         ]
-
-         scenes = [
-             ScenePlan(
-                 scene_id=scene.get("id", f"scene_{idx+1}"),
-                 title=scene.get("title", f"Scene {idx+1}"),
-                 summary=scene.get("summary", ""),
-                 visual_prompt=scene.get("visual_prompt", ""),
-                 characters=scene.get("characters") or [],
-             )
-             for idx, scene in enumerate(payload.get("scenes", []))
-         ]
-
-         if len(scenes) != scene_count:
-             # Keep UX predictable even if the model under-delivers on scene count.
-             scenes = scenes[:scene_count]
-
-         return StoryboardPlan(
-             title=payload.get("title", "Untitled"),
-             logline=payload.get("logline", ""),
-             style=payload.get("style", visual_style),
-             runtime_hint=payload.get("runtime_hint", runtime_hint),
-             tone=payload.get("tone", tone),
-             characters=characters,
-             scenes=scenes,
-         )
-
-     def generate_character_images(
-         self,
-         characters: List[CharacterProfile],
-         visual_style: str,
-         max_characters: int = 4,
-     ) -> List[CharacterProfile]:
-         rendered: List[CharacterProfile] = []
-         for character in characters[:max_characters]:
-             prompt = textwrap.dedent(
-                 f"""
-                 Create a front-facing character reference portrait for use in a video production pipeline.
-                 Character: {character.name}
-                 Description: {character.description}
-                 Visual tags: {", ".join(character.visual_tags) if character.visual_tags else "n/a"}
-                 Style: {visual_style}
-
-                 Output a single cohesive concept art image.
-                 """
-             ).strip()
-             response = self.client.models.generate_content(
-                 model=self.image_model,
-                 contents=prompt,
-             )
-             image_path = None
-             for part in getattr(response, "parts", []):
-                 if getattr(part, "inline_data", None):
-                     image = part.as_image()
-                     with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
-                         image.save(tmp.name)
-                     image_path = tmp.name
-                     break
-             enriched = CharacterProfile(
-                 character_id=character.character_id,
-                 name=character.name,
-                 description=character.description,
-                 visual_tags=character.visual_tags,
-                 image_path=image_path,
-             )
-             rendered.append(enriched)
-         return rendered
-
-
- # -----------------------------
- # Hugging Face video service
- # -----------------------------
-
-
- class HuggingFaceVideoService:
-     MODEL_FALLBACK = [
-         "Wan-AI/Wan2.1-T2V-14B",
-         "Lightricks/LTX-Video-0.9.7-distilled",
-         "tencent/HunyuanVideo-1.5",
-         "THUDM/CogVideoX-5b",
-     ]
-
-     def __init__(self, token: str):
-         if not token:
-             raise gr.Error("Hugging Face token is required for video generation.")
-         self.token = token
-         self.session = requests.Session()
-
-     def generate(
-         self,
-         prompt: str,
-         preferred_model: Optional[str],
-         negative_prompt: str,
-         duration_seconds: float,
-         fps: int,
-         seed: Optional[int],
-     ) -> Tuple[str, str]:
-         ordered_models = self._order_models(preferred_model)
-         last_error = ""
-         for model in ordered_models:
-             try:
-                 video_path = self._invoke_model(
-                     model=model,
-                     prompt=prompt,
-                     negative_prompt=negative_prompt,
-                     duration_seconds=duration_seconds,
-                     fps=fps,
-                     seed=seed,
-                 )
-                 return model, video_path
-             except Exception as exc:  # pragma: no cover - defensive fallback
-                 last_error = str(exc)
-                 time.sleep(1.5)
-         raise gr.Error(f"All video backends failed. Last error: {last_error}")
-
-     def _order_models(self, preferred_model: Optional[str]) -> List[str]:
-         models = list(self.MODEL_FALLBACK)
-         if preferred_model and preferred_model in models:
-             models.remove(preferred_model)
-             models.insert(0, preferred_model)
-         elif preferred_model:
-             models.insert(0, preferred_model)
-         return models
-
-     def _invoke_model(
-         self,
-         model: str,
-         prompt: str,
-         negative_prompt: str,
-         duration_seconds: float,
-         fps: int,
-         seed: Optional[int],
-     ) -> str:
-         url = f"https://api-inference.huggingface.co/models/{model}"
-         headers = {
-             "Authorization": f"Bearer {self.token}",
-             "Accept": "video/mp4",
-         }
-         payload = {
-             "inputs": prompt,
-             "parameters": {
-                 "negative_prompt": negative_prompt,
-                 "num_frames": int(duration_seconds * fps),
-                 "fps": fps,
-                 "seed": seed,
-                 "guidance_scale": 7.5,
-             },
-             "options": {"use_cache": True, "wait_for_model": True},
-         }
-
-         response = self.session.post(
-             url,
-             headers=headers,
-             json=payload,
-             timeout=600,
-         )
-
-         if response.status_code == 200:
-             return self._write_video(response.content)
-         if response.status_code in {503, 504, 524}:
-             raise RuntimeError(f"{model} is warming up or busy (status {response.status_code}).")
-
-         try:
-             message = response.json()
-         except Exception:
-             message = response.text
-         raise RuntimeError(f"{model} failed: {message}")
-
-     @staticmethod
-     def _write_video(content: bytes) -> str:
-         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as handle:
-             handle.write(content)
-         return handle.name
-
-
- # -----------------------------
- # CineGen pipeline orchestration
- # -----------------------------
-
-
- def build_scene_prompt(scene: ScenePlan, storyboard: StoryboardPlan) -> str:
-     character_blurbs = []
-     pool = {c.character_id: c for c in storyboard.characters}
-     for actor_id in scene.characters:
-         profile = pool.get(actor_id)
-         if profile:
-             tags = ", ".join(profile.visual_tags) if profile.visual_tags else ""
-             character_blurbs.append(f"{profile.name}: {profile.description} {tags}".strip())
-
-     character_block = "\n".join(character_blurbs) if character_blurbs else "Original characters only."
-     return textwrap.dedent(
-         f"""
-         Title: {storyboard.title}
-         Logline: {storyboard.logline}
-         Scene: {scene.title} ({scene.scene_id})
-         Narrative summary: {scene.summary}
-         Visual prompt: {scene.visual_prompt}
-         Visual style: {storyboard.style}
-         Tone: {storyboard.tone}
-         Characters:\n{character_block}
-         """
-     ).strip()
-
-
- # -----------------------------
- # Gradio callbacks
- # -----------------------------
-
-
- def storyboard_callback(
-     movie_idea: str,
-     visual_style: str,
-     runtime_hint: str,
-     tone: str,
      scene_count: int,
-     google_api_key_input: str,
- ):
-     api_key = resolve_token(google_api_key_input, "GOOGLE_API_KEY")
-     if not movie_idea:
-         raise gr.Error("Please describe your movie idea first.")
-     storyboard_service = GeminiService(api_key=api_key)
-     storyboard = storyboard_service.generate_storyboard(
-         movie_idea=movie_idea,
-         visual_style=visual_style,
          scene_count=scene_count,
-         runtime_hint=runtime_hint,
-         tone=tone,
      )
-     characters_with_images = storyboard_service.generate_character_images(storyboard.characters, visual_style)
-
-     storyboard_dict = storyboard.to_dict()
-     character_markdown = format_character_markdown(characters_with_images)
-     gallery_entries = [
-         (profile.image_path, f"{profile.name} ({profile.character_id})")
-         for profile in characters_with_images
-         if profile.image_path
-     ]
-     scene_choices = [f"{scene.scene_id}: {scene.title}" for scene in storyboard.scenes]
-
-     status_message = f"Storyboard ready: {storyboard.title} with {len(storyboard.scenes)} scenes."
      return (
-         status_message,
-         storyboard_dict,
-         character_markdown,
-         gallery_entries,
-         storyboard_dict,
-         [asdict(profile) for profile in characters_with_images],
-         gr.Dropdown.update(choices=scene_choices, value=scene_choices[0] if scene_choices else None),
      )
 
 
- def generate_video_callback(
-     scene_choice: str,
-     storyboard_state: Dict[str, Any],
-     hf_token_input: str,
-     preferred_model: str,
-     negative_prompt: str,
-     duration_seconds: float,
-     fps: int,
-     seed: int,
  ):
-     if not storyboard_state:
-         raise gr.Error("Generate a storyboard first.")
-     hf_token = resolve_token(hf_token_input, "HF_TOKEN")
-     if not hf_token:
-         raise gr.Error("Provide a Hugging Face token to render video.")
-
-     scenes = storyboard_state.get("scenes", [])
-     characters = storyboard_state.get("characters", [])
-     if not scenes:
-         raise gr.Error("Storyboard has no scenes to render.")
-     scene_id = (scene_choice or "").split(":")[0]
-     scene_payload = next((scene for scene in scenes if scene["scene_id"] == scene_id or scene["scene_id"] == scene_choice), None)
-     if not scene_payload:
-         scene_payload = scenes[0]
-
-     storyboard = StoryboardPlan(
-         title=storyboard_state.get("title", ""),
-         logline=storyboard_state.get("logline", ""),
-         style=storyboard_state.get("style", ""),
-         runtime_hint=storyboard_state.get("runtime_hint", ""),
-         tone=storyboard_state.get("tone", ""),
-         characters=[
-             CharacterProfile(
-                 character_id=entry.get("character_id") or entry.get("id"),
-                 name=entry.get("name", ""),
-                 description=entry.get("description", ""),
-                 visual_tags=entry.get("visual_tags") or [],
-                 image_path=entry.get("image_path"),
-             )
-             for entry in characters
-         ],
-         scenes=[
-             ScenePlan(
-                 scene_id=scene["scene_id"],
-                 title=scene["title"],
-                 summary=scene["summary"],
-                 visual_prompt=scene["visual_prompt"],
-                 characters=scene.get("characters") or [],
-             )
-             for scene in scenes
-         ],
-     )
-
-     target_scene = next((scene for scene in storyboard.scenes if scene.scene_id == scene_payload["scene_id"]), storyboard.scenes[0])
-     prompt = build_scene_prompt(target_scene, storyboard)
-
-     video_service = HuggingFaceVideoService(token=hf_token)
-     model_used, video_path = video_service.generate(
-         prompt=prompt,
-         preferred_model=preferred_model or None,
-         negative_prompt=negative_prompt,
-         duration_seconds=duration_seconds,
-         fps=fps,
-         seed=seed if seed >= 0 else None,
-     )
-
-     metadata = {
-         "model": model_used,
-         "scene": target_scene.scene_id,
-         "prompt": prompt,
-         "negative_prompt": negative_prompt,
-         "duration_seconds": duration_seconds,
-         "fps": fps,
-     }
-
-     status_message = f"Rendered scene {target_scene.scene_id} via {model_used}."
-     return status_message, video_path, metadata
-
-
- # -----------------------------
- # Gradio interface
- # -----------------------------
-
-
- def build_interface() -> gr.Blocks:
-     default_hf = os.getenv("HF_TOKEN", "")
-     default_google = os.getenv("GOOGLE_API_KEY", "")
-
-     with gr.Blocks() as demo:
-         gr.Markdown("# CineGen AI Director")
-         gr.Markdown(
-             "Transform a simple idea into a storyboard, character deck, and video shots. "
-             "Tokens can be loaded from the environment for local debugging; in production the fields must be filled manually."
          )
-
-         with gr.Row():
-             with gr.Column():
-                 gr.Markdown("### Credentials")
-                 google_api_key_input = gr.Textbox(
-                     label="Google API Key",
-                     value=default_google,
-                     type="password",
-                     placeholder="GOOGLE_API_KEY",
-                 )
-                 hf_token_input = gr.Textbox(
-                     label="Hugging Face Token",
-                     value=default_hf,
-                     type="password",
-                     placeholder="hf_xxx",
-                 )
-
-                 gr.Markdown("### Story Settings")
-                 movie_idea = gr.Textbox(
-                     label="Movie Idea",
-                     value="A lone robot gardener trying to revive a neon-drenched city park.",
-                     lines=4,
-                 )
-                 visual_style = gr.Dropdown(
-                     label="Visual Style",
-                     choices=["Cinematic Realism", "American Cartoon", "Anime Noir", "Cyberpunk", "Claymation"],
-                     value="Cinematic Realism",
-                 )
-                 runtime_hint = gr.Dropdown(
-                     label="Runtime Target",
-                     choices=["30 seconds", "45 seconds", "60 seconds"],
-                     value="45 seconds",
-                 )
-                 tone = gr.Textbox(
-                     label="Tone keywords",
-                     value="hopeful, dynamic camera, sweeping synth score",
-                 )
-                 scene_count = gr.Slider(
-                     label="Scene Count",
-                     minimum=3,
-                     maximum=8,
-                     value=4,
-                     step=1,
-                 )
-                 generate_storyboard_btn = gr.Button("Generate Storyboard", variant="primary")
-
-             with gr.Column():
-                 status_box = gr.Markdown("Status: awaiting input.")
-                 storyboard_json = gr.JSON(label="Storyboard JSON")
-                 character_markdown = gr.Markdown(label="Character Profiles")
-                 character_gallery = gr.Gallery(label="Character Anchors", columns=2, rows=2, height="auto")
-
-         with gr.Tab("Scene Rendering"):
-             scene_choice = gr.Dropdown(label="Scene", choices=[])
-             preferred_model = gr.Dropdown(
-                 label="Preferred Video Model",
-                 choices=HuggingFaceVideoService.MODEL_FALLBACK,
-                 value=HuggingFaceVideoService.MODEL_FALLBACK[0],
-             )
-             negative_prompt = gr.Textbox(
-                 label="Negative Prompt",
-                 value="low resolution, flicker, watermark, distorted faces",
-             )
-             duration_seconds = gr.Slider(label="Duration (s)", minimum=1.0, maximum=4.0, value=2.0, step=0.5)
-             fps = gr.Slider(label="FPS", minimum=12, maximum=24, value=24, step=1)
-             seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
-             generate_video_btn = gr.Button("Render Selected Scene", variant="primary")
-             video_status = gr.Markdown("Video renderer idle.")
-             video_output = gr.Video(label="Generated Clip")
-             video_metadata = gr.JSON(label="Render Metadata")
-
-         storyboard_state = gr.State({})
-         character_state = gr.State([])
-
-         generate_storyboard_btn.click(
-             storyboard_callback,
-             inputs=[movie_idea, visual_style, runtime_hint, tone, scene_count, google_api_key_input],
-             outputs=[
-                 status_box,
-                 storyboard_json,
-                 character_markdown,
-                 character_gallery,
-                 storyboard_state,
-                 character_state,
-                 scene_choice,
-             ],
          )
 
-         generate_video_btn.click(
-             generate_video_callback,
-             inputs=[
-                 scene_choice,
-                 storyboard_state,
-                 hf_token_input,
-                 preferred_model,
-                 negative_prompt,
-                 duration_seconds,
-                 fps,
-                 seed,
-             ],
-             outputs=[video_status, video_output, video_metadata],
          )
 
-     return demo
-
-
- if __name__ == "__main__":
-     interface = build_interface()
-     server_name = os.getenv("GRADIO_SERVER_HOST") or "0.0.0.0"
-     server_port = int(os.getenv("GRADIO_SERVER_PORT") or os.getenv("SERVER_PORT") or "7860")
-     interface.launch(
-         server_name=server_name,
-         server_port=server_port,
-         theme=gr.themes.Soft(),
-         css=".gradio-container {max-width: 1200px; margin: auto;}",
-         footer_links=["gradio", "settings"],
-         allowed_paths=[str(Path.cwd())],
-         ssr_mode=False,
  )
+ from __future__ import annotations
+
  import os
+ from typing import List, Tuple
 
  import gradio as gr
 
+ from cinegen import CharacterDesigner, StoryGenerator, VideoDirector
+ from cinegen.models import Storyboard
+
+ STYLE_CHOICES = [
+     "Cinematic Realism",
+     "Neo-Noir Animation",
+     "Analog Horror",
+     "Retro-Futuristic",
+     "Dreamlike Documentary",
+ ]
+
+ VIDEO_MODEL_CHOICES = [
+     ("Wan 2.1 (fal-ai)", "Wan-AI/Wan2.1-T2V-14B"),
+     ("LTX Video 0.9.7", "Lightricks/LTX-Video-0.9.7-distilled"),
+     ("Hunyuan Video 1.5", "tencent/HunyuanVideo-1.5"),
+     ("CogVideoX 5B", "THUDM/CogVideoX-5b"),
+ ]
+
+ SCENE_COLUMNS = ["Scene", "Title", "Action", "Visuals", "Characters", "Duration (s)"]
+ CHARACTER_COLUMNS = ["ID", "Name", "Role", "Traits"]
+
+
+ def _ensure_storyboard(board: Storyboard | None) -> Storyboard:
+     if not board:
+         raise gr.Error("Create a storyboard first.")
+     return board
+
+
+ def _validate_inputs(idea: str | None, image_path: str | None):
+     if not idea and not image_path:
+         raise gr.Error("Provide either a story idea or upload a reference image.")
+
+
+ def handle_storyboard(
+     idea: str,
+     inspiration_image: str | None,
+     style: str,
      scene_count: int,
+     google_api_key: str,
+ ) -> Tuple[str, List[List[str]], List[List[str]], Storyboard]:
+     _validate_inputs(idea, inspiration_image)
+     generator = StoryGenerator(api_key=google_api_key or None)
+     storyboard = generator.generate(
+         idea=idea,
+         style=style,
          scene_count=scene_count,
+         inspiration_path=inspiration_image,
      )
+     summary_md = f"### {storyboard.title}\n{storyboard.synopsis}"
+     scene_rows = storyboard.scenes_table()
+     character_rows = storyboard.characters_table()
      return (
+         summary_md,
+         [[row[col] for col in SCENE_COLUMNS] for row in scene_rows],
+         [[row[col] for col in CHARACTER_COLUMNS] for row in character_rows],
+         storyboard,
      )
 
 
+ def handle_character_design(
+     storyboard: Storyboard | None,
+     google_api_key: str,
  ):
+     board = _ensure_storyboard(storyboard)
+     designer = CharacterDesigner(api_key=google_api_key or None)
+     gallery, updated_board = designer.design(board)
+     if not gallery:
+         raise gr.Error("Failed to design characters.")
+     return gallery, updated_board
+
+
+ def handle_video_render(
+     storyboard: Storyboard | None,
+     hf_token: str,
+     model_choice: str,
+ ):
+     board = _ensure_storyboard(storyboard)
+     prioritized_models = [model_choice] + [
+         model for _, model in VIDEO_MODEL_CHOICES if model != model_choice
+     ]
+     director = VideoDirector(token=hf_token or None, models=prioritized_models)
+     final_cut, logs = director.render(board)
+     log_md = "\n".join(f"- {line}" for line in logs)
+     return final_cut, log_md
+
+
+ css = """
+ #cinegen-app {
+     max-width: 1080px;
+     margin: 0 auto;
+ }
+ """
+
+ with gr.Blocks(css=css, fill_height=True, theme=gr.themes.Soft(), elem_id="cinegen-app") as demo:
+     gr.Markdown(
+         "## 🎬 CineGen AI Director\n"
+         "Drop an idea or inspiration image and let CineGen produce a storyboard, character boards, "
+         "and a compiled short film using Hugging Face video models."
+     )
+
+     story_state = gr.State()
+
+     with gr.Row():
+         idea_box = gr.Textbox(
+             label="Movie Idea",
+             placeholder="E.g. A time loop love story set in a neon bazaar.",
+             lines=3,
          )
+         inspiration = gr.Image(label="Reference Image (optional)", type="filepath")
+
+     with gr.Row():
+         style_dropdown = gr.Dropdown(
+             label="Visual Style",
+             choices=STYLE_CHOICES,
+             value=STYLE_CHOICES[0],
+         )
+         scene_slider = gr.Slider(
+             label="Scene Count",
+             minimum=3,
+             maximum=8,
+             value=4,
+             step=1,
+         )
+         video_model_dropdown = gr.Dropdown(
+             label="Preferred Video Model",
+             choices=[choice for choice, _ in VIDEO_MODEL_CHOICES],
+             value=VIDEO_MODEL_CHOICES[0][0],
          )
 
+     with gr.Accordion("API Keys", open=False):
+         google_key_input = gr.Textbox(
+             label="Google API Key (Gemini)",
+             type="password",
+             placeholder="Required for live Gemini calls. Leave blank to use offline stubs.",
+             value=os.environ.get("GOOGLE_API_KEY", ""),
+         )
+         hf_token_input = gr.Textbox(
+             label="Hugging Face Token",
+             type="password",
+             placeholder="Needed for Wan/LTX/Hunyuan video generation.",
+             value=os.environ.get("HF_TOKEN", ""),
          )
 
+     storyboard_btn = gr.Button("Create Storyboard", variant="primary")
+     summary_md = gr.Markdown("Storyboard output will appear here.")
+     scenes_df = gr.Dataframe(headers=SCENE_COLUMNS, wrap=True)
+     characters_df = gr.Dataframe(headers=CHARACTER_COLUMNS, wrap=True)
+
+     storyboard_btn.click(
+         fn=handle_storyboard,
+         inputs=[idea_box, inspiration, style_dropdown, scene_slider, google_key_input],
+         outputs=[summary_md, scenes_df, characters_df, story_state],
+     )
+
+     with gr.Row():
+         design_btn = gr.Button("Design Characters", variant="secondary")
+         render_btn = gr.Button("Render Short Film", variant="primary")
+
+     gallery = gr.Gallery(label="Character References", columns=4, height=320)
+     render_logs = gr.Markdown(label="Render Log")
+     final_video = gr.Video(label="CineGen Short Film", interactive=False)
+
+     design_btn.click(
+         fn=handle_character_design,
+         inputs=[story_state, google_key_input],
+         outputs=[gallery, story_state],
+     )
+
+     def _model_value(label: str) -> str:
+         lookup = dict(VIDEO_MODEL_CHOICES)
+         return lookup.get(label, VIDEO_MODEL_CHOICES[0][1])
+
+     def render_wrapper(board, token, label):
+         return handle_video_render(board, token, _model_value(label))
+
+     render_btn.click(
+         fn=render_wrapper,
+         inputs=[story_state, hf_token_input, video_model_dropdown],
+         outputs=[final_video, render_logs],
      )
+
+ if __name__ == "__main__":
+     demo.launch()
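
A minimal offline smoke test for the refactored entry points, assuming the `cinegen` package added below is on the import path and neither `GOOGLE_API_KEY` nor `HF_TOKEN` is set, so the stub storyboard and placeholder clips are exercised (the script itself is illustrative, not part of the commit):

```python
# Hypothetical smoke test: with no API keys, StoryGenerator falls back to
# build_stub_storyboard and VideoDirector renders placeholder clips.
from cinegen import StoryGenerator, VideoDirector

storyboard = StoryGenerator(api_key=None).generate(
    idea="A time loop love story set in a neon bazaar.",
    style="Cinematic Realism",
    scene_count=4,
)
print(storyboard.title, len(storyboard.scenes), "scenes")

final_cut, logs = VideoDirector(token=None).render(storyboard)
print(final_cut)  # merged mp4 path (or the first clip if moviepy is missing)
```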
cinegen/__init__.py ADDED
@@ -0,0 +1,13 @@
+ from .models import Storyboard, SceneBeat, CharacterSpec
+ from .story_engine import StoryGenerator
+ from .character_engine import CharacterDesigner
+ from .video_engine import VideoDirector
+
+ __all__ = [
+     "Storyboard",
+     "SceneBeat",
+     "CharacterSpec",
+     "StoryGenerator",
+     "CharacterDesigner",
+     "VideoDirector",
+ ]
cinegen/character_engine.py ADDED
@@ -0,0 +1,62 @@
+ from __future__ import annotations
+
+ import os
+ from typing import List, Optional, Tuple
+
+ from .models import Storyboard
+ from .placeholders import synthesize_character_card
+
+ DEFAULT_IMAGE_MODEL = os.environ.get("CINEGEN_CHARACTER_MODEL", "gemini-2.5-flash-image")
+
+
+ def _load_google_client(api_key: Optional[str]):
+     if not api_key:
+         return None
+
+     try:
+         from google import genai
+
+         return genai.Client(api_key=api_key)
+     except Exception:  # pragma: no cover - optional dependency
+         return None
+
+
+ class CharacterDesigner:
+     def __init__(self, api_key: Optional[str] = None):
+         self.api_key = api_key or os.environ.get("GOOGLE_API_KEY")
+         self.client = _load_google_client(self.api_key)
+
+     def design(self, storyboard: Storyboard) -> Tuple[List[Tuple[str, str]], Storyboard]:
+         gallery: List[Tuple[str, str]] = []
+         for character in storyboard.characters:
+             image_path = None
+             if self.client:
+                 image_path = self._try_generate(character, storyboard.style)
+             if not image_path:
+                 image_path = synthesize_character_card(character, storyboard.style)
+             character.reference_image = image_path
+             caption = f"{character.name} — {character.role}"
+             gallery.append((image_path, caption))
+         return gallery, storyboard
+
+     def _try_generate(self, character, style: str) -> Optional[str]:  # pragma: no cover
+         prompt = (
+             f"Create a portrait for {character.name}, a {character.role} in a {style} short film. "
+             f"Traits: {', '.join(character.traits)}. Description: {character.description}."
+         )
+         try:
+             response = self.client.models.generate_content(
+                 model=DEFAULT_IMAGE_MODEL,
+                 contents=[prompt],
+             )
+             for part in response.parts:
+                 if getattr(part, "inline_data", None):
+                     image = part.as_image()
+                     tmp_dir = os.path.join("/tmp", "cinegen-characters")
+                     os.makedirs(tmp_dir, exist_ok=True)
+                     path = os.path.join(tmp_dir, f"{character.identifier.lower()}.png")
+                     image.save(path)
+                     return path
+         except Exception:
+             return None
+         return None
cinegen/models.py ADDED
@@ -0,0 +1,61 @@
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+ from typing import List, Optional
+
+
+ @dataclass
+ class CharacterSpec:
+     identifier: str
+     name: str
+     role: str
+     description: str
+     traits: List[str] = field(default_factory=list)
+     reference_image: Optional[str] = None
+
+     def to_row(self) -> dict:
+         traits = ", ".join(self.traits)
+         return {
+             "ID": self.identifier,
+             "Name": self.name,
+             "Role": self.role,
+             "Traits": traits or "—",
+         }
+
+
+ @dataclass
+ class SceneBeat:
+     scene_id: str
+     title: str
+     visuals: str
+     action: str
+     characters: List[str] = field(default_factory=list)
+     duration: int = 6
+     mood: str = ""
+     camera: str = ""
+
+     def to_row(self) -> dict:
+         return {
+             "Scene": self.scene_id,
+             "Title": self.title,
+             "Action": self.action,
+             "Visuals": self.visuals,
+             "Characters": ", ".join(self.characters) or "—",
+             "Duration (s)": self.duration,
+         }
+
+
+ @dataclass
+ class Storyboard:
+     title: str
+     synopsis: str
+     style: str
+     inspiration_hint: Optional[str]
+     characters: List[CharacterSpec] = field(default_factory=list)
+     scenes: List[SceneBeat] = field(default_factory=list)
+
+     def characters_table(self) -> List[dict]:
+         return [char.to_row() for char in self.characters]
+
+     def scenes_table(self) -> List[dict]:
+         return [scene.to_row() for scene in self.scenes]
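
For reference, a small sketch of how these dataclasses are consumed: the `to_row()` keys line up with `SCENE_COLUMNS` and `CHARACTER_COLUMNS` in app.py (the sample values here are made up):

```python
from cinegen.models import CharacterSpec, SceneBeat, Storyboard

board = Storyboard(
    title="Neon Bazaar",
    synopsis="A compact time-loop romance.",
    style="Cinematic Realism",
    inspiration_hint=None,
    characters=[CharacterSpec("CHAR-1", "Mara", "Lead", "Drives the loop.", ["brave", "curious"])],
    scenes=[SceneBeat("SCENE-1", "Opening Beat", "soft neon framing", "Mara wakes mid-loop.", ["Mara"])],
)

# Row dicts keyed exactly as the Gradio Dataframes expect.
print(board.scenes_table()[0]["Duration (s)"])  # 6 (the default)
print(board.characters_table()[0]["Traits"])    # "brave, curious"
```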
cinegen/placeholders.py ADDED
@@ -0,0 +1,163 @@
+ from __future__ import annotations
+
+ import os
+ import random
+ import string
+ import tempfile
+ from typing import List
+
+ import imageio
+ import numpy as np
+ from PIL import Image, ImageDraw, ImageFont
+
+ from .models import CharacterSpec, SceneBeat, Storyboard
+
+ SCENE_TITLES = [
+     "Opening Beat",
+     "Inciting Incident",
+     "Turning Point",
+     "Climactic Push",
+     "Final Shot",
+ ]
+
+ CHARACTER_ARCHETYPES = [
+     ("Lead", "Curious protagonist who drives the story."),
+     ("Ally", "Supportive partner offering heart and humor."),
+     ("Antagonist", "Force of tension that keeps the stakes high."),
+ ]
+
+ PALETTE = [
+     (28, 35, 51),
+     (44, 106, 116),
+     (96, 108, 56),
+     (224, 142, 73),
+     (211, 86, 97),
+     (123, 74, 173),
+ ]
+
+
+ def _slugify(text: str) -> str:
+     safe = "".join(ch for ch in text if ch.isalnum() or ch in (" ", "-")).strip()
+     safe = safe.replace(" ", "-")
+     safe = safe.lower()
+     return safe or "cinegen"
+
+
+ def build_stub_storyboard(
+     idea: str,
+     style: str,
+     scene_count: int,
+     inspiration_hint: str | None,
+ ) -> Storyboard:
+     random.seed(_slugify(idea) + style + str(scene_count))
+     title = idea.title() if idea else f"{style} Short"
+     synopsis = (
+         f"A {style.lower()} short that transforms the idea '{idea or 'mystery cue'}' "
+         "into a compact cinematic arc."
+     )
+     characters: List[CharacterSpec] = []
+     for idx, (role, desc) in enumerate(CHARACTER_ARCHETYPES):
+         if idx >= 3 and scene_count <= 3:
+             break
+         identifier = f"CHAR-{idx+1}"
+         name = f"{role} {random.choice(string.ascii_uppercase)}"
+         traits = random.sample(
+             ["brave", "witty", "restless", "tactical", "empathetic", "curious"], 2
+         )
+         characters.append(
+             CharacterSpec(
+                 identifier=identifier,
+                 name=name,
+                 role=role,
+                 description=desc,
+                 traits=traits,
+             )
+         )
+
+     scenes: List[SceneBeat] = []
+     for idx in range(scene_count):
+         label = SCENE_TITLES[idx % len(SCENE_TITLES)]
+         scene_id = f"SCENE-{idx+1}"
+         visuals = (
+             f"{style} framing with {random.choice(['soft neon', 'moody shadows', 'bold silhouettes'])}."
+         )
+         action = f"{characters[0].name if characters else 'The hero'} faces {random.choice(['an unseen threat', 'a tough decision', 'their reflection'])}."
+         involved = [char.name for char in characters if random.random() > 0.3][:2] or [
+             characters[0].name if characters else "Narrator"
+         ]
+         scenes.append(
+             SceneBeat(
+                 scene_id=scene_id,
+                 title=label,
+                 visuals=visuals,
+                 action=action,
+                 characters=involved,
+                 duration=6,
+                 mood=random.choice(["hopeful", "tense", "whimsical"]),
+                 camera=random.choice(["slow push", "steady wide", "handheld close-up"]),
+             )
+         )
+
+     appendix = (
+         f"Aim for motifs inspired by the uploaded reference: {inspiration_hint}."
+         if inspiration_hint
+         else ""
+     )
+
+     return Storyboard(
+         title=title,
+         synopsis=f"{synopsis} {appendix}".strip(),
+         style=style,
+         inspiration_hint=inspiration_hint,
+         characters=characters,
+         scenes=scenes,
+     )
+
+
+ def synthesize_character_card(character: CharacterSpec, style: str) -> str:
+     width, height = 640, 640
+     color = random.choice(PALETTE)
+     image = Image.new("RGB", (width, height), color=color)
+     draw = ImageDraw.Draw(image)
+     font = ImageFont.load_default()
+     text = f"{character.name}\n{character.role}\n{', '.join(character.traits)}"
+     draw.multiline_text((40, 80), text, fill=(255, 255, 255), font=font, spacing=6)
+     draw.text((40, height - 60), f"Style: {style}", fill=(255, 255, 255), font=font)
+     tmp_dir = tempfile.mkdtemp(prefix="cinegen-character-")
+     path = os.path.join(tmp_dir, f"{_slugify(character.name)}.png")
+     image.save(path, format="PNG")
+     return path
+
+
+ def create_placeholder_video(scene: SceneBeat, style: str, seconds: int = 4) -> str:
+     fps = 6
+     frames = fps * seconds
+     width, height = 512, 512
+     tmp_dir = tempfile.mkdtemp(prefix="cinegen-scene-")
+     path = os.path.join(tmp_dir, f"{scene.scene_id.lower()}.mp4")
+     rng = np.random.default_rng(sum(ord(c) for c in scene.scene_id))
+     with imageio.get_writer(path, fps=fps) as writer:
+         for _ in range(frames):
+             base_color = rng.integers(60, 220, size=3, dtype=np.uint8)
+             frame = np.zeros((height, width, 3), dtype=np.uint8)
+             frame[:] = base_color
+             image = Image.fromarray(frame)
+             draw = ImageDraw.Draw(image)
+             font = ImageFont.load_default()
+             overlay = f"{scene.title}\n{scene.action[:60]}..."
+             draw.multiline_text((24, 24), overlay, fill=(255, 255, 255), font=font, spacing=4)
+             draw.text(
+                 (24, height - 40),
+                 f"{style} • {scene.characters[0] if scene.characters else 'Solo'}",
+                 fill=(255, 255, 255),
+                 font=font,
+             )
+             writer.append_data(np.array(image))
+     return path
+
+
+ def describe_image_reference(image_path: str | None) -> str | None:
+     if not image_path or not os.path.exists(image_path):
+         return None
+     size = os.path.getsize(image_path)
+     return f"{os.path.basename(image_path)} ({round(size / 1024, 1)}KB)"
cinegen/story_engine.py ADDED
@@ -0,0 +1,143 @@
+ from __future__ import annotations
+
+ import json
+ import os
+ from typing import Any, Dict, Optional
+
+ from .models import Storyboard, CharacterSpec, SceneBeat
+ from .placeholders import build_stub_storyboard, describe_image_reference
+
+ DEFAULT_STORY_MODEL = os.environ.get("CINEGEN_STORY_MODEL", "gemini-2.5-flash")
+
+
+ def _load_google_client(api_key: Optional[str]):
+     if not api_key:
+         return None, "Missing API key"
+
+     try:
+         from google import genai
+
+         client = genai.Client(api_key=api_key)
+         return client, None
+     except Exception as exc:  # pragma: no cover - depends on optional deps
+         return None, str(exc)
+
+
+ class StoryGenerator:
+     def __init__(self, api_key: Optional[str] = None):
+         self.api_key = api_key or os.environ.get("GOOGLE_API_KEY")
+         self.client, self.client_error = _load_google_client(self.api_key)
+
+     def generate(
+         self,
+         idea: str,
+         style: str,
+         scene_count: int,
+         inspiration_path: Optional[str] = None,
+     ) -> Storyboard:
+         if not self.client:
+             return build_stub_storyboard(
+                 idea=idea,
+                 style=style,
+                 scene_count=scene_count,
+                 inspiration_hint=describe_image_reference(inspiration_path),
+             )
+
+         prompt = self._build_prompt(idea, style, scene_count)
+         contents = [prompt]
+         parts = self._maybe_add_image_part(inspiration_path)
+         contents = parts + contents if parts else contents
+
+         try:  # pragma: no cover - relies on remote API
+             response = self.client.models.generate_content(
+                 model=DEFAULT_STORY_MODEL,
+                 contents=contents,
+                 config={"response_mime_type": "application/json"},
+             )
+             payload = json.loads(response.text)
+             return self._parse_payload(
+                 payload,
+                 style=style,
+                 inspiration_hint=describe_image_reference(inspiration_path),
+             )
+         except Exception:
+             return build_stub_storyboard(
+                 idea=idea,
+                 style=style,
+                 scene_count=scene_count,
+                 inspiration_hint=describe_image_reference(inspiration_path),
+             )
+
+     @staticmethod
+     def _build_prompt(idea: str, style: str, scene_count: int) -> str:
+         return (
+             "You are CineGen, an AI film director. Convert the provided idea into a "
+             "structured storyboard JSON with the following keys:\n"
+             "{\n"
+             '  "title": str,\n'
+             '  "synopsis": str,\n'
+             '  "characters": [\n'
+             '    {"id": "CHAR-1", "name": str, "role": str, "description": str, "traits": [str, ...]}\n'
+             "  ],\n"
+             '  "scenes": [\n'
+             '    {"id": "SCENE-1", "title": str, "visuals": str, "action": str, "characters": [str], "duration": int, "mood": str, "camera": str}\n'
+             "  ]\n"
+             "}\n"
+             f"Idea: {idea or 'Use the inspiration image only.'}\n"
+             f"Visual Style: {style}\n"
+             f"Scene Count: {scene_count}\n"
+             "Ensure every scene references at least one character ID."
+         )
+
+     def _maybe_add_image_part(self, inspiration_path: Optional[str]):
+         if not inspiration_path or not os.path.exists(inspiration_path):
+             return None
+         try:
+             from google.genai import types  # pragma: no cover - optional dependency
+
+             with open(inspiration_path, "rb") as handle:
+                 data = handle.read()
+             mime = "image/png" if inspiration_path.endswith(".png") else "image/jpeg"
+             return [types.Part.from_bytes(data=data, mime_type=mime)]
+         except Exception:
+             return None
+
+     @staticmethod
+     def _parse_payload(
+         payload: Dict[str, Any],
+         style: str,
+         inspiration_hint: Optional[str],
+     ) -> Storyboard:
+         characters = [
+             CharacterSpec(
+                 identifier=item.get("id", f"CHAR-{idx+1}"),
+                 name=item.get("name", f"Character {idx+1}"),
+                 role=item.get("role", "Supporting"),
+                 description=item.get("description", ""),
+                 traits=item.get("traits", []),
+             )
+             for idx, item in enumerate(payload.get("characters", []))
+         ]
+         scenes = [
+             SceneBeat(
+                 scene_id=item.get("id", f"SCENE-{idx+1}"),
+                 title=item.get("title", f"Scene {idx+1}"),
+                 visuals=item.get("visuals", ""),
+                 action=item.get("action", ""),
+                 characters=item.get("characters", []),
+                 duration=int(item.get("duration", 6)),
+                 mood=item.get("mood", ""),
+                 camera=item.get("camera", ""),
+             )
+             for idx, item in enumerate(payload.get("scenes", []))
+         ]
+         if not characters or not scenes:
+             raise ValueError("Incomplete payload")
+         return Storyboard(
+             title=payload.get("title", "Untitled Short"),
+             synopsis=payload.get("synopsis", ""),
+             style=style,
+             inspiration_hint=inspiration_hint,
+             characters=characters,
+             scenes=scenes,
+         )
cinegen/video_engine.py ADDED
@@ -0,0 +1,119 @@
+ from __future__ import annotations
+
+ import os
+ import tempfile
+ import time
+ from typing import List, Optional, Sequence, Tuple
+
+ import requests
+
+ from .models import SceneBeat, Storyboard
+ from .placeholders import create_placeholder_video
+
+ DEFAULT_VIDEO_MODELS = [
+     "Wan-AI/Wan2.1-T2V-14B",
+     "Lightricks/LTX-Video-0.9.7-distilled",
+     "tencent/HunyuanVideo-1.5",
+     "THUDM/CogVideoX-5b",
+ ]
+
+
+ class VideoDirector:
+     def __init__(
+         self,
+         token: Optional[str] = None,
+         models: Optional[Sequence[str]] = None,
+     ):
+         env_token = (
+             token
+             or os.environ.get("HF_TOKEN")
+             or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
+             or os.environ.get("HUGGING_FACE_HUB_TOKEN")
+         )
+         self.token = env_token
+         self.models = list(models or DEFAULT_VIDEO_MODELS)
+
+     def render(self, storyboard: Storyboard) -> Tuple[str, List[str]]:
+         logs: List[str] = []
+         clip_paths: List[str] = []
+         for scene in storyboard.scenes:
+             video = self._produce_scene(storyboard, scene, logs)
+             clip_paths.append(video)
+         final_cut = self._merge_clips(clip_paths, logs)
+         return final_cut, logs
+
+     def _produce_scene(self, storyboard: Storyboard, scene: SceneBeat, logs: List[str]) -> str:
+         composed_prompt = self._compose_prompt(storyboard, scene)
+         if self.token:
+             for model in self.models:
+                 try:
+                     clip = self._call_hf_inference(composed_prompt, model, scene.duration)
+                     logs.append(f"Scene {scene.scene_id}: generated via {model}")
+                     return clip
+                 except Exception as exc:
+                     logs.append(f"Scene {scene.scene_id}: {model} failed ({exc})")
+         clip = create_placeholder_video(scene, storyboard.style)
+         logs.append(f"Scene {scene.scene_id}: fallback placeholder clip used.")
+         return clip
+
+     def _call_hf_inference(self, prompt: str, model_id: str, duration: int) -> str:
+         if not self.token:
+             raise RuntimeError("Missing Hugging Face token")
+
+         url = f"https://api-inference.huggingface.co/models/{model_id}"
+         headers = {
+             "Authorization": f"Bearer {self.token}",
+             "Accept": "video/mp4",
+         }
+         payload = {
+             "inputs": prompt,
+             "parameters": {"duration": duration},
+         }
+         for _ in range(3):
+             response = requests.post(url, headers=headers, json=payload, timeout=600)
+             if response.status_code == 200:
+                 tmp_dir = tempfile.mkdtemp(prefix="cinegen-video-")
+                 path = os.path.join(tmp_dir, f"{model_id.split('/')[-1]}.mp4")
+                 with open(path, "wb") as handle:
+                     handle.write(response.content)
+                 return path
+             if response.status_code in (503, 529, 202):
+                 time.sleep(5)
+                 continue
+             raise RuntimeError(f"{response.status_code}: {response.text[:120]}")
+         raise RuntimeError("Model busy")
+
+     @staticmethod
+     def _compose_prompt(storyboard: Storyboard, scene: SceneBeat) -> str:
+         characters = "; ".join(scene.characters)
+         return (
+             f"Title: {storyboard.title}. Style: {storyboard.style}. "
+             f"Scene {scene.scene_id} - {scene.title}: {scene.action} "
+             f"Visual cues: {scene.visuals}. Mood: {scene.mood}. "
+             f"Camera: {scene.camera}. Characters: {characters or 'solo sequence'}."
+         )
+
+     def _merge_clips(self, clip_paths: Sequence[str], logs: List[str]) -> str:
+         try:
+             from moviepy.editor import VideoFileClip, concatenate_videoclips  # type: ignore
+         except Exception as exc:
+             logs.append(f"MoviePy unavailable ({exc}); returning first clip only.")
+             return clip_paths[0]
+
+         clips = []
+         for path in clip_paths:
+             try:
+                 clip = VideoFileClip(path)
+                 clips.append(clip)
+             except Exception as exc:
+                 logs.append(f"Failed to read clip {path}: {exc}")
+         if not clips:
+             raise RuntimeError("No clips to merge")
+         final = concatenate_videoclips(clips, method="compose")
+         tmp_dir = tempfile.mkdtemp(prefix="cinegen-final-")
+         final_path = os.path.join(tmp_dir, "cinegen_short.mp4")
+         final.write_videofile(final_path, fps=clips[0].fps, codec="libx264", audio=False, verbose=False, logger=None)
+         for clip in clips:
+             clip.close()
+         logs.append(f"Merged {len(clips)} clips into final cut.")
+         return final_path
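
A sketch of driving VideoDirector directly with a preferred model tried first, mirroring what render_wrapper does in app.py with the dropdown value (the model IDs are this commit's defaults; whether each Inference API endpoint actually returns video bytes is not guaranteed):

```python
from cinegen.video_engine import DEFAULT_VIDEO_MODELS, VideoDirector

preferred = "Lightricks/LTX-Video-0.9.7-distilled"
ordered = [preferred] + [m for m in DEFAULT_VIDEO_MODELS if m != preferred]

# With a real HF token, each scene tries the models in this order;
# without one, every scene falls back to create_placeholder_video.
director = VideoDirector(token=None, models=ordered)
# director.render(storyboard) returns (final_cut_path, log_lines)
```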
requirements.txt CHANGED
@@ -5,4 +5,5 @@ huggingface-hub>=0.26.0
  pillow>=10.2.0
  numpy>=1.24.0
  requests>=2.31.0
-
+ imageio>=2.34
+ moviepy>=1.0.3