Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
70b0d1a
1
Parent(s):
934e5c4
update
Browse files- app.py +160 -622
- cinegen/__init__.py +13 -0
- cinegen/character_engine.py +62 -0
- cinegen/models.py +61 -0
- cinegen/placeholders.py +163 -0
- cinegen/story_engine.py +143 -0
- cinegen/video_engine.py +119 -0
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -1,653 +1,191 @@
|
|
| 1 |
-
import
|
|
|
|
| 2 |
import os
|
| 3 |
-
import
|
| 4 |
-
import textwrap
|
| 5 |
-
import time
|
| 6 |
-
from dataclasses import dataclass, field, asdict
|
| 7 |
-
from pathlib import Path
|
| 8 |
-
from typing import Any, Dict, List, Optional, Tuple
|
| 9 |
|
| 10 |
import gradio as gr
|
| 11 |
-
import requests
|
| 12 |
-
try:
|
| 13 |
-
from google import genai
|
| 14 |
-
except ImportError: # pragma: no cover - dependency is optional at import time
|
| 15 |
-
genai = None
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
# -----------------------------
|
| 19 |
-
# Domain data representations
|
| 20 |
-
# -----------------------------
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
@dataclass
|
| 24 |
-
class CharacterProfile:
|
| 25 |
-
character_id: str
|
| 26 |
-
name: str
|
| 27 |
-
description: str
|
| 28 |
-
visual_tags: List[str] = field(default_factory=list)
|
| 29 |
-
image_path: Optional[str] = None
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
@dataclass
|
| 33 |
-
class ScenePlan:
|
| 34 |
-
scene_id: str
|
| 35 |
-
title: str
|
| 36 |
-
summary: str
|
| 37 |
-
visual_prompt: str
|
| 38 |
-
characters: List[str] = field(default_factory=list)
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
@dataclass
|
| 42 |
-
class StoryboardPlan:
|
| 43 |
-
title: str
|
| 44 |
-
logline: str
|
| 45 |
-
style: str
|
| 46 |
-
runtime_hint: str
|
| 47 |
-
tone: str
|
| 48 |
-
characters: List[CharacterProfile]
|
| 49 |
-
scenes: List[ScenePlan]
|
| 50 |
-
|
| 51 |
-
def to_dict(self) -> Dict[str, Any]:
|
| 52 |
-
return {
|
| 53 |
-
"title": self.title,
|
| 54 |
-
"logline": self.logline,
|
| 55 |
-
"style": self.style,
|
| 56 |
-
"runtime_hint": self.runtime_hint,
|
| 57 |
-
"tone": self.tone,
|
| 58 |
-
"characters": [asdict(c) for c in self.characters],
|
| 59 |
-
"scenes": [asdict(s) for s in self.scenes],
|
| 60 |
-
}
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
# -----------------------------
|
| 64 |
-
# Helper utilities
|
| 65 |
-
# -----------------------------
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
def resolve_token(user_supplied: str, env_key: str) -> Optional[str]:
|
| 69 |
-
candidate = (user_supplied or "").strip()
|
| 70 |
-
if candidate:
|
| 71 |
-
return candidate
|
| 72 |
-
env_candidate = (os.getenv(env_key) or "").strip()
|
| 73 |
-
return env_candidate or None
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
def extract_json_block(text: str) -> str:
|
| 77 |
-
"""Return the first JSON object found inside the text."""
|
| 78 |
-
stack = []
|
| 79 |
-
start_index = None
|
| 80 |
-
for index, char in enumerate(text):
|
| 81 |
-
if char == "{":
|
| 82 |
-
if not stack:
|
| 83 |
-
start_index = index
|
| 84 |
-
stack.append(char)
|
| 85 |
-
elif char == "}" and stack:
|
| 86 |
-
stack.pop()
|
| 87 |
-
if not stack and start_index is not None:
|
| 88 |
-
return text[start_index : index + 1]
|
| 89 |
-
return text
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
def format_character_markdown(characters: List[CharacterProfile]) -> str:
|
| 93 |
-
lines = []
|
| 94 |
-
for character in characters:
|
| 95 |
-
tags = ", ".join(character.visual_tags) if character.visual_tags else "n/a"
|
| 96 |
-
lines.append(f"- **{character.name}** ({character.character_id}): {character.description} \n Visual tags: {tags}")
|
| 97 |
-
return "\n".join(lines) if lines else "No characters were generated yet."
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
def ensure_module_available(module_ref, friendly_name: str) -> None:
|
| 101 |
-
if module_ref is None:
|
| 102 |
-
raise gr.Error(
|
| 103 |
-
f"{friendly_name} is not installed. Install it via `pip install google-genai` and try again."
|
| 104 |
-
)
|
| 105 |
|
|
|
|
|
|
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
self,
|
| 115 |
-
api_key: str,
|
| 116 |
-
story_model: str = "gemini-2.5-flash",
|
| 117 |
-
image_model: str = "gemini-2.5-flash-image",
|
| 118 |
-
) -> None:
|
| 119 |
-
ensure_module_available(genai, "google-genai")
|
| 120 |
-
if not api_key:
|
| 121 |
-
raise gr.Error("Google API key is required.")
|
| 122 |
-
self.client = genai.Client(api_key=api_key)
|
| 123 |
-
self.story_model = story_model
|
| 124 |
-
self.image_model = image_model
|
| 125 |
-
|
| 126 |
-
def generate_storyboard(
|
| 127 |
-
self,
|
| 128 |
-
movie_idea: str,
|
| 129 |
-
visual_style: str,
|
| 130 |
-
scene_count: int,
|
| 131 |
-
runtime_hint: str,
|
| 132 |
-
tone: str,
|
| 133 |
-
) -> StoryboardPlan:
|
| 134 |
-
prompt = textwrap.dedent(
|
| 135 |
-
f"""
|
| 136 |
-
You are CineGen, an AI creative director. Given the following idea, craft a production-ready storyboard.
|
| 137 |
-
|
| 138 |
-
Idea: {movie_idea}
|
| 139 |
-
Target visual style: {visual_style}
|
| 140 |
-
Desired runtime: {runtime_hint}
|
| 141 |
-
Tone keywords: {tone}
|
| 142 |
-
Scene count: exactly {scene_count}
|
| 143 |
-
|
| 144 |
-
Respond with valid JSON using this schema:
|
| 145 |
-
{{
|
| 146 |
-
"title": "...",
|
| 147 |
-
"logline": "...",
|
| 148 |
-
"style": "...",
|
| 149 |
-
"runtime_hint": "...",
|
| 150 |
-
"tone": "...",
|
| 151 |
-
"characters": [
|
| 152 |
-
{{"id": "char_1", "name": "...", "description": "...", "visual_tags": ["tag1", "tag2"]}}
|
| 153 |
-
],
|
| 154 |
-
"scenes": [
|
| 155 |
-
{{
|
| 156 |
-
"id": "scene_1",
|
| 157 |
-
"title": "...",
|
| 158 |
-
"summary": "...",
|
| 159 |
-
"visual_prompt": "...",
|
| 160 |
-
"characters": ["char_1", "char_2"]
|
| 161 |
-
}}
|
| 162 |
-
]
|
| 163 |
-
}}
|
| 164 |
-
|
| 165 |
-
Ensure each scene references character IDs from the characters array and highlight cinematic camera or lighting cues inside "visual_prompt".
|
| 166 |
-
"""
|
| 167 |
-
).strip()
|
| 168 |
-
|
| 169 |
-
response = self.client.models.generate_content(
|
| 170 |
-
model=self.story_model,
|
| 171 |
-
contents=prompt,
|
| 172 |
-
)
|
| 173 |
-
raw_text = getattr(response, "text", None) or "".join(
|
| 174 |
-
[getattr(part, "text", "") for part in getattr(response, "parts", [])]
|
| 175 |
-
)
|
| 176 |
-
if not raw_text:
|
| 177 |
-
raise gr.Error("Gemini did not return any content for the storyboard.")
|
| 178 |
-
|
| 179 |
-
serialized = extract_json_block(raw_text)
|
| 180 |
-
payload = json.loads(serialized)
|
| 181 |
-
|
| 182 |
-
characters = [
|
| 183 |
-
CharacterProfile(
|
| 184 |
-
character_id=entry.get("id", f"char_{idx+1}"),
|
| 185 |
-
name=entry.get("name", f"Character {idx+1}"),
|
| 186 |
-
description=entry.get("description", ""),
|
| 187 |
-
visual_tags=entry.get("visual_tags") or [],
|
| 188 |
-
)
|
| 189 |
-
for idx, entry in enumerate(payload.get("characters", []))
|
| 190 |
-
]
|
| 191 |
-
|
| 192 |
-
scenes = [
|
| 193 |
-
ScenePlan(
|
| 194 |
-
scene_id=scene.get("id", f"scene_{idx+1}"),
|
| 195 |
-
title=scene.get("title", f"Scene {idx+1}"),
|
| 196 |
-
summary=scene.get("summary", ""),
|
| 197 |
-
visual_prompt=scene.get("visual_prompt", ""),
|
| 198 |
-
characters=scene.get("characters") or [],
|
| 199 |
-
)
|
| 200 |
-
for idx, scene in enumerate(payload.get("scenes", []))
|
| 201 |
-
]
|
| 202 |
-
|
| 203 |
-
if len(scenes) != scene_count:
|
| 204 |
-
# Keep UX predictable even if the model under-delivers on scene count.
|
| 205 |
-
scenes = scenes[:scene_count]
|
| 206 |
-
|
| 207 |
-
return StoryboardPlan(
|
| 208 |
-
title=payload.get("title", "Untitled"),
|
| 209 |
-
logline=payload.get("logline", ""),
|
| 210 |
-
style=payload.get("style", visual_style),
|
| 211 |
-
runtime_hint=payload.get("runtime_hint", runtime_hint),
|
| 212 |
-
tone=payload.get("tone", tone),
|
| 213 |
-
characters=characters,
|
| 214 |
-
scenes=scenes,
|
| 215 |
-
)
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
rendered: List[CharacterProfile] = []
|
| 224 |
-
for character in characters[:max_characters]:
|
| 225 |
-
prompt = textwrap.dedent(
|
| 226 |
-
f"""
|
| 227 |
-
Create a front-facing character reference portrait for use in a video production pipeline.
|
| 228 |
-
Character: {character.name}
|
| 229 |
-
Description: {character.description}
|
| 230 |
-
Visual tags: {", ".join(character.visual_tags) if character.visual_tags else "n/a"}
|
| 231 |
-
Style: {visual_style}
|
| 232 |
-
|
| 233 |
-
Output a single cohesive concept art image.
|
| 234 |
-
"""
|
| 235 |
-
).strip()
|
| 236 |
-
response = self.client.models.generate_content(
|
| 237 |
-
model=self.image_model,
|
| 238 |
-
contents=prompt,
|
| 239 |
-
)
|
| 240 |
-
image_path = None
|
| 241 |
-
for part in getattr(response, "parts", []):
|
| 242 |
-
if getattr(part, "inline_data", None):
|
| 243 |
-
image = part.as_image()
|
| 244 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
|
| 245 |
-
image.save(tmp.name)
|
| 246 |
-
image_path = tmp.name
|
| 247 |
-
break
|
| 248 |
-
enriched = CharacterProfile(
|
| 249 |
-
character_id=character.character_id,
|
| 250 |
-
name=character.name,
|
| 251 |
-
description=character.description,
|
| 252 |
-
visual_tags=character.visual_tags,
|
| 253 |
-
image_path=image_path,
|
| 254 |
-
)
|
| 255 |
-
rendered.append(enriched)
|
| 256 |
-
return rendered
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
# -----------------------------
|
| 260 |
-
# Hugging Face video service
|
| 261 |
-
# -----------------------------
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
class HuggingFaceVideoService:
|
| 265 |
-
MODEL_FALLBACK = [
|
| 266 |
-
"Wan-AI/Wan2.1-T2V-14B",
|
| 267 |
-
"Lightricks/LTX-Video-0.9.7-distilled",
|
| 268 |
-
"tencent/HunyuanVideo-1.5",
|
| 269 |
-
"THUDM/CogVideoX-5b",
|
| 270 |
-
]
|
| 271 |
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
prompt: str,
|
| 281 |
-
preferred_model: Optional[str],
|
| 282 |
-
negative_prompt: str,
|
| 283 |
-
duration_seconds: float,
|
| 284 |
-
fps: int,
|
| 285 |
-
seed: Optional[int],
|
| 286 |
-
) -> Tuple[str, str]:
|
| 287 |
-
ordered_models = self._order_models(preferred_model)
|
| 288 |
-
last_error = ""
|
| 289 |
-
for model in ordered_models:
|
| 290 |
-
try:
|
| 291 |
-
video_path = self._invoke_model(
|
| 292 |
-
model=model,
|
| 293 |
-
prompt=prompt,
|
| 294 |
-
negative_prompt=negative_prompt,
|
| 295 |
-
duration_seconds=duration_seconds,
|
| 296 |
-
fps=fps,
|
| 297 |
-
seed=seed,
|
| 298 |
-
)
|
| 299 |
-
return model, video_path
|
| 300 |
-
except Exception as exc: # pragma: no cover - defensive fallback
|
| 301 |
-
last_error = str(exc)
|
| 302 |
-
time.sleep(1.5)
|
| 303 |
-
raise gr.Error(f"All video backends failed. Last error: {last_error}")
|
| 304 |
-
|
| 305 |
-
def _order_models(self, preferred_model: Optional[str]) -> List[str]:
|
| 306 |
-
models = list(self.MODEL_FALLBACK)
|
| 307 |
-
if preferred_model and preferred_model in models:
|
| 308 |
-
models.remove(preferred_model)
|
| 309 |
-
models.insert(0, preferred_model)
|
| 310 |
-
elif preferred_model:
|
| 311 |
-
models.insert(0, preferred_model)
|
| 312 |
-
return models
|
| 313 |
-
|
| 314 |
-
def _invoke_model(
|
| 315 |
-
self,
|
| 316 |
-
model: str,
|
| 317 |
-
prompt: str,
|
| 318 |
-
negative_prompt: str,
|
| 319 |
-
duration_seconds: float,
|
| 320 |
-
fps: int,
|
| 321 |
-
seed: Optional[int],
|
| 322 |
-
) -> str:
|
| 323 |
-
url = f"https://api-inference.huggingface.co/models/{model}"
|
| 324 |
-
headers = {
|
| 325 |
-
"Authorization": f"Bearer {self.token}",
|
| 326 |
-
"Accept": "video/mp4",
|
| 327 |
-
}
|
| 328 |
-
payload = {
|
| 329 |
-
"inputs": prompt,
|
| 330 |
-
"parameters": {
|
| 331 |
-
"negative_prompt": negative_prompt,
|
| 332 |
-
"num_frames": int(duration_seconds * fps),
|
| 333 |
-
"fps": fps,
|
| 334 |
-
"seed": seed,
|
| 335 |
-
"guidance_scale": 7.5,
|
| 336 |
-
},
|
| 337 |
-
"options": {"use_cache": True, "wait_for_model": True},
|
| 338 |
-
}
|
| 339 |
-
|
| 340 |
-
response = self.session.post(
|
| 341 |
-
url,
|
| 342 |
-
headers=headers,
|
| 343 |
-
json=payload,
|
| 344 |
-
timeout=600,
|
| 345 |
-
)
|
| 346 |
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
@staticmethod
|
| 359 |
-
def _write_video(content: bytes) -> str:
|
| 360 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as handle:
|
| 361 |
-
handle.write(content)
|
| 362 |
-
return handle.name
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
# -----------------------------
|
| 366 |
-
# CineGen pipeline orchestration
|
| 367 |
-
# -----------------------------
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
def build_scene_prompt(scene: ScenePlan, storyboard: StoryboardPlan) -> str:
|
| 371 |
-
character_blurbs = []
|
| 372 |
-
pool = {c.character_id: c for c in storyboard.characters}
|
| 373 |
-
for actor_id in scene.characters:
|
| 374 |
-
profile = pool.get(actor_id)
|
| 375 |
-
if profile:
|
| 376 |
-
tags = ", ".join(profile.visual_tags) if profile.visual_tags else ""
|
| 377 |
-
character_blurbs.append(f"{profile.name}: {profile.description} {tags}".strip())
|
| 378 |
-
|
| 379 |
-
character_block = "\n".join(character_blurbs) if character_blurbs else "Original characters only."
|
| 380 |
-
return textwrap.dedent(
|
| 381 |
-
f"""
|
| 382 |
-
Title: {storyboard.title}
|
| 383 |
-
Logline: {storyboard.logline}
|
| 384 |
-
Scene: {scene.title} ({scene.scene_id})
|
| 385 |
-
Narrative summary: {scene.summary}
|
| 386 |
-
Visual prompt: {scene.visual_prompt}
|
| 387 |
-
Visual style: {storyboard.style}
|
| 388 |
-
Tone: {storyboard.tone}
|
| 389 |
-
Characters:\n{character_block}
|
| 390 |
-
"""
|
| 391 |
-
).strip()
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
# -----------------------------
|
| 395 |
-
# Gradio callbacks
|
| 396 |
-
# -----------------------------
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
def storyboard_callback(
|
| 400 |
-
movie_idea: str,
|
| 401 |
-
visual_style: str,
|
| 402 |
-
runtime_hint: str,
|
| 403 |
-
tone: str,
|
| 404 |
scene_count: int,
|
| 405 |
-
|
| 406 |
-
):
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
movie_idea=movie_idea,
|
| 413 |
-
visual_style=visual_style,
|
| 414 |
scene_count=scene_count,
|
| 415 |
-
|
| 416 |
-
tone=tone,
|
| 417 |
)
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
character_markdown = format_character_markdown(characters_with_images)
|
| 422 |
-
gallery_entries = [
|
| 423 |
-
(profile.image_path, f"{profile.name} ({profile.character_id})")
|
| 424 |
-
for profile in characters_with_images
|
| 425 |
-
if profile.image_path
|
| 426 |
-
]
|
| 427 |
-
scene_choices = [f"{scene.scene_id}: {scene.title}" for scene in storyboard.scenes]
|
| 428 |
-
|
| 429 |
-
status_message = f"Storyboard ready: {storyboard.title} with {len(storyboard.scenes)} scenes."
|
| 430 |
return (
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
storyboard_dict,
|
| 436 |
-
[asdict(profile) for profile in characters_with_images],
|
| 437 |
-
gr.Dropdown.update(choices=scene_choices, value=scene_choices[0] if scene_choices else None),
|
| 438 |
)
|
| 439 |
|
| 440 |
|
| 441 |
-
def
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
hf_token_input: str,
|
| 445 |
-
preferred_model: str,
|
| 446 |
-
negative_prompt: str,
|
| 447 |
-
duration_seconds: float,
|
| 448 |
-
fps: int,
|
| 449 |
-
seed: int,
|
| 450 |
):
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
if not
|
| 455 |
-
raise gr.Error("
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
characters=[
|
| 473 |
-
CharacterProfile(
|
| 474 |
-
character_id=entry.get("character_id") or entry.get("id"),
|
| 475 |
-
name=entry.get("name", ""),
|
| 476 |
-
description=entry.get("description", ""),
|
| 477 |
-
visual_tags=entry.get("visual_tags") or [],
|
| 478 |
-
image_path=entry.get("image_path"),
|
| 479 |
-
)
|
| 480 |
-
for entry in characters
|
| 481 |
-
],
|
| 482 |
-
scenes=[
|
| 483 |
-
ScenePlan(
|
| 484 |
-
scene_id=scene["scene_id"],
|
| 485 |
-
title=scene["title"],
|
| 486 |
-
summary=scene["summary"],
|
| 487 |
-
visual_prompt=scene["visual_prompt"],
|
| 488 |
-
characters=scene.get("characters") or [],
|
| 489 |
-
)
|
| 490 |
-
for scene in scenes
|
| 491 |
-
],
|
| 492 |
-
)
|
| 493 |
-
|
| 494 |
-
target_scene = next((scene for scene in storyboard.scenes if scene.scene_id == scene_payload["scene_id"]), storyboard.scenes[0])
|
| 495 |
-
prompt = build_scene_prompt(target_scene, storyboard)
|
| 496 |
-
|
| 497 |
-
video_service = HuggingFaceVideoService(token=hf_token)
|
| 498 |
-
model_used, video_path = video_service.generate(
|
| 499 |
-
prompt=prompt,
|
| 500 |
-
preferred_model=preferred_model or None,
|
| 501 |
-
negative_prompt=negative_prompt,
|
| 502 |
-
duration_seconds=duration_seconds,
|
| 503 |
-
fps=fps,
|
| 504 |
-
seed=seed if seed >= 0 else None,
|
| 505 |
-
)
|
| 506 |
-
|
| 507 |
-
metadata = {
|
| 508 |
-
"model": model_used,
|
| 509 |
-
"scene": target_scene.scene_id,
|
| 510 |
-
"prompt": prompt,
|
| 511 |
-
"negative_prompt": negative_prompt,
|
| 512 |
-
"duration_seconds": duration_seconds,
|
| 513 |
-
"fps": fps,
|
| 514 |
-
}
|
| 515 |
|
| 516 |
-
status_message = f"Rendered scene {target_scene.scene_id} via {model_used}."
|
| 517 |
-
return status_message, video_path, metadata
|
| 518 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 519 |
|
| 520 |
-
# -----------------------------
|
| 521 |
-
# Gradio interface
|
| 522 |
-
# -----------------------------
|
| 523 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
|
| 525 |
-
|
| 526 |
-
default_hf = os.getenv("HF_TOKEN", "")
|
| 527 |
-
default_google = os.getenv("GOOGLE_API_KEY", "")
|
| 528 |
|
| 529 |
-
with gr.
|
| 530 |
-
gr.
|
| 531 |
-
|
| 532 |
-
"
|
| 533 |
-
|
| 534 |
)
|
|
|
|
| 535 |
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
movie_idea = gr.Textbox(
|
| 554 |
-
label="Movie Idea",
|
| 555 |
-
value="A lone robot gardener trying to revive a neon-drenched city park.",
|
| 556 |
-
lines=4,
|
| 557 |
-
)
|
| 558 |
-
visual_style = gr.Dropdown(
|
| 559 |
-
label="Visual Style",
|
| 560 |
-
choices=["Cinematic Realism", "American Cartoon", "Anime Noir", "Cyberpunk", "Claymation"],
|
| 561 |
-
value="Cinematic Realism",
|
| 562 |
-
)
|
| 563 |
-
runtime_hint = gr.Dropdown(
|
| 564 |
-
label="Runtime Target",
|
| 565 |
-
choices=["30 seconds", "45 seconds", "60 seconds"],
|
| 566 |
-
value="45 seconds",
|
| 567 |
-
)
|
| 568 |
-
tone = gr.Textbox(
|
| 569 |
-
label="Tone keywords",
|
| 570 |
-
value="hopeful, dynamic camera, sweeping synth score",
|
| 571 |
-
)
|
| 572 |
-
scene_count = gr.Slider(
|
| 573 |
-
label="Scene Count",
|
| 574 |
-
minimum=3,
|
| 575 |
-
maximum=8,
|
| 576 |
-
value=4,
|
| 577 |
-
step=1,
|
| 578 |
-
)
|
| 579 |
-
generate_storyboard_btn = gr.Button("Generate Storyboard", variant="primary")
|
| 580 |
-
|
| 581 |
-
with gr.Column():
|
| 582 |
-
status_box = gr.Markdown("Status: awaiting input.")
|
| 583 |
-
storyboard_json = gr.JSON(label="Storyboard JSON")
|
| 584 |
-
character_markdown = gr.Markdown(label="Character Profiles")
|
| 585 |
-
character_gallery = gr.Gallery(label="Character Anchors", columns=2, rows=2, height="auto")
|
| 586 |
-
|
| 587 |
-
with gr.Tab("Scene Rendering"):
|
| 588 |
-
scene_choice = gr.Dropdown(label="Scene", choices=[])
|
| 589 |
-
preferred_model = gr.Dropdown(
|
| 590 |
-
label="Preferred Video Model",
|
| 591 |
-
choices=HuggingFaceVideoService.MODEL_FALLBACK,
|
| 592 |
-
value=HuggingFaceVideoService.MODEL_FALLBACK[0],
|
| 593 |
-
)
|
| 594 |
-
negative_prompt = gr.Textbox(
|
| 595 |
-
label="Negative Prompt",
|
| 596 |
-
value="low resolution, flicker, watermark, distorted faces",
|
| 597 |
-
)
|
| 598 |
-
duration_seconds = gr.Slider(label="Duration (s)", minimum=1.0, maximum=4.0, value=2.0, step=0.5)
|
| 599 |
-
fps = gr.Slider(label="FPS", minimum=12, maximum=24, value=24, step=1)
|
| 600 |
-
seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
|
| 601 |
-
generate_video_btn = gr.Button("Render Selected Scene", variant="primary")
|
| 602 |
-
video_status = gr.Markdown("Video renderer idle.")
|
| 603 |
-
video_output = gr.Video(label="Generated Clip")
|
| 604 |
-
video_metadata = gr.JSON(label="Render Metadata")
|
| 605 |
-
|
| 606 |
-
storyboard_state = gr.State({})
|
| 607 |
-
character_state = gr.State([])
|
| 608 |
-
|
| 609 |
-
generate_storyboard_btn.click(
|
| 610 |
-
storyboard_callback,
|
| 611 |
-
inputs=[movie_idea, visual_style, runtime_hint, tone, scene_count, google_api_key_input],
|
| 612 |
-
outputs=[
|
| 613 |
-
status_box,
|
| 614 |
-
storyboard_json,
|
| 615 |
-
character_markdown,
|
| 616 |
-
character_gallery,
|
| 617 |
-
storyboard_state,
|
| 618 |
-
character_state,
|
| 619 |
-
scene_choice,
|
| 620 |
-
],
|
| 621 |
)
|
| 622 |
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
outputs=[video_status, video_output, video_metadata],
|
| 636 |
)
|
| 637 |
|
| 638 |
-
|
|
|
|
|
|
|
|
|
|
| 639 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 640 |
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 653 |
)
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
import os
|
| 4 |
+
from typing import List, Tuple
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
from cinegen import CharacterDesigner, StoryGenerator, VideoDirector
|
| 9 |
+
from cinegen.models import Storyboard
|
| 10 |
|
| 11 |
+
STYLE_CHOICES = [
|
| 12 |
+
"Cinematic Realism",
|
| 13 |
+
"Neo-Noir Animation",
|
| 14 |
+
"Analog Horror",
|
| 15 |
+
"Retro-Futuristic",
|
| 16 |
+
"Dreamlike Documentary",
|
| 17 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
VIDEO_MODEL_CHOICES = [
|
| 20 |
+
("Wan 2.1 (fal-ai)", "Wan-AI/Wan2.1-T2V-14B"),
|
| 21 |
+
("LTX Video 0.9.7", "Lightricks/LTX-Video-0.9.7-distilled"),
|
| 22 |
+
("Hunyuan Video 1.5", "tencent/HunyuanVideo-1.5"),
|
| 23 |
+
("CogVideoX 5B", "THUDM/CogVideoX-5b"),
|
| 24 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
SCENE_COLUMNS = ["Scene", "Title", "Action", "Visuals", "Characters", "Duration (s)"]
|
| 27 |
+
CHARACTER_COLUMNS = ["ID", "Name", "Role", "Traits"]
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _ensure_storyboard(board: Storyboard | None) -> Storyboard:
|
| 31 |
+
if not board:
|
| 32 |
+
raise gr.Error("Create a storyboard first.")
|
| 33 |
+
return board
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
+
|
| 36 |
+
def _validate_inputs(idea: str | None, image_path: str | None):
|
| 37 |
+
if not idea and not image_path:
|
| 38 |
+
raise gr.Error("Provide either a story idea or upload a reference image.")
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def handle_storyboard(
|
| 42 |
+
idea: str,
|
| 43 |
+
inspiration_image: str | None,
|
| 44 |
+
style: str,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
scene_count: int,
|
| 46 |
+
google_api_key: str,
|
| 47 |
+
) -> Tuple[str, List[List[str]], List[List[str]], Storyboard]:
|
| 48 |
+
_validate_inputs(idea, inspiration_image)
|
| 49 |
+
generator = StoryGenerator(api_key=google_api_key or None)
|
| 50 |
+
storyboard = generator.generate(
|
| 51 |
+
idea=idea,
|
| 52 |
+
style=style,
|
|
|
|
|
|
|
| 53 |
scene_count=scene_count,
|
| 54 |
+
inspiration_path=inspiration_image,
|
|
|
|
| 55 |
)
|
| 56 |
+
summary_md = f"### {storyboard.title}\n{storyboard.synopsis}"
|
| 57 |
+
scene_rows = storyboard.scenes_table()
|
| 58 |
+
character_rows = storyboard.characters_table()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
return (
|
| 60 |
+
summary_md,
|
| 61 |
+
[[row[col] for col in SCENE_COLUMNS] for row in scene_rows],
|
| 62 |
+
[[row[col] for col in CHARACTER_COLUMNS] for row in character_rows],
|
| 63 |
+
storyboard,
|
|
|
|
|
|
|
|
|
|
| 64 |
)
|
| 65 |
|
| 66 |
|
| 67 |
+
def handle_character_design(
|
| 68 |
+
storyboard: Storyboard | None,
|
| 69 |
+
google_api_key: str,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
):
|
| 71 |
+
board = _ensure_storyboard(storyboard)
|
| 72 |
+
designer = CharacterDesigner(api_key=google_api_key or None)
|
| 73 |
+
gallery, updated_board = designer.design(board)
|
| 74 |
+
if not gallery:
|
| 75 |
+
raise gr.Error("Failed to design characters.")
|
| 76 |
+
return gallery, updated_board
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def handle_video_render(
|
| 80 |
+
storyboard: Storyboard | None,
|
| 81 |
+
hf_token: str,
|
| 82 |
+
model_choice: str,
|
| 83 |
+
):
|
| 84 |
+
board = _ensure_storyboard(storyboard)
|
| 85 |
+
prioritized_models = [model_choice] + [
|
| 86 |
+
model for _, model in VIDEO_MODEL_CHOICES if model != model_choice
|
| 87 |
+
]
|
| 88 |
+
director = VideoDirector(token=hf_token or None, models=prioritized_models)
|
| 89 |
+
final_cut, logs = director.render(board)
|
| 90 |
+
log_md = "\n".join(f"- {line}" for line in logs)
|
| 91 |
+
return final_cut, log_md
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
|
|
|
|
|
|
| 93 |
|
| 94 |
+
css = """
|
| 95 |
+
#cinegen-app {
|
| 96 |
+
max-width: 1080px;
|
| 97 |
+
margin: 0 auto;
|
| 98 |
+
}
|
| 99 |
+
"""
|
| 100 |
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
+
with gr.Blocks(css=css, fill_height=True, theme=gr.themes.Soft(), elem_id="cinegen-app") as demo:
|
| 103 |
+
gr.Markdown(
|
| 104 |
+
"## 🎬 CineGen AI Director\n"
|
| 105 |
+
"Drop an idea or inspiration image and let CineGen produce a storyboard, character boards, "
|
| 106 |
+
"and a compiled short film using Hugging Face video models."
|
| 107 |
+
)
|
| 108 |
|
| 109 |
+
story_state = gr.State()
|
|
|
|
|
|
|
| 110 |
|
| 111 |
+
with gr.Row():
|
| 112 |
+
idea_box = gr.Textbox(
|
| 113 |
+
label="Movie Idea",
|
| 114 |
+
placeholder="E.g. A time loop love story set in a neon bazaar.",
|
| 115 |
+
lines=3,
|
| 116 |
)
|
| 117 |
+
inspiration = gr.Image(label="Reference Image (optional)", type="filepath")
|
| 118 |
|
| 119 |
+
with gr.Row():
|
| 120 |
+
style_dropdown = gr.Dropdown(
|
| 121 |
+
label="Visual Style",
|
| 122 |
+
choices=STYLE_CHOICES,
|
| 123 |
+
value=STYLE_CHOICES[0],
|
| 124 |
+
)
|
| 125 |
+
scene_slider = gr.Slider(
|
| 126 |
+
label="Scene Count",
|
| 127 |
+
minimum=3,
|
| 128 |
+
maximum=8,
|
| 129 |
+
value=4,
|
| 130 |
+
step=1,
|
| 131 |
+
)
|
| 132 |
+
video_model_dropdown = gr.Dropdown(
|
| 133 |
+
label="Preferred Video Model",
|
| 134 |
+
choices=[choice for choice, _ in VIDEO_MODEL_CHOICES],
|
| 135 |
+
value=VIDEO_MODEL_CHOICES[0][0],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
)
|
| 137 |
|
| 138 |
+
with gr.Accordion("API Keys", open=False):
|
| 139 |
+
google_key_input = gr.Textbox(
|
| 140 |
+
label="Google API Key (Gemini)",
|
| 141 |
+
type="password",
|
| 142 |
+
placeholder="Required for live Gemini calls. Leave blank to use offline stubs.",
|
| 143 |
+
value=os.environ.get("GOOGLE_API_KEY", ""),
|
| 144 |
+
)
|
| 145 |
+
hf_token_input = gr.Textbox(
|
| 146 |
+
label="Hugging Face Token",
|
| 147 |
+
type="password",
|
| 148 |
+
placeholder="Needed for Wan/LTX/Hunyuan video generation.",
|
| 149 |
+
value=os.environ.get("HF_TOKEN", ""),
|
|
|
|
| 150 |
)
|
| 151 |
|
| 152 |
+
storyboard_btn = gr.Button("Create Storyboard", variant="primary")
|
| 153 |
+
summary_md = gr.Markdown("Storyboard output will appear here.")
|
| 154 |
+
scenes_df = gr.Dataframe(headers=SCENE_COLUMNS, wrap=True)
|
| 155 |
+
characters_df = gr.Dataframe(headers=CHARACTER_COLUMNS, wrap=True)
|
| 156 |
|
| 157 |
+
storyboard_btn.click(
|
| 158 |
+
fn=handle_storyboard,
|
| 159 |
+
inputs=[idea_box, inspiration, style_dropdown, scene_slider, google_key_input],
|
| 160 |
+
outputs=[summary_md, scenes_df, characters_df, story_state],
|
| 161 |
+
)
|
| 162 |
|
| 163 |
+
with gr.Row():
|
| 164 |
+
design_btn = gr.Button("Design Characters", variant="secondary")
|
| 165 |
+
render_btn = gr.Button("Render Short Film", variant="primary")
|
| 166 |
+
|
| 167 |
+
gallery = gr.Gallery(label="Character References", columns=4, height=320)
|
| 168 |
+
render_logs = gr.Markdown(label="Render Log")
|
| 169 |
+
final_video = gr.Video(label="CineGen Short Film", interactive=False)
|
| 170 |
+
|
| 171 |
+
design_btn.click(
|
| 172 |
+
fn=handle_character_design,
|
| 173 |
+
inputs=[story_state, google_key_input],
|
| 174 |
+
outputs=[gallery, story_state],
|
| 175 |
+
)
|
| 176 |
+
|
| 177 |
+
def _model_value(label: str) -> str:
|
| 178 |
+
lookup = dict(VIDEO_MODEL_CHOICES)
|
| 179 |
+
return lookup.get(label, VIDEO_MODEL_CHOICES[0][1])
|
| 180 |
+
|
| 181 |
+
def render_wrapper(board, token, label):
|
| 182 |
+
return handle_video_render(board, token, _model_value(label))
|
| 183 |
+
|
| 184 |
+
render_btn.click(
|
| 185 |
+
fn=render_wrapper,
|
| 186 |
+
inputs=[story_state, hf_token_input, video_model_dropdown],
|
| 187 |
+
outputs=[final_video, render_logs],
|
| 188 |
)
|
| 189 |
+
|
| 190 |
+
if __name__ == "__main__":
|
| 191 |
+
demo.launch()
|
cinegen/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .models import Storyboard, SceneBeat, CharacterSpec
|
| 2 |
+
from .story_engine import StoryGenerator
|
| 3 |
+
from .character_engine import CharacterDesigner
|
| 4 |
+
from .video_engine import VideoDirector
|
| 5 |
+
|
| 6 |
+
__all__ = [
|
| 7 |
+
"Storyboard",
|
| 8 |
+
"SceneBeat",
|
| 9 |
+
"CharacterSpec",
|
| 10 |
+
"StoryGenerator",
|
| 11 |
+
"CharacterDesigner",
|
| 12 |
+
"VideoDirector",
|
| 13 |
+
]
|
cinegen/character_engine.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from typing import List, Optional, Tuple
|
| 5 |
+
|
| 6 |
+
from .models import Storyboard
|
| 7 |
+
from .placeholders import synthesize_character_card
|
| 8 |
+
|
| 9 |
+
DEFAULT_IMAGE_MODEL = os.environ.get("CINEGEN_CHARACTER_MODEL", "gemini-2.5-flash-image")
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _load_google_client(api_key: Optional[str]):
|
| 13 |
+
if not api_key:
|
| 14 |
+
return None
|
| 15 |
+
|
| 16 |
+
try:
|
| 17 |
+
from google import genai
|
| 18 |
+
|
| 19 |
+
return genai.Client(api_key=api_key)
|
| 20 |
+
except Exception: # pragma: no cover - optional dependency
|
| 21 |
+
return None
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class CharacterDesigner:
    """Produces one reference portrait per storyboard character.

    Uses the optional Google GenAI image model when a key/client is
    available; on any failure (or with no client) it falls back to a
    locally synthesised placeholder card, so ``design`` always succeeds.
    """

    def __init__(self, api_key: Optional[str] = None):
        # Explicit key wins; otherwise fall back to the environment.
        self.api_key = api_key or os.environ.get("GOOGLE_API_KEY")
        # None when the key is missing or google-genai is unavailable.
        self.client = _load_google_client(self.api_key)

    def design(self, storyboard: Storyboard) -> Tuple[List[Tuple[str, str]], Storyboard]:
        """Attach a reference image to every character of *storyboard*.

        Mutates each ``character.reference_image`` in place and returns
        ``(gallery, storyboard)``, where *gallery* is a list of
        ``(image_path, caption)`` pairs suitable for a UI gallery widget.
        """
        gallery: List[Tuple[str, str]] = []
        for character in storyboard.characters:
            image_path = None
            if self.client:
                image_path = self._try_generate(character, storyboard.style)
            if not image_path:
                # Remote generation unavailable or failed: offline card.
                image_path = synthesize_character_card(character, storyboard.style)
            character.reference_image = image_path
            caption = f"{character.name} — {character.role}"
            gallery.append((image_path, caption))
        return gallery, storyboard

    def _try_generate(self, character, style: str) -> Optional[str]: # pragma: no cover
        """Best-effort portrait via the GenAI image model; None on any failure.

        NOTE(review): assumes the installed google-genai SDK exposes
        ``response.parts`` and that image parts implement ``.as_image()``
        — confirm against the SDK version actually deployed.
        """
        prompt = (
            f"Create a portrait for {character.name}, a {character.role} in a {style} short film. "
            f"Traits: {', '.join(character.traits)}. Description: {character.description}."
        )
        try:
            response = self.client.models.generate_content(
                model=DEFAULT_IMAGE_MODEL,
                contents=[prompt],
            )
            # Scan the response for the first inline image payload.
            for part in response.parts:
                if getattr(part, "inline_data", None):
                    image = part.as_image()
                    tmp_dir = os.path.join("/tmp", "cinegen-characters")
                    os.makedirs(tmp_dir, exist_ok=True)
                    path = os.path.join(tmp_dir, f"{character.identifier.lower()}.png")
                    image.save(path)
                    return path
        except Exception:
            return None
        return None
|
cinegen/models.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass, field
|
| 4 |
+
from typing import List, Optional
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@dataclass
class CharacterSpec:
    """A single cast member of a storyboard."""

    identifier: str  # stable ID such as "CHAR-1"
    name: str
    role: str
    description: str
    traits: List[str] = field(default_factory=list)
    reference_image: Optional[str] = None  # filesystem path once designed

    def to_row(self) -> dict:
        """Flatten this character into a display row for the UI table."""
        joined_traits = ", ".join(self.traits) or "—"
        return {
            "ID": self.identifier,
            "Name": self.name,
            "Role": self.role,
            "Traits": joined_traits,
        }
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class SceneBeat:
    """One beat/shot of the storyboard timeline."""

    scene_id: str  # stable ID such as "SCENE-1"
    title: str
    visuals: str  # look/lighting cues for the shot
    action: str  # what happens on screen
    characters: List[str] = field(default_factory=list)
    duration: int = 6  # target clip length in seconds
    mood: str = ""
    camera: str = ""

    def to_row(self) -> dict:
        """Flatten this beat into a display row for the UI table."""
        cast = ", ".join(self.characters) or "—"
        return {
            "Scene": self.scene_id,
            "Title": self.title,
            "Action": self.action,
            "Visuals": self.visuals,
            "Characters": cast,
            "Duration (s)": self.duration,
        }
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@dataclass
class Storyboard:
    """The complete plan for a short film: metadata, cast and scenes."""

    title: str
    synopsis: str
    style: str
    inspiration_hint: Optional[str]  # short description of the uploaded reference, if any
    characters: List[CharacterSpec] = field(default_factory=list)
    scenes: List[SceneBeat] = field(default_factory=list)

    def characters_table(self) -> List[dict]:
        """Rows for the character Dataframe shown in the UI."""
        return [member.to_row() for member in self.characters]

    def scenes_table(self) -> List[dict]:
        """Rows for the scene Dataframe shown in the UI."""
        return [beat.to_row() for beat in self.scenes]
|
cinegen/placeholders.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import random
|
| 5 |
+
import string
|
| 6 |
+
import tempfile
|
| 7 |
+
from typing import List
|
| 8 |
+
|
| 9 |
+
import imageio
|
| 10 |
+
import numpy as np
|
| 11 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 12 |
+
|
| 13 |
+
from .models import CharacterSpec, SceneBeat, Storyboard
|
| 14 |
+
|
| 15 |
+
# Titles cycled through (index mod len) when fabricating stub scenes.
SCENE_TITLES = [
    "Opening Beat",
    "Inciting Incident",
    "Turning Point",
    "Climactic Push",
    "Final Shot",
]

# (role, description) templates used to fabricate the stub cast.
CHARACTER_ARCHETYPES = [
    ("Lead", "Curious protagonist who drives the story."),
    ("Ally", "Supportive partner offering heart and humor."),
    ("Antagonist", "Force of tension that keeps the stakes high."),
]

# RGB background colours for placeholder character cards.
PALETTE = [
    (28, 35, 51),
    (44, 106, 116),
    (96, 108, 56),
    (224, 142, 73),
    (211, 86, 97),
    (123, 74, 173),
]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _slugify(text: str) -> str:
|
| 40 |
+
safe = "".join(ch for ch in text if ch.isalnum() or ch in (" ", "-")).strip()
|
| 41 |
+
safe = safe.replace(" ", "-")
|
| 42 |
+
safe = safe.lower()
|
| 43 |
+
return safe or "cinegen"
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def build_stub_storyboard(
    idea: str,
    style: str,
    scene_count: int,
    inspiration_hint: str | None,
) -> Storyboard:
    """Fabricate a deterministic offline Storyboard (no remote API).

    Used as the fallback when no Google API client is available or the
    remote call fails.  The same ``(idea, style, scene_count)`` always
    produces the same board because the RNG is seeded from those inputs.

    Args:
        idea: Free-text premise typed by the user (may be empty).
        style: Visual style label, e.g. "Noir".
        scene_count: Number of SceneBeat entries to produce.
        inspiration_hint: Optional short description of an uploaded
            reference image, appended to the synopsis when present.

    Returns:
        A fully populated Storyboard with one character per archetype.
    """
    # Seeding the *global* RNG keeps the whole placeholder pipeline
    # reproducible per input; note it also affects later random.choice
    # calls in this module (e.g. synthesize_character_card).
    random.seed(_slugify(idea) + style + str(scene_count))
    title = idea.title() if idea else f"{style} Short"
    synopsis = (
        f"A {style.lower()} short that transforms the idea '{idea or 'mystery cue'}' "
        "into a compact cinematic arc."
    )
    characters: List[CharacterSpec] = []
    # One character per archetype.  (A previous guard
    # `if idx >= 3 and scene_count <= 3: break` was unreachable — only
    # three archetypes exist, so idx never reaches 3 — and was removed.)
    for idx, (role, desc) in enumerate(CHARACTER_ARCHETYPES):
        identifier = f"CHAR-{idx+1}"
        name = f"{role} {random.choice(string.ascii_uppercase)}"
        traits = random.sample(
            ["brave", "witty", "restless", "tactical", "empathetic", "curious"], 2
        )
        characters.append(
            CharacterSpec(
                identifier=identifier,
                name=name,
                role=role,
                description=desc,
                traits=traits,
            )
        )

    scenes: List[SceneBeat] = []
    for idx in range(scene_count):
        label = SCENE_TITLES[idx % len(SCENE_TITLES)]
        scene_id = f"SCENE-{idx+1}"
        visuals = (
            f"{style} framing with {random.choice(['soft neon', 'moody shadows', 'bold silhouettes'])}."
        )
        action = f"{characters[0].name if characters else 'The hero'} faces {random.choice(['an unseen threat', 'a tough decision', 'their reflection'])}."
        # Randomly involve up to two cast members; never leave the list empty.
        involved = [char.name for char in characters if random.random() > 0.3][:2] or [
            characters[0].name if characters else "Narrator"
        ]
        scenes.append(
            SceneBeat(
                scene_id=scene_id,
                title=label,
                visuals=visuals,
                action=action,
                characters=involved,
                duration=6,
                mood=random.choice(["hopeful", "tense", "whimsical"]),
                camera=random.choice(["slow push", "steady wide", "handheld close-up"]),
            )
        )

    appendix = (
        f"Aim for motifs inspired by the uploaded reference: {inspiration_hint}."
        if inspiration_hint
        else ""
    )

    return Storyboard(
        title=title,
        synopsis=f"{synopsis} {appendix}".strip(),
        style=style,
        inspiration_hint=inspiration_hint,
        characters=characters,
        scenes=scenes,
    )
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def synthesize_character_card(character: CharacterSpec, style: str) -> str:
    """Render a flat-colour placeholder portrait card for *character*.

    Picks a random palette colour, overlays the character's name, role,
    traits and the film style as text, and writes the PNG into a fresh
    temporary directory.

    Returns:
        Absolute path of the saved PNG.
    """
    side = 640
    card = Image.new("RGB", (side, side), color=random.choice(PALETTE))
    pen = ImageDraw.Draw(card)
    font = ImageFont.load_default()
    blurb = f"{character.name}\n{character.role}\n{', '.join(character.traits)}"
    pen.multiline_text((40, 80), blurb, fill=(255, 255, 255), font=font, spacing=6)
    pen.text((40, side - 60), f"Style: {style}", fill=(255, 255, 255), font=font)
    out_dir = tempfile.mkdtemp(prefix="cinegen-character-")
    out_path = os.path.join(out_dir, f"{_slugify(character.name)}.png")
    card.save(out_path, format="PNG")
    return out_path
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def create_placeholder_video(scene: SceneBeat, style: str, seconds: int = 4) -> str:
    """Write a short synthetic MP4 standing in for a generated scene clip.

    Each frame is a flat random colour (the RNG is seeded from the scene
    ID, so the clip is deterministic per scene) with the scene title,
    truncated action text, and a style/character footer drawn on top.

    Returns:
        Absolute path of the rendered MP4.
    """
    fps = 6
    width, height = 512, 512
    out_dir = tempfile.mkdtemp(prefix="cinegen-scene-")
    out_path = os.path.join(out_dir, f"{scene.scene_id.lower()}.mp4")
    rng = np.random.default_rng(sum(ord(c) for c in scene.scene_id))
    caption = f"{scene.title}\n{scene.action[:60]}..."
    footer = f"{style} • {scene.characters[0] if scene.characters else 'Solo'}"
    with imageio.get_writer(out_path, fps=fps) as writer:
        for _ in range(fps * seconds):
            # A fresh flat colour per frame gives a subtle flicker.
            tint = rng.integers(60, 220, size=3, dtype=np.uint8)
            canvas = np.zeros((height, width, 3), dtype=np.uint8)
            canvas[:] = tint
            frame_img = Image.fromarray(canvas)
            pen = ImageDraw.Draw(frame_img)
            font = ImageFont.load_default()
            pen.multiline_text((24, 24), caption, fill=(255, 255, 255), font=font, spacing=4)
            pen.text(
                (24, height - 40),
                footer,
                fill=(255, 255, 255),
                font=font,
            )
            writer.append_data(np.array(frame_img))
    return out_path
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def describe_image_reference(image_path: str | None) -> str | None:
    """Summarise an uploaded reference image as ``"name (sizeKB)"``.

    Returns ``None`` when no path was given or the file does not exist.
    """
    if image_path and os.path.exists(image_path):
        kb = round(os.path.getsize(image_path) / 1024, 1)
        return f"{os.path.basename(image_path)} ({kb}KB)"
    return None
|
cinegen/story_engine.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from typing import Any, Dict, Optional
|
| 6 |
+
|
| 7 |
+
from .models import Storyboard, CharacterSpec, SceneBeat
|
| 8 |
+
from .placeholders import build_stub_storyboard, describe_image_reference
|
| 9 |
+
|
| 10 |
+
DEFAULT_STORY_MODEL = os.environ.get("CINEGEN_STORY_MODEL", "gemini-2.5-flash")
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def _load_google_client(api_key: Optional[str]):
|
| 14 |
+
if not api_key:
|
| 15 |
+
return None, "Missing API key"
|
| 16 |
+
|
| 17 |
+
try:
|
| 18 |
+
from google import genai
|
| 19 |
+
|
| 20 |
+
client = genai.Client(api_key=api_key)
|
| 21 |
+
return client, None
|
| 22 |
+
except Exception as exc: # pragma: no cover - depends on optional deps
|
| 23 |
+
return None, str(exc)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class StoryGenerator:
    """Turns a user idea (plus optional reference image) into a Storyboard.

    When the Google GenAI client is unavailable, or the remote call or
    its JSON payload fails in any way, generation degrades to the
    deterministic offline stub so the app always produces a board.
    """

    def __init__(self, api_key: Optional[str] = None):
        # Explicit key wins; otherwise fall back to the environment.
        self.api_key = api_key or os.environ.get("GOOGLE_API_KEY")
        # client is None on failure; client_error keeps the reason for the UI.
        self.client, self.client_error = _load_google_client(self.api_key)

    def generate(
        self,
        idea: str,
        style: str,
        scene_count: int,
        inspiration_path: Optional[str] = None,
    ) -> Storyboard:
        """Produce a Storyboard, remotely when possible, offline otherwise.

        Args:
            idea: Free-text premise (may be empty when an image is given).
            style: Visual style label forwarded into prompts.
            scene_count: Requested number of scenes.
            inspiration_path: Optional path to an uploaded reference image.
        """
        if not self.client:
            return build_stub_storyboard(
                idea=idea,
                style=style,
                scene_count=scene_count,
                inspiration_hint=describe_image_reference(inspiration_path),
            )

        prompt = self._build_prompt(idea, style, scene_count)
        contents = [prompt]
        # Image part (if any) goes before the text prompt.
        parts = self._maybe_add_image_part(inspiration_path)
        contents = parts + contents if parts else contents

        try: # pragma: no cover - relies on remote API
            response = self.client.models.generate_content(
                model=DEFAULT_STORY_MODEL,
                contents=contents,
                config={"response_mime_type": "application/json"},
            )
            payload = json.loads(response.text)
            return self._parse_payload(
                payload,
                style=style,
                inspiration_hint=describe_image_reference(inspiration_path),
            )
        except Exception:
            # Any remote/parse failure falls back to the offline stub.
            return build_stub_storyboard(
                idea=idea,
                style=style,
                scene_count=scene_count,
                inspiration_hint=describe_image_reference(inspiration_path),
            )

    @staticmethod
    def _build_prompt(idea: str, style: str, scene_count: int) -> str:
        """Assemble the instruction prompt asking for storyboard JSON."""
        return (
            "You are CineGen, an AI film director. Convert the provided idea into a "
            "structured storyboard JSON with the following keys:\n"
            "{\n"
            ' "title": str,\n'
            ' "synopsis": str,\n'
            ' "characters": [\n'
            ' {"id": "CHAR-1", "name": str, "role": str, "description": str, "traits": [str, ...]}\n'
            " ],\n"
            ' "scenes": [\n'
            ' {"id": "SCENE-1", "title": str, "visuals": str, "action": str, "characters": [str], "duration": int, "mood": str, "camera": str}\n'
            " ]\n"
            "}\n"
            f"Idea: {idea or 'Use the inspiration image only.'}\n"
            f"Visual Style: {style}\n"
            f"Scene Count: {scene_count}\n"
            "Ensure every scene references at least one character ID."
        )

    def _maybe_add_image_part(self, inspiration_path: Optional[str]):
        """Wrap the reference image as a genai Part list, or None on failure."""
        if not inspiration_path or not os.path.exists(inspiration_path):
            return None
        try:
            from google.genai import types # pragma: no cover - optional dependency

            with open(inspiration_path, "rb") as handle:
                data = handle.read()
            # Crude extension-based mime guess; only PNG vs JPEG supported.
            mime = "image/png" if inspiration_path.endswith(".png") else "image/jpeg"
            return [types.Part.from_bytes(data=data, mime_type=mime)]
        except Exception:
            return None

    @staticmethod
    def _parse_payload(
        payload: Dict[str, Any],
        style: str,
        inspiration_hint: Optional[str],
    ) -> Storyboard:
        """Convert the model's JSON payload into a Storyboard.

        Raises:
            ValueError: if the payload has no characters or no scenes,
                which makes the caller fall back to the offline stub.
        """
        characters = [
            CharacterSpec(
                identifier=item.get("id", f"CHAR-{idx+1}"),
                name=item.get("name", f"Character {idx+1}"),
                role=item.get("role", "Supporting"),
                description=item.get("description", ""),
                traits=item.get("traits", []),
            )
            for idx, item in enumerate(payload.get("characters", []))
        ]
        scenes = [
            SceneBeat(
                scene_id=item.get("id", f"SCENE-{idx+1}"),
                title=item.get("title", f"Scene {idx+1}"),
                visuals=item.get("visuals", ""),
                action=item.get("action", ""),
                characters=item.get("characters", []),
                duration=int(item.get("duration", 6)),
                mood=item.get("mood", ""),
                camera=item.get("camera", ""),
            )
            for idx, item in enumerate(payload.get("scenes", []))
        ]
        if not characters or not scenes:
            raise ValueError("Incomplete payload")
        return Storyboard(
            title=payload.get("title", "Untitled Short"),
            synopsis=payload.get("synopsis", ""),
            style=style,
            inspiration_hint=inspiration_hint,
            characters=characters,
            scenes=scenes,
        )
|
cinegen/video_engine.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import tempfile
|
| 5 |
+
import time
|
| 6 |
+
from typing import List, Optional, Sequence, Tuple
|
| 7 |
+
|
| 8 |
+
import requests
|
| 9 |
+
|
| 10 |
+
from .models import SceneBeat, Storyboard
|
| 11 |
+
from .placeholders import create_placeholder_video
|
| 12 |
+
|
| 13 |
+
# Candidate text-to-video models, tried in listed order until one succeeds.
DEFAULT_VIDEO_MODELS = [
    "Wan-AI/Wan2.1-T2V-14B",
    "Lightricks/LTX-Video-0.9.7-distilled",
    "tencent/HunyuanVideo-1.5",
    "THUDM/CogVideoX-5b",
]
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class VideoDirector:
    """Renders each scene as a clip and merges the clips into one film.

    Per scene, every configured Hugging Face model is tried in order; on
    total failure a locally synthesised placeholder clip is used, so
    ``render`` always returns a playable path.
    """

    def __init__(
        self,
        token: Optional[str] = None,
        models: Optional[Sequence[str]] = None,
    ):
        # Explicit token wins; otherwise check the common HF env var names.
        env_token = (
            token
            or os.environ.get("HF_TOKEN")
            or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
            or os.environ.get("HUGGING_FACE_HUB_TOKEN")
        )
        self.token = env_token
        self.models = list(models or DEFAULT_VIDEO_MODELS)

    def render(self, storyboard: Storyboard) -> Tuple[str, List[str]]:
        """Produce the final film.

        Returns:
            ``(final_path, logs)`` — path of the merged MP4 and the
            human-readable log lines accumulated along the way.
        """
        logs: List[str] = []
        clip_paths: List[str] = []
        for scene in storyboard.scenes:
            video = self._produce_scene(storyboard, scene, logs)
            clip_paths.append(video)
        final_cut = self._merge_clips(clip_paths, logs)
        return final_cut, logs

    def _produce_scene(self, storyboard: Storyboard, scene: SceneBeat, logs: List[str]) -> str:
        """Return a clip path for *scene*: remote if possible, placeholder otherwise."""
        composed_prompt = self._compose_prompt(storyboard, scene)
        if self.token:
            # Try each model in order; first success wins.
            for model in self.models:
                try:
                    clip = self._call_hf_inference(composed_prompt, model, scene.duration)
                    logs.append(f"Scene {scene.scene_id}: generated via {model}")
                    return clip
                except Exception as exc:
                    logs.append(f"Scene {scene.scene_id}: {model} failed ({exc})")
        clip = create_placeholder_video(scene, storyboard.style)
        logs.append(f"Scene {scene.scene_id}: fallback placeholder clip used.")
        return clip

    def _call_hf_inference(self, prompt: str, model_id: str, duration: int) -> str:
        """POST to the HF serverless inference API and save the MP4 bytes.

        Retries up to 3 times (5 s apart) on 503/529/202 "warming up"
        style responses; any other non-200 raises immediately.

        NOTE(review): assumes api-inference.huggingface.co serves these
        video models and honours a `parameters.duration` field — confirm
        against current HF Inference API behaviour.

        Raises:
            RuntimeError: missing token, non-retryable status, or the
                model staying busy through all retries.
        """
        if not self.token:
            raise RuntimeError("Missing Hugging Face token")

        url = f"https://api-inference.huggingface.co/models/{model_id}"
        headers = {
            "Authorization": f"Bearer {self.token}",
            "Accept": "video/mp4",
        }
        payload = {
            "inputs": prompt,
            "parameters": {"duration": duration},
        }
        for _ in range(3):
            response = requests.post(url, headers=headers, json=payload, timeout=600)
            if response.status_code == 200:
                tmp_dir = tempfile.mkdtemp(prefix="cinegen-video-")
                path = os.path.join(tmp_dir, f"{model_id.split('/')[-1]}.mp4")
                with open(path, "wb") as handle:
                    handle.write(response.content)
                return path
            if response.status_code in (503, 529, 202):
                # Model loading / rate-limited: wait and retry.
                time.sleep(5)
                continue
            raise RuntimeError(f"{response.status_code}: {response.text[:120]}")
        raise RuntimeError("Model busy")

    @staticmethod
    def _compose_prompt(storyboard: Storyboard, scene: SceneBeat) -> str:
        """Flatten storyboard + scene metadata into one text-to-video prompt."""
        characters = "; ".join(scene.characters)
        return (
            f"Title: {storyboard.title}. Style: {storyboard.style}. "
            f"Scene {scene.scene_id} - {scene.title}: {scene.action} "
            f"Visual cues: {scene.visuals}. Mood: {scene.mood}. "
            f"Camera: {scene.camera}. Characters: {characters or 'solo sequence'}."
        )

    def _merge_clips(self, clip_paths: Sequence[str], logs: List[str]) -> str:
        """Concatenate the scene clips into the final MP4 via MoviePy.

        Falls back to returning the first clip when MoviePy cannot be
        imported.  NOTE(review): MoviePy 2.x removed `moviepy.editor`
        and the `verbose=` kwarg of `write_videofile`, so under 2.x this
        import fails and only the first clip is returned — pin moviepy<2
        or migrate the import/call.

        Raises:
            RuntimeError: when MoviePy is present but no clip is readable.
        """
        try:
            from moviepy.editor import VideoFileClip, concatenate_videoclips # type: ignore
        except Exception as exc:
            logs.append(f"MoviePy unavailable ({exc}); returning first clip only.")
            return clip_paths[0]

        clips = []
        for path in clip_paths:
            try:
                clip = VideoFileClip(path)
                clips.append(clip)
            except Exception as exc:
                logs.append(f"Failed to read clip {path}: {exc}")
        if not clips:
            raise RuntimeError("No clips to merge")
        final = concatenate_videoclips(clips, method="compose")
        tmp_dir = tempfile.mkdtemp(prefix="cinegen-final-")
        final_path = os.path.join(tmp_dir, "cinegen_short.mp4")
        final.write_videofile(final_path, fps=clips[0].fps, codec="libx264", audio=False, verbose=False, logger=None)
        for clip in clips:
            clip.close()
        logs.append(f"Merged {len(clips)} clips into final cut.")
        return final_path
|
requirements.txt
CHANGED
|
@@ -5,4 +5,5 @@ huggingface-hub>=0.26.0
|
|
| 5 |
pillow>=10.2.0
|
| 6 |
numpy>=1.24.0
|
| 7 |
requests>=2.31.0
|
| 8 |
-
|
|
|
|
|
|
| 5 |
pillow>=10.2.0
|
| 6 |
numpy>=1.24.0
|
| 7 |
requests>=2.31.0
|
| 8 |
+
imageio>=2.34
|
| 9 |
+
moviepy>=1.0.3,<2
|