VirtualOasis committed
Commit 70b0d1a · 1 Parent(s): 934e5c4
app.py CHANGED
@@ -1,653 +1,191 @@
- import json
  import os
- import tempfile
- import textwrap
- import time
- from dataclasses import dataclass, field, asdict
- from pathlib import Path
- from typing import Any, Dict, List, Optional, Tuple
 
  import gradio as gr
- import requests
- try:
-     from google import genai
- except ImportError:  # pragma: no cover - dependency is optional at import time
-     genai = None
-
-
- # -----------------------------
- # Domain data representations
- # -----------------------------
-
-
- @dataclass
- class CharacterProfile:
-     character_id: str
-     name: str
-     description: str
-     visual_tags: List[str] = field(default_factory=list)
-     image_path: Optional[str] = None
-
-
- @dataclass
- class ScenePlan:
-     scene_id: str
-     title: str
-     summary: str
-     visual_prompt: str
-     characters: List[str] = field(default_factory=list)
-
-
- @dataclass
- class StoryboardPlan:
-     title: str
-     logline: str
-     style: str
-     runtime_hint: str
-     tone: str
-     characters: List[CharacterProfile]
-     scenes: List[ScenePlan]
-
-     def to_dict(self) -> Dict[str, Any]:
-         return {
-             "title": self.title,
-             "logline": self.logline,
-             "style": self.style,
-             "runtime_hint": self.runtime_hint,
-             "tone": self.tone,
-             "characters": [asdict(c) for c in self.characters],
-             "scenes": [asdict(s) for s in self.scenes],
-         }
-
-
- # -----------------------------
- # Helper utilities
- # -----------------------------
-
-
- def resolve_token(user_supplied: str, env_key: str) -> Optional[str]:
-     candidate = (user_supplied or "").strip()
-     if candidate:
-         return candidate
-     env_candidate = (os.getenv(env_key) or "").strip()
-     return env_candidate or None
-
-
- def extract_json_block(text: str) -> str:
-     """Return the first JSON object found inside the text."""
-     stack = []
-     start_index = None
-     for index, char in enumerate(text):
-         if char == "{":
-             if not stack:
-                 start_index = index
-             stack.append(char)
-         elif char == "}" and stack:
-             stack.pop()
-             if not stack and start_index is not None:
-                 return text[start_index : index + 1]
-     return text
-
-
- def format_character_markdown(characters: List[CharacterProfile]) -> str:
-     lines = []
-     for character in characters:
-         tags = ", ".join(character.visual_tags) if character.visual_tags else "n/a"
-         lines.append(f"- **{character.name}** ({character.character_id}): {character.description} \n  Visual tags: {tags}")
-     return "\n".join(lines) if lines else "No characters were generated yet."
-
-
- def ensure_module_available(module_ref, friendly_name: str) -> None:
-     if module_ref is None:
-         raise gr.Error(
-             f"{friendly_name} is not installed. Install it via `pip install google-genai` and try again."
-         )
-
-
- # -----------------------------
- # Gemini services
- # -----------------------------
-
-
- class GeminiService:
-     def __init__(
-         self,
-         api_key: str,
-         story_model: str = "gemini-2.5-flash",
-         image_model: str = "gemini-2.5-flash-image",
-     ) -> None:
-         ensure_module_available(genai, "google-genai")
-         if not api_key:
-             raise gr.Error("Google API key is required.")
-         self.client = genai.Client(api_key=api_key)
-         self.story_model = story_model
-         self.image_model = image_model
-
-     def generate_storyboard(
-         self,
-         movie_idea: str,
-         visual_style: str,
-         scene_count: int,
-         runtime_hint: str,
-         tone: str,
-     ) -> StoryboardPlan:
-         prompt = textwrap.dedent(
-             f"""
-             You are CineGen, an AI creative director. Given the following idea, craft a production-ready storyboard.
-
-             Idea: {movie_idea}
-             Target visual style: {visual_style}
-             Desired runtime: {runtime_hint}
-             Tone keywords: {tone}
-             Scene count: exactly {scene_count}
-
-             Respond with valid JSON using this schema:
-             {{
-               "title": "...",
-               "logline": "...",
-               "style": "...",
-               "runtime_hint": "...",
-               "tone": "...",
-               "characters": [
-                 {{"id": "char_1", "name": "...", "description": "...", "visual_tags": ["tag1", "tag2"]}}
-               ],
-               "scenes": [
-                 {{
-                   "id": "scene_1",
-                   "title": "...",
-                   "summary": "...",
-                   "visual_prompt": "...",
-                   "characters": ["char_1", "char_2"]
-                 }}
-               ]
-             }}
-
-             Ensure each scene references character IDs from the characters array and highlight cinematic camera or lighting cues inside "visual_prompt".
-             """
-         ).strip()
-
-         response = self.client.models.generate_content(
-             model=self.story_model,
-             contents=prompt,
-         )
-         raw_text = getattr(response, "text", None) or "".join(
-             [getattr(part, "text", "") for part in getattr(response, "parts", [])]
-         )
-         if not raw_text:
-             raise gr.Error("Gemini did not return any content for the storyboard.")
-
-         serialized = extract_json_block(raw_text)
-         payload = json.loads(serialized)
-
-         characters = [
-             CharacterProfile(
-                 character_id=entry.get("id", f"char_{idx+1}"),
-                 name=entry.get("name", f"Character {idx+1}"),
-                 description=entry.get("description", ""),
-                 visual_tags=entry.get("visual_tags") or [],
-             )
-             for idx, entry in enumerate(payload.get("characters", []))
-         ]
-
-         scenes = [
-             ScenePlan(
-                 scene_id=scene.get("id", f"scene_{idx+1}"),
-                 title=scene.get("title", f"Scene {idx+1}"),
-                 summary=scene.get("summary", ""),
-                 visual_prompt=scene.get("visual_prompt", ""),
-                 characters=scene.get("characters") or [],
-             )
-             for idx, scene in enumerate(payload.get("scenes", []))
-         ]
-
-         if len(scenes) != scene_count:
-             # Keep UX predictable even if the model under-delivers on scene count.
-             scenes = scenes[:scene_count]
-
-         return StoryboardPlan(
-             title=payload.get("title", "Untitled"),
-             logline=payload.get("logline", ""),
-             style=payload.get("style", visual_style),
-             runtime_hint=payload.get("runtime_hint", runtime_hint),
-             tone=payload.get("tone", tone),
-             characters=characters,
-             scenes=scenes,
-         )
-
-     def generate_character_images(
-         self,
-         characters: List[CharacterProfile],
-         visual_style: str,
-         max_characters: int = 4,
-     ) -> List[CharacterProfile]:
-         rendered: List[CharacterProfile] = []
-         for character in characters[:max_characters]:
-             prompt = textwrap.dedent(
-                 f"""
-                 Create a front-facing character reference portrait for use in a video production pipeline.
-                 Character: {character.name}
-                 Description: {character.description}
-                 Visual tags: {", ".join(character.visual_tags) if character.visual_tags else "n/a"}
-                 Style: {visual_style}
-
-                 Output a single cohesive concept art image.
-                 """
-             ).strip()
-             response = self.client.models.generate_content(
-                 model=self.image_model,
-                 contents=prompt,
-             )
-             image_path = None
-             for part in getattr(response, "parts", []):
-                 if getattr(part, "inline_data", None):
-                     image = part.as_image()
-                     with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
-                         image.save(tmp.name)
-                     image_path = tmp.name
-                     break
-             enriched = CharacterProfile(
-                 character_id=character.character_id,
-                 name=character.name,
-                 description=character.description,
-                 visual_tags=character.visual_tags,
-                 image_path=image_path,
-             )
-             rendered.append(enriched)
-         return rendered
-
-
- # -----------------------------
- # Hugging Face video service
- # -----------------------------
-
-
- class HuggingFaceVideoService:
-     MODEL_FALLBACK = [
-         "Wan-AI/Wan2.1-T2V-14B",
-         "Lightricks/LTX-Video-0.9.7-distilled",
-         "tencent/HunyuanVideo-1.5",
-         "THUDM/CogVideoX-5b",
-     ]
-
-     def __init__(self, token: str):
-         if not token:
-             raise gr.Error("Hugging Face token is required for video generation.")
-         self.token = token
-         self.session = requests.Session()
-
-     def generate(
-         self,
-         prompt: str,
-         preferred_model: Optional[str],
-         negative_prompt: str,
-         duration_seconds: float,
-         fps: int,
-         seed: Optional[int],
-     ) -> Tuple[str, str]:
-         ordered_models = self._order_models(preferred_model)
-         last_error = ""
-         for model in ordered_models:
-             try:
-                 video_path = self._invoke_model(
-                     model=model,
-                     prompt=prompt,
-                     negative_prompt=negative_prompt,
-                     duration_seconds=duration_seconds,
-                     fps=fps,
-                     seed=seed,
-                 )
-                 return model, video_path
-             except Exception as exc:  # pragma: no cover - defensive fallback
-                 last_error = str(exc)
-                 time.sleep(1.5)
-         raise gr.Error(f"All video backends failed. Last error: {last_error}")
-
-     def _order_models(self, preferred_model: Optional[str]) -> List[str]:
-         models = list(self.MODEL_FALLBACK)
-         if preferred_model and preferred_model in models:
-             models.remove(preferred_model)
-             models.insert(0, preferred_model)
-         elif preferred_model:
-             models.insert(0, preferred_model)
-         return models
-
-     def _invoke_model(
-         self,
-         model: str,
-         prompt: str,
-         negative_prompt: str,
-         duration_seconds: float,
-         fps: int,
-         seed: Optional[int],
-     ) -> str:
-         url = f"https://api-inference.huggingface.co/models/{model}"
-         headers = {
-             "Authorization": f"Bearer {self.token}",
-             "Accept": "video/mp4",
-         }
-         payload = {
-             "inputs": prompt,
-             "parameters": {
-                 "negative_prompt": negative_prompt,
-                 "num_frames": int(duration_seconds * fps),
-                 "fps": fps,
-                 "seed": seed,
-                 "guidance_scale": 7.5,
-             },
-             "options": {"use_cache": True, "wait_for_model": True},
-         }
-
-         response = self.session.post(
-             url,
-             headers=headers,
-             json=payload,
-             timeout=600,
-         )
-
-         if response.status_code == 200:
-             return self._write_video(response.content)
-         if response.status_code in {503, 504, 524}:
-             raise RuntimeError(f"{model} is warming up or busy (status {response.status_code}).")
-
-         try:
-             message = response.json()
-         except Exception:
-             message = response.text
-         raise RuntimeError(f"{model} failed: {message}")
-
-     @staticmethod
-     def _write_video(content: bytes) -> str:
-         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as handle:
-             handle.write(content)
-         return handle.name
-
-
- # -----------------------------
- # CineGen pipeline orchestration
- # -----------------------------
-
-
- def build_scene_prompt(scene: ScenePlan, storyboard: StoryboardPlan) -> str:
-     character_blurbs = []
-     pool = {c.character_id: c for c in storyboard.characters}
-     for actor_id in scene.characters:
-         profile = pool.get(actor_id)
-         if profile:
-             tags = ", ".join(profile.visual_tags) if profile.visual_tags else ""
-             character_blurbs.append(f"{profile.name}: {profile.description} {tags}".strip())
-
-     character_block = "\n".join(character_blurbs) if character_blurbs else "Original characters only."
-     return textwrap.dedent(
-         f"""
-         Title: {storyboard.title}
-         Logline: {storyboard.logline}
-         Scene: {scene.title} ({scene.scene_id})
-         Narrative summary: {scene.summary}
-         Visual prompt: {scene.visual_prompt}
-         Visual style: {storyboard.style}
-         Tone: {storyboard.tone}
-         Characters:\n{character_block}
-         """
-     ).strip()
-
-
- # -----------------------------
- # Gradio callbacks
- # -----------------------------
-
-
- def storyboard_callback(
-     movie_idea: str,
-     visual_style: str,
-     runtime_hint: str,
-     tone: str,
      scene_count: int,
-     google_api_key_input: str,
- ):
-     api_key = resolve_token(google_api_key_input, "GOOGLE_API_KEY")
-     if not movie_idea:
-         raise gr.Error("Please describe your movie idea first.")
-     storyboard_service = GeminiService(api_key=api_key)
-     storyboard = storyboard_service.generate_storyboard(
-         movie_idea=movie_idea,
-         visual_style=visual_style,
          scene_count=scene_count,
-         runtime_hint=runtime_hint,
-         tone=tone,
      )
-     characters_with_images = storyboard_service.generate_character_images(storyboard.characters, visual_style)
-
-     storyboard_dict = storyboard.to_dict()
-     character_markdown = format_character_markdown(characters_with_images)
-     gallery_entries = [
-         (profile.image_path, f"{profile.name} ({profile.character_id})")
-         for profile in characters_with_images
-         if profile.image_path
-     ]
-     scene_choices = [f"{scene.scene_id}: {scene.title}" for scene in storyboard.scenes]
-
-     status_message = f"Storyboard ready: {storyboard.title} with {len(storyboard.scenes)} scenes."
      return (
-         status_message,
-         storyboard_dict,
-         character_markdown,
-         gallery_entries,
-         storyboard_dict,
-         [asdict(profile) for profile in characters_with_images],
-         gr.Dropdown.update(choices=scene_choices, value=scene_choices[0] if scene_choices else None),
      )
 
 
- def generate_video_callback(
-     scene_choice: str,
-     storyboard_state: Dict[str, Any],
-     hf_token_input: str,
-     preferred_model: str,
-     negative_prompt: str,
-     duration_seconds: float,
-     fps: int,
-     seed: int,
  ):
-     if not storyboard_state:
-         raise gr.Error("Generate a storyboard first.")
-     hf_token = resolve_token(hf_token_input, "HF_TOKEN")
-     if not hf_token:
-         raise gr.Error("Provide a Hugging Face token to render video.")
-
-     scenes = storyboard_state.get("scenes", [])
-     characters = storyboard_state.get("characters", [])
-     if not scenes:
-         raise gr.Error("Storyboard has no scenes to render.")
-     scene_id = (scene_choice or "").split(":")[0]
-     scene_payload = next((scene for scene in scenes if scene["scene_id"] == scene_id or scene["scene_id"] == scene_choice), None)
-     if not scene_payload:
-         scene_payload = scenes[0]
-
-     storyboard = StoryboardPlan(
-         title=storyboard_state.get("title", ""),
-         logline=storyboard_state.get("logline", ""),
-         style=storyboard_state.get("style", ""),
-         runtime_hint=storyboard_state.get("runtime_hint", ""),
-         tone=storyboard_state.get("tone", ""),
-         characters=[
-             CharacterProfile(
-                 character_id=entry.get("character_id") or entry.get("id"),
-                 name=entry.get("name", ""),
-                 description=entry.get("description", ""),
-                 visual_tags=entry.get("visual_tags") or [],
-                 image_path=entry.get("image_path"),
-             )
-             for entry in characters
-         ],
-         scenes=[
-             ScenePlan(
-                 scene_id=scene["scene_id"],
-                 title=scene["title"],
-                 summary=scene["summary"],
-                 visual_prompt=scene["visual_prompt"],
-                 characters=scene.get("characters") or [],
-             )
-             for scene in scenes
-         ],
-     )
-
-     target_scene = next((scene for scene in storyboard.scenes if scene.scene_id == scene_payload["scene_id"]), storyboard.scenes[0])
-     prompt = build_scene_prompt(target_scene, storyboard)
-
-     video_service = HuggingFaceVideoService(token=hf_token)
-     model_used, video_path = video_service.generate(
-         prompt=prompt,
-         preferred_model=preferred_model or None,
-         negative_prompt=negative_prompt,
-         duration_seconds=duration_seconds,
-         fps=fps,
-         seed=seed if seed >= 0 else None,
-     )
-
-     metadata = {
-         "model": model_used,
-         "scene": target_scene.scene_id,
-         "prompt": prompt,
-         "negative_prompt": negative_prompt,
-         "duration_seconds": duration_seconds,
-         "fps": fps,
-     }
-
-     status_message = f"Rendered scene {target_scene.scene_id} via {model_used}."
-     return status_message, video_path, metadata
-
-
- # -----------------------------
- # Gradio interface
- # -----------------------------
-
-
- def build_interface() -> gr.Blocks:
-     default_hf = os.getenv("HF_TOKEN", "")
-     default_google = os.getenv("GOOGLE_API_KEY", "")
-
-     with gr.Blocks() as demo:
-         gr.Markdown("# CineGen AI Director")
-         gr.Markdown(
-             "Transform a simple idea into a storyboard, character deck, and video shots. "
-             "Tokens can be loaded from the environment for local debugging; in production the fields must be filled manually."
          )
-
-         with gr.Row():
-             with gr.Column():
-                 gr.Markdown("### Credentials")
-                 google_api_key_input = gr.Textbox(
-                     label="Google API Key",
-                     value=default_google,
-                     type="password",
-                     placeholder="GOOGLE_API_KEY",
-                 )
-                 hf_token_input = gr.Textbox(
-                     label="Hugging Face Token",
-                     value=default_hf,
-                     type="password",
-                     placeholder="hf_xxx",
-                 )
-
-                 gr.Markdown("### Story Settings")
-                 movie_idea = gr.Textbox(
-                     label="Movie Idea",
-                     value="A lone robot gardener trying to revive a neon-drenched city park.",
-                     lines=4,
-                 )
-                 visual_style = gr.Dropdown(
-                     label="Visual Style",
-                     choices=["Cinematic Realism", "American Cartoon", "Anime Noir", "Cyberpunk", "Claymation"],
-                     value="Cinematic Realism",
-                 )
-                 runtime_hint = gr.Dropdown(
-                     label="Runtime Target",
-                     choices=["30 seconds", "45 seconds", "60 seconds"],
-                     value="45 seconds",
-                 )
-                 tone = gr.Textbox(
-                     label="Tone keywords",
-                     value="hopeful, dynamic camera, sweeping synth score",
-                 )
-                 scene_count = gr.Slider(
-                     label="Scene Count",
-                     minimum=3,
-                     maximum=8,
-                     value=4,
-                     step=1,
-                 )
-                 generate_storyboard_btn = gr.Button("Generate Storyboard", variant="primary")
-
-             with gr.Column():
-                 status_box = gr.Markdown("Status: awaiting input.")
-                 storyboard_json = gr.JSON(label="Storyboard JSON")
-                 character_markdown = gr.Markdown(label="Character Profiles")
-                 character_gallery = gr.Gallery(label="Character Anchors", columns=2, rows=2, height="auto")
-
-         with gr.Tab("Scene Rendering"):
-             scene_choice = gr.Dropdown(label="Scene", choices=[])
-             preferred_model = gr.Dropdown(
-                 label="Preferred Video Model",
-                 choices=HuggingFaceVideoService.MODEL_FALLBACK,
-                 value=HuggingFaceVideoService.MODEL_FALLBACK[0],
-             )
-             negative_prompt = gr.Textbox(
-                 label="Negative Prompt",
-                 value="low resolution, flicker, watermark, distorted faces",
-             )
-             duration_seconds = gr.Slider(label="Duration (s)", minimum=1.0, maximum=4.0, value=2.0, step=0.5)
-             fps = gr.Slider(label="FPS", minimum=12, maximum=24, value=24, step=1)
-             seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
-             generate_video_btn = gr.Button("Render Selected Scene", variant="primary")
-             video_status = gr.Markdown("Video renderer idle.")
-             video_output = gr.Video(label="Generated Clip")
-             video_metadata = gr.JSON(label="Render Metadata")
-
-         storyboard_state = gr.State({})
-         character_state = gr.State([])
-
-         generate_storyboard_btn.click(
-             storyboard_callback,
-             inputs=[movie_idea, visual_style, runtime_hint, tone, scene_count, google_api_key_input],
-             outputs=[
-                 status_box,
-                 storyboard_json,
-                 character_markdown,
-                 character_gallery,
-                 storyboard_state,
-                 character_state,
-                 scene_choice,
-             ],
          )
 
-         generate_video_btn.click(
-             generate_video_callback,
-             inputs=[
-                 scene_choice,
-                 storyboard_state,
-                 hf_token_input,
-                 preferred_model,
-                 negative_prompt,
-                 duration_seconds,
-                 fps,
-                 seed,
-             ],
-             outputs=[video_status, video_output, video_metadata],
          )
 
-     return demo
-
-
- if __name__ == "__main__":
-     interface = build_interface()
-     server_name = os.getenv("GRADIO_SERVER_HOST") or "0.0.0.0"
-     server_port = int(os.getenv("GRADIO_SERVER_PORT") or os.getenv("SERVER_PORT") or "7860")
-     interface.launch(
-         server_name=server_name,
-         server_port=server_port,
-         theme=gr.themes.Soft(),
-         css=".gradio-container {max-width: 1200px; margin: auto;}",
-         footer_links=["gradio", "settings"],
-         allowed_paths=[str(Path.cwd())],
-         ssr_mode=False,
  )
+ from __future__ import annotations
+
  import os
+ from typing import List, Tuple
 
  import gradio as gr
 
+ from cinegen import CharacterDesigner, StoryGenerator, VideoDirector
+ from cinegen.models import Storyboard
+
+ STYLE_CHOICES = [
+     "Cinematic Realism",
+     "Neo-Noir Animation",
+     "Analog Horror",
+     "Retro-Futuristic",
+     "Dreamlike Documentary",
+ ]
+
+ VIDEO_MODEL_CHOICES = [
+     ("Wan 2.1 (fal-ai)", "Wan-AI/Wan2.1-T2V-14B"),
+     ("LTX Video 0.9.7", "Lightricks/LTX-Video-0.9.7-distilled"),
+     ("Hunyuan Video 1.5", "tencent/HunyuanVideo-1.5"),
+     ("CogVideoX 5B", "THUDM/CogVideoX-5b"),
+ ]
+
+ SCENE_COLUMNS = ["Scene", "Title", "Action", "Visuals", "Characters", "Duration (s)"]
+ CHARACTER_COLUMNS = ["ID", "Name", "Role", "Traits"]
+
+
+ def _ensure_storyboard(board: Storyboard | None) -> Storyboard:
+     if not board:
+         raise gr.Error("Create a storyboard first.")
+     return board
+
+
+ def _validate_inputs(idea: str | None, image_path: str | None):
+     if not idea and not image_path:
+         raise gr.Error("Provide either a story idea or upload a reference image.")
+
+
+ def handle_storyboard(
+     idea: str,
+     inspiration_image: str | None,
+     style: str,
      scene_count: int,
+     google_api_key: str,
+ ) -> Tuple[str, List[List[str]], List[List[str]], Storyboard]:
+     _validate_inputs(idea, inspiration_image)
+     generator = StoryGenerator(api_key=google_api_key or None)
+     storyboard = generator.generate(
+         idea=idea,
+         style=style,
          scene_count=scene_count,
+         inspiration_path=inspiration_image,
      )
+     summary_md = f"### {storyboard.title}\n{storyboard.synopsis}"
+     scene_rows = storyboard.scenes_table()
+     character_rows = storyboard.characters_table()
      return (
+         summary_md,
+         [[row[col] for col in SCENE_COLUMNS] for row in scene_rows],
+         [[row[col] for col in CHARACTER_COLUMNS] for row in character_rows],
+         storyboard,
      )
 
 
+ def handle_character_design(
+     storyboard: Storyboard | None,
+     google_api_key: str,
  ):
+     board = _ensure_storyboard(storyboard)
+     designer = CharacterDesigner(api_key=google_api_key or None)
+     gallery, updated_board = designer.design(board)
+     if not gallery:
+         raise gr.Error("Failed to design characters.")
+     return gallery, updated_board
+
+
+ def handle_video_render(
+     storyboard: Storyboard | None,
+     hf_token: str,
+     model_choice: str,
+ ):
+     board = _ensure_storyboard(storyboard)
+     prioritized_models = [model_choice] + [
+         model for _, model in VIDEO_MODEL_CHOICES if model != model_choice
+     ]
+     director = VideoDirector(token=hf_token or None, models=prioritized_models)
+     final_cut, logs = director.render(board)
+     log_md = "\n".join(f"- {line}" for line in logs)
+     return final_cut, log_md
+
+
+ css = """
+ #cinegen-app {
+     max-width: 1080px;
+     margin: 0 auto;
+ }
+ """
+
+ with gr.Blocks(css=css, fill_height=True, theme=gr.themes.Soft(), elem_id="cinegen-app") as demo:
+     gr.Markdown(
+         "## 🎬 CineGen AI Director\n"
+         "Drop an idea or inspiration image and let CineGen produce a storyboard, character boards, "
+         "and a compiled short film using Hugging Face video models."
+     )
+
+     story_state = gr.State()
+
+     with gr.Row():
+         idea_box = gr.Textbox(
+             label="Movie Idea",
+             placeholder="E.g. A time loop love story set in a neon bazaar.",
+             lines=3,
          )
+         inspiration = gr.Image(label="Reference Image (optional)", type="filepath")
+
+     with gr.Row():
+         style_dropdown = gr.Dropdown(
+             label="Visual Style",
+             choices=STYLE_CHOICES,
+             value=STYLE_CHOICES[0],
+         )
+         scene_slider = gr.Slider(
+             label="Scene Count",
+             minimum=3,
+             maximum=8,
+             value=4,
+             step=1,
+         )
+         video_model_dropdown = gr.Dropdown(
+             label="Preferred Video Model",
+             choices=[choice for choice, _ in VIDEO_MODEL_CHOICES],
+             value=VIDEO_MODEL_CHOICES[0][0],
          )
 
+     with gr.Accordion("API Keys", open=False):
+         google_key_input = gr.Textbox(
+             label="Google API Key (Gemini)",
+             type="password",
+             placeholder="Required for live Gemini calls. Leave blank to use offline stubs.",
+             value=os.environ.get("GOOGLE_API_KEY", ""),
+         )
+         hf_token_input = gr.Textbox(
+             label="Hugging Face Token",
+             type="password",
+             placeholder="Needed for Wan/LTX/Hunyuan video generation.",
+             value=os.environ.get("HF_TOKEN", ""),
          )
 
+     storyboard_btn = gr.Button("Create Storyboard", variant="primary")
+     summary_md = gr.Markdown("Storyboard output will appear here.")
+     scenes_df = gr.Dataframe(headers=SCENE_COLUMNS, wrap=True)
+     characters_df = gr.Dataframe(headers=CHARACTER_COLUMNS, wrap=True)
+
+     storyboard_btn.click(
+         fn=handle_storyboard,
+         inputs=[idea_box, inspiration, style_dropdown, scene_slider, google_key_input],
+         outputs=[summary_md, scenes_df, characters_df, story_state],
+     )
+
+     with gr.Row():
+         design_btn = gr.Button("Design Characters", variant="secondary")
+         render_btn = gr.Button("Render Short Film", variant="primary")
+
+     gallery = gr.Gallery(label="Character References", columns=4, height=320)
+     render_logs = gr.Markdown(label="Render Log")
+     final_video = gr.Video(label="CineGen Short Film", interactive=False)
+
+     design_btn.click(
+         fn=handle_character_design,
+         inputs=[story_state, google_key_input],
+         outputs=[gallery, story_state],
+     )
+
+     def _model_value(label: str) -> str:
+         lookup = dict(VIDEO_MODEL_CHOICES)
+         return lookup.get(label, VIDEO_MODEL_CHOICES[0][1])
+
+     def render_wrapper(board, token, label):
+         return handle_video_render(board, token, _model_value(label))
+
+     render_btn.click(
+         fn=render_wrapper,
+         inputs=[story_state, hf_token_input, video_model_dropdown],
+         outputs=[final_video, render_logs],
      )
+
+ if __name__ == "__main__":
+     demo.launch()
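
A minimal offline smoke test for the refactored entry points, assuming the `cinegen` package added below is on the import path and neither `GOOGLE_API_KEY` nor `HF_TOKEN` is set, so the stub storyboard and placeholder clips are exercised (the script itself is illustrative, not part of the commit):

```python
# Hypothetical smoke test: with no API keys, StoryGenerator falls back to
# build_stub_storyboard and VideoDirector renders placeholder clips.
from cinegen import StoryGenerator, VideoDirector

storyboard = StoryGenerator(api_key=None).generate(
    idea="A time loop love story set in a neon bazaar.",
    style="Cinematic Realism",
    scene_count=4,
)
print(storyboard.title, len(storyboard.scenes), "scenes")

final_cut, logs = VideoDirector(token=None).render(storyboard)
print(final_cut)  # merged mp4 path (or the first clip if moviepy is missing)
```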
cinegen/__init__.py ADDED
@@ -0,0 +1,13 @@
+ from .models import Storyboard, SceneBeat, CharacterSpec
+ from .story_engine import StoryGenerator
+ from .character_engine import CharacterDesigner
+ from .video_engine import VideoDirector
+
+ __all__ = [
+     "Storyboard",
+     "SceneBeat",
+     "CharacterSpec",
+     "StoryGenerator",
+     "CharacterDesigner",
+     "VideoDirector",
+ ]
cinegen/character_engine.py ADDED
@@ -0,0 +1,62 @@
+ from __future__ import annotations
+
+ import os
+ from typing import List, Optional, Tuple
+
+ from .models import Storyboard
+ from .placeholders import synthesize_character_card
+
+ DEFAULT_IMAGE_MODEL = os.environ.get("CINEGEN_CHARACTER_MODEL", "gemini-2.5-flash-image")
+
+
+ def _load_google_client(api_key: Optional[str]):
+     if not api_key:
+         return None
+
+     try:
+         from google import genai
+
+         return genai.Client(api_key=api_key)
+     except Exception:  # pragma: no cover - optional dependency
+         return None
+
+
+ class CharacterDesigner:
+     def __init__(self, api_key: Optional[str] = None):
+         self.api_key = api_key or os.environ.get("GOOGLE_API_KEY")
+         self.client = _load_google_client(self.api_key)
+
+     def design(self, storyboard: Storyboard) -> Tuple[List[Tuple[str, str]], Storyboard]:
+         gallery: List[Tuple[str, str]] = []
+         for character in storyboard.characters:
+             image_path = None
+             if self.client:
+                 image_path = self._try_generate(character, storyboard.style)
+             if not image_path:
+                 image_path = synthesize_character_card(character, storyboard.style)
+             character.reference_image = image_path
+             caption = f"{character.name} — {character.role}"
+             gallery.append((image_path, caption))
+         return gallery, storyboard
+
+     def _try_generate(self, character, style: str) -> Optional[str]:  # pragma: no cover
+         prompt = (
+             f"Create a portrait for {character.name}, a {character.role} in a {style} short film. "
+             f"Traits: {', '.join(character.traits)}. Description: {character.description}."
+         )
+         try:
+             response = self.client.models.generate_content(
+                 model=DEFAULT_IMAGE_MODEL,
+                 contents=[prompt],
+             )
+             for part in response.parts:
+                 if getattr(part, "inline_data", None):
+                     image = part.as_image()
+                     tmp_dir = os.path.join("/tmp", "cinegen-characters")
+                     os.makedirs(tmp_dir, exist_ok=True)
+                     path = os.path.join(tmp_dir, f"{character.identifier.lower()}.png")
+                     image.save(path)
+                     return path
+         except Exception:
+             return None
+         return None
cinegen/models.py ADDED
@@ -0,0 +1,61 @@
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+ from typing import List, Optional
+
+
+ @dataclass
+ class CharacterSpec:
+     identifier: str
+     name: str
+     role: str
+     description: str
+     traits: List[str] = field(default_factory=list)
+     reference_image: Optional[str] = None
+
+     def to_row(self) -> dict:
+         traits = ", ".join(self.traits)
+         return {
+             "ID": self.identifier,
+             "Name": self.name,
+             "Role": self.role,
+             "Traits": traits or "—",
+         }
+
+
+ @dataclass
+ class SceneBeat:
+     scene_id: str
+     title: str
+     visuals: str
+     action: str
+     characters: List[str] = field(default_factory=list)
+     duration: int = 6
+     mood: str = ""
+     camera: str = ""
+
+     def to_row(self) -> dict:
+         return {
+             "Scene": self.scene_id,
+             "Title": self.title,
+             "Action": self.action,
+             "Visuals": self.visuals,
+             "Characters": ", ".join(self.characters) or "—",
+             "Duration (s)": self.duration,
+         }
+
+
+ @dataclass
+ class Storyboard:
+     title: str
+     synopsis: str
+     style: str
+     inspiration_hint: Optional[str]
+     characters: List[CharacterSpec] = field(default_factory=list)
+     scenes: List[SceneBeat] = field(default_factory=list)
+
+     def characters_table(self) -> List[dict]:
+         return [char.to_row() for char in self.characters]
+
+     def scenes_table(self) -> List[dict]:
+         return [scene.to_row() for scene in self.scenes]
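
For reference, a small sketch of how these dataclasses are consumed: the `to_row()` keys line up with `SCENE_COLUMNS` and `CHARACTER_COLUMNS` in app.py (the sample values here are made up):

```python
from cinegen.models import CharacterSpec, SceneBeat, Storyboard

board = Storyboard(
    title="Neon Bazaar",
    synopsis="A compact time-loop romance.",
    style="Cinematic Realism",
    inspiration_hint=None,
    characters=[CharacterSpec("CHAR-1", "Mara", "Lead", "Drives the loop.", ["brave", "curious"])],
    scenes=[SceneBeat("SCENE-1", "Opening Beat", "soft neon framing", "Mara wakes mid-loop.", ["Mara"])],
)

# Row dicts keyed exactly as the Gradio Dataframes expect.
print(board.scenes_table()[0]["Duration (s)"])  # 6 (the default)
print(board.characters_table()[0]["Traits"])    # "brave, curious"
```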
cinegen/placeholders.py ADDED
@@ -0,0 +1,163 @@
+ from __future__ import annotations
+
+ import os
+ import random
+ import string
+ import tempfile
+ from typing import List
+
+ import imageio
+ import numpy as np
+ from PIL import Image, ImageDraw, ImageFont
+
+ from .models import CharacterSpec, SceneBeat, Storyboard
+
+ SCENE_TITLES = [
+     "Opening Beat",
+     "Inciting Incident",
+     "Turning Point",
+     "Climactic Push",
+     "Final Shot",
+ ]
+
+ CHARACTER_ARCHETYPES = [
+     ("Lead", "Curious protagonist who drives the story."),
+     ("Ally", "Supportive partner offering heart and humor."),
+     ("Antagonist", "Force of tension that keeps the stakes high."),
+ ]
+
+ PALETTE = [
+     (28, 35, 51),
+     (44, 106, 116),
+     (96, 108, 56),
+     (224, 142, 73),
+     (211, 86, 97),
+     (123, 74, 173),
+ ]
+
+
+ def _slugify(text: str) -> str:
+     safe = "".join(ch for ch in text if ch.isalnum() or ch in (" ", "-")).strip()
+     safe = safe.replace(" ", "-")
+     safe = safe.lower()
+     return safe or "cinegen"
+
+
+ def build_stub_storyboard(
+     idea: str,
+     style: str,
+     scene_count: int,
+     inspiration_hint: str | None,
+ ) -> Storyboard:
+     random.seed(_slugify(idea) + style + str(scene_count))
+     title = idea.title() if idea else f"{style} Short"
+     synopsis = (
+         f"A {style.lower()} short that transforms the idea '{idea or 'mystery cue'}' "
+         "into a compact cinematic arc."
+     )
+     characters: List[CharacterSpec] = []
+     for idx, (role, desc) in enumerate(CHARACTER_ARCHETYPES):
+         if idx >= 3 and scene_count <= 3:
+             break
+         identifier = f"CHAR-{idx+1}"
+         name = f"{role} {random.choice(string.ascii_uppercase)}"
+         traits = random.sample(
+             ["brave", "witty", "restless", "tactical", "empathetic", "curious"], 2
+         )
+         characters.append(
+             CharacterSpec(
+                 identifier=identifier,
+                 name=name,
+                 role=role,
+                 description=desc,
+                 traits=traits,
+             )
+         )
+
+     scenes: List[SceneBeat] = []
+     for idx in range(scene_count):
+         label = SCENE_TITLES[idx % len(SCENE_TITLES)]
+         scene_id = f"SCENE-{idx+1}"
+         visuals = (
+             f"{style} framing with {random.choice(['soft neon', 'moody shadows', 'bold silhouettes'])}."
+         )
+         action = f"{characters[0].name if characters else 'The hero'} faces {random.choice(['an unseen threat', 'a tough decision', 'their reflection'])}."
+         involved = [char.name for char in characters if random.random() > 0.3][:2] or [
+             characters[0].name if characters else "Narrator"
+         ]
+         scenes.append(
+             SceneBeat(
+                 scene_id=scene_id,
+                 title=label,
+                 visuals=visuals,
+                 action=action,
+                 characters=involved,
+                 duration=6,
+                 mood=random.choice(["hopeful", "tense", "whimsical"]),
+                 camera=random.choice(["slow push", "steady wide", "handheld close-up"]),
+             )
+         )
+
+     appendix = (
+         f"Aim for motifs inspired by the uploaded reference: {inspiration_hint}."
+         if inspiration_hint
+         else ""
+     )
+
+     return Storyboard(
+         title=title,
+         synopsis=f"{synopsis} {appendix}".strip(),
+         style=style,
+         inspiration_hint=inspiration_hint,
+         characters=characters,
+         scenes=scenes,
+     )
+
+
+ def synthesize_character_card(character: CharacterSpec, style: str) -> str:
+     width, height = 640, 640
+     color = random.choice(PALETTE)
+     image = Image.new("RGB", (width, height), color=color)
+     draw = ImageDraw.Draw(image)
+     font = ImageFont.load_default()
+     text = f"{character.name}\n{character.role}\n{', '.join(character.traits)}"
+     draw.multiline_text((40, 80), text, fill=(255, 255, 255), font=font, spacing=6)
+     draw.text((40, height - 60), f"Style: {style}", fill=(255, 255, 255), font=font)
+     tmp_dir = tempfile.mkdtemp(prefix="cinegen-character-")
+     path = os.path.join(tmp_dir, f"{_slugify(character.name)}.png")
+     image.save(path, format="PNG")
+     return path
+
+
+ def create_placeholder_video(scene: SceneBeat, style: str, seconds: int = 4) -> str:
+     fps = 6
+     frames = fps * seconds
+     width, height = 512, 512
+     tmp_dir = tempfile.mkdtemp(prefix="cinegen-scene-")
+     path = os.path.join(tmp_dir, f"{scene.scene_id.lower()}.mp4")
+     rng = np.random.default_rng(sum(ord(c) for c in scene.scene_id))
+     with imageio.get_writer(path, fps=fps) as writer:
+         for _ in range(frames):
+             base_color = rng.integers(60, 220, size=3, dtype=np.uint8)
+             frame = np.zeros((height, width, 3), dtype=np.uint8)
+             frame[:] = base_color
+             image = Image.fromarray(frame)
+             draw = ImageDraw.Draw(image)
+             font = ImageFont.load_default()
+             overlay = f"{scene.title}\n{scene.action[:60]}..."
+             draw.multiline_text((24, 24), overlay, fill=(255, 255, 255), font=font, spacing=4)
+             draw.text(
+                 (24, height - 40),
+                 f"{style} • {scene.characters[0] if scene.characters else 'Solo'}",
+                 fill=(255, 255, 255),
+                 font=font,
+             )
+             writer.append_data(np.array(image))
+     return path
+
+
+ def describe_image_reference(image_path: str | None) -> str | None:
+     if not image_path or not os.path.exists(image_path):
+         return None
+     size = os.path.getsize(image_path)
+     return f"{os.path.basename(image_path)} ({round(size / 1024, 1)}KB)"
cinegen/story_engine.py ADDED
@@ -0,0 +1,143 @@
+ from __future__ import annotations
+
+ import json
+ import os
+ from typing import Any, Dict, Optional
+
+ from .models import Storyboard, CharacterSpec, SceneBeat
+ from .placeholders import build_stub_storyboard, describe_image_reference
+
+ DEFAULT_STORY_MODEL = os.environ.get("CINEGEN_STORY_MODEL", "gemini-2.5-flash")
+
+
+ def _load_google_client(api_key: Optional[str]):
+     if not api_key:
+         return None, "Missing API key"
+
+     try:
+         from google import genai
+
+         client = genai.Client(api_key=api_key)
+         return client, None
+     except Exception as exc:  # pragma: no cover - depends on optional deps
+         return None, str(exc)
+
+
+ class StoryGenerator:
+     def __init__(self, api_key: Optional[str] = None):
+         self.api_key = api_key or os.environ.get("GOOGLE_API_KEY")
+         self.client, self.client_error = _load_google_client(self.api_key)
+
+     def generate(
+         self,
+         idea: str,
+         style: str,
+         scene_count: int,
+         inspiration_path: Optional[str] = None,
+     ) -> Storyboard:
+         if not self.client:
+             return build_stub_storyboard(
+                 idea=idea,
+                 style=style,
+                 scene_count=scene_count,
+                 inspiration_hint=describe_image_reference(inspiration_path),
+             )
+
+         prompt = self._build_prompt(idea, style, scene_count)
+         contents = [prompt]
+         parts = self._maybe_add_image_part(inspiration_path)
+         contents = parts + contents if parts else contents
+
+         try:  # pragma: no cover - relies on remote API
+             response = self.client.models.generate_content(
+                 model=DEFAULT_STORY_MODEL,
+                 contents=contents,
+                 config={"response_mime_type": "application/json"},
+             )
+             payload = json.loads(response.text)
+             return self._parse_payload(
+                 payload,
+                 style=style,
+                 inspiration_hint=describe_image_reference(inspiration_path),
+             )
+         except Exception:
+             return build_stub_storyboard(
+                 idea=idea,
+                 style=style,
+                 scene_count=scene_count,
+                 inspiration_hint=describe_image_reference(inspiration_path),
+             )
+
+     @staticmethod
+     def _build_prompt(idea: str, style: str, scene_count: int) -> str:
+         return (
+             "You are CineGen, an AI film director. Convert the provided idea into a "
+             "structured storyboard JSON with the following keys:\n"
+             "{\n"
+             '  "title": str,\n'
+             '  "synopsis": str,\n'
+             '  "characters": [\n'
+             '    {"id": "CHAR-1", "name": str, "role": str, "description": str, "traits": [str, ...]}\n'
+             "  ],\n"
+             '  "scenes": [\n'
+             '    {"id": "SCENE-1", "title": str, "visuals": str, "action": str, "characters": [str], "duration": int, "mood": str, "camera": str}\n'
+             "  ]\n"
+             "}\n"
+             f"Idea: {idea or 'Use the inspiration image only.'}\n"
+             f"Visual Style: {style}\n"
+             f"Scene Count: {scene_count}\n"
+             "Ensure every scene references at least one character ID."
+         )
+
+     def _maybe_add_image_part(self, inspiration_path: Optional[str]):
+         if not inspiration_path or not os.path.exists(inspiration_path):
+             return None
+         try:
+             from google.genai import types  # pragma: no cover - optional dependency
+
+             with open(inspiration_path, "rb") as handle:
+                 data = handle.read()
+             mime = "image/png" if inspiration_path.endswith(".png") else "image/jpeg"
+             return [types.Part.from_bytes(data=data, mime_type=mime)]
+         except Exception:
+             return None
+
+     @staticmethod
+     def _parse_payload(
+         payload: Dict[str, Any],
+         style: str,
+         inspiration_hint: Optional[str],
+     ) -> Storyboard:
+         characters = [
+             CharacterSpec(
+                 identifier=item.get("id", f"CHAR-{idx+1}"),
+                 name=item.get("name", f"Character {idx+1}"),
+                 role=item.get("role", "Supporting"),
+                 description=item.get("description", ""),
+                 traits=item.get("traits", []),
+             )
+             for idx, item in enumerate(payload.get("characters", []))
+         ]
+         scenes = [
+             SceneBeat(
+                 scene_id=item.get("id", f"SCENE-{idx+1}"),
+                 title=item.get("title", f"Scene {idx+1}"),
+                 visuals=item.get("visuals", ""),
+                 action=item.get("action", ""),
+                 characters=item.get("characters", []),
+                 duration=int(item.get("duration", 6)),
+                 mood=item.get("mood", ""),
+                 camera=item.get("camera", ""),
+             )
+             for idx, item in enumerate(payload.get("scenes", []))
+         ]
+         if not characters or not scenes:
+             raise ValueError("Incomplete payload")
+         return Storyboard(
+             title=payload.get("title", "Untitled Short"),
+             synopsis=payload.get("synopsis", ""),
+             style=style,
+             inspiration_hint=inspiration_hint,
+             characters=characters,
+             scenes=scenes,
+         )
cinegen/video_engine.py ADDED
@@ -0,0 +1,119 @@
+ from __future__ import annotations
+
+ import os
+ import tempfile
+ import time
+ from typing import List, Optional, Sequence, Tuple
+
+ import requests
+
+ from .models import SceneBeat, Storyboard
+ from .placeholders import create_placeholder_video
+
+ DEFAULT_VIDEO_MODELS = [
+     "Wan-AI/Wan2.1-T2V-14B",
+     "Lightricks/LTX-Video-0.9.7-distilled",
+     "tencent/HunyuanVideo-1.5",
+     "THUDM/CogVideoX-5b",
+ ]
+
+
+ class VideoDirector:
+     def __init__(
+         self,
+         token: Optional[str] = None,
+         models: Optional[Sequence[str]] = None,
+     ):
+         env_token = (
+             token
+             or os.environ.get("HF_TOKEN")
+             or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
+             or os.environ.get("HUGGING_FACE_HUB_TOKEN")
+         )
+         self.token = env_token
+         self.models = list(models or DEFAULT_VIDEO_MODELS)
+
+     def render(self, storyboard: Storyboard) -> Tuple[str, List[str]]:
+         logs: List[str] = []
+         clip_paths: List[str] = []
+         for scene in storyboard.scenes:
+             video = self._produce_scene(storyboard, scene, logs)
+             clip_paths.append(video)
+         final_cut = self._merge_clips(clip_paths, logs)
+         return final_cut, logs
+
+     def _produce_scene(self, storyboard: Storyboard, scene: SceneBeat, logs: List[str]) -> str:
+         composed_prompt = self._compose_prompt(storyboard, scene)
+         if self.token:
+             for model in self.models:
+                 try:
+                     clip = self._call_hf_inference(composed_prompt, model, scene.duration)
+                     logs.append(f"Scene {scene.scene_id}: generated via {model}")
+                     return clip
+                 except Exception as exc:
+                     logs.append(f"Scene {scene.scene_id}: {model} failed ({exc})")
+         clip = create_placeholder_video(scene, storyboard.style)
+         logs.append(f"Scene {scene.scene_id}: fallback placeholder clip used.")
+         return clip
+
+     def _call_hf_inference(self, prompt: str, model_id: str, duration: int) -> str:
+         if not self.token:
+             raise RuntimeError("Missing Hugging Face token")
+
+         url = f"https://api-inference.huggingface.co/models/{model_id}"
+         headers = {
+             "Authorization": f"Bearer {self.token}",
+             "Accept": "video/mp4",
+         }
+         payload = {
+             "inputs": prompt,
+             "parameters": {"duration": duration},
+         }
+         for _ in range(3):
+             response = requests.post(url, headers=headers, json=payload, timeout=600)
+             if response.status_code == 200:
+                 tmp_dir = tempfile.mkdtemp(prefix="cinegen-video-")
+                 path = os.path.join(tmp_dir, f"{model_id.split('/')[-1]}.mp4")
+                 with open(path, "wb") as handle:
+                     handle.write(response.content)
+                 return path
+             if response.status_code in (503, 529, 202):
+                 time.sleep(5)
+                 continue
+             raise RuntimeError(f"{response.status_code}: {response.text[:120]}")
+         raise RuntimeError("Model busy")
+
+     @staticmethod
+     def _compose_prompt(storyboard: Storyboard, scene: SceneBeat) -> str:
+         characters = "; ".join(scene.characters)
+         return (
+             f"Title: {storyboard.title}. Style: {storyboard.style}. "
+             f"Scene {scene.scene_id} - {scene.title}: {scene.action} "
+             f"Visual cues: {scene.visuals}. Mood: {scene.mood}. "
+             f"Camera: {scene.camera}. Characters: {characters or 'solo sequence'}."
+         )
+
+     def _merge_clips(self, clip_paths: Sequence[str], logs: List[str]) -> str:
+         try:
+             from moviepy.editor import VideoFileClip, concatenate_videoclips  # type: ignore
+         except Exception as exc:
+             logs.append(f"MoviePy unavailable ({exc}); returning first clip only.")
+             return clip_paths[0]
+
+         clips = []
+         for path in clip_paths:
+             try:
+                 clip = VideoFileClip(path)
+                 clips.append(clip)
+             except Exception as exc:
+                 logs.append(f"Failed to read clip {path}: {exc}")
+         if not clips:
+             raise RuntimeError("No clips to merge")
+         final = concatenate_videoclips(clips, method="compose")
+         tmp_dir = tempfile.mkdtemp(prefix="cinegen-final-")
+         final_path = os.path.join(tmp_dir, "cinegen_short.mp4")
+         final.write_videofile(final_path, fps=clips[0].fps, codec="libx264", audio=False, verbose=False, logger=None)
+         for clip in clips:
+             clip.close()
+         logs.append(f"Merged {len(clips)} clips into final cut.")
+         return final_path
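
A sketch of driving VideoDirector directly with a preferred model tried first, mirroring what render_wrapper does in app.py with the dropdown value (the model IDs are this commit's defaults; whether each Inference API endpoint actually returns video bytes is not guaranteed):

```python
from cinegen.video_engine import DEFAULT_VIDEO_MODELS, VideoDirector

preferred = "Lightricks/LTX-Video-0.9.7-distilled"
ordered = [preferred] + [m for m in DEFAULT_VIDEO_MODELS if m != preferred]

# With a real HF token, each scene tries the models in this order;
# without one, every scene falls back to create_placeholder_video.
director = VideoDirector(token=None, models=ordered)
# director.render(storyboard) returns (final_cut_path, log_lines)
```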
requirements.txt CHANGED
@@ -5,4 +5,5 @@ huggingface-hub>=0.26.0
  pillow>=10.2.0
  numpy>=1.24.0
  requests>=2.31.0
-
+ imageio>=2.34
+ moviepy>=1.0.3