whisper-large-v3

Running

App Files Files Community

whisper-large-v3 / app.py

staraks

Update app.py

ae60bd6 verified 30 days ago

raw

history blame

29.2 kB

	import os
	import shutil
	import tempfile
	from typing import List, Literal, Optional

	import torch
	import pyzipper
	import soundfile as sf # noqa: F401 (ensure audio backend is available)

	from docx import Document
	from fastapi import FastAPI, File, UploadFile, Form, HTTPException
	from fastapi.responses import (
	FileResponse,
	JSONResponse,
	PlainTextResponse,
	HTMLResponse,
	)
	from pydantic import BaseModel
	from transformers import pipeline
	import spaces

	# ===================== CONFIG =====================

	# Hub id of the checkpoint that get_pipeline() passes to `pipeline(...)`.
	MODEL_NAME = "openai/whisper-large-v3"

	# Lower-case file suffixes that filter_audio_files() lets through.
	AUDIO_EXTENSIONS = (".wav", ".mp3", ".m4a", ".flac", ".ogg", ".opus", ".webm")

	# Use GPU if available on the Space
	if torch.cuda.is_available():
	    device = 0
	else:
	    device = "cpu"

	# Lazy-loaded pipeline (created on first request)
	asr_pipe = None


	def get_pipeline():
	    """Return the shared ASR pipeline, constructing it on the first call.

	    The object is cached in the module-level ``asr_pipe`` so that later
	    calls hand back the same instance instead of building a new one.
	    """
	    global asr_pipe
	    if asr_pipe is not None:
	        return asr_pipe

	    settings = {
	        "task": "automatic-speech-recognition",
	        "model": MODEL_NAME,
	        "chunk_length_s": 30,
	        "device": device,
	    }
	    asr_pipe = pipeline(**settings)
	    return asr_pipe


	# ===================== Pydantic models =====================

	# One transcription result: an audio file's base name plus the text produced for it.
	class FileTranscript(BaseModel):
	    filename: str  # base name of the audio file the transcript belongs to
	    text: str  # transcript text as returned by transcribe_file


	# JSON body returned by the /api/transcribe/files and /api/transcribe/zip endpoints.
	class TranscriptionResponse(BaseModel):
	    mode: Literal["general", "medical_en"]  # mode the request was run with
	    combined_transcript: str  # per-file transcripts merged by format_combined
	    items: List[FileTranscript]  # one entry per transcribed audio file


	# ===================== Helper functions =====================

	def build_generate_kwargs(mode: str) -> dict:
	    """Build the ``generate_kwargs`` dict handed to the ASR pipeline.

	    Args:
	        mode: ``"general"`` or ``"medical_en"``. Any value other than
	            ``"medical_en"`` gets the general settings.

	    Returns:
	        A new dict that always contains ``task="transcribe"``. For
	        ``"medical_en"`` it additionally sets ``language="en"`` and an
	        ``initial_prompt`` that biases towards English medical dictation.
	    """
	    generate_kwargs = {
	        "task": "transcribe",  # keep same language as audio
	    }

	    if mode == "medical_en":
	        # Strong bias towards English medical terminology
	        generate_kwargs["language"] = "en"
	        # NOTE(review): `initial_prompt` is the argument name used by the
	        # openai-whisper package; transformers' Whisper `generate` documents
	        # `prompt_ids` instead. Confirm this key is accepted by the installed
	        # transformers version, otherwise this mode may fail at generate time.
	        generate_kwargs["initial_prompt"] = (
	            "This is a medical dictation. Use accurate English medical terminology, "
	            "including anatomy, diseases, investigations, lab values, imaging, and drugs. "
	            "Keep the style clinical and professional."
	        )

	    return generate_kwargs


	def filter_audio_files(paths: List[str]) -> List[str]:
	    """Keep only the paths whose extension is listed in ``AUDIO_EXTENSIONS``.

	    The extension is lower-cased before the lookup; input order is kept.
	    """
	    return [
	        candidate
	        for candidate in paths
	        if os.path.splitext(candidate)[1].lower() in AUDIO_EXTENSIONS
	    ]


	def transcribe_file(path: str, mode: str) -> str:
	    """Run the shared ASR pipeline on one audio file and return its text.

	    The pipeline result may be a dict or a non-empty list of dicts; the
	    ``"text"`` entry of the dict (or of the first list element) is returned
	    stripped of surrounding whitespace. Any other result shape yields ``""``.
	    """
	    output = get_pipeline()(
	        path,
	        batch_size=8,
	        generate_kwargs=build_generate_kwargs(mode),
	        return_timestamps=False,
	    )

	    if isinstance(output, dict):
	        first = output
	    elif isinstance(output, list) and output:
	        first = output[0]
	    else:
	        return ""
	    return (first.get("text") or "").strip()


	def format_combined(results: List[FileTranscript]) -> str:
	    """Merge per-file transcripts into one text with a heading per file.

	    Each file contributes a ``### File N: <filename>`` heading, a blank line,
	    and its text (or ``[No transcript]`` when the text is empty). Sections are
	    separated by a blank line and the final result is stripped.
	    """
	    sections = [
	        f"### File {number}: {entry.filename}\n\n{entry.text or '[No transcript]'}\n"
	        for number, entry in enumerate(results, start=1)
	    ]
	    return "\n".join(sections).strip()


	def build_docx(results: List[FileTranscript], title: str) -> str:
	    """Write the transcripts to a Word document and return the file path.

	    The document gets a level-1 heading with ``title``, then for each item a
	    level-2 ``File N: <filename>`` heading, its text (``[No transcript]`` when
	    empty) and an empty paragraph. The file is saved as ``transcripts.docx``
	    inside a new temporary directory, which this function does not remove.
	    """
	    document = Document()
	    document.add_heading(title, level=1)

	    for position, entry in enumerate(results, start=1):
	        document.add_heading(f"File {position}: {entry.filename}", level=2)
	        document.add_paragraph(entry.text or "[No transcript]")
	        document.add_paragraph()

	    target = os.path.join(tempfile.mkdtemp(prefix="docx_"), "transcripts.docx")
	    document.save(target)
	    return target


	def save_uploads_to_temp(files: List[UploadFile]) -> List[str]:
	    """Copy every upload into one fresh temporary directory.

	    Only the base name of the client-supplied filename is used. A missing or
	    unusable name (empty, ``.`` or ``..``) falls back to ``audio``. When
	    several uploads share a base name, later ones get ``_1``, ``_2``, ...
	    inserted before the extension so that no upload overwrites another.

	    Args:
	        files: Uploads exposing ``.filename`` and a readable ``.file``.

	    Returns:
	        The saved paths, in upload order. They all live in a single
	        directory created with ``tempfile.mkdtemp``; removing that
	        directory is left to the caller.
	    """
	    tmpdir = tempfile.mkdtemp(prefix="uploads_")
	    local_paths: List[str] = []
	    taken = set()  # names already written in tmpdir during this call

	    for uf in files:
	        filename = os.path.basename(uf.filename or "")
	        if filename in ("", ".", ".."):
	            filename = "audio"

	        # Pick a name not used yet so same-named uploads do not collide.
	        stem, ext = os.path.splitext(filename)
	        candidate, serial = filename, 1
	        while candidate in taken:
	            candidate = f"{stem}_{serial}{ext}"
	            serial += 1
	        taken.add(candidate)

	        local_path = os.path.join(tmpdir, candidate)
	        with open(local_path, "wb") as out_f:
	            shutil.copyfileobj(uf.file, out_f)
	        local_paths.append(local_path)

	    return local_paths


	def extract_zip_to_temp(zip_file: UploadFile, password: Optional[str]) -> List[str]:
	    """Save an uploaded ZIP and extract its files into a flat temp directory.

	    Member directory structure is discarded: only each member's base name is
	    used. Members whose base name is empty, ``.`` or ``..`` are skipped, and
	    members that would collide after flattening get ``_1``, ``_2``, ...
	    inserted before the extension instead of overwriting each other.

	    Args:
	        zip_file: Upload exposing ``.filename`` and a readable ``.file``.
	        password: Archive password, or ``None``/empty for no password.

	    Returns:
	        Paths of the extracted files, in archive order. They all live in one
	        directory created with ``tempfile.mkdtemp(prefix="zip_files_")``;
	        removing it is left to the caller.

	    Raises:
	        HTTPException: status 400 when the archive cannot be opened or read
	            (bad ZIP, wrong or missing password, missing member).
	    """
	    # Holds only the uploaded archive; always removed before returning.
	    archive_dir = tempfile.mkdtemp(prefix="zip_")
	    outdir = tempfile.mkdtemp(prefix="zip_files_")
	    extracted: List[str] = []
	    taken = set()  # flattened names already written to outdir

	    try:
	        archive_name = os.path.basename(zip_file.filename or "")
	        if archive_name in ("", ".", ".."):
	            archive_name = "archive.zip"
	        zip_path = os.path.join(archive_dir, archive_name)

	        # Save uploaded ZIP
	        with open(zip_path, "wb") as out_f:
	            shutil.copyfileobj(zip_file.file, out_f)

	        with pyzipper.AESZipFile(zip_path, "r") as zf:
	            if password:
	                zf.setpassword(password.encode("utf-8"))

	            for info in zf.infolist():
	                if info.is_dir():
	                    continue
	                name = os.path.basename(info.filename)
	                if name in ("", ".", ".."):
	                    continue

	                # Avoid overwriting an earlier member with the same base name.
	                stem, ext = os.path.splitext(name)
	                candidate, serial = name, 1
	                while candidate in taken:
	                    candidate = f"{stem}_{serial}{ext}"
	                    serial += 1
	                taken.add(candidate)

	                out_path = os.path.join(outdir, candidate)
	                with zf.open(info) as src, open(out_path, "wb") as dst:
	                    shutil.copyfileobj(src, dst)
	                extracted.append(out_path)

	    except (pyzipper.BadZipFile, RuntimeError, KeyError) as e:
	        shutil.rmtree(outdir, ignore_errors=True)
	        raise HTTPException(
	            status_code=400,
	            detail=f"Failed to open ZIP file. Check password / integrity. {e}",
	        ) from e
	    except Exception:
	        # Unexpected failure (e.g. disk error): do not leave partial output.
	        shutil.rmtree(outdir, ignore_errors=True)
	        raise
	    finally:
	        # The archive copy is not needed once extraction has ended.
	        shutil.rmtree(archive_dir, ignore_errors=True)

	    return extracted


	# ===================== FastAPI app =====================

	# Application object that the route decorators in this file attach to.
	# title / description / version are passed to FastAPI as API metadata.
	app = FastAPI(
	title="Whisper Large V3 – Medical Batch Transcription API",
	description="""
	HTTP API for Whisper Large V3 with:

	- Multi-file audio upload
	- Password-protected ZIP upload
	- Medical-biased transcription mode
	- Combined transcript
	- Optional merged Word (.docx) download

	Use `/docs` for Swagger UI and `/ui` for the web interface.
	""",
	version="1.0.0",
	)


	@app.get("/", response_class=PlainTextResponse)
	def root():
	    # Plain-text landing message that points callers at /docs and /ui.
	    landing_text = (
	        "Whisper Large V3 – Medical Batch Transcription API\n"
	        "Open /docs for API documentation or /ui for the web interface.\n"
	    )
	    return landing_text


	@app.get("/health", response_class=PlainTextResponse)
	def health():
	    # Health endpoint: always answers with the fixed plain-text body below.
	    liveness_body = "OK"
	    return liveness_body


	@app.get("/self-test")
	def self_test():
	    """
	    Basic self-check:
	    - can we create/load the pipeline?
	    - what device are we using?
	    """
	    try:
	        loaded = get_pipeline()
	        report = {
	            "status": "ok",
	            "message": "Pipeline loaded successfully.",
	            # Fall back to the configured name if the model has no name_or_path.
	            "model": getattr(loaded.model, "name_or_path", MODEL_NAME),
	            # `device` is 0 when CUDA was detected at import time, else "cpu".
	            "device": "cuda" if device == 0 else str(device),
	        }
	        return JSONResponse(report)
	    except Exception as e:
	        # Any failure is reported as a 500 JSON body instead of propagating.
	        failure = {
	            "status": "error",
	            "message": f"Pipeline failed to load: {e}",
	        }
	        return JSONResponse(failure, status_code=500)


	# ---------- 1. Multi-file transcription (JSON) ----------

	@app.post("/api/transcribe/files", response_model=TranscriptionResponse)
	@spaces.GPU
	def transcribe_files(
	    files: List[UploadFile] = File(..., description="One or more audio files"),
	    mode: Literal["general", "medical_en"] = Form("medical_en"),
	):
	    # Transcribe every uploaded file that has a supported audio extension and
	    # return the per-file texts together with one combined transcript.
	    # Responds with 400 when nothing was uploaded or no upload is audio.
	    if not files:
	        raise HTTPException(status_code=400, detail="No files uploaded.")

	    local_paths = save_uploads_to_temp(files)
	    try:
	        audio_paths = filter_audio_files(local_paths)

	        if not audio_paths:
	            raise HTTPException(
	                status_code=400,
	                detail=f"No valid audio files found. Supported extensions: {', '.join(AUDIO_EXTENSIONS)}",
	            )

	        items: List[FileTranscript] = [
	            FileTranscript(
	                filename=os.path.basename(path),
	                text=transcribe_file(path, mode),
	            )
	            for path in audio_paths
	        ]

	        return TranscriptionResponse(
	            mode=mode,
	            combined_transcript=format_combined(items),
	            items=items,
	        )
	    finally:
	        # The uploads are only needed while transcribing. Remove the
	        # per-request directory so audio does not pile up on disk. The prefix
	        # check limits deletion to directories made by save_uploads_to_temp.
	        for upload_dir in {os.path.dirname(p) for p in local_paths}:
	            if os.path.basename(upload_dir).startswith("uploads_"):
	                shutil.rmtree(upload_dir, ignore_errors=True)


	# ---------- 2. Multi-file transcription (DOCX download) ----------

	@app.post("/api/transcribe/files/docx")
	@spaces.GPU
	def transcribe_files_docx(
	    files: List[UploadFile] = File(..., description="One or more audio files"),
	    mode: Literal["general", "medical_en"] = Form("medical_en"),
	):
	    # Transcribe the uploaded audio files and return the result as a Word
	    # document download. Responds with 400 when nothing was uploaded or no
	    # upload has a supported audio extension.
	    from fastapi import BackgroundTasks  # local: only needed for post-response cleanup

	    if not files:
	        raise HTTPException(status_code=400, detail="No files uploaded.")

	    local_paths = save_uploads_to_temp(files)
	    try:
	        audio_paths = filter_audio_files(local_paths)

	        if not audio_paths:
	            raise HTTPException(
	                status_code=400,
	                detail=f"No valid audio files found. Supported extensions: {', '.join(AUDIO_EXTENSIONS)}",
	            )

	        items: List[FileTranscript] = [
	            FileTranscript(
	                filename=os.path.basename(path),
	                text=transcribe_file(path, mode),
	            )
	            for path in audio_paths
	        ]

	        docx_path = build_docx(items, "Multi-file transcription")
	    finally:
	        # The uploads are only needed while transcribing; the DOCX lives in a
	        # separate directory, so the uploads can be removed right away.
	        for upload_dir in {os.path.dirname(p) for p in local_paths}:
	            if os.path.basename(upload_dir).startswith("uploads_"):
	                shutil.rmtree(upload_dir, ignore_errors=True)

	    # The DOCX must stay on disk until the response has been sent, so its
	    # directory is deleted by a background task that runs afterwards.
	    cleanup = None
	    docx_dir = os.path.dirname(docx_path)
	    if os.path.basename(docx_dir).startswith("docx_"):
	        cleanup = BackgroundTasks()
	        cleanup.add_task(shutil.rmtree, docx_dir, ignore_errors=True)

	    return FileResponse(
	        docx_path,
	        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	        filename="transcripts_files.docx",
	        background=cleanup,
	    )


	# ---------- 3. ZIP transcription (JSON) ----------

	@app.post("/api/transcribe/zip", response_model=TranscriptionResponse)
	@spaces.GPU
	def transcribe_zip(
	    file: UploadFile = File(..., description="ZIP file containing audio files"),
	    password: str = Form("", description="ZIP password (leave blank if none)"),
	    mode: Literal["general", "medical_en"] = Form("medical_en"),
	):
	    # Extract the uploaded ZIP, transcribe every member with a supported audio
	    # extension and return the per-file texts plus one combined transcript.
	    # Responds with 400 when the ZIP is missing or contains no audio.
	    if file is None:
	        raise HTTPException(status_code=400, detail="No ZIP uploaded.")

	    # An empty password field means "no password".
	    extracted_paths = extract_zip_to_temp(file, password or None)
	    try:
	        audio_paths = filter_audio_files(extracted_paths)

	        if not audio_paths:
	            raise HTTPException(
	                status_code=400,
	                detail=f"No valid audio files found inside ZIP. Supported extensions: {', '.join(AUDIO_EXTENSIONS)}",
	            )

	        items: List[FileTranscript] = [
	            FileTranscript(
	                filename=os.path.basename(path),
	                text=transcribe_file(path, mode),
	            )
	            for path in audio_paths
	        ]

	        return TranscriptionResponse(
	            mode=mode,
	            combined_transcript=format_combined(items),
	            items=items,
	        )
	    finally:
	        # The extracted files are only needed while transcribing. The prefix
	        # check limits deletion to directories made by extract_zip_to_temp.
	        for work_dir in {os.path.dirname(p) for p in extracted_paths}:
	            if os.path.basename(work_dir).startswith("zip_files_"):
	                shutil.rmtree(work_dir, ignore_errors=True)


	# ---------- 4. ZIP transcription (DOCX download) ----------

	@app.post("/api/transcribe/zip/docx")
	@spaces.GPU
	def transcribe_zip_docx(
	    file: UploadFile = File(..., description="ZIP file containing audio files"),
	    password: str = Form("", description="ZIP password (leave blank if none)"),
	    mode: Literal["general", "medical_en"] = Form("medical_en"),
	):
	    # Extract the uploaded ZIP, transcribe its audio members and return the
	    # result as a Word document download. Responds with 400 when the ZIP is
	    # missing or contains no member with a supported audio extension.
	    from fastapi import BackgroundTasks  # local: only needed for post-response cleanup

	    if file is None:
	        raise HTTPException(status_code=400, detail="No ZIP uploaded.")

	    # An empty password field means "no password".
	    extracted_paths = extract_zip_to_temp(file, password or None)
	    try:
	        audio_paths = filter_audio_files(extracted_paths)

	        if not audio_paths:
	            raise HTTPException(
	                status_code=400,
	                detail=f"No valid audio files found inside ZIP. Supported extensions: {', '.join(AUDIO_EXTENSIONS)}",
	            )

	        items: List[FileTranscript] = [
	            FileTranscript(
	                filename=os.path.basename(path),
	                text=transcribe_file(path, mode),
	            )
	            for path in audio_paths
	        ]

	        docx_path = build_docx(items, "ZIP transcription")
	    finally:
	        # The extracted files are only needed while transcribing; the DOCX
	        # lives in a separate directory, so they can be removed right away.
	        for work_dir in {os.path.dirname(p) for p in extracted_paths}:
	            if os.path.basename(work_dir).startswith("zip_files_"):
	                shutil.rmtree(work_dir, ignore_errors=True)

	    # The DOCX must stay on disk until the response has been sent, so its
	    # directory is deleted by a background task that runs afterwards.
	    cleanup = None
	    docx_dir = os.path.dirname(docx_path)
	    if os.path.basename(docx_dir).startswith("docx_"):
	        cleanup = BackgroundTasks()
	        cleanup.add_task(shutil.rmtree, docx_dir, ignore_errors=True)

	    return FileResponse(
	        docx_path,
	        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	        filename="transcripts_zip.docx",
	        background=cleanup,
	    )


	# ===================== Simple HTML UI =====================

	# Complete single-page web UI (HTML + CSS + JavaScript) returned by GET /ui.
	# The script posts to the /api/transcribe/* endpoints and calls /self-test.
	# This is a normal (non-raw) string: `\\` below yields one backslash in the
	# served page, and the JavaScript "or" operator must be written as a plain
	# `||` (an escaped form would reach the browser with backslashes and break
	# the whole script).
	HTML_UI = """
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8" />
	<title>Whisper Large V3 – Medical Batch Transcription</title>
	<style>
	body {
	font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
	margin: 0;
	padding: 0;
	background: #f4f4f6;
	color: #111827;
	}
	header {
	background: #111827;
	color: #f9fafb;
	padding: 16px 24px;
	}
	header h1 {
	margin: 0;
	font-size: 20px;
	}
	header p {
	margin: 4px 0 0;
	font-size: 13px;
	color: #9ca3af;
	}

	/* Progress bar at TOP */
	#progress-wrapper {
	background: #f9fafb;
	border-bottom: 1px solid #e5e7eb;
	padding: 8px 24px 10px;
	font-size: 12px;
	color: #4b5563;
	}
	#progress-track {
	width: 100%;
	max-width: 1100px;
	height: 8px;
	background: #e5e7eb;
	border-radius: 999px;
	overflow: hidden;
	margin-top: 4px;
	}
	#progress-fill {
	height: 100%;
	width: 0%;
	background: #111827;
	border-radius: 999px;
	transition: width 0.2s ease-out;
	}
	#progress-text {
	font-size: 11px;
	color: #6b7280;
	margin-top: 3px;
	min-height: 14px;
	max-width: 1100px;
	}
	#status {
	max-width: 1100px;
	margin: 2px auto 0;
	font-size: 12px;
	color: #6b7280;
	padding: 0 16px 6px;
	min-height: 16px;
	}

	main {
	max-width: 1100px;
	margin: 16px auto 40px;
	padding: 0 16px;
	}
	.card {
	background: #ffffff;
	border-radius: 12px;
	padding: 16px 20px;
	box-shadow: 0 12px 35px rgba(15, 23, 42, 0.08);
	margin-bottom: 20px;
	}
	.card h2 {
	margin-top: 0;
	font-size: 18px;
	display: flex;
	align-items: center;
	gap: 8px;
	}
	.card h3 {
	margin-bottom: 6px;
	margin-top: 16px;
	font-size: 15px;
	}
	label {
	font-size: 13px;
	font-weight: 500;
	display: block;
	margin-bottom: 4px;
	}
	input[type="file"],
	select,
	input[type="password"] {
	width: 100%;
	padding: 8px 10px;
	font-size: 13px;
	border-radius: 8px;
	border: 1px solid #d1d5db;
	box-sizing: border-box;
	margin-bottom: 10px;
	background: #f9fafb;
	}
	textarea {
	width: 100%;
	min-height: 260px;
	padding: 10px;
	box-sizing: border-box;
	border-radius: 10px;
	border: 1px solid #d1d5db;
	font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
	font-size: 13px;
	background: #f9fafb;
	}
	.row {
	display: flex;
	flex-wrap: wrap;
	gap: 16px;
	}
	.col {
	flex: 1 1 280px;
	}
	.btn-row {
	display: flex;
	gap: 10px;
	flex-wrap: wrap;
	margin: 6px 0 10px;
	}
	button {
	appearance: none;
	border: none;
	border-radius: 999px;
	padding: 8px 16px;
	font-size: 13px;
	font-weight: 500;
	cursor: pointer;
	display: inline-flex;
	align-items: center;
	gap: 6px;
	}
	.btn-primary {
	background: #111827;
	color: #f9fafb;
	}
	.btn-secondary {
	background: #e5e7eb;
	color: #111827;
	}
	.pill {
	display: inline-flex;
	align-items: center;
	gap: 6px;
	padding: 3px 8px;
	border-radius: 999px;
	font-size: 11px;
	background: #eff6ff;
	color: #1d4ed8;
	margin-left: 8px;
	}
	.small-hint {
	font-size: 11px;
	color: #6b7280;
	margin-top: -4px;
	margin-bottom: 8px;
	}
	code {
	font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
	font-size: 12px;
	}
	pre {
	background: #0b1120;
	color: #e5e7eb;
	padding: 10px 12px;
	border-radius: 10px;
	overflow-x: auto;
	font-size: 12px;
	line-height: 1.5;
	}
	a {
	color: #1d4ed8;
	text-decoration: none;
	}
	a:hover {
	text-decoration: underline;
	}

	@media (max-width: 768px) {
	header {
	padding: 12px 16px;
	}
	main {
	margin-top: 12px;
	}
	}
	</style>
	</head>
	<body>
	<header>
	<h1>Whisper Large V3 – Medical Batch Transcription</h1>
	<p>
	Upload multiple audio files or a password-protected ZIP.
	Mode: <code>general</code> or <code>medical_en</code>.
	API docs at <code>/docs</code>.
	</p>
	</header>

	<!-- Progress & status AT TOP -->
	<div id="progress-wrapper">
	<div>Transcription progress</div>
	<div id="progress-track">
	<div id="progress-fill"></div>
	</div>
	<div id="progress-text">Idle</div>
	</div>
	<div id="status"></div>

	<main>
	<div class="card">
	<h2>1. Multi-file audio upload <span class="pill">JSON & DOCX</span></h2>
	<div class="row">
	<div class="col">
	<h3>Inputs</h3>
	<label for="files_input">Audio files</label>
	<input id="files_input" type="file" multiple accept="audio/*" />
	<div class="small-hint">
	You can select multiple audio files.
	</div>

	<label for="files_mode">Mode</label>
	<select id="files_mode">
	<option value="medical_en">medical_en (English medical bias)</option>
	<option value="general">general</option>
	</select>

	<div class="btn-row">
	<button class="btn-primary" id="btn_files_json">Transcribe → JSON</button>
	<button class="btn-secondary" id="btn_files_docx">Download DOCX</button>
	</div>
	</div>
	<div class="col">
	<h3>Combined transcript</h3>
	<textarea id="files_output" placeholder="Transcript will appear here when you use the JSON button."></textarea>
	</div>
	</div>
	</div>

	<div class="card">
	<h2>2. ZIP upload (with password) <span class="pill">JSON & DOCX</span></h2>
	<div class="row">
	<div class="col">
	<h3>ZIP Inputs</h3>
	<label for="zip_input">ZIP file</label>
	<input id="zip_input" type="file" accept=".zip" />
	<div class="small-hint">ZIP should contain audio files only.</div>

	<label for="zip_password">ZIP password (optional)</label>
	<input id="zip_password" type="password" placeholder="Leave blank if ZIP is not encrypted" />

	<label for="zip_mode">Mode</label>
	<select id="zip_mode">
	<option value="medical_en">medical_en (English medical bias)</option>
	<option value="general">general</option>
	</select>

	<div class="btn-row">
	<button class="btn-primary" id="btn_zip_json">Transcribe ZIP → JSON</button>
	<button class="btn-secondary" id="btn_zip_docx">Download ZIP DOCX</button>
	</div>
	</div>
	<div class="col">
	<h3>ZIP combined transcript</h3>
	<textarea id="zip_output" placeholder="Transcript will appear here when you use the JSON button."></textarea>
	</div>
	</div>
	</div>

	<!-- 3. Quick examples -->
	<div class="card">
	<h2>3. Quick examples <span class="pill">API & sample audio</span></h2>
	<h3>Sample audio for testing (download & upload above)</h3>
	<p class="small-hint">
	1. Download this small public sample file<br>
	2. Upload it in section 1 and click <strong>Transcribe → JSON</strong>
	</p>
	<p>
	👉 <a href="https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac" target="_blank" rel="noopener">
	Download example audio (mlk.flac)
	</a>
	</p>

	<h3>Example: cURL for multi-file JSON</h3>
	<p class="small-hint">Replace <code>@path/to/audio1.flac</code> with your local file path.</p>
	<pre><code>curl -X POST \\
	"https://staraks-whisper-large-v3.hf.space/api/transcribe/files" \\
	-H "Accept: application/json" \\
	-F "mode=medical_en" \\
	-F "files=@path/to/audio1.flac" \\
	-F "files=@path/to/audio2.wav"</code></pre>

	<h3>Example: cURL for ZIP JSON</h3>
	<p class="small-hint">ZIP file contains multiple audio files. Password field is optional.</p>
	<pre><code>curl -X POST \\
	"https://staraks-whisper-large-v3.hf.space/api/transcribe/zip" \\
	-H "Accept: application/json" \\
	-F "mode=medical_en" \\
	-F "file=@path/to/audios.zip" \\
	-F "password="</code></pre>
	</div>

	<!-- 4. System self-check -->
	<div class="card">
	<h2>4. System self-check <span class="pill">Model & API status</span></h2>
	<p class="small-hint">
	Use this to quickly verify that the API is running and the Whisper pipeline can be loaded.
	</p>
	<button class="btn-primary" id="btn_self_test">Run self-test</button>
	<pre id="self_test_output"><code>Click "Run self-test" to see status...</code></pre>
	</div>
	</main>

	<script>
	let __progressTimer = null;
	let __progressValue = 0;

	function setProgress(value, label) {
	__progressValue = Math.max(0, Math.min(100, value));
	const fill = document.getElementById("progress-fill");
	const text = document.getElementById("progress-text");
	if (fill) {
	fill.style.width = __progressValue + "%";
	}
	if (text) {
	text.innerText = label + " (" + __progressValue.toFixed(0) + "%)";
	}
	}

	function resetProgress() {
	if (__progressTimer) {
	clearInterval(__progressTimer);
	__progressTimer = null;
	}
	setProgress(0, "Idle");
	}

	function startSimulatedProgress(label) {
	if (__progressTimer) {
	clearInterval(__progressTimer);
	}
	let p = 5;
	setProgress(p, label);
	__progressTimer = setInterval(() => {
	if (p < 90) {
	p += Math.random() * 10;
	if (p > 90) p = 90;
	setProgress(p, label);
	}
	}, 600);
	}

	function finishProgress(label) {
	if (__progressTimer) {
	clearInterval(__progressTimer);
	__progressTimer = null;
	}
	setProgress(100, label);
	setTimeout(() => {
	resetProgress();
	}, 2000);
	}

	function errorProgress(message) {
	if (__progressTimer) {
	clearInterval(__progressTimer);
	__progressTimer = null;
	}
	setProgress(0, "Error");
	setStatus(message);
	}

	function apiUrl(path) {
	return new URL(path, window.location.origin).toString();
	}

	async function postForm(path, formData, expectBlob = false) {
	const url = apiUrl(path);
	let res;
	try {
	res = await fetch(url, {
	method: "POST",
	body: formData
	});
	} catch (e) {
	throw new Error("Failed to connect to server: " + e.message);
	}

	if (!res.ok) {
	let msg = "Request failed: " + res.status;
	try {
	const data = await res.json();
	if (data && data.detail) msg += " – " + JSON.stringify(data.detail);
	} catch (e) {
	// ignore JSON parse errors
	}
	throw new Error(msg);
	}
	if (expectBlob) {
	return await res.blob();
	} else {
	return await res.json();
	}
	}

	function setStatus(text) {
	document.getElementById("status").innerText = text || "";
	}

	// ------- Self test -------
	document.getElementById("btn_self_test").addEventListener("click", async () => {
	const out = document.getElementById("self_test_output");
	out.textContent = "Running self-test...";
	setStatus("Running self-test…");
	try {
	const res = await fetch(apiUrl("/self-test"));
	const data = await res.json();
	out.textContent = JSON.stringify(data, null, 2);
	if (data.status === "ok") {
	setStatus("Self-test OK – model and API are working.");
	} else {
	setStatus("Self-test reported an error.");
	}
	} catch (err) {
	console.error(err);
	out.textContent = "Self-test failed: " + err.message;
	setStatus("Self-test failed.");
	}
	});

	// ------- Multi-files JSON -------
	document.getElementById("btn_files_json").addEventListener("click", async () => {
	const filesInput = document.getElementById("files_input");
	const mode = document.getElementById("files_mode").value;
	const out = document.getElementById("files_output");

	if (!filesInput.files.length) {
	alert("Please choose at least one audio file.");
	return;
	}

	const formData = new FormData();
	for (const f of filesInput.files) {
	formData.append("files", f);
	}
	formData.append("mode", mode);

	setStatus("Transcribing multiple files… (this may take some time for large audio)");
	out.value = "";
	startSimulatedProgress("Transcribing files");

	try {
	const data = await postForm("/api/transcribe/files", formData, false);
	out.value = data.combined_transcript || "";
	setStatus("Done.");
	finishProgress("Transcription complete");
	} catch (err) {
	console.error(err);
	alert(err.message);
	errorProgress("Error during transcription.");
	}
	});

	// ------- Multi-files DOCX -------
	document.getElementById("btn_files_docx").addEventListener("click", async () => {
	const filesInput = document.getElementById("files_input");
	const mode = document.getElementById("files_mode").value;

	if (!filesInput.files.length) {
	alert("Please choose at least one audio file.");
	return;
	}

	const formData = new FormData();
	for (const f of filesInput.files) {
	formData.append("files", f);
	}
	formData.append("mode", mode);

	setStatus("Generating DOCX for multi-file transcription…");
	startSimulatedProgress("Generating DOCX");

	try {
	const blob = await postForm("/api/transcribe/files/docx", formData, true);
	const url = window.URL.createObjectURL(blob);
	const a = document.createElement("a");
	a.href = url;
	a.download = "transcripts_files.docx";
	document.body.appendChild(a);
	a.click();
	a.remove();
	window.URL.revokeObjectURL(url);
	setStatus("DOCX downloaded.");
	finishProgress("DOCX ready");
	} catch (err) {
	console.error(err);
	alert(err.message);
	errorProgress("Error during DOCX generation.");
	}
	});

	// ------- ZIP JSON -------
	document.getElementById("btn_zip_json").addEventListener("click", async () => {
	const zipInput = document.getElementById("zip_input");
	const pwd = document.getElementById("zip_password").value || "";
	const mode = document.getElementById("zip_mode").value;
	const out = document.getElementById("zip_output");

	if (!zipInput.files.length) {
	alert("Please choose a ZIP file.");
	return;
	}

	const formData = new FormData();
	formData.append("file", zipInput.files[0]);
	formData.append("password", pwd);
	formData.append("mode", mode);

	setStatus("Transcribing ZIP contents…");
	out.value = "";
	startSimulatedProgress("Transcribing ZIP");

	try {
	const data = await postForm("/api/transcribe/zip", formData, false);
	out.value = data.combined_transcript || "";
	setStatus("Done.");
	finishProgress("ZIP transcription complete");
	} catch (err) {
	console.error(err);
	alert(err.message);
	errorProgress("Error during ZIP transcription.");
	}
	});

	// ------- ZIP DOCX -------
	document.getElementById("btn_zip_docx").addEventListener("click", async () => {
	const zipInput = document.getElementById("zip_input");
	const pwd = document.getElementById("zip_password").value || "";
	const mode = document.getElementById("zip_mode").value;

	if (!zipInput.files.length) {
	alert("Please choose a ZIP file.");
	return;
	}

	const formData = new FormData();
	formData.append("file", zipInput.files[0]);
	formData.append("password", pwd);
	formData.append("mode", mode);

	setStatus("Generating DOCX from ZIP contents…");
	startSimulatedProgress("Generating ZIP DOCX");

	try {
	const blob = await postForm("/api/transcribe/zip/docx", formData, true);
	const url = window.URL.createObjectURL(blob);
	const a = document.createElement("a");
	a.href = url;
	a.download = "transcripts_zip.docx";
	document.body.appendChild(a);
	a.click();
	a.remove();
	window.URL.revokeObjectURL(url);
	setStatus("DOCX downloaded.");
	finishProgress("ZIP DOCX ready");
	} catch (err) {
	console.error(err);
	alert(err.message);
	errorProgress("Error during ZIP DOCX generation.");
	}
	});

	// Initial state
	resetProgress();
	</script>
	</body>
	</html>
	"""


	@app.get("/ui", response_class=HTMLResponse)
	def get_ui():
	    # Serve the single-page web interface stored in HTML_UI.
	    page = HTML_UI
	    return page


	# ===================== Run (local dev / HF Spaces) =====================

	if __name__ == "__main__":
	    # Only when executed as a script: start uvicorn serving "app:app".
	    import uvicorn

	    # Listen on the port from the PORT environment variable, else 7860.
	    listen_port = int(os.environ.get("PORT", "7860"))
	    uvicorn.run("app:app", host="0.0.0.0", port=listen_port, reload=False)