# whisper-large-v3
#
# Running
#
# File size: 29,191 Bytes

import os
import shutil
import tempfile
from typing import List, Literal, Optional

import torch
import pyzipper
import soundfile as sf  # noqa: F401  (ensure audio backend is available)

from docx import Document
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import (
    FileResponse,
    JSONResponse,
    PlainTextResponse,
    HTMLResponse,
    Response,
)
from pydantic import BaseModel
from transformers import pipeline
import spaces

# ===================== CONFIG =====================

# Model identifier passed to transformers.pipeline() by get_pipeline()
MODEL_NAME = "openai/whisper-large-v3"

# File suffixes (lower-case, with leading dot) that are treated as audio
AUDIO_EXTENSIONS = (".wav", ".mp3", ".m4a", ".flac", ".ogg", ".opus", ".webm")

# Use GPU if available on the Space: CUDA device index 0, otherwise the CPU
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Shared pipeline instance; stays None until get_pipeline() builds it
asr_pipe = None


def get_pipeline():
    """Return the module-wide ASR pipeline, building it on the first call."""
    global asr_pipe

    # Fast path: the pipeline was already built by an earlier call
    if asr_pipe is not None:
        return asr_pipe

    asr_pipe = pipeline(
        task="automatic-speech-recognition",
        model=MODEL_NAME,
        device=device,
        chunk_length_s=30,
    )
    return asr_pipe


# ===================== Pydantic models =====================

class FileTranscript(BaseModel):
    # Transcript of one audio file; the element type of TranscriptionResponse.items.
    filename: str  # base name of the audio file that was transcribed
    text: str  # transcript text; may be an empty string


class TranscriptionResponse(BaseModel):
    # JSON body returned by the /api/transcribe/files and /api/transcribe/zip endpoints.
    mode: Literal["general", "medical_en"]  # mode the request was transcribed with
    combined_transcript: str  # every transcript under a "### File N: name" heading
    items: List[FileTranscript]  # one entry per transcribed audio file


# ===================== Helper functions =====================

def build_generate_kwargs(mode: str):
    """
    Build the ``generate_kwargs`` dict that is handed to the ASR pipeline.

    mode: 'general' | 'medical_en'
    - 'general' (and any other value): only the task is set, so no language
      is forced on the model.
    - 'medical_en': English is forced and the decoder is primed with a
      medical-dictation prompt. The shared pipeline is fetched (and therefore
      loaded on first use) because its tokenizer is needed to encode the
      prompt.

    Returns the dict to pass as ``pipe(..., generate_kwargs=...)``.
    """
    generate_kwargs = {
        "task": "transcribe",  # keep same language as audio
    }

    if mode == "medical_en":
        # Strong bias towards English medical terminology
        generate_kwargs["language"] = "en"

        prompt_text = (
            "This is a medical dictation. Use accurate English medical terminology, "
            "including anatomy, diseases, investigations, lab values, imaging, and drugs. "
            "Keep the style clinical and professional."
        )

        # ``initial_prompt`` is an argument of the openai-whisper package; the
        # transformers Whisper ``generate()`` has no such parameter and takes
        # the prompt as token ids through ``prompt_ids`` instead.
        pipe = get_pipeline()
        prompt_ids = pipe.tokenizer.get_prompt_ids(prompt_text, return_tensors="pt")
        # The ids must live on the same device as the model that consumes them.
        # NOTE(review): confirm on the pinned transformers version that the
        # prompt text is stripped from the decoded pipeline output.
        generate_kwargs["prompt_ids"] = prompt_ids.to(pipe.model.device)

    return generate_kwargs


def filter_audio_files(paths: List[str]) -> List[str]:
    """Return the entries of *paths* whose extension is in AUDIO_EXTENSIONS.

    The extension is compared case-insensitively and input order is kept.
    """
    return [
        candidate
        for candidate in paths
        if os.path.splitext(candidate)[1].lower() in AUDIO_EXTENSIONS
    ]


def transcribe_file(path: str, mode: str) -> str:
    """Run the shared ASR pipeline on one audio file and return its text.

    The pipeline result may be a dict or a non-empty list of dicts; the
    ``"text"`` entry of the dict (or of the first list element) is returned
    stripped. Any other result shape yields an empty string.
    """
    asr = get_pipeline()
    output = asr(
        path,
        batch_size=8,
        generate_kwargs=build_generate_kwargs(mode),
        return_timestamps=False,
    )

    # Normalise both accepted result shapes to a single dict
    if isinstance(output, dict):
        record = output
    elif isinstance(output, list) and output:
        record = output[0]
    else:
        return ""

    # A missing or None "text" value is treated as an empty transcript
    return (record.get("text") or "").strip()


def format_combined(results: List[FileTranscript]) -> str:
    """Render all transcripts as one text with a numbered heading per file.

    Each file contributes a ``### File N: <filename>`` heading, a blank line,
    its transcript (or ``[No transcript]`` when the text is empty) and a
    trailing blank line. Leading and trailing whitespace of the whole result
    is stripped.
    """
    sections: List[str] = []
    for number, entry in enumerate(results, start=1):
        body = entry.text or "[No transcript]"
        # heading, blank line, body, and the newline that ends the section
        sections.append(f"### File {number}: {entry.filename}\n\n{body}\n")
    return "\n".join(sections).strip()


def build_docx(results: List[FileTranscript], title: str) -> str:
    """Write the transcripts to a Word document and return the file path.

    The document has *title* as a level-1 heading, then for every file a
    level-2 heading, the transcript (or ``[No transcript]``) and an empty
    spacer paragraph. It is saved as ``transcripts.docx`` inside a new
    temporary directory, which the caller is responsible for removing.
    """
    document = Document()
    document.add_heading(title, level=1)

    for number, entry in enumerate(results, start=1):
        document.add_heading(f"File {number}: {entry.filename}", level=2)
        document.add_paragraph(entry.text or "[No transcript]")
        document.add_paragraph()  # empty paragraph as a spacer between files

    target = os.path.join(tempfile.mkdtemp(prefix="docx_"), "transcripts.docx")
    document.save(target)
    return target


def save_uploads_to_temp(files: List[UploadFile]) -> List[str]:
    """Copy every upload into one new temporary directory.

    Args:
        files: uploaded files; ``filename`` and ``file`` of each are used.

    Returns:
        One local path per upload, in upload order. All paths share the same
        parent directory, which the caller is responsible for removing.

    Raises:
        Whatever the underlying file operations raise. The temporary
        directory is removed before the exception propagates.
    """
    tmpdir = tempfile.mkdtemp(prefix="uploads_")
    local_paths: List[str] = []
    try:
        for uf in files:
            # basename() discards client-supplied directory parts; a missing
            # name or one ending in a slash would leave nothing, so fall back
            # to a placeholder in that case.
            filename = os.path.basename(uf.filename or "") or "audio"
            stem, ext = os.path.splitext(filename)

            # Two uploads may share a base name; give later ones a numeric
            # suffix instead of silently overwriting the earlier file.
            local_path = os.path.join(tmpdir, filename)
            suffix = 2
            while os.path.exists(local_path):
                local_path = os.path.join(tmpdir, f"{stem}_{suffix}{ext}")
                suffix += 1

            with open(local_path, "wb") as out_f:
                shutil.copyfileobj(uf.file, out_f)
            local_paths.append(local_path)
    except Exception:
        # Do not leave a half-written upload directory behind
        shutil.rmtree(tmpdir, ignore_errors=True)
        raise
    return local_paths


def extract_zip_to_temp(zip_file: UploadFile, password: Optional[str]) -> List[str]:
    """Save an uploaded ZIP and extract its files into a temporary directory.

    Args:
        zip_file: the uploaded archive.
        password: archive password, or None/empty for an unencrypted archive.

    Returns:
        Paths of the extracted files in archive order. All of them live in a
        single flat temporary directory, which the caller is responsible for
        removing. Directory entries are skipped.

    Raises:
        HTTPException: status 400 when the archive cannot be opened or read
            (bad archive, wrong password, missing member).
    """
    tmpdir = tempfile.mkdtemp(prefix="zip_")
    outdir = tempfile.mkdtemp(prefix="zip_files_")
    zip_name = os.path.basename(zip_file.filename or "") or "archive.zip"
    zip_path = os.path.join(tmpdir, zip_name)
    extracted: List[str] = []

    try:
        # Save uploaded ZIP
        with open(zip_path, "wb") as out_f:
            shutil.copyfileobj(zip_file.file, out_f)

        with pyzipper.AESZipFile(zip_path, "r") as zf:
            if password:
                zf.setpassword(password.encode("utf-8"))

            for info in zf.infolist():
                if info.is_dir():
                    continue
                # Keep only the base name so an entry can never be written
                # outside outdir, whatever path the archive claims.
                name = os.path.basename(info.filename)
                if not name:
                    continue

                # Flattening can make entries from different folders collide;
                # add a numeric suffix rather than overwrite an earlier file.
                stem, ext = os.path.splitext(name)
                out_path = os.path.join(outdir, name)
                suffix = 2
                while os.path.exists(out_path):
                    out_path = os.path.join(outdir, f"{stem}_{suffix}{ext}")
                    suffix += 1

                with zf.open(info) as src, open(out_path, "wb") as dst:
                    shutil.copyfileobj(src, dst)
                extracted.append(out_path)

    except (pyzipper.BadZipFile, RuntimeError, KeyError) as e:
        shutil.rmtree(outdir, ignore_errors=True)
        raise HTTPException(
            status_code=400,
            detail=f"Failed to open ZIP file. Check password / integrity. {e}",
        ) from e
    except Exception:
        # Unexpected failure: still do not leave partial extractions behind
        shutil.rmtree(outdir, ignore_errors=True)
        raise
    finally:
        # The saved archive copy is not needed once extraction has finished
        shutil.rmtree(tmpdir, ignore_errors=True)

    return extracted


# ===================== FastAPI app =====================

# Application object that all routes below are registered on. The title,
# description and version are the metadata given to FastAPI for the API docs.
app = FastAPI(
    title="Whisper Large V3 – Medical Batch Transcription API",
    description="""
HTTP API for Whisper Large V3 with:

- Multi-file audio upload
- Password-protected ZIP upload
- Medical-biased transcription mode
- Combined transcript
- Optional merged Word (.docx) download

Use `/docs` for Swagger UI and `/ui` for the web interface.
""",
    version="1.0.0",
)


@app.get("/", response_class=PlainTextResponse)
def root():
    # Plain-text landing page pointing at the API docs and the web UI
    banner = "Whisper Large V3 – Medical Batch Transcription API\n"
    pointer = "Open /docs for API documentation or /ui for the web interface.\n"
    return banner + pointer


@app.get("/health", response_class=PlainTextResponse)
def health():
    # Liveness probe: answers "OK" without touching the model or pipeline.
    return "OK"


@app.get("/self-test")
def self_test():
    """
    Basic self-check:
    - can we create/load the pipeline?
    - what device are we using?
    """
    try:
        loaded = get_pipeline()

        # Report the CUDA device index 0 as "cuda"; anything else verbatim
        if device == 0:
            device_label = "cuda"
        else:
            device_label = str(device)

        payload = {
            "status": "ok",
            "message": "Pipeline loaded successfully.",
            "model": getattr(loaded.model, "name_or_path", MODEL_NAME),
            "device": device_label,
        }
        return JSONResponse(payload)
    except Exception as e:
        # Any failure is reported to the client as a JSON body with HTTP 500
        failure = {
            "status": "error",
            "message": f"Pipeline failed to load: {e}",
        }
        return JSONResponse(failure, status_code=500)


# ---------- 1. Multi-file transcription (JSON) ----------

@app.post("/api/transcribe/files", response_model=TranscriptionResponse)
@spaces.GPU
def transcribe_files(
    files: List[UploadFile] = File(..., description="One or more audio files"),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
):
    """Transcribe the uploaded audio files and return per-file and combined text."""
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded.")

    local_paths = save_uploads_to_temp(files)
    try:
        audio_paths = filter_audio_files(local_paths)

        if not audio_paths:
            raise HTTPException(
                status_code=400,
                detail=f"No valid audio files found. Supported extensions: {', '.join(AUDIO_EXTENSIONS)}",
            )

        items: List[FileTranscript] = [
            FileTranscript(
                filename=os.path.basename(path),
                text=transcribe_file(path, mode),
            )
            for path in audio_paths
        ]
    finally:
        # The saved uploads are only needed while transcribing. Remove their
        # temporary directory on success and on every error path so uploaded
        # audio does not accumulate on disk.
        for upload_dir in {os.path.dirname(p) for p in local_paths}:
            shutil.rmtree(upload_dir, ignore_errors=True)

    return TranscriptionResponse(
        mode=mode,
        combined_transcript=format_combined(items),
        items=items,
    )


# ---------- 2. Multi-file transcription (DOCX download) ----------

@app.post("/api/transcribe/files/docx")
@spaces.GPU
def transcribe_files_docx(
    files: List[UploadFile] = File(..., description="One or more audio files"),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
):
    """Transcribe the uploaded audio files and return them as one Word document."""
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded.")

    local_paths = save_uploads_to_temp(files)
    try:
        audio_paths = filter_audio_files(local_paths)

        if not audio_paths:
            raise HTTPException(
                status_code=400,
                detail=f"No valid audio files found. Supported extensions: {', '.join(AUDIO_EXTENSIONS)}",
            )

        items: List[FileTranscript] = [
            FileTranscript(
                filename=os.path.basename(path),
                text=transcribe_file(path, mode),
            )
            for path in audio_paths
        ]
    finally:
        # The saved uploads are only needed while transcribing. Remove their
        # temporary directory on success and on every error path so uploaded
        # audio does not accumulate on disk.
        for upload_dir in {os.path.dirname(p) for p in local_paths}:
            shutil.rmtree(upload_dir, ignore_errors=True)

    docx_path = build_docx(items, "Multi-file transcription")
    try:
        # Load the document into memory so its temporary directory can be
        # deleted before the response is sent.
        with open(docx_path, "rb") as docx_file:
            payload = docx_file.read()
    finally:
        shutil.rmtree(os.path.dirname(docx_path), ignore_errors=True)

    return Response(
        content=payload,
        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        headers={
            "Content-Disposition": 'attachment; filename="transcripts_files.docx"',
        },
    )


# ---------- 3. ZIP transcription (JSON) ----------

@app.post("/api/transcribe/zip", response_model=TranscriptionResponse)
@spaces.GPU
def transcribe_zip(
    file: UploadFile = File(..., description="ZIP file containing audio files"),
    password: str = Form("", description="ZIP password (leave blank if none)"),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
):
    """Transcribe the audio files inside an uploaded ZIP (optionally password-protected)."""
    if file is None:
        raise HTTPException(status_code=400, detail="No ZIP uploaded.")

    # An empty password field means "no password"
    extracted_paths = extract_zip_to_temp(file, password or None)
    try:
        audio_paths = filter_audio_files(extracted_paths)

        if not audio_paths:
            raise HTTPException(
                status_code=400,
                detail=f"No valid audio files found inside ZIP. Supported extensions: {', '.join(AUDIO_EXTENSIONS)}",
            )

        items: List[FileTranscript] = [
            FileTranscript(
                filename=os.path.basename(path),
                text=transcribe_file(path, mode),
            )
            for path in audio_paths
        ]
    finally:
        # The extracted files are only needed while transcribing. Remove their
        # temporary directory on success and on every error path so extracted
        # audio does not accumulate on disk.
        for extract_dir in {os.path.dirname(p) for p in extracted_paths}:
            shutil.rmtree(extract_dir, ignore_errors=True)

    return TranscriptionResponse(
        mode=mode,
        combined_transcript=format_combined(items),
        items=items,
    )


# ---------- 4. ZIP transcription (DOCX download) ----------

@app.post("/api/transcribe/zip/docx")
@spaces.GPU
def transcribe_zip_docx(
    file: UploadFile = File(..., description="ZIP file containing audio files"),
    password: str = Form("", description="ZIP password (leave blank if none)"),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
):
    """Transcribe the audio files inside an uploaded ZIP and return one Word document."""
    if file is None:
        raise HTTPException(status_code=400, detail="No ZIP uploaded.")

    # An empty password field means "no password"
    extracted_paths = extract_zip_to_temp(file, password or None)
    try:
        audio_paths = filter_audio_files(extracted_paths)

        if not audio_paths:
            raise HTTPException(
                status_code=400,
                detail=f"No valid audio files found inside ZIP. Supported extensions: {', '.join(AUDIO_EXTENSIONS)}",
            )

        items: List[FileTranscript] = [
            FileTranscript(
                filename=os.path.basename(path),
                text=transcribe_file(path, mode),
            )
            for path in audio_paths
        ]
    finally:
        # The extracted files are only needed while transcribing. Remove their
        # temporary directory on success and on every error path so extracted
        # audio does not accumulate on disk.
        for extract_dir in {os.path.dirname(p) for p in extracted_paths}:
            shutil.rmtree(extract_dir, ignore_errors=True)

    docx_path = build_docx(items, "ZIP transcription")
    try:
        # Load the document into memory so its temporary directory can be
        # deleted before the response is sent.
        with open(docx_path, "rb") as docx_file:
            payload = docx_file.read()
    finally:
        shutil.rmtree(os.path.dirname(docx_path), ignore_errors=True)

    return Response(
        content=payload,
        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        headers={
            "Content-Disposition": 'attachment; filename="transcripts_zip.docx"',
        },
    )


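# ===================== Simple HTML UI =====================
# HTML_UI is a self-contained page (inline CSS and JavaScript) that GET /ui
# returns as-is. Its script posts multipart forms to the four
# /api/transcribe/* endpoints and calls /self-test. The progress bar is driven
# by a client-side timer and does not reflect actual server progress.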

HTML_UI = """
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <title>Whisper Large V3 – Medical Batch Transcription</title>
  <style>
    body {
      font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
      margin: 0;
      padding: 0;
      background: #f4f4f6;
      color: #111827;
    }
    header {
      background: #111827;
      color: #f9fafb;
      padding: 16px 24px;
    }
    header h1 {
      margin: 0;
      font-size: 20px;
    }
    header p {
      margin: 4px 0 0;
      font-size: 13px;
      color: #9ca3af;
    }

    /* Progress bar at TOP */
    #progress-wrapper {
      background: #f9fafb;
      border-bottom: 1px solid #e5e7eb;
      padding: 8px 24px 10px;
      font-size: 12px;
      color: #4b5563;
    }
    #progress-track {
      width: 100%;
      max-width: 1100px;
      height: 8px;
      background: #e5e7eb;
      border-radius: 999px;
      overflow: hidden;
      margin-top: 4px;
    }
    #progress-fill {
      height: 100%;
      width: 0%;
      background: #111827;
      border-radius: 999px;
      transition: width 0.2s ease-out;
    }
    #progress-text {
      font-size: 11px;
      color: #6b7280;
      margin-top: 3px;
      min-height: 14px;
      max-width: 1100px;
    }
    #status {
      max-width: 1100px;
      margin: 2px auto 0;
      font-size: 12px;
      color: #6b7280;
      padding: 0 16px 6px;
      min-height: 16px;
    }

    main {
      max-width: 1100px;
      margin: 16px auto 40px;
      padding: 0 16px;
    }
    .card {
      background: #ffffff;
      border-radius: 12px;
      padding: 16px 20px;
      box-shadow: 0 12px 35px rgba(15, 23, 42, 0.08);
      margin-bottom: 20px;
    }
    .card h2 {
      margin-top: 0;
      font-size: 18px;
      display: flex;
      align-items: center;
      gap: 8px;
    }
    .card h3 {
      margin-bottom: 6px;
      margin-top: 16px;
      font-size: 15px;
    }
    label {
      font-size: 13px;
      font-weight: 500;
      display: block;
      margin-bottom: 4px;
    }
    input[type="file"],
    select,
    input[type="password"] {
      width: 100%;
      padding: 8px 10px;
      font-size: 13px;
      border-radius: 8px;
      border: 1px solid #d1d5db;
      box-sizing: border-box;
      margin-bottom: 10px;
      background: #f9fafb;
    }
    textarea {
      width: 100%;
      min-height: 260px;
      padding: 10px;
      box-sizing: border-box;
      border-radius: 10px;
      border: 1px solid #d1d5db;
      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
      font-size: 13px;
      background: #f9fafb;
    }
    .row {
      display: flex;
      flex-wrap: wrap;
      gap: 16px;
    }
    .col {
      flex: 1 1 280px;
    }
    .btn-row {
      display: flex;
      gap: 10px;
      flex-wrap: wrap;
      margin: 6px 0 10px;
    }
    button {
      appearance: none;
      border: none;
      border-radius: 999px;
      padding: 8px 16px;
      font-size: 13px;
      font-weight: 500;
      cursor: pointer;
      display: inline-flex;
      align-items: center;
      gap: 6px;
    }
    .btn-primary {
      background: #111827;
      color: #f9fafb;
    }
    .btn-secondary {
      background: #e5e7eb;
      color: #111827;
    }
    .pill {
      display: inline-flex;
      align-items: center;
      gap: 6px;
      padding: 3px 8px;
      border-radius: 999px;
      font-size: 11px;
      background: #eff6ff;
      color: #1d4ed8;
      margin-left: 8px;
    }
    .small-hint {
      font-size: 11px;
      color: #6b7280;
      margin-top: -4px;
      margin-bottom: 8px;
    }
    code {
      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
      font-size: 12px;
    }
    pre {
      background: #0b1120;
      color: #e5e7eb;
      padding: 10px 12px;
      border-radius: 10px;
      overflow-x: auto;
      font-size: 12px;
      line-height: 1.5;
    }
    a {
      color: #1d4ed8;
      text-decoration: none;
    }
    a:hover {
      text-decoration: underline;
    }

    @media (max-width: 768px) {
      header {
        padding: 12px 16px;
      }
      main {
        margin-top: 12px;
      }
    }
  </style>
</head>
<body>
  <header>
    <h1>Whisper Large V3 – Medical Batch Transcription</h1>
    <p>
      Upload multiple audio files or a password-protected ZIP.
      Mode: <code>general</code> or <code>medical_en</code>.
      API docs at <code>/docs</code>.
    </p>
  </header>

  <!-- Progress & status AT TOP -->
  <div id="progress-wrapper">
    <div>Transcription progress</div>
    <div id="progress-track">
      <div id="progress-fill"></div>
    </div>
    <div id="progress-text">Idle</div>
  </div>
  <div id="status"></div>

  <main>
    <div class="card">
      <h2>1. Multi-file audio upload <span class="pill">JSON & DOCX</span></h2>
      <div class="row">
        <div class="col">
          <h3>Inputs</h3>
          <label for="files_input">Audio files</label>
          <input id="files_input" type="file" multiple accept="audio/*" />
          <div class="small-hint">
            You can select multiple audio files.
          </div>

          <label for="files_mode">Mode</label>
          <select id="files_mode">
            <option value="medical_en">medical_en (English medical bias)</option>
            <option value="general">general</option>
          </select>

          <div class="btn-row">
            <button class="btn-primary" id="btn_files_json">Transcribe → JSON</button>
            <button class="btn-secondary" id="btn_files_docx">Download DOCX</button>
          </div>
        </div>
        <div class="col">
          <h3>Combined transcript</h3>
          <textarea id="files_output" placeholder="Transcript will appear here when you use the JSON button."></textarea>
        </div>
      </div>
    </div>

    <div class="card">
      <h2>2. ZIP upload (with password) <span class="pill">JSON & DOCX</span></h2>
      <div class="row">
        <div class="col">
          <h3>ZIP Inputs</h3>
          <label for="zip_input">ZIP file</label>
          <input id="zip_input" type="file" accept=".zip" />
          <div class="small-hint">ZIP should contain audio files only.</div>

          <label for="zip_password">ZIP password (optional)</label>
          <input id="zip_password" type="password" placeholder="Leave blank if ZIP is not encrypted" />

          <label for="zip_mode">Mode</label>
          <select id="zip_mode">
            <option value="medical_en">medical_en (English medical bias)</option>
            <option value="general">general</option>
          </select>

          <div class="btn-row">
            <button class="btn-primary" id="btn_zip_json">Transcribe ZIP → JSON</button>
            <button class="btn-secondary" id="btn_zip_docx">Download ZIP DOCX</button>
          </div>
        </div>
        <div class="col">
          <h3>ZIP combined transcript</h3>
          <textarea id="zip_output" placeholder="Transcript will appear here when you use the JSON button."></textarea>
        </div>
      </div>
    </div>

    <!-- 3. Quick examples -->
    <div class="card">
      <h2>3. Quick examples <span class="pill">API & sample audio</span></h2>
      <h3>Sample audio for testing (download & upload above)</h3>
      <p class="small-hint">
        1. Download this small public sample file<br>
        2. Upload it in section 1 and click <strong>Transcribe → JSON</strong>
      </p>
      <p>
        👉 <a href="https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac" target="_blank" rel="noopener">
          Download example audio (mlk.flac)
        </a>
      </p>

      <h3>Example: cURL for multi-file JSON</h3>
      <p class="small-hint">Replace <code>@path/to/audio1.flac</code> with your local file path.</p>
      <pre><code>curl -X POST \\
  "https://staraks-whisper-large-v3.hf.space/api/transcribe/files" \\
  -H "Accept: application/json" \\
  -F "mode=medical_en" \\
  -F "files=@path/to/audio1.flac" \\
  -F "files=@path/to/audio2.wav"</code></pre>

      <h3>Example: cURL for ZIP JSON</h3>
      <p class="small-hint">ZIP file contains multiple audio files. Password field is optional.</p>
      <pre><code>curl -X POST \\
  "https://staraks-whisper-large-v3.hf.space/api/transcribe/zip" \\
  -H "Accept: application/json" \\
  -F "mode=medical_en" \\
  -F "file=@path/to/audios.zip" \\
  -F "password="</code></pre>
    </div>

    <!-- 4. System self-check -->
    <div class="card">
      <h2>4. System self-check <span class="pill">Model & API status</span></h2>
      <p class="small-hint">
        Use this to quickly verify that the API is running and the Whisper pipeline can be loaded.
      </p>
      <button class="btn-primary" id="btn_self_test">Run self-test</button>
      <pre id="self_test_output"><code>Click "Run self-test" to see status...</code></pre>
    </div>
  </main>

  <script>
    let __progressTimer = null;
    let __progressValue = 0;

    function setProgress(value, label) {
      __progressValue = Math.max(0, Math.min(100, value));
      const fill = document.getElementById("progress-fill");
      const text = document.getElementById("progress-text");
      if (fill) {
        fill.style.width = __progressValue + "%";
      }
      if (text) {
        text.innerText = label + " (" + __progressValue.toFixed(0) + "%)";
      }
    }

    function resetProgress() {
      if (__progressTimer) {
        clearInterval(__progressTimer);
        __progressTimer = null;
      }
      setProgress(0, "Idle");
    }

    function startSimulatedProgress(label) {
      if (__progressTimer) {
        clearInterval(__progressTimer);
      }
      let p = 5;
      setProgress(p, label);
      __progressTimer = setInterval(() => {
        if (p < 90) {
          p += Math.random() * 10;
          if (p > 90) p = 90;
          setProgress(p, label);
        }
      }, 600);
    }

    function finishProgress(label) {
      if (__progressTimer) {
        clearInterval(__progressTimer);
        __progressTimer = null;
      }
      setProgress(100, label);
      setTimeout(() => {
        resetProgress();
      }, 2000);
    }

    function errorProgress(message) {
      if (__progressTimer) {
        clearInterval(__progressTimer);
        __progressTimer = null;
      }
      setProgress(0, "Error");
      setStatus(message);
    }

    function apiUrl(path) {
      return new URL(path, window.location.origin).toString();
    }

    async function postForm(path, formData, expectBlob = false) {
      const url = apiUrl(path);
      let res;
      try {
        res = await fetch(url, {
          method: "POST",
          body: formData
        });
      } catch (e) {
        throw new Error("Failed to connect to server: " + e.message);
      }

      if (!res.ok) {
        let msg = "Request failed: " + res.status;
        try {
          const data = await res.json();
          if (data && data.detail) msg += " – " + JSON.stringify(data.detail);
        } catch (e) {
          // ignore JSON parse errors
        }
        throw new Error(msg);
      }
      if (expectBlob) {
        return await res.blob();
      } else {
        return await res.json();
      }
    }

    function setStatus(text) {
      document.getElementById("status").innerText = text || "";
    }

    // ------- Self test -------
    document.getElementById("btn_self_test").addEventListener("click", async () => {
      const out = document.getElementById("self_test_output");
      out.textContent = "Running self-test...";
      setStatus("Running self-test…");
      try {
        const res = await fetch(apiUrl("/self-test"));
        const data = await res.json();
        out.textContent = JSON.stringify(data, null, 2);
        if (data.status === "ok") {
          setStatus("Self-test OK – model and API are working.");
        } else {
          setStatus("Self-test reported an error.");
        }
      } catch (err) {
        console.error(err);
        out.textContent = "Self-test failed: " + err.message;
        setStatus("Self-test failed.");
      }
    });

    // ------- Multi-files JSON -------
    document.getElementById("btn_files_json").addEventListener("click", async () => {
      const filesInput = document.getElementById("files_input");
      const mode = document.getElementById("files_mode").value;
      const out = document.getElementById("files_output");

      if (!filesInput.files.length) {
        alert("Please choose at least one audio file.");
        return;
      }

      const formData = new FormData();
      for (const f of filesInput.files) {
        formData.append("files", f);
      }
      formData.append("mode", mode);

      setStatus("Transcribing multiple files… (this may take some time for large audio)");
      out.value = "";
      startSimulatedProgress("Transcribing files");

      try {
        const data = await postForm("/api/transcribe/files", formData, false);
        out.value = data.combined_transcript || "";
        setStatus("Done.");
        finishProgress("Transcription complete");
      } catch (err) {
        console.error(err);
        alert(err.message);
        errorProgress("Error during transcription.");
      }
    });

    // ------- Multi-files DOCX -------
    document.getElementById("btn_files_docx").addEventListener("click", async () => {
      const filesInput = document.getElementById("files_input");
      const mode = document.getElementById("files_mode").value;

      if (!filesInput.files.length) {
        alert("Please choose at least one audio file.");
        return;
      }

      const formData = new FormData();
      for (const f of filesInput.files) {
        formData.append("files", f);
      }
      formData.append("mode", mode);

      setStatus("Generating DOCX for multi-file transcription…");
      startSimulatedProgress("Generating DOCX");

      try {
        const blob = await postForm("/api/transcribe/files/docx", formData, true);
        const url = window.URL.createObjectURL(blob);
        const a = document.createElement("a");
        a.href = url;
        a.download = "transcripts_files.docx";
        document.body.appendChild(a);
        a.click();
        a.remove();
        window.URL.revokeObjectURL(url);
        setStatus("DOCX downloaded.");
        finishProgress("DOCX ready");
      } catch (err) {
        console.error(err);
        alert(err.message);
        errorProgress("Error during DOCX generation.");
      }
    });

    // ------- ZIP JSON -------
    document.getElementById("btn_zip_json").addEventListener("click", async () => {
      const zipInput = document.getElementById("zip_input");
      const pwd = document.getElementById("zip_password").value || "";
      const mode = document.getElementById("zip_mode").value;
      const out = document.getElementById("zip_output");

      if (!zipInput.files.length) {
        alert("Please choose a ZIP file.");
        return;
      }

      const formData = new FormData();
      formData.append("file", zipInput.files[0]);
      formData.append("password", pwd);
      formData.append("mode", mode);

      setStatus("Transcribing ZIP contents…");
      out.value = "";
      startSimulatedProgress("Transcribing ZIP");

      try {
        const data = await postForm("/api/transcribe/zip", formData, false);
        out.value = data.combined_transcript || "";
        setStatus("Done.");
        finishProgress("ZIP transcription complete");
      } catch (err) {
        console.error(err);
        alert(err.message);
        errorProgress("Error during ZIP transcription.");
      }
    });

    // ------- ZIP DOCX -------
    document.getElementById("btn_zip_docx").addEventListener("click", async () => {
      const zipInput = document.getElementById("zip_input");
      const pwd = document.getElementById("zip_password").value || "";
      const mode = document.getElementById("zip_mode").value;

      if (!zipInput.files.length) {
        alert("Please choose a ZIP file.");
        return;
      }

      const formData = new FormData();
      formData.append("file", zipInput.files[0]);
      formData.append("password", pwd);
      formData.append("mode", mode);

      setStatus("Generating DOCX from ZIP contents…");
      startSimulatedProgress("Generating ZIP DOCX");

      try {
        const blob = await postForm("/api/transcribe/zip/docx", formData, true);
        const url = window.URL.createObjectURL(blob);
        const a = document.createElement("a");
        a.href = url;
        a.download = "transcripts_zip.docx";
        document.body.appendChild(a);
        a.click();
        a.remove();
        window.URL.revokeObjectURL(url);
        setStatus("DOCX downloaded.");
        finishProgress("ZIP DOCX ready");
      } catch (err) {
        console.error(err);
        alert(err.message);
        errorProgress("Error during ZIP DOCX generation.");
      }
    });

    // Initial state
    resetProgress();
  </script>
</body>
</html>
"""


@app.get("/ui", response_class=HTMLResponse)
def get_ui():
    # Serve the static single-page web interface held in HTML_UI.
    return HTML_UI


# ===================== Run (local dev / HF Spaces) =====================

if __name__ == "__main__":
    import uvicorn

    # The PORT environment variable overrides the default port 7860
    port = int(os.getenv("PORT", "7860"))

    # Pass the application object instead of the "app:app" import string: the
    # string form only works when this file is named app.py and makes uvicorn
    # import the module a second time. An import string is only required for
    # reload/workers, which are not used here.
    uvicorn.run(app, host="0.0.0.0", port=port, reload=False)