import os
import json
import shutil
import tempfile
import re
from typing import List, Literal, Optional

from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import (
    FileResponse,
    JSONResponse,
    PlainTextResponse,
    HTMLResponse,
)
from pydantic import BaseModel
from faster_whisper import WhisperModel
import pyzipper
import soundfile as sf  # noqa: F401
from docx import Document

# ===================== CONFIG =====================

# Whisper model selection, overridable through environment variables.
MODEL_SIZE = os.getenv("WHISPER_MODEL_SIZE", "large-v3")
DEVICE = os.getenv("WHISPER_DEVICE", "cpu")  # "cpu" or "cuda"
COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE_TYPE", "int8")

# File extensions (lower-case, with leading dot) accepted as audio input.
# Keep .dct as allowed, but handle decoder failure nicely
AUDIO_EXTENSIONS = (
    ".wav",
    ".mp3",
    ".m4a",
    ".flac",
    ".ogg",
    ".opus",
    ".webm",
    ".dct",
)

# Default ZIP password if user leaves password blank
# NOTE(review): the fallback value is hard-coded here and is also echoed by
# /self-test and the API description; set DEFAULT_ZIP_PASSWORD in the
# environment if the password is meant to be confidential.
DEFAULT_ZIP_PASSWORD = os.getenv("DEFAULT_ZIP_PASSWORD", "dietcoke1")

# JSON files used for persistence; relative paths, so they resolve against
# the process working directory.
MEMORY_PATH = "transcribe_memory.json"
MEDICAL_TERMS_PATH = "medical_terms.json"

# Lazily populated module-level caches (None until first use).
fw_model: Optional[WhisperModel] = None
memory_cache: Optional[dict] = None
medical_terms_cache: Optional[dict] = None

# A word is a letter followed by at least two more letters or hyphens,
# i.e. three characters minimum and never any digits.
WORD_RE = re.compile(r"[A-Za-z][A-Za-z\-]{2,}")

# Lower-case words that are never recorded as candidate medical terms.
STOPWORDS = {
    "the", "and", "for", "that", "with", "this", "have", "from",
    "into", "about", "will", "there", "their", "which", "your",
    "been", "were", "they", "them", "then", "than", "also", "some",
    "very", "over", "under", "after", "before", "because", "would",
    "could", "should", "when", "where", "what", "while", "here",
    "such", "much", "more", "most", "many", "each", "every",
    "other", "another", "those", "these", "ours", "yours",
    "doctor", "patient", "patients", "report", "note",
}


# ===================== MEMORY HELPERS =====================

def ensure_memory_file():
    """Create MEMORY_PATH holding an empty rule list if it does not exist yet."""
    if os.path.exists(MEMORY_PATH):
        return
    with open(MEMORY_PATH, "w", encoding="utf-8") as handle:
        json.dump({"replacements": []}, handle, ensure_ascii=False, indent=2)


def load_memory() -> dict:
    """Return the replacement-rule store, reading MEMORY_PATH on first use.

    The result is cached in the module-level ``memory_cache``; later calls
    return the cached object without touching the disk.

    Returns:
        A dict whose ``"replacements"`` key is always a list of dicts.
        An unreadable, malformed or wrongly shaped file yields an empty
        rule list instead of an exception.
    """
    global memory_cache
    if memory_cache is not None:
        return memory_cache

    ensure_memory_file()

    loaded = None
    try:
        with open(MEMORY_PATH, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        loaded = None

    # Valid JSON can still have the wrong shape (e.g. a top-level list);
    # callers rely on ``.get("replacements")`` and on each rule being a dict.
    if not isinstance(loaded, dict):
        loaded = {"replacements": []}
    rules = loaded.get("replacements")
    if isinstance(rules, list):
        loaded["replacements"] = [r for r in rules if isinstance(r, dict)]
    else:
        loaded["replacements"] = []

    memory_cache = loaded
    return memory_cache


def save_memory(data: dict):
    """Persist ``data`` to MEMORY_PATH and make it the cached rule store.

    Args:
        data: The complete store, normally ``{"replacements": [...]}``.

    Raises:
        TypeError / ValueError: ``data`` is not JSON-serialisable. Raised
            before the cache or the file is touched.
        OSError: The file could not be written.
    """
    global memory_cache
    # Serialise before opening: open(..., "w") truncates immediately, so a
    # serialisation error after that point would wipe the existing file.
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    memory_cache = data
    with open(MEMORY_PATH, "w", encoding="utf-8") as f:
        f.write(payload)


def apply_memory_to_text(text: str) -> str:
    """Apply every stored replacement rule to ``text``, in stored order.

    Each rule replaces all occurrences of its ``source`` substring with its
    ``target``; rules with an empty or missing source are skipped.
    """
    rules = load_memory().get("replacements", [])
    result = text
    for rule in rules:
        needle = rule.get("source") or ""
        substitute = rule.get("target") or ""
        if not needle:
            continue
        result = result.replace(needle, substitute)
    return result


# ===================== MEDICAL TERMS HELPERS =====================

def ensure_med_terms_file():
    """Create MEDICAL_TERMS_PATH holding an empty term map if it is missing."""
    if os.path.exists(MEDICAL_TERMS_PATH):
        return
    with open(MEDICAL_TERMS_PATH, "w", encoding="utf-8") as handle:
        json.dump({"terms": {}}, handle, ensure_ascii=False, indent=2)


def load_med_terms() -> dict:
    """Return the medical-term store, reading MEDICAL_TERMS_PATH on first use.

    The result is cached in the module-level ``medical_terms_cache``; later
    calls return the cached object without touching the disk.

    Returns:
        A dict whose ``"terms"`` key is always a dict (term -> count).
        An unreadable, malformed or wrongly shaped file yields an empty
        term map instead of an exception.
    """
    global medical_terms_cache
    if medical_terms_cache is not None:
        return medical_terms_cache

    ensure_med_terms_file()

    loaded = None
    try:
        with open(MEDICAL_TERMS_PATH, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        loaded = None

    # Valid JSON can still have the wrong shape; callers index the
    # "terms" value as a dict.
    if not isinstance(loaded, dict):
        loaded = {"terms": {}}
    if not isinstance(loaded.get("terms"), dict):
        loaded["terms"] = {}

    medical_terms_cache = loaded
    return medical_terms_cache


def save_med_terms(data: dict):
    """Persist ``data`` to MEDICAL_TERMS_PATH and make it the cached store.

    Args:
        data: The complete store, normally ``{"terms": {term: count}}``.

    Raises:
        TypeError / ValueError: ``data`` is not JSON-serialisable. Raised
            before the cache or the file is touched.
        OSError: The file could not be written.
    """
    global medical_terms_cache
    # Serialise before opening: open(..., "w") truncates immediately, so a
    # serialisation error after that point would wipe the existing file.
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    medical_terms_cache = data
    with open(MEDICAL_TERMS_PATH, "w", encoding="utf-8") as f:
        f.write(payload)


def extract_candidate_terms(text: str) -> List[str]:
    """Return the distinct candidate terms found in ``text``.

    A candidate is a WORD_RE match, lower-cased, at least four characters
    long and not listed in STOPWORDS. This is a crude heuristic: longer,
    non-stopword words are usually domain terms or names.

    Returns:
        The unique candidates in alphabetical order, so repeated calls on
        the same text give the same list.
    """
    # WORD_RE only matches letters and hyphens, so no digit filtering is
    # needed here.
    candidates = {
        word.lower()
        for word in WORD_RE.findall(text)
        if len(word) >= 4 and word.lower() not in STOPWORDS
    }
    return sorted(candidates)


def update_med_terms_from_text(text: str) -> List[str]:
    """Count the candidate terms of ``text`` into the persisted term store.

    Every candidate term has its counter incremented (starting at 1 for a
    term not seen before), then the store is saved.

    Returns:
        The terms that were not in the store before this call.
    """
    store = load_med_terms()
    counts = store.get("terms", {})
    candidates = extract_candidate_terms(text)

    added: List[str] = []
    for term in candidates:
        if term not in counts:
            counts[term] = 1
            added.append(term)
            continue
        counts[term] += 1

    store["terms"] = counts
    save_med_terms(store)
    return added


# ===================== MODEL HELPERS =====================

def get_model() -> WhisperModel:
    """Return the shared WhisperModel, constructing it on the first call.

    The instance is built from MODEL_SIZE, DEVICE and COMPUTE_TYPE and kept
    in the module-level ``fw_model``.
    """
    global fw_model
    if fw_model is None:
        fw_model = WhisperModel(
            MODEL_SIZE,
            device=DEVICE,
            compute_type=COMPUTE_TYPE,
        )
    return fw_model


def build_transcription_params(mode: str):
    """
    Build the keyword arguments used for a transcription run.

    All modes use fast, CPU-friendly greedy decoding (beam_size=1,
    best_of=1, temperature 0.0). Mode "medical_en" additionally pins the
    language to English and supplies a medical initial prompt; every other
    mode sets the language to None so it is auto-detected.
    """
    settings = dict(
        task="transcribe",
        beam_size=1,
        best_of=1,
        temperature=0.0,
    )

    if mode != "medical_en":
        # General mode: no forced language, no prompt.
        settings["language"] = None
        return settings

    settings["language"] = "en"
    settings["initial_prompt"] = (
        "This is an English medical dictation. Use accurate medical terminology, "
        "including anatomy, diseases, lab values, imaging, and medications. "
        "Write in a formal clinical style."
    )
    return settings


def transcribe_file(path: str, mode: str) -> str:
    """
    Run faster-whisper on a single file, return plain text with memory applied.

    Args:
        path: Path of the audio file to transcribe.
        mode: Transcription mode, forwarded to build_transcription_params.

    Returns:
        The concatenated segment texts, stripped, with the stored
        replacement rules applied.

    Raises:
        RuntimeError: The audio could not be decoded (e.g. proprietary
            .dct) or transcription failed for any other reason. Decoder
            failures carry a message starting with
            "Audio decoder could not read file".
    """
    model = get_model()
    params = build_transcription_params(mode)

    try:
        segments, _info = model.transcribe(
            path,
            task=params["task"],
            beam_size=params["beam_size"],
            best_of=params["best_of"],
            temperature=params["temperature"],
            language=params["language"],
            initial_prompt=params.get("initial_prompt"),
        )
        # `segments` is a lazy generator: the model only runs while it is
        # being consumed, so the iteration has to stay inside this `try`
        # for inference errors to be wrapped like start-up errors.
        raw_text = "".join(seg.text for seg in segments).strip()
    except Exception as e:
        msg = str(e)
        fname = os.path.basename(path)

        # ffmpeg / decoder-type failures
        decoder_signatures = [
            "Invalid data found when processing input",
            "error opening",
            "Decoder",
            "demuxing failed",
            "Could not seek to",
        ]
        if any(sig in msg for sig in decoder_signatures):
            raise RuntimeError(
                f"Audio decoder could not read file '{fname}'. "
                f"This often happens with proprietary .dct formats. "
                f"Please export/convert this dictation file to WAV or MP3 "
                f"using your dictation software, then upload the converted file."
            ) from e

        raise RuntimeError(
            f"Transcription failed for {fname}: {msg}"
        ) from e

    return apply_memory_to_text(raw_text)


# ===================== Pydantic models =====================

# Transcript of a single audio file.
class FileTranscript(BaseModel):
    filename: str  # base name of the audio file
    text: str  # transcript text after replacement rules were applied


# JSON body returned by the transcription endpoints.
class TranscriptionResponse(BaseModel):
    mode: Literal["general", "medical_en"]  # mode the batch was run with
    combined_transcript: str  # all transcripts joined, one "### File N" section each
    items: List[FileTranscript]  # per-file transcripts
    file_count: int  # number of entries in `items`
    audio_files: List[str]  # file names of the transcribed files
    new_medical_terms: List[str] = []  # terms first seen in this batch (empty unless extraction was requested)


# One replacement rule: every occurrence of `source` becomes `target`.
class MemoryRule(BaseModel):
    source: str  # substring to look for in a transcript
    target: str  # text substituted for it


# JSON body returned by the /api/memory endpoints.
class MemoryResponse(BaseModel):
    replacements: List[MemoryRule]  # all stored rules, in stored order


# JSON body returned by the /api/medical-terms endpoints.
class MedicalTermsResponse(BaseModel):
    terms: dict  # collected term -> occurrence counter


# ===================== OTHER HELPERS =====================

def filter_audio_files(paths: List[str]) -> List[str]:
    """Return, in input order, the paths whose extension is in AUDIO_EXTENSIONS.

    The extension comparison is case-insensitive.
    """
    return [
        candidate
        for candidate in paths
        if os.path.splitext(candidate)[1].lower() in AUDIO_EXTENSIONS
    ]


def format_combined(results: List[FileTranscript]) -> str:
    """Join per-file transcripts into one text block.

    Each file contributes a "### File N: <name>" heading, a blank line, its
    text (or "[No transcript]" when the text is empty) and a trailing blank
    line. Surrounding whitespace of the final string is stripped.
    """
    sections: List[str] = []
    for number, entry in enumerate(results, start=1):
        body = entry.text or "[No transcript]"
        sections.extend(
            [f"### File {number}: {entry.filename}", "", body, ""]
        )
    return "\n".join(sections).strip()


def build_docx(results: List[FileTranscript], title: str) -> str:
    """Write the transcripts to a DOCX file and return its path.

    The document has a level-1 heading with ``title`` and, per file, a
    level-2 heading, the transcript (or "[No transcript]") and an empty
    spacer paragraph. The file is saved as "transcripts.docx" inside a
    freshly created temporary directory, which this function does not
    remove.
    """
    document = Document()
    document.add_heading(title, level=1)

    for number, entry in enumerate(results, start=1):
        document.add_heading(f"File {number}: {entry.filename}", level=2)
        document.add_paragraph(entry.text or "[No transcript]")
        document.add_paragraph()

    target = os.path.join(tempfile.mkdtemp(prefix="docx_"), "transcripts.docx")
    document.save(target)
    return target


def save_uploads_to_temp(files: List[UploadFile]) -> List[str]:
    """Copy the uploads into one new temporary directory.

    Args:
        files: Uploaded files; only the base name of each client-supplied
            file name is used.

    Returns:
        One local path per upload, in upload order, all inside the same
        temporary directory. The caller owns that directory.

    Raises:
        OSError: A file could not be written; the temporary directory is
            removed before the error propagates.
    """
    tmpdir = tempfile.mkdtemp(prefix="uploads_")
    local_paths: List[str] = []
    used_names = set()
    try:
        for uf in files:
            filename = os.path.basename(uf.filename or "audio")
            # basename() can yield "", "." or ".." (e.g. for "dir/" or
            # ".."), none of which can be opened as a file.
            if filename in ("", ".", ".."):
                filename = "audio"

            # Two uploads with the same name must not overwrite each other.
            stem, ext = os.path.splitext(filename)
            unique_name = filename
            counter = 1
            while unique_name in used_names:
                unique_name = f"{stem}_{counter}{ext}"
                counter += 1
            used_names.add(unique_name)

            local_path = os.path.join(tmpdir, unique_name)
            with open(local_path, "wb") as out_f:
                shutil.copyfileobj(uf.file, out_f)
            local_paths.append(local_path)
    except BaseException:
        shutil.rmtree(tmpdir, ignore_errors=True)
        raise
    return local_paths


def extract_zip_to_temp(zip_file: UploadFile, password: Optional[str]) -> List[str]:
    """Extract an uploaded (optionally AES-encrypted) ZIP into a temp directory.

    The archive is flattened: only the base name of each member is kept.
    Members whose base names collide get a numeric suffix ("name_1.ext")
    instead of overwriting each other.

    Args:
        zip_file: The uploaded archive.
        password: Password applied to the archive when truthy.

    Returns:
        Paths of the extracted files, in archive order, all inside one
        temporary directory that the caller owns. An archive without any
        file members yields an empty list.

    Raises:
        HTTPException: 400 if the archive cannot be opened or decrypted.
    """
    # Holds only the uploaded archive; always removed before returning.
    upload_dir = tempfile.mkdtemp(prefix="zip_")
    outdir = tempfile.mkdtemp(prefix="zip_files_")
    extracted: List[str] = []
    used_names = set()

    try:
        # A fixed internal name avoids depending on the client file name.
        zip_path = os.path.join(upload_dir, "archive.zip")
        with open(zip_path, "wb") as out_f:
            shutil.copyfileobj(zip_file.file, out_f)

        with pyzipper.AESZipFile(zip_path, "r") as zf:
            if password:
                zf.setpassword(password.encode("utf-8"))

            for info in zf.infolist():
                if info.is_dir():
                    continue
                # Flattening to the base name also keeps members from
                # escaping outdir through "../" components.
                name = os.path.basename(info.filename)
                if name in ("", ".", ".."):
                    continue

                stem, ext = os.path.splitext(name)
                unique_name = name
                counter = 1
                while unique_name in used_names:
                    unique_name = f"{stem}_{counter}{ext}"
                    counter += 1
                used_names.add(unique_name)

                out_path = os.path.join(outdir, unique_name)
                with zf.open(info) as src, open(out_path, "wb") as dst:
                    shutil.copyfileobj(src, dst)
                extracted.append(out_path)

    except (pyzipper.BadZipFile, RuntimeError, KeyError) as e:
        shutil.rmtree(outdir, ignore_errors=True)
        raise HTTPException(
            status_code=400,
            detail=f"Failed to open ZIP file. Check password / integrity. {e}",
        ) from e
    except BaseException:
        # Unexpected failure (I/O error, unsupported compression, ...):
        # do not leave a half-filled directory behind.
        shutil.rmtree(outdir, ignore_errors=True)
        raise
    finally:
        shutil.rmtree(upload_dir, ignore_errors=True)

    if not extracted:
        # Nothing to hand over, so the caller could never locate outdir.
        shutil.rmtree(outdir, ignore_errors=True)

    return extracted


# ===================== FastAPI app =====================

# The ASGI application. `title`, `description` and `version` are published
# through the generated OpenAPI schema (Swagger UI at /docs); the
# description text is rendered there, so it is user-facing documentation.
app = FastAPI(
    title="Whisper Large V3 – Medical Batch Transcription API (faster-whisper CPU)",
    description="""
HTTP API for Whisper (via faster-whisper) with:

- Multi-file audio upload (including .dct where supported by ffmpeg)
- Password-protected ZIP upload (default password: dietcoke1)
- Option to ONLY extract ZIP and list audio names (no transcription)
- NEW: ZIP → choose selected files to transcribe
- Medical-biased transcription mode
- Persistent word/phrase memory (replacements)
- Extraction & saving of frequent 'medical terms' from transcripts
- Combined transcript + DOCX export
- Fast greedy decoding for CPU (beam_size=1, best_of=1)

If a .dct file uses a proprietary codec that ffmpeg cannot decode,
you will get a clear error suggesting to convert to WAV/MP3 first.

Use `/docs` for Swagger UI and `/ui` for the web interface.
""",
    version="2.5.0",
)


# Landing route: returns a short plain-text banner pointing at /docs and /ui.
@app.get("/", response_class=PlainTextResponse)
def root():
    return (
        "Whisper Large V3 – Medical Batch Transcription API (faster-whisper)\n"
        "Open /docs for API documentation or /ui for the web interface.\n"
    )


# Liveness probe: always answers with the plain text "OK" and does not
# touch the model or any file.
@app.get("/health", response_class=PlainTextResponse)
def health():
    return "OK"


@app.get("/self-test")
def self_test():
    """
    Basic self-check:
    - can we create/load the faster-whisper model?
    - device & compute type
    - number of memory rules
    - number of collected medical terms
    """
    try:
        # Forces the model to be constructed if it has not been loaded yet.
        get_model()
        rule_total = len(load_memory().get("replacements", []))
        term_total = len(load_med_terms().get("terms", {}))

        report = {
            "status": "ok",
            "message": "Model loaded successfully.",
            "model_size": MODEL_SIZE,
            "device": DEVICE,
            "compute_type": COMPUTE_TYPE,
            "memory_rules": rule_total,
            "medical_terms_count": term_total,
            "zip_default_password": DEFAULT_ZIP_PASSWORD,
            "decoding": "fast (beam_size=1, best_of=1)",
        }
        return JSONResponse(report)
    except Exception as e:
        # Any failure is reported as a 500 with the error text.
        failure = {
            "status": "error",
            "message": f"Model or memory failed to load: {e}",
        }
        return JSONResponse(failure, status_code=500)


# ---------- 1. Multi-file transcription (JSON) ----------

@app.post("/api/transcribe/files", response_model=TranscriptionResponse)
def transcribe_files(
    files: List[UploadFile] = File(..., description="One or more audio files"),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
    extract_terms: bool = Form(False),
):
    # Transcribes every uploaded audio file and returns the per-file and
    # combined transcripts as JSON.
    # Errors: 400 for no files, no supported audio, or an undecodable file;
    # 500 for any other transcription failure.
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded.")

    local_paths = save_uploads_to_temp(files)
    try:
        audio_paths = filter_audio_files(local_paths)

        if not audio_paths:
            raise HTTPException(
                status_code=400,
                detail=(
                    f"No valid audio files found. "
                    f"Supported extensions: {', '.join(AUDIO_EXTENSIONS)}"
                ),
            )

        items: List[FileTranscript] = []
        try:
            for path in audio_paths:
                fname = os.path.basename(path)
                text = transcribe_file(path, mode)
                items.append(FileTranscript(filename=fname, text=text))
        except RuntimeError as e:
            msg = str(e)
            # If decoder can't read (common for proprietary .dct), treat as 400
            if "Audio decoder could not read file" in msg:
                raise HTTPException(status_code=400, detail=msg) from e
            raise HTTPException(
                status_code=500,
                detail=f"Transcription failed: {msg}",
            ) from e
    finally:
        # The uploads are only needed while transcribing; drop their
        # temporary directory on success and on every error path.
        for tmp_dir in {os.path.dirname(p) for p in local_paths}:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    combined = format_combined(items)
    filenames = [it.filename for it in items]

    new_terms: List[str] = []
    if extract_terms:
        # Use the raw transcripts, not `combined`: its "### File N: <name>"
        # headings and "[No transcript]" placeholders would otherwise be
        # counted as medical terms.
        spoken_text = "\n".join(it.text for it in items if it.text)
        if spoken_text:
            new_terms = update_med_terms_from_text(spoken_text)

    return TranscriptionResponse(
        mode=mode,
        combined_transcript=combined,
        items=items,
        file_count=len(items),
        audio_files=filenames,
        new_medical_terms=new_terms,
    )


# ---------- 2. Multi-file transcription (DOCX) ----------

@app.post("/api/transcribe/files/docx")
def transcribe_files_docx(
    files: List[UploadFile] = File(..., description="One or more audio files"),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
    extract_terms: bool = Form(False),
):
    # Transcribes every uploaded audio file and returns the result as a
    # DOCX download named "transcripts_files.docx".
    # Errors: 400 for no files, no supported audio, or an undecodable file;
    # 500 for any other transcription failure.
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded.")

    local_paths = save_uploads_to_temp(files)
    try:
        audio_paths = filter_audio_files(local_paths)

        if not audio_paths:
            raise HTTPException(
                status_code=400,
                detail=(
                    f"No valid audio files found. "
                    f"Supported extensions: {', '.join(AUDIO_EXTENSIONS)}"
                ),
            )

        items: List[FileTranscript] = []
        combined_text: List[str] = []
        try:
            for path in audio_paths:
                fname = os.path.basename(path)
                text = transcribe_file(path, mode)
                items.append(FileTranscript(filename=fname, text=text))
                combined_text.append(text)
        except RuntimeError as e:
            msg = str(e)
            if "Audio decoder could not read file" in msg:
                raise HTTPException(status_code=400, detail=msg) from e
            raise HTTPException(
                status_code=500,
                detail=f"Transcription failed while building DOCX: {msg}",
            ) from e
    finally:
        # The uploads are only needed while transcribing; drop their
        # temporary directory on success and on every error path.
        for tmp_dir in {os.path.dirname(p) for p in local_paths}:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    if extract_terms and combined_text:
        update_med_terms_from_text("\n".join(combined_text))

    # NOTE: the DOCX lives in its own temporary directory, which must
    # outlive this function because the response streams the file; it is
    # not cleaned up here.
    docx_path = build_docx(items, "Multi-file transcription")

    return FileResponse(
        docx_path,
        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        filename="transcripts_files.docx",
    )


# ---------- 3. ZIP EXTRACT ONLY (no transcription) ----------

@app.post("/api/zip/extract")
def zip_extract_only(
    file: UploadFile = File(..., description="ZIP file containing audio files"),
    password: str = Form(
        "",
        description="ZIP password. Leave blank to use default 'dietcoke1'.",
    ),
):
    # Extracts the ZIP and lists the names of the audio files it contains;
    # nothing is transcribed.
    # Errors: 400 if the archive cannot be opened or has no supported audio.
    if file is None:
        raise HTTPException(status_code=400, detail="No ZIP uploaded.")

    effective_password = password if password else DEFAULT_ZIP_PASSWORD

    extracted_paths = extract_zip_to_temp(file, effective_password)
    try:
        audio_paths = filter_audio_files(extracted_paths)

        if not audio_paths:
            raise HTTPException(
                status_code=400,
                detail=(
                    "No valid audio files in ZIP. "
                    f"Supported extensions: {', '.join(AUDIO_EXTENSIONS)}"
                ),
            )

        filenames = [os.path.basename(p) for p in audio_paths]
    finally:
        # Only the names are returned, so the extracted files can be
        # removed right away.
        for tmp_dir in {os.path.dirname(p) for p in extracted_paths}:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    return JSONResponse(
        {
            "status": "ok",
            "count": len(filenames),
            "audio_files": filenames,
        }
    )


# ---------- 4. ZIP transcription (JSON) – ALL FILES ----------

@app.post("/api/transcribe/zip", response_model=TranscriptionResponse)
def transcribe_zip(
    file: UploadFile = File(..., description="ZIP file containing audio files"),
    password: str = Form(
        "",
        description="ZIP password. Leave blank to use default 'dietcoke1'.",
    ),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
    extract_terms: bool = Form(False),
):
    # Extracts the ZIP, transcribes every audio file in it and returns the
    # per-file and combined transcripts as JSON.
    # Errors: 400 for an unreadable archive, no supported audio, or an
    # undecodable file; 500 for any other transcription failure.
    if file is None:
        raise HTTPException(status_code=400, detail="No ZIP uploaded.")

    effective_password = password if password else DEFAULT_ZIP_PASSWORD

    extracted_paths = extract_zip_to_temp(file, effective_password)
    try:
        audio_paths = filter_audio_files(extracted_paths)

        if not audio_paths:
            raise HTTPException(
                status_code=400,
                detail=(
                    "No valid audio files in ZIP. "
                    f"Supported extensions: {', '.join(AUDIO_EXTENSIONS)}"
                ),
            )

        items: List[FileTranscript] = []
        try:
            for path in audio_paths:
                fname = os.path.basename(path)
                text = transcribe_file(path, mode)
                items.append(FileTranscript(filename=fname, text=text))
        except RuntimeError as e:
            msg = str(e)
            if "Audio decoder could not read file" in msg:
                raise HTTPException(status_code=400, detail=msg) from e
            raise HTTPException(
                status_code=500,
                detail=f"Transcription failed (ZIP): {msg}",
            ) from e
    finally:
        # The extracted files are only needed while transcribing; drop
        # their temporary directory on success and on every error path.
        for tmp_dir in {os.path.dirname(p) for p in extracted_paths}:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    combined = format_combined(items)
    filenames = [it.filename for it in items]

    new_terms: List[str] = []
    if extract_terms:
        # Use the raw transcripts, not `combined`: its "### File N: <name>"
        # headings and "[No transcript]" placeholders would otherwise be
        # counted as medical terms.
        spoken_text = "\n".join(it.text for it in items if it.text)
        if spoken_text:
            new_terms = update_med_terms_from_text(spoken_text)

    return TranscriptionResponse(
        mode=mode,
        combined_transcript=combined,
        items=items,
        file_count=len(items),
        audio_files=filenames,
        new_medical_terms=new_terms,
    )


# ---------- 5. ZIP transcription (JSON) – SELECTED FILES ONLY ----------

@app.post("/api/transcribe/zip/selected", response_model=TranscriptionResponse)
def transcribe_zip_selected(
    file: UploadFile = File(..., description="ZIP file containing audio files"),
    password: str = Form(
        "",
        description="ZIP password. Leave blank to use default 'dietcoke1'.",
    ),
    selected_files: str = Form(
        "",
        description="Comma-separated file names (inside ZIP) to transcribe",
    ),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
    extract_terms: bool = Form(False),
):
    """
    Extract ZIP, then ONLY transcribe the subset of files whose basenames are
    passed in 'selected_files' (comma-separated).
    """
    # Errors: 400 for an empty selection, an unreadable archive, no
    # supported audio, no selected name found, or an undecodable file;
    # 500 for any other transcription failure.
    if file is None:
        raise HTTPException(status_code=400, detail="No ZIP uploaded.")

    effective_password = password if password else DEFAULT_ZIP_PASSWORD

    # Validate the selection before doing any extraction work.
    selected_set = {
        name.strip()
        for name in (selected_files or "").split(",")
        if name.strip()
    }
    if not selected_set:
        raise HTTPException(
            status_code=400,
            detail="No selected_files provided. Please choose at least one file from the ZIP.",
        )

    extracted_paths = extract_zip_to_temp(file, effective_password)
    try:
        audio_paths = filter_audio_files(extracted_paths)

        if not audio_paths:
            raise HTTPException(
                status_code=400,
                detail=(
                    "No valid audio files in ZIP. "
                    f"Supported extensions: {', '.join(AUDIO_EXTENSIONS)}"
                ),
            )

        # Map names -> paths for quick lookup
        name_to_path = {}
        for p in audio_paths:
            base = os.path.basename(p)
            if base in selected_set:
                name_to_path[base] = p

        if not name_to_path:
            raise HTTPException(
                status_code=400,
                detail="None of the selected_files were found as audio inside the ZIP.",
            )

        items: List[FileTranscript] = []
        try:
            # Alphabetical by file name: the selection is a set, so the
            # order the user typed is not available; sorting keeps the
            # output deterministic.
            for fname in sorted(name_to_path):
                path = name_to_path[fname]
                text = transcribe_file(path, mode)
                items.append(FileTranscript(filename=fname, text=text))
        except RuntimeError as e:
            msg = str(e)
            if "Audio decoder could not read file" in msg:
                raise HTTPException(status_code=400, detail=msg) from e
            raise HTTPException(
                status_code=500,
                detail=f"Transcription failed (ZIP selected): {msg}",
            ) from e
    finally:
        # The extracted files are only needed while transcribing; drop
        # their temporary directory on success and on every error path.
        for tmp_dir in {os.path.dirname(p) for p in extracted_paths}:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    combined = format_combined(items)
    filenames = [it.filename for it in items]

    new_terms: List[str] = []
    if extract_terms:
        # Use the raw transcripts, not `combined`: its "### File N: <name>"
        # headings and "[No transcript]" placeholders would otherwise be
        # counted as medical terms.
        spoken_text = "\n".join(it.text for it in items if it.text)
        if spoken_text:
            new_terms = update_med_terms_from_text(spoken_text)

    return TranscriptionResponse(
        mode=mode,
        combined_transcript=combined,
        items=items,
        file_count=len(items),
        audio_files=filenames,
        new_medical_terms=new_terms,
    )


# ---------- 6. ZIP transcription (DOCX) – ALL FILES ----------

@app.post("/api/transcribe/zip/docx")
def transcribe_zip_docx(
    file: UploadFile = File(..., description="ZIP file containing audio files"),
    password: str = Form(
        "",
        description="ZIP password. Leave blank to use default 'dietcoke1'.",
    ),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
    extract_terms: bool = Form(False),
):
    # Extracts the ZIP, transcribes every audio file in it and returns the
    # result as a DOCX download named "transcripts_zip.docx".
    # Errors: 400 for an unreadable archive, no supported audio, or an
    # undecodable file; 500 for any other transcription failure.
    if file is None:
        raise HTTPException(status_code=400, detail="No ZIP uploaded.")

    effective_password = password if password else DEFAULT_ZIP_PASSWORD

    extracted_paths = extract_zip_to_temp(file, effective_password)
    try:
        audio_paths = filter_audio_files(extracted_paths)

        if not audio_paths:
            raise HTTPException(
                status_code=400,
                detail=(
                    "No valid audio files in ZIP. "
                    f"Supported extensions: {', '.join(AUDIO_EXTENSIONS)}"
                ),
            )

        items: List[FileTranscript] = []
        combined_text: List[str] = []
        try:
            for path in audio_paths:
                fname = os.path.basename(path)
                text = transcribe_file(path, mode)
                items.append(FileTranscript(filename=fname, text=text))
                combined_text.append(text)
        except RuntimeError as e:
            msg = str(e)
            if "Audio decoder could not read file" in msg:
                raise HTTPException(status_code=400, detail=msg) from e
            raise HTTPException(
                status_code=500,
                detail=f"Transcription failed while building ZIP DOCX: {msg}",
            ) from e
    finally:
        # The extracted files are only needed while transcribing; drop
        # their temporary directory on success and on every error path.
        for tmp_dir in {os.path.dirname(p) for p in extracted_paths}:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    if extract_terms and combined_text:
        update_med_terms_from_text("\n".join(combined_text))

    # NOTE: the DOCX lives in its own temporary directory, which must
    # outlive this function because the response streams the file; it is
    # not cleaned up here.
    docx_path = build_docx(items, "ZIP transcription")

    return FileResponse(
        docx_path,
        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        filename="transcripts_zip.docx",
    )


# ===================== MEMORY ENDPOINTS =====================

@app.get("/api/memory", response_model=MemoryResponse)
def get_memory():
    # Returns every stored replacement rule; a missing "source" or "target"
    # key is reported as an empty string.
    stored_rules = load_memory().get("replacements", [])
    rules = []
    for entry in stored_rules:
        rules.append(
            MemoryRule(
                source=entry.get("source", ""),
                target=entry.get("target", ""),
            )
        )
    return MemoryResponse(replacements=rules)


@app.post("/api/memory/add", response_model=MemoryResponse)
def add_memory(rule: MemoryRule):
    # Adds a rule, replacing any existing rule with the same source, saves
    # the store and returns the full rule list. The new rule always ends up
    # last.
    mem = load_memory()

    kept = []
    for existing in mem.get("replacements", []):
        if existing.get("source") != rule.source:
            kept.append(existing)
    kept.append({"source": rule.source, "target": rule.target})

    mem["replacements"] = kept
    save_memory(mem)

    return MemoryResponse(
        replacements=[
            MemoryRule(source=entry.get("source", ""), target=entry.get("target", ""))
            for entry in mem.get("replacements", [])
        ]
    )


@app.post("/api/memory/reset", response_model=MemoryResponse)
def reset_memory():
    # Replaces the stored rules with an empty list and returns that empty
    # state.
    save_memory({"replacements": []})
    return MemoryResponse(replacements=[])


# ===================== MEDICAL TERMS ENDPOINTS =====================

@app.get("/api/medical-terms", response_model=MedicalTermsResponse)
def get_med_terms():
    # Returns the collected term -> counter mapping (empty if none stored).
    stored = load_med_terms()
    term_counts = stored.get("terms", {})
    return MedicalTermsResponse(terms=term_counts)


@app.post("/api/medical-terms/reset", response_model=MedicalTermsResponse)
def reset_med_terms():
    # Replaces the stored terms with an empty mapping and returns that
    # empty state.
    save_med_terms({"terms": {}})
    return MedicalTermsResponse(terms={})


# ===================== Simple HTML UI (multi-tab) =====================

HTML_UI = r"""<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <title>Whisper – Medical Batch Transcription (faster-whisper)</title>
  <style>
    body {
      font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
      margin: 0;
      padding: 0;
      background: #f4f4f6;
      color: #111827;
    }
    header {
      background: #111827;
      color: #f9fafb;
      padding: 16px 24px;
    }
    header h1 {
      margin: 0;
      font-size: 20px;
    }
    header p {
      margin: 4px 0 0;
      font-size: 13px;
      color: #9ca3af;
    }

    #progress-wrapper {
      background: #f9fafb;
      border-bottom: 1px solid #e5e7eb;
      padding: 8px 24px 10px;
      font-size: 12px;
      color: #4b5563;
    }
    #progress-track {
      width: 100%;
      max-width: 1100px;
      height: 8px;
      background: #e5e7eb;
      border-radius: 999px;
      overflow: hidden;
      margin-top: 4px;
    }
    #progress-fill {
      height: 100%;
      width: 0%;
      background: #111827;
      border-radius: 999px;
      transition: width 0.2s ease-out;
    }
    #progress-text {
      font-size: 11px;
      color: #6b7280;
      margin-top: 3px;
      min-height: 14px;
      max-width: 1100px;
    }
    #status {
      max-width: 1100px;
      margin: 2px auto 0;
      font-size: 12px;
      color: #6b7280;
      padding: 0 16px 6px;
      min-height: 16px;
    }

    main {
      max-width: 1100px;
      margin: 16px auto 40px;
      padding: 0 16px;
    }

    .tabs {
      display: flex;
      gap: 8px;
      border-bottom: 1px solid #e5e7eb;
      margin-bottom: 16px;
    }
    .tab-button {
      padding: 8px 14px;
      border-radius: 999px 999px 0 0;
      border: none;
      background: transparent;
      font-size: 13px;
      cursor: pointer;
      color: #6b7280;
    }
    .tab-button.active {
      background: #ffffff;
      color: #111827;
      box-shadow: 0 -1px 0 #ffffff, 0 2px 4px rgba(15, 23, 42, 0.15);
    }
    .tab-content {
      display: none;
    }
    .tab-content.active {
      display: block;
    }

    .card {
      background: #ffffff;
      border-radius: 12px;
      padding: 16px 20px;
      box-shadow: 0 12px 35px rgba(15, 23, 42, 0.08);
      margin-bottom: 20px;
    }
    .card h2 {
      margin-top: 0;
      font-size: 18px;
      display: flex;
      align-items: center;
      gap: 8px;
    }
    .card h3 {
      margin-bottom: 6px;
      margin-top: 16px;
      font-size: 15px;
    }
    label {
      font-size: 13px;
      font-weight: 500;
      display: block;
      margin-bottom: 4px;
    }
    input[type="file"],
    select,
    input[type="password"],
    input[type="text"] {
      width: 100%;
      padding: 8px 10px;
      font-size: 13px;
      border-radius: 8px;
      border: 1px solid #d1d5db;
      box-sizing: border-box;
      margin-bottom: 10px;
      background: #f9fafb;
    }
    textarea {
      width: 100%;
      min-height: 260px;
      padding: 10px;
      box-sizing: border-box;
      border-radius: 10px;
      border: 1px solid #d1d5db;
      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
      font-size: 13px;
      background: #f9fafb;
    }
    .row {
      display: flex;
      flex-wrap: wrap;
      gap: 16px;
    }
    .col {
      flex: 1 1 280px;
    }
    .btn-row {
      display: flex;
      gap: 10px;
      flex-wrap: wrap;
      margin: 6px 0 10px;
    }
    button {
      appearance: none;
      border: none;
      border-radius: 999px;
      padding: 8px 16px;
      font-size: 13px;
      font-weight: 500;
      cursor: pointer;
      display: inline-flex;
      align-items: center;
      gap: 6px;
    }
    .btn-primary {
      background: #111827;
      color: #f9fafb;
    }
    .btn-secondary {
      background: #e5e7eb;
      color: #111827;
    }
    .pill {
      display: inline-flex;
      align-items: center;
      gap: 6px;
      padding: 3px 8px;
      border-radius: 999px;
      font-size: 11px;
      background: #eff6ff;
      color: #1d4ed8;
      margin-left: 8px;
    }
    .small-hint {
      font-size: 11px;
      color: #6b7280;
      margin-top: -4px;
      margin-bottom: 8px;
    }
    code {
      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
      font-size: 12px;
    }
    pre {
      background: #0b1120;
      color: #e5e7eb;
      padding: 10px 12px;
      border-radius: 10px;
      overflow-x: auto;
      font-size: 12px;
      line-height: 1.5;
    }
    a {
      color: #1d4ed8;
      text-decoration: none;
    }
    a:hover {
      text-decoration: underline;
    }

    @media (max-width: 768px) {
      header {
        padding: 12px 16px;
      }
      main {
        margin-top: 12px;
      }
    }
  </style>
</head>
<body>
  <header>
    <h1>Whisper – Medical Batch Transcription (faster-whisper CPU)</h1>
    <p>
      Multi-file & ZIP transcription with medical mode, .dct support (where decodable), ZIP extract-only mode,
      selectable ZIP files for transcription, and memory of preferred terms + collected medical vocabulary.
      Default ZIP password: <code>dietcoke1</code>. API docs: <code>/docs</code>.
    </p>
  </header>

  <div id="progress-wrapper">
    <div>Transcription progress</div>
    <div id="progress-track">
      <div id="progress-fill"></div>
    </div>
    <div id="progress-text">Idle</div>
  </div>
  <div id="status"></div>

  <main>
    <div class="tabs">
      <button class="tab-button active" data-tab="tab-audio">Audio files</button>
      <button class="tab-button" data-tab="tab-zip">ZIP files</button>
      <button class="tab-button" data-tab="tab-tools">Tools (Self-test & Memory)</button>
    </div>

    <!-- AUDIO TAB -->
    <div id="tab-audio" class="tab-content active">
      <div class="card">
        <h2>Audio files <span class="pill">JSON & DOCX</span></h2>
        <div class="row">
          <div class="col">
            <h3>Inputs</h3>
            <label for="files_input">Audio files (.wav, .mp3, .flac, .dct, ...)</label>
            <input id="files_input" type="file" multiple accept="audio/*,.dct" />
            <div class="small-hint">
              You can select multiple audio files. <br />
              .dct dictation files are accepted when ffmpeg can decode them. If not, you will see a clear error asking you to convert.
            </div>

            <label for="files_mode">Mode</label>
            <select id="files_mode">
              <option value="medical_en">medical_en (English medical bias)</option>
              <option value="general">general</option>
            </select>

            <label>
              <input type="checkbox" id="files_extract_terms" />
              Extract & save medical terms from this batch
            </label>
            <div class="small-hint">
              Finds frequent longer words (likely medical terms) and adds them into a vocabulary list for future reference.
            </div>

            <div class="btn-row">
              <button class="btn-primary" id="btn_files_json">Transcribe → JSON</button>
              <button class="btn-secondary" id="btn_files_docx">Download DOCX</button>
            </div>
          </div>
          <div class="col">
            <h3>Combined transcript</h3>
            <textarea id="files_output" placeholder="Transcript will appear here when you use the JSON button."></textarea>
          </div>
        </div>
      </div>

      <div class="card">
        <h2>Quick example audio</h2>
        <p class="small-hint">
          1. Download this public sample file<br>
          2. Upload it above and click <strong>Transcribe → JSON</strong>
        </p>
        <p>
          👉 <a href="https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac" target="_blank" rel="noopener">
            Download example audio (mlk.flac)
          </a>
        </p>
      </div>
    </div>

    <!-- ZIP TAB -->
    <div id="tab-zip" class="tab-content">
      <div class="card">
        <h2>ZIP upload <span class="pill">Extract & Transcribe</span></h2>
        <div class="row">
          <div class="col">
            <h3>ZIP Inputs</h3>
            <label for="zip_input">ZIP file (contains audio / .dct)</label>
            <input id="zip_input" type="file" accept=".zip" />
            <div class="small-hint">ZIP should contain audio or .dct files.</div>

            <label for="zip_password">ZIP password</label>
            <input id="zip_password" type="password" placeholder="Leave blank to use default password: dietcoke1" />
            <div class="small-hint">
              If you don't type anything here, the server will try password <code>dietcoke1</code>.
            </div>

            <label for="zip_mode">Mode</label>
            <select id="zip_mode">
              <option value="medical_en">medical_en (English medical bias)</option>
              <option value="general">general</option>
            </select>

            <label>
              <input type="checkbox" id="zip_extract_terms" />
              Extract & save medical terms from this batch
            </label>
            <div class="small-hint">
              Adds frequent longer words from all transcripts in the ZIP into the shared medical vocabulary list.
            </div>

            <div class="btn-row">
              <button class="btn-secondary" id="btn_zip_extract_only">Extract only & list audio files</button>
              <button class="btn-primary" id="btn_zip_json">Transcribe ZIP → JSON (all files)</button>
              <button class="btn-secondary" id="btn_zip_selected">Transcribe selected from ZIP → JSON</button>
              <button class="btn-secondary" id="btn_zip_docx">Download ZIP DOCX (all files)</button>
            </div>

            <h3>Files inside ZIP (select to transcribe)</h3>
            <div id="zip_file_list" class="small-hint">
              Run "Extract only & list audio files" to see files and choose which ones to transcribe.
            </div>
          </div>
          <div class="col">
            <h3>ZIP combined transcript</h3>
            <textarea id="zip_output" placeholder="Transcript will appear here when you use the JSON buttons."></textarea>
          </div>
        </div>
      </div>
    </div>

    <!-- TOOLS TAB -->
    <div id="tab-tools" class="tab-content">
      <div class="card">
        <h2>Self-check <span class="pill">Model & memory status</span></h2>
        <p class="small-hint">
          Use this to verify that the model is loaded and memory rules are available.
        </p>
        <button class="btn-primary" id="btn_self_test">Run self-test</button>
        <pre id="self_test_output"><code>Click "Run self-test" to see status...</code></pre>
      </div>

      <div class="card">
        <h2>Memory – preferred words & corrections</h2>
        <p class="small-hint">
          Add replacements such as <code>diabetis → diabetes mellitus</code>.<br>
          These are applied automatically to every new transcription.
        </p>

        <div class="row">
          <div class="col">
            <label for="mem_source">From (original text)</label>
            <input id="mem_source" type="text" placeholder="e.g. diabetis" />

            <label for="mem_target">To (preferred text)</label>
            <input id="mem_target" type="text" placeholder="e.g. diabetes mellitus" />

            <div class="btn-row">
              <button class="btn-primary" id="btn_mem_add">Add / Update rule</button>
              <button class="btn-secondary" id="btn_mem_reset">Reset all</button>
              <button class="btn-secondary" id="btn_mem_refresh">Refresh list</button>
            </div>
          </div>
          <div class="col">
            <h3>Current memory rules</h3>
            <pre id="mem_output"><code>Loading memory...</code></pre>
          </div>
        </div>
      </div>

      <div class="card">
        <h2>Collected medical terms</h2>
        <p class="small-hint">
          When you enable "Extract & save medical terms", the app collects frequent longer words here.
          Use this vocabulary for future fine-tuning or dictionary building.
        </p>
        <div class="btn-row">
          <button class="btn-primary" id="btn_terms_refresh">Refresh terms</button>
          <button class="btn-secondary" id="btn_terms_reset">Reset terms</button>
        </div>
        <pre id="terms_output"><code>Click "Refresh terms" to see collected vocabulary...</code></pre>
      </div>

      <div class="card">
        <h2>API example</h2>
        <h3>Multi-file JSON</h3>
        <pre><code>curl -X POST \
  "https://staraks-whisper-large-v3.hf.space/api/transcribe/files" \
  -H "Accept: application/json" \
  -F "mode=medical_en" \
  -F "extract_terms=true" \
  -F "files=@path/to/audio1.flac"</code></pre>

        <h3>ZIP selected files JSON</h3>
        <pre><code>curl -X POST \
  "https://staraks-whisper-large-v3.hf.space/api/transcribe/zip/selected" \
  -H "Accept: application/json" \
  -F "mode=medical_en" \
  -F "extract_terms=true" \
  -F "selected_files=file1.wav,file3.dct" \
  -F "file=@path/to/archive.zip"</code></pre>
      </div>
    </div>
  </main>

  <script>
    document.querySelectorAll(".tab-button").forEach(btn => {
      btn.addEventListener("click", () => {
        const tabId = btn.getAttribute("data-tab");
        document.querySelectorAll(".tab-button").forEach(b => b.classList.remove("active"));
        document.querySelectorAll(".tab-content").forEach(c => c.classList.remove("active"));
        btn.classList.add("active");
        document.getElementById(tabId).classList.add("active");
        setStatus("");
      });
    });

    let __progressTimer = null;
    let __progressValue = 0;

    function setProgress(value, label) {
      __progressValue = Math.max(0, Math.min(100, value));
      const fill = document.getElementById("progress-fill");
      const text = document.getElementById("progress-text");
      if (fill) {
        fill.style.width = __progressValue + "%";
      }
      if (text) {
        text.innerText = label + " (" + __progressValue.toFixed(0) + "%)";
      }
    }

    function resetProgress() {
      if (__progressTimer) {
        clearInterval(__progressTimer);
        __progressTimer = null;
      }
      setProgress(0, "Idle");
    }

    function startSimulatedProgress(label) {
      if (__progressTimer) {
        clearInterval(__progressTimer);
      }
      let p = 5;
      setProgress(p, label);
      __progressTimer = setInterval(() => {
        if (p < 90) {
          p += Math.random() * 10;
          if (p > 90) p = 90;
          setProgress(p, label);
        }
      }, 700);
    }

    function finishProgress(label) {
      if (__progressTimer) {
        clearInterval(__progressTimer);
        __progressTimer = null;
      }
      setProgress(100, label);
      setTimeout(() => {
        resetProgress();
      }, 2000);
    }

    function errorProgress(message) {
      if (__progressTimer) {
        clearInterval(__progressTimer);
        __progressTimer = null;
      }
      setProgress(0, "Error");
      setStatus(message);
    }

    function apiUrl(path) {
      return new URL(path, window.location.origin).toString();
    }

    async function postForm(path, formData, expectBlob = false) {
      const url = apiUrl(path);
      let res;
      try {
        res = await fetch(url, {
          method: "POST",
          body: formData
        });
      } catch (e) {
        throw new Error("Failed to fetch: " + e.message);
      }

      if (!res.ok) {
        let msg = "Request failed: " + res.status;
        try {
          const data = await res.json();
          if (data && data.detail) msg += " – " + JSON.stringify(data.detail);
        } catch (e) {}
        throw new Error(msg);
      }
      if (expectBlob) {
        return await res.blob();
      } else {
        return await res.json();
      }
    }

    async function postJson(path, payload) {
      const url = apiUrl(path);
      let res;
      try {
        res = await fetch(url, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify(payload)
        });
      } catch (e) {
        throw new Error("Failed to fetch: " + e.message);
      }
      if (!res.ok) {
        let msg = "Request failed: " + res.status;
        try {
          const data = await res.json();
          if (data && data.detail) msg += " – " + JSON.stringify(data.detail);
        } catch (e) {}
        throw new Error(msg);
      }
      return await res.json();
    }

    async function getJson(path) {
      const url = apiUrl(path);
      const res = await fetch(url);
      if (!res.ok) {
        throw new Error("Request failed: " + res.status);
      }
      return await res.json();
    }

    function setStatus(text) {
      document.getElementById("status").innerText = text || "";
    }

    // Self-test
    document.getElementById("btn_self_test").addEventListener("click", async () => {
      const out = document.getElementById("self_test_output");
      out.textContent = "Running self-test...";
      setStatus("Running self-test…");
      try {
        const data = await getJson("/self-test");
        out.textContent = JSON.stringify(data, null, 2);
        if (data.status === "ok") {
          setStatus("Self-test OK – model, memory and vocabulary loaded. Default ZIP password: " + data.zip_default_password);
        } else {
          setStatus("Self-test reported an error.");
        }
      } catch (err) {
        console.error(err);
        out.textContent = "Self-test failed: " + err.message;
        setStatus("Self-test failed.");
      }
    });

    // Multi-files JSON
    document.getElementById("btn_files_json").addEventListener("click", async () => {
      const filesInput = document.getElementById("files_input");
      const mode = document.getElementById("files_mode").value;
      const out = document.getElementById("files_output");
      const extractTerms = document.getElementById("files_extract_terms").checked;

      if (!filesInput.files.length) {
        alert("Please choose at least one audio/.dct file.");
        return;
      }

      const formData = new FormData();
      for (const f of filesInput.files) {
        formData.append("files", f);
      }
      formData.append("mode", mode);
      formData.append("extract_terms", extractTerms ? "true" : "false");

      setStatus("Transcribing multiple files… (CPU, depends on audio length)");
      out.value = "";
      startSimulatedProgress("Uploading & transcribing files");

      try {
        const data = await postForm("/api/transcribe/files", formData, false);
        out.value = data.combined_transcript || "";
        const count = data.file_count || (data.items ? data.items.length : 0);
        let extra = "";
        if (data.new_medical_terms && data.new_medical_terms.length) {
          extra = " New medical terms added: " + data.new_medical_terms.join(", ");
        }
        setStatus("Done. Processed " + count + " file(s): " + (data.audio_files || []).join(", ") + extra);
        finishProgress("Transcription complete");
      } catch (err) {
        console.error(err);
        alert(err.message);
        errorProgress(err.message || "Error during transcription.");
      }
    });

    // Multi-files DOCX
    document.getElementById("btn_files_docx").addEventListener("click", async () => {
      const filesInput = document.getElementById("files_input");
      const mode = document.getElementById("files_mode").value;
      const extractTerms = document.getElementById("files_extract_terms").checked;

      if (!filesInput.files.length) {
        alert("Please choose at least one audio/.dct file.");
        return;
      }

      const formData = new FormData();
      for (const f of filesInput.files) {
        formData.append("files", f);
      }
      formData.append("mode", mode);
      formData.append("extract_terms", extractTerms ? "true" : "false");

      setStatus("Generating DOCX for multi-file transcription…");
      startSimulatedProgress("Uploading files & generating DOCX");

      try {
        const blob = await postForm("/api/transcribe/files/docx", formData, true);
        const url = window.URL.createObjectURL(blob);
        const a = document.createElement("a");
        a.href = url;
        a.download = "transcripts_files.docx";
        document.body.appendChild(a);
        a.click();
        a.remove();
        window.URL.revokeObjectURL(url);
        setStatus("DOCX downloaded.");
        finishProgress("DOCX ready");
      } catch (err) {
        console.error(err);
        alert(err.message);
        errorProgress(err.message || "Error during DOCX generation.");
      }
    });

    // ZIP Extract-only
    document.getElementById("btn_zip_extract_only").addEventListener("click", async () => {
      const zipInput = document.getElementById("zip_input");
      const pwd = document.getElementById("zip_password").value || "";
      const listDiv = document.getElementById("zip_file_list");

      if (!zipInput.files.length) {
        alert("Please choose a ZIP file.");
        return;
      }

      const formData = new FormData();
      formData.append("file", zipInput.files[0]);
      formData.append("password", pwd);

      setStatus("Uploading ZIP and extracting audio file list…");
      startSimulatedProgress("Extracting ZIP");

      try {
        const data = await postForm("/api/zip/extract", formData, false);
        const count = data.count || (data.audio_files ? data.audio_files.length : 0);
        const names = data.audio_files || [];
        setStatus("Extracted " + count + " audio file(s) from ZIP.");

        // populate selectable list
        if (names.length) {
          listDiv.innerHTML = "";
          names.forEach(name => {
            const safeId = "zip_choice_" + name.replace(/[^a-zA-Z0-9_\-]/g, "_");
            const label = document.createElement("label");
            label.style.display = "block";
            const cb = document.createElement("input");
            cb.type = "checkbox";
            cb.value = name;
            cb.id = safeId;
            cb.checked = true;
            label.appendChild(cb);
            label.append(" " + name);
            listDiv.appendChild(label);
          });
        } else {
          listDiv.innerHTML = "No audio files found in ZIP.";
        }

        finishProgress("ZIP extraction complete");
      } catch (err) {
        console.error(err);
        alert(err.message);
        errorProgress(err.message || "Error during ZIP extraction.");
      }
    });

    // ZIP JSON – ALL FILES
    document.getElementById("btn_zip_json").addEventListener("click", async () => {
      const zipInput = document.getElementById("zip_input");
      const pwd = document.getElementById("zip_password").value || "";
      const mode = document.getElementById("zip_mode").value;
      const extractTerms = document.getElementById("zip_extract_terms").checked;
      const out = document.getElementById("zip_output");

      if (!zipInput.files.length) {
        alert("Please choose a ZIP file.");
        return;
      }

      const formData = new FormData();
      formData.append("file", zipInput.files[0]);
      formData.append("password", pwd);
      formData.append("mode", mode);
      formData.append("extract_terms", extractTerms ? "true" : "false");

      setStatus("Uploading ZIP and extracting audio files… (default password: dietcoke1 if empty)");
      out.value = "";
      startSimulatedProgress("Uploading & extracting ZIP");

      try {
        const data = await postForm("/api/transcribe/zip", formData, false);
        out.value = data.combined_transcript || "";
        const count = data.file_count || (data.items ? data.items.length : 0);
        const names = (data.audio_files || []).join(", ");
        let extra = "";
        if (data.new_medical_terms && data.new_medical_terms.length) {
          extra = " New medical terms added: " + data.new_medical_terms.join(", ");
        }
        setStatus("Done. Extracted " + count + " audio file(s) from ZIP and transcribed: " + names + extra);
        finishProgress("ZIP extraction & transcription complete");
      } catch (err) {
        console.error(err);
        alert(err.message);
        errorProgress(err.message || "Error during ZIP transcription.");
      }
    });

    // ZIP JSON – SELECTED FILES ONLY
    document.getElementById("btn_zip_selected").addEventListener("click", async () => {
      const zipInput = document.getElementById("zip_input");
      const pwd = document.getElementById("zip_password").value || "";
      const mode = document.getElementById("zip_mode").value;
      const extractTerms = document.getElementById("zip_extract_terms").checked;
      const out = document.getElementById("zip_output");
      const listDiv = document.getElementById("zip_file_list");

      if (!zipInput.files.length) {
        alert("Please choose a ZIP file.");
        return;
      }

      const checkboxes = listDiv.querySelectorAll("input[type='checkbox']:checked");
      if (!checkboxes.length) {
        alert("Please select at least one file from the ZIP (use the checkboxes).");
        return;
      }
      const names = Array.from(checkboxes).map(cb => cb.value);

      const formData = new FormData();
      formData.append("file", zipInput.files[0]);
      formData.append("password", pwd);
      formData.append("mode", mode);
      formData.append("extract_terms", extractTerms ? "true" : "false");
      formData.append("selected_files", names.join(","));

      setStatus("Uploading ZIP and transcribing selected files only…");
      out.value = "";
      startSimulatedProgress("Uploading & transcribing selected ZIP files");

      try {
        const data = await postForm("/api/transcribe/zip/selected", formData, false);
        out.value = data.combined_transcript || "";
        const count = data.file_count || (data.items ? data.items.length : 0);
        const transcribedNames = (data.audio_files || []).join(", ");
        let extra = "";
        if (data.new_medical_terms && data.new_medical_terms.length) {
          extra = " New medical terms added: " + data.new_medical_terms.join(", ");
        }
        setStatus("Done. Transcribed " + count + " selected file(s) from ZIP: " + transcribedNames + extra);
        finishProgress("Selected ZIP transcription complete");
      } catch (err) {
        console.error(err);
        alert(err.message);
        errorProgress(err.message || "Error during selected ZIP transcription.");
      }
    });

    // ZIP DOCX – ALL FILES
    document.getElementById("btn_zip_docx").addEventListener("click", async () => {
      const zipInput = document.getElementById("zip_input");
      const pwd = document.getElementById("zip_password").value || "";
      const mode = document.getElementById("zip_mode").value;
      const extractTerms = document.getElementById("zip_extract_terms").checked;

      if (!zipInput.files.length) {
        alert("Please choose a ZIP file.");
        return;
      }

      const formData = new FormData();
      formData.append("file", zipInput.files[0]);
      formData.append("password", pwd);
      formData.append("mode", mode);
      formData.append("extract_terms", extractTerms ? "true" : "false");

      setStatus("Uploading ZIP, extracting audio files and generating DOCX… (default password: dietcoke1 if empty)");
      startSimulatedProgress("Processing ZIP & generating DOCX");

      try {
        const blob = await postForm("/api/transcribe/zip/docx", formData, true);
        const url = window.URL.createObjectURL(blob);
        const a = document.createElement("a");
        a.href = url;
        a.download = "transcripts_zip.docx";
        document.body.appendChild(a);
        a.click();
        a.remove();
        window.URL.revokeObjectURL(url);
        setStatus("DOCX downloaded.");
        finishProgress("ZIP DOCX ready");
      } catch (err) {
        console.error(err);
        alert(err.message);
        errorProgress(err.message || "Error during ZIP DOCX generation.");
      }
    });

    // Memory view helpers
    async function refreshMemory() {
      const out = document.getElementById("mem_output");
      try {
        const data = await getJson("/api/memory");
        out.textContent = JSON.stringify(data, null, 2);
        setStatus("Memory loaded.");
      } catch (err) {
        console.error(err);
        out.textContent = "Failed to load memory: " + err.message;
        setStatus("Failed to load memory.");
      }
    }

    document.getElementById("btn_mem_add").addEventListener("click", async () => {
      const src = document.getElementById("mem_source").value.trim();
      const tgt = document.getElementById("mem_target").value.trim();
      if (!src || !tgt) {
        alert("Please fill both From and To fields.");
        return;
      }
      setStatus("Adding/updating memory rule…");
      try {
        const data = await postJson("/api/memory/add", { source: src, target: tgt });
        document.getElementById("mem_output").textContent = JSON.stringify(data, null, 2);
        setStatus("Memory rule saved.");
      } catch (err) {
        console.error(err);
        alert(err.message);
        setStatus("Failed to save memory rule.");
      }
    });

    document.getElementById("btn_mem_reset").addEventListener("click", async () => {
      if (!confirm("Reset all memory rules?")) return;
      setStatus("Resetting memory rules…");
      try {
        const data = await postJson("/api/memory/reset", {});
        document.getElementById("mem_output").textContent = JSON.stringify(data, null, 2);
        setStatus("Memory reset.");
      } catch (err) {
        console.error(err);
        alert(err.message);
        setStatus("Failed to reset memory.");
      }
    });

    document.getElementById("btn_mem_refresh").addEventListener("click", () => {
      refreshMemory();
    });

    // Medical terms view helpers
    async function refreshTerms() {
      const out = document.getElementById("terms_output");
      try {
        const data = await getJson("/api/medical-terms");
        out.textContent = JSON.stringify(data, null, 2);
        setStatus("Loaded collected medical terms.");
      } catch (err) {
        console.error(err);
        out.textContent = "Failed to load terms: " + err.message;
        setStatus("Failed to load medical terms.");
      }
    }

    document.getElementById("btn_terms_refresh").addEventListener("click", () => {
      refreshTerms();
    });

    document.getElementById("btn_terms_reset").addEventListener("click", async () => {
      if (!confirm("Reset ALL collected medical terms?")) return;
      setStatus("Resetting medical terms…");
      try {
        const data = await postJson("/api/medical-terms/reset", {});
        document.getElementById("terms_output").textContent = JSON.stringify(data, null, 2);
        setStatus("Medical terms reset.");
      } catch (err) {
        console.error(err);
        alert(err.message);
        setStatus("Failed to reset medical terms.");
      }
    });

    // Initial
    resetProgress();
    refreshMemory();
    refreshTerms();
  </script>
</body>
</html>
"""


@app.get("/ui", response_class=HTMLResponse)
def get_ui():
    # Serve the embedded single-page web UI. The markup, styles and client
    # script all live in the module-level HTML_UI string, so there is nothing
    # to render here; the route's response_class marks the payload as HTML.
    # (Kept as comments on purpose: a docstring would be published by FastAPI
    # as the operation description in the OpenAPI schema.)
    page = HTML_UI
    return page


if __name__ == "__main__":
    # Development/stand-alone entry point: start uvicorn for this app.
    #
    # Environment overrides (defaults reproduce the previous hard-coded values):
    #   PORT            - TCP port to listen on (default 7860).
    #   HOST            - bind address (default 0.0.0.0, i.e. all interfaces).
    #   UVICORN_RELOAD  - "1"/"true"/"yes"/"on" enables auto-reload (default on).
    #                     Auto-reload is a development aid that runs the server
    #                     under a supervisor process; set it to "false" for
    #                     container/production deployments.
    import uvicorn

    port = int(os.getenv("PORT", "7860"))
    host = os.getenv("HOST", "0.0.0.0")
    reload_enabled = os.getenv("UVICORN_RELOAD", "true").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }

    # The app is passed as an import string ("module:attribute") because
    # uvicorn requires that form whenever reload is enabled.
    # NOTE(review): "app:app" assumes this file is importable as `app` —
    # confirm the module name matches the deployed filename.
    uvicorn.run("app:app", host=host, port=port, reload=reload_enabled)