Spaces:
Running
Running
| import os | |
| import shutil | |
| import tempfile | |
| from typing import List, Literal, Optional | |
| import torch | |
| import pyzipper | |
| import soundfile as sf # noqa: F401 (ensure audio backend is available) | |
| from docx import Document | |
| from fastapi import FastAPI, File, UploadFile, Form, HTTPException | |
| from fastapi.responses import ( | |
| FileResponse, | |
| JSONResponse, | |
| PlainTextResponse, | |
| HTMLResponse, | |
| ) | |
| from pydantic import BaseModel | |
| from transformers import pipeline | |
| import spaces | |
# ===================== CONFIG =====================
# Hugging Face model id handed to the ASR pipeline.
MODEL_NAME = "openai/whisper-large-v3"

# File extensions (lower-case, with leading dot) treated as audio.
AUDIO_EXTENSIONS = (".wav", ".mp3", ".m4a", ".flac", ".ogg", ".opus", ".webm")

# Use GPU if available on the Space: CUDA device index 0, otherwise "cpu".
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Lazy-loaded pipeline (created on first request); None until then.
asr_pipe = None
def get_pipeline():
    """Return the shared ASR pipeline, building it on the first call.

    The pipeline is stored in the module-level ``asr_pipe`` so later calls
    reuse the same object instead of constructing it again.
    """
    global asr_pipe
    if asr_pipe is not None:
        return asr_pipe

    asr_pipe = pipeline(
        task="automatic-speech-recognition",
        model=MODEL_NAME,
        chunk_length_s=30,
        device=device,
    )
    return asr_pipe
# ===================== Pydantic models =====================
class FileTranscript(BaseModel):
    """Transcript of a single audio file."""

    # Name of the audio file the text belongs to.
    filename: str
    # Transcribed text for that file.
    text: str
class TranscriptionResponse(BaseModel):
    """JSON payload describing the outcome of a batch transcription."""

    # Transcription mode the request was processed with.
    mode: Literal["general", "medical_en"]
    # All per-file transcripts merged into one text.
    combined_transcript: str
    # One entry per transcribed file.
    items: List[FileTranscript]
# ===================== Helper functions =====================
def build_generate_kwargs(mode: str):
    """Build the generation options for a transcription mode.

    mode: 'general' | 'medical_en'

    Every mode uses the "transcribe" task (text stays in the language of the
    audio). Only 'medical_en' adds a fixed English language setting and an
    ``initial_prompt`` describing medical dictation; any other mode value
    gets the task-only options.

    NOTE(review): ``initial_prompt`` is the argument name used by the
    openai-whisper package; confirm the consumer of this dict maps it to
    whatever the transformers pipeline actually accepts.
    """
    if mode != "medical_en":
        # keep same language as audio
        return {"task": "transcribe"}

    # Strong bias towards English medical terminology
    return {
        "task": "transcribe",
        "language": "en",
        "initial_prompt": (
            "This is a medical dictation. Use accurate English medical terminology, "
            "including anatomy, diseases, investigations, lab values, imaging, and drugs. "
            "Keep the style clinical and professional."
        ),
    }
def filter_audio_files(paths: List[str]) -> List[str]:
    """Return the entries of *paths* whose extension is a known audio type.

    The extension is compared case-insensitively against
    ``AUDIO_EXTENSIONS``; input order is preserved.
    """
    return [
        candidate
        for candidate in paths
        if os.path.splitext(candidate)[1].lower() in AUDIO_EXTENSIONS
    ]
def transcribe_file(path: str, mode: str) -> str:
    """Transcribe one audio file and return its text.

    Args:
        path: Location of the audio file on disk.
        mode: Transcription mode, forwarded to ``build_generate_kwargs``.

    Returns:
        The transcript with surrounding whitespace removed, or ``""`` when
        the pipeline returns no text.

    NOTE(review): ``spaces`` is imported by this file but not used in the
    visible code; if the Space runs on ZeroGPU hardware this function
    presumably needs the ``@spaces.GPU`` decorator - confirm.
    """
    pipe = get_pipeline()
    # Copy so the translation below never mutates the builder's dict.
    generate_kwargs = dict(build_generate_kwargs(mode))

    # `initial_prompt` is the openai-whisper spelling. The transformers
    # Whisper `generate()` expects the prompt as token ids in `prompt_ids`
    # and rejects keyword arguments it does not know, so passing
    # `initial_prompt` through would fail every "medical_en" request.
    prompt_text = generate_kwargs.pop("initial_prompt", None)
    if prompt_text:
        prompt_ids = pipe.tokenizer.get_prompt_ids(prompt_text, return_tensors="pt")
        # Keep the prompt on the same device as the model weights.
        # NOTE(review): verify against the pinned transformers version that
        # the prompt text is not echoed at the start of the output.
        generate_kwargs["prompt_ids"] = prompt_ids.to(pipe.model.device)

    result = pipe(
        path,
        batch_size=8,
        generate_kwargs=generate_kwargs,
        return_timestamps=False,
    )

    # The pipeline may hand back a dict or a list of dicts; use the first.
    if isinstance(result, list):
        result = result[0] if result else None
    if isinstance(result, dict):
        return (result.get("text") or "").strip()
    return ""
def format_combined(results: List[FileTranscript]) -> str:
    """Merge per-file transcripts into one Markdown-style text.

    Each file contributes a ``### File N: <filename>`` heading (N starts at
    1), a blank line, its text (or ``[No transcript]`` when the text is
    empty) and a trailing blank line. The final string is stripped.
    """
    sections = [
        f"### File {number}: {entry.filename}\n\n"
        f"{entry.text if entry.text else '[No transcript]'}\n"
        for number, entry in enumerate(results, start=1)
    ]
    return "\n".join(sections).strip()
def build_docx(results: List[FileTranscript], title: str) -> str:
    """Write the transcripts to a Word document and return its path.

    The document starts with *title* as a level-1 heading; every transcript
    then gets a level-2 ``File N: <filename>`` heading, a paragraph with its
    text (``[No transcript]`` when empty) and an empty spacer paragraph.
    The file is saved as ``transcripts.docx`` inside a newly created
    temporary directory, which this function does not delete.
    """
    document = Document()
    document.add_heading(title, level=1)

    number = 0
    for entry in results:
        number += 1
        body = entry.text or "[No transcript]"
        document.add_heading(f"File {number}: {entry.filename}", level=2)
        document.add_paragraph(body)
        document.add_paragraph()

    target = os.path.join(tempfile.mkdtemp(prefix="docx_"), "transcripts.docx")
    document.save(target)
    return target
def save_uploads_to_temp(files: "List[UploadFile]") -> List[str]:
    """Copy uploaded files into one fresh temporary directory.

    Args:
        files: Uploads exposing ``filename`` and a readable ``file`` object.

    Returns:
        One path per upload, in upload order. All paths live directly inside
        the same new temp directory; removing it is the caller's job.

    Raises:
        OSError: If a file cannot be written. The temp directory is removed
            before the error propagates.
    """
    tmpdir = tempfile.mkdtemp(prefix="uploads_")
    local_paths: List[str] = []
    try:
        for uf in files:
            # Only the final path component of the client-supplied name is
            # used. A missing name, or one without a usable last component
            # ("dir/", ".", ".."), falls back to "audio" instead of making
            # open() point at a directory.
            filename = os.path.basename(uf.filename or "")
            if filename in ("", ".", ".."):
                filename = "audio"

            # Two uploads with the same name must not overwrite each other:
            # add a numeric suffix before the extension so the extension
            # (used later for audio filtering) is preserved.
            stem, ext = os.path.splitext(filename)
            local_path = os.path.join(tmpdir, filename)
            attempt = 1
            while os.path.exists(local_path):
                local_path = os.path.join(tmpdir, f"{stem}_{attempt}{ext}")
                attempt += 1

            with open(local_path, "wb") as out_f:
                shutil.copyfileobj(uf.file, out_f)
            local_paths.append(local_path)
    except Exception:
        # Do not leave partially written uploads behind.
        shutil.rmtree(tmpdir, ignore_errors=True)
        raise
    return local_paths
def extract_zip_to_temp(zip_file: UploadFile, password: Optional[str]) -> List[str]:
    """Extract an uploaded (optionally encrypted) ZIP into a temp directory.

    Directory structure inside the archive is flattened: every member is
    written directly into one new temp directory under its base name.

    Args:
        zip_file: The uploaded archive.
        password: Archive password, or ``None``/empty for no password.

    Returns:
        Paths of the extracted files in archive order. Removing their
        directory is the caller's job. An archive without file members
        yields ``[]``.

    Raises:
        HTTPException: 400 when the archive cannot be opened or read
            (bad ZIP, wrong or missing password, missing member).
    """
    outdir = tempfile.mkdtemp(prefix="zip_files_")
    extracted: List[str] = []
    try:
        # The uploaded archive itself is only needed during extraction, so
        # it lives in a directory that is removed as soon as we are done.
        with tempfile.TemporaryDirectory(prefix="zip_") as spool_dir:
            zip_path = os.path.join(spool_dir, "archive.zip")
            with open(zip_path, "wb") as out_f:
                shutil.copyfileobj(zip_file.file, out_f)

            with pyzipper.AESZipFile(zip_path, "r") as zf:
                if password:
                    zf.setpassword(password.encode("utf-8"))
                for info in zf.infolist():
                    if info.is_dir():
                        continue
                    # Base name only: member paths never leave `outdir`.
                    name = os.path.basename(info.filename)
                    if name in ("", ".", ".."):
                        continue

                    # Members from different folders can share a base name;
                    # suffix later ones instead of overwriting earlier ones.
                    stem, ext = os.path.splitext(name)
                    out_path = os.path.join(outdir, name)
                    attempt = 1
                    while os.path.exists(out_path):
                        out_path = os.path.join(outdir, f"{stem}_{attempt}{ext}")
                        attempt += 1

                    with zf.open(info) as src, open(out_path, "wb") as dst:
                        shutil.copyfileobj(src, dst)
                    extracted.append(out_path)
    except (pyzipper.BadZipFile, RuntimeError, KeyError) as e:
        shutil.rmtree(outdir, ignore_errors=True)
        raise HTTPException(
            status_code=400,
            detail=f"Failed to open ZIP file. Check password / integrity. {e}",
        ) from e
    except Exception:
        # Unexpected failure (e.g. disk error): still clean up, then re-raise.
        shutil.rmtree(outdir, ignore_errors=True)
        raise

    if not extracted:
        # No returned path points at the directory, so no caller could
        # ever locate it for cleanup.
        shutil.rmtree(outdir, ignore_errors=True)
    return extracted
# ===================== FastAPI app =====================
# Long-form description passed to FastAPI as the application description.
_API_DESCRIPTION = """
HTTP API for Whisper Large V3 with:
- Multi-file audio upload
- Password-protected ZIP upload
- Medical-biased transcription mode
- Combined transcript
- Optional merged Word (.docx) download
Use `/docs` for Swagger UI and `/ui` for the web interface.
"""

app = FastAPI(
    title="Whisper Large V3 – Medical Batch Transcription API",
    description=_API_DESCRIPTION,
    version="1.0.0",
)
# The handler was never registered on `app`; PlainTextResponse keeps the
# returned string as plain text instead of a JSON-encoded string.
@app.get("/", response_class=PlainTextResponse)
def root():
    """Return a short plain-text banner pointing to /docs and /ui."""
    return (
        "Whisper Large V3 – Medical Batch Transcription API\n"
        "Open /docs for API documentation or /ui for the web interface.\n"
    )
# The handler was never registered on `app`.
# NOTE(review): "/health" is chosen from the function name - confirm the
# path expected by whatever probes this service.
@app.get("/health", response_class=PlainTextResponse)
def health():
    """Liveness probe: always answers with the plain text ``OK``."""
    return "OK"
# The bundled web UI fetches "/self-test", but the handler was never
# registered on `app`.
@app.get("/self-test")
def self_test():
    """
    Basic self-check:
    - can we create/load the pipeline?
    - what device are we using?

    Returns a JSON body with ``status`` "ok" plus model and device names, or
    ``status`` "error" with HTTP 500 when loading the pipeline raises.
    """
    try:
        pipe = get_pipeline()
        model_name = getattr(pipe.model, "name_or_path", MODEL_NAME)
        # `device` is 0 when CUDA is used, otherwise the string "cpu".
        dev = "cuda" if device == 0 else str(device)
    except Exception as e:
        # Top-level boundary: report any load failure to the client instead
        # of letting it surface as an unhandled server error.
        return JSONResponse(
            {
                "status": "error",
                "message": f"Pipeline failed to load: {e}",
            },
            status_code=500,
        )
    return JSONResponse(
        {
            "status": "ok",
            "message": "Pipeline loaded successfully.",
            "model": model_name,
            "device": dev,
        }
    )
# ---------- Shared helpers for the transcription endpoints ----------
_DOCX_MEDIA_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"


def _discard_temp_dirs(paths: List[str]) -> None:
    """Best-effort removal of the temp directories that contain *paths*.

    Both `save_uploads_to_temp` and `extract_zip_to_temp` place their files
    directly inside a directory created with `tempfile.mkdtemp`, so the
    parent directory of each path is request-private scratch space.
    """
    for folder in {os.path.dirname(p) for p in paths}:
        if folder:
            shutil.rmtree(folder, ignore_errors=True)


def _transcribe_audio_paths(
    paths: List[str], mode: str, no_audio_detail: str
) -> List[FileTranscript]:
    """Transcribe every audio file in *paths*; 400 when none is audio."""
    audio_paths = filter_audio_files(paths)
    if not audio_paths:
        raise HTTPException(status_code=400, detail=no_audio_detail)
    return [
        FileTranscript(filename=os.path.basename(p), text=transcribe_file(p, mode))
        for p in audio_paths
    ]


def _docx_download(items: List[FileTranscript], title: str, download_name: str):
    """Build the merged .docx and return it as a self-cleaning download."""
    # Local import: BackgroundTasks is not in the file-level fastapi import.
    from fastapi import BackgroundTasks

    docx_path = build_docx(items, title)
    # build_docx writes into its own temp directory; delete it once the
    # response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(shutil.rmtree, os.path.dirname(docx_path), ignore_errors=True)
    return FileResponse(
        docx_path,
        media_type=_DOCX_MEDIA_TYPE,
        filename=download_name,
        background=cleanup,
    )


# ---------- 1. Multi-file transcription (JSON) ----------
# Route paths below are the ones the bundled web UI posts to; the handlers
# were previously never registered on `app`.
@app.post("/api/transcribe/files", response_model=TranscriptionResponse)
def transcribe_files(
    files: List[UploadFile] = File(..., description="One or more audio files"),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
):
    """Transcribe the uploaded audio files and return the result as JSON."""
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded.")
    local_paths = save_uploads_to_temp(files)
    try:
        items = _transcribe_audio_paths(
            local_paths,
            mode,
            f"No valid audio files found. Supported extensions: {', '.join(AUDIO_EXTENSIONS)}",
        )
    finally:
        # Uploaded audio is no longer needed once transcription ends.
        _discard_temp_dirs(local_paths)
    return TranscriptionResponse(
        mode=mode,
        combined_transcript=format_combined(items),
        items=items,
    )


# ---------- 2. Multi-file transcription (DOCX download) ----------
@app.post("/api/transcribe/files/docx")
def transcribe_files_docx(
    files: List[UploadFile] = File(..., description="One or more audio files"),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
):
    """Transcribe the uploaded audio files and return a merged .docx."""
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded.")
    local_paths = save_uploads_to_temp(files)
    try:
        items = _transcribe_audio_paths(
            local_paths,
            mode,
            f"No valid audio files found. Supported extensions: {', '.join(AUDIO_EXTENSIONS)}",
        )
    finally:
        _discard_temp_dirs(local_paths)
    return _docx_download(items, "Multi-file transcription", "transcripts_files.docx")


# ---------- 3. ZIP transcription (JSON) ----------
@app.post("/api/transcribe/zip", response_model=TranscriptionResponse)
def transcribe_zip(
    file: UploadFile = File(..., description="ZIP file containing audio files"),
    password: str = Form("", description="ZIP password (leave blank if none)"),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
):
    """Transcribe the audio files inside an uploaded ZIP; return JSON."""
    if file is None:
        raise HTTPException(status_code=400, detail="No ZIP uploaded.")
    extracted_paths = extract_zip_to_temp(file, password or None)
    try:
        items = _transcribe_audio_paths(
            extracted_paths,
            mode,
            f"No valid audio files found inside ZIP. Supported extensions: {', '.join(AUDIO_EXTENSIONS)}",
        )
    finally:
        # Extracted archive members are no longer needed afterwards.
        _discard_temp_dirs(extracted_paths)
    return TranscriptionResponse(
        mode=mode,
        combined_transcript=format_combined(items),
        items=items,
    )


# ---------- 4. ZIP transcription (DOCX download) ----------
@app.post("/api/transcribe/zip/docx")
def transcribe_zip_docx(
    file: UploadFile = File(..., description="ZIP file containing audio files"),
    password: str = Form("", description="ZIP password (leave blank if none)"),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
):
    """Transcribe the audio files inside an uploaded ZIP; return a .docx."""
    if file is None:
        raise HTTPException(status_code=400, detail="No ZIP uploaded.")
    extracted_paths = extract_zip_to_temp(file, password or None)
    try:
        items = _transcribe_audio_paths(
            extracted_paths,
            mode,
            f"No valid audio files found inside ZIP. Supported extensions: {', '.join(AUDIO_EXTENSIONS)}",
        )
    finally:
        _discard_temp_dirs(extracted_paths)
    return _docx_download(items, "ZIP transcription", "transcripts_zip.docx")
| # ===================== Simple HTML UI ===================== | |
| HTML_UI = """ | |
| <!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8" /> | |
| <title>Whisper Large V3 – Medical Batch Transcription</title> | |
| <style> | |
| body { | |
| font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; | |
| margin: 0; | |
| padding: 0; | |
| background: #f4f4f6; | |
| color: #111827; | |
| } | |
| header { | |
| background: #111827; | |
| color: #f9fafb; | |
| padding: 16px 24px; | |
| } | |
| header h1 { | |
| margin: 0; | |
| font-size: 20px; | |
| } | |
| header p { | |
| margin: 4px 0 0; | |
| font-size: 13px; | |
| color: #9ca3af; | |
| } | |
| /* Progress bar at TOP */ | |
| #progress-wrapper { | |
| background: #f9fafb; | |
| border-bottom: 1px solid #e5e7eb; | |
| padding: 8px 24px 10px; | |
| font-size: 12px; | |
| color: #4b5563; | |
| } | |
| #progress-track { | |
| width: 100%; | |
| max-width: 1100px; | |
| height: 8px; | |
| background: #e5e7eb; | |
| border-radius: 999px; | |
| overflow: hidden; | |
| margin-top: 4px; | |
| } | |
| #progress-fill { | |
| height: 100%; | |
| width: 0%; | |
| background: #111827; | |
| border-radius: 999px; | |
| transition: width 0.2s ease-out; | |
| } | |
| #progress-text { | |
| font-size: 11px; | |
| color: #6b7280; | |
| margin-top: 3px; | |
| min-height: 14px; | |
| max-width: 1100px; | |
| } | |
| #status { | |
| max-width: 1100px; | |
| margin: 2px auto 0; | |
| font-size: 12px; | |
| color: #6b7280; | |
| padding: 0 16px 6px; | |
| min-height: 16px; | |
| } | |
| main { | |
| max-width: 1100px; | |
| margin: 16px auto 40px; | |
| padding: 0 16px; | |
| } | |
| .card { | |
| background: #ffffff; | |
| border-radius: 12px; | |
| padding: 16px 20px; | |
| box-shadow: 0 12px 35px rgba(15, 23, 42, 0.08); | |
| margin-bottom: 20px; | |
| } | |
| .card h2 { | |
| margin-top: 0; | |
| font-size: 18px; | |
| display: flex; | |
| align-items: center; | |
| gap: 8px; | |
| } | |
| .card h3 { | |
| margin-bottom: 6px; | |
| margin-top: 16px; | |
| font-size: 15px; | |
| } | |
| label { | |
| font-size: 13px; | |
| font-weight: 500; | |
| display: block; | |
| margin-bottom: 4px; | |
| } | |
| input[type="file"], | |
| select, | |
| input[type="password"] { | |
| width: 100%; | |
| padding: 8px 10px; | |
| font-size: 13px; | |
| border-radius: 8px; | |
| border: 1px solid #d1d5db; | |
| box-sizing: border-box; | |
| margin-bottom: 10px; | |
| background: #f9fafb; | |
| } | |
| textarea { | |
| width: 100%; | |
| min-height: 260px; | |
| padding: 10px; | |
| box-sizing: border-box; | |
| border-radius: 10px; | |
| border: 1px solid #d1d5db; | |
| font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; | |
| font-size: 13px; | |
| background: #f9fafb; | |
| } | |
| .row { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 16px; | |
| } | |
| .col { | |
| flex: 1 1 280px; | |
| } | |
| .btn-row { | |
| display: flex; | |
| gap: 10px; | |
| flex-wrap: wrap; | |
| margin: 6px 0 10px; | |
| } | |
| button { | |
| appearance: none; | |
| border: none; | |
| border-radius: 999px; | |
| padding: 8px 16px; | |
| font-size: 13px; | |
| font-weight: 500; | |
| cursor: pointer; | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 6px; | |
| } | |
| .btn-primary { | |
| background: #111827; | |
| color: #f9fafb; | |
| } | |
| .btn-secondary { | |
| background: #e5e7eb; | |
| color: #111827; | |
| } | |
| .pill { | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 6px; | |
| padding: 3px 8px; | |
| border-radius: 999px; | |
| font-size: 11px; | |
| background: #eff6ff; | |
| color: #1d4ed8; | |
| margin-left: 8px; | |
| } | |
| .small-hint { | |
| font-size: 11px; | |
| color: #6b7280; | |
| margin-top: -4px; | |
| margin-bottom: 8px; | |
| } | |
| code { | |
| font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; | |
| font-size: 12px; | |
| } | |
| pre { | |
| background: #0b1120; | |
| color: #e5e7eb; | |
| padding: 10px 12px; | |
| border-radius: 10px; | |
| overflow-x: auto; | |
| font-size: 12px; | |
| line-height: 1.5; | |
| } | |
| a { | |
| color: #1d4ed8; | |
| text-decoration: none; | |
| } | |
| a:hover { | |
| text-decoration: underline; | |
| } | |
| @media (max-width: 768px) { | |
| header { | |
| padding: 12px 16px; | |
| } | |
| main { | |
| margin-top: 12px; | |
| } | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <header> | |
| <h1>Whisper Large V3 – Medical Batch Transcription</h1> | |
| <p> | |
| Upload multiple audio files or a password-protected ZIP. | |
| Mode: <code>general</code> or <code>medical_en</code>. | |
| API docs at <code>/docs</code>. | |
| </p> | |
| </header> | |
| <!-- Progress & status AT TOP --> | |
| <div id="progress-wrapper"> | |
| <div>Transcription progress</div> | |
| <div id="progress-track"> | |
| <div id="progress-fill"></div> | |
| </div> | |
| <div id="progress-text">Idle</div> | |
| </div> | |
| <div id="status"></div> | |
| <main> | |
| <div class="card"> | |
| <h2>1. Multi-file audio upload <span class="pill">JSON & DOCX</span></h2> | |
| <div class="row"> | |
| <div class="col"> | |
| <h3>Inputs</h3> | |
| <label for="files_input">Audio files</label> | |
| <input id="files_input" type="file" multiple accept="audio/*" /> | |
| <div class="small-hint"> | |
| You can select multiple audio files. | |
| </div> | |
| <label for="files_mode">Mode</label> | |
| <select id="files_mode"> | |
| <option value="medical_en">medical_en (English medical bias)</option> | |
| <option value="general">general</option> | |
| </select> | |
| <div class="btn-row"> | |
| <button class="btn-primary" id="btn_files_json">Transcribe → JSON</button> | |
| <button class="btn-secondary" id="btn_files_docx">Download DOCX</button> | |
| </div> | |
| </div> | |
| <div class="col"> | |
| <h3>Combined transcript</h3> | |
| <textarea id="files_output" placeholder="Transcript will appear here when you use the JSON button."></textarea> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="card"> | |
| <h2>2. ZIP upload (with password) <span class="pill">JSON & DOCX</span></h2> | |
| <div class="row"> | |
| <div class="col"> | |
| <h3>ZIP Inputs</h3> | |
| <label for="zip_input">ZIP file</label> | |
| <input id="zip_input" type="file" accept=".zip" /> | |
| <div class="small-hint">ZIP should contain audio files only.</div> | |
| <label for="zip_password">ZIP password (optional)</label> | |
| <input id="zip_password" type="password" placeholder="Leave blank if ZIP is not encrypted" /> | |
| <label for="zip_mode">Mode</label> | |
| <select id="zip_mode"> | |
| <option value="medical_en">medical_en (English medical bias)</option> | |
| <option value="general">general</option> | |
| </select> | |
| <div class="btn-row"> | |
| <button class="btn-primary" id="btn_zip_json">Transcribe ZIP → JSON</button> | |
| <button class="btn-secondary" id="btn_zip_docx">Download ZIP DOCX</button> | |
| </div> | |
| </div> | |
| <div class="col"> | |
| <h3>ZIP combined transcript</h3> | |
| <textarea id="zip_output" placeholder="Transcript will appear here when you use the JSON button."></textarea> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- 3. Quick examples --> | |
| <div class="card"> | |
| <h2>3. Quick examples <span class="pill">API & sample audio</span></h2> | |
| <h3>Sample audio for testing (download & upload above)</h3> | |
| <p class="small-hint"> | |
| 1. Download this small public sample file<br> | |
| 2. Upload it in section 1 and click <strong>Transcribe → JSON</strong> | |
| </p> | |
| <p> | |
| 👉 <a href="https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac" target="_blank" rel="noopener"> | |
| Download example audio (mlk.flac) | |
| </a> | |
| </p> | |
| <h3>Example: cURL for multi-file JSON</h3> | |
| <p class="small-hint">Replace <code>@path/to/audio1.flac</code> with your local file path.</p> | |
| <pre><code>curl -X POST \\ | |
| "https://staraks-whisper-large-v3.hf.space/api/transcribe/files" \\ | |
| -H "Accept: application/json" \\ | |
| -F "mode=medical_en" \\ | |
| -F "files=@path/to/audio1.flac" \\ | |
| -F "files=@path/to/audio2.wav"</code></pre> | |
| <h3>Example: cURL for ZIP JSON</h3> | |
| <p class="small-hint">ZIP file contains multiple audio files. Password field is optional.</p> | |
| <pre><code>curl -X POST \\ | |
| "https://staraks-whisper-large-v3.hf.space/api/transcribe/zip" \\ | |
| -H "Accept: application/json" \\ | |
| -F "mode=medical_en" \\ | |
| -F "file=@path/to/audios.zip" \\ | |
| -F "password="</code></pre> | |
| </div> | |
| <!-- 4. System self-check --> | |
| <div class="card"> | |
| <h2>4. System self-check <span class="pill">Model & API status</span></h2> | |
| <p class="small-hint"> | |
| Use this to quickly verify that the API is running and the Whisper pipeline can be loaded. | |
| </p> | |
| <button class="btn-primary" id="btn_self_test">Run self-test</button> | |
| <pre id="self_test_output"><code>Click "Run self-test" to see status...</code></pre> | |
| </div> | |
| </main> | |
| <script> | |
| let __progressTimer = null; | |
| let __progressValue = 0; | |
| function setProgress(value, label) { | |
| __progressValue = Math.max(0, Math.min(100, value)); | |
| const fill = document.getElementById("progress-fill"); | |
| const text = document.getElementById("progress-text"); | |
| if (fill) { | |
| fill.style.width = __progressValue + "%"; | |
| } | |
| if (text) { | |
| text.innerText = label + " (" + __progressValue.toFixed(0) + "%)"; | |
| } | |
| } | |
| function resetProgress() { | |
| if (__progressTimer) { | |
| clearInterval(__progressTimer); | |
| __progressTimer = null; | |
| } | |
| setProgress(0, "Idle"); | |
| } | |
| function startSimulatedProgress(label) { | |
| if (__progressTimer) { | |
| clearInterval(__progressTimer); | |
| } | |
| let p = 5; | |
| setProgress(p, label); | |
| __progressTimer = setInterval(() => { | |
| if (p < 90) { | |
| p += Math.random() * 10; | |
| if (p > 90) p = 90; | |
| setProgress(p, label); | |
| } | |
| }, 600); | |
| } | |
| function finishProgress(label) { | |
| if (__progressTimer) { | |
| clearInterval(__progressTimer); | |
| __progressTimer = null; | |
| } | |
| setProgress(100, label); | |
| setTimeout(() => { | |
| resetProgress(); | |
| }, 2000); | |
| } | |
| function errorProgress(message) { | |
| if (__progressTimer) { | |
| clearInterval(__progressTimer); | |
| __progressTimer = null; | |
| } | |
| setProgress(0, "Error"); | |
| setStatus(message); | |
| } | |
| function apiUrl(path) { | |
| return new URL(path, window.location.origin).toString(); | |
| } | |
| async function postForm(path, formData, expectBlob = false) { | |
| const url = apiUrl(path); | |
| let res; | |
| try { | |
| res = await fetch(url, { | |
| method: "POST", | |
| body: formData | |
| }); | |
| } catch (e) { | |
| throw new Error("Failed to connect to server: " + e.message); | |
| } | |
| if (!res.ok) { | |
| let msg = "Request failed: " + res.status; | |
| try { | |
| const data = await res.json(); | |
| if (data && data.detail) msg += " – " + JSON.stringify(data.detail); | |
| } catch (e) { | |
| // ignore JSON parse errors | |
| } | |
| throw new Error(msg); | |
| } | |
| if (expectBlob) { | |
| return await res.blob(); | |
| } else { | |
| return await res.json(); | |
| } | |
| } | |
| function setStatus(text) { | |
| document.getElementById("status").innerText = text || ""; | |
| } | |
| // ------- Self test ------- | |
| document.getElementById("btn_self_test").addEventListener("click", async () => { | |
| const out = document.getElementById("self_test_output"); | |
| out.textContent = "Running self-test..."; | |
| setStatus("Running self-test…"); | |
| try { | |
| const res = await fetch(apiUrl("/self-test")); | |
| const data = await res.json(); | |
| out.textContent = JSON.stringify(data, null, 2); | |
| if (data.status === "ok") { | |
| setStatus("Self-test OK – model and API are working."); | |
| } else { | |
| setStatus("Self-test reported an error."); | |
| } | |
| } catch (err) { | |
| console.error(err); | |
| out.textContent = "Self-test failed: " + err.message; | |
| setStatus("Self-test failed."); | |
| } | |
| }); | |
| // ------- Multi-files JSON ------- | |
| document.getElementById("btn_files_json").addEventListener("click", async () => { | |
| const filesInput = document.getElementById("files_input"); | |
| const mode = document.getElementById("files_mode").value; | |
| const out = document.getElementById("files_output"); | |
| if (!filesInput.files.length) { | |
| alert("Please choose at least one audio file."); | |
| return; | |
| } | |
| const formData = new FormData(); | |
| for (const f of filesInput.files) { | |
| formData.append("files", f); | |
| } | |
| formData.append("mode", mode); | |
| setStatus("Transcribing multiple files… (this may take some time for large audio)"); | |
| out.value = ""; | |
| startSimulatedProgress("Transcribing files"); | |
| try { | |
| const data = await postForm("/api/transcribe/files", formData, false); | |
| out.value = data.combined_transcript || ""; | |
| setStatus("Done."); | |
| finishProgress("Transcription complete"); | |
| } catch (err) { | |
| console.error(err); | |
| alert(err.message); | |
| errorProgress("Error during transcription."); | |
| } | |
| }); | |
| // ------- Multi-files DOCX ------- | |
| document.getElementById("btn_files_docx").addEventListener("click", async () => { | |
| const filesInput = document.getElementById("files_input"); | |
| const mode = document.getElementById("files_mode").value; | |
| if (!filesInput.files.length) { | |
| alert("Please choose at least one audio file."); | |
| return; | |
| } | |
| const formData = new FormData(); | |
| for (const f of filesInput.files) { | |
| formData.append("files", f); | |
| } | |
| formData.append("mode", mode); | |
| setStatus("Generating DOCX for multi-file transcription…"); | |
| startSimulatedProgress("Generating DOCX"); | |
| try { | |
| const blob = await postForm("/api/transcribe/files/docx", formData, true); | |
| const url = window.URL.createObjectURL(blob); | |
| const a = document.createElement("a"); | |
| a.href = url; | |
| a.download = "transcripts_files.docx"; | |
| document.body.appendChild(a); | |
| a.click(); | |
| a.remove(); | |
| window.URL.revokeObjectURL(url); | |
| setStatus("DOCX downloaded."); | |
| finishProgress("DOCX ready"); | |
| } catch (err) { | |
| console.error(err); | |
| alert(err.message); | |
| errorProgress("Error during DOCX generation."); | |
| } | |
| }); | |
| // ------- ZIP JSON ------- | |
| document.getElementById("btn_zip_json").addEventListener("click", async () => { | |
| const zipInput = document.getElementById("zip_input"); | |
| const pwd = document.getElementById("zip_password").value || ""; | |
| const mode = document.getElementById("zip_mode").value; | |
| const out = document.getElementById("zip_output"); | |
| if (!zipInput.files.length) { | |
| alert("Please choose a ZIP file."); | |
| return; | |
| } | |
| const formData = new FormData(); | |
| formData.append("file", zipInput.files[0]); | |
| formData.append("password", pwd); | |
| formData.append("mode", mode); | |
| setStatus("Transcribing ZIP contents…"); | |
| out.value = ""; | |
| startSimulatedProgress("Transcribing ZIP"); | |
| try { | |
| const data = await postForm("/api/transcribe/zip", formData, false); | |
| out.value = data.combined_transcript || ""; | |
| setStatus("Done."); | |
| finishProgress("ZIP transcription complete"); | |
| } catch (err) { | |
| console.error(err); | |
| alert(err.message); | |
| errorProgress("Error during ZIP transcription."); | |
| } | |
| }); | |
| // ------- ZIP DOCX ------- | |
| document.getElementById("btn_zip_docx").addEventListener("click", async () => { | |
| const zipInput = document.getElementById("zip_input"); | |
| const pwd = document.getElementById("zip_password").value || ""; | |
| const mode = document.getElementById("zip_mode").value; | |
| if (!zipInput.files.length) { | |
| alert("Please choose a ZIP file."); | |
| return; | |
| } | |
| const formData = new FormData(); | |
| formData.append("file", zipInput.files[0]); | |
| formData.append("password", pwd); | |
| formData.append("mode", mode); | |
| setStatus("Generating DOCX from ZIP contents…"); | |
| startSimulatedProgress("Generating ZIP DOCX"); | |
| try { | |
| const blob = await postForm("/api/transcribe/zip/docx", formData, true); | |
| const url = window.URL.createObjectURL(blob); | |
| const a = document.createElement("a"); | |
| a.href = url; | |
| a.download = "transcripts_zip.docx"; | |
| document.body.appendChild(a); | |
| a.click(); | |
| a.remove(); | |
| window.URL.revokeObjectURL(url); | |
| setStatus("DOCX downloaded."); | |
| finishProgress("ZIP DOCX ready"); | |
| } catch (err) { | |
| console.error(err); | |
| alert(err.message); | |
| errorProgress("Error during ZIP DOCX generation."); | |
| } | |
| }); | |
| // Initial state | |
| resetProgress(); | |
| </script> | |
| </body> | |
| </html> | |
| """ | |
# The app description and root banner point users to "/ui", but the handler
# was never registered on `app`. HTMLResponse is required so the page is
# served as text/html rather than as a JSON-encoded string.
@app.get("/ui", response_class=HTMLResponse)
def get_ui():
    """Serve the single-page web interface stored in ``HTML_UI``."""
    return HTML_UI
# ===================== Run (local dev / HF Spaces) =====================
if __name__ == "__main__":
    import uvicorn

    # Listen on all interfaces; the port comes from $PORT (default 7860).
    # "app:app" is an import string: attribute `app` of module `app`.
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=int(os.getenv("PORT", "7860")),
        reload=False,
    )