Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,12 @@ import shutil
|
|
| 3 |
import tempfile
|
| 4 |
from typing import List, Literal, Optional
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
|
| 7 |
from fastapi.responses import (
|
| 8 |
FileResponse,
|
|
@@ -12,11 +18,6 @@ from fastapi.responses import (
|
|
| 12 |
)
|
| 13 |
from pydantic import BaseModel
|
| 14 |
from transformers import pipeline
|
| 15 |
-
import torch
|
| 16 |
-
import pyzipper
|
| 17 |
-
from docx import Document
|
| 18 |
-
import soundfile as sf # noqa: F401 (ensure audio backend is available)
|
| 19 |
-
import spaces
|
| 20 |
|
| 21 |
# ===================== CONFIG =====================
|
| 22 |
|
|
@@ -32,9 +33,10 @@ AUDIO_EXTENSIONS = (
|
|
| 32 |
".webm",
|
| 33 |
)
|
| 34 |
|
|
|
|
| 35 |
device = 0 if torch.cuda.is_available() else "cpu"
|
| 36 |
|
| 37 |
-
# Lazy-loaded pipeline
|
| 38 |
asr_pipe = None
|
| 39 |
|
| 40 |
|
|
@@ -58,30 +60,26 @@ class FileTranscript(BaseModel):
|
|
| 58 |
|
| 59 |
|
| 60 |
class TranscriptionResponse(BaseModel):
|
| 61 |
-
task: Literal["transcribe", "translate"]
|
| 62 |
mode: Literal["general", "medical_en"]
|
| 63 |
-
language: str
|
| 64 |
combined_transcript: str
|
| 65 |
items: List[FileTranscript]
|
| 66 |
|
| 67 |
|
| 68 |
# ===================== Helper functions =====================
|
| 69 |
|
| 70 |
-
def build_generate_kwargs(
|
| 71 |
"""
|
| 72 |
-
task: 'transcribe' | 'translate'
|
| 73 |
mode: 'general' | 'medical_en'
|
| 74 |
-
|
|
|
|
| 75 |
"""
|
| 76 |
-
generate_kwargs = {
|
|
|
|
|
|
|
| 77 |
|
| 78 |
if mode == "medical_en":
|
|
|
|
| 79 |
generate_kwargs["language"] = "en"
|
| 80 |
-
else:
|
| 81 |
-
if language and language != "auto":
|
| 82 |
-
generate_kwargs["language"] = language
|
| 83 |
-
|
| 84 |
-
if mode == "medical_en":
|
| 85 |
generate_kwargs["initial_prompt"] = (
|
| 86 |
"This is a medical dictation. Use accurate English medical terminology, "
|
| 87 |
"including anatomy, diseases, investigations, lab values, imaging, and drugs. "
|
|
@@ -100,9 +98,9 @@ def filter_audio_files(paths: List[str]) -> List[str]:
|
|
| 100 |
return out
|
| 101 |
|
| 102 |
|
| 103 |
-
def transcribe_file(path: str,
|
| 104 |
pipe = get_pipeline()
|
| 105 |
-
generate_kwargs = build_generate_kwargs(
|
| 106 |
|
| 107 |
result = pipe(
|
| 108 |
path,
|
|
@@ -112,9 +110,9 @@ def transcribe_file(path: str, task: str, mode: str, language: str) -> str:
|
|
| 112 |
)
|
| 113 |
|
| 114 |
if isinstance(result, dict):
|
| 115 |
-
return result.get("text"
|
| 116 |
if isinstance(result, list) and result:
|
| 117 |
-
return result[0].get("text"
|
| 118 |
return ""
|
| 119 |
|
| 120 |
|
|
@@ -159,6 +157,7 @@ def extract_zip_to_temp(zip_file: UploadFile, password: Optional[str]) -> List[s
|
|
| 159 |
tmpdir = tempfile.mkdtemp(prefix="zip_")
|
| 160 |
zip_path = os.path.join(tmpdir, os.path.basename(zip_file.filename or "archive.zip"))
|
| 161 |
|
|
|
|
| 162 |
with open(zip_path, "wb") as out_f:
|
| 163 |
shutil.copyfileobj(zip_file.file, out_f)
|
| 164 |
|
|
@@ -187,6 +186,7 @@ def extract_zip_to_temp(zip_file: UploadFile, password: Optional[str]) -> List[s
|
|
| 187 |
detail=f"Failed to open ZIP file. Check password / integrity. {e}",
|
| 188 |
)
|
| 189 |
|
|
|
|
| 190 |
files = [os.path.join(outdir, f) for f in os.listdir(outdir)]
|
| 191 |
return files
|
| 192 |
|
|
@@ -204,7 +204,7 @@ HTTP API for Whisper Large V3 with:
|
|
| 204 |
- Combined transcript
|
| 205 |
- Optional merged Word (.docx) download
|
| 206 |
|
| 207 |
-
Use `/docs` for Swagger UI and `/ui` for
|
| 208 |
""",
|
| 209 |
version="1.0.0",
|
| 210 |
)
|
|
@@ -218,15 +218,18 @@ def root():
|
|
| 218 |
)
|
| 219 |
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
# ---------- 1. Multi-file transcription (JSON) ----------
|
| 222 |
|
| 223 |
@app.post("/api/transcribe/files", response_model=TranscriptionResponse)
|
| 224 |
@spaces.GPU
|
| 225 |
def transcribe_files(
|
| 226 |
files: List[UploadFile] = File(..., description="One or more audio files"),
|
| 227 |
-
|
| 228 |
-
mode: Literal["general", "medical_en"] = Form("general"),
|
| 229 |
-
language: str = Form("auto"),
|
| 230 |
):
|
| 231 |
if not files:
|
| 232 |
raise HTTPException(status_code=400, detail="No files uploaded.")
|
|
@@ -243,15 +246,13 @@ def transcribe_files(
|
|
| 243 |
items: List[FileTranscript] = []
|
| 244 |
for path in audio_paths:
|
| 245 |
fname = os.path.basename(path)
|
| 246 |
-
text = transcribe_file(path,
|
| 247 |
items.append(FileTranscript(filename=fname, text=text))
|
| 248 |
|
| 249 |
combined = format_combined(items)
|
| 250 |
|
| 251 |
return TranscriptionResponse(
|
| 252 |
-
task=task,
|
| 253 |
mode=mode,
|
| 254 |
-
language=language,
|
| 255 |
combined_transcript=combined,
|
| 256 |
items=items,
|
| 257 |
)
|
|
@@ -263,9 +264,7 @@ def transcribe_files(
|
|
| 263 |
@spaces.GPU
|
| 264 |
def transcribe_files_docx(
|
| 265 |
files: List[UploadFile] = File(..., description="One or more audio files"),
|
| 266 |
-
|
| 267 |
-
mode: Literal["general", "medical_en"] = Form("general"),
|
| 268 |
-
language: str = Form("auto"),
|
| 269 |
):
|
| 270 |
if not files:
|
| 271 |
raise HTTPException(status_code=400, detail="No files uploaded.")
|
|
@@ -282,16 +281,14 @@ def transcribe_files_docx(
|
|
| 282 |
items: List[FileTranscript] = []
|
| 283 |
for path in audio_paths:
|
| 284 |
fname = os.path.basename(path)
|
| 285 |
-
text = transcribe_file(path,
|
| 286 |
items.append(FileTranscript(filename=fname, text=text))
|
| 287 |
|
| 288 |
docx_path = build_docx(items, "Multi-file transcription")
|
| 289 |
|
| 290 |
return FileResponse(
|
| 291 |
docx_path,
|
| 292 |
-
media_type=
|
| 293 |
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
| 294 |
-
),
|
| 295 |
filename="transcripts_files.docx",
|
| 296 |
)
|
| 297 |
|
|
@@ -303,9 +300,7 @@ def transcribe_files_docx(
|
|
| 303 |
def transcribe_zip(
|
| 304 |
file: UploadFile = File(..., description="ZIP file containing audio files"),
|
| 305 |
password: str = Form("", description="ZIP password (leave blank if none)"),
|
| 306 |
-
task: Literal["transcribe", "translate"] = Form("transcribe"),
|
| 307 |
mode: Literal["general", "medical_en"] = Form("medical_en"),
|
| 308 |
-
language: str = Form("auto"),
|
| 309 |
):
|
| 310 |
if file is None:
|
| 311 |
raise HTTPException(status_code=400, detail="No ZIP uploaded.")
|
|
@@ -322,15 +317,13 @@ def transcribe_zip(
|
|
| 322 |
items: List[FileTranscript] = []
|
| 323 |
for path in audio_paths:
|
| 324 |
fname = os.path.basename(path)
|
| 325 |
-
text = transcribe_file(path,
|
| 326 |
items.append(FileTranscript(filename=fname, text=text))
|
| 327 |
|
| 328 |
combined = format_combined(items)
|
| 329 |
|
| 330 |
return TranscriptionResponse(
|
| 331 |
-
task=task,
|
| 332 |
mode=mode,
|
| 333 |
-
language=language,
|
| 334 |
combined_transcript=combined,
|
| 335 |
items=items,
|
| 336 |
)
|
|
@@ -343,9 +336,7 @@ def transcribe_zip(
|
|
| 343 |
def transcribe_zip_docx(
|
| 344 |
file: UploadFile = File(..., description="ZIP file containing audio files"),
|
| 345 |
password: str = Form("", description="ZIP password (leave blank if none)"),
|
| 346 |
-
task: Literal["transcribe", "translate"] = Form("transcribe"),
|
| 347 |
mode: Literal["general", "medical_en"] = Form("medical_en"),
|
| 348 |
-
language: str = Form("auto"),
|
| 349 |
):
|
| 350 |
if file is None:
|
| 351 |
raise HTTPException(status_code=400, detail="No ZIP uploaded.")
|
|
@@ -362,16 +353,14 @@ def transcribe_zip_docx(
|
|
| 362 |
items: List[FileTranscript] = []
|
| 363 |
for path in audio_paths:
|
| 364 |
fname = os.path.basename(path)
|
| 365 |
-
text = transcribe_file(path,
|
| 366 |
items.append(FileTranscript(filename=fname, text=text))
|
| 367 |
|
| 368 |
docx_path = build_docx(items, "ZIP transcription")
|
| 369 |
|
| 370 |
return FileResponse(
|
| 371 |
docx_path,
|
| 372 |
-
media_type=
|
| 373 |
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
| 374 |
-
),
|
| 375 |
filename="transcripts_zip.docx",
|
| 376 |
)
|
| 377 |
|
|
@@ -438,7 +427,6 @@ HTML_UI = """
|
|
| 438 |
}
|
| 439 |
input[type="file"],
|
| 440 |
select,
|
| 441 |
-
input[type="text"],
|
| 442 |
input[type="password"] {
|
| 443 |
width: 100%;
|
| 444 |
padding: 8px 10px;
|
|
@@ -530,7 +518,7 @@ HTML_UI = """
|
|
| 530 |
<body>
|
| 531 |
<header>
|
| 532 |
<h1>Whisper Large V3 – Medical Batch Transcription</h1>
|
| 533 |
-
<p>Upload multiple audio files or a password-protected ZIP.
|
| 534 |
</header>
|
| 535 |
<main>
|
| 536 |
<div class="card">
|
|
@@ -542,28 +530,10 @@ HTML_UI = """
|
|
| 542 |
<input id="files_input" type="file" multiple accept="audio/*" />
|
| 543 |
<div class="small-hint">You can select multiple audio files.</div>
|
| 544 |
|
| 545 |
-
<label for="files_task">Task</label>
|
| 546 |
-
<select id="files_task">
|
| 547 |
-
<option value="transcribe">transcribe (same language)</option>
|
| 548 |
-
<option value="translate">translate to English</option>
|
| 549 |
-
</select>
|
| 550 |
-
|
| 551 |
<label for="files_mode">Mode</label>
|
| 552 |
<select id="files_mode">
|
| 553 |
-
<option value="general">general</option>
|
| 554 |
<option value="medical_en">medical_en (English medical bias)</option>
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
<label for="files_language">Spoken language</label>
|
| 558 |
-
<select id="files_language">
|
| 559 |
-
<option value="auto">auto</option>
|
| 560 |
-
<option value="en">en (English)</option>
|
| 561 |
-
<option value="hi">hi (Hindi)</option>
|
| 562 |
-
<option value="es">es (Spanish)</option>
|
| 563 |
-
<option value="fr">fr (French)</option>
|
| 564 |
-
<option value="de">de (German)</option>
|
| 565 |
-
<option value="ar">ar (Arabic)</option>
|
| 566 |
-
<option value="zh">zh (Chinese)</option>
|
| 567 |
</select>
|
| 568 |
|
| 569 |
<div class="btn-row">
|
|
@@ -590,30 +560,12 @@ HTML_UI = """
|
|
| 590 |
<label for="zip_password">ZIP password (optional)</label>
|
| 591 |
<input id="zip_password" type="password" placeholder="Leave blank if ZIP is not encrypted" />
|
| 592 |
|
| 593 |
-
<label for="zip_task">Task</label>
|
| 594 |
-
<select id="zip_task">
|
| 595 |
-
<option value="transcribe">transcribe (same language)</option>
|
| 596 |
-
<option value="translate">translate to English</option>
|
| 597 |
-
</select>
|
| 598 |
-
|
| 599 |
<label for="zip_mode">Mode</label>
|
| 600 |
<select id="zip_mode">
|
| 601 |
<option value="medical_en">medical_en (English medical bias)</option>
|
| 602 |
<option value="general">general</option>
|
| 603 |
</select>
|
| 604 |
|
| 605 |
-
<label for="zip_language">Spoken language</label>
|
| 606 |
-
<select id="zip_language">
|
| 607 |
-
<option value="auto">auto</option>
|
| 608 |
-
<option value="en">en (English)</option>
|
| 609 |
-
<option value="hi">hi (Hindi)</option>
|
| 610 |
-
<option value="es">es (Spanish)</option>
|
| 611 |
-
<option value="fr">fr (French)</option>
|
| 612 |
-
<option value="de">de (German)</option>
|
| 613 |
-
<option value="ar">ar (Arabic)</option>
|
| 614 |
-
<option value="zh">zh (Chinese)</option>
|
| 615 |
-
</select>
|
| 616 |
-
|
| 617 |
<div class="btn-row">
|
| 618 |
<button class="btn-primary" id="btn_zip_json">Transcribe ZIP → JSON</button>
|
| 619 |
<button class="btn-secondary" id="btn_zip_docx">Download ZIP DOCX</button>
|
|
@@ -659,9 +611,7 @@ HTML_UI = """
|
|
| 659 |
// ------- Multi-files JSON -------
|
| 660 |
document.getElementById("btn_files_json").addEventListener("click", async () => {
|
| 661 |
const filesInput = document.getElementById("files_input");
|
| 662 |
-
const task = document.getElementById("files_task").value;
|
| 663 |
const mode = document.getElementById("files_mode").value;
|
| 664 |
-
const language = document.getElementById("files_language").value;
|
| 665 |
const out = document.getElementById("files_output");
|
| 666 |
|
| 667 |
if (!filesInput.files.length) {
|
|
@@ -673,9 +623,7 @@ HTML_UI = """
|
|
| 673 |
for (const f of filesInput.files) {
|
| 674 |
formData.append("files", f);
|
| 675 |
}
|
| 676 |
-
formData.append("task", task);
|
| 677 |
formData.append("mode", mode);
|
| 678 |
-
formData.append("language", language);
|
| 679 |
|
| 680 |
setStatus("Transcribing multiple files… (this may take some time for large audio)");
|
| 681 |
out.value = "";
|
|
@@ -694,9 +642,7 @@ HTML_UI = """
|
|
| 694 |
// ------- Multi-files DOCX -------
|
| 695 |
document.getElementById("btn_files_docx").addEventListener("click", async () => {
|
| 696 |
const filesInput = document.getElementById("files_input");
|
| 697 |
-
const task = document.getElementById("files_task").value;
|
| 698 |
const mode = document.getElementById("files_mode").value;
|
| 699 |
-
const language = document.getElementById("files_language").value;
|
| 700 |
|
| 701 |
if (!filesInput.files.length) {
|
| 702 |
alert("Please choose at least one audio file.");
|
|
@@ -707,9 +653,7 @@ HTML_UI = """
|
|
| 707 |
for (const f of filesInput.files) {
|
| 708 |
formData.append("files", f);
|
| 709 |
}
|
| 710 |
-
formData.append("task", task);
|
| 711 |
formData.append("mode", mode);
|
| 712 |
-
formData.append("language", language);
|
| 713 |
|
| 714 |
setStatus("Generating DOCX for multi-file transcription…");
|
| 715 |
|
|
@@ -735,9 +679,7 @@ HTML_UI = """
|
|
| 735 |
document.getElementById("btn_zip_json").addEventListener("click", async () => {
|
| 736 |
const zipInput = document.getElementById("zip_input");
|
| 737 |
const pwd = document.getElementById("zip_password").value || "";
|
| 738 |
-
const task = document.getElementById("zip_task").value;
|
| 739 |
const mode = document.getElementById("zip_mode").value;
|
| 740 |
-
const language = document.getElementById("zip_language").value;
|
| 741 |
const out = document.getElementById("zip_output");
|
| 742 |
|
| 743 |
if (!zipInput.files.length) {
|
|
@@ -748,9 +690,7 @@ HTML_UI = """
|
|
| 748 |
const formData = new FormData();
|
| 749 |
formData.append("file", zipInput.files[0]);
|
| 750 |
formData.append("password", pwd);
|
| 751 |
-
formData.append("task", task);
|
| 752 |
formData.append("mode", mode);
|
| 753 |
-
formData.append("language", language);
|
| 754 |
|
| 755 |
setStatus("Transcribing ZIP contents…");
|
| 756 |
out.value = "";
|
|
@@ -770,9 +710,7 @@ HTML_UI = """
|
|
| 770 |
document.getElementById("btn_zip_docx").addEventListener("click", async () => {
|
| 771 |
const zipInput = document.getElementById("zip_input");
|
| 772 |
const pwd = document.getElementById("zip_password").value || "";
|
| 773 |
-
const task = document.getElementById("zip_task").value;
|
| 774 |
const mode = document.getElementById("zip_mode").value;
|
| 775 |
-
const language = document.getElementById("zip_language").value;
|
| 776 |
|
| 777 |
if (!zipInput.files.length) {
|
| 778 |
alert("Please choose a ZIP file.");
|
|
@@ -782,9 +720,7 @@ HTML_UI = """
|
|
| 782 |
const formData = new FormData();
|
| 783 |
formData.append("file", zipInput.files[0]);
|
| 784 |
formData.append("password", pwd);
|
| 785 |
-
formData.append("task", task);
|
| 786 |
formData.append("mode", mode);
|
| 787 |
-
formData.append("language", language);
|
| 788 |
|
| 789 |
setStatus("Generating DOCX from ZIP contents…");
|
| 790 |
|
|
@@ -823,10 +759,3 @@ if __name__ == "__main__":
|
|
| 823 |
|
| 824 |
port = int(os.getenv("PORT", "7860"))
|
| 825 |
uvicorn.run("app:app", host="0.0.0.0", port=port, reload=False)
|
| 826 |
-
|
| 827 |
-
|
| 828 |
-
|
| 829 |
-
@app.get("/health", response_class=PlainTextResponse)
|
| 830 |
-
def health():
|
| 831 |
-
return "OK"
|
| 832 |
-
|
|
|
|
| 3 |
import tempfile
|
| 4 |
from typing import List, Literal, Optional
|
| 5 |
|
| 6 |
+
import torch
|
| 7 |
+
import pyzipper
|
| 8 |
+
import spaces
|
| 9 |
+
import soundfile as sf # noqa: F401 (ensures audio backend is available)
|
| 10 |
+
|
| 11 |
+
from docx import Document
|
| 12 |
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
|
| 13 |
from fastapi.responses import (
|
| 14 |
FileResponse,
|
|
|
|
| 18 |
)
|
| 19 |
from pydantic import BaseModel
|
| 20 |
from transformers import pipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# ===================== CONFIG =====================
|
| 23 |
|
|
|
|
| 33 |
".webm",
|
| 34 |
)
|
| 35 |
|
| 36 |
+
# Use GPU if available on the Space
|
| 37 |
device = 0 if torch.cuda.is_available() else "cpu"
|
| 38 |
|
| 39 |
+
# Lazy-loaded pipeline (created on first request)
|
| 40 |
asr_pipe = None
|
| 41 |
|
| 42 |
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
class TranscriptionResponse(BaseModel):
|
|
|
|
| 63 |
mode: Literal["general", "medical_en"]
|
|
|
|
| 64 |
combined_transcript: str
|
| 65 |
items: List[FileTranscript]
|
| 66 |
|
| 67 |
|
| 68 |
# ===================== Helper functions =====================
|
| 69 |
|
| 70 |
+
def build_generate_kwargs(mode: str):
|
| 71 |
"""
|
|
|
|
| 72 |
mode: 'general' | 'medical_en'
|
| 73 |
+
Always transcribe with auto language detection,
|
| 74 |
+
but in medical_en we bias towards English medical dictation.
|
| 75 |
"""
|
| 76 |
+
generate_kwargs = {
|
| 77 |
+
"task": "transcribe", # keep same language as audio
|
| 78 |
+
}
|
| 79 |
|
| 80 |
if mode == "medical_en":
|
| 81 |
+
# Strong bias towards English medical terminology
|
| 82 |
generate_kwargs["language"] = "en"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
generate_kwargs["initial_prompt"] = (
|
| 84 |
"This is a medical dictation. Use accurate English medical terminology, "
|
| 85 |
"including anatomy, diseases, investigations, lab values, imaging, and drugs. "
|
|
|
|
| 98 |
return out
|
| 99 |
|
| 100 |
|
| 101 |
+
def transcribe_file(path: str, mode: str) -> str:
|
| 102 |
pipe = get_pipeline()
|
| 103 |
+
generate_kwargs = build_generate_kwargs(mode)
|
| 104 |
|
| 105 |
result = pipe(
|
| 106 |
path,
|
|
|
|
| 110 |
)
|
| 111 |
|
| 112 |
if isinstance(result, dict):
|
| 113 |
+
return (result.get("text") or "").strip()
|
| 114 |
if isinstance(result, list) and result:
|
| 115 |
+
return (result[0].get("text") or "").strip()
|
| 116 |
return ""
|
| 117 |
|
| 118 |
|
|
|
|
| 157 |
tmpdir = tempfile.mkdtemp(prefix="zip_")
|
| 158 |
zip_path = os.path.join(tmpdir, os.path.basename(zip_file.filename or "archive.zip"))
|
| 159 |
|
| 160 |
+
# Save uploaded ZIP
|
| 161 |
with open(zip_path, "wb") as out_f:
|
| 162 |
shutil.copyfileobj(zip_file.file, out_f)
|
| 163 |
|
|
|
|
| 186 |
detail=f"Failed to open ZIP file. Check password / integrity. {e}",
|
| 187 |
)
|
| 188 |
|
| 189 |
+
# Only top-level; nested dirs can be added if needed.
|
| 190 |
files = [os.path.join(outdir, f) for f in os.listdir(outdir)]
|
| 191 |
return files
|
| 192 |
|
|
|
|
| 204 |
- Combined transcript
|
| 205 |
- Optional merged Word (.docx) download
|
| 206 |
|
| 207 |
+
Use `/docs` for Swagger UI and `/ui` for the web interface.
|
| 208 |
""",
|
| 209 |
version="1.0.0",
|
| 210 |
)
|
|
|
|
| 218 |
)
|
| 219 |
|
| 220 |
|
| 221 |
+
@app.get("/health", response_class=PlainTextResponse)
|
| 222 |
+
def health():
|
| 223 |
+
return "OK"
|
| 224 |
+
|
| 225 |
+
|
| 226 |
# ---------- 1. Multi-file transcription (JSON) ----------
|
| 227 |
|
| 228 |
@app.post("/api/transcribe/files", response_model=TranscriptionResponse)
|
| 229 |
@spaces.GPU
|
| 230 |
def transcribe_files(
|
| 231 |
files: List[UploadFile] = File(..., description="One or more audio files"),
|
| 232 |
+
mode: Literal["general", "medical_en"] = Form("medical_en"),
|
|
|
|
|
|
|
| 233 |
):
|
| 234 |
if not files:
|
| 235 |
raise HTTPException(status_code=400, detail="No files uploaded.")
|
|
|
|
| 246 |
items: List[FileTranscript] = []
|
| 247 |
for path in audio_paths:
|
| 248 |
fname = os.path.basename(path)
|
| 249 |
+
text = transcribe_file(path, mode)
|
| 250 |
items.append(FileTranscript(filename=fname, text=text))
|
| 251 |
|
| 252 |
combined = format_combined(items)
|
| 253 |
|
| 254 |
return TranscriptionResponse(
|
|
|
|
| 255 |
mode=mode,
|
|
|
|
| 256 |
combined_transcript=combined,
|
| 257 |
items=items,
|
| 258 |
)
|
|
|
|
| 264 |
@spaces.GPU
|
| 265 |
def transcribe_files_docx(
|
| 266 |
files: List[UploadFile] = File(..., description="One or more audio files"),
|
| 267 |
+
mode: Literal["general", "medical_en"] = Form("medical_en"),
|
|
|
|
|
|
|
| 268 |
):
|
| 269 |
if not files:
|
| 270 |
raise HTTPException(status_code=400, detail="No files uploaded.")
|
|
|
|
| 281 |
items: List[FileTranscript] = []
|
| 282 |
for path in audio_paths:
|
| 283 |
fname = os.path.basename(path)
|
| 284 |
+
text = transcribe_file(path, mode)
|
| 285 |
items.append(FileTranscript(filename=fname, text=text))
|
| 286 |
|
| 287 |
docx_path = build_docx(items, "Multi-file transcription")
|
| 288 |
|
| 289 |
return FileResponse(
|
| 290 |
docx_path,
|
| 291 |
+
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
|
|
|
|
|
| 292 |
filename="transcripts_files.docx",
|
| 293 |
)
|
| 294 |
|
|
|
|
| 300 |
def transcribe_zip(
|
| 301 |
file: UploadFile = File(..., description="ZIP file containing audio files"),
|
| 302 |
password: str = Form("", description="ZIP password (leave blank if none)"),
|
|
|
|
| 303 |
mode: Literal["general", "medical_en"] = Form("medical_en"),
|
|
|
|
| 304 |
):
|
| 305 |
if file is None:
|
| 306 |
raise HTTPException(status_code=400, detail="No ZIP uploaded.")
|
|
|
|
| 317 |
items: List[FileTranscript] = []
|
| 318 |
for path in audio_paths:
|
| 319 |
fname = os.path.basename(path)
|
| 320 |
+
text = transcribe_file(path, mode)
|
| 321 |
items.append(FileTranscript(filename=fname, text=text))
|
| 322 |
|
| 323 |
combined = format_combined(items)
|
| 324 |
|
| 325 |
return TranscriptionResponse(
|
|
|
|
| 326 |
mode=mode,
|
|
|
|
| 327 |
combined_transcript=combined,
|
| 328 |
items=items,
|
| 329 |
)
|
|
|
|
| 336 |
def transcribe_zip_docx(
|
| 337 |
file: UploadFile = File(..., description="ZIP file containing audio files"),
|
| 338 |
password: str = Form("", description="ZIP password (leave blank if none)"),
|
|
|
|
| 339 |
mode: Literal["general", "medical_en"] = Form("medical_en"),
|
|
|
|
| 340 |
):
|
| 341 |
if file is None:
|
| 342 |
raise HTTPException(status_code=400, detail="No ZIP uploaded.")
|
|
|
|
| 353 |
items: List[FileTranscript] = []
|
| 354 |
for path in audio_paths:
|
| 355 |
fname = os.path.basename(path)
|
| 356 |
+
text = transcribe_file(path, mode)
|
| 357 |
items.append(FileTranscript(filename=fname, text=text))
|
| 358 |
|
| 359 |
docx_path = build_docx(items, "ZIP transcription")
|
| 360 |
|
| 361 |
return FileResponse(
|
| 362 |
docx_path,
|
| 363 |
+
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
|
|
|
|
|
| 364 |
filename="transcripts_zip.docx",
|
| 365 |
)
|
| 366 |
|
|
|
|
| 427 |
}
|
| 428 |
input[type="file"],
|
| 429 |
select,
|
|
|
|
| 430 |
input[type="password"] {
|
| 431 |
width: 100%;
|
| 432 |
padding: 8px 10px;
|
|
|
|
| 518 |
<body>
|
| 519 |
<header>
|
| 520 |
<h1>Whisper Large V3 – Medical Batch Transcription</h1>
|
| 521 |
+
<p>Upload multiple audio files or a password-protected ZIP. Mode: general or medical_en. API docs at <code>/docs</code>.</p>
|
| 522 |
</header>
|
| 523 |
<main>
|
| 524 |
<div class="card">
|
|
|
|
| 530 |
<input id="files_input" type="file" multiple accept="audio/*" />
|
| 531 |
<div class="small-hint">You can select multiple audio files.</div>
|
| 532 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 533 |
<label for="files_mode">Mode</label>
|
| 534 |
<select id="files_mode">
|
|
|
|
| 535 |
<option value="medical_en">medical_en (English medical bias)</option>
|
| 536 |
+
<option value="general">general</option>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
</select>
|
| 538 |
|
| 539 |
<div class="btn-row">
|
|
|
|
| 560 |
<label for="zip_password">ZIP password (optional)</label>
|
| 561 |
<input id="zip_password" type="password" placeholder="Leave blank if ZIP is not encrypted" />
|
| 562 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 563 |
<label for="zip_mode">Mode</label>
|
| 564 |
<select id="zip_mode">
|
| 565 |
<option value="medical_en">medical_en (English medical bias)</option>
|
| 566 |
<option value="general">general</option>
|
| 567 |
</select>
|
| 568 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 569 |
<div class="btn-row">
|
| 570 |
<button class="btn-primary" id="btn_zip_json">Transcribe ZIP → JSON</button>
|
| 571 |
<button class="btn-secondary" id="btn_zip_docx">Download ZIP DOCX</button>
|
|
|
|
| 611 |
// ------- Multi-files JSON -------
|
| 612 |
document.getElementById("btn_files_json").addEventListener("click", async () => {
|
| 613 |
const filesInput = document.getElementById("files_input");
|
|
|
|
| 614 |
const mode = document.getElementById("files_mode").value;
|
|
|
|
| 615 |
const out = document.getElementById("files_output");
|
| 616 |
|
| 617 |
if (!filesInput.files.length) {
|
|
|
|
| 623 |
for (const f of filesInput.files) {
|
| 624 |
formData.append("files", f);
|
| 625 |
}
|
|
|
|
| 626 |
formData.append("mode", mode);
|
|
|
|
| 627 |
|
| 628 |
setStatus("Transcribing multiple files… (this may take some time for large audio)");
|
| 629 |
out.value = "";
|
|
|
|
| 642 |
// ------- Multi-files DOCX -------
|
| 643 |
document.getElementById("btn_files_docx").addEventListener("click", async () => {
|
| 644 |
const filesInput = document.getElementById("files_input");
|
|
|
|
| 645 |
const mode = document.getElementById("files_mode").value;
|
|
|
|
| 646 |
|
| 647 |
if (!filesInput.files.length) {
|
| 648 |
alert("Please choose at least one audio file.");
|
|
|
|
| 653 |
for (const f of filesInput.files) {
|
| 654 |
formData.append("files", f);
|
| 655 |
}
|
|
|
|
| 656 |
formData.append("mode", mode);
|
|
|
|
| 657 |
|
| 658 |
setStatus("Generating DOCX for multi-file transcription…");
|
| 659 |
|
|
|
|
| 679 |
document.getElementById("btn_zip_json").addEventListener("click", async () => {
|
| 680 |
const zipInput = document.getElementById("zip_input");
|
| 681 |
const pwd = document.getElementById("zip_password").value || "";
|
|
|
|
| 682 |
const mode = document.getElementById("zip_mode").value;
|
|
|
|
| 683 |
const out = document.getElementById("zip_output");
|
| 684 |
|
| 685 |
if (!zipInput.files.length) {
|
|
|
|
| 690 |
const formData = new FormData();
|
| 691 |
formData.append("file", zipInput.files[0]);
|
| 692 |
formData.append("password", pwd);
|
|
|
|
| 693 |
formData.append("mode", mode);
|
|
|
|
| 694 |
|
| 695 |
setStatus("Transcribing ZIP contents…");
|
| 696 |
out.value = "";
|
|
|
|
| 710 |
document.getElementById("btn_zip_docx").addEventListener("click", async () => {
|
| 711 |
const zipInput = document.getElementById("zip_input");
|
| 712 |
const pwd = document.getElementById("zip_password").value || "";
|
|
|
|
| 713 |
const mode = document.getElementById("zip_mode").value;
|
|
|
|
| 714 |
|
| 715 |
if (!zipInput.files.length) {
|
| 716 |
alert("Please choose a ZIP file.");
|
|
|
|
| 720 |
const formData = new FormData();
|
| 721 |
formData.append("file", zipInput.files[0]);
|
| 722 |
formData.append("password", pwd);
|
|
|
|
| 723 |
formData.append("mode", mode);
|
|
|
|
| 724 |
|
| 725 |
setStatus("Generating DOCX from ZIP contents…");
|
| 726 |
|
|
|
|
| 759 |
|
| 760 |
port = int(os.getenv("PORT", "7860"))
|
| 761 |
uvicorn.run("app:app", host="0.0.0.0", port=port, reload=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|