Spaces:

bencser
/

whisperdemo

Sleeping

App Files Files Community

bencser committed on Aug 5, 2024

Commit

660f424

verified ·

1 Parent(s): 5774bea

Create app.py

Browse files

Files changed (1) hide show

app.py +148 -0

app.py ADDED Viewed

	@@ -0,0 +1,148 @@

+import gradio as gr
+import whisper
+import yt_dlp
+import os
+import traceback
+from pydub import AudioSegment
+from threading import Thread
+from queue import Queue
+# Global variable to store the selected model
+selected_model = None
+def load_whisper_model(model_name):
+    global selected_model
+    selected_model = whisper.load_model(model_name)
+    return f"Loaded {model_name} model"
+def chunk_audio(audio_file, chunk_size_ms=30000):
+    audio = AudioSegment.from_file(audio_file)
+    chunks = [audio[i:i+chunk_size_ms] for i in range(0, len(audio), chunk_size_ms)]
+    return chunks
+def stream_transcription(audio_file):
+    segment_queue = Queue()
+    def transcribe_worker():
+        try:
+            chunks = chunk_audio(audio_file)
+            for i, chunk in enumerate(chunks):
+                chunk_file = f"temp_chunk_{i}.wav"
+                chunk.export(chunk_file, format="wav")
+                result = selected_model.transcribe(chunk_file)
+                os.remove(chunk_file)
+                for segment in result['segments']:
+                    segment_text = f"[{segment['start'] + i*30:.2f}s -> {segment['end'] + i*30:.2f}s] {segment['text']}\n"
+                    segment_queue.put(segment_text)
+            segment_queue.put(None)  # Signal end of transcription
+        except Exception as e:
+            segment_queue.put(f"Error: {str(e)}")
+            segment_queue.put(None)
+    Thread(target=transcribe_worker).start()
+    full_transcript = ""
+    while True:
+        segment_text = segment_queue.get()
+        if segment_text is None:
+            break
+        if segment_text.startswith("Error"):
+            yield segment_text
+            break
+        full_transcript += segment_text
+        yield full_transcript
+def download_youtube_audio(youtube_url):
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'mp3',
+            'preferredquality': '192',
+        }],
+        'outtmpl': 'temp_audio.%(ext)s',
+    }
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        ydl.download([youtube_url])
+    return "temp_audio.mp3"
+def process_input(model, input_type, youtube_url=None, audio_file=None):
+    try:
+        yield "Loading Whisper model..."
+        load_whisper_model(model)
+        yield f"Loaded {model} model. "
+        if input_type == "YouTube URL":
+            if youtube_url:
+                yield "Downloading audio from YouTube..."
+                audio_file = download_youtube_audio(youtube_url)
+                yield "Download complete. Starting transcription...\n"
+            else:
+                yield "Please provide a valid YouTube URL."
+                return
+        elif input_type == "Audio File":
+            if not audio_file:
+                yield "Please upload an audio file."
+                return
+            else:
+                yield "Starting transcription...\n"
+        yield from stream_transcription(audio_file)
+    except Exception as e:
+        error_msg = f"An error occurred: {str(e)}\n"
+        error_msg += traceback.format_exc()
+        print(error_msg)
+        yield f"Error: {str(e)}"
+    finally:
+        if input_type == "YouTube URL" and audio_file:
+            os.remove(audio_file)
+# Define the Gradio interface: inputs in the left column, transcript in the right.
+with gr.Blocks() as iface:
+    gr.Markdown("# Whisper Transcription App")
+    gr.Markdown("Transcribe YouTube videos or audio files using OpenAI's Whisper model. Large files and long videos can take a very long time to process.")
+    with gr.Row():
+        with gr.Column():
+            # Checkpoint selector; the chosen value is passed to process_input as `model`.
+            model = gr.Radio(
+                choices=["tiny", "base", "small", "medium", "large"],
+                label="Whisper Model",
+                value="base"
+            )
+            gr.Markdown("""
+            - tiny: very fast, less accurate
+            - base: medium speed and accuracy
+            - small: balanced speed and accuracy
+            - medium: more accurate, slower
+            - large: most accurate, very slow
+            """)
+            # No default value is given here, so process_input may receive an
+            # unset input type if the user submits without choosing one.
+            input_type = gr.Radio(["YouTube URL", "Audio File"], label="Input Type")
+            youtube_url = gr.Textbox(label="YouTube URL")
+            # type="filepath": process_input receives the upload as a path string.
+            audio_file = gr.Audio(label="Audio File", type="filepath")
+            with gr.Row():
+                submit_button = gr.Button("Submit")
+                clear_button = gr.Button("Clear")
+        with gr.Column():
+            output = gr.Textbox(label="Transcription", lines=25)
+    # process_input is a generator; each value it yields is sent to the
+    # `output` textbox. The inputs list order matches its parameter order.
+    submit_button.click(
+        fn=process_input,
+        inputs=[model, input_type, youtube_url, audio_file],
+        outputs=output,
+        api_name="transcribe"
+    )
+    def clear_outputs():
+        # Reset the URL box, the uploaded audio and the transcript; the dict is
+        # keyed by the components listed in `outputs` below.
+        return {youtube_url: "", audio_file: None, output: ""}
+    clear_button.click(
+        fn=clear_outputs,
+        inputs=[],
+        outputs=[youtube_url, audio_file, output],
+        api_name="clear"
+    )
+# Launch the interface with queuing enabled. This runs at import time (there
+# is no __main__ guard). NOTE(review): share=True requests a public share
+# link — confirm it is wanted/needed in the deployment environment.
+iface.queue().launch(share=True)