import gradio as gr
import numpy as np
import torch
import torch.nn.functional as F

try:
    import librosa
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False
    print("⚠️ Librosa not available, using scipy fallback")

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import io
import time
from typing import Dict, Tuple, Optional
import threading
import queue
from dataclasses import dataclass
from collections import deque

# The rest of the code is unchanged up to create_interface()...
# [All the class code goes here as-is; only the streaming part is changed.]


def create_interface():
    """Create the Gradio interface with streaming replaced by click-based processing."""
    with gr.Blocks(title="VAD Demo - Real-time Speech Detection", theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
        # 🎤 VAD Demo: Real-time Speech Detection Framework

        **Multi-Model Voice Activity Detection with Interactive Visualization**

        This demo showcases 5 different AI models for speech detection, optimized for CPU.
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 🎛️ **Controls**")

                # `demo_app` is the module-level VADDemo instance created below;
                # it is resolved when create_interface() is called, not at definition time.
                model_a = gr.Dropdown(
                    choices=list(demo_app.models.keys()),
                    value="Silero-VAD",
                    label="Panel A Model"
                )
                model_b = gr.Dropdown(
                    choices=list(demo_app.models.keys()),
                    value="E-PANNs",
                    label="Panel B Model"
                )
                threshold_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Detection Threshold"
                )
                status_display = gr.Textbox(
                    label="Status",
                    value="🔇 Ready to detect speech",
                    interactive=False
                )

            with gr.Column(scale=2):
                gr.Markdown("### 🎙️ **Audio Input**")

                # Simplified audio input without streaming, for compatibility
                audio_input = gr.Audio(
                    sources=["microphone"],
                    type="numpy",
                    label="Microphone Input"
                )

                process_btn = gr.Button("🎯 Process Audio", variant="primary")

                gr.Markdown("### 📊 **Analysis Results**")
                plot_output = gr.Plot(label="VAD Analysis")
                model_details = gr.JSON(label="Model Details")

        # Event handlers - using click instead of streaming for compatibility
        process_btn.click(
            fn=demo_app.process_audio_stream,
            inputs=[audio_input, model_a, model_b, threshold_slider],
            outputs=[plot_output, status_display, model_details]
        )

        # Auto-process whenever a new recording is captured
        audio_input.change(
            fn=demo_app.process_audio_stream,
            inputs=[audio_input, model_a, model_b, threshold_slider],
            outputs=[plot_output, status_display, model_details]
        )

        gr.Markdown("""
        ### 🔬 **Research Context**

        This demonstration supports research in privacy-preserving audio datasets
        and real-time speech analysis.

        Original: https://github.com/gbibbo/vad_demo
        """)

    return interface


# Initialize demo
demo_app = VADDemo()

# Create and launch interface
if __name__ == "__main__":
    interface = create_interface()
    interface.queue(max_size=20)

    # Simplified launch for HF Spaces compatibility
    interface.launch(
        share=False,  # HF Spaces handles sharing automatically
        debug=False,
        show_error=True
    )
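
# ---------------------------------------------------------------------------
# Reference sketch (an assumption, NOT the original implementation): the
# VADDemo class omitted above must expose at least the interface used by
# create_interface(). Everything below is inferred from the calls in this
# file; names and bodies beyond that are hypothetical.
#
#   class VADDemo:
#       def __init__(self):
#           # Dict mapping the model names shown in the dropdowns
#           # ("Silero-VAD", "E-PANNs", ...) to loaded model objects.
#           self.models = {}
#
#       def process_audio_stream(self, audio, model_a, model_b, threshold):
#           # `audio` arrives as a (sample_rate, np.ndarray) tuple because
#           # gr.Audio is configured with type="numpy".
#           # Must return three values matching the event-handler outputs:
#           # (plotly Figure for gr.Plot, status str for gr.Textbox,
#           #  dict for gr.JSON).
#           ...
#
# In the real app, this class is defined before `demo_app = VADDemo()` above.
# ---------------------------------------------------------------------------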