Gabriel Bibbó
committed on
Commit
·
4788ddc
1
Parent(s):
ec04aee
Fix logo paths and layout proportions, clean duplicate files
Browse files
- RCVSSP_logo.png +0 -0
- REPSRC_logo.png +0 -0
- Rai4s_banner.png +0 -3
- Rsurrey_logo.png +0 -3
- ai4s_banner_opaque.png +0 -3
- app.py +12 -12
- app_fixed.py +0 -119
RCVSSP_logo.png
DELETED
|
Binary file (53.1 kB)
|
|
|
REPSRC_logo.png
DELETED
|
Binary file (40.9 kB)
|
|
|
Rai4s_banner.png
DELETED
Git LFS Details
|
Rsurrey_logo.png
DELETED
Git LFS Details
|
ai4s_banner_opaque.png
DELETED
Git LFS Details
|
app.py
CHANGED
|
@@ -964,19 +964,18 @@ def create_interface():
|
|
| 964 |
|
| 965 |
# Logos section
|
| 966 |
with gr.Row():
|
| 967 |
-
|
| 968 |
-
|
| 969 |
-
<
|
| 970 |
-
|
| 971 |
-
|
| 972 |
-
|
| 973 |
-
|
| 974 |
-
|
| 975 |
-
""")
|
| 976 |
|
| 977 |
# Main interface
|
| 978 |
with gr.Row():
|
| 979 |
-
with gr.Column(scale=
|
| 980 |
gr.Markdown("### 🎛️ Controls")
|
| 981 |
|
| 982 |
audio_input = gr.Audio(
|
|
@@ -1007,11 +1006,12 @@ def create_interface():
|
|
| 1007 |
|
| 1008 |
process_btn = gr.Button("🎤 Analyze", variant="primary", size="lg")
|
| 1009 |
|
| 1010 |
-
with gr.Column(scale=
|
| 1011 |
status_display = gr.Textbox(
|
| 1012 |
label="Status",
|
| 1013 |
value="🔇 Ready to analyze audio",
|
| 1014 |
-
interactive=False
|
|
|
|
| 1015 |
)
|
| 1016 |
|
| 1017 |
# Results
|
|
|
|
| 964 |
|
| 965 |
# Logos section
|
| 966 |
with gr.Row():
|
| 967 |
+
gr.HTML("""
|
| 968 |
+
<div style="display: flex; justify-content: center; align-items: center; gap: 30px; margin: 20px 0; flex-wrap: wrap;">
|
| 969 |
+
<img src="ai4s_banner.png" alt="AI4S" style="height: 60px; object-fit: contain;">
|
| 970 |
+
<img src="surrey_logo.png" alt="University of Surrey" style="height: 60px; object-fit: contain;">
|
| 971 |
+
<img src="EPSRC_logo.png" alt="EPSRC" style="height: 60px; object-fit: contain;">
|
| 972 |
+
<img src="CVSSP_logo.png" alt="CVSSP" style="height: 60px; object-fit: contain;">
|
| 973 |
+
</div>
|
| 974 |
+
""")
|
|
|
|
| 975 |
|
| 976 |
# Main interface
|
| 977 |
with gr.Row():
|
| 978 |
+
with gr.Column(scale=2):
|
| 979 |
gr.Markdown("### 🎛️ Controls")
|
| 980 |
|
| 981 |
audio_input = gr.Audio(
|
|
|
|
| 1006 |
|
| 1007 |
process_btn = gr.Button("🎤 Analyze", variant="primary", size="lg")
|
| 1008 |
|
| 1009 |
+
with gr.Column(scale=3):
|
| 1010 |
status_display = gr.Textbox(
|
| 1011 |
label="Status",
|
| 1012 |
value="🔇 Ready to analyze audio",
|
| 1013 |
+
interactive=False,
|
| 1014 |
+
lines=2
|
| 1015 |
)
|
| 1016 |
|
| 1017 |
# Results
|
app_fixed.py
DELETED
|
@@ -1,119 +0,0 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
import numpy as np
|
| 3 |
-
import torch
|
| 4 |
-
import torch.nn.functional as F
|
| 5 |
-
try:
|
| 6 |
-
import librosa
|
| 7 |
-
LIBROSA_AVAILABLE = True
|
| 8 |
-
except ImportError:
|
| 9 |
-
LIBROSA_AVAILABLE = False
|
| 10 |
-
print("⚠️ Librosa not available, using scipy fallback")
|
| 11 |
-
|
| 12 |
-
import plotly.graph_objects as go
|
| 13 |
-
from plotly.subplots import make_subplots
|
| 14 |
-
import io
|
| 15 |
-
import time
|
| 16 |
-
from typing import Dict, Tuple, Optional
|
| 17 |
-
import threading
|
| 18 |
-
import queue
|
| 19 |
-
from dataclasses import dataclass
|
| 20 |
-
from collections import deque
|
| 21 |
-
|
| 22 |
-
# Resto del código igual hasta la función create_interface...
|
| 23 |
-
# [Aquí iría todo el código de las clases como está, pero cambio solo la parte del streaming]
|
| 24 |
-
|
| 25 |
-
def create_interface():
|
| 26 |
-
"""Create Gradio interface with corrected streaming"""
|
| 27 |
-
|
| 28 |
-
with gr.Blocks(title="VAD Demo - Real-time Speech Detection", theme=gr.themes.Soft()) as interface:
|
| 29 |
-
gr.Markdown("""
|
| 30 |
-
# 🎤 VAD Demo: Real-time Speech Detection Framework
|
| 31 |
-
|
| 32 |
-
**Multi-Model Voice Activity Detection with Interactive Visualization**
|
| 33 |
-
|
| 34 |
-
This demo showcases 5 different AI models for speech detection optimized for CPU.
|
| 35 |
-
""")
|
| 36 |
-
|
| 37 |
-
with gr.Row():
|
| 38 |
-
with gr.Column(scale=1):
|
| 39 |
-
gr.Markdown("### 🎛️ **Controls**")
|
| 40 |
-
|
| 41 |
-
model_a = gr.Dropdown(
|
| 42 |
-
choices=list(demo_app.models.keys()),
|
| 43 |
-
value="Silero-VAD",
|
| 44 |
-
label="Panel A Model"
|
| 45 |
-
)
|
| 46 |
-
|
| 47 |
-
model_b = gr.Dropdown(
|
| 48 |
-
choices=list(demo_app.models.keys()),
|
| 49 |
-
value="E-PANNs",
|
| 50 |
-
label="Panel B Model"
|
| 51 |
-
)
|
| 52 |
-
|
| 53 |
-
threshold_slider = gr.Slider(
|
| 54 |
-
minimum=0.0,
|
| 55 |
-
maximum=1.0,
|
| 56 |
-
value=0.5,
|
| 57 |
-
step=0.05,
|
| 58 |
-
label="Detection Threshold"
|
| 59 |
-
)
|
| 60 |
-
|
| 61 |
-
status_display = gr.Textbox(
|
| 62 |
-
label="Status",
|
| 63 |
-
value="🔇 Ready to detect speech",
|
| 64 |
-
interactive=False
|
| 65 |
-
)
|
| 66 |
-
|
| 67 |
-
with gr.Column(scale=2):
|
| 68 |
-
gr.Markdown("### 🎙️ **Audio Input**")
|
| 69 |
-
|
| 70 |
-
# Simplified audio input without streaming for compatibility
|
| 71 |
-
audio_input = gr.Audio(
|
| 72 |
-
sources=["microphone"],
|
| 73 |
-
type="numpy",
|
| 74 |
-
label="Microphone Input"
|
| 75 |
-
)
|
| 76 |
-
|
| 77 |
-
process_btn = gr.Button("🎯 Process Audio", variant="primary")
|
| 78 |
-
|
| 79 |
-
gr.Markdown("### 📊 **Analysis Results**")
|
| 80 |
-
|
| 81 |
-
plot_output = gr.Plot(label="VAD Analysis")
|
| 82 |
-
model_details = gr.JSON(label="Model Details")
|
| 83 |
-
|
| 84 |
-
# Event handlers - usando click en lugar de streaming para compatibilidad
|
| 85 |
-
process_btn.click(
|
| 86 |
-
fn=demo_app.process_audio_stream,
|
| 87 |
-
inputs=[audio_input, model_a, model_b, threshold_slider],
|
| 88 |
-
outputs=[plot_output, status_display, model_details]
|
| 89 |
-
)
|
| 90 |
-
|
| 91 |
-
# Auto-process cuando se graba audio
|
| 92 |
-
audio_input.change(
|
| 93 |
-
fn=demo_app.process_audio_stream,
|
| 94 |
-
inputs=[audio_input, model_a, model_b, threshold_slider],
|
| 95 |
-
outputs=[plot_output, status_display, model_details]
|
| 96 |
-
)
|
| 97 |
-
|
| 98 |
-
gr.Markdown("""
|
| 99 |
-
### 🔬 **Research Context**
|
| 100 |
-
This demonstration supports research in privacy-preserving audio datasets and real-time speech analysis.
|
| 101 |
-
Original: https://github.com/gbibbo/vad_demo
|
| 102 |
-
""")
|
| 103 |
-
|
| 104 |
-
return interface
|
| 105 |
-
|
| 106 |
-
# Initialize demo
|
| 107 |
-
demo_app = VADDemo()
|
| 108 |
-
|
| 109 |
-
# Create and launch interface
|
| 110 |
-
if __name__ == "__main__":
|
| 111 |
-
interface = create_interface()
|
| 112 |
-
interface.queue(max_size=20)
|
| 113 |
-
|
| 114 |
-
# Simplified launch for HF Spaces compatibility
|
| 115 |
-
interface.launch(
|
| 116 |
-
share=False, # HF Spaces maneja esto automáticamente
|
| 117 |
-
debug=False,
|
| 118 |
-
show_error=True
|
| 119 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|