Gabriel Bibbó committed on
Commit
4788ddc
·
1 Parent(s): ec04aee

Fix logo paths and layout proportions, clean duplicate files

Browse files
Files changed (7) hide show
  1. RCVSSP_logo.png +0 -0
  2. REPSRC_logo.png +0 -0
  3. Rai4s_banner.png +0 -3
  4. Rsurrey_logo.png +0 -3
  5. ai4s_banner_opaque.png +0 -3
  6. app.py +12 -12
  7. app_fixed.py +0 -119
RCVSSP_logo.png DELETED
Binary file (53.1 kB)
 
REPSRC_logo.png DELETED
Binary file (40.9 kB)
 
Rai4s_banner.png DELETED

Git LFS Details

  • SHA256: f8cc500e3a45c10155080887f77dbb47b5870d0ed32e6df88191b6a7a9f74606
  • Pointer size: 131 Bytes
  • Size of remote file: 405 kB
Rsurrey_logo.png DELETED

Git LFS Details

  • SHA256: 648bbaa5e9d95c5cffe28bebc20911afc25fee7116263f374a7c659024e53676
  • Pointer size: 131 Bytes
  • Size of remote file: 413 kB
ai4s_banner_opaque.png DELETED

Git LFS Details

  • SHA256: affc028aa1705c93768555966eeac6edd336dbfa6ba0009f833d53ace2bcc79f
  • Pointer size: 131 Bytes
  • Size of remote file: 367 kB
app.py CHANGED
@@ -964,19 +964,18 @@ def create_interface():
964
 
965
  # Logos section
966
  with gr.Row():
967
- with gr.Column():
968
- gr.HTML("""
969
- <div style="display: flex; justify-content: center; align-items: center; gap: 20px; margin: 20px 0; flex-wrap: wrap;">
970
- <img src="file/ai4s_banner.png" alt="AI4S" style="height: 60px; object-fit: contain;">
971
- <img src="file/surrey_logo.png" alt="University of Surrey" style="height: 60px; object-fit: contain;">
972
- <img src="file/EPSRC_logo.png" alt="EPSRC" style="height: 60px; object-fit: contain;">
973
- <img src="file/CVSSP_logo.png" alt="CVSSP" style="height: 60px; object-fit: contain;">
974
- </div>
975
- """)
976
 
977
  # Main interface
978
  with gr.Row():
979
- with gr.Column(scale=1):
980
  gr.Markdown("### 🎛️ Controls")
981
 
982
  audio_input = gr.Audio(
@@ -1007,11 +1006,12 @@ def create_interface():
1007
 
1008
  process_btn = gr.Button("🎤 Analyze", variant="primary", size="lg")
1009
 
1010
- with gr.Column(scale=2):
1011
  status_display = gr.Textbox(
1012
  label="Status",
1013
  value="🔇 Ready to analyze audio",
1014
- interactive=False
 
1015
  )
1016
 
1017
  # Results
 
964
 
965
  # Logos section
966
  with gr.Row():
967
+ gr.HTML("""
968
+ <div style="display: flex; justify-content: center; align-items: center; gap: 30px; margin: 20px 0; flex-wrap: wrap;">
969
+ <img src="ai4s_banner.png" alt="AI4S" style="height: 60px; object-fit: contain;">
970
+ <img src="surrey_logo.png" alt="University of Surrey" style="height: 60px; object-fit: contain;">
971
+ <img src="EPSRC_logo.png" alt="EPSRC" style="height: 60px; object-fit: contain;">
972
+ <img src="CVSSP_logo.png" alt="CVSSP" style="height: 60px; object-fit: contain;">
973
+ </div>
974
+ """)
 
975
 
976
  # Main interface
977
  with gr.Row():
978
+ with gr.Column(scale=2):
979
  gr.Markdown("### 🎛️ Controls")
980
 
981
  audio_input = gr.Audio(
 
1006
 
1007
  process_btn = gr.Button("🎤 Analyze", variant="primary", size="lg")
1008
 
1009
+ with gr.Column(scale=3):
1010
  status_display = gr.Textbox(
1011
  label="Status",
1012
  value="🔇 Ready to analyze audio",
1013
+ interactive=False,
1014
+ lines=2
1015
  )
1016
 
1017
  # Results
app_fixed.py DELETED
@@ -1,119 +0,0 @@
1
- import gradio as gr
2
- import numpy as np
3
- import torch
4
- import torch.nn.functional as F
5
- try:
6
- import librosa
7
- LIBROSA_AVAILABLE = True
8
- except ImportError:
9
- LIBROSA_AVAILABLE = False
10
- print("⚠️ Librosa not available, using scipy fallback")
11
-
12
- import plotly.graph_objects as go
13
- from plotly.subplots import make_subplots
14
- import io
15
- import time
16
- from typing import Dict, Tuple, Optional
17
- import threading
18
- import queue
19
- from dataclasses import dataclass
20
- from collections import deque
21
-
22
- # Resto del código igual hasta la función create_interface...
23
- # [Aquí iría todo el código de las clases como está, pero cambio solo la parte del streaming]
24
-
25
- def create_interface():
26
- """Create Gradio interface with corrected streaming"""
27
-
28
- with gr.Blocks(title="VAD Demo - Real-time Speech Detection", theme=gr.themes.Soft()) as interface:
29
- gr.Markdown("""
30
- # 🎤 VAD Demo: Real-time Speech Detection Framework
31
-
32
- **Multi-Model Voice Activity Detection with Interactive Visualization**
33
-
34
- This demo showcases 5 different AI models for speech detection optimized for CPU.
35
- """)
36
-
37
- with gr.Row():
38
- with gr.Column(scale=1):
39
- gr.Markdown("### 🎛️ **Controls**")
40
-
41
- model_a = gr.Dropdown(
42
- choices=list(demo_app.models.keys()),
43
- value="Silero-VAD",
44
- label="Panel A Model"
45
- )
46
-
47
- model_b = gr.Dropdown(
48
- choices=list(demo_app.models.keys()),
49
- value="E-PANNs",
50
- label="Panel B Model"
51
- )
52
-
53
- threshold_slider = gr.Slider(
54
- minimum=0.0,
55
- maximum=1.0,
56
- value=0.5,
57
- step=0.05,
58
- label="Detection Threshold"
59
- )
60
-
61
- status_display = gr.Textbox(
62
- label="Status",
63
- value="🔇 Ready to detect speech",
64
- interactive=False
65
- )
66
-
67
- with gr.Column(scale=2):
68
- gr.Markdown("### 🎙️ **Audio Input**")
69
-
70
- # Simplified audio input without streaming for compatibility
71
- audio_input = gr.Audio(
72
- sources=["microphone"],
73
- type="numpy",
74
- label="Microphone Input"
75
- )
76
-
77
- process_btn = gr.Button("🎯 Process Audio", variant="primary")
78
-
79
- gr.Markdown("### 📊 **Analysis Results**")
80
-
81
- plot_output = gr.Plot(label="VAD Analysis")
82
- model_details = gr.JSON(label="Model Details")
83
-
84
- # Event handlers - usando click en lugar de streaming para compatibilidad
85
- process_btn.click(
86
- fn=demo_app.process_audio_stream,
87
- inputs=[audio_input, model_a, model_b, threshold_slider],
88
- outputs=[plot_output, status_display, model_details]
89
- )
90
-
91
- # Auto-process cuando se graba audio
92
- audio_input.change(
93
- fn=demo_app.process_audio_stream,
94
- inputs=[audio_input, model_a, model_b, threshold_slider],
95
- outputs=[plot_output, status_display, model_details]
96
- )
97
-
98
- gr.Markdown("""
99
- ### 🔬 **Research Context**
100
- This demonstration supports research in privacy-preserving audio datasets and real-time speech analysis.
101
- Original: https://github.com/gbibbo/vad_demo
102
- """)
103
-
104
- return interface
105
-
106
- # Initialize demo
107
- demo_app = VADDemo()
108
-
109
- # Create and launch interface
110
- if __name__ == "__main__":
111
- interface = create_interface()
112
- interface.queue(max_size=20)
113
-
114
- # Simplified launch for HF Spaces compatibility
115
- interface.launch(
116
- share=False, # HF Spaces maneja esto automáticamente
117
- debug=False,
118
- show_error=True
119
- )