Gabriel Bibbó
committed on
Commit
·
ad332f8
1
Parent(s):
ee24bdc
adjust app.py
Browse files
app.py
CHANGED
|
@@ -860,7 +860,7 @@ class AudioProcessor:
|
|
| 860 |
hop_size = int(self.sample_rate * self.base_hop)
|
| 861 |
|
| 862 |
energy_signal = []
|
| 863 |
-
for i in range(0, len(audio_data) - window_size, hop_size):
|
| 864 |
window = audio_data[i:i + window_size]
|
| 865 |
energy = np.sum(window ** 2)
|
| 866 |
energy_signal.append(energy)
|
|
@@ -874,7 +874,7 @@ class AudioProcessor:
|
|
| 874 |
vad_times = np.array([r.timestamp for r in vad_results])
|
| 875 |
vad_probs = np.array([r.probability for r in vad_results])
|
| 876 |
|
| 877 |
-
energy_times = np.arange(len(energy_signal)) * self.base_hop
|
| 878 |
vad_interp = np.interp(energy_times, vad_times, vad_probs)
|
| 879 |
vad_interp = (vad_interp - np.mean(vad_interp)) / (np.std(vad_interp) + 1e-8)
|
| 880 |
|
|
@@ -883,9 +883,9 @@ class AudioProcessor:
|
|
| 883 |
delay_samples = np.argmax(correlation) - len(vad_interp) + 1
|
| 884 |
delay_seconds = delay_samples * self.base_hop
|
| 885 |
|
| 886 |
-
max_corr = np.max(correlation) / (len(vad_interp) * np.std(energy_signal) * np.std(vad_interp))
|
| 887 |
-
if
|
| 888 |
-
|
| 889 |
|
| 890 |
return self.delay_compensation
|
| 891 |
|
|
@@ -1290,7 +1290,7 @@ class VADDemo:
|
|
| 1290 |
delay = self.processor.estimate_delay_compensation(processed_audio, model_results)
|
| 1291 |
model_delays[model_name] = delay
|
| 1292 |
for r in model_results:
|
| 1293 |
-
r.timestamp
|
| 1294 |
debug_info.append(f" Delay compensation = {delay:.3f}s applied to {model_name} timestamps")
|
| 1295 |
|
| 1296 |
# Compute total duration
|
|
@@ -1472,4 +1472,4 @@ demo_app = VADDemo()
|
|
| 1472 |
# Create and launch interface
|
| 1473 |
if __name__ == "__main__":
|
| 1474 |
interface = create_interface()
|
| 1475 |
-
interface.launch(share=True, debug=False)
|
|
|
|
| 860 |
hop_size = int(self.sample_rate * self.base_hop)
|
| 861 |
|
| 862 |
energy_signal = []
|
| 863 |
+
for i in range(0, len(audio_data) - window_size + 1, hop_size):
|
| 864 |
window = audio_data[i:i + window_size]
|
| 865 |
energy = np.sum(window ** 2)
|
| 866 |
energy_signal.append(energy)
|
|
|
|
| 874 |
vad_times = np.array([r.timestamp for r in vad_results])
|
| 875 |
vad_probs = np.array([r.probability for r in vad_results])
|
| 876 |
|
| 877 |
+
energy_times = np.arange(len(energy_signal)) * self.base_hop + self.base_window / 2
|
| 878 |
vad_interp = np.interp(energy_times, vad_times, vad_probs)
|
| 879 |
vad_interp = (vad_interp - np.mean(vad_interp)) / (np.std(vad_interp) + 1e-8)
|
| 880 |
|
|
|
|
| 883 |
delay_samples = np.argmax(correlation) - len(vad_interp) + 1
|
| 884 |
delay_seconds = delay_samples * self.base_hop
|
| 885 |
|
| 886 |
+
max_corr = np.max(correlation) / (len(vad_interp) * np.std(energy_signal) * np.std(vad_interp) + 1e-8)
|
| 887 |
+
# Removed the if condition, always apply
|
| 888 |
+
self.delay_compensation = np.clip(delay_seconds, -1.0, 1.0)
|
| 889 |
|
| 890 |
return self.delay_compensation
|
| 891 |
|
|
|
|
| 1290 |
delay = self.processor.estimate_delay_compensation(processed_audio, model_results)
|
| 1291 |
model_delays[model_name] = delay
|
| 1292 |
for r in model_results:
|
| 1293 |
+
r.timestamp += delay
|
| 1294 |
debug_info.append(f" Delay compensation = {delay:.3f}s applied to {model_name} timestamps")
|
| 1295 |
|
| 1296 |
# Compute total duration
|
|
|
|
| 1472 |
# Create and launch interface
|
| 1473 |
if __name__ == "__main__":
|
| 1474 |
interface = create_interface()
|
| 1475 |
+
interface.launch(share=True, debug=False)
|