Gabriel Bibbó committed on
Commit
ad332f8
1 Parent(s): ee24bdc

adjust app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -860,7 +860,7 @@ class AudioProcessor:
860
  hop_size = int(self.sample_rate * self.base_hop)
861
 
862
  energy_signal = []
863
- for i in range(0, len(audio_data) - window_size, hop_size):
864
  window = audio_data[i:i + window_size]
865
  energy = np.sum(window ** 2)
866
  energy_signal.append(energy)
@@ -874,7 +874,7 @@ class AudioProcessor:
874
  vad_times = np.array([r.timestamp for r in vad_results])
875
  vad_probs = np.array([r.probability for r in vad_results])
876
 
877
- energy_times = np.arange(len(energy_signal)) * self.base_hop
878
  vad_interp = np.interp(energy_times, vad_times, vad_probs)
879
  vad_interp = (vad_interp - np.mean(vad_interp)) / (np.std(vad_interp) + 1e-8)
880
 
@@ -883,9 +883,9 @@ class AudioProcessor:
883
  delay_samples = np.argmax(correlation) - len(vad_interp) + 1
884
  delay_seconds = delay_samples * self.base_hop
885
 
886
- max_corr = np.max(correlation) / (len(vad_interp) * np.std(energy_signal) * np.std(vad_interp))
887
- if max_corr > self.correlation_threshold:
888
- self.delay_compensation = np.clip(delay_seconds, -0.1, 0.1)
889
 
890
  return self.delay_compensation
891
 
@@ -1290,7 +1290,7 @@ class VADDemo:
1290
  delay = self.processor.estimate_delay_compensation(processed_audio, model_results)
1291
  model_delays[model_name] = delay
1292
  for r in model_results:
1293
- r.timestamp -= delay
1294
  debug_info.append(f" Delay compensation = {delay:.3f}s applied to {model_name} timestamps")
1295
 
1296
  # Compute total duration
@@ -1472,4 +1472,4 @@ demo_app = VADDemo()
1472
  # Create and launch interface
1473
  if __name__ == "__main__":
1474
  interface = create_interface()
1475
- interface.launch(share=True, debug=False)
 
860
  hop_size = int(self.sample_rate * self.base_hop)
861
 
862
  energy_signal = []
863
+ for i in range(0, len(audio_data) - window_size + 1, hop_size):
864
  window = audio_data[i:i + window_size]
865
  energy = np.sum(window ** 2)
866
  energy_signal.append(energy)
 
874
  vad_times = np.array([r.timestamp for r in vad_results])
875
  vad_probs = np.array([r.probability for r in vad_results])
876
 
877
+ energy_times = np.arange(len(energy_signal)) * self.base_hop + self.base_window / 2
878
  vad_interp = np.interp(energy_times, vad_times, vad_probs)
879
  vad_interp = (vad_interp - np.mean(vad_interp)) / (np.std(vad_interp) + 1e-8)
880
 
 
883
  delay_samples = np.argmax(correlation) - len(vad_interp) + 1
884
  delay_seconds = delay_samples * self.base_hop
885
 
886
+ max_corr = np.max(correlation) / (len(vad_interp) * np.std(energy_signal) * np.std(vad_interp) + 1e-8)
887
+ # Removed the if condition, always apply
888
+ self.delay_compensation = np.clip(delay_seconds, -1.0, 1.0)
889
 
890
  return self.delay_compensation
891
 
 
1290
  delay = self.processor.estimate_delay_compensation(processed_audio, model_results)
1291
  model_delays[model_name] = delay
1292
  for r in model_results:
1293
+ r.timestamp += delay
1294
  debug_info.append(f" Delay compensation = {delay:.3f}s applied to {model_name} timestamps")
1295
 
1296
  # Compute total duration
 
1472
  # Create and launch interface
1473
  if __name__ == "__main__":
1474
  interface = create_interface()
1475
+ interface.launch(share=True, debug=False)