correct error when using VAC
Browse files
whisperlivekit/whisper_streaming_custom/online_asr.py
CHANGED
|
@@ -343,15 +343,15 @@ class OnlineASRProcessor:
|
|
| 343 |
)
|
| 344 |
sentences.append(sentence)
|
| 345 |
return sentences
|
| 346 |
-
|
|
|
|
| 347 |
"""
|
| 348 |
Flush the remaining transcript when processing ends.
|
| 349 |
"""
|
| 350 |
remaining_tokens = self.transcript_buffer.buffer
|
| 351 |
-
|
| 352 |
-
logger.debug(f"Final non-committed transcript: {final_transcript}")
|
| 353 |
self.buffer_time_offset += len(self.audio_buffer) / self.SAMPLING_RATE
|
| 354 |
-
return
|
| 355 |
|
| 356 |
def concatenate_tokens(
|
| 357 |
self,
|
|
@@ -384,7 +384,8 @@ class VACOnlineASRProcessor:
|
|
| 384 |
def __init__(self, online_chunk_size: float, *args, **kwargs):
|
| 385 |
self.online_chunk_size = online_chunk_size
|
| 386 |
self.online = OnlineASRProcessor(*args, **kwargs)
|
| 387 |
-
|
|
|
|
| 388 |
# Load a VAD model (e.g. Silero VAD)
|
| 389 |
import torch
|
| 390 |
model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad")
|
|
@@ -455,7 +456,7 @@ class VACOnlineASRProcessor:
|
|
| 455 |
self.buffer_offset += max(0, len(self.audio_buffer) - self.SAMPLING_RATE)
|
| 456 |
self.audio_buffer = self.audio_buffer[-self.SAMPLING_RATE:]
|
| 457 |
|
| 458 |
-
def process_iter(self) ->
|
| 459 |
"""
|
| 460 |
Depending on the VAD status and the amount of accumulated audio,
|
| 461 |
process the current audio chunk.
|
|
@@ -467,9 +468,9 @@ class VACOnlineASRProcessor:
|
|
| 467 |
return self.online.process_iter()
|
| 468 |
else:
|
| 469 |
logger.debug("No online update, only VAD")
|
| 470 |
-
return
|
| 471 |
|
| 472 |
-
def finish(self) ->
|
| 473 |
"""Finish processing by flushing any remaining text."""
|
| 474 |
result = self.online.finish()
|
| 475 |
self.current_online_chunk_buffer_size = 0
|
|
@@ -480,4 +481,4 @@ class VACOnlineASRProcessor:
|
|
| 480 |
"""
|
| 481 |
Get the unvalidated buffer in string format.
|
| 482 |
"""
|
| 483 |
-
return self.online.concatenate_tokens(self.online.transcript_buffer.buffer)
|
|
|
|
| 343 |
)
|
| 344 |
sentences.append(sentence)
|
| 345 |
return sentences
|
| 346 |
+
|
| 347 |
+
def finish(self) -> List[ASRToken]:
|
| 348 |
"""
|
| 349 |
Flush the remaining transcript when processing ends.
|
| 350 |
"""
|
| 351 |
remaining_tokens = self.transcript_buffer.buffer
|
| 352 |
+
logger.debug(f"Final non-committed tokens: {remaining_tokens}")
|
|
|
|
| 353 |
self.buffer_time_offset += len(self.audio_buffer) / self.SAMPLING_RATE
|
| 354 |
+
return remaining_tokens
|
| 355 |
|
| 356 |
def concatenate_tokens(
|
| 357 |
self,
|
|
|
|
| 384 |
def __init__(self, online_chunk_size: float, *args, **kwargs):
|
| 385 |
self.online_chunk_size = online_chunk_size
|
| 386 |
self.online = OnlineASRProcessor(*args, **kwargs)
|
| 387 |
+
self.asr = self.online.asr
|
| 388 |
+
|
| 389 |
# Load a VAD model (e.g. Silero VAD)
|
| 390 |
import torch
|
| 391 |
model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad")
|
|
|
|
| 456 |
self.buffer_offset += max(0, len(self.audio_buffer) - self.SAMPLING_RATE)
|
| 457 |
self.audio_buffer = self.audio_buffer[-self.SAMPLING_RATE:]
|
| 458 |
|
| 459 |
+
def process_iter(self) -> List[ASRToken]:
|
| 460 |
"""
|
| 461 |
Depending on the VAD status and the amount of accumulated audio,
|
| 462 |
process the current audio chunk.
|
|
|
|
| 468 |
return self.online.process_iter()
|
| 469 |
else:
|
| 470 |
logger.debug("No online update, only VAD")
|
| 471 |
+
return []
|
| 472 |
|
| 473 |
+
def finish(self) -> List[ASRToken]:
|
| 474 |
"""Finish processing by flushing any remaining text."""
|
| 475 |
result = self.online.finish()
|
| 476 |
self.current_online_chunk_buffer_size = 0
|
|
|
|
| 481 |
"""
|
| 482 |
Get the unvalidated buffer in string format.
|
| 483 |
"""
|
| 484 |
+
return self.online.concatenate_tokens(self.online.transcript_buffer.buffer)
|