End of transcription: properly sends signal back to the endpoint
Browse files
whisperlivekit/web/live_transcription.html
CHANGED
|
@@ -308,6 +308,7 @@
|
|
| 308 |
let waveCtx = waveCanvas.getContext("2d");
|
| 309 |
let animationFrame = null;
|
| 310 |
let waitingForStop = false;
|
|
|
|
| 311 |
waveCanvas.width = 60 * (window.devicePixelRatio || 1);
|
| 312 |
waveCanvas.height = 30 * (window.devicePixelRatio || 1);
|
| 313 |
waveCtx.scale(window.devicePixelRatio || 1, window.devicePixelRatio || 1);
|
|
@@ -357,18 +358,31 @@
|
|
| 357 |
|
| 358 |
websocket.onclose = () => {
|
| 359 |
if (userClosing) {
|
| 360 |
-
if (
|
| 361 |
-
statusText.textContent = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
}
|
| 363 |
-
|
|
|
|
| 364 |
} else {
|
| 365 |
-
statusText.textContent =
|
| 366 |
-
"Disconnected from the WebSocket server. (Check logs if model is loading.)";
|
| 367 |
if (isRecording) {
|
| 368 |
-
stopRecording();
|
| 369 |
}
|
| 370 |
}
|
| 371 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
};
|
| 373 |
|
| 374 |
websocket.onerror = () => {
|
|
@@ -382,24 +396,31 @@
|
|
| 382 |
|
| 383 |
// Check for status messages
|
| 384 |
if (data.type === "ready_to_stop") {
|
| 385 |
-
console.log("Ready to stop, closing WebSocket");
|
| 386 |
-
|
| 387 |
-
// signal that we are not waiting for stop anymore
|
| 388 |
waitingForStop = false;
|
| 389 |
-
recordButton.disabled = false; // this should be elsewhere
|
| 390 |
-
console.log("Record button enabled");
|
| 391 |
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
}
|
| 397 |
-
|
| 398 |
-
|
| 399 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
return;
|
| 401 |
}
|
| 402 |
|
|
|
|
|
|
|
| 403 |
// Handle normal transcription updates
|
| 404 |
const {
|
| 405 |
lines = [],
|
|
@@ -414,13 +435,14 @@
|
|
| 414 |
buffer_diarization,
|
| 415 |
buffer_transcription,
|
| 416 |
remaining_time_diarization,
|
| 417 |
-
remaining_time_transcription
|
|
|
|
| 418 |
);
|
| 419 |
};
|
| 420 |
});
|
| 421 |
}
|
| 422 |
|
| 423 |
-
function renderLinesWithBuffer(lines, buffer_diarization, buffer_transcription, remaining_time_diarization, remaining_time_transcription) {
|
| 424 |
const linesHtml = lines.map((item, idx) => {
|
| 425 |
let timeInfo = "";
|
| 426 |
if (item.beg !== undefined && item.end !== undefined) {
|
|
@@ -430,30 +452,46 @@
|
|
| 430 |
let speakerLabel = "";
|
| 431 |
if (item.speaker === -2) {
|
| 432 |
speakerLabel = `<span class="silence">Silence<span id='timeInfo'>${timeInfo}</span></span>`;
|
| 433 |
-
} else if (item.speaker == 0) {
|
| 434 |
speakerLabel = `<span class='loading'><span class="spinner"></span><span id='timeInfo'>${remaining_time_diarization} second(s) of audio are undergoing diarization</span></span>`;
|
| 435 |
} else if (item.speaker == -1) {
|
| 436 |
-
speakerLabel = `<span id="speaker"
|
| 437 |
-
} else if (item.speaker !== -1) {
|
| 438 |
speakerLabel = `<span id="speaker">Speaker ${item.speaker}<span id='timeInfo'>${timeInfo}</span></span>`;
|
| 439 |
}
|
| 440 |
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 451 |
}
|
| 452 |
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
: `<p>${speakerLabel}<br/></p>`;
|
| 457 |
}).join("");
|
| 458 |
|
| 459 |
linesTranscriptDiv.innerHTML = linesHtml;
|
|
@@ -578,20 +616,6 @@
|
|
| 578 |
timerElement.textContent = "00:00";
|
| 579 |
startTime = null;
|
| 580 |
|
| 581 |
-
if (websocket && websocket.readyState === WebSocket.OPEN) {
|
| 582 |
-
try {
|
| 583 |
-
await websocket.send(JSON.stringify({
|
| 584 |
-
type: "stop",
|
| 585 |
-
message: "User stopped recording"
|
| 586 |
-
}));
|
| 587 |
-
statusText.textContent = "Recording stopped. Processing final audio...";
|
| 588 |
-
} catch (e) {
|
| 589 |
-
console.error("Could not send stop message:", e);
|
| 590 |
-
statusText.textContent = "Recording stopped. Error during final audio processing.";
|
| 591 |
-
websocket.close();
|
| 592 |
-
websocket = null;
|
| 593 |
-
}
|
| 594 |
-
}
|
| 595 |
|
| 596 |
isRecording = false;
|
| 597 |
updateUI();
|
|
@@ -625,19 +649,22 @@
|
|
| 625 |
|
| 626 |
function updateUI() {
|
| 627 |
recordButton.classList.toggle("recording", isRecording);
|
| 628 |
-
|
|
|
|
| 629 |
if (waitingForStop) {
|
| 630 |
-
statusText.textContent
|
| 631 |
-
|
| 632 |
-
|
| 633 |
} else if (isRecording) {
|
| 634 |
statusText.textContent = "Recording...";
|
| 635 |
-
recordButton.disabled = false;
|
| 636 |
-
console.log("Record button enabled");
|
| 637 |
} else {
|
| 638 |
-
statusText.textContent
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
recordButton.disabled = false;
|
| 640 |
-
console.log("Record button enabled");
|
| 641 |
}
|
| 642 |
}
|
| 643 |
|
|
@@ -645,4 +672,4 @@
|
|
| 645 |
</script>
|
| 646 |
</body>
|
| 647 |
|
| 648 |
-
</html>
|
|
|
|
| 308 |
let waveCtx = waveCanvas.getContext("2d");
|
| 309 |
let animationFrame = null;
|
| 310 |
let waitingForStop = false;
|
| 311 |
+
let lastReceivedData = null;
|
| 312 |
waveCanvas.width = 60 * (window.devicePixelRatio || 1);
|
| 313 |
waveCanvas.height = 30 * (window.devicePixelRatio || 1);
|
| 314 |
waveCtx.scale(window.devicePixelRatio || 1, window.devicePixelRatio || 1);
|
|
|
|
| 358 |
|
| 359 |
websocket.onclose = () => {
|
| 360 |
if (userClosing) {
|
| 361 |
+
if (waitingForStop) {
|
| 362 |
+
statusText.textContent = "Processing finalized or connection closed.";
|
| 363 |
+
if (lastReceivedData) {
|
| 364 |
+
renderLinesWithBuffer(
|
| 365 |
+
lastReceivedData.lines || [],
|
| 366 |
+
lastReceivedData.buffer_diarization || "",
|
| 367 |
+
lastReceivedData.buffer_transcription || "",
|
| 368 |
+
0, 0, true // isFinalizing = true
|
| 369 |
+
);
|
| 370 |
+
}
|
| 371 |
}
|
| 372 |
+
// If ready_to_stop was received, statusText is already "Finished processing..."
|
| 373 |
+
// and waitingForStop is false.
|
| 374 |
} else {
|
| 375 |
+
statusText.textContent = "Disconnected from the WebSocket server. (Check logs if model is loading.)";
|
|
|
|
| 376 |
if (isRecording) {
|
| 377 |
+
stopRecording();
|
| 378 |
}
|
| 379 |
}
|
| 380 |
+
isRecording = false;
|
| 381 |
+
waitingForStop = false;
|
| 382 |
+
userClosing = false;
|
| 383 |
+
lastReceivedData = null;
|
| 384 |
+
websocket = null;
|
| 385 |
+
updateUI();
|
| 386 |
};
|
| 387 |
|
| 388 |
websocket.onerror = () => {
|
|
|
|
| 396 |
|
| 397 |
// Check for status messages
|
| 398 |
if (data.type === "ready_to_stop") {
|
| 399 |
+
console.log("Ready to stop received, finalizing display and closing WebSocket.");
|
|
|
|
|
|
|
| 400 |
waitingForStop = false;
|
|
|
|
|
|
|
| 401 |
|
| 402 |
+
if (lastReceivedData) {
|
| 403 |
+
renderLinesWithBuffer(
|
| 404 |
+
lastReceivedData.lines || [],
|
| 405 |
+
lastReceivedData.buffer_diarization || "",
|
| 406 |
+
lastReceivedData.buffer_transcription || "",
|
| 407 |
+
0, // No more lag
|
| 408 |
+
0, // No more lag
|
| 409 |
+
true // isFinalizing = true
|
| 410 |
+
);
|
| 411 |
}
|
| 412 |
+
statusText.textContent = "Finished processing audio! Ready to record again.";
|
| 413 |
+
recordButton.disabled = false;
|
| 414 |
|
| 415 |
+
if (websocket) {
|
| 416 |
+
websocket.close(); // will trigger onclose
|
| 417 |
+
// websocket = null; // onclose handles setting websocket to null
|
| 418 |
+
}
|
| 419 |
return;
|
| 420 |
}
|
| 421 |
|
| 422 |
+
lastReceivedData = data;
|
| 423 |
+
|
| 424 |
// Handle normal transcription updates
|
| 425 |
const {
|
| 426 |
lines = [],
|
|
|
|
| 435 |
buffer_diarization,
|
| 436 |
buffer_transcription,
|
| 437 |
remaining_time_diarization,
|
| 438 |
+
remaining_time_transcription,
|
| 439 |
+
false // isFinalizing = false for normal updates
|
| 440 |
);
|
| 441 |
};
|
| 442 |
});
|
| 443 |
}
|
| 444 |
|
| 445 |
+
function renderLinesWithBuffer(lines, buffer_diarization, buffer_transcription, remaining_time_diarization, remaining_time_transcription, isFinalizing = false) {
|
| 446 |
const linesHtml = lines.map((item, idx) => {
|
| 447 |
let timeInfo = "";
|
| 448 |
if (item.beg !== undefined && item.end !== undefined) {
|
|
|
|
| 452 |
let speakerLabel = "";
|
| 453 |
if (item.speaker === -2) {
|
| 454 |
speakerLabel = `<span class="silence">Silence<span id='timeInfo'>${timeInfo}</span></span>`;
|
| 455 |
+
} else if (item.speaker == 0 && !isFinalizing) {
|
| 456 |
speakerLabel = `<span class='loading'><span class="spinner"></span><span id='timeInfo'>${remaining_time_diarization} second(s) of audio are undergoing diarization</span></span>`;
|
| 457 |
} else if (item.speaker == -1) {
|
| 458 |
+
speakerLabel = `<span id="speaker">Speaker 1<span id='timeInfo'>${timeInfo}</span></span>`;
|
| 459 |
+
} else if (item.speaker !== -1 && item.speaker !== 0) {
|
| 460 |
speakerLabel = `<span id="speaker">Speaker ${item.speaker}<span id='timeInfo'>${timeInfo}</span></span>`;
|
| 461 |
}
|
| 462 |
|
| 463 |
+
|
| 464 |
+
let currentLineText = item.text || "";
|
| 465 |
+
|
| 466 |
+
if (idx === lines.length - 1) {
|
| 467 |
+
if (!isFinalizing) {
|
| 468 |
+
if (remaining_time_transcription > 0) {
|
| 469 |
+
speakerLabel += `<span class="label_transcription"><span class="spinner"></span>Transcription lag <span id='timeInfo'>${remaining_time_transcription}s</span></span>`;
|
| 470 |
+
}
|
| 471 |
+
if (buffer_diarization && remaining_time_diarization > 0) {
|
| 472 |
+
speakerLabel += `<span class="label_diarization"><span class="spinner"></span>Diarization lag<span id='timeInfo'>${remaining_time_diarization}s</span></span>`;
|
| 473 |
+
}
|
| 474 |
+
}
|
| 475 |
+
|
| 476 |
+
if (buffer_diarization) {
|
| 477 |
+
if (isFinalizing) {
|
| 478 |
+
currentLineText += (currentLineText.length > 0 && buffer_diarization.trim().length > 0 ? " " : "") + buffer_diarization.trim();
|
| 479 |
+
} else {
|
| 480 |
+
currentLineText += `<span class="buffer_diarization">${buffer_diarization}</span>`;
|
| 481 |
+
}
|
| 482 |
+
}
|
| 483 |
+
if (buffer_transcription) {
|
| 484 |
+
if (isFinalizing) {
|
| 485 |
+
currentLineText += (currentLineText.length > 0 && buffer_transcription.trim().length > 0 ? " " : "") + buffer_transcription.trim();
|
| 486 |
+
} else {
|
| 487 |
+
currentLineText += `<span class="buffer_transcription">${buffer_transcription}</span>`;
|
| 488 |
+
}
|
| 489 |
+
}
|
| 490 |
}
|
| 491 |
|
| 492 |
+
return currentLineText.trim().length > 0 || speakerLabel.length > 0
|
| 493 |
+
? `<p>${speakerLabel}<br/><div class='textcontent'>${currentLineText}</div></p>`
|
| 494 |
+
: `<p>${speakerLabel}<br/></p>`;
|
|
|
|
| 495 |
}).join("");
|
| 496 |
|
| 497 |
linesTranscriptDiv.innerHTML = linesHtml;
|
|
|
|
| 616 |
timerElement.textContent = "00:00";
|
| 617 |
startTime = null;
|
| 618 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
|
| 620 |
isRecording = false;
|
| 621 |
updateUI();
|
|
|
|
| 649 |
|
| 650 |
function updateUI() {
|
| 651 |
recordButton.classList.toggle("recording", isRecording);
|
| 652 |
+
recordButton.disabled = waitingForStop;
|
| 653 |
+
|
| 654 |
if (waitingForStop) {
|
| 655 |
+
if (statusText.textContent !== "Recording stopped. Processing final audio...") {
|
| 656 |
+
statusText.textContent = "Please wait for processing to complete...";
|
| 657 |
+
}
|
| 658 |
} else if (isRecording) {
|
| 659 |
statusText.textContent = "Recording...";
|
|
|
|
|
|
|
| 660 |
} else {
|
| 661 |
+
if (statusText.textContent !== "Finished processing audio! Ready to record again." &&
|
| 662 |
+
statusText.textContent !== "Processing finalized or connection closed.") {
|
| 663 |
+
statusText.textContent = "Click to start transcription";
|
| 664 |
+
}
|
| 665 |
+
}
|
| 666 |
+
if (!waitingForStop) {
|
| 667 |
recordButton.disabled = false;
|
|
|
|
| 668 |
}
|
| 669 |
}
|
| 670 |
|
|
|
|
| 672 |
</script>
|
| 673 |
</body>
|
| 674 |
|
| 675 |
+
</html>
|