Prepend the first section of audio while detecting speech (it was cut off before)

Dobromir Popov 2023-03-07 11:54:56 +02:00
parent 6429852505
commit 5354d8c328
2 changed files with 53 additions and 10 deletions
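The change addresses a common voice-capture problem: by the time the volume detector decides that speech has started, the first few hundred milliseconds of the utterance have already passed, so the recording sent to the server begins mid-word. The fix is to keep a short rolling pre-buffer of recent audio while listening for speech and to prepend it to the first chunk that is sent. A minimal sketch of that pattern, assuming a MediaRecorder driven with a timeslice and reusing the audioRecorder and sendAudioToServer names from this diff (PRE_BUFFER_MS, CHUNK_MS, MAX_CHUNKS, and speechDetected are illustrative names, not taken from the commit):

    const PRE_BUFFER_MS = 500;                             // leading audio to keep
    const CHUNK_MS = 100;                                  // MediaRecorder timeslice
    const MAX_CHUNKS = Math.ceil(PRE_BUFFER_MS / CHUNK_MS);

    const preBuffer = [];                                  // rolling window of recent Blobs
    let speechDetected = false;                            // flipped by the volume detector

    audioRecorder.start(CHUNK_MS);                         // emit a chunk every CHUNK_MS
    audioRecorder.addEventListener("dataavailable", (event) => {
        if (!speechDetected) {
            // Silence so far: remember the chunk, drop the oldest beyond the window.
            preBuffer.push(event.data);
            if (preBuffer.length > MAX_CHUNKS) preBuffer.shift();
            return;
        }
        // Speech: prepend the buffered lead-in, then reset the window.
        const payload = new Blob([...preBuffer, event.data], { type: event.data.type });
        preBuffer.length = 0;
        sendAudioToServer(payload);
    });

One caveat: with container formats such as audio/ogg; codecs=opus, only the first timeslice chunk of a recording session carries the container headers, so buffered chunks must stay in order and come from the same session to decode cleanly. The commit takes a different route, stopping and restarting the recorder so that each segment is a standalone blob.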

File 1: client-side audio capture script

@@ -51,7 +51,7 @@
 let isSpeaking = false;
 let soundDetected = false;
 let speakingCount = 0;
-let SILENCE_DELAY_MS = 100;
+let SILENCE_DELAY_MS = 50; // was 100 with good results
 const volumeHistory = [];
 let canvas = document.getElementById("canvas");
@@ -100,9 +100,9 @@
 if (averageVolume > threshold) {
     if (autosend.checked && speakingCount == 0 && audioRecorder) {
         console.log("starting new recording");
-        soundDetected = false;
         audioRecorder.stop();
         audioRecorder.start();
+        soundDetected = true;
     }
     speakingCount++;
     if (speakingCount > 7) {
@@ -111,13 +111,14 @@
             isSpeaking = true;
             console.log("Was silent and is now speaking. (" + averageVolume + " averageVolume).");
         }
-    } else {
+    } else if (averageVolume - 5 < threshold) {
         speakingCount = 0;
         if (isSpeaking) {
             silenceCount++;
-            if (silenceCount > 5) {
+            if (silenceCount > 3) {
                 if (autosend.checked) {
                     console.log("Was speaking and is now silent. (" + averageVolume + " averageVolume). Sending audio to server.");
+                    soundDetected = true;
                     audioRecorder.stop();
                     audioRecorder.start();
                 }
@@ -182,7 +183,6 @@
 });
 };
-
 function onmessage(event) {
     let latency = Date.now() - serverTime;
     console.log("Received message from server: " + event.data + " (latency: " + latency + "ms)");
@@ -197,6 +197,10 @@
     }
 }
+
+const preBufferDuration = 500; // duration of the pre-buffer in ms
+
 function startListening() {
     // Initialize canvas
     canvasCtx.fillStyle = "green";
@@ -204,14 +208,39 @@
 navigator.mediaDevices.getUserMedia({ audio: { sampleRate: 16000, echoCancellation: true } }).then((stream) => {
     audioRecorder = new MediaRecorder(stream);
     audioRecorder.start();
+    console.log("Started listening to microphone (sample rate: " + stream.getAudioTracks()[0].getSettings().sampleRate + " Hz, echoCancellation: " + stream.getAudioTracks()[0].getSettings().echoCancellation + ", mimeType: " + audioRecorder.mimeType + ")");
+    //const preBufferLength = Math.ceil(preBufferDuration * audioRecorder.mimeType.audioSampleRate * audioRecorder.mimeType.audioChannels);
+    const preBufferLength = Math.ceil(preBufferDuration * 48000 * 2);
+    var preBuffer = [];
     audioRecorder.addEventListener("dataavailable", (event) => {
         console.log("Audio data available: " + event.data.size + " bytes");
         if (!soundDetected && autosend.checked) {
             console.log("discarding audio data because not speaking");
+            // Keep the tail of the discarded chunk as a pre-buffer, so the start
+            // of the next utterance can be prepended to the next recording.
+            const start = Math.max(0, event.data.size - 500 * 48 * 2 / 8); // assumes 48 kHz, 16-bit PCM (see note below)
+            preBuffer = [event.data.slice(start, event.data.size)]; // kept as an array so the length check below works
             return;
         }
         if (event.data.size > 0) {
-            sendAudioToServer(event.data);
+            let data = event.data;
+            if (preBuffer.length > 0) {
+                // Prepend the buffered lead-in to the new chunk.
+                data = new Blob([...preBuffer, event.data], { type: "audio/ogg; codecs=opus" });
+                preBuffer = [];
+            }
+            sendAudioToServer(data);
             soundDetected = false;
         }
     });
@@ -223,6 +252,21 @@
     recordButton.classList.add("recording");
 }
+
+// Split off the last 500 ms and re-append it, returning the reassembled blob.
+function getlast500ms(audioData, preBuffer) {
+    const audioBlob = new Blob([...preBuffer, audioData], { type: "audio/ogg; codecs=opus" });
+    const start = Math.max(0, audioBlob.size - 500 * 48000 * 2 / 8); // assumes 48 kHz, 16-bit PCM (see note below)
+    const slicedAudio = audioBlob.slice(start, audioBlob.size);
+    // Create a new Blob with the remaining (leading) audio data
+    const remainingAudio = audioBlob.slice(0, start);
+    // Reassemble: leading audio first, then the sliced 500 ms tail
+    return new Blob([remainingAudio, slicedAudio], { type: "audio/ogg; codecs=opus" });
+}
+
 function stopListening() {
     recording = false;
     audioRecorder.stop();
@@ -262,7 +306,7 @@
     return device.kind === 'audioinput';
 });
 console.log(audioInputDevices.length + ' audio input devices found');
 // If more than one audio input device is available, populate the select list
 if (audioInputDevices.length > 1) {
     audioInputDevices.forEach(function (device) {
@@ -312,7 +356,7 @@
 //transcription = document.getElementById("transcription");
 //autosend = document.getElementById("autosend");
 statusRecording = document.getElementById("status-recording");
 enumerateDevices();
 connect(socket);
 };
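A note on the byte arithmetic in the new pre-buffer code: the offsets are computed as if the blobs were raw PCM, but MediaRecorder emits Opus-compressed Ogg here, so byte counts do not map linearly to milliseconds and a blob sliced mid-stream lacks container headers. The constants also disagree with one another: 500 * 48 * 2 / 8 is 6,000 bytes and 500 * 48000 * 2 / 8 is 6,000,000, while 500 ms of 16-bit stereo PCM at 48 kHz is actually 0.5 s * 48,000 samples/s * 2 channels * 2 bytes = 96,000 bytes, and Math.ceil(preBufferDuration * 48000 * 2) = 48,000,000 treats the millisecond value as seconds. A hypothetical helper (not in the commit) that makes the PCM assumption explicit:

    // Bytes occupied by `ms` milliseconds of raw PCM audio. Only meaningful for
    // uncompressed data; Opus-compressed blobs cannot be sliced by byte offsets.
    function pcmBytesForMs(ms, sampleRate = 48000, channels = 2, bytesPerSample = 2) {
        return Math.ceil((ms / 1000) * sampleRate * channels * bytesPerSample);
    }
    // pcmBytesForMs(500) === 96000, versus 6000 from 500 * 48 * 2 / 8.

For a compressed stream, the more robust approach is the one sketched above the diff: buffer whole chunks and prepend them unmodified instead of slicing by bytes.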

File 2: Node.js WebSocket/Express server

@@ -35,7 +35,7 @@ wss.on('connection', (ws) => {
     }
 };
+// "yyyymmdd-hhMMss"
 var timestampfilename = Date.now("yyyymmdd-hhMMss");
 // save the audio data to a file in the /rec subfolder
@@ -85,7 +85,6 @@ app.get('/wsurl', (req, res) => {
 res.send(process.env.WS_URL, 200, { 'Content-Type': 'text/plain' });
 });
-
 app.listen(8080, () => {
     console.log('Server listening on port 8080');
 });
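One server-side detail worth flagging: Date.now() takes no arguments and always returns milliseconds since the Unix epoch, so the format string in Date.now("yyyymmdd-hhMMss") is silently ignored and recordings are named by epoch milliseconds rather than the intended timestamp. A hypothetical helper (not in the commit) that produces the format the comment describes:

    // Build a "yyyymmdd-hhMMss" stamp; Date.now() cannot format dates.
    function timestampFilename(d = new Date()) {
        const pad = (n) => String(n).padStart(2, "0");
        return d.getFullYear() + pad(d.getMonth() + 1) + pad(d.getDate()) +
               "-" + pad(d.getHours()) + pad(d.getMinutes()) + pad(d.getSeconds());
    }
    // e.g. timestampFilename(new Date(2023, 2, 7, 11, 54, 56)) -> "20230307-115456"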