Prepend the first section of audio while detecting speech (it was cut off before)

Dobromir Popov 2023-03-07 11:54:56 +02:00
parent 6429852505
commit 5354d8c328
2 changed files with 53 additions and 10 deletions
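The change addresses a common voice-capture problem: by the time the volume detector decides that speech has started, the first few hundred milliseconds of the utterance have already passed, so the recording sent to the server begins mid-word. The fix is to keep a short rolling pre-buffer of recent audio while listening for speech and to prepend it to the first chunk that is sent. A minimal sketch of that pattern, assuming a MediaRecorder driven with a timeslice and reusing the audioRecorder and sendAudioToServer names from this diff (PRE_BUFFER_MS, CHUNK_MS, MAX_CHUNKS, and speechDetected are illustrative names, not taken from the commit):

    const PRE_BUFFER_MS = 500;                             // leading audio to keep
    const CHUNK_MS = 100;                                  // MediaRecorder timeslice
    const MAX_CHUNKS = Math.ceil(PRE_BUFFER_MS / CHUNK_MS);

    const preBuffer = [];                                  // rolling window of recent Blobs
    let speechDetected = false;                            // flipped by the volume detector

    audioRecorder.start(CHUNK_MS);                         // emit a chunk every CHUNK_MS
    audioRecorder.addEventListener("dataavailable", (event) => {
        if (!speechDetected) {
            // Silence so far: remember the chunk, drop the oldest beyond the window.
            preBuffer.push(event.data);
            if (preBuffer.length > MAX_CHUNKS) preBuffer.shift();
            return;
        }
        // Speech: prepend the buffered lead-in, then reset the window.
        const payload = new Blob([...preBuffer, event.data], { type: event.data.type });
        preBuffer.length = 0;
        sendAudioToServer(payload);
    });

One caveat: with container formats such as audio/ogg; codecs=opus, only the first timeslice chunk of a recording session carries the container headers, so buffered chunks must stay in order and come from the same session to decode cleanly. The commit takes a different route, stopping and restarting the recorder so that each segment is a standalone blob.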

File 1: client-side audio capture script

@@ -51,7 +51,7 @@
 let isSpeaking = false;
 let soundDetected = false;
 let speakingCount = 0;
-let SILENCE_DELAY_MS = 100;
+let SILENCE_DELAY_MS = 50; // was 100 with good results
 const volumeHistory = [];
 let canvas = document.getElementById("canvas");
@@ -100,9 +100,9 @@
 if (averageVolume > threshold) {
     if (autosend.checked && speakingCount == 0 && audioRecorder) {
         console.log("starting new recording");
-        soundDetected = false;
         audioRecorder.stop();
         audioRecorder.start();
+        soundDetected = true;
     }
     speakingCount++;
     if (speakingCount > 7) {
@@ -111,13 +111,14 @@
             isSpeaking = true;
             console.log("Was silent and is now speaking. (" + averageVolume + " averageVolume).");
         }
-    } else {
+    } else if (averageVolume - 5 < threshold) {
         speakingCount = 0;
         if (isSpeaking) {
             silenceCount++;
-            if (silenceCount > 5) {
+            if (silenceCount > 3) {
                 if (autosend.checked) {
                     console.log("Was speaking and is now silent. (" + averageVolume + " averageVolume). Sending audio to server.");
+                    soundDetected = true;
                     audioRecorder.stop();
                     audioRecorder.start();
                 }
@@ -182,7 +183,6 @@
 });
 };
-
 function onmessage(event) {
     let latency = Date.now() - serverTime;
     console.log("Received message from server: " + event.data + " (latency: " + latency + "ms)");
@@ -197,6 +197,10 @@
     }
 }
+
+const preBufferDuration = 500; // duration of the pre-buffer in ms
+
 function startListening() {
     // Initialize canvas
     canvasCtx.fillStyle = "green";
@@ -204,14 +208,39 @@
 navigator.mediaDevices.getUserMedia({ audio: { sampleRate: 16000, echoCancellation: true } }).then((stream) => {
     audioRecorder = new MediaRecorder(stream);
     audioRecorder.start();
+    console.log("Started listening to microphone (sample rate: " + stream.getAudioTracks()[0].getSettings().sampleRate + " Hz, echoCancellation: " + stream.getAudioTracks()[0].getSettings().echoCancellation + ", mimeType: " + audioRecorder.mimeType + ")");
+    //const preBufferLength = Math.ceil(preBufferDuration * audioRecorder.mimeType.audioSampleRate * audioRecorder.mimeType.audioChannels);
+    const preBufferLength = Math.ceil(preBufferDuration * 48000 * 2);
+    var preBuffer = [];
     audioRecorder.addEventListener("dataavailable", (event) => {
         console.log("Audio data available: " + event.data.size + " bytes");
         if (!soundDetected && autosend.checked) {
             console.log("discarding audio data because not speaking");
+            // Keep the tail of the discarded chunk as a pre-buffer, so the start
+            // of the next utterance can be prepended to the next recording.
+            const start = Math.max(0, event.data.size - 500 * 48 * 2 / 8); // assumes 48 kHz, 16-bit PCM (see note below)
+            preBuffer = [event.data.slice(start, event.data.size)]; // kept as an array so the length check below works
             return;
         }
         if (event.data.size > 0) {
-            sendAudioToServer(event.data);
+            let data = event.data;
+            if (preBuffer.length > 0) {
+                // Prepend the buffered lead-in to the new chunk.
+                data = new Blob([...preBuffer, event.data], { type: "audio/ogg; codecs=opus" });
+                preBuffer = [];
+            }
+            sendAudioToServer(data);
             soundDetected = false;
         }
     });
@@ -223,6 +252,21 @@
     recordButton.classList.add("recording");
 }
+
+// Split off the last 500 ms and re-append it, returning the reassembled blob.
+function getlast500ms(audioData, preBuffer) {
+    const audioBlob = new Blob([...preBuffer, audioData], { type: "audio/ogg; codecs=opus" });
+    const start = Math.max(0, audioBlob.size - 500 * 48000 * 2 / 8); // assumes 48 kHz, 16-bit PCM (see note below)
+    const slicedAudio = audioBlob.slice(start, audioBlob.size);
+    // Create a new Blob with the remaining (leading) audio data
+    const remainingAudio = audioBlob.slice(0, start);
+    // Reassemble: leading audio first, then the sliced 500 ms tail
+    return new Blob([remainingAudio, slicedAudio], { type: "audio/ogg; codecs=opus" });
+}
+
 function stopListening() {
     recording = false;
     audioRecorder.stop();
@@ -262,7 +306,7 @@
     return device.kind === 'audioinput';
 });
 console.log(audioInputDevices.length + ' audio input devices found');
 // If more than one audio input device is available, populate the select list
 if (audioInputDevices.length > 1) {
     audioInputDevices.forEach(function (device) {
@@ -312,7 +356,7 @@
 //transcription = document.getElementById("transcription");
 //autosend = document.getElementById("autosend");
 statusRecording = document.getElementById("status-recording");
 enumerateDevices();
 connect(socket);
 };
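A note on the byte arithmetic in the new pre-buffer code: the offsets are computed as if the blobs were raw PCM, but MediaRecorder emits Opus-compressed Ogg here, so byte counts do not map linearly to milliseconds and a blob sliced mid-stream lacks container headers. The constants also disagree with one another: 500 * 48 * 2 / 8 is 6,000 bytes and 500 * 48000 * 2 / 8 is 6,000,000, while 500 ms of 16-bit stereo PCM at 48 kHz is actually 0.5 s * 48,000 samples/s * 2 channels * 2 bytes = 96,000 bytes, and Math.ceil(preBufferDuration * 48000 * 2) = 48,000,000 treats the millisecond value as seconds. A hypothetical helper (not in the commit) that makes the PCM assumption explicit:

    // Bytes occupied by `ms` milliseconds of raw PCM audio. Only meaningful for
    // uncompressed data; Opus-compressed blobs cannot be sliced by byte offsets.
    function pcmBytesForMs(ms, sampleRate = 48000, channels = 2, bytesPerSample = 2) {
        return Math.ceil((ms / 1000) * sampleRate * channels * bytesPerSample);
    }
    // pcmBytesForMs(500) === 96000, versus 6000 from 500 * 48 * 2 / 8.

For a compressed stream, the more robust approach is the one sketched above the diff: buffer whole chunks and prepend them unmodified instead of slicing by bytes.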

File 2: Node.js WebSocket/Express server

@@ -35,7 +35,7 @@ wss.on('connection', (ws) => {
     }
 };
+// "yyyymmdd-hhMMss"
 var timestampfilename = Date.now("yyyymmdd-hhMMss");
 // save the audio data to a file in the /rec subfolder
@@ -85,7 +85,6 @@ app.get('/wsurl', (req, res) => {
 res.send(process.env.WS_URL, 200, { 'Content-Type': 'text/plain' });
 });
-
 app.listen(8080, () => {
     console.log('Server listening on port 8080');
 });
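One server-side detail worth flagging: Date.now() takes no arguments and always returns milliseconds since the Unix epoch, so the format string in Date.now("yyyymmdd-hhMMss") is silently ignored and recordings are named by epoch milliseconds rather than the intended timestamp. A hypothetical helper (not in the commit) that produces the format the comment describes:

    // Build a "yyyymmdd-hhMMss" stamp; Date.now() cannot format dates.
    function timestampFilename(d = new Date()) {
        const pad = (n) => String(n).padStart(2, "0");
        return d.getFullYear() + pad(d.getMonth() + 1) + pad(d.getDate()) +
               "-" + pad(d.getHours()) + pad(d.getMinutes()) + pad(d.getSeconds());
    }
    // e.g. timestampFilename(new Date(2023, 2, 7, 11, 54, 56)) -> "20230307-115456"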