<!DOCTYPE html>
<html>

<head>
    <title>Real-time Speech-to-Text</title>
    <style>
        .recording {
            background-color: red;
            color: white;
        }
    </style>
</head>
<body>
    <h1>Real-time Speech-to-Text</h1>
    <label class="toggle">
        <input type="checkbox" id="autosend" />
        <span class="slider">Continuous</span>
    </label>
    <select id="input-devices">
        <option value="default">Default</option>
    </select>

    <button id="record-button" disabled>Start Recording</button>
    <span id="connection-status"></span>

    <div id="status-recording"></div>
    <p id="transcription"></p>

    <canvas id="canvas" width="500" height="500"></canvas>
<script>
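        // Overview (added commentary): this page captures microphone audio,
        // runs a simple volume-based voice activity check, and streams
        // recorded chunks to a transcription server over a WebSocket. In
        // "Continuous" mode (the autosend checkbox) the MediaRecorder is
        // cycled on detected speech/silence boundaries so each utterance is
        // sent as its own chunk; otherwise audio is sent only when recording
        // is stopped manually.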
        let selectedDeviceId = "default";
        let socket;
        let audioRecorder;
        let analyser; // was an undeclared implicit global
        let inputDevices; // the <select>; its id "input-devices" is not a valid identifier, so no auto-global exists
        let recording = false;
        let recordButton;
        let connected = false;
        let connectionStatus; // element looked up in window.onload
        let statusRecording; // element looked up in window.onload
        let audioContext;
        let serverTime;

        let volumeChecker;
        let lastVolumes = new Array(5);
        let averageVolume;
        let silenceCount = 0;
        let isSpeaking = false;
        let soundDetected = false;
        let speakingCount = 0;
        let SILENCE_DELAY_MS = 100;
        const volumeHistory = [];
        let canvas = document.getElementById("canvas");
        let canvasCtx = canvas.getContext("2d");
        let barWidth = 10;
        let barSpacing = 5;

        // Draw sliding bar graph
        function drawSlidingBarGraph(lastVolumes) {
            canvasCtx.clearRect(0, 0, canvas.width, canvas.height);
            // Draw one bar per volume sample, scaled to the canvas height
            for (let i = 0; i < lastVolumes.length; i++) {
                let value = lastVolumes[i];
                let barHeight = (value / 255) * canvas.height;
                let x = i * (barWidth + barSpacing);
                let y = canvas.height - barHeight;
                canvasCtx.fillRect(x, y, barWidth, barHeight);
            }
        }
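        // How the silence detection below works (added commentary):
        // averageVolume is the mean of the analyser's byte frequency data
        // (0-255). The threshold adapts to the environment: it is the rolling
        // mean of up to the last 100 samples plus a fixed margin of 5. With
        // SILENCE_DELAY_MS = 100, speech is confirmed after 7 consecutive
        // loud ticks (~700 ms) and a segment is cut after 5 consecutive quiet
        // ticks (~500 ms).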
        // Check the audio level every SILENCE_DELAY_MS milliseconds.
        // Wrapped in a function so the interval can be restarted after
        // stopListening() has cleared it.
        function startVolumeChecker() {
            clearInterval(volumeChecker);
            volumeChecker = setInterval(() => {
                if (!audioContext) {
                    console.log("No audio context");
                    return;
                }
                const frequencyData = new Uint8Array(analyser.frequencyBinCount);
                //analyser.getByteTimeDomainData(dataArray); //history
                analyser.getByteFrequencyData(frequencyData); //current

                let totalVolume = 0;
                for (let i = 0; i < frequencyData.length; i++) {
                    totalVolume += frequencyData[i];
                }
                averageVolume = totalVolume / frequencyData.length;

                volumeHistory.push(averageVolume);
                if (volumeHistory.length > 100) {
                    volumeHistory.shift();
                }

                const threshold = volumeHistory.reduce((acc, curr) => acc + curr) / volumeHistory.length + 5;
                const isSilent = averageVolume < threshold;

                // count speaking and silence
                if (averageVolume > threshold) {
                    if (autosend.checked && speakingCount == 0 && audioRecorder) {
                        console.log("starting new recording");
                        audioRecorder.stop();
                        audioRecorder.start();
                        soundDetected = true;
                    }
                    speakingCount++;
                    silenceCount = 0; // reset so a later pause must again last 5 full ticks
                    if (speakingCount > 7) {
                        statusRecording.innerHTML = "Listening...";
                        statusRecording.style.color = "green";
                        isSpeaking = true;
                        console.log("Was silent and is now speaking. (" + averageVolume + " averageVolume).");
                    }
                } else {
                    speakingCount = 0;
                    if (isSpeaking) {
                        silenceCount++;
                        if (silenceCount > 5) {
                            if (autosend.checked) {
                                console.log("Was speaking and is now silent. (" + averageVolume + " averageVolume). Sending audio to server.");
                                audioRecorder.stop();
                                audioRecorder.start();
                            }
                            isSpeaking = false;
                            statusRecording.innerHTML = "Silence detected...";
                            statusRecording.style.color = "orange";
                        }
                    }
                }

                //console.log(`Average volume: ${averageVolume}, isSilent: ${isSilent}, threshold: ${threshold}`);
                //drawSlidingBarGraph(lastVolumes);
            }, SILENCE_DELAY_MS);
        }
        startVolumeChecker();
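        // Note (added commentary): with fftSize = 2048 the analyser exposes
        // frequencyBinCount = 1024 bins, and smoothingTimeConstant = 0.8
        // averages each frame with previous ones, which steadies the volume
        // readings used by the checker above.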
        function InitAudioAnalyser(stream) {
            // Initialize the Web Audio API
            audioContext = new AudioContext();
            const source = audioContext.createMediaStreamSource(stream);
            analyser = audioContext.createAnalyser();
            //analyser.fftSize = 32;
            analyser.fftSize = 2048;
            analyser.smoothingTimeConstant = 0.8;
            source.connect(analyser);
            // MediaRecorder exposes no sampleRate property, so only the context rate is logged
            console.log("Audio context initialized. analyser sampling: " + audioContext.sampleRate + "Hz");
        }
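        // connect() initializes wsurl with a localhost default, asks the HTTP
        // server for the actual WebSocket URL via GET /wsurl, then opens the
        // socket; on close it schedules a reconnect attempt every 5 seconds.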
        function connect() {
            connectionStatus.innerHTML = "Connecting to WS...";
            let wsurl = "ws://localhost:8081";
            // Get the current ws url from the server
            fetch("/wsurl")
                .then((response) => response.text())
                .then((data) => {
                    wsurl = data;
                    console.log("Got ws url: '" + wsurl + "'");
                })
                .then(() => {
                    console.log("connecting to '" + wsurl + "'...");
                    socket = new WebSocket(wsurl);
                    socket.onopen = () => {
                        console.log("WebSocket connection opened.");
                        connectionStatus.innerHTML = "Connected to " + wsurl;
                        transcription.innerHTML = "Whisper something into the microphone...";
                        recordButton.disabled = false;
                        connected = true;
                    };
                    socket.onmessage = onmessage;
                    socket.onclose = () => {
                        console.log("WebSocket connection closed");
                        connectionStatus.innerHTML = "Disconnected";
                        recordButton.disabled = true;
                        connected = false;
                        setTimeout(() => {
                            connect();
                        }, 5000);
                    };
                })
                .catch((error) => {
                    console.log("Error getting ws url: " + error);
                    connectionStatus.innerHTML = "Error getting ws url: " + error;
                });
        }
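        // The reported latency is the time between the last send in
        // sendAudioToServer() (which records serverTime) and this reply.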
        function onmessage(event) {
            let latency = Date.now() - serverTime;
            console.log("Received message from server: " + event.data + " (latency: " + latency + "ms)");
            if (autosend.checked) {
                // append to the text on a new line
                transcription.innerHTML += "<br>>" + event.data;
                statusRecording.innerHTML = "waiting...";
                statusRecording.style.color = "black";
            } else {
                // replace the text
                transcription.innerHTML = event.data;
            }
        }
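        // Each audioRecorder.stop()/start() cycle flushes one complete,
        // self-contained recording through the "dataavailable" event
        // (typically audio/webm in Chromium-based browsers). The
        // sampleRate: 16000 constraint is only a hint; browsers may record
        // at their native rate.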
        function startListening() {
            // Initialize canvas
            canvasCtx.fillStyle = "green";
            recording = true;
            // Use the device chosen in the select list (kept in selectedDeviceId)
            navigator.mediaDevices.getUserMedia({ audio: { deviceId: selectedDeviceId, sampleRate: 16000, echoCancellation: true } }).then((stream) => {
                audioRecorder = new MediaRecorder(stream);
                audioRecorder.start();
                audioRecorder.addEventListener("dataavailable", (event) => {
                    console.log("Audio data available: " + event.data.size + " bytes");
                    if (!soundDetected && autosend.checked) {
                        console.log("discarding audio data because not speaking");
                        return;
                    }
                    if (event.data.size > 0) {
                        sendAudioToServer(event.data);
                        soundDetected = false;
                    }
                });

                InitAudioAnalyser(stream);
                startVolumeChecker(); // restart in case stopListening() cleared the interval
            });

            recordButton.innerHTML = "Stop Recording";
            recordButton.classList.add("recording");
        }
        function stopListening() {
            recording = false;
            audioRecorder.stop();
            recordButton.innerHTML = "Start Recording";
            recordButton.classList.remove("recording");
            clearInterval(volumeChecker);
        }
        function sendAudioToServer(data) {
            if (connected) {
                //const blob = new Blob(data, { type: 'audio/webm' });
                socket.send(data);
                serverTime = Date.now();
                console.log("Sent some audio data to server.");
                if (!autosend.checked) {
                    transcription.innerHTML = "Processing audio...";
                }
            } else {
                console.log("Not connected, not sending audio data to server.");
            }
        }
        function toggleListening() {
            // Always allow stopping; only start when the socket is open
            if (recording) {
                stopListening();
            } else if (socket.readyState === WebSocket.OPEN) {
                startListening();
            }
        }
|
|
|
|
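        // Note (added commentary): enumerateDevices() returns empty device
        // labels until the user has granted microphone permission, hence the
        // 'Device <id>' fallback below.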
        function enumerateDevices() {
            // Enumerate the available audio input devices
            navigator.mediaDevices.enumerateDevices()
                .then(function (devices) {
                    var audioInputDevices = devices.filter(function (device) {
                        return device.kind === 'audioinput';
                    });
                    console.log(audioInputDevices.length + ' audio input devices found');

                    // If more than one audio input device is available, populate the select list
                    if (audioInputDevices.length > 1) {
                        audioInputDevices.forEach(function (device) {
                            var option = document.createElement('option');
                            option.value = device.deviceId;
                            option.text = device.label || 'Device ' + device.deviceId;
                            inputDevices.appendChild(option);
                        });
                        // Listen for changes to the select list and connect to the selected audio input device
                        inputDevices.addEventListener('change', function (event) {
                            // Update the shared id ('var' here used to shadow the global)
                            selectedDeviceId = event.target.value;
                            var constraints = { audio: { deviceId: selectedDeviceId } };
                            navigator.mediaDevices.getUserMedia(constraints)
                                .then(function (stream) {
                                    // Handle the audio stream from the selected device here
                                })
                                .catch(function (error) {
                                    console.log('Error accessing audio stream:', error);
                                });
                        });
                    }
                    // If only one audio input device is available, connect to it automatically
                    else if (audioInputDevices.length === 1) {
                        var constraints = { audio: { deviceId: audioInputDevices[0].deviceId } };
                        navigator.mediaDevices.getUserMedia(constraints)
                            .then(function (stream) {
                                // Handle the audio stream from the selected device here
                            })
                            .catch(function (error) {
                                console.log('Error accessing audio stream:', error);
                            });
                    }
                    // If no audio input devices are available, show an error message
                    else {
                        console.log('No audio input devices available');
                    }
                })
                .catch(function (error) {
                    console.log('Error listing audio input devices:', error);
                });
        }
        window.onload = () => {
            recordButton = document.getElementById("record-button");
            recordButton.addEventListener("click", toggleListening);
            connectionStatus = document.getElementById("connection-status");
            inputDevices = document.getElementById("input-devices");
            // transcription and autosend are reachable as id-globals, so these lookups are unnecessary:
            //transcription = document.getElementById("transcription");
            //autosend = document.getElementById("autosend");
            statusRecording = document.getElementById("status-recording");

            enumerateDevices();
            connect(); // connect() takes no arguments; it creates the socket itself
        };
    </script>
    <script src="https://cdn.webrtc-experiment.com/MediaStreamRecorder.js"></script>
</body>

</html>