gogo2/web/client.html

<!DOCTYPE html>
<html lang="en">

<head>
    <!-- Declare the encoding first: the page contains non-ASCII option labels (Cyrillic, accented Latin). -->
    <meta charset="utf-8">
    <title>Real-time Speech-to-Text</title>
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <!-- Prebuilt Tailwind CSS 2.2.19 from cdnjs; supplies the utility classes used in the markup below. -->
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/tailwindcss/2.2.19/tailwind.min.css">
</head>

<body class="bg-gray-100">
    <div class="container mx-auto px-4 py-8">
        <h1 class="text-2xl font-bold mb-4 text-center">Rt STT</h1>
        <!-- Session controls. The inline script reads these elements by id, so the ids must not change. -->
        <div class="flex justify-center items-center mb-4">
            <label class="toggle flex items-center">
                <input type="checkbox" id="autosend" class="mr-2">
                <span class="slider"></span>
                <span class="ml-2">Continuous</span>
            </label>
            <!-- Further options are appended by enumerateDevices() when several inputs exist. -->
            <select id="input-devices" class="ml-4" aria-label="Audio input device">
                <option value="default">Default</option>
            </select>
            <select id="language-select" aria-label="Spoken language">
                <option value="auto">Auto</option>
                <option value="en">English</option>
                <option value="bg" lang="bg">Български</option>
                <option value="fr" lang="fr">Français</option>
            </select>
            <select id="task-select" aria-label="Task">
                <option value="transcribe">Transcribe</option>
                <option value="translate">Translate</option>
            </select>
            <label class="toggle flex items-center ml-4">
                <input type="checkbox" id="store-recordings" class="mr-2">
                <span class="slider"></span>
                <span class="ml-2">Store Recordings</span>
            </label>
        </div>
        <div class="flex justify-center items-center mb-4">
            <span id="record-actions">
                <!-- Enabled by the inline script once the WebSocket connection is open. -->
                <button type="button" id="record-button" disabled
                    class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4">
                    Start Recording</button>
                <!-- The inline script in this file never enables or handles this button. -->
                <button type="button" id="record-button-speakers" disabled
                    class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4">
                    Stream from speakers</button>
            </span>
        </div>
        <div class="flex justify-center items-center mb-4">
            <!-- Connection state text written by connect(); exposed as a status region for assistive technology. -->
            <div id="connection-status" role="status" style="margin-right: 5px;"></div>
        </div>
        <div class="flex justify-center items-center mb-4">
            <!-- Latency / sample-rate / queue information written by the inline script. -->
            <div id="info"></div>
        </div>
        <!-- Listening / silence / detected-language text written by the inline script. -->
        <div id="status-recording" class="flex justify-center items-center mb-4">
        </div>
        <div class="relative rounded-lg border border-gray-300 shadow-sm">
            <!-- A placeholder is not an accessible name, so the textarea is labelled explicitly. -->
            <textarea id="transcription" class="block w-full h-48 p-4 resize-none" aria-label="Transcription"
                placeholder="Whisper something into the microphone..."></textarea>
            <button type="button" id="copyButton"
                class="absolute top-0 right-0 px-4 py-2 text-sm font-medium text-gray-700 bg-gray-200 hover:bg-gray-300 rounded-bl-lg focus:outline-none"
                onclick="copyToClipboard('transcription')">
                Copy
            </button>
            <!-- The handler reaches the textarea through the global named after its id. -->
            <button type="button" id="clearButton"
                class="absolute top-0 right-20 px-2 py-1 text-sm font-medium text-gray-700 bg-gray-200 hover:bg-gray-300 rounded-br-lg focus:outline-none"
                onclick="transcription.value = ''">
                Clear
            </button>
        </div>
        <canvas id="canvas" class="w-full"></canvas>
        <script>
            // ---- Shared page state -------------------------------------------------
            // Session identifier received from the server over the WebSocket and
            // echoed back with every /settings request.
            let sessionId;

            let selectedDeviceId = "default";
            let socket;                 // WebSocket created by connect()
            let audioRecorder;          // MediaRecorder created by startListening()
            let audioStream;            // microphone MediaStream while recording
            let recording = false;
            let recordButton;           // assigned in window.onload
            let connected = false;      // true between socket.onopen and socket.onclose
            let connectionStatus;       // assigned in window.onload
            let statusRecording;        // assigned in window.onload
            let audioContext;           // analyser AudioContext, set by InitAudioAnalyser()
            let audioSampleRate;
            let serverTime;             // Date.now() of the last audio send; used for latency

            // These names are assigned elsewhere in this script without a declaration
            // of their own. Declaring them here keeps them from becoming implicit
            // globals (which would throw in strict mode).
            let analyser;               // AnalyserNode, set by InitAudioAnalyser()
            let languageSelect;         // assigned in window.onload
            let inputDevices;           // assigned in window.onload
            let storeRecordings;        // assigned in window.onload
            let taskSelect;             // assigned in window.onload

            // ---- Voice-activity detection state ------------------------------------
            let volumeChecker;          // interval id of the level monitor
            let lastVolumes = new Array(5);
            let averageVolume;
            let silenceCount = 0;
            let isSpeaking = false;
            let soundDetected = false;  // tells the recorder's dataavailable handler whether to send
            let speakingCount = 0;

            let SILENCE_DELAY_MS = 50;          // level-monitor period
            let preDetect_IncludedAudio = 400; //ms
            let soundCount_Threshold = 10;      // loud ticks needed before "speaking"
            let silenceCount_Threshold = 10;    // silent ticks needed before "silence"

            // Recent average volumes; the level monitor derives its threshold from them.
            const volumeHistory = [];

            // ---- Level-meter canvas -------------------------------------------------
            let canvas = document.getElementById("canvas");
            let canvasCtx = canvas.getContext("2d");
            let barWidth = 10;
            let barSpacing = 5;

            // POST a partial settings update for the current session to /settings.
            //   changes: plain object holding the setting(s) that changed, e.g. { language: "en" }.
            // The current sessionId is added to the payload (JSON.stringify omits it
            // while it is still undefined). Failures are logged instead of being left
            // as unhandled promise rejections.
            function postSettings(changes) {
                return fetch('/settings', {
                    method: 'POST',
                    body: JSON.stringify(Object.assign({}, changes, { sessionId })),
                    headers: { 'Content-Type': 'application/json' },
                    credentials: 'same-origin'
                })
                    .then((response) => {
                        if (!response.ok) {
                            console.log("Settings update failed: HTTP " + response.status);
                        }
                        return response;
                    })
                    .catch((error) => {
                        console.log("Error sending settings update: " + error);
                    });
            }

            // Forward every change of the language / store-recordings / task controls
            // to the server.
            document.getElementById('language-select').addEventListener('change', (event) => {
                postSettings({ language: event.target.value });
            });
            document.getElementById('store-recordings').addEventListener('change', (event) => {
                postSettings({ storeRecordings: event.target.checked });
            });
            document.getElementById('task-select').addEventListener('change', (event) => {
                postSettings({ task: event.target.value });
            });

            // Repaint the level canvas as a row of vertical bars, one per entry of
            // lastVolumes. Each value is scaled from the 0..255 range to the canvas
            // height and drawn bottom-aligned using the context's current fill style.
            function drawSlidingBarGraph(lastVolumes) {
                canvasCtx.clearRect(0, 0, canvas.width, canvas.height);

                const pitch = barWidth + barSpacing; // horizontal distance between bar origins
                let index = 0;
                while (index < lastVolumes.length) {
                    const scaledHeight = (lastVolumes[index] / 255) * canvas.height;
                    const left = index * pitch;
                    const top = canvas.height - scaledHeight;
                    canvasCtx.fillRect(left, top, barWidth, scaledHeight);
                    index += 1;
                }
            }

            // Voice-activity monitor, run every SILENCE_DELAY_MS milliseconds.
            //
            // Each tick averages the analyser's byte frequency data, keeps the last
            // 100 averages in volumeHistory and treats a tick as "loud" when its
            // average exceeds the history mean by more than 5.
            //   * More than soundCount_Threshold consecutive loud ticks mark the
            //     start of speech.
            //   * More than silenceCount_Threshold consecutive quiet ticks after
            //     speech mark its end. In "Continuous" mode the recorder is then
            //     restarted with soundDetected = true so that its dataavailable
            //     handler sends the finished chunk.
            volumeChecker = setInterval(() => {
                // Idle until a recording session has built the analyser graph.
                if (!recording || !audioContext) {
                    return;
                }

                const frequencyData = new Uint8Array(analyser.frequencyBinCount);
                analyser.getByteFrequencyData(frequencyData);

                let totalVolume = 0;
                for (let i = 0; i < frequencyData.length; i++) {
                    totalVolume += frequencyData[i];
                }
                averageVolume = totalVolume / frequencyData.length;

                volumeHistory.push(averageVolume);
                if (volumeHistory.length > 100) {
                    volumeHistory.shift();
                }

                // The history is never empty here (a value was pushed just above).
                const threshold = volumeHistory.reduce((acc, curr) => acc + curr) / volumeHistory.length + 5;

                // Stop the recorder (flushing a dataavailable event) and start a
                // fresh recording. stop() throws on an inactive recorder, so guard it.
                const restartRecorder = () => {
                    if (!audioRecorder) {
                        return;
                    }
                    if (audioRecorder.state !== "inactive") {
                        audioRecorder.stop();
                    }
                    audioRecorder.start();
                };

                if (averageVolume > threshold) {
                    // Only consecutive quiet ticks may end an utterance.
                    silenceCount = 0;

                    // First loud tick after silence: drop what was recorded so far.
                    // Skipped while an utterance is in progress so that a short dip
                    // does not throw away the speech captured up to that point.
                    if (autosend.checked && speakingCount == 0 && !isSpeaking && audioRecorder) {
                        console.log("starting new recording");
                        soundDetected = false;
                        restartRecorder();
                    }
                    speakingCount++;
                    if (speakingCount > soundCount_Threshold) {
                        statusRecording.innerHTML = "Listening...";
                        statusRecording.style.color = "green";
                        if (!isSpeaking) {
                            console.log("Was silent and is now speaking. (" + averageVolume + " averageVolume).");
                        }
                        isSpeaking = true;
                    }
                } else {
                    speakingCount = 0;
                    if (isSpeaking) {
                        silenceCount++;
                        if (silenceCount > silenceCount_Threshold) {
                            if (autosend.checked && audioRecorder) {
                                console.log("Was speakng and is now silence. (" + averageVolume + " averageVolume). Sending audio to server.");
                                soundDetected = true;
                                restartRecorder();
                            }
                            isSpeaking = false;
                            // Start the next utterance with a fresh silence count.
                            silenceCount = 0;
                            statusRecording.innerHTML = "Silence detected...";
                            statusRecording.style.color = "orange";
                        }
                    }
                }
            }, SILENCE_DELAY_MS);

            // Build the Web Audio graph used for level monitoring:
            //   stream -> MediaStreamAudioSourceNode -> AnalyserNode
            // The context and analyser are stored in the shared `audioContext` and
            // `analyser` variables read by the level-monitor interval.
            //   stream: the MediaStream to analyse.
            function InitAudioAnalyser(stream) {
                // Release the context of a previous session instead of leaking it;
                // browsers cap the number of live AudioContexts.
                if (audioContext && audioContext.state !== "closed") {
                    audioContext.close();
                }

                audioContext = new AudioContext();
                const source = audioContext.createMediaStreamSource(stream);
                analyser = audioContext.createAnalyser();
                analyser.fftSize = 2048;
                analyser.smoothingTimeConstant = 0.8;
                source.connect(analyser);

                // MediaRecorder has no sampleRate property, so report the rate of the
                // captured track instead (not every browser exposes it).
                const track = stream.getAudioTracks()[0];
                const trackRate = track ? track.getSettings().sampleRate : undefined;
                console.log("Audio context initialized. analyser sampling: " + audioContext.sampleRate + "Hz, input track sampling: " + (trackRate || "unknown") + "Hz");
            }

            // Fetch the WebSocket URL from /wsurl and open the socket.
            // On open the record button is enabled; on close it is disabled and a
            // reconnect is scheduled. Failing to obtain the URL or to create the
            // socket is also retried, so a server that is temporarily down does not
            // leave the page permanently disconnected.
            function connect() {
                const DEFAULT_WS_URL = "ws://localhost:8081"; // used when the server returns an empty body
                const RETRY_DELAY_MS = 5000;
                const scheduleReconnect = () => setTimeout(() => connect(), RETRY_DELAY_MS);

                connectionStatus.innerHTML = "Connecting to WS...";
                fetch("/wsurl")
                    .then((response) => {
                        // Do not try to use an error page as a WebSocket URL.
                        if (!response.ok) {
                            throw new Error("HTTP " + response.status);
                        }
                        return response.text();
                    })
                    .then((data) => {
                        const wsurl = data.trim() || DEFAULT_WS_URL;
                        console.log("Got ws url: '" + wsurl + "'");
                        console.log("connecting to '" + wsurl + "'...");

                        socket = new WebSocket(wsurl);
                        socket.onopen = () => {
                            console.log("WebSocket connection opened.");
                            // textContent: the URL is server-supplied text, not markup.
                            connectionStatus.textContent = "Connected to " + wsurl;
                            recordButton.disabled = false;
                            connected = true;
                        };
                        socket.onmessage = onmessage;
                        socket.onerror = () => {
                            // A close event follows an error; reconnecting is handled there.
                            console.log("WebSocket error");
                        };
                        socket.onclose = () => {
                            console.log("WebSocket connection closed");
                            connectionStatus.innerHTML = "Disconnected";
                            recordButton.disabled = true;
                            connected = false;
                            scheduleReconnect();
                        };
                    })
                    .catch((error) => {
                        console.log("Error getting ws url: " + error);
                        connectionStatus.textContent = "Error getting ws url: " + error;
                        scheduleReconnect();
                    });
            }

            // Handle one WebSocket message from the server.
            //
            // JSON object messages may carry any of: sessionId, language,
            // languageDetected, taskSelect, storeRecordings, text, queueCounter.
            // Anything else is treated as plain transcription text; in "Continuous"
            // mode it is expected in the form "(queueLength)text".
            //   event: the MessageEvent delivered by the socket.
            function onmessage(event) {
                const raw = typeof event.data === "string" ? event.data : String(event.data);
                const latency = Date.now() - serverTime;

                // Only the parse itself is guarded. An error while applying a JSON
                // message must not be mistaken for "not JSON" and fall through to the
                // plain-text path.
                let json = null;
                try {
                    json = JSON.parse(raw);
                } catch (e) {
                    json = null;
                }

                if (json !== null && typeof json === "object") {
                    const has = (key) => Object.prototype.hasOwnProperty.call(json, key);

                    // Remember the session id for later /settings requests.
                    if (has("sessionId")) {
                        sessionId = json.sessionId;
                        console.log("Got session id: " + sessionId);
                    }
                    if (has("language")) {
                        languageSelect.value = json.language;
                    }
                    if (has("languageDetected")) {
                        statusRecording.textContent = "Detected language: " + json.languageDetected;
                    }
                    if (has("taskSelect")) {
                        taskSelect.value = json.taskSelect;
                    }
                    if (has("storeRecordings")) {
                        storeRecordings.checked = json.storeRecordings;
                    }
                    if (has("text")) {
                        transcription.value += "\r\n" + json.text;
                    }
                    if (has("queueCounter")) {
                        console.log("Received message from server: " + raw + " (latency: " + latency + "ms)");
                        info.textContent = "latency: " + latency + "ms; server queue: " + json.queueCounter + " requests";
                    }
                    return;
                }

                console.log("Received message from server: " + raw + " (latency: " + latency + "ms)");
                info.textContent = "latency: " + latency + "ms";

                if (autosend.checked) {
                    // "(count)text": split on the leading parenthesised group only, so
                    // parentheses inside the text survive. Unframed text is appended
                    // as it is instead of throwing.
                    const framed = /^\s*\(([^)]*)\)([\s\S]*)$/.exec(raw);
                    let text = raw.trim();
                    if (framed) {
                        text = framed[2].trim();
                        info.textContent = "latency: " + latency + "ms; server queue: " + framed[1] + " requests";
                    }
                    transcription.value += text + " ";
                    statusRecording.innerHTML = "listening...";
                    statusRecording.style.color = "black";
                } else {
                    // Replace the text. A textarea's displayed content is its value;
                    // writing innerHTML stops having any effect once the value has
                    // been changed by the user or by script.
                    transcription.value = raw;
                }
            }

            const preBufferDuration = 500; // duration of pre-buffer in ms

            // Start a recording session.
            //
            // Opens the microphone (honouring the device chosen in the
            // "input-devices" select, if any), routes input channel 0 through a Web
            // Audio graph into a MediaRecorder, and starts the level analyser on the
            // raw stream. Recorded chunks are sent from the recorder's dataavailable
            // handler:
            //   * "Continuous" on and soundDetected false: the chunk is not sent; it
            //     replaces the pre-buffer.
            //   * otherwise: a pending pre-buffer is POSTed once via
            //     sendAudioToServerPost() and the chunk goes over the WebSocket.
            // If the microphone cannot be opened the button and `recording` flag are
            // reverted and the error is shown in the status line.
            function startListening() {
                // Initialize canvas
                canvasCtx.fillStyle = "green";
                recording = true;

                recordButton.innerHTML = "Stop Recording";
                recordButton.classList.toggle('bg-red-500');
                recordButton.classList.toggle('bg-blue-500');
                recordButton.classList.toggle('hover:bg-blue-700');

                const audioConstraints = { sampleRate: 16000 };
                const deviceSelect = document.getElementById("input-devices");
                const chosenDevice = deviceSelect ? deviceSelect.value : "";
                if (chosenDevice && chosenDevice !== "default") {
                    audioConstraints.deviceId = chosenDevice;
                }

                navigator.mediaDevices.getUserMedia({ audio: audioConstraints })
                    .then((stream) => {
                        // Stopped while the permission prompt was still open: release
                        // the microphone and do nothing.
                        if (!recording) {
                            stream.getTracks().forEach((track) => track.stop());
                            return;
                        }
                        audioStream = stream;

                        // Context for the recorder graph only; the analyser gets its
                        // own context in InitAudioAnalyser().
                        const recorderContext = new AudioContext();
                        const sourceNode = recorderContext.createMediaStreamSource(audioStream);
                        const inputSampleRate = recorderContext.sampleRate;
                        console.log("Started listening to microphone (sample rate: " + inputSampleRate + " Hz, echoCancellation: " + stream.getAudioTracks()[0].getSettings().echoCancellation + ", " + sourceNode.channelCount + " channels)");

                        info.innerHTML = "Sample rate: " + inputSampleRate + " Hz";
                        let preBuffer = [];

                        // Single-channel output: only splitter output 0 is wired to
                        // the one-input merger.
                        const channelSplitter = recorderContext.createChannelSplitter(2);
                        const channelMerger = recorderContext.createChannelMerger(1);
                        sourceNode.connect(channelSplitter);
                        channelSplitter.connect(channelMerger, 0, 0);

                        const mediaStreamDestination = recorderContext.createMediaStreamDestination();
                        channelMerger.connect(mediaStreamDestination);

                        audioRecorder = new MediaRecorder(mediaStreamDestination.stream);
                        audioRecorder.addEventListener("dataavailable", (event) => {
                            console.log("Audio data available: " + (event.data.size / 1024).toFixed(2) + " KB in " + event.data.type + " format. (" + recorderContext.sampleRate + " Hz)");

                            if (!soundDetected && autosend.checked) {
                                console.log("discarding audio data because not speaking");
                                // Keep only the most recent discarded chunk.
                                preBuffer = [event.data];
                                return;
                            }
                            if (event.data.size > 0) {
                                if (preBuffer.length > 0) {
                                    sendAudioToServerPost(preBuffer);
                                    // Sent once; do not re-send it with later chunks.
                                    preBuffer = [];
                                }
                                sendAudioToServer(event.data);
                                soundDetected = false;
                            }
                        });
                        // The recorder is also stopped and restarted between
                        // utterances; release the graph only when the session ended.
                        audioRecorder.addEventListener("stop", () => {
                            if (!recording && recorderContext.state !== "closed") {
                                recorderContext.close();
                            }
                        });
                        audioRecorder.start();

                        InitAudioAnalyser(stream);
                    })
                    .catch((error) => {
                        console.log("Error starting microphone capture: " + error);
                        statusRecording.textContent = "Microphone error: " + error;
                        statusRecording.style.color = "red";
                        if (audioStream) {
                            audioStream.getTracks().forEach((track) => track.stop());
                            audioStream = null;
                        }
                        // Revert the UI unless a stop in the meantime already did.
                        if (recording) {
                            recording = false;
                            recordButton.innerHTML = "Start Recording";
                            recordButton.classList.toggle('bg-blue-500');
                            recordButton.classList.toggle('bg-red-500');
                            recordButton.classList.toggle('hover:bg-blue-700');
                        }
                    });
            }

            // Pre-buffer slicing helper.
            //   audioData: the current chunk.
            //   preBuffer: iterable of earlier chunks placed in front of audioData.
            // Returns audioData unchanged. The combined blob is split at a fixed byte
            // offset from its end, but none of the resulting blobs is returned or
            // stored, so the only observable effect is an exception when preBuffer
            // is not iterable.
            // NOTE(review): the offset 500 * 48000 * 2 / 8 was described as assuming
            // 48 kHz 16-bit PCM, while the blobs are labelled Opus-in-Ogg; whether a
            // byte offset is meaningful here should be confirmed before reviving this.
            function getlast500ms(audioData, preBuffer) {
                const blobOptions = { type: "audio/ogg; codecs=opus" };
                const combined = new Blob([...preBuffer, audioData], blobOptions);

                const tailBytes = 500 * 48000 * 2 / 8;
                const splitAt = combined.size - tailBytes;
                const tail = combined.slice(splitAt, combined.size);
                const head = combined.slice(0, splitAt);

                // Built as in the original implementation, then dropped.
                new Blob([head], blobOptions);
                new Blob([tail], blobOptions);

                return audioData;
            }

            // End the current recording session: stop the recorder (which flushes a
            // final dataavailable event), restore the record button, shut down the
            // analyser context and release the microphone.
            function stopListening() {
                recording = false;

                // stop() throws on a recorder that is missing or already inactive
                // (for example when the microphone was never opened).
                if (audioRecorder && audioRecorder.state !== "inactive") {
                    audioRecorder.stop();
                }

                recordButton.innerHTML = "Start Recording";
                recordButton.classList.toggle('bg-blue-500');
                recordButton.classList.toggle('bg-red-500');
                recordButton.classList.toggle('hover:bg-blue-700');

                // Pause level monitoring by dropping the analyser context rather than
                // clearing the interval. The interval is created only once at script
                // load and skips its work while `audioContext` is unset, so it picks
                // up again when the next session creates a new context. Closing the
                // context also releases its audio resources.
                if (audioContext) {
                    if (audioContext.state !== "closed") {
                        audioContext.close();
                    }
                    audioContext = null;
                }
                // Do not carry a half-detected utterance into the next session.
                isSpeaking = false;
                speakingCount = 0;
                silenceCount = 0;

                // Stop using the microphone.
                if (audioStream) {
                    audioStream.getTracks().forEach(track => track.stop());
                    audioStream = null;
                }
            }

            // Upload audio to the server with a multipart POST to /upload.
            //   data: an array of Blob chunks (a single Blob is accepted as well).
            // The chunks are concatenated into one Blob which is sent as the "file"
            // form field. Appending the raw array instead would be stringified by
            // FormData and upload the text "[object Blob]".
            function sendAudioToServerPost(data) {
                const parts = Array.isArray(data) ? data : [data];
                // Label the upload with the recorder's actual container type when
                // the chunks report one.
                const mimeType = (parts[0] && parts[0].type) || "audio/ogg; codecs=opus";
                const blob = new Blob(parts, { type: mimeType });

                const formData = new FormData();
                formData.append('file', blob);
                fetch('/upload', {
                    method: 'POST',
                    body: formData
                })
                    .then((response) => {
                        if (!response.ok) {
                            console.log("Audio upload failed: HTTP " + response.status);
                        }
                    })
                    .catch((error) => {
                        console.log("Error uploading audio: " + error);
                    });
            }
            // Send one audio chunk over the WebSocket and remember the send time in
            // serverTime for the latency display. Without a connection the chunk is
            // dropped and only a log line is written.
            //   data: payload passed straight to socket.send().
            function sendAudioToServer(data) {
                if (!connected) {
                    console.log("Not connected, not sending audio data to server.");
                    return;
                }

                socket.send(data);
                serverTime = Date.now();
                console.log("Sent some audio data to server.");

                // In one-shot mode, hint that a transcription is on its way.
                if (autosend.checked) {
                    return;
                }
                transcription.placeholder = "Processing audio...";
            }
            // Click handler of the record button: stop when recording, otherwise
            // start, but only while the WebSocket is open.
            function toggleListening() {
                // Stopping must work regardless of the connection state, otherwise a
                // dropped socket would leave the microphone recording.
                if (recording) {
                    stopListening();
                    return;
                }
                // `socket` is unset until connect() has created it.
                if (socket && socket.readyState === WebSocket.OPEN) {
                    startListening();
                }
            }

            // List the audio input devices and, when there is more than one, add
            // them to the `inputDevices` select and track the user's choice in the
            // shared `selectedDeviceId` variable.
            function enumerateDevices() {
                // navigator.mediaDevices is unavailable in insecure contexts. Bail out
                // instead of throwing, so the caller can go on to open the connection.
                if (!navigator.mediaDevices || !navigator.mediaDevices.enumerateDevices) {
                    console.log('Error listing audio input devices: mediaDevices API not available');
                    return;
                }

                // Streams opened here are only probes; release them right away so the
                // microphone is not kept open.
                var releaseStream = function (stream) {
                    stream.getTracks().forEach(function (track) {
                        track.stop();
                    });
                };

                navigator.mediaDevices.enumerateDevices()
                    .then(function (devices) {
                        var audioInputDevices = devices.filter(function (device) {
                            return device.kind === 'audioinput';
                        });
                        console.log(audioInputDevices.length + ' audio input devices found');

                        // If more than one audio input device is available, populate the select list
                        if (audioInputDevices.length > 1) {
                            audioInputDevices.forEach(function (device) {
                                var option = document.createElement('option');
                                option.value = device.deviceId;
                                option.text = device.label || 'Device ' + device.deviceId;
                                inputDevices.appendChild(option);
                            });
                            // Remember the selection in the shared variable. A `var`
                            // declaration here would only create a local shadowing it.
                            inputDevices.addEventListener('change', function (event) {
                                selectedDeviceId = event.target.value;
                                console.log('Selected audio input device: ' + selectedDeviceId);
                            });
                        }
                        // If only one audio input device is available, open it once.
                        // NOTE(review): presumably this is meant to trigger the
                        // permission prompt early - confirm.
                        else if (audioInputDevices.length === 1) {
                            var constraints = { audio: { deviceId: audioInputDevices[0].deviceId } };
                            navigator.mediaDevices.getUserMedia(constraints)
                                .then(releaseStream)
                                .catch(function (error) {
                                    console.log('Error accessing audio stream:', error);
                                });
                        }
                        // If no audio input devices are available, log it
                        else {
                            console.log('No audio input devices available');
                        }
                    })
                    .catch(function (error) {
                        console.log('Error listing audio input devices:', error);
                    });
            }

            // Page bootstrap: resolve the elements the script works with, wire the
            // record button, list the input devices and open the server connection.
            // The "transcription", "autosend" and "info" elements are not looked up
            // here; the script reaches them through the globals named after their ids.
            window.onload = function () {
                const byId = (id) => document.getElementById(id);

                recordButton = byId("record-button");
                recordButton.addEventListener("click", toggleListening);

                connectionStatus = byId("connection-status");
                statusRecording = byId("status-recording");

                languageSelect = byId("language-select");
                inputDevices = byId("input-devices");
                storeRecordings = byId("store-recordings");
                taskSelect = byId("task-select");

                enumerateDevices();
                connect(socket);
            };

            // Copy the content of a text control to the clipboard.
            //   id: element id of the textarea/input to copy from.
            // The text is selected as visual feedback. The asynchronous Clipboard
            // API is used when available (secure contexts only); the deprecated
            // document.execCommand('copy') remains as the fallback.
            function copyToClipboard(id) {
                var textarea = document.getElementById(id);
                if (!textarea) {
                    console.log("copyToClipboard: no element with id '" + id + "'");
                    return;
                }
                textarea.select();

                var legacyCopy = function () {
                    textarea.select();
                    document.execCommand('copy');
                };

                if (navigator.clipboard && navigator.clipboard.writeText) {
                    navigator.clipboard.writeText(textarea.value).catch(legacyCopy);
                } else {
                    legacyCopy();
                }
            }


        </script>
        <script src="https://cdn.webrtc-experiment.com/MediaStreamRecorder.js"></script>
    </div>
</body>

</html>