// Browser audio-capture module: records microphone audio with MediaRecorder,
// runs a simple adaptive-threshold voice-activity detector off an AnalyserNode,
// and ships captured audio to a server over a WebSocket (`socket`) or HTTP POST.
//
// NOTE(review): `info`, `autosend`, and `transcription` are referenced as
// implicit globals — presumably DOM elements bound by id elsewhere; confirm.

let selectedDeviceId = "default"; // currently unused; kept for future device selection
export let serverTime;            // Date.now() of the most recent audio send
export let recordButton;          // push-to-talk button, injected via setRecordButton()

let socket;                       // WebSocket injected via setSocket()
let audioRecorder;                // active MediaRecorder (created after getUserMedia resolves)
let audioStream;                  // raw MediaStream from getUserMedia
let recording = false;            // true while listening
let connectionStatus;             // unused in this chunk
let statusRecording;              // VAD status DOM element — assumed assigned elsewhere; verify
let audioContext;                 // analysis-only AudioContext owned by InitAudioAnalyser
let volumeChecker;                // interval id from initializeVolumeChecker()
let lastVolumes = new Array(5);   // unused
let averageVolume;                // last computed mean of the frequency-bin magnitudes
let silenceCount = 0;             // consecutive quiet polls while speaking
let isSpeaking = false;           // VAD state: currently inside an utterance
let soundDetected = false;        // tells the dataavailable handler to forward the chunk
let speakingCount = 0;            // consecutive loud polls
let analyser = null;              // AnalyserNode created by InitAudioAnalyser

let SILENCE_DELAY_MS = 50;          // VAD polling period
let preDetect_IncludedAudio = 400;  // ms of intended pre-roll (currently unused)
let soundCount_Threshold = 10;      // loud polls required before "Listening..."
let silenceCount_Threshold = 10;    // quiet polls required before "Silence detected..."

const volumeHistory = []; // rolling window (<=100 samples) used for the adaptive threshold

/** Inject the WebSocket used by sendAudioToServer. */
export function setSocket(newSocket) {
  socket = newSocket;
}

/** Inject the push-to-talk button and wire its click handler. */
export function setRecordButton(newRecordButton) {
  recordButton = newRecordButton;
  recordButton.addEventListener("click", toggleListening);
}

/**
 * Create the analysis AudioContext + AnalyserNode over `stream`.
 * Must run before initializeVolumeChecker's interval can do useful work.
 */
export function InitAudioAnalyser(stream) {
  audioContext = new AudioContext();
  const source = audioContext.createMediaStreamSource(stream);
  analyser = audioContext.createAnalyser();
  analyser.fftSize = 2048;
  analyser.smoothingTimeConstant = 0.8;
  source.connect(analyser);
}

/**
 * Start capturing microphone audio, down-mixed to one channel, and forward
 * recorded chunks to the server as they become available.
 */
export function startListening() {
  recording = true;
  navigator.mediaDevices
    .getUserMedia({ audio: { sampleRate: 16000 } })
    .then((stream) => {
      audioStream = stream;

      // NOTE(review): this is a second AudioContext in addition to the one
      // InitAudioAnalyser creates below; kept to preserve existing behavior.
      // (Renamed from `audioContext`, which shadowed the module-level variable.)
      const captureContext = new AudioContext();
      const sourceNode = captureContext.createMediaStreamSource(audioStream);
      const audioSampleRate = sourceNode.context.sampleRate;
      info.innerHTML = "Sample rate: " + audioSampleRate + " Hz";

      var preBuffer = [];

      // Down-mix to a single channel before recording.
      const channelSplitter = captureContext.createChannelSplitter(2);
      const channelMerger = captureContext.createChannelMerger(1);
      sourceNode.connect(channelSplitter);
      channelSplitter.connect(channelMerger, 0, 0);
      const outputNode = channelMerger;

      const mediaStreamDestination = captureContext.createMediaStreamDestination();
      outputNode.connect(mediaStreamDestination);
      const singleChannelStream = mediaStreamDestination.stream;

      audioRecorder = new MediaRecorder(singleChannelStream);
      audioRecorder.start();

      audioRecorder.addEventListener("dataavailable", (event) => {
        // In auto-send mode, chunks produced before speech is detected are
        // held back; only the most recent chunk is kept as pre-roll.
        if (!soundDetected && autosend.checked) {
          preBuffer = [];
          preBuffer.push(event.data);
          return;
        }
        if (event.data.size > 0) {
          let data = event.data;
          console.log("audio data size: " + data.size);
          if (preBuffer.length > 0) {
            sendAudioToServerPost(preBuffer);
          }
          sendAudioToServer(data);
          soundDetected = false;
        }
      });

      InitAudioAnalyser(stream);
    })
    .catch((err) => {
      // FIX: the original chain had no rejection handler, so a denied mic
      // permission left `recording` stuck at true and the rejection unhandled.
      recording = false;
      console.error("getUserMedia failed:", err);
    });

  recordButton.innerHTML = "Stop Talking";
  recordButton.classList.toggle('bg-red-500');
  recordButton.classList.toggle('bg-blue-500');
  recordButton.classList.toggle('hover:bg-blue-700');
}

/** Stop capturing: halt the recorder, release mic tracks, restore the button. */
export function stopListening() {
  recording = false;
  // FIX: audioRecorder only exists once getUserMedia has resolved; a quick
  // start/stop previously called stop() on undefined (TypeError). Also avoid
  // MediaRecorder's InvalidStateError when already inactive.
  if (audioRecorder && audioRecorder.state !== "inactive") {
    audioRecorder.stop();
  }
  recordButton.innerHTML = "Push to Talk";
  recordButton.classList.toggle('bg-blue-500');
  recordButton.classList.toggle('bg-red-500');
  recordButton.classList.toggle('hover:bg-blue-700');
  clearInterval(volumeChecker);
  if (audioStream) {
    audioStream.getTracks().forEach(track => track.stop());
    audioStream = null;
  }
}

/**
 * POST an array of recorded chunks to /upload as one Ogg/Opus blob.
 * @param {Blob[]} data - recorded audio chunks (e.g. the pre-roll buffer)
 */
export function sendAudioToServerPost(data) {
  const blob = new Blob(data, { type: "audio/ogg; codecs=opus" });
  var formData = new FormData();
  // FIX: the Blob was built but the raw chunk array was appended instead,
  // so the server received stringified junk rather than the audio payload.
  formData.append('file', blob);
  fetch('/upload', { method: 'POST', body: formData });
}

/**
 * Send one recorded chunk over the WebSocket for transcription.
 * FIXME: `data` is a Blob and JSON.stringify serializes a Blob to {} — the
 * server cannot receive the audio bytes this way. The payload needs to be
 * converted (e.g. base64 via FileReader/arrayBuffer) to match the server's
 * expected wire format; not changed here without that contract.
 */
export function sendAudioToServer(data) {
  socket.send(JSON.stringify({ type: 'audio', task: "transcribe", audio: data }));
  serverTime = Date.now();
  if (!autosend.checked) {
    transcription.innerHTML = "Processing audio...";
  }
}

/** Button handler: flip between listening and idle while the socket is open. */
export function toggleListening() {
  if (socket.readyState === WebSocket.OPEN) {
    if (recording) {
      stopListening();
    } else {
      startListening();
    }
  }
}

/**
 * Start the periodic voice-activity check. Every SILENCE_DELAY_MS it averages
 * the analyser's frequency bins, maintains an adaptive threshold (rolling mean
 * + 5), and flips between speaking/silent states, cycling the recorder in
 * auto-send mode so each utterance becomes its own chunk.
 */
export function initializeVolumeChecker() {
  volumeChecker = setInterval(() => {
    if (!audioContext) {
      console.log("No audio context");
      return;
    }
    const frequencyData = new Uint8Array(analyser.frequencyBinCount);
    analyser.getByteFrequencyData(frequencyData);

    let totalVolume = 0;
    for (let i = 0; i < frequencyData.length; i++) {
      totalVolume += frequencyData[i];
    }
    averageVolume = totalVolume / frequencyData.length;

    volumeHistory.push(averageVolume);
    if (volumeHistory.length > 100) {
      volumeHistory.shift();
    }
    const threshold =
      volumeHistory.reduce((acc, curr) => acc + curr) / volumeHistory.length + 5;

    if (averageVolume > threshold) {
      // FIX: reset the silence counter on sound. The original never reset it,
      // so after the first silence event every later quiet poll re-triggered
      // the "silence" commit immediately instead of waiting the full
      // silenceCount_Threshold period.
      silenceCount = 0;
      // On the first loud poll of an utterance, restart the recorder so the
      // utterance begins on a fresh chunk.
      if (autosend.checked && speakingCount == 0 && audioRecorder) {
        soundDetected = false;
        audioRecorder.stop();
        audioRecorder.start();
      }
      speakingCount++;
      if (speakingCount > soundCount_Threshold) {
        statusRecording.innerHTML = "Listening...";
        statusRecording.style.color = "green";
        isSpeaking = true;
      }
    } else if (averageVolume - 5 < threshold) {
      speakingCount = 0;
      if (isSpeaking) {
        silenceCount++;
        if (silenceCount > silenceCount_Threshold) {
          // Utterance ended: cycle the recorder to flush the chunk and let
          // the dataavailable handler forward it.
          if (autosend.checked) {
            soundDetected = true;
            audioRecorder.stop();
            audioRecorder.start();
          }
          isSpeaking = false;
          silenceCount = 0; // FIX: count afresh for the next utterance
          statusRecording.innerHTML = "Silence detected...";
          statusRecording.style.color = "orange";
        }
      }
    }
  }, SILENCE_DELAY_MS);
}