// diff --git a/web/audio.js b/web/audio.js
// new file mode 100644
// index 0000000..53b1e2b
// --- /dev/null
// +++ b/web/audio.js
// @@ -0,0 +1,192 @@

/**
 * Microphone capture, voice-activity detection and upload helpers.
 *
 * The module records the microphone through a MediaRecorder, watches the
 * input volume through an AnalyserNode and sends recorded chunks to the
 * server over a WebSocket (and the pre-speech chunk over HTTP POST).
 *
 * It reads the free identifiers `info`, `autosend` and `transcription`,
 * which are not declared in this module.
 * NOTE(review): presumably these are page elements exposed as globals —
 * confirm against the HTML that loads this module.
 */

// Currently unreferenced in this module; kept for callers/future use.
let selectedDeviceId = "default";
let preDetect_IncludedAudio = 400; //ms

/** Timestamp (ms since epoch) of the last chunk sent over the socket. */
export let serverTime;
/** Button registered through setRecordButton(). */
export let recordButton;

let socket;
let audioRecorder;
let audioStream;
let recording = false;
let statusRecording;
let audioContext;
let volumeChecker;
let averageVolume;
let silenceCount = 0;
let isSpeaking = false;
let soundDetected = false;
let speakingCount = 0;
let analyser = null;

// Interval between two volume samples.
const SILENCE_DELAY_MS = 50;
// Number of consecutive loud samples before the state becomes "speaking".
const soundCount_Threshold = 10;
// Number of quiet samples (while speaking) before silence is reported.
const silenceCount_Threshold = 10;
// Maximum number of samples kept for the rolling average.
const VOLUME_HISTORY_LIMIT = 100;
// Margin added to the rolling average to obtain the loudness threshold.
const VOLUME_MARGIN = 5;
// Classes flipped on the record button on every start/stop transition.
const RECORD_BUTTON_TOGGLE_CLASSES = ["bg-red-500", "bg-blue-500", "hover:bg-blue-700"];

const volumeHistory = [];

/**
 * Registers the WebSocket used by sendAudioToServer() and toggleListening().
 * @param {WebSocket} newSocket
 */
export function setSocket(newSocket) {
  socket = newSocket;
}

/**
 * Registers the record button and wires its click to toggleListening().
 * @param {HTMLElement} newRecordButton
 */
export function setRecordButton(newRecordButton) {
  recordButton = newRecordButton;
  recordButton.addEventListener("click", toggleListening);
}

/**
 * Registers the element that displays the "Listening..." / "Silence
 * detected..." status. Optional: without it the status is looked up by id.
 * @param {HTMLElement} newStatusRecording
 */
export function setStatusRecording(newStatusRecording) {
  statusRecording = newStatusRecording;
}

/**
 * Connects `stream` to a fresh AnalyserNode used by the volume checker.
 * A live AudioContext is reused instead of creating one per call.
 * @param {MediaStream} stream
 */
export function InitAudioAnalyser(stream) {
  if (!audioContext || audioContext.state === "closed") {
    audioContext = new AudioContext();
  }
  const source = audioContext.createMediaStreamSource(stream);
  analyser = audioContext.createAnalyser();
  analyser.fftSize = 2048;
  analyser.smoothingTimeConstant = 0.8;
  source.connect(analyser);
}

/** Sets the button label and flips its colour classes. */
function updateRecordButton(label) {
  if (!recordButton) {
    return;
  }
  recordButton.innerHTML = label;
  RECORD_BUTTON_TOGGLE_CLASSES.forEach((name) => recordButton.classList.toggle(name));
}

/** Stops the timer, recorder, microphone tracks and audio context. */
function releaseAudioResources() {
  clearInterval(volumeChecker);
  // stop() throws InvalidStateError on an inactive recorder.
  if (audioRecorder && audioRecorder.state !== "inactive") {
    audioRecorder.stop();
  }
  if (audioStream) {
    audioStream.getTracks().forEach((track) => track.stop());
    audioStream = null;
  }
  if (audioContext) {
    if (typeof audioContext.close === "function" && audioContext.state !== "closed") {
      audioContext.close().catch((err) => console.error("AudioContext close failed:", err));
    }
    audioContext = null;
    analyser = null;
  }
}

/** Handles one recorded chunk: buffers it or ships it to the server. */
function handleRecordedChunk(event, preBuffer) {
  if (!soundDetected && autosend.checked) {
    // No completed utterance yet: keep only the newest chunk.
    preBuffer.length = 0;
    preBuffer.push(event.data);
    return;
  }
  if (event.data.size > 0) {
    const data = event.data;
    console.log("audio data size: " + data.size);
    if (preBuffer.length > 0) {
      sendAudioToServerPost(preBuffer.slice());
      // Clear so the same chunk is not uploaded again with later chunks.
      preBuffer.length = 0;
    }
    sendAudioToServer(data);
    soundDetected = false;
  }
}

/**
 * Requests the microphone, down-mixes it to a single channel and starts
 * recording. The button is switched to its "Stop Talking" state right away
 * and reverted if the microphone cannot be opened.
 * @returns {Promise<void>} settles once capture is running (or has failed).
 */
export function startListening() {
  recording = true;
  updateRecordButton("Stop Talking");

  return navigator.mediaDevices
    .getUserMedia({ audio: { sampleRate: 16000 } })
    .then((stream) => {
      if (!recording) {
        // stopListening() ran while the permission prompt was pending.
        stream.getTracks().forEach((track) => track.stop());
        return;
      }
      audioStream = stream;

      // Creates (or reuses) the shared AudioContext and the analyser.
      InitAudioAnalyser(stream);

      const sourceNode = audioContext.createMediaStreamSource(audioStream);
      info.innerHTML = `Sample rate: ${sourceNode.context.sampleRate} Hz`;

      // Route only the first input channel into a one-channel stream.
      const channelSplitter = audioContext.createChannelSplitter(2);
      const channelMerger = audioContext.createChannelMerger(1);
      sourceNode.connect(channelSplitter);
      channelSplitter.connect(channelMerger, 0, 0);

      const mediaStreamDestination = audioContext.createMediaStreamDestination();
      channelMerger.connect(mediaStreamDestination);

      const preBuffer = [];
      audioRecorder = new MediaRecorder(mediaStreamDestination.stream);
      audioRecorder.addEventListener("dataavailable", (event) => handleRecordedChunk(event, preBuffer));
      audioRecorder.start();
    })
    .catch((err) => {
      console.error("Could not start audio capture:", err);
      if (recording) {
        recording = false;
        releaseAudioResources();
        updateRecordButton("Push to Talk");
      }
    });
}

/**
 * Stops recording, releases the microphone and restores the button.
 * Safe to call when recording never started.
 */
export function stopListening() {
  recording = false;
  releaseAudioResources();
  updateRecordButton("Push to Talk");
}

/**
 * Uploads recorded chunks to `/upload` as a multipart form field `file`.
 * @param {Blob[]} data recorded chunks, concatenated into one Blob.
 * @returns {Promise<Response|undefined>} the fetch result; failures are logged.
 */
export function sendAudioToServerPost(data) {
  const blob = new Blob(data, { type: "audio/ogg; codecs=opus" });
  const formData = new FormData();
  // Append the Blob itself; appending the array would upload its string form.
  formData.append('file', blob);
  return fetch('/upload', {
    method: 'POST',
    body: formData
  }).catch((err) => {
    console.error("Audio upload failed:", err);
  });
}

/**
 * Sends one recorded chunk over the WebSocket and records the send time.
 * @param {Blob} data recorded chunk.
 */
export function sendAudioToServer(data) {
  if (!socket) {
    console.warn("sendAudioToServer: no socket set");
    return;
  }
  // NOTE(review): JSON.stringify serialises a Blob as `{}`, so the audio
  // payload itself is not transmitted this way — confirm the wire format
  // the server expects (e.g. base64 text or a binary frame).
  socket.send(JSON.stringify({ type: 'audio', task: "transcribe", audio: data }));
  serverTime = Date.now();
  if (!autosend.checked) {
    transcription.innerHTML = "Processing audio...";
  }
}

/** Starts or stops listening, but only while the socket is open. */
export function toggleListening() {
  if (!socket || socket.readyState !== WebSocket.OPEN) {
    return;
  }
  if (recording) {
    stopListening();
  } else {
    startListening();
  }
}

/** Writes the status text/colour if a status element is available. */
function showStatus(text, color) {
  if (!statusRecording && typeof document !== "undefined") {
    // NOTE(review): assumes the page has an element with id
    // "statusRecording" — confirm, or call setStatusRecording().
    statusRecording = document.getElementById("statusRecording");
  }
  if (!statusRecording) {
    return;
  }
  statusRecording.innerHTML = text;
  statusRecording.style.color = color;
}

/** Cuts the current recording so a `dataavailable` chunk is emitted. */
function restartRecorder() {
  if (!audioRecorder || audioRecorder.state !== "recording") {
    return;
  }
  audioRecorder.stop();
  audioRecorder.start();
}

/** One sampling step of the voice-activity detector. */
function checkVolume() {
  if (!audioContext || !analyser) {
    console.log("No audio context");
    return;
  }
  const frequencyData = new Uint8Array(analyser.frequencyBinCount);
  analyser.getByteFrequencyData(frequencyData);

  let totalVolume = 0;
  for (const level of frequencyData) {
    totalVolume += level;
  }
  averageVolume = totalVolume / frequencyData.length;

  volumeHistory.push(averageVolume);
  if (volumeHistory.length > VOLUME_HISTORY_LIMIT) {
    volumeHistory.shift();
  }

  // Loud means "above the rolling average plus a fixed margin".
  const threshold =
    volumeHistory.reduce((acc, curr) => acc + curr, 0) / volumeHistory.length + VOLUME_MARGIN;

  if (averageVolume > threshold) {
    if (autosend.checked && speakingCount === 0 && audioRecorder) {
      // First loud sample: cut here so the previous audio becomes the pre-buffer.
      soundDetected = false;
      restartRecorder();
    }
    speakingCount++;
    if (speakingCount > soundCount_Threshold) {
      showStatus("Listening...", "green");
      isSpeaking = true;
      // Start each utterance with a fresh silence counter.
      silenceCount = 0;
    }
    return;
  }

  speakingCount = 0;
  if (!isSpeaking) {
    return;
  }
  silenceCount++;
  if (silenceCount <= silenceCount_Threshold) {
    return;
  }
  if (autosend.checked) {
    // End of utterance: the next chunk is the one to send.
    soundDetected = true;
    restartRecorder();
  }
  isSpeaking = false;
  silenceCount = 0;
  showStatus("Silence detected...", "orange");
}

/**
 * Starts the periodic volume check (every SILENCE_DELAY_MS milliseconds).
 * Calling it again replaces the previous timer instead of stacking timers.
 */
export function initializeVolumeChecker() {
  clearInterval(volumeChecker);
  volumeChecker = setInterval(checkVolume, SILENCE_DELAY_MS);
}

// diff --git a/web/chat-client.html b/web/chat-client.html
// index 1527051..c8cc410 100644
// --- a/web/chat-client.html
// +++ b/web/chat-client.html
// @@ -11,10 +11,16 @@