gogo2/web/audio.js
Dobromir Popov 364df3d891 wip
2024-06-10 17:32:00 +03:00

193 lines
6.2 KiB
JavaScript

let selectedDeviceId = "default";
export let serverTime;
export let recordButton;
let socket;
let audioRecorder;
let audioStream;
let recording = false;
let connectionStatus;
let statusRecording;
let audioContext;
let volumeChecker;
let lastVolumes = new Array(5);
let averageVolume;
let silenceCount = 0;
let isSpeaking = false;
let soundDetected = false;
let speakingCount = 0;
let analyser = null;
let SILENCE_DELAY_MS = 50;
let preDetect_IncludedAudio = 400; //ms
let soundCount_Threshold = 10;
let silenceCount_Threshold = 10;
const volumeHistory = [];
export function setSocket(newSocket) {
socket = newSocket;
}
export function setRecordButton(newRecordButton) {
recordButton = newRecordButton;
recordButton.addEventListener("click", toggleListening);
}
export function InitAudioAnalyser(stream) {
audioContext = new AudioContext();
const source = audioContext.createMediaStreamSource(stream);
analyser = audioContext.createAnalyser();
analyser.fftSize = 2048;
analyser.smoothingTimeConstant = 0.8;
source.connect(analyser);
}
export function startListening() {
//canvasCtx.fillStyle = "green";
recording = true;
navigator.mediaDevices.getUserMedia({ audio: { sampleRate: 16000 } })
.then((stream) => {
audioStream = stream;
const audioContext = new AudioContext();
const sourceNode = audioContext.createMediaStreamSource(audioStream);
const audioSampleRate = sourceNode.context.sampleRate;
info.innerHTML = "Sample rate: " + audioSampleRate + " Hz";
var preBuffer = [];
const channelSplitter = audioContext.createChannelSplitter(2);
const channelMerger = audioContext.createChannelMerger(1);
sourceNode.connect(channelSplitter);
channelSplitter.connect(channelMerger, 0, 0);
const outputNode = channelMerger;
const mediaStreamDestination = audioContext.createMediaStreamDestination();
outputNode.connect(mediaStreamDestination);
const singleChannelStream = mediaStreamDestination.stream;
audioRecorder = new MediaRecorder(singleChannelStream);
audioRecorder.start();
audioRecorder.addEventListener("dataavailable", (event) => {
if (!soundDetected && autosend.checked) {
preBuffer = [];
preBuffer.push(event.data);
return;
}
if (event.data.size > 0) {
let data = event.data;
console.log("audio data size: " + data.size);
if (preBuffer.length > 0) {
sendAudioToServerPost(preBuffer);
}
sendAudioToServer(data);
soundDetected = false;
}
});
InitAudioAnalyser(stream);
});
recordButton.innerHTML = "Stop Talking";
recordButton.classList.toggle('bg-red-500');
recordButton.classList.toggle('bg-blue-500');
recordButton.classList.toggle('hover:bg-blue-700');
}
export function stopListening() {
recording = false;
audioRecorder.stop();
recordButton.innerHTML = "Push to Talk";
recordButton.classList.toggle('bg-blue-500');
recordButton.classList.toggle('bg-red-500');
recordButton.classList.toggle('hover:bg-blue-700');
clearInterval(volumeChecker);
if (audioStream) {
audioStream.getTracks().forEach(track => track.stop());
audioStream = null;
}
}
export function sendAudioToServerPost(data) {
const blob = new Blob(data, { type: "audio/ogg; codecs=opus" });
var formData = new FormData();
formData.append('file', data);
fetch('/upload', {
method: 'POST',
body: formData
});
}
export function sendAudioToServer(data) {
//if (connected) {
socket.send(JSON.stringify({ type: 'audio', task:"transcribe", audio: data }));
serverTime = Date.now();
if (!autosend.checked) {
transcription.innerHTML = "Processing audio...";
}
//}
}
export function toggleListening() {
if (socket.readyState === WebSocket.OPEN) {
if (recording) {
stopListening();
} else {
startListening();
}
}
}
export function initializeVolumeChecker() {
volumeChecker = setInterval(() => {
if (!audioContext) {
console.log("No audio context");
return;
}
const frequencyData = new Uint8Array(analyser.frequencyBinCount);
analyser.getByteFrequencyData(frequencyData);
let totalVolume = 0;
for (let i = 0; i < frequencyData.length; i++) {
totalVolume += frequencyData[i];
}
averageVolume = totalVolume / frequencyData.length;
volumeHistory.push(averageVolume);
if (volumeHistory.length > 100) {
volumeHistory.shift();
}
const threshold = volumeHistory.reduce((acc, curr) => acc + curr) / volumeHistory.length + 5;
const isSilent = averageVolume < threshold;
if (averageVolume > threshold) {
if (autosend.checked && speakingCount == 0 && audioRecorder) {
soundDetected = false;
audioRecorder.stop();
audioRecorder.start();
}
speakingCount++;
if (speakingCount > soundCount_Threshold) {
statusRecording.innerHTML = "Listening...";
statusRecording.style.color = "green";
isSpeaking = true;
}
} else if (averageVolume - 5 < threshold) {
speakingCount = 0;
if (isSpeaking) {
silenceCount++;
if (silenceCount > silenceCount_Threshold) {
if (autosend.checked) {
soundDetected = true;
audioRecorder.stop();
audioRecorder.start();
}
isSpeaking = false;
statusRecording.innerHTML = "Silence detected...";
statusRecording.style.color = "orange";
}
}
}
}, SILENCE_DELAY_MS);
}