// Audio capture module: push-to-talk microphone recording, adaptive
// speech/silence detection, and streaming of recorded chunks to a
// transcription server over WebSocket (with an HTTP pre-roll upload path).
// --- Module state for push-to-talk audio capture and streaming ---

// Preferred input device id (never reassigned in this file).
let selectedDeviceId = "default";

// Timestamp (ms) of the last audio payload sent to the server; read by
// callers to measure round-trip latency.
export let serverTime;

// The push-to-talk button element; wired up in setRecordButton().
export let recordButton;

// WebSocket used to stream audio to the server; injected via setSocket().
let socket;

// MediaRecorder capturing the downmixed (mono) microphone stream.
let audioRecorder;

// Raw MediaStream from getUserMedia; tracks are stopped in stopListening().
let audioStream;

// True while the user is in "talking" mode (between start/stopListening).
let recording = false;

// NOTE(review): connectionStatus is never referenced in this file —
// presumably assigned elsewhere or dead; verify before removing.
let connectionStatus;

// Status line element updated by the volume checker ("Listening..." etc.).
let statusRecording;

// AudioContext backing the volume analyser (set in InitAudioAnalyser).
let audioContext;

// Interval id from initializeVolumeChecker(); cleared in stopListening().
let volumeChecker;

// NOTE(review): lastVolumes is never used in this file (volumeHistory is
// used instead) — candidate for removal.
let lastVolumes = new Array(5);

// Mean of the analyser's frequency bins, recomputed on every checker tick.
let averageVolume;

// Consecutive "silent" checker ticks observed while the user was speaking.
let silenceCount = 0;

// True once speakingCount exceeds soundCount_Threshold.
let isSpeaking = false;

// Handshake flag between the volume checker and the dataavailable handler:
// false = chunk is pre-speech audio to buffer, true = chunk should be sent.
let soundDetected = false;

// Consecutive "loud" checker ticks.
let speakingCount = 0;

// AnalyserNode created in InitAudioAnalyser(); null until then.
let analyser = null;

// Volume-checker tick interval (ms).
let SILENCE_DELAY_MS = 50;

// Amount of pre-speech audio (ms) intended to be kept. NOTE(review):
// declared but never referenced in this file.
let preDetect_IncludedAudio = 400; //ms

// Ticks of sound required before we treat the user as speaking.
let soundCount_Threshold = 10;

// Ticks of silence required before we treat the user as done speaking.
let silenceCount_Threshold = 10;

// Rolling window (last 100 ticks) of averageVolume; its mean is the
// adaptive noise floor used by the volume checker.
const volumeHistory = [];
/**
 * Injects the WebSocket this module streams audio over.
 * @param {WebSocket} newSocket - an open (or connecting) socket.
 */
export function setSocket(newSocket) {
  socket = newSocket;
}
/**
 * Registers the push-to-talk button and wires its click handler.
 * @param {HTMLButtonElement} newRecordButton - button that toggles listening.
 */
export function setRecordButton(newRecordButton) {
  // Keep a module-level reference for the UI updates, then hook up toggling.
  recordButton = newRecordButton;
  newRecordButton.addEventListener("click", toggleListening);
}
/**
 * Builds the AnalyserNode used by the volume checker from a microphone
 * stream, storing both the AudioContext and the analyser in module state.
 * Does not start any processing by itself.
 * @param {MediaStream} stream - live microphone stream.
 */
export function InitAudioAnalyser(stream) {
  audioContext = new AudioContext();
  analyser = audioContext.createAnalyser();
  analyser.fftSize = 2048;
  analyser.smoothingTimeConstant = 0.8;
  audioContext.createMediaStreamSource(stream).connect(analyser);
}
/**
 * Starts capturing microphone audio and streaming it to the server.
 *
 * Opens the default input at 16 kHz, downmixes to a single channel via a
 * splitter/merger pair, records it with a MediaRecorder and forwards chunks
 * through sendAudioToServer(). While autosend is enabled, chunks recorded
 * before speech onset are held in a one-chunk pre-buffer and POSTed just
 * before the first speech chunk, so the start of an utterance is not lost.
 * Also flips the push-to-talk button UI and initialises the volume analyser.
 */
export function startListening() {
  recording = true;

  navigator.mediaDevices.getUserMedia({ audio: { sampleRate: 16000 } })
    .then((stream) => {
      audioStream = stream;

      const audioContext = new AudioContext();
      const sourceNode = audioContext.createMediaStreamSource(audioStream);
      const audioSampleRate = sourceNode.context.sampleRate;

      info.innerHTML = "Sample rate: " + audioSampleRate + " Hz";

      // Holds the most recent pre-speech chunk (autosend mode only).
      let preBuffer = [];

      // Downmix to mono: route channel 0 into a single-channel merger.
      const channelSplitter = audioContext.createChannelSplitter(2);
      const channelMerger = audioContext.createChannelMerger(1);
      sourceNode.connect(channelSplitter);
      channelSplitter.connect(channelMerger, 0, 0);
      const outputNode = channelMerger;

      const mediaStreamDestination = audioContext.createMediaStreamDestination();
      outputNode.connect(mediaStreamDestination);
      const singleChannelStream = mediaStreamDestination.stream;

      audioRecorder = new MediaRecorder(singleChannelStream);
      audioRecorder.start();
      audioRecorder.addEventListener("dataavailable", (event) => {
        // Before speech onset (autosend mode): keep only the latest chunk
        // so audio from just before detection can be prepended later.
        if (!soundDetected && autosend.checked) {
          preBuffer = [event.data];
          return;
        }
        if (event.data.size > 0) {
          const data = event.data;
          console.log("audio data size: " + data.size);
          if (preBuffer.length > 0) {
            sendAudioToServerPost(preBuffer);
            // BUGFIX: clear the pre-roll once sent; previously it was
            // re-uploaded with every subsequent chunk.
            preBuffer = [];
          }
          sendAudioToServer(data);
          soundDetected = false;
        }
      });

      InitAudioAnalyser(stream);
    })
    .catch((err) => {
      // BUGFIX: handle getUserMedia rejection (e.g. permission denied)
      // instead of leaving an unhandled rejection, and restore the UI.
      console.error("Microphone access failed:", err);
      recording = false;
      recordButton.innerHTML = "Push to Talk";
      recordButton.classList.toggle('bg-red-500');
      recordButton.classList.toggle('bg-blue-500');
      recordButton.classList.toggle('hover:bg-blue-700');
    });

  // Flip the button into "recording" state immediately (optimistic UI).
  recordButton.innerHTML = "Stop Talking";
  recordButton.classList.toggle('bg-red-500');
  recordButton.classList.toggle('bg-blue-500');
  recordButton.classList.toggle('hover:bg-blue-700');
}
/**
 * Stops capturing: halts the recorder (flushing a final chunk), restores
 * the button UI, cancels the volume checker and releases the microphone.
 */
export function stopListening() {
  recording = false;
  // BUGFIX: guard the recorder — stop() throws an InvalidStateError if the
  // recorder was never created or is already inactive (e.g. getUserMedia
  // failed, or stop was clicked before the stream was ready).
  if (audioRecorder && audioRecorder.state !== "inactive") {
    audioRecorder.stop();
  }
  recordButton.innerHTML = "Push to Talk";
  recordButton.classList.toggle('bg-blue-500');
  recordButton.classList.toggle('bg-red-500');
  recordButton.classList.toggle('hover:bg-blue-700');
  clearInterval(volumeChecker);
  if (audioStream) {
    // Release the microphone so the browser's recording indicator clears.
    audioStream.getTracks().forEach((track) => track.stop());
    audioStream = null;
  }
}
/**
 * Uploads buffered pre-speech audio to the server over HTTP.
 * @param {Blob[]} data - recorded chunks to combine into one Ogg/Opus blob.
 */
export function sendAudioToServerPost(data) {
  const blob = new Blob(data, { type: "audio/ogg; codecs=opus" });
  const formData = new FormData();
  // BUGFIX: upload the assembled blob — previously the raw chunk array was
  // appended (stringified by FormData) and the blob was never used.
  formData.append('file', blob);
  fetch('/upload', {
    method: 'POST',
    body: formData
  }).catch((err) => {
    // BUGFIX: don't leave the upload promise floating/unhandled.
    console.error("Pre-roll audio upload failed:", err);
  });
}
/**
 * Streams one recorded audio chunk to the server over the WebSocket.
 *
 * FIXME(review): `data` is a Blob here (see the dataavailable handler in
 * startListening), and JSON.stringify serializes a Blob as "{}" — so the
 * `audio` field almost certainly does not carry the audio bytes. Verify
 * what the server expects (binary frame? base64 string?) before relying
 * on this path.
 *
 * @param {Blob} data - audio chunk from the MediaRecorder.
 */
export function sendAudioToServer(data) {
  //if (connected) {
  socket.send(JSON.stringify({ type: 'audio', task:"transcribe", audio: data }));
  // Record send time so callers can measure server round-trip latency.
  serverTime = Date.now();
  // In manual (non-autosend) mode, give immediate feedback in the UI.
  if (!autosend.checked) {
    transcription.innerHTML = "Processing audio...";
  }
  //}
}
/**
 * Click handler for the push-to-talk button: flips between listening and
 * idle. Clicks are ignored unless the WebSocket is currently open.
 */
export function toggleListening() {
  // Guard clause: no connection, nothing to toggle.
  if (socket.readyState !== WebSocket.OPEN) {
    return;
  }
  if (recording) {
    stopListening();
  } else {
    startListening();
  }
}
/**
 * Starts the periodic speech/silence detector.
 *
 * Every SILENCE_DELAY_MS it samples the analyser's frequency bins, compares
 * the mean volume against an adaptive noise floor (rolling mean of the last
 * 100 ticks + 5), and tracks streaks of loud/silent ticks. In autosend mode
 * it cycles the MediaRecorder at speech onset (so the chunk starts near the
 * utterance) and again after sustained silence (flushing the chunk for
 * sending via the dataavailable handler).
 */
export function initializeVolumeChecker() {
  volumeChecker = setInterval(() => {
    // BUGFIX: also require the analyser — both are set in InitAudioAnalyser,
    // but guarding explicitly avoids a TypeError if that ever changes.
    if (!audioContext || !analyser) {
      console.log("No audio context");
      return;
    }

    // Mean energy across all frequency bins for this tick.
    const frequencyData = new Uint8Array(analyser.frequencyBinCount);
    analyser.getByteFrequencyData(frequencyData);
    let totalVolume = 0;
    for (let i = 0; i < frequencyData.length; i++) {
      totalVolume += frequencyData[i];
    }
    averageVolume = totalVolume / frequencyData.length;

    // Adaptive noise floor: rolling mean of recent ticks plus a margin.
    volumeHistory.push(averageVolume);
    if (volumeHistory.length > 100) {
      volumeHistory.shift();
    }
    const threshold =
      volumeHistory.reduce((acc, curr) => acc + curr) / volumeHistory.length + 5;

    if (averageVolume > threshold) {
      // Sound: at speech onset in autosend mode, cycle the recorder so the
      // emitted chunk begins near the start of the utterance.
      if (autosend.checked && speakingCount == 0 && audioRecorder) {
        soundDetected = false;
        audioRecorder.stop();
        audioRecorder.start();
      }
      // BUGFIX: reset the silence streak when sound returns; previously the
      // stale count made the next silence trigger after a single tick.
      silenceCount = 0;
      speakingCount++;
      if (speakingCount > soundCount_Threshold) {
        statusRecording.innerHTML = "Listening...";
        statusRecording.style.color = "green";
        isSpeaking = true;
      }
    } else {
      // Silence. (The old `averageVolume - 5 < threshold` guard was always
      // true once averageVolume <= threshold, so this is a plain else.)
      speakingCount = 0;
      if (isSpeaking) {
        silenceCount++;
        if (silenceCount > silenceCount_Threshold) {
          // Utterance finished: cycle the recorder so the chunk is flushed
          // and marked for sending by the dataavailable handler.
          if (autosend.checked) {
            soundDetected = true;
            audioRecorder.stop();
            audioRecorder.start();
          }
          isSpeaking = false;
          statusRecording.innerHTML = "Silence detected...";
          statusRecording.style.color = "orange";
        }
      }
    }
  }, SILENCE_DELAY_MS);
}