<!DOCTYPE html>
<html>

<head>
    <title>Real-time Speech-to-Text</title>
    <style>
        .recording {
            background-color: red;
            color: white;
        }
    </style>
</head>
<body>
    <h1>Real-time Speech-to-Text</h1>
    <label class="toggle">
        <input type="checkbox" id="autosend" />
        <span class="slider">Continuous</span>
    </label>
    <select id="input-devices">
        <option value="default">Default</option>
    </select>

    <button id="record-button" disabled>Start Recording</button>
    <span id="connection-status"></span>

    <div id="status-recording"></div>
    <p id="transcription"></p>

    <canvas id="canvas" width="500" height="500"></canvas>
<script>
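        // Overview (added commentary): this page captures microphone audio,
        // runs a simple volume-based voice activity check, and streams
        // recorded chunks to a transcription server over a WebSocket. In
        // "Continuous" mode (the autosend checkbox) the MediaRecorder is
        // cycled on detected speech/silence boundaries so each utterance is
        // sent as its own chunk; otherwise audio is sent only when recording
        // is stopped manually.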
        let selectedDeviceId = "default";
        let socket;
        let audioRecorder;
        let analyser; // was an undeclared implicit global
        let inputDevices; // the <select>; its id "input-devices" is not a valid identifier, so no auto-global exists
        let recording = false;
        let recordButton;
        let connected = false;
        let connectionStatus; // element looked up in window.onload
        let statusRecording; // element looked up in window.onload
        let audioContext;
        let serverTime;

        let volumeChecker;
        let lastVolumes = new Array(5);
        let averageVolume;
        let silenceCount = 0;
        let isSpeaking = false;
        let soundDetected = false;
        let speakingCount = 0;
        let SILENCE_DELAY_MS = 100;
        const volumeHistory = [];
        let canvas = document.getElementById("canvas");
        let canvasCtx = canvas.getContext("2d");
        let barWidth = 10;
        let barSpacing = 5;

        // Draw sliding bar graph
        function drawSlidingBarGraph(lastVolumes) {
            canvasCtx.clearRect(0, 0, canvas.width, canvas.height);
            // Draw one bar per volume sample, scaled to the canvas height
            for (let i = 0; i < lastVolumes.length; i++) {
                let value = lastVolumes[i];
                let barHeight = (value / 255) * canvas.height;
                let x = i * (barWidth + barSpacing);
                let y = canvas.height - barHeight;
                canvasCtx.fillRect(x, y, barWidth, barHeight);
            }
        }
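        // How the silence detection below works (added commentary):
        // averageVolume is the mean of the analyser's byte frequency data
        // (0-255). The threshold adapts to the environment: it is the rolling
        // mean of up to the last 100 samples plus a fixed margin of 5. With
        // SILENCE_DELAY_MS = 100, speech is confirmed after 7 consecutive
        // loud ticks (~700 ms) and a segment is cut after 5 consecutive quiet
        // ticks (~500 ms).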
        // Check the audio level every SILENCE_DELAY_MS milliseconds.
        // Wrapped in a function so the interval can be restarted after
        // stopListening() has cleared it.
        function startVolumeChecker() {
            clearInterval(volumeChecker);
            volumeChecker = setInterval(() => {
                if (!audioContext) {
                    console.log("No audio context");
                    return;
                }
                const frequencyData = new Uint8Array(analyser.frequencyBinCount);
                //analyser.getByteTimeDomainData(dataArray); //history
                analyser.getByteFrequencyData(frequencyData); //current

                let totalVolume = 0;
                for (let i = 0; i < frequencyData.length; i++) {
                    totalVolume += frequencyData[i];
                }
                averageVolume = totalVolume / frequencyData.length;

                volumeHistory.push(averageVolume);
                if (volumeHistory.length > 100) {
                    volumeHistory.shift();
                }

                const threshold = volumeHistory.reduce((acc, curr) => acc + curr) / volumeHistory.length + 5;
                const isSilent = averageVolume < threshold;

                // count speaking and silence
                if (averageVolume > threshold) {
                    if (autosend.checked && speakingCount == 0 && audioRecorder) {
                        console.log("starting new recording");
                        audioRecorder.stop();
                        audioRecorder.start();
                        soundDetected = true;
                    }
                    speakingCount++;
                    silenceCount = 0; // reset so a later pause must again last 5 full ticks
                    if (speakingCount > 7) {
                        statusRecording.innerHTML = "Listening...";
                        statusRecording.style.color = "green";
                        isSpeaking = true;
                        console.log("Was silent and is now speaking. (" + averageVolume + " averageVolume).");
                    }
                } else {
                    speakingCount = 0;
                    if (isSpeaking) {
                        silenceCount++;
                        if (silenceCount > 5) {
                            if (autosend.checked) {
                                console.log("Was speaking and is now silent. (" + averageVolume + " averageVolume). Sending audio to server.");
                                audioRecorder.stop();
                                audioRecorder.start();
                            }
                            isSpeaking = false;
                            statusRecording.innerHTML = "Silence detected...";
                            statusRecording.style.color = "orange";
                        }
                    }
                }

                //console.log(`Average volume: ${averageVolume}, isSilent: ${isSilent}, threshold: ${threshold}`);
                //drawSlidingBarGraph(lastVolumes);
            }, SILENCE_DELAY_MS);
        }
        startVolumeChecker();
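        // Note (added commentary): with fftSize = 2048 the analyser exposes
        // frequencyBinCount = 1024 bins, and smoothingTimeConstant = 0.8
        // averages each frame with previous ones, which steadies the volume
        // readings used by the checker above.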
        function InitAudioAnalyser(stream) {
            // Initialize the Web Audio API
            audioContext = new AudioContext();
            const source = audioContext.createMediaStreamSource(stream);
            analyser = audioContext.createAnalyser();
            //analyser.fftSize = 32;
            analyser.fftSize = 2048;
            analyser.smoothingTimeConstant = 0.8;
            source.connect(analyser);
            // MediaRecorder exposes no sampleRate property, so only the context rate is logged
            console.log("Audio context initialized. analyser sampling: " + audioContext.sampleRate + "Hz");
        }
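        // connect() initializes wsurl with a localhost default, asks the HTTP
        // server for the actual WebSocket URL via GET /wsurl, then opens the
        // socket; on close it schedules a reconnect attempt every 5 seconds.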
        function connect() {
            connectionStatus.innerHTML = "Connecting to WS...";
            let wsurl = "ws://localhost:8081";
            // Get the current ws url from the server
            fetch("/wsurl")
                .then((response) => response.text())
                .then((data) => {
                    wsurl = data;
                    console.log("Got ws url: '" + wsurl + "'");
                })
                .then(() => {
                    console.log("connecting to '" + wsurl + "'...");
                    socket = new WebSocket(wsurl);
                    socket.onopen = () => {
                        console.log("WebSocket connection opened.");
                        connectionStatus.innerHTML = "Connected to " + wsurl;
                        transcription.innerHTML = "Whisper something into the microphone...";
                        recordButton.disabled = false;
                        connected = true;
                    };
                    socket.onmessage = onmessage;
                    socket.onclose = () => {
                        console.log("WebSocket connection closed");
                        connectionStatus.innerHTML = "Disconnected";
                        recordButton.disabled = true;
                        connected = false;
                        setTimeout(() => {
                            connect();
                        }, 5000);
                    };
                })
                .catch((error) => {
                    console.log("Error getting ws url: " + error);
                    connectionStatus.innerHTML = "Error getting ws url: " + error;
                });
        }
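        // The reported latency is the time between the last send in
        // sendAudioToServer() (which records serverTime) and this reply.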
        function onmessage(event) {
            let latency = Date.now() - serverTime;
            console.log("Received message from server: " + event.data + " (latency: " + latency + "ms)");
            if (autosend.checked) {
                // append to the text on a new line
                transcription.innerHTML += "<br>>" + event.data;
                statusRecording.innerHTML = "waiting...";
                statusRecording.style.color = "black";
            } else {
                // replace the text
                transcription.innerHTML = event.data;
            }
        }
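        // Each audioRecorder.stop()/start() cycle flushes one complete,
        // self-contained recording through the "dataavailable" event
        // (typically audio/webm in Chromium-based browsers). The
        // sampleRate: 16000 constraint is only a hint; browsers may record
        // at their native rate.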
        function startListening() {
            // Initialize canvas
            canvasCtx.fillStyle = "green";
            recording = true;
            // Use the device chosen in the select list (kept in selectedDeviceId)
            navigator.mediaDevices.getUserMedia({ audio: { deviceId: selectedDeviceId, sampleRate: 16000, echoCancellation: true } }).then((stream) => {
                audioRecorder = new MediaRecorder(stream);
                audioRecorder.start();
                audioRecorder.addEventListener("dataavailable", (event) => {
                    console.log("Audio data available: " + event.data.size + " bytes");
                    if (!soundDetected && autosend.checked) {
                        console.log("discarding audio data because not speaking");
                        return;
                    }
                    if (event.data.size > 0) {
                        sendAudioToServer(event.data);
                        soundDetected = false;
                    }
                });

                InitAudioAnalyser(stream);
                startVolumeChecker(); // restart in case stopListening() cleared the interval
            });

            recordButton.innerHTML = "Stop Recording";
            recordButton.classList.add("recording");
        }
        function stopListening() {
            recording = false;
            audioRecorder.stop();
            recordButton.innerHTML = "Start Recording";
            recordButton.classList.remove("recording");
            clearInterval(volumeChecker);
        }
        function sendAudioToServer(data) {
            if (connected) {
                //const blob = new Blob(data, { type: 'audio/webm' });
                socket.send(data);
                serverTime = Date.now();
                console.log("Sent some audio data to server.");
                if (!autosend.checked) {
                    transcription.innerHTML = "Processing audio...";
                }
            } else {
                console.log("Not connected, not sending audio data to server.");
            }
        }
        function toggleListening() {
            // Always allow stopping; only start when the socket is open
            if (recording) {
                stopListening();
            } else if (socket.readyState === WebSocket.OPEN) {
                startListening();
            }
        }
|
|
|
|
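        // Note (added commentary): enumerateDevices() returns empty device
        // labels until the user has granted microphone permission, hence the
        // 'Device <id>' fallback below.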
        function enumerateDevices() {
            // Enumerate the available audio input devices
            navigator.mediaDevices.enumerateDevices()
                .then(function (devices) {
                    var audioInputDevices = devices.filter(function (device) {
                        return device.kind === 'audioinput';
                    });
                    console.log(audioInputDevices.length + ' audio input devices found');

                    // If more than one audio input device is available, populate the select list
                    if (audioInputDevices.length > 1) {
                        audioInputDevices.forEach(function (device) {
                            var option = document.createElement('option');
                            option.value = device.deviceId;
                            option.text = device.label || 'Device ' + device.deviceId;
                            inputDevices.appendChild(option);
                        });
                        // Listen for changes to the select list and connect to the selected audio input device
                        inputDevices.addEventListener('change', function (event) {
                            // Update the shared id ('var' here used to shadow the global)
                            selectedDeviceId = event.target.value;
                            var constraints = { audio: { deviceId: selectedDeviceId } };
                            navigator.mediaDevices.getUserMedia(constraints)
                                .then(function (stream) {
                                    // Handle the audio stream from the selected device here
                                })
                                .catch(function (error) {
                                    console.log('Error accessing audio stream:', error);
                                });
                        });
                    }
                    // If only one audio input device is available, connect to it automatically
                    else if (audioInputDevices.length === 1) {
                        var constraints = { audio: { deviceId: audioInputDevices[0].deviceId } };
                        navigator.mediaDevices.getUserMedia(constraints)
                            .then(function (stream) {
                                // Handle the audio stream from the selected device here
                            })
                            .catch(function (error) {
                                console.log('Error accessing audio stream:', error);
                            });
                    }
                    // If no audio input devices are available, show an error message
                    else {
                        console.log('No audio input devices available');
                    }
                })
                .catch(function (error) {
                    console.log('Error listing audio input devices:', error);
                });
        }
        window.onload = () => {
            recordButton = document.getElementById("record-button");
            recordButton.addEventListener("click", toggleListening);
            connectionStatus = document.getElementById("connection-status");
            inputDevices = document.getElementById("input-devices");
            // transcription and autosend are reachable as id-globals, so these lookups are unnecessary:
            //transcription = document.getElementById("transcription");
            //autosend = document.getElementById("autosend");
            statusRecording = document.getElementById("status-recording");

            enumerateDevices();
            connect(); // connect() takes no arguments; it creates the socket itself
        };
    </script>
    <script src="https://cdn.webrtc-experiment.com/MediaStreamRecorder.js"></script>
</body>

</html>