WS & TTS urls configurable

This commit is contained in:
Dobromir Popov 2023-03-06 23:56:43 +02:00
parent 473d640809
commit 2f56d76042
5 changed files with 394 additions and 61 deletions

View File

@ -16,7 +16,11 @@
//"forwardPorts": [ "28080:8080", "28081:8081"], //"forwardPorts": [ "28080:8080", "28081:8081"],
// tell vscode to forward the port 8080 to the container to random local port // tell vscode to forward the port 8080 to the container to random local port
"appPort": ["28080:8080", "28081:8081"], "appPort": ["28080:8080", "28081:8081"],
"remoteEnv": {
"TTS_BACKEND_URL": "http://192.168.0.10:9008/asr",
//"WS_URL":"ws://192.168.0.10:28081"
"WS_URL":"wss://ws.ai.d-popov.com"
}
// Use 'postCreateCommand' to run commands after the container is created. // Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "npm install ws express request", // "postCreateCommand": "npm install ws express request",

View File

@ -10,8 +10,8 @@ RUN echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
&& chmod 0440 /etc/sudoers.d/$USERNAME && chmod 0440 /etc/sudoers.d/$USERNAME
# Set `DEVCONTAINER` environment variable to help with orientation # Set `DEVCONTAINER` environment variable to help with orientation
ENV DEVCONTAINER=true #ENV DEVCONTAINER=true
#! env declarations not copied to devcontainer
########## Modified Dockerfile ########## ########## Modified Dockerfile ##########

115
package-lock.json generated
View File

@ -8,9 +8,11 @@
"name": "kevin-ai", "name": "kevin-ai",
"version": "1.0.0", "version": "1.0.0",
"dependencies": { "dependencies": {
"dotenv": "^16.0.3",
"express": "^4.18.2", "express": "^4.18.2",
"request": "^2.88.2", "request": "^2.88.2",
"ws": "^8.12.1" "ws": "^8.12.1",
"wscat": "^5.2.0"
} }
}, },
"node_modules/accepts": { "node_modules/accepts": {
@ -25,6 +27,38 @@
"node": ">= 0.6" "node": ">= 0.6"
} }
}, },
"node_modules/agent-base": {
"version": "6.0.2",
"resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz",
"integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==",
"dependencies": {
"debug": "4"
},
"engines": {
"node": ">= 6.0.0"
}
},
"node_modules/agent-base/node_modules/debug": {
"version": "4.3.4",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
"integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==",
"dependencies": {
"ms": "2.1.2"
},
"engines": {
"node": ">=6.0"
},
"peerDependenciesMeta": {
"supports-color": {
"optional": true
}
}
},
"node_modules/agent-base/node_modules/ms": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
},
"node_modules/ajv": { "node_modules/ajv": {
"version": "6.12.6", "version": "6.12.6",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
@ -146,6 +180,14 @@
"node": ">= 0.8" "node": ">= 0.8"
} }
}, },
"node_modules/commander": {
"version": "9.5.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-9.5.0.tgz",
"integrity": "sha512-KRs7WVDKg86PWiuAqhDrAQnTXZKraVcCc6vFdL14qrZ/DcWwuRo7VoiYXalXO7S5GKpqYiVEwCbgFDfxNHKJBQ==",
"engines": {
"node": "^12.20.0 || >=14"
}
},
"node_modules/content-disposition": { "node_modules/content-disposition": {
"version": "0.5.4", "version": "0.5.4",
"resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz",
@ -227,6 +269,14 @@
"npm": "1.2.8000 || >= 1.4.16" "npm": "1.2.8000 || >= 1.4.16"
} }
}, },
"node_modules/dotenv": {
"version": "16.0.3",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.0.3.tgz",
"integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==",
"engines": {
"node": ">=12"
}
},
"node_modules/ecc-jsbn": { "node_modules/ecc-jsbn": {
"version": "0.1.2", "version": "0.1.2",
"resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz",
@ -478,6 +528,39 @@
"npm": ">=1.3.7" "npm": ">=1.3.7"
} }
}, },
"node_modules/https-proxy-agent": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz",
"integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==",
"dependencies": {
"agent-base": "6",
"debug": "4"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/https-proxy-agent/node_modules/debug": {
"version": "4.3.4",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
"integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==",
"dependencies": {
"ms": "2.1.2"
},
"engines": {
"node": ">=6.0"
},
"peerDependenciesMeta": {
"supports-color": {
"optional": true
}
}
},
"node_modules/https-proxy-agent/node_modules/ms": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
},
"node_modules/iconv-lite": { "node_modules/iconv-lite": {
"version": "0.4.24", "version": "0.4.24",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
@ -602,6 +685,11 @@
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
}, },
"node_modules/mute-stream": {
"version": "0.0.8",
"resolved": "https://registry.npmjs.org/mute-stream/-/mute-stream-0.0.8.tgz",
"integrity": "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA=="
},
"node_modules/negotiator": { "node_modules/negotiator": {
"version": "0.6.3", "version": "0.6.3",
"resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz",
@ -716,6 +804,17 @@
"node": ">= 0.8" "node": ">= 0.8"
} }
}, },
"node_modules/read": {
"version": "1.0.7",
"resolved": "https://registry.npmjs.org/read/-/read-1.0.7.tgz",
"integrity": "sha512-rSOKNYUmaxy0om1BNjMN4ezNT6VKK+2xF4GBhc81mkH7L60i6dp8qPYrkndNLT3QPphoII3maL9PVC9XmhHwVQ==",
"dependencies": {
"mute-stream": "~0.0.4"
},
"engines": {
"node": ">=0.8"
}
},
"node_modules/request": { "node_modules/request": {
"version": "2.88.2", "version": "2.88.2",
"resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz", "resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz",
@ -992,6 +1091,20 @@
"optional": true "optional": true
} }
} }
},
"node_modules/wscat": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/wscat/-/wscat-5.2.0.tgz",
"integrity": "sha512-UkVzuBdv3jk1Nt0mVCTw0wt/2kGPXry9MZMMUHYE/kEIJdtz1Ez28HD2WQdapC75tM10KZVL8EHG1/WHFK9dtw==",
"dependencies": {
"commander": "^9.3.0",
"https-proxy-agent": "^5.0.0",
"read": "^1.0.7",
"ws": "^8.0.0"
},
"bin": {
"wscat": "bin/wscat"
}
} }
} }
} }

View File

@ -1,7 +1,7 @@
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head>
<meta charset="UTF-8"> <head>
<title>Real-time Speech-to-Text</title> <title>Real-time Speech-to-Text</title>
<style> <style>
.recording { .recording {
@ -9,74 +9,260 @@
color: white; color: white;
} }
</style> </style>
</head> </head>
<body>
<h1>Real-time Speech-to-Text</h1> <body>
<p id="connection-status">WebSocket disconnected</p> <h1>Rt STT</h1>
<button id="record-button" disabled > Start Recording</button> <label class="toggle">
<input type="checkbox"
id="autosend" />
<span class="slider">Continious</span>
</label>
<button id="record-button"
disabled>Start Recording</button>
<span id="connection-status"></span>
<div id="status-recording"></div>
<p id="transcription"></p> <p id="transcription"></p>
<canvas id="canvas"
width="500"
height="500"></canvas>
<script> <script>
let socket = new WebSocket('ws://localhost:8081'); let socket;
let audioRecorder; let audioRecorder;
let recording = false; let recording = false;
let recordButton; let recordButton;
let connectionStatus; let connected = false;
let connectionStatus; //HTML auto generated
let statusRecording; //HTML auto generated
let audioContext;
let serverTime;
function connect(socket) { let volumeChecker;
console.log("trying to connect to server"); let lastVolumes = new Array(5);
let averageVolume;
let silenceCount = 0;
let isSpeaking = false;
let soundDetected = false;
let speakingCount = 0;
let SILENCE_DELAY_MS = 100;
const volumeHistory = [];
let canvas = document.getElementById("canvas");
let canvasCtx = canvas.getContext("2d");
let barWidth = 10;
let barSpacing = 5;
/**
 * Redraw the bar graph: clear the whole canvas, then paint one bar per entry
 * of `lastVolumes`, left to right, using the current canvas fill style.
 *
 * Each entry is scaled against 255 to a fraction of the canvas height and the
 * bar is anchored to the bottom edge of the canvas.
 *
 * @param {Array<number>} lastVolumes - volume samples to plot, one bar each.
 */
function drawSlidingBarGraph(lastVolumes) {
    const { width, height } = canvas;
    canvasCtx.clearRect(0, 0, width, height);

    // Horizontal distance between the left edges of two neighbouring bars.
    const stride = barWidth + barSpacing;

    // Index loop on purpose: empty slots of a sparse array are still visited.
    for (let idx = 0; idx < lastVolumes.length; idx += 1) {
        const scaledHeight = (lastVolumes[idx] / 255) * height;
        canvasCtx.fillRect(idx * stride, height - scaledHeight, barWidth, scaledHeight);
    }
}
// Check the audio level every SILENCE_DELAY_MS milliseconds and track whether
// the user is currently speaking. The recorder is restarted when a loud run
// begins and, in continuous mode, again once a long enough silent run follows
// speech, so that each stop() flushes one chunk to the `dataavailable` handler.
volumeChecker = setInterval(() => {
    if (!audioContext) {
        console.log("No audio context");
        return;
    }

    // Collapse the current frequency-domain magnitudes into a single average.
    const frequencyData = new Uint8Array(analyser.frequencyBinCount);
    analyser.getByteFrequencyData(frequencyData);
    let totalVolume = 0;
    for (let i = 0; i < frequencyData.length; i++) {
        totalVolume += frequencyData[i];
    }
    averageVolume = totalVolume / frequencyData.length;

    // Sliding window of the last 100 samples; the speech threshold is the
    // window mean plus a fixed margin of 5, so it adapts to background noise.
    volumeHistory.push(averageVolume);
    if (volumeHistory.length > 100) {
        volumeHistory.shift();
    }
    const threshold = volumeHistory.reduce((acc, curr) => acc + curr, 0) / volumeHistory.length + 5;

    if (averageVolume > threshold) {
        // A loud tick interrupts any silent run. Without this reset the
        // counter kept growing across utterances, so after the first one a
        // single silent tick was enough to trigger a send.
        silenceCount = 0;

        if (speakingCount === 0 && audioRecorder) {
            // First loud tick of a run: cut the recording here so the next
            // chunk starts at the beginning of the sound.
            console.log("startint new recording");
            audioRecorder.stop();
            audioRecorder.start();
            soundDetected = true;
        }
        speakingCount++;
        if (speakingCount > 7) {
            // More than 7 consecutive loud ticks counts as speech.
            statusRecording.innerHTML = "Listening...";
            statusRecording.style.color = "green";
            isSpeaking = true;
            console.log("Was silent and is now speaking. (" + averageVolume + " averageVolume).");
        }
    } else {
        speakingCount = 0;
        if (isSpeaking) {
            silenceCount++;
            if (silenceCount > 5) {
                // More than 5 consecutive silent ticks after speech ends the
                // utterance; in continuous mode flush it to the server.
                if (autosend.checked) {
                    console.log("Was speakng and is now silence. (" + averageVolume + " averageVolume). Sending audio to server.");
                    audioRecorder.stop();
                    audioRecorder.start();
                }
                isSpeaking = false;
                silenceCount = 0;
                statusRecording.innerHTML = "Silence detected...";
                statusRecording.style.color = "orange";
            }
        }
    }
}, SILENCE_DELAY_MS);
// Analyser node read by the volume-check timer. Declared explicitly so it is
// no longer created as an implicit global by the first assignment below.
let analyser;

/**
 * Route the microphone stream into a Web Audio analyser node so the
 * volume-check timer can sample its frequency data.
 *
 * Replaces the shared `audioContext` and `analyser`; a context left over from
 * an earlier call is closed first so repeated start/stop cycles do not pile
 * up open audio contexts.
 *
 * @param {MediaStream} stream - microphone stream obtained from getUserMedia.
 */
function InitAudioAnalyser(stream) {
    if (audioContext && audioContext.state !== "closed") {
        audioContext.close().catch((error) => {
            console.log("Error closing previous audio context: " + error);
        });
    }

    // Initialize the Web Audio API
    audioContext = new AudioContext();
    const source = audioContext.createMediaStreamSource(stream);
    analyser = audioContext.createAnalyser();
    analyser.fftSize = 2048;
    analyser.smoothingTimeConstant = 0.8;
    source.connect(analyser);

    // MediaRecorder exposes no sampleRate property, so report the rate of the
    // audio track being recorded instead (may be undefined if the browser
    // does not include it in the track settings).
    const audioTracks = stream.getAudioTracks();
    const recorderSampleRate = audioTracks.length > 0 ? audioTracks[0].getSettings().sampleRate : undefined;
    console.log("Audio context initialized. analyser sampling: " + audioContext.sampleRate + "Hz, recorder sampling: " + recorderSampleRate + "Hz");
}
function connect() {
connectionStatus.innerHTML = "Connecting to WS...";
let wsurl = "ws://localhost:8081";
//get current ws url from the server
fetch("/wsurl")
.then((response) => response.text())
.then((data) => {
wsurl = data;
console.log("Got ws url: '" + wsurl + "'");
})
.then(() => {
console.log("connecting to '" + wsurl + "'...");
socket = new WebSocket(wsurl);
socket.onopen = () => { socket.onopen = () => {
console.log("WebSocket connection opened."); console.log("WebSocket connection opened.");
connectionStatus.innerHTML = "Connected"; connectionStatus.innerHTML = "Connected to " + wsurl;
transcription.innerHTML = "Whisper something into the microphone...";
recordButton.disabled = false; recordButton.disabled = false;
connected = true;
}; };
socket.onmessage = (event) => { socket.onmessage = onmessage;
transcription.innerHTML = event.data;
};
socket.onclose = () => { socket.onclose = () => {
console.log("WebSocket connection closed"); console.log("WebSocket connection closed");
connectionStatus.innerHTML = "Disconnected"; connectionStatus.innerHTML = "Disconnected";
recordButton.disabled = true; recordButton.disabled = true;
setTimeout(connect, 5000); connected = false;
setTimeout(() => {
connect();
}, 5000);
}; };
})
.catch((error) => {
console.log("Error getting ws url: " + error);
connectionStatus.innerHTML = "Error getting ws url: " + error;
});
};
// WebSocket message handler (assigned to socket.onmessage in connect()).
// Logs the server's reply with a round-trip latency and shows the text in the
// transcription element: appended in continuous mode, replaced otherwise.
function onmessage(event) {
// serverTime is stamped in sendAudioToServer() right after the audio is sent;
// until something has been sent it is undefined and the latency is NaN.
let latency = Date.now() - serverTime;
console.log("Received message from server: " + event.data + " (latency: " + latency + "ms)");
// NOTE(review): `autosend` and `transcription` are not assigned anywhere in
// this script (the getElementById lookups in window.onload are commented out);
// presumably they resolve through the browser's element-id globals - confirm.
if (autosend.checked) {
//append to the text on new line
transcription.innerHTML += "<br>>" + event.data;
statusRecording.innerHTML = "waiting...";
statusRecording.style.color = "black";
} else {
//replace the text
transcription.innerHTML = event.data;
}
} }
function startRecording() { function startListening() {
// Initialize canvas
canvasCtx.fillStyle = "green";
recording = true; recording = true;
navigator.mediaDevices.getUserMedia({ audio: true }) navigator.mediaDevices.getUserMedia({ audio: { sampleRate: 16000, echoCancellation: true } }).then((stream) => {
.then((stream) => {
audioRecorder = new MediaRecorder(stream); audioRecorder = new MediaRecorder(stream);
audioRecorder.start(); audioRecorder.start();
audioRecorder.addEventListener('dataavailable', (event) => { audioRecorder.addEventListener("dataavailable", (event) => {
console.log("Sent some audio data to server."); console.log("Audio data available: " + event.data.size + " bytes");
socket.send(event.data); if (!soundDetected && autosend.checked) {
console.log("discarding audio data because not speaking");
return;
}
if (event.data.size > 0) {
sendAudioToServer(event.data);
soundDetected = false;
}
}); });
InitAudioAnalyser(stream);
}); });
recordButton.classList.add("recording");
recordButton.innerHTML = "Stop Recording"; recordButton.innerHTML = "Stop Recording";
recordButton.classList.add("recording");
} }
function stopRecording() { function stopListening() {
recording = false; recording = false;
audioRecorder.stop(); audioRecorder.stop();
recordButton.classList.remove("recording");
recordButton.innerHTML = "Start Recording"; recordButton.innerHTML = "Start Recording";
recordButton.classList.remove("recording");
clearInterval(volumeChecker);
} }
function toggleRecording() { function sendAudioToServer(data) {
if (recording) { if (connected) {
stopRecording(); //const blob = new Blob(data, { type: 'audio/webm' });
socket.send(data);
serverTime = Date.now();
console.log("Sent some audio data to server.");
if (!autosend.checked) {
transcription.innerHTML = "Processing audio...";
}
} else { } else {
startRecording(); console.log("Not connected, not sending audio data to server.");
}
}
function toggleListening() {
if (socket.readyState === WebSocket.OPEN) {
if (recording) {
stopListening();
} else {
startListening();
}
} }
} }
window.onload = () => { window.onload = () => {
recordButton = document.getElementById("record-button"); recordButton = document.getElementById("record-button");
recordButton.addEventListener('click', toggleRecording); recordButton.addEventListener("click", toggleListening);
connectionStatus = document.getElementById("connection-status"); connectionStatus = document.getElementById("connection-status");
//transcription = document.getElementById("transcription");
//autosend = document.getElementById("autosend");
statusRecording = document.getElementById("status-recording");
connect(socket); connect(socket);
}; };
</script> </script>
<script src="https://cdn.webrtc-experiment.com/MediaStreamRecorder.js"></script> <script src="https://cdn.webrtc-experiment.com/MediaStreamRecorder.js"></script>
</body> </body>
</html> </html>

View File

@ -2,17 +2,27 @@ const WebSocket = require('ws');
const wss = new WebSocket.Server({ port: 8081 }); const wss = new WebSocket.Server({ port: 8081 });
console.log('WebSocket server started on port 8081'); console.log('WebSocket server started on port 8081');
//load TTS_BACKEND_URL from .env file
//require('dotenv').config();
console.log(process.env)
console.log(process.env.TTS_BACKEND_URL)
console.log(process.env.WS_URL)
//we use https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice to transcribe the audio //we use https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice to transcribe the audio
//docker run -p 9009:9009 -d onerahmet/openai-whisper-asr-webservice //docker run -p 9009:9009 -d onerahmet/openai-whisper-asr-webservice
wss.on('connection', (ws) => { wss.on('connection', (ws) => {
console.log('Client ' + ws._socket.remoteAddress + ' connected'); console.log('Client ' + ws._socket.remoteAddress + ' connected');
ws.on('message', (data) => { ws.on('message', (data) => {
console.log('Received data from client: ' + data.length + ' bytes'); //show the size of the audio data as 0.000 MB
console.log('Received data from client: ' + (data.length / 1024 / 1024).toFixed(3) + ' MB');
var request = require('request'); var request = require('request');
var formData = { var formData = {
task: 'transcribe', task: 'transcribe',
language: 'en-US', language: 'en-US', //bg-BG|en-US
output: 'json', output: 'json',
audio_file: { audio_file: {
value: data, value: data,
@ -22,15 +32,28 @@ wss.on('connection', (ws) => {
} }
} }
}; };
request.post({url:'http://192.168.0.10:9009/asr', formData: formData}, function optionalCallback(err, httpResponse, body) { //save the audio data to a file to /rec folder
var fs = require('fs');
var timestampfilename = Date.now();
fs.writeFile('./rec/audio' + timestampfilename + '.ogg', data, function (err) {
//fs.writeFile('audio' + timestampfilename + '.ogg', data, function (err) {
if (err) {
return console.log(err);
}
console.log('Audio data saved to audio.ogg');
});
request.post({url:'http://192.168.0.10:9008/asr', formData: formData}, function optionalCallback(err, httpResponse, body) {
if (err) { if (err) {
return console.error('upload failed:', err); return console.error('upload failed:', err);
} }
console.log('Upload successful! Server responded with:', body); console.log('Whisper decoded:', body);
ws.send(">>: " + body); ws.send(body);
}); });
ws.send("Processing audio...");
}); });
}); });
@ -50,6 +73,13 @@ app.get('/', (req, res) => {
res.sendFile(path.join(__dirname, 'client.html')); res.sendFile(path.join(__dirname, 'client.html'));
}); });
// Expose the WebSocket URL from the WS_URL environment variable as plain text
// so the browser client can discover which endpoint to connect to.
// NOTE(review): when WS_URL is unset the body is empty - confirm the client
// is expected to treat that as an error.
app.get('/wsurl', (req, res) => {
    // Express 4 ignores the status and headers of a three-argument
    // res.send(body, status, headers) call, so set them via the chainable API.
    res.status(200).type('text/plain').send(process.env.WS_URL);
});
app.listen(8080, () => { app.listen(8080, () => {
console.log('Server listening on port 8080'); console.log('Server listening on port 8080');
}); });