language detection and translation implemented - not working very well

This commit is contained in:
Dobromir Popov 2024-06-10 01:40:52 +03:00
parent d28f73cd7e
commit aac600ebaf
2 changed files with 65 additions and 33 deletions

View File

@ -3,11 +3,9 @@
<head> <head>
<title>Real-time Speech-to-Text</title> <title>Real-time Speech-to-Text</title>
<meta name="viewport" <meta name="viewport" content="width=device-width, initial-scale=1">
content="width=device-width, initial-scale=1">
<!-- Add the Tailwind CSS library --> <!-- Add the Tailwind CSS library -->
<link rel="stylesheet" <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/tailwindcss/2.2.19/tailwind.min.css">
href="https://cdnjs.cloudflare.com/ajax/libs/tailwindcss/2.2.19/tailwind.min.css">
</head> </head>
<body class="bg-gray-100"> <body class="bg-gray-100">
@ -15,56 +13,48 @@
<h1 class="text-2xl font-bold mb-4 text-center">Rt STT</h1> <h1 class="text-2xl font-bold mb-4 text-center">Rt STT</h1>
<div class="flex justify-center items-center mb-4"> <div class="flex justify-center items-center mb-4">
<label class="toggle flex items-center"> <label class="toggle flex items-center">
<input type="checkbox" <input type="checkbox" id="autosend" class="mr-2">
id="autosend"
class="mr-2">
<span class="slider"></span> <span class="slider"></span>
<span class="ml-2">Continuous</span> <span class="ml-2">Continuous</span>
</label> </label>
<select id="input-devices" <select id="input-devices" class="ml-4">
class="ml-4">
<option value="default">Default</option> <option value="default">Default</option>
</select> </select>
<select id="language-select"> <select id="language-select">
<option value="auto">Auto</option>
<option value="en">English</option> <option value="en">English</option>
<option value="bg">Български</option> <option value="bg">Български</option>
<option value="fr">Français</option>
</select> </select>
<select id="task-select"> <select id="task-select">
<option value="transcribe">Transcribe</option> <option value="transcribe">Transcribe</option>
<option value="translate">Translate</option> <option value="translate">Translate</option>
</select> </select>
<label class="toggle flex items-center ml-4"> <label class="toggle flex items-center ml-4">
<input type="checkbox" <input type="checkbox" id="store-recordings" class="mr-2">
id="store-recordings"
class="mr-2">
<span class="slider"></span> <span class="slider"></span>
<span class="ml-2">Store Recordings</span> <span class="ml-2">Store Recordings</span>
</div> </div>
<div class="flex justify-center items-center mb-4"> <div class="flex justify-center items-center mb-4">
<span id="record-actions"> <span id="record-actions">
<button id="record-button" <button id="record-button" disabled
disabled
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4"> class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4">
Start Recording</button> Start Recording</button>
<button id="record-button-speakers" <button id="record-button-speakers" disabled
disabled
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4"> class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4">
Stream from speakers</button> Stream from speakers</button>
</span> </span>
</div> </div>
<div class="flex justify-center items-center mb-4"> <div class="flex justify-center items-center mb-4">
<div id="connection-status" <div id="connection-status" style="margin-right: 5px;"></div>
style="margin-right: 5px;"></div>
</div> </div>
<div class="flex justify-center items-center mb-4"> <div class="flex justify-center items-center mb-4">
<div id="info"></div> <div id="info"></div>
</div> </div>
<div id="status-recording" <div id="status-recording" class="flex justify-center items-center mb-4">
class="flex justify-center items-center mb-4">
</div> </div>
<div class="relative rounded-lg border border-gray-300 shadow-sm"> <div class="relative rounded-lg border border-gray-300 shadow-sm">
<textarea id="transcription" <textarea id="transcription" class="block w-full h-48 p-4 resize-none"
class="block w-full h-48 p-4 resize-none"
placeholder="Whisper something into the microphone..."></textarea> placeholder="Whisper something into the microphone..."></textarea>
<button id="copyButton" <button id="copyButton"
class="absolute top-0 right-0 px-4 py-2 text-sm font-medium text-gray-700 bg-gray-200 hover:bg-gray-300 rounded-bl-lg focus:outline-none" class="absolute top-0 right-0 px-4 py-2 text-sm font-medium text-gray-700 bg-gray-200 hover:bg-gray-300 rounded-bl-lg focus:outline-none"
@ -72,8 +62,7 @@ disabled
Copy Copy
</button> </button>
</div> </div>
<canvas id="canvas" <canvas id="canvas" class="w-full"></canvas>
class="w-full"></canvas>
<script> <script>
let sessionId; let sessionId;
@ -274,6 +263,10 @@ disabled
if (json.hasOwnProperty("language")) { if (json.hasOwnProperty("language")) {
languageSelect.value = json.language; languageSelect.value = json.language;
} }
if (json.hasOwnProperty("languageDetected")) {
languageSelect.value = json.language;
statusRecording.innerHTML = "Detected language: " + json.languageDetected;
}
if (json.hasOwnProperty("taskSelect")) { if (json.hasOwnProperty("taskSelect")) {
taskSelect.value = json.taskSelect; taskSelect.value = json.taskSelect;
@ -283,6 +276,16 @@ disabled
if (json.hasOwnProperty("storeRecordings")) { if (json.hasOwnProperty("storeRecordings")) {
storeRecordings.checked = json.storeRecordings; storeRecordings.checked = json.storeRecordings;
} }
if (json.hasOwnProperty("text")) {
transcription.value += "\r\n" + json.text;
}
if (json.hasOwnProperty("queueCounter")) {
let latency = Date.now() - serverTime;
console.log("Received message from server: " + event.data + " (latency: " + latency + "ms)");
info.innerHTML = "latency: " + latency + "ms; server queue: " + queue + " requests";
}
return; return;
} catch (e) { } catch (e) {
//not json //not json

View File

@ -9,8 +9,8 @@ const wss = new WebSocket.Server({ port: process.env.SERVER_PORT_WS });
// console.log("ENV="+process.env) // console.log("ENV="+process.env)
console.log("TTS_BACKEND_URL="+process.env.TTS_BACKEND_URL) console.log("TTS_BACKEND_URL=" + process.env.TTS_BACKEND_URL)
console.log("WS_URL="+process.env.WS_URL) console.log("WS_URL=" + process.env.WS_URL)
let language = "en"; let language = "en";
let storeRecordings = false; let storeRecordings = false;
@ -19,7 +19,7 @@ let queueCounter = 0;
const storage = require('node-persist'); const storage = require('node-persist');
storage.init().then(() => { storage.init().then(() => {
storage.getItem('language').then((value) => { storage.getItem('language').then((value) => {
if (value != undefined) { language = value; console.log('language: ' + language); } if (value != undefined) { language = value; console.log('stored language: ' + language); }
else { storage.setItem('language', language).then(() => { console.log('language set to ' + language + "(default)"); }); } else { storage.setItem('language', language).then(() => { console.log('language set to ' + language + "(default)"); }); }
}); });
@ -51,11 +51,14 @@ wss.on('connection', (ws, req) => {
let language = sessionData?.language || 'en'; let language = sessionData?.language || 'en';
let task = sessionData?.task || 'transcribe'; let task = sessionData?.task || 'transcribe';
//show the size of the audio data as 0.000 MB //show the size of the audio data as 0.000 MB
console.log('(queue ' + queueCounter + ') Received ' + (data.length / 1024 / 1024).toFixed(3) + ' MB audio from client. Crrent language: ' + language); console.log('(queue ' + queueCounter + ') Received ' + (data.length / 1024 / 1024).toFixed(3) + ' MB audio from client. Crrent language: ' + language, 'task: ' + task);
var request = require('request'); var request = require('request');
var endpoint = process.env.TTS_BACKEND_URL;
var formData = { var formData = {
task: task, task: task,
language: sessionData.language, language: language,
vad_filter: 'true',
output: 'json', output: 'json',
audio_file: { audio_file: {
value: data, value: data,
@ -65,6 +68,31 @@ wss.on('connection', (ws, req) => {
} }
} }
}; };
console.log('language:', language);
if (language == 'auto' || language == '') {
console.log('Detecting language...');
request.post({ url: endpoint.replace('/asr', '/detect-language'), formData: formData }, function optionalCallback(err, httpResponse, body) {
console.log('detected:', body);
if (typeof body === 'string') {
body = JSON.parse(body);
}
if (body && body.language_code) {
language = body.language_code; if (body && body.language_code) {
let language = body.language_code;
sessionData.language = language;
console.log('language set to:', language);
webSocket.send(JSON.stringify({ languageDetected: body.detected_language }));
} else {
console.error('Error: Invalid body or missing language_code');
}
sessionData.language = language;
console.log('language set to:', language);
} else {
console.error('Error: Invalid body or missing language_code');
}
});
}
storeRecordings = sessionData?.storeRecordings || storeRecordings; storeRecordings = sessionData?.storeRecordings || storeRecordings;
if (storeRecordings) { if (storeRecordings) {
@ -94,7 +122,8 @@ wss.on('connection', (ws, req) => {
var duration = new Date().getTime() - start; var duration = new Date().getTime() - start;
//console.log('decoded (' + duration + 'ms):', body); //console.log('decoded (' + duration + 'ms):', body);
console.log('decoded (' + (duration / 1000).toFixed(2) + 's):', body); console.log('decoded (' + (duration / 1000).toFixed(2) + 's):', body);
webSocket.send("(" + queueCounter + ") " + body); //webSocket.send("(" + queueCounter + ") " + body);
webSocket.send(JSON.stringify({ queueCounter: queueCounter, duration: duration, language: language, text: body}));
}); });
}); });
}); });
@ -158,7 +187,7 @@ app.post('/settings', (req, res) => {
sessionData.storeRecordings = body.storeRecordings; sessionData.storeRecordings = body.storeRecordings;
console.log(`Session ${sid}: storeRecordings set to ${sessionData.storeRecordings}`); console.log(`Session ${sid}: storeRecordings set to ${sessionData.storeRecordings}`);
} }
if(body.task != undefined){ if (body.task != undefined) {
sessionData.task = body.task; sessionData.task = body.task;
} }