language detection and translation implemented - not working very well

This commit is contained in:
Dobromir Popov 2024-06-10 01:40:52 +03:00
parent d28f73cd7e
commit aac600ebaf
2 changed files with 65 additions and 33 deletions

View File

@ -3,11 +3,9 @@
<head>
<title>Real-time Speech-to-Text</title>
<meta name="viewport"
content="width=device-width, initial-scale=1">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- Add the Tailwind CSS library -->
<link rel="stylesheet"
href="https://cdnjs.cloudflare.com/ajax/libs/tailwindcss/2.2.19/tailwind.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/tailwindcss/2.2.19/tailwind.min.css">
</head>
<body class="bg-gray-100">
@ -15,56 +13,48 @@
<h1 class="text-2xl font-bold mb-4 text-center">Rt STT</h1>
<div class="flex justify-center items-center mb-4">
<label class="toggle flex items-center">
<input type="checkbox"
id="autosend"
class="mr-2">
<input type="checkbox" id="autosend" class="mr-2">
<span class="slider"></span>
<span class="ml-2">Continuous</span>
</label>
<select id="input-devices"
class="ml-4">
<select id="input-devices" class="ml-4">
<option value="default">Default</option>
</select>
<select id="language-select">
<option value="auto">Auto</option>
<option value="en">English</option>
<option value="bg">Български</option>
<option value="fr">Français</option>
</select>
<select id="task-select">
<option value="transcribe">Transcribe</option>
<option value="translate">Translate</option>
</select>
<label class="toggle flex items-center ml-4">
<input type="checkbox"
id="store-recordings"
class="mr-2">
<input type="checkbox" id="store-recordings" class="mr-2">
<span class="slider"></span>
<span class="ml-2">Store Recordings</span>
</div>
<div class="flex justify-center items-center mb-4">
<span id="record-actions">
<button id="record-button"
disabled
<button id="record-button" disabled
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4">
Start Recording</button>
<button id="record-button-speakers"
disabled
<button id="record-button-speakers" disabled
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4">
Stream from speakers</button>
</span>
</div>
<div class="flex justify-center items-center mb-4">
<div id="connection-status"
style="margin-right: 5px;"></div>
<div id="connection-status" style="margin-right: 5px;"></div>
</div>
<div class="flex justify-center items-center mb-4">
<div id="info"></div>
</div>
<div id="status-recording"
class="flex justify-center items-center mb-4">
<div id="status-recording" class="flex justify-center items-center mb-4">
</div>
<div class="relative rounded-lg border border-gray-300 shadow-sm">
<textarea id="transcription"
class="block w-full h-48 p-4 resize-none"
<textarea id="transcription" class="block w-full h-48 p-4 resize-none"
placeholder="Whisper something into the microphone..."></textarea>
<button id="copyButton"
class="absolute top-0 right-0 px-4 py-2 text-sm font-medium text-gray-700 bg-gray-200 hover:bg-gray-300 rounded-bl-lg focus:outline-none"
@ -72,8 +62,7 @@ disabled
Copy
</button>
</div>
<canvas id="canvas"
class="w-full"></canvas>
<canvas id="canvas" class="w-full"></canvas>
<script>
let sessionId;
@ -274,6 +263,10 @@ disabled
if (json.hasOwnProperty("language")) {
languageSelect.value = json.language;
}
if (json.hasOwnProperty("languageDetected")) {
languageSelect.value = json.language;
statusRecording.innerHTML = "Detected language: " + json.languageDetected;
}
if (json.hasOwnProperty("taskSelect")) {
taskSelect.value = json.taskSelect;
@ -283,6 +276,16 @@ disabled
if (json.hasOwnProperty("storeRecordings")) {
storeRecordings.checked = json.storeRecordings;
}
if (json.hasOwnProperty("text")) {
transcription.value += "\r\n" + json.text;
}
if (json.hasOwnProperty("queueCounter")) {
let latency = Date.now() - serverTime;
console.log("Received message from server: " + event.data + " (latency: " + latency + "ms)");
info.innerHTML = "latency: " + latency + "ms; server queue: " + queue + " requests";
}
return;
} catch (e) {
//not json

View File

@ -9,8 +9,8 @@ const wss = new WebSocket.Server({ port: process.env.SERVER_PORT_WS });
// console.log("ENV="+process.env)
console.log("TTS_BACKEND_URL="+process.env.TTS_BACKEND_URL)
console.log("WS_URL="+process.env.WS_URL)
console.log("TTS_BACKEND_URL=" + process.env.TTS_BACKEND_URL)
console.log("WS_URL=" + process.env.WS_URL)
let language = "en";
let storeRecordings = false;
@ -19,7 +19,7 @@ let queueCounter = 0;
const storage = require('node-persist');
storage.init().then(() => {
storage.getItem('language').then((value) => {
if (value != undefined) { language = value; console.log('language: ' + language); }
if (value != undefined) { language = value; console.log('stored language: ' + language); }
else { storage.setItem('language', language).then(() => { console.log('language set to ' + language + "(default)"); }); }
});
@ -51,11 +51,14 @@ wss.on('connection', (ws, req) => {
let language = sessionData?.language || 'en';
let task = sessionData?.task || 'transcribe';
//show the size of the audio data as 0.000 MB
console.log('(queue ' + queueCounter + ') Received ' + (data.length / 1024 / 1024).toFixed(3) + ' MB audio from client. Crrent language: ' + language);
console.log('(queue ' + queueCounter + ') Received ' + (data.length / 1024 / 1024).toFixed(3) + ' MB audio from client. Crrent language: ' + language, 'task: ' + task);
var request = require('request');
var endpoint = process.env.TTS_BACKEND_URL;
var formData = {
task: task,
language: sessionData.language,
language: language,
vad_filter: 'true',
output: 'json',
audio_file: {
value: data,
@ -65,6 +68,31 @@ wss.on('connection', (ws, req) => {
}
}
};
console.log('language:', language);
// When no concrete language is selected, ask the backend to identify it from
// the audio in formData before updating the session.
if (language === 'auto' || language === '') {
    console.log('Detecting language...');
    // The detection URL is built by replacing '/asr' in the backend URL.
    // NOTE(review): the callback fires after this block returns, so statements
    // that follow still see the pre-detection value of `language` - confirm
    // whether the transcription request should wait for this result.
    request.post({ url: endpoint.replace('/asr', '/detect-language'), formData: formData }, function optionalCallback(err, httpResponse, body) {
        // Transport-level failure: there is no usable body to inspect.
        if (err) {
            console.error('Error: language detection request failed:', err);
            return;
        }
        console.log('detected:', body);
        if (typeof body === 'string') {
            // A non-JSON reply (e.g. an HTML error page) must not throw out of the callback.
            try {
                body = JSON.parse(body);
            } catch (parseError) {
                console.error('Error: language detection response is not valid JSON:', parseError.message);
                return;
            }
        }
        if (!body || !body.language_code) {
            console.error('Error: Invalid body or missing language_code');
            return;
        }
        // Update both the handler-local value and the session with the detected code.
        language = body.language_code;
        sessionData.language = language;
        console.log('language set to:', language);
        // Notify the client so it can display the detected language.
        webSocket.send(JSON.stringify({ languageDetected: body.detected_language }));
    });
}
storeRecordings = sessionData?.storeRecordings || storeRecordings;
if (storeRecordings) {
@ -94,7 +122,8 @@ wss.on('connection', (ws, req) => {
var duration = new Date().getTime() - start;
//console.log('decoded (' + duration + 'ms):', body);
console.log('decoded (' + (duration / 1000).toFixed(2) + 's):', body);
webSocket.send("(" + queueCounter + ") " + body);
//webSocket.send("(" + queueCounter + ") " + body);
webSocket.send(JSON.stringify({ queueCounter: queueCounter, duration: duration, language: language, text: body}));
});
});
});
@ -158,7 +187,7 @@ app.post('/settings', (req, res) => {
sessionData.storeRecordings = body.storeRecordings;
console.log(`Session ${sid}: storeRecordings set to ${sessionData.storeRecordings}`);
}
if(body.task != undefined){
if (body.task != undefined) {
sessionData.task = body.task;
}