language detection and translation implemented - not working very well
This commit is contained in:
parent
d28f73cd7e
commit
aac600ebaf
@ -3,11 +3,9 @@
|
||||
|
||||
<head>
|
||||
<title>Real-time Speech-to-Text</title>
|
||||
<meta name="viewport"
|
||||
content="width=device-width, initial-scale=1">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<!-- Add the Tailwind CSS library -->
|
||||
<link rel="stylesheet"
|
||||
href="https://cdnjs.cloudflare.com/ajax/libs/tailwindcss/2.2.19/tailwind.min.css">
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/tailwindcss/2.2.19/tailwind.min.css">
|
||||
</head>
|
||||
|
||||
<body class="bg-gray-100">
|
||||
@ -15,56 +13,48 @@
|
||||
<h1 class="text-2xl font-bold mb-4 text-center">Rt STT</h1>
|
||||
<div class="flex justify-center items-center mb-4">
|
||||
<label class="toggle flex items-center">
|
||||
<input type="checkbox"
|
||||
id="autosend"
|
||||
class="mr-2">
|
||||
<input type="checkbox" id="autosend" class="mr-2">
|
||||
<span class="slider"></span>
|
||||
<span class="ml-2">Continuous</span>
|
||||
</label>
|
||||
<select id="input-devices"
|
||||
class="ml-4">
|
||||
<select id="input-devices" class="ml-4">
|
||||
<option value="default">Default</option>
|
||||
</select>
|
||||
<select id="language-select">
|
||||
<option value="auto">Auto</option>
|
||||
<option value="en">English</option>
|
||||
<option value="bg">Български</option>
|
||||
<option value="fr">Français</option>
|
||||
</select>
|
||||
<select id="task-select">
|
||||
<option value="transcribe">Transcribe</option>
|
||||
<option value="translate">Translate</option>
|
||||
</select>
|
||||
<label class="toggle flex items-center ml-4">
|
||||
<input type="checkbox"
|
||||
id="store-recordings"
|
||||
class="mr-2">
|
||||
<input type="checkbox" id="store-recordings" class="mr-2">
|
||||
<span class="slider"></span>
|
||||
<span class="ml-2">Store Recordings</span>
|
||||
</div>
|
||||
<div class="flex justify-center items-center mb-4">
|
||||
<span id="record-actions">
|
||||
<button id="record-button"
|
||||
disabled
|
||||
<button id="record-button" disabled
|
||||
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4">
|
||||
Start Recording</button>
|
||||
<button id="record-button-speakers"
|
||||
disabled
|
||||
<button id="record-button-speakers" disabled
|
||||
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4">
|
||||
Stream from speakers</button>
|
||||
</span>
|
||||
</div>
|
||||
<div class="flex justify-center items-center mb-4">
|
||||
<div id="connection-status"
|
||||
style="margin-right: 5px;"></div>
|
||||
<div id="connection-status" style="margin-right: 5px;"></div>
|
||||
</div>
|
||||
<div class="flex justify-center items-center mb-4">
|
||||
<div id="info"></div>
|
||||
</div>
|
||||
<div id="status-recording"
|
||||
class="flex justify-center items-center mb-4">
|
||||
<div id="status-recording" class="flex justify-center items-center mb-4">
|
||||
</div>
|
||||
<div class="relative rounded-lg border border-gray-300 shadow-sm">
|
||||
<textarea id="transcription"
|
||||
class="block w-full h-48 p-4 resize-none"
|
||||
<textarea id="transcription" class="block w-full h-48 p-4 resize-none"
|
||||
placeholder="Whisper something into the microphone..."></textarea>
|
||||
<button id="copyButton"
|
||||
class="absolute top-0 right-0 px-4 py-2 text-sm font-medium text-gray-700 bg-gray-200 hover:bg-gray-300 rounded-bl-lg focus:outline-none"
|
||||
@ -72,8 +62,7 @@ disabled
|
||||
Copy
|
||||
</button>
|
||||
</div>
|
||||
<canvas id="canvas"
|
||||
class="w-full"></canvas>
|
||||
<canvas id="canvas" class="w-full"></canvas>
|
||||
<script>
|
||||
let sessionId;
|
||||
|
||||
@ -274,6 +263,10 @@ disabled
|
||||
if (json.hasOwnProperty("language")) {
|
||||
languageSelect.value = json.language;
|
||||
}
|
||||
if (json.hasOwnProperty("languageDetected")) {
|
||||
languageSelect.value = json.language;
|
||||
statusRecording.innerHTML = "Detected language: " + json.languageDetected;
|
||||
}
|
||||
|
||||
if (json.hasOwnProperty("taskSelect")) {
|
||||
taskSelect.value = json.taskSelect;
|
||||
@ -283,6 +276,16 @@ disabled
|
||||
if (json.hasOwnProperty("storeRecordings")) {
|
||||
storeRecordings.checked = json.storeRecordings;
|
||||
}
|
||||
|
||||
if (json.hasOwnProperty("text")) {
|
||||
transcription.value += "\r\n" + json.text;
|
||||
}
|
||||
|
||||
if (json.hasOwnProperty("queueCounter")) {
|
||||
let latency = Date.now() - serverTime;
|
||||
console.log("Received message from server: " + event.data + " (latency: " + latency + "ms)");
|
||||
info.innerHTML = "latency: " + latency + "ms; server queue: " + queue + " requests";
|
||||
}
|
||||
return;
|
||||
} catch (e) {
|
||||
//not json
|
||||
|
@ -9,8 +9,8 @@ const wss = new WebSocket.Server({ port: process.env.SERVER_PORT_WS });
|
||||
|
||||
|
||||
// console.log("ENV="+process.env)
|
||||
console.log("TTS_BACKEND_URL="+process.env.TTS_BACKEND_URL)
|
||||
console.log("WS_URL="+process.env.WS_URL)
|
||||
console.log("TTS_BACKEND_URL=" + process.env.TTS_BACKEND_URL)
|
||||
console.log("WS_URL=" + process.env.WS_URL)
|
||||
|
||||
let language = "en";
|
||||
let storeRecordings = false;
|
||||
@ -19,7 +19,7 @@ let queueCounter = 0;
|
||||
const storage = require('node-persist');
|
||||
storage.init().then(() => {
|
||||
storage.getItem('language').then((value) => {
|
||||
if (value != undefined) { language = value; console.log('language: ' + language); }
|
||||
if (value != undefined) { language = value; console.log('stored language: ' + language); }
|
||||
else { storage.setItem('language', language).then(() => { console.log('language set to ' + language + "(default)"); }); }
|
||||
});
|
||||
|
||||
@ -51,11 +51,14 @@ wss.on('connection', (ws, req) => {
|
||||
let language = sessionData?.language || 'en';
|
||||
let task = sessionData?.task || 'transcribe';
|
||||
//show the size of the audio data as 0.000 MB
|
||||
console.log('(queue ' + queueCounter + ') Received ' + (data.length / 1024 / 1024).toFixed(3) + ' MB audio from client. Crrent language: ' + language);
|
||||
console.log('(queue ' + queueCounter + ') Received ' + (data.length / 1024 / 1024).toFixed(3) + ' MB audio from client. Crrent language: ' + language, 'task: ' + task);
|
||||
var request = require('request');
|
||||
|
||||
var endpoint = process.env.TTS_BACKEND_URL;
|
||||
var formData = {
|
||||
task: task,
|
||||
language: sessionData.language,
|
||||
language: language,
|
||||
vad_filter: 'true',
|
||||
output: 'json',
|
||||
audio_file: {
|
||||
value: data,
|
||||
@ -65,6 +68,31 @@ wss.on('connection', (ws, req) => {
|
||||
}
|
||||
}
|
||||
};
|
||||
console.log('language:', language);
|
||||
if (language == 'auto' || language == '') {
|
||||
console.log('Detecting language...');
|
||||
request.post({ url: endpoint.replace('/asr', '/detect-language'), formData: formData }, function optionalCallback(err, httpResponse, body) {
|
||||
console.log('detected:', body);
|
||||
if (typeof body === 'string') {
|
||||
body = JSON.parse(body);
|
||||
}
|
||||
if (body && body.language_code) {
|
||||
language = body.language_code; if (body && body.language_code) {
|
||||
let language = body.language_code;
|
||||
sessionData.language = language;
|
||||
console.log('language set to:', language);
|
||||
|
||||
webSocket.send(JSON.stringify({ languageDetected: body.detected_language }));
|
||||
} else {
|
||||
console.error('Error: Invalid body or missing language_code');
|
||||
}
|
||||
sessionData.language = language;
|
||||
console.log('language set to:', language);
|
||||
} else {
|
||||
console.error('Error: Invalid body or missing language_code');
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
storeRecordings = sessionData?.storeRecordings || storeRecordings;
|
||||
if (storeRecordings) {
|
||||
@ -94,7 +122,8 @@ wss.on('connection', (ws, req) => {
|
||||
var duration = new Date().getTime() - start;
|
||||
//console.log('decoded (' + duration + 'ms):', body);
|
||||
console.log('decoded (' + (duration / 1000).toFixed(2) + 's):', body);
|
||||
webSocket.send("(" + queueCounter + ") " + body);
|
||||
//webSocket.send("(" + queueCounter + ") " + body);
|
||||
webSocket.send(JSON.stringify({ queueCounter: queueCounter, duration: duration, language: language, text: body}));
|
||||
});
|
||||
});
|
||||
});
|
||||
@ -158,7 +187,7 @@ app.post('/settings', (req, res) => {
|
||||
sessionData.storeRecordings = body.storeRecordings;
|
||||
console.log(`Session ${sid}: storeRecordings set to ${sessionData.storeRecordings}`);
|
||||
}
|
||||
if(body.task != undefined){
|
||||
if (body.task != undefined) {
|
||||
sessionData.task = body.task;
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user