language detection and translation implemented - not working very well
This commit is contained in:
parent
d28f73cd7e
commit
aac600ebaf
@ -3,11 +3,9 @@
|
|||||||
|
|
||||||
<head>
|
<head>
|
||||||
<title>Real-time Speech-to-Text</title>
|
<title>Real-time Speech-to-Text</title>
|
||||||
<meta name="viewport"
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
content="width=device-width, initial-scale=1">
|
|
||||||
<!-- Add the Tailwind CSS library -->
|
<!-- Add the Tailwind CSS library -->
|
||||||
<link rel="stylesheet"
|
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/tailwindcss/2.2.19/tailwind.min.css">
|
||||||
href="https://cdnjs.cloudflare.com/ajax/libs/tailwindcss/2.2.19/tailwind.min.css">
|
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body class="bg-gray-100">
|
<body class="bg-gray-100">
|
||||||
@ -15,56 +13,48 @@
|
|||||||
<h1 class="text-2xl font-bold mb-4 text-center">Rt STT</h1>
|
<h1 class="text-2xl font-bold mb-4 text-center">Rt STT</h1>
|
||||||
<div class="flex justify-center items-center mb-4">
|
<div class="flex justify-center items-center mb-4">
|
||||||
<label class="toggle flex items-center">
|
<label class="toggle flex items-center">
|
||||||
<input type="checkbox"
|
<input type="checkbox" id="autosend" class="mr-2">
|
||||||
id="autosend"
|
|
||||||
class="mr-2">
|
|
||||||
<span class="slider"></span>
|
<span class="slider"></span>
|
||||||
<span class="ml-2">Continuous</span>
|
<span class="ml-2">Continuous</span>
|
||||||
</label>
|
</label>
|
||||||
<select id="input-devices"
|
<select id="input-devices" class="ml-4">
|
||||||
class="ml-4">
|
|
||||||
<option value="default">Default</option>
|
<option value="default">Default</option>
|
||||||
</select>
|
</select>
|
||||||
<select id="language-select">
|
<select id="language-select">
|
||||||
|
<option value="auto">Auto</option>
|
||||||
<option value="en">English</option>
|
<option value="en">English</option>
|
||||||
<option value="bg">Български</option>
|
<option value="bg">Български</option>
|
||||||
|
<option value="fr">Français</option>
|
||||||
</select>
|
</select>
|
||||||
<select id="task-select">
|
<select id="task-select">
|
||||||
<option value="transcribe">Transcribe</option>
|
<option value="transcribe">Transcribe</option>
|
||||||
<option value="translate">Translate</option>
|
<option value="translate">Translate</option>
|
||||||
</select>
|
</select>
|
||||||
<label class="toggle flex items-center ml-4">
|
<label class="toggle flex items-center ml-4">
|
||||||
<input type="checkbox"
|
<input type="checkbox" id="store-recordings" class="mr-2">
|
||||||
id="store-recordings"
|
|
||||||
class="mr-2">
|
|
||||||
<span class="slider"></span>
|
<span class="slider"></span>
|
||||||
<span class="ml-2">Store Recordings</span>
|
<span class="ml-2">Store Recordings</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex justify-center items-center mb-4">
|
<div class="flex justify-center items-center mb-4">
|
||||||
<span id="record-actions">
|
<span id="record-actions">
|
||||||
<button id="record-button"
|
<button id="record-button" disabled
|
||||||
disabled
|
|
||||||
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4">
|
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4">
|
||||||
Start Recording</button>
|
Start Recording</button>
|
||||||
<button id="record-button-speakers"
|
<button id="record-button-speakers" disabled
|
||||||
disabled
|
|
||||||
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4">
|
class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded mr-4">
|
||||||
Stream from speakers</button>
|
Stream from speakers</button>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex justify-center items-center mb-4">
|
<div class="flex justify-center items-center mb-4">
|
||||||
<div id="connection-status"
|
<div id="connection-status" style="margin-right: 5px;"></div>
|
||||||
style="margin-right: 5px;"></div>
|
|
||||||
</div>
|
</div>
|
||||||
<div class="flex justify-center items-center mb-4">
|
<div class="flex justify-center items-center mb-4">
|
||||||
<div id="info"></div>
|
<div id="info"></div>
|
||||||
</div>
|
</div>
|
||||||
<div id="status-recording"
|
<div id="status-recording" class="flex justify-center items-center mb-4">
|
||||||
class="flex justify-center items-center mb-4">
|
|
||||||
</div>
|
</div>
|
||||||
<div class="relative rounded-lg border border-gray-300 shadow-sm">
|
<div class="relative rounded-lg border border-gray-300 shadow-sm">
|
||||||
<textarea id="transcription"
|
<textarea id="transcription" class="block w-full h-48 p-4 resize-none"
|
||||||
class="block w-full h-48 p-4 resize-none"
|
|
||||||
placeholder="Whisper something into the microphone..."></textarea>
|
placeholder="Whisper something into the microphone..."></textarea>
|
||||||
<button id="copyButton"
|
<button id="copyButton"
|
||||||
class="absolute top-0 right-0 px-4 py-2 text-sm font-medium text-gray-700 bg-gray-200 hover:bg-gray-300 rounded-bl-lg focus:outline-none"
|
class="absolute top-0 right-0 px-4 py-2 text-sm font-medium text-gray-700 bg-gray-200 hover:bg-gray-300 rounded-bl-lg focus:outline-none"
|
||||||
@ -72,8 +62,7 @@ disabled
|
|||||||
Copy
|
Copy
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
<canvas id="canvas"
|
<canvas id="canvas" class="w-full"></canvas>
|
||||||
class="w-full"></canvas>
|
|
||||||
<script>
|
<script>
|
||||||
let sessionId;
|
let sessionId;
|
||||||
|
|
||||||
@ -274,6 +263,10 @@ disabled
|
|||||||
if (json.hasOwnProperty("language")) {
|
if (json.hasOwnProperty("language")) {
|
||||||
languageSelect.value = json.language;
|
languageSelect.value = json.language;
|
||||||
}
|
}
|
||||||
|
if (json.hasOwnProperty("languageDetected")) {
|
||||||
|
languageSelect.value = json.language;
|
||||||
|
statusRecording.innerHTML = "Detected language: " + json.languageDetected;
|
||||||
|
}
|
||||||
|
|
||||||
if (json.hasOwnProperty("taskSelect")) {
|
if (json.hasOwnProperty("taskSelect")) {
|
||||||
taskSelect.value = json.taskSelect;
|
taskSelect.value = json.taskSelect;
|
||||||
@ -283,6 +276,16 @@ disabled
|
|||||||
if (json.hasOwnProperty("storeRecordings")) {
|
if (json.hasOwnProperty("storeRecordings")) {
|
||||||
storeRecordings.checked = json.storeRecordings;
|
storeRecordings.checked = json.storeRecordings;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (json.hasOwnProperty("text")) {
|
||||||
|
transcription.value += "\r\n" + json.text;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (json.hasOwnProperty("queueCounter")) {
|
||||||
|
let latency = Date.now() - serverTime;
|
||||||
|
console.log("Received message from server: " + event.data + " (latency: " + latency + "ms)");
|
||||||
|
info.innerHTML = "latency: " + latency + "ms; server queue: " + queue + " requests";
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
//not json
|
//not json
|
||||||
|
@ -9,8 +9,8 @@ const wss = new WebSocket.Server({ port: process.env.SERVER_PORT_WS });
|
|||||||
|
|
||||||
|
|
||||||
// console.log("ENV="+process.env)
|
// console.log("ENV="+process.env)
|
||||||
console.log("TTS_BACKEND_URL="+process.env.TTS_BACKEND_URL)
|
console.log("TTS_BACKEND_URL=" + process.env.TTS_BACKEND_URL)
|
||||||
console.log("WS_URL="+process.env.WS_URL)
|
console.log("WS_URL=" + process.env.WS_URL)
|
||||||
|
|
||||||
let language = "en";
|
let language = "en";
|
||||||
let storeRecordings = false;
|
let storeRecordings = false;
|
||||||
@ -19,7 +19,7 @@ let queueCounter = 0;
|
|||||||
const storage = require('node-persist');
|
const storage = require('node-persist');
|
||||||
storage.init().then(() => {
|
storage.init().then(() => {
|
||||||
storage.getItem('language').then((value) => {
|
storage.getItem('language').then((value) => {
|
||||||
if (value != undefined) { language = value; console.log('language: ' + language); }
|
if (value != undefined) { language = value; console.log('stored language: ' + language); }
|
||||||
else { storage.setItem('language', language).then(() => { console.log('language set to ' + language + "(default)"); }); }
|
else { storage.setItem('language', language).then(() => { console.log('language set to ' + language + "(default)"); }); }
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -51,11 +51,14 @@ wss.on('connection', (ws, req) => {
|
|||||||
let language = sessionData?.language || 'en';
|
let language = sessionData?.language || 'en';
|
||||||
let task = sessionData?.task || 'transcribe';
|
let task = sessionData?.task || 'transcribe';
|
||||||
//show the size of the audio data as 0.000 MB
|
//show the size of the audio data as 0.000 MB
|
||||||
console.log('(queue ' + queueCounter + ') Received ' + (data.length / 1024 / 1024).toFixed(3) + ' MB audio from client. Crrent language: ' + language);
|
console.log('(queue ' + queueCounter + ') Received ' + (data.length / 1024 / 1024).toFixed(3) + ' MB audio from client. Crrent language: ' + language, 'task: ' + task);
|
||||||
var request = require('request');
|
var request = require('request');
|
||||||
|
|
||||||
|
var endpoint = process.env.TTS_BACKEND_URL;
|
||||||
var formData = {
|
var formData = {
|
||||||
task: task,
|
task: task,
|
||||||
language: sessionData.language,
|
language: language,
|
||||||
|
vad_filter: 'true',
|
||||||
output: 'json',
|
output: 'json',
|
||||||
audio_file: {
|
audio_file: {
|
||||||
value: data,
|
value: data,
|
||||||
@ -65,6 +68,31 @@ wss.on('connection', (ws, req) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
console.log('language:', language);
|
||||||
|
if (language == 'auto' || language == '') {
|
||||||
|
console.log('Detecting language...');
|
||||||
|
request.post({ url: endpoint.replace('/asr', '/detect-language'), formData: formData }, function optionalCallback(err, httpResponse, body) {
|
||||||
|
console.log('detected:', body);
|
||||||
|
if (typeof body === 'string') {
|
||||||
|
body = JSON.parse(body);
|
||||||
|
}
|
||||||
|
if (body && body.language_code) {
|
||||||
|
language = body.language_code; if (body && body.language_code) {
|
||||||
|
let language = body.language_code;
|
||||||
|
sessionData.language = language;
|
||||||
|
console.log('language set to:', language);
|
||||||
|
|
||||||
|
webSocket.send(JSON.stringify({ languageDetected: body.detected_language }));
|
||||||
|
} else {
|
||||||
|
console.error('Error: Invalid body or missing language_code');
|
||||||
|
}
|
||||||
|
sessionData.language = language;
|
||||||
|
console.log('language set to:', language);
|
||||||
|
} else {
|
||||||
|
console.error('Error: Invalid body or missing language_code');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
storeRecordings = sessionData?.storeRecordings || storeRecordings;
|
storeRecordings = sessionData?.storeRecordings || storeRecordings;
|
||||||
if (storeRecordings) {
|
if (storeRecordings) {
|
||||||
@ -94,7 +122,8 @@ wss.on('connection', (ws, req) => {
|
|||||||
var duration = new Date().getTime() - start;
|
var duration = new Date().getTime() - start;
|
||||||
//console.log('decoded (' + duration + 'ms):', body);
|
//console.log('decoded (' + duration + 'ms):', body);
|
||||||
console.log('decoded (' + (duration / 1000).toFixed(2) + 's):', body);
|
console.log('decoded (' + (duration / 1000).toFixed(2) + 's):', body);
|
||||||
webSocket.send("(" + queueCounter + ") " + body);
|
//webSocket.send("(" + queueCounter + ") " + body);
|
||||||
|
webSocket.send(JSON.stringify({ queueCounter: queueCounter, duration: duration, language: language, text: body}));
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@ -158,7 +187,7 @@ app.post('/settings', (req, res) => {
|
|||||||
sessionData.storeRecordings = body.storeRecordings;
|
sessionData.storeRecordings = body.storeRecordings;
|
||||||
console.log(`Session ${sid}: storeRecordings set to ${sessionData.storeRecordings}`);
|
console.log(`Session ${sid}: storeRecordings set to ${sessionData.storeRecordings}`);
|
||||||
}
|
}
|
||||||
if(body.task != undefined){
|
if (body.task != undefined) {
|
||||||
sessionData.task = body.task;
|
sessionData.task = body.task;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user