From 1c15463b210c6a8f0615562b8a7a706c5fc50cbc Mon Sep 17 00:00:00 2001 From: popov Date: Wed, 15 Mar 2023 14:20:35 +0000 Subject: [PATCH] various improvements: using mono audio; options to store recordings; multiple backends; new audio file name; sessions; --- .gitignore | 1 + .../storage/8512ae7d57b1396273f76fe6ed341a23 | 1 + .../storage/dfe9cbcde628e8a86855f6d2cd16dd2b | 1 + Dockerfile | 3 +- web/client.html | 801 ++++++++++-------- web/server.js | 138 ++- 6 files changed, 588 insertions(+), 357 deletions(-) create mode 100644 .node-persist/storage/8512ae7d57b1396273f76fe6ed341a23 create mode 100644 .node-persist/storage/dfe9cbcde628e8a86855f6d2cd16dd2b diff --git a/.gitignore b/.gitignore index 6fb3f7e..2fa124a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ node_modules/* package-lock.json +rec/* diff --git a/.node-persist/storage/8512ae7d57b1396273f76fe6ed341a23 b/.node-persist/storage/8512ae7d57b1396273f76fe6ed341a23 new file mode 100644 index 0000000..e42c6de --- /dev/null +++ b/.node-persist/storage/8512ae7d57b1396273f76fe6ed341a23 @@ -0,0 +1 @@ +{"key":"language","value":"bg"} \ No newline at end of file diff --git a/.node-persist/storage/dfe9cbcde628e8a86855f6d2cd16dd2b b/.node-persist/storage/dfe9cbcde628e8a86855f6d2cd16dd2b new file mode 100644 index 0000000..de37c0c --- /dev/null +++ b/.node-persist/storage/dfe9cbcde628e8a86855f6d2cd16dd2b @@ -0,0 +1 @@ +{"key":"storeRecordings","value":"true"} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 7d3f9cc..2f38d5f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,7 +37,7 @@ COPY package*.json ./ # Install dependencies -RUN npm install ws express request #--only=production +RUN npm install ws express request node-persist body-parser dotenv #--only=production # Copy the rest of the application files COPY . . @@ -45,6 +45,7 @@ COPY . . 
# Start the application #CMD ["npm", "start"] CMD npm start +# portainer: '-c' 'echo Container started; trap "exit 0" 15; exec npm start' EXPOSE 8080 8081 diff --git a/web/client.html b/web/client.html index 467fa20..2b907d6 100644 --- a/web/client.html +++ b/web/client.html @@ -3,322 +3,459 @@ Real-time Speech-to-Text - + + + - -

Rt STT

- - + +
+

Rt STT

+
+ + + +
+
+ + + + +
+
+
+
+
+
+
+
+
+
+ + +
+ + - + enumerateDevices(); + connect(socket); + }; + + function copyToClipboard(id) { + var textarea = document.getElementById(id); + textarea.select(); + document.execCommand('copy'); + } + + + + + \ No newline at end of file diff --git a/web/server.js b/web/server.js index 5ab311e..efb66a5 100644 --- a/web/server.js +++ b/web/server.js @@ -13,18 +13,49 @@ console.log(process.env) console.log(process.env.TTS_BACKEND_URL) console.log(process.env.WS_URL) +let language = "en"; +let storeRecordings = false; +let queueCounter = 0; + +const storage = require('node-persist'); +storage.init().then(() => { + storage.getItem('language').then((value) => { + if (value != undefined) { language = value; console.log('language: ' + language); } + else { storage.setItem('language', language).then(() => { console.log('language set to ' + language + "(default)"); }); } + }); + + storage.getItem('storeRecordings').then((value) => { + if (value != undefined) { storeRecordings = value; console.log('storeRecordings: ' + storeRecordings); } + else { storage.setItem('storeRecordings', storeRecordings).then(() => { console.log('storeRecordings set to ' + storeRecordings + "(default)"); }); } + }); +}); + + + //we use https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice to transcribe the audio //docker run -p 9009:9009 -d onerahmet/openai-whisper-asr-webservice -wss.on('connection', (ws) => { - console.log('Client ' + ws._socket.remoteAddress + ' connected'); +const sessions = new Map(); // Store session data + +wss.on('connection', (ws, req) => { + ws.sessionId = Math.random().toString(36).slice(2); + sessions.set(ws.sessionId, { language: 'en' }); + console.log('Client ' + ws._socket.remoteAddress + ' connected with session id ' + ws.sessionId); + //send cookie to client + ws.send(JSON.stringify({ sessionId: ws.sessionId, language: language, storeRecordings: storeRecordings })); ws.on('message', (data) => { + let webSocket = ws; + const sessionData = 
sessions.get(webSocket.sessionId); + if (!sessionData) { + console.log('No session data found for session id ' + webSocket.sessionId); + } + let language = sessionData?.language || 'en'; //show the size of the audio data as 0.000 MB - console.log('Received data from client: ' + (data.length / 1024 / 1024).toFixed(3) + ' MB'); + console.log('(queue ' + queueCounter + ') Received ' + (data.length / 1024 / 1024).toFixed(3) + ' MB audio from client. Crrent language: ' + language); var request = require('request'); var formData = { task: 'transcribe', - language: 'en-US', //bg-BG|en-US + language: sessionData.language, output: 'json', audio_file: { value: data, @@ -34,31 +65,36 @@ wss.on('connection', (ws) => { } } }; + + storeRecordings = sessionData?.storeRecordings || storeRecordings; + if (storeRecordings) { + //"yyyymmdd-hhMMss" + var timestampfilename = Date.now("yyyymmdd-hhMMss"); + var fs = require('fs'); + fs.mkdir('rec', { recursive: true }, (err) => { + if (err) throw err; + }); + fs.writeFile('rec/audio' + timestampfilename + '.ogg', data, function (err) { + if (err) { + return console.log(err); + } + console.log('Audio data saved to audio.ogg'); + }); + } - //"yyyymmdd-hhMMss" - var timestampfilename = Date.now("yyyymmdd-hhMMss"); - - //save the audio data to a file to /rec subfolder - var fs = require('fs'); - fs.mkdir('rec', { recursive: true }, (err) => { - if (err) throw err; - }); - - fs.writeFile('rec/audio' + timestampfilename + '.ogg', data, function (err) { - if (err) { - return console.log(err); - } - console.log('Audio data saved to audio.ogg'); - }); - - - + //record start time + var start = new Date().getTime(); + queueCounter++; request.post({ url: process.env.TTS_BACKEND_URL, formData: formData }, function optionalCallback(err, httpResponse, body) { + queueCounter--; if (err) { return console.error('upload failed:', err); } - console.log('Whisper decoded:', body); - ws.send(body); + //duration of the transcribe in 0.00s + var duration = new 
Date().getTime() - start; + //console.log('decoded (' + duration + 'ms):', body); + console.log('decoded (' + (duration / 1000).toFixed(2) + 's):', body); + webSocket.send("(" + queueCounter + ") " + body); }); }); }); @@ -72,7 +108,11 @@ function transcribeAudio(audioData) { // --- web server that servers client.html const express = require('express'); +const bodyParser = require('body-parser'); const app = express(); +app.use(bodyParser.json()); +// app.use(bodyParser.urlencoded({ extended: false })); // Parse request body as URL-encoded + const path = require('path'); app.get('/', (req, res) => { @@ -84,6 +124,56 @@ app.get('/', (req, res) => { app.get('/wsurl', (req, res) => { res.send(process.env.WS_URL, 200, { 'Content-Type': 'text/plain' }); }); +//GET used to store default settings for all clients +app.get('/settings', (req, res) => { + if (req.query.language != undefined) { + language = req.query.language; + storage.setItem('language', language).then(() => { console.log('language set to ' + language); }); + } + if (req.query.storeRecordings != undefined) { + storeRecordings = req.query.storeRecordings; + storage.setItem('storeRecordings', storeRecordings).then(() => { console.log('storeRecordings set to ' + storeRecordings); }); + } + //send back the current settings as json + res.send(JSON.stringify({ language: language, storeRecordings: storeRecordings }), 200, { 'Content-Type': 'text/plain' }); +}); + +//POST used to store settings for a specific client +app.post('/settings', (req, res) => { + //get the language from the json body ( { language: language, sessionId: sessionId }) + const body = req.body; + const sid = body.sessionId; + const sessionData = sessions.get(sid); + if (body.language != undefined) { + sessionData.language = body.language; + console.log(`Session ${sid}: language set to ${sessionData.language}`); + } + if(body.storeRecordings != undefined) { + sessionData.storeRecordings = body.storeRecordings; + console.log(`Session ${sid}: 
storeRecordings set to ${sessionData.storeRecordings}`); + } + res.send('OK', 200, { 'Content-Type': 'text/plain' }); +}); + + +//save the audio file +app.post('/upload', (req, res) => { + try { + //save the audio file + var timestampfilename = Date.now("yyyymmdd-hhMMss"); + var fs = require('fs'); + fs.mkdir('rec', { recursive: true }, (err) => { + if (err) throw err; + }); + var file = fs.createWriteStream('rec/audio_slice_' + timestampfilename + '.ogg'); + req.pipe(file); + res.send('OK', 200, { 'Content-Type': 'text/plain' }); + } catch (err) { + console.log(err); + res.send('ERROR', 500, { 'Content-Type': 'text/plain' }); + } +}); + app.listen(8080, () => { console.log('Server listening on port 8080');