From afcb22a3a943faf2c13659a0c9d71ff34f9f3abd Mon Sep 17 00:00:00 2001 From: Dobromir Popov Date: Mon, 10 Jun 2024 00:39:15 +0300 Subject: [PATCH 01/14] ignore node-persist --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 25b2b34..b6c8ffa 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ agent-mobile/jdk/* agent-mobile/artimobile/supervisord.pid agent-pyter/lag-llama agent-pyter/google-chrome-stable_current_amd64.deb +web/.node-persist/* From aac600ebaff385f75728304717968442409c4825 Mon Sep 17 00:00:00 2001 From: Dobromir Popov Date: Mon, 10 Jun 2024 01:40:52 +0300 Subject: [PATCH 02/14] languade detection and translation implemented - not working very well --- web/client.html | 55 ++++++++++++++++++++++++++----------------------- web/server.js | 43 +++++++++++++++++++++++++++++++------- 2 files changed, 65 insertions(+), 33 deletions(-) diff --git a/web/client.html b/web/client.html index 1b3801d..466c6bd 100644 --- a/web/client.html +++ b/web/client.html @@ -3,11 +3,9 @@ Real-time Speech-to-Text - + - + @@ -15,56 +13,48 @@

Rt STT

-
- -
-
+
-
+
-
- + + + + + diff --git a/web/chat-server.js b/web/chat-server.js new file mode 100644 index 0000000..fb4b04c --- /dev/null +++ b/web/chat-server.js @@ -0,0 +1,192 @@ +// server.js +if (require('dotenv')) { + const envFile = process.env.NODE_ENV === 'development' ? '.env.development' : '.env'; + require('dotenv').config({ path: envFile }); +} +const express = require('express'); +const bodyParser = require('body-parser'); +const WebSocket = require('ws'); +const storage = require('node-persist'); +const request = require('request'); +const fs = require('fs'); +const path = require('path'); + +const app = express(); +app.use(bodyParser.json()); + +const PORT_HTTP = process.env.SERVER_PORT_HTTP || 3000; +const PORT_WS = process.env.SERVER_PORT_WS || 8080; +const TTS_API_URL = process.env.TTS_API_URL; + +let language = "en"; +let storeRecordings = false; +let queueCounter = 0; + +const sessions = new Map(); +const users = new Map(); // Store users with their usernames and session IDs + +storage.init().then(() => { + storage.getItem('language').then((value) => { + if (value !== undefined) language = value; + else storage.setItem('language', language); + }); + storage.getItem('storeRecordings').then((value) => { + if (value !== undefined) storeRecordings = value; + else storage.setItem('storeRecordings', storeRecordings); + }); +}); + +// WebSocket Server +const wss = new WebSocket.Server({ port: PORT_WS }); +wss.on('connection', (ws) => { + ws.sessionId = Math.random().toString(36).substring(2); + sessions.set(ws.sessionId, { language: 'en' }); + + ws.send(JSON.stringify({ sessionId: ws.sessionId, language, storeRecordings })); + + ws.on('message', (message) => { + try { + const data = JSON.parse(message); + + if (data.type === 'join') { + const { username } = data; + users.set(ws.sessionId, { username, sessionId: ws.sessionId }); + broadcastUserList(); + } else if (data.type === 'audio') { + handleAudioData(ws, data.audio); + } + } catch (err) { + console.error('Failed to parse message', err); + } + }); + + ws.on('close', () => { + users.delete(ws.sessionId); + sessions.delete(ws.sessionId); + broadcastUserList(); + }); +}); + +function handleAudioData(ws, data) { + const sessionData = sessions.get(ws.sessionId); + let language = sessionData.language || 'en'; + let task = sessionData.task || 'transcribe'; + + const formData = { + task, + language, + vad_filter: 'true', + output: 'json', + audio_file: { + value: data, + options: { filename: 'audio.ogg', contentType: 'audio/ogg' } + } + }; + + if (language === 'auto' || language === '') { + detectLanguage(ws, formData); + } else { + transcribeAudio(ws, formData, sessionData); + } +} + +function detectLanguage(ws, formData) { + request.post({ url: TTS_API_URL.replace('/asr', '/detect-language'), formData }, (err, httpResponse, body) => { + if (err) return console.error('Language detection failed:', err); + const result = JSON.parse(body); + if (result && result.language_code) { + const language = result.language_code; + const sessionData = sessions.get(ws.sessionId); + sessionData.language = language; + ws.send(JSON.stringify({ languageDetected: result.detected_language })); + transcribeAudio(ws, formData, sessionData); + } + }); +} + +function transcribeAudio(ws, formData, sessionData) { + const start = new Date().getTime(); + queueCounter++; + + request.post({ url: TTS_API_URL, formData }, (err, httpResponse, body) => { + queueCounter--; + if (err) return console.error('Transcription failed:', err); + + const duration = new Date().getTime() - start; + ws.send(JSON.stringify({ + queueCounter, + duration, + language: sessionData.language, + text: body + })); + }); + + if (storeRecordings) { + const timestamp = Date.now(); + fs.mkdir('rec', { recursive: true }, (err) => { + if (err) throw err; + }); + fs.writeFile(`rec/audio${timestamp}.ogg`, formData.audio_file.value, (err) => { + if (err) console.log(err); + else console.log('Audio data saved to rec/audio' + timestamp + '.ogg'); + }); + } +} + +function broadcastUserList() { + const userList = Array.from(users.values()).map(user => ({ username: user.username, sessionId: user.sessionId })); + wss.clients.forEach(client => { + if (client.readyState === WebSocket.OPEN) { + client.send(JSON.stringify({ type: 'userList', users: userList })); + } + }); +} + +// HTTP Server +app.get('/', (req, res) => { + res.sendFile(path.join(__dirname, 'chat-client.html')); +}); + +app.post('/log', (req, res) => { + console.log(`[LOG ${new Date().toISOString()}] ${req.body.message}`); + res.status(200).send('OK'); +}); + +app.get('/wsurl', (req, res) => { + res.status(200).send(process.env.WS_URL); +}); + +app.get('/settings', (req, res) => { + if (req.query.language) { + language = req.query.language; + storage.setItem('language', language); + } + if (req.query.storeRecordings) { + storeRecordings = req.query.storeRecordings; + storage.setItem('storeRecordings', storeRecordings); + } + res.status(200).send({ language, storeRecordings }); +}); + +app.post('/settings', (req, res) => { + const { sessionId, language, storeRecordings, task } = req.body; + const sessionData = sessions.get(sessionId); + if (language) sessionData.language = language; + if (storeRecordings) sessionData.storeRecordings = storeRecordings; + if (task) sessionData.task = task; + res.status(200).send('OK'); +}); + +app.post('/upload', (req, res) => { + const timestamp = Date.now(); + fs.mkdir('rec', { recursive: true }, (err) => { + if (err) return res.status(500).send('ERROR'); + const file = fs.createWriteStream(`rec/audio_slice_${timestamp}.ogg`); + req.pipe(file); + file.on('finish', () => res.status(200).send('OK')); + }); +}); + +app.listen(PORT_HTTP, () => { + console.log(`Server listening on port ${PORT_HTTP}`); +}); From 43f3f9a28119a8040f0ca6b2dc92f54aa5a352b2 Mon Sep 17 00:00:00 2001 From: Dobromir Popov Date: Mon, 10 Jun 2024 14:45:52 +0300 Subject: [PATCH 06/14] new UI --- web/chat-client.html | 117 ++++++++++++++++++++++++++++++++----------- web/chat-server.js | 14 +++++- 2 files changed, 100 insertions(+), 31 deletions(-) diff --git a/web/chat-client.html b/web/chat-client.html index ec5b768..1527051 100644 --- a/web/chat-client.html +++ b/web/chat-client.html @@ -18,36 +18,43 @@
-
+