// Speech transcription relay.
//
// - A WebSocket server (SERVER_PORT_WS) receives audio blobs from clients and
//   forwards them to the transcription backend at TTS_BACKEND_URL, sending the
//   backend's response body back over the same socket.
// - An Express HTTP server (SERVER_PORT_HTTP) serves client.html and a few
//   settings / debugging endpoints.

// Load variables from the .env file into process.env.
if (require('dotenv')) {
  require('dotenv').config();
}

const crypto = require('crypto');
const fs = require('fs');
const path = require('path');

const bodyParser = require('body-parser');
const express = require('express');
const storage = require('node-persist');
const request = require('request');
const WebSocket = require('ws');

console.log('Starting ws server on port ' + process.env.SERVER_PORT_WS);
const wss = new WebSocket.Server({ port: process.env.SERVER_PORT_WS });

// Only the settings this server actually uses are logged; dumping the whole of
// process.env would write every secret in the environment to the log.
console.log(process.env.TTS_BACKEND_URL);
console.log(process.env.WS_URL);

// Directory (relative to the working directory) where recordings are written.
const RECORDINGS_DIR = 'rec';

// Defaults applied to every client unless the client's session overrides them.
let language = 'en';
let storeRecordings = false;
// Number of transcription requests currently waiting on the backend.
let queueCounter = 0;

/**
 * Interprets a setting value as a boolean.
 *
 * Query-string values arrive as strings, so "false" / "0" must not be treated
 * as truthy.
 *
 * @param {*} value - Raw value (boolean, string, number, ...).
 * @returns {boolean} true for `true`, or for the strings "true", "1", "yes",
 *   "on" (case-insensitive); otherwise the value's ordinary truthiness for
 *   non-strings and false for any other string.
 */
function parseBoolean(value) {
  if (typeof value === 'string') {
    return ['true', '1', 'yes', 'on'].includes(value.trim().toLowerCase());
  }
  return Boolean(value);
}

/**
 * Writes a buffer to `rec/<prefix><epoch-millis>.ogg`, creating the directory
 * first so the write can never race the mkdir.
 *
 * @param {string} prefix - File name prefix, e.g. "audio".
 * @param {Buffer} data - Audio bytes to store.
 * @returns {Promise<string>} Resolves with the path that was written.
 */
function saveRecording(prefix, data) {
  const filePath = `${RECORDINGS_DIR}/${prefix}${Date.now()}.ogg`;
  return fs.promises
    .mkdir(RECORDINGS_DIR, { recursive: true })
    .then(() => fs.promises.writeFile(filePath, data))
    .then(() => filePath);
}

/**
 * Sends a plain-text response with an explicit status code.
 *
 * The three-argument `res.send(body, status, headers)` form is not honoured by
 * Express, so status and content type are set through the chainable API.
 *
 * @param {object} res - Express response.
 * @param {number} status - HTTP status code.
 * @param {string} text - Response body.
 */
function sendPlainText(res, status, text) {
  if (res.headersSent) {
    return;
  }
  res.status(status).type('text/plain').send(text);
}

/**
 * Loads the persisted global defaults, or persists the built-in defaults when
 * nothing has been stored yet.
 *
 * @returns {Promise<void>}
 */
async function loadPersistedSettings() {
  await storage.init();

  const storedLanguage = await storage.getItem('language');
  if (storedLanguage != null) {
    language = storedLanguage;
    console.log('language: ' + language);
  } else {
    await storage.setItem('language', language);
    console.log('language set to ' + language + "(default)");
  }

  const storedFlag = await storage.getItem('storeRecordings');
  if (storedFlag != null) {
    // Normalise: earlier runs may have persisted the string "false".
    storeRecordings = parseBoolean(storedFlag);
    console.log('storeRecordings: ' + storeRecordings);
  } else {
    await storage.setItem('storeRecordings', storeRecordings);
    console.log('storeRecordings set to ' + storeRecordings + "(default)");
  }
}

loadPersistedSettings().catch((err) => {
  console.error('failed to load persisted settings:', err);
});

// We use https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice to transcribe the audio:
//   docker run -p 9009:9009 -d onerahmet/openai-whisper-asr-webservice
const sessions = new Map(); // sessionId -> { language, storeRecordings? }

wss.on('connection', (ws, req) => {
  ws.sessionId = crypto.randomBytes(16).toString('hex');
  // Start the session from the current global default, which is also what the
  // client is told below.
  sessions.set(ws.sessionId, { language });
  console.log('Client ' + req.socket.remoteAddress + ' connected with session id ' + ws.sessionId);

  // Tell the client its session id and the current defaults.
  ws.send(JSON.stringify({ sessionId: ws.sessionId, language: language, storeRecordings: storeRecordings }));

  // Drop the session when the socket goes away so the map does not grow forever.
  ws.on('close', () => {
    sessions.delete(ws.sessionId);
  });

  ws.on('message', (data) => {
    const sessionData = sessions.get(ws.sessionId);
    if (!sessionData) {
      console.log('No session data found for session id ' + ws.sessionId);
    }
    const sessionLanguage = sessionData?.language || 'en';

    // Show the size of the audio data as 0.000 MB.
    console.log('(queue ' + queueCounter + ') Received ' + (data.length / 1024 / 1024).toFixed(3) + ' MB audio from client. Crrent language: ' + sessionLanguage);

    const formData = {
      task: 'transcribe',
      language: sessionLanguage,
      output: 'json',
      audio_file: {
        value: data,
        options: {
          filename: 'audio.ogg',
          contentType: 'audio/ogg'
        }
      }
    };

    // A per-session value wins over the global default; the global default is
    // only read here, never overwritten by one client's preference.
    const shouldStore = sessionData?.storeRecordings ?? storeRecordings;
    if (shouldStore) {
      saveRecording('audio', data)
        .then((savedPath) => console.log('Audio data saved to ' + savedPath))
        .catch((err) => console.log(err));
    }

    const start = Date.now();
    queueCounter++;
    request.post({ url: process.env.TTS_BACKEND_URL, formData: formData }, (err, httpResponse, body) => {
      queueCounter--;
      if (err) {
        return console.error('upload failed:', err);
      }
      // Duration of the transcription, printed as 0.00s.
      const duration = Date.now() - start;
      console.log('decoded (' + (duration / 1000).toFixed(2) + 's):', body);
      // The client may have disconnected while the backend was working.
      if (ws.readyState === WebSocket.OPEN) {
        ws.send("(" + queueCounter + ") " + body);
      }
    });
  });
});

/**
 * Placeholder for an in-process speech-to-text implementation.
 *
 * @param {*} audioData - Currently ignored.
 * @returns {string} Always the literal "TEST".
 */
function transcribeAudio(audioData) {
  return "TEST";
}

// --- web server that serves client.html
const app = express();
app.use(bodyParser.json());

app.get('/', (req, res) => {
  res.sendFile(path.join(__dirname, 'client.html'));
});

// Accept LOG messages on /log.
app.post('/log', (req, res) => {
  console.log("log[" + new Date().toISOString() + '] ' + req.body?.message);
  sendPlainText(res, 200, 'OK');
});

// Return the WebSocket URL configured in the .env file.
app.get('/wsurl', (req, res) => {
  sendPlainText(res, 200, process.env.WS_URL ?? '');
});

// GET used to read (and, via query parameters, store) the default settings for all clients.
app.get('/settings', (req, res) => {
  if (req.query.language != null) {
    language = req.query.language;
    storage.setItem('language', language)
      .then(() => console.log('language set to ' + language))
      .catch((err) => console.error('failed to persist language:', err));
  }

  if (req.query.storeRecordings != null) {
    // Query values are strings; "false" must turn recording off.
    storeRecordings = parseBoolean(req.query.storeRecordings);
    storage.setItem('storeRecordings', storeRecordings)
      .then(() => console.log('storeRecordings set to ' + storeRecordings))
      .catch((err) => console.error('failed to persist storeRecordings:', err));
  }

  // Send back the current settings as a JSON string.
  sendPlainText(res, 200, JSON.stringify({ language: language, storeRecordings: storeRecordings }));
});

// POST used to store settings for a specific client.
// Expected JSON body: { sessionId, language?, storeRecordings? }
app.post('/settings', (req, res) => {
  const body = req.body ?? {};
  const sid = body.sessionId;
  const sessionData = sessions.get(sid);
  if (!sessionData) {
    console.log('No session data found for session id ' + sid);
    return sendPlainText(res, 404, 'Unknown session');
  }

  if (body.language != null) {
    sessionData.language = body.language;
    console.log(`Session ${sid}: language set to ${sessionData.language}`);
  }
  if (body.storeRecordings != null) {
    sessionData.storeRecordings = parseBoolean(body.storeRecordings);
    console.log(`Session ${sid}: storeRecordings set to ${sessionData.storeRecordings}`);
  }
  sendPlainText(res, 200, 'OK');
});

// Save the raw request body as an audio slice under rec/.
app.post('/upload', (req, res) => {
  const filePath = `${RECORDINGS_DIR}/audio_slice_${Date.now()}.ogg`;
  const fail = (err) => {
    console.log(err);
    sendPlainText(res, 500, 'ERROR');
  };

  // Create the directory before opening the stream, and answer only once the
  // file has been flushed so that asynchronous failures reach the client.
  fs.mkdir(RECORDINGS_DIR, { recursive: true }, (mkdirErr) => {
    if (mkdirErr) {
      return fail(mkdirErr);
    }
    const file = fs.createWriteStream(filePath);
    file.on('error', fail);
    req.on('error', fail);
    file.on('finish', () => sendPlainText(res, 200, 'OK'));
    req.pipe(file);
  });
});

// Debug endpoint: sends a file from rec/dev/ to a transcription backend and
// reports how long the round trip took.
// Query parameters: filename (default audio_me2.ogg), language (default en),
// ttsID (selects TTS_BACKEND_URL<ttsID>; "1" or absent selects TTS_BACKEND_URL).
app.get('/test_ocr', (req, res) => {
  const requested = req.query.filename != null ? String(req.query.filename) : 'audio_me2.ogg';
  // Keep only the final path component so the request cannot escape rec/dev/.
  const filename = path.basename(requested);
  if (filename === '' || filename === '.' || filename === '..') {
    return sendPlainText(res, 400, 'Invalid filename');
  }

  let tts_url = process.env.TTS_BACKEND_URL;
  const ttsID = req.query.ttsID;
  if (ttsID != null && ttsID !== '1') {
    // 2: TTS_BACKEND_URL2, 3: TTS_BACKEND_URL3, 4: TTS_BACKEND_URL4, ...
    tts_url = /^\d+$/.test(String(ttsID)) ? process.env['TTS_BACKEND_URL' + ttsID] : undefined;
  }
  if (!tts_url) {
    return sendPlainText(res, 400, 'Unknown ttsID');
  }

  const audioStream = fs.createReadStream(`${RECORDINGS_DIR}/dev/${filename}`);
  audioStream.on('error', (err) => {
    console.error('cannot read ' + filename + ':', err);
    sendPlainText(res, 404, 'File not found');
  });

  const formData = {
    task: 'transcribe',
    language: req.query.language != null ? req.query.language : 'en',
    output: 'json',
    audio_file: {
      value: audioStream,
      options: {
        filename: 'audio.ogg',
        contentType: 'audio/ogg'
      }
    }
  };

  const start = Date.now();
  request.post({ url: tts_url, formData: formData }, (err, httpResponse, body) => {
    if (err) {
      console.error('upload to ' + tts_url + ' failed:', err);
      // Always answer, otherwise the HTTP request would hang until it times out.
      return sendPlainText(res, 502, 'ERROR');
    }
    const duration = Date.now() - start;
    console.log('decoded (' + duration + 'ms):', body);
    if (!res.headersSent) {
      res.send('(' + duration + 'ms): ' + body);
    }
  });
});

app.listen(process.env.SERVER_PORT_HTTP, () => {
  console.log('Server listening on port ' + process.env.SERVER_PORT_HTTP);
});