various improvements:

using mono audio;
options to store recordings;
multiple backends;
new audio file name;
sessions;
This commit is contained in:
popov
2023-03-15 14:20:35 +00:00
parent 5354d8c328
commit 1c15463b21
6 changed files with 588 additions and 357 deletions

View File

@ -13,18 +13,49 @@ console.log(process.env)
console.log(process.env.TTS_BACKEND_URL)
console.log(process.env.WS_URL)
// Server-wide defaults. They are overwritten by persisted values once the
// node-persist store has been initialised (see below).
let language = "en";
let storeRecordings = false;
// Number of transcription requests currently in flight.
let queueCounter = 0;
const storage = require('node-persist');

// Load persisted defaults; when a key has never been stored, persist the
// built-in default so later reads find it.
storage.init()
    .then(() => Promise.all([
        storage.getItem('language').then((value) => {
            if (value != null) {
                language = value;
                console.log('language: ' + language);
                return undefined;
            }
            return storage.setItem('language', language).then(() => {
                console.log('language set to ' + language + "(default)");
            });
        }),
        storage.getItem('storeRecordings').then((value) => {
            if (value != null) {
                // The flag may have been persisted as a raw query-string value
                // ("true"/"false"); a non-empty string is always truthy, so
                // normalise it to a real boolean before using it.
                storeRecordings = ['true', '1'].includes(String(value).toLowerCase());
                console.log('storeRecordings: ' + storeRecordings);
                return undefined;
            }
            return storage.setItem('storeRecordings', storeRecordings).then(() => {
                console.log('storeRecordings set to ' + storeRecordings + "(default)");
            });
        }),
    ]))
    .catch((err) => {
        // Keep running with the in-memory defaults instead of dying on an
        // unhandled promise rejection.
        console.error('failed to load persisted settings:', err);
    });
//we use https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice to transcribe the audio
//docker run -p 9009:9009 -d onerahmet/openai-whisper-asr-webservice
wss.on('connection', (ws) => {
console.log('Client ' + ws._socket.remoteAddress + ' connected');
const sessions = new Map(); // Store session data
wss.on('connection', (ws, req) => {
ws.sessionId = Math.random().toString(36).slice(2);
sessions.set(ws.sessionId, { language: 'en' });
console.log('Client ' + ws._socket.remoteAddress + ' connected with session id ' + ws.sessionId);
//send cookie to client
ws.send(JSON.stringify({ sessionId: ws.sessionId, language: language, storeRecordings: storeRecordings }));
ws.on('message', (data) => {
let webSocket = ws;
const sessionData = sessions.get(webSocket.sessionId);
if (!sessionData) {
console.log('No session data found for session id ' + webSocket.sessionId);
}
let language = sessionData?.language || 'en';
//show the size of the audio data as 0.000 MB
console.log('Received data from client: ' + (data.length / 1024 / 1024).toFixed(3) + ' MB');
console.log('(queue ' + queueCounter + ') Received ' + (data.length / 1024 / 1024).toFixed(3) + ' MB audio from client. Crrent language: ' + language);
var request = require('request');
var formData = {
task: 'transcribe',
language: 'en-US', //bg-BG|en-US
language: sessionData.language,
output: 'json',
audio_file: {
value: data,
@ -34,31 +65,36 @@ wss.on('connection', (ws) => {
}
}
};
storeRecordings = sessionData?.storeRecordings || storeRecordings;
if (storeRecordings) {
//"yyyymmdd-hhMMss"
var timestampfilename = Date.now("yyyymmdd-hhMMss");
var fs = require('fs');
fs.mkdir('rec', { recursive: true }, (err) => {
if (err) throw err;
});
fs.writeFile('rec/audio' + timestampfilename + '.ogg', data, function (err) {
if (err) {
return console.log(err);
}
console.log('Audio data saved to audio.ogg');
});
}
//"yyyymmdd-hhMMss"
var timestampfilename = Date.now("yyyymmdd-hhMMss");
//save the audio data to a file to /rec subfolder
var fs = require('fs');
fs.mkdir('rec', { recursive: true }, (err) => {
if (err) throw err;
});
fs.writeFile('rec/audio' + timestampfilename + '.ogg', data, function (err) {
if (err) {
return console.log(err);
}
console.log('Audio data saved to audio.ogg');
});
//record start time
var start = new Date().getTime();
queueCounter++;
request.post({ url: process.env.TTS_BACKEND_URL, formData: formData }, function optionalCallback(err, httpResponse, body) {
queueCounter--;
if (err) {
return console.error('upload failed:', err);
}
console.log('Whisper decoded:', body);
ws.send(body);
//duration of the transcribe in 0.00s
var duration = new Date().getTime() - start;
//console.log('decoded (' + duration + 'ms):', body);
console.log('decoded (' + (duration / 1000).toFixed(2) + 's):', body);
webSocket.send("(" + queueCounter + ") " + body);
});
});
});
@ -72,7 +108,11 @@ function transcribeAudio(audioData) {
// --- web server that serves client.html
const express = require('express');
const bodyParser = require('body-parser');
const app = express();
// Parse JSON request bodies into req.body (the POST /settings handler reads it).
app.use(bodyParser.json());
// app.use(bodyParser.urlencoded({ extended: false })); // Parse request body as URL-encoded
const path = require('path');
app.get('/', (req, res) => {
@ -84,6 +124,56 @@ app.get('/', (req, res) => {
// Returns the WebSocket URL (from the WS_URL environment variable) as plain text.
app.get('/wsurl', (req, res) => {
    // res.send() takes only the body; status and content type must be set
    // through res.status()/res.type(), otherwise they are silently ignored.
    res.status(200).type('text/plain').send(process.env.WS_URL);
});
//GET used to store default settings for all clients
// Query parameters (both optional):
//   language        - new default language
//   storeRecordings - "true"/"1" to enable, anything else to disable
// Responds with the current defaults as a JSON string.
app.get('/settings', (req, res) => {
    const requestedLanguage = req.query.language;
    const requestedStore = req.query.storeRecordings;

    if (requestedLanguage != null) {
        language = requestedLanguage;
        storage.setItem('language', language)
            .then(() => { console.log('language set to ' + language); })
            .catch((err) => { console.error('failed to persist language:', err); });
    }
    if (requestedStore != null) {
        // Query-string values are strings; "false" would be truthy if stored
        // as-is, so convert to a real boolean first.
        storeRecordings = ['true', '1'].includes(String(requestedStore).toLowerCase());
        storage.setItem('storeRecordings', storeRecordings)
            .then(() => { console.log('storeRecordings set to ' + storeRecordings); })
            .catch((err) => { console.error('failed to persist storeRecordings:', err); });
    }

    //send back the current settings as json
    // res.send() takes only the body; status and content type are set explicitly.
    res.status(200)
        .type('text/plain')
        .send(JSON.stringify({ language: language, storeRecordings: storeRecordings }));
});
//POST used to store settings for a specific client
// Expects a JSON body: { sessionId, language?, storeRecordings? }.
// Responds 404 when the session id is unknown, otherwise 200 "OK".
app.post('/settings', (req, res) => {
    const body = req.body ?? {};
    const sid = body.sessionId;
    const sessionData = sessions.get(sid);

    // Without this guard an unknown or missing session id would throw a
    // TypeError on the property assignments below.
    if (!sessionData) {
        console.log('No session data found for session id ' + sid);
        res.status(404).type('text/plain').send('Unknown session');
        return;
    }

    if (body.language != null) {
        sessionData.language = body.language;
        console.log(`Session ${sid}: language set to ${sessionData.language}`);
    }
    if (body.storeRecordings != null) {
        sessionData.storeRecordings = body.storeRecordings;
        console.log(`Session ${sid}: storeRecordings set to ${sessionData.storeRecordings}`);
    }

    // res.send() takes only the body; status and content type are set explicitly.
    res.status(200).type('text/plain').send('OK');
});
//save the audio file
// Streams the raw request body to rec/audio_slice_<epoch-ms>.ogg.
// Responds 200 "OK" once the file has been fully written, or 500 "ERROR"
// when the directory cannot be created or the write fails.
app.post('/upload', (req, res) => {
    const fs = require('fs');
    // Date.now() ignores any argument and returns epoch milliseconds; that
    // value is kept because it stays unique across uploads in the same second.
    const targetPath = 'rec/audio_slice_' + Date.now() + '.ogg';

    // Report a failure once; later errors after a response are only logged.
    const fail = (err) => {
        console.log(err);
        if (!res.headersSent) {
            res.status(500).type('text/plain').send('ERROR');
        }
    };

    // Create the directory first, then open the stream inside the callback,
    // so the write can never race the mkdir. Errors are routed to fail()
    // because a throw inside an async callback would crash the process.
    fs.mkdir('rec', { recursive: true }, (mkdirErr) => {
        if (mkdirErr) {
            fail(mkdirErr);
            return;
        }
        const file = fs.createWriteStream(targetPath);
        file.on('error', fail);
        file.on('finish', () => {
            res.status(200).type('text/plain').send('OK');
        });
        req.on('error', (err) => {
            file.destroy();
            fail(err);
        });
        req.pipe(file);
    });
});
app.listen(8080, () => {
console.log('Server listening on port 8080');