main idea is now working :)
Using OpenAI for TTS and Groq (instead of local Ollama) for fast LLM inference
@@ -183,7 +183,7 @@
         break;
     case "text":
     case "transcriptionResult":
-        transcription.innerHTML += "\r\n" + json.text;
+        transcription.innerHTML += "<br />" + json.text;
         let latency = Date.now() - serverTime;
         if (autosend.checked) {
            // const arr = event.data.split(/[(\)]/);
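Note on the hunk above: innerHTML does not render "\r\n" as a line break, hence the switch to "<br />". Appending server-provided text through innerHTML also means any markup inside json.text is interpreted. A sketch of the same append using DOM text nodes, which treats json.text as plain text:

    // Equivalent to transcription.innerHTML += "<br />" + json.text,
    // but json.text cannot inject markup this way.
    transcription.appendChild(document.createElement('br'));
    transcription.appendChild(document.createTextNode(json.text));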
@@ -197,6 +197,13 @@
            //transcription.innerHTML = event.data;
         }
         break;
+    case 'audio':
+        const audioBuffer = Uint8Array.from(atob(json.audio), char => char.charCodeAt(0));
+        const audioBlob = new Blob([audioBuffer], { type: 'audio/mp3' });
+        const audioUrl = URL.createObjectURL(audioBlob);
+        const audio = new Audio(audioUrl);
+        audio.play();
+        break;

    case "userList":
        users = json.users;
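Each incoming audio message above creates an object URL that is never released. A minimal variant of the same decode-and-play flow that frees the URL after playback, assuming the same json.audio base64 field:

    case 'audio': {
        // Decode the base64 payload into bytes and wrap it as an MP3 blob.
        const audioBuffer = Uint8Array.from(atob(json.audio), char => char.charCodeAt(0));
        const audioBlob = new Blob([audioBuffer], { type: 'audio/mp3' });
        const audioUrl = URL.createObjectURL(audioBlob);
        const audio = new Audio(audioUrl);
        // Release the object URL once playback finishes.
        audio.addEventListener('ended', () => URL.revokeObjectURL(audioUrl));
        audio.play();
        break;
    }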
@@ -235,14 +242,14 @@
 }

 function userJoin(sessionId, username, language) {
-    socket.send(JSON.stringify({ type: 'join', username , language}));
+    socket.send(JSON.stringify({ type: 'join', username, language }));
     document.cookie = `sessionId=${sessionId}; path=/;`;
     document.cookie = `username=${username}; path=/;`;

     showClearSessionOption();
 }


 function clearSession() {
     document.cookie = "sessionId=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=/;";
     document.cookie = "username=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=/;";
@@ -303,7 +310,7 @@
     users.forEach(user => {
         const option = document.createElement('option');
         option.value = user.sessionId;
-        option.innerText = "["+user.language+"] " +user.username;
+        option.innerText = "[" + user.language + "] " + user.username;
         if (user.username === username) {
             option.innerText += " (me)";
         }
@@ -8,6 +8,14 @@ const path = require('path');
 const dotenv = require('dotenv');
 const ollama = require('ollama');
 const axios = require('axios');
+// import OpenAI from "openai";
+const OpenAI = require('openai');
+const openai = new OpenAI({ apiKey: "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" });
+
+const Groq = require('groq-sdk');
+//const LLM = require("@themaximalist/llm.js"); //https://www.npmjs.com/package/@themaximalist/llm.js
+const groq = new Groq({ apiKey: process.env.GROQ_API_KEY });
+

 if (dotenv) {
     const envFile = process.env.NODE_ENV === 'development' ? '.env.development' : '.env';
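The OpenAI key above is committed in plain text and should be rotated. Since this file already loads dotenv, a safer sketch, assuming an OPENAI_API_KEY entry in the same .env (the openai SDK also falls back to process.env.OPENAI_API_KEY when no apiKey is passed):

    const OpenAI = require('openai');
    // OPENAI_API_KEY is assumed to be defined in .env, like GROQ_API_KEY below.
    const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });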
@@ -21,6 +29,7 @@ const PORT_HTTP = process.env.SERVER_PORT_HTTP || 3000;
 const PORT_WS = process.env.SERVER_PORT_WS || 8080;
 const TTS_API_URL = process.env.TTS_API_URL;
 const LNN_API_URL = process.env.LNN_API_URL;
+const LLN_MODEL = process.env.LLN_MODEL;

 let language = "en";
 let storeRecordings = false;
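For reference, the environment variables the server now reads, as a sample .env (all values are illustrative placeholders, not taken from the commit):

    SERVER_PORT_HTTP=3000
    SERVER_PORT_WS=8080
    TTS_API_URL=http://localhost:5002
    LNN_API_URL=http://localhost:11434
    LLN_MODEL=qwen2
    GROQ_API_KEY=gsk_your_key_here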
@@ -221,17 +230,50 @@ function detectLanguage(ws, formData) {
 }

 async function translateText(originalText, originalLanguage, targetLanguage) {
-    return queryLLMAxios("translate this text from " + originalLanguage + " to " + targetLanguage + ": " + originalText)
-        .then(response => {
-            console.log('Translation response:', response);
-            return response;
+    const prompt = "Translate this text from " + originalLanguage + " to " + targetLanguage + ": " + originalText;
+
+    // const llm = new LLM();
+    // llm.system("Translate voice transcriptions. Some words may be homonymous, so please provide the most likely translation.");
+    // let result = await llm.chat(prompt, { service: "groq", model: "mixtral-8x7b-32768" });
+    // return result;
+
+    return groq.chat.completions
+        .create({
+            messages: [
+                {
+                    role: "system",
+                    content: "You are translating voice transcriptions from '" + originalLanguage + "' to '" + targetLanguage + "'. Reply with just the translation. It will be converted to speech using TTS - you can add more context if needed.",
+                },
+                {
+                    role: "user",
+                    content: originalText,
+                },
+            ],
+            model: "llama3-8b-8192",
+        })
+        .then((chatCompletion) => {
+            let result = chatCompletion.choices[0]?.message?.content || "";
+            console.log(result);
+            return { response: result };
         });
+
+    // return queryLLMAxios("translate this text from " + originalLanguage + " to " + targetLanguage + ": " + originalText)
+    //     .then(response => {
+    //         console.log('Translation response:', response);
+    //         return response;
+    //     });
 }
 async function queryLLM(prompt) {
     const requestData = {
-        model: 'qwen2', // ollama3
+        model: LLN_MODEL || 'qwen2', // ollama3
         prompt: prompt,
-        system: "you provide translations to the text transcribed from audio. The text is in a language you understand, and you can provide translations to any language you know.",
+        system: "Translate voice transcriptions. Some words may be homonymous, so please provide the most likely translation.",
         //format: "json"
     };
     const ola = new ollama.Ollama({ host: LNN_API_URL })
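Since translateText is already async, the Groq call above can use await instead of the .then chain; a sketch of an equivalent body, with the same model and messages:

    async function translateText(originalText, originalLanguage, targetLanguage) {
        const chatCompletion = await groq.chat.completions.create({
            messages: [
                { role: "system", content: "You are translating voice transcriptions from '" + originalLanguage + "' to '" + targetLanguage + "'. Reply with just the translation." },
                { role: "user", content: originalText },
            ],
            model: "llama3-8b-8192",
        });
        // Mirror the original shape so callers can keep reading jsonResp.response.
        return { response: chatCompletion.choices[0]?.message?.content || "" };
    }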
@@ -241,14 +283,14 @@ async function queryLLM(prompt) {
 ///obsolete function
 async function queryLLMAxios(prompt) {
     const requestData = {
-        model: 'qwen2',
+        model: LLN_MODEL || 'qwen2',
         prompt: prompt,
-        "system": "talk like a pirate",
+        "system": "Translate voice transcriptions. Some words may be homonymous, so please provide the most likely translation.",
         "stream": false
     };

     try {
-        const response = await axios.post(LNN_API_URL + "/api/generate", requestData, {
+        const response = await axios.post(LNN_API_URL, requestData, {
            headers: {
                // 'Authorization': `Bearer ${OLLAMA_API_KEY}`,
                'Content-Type': 'application/json'
@@ -261,7 +303,7 @@ async function queryLLMAxios(prompt) {
     }
 }

-function transcribeAudio(ws, formData, sessionData) {
+async function transcribeAudio(ws, formData, sessionData) {
     const start = new Date().getTime();
     queueCounter++;

@@ -289,16 +331,36 @@ function transcribeAudio(ws, formData, sessionData) {
     chat.participants.forEach(sessionId => {
         if (sessionId !== ws.sessionId) {
             let targetLang = sessions.get(sessionId)?.language || 'en';
-            targetLang = "bg";
+            //targetLang = "bg";
             if (targetLang !== sessionData.language) {
-                console.log('Translating message "'+body+'" from ' + sessionData.language + ' to ' + targetLang);
+                console.log('Translating message "' + body + '" from ' + sessionData.language + ' to ' + targetLang);
                 translateText(body, sessionData.language, targetLang)
                     .then(translation => {
-                        const jsonResp = JSON.parse(translation);
-                        msg.translations.push({ language: targetLang, text: jsonResp.response });
+                        let jsonResp;
+                        if (typeof translation === 'string') {
+                            try {
+                                jsonResp = JSON.parse(translation);
+                            } catch (e) {
+                                console.error('Failed to parse translation response:', e);
+                                ws.send(JSON.stringify({ type: 'error', message: 'Invalid translation response' }));
+                                return;
+                            }
+                        } else {
+                            jsonResp = translation;
+                        }
+
                         const participantSocket = Array.from(wss.clients).find(client => client.sessionId === sessionId);
                         if (participantSocket && participantSocket.readyState === WebSocket.OPEN) {
                             participantSocket.send(JSON.stringify({ type: 'text', text: sessionData.username + ': ' + jsonResp.response + "\n" }));
+
+                            // Generate and send the speech audio
+                            generateSpeech(jsonResp.response)
+                                .then(audioBuffer => {
+                                    console.log('Generated audio for translation:', audioBuffer.length);
+                                    msg.translations.push({ language: targetLang, text: jsonResp.response, audio: audioBuffer.toString('base64') });
+                                    participantSocket.send(JSON.stringify({ type: 'audio', audio: audioBuffer.toString('base64') }));
+                                });
                         }
                     });
             }
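One caveat in the hunk above: msg.translations is filled inside asynchronous .then callbacks, so code that saves or forwards msg right after the forEach may observe an incomplete translations array. If that ordering matters, one option is to collect the promises; a sketch, runnable inside the now-async transcribeAudio, assuming translateText resolves to { response } as in the new code:

    const pending = chat.participants
        .filter(sessionId => sessionId !== ws.sessionId)
        .map(sessionId => {
            const targetLang = sessions.get(sessionId)?.language || 'en';
            if (targetLang === sessionData.language) return Promise.resolve();
            return translateText(body, sessionData.language, targetLang)
                .then(jsonResp => msg.translations.push({ language: targetLang, text: jsonResp.response }));
        });
    // Every translation has been pushed once this resolves.
    await Promise.all(pending);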
@@ -306,6 +368,7 @@ function transcribeAudio(ws, formData, sessionData) {
             const participantSocket = Array.from(wss.clients).find(client => client.sessionId === sessionId);
             if (participantSocket && participantSocket.readyState === WebSocket.OPEN) {
                 participantSocket.send(JSON.stringify({ type: 'text', text: sessionData.username + ': ' + body + "\n" }));
+                participantSocket.send(JSON.stringify({ type: 'audio', audio: formData.toString('base64') }));
             }
         }
     }
@@ -336,6 +399,16 @@ function broadcastUserList() {
     });
 }

+async function generateSpeech(text) {
+    const mp3 = await openai.audio.speech.create({
+        model: "tts-1",
+        voice: "alloy",
+        input: text,
+    });
+    const buffer = Buffer.from(await mp3.arrayBuffer());
+    return buffer;
+}
+
 // HTTP Server
 app.get('/', (req, res) => {
     res.sendFile(path.join(__dirname, 'chat-client.html'));
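The new generateSpeech helper can also be exercised on its own; a minimal sketch that writes the returned MP3 buffer to disk (the output path is illustrative):

    const fs = require('fs');

    generateSpeech('Hello from the translator')
        .then(buffer => {
            fs.writeFileSync('tts-test.mp3', buffer); // illustrative path
            console.log('Wrote', buffer.length, 'bytes of MP3 audio');
        });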