Main idea is now working :)
Using OpenAI for TTS, and Groq (in place of local Ollama) for fast LLM inference.
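For context, a minimal sketch of the pipeline this commit wires up: a Groq chat completion for translation, then OpenAI TTS buffered to a Node Buffer. The demo() wrapper, the language pair, and the sample input string are illustrative only, not part of the commit; it assumes GROQ_API_KEY and OPENAI_API_KEY are set and the groq-sdk and openai packages are installed.

const Groq = require('groq-sdk');
const OpenAI = require('openai');

const groq = new Groq({ apiKey: process.env.GROQ_API_KEY });
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

async function demo() {
    // Translation step: same Groq-hosted model the commit uses (llama3-8b-8192).
    const chat = await groq.chat.completions.create({
        messages: [
            { role: "system", content: "Translate from en to bg. Reply with just the translation." },
            { role: "user", content: "Hello, how are you?" }, // sample input, not from the commit
        ],
        model: "llama3-8b-8192",
    });
    const translated = chat.choices[0]?.message?.content || "";

    // TTS step: OpenAI speech API, converted to a Buffer as in generateSpeech() below.
    const mp3 = await openai.audio.speech.create({
        model: "tts-1",
        voice: "alloy",
        input: translated,
    });
    const audio = Buffer.from(await mp3.arrayBuffer());
    console.log(translated, "-", audio.length, "bytes of MP3");
}

demo().catch(console.error);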
@@ -8,6 +8,14 @@ const path = require('path');
 const dotenv = require('dotenv');
 const ollama = require('ollama');
 const axios = require('axios');
+// import OpenAI from "openai";
+const OpenAI = require('openai');
+const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
+
+const Groq = require('groq-sdk');
+//const LLM = require("@themaximalist/llm.js"); //https://www.npmjs.com/package/@themaximalist/llm.js
+const groq = new Groq({ apiKey: process.env.GROQ_API_KEY });
+
 
 if (dotenv) {
     const envFile = process.env.NODE_ENV === 'development' ? '.env.development' : '.env';
@@ -21,6 +29,7 @@ const PORT_HTTP = process.env.SERVER_PORT_HTTP || 3000;
 const PORT_WS = process.env.SERVER_PORT_WS || 8080;
 const TTS_API_URL = process.env.TTS_API_URL;
 const LNN_API_URL = process.env.LNN_API_URL;
+const LLN_MODEL = process.env.LLN_MODEL;
 
 let language = "en";
 let storeRecordings = false;
@@ -221,17 +230,50 @@ function detectLanguage(ws, formData) {
 }
 
 async function translateText(originalText, originalLanguage, targetLanguage) {
-    return queryLLMAxios("translate this text from " + originalLanguage + " to " + targetLanguage + ": " + originalText)
-        .then(response => {
-            console.log('Translation response:', response);
-            return response;
-        });
+    const prompt = "Translate this text from " + originalLanguage + " to " + targetLanguage + ": " + originalText;
+
+
+    // const llm = new LLM();
+    // llm.system("Translate voice transcriptions. Some words may be homonymous, so please provide the most likely translation.");
+
+    // let result = await llm.chat(prompt, { service: "groq", model: "mixtral-8x7b-32768" });
+    // return result;
+
+
+    return groq.chat.completions
+        .create({
+            messages: [
+                {
+                    role: "system",
+                    content: "You are translating voice transcriptions from '" + originalLanguage + "' to '" + targetLanguage + "'. Reply with just the translation. It will be converted to speech using TTS - you can add more context if needed.",
+                },
+                {
+                    role: "user",
+                    content: originalText,
+                },
+            ],
+            model: "llama3-8b-8192",
+        })
+        .then((chatCompletion) => {
+            let result = chatCompletion.choices[0]?.message?.content || "";
+            console.log(result);
+            return { response: result };
+        });
+
+
+
+
+    // return queryLLMAxios("translate this text from " + originalLanguage + " to " + targetLanguage + ": " + originalText)
+    //     .then(response => {
+    //         console.log('Translation response:', response);
+    //         return response;
+    //     });
 }
 async function queryLLM(prompt) {
     const requestData = {
-        model: 'qwen2', // ollama3
+        model: LLN_MODEL || 'qwen2', // ollama3
         prompt: prompt,
-        system: "you provide translations to the text transcribed from audio. The text is in a language you understand, and you can provide translations to any language you know.",
+        system: "Translate voice transcriptions. Some words may be homonymous, so please provide the most likely translation.",
         //format: "json"
     };
     const ola = new ollama.Ollama({ host: LNN_API_URL })
@@ -241,14 +283,14 @@ async function queryLLM(prompt) {
 ///obsolete function
 async function queryLLMAxios(prompt) {
     const requestData = {
-        model: 'qwen2',
+        model: LLN_MODEL || 'qwen2',
         prompt: prompt,
-        "system": "talk like a pirate",
+        "system": "Translate voice transcriptions. Some words may be homonymous, so please provide the most likely translation.",
         "stream": false
     };
 
     try {
-        const response = await axios.post(LNN_API_URL + "/api/generate", requestData, {
+        const response = await axios.post(LNN_API_URL, requestData, {
             headers: {
                 // 'Authorization': `Bearer ${OLLAMA_API_KEY}`,
                 'Content-Type': 'application/json'
@@ -261,7 +303,7 @@ async function queryLLMAxios(prompt) {
     }
 }
 
-function transcribeAudio(ws, formData, sessionData) {
+async function transcribeAudio(ws, formData, sessionData) {
     const start = new Date().getTime();
     queueCounter++;
 
@@ -289,16 +331,36 @@ function transcribeAudio(ws, formData, sessionData) {
             chat.participants.forEach(sessionId => {
                 if (sessionId !== ws.sessionId) {
                     let targetLang = sessions.get(sessionId)?.language || 'en';
-                    targetLang = "bg";
+                    //targetLang = "bg";
                     if (targetLang !== sessionData.language) {
-                        console.log('Translating message "'+body+'" from ' + sessionData.language + ' to ' + targetLang);
+                        console.log('Translating message "' + body + '" from ' + sessionData.language + ' to ' + targetLang);
                         translateText(body, sessionData.language, targetLang)
                             .then(translation => {
-                                const jsonResp = JSON.parse(translation);
-                                msg.translations.push({ language: targetLang, text: jsonResp.response });
+                                let jsonResp;
+                                if (typeof translation === 'string') {
+                                    try {
+                                        jsonResp = JSON.parse(translation);
+                                    } catch (e) {
+                                        console.error('Failed to parse translation response:', e);
+                                        ws.send(JSON.stringify({ type: 'error', message: 'Invalid translation response' }));
+                                        return;
+                                    }
+                                } else {
+                                    jsonResp = translation;
+                                }
+
                                 const participantSocket = Array.from(wss.clients).find(client => client.sessionId === sessionId);
                                 if (participantSocket && participantSocket.readyState === WebSocket.OPEN) {
                                     participantSocket.send(JSON.stringify({ type: 'text', text: sessionData.username + ': ' + jsonResp.response + "\n" }));
+
+                                    // Generate and send the speech audio
+                                    generateSpeech(jsonResp.response)
+                                        .then(audioBuffer => {
+                                            console.log('Generated audio for translation:', audioBuffer.length);
+                                            msg.translations.push({ language: targetLang, text: jsonResp.response, audio: audioBuffer.toString('base64') });
+                                            participantSocket.send(JSON.stringify({ type: 'audio', audio: audioBuffer.toString('base64') }));
+                                        });
+
                                 }
                             });
                     }
@@ -306,6 +368,7 @@ function transcribeAudio(ws, formData, sessionData) {
                 const participantSocket = Array.from(wss.clients).find(client => client.sessionId === sessionId);
                 if (participantSocket && participantSocket.readyState === WebSocket.OPEN) {
                     participantSocket.send(JSON.stringify({ type: 'text', text: sessionData.username + ': ' + body + "\n" }));
+                    participantSocket.send(JSON.stringify({ type: 'audio', audio: formData.toString('base64') }));
                 }
             }
         }
@@ -336,6 +399,16 @@ function broadcastUserList() {
     });
 }
 
+async function generateSpeech(text) {
+    const mp3 = await openai.audio.speech.create({
+        model: "tts-1",
+        voice: "alloy",
+        input: text,
+    });
+    const buffer = Buffer.from(await mp3.arrayBuffer());
+    return buffer;
+}
+
 // HTTP Server
 app.get('/', (req, res) => {
     res.sendFile(path.join(__dirname, 'chat-client.html'));
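The hunks above have the server emit three WebSocket message shapes: { type: 'text', text }, { type: 'audio', audio: <base64 MP3> }, and { type: 'error', message }. For reference, a hypothetical browser-side handler consuming them; this is not part of the commit, and appendToTranscript is an assumed UI helper (the real chat-client.html may differ):

ws.onmessage = (event) => {
    const msg = JSON.parse(event.data);
    if (msg.type === 'text') {
        appendToTranscript(msg.text);   // hypothetical UI helper, not in this repo
    } else if (msg.type === 'audio') {
        // msg.audio is the base64-encoded MP3 produced by generateSpeech()
        new Audio('data:audio/mpeg;base64,' + msg.audio).play();
    } else if (msg.type === 'error') {
        console.error(msg.message);
    }
};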