From 54b0f1661b374abf0dfe45b0a517a0e6f32b5fd8 Mon Sep 17 00:00:00 2001
From: Dobromir Popov <dobromir.popov@gateway.one>
Date: Tue, 10 Sep 2024 12:16:51 +0300
Subject: [PATCH] use memory stream and save to disk async in background
 (broken)

---
 agent-mAId/main.py | 76 ++++++++++++++++++++++++++++++----------------
 1 file changed, 50 insertions(+), 26 deletions(-)

diff --git a/agent-mAId/main.py b/agent-mAId/main.py
index 7bde14d..e9ae8d4 100644
--- a/agent-mAId/main.py
+++ b/agent-mAId/main.py
@@ -11,10 +11,10 @@ import pystray
 from pystray import MenuItem as item
 from PIL import Image
 import ctypes
+import io
 import time
 import json5
 
-
 # Load configuration from config.json
 def load_config():
     config_path = os.path.join(os.path.dirname(__file__), 'config.json')
@@ -35,44 +35,65 @@ AUTO_START_PATH = os.path.expanduser(r"~\AppData\Roaming\Microsoft\Windows\Start
 # Initialize the Groq client
 client = Groq(api_key=API_KEY)
 
-def record_audio(filename):
-    """Records audio when key and mouse button is pressed."""
+
+def save_audio_to_disk(filename, audio_data, audio_format, channels, rate):
+    """Write raw PCM bytes as a WAV file; blocks, so callers thread it themselves."""
+    # audio_format is the sample width in bytes; writeframes fixes up the frame count
+    wav_params = (channels, audio_format, rate, 0, 'NONE', 'not compressed')
+    with wave.open(filename, 'wb') as wav_out:
+        wav_out.setparams(wav_params)
+        wav_out.writeframes(audio_data)
+
+def record_audio():
+    """Record while the hotkey and mouse button are held; return a WAV io.BytesIO.
+
+    A copy is also written to output.wav by a background thread.
+    """
     audio = pyaudio.PyAudio()
     stream = audio.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
     
     frames = []
     print("Recording...")
-
-    # Record while button or mouse is pressed
     while keyboard.is_pressed(KB_KEY) and mouse.is_pressed(button=MOUSE_BTN):
         data = stream.read(1024)
         frames.append(data)
     
-    print("Recording stopped.")
+    recording_duration = len(frames) * 1024 / 16000  # 1024 frames per chunk at 16 kHz
+    print(f"Recording stopped. Duration: {recording_duration:.2f} seconds.")
+
     stream.stop_stream()
     stream.close()
     audio.terminate()
 
-    # Save the recorded audio
-    wave_file = wave.open(filename, 'wb')
-    wave_file.setnchannels(1)
-    wave_file.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
-    wave_file.setframerate(44100)
-    wave_file.writeframes(b''.join(frames))
-    wave_file.close()
+    # Wrap the raw PCM in a WAV container (wave.open accepts file objects); bare frames carry no header
+    audio_data = b''.join(frames)
+    sample_width = audio.get_sample_size(pyaudio.paInt16)
+    memory_stream = io.BytesIO()
+    save_audio_to_disk(memory_stream, audio_data, sample_width, 1, 16000)
+    # Also persist a copy to disk in the background; nothing waits on this thread
+    threading.Thread(target=save_audio_to_disk, args=("output.wav", audio_data, sample_width, 1, 16000)).start()
+    return memory_stream
 
-def transcribe_audio(filename):
+def transcribe_audio(memory_stream):
     """Transcribes the recorded audio using the Groq Whisper model."""
-    with open(filename, "rb") as file:
-        transcription = client.audio.transcriptions.create(
-            file=(filename, file.read()),  
-            model=MODEL, #"distil-whisper-large-v3-en",  
-            prompt="Specify context or spelling",  
-            language=config['language'],
-            response_format="json",  
-            temperature=0.0  
-        )
-    return transcription.text
+    # Rewind so the upload starts at the first byte regardless of prior reads/writes
+    memory_stream.seek(0)
+
+    # perf_counter is monotonic, so the measured latency cannot jump with clock changes
+    started = time.perf_counter()
+    transcription = client.audio.transcriptions.create(
+        file=('audio.wav', memory_stream),
+        model=MODEL,
+        prompt="Specify context or spelling",
+        language=config['language'],
+        response_format="json",
+        temperature=0.0,
+    )
+
+    transcription_time = time.perf_counter() - started
+    print(f"Transcription took: {transcription_time:.2f} seconds.")
+
+    return transcription.text
 
 def simulate_keypress(text):
     """Simulates typing of transcribed text quickly."""
@@ -110,6 +131,9 @@ def setup_tray_icon():
 
 
 
+response_times = []  # NOTE(review): presumably per-request latencies; not referenced in the visible hunks - confirm
+ma_window_size = 10  # Moving average over the last 10 responses
+
 def main_loop():
     """Continuously listen for key or mouse press and transcribe audio."""
     filename = "output.wav"
@@ -122,11 +146,11 @@ def main_loop():
             time.sleep(0.1)  # Small sleep to avoid busy-waiting
         
         # Record audio
-        record_audio(filename)
+        memory_stream = record_audio()
         
         # Transcribe audio
         print("Transcribing audio...")
-        transcribed_text = transcribe_audio(filename)
+        transcribed_text = transcribe_audio(memory_stream)
         
         # Simulate typing the transcribed text
         print("Typing transcribed text...")