# gogo2/agent-mAId/main.py

import os
import sys
import pyaudio
import wave
import pyautogui
import keyboard
import mouse
import threading
from groq import Groq
import pystray
from pystray import MenuItem as item
from PIL import Image
import ctypes
import io
import time
import json5


# Load configuration from config.json
def load_config():
    config_path = os.path.join(os.path.dirname(__file__), 'config.json')
    with open(config_path, 'r') as config_file:
        return json5.load(config_file)


# Load the config
config = load_config()

# Extract the API key, hotkey, mouse button, and model from the config file
API_KEY = config['api_key']
KB_KEY = config['kb_key']
MOUSE_BTN = config['mouse_btn']
MODEL = config['model']
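
# For reference, the lookups above and the config['language'] lookup further down
# imply a config.json roughly like the sketch below. It is parsed with json5, so
# comments and trailing commas are allowed. The key names come from this file; the
# example values are illustrative assumptions, not the project's actual defaults.
#
# {
#     "api_key": "gsk_...your Groq API key...",
#     "kb_key": "ctrl",               // any key name understood by the `keyboard` module
#     "mouse_btn": "left",            // any button name understood by the `mouse` module
#     "model": "whisper-large-v3",    // Groq Whisper model used for transcription
#     "language": "en",               // forwarded to the transcription request
# }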

# Constants
AUTO_START_PATH = os.path.expanduser(r"~\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Startup")  # Windows Startup folder, used for autostart

# Initialize the Groq client
client = Groq(api_key=API_KEY)


def save_audio_to_disk(filename, audio_data, sample_width, channels, rate):
    """Save the recorded audio data to disk as a WAV file.

    Called from a background thread by record_audio(), so it does not block recording.
    """
    with wave.open(filename, 'wb') as wave_file:
        wave_file.setnchannels(channels)
        wave_file.setsampwidth(sample_width)
        wave_file.setframerate(rate)
        wave_file.writeframes(audio_data)


def record_audio():
    """Records audio while the keyboard key and mouse button are both held, keeping it in memory."""
    audio = pyaudio.PyAudio()
    stream = audio.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
    frames = []

    print("Recording...")
    # Record while both the keyboard key and the mouse button are held down
    while keyboard.is_pressed(KB_KEY) and mouse.is_pressed(button=MOUSE_BTN):
        data = stream.read(1024)
        frames.append(data)

    recording_duration = len(frames) * 1024 / 16000  # Audio duration in seconds (1024 samples per chunk at 16 kHz)
    print(f"Recording stopped. Duration: {recording_duration:.2f} seconds.")

    sample_width = audio.get_sample_size(pyaudio.paInt16)
    stream.stop_stream()
    stream.close()
    audio.terminate()

    # Store the recorded audio in an in-memory stream as a valid WAV file
    memory_stream = io.BytesIO()
    with wave.open(memory_stream, 'wb') as wave_file:
        wave_file.setnchannels(1)
        wave_file.setsampwidth(sample_width)
        wave_file.setframerate(16000)
        wave_file.writeframes(b''.join(frames))
    memory_stream.seek(0)  # Reset the stream position to the beginning for reading

    # Save the audio to disk in a background thread as an optional side task
    threading.Thread(target=save_audio_to_disk, args=("output.wav", b''.join(frames), sample_width, 1, 16000)).start()

    return memory_stream


def transcribe_audio(memory_stream):
    """Transcribes the recorded audio using the Groq Whisper model."""
    memory_stream.seek(0)  # Reset the stream position to the beginning

    start_time = time.time()
    transcription = client.audio.transcriptions.create(
        file=('audio.wav', memory_stream),
        model=MODEL,
        prompt="Specify context or spelling",
        language=config['language'],
        response_format="json",
        temperature=0.0
    )
    end_time = time.time()

    transcription_time = end_time - start_time
    print(f"Transcription took: {transcription_time:.2f} seconds.")

    return transcription.text


def simulate_keypress(text):
    """Simulates typing of the transcribed text quickly."""
    pyautogui.typewrite(text, interval=0.01)  # Small interval between characters for faster typing
    # pyautogui.press('enter')
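
# Note: pyautogui.typewrite() can only press keys it knows how to map, so characters
# outside its key mapping (for example many accented or non-Latin characters) may be
# skipped when typing transcriptions in other languages.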


def add_to_autostart():
    """Registers the app to autostart on login."""
    script_path = os.path.abspath(__file__)
    shortcut_path = os.path.join(AUTO_START_PATH, "mAId.lnk")

    # Use ShellExecuteW with "runas" to run an elevated mklink that links the script
    # into the Startup folder (Windows specific; this creates a symlink, not a real .lnk shortcut)
    shell = ctypes.windll.shell32
    shell.ShellExecuteW(None, "runas", "cmd.exe", f'/C mklink "{shortcut_path}" "{script_path}"', None, 1)
    print("App added to autostart.")


def quit_app(icon):
    """Quit the tray application and terminate the whole process."""
    icon.stop()
    # sys.exit() would only end the tray thread (the icon runs in a daemon thread),
    # so use os._exit() to terminate the entire process.
    os._exit(0)


def setup_tray_icon():
    """Set up the system tray icon and menu."""
    # icon_image = Image.new('RGB', (64, 64), color=(255, 0, 0))  # Red icon as an example
    icon_image = Image.open(os.path.join(os.path.dirname(__file__), 'mic.webp'))

    menu = (
        item('Register to Autostart', add_to_autostart),
        item('Exit', lambda: quit_app(icon))
    )
    icon = pystray.Icon("mAId", icon_image, menu=pystray.Menu(*menu))
    icon.run()


response_times = []
ma_window_size = 10  # Moving average over the last 10 responses
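
# Note: response_times and ma_window_size are defined above but never used anywhere
# in this file. A minimal sketch of how they might track a moving average of
# transcription latency (an assumption about intent, not existing behaviour):
#
# def track_response_time(duration):
#     response_times.append(duration)
#     if len(response_times) > ma_window_size:
#         response_times.pop(0)
#     avg = sum(response_times) / len(response_times)
#     print(f"Average transcription time over last {len(response_times)} runs: {avg:.2f}s")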


def main_loop():
    """Continuously listen for the key-and-mouse chord, then record, transcribe, and type."""
    while True:
        print("Waiting for key and mouse press...")

        # Wait until both KB_KEY and the mouse button are pressed
        while not (keyboard.is_pressed(KB_KEY) and mouse.is_pressed(button=MOUSE_BTN)):
            time.sleep(0.1)  # Small sleep to avoid busy-waiting

        # Record audio
        memory_stream = record_audio()

        # Transcribe audio
        print("Transcribing audio...")
        transcribed_text = transcribe_audio(memory_stream)

        # Simulate typing the transcribed text
        print("Typing transcribed text...")
        simulate_keypress(transcribed_text)


if __name__ == "__main__":
    # Start the tray icon in a separate thread so it doesn't block the main functionality
    tray_thread = threading.Thread(target=setup_tray_icon)
    tray_thread.daemon = True
    tray_thread.start()

    # Run the main loop that listens for the key and mouse presses
    main_loop()