gogo2/agent-mAId/main.py
2024-09-10 03:02:10 +03:00

124 lines
4.0 KiB
Python

import os
import sys
import pyaudio
import wave
import pyautogui
import keyboard
import mouse
import threading
from groq import Groq
import pystray
from pystray import MenuItem as item
from PIL import Image
import ctypes
import time
# Constants
# SECURITY(review): this looks like a real Groq API key committed to source —
# it should be rotated and loaded from an environment variable, not hard-coded.
API_KEY = "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" # Make sure to use your actual API key
BUTTON = 'ctrl' # Keyboard key that triggers/holds recording (see main_loop/record_audio)
AUTO_START_PATH = os.path.expanduser(r"~\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Startup") # Windows Startup folder used by add_to_autostart()
# Initialize the Groq client (used by transcribe_audio)
client = Groq(api_key=API_KEY)
def record_audio(filename):
    """Record microphone audio while BUTTON or the left mouse button is held.

    Captures mono 16-bit audio at 44.1 kHz in 1024-frame chunks and writes
    the result to *filename* as a WAV file.

    Args:
        filename: Path of the WAV file to write (overwritten if it exists).
    """
    audio = pyaudio.PyAudio()
    stream = audio.open(format=pyaudio.paInt16, channels=1, rate=44100,
                        input=True, frames_per_buffer=1024)
    frames = []
    print("Recording...")
    # BUG FIX: main_loop() starts recording when EITHER the hotkey OR the
    # mouse button is pressed, but this loop originally required BOTH
    # ('and'), so recording stopped immediately unless both were held.
    # Use 'or' to match the trigger condition.
    while keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left'):
        data = stream.read(1024)
        frames.append(data)
    print("Recording stopped.")
    stream.stop_stream()
    stream.close()
    audio.terminate()
    # Persist the recorded audio; the context manager guarantees the file is
    # closed even if a write fails mid-way.
    with wave.open(filename, 'wb') as wave_file:
        wave_file.setnchannels(1)
        wave_file.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
        wave_file.setframerate(44100)
        wave_file.writeframes(b''.join(frames))
def transcribe_audio(filename):
    """Send the WAV file at *filename* to Groq's Whisper model and return the transcript text."""
    with open(filename, "rb") as audio_file:
        audio_bytes = audio_file.read()
    response = client.audio.transcriptions.create(
        file=(filename, audio_bytes),
        model="distil-whisper-large-v3-en",
        prompt="Specify context or spelling",
        response_format="json",
        temperature=0.0,
    )
    return response.text
def simulate_keypress(text):
    """Type *text* into the currently focused window via simulated key events."""
    # A short per-character interval keeps typing fast while still registering.
    pyautogui.typewrite(text, interval=0.01)
def add_to_autostart():
    """Register this script to run at login by creating a link in the Windows Startup folder."""
    script_path = os.path.abspath(__file__)
    # NOTE(review): the target is named ".lnk" but 'mklink' creates a symlink,
    # not a real shell shortcut — confirm Windows actually launches it at login.
    shortcut_path = os.path.join(AUTO_START_PATH, "MyApp.lnk")
    # Use ctypes to create the shortcut (this is Windows specific).
    # "runas" triggers a UAC elevation prompt, since mklink needs privileges.
    shell = ctypes.windll.shell32
    shell.ShellExecuteW(None, "runas", "cmd.exe", f'/C mklink "{shortcut_path}" "{script_path}"', None, 1)
    print("App added to autostart.")
def quit_app(icon):
    """Stop the tray icon and terminate the whole process.

    Args:
        icon: The running pystray.Icon instance to stop.
    """
    icon.stop()
    # BUG FIX: this callback runs on the tray thread, so sys.exit() only
    # raised SystemExit in that thread and main_loop() kept running on the
    # main thread — "Exit" never actually exited. os._exit() ends the whole
    # process immediately (no cleanup handlers run, which is acceptable here:
    # the only state is a temp WAV file).
    os._exit(0)
def setup_tray_icon():
    """Create the system-tray icon with its menu and run its event loop (blocks)."""
    # Placeholder artwork: a solid 64x64 red square.
    tray_image = Image.new('RGB', (64, 64), color=(255, 0, 0))
    icon = pystray.Icon(
        "my_app",
        tray_image,
        menu=pystray.Menu(
            item('Register to Autostart', add_to_autostart),
            # The lambda is evaluated only on click, after 'icon' is bound.
            item('Exit', lambda: quit_app(icon)),
        ),
    )
    icon.run()
def main_loop():
    """Run forever: on each hotkey/mouse press, record, transcribe, and type the result."""
    filename = "output.wav"
    while True:
        print("Waiting for button or mouse press...")
        # Poll until the hotkey or the left mouse button goes down.
        while not (keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left')):
            time.sleep(0.1)  # short sleep to avoid busy-waiting
        # Capture audio for as long as the button is held.
        record_audio(filename)
        print("Transcribing audio...")
        transcribed_text = transcribe_audio(filename)
        print("Typing transcribed text...")
        simulate_keypress(transcribed_text)
if __name__ == "__main__":
    # Run the tray icon on a daemon thread so it neither blocks the listener
    # below nor keeps the process alive on its own.
    tray_thread = threading.Thread(target=setup_tray_icon, daemon=True)
    tray_thread.start()
    # The press-to-transcribe listener runs on the main thread and never returns.
    main_loop()