gogo2/agent-mAId/main.py

import ctypes
import os
import sys
import threading
import time
import wave

import keyboard
import mouse
import pyaudio
import pyautogui
import pystray
from groq import Groq
from PIL import Image
from pystray import MenuItem as item

# Constants
# The Groq API key is read from the GROQ_API_KEY environment variable so the
# secret never lives in source control. Set it before launching the app.
# NOTE(review): the key that was previously hard-coded here has been exposed
# and should be revoked and replaced.
API_KEY = os.environ.get("GROQ_API_KEY")
BUTTON = 'ctrl'  # The keyboard button to listen for
AUTO_START_PATH = os.path.expanduser(r"~\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Startup")  # For autostart

# Initialize the Groq client
client = Groq(api_key=API_KEY)

def record_audio(filename, rate=44100, chunk=1024):
    """Record mono 16-bit audio while the trigger is held and save it as a WAV file.

    Recording continues for as long as the ``BUTTON`` key or the left mouse
    button is pressed. The captured frames are then written to *filename*.

    Args:
        filename: Path of the WAV file to create (overwritten if it exists).
        rate: Sampling rate in Hz used for both capture and the WAV header.
        chunk: Number of frames read from the input stream per iteration.
    """
    sample_format = pyaudio.paInt16
    frames = []

    audio = pyaudio.PyAudio()
    try:
        # Query the sample width before the PyAudio instance is terminated.
        sample_width = audio.get_sample_size(sample_format)
        stream = audio.open(format=sample_format, channels=1, rate=rate, input=True, frames_per_buffer=chunk)
        try:
            print("Recording...")

            # Record while button or mouse is pressed
            while keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left'):
                frames.append(stream.read(chunk))

            print("Recording stopped.")
        finally:
            # Release the input stream even if a read fails midway.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # Save the recorded audio; the context manager closes the file on error too.
    with wave.open(filename, 'wb') as wave_file:
        wave_file.setnchannels(1)
        wave_file.setsampwidth(sample_width)
        wave_file.setframerate(rate)
        wave_file.writeframes(b''.join(frames))

def transcribe_audio(filename):
    """Transcribe the audio file at *filename* and return the resulting text.

    The file's bytes are submitted through the module-level ``client`` to its
    audio transcription endpoint; the ``text`` attribute of the response is
    returned.
    """
    # Load the whole recording into memory before issuing the request.
    with open(filename, "rb") as audio_file:
        audio_bytes = audio_file.read()

    request_options = {
        "file": (filename, audio_bytes),        # Required audio file
        "model": "distil-whisper-large-v3-en",  # Required model to use for transcription
        "prompt": "Specify context or spelling",  # Optional
        "response_format": "json",              # Optional
        "temperature": 0.0,                     # Optional
    }
    response = client.audio.transcriptions.create(**request_options)

    # The response exposes the transcript via attribute access.
    return response.text

def simulate_keypress(text):
    """Type *text* into the currently focused window, then press Enter.

    Args:
        text: The string to type, one simulated key press per character.
    """
    # Type the whole string in a single call. pyautogui pauses for
    # pyautogui.PAUSE after every public call, so calling typewrite() once per
    # character adds that delay to each character of the transcript.
    pyautogui.typewrite(text)
    pyautogui.press('enter')

def main():
    """Run one dictation cycle: wait for the trigger, record, transcribe, type.

    Blocks until the ``BUTTON`` key or the left mouse button is pressed,
    records to ``output.wav`` while it is held, sends the recording for
    transcription and types the returned text followed by Enter.
    """
    filename = "output.wav"

    print("Press and hold the button or left mouse button to record...")
    # Wait for button or mouse press. Sleep briefly between polls so the wait
    # does not spin a CPU core at 100%; 10 ms is well below human reaction time.
    while not (keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left')):
        time.sleep(0.01)
    record_audio(filename)

    print("Transcribing audio...")
    transcribed_text = transcribe_audio(filename)

    print("Entering text...")
    simulate_keypress(transcribed_text)

def add_to_autostart(*, startup_dir=None):
    """Register the app to start on login via a launcher in the Startup folder.

    Writes ``MyApp.bat`` into the Startup folder. The batch file starts this
    script with the Python interpreter that is currently running, using the
    script's own directory as working directory. Unlike ``mklink`` (which
    creates a symbolic link, not a shell shortcut, and needs elevation), a
    plain launcher file needs no administrator rights.

    Args:
        startup_dir: Folder to write the launcher into. Defaults to
            ``AUTO_START_PATH``. Keyword-only on purpose: pystray chooses what
            to pass to a menu action from its positional-argument count, and
            this function is used directly as a menu action.
    """
    target_dir = AUTO_START_PATH if startup_dir is None else startup_dir
    script_path = os.path.abspath(__file__)
    launcher_path = os.path.join(target_dir, "MyApp.bat")

    def _batch_quote(path):
        # '%' starts variable expansion in batch files, so it must be doubled.
        return '"' + path.replace("%", "%%") + '"'

    launcher_lines = [
        "@echo off",
        # The file is written as UTF-8; switch the code page so non-ASCII
        # characters in the paths below are decoded correctly.
        "chcp 65001 > nul",
        # /D sets the working directory so files the app writes with relative
        # paths do not end up inside the Startup folder.
        'start "" /D {} {} {}'.format(
            _batch_quote(os.path.dirname(script_path)),
            _batch_quote(sys.executable),
            _batch_quote(script_path),
        ),
    ]

    try:
        with open(launcher_path, "w", encoding="utf-8", newline="\r\n") as launcher:
            launcher.write("\n".join(launcher_lines) + "\n")
    except OSError as exc:
        # Report the failure instead of claiming success unconditionally.
        print(f"Could not add app to autostart: {exc}")
        return

    print("App added to autostart.")

def quit_app(icon):
    """Remove the tray icon and terminate the whole application.

    Args:
        icon: The running ``pystray.Icon`` whose menu triggered the exit.
    """
    # Hide the icon before stopping so it is removed from the tray before the
    # process ends.
    icon.visible = False
    icon.stop()
    # sys.exit() only raises SystemExit in the calling thread. This function
    # is invoked from the tray thread, so that would leave the main thread
    # (and therefore the process) running. os._exit ends the process itself.
    os._exit(0)

def setup_tray_icon():
    """Create the system tray icon with its menu and run it."""

    def _on_exit_clicked():
        # `tray` is assigned below; it is looked up only when the menu entry
        # is actually clicked, by which time the icon exists.
        quit_app(tray)

    # Tray menu: one entry to register autostart, one to quit.
    tray_menu = pystray.Menu(
        item('Register to Autostart', add_to_autostart),
        item('Exit', _on_exit_clicked),
    )

    # Plain red 64x64 square used as the icon image.
    red_square = Image.new('RGB', (64, 64), color=(255, 0, 0))

    tray = pystray.Icon("my_app", red_square, menu=tray_menu)

    # Enter the tray icon's event loop.
    tray.run()

if __name__ == "__main__":
    # The tray icon runs on its own daemon thread so that it neither blocks
    # the dictation flow below nor keeps the process alive once main() returns.
    tray_thread = threading.Thread(target=setup_tray_icon, daemon=True)
    tray_thread.start()

    # Run the dictation flow on the main thread.
    main()