diff --git a/.gitignore b/.gitignore index b6c8ffa..e35c3ad 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ agent-mobile/artimobile/supervisord.pid agent-pyter/lag-llama agent-pyter/google-chrome-stable_current_amd64.deb web/.node-persist/* +agent-mAId/output.wav diff --git a/agent-mAId/main.py b/agent-mAId/main.py index 5dd01be..9c9be1b 100644 --- a/agent-mAId/main.py +++ b/agent-mAId/main.py @@ -11,6 +11,7 @@ import pystray from pystray import MenuItem as item from PIL import Image import ctypes +import time # Constants API_KEY = "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" # Make sure to use your actual API key @@ -21,7 +22,7 @@ AUTO_START_PATH = os.path.expanduser(r"~\AppData\Roaming\Microsoft\Windows\Start client = Groq(api_key=API_KEY) def record_audio(filename): - # Setup audio recording + """Records audio when BUTTON or mouse button is pressed.""" audio = pyaudio.PyAudio() stream = audio.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024) @@ -29,7 +30,7 @@ def record_audio(filename): print("Recording...") # Record while button or mouse is pressed - while keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left'): + while keyboard.is_pressed(BUTTON) and mouse.is_pressed(button='left'): data = stream.read(1024) frames.append(data) @@ -47,40 +48,21 @@ def record_audio(filename): wave_file.close() def transcribe_audio(filename): - # Open the audio file + """Transcribes the recorded audio using the Groq Whisper model.""" with open(filename, "rb") as file: - # Create a transcription of the audio file transcription = client.audio.transcriptions.create( - file=(filename, file.read()), # Required audio file - model="distil-whisper-large-v3-en", # Required model to use for transcription - prompt="Specify context or spelling", # Optional - response_format="json", # Optional - temperature=0.0 # Optional + file=(filename, file.read()), + model="distil-whisper-large-v3-en", + prompt="Specify context or spelling", + response_format="json", + temperature=0.0 ) - - # Access the transcription text using dot notation return transcription.text def simulate_keypress(text): - # Simulate keypress for each character in text - for char in text: - pyautogui.typewrite(char) - pyautogui.press('enter') - -def main(): - filename = "output.wav" - - print("Press and hold the button or left mouse button to record...") - # Wait for button or mouse press - while not (keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left')): - pass - record_audio(filename) - - print("Transcribing audio...") - transcribed_text = transcribe_audio(filename) - - print("Entering text...") - simulate_keypress(transcribed_text) + """Simulates typing of transcribed text quickly.""" + pyautogui.typewrite(text, interval=0.01) # Reduce interval between characters for faster typing + # pyautogui.press('enter') def add_to_autostart(): """Registers the app to autostart on login.""" @@ -99,26 +81,43 @@ def quit_app(icon): def setup_tray_icon(): """Setup system tray icon and menu.""" - # Create an icon image icon_image = Image.new('RGB', (64, 64), color=(255, 0, 0)) # Red icon as an example - # Define menu items for the tray menu = ( item('Register to Autostart', add_to_autostart), item('Exit', lambda: quit_app(icon)) ) - # Create the tray icon icon = pystray.Icon("my_app", icon_image, menu=pystray.Menu(*menu)) - - # Run the tray icon icon.run() +def main_loop(): + """Continuously listen for key or mouse press and transcribe audio.""" + filename = "output.wav" + + while True: + print("Waiting for button or mouse press...") + + # Wait for button or mouse press + while not (keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left')): + time.sleep(0.1) # Small sleep to avoid busy-waiting + + # Record audio + record_audio(filename) + + # Transcribe audio + print("Transcribing audio...") + transcribed_text = transcribe_audio(filename) + + # Simulate typing the transcribed text + print("Typing transcribed text...") + simulate_keypress(transcribed_text) + if __name__ == "__main__": # Start the tray icon in a separate thread so it doesn't block the main functionality tray_thread = threading.Thread(target=setup_tray_icon) tray_thread.daemon = True tray_thread.start() - # Run the main function - main() + # Run the main loop that listens for key or mouse presses in the background + main_loop() diff --git a/agent-mAId/output.wav b/agent-mAId/output.wav index b9dca44..8dbde95 100644 Binary files a/agent-mAId/output.wav and b/agent-mAId/output.wav differ