import os
import sys
import pyaudio
import wave
import pyautogui
import keyboard
import mouse
import threading
from groq import Groq
import pystray
from pystray import MenuItem as item
from PIL import Image
import ctypes
import time
import json5

# Directory containing this script; used to resolve bundled resources
# (config.json, mic.webp) independently of the current working directory.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))


def load_config():
    """Load and parse config.json (JSON5 syntax allowed) from the script's directory."""
    config_path = os.path.join(SCRIPT_DIR, 'config.json')
    with open(config_path, 'r') as config_file:
        return json5.load(config_file)


# Load the config once at startup.
config = load_config()

# Settings pulled from the config file.
API_KEY = config['api_key']
KB_KEY = config['kb_key']        # keyboard key that must be held to record
MOUSE_BTN = config['mouse_btn']  # mouse button that must be held to record
MODEL = config['model']          # Groq Whisper model name

# Audio capture settings.
# NOTE: the capture rate and the rate written into the WAV header MUST match,
# otherwise the file plays back (and transcribes) at the wrong speed.
SAMPLE_RATE = 16000
CHUNK_SIZE = 1024

# Windows per-user Startup folder, used for autostart registration.
AUTO_START_PATH = os.path.expanduser(
    r"~\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Startup")

# Initialize the Groq client.
client = Groq(api_key=API_KEY)


def record_audio(filename):
    """Record 16-bit mono microphone audio to *filename* while both the
    configured keyboard key AND mouse button stay pressed.

    The recording stops as soon as either input is released.
    """
    audio = pyaudio.PyAudio()
    stream = audio.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=SAMPLE_RATE,
                        input=True,
                        frames_per_buffer=CHUNK_SIZE)
    frames = []
    print("Recording...")

    # Keep capturing as long as BOTH the key and the mouse button are held.
    while keyboard.is_pressed(KB_KEY) and mouse.is_pressed(button=MOUSE_BTN):
        frames.append(stream.read(CHUNK_SIZE))

    print("Recording stopped.")
    stream.stop_stream()
    stream.close()
    audio.terminate()

    # Save the recorded audio.
    # BUGFIX: the header frame rate was previously hard-coded to 44100 while
    # the stream was captured at 16000, which made the saved audio play back
    # ~2.75x too fast and degraded transcription. Both now use SAMPLE_RATE.
    with wave.open(filename, 'wb') as wave_file:
        wave_file.setnchannels(1)
        wave_file.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
        wave_file.setframerate(SAMPLE_RATE)
        wave_file.writeframes(b''.join(frames))


def transcribe_audio(filename):
    """Transcribe the recorded audio file via the configured Groq Whisper model.

    Returns the transcription text as a string.
    """
    with open(filename, "rb") as file:
        transcription = client.audio.transcriptions.create(
            file=(filename, file.read()),
            model=MODEL,
            prompt="Specify context or spelling",
            language=config['language'],
            response_format="json",
            temperature=0.0,
        )
    return transcription.text


def simulate_keypress(text):
    """Type *text* into the focused window by simulating keystrokes."""
    # Small per-character interval keeps typing fast but still reliable.
    pyautogui.typewrite(text, interval=0.01)


def add_to_autostart():
    """Register the app to autostart on login (Windows-specific).

    Creates a link in the user's Startup folder via an elevated `mklink` call.
    """
    script_path = os.path.abspath(__file__)
    shortcut_path = os.path.join(AUTO_START_PATH, "mAId.lnk")

    # "runas" requests elevation, since mklink needs admin rights by default.
    shell = ctypes.windll.shell32
    shell.ShellExecuteW(None, "runas", "cmd.exe",
                        f'/C mklink "{shortcut_path}" "{script_path}"',
                        None, 1)
    print("App added to autostart.")


def quit_app(icon):
    """Stop the tray icon and terminate the process."""
    icon.stop()
    sys.exit()


def setup_tray_icon():
    """Create and run the system tray icon with its context menu (blocking)."""
    # Resolve the icon relative to the script directory so it is found even
    # when launched from autostart, where the CWD is not the script folder.
    icon_image = Image.open(os.path.join(SCRIPT_DIR, 'mic.webp'))
    menu = (
        item('Register to Autostart', add_to_autostart),
        item('Exit', lambda: quit_app(icon)),
    )
    icon = pystray.Icon("mAId", icon_image, menu=pystray.Menu(*menu))
    icon.run()


def main_loop():
    """Main work loop: wait for the key+mouse chord, record, transcribe, type.

    Runs forever; each iteration produces one recording in output.wav,
    transcribes it, and types the result into the focused window.
    """
    filename = "output.wav"
    while True:
        print("Waiting for key and mouse press...")

        # Block (with a small sleep to avoid busy-waiting) until BOTH the
        # keyboard key and the mouse button are pressed simultaneously.
        while not (keyboard.is_pressed(KB_KEY) and mouse.is_pressed(button=MOUSE_BTN)):
            time.sleep(0.1)

        # Record while the chord is held.
        record_audio(filename)

        print("Transcribing audio...")
        transcribed_text = transcribe_audio(filename)

        print("Typing transcribed text...")
        simulate_keypress(transcribed_text)


if __name__ == "__main__":
    # Run the tray icon on a daemon thread so it doesn't block the main loop
    # and dies automatically with the process.
    tray_thread = threading.Thread(target=setup_tray_icon)
    tray_thread.daemon = True
    tray_thread.start()

    # Listen for the key/mouse chord and transcribe in the foreground.
    main_loop()