diff --git a/agent-mAId/config.json b/agent-mAId/config.json new file mode 100644 index 0000000..8d7d658 --- /dev/null +++ b/agent-mAId/config.json @@ -0,0 +1,8 @@ +{ + "api_key": "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", + "kb_key": "ctrl", + "mouse_btn": "left", + "model": "distil-whisper-large-v3-en", + "language":"en" // whisper-large-v3 or distil-whisper-large-v3-en + } + \ No newline at end of file diff --git a/agent-mAId/main.py b/agent-mAId/main.py index 9c9be1b..7bde14d 100644 --- a/agent-mAId/main.py +++ b/agent-mAId/main.py @@ -12,25 +12,39 @@ from pystray import MenuItem as item from PIL import Image import ctypes import time +import json5 + + +# Load configuration from config.json +def load_config(): + config_path = os.path.join(os.path.dirname(__file__), 'config.json') + with open(config_path, 'r') as config_file: + return json5.load(config_file) + +# Load the config +config = load_config() +# Extract API key and button from the config file +API_KEY = config['api_key'] +KB_KEY = config['kb_key'] +MOUSE_BTN = config['mouse_btn'] +MODEL = config['model'] # Constants -API_KEY = "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" # Make sure to use your actual API key -BUTTON = 'ctrl' # The keyboard button to listen for AUTO_START_PATH = os.path.expanduser(r"~\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Startup") # For autostart # Initialize the Groq client client = Groq(api_key=API_KEY) def record_audio(filename): - """Records audio when BUTTON or mouse button is pressed.""" + """Records audio when key and mouse button is pressed.""" audio = pyaudio.PyAudio() - stream = audio.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024) + stream = audio.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024) frames = [] print("Recording...") # Record while button or mouse is pressed - while keyboard.is_pressed(BUTTON) and mouse.is_pressed(button='left'): + while keyboard.is_pressed(KB_KEY) and mouse.is_pressed(button=MOUSE_BTN): data = stream.read(1024) frames.append(data) @@ -52,8 +66,9 @@ def transcribe_audio(filename): with open(filename, "rb") as file: transcription = client.audio.transcriptions.create( file=(filename, file.read()), - model="distil-whisper-large-v3-en", + model=MODEL, #"distil-whisper-large-v3-en", prompt="Specify context or spelling", + language=config['language'], response_format="json", temperature=0.0 ) @@ -67,7 +82,7 @@ def simulate_keypress(text): def add_to_autostart(): """Registers the app to autostart on login.""" script_path = os.path.abspath(__file__) - shortcut_path = os.path.join(AUTO_START_PATH, "MyApp.lnk") + shortcut_path = os.path.join(AUTO_START_PATH, "mAId.lnk") # Use ctypes to create the shortcut (this is Windows specific) shell = ctypes.windll.shell32 @@ -79,27 +94,31 @@ def quit_app(icon): icon.stop() sys.exit() + def setup_tray_icon(): """Setup system tray icon and menu.""" - icon_image = Image.new('RGB', (64, 64), color=(255, 0, 0)) # Red icon as an example + #icon_image = Image.new('RGB', (64, 64), color=(255, 0, 0)) # Red icon as an example + icon_image = Image.open('mic.webp') menu = ( item('Register to Autostart', add_to_autostart), item('Exit', lambda: quit_app(icon)) ) - icon = pystray.Icon("my_app", icon_image, menu=pystray.Menu(*menu)) + icon = pystray.Icon("mAId", icon_image, menu=pystray.Menu(*menu)) icon.run() + + def main_loop(): """Continuously listen for key or mouse press and transcribe audio.""" filename = "output.wav" while True: - print("Waiting for button or mouse press...") + print("Waiting for key and mouse press...") - # Wait for button or mouse press - while not (keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left')): + # Wait for KB_KEY or mouse press + while not (keyboard.is_pressed(KB_KEY) and mouse.is_pressed(button=MOUSE_BTN)): time.sleep(0.1) # Small sleep to avoid busy-waiting # Record audio diff --git a/agent-mAId/mic.webp b/agent-mAId/mic.webp new file mode 100644 index 0000000..1310532 Binary files /dev/null and b/agent-mAId/mic.webp differ diff --git a/agent-mAId/output.wav b/agent-mAId/output.wav index 8dbde95..8bbccc2 100644 Binary files a/agent-mAId/output.wav and b/agent-mAId/output.wav differ diff --git a/agent-mAId/readme.md b/agent-mAId/readme.md new file mode 100644 index 0000000..0700184 --- /dev/null +++ b/agent-mAId/readme.md @@ -0,0 +1,3 @@ +# to install as an app: +pip install pyinstaller +pyinstaller --onefile main.py diff --git a/config.json b/config.json index 9990c5b..1c0308e 100644 --- a/config.json +++ b/config.json @@ -1,4 +1,12 @@ +//C:\Users\popov\.continue\config.json { + "models": [ { + "title": "local ollama> yi-coder", + "provider": "ollama", + "model": "yi-coder:9b", + "apiBase": "http://localhost:11434" + } + ], "tabAutocompleteModel": { "title": "Tab Autocomplete Model", "provider": "ollama", @@ -8,6 +16,7 @@ } + // original: "tabAutocompleteModel": { // "title": "Starcoder 3b", // "provider": "ollama",