config file, executable, tray icon

2024-09-10 11:45:17 +03:00
parent 400bccfeec
commit 4974af0678
6 changed files with 51 additions and 12 deletions
--- a/agent-mAId/config.json
+++ b/agent-mAId/config.json
@@ -0,0 +1,8 @@
 {
    "api_key": "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",
    "kb_key": "ctrl",
    "mouse_btn": "left",
    "model": "distil-whisper-large-v3-en", 
    "language":"en" // whisper-large-v3 or distil-whisper-large-v3-en
  }
--- a/agent-mAId/main.py
+++ b/agent-mAId/main.py
@@ -12,25 +12,39 @@ from pystray import MenuItem as item
 from PIL import Image
 import ctypes
 import time
 import json5
 # Load configuration from config.json
 def load_config():
    """Load the application settings from config.json.

    The file is looked up in the same directory as this script and parsed
    with json5 (the shipped config.json contains a ``//`` comment, which
    strict JSON would reject).

    Returns:
        The parsed configuration object produced by ``json5.load``.

    Raises:
        OSError: If config.json cannot be opened.
    """
    config_path = os.path.join(os.path.dirname(__file__), 'config.json')
    # Decode explicitly as UTF-8: without it, open() falls back to the
    # platform locale encoding (e.g. cp1252 on Windows), which can
    # mis-decode non-ASCII characters in the config.
    with open(config_path, 'r', encoding='utf-8') as config_file:
        return json5.load(config_file)
 # Load the config once at import time; the settings below are read from it
 config = load_config()
 # Extract the API key, trigger key, mouse button and model from the config file
 API_KEY = config['api_key']
 KB_KEY = config['kb_key']
 MOUSE_BTN = config['mouse_btn']
 MODEL = config['model']
 # Constants
 # NOTE: API_KEY must come from config.json only. Re-assigning it to a literal
 # here would silently ignore the configured key and put a secret in source.
 BUTTON = KB_KEY  # Legacy alias for the keyboard key to listen for
 AUTO_START_PATH = os.path.expanduser(r"~\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Startup")  # For autostart
 # Initialize the Groq client
 client = Groq(api_key=API_KEY)
 def record_audio(filename):
-    """Records audio when BUTTON or mouse button is pressed."""
+    """Records audio while the key and mouse button are both pressed."""
    audio = pyaudio.PyAudio()
-    stream = audio.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
+    stream = audio.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
    frames = []
    print("Recording...")
    # Record while both the key and the mouse button are held down
-    while keyboard.is_pressed(BUTTON) and mouse.is_pressed(button='left'):
+    while keyboard.is_pressed(KB_KEY) and mouse.is_pressed(button=MOUSE_BTN):
        data = stream.read(1024)
        frames.append(data)
@@ -52,8 +66,9 @@ def transcribe_audio(filename):
    with open(filename, "rb") as file:
        transcription = client.audio.transcriptions.create(
            file=(filename, file.read()),  
-            model="distil-whisper-large-v3-en",  
+            model=MODEL, #"distil-whisper-large-v3-en",  
            prompt="Specify context or spelling",  
            language=config['language'],
            response_format="json",  
            temperature=0.0  
        )
@@ -67,7 +82,7 @@ def simulate_keypress(text):
 def add_to_autostart():
    """Registers the app to autostart on login."""
    script_path = os.path.abspath(__file__)
-    shortcut_path = os.path.join(AUTO_START_PATH, "MyApp.lnk")
+    shortcut_path = os.path.join(AUTO_START_PATH, "mAId.lnk")
    # Use ctypes to create the shortcut (this is Windows specific)
    shell = ctypes.windll.shell32
@@ -79,27 +94,31 @@ def quit_app(icon):
    icon.stop()
    sys.exit()
 def setup_tray_icon():
    """Setup system tray icon and menu."""
-    icon_image = Image.new('RGB', (64, 64), color=(255, 0, 0))  # Red icon as an example
+    #icon_image = Image.new('RGB', (64, 64), color=(255, 0, 0))  # Red icon as an example
    icon_image = Image.open('mic.webp')
    menu = (
        item('Register to Autostart', add_to_autostart),
        item('Exit', lambda: quit_app(icon))
    )
-    icon = pystray.Icon("my_app", icon_image, menu=pystray.Menu(*menu))
+    icon = pystray.Icon("mAId", icon_image, menu=pystray.Menu(*menu))
    icon.run()
 def main_loop():
    """Continuously listen for key or mouse press and transcribe audio."""
    filename = "output.wav"
    while True:
-        print("Waiting for button or mouse press...")
+        print("Waiting for key and mouse press...")
-        # Wait for button or mouse press
+        # Wait until both KB_KEY and the mouse button are pressed
-        while not (keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left')):
+        while not (keyboard.is_pressed(KB_KEY) and mouse.is_pressed(button=MOUSE_BTN)):
            time.sleep(0.1)  # Small sleep to avoid busy-waiting
        # Record audio
--- a/agent-mAId/mic.webp
+++ b/agent-mAId/mic.webp
--- a/agent-mAId/output.wav
+++ b/agent-mAId/output.wav
--- a/agent-mAId/readme.md
+++ b/agent-mAId/readme.md
@@ -0,0 +1,3 @@
 # To build as a standalone app (single executable):
 pip install pyinstaller
 pyinstaller --onefile main.py
--- a/config.json
+++ b/config.json
@@ -1,4 +1,12 @@
 //C:\Users\popov\.continue\config.json
 {
    "models": [    {
        "title": "local ollama> yi-coder",
        "provider": "ollama",
        "model": "yi-coder:9b",
        "apiBase": "http://localhost:11434"
      }
    ],
    "tabAutocompleteModel": {
        "title": "Tab Autocomplete Model",
        "provider": "ollama",
@@ -8,6 +16,7 @@
 }
 // original:   "tabAutocompleteModel": {
 //     "title": "Starcoder 3b",
 //     "provider": "ollama",