diff --git a/.gitignore b/.gitignore index 6bc38ed..707c6db 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,8 @@ agent-mobile/artimobile/supervisord.pid agent-pyter/lag-llama agent-pyter/google-chrome-stable_current_amd64.deb web/.node-persist/* +agent-mAId/output.wav +agent-mAId/build/* +agent-mAId/dist/main.exe +agent-mAId/output.wav .node-persist/storage/* \ No newline at end of file diff --git a/agent-mAId/config.json b/agent-mAId/config.json new file mode 100644 index 0000000..108c567 --- /dev/null +++ b/agent-mAId/config.json @@ -0,0 +1,9 @@ +{ + "api_key": "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", + "kb_key": "ctrl", + "mouse_btn": "left", + "model": "distil-whisper-large-v3-en", + "language":"en", // whisper-large-v3 or distil-whisper-large-v3-en + "action": "type" //type,copy + } + \ No newline at end of file diff --git a/agent-mAId/main.py b/agent-mAId/main.py new file mode 100644 index 0000000..1e1bc14 --- /dev/null +++ b/agent-mAId/main.py @@ -0,0 +1,267 @@ +import os +import sys +import pyaudio +import wave +import pyautogui +import keyboard +import mouse +import threading +from groq import Groq +import pystray +from pystray import MenuItem as item +from PIL import Image +import ctypes +import io +import time +import json5 +import wave +import pyperclip +import argparse +import atexit + +# # Load configuration from config.json +DEFAULT_CONFIG = { + "api_key": "xxx", + "kb_key": "ctrl", + "mouse_btn": "left", + "model": "distil-whisper-large-v3-en", + "language": "en", # whisper-large-v3 or distil-whisper-large-v3-en + "action": "type" # type, copy +} + +def parse_args(): + """Parse command line arguments for config file.""" + parser = argparse.ArgumentParser(description='Run the AI transcription app.') + parser.add_argument( + '--config', type=str, help='Path to config file', default=None + ) + return parser.parse_args() + +def load_config(config_path=None): + """Load the configuration file, adjusting for PyInstaller's temp path when bundled.""" + config = DEFAULT_CONFIG.copy() # Start with default configuration + + try: + if config_path is None: + # Determine if the script is running as a PyInstaller bundle + if getattr(sys, 'frozen', False): + # If running in a bundle, use the temp path where PyInstaller extracts files + config_path = os.path.join(sys._MEIPASS, 'config.json') + else: + # If running in development (normal execution), use the local directory + config_path = os.path.join(os.path.dirname(__file__), 'config.json') + + print(f'Trying to load config from: {config_path}') + with open(config_path, 'r') as config_file: + loaded_config = json5.load(config_file) + # Update the default config with any values from config.json + config.update(loaded_config) + + except FileNotFoundError as ex: + print("Config file not found, using defaults." + ex.strerror) + raise ex + except json5.JSONDecodeError as ex: + print("Error decoding config file, using defaults." + ex.msg) + except Exception as e: + print(f"Unexpected error while loading config: {e}, using defaults.") + + return config + +# Load the config +# config = load_config() +# Parse command line arguments +args = parse_args() + +# Load the config from the specified path or default location +config = load_config(args.config) + +# Extract API key and button from the config file +API_KEY = config['api_key'] +KB_KEY = config['kb_key'] +MOUSE_BTN = config['mouse_btn'] +MODEL = config['model'] +POST_TRANSCRIBE = config['action'] + + +# Constants +AUTO_START_PATH = os.path.expanduser(r"~\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Startup") # For autostart + +# Initialize the Groq client +client = Groq(api_key=API_KEY) + + +def save_audio_to_disk(filename, audio_data, audio_format, channels, rate): + """Save the audio data to disk asynchronously.""" + with wave.open(filename, 'wb') as wave_file: + wave_file.setnchannels(channels) + wave_file.setsampwidth(audio_format) + wave_file.setframerate(rate) + wave_file.writeframes(audio_data) + +def record_audio(): + """Records audio when the key and mouse button is pressed, stores in memory.""" + audio = pyaudio.PyAudio() + stream = audio.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024) + + frames = [] + print("Recording...") + + # Record while both keyboard and mouse button are pressed + while keyboard.is_pressed(KB_KEY) and mouse.is_pressed(button=MOUSE_BTN): + data = stream.read(1024) + frames.append(data) + + recording_duration = len(frames) * 1024 / 16000 # Calculate audio duration in seconds + print(f"Recording stopped. Duration: {recording_duration:.2f} seconds.") + + stream.stop_stream() + stream.close() + audio.terminate() + + # Store the recorded audio in an in-memory stream as a valid WAV file + memory_stream = io.BytesIO() + + with wave.open(memory_stream, 'wb') as wave_file: + wave_file.setnchannels(1) + wave_file.setsampwidth(audio.get_sample_size(pyaudio.paInt16)) + wave_file.setframerate(16000) + wave_file.writeframes(b''.join(frames)) + + memory_stream.seek(0) # Reset the stream position to the beginning for reading + + # Save audio to disk asynchronously as a side task (optional) + threading.Thread(target=save_audio_to_disk, args=("output.wav", b''.join(frames), audio.get_sample_size(pyaudio.paInt16), 1, 16000)).start() + + return memory_stream + +def transcribe_audio(memory_stream): + """Transcribes the recorded audio using the Groq Whisper model.""" + memory_stream.seek(0) # Reset the stream position to the beginning + + start_time = time.time() + + transcription = client.audio.transcriptions.create( + file=('audio.wav', memory_stream), + model=MODEL, + prompt="Transcribe the following audio", + language=config['language'], + response_format="json", + temperature=0.0 + ) + end_time = time.time() + + transcription_time = end_time - start_time + print(f"Transcription took: {transcription_time:.2f} seconds. Result: {transcription.text}") + log_transcription_time(transcription_time) + + return transcription.text + +def simulate_keypress(text): + """Simulates typing of transcribed text quickly.""" + pyautogui.typewrite(text, interval=0.01) # Reduce interval between characters for faster typing + # pyautogui.press('enter') + +def add_to_autostart(): + """Registers the app to autostart on login.""" + script_path = os.path.abspath(__file__) + shortcut_path = os.path.join(AUTO_START_PATH, "mAId.lnk") + + # Use ctypes to create the shortcut (this is Windows specific) + shell = ctypes.windll.shell32 + shell.ShellExecuteW(None, "runas", "cmd.exe", f'/C mklink "{shortcut_path}" "{script_path}"', None, 1) + print("App added to autostart.") + +icon = None # Global variable to store the tray icon object +def cleanup_and_exit(): + """Clean up the tray icon and exit the application.""" + global icon + if icon: + print("Stopping and removing tray icon...") + icon.stop() # Stop the tray icon to remove it from the tray + sys.exit() + +def setup_tray_icon(): + global icon + """Setup system tray icon and menu.""" + if getattr(sys, 'frozen', False): + # If running as a bundle, use the temp path where PyInstaller extracts files + icon_path = os.path.join(sys._MEIPASS, 'mic.webp') + else: + # If running in development (normal execution), use the local directory + icon_path = os.path.join(os.path.dirname(__file__), 'mic.webp') + + try: + # Load the tray icon + icon_image = Image.open(icon_path) + except FileNotFoundError: + print(f"Icon file not found at {icon_path}") + icon_image = Image.new('RGB', (64, 64), color=(255, 0, 0)) # Red icon as an example + return + + menu = ( + item('Register to Autostart', add_to_autostart), + item('Exit', lambda: quit_app(icon)) + ) + + icon = pystray.Icon("mAId", icon_image, menu=pystray.Menu(*menu)) + icon.run() + +# Ensure the tray icon is removed when the app exits +atexit.register(cleanup_and_exit) + +response_times = [] +ma_window_size = 10 # Moving average over the last 10 responses +def log_transcription_time(transcription_time): + """Logs the transcription time and updates the moving average.""" + global response_times + + # Add the transcription time to the list + response_times.append(transcription_time) + + # If the number of logged times exceeds the window size, remove the oldest entry + if len(response_times) > ma_window_size: + response_times.pop(0) + + # Calculate and print the moving average + moving_average = sum(response_times) / len(response_times) + print(f"Moving Average of Transcription Time (last {ma_window_size} responses): {moving_average:.2f} seconds.") + + +def main_loop(): + """Continuously listen for key or mouse press and transcribe audio.""" + filename = "output.wav" + + while True: + print("Waiting for key and mouse press...") + + # Wait for KB_KEY or mouse press + while not (keyboard.is_pressed(KB_KEY) and mouse.is_pressed(button=MOUSE_BTN)): + time.sleep(0.1) # Small sleep to avoid busy-waiting + + # Record audio + memory_stream = record_audio() + + # Transcribe audio + print("Transcribing audio...") + transcribed_text = transcribe_audio(memory_stream) + + if POST_TRANSCRIBE == "type": + # Simulate typing the transcribed text + print("Typing transcribed text...") + simulate_keypress(transcribed_text) + elif POST_TRANSCRIBE == "copy": + # Copy the transcribed text to clipboard + pyperclip.copy(transcribed_text) + print("Transcribed text copied to clipboard.") + + + +if __name__ == "__main__": + + # Start the tray icon in a separate thread so it doesn't block the main functionality + tray_thread = threading.Thread(target=setup_tray_icon) + tray_thread.daemon = True + tray_thread.start() + + # Run the main loop that listens for key or mouse presses in the background + main_loop() diff --git a/agent-mAId/main.spec b/agent-mAId/main.spec new file mode 100644 index 0000000..914fdae --- /dev/null +++ b/agent-mAId/main.spec @@ -0,0 +1,39 @@ +# -*- mode: python ; coding: utf-8 -*- + + +a = Analysis( + ['main.py'], + pathex=[], + binaries=[], + datas=[('config.json', '.'), ('mic.webp', '.')], + hiddenimports=[], + hookspath=[], + hooksconfig={}, + runtime_hooks=[], + excludes=[], + noarchive=False, + optimize=0, +) + +pyz = PYZ(a.pure) + +exe = EXE( + pyz, + a.scripts, + a.binaries, + a.datas, + [], + name='main', + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=True, + upx_exclude=[], + runtime_tmpdir=None, + console=True, + disable_windowed_traceback=False, + argv_emulation=False, + target_arch=None, + codesign_identity=None, + entitlements_file=None, +) diff --git a/agent-mAId/mic.webp b/agent-mAId/mic.webp new file mode 100644 index 0000000..1310532 Binary files /dev/null and b/agent-mAId/mic.webp differ diff --git a/agent-mAId/output.wav b/agent-mAId/output.wav new file mode 100644 index 0000000..3fe9131 Binary files /dev/null and b/agent-mAId/output.wav differ diff --git a/agent-mAId/readme.md b/agent-mAId/readme.md new file mode 100644 index 0000000..3ac5af8 --- /dev/null +++ b/agent-mAId/readme.md @@ -0,0 +1,10 @@ + +pip install pyinstaller +pyinstaller --onefile main.py + pyinstaller main.spec + + + +pipreqs . + +pip freeze > requirements.txt diff --git a/agent-mAId/requirements.txt b/agent-mAId/requirements.txt new file mode 100644 index 0000000..76eb3cc --- /dev/null +++ b/agent-mAId/requirements.txt @@ -0,0 +1,9 @@ +groq==0.11.0 +json5==0.9.25 +keyboard==0.13.5 +mouse==0.7.1 +Pillow==10.4.0 +PyAudio==0.2.14 +PyAutoGUI==0.9.54 +pyperclip==1.9.0 +pystray==0.19.5 diff --git a/agent-py-bot/agents/runner.py b/agent-py-bot/agents/runner.py index d165923..d6fb3c8 100644 --- a/agent-py-bot/agents/runner.py +++ b/agent-py-bot/agents/runner.py @@ -10,4 +10,8 @@ def execute_python_code(code_block): except Exception as e: return f"Execution error: {str(e)}" - \ No newline at end of file +def execute_trading_action(action): + # Placeholder for executing trading actions + # This could be an API call to a trading platform + print(f"Executing trading action: {action}") + diff --git a/agent-py-bot/agents/webagent.py b/agent-py-bot/agents/webagent.py index 1024e31..59c1078 100644 --- a/agent-py-bot/agents/webagent.py +++ b/agent-py-bot/agents/webagent.py @@ -46,9 +46,15 @@ def parse_rss_feed(feed_url): articles = [{'title': entry.title, 'link': entry.link} for entry in feed.entries] return articles +import yfinance as yf from selenium import webdriver from selenium.webdriver.chrome.options import Options +def fetch_stock_data(ticker, interval='1d', period='1mo'): + stock = yf.Ticker(ticker) + hist = stock.history(interval=interval, period=period) + return hist + def search_google_news(topic): options = Options() options.headless = True @@ -148,6 +154,17 @@ def get_news_api_results(query, api_key, from_param): except Exception as e: return f"API Request Error: {e}" +def search_tavily(topic, api_key): + url = f"https://app.tavily.com/api/search?q={topic}" + headers = { + "Authorization": f"Bearer {api_key}" + } + response = requests.get(url, headers=headers) + if response.status_code == 200: + return response.json() + else: + return {"error": response.text} + def search_news(topic): # DuckDuckGo Results duck_results = search_duckduckgo(topic) @@ -205,4 +222,7 @@ def summarize_data(data): def run_web_agent(topic, folder): print(f"[{datetime.now()}] Running web agent for topic: {topic}") news_data = search_news(topic) - return news_data \ No newline at end of file + tavily_api_key = "YOUR_TAVILY_API_KEY" + tavily_results = search_tavily(topic, tavily_api_key) + news_data["tavily"] = tavily_results + return news_data diff --git a/config.json b/config.json index 9990c5b..1c0308e 100644 --- a/config.json +++ b/config.json @@ -1,4 +1,12 @@ +//C:\Users\popov\.continue\config.json { + "models": [ { + "title": "local ollama> yi-coder", + "provider": "ollama", + "model": "yi-coder:9b", + "apiBase": "http://localhost:11434" + } + ], "tabAutocompleteModel": { "title": "Tab Autocomplete Model", "provider": "ollama", @@ -8,6 +16,7 @@ } + // original: "tabAutocompleteModel": { // "title": "Starcoder 3b", // "provider": "ollama", diff --git a/docker-compose.yml b/docker-compose.yml index 64b4241..6ab5833 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.4' - services: # kevinai: # image: kevinai @@ -28,6 +26,28 @@ services: WS_URL: wss://tts.d-popov.com SERVER_PORT_WS: 8081 SERVER_PORT_HTTP: 8080 + SERVER_PORT_WS: 8082 ports: - 28080:8080 - - 28081:8081 \ No newline at end of file + - 28081:8081 + chat-server: + image: node:20-alpine + container_name: ml-voice-chat-server + working_dir: /usr/src/app + volumes: + - /mnt/apps/DEV/REPOS/git.d-popov.com/ai-kevin:/usr/src/app + command: > + sh -c "npm install && node web/chat-server.js" + environment: + NODE_ENV: demo + #TTS_BACKEND_URL: https://api.tts.d-popov.com/asr + TTS_API_URL: http://192.168.0.11:9009/asr + WS_URL: wss://ws.tts.d-popov.com + SERVER_PORT_HTTP: 8080 + SERVER_PORT_WS: 8082 + ports: + - 28080:8080 + - 28081:8082 + dns: + - 8.8.8.8 + - 8.8.4.4 diff --git a/output.wav b/output.wav new file mode 100644 index 0000000..9715b30 Binary files /dev/null and b/output.wav differ diff --git a/package-lock.json b/package-lock.json index 3cd4954..a7948b7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,7 @@ "name": "kevin-ai", "version": "1.0.0", "dependencies": { + "@prisma/client": "^5.16.1", "axios": "^1.7.2", "body-parser": "^1.20.2", "dotenv": "^16.4.5", @@ -21,6 +22,23 @@ "ws": "^8.12.1" } }, + "node_modules/@prisma/client": { + "version": "5.16.1", + "resolved": "https://registry.npmjs.org/@prisma/client/-/client-5.16.1.tgz", + "integrity": "sha512-wM9SKQjF0qLxdnOZIVAIMKiz6Hu7vDt4FFAih85K1dk/Rr2mdahy6d3QP41K62N9O0DJJA//gUDA3Mp49xsKIg==", + "hasInstallScript": true, + "engines": { + "node": ">=16.13" + }, + "peerDependencies": { + "prisma": "*" + }, + "peerDependenciesMeta": { + "prisma": { + "optional": true + } + } + }, "node_modules/@types/node": { "version": "18.19.34", "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.34.tgz", diff --git a/package.json b/package.json index 79e272c..cebd6f9 100644 --- a/package.json +++ b/package.json @@ -7,12 +7,12 @@ "start:demo": "NODE_ENV=demo node web/server.js", "start:demo-chat": "node web/chat-server.js", "start:tele": "python agent-py-bot/agent.py" - - }, + }, "env": { "NODE_ENV": "demo" - }, + }, "dependencies": { + "@prisma/client": "^5.16.1", "axios": "^1.7.2", "body-parser": "^1.20.2", "dotenv": "^16.4.5", diff --git a/vision/notes.md b/vision/notes.md new file mode 100644 index 0000000..4d311c5 --- /dev/null +++ b/vision/notes.md @@ -0,0 +1,25 @@ + Visual options : + -- OD: + - object detction /w fine tuning: Yolo V5: https://learnopencv.com/custom-object-detection-training-using-yolov5/ + +-- V-aware + - visual LLM: LLAVA : https://llava.hliu.cc/ + + -- BOTH detection and comprehention: + -Phi + https://huggingface.co/microsoft/Phi-3-vision-128k-instruct + https://github.com/microsoft/Phi-3CookBook + +- Lavva chat +https://github.com/LLaVA-VL/LLaVA-Interactive-Demo?tab=readme-ov-file +git clone https://github.com/LLaVA-VL/LLaVA-Interactive-Demo.git +conda create -n llava_int -c conda-forge -c pytorch python=3.10.8 pytorch=2.0.1 -y +conda activate llava_int +cd LLaVA-Interactive-Demo +pip install -r requirements.txt +source setup.sh + + + + +- decision making based on ENV, RL: https://github.com/OpenGenerativeAI/llm-colosseum \ No newline at end of file diff --git a/web/.env b/web/.env index 38be4ef..1233140 100644 --- a/web/.env +++ b/web/.env @@ -14,10 +14,19 @@ TTS_API_URL=https://api.tts.d-popov.com/asr LLN_MODEL=qwen2 LNN_API_URL=https://ollama.d-popov.com/api/generate -GROQ_API_KEY=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 -OPENAI_API_KEY=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 +# GROQ_API_KEY=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 +# OPENAI_API_KEY=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 -WS_URL=wss://tts.d-popov.com -PUBLIC_HOSTNAME=tts.d-popov.com -SERVER_PORT_WS=8080 -SERVER_PORT_HTTP=8080 \ No newline at end of file +# WS_URL=wss://tts.d-popov.com +# PUBLIC_HOSTNAME=tts.d-popov.com +# SERVER_PORT_WS=8080 +# SERVER_PORT_HTTP=8080 + +# This was inserted by `prisma init`: +# Environment variables declared in this file are automatically made available to Prisma. +# See the documentation for more detail: https://pris.ly/d/prisma-schema#accessing-environment-variables-from-the-schema + +# Prisma supports the native connection string format for PostgreSQL, MySQL, SQLite, SQL Server, MongoDB and CockroachDB. +# See the documentation for all the connection string options: https://pris.ly/d/connection-strings + +DATABASE_URL="postgresql://johndoe:randompassword@localhost:5432/mydb?schema=public" \ No newline at end of file diff --git a/web/.gitignore b/web/.gitignore new file mode 100644 index 0000000..11ddd8d --- /dev/null +++ b/web/.gitignore @@ -0,0 +1,3 @@ +node_modules +# Keep environment variables out of version control +.env diff --git a/web/chat-client.html b/web/chat-client.html index eaace96..659f646 100644 --- a/web/chat-client.html +++ b/web/chat-client.html @@ -1,615 +1,354 @@ - +
-