diff --git a/agent-mAId/main.py b/agent-mAId/main.py
index 707f8b6..96dd788 100644
--- a/agent-mAId/main.py
+++ b/agent-mAId/main.py
@@ -22,17 +22,20 @@
 # if __name__ == "__main__":
 #     main()
-
+import os
+from groq import Groq
 import pyaudio
 import wave
 import pyautogui
-import requests
 import keyboard
+import mouse
 
 # Constants
-API_URL = "https://api.openai.com/v1/whisper"
-API_KEY = "your_openai_api_key"
+API_KEY = os.environ["GROQ_API_KEY"]  # Never hard-code secrets; set GROQ_API_KEY in the environment
 BUTTON = 'ctrl'  # The button to listen for
 
+# Initialize the Groq client
+client = Groq(api_key=API_KEY)
+
 def record_audio(filename):
     # Setup audio recording
     audio = pyaudio.PyAudio()
@@ -42,8 +45,8 @@ def record_audio(filename):
     frames = []
 
     print("Recording...")
-    # Record while button is pressed
-    while keyboard.is_pressed(BUTTON):
+    # Record while button or mouse is pressed
+    while keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left'):
         data = stream.read(1024)
         frames.append(data)
 
@@ -61,14 +64,17 @@
     wave_file.close()
 
 def transcribe_audio(filename):
-    # Transcribe audio using Whisper API
-    with open(filename, 'rb') as audio_file:
-        response = requests.post(
-            API_URL,
-            headers={"Authorization": f"Bearer {API_KEY}"},
-            files={"file": audio_file}
+    # Open the audio file
+    with open(filename, "rb") as file:
+        # Create a transcription of the audio file
+        transcription = client.audio.transcriptions.create(
+            file=(filename, file.read()),  # Required audio file
+            model="distil-whisper-large-v3-en",  # Required model to use for transcription
+            prompt="Specify context or spelling",  # Optional
+            response_format="json",  # Optional
+            temperature=0.0  # Optional
         )
-    return response.json().get('text', '')
+    # The SDK returns a Transcription object, not a dict — use attribute access
+    return transcription.text
 
 def simulate_keypress(text):
     # Simulate keypress for each character in text