edits to transcribe agent

2024-09-10 02:36:40 +03:00
parent 80ff1832cd
commit cb4e222bab
1 changed file with 18 additions and 13 deletions
--- a/agent-mAId/main.py
+++ b/agent-mAId/main.py
@@ -22,18 +22,20 @@
 # if __name__ == "__main__":
 #     main()
-
+import os
 from groq import Groq
 import pyaudio
 import wave
 import pyautogui
 import requests
 import keyboard
 # Constants
-API_URL = "https://api.openai.com/v1/whisper"
+API_KEY = "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"  # Make sure to use your actual API key
 API_KEY = "your_openai_api_key"
 BUTTON = 'ctrl'  # The button to listen for
 # Initialize the Groq client
 client = Groq(api_key=API_KEY)
 def record_audio(filename):
    # Setup audio recording
    audio = pyaudio.PyAudio()
@@ -42,8 +44,8 @@ def record_audio(filename):
    frames = []
    print("Recording...")
-    # Record while button is pressed
+    # Record while button or mouse is pressed
-    while keyboard.is_pressed(BUTTON):
+    while keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left'):
        data = stream.read(1024)
        frames.append(data)
@@ -61,14 +63,17 @@ def record_audio(filename):
    wave_file.close()
 def transcribe_audio(filename):
-    # Transcribe audio using Whisper API
+    # Open the audio file
-    with open(filename, 'rb') as audio_file:
+    with open(filename, "rb") as file:
-        response = requests.post(
+        # Create a transcription of the audio file
-            API_URL,
+        transcription = client.audio.transcriptions.create(
-            headers={"Authorization": f"Bearer {API_KEY}"},
+            file=(filename, file.read()),  # Required audio file
-            files={"file": audio_file}
+            model="distil-whisper-large-v3-en",  # Required model to use for transcription
            prompt="Specify context or spelling",  # Optional
            response_format="json",  # Optional
            temperature=0.0  # Optional
        )
-    return response.json().get('text', '')
+    return transcription['text']
 def simulate_keypress(text):
    # Simulate keypress for each character in text