edits to transcribe agent

Dobromir Popov 2024-09-10 02:36:40 +03:00
parent 80ff1832cd
commit cb4e222bab


@@ -22,18 +22,20 @@
 # if __name__ == "__main__":
 #     main()
 import os
+from groq import Groq
 import pyaudio
 import wave
 import pyautogui
 import requests
 import keyboard
 # Constants
 API_URL = "https://api.openai.com/v1/whisper"
-API_KEY = "your_openai_api_key"
+API_KEY = "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"  # Make sure to use your actual API key
 BUTTON = 'ctrl'  # The button to listen for
+# Initialize the Groq client
+client = Groq(api_key=API_KEY)
 def record_audio(filename):
     # Setup audio recording
     audio = pyaudio.PyAudio()
@@ -42,8 +44,8 @@ def record_audio(filename):
     frames = []
     print("Recording...")
-    # Record while button is pressed
-    while keyboard.is_pressed(BUTTON):
+    # Record while button or mouse is pressed
+    while keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left'):
        data = stream.read(1024)
        frames.append(data)
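
Note on the new loop condition above: it calls into `mouse`, but no matching import appears in this diff. A minimal, hedged sketch of what the script would additionally need, assuming the third-party `mouse` package (installed with `pip install mouse`) is the intended dependency:

import mouse  # assumed missing import; provides the is_pressed() check used in record_audio()

# Example: poll the left button state once
if mouse.is_pressed(button='left'):
    print("Left mouse button is currently held down")
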
@@ -61,14 +63,17 @@ def record_audio(filename):
     wave_file.close()
 
 def transcribe_audio(filename):
-    # Transcribe audio using Whisper API
-    with open(filename, 'rb') as audio_file:
-        response = requests.post(
-            API_URL,
-            headers={"Authorization": f"Bearer {API_KEY}"},
-            files={"file": audio_file}
+    # Open the audio file
+    with open(filename, "rb") as file:
+        # Create a transcription of the audio file
+        transcription = client.audio.transcriptions.create(
+            file=(filename, file.read()),  # Required audio file
+            model="distil-whisper-large-v3-en",  # Required model to use for transcription
+            prompt="Specify context or spelling",  # Optional
+            response_format="json",  # Optional
+            temperature=0.0  # Optional
         )
-    return response.json().get('text', '')
+    return transcription['text']
 
 def simulate_keypress(text):
     # Simulate keypress for each character in text
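
For reference, a consolidated sketch of the transcription path as this commit leaves it, with two hedged adjustments: the Groq SDK returns a Transcription object, so the text is read as an attribute (transcription.text) rather than by subscripting, and simulate_keypress() is assumed to type the result with pyautogui since its body is not shown in this diff. The API key below is a placeholder, not the project's key.

import pyautogui
from groq import Groq

client = Groq(api_key="YOUR_GROQ_API_KEY")  # placeholder; substitute a real key

def transcribe_audio(filename):
    # Send the recorded WAV file to Groq's transcription endpoint
    with open(filename, "rb") as file:
        transcription = client.audio.transcriptions.create(
            file=(filename, file.read()),
            model="distil-whisper-large-v3-en",
            response_format="json",
            temperature=0.0,
        )
    # The SDK returns an object, not a dict, so use attribute access
    return transcription.text

def simulate_keypress(text):
    # Assumed implementation: type the transcribed text into the focused window
    pyautogui.write(text, interval=0.01)

Usage: record_audio("speech.wav"), then simulate_keypress(transcribe_audio("speech.wav")) types the transcript wherever the cursor currently has focus.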