edits to transcribe agent

This commit is contained in:
Dobromir Popov 2024-09-10 02:36:40 +03:00
parent 80ff1832cd
commit cb4e222bab

View File

@ -22,18 +22,20 @@
# if __name__ == "__main__": # if __name__ == "__main__":
# main() # main()
import os
from groq import Groq
import pyaudio import pyaudio
import wave import wave
import pyautogui import pyautogui
import requests
import keyboard import keyboard
# Constants
# SECURITY: the API key must never be committed to source control. It is read
# from the GROQ_API_KEY environment variable instead. Any key that was
# previously committed here should be treated as leaked and revoked.
API_KEY = os.environ.get("GROQ_API_KEY")
BUTTON = 'ctrl'  # The button to listen for

# Initialize the Groq client.
# NOTE(review): when api_key is None the Groq SDK is expected to look up
# GROQ_API_KEY itself and raise if it is missing — confirm against the SDK docs.
client = Groq(api_key=API_KEY)
def record_audio(filename): def record_audio(filename):
# Setup audio recording # Setup audio recording
audio = pyaudio.PyAudio() audio = pyaudio.PyAudio()
@ -42,8 +44,8 @@ def record_audio(filename):
frames = [] frames = []
print("Recording...") print("Recording...")
# Record while button is pressed # Record while button or mouse is pressed
while keyboard.is_pressed(BUTTON): while keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left'):
data = stream.read(1024) data = stream.read(1024)
frames.append(data) frames.append(data)
@ -61,14 +63,17 @@ def record_audio(filename):
wave_file.close() wave_file.close()
def transcribe_audio(filename):
    """Transcribe the audio file at *filename* and return the recognized text.

    The file is read fully into memory and sent to the transcription endpoint
    through the module-level ``client``.

    Args:
        filename: Path to the recorded audio file (opened in binary mode).

    Returns:
        The transcribed text, or an empty string when the response carries
        no text.
    """
    # Read the audio bytes first so the file handle is closed before the
    # (potentially slow) network request is made.
    with open(filename, "rb") as audio_file:
        audio_bytes = audio_file.read()

    transcription = client.audio.transcriptions.create(
        file=(filename, audio_bytes),  # Required audio file
        model="distil-whisper-large-v3-en",  # Required model to use for transcription
        # NOTE(review): this looks like placeholder text copied from an example;
        # it is sent to the API as the real prompt — confirm it is intended.
        prompt="Specify context or spelling",  # Optional
        response_format="json",  # Optional
        temperature=0.0,  # Optional
    )

    # The SDK returns a response object exposing the text as an attribute, so
    # subscripting it (transcription['text']) fails. A plain dict is still
    # accepted defensively in case a raw JSON payload is ever returned.
    if isinstance(transcription, dict):
        return transcription.get("text", "")
    return getattr(transcription, "text", "") or ""
def simulate_keypress(text): def simulate_keypress(text):
# Simulate keypress for each character in text # Simulate keypress for each character in text