edits to transcribe agent
This commit is contained in:
parent
80ff1832cd
commit
cb4e222bab
@ -22,18 +22,20 @@
|
|||||||
# if __name__ == "__main__":
|
# if __name__ == "__main__":
|
||||||
# main()
|
# main()
|
||||||
|
|
||||||
|
import os
|
||||||
|
from groq import Groq
|
||||||
import pyaudio
|
import pyaudio
|
||||||
import wave
|
import wave
|
||||||
import pyautogui
|
import pyautogui
|
||||||
import requests
|
|
||||||
import keyboard
|
import keyboard
|
||||||
|
|
||||||
# Constants
# The credential is read from the environment so that no secret is stored in
# the repository. Any key that was previously committed here should be revoked.
API_KEY = os.environ.get("GROQ_API_KEY")
BUTTON = 'ctrl'  # The button to listen for

# Initialize the Groq client
client = Groq(api_key=API_KEY)
|
||||||
|
|
||||||
def record_audio(filename):
|
def record_audio(filename):
|
||||||
# Setup audio recording
|
# Setup audio recording
|
||||||
audio = pyaudio.PyAudio()
|
audio = pyaudio.PyAudio()
|
||||||
@ -42,8 +44,8 @@ def record_audio(filename):
|
|||||||
frames = []
|
frames = []
|
||||||
print("Recording...")
|
print("Recording...")
|
||||||
|
|
||||||
# Record while button is pressed
|
# Record while button or mouse is pressed
|
||||||
while keyboard.is_pressed(BUTTON):
|
while keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left'):
|
||||||
data = stream.read(1024)
|
data = stream.read(1024)
|
||||||
frames.append(data)
|
frames.append(data)
|
||||||
|
|
||||||
@ -61,14 +63,17 @@ def record_audio(filename):
|
|||||||
wave_file.close()
|
wave_file.close()
|
||||||
|
|
||||||
def transcribe_audio(filename):
    """Transcribe a recorded audio file using the module-level Groq client.

    Args:
        filename: Path of the audio file to upload.

    Returns:
        The transcribed text reported by the transcription endpoint.
    """
    # Open the audio file
    with open(filename, "rb") as file:
        # Create a transcription of the audio file.
        # No `prompt` is sent: the earlier value was the documentation
        # placeholder, which would steer the model with irrelevant text.
        transcription = client.audio.transcriptions.create(
            file=(filename, file.read()),  # Required audio file
            model="distil-whisper-large-v3-en",  # Required model to use for transcription
            response_format="json",  # Optional
            temperature=0.0,  # Optional
        )
    # Per the Groq SDK documentation the result is a response object rather
    # than a dict, so the text is read as an attribute instead of by key.
    return transcription.text
|
||||||
|
|
||||||
def simulate_keypress(text):
|
def simulate_keypress(text):
|
||||||
# Simulate keypress for each character in text
|
# Simulate keypress for each character in text
|
||||||
|
Loading…
x
Reference in New Issue
Block a user