gogo2/agent-mAId/main.py

# Goal: an application that runs in the background at startup, listens to the
# microphone while the mouse or another (configurable) button is held down,
# sends the recorded audio to Whisper, and types (emulates keypresses for) the
# transcribed text when the button is released.
# https://console.groq.com/docs/speech-text
# import system_hooks
# import audio_recorder
# import whisper_api
# import keyboard_emulator
#
# def on_button_press():
#     audio_recorder.start_recording()
#
# def on_button_release():
#     audio = audio_recorder.stop_recording()
#     text = whisper_api.transcribe(audio)
#     keyboard_emulator.type_text(text)
#
# def main():
#     system_hooks.set_startup_run()
#     system_hooks.register_button_callback(on_button_press, on_button_release)
#     system_hooks.run_event_loop()
#
# if __name__ == "__main__":
#     main()

import os
import wave

import keyboard
import mouse
import pyaudio
import pyautogui
from groq import Groq
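
# Third-party packages used here (install via pip): groq, pyaudio, pyautogui,
# keyboard and mouse. Note that the keyboard/mouse hook libraries may require
# elevated privileges on some platforms (e.g. root on Linux).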

# Constants
API_KEY = "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"  # Make sure to use your actual API key
BUTTON = 'ctrl'  # The button to listen for

# Initialize the Groq client
client = Groq(api_key=API_KEY)
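

# The design sketch above calls system_hooks.set_startup_run(), which the code
# below never implements. The following is a minimal sketch of that step,
# assuming Windows and the per-user "Run" registry key (the app name
# "agent-mAId" is illustrative); it is not called from main().
def set_startup_run(app_name="agent-mAId"):
    """Register this script to run at login via the HKCU Run registry key."""
    import sys
    import winreg

    command = f'"{sys.executable}" "{os.path.abspath(__file__)}"'
    key = winreg.OpenKey(
        winreg.HKEY_CURRENT_USER,
        r"Software\Microsoft\Windows\CurrentVersion\Run",
        0,
        winreg.KEY_SET_VALUE,
    )
    try:
        winreg.SetValueEx(key, app_name, 0, winreg.REG_SZ, command)
    finally:
        winreg.CloseKey(key)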


def record_audio(filename):
    # Set up audio recording
    audio = pyaudio.PyAudio()
    stream = audio.open(format=pyaudio.paInt16, channels=1, rate=44100,
                        input=True, frames_per_buffer=1024)
    frames = []
    print("Recording...")

    # Record while the button or left mouse button is pressed
    while keyboard.is_pressed(BUTTON) or mouse.is_pressed(button='left'):
        data = stream.read(1024)
        frames.append(data)

    print("Recording stopped.")
    stream.stop_stream()
    stream.close()
    audio.terminate()

    # Save the recorded audio
    wave_file = wave.open(filename, 'wb')
    wave_file.setnchannels(1)
    wave_file.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
    wave_file.setframerate(44100)
    wave_file.writeframes(b''.join(frames))
    wave_file.close()


def transcribe_audio(filename):
    # Open the audio file
    with open(filename, "rb") as file:
        # Create a transcription of the audio file
        transcription = client.audio.transcriptions.create(
            file=(filename, file.read()),          # Required audio file
            model="distil-whisper-large-v3-en",    # Required model to use for transcription
            prompt="Specify context or spelling",  # Optional
            response_format="json",                # Optional
            temperature=0.0                        # Optional
        )
    # The SDK returns a Transcription object, so access the text attribute
    return transcription.text


def simulate_keypress(text):
    # Simulate a keypress for each character in the text
    for char in text:
        pyautogui.typewrite(char)
    # Press Enter once after the full text has been typed
    pyautogui.press('enter')
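

# The pseudocode at the top of the file describes a callback/event-loop design
# (on_button_press / on_button_release) rather than the polling loop used in
# main() below. The following is a minimal sketch of that variant using the
# keyboard library's hook API; it is an assumption about how the hooks could
# be wired and is not called anywhere in this script.
def run_event_loop():
    import threading

    recording = threading.Event()

    def handle_press(event):
        # Ignore key auto-repeat events while a recording is already running
        if recording.is_set():
            return
        recording.set()

        def pipeline():
            try:
                record_audio("output.wav")  # blocks until the key is released
                simulate_keypress(transcribe_audio("output.wav"))
            finally:
                recording.clear()

        # Run the blocking pipeline off the hook thread so key events keep flowing
        threading.Thread(target=pipeline, daemon=True).start()

    keyboard.on_press_key(BUTTON, handle_press)
    keyboard.wait()  # Block forever, dispatching keyboard events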


def main():
    filename = "output.wav"
    print("Press and hold the button to record...")
    # Keep listening so the app stays resident in the background
    while True:
        keyboard.wait(BUTTON)  # Wait for the button press
        record_audio(filename)
        print("Transcribing audio...")
        transcribed_text = transcribe_audio(filename)
        print("Entering text...")
        simulate_keypress(transcribed_text)


if __name__ == "__main__":
    main()