Added: STT

This commit is contained in:
2026-01-29 02:03:23 +01:00
parent f901481d6a
commit efb1eab82f
3 changed files with 95 additions and 0 deletions

75
stt/main.py Normal file
View File

@@ -0,0 +1,75 @@
import sys
import os
from groq import Groq
import speech_recognition as sr
def recognize(recognizer: sr.Recognizer, microphone: sr.Microphone) -> dict:
    """Record one phrase from *microphone* and transcribe it.

    Args:
        recognizer: Recognizer used both to capture audio and to transcribe it.
        microphone: Audio source that is opened for the duration of the capture.

    Returns:
        A dict with three keys: ``"success"`` (bool), ``"error"`` (message
        string, or ``None`` when transcription worked) and ``"transcription"``
        (the recognized text, or ``None`` on failure).

    Raises:
        TypeError: If either argument is not an instance of the expected class.
    """
    if not isinstance(recognizer, sr.Recognizer):
        raise TypeError("`recognizer` must be `Recognizer` instance")
    if not isinstance(microphone, sr.Microphone):
        raise TypeError("`microphone` must be `Microphone` instance")

    # Ambient-noise calibration (adjust_for_ambient_noise) is intentionally
    # left disabled here, as in the original implementation.
    with microphone as source:
        audio = recognizer.listen(source)

    try:
        text = recognizer.recognize_google(audio)
    except sr.RequestError:
        # The recognition request itself failed.
        return {"success": False, "error": "API unavailable", "transcription": None}
    except sr.UnknownValueError:
        # Audio was captured but could not be turned into text.
        return {
            "success": False,
            "error": "Unable to recognize speech",
            "transcription": None,
        }
    return {"success": True, "error": None, "transcription": text}
def chat(client: Groq, message: str, model: str = "llama-3.3-70b-versatile") -> list:
    """Send one user message to the chat-completions endpoint.

    Args:
        client: Groq client used to issue the request.
        message: Text sent as the ``user`` turn of the conversation.
        model: Model identifier forwarded to the API.

    Returns:
        The ``choices`` sequence of the completion response, not a plain
        string. The reply text of the first choice is available as
        ``choices[0].message.content``.
    """
    completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You're an ai girlfriend, answer short and nice.",
            },
            {
                "role": "user",
                "content": message,
            },
        ],
        model=model,
    )
    # Callers index into the choices themselves, so return the whole sequence.
    return completion.choices
def setup(device_index=1):
client = Groq(api_key="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")
recognizer = sr.Recognizer()
microphone = sr.Microphone(device_index=device_index)
return client, recognizer, microphone
def main() -> None:
    """Run the listen -> transcribe -> chat loop until interrupted.

    Each iteration records one phrase, prints either the recognition error or
    the transcription, and for a successful transcription prints the content
    of the first choice returned by the chat call. Ctrl-C exits the process.
    """
    client, recognizer, microphone = setup(1)
    try:
        while True:
            heard = recognize(recognizer, microphone)
            if heard["success"]:
                print("User:", heard["transcription"])
                choices = chat(client, heard["transcription"])
                print("AI:", choices[0].message.content)
            else:
                # Report the failure and go straight back to listening.
                print(heard["error"])
    except KeyboardInterrupt:
        sys.exit()


if __name__ == "__main__":
    main()

19
stt/requirements.txt Normal file
View File

@@ -0,0 +1,19 @@
annotated-types==0.7.0
anyio==4.12.1
audioop-lts==0.2.2
certifi==2026.1.4
distro==1.9.0
groq==1.0.0
h11==0.16.0
httpcore==1.0.9
httpx==0.28.1
idna==3.11
PyAudio==0.2.14
pydantic==2.12.5
pydantic_core==2.41.5
sniffio==1.3.1
SpeechRecognition==3.14.5
standard-aifc==3.13.0
standard-chunk==3.13.0
typing-inspection==0.4.2
typing_extensions==4.15.0