Newbie on this forum so be kind! I might just be blind to the issue at this point but I can't for the life of me figure out why I can't interrupt my speaking AI when it's rambling. I've tried a bunch of different approaches and now I'm finally at threading but even though playback is in another thread, execution will not allow callbacks from listen_in_background. How would I go about being able to tell "Jarvis" to stop talking mid sentence?
The foundation is instructions from this video: https://www[.]youtube[.]com/watch?v=6zAk0KHmiGw
This is what I've got currently:
from os import system
import speech_recognition as sr
from playsound import playsound
from gpt4all import GPT4All
import whisper
import time
import os
import pyttsx3
import importlib
import threading
wake_word = "jarvis"
model = GPT4All("nous-hermes-llama2-13b.Q4_0.gguf", allow_download=False)
r = sr.Recognizer()
tiny_model = whisper.load_model("tiny")
base_model = whisper.load_model("base")
listening_for_wake_word = True
stop_talking = False
source = sr.Microphone()
def speak_thread(text):
importlib.reload(pyttsx3)
engine = pyttsx3.init()
engine.say(text)
engine.runAndWait()
def speak(text):
global stop_talking
talk_thread = threading.Thread(daemon=True, target=speak_thread, name="talking", args=(text,)).start()
while any("talking" in item.name for item in threading.enumerate()):
if stop_talking:
talk_thread.stop()
time.sleep(1)
def listen_for_wake_word(audio):
global listening_for_wake_word
with open("wake_detect.wav", "wb") as f:
f.write(audio.get_wav_data())
result = tiny_model.transcribe("wake_detect.wav")
text_input = result["text"]
if wake_word in text_input.lower().strip():
print("Wake word detected. Please speak your prompt to GPT4All.")
speak("Listening")
listening_for_wake_word = False
def prompt_gpt(audio):
global listening_for_wake_word
try:
with open("prompt.wav", "wb") as f:
f.write(audio.get_wav_data())
result = base_model.transcribe("prompt.wav")
prompt_text = result["text"]
if len(prompt_text .strip()) == 0:
print("I didn't catch that. Please repeat.")
listening_for_wake_word = True
else:
print("User: " + prompt_text)
output = base_model.generate(prompt_text, max_tokens=500)
print("GPT4All: ", output)
speak(output)
print("\nSay", wake_word, "to wake me up. \n")
listening_for_wake_word = True
except Exception as e:
print("Prompt error: ", e)
def callback (recognizer, audio):
global listening_for_wake_word
global stop_talking
print("Heard something")
try:
with open("temp.wav", "wb") as f:
f.write(audio.get_wav_data())
result = tiny_model.transcribe("wake_detect.wav")
text_input = result["text"]
if "stop" in text_input.lower().strip():
print("Stopping playback...")
stop_talking = True
return
except Exception as e:
print("Prompt error in stop part: ", e)
if listening_for_wake_word:
listen_for_wake_word(audio)
else:
prompt_gpt(audio)
def start_listening():
with source as s:
r.adjust_for_ambient_noise(s, duration=2)
print("\nSay", wake_word, "to wake me up. \n")
r.listen_in_background(source, callback) # Why does this not send further callbacks until after audio playback?
while True: # just to keep alive
time.sleep(1)
print(threading.enumerate())
threading.get_ident()
if __name__ == "__main__":
start_listening()
Initially "runAndWait()" seemed to be the issue but now I feel like maybe "listen_in_background" can't get a word in edgeways while any execution is ongoing. Is that was is happening and if so, is there a workaround? I want to be able to interrupt Jarvis mid sentence with a voice command to make it feel more interactive.
Hope any of you can help!