66
77import argparse
88import os
9- import sys
109
1110from datetime import datetime , timedelta
1211from queue import Queue
13- from time import sleep
12+ import time
13+ import random
1414from tempfile import NamedTemporaryFile
1515
1616import speech_recognition as sr
1717import openai
1818
19- # Add your OpenAI API key here
20- openai .api_key = "sk-..."
19+ import board
20+ import digitalio
21+ from adafruit_motorkit import MotorKit
22+
# The API key is read from the environment instead of being committed to source
# control. Any key that was previously hard-coded here should be treated as
# compromised and rotated.
openai.api_key = os.getenv("OPENAI_API_KEY")

SYSTEM_ROLE = (
    "You are a helpful voice assistant that answers questions and gives information"
)
CHATGPT_MODEL = "gpt-3.5-turbo"
WHISPER_MODEL = "whisper-1"
ARM_MOVEMENT_TIME = 0.5  # Seconds the arms motor is driven for each movement
BASE_MOUTH_DURATION = 0.2  # A higher number means slower mouth movement
SPEECH_VARIANCE = 0.03  # A higher number means more variance in the mouth movement
RECORD_TIMEOUT = 30  # Passed as the `timeout` argument when listening for speech

# Setup Motors
kit = MotorKit(i2c=board.I2C())
arms_motor = kit.motor1
mouth_motor = kit.motor2

# Setup Foot Button (pulled up, so the pin reads False while the button is pressed)
foot_button = digitalio.DigitalInOut(board.D16)
foot_button.direction = digitalio.Direction.INPUT
foot_button.pull = digitalio.Pull.UP
2843
2944def sendchat (prompt ):
3045 completion = openai .ChatCompletion .create (
31- model = "gpt-3.5-turbo" ,
46+ model = CHATGPT_MODEL ,
3247 messages = [
3348 {"role" : "system" , "content" : SYSTEM_ROLE },
3449 {"role" : "user" , "content" : prompt },
@@ -37,84 +52,80 @@ def sendchat(prompt):
3752 # Send the heard text to ChatGPT and return the result
3853 return completion .choices [0 ].message .content
3954
def move_arms_motor(dir_up=True, speed=1.0):
    """Drive the arms motor in one direction for ARM_MOVEMENT_TIME seconds.

    Args:
        dir_up: When True the throttle is positive, otherwise negative.
        speed: Throttle magnitude; it is multiplied by the direction sign.
    """
    direction = 1 if dir_up else -1
    arms_motor.throttle = speed * direction
    try:
        time.sleep(ARM_MOVEMENT_TIME)
    finally:
        # Always stop the motor, even if the sleep is interrupted (e.g. Ctrl+C),
        # so it is never left running.
        arms_motor.throttle = 0
60+
def move_mouth_motor(dir_open=True, duration=0.5, speed=1.0):
    """Drive the mouth motor in one direction for ``duration`` seconds.

    Args:
        dir_open: When True the throttle is positive, otherwise negative.
        duration: How long, in seconds, the motor is driven before stopping.
        speed: Throttle magnitude; it is multiplied by the direction sign.
    """
    direction = 1 if dir_open else -1
    mouth_motor.throttle = speed * direction
    try:
        time.sleep(duration)
    finally:
        # Always stop the motor, even if the sleep is interrupted (e.g. Ctrl+C),
        # so it is never left running.
        mouth_motor.throttle = 0
66+
def move_mouth():
    """Run one mouth cycle: open, then close, each for a randomized duration."""
    for opening in (True, False):
        # A fresh random duration is drawn for each half of the cycle.
        move_mouth_motor(dir_open=opening, duration=random_mouth_duration())
70+
def random_mouth_duration():
    """Return a randomized mouth movement duration in seconds.

    The value is BASE_MOUTH_DURATION shifted by a random offset in the range
    [-SPEECH_VARIANCE / 2, +SPEECH_VARIANCE / 2).

    Returns:
        A non-negative float. The result is clamped at 0 because it is passed
        to ``time.sleep``, which raises ``ValueError`` for negative values; this
        only matters if SPEECH_VARIANCE is tuned above twice BASE_MOUTH_DURATION.
    """
    duration = (
        BASE_MOUTH_DURATION + random.random() * SPEECH_VARIANCE - (SPEECH_VARIANCE / 2)
    )
    return max(0.0, duration)
73+
def move_arms(hide=True):
    """Move the arms via the arms motor.

    Args:
        hide: When True the motor is driven with ``dir_up=False``; when False
            it is driven with ``dir_up=True``.
    """
    raise_arms = not hide
    move_arms_motor(dir_up=raise_arms)
76+
def speak(text):
    """Speak ``text`` with espeak-ng, moving the mouth motor until it finishes.

    Args:
        text: The text handed to the ``espeak-ng`` command line program.
    """
    # Imported locally because no module-level `import subprocess` is visible
    # in this file; a repeated import is harmless if one exists.
    import subprocess

    # The arguments are a list and no shell is involved, so a trailing "&" would
    # be passed to espeak-ng as literal text rather than backgrounding it.
    # Popen already returns without waiting for the process to finish.
    with subprocess.Popen(["espeak-ng", text]) as proc:
        # while the subprocess is still running, move the mouth
        while proc.poll() is None:
            move_mouth()
4082
def transcribe(wav_data, max_attempts=3, retry_delay=3):
    """Transcribe WAV audio with the OpenAI Whisper endpoint, retrying on API errors.

    Before the request is made, a short phrase is spoken and the arms are moved
    with ``hide=True``.

    Args:
        wav_data: The WAV-encoded audio to transcribe.
        max_attempts: Maximum number of API calls before giving up.
        retry_delay: Seconds to wait between failed attempts.

    Returns:
        The stripped transcription text, or a fixed apology sentence if every
        attempt failed.
        NOTE(review): callers appear to treat the return value as recognised
        speech, so the apology sentence may be forwarded as a prompt - confirm.
    """
    # Read the transcription.
    print("Transcribing...")
    speak("Let me think about that")
    move_arms(hide=True)
    for attempt in range(1, max_attempts + 1):
        try:
            # The temporary file only supplies a ".wav" file name for the API call.
            with NamedTemporaryFile(suffix=".wav") as temp_file:
                result = openai.Audio.translate_raw(
                    WHISPER_MODEL, wav_data, temp_file.name
                )
                return result["text"].strip()
        except (
            openai.error.ServiceUnavailableError,
            openai.error.APIError,
        ) as err:
            print(f"Transcription attempt {attempt} of {max_attempts} failed: {err}")
            # Waiting after the final attempt would only delay the fallback reply.
            if attempt < max_attempts:
                time.sleep(retry_delay)
    return "I wasn't able to understand you. Please repeat that."
48101
49102class Listener :
50103 def __init__ (
51- self , default_microphone , record_timeout , energy_threshold , phrase_timeout
104+ self , energy_threshold , phrase_timeout
52105 ):
53106 self .listener_handle = None
54- self .recorder = sr .Recognizer ()
55- self .record_timeout = record_timeout
56- self .recorder .energy_threshold = energy_threshold
57- self .recorder .dynamic_energy_threshold = False
58- self .recorder .pause_threshold = 1
59- self .source = None
107+ self .recognizer = sr .Recognizer ()
108+ self .recognizer .energy_threshold = energy_threshold
109+ self .recognizer .dynamic_energy_threshold = False
110+ self .recognizer .pause_threshold = 1
60111 self .last_sample = bytes ()
61112 self .phrase_time = datetime .utcnow ()
62113 self .phrase_timeout = phrase_timeout
63114 self .phrase_complete = False
64- self .default_microphone = default_microphone
65115 # Thread safe Queue for passing data from the threaded recording callback.
66116 self .data_queue = Queue ()
67- self .source = self ._get_microphone ()
68-
69- def _get_microphone (self ):
70- if self .source :
71- return self .source
72- mic_name = self .default_microphone
73- source = None
74- if not mic_name or mic_name == "list" :
75- print ("Available microphone devices are: " )
76- for index , name in enumerate (sr .Microphone .list_microphone_names ()):
77- print (f'Microphone with name "{ name } " found' )
78- sys .exit ()
79- else :
80- for index , name in enumerate (sr .Microphone .list_microphone_names ()):
81- if mic_name in name :
82- print (f'Microphone with name "{ name } " at index "{ index } " found' )
83- source = sr .Microphone (sample_rate = 16000 , device_index = index )
84- break
85- if not source :
86- print (f'Microphone with name "{ mic_name } " not found' )
87- sys .exit ()
88-
89- with source :
90- self .recorder .adjust_for_ambient_noise (source )
91-
92- return source
117+ self .mic_dev_index = None
93118
94119 def listen (self ):
95120 if not self .listener_handle :
96- with self ._get_microphone () as source :
97- audio = self .recorder .listen (source )
121+ with sr .Microphone () as source :
122+ print (source .stream )
123+ self .recognizer .adjust_for_ambient_noise (source )
124+ audio = self .recognizer .listen (source , timeout = RECORD_TIMEOUT )
98125 data = audio .get_raw_data ()
99126 self .data_queue .put (data )
100127
101- def start (self ):
102- if not self .listener_handle :
103- self .listener_handle = self .recorder .listen_in_background (
104- self ._get_microphone (),
105- self .record_callback ,
106- phrase_time_limit = self .record_timeout ,
107- )
108-
109- def stop (self , wait_for_stop : bool = False ):
110- self .listener_handle (wait_for_stop = wait_for_stop )
111- self .listener_handle = None
112-
113128 def record_callback (self , _ , audio : sr .AudioData ) -> None :
114- """
115- Threaded callback function to recieve audio data when recordings finish.
116- audio: An AudioData containing the recorded bytes.
117- """
118129 # Grab the raw bytes and push it into the thread safe queue.
119130 data = audio .get_raw_data ()
120131 self .data_queue .put (data )
@@ -143,14 +154,14 @@ def get_audio_data(self):
143154 data = self .get_speech ()
144155 self .last_sample += data
145156
146- source = self ._get_microphone ()
147-
148157 # Use AudioData to convert the raw data to wav data.
149- return sr .AudioData (
150- self .last_sample , source .SAMPLE_RATE , source .SAMPLE_WIDTH
151- )
152- return None
158+ with sr .Microphone () as source :
159+ audio_data = sr .AudioData (
160+ self .last_sample , source .SAMPLE_RATE , source .SAMPLE_WIDTH
161+ )
162+ return audio_data
153163
164+ return None
154165
155166def main ():
156167 parser = argparse .ArgumentParser ()
@@ -160,44 +171,32 @@ def main():
160171 help = "Energy level for mic to detect." ,
161172 type = int ,
162173 )
163- parser .add_argument (
164- "--record_timeout" ,
165- default = 2 ,
166- help = "How real time the recording is in seconds." ,
167- type = float ,
168- )
169174 parser .add_argument (
170175 "--phrase_timeout" ,
171176 default = 3 ,
172177 help = "How much empty space between recordings before we "
173178 "consider it a new line in the transcription." ,
174179 type = float ,
175180 )
176- parser .add_argument (
177- "--default_microphone" ,
178- default = "pulse" ,
179- help = "Default microphone name for SpeechRecognition. "
180- "Run this with 'list' to view available Microphones." ,
181- type = str ,
182- )
181+
183182 args = parser .parse_args ()
184183
185184 listener = Listener (
186- args .default_microphone ,
187- args .record_timeout ,
188185 args .energy_threshold ,
189186 args .phrase_timeout ,
190187 )
191188
192189 transcription = ["" ]
193190
194- print ("How may I help you?" )
195- speak ("How may I help you?" )
196-
197191 while True :
198192 try :
193+ # If button is pressed, start listening
194+ if not foot_button .value :
195+ print ("How may I help you?" )
196+ speak ("How may I help you?" )
197+ listener .listen ()
198+
199199 # Pull raw recorded audio from the queue.
200- listener .listen ()
201200 if listener .speech_waiting ():
202201 audio_data = listener .get_audio_data ()
203202 text = transcribe (audio_data .get_wav_data ())
@@ -209,6 +208,7 @@ def main():
209208 chat_response = sendchat (text )
210209 transcription .append (f"> { chat_response } " )
211210 print ("Got response from ChatGPT. Beginning speech synthesis." )
211+ move_arms (hide = False )
212212 speak (chat_response )
213213 print ("Done speaking." )
214214 else :
@@ -219,12 +219,10 @@ def main():
219219 for line in transcription :
220220 print (line )
221221 print ("" , end = "" , flush = True )
222- sleep (0.25 )
223- except (AssertionError , AttributeError ):
224- pass
222+ time .sleep (0.25 )
225223 except KeyboardInterrupt :
226224 break
227-
225+ move_arms ( hide = False )
228226 print ("\n \n Transcription:" )
229227 for line in transcription :
230228 print (line )
0 commit comments