22#
33# SPDX-License-Identifier: MIT
44
5- from datetime import datetime , timedelta
6- from queue import Queue
5+ import time
76
87import speech_recognition as sr
98
109
1110class Listener :
12- def __init__ (self , energy_threshold = 1000 , phrase_timeout = 3.0 , record_timeout = 30 ):
11+ def __init__ (self , api_key , energy_threshold = 300 , record_timeout = 30 ):
1312 self .listener_handle = None
13+ self .microphone = sr .Microphone ()
1414 self .recognizer = sr .Recognizer ()
1515 self .recognizer .energy_threshold = energy_threshold
16- self .recognizer .dynamic_energy_threshold = False
17- self .recognizer .pause_threshold = 1
18- self .last_sample = bytes ()
19- self .phrase_time = datetime .utcnow ()
20- self .phrase_timeout = phrase_timeout
16+ with self .microphone as source :
17+ self .recognizer .adjust_for_ambient_noise (source ) # we only need to calibrate once, before we start listening
2118 self .record_timeout = record_timeout
22- self .phrase_complete = False
23- # Thread safe Queue for passing data from the threaded recording callback.
24- self .data_queue = Queue ()
25- self .mic_dev_index = None
19+ self .listener_handle = None
20+ self .audio = None
21+ self .api_key = api_key
2622
2723 def listen (self , ready_callback = None ):
28- self .phrase_complete = False
29- start = datetime .utcnow ()
30- self .start_listening ()
24+ self ._start_listening ()
3125 if ready_callback :
3226 ready_callback ()
3327 while (
3428 self .listener_handle
35- and not self .speech_waiting ()
36- or not self .phrase_complete
29+ and self .audio is None
3730 ):
38- if self .phrase_time and start - self .phrase_time > timedelta (
39- seconds = self .phrase_timeout
40- ):
41- self .last_sample = bytes ()
42- self .phrase_complete = True
43- self .phrase_time = start
31+ time .sleep (0.1 )
4432 self .stop_listening ()
4533
46- def start_listening (self ):
47- if not self .listener_handle :
48- with sr .Microphone () as source :
49- self .recognizer .adjust_for_ambient_noise (source )
50- self .listener_handle = self .recognizer .listen_in_background (
51- sr .Microphone (),
52- self .record_callback ,
53- phrase_time_limit = self .record_timeout ,
54- )
34+ def _save_audio_callback (self , _recognizer , audio ):
35+ self .audio = audio
36+
37+ def _start_listening (self ):
38+ self .listener_handle = self .recognizer .listen_in_background (self .microphone , self ._save_audio_callback )
5539
5640 def stop_listening (self , wait_for_stop = False ):
5741 if self .listener_handle :
@@ -61,40 +45,24 @@ def stop_listening(self, wait_for_stop=False):
6145 def is_listening (self ):
6246 return self .listener_handle is not None
6347
64- def record_callback (self , _ , audio : sr .AudioData ) -> None :
65- # Grab the raw bytes and push it into the thread safe queue.
66- data = audio .get_raw_data ()
67- self .data_queue .put (data )
68-
6948 def speech_waiting (self ):
70- return not self .data_queue .empty ()
71-
72- def get_speech (self ):
73- if self .speech_waiting ():
74- return self .data_queue .get ()
75- return None
76-
77- def get_audio_data (self ):
78- now = datetime .utcnow ()
79- if self .speech_waiting ():
80- self .phrase_complete = False
81- if self .phrase_time and now - self .phrase_time > timedelta (
82- seconds = self .phrase_timeout
83- ):
84- self .last_sample = bytes ()
85- self .phrase_complete = True
86- self .phrase_time = now
87-
88- # Concatenate our current audio data with the latest audio data.
89- while self .speech_waiting ():
90- data = self .get_speech ()
91- self .last_sample += data
49+ return self .audio is not None
9250
93- # Use AudioData to convert the raw data to wav data.
94- with sr .Microphone () as source :
95- audio_data = sr .AudioData (
96- self .last_sample , source .SAMPLE_RATE , source .SAMPLE_WIDTH
97- )
98- return audio_data
51+ def recognize (self ):
52+ if self .audio :
53+ # Transcribe the audio data to text using Whisper
54+ print ("Recognizing..." )
55+ attempts = 0
56+ while attempts < 3 :
57+ try :
58+ result = self .recognizer .recognize_whisper_api (
59+ self .audio , api_key = self .api_key
60+ )
9961
100- return None
62+ return result .strip ()
63+ except sr .RequestError as e :
64+ time .sleep (3 )
65+ attempts += 1
66+ print ("I wasn't able to understand you. Please repeat that." )
67+ return None
68+ return None
0 commit comments