Skip to content

Commit 0a896d8

Browse files
authored
Merge pull request #3205 from FoamyGuy/moonshine_rpi_voicecontrol
Adding rpi moonshine voice control project
2 parents 6dcd885 + 80c0b45 commit 0a896d8

File tree

1 file changed

+195
-0
lines changed

1 file changed

+195
-0
lines changed
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
# SPDX-FileCopyrightText: 2026 Tim Cocks for Adafruit Industries
2+
#
3+
# SPDX-License-Identifier: MIT
4+
"""
5+
Adapted from Moonshine my-dalek example:
6+
https://github.com/moonshine-ai/moonshine/blob/main/examples/raspberry-pi/my-dalek/my-dalek.py
7+
"""
8+
import argparse
9+
import sys
10+
import time
11+
import board
12+
import neopixel
13+
from moonshine_voice import (
14+
MicTranscriber,
15+
TranscriptEventListener,
16+
IntentRecognizer,
17+
get_model_for_language,
18+
get_embedding_model,
19+
)
20+
from adafruit_led_animation.animation.colorcycle import ColorCycle
21+
22+
# pylint: disable=global-statement

# NeoPixels setup
PIXEL_PIN = board.D26  # pin that the NeoPixel is connected to
# 30-pixel strip at 10% brightness (auto_write defaults to on for NeoPixel).
pixels = neopixel.NeoPixel(PIXEL_PIN, 30, brightness=0.1)

# CLI args setup
parser = argparse.ArgumentParser(
    description="Control NeoPixels using your voice on a Raspberry Pi"
)
# Optional transcription model architecture; None lets the library pick
# a default for the language (see get_model_for_language call below).
parser.add_argument(
    "--model-arch",
    type=int,
    default=None,
    help="Model architecture to use for transcription",
)
# Sentence-embedding model used by the intent recognizer.
parser.add_argument(
    "--embedding-model",
    type=str,
    default="embeddinggemma-300m",
    help="Embedding model name (default: embeddinggemma-300m)",
)
# Minimum cosine-similarity score for an utterance to match an intent —
# presumably; exact metric is defined by IntentRecognizer (TODO confirm).
parser.add_argument(
    "--threshold",
    type=float,
    default=0.6,
    help="Similarity threshold for intent matching (default: 0.6)",
)
args = parser.parse_args()
51+
52+
53+
class TranscriptPrinter(TranscriptEventListener):
    """Transcript listener that mirrors the in-progress line to the terminal.

    Redraws the same terminal row in place as the line's text changes,
    blanking out leftover characters whenever the new text is shorter
    than what was previously drawn.
    """

    def __init__(self):
        # Number of characters currently visible on the terminal row.
        self.last_line_text_length = 0

    def update_last_terminal_line(self, new_text: str):
        """Redraw the current terminal row so it shows exactly *new_text*."""
        shown = len(new_text)
        # Carriage return rewinds to column 0 without advancing a row.
        print(f"\r{new_text}", end="", flush=True)
        leftover = self.last_line_text_length - shown
        if leftover > 0:
            # Overwrite stale characters left behind by a longer draw.
            print(" " * leftover, end="", flush=True)
        self.last_line_text_length = shown

    def on_line_started(self, event):  # pylint: disable=unused-argument
        """Reset the visible-length tracker when a fresh line begins."""
        self.last_line_text_length = 0

    def on_line_text_changed(self, event):
        """Redraw the row each time the in-progress line's text changes."""
        self.update_last_terminal_line(f"{event.line.text}")

    def on_line_completed(self, event):
        """Draw the final text, then advance to the next terminal row."""
        self.update_last_terminal_line(f"{event.line.text}")
        print()  # New line after completion
75+
76+
77+
# Load the transcription model
print("Loading transcription model...", file=sys.stderr)
# Resolve an English model; args.model_arch of None lets the library choose.
model_path, model_arch = get_model_for_language("en", args.model_arch)

# Download and load the embedding model for intent recognition
quantization = "q4"  # quantized variant — presumably 4-bit to fit the Pi; TODO confirm
print(
    f"Loading embedding model ({args.embedding_model}, variant={quantization})...",
    file=sys.stderr,
)
embedding_model_path, embedding_model_arch = get_embedding_model(
    args.embedding_model, quantization
)

# Create the intent recognizer (implements TranscriptEventListener)
print(f"Creating intent recognizer (threshold={args.threshold})...", file=sys.stderr)
intent_recognizer = IntentRecognizer(
    model_path=embedding_model_path,
    model_arch=embedding_model_arch,
    model_variant=quantization,
    threshold=args.threshold,
)

# Color vocabulary: (spoken name, RGB tuple). "off" and "black" both map
# to all-zeros so either word turns the strip off.
colors = [
    ("red", (255, 0, 0)),
    ("blue", (0, 0, 255)),
    ("green", (0, 255, 0)),
    ("yellow", (255, 255, 0)),
    ("orange", (255, 95, 0)),
    ("pink", (255, 0, 255)),
    ("purple", (90, 0, 255)),
    ("turquoise", (0, 255, 255)),
    ("off", (0, 0, 0)),
    ("black", (0, 0, 0)),
]

# Disco Party animation setup
# Cycle through the first 8 entries only (skips the "off"/"black" blanks).
disco_party = ColorCycle(pixels, speed=0.35, colors=[_[1] for _ in colors[:8]])
# Flag toggled by the voice-intent callbacks; polled by the main loop.
run_disco_animation = False
116+
117+
118+
def build_lights_color_callback_function(input_data):
    """
    Build an intent-trigger callback that sets the NeoPixels to one color.

    *input_data* is a ``(name, (r, g, b))`` tuple such as
    ``("red", (255, 0, 0))``.  The returned callback stops any running
    disco animation, then fills the strip with the given color.
    """

    def lights_color_callback(trigger: str, utterance: str, similarity: float):
        global run_disco_animation
        banner = "###########################"
        print(banner)
        print(f"# {trigger} - {utterance} - {similarity}")
        print(f"# Turning lights {input_data[0]}")
        print(banner)
        # Stop the animation loop before painting a solid color.
        run_disco_animation = False
        pixels.fill(input_data[1])
        pixels.show()

    return lights_color_callback
137+
138+
139+
def on_disco_party(trigger: str, utterance: str, similarity: float):
    """
    Intent-trigger callback for the "disco party" voice command.

    Flips the module-level ``run_disco_animation`` flag on; the main
    loop notices the flag and drives the ColorCycle animation.
    """
    global run_disco_animation
    border = "###########################"
    print(border)
    print(f"# {trigger} - {utterance} - {similarity}")
    print("# Disco Party!")
    print(border)
    run_disco_animation = True
150+
151+
152+
# Register intents with their trigger phrases and handlers
intents = {
    "disco party": on_disco_party,
}
# Add intents for all color commands
# Two phrasings per color: "lights red" and "red lights".
for color in colors:
    intents[f"lights {color[0]}"] = build_lights_color_callback_function(color)
    intents[f"{color[0]} lights"] = build_lights_color_callback_function(color)

for intent, handler in intents.items():
    intent_recognizer.register_intent(intent, handler)
print(f"Registered {intent_recognizer.intent_count} intents", file=sys.stderr)

# Initialize transcriber
transcriber = MicTranscriber(model_path=model_path, model_arch=model_arch)

# Add both the transcript printer and intent recognizer as listeners
# The intent recognizer will process completed lines and trigger handlers
transcript_printer = TranscriptPrinter()
transcriber.add_listener(transcript_printer)
transcriber.add_listener(intent_recognizer)

# Status banner goes to stderr so stdout stays clean for the transcript.
print("\n" + "=" * 60, file=sys.stderr)
print("🎤 Listening for voice commands...", file=sys.stderr)
print("Try saying phrases with the same meaning as these actions:", file=sys.stderr)
for intent in intents.keys():  # pylint: disable=consider-iterating-dictionary
    print(f" - '{intent}'", file=sys.stderr)
print("=" * 60, file=sys.stderr)
print("Press Ctrl+C to stop.\n", file=sys.stderr)

transcriber.start()
try:
    # Loop forever, listening for voice commands,
    # and showing NeoPixel animation when appropriate.
    while True:
        if run_disco_animation:
            disco_party.animate()
        # Short sleep keeps CPU usage low while still animating smoothly.
        time.sleep(0.01)
except KeyboardInterrupt:
    print("\n\nStopping...", file=sys.stderr)
finally:
    # Release model/audio resources even on unexpected errors.
    intent_recognizer.close()
    transcriber.stop()
    transcriber.close()

0 commit comments

Comments
 (0)