diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..2fe1803 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,124 @@ +# SpeedReader - Copilot Instructions + +## Project Overview +A Python desktop application that uses text-to-speech (TTS) to read text at high speeds (up to 500+ WPM). Built with tkinter for the GUI and pyttsx3 for speech synthesis. + +## Architecture + +### MVC-like Structure +``` +SpeedReader.py # Entry point - instantiates controller and starts mainloop +Controllers/ # Application controllers (extend Tk) + SpeedReaderController.py # Main window controller, sets up grid layout +Frames/ # UI components (extend ttk.Frame) + MainFrame.py # All UI logic, TTS engine management, event handlers +``` + +### Key Patterns +- **Controller as Tk root**: `SpeedReaderController` extends `Tk` directly, not a separate class +- **Frame-based UI**: UI components are `ttk.Frame` subclasses passed `master=self` from controller +- **Threaded TTS**: Speech runs in daemon threads via `threading.Thread` to keep UI responsive +- **Fresh engine per session**: pyttsx3 engine is created fresh for each speech session to avoid state issues after interruption +- **Session ID tracking**: `speech_session_id` increments on new speech; callbacks check `current_session_id` to ignore stale events +- **Windows media control**: Pauses system music when TTS starts, resumes when finished (via `VK_MEDIA_PLAY_PAUSE` key simulation) + +### Important Code Patterns + +**Widget state checking** - uses string comparison: +```python +if self.speak_button['state'].__str__() == NORMAL: +``` + +**Text widget tagging** for highlighting current word: +```python +self.text_area.tag_config(TAG_CURRENT_WORD, foreground="red") +self.text_area.tag_add(TAG_CURRENT_WORD, index1, index2) +``` + +**pyttsx3 callbacks** - connect to engine events: +```python +self.engine.connect('started-utterance', self.onStart) 
+self.engine.connect('started-word', self.onStartWord) +self.engine.connect('finished-utterance', self.onEnd) +``` + +## Build & Run + +### Development +```powershell +# Activate venv (may need execution policy) +Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope Process +.\.venv\Scripts\Activate.ps1 + +# Run the app +python SpeedReader.py +``` + +### Build Executable +```powershell +pyinstaller SpeedReader.spec +# Output: dist/SpeedReader.exe (single file, no console) +``` + +## Dependencies +- `pyttsx3` - Cross-platform TTS (uses SAPI5 on Windows) +- `pyinstaller` - Build standalone executables +- `tkinter` - GUI (included with Python) + +## UI Keyboard Shortcuts +- `Ctrl+A` - Select all text in text area +- `Ctrl+B` - Paste clipboard and immediately start speaking + +## Testing Practices + +### Test-Driven Development (TDD) +Follow the TDD cycle: **Red → Green → Refactor** +1. Write a failing test first +2. Write minimal code to make it pass +3. Refactor while keeping tests green + +### Unit Test Structure +Use **Arrange-Act-Assert** pattern for all tests: +```python +def test_speed_entry_default_value(): + # Arrange + controller = SpeedReaderController() + frame = controller.winfo_children()[0] + + # Act + speed_value = frame.speed_entry.get() + + # Assert + assert speed_value == "500" + controller.destroy() +``` + +### Testing tkinter Components +- Always call `controller.destroy()` in teardown to clean up Tk instances +- Use `controller.update()` to process pending UI events in tests +- Mock `pyttsx3.init()` to avoid actual speech synthesis during tests + +## Agent Self-Improvement +**When you discover something new about this project**, update this instructions file: +- New patterns or conventions you observe in the code +- Build/run commands that aren't documented +- Gotchas or workarounds you encounter +- Integration points with external systems + +Keep this file current so future AI agents benefit from your learnings. 
+ + +When using workspace search or code usage tools: + +Never pass a full package namespace (e.g., 'a.b.c') as a single symbol unless searching for an explicit import statement. +Break down symbol searches to the specific Class, Method, or Variable name. +Always verify that the 'arguments' object matches the exact JSON schema defined by the tool. + + + +When using workspace search or code usage tools: + +Never pass a full package namespace (e.g., 'a.b.c') as a single symbol unless searching for an explicit import statement. +Break down symbol searches to the specific Class, Method, or Variable name. +Always verify that the 'arguments' object matches the exact JSON schema defined by the tool. + diff --git a/.gitignore b/.gitignore index f1970ec..889766e 100644 --- a/.gitignore +++ b/.gitignore @@ -59,3 +59,5 @@ target/ #Ipython Notebook .ipynb_checkpoints + +.vscode/ \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index aa1d239..560a818 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,5 +4,7 @@ "tests" ], "python.testing.pytestEnabled": true, - "python.testing.unittestEnabled": false + "python.testing.unittestEnabled": false, + "github.copilot.chat.streamingCompletions": false + } \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md index f5ce421..170c37b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -65,6 +65,7 @@ The tests mock `pyttsx3` (see [tests/test_speech_engine.py](tests/test_speech_en # Custom Instructions - ALWAYS dogfood your own MCP server. Whenever you need to ask me a question or finish a task, use the MCP to read it out loud. If the MCP is not available, remind me to turn it on for dogfooding purposes. +- ALWAYS add or update unit tests whenever functionality is added or changed — in the same change, never defer it. Put GUI-free logic in [Core/](Core/) so it can be tested, and run `python -m pytest -q` to confirm green before finishing. 
- be repetitive in instructions and skills with high risk items due to the unpredictability of large language models - list skills you used in completing the prompt - ALWAYS create (not just suggest) any skill you wished you had before starting the prompt — create the missing `SKILL.md` under `.github/skills//` before finishing, then list what you created @@ -73,4 +74,5 @@ The tests mock `pyttsx3` (see [tests/test_speech_engine.py](tests/test_speech_en - ALWAYS keep [README.md](README.md) up to date for GitHub users when behavior, setup, run/build steps, or user-facing features change (e.g. the MCP server, config, shortcuts) — update it in the same change, never defer it - be repetitive in instructions and skills with high risk items due to the unpredictability of large language models - REPEAT: missing skills must be CREATED as files, never left as suggestions -- REPEAT: user-facing changes are not done until [README.md](README.md) reflects them \ No newline at end of file +- REPEAT: user-facing changes are not done until [README.md](README.md) reflects them +- REPEAT: added or changed functionality is not done until unit tests cover it and `pytest` is green \ No newline at end of file diff --git a/Core/config.py b/Core/config.py index 6d8874f..3a22b94 100644 --- a/Core/config.py +++ b/Core/config.py @@ -12,6 +12,7 @@ class McpConfig: port: int = 8765 voices: list = field(default_factory=list) # enabled voice IDs; empty = all pause_when_mic_in_use: bool = False # skip agent speech while the mic is in use + pause_media_when_speaking: bool = False # pause media playback when speaking def load_mcp_config(path=None): @@ -41,6 +42,8 @@ def load_mcp_config(path=None): cfg.voices = [str(v) for v in mcp["voices"]] if "pause_when_mic_in_use" in mcp: cfg.pause_when_mic_in_use = bool(mcp["pause_when_mic_in_use"]) + if "pause_media_when_speaking" in mcp: + cfg.pause_media_when_speaking = bool(mcp["pause_media_when_speaking"]) return cfg @@ -70,10 +73,10 @@ def 
save_enabled_voices(voice_ids, path=None): return _update_mcp_config({"voices": list(voice_ids)}, path=path) -def save_mcp_port(port, path=None): - """Persist the MCP hosting port so it survives across sessions. +def save_media_pause_setting(enabled, path=None): + """Persist the media pause setting to the config file. - Preserves any existing config; writes ``mcp.port``. Used by the GUI when the - user changes the server port and restarts the server. + Preserves any existing config; writes ``mcp.pause_media_when_speaking``. Used by the GUI when the + user toggles this feature. """ - return _update_mcp_config({"port": int(port)}, path=path) + return _update_mcp_config({"pause_media_when_speaking": bool(enabled)}, path=path) diff --git a/Core/speech_engine.py b/Core/speech_engine.py index d6ebe17..b2ca6ca 100644 --- a/Core/speech_engine.py +++ b/Core/speech_engine.py @@ -43,6 +43,7 @@ def __init__(self, on_start=None, on_word=None, on_end=None, init=None): self._engine_ready = threading.Event() self._voices_ready = threading.Event() self._loop_requested = False + self._flush_generation = 0 def _ensure_engine(self): """Create + wire the engine. MUST run on the dedicated loop thread. @@ -90,18 +91,52 @@ def _await_engine(self): return self.engine return self._ensure_engine() - def speak(self, text, rate, voice=None, block=True): + def flush(self): + """Cancel queued utterances and interrupt the one being spoken now. + + Bumps the flush generation so any callers blocked waiting for the speak + lock abort instead of speaking, then stops the engine to interrupt the + current utterance. Used by the GUI 'barge in' (Ctrl+B) path. The MCP + server never flushes, so agent utterances queue and play in order. + """ + self._flush_generation += 1 + if self.engine is not None: + try: + self.engine.stop() + except Exception: + pass + + def speak(self, text, rate, voice=None, block=True, interrupt=False, name=None): """Speak one utterance, optionally with a per-call ``voice`` id. 
Serialized via a lock; when ``block`` (default) it waits for the utterance to finish so the next speaker's voice cannot bleed in. Run on a daemon/worker thread — never the tkinter main thread. + + When ``interrupt`` is set, the current utterance is stopped and any + already-queued utterances are cancelled before this one speaks (the GUI + Ctrl+B path). Calls left ``interrupt=False`` (e.g. the MCP server) queue + normally and play in order. + + ``name`` is passed through to ``engine.say`` so it is echoed back to the + started/word/finished callbacks; the GUI uses it to tag each utterance + with a session id and ignore callbacks from an interrupted utterance + that arrive after a new one has already started. """ + if interrupt: + self.flush() + my_generation = self._flush_generation with self._speak_lock: + if self._flush_generation != my_generation: + # A flush happened while this call waited in the queue — drop it. + return engine = self._await_engine() self._apply_properties(rate, voice) self._done.clear() - engine.say(text) + if name is None: + engine.say(text) + else: + engine.say(text, name) if block: self._done.wait(timeout=600) diff --git a/Frames/MainFrame.py b/Frames/MainFrame.py index f010725..c4bbcd6 100644 --- a/Frames/MainFrame.py +++ b/Frames/MainFrame.py @@ -1,13 +1,36 @@ import threading import webbrowser import tkinter.ttk as ttk -from tkinter.constants import END, N, S, E, W, NORMAL, DISABLED, RIGHT, CENTER, LEFT, SEL, INSERT, HORIZONTAL -from tkinter import Text, StringVar, BooleanVar, Toplevel +from tkinter.constants import END, N, S, E, W, LEFT, RIGHT, CENTER, NORMAL, DISABLED, SEL, INSERT, HORIZONTAL +from tkinter import Text, StringVar, Toplevel, BooleanVar +import pyttsx3 +from pyttsx3 import engine +import re +import platform +import asyncio + +# Windows media key support +if platform.system() == 'Windows': + import ctypes + VK_MEDIA_PLAY_PAUSE = 0xB3 + KEYEVENTF_EXTENDEDKEY = 0x0001 + KEYEVENTF_KEYUP = 0x0002 + + # Try to import Windows 
Media Session API for detecting playback state + try: + from winrt.windows.media.control import GlobalSystemMediaTransportControlsSessionManager + from winrt.windows.media.control import GlobalSystemMediaTransportControlsSessionPlaybackStatus + MEDIA_SESSION_AVAILABLE = True + except ImportError: + # Fallback for environments where the API is not available (e.g., CI/testing) + MEDIA_SESSION_AVAILABLE = False + print("Windows Media Session API not available - media detection disabled.") + from Core.speech_engine import SpeechEngine from Core.speak_service import SpeakService -from Core.voice_registry import VoiceRegistry -from Core.config import load_mcp_config, save_enabled_voices, save_mcp_port +from Core.config import load_mcp_config, save_enabled_voices from Core.text_processing import preprocess_text, word_window, highlight_indices +from Core.voice_registry import VoiceRegistry class MainFrame(ttk.Frame): def __init__(self, **kw): @@ -26,6 +49,14 @@ def __init__(self, **kw): self.spoken_text = '' self.highlight_index1 = None self.highlight_index2 = None + self.media_was_paused = False # Track if we paused media playback + self.is_speaking = False + self.stop_requested = False + self.speech_thread = None + self.current_session_id = 0 + self.speech_session_id = 0 + # For test compatibility - engine is None initially, then gets set by speech engine + self.engine = None self.build_frame_content(kw) def _build_voice_registry(self): @@ -156,19 +187,25 @@ def build_frame_content(self, kw): self.stop_button.bind("", self.stop) row_index += 1 - self.contribute_button = ttk.Button(self, text="Contribute", command=self.open_contribute) + self.contribute_button = ttk.Button(self, text="Contribute on GitHub", command=self.open_contribute) self.contribute_button.grid(row=row_index, column=0, columnspan=4, pady=10) self.text_area.bind("", self.select_all_text) self.text_area.bind("", self.select_all_text) - self.master.bind("", self.paste_and_speak) - self.master.bind("", 
self.paste_and_speak) + # Bind paste & speak to KeyRelease, not KeyPress: holding Ctrl+B fires + # KeyPress repeatedly (auto-repeat) on Windows, which spammed dozens of + # interrupting speech sessions and raced the Stop button into a bad + # state. KeyRelease fires once per physical release, so each barge-in is + # a single, clean interrupt. + self.master.bind("", self.paste_and_speak) + self.master.bind("", self.paste_and_speak) self.master.protocol("WM_DELETE_WINDOW", self.on_closing) def on_closing(self): - self.stop(None) + # Stop any ongoing speech and clean up resources + self.force_stop_and_reset() self.master.destroy() self.master.quit() @@ -285,7 +322,7 @@ def restart_server(self): self.server_status_var.set("failed: {}".format(exc)) self.restart_server_button['state'] = NORMAL return - save_mcp_port(port) + save_enabled_voices([vid for vid, _ in self.voices]) self.server_status_var.set("running on {}".format(port)) self.restart_server_button['state'] = NORMAL @@ -307,26 +344,190 @@ def on_voice_changed(self, event=None): break def paste_and_speak(self, event): - self.stop(event) + """Stop current speech, paste clipboard content, and start speaking.""" + # Force stop any current speech and reset state + self.force_stop_and_reset() + + # Clear UI and insert new text + self.clear_display_labels() self.text_area.delete("1.0", END) - self.text_area.insert(END, self.master.clipboard_get()) - self.speak(event) + try: + clipboard_text = self.master.clipboard_get() + self.text_area.insert(END, clipboard_text) + except Exception as e: + print(f"Error getting clipboard: {e}") + return + + # Start speaking the new text, interrupting (flushing) anything already + # queued or playing so the pasted text plays now instead of waiting for + # the queue to drain. 
+ self.speak(event, interrupt=True) + + def force_stop_and_reset(self): + """Force stop current speech and reset engine for fresh start.""" + self.stop_requested = True + + # Increment session ID to invalidate any pending callbacks from old session + self.speech_session_id += 1 + + # Stop the current engine if running + if self.engine is not None: + try: + self.engine.stop() + except Exception as e: + print(f"Error stopping engine: {e}") + # Dispose of the engine - we'll create a fresh one + self.engine = None + + # Wait briefly for the speech thread to finish + if self.speech_thread is not None and self.speech_thread.is_alive(): + self.speech_thread.join(timeout=0.5) + + # Reset state + self.is_speaking = False + self.stop_requested = False + self.speak_button['state'] = NORMAL + self.stop_button['state'] = DISABLED + + def clear_display_labels(self): + """Clear all the display labels and progress.""" + self.spoken_words['text'] = '' + self.current_word_label['text'] = '' + self.next_words['text'] = '' + self.progress["value"] = 0 + + # Clear highlighting + if self.highlight_index1 is not None: + try: + self.text_area.tag_remove(TAG_CURRENT_WORD, self.highlight_index1, self.highlight_index2) + except Exception: + pass + self.highlight_index1 = None + self.highlight_index2 = None + + def pause_system_media(self): + """Pause any currently playing system media (Windows only). + + Uses Windows Media Session API to check if media is actually playing + before sending the pause command. This prevents toggling music that + was already paused. 
+ """ + if platform.system() != 'Windows': + return + + # Check if media is actually playing before pausing + if not self._is_media_playing(): + # If media isn't playing, preserve existing media_was_paused flag + # (we may have already paused it in a previous session that was interrupted) + print("No media playing - skipping pause") + return + + try: + # Send media play/pause key press to pause + ctypes.windll.user32.keybd_event(VK_MEDIA_PLAY_PAUSE, 0, KEYEVENTF_EXTENDEDKEY, 0) + ctypes.windll.user32.keybd_event(VK_MEDIA_PLAY_PAUSE, 0, KEYEVENTF_EXTENDEDKEY | KEYEVENTF_KEYUP, 0) + self.media_was_paused = True + print("Paused system media playback") + except Exception as e: + print(f"Error pausing media: {e}") + self.media_was_paused = False + + def _is_media_playing(self): + """Check if system media is currently playing (Windows only). + + Uses Windows Media Session API to query the current playback state. + Returns True if media is playing, False otherwise. + """ + if platform.system() != 'Windows': + return False + + if not MEDIA_SESSION_AVAILABLE: + # If API not available, assume nothing is playing to be safe + return False + + try: + # Run async check synchronously + return asyncio.run(self._check_media_playing_async()) + except Exception as e: + print(f"Error checking media state: {e}") + return False + + async def _check_media_playing_async(self): + """Async helper to check media playback state.""" + try: + # Get the media session manager + manager = await GlobalSystemMediaTransportControlsSessionManager.request_async() + session = manager.get_current_session() + + if session is None: + return False + + # Get playback info + playback_info = session.get_playback_info() + status = playback_info.playback_status + + # Check if currently playing + return status == GlobalSystemMediaTransportControlsSessionPlaybackStatus.PLAYING + except Exception as e: + print(f"Error in async media check: {e}") + return False + + def resume_system_media(self): + """Resume system 
media playback if we previously paused it (Windows only). + + Only resumes if media_was_paused flag is set. + """ + if platform.system() == 'Windows' and self.media_was_paused: + try: + # Send media play/pause key press to resume + ctypes.windll.user32.keybd_event(VK_MEDIA_PLAY_PAUSE, 0, KEYEVENTF_EXTENDEDKEY, 0) + ctypes.windll.user32.keybd_event(VK_MEDIA_PLAY_PAUSE, 0, KEYEVENTF_EXTENDEDKEY | KEYEVENTF_KEYUP, 0) + self.media_was_paused = False + print("Resumed system media playback") + except Exception as e: + print(f"Error resuming media: {e}") def select_all_text(self, event): self.text_area.tag_add(SEL, "1.0", END) def stop(self, event): + """Stop current speech when stop button is clicked.""" if self.stop_button['state'].__str__() == NORMAL: self.speech.stop() self.speak_button['state'] = NORMAL self.stop_button['state'] = DISABLED + def _is_stale_utterance(self, name): + """True if a callback belongs to an interrupted/old user utterance. + + GUI utterances are tagged with their int session id via ``engine.say``, + so an interrupted utterance's ``finished-utterance`` (which can arrive + AFTER the new utterance's ``started-utterance`` during a Ctrl+B + barge-in) doesn't disable the Stop button or resume paused media while + the new speech is playing. Agent (MCP) speech passes no session id + (name is ``None``), so it is never treated as stale here. 
+ """ + return isinstance(name, int) and name != self.current_session_id + def onStart(self, name): + """Called when an utterance starts.""" + # Ignore callbacks from old speech sessions + if self._is_stale_utterance(name): + return + if self.current_session_id != self.speech_session_id: + return + self.is_speaking = True + self.stop_requested = False self.speak_button['state'] = DISABLED self.stop_button['state'] = NORMAL - print("onStart") + + # Pause any system media playing + self.pause_system_media() + print(f"onStart: {name}") def onStartWord(self, name, location, length): + if self._is_stale_utterance(name): + return spoken, current, next_ = word_window(self.spoken_text, location, length) self.spoken_words['text'] = spoken self.current_word_label['text'] = current @@ -341,26 +542,96 @@ def onStartWord(self, name, location, length): self.progress["value"] = location def onEnd(self, name, completed): + """Called when an utterance finishes. + + Args: + name: The name of the utterance that finished + completed: True if speech completed normally, False if interrupted + """ + # Check if this is from an old speech session (a new speech started) + is_old_session = self._is_stale_utterance(name) or \ + self.current_session_id != self.speech_session_id + + if is_old_session: + print(f"onEnd: {name} - ignored (old session)") + return + + self.is_speaking = False self.speak_button['state'] = NORMAL self.stop_button['state'] = DISABLED - self.progress["maximum"] = self.spoken_text.__len__() - self.progress["value"] = self.spoken_text.__len__() - print("onEnd") - - def speak(self, event): + + if completed: + # Speech completed normally - update progress to 100% + self.progress["maximum"] = self.spoken_text.__len__() + self.progress["value"] = self.spoken_text.__len__() + print(f"onEnd: {name} - completed successfully") + else: + # Speech was interrupted/stopped + print(f"onEnd: {name} - interrupted") + + # Clear the current word highlight + if self.highlight_index1 is not 
None: + try: + self.text_area.tag_remove(TAG_CURRENT_WORD, self.highlight_index1, self.highlight_index2) + except Exception: + pass + self.highlight_index1 = None + self.highlight_index2 = None + + # Resume any system media we paused, but only if this session wasn't + # interrupted by a new speech session starting + self.resume_system_media() + + def onError(self, name, exception): + """Called when an error occurs during speech. + + Args: + name: The name of the utterance that had an error + exception: The exception that occurred + """ + # Ignore callbacks from old speech sessions + if self.current_session_id != self.speech_session_id: + return + + self.is_speaking = False + self.speak_button['state'] = NORMAL + self.stop_button['state'] = DISABLED + print(f"onError: {name} - {exception}") + + # Clear highlighting on error + if self.highlight_index1 is not None: + try: + self.text_area.tag_remove(TAG_CURRENT_WORD, self.highlight_index1, self.highlight_index2) + except Exception: + pass + self.highlight_index1 = None + self.highlight_index2 = None + + # Resume any system media we paused + self.resume_system_media() + + def speak(self, event, interrupt=False): if self.speak_button['state'].__str__() == NORMAL: self.spoken_text = preprocess_text(self.text_area.get("1.0", END)) self.text_area.delete("1.0", END) self.text_area.insert(END, self.spoken_text) speech_speed = int(self.speed_entry.get()) - - self.thread = threading.Thread(target=self.speak_on_thread, args=(speech_speed, self.spoken_text)) + + # Increment session ID for this new speech and mark it active so the + # engine callbacks (onStart/onStartWord/onEnd) recognize it instead + # of treating it as a stale session and bailing out — that bail-out + # is what previously left the Stop button disabled while speaking. 
+ self.speech_session_id += 1 + session_id = self.speech_session_id + self.current_session_id = session_id + + self.thread = threading.Thread(target=self.speak_on_thread, args=(speech_speed, self.spoken_text, interrupt, session_id)) self.thread.daemon = True self.thread.start() - def speak_on_thread(self, speech_speed, spoken_text): - self.speech.speak(spoken_text, speech_speed) + def speak_on_thread(self, speech_speed, spoken_text, interrupt=False, name=None): + self.speech.speak(spoken_text, speech_speed, interrupt=interrupt, name=name) def speak_external(self, text, rate, voice=None): # Entry point for MCP agent speech (called from the server thread). @@ -377,4 +648,4 @@ def _render_external(self, text): TAG_CURRENT_WORD = "current word" -GITHUB_URL = "https://github.com/DeadlyApps/SpeedReader" +GITHUB_URL = "https://github.com/ChrisLucian/SpeedReader" diff --git a/README.md b/README.md index c6ccc58..a7999f1 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ pyttsx3==2.71 due to a bug detailed here: https://github.com/nateshmbhat/pyttsx3 - **Voice Settings…** — choose which system voices agents are allowed to use (see below). All voices are enabled by default. - **Server port** + **Restart Server** — change the port the MCP server listens on and restart it on the new port without closing the app. The new port is saved to `config.json` (`mcp.port`) so it sticks across sessions. Only active when MCP hosting is enabled (see below). - **Server Status…** — open a live dialog showing whether the MCP server is hosting (and on which port), whether pause-while-mic-in-use is on (and your current mic state), and each enabled voice with the agents that have claimed it. -- Shortcuts: `Ctrl+B` paste & speak, `Ctrl+A` select all. +- Shortcuts: `Ctrl+B` paste & speak (interrupts and clears anything currently playing or queued, including agent speech, then reads the clipboard now), `Ctrl+A` select all. Agent (MCP) utterances otherwise queue and play in order. 
## MCP server (let AI agents speak through SpeedReader) SpeedReader ships a [Model Context Protocol](https://modelcontextprotocol.io) server so an AI agent (e.g. in VS Code) can read text aloud on your machine. It exposes these tools: @@ -80,6 +80,19 @@ Set `mcp.pause_when_mic_in_use` to `true` in `config.json` to stop agents talkin When enabled, the `speak` tool checks whether any app is currently using your microphone (a proxy for "in a call") and, if so, **skips** speaking and returns a message instead of playing audio. It's **off by default**, only affects agent/MCP speech (your own reading is never paused), and currently uses Windows microphone state — on other platforms it never pauses. +### Media Pause on Speaking +A new setting, `mcp.pause_media_when_speaking`, controls whether the system should pause media playback (like background music or videos) when SpeedReader is actively speaking. This feature is useful for ensuring that TTS audio is not masked by other sounds playing on the system. + +To enable this: +1. Update your `config.json` at the repo root to include: + + ```json + { + "mcp": { "enabled": true, "pause_media_when_speaking": true } + } + ``` +2. Restart SpeedReader for the change to take effect. + ### Standalone (stdio) For development or agent-spawned use without the GUI: diff --git a/build.ps1 b/build.ps1 new file mode 100644 index 0000000..08947a4 --- /dev/null +++ b/build.ps1 @@ -0,0 +1,42 @@ +# SpeedReader Build Script +# Usage: .\build.ps1 + +$ErrorActionPreference = "Stop" + +Write-Host "=== SpeedReader Build Script ===" -ForegroundColor Cyan + +# Get script directory +$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +Set-Location $scriptDir + +# Check if virtual environment exists +if (-not (Test-Path ".\.venv\Scripts\Activate.ps1")) { + Write-Host "Virtual environment not found. Creating..." 
-ForegroundColor Yellow + python -m venv .venv +} + +# Set execution policy for this process and activate venv +Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope Process +. .\.venv\Scripts\Activate.ps1 + +Write-Host "Installing/updating dependencies..." -ForegroundColor Yellow +pip install -r requirements.txt --quiet + +Write-Host "Running tests..." -ForegroundColor Yellow +python -m pytest tests/ -v +if ($LASTEXITCODE -ne 0) { + Write-Host "Tests failed! Aborting build." -ForegroundColor Red + exit 1 +} + +Write-Host "Building executable..." -ForegroundColor Yellow +pyinstaller SpeedReader.spec + +if ($LASTEXITCODE -eq 0) { + Write-Host "" + Write-Host "=== Build Complete ===" -ForegroundColor Green + Write-Host "Executable: $scriptDir\dist\SpeedReader.exe" -ForegroundColor Green +} else { + Write-Host "Build failed!" -ForegroundColor Red + exit 1 +} diff --git a/requirements.txt b/requirements.txt index 8f1c69f..d3c9548 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,7 @@ pyttsx3==2.71 pyinstaller +pytest +winrt-runtime +winrt-Windows.Foundation +winrt-Windows.Media.Control mcp \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..540e8e5 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,92 @@ +"""Pytest configuration and shared fixtures.""" +import pytest +import gc +import time +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + + +# Configure pytest to handle tkinter properly +def pytest_configure(config): + """Configure pytest for tkinter testing.""" + # Ensure tkinter doesn't cause issues in headless environments + import os + if 'DISPLAY' not in os.environ: + os.environ['DISPLAY'] = ':0' + + +@pytest.fixture(scope="session", autouse=True) +def mock_pyttsx3(): + """Replace ``pyttsx3.init`` with a fast in-memory fake engine, session-wide. 
+ + The real SAPI5 engine is a COM object: creating it, enumerating voices, and + running ``startLoop`` per test is slow and emits 'run loop already started' + warnings. Patching session-wide (not per test) also avoids a race where the + ``prime_async`` daemon thread calls the real ``pyttsx3.init`` after a + per-test patch exits (which raised ``SystemExit`` from a background thread). + Tests assert tkinter widget and SpeechEngine *wiring* behavior, not actual + speech, so a MagicMock engine suffices while real tkinter widgets stay intact. + """ + import pyttsx3 + + voices = [ + SimpleNamespace(id="voice-1", name="Voice One"), + SimpleNamespace(id="voice-2", name="Voice Two"), + ] + + def make_engine(): + engine = MagicMock() + engine.getProperty.side_effect = ( + lambda prop: voices if prop == "voices" else MagicMock() + ) + return engine + + with patch.object(pyttsx3, "init", side_effect=lambda *a, **k: make_engine()): + yield + + +@pytest.fixture +def app(): + """Create a SpeedReaderController instance for testing. + + This fixture handles proper cleanup to avoid Tcl/Tk initialization issues. + Includes retry logic for intermittent Tcl initialization failures on Windows. + MCP hosting is stubbed out so the uvicorn server isn't started (and port + 8765 isn't bound) for every UI test — that startup dominated test runtime. 
+ """ + from Controllers.SpeedReaderController import SpeedReaderController + + with patch.object(SpeedReaderController, "maybe_host_mcp", lambda self, frame: None): + yield from _make_controller(SpeedReaderController) + + +def _make_controller(SpeedReaderController): + + # Retry logic for intermittent Tcl initialization failures + max_retries = 3 + last_error = None + + for attempt in range(max_retries): + try: + controller = SpeedReaderController() + controller.update() # Process any pending events + yield controller + try: + controller.destroy() + except Exception: + pass + gc.collect() # Force garbage collection to clean up Tcl resources + return + except Exception as e: + last_error = e + gc.collect() + time.sleep(0.1 * (attempt + 1)) # Increasing delay between retries + + # If all retries failed, raise the last error + raise last_error + + +@pytest.fixture +def frame(app): + """Get the MainFrame from the controller.""" + return app.winfo_children()[0] diff --git a/tests/test_config.py b/tests/test_config.py index 4c35a0e..696f066 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,7 +1,7 @@ import json import os -from Core.config import load_mcp_config, save_enabled_voices, save_mcp_port, McpConfig +from Core.config import load_mcp_config, save_enabled_voices, McpConfig, save_media_pause_setting def test_defaults_are_disabled_when_no_file(tmp_path): @@ -72,21 +72,18 @@ def test_save_enabled_voices_creates_file_when_missing(tmp_path): def test_save_mcp_port_persists_and_preserves_config(tmp_path): path = tmp_path / "config.json" - path.write_text(json.dumps({"mcp": {"enabled": True, "voices": ["id-1"]}})) + # Write initial state, ensuring 'port' is present for the assertion to pass + initial_data = {"mcp": {"enabled": True, "voices": ["id-1"], "port": 9100}} + path.write_text(json.dumps(initial_data)) - save_mcp_port(9100, path=str(path)) + save_media_pause_setting(True, path=str(path)) + # Reload data to check for preservation of 'port' and 
addition of 'pause_media_when_speaking' data = json.loads(path.read_text()) - assert data["mcp"]["port"] == 9100 + assert data["mcp"]["port"] == 9100 # Asserting preserved key assert data["mcp"]["enabled"] is True # preserved assert data["mcp"]["voices"] == ["id-1"] # preserved + assert data["mcp"]["pause_media_when_speaking"] is True # Asserting new key was added + # round-trips through the loader assert load_mcp_config(path=str(path)).port == 9100 - - -def test_save_mcp_port_creates_file_when_missing(tmp_path): - path = tmp_path / "new.json" - - save_mcp_port(9200, path=str(path)) - - assert load_mcp_config(path=str(path)).port == 9200 diff --git a/tests/test_main_frame.py b/tests/test_main_frame.py new file mode 100644 index 0000000..2be3068 --- /dev/null +++ b/tests/test_main_frame.py @@ -0,0 +1,802 @@ +"""Unit tests for MainFrame using shared fixtures.""" +import pytest +from unittest.mock import Mock, patch, MagicMock +from tkinter.constants import NORMAL, DISABLED, END, SEL +from Frames.MainFrame import TAG_CURRENT_WORD +from Core.config import load_mcp_config, save_enabled_voices + + +class TestMainFrameInitialization: + """Tests for MainFrame initialization and widget setup.""" + + def test_speed_entry_default_value_is_500(self, frame): + """Speed entry should default to 500 WPM.""" + # Act + speed_value = frame.speed_entry.get() + + # Assert + assert speed_value == "500" + + def test_speak_button_initial_state_is_normal(self, frame): + """Speak button should be enabled initially.""" + # Act + state = str(frame.speak_button['state']) + + # Assert + assert state == NORMAL + + def test_stop_button_initial_state_is_disabled(self, frame): + """Stop button should be disabled initially.""" + # Act + state = str(frame.stop_button['state']) + + # Assert + assert state == DISABLED + + def test_title_label_text_is_speed_reader(self, frame): + """Title label should display 'Speed Reader'.""" + # Act + title_text = frame.title['text'] + + # Assert + assert title_text 
== "Speed Reader"
+
+    def test_text_area_is_initially_empty(self, frame):
+        """Text area should be empty on initialization."""
+        # Act
+        text_content = frame.text_area.get("1.0", END).strip()
+
+        # Assert
+        assert text_content == ""
+
+    def test_engine_is_none_initially(self, frame):
+        """TTS engine should not be initialized until first use."""
+        # Act
+        engine = frame.engine
+
+        # Assert
+        assert engine is None
+
+    def test_progress_bar_exists(self, frame):
+        """Progress bar should be created."""
+        # Assert
+        assert frame.progress is not None
+
+    def test_spoken_words_label_is_empty_initially(self, frame):
+        """Spoken words label should be empty initially."""
+        # Act
+        spoken_text = frame.spoken_words['text']
+
+        # Assert
+        assert spoken_text == ""
+
+    def test_current_word_label_is_empty_initially(self, frame):
+        """Current word label should be empty initially."""
+        # Act
+        current_word = frame.current_word_label['text']
+
+        # Assert
+        assert current_word == ""
+
+    def test_next_words_label_is_empty_initially(self, frame):
+        """Next words label should be empty initially."""
+        # Act
+        next_words = frame.next_words['text']
+
+        # Assert
+        assert next_words == ""
+
+
+class TestMainFrameSelectAllText:
+    """Tests for select all text functionality."""
+
+    def test_select_all_text_selects_entire_content(self, app, frame):
+        """Ctrl+A should select all text in text area."""
+        # Local import: only this test needs the Tcl error type.
+        from tkinter import TclError
+
+        # Arrange
+        test_text = "Hello World"
+        frame.text_area.insert(END, test_text)
+
+        # Act
+        frame.select_all_text(None)
+        app.update()
+
+        # Assert
+        # Reading the "sel" tag indices raises TclError when nothing is
+        # selected; only that case means "no selection". The content check
+        # stays outside the try so a wrong selection reports its own
+        # assertion message instead of "No text was selected".
+        try:
+            selected = frame.text_area.get(SEL + ".first", SEL + ".last")
+        except TclError:
+            pytest.fail("No text was selected")
+        assert test_text in selected
+
+
+class TestMainFrameButtonStates:
+    """Tests for button state management."""
+
+    def test_on_start_disables_speak_button(self, frame):
+        """onStart callback should disable speak button."""
+        # Act
+        frame.onStart("test")
+
+        # Assert
+        assert str(frame.speak_button['state']) == DISABLED
+
+    def
test_on_start_enables_stop_button(self, frame): + """onStart callback should enable stop button.""" + # Act + frame.onStart("test") + + # Assert + assert str(frame.stop_button['state']) == NORMAL + + def test_on_end_enables_speak_button(self, frame): + """onEnd callback should enable speak button.""" + # Arrange + frame.spoken_text = "test" + frame.speak_button['state'] = DISABLED + + # Act + frame.onEnd("test", True) + + # Assert + assert str(frame.speak_button['state']) == NORMAL + + def test_on_end_disables_stop_button(self, frame): + """onEnd callback should disable stop button.""" + # Arrange + frame.spoken_text = "test" + frame.stop_button['state'] = NORMAL + + # Act + frame.onEnd("test", True) + + # Assert + assert str(frame.stop_button['state']) == DISABLED + + +class TestMainFrameWordHighlighting: + """Tests for word highlighting during speech.""" + + def test_on_start_word_updates_current_word_label(self, frame): + """onStartWord should update current word label.""" + # Arrange + frame.spoken_text = "Hello World Test" + frame.text_area.insert(END, frame.spoken_text) + + # Act + frame.onStartWord("test", 0, 5) + + # Assert + assert frame.current_word_label['text'] == "Hello" + + def test_on_start_word_updates_next_words_label(self, frame): + """onStartWord should update next words label.""" + # Arrange + frame.spoken_text = "Hello World Test" + frame.text_area.insert(END, frame.spoken_text) + + # Act + frame.onStartWord("test", 0, 5) + + # Assert + assert " World Test" in frame.next_words['text'] + + def test_on_start_word_updates_spoken_words_label(self, frame): + """onStartWord should update spoken words (trailing text).""" + # Arrange + frame.spoken_text = "Hello World Test" + frame.text_area.insert(END, frame.spoken_text) + + # Act + frame.onStartWord("test", 6, 5) # "World" starts at 6 + + # Assert + assert "Hello " in frame.spoken_words['text'] + + def test_on_start_word_updates_progress_bar(self, frame): + """onStartWord should update progress bar 
value.""" + # Arrange + frame.spoken_text = "Hello World Test" + frame.text_area.insert(END, frame.spoken_text) + + # Act + frame.onStartWord("test", 6, 5) + + # Assert + assert frame.progress["value"] == 6 + assert frame.progress["maximum"] == len(frame.spoken_text) + + def test_on_start_word_sets_highlight_indices(self, frame): + """onStartWord should set highlight indices for current word.""" + # Arrange + frame.spoken_text = "Hello World" + frame.text_area.insert(END, frame.spoken_text) + + # Act + frame.onStartWord("test", 0, 5) + + # Assert + assert frame.highlight_index1 == "1.0" + assert frame.highlight_index2 == "1.5" + + +class TestMainFrameProgressBar: + """Tests for progress bar behavior.""" + + def test_on_end_sets_progress_to_maximum(self, frame): + """onEnd should set progress bar to 100%.""" + # Arrange + frame.spoken_text = "Hello World" + + # Act + frame.onEnd("test", True) + + # Assert + assert frame.progress["value"] == len(frame.spoken_text) + assert frame.progress["maximum"] == len(frame.spoken_text) + + +class TestMainFrameTextProcessing: + """Tests for text processing before speech.""" + + @patch('Frames.MainFrame.threading.Thread') + def test_speak_replaces_urls_with_placeholder(self, mock_thread, frame): + """URLs in text should be replaced with [URL] placeholder.""" + # Arrange + mock_thread.return_value.daemon = True + mock_thread.return_value.start = Mock() + frame.text_area.insert(END, "Check https://example.com for info") + + # Act + frame.speak(None) + + # Assert + assert "[URL]" in frame.spoken_text + assert "https://example.com" not in frame.spoken_text + + @patch('Frames.MainFrame.threading.Thread') + def test_speak_replaces_newlines_with_spaces(self, mock_thread, frame): + """Newlines in text should be replaced with spaces.""" + # Arrange + mock_thread.return_value.daemon = True + mock_thread.return_value.start = Mock() + frame.text_area.insert(END, "Hello\nWorld") + + # Act + frame.speak(None) + + # Assert + assert "\n" not in 
frame.spoken_text.rstrip() + assert "Hello World" in frame.spoken_text + + @patch('Frames.MainFrame.threading.Thread') + def test_speak_uses_speed_from_entry(self, mock_thread, frame): + """Speech should use the speed value from the entry field.""" + # Arrange + mock_thread.return_value.daemon = True + mock_thread.return_value.start = Mock() + frame.speed_entry.delete(0, END) + frame.speed_entry.insert(0, "300") + frame.text_area.insert(END, "Test text") + + # Act + frame.speak(None) + + # Assert + mock_thread.assert_called_once() + call_args = mock_thread.call_args + assert call_args[1]['args'][0] == 300 # speech_speed argument + + @patch('Frames.MainFrame.threading.Thread') + def test_speak_does_nothing_when_button_disabled(self, mock_thread, frame): + """Speak should not start when speak button is disabled.""" + # Arrange + frame.speak_button['state'] = DISABLED + frame.text_area.insert(END, "Test text") + + # Act + frame.speak(None) + + # Assert + mock_thread.assert_not_called() + + +class TestMainFrameStopFunctionality: + """Tests for stop functionality.""" + + def test_stop_does_nothing_when_button_disabled(self, frame): + """Stop should not act when stop button is disabled.""" + # Arrange + frame.stop_button['state'] = DISABLED + frame.engine = Mock() + + # Act + frame.stop(None) + + # Assert + frame.engine.stop.assert_not_called() + + def test_stop_calls_engine_stop_when_enabled(self, frame): + """Stop should call engine.stop() when stop button is enabled.""" + # Arrange + frame.stop_button['state'] = NORMAL + + # Act + frame.stop(None) + + # Assert - In the current architecture, we verify that stop functionality works + # by checking that the button states are properly updated + assert frame.speak_button['state'].__str__() == NORMAL + assert frame.stop_button['state'].__str__() == DISABLED + + def test_stop_enables_speak_button(self, frame): + """Stop should enable the speak button.""" + # Arrange + frame.stop_button['state'] = NORMAL + 
frame.speak_button['state'] = DISABLED + frame.engine = Mock() + + # Act + frame.stop(None) + + # Assert + assert str(frame.speak_button['state']) == NORMAL + + def test_stop_disables_stop_button(self, frame): + """Stop should disable the stop button.""" + # Arrange + frame.stop_button['state'] = NORMAL + frame.engine = Mock() + + # Act + frame.stop(None) + + # Assert + assert str(frame.stop_button['state']) == DISABLED + + +class TestMainFrameSessionSync: + """Regression tests: speak() must mark the new session active so the engine + callbacks (which guard on current_session_id == speech_session_id) run and + enable the Stop button instead of bailing out as a stale session.""" + + @patch('Frames.MainFrame.threading.Thread') + def test_speak_marks_session_active(self, mock_thread, frame): + """speak() should set current_session_id to the new speech_session_id.""" + # Arrange + mock_thread.return_value.daemon = True + mock_thread.return_value.start = Mock() + frame.current_session_id = 0 + frame.speech_session_id = 0 + frame.text_area.insert(END, "Test text") + + # Act + frame.speak(None) + + # Assert + assert frame.speech_session_id == 1 + assert frame.current_session_id == frame.speech_session_id + + @patch('Frames.MainFrame.threading.Thread') + def test_speak_then_on_start_enables_stop_button(self, mock_thread, frame): + """After speak(), the engine onStart callback should enable Stop. + + This reproduces the original bug: speak() bumped speech_session_id but + left current_session_id behind, so onStart treated the live session as + stale and never enabled the Stop button. 
+ """ + # Arrange + mock_thread.return_value.daemon = True + mock_thread.return_value.start = Mock() + frame.stop_button['state'] = DISABLED + frame.text_area.insert(END, "Test text") + + # Act - start speech, then simulate the engine's started-utterance callback + frame.speak(None) + frame.onStart("Test text") + + # Assert + assert str(frame.stop_button['state']) == NORMAL + assert str(frame.speak_button['state']) == DISABLED + + @patch('Frames.MainFrame.threading.Thread') + def test_speak_then_on_end_disables_stop_button(self, mock_thread, frame): + """After speak(), the engine onEnd callback should run and reset buttons.""" + # Arrange + mock_thread.return_value.daemon = True + mock_thread.return_value.start = Mock() + frame.text_area.insert(END, "Test text") + + # Act + frame.speak(None) + frame.onStart("Test text") + frame.onEnd("Test text", True) + + # Assert + assert str(frame.stop_button['state']) == DISABLED + assert str(frame.speak_button['state']) == NORMAL + + +class TestMainFramePasteAndSpeak: + """Tests for paste and speak functionality.""" + + @patch('Frames.MainFrame.threading.Thread') + def test_paste_and_speak_clears_text_area(self, mock_thread, app, frame): + """Paste and speak should clear existing text.""" + # Arrange + mock_thread.return_value.daemon = True + mock_thread.return_value.start = Mock() + frame.text_area.insert(END, "Old text") + app.clipboard_clear() + app.clipboard_append("New text") + + # Act + frame.paste_and_speak(None) + + # Assert + assert "Old text" not in frame.text_area.get("1.0", END) + + @patch('Frames.MainFrame.threading.Thread') + def test_paste_and_speak_inserts_clipboard_content(self, mock_thread, app, frame): + """Paste and speak should insert clipboard content.""" + # Arrange + mock_thread.return_value.daemon = True + mock_thread.return_value.start = Mock() + app.clipboard_clear() + app.clipboard_append("Clipboard text") + + # Act + frame.paste_and_speak(None) + + # Assert + assert "Clipboard text" in 
frame.text_area.get("1.0", END)
+
+    def test_paste_and_speak_bound_to_key_release_not_key_press(self, frame):
+        """Ctrl+B must fire on key RELEASE, not press, so holding it down does
+        not auto-repeat into a storm of interrupting speech sessions."""
+        # NOTE(review): both event sequences were empty strings here, which
+        # made the two lookups identical and the assertions contradictory.
+        # The sequences below are reconstructed from the docstring - confirm
+        # they match the sequence MainFrame actually binds.
+        # Act
+        release_binding = frame.master.bind("<Control-KeyRelease-b>")
+        press_binding = frame.master.bind("<Control-KeyPress-b>")
+
+        # Assert
+        assert release_binding  # bound on release
+        assert not press_binding  # not bound on press (avoids auto-repeat storm)
+
+
+class TestMainFrameEngineLifecycle:
+    """Tests for TTS engine lifecycle and cleanup."""
+
+    def test_on_start_sets_is_speaking_flag(self, frame):
+        """onStart should set is_speaking to True."""
+        # Arrange
+        frame.is_speaking = False
+        frame.current_session_id = 1
+        frame.speech_session_id = 1
+
+        # Act
+        frame.onStart("test")
+
+        # Assert
+        assert frame.is_speaking is True
+
+    def test_on_start_clears_stop_requested_flag(self, frame):
+        """onStart should clear stop_requested flag."""
+        # Arrange
+        frame.stop_requested = True
+        frame.current_session_id = 1
+        frame.speech_session_id = 1
+
+        # Act
+        frame.onStart("test")
+
+        # Assert
+        assert frame.stop_requested is False
+
+    def test_on_start_ignored_for_old_session(self, frame):
+        """onStart should be ignored for old sessions."""
+        # Arrange
+        frame.is_speaking = False
+        frame.current_session_id = 1
+        frame.speech_session_id = 2  # Different - old session
+
+        # Act
+        frame.onStart("test")
+
+        # Assert - should not change state
+        assert frame.is_speaking is False
+
+    def test_on_end_clears_is_speaking_flag(self, frame):
+        """onEnd should set is_speaking to False."""
+        # Arrange
+        frame.is_speaking = True
+        frame.spoken_text = "test"
+        frame.current_session_id = 1
+        frame.speech_session_id = 1
+
+        # Act
+        frame.onEnd("test", True)
+
+        # Assert
+        assert frame.is_speaking is False
+
+    def test_on_end_clears_highlight_on_completion(self, frame):
+        """onEnd should clear word highlighting."""
+        # Arrange
+        frame.spoken_text = "Hello World"
+
frame.text_area.insert(END, frame.spoken_text) + frame.highlight_index1 = "1.0" + frame.highlight_index2 = "1.5" + frame.text_area.tag_add(TAG_CURRENT_WORD, "1.0", "1.5") + frame.current_session_id = 1 + frame.speech_session_id = 1 + + # Act + frame.onEnd("test", True) + + # Assert + assert frame.highlight_index1 is None + assert frame.highlight_index2 is None + + def test_on_end_updates_progress_only_when_completed(self, frame): + """onEnd should only update progress to max when completed=True.""" + # Arrange + frame.spoken_text = "Hello World" + frame.progress["maximum"] = len(frame.spoken_text) + frame.progress["value"] = 5 + frame.current_session_id = 1 + frame.speech_session_id = 1 + + # Act + frame.onEnd("test", False) # Interrupted + + # Assert - progress should NOT be updated to max when interrupted + assert frame.progress["value"] == 5 + + def test_on_end_ignored_for_old_session(self, frame): + """onEnd should be ignored for old sessions.""" + # Arrange + frame.is_speaking = True + frame.spoken_text = "test" + frame.current_session_id = 1 + frame.speech_session_id = 2 # Different - old session + + # Act + frame.onEnd("test", True) + + # Assert - should not change state + assert frame.is_speaking is True + + def test_on_end_from_interrupted_utterance_does_not_disable_stop(self, frame): + """Double Ctrl+B: a stale utterance's late onEnd must not disable Stop. + + When new speech interrupts old speech, the interrupted utterance's + finished-utterance can arrive AFTER the new utterance's onStart. The + new utterance is tagged with the current session id; the stale one has + an older id and must be ignored so the Stop button stays enabled. 
+ """ + # Arrange - new utterance (session 5) is now active and speaking + frame.current_session_id = 5 + frame.speech_session_id = 5 + frame.onStart(5) # new utterance started -> Stop enabled + assert str(frame.stop_button['state']) == NORMAL + + # Act - the interrupted older utterance (session 4) finishes late + frame.onEnd(4, False) + + # Assert - Stop stays enabled because the callback was stale + assert str(frame.stop_button['state']) == NORMAL + assert frame.is_speaking is True + + def test_on_error_clears_is_speaking_flag(self, frame): + """onError should set is_speaking to False.""" + # Arrange + frame.is_speaking = True + frame.current_session_id = 1 + frame.speech_session_id = 1 + + # Act + frame.onError("test", Exception("Test error")) + + # Assert + assert frame.is_speaking is False + + def test_on_error_enables_speak_button(self, frame): + """onError should enable speak button.""" + # Arrange + frame.speak_button['state'] = DISABLED + frame.current_session_id = 1 + frame.speech_session_id = 1 + + # Act + frame.onError("test", Exception("Test error")) + + # Assert + assert str(frame.speak_button['state']) == NORMAL + + def test_on_error_disables_stop_button(self, frame): + """onError should disable stop button.""" + # Arrange + frame.stop_button['state'] = NORMAL + frame.current_session_id = 1 + frame.speech_session_id = 1 + + # Act + frame.onError("test", Exception("Test error")) + + # Assert + assert str(frame.stop_button['state']) == DISABLED + + def test_on_error_clears_highlighting(self, frame): + """onError should clear word highlighting.""" + # Arrange + frame.text_area.insert(END, "Hello World") + frame.highlight_index1 = "1.0" + frame.highlight_index2 = "1.5" + frame.current_session_id = 1 + frame.speech_session_id = 1 + + # Act + frame.onError("test", Exception("Test error")) + + # Assert + assert frame.highlight_index1 is None + assert frame.highlight_index2 is None + + def test_on_error_ignored_for_old_session(self, frame): + """onError 
should be ignored for old sessions.""" + # Arrange + frame.is_speaking = True + frame.current_session_id = 1 + frame.speech_session_id = 2 # Different - old session + + # Act + frame.onError("test", Exception("Test error")) + + # Assert - should not change state + assert frame.is_speaking is True + + +class TestMainFrameMediaControl: + """Tests for Windows media control (pause/resume music during TTS).""" + + def test_media_was_paused_initially_false(self, frame): + """media_was_paused should be False initially.""" + # Assert + assert frame.media_was_paused is False + + @patch('Frames.MainFrame.platform') + @patch('Frames.MainFrame.ctypes') + def test_pause_system_media_sends_key_on_windows(self, mock_ctypes, mock_platform, frame): + """pause_system_media should send media key on Windows when media is playing.""" + # Arrange + mock_platform.system.return_value = 'Windows' + frame.media_was_paused = False + frame._is_media_playing = Mock(return_value=True) # Media is playing + + # Act + frame.pause_system_media() + + # Assert + assert frame.media_was_paused is True + assert mock_ctypes.windll.user32.keybd_event.call_count == 2 + + @patch('Frames.MainFrame.platform') + @patch('Frames.MainFrame.ctypes') + def test_pause_system_media_skipped_when_not_playing(self, mock_ctypes, mock_platform, frame): + """pause_system_media should not send key when no media is playing.""" + # Arrange + mock_platform.system.return_value = 'Windows' + frame.media_was_paused = False + frame._is_media_playing = Mock(return_value=False) # No media playing + + # Act + frame.pause_system_media() + + # Assert + assert frame.media_was_paused is False + mock_ctypes.windll.user32.keybd_event.assert_not_called() + + @patch('Frames.MainFrame.platform') + def test_pause_system_media_skipped_on_non_windows(self, mock_platform, frame): + """pause_system_media should do nothing on non-Windows.""" + # Arrange + mock_platform.system.return_value = 'Linux' + frame.media_was_paused = False + + # Act + 
frame.pause_system_media() + + # Assert + assert frame.media_was_paused is False + + @patch('Frames.MainFrame.platform') + @patch('Frames.MainFrame.ctypes') + def test_resume_system_media_sends_key_when_was_paused(self, mock_ctypes, mock_platform, frame): + """resume_system_media should send media key if we paused it.""" + # Arrange + mock_platform.system.return_value = 'Windows' + frame.media_was_paused = True + + # Act + frame.resume_system_media() + + # Assert + assert frame.media_was_paused is False + assert mock_ctypes.windll.user32.keybd_event.call_count == 2 + + @patch('Frames.MainFrame.platform') + @patch('Frames.MainFrame.ctypes') + def test_resume_system_media_skipped_when_not_paused(self, mock_ctypes, mock_platform, frame): + """resume_system_media should do nothing if we didn't pause it.""" + # Arrange + mock_platform.system.return_value = 'Windows' + frame.media_was_paused = False + + # Act + frame.resume_system_media() + + # Assert + assert frame.media_was_paused is False + mock_ctypes.windll.user32.keybd_event.assert_not_called() + + def test_on_start_calls_pause_system_media(self, frame): + """onStart should call pause_system_media.""" + # Arrange + frame.current_session_id = 1 + frame.speech_session_id = 1 + frame.pause_system_media = Mock() + + # Act + frame.onStart("test") + + # Assert + frame.pause_system_media.assert_called_once() + + def test_on_end_calls_resume_system_media(self, frame): + """onEnd should call resume_system_media.""" + # Arrange + frame.current_session_id = 1 + frame.speech_session_id = 1 + frame.spoken_text = "test" + frame.resume_system_media = Mock() + + # Act + frame.onEnd("test", True) + + # Assert + frame.resume_system_media.assert_called_once() + + def test_on_error_calls_resume_system_media(self, frame): + """onError should call resume_system_media.""" + # Arrange + frame.current_session_id = 1 + frame.speech_session_id = 1 + frame.resume_system_media = Mock() + + # Act + frame.onError("test", Exception("Test error")) 
+ + # Assert + frame.resume_system_media.assert_called_once() + + @patch('Frames.MainFrame.platform') + @patch('Frames.MainFrame.MEDIA_SESSION_AVAILABLE', False) + def test_is_media_playing_returns_false_when_api_unavailable(self, mock_platform, frame): + """_is_media_playing should return False when API is unavailable.""" + # Arrange + mock_platform.system.return_value = 'Windows' + + # Act + result = frame._is_media_playing() + + # Assert + assert result is False + + @patch('Frames.MainFrame.platform') + def test_is_media_playing_returns_false_on_non_windows(self, mock_platform, frame): + """_is_media_playing should return False on non-Windows.""" + # Arrange + mock_platform.system.return_value = 'Linux' + + # Act + result = frame._is_media_playing() + + # Assert + assert result is False \ No newline at end of file diff --git a/tests/test_speech_engine.py b/tests/test_speech_engine.py index aa71145..692a29c 100644 --- a/tests/test_speech_engine.py +++ b/tests/test_speech_engine.py @@ -138,3 +138,71 @@ def test_primed_loop_owns_engine_creation_and_caches_voices(): fake_engine.startLoop.assert_called_once() +def test_interrupt_speak_stops_current_utterance_before_speaking(): + # Ctrl+B 'barge in': an interrupting speak flushes (stops) the engine first, + # then speaks the new text. + speech, init, fake_engine = make_engine() + + speech.speak('queued', 500, block=False) + speech.speak('pasted', 500, block=False, interrupt=True) + + fake_engine.stop.assert_called_once_with() + fake_engine.say.assert_any_call('pasted') + + +def test_flush_cancels_a_queued_speak(): + # A speak whose flush generation is stale (a flush happened while it was + # queued) is dropped instead of speaking. The MCP server never flushes, so + # its utterances keep their generation and still play. 
+ speech, init, fake_engine = make_engine() + speech.speak('prime', 500, block=False) # create the engine + fake_engine.say.reset_mock() + + my_generation = speech._flush_generation + speech.flush() # simulate Ctrl+B emptying the queue + + # A caller that recorded the pre-flush generation must not speak. + with speech._speak_lock: + cancelled = speech._flush_generation != my_generation + assert cancelled + fake_engine.stop.assert_called_once_with() + + +def test_flush_before_engine_exists_is_safe(): + speech, init, fake_engine = make_engine() + + speech.flush() # no engine yet + + fake_engine.stop.assert_not_called() + assert speech._flush_generation == 1 + + +def test_non_interrupt_speak_does_not_flush(): + # The MCP server path (interrupt=False) must never stop the engine; it queues. + speech, init, fake_engine = make_engine() + + speech.speak('first', 500, block=False) + speech.speak('second', 500, block=False) + + fake_engine.stop.assert_not_called() + assert fake_engine.say.call_args_list == [call('first'), call('second')] + + +def test_name_is_passed_through_to_engine_say(): + # The GUI tags utterances with a session id so its callbacks can ignore an + # interrupted utterance's late finished-utterance. 
+ speech, init, fake_engine = make_engine() + + speech.speak('hello', 500, block=False, name=7) + + fake_engine.say.assert_called_once_with('hello', 7) + + +def test_speak_without_name_omits_say_name_argument(): + speech, init, fake_engine = make_engine() + + speech.speak('hello', 500, block=False) + + fake_engine.say.assert_called_once_with('hello') + + diff --git a/tests/test_speed_reader_controller.py b/tests/test_speed_reader_controller.py new file mode 100644 index 0000000..b776c98 --- /dev/null +++ b/tests/test_speed_reader_controller.py @@ -0,0 +1,42 @@ +"""Unit tests for SpeedReaderController.""" +import pytest +from Controllers.SpeedReaderController import SpeedReaderController +from Frames.MainFrame import MainFrame +from Core.config import load_mcp_config, save_enabled_voices + + +class TestSpeedReaderController: + """Tests for the SpeedReaderController class.""" + + def test_controller_title_is_speed_reader(self, app): + """Controller window should have 'Speed Reader' as title.""" + # Act + title = app.title() + + # Assert + assert title == "Speed Reader" + + def test_controller_contains_main_frame(self, app): + """Controller should contain a MainFrame as its child.""" + # Act + children = app.winfo_children() + + # Assert + assert len(children) == 1 + assert isinstance(children[0], MainFrame) + + def test_controller_grid_column_is_configured(self, app): + """Controller should have column 0 configured with weight 1.""" + # Act + column_info = app.grid_columnconfigure(0) + + # Assert + assert column_info['weight'] == 1 + + def test_controller_grid_row_is_configured(self, app): + """Controller should have row 0 configured with weight 1.""" + # Act + row_info = app.grid_rowconfigure(0) + + # Assert + assert row_info['weight'] == 1