From 9bab4baedf5f4c6781c4ffeac3e09056c0d72101 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 19 Jun 2026 17:15:02 +0900 Subject: [PATCH 01/26] refactor(claude): extract parseClaudeStartOptions Pull the CLI arg parsing out of claudeCommand.run into a pure function so the option handling is unit-testable. No behavior change, except that --hapi-starting-mode now also accepts 'pty' (previously only 'local' and 'remote' passed validation). --- cli/src/commands/claude.test.ts | 30 ++++++++ cli/src/commands/claude.ts | 118 ++++++++++++++++++-------------- 2 files changed, 95 insertions(+), 53 deletions(-) create mode 100644 cli/src/commands/claude.test.ts diff --git a/cli/src/commands/claude.test.ts b/cli/src/commands/claude.test.ts new file mode 100644 index 0000000000..805a3fe29a --- /dev/null +++ b/cli/src/commands/claude.test.ts @@ -0,0 +1,30 @@ +import { describe, it, expect } from 'vitest' +import { parseClaudeStartOptions } from './claude' + +describe('parseClaudeStartOptions', () => { + it('maps --yolo to bypassPermissions and forwards --dangerously-skip-permissions', () => { + const { options } = parseClaudeStartOptions(['--hapi-starting-mode', 'remote', '--yolo']) + expect(options.startingMode).toBe('remote') + expect(options.permissionMode).toBe('bypassPermissions') + expect(options.claudeArgs).toContain('--dangerously-skip-permissions') + }) + + it('passes through model / effort / unknown args', () => { + const { options } = parseClaudeStartOptions(['--model', 'opus', '--effort', 'high', '--resume', 'abc123']) + expect(options.model).toBe('opus') + expect(options.effort).toBe('high') + expect(options.claudeArgs).toEqual(expect.arrayContaining(['--model', 'opus', '--effort', 'high', '--resume', 'abc123'])) + }) + + it('honors an explicit --permission-mode and ignores a later --yolo', () => { + const { options } = parseClaudeStartOptions(['--permission-mode', 'default', '--yolo']) + expect(options.permissionMode).toBe('default') + expect(options.claudeArgs ?? 
[]).not.toContain('--dangerously-skip-permissions') + }) + + it('captures --started-by and surfaces --help via showHelp', () => { + const { options, showHelp } = parseClaudeStartOptions(['--started-by', 'runner', '--help']) + expect(options.startedBy).toBe('runner') + expect(showHelp).toBe(true) + }) +}) diff --git a/cli/src/commands/claude.ts b/cli/src/commands/claude.ts index 6b72a51776..bcaec32e0f 100644 --- a/cli/src/commands/claude.ts +++ b/cli/src/commands/claude.ts @@ -15,6 +15,70 @@ import { withBunRuntimeEnv } from '@/utils/bunRuntime' import { extractErrorInfo } from '@/utils/errorUtils' import type { CommandDefinition } from './types' +/** + * Parse `hapi` / `hapi claude` CLI args into {@link StartOptions} plus a + * showHelp flag. Extracted from {@link claudeCommand}.run so the arg handling + * is unit-testable. Expects a leading `claude` subcommand to already be + * stripped from `args`. + */ +export function parseClaudeStartOptions(args: string[]): { options: StartOptions; showHelp: boolean } { + const options: StartOptions = {} + let showHelp = false + const unknownArgs: string[] = [] + let hasExplicitPermissionMode = false + + for (let i = 0; i < args.length; i++) { + const arg = args[i] + + if (arg === '-h' || arg === '--help') { + showHelp = true + unknownArgs.push(arg) + } else if (arg === '--hapi-starting-mode') { + options.startingMode = z.enum(['local', 'remote', 'pty']).parse(args[++i]) + } else if (arg === '--permission-mode') { + const mode = args[++i] + if (!mode || !(CLAUDE_PERMISSION_MODES as readonly string[]).includes(mode)) { + throw new Error(`Invalid --permission-mode value: ${mode ?? 
'(missing)'}`) + } + options.permissionMode = mode as StartOptions['permissionMode'] + hasExplicitPermissionMode = true + } else if (arg === '--yolo' && !hasExplicitPermissionMode) { + options.permissionMode = 'bypassPermissions' + unknownArgs.push('--dangerously-skip-permissions') + } else if (arg === '--dangerously-skip-permissions' && !hasExplicitPermissionMode) { + options.permissionMode = 'bypassPermissions' + unknownArgs.push(arg) + } else if (arg === '--model') { + const model = args[++i] + if (!model) { + throw new Error('Missing --model value') + } + options.model = model + unknownArgs.push('--model', model) + } else if (arg === '--effort') { + const effort = args[++i] + if (!effort) { + throw new Error('Missing --effort value') + } + options.effort = effort + unknownArgs.push('--effort', effort) + } else if (arg === '--started-by') { + options.startedBy = args[++i] as 'runner' | 'terminal' + } else { + unknownArgs.push(arg) + if (i + 1 < args.length && !args[i + 1].startsWith('-')) { + unknownArgs.push(args[++i]) + } + } + } + + if (unknownArgs.length > 0) { + options.claudeArgs = [...(options.claudeArgs || []), ...unknownArgs] + } + + return { options, showHelp } +} + export const claudeCommand: CommandDefinition = { name: 'default', requiresRuntimeAssets: true, @@ -25,59 +89,7 @@ export const claudeCommand: CommandDefinition = { args.shift() } - const options: StartOptions = {} - let showHelp = false - const unknownArgs: string[] = [] - let hasExplicitPermissionMode = false - - for (let i = 0; i < args.length; i++) { - const arg = args[i] - - if (arg === '-h' || arg === '--help') { - showHelp = true - unknownArgs.push(arg) - } else if (arg === '--hapi-starting-mode') { - options.startingMode = z.enum(['local', 'remote']).parse(args[++i]) - } else if (arg === '--permission-mode') { - const mode = args[++i] - if (!mode || !(CLAUDE_PERMISSION_MODES as readonly string[]).includes(mode)) { - throw new Error(`Invalid --permission-mode value: ${mode ?? 
'(missing)'}`) - } - options.permissionMode = mode as StartOptions['permissionMode'] - hasExplicitPermissionMode = true - } else if (arg === '--yolo' && !hasExplicitPermissionMode) { - options.permissionMode = 'bypassPermissions' - unknownArgs.push('--dangerously-skip-permissions') - } else if (arg === '--dangerously-skip-permissions' && !hasExplicitPermissionMode) { - options.permissionMode = 'bypassPermissions' - unknownArgs.push(arg) - } else if (arg === '--model') { - const model = args[++i] - if (!model) { - throw new Error('Missing --model value') - } - options.model = model - unknownArgs.push('--model', model) - } else if (arg === '--effort') { - const effort = args[++i] - if (!effort) { - throw new Error('Missing --effort value') - } - options.effort = effort - unknownArgs.push('--effort', effort) - } else if (arg === '--started-by') { - options.startedBy = args[++i] as 'runner' | 'terminal' - } else { - unknownArgs.push(arg) - if (i + 1 < args.length && !args[i + 1].startsWith('-')) { - unknownArgs.push(args[++i]) - } - } - } - - if (unknownArgs.length > 0) { - options.claudeArgs = [...(options.claudeArgs || []), ...unknownArgs] - } + const { options, showHelp } = parseClaudeStartOptions(args) if (showHelp) { console.log(` From 288da8e46b2b92b283d49fa2e9ad446d8b8fd368 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 19 Jun 2026 22:37:58 +0900 Subject: [PATCH 02/26] refactor(pty): split interactive PTY launch from the local/remote control axis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit startingMode conflated two orthogonal axes: control mode (local/remote — who drives the session) and launch kind (PTY — how the agent process is run). Only claude (and the upcoming agy) can launch a PTY, yet every remote flavor's options carried 'pty' and had to collapse it back to 'remote', duplicating the same ternary across runGemini/runOpencode/runPi. 
Split the launch kind into a separate `interactive` flag: - Shared RemoteAgentCommandOptions.startingMode is back to 'local' | 'remote'; '--hapi-starting-mode pty' now sets `interactive` instead. Non-PTY flavors (gemini/opencode/pi) no longer mention pty at all and revert to their upstream form. - claude maps `interactive` to the runtime SessionMode 'pty' at the loop boundary, so the dispatcher (loopBase), persistence (agentState/metadata startingMode) and the web agent-terminal gate are unchanged — pty still lives in the runtime/dispatch layer where it belongs. No behavior change: PTY sessions launch, persist and resume exactly as before. --- cli/src/claude/runClaude.ts | 21 +++++++++++++++++--- cli/src/commands/agentCommandOptions.test.ts | 18 +++++++++++++++++ cli/src/commands/agentCommandOptions.ts | 12 +++++++++++ cli/src/commands/claude.ts | 8 +++++++- 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/cli/src/claude/runClaude.ts b/cli/src/claude/runClaude.ts index 1ebb1601e5..83df7ee367 100644 --- a/cli/src/claude/runClaude.ts +++ b/cli/src/claude/runClaude.ts @@ -1,5 +1,6 @@ import { logger } from '@/ui/logger'; import { loop } from '@/claude/loop'; +import type { SessionMode } from '@/agent/loopBase'; import { AgentState, SessionEffort, SessionModel } from '@/api/types'; import { EnhancedMode, PermissionMode } from './loop'; import { MessageQueue2 } from '@/utils/MessageQueue2'; @@ -27,7 +28,12 @@ export interface StartOptions { model?: string effort?: string permissionMode?: PermissionMode + // Control mode (who drives the session). pty is NOT a value here — it's the + // separate `interactive` launch axis below. startingMode?: 'local' | 'remote' + // Launch the agent as an interactive PTY terminal. Mapped to the runtime + // SessionMode 'pty' at the loop boundary; persistence/web still see 'pty'. 
+ interactive?: boolean shouldStartRunner?: boolean claudeEnvVars?: Record claudeArgs?: string[] @@ -40,10 +46,13 @@ export interface StartOptions { export async function runClaude(options: StartOptions = {}): Promise { const workingDirectory = options.workingDirectory ?? getInvokedCwd(); const startedBy = options.startedBy ?? 'terminal'; + // Launch axis: when set, claude runs in an interactive PTY (runtime mode + // 'pty'); otherwise the control axis (local/remote) applies. + const interactive = options.interactive ?? false; // Log environment info at startup logger.debugLargeJson('[START] HAPI process started', getEnvironmentInfo()); - logger.debug(`[START] Options: startedBy=${startedBy}, startingMode=${options.startingMode}`); + logger.debug(`[START] Options: startedBy=${startedBy}, startingMode=${options.startingMode}, interactive=${interactive}`); // Validate runner spawn requirements if (startedBy === 'runner' && options.startingMode === 'local') { @@ -67,6 +76,7 @@ export async function runClaude(options: StartOptions = {}): Promise { flavor: 'claude', startedBy, workingDirectory, + tag: interactive ? `__hapi_pty__claude-${randomUUID()}` : undefined, agentState: initialState, model: initialModel ?? undefined, effort: initialEffort ?? undefined @@ -148,8 +158,13 @@ export async function runClaude(options: StartOptions = {}): Promise { registerKillSessionHandler(session.rpcHandlerManager, lifecycle); registerLocalHandoffHandler(session.rpcHandlerManager, lifecycle); - // Set initial agent state - const startingMode = options.startingMode ?? (startedBy === 'runner' ? 'remote' : 'local'); + // Set initial agent state. Collapse the launch axis into the runtime + // SessionMode here: interactive → 'pty'; otherwise the control axis. From + // this point on the runtime/persistence layers keep using 'pty' (the agent + // terminal toggle and resume both key on the persisted startingMode). + const startingMode: SessionMode = interactive + ? 
'pty' + : (options.startingMode ?? (startedBy === 'runner' ? 'remote' : 'local')); setControlledByUser(session, startingMode); // Import MessageQueue2 and create message queue diff --git a/cli/src/commands/agentCommandOptions.test.ts b/cli/src/commands/agentCommandOptions.test.ts index f70c3e4ad3..79c671fe04 100644 --- a/cli/src/commands/agentCommandOptions.test.ts +++ b/cli/src/commands/agentCommandOptions.test.ts @@ -159,6 +159,24 @@ describe('parseRemoteAgentCommandOptions — pi flavor', () => { )).toThrow('Invalid --hapi-starting-mode') }) + it('maps --hapi-starting-mode pty to the interactive launch flag, not a control mode', () => { + // pty is a launch axis (how the process is driven), not a control mode + // (who drives the session). Non-pty flavors ignore `interactive`; pty + // flavors (claude/agy) read it to launch their interactive terminal. + const result = parseRemoteAgentCommandOptions( + ['--hapi-starting-mode', 'pty'], + ALLOWED + ) + expect(result.interactive).toBe(true) + // control mode is left to its default — pty does NOT occupy startingMode + expect(result.startingMode).toBeUndefined() + }) + + it('does not set interactive for local/remote', () => { + expect(parseRemoteAgentCommandOptions(['--hapi-starting-mode', 'remote'], ALLOWED).interactive).toBeUndefined() + expect(parseRemoteAgentCommandOptions(['--hapi-starting-mode', 'local'], ALLOWED).interactive).toBeUndefined() + }) + it('handles a full pi invocation end-to-end', () => { const result = parseRemoteAgentCommandOptions( [ diff --git a/cli/src/commands/agentCommandOptions.ts b/cli/src/commands/agentCommandOptions.ts index f7e8b29c5e..2961eb3920 100644 --- a/cli/src/commands/agentCommandOptions.ts +++ b/cli/src/commands/agentCommandOptions.ts @@ -2,7 +2,14 @@ import type { PermissionMode } from '@hapi/protocol/types' export type RemoteAgentCommandOptions = { startedBy?: 'runner' | 'terminal' + // Control mode: who drives the session (terminal vs web). 
Orthogonal to + // flavor and to how the agent process is launched. startingMode?: 'local' | 'remote' + // Launch axis: run the agent as an interactive PTY terminal. Only pty-capable + // flavors (claude has its own parser; agy) act on this; the rest ignore it. + // Kept separate from `startingMode` so non-pty flavors never have to know + // about pty (it always collapses to remote-controlled at the session level). + interactive?: boolean permissionMode?: TPermissionMode model?: string effort?: string @@ -25,6 +32,11 @@ export function parseRemoteAgentCommandOptions Date: Fri, 12 Jun 2026 17:28:16 +0900 Subject: [PATCH 03/26] refactor(claude): extract question-answer input builders to a shared util MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the AskUserQuestion / request_user_input answer-to-input builders (and the question tool-name predicates) out of the SDK permission handler into a standalone util. No behavior change — the SDK handler imports them — so the PTY permission bridge can reuse them without pulling in the SDK handler's dependencies. 
--- cli/src/claude/utils/permissionHandler.ts | 75 ++----------------- cli/src/claude/utils/questionAnswerInput.ts | 81 +++++++++++++++++++++ 2 files changed, 88 insertions(+), 68 deletions(-) create mode 100644 cli/src/claude/utils/questionAnswerInput.ts diff --git a/cli/src/claude/utils/permissionHandler.ts b/cli/src/claude/utils/permissionHandler.ts index d154b71823..549d609a2d 100644 --- a/cli/src/claude/utils/permissionHandler.ts +++ b/cli/src/claude/utils/permissionHandler.ts @@ -21,6 +21,13 @@ import { type PendingPermissionRequest, type PermissionCompletion } from "@/modules/common/permission/BasePermissionHandler"; +import { + isAskUserQuestionToolName, + isRequestUserInputToolName, + isQuestionToolName, + buildAskUserQuestionUpdatedInput, + buildRequestUserInputUpdatedInput +} from "./questionAnswerInput"; interface PermissionResponse { id: string; @@ -34,17 +41,6 @@ interface PermissionResponse { const PLAN_EXIT_MODES: PermissionMode[] = ['default', 'acceptEdits', 'auto', 'bypassPermissions']; -function isAskUserQuestionToolName(toolName: string): boolean { - return toolName === 'AskUserQuestion' || toolName === 'ask_user_question'; -} - -function isRequestUserInputToolName(toolName: string): boolean { - return toolName === 'request_user_input'; -} - -function isQuestionToolName(toolName: string): boolean { - return isAskUserQuestionToolName(toolName) || isRequestUserInputToolName(toolName); -} function formatAskUserQuestionAnswers(answers: Record | Record, input: unknown): string { // Normalize nested format to flat format for display @@ -100,63 +96,6 @@ function formatAskUserQuestionAnswers(answers: Record | Record : `User answered:\n${body}`; } -function buildAskUserQuestionUpdatedInput(input: unknown, answers: Record | Record): Record { - // Normalize incoming answers (web sends Record; - // codex pathway sends nested Record) into a - // single Record shape we can iterate. 
- const indexedAnswers: Record = {}; - for (const [key, value] of Object.entries(answers)) { - if (Array.isArray(value)) { - indexedAnswers[key] = value; - } else if (value && typeof value === 'object' && 'answers' in value) { - indexedAnswers[key] = value.answers; - } - } - - if (!isObject(input)) { - return { answers: {} }; - } - - // claude code 2.x's built-in AskUserQuestion tool expects - // answers: Record - // and joins multi-select answers with a comma; it then echoes them - // verbatim in the tool result (`mapToolResultToToolResultBlockParam`). - // Sending the index-keyed `string[]` shape we receive from the web - // makes claude's lookup miss every question, producing the empty - // "User has answered your questions: ." result that locks the turn. - const questions = Array.isArray(input.questions) ? input.questions : []; - const claudeShapedAnswers: Record = {}; - for (let i = 0; i < questions.length; i += 1) { - const q = questions[i]; - if (!q || typeof q !== 'object') continue; - const questionText = (q as { question?: unknown }).question; - if (typeof questionText !== 'string' || questionText.length === 0) continue; - const selections = indexedAnswers[String(i)]; - if (!selections || selections.length === 0) continue; - claudeShapedAnswers[questionText] = selections.join(','); - } - - return { - ...input, - answers: claudeShapedAnswers - }; -} - -/** - * Build updated input for request_user_input tool - * The answers format is nested: { answers: { [id]: { answers: string[] } } } - */ -function buildRequestUserInputUpdatedInput(input: unknown, answers: unknown): Record { - if (!isObject(input)) { - return { answers }; - } - - return { - ...input, - answers - }; -} - export class PermissionHandler extends BasePermissionHandler { private toolCalls: { id: string, name: string, input: any, used: boolean }[] = []; private responses = new Map(); diff --git a/cli/src/claude/utils/questionAnswerInput.ts b/cli/src/claude/utils/questionAnswerInput.ts new file 
mode 100644 index 0000000000..b25f0f4861 --- /dev/null +++ b/cli/src/claude/utils/questionAnswerInput.ts @@ -0,0 +1,81 @@ +/** + * Helpers for turning a web-collected answer set into the tool input that + * claude's built-in question tools (AskUserQuestion / request_user_input) + * expect. Shared by the SDK permission handler (canUseTool path) and the PTY + * permission bridge (PreToolUse hook path), which both pre-fill the answers via + * the tool's updatedInput so claude echoes them instead of prompting. + */ + +import { isObject } from "@hapi/protocol"; + +export function isAskUserQuestionToolName(toolName: string): boolean { + return toolName === 'AskUserQuestion' || toolName === 'ask_user_question'; +} + +export function isRequestUserInputToolName(toolName: string): boolean { + return toolName === 'request_user_input'; +} + +export function isQuestionToolName(toolName: string): boolean { + return isAskUserQuestionToolName(toolName) || isRequestUserInputToolName(toolName); +} + +export function buildAskUserQuestionUpdatedInput( + input: unknown, + answers: Record | Record +): Record { + // Normalize incoming answers (web sends Record; + // codex pathway sends nested Record) into a + // single Record shape we can iterate. + const indexedAnswers: Record = {}; + for (const [key, value] of Object.entries(answers)) { + if (Array.isArray(value)) { + indexedAnswers[key] = value; + } else if (value && typeof value === 'object' && 'answers' in value) { + indexedAnswers[key] = value.answers; + } + } + + if (!isObject(input)) { + return { answers: {} }; + } + + // claude code 2.x's built-in AskUserQuestion tool expects + // answers: Record + // and joins multi-select answers with a comma; it then echoes them + // verbatim in the tool result (`mapToolResultToToolResultBlockParam`). + // Sending the index-keyed `string[]` shape we receive from the web + // makes claude's lookup miss every question, producing the empty + // "User has answered your questions: ." 
result that locks the turn. + const questions = Array.isArray(input.questions) ? input.questions : []; + const claudeShapedAnswers: Record = {}; + for (let i = 0; i < questions.length; i += 1) { + const q = questions[i]; + if (!q || typeof q !== 'object') continue; + const questionText = (q as { question?: unknown }).question; + if (typeof questionText !== 'string' || questionText.length === 0) continue; + const selections = indexedAnswers[String(i)]; + if (!selections || selections.length === 0) continue; + claudeShapedAnswers[questionText] = selections.join(','); + } + + return { + ...input, + answers: claudeShapedAnswers + }; +} + +/** + * Build updated input for the request_user_input tool. The answers format is + * nested: { answers: { [id]: { answers: string[] } } }. + */ +export function buildRequestUserInputUpdatedInput(input: unknown, answers: unknown): Record { + if (!isObject(input)) { + return { answers }; + } + + return { + ...input, + answers + }; +} From 08040077a5b4c74579b75c16ed460c77dc8deb36 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Sun, 14 Jun 2026 10:08:58 +0900 Subject: [PATCH 04/26] refactor(web): extract shared QuickKeys from the terminal view MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The standalone terminal's quick-input keys (Esc/arrows/Ctrl/Alt) and their sticky-modifier dispatch were inline in terminal.tsx. Extract them to a shared QuickKeys module — a useQuickKeyInput hook (modifier state + dispatcher) and a QuickKeyRows presentational component — so other terminal views can reuse them without duplication. Behavior-preserving: the raw onData path stays ungated and shares the modifier state with the buttons, exactly as before. 
--- web/src/components/QuickKeys/QuickKeys.tsx | 226 +++++++++++++++++++++ web/src/routes/sessions/terminal.tsx | 203 ++---------------- 2 files changed, 244 insertions(+), 185 deletions(-) create mode 100644 web/src/components/QuickKeys/QuickKeys.tsx diff --git a/web/src/components/QuickKeys/QuickKeys.tsx b/web/src/components/QuickKeys/QuickKeys.tsx new file mode 100644 index 0000000000..67441730a4 --- /dev/null +++ b/web/src/components/QuickKeys/QuickKeys.tsx @@ -0,0 +1,226 @@ +import { useCallback, useEffect, useRef, useState } from 'react' +import type { PointerEvent } from 'react' +import { useLongPress } from '@/hooks/useLongPress' + +// A quick-input key for driving a TUI from a touch device (or any viewer): a +// labelled button that sends a raw terminal sequence, optionally a long-press +// alternate, or a sticky Ctrl/Alt modifier toggle. +export type QuickInput = { + label: string + sequence?: string + description: string + modifier?: 'ctrl' | 'alt' + popup?: { + label: string + sequence: string + description: string + } +} + +export type ModifierState = { + ctrl: boolean + alt: boolean +} + +// Apply the sticky Ctrl/Alt modifiers to a raw sequence: Alt prefixes ESC, Ctrl +// maps a single printable letter to its control code (C0). Multi-char sequences +// (arrows, paste) only receive the Alt prefix — Ctrl is meaningless there. +export function applyModifierState(sequence: string, state: ModifierState): string { + let modified = sequence + if (state.alt) { + modified = `\u001b${modified}` + } + if (state.ctrl && modified.length === 1) { + const code = modified.toUpperCase().charCodeAt(0) + if (code >= 64 && code <= 95) { + modified = String.fromCharCode(code - 64) + } + } + return modified +} + +// A sticky modifier is consumed (and should reset) once a real sequence is sent. 
+export function shouldResetModifiers(sequence: string, state: ModifierState): boolean { + if (!sequence) { + return false + } + return state.ctrl || state.alt +} + +export const QUICK_INPUT_ROWS: QuickInput[][] = [ + [ + { label: 'Esc', sequence: '\u001b', description: 'Escape' }, + { + label: '/', + sequence: '/', + description: 'Forward slash', + popup: { label: '?', sequence: '?', description: 'Question mark' }, + }, + { + label: '-', + sequence: '-', + description: 'Hyphen', + popup: { label: '|', sequence: '|', description: 'Pipe' }, + }, + { label: 'Home', sequence: '\u001b[H', description: 'Home' }, + { label: '↑', sequence: '\u001b[A', description: 'Arrow up' }, + { label: 'End', sequence: '\u001b[F', description: 'End' }, + { label: 'PgUp', sequence: '\u001b[5~', description: 'Page up' }, + ], + [ + { label: 'Tab', sequence: '\t', description: 'Tab' }, + { label: 'Ctrl', description: 'Control', modifier: 'ctrl' }, + { label: 'Alt', description: 'Alternate', modifier: 'alt' }, + { label: '←', sequence: '\u001b[D', description: 'Arrow left' }, + { label: '↓', sequence: '\u001b[B', description: 'Arrow down' }, + { label: '→', sequence: '\u001b[C', description: 'Arrow right' }, + { label: 'PgDn', sequence: '\u001b[6~', description: 'Page down' }, + ], +] + +function QuickKeyButton(props: { + input: QuickInput + disabled: boolean + isActive: boolean + onPress: (sequence: string) => void + onToggleModifier: (modifier: 'ctrl' | 'alt') => void +}) { + const { input, disabled, isActive, onPress, onToggleModifier } = props + const modifier = input.modifier + const popupSequence = input.popup?.sequence + const popupDescription = input.popup?.description + const hasPopup = Boolean(popupSequence) + const longPressDisabled = disabled || Boolean(modifier) || !hasPopup + + const handleClick = useCallback(() => { + if (modifier) { + onToggleModifier(modifier) + return + } + onPress(input.sequence ?? 
'') + }, [modifier, onToggleModifier, onPress, input.sequence]) + + const handlePointerDown = useCallback((event: PointerEvent) => { + if (event.pointerType === 'touch') { + event.preventDefault() + } + }, []) + + const longPressHandlers = useLongPress({ + onLongPress: () => { + if (popupSequence && !modifier) { + onPress(popupSequence) + } + }, + onClick: handleClick, + disabled: longPressDisabled, + }) + + return ( + + ) +} + +// Sticky-modifier state + a dispatcher that applies the modifiers and resets +// them after a real send. Shared by the quick-key buttons AND the terminal's +// raw onData path so toggling Ctrl then typing a letter sends the control code, +// exactly like a physical modifier key. Gating (when to disable) is the caller's +// concern — the quick-key buttons gate via their `disabled` prop, while the raw +// onData path is intentionally ungated. +export function useQuickKeyInput(opts: { onSend: (data: string) => void }): { + ctrlActive: boolean + altActive: boolean + dispatch: (sequence: string) => void + toggleModifier: (modifier: 'ctrl' | 'alt') => void + resetModifiers: () => void +} { + const [ctrlActive, setCtrlActive] = useState(false) + const [altActive, setAltActive] = useState(false) + // Read modifiers from a ref inside dispatch so the terminal onData closure + // (registered once) always sees the current state, never a stale snapshot. 
+ const modifierStateRef = useRef({ ctrl: false, alt: false }) + useEffect(() => { + modifierStateRef.current = { ctrl: ctrlActive, alt: altActive } + }, [ctrlActive, altActive]) + const onSendRef = useRef(opts.onSend) + useEffect(() => { + onSendRef.current = opts.onSend + }, [opts.onSend]) + + const resetModifiers = useCallback(() => { + setCtrlActive(false) + setAltActive(false) + }, []) + + const dispatch = useCallback((sequence: string) => { + const state = modifierStateRef.current + onSendRef.current(applyModifierState(sequence, state)) + if (shouldResetModifiers(sequence, state)) { + resetModifiers() + } + }, [resetModifiers]) + + const toggleModifier = useCallback((modifier: 'ctrl' | 'alt') => { + if (modifier === 'ctrl') { + setCtrlActive((value) => !value) + setAltActive(false) + } else { + setAltActive((value) => !value) + setCtrlActive(false) + } + }, []) + + return { ctrlActive, altActive, dispatch, toggleModifier, resetModifiers } +} + +// Presentational rows of quick-input keys. State/dispatch live in the caller +// (via useQuickKeyInput) so they can be shared with the terminal onData path. +export function QuickKeyRows(props: { + ctrlActive: boolean + altActive: boolean + disabled: boolean + onPress: (sequence: string) => void + onToggleModifier: (modifier: 'ctrl' | 'alt') => void +}) { + const { ctrlActive, altActive, disabled, onPress, onToggleModifier } = props + return ( + <> + {QUICK_INPUT_ROWS.map((row, rowIndex) => ( +
+ {row.map((input) => { + const modifier = input.modifier + const isCtrl = modifier === 'ctrl' + const isAlt = modifier === 'alt' + const isActive = (isCtrl && ctrlActive) || (isAlt && altActive) + return ( + + ) + })} +
+ ))} + + ) +} diff --git a/web/src/routes/sessions/terminal.tsx b/web/src/routes/sessions/terminal.tsx index 0339145b7c..7a602cb2a3 100644 --- a/web/src/routes/sessions/terminal.tsx +++ b/web/src/routes/sessions/terminal.tsx @@ -1,12 +1,11 @@ import { useCallback, useEffect, useMemo, useRef, useState } from 'react' -import type { PointerEvent } from 'react' import { useParams } from '@tanstack/react-router' import type { Terminal } from '@xterm/xterm' import { useAppContext } from '@/lib/app-context' import { useAppGoBack } from '@/hooks/useAppGoBack' import { useSession } from '@/hooks/queries/useSession' import { useTerminalSocket } from '@/hooks/useTerminalSocket' -import { useLongPress } from '@/hooks/useLongPress' +import { useQuickKeyInput, QuickKeyRows } from '@/components/QuickKeys/QuickKeys' import { useTranslation } from '@/lib/use-translation' import { randomId } from '@/lib/randomId' import { TerminalView } from '@/components/Terminal/TerminalView' @@ -55,133 +54,8 @@ function ConnectionIndicator(props: { status: 'idle' | 'connecting' | 'connected ) } -type QuickInput = { - label: string - sequence?: string - description: string - modifier?: 'ctrl' | 'alt' - popup?: { - label: string - sequence: string - description: string - } -} - -type ModifierState = { - ctrl: boolean - alt: boolean -} - -function applyModifierState(sequence: string, state: ModifierState): string { - let modified = sequence - if (state.alt) { - modified = `\u001b${modified}` - } - if (state.ctrl && modified.length === 1) { - const code = modified.toUpperCase().charCodeAt(0) - if (code >= 64 && code <= 95) { - modified = String.fromCharCode(code - 64) - } - } - return modified -} - -function shouldResetModifiers(sequence: string, state: ModifierState): boolean { - if (!sequence) { - return false - } - return state.ctrl || state.alt -} - const EXIT_NAVIGATION_DELAY_MS = 700 -const QUICK_INPUT_ROWS: QuickInput[][] = [ - [ - { label: 'Esc', sequence: '\u001b', description: 'Escape' }, 
- { - label: '/', - sequence: '/', - description: 'Forward slash', - popup: { label: '?', sequence: '?', description: 'Question mark' }, - }, - { - label: '-', - sequence: '-', - description: 'Hyphen', - popup: { label: '|', sequence: '|', description: 'Pipe' }, - }, - { label: 'Home', sequence: '\u001b[H', description: 'Home' }, - { label: '↑', sequence: '\u001b[A', description: 'Arrow up' }, - { label: 'End', sequence: '\u001b[F', description: 'End' }, - { label: 'PgUp', sequence: '\u001b[5~', description: 'Page up' }, - ], - [ - { label: 'Tab', sequence: '\t', description: 'Tab' }, - { label: 'Ctrl', description: 'Control', modifier: 'ctrl' }, - { label: 'Alt', description: 'Alternate', modifier: 'alt' }, - { label: '←', sequence: '\u001b[D', description: 'Arrow left' }, - { label: '↓', sequence: '\u001b[B', description: 'Arrow down' }, - { label: '→', sequence: '\u001b[C', description: 'Arrow right' }, - { label: 'PgDn', sequence: '\u001b[6~', description: 'Page down' }, - ], -] - -function QuickKeyButton(props: { - input: QuickInput - disabled: boolean - isActive: boolean - onPress: (sequence: string) => void - onToggleModifier: (modifier: 'ctrl' | 'alt') => void -}) { - const { input, disabled, isActive, onPress, onToggleModifier } = props - const modifier = input.modifier - const popupSequence = input.popup?.sequence - const popupDescription = input.popup?.description - const hasPopup = Boolean(popupSequence) - const longPressDisabled = disabled || Boolean(modifier) || !hasPopup - - const handleClick = useCallback(() => { - if (modifier) { - onToggleModifier(modifier) - return - } - onPress(input.sequence ?? 
'') - }, [modifier, onToggleModifier, onPress, input.sequence]) - - const handlePointerDown = useCallback((event: PointerEvent) => { - if (event.pointerType === 'touch') { - event.preventDefault() - } - }, []) - - const longPressHandlers = useLongPress({ - onLongPress: () => { - if (popupSequence && !modifier) { - onPress(popupSequence) - } - }, - onClick: handleClick, - disabled: longPressDisabled, - }) - - return ( - - ) -} - export default function TerminalPage() { const { t } = useTranslation() const { sessionId } = useParams({ from: '/sessions/$sessionId/terminal' }) @@ -194,11 +68,8 @@ export default function TerminalPage() { const inputDisposableRef = useRef<{ dispose: () => void } | null>(null) const connectOnceRef = useRef(false) const lastSizeRef = useRef<{ cols: number; rows: number } | null>(null) - const modifierStateRef = useRef({ ctrl: false, alt: false }) const exitNavTimerRef = useRef | null>(null) const [exitInfo, setExitInfo] = useState<{ code: number | null; signal: string | null } | null>(null) - const [ctrlActive, setCtrlActive] = useState(false) - const [altActive, setAltActive] = useState(false) const [pasteDialogOpen, setPasteDialogOpen] = useState(false) const [manualPasteText, setManualPasteText] = useState('') @@ -237,36 +108,21 @@ export default function TerminalPage() { }) }, [onExit, goBack]) - useEffect(() => { - modifierStateRef.current = { ctrl: ctrlActive, alt: altActive } - }, [ctrlActive, altActive]) - - const resetModifiers = useCallback(() => { - setCtrlActive(false) - setAltActive(false) - }, []) - - const dispatchSequence = useCallback( - (sequence: string, modifierState: ModifierState) => { - write(applyModifierState(sequence, modifierState)) - if (shouldResetModifiers(sequence, modifierState)) { - resetModifiers() - } - }, - [write, resetModifiers] - ) + // Raw terminal input AND the quick-key buttons share one sticky-modifier + // state via the dispatcher, so toggling Ctrl then typing sends the control + // code. 
onData is intentionally ungated; the buttons gate via `disabled`. + const { ctrlActive, altActive, dispatch, toggleModifier, resetModifiers } = useQuickKeyInput({ onSend: write }) const handleTerminalMount = useCallback( (terminal: Terminal) => { terminalRef.current = terminal inputDisposableRef.current?.dispose() inputDisposableRef.current = terminal.onData((data) => { - const modifierState = modifierStateRef.current - dispatchSequence(data, modifierState) + dispatch(data) }) terminal.focus() }, - [dispatchSequence] + [dispatch] ) const handleResize = useCallback( @@ -387,11 +243,10 @@ export default function TerminalPage() { if (quickInputDisabled) { return } - const modifierState = { ctrl: ctrlActive, alt: altActive } - dispatchSequence(sequence, modifierState) + dispatch(sequence) terminalRef.current?.focus() }, - [quickInputDisabled, ctrlActive, altActive, dispatchSequence] + [quickInputDisabled, dispatch] ) const handleModifierToggle = useCallback( @@ -399,16 +254,10 @@ export default function TerminalPage() { if (quickInputDisabled) { return } - if (modifier === 'ctrl') { - setCtrlActive((value) => !value) - setAltActive(false) - } else { - setAltActive((value) => !value) - setCtrlActive(false) - } + toggleModifier(modifier) terminalRef.current?.focus() }, - [quickInputDisabled] + [quickInputDisabled, toggleModifier] ) if (!session) { @@ -496,29 +345,13 @@ export default function TerminalPage() { > {t('button.paste')} - {QUICK_INPUT_ROWS.map((row, rowIndex) => ( -
- {row.map((input) => { - const modifier = input.modifier - const isCtrl = modifier === 'ctrl' - const isAlt = modifier === 'alt' - const isActive = (isCtrl && ctrlActive) || (isAlt && altActive) - return ( - - ) - })} -
- ))} + From 2cbe528f3f8369237868d1fd73cb7e6b7fffc1b0 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Sun, 14 Jun 2026 13:15:29 +0900 Subject: [PATCH 05/26] refactor(pty): extract the respawn loop into RemoteLauncherBase Hoist the launch/ready/backoff/give-up respawn loop out of the Claude PTY launcher into a RemoteLauncherBase.runRespawnLoop template method, so a second PTY flavor can reuse it instead of duplicating the loop. No behavior change. --- .../common/remote/RemoteLauncherBase.test.ts | 114 ++++++++++++++++++ .../common/remote/RemoteLauncherBase.ts | 68 +++++++++++ 2 files changed, 182 insertions(+) create mode 100644 cli/src/modules/common/remote/RemoteLauncherBase.test.ts diff --git a/cli/src/modules/common/remote/RemoteLauncherBase.test.ts b/cli/src/modules/common/remote/RemoteLauncherBase.test.ts new file mode 100644 index 0000000000..f146d3adce --- /dev/null +++ b/cli/src/modules/common/remote/RemoteLauncherBase.test.ts @@ -0,0 +1,114 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import type { ReactElement } from 'react' +import { RemoteLauncherBase, type LaunchOutcome } from './RemoteLauncherBase' + +// Concrete subclass that exposes the protected respawn loop so the real +// template-method logic (backoff, give-up bound, counter reset) can be driven +// directly — the per-launcher tests mock this method out, so without this the +// live loop is uncovered. +class TestLauncher extends RemoteLauncherBase { + constructor() { + super(undefined) + } + protected createDisplay(): ReactElement { + throw new Error('unused in test') + } + protected async runMainLoop(): Promise {} + protected async cleanup(): Promise {} + + public run(opts: Parameters[0]): Promise { + return this.runRespawnLoop(opts) + } + // Stop the `while (!this.exitReason)` loop from outside the scripted outcomes. 
+ public stop(): void { + this.exitReason = 'exit' + } +} + +// Drive launchOnce from a scripted list of outcomes; once exhausted, end the +// loop so the test terminates deterministically. +function scriptedLaunchOnce(launcher: TestLauncher, outcomes: LaunchOutcome[]) { + let i = 0 + return vi.fn(async (): Promise => { + if (i >= outcomes.length) { + launcher.stop() + return { reachedReady: false } + } + return outcomes[i++] + }) +} + +const fail = (): LaunchOutcome => ({ reachedReady: false, error: new Error('boom') }) +const readyCrash = (): LaunchOutcome => ({ reachedReady: true, error: new Error('crash') }) + +describe('RemoteLauncherBase.runRespawnLoop', () => { + afterEach(() => { + vi.restoreAllMocks() + }) + + it('gives up after maxImmediateFailures consecutive launches that never reach ready', async () => { + const launcher = new TestLauncher() + const onLaunchFailure = vi.fn() + const launchOnce = scriptedLaunchOnce(launcher, [fail(), fail(), fail(), fail(), fail()]) + + await launcher.run({ + maxImmediateFailures: 3, + respawnBackoffMs: 0, + onLaunchStart: () => {}, + launchOnce, + onLaunchFailure, + }) + + // Bounded: stops at the cap, does not consume the 4th/5th scripted outcome. + expect(launchOnce).toHaveBeenCalledTimes(3) + // Each failure surfaced, plus a final give-up message. + const lastMsg = onLaunchFailure.mock.calls.at(-1)?.[0] as Error + expect(lastMsg.message).toContain('failed to start after 3 attempts') + }) + + it('keeps mid-session crash recovery unbounded when launches reach ready', async () => { + const launcher = new TestLauncher() + const onLaunchFailure = vi.fn() + const onLaunchSuccess = vi.fn() + // Four crashes that EACH reached a ready prompt — a long-running session + // that keeps crashing must never hit the give-up bound. 
+ const launchOnce = scriptedLaunchOnce(launcher, [ + readyCrash(), readyCrash(), readyCrash(), readyCrash(), + ]) + + await launcher.run({ + maxImmediateFailures: 3, + respawnBackoffMs: 0, + onLaunchStart: () => {}, + launchOnce, + onLaunchSuccess, + onLaunchFailure, + }) + + // Respawned past the cap (4 > 3) because the counter resets on ready. + expect(launchOnce).toHaveBeenCalledTimes(5) + expect(onLaunchSuccess).toHaveBeenCalledTimes(4) + const gaveUp = onLaunchFailure.mock.calls.some( + ([e]) => (e as Error).message.includes('failed to start after') + ) + expect(gaveUp).toBe(false) + }) + + it('backs off between immediate failures but not after a ready crash', async () => { + const setTimeoutSpy = vi.spyOn(globalThis, 'setTimeout') + const launcher = new TestLauncher() + const launchOnce = scriptedLaunchOnce(launcher, [fail(), readyCrash()]) + + await launcher.run({ + maxImmediateFailures: 5, + respawnBackoffMs: 250, + onLaunchStart: () => {}, + launchOnce, + onLaunchFailure: () => {}, + }) + + const backoffWaits = setTimeoutSpy.mock.calls.filter(([, ms]) => ms === 250) + // Exactly one backoff: after the immediate failure, none after the ready crash. 
+ expect(backoffWaits).toHaveLength(1) + }) +}) diff --git a/cli/src/modules/common/remote/RemoteLauncherBase.ts b/cli/src/modules/common/remote/RemoteLauncherBase.ts index bce64e0d75..01e2667cff 100644 --- a/cli/src/modules/common/remote/RemoteLauncherBase.ts +++ b/cli/src/modules/common/remote/RemoteLauncherBase.ts @@ -6,6 +6,11 @@ import { RPC_METHODS } from '@hapi/protocol/rpcMethods'; export type RemoteLauncherExitReason = 'switch' | 'exit'; +export type LaunchOutcome = { + reachedReady: boolean; + error?: Error; +}; + export type RemoteLauncherDisplayContext = { messageBuffer: MessageBuffer; logPath?: string; @@ -36,6 +41,7 @@ export abstract class RemoteLauncherBase { protected readonly logPath?: string; protected exitReason: RemoteLauncherExitReason | null = null; protected shouldExit: boolean = false; + protected ptyAbortController: AbortController | null = null; private inkInstance: ReturnType | null = null; protected constructor(logPath?: string) { @@ -50,6 +56,68 @@ export abstract class RemoteLauncherBase { protected abstract cleanup(): Promise; + protected getCurrentSessionId(): string | null { + return null; + } + + protected async runRespawnLoop(opts: { + maxImmediateFailures?: number; + respawnBackoffMs?: number; + onLaunchStart: (isNewSession: boolean) => void; + launchOnce: (signal: AbortSignal) => Promise; + onLaunchSuccess?: () => void; + onLaunchFailure?: (error: Error) => void; + }): Promise { + const maxImmediateFailures = opts.maxImmediateFailures ?? 3; + const respawnBackoffMs = opts.respawnBackoffMs ?? 
1000; + + let consecutiveImmediateFailures = 0; + let previousSessionId: string | null = null; + + while (!this.exitReason) { + const currentSessionId = this.getCurrentSessionId(); + const isNewSession = currentSessionId !== previousSessionId; + opts.onLaunchStart(isNewSession); + previousSessionId = currentSessionId; + + const controller = new AbortController(); + this.ptyAbortController = controller; + + let reachedReady = false; + try { + const outcome = await opts.launchOnce(controller.signal); + reachedReady = outcome.reachedReady; + + if (reachedReady) { + consecutiveImmediateFailures = 0; + opts.onLaunchSuccess?.(); + } + + if (outcome.error) { + throw outcome.error; + } + } catch (e) { + if (this.exitReason) break; + + const error = e instanceof Error ? e : new Error(String(e)); + opts.onLaunchFailure?.(error); + + if (!reachedReady) { + consecutiveImmediateFailures++; + if (consecutiveImmediateFailures >= maxImmediateFailures) { + opts.onLaunchFailure?.(new Error(`PTY failed to start after ${maxImmediateFailures} attempts; ending session.`)); + this.exitReason = 'exit'; + break; + } + await new Promise((r) => setTimeout(r, respawnBackoffMs)); + } + continue; + } finally { + this.ptyAbortController = null; + } + } + } + protected setupTerminal(handlers: RemoteLauncherTerminalHandlers): void { if (this.hasTTY) { console.clear(); From 47557555bad55dac219d1d5445f9b294b8c11491 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 12 Jun 2026 17:28:15 +0900 Subject: [PATCH 06/26] feat(pty): add interactive PTY process manager and shared driver --- cli/src/agent/AgentPtyManager.test.ts | 218 +++++++++++++ cli/src/agent/AgentPtyManager.ts | 133 ++++++++ cli/src/agent/__tests__/runAgentPty.test.ts | 243 +++++++++++++++ cli/src/agent/runAgentPty.ts | 324 ++++++++++++++++++++ cli/src/claude/__tests__/__echo.js | 3 + 5 files changed, 921 insertions(+) create mode 100644 cli/src/agent/AgentPtyManager.test.ts create mode 100644 cli/src/agent/AgentPtyManager.ts create mode 
100644 cli/src/agent/__tests__/runAgentPty.test.ts create mode 100644 cli/src/agent/runAgentPty.ts create mode 100644 cli/src/claude/__tests__/__echo.js diff --git a/cli/src/agent/AgentPtyManager.test.ts b/cli/src/agent/AgentPtyManager.test.ts new file mode 100644 index 0000000000..548fe965bb --- /dev/null +++ b/cli/src/agent/AgentPtyManager.test.ts @@ -0,0 +1,218 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' +import { AgentPtyManager } from './AgentPtyManager' + +const globalWithBun = globalThis as unknown as { + Bun?: { + spawn?: unknown + } +} +const originalBun = globalWithBun.Bun + +function makeMockProc(): { terminal: Bun.Terminal; killed: boolean; exitCode: number | null; signalCode: string | null; kill: ReturnType; onExit?: (code: number | null) => void } { + return { + terminal: { + write: vi.fn(), + resize: vi.fn(), + close: vi.fn(), + } as unknown as Bun.Terminal, + killed: false, + exitCode: null, + signalCode: null, + kill: vi.fn(() => { (proc as any).killed = true }), + } +} + +let proc: ReturnType + +describe('AgentPtyManager', () => { + beforeEach(() => { + proc = makeMockProc() + const spawnMock = vi.fn(() => proc) + globalWithBun.Bun = { + spawn: spawnMock, + } + }) + + afterEach(() => { + if (originalBun === undefined) { + delete globalWithBun.Bun + } else { + globalWithBun.Bun = originalBun + } + }) + + it('spawns a process with terminal option', () => { + const manager = new AgentPtyManager() + const onData = vi.fn() + + manager.spawn({ + command: 'claude', + args: ['--model', 'sonnet'], + cwd: '/workspace/project', + cols: 80, + rows: 24, + onData, + }) + + expect(globalWithBun.Bun!.spawn).toHaveBeenCalledWith( + ['claude', '--model', 'sonnet'], + expect.objectContaining({ + cwd: '/workspace/project', + terminal: expect.objectContaining({ + cols: 80, + rows: 24, + data: expect.any(Function), + }), + }) + ) + expect(manager.isRunning).toBe(true) + }) + + it('calls onData callback when terminal emits data', () => { 
+ const manager = new AgentPtyManager() + const onData = vi.fn() + + manager.spawn({ + command: 'claude', + onData, + }) + + const spawnCall = (globalWithBun.Bun!.spawn as ReturnType).mock.calls[0] + const terminalConfig = spawnCall[1].terminal + const decoder = new TextDecoder() + const data = new TextEncoder().encode('hello from claude') + + terminalConfig.data(proc.terminal, data) + + expect(onData).toHaveBeenCalledWith('hello from claude') + }) + + it('writes data to terminal', () => { + const manager = new AgentPtyManager() + + manager.spawn({ + command: 'claude', + onData: vi.fn(), + }) + + manager.write('test input\n') + + expect(proc.terminal.write).toHaveBeenCalledWith('test input\n') + }) + + it('resizes terminal dimensions', () => { + const manager = new AgentPtyManager() + + manager.spawn({ + command: 'claude', + cols: 80, + rows: 24, + onData: vi.fn(), + }) + + manager.resize(120, 40) + + expect(proc.terminal.resize).toHaveBeenCalledWith(120, 40) + }) + + it('kills the process and cleans up', () => { + const manager = new AgentPtyManager() + + manager.spawn({ + command: 'claude', + onData: vi.fn(), + }) + + manager.kill() + + expect(proc.kill).toHaveBeenCalled() + expect(proc.terminal.close).toHaveBeenCalled() + expect(manager.isRunning).toBe(false) + }) + + it('reports exit code via onExit callback', () => { + const manager = new AgentPtyManager() + const onExit = vi.fn() + + manager.spawn({ + command: 'claude', + onData: vi.fn(), + onExit, + }) + + const spawnCall = (globalWithBun.Bun!.spawn as ReturnType).mock.calls[0] + const onExitHandler = spawnCall[1].onExit + + onExitHandler(proc, 0) + + expect(onExit).toHaveBeenCalledWith(0, null) + expect(manager.exitCode).toBe(0) + }) + + it('does not call spawn if Bun is unavailable', () => { + delete globalWithBun.Bun + const manager = new AgentPtyManager() + const onError = vi.fn() + + manager.spawn({ + command: 'claude', + onData: vi.fn(), + onError, + }) + + expect(onError).toHaveBeenCalledWith( + 
expect.objectContaining({ message: expect.stringContaining('Bun') }) + ) + expect(manager.isRunning).toBe(false) + }) + + it('does not write if not spawned', () => { + const manager = new AgentPtyManager() + manager.write('data') + // No error should be thrown + }) + + it('does not resize if not spawned', () => { + const manager = new AgentPtyManager() + manager.resize(80, 24) + // No error should be thrown + }) + + it('does not kill if not spawned', () => { + const manager = new AgentPtyManager() + manager.kill() + // No error should be thrown + }) + + it('tracks exit code and signal code', () => { + const manager = new AgentPtyManager() + + manager.spawn({ + command: 'claude', + onData: vi.fn(), + }) + + const spawnCall = (globalWithBun.Bun!.spawn as ReturnType).mock.calls[0] + const onExitHandler = spawnCall[1].onExit + + proc.signalCode = 'SIGTERM' + onExitHandler(proc, null) + + expect(manager.exitCode).toBe(null) + expect(manager.signalCode).toBe('SIGTERM') + expect(manager.isRunning).toBe(false) + }) + + it('applies environment variables from filtered env', () => { + const manager = new AgentPtyManager() + + manager.spawn({ + command: 'claude', + env: { TERM: 'xterm-256color', CUSTOM_VAR: 'value' }, + onData: vi.fn(), + }) + + const spawnCall = (globalWithBun.Bun!.spawn as ReturnType).mock.calls[0] + expect(spawnCall[1].env).toEqual({ TERM: 'xterm-256color', CUSTOM_VAR: 'value' }) + }) +}) diff --git a/cli/src/agent/AgentPtyManager.ts b/cli/src/agent/AgentPtyManager.ts new file mode 100644 index 0000000000..903395bd31 --- /dev/null +++ b/cli/src/agent/AgentPtyManager.ts @@ -0,0 +1,133 @@ +import { logger } from '@/ui/logger' + +export type AgentPtyOptions = { + command: string + args?: string[] + cwd?: string + env?: Record + cols?: number + rows?: number + onData: (data: string) => void + onExit?: (code: number | null, signal: string | null) => void + onError?: (error: Error) => void +} + +function getOptionalBun(): typeof Bun | null { + return typeof Bun 
=== 'undefined' ? null : Bun +} + +export class AgentPtyManager { + private proc: Bun.Subprocess | null = null + private terminal: Bun.Terminal | null = null + private _exitCode: number | null = null + private _signalCode: string | null = null + private _isRunning: boolean = false + + get exitCode(): number | null { + return this._exitCode + } + + get signalCode(): string | null { + return this._signalCode + } + + get isRunning(): boolean { + return this._isRunning + } + + spawn(opts: AgentPtyOptions): void { + const bun = getOptionalBun() + if (!bun || typeof bun.spawn !== 'function') { + const err = new Error('Bun.spawn is unavailable in this runtime') + opts.onError?.(err) + return + } + + const cmd = opts.command + const args = opts.args ?? [] + const cwd = opts.cwd + const decoder = new TextDecoder() + + try { + this.proc = bun.spawn([cmd, ...args], { + cwd, + env: opts.env ?? process.env, + terminal: { + cols: opts.cols ?? 80, + rows: opts.rows ?? 24, + data: (_terminal, data) => { + const text = decoder.decode(data, { stream: true }) + if (text) { + opts.onData(text) + } + }, + }, + onExit: (subprocess, exitCode) => { + this._exitCode = exitCode + this._signalCode = subprocess.signalCode ?? null + this._isRunning = false + opts.onExit?.(this._exitCode, this._signalCode) + }, + }) + + this.terminal = this.proc.terminal ?? null + if (!this.terminal) { + try { + this.proc.kill() + } catch (error) { + logger.debug('[AgentPtyManager] Failed to kill process after missing terminal', { error }) + } + this.proc = null + const err = new Error('Failed to attach terminal to spawned process') + opts.onError?.(err) + return + } + + this._isRunning = true + } catch (error) { + logger.debug('[AgentPtyManager] Failed to spawn process', { error }) + this.proc = null + this.terminal = null + opts.onError?.(error instanceof Error ? 
error : new Error(String(error))) + } + } + + write(data: string): void { + if (!this.terminal || !this._isRunning) { + return + } + this.terminal.write(data) + } + + resize(cols: number, rows: number): void { + if (!this.terminal || !this._isRunning) { + return + } + this.terminal.resize(cols, rows) + } + + kill(): void { + if (!this.proc || !this._isRunning) { + return + } + + if (!this.proc.killed && this.proc.exitCode === null) { + try { + this.proc.kill() + } catch (error) { + logger.debug('[AgentPtyManager] Failed to kill process', { error }) + } + } + + if (this.terminal) { + try { + this.terminal.close() + } catch (error) { + logger.debug('[AgentPtyManager] Failed to close terminal', { error }) + } + } + + this.terminal = null + this._isRunning = false + } +} diff --git a/cli/src/agent/__tests__/runAgentPty.test.ts b/cli/src/agent/__tests__/runAgentPty.test.ts new file mode 100644 index 0000000000..74455b9218 --- /dev/null +++ b/cli/src/agent/__tests__/runAgentPty.test.ts @@ -0,0 +1,243 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' + +const harness = vi.hoisted(() => { + let _isRunning = true + let _onExit: ((code: number | null, signal: string | null) => void) | null = null + let _onData: ((data: string) => void) | null = null + let _echo = true + + const m = { + get isRunning() { return _isRunning }, + spawn: vi.fn((opts: Record) => { + _onExit = (opts.onExit as typeof _onExit) ?? null + _onData = (opts.onData as typeof _onData) ?? null + }), + // By default simulate the agent echoing keystrokes back as output so the + // echo-confirm in runAgentPty proceeds on the first attempt. 
+ write: vi.fn((data: string) => { + if (_echo) _onData?.(data) + }), + kill: vi.fn(() => { _isRunning = false }), + resize: vi.fn(), + } + + return { + setRunning(v: boolean) { _isRunning = v }, + setEcho(v: boolean) { _echo = v }, + triggerExit(code: number | null = 0, signal: string | null = null) { + _isRunning = false + _onExit?.(code, signal) + }, + triggerData(data: string) { _onData?.(data) }, + reset() { + _isRunning = true; _onExit = null; _onData = null; _echo = true + m.spawn.mockClear(); m.write.mockClear(); m.kill.mockClear(); m.resize.mockClear() + }, + m, + } +}) + +vi.mock('@/agent/AgentPtyManager', () => ({ + AgentPtyManager: vi.fn(function() { return harness.m }), +})) +vi.mock('@/lib', () => ({ logger: { debug: vi.fn() } })) +vi.mock('@/parsers/specialCommands', () => ({ + parseSpecialCommand: (msg: string) => { + if (msg === '/clear') return { type: 'clear' } + if (msg === '/compact') return { type: 'compact' } + return { type: 'message' } + }, +})) + +import { runAgentPty } from '../runAgentPty' + +function deferred(): { promise: Promise; resolve: (v: T) => void } { + let resolve!: (v: T) => void + return { promise: new Promise((r) => { resolve = r }), resolve } +} + +type Opts = Parameters[0] +function makeOpts(overrides: Partial = {}): Opts { + return { + command: 'testagent', + args: [], + cwd: '/tmp', + debugPrefix: '[test]', + idleReadyMs: 20, + nextMessage: vi.fn(), + onReady: vi.fn(), + onMessage: vi.fn(), + ...overrides, + } +} + +const tick = (ms = 0) => new Promise((r) => setTimeout(r, ms)) + +// Drive past the markerless waitForInputReady: emit output, then let the idle +// window + polling loop elapse. 
+async function reachReady() { + harness.triggerData('boot') + await tick(220) +} + +describe('runAgentPty', () => { + afterEach(() => { harness.reset() }) + + it('spawns with the given command/args/cwd and calls onReady', async () => { + const msg = deferred<{ message: string } | null>() + const onReady = vi.fn() + const opts = makeOpts({ command: 'mycli', args: ['--foo'], cwd: '/work', onReady, nextMessage: () => msg.promise }) + const promise = runAgentPty(opts) + await tick(0) + expect(harness.m.spawn).toHaveBeenCalled() + const spawnArgs = harness.m.spawn.mock.calls[0][0] as { command: string; args: string[]; cwd: string } + expect(spawnArgs.command).toBe('mycli') + expect(spawnArgs.args).toEqual(['--foo']) + expect(spawnArgs.cwd).toBe('/work') + expect(onReady).toHaveBeenCalled() + await reachReady() + msg.resolve(null) + await promise + }) + + it('injects envVars/extraEnv into the spawn env only (not process.env)', async () => { + const msg = deferred<{ message: string } | null>() + const opts = makeOpts({ + envVars: { FLAVOR_TOKEN: 'tok' }, + extraEnv: { CLAUDE_CONFIG_DIR: '/tmp/iso-cfg' }, + nextMessage: () => msg.promise, + }) + const promise = runAgentPty(opts) + await tick(0) + const spawnEnv = (harness.m.spawn.mock.calls[0][0] as { env: Record }).env + expect(spawnEnv.FLAVOR_TOKEN).toBe('tok') + expect(spawnEnv.CLAUDE_CONFIG_DIR).toBe('/tmp/iso-cfg') + // TERM is always set so interactive TUI agents initialize correctly. + expect(spawnEnv.TERM).toBeTruthy() + // process.env must stay clean so the parent's scanner is unaffected. 
+ expect(process.env.CLAUDE_CONFIG_DIR).toBeUndefined() + expect(process.env.FLAVOR_TOKEN).toBeUndefined() + await reachReady() + msg.resolve(null) + await promise + }) + + it('removes unsetEnv keys from the spawn env (CLAUDECODE stripping)', async () => { + const msg = deferred<{ message: string } | null>() + const opts = makeOpts({ + extraEnv: { CLAUDECODE: '1', KEEP_ME: 'yes' }, + unsetEnv: ['CLAUDECODE'], + nextMessage: () => msg.promise, + }) + const promise = runAgentPty(opts) + await tick(0) + const spawnEnv = (harness.m.spawn.mock.calls[0][0] as { env: Record }).env + // CLAUDECODE is stripped so the child claude isn't treated as a nested + // session (which stops it writing its transcript); unrelated vars are kept. + expect(spawnEnv.CLAUDECODE).toBeUndefined() + expect(spawnEnv.KEEP_ME).toBe('yes') + await reachReady() + msg.resolve(null) + await promise + }) + + it('auto-approves the trust prompt with Enter (not consuming the first message)', async () => { + const msg = deferred<{ message: string } | null>() + const opts = makeOpts({ trustMarkers: ['trust this folder'], nextMessage: () => msg.promise }) + const promise = runAgentPty(opts) + await tick(0) + // Agent shows the first-run trust screen. + harness.triggerData('Quick safety check: Is this a project you trust this folder? 1. Yes') + await tick(40) + // Driver auto-approves with Enter (default highlight = Yes). 
+ expect(harness.m.write).toHaveBeenCalledWith('\r') + msg.resolve(null) + await promise + }) + + it('submits the first message only after ready, with CR separate from text', async () => { + const msg1 = deferred<{ message: string } | null>() + const msg2 = deferred<{ message: string } | null>() + const nextMessage = vi.fn() + .mockImplementationOnce(() => msg1.promise) + .mockImplementationOnce(() => msg2.promise) + const promise = runAgentPty(makeOpts({ nextMessage })) + await reachReady() + msg1.resolve({ message: 'hello' }) + await tick(300) + // text then CR, as separate writes + expect(harness.m.write).toHaveBeenCalledWith('hello') + expect(harness.m.write).toHaveBeenCalledWith('\r') + msg2.resolve(null) + await promise + }) + + it('retries the write when the agent does not echo (stdin not ready yet)', async () => { + const msg1 = deferred<{ message: string } | null>() + const msg2 = deferred<{ message: string } | null>() + const nextMessage = vi.fn() + .mockImplementationOnce(() => msg1.promise) + .mockImplementationOnce(() => msg2.promise) + const promise = runAgentPty(makeOpts({ nextMessage })) + await reachReady() + harness.setEcho(false) // agent ignores input → no echo + msg1.resolve({ message: 'hi' }) + await tick(2500) // 3 attempts × 700ms echo wait + const textWrites = harness.m.write.mock.calls.filter((c) => c[0] === 'hi').length + expect(textWrites).toBe(3) + msg2.resolve(null) + harness.setRunning(false) + await promise + }) + + it('ignores /clear and /compact in the loop', async () => { + const msg1 = deferred<{ message: string } | null>() + const msg2 = deferred<{ message: string } | null>() + const msg3 = deferred<{ message: string } | null>() + const nextMessage = vi.fn() + .mockImplementationOnce(() => msg1.promise) + .mockImplementationOnce(() => msg2.promise) + .mockImplementationOnce(() => msg3.promise) + const promise = runAgentPty(makeOpts({ nextMessage })) + await reachReady() + msg1.resolve({ message: '/clear' }) + await tick(60) + 
expect(harness.m.write).not.toHaveBeenCalledWith('/clear') + msg2.resolve({ message: '/compact' }) + await tick(60) + expect(harness.m.write).not.toHaveBeenCalledWith('/compact') + msg3.resolve(null) + await promise + }) + + it('stops and kills on exit', async () => { + const msg1 = deferred<{ message: string } | null>() + const onExit = vi.fn() + const nextMessage = vi.fn().mockImplementationOnce(() => msg1.promise) + const promise = runAgentPty(makeOpts({ nextMessage, onExit })) + await reachReady() + harness.triggerExit(0) + msg1.resolve({ message: 'late' }) + await promise + expect(onExit).toHaveBeenCalledWith(0) + expect(harness.m.kill).toHaveBeenCalled() + }) + + it('aborts via signal', async () => { + const msg1 = deferred<{ message: string } | null>() + const msg2 = deferred<{ message: string } | null>() + const controller = new AbortController() + const nextMessage = vi.fn() + .mockImplementationOnce(() => msg1.promise) + .mockImplementationOnce(() => msg2.promise) + const promise = runAgentPty(makeOpts({ nextMessage, signal: controller.signal })) + await reachReady() + msg1.resolve({ message: 'first' }) + await tick(120) + controller.abort() + msg2.resolve({ message: 'should not send' }) + await promise + expect(harness.m.write).not.toHaveBeenCalledWith('should not send') + expect(harness.m.kill).toHaveBeenCalled() + }) +}) diff --git a/cli/src/agent/runAgentPty.ts b/cli/src/agent/runAgentPty.ts new file mode 100644 index 0000000000..bc00647baf --- /dev/null +++ b/cli/src/agent/runAgentPty.ts @@ -0,0 +1,324 @@ +import { AgentPtyManager } from "@/agent/AgentPtyManager" +import { parseSpecialCommand } from "@/parsers/specialCommands" +import { logger } from "@/lib" + +/** + * Shared driver for running an interactive agent CLI (e.g. claude) inside a + * PTY. 
All flavor-specific behavior is supplied via options: + * - `command` / `args` / `cwd` / `envVars` / `extraEnv` — how to spawn + * - `promptMarkers` — strings that indicate the agent's input prompt has + * rendered. When provided, input-ready is gated on seeing one of them (e.g. + * claude's ink TUI). When omitted, falls back to an output-idle heuristic + * (for an agent with no detectable prompt marker). + * + * The driver handles the parts every PTY agent shares: spawn lifecycle, + * waiting until the agent is ready before sending the first message, echo- + * confirmed submit with retry (so the first keystrokes aren't dropped while the + * agent wires up stdin), and the message loop. + */ +export type RunAgentPtyOpts = { + command: string + args: string[] + cwd: string + /** Flavor env vars merged into process.env before spawn. */ + envVars?: Record + /** Additional env vars (e.g. DISABLE_AUTOUPDATER) applied after envVars. */ + extraEnv?: Record + /** + * Env var names to REMOVE from the spawned process's environment. claude uses + * this to strip CLAUDECODE / CLAUDE_CODE_* so the child isn't mistaken for a + * nested session (which would stop it writing its JSONL transcript). + */ + unsetEnv?: string[] + /** Output substrings that signal the input prompt has rendered. */ + promptMarkers?: string[] + /** + * Output substrings that indicate a trust/safety prompt the agent shows on + * first run in a folder (e.g. claude's "Is this a project you trust?"). + * When detected, the driver auto-approves it (Enter selects the default + * "Yes" option) so the trust screen doesn't get mistaken for the input + * prompt and the first user message isn't consumed by it. + */ + trustMarkers?: string[] + /** Idle window (ms) used to decide output has settled. */ + idleReadyMs?: number + /** + * Output substrings shown while the agent is actively working (e.g. claude's + * "esc to interrupt" footer / spinner). When seen, `onThinkingChange(true)`. 
+ */ + busyMarkers?: string[] + /** + * Output substrings shown when the agent is back at an idle input prompt + * (e.g. claude's "for shortcuts" hint). When seen, `onThinkingChange(false)`. + */ + idleMarkers?: string[] + debugPrefix: string + signal?: AbortSignal + nextMessage: () => Promise<{ message: string } | null> + onReady: () => void + onMessage: (data: string) => void + /** + * Fired when the agent's working/idle state changes, derived from + * busy/idle markers in the PTY output. Drives the chat "thinking" indicator + * (PTY agents have no streaming protocol to read this from). Tracks the live + * spinner, so it stays accurate even through a long silent inference. + */ + onThinkingChange?: (thinking: boolean) => void + onExit?: (code: number | null) => void + /** + * Called once the PTY is spawned with controls for the live terminal. The + * agent-terminal viewer uses `resize` to repaint the TUI on (re)subscribe so + * the current screen is shown instead of a stale/black buffer replay. Controls + * become no-ops after the process exits. + */ + registerControls?: (controls: { resize: (cols: number, rows: number) => void; sendKeys: (data: string) => void }) => void +} + +export async function runAgentPty(opts: RunAgentPtyOpts): Promise { + const { debugPrefix } = opts + logger.debug(`${debugPrefix} Starting PTY session`) + + // Flavor env vars are injected into the spawned process's environment ONLY — + // never into this process's process.env. This keeps CLAUDE_CONFIG_DIR (used + // by claudePty to isolate folder-trust) scoped to the child, so the parent's + // session scanner still resolves transcripts against the real ~/.claude. + const spawnEnv = { + ...process.env, + // PTY agents with a full TUI need TERM set — the runner's Bun.spawn env + // lacks it. Default to a sane terminal so the interactive TUI initializes + // correctly. + TERM: process.env.TERM || 'xterm-256color', + ...(opts.envVars ?? {}), + ...(opts.extraEnv ?? 
{}), + } as Record + + for (const key of opts.unsetEnv ?? []) { + delete spawnEnv[key] + } + + const manager = new AgentPtyManager() + const signal = opts.signal + const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)) + + const markers = opts.promptMarkers ?? [] + const hasMarkers = markers.length > 0 + const trustMarkers = opts.trustMarkers ?? [] + const idleReadyMs = opts.idleReadyMs ?? (hasMarkers ? 500 : 1000) + + let lastOutputAt = 0 + let sawOutput = false + // For marker-based agents (claude): true once the input prompt rendered. + let promptSeen = false + // Whether the first-run trust/safety prompt has been auto-approved. + let trustHandled = false + + // Working/idle state derived from busy/idle markers, reported only on change. + const busyMarkers = opts.busyMarkers ?? [] + const idleMarkers = opts.idleMarkers ?? [] + const hasBusyMarkers = busyMarkers.length > 0 + let thinking = false + // Output-silence watchdog against a stuck "thinking" indicator. The post-submit + // setThinking(true) is optimistic, and the idle MARKER that should clear it can + // be missed (it arrives mid-chunk with a busy marker, or fragmented across + // reads), so the spinner can stick long after the turn ends — or forever if the + // turn never started (a --resume replay swallowed the first message). A working + // claude repaints its spinner footer every few hundred ms, so once output has + // been SILENT for IDLE_SILENCE_MS while we still think it's busy, the turn is + // really over → force idle. Scoped to agents with a busy marker (claude). + const IDLE_SILENCE_MS = 3000 + let idleWatchdog: ReturnType | null = null + const disarmIdleWatchdog = (): void => { + if (idleWatchdog) { clearTimeout(idleWatchdog); idleWatchdog = null } + } + // (Re)start the silence timer. Called when thinking begins and on every output + // chunk while thinking, so the window only elapses once claude has gone quiet. 
+ const armIdleWatchdog = (): void => { + if (!hasBusyMarkers || !thinking) return + disarmIdleWatchdog() + idleWatchdog = setTimeout(() => { + idleWatchdog = null + if (thinking) { + logger.debug(`${debugPrefix} idle watchdog: ${IDLE_SILENCE_MS}ms of silence; forcing idle`) + thinking = false + opts.onThinkingChange?.(false) + } + }, IDLE_SILENCE_MS) + idleWatchdog.unref?.() + } + const setThinking = (next: boolean): void => { + if (next === thinking) { + if (next) armIdleWatchdog() // refresh the silence window on repeated busy signals + return + } + thinking = next + if (next) armIdleWatchdog() + else disarmIdleWatchdog() + opts.onThinkingChange?.(next) + } + + // Wait until the agent's TUI is ready to receive input. Marker-based agents + // require both the prompt marker AND settled output; markerless agents use + // idle alone. A longer-idle fallback prevents hanging if a marker never + // matches (UI change). + const waitForInputReady = async (timeoutMs = 20000): Promise => { + const start = Date.now() + while (Date.now() - start < timeoutMs) { + if (signal?.aborted || !manager.isRunning) return + const idle = Date.now() - lastOutputAt + if (hasMarkers) { + if (promptSeen && idle >= idleReadyMs) return + } else if (sawOutput && idle >= idleReadyMs) { + return + } + if (sawOutput && idle >= 3000) return + await sleep(80) + } + } + + // Type the text, confirm the agent ingested it (its TUI echoes keystrokes → + // output), then submit with CR. If no echo comes back, stdin isn't wired up + // yet, so retry — this is what was dropping the first message. CR is sent + // separately so the text isn't submitted before it's buffered. 
+ const submitMessage = async (message: string): Promise => { + let echoed = false + for (let attempt = 0; attempt < 3 && !echoed; attempt++) { + const before = lastOutputAt + manager.write(message) + const waitStart = Date.now() + while (Date.now() - waitStart < 700) { + if (signal?.aborted || !manager.isRunning) return + if (lastOutputAt > before) { echoed = true; break } + await sleep(40) + } + if (!echoed && process.env.DEBUG_PTY) { + logger.debug(`${debugPrefix} no echo after write (attempt ${attempt + 1}); retrying`) + } + } + await sleep(150) + manager.write('\r') + await sleep(50) + } + + const abortHandler = () => { + logger.debug(`${debugPrefix} Abort signal received`) + manager.kill() + } + signal?.addEventListener('abort', abortHandler, { once: true }) + + try { + manager.spawn({ + command: opts.command, + args: opts.args, + cwd: opts.cwd, + env: spawnEnv, + cols: 80, + rows: 24, + onData: (data) => { + sawOutput = true + lastOutputAt = Date.now() + // Auto-approve the first-run trust/safety prompt (Enter = default + // "Yes"). Do this BEFORE prompt detection so the trust screen + // isn't mistaken for the input prompt — otherwise the first user + // message gets consumed as the trust answer. + if (!trustHandled && trustMarkers.length > 0 && trustMarkers.some((m) => data.includes(m))) { + trustHandled = true + logger.debug(`${debugPrefix} trust prompt detected; auto-approving with Enter`) + manager.write('\r') + } else if (hasMarkers && !promptSeen && markers.some((m) => data.includes(m))) { + promptSeen = true + } + // Track the working/idle state from the live footer. The busy + // marker (spinner/"esc to interrupt") wins when both appear in a + // chunk; chunks with neither leave the state unchanged. 
+ if (busyMarkers.length > 0 && busyMarkers.some((m) => data.includes(m))) { + setThinking(true) + } else if (idleMarkers.length > 0 && idleMarkers.some((m) => data.includes(m))) { + setThinking(false) + } else if (thinking) { + // Still producing output (e.g. streaming response text with no + // footer marker in this chunk) — keep the silence watchdog at bay. + armIdleWatchdog() + } + if (process.env.DEBUG_PTY) logger.debug(`${debugPrefix} onData: ${data.length} bytes`) + opts.onMessage(data) + }, + onExit: (code) => { + logger.debug(`${debugPrefix} Process exited with code ${code}`) + setThinking(false) + opts.onExit?.(code) + }, + onError: (error) => { + logger.debug(`${debugPrefix} PTY error: ${error.message}`, error) + }, + }) + + if (!manager.isRunning) { + logger.debug(`${debugPrefix} Failed to spawn ${opts.command} PTY`) + return + } + + opts.registerControls?.({ + resize: (cols: number, rows: number) => { + if (!manager.isRunning) return + if (!Number.isInteger(cols) || !Number.isInteger(rows) || cols < 1 || rows < 1) return + manager.resize(cols, rows) + }, + // Inject raw keystrokes into the live TUI — used to drive in-place + // settings changes (e.g. claude's `/model`/`/effort` slash commands) + // without re-spawning the process. + sendKeys: (data: string) => { + if (!manager.isRunning || !data) return + manager.write(data) + } + }) + + opts.onReady() + + // Spawn the agent up-front and wait until its prompt is ready BEFORE any + // message arrives, so the first user message is processed immediately + // instead of being consumed as the spawn trigger. 
+ await waitForInputReady() + + while (manager.isRunning) { + if (signal?.aborted) { + logger.debug(`${debugPrefix} Aborted`) + break + } + + const next = await opts.nextMessage() + if (!next) { + logger.debug(`${debugPrefix} No more input; waiting for process to finish`) + break + } + + if (!manager.isRunning) { + logger.debug(`${debugPrefix} Process exited while waiting for message`) + break + } + + const cmd = parseSpecialCommand(next.message) + if (cmd.type === 'clear' || cmd.type === 'compact') { + logger.debug(`${debugPrefix} ${cmd.type} command - ignoring in PTY mode`) + continue + } + + // Queue semantics: wait until output goes idle (agent back at the + // prompt) before sending the next queued message. + await waitForInputReady() + if (!manager.isRunning || signal?.aborted) { + break + } + + if (process.env.DEBUG_PTY) logger.debug(`${debugPrefix} write(loop): ${next.message}`) + await submitMessage(next.message) + // The agent is now working on this input — show "thinking" right away + // (a busy marker reinforces it; the idle marker clears it when done). 
+ setThinking(true) + } + } finally { + disarmIdleWatchdog() + signal?.removeEventListener('abort', abortHandler) + manager.kill() + logger.debug(`${debugPrefix} PTY session ended`) + } +} diff --git a/cli/src/claude/__tests__/__echo.js b/cli/src/claude/__tests__/__echo.js new file mode 100644 index 0000000000..2bf7913cb0 --- /dev/null +++ b/cli/src/claude/__tests__/__echo.js @@ -0,0 +1,3 @@ +const readline = require('readline'); +const rl = readline.createInterface({ input: process.stdin, terminal: false }); +rl.on('line', (line) => { process.stdout.write('echo:' + line + '\n'); }); From c19d7718fa6664f08467cac5f8598cadcf4bac3a Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 12 Jun 2026 17:28:15 +0900 Subject: [PATCH 07/26] feat(pty): isolate claude folder-trust in a disposable config dir --- .../claude/__tests__/claudePty.real.test.ts | 63 ++++++++ cli/src/claude/__tests__/claudePty.test.ts | 104 ++++++++++++ .../claude/__tests__/trustedConfigDir.test.ts | 149 ++++++++++++++++++ cli/src/claude/claudePty.ts | 97 ++++++++++++ cli/src/claude/trustedConfigDir.ts | 111 +++++++++++++ 5 files changed, 524 insertions(+) create mode 100644 cli/src/claude/__tests__/claudePty.real.test.ts create mode 100644 cli/src/claude/__tests__/claudePty.test.ts create mode 100644 cli/src/claude/__tests__/trustedConfigDir.test.ts create mode 100644 cli/src/claude/claudePty.ts create mode 100644 cli/src/claude/trustedConfigDir.ts diff --git a/cli/src/claude/__tests__/claudePty.real.test.ts b/cli/src/claude/__tests__/claudePty.real.test.ts new file mode 100644 index 0000000000..352e032938 --- /dev/null +++ b/cli/src/claude/__tests__/claudePty.real.test.ts @@ -0,0 +1,63 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { AgentPtyManager } from '@/agent/AgentPtyManager' + +async function waitForOutput(onData: ReturnType, timeoutMs = 3000): Promise { + const deadline = Date.now() + timeoutMs + while (Date.now() < deadline) { + if (onData.mock.calls.length > 0) 
return + await new Promise(r => setTimeout(r, 10)) + } +} + +// Real PTY spawn requires the Bun runtime (Bun.spawn terminal). Vitest runs +// its test workers under Node, where Bun is undefined, so skip there. Run with +// the Bun runtime to exercise these. +describe.skipIf(typeof Bun === 'undefined')('claudePty real PTY', () => { + let manager: AgentPtyManager + + afterEach(() => { + manager?.kill() + }) + + it('onData fires for every write (messages 1, 2, 3)', async () => { + manager = new AgentPtyManager() + const onData = vi.fn() + const onError = vi.fn((err: Error) => { + console.error('[test] spawn error:', err.message) + }) + + manager.spawn({ + command: 'bash', + args: ['-c', 'while IFS= read -r line; do echo "echo:$line"; done'], + onData, + onError, + }) + + expect(manager.isRunning).toBe(true) + if (!manager.isRunning) { + console.error('[test] manager not running, onError calls:', onError.mock.calls) + return + } + + manager.write('first\n') + await waitForOutput(onData) + expect(onData).toHaveBeenCalled() + const firstCalls = onData.mock.calls.length + const firstOutput = onData.mock.calls.map(c => c[0]).join('') + expect(firstOutput).toContain('echo:first') + onData.mockClear() + + manager.write('second\n') + await waitForOutput(onData) + expect(onData).toHaveBeenCalled() + const secondOutput = onData.mock.calls.map(c => c[0]).join('') + expect(secondOutput).toContain('echo:second') + onData.mockClear() + + manager.write('third\n') + await waitForOutput(onData) + expect(onData).toHaveBeenCalled() + const thirdOutput = onData.mock.calls.map(c => c[0]).join('') + expect(thirdOutput).toContain('echo:third') + }) +}) diff --git a/cli/src/claude/__tests__/claudePty.test.ts b/cli/src/claude/__tests__/claudePty.test.ts new file mode 100644 index 0000000000..989d07a1b3 --- /dev/null +++ b/cli/src/claude/__tests__/claudePty.test.ts @@ -0,0 +1,104 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' + +// claudePty is a thin wrapper over the shared 
runAgentPty driver. Here we only +// verify it forwards the correct claude-specific options; the PTY behavior +// (spawn/ready/echo-submit/loop) is tested in runAgentPty.test.ts. +vi.mock('@/agent/runAgentPty', () => ({ + runAgentPty: vi.fn(async () => {}), +})) + +vi.mock('@/lib', () => ({ + logger: { debug: vi.fn() }, +})) + +// Trust isolation is unit-tested in trustedConfigDir.test.ts; here we only +// verify claudePty wires it into the spawn env and cleans up afterwards. +vi.mock('@/claude/trustedConfigDir', () => ({ + prepareTrustedConfigDir: vi.fn(() => '/tmp/fake-cfg'), + cleanupTrustedConfigDir: vi.fn(), +})) + +import { claudePty } from '../claudePty' +import { runAgentPty } from '@/agent/runAgentPty' +import { cleanupTrustedConfigDir, prepareTrustedConfigDir } from '@/claude/trustedConfigDir' + +type ClaudePtyOpts = Parameters[0] + +function makeOpts(overrides: Partial = {}): ClaudePtyOpts { + return { + sessionId: 'test-session', + path: '/tmp/test', + nextMessage: vi.fn(), + onReady: vi.fn(), + onMessage: vi.fn(), + ...overrides, + } +} + +function lastCall() { + const mock = vi.mocked(runAgentPty) + return mock.mock.calls[mock.mock.calls.length - 1]![0] +} + +describe('claudePty wrapper', () => { + afterEach(() => { + vi.mocked(runAgentPty).mockClear() + }) + + it('spawns the claude command', async () => { + await claudePty(makeOpts()) + expect(runAgentPty).toHaveBeenCalled() + expect(lastCall().command).toBe('claude') + expect(lastCall().cwd).toBe('/tmp/test') + }) + + it('includes --settings when provided, preserving claudeArgs', async () => { + await claudePty(makeOpts({ hookSettingsPath: '/tmp/hooks/h.json', claudeArgs: ['--model', 'opus'] })) + const args = lastCall().args + const idx = args.indexOf('--settings') + expect(idx).toBeGreaterThanOrEqual(0) + expect(args[idx + 1]).toBe('/tmp/hooks/h.json') + expect(args).toEqual(expect.arrayContaining(['--model', 'opus'])) + }) + + it('omits --settings when no hookSettingsPath', async () => { + await 
claudePty(makeOpts({ claudeArgs: ['--model', 'opus'] })) + expect(lastCall().args).not.toContain('--settings') + }) + + it('passes claude prompt + trust markers and DISABLE_AUTOUPDATER', async () => { + await claudePty(makeOpts()) + expect(lastCall().promptMarkers).toEqual(expect.arrayContaining(['for shortcuts'])) + // '❯' must NOT be a prompt marker — it appears in the trust screen too. + expect(lastCall().promptMarkers).not.toContain('❯') + expect(lastCall().trustMarkers).toEqual(expect.arrayContaining(['trust this folder'])) + expect(lastCall().extraEnv).toMatchObject({ DISABLE_AUTOUPDATER: '1' }) + }) + + it('forwards callbacks and signal', async () => { + const nextMessage = vi.fn() + const onReady = vi.fn() + const onMessage = vi.fn() + const onExit = vi.fn() + const controller = new AbortController() + await claudePty(makeOpts({ nextMessage, onReady, onMessage, onExit, signal: controller.signal })) + const call = lastCall() + expect(call.nextMessage).toBe(nextMessage) + expect(call.onReady).toBe(onReady) + expect(call.onMessage).toBe(onMessage) + expect(call.onExit).toBe(onExit) + expect(call.signal).toBe(controller.signal) + }) + + it('passes claudeEnvVars as envVars', async () => { + await claudePty(makeOpts({ claudeEnvVars: { FOO: 'bar' } })) + expect(lastCall().envVars).toEqual({ FOO: 'bar' }) + }) + + it('isolates folder trust via CLAUDE_CONFIG_DIR and cleans up after', async () => { + await claudePty(makeOpts({ path: '/work/dir' })) + expect(prepareTrustedConfigDir).toHaveBeenCalledWith('/work/dir') + expect(lastCall().extraEnv).toMatchObject({ CLAUDE_CONFIG_DIR: '/tmp/fake-cfg' }) + expect(cleanupTrustedConfigDir).toHaveBeenCalledWith('/tmp/fake-cfg') + }) +}) diff --git a/cli/src/claude/__tests__/trustedConfigDir.test.ts b/cli/src/claude/__tests__/trustedConfigDir.test.ts new file mode 100644 index 0000000000..299633593d --- /dev/null +++ b/cli/src/claude/__tests__/trustedConfigDir.test.ts @@ -0,0 +1,149 @@ +import { afterEach, beforeEach, describe, 
expect, it, vi } from 'vitest' + +vi.mock('node:fs', () => ({ + mkdtempSync: vi.fn(() => '/tmp/hapi-claude-cfg-abc'), + readdirSync: vi.fn(() => ['.credentials.json', 'projects', 'settings.json']), + symlinkSync: vi.fn(), + readFileSync: vi.fn(() => JSON.stringify({ projects: { '/other': { hasTrustDialogAccepted: true } } })), + writeFileSync: vi.fn(), + rmSync: vi.fn(), +})) +vi.mock('node:os', () => ({ + homedir: () => '/home/user', + tmpdir: () => '/tmp', +})) +vi.mock('@/lib', () => ({ logger: { debug: vi.fn() } })) + +import { mkdtempSync, readdirSync, readFileSync, rmSync, symlinkSync, writeFileSync } from 'node:fs' +import { cleanupTrustedConfigDir, prepareTrustedConfigDir } from '../trustedConfigDir' + +function findWrite(suffix: string) { + return vi.mocked(writeFileSync).mock.calls.find((c) => String(c[0]).endsWith(suffix)) +} + +describe('prepareTrustedConfigDir', () => { + beforeEach(() => { + delete process.env.CLAUDE_CONFIG_DIR + }) + afterEach(() => { + vi.clearAllMocks() + }) + + it('symlinks every real config entry into the temp dir', () => { + prepareTrustedConfigDir('/work') + expect(symlinkSync).toHaveBeenCalledWith('/home/user/.claude/.credentials.json', '/tmp/hapi-claude-cfg-abc/.credentials.json') + expect(symlinkSync).toHaveBeenCalledWith('/home/user/.claude/projects', '/tmp/hapi-claude-cfg-abc/projects') + expect(symlinkSync).toHaveBeenCalledWith('/home/user/.claude/settings.json', '/tmp/hapi-claude-cfg-abc/settings.json') + }) + + it('never symlinks a .claude.json entry (would writethrough to the real file)', () => { + // A custom CLAUDE_CONFIG_DIR can itself hold a .claude.json. Symlinking it + // and then writeFileSync-ing the trust-patched copy would follow the link + // and mutate the real file — so the entry must be skipped, not linked. 
+ vi.mocked(readdirSync).mockReturnValueOnce(['.credentials.json', '.claude.json'] as never) + prepareTrustedConfigDir('/work') + const linkedDotJson = vi.mocked(symlinkSync).mock.calls.find((c) => String(c[1]).endsWith('/.claude.json')) + expect(linkedDotJson).toBeUndefined() + // The private copy is still written into the temp dir. + expect(findWrite('.claude.json')![0]).toBe('/tmp/hapi-claude-cfg-abc/.claude.json') + }) + + it('writes a private .claude.json with the working folder pre-trusted', () => { + prepareTrustedConfigDir('/work') + const call = findWrite('.claude.json') + expect(call).toBeDefined() + // The copy lives in the temp dir, NOT in the user's home. + expect(call![0]).toBe('/tmp/hapi-claude-cfg-abc/.claude.json') + const written = JSON.parse(String(call![1])) + expect(written.projects['/work'].hasTrustDialogAccepted).toBe(true) + }) + + it('preserves the user\'s existing trusted projects in the copy', () => { + prepareTrustedConfigDir('/work') + const written = JSON.parse(String(findWrite('.claude.json')![1])) + expect(written.projects['/other'].hasTrustDialogAccepted).toBe(true) + }) + + it('never writes to the real ~/.claude.json', () => { + prepareTrustedConfigDir('/work') + const homeWrite = vi.mocked(writeFileSync).mock.calls.find((c) => c[0] === '/home/user/.claude.json') + expect(homeWrite).toBeUndefined() + // It only reads the real one. 
+ expect(readFileSync).toHaveBeenCalledWith('/home/user/.claude.json', 'utf-8') + }) + + it('honors an existing CLAUDE_CONFIG_DIR as the real config source', () => { + process.env.CLAUDE_CONFIG_DIR = '/custom/cfg' + prepareTrustedConfigDir('/work') + expect(symlinkSync).toHaveBeenCalledWith('/custom/cfg/.credentials.json', '/tmp/hapi-claude-cfg-abc/.credentials.json') + }) + + it('returns the temp dir path on success', () => { + expect(prepareTrustedConfigDir('/work')).toBe('/tmp/hapi-claude-cfg-abc') + }) + + it('returns undefined (no throw) when preparation fails', () => { + vi.mocked(mkdtempSync).mockImplementationOnce(() => { throw new Error('no tmp') }) + expect(prepareTrustedConfigDir('/work')).toBeUndefined() + }) +}) + +describe('cleanupTrustedConfigDir', () => { + afterEach(() => vi.clearAllMocks()) + + it('recursively removes the temp dir', () => { + cleanupTrustedConfigDir('/tmp/hapi-claude-cfg-abc') + expect(rmSync).toHaveBeenCalledWith('/tmp/hapi-claude-cfg-abc', expect.objectContaining({ recursive: true, force: true })) + }) + + it('is a no-op for undefined', () => { + cleanupTrustedConfigDir(undefined) + expect(rmSync).not.toHaveBeenCalled() + }) +}) + +// Archive (KillSession) and SIGTERM/SIGINT end the runner with process.exit(), +// which skips claudePty's finally → cleanupTrustedConfigDir never runs. A +// process 'exit' handler must reap whatever is still pending so /tmp doesn't +// accumulate hapi-claude-cfg-* across sessions. 
+describe('exit-time reaping of leaked dirs', () => { + afterEach(() => { + vi.restoreAllMocks() + delete process.env.CLAUDE_CONFIG_DIR + }) + + it('registers a process exit handler that reaps still-pending dirs', async () => { + vi.resetModules() + const onSpy = vi.spyOn(process, 'on') + const { prepareTrustedConfigDir } = await import('../trustedConfigDir') + prepareTrustedConfigDir('/work') + const exitHandler = onSpy.mock.calls.find((c) => c[0] === 'exit')?.[1] as (() => void) | undefined + expect(exitHandler).toBeDefined() + vi.mocked(rmSync).mockClear() + exitHandler!() + expect(rmSync).toHaveBeenCalledWith('/tmp/hapi-claude-cfg-abc', expect.objectContaining({ recursive: true, force: true })) + }) + + it('does not reap a dir already cleaned up via cleanupTrustedConfigDir', async () => { + vi.resetModules() + const onSpy = vi.spyOn(process, 'on') + const { prepareTrustedConfigDir, cleanupTrustedConfigDir } = await import('../trustedConfigDir') + const dir = prepareTrustedConfigDir('/work') + cleanupTrustedConfigDir(dir) + const exitHandler = onSpy.mock.calls.find((c) => c[0] === 'exit')?.[1] as (() => void) | undefined + vi.mocked(rmSync).mockClear() + exitHandler?.() + expect(rmSync).not.toHaveBeenCalled() + }) + + it('registers the exit handler only once across multiple prepares', async () => { + vi.resetModules() + const onSpy = vi.spyOn(process, 'on') + const { prepareTrustedConfigDir } = await import('../trustedConfigDir') + prepareTrustedConfigDir('/a') + prepareTrustedConfigDir('/b') + prepareTrustedConfigDir('/c') + const exitRegistrations = onSpy.mock.calls.filter((c) => c[0] === 'exit') + expect(exitRegistrations).toHaveLength(1) + }) +}) diff --git a/cli/src/claude/claudePty.ts b/cli/src/claude/claudePty.ts new file mode 100644 index 0000000000..c5ebd937f0 --- /dev/null +++ b/cli/src/claude/claudePty.ts @@ -0,0 +1,97 @@ +import { runAgentPty } from "@/agent/runAgentPty" +import { cleanupTrustedConfigDir, prepareTrustedConfigDir } from 
"@/claude/trustedConfigDir" + +export type ClaudePtyOpts = { + sessionId: string | null + path: string + claudeEnvVars?: Record + claudeArgs?: string[] + /** + * Path to a Claude settings file registering a SessionStart hook. When + * present, `--settings ` is appended so the interactive (PTY) Claude + * reports its freshly created sessionId back to Hapi, enabling the session + * scanner to tail the matching jsonl transcript for structured messages. + */ + hookSettingsPath?: string + signal?: AbortSignal + nextMessage: () => Promise<{ message: string } | null> + onReady: () => void + onMessage: (data: string) => void + onThinkingChange?: (thinking: boolean) => void + onExit?: (code: number | null) => void + registerControls?: (controls: { resize: (cols: number, rows: number) => void; sendKeys: (data: string) => void }) => void +} + +function buildClaudePtyArgs(opts: ClaudePtyOpts): string[] { + const args: string[] = [] + if (opts.hookSettingsPath) { + args.push('--settings', opts.hookSettingsPath) + } + if (opts.claudeArgs) { + args.push(...opts.claudeArgs) + } + return args +} + +// claude's ink TUI renders these strings once the input prompt is ready. +// NOTE: '❯' is intentionally excluded — it also appears in the first-run trust +// prompt ("❯ 1. Yes, I trust this folder"), so using it as a prompt marker +// would make the trust screen look like the input prompt. +const CLAUDE_PROMPT_MARKERS = ['for shortcuts', 'bypass permissions', 'esc to interrupt'] +// First-run trust/safety prompt. Primary suppression is an isolated +// CLAUDE_CONFIG_DIR with the folder pre-trusted (see prepareTrustedConfigDir); +// these markers are a fallback so the driver auto-approves (Enter = Yes) if the +// prompt still appears. We deliberately do NOT touch the user's ~/.claude.json. +const CLAUDE_TRUST_MARKERS = ['trust this folder', 'Yes, I trust', 'safety check'] +// Footer shown while generating ("… (esc to interrupt)") vs at an idle input +// prompt ("? for shortcuts"). 
Drives the chat thinking indicator. +const CLAUDE_BUSY_MARKERS = ['esc to interrupt'] +const CLAUDE_IDLE_MARKERS = ['for shortcuts'] + +// When claude is launched from a process that itself inherited Claude Code's env +// (e.g. the runner started from inside a Claude session, a hook, or a sub-agent), +// the child claude sees CLAUDECODE / CLAUDE_CODE_* and treats itself as a nested +// session — and STOPS WRITING ITS JSONL TRANSCRIPT (so HAPI's scanner has nothing +// to forward to chat). Strip these markers so the spawned claude is a clean, +// top-level session that persists its transcript. (Note: CLAUDE_CONFIG_DIR is +// NOT matched and is preserved.) +function claudeInheritedEnvKeys(): string[] { + return Object.keys(process.env).filter( + (k) => k === 'CLAUDECODE' || k.startsWith('CLAUDE_CODE_') + ) +} + +export async function claudePty(opts: ClaudePtyOpts): Promise { + // Pre-trust the folder in a throwaway config dir so the trust prompt never + // shows — without mutating the user's real ~/.claude.json. + const configDir = prepareTrustedConfigDir(opts.path) + try { + return await runAgentPty({ + command: 'claude', + args: buildClaudePtyArgs(opts), + cwd: opts.path, + envVars: opts.claudeEnvVars, + extraEnv: { + DISABLE_AUTOUPDATER: '1', + ...(configDir ? { CLAUDE_CONFIG_DIR: configDir } : {}), + }, + // Drop inherited CLAUDECODE / CLAUDE_CODE_* so claude saves its + // transcript (see claudeInheritedEnvKeys). 
+ unsetEnv: claudeInheritedEnvKeys(), + promptMarkers: CLAUDE_PROMPT_MARKERS, + trustMarkers: CLAUDE_TRUST_MARKERS, + busyMarkers: CLAUDE_BUSY_MARKERS, + idleMarkers: CLAUDE_IDLE_MARKERS, + debugPrefix: '[claudePty]', + signal: opts.signal, + nextMessage: opts.nextMessage, + onReady: opts.onReady, + onMessage: opts.onMessage, + onThinkingChange: opts.onThinkingChange, + onExit: opts.onExit, + registerControls: opts.registerControls, + }) + } finally { + cleanupTrustedConfigDir(configDir) + } +} diff --git a/cli/src/claude/trustedConfigDir.ts b/cli/src/claude/trustedConfigDir.ts new file mode 100644 index 0000000000..f653421a1b --- /dev/null +++ b/cli/src/claude/trustedConfigDir.ts @@ -0,0 +1,111 @@ +import { mkdtempSync, readdirSync, readFileSync, rmSync, symlinkSync, writeFileSync } from "node:fs" +import { homedir, tmpdir } from "node:os" +import { join } from "node:path" +import { logger } from "@/lib" + +// Temp config dirs still pending cleanup. The normal path removes a dir via +// cleanupTrustedConfigDir (claudePty's finally), but session archive (KillSession +// RPC) and SIGTERM/SIGINT terminate the runner with process.exit(), which skips +// that finally. A synchronous 'exit' handler reaps whatever is still registered +// so these temp dirs don't pile up in /tmp across sessions. +const pendingConfigDirs = new Set() +let exitHandlerRegistered = false + +function ensureExitCleanupRegistered(): void { + if (exitHandlerRegistered) return + exitHandlerRegistered = true + // 'exit' callbacks must be synchronous; rmSync fits. It does not follow + // symlinks, so the real ~/.claude the dir links to is preserved. 
+ process.on('exit', () => { + for (const dir of pendingConfigDirs) { + try { + rmSync(dir, { recursive: true, force: true }) + } catch { + // best-effort; process is exiting + } + } + pendingConfigDirs.clear() + }) +} + +/** + * Build an isolated CLAUDE_CONFIG_DIR that shares the user's real Claude state + * but pre-trusts the working folder — so the first-run "Is this a project you + * trust?" prompt never appears in PTY mode, WITHOUT mutating the user's own + * ~/.claude.json. + * + * How: every entry in the real config dir (credentials, projects/transcripts, + * settings, hooks, ...) is symlinked into a fresh temp dir, so login state and + * transcripts stay shared with the real install. Only `.claude.json` is a + * private copy, with `projects[cwd].hasTrustDialogAccepted = true` added. + * + * Claude resolves `.claude.json` and everything else from CLAUDE_CONFIG_DIR, so + * pointing the spawned process at this temp dir suppresses the trust prompt. + * The parent process's process.env is left untouched (see runAgentPty), so the + * session scanner still resolves transcripts against the real ~/.claude (which + * the symlinked `projects` entry points back to). + * + * Returns the temp dir path, or undefined if preparation failed (caller then + * falls back to the runtime trust-prompt auto-approve). + */ +export function prepareTrustedConfigDir(cwd: string): string | undefined { + try { + const realConfigDir = process.env.CLAUDE_CONFIG_DIR || join(homedir(), '.claude') + const realDotJson = join(homedir(), '.claude.json') + + const dir = mkdtempSync(join(tmpdir(), 'hapi-claude-cfg-')) + pendingConfigDirs.add(dir) + ensureExitCleanupRegistered() + + // Share all real config state via symlinks (login, transcripts, settings). + // `.claude.json` is skipped here — it lives in homedir, not in the config + // dir, and we want a private trust-patched copy anyway. 
+ for (const entry of readdirSync(realConfigDir)) { + // Never symlink `.claude.json`: we write a private trust-patched copy + // below, and writeFileSync would follow the symlink and mutate the + // real file (only reachable when CLAUDE_CONFIG_DIR points at a dir + // that itself holds a .claude.json; the default ~/.claude does not). + if (entry === '.claude.json') { + continue + } + try { + symlinkSync(join(realConfigDir, entry), join(dir, entry)) + } catch (e) { + logger.debug(`[trustedConfigDir] failed to symlink ${entry}`, e) + } + } + + // Private .claude.json with the folder pre-trusted. Original untouched. + let config: Record = {} + try { + config = JSON.parse(readFileSync(realDotJson, 'utf-8')) + } catch (e) { + logger.debug('[trustedConfigDir] could not read ~/.claude.json; starting fresh', e) + } + const projects = (config.projects ?? {}) as Record> + projects[cwd] = { ...(projects[cwd] ?? {}), hasTrustDialogAccepted: true } + config.projects = projects + writeFileSync(join(dir, '.claude.json'), JSON.stringify(config)) + + logger.debug(`[trustedConfigDir] prepared isolated config at ${dir} (folder pre-trusted)`) + return dir + } catch (e) { + logger.debug('[trustedConfigDir] preparation failed; relying on trust auto-approve', e) + return undefined + } +} + +/** + * Remove a temp config dir created by prepareTrustedConfigDir. Symlinked entries + * are unlinked (Node's rm does not follow symlinks), so the real ~/.claude state + * they point to is preserved. 
+ */ +export function cleanupTrustedConfigDir(dir: string | undefined): void { + if (!dir) return + pendingConfigDirs.delete(dir) + try { + rmSync(dir, { recursive: true, force: true }) + } catch (e) { + logger.debug(`[trustedConfigDir] cleanup failed for ${dir}`, e) + } +} From 782474b48b0805d5f09c4c4d54861420d763d952 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 12 Jun 2026 17:28:15 +0900 Subject: [PATCH 08/26] perf(scanner): incremental byte reads and adaptive polling --- cli/src/claude/utils/sessionScanner.ts | 103 ++++++++++++------ .../common/session/BaseSessionScanner.ts | 73 ++++++++++++- 2 files changed, 143 insertions(+), 33 deletions(-) diff --git a/cli/src/claude/utils/sessionScanner.ts b/cli/src/claude/utils/sessionScanner.ts index d97dd22304..2e2244af44 100644 --- a/cli/src/claude/utils/sessionScanner.ts +++ b/cli/src/claude/utils/sessionScanner.ts @@ -1,6 +1,6 @@ import { RawJSONLines, RawJSONLinesSchema } from "../types"; import { basename, join } from "node:path"; -import { readFile } from "node:fs/promises"; +import { open, stat } from "node:fs/promises"; import { logger } from "@/ui/logger"; import { getProjectPath } from "./path"; import { BaseSessionScanner, SessionFileScanEntry, SessionFileScanResult, SessionFileScanStats } from "@/modules/common/session/BaseSessionScanner"; @@ -35,6 +35,11 @@ export async function createSessionScanner(opts: { }, onNewSession: (sessionId: string) => { scanner.onNewSession(sessionId); + }, + // Call when the user submits input so the scanner polls fast right away + // (rather than waiting up to the idle interval for the first response). 
+ markActive: () => { + scanner.markActive(); } }; } @@ -51,7 +56,12 @@ class ClaudeSessionScanner extends BaseSessionScanner { private readonly scannedSessions = new Set(); constructor(opts: { sessionId: string | null; workingDirectory: string; onMessage: (message: RawJSONLines) => void }) { - super({ intervalMs: 3000 }); + // fs.watch (in BaseSessionScanner.ensureWatcher) drives near-real-time + // updates; the poll is a fallback for missed watch events. Adaptive: 5s + // while idle (cheap — a stat, then an incremental read of only new + // bytes), 100ms while a response/tool-call is active or just after user + // input, dropping back after 3s of quiet. + super({ intervalMs: 5000, activeIntervalMs: 100, activeWindowMs: 3000 }); this.projectDir = getProjectPath(opts.workingDirectory); this.onMessage = opts.onMessage; this.currentSessionId = opts.sessionId; @@ -83,11 +93,11 @@ class ClaudeSessionScanner extends BaseSessionScanner { return; } const sessionFile = this.sessionFilePath(this.currentSessionId); - const { events, totalLines } = await readSessionLog(sessionFile, 0); + const { events, nextCursor } = await readSessionLog(sessionFile, 0); logger.debug(`[SESSION_SCANNER] Marking ${events.length} existing messages as processed from session ${this.currentSessionId}`); const keys = events.map((entry) => messageKey(entry.event)); this.seedProcessedKeys(keys); - this.setCursor(sessionFile, totalLines); + this.setCursor(sessionFile, nextCursor); } protected async beforeScan(): Promise { @@ -113,10 +123,10 @@ class ClaudeSessionScanner extends BaseSessionScanner { if (sessionId) { this.scannedSessions.add(sessionId); } - const { events, totalLines } = await readSessionLog(filePath, cursor); + const { events, nextCursor } = await readSessionLog(filePath, cursor); return { events, - nextCursor: totalLines + nextCursor }; } @@ -169,52 +179,81 @@ function messageKey(message: RawJSONLines): string { } /** - * Read and parse session log file. 
- * Returns only valid conversation messages, silently skipping internal events. + * Incrementally read and parse a session log file. + * + * The cursor is a BYTE OFFSET into the (append-only) JSONL. Each scan stats the + * file and reads only the bytes after the cursor — so the cost is O(new content) + * regardless of how large the conversation has grown, instead of re-reading the + * whole file every poll. A trailing partial line (a write in progress) is left + * unconsumed until its newline arrives. If the file shrank (compaction rewrote + * it), the cursor resets to 0 and the whole file is re-read (dedup by uuid in the + * base scanner absorbs any re-sent events). */ -async function readSessionLog(filePath: string, startLine: number): Promise<{ events: SessionFileScanEntry[]; totalLines: number }> { - logger.debug(`[SESSION_SCANNER] Reading session file: ${filePath}`); - let file: string; +async function readSessionLog(filePath: string, startByte: number): Promise<{ events: SessionFileScanEntry[]; nextCursor: number }> { + let size: number; try { - file = await readFile(filePath, 'utf-8'); + size = (await stat(filePath)).size; } catch (error) { logger.debug(`[SESSION_SCANNER] Session file not found: ${filePath}`); - return { events: [], totalLines: startLine }; + return { events: [], nextCursor: startByte }; + } + + let from = startByte; + if (from > size) { + from = 0; // file was truncated/rewritten — re-read from the top } - const lines = file.split('\n'); - const hasTrailingEmpty = lines.length > 0 && lines[lines.length - 1] === ''; - const totalLines = hasTrailingEmpty ? 
lines.length - 1 : lines.length; - let effectiveStartLine = startLine; - if (effectiveStartLine > totalLines) { - effectiveStartLine = 0; + if (from >= size) { + return { events: [], nextCursor: size }; // no new bytes } + + let chunk: Buffer; + const fd = await open(filePath, 'r'); + try { + const length = size - from; + const buffer = Buffer.allocUnsafe(length); + // fd.read may return fewer bytes than requested even for a regular file; + // the tail of an allocUnsafe buffer is uninitialized heap, so only the + // first `bytesRead` bytes are valid. Operating past them would let a stray + // 0x0a in garbage advance the cursor past never-read data → dropped lines. + const { bytesRead } = await fd.read(buffer, 0, length, from); + chunk = buffer.subarray(0, bytesRead); + } finally { + await fd.close(); + } + + // Consume only through the last newline; keep any trailing partial line for + // the next scan (`from` always sits on a line boundary, so the chunk's first + // line is always complete). + const lastNewline = chunk.lastIndexOf(0x0a); + if (lastNewline === -1) { + return { events: [], nextCursor: from }; + } + const nextCursor = from + lastNewline + 1; + const text = chunk.subarray(0, lastNewline).toString('utf-8'); + const messages: SessionFileScanEntry[] = []; - for (let index = effectiveStartLine; index < lines.length; index += 1) { - const l = lines[index]; + for (const l of text.split('\n')) { + if (l.trim() === '') { + continue; + } try { - if (l.trim() === '') { - continue; - } - let message = JSON.parse(l); - - // Silently skip known internal Claude Code events - // These are state/tracking events, not conversation messages + const message = JSON.parse(l); + // Silently skip known internal Claude Code state/tracking events. 
if (message.type && INTERNAL_CLAUDE_EVENT_TYPES.has(message.type)) { continue; } - - let parsed = RawJSONLinesSchema.safeParse(message); + const parsed = RawJSONLinesSchema.safeParse(message); if (!parsed.success) { // Unknown message types are silently skipped. continue; } - messages.push({ event: parsed.data, lineIndex: index }); + messages.push({ event: parsed.data }); } catch (e) { logger.debug(`[SESSION_SCANNER] Error processing message: ${e}`); continue; } } - return { events: messages, totalLines }; + return { events: messages, nextCursor }; } function sessionIdFromPath(filePath: string): string | null { diff --git a/cli/src/modules/common/session/BaseSessionScanner.ts b/cli/src/modules/common/session/BaseSessionScanner.ts index e19d0e751c..9ffe448736 100644 --- a/cli/src/modules/common/session/BaseSessionScanner.ts +++ b/cli/src/modules/common/session/BaseSessionScanner.ts @@ -22,7 +22,17 @@ export type SessionFileScanStats = { }; type BaseSessionScannerOptions = { + /** Poll interval while idle (no recent events). */ intervalMs: number; + /** + * Poll interval while "active" — i.e. shortly after a user input or a new + * event. Defaults to `intervalMs` (adaptive polling disabled). Set lower + * (e.g. 100ms) for snappy updates during a live response without paying that + * cost while idle. + */ + activeIntervalMs?: number; + /** How long to stay on `activeIntervalMs` after the last activity. 
*/ + activeWindowMs?: number; }; export abstract class BaseSessionScanner { @@ -33,9 +43,25 @@ export abstract class BaseSessionScanner { private intervalId: ReturnType | null = null; private stopped = false; private scanPromise: Promise | null = null; + private currentIntervalMs: number; + private activeUntil = 0; protected constructor(private readonly options: BaseSessionScannerOptions) { this.sync = new InvalidateSync(() => this.scan()); + this.currentIntervalMs = options.intervalMs; + } + + private get idleIntervalMs(): number { + return this.options.intervalMs; + } + private get activeIntervalMs(): number { + return this.options.activeIntervalMs ?? this.options.intervalMs; + } + private get activeWindowMs(): number { + return this.options.activeWindowMs ?? 3000; + } + private get adaptiveEnabled(): boolean { + return this.activeIntervalMs < this.idleIntervalMs; } protected abstract findSessionFiles(): Promise; @@ -105,7 +131,47 @@ export abstract class BaseSessionScanner { public async start(): Promise { await this.initialize(); await this.sync.invalidateAndAwait(); - this.intervalId = setInterval(() => this.sync.invalidate(), this.options.intervalMs); + this.startInterval(this.idleIntervalMs); + } + + private startInterval(ms: number): void { + if (this.intervalId) { + clearInterval(this.intervalId); + } + this.currentIntervalMs = ms; + this.intervalId = setInterval(() => this.tick(), ms); + } + + private tick(): void { + // Drop back to the idle interval once the active window lapses. + if ( + this.adaptiveEnabled && + this.currentIntervalMs === this.activeIntervalMs && + Date.now() >= this.activeUntil + ) { + this.startInterval(this.idleIntervalMs); + } + this.sync.invalidate(); + } + + /** + * Signal external activity (e.g. the user just submitted input) so the + * scanner polls at `activeIntervalMs` and re-scans immediately. New events + * found during a scan extend the window automatically. 
+ */ + public markActive(): void { + this.extendActiveWindow(); + this.sync.invalidate(); + } + + private extendActiveWindow(): void { + if (!this.adaptiveEnabled) { + return; + } + this.activeUntil = Date.now() + this.activeWindowMs; + if (this.currentIntervalMs !== this.activeIntervalMs) { + this.startInterval(this.activeIntervalMs); + } } public async cleanup(): Promise { @@ -179,6 +245,11 @@ export abstract class BaseSessionScanner { for (const key of newKeys) { this.recordProcessedKey(key); } + if (newEvents.length > 0) { + // A live response/tool-call is streaming in — stay on the fast + // interval so the next chunk is picked up promptly. + this.extendActiveWindow(); + } } await this.afterScan(); } From 6b03e99bb5706822cbbb9c731c2ce124036588a7 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 12 Jun 2026 17:28:16 +0900 Subject: [PATCH 09/26] feat(pty): drive the claude PTY launcher with chat, model, resume and approvals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Launch claude inside a PTY and drive it from the web: chat input, in-place /model and /effort changes (auto-confirming claude's "Switch model?" dialog), and --resume. Tool approvals are bridged to the web instead of the SDK's canUseTool (which a PTY agent doesn't have): a PreToolUse hook forwards each tool call to the runner, which surfaces it in the existing web approval modal (reusing state.requests + the permission RPC — no web changes) and returns allow/deny to claude. - generateHookSettings registers the PreToolUse hook (PTY only) with a generous timeout so the blocking hook survives a slow phone approval. - sessionHookForwarder branches on the stdin hook_event_name: PreToolUse posts to a new endpoint and echoes claude's hookSpecificOutput decision on stdout; SessionStart keeps its fire-and-forget behavior. - startHookServer gains a /hook/pre-tool-use endpoint that awaits the decision. - PtyPermissionHandler reuses BasePermissionHandler. 
Read-only tools auto-allow; bypassPermissions allows all; everything else asks the web. "Allow for session" is honored, including Bash's command-qualified form (Bash() / Bash(:*)). Question tools (AskUserQuestion / request_user_input) are routed to the web too: the picked answers are injected back via the tool's updatedInput so claude echoes them instead of prompting in its TUI. Decisions are always allow/deny (never ask, which would fall back to the TUI prompt) and fail closed on errors. A default-mode PTY session therefore prompts for permission — and asks its questions — in the chat like the SDK path; the explicit YOLO toggle still opts into --yolo. --- .../agent/__tests__/bracketedPaste.test.ts | 28 + cli/src/agent/__tests__/loopBase.test.ts | 76 +++ cli/src/agent/__tests__/runAgentPty.test.ts | 126 ++++- .../agent/__tests__/runnerLifecycle.test.ts | 51 ++ cli/src/agent/bracketedPaste.ts | 15 + cli/src/agent/loopBase.ts | 37 +- cli/src/agent/runAgentPty.ts | 56 +- cli/src/agent/runnerLifecycle.ts | 17 +- cli/src/agent/sessionBase.ts | 15 + cli/src/api/apiMachine.ts | 5 +- cli/src/api/apiSession.ts | 120 ++++- .../__tests__/claudePtyLauncher.test.ts | 250 +++++++++ cli/src/claude/claudePty.ts | 3 + cli/src/claude/claudePtyLauncher.ts | 480 ++++++++++++++++++ cli/src/claude/loop.ts | 11 +- cli/src/claude/runClaude.ts | 57 ++- cli/src/claude/session.ts | 21 + .../utils/backSyncPermissionMode.test.ts | 34 ++ .../claude/utils/backSyncPermissionMode.ts | 40 ++ .../utils/claudePermissionPolicy.test.ts | 36 ++ .../claude/utils/claudePermissionPolicy.ts | 54 ++ .../claude/utils/ptyPermissionHandler.test.ts | 249 +++++++++ cli/src/claude/utils/ptyPermissionHandler.ts | 255 ++++++++++ .../claude/utils/sessionHookForwarder.test.ts | 151 ++++++ cli/src/claude/utils/sessionHookForwarder.ts | 146 ++++-- cli/src/claude/utils/startHookServer.test.ts | 83 ++- cli/src/claude/utils/startHookServer.ts | 83 +++ cli/src/commands/agentCommandOptions.ts | 2 +- 
cli/src/commands/claude.test.ts | 46 ++ cli/src/commands/claude.ts | 17 + cli/src/commands/resume.ts | 4 +- .../common/hooks/generateHookSettings.test.ts | 79 +++ .../common/hooks/generateHookSettings.ts | 44 +- cli/src/modules/common/rpcTypes.ts | 1 + cli/src/runner/buildCliArgs.test.ts | 46 ++ cli/src/runner/run.ts | 7 +- cli/src/ui/ink/ResumeSessionPicker.tsx | 4 +- 37 files changed, 2673 insertions(+), 76 deletions(-) create mode 100644 cli/src/agent/__tests__/bracketedPaste.test.ts create mode 100644 cli/src/agent/__tests__/loopBase.test.ts create mode 100644 cli/src/agent/__tests__/runnerLifecycle.test.ts create mode 100644 cli/src/agent/bracketedPaste.ts create mode 100644 cli/src/claude/__tests__/claudePtyLauncher.test.ts create mode 100644 cli/src/claude/claudePtyLauncher.ts create mode 100644 cli/src/claude/utils/backSyncPermissionMode.test.ts create mode 100644 cli/src/claude/utils/backSyncPermissionMode.ts create mode 100644 cli/src/claude/utils/claudePermissionPolicy.test.ts create mode 100644 cli/src/claude/utils/claudePermissionPolicy.ts create mode 100644 cli/src/claude/utils/ptyPermissionHandler.test.ts create mode 100644 cli/src/claude/utils/ptyPermissionHandler.ts create mode 100644 cli/src/claude/utils/sessionHookForwarder.test.ts create mode 100644 cli/src/modules/common/hooks/generateHookSettings.test.ts diff --git a/cli/src/agent/__tests__/bracketedPaste.test.ts b/cli/src/agent/__tests__/bracketedPaste.test.ts new file mode 100644 index 0000000000..53159435b4 --- /dev/null +++ b/cli/src/agent/__tests__/bracketedPaste.test.ts @@ -0,0 +1,28 @@ +import { describe, expect, it } from 'vitest' +import { bracketPasteIfMultiline } from '../bracketedPaste' + +const START = '\x1b[200~' +const END = '\x1b[201~' + +describe('bracketPasteIfMultiline', () => { + it('leaves a single-line message untouched', () => { + expect(bracketPasteIfMultiline('hello world')).toBe('hello world') + }) + + it('wraps a multiline message in bracketed-paste markers', () => { 
+ expect(bracketPasteIfMultiline('line 1\nline 2')).toBe(`${START}line 1\nline 2${END}`) + }) + + it('wraps an attachment-formatted prompt (@path\\n\\ntext)', () => { + expect(bracketPasteIfMultiline('@/tmp/a.png\n\ndescribe this')) + .toBe(`${START}@/tmp/a.png\n\ndescribe this${END}`) + }) + + it('wraps a trailing newline (so it is not interpreted as a premature submit)', () => { + expect(bracketPasteIfMultiline('text\n')).toBe(`${START}text\n${END}`) + }) + + it('leaves an empty string untouched', () => { + expect(bracketPasteIfMultiline('')).toBe('') + }) +}) diff --git a/cli/src/agent/__tests__/loopBase.test.ts b/cli/src/agent/__tests__/loopBase.test.ts new file mode 100644 index 0000000000..fcde251e35 --- /dev/null +++ b/cli/src/agent/__tests__/loopBase.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it, vi } from 'vitest' +import { runLocalRemoteLoop, type LoopLauncher, type SessionMode } from '../loopBase' + +// runLocalRemoteLoop only ever touches session.onModeChange, so a minimal fake +// session suffices. 
+function fakeSession() { + return { onModeChange: vi.fn() } +} + +type Reason = 'switch' | 'exit' + +function launcher(...reasons: Reason[]): LoopLauncher> { + let i = 0 + return vi.fn(async () => reasons[Math.min(i++, reasons.length - 1)]) +} + +async function run(opts: { + startingMode?: SessionMode + runLocal: LoopLauncher> + runRemote: LoopLauncher> + runPty?: LoopLauncher> +}) { + const session = fakeSession() + await runLocalRemoteLoop({ + session: session as never, + startingMode: opts.startingMode, + logTag: 'test', + runLocal: opts.runLocal, + runRemote: opts.runRemote, + runPty: opts.runPty, + }) + return session +} + +describe('runLocalRemoteLoop mode selection', () => { + it('a non-PTY session hands off local→SDK remote even when a runPty launcher is registered', async () => { + // The regression: claude always registers runPty, so a normal local + // session pressing space must still reach the SDK remote launcher, not + // PTY (which is opt-in). + const runLocal = launcher('switch') + const runRemote = launcher('exit') + const runPty = launcher('exit') + + const session = await run({ startingMode: 'local', runLocal, runRemote, runPty }) + + expect(runRemote).toHaveBeenCalledTimes(1) + expect(runPty).not.toHaveBeenCalled() + // The external mode reported is 'remote'. 
+ expect(session.onModeChange).toHaveBeenCalledWith('remote') + }) + + it('defaults (no startingMode) behave as a local→remote session', async () => { + const runLocal = launcher('switch') + const runRemote = launcher('exit') + const runPty = launcher('exit') + + await run({ runLocal, runRemote, runPty }) + + expect(runRemote).toHaveBeenCalledTimes(1) + expect(runPty).not.toHaveBeenCalled() + }) + + it('a PTY session toggles local↔pty and never uses the SDK remote launcher', async () => { + // pty → (switch) local → (switch) pty → (exit) + const runPty = launcher('switch', 'exit') + const runLocal = launcher('switch') + const runRemote = launcher('exit') + + const session = await run({ startingMode: 'pty', runLocal, runRemote, runPty }) + + expect(runPty).toHaveBeenCalledTimes(2) + expect(runRemote).not.toHaveBeenCalled() + // PTY is reported to the hub/UI as 'remote'. + expect(session.onModeChange).toHaveBeenCalledWith('remote') + }) +}) diff --git a/cli/src/agent/__tests__/runAgentPty.test.ts b/cli/src/agent/__tests__/runAgentPty.test.ts index 74455b9218..74779baa8b 100644 --- a/cli/src/agent/__tests__/runAgentPty.test.ts +++ b/cli/src/agent/__tests__/runAgentPty.test.ts @@ -4,13 +4,22 @@ const harness = vi.hoisted(() => { let _isRunning = true let _onExit: ((code: number | null, signal: string | null) => void) | null = null let _onData: ((data: string) => void) | null = null + let _onError: ((error: Error) => void) | null = null let _echo = true + let _spawnError: Error | null = null const m = { get isRunning() { return _isRunning }, spawn: vi.fn((opts: Record) => { _onExit = (opts.onExit as typeof _onExit) ?? null _onData = (opts.onData as typeof _onData) ?? null + _onError = (opts.onError as typeof _onError) ?? null + // Simulate the manager reporting a spawn failure: onError fires and + // the process never enters the running state. 
+ if (_spawnError) { + _isRunning = false + _onError?.(_spawnError) + } }), // By default simulate the agent echoing keystrokes back as output so the // echo-confirm in runAgentPty proceeds on the first attempt. @@ -24,13 +33,14 @@ const harness = vi.hoisted(() => { return { setRunning(v: boolean) { _isRunning = v }, setEcho(v: boolean) { _echo = v }, + setSpawnError(err: Error | null) { _spawnError = err }, triggerExit(code: number | null = 0, signal: string | null = null) { _isRunning = false _onExit?.(code, signal) }, triggerData(data: string) { _onData?.(data) }, reset() { - _isRunning = true; _onExit = null; _onData = null; _echo = true + _isRunning = true; _onExit = null; _onData = null; _onError = null; _echo = true; _spawnError = null m.spawn.mockClear(); m.write.mockClear(); m.kill.mockClear(); m.resize.mockClear() }, m, @@ -83,6 +93,29 @@ async function reachReady() { describe('runAgentPty', () => { afterEach(() => { harness.reset() }) + it('rejects (does not silently return) when the PTY fails to spawn', async () => { + // A real failure such as `claude` not installed or the terminal failing + // to attach: the manager reports onError and never enters running state. + // runAgentPty must throw so the caller surfaces the error instead of + // treating a never-started PTY as a clean exit and respawning. + harness.setSpawnError(new Error('claude: command not found')) + const nextMessage = vi.fn() + const onReady = vi.fn() + + await expect(runAgentPty(makeOpts({ nextMessage, onReady }))) + .rejects.toThrow('claude: command not found') + + // It bailed before reaching the message loop / ready callback. 
+ expect(nextMessage).not.toHaveBeenCalled() + expect(onReady).not.toHaveBeenCalled() + }) + + it('rejects with a generic error if spawn fails without an onError detail', async () => { + harness.setRunning(false) // not running, but no onError fired + const promise = runAgentPty(makeOpts({ command: 'mycli', nextMessage: vi.fn() })) + await expect(promise).rejects.toThrow('Failed to spawn mycli PTY') + }) + it('spawns with the given command/args/cwd and calls onReady', async () => { const msg = deferred<{ message: string } | null>() const onReady = vi.fn() @@ -94,12 +127,31 @@ describe('runAgentPty', () => { expect(spawnArgs.command).toBe('mycli') expect(spawnArgs.args).toEqual(['--foo']) expect(spawnArgs.cwd).toBe('/work') - expect(onReady).toHaveBeenCalled() + // onReady fires only once the prompt is actually ready, not right after + // spawn — so it has NOT been called yet here. + expect(onReady).not.toHaveBeenCalled() await reachReady() + expect(onReady).toHaveBeenCalled() msg.resolve(null) await promise }) + it('rejects (and never calls onReady) if the PTY exits before becoming ready', async () => { + // Spawn succeeds, but the agent exits before rendering a usable prompt + // (bad config, invalid args, auth failure). This must be treated as a + // failure — not a ready session — so the caller's give-up breaker counts + // it instead of respawning forever. 
+ const onReady = vi.fn() + const nextMessage = vi.fn() + const promise = runAgentPty(makeOpts({ command: 'mycli', onReady, nextMessage })) + await tick(0) + harness.triggerExit(1) // exits before any ready output + + await expect(promise).rejects.toThrow('mycli PTY exited before becoming ready') + expect(onReady).not.toHaveBeenCalled() + expect(nextMessage).not.toHaveBeenCalled() + }) + it('injects envVars/extraEnv into the spawn env only (not process.env)', async () => { const msg = deferred<{ message: string } | null>() const opts = makeOpts({ @@ -172,6 +224,76 @@ describe('runAgentPty', () => { await promise }) + it('fires onMessageSubmitted after the write completes, once per real message (not for /clear)', async () => { + const msg1 = deferred<{ message: string } | null>() + const msg2 = deferred<{ message: string } | null>() + const msg3 = deferred<{ message: string } | null>() + const nextMessage = vi.fn() + .mockImplementationOnce(() => msg1.promise) + .mockImplementationOnce(() => msg2.promise) + .mockImplementationOnce(() => msg3.promise) + const onMessageSubmitted = vi.fn() + const promise = runAgentPty(makeOpts({ nextMessage, onMessageSubmitted })) + await reachReady() + + // /clear is dropped before the submit path → no post-submit callback, + // so a first-message verifier armed here would never fire on a no-op. + msg1.resolve({ message: '/clear' }) + await tick(60) + expect(onMessageSubmitted).not.toHaveBeenCalled() + + // A real message fires the callback exactly once, AFTER text + CR were + // written — the contract that stops a verifier racing the submit. 
+ msg2.resolve({ message: 'hello' }) + await tick(300) + expect(onMessageSubmitted).toHaveBeenCalledTimes(1) + expect(onMessageSubmitted).toHaveBeenCalledWith('hello') + const lastWriteOrder = Math.max(...harness.m.write.mock.invocationCallOrder) + expect(onMessageSubmitted.mock.invocationCallOrder[0]).toBeGreaterThan(lastWriteOrder) + + msg3.resolve(null) + await promise + }) + + it('bracketed-paste wraps a multiline message so only the final CR submits', async () => { + const msg1 = deferred<{ message: string } | null>() + const msg2 = deferred<{ message: string } | null>() + const nextMessage = vi.fn() + .mockImplementationOnce(() => msg1.promise) + .mockImplementationOnce(() => msg2.promise) + const promise = runAgentPty(makeOpts({ nextMessage })) + await reachReady() + // e.g. an attachment-formatted prompt or a batched queue flush. + msg1.resolve({ message: '@/tmp/a.png\n\ndescribe this' }) + await tick(300) + // The whole block is written once, bracketed — embedded newlines stay + // literal instead of each acting as Enter. + expect(harness.m.write).toHaveBeenCalledWith('\x1b[200~@/tmp/a.png\n\ndescribe this\x1b[201~') + // The raw (unbracketed) multiline text must never be written. + expect(harness.m.write).not.toHaveBeenCalledWith('@/tmp/a.png\n\ndescribe this') + // Exactly one CR submits the whole paste. 
+ const crWrites = harness.m.write.mock.calls.filter((c) => c[0] === '\r').length + expect(crWrites).toBe(1) + msg2.resolve(null) + await promise + }) + + it('does not bracket a single-line message', async () => { + const msg1 = deferred<{ message: string } | null>() + const msg2 = deferred<{ message: string } | null>() + const nextMessage = vi.fn() + .mockImplementationOnce(() => msg1.promise) + .mockImplementationOnce(() => msg2.promise) + const promise = runAgentPty(makeOpts({ nextMessage })) + await reachReady() + msg1.resolve({ message: 'hello world' }) + await tick(300) + expect(harness.m.write).toHaveBeenCalledWith('hello world') + expect(harness.m.write).not.toHaveBeenCalledWith('\x1b[200~hello world\x1b[201~') + msg2.resolve(null) + await promise + }) + it('retries the write when the agent does not echo (stdin not ready yet)', async () => { const msg1 = deferred<{ message: string } | null>() const msg2 = deferred<{ message: string } | null>() diff --git a/cli/src/agent/__tests__/runnerLifecycle.test.ts b/cli/src/agent/__tests__/runnerLifecycle.test.ts new file mode 100644 index 0000000000..23aaab9846 --- /dev/null +++ b/cli/src/agent/__tests__/runnerLifecycle.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, it } from 'vitest' +import { setControlledByUser } from '../runnerLifecycle' +import type { ApiSessionClient } from '@/api/apiSession' + +// Minimal stand-in that applies the update handlers and records the result, so +// we can assert how startingMode/controlledByUser evolve across mode changes. 
+function fakeSession() { + const state = { agentState: {} as Record, metadata: {} as Record } + const session = { + updateAgentState: (h: (s: Record) => Record) => { state.agentState = h(state.agentState) }, + updateMetadata: (h: (m: Record) => Record) => { state.metadata = h(state.metadata) }, + } + return { session: session as unknown as ApiSessionClient, state } +} + +describe('setControlledByUser', () => { + it('keeps a PTY launch identity across a pty → local → pty handoff', () => { + const { session, state } = fakeSession() + + // Launch as PTY. + setControlledByUser(session, 'pty') + expect(state.metadata.startingMode).toBe('pty') + expect(state.agentState.startingMode).toBe('pty') + expect(state.agentState.controlledByUser).toBe(false) + + // Hand off to local — user is now driving locally, but the session is + // still PTY-backed so its launch identity must not change. + setControlledByUser(session, 'local') + expect(state.metadata.startingMode).toBe('pty') + expect(state.agentState.controlledByUser).toBe(true) + + // Hand back to PTY (reported as external mode 'remote'): the terminal + // toggle must remain available, i.e. startingMode stays 'pty'. 
+ setControlledByUser(session, 'remote') + expect(state.metadata.startingMode).toBe('pty') + expect(state.agentState.startingMode).toBe('pty') + expect(state.agentState.controlledByUser).toBe(false) + }) + + it('tracks the collaboration mode for a non-PTY session (unchanged behavior)', () => { + const { session, state } = fakeSession() + + setControlledByUser(session, 'remote') + expect(state.metadata.startingMode).toBe('remote') + expect(state.agentState.controlledByUser).toBe(false) + + setControlledByUser(session, 'local') + expect(state.metadata.startingMode).toBe('local') + expect(state.agentState.controlledByUser).toBe(true) + }) +}) diff --git a/cli/src/agent/bracketedPaste.ts b/cli/src/agent/bracketedPaste.ts new file mode 100644 index 0000000000..5aca5936fd --- /dev/null +++ b/cli/src/agent/bracketedPaste.ts @@ -0,0 +1,15 @@ +// Bracketed-paste (DECSET 2004) framing for PTY input. +// +// Interactive TUIs that enable bracketed-paste mode (claude does — its init +// emits ESC[?2004h) treat the bytes between these markers as a single literal +// paste, so embedded newlines are inserted as text instead of being acted on +// as Enter. A multiline message written raw would otherwise submit its first +// line on its own and run the rest as separate prompts/slash-commands. Wrap +// such a message before writing it; a trailing CR (sent separately by the +// caller) is what actually submits the whole block. +const PASTE_START = '\x1b[200~' +const PASTE_END = '\x1b[201~' + +export function bracketPasteIfMultiline(text: string): string { + return text.includes('\n') ? 
`${PASTE_START}${text}${PASTE_END}` : text +} diff --git a/cli/src/agent/loopBase.ts b/cli/src/agent/loopBase.ts index 6e2c35bd28..65e28916d1 100644 --- a/cli/src/agent/loopBase.ts +++ b/cli/src/agent/loopBase.ts @@ -3,12 +3,15 @@ import type { AgentSessionBase } from './sessionBase'; export type LoopLauncher = (session: TSession) => Promise<'switch' | 'exit'>; +export type SessionMode = 'local' | 'remote' | 'pty'; + export async function runLocalRemoteSession>(opts: { session: TSession; - startingMode?: 'local' | 'remote'; + startingMode?: SessionMode; logTag: string; runLocal: LoopLauncher; runRemote: LoopLauncher; + runPty?: LoopLauncher; onSessionReady?: (session: TSession) => void; }): Promise { if (opts.onSessionReady) { @@ -20,18 +23,20 @@ export async function runLocalRemoteSession>(opts: { session: TSession; - startingMode?: 'local' | 'remote'; + startingMode?: SessionMode; logTag: string; runLocal: LoopLauncher; runRemote: LoopLauncher; + runPty?: LoopLauncher; }): Promise { - let mode: 'local' | 'remote' = opts.startingMode ?? 'local'; + let mode: SessionMode = opts.startingMode ?? 'local'; while (true) { logger.debug(`[${opts.logTag}] Iteration with mode: ${mode}`); @@ -42,8 +47,13 @@ export async function runLocalRemoteLoop> return; } - mode = 'remote'; - opts.session.onModeChange(mode); + // Leaving local mode returns to this session's remote variant. PTY + // is OPT-IN: only a session that started in PTY mode hands off to the + // PTY launcher. A normal local/remote session must still use the SDK + // remote launcher even though claude always registers a runPty + // launcher (so `opts.runPty` is truthy for every claude session). + mode = opts.startingMode === 'pty' && opts.runPty ? 'pty' : 'remote'; + opts.session.onModeChange(mode === 'pty' ? 
'remote' : mode); continue; } @@ -57,5 +67,20 @@ export async function runLocalRemoteLoop> opts.session.onModeChange(mode); continue; } + + if (mode === 'pty') { + if (!opts.runPty) { + throw new Error('PTY mode selected but no runPty launcher provided'); + } + + const reason = await opts.runPty(opts.session); + if (reason === 'exit') { + return; + } + + mode = 'local'; + opts.session.onModeChange(mode); + continue; + } } } diff --git a/cli/src/agent/runAgentPty.ts b/cli/src/agent/runAgentPty.ts index bc00647baf..ad880f0b51 100644 --- a/cli/src/agent/runAgentPty.ts +++ b/cli/src/agent/runAgentPty.ts @@ -1,5 +1,6 @@ import { AgentPtyManager } from "@/agent/AgentPtyManager" import { parseSpecialCommand } from "@/parsers/specialCommands" +import { bracketPasteIfMultiline } from "@/agent/bracketedPaste" import { logger } from "@/lib" /** @@ -65,6 +66,16 @@ export type RunAgentPtyOpts = { */ onThinkingChange?: (thinking: boolean) => void onExit?: (code: number | null) => void + /** + * Fired after a message has been written to the PTY (text + CR) by the + * driver's submit path. Callers that want to verify/repair delivery of a + * message must hook here rather than at nextMessage time: nextMessage + * returns BEFORE waitForInputReady + submitMessage run, so a verifier + * started there can race the driver's own submit (and on a slow resume, + * fire its repair keystrokes before the message was ever sent — duplicating + * it). This hook guarantees the submit already happened. + */ + onMessageSubmitted?: (message: string) => void | Promise /** * Called once the PTY is spawned with controls for the live terminal. The * agent-terminal viewer uses `resize` to repaint the TUI on (re)subscribe so @@ -180,10 +191,15 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { // yet, so retry — this is what was dropping the first message. CR is sent // separately so the text isn't submitted before it's buffered. 
const submitMessage = async (message: string): Promise => { + // Multiline web messages (batched queue flush, attachment prompts, + // multiline composer input) must be bracketed-pasted so their embedded + // newlines stay literal instead of each submitting a partial line. The + // trailing CR sent separately below is what submits the whole block. + const payload = bracketPasteIfMultiline(message) let echoed = false for (let attempt = 0; attempt < 3 && !echoed; attempt++) { const before = lastOutputAt - manager.write(message) + manager.write(payload) const waitStart = Date.now() while (Date.now() - waitStart < 700) { if (signal?.aborted || !manager.isRunning) return @@ -206,6 +222,10 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { signal?.addEventListener('abort', abortHandler, { once: true }) try { + // Captured so a spawn failure can be re-thrown (not swallowed): the PTY + // manager reports failure via onError + isRunning=false rather than a + // throw from spawn(). + let spawnError: Error | null = null manager.spawn({ command: opts.command, args: opts.args, @@ -248,13 +268,17 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { opts.onExit?.(code) }, onError: (error) => { + spawnError = error logger.debug(`${debugPrefix} PTY error: ${error.message}`, error) }, }) if (!manager.isRunning) { - logger.debug(`${debugPrefix} Failed to spawn ${opts.command} PTY`) - return + // Surface the failure instead of returning as if it succeeded — + // otherwise the caller (e.g. ClaudePtyLauncher) treats a never-started + // PTY as a clean exit and silently respawns, hiding real errors like + // `claude` not being installed or the terminal failing to attach. + throw spawnError ?? 
new Error(`Failed to spawn ${opts.command} PTY`) } opts.registerControls?.({ @@ -272,13 +296,26 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { } }) - opts.onReady() - - // Spawn the agent up-front and wait until its prompt is ready BEFORE any - // message arrives, so the first user message is processed immediately - // instead of being consumed as the spawn trigger. + // Wait until the prompt is actually usable BEFORE any message arrives, so + // the first user message is processed immediately instead of being + // consumed as the spawn trigger. await waitForInputReady() + // A successful spawn() does not mean the agent reached a working prompt: + // it can spawn and then exit before rendering one (bad config, invalid + // args, auth failure). Distinguish that from a healthy start so onReady() + // — which the caller uses to mark the session "ready" and to reset its + // launch-failure breaker — only fires for a genuinely usable prompt. A + // user abort during startup is a clean stop, not a failure. + if (signal?.aborted) { + return + } + if (!manager.isRunning) { + throw new Error(`${opts.command} PTY exited before becoming ready`) + } + + opts.onReady() + while (manager.isRunning) { if (signal?.aborted) { logger.debug(`${debugPrefix} Aborted`) @@ -311,6 +348,9 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { if (process.env.DEBUG_PTY) logger.debug(`${debugPrefix} write(loop): ${next.message}`) await submitMessage(next.message) + // The message has now been written to the PTY; let a caller verify it + // actually landed (and repair it) without racing this submit path. + await opts.onMessageSubmitted?.(next.message) // The agent is now working on this input — show "thinking" right away // (a busy marker reinforces it; the idle marker clears it when done). 
setThinking(true) diff --git a/cli/src/agent/runnerLifecycle.ts b/cli/src/agent/runnerLifecycle.ts index 16a0495120..208f584fd7 100644 --- a/cli/src/agent/runnerLifecycle.ts +++ b/cli/src/agent/runnerLifecycle.ts @@ -181,10 +181,23 @@ export function createRunnerLifecycle(options: RunnerLifecycleOptions): RunnerLi } } -export function setControlledByUser(session: ApiSessionClient, mode: 'local' | 'remote'): void { +export function setControlledByUser(session: ApiSessionClient, mode: 'local' | 'remote' | 'pty'): void { session.updateAgentState((currentState) => ({ ...currentState, - controlledByUser: mode === 'local' + controlledByUser: mode === 'local', + // Persist the launch mode so reopen/resume can restore it. 'pty' is an + // immutable launch identity (the web gates the agent-terminal toggle on + // it), so once set it must survive later local/remote collaboration-mode + // changes — otherwise a pty→local→pty handoff reports external mode + // 'remote' and would rewrite it, hiding the terminal toggle for a session + // whose PTY is still running. + startingMode: currentState.startingMode === 'pty' ? 'pty' : mode + })) + // Also surface it in metadata so the web can gate the agent-terminal toggle + // (only PTY sessions have an agent PTY to view). + session.updateMetadata((metadata) => ({ + ...metadata, + startingMode: metadata.startingMode === 'pty' ? 'pty' : mode })) } diff --git a/cli/src/agent/sessionBase.ts b/cli/src/agent/sessionBase.ts index 48ba06e778..7279c558c4 100644 --- a/cli/src/agent/sessionBase.ts +++ b/cli/src/agent/sessionBase.ts @@ -134,6 +134,21 @@ export class AgentSessionBase { } }; + private _killHandler: (() => void) | null = null; + + // Graceful-shutdown hook shared by all flavors. The active launcher + // registers a teardown handler (e.g. 
abort the PTY) via setKillHandler; the + // runner lifecycle's onBeforeClose calls kill() before process.exit so the + // resource is released through the normal finally path rather than relying on + // last-resort reapers. No-op when no handler is registered (e.g. local mode). + setKillHandler = (handler: () => void): void => { + this._killHandler = handler; + }; + + kill = (): void => { + this._killHandler?.(); + }; + protected getKeepAliveRuntime(): { permissionMode?: SessionPermissionMode diff --git a/cli/src/api/apiMachine.ts b/cli/src/api/apiMachine.ts index 88ba35966b..f0673835eb 100644 --- a/cli/src/api/apiMachine.ts +++ b/cli/src/api/apiMachine.ts @@ -249,7 +249,7 @@ export class ApiMachineClient { setRPCHandlers({ spawnSession, stopSession, requestShutdown }: MachineRpcHandlers): void { this.rpcHandlerManager.registerHandler(RPC_METHODS.SpawnHappySession, async (params: any) => { - const { directory, sessionId, resumeSessionId, machineId, approvedNewDirectoryCreation, agent, model, effort, modelReasoningEffort, yolo, permissionMode, serviceTier, token, sessionType, worktreeName } = params || {} + const { directory, sessionId, resumeSessionId, machineId, approvedNewDirectoryCreation, agent, model, effort, modelReasoningEffort, yolo, permissionMode, serviceTier, token, sessionType, worktreeName, startingMode } = params || {} if (!directory) { throw new Error('Directory is required') @@ -275,7 +275,8 @@ export class ApiMachineClient { serviceTier, token, sessionType, - worktreeName + worktreeName, + startingMode }) switch (result.type) { diff --git a/cli/src/api/apiSession.ts b/cli/src/api/apiSession.ts index cd98d69a51..32937c9e5c 100644 --- a/cli/src/api/apiSession.ts +++ b/cli/src/api/apiSession.ts @@ -11,8 +11,10 @@ import type { RawJSONLines } from '@/claude/types' import { configuration } from '@/configuration' import { AGENT_MESSAGE_PAYLOAD_TYPE } from "@hapi/protocol" import type { SessionEndReason } from '@hapi/protocol' -import type { 
ClientToServerEvents, ServerToClientEvents, Update } from '@hapi/protocol' +import type { ClientToServerEvents, ServerToClientEvents, TerminalOutputPayload, Update } from '@hapi/protocol' import { + AgentTerminalRefreshPayloadSchema, + AgentTerminalResizePayloadSchema, TerminalClosePayloadSchema, TerminalOpenPayloadSchema, TerminalResizePayloadSchema, @@ -48,6 +50,10 @@ const SYSTEM_INJECTION_PREFIXES = [ '', ] +// Cap for the runner-side in-memory agent-terminal screen buffer (matches the +// hub's scrollback ring). The tail always holds the latest full-screen redraw. +const AGENT_TERMINAL_LOCAL_BUFFER_BYTES = 256 * 1024 + function extractRawUserTextContent(content: unknown): string | null { if (typeof content === 'string') { return content @@ -82,7 +88,11 @@ function extractRawUserTextContent(content: unknown): string | null { */ export function isExternalUserMessage(body: RawJSONLines): body is Extract { if (body.type !== 'user') return false - const text = extractRawUserTextContent(body.message.content) + // Defensive: a malformed/minimal user line may lack `.message`. Treat it as + // a non-external (forwardable) message rather than throwing. + const message = (body as { message?: { content?: unknown } }).message + if (!message || typeof message !== 'object') return false + const text = extractRawUserTextContent(message.content) if (text === null) return false if (body.isSidechain === true) return false if (body.isMeta === true) return false @@ -169,6 +179,18 @@ export class ApiSessionClient extends EventEmitter { private hasConnectedOnce = false readonly rpcHandlerManager: RpcHandlerManager private readonly terminalManager: TerminalManager + private agentTerminalResize: ((cols: number, rows: number) => void) | null = null + private lastAgentTerminalSize: { cols: number; rows: number } | null = null + // The agent PTY emits a high-frequency byte stream (spinners ~10Hz, full + // redraws). 
Only forward it to the hub while a viewer is actually subscribed + // to the agent terminal — otherwise the hub relays it to an empty room and + // buffers it for no one. Enabled on (re)subscribe, disabled when the last + // viewer leaves. Default false: chat-only users never open the raw terminal, + // so nothing is streamed for them. + private agentTerminalActive = false + // In-memory copy of the recent agent-PTY screen, captured regardless of the + // network gate so a subscribing viewer can be replayed the current screen. + private agentTerminalLocalBuffer = '' private agentStateLock = new AsyncLock() private metadataLock = new AsyncLock() @@ -282,6 +304,29 @@ export class ApiSessionClient extends EventEmitter { this.terminalManager.close(payload.terminalId) })) + // Read-only agent-terminal viewer: resize the agent PTY to the viewer's + // size, and force a repaint when a viewer (re)subscribes so it sees the + // live screen instead of a stale/black buffer replay. + this.socket.on('agent-terminal:resize', handleTerminalEvent(AgentTerminalResizePayloadSchema, (payload) => { + this.lastAgentTerminalSize = { cols: payload.cols, rows: payload.rows } + this.agentTerminalResize?.(payload.cols, payload.rows) + })) + + this.socket.on('agent-terminal:refresh', handleTerminalEvent(AgentTerminalRefreshPayloadSchema, () => { + // A viewer is subscribed → start streaming (enable BEFORE replay so + // the bytes flow), replay the locally-captured current screen (works + // even for resumed sessions that don't repaint), then nudge a repaint + // as a belt-and-suspenders for any truncated head sequence. + this.agentTerminalActive = true + this.emitAgentTerminalLocalReplay() + this.forceAgentTerminalRepaint() + })) + + this.socket.on('agent-terminal:idle', handleTerminalEvent(AgentTerminalRefreshPayloadSchema, () => { + // Last viewer left — stop streaming the PTY to the hub. 
+ this.agentTerminalActive = false + })) + this.socket.on('update', (data: Update, ack?: (response: { removed: boolean }) => void) => { try { if (!data.body) return @@ -565,6 +610,77 @@ export class ApiSessionClient extends EventEmitter { }) } + emitAgentTerminalOutput(data: string): void { + // Always capture the screen locally (in-memory, no network) so a late + // subscriber can be replayed the CURRENT screen without depending on a + // TUI repaint — resumed (`--resume`) sessions don't reliably redraw on + // SIGWINCH, which is what caused the reopen black screen. + this.agentTerminalLocalBuffer = + (this.agentTerminalLocalBuffer + data).slice(-AGENT_TERMINAL_LOCAL_BUFFER_BYTES) + // Gate only the NETWORK forward: with no viewer the hub would relay this + // high-frequency byte stream (spinners ~10Hz) to an empty room. On + // subscribe, 'agent-terminal:refresh' flips this on and replays the local + // buffer (see the handler), so nothing is lost. + if (!this.agentTerminalActive) return + const payload: TerminalOutputPayload = { + sessionId: this.sessionId, + terminalId: 'agent', + data + } + this.socket.emit('agent-terminal:output', payload) + } + + private emitAgentTerminalLocalReplay(): void { + if (!this.agentTerminalLocalBuffer) return + this.socket.emit('agent-terminal:output', { + sessionId: this.sessionId, + terminalId: 'agent', + data: this.agentTerminalLocalBuffer + }) + } + + /** + * Tell the hub to drop its scrollback buffer for this session. Called when a + * fresh agent PTY spawns (e.g. after archive→restart) so a re-subscribing + * viewer replays only the NEW session's screen, not a stale mix of the old + * one's output and its alt-screen-exit. + */ + resetAgentTerminal(): void { + // New PTY → drop the previous screen from both the hub buffer and our + // local copy so neither replays stale output. 
+ this.agentTerminalLocalBuffer = '' + this.socket.emit('agent-terminal:reset', { sessionId: this.sessionId }) + } + + /** + * Register (or clear) the live agent-PTY controls. The PTY launcher calls + * this once the agent is spawned so the agent-terminal viewer can resize / + * repaint it. Passing null (on exit) makes the controls no-ops. + */ + setAgentTerminalControls(controls: { resize: (cols: number, rows: number) => void; sendKeys: (data: string) => void } | null): void { + this.agentTerminalResize = controls?.resize ?? null + } + + // Force the agent TUI to repaint its current screen. A plain same-size resize + // is a no-op (the kernel only sends SIGWINCH on an actual size change), so we + // nudge one row smaller then back — a single transient frame, imperceptible — + // which guarantees the TUI redraws the full current screen for a freshly + // (re)subscribed viewer. + private forceAgentTerminalRepaint(): void { + const resize = this.agentTerminalResize + if (!resize) return + const initial = this.lastAgentTerminalSize ?? { cols: 80, rows: 24 } + resize(initial.cols, Math.max(1, initial.rows - 1)) + // Restore to the LATEST known size (a concurrent viewer resize may have + // updated it in the meantime) so the nudge never shrinks the final view. + setTimeout(() => { + const r = this.agentTerminalResize + if (!r) return + const cur = this.lastAgentTerminalSize ?? 
initial + r(cur.cols, cur.rows) + }, 30) + } + keepAlive( thinking: boolean, mode: 'local' | 'remote', diff --git a/cli/src/claude/__tests__/claudePtyLauncher.test.ts b/cli/src/claude/__tests__/claudePtyLauncher.test.ts new file mode 100644 index 0000000000..0d565bc327 --- /dev/null +++ b/cli/src/claude/__tests__/claudePtyLauncher.test.ts @@ -0,0 +1,250 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' + +const harness = vi.hoisted(() => ({ + scannerOnMessage: null as ((message: Record) => void) | null, + scannerOpts: null as Record | null, + cleanupCalls: 0, + foundCallbacks: [] as Array<(sessionId: string) => void>, + exitReason: 'exit' as string | null, +})) + +let lastSendKeysSpy = vi.fn() +let ptyOptsCaptured: any = null +vi.mock('../claudePty', () => ({ + claudePty: vi.fn(async (opts: any) => { + ptyOptsCaptured = opts + lastSendKeysSpy = vi.fn() + opts.registerControls?.({ + resize: () => {}, + sendKeys: lastSendKeysSpy + }) + opts.onReady?.() + await opts.nextMessage() + }), +})) + +vi.mock('../utils/sessionScanner', () => ({ + createSessionScanner: async (opts: { onMessage: (message: Record) => void }) => { + harness.scannerOnMessage = opts.onMessage + harness.scannerOpts = opts + return { + cleanup: async () => { harness.cleanupCalls += 1 }, + onNewSession: () => {}, + } + }, +})) + +vi.mock('@/ui/ink/RemoteModeDisplay', () => ({ + RemoteModeDisplay: () => null, +})) + +vi.mock('@/ui/logger', () => ({ + logger: { debug: vi.fn() }, +})) + +let mockAbortHandlers: any = null +vi.mock('@/modules/common/remote/RemoteLauncherBase', () => ({ + RemoteLauncherBase: class { + get exitReason() { return harness.exitReason } + set exitReason(v) { harness.exitReason = v } + protected hasTTY = false + protected messageBuffer = { addMessage: () => {} } + protected ptyAbortController: AbortController | null = null + constructor(_logPath?: string) {} + protected setupAbortHandlers(rpc: any, handlers: any) { + mockAbortHandlers = handlers + } + protected 
clearAbortHandlers() {} + protected async requestExit(reason: string, handler: Function) { + harness.exitReason = reason + await handler() + } + protected async runRespawnLoop(opts: any): Promise { + const controller = new AbortController() + this.ptyAbortController = controller + await opts.launchOnce(controller.signal) + this.ptyAbortController = null + } + async start(): Promise { + await (this as unknown as { runMainLoop: () => Promise }).runMainLoop() + return harness.exitReason || 'exit' + } + }, +})) + +import { claudePtyLauncher, lastUserPromptText, transcriptConfirmsDelivery } from '../claudePtyLauncher' + +describe('transcriptConfirmsDelivery', () => { + const userLine = (text: string) => JSON.stringify({ type: 'user', message: { content: text } }) + const assistantLine = (text: string) => + JSON.stringify({ type: 'assistant', message: { content: [{ type: 'text', text }] } }) + + it('confirms when the just-submitted message is the last prompt', () => { + const transcript = [userLine('first'), assistantLine('ok'), userLine('continue the task')].join('\n') + expect(transcriptConfirmsDelivery(transcript, 'continue the task')).toBe(true) + }) + + it('does NOT confirm on a stale substring of the prior prompt (resume false-positive guard)', () => { + // Prior turn typed "please continue the task"; on --resume the new message + // "continue" has NOT landed yet, so the last prompt is still the prior one. + // A substring check would wrongly confirm and suppress the re-type. 
+ const transcript = [userLine('please continue the task'), assistantLine('done')].join('\n') + expect(transcriptConfirmsDelivery(transcript, 'continue')).toBe(false) + }) + + it('ignores trailing whitespace differences', () => { + const transcript = userLine('hello world\n') + expect(transcriptConfirmsDelivery(transcript, 'hello world')).toBe(true) + }) + + it('falls back to whole-file match when no user prompt parses', () => { + expect(transcriptConfirmsDelivery('not json\n{"type":"assistant"}', 'assistant')).toBe(true) + expect(transcriptConfirmsDelivery('', 'anything')).toBe(false) + }) +}) + +describe('lastUserPromptText', () => { + const userLine = (text: string) => JSON.stringify({ type: 'user', message: { content: text } }) + const userBlocks = (text: string) => + JSON.stringify({ type: 'user', message: { content: [{ type: 'text', text }] } }) + const assistantLine = (text: string) => + JSON.stringify({ type: 'assistant', message: { content: [{ type: 'text', text }] } }) + const toolResultLine = JSON.stringify({ + type: 'user', + message: { content: [{ type: 'tool_result', content: 'PINGA file output' }] }, + }) + + it('returns the most recent typed prompt, ignoring assistant turns', () => { + const transcript = [userLine('PINGA'), assistantLine('ok'), userLine('PONGB')].join('\n') + expect(lastUserPromptText(transcript)).toBe('PONGB') + }) + + it('does not match stale pre-resume history (the false-positive guard)', () => { + // Replayed history contains PINGA; the just-submitted prompt is PONGB. + const transcript = [userLine('PINGA'), assistantLine('A'), userBlocks('PONGB')].join('\n') + const result = lastUserPromptText(transcript) + expect(result).toBe('PONGB') + // The whole-file substring would have matched PINGA; the anchored check must not. 
+ expect(result?.includes('PINGA')).toBe(false) + }) + + it('skips tool_result user entries (no typed text)', () => { + const transcript = [userLine('PINGA'), toolResultLine].join('\n') + expect(lastUserPromptText(transcript)).toBe('PINGA') + }) + + it('returns null when there is no parseable user prompt', () => { + expect(lastUserPromptText('')).toBeNull() + expect(lastUserPromptText('not json\n{"type":"assistant"}')).toBeNull() + expect(lastUserPromptText(toolResultLine)).toBeNull() + }) +}) + +function createSessionStub() { + const sentMessages: Array> = [] + return { + session: { + sessionId: 'pty-session', + path: '/tmp/pty-test', + startedBy: 'terminal' as const, + startingMode: 'remote' as const, + claudeEnvVars: {}, + claudeArgs: [], + hookSettingsPath: '/tmp/hooks/pty.json', + consumeOneTimeFlags: () => {}, + setKillHandler: (_handler: () => void) => {}, + setConfigChangeHandler: (_handler: (() => void) | null) => {}, + getModel: () => null, + getEffort: () => undefined, + addSessionFoundCallback: (cb: (sessionId: string) => void) => { harness.foundCallbacks.push(cb) }, + removeSessionFoundCallback: () => {}, + queue: { + waitForMessagesAndGetAsString: vi.fn().mockResolvedValue(null) + }, + client: { + sendClaudeSessionMessage: (msg: Record) => { sentMessages.push(msg) }, + sendSessionEvent: () => {}, + emitAgentTerminalOutput: () => {}, + setAgentTerminalControls: () => {}, + rpcHandlerManager: { registerHandler: () => {} }, + }, + }, + sentMessages, + } +} + +describe('claudePtyLauncher structured message forwarding', () => { + afterEach(() => { + harness.scannerOnMessage = null + harness.scannerOpts = null + harness.cleanupCalls = 0 + harness.foundCallbacks = [] + }) + + it('creates the scanner with the session id and working directory', async () => { + const { session } = createSessionStub() + await claudePtyLauncher(session as never) + + expect(harness.scannerOpts).toMatchObject({ + sessionId: 'pty-session', + workingDirectory: '/tmp/pty-test', + }) + 
}) + + it('registers a session-found callback and cleans up the scanner', async () => { + const { session } = createSessionStub() + await claudePtyLauncher(session as never) + + expect(harness.foundCallbacks).toHaveLength(1) + expect(harness.cleanupCalls).toBe(1) + }) + + it('registers a kill handler so the lifecycle can tear down the PTY on archive', async () => { + const { session } = createSessionStub() + let killHandler: (() => void) | undefined + session.setKillHandler = (h: () => void) => { killHandler = h } + await claudePtyLauncher(session as never) + // onBeforeClose calls session.kill() → this handler → launcher.abort(). + expect(killHandler).toBeTypeOf('function') + }) + + it('filters out summary messages', async () => { + const { session, sentMessages } = createSessionStub() + await claudePtyLauncher(session as never) + + harness.scannerOnMessage!({ type: 'summary', leafUuid: '1' }) + + expect(sentMessages).toHaveLength(0) + }) + + it('filters out invisible system messages', async () => { + const { session, sentMessages } = createSessionStub() + await claudePtyLauncher(session as never) + + harness.scannerOnMessage!({ type: 'system', subtype: 'init', uuid: '1' }) + harness.scannerOnMessage!({ type: 'system', subtype: 'stop_hook_summary', uuid: '2' }) + harness.scannerOnMessage!({ type: 'system', uuid: '3' }) + + expect(sentMessages).toHaveLength(0) + }) + + it('filters out isMeta and isCompactSummary messages', async () => { + const { session, sentMessages } = createSessionStub() + await claudePtyLauncher(session as never) + + harness.scannerOnMessage!({ type: 'user', isMeta: true, uuid: '1' }) + harness.scannerOnMessage!({ type: 'assistant', isCompactSummary: true, uuid: '2' }) + + expect(sentMessages).toHaveLength(0) + }) + + it('forwards normal conversation messages to the hub', async () => { + const { session, sentMessages } = createSessionStub() + await claudePtyLauncher(session as never) + + harness.scannerOnMessage!({ type: 'user', uuid: '1' }) + 
harness.scannerOnMessage!({ type: 'assistant', uuid: '2' }) + + expect(sentMessages).toHaveLength(2) + }) +}) diff --git a/cli/src/claude/claudePty.ts b/cli/src/claude/claudePty.ts index c5ebd937f0..6841e68028 100644 --- a/cli/src/claude/claudePty.ts +++ b/cli/src/claude/claudePty.ts @@ -17,6 +17,8 @@ export type ClaudePtyOpts = { nextMessage: () => Promise<{ message: string } | null> onReady: () => void onMessage: (data: string) => void + /** Fired after the driver has written a message to the PTY. See runAgentPty. */ + onMessageSubmitted?: (message: string) => void | Promise onThinkingChange?: (thinking: boolean) => void onExit?: (code: number | null) => void registerControls?: (controls: { resize: (cols: number, rows: number) => void; sendKeys: (data: string) => void }) => void @@ -87,6 +89,7 @@ export async function claudePty(opts: ClaudePtyOpts): Promise { nextMessage: opts.nextMessage, onReady: opts.onReady, onMessage: opts.onMessage, + onMessageSubmitted: opts.onMessageSubmitted, onThinkingChange: opts.onThinkingChange, onExit: opts.onExit, registerControls: opts.registerControls, diff --git a/cli/src/claude/claudePtyLauncher.ts b/cli/src/claude/claudePtyLauncher.ts new file mode 100644 index 0000000000..296041d208 --- /dev/null +++ b/cli/src/claude/claudePtyLauncher.ts @@ -0,0 +1,480 @@ +import React from "react" +import { Session } from "./session" +import { RemoteModeDisplay } from "@/ui/ink/RemoteModeDisplay" +import { claudePty } from "./claudePty" +import { bracketPasteIfMultiline } from "@/agent/bracketedPaste" +import { createSessionScanner, type SessionScanner } from "./utils/sessionScanner" +import { getProjectPath } from "./utils/path" +import { isClaudeChatVisibleMessage } from "./utils/chatVisibility" +import { isExternalUserMessage } from "@/api/apiSession" +import type { SessionEffort, SessionModel } from "@/api/types" +import { logger } from "@/ui/logger" +import { readFile } from "node:fs/promises" +import { join } from "node:path" +import { 
+ RemoteLauncherBase, + type RemoteLauncherDisplayContext, + type RemoteLauncherExitReason, + type LaunchOutcome +} from "@/modules/common/remote/RemoteLauncherBase" + +// Delay before respawning the PTY after a launch failure, so a persistent +// failure surfaces its error at a steady cadence instead of a tight respawn loop. +const RESPAWN_BACKOFF_MS = 1000 +// Give up after this many consecutive launches that never reached a ready +// prompt. Such failures are deterministic (claude not installed, terminal can't +// attach) and will not recover by respawning — bound them so the session ends +// with a clear error instead of retrying forever. A launch that DOES reach +// ready resets the counter, so genuine mid-session crash recovery stays +// unbounded. +const MAX_IMMEDIATE_LAUNCH_FAILURES = 3 + +// Extract the text of the LAST typed user prompt from a claude transcript +// (JSONL). Tool-result user entries and assistant turns carry no prompt text and +// are skipped, so the result is the most recent thing the human actually typed. +// Returns null when nothing parseable is found (caller falls back). 
// Lines that are blank, are not valid JSON, or do not parse to a JSON object
// (for example a bare `null`) are skipped rather than aborting the scan.
export function lastUserPromptText(transcript: string): string | null {
    let last: string | null = null
    for (const line of transcript.split('\n')) {
        const trimmed = line.trim()
        if (!trimmed) continue
        let parsed: unknown
        try {
            parsed = JSON.parse(trimmed)
        } catch {
            continue
        }
        // JSON.parse can yield null or a primitive; reading `.type` off null
        // would throw, so only object entries are inspected.
        if (typeof parsed !== 'object' || parsed === null) continue
        const entry = parsed as { type?: unknown; message?: { content?: unknown } | null }
        if (entry.type !== 'user') continue
        const content = entry.message?.content
        let text: string | null = null
        if (typeof content === 'string') {
            text = content
        } else if (Array.isArray(content)) {
            // Keep only the `text` blocks; other block types (e.g. tool_result)
            // contribute nothing, so an entry made only of those is ignored.
            const parts = content
                .filter((part): part is { type?: string; text?: string } =>
                    typeof part === 'object' && part !== null)
                .filter((part) => part.type === 'text' && typeof part.text === 'string')
                .map((part) => part.text as string)
            if (parts.length > 0) text = parts.join('')
        }
        // Later user entries overwrite earlier ones, so the last non-empty wins.
        if (text !== null && text.length > 0) last = text
    }
    return last
}

// Whether `text` was actually delivered as the latest user prompt in a claude
// transcript. claude writes the user prompt to its JSONL the moment it ingests it
// (before the API call), so a hit confirms delivery. On --resume the file also
// contains the REPLAYED prior conversation, so a plain whole-file substring match
// would false-positive on stale history (e.g. a short "continue") and suppress
// the re-type self-correction. Anchor on the LAST typed user prompt and require
// EQUALITY: only the just-submitted message can be the last prompt, and equality
// (not substring) keeps a new message that is a substring of the prior turn from
// matching stale content. Falls back to a whole-file check only when no user
// prompt parses (e.g. a fresh transcript with nothing to false-match yet).
+export function transcriptConfirmsDelivery(transcript: string, text: string): boolean { + const lastPrompt = lastUserPromptText(transcript) + if (lastPrompt !== null) return lastPrompt.trim() === text.trim() + return transcript.includes(text) +} + +class ClaudePtyLauncher extends RemoteLauncherBase { + private readonly session: Session + private scanner: Awaited | null = null + // Claude's own session UUID (discovered via the SessionStart hook). Used to + // --resume the conversation if Claude ever has to be re-spawned (e.g. a crash) + // so the conversation continues with the current model/effort. + private claudeSessionId: string | null = null + // Live PTY controls (raw keystroke injection) for in-place /model and /effort. + private ptyControls: { sendKeys: (data: string) => void } | null = null + // The model/effort currently applied to the running Claude TUI, so a config + // change only drives the slash command for what actually changed. + private appliedModel: SessionModel = null + private appliedEffort: SessionEffort = null + // When set, PTY output is fed here to detect claude's "Switch model?" dialog + // (across chunks, ANSI-stripped) and accept it with Enter. + private confirmWatch: { feed: (chunk: string) => void } | null = null + // Coalesce rapid model+effort changes into a single apply pass. + private configApplyScheduled = false + // True once claude's SessionStart hook has fired for the CURRENT spawn (reset + // each (re)launch). Gates the first message so a --resume that's still + // replaying its transcript doesn't eat the keystrokes (the input box renders + // before the replay redraw completes; typing then is lost). See waitForSessionStart. 
+ private sessionStartSeen = false + private sessionStartResolvers: Array<() => void> = [] + + protected getCurrentSessionId(): string | null { + return this.session.sessionId + } + + private sleep(ms: number): Promise { return new Promise((r) => setTimeout(r, ms)) } + + // Apply a mid-session model/effort change to the LIVE claude TUI via its + // /model and /effort slash commands — no re-spawn, so the conversation and + // scrollback are preserved. claude's /model pops a "Switch model?" dialog + // (default = Yes); we accept it with Enter. + private scheduleConfigApply(): void { + if (this.configApplyScheduled) return + this.configApplyScheduled = true + setTimeout(() => { this.configApplyScheduled = false; void this.applyConfigChange() }, 120) + } + + private async applyConfigChange(): Promise { + const controls = this.ptyControls + if (!controls) return + const model = this.session.getModel() + const effort = this.session.getEffort() + if (model !== this.appliedModel) { + this.appliedModel = model + if (model) { + logger.debug(`[pty]: applying model change via /model ${model}`) + controls.sendKeys(`/model ${model}\r`) + await this.confirmModelDialog() + } + } + if (effort !== this.appliedEffort) { + this.appliedEffort = effort + if (effort) { + logger.debug(`[pty]: applying effort change via /effort ${effort}`) + controls.sendKeys(`/effort ${effort}\r`) + await this.sleep(300) + } + } + } + + private confirmModelDialog(timeoutMs = 3500): Promise { + return new Promise((resolve) => { + let settled = false + let buf = '' + // Match the dialog across chunks, with ANSI escapes stripped (the TUI + // interleaves color codes between words, so a raw regex misses it). 
+ const marker = /yes,\s*switch|switch model|no,\s*go back/i + const finish = () => { if (settled) return; settled = true; this.confirmWatch = null; resolve() } + const timer = setTimeout(finish, timeoutMs) + this.confirmWatch = { + feed: (chunk: string) => { + buf = (buf + chunk.replace(/\x1b\[[0-9;?]*[a-zA-Z]/g, '')).slice(-2000) + if (marker.test(buf)) { + clearTimeout(timer) + // Default-highlighted option is "Yes, switch" — Enter accepts. + setTimeout(() => this.ptyControls?.sendKeys('\r'), 200) + finish() + } + } + } + }) + } + + // Re-derive Claude's spawn args each (re)launch: --model/--effort/--resume are + // dynamic (the model/effort can change mid-session, and a re-spawn must resume + // the existing conversation), so strip any stale copies from the base args and + // append the current values. + private buildSpawnArgs(): string[] { + const DYNAMIC = new Set(['--model', '--effort', '--resume']) + const base: string[] = [] + const args = this.session.claudeArgs ?? [] + // Preserve a HAPI-resume uuid passed in the initial args (first spawn, + // before the SessionStart hook has reported Claude's own id). + let resumeFromArgs: string | null = null + for (let i = 0; i < args.length; i++) { + if (DYNAMIC.has(args[i])) { + const hasValue = i + 1 < args.length && !args[i + 1].startsWith('-') + if (args[i] === '--resume' && hasValue) resumeFromArgs = args[i + 1] + if (hasValue) i++ + continue + } + base.push(args[i]) + } + const resumeId = this.claudeSessionId ?? resumeFromArgs + const model = this.session.getModel() + const effort = this.session.getEffort() + return [ + ...base, + ...(resumeId ? ['--resume', resumeId] : []), + ...(model ? ['--model', model] : []), + ...(effort ? ['--effort', effort] : []), + ] + } + + // The claude session id passed via `--resume ` in the initial args (set by + // the runner when reopening/resuming an existing conversation). 
Used to seed the + // scanner with the already-forwarded transcript so resume doesn't re-emit the + // prior turns (the new runner has a fresh scanner with no memory of what the + // previous lifetime already sent). + private resumeIdFromArgs(): string | null { + const args = this.session.claudeArgs ?? [] + for (let i = 0; i < args.length; i++) { + if (args[i] === '--resume' && i + 1 < args.length && !args[i + 1].startsWith('-')) { + return args[i + 1] + } + } + return null + } + + // Resolve once claude's SessionStart hook fires for the current spawn (or after + // `timeoutMs` as a fallback so a missed hook never hangs the message loop). + private waitForSessionStart(timeoutMs: number): Promise { + if (this.sessionStartSeen) return Promise.resolve() + return new Promise((resolve) => { + const wrapped = () => { clearTimeout(timer); resolve() } + const timer = setTimeout(() => { + this.sessionStartResolvers = this.sessionStartResolvers.filter((r) => r !== wrapped) + logger.debug('[pty]: SessionStart hook gate timed out; proceeding with first message') + resolve() + }, timeoutMs) + this.sessionStartResolvers.push(wrapped) + }) + } + + private markSessionStartSeen(): void { + this.sessionStartSeen = true + const resolvers = this.sessionStartResolvers.splice(0) + for (const r of resolvers) r() + } + + // Path of the live claude transcript (used to confirm a submitted message was + // actually ingested). Resolves against the REAL ~/.claude (not the isolated + // CLAUDE_CONFIG_DIR), mirroring the scanner. 
+ private transcriptPath(): string | null { + if (!this.claudeSessionId) return null + return join(getProjectPath(this.session.path), `${this.claudeSessionId}.jsonl`) + } + + private async transcriptHasText(text: string): Promise { + const path = this.transcriptPath() + if (!path) return false + try { + return transcriptConfirmsDelivery(await readFile(path, 'utf-8'), text) + } catch { + return false + } + } + + // Self-correcting delivery for the FIRST message after a (re)spawn. The driver + // submits it right after nextMessage returns, but a claude --resume that's still + // painting its replayed conversation can swallow those keystrokes (the input box + // renders, then a late redraw wipes the typed text) — the message never reaches + // claude and no response ever comes. Confirm the prompt landed in the transcript + // and re-type it if not. Guarded by claudeSessionId so we never blindly re-send + // when we can't verify. + private async ensureFirstMessageDelivered(text: string, signal: AbortSignal): Promise { + if (!this.claudeSessionId) return + const trimmed = text.trim() + if (!trimmed) return + for (let attempt = 0; attempt < 3; attempt++) { + const deadline = Date.now() + 5000 + while (Date.now() < deadline) { + if (signal.aborted || !!this.exitReason) return + if (await this.transcriptHasText(trimmed)) return + await this.sleep(500) + } + if (signal.aborted || !!this.exitReason || !this.ptyControls) return + logger.debug(`[pty]: first message not in transcript after submit; re-typing (attempt ${attempt + 1})`) + // Match the driver's submit path: a multiline first message must be + // bracketed-pasted on repair too, otherwise the re-typed newlines act + // as Enter and Claude receives split prompts instead of the message. + this.ptyControls.sendKeys(bracketPasteIfMultiline(trimmed)) + await this.sleep(200) + this.ptyControls.sendKeys('\r') + } + } + + constructor(session: Session) { + super(process.env.DEBUG ? 
session.logPath : undefined) + this.session = session + // Let the runner lifecycle (onBeforeClose) tear down the PTY gracefully + // on archive/SIGTERM: aborting the controller triggers runAgentPty's + // synchronous manager.kill(), so the child dies before process.exit. + session.setKillHandler(() => { void this.abort() }) + } + + protected createDisplay(context: RemoteLauncherDisplayContext): React.ReactElement { + return React.createElement(RemoteModeDisplay, context) + } + + private async abort(): Promise { + if (this.ptyAbortController && !this.ptyAbortController.signal.aborted) { + this.ptyAbortController.abort() + } + } + + private async handleAbortRequest(): Promise { + logger.debug('[pty]: handleAbortRequest') + await this.abort() + } + + private async handleSwitchRequest(): Promise { + logger.debug('[pty]: doSwitch') + await this.requestExit('switch', async () => { + await this.abort() + }) + } + + private async handleExitFromUi(): Promise { + logger.debug('[pty]: Exiting via Ctrl-C') + await this.requestExit('exit', async () => { + await this.abort() + }) + } + + private async handleSwitchFromUi(): Promise { + logger.debug('[pty]: Switching to local mode via double space') + await this.handleSwitchRequest() + } + + public async launch(): Promise { + return this.start({ + onExit: () => this.handleExitFromUi(), + onSwitchToLocal: () => this.handleSwitchFromUi() + }) + } + + protected async launchOnce(signal: AbortSignal): Promise { + let reachedReady = false + let gatedFirstMessage = false + let firstSubmitVerified = false + try { + await claudePty({ + sessionId: this.session.sessionId, + path: this.session.path, + claudeEnvVars: this.session.claudeEnvVars, + claudeArgs: this.buildSpawnArgs(), + hookSettingsPath: this.session.hookSettingsPath, + signal, + nextMessage: async () => { + const msg = await this.session.queue.waitForMessagesAndGetAsString(signal) + if (!msg) return null + if (!gatedFirstMessage) { + gatedFirstMessage = true + await 
this.waitForSessionStart(15000) + if (signal.aborted) return null + } + this.scanner?.markActive() + if (/^\/model\s+\S/i.test(msg.message.trim())) { + void this.confirmModelDialog(6000) + } + return { message: msg.message } + }, + onMessageSubmitted: (message: string) => { + if (firstSubmitVerified) return + firstSubmitVerified = true + void this.ensureFirstMessageDelivered(message, signal) + }, + onReady: () => { + reachedReady = true + logger.debug('[pty]: claude PTY ready') + this.session.client.sendSessionEvent({ type: 'ready' }) + }, + onMessage: (data: string) => { + if (process.env.DEBUG_PTY) { + logger.debug(`[pty:onMessage] received ${data.length} bytes: ${data.slice(0, 80)}`) + } + if (this.confirmWatch) this.confirmWatch.feed(data) + this.session.client.emitAgentTerminalOutput(data) + }, + onThinkingChange: (thinking: boolean) => { + this.session.onThinkingChange(thinking) + }, + registerControls: (controls) => { + this.ptyControls = controls + this.session.client.resetAgentTerminal() + this.session.client.setAgentTerminalControls(controls) + }, + onExit: (code: number | null) => { + logger.debug(`[pty]: claude PTY exited with code ${code}`) + this.ptyControls = null + this.session.client.sendSessionEvent({ + type: 'message', + message: `Process exited with code ${code}` + }) + }, + }) + + this.session.consumeOneTimeFlags() + + if (!this.exitReason && signal.aborted) { + this.session.client.sendSessionEvent({ type: 'message', message: 'Aborted by user' }) + } + + return { reachedReady } + } catch (e) { + return { reachedReady, error: e instanceof Error ? 
e : new Error(String(e)) } + } + } + + protected async runMainLoop(): Promise { + logger.debug('[claudePtyLauncher] Starting PTY launcher') + logger.debug(`[claudePtyLauncher] TTY available: ${this.hasTTY}`) + + const session = this.session + const messageBuffer = this.messageBuffer + + this.setupAbortHandlers(session.client.rpcHandlerManager, { + onAbort: () => this.handleAbortRequest(), + onSwitch: () => this.handleSwitchRequest() + }) + + const resumeId = this.resumeIdFromArgs() + if (resumeId) this.claudeSessionId = resumeId + this.scanner = await createSessionScanner({ + sessionId: resumeId ?? session.sessionId, + workingDirectory: session.path, + onMessage: (message) => { + if (message.type === 'summary') return + if (message.isMeta || message.isCompactSummary) return + if (!isClaudeChatVisibleMessage(message)) return + if (isExternalUserMessage(message)) return + session.client.sendClaudeSessionMessage(message) + } + }) + const handleSessionFound = (sessionId: string) => { + this.claudeSessionId = sessionId + this.markSessionStartSeen() + this.scanner?.onNewSession(sessionId) + } + session.addSessionFoundCallback(handleSessionFound) + + this.appliedModel = session.getModel() + this.appliedEffort = session.getEffort() + + session.setConfigChangeHandler(() => this.scheduleConfigApply()) + + try { + await this.runRespawnLoop({ + maxImmediateFailures: MAX_IMMEDIATE_LAUNCH_FAILURES, + respawnBackoffMs: RESPAWN_BACKOFF_MS, + onLaunchStart: (isNewSession) => { + messageBuffer.addMessage('═'.repeat(40), 'status') + if (isNewSession) { + messageBuffer.addMessage('Starting new Claude PTY session...', 'status') + } else { + messageBuffer.addMessage('Continuing Claude PTY session...', 'status') + } + }, + launchOnce: (sig) => this.launchOnce(sig), + onLaunchFailure: (err) => { + session.client.sendSessionEvent({ type: 'message', message: err.message }) + } + }) + } finally { + session.setConfigChangeHandler(null) + session.client.setAgentTerminalControls(null) + 
session.removeSessionFoundCallback(handleSessionFound) + if (this.scanner) { + await this.scanner.cleanup() + this.scanner = null + } + logger.debug('[pty]: main loop ended') + } + } + + protected async cleanup(): Promise { + this.clearAbortHandlers(this.session.client.rpcHandlerManager) + logger.debug('[pty]: cleanup done') + } +} + +export async function claudePtyLauncher(session: Session): Promise<'switch' | 'exit'> { + const launcher = new ClaudePtyLauncher(session) + return launcher.launch() +} diff --git a/cli/src/claude/loop.ts b/cli/src/claude/loop.ts index a7846171bd..89d251777a 100644 --- a/cli/src/claude/loop.ts +++ b/cli/src/claude/loop.ts @@ -1,10 +1,11 @@ import { ApiSessionClient } from "@/api/apiSession" import { MessageQueue2 } from "@/utils/MessageQueue2" import { logger } from "@/ui/logger" -import { runLocalRemoteSession } from "@/agent/loopBase" +import { runLocalRemoteSession, type SessionMode } from "@/agent/loopBase" import { Session } from "./session" import { claudeLocalLauncher } from "./claudeLocalLauncher" import { claudeRemoteLauncher } from "./claudeRemoteLauncher" +import { claudePtyLauncher } from "./claudePtyLauncher" import { ApiClient } from "@/lib" import type { SessionEffort, SessionModel } from "@/api/types" import type { ClaudePermissionMode } from "@hapi/protocol/types" @@ -27,7 +28,7 @@ interface LoopOptions { model?: SessionModel effort?: SessionEffort permissionMode?: PermissionMode - startingMode?: 'local' | 'remote' + startingMode?: 'local' | 'remote' | 'pty' startedBy?: 'runner' | 'terminal' onModeChange: (mode: 'local' | 'remote') => void mcpServers: Record @@ -48,6 +49,7 @@ export async function loop(opts: LoopOptions) { const logPath = logger.logFilePath; const startedBy = opts.startedBy ?? 'terminal'; const startingMode = opts.startingMode ?? 'local'; + const sessionMode: 'local' | 'remote' = startingMode === 'pty' ? 
'remote' : startingMode; const session = new Session({ api: opts.api, client: opts.session, @@ -60,9 +62,9 @@ export async function loop(opts: LoopOptions) { messageQueue: opts.messageQueue, allowedTools: opts.allowedTools, onModeChange: opts.onModeChange, - mode: startingMode, + mode: sessionMode, startedBy, - startingMode, + startingMode: sessionMode, hookSettingsPath: opts.hookSettingsPath, permissionMode: opts.permissionMode ?? 'default', model: opts.model, @@ -75,6 +77,7 @@ export async function loop(opts: LoopOptions) { logTag: 'loop', runLocal: claudeLocalLauncher, runRemote: claudeRemoteLauncher, + runPty: claudePtyLauncher, onSessionReady: opts.onSessionReady }); } diff --git a/cli/src/claude/runClaude.ts b/cli/src/claude/runClaude.ts index 83df7ee367..e971bf511f 100644 --- a/cli/src/claude/runClaude.ts +++ b/cli/src/claude/runClaude.ts @@ -1,3 +1,4 @@ +import { randomUUID } from 'node:crypto'; import { logger } from '@/ui/logger'; import { loop } from '@/claude/loop'; import type { SessionMode } from '@/agent/loopBase'; @@ -10,6 +11,7 @@ import { parseSpecialCommand } from '@/parsers/specialCommands'; import { getEnvironmentInfo } from '@/ui/doctor'; import { startHappyServer } from '@/claude/utils/startHappyServer'; import { startHookServer } from '@/claude/utils/startHookServer'; +import { PtyPermissionHandler } from '@/claude/utils/ptyPermissionHandler'; import { generateHookSettingsFile, cleanupHookSettingsFile } from '@/modules/common/hooks/generateHookSettings'; import { registerKillSessionHandler } from './registerKillSessionHandler'; import type { Session } from './session'; @@ -22,6 +24,7 @@ import { PermissionModeSchema } from '@hapi/protocol/schemas'; import { formatMessageWithAttachments } from '@/utils/attachmentFormatter'; import { normalizeClaudeSessionModel } from './model'; import { normalizeClaudeSessionEffort } from './effort'; +import { computeBackSyncedPermissionMode } from './utils/backSyncPermissionMode'; import { getInvokedCwd } 
from '@/utils/invokedCwd'; export interface StartOptions { @@ -107,6 +110,12 @@ export async function runClaude(options: StartOptions = {}): Promise { // Variable to track current session instance (updated via onSessionReady callback) const currentSessionRef: { current: Session | null } = { current: null }; + // PTY mode has no SDK canUseTool callback, so tool approvals are bridged from + // a PreToolUse hook to the web via this handler (assigned below once the + // permission-mode state exists). Null in SDK/local/remote modes. + const isPtyMode = interactive; + let ptyPermissionHandler: PtyPermissionHandler | null = null; + const formatFailureReason = (message: string): string => { const maxLength = 200; if (message.length <= maxLength) { @@ -128,13 +137,36 @@ export async function runClaude(options: StartOptions = {}): Promise { currentSession.onSessionFound(sessionId); } } + }, + // PTY-mode tool-approval bridge. Resolves once the user answers in the + // web modal (may take minutes). Allows by default if the handler isn't + // up yet (should not happen in PTY mode). + onPreToolUse: async (data) => { + if (!ptyPermissionHandler) { + return { permissionDecision: 'allow' }; + } + // Reverse-sync: if the user changed claude's mode in the terminal + // (Shift+Tab cycles auto → acceptEdits → plan), claude reports it in + // the hook payload. Adopt it so the Chat UI / handler stay consistent. + // yolo (bypassPermissions) is hapi-only and is never overwritten here. + const syncedMode = computeBackSyncedPermissionMode(currentPermissionMode, data.permission_mode); + if (syncedMode) { + logger.debug(`[pty] adopting claude TUI permission mode: ${currentPermissionMode} → ${syncedMode}`); + currentPermissionMode = syncedMode; + syncSessionModes(); + } + const toolUseId = data.tool_use_id || `${data.tool_name ?? 'tool'}-${randomUUID()}`; // unique fallback: requests without a tool_use_id must never share a key + return ptyPermissionHandler.requestDecision(toolUseId, data.tool_name ??
'', data.tool_input); } }); logger.debug(`[START] Hook server started on port ${hookServer.port}`); const hookSettingsPath = generateHookSettingsFile(hookServer.port, hookServer.token, { filenamePrefix: 'session-hook', - logLabel: 'generateHookSettings' + logLabel: 'generateHookSettings', + // PTY sessions rely on the PreToolUse hook for approvals; the SDK path + // must NOT register it (it uses canUseTool instead). + includePreToolUse: isPtyMode }); logger.debug(`[START] Generated hook settings file: ${hookSettingsPath}`); @@ -147,7 +179,13 @@ export async function runClaude(options: StartOptions = {}): Promise { session, logTag: 'claude', stopKeepAlive: () => currentSessionRef.current?.stopKeepAlive(), + // Tear down the PTY before process.exit. For PTY mode + // the launcher registers a kill handler that aborts the controller → + // runAgentPty's manager.kill() runs synchronously. No-op in local/remote + // mode where no handler is registered. + onBeforeClose: () => { currentSessionRef.current?.kill(); }, onAfterClose: () => { + ptyPermissionHandler?.cancelAll('Session ended'); happyServer.stop(); hookServer.stop(); cleanupHookSettingsFile(hookSettingsPath, 'generateHookSettings'); @@ -202,6 +240,23 @@ export async function runClaude(options: StartOptions = {}): Promise { sessionInstance.setEffort(currentEffort); logger.debug(`[loop] Synced session config for keepalive: permissionMode=${currentPermissionMode}, model=${currentModel ?? 'auto'}, effort=${currentEffort ?? 'auto'}`); }; + + // Bring up the PTY tool-approval bridge now that the permission-mode state + // exists. It reads the live mode (web dropdown can change it mid-session) and + // routes any "approve & switch mode" choice back into that same state.
+ if (isPtyMode) { + ptyPermissionHandler = new PtyPermissionHandler(session, { + getPermissionMode: () => currentPermissionMode, + onModeChange: (mode) => { + if (!isPermissionModeAllowedForFlavor(mode, 'claude')) { + return; + } + currentPermissionMode = mode as PermissionMode; + currentSessionRef.current?.setPermissionMode(mode as PermissionMode); + syncSessionModes(); + } + }); + } session.onUserMessage((message, localId) => { const sessionPermissionMode = currentSessionRef.current?.getPermissionMode(); if (sessionPermissionMode && isPermissionModeAllowedForFlavor(sessionPermissionMode, 'claude')) { diff --git a/cli/src/claude/session.ts b/cli/src/claude/session.ts index 2106d312fa..87a080a0e3 100644 --- a/cli/src/claude/session.ts +++ b/cli/src/claude/session.ts @@ -85,14 +85,35 @@ export class Session extends AgentSessionBase { return this.permissionMode as PermissionMode | undefined; } + // Fired when the model or effort actually changes mid-session. The PTY + // launcher uses this to apply the change to the live Claude TUI through + // its /model and /effort slash commands, so no re-spawn is needed and the + // conversation and scrollback are preserved. + private configChangeHandler: (() => void) | null = null; + setConfigChangeHandler = (handler: (() => void) | null): void => { + this.configChangeHandler = handler; + }; + setModel = (model: SessionModel): void => { + if (model === this.model) return; this.model = model; + this.configChangeHandler?.(); }; setEffort = (effort: SessionEffort): void => { + if (effort === this.effort) return; this.effort = effort; + this.configChangeHandler?.(); }; + getModel(): SessionModel { + return this.model ?? null; + } + + getEffort(): SessionEffort { + return this.effort ??
null; + } + recordLocalLaunchFailure = (message: string, exitReason: LocalLaunchExitReason): void => { this.localLaunchFailure = { message, exitReason }; }; diff --git a/cli/src/claude/utils/backSyncPermissionMode.test.ts b/cli/src/claude/utils/backSyncPermissionMode.test.ts new file mode 100644 index 0000000000..b79ca1db33 --- /dev/null +++ b/cli/src/claude/utils/backSyncPermissionMode.test.ts @@ -0,0 +1,34 @@ +import { describe, it, expect } from 'vitest' +import { computeBackSyncedPermissionMode } from './backSyncPermissionMode' + +describe('computeBackSyncedPermissionMode', () => { + it('adopts the mode claude reports (e.g. user pressed Shift+Tab in the TUI)', () => { + expect(computeBackSyncedPermissionMode('default', 'acceptEdits')).toBe('acceptEdits') + expect(computeBackSyncedPermissionMode('acceptEdits', 'plan')).toBe('plan') + // claude reports its default mode as "auto" — a valid hapi mode, taken as-is + expect(computeBackSyncedPermissionMode('default', 'auto')).toBe('auto') + }) + + it('returns null when nothing changed (no redundant sync)', () => { + expect(computeBackSyncedPermissionMode('acceptEdits', 'acceptEdits')).toBeNull() + expect(computeBackSyncedPermissionMode('auto', 'auto')).toBeNull() + }) + + it('keeps yolo (bypassPermissions) hapi-only: claude mode never clobbers it', () => { + // claude can't represent bypassPermissions (not in its Shift+Tab cycle), + // so its reported mode must not pull a yolo session out of yolo. 
+ expect(computeBackSyncedPermissionMode('bypassPermissions', 'auto')).toBeNull() + expect(computeBackSyncedPermissionMode('bypassPermissions', 'acceptEdits')).toBeNull() + expect(computeBackSyncedPermissionMode('bypassPermissions', 'plan')).toBeNull() + }) + + it('never lets an inbound hook flip us INTO bypassPermissions', () => { + expect(computeBackSyncedPermissionMode('default', 'bypassPermissions')).toBeNull() + }) + + it('ignores missing / invalid claude modes', () => { + expect(computeBackSyncedPermissionMode('default', undefined)).toBeNull() + expect(computeBackSyncedPermissionMode('default', '')).toBeNull() + expect(computeBackSyncedPermissionMode('default', 'nonsense')).toBeNull() + }) +}) diff --git a/cli/src/claude/utils/backSyncPermissionMode.ts b/cli/src/claude/utils/backSyncPermissionMode.ts new file mode 100644 index 0000000000..66f82d1956 --- /dev/null +++ b/cli/src/claude/utils/backSyncPermissionMode.ts @@ -0,0 +1,40 @@ +import type { ClaudePermissionMode } from '@hapi/protocol/types' +import { CLAUDE_PERMISSION_MODES } from '@hapi/protocol/modes' + +/** + * Reverse-sync of the PTY permission mode: when the user changes claude's mode + * directly in the terminal (Shift+Tab cycles auto → acceptEdits → plan), claude + * reports the new mode in every PreToolUse hook payload. This maps that reported + * mode back to the hapi session mode so the Chat UI (the primary control/display + * channel) stays consistent with the terminal. + * + * Returns the new mode to apply, or `null` when nothing should change. + * + * yolo (`bypassPermissions`) is deliberately hapi-only: claude can't represent + * it (it isn't in claude's Shift+Tab cycle), so a yolo session must NOT be pulled + * out of yolo by claude's reported mode. yolo is set/cleared only from the Chat UI. 
+ * + * @param current the hapi session's current permission mode + * @param claudeMode the `permission_mode` claude reported in the hook payload + */ +export function computeBackSyncedPermissionMode( + current: ClaudePermissionMode, + claudeMode: string | undefined +): ClaudePermissionMode | null { + if (!claudeMode) { + return null + } + // Don't let claude's (non-yolo) mode clobber a yolo session. + if (current === 'bypassPermissions') { + return null + } + if (!(CLAUDE_PERMISSION_MODES as readonly string[]).includes(claudeMode)) { + return null + } + const next = claudeMode as ClaudePermissionMode + // Guard: an inbound hook must never flip us into yolo. + if (next === 'bypassPermissions') { + return null + } + return next === current ? null : next +} diff --git a/cli/src/claude/utils/claudePermissionPolicy.test.ts b/cli/src/claude/utils/claudePermissionPolicy.test.ts new file mode 100644 index 0000000000..dc48a17cb8 --- /dev/null +++ b/cli/src/claude/utils/claudePermissionPolicy.test.ts @@ -0,0 +1,36 @@ +import { describe, it, expect } from 'vitest' +import { resolveClaudeModePolicy } from './claudePermissionPolicy' + +describe('resolveClaudeModePolicy', () => { + it('routes question tools to the web regardless of mode', () => { + for (const mode of ['default', 'bypassPermissions', 'acceptEdits', 'plan'] as const) { + expect(resolveClaudeModePolicy(mode, 'AskUserQuestion')).toBe('web') + expect(resolveClaudeModePolicy(mode, 'ask_user_question')).toBe('web') + expect(resolveClaudeModePolicy(mode, 'request_user_input')).toBe('web') + } + }) + + it('auto-allows everything except question tools under bypassPermissions', () => { + expect(resolveClaudeModePolicy('bypassPermissions', 'Bash')).toBe('allow') + expect(resolveClaudeModePolicy('bypassPermissions', 'Edit')).toBe('allow') + expect(resolveClaudeModePolicy('bypassPermissions', 'Read')).toBe('allow') + }) + + it('auto-allows edit tools under acceptEdits', () => { + expect(resolveClaudeModePolicy('acceptEdits', 
'Edit')).toBe('allow') + expect(resolveClaudeModePolicy('acceptEdits', 'Write')).toBe('allow') + expect(resolveClaudeModePolicy('acceptEdits', 'MultiEdit')).toBe('allow') + expect(resolveClaudeModePolicy('acceptEdits', 'NotebookEdit')).toBe('allow') + }) + + it('falls through for non-edit tools under acceptEdits', () => { + expect(resolveClaudeModePolicy('acceptEdits', 'Bash')).toBe('fallthrough') + expect(resolveClaudeModePolicy('acceptEdits', 'Read')).toBe('fallthrough') + }) + + it('falls through in default mode and for undefined mode', () => { + expect(resolveClaudeModePolicy('default', 'Bash')).toBe('fallthrough') + expect(resolveClaudeModePolicy('default', 'Edit')).toBe('fallthrough') + expect(resolveClaudeModePolicy(undefined, 'Bash')).toBe('fallthrough') + }) +}) diff --git a/cli/src/claude/utils/claudePermissionPolicy.ts b/cli/src/claude/utils/claudePermissionPolicy.ts new file mode 100644 index 0000000000..b1b1da4976 --- /dev/null +++ b/cli/src/claude/utils/claudePermissionPolicy.ts @@ -0,0 +1,54 @@ +import type { PermissionMode } from '@hapi/protocol/types' +import { getToolDescriptor } from './getToolDescriptor' +import { isQuestionToolName } from './questionAnswerInput' + +/** + * Outcome of the shared, mode-based claude permission policy. + * - `allow` — auto-approve without asking the user. + * - `web` — must be surfaced in the web UI (never auto-approved). + * - `fallthrough` — no mode-based decision; the caller applies its own + * remaining rules (session allow-lists, read-only tools, + * the approval modal, ...). + */ +export type ClaudeModePolicy = 'allow' | 'web' | 'fallthrough' + +/** + * The mode-based slice of claude's permission decision for the PTY PreToolUse + * hook path. 
Modelled on the SDK `canCallTool` handler (permissionHandler.ts), + * which keeps its own inline copy of these rules — adopting this helper there is + * a separate change, out of scope for the PTY work — so any rule change here + * should be mirrored in permissionHandler.ts. + * + * Rules, in order: + * 1. Question tools (AskUserQuestion / request_user_input) ALWAYS go to the + * web, in every mode. Auto-allowing them would make the SDK stall or the + * PTY render its interactive selector only — the question would never + * reach the chat. + * 2. bypassPermissions (the --yolo mapping) auto-allows everything else. + * 3. acceptEdits auto-allows edit tools (Edit/Write/MultiEdit/NotebookEdit). + * + * Everything else is `fallthrough`: the caller decides (default mode, plan + * mode, session allow-lists, etc.). + * + * Known divergence from the SDK: under bypassPermissions the SDK special-cases + * `exit_plan_mode` (injects PLAN_FAKE_RESTART and denies, so the SDK turn + * continues past the plan). This helper returns `allow` for it instead — in PTY + * mode claude drives its own plan exit interactively, so the SDK's queue- + * injection trick doesn't apply. Callers that need that behaviour must handle + * `exit_plan_mode` before consulting this helper (the SDK path still does). 
+ */ +export function resolveClaudeModePolicy( + mode: PermissionMode | undefined, + toolName: string +): ClaudeModePolicy { + if (isQuestionToolName(toolName)) { + return 'web' + } + if (mode === 'bypassPermissions') { + return 'allow' + } + if (mode === 'acceptEdits' && getToolDescriptor(toolName).edit) { + return 'allow' + } + return 'fallthrough' +} diff --git a/cli/src/claude/utils/ptyPermissionHandler.test.ts b/cli/src/claude/utils/ptyPermissionHandler.test.ts new file mode 100644 index 0000000000..70d56a942a --- /dev/null +++ b/cli/src/claude/utils/ptyPermissionHandler.test.ts @@ -0,0 +1,249 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { PermissionMode } from '@hapi/protocol/types'; +import { PtyPermissionHandler } from './ptyPermissionHandler'; +import type { PermissionHandlerClient } from '@/modules/common/permission/BasePermissionHandler'; +import { RPC_METHODS } from '@hapi/protocol/rpcMethods'; + +type PermissionRpcHandler = (response: { + id: string; + approved: boolean; + reason?: string; + mode?: PermissionMode; + allowTools?: string[]; + answers?: Record | Record; +}) => Promise | void; + +function createFakeClient() { + let permissionHandler: PermissionRpcHandler | null = null; + const state: { requests: Record; completedRequests: Record } = { + requests: {}, + completedRequests: {} + }; + + const client: PermissionHandlerClient = { + rpcHandlerManager: { + registerHandler: vi.fn((method: string, handler: unknown) => { + if (method === RPC_METHODS.Permission) { + permissionHandler = handler as PermissionRpcHandler; + } + }) + }, + updateAgentState: vi.fn((handler: (s: any) => any) => { + Object.assign(state, handler(state)); + }) + }; + + return { + client, + state, + respond: (response: Parameters[0]) => { + if (!permissionHandler) throw new Error('Permission RPC handler not registered'); + return permissionHandler(response); + } + }; +} + +describe('PtyPermissionHandler', () => { + it('auto-allows pure read-only tools 
without a web round trip', async () => { + const { client, state } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + for (const tool of ['Read', 'Glob', 'Grep', 'LS', 'NotebookRead', 'TodoWrite']) { + const decision = await handler.requestDecision(`id-${tool}`, tool, {}); + expect(decision.permissionDecision).toBe('allow'); + } + // never surfaced a request to the web + expect(Object.keys(state.requests)).toHaveLength(0); + }); + + it('routes AskUserQuestion to the web and injects the picked answers via updatedInput', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const input = { questions: [{ question: 'Pick a color?', header: 'Color' }] }; + const pending = handler.requestDecision('q1', 'AskUserQuestion', input); + // surfaced in agent state so the web shows the question card + expect(state.requests['q1']).toMatchObject({ tool: 'AskUserQuestion' }); + + await respond({ id: 'q1', approved: true, answers: { '0': ['Blue'] } }); + const decision = await pending; + expect(decision.permissionDecision).toBe('allow'); + // claude's AskUserQuestion expects answers keyed by question text + expect(decision.updatedInput).toMatchObject({ answers: { 'Pick a color?': 'Blue' } }); + }); + + it('under bypassPermissions (--yolo), auto-allows permission tools but still forwards question tools to the web', async () => { + const { client, state } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'bypassPermissions' }); + + // A permission-gated tool is auto-allowed — yolo semantics are preserved. 
+ const bash = await handler.requestDecision('b1', 'Bash', { command: 'ls' }); + expect(bash.permissionDecision).toBe('allow'); + expect(state.requests['b1']).toBeUndefined(); + + // AskUserQuestion must NOT be auto-allowed even under bypassPermissions: + // it has to surface in the web so the question reaches the chat instead + // of rendering only in the PTY's interactive selector. + handler.requestDecision('q-yolo', 'AskUserQuestion', { questions: [{ question: 'Web or CLI?', header: 'Form' }] }); + expect(state.requests['q-yolo']).toMatchObject({ tool: 'AskUserQuestion' }); + + // request_user_input is handled the same way. + handler.requestDecision('r-yolo', 'request_user_input', { prompt: 'Anything else?' }); + expect(state.requests['r-yolo']).toMatchObject({ tool: 'request_user_input' }); + }); + + it('mirrors the SDK under acceptEdits: auto-allows edit tools, asks for the rest', async () => { + const { client, state } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'acceptEdits' }); + + // Edit tools are auto-allowed, matching the SDK canCallTool path. + for (const tool of ['Edit', 'Write', 'MultiEdit', 'NotebookEdit']) { + const dec = await handler.requestDecision(`e-${tool}`, tool, { file_path: '/x' }); + expect(dec.permissionDecision).toBe('allow'); + expect(state.requests[`e-${tool}`]).toBeUndefined(); + } + + // A non-edit tool still goes to the web modal under acceptEdits. + handler.requestDecision('b1', 'Bash', { command: 'ls' }); + expect(state.requests['b1']).toMatchObject({ tool: 'Bash' }); + }); + + it('denies AskUserQuestion when no answers are provided', async () => { + const { client, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const pending = handler.requestDecision('q2', 'AskUserQuestion', { questions: [{ question: 'X?' 
}] }); + await respond({ id: 'q2', approved: true, answers: {} }); + const decision = await pending; + expect(decision.permissionDecision).toBe('deny'); + }); + + it('denies AskUserQuestion when answers cannot be mapped to questions (never stalls)', async () => { + const { client, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + // Web sends a non-empty answer, but the index doesn't line up with any + // question text, so the claude-shaped map comes out empty. Allowing here + // would make claude echo "answered: ." and lock the turn — deny instead. + const pending = handler.requestDecision('q3', 'AskUserQuestion', { questions: [{ question: 'X?' }] }); + await respond({ id: 'q3', approved: true, answers: { '5': ['Stray'] } }); + const decision = await pending; + expect(decision.permissionDecision).toBe('deny'); + expect(decision.updatedInput).toBeUndefined(); + }); + + it('auto-allows everything in bypassPermissions (the --yolo mapping)', async () => { + const { client, state } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'bypassPermissions' }); + + const decision = await handler.requestDecision('b1', 'Bash', { command: 'rm -rf /tmp/x' }); + expect(decision.permissionDecision).toBe('allow'); + expect(Object.keys(state.requests)).toHaveLength(0); + }); + + it('routes gated tools to the web modal and resolves allow on approval', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const pending = handler.requestDecision('tool-1', 'Bash', { command: 'ls' }); + // surfaced in agent state for the web modal + expect(state.requests['tool-1']).toMatchObject({ tool: 'Bash' }); + + await respond({ id: 'tool-1', approved: true }); + const decision = await pending; + expect(decision.permissionDecision).toBe('allow'); + 
expect(decision.updatedInput).toEqual({ command: 'ls' }); + }); + + it('resolves deny (never ask) when the user rejects', async () => { + const { client, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const pending = handler.requestDecision('tool-2', 'Write', { file_path: '/etc/x' }); + await respond({ id: 'tool-2', approved: false, reason: 'nope' }); + const decision = await pending; + expect(decision.permissionDecision).toBe('deny'); + expect(decision.reason).toContain('nope'); + }); + + it('remembers "allow for session" tools and skips re-prompting', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const first = handler.requestDecision('w-1', 'WebFetch', { url: 'https://a' }); + await respond({ id: 'w-1', approved: true, allowTools: ['WebFetch'] }); + expect((await first).permissionDecision).toBe('allow'); + + // second call to the same tool is auto-allowed without a new request + const before = Object.keys(state.requests).length; + const second = await handler.requestDecision('w-2', 'WebFetch', { url: 'https://b' }); + expect(second.permissionDecision).toBe('allow'); + expect(Object.keys(state.requests).length).toBe(before); + }); + + it('honors "allow for session" for a Bash command (web sends Bash())', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const first = handler.requestDecision('b-1', 'Bash', { command: 'echo hi' }); + // web's "Allow For Session" for claude Bash sends the command-qualified id + await respond({ id: 'b-1', approved: true, allowTools: ['Bash(echo hi)'] }); + expect((await first).permissionDecision).toBe('allow'); + + // same command auto-allows without a new web request + const before = Object.keys(state.requests).length; + 
const second = await handler.requestDecision('b-2', 'Bash', { command: 'echo hi' }); + expect(second.permissionDecision).toBe('allow'); + expect(Object.keys(state.requests).length).toBe(before); + }); + + it('still prompts for a different Bash command after a literal session-allow', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const first = handler.requestDecision('b-1', 'Bash', { command: 'echo hi' }); + await respond({ id: 'b-1', approved: true, allowTools: ['Bash(echo hi)'] }); + await first; + + // a DIFFERENT command is not covered by the literal allow → surfaces a request + handler.requestDecision('b-2', 'Bash', { command: 'rm -rf /' }); + expect(state.requests['b-2']).toMatchObject({ tool: 'Bash' }); + }); + + it('honors a Bash prefix session-allow (Bash(:*))', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const first = handler.requestDecision('p-1', 'Bash', { command: 'npm test' }); + await respond({ id: 'p-1', approved: true, allowTools: ['Bash(npm:*)'] }); + await first; + + const before = Object.keys(state.requests).length; + const second = await handler.requestDecision('p-2', 'Bash', { command: 'npm run build' }); + expect(second.permissionDecision).toBe('allow'); + expect(Object.keys(state.requests).length).toBe(before); + }); + + it('propagates a mode switch chosen alongside the approval', async () => { + const { client, respond } = createFakeClient(); + const onModeChange = vi.fn(); + const handler = new PtyPermissionHandler(client, { + getPermissionMode: () => 'default', + onModeChange + }); + + const pending = handler.requestDecision('e-1', 'Edit', { file_path: '/x' }); + await respond({ id: 'e-1', approved: true, mode: 'acceptEdits' }); + await pending; + 
expect(onModeChange).toHaveBeenCalledWith('acceptEdits'); + }); + + it('cancelAll rejects in-flight requests (deny path for teardown)', async () => { + const { client, respond: _respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const pending = handler.requestDecision('c-1', 'Bash', { command: 'sleep 999' }); + handler.cancelAll('Session ended'); + await expect(pending).rejects.toThrow('Session ended'); + }); +}); diff --git a/cli/src/claude/utils/ptyPermissionHandler.ts b/cli/src/claude/utils/ptyPermissionHandler.ts new file mode 100644 index 0000000000..bb24aeffae --- /dev/null +++ b/cli/src/claude/utils/ptyPermissionHandler.ts @@ -0,0 +1,255 @@ +/** + * Permission bridge for PTY-mode claude sessions. + * + * The SDK path routes tool approvals through the SDK's `canUseTool` callback + * (see {@link ../utils/permissionHandler.ts}). A PTY-mode claude has no such + * callback — it would render permission prompts in its own TUI and stall the + * chat-driven flow. Instead, a PreToolUse hook forwards each tool call here; we + * either auto-allow it or surface it in the web approval modal (reusing the + * exact `state.requests` + `permission` RPC machinery the SDK path uses) and + * return the resulting allow/deny to claude. + * + * We MUST always resolve to `allow` or `deny` — never `ask` — because `ask` + * makes claude fall back to its own (TUI) prompt, which blocks the PTY. 
+ */ + +import type { PermissionMode } from '@hapi/protocol/types'; +import { + BasePermissionHandler, + resolveToolAutoApprovalDecision, + type PendingPermissionRequest, + type PermissionCompletion, + type PermissionHandlerClient +} from '@/modules/common/permission/BasePermissionHandler'; +import { logger } from '@/ui/logger'; +import { + isAskUserQuestionToolName, + isRequestUserInputToolName, + isQuestionToolName, + buildAskUserQuestionUpdatedInput, + buildRequestUserInputUpdatedInput +} from './questionAnswerInput'; +import { resolveClaudeModePolicy } from './claudePermissionPolicy'; + +export type PtyPermissionDecision = { + permissionDecision: 'allow' | 'deny'; + reason?: string; + updatedInput?: Record; +}; + +// The web-driven response delivered over the `permission` RPC. Same shape the +// SDK PermissionHandler consumes, so the existing web approval UI works as-is. +type PermissionResponse = { + id: string; + approved: boolean; + reason?: string; + mode?: PermissionMode; + allowTools?: string[]; + // Picked answers for the question tools (AskUserQuestion / request_user_input). + answers?: Record | Record; +}; + +// Tools claude itself does not prompt for in default mode: pure read-only +// file/search/state tools. Auto-allow them so PTY default mode isn't flooded +// with an approval prompt for every Read/Grep. Network/exec/write tools still +// go to the web. Question tools (AskUserQuestion / request_user_input) are NOT +// here on purpose — they are routed to the web so the user answers in the chat, +// and the picked answers are injected back via the tool's updatedInput. +const PTY_AUTO_ALLOW_TOOLS = new Set([ + 'Read', + 'Glob', + 'Grep', + 'LS', + 'NotebookRead', + 'TodoWrite' +]); + +export type PtyPermissionHandlerOptions = { + /** Reads the session's CURRENT permission mode (web dropdown can change it mid-session). */ + getPermissionMode: () => PermissionMode | undefined; + /** Propagate a mode change requested via the web approval (e.g. 
"approve & switch to acceptEdits"). */ + onModeChange?: (mode: PermissionMode) => void; +}; + +export class PtyPermissionHandler extends BasePermissionHandler { + private readonly options: PtyPermissionHandlerOptions; + // Tools the user chose to always allow this session ("allow for session"). + private readonly sessionAllowedTools = new Set(); + // Bash "allow for session" arrives command-qualified (Bash() or + // Bash(:*)), so it needs literal/prefix matching rather than a plain + // tool-name set — mirrors the SDK PermissionHandler. + private readonly allowedBashLiterals = new Set(); + private readonly allowedBashPrefixes = new Set(); + + constructor(client: PermissionHandlerClient, options: PtyPermissionHandlerOptions) { + super(client); + this.options = options; + } + + /** + * Decide whether a PTY tool call may proceed. Resolves immediately for + * auto-allowed tools/modes; otherwise registers a pending request that + * resolves when the user answers in the web modal. + */ + requestDecision(toolUseId: string, toolName: string, input: unknown): Promise { + const mode = this.options.getPermissionMode(); + + // 1. Already allowed for the session via a prior approval: by tool name (a plain "Bash" allow is stored in sessionAllowedTools too), or for Bash by a remembered command literal/prefix. + if (this.sessionAllowedTools.has(toolName)) { + return Promise.resolve({ permissionDecision: 'allow' }); + } else if (toolName === 'Bash') { + const command = (input as { command?: string } | null)?.command; + if (command && this.isBashCommandAllowed(command)) { + return Promise.resolve({ permissionDecision: 'allow' }); + } + } + + // 2. Pure read-only tools — never gated. + if (PTY_AUTO_ALLOW_TOOLS.has(toolName)) { + return Promise.resolve({ permissionDecision: 'allow' }); + } + + // 3. 
Shared mode-based policy, kept identical to the SDK canCallTool + // path (resolveClaudeModePolicy): question tools (AskUserQuestion / + // request_user_input) ALWAYS go to the web — otherwise claude renders + // its interactive selector in the PTY only and the question never + // reaches the chat; bypassPermissions auto-allows; acceptEdits + // auto-allows edit tools. 'fallthrough' defers to the auto-approval + // hints (change_title, etc.) below. + const policy = resolveClaudeModePolicy(mode, toolName); + if (policy === 'allow') { + return Promise.resolve({ permissionDecision: 'allow' }); + } + if (policy === 'fallthrough' && resolveToolAutoApprovalDecision(mode, toolName, toolUseId)) { + return Promise.resolve({ permissionDecision: 'allow' }); + } + + // 4. Ask the user via the web approval modal. + return new Promise((resolve, reject) => { + this.addPendingRequest(toolUseId, toolName, input, { resolve, reject }); + logger.debug(`[ptyPermission] Awaiting web approval for ${toolName} (${toolUseId})`); + }); + } + + /** Reject every in-flight request (deny) — call on session teardown/abort. */ + cancelAll(reason: string): void { + this.cancelPendingRequests({ + completedReason: reason, + rejectMessage: reason, + decision: 'denied' + }); + } + + protected async handlePermissionResponse( + response: PermissionResponse, + pending: PendingPermissionRequest + ): Promise { + // Remember "allow for session" choices so we don't re-prompt. Bash comes + // command-qualified (Bash() / Bash(:*)); other tools by name. + if (response.allowTools && response.allowTools.length > 0) { + for (const tool of response.allowTools) { + if (tool === 'Bash' || tool.startsWith('Bash(')) { + this.rememberBashPermission(tool); + } else { + this.sessionAllowedTools.add(tool); + } + } + } + + // A mode switch chosen alongside the approval (e.g. acceptEdits). 
+ if (response.mode) { + this.options.onModeChange?.(response.mode); + } + + const completion: PermissionCompletion = { + status: response.approved ? 'approved' : 'denied', + reason: response.reason, + mode: response.mode, + allowTools: response.allowTools, + answers: response.answers + }; + + // Question tools: the user answered in the chat. Inject the picked + // answers into the tool's updatedInput so claude echoes them instead of + // re-prompting in its TUI (same trick the SDK canUseTool path uses). + if (isQuestionToolName(pending.toolName)) { + const answers = response.answers ?? {}; + const denyNoAnswers = (): PermissionCompletion => { + completion.status = 'denied'; + completion.reason = completion.reason ?? 'No answers were provided.'; + pending.resolve({ permissionDecision: 'deny', reason: 'No answers were provided.' }); + return completion; + }; + if (Object.keys(answers).length === 0) { + return denyNoAnswers(); + } + const updatedInput = isAskUserQuestionToolName(pending.toolName) + ? buildAskUserQuestionUpdatedInput(pending.input, answers) + : isRequestUserInputToolName(pending.toolName) + ? buildRequestUserInputUpdatedInput(pending.input, answers) + : (pending.input as Record); + // Never-stall guard: if the index->questionText mapping produced no + // usable answers (e.g. malformed/reordered questions), an `allow` with + // empty answers makes claude echo an empty "answered: ." result and + // lock the turn. Deny instead so the bridge never silently stalls. + if (isAskUserQuestionToolName(pending.toolName)) { + const mapped = (updatedInput as { answers?: unknown }).answers; + if (!mapped || typeof mapped !== 'object' || Object.keys(mapped as object).length === 0) { + return denyNoAnswers(); + } + } + pending.resolve({ permissionDecision: 'allow', updatedInput }); + return completion; + } + + if (response.approved) { + pending.resolve({ + permissionDecision: 'allow', + updatedInput: (pending.input as Record) ?? 
undefined + }); + } else { + pending.resolve({ + permissionDecision: 'deny', + reason: + response.reason || + "The user declined this tool use. The tool was NOT run. Stop and wait for the user to tell you how to proceed." + }); + } + + return completion; + } + + protected handleMissingPendingResponse(response: PermissionResponse): void { + logger.debug(`[ptyPermission] No pending request for response ${response.id} (already resolved?)`); + } + + private isBashCommandAllowed(command: string): boolean { + if (this.allowedBashLiterals.has(command)) { + return true; + } + for (const prefix of this.allowedBashPrefixes) { + if (command.startsWith(prefix)) { + return true; + } + } + return false; + } + + private rememberBashPermission(permission: string): void { + // Plain "Bash" would allow every command — treat it as a name-level allow. + if (permission === 'Bash') { + this.sessionAllowedTools.add('Bash'); + return; + } + const match = permission.match(/^Bash\((.+?)\)$/); + if (!match) { + return; + } + const command = match[1]; + if (command.endsWith(':*')) { + this.allowedBashPrefixes.add(command.slice(0, -2)); + } else { + this.allowedBashLiterals.add(command); + } + } +} diff --git a/cli/src/claude/utils/sessionHookForwarder.test.ts b/cli/src/claude/utils/sessionHookForwarder.test.ts new file mode 100644 index 0000000000..d6974ad648 --- /dev/null +++ b/cli/src/claude/utils/sessionHookForwarder.test.ts @@ -0,0 +1,151 @@ +import { describe, it, expect, afterEach } from 'vitest'; +import { createServer, type Server } from 'node:http'; +import { + detectHookEventName, + buildPreToolUseStdout, + runSessionHookForwarder +} from './sessionHookForwarder'; + +describe('detectHookEventName', () => { + it('extracts the hook event name from a JSON payload', () => { + expect(detectHookEventName(JSON.stringify({ hook_event_name: 'PreToolUse' }))).toBe('PreToolUse'); + expect(detectHookEventName(Buffer.from(JSON.stringify({ hook_event_name: 'SessionStart' 
})))).toBe('SessionStart'); + }); + + it('returns null for non-JSON or missing event name', () => { + expect(detectHookEventName('not json')).toBeNull(); + expect(detectHookEventName(JSON.stringify({ session_id: 'x' }))).toBeNull(); + }); +}); + +describe('buildPreToolUseStdout', () => { + it('wraps an allow decision in claude hookSpecificOutput shape', () => { + const out = JSON.parse(buildPreToolUseStdout({ permissionDecision: 'allow' })); + expect(out).toEqual({ + hookSpecificOutput: { hookEventName: 'PreToolUse', permissionDecision: 'allow' } + }); + }); + + it('includes reason and updatedInput when present', () => { + const out = JSON.parse( + buildPreToolUseStdout({ permissionDecision: 'deny', reason: 'no', updatedInput: { a: 1 } }) + ); + expect(out.hookSpecificOutput.permissionDecisionReason).toBe('no'); + expect(out.hookSpecificOutput.updatedInput).toEqual({ a: 1 }); + }); +}); + +// --- integration: drive the forwarder against a stub hook server --- + +let server: Server | null = null; + +afterEach(async () => { + if (server) { + await new Promise((r) => server!.close(() => r())); + server = null; + } +}); + +function startStub(handler: (path: string, body: string) => { status: number; body: string }): Promise { + return new Promise((resolve) => { + server = createServer((req, res) => { + const chunks: Buffer[] = []; + req.on('data', (c) => chunks.push(c as Buffer)); + req.on('end', () => { + const { status, body } = handler(req.url || '', Buffer.concat(chunks).toString('utf-8')); + res.writeHead(status, { 'Content-Type': 'application/json' }).end(body); + }); + }); + server.listen(0, '127.0.0.1', () => { + const addr = server!.address(); + resolve(typeof addr === 'object' && addr ? addr.port : 0); + }); + }); +} + +function withStdin(payload: string, fn: () => Promise): Promise { + const original = process.stdin; + // Minimal async-iterable stdin stub. 
+ const fake = (async function* () { + yield Buffer.from(payload); + })(); + Object.defineProperty(process, 'stdin', { + value: Object.assign(fake, { resume: () => {} }), + configurable: true + }); + return fn().finally(() => { + Object.defineProperty(process, 'stdin', { value: original, configurable: true }); + }); +} + +function captureStdout(): { restore: () => void; get: () => string } { + const original = process.stdout.write.bind(process.stdout); + let captured = ''; + (process.stdout as unknown as { write: (s: string) => boolean }).write = (s: string) => { + captured += s; + return true; + }; + return { restore: () => { (process.stdout as unknown as { write: typeof original }).write = original; }, get: () => captured }; +} + +describe('runSessionHookForwarder — PreToolUse routing', () => { + it('POSTs PreToolUse to /hook/pre-tool-use and echoes the decision on stdout', async () => { + let hitPath = ''; + const port = await startStub((path) => { + hitPath = path; + return { status: 200, body: JSON.stringify({ permissionDecision: 'allow' }) }; + }); + + const out = captureStdout(); + try { + await withStdin( + JSON.stringify({ hook_event_name: 'PreToolUse', tool_name: 'Bash', tool_use_id: 'tc-1' }), + () => runSessionHookForwarder(['--port', String(port), '--token', 'tok']) + ); + } finally { + out.restore(); + } + + expect(hitPath).toBe('/hook/pre-tool-use'); + expect(JSON.parse(out.get())).toEqual({ + hookSpecificOutput: { hookEventName: 'PreToolUse', permissionDecision: 'allow' } + }); + }); + + it('fails closed (deny) when the bridge returns an error status', async () => { + const port = await startStub(() => ({ status: 500, body: 'boom' })); + + const out = captureStdout(); + try { + await withStdin( + JSON.stringify({ hook_event_name: 'PreToolUse', tool_name: 'Write', tool_use_id: 'tc-2' }), + () => runSessionHookForwarder(['--port', String(port), '--token', 'tok']) + ); + } finally { + out.restore(); + } + + 
expect(JSON.parse(out.get()).hookSpecificOutput.permissionDecision).toBe('deny'); + }); + + it('routes SessionStart to /hook/session-start and writes nothing to stdout', async () => { + let hitPath = ''; + const port = await startStub((path) => { + hitPath = path; + return { status: 200, body: 'ok' }; + }); + + const out = captureStdout(); + try { + await withStdin( + JSON.stringify({ hook_event_name: 'SessionStart', session_id: 's-1' }), + () => runSessionHookForwarder(['--port', String(port), '--token', 'tok']) + ); + } finally { + out.restore(); + } + + expect(hitPath).toBe('/hook/session-start'); + expect(out.get()).toBe(''); + }); +}); diff --git a/cli/src/claude/utils/sessionHookForwarder.ts b/cli/src/claude/utils/sessionHookForwarder.ts index 8cc206d307..64d6c3521a 100644 --- a/cli/src/claude/utils/sessionHookForwarder.ts +++ b/cli/src/claude/utils/sessionHookForwarder.ts @@ -6,6 +6,84 @@ function logError(message: string, error?: unknown): void { process.stderr.write(`[hook-forwarder] ${message}${suffix}\n`); } +export type PreToolUseDecision = { + permissionDecision: 'allow' | 'deny'; + reason?: string; + updatedInput?: Record; +}; + +/** Read the hook event name from a hook stdin payload, or null if unparseable. */ +export function detectHookEventName(body: Buffer | string): string | null { + try { + const parsed = JSON.parse(typeof body === 'string' ? body : body.toString('utf-8')); + if (parsed && typeof parsed === 'object' && typeof parsed.hook_event_name === 'string') { + return parsed.hook_event_name; + } + } catch { + // Not JSON / no event name — caller falls back to the session-start path. + } + return null; +} + +/** + * Wrap a permission decision in the JSON shape claude's PreToolUse hook reads + * from stdout. `permissionDecision` is always allow/deny — never `ask` (which + * would make claude fall back to its own TUI prompt and stall the PTY). 
+ */ +export function buildPreToolUseStdout(decision: PreToolUseDecision): string { + const hookSpecificOutput: Record = { + hookEventName: 'PreToolUse', + permissionDecision: decision.permissionDecision + }; + if (decision.reason) { + hookSpecificOutput.permissionDecisionReason = decision.reason; + } + if (decision.updatedInput) { + hookSpecificOutput.updatedInput = decision.updatedInput; + } + return JSON.stringify({ hookSpecificOutput }); +} + +function postHook( + port: number, + token: string, + path: string, + body: Buffer +): Promise<{ statusCode?: number; body: string; error: boolean }> { + return new Promise((resolve) => { + const chunks: Buffer[] = []; + const req = request( + { + host: '127.0.0.1', + port, + method: 'POST', + path, + headers: { + 'Content-Type': 'application/json', + 'Content-Length': body.length, + 'x-hapi-hook-token': token + } + }, + (res) => { + res.on('data', (chunk) => chunks.push(chunk as Buffer)); + res.on('error', (error) => { + logError('Error reading hook server response', error); + resolve({ statusCode: res.statusCode, body: Buffer.concat(chunks).toString('utf-8'), error: true }); + }); + res.on('end', () => + resolve({ statusCode: res.statusCode, body: Buffer.concat(chunks).toString('utf-8'), error: false }) + ); + } + ); + + req.on('error', (error) => { + logError('Failed to send hook request', error); + resolve({ body: '', error: true }); + }); + req.end(body); + }); +} + function parsePort(value: string | undefined): number | null { if (!value) { return null; @@ -91,40 +169,42 @@ export async function runSessionHookForwarder(args: string[]): Promise { const body = Buffer.concat(chunks); - let hadError = false; - await new Promise((resolve) => { - const req = request({ - host: '127.0.0.1', - port, - method: 'POST', - path: '/hook/session-start', - headers: { - 'Content-Type': 'application/json', - 'Content-Length': body.length, - 'x-hapi-hook-token': token + // PTY-mode permission bridge: a PreToolUse hook must wait for 
the web + // decision and echo it on stdout (allow/deny). Everything else (chiefly + // SessionStart) keeps the original fire-and-forget behavior. + if (detectHookEventName(body) === 'PreToolUse') { + const response = await postHook(port, token, '/hook/pre-tool-use', body); + + // Fail closed: if the bridge is unreachable or replies oddly, deny the + // tool rather than silently letting it run. Always exit 0 with valid + // stdout so claude honors the decision instead of treating the hook as + // failed (which would fall back to its own TUI prompt). + let decision: PreToolUseDecision = { + permissionDecision: 'deny', + reason: 'Permission bridge unavailable.' + }; + if (!response.error && response.statusCode === 200) { + try { + const parsed = JSON.parse(response.body); + if (parsed?.permissionDecision === 'allow' || parsed?.permissionDecision === 'deny') { + decision = parsed as PreToolUseDecision; + } + } catch (parseError) { + logError('Failed to parse pre-tool-use decision', parseError); } - }, (res) => { - if (res.statusCode && res.statusCode >= 400) { - hadError = true; - logError(`Hook server responded with status ${res.statusCode}`); - } - res.on('error', (error) => { - hadError = true; - logError('Error reading hook server response', error); - resolve(); - }); - res.on('end', () => resolve()); - res.resume(); - }); - - req.on('error', (error) => { - hadError = true; - logError('Failed to send hook request', error); - resolve(); - }); - req.end(body); - }); - if (hadError) { + } else if (response.statusCode && response.statusCode >= 400) { + logError(`Pre-tool-use hook responded with status ${response.statusCode}`); + } + + process.stdout.write(buildPreToolUseStdout(decision)); + return; + } + + const response = await postHook(port, token, '/hook/session-start', body); + if (response.error || (response.statusCode && response.statusCode >= 400)) { + if (response.statusCode && response.statusCode >= 400) { + logError(`Hook server responded with status 
${response.statusCode}`); + } process.exitCode = 1; } } catch (error) { diff --git a/cli/src/claude/utils/startHookServer.test.ts b/cli/src/claude/utils/startHookServer.test.ts index 2e5b2d57d1..3ecad254b4 100644 --- a/cli/src/claude/utils/startHookServer.test.ts +++ b/cli/src/claude/utils/startHookServer.test.ts @@ -2,7 +2,7 @@ import { describe, it, expect } from 'vitest' import { request } from 'node:http' import { startHookServer, type SessionHookData } from './startHookServer' -const sendHookRequest = async (port: number, body: string, token?: string): Promise<{ statusCode?: number; body: string }> => { +const sendHookRequest = async (port: number, body: string, token?: string, path = '/hook/session-start'): Promise<{ statusCode?: number; body: string }> => { return await new Promise((resolve, reject) => { const headers: Record = { 'Content-Type': 'application/json', @@ -15,7 +15,7 @@ const sendHookRequest = async (port: number, body: string, token?: string): Prom const req = request({ host: '127.0.0.1', port, - path: '/hook/session-start', + path, method: 'POST', headers }, (res) => { @@ -114,4 +114,83 @@ describe('startHookServer', () => { expect(hookCalled).toBe(false) }) + + describe('pre-tool-use', () => { + const sendPreToolUse = (port: number, payload: unknown, token?: string) => + sendHookRequest(port, JSON.stringify(payload), token, '/hook/pre-tool-use') + + it('forwards the tool call to onPreToolUse and returns its decision', async () => { + let received: unknown = null + const server = await startHookServer({ + onSessionHook: () => {}, + onPreToolUse: async (data) => { + received = data + return { permissionDecision: 'deny', reason: 'not allowed' } + } + }) + + try { + const response = await sendPreToolUse( + server.port, + { tool_name: 'Bash', tool_input: { command: 'ls' }, tool_use_id: 'tc-1', hook_event_name: 'PreToolUse' }, + server.token + ) + expect(response.statusCode).toBe(200) + expect(JSON.parse(response.body)).toEqual({ 
permissionDecision: 'deny', reason: 'not allowed' }) + } finally { + server.stop() + } + + expect((received as { tool_name?: string }).tool_name).toBe('Bash') + }) + + it('allows by default when no onPreToolUse handler is wired', async () => { + const server = await startHookServer({ onSessionHook: () => {} }) + try { + const response = await sendPreToolUse( + server.port, + { tool_name: 'Bash', tool_use_id: 'tc-2' }, + server.token + ) + expect(response.statusCode).toBe(200) + expect(JSON.parse(response.body)).toEqual({ permissionDecision: 'allow' }) + } finally { + server.stop() + } + }) + + it('fails closed (deny) when the handler throws', async () => { + const server = await startHookServer({ + onSessionHook: () => {}, + onPreToolUse: async () => { + throw new Error('bridge down') + } + }) + try { + const response = await sendPreToolUse(server.port, { tool_name: 'Write', tool_use_id: 'tc-3' }, server.token) + expect(response.statusCode).toBe(200) + expect(JSON.parse(response.body).permissionDecision).toBe('deny') + } finally { + server.stop() + } + }) + + it('returns 401 when the token is missing', async () => { + let called = false + const server = await startHookServer({ + onSessionHook: () => {}, + onPreToolUse: async () => { + called = true + return { permissionDecision: 'allow' } + } + }) + try { + const response = await sendPreToolUse(server.port, { tool_name: 'Bash' }) + expect(response.statusCode).toBe(401) + } finally { + server.stop() + } + expect(called).toBe(false) + }) + }) }) diff --git a/cli/src/claude/utils/startHookServer.ts b/cli/src/claude/utils/startHookServer.ts index 02073edfe6..63aed9663d 100644 --- a/cli/src/claude/utils/startHookServer.ts +++ b/cli/src/claude/utils/startHookServer.ts @@ -22,9 +22,37 @@ export interface SessionHookData { [key: string]: unknown; } +/** + * Data received from Claude's PreToolUse hook (PTY mode only). claude sends this + * before every tool call so we can bridge the approval to the web. 
+ */ +export interface PreToolUseHookData { + session_id?: string; + tool_name?: string; + tool_input?: unknown; + tool_use_id?: string; + permission_mode?: string; + cwd?: string; + hook_event_name?: string; + [key: string]: unknown; +} + +/** Decision returned to claude for a PreToolUse tool call. Never 'ask' (would stall the PTY). */ +export interface PreToolUseDecision { + permissionDecision: 'allow' | 'deny'; + reason?: string; + updatedInput?: Record; +} + export interface HookServerOptions { /** Called when a session hook is received with a valid session ID. */ onSessionHook: (sessionId: string, data: SessionHookData) => void; + /** + * Called for each PreToolUse hook (PTY mode). Resolves with the allow/deny + * decision once the user answers; may legitimately take minutes. When + * omitted, tool calls are allowed (no-op), matching --yolo behavior. + */ + onPreToolUse?: (data: PreToolUseHookData) => Promise; /** Optional token to require for hook requests. */ token?: string; } @@ -130,6 +158,61 @@ export async function startHookServer(options: HookServerOptions): Promise { expect(options.claudeArgs ?? []).not.toContain('--dangerously-skip-permissions') }) + it('keeps the PreToolUse hook alive in PTY mode: --yolo does NOT forward --dangerously-skip-permissions', () => { + const { options } = parseClaudeStartOptions(['--hapi-starting-mode', 'pty', '--yolo']) + // pty is the interactive launch axis, not a control mode + expect(options.interactive).toBe(true) + expect(options.startingMode).toBeUndefined() + // yolo semantics preserved internally... + expect(options.permissionMode).toBe('bypassPermissions') + // ...but the flag that would make claude bypass the hook is dropped, so + // AskUserQuestion / permission requests still reach the web chat. + expect(options.claudeArgs ?? 
[]).not.toContain('--dangerously-skip-permissions') + }) + + it('strips an explicit --dangerously-skip-permissions in PTY mode too', () => { + const { options } = parseClaudeStartOptions(['--hapi-starting-mode', 'pty', '--dangerously-skip-permissions']) + expect(options.permissionMode).toBe('bypassPermissions') + expect(options.claudeArgs ?? []).not.toContain('--dangerously-skip-permissions') + }) + + it('strips --dangerously-skip-permissions in PTY mode even with an explicit non-bypass mode', () => { + // Regression: an explicit --permission-mode keeps the raw skip flag out of + // the bypassPermissions branch, but in PTY mode the flag still disables the + // PreToolUse hook the web bridge relies on, so it must be dropped regardless. + const { options } = parseClaudeStartOptions([ + '--hapi-starting-mode', + 'pty', + '--permission-mode', + 'default', + '--dangerously-skip-permissions' + ]) + expect(options.permissionMode).toBe('default') + expect(options.claudeArgs ?? []).not.toContain('--dangerously-skip-permissions') + }) + + it('is arg-order independent (--yolo before --hapi-starting-mode pty)', () => { + const { options } = parseClaudeStartOptions(['--yolo', '--hapi-starting-mode', 'pty']) + expect(options.permissionMode).toBe('bypassPermissions') + expect(options.claudeArgs ?? 
[]).not.toContain('--dangerously-skip-permissions') + }) + + it('does not strip in local/remote mode (regression guard)', () => { + for (const mode of ['local', 'remote'] as const) { + const { options } = parseClaudeStartOptions(['--hapi-starting-mode', mode, '--yolo']) + expect(options.claudeArgs).toContain('--dangerously-skip-permissions') + } + }) + it('captures --started-by and surfaces --help via showHelp', () => { const { options, showHelp } = parseClaudeStartOptions(['--started-by', 'runner', '--help']) expect(options.startedBy).toBe('runner') diff --git a/cli/src/commands/claude.ts b/cli/src/commands/claude.ts index 4bb97f62cc..b4e285921c 100644 --- a/cli/src/commands/claude.ts +++ b/cli/src/commands/claude.ts @@ -78,6 +78,23 @@ export function parseClaudeStartOptions(args: string[]): { options: StartOptions } } + // PTY mode surfaces AskUserQuestion / permission requests in the web UI via + // claude's PreToolUse hook (see ptyPermissionHandler). `--dangerously-skip- + // permissions` makes claude bypass that hook entirely, so the question + // renders only in the PTY TUI and never reaches the chat. In PTY mode we + // always drop the flag — regardless of the resolved permission mode (an + // explicit `--permission-mode default --dangerously-skip-permissions` would + // otherwise keep it) — and rely on the hook instead: under bypassPermissions + // every request is auto-approved, while question tools are still forwarded to + // the web. 
+ if (options.interactive) { + for (let i = unknownArgs.length - 1; i >= 0; i--) { + if (unknownArgs[i] === '--dangerously-skip-permissions') { + unknownArgs.splice(i, 1) + } + } + } + if (unknownArgs.length > 0) { options.claudeArgs = [...(options.claudeArgs || []), ...unknownArgs] } diff --git a/cli/src/commands/resume.ts b/cli/src/commands/resume.ts index cf2af593fd..da101c2fcc 100644 --- a/cli/src/commands/resume.ts +++ b/cli/src/commands/resume.ts @@ -2,7 +2,7 @@ import chalk from 'chalk' import React from 'react' import { render } from 'ink' import { existsSync } from 'node:fs' -import type { LocalResumeTarget, ResumableSession } from '@hapi/protocol' +import { getFlavorLabel, type LocalResumeTarget, type ResumableSession } from '@hapi/protocol' import type { ClaudePermissionMode, CodexPermissionMode, @@ -25,7 +25,7 @@ function formatSessionLine(session: ResumableSession, index: number): string { const state = session.active ? session.controlledByUser ? 'local' : 'remote' : 'inactive' - return `${index + 1}. ${session.flavor.padEnd(8)} ${state.padEnd(8)} ${name} ${session.directory}` + return `${index + 1}. 
${getFlavorLabel(session.flavor).padEnd(12)} ${state.padEnd(8)} ${name} ${session.directory}` } async function selectSession(sessions: ResumableSession[]): Promise { diff --git a/cli/src/modules/common/hooks/generateHookSettings.test.ts b/cli/src/modules/common/hooks/generateHookSettings.test.ts new file mode 100644 index 0000000000..50f93b7cdf --- /dev/null +++ b/cli/src/modules/common/hooks/generateHookSettings.test.ts @@ -0,0 +1,79 @@ +import { describe, it, expect, afterEach } from 'vitest'; +import { readFileSync, rmSync } from 'node:fs'; +import { generateHookSettingsFile } from './generateHookSettings'; + +type WrittenSettings = { + hooks: { + SessionStart: Array<{ matcher: string; hooks: Array<{ type: string; command: string; timeout?: number }> }>; + PreToolUse?: Array<{ matcher: string; hooks: Array<{ type: string; command: string; timeout?: number }> }>; + }; +}; + +const created: string[] = []; + +function readSettings(filepath: string): WrittenSettings { + created.push(filepath); + return JSON.parse(readFileSync(filepath, 'utf-8')) as WrittenSettings; +} + +afterEach(() => { + for (const filepath of created.splice(0)) { + try { + rmSync(filepath, { force: true }); + } catch { + // best effort + } + } +}); + +describe('generateHookSettingsFile', () => { + it('registers SessionStart with the hook-forwarder command', () => { + const settings = readSettings( + generateHookSettingsFile(45678, 'tok-abc', { + filenamePrefix: 'test-session-hook', + logLabel: 'test' + }) + ); + + expect(settings.hooks.SessionStart).toHaveLength(1); + const entry = settings.hooks.SessionStart[0]; + expect(entry.matcher).toBe('*'); + expect(entry.hooks[0].type).toBe('command'); + expect(entry.hooks[0].command).toContain('hook-forwarder'); + expect(entry.hooks[0].command).toContain('45678'); + expect(entry.hooks[0].command).toContain('tok-abc'); + }); + + it('does NOT register PreToolUse by default (SDK/local/remote modes)', () => { + const settings = readSettings( + 
generateHookSettingsFile(45678, 'tok-abc', { + filenamePrefix: 'test-session-hook', + logLabel: 'test' + }) + ); + + expect(settings.hooks.PreToolUse).toBeUndefined(); + }); + + it('registers PreToolUse only when includePreToolUse is set (PTY mode)', () => { + const settings = readSettings( + generateHookSettingsFile(45678, 'tok-abc', { + filenamePrefix: 'test-pty-hook', + logLabel: 'test', + includePreToolUse: true + }) + ); + + expect(settings.hooks.PreToolUse).toHaveLength(1); + const entry = settings.hooks.PreToolUse![0]; + // matcher '*' matches every tool name (claude's Ghz: !q || q==='*' → true) + expect(entry.matcher).toBe('*'); + expect(entry.hooks[0].type).toBe('command'); + // same forwarder command — it branches on stdin hook_event_name + expect(entry.hooks[0].command).toBe(settings.hooks.SessionStart[0].hooks[0].command); + // generous timeout so the blocking hook survives a slow phone approval + expect(entry.hooks[0].timeout).toBeGreaterThanOrEqual(600); + // SessionStart keeps claude's default (no explicit timeout) + expect(settings.hooks.SessionStart[0].hooks[0].timeout).toBeUndefined(); + }); +}); diff --git a/cli/src/modules/common/hooks/generateHookSettings.ts b/cli/src/modules/common/hooks/generateHookSettings.ts index b612801315..2ec1118351 100644 --- a/cli/src/modules/common/hooks/generateHookSettings.ts +++ b/cli/src/modules/common/hooks/generateHookSettings.ts @@ -9,15 +9,26 @@ type HookCommandConfig = { hooks: Array<{ type: 'command'; command: string; + /** Per-command timeout in SECONDS (claude's hook schema). */ + timeout?: number; }>; }; +// PreToolUse bridges a tool approval to the web and blocks the (synchronous) +// hook until the user answers on their phone — which can take minutes. claude's +// default command-hook timeout is 60s; on timeout the decision is dropped and +// claude falls back to its own permission prompt (in PTY that renders in the TUI +// and stalls the chat flow). 
Give the PreToolUse hook a generous timeout so a +// human has time to respond. +const PRE_TOOL_USE_TIMEOUT_SECONDS = 3600; + type HookSettings = { hooksConfig?: { enabled?: boolean; }; hooks: { SessionStart: HookCommandConfig[]; + PreToolUse?: HookCommandConfig[]; }; }; @@ -25,6 +36,15 @@ export type HookSettingsOptions = { filenamePrefix: string; logLabel: string; hooksEnabled?: boolean; + /** + * Register a PreToolUse hook (PTY mode only). The SDK path routes tool + * approvals through the SDK's canUseTool callback, so it must NOT register + * PreToolUse or every tool would be double-handled. PTY sessions have no + * SDK callback, so they rely on this hook to bridge tool approvals to the + * web. The same forwarder command serves both events; it branches on the + * stdin `hook_event_name`. + */ + includePreToolUse?: boolean; }; function shellQuote(value: string): string { @@ -43,21 +63,29 @@ function shellJoin(parts: string[]): string { return parts.map(shellQuote).join(' '); } -function buildHookSettings(command: string, hooksEnabled?: boolean): HookSettings { +function buildHookSettings(command: string, hooksEnabled?: boolean, includePreToolUse?: boolean): HookSettings { const hooks: HookSettings['hooks'] = { SessionStart: [ { matcher: '*', - hooks: [ - { - type: 'command', - command - } - ] + hooks: [{ type: 'command', command }] } ] }; + if (includePreToolUse) { + // matcher '*' matches every tool name (claude's matcher: !q || q==='*' → all). + // The same forwarder command serves both events; it branches on the + // stdin hook_event_name. The long timeout keeps the (blocking) hook + // alive while the user approves on their phone. 
+ hooks.PreToolUse = [ + { + matcher: '*', + hooks: [{ type: 'command', command, timeout: PRE_TOOL_USE_TIMEOUT_SECONDS }] + } + ]; + } + const settings: HookSettings = { hooks }; if (hooksEnabled !== undefined) { settings.hooksConfig = { @@ -88,7 +116,7 @@ export function generateHookSettingsFile( ]); const hookCommand = shellJoin([command, ...args]); - const settings = buildHookSettings(hookCommand, options.hooksEnabled); + const settings = buildHookSettings(hookCommand, options.hooksEnabled, options.includePreToolUse); writeFileSync(filepath, JSON.stringify(settings, null, 4)); logger.debug(`[${options.logLabel}] Created hook settings file: ${filepath}`); diff --git a/cli/src/modules/common/rpcTypes.ts b/cli/src/modules/common/rpcTypes.ts index 11c5f07686..d15c6ad605 100644 --- a/cli/src/modules/common/rpcTypes.ts +++ b/cli/src/modules/common/rpcTypes.ts @@ -16,6 +16,7 @@ export interface SpawnSessionOptions { token?: string sessionType?: 'simple' | 'worktree' worktreeName?: string + startingMode?: 'remote' | 'pty' } export type SpawnSessionResult = diff --git a/cli/src/runner/buildCliArgs.test.ts b/cli/src/runner/buildCliArgs.test.ts index fb6d764895..ec471d9751 100644 --- a/cli/src/runner/buildCliArgs.test.ts +++ b/cli/src/runner/buildCliArgs.test.ts @@ -62,6 +62,52 @@ describe('buildCliArgs', () => { + it('passes --model and --effort through for claude in PTY mode (model/effort at start)', () => { + const args = buildCliArgs('claude', { + directory: '/tmp', + startingMode: 'pty', + model: 'opus', + effort: 'high', + }) + expect(args).toContain('--model') + expect(args[args.indexOf('--model') + 1]).toBe('opus') + expect(args).toContain('--effort') + expect(args[args.indexOf('--effort') + 1]).toBe('high') + expect(args).toContain('--hapi-starting-mode') + expect(args[args.indexOf('--hapi-starting-mode') + 1]).toBe('pty') + }) + + it('does NOT force --yolo for PTY mode (tool approvals are bridged via the PreToolUse hook)', () => { + const args = 
buildCliArgs('claude', { directory: '/tmp', startingMode: 'pty' }) + expect(args).not.toContain('--yolo') + }) + + it('still honors explicit yolo (the new-session toggle) in PTY mode', () => { + const args = buildCliArgs('claude', { directory: '/tmp', startingMode: 'pty' }, true) + expect(args).toContain('--yolo') + }) + + it('prefers an explicit --permission-mode over yolo in PTY mode', () => { + const args = buildCliArgs('claude', { directory: '/tmp', startingMode: 'pty', permissionMode: 'plan' }, true) + expect(args).toContain('--permission-mode') + expect(args[args.indexOf('--permission-mode') + 1]).toBe('plan') + expect(args).not.toContain('--yolo') + }) + + it('does not add --effort for non-claude agents (claude-only flag)', () => { + const args = buildCliArgs('opencode', { + directory: '/tmp', + effort: 'high', + }) + expect(args).not.toContain('--effort') + }) + + it('omits --model/--effort when not specified', () => { + const args = buildCliArgs('claude', { directory: '/tmp', startingMode: 'pty' }) + expect(args).not.toContain('--model') + expect(args).not.toContain('--effort') + }) + it('passes --model-reasoning-effort through for opencode', () => { const args = buildCliArgs('opencode', { directory: '/tmp', diff --git a/cli/src/runner/run.ts b/cli/src/runner/run.ts index 694ad7197b..0f53ed9c30 100644 --- a/cli/src/runner/run.ts +++ b/cli/src/runner/run.ts @@ -1099,7 +1099,8 @@ export function buildCliArgs( args.push('--resume', options.resumeSessionId); } } - args.push('--hapi-starting-mode', 'remote', '--started-by', 'runner'); + const startingMode = options.startingMode || 'remote'; + args.push('--hapi-starting-mode', startingMode, '--started-by', 'runner'); if (options.model) { args.push('--model', options.model); } @@ -1121,5 +1122,9 @@ export function buildCliArgs( args.push('--yolo'); } } + // PTY tool approvals are bridged from a PreToolUse hook to the web (see + // ptyPermissionHandler + generateHookSettings), so a default-mode PTY session + // 
prompts for permission just like the SDK path — no implicit bypass. Explicit + // YOLO (the new-session toggle) opts into --yolo via `yolo`. return args; } diff --git a/cli/src/ui/ink/ResumeSessionPicker.tsx b/cli/src/ui/ink/ResumeSessionPicker.tsx index 5ef9398ec1..185d291f93 100644 --- a/cli/src/ui/ink/ResumeSessionPicker.tsx +++ b/cli/src/ui/ink/ResumeSessionPicker.tsx @@ -1,6 +1,6 @@ import React, { useMemo, useState } from 'react' import { Box, Text, useInput, useStdout } from 'ink' -import type { ResumableSession } from '@hapi/protocol' +import { getFlavorLabel, type ResumableSession } from '@hapi/protocol' import { filterResumeSessions, formatResumeSessionRelativeTime, @@ -85,7 +85,7 @@ function padEndColumns(value: string, width: number): string { function formatSessionLine(session: ResumableSession, width: number): string { const state = getResumeSessionState(session) const time = formatResumeSessionRelativeTime(session.updatedAt).padStart(10) - const prefix = `${time} ${session.flavor.padEnd(8)} ${state.padEnd(8)} ` + const prefix = `${time} ${getFlavorLabel(session.flavor).padEnd(12)} ${state.padEnd(8)} ` const nameBudget = Math.max(12, width - prefix.length) const name = truncateText(getResumeSessionName(session), nameBudget) return padEndColumns(`${prefix}${name}`, width) From 0d022d77cb73600c070dd36ecbb7cd2e37c2163b Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 12 Jun 2026 17:28:16 +0900 Subject: [PATCH 10/26] feat(pty): stream the interactive terminal and structured chat to the web --- hub/src/socket/agentTerminalBuffer.ts | 42 ++++ .../handlers/cli/terminalHandlers.test.ts | 82 ++++++++ .../socket/handlers/cli/terminalHandlers.ts | 44 ++++ hub/src/socket/handlers/terminal.test.ts | 164 ++++++++++++++- hub/src/socket/handlers/terminal.ts | 109 ++++++++++ hub/src/socket/userTerminalBuffer.test.ts | 47 +++++ hub/src/socket/userTerminalBuffer.ts | 31 +++ hub/src/sync/rpcGateway.ts | 5 +- hub/src/sync/syncEngine.ts | 49 ++++- 
hub/src/web/routes/machines.ts | 4 +- shared/src/apiTypes.ts | 3 +- shared/src/schemas.ts | 7 + shared/src/socket.ts | 27 +++ .../AgentTerminal/AgentTerminalView.tsx | 156 +++++++++++++++ web/src/hooks/useAgentTerminalSocket.test.ts | 115 +++++++++++ web/src/hooks/useAgentTerminalSocket.ts | 189 ++++++++++++++++++ web/src/router.tsx | 2 +- web/src/routes/sessions/terminal.test.tsx | 27 ++- web/src/routes/sessions/terminal.tsx | 18 +- web/vite.config.ts | 21 +- 20 files changed, 1125 insertions(+), 17 deletions(-) create mode 100644 hub/src/socket/agentTerminalBuffer.ts create mode 100644 hub/src/socket/userTerminalBuffer.test.ts create mode 100644 hub/src/socket/userTerminalBuffer.ts create mode 100644 web/src/components/AgentTerminal/AgentTerminalView.tsx create mode 100644 web/src/hooks/useAgentTerminalSocket.test.ts create mode 100644 web/src/hooks/useAgentTerminalSocket.ts diff --git a/hub/src/socket/agentTerminalBuffer.ts b/hub/src/socket/agentTerminalBuffer.ts new file mode 100644 index 0000000000..23cd52fb99 --- /dev/null +++ b/hub/src/socket/agentTerminalBuffer.ts @@ -0,0 +1,42 @@ +// Per-session scrollback buffer for the agent (PTY) terminal output. +// +// Claude's interactive TUI only emits output when something changes. A web +// client that subscribes while the TUI is idle therefore receives nothing and +// shows a black screen until the next keystroke forces a redraw. We keep a +// rolling buffer of recent raw output so a fresh subscriber can be replayed the +// current screen immediately. +// +// The buffer is a byte-bounded ring: the oldest bytes are dropped first. The +// most recent full-screen redraw sequence from the TUI is always preserved at +// the tail, so replaying the buffer reconstructs the current screen (older, +// possibly-truncated escape sequences at the head are overwritten by later +// redraws). 
+ +const MAX_BUFFER_BYTES = 256 * 1024 + +const buffers = new Map() + +export function appendAgentTerminalOutput(sessionId: string, data: string): void { + if (!data) return + const next = (buffers.get(sessionId) ?? '') + data + buffers.set( + sessionId, + next.length > MAX_BUFFER_BYTES ? next.slice(next.length - MAX_BUFFER_BYTES) : next + ) +} + +// Replay variant: when a full-screen TUI exits (e.g. an archived alt-screen +// session) it emits an alt-screen-exit (`CSI ? 1049 l`) that restores the empty +// normal screen — so a raw replay would render black. If the buffer's LAST +// alt-screen toggle is an exit (the process ended without re-entering), drop it +// and everything after, leaving the final alt-screen frame visible. Live sessions +// stay in the alt screen (no trailing exit), so this is a no-op for them. +const TRAILING_ALT_EXIT = /\x1b\[\?1049l(?:(?!\x1b\[\?1049h)[\s\S])*$/ +export function getAgentTerminalReplay(sessionId: string): string { + const raw = buffers.get(sessionId) ?? 
'' + return raw.replace(TRAILING_ALT_EXIT, '') +} + +export function clearAgentTerminalBuffer(sessionId: string): void { + buffers.delete(sessionId) +} diff --git a/hub/src/socket/handlers/cli/terminalHandlers.test.ts b/hub/src/socket/handlers/cli/terminalHandlers.test.ts index 4983b63510..695a3a3943 100644 --- a/hub/src/socket/handlers/cli/terminalHandlers.test.ts +++ b/hub/src/socket/handlers/cli/terminalHandlers.test.ts @@ -42,15 +42,97 @@ class FakeSocket { } } +type RoomEmit = { + room: string + event: string + data: unknown +} + class FakeNamespace { readonly sockets = new Map() + readonly roomEmits: RoomEmit[] = [] + + to(room: string): { emit: (event: string, data: unknown) => FakeNamespace } { + const self = this + return { + emit(event: string, data: unknown) { + self.roomEmits.push({ room, event, data }) + return self + } + } + } } function lastEmit(socket: FakeSocket, event: string): EmittedEvent | undefined { return [...socket.emitted].reverse().find((entry) => entry.event === event) } +function lastRoomEmit(namespace: FakeNamespace, event: string): RoomEmit | undefined { + return [...namespace.roomEmits].reverse().find((entry) => entry.event === event) +} + +function firstRoomEmit(namespace: FakeNamespace, event: string): RoomEmit | undefined { + return namespace.roomEmits.find((entry) => entry.event === event) +} + describe('cli terminal handlers', () => { + it('forwards agent-terminal:output to the agent-terminal room on terminal namespace', () => { + const cliSocket = new FakeSocket('cli-socket') + const terminalNamespace = new FakeNamespace() + const terminalRegistry = new TerminalRegistry({ idleTimeoutMs: 0 }) + + registerTerminalHandlers(cliSocket as unknown as CliSocketWithData, { + terminalRegistry, + terminalNamespace: terminalNamespace as never, + resolveSessionAccess: () => ({ ok: true, value: {} as StoredSession }), + emitAccessError: () => { + throw new Error('Unexpected access error') + } + }) + + 
cliSocket.trigger('agent-terminal:output', { + sessionId: 'session-1', + terminalId: 'agent', + data: '\x1b[32mhello\x1b[0m' + }) + + const emit = lastRoomEmit(terminalNamespace, 'agent-terminal:output') + expect(emit).toBeDefined() + expect(emit?.room).toBe('agent-session:session-1') + expect(emit?.data).toEqual({ + sessionId: 'session-1', + terminalId: 'agent', + data: '\x1b[32mhello\x1b[0m' + }) + }) + + it('rejects agent-terminal:output when session access is denied', () => { + const cliSocket = new FakeSocket('cli-socket') + const terminalNamespace = new FakeNamespace() + const terminalRegistry = new TerminalRegistry({ idleTimeoutMs: 0 }) + const accessErrors: { scope: string; id: string; reason: string }[] = [] + + registerTerminalHandlers(cliSocket as unknown as CliSocketWithData, { + terminalRegistry, + terminalNamespace: terminalNamespace as never, + resolveSessionAccess: () => ({ ok: false, reason: 'access-denied' }), + emitAccessError: (scope, id, reason) => { + accessErrors.push({ scope, id, reason }) + } + }) + + cliSocket.trigger('agent-terminal:output', { + sessionId: 'session-1', + terminalId: 'agent', + data: 'should not pass' + }) + + expect(terminalNamespace.roomEmits).toHaveLength(0) + expect(accessErrors).toEqual([ + { scope: 'session', id: 'session-1', reason: 'access-denied' } + ]) + }) + it('removes stale registry entries after terminal errors', () => { const cliSocket = new FakeSocket('cli-socket') const terminalSocket = new FakeSocket('terminal-socket') diff --git a/hub/src/socket/handlers/cli/terminalHandlers.ts b/hub/src/socket/handlers/cli/terminalHandlers.ts index bf54f6df1b..a83f20deb7 100644 --- a/hub/src/socket/handlers/cli/terminalHandlers.ts +++ b/hub/src/socket/handlers/cli/terminalHandlers.ts @@ -1,3 +1,4 @@ +import { z } from 'zod' import { TerminalErrorPayloadSchema, TerminalExitPayloadSchema, @@ -8,6 +9,8 @@ import type { StoredSession } from '../../../store' import type { TerminalRegistry } from '../../terminalRegistry' 
import type { CliSocketWithData, SocketServer } from '../../socketTypes' import type { AccessErrorReason, AccessResult } from './types' +import { appendAgentTerminalOutput, clearAgentTerminalBuffer } from '../../agentTerminalBuffer' +import { appendUserTerminalOutput, clearUserTerminalBuffer } from '../../userTerminalBuffer' type ResolveSessionAccess = (sessionId: string) => AccessResult @@ -68,9 +71,47 @@ export function registerTerminalHandlers(socket: CliSocketWithData, deps: Termin return } terminalRegistry.markActivity(parsed.data.terminalId) + // Keep a scrollback buffer so reconnecting web clients see the + // current terminal content instead of a black screen. + appendUserTerminalOutput(parsed.data.sessionId, parsed.data.terminalId, parsed.data.data) forwardTerminalEvent('terminal:output', parsed.data) }) + socket.on('agent-terminal:output', (data: unknown) => { + const parsed = terminalOutputSchema.safeParse(data) + if (!parsed.success) { + return + } + const sessionAccess = resolveSessionAccess(parsed.data.sessionId) + if (!sessionAccess.ok) { + emitAccessError('session', parsed.data.sessionId, sessionAccess.reason) + return + } + // Keep a scrollback buffer so a web client that subscribes later can be + // replayed the current screen (avoids the black-screen-until-keystroke). + appendAgentTerminalOutput(parsed.data.sessionId, parsed.data.data) + // Broadcast to the agent-terminal room (distinct from the user-terminal's + // `session:${id}` room) so only agent-terminal viewers receive PTY output + // and the streaming-teardown viewer count stays accurate. 
+ terminalNamespace.to(`agent-session:${parsed.data.sessionId}`).emit('agent-terminal:output', parsed.data) + }) + + socket.on('agent-terminal:reset', (data: unknown) => { + const parsed = z.object({ sessionId: z.string().min(1) }).safeParse(data) + if (!parsed.success) { + return + } + const sessionAccess = resolveSessionAccess(parsed.data.sessionId) + if (!sessionAccess.ok) { + emitAccessError('session', parsed.data.sessionId, sessionAccess.reason) + return + } + // A fresh agent PTY spawned — drop the previous session's scrollback so a + // re-subscribing viewer doesn't replay stale (and alt-screen-corrupted) + // output from before the restart. + clearAgentTerminalBuffer(parsed.data.sessionId) + }) + socket.on('terminal:exit', (data: unknown) => { const parsed = terminalExitSchema.safeParse(data) if (!parsed.success) { @@ -81,6 +122,9 @@ export function registerTerminalHandlers(socket: CliSocketWithData, deps: Termin return } terminalRegistry.remove(parsed.data.terminalId) + // Drop the scrollback so a reconnecting viewer doesn't replay a dead + // terminal's output, and so the buffer doesn't leak for the session's life. 
+ clearUserTerminalBuffer(parsed.data.sessionId) const terminalSocket = terminalNamespace.sockets.get(entry.socketId) if (!terminalSocket) { return diff --git a/hub/src/socket/handlers/terminal.test.ts b/hub/src/socket/handlers/terminal.test.ts index 0a9cf2382c..542114cad6 100644 --- a/hub/src/socket/handlers/terminal.test.ts +++ b/hub/src/socket/handlers/terminal.test.ts @@ -1,6 +1,7 @@ -import { describe, expect, it } from 'bun:test' +import { beforeEach, describe, expect, it } from 'bun:test' import { registerTerminalHandlers } from './terminal' import { TerminalRegistry } from '../terminalRegistry' +import { appendAgentTerminalOutput, clearAgentTerminalBuffer, getAgentTerminalReplay } from '../agentTerminalBuffer' import type { SocketServer, SocketWithData } from '../socketTypes' type EmittedEvent = { @@ -12,6 +13,7 @@ class FakeSocket { readonly id: string readonly data: Record = {} readonly emitted: EmittedEvent[] = [] + readonly rooms = new Set() private readonly handlers = new Map void>() constructor(id: string) { @@ -28,6 +30,10 @@ class FakeSocket { return true } + join(room: string): void { + this.rooms.add(room) + } + trigger(event: string, data?: unknown): void { const handler = this.handlers.get(event) if (!handler) { @@ -69,6 +75,7 @@ type Harness = { function createHarness(options?: { sessionActive?: boolean + sessionNamespace?: string maxTerminalsPerSocket?: number maxTerminalsPerSession?: number }): Harness { @@ -80,7 +87,10 @@ function createHarness(options?: { registerTerminalHandlers(terminalSocket as unknown as SocketWithData, { io: io as unknown as SocketServer, - getSession: () => ({ active: options?.sessionActive ?? true, namespace: 'default' }), + getSession: () => ({ + active: options?.sessionActive ?? true, + namespace: options?.sessionNamespace ?? 'default' + }), terminalRegistry, maxTerminalsPerSocket: options?.maxTerminalsPerSocket ?? 4, maxTerminalsPerSession: options?.maxTerminalsPerSession ?? 
4 @@ -200,6 +210,156 @@ describe('terminal socket handlers', () => { expect(terminalRegistry.get('terminal-1')).toBeNull() }) + it('joins terminal socket to session room on create', () => { + const { terminalSocket, cliNamespace } = createHarness() + const cliSocket = new FakeSocket('cli-socket-1') + connectCliSocket(cliNamespace, cliSocket, 'session-1') + + terminalSocket.trigger('terminal:create', { + sessionId: 'session-1', + terminalId: 'terminal-1', + cols: 80, + rows: 24 + }) + + expect(terminalSocket.rooms.has('session:session-1')).toBe(true) + }) + + describe('agent-terminal:subscribe', () => { + beforeEach(() => { + clearAgentTerminalBuffer('session-1') + clearAgentTerminalBuffer('session-2') + }) + + it('replays buffered agent output on subscribe', () => { + const { terminalSocket } = createHarness() + appendAgentTerminalOutput('session-1', '\x1b[32mInitial output\x1b[0m\r\n') + appendAgentTerminalOutput('session-1', 'More output\r\n') + + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-1' }) + + expect(terminalSocket.rooms.has('agent-session:session-1')).toBe(true) + const replayEvent = lastEmit(terminalSocket, 'agent-terminal:output') + expect(replayEvent).toBeDefined() + expect(replayEvent?.data).toEqual({ + sessionId: 'session-1', + terminalId: 'agent', + data: '\x1b[32mInitial output\x1b[0m\r\nMore output\r\n' + }) + }) + + it('rejects subscribe to a session in another namespace (no join, no replay)', () => { + // A valid token for the 'default' namespace must not be able to + // subscribe to a session that belongs to a different namespace. 
+ const { terminalSocket } = createHarness({ sessionNamespace: 'other' }) + appendAgentTerminalOutput('session-1', 'secret-output-from-other-namespace') + + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-1' }) + + expect(terminalSocket.rooms.has('agent-session:session-1')).toBe(false) + expect(lastEmit(terminalSocket, 'agent-terminal:output')).toBeUndefined() + }) + + it('rejects subscribe when the session is inactive (no join, no replay)', () => { + const { terminalSocket } = createHarness({ sessionActive: false }) + appendAgentTerminalOutput('session-1', 'stale-output') + + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-1' }) + + expect(terminalSocket.rooms.has('agent-session:session-1')).toBe(false) + expect(lastEmit(terminalSocket, 'agent-terminal:output')).toBeUndefined() + }) + + it('joins a dedicated agent-terminal room (not the user-terminal session room)', () => { + const { terminalSocket } = createHarness() + + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-1' }) + + // Agent-terminal viewers must NOT land in the user-terminal `session:` room, + // otherwise the streaming-teardown viewer count counts the wrong sockets. + expect(terminalSocket.rooms.has('agent-session:session-1')).toBe(true) + expect(terminalSocket.rooms.has('session:session-1')).toBe(false) + const replayEvent = lastEmit(terminalSocket, 'agent-terminal:output') + expect(replayEvent).toBeUndefined() + }) + + it('strips a trailing alt-screen-exit so an exited TUI replays its last frame (not black)', () => { + clearAgentTerminalBuffer('session-3') + // alt-screen enter + a frame, then the process exits (alt-screen exit). 
+ appendAgentTerminalOutput('session-3', '\x1b[?1049h\x1b[HLAST FRAME') + appendAgentTerminalOutput('session-3', '\r\n\x1b[?1049l\x1b[?25h') + const replay = getAgentTerminalReplay('session-3') + expect(replay).toContain('LAST FRAME') + expect(replay).not.toContain('\x1b[?1049l') + clearAgentTerminalBuffer('session-3') + }) + + it('keeps alt-screen content intact for a live (still in alt-screen) TUI', () => { + clearAgentTerminalBuffer('session-4') + appendAgentTerminalOutput('session-4', '\x1b[?1049h\x1b[HLIVE FRAME') + const replay = getAgentTerminalReplay('session-4') + expect(replay).toBe('\x1b[?1049h\x1b[HLIVE FRAME') + clearAgentTerminalBuffer('session-4') + }) + + it('replays buffer per-session independently', () => { + const { terminalSocket } = createHarness() + appendAgentTerminalOutput('session-1', 'data-for-session-1') + appendAgentTerminalOutput('session-2', 'data-for-session-2') + + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-2' }) + + const replayEvent = lastEmit(terminalSocket, 'agent-terminal:output') + expect(replayEvent?.data).toEqual({ + sessionId: 'session-2', + terminalId: 'agent', + data: 'data-for-session-2' + }) + }) + + it('replays same buffer on repeated subscribe (no clear)', () => { + const { terminalSocket } = createHarness() + appendAgentTerminalOutput('session-1', 'persistent-data') + + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-1' }) + const firstReplay = lastEmit(terminalSocket, 'agent-terminal:output') + expect(firstReplay).toBeDefined() + expect((firstReplay!.data as { data: string }).data).toBe('persistent-data') + + // Second subscribe gets the same buffer again (not cleared) + terminalSocket.emitted.length = 0 + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-1' }) + const secondReplay = lastEmit(terminalSocket, 'agent-terminal:output') + expect(secondReplay).toBeDefined() + expect((secondReplay!.data as { data: string 
}).data).toBe('persistent-data') + }) + }) + + describe('agent-terminal:resize', () => { + it('forwards a resize to the CLI socket for an authorized active session', () => { + const { terminalSocket, cliNamespace } = createHarness() + const cliSocket = new FakeSocket('cli-socket-1') + connectCliSocket(cliNamespace, cliSocket, 'session-1') + + terminalSocket.trigger('agent-terminal:resize', { sessionId: 'session-1', cols: 100, rows: 30 }) + + const resizeEvent = lastEmit(cliSocket, 'agent-terminal:resize') + expect(resizeEvent?.data).toEqual({ sessionId: 'session-1', cols: 100, rows: 30 }) + }) + + it('does not forward a resize when the session is inactive (guard, not just pickCliSocket)', () => { + // CLI socket IS connected in this socket's own namespace, so without + // the authorization guard pickCliSocketId would find it and emit. + const { terminalSocket, cliNamespace } = createHarness({ sessionActive: false }) + const cliSocket = new FakeSocket('cli-socket-1') + connectCliSocket(cliNamespace, cliSocket, 'session-1') + + terminalSocket.trigger('agent-terminal:resize', { sessionId: 'session-1', cols: 100, rows: 30 }) + + expect(lastEmit(cliSocket, 'agent-terminal:resize')).toBeUndefined() + }) + }) + it('enforces per-socket terminal limits', () => { const { terminalSocket, cliNamespace } = createHarness({ maxTerminalsPerSocket: 1 }) const cliSocket = new FakeSocket('cli-socket-1') diff --git a/hub/src/socket/handlers/terminal.ts b/hub/src/socket/handlers/terminal.ts index 086f25df13..dc5350014f 100644 --- a/hub/src/socket/handlers/terminal.ts +++ b/hub/src/socket/handlers/terminal.ts @@ -2,6 +2,8 @@ import { TerminalOpenPayloadSchema } from '@hapi/protocol' import { z } from 'zod' import type { TerminalRegistry, TerminalRegistryEntry } from '../terminalRegistry' import type { SocketServer, SocketWithData } from '../socketTypes' +import { getAgentTerminalReplay } from '../agentTerminalBuffer' +import { getUserTerminalBuffer } from '../userTerminalBuffer' const 
terminalCreateSchema = TerminalOpenPayloadSchema @@ -127,6 +129,8 @@ export function registerTerminalHandlers(socket: SocketWithData, deps: TerminalH return } + socket.join(`session:${sessionId}`) + cliSocket.emit('terminal:open', { sessionId, terminalId, @@ -134,6 +138,16 @@ export function registerTerminalHandlers(socket: SocketWithData, deps: TerminalH rows }) terminalRegistry.markActivity(terminalId) + + // Replay buffered output so the terminal shows scrollback immediately + // instead of staying black until the next output from CLI. + // The buffer is never explicitly cleared here: it persists so a client + // that navigates away and back (new socket, isReconnect=false) still + // sees the accumulated output. It is bounded to 256KB per session. + const buffered = getUserTerminalBuffer(sessionId) + if (buffered && !isReconnect) { + socket.emit('terminal:output', { terminalId, data: buffered }) + } }) socket.on('terminal:write', (data: unknown) => { @@ -201,10 +215,105 @@ export function registerTerminalHandlers(socket: SocketWithData, deps: TerminalH emitCloseToCli(entry) }) + const emitToCliForSession = (sessionId: string, event: 'agent-terminal:resize' | 'agent-terminal:refresh' | 'agent-terminal:idle', payload: Record): void => { + const cliSocketId = pickCliSocketId(sessionId) + if (!cliSocketId) return + const cliSocket = cliNamespace.sockets.get(cliSocketId) + if (!cliSocket || cliSocket.data.namespace !== namespace) return + cliSocket.emit(event, payload as never) + } + + // Sessions this socket is viewing the agent terminal for. When the last + // viewer of a session leaves (this socket unsubscribes or disconnects and the + // room empties), tell the CLI to stop streaming that PTY. 
+ // + // Agent-terminal viewers get their OWN room, distinct from the user-terminal's + // `session:${id}` room: the streaming-teardown count must reflect agent-terminal + // viewers only, otherwise a user-terminal viewer in `session:${id}` would keep + // the agent PTY streaming forever after every agent-terminal viewer has left. + const agentTerminalRoom = (sessionId: string): string => `agent-session:${sessionId}` + const subscribedAgentSessions = new Set() + // A valid token for one namespace must not be able to act on (subscribe to, + // replay, or drive) a session in another namespace. Same shape as the + // terminal:create guard (terminal.ts:95). Callers drop silently rather than + // emitting an error: surfacing "session inactive/unavailable" to an + // unauthorized caller would leak existence, and the only honest-client + // rejection path (a session that just went inactive) unmounts the terminal + // view anyway via canViewAgentTerminal, so there is no live viewer to inform. + const isAuthorizedSession = (sessionId: string): boolean => { + const session = getSession(sessionId) + return Boolean(namespace && session && session.namespace === namespace && session.active) + } + const tellCliIfNoViewers = (sessionId: string): void => { + const size = socket.nsp.adapter.rooms.get(agentTerminalRoom(sessionId))?.size ?? 0 + if (size === 0) { + emitToCliForSession(sessionId, 'agent-terminal:idle', { sessionId }) + } + } + + socket.on('agent-terminal:subscribe', (data: unknown) => { + const parsed = z.object({ sessionId: z.string().min(1) }).safeParse(data) + if (!parsed.success) { + return + } + const { sessionId } = parsed.data + if (!isAuthorizedSession(sessionId)) { + return + } + socket.join(agentTerminalRoom(sessionId)) + subscribedAgentSessions.add(sessionId) + // Replay recent output so the terminal renders the current screen + // immediately instead of staying black until the next keystroke. 
+ // terminalId must match the web client's filter ('agent'), not a + // synthetic id, otherwise the replayed data is silently dropped. + const buffered = getAgentTerminalReplay(sessionId) + if (buffered) { + socket.emit('agent-terminal:output', { sessionId, terminalId: 'agent', data: buffered }) + } + // Full-screen TUIs (e.g. claude's ink alt-screen) can't always + // be reconstructed from a byte-ring replay (truncated alt-screen enter, + // stale alt-screen-exit from a prior spawn). Ask the CLI to repaint the + // current screen so a freshly (re)subscribed viewer never sees black. + emitToCliForSession(sessionId, 'agent-terminal:refresh', { sessionId }) + }) + + socket.on('agent-terminal:unsubscribe', (data: unknown) => { + const parsed = z.object({ sessionId: z.string().min(1) }).safeParse(data) + if (!parsed.success) { + return + } + const { sessionId } = parsed.data + socket.leave(agentTerminalRoom(sessionId)) + subscribedAgentSessions.delete(sessionId) + tellCliIfNoViewers(sessionId) + }) + + socket.on('agent-terminal:resize', (data: unknown) => { + const parsed = z.object({ + sessionId: z.string().min(1), + cols: z.number().int().positive(), + rows: z.number().int().positive() + }).safeParse(data) + if (!parsed.success) { + return + } + const { sessionId, cols, rows } = parsed.data + if (!isAuthorizedSession(sessionId)) { + return + } + emitToCliForSession(sessionId, 'agent-terminal:resize', { sessionId, cols, rows }) + }) + socket.on('disconnect', () => { const removed = terminalRegistry.removeBySocket(socket.id) for (const entry of removed) { emitCloseToCli(entry) } + // On disconnect the socket has already left its rooms, so the room size + // now reflects the remaining viewers — tell the CLI to stop streaming any + // agent terminal this socket was the last viewer of. 
+ for (const sessionId of subscribedAgentSessions) { + tellCliIfNoViewers(sessionId) + } }) } diff --git a/hub/src/socket/userTerminalBuffer.test.ts b/hub/src/socket/userTerminalBuffer.test.ts new file mode 100644 index 0000000000..898e6cbf2c --- /dev/null +++ b/hub/src/socket/userTerminalBuffer.test.ts @@ -0,0 +1,47 @@ +import { describe, it, expect } from 'bun:test' +import { appendUserTerminalOutput, getUserTerminalBuffer, clearUserTerminalBuffer } from './userTerminalBuffer' + +describe('userTerminalBuffer', () => { + it('stores and retrieves output per session', () => { + appendUserTerminalOutput('s1', 't1', 'hello ') + appendUserTerminalOutput('s1', 't1', 'world') + expect(getUserTerminalBuffer('s1')).toBe('hello world') + }) + + it('keeps sessions isolated', () => { + appendUserTerminalOutput('sa', 't1', 'alpha') + appendUserTerminalOutput('sb', 't1', 'beta') + expect(getUserTerminalBuffer('sa')).toBe('alpha') + expect(getUserTerminalBuffer('sb')).toBe('beta') + }) + + it('returns empty string for unknown session', () => { + expect(getUserTerminalBuffer('nonexistent')).toBe('') + }) + + it('ignores empty data', () => { + appendUserTerminalOutput('s3', 't1', 'keep') + appendUserTerminalOutput('s3', 't1', '') + expect(getUserTerminalBuffer('s3')).toBe('keep') + }) + + it('clears buffer on demand', () => { + appendUserTerminalOutput('s4', 't1', 'data') + clearUserTerminalBuffer('s4') + expect(getUserTerminalBuffer('s4')).toBe('') + }) + + it('rolls over at max size', () => { + const small = 'a'.repeat(100) + // Fill buffer to near capacity + for (let i = 0; i < 2600; i++) { + appendUserTerminalOutput('s5', 't1', small) + } + const buf = getUserTerminalBuffer('s5') + // Should be at most MAX_BUFFER_BYTES (256KB) + const MAX = 256 * 1024 + expect(buf.length).toBeLessThanOrEqual(MAX) + // Should contain the most recent data (tail preserved) + expect(buf.endsWith(small)).toBe(true) + }) +}) diff --git a/hub/src/socket/userTerminalBuffer.ts 
b/hub/src/socket/userTerminalBuffer.ts new file mode 100644 index 0000000000..0b1e4730a0 --- /dev/null +++ b/hub/src/socket/userTerminalBuffer.ts @@ -0,0 +1,31 @@ +// Per-session scrollback buffer for the user (remote) terminal output. +// +// A web client that navigates away and back creates a new xterm.js instance +// with a new terminalId, so the previous output is lost. We keep a rolling +// buffer per session so a fresh subscriber can replay the current terminal +// content immediately instead of showing a black screen until the next +// keystroke or output. +// +// The buffer is keyed by sessionId only (not terminalId) because each +// navigation creates a new terminalId for the same session. + +const MAX_BUFFER_BYTES = 256 * 1024 + +const buffers = new Map() + +export function appendUserTerminalOutput(sessionId: string, _terminalId: string, data: string): void { + if (!data) return + const next = (buffers.get(sessionId) ?? '') + data + buffers.set( + sessionId, + next.length > MAX_BUFFER_BYTES ? next.slice(next.length - MAX_BUFFER_BYTES) : next + ) +} + +export function getUserTerminalBuffer(sessionId: string): string { + return buffers.get(sessionId) ?? 
'' +} + +export function clearUserTerminalBuffer(sessionId: string): void { + buffers.delete(sessionId) +} diff --git a/hub/src/sync/rpcGateway.ts b/hub/src/sync/rpcGateway.ts index 5a63f4547c..afc9cef8bc 100644 --- a/hub/src/sync/rpcGateway.ts +++ b/hub/src/sync/rpcGateway.ts @@ -139,13 +139,14 @@ export class RpcGateway { resumeSessionId?: string, effort?: string, permissionMode?: PermissionMode, - serviceTier?: string + serviceTier?: string, + startingMode?: 'remote' | 'pty' ): Promise<{ type: 'success'; sessionId: string } | { type: 'error'; message: string }> { try { const result = await this.machineRpc( machineId, RPC_METHODS.SpawnHappySession, - { type: 'spawn-in-directory', directory, agent, model, modelReasoningEffort, yolo, sessionType, worktreeName, resumeSessionId, effort, permissionMode, serviceTier } + { type: 'spawn-in-directory', directory, agent, model, modelReasoningEffort, yolo, sessionType, worktreeName, resumeSessionId, effort, permissionMode, serviceTier, startingMode } ) if (result && typeof result === 'object') { const obj = result as Record diff --git a/hub/src/sync/syncEngine.ts b/hub/src/sync/syncEngine.ts index d59f58fd47..2b7c8fce85 100644 --- a/hub/src/sync/syncEngine.ts +++ b/hub/src/sync/syncEngine.ts @@ -15,6 +15,7 @@ import type { Server } from 'socket.io' import type { Store, CancelQueuedMessageResult } from '../store' import type { HapiSessionExportResult } from '@hapi/protocol/sessionExport' import type { RpcRegistry } from '../socket/rpcRegistry' +import { clearAgentTerminalBuffer } from '../socket/agentTerminalBuffer' import type { SSEManager } from '../sse/sseManager' import { CursorLegacyMigrator, type CursorLegacyMigratorOptions } from '../cursor/cursorLegacyMigrator' @@ -139,7 +140,7 @@ export class SyncEngine { constructor( private readonly store: Store, - io: Server, + private readonly io: Server, rpcRegistry: RpcRegistry, sseManager: SSEManager ) { @@ -344,6 +345,23 @@ export class SyncEngine { 
this.triggerDedupIfNeeded(payload.sid) } this.sessionReadyIds.delete(payload.sid) + + // Notify agent-terminal subscribers so the web UI shows a clear + // termination message instead of staying "connected" with stale output. + // Targets the dedicated agent-terminal room (NOT the user-terminal + // `session:${id}` room), matching where agent viewers actually subscribe. + if (typeof this.io.of === 'function') { + this.io.of('/terminal').to(`agent-session:${payload.sid}`).emit('agent-terminal:output', { + sessionId: payload.sid, + terminalId: 'agent', + data: '\r\n[Session terminated]\r\n' + }) + } + // Release the PTY scrollback for a session that has ended (mirrors the + // userTerminalBuffer clear-on-`terminal:exit`); a fresh spawn would also + // reset it, but an ended-and-never-reopened session would otherwise leak + // its buffer for the hub process's lifetime. + clearAgentTerminalBuffer(payload.sid) } handleBackgroundTaskDelta(sessionId: string, delta: { started: number; completed: number }): void { @@ -720,9 +738,10 @@ export class SyncEngine { resumeSessionId?: string, effort?: string, permissionMode?: PermissionMode, - serviceTier?: string + serviceTier?: string, + startingMode?: 'remote' | 'pty' ): Promise<{ type: 'success'; sessionId: string } | { type: 'error'; message: string }> { - return await this.rpcGateway.spawnSession( + const result = await this.rpcGateway.spawnSession( machineId, directory, agent, @@ -734,8 +753,20 @@ export class SyncEngine { resumeSessionId, effort, permissionMode, - serviceTier + serviceTier, + startingMode ) + // PTY sessions need the runner to attach the interactive terminal before + // the web client can connect; wait for the session to register active so a + // failed PTY spawn surfaces as an error instead of an empty terminal. Other + // start modes return as soon as the spawn RPC succeeds (legacy behavior). 
+ if (result.type === 'success' && startingMode === 'pty') { + const becameActive = await this.waitForSessionActive(result.sessionId) + if (!becameActive) { + return { type: 'error', message: 'Session spawned but failed to become active' } + } + } + return result } private resolveFlavor(session: Session): AgentFlavor { @@ -1177,6 +1208,13 @@ export class SyncEngine { const preferredPermissionMode = opts?.permissionMode ?? session.permissionMode ?? session.metadata?.preferredPermissionMode + // Restore the original launch mode. Without this a reopened PTY session + // would re-spawn in the default 'remote' (SDK) mode — no agent terminal, + // so the terminal view renders black. + const resumedStartingMode = + (session.agentState as { startingMode?: 'local' | 'remote' | 'pty' } | null)?.startingMode === 'pty' + ? 'pty' + : undefined const spawnResult = await this.rpcGateway.spawnSession( targetMachine.id, directory, @@ -1189,7 +1227,8 @@ export class SyncEngine { resumeToken, session.effort ?? undefined, preferredPermissionMode, - session.serviceTier ?? undefined + session.serviceTier ?? 
undefined, + resumedStartingMode ) if (spawnResult.type !== 'success') { diff --git a/hub/src/web/routes/machines.ts b/hub/src/web/routes/machines.ts index 7288a42de0..01a53df8b0 100644 --- a/hub/src/web/routes/machines.ts +++ b/hub/src/web/routes/machines.ts @@ -50,7 +50,10 @@ export function createMachinesRoutes(getSyncEngine: () => SyncEngine | null): Ho parsed.data.sessionType, parsed.data.worktreeName, undefined, - parsed.data.effort + parsed.data.effort, + undefined, // permissionMode slot + undefined, // serviceTier slot (startingMode is the next positional argument of spawnSession) + parsed.data.startingMode ) return c.json(result) }) diff --git a/shared/src/apiTypes.ts b/shared/src/apiTypes.ts index e6381da723..12a79b6b86 100644 --- a/shared/src/apiTypes.ts +++ b/shared/src/apiTypes.ts @@ -250,7 +250,8 @@ export const SpawnSessionRequestSchema = z.object({ modelReasoningEffort: z.string().optional(), yolo: z.boolean().optional(), sessionType: z.enum(['simple', 'worktree']).optional(), - worktreeName: z.string().optional() + worktreeName: z.string().optional(), + startingMode: z.enum(['remote', 'pty']).optional() }) export type SpawnSessionRequest = z.infer diff --git a/shared/src/schemas.ts b/shared/src/schemas.ts index 1b891fbef5..0391a09767 100644 --- a/shared/src/schemas.ts +++ b/shared/src/schemas.ts @@ -64,6 +64,9 @@ export const MetadataSchema = z.object({ archiveReason: z.string().optional(), preferredPermissionMode: PermissionModeSchema.optional(), flavor: z.string().nullish(), + // Launch mode, surfaced so the web can show the agent-terminal toggle only + // for PTY sessions (a 'remote'/SDK session has no agent PTY to view). + startingMode: z.enum(['local', 'remote', 'pty']).nullish(), capabilities: SessionCapabilitiesSchema.optional(), worktree: WorktreeMetadataSchema.optional(), // Cached Pi model list — written by CLI, read by web (inactive session fallback). @@ -108,6 +111,10 @@ export type AgentStateCompletedRequest = z.infer +// Read-only agent-terminal viewer controls (no terminalId — the agent PTY is the +// session's single TUI, keyed by sessionId). 
`resize` repaints the agent TUI at a +// given size; `refresh` forces a repaint of the current screen so a freshly +// (re)subscribed viewer sees the live state instead of a stale/black buffer. +export const AgentTerminalResizePayloadSchema = z.object({ + sessionId: z.string().min(1), + cols: z.number().int().positive(), + rows: z.number().int().positive() +}) + +export type AgentTerminalResizePayload = z.infer + +export const AgentTerminalRefreshPayloadSchema = z.object({ + sessionId: z.string().min(1) +}) + +export type AgentTerminalRefreshPayload = z.infer + export const TerminalClosePayloadSchema = z.object({ sessionId: z.string().min(1), terminalId: z.string().min(1) @@ -196,6 +214,11 @@ export interface ServerToClientEvents { 'terminal:write': (data: TerminalWritePayload) => void 'terminal:resize': (data: TerminalResizePayload) => void 'terminal:close': (data: TerminalClosePayload) => void + 'agent-terminal:resize': (data: AgentTerminalResizePayload) => void + 'agent-terminal:refresh': (data: AgentTerminalRefreshPayload) => void + // Sent to the CLI when the last agent-terminal viewer leaves, so it stops + // streaming PTY output to the hub until someone subscribes again. + 'agent-terminal:idle': (data: AgentTerminalRefreshPayload) => void error: (data: { message: string; code?: SocketErrorReason; scope?: 'session' | 'machine'; id?: string }) => void } @@ -228,6 +251,10 @@ export interface ClientToServerEvents { 'terminal:output': (data: TerminalOutputPayload) => void 'terminal:exit': (data: TerminalExitPayload) => void 'terminal:error': (data: TerminalErrorPayload) => void + 'agent-terminal:output': (data: TerminalOutputPayload) => void + // Drop the hub's scrollback buffer for this session (a new agent PTY just + // spawned, e.g. after archive→restart, so old output must not replay). 
+ 'agent-terminal:reset': (data: { sessionId: string }) => void ping: (callback: () => void) => void 'usage-report': (data: unknown) => void } diff --git a/web/src/components/AgentTerminal/AgentTerminalView.tsx b/web/src/components/AgentTerminal/AgentTerminalView.tsx new file mode 100644 index 0000000000..34648a1cb2 --- /dev/null +++ b/web/src/components/AgentTerminal/AgentTerminalView.tsx @@ -0,0 +1,156 @@ +import { useEffect, useRef } from 'react' +import { Terminal } from '@xterm/xterm' +import { FitAddon } from '@xterm/addon-fit' +import '@xterm/xterm/css/xterm.css' +import { useAgentTerminalSocket } from '@/hooks/useAgentTerminalSocket' +import { useAppContext } from '@/lib/app-context' + +function resolveThemeColors(): { background: string; foreground: string; selectionBackground: string } { + const styles = getComputedStyle(document.documentElement) + const background = styles.getPropertyValue('--app-bg').trim() || '#000000' + const foreground = styles.getPropertyValue('--app-fg').trim() || '#ffffff' + const selectionBackground = styles.getPropertyValue('--app-subtle-bg').trim() || 'rgba(255, 255, 255, 0.2)' + return { background, foreground, selectionBackground } +} + +type AgentTerminalViewProps = { + sessionId: string + visible: boolean + className?: string +} + +// Output-only view of the agent PTY. Input is handled by the shared chat +// composer (HappyComposer) so there is a single composer with correct IME +// handling — no separate terminal input bar. 
+export function AgentTerminalView(props: AgentTerminalViewProps) { + const { sessionId, visible, className } = props + const { token, baseUrl } = useAppContext() + const containerRef = useRef(null) + const terminalRef = useRef(null) + const fitAddonRef = useRef(null) + + const { + state, + connect, + disconnect, + resubscribe, + unsubscribe, + onOutput, + resize, + } = useAgentTerminalSocket({ + baseUrl, + token, + sessionId, + }) + + const onOutputRef = useRef(onOutput) + useEffect(() => { + onOutputRef.current = onOutput + }, [onOutput]) + + const resizeRef = useRef(resize) + useEffect(() => { + resizeRef.current = resize + }, [resize]) + + useEffect(() => { + const container = containerRef.current + if (!container) return + + const abortController = new AbortController() + const { background, foreground, selectionBackground } = resolveThemeColors() + + const terminal = new Terminal({ + cursorBlink: true, + fontSize: 13, + theme: { + background, + foreground, + cursor: foreground, + selectionBackground, + }, + convertEol: true, + customGlyphs: true, + cols: 80, + rows: 12, + }) + + const fitAddon = new FitAddon() + fitAddonRef.current = fitAddon + terminal.loadAddon(fitAddon) + terminal.open(container) + + const observer = new ResizeObserver(() => { + requestAnimationFrame(() => { + fitAddon.fit() + // Push the fitted size to the agent PTY so the TUI re-renders at + // the viewer's dimensions (and repaints — no black screen). 
+ resizeRef.current(terminal.cols, terminal.rows) + }) + }) + observer.observe(container) + + onOutputRef.current((data) => { + terminal.write(data) + }) + + abortController.signal.addEventListener('abort', () => { + observer.disconnect() + fitAddon.dispose() + terminal.dispose() + }) + + requestAnimationFrame(() => { + fitAddon.fit() + }) + terminalRef.current = terminal + + return () => abortController.abort() + }, []) + + useEffect(() => { + connect() + return () => disconnect() + }, [connect, disconnect]) + + useEffect(() => { + if (!visible) return + resubscribe() + requestAnimationFrame(() => { + fitAddonRef.current?.fit() + const terminal = terminalRef.current + if (terminal) { + // On (re)entry: sync size and trigger a repaint so the current + // screen shows instead of a stale/black buffer replay. + resizeRef.current(terminal.cols, terminal.rows) + } + }) + // Leaving the terminal view (hidden or unmounted) → unsubscribe so the + // CLI can stop streaming the PTY when no viewers remain. + return () => unsubscribe() + }, [visible, resubscribe, unsubscribe]) + + const statusColor = state.status === 'connected' + ? 'bg-emerald-500' + : state.status === 'connecting' + ? 'bg-amber-400 animate-pulse' + : state.status === 'error' + ? 'bg-red-500' + : 'bg-[var(--app-hint)]' + + return ( +
+
+ + + {state.status === 'connected' ? 'Agent terminal connected' : + state.status === 'connecting' ? 'Connecting...' : + state.status === 'error' ? `Error: ${state.error}` : + 'Disconnected'} + +
+ +
+
+ ) +} diff --git a/web/src/hooks/useAgentTerminalSocket.test.ts b/web/src/hooks/useAgentTerminalSocket.test.ts new file mode 100644 index 0000000000..3730947196 --- /dev/null +++ b/web/src/hooks/useAgentTerminalSocket.test.ts @@ -0,0 +1,115 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' +import { renderHook, act } from '@testing-library/react' + +// A minimal fake socket.io-client that records emits and lets the test drive +// lifecycle events ('connect', etc.). The hook calls `new Manager(url, opts)` +// then `manager.socket('/terminal', { auth })`. +class FakeSocket { + connected = false + auth: unknown + readonly emitted: Array<{ event: string; data: unknown }> = [] + private readonly handlers = new Map void>() + + constructor(auth: unknown) { + this.auth = auth + } + + on(event: string, handler: (arg?: unknown) => void): this { + this.handlers.set(event, handler) + return this + } + + emit(event: string, data: unknown): boolean { + this.emitted.push({ event, data }) + return true + } + + connect(): void { + this.connected = true + this.handlers.get('connect')?.() + } + + disconnect(): void { + this.connected = false + } + + removeAllListeners(): void { + this.handlers.clear() + } + + subscribeCount(): number { + return this.emitted.filter((e) => e.event === 'agent-terminal:subscribe').length + } +} + +let lastSocket: FakeSocket | null = null + +vi.mock('socket.io-client', () => ({ + Manager: class { + socket(_nsp: string, opts: { auth: unknown }): FakeSocket { + lastSocket = new FakeSocket(opts.auth) + return lastSocket + } + } +})) + +import { useAgentTerminalSocket } from './useAgentTerminalSocket' + +const options = { baseUrl: 'http://localhost:3000', token: 'tok', sessionId: 'session-1' } + +describe('useAgentTerminalSocket subscribe gating', () => { + beforeEach(() => { + lastSocket = null + }) + + it('does NOT subscribe on connect when the viewer never asked (hidden mount)', () => { + const { result } = renderHook(() => 
useAgentTerminalSocket(options)) + + act(() => result.current.connect()) + // connect() created the socket and connected it synchronously. + expect(lastSocket).not.toBeNull() + expect(lastSocket!.subscribeCount()).toBe(0) + }) + + it('subscribes only after resubscribe(), and re-subscribes across reconnects', () => { + const { result } = renderHook(() => useAgentTerminalSocket(options)) + + act(() => result.current.connect()) + expect(lastSocket!.subscribeCount()).toBe(0) + + // Becoming visible → resubscribe() emits the subscribe. + act(() => result.current.resubscribe()) + expect(lastSocket!.subscribeCount()).toBe(1) + + // A reconnect (e.g. network blip) must re-emit subscribe because the + // viewer is still watching. + act(() => lastSocket!.connect()) + expect(lastSocket!.subscribeCount()).toBe(2) + }) + + it('does not subscribe when connect() is called again on an already-connected socket', () => { + const { result } = renderHook(() => useAgentTerminalSocket(options)) + + act(() => result.current.connect()) + // Second connect() hits the reuse branch (socket already exists + + // connected); it must not subscribe on its own — only resubscribe() does. + act(() => result.current.connect()) + expect(lastSocket!.subscribeCount()).toBe(0) + }) + + it('stops re-subscribing on reconnect after unsubscribe() (viewer left)', () => { + const { result } = renderHook(() => useAgentTerminalSocket(options)) + + act(() => result.current.connect()) + act(() => result.current.resubscribe()) + expect(lastSocket!.subscribeCount()).toBe(1) + + act(() => result.current.unsubscribe()) + // After leaving, a reconnect must NOT re-subscribe. 
+ act(() => lastSocket!.connect()) + expect(lastSocket!.subscribeCount()).toBe(1) + expect( + lastSocket!.emitted.some((e) => e.event === 'agent-terminal:unsubscribe') + ).toBe(true) + }) +}) diff --git a/web/src/hooks/useAgentTerminalSocket.ts b/web/src/hooks/useAgentTerminalSocket.ts new file mode 100644 index 0000000000..da5cce2b2c --- /dev/null +++ b/web/src/hooks/useAgentTerminalSocket.ts @@ -0,0 +1,189 @@ +import { useCallback, useEffect, useRef, useState } from 'react' +import { Manager, type Socket } from 'socket.io-client' + +type AgentTerminalConnectionState = + | { status: 'idle' } + | { status: 'connecting' } + | { status: 'connected' } + | { status: 'error'; error: string } + +type UseAgentTerminalSocketOptions = { + baseUrl: string + token: string + sessionId: string +} + +type TerminalOutputPayload = { + terminalId: string + data: string +} + +export function useAgentTerminalSocket(options: UseAgentTerminalSocketOptions): { + state: AgentTerminalConnectionState + connect: () => void + disconnect: () => void + resubscribe: () => void + unsubscribe: () => void + onOutput: (handler: (data: string) => void) => void + resize: (cols: number, rows: number) => void +} { + const [state, setState] = useState({ status: 'idle' }) + const socketRef = useRef(null) + const outputHandlerRef = useRef<(data: string) => void>(() => {}) + const sessionIdRef = useRef(options.sessionId) + const tokenRef = useRef(options.token) + const baseUrlRef = useRef(options.baseUrl) + // Whether the viewer currently wants the PTY streamed. Connecting alone must + // NOT subscribe — SessionChat mounts this hidden for every PTY session, and + // an unconditional subscribe-on-connect would stream the high-frequency raw + // TUI even when the terminal is never opened. Subscribe is gated on this so + // (re)connects only re-subscribe when the terminal is actually visible. 
+ const subscribedRef = useRef(false) + + useEffect(() => { + sessionIdRef.current = options.sessionId + baseUrlRef.current = options.baseUrl + }, [options.sessionId, options.baseUrl]) + + useEffect(() => { + tokenRef.current = options.token + const socket = socketRef.current + if (!socket) { + return + } + if (!options.token) { + if (socket.connected) { + socket.disconnect() + } + return + } + socket.auth = { token: options.token } + if (socket.connected) { + socket.disconnect() + socket.connect() + } + }, [options.token]) + + const connect = useCallback(() => { + const token = tokenRef.current + const sessionId = sessionIdRef.current + + if (!token || !sessionId) { + setState({ status: 'error', error: 'Missing terminal credentials.' }) + return + } + + if (socketRef.current) { + const socket = socketRef.current + socket.auth = { token } + if (socket.connected) { + setState({ status: 'connected' }) + } else { + socket.connect() + } + return + } + + const manager = new Manager(baseUrlRef.current, { + path: '/socket.io/', + reconnection: true, + reconnectionAttempts: Infinity, + reconnectionDelay: 1000, + reconnectionDelayMax: 5000, + transports: ['polling', 'websocket'], + autoConnect: false + }) + const socket = manager.socket('/terminal', { + auth: { token } + }) + + socketRef.current = socket + setState({ status: 'connecting' }) + + socket.on('connect', () => { + // Re-subscribe across reconnects only if the viewer still wants it. Read the ref (as resubscribe/unsubscribe/resize do) rather than the sessionId captured when connect() first ran, so a later sessionId change is honored. + if (subscribedRef.current && sessionIdRef.current) { + socket.emit('agent-terminal:subscribe', { sessionId: sessionIdRef.current }) + } + setState({ status: 'connected' }) + }) + + socket.on('agent-terminal:output', (payload: TerminalOutputPayload) => { + if (payload.terminalId !== 'agent') { + return + } + outputHandlerRef.current(payload.data) + }) + + socket.on('connect_error', (error) => { + const message = error instanceof Error ? 
error.message : 'Connection error' + setState({ status: 'error', error: message }) + }) + + socket.on('disconnect', (reason) => { + if (reason === 'io client disconnect') { + setState({ status: 'idle' }) + return + } + setState({ status: 'error', error: `Disconnected: ${reason}` }) + }) + + socket.connect() + }, []) + + const disconnect = useCallback(() => { + const socket = socketRef.current + if (!socket) { + return + } + socket.removeAllListeners() + socket.disconnect() + socketRef.current = null + setState({ status: 'idle' }) + }, []) + + const resubscribe = useCallback(() => { + subscribedRef.current = true + const socket = socketRef.current + const sessionId = sessionIdRef.current + if (socket?.connected && sessionId) { + socket.emit('agent-terminal:subscribe', { sessionId }) + } + }, []) + + // Tell the hub we're no longer viewing, so the CLI can stop streaming the PTY + // when no viewers remain. (Safe to miss — the runner keeps streaming until it + // hears this, never the other way around, so a missed unsubscribe never + // causes a black screen.) 
+ const unsubscribe = useCallback(() => { + subscribedRef.current = false + const socket = socketRef.current + const sessionId = sessionIdRef.current + if (socket?.connected && sessionId) { + socket.emit('agent-terminal:unsubscribe', { sessionId }) + } + }, []) + + const resize = useCallback((cols: number, rows: number) => { + const socket = socketRef.current + const sessionId = sessionIdRef.current + if (!socket?.connected || !sessionId || cols < 1 || rows < 1) { + return + } + socket.emit('agent-terminal:resize', { sessionId, cols, rows }) + }, []) + + const onOutput = useCallback((handler: (data: string) => void) => { + outputHandlerRef.current = handler + }, []) + + return { + state, + connect, + disconnect, + resubscribe, + unsubscribe, + onOutput, + resize + } +} diff --git a/web/src/router.tsx b/web/src/router.tsx index 84222fef70..536f37d163 100644 --- a/web/src/router.tsx +++ b/web/src/router.tsx @@ -942,7 +942,7 @@ function SessionDetailRoute() { return } navigate({ to: '/sessions', replace: true }) - }, [navigate, sessionNotFound]) + }, [navigate, sessionNotFound, sessionId]) if (sessionNotFound) { return ( diff --git a/web/src/routes/sessions/terminal.test.tsx b/web/src/routes/sessions/terminal.test.tsx index 4497873b82..531b33e65f 100644 --- a/web/src/routes/sessions/terminal.test.tsx +++ b/web/src/routes/sessions/terminal.test.tsx @@ -51,8 +51,13 @@ vi.mock('@/hooks/queries/useSession', () => ({ }) })) +const capturedTerminalIds: string[] = [] + vi.mock('@/hooks/useTerminalSocket', () => ({ - useTerminalSocket: () => terminalSocketState + useTerminalSocket: (opts: { terminalId: string }) => { + capturedTerminalIds.push(opts.terminalId) + return terminalSocketState + } })) vi.mock('@/hooks/useLongPress', () => ({ @@ -112,6 +117,26 @@ describe('TerminalPage paste behavior', () => { }) }) +describe('TerminalPage terminal id', () => { + beforeEach(() => { + vi.clearAllMocks() + capturedTerminalIds.length = 0 + }) + + it('generates a unique terminal id per 
mount so concurrent viewers do not collide', () => { + // Two viewers (tabs/devices) of the SAME session must not share one + // terminal id: the hub registry would treat the second viewer's reused + // id as a stale reconnect and evict the first viewer's PTY ownership. + renderWithProviders() + renderWithProviders() + + const distinct = new Set(capturedTerminalIds) + expect(distinct.size).toBe(2) + // Each id still carries the session for debuggability/scoping. + expect([...distinct].every((id) => id.startsWith('term-session-1-'))).toBe(true) + }) +}) + describe('TerminalPage exit behavior', () => { beforeEach(() => { vi.clearAllMocks() diff --git a/web/src/routes/sessions/terminal.tsx b/web/src/routes/sessions/terminal.tsx index 7a602cb2a3..935d6d0231 100644 --- a/web/src/routes/sessions/terminal.tsx +++ b/web/src/routes/sessions/terminal.tsx @@ -1,4 +1,4 @@ -import { useCallback, useEffect, useMemo, useRef, useState } from 'react' +import { useCallback, useEffect, useRef, useState } from 'react' import { useParams } from '@tanstack/react-router' import type { Terminal } from '@xterm/xterm' import { useAppContext } from '@/lib/app-context' @@ -7,7 +7,6 @@ import { useSession } from '@/hooks/queries/useSession' import { useTerminalSocket } from '@/hooks/useTerminalSocket' import { useQuickKeyInput, QuickKeyRows } from '@/components/QuickKeys/QuickKeys' import { useTranslation } from '@/lib/use-translation' -import { randomId } from '@/lib/randomId' import { TerminalView } from '@/components/Terminal/TerminalView' import { LoadingState } from '@/components/LoadingState' import { Button } from '@/components/ui/button' @@ -63,7 +62,20 @@ export default function TerminalPage() { const goBack = useAppGoBack() const { session } = useSession(api, sessionId) const terminalSupported = isRemoteTerminalSupported(session?.metadata) - const terminalId = useMemo(() => randomId(), [sessionId]) + // A per-viewer-unique terminal id. 
Two browsers/tabs/devices viewing the + // same session must each drive their own shell: the hub registry evicts a + // reused id arriving from a different socket as a stale reconnect + // (terminalRegistry.ts), which would otherwise let a second viewer hijack + // the first viewer's PTY. The id is intentionally NOT derived from sessionId + // alone — scrollback survives navigation via the sessionId-keyed buffer + // (userTerminalBuffer.ts), not via a stable id. Held in a ref so it stays + // constant across re-renders and transient socket reconnects, and + // regenerates only when the route switches to a different session. + const terminalIdRef = useRef<{ sessionId: string; id: string } | null>(null) + if (terminalIdRef.current?.sessionId !== sessionId) { + terminalIdRef.current = { sessionId, id: `term-${sessionId}-${crypto.randomUUID()}` } + } + const terminalId = terminalIdRef.current.id const terminalRef = useRef(null) const inputDisposableRef = useRef<{ dispose: () => void } | null>(null) const connectOnceRef = useRef(false) diff --git a/web/vite.config.ts b/web/vite.config.ts index 2f2e29b64d..75fd4569eb 100644 --- a/web/vite.config.ts +++ b/web/vite.config.ts @@ -1,10 +1,27 @@ -import { defineConfig } from 'vite' +import { defineConfig, type Plugin } from 'vite' import react from '@vitejs/plugin-react' import { VitePWA } from 'vite-plugin-pwa' import { readFileSync } from 'node:fs' import { resolve } from 'node:path' import { shareTargetPathnameFromBase } from './src/lib/sharePath' +function spaFallback(): Plugin { + return { + name: 'spa-fallback', + configureServer(server) { + server.middlewares.use((req, _res, next) => { + const url = (req.url ?? 
'').split('?')[0] + if (url === '/' || url === '' || url.includes('.') || url.startsWith('/@') || url.startsWith('/api') || url.startsWith('/socket.io') || url.startsWith('/src/')) { + next() + return + } + req.url = '/index.html' + next() + }) + } + } +} + const base = process.env.VITE_BASE_URL || '/' const shareAction = shareTargetPathnameFromBase(base) const hubTarget = process.env.VITE_HUB_PROXY || 'http://127.0.0.1:3006' @@ -47,6 +64,7 @@ function getVendorChunkName(id: string): string | undefined { } export default defineConfig({ + appType: 'spa', define: { __APP_VERSION__: JSON.stringify(appVersion), }, @@ -66,6 +84,7 @@ export default defineConfig({ }, plugins: [ react(), + spaFallback(), VitePWA({ // User-controlled reload avoids mid-session surprise reloads (autoUpdate reloads all tabs). registerType: 'prompt', From 885e6da13cc700e5f7f43eec70c9e33a4fad9c08 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 12 Jun 2026 17:28:16 +0900 Subject: [PATCH 11/26] feat(pty): add PTY-mode option to the web new-session flow --- web/src/api/client.ts | 5 +-- web/src/chat/normalizeAgent.ts | 1 + .../components/NewSession/AgentSelector.tsx | 4 +-- web/src/components/NewSession/index.tsx | 35 +++++++++++++++++-- .../NewSession/newSessionFormDraft.ts | 4 ++- web/src/components/SessionChat.tsx | 26 ++++++++++++-- web/src/components/SessionHeader.tsx | 27 +++++++++++++- web/src/hooks/mutations/useSpawnSession.ts | 4 ++- web/src/lib/locales/en.ts | 2 ++ web/src/lib/locales/zh-CN.ts | 2 ++ web/src/types/api.ts | 1 + 11 files changed, 100 insertions(+), 11 deletions(-) diff --git a/web/src/api/client.ts b/web/src/api/client.ts index d37e0828bc..6ad072898d 100644 --- a/web/src/api/client.ts +++ b/web/src/api/client.ts @@ -596,11 +596,12 @@ export class ApiClient { yolo?: boolean, sessionType?: 'simple' | 'worktree', worktreeName?: string, - effort?: string + effort?: string, + startingMode?: 'remote' | 'pty' ): Promise { return await 
this.request(`/api/machines/${encodeURIComponent(machineId)}/spawn`, { method: 'POST', - body: JSON.stringify({ directory, agent, model, modelReasoningEffort, yolo, sessionType, worktreeName, effort }) + body: JSON.stringify({ directory, agent, model, modelReasoningEffort, yolo, sessionType, worktreeName, effort, startingMode }) }) } diff --git a/web/src/chat/normalizeAgent.ts b/web/src/chat/normalizeAgent.ts index 3f2bf8814d..55dc07f8f1 100644 --- a/web/src/chat/normalizeAgent.ts +++ b/web/src/chat/normalizeAgent.ts @@ -518,6 +518,7 @@ export function normalizeAgentRecord( meta } } + return null } diff --git a/web/src/components/NewSession/AgentSelector.tsx b/web/src/components/NewSession/AgentSelector.tsx index 4146f30815..cf0c5f294b 100644 --- a/web/src/components/NewSession/AgentSelector.tsx +++ b/web/src/components/NewSession/AgentSelector.tsx @@ -1,4 +1,4 @@ -import { AGENT_FLAVORS } from '@hapi/protocol' +import { AGENT_FLAVORS, getFlavorLabel } from '@hapi/protocol' import type { AgentType } from './types' import { useTranslation } from '@/lib/use-translation' @@ -29,7 +29,7 @@ export function AgentSelector(props: { disabled={props.isDisabled} className="accent-[var(--app-link)]" /> - {agentType} + {getFlavorLabel(agentType)} ))}
diff --git a/web/src/components/NewSession/index.tsx b/web/src/components/NewSession/index.tsx index f19e311a81..9f9c444479 100644 --- a/web/src/components/NewSession/index.tsx +++ b/web/src/components/NewSession/index.tsx @@ -77,6 +77,9 @@ export function NewSession(props: { const [effort, setEffort] = useState('auto') const [modelReasoningEffort, setModelReasoningEffort] = useState('default') const [yoloMode, setYoloMode] = useState(loadPreferredYoloMode) + // Default to 'remote' (the stable SDK path); PTY is an explicit opt-in via the + // checkbox below. + const [startingMode, setStartingMode] = useState<'remote' | 'pty'>('remote') const [sessionType, setSessionType] = useState('simple') const [worktreeName, setWorktreeName] = useState('') const [directoryCreationConfirmed, setDirectoryCreationConfirmed] = useState(false) @@ -102,6 +105,12 @@ export function NewSession(props: { savePreferredAgent(agent) }, [agent]) + useEffect(() => { + // Reset to the stable 'remote' path when switching agents; PTY stays an + // explicit opt-in via the checkbox. + setStartingMode('remote') + }, [agent]) + useEffect(() => { savePreferredYoloMode(yoloMode) }, [yoloMode]) @@ -147,6 +156,7 @@ export function NewSession(props: { setYoloMode(draft.yoloMode) setSessionType(draft.sessionType) setWorktreeName(draft.worktreeName) + setStartingMode(draft.startingMode ?? 'remote') clearNewSessionFormDraft() }, [ props.initialDirectory, @@ -343,6 +353,7 @@ export function NewSession(props: { cwdExists: deferredDirectoryExists, }) }) + useEffect(() => { // Auto-pick the OpenCode default model when discovery finishes, so the // form has a sensible value if the user hits Enter without scrolling. @@ -456,7 +467,8 @@ export function NewSession(props: { modelReasoningEffort, yoloMode, sessionType, - worktreeName + worktreeName, + startingMode: agent === 'claude' ? 
startingMode : undefined }) props.onChooseFolder({ machineId, directory: trimmedDirectory }) }, [ @@ -572,9 +584,11 @@ export function NewSession(props: { modelReasoningEffort: resolvedModelReasoningEffort, yolo: yoloMode, sessionType, - worktreeName: sessionType === 'worktree' ? (worktreeName.trim() || undefined) : undefined + worktreeName: sessionType === 'worktree' ? (worktreeName.trim() || undefined) : undefined, + startingMode: agent === 'claude' ? startingMode : undefined }) + if (result.type === 'success') { haptic.notification('success') clearNewSessionFormDraft() @@ -716,6 +730,23 @@ export function NewSession(props: { isDisabled={isFormDisabled} onEffortChange={setEffort} /> + {agent === 'claude' && ( +
+ + + {t('newSession.pty.desc')} + +
+ )} >(new Map()) const blocksByIdRef = useRef>(new Map()) const visibleGroupsRef = useRef([]) const [forceScrollToken, setForceScrollToken] = useState(0) const [outlineOpen, setOutlineOpen] = useState(props.initialOutlineOpen ?? false) + const [terminalVisible, setTerminalVisible] = useState(false) useEffect(() => { if (!props.initialOutlineOpen) { return @@ -418,7 +427,6 @@ function SessionChatInner(props: SessionChatProps) { setOutlineOpen(true) props.onInitialOutlineConsumed?.() }, [props.initialOutlineOpen, props.onInitialOutlineConsumed]) - const [cursorSelectedBase, setCursorSelectedBase] = useState('auto') const lastSyncedCursorModelRef = useRef(undefined) const scratchlist = useScratchlist(props.session.id) @@ -1109,6 +1117,8 @@ function SessionChatInner(props: SessionChatProps) { filesActive={false} onToggleOutline={handleToggleOutline} outlineActive={outlineOpen} + onToggleTerminal={canViewAgentTerminal ? () => setTerminalVisible(v => !v) : undefined} + terminalActive={terminalVisible} api={props.api} onSessionDeleted={props.onBack} onSessionReopened={(newSessionId) => { @@ -1122,6 +1132,7 @@ function SessionChatInner(props: SessionChatProps) { +
{props.session.teamState && ( )} @@ -1139,7 +1150,15 @@ function SessionChatInner(props: SessionChatProps) { - +
+ {canViewAgentTerminal && ( + + )} +
+
{codexCollaborationModeSupported && codexModelsState.error ? (
@@ -1339,8 +1359,10 @@ function SessionChatInner(props: SessionChatProps) { sendError={props.sendError ?? null} onClearSendError={props.onClearSendError} /> +
+
{/* Voice session component - renders nothing but initializes voice backend */} {voice && ( diff --git a/web/src/components/SessionHeader.tsx b/web/src/components/SessionHeader.tsx index c80ac68a4e..b16302e2d0 100644 --- a/web/src/components/SessionHeader.tsx +++ b/web/src/components/SessionHeader.tsx @@ -10,6 +10,7 @@ import { ConfirmDialog } from '@/components/ui/ConfirmDialog' import { formatReopenError } from '@/lib/reopenError' import { getSessionModelLabel } from '@/lib/sessionModelLabel' import { useTranslation } from '@/lib/use-translation' +import { getFlavorLabel } from '@hapi/protocol' import { AgentFlavorIcon } from '@/components/AgentFlavorIcon' function getSessionTitle(session: Session): string { @@ -78,6 +79,15 @@ function headerToggleClass(active: boolean): string { }` } +function TerminalIcon(props: { className?: string }) { + return ( + + + + + ) +} + function MoreVerticalIcon(props: { className?: string }) { return ( void outlineActive?: boolean + onToggleTerminal?: () => void + terminalActive?: boolean api: ApiClient | null onSessionDeleted?: () => void onSessionReopened?: (newSessionId: string) => void @@ -191,7 +203,7 @@ export function SessionHeader(props: {
- {session.metadata?.flavor?.trim() || 'unknown'} + {getFlavorLabel(session.metadata?.flavor)} {modelLabel ? ( @@ -230,6 +242,19 @@ export function SessionHeader(props: { ) : null} + {props.onToggleTerminal ? ( + + ) : null} +
+ +
+ { + dispatch(sequence) + terminalRef.current?.focus() + }} + onToggleModifier={(modifier) => { + toggleModifier(modifier) + terminalRef.current?.focus() + }} + /> +
) } diff --git a/web/src/hooks/useAgentTerminalSocket.ts b/web/src/hooks/useAgentTerminalSocket.ts index da5cce2b2c..0a560fb4c3 100644 --- a/web/src/hooks/useAgentTerminalSocket.ts +++ b/web/src/hooks/useAgentTerminalSocket.ts @@ -26,6 +26,7 @@ export function useAgentTerminalSocket(options: UseAgentTerminalSocketOptions): unsubscribe: () => void onOutput: (handler: (data: string) => void) => void resize: (cols: number, rows: number) => void + sendInput: (data: string) => void } { const [state, setState] = useState({ status: 'idle' }) const socketRef = useRef(null) @@ -177,6 +178,15 @@ export function useAgentTerminalSocket(options: UseAgentTerminalSocketOptions): outputHandlerRef.current = handler }, []) + const sendInput = useCallback((data: string) => { + const socket = socketRef.current + const sessionId = sessionIdRef.current + if (!socket?.connected || !sessionId || !data) { + return + } + socket.emit('agent-terminal:input', { sessionId, data }) + }, []) + return { state, connect, @@ -184,6 +194,7 @@ export function useAgentTerminalSocket(options: UseAgentTerminalSocketOptions): resubscribe, unsubscribe, onOutput, - resize + resize, + sendInput } } From d0d6cc6efacc599b0d44b501578f219a816b31b5 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 19 Jun 2026 17:15:02 +0900 Subject: [PATCH 14/26] feat(pty): forward user messages to local-mode Claude stdin - Pipe stdin for local Claude processes so chat messages go directly to the running session instead of triggering a mode switch - Add stdin message dedup filter in local launcher scanner callback (guards against user messages without a `message` payload) - Restore lost messages to queue on claudeRemote launch failure - Export extractRawUserTextContent for reuse in dedup logic --- cli/src/api/apiSession.ts | 2 +- cli/src/claude/claudeLocal.ts | 9 ++++- cli/src/claude/claudeLocalLauncher.test.ts | 22 ++++++++++- cli/src/claude/claudeLocalLauncher.ts | 46 +++++++++++++++++----- 
cli/src/claude/claudeRemoteLauncher.ts | 23 +++++++++++ cli/src/claude/runClaude.ts | 15 +++++++ cli/src/claude/session.ts | 5 +++ cli/src/utils/spawnWithAbort.ts | 10 +++++ 8 files changed, 118 insertions(+), 14 deletions(-) diff --git a/cli/src/api/apiSession.ts b/cli/src/api/apiSession.ts index 82a2ba21ba..2495446fc8 100644 --- a/cli/src/api/apiSession.ts +++ b/cli/src/api/apiSession.ts @@ -55,7 +55,7 @@ const SYSTEM_INJECTION_PREFIXES = [ // hub's scrollback ring). The tail always holds the latest full-screen redraw. const AGENT_TERMINAL_LOCAL_BUFFER_BYTES = 256 * 1024 -function extractRawUserTextContent(content: unknown): string | null { +export function extractRawUserTextContent(content: unknown): string | null { if (typeof content === 'string') { return content } diff --git a/cli/src/claude/claudeLocal.ts b/cli/src/claude/claudeLocal.ts index 04edc2ee0f..02b98f8251 100644 --- a/cli/src/claude/claudeLocal.ts +++ b/cli/src/claude/claudeLocal.ts @@ -19,6 +19,8 @@ export async function claudeLocal(opts: { claudeArgs?: string[] allowedTools?: string[] hookSettingsPath: string + /** Called when the child process's stdin is ready for writing. */ + onStdinReady?: (write: (data: string) => void) => void }) { // Ensure project directory exists @@ -95,7 +97,8 @@ export async function claudeLocal(opts: { const claudeCommand = getDefaultClaudeCodePath(); logger.debug(`[ClaudeLocal] Using claude executable: ${claudeCommand}`); - // Spawn the process + // Spawn the process with pipe stdin so chat messages can be forwarded + // to the running Claude process instead of triggering a mode switch. 
try { await spawnWithTerminalGuard({ command: claudeCommand, @@ -108,7 +111,9 @@ export async function claudeLocal(opts: { installHint: 'Claude CLI', includeCause: true, logExit: true, - shell: false // Use absolute path, no shell needed + shell: false, // Use absolute path, no shell needed + stdio: ['pipe', 'inherit', 'inherit'], + onSpawned: opts.onStdinReady }); } finally { cleanupMcpConfig?.(); diff --git a/cli/src/claude/claudeLocalLauncher.test.ts b/cli/src/claude/claudeLocalLauncher.test.ts index a4e855aabf..ba1d58ba6e 100644 --- a/cli/src/claude/claudeLocalLauncher.test.ts +++ b/cli/src/claude/claudeLocalLauncher.test.ts @@ -54,7 +54,8 @@ function createSessionStub() { addSessionFoundCallback: () => {}, removeSessionFoundCallback: () => {}, consumeOneTimeFlags: () => {}, - recordLocalLaunchFailure: () => {} + recordLocalLaunchFailure: () => {}, + stdinMessageTexts: new Set() }, sentMessages } @@ -133,4 +134,23 @@ describe('claudeLocalLauncher message filtering', () => { expect(sentMessages).toHaveLength(0) }) + + it('swallows exactly one stdin echo per forward, so a repeated identical message still surfaces', async () => { + const { session, sentMessages } = createSessionStub() + await claudeLocalLauncher(session as never) + + // One "continue" was forwarded to claude via stdin (marked for dedup). + session.stdinMessageTexts.add('continue') + // Its JSONL echo is swallowed (already shown in chat via the web path)... + harness.scannerOnMessage!({ type: 'user', uuid: '1', message: { content: 'continue' } }) + expect(sentMessages).toHaveLength(0) + // ...and the dedup entry is consumed on match, bounding the set. + expect(session.stdinMessageTexts.size).toBe(0) + + // The user sends "continue" again; this echo has no pending forward to + // swallow it, so it must surface (regression: a value-keyed, never-cleared + // set would have silently dropped it). 
+ harness.scannerOnMessage!({ type: 'user', uuid: '2', message: { content: 'continue' } }) + expect(sentMessages).toHaveLength(1) + }) }) diff --git a/cli/src/claude/claudeLocalLauncher.ts b/cli/src/claude/claudeLocalLauncher.ts index 291569cdad..04adbaa97d 100644 --- a/cli/src/claude/claudeLocalLauncher.ts +++ b/cli/src/claude/claudeLocalLauncher.ts @@ -3,6 +3,7 @@ import { Session } from "./session"; import { createSessionScanner } from "./utils/sessionScanner"; import { isClaudeChatVisibleMessage } from "./utils/chatVisibility"; import { BaseLocalLauncher } from "@/modules/common/launcher/BaseLocalLauncher"; +import { extractRawUserTextContent } from "@/api/apiSession"; export async function claudeLocalLauncher(session: Session): Promise<'switch' | 'exit'> { @@ -25,6 +26,19 @@ export async function claudeLocalLauncher(session: Session): Promise<'switch' | if (!isClaudeChatVisibleMessage(message)) { return } + // Skip the JSONL echo of a user message we already forwarded to the + // local process via stdin (it is already in the hub as a consumed + // message from the web chat). Without this the same user text would + // appear twice in the chat UI — once from the web path and once from + // the JSONL transcript. Swallow exactly ONE echo per forward by + // deleting on match: this both bounds the set and lets a later, + // identical message ("yes", "continue", ...) surface normally. 
+ if (message.type === 'user') { + const text = extractRawUserTextContent(message.message?.content) + if (text && session.stdinMessageTexts.delete(text)) { + return + } + } session.client.sendClaudeSessionMessage(message) } }); @@ -43,16 +57,28 @@ export async function claudeLocalLauncher(session: Session): Promise<'switch' | startedBy: session.startedBy, startingMode: session.startingMode, launch: async (abortSignal) => { - await claudeLocal({ - path: session.path, - sessionId: session.sessionId, - abort: abortSignal, - claudeEnvVars: session.claudeEnvVars, - claudeArgs: session.claudeArgs, - mcpServers: session.mcpServers, - allowedTools: session.allowedTools, - hookSettingsPath: session.hookSettingsPath, - }); + session.writeStdin = null; + try { + await claudeLocal({ + path: session.path, + sessionId: session.sessionId, + abort: abortSignal, + claudeEnvVars: session.claudeEnvVars, + claudeArgs: session.claudeArgs, + mcpServers: session.mcpServers, + allowedTools: session.allowedTools, + hookSettingsPath: session.hookSettingsPath, + onStdinReady: (write) => { + session.writeStdin = (data: string) => write(data); + } + }); + } finally { + // The child has exited: drop the stdin writer so a message that + // races the mode-flip isn't routed to a destroyed pipe (and then + // acked as consumed but silently lost). onUserMessage falls back + // to the queue once this is null. 
+ session.writeStdin = null; + } }, onLaunchSuccess: () => { session.consumeOneTimeFlags(); diff --git a/cli/src/claude/claudeRemoteLauncher.ts b/cli/src/claude/claudeRemoteLauncher.ts index f3ca0eb702..cbb921d957 100644 --- a/cli/src/claude/claudeRemoteLauncher.ts +++ b/cli/src/claude/claudeRemoteLauncher.ts @@ -294,6 +294,9 @@ class ClaudeRemoteLauncher extends RemoteLauncherBase { this.abortFuture = new Future(); let modeHash: string | null = null; let mode: EnhancedMode | null = null; + // Track the last message consumed from the queue so we can + // restore it if claudeRemote throws before processing. + let consumedMessage: { message: string; mode: EnhancedMode } | null = null; try { await claudeRemote({ sessionId: session.sessionId, @@ -331,6 +334,7 @@ class ClaudeRemoteLauncher extends RemoteLauncherBase { } modeHash = msg.hash; mode = msg.mode; + consumedMessage = msg; permissionHandler.handleModeChange(mode.permissionMode); return { message: msg.message, @@ -356,6 +360,8 @@ class ClaudeRemoteLauncher extends RemoteLauncherBase { session.clearSessionId(); }, onReady: () => { + // Message was successfully processed, stop tracking. + consumedMessage = null; logger.debug( `[claudeRemoteLauncher][async-debug] onReady callback ` + `(hasPending=${Boolean(pending)}, queueSize=${session.queue.size()})` @@ -377,6 +383,23 @@ class ClaudeRemoteLauncher extends RemoteLauncherBase { } } catch (e) { logger.debug('[remote]: launch error', e); + // `consumedMessage` is only assigned inside the nextMessage + // callback, which TS can't trace from this catch (it narrows + // the var to null), so restore through a typed local. + const restore = consumedMessage as { message: string; mode: EnhancedMode } | null; + if (restore) { + logger.debug('[remote]: restoring lost message to queue'); + // Restore via the public queue API so the waiter is + // notified and the mode hash is recomputed — don't poke + // the private backing array. 
NOTE: collectBatch already + // dropped the consumed messages' localIds (only the + // combined string survives), so the restored item is + // localId-less; a later cancel-by-localId can't target it. + // Full id restoration would require collectBatch to surface + // the consumed ids. + session.queue.unshift(restore.message, restore.mode); + consumedMessage = null; + } if (!this.exitReason) { const detail = e instanceof Error ? e.message : String(e); session.client.sendSessionEvent({ type: 'message', message: `Process exited unexpectedly: ${detail}` }); diff --git a/cli/src/claude/runClaude.ts b/cli/src/claude/runClaude.ts index e971bf511f..4be929a55b 100644 --- a/cli/src/claude/runClaude.ts +++ b/cli/src/claude/runClaude.ts @@ -404,6 +404,21 @@ export async function runClaude(options: StartOptions = {}): Promise { return; } + // If in local mode with a live stdin pipe, forward the message + // directly to the running Claude process instead of pushing to + // the queue (which would trigger doSwitch and kill the process). + const sessionInstance = currentSessionRef.current; + if (sessionInstance?.writeStdin && sessionInstance.mode === 'local') { + logger.debug('[start] forwarding message to local process stdin'); + sessionInstance.stdinMessageTexts.add(formattedText); + sessionInstance.writeStdin(formattedText + '\n'); + if (localId) { + session.emitMessagesConsumed([localId]); + } + logger.debugLargeJson('User message forwarded to local stdin:', message) + return; + } + // Push with resolved permission mode, model, system prompts, and tools const enhancedMode: EnhancedMode = { permissionMode: messagePermissionMode ?? 
'default', diff --git a/cli/src/claude/session.ts b/cli/src/claude/session.ts index 87a080a0e3..4a2c8d747b 100644 --- a/cli/src/claude/session.ts +++ b/cli/src/claude/session.ts @@ -21,6 +21,11 @@ export class Session extends AgentSessionBase { readonly startedBy: 'runner' | 'terminal'; readonly startingMode: 'local' | 'remote'; localLaunchFailure: LocalLaunchFailure | null = null; + /** Function to write data to the local Claude process's stdin. */ + writeStdin: ((data: string) => void) | null = null; + /** Texts of messages that were forwarded to local Claude via stdin. + * Used by the sessionScanner to skip duplicate user messages. */ + readonly stdinMessageTexts: Set = new Set(); constructor(opts: { api: ApiClient; diff --git a/cli/src/utils/spawnWithAbort.ts b/cli/src/utils/spawnWithAbort.ts index 03d1d68522..294a307cef 100644 --- a/cli/src/utils/spawnWithAbort.ts +++ b/cli/src/utils/spawnWithAbort.ts @@ -31,6 +31,8 @@ export type SpawnWithAbortOptions = { shell?: SpawnOptions['shell']; stdio?: StdioOptions; windowsHide?: SpawnOptions['windowsHide']; + /** Called after the child process is spawned with a function to write to stdin. */ + onSpawned?: (writeStdin: (data: string) => void) => void; }; export async function spawnWithAbort(options: SpawnWithAbortOptions): Promise { @@ -57,6 +59,14 @@ export async function spawnWithAbort(options: SpawnWithAbortOptions): Promise { + if (child.stdin && !child.stdin.destroyed) { + child.stdin.write(data); + } + }); + } + let abortKillTimeout: NodeJS.Timeout | null = null; const abortHandler = () => { From 5096b89118b3e11e17ab97ac470e2ee5ef829c2e Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Tue, 23 Jun 2026 18:51:35 +0900 Subject: [PATCH 15/26] feat(pty): clear PTY input, reset queue, and signal restore on abort On abort, send Esc to interrupt the claude TUI, then (after a 150 ms delay so the TUI finishes restoring its prompt) Ctrl-U to clear the input line and reset the message queue. 
Emit an abort-restore session event so the web composer can recover the aborted prompt text. --- cli/src/api/apiSession.ts | 4 + .../__tests__/claudePtyLauncher.test.ts | 135 +++++++++++++++++- cli/src/claude/claudePtyLauncher.ts | 27 ++++ 3 files changed, 164 insertions(+), 2 deletions(-) diff --git a/cli/src/api/apiSession.ts b/cli/src/api/apiSession.ts index 2495446fc8..4c5eb392fe 100644 --- a/cli/src/api/apiSession.ts +++ b/cli/src/api/apiSession.ts @@ -604,6 +604,10 @@ export class ApiSessionClient extends EventEmitter { mode: SessionPermissionMode } | { type: 'ready' + } | { + // Emitted on abort so the web composer can restore the aborted prompt. + // The web side reads the last user message text from normalizedMessages. + type: 'abort-restore' }, id?: string): void { const content = { role: 'agent', diff --git a/cli/src/claude/__tests__/claudePtyLauncher.test.ts b/cli/src/claude/__tests__/claudePtyLauncher.test.ts index a72bc8c2c9..590e4c4537 100644 --- a/cli/src/claude/__tests__/claudePtyLauncher.test.ts +++ b/cli/src/claude/__tests__/claudePtyLauncher.test.ts @@ -142,6 +142,7 @@ describe('lastUserPromptText', () => { function createSessionStub() { const sentMessages: Array> = [] + const sentSessionEvents: Array> = [] return { session: { sessionId: 'pty-session', @@ -159,17 +160,20 @@ function createSessionStub() { addSessionFoundCallback: (cb: (sessionId: string) => void) => { harness.foundCallbacks.push(cb) }, removeSessionFoundCallback: () => {}, queue: { - waitForMessagesAndGetAsString: vi.fn().mockResolvedValue(null) + waitForMessagesAndGetAsString: vi.fn().mockResolvedValue(null), + reset: vi.fn(), }, client: { sendClaudeSessionMessage: (msg: Record) => { sentMessages.push(msg) }, - sendSessionEvent: () => {}, + sendSessionEvent: vi.fn((event: Record) => { sentSessionEvents.push(event) }), emitAgentTerminalOutput: () => {}, setAgentTerminalControls: () => {}, + resetAgentTerminal: () => {}, rpcHandlerManager: { registerHandler: () => {} }, }, }, 
sentMessages, + sentSessionEvents, } } @@ -291,6 +295,89 @@ describe('claudePtyLauncher turn-interrupt', () => { await launcherPromise }) + it('sends clear-line key after Esc and resets the queue on abort', async () => { + harness.exitReason = null + + const { session } = createSessionStub() + const msgPromise = deferred() + vi.mocked(session.queue.waitForMessagesAndGetAsString).mockImplementation(() => msgPromise.promise) + + const launcherPromise = claudePtyLauncher(session as never) + + await tick(50) + + expect(mockAbortHandlers).toBeTruthy() + + // Trigger turn interrupt + await mockAbortHandlers.onAbort() + + // Should send clear-line key after Esc + const calls = lastSendKeysSpy.mock.calls.map((c: unknown[]) => c[0]) + expect(calls[0]).toBe('\x1b') + expect(calls[1]).toBe('\x15') + + // Should reset the queue so pending messages don't get appended + expect(session.queue.reset).toHaveBeenCalledTimes(1) + + // Should NOT abort the PTY spawn signal + expect(ptyOptsCaptured.signal.aborted).toBe(false) + + harness.exitReason = 'exit' + msgPromise.resolve(null) + await launcherPromise + }) + + it('emits abort-restore event on abort (regardless of whether a message was submitted)', async () => { + harness.exitReason = null + + const { session, sentSessionEvents } = createSessionStub() + const msgPromise = deferred() + vi.mocked(session.queue.waitForMessagesAndGetAsString).mockImplementation(() => msgPromise.promise) + + const launcherPromise = claudePtyLauncher(session as never) + + await tick(50) + + // Simulate a message being submitted via onMessageSubmitted callback + ptyOptsCaptured.onMessageSubmitted?.('hello world') + + // Trigger abort after message was submitted + await mockAbortHandlers.onAbort() + + // Should emit abort-restore (text is read by the web from normalizedMessages) + const restoreEvent = sentSessionEvents.find((e) => e.type === 'abort-restore') + expect(restoreEvent).toBeDefined() + // abort-restore carries no text field — the web reads the 
last user message + expect((restoreEvent as any)?.text).toBeUndefined() + + harness.exitReason = 'exit' + msgPromise.resolve(null) + await launcherPromise + }) + + it('emits abort-restore even when no message was submitted before abort', async () => { + harness.exitReason = null + + const { session, sentSessionEvents } = createSessionStub() + const msgPromise = deferred() + vi.mocked(session.queue.waitForMessagesAndGetAsString).mockImplementation(() => msgPromise.promise) + + const launcherPromise = claudePtyLauncher(session as never) + + await tick(50) + + // Abort without any message submitted first + await mockAbortHandlers.onAbort() + + // Should still emit abort-restore (the web will find no user message to restore) + const restoreEvent = sentSessionEvents.find((e) => e.type === 'abort-restore') + expect(restoreEvent).toBeDefined() + + harness.exitReason = 'exit' + msgPromise.resolve(null) + await launcherPromise + }) + it('kills the PTY session (aborts the controller) when aborted and PTY controls are NOT active', async () => { harness.exitReason = null @@ -321,4 +408,48 @@ describe('claudePtyLauncher turn-interrupt', () => { msgPromise.resolve(null) await launcherPromise }) + + it('delays Ctrl-U until after the Esc interrupt has settled (~150 ms)', async () => { + // Esc causes claude TUI to asynchronously restore the previous prompt. + // Ctrl-U must arrive AFTER that restore, so we verify that Ctrl-U is NOT + // sent synchronously with Esc but only after ~150 ms have elapsed. 
+ vi.useFakeTimers() + harness.exitReason = null + + const { session } = createSessionStub() + const msgPromise = deferred() + vi.mocked(session.queue.waitForMessagesAndGetAsString).mockImplementation(() => msgPromise.promise) + + const launcherPromise = claudePtyLauncher(session as never) + + // Advance fake timers to let the claudePty mock's async setup resolve + // (the mock calls onReady synchronously and then awaits nextMessage which + // hangs until msgPromise resolves, but the setup tick needs to drain). + await vi.advanceTimersByTimeAsync(50) + + expect(mockAbortHandlers).toBeTruthy() + + // Kick off the abort — do NOT await yet; we want to inspect mid-flight. + const abortPromise = mockAbortHandlers.onAbort() + + // Drain synchronous microtasks: Esc should have been sent already + // (it is sent before the sleep), but Ctrl-U is gated behind sleep(150). + await Promise.resolve() + const callsAfterEsc = lastSendKeysSpy.mock.calls.map((c: unknown[]) => c[0]) + expect(callsAfterEsc).toContain('\x1b') + // Ctrl-U must NOT have arrived yet — the sleep is still pending. + expect(callsAfterEsc).not.toContain('\x15') + + // Advance past the sleep delay; Ctrl-U should now be sent. + await vi.advanceTimersByTimeAsync(200) + const callsAfterDelay = lastSendKeysSpy.mock.calls.map((c: unknown[]) => c[0]) + expect(callsAfterDelay).toContain('\x15') + + await abortPromise + + vi.useRealTimers() + harness.exitReason = 'exit' + msgPromise.resolve(null) + await launcherPromise + }) }) diff --git a/cli/src/claude/claudePtyLauncher.ts b/cli/src/claude/claudePtyLauncher.ts index 0431e63e08..5a8c5102bd 100644 --- a/cli/src/claude/claudePtyLauncher.ts +++ b/cli/src/claude/claudePtyLauncher.ts @@ -79,6 +79,15 @@ export function transcriptConfirmsDelivery(transcript: string, text: string): bo } class ClaudePtyLauncher extends RemoteLauncherBase { + // Ctrl-U (line-kill): clears the PTY input line from the cursor to the + // beginning of the line. 
Used after an Esc interrupt so the aborted + // prompt text does not bleed into the next submission. + // Verified in bash PTY (readline-compatible); claude TUI (ink/React + // input) is unverified on real hardware — confirm at Validation Gate 1. + // Isolated as a constant so it can be swapped without a grep if a + // future claude version requires a different sequence. + private static readonly PTY_CLEAR_LINE = '\x15' + private readonly session: Session private scanner: Awaited | null = null // Claude's own session UUID (discovered via the SessionStart hook). Used to @@ -303,6 +312,24 @@ class ClaudePtyLauncher extends RemoteLauncherBase { if (this.ptyControls) { logger.debug('[pty]: Sending interrupt key (Esc) to PTY') this.ptyControls.sendKeys('\x1b') + // Wait briefly before clearing the line: claude TUI (ink) restores + // the previous prompt to the input line asynchronously after an Esc + // interrupt. Sending Ctrl-U immediately could race against that + // restore and leave stale text behind. ~150 ms is enough for the + // TUI's event loop to complete the restore in practice. + await this.sleep(150) + // Clear any lingering input the claude TUI restored to the prompt + // after the Esc interrupt, so the next submitted message is not + // prefixed by the aborted text. + logger.debug('[pty]: Sending line-clear key (Ctrl-U) to PTY') + this.ptyControls.sendKeys(ClaudePtyLauncher.PTY_CLEAR_LINE) + // Drop pending queued messages — they were enqueued AFTER the + // message that is now being aborted and should not be auto-delivered + // to the fresh prompt. + this.session.queue.reset() + // Signal the web composer to restore the aborted prompt text. + // The web side reads the last user message from normalizedMessages. 
+ this.session.client.sendSessionEvent({ type: 'abort-restore' }) } else { logger.debug('[pty]: No PTY controls active, falling back to aborting the controller') await this.abort() From a75d10a5b5217a523b1e8723e16d0a645f6488ba Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Tue, 23 Jun 2026 18:51:42 +0900 Subject: [PATCH 16/26] feat(pty): restore aborted prompt to the web composer on abort Consume the abort-restore session event: read the last user message text and surface it back in the composer via the existing sendError path, so an aborted prompt is not lost. Skip the signal in the chat timeline since it is a composer side-effect, not a visible event. --- web/src/chat/reducerTimeline.ts | 5 +++ web/src/components/SessionChat.tsx | 54 ++++++++++++++++++++++++++++++ web/src/lib/locales/en.ts | 1 + web/src/lib/locales/zh-CN.ts | 1 + web/src/router.tsx | 13 +++++++ 5 files changed, 74 insertions(+) diff --git a/web/src/chat/reducerTimeline.ts b/web/src/chat/reducerTimeline.ts index 6b95d193b4..73760d688f 100644 --- a/web/src/chat/reducerTimeline.ts +++ b/web/src/chat/reducerTimeline.ts @@ -449,6 +449,11 @@ export function reduceTimeline( if (msg.content.type === 'token-count') { continue } + // abort-restore is a side-effect signal for the web composer, + // not a visible chat event. Skip it in the timeline. 
+ if (msg.content.type === 'abort-restore') { + continue + } if (msg.content.type === 'turn-duration') { const targetId = msg.content.targetMessageId const durationMs = msg.content.durationMs as number diff --git a/web/src/components/SessionChat.tsx b/web/src/components/SessionChat.tsx index 18b09b1f26..5a44be0fc5 100644 --- a/web/src/components/SessionChat.tsx +++ b/web/src/components/SessionChat.tsx @@ -252,6 +252,56 @@ function ShareSeedConsumer(props: { sessionId: string; sessionActive: boolean }) return null } +/** + * Watches for incoming `abort-restore` events (emitted by the PTY launcher + * when the user aborts a running turn) and surfaces the last user message + * text via the existing sendError path (onAbortRestore prop) — but only when + * no user message has been sent after the abort-restore event, so we never + * replay a prompt the user has already submitted. + */ +function AbortRestoreConsumer(props: { + messages: NormalizedMessage[] + onAbortRestore: (text: string) => void +}) { + const lastHandledIdRef = useRef(null) + + useEffect(() => { + // Walk backwards: find an abort-restore event with no user message after it. + // If a user message comes after the abort-restore, the restore was already + // acted on — treat it as consumed regardless of page reload. + let abortRestoreId: string | null = null + for (let i = props.messages.length - 1; i >= 0; i--) { + const msg = props.messages[i] + if (!msg) continue + if (msg.role === 'user') break // user message after abort-restore → stale + if (msg.role !== 'event') continue + if (msg.content.type === 'abort-restore') { + abortRestoreId = msg.id + break + } + } + if (!abortRestoreId) return + if (lastHandledIdRef.current === abortRestoreId) return + lastHandledIdRef.current = abortRestoreId + + // Find the last user message text before the abort-restore event and + // surface it via the sendError path so HappyComposer restores it in + // the same way it handles a failed send. 
+ for (let i = props.messages.length - 1; i >= 0; i--) { + const msg = props.messages[i] + if (!msg) continue + if (msg.role !== 'user') continue + const text = msg.content.text + if (text.length > 0) { + props.onAbortRestore(text) + break + } + } + }, [props.messages, props.onAbortRestore]) + + return null +} + /** * Mounts the per-session scratchlist DRAWER (composer-controlled). * @@ -378,6 +428,9 @@ type SessionChatProps = { onClearSendError?: () => void initialOutlineOpen?: boolean onInitialOutlineConsumed?: () => void + // Called when an `abort-restore` event arrives and the composer is not empty, + // so the caller can surface the aborted text via the existing sendError path. + onAbortRestore?: (text: string) => void } /** @@ -1149,6 +1202,7 @@ function SessionChatInner(props: SessionChatProps) { + {})} />
{canViewAgentTerminal && ( diff --git a/web/src/lib/locales/en.ts b/web/src/lib/locales/en.ts index 36be73b25e..bf50802ea0 100644 --- a/web/src/lib/locales/en.ts +++ b/web/src/lib/locales/en.ts @@ -256,6 +256,7 @@ export default { 'chat.terminal': 'Terminal', 'chat.switchRemote': 'Switch to remote mode', 'chat.sendError.fallback': "Couldn't send your message. Edit and try again.", + 'chat.sendError.aborted': 'Turn was aborted. Edit and resend if needed.', 'chat.sendError.sessionInactive': 'This session is archived. Reopen it to send your message.', 'chat.sendError.sessionInactive.action': 'Reopen', diff --git a/web/src/lib/locales/zh-CN.ts b/web/src/lib/locales/zh-CN.ts index b276b78fb3..18243c2848 100644 --- a/web/src/lib/locales/zh-CN.ts +++ b/web/src/lib/locales/zh-CN.ts @@ -260,6 +260,7 @@ export default { 'chat.terminal': '终端', 'chat.switchRemote': '切换到远程模式', 'chat.sendError.fallback': '消息未能发送。请修改后重试。', + 'chat.sendError.aborted': '操作已中止。如需要,请编辑后重新发送。', 'chat.sendError.sessionInactive': '此会话已归档。请先重新打开再发送消息。', 'chat.sendError.sessionInactive.action': '重新打开', diff --git a/web/src/router.tsx b/web/src/router.tsx index 536f37d163..e2a7b206a8 100644 --- a/web/src/router.tsx +++ b/web/src/router.tsx @@ -924,6 +924,19 @@ function SessionPage() { onClearSendError={clearSendError} initialOutlineOpen={outline} onInitialOutlineConsumed={handleInitialOutlineConsumed} + onAbortRestore={(text) => { + sendErrorIdRef.current += 1 + setSendErrors((prev) => ({ + ...prev, + [sessionId]: { + id: sendErrorIdRef.current, + text, + message: t('chat.sendError.aborted'), + code: 'abort', + scheduledAt: null + } + })) + }} /> ) } From bff26e5d686a7ea466997d433155aadf0969e6c9 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Tue, 23 Jun 2026 19:26:07 +0900 Subject: [PATCH 17/26] fix(pty): restore only the in-flight prompt on abort abort-restore previously fired unconditionally and the web rebuilt the text by scanning backward for the last user message, so aborting during 
idle/startup/no-submission could replay an unrelated historical prompt into an empty composer and invite an accidental resend. Track the submitted prompt on the launcher (set on submit, cleared when the turn goes idle) and carry it on the abort-restore event itself. The event is emitted only when a prompt was actually in flight, and the web restores that exact text instead of guessing from history. --- cli/src/api/apiSession.ts | 3 +- .../__tests__/claudePtyLauncher.test.ts | 48 +++++++++++++++---- cli/src/claude/claudePtyLauncher.ts | 24 ++++++++-- web/src/chat/types.ts | 1 + web/src/components/SessionChat.tsx | 39 +++++++-------- 5 files changed, 79 insertions(+), 36 deletions(-) diff --git a/cli/src/api/apiSession.ts b/cli/src/api/apiSession.ts index 4c5eb392fe..4b9496f369 100644 --- a/cli/src/api/apiSession.ts +++ b/cli/src/api/apiSession.ts @@ -606,8 +606,9 @@ export class ApiSessionClient extends EventEmitter { type: 'ready' } | { // Emitted on abort so the web composer can restore the aborted prompt. - // The web side reads the last user message text from normalizedMessages. + // Carries the exact in-flight prompt text the web should restore. 
type: 'abort-restore' + text: string }, id?: string): void { const content = { role: 'agent', diff --git a/cli/src/claude/__tests__/claudePtyLauncher.test.ts b/cli/src/claude/__tests__/claudePtyLauncher.test.ts index 590e4c4537..11a60d9f83 100644 --- a/cli/src/claude/__tests__/claudePtyLauncher.test.ts +++ b/cli/src/claude/__tests__/claudePtyLauncher.test.ts @@ -157,6 +157,7 @@ function createSessionStub() { setConfigChangeHandler: (_handler: (() => void) | null) => {}, getModel: () => null, getEffort: () => undefined, + onThinkingChange: vi.fn(), addSessionFoundCallback: (cb: (sessionId: string) => void) => { harness.foundCallbacks.push(cb) }, removeSessionFoundCallback: () => {}, queue: { @@ -327,7 +328,7 @@ describe('claudePtyLauncher turn-interrupt', () => { await launcherPromise }) - it('emits abort-restore event on abort (regardless of whether a message was submitted)', async () => { + it('emits abort-restore carrying the submitted prompt text on abort', async () => { harness.exitReason = null const { session, sentSessionEvents } = createSessionStub() @@ -338,24 +339,24 @@ describe('claudePtyLauncher turn-interrupt', () => { await tick(50) - // Simulate a message being submitted via onMessageSubmitted callback + // Simulate the in-flight prompt being submitted via onMessageSubmitted. ptyOptsCaptured.onMessageSubmitted?.('hello world') - // Trigger abort after message was submitted + // Trigger abort while the submitted turn is still running. await mockAbortHandlers.onAbort() - // Should emit abort-restore (text is read by the web from normalizedMessages) + // abort-restore carries the exact submitted prompt so the web restores + // that text rather than scanning historical user turns. 
const restoreEvent = sentSessionEvents.find((e) => e.type === 'abort-restore') expect(restoreEvent).toBeDefined() - // abort-restore carries no text field — the web reads the last user message - expect((restoreEvent as any)?.text).toBeUndefined() + expect((restoreEvent as any)?.text).toBe('hello world') harness.exitReason = 'exit' msgPromise.resolve(null) await launcherPromise }) - it('emits abort-restore even when no message was submitted before abort', async () => { + it('does NOT emit abort-restore when no prompt was submitted before abort', async () => { harness.exitReason = null const { session, sentSessionEvents } = createSessionStub() @@ -366,12 +367,39 @@ describe('claudePtyLauncher turn-interrupt', () => { await tick(50) - // Abort without any message submitted first + // Abort during idle/startup, with no prompt ever submitted. await mockAbortHandlers.onAbort() - // Should still emit abort-restore (the web will find no user message to restore) + // Nothing was submitted → no prompt to restore → no event (so an old + // prompt is never replayed into an empty composer). const restoreEvent = sentSessionEvents.find((e) => e.type === 'abort-restore') - expect(restoreEvent).toBeDefined() + expect(restoreEvent).toBeUndefined() + + harness.exitReason = 'exit' + msgPromise.resolve(null) + await launcherPromise + }) + + it('does NOT emit abort-restore when the turn already went idle before abort', async () => { + harness.exitReason = null + + const { session, sentSessionEvents } = createSessionStub() + const msgPromise = deferred() + vi.mocked(session.queue.waitForMessagesAndGetAsString).mockImplementation(() => msgPromise.promise) + + const launcherPromise = claudePtyLauncher(session as never) + + await tick(50) + + // A prompt was submitted and its turn completed (thinking → idle). + ptyOptsCaptured.onMessageSubmitted?.('completed prompt') + ptyOptsCaptured.onThinkingChange?.(false) + + // A later abort must not resurrect the already-completed prompt. 
+ await mockAbortHandlers.onAbort() + + const restoreEvent = sentSessionEvents.find((e) => e.type === 'abort-restore') + expect(restoreEvent).toBeUndefined() harness.exitReason = 'exit' msgPromise.resolve(null) diff --git a/cli/src/claude/claudePtyLauncher.ts b/cli/src/claude/claudePtyLauncher.ts index 5a8c5102bd..0bb3cc6aa3 100644 --- a/cli/src/claude/claudePtyLauncher.ts +++ b/cli/src/claude/claudePtyLauncher.ts @@ -96,6 +96,11 @@ class ClaudePtyLauncher extends RemoteLauncherBase { private claudeSessionId: string | null = null // Live PTY controls (raw keystroke injection) for in-place /model and /effort. private ptyControls: { sendKeys: (data: string) => void } | null = null + // The prompt currently being processed, captured on submit and cleared when + // the turn goes idle. Drives abort-restore: only a prompt that is actually + // in flight when the user aborts is restored to the web composer — aborting + // during idle/startup/no-submission restores nothing. + private promptToRestoreOnAbort: string | null = null // The model/effort currently applied to the running Claude TUI, so a config // change only drives the slash command for what actually changed. private appliedModel: SessionModel = null @@ -310,6 +315,11 @@ class ClaudePtyLauncher extends RemoteLauncherBase { private async handleAbortRequest(): Promise { logger.debug('[pty]: handleAbortRequest (interrupt)') if (this.ptyControls) { + // Capture synchronously up front: the Esc interrupt below can drive + // the TUI back to idle (clearing promptToRestoreOnAbort via + // onThinkingChange) before this handler finishes its 150 ms wait. 
+ const promptToRestore = this.promptToRestoreOnAbort + this.promptToRestoreOnAbort = null logger.debug('[pty]: Sending interrupt key (Esc) to PTY') this.ptyControls.sendKeys('\x1b') // Wait briefly before clearing the line: claude TUI (ink) restores @@ -327,9 +337,12 @@ class ClaudePtyLauncher extends RemoteLauncherBase { // message that is now being aborted and should not be auto-delivered // to the fresh prompt. this.session.queue.reset() - // Signal the web composer to restore the aborted prompt text. - // The web side reads the last user message from normalizedMessages. - this.session.client.sendSessionEvent({ type: 'abort-restore' }) + // Signal the web composer to restore the exact prompt that was in + // flight. Skip the signal entirely when nothing was being processed + // so an old prompt is never replayed into an empty composer. + if (promptToRestore) { + this.session.client.sendSessionEvent({ type: 'abort-restore', text: promptToRestore }) + } } else { logger.debug('[pty]: No PTY controls active, falling back to aborting the controller') await this.abort() @@ -389,6 +402,8 @@ class ClaudePtyLauncher extends RemoteLauncherBase { return { message: msg.message } }, onMessageSubmitted: (message: string) => { + // Track the in-flight prompt for abort-restore on every submit. + this.promptToRestoreOnAbort = message if (firstSubmitVerified) return firstSubmitVerified = true void this.ensureFirstMessageDelivered(message, signal) @@ -406,6 +421,9 @@ class ClaudePtyLauncher extends RemoteLauncherBase { this.session.client.emitAgentTerminalOutput(data) }, onThinkingChange: (thinking: boolean) => { + // Turn finished → the prompt is no longer in flight, so a + // later abort during idle must not restore it. 
+ if (!thinking) this.promptToRestoreOnAbort = null this.session.onThinkingChange(thinking) }, registerControls: (controls) => { diff --git a/web/src/chat/types.ts b/web/src/chat/types.ts index f42d0540a6..47c0314e95 100644 --- a/web/src/chat/types.ts +++ b/web/src/chat/types.ts @@ -27,6 +27,7 @@ export type AgentEvent = | { type: 'compact'; trigger: string; preTokens: number } | { type: 'thread-goal-updated'; goal: ThreadGoal; threadId?: string; turnId?: string } | { type: 'thread-goal-cleared'; threadId?: string } + | { type: 'abort-restore'; text: string } | ({ type: string } & Record) export type ToolResultPermission = { diff --git a/web/src/components/SessionChat.tsx b/web/src/components/SessionChat.tsx index 5a44be0fc5..573ac8ca1e 100644 --- a/web/src/components/SessionChat.tsx +++ b/web/src/components/SessionChat.tsx @@ -254,10 +254,10 @@ function ShareSeedConsumer(props: { sessionId: string; sessionActive: boolean }) /** * Watches for incoming `abort-restore` events (emitted by the PTY launcher - * when the user aborts a running turn) and surfaces the last user message - * text via the existing sendError path (onAbortRestore prop) — but only when - * no user message has been sent after the abort-restore event, so we never - * replay a prompt the user has already submitted. + * when the user aborts a running turn) and surfaces the aborted prompt text — + * carried on the event itself — via the existing sendError path + * (onAbortRestore prop). Acts only when no user message has been sent after the + * abort-restore event, so we never replay a prompt the user already resubmitted. */ function AbortRestoreConsumer(props: { messages: NormalizedMessage[] @@ -269,33 +269,28 @@ function AbortRestoreConsumer(props: { // Walk backwards: find an abort-restore event with no user message after it. // If a user message comes after the abort-restore, the restore was already // acted on — treat it as consumed regardless of page reload. 
- let abortRestoreId: string | null = null + let abortRestore: { id: string; text: string } | null = null for (let i = props.messages.length - 1; i >= 0; i--) { const msg = props.messages[i] if (!msg) continue if (msg.role === 'user') break // user message after abort-restore → stale if (msg.role !== 'event') continue if (msg.content.type === 'abort-restore') { - abortRestoreId = msg.id + // The exact in-flight prompt rides on the event; no need to guess + // it by scanning historical user turns. + const text = typeof msg.content.text === 'string' ? msg.content.text : '' + abortRestore = { id: msg.id, text } break } } - if (!abortRestoreId) return - if (lastHandledIdRef.current === abortRestoreId) return - lastHandledIdRef.current = abortRestoreId - - // Find the last user message text before the abort-restore event and - // surface it via the sendError path so HappyComposer restores it in - // the same way it handles a failed send. - for (let i = props.messages.length - 1; i >= 0; i--) { - const msg = props.messages[i] - if (!msg) continue - if (msg.role !== 'user') continue - const text = msg.content.text - if (text.length > 0) { - props.onAbortRestore(text) - break - } + if (!abortRestore) return + if (lastHandledIdRef.current === abortRestore.id) return + lastHandledIdRef.current = abortRestore.id + + // Surface it via the sendError path so HappyComposer restores it the + // same way it handles a failed send. 
+ if (abortRestore.text.length > 0) { + props.onAbortRestore(abortRestore.text) } }, [props.messages, props.onAbortRestore]) From cbcbca82131040b6a16b7747e59c4964bd686a8c Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Tue, 23 Jun 2026 19:39:34 +0900 Subject: [PATCH 18/26] fix(pty): honor a name-level Bash session allow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A plain "Bash" entry in allowTools is stored in sessionAllowedTools as a name-level allow, but the Bash fast path only consulted the per-command literal/prefix sets — so a session-wide Bash approval was recorded and then ignored, re-prompting on every command. Check sessionAllowedTools for "Bash" before falling back to the per-command match. --- .../claude/utils/ptyPermissionHandler.test.ts | 16 ++++++++++++++++ cli/src/claude/utils/ptyPermissionHandler.ts | 5 +++++ 2 files changed, 21 insertions(+) diff --git a/cli/src/claude/utils/ptyPermissionHandler.test.ts b/cli/src/claude/utils/ptyPermissionHandler.test.ts index 70d56a942a..47ea9a8d1f 100644 --- a/cli/src/claude/utils/ptyPermissionHandler.test.ts +++ b/cli/src/claude/utils/ptyPermissionHandler.test.ts @@ -197,6 +197,22 @@ describe('PtyPermissionHandler', () => { expect(Object.keys(state.requests).length).toBe(before); }); + it('honors a plain "Bash" name-level session-allow for any later command', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const first = handler.requestDecision('b-1', 'Bash', { command: 'echo hi' }); + // web's "Allow all Bash for session" sends the bare tool name + await respond({ id: 'b-1', approved: true, allowTools: ['Bash'] }); + expect((await first).permissionDecision).toBe('allow'); + + // a DIFFERENT command is now covered by the name-level allow → no new request + const before = Object.keys(state.requests).length; + const second = await 
handler.requestDecision('b-2', 'Bash', { command: 'rm -rf /tmp/x' }); + expect(second.permissionDecision).toBe('allow'); + expect(Object.keys(state.requests).length).toBe(before); + }); + it('still prompts for a different Bash command after a literal session-allow', async () => { const { client, state, respond } = createFakeClient(); const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); diff --git a/cli/src/claude/utils/ptyPermissionHandler.ts b/cli/src/claude/utils/ptyPermissionHandler.ts index bb24aeffae..037125ddbc 100644 --- a/cli/src/claude/utils/ptyPermissionHandler.ts +++ b/cli/src/claude/utils/ptyPermissionHandler.ts @@ -96,6 +96,11 @@ export class PtyPermissionHandler extends BasePermissionHandler Date: Tue, 23 Jun 2026 20:21:25 +0900 Subject: [PATCH 19/26] fix(pty): keep the runner OAuth token when launching claude PTY mode unset every inherited CLAUDECODE / CLAUDE_CODE_* var before spawning claude so the child saves its transcript. That also dropped CLAUDE_CODE_OAUTH_TOKEN, which the runner uses to pass per-session auth, leaving runner-spawned PTY sessions unauthenticated (login prompt instead of starting). Exclude the token from the unset list. 
--- cli/src/claude/__tests__/claudePty.test.ts | 17 +++++++++++++++++ cli/src/claude/claudePty.ts | 5 ++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/cli/src/claude/__tests__/claudePty.test.ts b/cli/src/claude/__tests__/claudePty.test.ts index 989d07a1b3..3ccc928113 100644 --- a/cli/src/claude/__tests__/claudePty.test.ts +++ b/cli/src/claude/__tests__/claudePty.test.ts @@ -75,6 +75,23 @@ describe('claudePty wrapper', () => { expect(lastCall().extraEnv).toMatchObject({ DISABLE_AUTOUPDATER: '1' }) }) + it('drops inherited CLAUDECODE / CLAUDE_CODE_* but preserves the OAuth token', async () => { + const prev = { ...process.env } + process.env.CLAUDECODE = '1' + process.env.CLAUDE_CODE_ENTRYPOINT = 'cli' + process.env.CLAUDE_CODE_OAUTH_TOKEN = 'dummy-test-token' + try { + await claudePty(makeOpts()) + const unset = lastCall().unsetEnv ?? [] + expect(unset).toEqual(expect.arrayContaining(['CLAUDECODE', 'CLAUDE_CODE_ENTRYPOINT'])) + // The runner passes per-session auth via CLAUDE_CODE_OAUTH_TOKEN; it + // must survive into the PTY child or the session starts unauthenticated. + expect(unset).not.toContain('CLAUDE_CODE_OAUTH_TOKEN') + } finally { + process.env = prev + } + }) + it('forwards callbacks and signal', async () => { const nextMessage = vi.fn() const onReady = vi.fn() diff --git a/cli/src/claude/claudePty.ts b/cli/src/claude/claudePty.ts index 6841e68028..13c448ab12 100644 --- a/cli/src/claude/claudePty.ts +++ b/cli/src/claude/claudePty.ts @@ -59,7 +59,10 @@ const CLAUDE_IDLE_MARKERS = ['for shortcuts'] // NOT matched and is preserved.) function claudeInheritedEnvKeys(): string[] { return Object.keys(process.env).filter( - (k) => k === 'CLAUDECODE' || k.startsWith('CLAUDE_CODE_') + // Drop the nested-claude markers so the child saves its transcript, but + // keep CLAUDE_CODE_OAUTH_TOKEN: the runner passes per-session auth + // through it, and unsetting it leaves the PTY child unauthenticated. 
+ (k) => k === 'CLAUDECODE' || (k.startsWith('CLAUDE_CODE_') && k !== 'CLAUDE_CODE_OAUTH_TOKEN') ) } From a076dbea17d465c8b04f4a862e54eddd51566832 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Tue, 23 Jun 2026 20:52:46 +0900 Subject: [PATCH 20/26] fix(pty): fail spawn when the claude PTY never becomes ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pty spawn flow only waited for waitForSessionActive, but `active` is driven by session-alive, which every CLI session emits at construction — before the PTY launcher has spawned claude or reached a usable prompt. So a missing claude binary, auth failure, or early PTY exit could still make the spawn RPC return success and drop the user into an empty terminal. Emit session-ready from the launcher's onReady, and have the spawn flow wait for it (mirroring the resume path): a non-ready outcome now returns a spawn error instead of a false success. --- .../__tests__/claudePtyLauncher.test.ts | 20 +++++++++++++++++++ cli/src/claude/claudePtyLauncher.ts | 5 +++++ hub/src/sync/syncEngine.ts | 14 +++++++++++++ 3 files changed, 39 insertions(+) diff --git a/cli/src/claude/__tests__/claudePtyLauncher.test.ts b/cli/src/claude/__tests__/claudePtyLauncher.test.ts index 11a60d9f83..cc758d9995 100644 --- a/cli/src/claude/__tests__/claudePtyLauncher.test.ts +++ b/cli/src/claude/__tests__/claudePtyLauncher.test.ts @@ -167,6 +167,7 @@ function createSessionStub() { client: { sendClaudeSessionMessage: (msg: Record) => { sentMessages.push(msg) }, sendSessionEvent: vi.fn((event: Record) => { sentSessionEvents.push(event) }), + emitSessionReady: vi.fn(), emitAgentTerminalOutput: () => {}, setAgentTerminalControls: () => {}, resetAgentTerminal: () => {}, @@ -437,6 +438,25 @@ describe('claudePtyLauncher turn-interrupt', () => { await launcherPromise }) + it('emits session-ready to the hub when the PTY prompt becomes usable', async () => { + harness.exitReason = null + + const { session } = 
createSessionStub() + const msgPromise = deferred() + vi.mocked(session.queue.waitForMessagesAndGetAsString).mockImplementation(() => msgPromise.promise) + + const launcherPromise = claudePtyLauncher(session as never) + await tick(50) + + // onReady (fired by the default claudePty mock) must signal hub readiness, + // so the spawn flow can distinguish a usable prompt from a mere session-alive. + expect(session.client.emitSessionReady).toHaveBeenCalled() + + harness.exitReason = 'exit' + msgPromise.resolve(null) + await launcherPromise + }) + it('delays Ctrl-U until after the Esc interrupt has settled (~150 ms)', async () => { // Esc causes claude TUI to asynchronously restore the previous prompt. // Ctrl-U must arrive AFTER that restore, so we verify that Ctrl-U is NOT diff --git a/cli/src/claude/claudePtyLauncher.ts b/cli/src/claude/claudePtyLauncher.ts index 0bb3cc6aa3..c86ec750fd 100644 --- a/cli/src/claude/claudePtyLauncher.ts +++ b/cli/src/claude/claudePtyLauncher.ts @@ -411,6 +411,11 @@ class ClaudePtyLauncher extends RemoteLauncherBase { onReady: () => { reachedReady = true logger.debug('[pty]: claude PTY ready') + // Hub-level readiness: the spawn flow waits for this so a + // failed/auth-blocked/early-exit PTY launch surfaces as a + // spawn error instead of an empty terminal. session-alive + // (emitted at construction) is too early to mean "usable". 
+ this.session.client.emitSessionReady() this.session.client.sendSessionEvent({ type: 'ready' }) }, onMessage: (data: string) => { diff --git a/hub/src/sync/syncEngine.ts b/hub/src/sync/syncEngine.ts index 2b7c8fce85..f84df02e54 100644 --- a/hub/src/sync/syncEngine.ts +++ b/hub/src/sync/syncEngine.ts @@ -765,6 +765,20 @@ export class SyncEngine { if (!becameActive) { return { type: 'error', message: 'Session spawned but failed to become active' } } + // `active` only means the runner registered the session — session-alive + // fires at AgentSessionBase construction, before the PTY launcher has + // spawned claude or reached a usable prompt. Wait for session-ready + // (emitted from the launcher's onReady) so a missing-claude / auth / + // early-exit failure surfaces as a spawn error, not an empty terminal. + const readyResult = await this.waitForSessionReady(result.sessionId) + if (readyResult !== 'ready') { + return { + type: 'error', + message: readyResult === 'ended' + ? 'Session ended before the Claude PTY became ready' + : 'Session spawned but failed to become ready' + } + } } return result } From 7dc73556dc1b4010a1c541959575196a64021023 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Tue, 23 Jun 2026 21:07:32 +0900 Subject: [PATCH 21/26] fix(pty): forward startingMode to spawnSession (was lost as serviceTier) The spawn route passed parsed.data.startingMode positionally into spawnSession's serviceTier slot, leaving the real startingMode parameter undefined. SyncEngine then skipped the PTY readiness wait and the runner defaulted to --hapi-starting-mode remote, so the web PTY checkbox silently launched the SDK remote path. Both values are strings, so the slot mismatch type-checked and stayed latent. Pass undefined for serviceTier, and pass startingMode in its own (13th) slot.
--- hub/src/web/routes/machines.test.ts | 33 +++++++++++++++++++++++++++++ hub/src/web/routes/machines.ts | 5 +++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/hub/src/web/routes/machines.test.ts b/hub/src/web/routes/machines.test.ts index 3c4e6457ba..fd7d3f32d6 100644 --- a/hub/src/web/routes/machines.test.ts +++ b/hub/src/web/routes/machines.test.ts @@ -57,6 +57,39 @@ describe('machines routes', () => { }) }) + it('forwards startingMode "pty" to SyncEngine.spawnSession in the startingMode slot', async () => { + const machine = createMachine() + let captured: unknown[] | null = null + const engine = { + getMachine: () => machine, + getMachineByNamespace: () => machine, + spawnSession: async (...args: unknown[]) => { + captured = args + return { type: 'success', sessionId: 's-1' } + } + } as unknown as Partial + + const app = new Hono() + app.use('*', async (c, next) => { + c.set('namespace', 'default') + await next() + }) + app.route('/api', createMachinesRoutes(() => engine as SyncEngine)) + + const response = await app.request('/api/machines/machine-1/spawn', { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ directory: '/tmp/x', startingMode: 'pty' }) + }) + + expect(response.status).toBe(200) + expect(captured).not.toBeNull() + // startingMode is the 13th positional arg (index 12); serviceTier (index 11) + // must stay undefined — otherwise the runner silently falls back to remote. 
+ expect(captured![12]).toBe('pty') + expect(captured![11]).toBeUndefined() + }) + it('returns 400 when /opencode-models is called without cwd', async () => { const machine = createMachine() const engine = { diff --git a/hub/src/web/routes/machines.ts b/hub/src/web/routes/machines.ts index 01a53df8b0..2048934b47 100644 --- a/hub/src/web/routes/machines.ts +++ b/hub/src/web/routes/machines.ts @@ -49,9 +49,10 @@ export function createMachinesRoutes(getSyncEngine: () => SyncEngine | null): Ho parsed.data.yolo, parsed.data.sessionType, parsed.data.worktreeName, - undefined, + undefined, // resumeSessionId parsed.data.effort, - undefined, + undefined, // permissionMode + undefined, // serviceTier parsed.data.startingMode ) return c.json(result) From 5520fee0165fe3bfa9a861cd767a64d1acdd7186 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Tue, 23 Jun 2026 21:44:38 +0900 Subject: [PATCH 22/26] fix(pty): submit queued prompts only when the prompt is actually live MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit waitForInputReady latched promptSeen permanently, so after the first prompt every later wait returned on just output silence (~500ms). A quiet gap mid-response then satisfied it, letting a queued message be typed into a busy TUI — dropped, appended to the wrong input, or interrupting the turn. Replace the latched check with a re-armable inputReady flag: set by a prompt/idle marker (or the idle watchdog when an idle marker is missed), cleared on a busy marker and on each submit. A queued message now waits for a fresh prompt instead of any silence gap. 
--- cli/src/agent/__tests__/runAgentPty.test.ts | 36 +++++++++++++++++++++ cli/src/agent/runAgentPty.ts | 21 ++++++++++-- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/cli/src/agent/__tests__/runAgentPty.test.ts b/cli/src/agent/__tests__/runAgentPty.test.ts index 74779baa8b..8084ff1f4e 100644 --- a/cli/src/agent/__tests__/runAgentPty.test.ts +++ b/cli/src/agent/__tests__/runAgentPty.test.ts @@ -362,4 +362,40 @@ describe('runAgentPty', () => { expect(harness.m.write).not.toHaveBeenCalledWith('should not send') expect(harness.m.kill).toHaveBeenCalled() }) + + it('holds a queued message until a fresh prompt marker (not a mid-turn gap)', async () => { + const msg1 = deferred<{ message: string } | null>() + const msg2 = deferred<{ message: string } | null>() + const nextMessage = vi.fn() + .mockImplementationOnce(() => msg1.promise) + .mockImplementationOnce(() => msg2.promise) + .mockImplementation(() => Promise.resolve(null)) + const promise = runAgentPty(makeOpts({ + nextMessage, + promptMarkers: ['for shortcuts'], + busyMarkers: ['esc to interrupt'], + idleMarkers: ['for shortcuts'], + idleReadyMs: 20, + })) + + // Reach the first usable prompt, then let the first message submit. + harness.triggerData('? for shortcuts') + await tick(120) + msg1.resolve({ message: 'first' }) + await tick(120) + expect(harness.m.write).toHaveBeenCalledWith('first') + + // Turn is running: busy marker, then a quiet gap with NO idle marker. + harness.triggerData('working hard… esc to interrupt') + msg2.resolve({ message: 'second' }) + await tick(200) // exceeds idleReadyMs of silence, but the prompt has not returned + expect(harness.m.write).not.toHaveBeenCalledWith('second') + + // The prompt returns (idle marker) → the queued message may now be sent. + harness.triggerData('? 
for shortcuts') + await tick(150) + expect(harness.m.write).toHaveBeenCalledWith('second') + + await promise.catch(() => {}) + }) }) diff --git a/cli/src/agent/runAgentPty.ts b/cli/src/agent/runAgentPty.ts index ad880f0b51..c3da882822 100644 --- a/cli/src/agent/runAgentPty.ts +++ b/cli/src/agent/runAgentPty.ts @@ -120,6 +120,11 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { let sawOutput = false // For marker-based agents (claude): true once the input prompt rendered. let promptSeen = false + // Re-armable readiness: true only while the agent is actually sitting at an + // input prompt. Set by a prompt/idle marker (or the idle watchdog) and + // cleared on a busy marker and on every submit, so a queued message waits for + // a fresh prompt rather than any mid-turn output gap. + let inputReady = false // Whether the first-run trust/safety prompt has been auto-approved. let trustHandled = false @@ -151,6 +156,9 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { if (thinking) { logger.debug(`${debugPrefix} idle watchdog: ${IDLE_SILENCE_MS}ms of silence; forcing idle`) thinking = false + // The turn really ended even though no idle marker arrived, so the + // prompt is usable again — let the next queued message proceed. + inputReady = true opts.onThinkingChange?.(false) } }, IDLE_SILENCE_MS) @@ -177,11 +185,14 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { if (signal?.aborted || !manager.isRunning) return const idle = Date.now() - lastOutputAt if (hasMarkers) { - if (promptSeen && idle >= idleReadyMs) return + // Require the prompt to be live (inputReady), not just a silence + // gap — a long response can go quiet mid-turn. The idle watchdog + // re-arms inputReady if an idle marker is missed, and the outer + // timeout is the final fallback. 
+ if (inputReady && idle >= idleReadyMs) return } else if (sawOutput && idle >= idleReadyMs) { return } - if (sawOutput && idle >= 3000) return await sleep(80) } } @@ -246,14 +257,17 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { manager.write('\r') } else if (hasMarkers && !promptSeen && markers.some((m) => data.includes(m))) { promptSeen = true + inputReady = true } // Track the working/idle state from the live footer. The busy // marker (spinner/"esc to interrupt") wins when both appear in a // chunk; chunks with neither leave the state unchanged. if (busyMarkers.length > 0 && busyMarkers.some((m) => data.includes(m))) { setThinking(true) + inputReady = false } else if (idleMarkers.length > 0 && idleMarkers.some((m) => data.includes(m))) { setThinking(false) + inputReady = true } else if (thinking) { // Still producing output (e.g. streaming response text with no // footer marker in this chunk) — keep the silence watchdog at bay. @@ -347,6 +361,9 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { } if (process.env.DEBUG_PTY) logger.debug(`${debugPrefix} write(loop): ${next.message}`) + // The prompt is now consumed; the next queued message must wait for a + // fresh prompt/idle marker rather than this same just-cleared one. + inputReady = false await submitMessage(next.message) // The message has now been written to the PTY; let a caller verify it // actually landed (and repair it) without racing this submit path. From bc6e3280fbae66abdf915a3574b4b78090497933 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Wed, 24 Jun 2026 07:56:14 +0900 Subject: [PATCH 23/26] fix(pty): ack queued messages dropped on abort handleAbortRequest reset the queue without firing onBatchConsumed, so messages queued behind the aborted turn were cleared on the CLI but left invoked_at=null on the hub: they showed as permanently "queued" in the web and got re-delivered to the fresh prompt by seq-backfill on reconnect, defeating the abort. 
Collect their localIds and emit them consumed before the reset so the hub clears them. --- .../__tests__/claudePtyLauncher.test.ts | 25 +++++++++++++++++++ cli/src/claude/claudePtyLauncher.ts | 9 ++++++- cli/src/utils/MessageQueue2.ts | 11 ++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/cli/src/claude/__tests__/claudePtyLauncher.test.ts b/cli/src/claude/__tests__/claudePtyLauncher.test.ts index cc758d9995..7e96af2afd 100644 --- a/cli/src/claude/__tests__/claudePtyLauncher.test.ts +++ b/cli/src/claude/__tests__/claudePtyLauncher.test.ts @@ -163,11 +163,13 @@ function createSessionStub() { queue: { waitForMessagesAndGetAsString: vi.fn().mockResolvedValue(null), reset: vi.fn(), + pendingLocalIds: vi.fn(() => [] as string[]), }, client: { sendClaudeSessionMessage: (msg: Record) => { sentMessages.push(msg) }, sendSessionEvent: vi.fn((event: Record) => { sentSessionEvents.push(event) }), emitSessionReady: vi.fn(), + emitMessagesConsumed: vi.fn(), emitAgentTerminalOutput: () => {}, setAgentTerminalControls: () => {}, resetAgentTerminal: () => {}, @@ -438,6 +440,29 @@ describe('claudePtyLauncher turn-interrupt', () => { await launcherPromise }) + it('acks dropped queued messages on abort (no stuck-queued / re-delivery)', async () => { + harness.exitReason = null + + const { session } = createSessionStub() + session.queue.pendingLocalIds = vi.fn(() => ['l1', 'l2']) + const msgPromise = deferred() + vi.mocked(session.queue.waitForMessagesAndGetAsString).mockImplementation(() => msgPromise.promise) + + const launcherPromise = claudePtyLauncher(session as never) + await tick(50) + + await mockAbortHandlers.onAbort() + + // The queued (un-consumed) messages must be acked as consumed so the hub + // clears them instead of keeping them invoked_at=null (stuck / re-delivered). 
+ expect(session.client.emitMessagesConsumed).toHaveBeenCalledWith(['l1', 'l2']) + expect(session.queue.reset).toHaveBeenCalled() + + harness.exitReason = 'exit' + msgPromise.resolve(null) + await launcherPromise + }) + it('emits session-ready to the hub when the PTY prompt becomes usable', async () => { harness.exitReason = null diff --git a/cli/src/claude/claudePtyLauncher.ts b/cli/src/claude/claudePtyLauncher.ts index c86ec750fd..84b096f556 100644 --- a/cli/src/claude/claudePtyLauncher.ts +++ b/cli/src/claude/claudePtyLauncher.ts @@ -335,8 +335,15 @@ class ClaudePtyLauncher extends RemoteLauncherBase { this.ptyControls.sendKeys(ClaudePtyLauncher.PTY_CLEAR_LINE) // Drop pending queued messages — they were enqueued AFTER the // message that is now being aborted and should not be auto-delivered - // to the fresh prompt. + // to the fresh prompt. Ack them as consumed first: reset() alone + // clears the queue without firing onBatchConsumed, so the hub would + // keep them invoked_at=null (stuck "queued" in the web, and re-sent + // to the fresh prompt by seq-backfill on reconnect) — defeating abort. + const droppedLocalIds = this.session.queue.pendingLocalIds() this.session.queue.reset() + if (droppedLocalIds.length > 0) { + this.session.client.emitMessagesConsumed(droppedLocalIds) + } // Signal the web composer to restore the exact prompt that was in // flight. Skip the signal entirely when nothing was being processed // so an old prompt is never replayed into an empty composer. diff --git a/cli/src/utils/MessageQueue2.ts b/cli/src/utils/MessageQueue2.ts index 54d0e5a3d2..5565d9d53d 100644 --- a/cli/src/utils/MessageQueue2.ts +++ b/cli/src/utils/MessageQueue2.ts @@ -281,6 +281,17 @@ export class MessageQueue2 { this.waiter = null; } + /** + * localIds of messages still pending in the queue (enqueued but not yet + * consumed/acked). Lets a caller reconcile them with the hub before a + * reset() that would otherwise drop them without an ack. 
+ */ + pendingLocalIds(): string[] { + return this.queue + .map((item) => item.localId) + .filter((id): id is string => typeof id === 'string'); + } + /** * Close the queue - no more messages can be pushed */ From 0be9c0f41bda63482dacc7b6617c232d72a1ec3e Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Wed, 24 Jun 2026 07:57:27 +0900 Subject: [PATCH 24/26] fix(pty): gate PTY resume on session-ready like spawn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resumeSession waited only for waitForSessionActive (session-alive), with the ready gate scoped to Cursor ACP. A PTY resume that never reached a usable claude prompt (auth failure, bad --resume, early exit) therefore returned success and dropped the user into a black terminal — the same gap the spawn path already closed. Add the pty session-ready wait to the resume path. --- hub/src/sync/syncEngine.ts | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/hub/src/sync/syncEngine.ts b/hub/src/sync/syncEngine.ts index f84df02e54..495292205e 100644 --- a/hub/src/sync/syncEngine.ts +++ b/hub/src/sync/syncEngine.ts @@ -1254,6 +1254,23 @@ export class SyncEngine { return { type: 'error', message: 'Session failed to become active', code: 'resume_failed' } } + // PTY resume: like the spawn path, `active` (session-alive) only means the + // runner registered the session, not that the claude PTY reached a usable + // prompt. Wait for session-ready so a failed/auth-blocked PTY resume + // surfaces as resume_failed instead of a black terminal. + if (resumedStartingMode === 'pty') { + const readyResult = await this.waitForSessionReady(spawnResult.sessionId) + if (readyResult !== 'ready') { + return { + type: 'error', + message: readyResult === 'ended' + ? 'Session ended before the Claude PTY became ready' + : 'Session failed to become ready', + code: 'resume_failed' + } + } + } + // permissionMode is passed to spawnSession above; do not call set-session-config here. 
// session-alive can arrive before the CLI registers that RPC handler, which caused resume_failed. From ff8891d577ef1d795d9155150398402e875d6d12 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Wed, 24 Jun 2026 08:41:14 +0900 Subject: [PATCH 25/26] fix(pty): never submit a queued prompt into a still-running turn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit waitForInputReady returned void and fell through after a fixed 20s timeout, and the loop submitted the next queued message regardless. A healthy Claude turn often runs longer than 20s while still emitting busy output, so the queued prompt was typed into the active TUI. Return a boolean and wait until the prompt is actually live: the loop now keeps waiting through a long turn and only stops on process exit/abort (false → break). A startup-only hard cap preserves resilience to a quirky output-less spawn; the message loop has no such cap. --- cli/src/agent/__tests__/runAgentPty.test.ts | 40 +++++++++++++++++ cli/src/agent/runAgentPty.ts | 48 +++++++++++++-------- 2 files changed, 69 insertions(+), 19 deletions(-) diff --git a/cli/src/agent/__tests__/runAgentPty.test.ts b/cli/src/agent/__tests__/runAgentPty.test.ts index 8084ff1f4e..12de8d5285 100644 --- a/cli/src/agent/__tests__/runAgentPty.test.ts +++ b/cli/src/agent/__tests__/runAgentPty.test.ts @@ -398,4 +398,44 @@ describe('runAgentPty', () => { await promise.catch(() => {}) }) + + it('does not submit a queued message while a long turn keeps emitting busy output', async () => { + const msg1 = deferred<{ message: string } | null>() + const msg2 = deferred<{ message: string } | null>() + const nextMessage = vi.fn() + .mockImplementationOnce(() => msg1.promise) + .mockImplementationOnce(() => msg2.promise) + .mockImplementation(() => Promise.resolve(null)) + const promise = runAgentPty(makeOpts({ + nextMessage, + promptMarkers: ['for shortcuts'], + busyMarkers: ['esc to interrupt'], + idleMarkers: ['for shortcuts'], + 
idleReadyMs: 20, + })) + + harness.triggerData('? for shortcuts') + await tick(120) + msg1.resolve({ message: 'first' }) + await tick(120) + expect(harness.m.write).toHaveBeenCalledWith('first') + + // A long turn: keep emitting busy output well past idleReadyMs. There is + // no fixed readiness timeout, so the queued message must keep waiting and + // never be typed into the busy TUI. + harness.triggerData('thinking… esc to interrupt') + msg2.resolve({ message: 'second' }) + for (let i = 0; i < 8; i++) { + await tick(50) + harness.triggerData('still working… esc to interrupt') + } + expect(harness.m.write).not.toHaveBeenCalledWith('second') + + // Turn ends (idle marker) → the queued message is finally submitted. + harness.triggerData('? for shortcuts') + await tick(150) + expect(harness.m.write).toHaveBeenCalledWith('second') + + await promise.catch(() => {}) + }) }) diff --git a/cli/src/agent/runAgentPty.ts b/cli/src/agent/runAgentPty.ts index c3da882822..06321fac11 100644 --- a/cli/src/agent/runAgentPty.ts +++ b/cli/src/agent/runAgentPty.ts @@ -175,24 +175,31 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { opts.onThinkingChange?.(next) } - // Wait until the agent's TUI is ready to receive input. Marker-based agents - // require both the prompt marker AND settled output; markerless agents use - // idle alone. A longer-idle fallback prevents hanging if a marker never - // matches (UI change). - const waitForInputReady = async (timeoutMs = 20000): Promise => { + // Wait until the agent's TUI is actually ready to receive input. Returns + // true once ready, or false if the process exited / the caller aborted — so + // the message loop never submits a queued prompt into a turn that is still + // running. 
Marker-based agents (claude) require a LIVE prompt (`inputReady`, + // re-armed by a prompt/idle marker or the idle watchdog), not just an output + // gap, so a long-but-healthy turn keeps us waiting instead of timing out and + // typing into a busy TUI. The quiet fallback only covers a prompt marker + // that never matches (claude UI change) while the agent is genuinely idle. + const QUIET_FALLBACK_MS = 10000 + const waitForInputReady = async (opts?: { proceedAfterMs?: number }): Promise => { const start = Date.now() - while (Date.now() - start < timeoutMs) { - if (signal?.aborted || !manager.isRunning) return + while (true) { + if (signal?.aborted || !manager.isRunning) return false const idle = Date.now() - lastOutputAt if (hasMarkers) { - // Require the prompt to be live (inputReady), not just a silence - // gap — a long response can go quiet mid-turn. The idle watchdog - // re-arms inputReady if an idle marker is missed, and the outer - // timeout is the final fallback. - if (inputReady && idle >= idleReadyMs) return + if (inputReady && idle >= idleReadyMs) return true + if (sawOutput && idle >= QUIET_FALLBACK_MS) return true } else if (sawOutput && idle >= idleReadyMs) { - return + return true } + // Startup only: proceed after a hard cap even if no prompt was ever + // detected, so a quirky spawn doesn't hang forever before onReady. The + // message loop passes no cap, so it waits for a real prompt instead of + // ever submitting a queued message blindly into a running turn. + if (opts?.proceedAfterMs != null && Date.now() - start >= opts.proceedAfterMs) return true await sleep(80) } } @@ -312,8 +319,9 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { // Wait until the prompt is actually usable BEFORE any message arrives, so // the first user message is processed immediately instead of being - // consumed as the spawn trigger. - await waitForInputReady() + // consumed as the spawn trigger. 
Hard-capped so an output-less spawn still + // proceeds to the readiness check below instead of hanging here. + await waitForInputReady({ proceedAfterMs: 20000 }) // A successful spawn() does not mean the agent reached a working prompt: // it can spawn and then exit before rendering one (bad config, invalid @@ -353,10 +361,12 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { continue } - // Queue semantics: wait until output goes idle (agent back at the - // prompt) before sending the next queued message. - await waitForInputReady() - if (!manager.isRunning || signal?.aborted) { + // Queue semantics: wait until the agent is back at the prompt before + // sending the next queued message. A false return means the process + // exited or the caller aborted while we waited — stop the loop rather + // than submit into a dead/aborted session. + const ready = await waitForInputReady() + if (!ready) { break } From 6bf327839181bdcf6a4006fd5253ffbd3ebf685e Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Wed, 24 Jun 2026 20:31:28 +0900 Subject: [PATCH 26/26] fix(pty): key user terminal scrollback per terminal, not per session A session can have several independent shell terminals open at once (each a separate PTY, up to maxTerminalsPerSession), but the scrollback buffer was keyed by sessionId alone. One shell's output was appended to the shared buffer and replayed into another terminal that never ran it, and exiting one terminal cleared every terminal's scrollback. Key the buffer by sessionId+terminalId and clear only the exiting terminal's entry. 
--- .../socket/handlers/cli/terminalHandlers.ts | 7 ++-- hub/src/socket/handlers/terminal.ts | 6 ++-- hub/src/socket/userTerminalBuffer.test.ts | 36 +++++++++++++------ hub/src/socket/userTerminalBuffer.ts | 34 ++++++++++-------- 4 files changed, 52 insertions(+), 31 deletions(-) diff --git a/hub/src/socket/handlers/cli/terminalHandlers.ts b/hub/src/socket/handlers/cli/terminalHandlers.ts index a83f20deb7..7855fa17b1 100644 --- a/hub/src/socket/handlers/cli/terminalHandlers.ts +++ b/hub/src/socket/handlers/cli/terminalHandlers.ts @@ -122,9 +122,10 @@ export function registerTerminalHandlers(socket: CliSocketWithData, deps: Termin return } terminalRegistry.remove(parsed.data.terminalId) - // Drop the scrollback so a reconnecting viewer doesn't replay a dead - // terminal's output, and so the buffer doesn't leak for the session's life. - clearUserTerminalBuffer(parsed.data.sessionId) + // Drop only this terminal's scrollback so a reconnecting viewer doesn't + // replay a dead terminal's output (and the buffer doesn't leak), without + // wiping the scrollback of the session's other live terminals. + clearUserTerminalBuffer(parsed.data.sessionId, parsed.data.terminalId) const terminalSocket = terminalNamespace.sockets.get(entry.socketId) if (!terminalSocket) { return diff --git a/hub/src/socket/handlers/terminal.ts b/hub/src/socket/handlers/terminal.ts index ebfdf60024..0f293a96de 100644 --- a/hub/src/socket/handlers/terminal.ts +++ b/hub/src/socket/handlers/terminal.ts @@ -142,9 +142,9 @@ export function registerTerminalHandlers(socket: SocketWithData, deps: TerminalH // Replay buffered output so the terminal shows scrollback immediately // instead of staying black until the next output from CLI. // The buffer is never explicitly cleared here: it persists so a client - // that navigates away and back (new socket, isReconnect=false) still - // sees the accumulated output. It is bounded to 256KB per session. 
- const buffered = getUserTerminalBuffer(sessionId) + // whose socket reconnects with the same terminalId still sees the + // accumulated output. It is bounded to 256KB per terminal. + const buffered = getUserTerminalBuffer(sessionId, terminalId) if (buffered && !isReconnect) { socket.emit('terminal:output', { terminalId, data: buffered }) } diff --git a/hub/src/socket/userTerminalBuffer.test.ts b/hub/src/socket/userTerminalBuffer.test.ts index 898e6cbf2c..a507d4adda 100644 --- a/hub/src/socket/userTerminalBuffer.test.ts +++ b/hub/src/socket/userTerminalBuffer.test.ts @@ -2,33 +2,49 @@ import { describe, it, expect } from 'bun:test' import { appendUserTerminalOutput, getUserTerminalBuffer, clearUserTerminalBuffer } from './userTerminalBuffer' describe('userTerminalBuffer', () => { - it('stores and retrieves output per session', () => { + it('stores and retrieves output per terminal', () => { appendUserTerminalOutput('s1', 't1', 'hello ') appendUserTerminalOutput('s1', 't1', 'world') - expect(getUserTerminalBuffer('s1')).toBe('hello world') + expect(getUserTerminalBuffer('s1', 't1')).toBe('hello world') }) it('keeps sessions isolated', () => { appendUserTerminalOutput('sa', 't1', 'alpha') appendUserTerminalOutput('sb', 't1', 'beta') - expect(getUserTerminalBuffer('sa')).toBe('alpha') - expect(getUserTerminalBuffer('sb')).toBe('beta') + expect(getUserTerminalBuffer('sa', 't1')).toBe('alpha') + expect(getUserTerminalBuffer('sb', 't1')).toBe('beta') }) - it('returns empty string for unknown session', () => { - expect(getUserTerminalBuffer('nonexistent')).toBe('') + it('keeps independent terminals of the same session isolated', () => { + appendUserTerminalOutput('s2', 'tA', 'output-from-A') + appendUserTerminalOutput('s2', 'tB', 'output-from-B') + // Each terminal replays only its own output — never the other shell's. 
+ expect(getUserTerminalBuffer('s2', 'tA')).toBe('output-from-A') + expect(getUserTerminalBuffer('s2', 'tB')).toBe('output-from-B') + }) + + it('clearing one terminal does not wipe a sibling terminal of the same session', () => { + appendUserTerminalOutput('s6', 'tA', 'keep-A') + appendUserTerminalOutput('s6', 'tB', 'keep-B') + clearUserTerminalBuffer('s6', 'tA') + expect(getUserTerminalBuffer('s6', 'tA')).toBe('') + expect(getUserTerminalBuffer('s6', 'tB')).toBe('keep-B') + }) + + it('returns empty string for unknown terminal', () => { + expect(getUserTerminalBuffer('nonexistent', 't1')).toBe('') }) it('ignores empty data', () => { appendUserTerminalOutput('s3', 't1', 'keep') appendUserTerminalOutput('s3', 't1', '') - expect(getUserTerminalBuffer('s3')).toBe('keep') + expect(getUserTerminalBuffer('s3', 't1')).toBe('keep') }) it('clears buffer on demand', () => { appendUserTerminalOutput('s4', 't1', 'data') - clearUserTerminalBuffer('s4') - expect(getUserTerminalBuffer('s4')).toBe('') + clearUserTerminalBuffer('s4', 't1') + expect(getUserTerminalBuffer('s4', 't1')).toBe('') }) it('rolls over at max size', () => { @@ -37,7 +53,7 @@ describe('userTerminalBuffer', () => { for (let i = 0; i < 2600; i++) { appendUserTerminalOutput('s5', 't1', small) } - const buf = getUserTerminalBuffer('s5') + const buf = getUserTerminalBuffer('s5', 't1') // Should be at most MAX_BUFFER_BYTES (256KB) const MAX = 256 * 1024 expect(buf.length).toBeLessThanOrEqual(MAX) diff --git a/hub/src/socket/userTerminalBuffer.ts b/hub/src/socket/userTerminalBuffer.ts index 0b1e4730a0..ce4f697d2d 100644 --- a/hub/src/socket/userTerminalBuffer.ts +++ b/hub/src/socket/userTerminalBuffer.ts @@ -1,31 +1,35 @@ -// Per-session scrollback buffer for the user (remote) terminal output. +// Per-terminal scrollback buffer for the user (remote) terminal output. // -// A web client that navigates away and back creates a new xterm.js instance -// with a new terminalId, so the previous output is lost. 
We keep a rolling -// buffer per session so a fresh subscriber can replay the current terminal -// content immediately instead of showing a black screen until the next -// keystroke or output. +// A web client whose socket drops and reconnects re-subscribes with the SAME +// terminalId (held in a ref across transient reconnects), so we keep a rolling +// buffer per terminal to replay the current content immediately instead of +// showing a black screen until the next keystroke or output. // -// The buffer is keyed by sessionId only (not terminalId) because each -// navigation creates a new terminalId for the same session. +// The buffer is keyed by sessionId + terminalId (not sessionId alone): a session +// may have several independent terminals open at once (each a separate shell +// PTY, up to maxTerminalsPerSession), so keying by session alone would mix one +// shell's output into another and replay it into a terminal that never ran it. const MAX_BUFFER_BYTES = 256 * 1024 const buffers = new Map() -export function appendUserTerminalOutput(sessionId: string, _terminalId: string, data: string): void { +const keyFor = (sessionId: string, terminalId: string): string => `${sessionId}:${terminalId}` + +export function appendUserTerminalOutput(sessionId: string, terminalId: string, data: string): void { if (!data) return - const next = (buffers.get(sessionId) ?? '') + data + const key = keyFor(sessionId, terminalId) + const next = (buffers.get(key) ?? '') + data buffers.set( - sessionId, + key, next.length > MAX_BUFFER_BYTES ? next.slice(next.length - MAX_BUFFER_BYTES) : next ) } -export function getUserTerminalBuffer(sessionId: string): string { - return buffers.get(sessionId) ?? '' +export function getUserTerminalBuffer(sessionId: string, terminalId: string): string { + return buffers.get(keyFor(sessionId, terminalId)) ?? 
'' } -export function clearUserTerminalBuffer(sessionId: string): void { - buffers.delete(sessionId) +export function clearUserTerminalBuffer(sessionId: string, terminalId: string): void { + buffers.delete(keyFor(sessionId, terminalId)) }