diff --git a/CONTEXT.md b/CONTEXT.md index 585d62a7c..ded0aea4b 100644 --- a/CONTEXT.md +++ b/CONTEXT.md @@ -49,6 +49,7 @@ - Coverage manifest: `CONTRACT_COVERAGE` export beside each interaction contract test file claiming which matrix cells it proves; the coverage gate requires every enforced/delegated cell to be claimed and rejects overclaims of waived cells. - Delegation-on-error: a fast path falling back to the runtime path on semantic failure shapes. It closes failure-side guarantee cells only — never success-path parity. - Ref generation pin: optional `~s` suffix on an @ref carrying the snapshot generation it was minted from. Accepted as input everywhere, emitted by no tree output (snapshot token budget), auto-appended by the MCP layer, stripped and ignored by replay. +- Settled observation: opt-in (`--settle`) post-action payload on press/click/fill/longpress — the quiet-window stable loop re-captures until the UI settles, and the response carries the diff vs the pre-action tree (changed lines only, added lines with fresh refs, `refsGeneration` when the settled tree was stored). Best-effort: never fails the action; `settled: false` plus a hint on never-quiet content. - Snapshot capture plan: per-strategy ordered chain of iOS snapshot capture backends (recursive tree, query sweep, private AX) run by one plan runner under a shared wall-clock budget; recovery ordering is declared data, never a per-call-site branch. - Snapshot quality verdict: structured outcome (state, backend, reason code, effective depth, collapsed leaves) computed once by the plan runner and shipped with every planned snapshot payload; the daemon and CLI render it instead of re-deriving degradation from node shapes. - AX-unavailable target invalidation: iOS/macOS runner behavior where a root accessibility snapshot failure such as `kAXErrorIllegalArgument` marks the cached `XCUIApplication` target handle suspect. 
The runner fails closed for degraded interactive snapshots, clears the cached target, and lets the next command reacquire the app through normal activation. diff --git a/scripts/integration-progress-model.ts b/scripts/integration-progress-model.ts index e6c870b2b..c63e78f42 100644 --- a/scripts/integration-progress-model.ts +++ b/scripts/integration-progress-model.ts @@ -185,6 +185,8 @@ function summarizeProviderScenarioFlagCoverage(files) { ['findFirst', 'find first disambiguation'], ['findLast', 'find last disambiguation'], ['verify', 'post-action evidence capture on press/click/fill'], + ['settle', 'post-action settled-diff observation on press/click/fill/longpress'], + ['settleQuietMs', 'settle quiet-window tuning'], ]; const sources = files.map((file) => fs.readFileSync(file, 'utf8')).join('\n'); return flagTargets.map(([key, reason, aliases = []]) => { diff --git a/src/cli/parser/cli-flags.ts b/src/cli/parser/cli-flags.ts index 2645690fa..bb56bc46b 100644 --- a/src/cli/parser/cli-flags.ts +++ b/src/cli/parser/cli-flags.ts @@ -106,6 +106,8 @@ export type CliFlags = CloudProviderProfileFields & pixels?: number; doubleTap?: boolean; verify?: boolean; + settle?: boolean; + settleQuietMs?: number; clickButton?: ClickButton; backMode?: BackMode; pauseMs?: number; @@ -211,6 +213,10 @@ export const REPEATED_TOUCH_FLAGS = flagKeys( 'jitterPx', 'doubleTap', ); +// press/click/fill/longpress --settle (#1101): opt-in settled-diff observation. +// --timeout doubles as the settle deadline (flag-sourced budget on the +// interaction descriptors, mirroring wait's positional budget). 
+export const SETTLE_FLAGS = flagKeys('settle', 'settleQuietMs', 'timeoutMs'); export const REPLAY_FLAGS = flagKeys('replayUpdate', 'replayEnv'); const FLAG_DEFINITIONS: readonly FlagDefinition[] = [ @@ -848,6 +854,22 @@ const FLAG_DEFINITIONS: readonly FlagDefinition[] = [ usageDescription: 'Capture cheap post-action evidence (AX digest, node counts, changedFromBefore) instead of a follow-up snapshot', }, + { + key: 'settle', + names: ['--settle'], + type: 'boolean', + usageLabel: '--settle', + usageDescription: + 'After the action, wait for the UI to go quiet and return the settled diff vs the pre-action tree in the same response (best-effort; never fails the action)', + }, + { + key: 'settleQuietMs', + names: ['--settle-quiet'], + type: 'int', + min: 0, + usageLabel: '--settle-quiet ', + usageDescription: 'Settle: quiet window the UI must hold to count as settled (default 500ms)', + }, { key: 'clickButton', names: ['--button'], @@ -1051,7 +1073,7 @@ const FLAG_DEFINITIONS: readonly FlagDefinition[] = [ min: 1, usageLabel: '--timeout ', usageDescription: - 'Prepare/Replay/Snapshot/Test: maximum wall-clock time for the command or attempt', + 'Prepare/Replay/Snapshot/Test: maximum wall-clock time for the command or attempt. 
With --settle: the settle-wait deadline (default 10s)', }, { key: 'retries', diff --git a/src/cli/parser/cli-help.ts b/src/cli/parser/cli-help.ts index 3a5a5f3da..740234e38 100644 --- a/src/cli/parser/cli-help.ts +++ b/src/cli/parser/cli-help.ts @@ -55,12 +55,17 @@ const AGENT_START_LINES = [ 'Default to agent-device for installs, opens, snapshots, interactions, screenshots, logs, network/perf evidence, and verification.', 'Use raw adb, simctl, xcrun, or platform scripts only when this help calls out a tool gap or platform setup step.', 'Start with agent-device help workflow to understand the core loop and how to use the tool.', + // Benchmarked 2026-07-05 (#1101): agents that only read --help skipped the + // help-workflow pointer and fell into plain-snapshot loops; stating the loop + // here is what makes small models pick snapshot -i and --settle unprompted. + 'Core loop: open -> snapshot -i (interactive tree with @refs) -> press/click/fill --settle (returns the settled diff with fresh @refs) -> repeat. Verify with diff snapshot -i.', ] as const; const AGENT_QUICKSTART_LINES = [ 'Planning output contract: when asked to plan commands, output command lines only: no prose, numbering, Markdown fences, pipes, or shell helpers.', 'Default loop: devices/apps -> open -> snapshot -i -> press/fill/get/is/wait/find -> verify with diff snapshot -> close.', 'Verify a mutation with diff snapshot (or diff snapshot -i), not a full snapshot: it prints only the added/removed/changed lines since the last snapshot in this session, so confirming an action costs a few lines instead of the whole tree.', + 'Collapse act+observe into one call with --settle on press/click/fill/longpress: the response waits for the UI to go quiet and carries the settled diff with fresh refs (settled: false plus a hint on never-quiet content; the action itself never fails). 
Tune with --settle-quiet and --timeout .', 'Use selectors or refs as positional targets: id="submit", label="Allow", or @e12 from snapshot -i.', 'Pin a ref to the snapshot that minted it with ~s (n = refsGeneration in the snapshot response): press @e12~s4. Pinned refs get exact staleness warnings instead of the coarse tree-changed one; plain refs stay valid input.', 'Plain snapshot reads state; snapshot -i refreshes current interactive refs only.', diff --git a/src/client/client-normalizers.ts b/src/client/client-normalizers.ts index f47dc0dfa..f9000dbcb 100644 --- a/src/client/client-normalizers.ts +++ b/src/client/client-normalizers.ts @@ -343,6 +343,8 @@ export function buildFlags(options: InternalRequestOptions): CommandFlags { pixels: options.pixels, doubleTap: options.doubleTap, verify: options.verify, + settle: options.settle, + settleQuietMs: options.settleQuietMs, clickButton: options.clickButton, pauseMs: options.pauseMs, pattern: options.pattern, diff --git a/src/client/client-types.ts b/src/client/client-types.ts index c253a71ad..2c422d75d 100644 --- a/src/client/client-types.ts +++ b/src/client/client-types.ts @@ -629,10 +629,24 @@ export type CaptureDiffOptions = DeviceCommandBaseOptions & out?: string; }; +/** + * Opt-in (#1101): after the action, wait for the UI to go quiet and return the + * settled diff vs the pre-action tree (`settle` on the result) in the same + * response. Best-effort — never fails the action. `settleQuietMs` tunes the + * quiet window (default 500ms); `timeoutMs` bounds the settle wait (default + * 10s) and is rejected without `settle`. 
+ */ +type SettleCommandOptions = { + settle?: boolean; + settleQuietMs?: number; + timeoutMs?: number; +}; + export type ClickOptions = DeviceCommandBaseOptions & SelectorSnapshotCommandOptions & InteractionTarget & - RepeatedPressOptions & { + RepeatedPressOptions & + SettleCommandOptions & { button?: ClickButton; /** * Opt-in (#1047): return cheap post-action evidence (AX digest, node counts, @@ -645,13 +659,15 @@ export type ClickOptions = DeviceCommandBaseOptions & export type PressOptions = DeviceCommandBaseOptions & SelectorSnapshotCommandOptions & InteractionTarget & - RepeatedPressOptions & { + RepeatedPressOptions & + SettleCommandOptions & { verify?: boolean; }; export type LongPressOptions = DeviceCommandBaseOptions & SelectorSnapshotCommandOptions & - InteractionTarget & { + InteractionTarget & + SettleCommandOptions & { durationMs?: number; }; @@ -697,7 +713,8 @@ export type TypeTextOptions = DeviceCommandBaseOptions & { export type FillOptions = DeviceCommandBaseOptions & SelectorSnapshotCommandOptions & - InteractionTarget & { + InteractionTarget & + SettleCommandOptions & { text: string; delayMs?: number; verify?: boolean; @@ -928,6 +945,8 @@ type CommandExecutionOptions = Partial & { pixels?: number; doubleTap?: boolean; verify?: boolean; + settle?: boolean; + settleQuietMs?: number; clickButton?: ClickButton; pauseMs?: number; pattern?: SwipePattern; diff --git a/src/commands/cli-grammar/common.ts b/src/commands/cli-grammar/common.ts index 16972dd21..587961efa 100644 --- a/src/commands/cli-grammar/common.ts +++ b/src/commands/cli-grammar/common.ts @@ -88,6 +88,16 @@ export function selectorSnapshotOptionsFromFlags(flags: CliFlags): SelectorSnaps }; } +// press/click/fill/longpress --settle (#1101). --timeout doubles as the settle +// deadline on these commands (the daemon rejects it without --settle). 
+export function settleInputFromFlags(flags: CliFlags): Record { + return compactRecord({ + settle: flags.settle, + settleQuietMs: flags.settleQuietMs, + timeoutMs: flags.timeoutMs, + }); +} + export function repeatedInputFromFlags(flags: CliFlags): Record { return compactRecord({ count: flags.count, diff --git a/src/commands/interaction/index.ts b/src/commands/interaction/index.ts index 6ae015fb3..65a61af02 100644 --- a/src/commands/interaction/index.ts +++ b/src/commands/interaction/index.ts @@ -18,7 +18,11 @@ import type { TypeTextOptions, } from '../../client/client-types.ts'; import type { CommandSchemaOverride } from '../../utils/cli-command-schema-types.ts'; -import { REPEATED_TOUCH_FLAGS, SELECTOR_SNAPSHOT_FLAGS } from '../../cli/parser/cli-flags.ts'; +import { + REPEATED_TOUCH_FLAGS, + SELECTOR_SNAPSHOT_FLAGS, + SETTLE_FLAGS, +} from '../../cli/parser/cli-flags.ts'; import { defineCommandFacet, defineCommandFamilyFromFacets } from '../family/types.ts'; import { defineExecutableCommand } from '../command-contract.ts'; import { @@ -70,7 +74,13 @@ const interactionCliSchemas = { usageOverride: 'click ', positionalArgs: ['target'], allowsExtraPositionals: true, - allowedFlags: [...REPEATED_TOUCH_FLAGS, 'clickButton', 'verify', ...SELECTOR_SNAPSHOT_FLAGS], + allowedFlags: [ + ...REPEATED_TOUCH_FLAGS, + 'clickButton', + 'verify', + ...SETTLE_FLAGS, + ...SELECTOR_SNAPSHOT_FLAGS, + ], }, press: { usageOverride: 'press ', @@ -78,7 +88,7 @@ const interactionCliSchemas = { 'Short press a semantic UI target by ref, selector, or point. 
For native context menus or hold gestures, use longpress instead of press --hold-ms.', positionalArgs: ['targetOrX', 'y?'], allowsExtraPositionals: true, - allowedFlags: [...REPEATED_TOUCH_FLAGS, 'verify', ...SELECTOR_SNAPSHOT_FLAGS], + allowedFlags: [...REPEATED_TOUCH_FLAGS, 'verify', ...SETTLE_FLAGS, ...SELECTOR_SNAPSHOT_FLAGS], }, longpress: { usageOverride: 'longpress [durationMs]', @@ -86,7 +96,7 @@ const interactionCliSchemas = { 'Open native context menus or long-press targets by ref, selector, or point. Duration is positional, for example longpress @e12 800 or longpress 300 500 800.', positionalArgs: ['targetOrX', 'yOrDurationMs?', 'durationMs?'], allowsExtraPositionals: true, - allowedFlags: [...SELECTOR_SNAPSHOT_FLAGS], + allowedFlags: [...SETTLE_FLAGS, ...SELECTOR_SNAPSHOT_FLAGS], }, swipe: { helpDescription: 'Swipe coordinates with optional repeat pattern', @@ -114,7 +124,7 @@ const interactionCliSchemas = { usageOverride: 'fill | fill <@ref|selector> ', positionalArgs: ['targetOrX', 'yOrText', 'text?'], allowsExtraPositionals: true, - allowedFlags: [...SELECTOR_SNAPSHOT_FLAGS, 'delayMs', 'verify'], + allowedFlags: [...SELECTOR_SNAPSHOT_FLAGS, 'delayMs', 'verify', ...SETTLE_FLAGS], }, scroll: { usageOverride: 'scroll [amount] [--pixels ] [--duration-ms ]', @@ -343,6 +353,7 @@ function toClickOptions(input: ClickInput): ClickOptions { ...toRepeatedOptions(input), button: input.button, verify: input.verify, + ...toSettleOptions(input), }; } @@ -353,6 +364,7 @@ function toPressOptions(input: PressInput): PressOptions { ...toSelectorSnapshotOptions(input), ...toRepeatedOptions(input), verify: input.verify, + ...toSettleOptions(input), }; } @@ -364,6 +376,7 @@ function toFillOptions(input: FillInput): FillOptions { text: input.text, delayMs: input.delayMs, verify: input.verify, + ...toSettleOptions(input), }; } @@ -373,6 +386,19 @@ function toLongPressOptions(input: LongPressInput): LongPressOptions { ...toClientInteractionTarget(input.target), 
...toSelectorSnapshotOptions(input), durationMs: input.durationMs, + ...toSettleOptions(input), + }; +} + +function toSettleOptions(input: { + settle?: boolean; + settleQuietMs?: number; + timeoutMs?: number; +}): Pick { + return { + settle: input.settle, + settleQuietMs: input.settleQuietMs, + timeoutMs: input.timeoutMs, }; } diff --git a/src/commands/interaction/interactions.ts b/src/commands/interaction/interactions.ts index f5a145339..26afc7b9b 100644 --- a/src/commands/interaction/interactions.ts +++ b/src/commands/interaction/interactions.ts @@ -26,6 +26,7 @@ import { requiredDaemonString, repeatedInputFromFlags, selectorSnapshotInputFromFlags, + settleInputFromFlags, targetInputFromClientTarget, } from '../cli-grammar/common.ts'; import type { CliReader, DaemonWriter, CommandInput } from '../cli-grammar/types.ts'; @@ -35,6 +36,7 @@ export const interactionCliReaders = { ...commonInputFromFlags(flags), ...selectorSnapshotInputFromFlags(flags), ...repeatedInputFromFlags(flags), + ...settleInputFromFlags(flags), target: targetInputFromClientTarget(readInteractionTargetFromPositionals(positionals)), button: flags.clickButton, verify: flags.verify, @@ -43,6 +45,7 @@ export const interactionCliReaders = { ...commonInputFromFlags(flags), ...selectorSnapshotInputFromFlags(flags), ...repeatedInputFromFlags(flags), + ...settleInputFromFlags(flags), target: targetInputFromClientTarget(readInteractionTargetFromPositionals(positionals)), verify: flags.verify, }), @@ -51,6 +54,7 @@ export const interactionCliReaders = { return { ...commonInputFromFlags(flags), ...selectorSnapshotInputFromFlags(flags), + ...settleInputFromFlags(flags), target: targetInputFromClientTarget(decoded), durationMs: decoded.durationMs, }; @@ -79,6 +83,7 @@ export const interactionCliReaders = { return { ...commonInputFromFlags(flags), ...selectorSnapshotInputFromFlags(flags), + ...settleInputFromFlags(flags), target: targetInputFromClientTarget(decoded.target), text: decoded.text, delayMs: 
flags.delayMs, diff --git a/src/commands/interaction/metadata.ts b/src/commands/interaction/metadata.ts index ad322f6af..2a56be692 100644 --- a/src/commands/interaction/metadata.ts +++ b/src/commands/interaction/metadata.ts @@ -71,12 +71,21 @@ const verifyField = () => 'Capture cheap post-action evidence (AX digest, node counts, changedFromBefore) instead of a follow-up snapshot.', ); +const settleFields = () => ({ + settle: booleanField( + 'After the action, wait for the UI to go quiet and return the settled diff vs the pre-action tree in the same response. Best-effort; never fails the action.', + ), + settleQuietMs: integerField('Settle: quiet window in milliseconds (default 500).', { min: 0 }), + timeoutMs: integerField('Settle: wait deadline in milliseconds (default 10000).', { min: 1 }), +}); + const clickFields = { target: requiredField(interactionTargetField()), button: enumField(CLICK_BUTTONS, 'Pointer button for platforms that support mouse buttons.'), ...selectorSnapshotFields(), ...repeatedFields(), verify: verifyField(), + ...settleFields(), }; const pressFields = { @@ -84,6 +93,7 @@ const pressFields = { ...selectorSnapshotFields(), ...repeatedFields(), verify: verifyField(), + ...settleFields(), }; const fillFields = { @@ -92,12 +102,14 @@ const fillFields = { delayMs: integerField('Delay between typed characters.', { min: 0 }), ...selectorSnapshotFields(), verify: verifyField(), + ...settleFields(), }; const longPressFields = { target: requiredField(interactionTargetField()), durationMs: integerField('Long press duration in milliseconds.', { min: 0 }), ...selectorSnapshotFields(), + ...settleFields(), }; const swipeFields = { diff --git a/src/commands/interaction/output.ts b/src/commands/interaction/output.ts index 383775f3e..e8da95006 100644 --- a/src/commands/interaction/output.ts +++ b/src/commands/interaction/output.ts @@ -39,7 +39,48 @@ function tapCliOutput(result: CommandRequestResult): CliOutput { if (!ref || typeof x !== 'number' || typeof y 
!== 'number') { return defaultCommandCliOutput(data); } - return { data, text: `Tapped @${ref} (${x}, ${y})` }; + return { data, text: `Tapped @${ref} (${x}, ${y})${formatSettleText(data.settle)}` }; +} + +type SettleTextView = { + settled?: boolean; + waitedMs?: number; + hint?: string; + diff?: { + summary?: { additions?: number; removals?: number; unchanged?: number }; + lines?: Array<{ kind?: string; text?: string }>; + truncated?: boolean; + }; +}; + +/** + * Compact `--settle` (#1101) rendering appended to the tap line: the verdict, + * the changed-count summary, and the changed lines themselves (the payload the + * agent acts on). Empty for non-settle responses. + */ +function formatSettleText(settle: unknown): string { + if (!settle || typeof settle !== 'object') return ''; + const view = settle as SettleTextView; + const parts = [formatSettleVerdict(view), ...formatSettleDiffLines(view.diff)]; + if (view.hint) parts.push(`hint: ${view.hint}`); + return `\n${parts.join('\n')}`; +} + +function formatSettleDiffLines(diff: SettleTextView['diff']): string[] { + const lines = (diff?.lines ?? []).map( + (line) => `${line.kind === 'removed' ? '-' : '+'} ${line.text ?? ''}`, + ); + if (diff?.truncated) lines.push('… changed lines truncated'); + return lines; +} + +function formatSettleVerdict(view: SettleTextView): string { + const verdict = view.settled === true ? 'settled' : 'not settled'; + const summary = view.diff?.summary; + const counts = summary + ? ` +${summary.additions ?? 0} -${summary.removals ?? 0} (~${summary.unchanged ?? 0} unchanged)` + : ''; + return `${verdict} after ${view.waitedMs ?? 
0}ms:${counts}`; } export const interactionCliOutputFormatters = { diff --git a/src/commands/interaction/runtime/gestures.ts b/src/commands/interaction/runtime/gestures.ts index 987dbecc2..d28538807 100644 --- a/src/commands/interaction/runtime/gestures.ts +++ b/src/commands/interaction/runtime/gestures.ts @@ -32,7 +32,7 @@ import { type BackendResultVariant, type RuntimeCommand, } from '../../runtime-types.ts'; -import type { LongPressCommandResult } from '../../../contracts/interaction.ts'; +import type { LongPressCommandResult, SettleParams } from '../../../contracts/interaction.ts'; import { assertSupportedInteractionSurface, captureInteractionSnapshot, @@ -40,6 +40,7 @@ import { type ResolvedInteractionTarget, resolveInteractionTarget, } from './resolution.ts'; +import { settleAfterInteraction } from './settle.ts'; export type FocusCommandOptions = CommandContext & { target: InteractionTarget; @@ -50,6 +51,8 @@ export type FocusCommandResult = ResolvedInteractionTarget & BackendResultEnvelo export type LongPressCommandOptions = CommandContext & { target: InteractionTarget; durationMs?: number; + /** Opt-in (#1101): settled-diff observation after the press; see settle.ts. */ + settle?: SettleParams; }; export type { LongPressCommandResult }; @@ -159,6 +162,7 @@ export const longPressCommand: RuntimeCommand< action: 'longPress', requireInteractive: true, promoteToHittableAncestor: true, + captureEvidenceBaseline: options.settle !== undefined, }); if (!runtime.backend.longPress) { throw new AppError('UNSUPPORTED_OPERATION', 'longPress is not supported by this backend'); @@ -172,10 +176,14 @@ export const longPressCommand: RuntimeCommand< durationMs, }); const formattedBackendResult = toBackendResult(backendResult); + const settle = options.settle + ? (await settleAfterInteraction(runtime, options, { ...options.settle, resolved })).observation + : undefined; return { ...resolved, ...(durationMs !== undefined ? { durationMs } : {}), ...(formattedBackendResult ? 
{ backendResult: formattedBackendResult } : {}), + ...(settle ? { settle } : {}), ...successText(`Long pressed (${point.x}, ${point.y})`), }; }; diff --git a/src/commands/interaction/runtime/interactions.ts b/src/commands/interaction/runtime/interactions.ts index 4f3d87e55..fa4434892 100644 --- a/src/commands/interaction/runtime/interactions.ts +++ b/src/commands/interaction/runtime/interactions.ts @@ -13,6 +13,8 @@ import type { PressCommandResult, ResolvedInteractionTarget, ResolvedTarget, + SettleObservation, + SettleParams, } from '../../../contracts/interaction.ts'; import { toBackendContext } from '../../runtime-common.ts'; import { @@ -27,6 +29,7 @@ import { preflightNativeRefInteraction, resolveInteractionTarget, } from './resolution.ts'; +import { settleAfterInteraction, settleEvidence } from './settle.ts'; export { focusCommand, @@ -62,6 +65,14 @@ export type PressCommandOptions = CommandContext & * follow-up snapshot round trip to confirm the action had an effect. */ verify?: boolean; + /** + * Opt-in (#1101): after the action, wait for the UI to go quiet and return + * the settled diff vs the pre-action tree in the same response. Presence + * enables settling; quiet window and deadline default in settle.ts. + * Best-effort — never fails the action. Composes with `verify`: the settle + * loop's final capture doubles as the evidence source (no extra captures). + */ + settle?: SettleParams; }; export type ClickCommandOptions = PressCommandOptions; @@ -73,6 +84,7 @@ export type FillCommandOptions = CommandContext & { text: string; delayMs?: number; verify?: boolean; + settle?: SettleParams; }; export type TypeTextCommandOptions = CommandContext & { @@ -102,14 +114,17 @@ export const fillCommand: RuntimeCommand ): Promise => { if (!options.text) throw new AppError('INVALID_ARGS', 'fill requires text'); const verify = options.verify === true; - const nativeRefFill = verify ? 
null : await maybeFillRefTarget(runtime, options); + // --settle needs the resolution-path baseline and post-action captures, so + // it disables the native ref fast path exactly like --verify does. + const nativeRefFill = + verify || options.settle ? null : await maybeFillRefTarget(runtime, options); if (nativeRefFill) return nativeRefFill; const resolved = await resolveInteractionTarget(runtime, options, { action: 'fill', requireInteractive: true, promoteToHittableAncestor: false, - captureEvidenceBaseline: verify, + captureEvidenceBaseline: verify || options.settle !== undefined, }); if (!runtime.backend.fill) { throw new AppError('UNSUPPORTED_OPERATION', 'fill is not supported by this backend'); @@ -127,13 +142,16 @@ export const fillCommand: RuntimeCommand nodeType && !isFillableType(nodeType, runtime.backend.platform) ? `fill target ${formatTargetForWarning(resolved)} resolved to "${nodeType}", attempting fill anyway.` : undefined; - const evidence = verify ? await captureVerifyEvidence(runtime, options, resolved) : undefined; + const observed = await observeAfterInteraction(runtime, options, resolved, { + verify, + settle: options.settle, + }); return reconcileNonHittableHintWithEvidence({ ...resolved, text: options.text, ...(warning ? { warning } : {}), ...(formattedBackendResult ? { backendResult: formattedBackendResult } : {}), - ...(evidence ? { evidence } : {}), + ...observed, }); }; @@ -176,14 +194,17 @@ async function tapCommand( action: 'click' | 'press', ): Promise { const verify = options.verify === true; - const nativeRefTap = verify ? null : await maybeTapRefTarget(runtime, options, action); + // --settle needs the resolution-path baseline and post-action captures, so + // it disables the native ref fast path exactly like --verify does. + const nativeRefTap = + verify || options.settle ? 
null : await maybeTapRefTarget(runtime, options, action); if (nativeRefTap) return nativeRefTap; const resolved = await resolveInteractionTarget(runtime, options, { action, requireInteractive: true, promoteToHittableAncestor: true, - captureEvidenceBaseline: verify, + captureEvidenceBaseline: verify || options.settle !== undefined, }); if (!runtime.backend.tap) { throw new AppError('UNSUPPORTED_OPERATION', 'tap is not supported by this backend'); @@ -198,14 +219,47 @@ async function tapCommand( doubleTap: options.doubleTap, }); const formattedBackendResult = toBackendResult(backendResult); - const evidence = verify ? await captureVerifyEvidence(runtime, options, resolved) : undefined; + const observed = await observeAfterInteraction(runtime, options, resolved, { + verify, + settle: options.settle, + }); return reconcileNonHittableHintWithEvidence({ ...resolved, ...(formattedBackendResult ? { backendResult: formattedBackendResult } : {}), - ...(evidence ? { evidence } : {}), + ...observed, }); } +/** + * Post-action observation composition: `--settle` runs the quiet-window loop + * (settle.ts) and, when `--verify` rides along, its final capture doubles as + * the evidence source — the pair costs zero captures beyond the settle loop's + * own. Without settle, verify keeps its single dedicated capture. + */ +async function observeAfterInteraction( + runtime: AgentDeviceRuntime, + options: CommandContext, + resolved: ResolvedInteractionTarget, + params: { verify: boolean; settle: SettleParams | undefined }, +): Promise<{ evidence?: InteractionEvidence; settle?: SettleObservation }> { + if (params.settle) { + const outcome = await settleAfterInteraction(runtime, options, { + ...params.settle, + resolved, + }); + const evidence = params.verify + ? settleEvidence( + outcome.settledNodes, + 'preActionNodes' in resolved ? resolved.preActionNodes : undefined, + ) + : undefined; + return { settle: outcome.observation, ...(evidence ? 
{ evidence } : {}) }; + } + if (!params.verify) return {}; + const evidence = await captureVerifyEvidence(runtime, options, resolved); + return evidence ? { evidence } : {}; +} + /** * Post-action side of `--verify` (#1047): one interactive-only capture through * the same capture helper the resolution path already uses, digested and then diff --git a/src/commands/interaction/runtime/selector-read.ts b/src/commands/interaction/runtime/selector-read.ts index 0ac338228..88dc204ac 100644 --- a/src/commands/interaction/runtime/selector-read.ts +++ b/src/commands/interaction/runtime/selector-read.ts @@ -42,6 +42,12 @@ import { resolveRefNode, } from './selector-read-shared.ts'; import { findNodeByLabel, resolveRefLabel, shouldScopeFind } from './selector-read-utils.ts'; +import { + DEFAULT_STABLE_QUIET_MS, + runStableCaptureLoop, + TINY_STABLE_TREE_HINT, + TINY_STABLE_TREE_NODE_COUNT, +} from './stable-capture.ts'; import { now, sleep, toBackendContext } from '../../runtime-common.ts'; export type { SelectorSnapshotOptions } from './selector-read-shared.ts'; @@ -153,10 +159,6 @@ export function ref(refInput: string, options: { fallbackLabel?: string } = {}): const DEFAULT_TIMEOUT_MS = 10_000; const POLL_INTERVAL_MS = 300; -const DEFAULT_QUIET_MS = 500; -// Below this node count a settled tree is suspicious: real app surfaces have -// more than a handful of accessibility nodes, splash/loading screens do not. -const TINY_STABLE_TREE_NODE_COUNT = 5; export const findCommand: RuntimeCommand = async ( runtime, @@ -521,6 +523,9 @@ async function snapshotContainsText( return Boolean(findNodeByLabel(capture.snapshot.nodes, text)); } +// The quiet-window loop itself lives in stable-capture.ts and is shared with +// the interaction `--settle` flag (#1101); this wrapper maps the loop outcome +// to wait's throwing semantics. 
async function waitForStable( runtime: AgentDeviceRuntime, options: WaitCommandOptions, @@ -528,110 +533,33 @@ async function waitForStable( timeoutMs: number | null | undefined, ): Promise> { const timeout = timeoutMs ?? DEFAULT_TIMEOUT_MS; - const quiet = quietMs ?? DEFAULT_QUIET_MS; - const start = now(runtime); - let captures = 0; - let lastDigest: string | undefined; - let lastNodeCount = 0; - let quietSinceMs = start; - while (now(runtime) - start < timeout) { - const capture = await captureStableSignalWithinDeadline( - runtime, - options, - timeout - (now(runtime) - start), - ); - if (!capture) { - throw new AppError('COMMAND_FAILED', 'wait timed out waiting for a stable UI', { - reason: 'wait_stable_timeout', - captureStalled: true, - quietMs: quiet, - timeoutMs: timeout, - captures, - nodeCount: lastNodeCount, - hint: 'A snapshot capture stalled past the wait timeout, so no settle verdict is available. The UI may still be readable: retry, or use screenshot to inspect the surface.', - }); - } - captures += 1; - const digest = digestSnapshotNodes(capture.snapshot.nodes); - const nowMs = now(runtime); - if (digest !== lastDigest) { - lastDigest = digest; - lastNodeCount = capture.snapshot.nodes.length; - quietSinceMs = nowMs; - } else if (captures >= 2 && nowMs - quietSinceMs >= quiet) { - return { - kind: 'stable', - waitedMs: nowMs - start, - captures, - nodeCount: lastNodeCount, - // A settled-but-tiny tree usually means a splash/loading surface, not - // real content: stability alone is a weak readiness signal there. - ...(lastNodeCount < TINY_STABLE_TREE_NODE_COUNT - ? { - hint: 'Settled on a nearly-empty tree — the app may still be loading. Wait for specific content (wait text ...) before interacting.', - } - : {}), - }; - } - await sleep(runtime, POLL_INTERVAL_MS); - } - throw new AppError('COMMAND_FAILED', 'wait timed out waiting for a stable UI', { - reason: 'wait_stable_timeout', + const quiet = quietMs ?? 
DEFAULT_STABLE_QUIET_MS; + const outcome = await runStableCaptureLoop(runtime, options, { quietMs: quiet, timeoutMs: timeout, - captures, - nodeCount: lastNodeCount, - }); -} - -// Intentionally does not update the session snapshot: the stable loop captures -// an interactive-only tree purely as a settle signal, and overwriting the -// session's richer cached snapshot with the filtered tree would degrade -// subsequent ref/get/find lookups against the same session. -// -// Resolves undefined when the capture does not return within remainingMs. A -// stalled backend capture (observed with macOS AX captures) must not push the -// stable wait past the user-supplied timeout into the daemon request timeout. -// The deadline uses a real timer even when runtime.clock is injected: test -// clocks advance synthetic time synchronously and cannot represent a hung -// backend call. -async function captureStableSignalWithinDeadline( - runtime: AgentDeviceRuntime, - options: WaitCommandOptions, - remainingMs: number, -): Promise { - const capture = captureSelectorSnapshot(runtime, options, { - updateSession: false, - interactiveOnly: true, }); - let timer: NodeJS.Timeout | undefined; - try { - const result = await Promise.race([ - capture, - new Promise((resolve) => { - timer = setTimeout(() => resolve(undefined), remainingMs); - }), - ]); - if (result === undefined) { - // The abandoned capture settles (or fails) on its own; swallow it so it - // cannot surface as an unhandled rejection after the wait already threw. - capture.catch(() => {}); - } - return result; - } finally { - if (timer !== undefined) clearTimeout(timer); + if (!outcome.settled) { + throw new AppError('COMMAND_FAILED', 'wait timed out waiting for a stable UI', { + reason: 'wait_stable_timeout', + ...(outcome.stalled ? { captureStalled: true } : {}), + quietMs: quiet, + timeoutMs: timeout, + captures: outcome.captures, + nodeCount: outcome.nodeCount, + ...(outcome.stalled + ? 
{ + hint: 'A snapshot capture stalled past the wait timeout, so no settle verdict is available. The UI may still be readable: retry, or use screenshot to inspect the surface.', + } + : {}), + }); } -} - -function digestSnapshotNodes(nodes: SnapshotNode[]): string { - return nodes.map(digestSnapshotNode).join('|'); -} - -function digestSnapshotNode(node: SnapshotNode): string { - const rect = node.rect - ? `${Math.round(node.rect.x)},${Math.round(node.rect.y)},${Math.round(node.rect.width)},${Math.round(node.rect.height)}` - : ''; - return `${node.type ?? ''}#${node.label ?? ''}#${node.identifier ?? ''}#${rect}`; + return { + kind: 'stable', + waitedMs: outcome.waitedMs, + captures: outcome.captures, + nodeCount: outcome.nodeCount, + ...(outcome.nodeCount < TINY_STABLE_TREE_NODE_COUNT ? { hint: TINY_STABLE_TREE_HINT } : {}), + }; } async function resolveSelectorNode( diff --git a/src/commands/interaction/runtime/settle.test.ts b/src/commands/interaction/runtime/settle.test.ts new file mode 100644 index 000000000..3a8f7cb16 --- /dev/null +++ b/src/commands/interaction/runtime/settle.test.ts @@ -0,0 +1,379 @@ +import assert from 'node:assert/strict'; +import { test } from 'vitest'; +import type { AgentDeviceBackend, BackendSnapshotResult } from '../../../backend.ts'; +import type { SnapshotState } from '../../../kernel/snapshot.ts'; +import { createLocalArtifactAdapter } from '../../../io.ts'; +import { + createAgentDevice, + createMemorySessionStore, + localCommandPolicy, +} from '../../../runtime.ts'; +import { makeSnapshotState } from '../../../__tests__/test-utils/index.ts'; +import { ref, selector } from './selector-read.ts'; +import { NEVER_SETTLED_HINT } from './settle.ts'; + +// #1101 --settle: quiet-window settle loop composition on the interaction +// commands. Budgets are injected (fake clock) — no real waiting. 
+ +function createFakeClock(stepMs = 300): { + now: () => number; + sleep: (ms: number) => Promise; +} { + let elapsed = 0; + return { + now: () => elapsed, + sleep: async (ms: number) => { + elapsed += ms > 0 ? ms : stepMs; + }, + }; +} + +function buttonSnapshot(): SnapshotState { + return makeSnapshotState([ + { + index: 0, + depth: 0, + type: 'Button', + label: 'Continue', + rect: { x: 10, y: 20, width: 100, height: 40 }, + hittable: true, + }, + ]); +} + +// Five nodes so a settled capture clears the tiny-tree readiness heuristic. +function welcomeSnapshot(): SnapshotState { + return makeSnapshotState( + ['Welcome!', 'Next', 'Back', 'Home', 'Menu'].map((label, index) => ({ + index, + depth: index === 0 ? 0 : 1, + ...(index === 0 ? {} : { parentIndex: 0 }), + type: index === 0 ? 'StaticText' : 'Button', + label, + rect: { x: 10, y: 20 + index * 60, width: 100, height: 40 }, + hittable: true, + })), + ); +} + +function createSettleDevice(params: { + stored: SnapshotState; + captureSnapshot: () => Promise | BackendSnapshotResult; + tap?: () => Promise>; +}): ReturnType { + return createAgentDevice({ + backend: { + platform: 'ios', + captureSnapshot: async () => await params.captureSnapshot(), + tap: async () => (params.tap ? await params.tap() : { ok: true }), + fill: async () => ({ ok: true }), + longPress: async () => ({ ok: true }), + typeText: async () => {}, + } satisfies AgentDeviceBackend, + artifacts: createLocalArtifactAdapter(), + sessions: createMemorySessionStore([{ name: 'default', snapshot: params.stored }]), + policy: localCommandPolicy(), + clock: createFakeClock(), + }); +} + +test('press --settle returns the settled diff and stores the settled tree', async () => { + const before = buttonSnapshot(); + const after = welcomeSnapshot(); + let captures = 0; + const device = createSettleDevice({ + stored: before, + captureSnapshot: () => { + captures += 1; + // Capture 1 = selector resolution (baseline). Captures 2+ = settle loop. 
+ return { snapshot: captures === 1 ? before : after }; + }, + }); + + const result = await device.interactions.press(selector('label=Continue'), { + session: 'default', + settle: {}, + }); + + assert.equal(result.kind, 'selector'); + const settle = result.settle; + assert.ok(settle); + assert.equal(settle.settled, true); + assert.equal(settle.quietMs, 500); + assert.equal(settle.timeoutMs, 10_000); + assert.ok(settle.captures >= 2); + assert.equal(settle.hint, undefined); + assert.deepEqual(settle.diff?.summary, { additions: 5, removals: 1, unchanged: 0 }); + const addedRefs = settle.diff?.lines + .filter((line) => line.kind === 'added') + .map((line) => line.ref); + assert.deepEqual(addedRefs, ['e1', 'e2', 'e3', 'e4', 'e5']); + const removed = settle.diff?.lines.find((line) => line.kind === 'removed'); + assert.match(removed?.text ?? '', /Continue/); + assert.equal(removed?.ref, undefined); + + // The settled tree became the session snapshot: the diff's refs resolve. + const stored = (await device.sessions.get('default')) as { snapshot?: SnapshotState }; + assert.equal(stored.snapshot?.nodes[0]?.label, 'Welcome!'); +}); + +test('never-settling content returns the last diff with settled: false and a hint', async () => { + const before = buttonSnapshot(); + let captures = 0; + const device = createSettleDevice({ + stored: before, + captureSnapshot: () => { + captures += 1; + if (captures === 1) return { snapshot: before }; + // Ticker: every capture differs. 
+ return { + snapshot: makeSnapshotState([ + { + index: 0, + depth: 0, + type: 'StaticText', + label: `Tick ${captures}`, + rect: { x: 10, y: 20, width: 100, height: 40 }, + hittable: true, + }, + ]), + }; + }, + }); + + const result = await device.interactions.press(selector('label=Continue'), { + session: 'default', + settle: { quietMs: 500, timeoutMs: 2_000 }, + }); + + const settle = result.settle; + assert.ok(settle); + assert.equal(settle.settled, false); + assert.equal(settle.hint, NEVER_SETTLED_HINT); + // The LAST capture's diff still ships — best-effort observation. + assert.ok(settle.diff); + assert.equal(settle.diff.summary.additions, 1); + assert.match(settle.diff.lines.find((line) => line.kind === 'added')?.text ?? '', /Tick/); +}); + +test('a broken settle capture never fails the action', async () => { + const before = buttonSnapshot(); + let captures = 0; + const device = createSettleDevice({ + stored: before, + captureSnapshot: () => { + captures += 1; + if (captures === 1) return { snapshot: before }; + throw new Error('AX bridge crashed'); + }, + }); + + const result = await device.interactions.press(selector('label=Continue'), { + session: 'default', + settle: {}, + }); + + // The press itself succeeded; the observation reports its own failure. + assert.equal(result.kind, 'selector'); + const settle = result.settle; + assert.ok(settle); + assert.equal(settle.settled, false); + assert.equal(settle.diff, undefined); + assert.match(settle.hint ?? '', /Settle observation unavailable \(AX bridge crashed\)/); +}); + +test('--settle --verify shares the settle captures for evidence (zero extra captures)', async () => { + const before = buttonSnapshot(); + const after = welcomeSnapshot(); + let captures = 0; + const device = createSettleDevice({ + stored: before, + captureSnapshot: () => { + captures += 1; + return { snapshot: captures === 1 ? 
before : after }; + }, + }); + + const result = await device.interactions.click(selector('label=Continue'), { + session: 'default', + verify: true, + settle: {}, + }); + + const settle = result.settle; + assert.ok(settle); + assert.equal(settle.settled, true); + assert.ok(result.evidence); + assert.equal(result.evidence?.changedFromBefore, true); + assert.equal(result.evidence?.nodeCount, 5); + // 1 resolution capture + the settle loop's captures — verify added none. + assert.equal(captures, 1 + settle.captures); +}); + +test('longpress --settle rides the same observation path', async () => { + const before = buttonSnapshot(); + let captures = 0; + const device = createSettleDevice({ + stored: before, + captureSnapshot: () => { + captures += 1; + return { snapshot: captures === 1 ? before : welcomeSnapshot() }; + }, + }); + + const result = await device.interactions.longPress(ref('@e1'), { + session: 'default', + durationMs: 400, + settle: { quietMs: 100, timeoutMs: 1_000 }, + }); + + assert.equal(result.kind, 'ref'); + const settle = result.settle; + assert.ok(settle); + assert.equal(settle.settled, true); + assert.equal(settle.quietMs, 100); + assert.equal(settle.timeoutMs, 1_000); + assert.ok(settle.diff); +}); + +test('the settled diff line list is bounded with a truncation marker', async () => { + const before = buttonSnapshot(); + const bigTree = makeSnapshotState( + Array.from({ length: 120 }, (_, index) => ({ + index, + depth: 0, + type: 'StaticText', + label: `Row ${index}`, + rect: { x: 0, y: index * 20, width: 100, height: 20 }, + hittable: true, + })), + ); + let captures = 0; + const device = createSettleDevice({ + stored: before, + captureSnapshot: () => { + captures += 1; + return { snapshot: captures === 1 ? 
before : bigTree }; + }, + }); + + const result = await device.interactions.press(selector('label=Continue'), { + session: 'default', + settle: {}, + }); + + const diff = result.settle?.diff; + assert.ok(diff); + assert.equal(diff.summary.additions, 120); + assert.equal(diff.lines.length, 80); + assert.equal(diff.truncated, true); +}); + +test('keyboard Key nodes never spend the settled diff budget', async () => { + const before = buttonSnapshot(); + // A fill-style settled tree: a summoned keyboard (container + keys) plus the + // content change the agent actually cares about. + const keyboardTree = makeSnapshotState([ + { + index: 0, + depth: 0, + type: 'StaticText', + label: 'Results for alpenglow', + rect: { x: 0, y: 0, width: 200, height: 20 }, + hittable: true, + }, + { + index: 1, + depth: 0, + type: 'Keyboard', + label: 'keyboard', + rect: { x: 0, y: 500, width: 400, height: 300 }, + hittable: true, + }, + ...Array.from({ length: 26 }, (_, key) => ({ + index: key + 2, + depth: 1, + parentIndex: 1, + type: 'Key', + label: String.fromCharCode(97 + key), + rect: { x: key * 10, y: 520, width: 10, height: 40 }, + hittable: true, + })), + ...['Next', 'Back', 'Home'].map((label, extra) => ({ + index: extra + 28, + depth: 0, + type: 'Button', + label, + rect: { x: 0, y: 40 + extra * 40, width: 100, height: 40 }, + hittable: true, + })), + ]); + let captures = 0; + const device = createSettleDevice({ + stored: before, + captureSnapshot: () => { + captures += 1; + return { snapshot: captures === 1 ? before : keyboardTree }; + }, + }); + + const result = await device.interactions.press(selector('label=Continue'), { + session: 'default', + settle: {}, + }); + + const diff = result.settle?.diff; + assert.ok(diff); + const texts = diff.lines.map((line) => line.text).join('\n'); + assert.match(texts, /Results for alpenglow/); + assert.match(texts, /keyboard/); + // The container line survives as the keyboard signal; individual keys do not. 
+ assert.ok(!diff.lines.some((line) => /\[key\]/.test(line.text))); + assert.equal(diff.summary.additions, 5); +}); + +test('added lines win diff-budget slots over removals under truncation', async () => { + // 120 removals precede the additions positionally; the fresh-ref additions + // must still survive the 80-line cap. + const bigBefore = makeSnapshotState( + Array.from({ length: 120 }, (_, index) => ({ + index, + depth: 0, + type: 'StaticText', + label: `Old row ${index}`, + rect: { x: 0, y: index * 20, width: 100, height: 20 }, + hittable: true, + })), + ); + const afterTree = makeSnapshotState( + Array.from({ length: 10 }, (_, index) => ({ + index, + depth: 0, + type: 'Button', + label: `New action ${index}`, + rect: { x: 0, y: index * 40, width: 100, height: 40 }, + hittable: true, + })), + ); + let captures = 0; + const device = createSettleDevice({ + stored: bigBefore, + captureSnapshot: () => { + captures += 1; + return { snapshot: captures === 1 ? bigBefore : afterTree }; + }, + }); + + const result = await device.interactions.press(selector('label="Old row 0"'), { + session: 'default', + settle: {}, + }); + + const diff = result.settle?.diff; + assert.ok(diff); + assert.equal(diff.truncated, true); + assert.equal(diff.lines.length, 80); + const added = diff.lines.filter((line) => line.kind === 'added'); + assert.equal(added.length, 10); + assert.ok(added.every((line) => line.ref !== undefined)); +}); diff --git a/src/commands/interaction/runtime/settle.ts b/src/commands/interaction/runtime/settle.ts new file mode 100644 index 000000000..c0222ff66 --- /dev/null +++ b/src/commands/interaction/runtime/settle.ts @@ -0,0 +1,212 @@ +import type { SnapshotNode } from '../../../kernel/snapshot.ts'; +import type { AgentDeviceRuntime, CommandContext } from '../../../runtime-contract.ts'; +import { isSparseSnapshotQualityVerdict } from '../../../snapshot/snapshot-quality.ts'; +import { buildSnapshotDiff } from '../../../snapshot/snapshot-diff.ts'; +import { 
summarizeAxEvidence } from '../../../utils/ax-digest.ts'; +import type { + InteractionEvidence, + ResolvedInteractionTarget, + SettleObservation, + SettleParams, +} from '../../../contracts/interaction.ts'; +import type { CapturedSnapshot } from './selector-read-shared.ts'; +import { + DEFAULT_STABLE_QUIET_MS, + DEFAULT_STABLE_TIMEOUT_MS, + runStableCaptureLoop, + TINY_STABLE_TREE_HINT, + TINY_STABLE_TREE_NODE_COUNT, +} from './stable-capture.ts'; + +/** + * `--settle` (#1101): after a mutating interaction, wait for the UI to go + * quiet (wait stable's loop, shared via stable-capture.ts) and return the + * settled DIFF against the pre-action tree in the same response — one round + * trip instead of the interact → observe pair. + * + * Best-effort by contract: this module never throws. The action already + * succeeded when it runs; observation quality is advisory (same principle as + * `--verify` evidence). + */ + +export type SettleOutcome = { + observation: SettleObservation; + /** Nodes of the final capture; doubles as the `--verify` evidence source. */ + settledNodes?: SnapshotNode[]; +}; + +// Changed-lines bound: the settled diff is the response payload, and unbounded +// added/removed lists on a full screen transition would crowd out everything +// else. The summary always carries the true counts. +const MAX_SETTLE_DIFF_LINES = 80; + +export const NEVER_SETTLED_HINT = + 'The UI kept changing for the whole settle budget (animation, carousel, or ticker?). The diff reflects the last capture and may already be outdated. Raise --timeout, or verify the target content with wait text.'; + +const SETTLE_CAPTURE_STALLED_HINT = + 'A snapshot capture stalled past the settle budget, so no settle verdict is available. 
The action itself succeeded; retry the observation with snapshot or wait stable.'; + +export async function settleAfterInteraction( + runtime: AgentDeviceRuntime, + options: CommandContext, + params: SettleParams & { resolved: ResolvedInteractionTarget }, +): Promise { + const quietMs = params.quietMs ?? DEFAULT_STABLE_QUIET_MS; + const timeoutMs = params.timeoutMs ?? DEFAULT_STABLE_TIMEOUT_MS; + const base: SettleObservation = { settled: false, waitedMs: 0, captures: 0, quietMs, timeoutMs }; + try { + const outcome = await runStableCaptureLoop(runtime, options, { quietMs, timeoutMs }); + const observation: SettleObservation = { + ...base, + settled: outcome.settled, + waitedMs: outcome.waitedMs, + captures: outcome.captures, + }; + if (!outcome.lastCapture) { + return { + observation: { + ...observation, + hint: outcome.stalled ? SETTLE_CAPTURE_STALLED_HINT : NEVER_SETTLED_HINT, + }, + }; + } + const stored = await storeSettledSnapshot(runtime, options, outcome.lastCapture); + const settledNodes = outcome.lastCapture.snapshot.nodes; + return { + observation: { + ...observation, + // The diff (with its added-line refs) is only attached when the settled + // tree actually became the stored session snapshot: those refs must be + // valid against the tree the next @ref command resolves on. The daemon + // treats `diff` presence as "this response issues refs". + ...(stored + ? { diff: buildSettleDiff(resolveBaselineNodes(params.resolved), settledNodes) } + : {}), + ...resolveSettleHint(outcome, stored, settledNodes.length), + }, + settledNodes, + }; + } catch (error) { + // Never fail the action over the observation: report that settling itself + // broke and let the caller fall back to an explicit snapshot. + return { + observation: { + ...base, + hint: `Settle observation unavailable (${error instanceof Error ? error.message : String(error)}). 
The action itself succeeded; take a snapshot to observe the result.`, + }, + }; + } +} + +/** + * `--settle --verify` composition: the settle loop's final capture doubles as + * the verify evidence source, so the pair costs zero extra captures. Without a + * final capture there is no evidence — best-effort, like verify itself. + */ +export function settleEvidence( + settledNodes: SnapshotNode[] | undefined, + preActionNodes: SnapshotNode[] | undefined, +): InteractionEvidence | undefined { + if (!settledNodes) return undefined; + const after = summarizeAxEvidence(settledNodes); + const changedFromBefore = + preActionNodes !== undefined && after.digest !== summarizeAxEvidence(preActionNodes).digest; + return { ...after, changedFromBefore }; +} + +function resolveBaselineNodes(resolved: ResolvedInteractionTarget): SnapshotNode[] { + return 'preActionNodes' in resolved && resolved.preActionNodes ? resolved.preActionNodes : []; +} + +function buildSettleDiff( + baselineNodes: SnapshotNode[], + settledNodes: SnapshotNode[], +): NonNullable { + // Flattened compare, like `diff -i`: both sides are interactive-flavored + // captures and depth jitter across captures should not read as change. When + // the baseline came from a richer stored tree (ref targets reuse the session + // snapshot), extra baseline-only lines surface as removals — advisory noise, + // the same baseline caveat --verify's changedFromBefore already accepts. + const diff = buildSnapshotDiff( + withoutKeyboardKeys(baselineNodes), + withoutKeyboardKeys(settledNodes), + { flatten: true, withRefs: true }, + ); + const changed = diff.lines.filter((line) => line.kind !== 'unchanged'); + const lines = capSettleDiffLines(changed).map((line) => ({ + kind: line.kind as 'added' | 'removed', + text: line.text, + ...(line.ref ? { ref: line.ref } : {}), + })); + return { + summary: diff.summary, + lines, + ...(changed.length > lines.length ? 
{ truncated: true as const } : {}), + }; +} + +// The iOS QWERTY keyboard is ~50 Key nodes; a fill that summons it would spend +// most of the capped line budget spelling out the keyboard instead of the +// content change the agent actually asked to observe. The Keyboard container +// node stays, so "keyboard appeared/left" remains one visible diff line. +function withoutKeyboardKeys(nodes: SnapshotNode[]): SnapshotNode[] { + return nodes.filter((node) => node.type !== 'Key'); +} + +/** + * Truncation policy: added lines win. They carry the settled tree's fresh + * refs — the actionable half of the diff — while removals only describe what + * left the screen (the summary still counts them). Relative order within each + * kind is preserved; removals fill whatever budget the additions leave. + */ +function capSettleDiffLines(changed: T[]): T[] { + if (changed.length <= MAX_SETTLE_DIFF_LINES) return changed; + const added = changed.filter((line) => line.kind === 'added'); + const keptAdded = new Set(added.slice(0, MAX_SETTLE_DIFF_LINES)); + let removedBudget = MAX_SETTLE_DIFF_LINES - keptAdded.size; + const kept: T[] = []; + for (const line of changed) { + if (keptAdded.has(line)) { + kept.push(line); + } else if (line.kind === 'removed' && removedBudget > 0) { + kept.push(line); + removedBudget -= 1; + } + } + return kept; +} + +function resolveSettleHint( + outcome: { settled: boolean; stalled: boolean }, + stored: boolean, + settledNodeCount: number, +): { hint?: string } { + if (outcome.stalled) return { hint: SETTLE_CAPTURE_STALLED_HINT }; + if (!outcome.settled) return { hint: NEVER_SETTLED_HINT }; + if (!stored) { + return { + hint: 'Settled on a sparse, unreadable tree — the diff is omitted. Use screenshot as visual truth before interacting further.', + }; + } + // Same weak-readiness signal wait stable reports: a settled-but-tiny tree + // usually means a splash/loading surface, not real content. 
+ if (settledNodeCount < TINY_STABLE_TREE_NODE_COUNT) return { hint: TINY_STABLE_TREE_HINT }; + return {}; +} + +// The settle loop itself captures with updateSession: false (a capture that +// later stalls must not race a session write past the response). The FINAL +// settled tree IS stored — its refs ride the diff payload, so they must be +// resolvable by the next @ref command. Sparse-quality captures are not stored +// (mirroring captureSelectorSnapshot) and therefore issue no refs. +async function storeSettledSnapshot( + runtime: AgentDeviceRuntime, + options: CommandContext, + capture: CapturedSnapshot, +): Promise { + if (isSparseSnapshotQualityVerdict(capture.snapshot.snapshotQuality)) return false; + const session = await runtime.sessions.get(options.session ?? 'default'); + if (!session) return false; + await runtime.sessions.set({ ...session, snapshot: capture.snapshot }); + return true; +} diff --git a/src/commands/interaction/runtime/stable-capture.ts b/src/commands/interaction/runtime/stable-capture.ts new file mode 100644 index 000000000..c137d5143 --- /dev/null +++ b/src/commands/interaction/runtime/stable-capture.ts @@ -0,0 +1,154 @@ +import type { SnapshotNode } from '../../../kernel/snapshot.ts'; +import type { AgentDeviceRuntime, CommandContext } from '../../../runtime-contract.ts'; +import { now, sleep } from '../../runtime-common.ts'; +import { + captureSelectorSnapshot, + type CapturedSnapshot, + type SelectorSnapshotOptions, +} from './selector-read-shared.ts'; + +/** + * The quiet-window stable-capture loop shared by `wait stable` and the + * interaction `--settle` flag (#1101): capture the interactive-only tree, + * digest it, and declare the UI stable once at least two captures in a row + * stay identical for `quietMs`. Timeouts and stalls are returned, not thrown + * (a rejected capture still propagates); the session snapshot is never + * updated — callers map the outcome to their own semantics (`wait stable` + * throws on timeout; `--settle` is best-effort and reports `settled: false`).
+ */ + +const STABLE_POLL_INTERVAL_MS = 300; +export const DEFAULT_STABLE_QUIET_MS = 500; +export const DEFAULT_STABLE_TIMEOUT_MS = 10_000; +// Below this node count a settled tree is suspicious: real app surfaces have +// more than a handful of accessibility nodes, splash/loading screens do not. +export const TINY_STABLE_TREE_NODE_COUNT = 5; +// A settled-but-tiny tree usually means a splash/loading surface, not real +// content: stability alone is a weak readiness signal there. +export const TINY_STABLE_TREE_HINT = + 'Settled on a nearly-empty tree — the app may still be loading. Wait for specific content (wait text ...) before interacting.'; + +export type StableCaptureLoopResult = { + /** Two identical captures held the quiet window before the deadline. */ + settled: boolean; + /** A capture stalled past the remaining deadline (no verdict available). */ + stalled: boolean; + waitedMs: number; + captures: number; + nodeCount: number; + /** The most recent completed capture, settled or not. */ + lastCapture?: CapturedSnapshot; +}; + +export async function runStableCaptureLoop( + runtime: AgentDeviceRuntime, + options: CommandContext & SelectorSnapshotOptions, + params: { quietMs: number; timeoutMs: number }, +): Promise { + const { quietMs, timeoutMs } = params; + const start = now(runtime); + // Cadence derives from the quiet window (never slower than the default + // poll): a caller asking for a 50ms quiet window should not be forced onto a + // 300ms grid — and tests inject the budget instead of waiting real time. 
+ const pollMs = Math.min(STABLE_POLL_INTERVAL_MS, Math.max(25, quietMs)); + let captures = 0; + let lastDigest: string | undefined; + let lastNodeCount = 0; + let lastCapture: CapturedSnapshot | undefined; + let quietSinceMs = start; + while (now(runtime) - start < timeoutMs) { + const capture = await captureStableSignalWithinDeadline( + runtime, + options, + timeoutMs - (now(runtime) - start), + ); + if (!capture) { + return { + settled: false, + stalled: true, + waitedMs: now(runtime) - start, + captures, + nodeCount: lastNodeCount, + lastCapture, + }; + } + captures += 1; + lastCapture = capture; + const digest = digestSnapshotNodes(capture.snapshot.nodes); + const nowMs = now(runtime); + if (digest !== lastDigest) { + lastDigest = digest; + lastNodeCount = capture.snapshot.nodes.length; + quietSinceMs = nowMs; + } else if (captures >= 2 && nowMs - quietSinceMs >= quietMs) { + return { + settled: true, + stalled: false, + waitedMs: nowMs - start, + captures, + nodeCount: lastNodeCount, + lastCapture, + }; + } + await sleep(runtime, pollMs); + } + return { + settled: false, + stalled: false, + waitedMs: now(runtime) - start, + captures, + nodeCount: lastNodeCount, + lastCapture, + }; +} + +// Intentionally does not update the session snapshot: the stable loop captures +// an interactive-only tree purely as a settle signal, and overwriting the +// session's richer cached snapshot with the filtered tree would degrade +// subsequent ref/get/find lookups against the same session. (`--settle` DOES +// store its final capture, but explicitly, in one place, after the loop.) +// +// Resolves undefined when the capture does not return within remainingMs. A +// stalled backend capture (observed with macOS AX captures) must not push the +// stable wait past the user-supplied timeout into the daemon request timeout. 
+// The deadline uses a real timer even when runtime.clock is injected: test +// clocks advance synthetic time synchronously and cannot represent a hung +// backend call. +async function captureStableSignalWithinDeadline( + runtime: AgentDeviceRuntime, + options: CommandContext & SelectorSnapshotOptions, + remainingMs: number, +): Promise { + const capture = captureSelectorSnapshot(runtime, options, { + updateSession: false, + interactiveOnly: true, + }); + let timer: NodeJS.Timeout | undefined; + try { + const result = await Promise.race([ + capture, + new Promise((resolve) => { + timer = setTimeout(() => resolve(undefined), remainingMs); + }), + ]); + if (result === undefined) { + // The abandoned capture settles (or fails) on its own; swallow it so it + // cannot surface as an unhandled rejection after the wait already threw. + capture.catch(() => {}); + } + return result; + } finally { + if (timer !== undefined) clearTimeout(timer); + } +} + +function digestSnapshotNodes(nodes: SnapshotNode[]): string { + return nodes.map(digestSnapshotNode).join('|'); +} + +function digestSnapshotNode(node: SnapshotNode): string { + const rect = node.rect + ? `${Math.round(node.rect.x)},${Math.round(node.rect.y)},${Math.round(node.rect.width)},${Math.round(node.rect.height)}` + : ''; + return `${node.type ?? ''}#${node.label ?? ''}#${node.identifier ?? ''}#${rect}`; +} diff --git a/src/contracts/interaction-guarantees.ts b/src/contracts/interaction-guarantees.ts index 4cce8f3c2..70e701917 100644 --- a/src/contracts/interaction-guarantees.ts +++ b/src/contracts/interaction-guarantees.ts @@ -50,6 +50,11 @@ export const INTERACTION_GUARANTEES = [ 'responseIdentity', // --verify captures a pre-action baseline and post-action digest. 'verifyEvidence', + // --settle (#1101) waits for the UI to go quiet after the action and returns + // the settled diff vs the pre-action tree (plus refsGeneration when the + // settled tree was stored) in the same response. 
Best-effort: never fails + // the action. + 'settleObservation', // Failures use the shared codes/messages/hints (no-match diagnostics, // ambiguous shape, offscreen reasons). NOTE: expected to split into // errorCodes (stable codes / fallback classification) vs errorDiagnostics @@ -130,37 +135,48 @@ const SHARED_RESPONSE_CONSTRUCTION: GuaranteeEnforcement = { via: 'src/daemon/handlers/interaction-touch-response.ts#buildInteractionResponseData', }; +// The two runtime tree paths (selector and ref resolution) run the SAME shared +// guard/observation implementations; only how the target is found +// (disambiguation) and how failures are described (errorTaxonomy) differ. +const RUNTIME_TREE_SHARED_GUARANTEES = { + occlusion: { + kind: 'runtime', + via: 'src/snapshot/snapshot-occlusion.ts#isSnapshotNodeInteractionBlocked', + }, + offscreen: { + kind: 'runtime', + via: 'src/snapshot/mobile-snapshot-semantics.ts#isNodeVisibleOnScreen', + }, + nonHittable: { + kind: 'runtime', + via: 'src/core/interaction-targeting.ts#resolveActionableTouchResolution', + }, + responseConstruction: SHARED_RESPONSE_CONSTRUCTION, + responseIdentity: { + kind: 'runtime', + via: 'src/daemon/handlers/interaction-touch-targets.ts#interactionResultExtra', + }, + verifyEvidence: { + kind: 'runtime', + via: 'src/commands/interaction/runtime/interactions.ts#pressCommand', + appliesTo: ['press', 'click', 'fill'], + }, + settleObservation: { + kind: 'runtime', + via: 'src/commands/interaction/runtime/settle.ts#settleAfterInteraction', + }, +} satisfies Partial>; + export const INTERACTION_DISPATCH_PATHS: Record = { 'runtime-selector': { description: 'Daemon tree capture, selector chain resolution, guarded coordinate tap.', commands: ['press', 'click', 'fill', 'longpress'], guarantees: { + ...RUNTIME_TREE_SHARED_GUARANTEES, disambiguation: { kind: 'runtime', via: 'src/daemon/selectors-resolve.ts#resolveSelectorChain', }, - occlusion: { - kind: 'runtime', - via: 
'src/snapshot/snapshot-occlusion.ts#isSnapshotNodeInteractionBlocked', - }, - offscreen: { - kind: 'runtime', - via: 'src/snapshot/mobile-snapshot-semantics.ts#isNodeVisibleOnScreen', - }, - nonHittable: { - kind: 'runtime', - via: 'src/core/interaction-targeting.ts#resolveActionableTouchResolution', - }, - responseConstruction: SHARED_RESPONSE_CONSTRUCTION, - responseIdentity: { - kind: 'runtime', - via: 'src/daemon/handlers/interaction-touch-targets.ts#interactionResultExtra', - }, - verifyEvidence: { - kind: 'runtime', - via: 'src/commands/interaction/runtime/interactions.ts#pressCommand', - appliesTo: ['press', 'click', 'fill'], - }, errorTaxonomy: { kind: 'runtime', via: 'src/daemon/selectors-resolve.ts#formatSelectorFailure', @@ -171,32 +187,11 @@ export const INTERACTION_DISPATCH_PATHS: Record; message?: string; warning?: string; evidence?: InteractionEvidence; + settle?: SettleObservation; }; export type FillCommandResult = ResolvedInteractionTarget & { @@ -90,6 +147,7 @@ export type FillCommandResult = ResolvedInteractionTarget & { backendResult?: Record; message?: string; evidence?: InteractionEvidence; + settle?: SettleObservation; }; export type LongPressCommandResult = ResolvedInteractionTarget & { @@ -97,4 +155,5 @@ export type LongPressCommandResult = ResolvedInteractionTarget & { backendResult?: Record; message?: string; warning?: string; + settle?: SettleObservation; }; diff --git a/src/core/command-descriptor/__tests__/timeout-policy.test.ts b/src/core/command-descriptor/__tests__/timeout-policy.test.ts index 80b051230..0b1656470 100644 --- a/src/core/command-descriptor/__tests__/timeout-policy.test.ts +++ b/src/core/command-descriptor/__tests__/timeout-policy.test.ts @@ -58,16 +58,24 @@ test('daemon-preserving timeout commands are a bounded, reviewed set', () => { }); test('budget sources deviating from the default are bounded, reviewed sets', () => { - const flagBudget: string[] = []; + const flagBoundBudget: string[] = []; + const 
flagWidenBudget: string[] = []; const positionalBudget: string[] = []; for (const descriptor of commandDescriptors) { - if (descriptor.timeoutPolicy.budget.source === 'flag') flagBudget.push(descriptor.name); - if (descriptor.timeoutPolicy.budget.source === 'positional-parser') { + const budget = descriptor.timeoutPolicy.budget; + if (budget.source === 'flag') { + const widen = 'envelope' in budget && budget.envelope === 'widen'; + (widen ? flagWidenBudget : flagBoundBudget).push(descriptor.name); + } + if (budget.source === 'positional-parser') { positionalBudget.push(descriptor.name); } } // --timeout bounds the request envelope for these commands only. - assert.deepEqual(flagBudget.sort(), ['prepare', 'replay', 'snapshot']); + assert.deepEqual(flagBoundBudget.sort(), ['prepare', 'replay', 'snapshot']); + // --timeout bounds the --settle wait on these commands (#1101); like wait's + // positional budget it only ever widens the envelope, never shrinks it. + assert.deepEqual(flagWidenBudget.sort(), ['click', 'fill', 'longpress', 'press']); // wait's budget travels as a positional and must widen the envelope. assert.deepEqual(positionalBudget, ['wait']); }); diff --git a/src/core/command-descriptor/registry.ts b/src/core/command-descriptor/registry.ts index 162cccd7d..0eaa18953 100644 --- a/src/core/command-descriptor/registry.ts +++ b/src/core/command-descriptor/registry.ts @@ -99,6 +99,15 @@ const INSTALL_TIMEOUT_POLICY: CommandTimeoutPolicy = { envelopeMs: INSTALL_REQUEST_TIMEOUT_MS, }; +// press/click/fill/longpress --settle (#1101): --timeout bounds the SETTLE +// wait, not the request — the envelope only ever WIDENS past the budget +// (mirroring wait's positional budget, #1075), never shrinks below the +// default; a slow tap must not die at a user-supplied settle deadline. 
+const SETTLE_FLAG_TIMEOUT_POLICY: CommandTimeoutPolicy = { + ...DEFAULT_TIMEOUT_POLICY, + budget: { source: 'flag', envelope: 'widen' }, +}; + // --------------------------------------------------------------------------- // The additive single source. Each entry carries the daemon route/traits + // capability + batchable flag copied VERBATIM from today's hand tables. @@ -455,28 +464,28 @@ const RAW_COMMAND_DESCRIPTORS = [ name: PUBLIC_COMMANDS.click, daemon: { route: 'interaction', replayScopedAction: true, androidBlockingDialogGuard: true }, capability: { apple: APPLE_SIM_AND_DEVICE, android: ANDROID_ALL, linux: LINUX_DEVICE }, - timeoutPolicy: DEFAULT_TIMEOUT_POLICY, + timeoutPolicy: SETTLE_FLAG_TIMEOUT_POLICY, batchable: true, }, { name: PUBLIC_COMMANDS.fill, daemon: { route: 'interaction', replayScopedAction: true, androidBlockingDialogGuard: true }, capability: { apple: APPLE_SIM_AND_DEVICE, android: ANDROID_ALL, linux: LINUX_DEVICE }, - timeoutPolicy: DEFAULT_TIMEOUT_POLICY, + timeoutPolicy: SETTLE_FLAG_TIMEOUT_POLICY, batchable: true, }, { name: PUBLIC_COMMANDS.longPress, daemon: { route: 'interaction', replayScopedAction: true, androidBlockingDialogGuard: true }, capability: { apple: APPLE_SIM_AND_DEVICE, android: ANDROID_ALL, linux: LINUX_DEVICE }, - timeoutPolicy: DEFAULT_TIMEOUT_POLICY, + timeoutPolicy: SETTLE_FLAG_TIMEOUT_POLICY, batchable: true, }, { name: PUBLIC_COMMANDS.press, daemon: { route: 'interaction', replayScopedAction: true, androidBlockingDialogGuard: true }, capability: { apple: APPLE_SIM_AND_DEVICE, android: ANDROID_ALL, linux: LINUX_DEVICE }, - timeoutPolicy: DEFAULT_TIMEOUT_POLICY, + timeoutPolicy: SETTLE_FLAG_TIMEOUT_POLICY, batchable: true, }, { diff --git a/src/core/command-descriptor/types.ts b/src/core/command-descriptor/types.ts index f5548f433..730fd89db 100644 --- a/src/core/command-descriptor/types.ts +++ b/src/core/command-descriptor/types.ts @@ -16,8 +16,14 @@ export type DaemonCommandTraits = Omit; * * - `'none'` — the 
command has no user-supplied budget; the request * envelope is exactly `envelopeMs`. - * - `'flag'` — the `--timeout` flag (`flags.timeoutMs`) overrides the - * envelope when present. + * - `'flag'` — the `--timeout` flag (`flags.timeoutMs`). By default it + * REPLACES the envelope (replay semantics: --timeout + * bounds the request). With `envelope: 'widen'` it only + * ever EXTENDS the envelope to budget + margin, never + * shrinking below `envelopeMs` (interaction --settle + * semantics, #1101: the flag bounds an internal wait + * the request must outlive — mirroring wait's + * positional budget). * - `'positional-parser'`— the budget travels inside the positionals; `parser` * extracts it (or returns null when none was given). * The client widens the envelope to @@ -25,7 +31,7 @@ export type DaemonCommandTraits = Omit; */ export type CommandTimeoutBudget = | { source: 'none' } - | { source: 'flag' } + | { source: 'flag'; envelope?: 'bound' | 'widen' } | { source: 'positional-parser'; parser: (positionals: string[]) => number | null }; /** @@ -82,8 +88,3 @@ export type CommandDescriptor = { mcpExposed: boolean; timeoutPolicy: CommandTimeoutPolicy; }; - -/** Identity helper that pins each entry to the {@link CommandDescriptor} shape. 
*/ -export function defineCommandDescriptor(descriptor: CommandDescriptor): CommandDescriptor { - return descriptor; -} diff --git a/src/daemon/__tests__/response-views.test.ts b/src/daemon/__tests__/response-views.test.ts index cadcc64e5..814140511 100644 --- a/src/daemon/__tests__/response-views.test.ts +++ b/src/daemon/__tests__/response-views.test.ts @@ -252,3 +252,65 @@ test('snapshot digest preserves refsGeneration — the pinning signal for the re const digest = RESPONSE_VIEWS.snapshot!({ ...SNAPSHOT_DATA, refsGeneration: 7 }, 'digest'); expect(digest.refsGeneration).toBe(7); }); + +// --- #1101 --settle: interaction settle digest view --- + +const SETTLE_DATA: DaemonResponseData = { + ref: 'e2', + x: 200, + y: 322, + message: 'Tapped @e2 (200, 322)', + settle: { + settled: true, + waitedMs: 60, + captures: 2, + quietMs: 25, + timeoutMs: 2000, + refsGeneration: 8, + diff: { + summary: { additions: 1, removals: 1, unchanged: 4 }, + lines: [ + { kind: 'removed', text: '@e2 [button] "Continue"' }, + { kind: 'added', text: '@e4 [text] "Welcome!"', ref: 'e4' }, + ], + }, + }, +}; + +test('interaction settle views are registered for all four touch commands', () => { + expect(typeof RESPONSE_VIEWS.press).toBe('function'); + expect(RESPONSE_VIEWS.press).toBe(RESPONSE_VIEWS.click); + expect(RESPONSE_VIEWS.press).toBe(RESPONSE_VIEWS.fill); + expect(RESPONSE_VIEWS.press).toBe(RESPONSE_VIEWS.longpress); +}); + +test('settle digest keeps the verdict, summary, and refsGeneration; drops the line texts', () => { + const digest = RESPONSE_VIEWS.press!(SETTLE_DATA, 'digest'); + expect(digest.settle).toEqual({ + settled: true, + waitedMs: 60, + captures: 2, + quietMs: 25, + timeoutMs: 2000, + refsGeneration: 8, + diff: { summary: { additions: 1, removals: 1, unchanged: 4 } }, + }); + // Every other (cheap) field is preserved verbatim. 
+ expect(digest.ref).toBe('e2'); + expect(digest.message).toBe('Tapped @e2 (200, 322)'); +}); + +test('plain interaction responses pass through UNCHANGED at every level', () => { + const plain: DaemonResponseData = { ref: 'e2', x: 200, y: 322, message: 'Tapped @e2' }; + expect(RESPONSE_VIEWS.press!(plain, 'digest')).toBe(plain); + expect(RESPONSE_VIEWS.press!(plain, 'default')).toBe(plain); + expect(RESPONSE_VIEWS.press!(plain, 'full')).toBe(plain); + // A diff-less settle payload (stalled/unstored observation) is already cheap. + const noDiff: DaemonResponseData = { ref: 'e2', settle: { settled: false, hint: 'stalled' } }; + expect(RESPONSE_VIEWS.press!(noDiff, 'digest')).toBe(noDiff); +}); + +test('settle default and full return today’s shape unchanged (same reference)', () => { + expect(RESPONSE_VIEWS.press!(SETTLE_DATA, 'default')).toBe(SETTLE_DATA); + expect(RESPONSE_VIEWS.press!(SETTLE_DATA, 'full')).toBe(SETTLE_DATA); +}); diff --git a/src/daemon/client/daemon-client.ts b/src/daemon/client/daemon-client.ts index 2124cc1c1..98c6ac588 100644 --- a/src/daemon/client/daemon-client.ts +++ b/src/daemon/client/daemon-client.ts @@ -134,6 +134,13 @@ export function resolveDaemonRequestTimeoutMs( } } if (policy.budget.source === 'flag' && typeof req.flags?.timeoutMs === 'number') { + // 'widen' budgets (interaction --settle, #1101) bound an internal wait the + // request must outlive: extend the envelope past the budget like wait's + // positional budget, never shrink it. Plain 'bound' budgets (replay, + // prepare, snapshot) replace the envelope verbatim. 
+ if (policy.budget.envelope === 'widen') { + return Math.max(policy.envelopeMs, req.flags.timeoutMs + REQUEST_TIMEOUT_BUDGET_MARGIN_MS); + } return req.flags.timeoutMs; } return policy.envelopeMs; diff --git a/src/daemon/handlers/__tests__/interaction-settle.test.ts b/src/daemon/handlers/__tests__/interaction-settle.test.ts new file mode 100644 index 000000000..2dfbee9f7 --- /dev/null +++ b/src/daemon/handlers/__tests__/interaction-settle.test.ts @@ -0,0 +1,298 @@ +import { test, expect, vi, beforeEach } from 'vitest'; +import { handleInteractionCommands } from '../interaction.ts'; +import type { SessionStore } from '../../session-store.ts'; +import type { SessionState } from '../../types.ts'; +import type { CommandFlags } from '../../../core/dispatch.ts'; +import type { SnapshotBackend } from '../../../kernel/snapshot.ts'; +import { buildSnapshotState } from '../snapshot-capture.ts'; +import { setSessionSnapshot } from '../../session-snapshot.ts'; +import { makeSessionStore } from '../../../__tests__/test-utils/store-factory.ts'; +import { makeIosSession } from '../../../__tests__/test-utils/session-factories.ts'; + +// #1101 --settle daemon response shape: the settle payload (diff + settled + +// refsGeneration) rides the wire response through the shared builder, and a +// diff-carrying settle response is ref-issuing (clears snapshotRefsStale). +// Quiet windows are tuned down (--settle-quiet 25) so no test waits real time +// beyond a few poll ticks. 
+ +vi.mock('../../../core/dispatch.ts', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + dispatchCommand: vi.fn(async () => ({})), + }; +}); + +vi.mock('../interaction-snapshot.ts', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + captureSnapshotForSession: vi.fn(async () => ({ + nodes: [], + createdAt: 0, + backend: 'xctest' as const, + })), + }; +}); + +import { dispatchCommand } from '../../../core/dispatch.ts'; +import { captureSnapshotForSession } from '../interaction-snapshot.ts'; +const mockDispatch = vi.mocked(dispatchCommand); +const mockCaptureSnapshotForSession = vi.mocked(captureSnapshotForSession); + +const BEFORE_NODES = [ + { index: 0, type: 'Application', rect: { x: 0, y: 0, width: 390, height: 844 } }, + { + index: 1, + parentIndex: 0, + type: 'Button', + label: 'Continue', + rect: { x: 10, y: 20, width: 120, height: 44 }, + hittable: true, + }, +]; + +const AFTER_NODES = [ + { index: 0, type: 'Application', rect: { x: 0, y: 0, width: 390, height: 844 } }, + { + index: 1, + parentIndex: 0, + type: 'StaticText', + label: 'Welcome!', + rect: { x: 10, y: 20, width: 120, height: 44 }, + hittable: true, + }, +]; + +async function emulateCaptureSnapshotForSession( + session: SessionState, + flags: CommandFlags | undefined, + sessionStore: SessionStore, + contextFromFlags: ( + flags: CommandFlags | undefined, + appBundleId?: string, + traceLogPath?: string, + ) => Record, + options: { interactiveOnly: boolean }, +) { + const effectiveFlags = { ...(flags ?? {}), snapshotInteractiveOnly: options.interactiveOnly }; + const snapshotData = (await mockDispatch( + session.device, + 'snapshot', + [], + effectiveFlags.out, + contextFromFlags(effectiveFlags, session.appBundleId, session.trace?.outPath), + )) as { nodes?: never[]; truncated?: boolean; backend?: SnapshotBackend }; + const snapshot = buildSnapshotState(snapshotData ?? 
{}, effectiveFlags); + setSessionSnapshot(session, snapshot); + sessionStore.set(session.name, session); + return snapshot; +} + +function seedSession(sessionName: string, sessionStore: ReturnType) { + const session = makeIosSession(sessionName); + setSessionSnapshot(session, buildSnapshotState({ nodes: BEFORE_NODES, backend: 'xctest' }, {})); + // The seed emulates a snapshot response that issued these refs. + session.snapshotRefsStale = false; + sessionStore.set(sessionName, session); + return session; +} + +function mockCommandDispatch(params: { snapshots: Array }) { + let snapshotCalls = 0; + mockDispatch.mockImplementation(async (_device, command) => { + if (command === 'snapshot') { + const nodes = params.snapshots[Math.min(snapshotCalls, params.snapshots.length - 1)]; + snapshotCalls += 1; + return { nodes, backend: 'xctest' }; + } + return {}; + }); +} + +const contextFromFlags = () => ({}); + +beforeEach(() => { + mockDispatch.mockReset(); + mockDispatch.mockResolvedValue({}); + mockCaptureSnapshotForSession.mockReset(); + mockCaptureSnapshotForSession.mockImplementation(emulateCaptureSnapshotForSession); +}); + +const SETTLE_FLAGS = { settle: true, settleQuietMs: 25, timeoutMs: 2_000 }; + +type SettlePayload = { + settled: boolean; + captures: number; + quietMs: number; + timeoutMs: number; + refsGeneration?: number; + diff?: { + summary: { additions: number; removals: number; unchanged: number }; + lines: Array<{ kind: string; text: string; ref?: string }>; + }; + hint?: string; +}; + +function expectOkData( + response: Awaited>, +): Record { + expect(response?.ok).toBe(true); + if (!response || response.ok !== true) throw new Error('expected an ok daemon response'); + return (response.data ?? 
{}) as Record; +} + +test('press --settle responds with the settled diff, refsGeneration, and clears the stale marker', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'settle-press'; + seedSession(sessionName, sessionStore); + // Resolution capture sees the pre-action tree; settle captures see the + // settled post-action tree. + mockCommandDispatch({ snapshots: [BEFORE_NODES, AFTER_NODES, AFTER_NODES, AFTER_NODES] }); + + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'press', + positionals: ['label=Continue'], + flags: { ...SETTLE_FLAGS }, + }, + sessionName, + sessionStore, + contextFromFlags, + }); + + const data = expectOkData(response); + const settle = data.settle as SettlePayload; + expect(settle).toBeTruthy(); + expect(settle.settled).toBe(true); + expect(settle.quietMs).toBe(25); + expect(settle.timeoutMs).toBe(2_000); + const diff = settle.diff as NonNullable; + expect(diff.summary).toEqual({ additions: 1, removals: 1, unchanged: 1 }); + const added = diff.lines.find((line) => line.kind === 'added'); + expect(added).toEqual({ kind: 'added', text: expect.stringContaining('Welcome!'), ref: 'e2' }); + + const session = sessionStore.get(sessionName) as SessionState; + // The settle response handed the settled tree's refs to the client: the + // coarse marker clears and the payload carries the stored generation. + expect(session.snapshotRefsStale).toBe(false); + expect(settle.refsGeneration).toBe(session.snapshotGeneration); + // The settled tree became the stored session snapshot. 
+ expect(session.snapshot?.nodes.some((node) => node.label === 'Welcome!')).toBe(true); +}); + +test('press --settle keeps the stale-refs input warning while re-issuing fresh refs', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'settle-stale-ref'; + const session = seedSession(sessionName, sessionStore); + session.snapshotRefsStale = true; + sessionStore.set(sessionName, session); + mockCommandDispatch({ snapshots: [AFTER_NODES, AFTER_NODES] }); + + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'press', + positionals: ['@e2'], + flags: { ...SETTLE_FLAGS }, + }, + sessionName, + sessionStore, + contextFromFlags, + }); + + const data = expectOkData(response); + // The @ref was consumed while stale — the input warning stands… + expect(String(data.warning)).toMatch(/refs were issued/); + // …and the settled diff re-issues refs, so the NEXT @ref command is clean. + expect(sessionStore.get(sessionName)?.snapshotRefsStale).toBe(false); +}); + +test('a settle observation without a diff leaves ref staleness untouched', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'settle-stalled'; + seedSession(sessionName, sessionStore); + let snapshotCalls = 0; + mockDispatch.mockImplementation(async (_device, command) => { + if (command === 'snapshot') { + snapshotCalls += 1; + if (snapshotCalls === 1) return { nodes: BEFORE_NODES, backend: 'xctest' }; + throw new Error('AX bridge crashed'); + } + return {}; + }); + + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'press', + positionals: ['label=Continue'], + flags: { ...SETTLE_FLAGS }, + }, + sessionName, + sessionStore, + contextFromFlags, + }); + + // The press still succeeds; the observation reports its own failure. 
+ const data = expectOkData(response); + const settle = data.settle as SettlePayload; + expect(settle.settled).toBe(false); + expect(settle.diff).toBeUndefined(); + expect(settle.hint).toMatch(/Settle observation unavailable/); + // No refs were issued: the resolution capture left the marker stale. + expect(sessionStore.get(sessionName)?.snapshotRefsStale).toBe(true); +}); + +test('settle tuning flags without --settle are rejected', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'settle-guard'; + seedSession(sessionName, sessionStore); + + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'press', + positionals: ['label=Continue'], + flags: { settleQuietMs: 25, timeoutMs: 2_000 }, + }, + sessionName, + sessionStore, + contextFromFlags, + }); + + expect(response?.ok).toBe(false); + if (response?.ok !== false) return; + expect(response.error?.code).toBe('INVALID_ARGS'); + expect(response.error?.message).toMatch(/--settle-quiet, --timeout require --settle/); +}); + +test('fill @ref --settle carries the settle payload on the ref wire shape', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'settle-fill'; + seedSession(sessionName, sessionStore); + mockCommandDispatch({ snapshots: [AFTER_NODES, AFTER_NODES] }); + + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'fill', + positionals: ['@e2', 'hello'], + flags: { ...SETTLE_FLAGS }, + }, + sessionName, + sessionStore, + contextFromFlags, + }); + + const data = expectOkData(response); + const settle = data.settle as SettlePayload; + expect(settle.settled).toBe(true); + expect(typeof settle.refsGeneration).toBe('number'); +}); diff --git a/src/daemon/handlers/interaction-flags.ts b/src/daemon/handlers/interaction-flags.ts index 5a8d840e1..b9e6eb3bf 100644 --- a/src/daemon/handlers/interaction-flags.ts +++ 
b/src/daemon/handlers/interaction-flags.ts @@ -1,4 +1,5 @@ import type { CommandFlags } from '../../core/dispatch.ts'; +import type { SettleParams } from '../../contracts/interaction.ts'; import type { DaemonResponse } from '../types.ts'; import { errorResponse } from './response.ts'; @@ -30,3 +31,36 @@ export function unsupportedRefSnapshotFlags(flags: CommandFlags | undefined): st } return unsupported; } + +/** + * `--settle` (#1101) flag grammar on press/click/fill/longpress: + * `--settle` opts in, `--settle-quiet <ms>` overrides the quiet window, and + * `--timeout <ms>` bounds the settle wait (the same budget the descriptor's + * flag-sourced timeout policy widens the request envelope past, mirroring + * wait's positional budget). The two tuning flags without `--settle` would + * silently do nothing — reject them so a typo cannot masquerade as a settled + * interaction. + */ +export function settleFlagGuardResponse( + command: 'press' | 'click' | 'fill' | 'longpress', + flags: CommandFlags | undefined, +): DaemonResponse | null { + if (!flags || flags.settle === true) return null; + const orphaned: string[] = []; + if (flags.settleQuietMs !== undefined) orphaned.push('--settle-quiet'); + if (flags.timeoutMs !== undefined) orphaned.push('--timeout'); + if (orphaned.length === 0) return null; + return errorResponse( + 'INVALID_ARGS', + `${command}: ${orphaned.join(', ')} require${orphaned.length === 1 ? 's' : ''} --settle.`, + ); +} + +/** The runtime settle request for a command's flags, or undefined without --settle. */ +export function readSettleRequest(flags: CommandFlags | undefined): SettleParams | undefined { + if (flags?.settle !== true) return undefined; + return { + ...(flags.settleQuietMs !== undefined ? { quietMs: flags.settleQuietMs } : {}), + ...(flags.timeoutMs !== undefined ? 
{ timeoutMs: flags.timeoutMs } : {}), + }; +} diff --git a/src/daemon/handlers/interaction-touch-response.ts b/src/daemon/handlers/interaction-touch-response.ts index e6290b256..e86d6232f 100644 --- a/src/daemon/handlers/interaction-touch-response.ts +++ b/src/daemon/handlers/interaction-touch-response.ts @@ -3,6 +3,7 @@ import type { FillCommandResult, LongPressCommandResult, PressCommandResult, + SettleObservation, } from '../../contracts/interaction.ts'; import { successText } from '../../utils/success-text.ts'; import { interactionResultExtra, stripAtPrefix } from './interaction-touch-targets.ts'; @@ -64,6 +65,15 @@ export function buildInteractionResponseData(params: { * warning. */ staleRefsWarning?: string; + /** + * `--settle` (#1101): the session's `snapshotGeneration` AFTER the settled + * tree became the stored snapshot. Rides INSIDE the settle payload as + * `settle.refsGeneration` — a settle response with a diff hands the client + * fresh refs (added lines carry them), making it ref-issuing like snapshot/ + * find; the generation is what MCP auto-pinning merges per-ref (#1076). + * Only attached when the settle observation actually carries a diff. + */ + settleRefsGeneration?: number; }): InteractionResponsePayloads { const { source, referenceFrame, extra } = params; if (source.kind === 'runner-payload') { @@ -85,6 +95,7 @@ export function buildInteractionResponseData(params: { referenceFrame, extra: { ...interactionResultExtra(result), + ...settleExtra(result.settle, params.settleRefsGeneration), ...(extra ?? {}), }, }); @@ -96,6 +107,7 @@ export function buildInteractionResponseData(params: { ...(result.point ? 
{ x: result.point.x, y: result.point.y } : {}), }), ...interactionResultExtra(result), + ...settleExtra(result.settle, params.settleRefsGeneration), } : visualization; const warning = composeResponseWarning( @@ -109,6 +121,17 @@ export function buildInteractionResponseData(params: { return { result: visualization, responseData }; } +// Attaches refsGeneration inside the settle payload when the response is +// ref-issuing (diff present). Overrides the raw `settle` from +// interactionResultExtra by key order in the extras spread. +function settleExtra( + settle: SettleObservation | undefined, + refsGeneration: number | undefined, +): Record { + if (!settle?.diff || refsGeneration === undefined) return {}; + return { settle: { ...settle, refsGeneration } }; +} + function composeResponseWarning( resultWarning: string | undefined, staleRefsWarning: string | undefined, diff --git a/src/daemon/handlers/interaction-touch-targets.ts b/src/daemon/handlers/interaction-touch-targets.ts index 76eac41d4..b83917ce0 100644 --- a/src/daemon/handlers/interaction-touch-targets.ts +++ b/src/daemon/handlers/interaction-touch-targets.ts @@ -175,8 +175,10 @@ export function interactionResultExtra( ): Record { // `evidence` (#1047, opt-in via --verify) is additive on press/fill only — // LongPressCommandResult has no evidence field, so it reads as undefined - // (and gets dropped by the response layer) for longpress. + // (and gets dropped by the response layer) for longpress. `settle` (#1101, + // opt-in via --settle) is additive on all four touch commands. const evidence = 'evidence' in result ? result.evidence : undefined; + const settle = result.settle; if (result.kind === 'ref') { return { ref: stripAtPrefix(result.target?.kind === 'ref' ? 
result.target.ref : undefined), @@ -185,6 +187,7 @@ export function interactionResultExtra( targetHittable: result.targetHittable, hint: result.hint, evidence, + settle, }; } if (result.kind === 'selector') { @@ -195,9 +198,10 @@ export function interactionResultExtra( targetHittable: result.targetHittable, hint: result.hint, evidence, + settle, }; } - return { evidence }; + return { evidence, settle }; } export function formatTouchTargetLabel( diff --git a/src/daemon/handlers/interaction-touch.ts b/src/daemon/handlers/interaction-touch.ts index 8973aeadc..6bbd3b32b 100644 --- a/src/daemon/handlers/interaction-touch.ts +++ b/src/daemon/handlers/interaction-touch.ts @@ -14,13 +14,17 @@ import type { import { asAppError, normalizeError } from '../../kernel/errors.ts'; import type { DaemonResponse, SessionState } from '../types.ts'; import { finalizeTouchInteraction, type InteractionHandlerParams } from './interaction-common.ts'; -import { resolveRefStalenessWarning } from '../session-snapshot.ts'; +import { markSessionSnapshotRefsIssued, resolveRefStalenessWarning } from '../session-snapshot.ts'; import { buildInteractionResponseData, type InteractionResponsePayloads, } from './interaction-touch-response.ts'; import type { CaptureSnapshotForSession } from './interaction-snapshot.ts'; -import type { RefSnapshotFlagGuardResponse } from './interaction-flags.ts'; +import { + readSettleRequest, + settleFlagGuardResponse, + type RefSnapshotFlagGuardResponse, +} from './interaction-flags.ts'; import { readSnapshotNodesReferenceFrame, resolveDirectTouchReferenceFrameSafely, @@ -93,6 +97,8 @@ async function dispatchTargetedTouchViaRuntime( if (unsupportedSurfaceResponse) return unsupportedSurfaceResponse; const unsupported = requireCommandSupported(capabilityCommand, session.device); if (unsupported) return unsupported; + const invalidSettleFlags = settleFlagGuardResponse(command, req.flags); + if (invalidSettleFlags) return invalidSettleFlags; const clickButton = 
resolveClickButton(req.flags); const resultButtonTag = buttonTag(clickButton); @@ -204,11 +210,13 @@ async function runTargetedTouchInteraction(params: { durationMs?: number; }): Promise { const { runtime, command, target, sessionName, requestId, flags } = params; + const settle = readSettleRequest(flags); if (command === 'longpress') { return await runtime.interactions.longPress(target, { session: sessionName, requestId, durationMs: params.durationMs, + settle, }); } @@ -222,6 +230,7 @@ async function runTargetedTouchInteraction(params: { jitterPx: flags?.jitterPx, doubleTap: flags?.doubleTap, verify: flags?.verify, + settle, }; return command === 'click' ? await runtime.interactions.click(target, options) @@ -253,9 +262,29 @@ async function buildTargetedTouchResponsePayloads(params: { referenceFrame, extra, staleRefsWarning: params.staleRefsWarning, + settleRefsGeneration: settleRefsGenerationIssue(session, result), }); } +/** + * #1101 `--settle`: a settle observation carrying a diff hands the client refs + * minted from the freshly stored settled tree (added lines carry them), which + * makes the response ref-issuing like snapshot/find (#1076): the coarse + * `snapshotRefsStale` marker clears (the same accepted coarse blessing as + * find's single re-issued ref) and the stored tree's generation rides inside + * the settle payload for MCP per-ref pinning. Without a diff — never captured, + * or sparse-quality capture that was not stored — nothing was issued and the + * staleness machinery is left untouched. + */ +function settleRefsGenerationIssue( + session: SessionState, + result: PressCommandResult | FillCommandResult | LongPressCommandResult, +): number | undefined { + if (!result.settle?.diff) return undefined; + markSessionSnapshotRefsIssued(session); + return session.snapshotGeneration; +} + function readLongPressResultDuration(result: TargetedTouchResult): number | undefined { return 'durationMs' in result ? 
result.durationMs : undefined; } @@ -271,6 +300,8 @@ function readDirectIosSelectorTapTarget(params: { if (target.kind !== 'selector') return null; if (hasNonDefaultClickOptions(flags)) return null; if (flags?.verify === true) return null; + // --settle needs the tree-based runtime path (baseline + settle captures). + if (flags?.settle === true) return null; return readDirectSelectorWithMaestroFallback(session, target.selector, flags); } @@ -438,37 +469,26 @@ async function dispatchFillViaRuntime( if (unsupported) return unsupported; } if (!session) return noActiveSessionError(); + const invalidSettleFlags = settleFlagGuardResponse('fill', req.flags); + if (invalidSettleFlags) return invalidSettleFlags; const parsedTarget = parseFillTarget(req.positionals ?? []); if (!parsedTarget.ok) return parsedTarget.response; - // Read before the Android freshness refresh recaptures — see the press path. - const staleRefsWarning = - parsedTarget.target.kind === 'ref' - ? resolveRefStalenessWarning({ - session, - ref: parsedTarget.target.ref, - mintedGeneration: parsedTarget.refGeneration, - }) - : undefined; - if (parsedTarget.target.kind === 'ref') { - const invalidRefFlagsResponse = params.refSnapshotFlagGuardResponse('fill', req.flags); - if (invalidRefFlagsResponse) return invalidRefFlagsResponse; - await refreshAndroidRefSnapshotIfFreshnessActive(params, session); - } - const directSelector = readDirectIosSelectorFillTarget({ + const refPreamble = await prepareFillRefTarget( + params, session, - target: parsedTarget.target, - flags: req.flags, - }); - if (directSelector) { - const directResponse = await dispatchDirectIosSelectorFill( - params, - session, - directSelector, - parsedTarget.text, - ); - if (directResponse) return directResponse; - } + parsedTarget.target, + parsedTarget.refGeneration, + ); + if (refPreamble.response) return refPreamble.response; + const { staleRefsWarning } = refPreamble; + const directResponse = await maybeDispatchDirectIosSelectorFill( + 
params, + session, + parsedTarget.target, + parsedTarget.text, + ); + if (directResponse) return directResponse; return await dispatchRuntimeInteraction(params, { run: async (runtime) => @@ -477,23 +497,73 @@ async function dispatchFillViaRuntime( requestId: req.meta?.requestId, delayMs: req.flags?.delayMs, verify: req.flags?.verify, + settle: readSettleRequest(req.flags), }), - buildPayloads: (result) => { - const referenceFrame = - result.kind === 'point' - ? undefined - : readSnapshotNodesReferenceFrame(session.snapshot?.nodes ?? []); - return buildInteractionResponseData({ - // refBackendWireShape keeps the historical fill @ref wire response - // (backendResult + identity extras) while the shared builder owns the - // extras — the hand-rolled version of this branch dropped evidence - // (PR #1064 review). - source: { kind: 'runtime', result, refBackendWireShape: true }, - referenceFrame, - extra: { text: parsedTarget.text }, - staleRefsWarning, - }); - }, + buildPayloads: (result) => + buildFillResponsePayloads({ session, result, text: parsedTarget.text, staleRefsWarning }), + }); +} + +// The fill @ref preamble shared with the press path's shape: read staleness +// relative to what the client knew BEFORE any internal recapture, validate +// @ref-incompatible flags, and run the Android freshness refresh. 
+async function prepareFillRefTarget( + params: InteractionHandlerParams & { + captureSnapshotForSession: CaptureSnapshotForSession; + refSnapshotFlagGuardResponse: RefSnapshotFlagGuardResponse; + }, + session: SessionState, + target: InteractionTarget, + refGeneration: number | undefined, +): Promise<{ response?: DaemonResponse; staleRefsWarning?: string }> { + if (target.kind !== 'ref') return {}; + const staleRefsWarning = resolveRefStalenessWarning({ + session, + ref: target.ref, + mintedGeneration: refGeneration, + }); + const invalidRefFlagsResponse = params.refSnapshotFlagGuardResponse('fill', params.req.flags); + if (invalidRefFlagsResponse) return { response: invalidRefFlagsResponse, staleRefsWarning }; + await refreshAndroidRefSnapshotIfFreshnessActive(params, session); + return { staleRefsWarning }; +} + +async function maybeDispatchDirectIosSelectorFill( + params: InteractionHandlerParams & { captureSnapshotForSession: CaptureSnapshotForSession }, + session: SessionState, + target: InteractionTarget, + text: string, +): Promise { + const directSelector = readDirectIosSelectorFillTarget({ + session, + target, + flags: params.req.flags, + }); + if (!directSelector) return null; + return await dispatchDirectIosSelectorFill(params, session, directSelector, text); +} + +function buildFillResponsePayloads(params: { + session: SessionState; + result: FillCommandResult; + text: string; + staleRefsWarning: string | undefined; +}): InteractionResponsePayloads { + const { session, result } = params; + const referenceFrame = + result.kind === 'point' + ? undefined + : readSnapshotNodesReferenceFrame(session.snapshot?.nodes ?? []); + return buildInteractionResponseData({ + // refBackendWireShape keeps the historical fill @ref wire response + // (backendResult + identity extras) while the shared builder owns the + // extras — the hand-rolled version of this branch dropped evidence + // (PR #1064 review). 
+ source: { kind: 'runtime', result, refBackendWireShape: true }, + referenceFrame, + extra: { text: params.text }, + staleRefsWarning: params.staleRefsWarning, + settleRefsGeneration: settleRefsGenerationIssue(session, result), }); } @@ -505,6 +575,8 @@ function readDirectIosSelectorFillTarget(params: { const { session, target, flags } = params; if (target.kind !== 'selector') return null; if (flags?.verify === true) return null; + // --settle needs the tree-based runtime path (baseline + settle captures). + if (flags?.settle === true) return null; return readDirectSelectorWithMaestroFallback(session, target.selector, flags); } diff --git a/src/daemon/response-views.ts b/src/daemon/response-views.ts index 8b6660ecf..70b911adf 100644 --- a/src/daemon/response-views.ts +++ b/src/daemon/response-views.ts @@ -123,9 +123,33 @@ function selectorReadView(data: DaemonResponseData, level: ResponseLevel): Daemo return { ...data, node: compactSelectorNode(node as SnapshotNode) }; } +/** + * Token-cheap settle digest for interaction commands (#1101). CONSERVATIVE: + * only acts on a result that carries a `settle.diff` payload (the `--settle` + * opt-in) and otherwise returns the data UNCHANGED, so plain interaction + * responses stay byte-identical at every level. The digest keeps the verdict + * fields and the changed-line COUNTS (`diff.summary`) plus `refsGeneration`, + * and drops the diff line texts — the changed-count summary is the digest + * answer; the lines are the default-level payload. `full` returns today's + * shape unchanged (nothing richer is computed yet). 
+ */ +function interactionSettleView(data: DaemonResponseData, level: ResponseLevel): DaemonResponseData { + if (level !== 'digest') return data; + const settle = data.settle; + if (!settle || typeof settle !== 'object' || Array.isArray(settle)) return data; + const { diff, ...rest } = settle as Record; + if (!diff || typeof diff !== 'object' || Array.isArray(diff)) return data; + const summary = (diff as Record).summary; + return { ...data, settle: { ...rest, diff: { summary } } }; +} + export const RESPONSE_VIEWS: Record = { snapshot: snapshotView, screenshot: screenshotView, find: selectorReadView, get: selectorReadView, + press: interactionSettleView, + click: interactionSettleView, + fill: interactionSettleView, + longpress: interactionSettleView, }; diff --git a/src/daemon/session-snapshot.ts b/src/daemon/session-snapshot.ts index 25ad0b86d..ab6f37c67 100644 --- a/src/daemon/session-snapshot.ts +++ b/src/daemon/session-snapshot.ts @@ -32,9 +32,13 @@ export const STALE_SNAPSHOT_REFS_WARNING = * issuing the full ref set and stays conservative-stale) * * Cleared (set false) only where the client demonstrably receives the new - * refs: the snapshot command response (buildNextSnapshotSession) and find + * refs: the snapshot command response (buildNextSnapshotSession), find * responses that return a ref minted from the freshly stored tree - * (handlers/find.ts, dispatchFindReadOnlyViaRuntime in selector-runtime.ts). + * (handlers/find.ts, dispatchFindReadOnlyViaRuntime in selector-runtime.ts), + * and interaction --settle responses whose settled diff carries refs minted + * from the freshly stored settled tree (settleRefsGenerationIssue in + * handlers/interaction-touch.ts — the same accepted coarse blessing as find's + * single re-issued ref; per-ref precision is the MCP pin layer's job). 
*/ export function setSessionSnapshot(session: SessionState, snapshot: SnapshotState): void { if (session.snapshot !== snapshot) { diff --git a/src/mcp/__tests__/command-tools.test.ts b/src/mcp/__tests__/command-tools.test.ts index f9489f381..d24edeca1 100644 --- a/src/mcp/__tests__/command-tools.test.ts +++ b/src/mcp/__tests__/command-tools.test.ts @@ -416,6 +416,87 @@ test('MCP merges digest-level snapshot refs too', async () => { assert.deepEqual(runCalls[1]?.input, { target: { kind: 'ref', ref: '@e9~s41' } }); }); +// --- #1101 --settle: interaction responses re-pin from the settled diff --- + +test('MCP merges per-ref pins from a settle response diff (merge-only)', async () => { + const runCalls: Array<{ name: string; input: unknown }> = []; + const executor = createCommandToolExecutor({ + createClient: () => ({}) as AgentDeviceClient, + runCommand: async (_client, name, input) => { + runCalls.push({ name, input }); + if (name === 'snapshot') { + return { nodes: [{ ref: 'e2' }, { ref: 'e37' }], truncated: false, refsGeneration: 7 }; + } + if (name === 'press') { + // press @e2 --settle: the settled diff issues e4 at generation 8. 
+ return { + ref: 'e2', + settle: { + settled: true, + waitedMs: 60, + captures: 2, + quietMs: 25, + timeoutMs: 2000, + refsGeneration: 8, + diff: { + summary: { additions: 1, removals: 1, unchanged: 1 }, + lines: [ + { kind: 'removed', text: '@e2 [button] "Continue"' }, + { kind: 'added', text: '@e4 [text] "Welcome!"', ref: 'e4' }, + ], + }, + }, + }; + } + return {}; + }, + }); + + await executor.execute('snapshot', { session: 'demo' }); + await executor.execute('press', { session: 'demo', target: { kind: 'ref', ref: '@e2' } }); + await executor.execute('press', { session: 'demo', target: { kind: 'ref', ref: '@e4' } }); + await executor.execute('get', { + session: 'demo', + format: 'text', + target: { kind: 'ref', ref: '@e37' }, + }); + + // The first press consumed the snapshot pin… + assert.deepEqual(runCalls[1]?.input, { session: 'demo', target: { kind: 'ref', ref: '@e2~s7' } }); + // …its settle diff issued e4 at the settle generation… + assert.deepEqual(runCalls[2]?.input, { session: 'demo', target: { kind: 'ref', ref: '@e4~s8' } }); + // …and refs absent from the diff keep their older pins (merge-only), so the + // daemon can warn precisely about the replaced tree. + assert.deepEqual(runCalls[3]?.input, { + session: 'demo', + format: 'text', + target: { kind: 'ref', ref: '@e37~s7' }, + }); +}); + +test('MCP leaves pins untouched for plain (non-settle) interaction responses', async () => { + const runCalls: Array<{ name: string; input: unknown }> = []; + const executor = createCommandToolExecutor({ + createClient: () => ({}) as AgentDeviceClient, + runCommand: async (_client, name, input) => { + runCalls.push({ name, input }); + if (name === 'snapshot') { + return { nodes: [{ ref: 'e2' }], truncated: false, refsGeneration: 7 }; + } + // A plain press response has no settle payload and no refsGeneration — + // it must NOT clear the scope (only snapshot/find responses without a + // generation do that). 
+ return { ref: 'e2', x: 10, y: 20 }; + }, + }); + + await executor.execute('snapshot', { session: 'demo' }); + await executor.execute('press', { session: 'demo', target: { kind: 'ref', ref: '@e2' } }); + await executor.execute('press', { session: 'demo', target: { kind: 'ref', ref: '@e2' } }); + + assert.deepEqual(runCalls[2]?.input, { session: 'demo', target: { kind: 'ref', ref: '@e2~s7' } }); +}); + test('MCP passes never-issued refs through unpinned (coarse floor, never guess)', async () => { const runCalls: Array<{ name: string; input: unknown }> = []; const executor = createPinningExecutor(runCalls); diff --git a/src/mcp/command-output-schemas.ts b/src/mcp/command-output-schemas.ts index 0df701371..000165eb7 100644 --- a/src/mcp/command-output-schemas.ts +++ b/src/mcp/command-output-schemas.ts @@ -180,6 +180,51 @@ const interactionEvidenceSchema: JsonSchema = objectSchema( ['nodeCount', 'interactiveNodeCount', 'digest', 'changedFromBefore'], ); +// SettleObservation (src/contracts/interaction.ts) — opt-in `--settle` settled +// diff observation (#1101). +const settleObservationSchema: JsonSchema = objectSchema( + { + settled: booleanSchema( + 'Whether the UI held the quiet window before the deadline. 
false is advisory, not failure.', + ), + waitedMs: numberSchema(), + captures: numberSchema(), + quietMs: numberSchema(), + timeoutMs: numberSchema(), + refsGeneration: numberSchema( + 'Snapshot generation of the stored settled tree; refs on added diff lines were minted from it.', + ), + diff: objectSchema( + { + summary: objectSchema( + { + additions: numberSchema(), + removals: numberSchema(), + unchanged: numberSchema(), + }, + ['additions', 'removals', 'unchanged'], + ), + lines: { + type: 'array', + items: objectSchema( + { + kind: enumSchema(['added', 'removed']), + text: stringSchema(), + ref: stringSchema('Plain ref body (e12) for added lines.'), + }, + ['kind', 'text'], + ), + }, + truncated: booleanSchema('Lines were capped to the response bound.'), + }, + ['summary', 'lines'], + 'Settled diff vs the pre-action tree (changed lines only).', + ), + hint: stringSchema(), + }, + ['settled', 'waitedMs', 'captures', 'quietMs', 'timeoutMs'], +); + // boot / shutdown share the resolved-device header (src/contracts/device.ts). 
const deviceHeaderProperties: Record = { platform: enumSchema(PLATFORMS), @@ -210,6 +255,7 @@ export const COMMAND_OUTPUT_SCHEMAS = { message: stringSchema(), warning: stringSchema(), evidence: interactionEvidenceSchema, + settle: settleObservationSchema, }, }), fill: interactionResultSchema({ @@ -219,6 +265,7 @@ export const COMMAND_OUTPUT_SCHEMAS = { backendResult: backendResultSchema, message: stringSchema(), evidence: interactionEvidenceSchema, + settle: settleObservationSchema, }, required: ['text'], }), @@ -228,6 +275,7 @@ export const COMMAND_OUTPUT_SCHEMAS = { backendResult: backendResultSchema, message: stringSchema(), warning: stringSchema(), + settle: settleObservationSchema, }, }), diff --git a/src/mcp/command-tools.ts b/src/mcp/command-tools.ts index 5e493a323..56bb87268 100644 --- a/src/mcp/command-tools.ts +++ b/src/mcp/command-tools.ts @@ -113,6 +113,22 @@ export function createCommandToolExecutor(deps: CommandToolExecutorDeps = {}): C */ const REF_ISSUING_TOOLS: ReadonlySet = new Set(['snapshot', 'find'] as CommandName[]); +/** + * `--settle` (#1101) makes an interaction response CONDITIONALLY ref-issuing: + * when it carries `settle.diff` + `settle.refsGeneration`, the diff's added + * lines hand out refs minted from the freshly stored settled tree. These tools + * are NOT in REF_ISSUING_TOOLS on purpose — a plain (non-settle) press carries + * no generation, and treating that as "issuing response without a generation" + * would clear the scope's pins on every ordinary tap. Absent or diff-less + * settle payloads leave pins untouched. 
+ */ +const SETTLE_REF_ISSUING_TOOLS: ReadonlySet = new Set([ + 'press', + 'click', + 'fill', + 'longpress', +] as CommandName[]); + const TARGET_REF_TOOLS: ReadonlySet = new Set([ 'press', 'click', @@ -155,6 +171,10 @@ function mergeIssuedRefPins( name: CommandName, result: unknown, ): void { + if (SETTLE_REF_ISSUING_TOOLS.has(name)) { + mergeSettleIssuedRefPins(refPinsByScope, scopeKey, result); + return; + } if (!REF_ISSUING_TOOLS.has(name)) return; const record = asOptionalRecord(result); const refsGeneration = record?.refsGeneration; @@ -169,6 +189,30 @@ function mergeIssuedRefPins( recordIssuedPins(pins, issuedRefs, refsGeneration); } +/** + * MERGE-ONLY, like the snapshot/find rule: refs on the settled diff's added + * lines move to the settle generation; every other pin stays put (the settle + * capture replaced the tree, so an old pin on an unchanged-looking element is + * exactly what makes the daemon warn precisely). No settle payload, no diff, + * or no generation → not an issuing response; pins are left untouched. + */ +function mergeSettleIssuedRefPins( + refPinsByScope: Map>, + scopeKey: string, + result: unknown, +): void { + const settle = asOptionalRecord(asOptionalRecord(result)?.settle); + const refsGeneration = settle?.refsGeneration; + if (typeof refsGeneration !== 'number') return; + const lines = asOptionalRecord(settle?.diff)?.lines; + const issuedRefs: string[] = []; + collectRefBodies(lines, issuedRefs); + if (issuedRefs.length === 0) return; + const pins = refPinsByScope.get(scopeKey) ?? 
new Map(); + refPinsByScope.set(scopeKey, pins); + recordIssuedPins(pins, issuedRefs, refsGeneration); +} + function recordIssuedPins( pins: Map, issuedRefs: string[], diff --git a/src/snapshot/snapshot-diff.ts b/src/snapshot/snapshot-diff.ts index 8febe3d87..991e106f0 100644 --- a/src/snapshot/snapshot-diff.ts +++ b/src/snapshot/snapshot-diff.ts @@ -9,6 +9,13 @@ import { export type SnapshotDiffLine = { kind: 'added' | 'removed' | 'unchanged'; text: string; + /** + * Plain ref body (`e12`) of the CURRENT-tree node behind an added line. + * Only populated with `withRefs` (interaction `--settle`, #1101) so the + * `diff` command's wire shape stays byte-identical. Removed/unchanged lines + * never carry it: a removed line's ref names a node of the replaced tree. + */ + ref?: string; }; export type SnapshotDiffSummary = { @@ -24,11 +31,14 @@ export type SnapshotDiffResult = { type SnapshotDiffOptions = { flatten?: boolean; + /** Attach the current-tree node ref to added lines (see SnapshotDiffLine.ref). */ + withRefs?: boolean; }; type SnapshotComparableLine = { text: string; comparable: string; + ref?: string; }; function snapshotNodeToComparableLine(node: SnapshotNode, depthOverride?: number): string { @@ -73,11 +83,13 @@ function snapshotNodesToLines( return nodes.map((node) => ({ text: formatSnapshotLine(node, 0, false), comparable: snapshotNodeToComparableLine(node, 0), + ...(options.withRefs && node.ref ? { ref: node.ref } : {}), })); } return buildSnapshotDisplayLines(nodes).map((line) => ({ text: line.text, comparable: snapshotNodeToComparableLine(line.node, line.depth), + ...(options.withRefs && line.node.ref ? { ref: line.node.ref } : {}), })); } @@ -96,7 +108,7 @@ function diffComparableLinesMyers( for (let d = 0; d <= max; d += 1) { trace.push(new Map(v)); for (let k = -d; k <= d; k += 2) { - const goDown = k === -d || (k !== d && getV(v, k - 1) < getV(v, k + 1)); + const goDown = shouldGoDown(v, k, d); let x = goDown ? 
getV(v, k + 1) : getV(v, k - 1) + 1; let y = x - k; while (x < n && y < m && previous[x]!.comparable === current[y]!.comparable) { @@ -127,7 +139,7 @@ function backtrackMyers( for (let d = trace.length - 1; d >= 0; d -= 1) { const v = trace[d]!; const k = x - y; - const goDown = k === -d || (k !== d && getV(v, k - 1) < getV(v, k + 1)); + const goDown = shouldGoDown(v, k, d); const prevK = goDown ? k + 1 : k - 1; const prevX = getV(v, prevK); const prevY = prevX - prevK; @@ -141,7 +153,8 @@ function backtrackMyers( if (d === 0) break; if (x === prevX) { - lines.push({ kind: 'added', text: current[prevY]!.text }); + const added = current[prevY]!; + lines.push({ kind: 'added', text: added.text, ...(added.ref ? { ref: added.ref } : {}) }); y = prevY; } else { lines.push({ kind: 'removed', text: previous[prevX]!.text }); @@ -153,6 +166,10 @@ function backtrackMyers( return lines; } +function shouldGoDown(v: Map, k: number, d: number): boolean { + return k === -d || (k !== d && getV(v, k - 1) < getV(v, k + 1)); +} + function getV(v: Map, k: number): number { return v.get(k) ?? 
0; } diff --git a/src/utils/__tests__/daemon-client.test.ts b/src/utils/__tests__/daemon-client.test.ts index 0e94dd776..01b69a2bb 100644 --- a/src/utils/__tests__/daemon-client.test.ts +++ b/src/utils/__tests__/daemon-client.test.ts @@ -262,6 +262,35 @@ test('wait request timeout extends past the user-supplied wait budget', () => { assert.equal(resolveDaemonRequestTimeoutMs({ ...base, command: 'wait' }), 90_000); }); +test('interaction --settle budgets widen the envelope like wait budgets, never shrink it', () => { + const base = { + session: 'default', + positionals: ['@e2'], + meta: {}, + }; + + // --timeout bounds the SETTLE wait, so the envelope extends past it… + assert.equal( + resolveDaemonRequestTimeoutMs({ + ...base, + command: 'press', + flags: { settle: true, timeoutMs: 120_000 }, + }), + 150_000, + ); + // …and a small settle deadline never shrinks the envelope (a slow tap must + // not die at the user-supplied settle budget). + assert.equal( + resolveDaemonRequestTimeoutMs({ + ...base, + command: 'fill', + flags: { settle: true, timeoutMs: 5_000 }, + }), + 90_000, + ); + assert.equal(resolveDaemonRequestTimeoutMs({ ...base, command: 'press', flags: {} }), 90_000); +}); + test('snapshot uses the standard daemon request timeout with an explicit override', () => { const base = { session: 'default', diff --git a/test/integration/interaction-contract/coordinate.contract.test.ts b/test/integration/interaction-contract/coordinate.contract.test.ts index 1ebea0178..6d60098ee 100644 --- a/test/integration/interaction-contract/coordinate.contract.test.ts +++ b/test/integration/interaction-contract/coordinate.contract.test.ts @@ -5,7 +5,7 @@ import { makeSnapshotState } from '../../../src/__tests__/test-utils/index.ts'; import { assertRpcError, assertRpcOk } from '../provider-scenarios/assertions.ts'; import { scenarioName } from './coverage-manifest.ts'; import { COORDINATE_COVERAGE } from './coordinate.coverage.ts'; -import { viewportOnlySnapshot } from 
'./fixtures.ts'; +import { settledWelcomeSnapshot, viewportOnlySnapshot } from './fixtures.ts'; import { createContractDevice } from './runtime-harness.ts'; import { runnerTapEntry, runnerTapErrorEntry, withIosContractDaemon } from './daemon-harness.ts'; @@ -52,6 +52,31 @@ test(scenario('verifyEvidence'), async () => { assert.equal(result.evidence?.changedFromBefore, true); }); +test(scenario('settleObservation'), async () => { + let captureCount = 0; + const device = createContractDevice(viewportOnlySnapshot(), { + // Point targets normally skip captures entirely; --settle opts into the + // evidence-baseline capture (first call) and the settle loop's captures. + captureSnapshot: async () => { + captureCount += 1; + if (captureCount === 1) return { snapshot: viewportOnlySnapshot() }; + return { snapshot: settledWelcomeSnapshot() }; + }, + tap: async () => ({ ok: true }), + }); + + const result = await device.interactions.press( + { kind: 'point', x: 10, y: 20 }, + { session: 'default', settle: { quietMs: 25, timeoutMs: 2_000 } }, + ); + + assert.equal(result.kind, 'point'); + const settle = result.settle; + assert.ok(settle, 'point press --settle must return a settle observation'); + assert.equal(settle.settled, true); + assert.deepEqual(settle.diff?.summary, { additions: 1, removals: 1, unchanged: 0 }); +}); + test(scenario('errorTaxonomy'), async () => { await withIosContractDaemon( [runnerTapErrorEntry(new Error('runner tap crashed'))], diff --git a/test/integration/interaction-contract/coordinate.coverage.ts b/test/integration/interaction-contract/coordinate.coverage.ts index e5ea5de53..efdce51ba 100644 --- a/test/integration/interaction-contract/coordinate.coverage.ts +++ b/test/integration/interaction-contract/coordinate.coverage.ts @@ -6,5 +6,7 @@ export const COORDINATE_COVERAGE = definePathCoverage('coordinate', { 'coordinate responseConstruction: daemon press x y response carries the canonical point field set', verifyEvidence: 'coordinate verifyEvidence: 
point click --verify returns a digest with change detection', + settleObservation: + 'coordinate settleObservation: point press --settle captures a baseline and returns the settled diff', errorTaxonomy: 'coordinate errorTaxonomy: backend failure surfaces as a normalized daemon error', }); diff --git a/test/integration/interaction-contract/direct-ios-selector.contract.test.ts b/test/integration/interaction-contract/direct-ios-selector.contract.test.ts index e682e6771..21a26cce4 100644 --- a/test/integration/interaction-contract/direct-ios-selector.contract.test.ts +++ b/test/integration/interaction-contract/direct-ios-selector.contract.test.ts @@ -102,6 +102,45 @@ test(scenario('verifyEvidence'), async () => { ); }); +test(scenario('settleObservation'), async () => { + await withIosContractDaemon( + [ + // --settle disables the direct path: runtime tree capture, coordinate + // tap, then the settle loop's two stable captures of the changed tree. + runnerSnapshotEntry(RUNNER_CONTINUE_NODES), + runnerTapEntry({ x: 200, y: 322 }), + runnerSnapshotEntry(RUNNER_CHANGED_NODES), + runnerSnapshotEntry(RUNNER_CHANGED_NODES), + ], + async (daemon, transcript) => { + const click = await daemon.callCommand('click', ['label=Continue'], { + settle: true, + settleQuietMs: 25, + timeoutMs: 2_000, + }); + const data = assertRpcOk(click); + + assert.equal(transcript.calls[0]?.command, 'ios.runner.snapshot'); + const tapRequest = transcript.calls.find((call) => call.command === 'ios.runner.tap') + ?.request as Record | undefined; + assert.equal(tapRequest?.selectorKey, undefined); + assert.equal(tapRequest?.x, 200); + + const settle = data.settle as Record | undefined; + assert.ok(settle, 'click --settle must return a settle observation'); + assert.equal(settle.settled, true); + assert.equal(typeof settle.refsGeneration, 'number'); + const diff = settle.diff as + | { summary: Record; lines: Array> } + | undefined; + assert.ok(diff, 'settle observation must carry the diff'); + 
assert.deepEqual(diff.summary, { additions: 1, removals: 1, unchanged: 1 }); + // The settled tree itself is never serialized into the response. + assert.equal(data.nodes, undefined); + }, + ); +}); + test(scenario('errorTaxonomy'), async () => { await withIosContractDaemon( [ diff --git a/test/integration/interaction-contract/direct-ios-selector.coverage.ts b/test/integration/interaction-contract/direct-ios-selector.coverage.ts index 0df1e1f80..c9fcb015a 100644 --- a/test/integration/interaction-contract/direct-ios-selector.coverage.ts +++ b/test/integration/interaction-contract/direct-ios-selector.coverage.ts @@ -13,4 +13,6 @@ export const DIRECT_IOS_SELECTOR_COVERAGE = definePathCoverage('direct-ios-selec 'direct-ios-selector responseConstruction: runner payload response carries the canonical selector field set', verifyEvidence: 'direct-ios-selector verifyEvidence: --verify disables the direct path and returns runtime evidence', + settleObservation: + 'direct-ios-selector settleObservation: --settle disables the direct path and returns the runtime settled diff', }); diff --git a/test/integration/interaction-contract/fixtures.ts b/test/integration/interaction-contract/fixtures.ts index ab4438370..232dd73d6 100644 --- a/test/integration/interaction-contract/fixtures.ts +++ b/test/integration/interaction-contract/fixtures.ts @@ -180,6 +180,22 @@ export function nonHittableButtonSnapshot(): SnapshotState { ]); } +// Post-action settled tree for --settle scenarios: vs continueButtonSnapshot +// the Continue button is replaced by a Welcome text — exactly one addition and +// one removal in the settled diff, with the added line carrying its ref. +export function settledWelcomeSnapshot(): SnapshotState { + return makeSnapshotState([ + { + index: 0, + depth: 0, + type: 'StaticText', + label: 'Welcome!', + rect: { x: 10, y: 20, width: 100, height: 40 }, + hittable: true, + }, + ]); +} + // Viewport-only tree for coordinate scenarios. 
export function viewportOnlySnapshot(): SnapshotState { return makeSnapshotState([ diff --git a/test/integration/interaction-contract/native-ref.contract.test.ts b/test/integration/interaction-contract/native-ref.contract.test.ts index c3eaf62af..a167b1637 100644 --- a/test/integration/interaction-contract/native-ref.contract.test.ts +++ b/test/integration/interaction-contract/native-ref.contract.test.ts @@ -8,6 +8,7 @@ import { NATIVE_REF_COVERAGE } from './native-ref.coverage.ts'; import { closedDrawerSnapshot, continueButtonSnapshot, + settledWelcomeSnapshot, coveredButtonSnapshot, nonHittableCellSnapshot, } from './fixtures.ts'; @@ -116,6 +117,33 @@ test(scenario('verifyEvidence'), async () => { assert.ok(result.evidence?.digest.startsWith('ax1:')); }); +test(scenario('settleObservation'), async () => { + const calls: string[] = []; + const device = createContractDevice(continueButtonSnapshot(), { + platform: 'web', + captureSnapshot: async () => ({ snapshot: settledWelcomeSnapshot() }), + tap: async () => ({ ok: true }), + tapTarget: async (_context, target) => { + calls.push(target.ref); + return {}; + }, + }); + + const result = await device.interactions.click(ref('@e1'), { + session: 'default', + settle: { quietMs: 25, timeoutMs: 2_000 }, + }); + + // --settle delegates to the runtime-ref path: the fast path is skipped so + // the baseline and the settle captures exist. 
+ assert.deepEqual(calls, []); + assert.equal(result.kind, 'ref'); + const settle = result.settle; + assert.ok(settle, 'click @ref --settle must return a settle observation'); + assert.equal(settle.settled, true); + assert.deepEqual(settle.diff?.summary, { additions: 1, removals: 1, unchanged: 0 }); +}); + test(scenario('responseIdentity'), async () => { const calls: string[] = []; const device = createNativeRefDevice(continueButtonSnapshot(), calls); diff --git a/test/integration/interaction-contract/native-ref.coverage.ts b/test/integration/interaction-contract/native-ref.coverage.ts index 5cf6c04ba..c9ed53d15 100644 --- a/test/integration/interaction-contract/native-ref.coverage.ts +++ b/test/integration/interaction-contract/native-ref.coverage.ts @@ -10,6 +10,8 @@ export const NATIVE_REF_COVERAGE = definePathCoverage('native-ref', { responseIdentity: 'native-ref responseIdentity: fast-path result echoes the ref target and backend result', verifyEvidence: 'native-ref verifyEvidence: --verify skips the fast path and returns evidence', + settleObservation: + 'native-ref settleObservation: --settle skips the fast path and returns the settled diff', // The preflight raises the runtime path's exact offscreen_ref shape (code, // reason, hint), which is the shared taxonomy on this path. 
errorTaxonomy: diff --git a/test/integration/interaction-contract/runtime-ref.contract.test.ts b/test/integration/interaction-contract/runtime-ref.contract.test.ts index ee642b066..850e4de48 100644 --- a/test/integration/interaction-contract/runtime-ref.contract.test.ts +++ b/test/integration/interaction-contract/runtime-ref.contract.test.ts @@ -12,6 +12,7 @@ import { coveredButtonSnapshot, nonHittableCellSnapshot, RUNNER_CONTINUE_NODES, + settledWelcomeSnapshot, } from './fixtures.ts'; import { createContractDevice } from './runtime-harness.ts'; import { runnerSnapshotEntry, runnerTapEntry, withIosContractDaemon } from './daemon-harness.ts'; @@ -92,6 +93,28 @@ test(scenario('verifyEvidence'), async () => { assert.ok(result.evidence?.digest.startsWith('ax1:')); }); +test(scenario('settleObservation'), async () => { + // The @ref resolves against the STORED session tree (no resolution capture); + // the settle loop's captures see the post-action tree. + const device = createContractDevice(continueButtonSnapshot(), { + captureSnapshot: async () => ({ snapshot: settledWelcomeSnapshot() }), + tap: async () => ({ ok: true }), + }); + + const result = await device.interactions.press(ref('@e1'), { + session: 'default', + settle: { quietMs: 25, timeoutMs: 2_000 }, + }); + + assert.equal(result.kind, 'ref'); + const settle = result.settle; + assert.ok(settle, 'press @ref --settle must return a settle observation'); + assert.equal(settle.settled, true); + // Baseline is the stored pre-action tree the ref was resolved on. 
+  assert.deepEqual(settle.diff?.summary, { additions: 1, removals: 1, unchanged: 0 });
+  assert.equal(settle.diff?.lines.find((line) => line.kind === 'added')?.ref, 'e1');
+});
+
 test(scenario('errorTaxonomy'), async () => {
   const device = createContractDevice(continueButtonSnapshot(), {
     tap: async () => ({ ok: true }),
diff --git a/test/integration/interaction-contract/runtime-ref.coverage.ts b/test/integration/interaction-contract/runtime-ref.coverage.ts
index 323696084..eb9e30c2f 100644
--- a/test/integration/interaction-contract/runtime-ref.coverage.ts
+++ b/test/integration/interaction-contract/runtime-ref.coverage.ts
@@ -9,5 +9,7 @@ export const RUNTIME_REF_COVERAGE = definePathCoverage('runtime-ref', {
   responseIdentity: 'runtime-ref responseIdentity: result echoes the ref target and resolved node',
   verifyEvidence:
     'runtime-ref verifyEvidence: click @ref --verify returns a digest with change detection',
+  settleObservation:
+    'runtime-ref settleObservation: press @ref --settle diffs the settled tree against the stored baseline',
   errorTaxonomy: 'runtime-ref errorTaxonomy: unknown ref fails with the stale-ref hint',
 });
diff --git a/test/integration/interaction-contract/runtime-selector.contract.test.ts b/test/integration/interaction-contract/runtime-selector.contract.test.ts
index 37c1cd8c0..be08df132 100644
--- a/test/integration/interaction-contract/runtime-selector.contract.test.ts
+++ b/test/integration/interaction-contract/runtime-selector.contract.test.ts
@@ -14,6 +14,7 @@ import {
   edgeGrazingDrawerSnapshot,
   nonHittableButtonSnapshot,
   RUNNER_CONTINUE_NODES,
+  settledWelcomeSnapshot,
 } from './fixtures.ts';
 import { createContractDevice } from './runtime-harness.ts';
 import { runnerSnapshotEntry, runnerTapEntry, withIosContractDaemon } from './daemon-harness.ts';
@@ -133,6 +134,39 @@ test(scenario('verifyEvidence'), async () => {
   assert.ok(result.evidence?.digest.startsWith('ax1:'));
 });
 
+// Selector-path settleObservation: pressing with the `settle` option returns
+// the settle observation (settled flag, capture count, diff) on the press
+// result itself.
+test(scenario('settleObservation'), async () => {
+  const before = continueButtonSnapshot();
+  const after = settledWelcomeSnapshot();
+  let captures = 0;
+  const device = createContractDevice(before, {
+    // Resolution capture sees the pre-action tree; every settle capture sees
+    // the (already stable) post-action tree.
+    captureSnapshot: async () => ({ snapshot: captures++ === 0 ? before : after }),
+    tap: async () => ({ ok: true }),
+  });
+
+  const result = await device.interactions.press(selector('label=Continue'), {
+    session: 'default',
+    settle: { quietMs: 25, timeoutMs: 2_000 },
+  });
+
+  assert.equal(result.kind, 'selector');
+  const settle = result.settle;
+  assert.ok(settle, 'press --settle must return a settle observation');
+  assert.equal(settle.settled, true);
+  // Only a lower bound is asserted here, not an exact capture count.
+  assert.ok(settle.captures >= 2);
+  assert.deepEqual(settle.diff?.summary, { additions: 1, removals: 1, unchanged: 0 });
+  const added = settle.diff?.lines.find((line) => line.kind === 'added');
+  assert.match(added?.text ?? '', /Welcome!/);
+  // Fresh refs ride the diff: the added line's ref resolves on the stored
+  // settled tree.
+  assert.equal(added?.ref, 'e1');
+});
+
 test(scenario('errorTaxonomy'), async () => {
   const device = createContractDevice(continueButtonSnapshot(), {
     tap: async () => ({ ok: true }),
diff --git a/test/integration/interaction-contract/runtime-selector.coverage.ts b/test/integration/interaction-contract/runtime-selector.coverage.ts
index 27e1db9d1..530f7cda3 100644
--- a/test/integration/interaction-contract/runtime-selector.coverage.ts
+++ b/test/integration/interaction-contract/runtime-selector.coverage.ts
@@ -14,6 +14,8 @@ export const RUNTIME_SELECTOR_COVERAGE = definePathCoverage('runtime-selector', {
     'runtime-selector responseIdentity: result echoes selectorChain and the resolved node',
   verifyEvidence:
     'runtime-selector verifyEvidence: press --verify returns a digest with change detection',
+  settleObservation:
+    'runtime-selector settleObservation: press --settle returns the settled diff with fresh refs',
   errorTaxonomy:
     'runtime-selector errorTaxonomy: no-match failure carries the shared code and hint',
 });
diff --git a/test/integration/provider-scenarios/doctor.test.ts b/test/integration/provider-scenarios/doctor.test.ts
index 476cc6cca..9b283eacb 100644
--- a/test/integration/provider-scenarios/doctor.test.ts
+++ b/test/integration/provider-scenarios/doctor.test.ts
@@ -62,7 +62,7 @@ test('Provider-backed integration doctor infers Android RN/Metro readiness throu
   } finally {
     await server.close();
   }
-});
+}, 15_000);
 
 test('Provider-backed integration doctor runs predictably for supported platform selectors', async () => {
   const devices = [
@@ -94,7 +94,7 @@ test('Provider-backed integration doctor runs predictably for supported platform
       }
     },
   );
-});
+}, 15_000);
 
 test('Provider-backed integration doctor --app verifies an installed app without opening a session', async () => {
   const adbCalls: string[][] = [];
diff --git a/test/integration/provider-scenarios/settle-observation.test.ts b/test/integration/provider-scenarios/settle-observation.test.ts
new file mode
100644
index 000000000..d4c66e5bc
--- /dev/null
+++ b/test/integration/provider-scenarios/settle-observation.test.ts
@@ -0,0 +1,174 @@
+import assert from 'node:assert/strict';
+import { test } from 'vitest';
+import { assertRpcOk } from './assertions.ts';
+import { PROVIDER_SCENARIO_IOS_SIMULATOR } from './fixtures.ts';
+import { createProviderScenarioHarness, withProviderScenarioResource } from './harness.ts';
+import {
+  createAppleRunnerProviderFromTranscript,
+  createRecordingAppleToolProvider,
+  simctlListDevicesHandler,
+} from './providers.ts';
+import { createProviderTranscript, type ProviderScenarioProviderEntry } from './transcript.ts';
+
+const APP = 'com.example.app';
+const DEVICE_ID = PROVIDER_SCENARIO_IOS_SIMULATOR.id;
+
+// #1101 --settle end-to-end: press executes the tap, the settle loop captures
+// until the tree goes quiet, and the SAME response carries the settled diff
+// (with fresh refs + refsGeneration) — the follow-up observation round trip
+// and its stale-ref hazard both disappear.
+
+// Pre-action tree: an Application root with two hittable buttons, Continue and Cancel.
+const BEFORE_NODES = [
+  {
+    index: 0,
+    type: 'Application',
+    label: 'Example',
+    rect: { x: 0, y: 0, width: 400, height: 800 },
+  },
+  {
+    index: 1,
+    parentIndex: 0,
+    type: 'Button',
+    label: 'Continue',
+    hittable: true,
+    rect: { x: 100, y: 300, width: 200, height: 44 },
+  },
+  {
+    index: 2,
+    parentIndex: 0,
+    type: 'Button',
+    label: 'Cancel',
+    hittable: true,
+    rect: { x: 100, y: 400, width: 200, height: 44 },
+  },
+];
+
+// Post-action tree: the same Application root with a single hittable Done button.
+const SETTLED_NODES = [
+  {
+    index: 0,
+    type: 'Application',
+    label: 'Example',
+    rect: { x: 0, y: 0, width: 400, height: 800 },
+  },
+  {
+    index: 1,
+    parentIndex: 0,
+    type: 'Button',
+    label: 'Done',
+    hittable: true,
+    rect: { x: 100, y: 500, width: 200, height: 44 },
+  },
+];
+
+/**
+ * Builds the transcript entry for one `ios.runner.snapshot` call on DEVICE_ID
+ * whose result payload is `{ nodes, truncated: false }`.
+ */
+function snapshotEntry(nodes: readonly unknown[]): ProviderScenarioProviderEntry {
+  return {
+    command: 'ios.runner.snapshot',
+    deviceId: DEVICE_ID,
+    platform: 'apple',
+    result: { nodes, truncated: false },
+  };
+}
+
+/**
+ * Builds the transcript entry for one `ios.runner.tap` call on DEVICE_ID whose
+ * result payload echoes the given coordinates as `{ x, y }`.
+ */
+function tapEntry(x: number, y: number): ProviderScenarioProviderEntry {
+  return {
+    command: 'ios.runner.tap',
+    deviceId: DEVICE_ID,
+    platform: 'apple',
+    result: { x, y },
+  };
+}
+
+test('Provider-backed integration press --settle returns the settled diff and fresh refs', async () => {
+  const runnerTranscript = createProviderTranscript([
+    // snapshot -i: issues refs
+    snapshotEntry(BEFORE_NODES),
+    // press label=Continue --settle: resolution capture, tap, settle captures
+    snapshotEntry(BEFORE_NODES),
+    // (200, 322) is the center of the Continue rect (x + width / 2, y + height / 2).
+    tapEntry(200, 322),
+    snapshotEntry(SETTLED_NODES),
+    snapshotEntry(SETTLED_NODES),
+    // press @e2 (the Done ref from the settled diff): tap on the stored tree
+    // (200, 522) is the center of the Done rect.
+    tapEntry(200, 522),
+  ]);
+  const appleRunnerProvider = createAppleRunnerProviderFromTranscript(
+    runnerTranscript,
+    'ios.runner',
+  );
+  const appleTool = createRecordingAppleToolProvider({
+    simctl: simctlListDevicesHandler('com.apple.CoreSimulator.SimRuntime.iOS-18-0', [
+      { name: PROVIDER_SCENARIO_IOS_SIMULATOR.name, udid: DEVICE_ID },
+    ]),
+  });
+
+  await withProviderScenarioResource(
+    async () =>
+      await createProviderScenarioHarness({
+        appleRunnerProvider: () => appleRunnerProvider,
+        appleToolProvider: () => appleTool.provider,
+        deviceInventoryProvider: async () => [PROVIDER_SCENARIO_IOS_SIMULATOR],
+      }),
+    async (daemon) => {
+      const open = await daemon.callCommand('open', [APP], {
+        platform: 'ios',
+        udid: DEVICE_ID,
+      });
+      assertRpcOk(open);
+
+      const snapshot = await daemon.callCommand('snapshot', [], {
+        snapshotInteractiveOnly: true,
+      });
+      assertRpcOk(snapshot);
+
+      const press = await daemon.callCommand('press', ['label=Continue'], {
+        settle: true,
+        settleQuietMs: 25,
+        timeoutMs: 2_000,
+      });
+      const pressData = assertRpcOk(press);
+      const settle = pressData.settle as {
+        settled: boolean;
+        captures: number;
+        refsGeneration?: number;
+        diff?: {
+          summary: { additions: number; removals: number; unchanged: number };
+          lines: Array<{ kind: string; text: string; ref?: string }>;
+        };
+        hint?: string;
+      };
+      assert.ok(settle, 'press --settle must return a settle observation');
+      assert.equal(settle.settled, true);
+      // Two settle captures, matching the two SETTLED_NODES entries in the transcript.
+      assert.equal(settle.captures, 2);
+      assert.equal(typeof settle.refsGeneration, 'number');
+      // Matches the fixtures: Application unchanged; Continue and Cancel removed; Done added.
+      assert.deepEqual(settle.diff?.summary, { additions: 1, removals: 2, unchanged: 1 });
+      const added = settle.diff?.lines.find((line) => line.kind === 'added');
+      assert.match(added?.text ?? '', /Done/);
+      assert.equal(added?.ref, 'e2');
+      // The settled tree is never serialized into the response.
+      assert.equal(pressData.nodes, undefined);
+
+      // The diff's ref acts directly on the stored settled tree — no fresh
+      // snapshot round trip and no stale-refs warning.
+      const followUp = await daemon.callCommand('press', ['@e2'], {});
+      const followUpData = assertRpcOk(followUp);
+      assert.equal(followUpData.warning, undefined);
+      assert.equal(followUpData.x, 200);
+      assert.equal(followUpData.y, 522);
+
+      runnerTranscript.assertComplete();
+    },
+  );
+});