Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CONTEXT.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
- Coverage manifest: `CONTRACT_COVERAGE` export beside each interaction contract test file claiming which matrix cells it proves; the coverage gate requires every enforced/delegated cell to be claimed and rejects overclaims of waived cells.
- Delegation-on-error: a fast path falling back to the runtime path on semantic failure shapes. It closes failure-side guarantee cells only — never success-path parity.
- Ref generation pin: optional `~s<n>` suffix on an @ref carrying the snapshot generation it was minted from. Accepted as input everywhere, emitted by no tree output (snapshot token budget), auto-appended by the MCP layer, stripped and ignored by replay.
- Settled observation: opt-in (`--settle`) post-action payload on press/click/fill/longpress — the quiet-window stable loop re-captures until the UI settles, and the response carries the diff vs the pre-action tree (changed lines only, added lines with fresh refs, `refsGeneration` when the settled tree was stored). Best-effort: never fails the action; `settled: false` plus a hint on never-quiet content.
- Snapshot capture plan: per-strategy ordered chain of iOS snapshot capture backends (recursive tree, query sweep, private AX) run by one plan runner under a shared wall-clock budget; recovery ordering is declared data, never a per-call-site branch.
- Snapshot quality verdict: structured outcome (state, backend, reason code, effective depth, collapsed leaves) computed once by the plan runner and shipped with every planned snapshot payload; the daemon and CLI render it instead of re-deriving degradation from node shapes.
- AX-unavailable target invalidation: iOS/macOS runner behavior where a root accessibility snapshot failure such as `kAXErrorIllegalArgument` marks the cached `XCUIApplication` target handle suspect. The runner fails closed for degraded interactive snapshots, clears the cached target, and lets the next command reacquire the app through normal activation.
Expand Down
2 changes: 2 additions & 0 deletions scripts/integration-progress-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ function summarizeProviderScenarioFlagCoverage(files) {
['findFirst', 'find first disambiguation'],
['findLast', 'find last disambiguation'],
['verify', 'post-action evidence capture on press/click/fill'],
['settle', 'post-action settled-diff observation on press/click/fill/longpress'],
['settleQuietMs', 'settle quiet-window tuning'],
];
const sources = files.map((file) => fs.readFileSync(file, 'utf8')).join('\n');
return flagTargets.map(([key, reason, aliases = []]) => {
Expand Down
24 changes: 23 additions & 1 deletion src/cli/parser/cli-flags.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ export type CliFlags = CloudProviderProfileFields &
pixels?: number;
doubleTap?: boolean;
verify?: boolean;
settle?: boolean;
settleQuietMs?: number;
clickButton?: ClickButton;
backMode?: BackMode;
pauseMs?: number;
Expand Down Expand Up @@ -211,6 +213,10 @@ export const REPEATED_TOUCH_FLAGS = flagKeys(
'jitterPx',
'doubleTap',
);
// Flag keys that opt press/click/fill/longpress into settled-diff observation
// (--settle, #1101):
//   settle        - turns the post-action settle wait on,
//   settleQuietMs - tunes the quiet window (--settle-quiet),
//   timeoutMs     - --timeout, which doubles as the settle deadline on these
//                   commands (a flag-sourced budget on the interaction
//                   descriptors, mirroring wait's positional budget).
export const SETTLE_FLAGS = flagKeys('settle', 'settleQuietMs', 'timeoutMs');
export const REPLAY_FLAGS = flagKeys('replayUpdate', 'replayEnv');

const FLAG_DEFINITIONS: readonly FlagDefinition[] = [
Expand Down Expand Up @@ -848,6 +854,22 @@ const FLAG_DEFINITIONS: readonly FlagDefinition[] = [
usageDescription:
'Capture cheap post-action evidence (AX digest, node counts, changedFromBefore) instead of a follow-up snapshot',
},
{
key: 'settle',
names: ['--settle'],
type: 'boolean',
usageLabel: '--settle',
usageDescription:
'After the action, wait for the UI to go quiet and return the settled diff vs the pre-action tree in the same response (best-effort; never fails the action)',
},
{
key: 'settleQuietMs',
names: ['--settle-quiet'],
type: 'int',
min: 0,
usageLabel: '--settle-quiet <ms>',
usageDescription: 'Settle: quiet window the UI must hold to count as settled (default 500ms)',
},
{
key: 'clickButton',
names: ['--button'],
Expand Down Expand Up @@ -1051,7 +1073,7 @@ const FLAG_DEFINITIONS: readonly FlagDefinition[] = [
min: 1,
usageLabel: '--timeout <ms>',
usageDescription:
'Prepare/Replay/Snapshot/Test: maximum wall-clock time for the command or attempt',
'Prepare/Replay/Snapshot/Test: maximum wall-clock time for the command or attempt. With --settle: the settle-wait deadline (default 10s)',
},
{
key: 'retries',
Expand Down
5 changes: 5 additions & 0 deletions src/cli/parser/cli-help.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,17 @@ const AGENT_START_LINES = [
'Default to agent-device for installs, opens, snapshots, interactions, screenshots, logs, network/perf evidence, and verification.',
'Use raw adb, simctl, xcrun, or platform scripts only when this help calls out a tool gap or platform setup step.',
'Start with agent-device help workflow to understand the core loop and how to use the tool.',
// Benchmarked 2026-07-05 (#1101): agents that only read --help skipped the
// help-workflow pointer and fell into plain-snapshot loops; stating the loop
// here is what makes small models pick snapshot -i and --settle unprompted.
'Core loop: open <app> -> snapshot -i (interactive tree with @refs) -> press/click/fill <target> --settle (returns the settled diff with fresh @refs) -> repeat. Verify with diff snapshot -i.',
] as const;

const AGENT_QUICKSTART_LINES = [
'Planning output contract: when asked to plan commands, output command lines only: no prose, numbering, Markdown fences, pipes, or shell helpers.',
'Default loop: devices/apps -> open -> snapshot -i -> press/fill/get/is/wait/find -> verify with diff snapshot -> close.',
'Verify a mutation with diff snapshot (or diff snapshot -i), not a full snapshot: it prints only the added/removed/changed lines since the last snapshot in this session, so confirming an action costs a few lines instead of the whole tree.',
'Collapse act+observe into one call with --settle on press/click/fill/longpress: the response waits for the UI to go quiet and carries the settled diff with fresh refs (settled: false plus a hint on never-quiet content; the action itself never fails). Tune with --settle-quiet <ms> and --timeout <ms>.',
'Use selectors or refs as positional targets: id="submit", label="Allow", or @e12 from snapshot -i.',
'Pin a ref to the snapshot that minted it with ~s<n> (n = refsGeneration in the snapshot response): press @e12~s4. Pinned refs get exact staleness warnings instead of the coarse tree-changed one; plain refs stay valid input.',
'Plain snapshot reads state; snapshot -i refreshes current interactive refs only.',
Expand Down
2 changes: 2 additions & 0 deletions src/client/client-normalizers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,8 @@ export function buildFlags(options: InternalRequestOptions): CommandFlags {
pixels: options.pixels,
doubleTap: options.doubleTap,
verify: options.verify,
settle: options.settle,
settleQuietMs: options.settleQuietMs,
clickButton: options.clickButton,
pauseMs: options.pauseMs,
pattern: options.pattern,
Expand Down
27 changes: 23 additions & 4 deletions src/client/client-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -629,10 +629,24 @@ export type CaptureDiffOptions = DeviceCommandBaseOptions &
out?: string;
};

/**
 * Opt-in (#1101) settle options mixed into the click/press/longpress/fill
 * option types. After the action, wait for the UI to go quiet and return the
 * settled diff vs the pre-action tree (`settle` on the result) in the same
 * response. Best-effort — never fails the action.
 */
type SettleCommandOptions = {
/** Enable the post-action settle wait and settled-diff payload. */
settle?: boolean;
/** Quiet window, in milliseconds, the UI must hold to count as settled (default 500ms). */
settleQuietMs?: number;
/** Upper bound, in milliseconds, on the settle wait (default 10s); rejected without `settle`. */
timeoutMs?: number;
};

export type ClickOptions = DeviceCommandBaseOptions &
SelectorSnapshotCommandOptions &
InteractionTarget &
RepeatedPressOptions & {
RepeatedPressOptions &
SettleCommandOptions & {
button?: ClickButton;
/**
* Opt-in (#1047): return cheap post-action evidence (AX digest, node counts,
Expand All @@ -645,13 +659,15 @@ export type ClickOptions = DeviceCommandBaseOptions &
export type PressOptions = DeviceCommandBaseOptions &
SelectorSnapshotCommandOptions &
InteractionTarget &
RepeatedPressOptions & {
RepeatedPressOptions &
SettleCommandOptions & {
verify?: boolean;
};

export type LongPressOptions = DeviceCommandBaseOptions &
SelectorSnapshotCommandOptions &
InteractionTarget & {
InteractionTarget &
SettleCommandOptions & {
durationMs?: number;
};

Expand Down Expand Up @@ -697,7 +713,8 @@ export type TypeTextOptions = DeviceCommandBaseOptions & {

export type FillOptions = DeviceCommandBaseOptions &
SelectorSnapshotCommandOptions &
InteractionTarget & {
InteractionTarget &
SettleCommandOptions & {
text: string;
delayMs?: number;
verify?: boolean;
Expand Down Expand Up @@ -928,6 +945,8 @@ type CommandExecutionOptions = Partial<ScreenshotRequestFlags> & {
pixels?: number;
doubleTap?: boolean;
verify?: boolean;
settle?: boolean;
settleQuietMs?: number;
clickButton?: ClickButton;
pauseMs?: number;
pattern?: SwipePattern;
Expand Down
10 changes: 10 additions & 0 deletions src/commands/cli-grammar/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ export function selectorSnapshotOptionsFromFlags(flags: CliFlags): SelectorSnaps
};
}

// --settle support for press/click/fill/longpress (#1101). On these commands
// --timeout is reused as the settle deadline; the daemon rejects it when
// --settle is absent.
export function settleInputFromFlags(flags: CliFlags): Record<string, unknown> {
  // Pull out only the three settle-related flags, then hand them to
  // compactRecord exactly as the literal form would.
  const { settle, settleQuietMs, timeoutMs } = flags;
  return compactRecord({ settle, settleQuietMs, timeoutMs });
}

export function repeatedInputFromFlags(flags: CliFlags): Record<string, unknown> {
return compactRecord({
count: flags.count,
Expand Down
36 changes: 31 additions & 5 deletions src/commands/interaction/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ import type {
TypeTextOptions,
} from '../../client/client-types.ts';
import type { CommandSchemaOverride } from '../../utils/cli-command-schema-types.ts';
import { REPEATED_TOUCH_FLAGS, SELECTOR_SNAPSHOT_FLAGS } from '../../cli/parser/cli-flags.ts';
import {
REPEATED_TOUCH_FLAGS,
SELECTOR_SNAPSHOT_FLAGS,
SETTLE_FLAGS,
} from '../../cli/parser/cli-flags.ts';
import { defineCommandFacet, defineCommandFamilyFromFacets } from '../family/types.ts';
import { defineExecutableCommand } from '../command-contract.ts';
import {
Expand Down Expand Up @@ -70,23 +74,29 @@ const interactionCliSchemas = {
usageOverride: 'click <x y|@ref|selector>',
positionalArgs: ['target'],
allowsExtraPositionals: true,
allowedFlags: [...REPEATED_TOUCH_FLAGS, 'clickButton', 'verify', ...SELECTOR_SNAPSHOT_FLAGS],
allowedFlags: [
...REPEATED_TOUCH_FLAGS,
'clickButton',
'verify',
...SETTLE_FLAGS,
...SELECTOR_SNAPSHOT_FLAGS,
],
},
press: {
usageOverride: 'press <x y|@ref|selector>',
helpDescription:
'Short press a semantic UI target by ref, selector, or point. For native context menus or hold gestures, use longpress <target> <durationMs> instead of press --hold-ms.',
positionalArgs: ['targetOrX', 'y?'],
allowsExtraPositionals: true,
allowedFlags: [...REPEATED_TOUCH_FLAGS, 'verify', ...SELECTOR_SNAPSHOT_FLAGS],
allowedFlags: [...REPEATED_TOUCH_FLAGS, 'verify', ...SETTLE_FLAGS, ...SELECTOR_SNAPSHOT_FLAGS],
},
longpress: {
usageOverride: 'longpress <x y|@ref|selector> [durationMs]',
helpDescription:
'Open native context menus or long-press targets by ref, selector, or point. Duration is positional, for example longpress @e12 800 or longpress 300 500 800.',
positionalArgs: ['targetOrX', 'yOrDurationMs?', 'durationMs?'],
allowsExtraPositionals: true,
allowedFlags: [...SELECTOR_SNAPSHOT_FLAGS],
allowedFlags: [...SETTLE_FLAGS, ...SELECTOR_SNAPSHOT_FLAGS],
},
swipe: {
helpDescription: 'Swipe coordinates with optional repeat pattern',
Expand Down Expand Up @@ -114,7 +124,7 @@ const interactionCliSchemas = {
usageOverride: 'fill <x> <y> <text> | fill <@ref|selector> <text>',
positionalArgs: ['targetOrX', 'yOrText', 'text?'],
allowsExtraPositionals: true,
allowedFlags: [...SELECTOR_SNAPSHOT_FLAGS, 'delayMs', 'verify'],
allowedFlags: [...SELECTOR_SNAPSHOT_FLAGS, 'delayMs', 'verify', ...SETTLE_FLAGS],
},
scroll: {
usageOverride: 'scroll <direction|top|bottom> [amount] [--pixels <n>] [--duration-ms <ms>]',
Expand Down Expand Up @@ -343,6 +353,7 @@ function toClickOptions(input: ClickInput): ClickOptions {
...toRepeatedOptions(input),
button: input.button,
verify: input.verify,
...toSettleOptions(input),
};
}

Expand All @@ -353,6 +364,7 @@ function toPressOptions(input: PressInput): PressOptions {
...toSelectorSnapshotOptions(input),
...toRepeatedOptions(input),
verify: input.verify,
...toSettleOptions(input),
};
}

Expand All @@ -364,6 +376,7 @@ function toFillOptions(input: FillInput): FillOptions {
text: input.text,
delayMs: input.delayMs,
verify: input.verify,
...toSettleOptions(input),
};
}

Expand All @@ -373,6 +386,19 @@ function toLongPressOptions(input: LongPressInput): LongPressOptions {
...toClientInteractionTarget(input.target),
...toSelectorSnapshotOptions(input),
durationMs: input.durationMs,
...toSettleOptions(input),
};
}

/**
 * Projects the settle-related fields of a command input onto client options.
 *
 * The result always carries all three keys (`settle`, `settleQuietMs`,
 * `timeoutMs`), each possibly `undefined`; any other property on `input` is
 * left behind.
 */
function toSettleOptions(input: {
  settle?: boolean;
  settleQuietMs?: number;
  timeoutMs?: number;
}): Pick<PressOptions, 'settle' | 'settleQuietMs' | 'timeoutMs'> {
  const { settle, settleQuietMs, timeoutMs } = input;
  return { settle, settleQuietMs, timeoutMs };
}

Expand Down
5 changes: 5 additions & 0 deletions src/commands/interaction/interactions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import {
requiredDaemonString,
repeatedInputFromFlags,
selectorSnapshotInputFromFlags,
settleInputFromFlags,
targetInputFromClientTarget,
} from '../cli-grammar/common.ts';
import type { CliReader, DaemonWriter, CommandInput } from '../cli-grammar/types.ts';
Expand All @@ -35,6 +36,7 @@ export const interactionCliReaders = {
...commonInputFromFlags(flags),
...selectorSnapshotInputFromFlags(flags),
...repeatedInputFromFlags(flags),
...settleInputFromFlags(flags),
target: targetInputFromClientTarget(readInteractionTargetFromPositionals(positionals)),
button: flags.clickButton,
verify: flags.verify,
Expand All @@ -43,6 +45,7 @@ export const interactionCliReaders = {
...commonInputFromFlags(flags),
...selectorSnapshotInputFromFlags(flags),
...repeatedInputFromFlags(flags),
...settleInputFromFlags(flags),
target: targetInputFromClientTarget(readInteractionTargetFromPositionals(positionals)),
verify: flags.verify,
}),
Expand All @@ -51,6 +54,7 @@ export const interactionCliReaders = {
return {
...commonInputFromFlags(flags),
...selectorSnapshotInputFromFlags(flags),
...settleInputFromFlags(flags),
target: targetInputFromClientTarget(decoded),
durationMs: decoded.durationMs,
};
Expand Down Expand Up @@ -79,6 +83,7 @@ export const interactionCliReaders = {
return {
...commonInputFromFlags(flags),
...selectorSnapshotInputFromFlags(flags),
...settleInputFromFlags(flags),
target: targetInputFromClientTarget(decoded.target),
text: decoded.text,
delayMs: flags.delayMs,
Expand Down
12 changes: 12 additions & 0 deletions src/commands/interaction/metadata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,19 +71,29 @@ const verifyField = () =>
'Capture cheap post-action evidence (AX digest, node counts, changedFromBefore) instead of a follow-up snapshot.',
);

// Builds the three settle-related input fields (settle, settleQuietMs,
// timeoutMs) as a fresh object on every call, ready to be spread into a
// command's field map.
const settleFields = () => {
  const settle = booleanField(
    'After the action, wait for the UI to go quiet and return the settled diff vs the pre-action tree in the same response. Best-effort; never fails the action.',
  );
  const settleQuietMs = integerField('Settle: quiet window in milliseconds (default 500).', {
    min: 0,
  });
  const timeoutMs = integerField('Settle: wait deadline in milliseconds (default 10000).', {
    min: 1,
  });
  return { settle, settleQuietMs, timeoutMs };
};

// Input fields accepted by click: a required target, the pointer button, the
// selector-snapshot and repeat field groups, verify, and the settle field
// group. Spread order matters: a later spread wins on a key collision, and
// the settle group is spread last.
const clickFields = {
target: requiredField(interactionTargetField()),
button: enumField(CLICK_BUTTONS, 'Pointer button for platforms that support mouse buttons.'),
...selectorSnapshotFields(),
...repeatedFields(),
verify: verifyField(),
...settleFields(),
};

// Input fields accepted by press: a required target, the selector-snapshot
// and repeat field groups, verify, and the settle field group (spread last,
// so its keys win on a collision).
const pressFields = {
target: requiredField(interactionTargetField()),
...selectorSnapshotFields(),
...repeatedFields(),
verify: verifyField(),
...settleFields(),
};

const fillFields = {
Expand All @@ -92,12 +102,14 @@ const fillFields = {
delayMs: integerField('Delay between typed characters.', { min: 0 }),
...selectorSnapshotFields(),
verify: verifyField(),
...settleFields(),
};

// Input fields accepted by longpress: a required target, the press duration
// in milliseconds (min 0), the selector-snapshot field group, and the settle
// field group (spread last, so its keys win on a collision). There is no
// verify field here, unlike click and press.
const longPressFields = {
target: requiredField(interactionTargetField()),
durationMs: integerField('Long press duration in milliseconds.', { min: 0 }),
...selectorSnapshotFields(),
...settleFields(),
};

const swipeFields = {
Expand Down
Loading
Loading