Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CONTEXT.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
- Parity table: golden JSON fixture under `contracts/fixtures/` consumed by both vitest and the runner's gated Swift tests, so a cross-language rule (e.g. tap-point policy) cannot drift silently. Change the rule only via the table.
- Coverage manifest: `CONTRACT_COVERAGE` export beside each interaction contract test file claiming which matrix cells it proves; the coverage gate requires every enforced/delegated cell to be claimed and rejects overclaims of waived cells.
- Delegation-on-error: a fast path falling back to the runtime path on semantic failure shapes. It closes failure-side guarantee cells only — never success-path parity.
- Ref generation pin: optional `~s<n>` suffix on an @ref carrying the snapshot generation it was minted from. Accepted as input everywhere, emitted by no tree output (snapshot token budget), auto-appended by the MCP layer, stripped and ignored by replay.
- Snapshot capture plan: per-strategy ordered chain of iOS snapshot capture backends (recursive tree, query sweep, private AX) run by one plan runner under a shared wall-clock budget; recovery ordering is declared data, never a per-call-site branch.
- Snapshot quality verdict: structured outcome (state, backend, reason code, effective depth, collapsed leaves) computed once by the plan runner and shipped with every planned snapshot payload; the daemon and CLI render it instead of re-deriving degradation from node shapes.
- AX-unavailable target invalidation: iOS/macOS runner behavior where a root accessibility snapshot failure such as `kAXErrorIllegalArgument` marks the cached `XCUIApplication` target handle suspect. The runner fails closed for degraded interactive snapshots, clears the cached target, and lets the next command reacquire the app through normal activation.
Expand Down
1 change: 1 addition & 0 deletions src/cli/parser/cli-help.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ const AGENT_QUICKSTART_LINES = [
'Default loop: devices/apps -> open -> snapshot -i -> press/fill/get/is/wait/find -> verify with diff snapshot -> close.',
'Verify a mutation with diff snapshot (or diff snapshot -i), not a full snapshot: it prints only the added/removed/changed lines since the last snapshot in this session, so confirming an action costs a few lines instead of the whole tree.',
'Use selectors or refs as positional targets: id="submit", label="Allow", or @e12 from snapshot -i.',
'Pin a ref to the snapshot that minted it with ~s<n> (n = refsGeneration in the snapshot response): press @e12~s4. Pinned refs get exact staleness warnings instead of the coarse tree-changed one; plain refs stay valid input.',
'Plain snapshot reads state; snapshot -i refreshes current interactive refs only.',
'Default snapshot text is an agent-facing, token-efficient view for planning and targeting actions.',
'Read-only visible/state question: use snapshot/get/is/find; use snapshot -i only when refs are needed.',
Expand Down
5 changes: 5 additions & 0 deletions src/daemon/__tests__/response-views.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,3 +247,8 @@ test('find/get default and full return today’s shape unchanged (same reference
expect(getView!(data, 'default')).toBe(data);
expect(getView!(data, 'full')).toBe(data);
});

// Requests the 'digest' view of a snapshot payload that carries refsGeneration 7
// and expects the same value on the returned view (#1076).
test('snapshot digest preserves refsGeneration — the pinning signal for the refs it keeps (#1076)', () => {
  const payload = { ...SNAPSHOT_DATA, refsGeneration: 7 };
  const view = RESPONSE_VIEWS.snapshot!(payload, 'digest');

  expect(view.refsGeneration).toBe(7);
});
99 changes: 99 additions & 0 deletions src/daemon/__tests__/session-snapshot.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import { expect, test } from 'vitest';
import type { SnapshotState } from '../../kernel/snapshot.ts';
import type { SessionState } from '../types.ts';
import {
resolveRefStalenessWarning,
setSessionSnapshot,
STALE_SNAPSHOT_REFS_WARNING,
} from '../session-snapshot.ts';

/**
 * Builds a minimal session fixture for these tests.
 *
 * Only `name`, `device`, `createdAt` and `actions` are populated; the object is
 * then force-cast to `SessionState`, so any other field reads as `undefined`
 * until a test assigns it.
 */
function makeSession(): SessionState {
  const fixture = {
    name: 'default',
    device: { id: 'device-1', name: 'Test Device', platform: 'apple' },
    createdAt: Date.now(),
    actions: [],
  };
  // Deliberately partial: the double cast bypasses the full SessionState shape.
  return fixture as unknown as SessionState;
}

/**
 * Builds an empty snapshot fixture: no nodes, the current timestamp, and the
 * 'xctest' backend. Every call returns a new object, which the tests rely on
 * to distinguish "same snapshot stored again" from "tree replaced".
 */
function makeSnapshot(): SnapshotState {
  const snapshot: SnapshotState = {
    nodes: [],
    createdAt: Date.now(),
    backend: 'xctest',
  };
  return snapshot;
}

test('setSessionSnapshot advances the generation on every tree replacement (#1076 versioned refs)', () => {
  const state = makeSession();
  // A freshly built session has no generation recorded yet.
  expect(state.snapshotGeneration).toBeUndefined();

  const initialTree = makeSnapshot();
  setSessionSnapshot(state, initialTree);
  // The first bump of a lifetime is seeded at a random 6-digit base (see
  // nextSnapshotGeneration), so only the range is pinned here.
  const base = state.snapshotGeneration!;
  expect(base).toBeGreaterThanOrEqual(100_000);
  expect(base).toBeLessThan(1_000_000);
  expect(state.snapshotRefsStale).toBe(true);

  // Handing over the very same snapshot object does not count as a replacement.
  setSessionSnapshot(state, initialTree);
  expect(state.snapshotGeneration).toBe(base);

  // A different snapshot object moves the counter forward by exactly one.
  const replacementTree = makeSnapshot();
  setSessionSnapshot(state, replacementTree);
  expect(state.snapshotGeneration).toBe(base + 1);
});

test('a reopened session reseeds so pins from a previous lifetime do not silently collide', () => {
  // Previous lifetime: store one tree and remember the generation it was given.
  const previous = makeSession();
  setSessionSnapshot(previous, makeSnapshot());
  const stalePin = previous.snapshotGeneration!;

  // Reopen: a brand-new session object starts its counter from a new seed.
  const reopened = makeSession();
  setSessionSnapshot(reopened, makeSnapshot());
  reopened.snapshotRefsStale = false;

  // This check is probabilistic rather than identity-based: two seeds collide
  // with ~1/900000 probability (an accepted residual risk, documented on the field).
  expect(reopened.snapshotGeneration).not.toBe(stalePin);

  // A pin carried over from the previous lifetime must warn, not read as current.
  const warning = resolveRefStalenessWarning({
    session: reopened,
    ref: '@e1',
    mintedGeneration: stalePin,
  });
  expect(warning).toContain(`minted from snapshot s${stalePin}`);
});

test('resolveRefStalenessWarning: pinned-current clean, pinned-stale precise, plain coarse', () => {
  const session = makeSession();
  session.snapshotGeneration = 15;
  session.snapshotRefsStale = true;

  // Same session and ref in every case; only the pinned generation varies.
  const warningFor = (mintedGeneration: number | undefined) =>
    resolveRefStalenessWarning({ session, ref: '@e37', mintedGeneration });

  // Pin equals the stored generation: no warning even though the coarse
  // stale marker is set, because the pin proves the ref matches the tree.
  expect(warningFor(15)).toBeUndefined();

  // Pin older than the stored generation: precise message naming both generations.
  expect(warningFor(12)).toBe(
    'Ref @e37 was minted from snapshot s12 but the session tree is now s15 — re-run snapshot -i.',
  );

  // No pin while the stale marker is set: the coarse warning.
  expect(warningFor(undefined)).toBe(STALE_SNAPSHOT_REFS_WARNING);

  // No pin and the stale marker cleared: nothing to report.
  session.snapshotRefsStale = false;
  expect(warningFor(undefined)).toBeUndefined();
});

test('resolveRefStalenessWarning treats a missing stored generation as s0', () => {
  // The fixture never sets snapshotGeneration, so the session has none stored.
  const session = makeSession();

  // A non-zero pin is reported as stale against s0; the ref is passed without
  // its leading '@' and the message still prints it as @e2.
  const staleWarning = resolveRefStalenessWarning({ session, ref: 'e2', mintedGeneration: 3 });
  expect(staleWarning).toBe(
    'Ref @e2 was minted from snapshot s3 but the session tree is now s0 — re-run snapshot -i.',
  );

  // A pin of 0 matches the implied s0 and therefore yields no warning.
  const zeroPinWarning = resolveRefStalenessWarning({
    session,
    ref: '@e2',
    mintedGeneration: 0,
  });
  expect(zeroPinWarning).toBeUndefined();
});
33 changes: 32 additions & 1 deletion src/daemon/handlers/__tests__/find.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ test('handleFindCommands click returns deterministic metadata across locator var
positionals: ['Increment', 'click'],
nodes: [hittableParentNoRect, nonHittableChildWithRect],
invoke: async () => ({ platformSpecificRef: 'XCUIElementTypeView' }),
expectedKeys: ['locator', 'message', 'query', 'ref', 'x', 'y'],
// refsGeneration rides every ref-issuing find response (#1076 versioned refs).
expectedKeys: ['locator', 'message', 'query', 'ref', 'refsGeneration', 'x', 'y'],
expectedLocator: 'any',
expectedQuery: 'Increment',
expectedCoordinates: { x: 100, y: 50 },
Expand Down Expand Up @@ -757,3 +758,33 @@ test('handleFindCommands click re-issues a fresh ref and clears the stale-refs m
// the marker clears before the internal click @ref sub-invocation runs.
expect(storedSession.snapshotRefsStale).toBe(false);
});

test('handleFindCommands click carries refsGeneration for the freshly stored tree (#1076 versioned refs)', async () => {
  const seededSession = makeSession('default');
  // Simulate a session that has already seen two tree replacements.
  seededSession.snapshotGeneration = 2;

  const outcome = await runFindClickScenario({
    positionals: ['Increment', 'click'],
    nodes: [
      {
        index: 0,
        type: 'Button',
        label: 'Increment',
        hittable: true,
        rect: { x: 50, y: 0, width: 100, height: 100 },
        depth: 0,
      },
    ],
    session: seededSession,
  });
  const { response } = outcome;
  const persistedSession = outcome.session;

  expect(response.ok).toBe(true);
  // The capture done by find replaced the stored tree, moving it to generation 3.
  expect(persistedSession.snapshotGeneration).toBe(3);

  // The guard narrows the response type; on failure the assertion above has
  // already failed the test.
  if (!response.ok) {
    return;
  }
  // The returned ref was minted from that tree, so the response reports generation 3.
  const payload = response.data as Record<string, unknown>;
  expect(payload.refsGeneration).toBe(3);
});
80 changes: 79 additions & 1 deletion src/daemon/handlers/__tests__/interaction-touch-targets.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { test, expect } from 'vitest';
import { parseFillTarget, parseTouchTarget } from '../interaction-touch-targets.ts';
import {
parseFillTarget,
parseLongPressTarget,
parseTouchTarget,
} from '../interaction-touch-targets.ts';

test('parseTouchTarget preserves ref fallback label through shared grammar', () => {
const parsed = parseTouchTarget(['@e4', 'Email field'], 'press');
Expand Down Expand Up @@ -78,3 +82,77 @@ test('parseFillTarget rejects invalid coordinates instead of treating them as a
}
}
});

// --- Versioned refs (#1076): the daemon boundary splits `@e12~s3` pins ---

test('parseTouchTarget splits a pinned ref into plain ref + generation', () => {
  // '@e4~s12' should come back as the plain ref '@e4' plus refGeneration 12,
  // with the second positional kept as the fallback label.
  const expected = {
    ok: true,
    target: {
      kind: 'ref',
      ref: '@e4',
      fallbackLabel: 'Email field',
    },
    refGeneration: 12,
  };

  const actual = parseTouchTarget(['@e4~s12', 'Email field'], 'press');

  expect(actual).toEqual(expected);
});

test('parseTouchTarget rejects a malformed generation suffix with the grammar hint', () => {
  // '~s' with no digits after it is not a valid generation pin.
  const outcome = parseTouchTarget(['@e4~s'], 'press');

  expect(outcome.ok).toBe(false);
  // The guard narrows the result type; a success has already failed the
  // assertion above.
  if (outcome.ok) {
    return;
  }

  // The error must name the problem and show a well-formed example in its hint.
  expect(outcome.response).toMatchObject({
    ok: false,
    error: {
      code: 'INVALID_ARGS',
      message: expect.stringContaining('malformed generation suffix'),
      details: { hint: expect.stringContaining('@e12~s3') },
    },
  });
});

test('parseLongPressTarget carries the pinned generation past the trailing duration', () => {
  // The trailing '800' is expected back as durationMs, not as a fallback
  // label, and the '~s7' pin as refGeneration.
  const expected = {
    ok: true,
    target: {
      kind: 'ref',
      ref: '@e4',
      fallbackLabel: '',
    },
    refGeneration: 7,
    durationMs: 800,
  };

  const actual = parseLongPressTarget(['@e4~s7', '800']);

  expect(actual).toEqual(expected);
});

test('parseFillTarget splits a pinned ref and keeps the text intact', () => {
  // The text itself contains '@'; it must come back untouched while the pin
  // is split off the ref.
  const expected = {
    ok: true,
    target: {
      kind: 'ref',
      ref: '@e4',
      fallbackLabel: '',
    },
    refGeneration: 3,
    text: 'qa@example.com',
  };

  const actual = parseFillTarget(['@e4~s3', 'qa@example.com']);

  expect(actual).toEqual(expected);
});

test('parseFillTarget rejects a malformed pinned ref before reading text', () => {
  // '~x3' is not the '~s<n>' pin form, so parsing must fail with INVALID_ARGS.
  const outcome = parseFillTarget(['@e4~x3', 'text']);

  expect(outcome.ok).toBe(false);
  // The guard narrows the result type; a success has already failed the
  // assertion above.
  if (outcome.ok) {
    return;
  }

  expect(outcome.response).toMatchObject({
    ok: false,
    error: { code: 'INVALID_ARGS' },
  });
});
Loading
Loading