diff --git a/CONTEXT.md b/CONTEXT.md index d214e1dd4..585d62a7c 100644 --- a/CONTEXT.md +++ b/CONTEXT.md @@ -48,6 +48,7 @@ - Parity table: golden JSON fixture under `contracts/fixtures/` consumed by both vitest and the runner's gated Swift tests, so a cross-language rule (e.g. tap-point policy) cannot drift silently. Change the rule only via the table. - Coverage manifest: `CONTRACT_COVERAGE` export beside each interaction contract test file claiming which matrix cells it proves; the coverage gate requires every enforced/delegated cell to be claimed and rejects overclaims of waived cells. - Delegation-on-error: a fast path falling back to the runtime path on semantic failure shapes. It closes failure-side guarantee cells only — never success-path parity. +- Ref generation pin: optional `~s<n>` suffix on an @ref (e.g. `@e12~s3`) carrying the snapshot generation `n` it was minted from. Accepted as input everywhere, emitted by no tree output (snapshot token budget), auto-appended by the MCP layer, stripped and ignored by replay. - Snapshot capture plan: per-strategy ordered chain of iOS snapshot capture backends (recursive tree, query sweep, private AX) run by one plan runner under a shared wall-clock budget; recovery ordering is declared data, never a per-call-site branch. - Snapshot quality verdict: structured outcome (state, backend, reason code, effective depth, collapsed leaves) computed once by the plan runner and shipped with every planned snapshot payload; the daemon and CLI render it instead of re-deriving degradation from node shapes. - AX-unavailable target invalidation: iOS/macOS runner behavior where a root accessibility snapshot failure such as `kAXErrorIllegalArgument` marks the cached `XCUIApplication` target handle suspect. The runner fails closed for degraded interactive snapshots, clears the cached target, and lets the next command reacquire the app through normal activation. 
diff --git a/src/cli/parser/cli-help.ts b/src/cli/parser/cli-help.ts index be4b04c22..3a5a5f3da 100644 --- a/src/cli/parser/cli-help.ts +++ b/src/cli/parser/cli-help.ts @@ -62,6 +62,7 @@ const AGENT_QUICKSTART_LINES = [ 'Default loop: devices/apps -> open -> snapshot -i -> press/fill/get/is/wait/find -> verify with diff snapshot -> close.', 'Verify a mutation with diff snapshot (or diff snapshot -i), not a full snapshot: it prints only the added/removed/changed lines since the last snapshot in this session, so confirming an action costs a few lines instead of the whole tree.', 'Use selectors or refs as positional targets: id="submit", label="Allow", or @e12 from snapshot -i.', + 'Pin a ref to the snapshot that minted it with ~s (n = refsGeneration in the snapshot response): press @e12~s4. Pinned refs get exact staleness warnings instead of the coarse tree-changed one; plain refs stay valid input.', 'Plain snapshot reads state; snapshot -i refreshes current interactive refs only.', 'Default snapshot text is an agent-facing, token-efficient view for planning and targeting actions.', 'Read-only visible/state question: use snapshot/get/is/find; use snapshot -i only when refs are needed.', diff --git a/src/daemon/__tests__/response-views.test.ts b/src/daemon/__tests__/response-views.test.ts index a40d9d3b1..cadcc64e5 100644 --- a/src/daemon/__tests__/response-views.test.ts +++ b/src/daemon/__tests__/response-views.test.ts @@ -247,3 +247,8 @@ test('find/get default and full return today’s shape unchanged (same reference expect(getView!(data, 'default')).toBe(data); expect(getView!(data, 'full')).toBe(data); }); + +test('snapshot digest preserves refsGeneration — the pinning signal for the refs it keeps (#1076)', () => { + const digest = RESPONSE_VIEWS.snapshot!({ ...SNAPSHOT_DATA, refsGeneration: 7 }, 'digest'); + expect(digest.refsGeneration).toBe(7); +}); diff --git a/src/daemon/__tests__/session-snapshot.test.ts b/src/daemon/__tests__/session-snapshot.test.ts new file 
mode 100644 index 000000000..71bb11a66 --- /dev/null +++ b/src/daemon/__tests__/session-snapshot.test.ts @@ -0,0 +1,99 @@ +import { expect, test } from 'vitest'; +import type { SnapshotState } from '../../kernel/snapshot.ts'; +import type { SessionState } from '../types.ts'; +import { + resolveRefStalenessWarning, + setSessionSnapshot, + STALE_SNAPSHOT_REFS_WARNING, +} from '../session-snapshot.ts'; + +function makeSession(): SessionState { + return { + name: 'default', + device: { id: 'device-1', name: 'Test Device', platform: 'apple' }, + createdAt: Date.now(), + actions: [], + } as unknown as SessionState; +} + +function makeSnapshot(): SnapshotState { + return { nodes: [], createdAt: Date.now(), backend: 'xctest' }; +} + +test('setSessionSnapshot advances the generation on every tree replacement (#1076 versioned refs)', () => { + const session = makeSession(); + expect(session.snapshotGeneration).toBeUndefined(); + + const first = makeSnapshot(); + setSessionSnapshot(session, first); + // First bump of a lifetime is SEEDED at a random 6-digit base (see + // nextSnapshotGeneration) — assert the range, not a literal. + const seeded = session.snapshotGeneration!; + expect(seeded).toBeGreaterThanOrEqual(100_000); + expect(seeded).toBeLessThan(1_000_000); + expect(session.snapshotRefsStale).toBe(true); + + // Storing the SAME snapshot object again is not a replacement. + setSessionSnapshot(session, first); + expect(session.snapshotGeneration).toBe(seeded); + + // Within a lifetime the counter is strictly monotonic. + setSessionSnapshot(session, makeSnapshot()); + expect(session.snapshotGeneration).toBe(seeded + 1); +}); + +test('a reopened session reseeds so pins from a previous lifetime do not silently collide', () => { + const firstLifetime = makeSession(); + setSessionSnapshot(firstLifetime, makeSnapshot()); + const oldGeneration = firstLifetime.snapshotGeneration!; + + // Reopen: a fresh session object restarts the counter with a NEW seed. 
+ const secondLifetime = makeSession(); + setSessionSnapshot(secondLifetime, makeSnapshot()); + secondLifetime.snapshotRefsStale = false; + + // Probabilistic, not identity-based: the seeds collide with ~1/900000 + // probability (an accepted residual risk, documented on the field). + expect(secondLifetime.snapshotGeneration).not.toBe(oldGeneration); + // A pin minted in the previous lifetime warns instead of reading as current. + expect( + resolveRefStalenessWarning({ + session: secondLifetime, + ref: '@e1', + mintedGeneration: oldGeneration, + }), + ).toContain(`minted from snapshot s${oldGeneration}`); +}); + +test('resolveRefStalenessWarning: pinned-current clean, pinned-stale precise, plain coarse', () => { + const session = makeSession(); + session.snapshotGeneration = 15; + session.snapshotRefsStale = true; + + // Pinned to the stored generation: the pin proves the ref matches the tree, + // so the coarse marker is overruled. + expect( + resolveRefStalenessWarning({ session, ref: '@e37', mintedGeneration: 15 }), + ).toBeUndefined(); + + expect(resolveRefStalenessWarning({ session, ref: '@e37', mintedGeneration: 12 })).toBe( + 'Ref @e37 was minted from snapshot s12 but the session tree is now s15 — re-run snapshot -i.', + ); + + expect(resolveRefStalenessWarning({ session, ref: '@e37', mintedGeneration: undefined })).toBe( + STALE_SNAPSHOT_REFS_WARNING, + ); + + session.snapshotRefsStale = false; + expect( + resolveRefStalenessWarning({ session, ref: '@e37', mintedGeneration: undefined }), + ).toBeUndefined(); +}); + +test('resolveRefStalenessWarning treats a missing stored generation as s0', () => { + const session = makeSession(); + expect(resolveRefStalenessWarning({ session, ref: 'e2', mintedGeneration: 3 })).toBe( + 'Ref @e2 was minted from snapshot s3 but the session tree is now s0 — re-run snapshot -i.', + ); + expect(resolveRefStalenessWarning({ session, ref: '@e2', mintedGeneration: 0 })).toBeUndefined(); +}); diff --git 
a/src/daemon/handlers/__tests__/find.test.ts b/src/daemon/handlers/__tests__/find.test.ts index a866163d9..2cbe13e2f 100644 --- a/src/daemon/handlers/__tests__/find.test.ts +++ b/src/daemon/handlers/__tests__/find.test.ts @@ -93,7 +93,8 @@ test('handleFindCommands click returns deterministic metadata across locator var positionals: ['Increment', 'click'], nodes: [hittableParentNoRect, nonHittableChildWithRect], invoke: async () => ({ platformSpecificRef: 'XCUIElementTypeView' }), - expectedKeys: ['locator', 'message', 'query', 'ref', 'x', 'y'], + // refsGeneration rides every ref-issuing find response (#1076 versioned refs). + expectedKeys: ['locator', 'message', 'query', 'ref', 'refsGeneration', 'x', 'y'], expectedLocator: 'any', expectedQuery: 'Increment', expectedCoordinates: { x: 100, y: 50 }, @@ -757,3 +758,33 @@ test('handleFindCommands click re-issues a fresh ref and clears the stale-refs m // the marker clears before the internal click @ref sub-invocation runs. expect(storedSession.snapshotRefsStale).toBe(false); }); + +test('handleFindCommands click carries refsGeneration for the freshly stored tree (#1076 versioned refs)', async () => { + const sessionName = 'default'; + const session = makeSession(sessionName); + // Two earlier tree replacements happened in this session. + session.snapshotGeneration = 2; + + const { response, session: storedSession } = await runFindClickScenario({ + positionals: ['Increment', 'click'], + nodes: [ + { + index: 0, + type: 'Button', + label: 'Increment', + hittable: true, + rect: { x: 50, y: 0, width: 100, height: 100 }, + depth: 0, + }, + ], + session, + }); + + expect(response.ok).toBe(true); + // The find capture replaced the stored tree (generation 3) and the response + // returns a ref minted from it, so it reports that generation ONCE. 
+ expect(storedSession.snapshotGeneration).toBe(3); + if (response.ok) { + expect((response.data as Record).refsGeneration).toBe(3); + } +}); diff --git a/src/daemon/handlers/__tests__/interaction-touch-targets.test.ts b/src/daemon/handlers/__tests__/interaction-touch-targets.test.ts index 804d1ed33..2c489d1bb 100644 --- a/src/daemon/handlers/__tests__/interaction-touch-targets.test.ts +++ b/src/daemon/handlers/__tests__/interaction-touch-targets.test.ts @@ -1,5 +1,9 @@ import { test, expect } from 'vitest'; -import { parseFillTarget, parseTouchTarget } from '../interaction-touch-targets.ts'; +import { + parseFillTarget, + parseLongPressTarget, + parseTouchTarget, +} from '../interaction-touch-targets.ts'; test('parseTouchTarget preserves ref fallback label through shared grammar', () => { const parsed = parseTouchTarget(['@e4', 'Email field'], 'press'); @@ -78,3 +82,77 @@ test('parseFillTarget rejects invalid coordinates instead of treating them as a } } }); + +// --- Versioned refs (#1076): the daemon boundary splits `@e12~s3` pins --- + +test('parseTouchTarget splits a pinned ref into plain ref + generation', () => { + const parsed = parseTouchTarget(['@e4~s12', 'Email field'], 'press'); + + expect(parsed).toEqual({ + ok: true, + target: { + kind: 'ref', + ref: '@e4', + fallbackLabel: 'Email field', + }, + refGeneration: 12, + }); +}); + +test('parseTouchTarget rejects a malformed generation suffix with the grammar hint', () => { + const parsed = parseTouchTarget(['@e4~s'], 'press'); + + expect(parsed.ok).toBe(false); + if (!parsed.ok) { + expect(parsed.response).toMatchObject({ + ok: false, + error: { + code: 'INVALID_ARGS', + message: expect.stringContaining('malformed generation suffix'), + details: { hint: expect.stringContaining('@e12~s3') }, + }, + }); + } +}); + +test('parseLongPressTarget carries the pinned generation past the trailing duration', () => { + const parsed = parseLongPressTarget(['@e4~s7', '800']); + + expect(parsed).toEqual({ + ok: true, + 
target: { + kind: 'ref', + ref: '@e4', + fallbackLabel: '', + }, + refGeneration: 7, + durationMs: 800, + }); +}); + +test('parseFillTarget splits a pinned ref and keeps the text intact', () => { + const parsed = parseFillTarget(['@e4~s3', 'qa@example.com']); + + expect(parsed).toEqual({ + ok: true, + target: { + kind: 'ref', + ref: '@e4', + fallbackLabel: '', + }, + refGeneration: 3, + text: 'qa@example.com', + }); +}); + +test('parseFillTarget rejects a malformed pinned ref before reading text', () => { + const parsed = parseFillTarget(['@e4~x3', 'text']); + + expect(parsed.ok).toBe(false); + if (!parsed.ok) { + expect(parsed.response).toMatchObject({ + ok: false, + error: { code: 'INVALID_ARGS' }, + }); + } +}); diff --git a/src/daemon/handlers/__tests__/interaction.test.ts b/src/daemon/handlers/__tests__/interaction.test.ts index d9cf62ab8..1ce291c99 100644 --- a/src/daemon/handlers/__tests__/interaction.test.ts +++ b/src/daemon/handlers/__tests__/interaction.test.ts @@ -3406,3 +3406,158 @@ test('stale-ref warning appends to an existing interaction warning', async () => expect(warning.endsWith(STALE_SNAPSHOT_REFS_WARNING)).toBe(true); } }); + +// --- Versioned @ref pins (#1076 follow-up) --- + +test('press with a pinned ref matching the stored generation is clean even while the coarse marker is set', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'pinned-current-clean'; + const session = makeStaleRefSession(sessionName); + session.snapshotGeneration = 5; + // Coarse marker set (e.g. a --verify evidence capture stored the SAME tree + // shape again) — the pin proves the client's ref came from this generation. 
+ session.snapshotRefsStale = true; + sessionStore.set(sessionName, session); + + const response = await runInteraction(sessionStore, sessionName, 'press', ['@e1~s5']); + expect(response?.ok).toBe(true); + if (response?.ok) { + expect(response.data?.warning).toBeUndefined(); + } +}); + +test('press with a pinned ref from an older generation gets the precise warning', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'pinned-stale-precise'; + const session = makeStaleRefSession(sessionName); + session.snapshotGeneration = 15; + sessionStore.set(sessionName, session); + + const response = await runInteraction(sessionStore, sessionName, 'press', ['@e1~s12']); + expect(response?.ok).toBe(true); + if (response?.ok) { + expect(response.data?.warning).toBe( + 'Ref @e1 was minted from snapshot s12 but the session tree is now s15 — re-run snapshot -i.', + ); + } +}); + +test('fill with a pinned stale ref gets the precise warning; pinned current is clean', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'pinned-fill'; + const session = makeStaleRefSession(sessionName); + session.snapshot = { + nodes: attachRefs([ + { + index: 0, + type: 'XCUIElementTypeTextField', + label: 'Email', + rect: { x: 10, y: 20, width: 200, height: 40 }, + enabled: true, + hittable: true, + }, + ]), + createdAt: Date.now(), + backend: 'xctest', + }; + session.snapshotGeneration = 3; + session.snapshotRefsStale = true; + sessionStore.set(sessionName, session); + + const stale = await runInteraction(sessionStore, sessionName, 'fill', ['@e1~s2', 'hello']); + expect(stale?.ok).toBe(true); + if (stale?.ok) { + expect(stale.data?.warning).toBe( + 'Ref @e1 was minted from snapshot s2 but the session tree is now s3 — re-run snapshot -i.', + ); + } + + const current = await runInteraction(sessionStore, sessionName, 'fill', ['@e1~s3', 'hello']); + expect(current?.ok).toBe(true); + if (current?.ok) { + expect(current.data?.warning).toBeUndefined(); + 
} +}); + +test('get text with a pinned stale ref gets the precise warning', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'pinned-get-text'; + const session = makeStaleRefSession(sessionName); + session.snapshotGeneration = 4; + sessionStore.set(sessionName, session); + mockDispatch.mockRejectedValue( + new Error('dispatch should not be called for snapshot-derived get text'), + ); + + const response = await runInteraction(sessionStore, sessionName, 'get', ['text', '@e1~s2']); + expect(response?.ok).toBe(true); + if (response?.ok) { + expect(response.data?.warning).toBe( + 'Ref @e1 was minted from snapshot s2 but the session tree is now s4 — re-run snapshot -i.', + ); + expect(response.data?.ref).toBe('e1'); + } +}); + +test('a plain ref keeps the coarse #1093 warning, never the pinned text', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'plain-ref-coarse'; + const session = makeStaleRefSession(sessionName); + session.snapshotGeneration = 7; + session.snapshotRefsStale = true; + sessionStore.set(sessionName, session); + + const response = await runInteraction(sessionStore, sessionName, 'press', ['@e1']); + expect(response?.ok).toBe(true); + if (response?.ok) { + expect(response.data?.warning).toBe(STALE_SNAPSHOT_REFS_WARNING); + } +}); + +test('a malformed generation suffix is INVALID_ARGS with the ref grammar hint', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'pinned-malformed'; + const session = makeStaleRefSession(sessionName); + sessionStore.set(sessionName, session); + + for (const [command, positionals] of [ + ['press', ['@e1~x3']], + ['fill', ['@e1~s', 'text']], + ['get', ['text', '@e1~3']], + ] as const) { + const response = await runInteraction(sessionStore, sessionName, command, [...positionals]); + expect(response?.ok).toBe(false); + if (response && !response.ok) { + expect(response.error.code).toBe('INVALID_ARGS'); + 
expect(response.error.message).toContain('malformed generation suffix'); + expect(String(response.error.details?.hint)).toContain('@e12~s3'); + } + } +}); + +test('after a session reopen, a pin from the previous lifetime warns (reseeded generations)', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'reopened-pin-warns'; + // Previous lifetime: a seeded generation minted the client's pin. + const previous = makeStaleRefSession(sessionName); + setSessionSnapshot(previous, { ...previous.snapshot! }); + const oldGeneration = previous.snapshotGeneration!; + + // Reopen: fresh session object, same name — the counter reseeds, so the + // old pin cannot silently read as current even though both lifetimes are + // one replacement deep (a per-lifetime count from 1 would collide here). + const reopened = makeStaleRefSession(sessionName); + setSessionSnapshot(reopened, { ...reopened.snapshot! }); + reopened.snapshotRefsStale = false; + sessionStore.set(sessionName, reopened); + // Probabilistic (~1/900000 collision) — accepted residual risk. + expect(reopened.snapshotGeneration).not.toBe(oldGeneration); + + const response = await runInteraction(sessionStore, sessionName, 'press', [ + `@e1~s${oldGeneration}`, + ]); + expect(response?.ok).toBe(true); + if (response?.ok) { + expect(String(response.data?.warning)).toContain(`minted from snapshot s${oldGeneration}`); + } +}); diff --git a/src/daemon/handlers/__tests__/snapshot-handler.test.ts b/src/daemon/handlers/__tests__/snapshot-handler.test.ts index cf44f7f53..5196a88ff 100644 --- a/src/daemon/handlers/__tests__/snapshot-handler.test.ts +++ b/src/daemon/handlers/__tests__/snapshot-handler.test.ts @@ -450,6 +450,72 @@ test('snapshot clears the stale-refs marker; diff leaves client refs stale (#107 expect(sessionStore.get(sessionName)?.snapshotRefsStale).toBe(true); }); +// #1076 versioned refs — shared harness for the refsGeneration tests below. 
+async function runVersionedRefsCommand(params: { + sessionStore: ReturnType; + sessionName: string; + command: 'snapshot' | 'diff'; +}): Promise | undefined> { + const response = await handleSnapshotCommands({ + req: { + token: 't', + session: params.sessionName, + command: params.command, + positionals: params.command === 'diff' ? ['snapshot'] : [], + flags: {}, + }, + sessionName: params.sessionName, + logPath: '/tmp/daemon.log', + sessionStore: params.sessionStore, + }); + expect(response?.ok).toBe(true); + return response?.ok ? response.data : undefined; +} + +function makeVersionedRefsScenario(sessionName: string) { + const sessionStore = makeSessionStore(); + sessionStore.set(sessionName, makeSession(sessionName, androidDevice)); + mockDispatch.mockResolvedValue({ + nodes: [{ index: 0, depth: 0, type: 'android.widget.Button', label: 'Fresh' }], + truncated: false, + backend: 'android', + }); + return sessionStore; +} + +test('snapshot responses carry refsGeneration and advance it per capture (#1076 versioned refs)', async () => { + const sessionName = 'android-refs-generation'; + const sessionStore = makeVersionedRefsScenario(sessionName); + + const first = await runVersionedRefsCommand({ sessionStore, sessionName, command: 'snapshot' }); + // Ref-issuing response reports the generation ONCE; the node tree itself + // stays plain `e1` refs (token economy). The first generation of a session + // lifetime is SEEDED (random 6-digit base), so assert relative bumps and + // echo the observed seed instead of literals. 
+ const seed = first?.refsGeneration; + expect(typeof seed).toBe('number'); + expect(sessionStore.get(sessionName)?.snapshotGeneration).toBe(seed); + + const second = await runVersionedRefsCommand({ sessionStore, sessionName, command: 'snapshot' }); + expect(second?.refsGeneration).toBe((seed as number) + 1); +}); + +test('diff advances the generation without issuing refsGeneration (#1076 versioned refs)', async () => { + const sessionName = 'android-refs-generation-diff'; + const sessionStore = makeVersionedRefsScenario(sessionName); + + await runVersionedRefsCommand({ sessionStore, sessionName, command: 'snapshot' }); + + const seed = sessionStore.get(sessionName)?.snapshotGeneration as number; + + // diff replaces the stored tree too — the generation advances even though + // the summary response issues no refs, which is exactly what a ref pinned + // to the snapshot generation would then warn about. + const diffData = await runVersionedRefsCommand({ sessionStore, sessionName, command: 'diff' }); + expect(diffData?.refsGeneration).toBeUndefined(); + expect(sessionStore.get(sessionName)?.snapshotGeneration).toBe(seed + 1); +}); + test('snapshot surfaces filtered-to-zero Android guidance for interactive snapshots', async () => { const sessionStore = makeSessionStore(); const sessionName = 'android-empty-interactive'; diff --git a/src/daemon/handlers/find.ts b/src/daemon/handlers/find.ts index f2ad212ab..a04099d3e 100644 --- a/src/daemon/handlers/find.ts +++ b/src/daemon/handlers/find.ts @@ -178,7 +178,27 @@ export async function handleFindCommands(params: { }; const handler = actionHandlers[action]; - return handler ? handler() : null; + if (!handler) return null; + // Re-read the session AFTER the handler: internal click/fill sub-invocations + // may have replaced the stored tree again (Android freshness refresh), and + // the reported generation must describe the tree the response's ref resolves + // against at response time. 
+ return attachIssuedRefsGeneration(await handler(), () => sessionStore.get(sessionName)); +} + +/** + * #1076 versioned refs: a find response that returns a ref is a ref-issuing + * response, so it carries the stored tree's generation ONCE as the additive + * `refsGeneration` field (the ref itself stays plain `@e12` — token economy). + */ +function attachIssuedRefsGeneration( + response: DaemonResponse | null, + getSession: () => SessionState | undefined, +): DaemonResponse | null { + if (!response?.ok || !response.data || typeof response.data.ref !== 'string') return response; + const refsGeneration = getSession()?.snapshotGeneration; + if (refsGeneration === undefined) return response; + return { ...response, data: { ...response.data, refsGeneration } }; } // --- Per-action handlers --- diff --git a/src/daemon/handlers/interaction-touch-response.ts b/src/daemon/handlers/interaction-touch-response.ts index 11c242e08..e6290b256 100644 --- a/src/daemon/handlers/interaction-touch-response.ts +++ b/src/daemon/handlers/interaction-touch-response.ts @@ -5,7 +5,6 @@ import type { PressCommandResult, } from '../../contracts/interaction.ts'; import { successText } from '../../utils/success-text.ts'; -import { STALE_SNAPSHOT_REFS_WARNING } from '../session-snapshot.ts'; import { interactionResultExtra, stripAtPrefix } from './interaction-touch-targets.ts'; /** @@ -57,12 +56,14 @@ export function buildInteractionResponseData(params: { */ extra?: Record; /** - * The command consumed an `@ref` argument while the session's stored - * snapshot had been replaced without re-issuing refs to the client - * (`session.snapshotRefsStale`, #1076). Appends - * STALE_SNAPSHOT_REFS_WARNING to the response warning. 
+ * Staleness warning for the consumed `@ref` argument (#1076), resolved by + * `resolveRefStalenessWarning` (src/daemon/session-snapshot.ts): the coarse + * STALE_SNAPSHOT_REFS_WARNING for plain refs while `snapshotRefsStale` is + * set, or the precise pinned-generation warning for `@e12~s3` refs whose + * generation no longer matches the stored tree. Appended to the response + * warning. */ - staleRefs?: boolean; + staleRefsWarning?: string; }): InteractionResponsePayloads { const { source, referenceFrame, extra } = params; if (source.kind === 'runner-payload') { @@ -99,7 +100,7 @@ export function buildInteractionResponseData(params: { : visualization; const warning = composeResponseWarning( 'warning' in result ? result.warning : undefined, - params.staleRefs, + params.staleRefsWarning, ); if (warning) { visualization.warning = warning; @@ -110,12 +111,10 @@ export function buildInteractionResponseData(params: { function composeResponseWarning( resultWarning: string | undefined, - staleRefs: boolean | undefined, + staleRefsWarning: string | undefined, ): string | undefined { - if (staleRefs !== true) return resultWarning; - return resultWarning - ? `${resultWarning} ${STALE_SNAPSHOT_REFS_WARNING}` - : STALE_SNAPSHOT_REFS_WARNING; + if (!staleRefsWarning) return resultWarning; + return resultWarning ? 
`${resultWarning} ${staleRefsWarning}` : staleRefsWarning; } function buildTouchVisualizationResult(params: { diff --git a/src/daemon/handlers/interaction-touch-targets.ts b/src/daemon/handlers/interaction-touch-targets.ts index 763535057..76eac41d4 100644 --- a/src/daemon/handlers/interaction-touch-targets.ts +++ b/src/daemon/handlers/interaction-touch-targets.ts @@ -9,13 +9,39 @@ import { type DecodedFillTarget, } from '../../core/interaction-positionals.ts'; import type { DaemonResponse } from '../types.ts'; +import { REF_GRAMMAR_HINT, splitRefGenerationSuffix } from '../../kernel/snapshot.ts'; import { parseCoordinateTarget } from './interaction-targeting.ts'; import { errorResponse } from './response.ts'; export type ParsedTouchTarget = - | { ok: true; target: InteractionTarget; durationMs?: never } + | { ok: true; target: InteractionTarget; refGeneration?: number; durationMs?: never } | { ok: false; response: DaemonResponse }; +/** + * Daemon boundary for the versioned-ref suffix (#1076): a pinned `@e12~s3` + * target is split here so everything downstream (runtime resolution, backend + * fast paths, recording) sees exactly today's plain `@e12` ref, while the + * minted generation is surfaced separately for the staleness warning. 
+ */ +type ParsedVersionedRef = + | { ok: true; ref: string; generation?: number } + | { ok: false; response: DaemonResponse }; + +export function parseVersionedRefPositional(refInput: string): ParsedVersionedRef { + const split = splitRefGenerationSuffix(refInput); + if (!split) { + return { + ok: false, + response: errorResponse( + 'INVALID_ARGS', + `Invalid ref "${refInput}" — malformed generation suffix.`, + { hint: REF_GRAMMAR_HINT }, + ), + }; + } + return { ok: true, ref: split.base, generation: split.generation }; +} + export function parseTouchTarget(positionals: string[], commandLabel: string): ParsedTouchTarget { const coordinates = parseCoordinateTarget(positionals); if (coordinates) { @@ -23,13 +49,16 @@ export function parseTouchTarget(positionals: string[], commandLabel: string): P } const first = positionals[0] ?? ''; if (first.startsWith('@')) { + const versioned = parseVersionedRefPositional(first); + if (!versioned.ok) return { ok: false, response: versioned.response }; return { ok: true, target: { kind: 'ref', - ref: first, + ref: versioned.ref, fallbackLabel: positionals.slice(1).join(' ').trim(), }, + refGeneration: versioned.generation, }; } const selector = positionals.join(' ').trim(); @@ -46,7 +75,7 @@ export function parseTouchTarget(positionals: string[], commandLabel: string): P } export type ParsedLongPressTarget = - | { ok: true; target: InteractionTarget; durationMs?: number } + | { ok: true; target: InteractionTarget; refGeneration?: number; durationMs?: number } | { ok: false; response: DaemonResponse }; export function parseLongPressTarget(positionals: string[]): ParsedLongPressTarget { @@ -65,17 +94,20 @@ export function parseLongPressTarget(positionals: string[]): ParsedLongPressTarg return { ok: true, target: parsedTarget.target, + refGeneration: parsedTarget.refGeneration, ...split.duration, }; } export type ParsedFillTarget = - | { ok: true; target: InteractionTarget; text: string } + | { ok: true; target: InteractionTarget; 
refGeneration?: number; text: string } | { ok: false; response: DaemonResponse }; export function parseFillTarget(positionals: string[]): ParsedFillTarget { const first = positionals[0] ?? ''; if (first.startsWith('@')) { + const versioned = parseVersionedRefPositional(first); + if (!versioned.ok) return { ok: false, response: versioned.response }; const parsed = readFillTargetFromPositionals(positionals); const text = parsed.text; if (!text) @@ -84,9 +116,10 @@ export function parseFillTarget(positionals: string[]): ParsedFillTarget { ok: true, target: { kind: 'ref', - ref: first, + ref: versioned.ref, fallbackLabel: readRefFallbackLabel(positionals), }, + refGeneration: versioned.generation, text, }; } diff --git a/src/daemon/handlers/interaction-touch.ts b/src/daemon/handlers/interaction-touch.ts index 304b61ec9..8973aeadc 100644 --- a/src/daemon/handlers/interaction-touch.ts +++ b/src/daemon/handlers/interaction-touch.ts @@ -14,6 +14,7 @@ import type { import { asAppError, normalizeError } from '../../kernel/errors.ts'; import type { DaemonResponse, SessionState } from '../types.ts'; import { finalizeTouchInteraction, type InteractionHandlerParams } from './interaction-common.ts'; +import { resolveRefStalenessWarning } from '../session-snapshot.ts'; import { buildInteractionResponseData, type InteractionResponsePayloads, @@ -118,8 +119,17 @@ async function dispatchTargetedTouchViaRuntime( if (!parsedTarget.ok) return parsedTarget.response; // Staleness relative to what the client knew when it sent this @ref — read // BEFORE any internal recapture (Android freshness refresh, --verify) flips - // the flag as a side effect of this same command (#1076). - const staleRefs = parsedTarget.target.kind === 'ref' && session.snapshotRefsStale === true; + // the flag or advances the generation as a side effect of this same command + // (#1076). Pinned refs (`@e12~s3`) get a precise generation-mismatch + // warning; plain refs keep the coarse marker warning. 
+ const staleRefsWarning = + parsedTarget.target.kind === 'ref' + ? resolveRefStalenessWarning({ + session, + ref: parsedTarget.target.ref, + mintedGeneration: parsedTarget.refGeneration, + }) + : undefined; let androidFreshnessBaseline: SessionState['snapshot']; if (parsedTarget.target.kind === 'ref') { const invalidRefFlagsResponse = params.refSnapshotFlagGuardResponse( @@ -167,7 +177,7 @@ async function dispatchTargetedTouchViaRuntime( params, session, result, - staleRefs, + staleRefsWarning, extra: command === 'longpress' ? { @@ -224,7 +234,7 @@ async function buildTargetedTouchResponsePayloads(params: { }; session: SessionState; result: TargetedTouchResult; - staleRefs: boolean; + staleRefsWarning: string | undefined; extra: Record; }): Promise { const { params: handlerParams, session, result, extra } = params; @@ -242,7 +252,7 @@ async function buildTargetedTouchResponsePayloads(params: { source: { kind: 'runtime', result }, referenceFrame, extra, - staleRefs: params.staleRefs, + staleRefsWarning: params.staleRefsWarning, }); } @@ -432,7 +442,14 @@ async function dispatchFillViaRuntime( const parsedTarget = parseFillTarget(req.positionals ?? []); if (!parsedTarget.ok) return parsedTarget.response; // Read before the Android freshness refresh recaptures — see the press path. - const staleRefs = parsedTarget.target.kind === 'ref' && session.snapshotRefsStale === true; + const staleRefsWarning = + parsedTarget.target.kind === 'ref' + ? 
resolveRefStalenessWarning({ + session, + ref: parsedTarget.target.ref, + mintedGeneration: parsedTarget.refGeneration, + }) + : undefined; if (parsedTarget.target.kind === 'ref') { const invalidRefFlagsResponse = params.refSnapshotFlagGuardResponse('fill', req.flags); if (invalidRefFlagsResponse) return invalidRefFlagsResponse; @@ -474,7 +491,7 @@ async function dispatchFillViaRuntime( source: { kind: 'runtime', result, refBackendWireShape: true }, referenceFrame, extra: { text: parsedTarget.text }, - staleRefs, + staleRefsWarning, }); }, }); diff --git a/src/daemon/response-views.ts b/src/daemon/response-views.ts index 88d55b348..8b6660ecf 100644 --- a/src/daemon/response-views.ts +++ b/src/daemon/response-views.ts @@ -33,6 +33,9 @@ function snapshotView(data: DaemonResponseData, level: ResponseLevel): DaemonRes truncated: data.truncated, ...(data.visibility !== undefined ? { visibility: data.visibility } : {}), ...(data.snapshotQuality !== undefined ? { snapshotQuality: data.snapshotQuality } : {}), + // #1076 versioned refs: the one-number generation is the pinning signal for + // the refs above — cheap, and dropping it would strand auto-pinning clients. + ...(data.refsGeneration !== undefined ? 
{ refsGeneration: data.refsGeneration } : {}), }; } diff --git a/src/daemon/selector-runtime.ts b/src/daemon/selector-runtime.ts index 9f4530dd1..bf819ba41 100644 --- a/src/daemon/selector-runtime.ts +++ b/src/daemon/selector-runtime.ts @@ -6,11 +6,12 @@ import { runAppleRunnerCommand } from '../platforms/apple/core/runner/runner-cli import { buildAppleRunnerRequestOptions } from './apple-runner-options.ts'; import type { DaemonRequest, DaemonResponse, SessionState } from './types.ts'; import { errorResponse, requireCommandSupported } from './handlers/response.ts'; -import { markSessionSnapshotRefsIssued, STALE_SNAPSHOT_REFS_WARNING } from './session-snapshot.ts'; +import { markSessionSnapshotRefsIssued, resolveRefStalenessWarning } from './session-snapshot.ts'; import { resolveSessionDevice, withSessionlessRunnerCleanup } from './handlers/snapshot-session.ts'; import { parseFindArgs, type FindAction } from '../utils/finders.ts'; import { splitIsSelectorArgs } from './selectors.ts'; import { refSnapshotFlagGuardResponse } from './handlers/interaction-flags.ts'; +import { parseVersionedRefPositional } from './handlers/interaction-touch-targets.ts'; import { evaluateIsPredicate, isSupportedPredicate, @@ -104,12 +105,17 @@ export async function dispatchFindReadOnlyViaRuntime( const data = toDaemonFindData(result); // #1076 clear choke point: this response returns a ref minted from the // freshly captured (and stored) session snapshot, so the client now holds - // refs that match the stored tree again. + // refs that match the stored tree again. As a ref-issuing response it also + // carries the stored tree's generation ONCE (`refsGeneration`) so clients + // can pin the ref (`@e12~s3`). 
if (typeof data.ref === 'string') { const session = params.sessionStore.get(params.sessionName); if (session) { markSessionSnapshotRefsIssued(session); params.sessionStore.set(params.sessionName, session); + if (session.snapshotGeneration !== undefined) { + return { ...data, refsGeneration: session.snapshotGeneration }; + } } } return data; @@ -143,10 +149,16 @@ export async function dispatchGetViaRuntime( if (!resolvedRuntime.ok) return resolvedRuntime.response; // #1076: get @ref reads from the stored snapshot; warn when that tree was - // replaced since the client last received refs. - const staleRefs = - target.target.kind === 'ref' && - params.sessionStore.get(params.sessionName)?.snapshotRefsStale === true; + // replaced since the client last received refs — coarse marker for plain + // refs, precise generation mismatch for pinned `@e12~s3` refs. + const staleRefsWarning = + target.target.kind === 'ref' + ? resolveRefStalenessWarning({ + session: params.sessionStore.get(params.sessionName), + ref: target.target.ref, + mintedGeneration: target.refGeneration, + }) + : undefined; return await toDaemonResponse(async () => { const result = await resolvedRuntime.runtime.selectors.get({ session: params.sessionName, @@ -161,7 +173,7 @@ export async function dispatchGetViaRuntime( buildGetRecordResult(result, sub), ); const data = toDaemonGetData(result); - return staleRefs ? { ...data, warning: STALE_SNAPSHOT_REFS_WARNING } : data; + return staleRefsWarning ? { ...data, warning: staleRefsWarning } : data; }); } @@ -235,8 +247,22 @@ export async function dispatchWaitViaRuntime( if (directResponse) return directResponse; } // #1076: wait @ref re-resolves the ref against fresh polling captures; warn - // when the stored tree already drifted from the refs the client holds. 
- const staleRefs = parsed.kind === 'ref' && session?.snapshotRefsStale === true; + // when the stored tree already drifted from the refs the client holds — + // coarse marker for plain refs, precise generation mismatch for pinned + // `@e12~s3` refs. The pin is split off HERE so the runtime and recording + // only ever see the plain `@e12` form. + let waitParsed = parsed; + let staleRefsWarning: string | undefined; + if (parsed.kind === 'ref') { + const versionedRef = parseVersionedRefPositional(parsed.rawRef); + if (!versionedRef.ok) return versionedRef.response; + waitParsed = { ...parsed, rawRef: versionedRef.ref }; + staleRefsWarning = resolveRefStalenessWarning({ + session, + ref: versionedRef.ref, + mintedGeneration: versionedRef.generation, + }); + } const execute = async () => { const runtime = createSelectorRuntimeForDevice({ ...params, @@ -247,11 +273,11 @@ export async function dispatchWaitViaRuntime( const result = await runtime.selectors.wait({ session: sessionName, requestId: req.meta?.requestId, - target: toWaitTarget(parsed, session), + target: toWaitTarget(waitParsed, session), }); recordIfSession(sessionStore, sessionName, req, result); const data = toDaemonWaitData(result); - return staleRefs ? { ...data, warning: STALE_SNAPSHOT_REFS_WARNING } : data; + return staleRefsWarning ? { ...data, warning: staleRefsWarning } : data; }); const enrichedResponse = await maybeWaitTimeoutSurfaceResponse( { req, logPath: params.logPath, session, device }, @@ -467,17 +493,22 @@ function parseGetTarget(req: DaemonRequest): target: | { kind: 'ref'; ref: string; fallbackLabel?: string } | { kind: 'selector'; selector: string }; + /** Minted generation from a pinned `@e12~s3` ref (#1076), split off the ref. */ + refGeneration?: number; } | { ok: false; response: DaemonResponse } { const refInput = req.positionals?.[1] ?? 
''; if (refInput.startsWith('@')) { + const versionedRef = parseVersionedRefPositional(refInput); + if (!versionedRef.ok) return { ok: false, response: versionedRef.response }; return { ok: true, target: { kind: 'ref', - ref: refInput, + ref: versionedRef.ref, fallbackLabel: req.positionals.length > 2 ? req.positionals.slice(2).join(' ').trim() : '', }, + refGeneration: versionedRef.generation, }; } const selector = req.positionals?.slice(1).join(' ').trim() ?? ''; diff --git a/src/daemon/session-script-writer.ts b/src/daemon/session-script-writer.ts index 8466534a0..af1738b52 100644 --- a/src/daemon/session-script-writer.ts +++ b/src/daemon/session-script-writer.ts @@ -11,6 +11,7 @@ import { formatScriptStringLiteral, isClickLikeCommand, isTouchTargetCommand, + stripRecordedRefGeneration, } from '../replay/script-utils.ts'; import type { SessionAction, SessionState } from './types.ts'; @@ -192,7 +193,9 @@ function formatClickLikeActionLine(parts: string[], action: SessionAction): stri const first = action.positionals?.[0]; if (!first) return undefined; if (first.startsWith('@')) { - parts.push(formatScriptArg(first)); + // Recorded refs may carry a `~s` pin (#1076); scripts store the + // plain ref — generations are meaningless outside the minting session. + parts.push(formatScriptArg(stripRecordedRefGeneration(first))); appendRefLabel(parts, action); appendScriptSeriesFlags(parts, action); return parts.join(' '); @@ -208,7 +211,7 @@ function formatClickLikeActionLine(parts: string[], action: SessionAction): stri function formatFillActionLine(parts: string[], action: SessionAction): string | undefined { const ref = action.positionals?.[0]; if (!ref?.startsWith('@')) return undefined; - parts.push(formatScriptArg(ref)); + parts.push(formatScriptArg(stripRecordedRefGeneration(ref))); appendRefLabel(parts, action); const text = action.positionals.slice(1).join(' '); // Preserve explicit empty-string fill arguments. 
@@ -224,7 +227,7 @@ function formatGetActionLine(parts: string[], action: SessionAction): string | u const ref = action.positionals?.[1]; if (!sub || !ref) return undefined; parts.push(formatScriptArg(sub)); - parts.push(formatScriptArg(ref)); + parts.push(formatScriptArg(stripRecordedRefGeneration(ref))); if (ref.startsWith('@')) appendRefLabel(parts, action); return parts.join(' '); } diff --git a/src/daemon/session-snapshot.ts b/src/daemon/session-snapshot.ts index 9f7ef2cd5..25ad0b86d 100644 --- a/src/daemon/session-snapshot.ts +++ b/src/daemon/session-snapshot.ts @@ -1,3 +1,4 @@ +import { randomInt } from 'node:crypto'; import type { SnapshotState } from '../kernel/snapshot.ts'; import type { SessionState } from './types.ts'; @@ -38,6 +39,10 @@ export const STALE_SNAPSHOT_REFS_WARNING = export function setSessionSnapshot(session: SessionState, snapshot: SnapshotState): void { if (session.snapshot !== snapshot) { session.snapshotRefsStale = true; + // #1076 versioned refs: every tree replacement advances the session's + // snapshot generation, so refs pinned to an earlier generation + // (`@e12~s3`) can be diagnosed precisely. + session.snapshotGeneration = nextSnapshotGeneration(session.snapshotGeneration); } session.snapshot = snapshot; session.snapshotScopeSource = undefined; @@ -46,7 +51,62 @@ export function setSessionSnapshot(session: SessionState, snapshot: SnapshotStat } } +/** + * Advance `snapshotGeneration` (#1076 versioned refs). The FIRST bump of a + * session lifetime seeds at a random 6-digit base instead of 1: a reopened + * session restarts its counter, so a per-lifetime count starting at 1 would + * let a stale `@e1~s1` pin from the previous lifetime silently read as + * current. With a seeded base, cross-lifetime collisions are ~1e-6 instead of + * common — the protection is probabilistic (seeded), NOT identity-based. 
+ * Within a lifetime the counter stays strictly monotonic (+1 per replacement), + * so pinned-vs-current comparisons remain exact. + */ +export function nextSnapshotGeneration(current: number | undefined): number { + return current === undefined ? randomInt(100_000, 1_000_000) : current + 1; +} + /** The response being returned hands the stored snapshot's refs to the client. */ export function markSessionSnapshotRefsIssued(session: SessionState): void { session.snapshotRefsStale = false; } + +/** + * Warning for a ref pinned to a generation (`@e12~s3`) that no longer matches + * the stored tree's generation. Unlike STALE_SNAPSHOT_REFS_WARNING it is + * PRECISE: the pin proves which tree minted the ref, so the mismatch is a + * fact, not a conservative marker. + */ +function buildPinnedStaleRefWarning(params: { + ref: string; + mintedGeneration: number; + currentGeneration: number; +}): string { + const plainRef = params.ref.startsWith('@') ? params.ref.slice(1) : params.ref; + return `Ref @${plainRef} was minted from snapshot s${params.mintedGeneration} but the session tree is now s${params.currentGeneration} — re-run snapshot -i.`; +} + +/** + * Staleness warning for a command consuming an `@ref` argument (#1076): + * - pinned ref (`@e12~s3`) matching the stored generation → no warning, even + * while the coarse `snapshotRefsStale` marker is set (the pin proves the + * client's ref came from the stored tree); + * - pinned ref with any other generation → the precise pinned warning; + * - plain ref → the coarse #1093 marker behavior, unchanged. + * + * Warn-only in this release: a stale pinned ref still executes with a warning + * attached. The compat ladder tightens this to an error in a later release, + * once auto-pinning clients (the MCP layer) are established. 
+ */ +export function resolveRefStalenessWarning(params: { + session: SessionState | undefined; + ref: string; + mintedGeneration: number | undefined; +}): string | undefined { + const { session, ref, mintedGeneration } = params; + if (mintedGeneration !== undefined) { + const currentGeneration = session?.snapshotGeneration ?? 0; + if (mintedGeneration === currentGeneration) return undefined; + return buildPinnedStaleRefWarning({ ref, mintedGeneration, currentGeneration }); + } + return session?.snapshotRefsStale === true ? STALE_SNAPSHOT_REFS_WARNING : undefined; +} diff --git a/src/daemon/snapshot-runtime.ts b/src/daemon/snapshot-runtime.ts index cf109da30..50bb759b7 100644 --- a/src/daemon/snapshot-runtime.ts +++ b/src/daemon/snapshot-runtime.ts @@ -18,6 +18,7 @@ import { createDaemonRuntimePolicy } from './runtime-policy.ts'; import { createDaemonRuntimeSessionStore } from './runtime-session.ts'; import { maybeBuildAndroidSnapshotTimeoutFailure } from './android-snapshot-timeout-evidence.ts'; import { summarizeSnapshotDiagnostics } from '../snapshot-diagnostics.ts'; +import { nextSnapshotGeneration } from './session-snapshot.ts'; export async function dispatchSnapshotViaRuntime(params: { req: DaemonRequest; @@ -38,8 +39,14 @@ export async function dispatchSnapshotViaRuntime(params: { raw: req.flags?.snapshotRaw, forceFull: req.flags?.snapshotForceFull, }); + // #1076 versioned refs: the snapshot response is a ref-issuing response, + // so it carries the stored tree's generation ONCE (`refsGeneration`) — + // the node tree itself stays plain `e12` refs (token economy). The + // capture above already stored the next session via setRecord, so the + // store holds the generation these refs were minted from. + const refsGeneration = params.sessionStore.get(sessionName)?.snapshotGeneration; return { - data: result, + data: refsGeneration === undefined ? 
result : { ...result, refsGeneration }, record: { kind: 'snapshot', nodes: result.nodes.length, @@ -250,6 +257,13 @@ function buildNextSnapshotSession(params: { nextSession.snapshotRefsStale = keepCurrentSnapshot ? current?.snapshotRefsStale : !params.issuesRefsToClient; + // #1076 versioned refs: this path bypasses setSessionSnapshot, so it advances + // the generation itself whenever the stored tree is replaced (snapshot AND + // diff — diff's summary response leaves client refs pinned to the previous + // generation, which is exactly what the pinned warning diagnoses). + nextSession.snapshotGeneration = keepCurrentSnapshot + ? current?.snapshotGeneration + : nextSnapshotGeneration(current?.snapshotGeneration); if (record.appName) nextSession.appName = record.appName; return nextSession; } diff --git a/src/daemon/types.ts b/src/daemon/types.ts index d3b62716f..89e946e25 100644 --- a/src/daemon/types.ts +++ b/src/daemon/types.ts @@ -260,6 +260,21 @@ export type SessionState = { * `setSessionSnapshot` (src/daemon/session-snapshot.ts). */ snapshotRefsStale?: boolean; + /** + * Monotonically increasing generation of the stored session snapshot (#1076 + * versioned refs). Incremented every time the stored tree is REPLACED — at + * the `setSessionSnapshot` choke point and in the snapshot/diff command path + * (`buildNextSnapshotSession`). Ref-issuing responses (snapshot command, find + * ref outputs) report it once as the additive `refsGeneration` field; + * consumers may pin refs as `@e12~s3` and get a precise staleness warning + * when the pinned generation no longer matches the stored tree. Plain number + * with per-session lifetime — no persistence. The first bump of a lifetime + * seeds at a random 6-digit base (`nextSnapshotGeneration`), so a pin from a + * previous lifetime of a reopened same-named session collides only with + * ~1e-6 probability instead of commonly: cross-lifetime protection is + * probabilistic (seeded), NOT identity-based. 
+ */ + snapshotGeneration?: number; /** Source snapshot used to resolve repeated `snapshot -s @ref` after scoped output replaces refs. */ snapshotScopeSource?: SnapshotState; /** Last broad snapshot safe for Android route-freshness comparisons after interactive snapshots. */ diff --git a/src/kernel/__tests__/snapshot-ref-grammar.test.ts b/src/kernel/__tests__/snapshot-ref-grammar.test.ts new file mode 100644 index 000000000..2a35646ce --- /dev/null +++ b/src/kernel/__tests__/snapshot-ref-grammar.test.ts @@ -0,0 +1,48 @@ +import { expect, test } from 'vitest'; +import { normalizeRef, splitRefGenerationSuffix } from '../snapshot.ts'; + +// #1076 versioned refs: `~s` is accepted INPUT on every ref parse +// site; node lookup strips it, and callers that care read the generation. + +test('splitRefGenerationSuffix passes plain refs through without a generation', () => { + expect(splitRefGenerationSuffix('@e12')).toEqual({ base: '@e12' }); + expect(splitRefGenerationSuffix('e12')).toEqual({ base: 'e12' }); + expect(splitRefGenerationSuffix(' @e12 ')).toEqual({ base: '@e12' }); +}); + +test('splitRefGenerationSuffix splits well-formed pinned refs', () => { + expect(splitRefGenerationSuffix('@e12~s3')).toEqual({ base: '@e12', generation: 3 }); + expect(splitRefGenerationSuffix('e12~s3')).toEqual({ base: 'e12', generation: 3 }); + expect(splitRefGenerationSuffix('@e7~s0')).toEqual({ base: '@e7', generation: 0 }); + expect(splitRefGenerationSuffix('@e7~s142')).toEqual({ base: '@e7', generation: 142 }); +}); + +test('splitRefGenerationSuffix rejects malformed suffixes', () => { + expect(splitRefGenerationSuffix('@e12~')).toBeNull(); + expect(splitRefGenerationSuffix('@e12~s')).toBeNull(); + expect(splitRefGenerationSuffix('@e12~3')).toBeNull(); + expect(splitRefGenerationSuffix('@e12~x3')).toBeNull(); + expect(splitRefGenerationSuffix('@e12~s3x')).toBeNull(); + expect(splitRefGenerationSuffix('@e12~s-3')).toBeNull(); + expect(splitRefGenerationSuffix('@e12~s3~s4')).toBeNull(); 
+ // A leading tilde has no ref to pin. + expect(splitRefGenerationSuffix('~s3')).toBeNull(); +}); + +test('normalizeRef keeps legacy behavior for plain refs', () => { + expect(normalizeRef('@e12')).toBe('e12'); + expect(normalizeRef('e12')).toBe('e12'); + expect(normalizeRef('@')).toBeNull(); + expect(normalizeRef('12')).toBeNull(); +}); + +test('normalizeRef strips a well-formed generation suffix for node lookup', () => { + expect(normalizeRef('@e12~s3')).toBe('e12'); + expect(normalizeRef('e12~s3')).toBe('e12'); +}); + +test('normalizeRef rejects malformed generation suffixes', () => { + expect(normalizeRef('@e12~s')).toBeNull(); + expect(normalizeRef('@e12~x3')).toBeNull(); + expect(normalizeRef('@e12~')).toBeNull(); +}); diff --git a/src/kernel/snapshot.ts b/src/kernel/snapshot.ts index 79f31c236..b3a76440b 100644 --- a/src/kernel/snapshot.ts +++ b/src/kernel/snapshot.ts @@ -142,8 +142,42 @@ export function attachRefs(nodes: RawSnapshotNode[]): SnapshotNode[] { return nodes.map((node, idx) => ({ ...node, ref: `e${idx + 1}` })); } -export function normalizeRef(input: string): string | null { +/** + * Versioned-ref grammar (#1076): a ref argument may carry an optional + * `~s` suffix pinning it to the session snapshot generation that + * minted it, e.g. `@e12~s3`. The suffix is accepted INPUT only — snapshot + * output stays plain `e12` refs (the tree is the most token-expensive artifact + * agents consume), and ref-issuing responses carry the generation ONCE as the + * additive `refsGeneration` field. + */ +const REF_GENERATION_SUFFIX_RE = /^~s(\d+)$/; + +export const REF_GRAMMAR_HINT = + 'Refs look like @e12, optionally pinned to the snapshot generation that minted them: @e12~s3 (the ref, then "~s" and the refsGeneration reported by the issuing snapshot/find response).'; + +export type SplitRef = { base: string; generation?: number }; + +/** + * Split an optional `~s` suffix off a ref token (`@e12~s3` or bare + * `e12~s3`). 
`base` keeps the token's `@` prefix (or lack of one). Returns null + * when a `~` is present but the suffix does not match the grammar — callers + * surface INVALID_ARGS with REF_GRAMMAR_HINT. + */ +export function splitRefGenerationSuffix(input: string): SplitRef | null { const trimmed = input.trim(); + const tildeIndex = trimmed.indexOf('~'); + if (tildeIndex === -1) return { base: trimmed }; + const match = REF_GENERATION_SUFFIX_RE.exec(trimmed.slice(tildeIndex)); + if (!match || tildeIndex === 0) return null; + return { base: trimmed.slice(0, tildeIndex), generation: Number(match[1]) }; +} + +export function normalizeRef(input: string): string | null { + // Node lookup always uses the plain ref; the generation suffix is stripped + // here so every existing parse site accepts the pinned form (#1076). + const split = splitRefGenerationSuffix(input); + if (!split) return null; + const trimmed = split.base; if (trimmed.startsWith('@')) { const ref = trimmed.slice(1); return ref ? ref : null; diff --git a/src/mcp/__tests__/command-tools.test.ts b/src/mcp/__tests__/command-tools.test.ts index 2845b11b5..f9489f381 100644 --- a/src/mcp/__tests__/command-tools.test.ts +++ b/src/mcp/__tests__/command-tools.test.ts @@ -318,3 +318,203 @@ test('MCP session tool exposes state-dir resolution without a daemon round-trip' assert.deepEqual(result.structuredContent, { stateDir: '/tmp/agent-device-dev-state' }); }); + +// --- #1076 versioned refs: MCP auto-pinning --- + +function createPinningExecutor(runCalls: Array<{ name: string; input: unknown }>) { + return createCommandToolExecutor({ + createClient: () => ({}) as AgentDeviceClient, + runCommand: async (_client, name, input) => { + runCalls.push({ name, input }); + if (name === 'snapshot') { + // Issues refs e2 and e37 at generation 500012. 
+ return { + nodes: [{ ref: 'e2' }, { ref: 'e37' }], + truncated: false, + refsGeneration: 500012, + }; + } + if (name === 'find') { + // A later find capture replaced the tree and issued ONLY e5 at 500013. + return { ref: '@e5', refsGeneration: 500013 }; + } + return { message: `Ran ${name}` }; + }, + }); +} + +test('MCP keeps per-ref provenance: a pre-find snapshot ref stays pinned to ITS generation', async () => { + // THE find-blessing scenario (#1076): snapshot issues e37 at G1, a later + // find issues e5 at G2. A plain @e37 must forward pinned to G1 — pinning it + // to G2 would make the daemon read it as current and silently re-bless it. + // (The daemon side of this flow — precise warning for the G1 pin after the + // find capture replaced the tree — is covered in the provider scenario.) + const runCalls: Array<{ name: string; input: unknown }> = []; + const executor = createPinningExecutor(runCalls); + + await executor.execute('snapshot', { session: 'demo' }); + await executor.execute('find', { session: 'demo', query: 'Continue' }); + await executor.execute('press', { session: 'demo', target: { kind: 'ref', ref: '@e37' } }); + + assert.deepEqual(runCalls[2], { + name: 'press', + input: { session: 'demo', target: { kind: 'ref', ref: '@e37~s500012' } }, + }); +}); + +test('MCP pins the find-issued ref to the find generation', async () => { + const runCalls: Array<{ name: string; input: unknown }> = []; + const executor = createPinningExecutor(runCalls); + + await executor.execute('snapshot', { session: 'demo' }); + await executor.execute('find', { session: 'demo', query: 'Continue' }); + await executor.execute('press', { session: 'demo', target: { kind: 'ref', ref: '@e5' } }); + + assert.deepEqual(runCalls[2], { + name: 'press', + input: { session: 'demo', target: { kind: 'ref', ref: '@e5~s500013' } }, + }); +}); + +test('MCP auto-pins wait refs and get targets from the per-ref map', async () => { + const runCalls: Array<{ name: string; input: unknown }> = 
[]; + const executor = createPinningExecutor(runCalls); + + await executor.execute('snapshot', { session: 'demo' }); + await executor.execute('wait', { session: 'demo', ref: '@e2' }); + await executor.execute('get', { + session: 'demo', + format: 'text', + target: { kind: 'ref', ref: '@e37' }, + }); + + assert.deepEqual(runCalls[1], { + name: 'wait', + input: { session: 'demo', ref: '@e2~s500012' }, + }); + assert.deepEqual(runCalls[2], { + name: 'get', + input: { session: 'demo', format: 'text', target: { kind: 'ref', ref: '@e37~s500012' } }, + }); +}); + +test('MCP merges digest-level snapshot refs too', async () => { + const runCalls: Array<{ name: string; input: unknown }> = []; + const executor = createCommandToolExecutor({ + createClient: () => ({}) as AgentDeviceClient, + runCommand: async (_client, name, input) => { + runCalls.push({ name, input }); + return name === 'snapshot' + ? { nodeCount: 1, refs: [{ ref: 'e9', label: 'Continue' }], refsGeneration: 41 } + : {}; + }, + }); + + await executor.execute('snapshot', { responseLevel: 'digest' }); + await executor.execute('press', { target: { kind: 'ref', ref: '@e9' } }); + + assert.deepEqual(runCalls[1]?.input, { target: { kind: 'ref', ref: '@e9~s41' } }); +}); + +test('MCP passes never-issued refs through unpinned (coarse floor, never guess)', async () => { + const runCalls: Array<{ name: string; input: unknown }> = []; + const executor = createPinningExecutor(runCalls); + + await executor.execute('snapshot', { session: 'demo' }); + // e99 was never present in any issuing response for this scope. 
+ await executor.execute('press', { session: 'demo', target: { kind: 'ref', ref: '@e99' } }); + + assert.deepEqual(runCalls[1]?.input, { session: 'demo', target: { kind: 'ref', ref: '@e99' } }); +}); + +test('MCP passes refs through unpinned when the pin scope has no history', async () => { + const runCalls: Array<{ name: string; input: unknown }> = []; + const executor = createPinningExecutor(runCalls); + + // Only OTHER session names have history. + await executor.execute('snapshot', { session: 'other' }); + await executor.execute('press', { session: 'demo', target: { kind: 'ref', ref: '@e2' } }); + + assert.deepEqual(runCalls[1]?.input, { session: 'demo', target: { kind: 'ref', ref: '@e2' } }); +}); + +test('MCP pin scopes include the state dir: same-named sessions never cross-pollinate', async () => { + const runCalls: Array<{ name: string; input: unknown }> = []; + const executor = createPinningExecutor(runCalls); + + await executor.execute('snapshot', { session: 'demo', stateDir: '/state/a' }); + // Same session name against a DIFFERENT daemon state dir: no history there. + await executor.execute('press', { + session: 'demo', + stateDir: '/state/b', + target: { kind: 'ref', ref: '@e2' }, + }); + // The original scope still pins. 
+ await executor.execute('press', { + session: 'demo', + stateDir: '/state/a', + target: { kind: 'ref', ref: '@e2' }, + }); + + assert.deepEqual(runCalls[1]?.input, { session: 'demo', target: { kind: 'ref', ref: '@e2' } }); + assert.deepEqual(runCalls[2]?.input, { + session: 'demo', + target: { kind: 'ref', ref: '@e2~s500012' }, + }); +}); + +test('MCP clears the whole scope when a ref-issuing response stops carrying a generation', async () => { + const runCalls: Array<{ name: string; input: unknown }> = []; + let issueGeneration = true; + const executor = createCommandToolExecutor({ + createClient: () => ({}) as AgentDeviceClient, + runCommand: async (_client, name, input) => { + runCalls.push({ name, input }); + if (name === 'snapshot') { + return issueGeneration + ? { nodes: [{ ref: 'e2' }], truncated: false, refsGeneration: 4 } + : { nodes: [{ ref: 'e2' }], truncated: false }; + } + return {}; + }, + }); + + await executor.execute('snapshot', {}); + issueGeneration = false; + // An older daemon without refsGeneration: the remembered pins must not + // leak onto refs the response did not vouch for. 
+ await executor.execute('snapshot', {}); + await executor.execute('press', { target: { kind: 'ref', ref: '@e2' } }); + + assert.deepEqual(runCalls[2], { + name: 'press', + input: { target: { kind: 'ref', ref: '@e2' } }, + }); +}); + +test('MCP never rewrites refs that already carry a suffix and never pins non-@ refs', async () => { + const runCalls: Array<{ name: string; input: unknown }> = []; + const executor = createPinningExecutor(runCalls); + + await executor.execute('snapshot', {}); + await executor.execute('press', { target: { kind: 'ref', ref: '@e2~s3' } }); + await executor.execute('press', { target: { kind: 'ref', ref: 'e2' } }); + + assert.deepEqual(runCalls[1]?.input, { target: { kind: 'ref', ref: '@e2~s3' } }); + assert.deepEqual(runCalls[2]?.input, { target: { kind: 'ref', ref: 'e2' } }); +}); + +test('MCP renders tool text from the unpinned input so the model never sees suffixes', async () => { + const executor = createCommandToolExecutor({ + createClient: () => ({}) as AgentDeviceClient, + runCommand: async (_client, name) => + name === 'snapshot' + ? { nodes: [{ ref: 'e2' }], truncated: false, refsGeneration: 9 } + : { message: 'Tapped @e2 (10, 20)' }, + }); + + await executor.execute('snapshot', {}); + const result = await executor.execute('press', { target: { kind: 'ref', ref: '@e2' } }); + + assert.doesNotMatch(result.content[0]?.text ?? '', /~s9/); +}); diff --git a/src/mcp/command-tools.ts b/src/mcp/command-tools.ts index 585d728a2..5e493a323 100644 --- a/src/mcp/command-tools.ts +++ b/src/mcp/command-tools.ts @@ -59,6 +59,9 @@ export function listCommandTools(): Array<{ } export function createCommandToolExecutor(deps: CommandToolExecutorDeps = {}): CommandToolExecutor { + // #1076 versioned refs — MCP auto-pinning state: per pin scope (state dir + + // session name), the generation each ref body was LAST ISSUED at. 
+ const refPinsByScope = new Map>(); return { execute: async (name, input) => { if (!isCommandName(name)) { @@ -66,14 +69,19 @@ export function createCommandToolExecutor(deps: CommandToolExecutorDeps = {}): C } const config = readMcpToolConfig(input); const commandInput = stripMcpConfigFields(input); + const scopeKey = readPinScopeKey(config, commandInput); + const pinnedInput = pinPlainRefArguments(name, commandInput, refPinsByScope.get(scopeKey)); const client = await createClient(deps, config.client); - const result = await (deps.runCommand ?? runCommand)(client, name, commandInput); + const result = await (deps.runCommand ?? runCommand)(client, name, pinnedInput); + mergeIssuedRefPins(refPinsByScope, scopeKey, name, result); return { isError: false, structuredContent: result, content: [ { type: 'text', + // Render from the UNPINNED input: the model typed plain refs and + // must never see generation suffixes (zero token cost). text: renderToolText({ name, input: commandInput, @@ -88,6 +96,166 @@ export function createCommandToolExecutor(deps: CommandToolExecutorDeps = {}): C }; } +/** + * #1076 versioned refs — MCP auto-pinning. Snapshot trees and find outputs + * keep plain `e12` refs (snapshots are the most token-expensive artifact the + * model consumes); the issuing response carries the tree's generation ONCE as + * `refsGeneration`. This layer sees those responses before the model does and + * keeps PER-REF provenance: every ref present in a ref-issuing response is + * recorded at that response's generation, and refs absent from it KEEP their + * older pins. That per-ref memory is the point — after snapshot(s12) then + * find(s13), a plain `@e37` from the pre-find snapshot must still forward as + * `@e37~s12` so the daemon warns precisely; a single last-seen generation + * would silently re-bless it at s13 (the exact find-blessing hole #1076 + * describes). 
Refs never seen in an issuing response pass through unpinned + * (the coarse #1093 warning is the floor). The model never sees or types + * suffixes. + */ +const REF_ISSUING_TOOLS: ReadonlySet = new Set(['snapshot', 'find'] as CommandName[]); + +const TARGET_REF_TOOLS: ReadonlySet = new Set([ + 'press', + 'click', + 'fill', + 'longpress', + 'get', +] as CommandName[]); + +/** + * Bound on remembered pins per scope. Refs still alive keep getting re-merged + * at the latest generation by every snapshot, so evicting the least recently + * ISSUED pins only degrades stale-ref precision back to the coarse floor. + */ +const MAX_REF_PINS_PER_SCOPE = 1000; + +/** + * Pin scope: state dir + session name. `stateDir` is a per-tool-call MCP + * config field, so one MCP server process can serve daemons in different + * state dirs — two same-named sessions there are different sessions and must + * not cross-pollinate generations. + */ +function readPinScopeKey(config: McpToolConfig, input: unknown): string { + const record = asOptionalRecord(input); + const session = record?.session; + const sessionName = typeof session === 'string' && session.length > 0 ? session : 'default'; + // NUL separator: neither state-dir paths nor session names contain it. + return `${config.client.stateDir ?? ''}\u0000${sessionName}`; +} + +/** + * MERGE-ONLY update rule: refs present in the issuing response move to its + * generation; absent refs keep their older pins (an old pin on a replaced + * tree is exactly what makes the daemon warn). A ref-issuing response WITHOUT + * a `refsGeneration` (older daemon, find with no ref match) clears the whole + * scope — never guess. 
+ */ +function mergeIssuedRefPins( + refPinsByScope: Map>, + scopeKey: string, + name: CommandName, + result: unknown, +): void { + if (!REF_ISSUING_TOOLS.has(name)) return; + const record = asOptionalRecord(result); + const refsGeneration = record?.refsGeneration; + if (record === undefined || typeof refsGeneration !== 'number') { + refPinsByScope.delete(scopeKey); + return; + } + const issuedRefs = readIssuedRefBodies(record); + if (issuedRefs.length === 0) return; + const pins = refPinsByScope.get(scopeKey) ?? new Map(); + refPinsByScope.set(scopeKey, pins); + recordIssuedPins(pins, issuedRefs, refsGeneration); +} + +function recordIssuedPins( + pins: Map, + issuedRefs: string[], + refsGeneration: number, +): void { + for (const ref of issuedRefs) { + // delete-then-set keeps Map insertion order = issue recency for the cap. + pins.delete(ref); + pins.set(ref, refsGeneration); + } + while (pins.size > MAX_REF_PINS_PER_SCOPE) { + const oldest = pins.keys().next().value; + if (oldest === undefined) break; + pins.delete(oldest); + } +} + +/** Ref bodies (`e12`, no `@`) issued by a snapshot/find response. */ +function readIssuedRefBodies(record: Record): string[] { + const bodies: string[] = []; + // find: the single returned ref (`@e12`). + if (typeof record.ref === 'string' && record.ref.startsWith('@')) { + bodies.push(record.ref.slice(1)); + } + // snapshot (default level): every node carries its ref. + collectRefBodies(record.nodes, bodies); + // snapshot (digest level): the capped `{ ref, label }` list. 
+ collectRefBodies(record.refs, bodies); + return bodies; +} + +function collectRefBodies(entries: unknown, into: string[]): void { + if (!Array.isArray(entries)) return; + for (const entry of entries) { + const ref = asOptionalRecord(entry)?.ref; + if (typeof ref === 'string' && ref.length > 0) into.push(ref); + } +} + +function pinPlainRefArguments( + name: CommandName, + input: unknown, + pins: Map | undefined, +): unknown { + // No remembered pins for this scope → pass refs through unpinned. + if (pins === undefined || pins.size === 0) return input; + const record = asOptionalRecord(input); + if (!record) return input; + if (name === 'wait') return pinWaitRef(record, pins) ?? input; + if (TARGET_REF_TOOLS.has(name)) return pinTargetRef(record, pins) ?? input; + return input; +} + +function pinWaitRef( + record: Record, + pins: Map, +): Record | undefined { + if (typeof record.ref !== 'string') return undefined; + const pinned = pinRef(record.ref, pins); + return pinned === record.ref ? undefined : { ...record, ref: pinned }; +} + +function pinTargetRef( + record: Record, + pins: Map, +): Record | undefined { + const target = asOptionalRecord(record.target); + if (target?.kind !== 'ref' || typeof target.ref !== 'string') return undefined; + const pinned = pinRef(target.ref, pins); + return pinned === target.ref ? undefined : { ...record, target: { ...target, ref: pinned } }; +} + +function pinRef(ref: string, pins: Map): string { + // Only pin the canonical plain form `@e12`: an existing `~` means the ref is + // already pinned (or malformed — the daemon owns rejecting that), and a + // missing `@` prefix is not a ref the daemon would accept anyway. Refs with + // no recorded provenance pass through unpinned — never guess. + if (!ref.startsWith('@') || ref.includes('~')) return ref; + const generation = pins.get(ref.slice(1)); + return generation === undefined ? 
ref : `${ref}~s${generation}`; +} + +function asOptionalRecord(value: unknown): Record | undefined { + if (!value || typeof value !== 'object' || Array.isArray(value)) return undefined; + return value as Record; +} + export const commandToolExecutor = createCommandToolExecutor(); async function createClient( diff --git a/src/replay/__tests__/script.test.ts b/src/replay/__tests__/script.test.ts index 0a9d50a39..6a0b9f14f 100644 --- a/src/replay/__tests__/script.test.ts +++ b/src/replay/__tests__/script.test.ts @@ -342,3 +342,25 @@ test('readReplayScriptMetadata rejects conflicting metadata keys in context head /Conflicting replay test metadata "timeoutMs"/.test(error.message), ); }); + +test('replay parsing strips versioned-ref pins from recorded refs (#1076)', () => { + // Generations are session-scoped; a replayed script runs against a NEW + // session, so pins are stripped and IGNORED rather than re-validated. + const script = [ + 'context platform=android device=Pixel', + 'press @e2~s3 Continue', + 'fill @e4~s3 Email hello@example.com', + 'get text @e5~s3 Title', + 'wait @e2~s3 5000', + 'longpress @e2~s3 800', + ].join('\n'); + + const { actions } = parseReplayScriptDetailed(script); + assert.deepEqual( + actions.map((action) => action.positionals), + [['@e2'], ['@e4', 'hello@example.com'], ['text', '@e5'], ['@e2', '5000'], ['@e2', '800']], + ); + // Malformed pins were never minted by us — left for the daemon to reject. 
+ const malformed = parseReplayScriptDetailed('press @e2~x3').actions[0]; + assert.deepEqual(malformed?.positionals, ['@e2~x3']); +}); diff --git a/src/replay/script-utils.ts b/src/replay/script-utils.ts index 4551a8b05..953731151 100644 --- a/src/replay/script-utils.ts +++ b/src/replay/script-utils.ts @@ -1,5 +1,22 @@ import type { SessionAction } from '../daemon/types.ts'; import { appendScreenshotScriptFlags } from '../contracts/screenshot.ts'; +import { splitRefGenerationSuffix } from '../kernel/snapshot.ts'; + +/** + * #1076 versioned refs: a recorded ref positional may carry a `~s<n>` + * pin from the client that issued it (`@e12~s3`). Generations are meaningless + * outside the session that minted them — a replayed script runs against a NEW + * session with its own generation counter — so replay parsing and script + * writing strip well-formed suffixes and IGNORE the generation instead of + * re-validating it (which would only produce spurious staleness warnings). + * Malformed suffixes are left untouched; they were never minted by us and the + * daemon owns rejecting them. + */ +export function stripRecordedRefGeneration(token: string): string { + if (!token.startsWith('@')) return token; + const split = splitRefGenerationSuffix(token); + return split?.base ?? token; +} const NUMERIC_ARG_RE = /^-?\d+(\.\d+)?$/; const BARE_SCRIPT_TOKEN_RE = /^[^\s"\\]+$/; @@ -180,7 +197,13 @@ export function appendRuntimeActionScriptArgs( export function appendGenericActionScriptArgs(parts: string[], action: SessionAction): void { for (const positional of action.positionals ?? []) { - parts.push(formatScriptArg(positional)); + // wait @ref: recorded refs may carry a `~s<n>` pin (#1076); + // scripts store the plain ref (see stripRecordedRefGeneration). + parts.push( + formatScriptArg( + action.command === 'wait' ? 
stripRecordedRefGeneration(positional) : positional, + ), + ); } appendScriptSeriesFlags(parts, action); } diff --git a/src/replay/script.ts b/src/replay/script.ts index afdc38e64..e9feb4e4d 100644 --- a/src/replay/script.ts +++ b/src/replay/script.ts @@ -12,6 +12,7 @@ import { isClickLikeCommand, parseReplaySeriesFlags, parseReplayRuntimeFlags, + stripRecordedRefGeneration, } from './script-utils.ts'; import { REPLAY_VAR_KEY_RE } from './vars.ts'; @@ -261,7 +262,9 @@ function parseReplayScriptLine(line: string): SessionAction | null { const target = parsed.positionals[0]; if (target === undefined) return action; if (target.startsWith('@')) { - action.positionals = [target]; + // Recorded refs may carry a `~s<n>` pin — strip and IGNORE it + // (see stripRecordedRefGeneration: generations are session-scoped). + action.positionals = [stripRecordedRefGeneration(target)]; if (parsed.positionals[1]) { action.result = { refLabel: parsed.positionals[1] }; } @@ -286,12 +289,13 @@ function parseReplayScriptLine(line: string): SessionAction | null { } const [target, text, ...textRest] = parsed.positionals; if (target.startsWith('@')) { + const ref = stripRecordedRefGeneration(target); if (textRest.length > 0) { - action.positionals = [target, textRest.join(' ')]; + action.positionals = [ref, textRest.join(' ')]; action.result = { refLabel: text }; return action; } - action.positionals = [target, text]; + action.positionals = [ref, text]; return action; } action.positionals = [target, [text, ...textRest].join(' ')]; @@ -306,7 +310,7 @@ function parseReplayScriptLine(line: string): SessionAction | null { return action; } if (target.startsWith('@')) { - action.positionals = [sub, target]; + action.positionals = [sub, stripRecordedRefGeneration(target)]; if (args[2]) { action.result = { refLabel: args[2] }; } @@ -377,7 +381,12 @@ function parseReplayScriptLine(line: string): SessionAction | null { return action; } - action.positionals = args; + // wait @ref [timeout] and longpress 
@ref [durationMs] flow through this + // generic branch: strip recorded generation pins like the branches above. + action.positionals = + command === 'wait' || command === 'longpress' + ? args.map((token) => stripRecordedRefGeneration(token)) + : args; return action; } diff --git a/test/integration/provider-scenarios/versioned-refs.test.ts b/test/integration/provider-scenarios/versioned-refs.test.ts new file mode 100644 index 000000000..0b6a2ab91 --- /dev/null +++ b/test/integration/provider-scenarios/versioned-refs.test.ts @@ -0,0 +1,160 @@ +import assert from 'node:assert/strict'; +import { test } from 'vitest'; +import { assertRpcOk } from './assertions.ts'; +import { PROVIDER_SCENARIO_IOS_SIMULATOR } from './fixtures.ts'; +import { createProviderScenarioHarness, withProviderScenarioResource } from './harness.ts'; +import { + createAppleRunnerProviderFromTranscript, + createRecordingAppleToolProvider, + simctlListDevicesHandler, +} from './providers.ts'; +import { createProviderTranscript, type ProviderScenarioProviderEntry } from './transcript.ts'; + +const APP = 'com.example.app'; +const DEVICE_ID = PROVIDER_SCENARIO_IOS_SIMULATOR.id; + +// #1076 versioned refs: ref-issuing responses carry the session tree's +// generation once (`refsGeneration`); a consumer may pin refs as `@e2~s<n>`. +// A pinned ref matching the stored generation is clean; a pinned ref from an +// older generation gets a PRECISE warning naming both generations — including +// after a later find issued a NEWER generation (the find-blessing hole this +// feature closes). The tree output itself stays plain `e2` refs, and the +// generation values are seeded per session lifetime, so every assertion below +// is relative to the observed seed. 
+const NODES = [ + { + index: 0, + type: 'Application', + label: 'Example', + rect: { x: 0, y: 0, width: 400, height: 800 }, + }, + { + index: 1, + parentIndex: 0, + type: 'Button', + label: 'Continue', + hittable: true, + rect: { x: 100, y: 300, width: 200, height: 44 }, + }, + { + index: 2, + parentIndex: 0, + type: 'Button', + label: 'Cancel', + hittable: true, + rect: { x: 100, y: 400, width: 200, height: 44 }, + }, +]; + +function snapshotEntry(): ProviderScenarioProviderEntry { + return { + command: 'ios.runner.snapshot', + deviceId: DEVICE_ID, + platform: 'apple', + result: { nodes: NODES, truncated: false }, + }; +} + +function tapEntry(x: number, y: number): ProviderScenarioProviderEntry { + return { + command: 'ios.runner.tap', + deviceId: DEVICE_ID, + platform: 'apple', + result: { x, y }, + }; +} + +function pinnedStaleWarning(ref: string, minted: number, current: number): string { + return `Ref ${ref} was minted from snapshot s${minted} but the session tree is now s${current} — re-run snapshot -i.`; +} + +test('Provider-backed integration pinned @refs get precise generation warnings', async () => { + const runnerTranscript = createProviderTranscript([ + // snapshot -i: issues refs at the seeded generation g1 + snapshotEntry(), + // press label=Continue: selector resolution capture replaces the stored + // tree (g1+1) without issuing refs + snapshotEntry(), + tapEntry(200, 322), + // press @e2~s{g1}: pinned to the outlived generation — executes, warns precisely + tapEntry(200, 422), + // press @e2~s{g1+1}: pinned to the CURRENT generation — clean + tapEntry(200, 422), + // find Cancel click: capture replaces the tree AGAIN (g1+2) and issues + // only the found ref at the new generation + snapshotEntry(), + tapEntry(200, 422), + // press @e1~s{g1+1}: a PRE-find pin — the find must not bless it (the + // #1076 hole): precise warning naming g1+1 → g1+2 + tapEntry(200, 322), + // press @e1~s{g1+2}: pinned to the post-find generation — clean + tapEntry(200, 
322), + ]); + const appleRunnerProvider = createAppleRunnerProviderFromTranscript( + runnerTranscript, + 'ios.runner', + ); + const appleTool = createRecordingAppleToolProvider({ + simctl: simctlListDevicesHandler('com.apple.CoreSimulator.SimRuntime.iOS-18-0', [ + { name: PROVIDER_SCENARIO_IOS_SIMULATOR.name, udid: DEVICE_ID }, + ]), + }); + + await withProviderScenarioResource( + async () => + await createProviderScenarioHarness({ + appleRunnerProvider: () => appleRunnerProvider, + appleToolProvider: () => appleTool.provider, + deviceInventoryProvider: async () => [PROVIDER_SCENARIO_IOS_SIMULATOR], + }), + async (daemon) => { + const open = await daemon.callCommand('open', [APP], { + platform: 'ios', + udid: DEVICE_ID, + }); + assertRpcOk(open); + + const snapshot = await daemon.callCommand('snapshot', [], { + snapshotInteractiveOnly: true, + }); + const snapshotData = assertRpcOk(snapshot); + // Ref-issuing response reports the (seeded) generation ONCE; nodes stay + // plain refs — no per-node token growth. 
+ assert.equal(typeof snapshotData.refsGeneration, 'number'); + const g1 = snapshotData.refsGeneration as number; + const nodes = snapshotData.nodes as Array<{ ref?: string }>; + assert.ok(nodes.length > 0); + assert.ok(nodes.every((node) => node.ref === undefined || !node.ref.includes('~'))); + + const selectorPress = await daemon.callCommand('press', ['label=Continue'], {}); + const selectorData = assertRpcOk(selectorPress); + assert.equal(selectorData.warning, undefined); + + const pinnedStale = await daemon.callCommand('press', [`@e2~s${g1}`], {}); + const pinnedStaleData = assertRpcOk(pinnedStale); + assert.equal(pinnedStaleData.warning, pinnedStaleWarning('@e2', g1, g1 + 1)); + + const pinnedCurrent = await daemon.callCommand('press', [`@e2~s${g1 + 1}`], {}); + const pinnedCurrentData = assertRpcOk(pinnedCurrent); + assert.equal(pinnedCurrentData.warning, undefined); + + // The blessing flow: find replaces the tree and issues its ref at the + // NEW generation… + const find = await daemon.callCommand('find', ['Cancel', 'click'], {}); + const findData = assertRpcOk(find); + assert.equal(findData.refsGeneration, g1 + 2); + + // …but a ref pinned BEFORE the find keeps warning precisely — the find + // response must not silently re-bless it. + const preFindPin = await daemon.callCommand('press', [`@e1~s${g1 + 1}`], {}); + const preFindPinData = assertRpcOk(preFindPin); + assert.equal(preFindPinData.warning, pinnedStaleWarning('@e1', g1 + 1, g1 + 2)); + + const postFindPin = await daemon.callCommand('press', [`@e1~s${g1 + 2}`], {}); + const postFindPinData = assertRpcOk(postFindPin); + assert.equal(postFindPinData.warning, undefined); + + runnerTranscript.assertComplete(); + }, + ); +});