diff --git a/docs/tool-reference.md b/docs/tool-reference.md index 4dc46392a..eb73a5274 100644 --- a/docs/tool-reference.md +++ b/docs/tool-reference.md @@ -43,7 +43,7 @@ **Parameters:** - **dblClick** (boolean) _(optional)_: Set to true for double clicks. Default is false. -- **uid** (number) **(required)**: The uid of an element on the page from the page content snapshot +- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot --- @@ -53,8 +53,8 @@ **Parameters:** -- **from_uid** (number) **(required)**: The uid of the element to [`drag`](#drag) -- **to_uid** (number) **(required)**: The uid of the element to drop into +- **from_uid** (string) **(required)**: The uid of the element to [`drag`](#drag) +- **to_uid** (string) **(required)**: The uid of the element to drop into --- @@ -64,7 +64,7 @@ **Parameters:** -- **uid** (number) **(required)**: The uid of an element on the page from the page content snapshot +- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot - **value** (string) **(required)**: The value to [`fill`](#fill) in --- @@ -96,7 +96,7 @@ **Parameters:** -- **uid** (number) **(required)**: The uid of an element on the page from the page content snapshot +- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot --- @@ -107,7 +107,7 @@ **Parameters:** - **filePath** (string) **(required)**: The local path of the file to upload -- **uid** (number) **(required)**: The uid of the file input element or an element that will open file chooser on the page from the page content snapshot +- **uid** (string) **(required)**: The uid of the file input element or an element that will open file chooser on the page from the page content snapshot --- @@ -283,7 +283,7 @@ - **format** (enum: "png", "jpeg") _(optional)_: Type of format to save the screenshot as. Default is "png" - **fullPage** (boolean) _(optional)_: If set to true takes a screenshot of the full page instead of the currently visible viewport. Incompatible with uid. -- **uid** (number) _(optional)_: The uid of an element on the page from the page content snapshot. If omitted takes a pages screenshot. +- **uid** (string) _(optional)_: The uid of an element on the page from the page content snapshot. If omitted takes a pages screenshot. --- diff --git a/src/McpContext.ts b/src/McpContext.ts index 7fb689290..db4561911 100644 --- a/src/McpContext.ts +++ b/src/McpContext.ts @@ -21,10 +21,16 @@ import path from 'node:path'; import {listPages} from './tools/pages.js'; export interface TextSnapshotNode extends SerializedAXNode { - id: number; + id: string; children: TextSnapshotNode[]; } +export interface TextSnapshot { + root: TextSnapshotNode; + idToNode: Map; + snapshotId: string; +} + export class McpContext implements Context { browser: Browser; logger: Debugger; @@ -33,8 +39,7 @@ export class McpContext implements Context { #pages: Page[] = []; #selectedPageIdx = 0; // The most recent snapshot. - #textSnapshot: TextSnapshotNode | null = null; - #idToNodeMap = new Map(); + #textSnapshot: TextSnapshot | null = null; #networkCollector: NetworkCollector; #consoleCollector: PageCollector; @@ -43,6 +48,8 @@ export class McpContext implements Context { #cpuThrottlingRate = 1; #dialog?: Dialog; + #nextSnapshotId = 1; + private constructor(browser: Browser, logger: Debugger) { this.browser = browser; this.logger = logger; @@ -192,11 +199,19 @@ export class McpContext implements Context { newPage.setDefaultNavigationTimeout(10_000); } - async getElementByUid(uid: number): Promise> { - if (!this.#idToNodeMap.size) { + async getElementByUid(uid: string): Promise> { + if (!this.#textSnapshot?.idToNode.size) { throw new Error('No snapshot found. Use browser_snapshot to capture one'); } - const node = this.#idToNodeMap.get(uid); + const [snapshotId] = uid.split('_'); + + if (this.#textSnapshot.snapshotId !== snapshotId) { + throw new Error( + 'This uid is coming from a stale snapshot. Call take_snapshot to get a fresh snapshot.', + ); + } + + const node = this.#textSnapshot?.idToNode.get(uid); if (!node) { throw new Error('No such element found in the snapshot'); } @@ -222,35 +237,39 @@ export class McpContext implements Context { /** * Creates a text snapshot of a page. */ - async createTextSnapshot(): Promise { + async createTextSnapshot(): Promise { const page = this.getSelectedPage(); const rootNode = await page.accessibility.snapshot(); if (!rootNode) { - return null; + return; } + const snapshotId = this.#nextSnapshotId++; // Iterate through the whole accessibility node tree and assign node ids that // will be used for the tree serialization and mapping ids back to nodes. let idCounter = 0; - this.#idToNodeMap.clear(); + const idToNode = new Map(); const assignIds = (node: SerializedAXNode): TextSnapshotNode => { const nodeWithId: TextSnapshotNode = { ...node, - id: idCounter++, + id: `${snapshotId}_${idCounter++}`, children: node.children ? node.children.map(child => assignIds(child)) : [], }; - this.#idToNodeMap.set(nodeWithId.id, nodeWithId); + idToNode.set(nodeWithId.id, nodeWithId); return nodeWithId; }; const rootNodeWithId = assignIds(rootNode); - this.#textSnapshot = rootNodeWithId; - return rootNodeWithId; + this.#textSnapshot = { + root: rootNodeWithId, + snapshotId: String(snapshotId), + idToNode, + }; } - getTextSnapshot(): TextSnapshotNode | null { + getTextSnapshot(): TextSnapshot | null { return this.#textSnapshot; } diff --git a/src/McpResponse.ts b/src/McpResponse.ts index 49242fabb..e37d14f58 100644 --- a/src/McpResponse.ts +++ b/src/McpResponse.ts @@ -145,9 +145,9 @@ Call browser_handle_dialog to handle it before continuing.`); } if (this.#includeSnapshot) { - const rootNode = context.getTextSnapshot(); - if (rootNode) { - const formattedSnapshot = formatA11ySnapshot(rootNode); + const snapshot = context.getTextSnapshot(); + if (snapshot) { + const formattedSnapshot = formatA11ySnapshot(snapshot.root); response.push('## Page content'); response.push(formattedSnapshot); } diff --git a/src/tools/ToolDefinition.ts b/src/tools/ToolDefinition.ts index 2da2abdd3..f141bd033 100644 --- a/src/tools/ToolDefinition.ts +++ b/src/tools/ToolDefinition.ts @@ -60,7 +60,7 @@ export type Context = Readonly<{ getPageByIdx(idx: number): Page; newPage(): Promise; setSelectedPageIdx(idx: number): void; - getElementByUid(uid: number): Promise>; + getElementByUid(uid: string): Promise>; setNetworkConditions(conditions: string | null): void; setCpuThrottlingRate(rate: number): void; saveTemporaryFile( diff --git a/src/tools/input.ts b/src/tools/input.ts index b662dc6b2..330de9d00 100644 --- a/src/tools/input.ts +++ b/src/tools/input.ts @@ -19,7 +19,7 @@ export const click = defineTool({ }, schema: { uid: z - .number() + .string() .describe( 'The uid of an element on the page from the page content snapshot', ), @@ -58,7 +58,7 @@ export const hover = defineTool({ }, schema: { uid: z - .number() + .string() .describe( 'The uid of an element on the page from the page content snapshot', ), @@ -87,7 +87,7 @@ export const fill = defineTool({ }, schema: { uid: z - .number() + .string() .describe( 'The uid of an element on the page from the page content snapshot', ), @@ -115,8 +115,8 @@ export const drag = defineTool({ readOnlyHint: false, }, schema: { - from_uid: z.number().describe('The uid of the element to drag'), - to_uid: z.number().describe('The uid of the element to drop into'), + from_uid: z.string().describe('The uid of the element to drag'), + to_uid: z.string().describe('The uid of the element to drop into'), }, handler: async (request, response, context) => { const fromHandle = await context.getElementByUid(request.params.from_uid); @@ -147,7 +147,7 @@ export const fillForm = defineTool({ elements: z .array( z.object({ - uid: z.number().describe('The uid of the element to fill out'), + uid: z.string().describe('The uid of the element to fill out'), value: z.string().describe('Value for the element'), }), ) @@ -178,7 +178,7 @@ export const uploadFile = defineTool({ }, schema: { uid: z - .number() + .string() .describe( 'The uid of the file input element or an element that will open file chooser on the page from the page content snapshot', ), diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts index fb704dbdf..aa5dce26d 100644 --- a/src/tools/screenshot.ts +++ b/src/tools/screenshot.ts @@ -22,7 +22,7 @@ export const screenshot = defineTool({ .default('png') .describe('Type of format to save the screenshot as. Default is "png"'), uid: z - .number() + .string() .optional() .describe( 'The uid of an element on the page from the page content snapshot. If omitted takes a pages screenshot.', diff --git a/tests/McpContext.test.ts b/tests/McpContext.test.ts new file mode 100644 index 000000000..37ec3450b --- /dev/null +++ b/tests/McpContext.test.ts @@ -0,0 +1,31 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ +import {describe, it} from 'node:test'; +import assert from 'assert'; + +import {withBrowser} from './utils.js'; + +describe('McpResponse', () => { + it('list pages', async () => { + await withBrowser(async (response, context) => { + const page = context.getSelectedPage(); + await page.setContent(` +`); + await context.createTextSnapshot(); + assert.ok(await context.getElementByUid('1_1')); + await context.createTextSnapshot(); + try { + await context.getElementByUid('1_1'); + assert.fail('not reached'); + } catch (err) { + assert.strict( + err.message, + 'This uid is coming from a stale snapshot. Call take_snapshot to get a fresh snapshot', + ); + } + }); + }); +}); diff --git a/tests/McpResponse.test.ts b/tests/McpResponse.test.ts index b3705a408..1a89f9815 100644 --- a/tests/McpResponse.test.ts +++ b/tests/McpResponse.test.ts @@ -61,9 +61,9 @@ Testing 2`, result[0].text, `# test response ## Page content -uid=0 RootWebArea "" - uid=1 button "Click me" focusable focused - uid=2 textbox "" value="Input" +uid=1_0 RootWebArea "" + uid=1_1 button "Click me" focusable focused + uid=1_2 textbox "" value="Input" `, ); }); @@ -87,9 +87,9 @@ uid=0 RootWebArea "" result[0].text, `# test response ## Page content -uid=0 RootWebArea "My test page" - uid=1 StaticText "username" - uid=2 textbox "username" value="mcp" focusable focused +uid=1_0 RootWebArea "My test page" + uid=1_1 StaticText "username" + uid=1_2 textbox "username" value="mcp" focusable focused `, ); }); diff --git a/tests/formatters/snapshotFormatter.test.ts b/tests/formatters/snapshotFormatter.test.ts index 57c07cb83..5a4ec6607 100644 --- a/tests/formatters/snapshotFormatter.test.ts +++ b/tests/formatters/snapshotFormatter.test.ts @@ -13,13 +13,13 @@ import type {ElementHandle} from 'puppeteer-core'; describe('snapshotFormatter', () => { it('formats a snapshot with value properties', () => { const snapshot: TextSnapshotNode = { - id: 1, + id: '1_1', role: 'textbox', name: 'textbox', value: 'value', children: [ { - id: 2, + id: '1_2', role: 'statictext', name: 'text', children: [], @@ -36,21 +36,21 @@ describe('snapshotFormatter', () => { const formatted = formatA11ySnapshot(snapshot); assert.strictEqual( formatted, - `uid=1 textbox "textbox" value="value" - uid=2 statictext "text" + `uid=1_1 textbox "textbox" value="value" + uid=1_2 statictext "text" `, ); }); it('formats a snapshot with boolean properties', () => { const snapshot: TextSnapshotNode = { - id: 1, + id: '1_1', role: 'button', name: 'button', disabled: true, children: [ { - id: 2, + id: '1_2', role: 'statictext', name: 'text', children: [], @@ -67,21 +67,21 @@ describe('snapshotFormatter', () => { const formatted = formatA11ySnapshot(snapshot); assert.strictEqual( formatted, - `uid=1 button "button" disableable disabled - uid=2 statictext "text" + `uid=1_1 button "button" disableable disabled + uid=1_2 statictext "text" `, ); }); it('formats a snapshot with checked properties', () => { const snapshot: TextSnapshotNode = { - id: 1, + id: '1_1', role: 'checkbox', name: 'checkbox', checked: true, children: [ { - id: 2, + id: '1_2', role: 'statictext', name: 'text', children: [], @@ -98,20 +98,20 @@ describe('snapshotFormatter', () => { const formatted = formatA11ySnapshot(snapshot); assert.strictEqual( formatted, - `uid=1 checkbox "checkbox" checked checked="true" - uid=2 statictext "text" + `uid=1_1 checkbox "checkbox" checked checked="true" + uid=1_2 statictext "text" `, ); }); it('formats a snapshot with multiple different type attributes', () => { const snapshot: TextSnapshotNode = { - id: 1, + id: '1_1', role: 'root', name: 'root', children: [ { - id: 2, + id: '1_2', role: 'button', name: 'button', focused: true, @@ -122,7 +122,7 @@ describe('snapshotFormatter', () => { }, }, { - id: 3, + id: '1_3', role: 'textbox', name: 'textbox', value: 'value', @@ -140,9 +140,9 @@ describe('snapshotFormatter', () => { const formatted = formatA11ySnapshot(snapshot); assert.strictEqual( formatted, - `uid=1 root "root" - uid=2 button "button" disableable disabled focusable focused - uid=3 textbox "textbox" value="value" + `uid=1_1 root "root" + uid=1_2 button "button" disableable disabled focusable focused + uid=1_3 textbox "textbox" value="value" `, ); }); diff --git a/tests/tools/input.test.ts b/tests/tools/input.test.ts index 32cec9aa8..79bbe3992 100644 --- a/tests/tools/input.test.ts +++ b/tests/tools/input.test.ts @@ -34,7 +34,7 @@ describe('input', () => { await click.handler( { params: { - uid: 1, + uid: '1_1', }, }, response, @@ -58,7 +58,7 @@ describe('input', () => { await click.handler( { params: { - uid: 1, + uid: '1_1', dblClick: true, }, }, @@ -92,7 +92,7 @@ describe('input', () => { const clickPromise = click.handler( { params: { - uid: 1, + uid: '1_1', }, }, response, @@ -135,7 +135,7 @@ describe('input', () => { .handler( { params: { - uid: 1, + uid: '1_1', }, }, response, @@ -163,7 +163,7 @@ describe('input', () => { await hover.handler( { params: { - uid: 1, + uid: '1_1', }, }, response, @@ -188,7 +188,7 @@ describe('input', () => { await fill.handler( { params: { - uid: 1, + uid: '1_1', value: 'test', }, }, @@ -232,8 +232,8 @@ describe('input', () => { await drag.handler( { params: { - from_uid: 1, - to_uid: 2, + from_uid: '1_1', + to_uid: '1_2', }, }, response, @@ -265,11 +265,11 @@ describe('input', () => { params: { elements: [ { - uid: 2, + uid: '1_2', value: 'test', }, { - uid: 4, + uid: '1_4', value: 'test2', }, ], @@ -313,7 +313,7 @@ describe('input', () => { await uploadFile.handler( { params: { - uid: 1, + uid: '1_1', filePath: testFilePath, }, }, @@ -348,7 +348,7 @@ describe('input', () => { await uploadFile.handler( { params: { - uid: 1, + uid: '1_1', filePath: testFilePath, }, }, @@ -383,7 +383,7 @@ describe('input', () => { uploadFile.handler( { params: { - uid: 1, + uid: '1_1', filePath: testFilePath, }, }, diff --git a/tests/tools/screenshot.test.ts b/tests/tools/screenshot.test.ts index d70fb07b2..ec018227f 100644 --- a/tests/tools/screenshot.test.ts +++ b/tests/tools/screenshot.test.ts @@ -94,7 +94,7 @@ describe('screenshot', () => { { params: { format: 'png', - uid: 1, + uid: '1_1', }, }, response, @@ -105,7 +105,7 @@ describe('screenshot', () => { assert.equal(response.images[0].mimeType, 'image/png'); assert.equal( response.responseLines.at(0), - 'Took a screenshot of node with uid "1".', + 'Took a screenshot of node with uid "1_1".', ); }); });