Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions docs/tool-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
**Parameters:**

- **dblClick** (boolean) _(optional)_: Set to true for double clicks. Default is false.
- **uid** (number) **(required)**: The uid of an element on the page from the page content snapshot
- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot

---

Expand All @@ -53,8 +53,8 @@

**Parameters:**

- **from_uid** (number) **(required)**: The uid of the element to [`drag`](#drag)
- **to_uid** (number) **(required)**: The uid of the element to drop into
- **from_uid** (string) **(required)**: The uid of the element to [`drag`](#drag)
- **to_uid** (string) **(required)**: The uid of the element to drop into

---

Expand All @@ -64,7 +64,7 @@

**Parameters:**

- **uid** (number) **(required)**: The uid of an element on the page from the page content snapshot
- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot
- **value** (string) **(required)**: The value to [`fill`](#fill) in

---
Expand Down Expand Up @@ -96,7 +96,7 @@

**Parameters:**

- **uid** (number) **(required)**: The uid of an element on the page from the page content snapshot
- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot

---

Expand All @@ -107,7 +107,7 @@
**Parameters:**

- **filePath** (string) **(required)**: The local path of the file to upload
- **uid** (number) **(required)**: The uid of the file input element or an element that will open file chooser on the page from the page content snapshot
- **uid** (string) **(required)**: The uid of the file input element or an element that will open file chooser on the page from the page content snapshot

---

Expand Down Expand Up @@ -283,7 +283,7 @@

- **format** (enum: "png", "jpeg") _(optional)_: Type of format to save the screenshot as. Default is "png"
- **fullPage** (boolean) _(optional)_: If set to true takes a screenshot of the full page instead of the currently visible viewport. Incompatible with uid.
- **uid** (number) _(optional)_: The uid of an element on the page from the page content snapshot. If omitted takes a pages screenshot.
- **uid** (string) _(optional)_: The uid of an element on the page from the page content snapshot. If omitted takes a pages screenshot.

---

Expand Down
47 changes: 33 additions & 14 deletions src/McpContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,16 @@ import path from 'node:path';
import {listPages} from './tools/pages.js';

export interface TextSnapshotNode extends SerializedAXNode {
id: number;
id: string;
children: TextSnapshotNode[];
}

/**
 * A text snapshot of a page: the id-annotated node tree together with the
 * data needed to resolve a uid back to a node of that same snapshot.
 */
export interface TextSnapshot {
// Root of the id-annotated node tree.
root: TextSnapshotNode;
// Lookup from a node's `id` to the node itself.
idToNode: Map<string, TextSnapshotNode>;
// Identifier of this snapshot. Node ids are built as `${snapshotId}_${counter}`,
// so the part of a uid before the `_` tells which snapshot it came from.
snapshotId: string;
}

export class McpContext implements Context {
browser: Browser;
logger: Debugger;
Expand All @@ -33,8 +39,7 @@ export class McpContext implements Context {
#pages: Page[] = [];
#selectedPageIdx = 0;
// The most recent snapshot.
#textSnapshot: TextSnapshotNode | null = null;
#idToNodeMap = new Map<number, TextSnapshotNode>();
#textSnapshot: TextSnapshot | null = null;
#networkCollector: NetworkCollector;
#consoleCollector: PageCollector<ConsoleMessage | Error>;

Expand All @@ -43,6 +48,8 @@ export class McpContext implements Context {
#cpuThrottlingRate = 1;
#dialog?: Dialog;

#nextSnapshotId = 1;

private constructor(browser: Browser, logger: Debugger) {
this.browser = browser;
this.logger = logger;
Expand Down Expand Up @@ -192,11 +199,19 @@ export class McpContext implements Context {
newPage.setDefaultNavigationTimeout(10_000);
}

async getElementByUid(uid: number): Promise<ElementHandle<Element>> {
if (!this.#idToNodeMap.size) {
async getElementByUid(uid: string): Promise<ElementHandle<Element>> {
if (!this.#textSnapshot?.idToNode.size) {
throw new Error('No snapshot found. Use browser_snapshot to capture one');
}
const node = this.#idToNodeMap.get(uid);
const [snapshotId] = uid.split('_');

if (this.#textSnapshot.snapshotId !== snapshotId) {
throw new Error(
'This uid is coming from a stale snapshot. Call take_snapshot to get a fresh snapshot.',
);
}

const node = this.#textSnapshot?.idToNode.get(uid);
if (!node) {
throw new Error('No such element found in the snapshot');
}
Expand All @@ -222,35 +237,39 @@ export class McpContext implements Context {
/**
* Creates a text snapshot of a page.
*/
async createTextSnapshot(): Promise<TextSnapshotNode | null> {
async createTextSnapshot(): Promise<void> {
const page = this.getSelectedPage();
const rootNode = await page.accessibility.snapshot();
if (!rootNode) {
return null;
return;
}

const snapshotId = this.#nextSnapshotId++;
// Iterate through the whole accessibility node tree and assign node ids that
// will be used for the tree serialization and mapping ids back to nodes.
let idCounter = 0;
this.#idToNodeMap.clear();
const idToNode = new Map<string, TextSnapshotNode>();
const assignIds = (node: SerializedAXNode): TextSnapshotNode => {
const nodeWithId: TextSnapshotNode = {
...node,
id: idCounter++,
id: `${snapshotId}_${idCounter++}`,
children: node.children
? node.children.map(child => assignIds(child))
: [],
};
this.#idToNodeMap.set(nodeWithId.id, nodeWithId);
idToNode.set(nodeWithId.id, nodeWithId);
return nodeWithId;
};

const rootNodeWithId = assignIds(rootNode);
this.#textSnapshot = rootNodeWithId;
return rootNodeWithId;
this.#textSnapshot = {
root: rootNodeWithId,
snapshotId: String(snapshotId),
idToNode,
};
}

getTextSnapshot(): TextSnapshotNode | null {
getTextSnapshot(): TextSnapshot | null {
return this.#textSnapshot;
}

Expand Down
6 changes: 3 additions & 3 deletions src/McpResponse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ Call browser_handle_dialog to handle it before continuing.`);
}

if (this.#includeSnapshot) {
const rootNode = context.getTextSnapshot();
if (rootNode) {
const formattedSnapshot = formatA11ySnapshot(rootNode);
const snapshot = context.getTextSnapshot();
if (snapshot) {
const formattedSnapshot = formatA11ySnapshot(snapshot.root);
response.push('## Page content');
response.push(formattedSnapshot);
}
Expand Down
2 changes: 1 addition & 1 deletion src/tools/ToolDefinition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ export type Context = Readonly<{
getPageByIdx(idx: number): Page;
newPage(): Promise<Page>;
setSelectedPageIdx(idx: number): void;
getElementByUid(uid: number): Promise<ElementHandle<Element>>;
getElementByUid(uid: string): Promise<ElementHandle<Element>>;
setNetworkConditions(conditions: string | null): void;
setCpuThrottlingRate(rate: number): void;
saveTemporaryFile(
Expand Down
14 changes: 7 additions & 7 deletions src/tools/input.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export const click = defineTool({
},
schema: {
uid: z
.number()
.string()
.describe(
'The uid of an element on the page from the page content snapshot',
),
Expand Down Expand Up @@ -58,7 +58,7 @@ export const hover = defineTool({
},
schema: {
uid: z
.number()
.string()
.describe(
'The uid of an element on the page from the page content snapshot',
),
Expand Down Expand Up @@ -87,7 +87,7 @@ export const fill = defineTool({
},
schema: {
uid: z
.number()
.string()
.describe(
'The uid of an element on the page from the page content snapshot',
),
Expand Down Expand Up @@ -115,8 +115,8 @@ export const drag = defineTool({
readOnlyHint: false,
},
schema: {
from_uid: z.number().describe('The uid of the element to drag'),
to_uid: z.number().describe('The uid of the element to drop into'),
from_uid: z.string().describe('The uid of the element to drag'),
to_uid: z.string().describe('The uid of the element to drop into'),
},
handler: async (request, response, context) => {
const fromHandle = await context.getElementByUid(request.params.from_uid);
Expand Down Expand Up @@ -147,7 +147,7 @@ export const fillForm = defineTool({
elements: z
.array(
z.object({
uid: z.number().describe('The uid of the element to fill out'),
uid: z.string().describe('The uid of the element to fill out'),
value: z.string().describe('Value for the element'),
}),
)
Expand Down Expand Up @@ -178,7 +178,7 @@ export const uploadFile = defineTool({
},
schema: {
uid: z
.number()
.string()
.describe(
'The uid of the file input element or an element that will open file chooser on the page from the page content snapshot',
),
Expand Down
2 changes: 1 addition & 1 deletion src/tools/screenshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export const screenshot = defineTool({
.default('png')
.describe('Type of format to save the screenshot as. Default is "png"'),
uid: z
.number()
.string()
.optional()
.describe(
'The uid of an element on the page from the page content snapshot. If omitted takes a pages screenshot.',
Expand Down
31 changes: 31 additions & 0 deletions tests/McpContext.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import {describe, it} from 'node:test';
import assert from 'assert';

import {withBrowser} from './utils.js';

// Covers McpContext uid resolution: a uid is only valid for the most recent
// text snapshot; uids minted by an earlier snapshot must be rejected.
describe('McpContext', () => {
  it('rejects uids that come from a stale snapshot', async () => {
    await withBrowser(async (response, context) => {
      const page = context.getSelectedPage();
      await page.setContent(`<!DOCTYPE html>
<button>Click me</button><input type="text" value="Input">`);

      // The uid '1_1' belongs to the first snapshot taken in this context and
      // resolves while that snapshot is the current one.
      await context.createTextSnapshot();
      assert.ok(await context.getElementByUid('1_1'));

      // Taking another snapshot replaces the first one, so the old uid is stale.
      await context.createTextSnapshot();

      // assert.rejects fails the test when the promise resolves. The previous
      // try/catch swallowed its own assert.fail() and only checked that the
      // error message was truthy, so it could never fail.
      await assert.rejects(() => context.getElementByUid('1_1'), {
        message:
          'This uid is coming from a stale snapshot. Call take_snapshot to get a fresh snapshot.',
      });
    });
  });
});
12 changes: 6 additions & 6 deletions tests/McpResponse.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ Testing 2`,
result[0].text,
`# test response
## Page content
uid=0 RootWebArea ""
uid=1 button "Click me" focusable focused
uid=2 textbox "" value="Input"
uid=1_0 RootWebArea ""
uid=1_1 button "Click me" focusable focused
uid=1_2 textbox "" value="Input"
`,
);
});
Expand All @@ -87,9 +87,9 @@ uid=0 RootWebArea ""
result[0].text,
`# test response
## Page content
uid=0 RootWebArea "My test page"
uid=1 StaticText "username"
uid=2 textbox "username" value="mcp" focusable focused
uid=1_0 RootWebArea "My test page"
uid=1_1 StaticText "username"
uid=1_2 textbox "username" value="mcp" focusable focused
`,
);
});
Expand Down
Loading
Loading