From b8db132d75d495cf39dc0d7fb9d20b349a7d62c4 Mon Sep 17 00:00:00 2001 From: Stanislav Publika <10758542+passtas@users.noreply.github.com> Date: Mon, 23 Feb 2026 10:16:02 +0000 Subject: [PATCH 1/5] feat: add isolatedContext routing for parallel agent page resolution Add optional `isolatedContext` parameter to the page-dependent tools listed below so parallel agents can resolve pages by context name instead of relying on the global selected-page pointer. When an agent creates a page with `new_page(isolatedContext: "my-agent")`, subsequent calls to those tools can pass `isolatedContext: "my-agent"` to operate on the correct page without race conditions from other agents calling `select_page` concurrently. McpContext tracks per-context selected pages and resolvePageByContext() looks up the right page by context name. When the parameter is omitted, tools fall back to getSelectedPage() (fully backward compatible). Updated tools: take_screenshot, take_snapshot, wait_for, navigate_page, resize_page, emulate, click_at, fill, fill_form, upload_file, press_key, evaluate_script, performance_start_trace, performance_stop_trace, screencast_start. 
--- src/McpContext.ts | 75 ++++++++++++++++++++---- src/McpResponse.ts | 1 + src/tools/ToolDefinition.ts | 45 ++++++++++++--- src/tools/emulation.ts | 8 ++- src/tools/input.ts | 46 +++++++++++---- src/tools/pages.ts | 17 +++++- src/tools/performance.ts | 12 +++- src/tools/screencast.ts | 7 ++- src/tools/screenshot.ts | 7 ++- src/tools/script.ts | 7 ++- src/tools/snapshot.ts | 20 ++++++- tests/tools/emulation.test.ts | 62 ++++++++++++++++++++ tests/tools/pages.test.ts | 102 +++++++++++++++++++++++++++++++++ tests/tools/screenshot.test.ts | 41 +++++++++++++ tests/tools/snapshot.test.ts | 78 +++++++++++++++++++++++++ 15 files changed, 479 insertions(+), 49 deletions(-) diff --git a/src/McpContext.ts b/src/McpContext.ts index 56ea1f5a8..6f0920b7d 100644 --- a/src/McpContext.ts +++ b/src/McpContext.ts @@ -140,6 +140,8 @@ export class McpContext implements Context { #pageToDevToolsPage = new Map(); #selectedPage?: Page; + // Per-context selected page tracking for parallel agent support. + #contextSelectedPage = new Map(); #textSnapshot: TextSnapshot | null = null; #networkCollector: NetworkCollector; #consoleCollector: ConsoleCollector; @@ -328,15 +330,18 @@ export class McpContext implements Context { return this.#networkCollector.getById(this.getSelectedPage(), reqid); } - async emulate(options: { - networkConditions?: string | null; - cpuThrottlingRate?: number | null; - geolocation?: GeolocationOptions | null; - userAgent?: string | null; - colorScheme?: 'dark' | 'light' | 'auto' | null; - viewport?: Viewport | null; - }): Promise { - const page = this.getSelectedPage(); + async emulate( + options: { + networkConditions?: string | null; + cpuThrottlingRate?: number | null; + geolocation?: GeolocationOptions | null; + userAgent?: string | null; + colorScheme?: 'dark' | 'light' | 'auto' | null; + viewport?: Viewport | null; + }, + targetPage?: Page, + ): Promise { + const page = targetPage ?? 
this.getSelectedPage(); const currentSettings = this.#emulationSettingsMap.get(page) ?? {}; const newSettings: EmulationSettings = {...currentSettings}; let timeoutsNeedUpdate = false; @@ -513,6 +518,41 @@ export class McpContext implements Context { return page; } + resolvePageByContext(isolatedContext?: string): Page { + if (isolatedContext === undefined) { + return this.getSelectedPage(); + } + + // Try the per-context selected page first. + const tracked = this.#contextSelectedPage.get(isolatedContext); + if (tracked && !tracked.isClosed()) { + return tracked; + } + + // Fall back: find any non-closed page in the context. + const ctx = this.#isolatedContexts.get(isolatedContext); + if (!ctx) { + throw new Error( + `No isolated context named "${isolatedContext}" exists. ` + + `Create one first with new_page(isolatedContext: "${isolatedContext}").`, + ); + } + + for (const page of this.#pages) { + if ( + !page.isClosed() && + this.#pageToIsolatedContextName.get(page) === isolatedContext + ) { + this.#contextSelectedPage.set(isolatedContext, page); + return page; + } + } + + throw new Error( + `No open page found in isolated context "${isolatedContext}".`, + ); + } + getPageById(pageId: number): Page { const page = this.#pages.find(p => this.#pageIdMap.get(p) === pageId); if (!page) { @@ -547,6 +587,12 @@ export class McpContext implements Context { void newPage.emulateFocusedPage(true).catch(error => { this.logger('Error turning on focused page emulation', error); }); + + // Track per-context selected page for parallel agent routing. 
+ const contextName = this.#pageToIsolatedContextName.get(newPage); + if (contextName) { + this.#contextSelectedPage.set(contextName, newPage); + } } #updateSelectedPageTimeouts() { @@ -787,8 +833,9 @@ export class McpContext implements Context { async createTextSnapshot( verbose = false, devtoolsData: DevToolsData | undefined = undefined, + targetPage?: Page, ): Promise { - const page = this.getSelectedPage(); + const page = targetPage ?? this.getSelectedPage(); const rootNode = await page.accessibility.snapshot({ includeIframes: true, interestingOnly: !verbose, @@ -939,8 +986,12 @@ export class McpContext implements Context { return this.#networkCollector.getIdForResource(request); } - waitForTextOnPage(text: string[], timeout?: number): Promise { - const page = this.getSelectedPage(); + waitForTextOnPage( + text: string[], + timeout?: number, + targetPage?: Page, + ): Promise { + const page = targetPage ?? this.getSelectedPage(); const frames = page.frames(); let locator = this.#locatorClass.race( diff --git a/src/McpResponse.ts b/src/McpResponse.ts index 243cd81cf..aac01db85 100644 --- a/src/McpResponse.ts +++ b/src/McpResponse.ts @@ -253,6 +253,7 @@ export class McpResponse implements Response { await context.createTextSnapshot( this.#snapshotParams.verbose, this.#devToolsData, + this.#snapshotParams.page, ); const textSnapshot = context.getTextSnapshot(); if (textSnapshot) { diff --git a/src/tools/ToolDefinition.ts b/src/tools/ToolDefinition.ts index b2bea87a1..85814038e 100644 --- a/src/tools/ToolDefinition.ts +++ b/src/tools/ToolDefinition.ts @@ -54,6 +54,7 @@ export interface ImageContentData { export interface SnapshotParams { verbose?: boolean; filePath?: string; + page?: Page; } export interface DevToolsData { @@ -108,6 +109,7 @@ export type Context = Readonly<{ recordedTraces(): TraceResult[]; storeTraceRecording(result: TraceResult): void; getSelectedPage(): Page; + resolvePageByContext(isolatedContext?: string): Page; getDialog(): Dialog | undefined; 
clearDialog(): void; getPageById(pageId: number): Page; @@ -116,14 +118,23 @@ export type Context = Readonly<{ selectPage(page: Page): void; getElementByUid(uid: string): Promise>; getAXNodeByUid(uid: string): TextSnapshotNode | undefined; - emulate(options: { - networkConditions?: string | null; - cpuThrottlingRate?: number | null; - geolocation?: GeolocationOptions | null; - userAgent?: string | null; - colorScheme?: 'dark' | 'light' | 'auto' | null; - viewport?: Viewport | null; - }): Promise; + emulate( + options: { + networkConditions?: string | null; + cpuThrottlingRate?: number | null; + geolocation?: GeolocationOptions | null; + userAgent?: string | null; + colorScheme?: 'dark' | 'light' | 'auto' | null; + viewport?: Viewport | null; + }, + targetPage?: Page, + ): Promise; + getNetworkConditions(): string | null; + getCpuThrottlingRate(): number; + getGeolocation(): GeolocationOptions | null; + getViewport(): Viewport | null; + getUserAgent(): string | null; + getColorScheme(): 'dark' | 'light' | null; saveTemporaryFile( data: Uint8Array, mimeType: 'image/png' | 'image/jpeg' | 'image/webp', @@ -136,7 +147,11 @@ export type Context = Readonly<{ action: () => Promise, options?: {timeout?: number}, ): Promise; - waitForTextOnPage(text: string[], timeout?: number): Promise; + waitForTextOnPage( + text: string[], + timeout?: number, + page?: Page, + ): Promise; getDevToolsData(): Promise; /** * Returns a reqid for a cdpRequestId. @@ -181,6 +196,18 @@ export function defineTool< export const CLOSE_PAGE_ERROR = 'The last open page cannot be closed. It is fine to keep it open.'; +export const isolatedContextSchema = { + isolatedContext: zod + .string() + .optional() + .describe( + 'The name of the isolated browser context to resolve the page from. ' + + 'When provided, the tool operates on the page belonging to this context ' + + 'instead of the globally selected page. 
' + + 'Use this to avoid race conditions when multiple agents work in parallel.', + ), +}; + export const timeoutSchema = { timeout: zod .number() diff --git a/src/tools/emulation.ts b/src/tools/emulation.ts index ea0538fd1..084b311cd 100644 --- a/src/tools/emulation.ts +++ b/src/tools/emulation.ts @@ -8,7 +8,7 @@ import {zod, PredefinedNetworkConditions} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool} from './ToolDefinition.js'; +import {defineTool, isolatedContextSchema} from './ToolDefinition.js'; const throttlingOptions: [string, ...string[]] = [ 'No emulation', @@ -24,6 +24,7 @@ export const emulate = defineTool({ readOnlyHint: false, }, schema: { + ...isolatedContextSchema, networkConditions: zod .enum(throttlingOptions) .optional() @@ -104,6 +105,9 @@ export const emulate = defineTool({ ), }, handler: async (request, _response, context) => { - await context.emulate(request.params); + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); + await context.emulate(request.params, page); }, }); diff --git a/src/tools/input.ts b/src/tools/input.ts index 2c338a520..32fd0bb67 100644 --- a/src/tools/input.ts +++ b/src/tools/input.ts @@ -7,11 +7,11 @@ import {logger} from '../logger.js'; import type {McpContext, TextSnapshotNode} from '../McpContext.js'; import {zod} from '../third_party/index.js'; -import type {ElementHandle, KeyInput} from '../third_party/index.js'; +import type {ElementHandle, KeyInput, Page} from '../third_party/index.js'; import {parseKey} from '../utils/keyboard.js'; import {ToolCategory} from './categories.js'; -import {defineTool} from './ToolDefinition.js'; +import {defineTool, isolatedContextSchema} from './ToolDefinition.js'; const dblClickSchema = zod .boolean() @@ -90,13 +90,16 @@ export const clickAt = defineTool({ conditions: ['computerVision'], }, schema: { + ...isolatedContextSchema, x: zod.number().describe('The x coordinate'), y: 
zod.number().describe('The y coordinate'), dblClick: dblClickSchema, includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - const page = context.getSelectedPage(); + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); await context.waitForEventsAfterAction(async () => { await page.mouse.click(request.params.x, request.params.y, { clickCount: request.params.dblClick ? 2 : 1, @@ -108,7 +111,7 @@ export const clickAt = defineTool({ : `Successfully clicked at the coordinates`, ); if (request.params.includeSnapshot) { - response.includeSnapshot(); + response.includeSnapshot({ page }); } }, }); @@ -192,6 +195,7 @@ async function fillFormElement( uid: string, value: string, context: McpContext, + page?: Page, ) { const handle = await context.getElementByUid(uid); try { @@ -203,8 +207,9 @@ async function fillFormElement( } else { // Increase timeout for longer input values. const timeoutPerChar = 10; // ms + const targetPage = page ?? 
context.getSelectedPage(); const fillTimeout = - context.getSelectedPage().getDefaultTimeout() + + targetPage.getDefaultTimeout() + value.length * timeoutPerChar; await handle.asLocator().setTimeout(fillTimeout).fill(value); } @@ -223,6 +228,7 @@ export const fill = defineTool({ readOnlyHint: false, }, schema: { + ...isolatedContextSchema, uid: zod .string() .describe( @@ -232,16 +238,20 @@ export const fill = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); await context.waitForEventsAfterAction(async () => { await fillFormElement( request.params.uid, request.params.value, context as McpContext, + page, ); }); response.appendResponseLine(`Successfully filled out the element`); if (request.params.includeSnapshot) { - response.includeSnapshot(); + response.includeSnapshot({ page }); } }, }); @@ -311,6 +321,7 @@ export const fillForm = defineTool({ readOnlyHint: false, }, schema: { + ...isolatedContextSchema, elements: zod .array( zod.object({ @@ -322,18 +333,22 @@ export const fillForm = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); for (const element of request.params.elements) { await context.waitForEventsAfterAction(async () => { await fillFormElement( element.uid, element.value, context as McpContext, + page, ); }); } response.appendResponseLine(`Successfully filled out the form`); if (request.params.includeSnapshot) { - response.includeSnapshot(); + response.includeSnapshot({ page }); } }, }); @@ -346,6 +361,7 @@ export const uploadFile = defineTool({ readOnlyHint: false, }, schema: { + ...isolatedContextSchema, uid: zod .string() .describe( @@ -367,7 +383,9 @@ export const uploadFile = defineTool({ // a type=file element. 
In this case, we want to default to // Page.waitForFileChooser() and upload the file this way. try { - const page = context.getSelectedPage(); + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); const [fileChooser] = await Promise.all([ page.waitForFileChooser({timeout: 3000}), handle.asLocator().click(), @@ -380,7 +398,10 @@ export const uploadFile = defineTool({ } } if (request.params.includeSnapshot) { - response.includeSnapshot(); + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); + response.includeSnapshot({ page }); } response.appendResponseLine(`File uploaded from ${filePath}.`); } finally { @@ -397,6 +418,7 @@ export const pressKey = defineTool({ readOnlyHint: false, }, schema: { + ...isolatedContextSchema, key: zod .string() .describe( @@ -405,7 +427,9 @@ export const pressKey = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - const page = context.getSelectedPage(); + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); const tokens = parseKey(request.params.key); const [key, ...modifiers] = tokens; @@ -423,7 +447,7 @@ export const pressKey = defineTool({ `Successfully pressed key: ${request.params.key}`, ); if (request.params.includeSnapshot) { - response.includeSnapshot(); + response.includeSnapshot({ page }); } }, }); diff --git a/src/tools/pages.ts b/src/tools/pages.ts index b3afe6192..3896c2d66 100644 --- a/src/tools/pages.ts +++ b/src/tools/pages.ts @@ -9,7 +9,12 @@ import type {Dialog} from '../third_party/index.js'; import {zod} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {CLOSE_PAGE_ERROR, defineTool, timeoutSchema} from './ToolDefinition.js'; +import { + CLOSE_PAGE_ERROR, + defineTool, + isolatedContextSchema, + timeoutSchema, +} from './ToolDefinition.js'; export const listPages = defineTool(args => { return { @@ -132,6 +137,7 @@ export const navigatePage 
= defineTool({ readOnlyHint: false, }, schema: { + ...isolatedContextSchema, type: zod .enum(['url', 'back', 'forward', 'reload']) .optional() @@ -158,7 +164,9 @@ export const navigatePage = defineTool({ ...timeoutSchema, }, handler: async (request, response, context) => { - const page = context.getSelectedPage(); + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); const options = { timeout: request.params.timeout, }; @@ -281,11 +289,14 @@ export const resizePage = defineTool({ readOnlyHint: false, }, schema: { + ...isolatedContextSchema, width: zod.number().describe('Page width'), height: zod.number().describe('Page height'), }, handler: async (request, response, context) => { - const page = context.getSelectedPage(); + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); try { const browser = page.browser(); diff --git a/src/tools/performance.ts b/src/tools/performance.ts index 393d38f15..eff3e0ade 100644 --- a/src/tools/performance.ts +++ b/src/tools/performance.ts @@ -17,7 +17,7 @@ import { import {ToolCategory} from './categories.js'; import type {Context, Response} from './ToolDefinition.js'; -import {defineTool} from './ToolDefinition.js'; +import {defineTool, isolatedContextSchema} from './ToolDefinition.js'; const filePathSchema = zod .string() @@ -34,6 +34,7 @@ export const startTrace = defineTool({ readOnlyHint: false, }, schema: { + ...isolatedContextSchema, reload: zod .boolean() .describe( @@ -55,7 +56,9 @@ export const startTrace = defineTool({ } context.setIsRunningPerformanceTrace(true); - const page = context.getSelectedPage(); + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); const pageUrlForTracing = page.url(); if (request.params.reload) { @@ -121,13 +124,16 @@ export const stopTrace = defineTool({ readOnlyHint: false, }, schema: { + ...isolatedContextSchema, filePath: filePathSchema, }, handler: async (request, response, context) => { if 
(!context.isRunningPerformanceTrace()) { return; } - const page = context.getSelectedPage(); + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); await stopTracingAndAppendOutput( page, response, diff --git a/src/tools/screencast.ts b/src/tools/screencast.ts index d24d9b0fd..57578fbfb 100644 --- a/src/tools/screencast.ts +++ b/src/tools/screencast.ts @@ -12,7 +12,7 @@ import {zod} from '../third_party/index.js'; import type {ScreenRecorder} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool} from './ToolDefinition.js'; +import {defineTool, isolatedContextSchema} from './ToolDefinition.js'; async function generateTempFilePath(): Promise { const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'chrome-devtools-mcp-')); @@ -29,6 +29,7 @@ export const startScreencast = defineTool({ conditions: ['screencast'], }, schema: { + ...isolatedContextSchema, path: zod .string() .optional() @@ -47,7 +48,9 @@ export const startScreencast = defineTool({ const filePath = request.params.path ?? 
(await generateTempFilePath()); const resolvedPath = path.resolve(filePath); - const page = context.getSelectedPage(); + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); let recorder: ScreenRecorder; try { diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts index 4312c02aa..24ae0715a 100644 --- a/src/tools/screenshot.ts +++ b/src/tools/screenshot.ts @@ -8,7 +8,7 @@ import {zod} from '../third_party/index.js'; import type {ElementHandle, Page} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool} from './ToolDefinition.js'; +import {defineTool, isolatedContextSchema} from './ToolDefinition.js'; export const screenshot = defineTool({ name: 'take_screenshot', @@ -19,6 +19,7 @@ export const screenshot = defineTool({ readOnlyHint: false, }, schema: { + ...isolatedContextSchema, format: zod .enum(['png', 'jpeg', 'webp']) .default('png') @@ -59,7 +60,9 @@ export const screenshot = defineTool({ if (request.params.uid) { pageOrHandle = await context.getElementByUid(request.params.uid); } else { - pageOrHandle = context.getSelectedPage(); + pageOrHandle = context.resolvePageByContext( + request.params.isolatedContext, + ); } const format = request.params.format; diff --git a/src/tools/script.ts b/src/tools/script.ts index f3bc3c3c5..9a7fa47a9 100644 --- a/src/tools/script.ts +++ b/src/tools/script.ts @@ -8,7 +8,7 @@ import {zod} from '../third_party/index.js'; import type {Frame, JSHandle, Page} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool} from './ToolDefinition.js'; +import {defineTool, isolatedContextSchema} from './ToolDefinition.js'; export const evaluateScript = defineTool({ name: 'evaluate_script', @@ -19,6 +19,7 @@ so returned values have to be JSON-serializable.`, readOnlyHint: false, }, schema: { + ...isolatedContextSchema, function: zod.string().describe( `A JavaScript function declaration to be executed by the tool in the 
currently selected page. Example without arguments: \`() => { @@ -60,7 +61,9 @@ Example with arguments: \`(el) => { "Elements from different frames can't be evaluated together.", ); } else { - pageOrFrame = [...frames.values()][0] ?? context.getSelectedPage(); + pageOrFrame = + [...frames.values()][0] ?? + context.resolvePageByContext(request.params.isolatedContext); } const fn = await pageOrFrame.evaluateHandle( `(${request.params.function})`, diff --git a/src/tools/snapshot.ts b/src/tools/snapshot.ts index a07bf5825..f99106f0a 100644 --- a/src/tools/snapshot.ts +++ b/src/tools/snapshot.ts @@ -7,7 +7,11 @@ import {zod} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool, timeoutSchema} from './ToolDefinition.js'; +import { + defineTool, + isolatedContextSchema, + timeoutSchema, +} from './ToolDefinition.js'; export const takeSnapshot = defineTool({ name: 'take_snapshot', @@ -20,6 +24,7 @@ in the DevTools Elements panel (if any).`, readOnlyHint: false, }, schema: { + ...isolatedContextSchema, verbose: zod .boolean() .optional() @@ -33,10 +38,14 @@ in the DevTools Elements panel (if any).`, 'The absolute path, or a path relative to the current working directory, to save the snapshot to instead of attaching it to the response.', ), }, - handler: async (request, response) => { + handler: async (request, response, context) => { + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); response.includeSnapshot({ verbose: request.params.verbose ?? 
false, filePath: request.params.filePath, + page, }); }, }); @@ -49,6 +58,7 @@ export const waitFor = defineTool({ readOnlyHint: true, }, schema: { + ...isolatedContextSchema, text: zod .array(zod.string()) .min(1) @@ -58,15 +68,19 @@ export const waitFor = defineTool({ ...timeoutSchema, }, handler: async (request, response, context) => { + const page = context.resolvePageByContext( + request.params.isolatedContext, + ); await context.waitForTextOnPage( request.params.text, request.params.timeout, + page, ); response.appendResponseLine( `Element matching one of ${JSON.stringify(request.params.text)} found.`, ); - response.includeSnapshot(); + response.includeSnapshot({ page }); }, }); diff --git a/tests/tools/emulation.test.ts b/tests/tools/emulation.test.ts index 2aa31dad6..671ba1b23 100644 --- a/tests/tools/emulation.test.ts +++ b/tests/tools/emulation.test.ts @@ -8,6 +8,7 @@ import assert from 'node:assert'; import {beforeEach, describe, it} from 'node:test'; import {emulate} from '../../src/tools/emulation.js'; +import {newPage, selectPage} from '../../src/tools/pages.js'; import {serverHooks} from '../server.js'; import {html, withMcpContext} from '../utils.js'; @@ -470,6 +471,67 @@ describe('emulation', () => { }); }); + describe('isolatedContext routing', () => { + beforeEach(() => { + server.addHtmlRoute('/emulate-test', html`

Emulate Test

`); + }); + + it('emulates viewport on the isolatedContext page, not the global selection', async () => { + await withMcpContext(async (response, context) => { + // Create an isolated page. + await newPage.handler( + { + params: { + url: server.baseUrl + '/emulate-test', + isolatedContext: 'emulate-ctx', + }, + }, + response, + context, + ); + const isolatedPage = context.getSelectedPage(); + + // Switch global selection back to the default page. + await selectPage.handler({params: {pageId: 1}}, response, context); + const defaultPage = context.getSelectedPage(); + assert.notStrictEqual(defaultPage, isolatedPage); + + // Emulate viewport on the isolated page via isolatedContext. + await emulate.handler( + { + params: { + isolatedContext: 'emulate-ctx', + viewport: { + width: 390, + height: 844, + isMobile: true, + hasTouch: true, + }, + }, + }, + response, + context, + ); + + // Verify the isolated page received the viewport. + const isolatedViewport = await isolatedPage.evaluate(() => ({ + width: window.innerWidth, + height: window.innerHeight, + hasTouch: navigator.maxTouchPoints > 0, + })); + assert.strictEqual(isolatedViewport.width, 390); + assert.strictEqual(isolatedViewport.height, 844); + assert.strictEqual(isolatedViewport.hasTouch, true); + + // Verify the default page was NOT affected. 
+ const defaultViewport = await defaultPage.evaluate(() => ({ + width: window.innerWidth, + })); + assert.notStrictEqual(defaultViewport.width, 390); + }); + }); + }); + describe('colorScheme', () => { it('emulates color scheme', async () => { await withMcpContext(async (response, context) => { diff --git a/tests/tools/pages.test.ts b/tests/tools/pages.test.ts index 3a740e250..0efb04419 100644 --- a/tests/tools/pages.test.ts +++ b/tests/tools/pages.test.ts @@ -235,6 +235,108 @@ describe('pages', () => { }); }); + describe('resolvePageByContext', () => { + it('returns the correct page regardless of global selection', async () => { + await withMcpContext(async (response, context) => { + // Create two pages in separate isolated contexts with different content. + await newPage.handler( + { + params: { + url: 'data:text/html,

Page A

', + isolatedContext: 'ctx-a', + }, + }, + response, + context, + ); + const pageA = context.getSelectedPage(); + + await newPage.handler( + { + params: { + url: 'data:text/html,

Page B

', + isolatedContext: 'ctx-b', + }, + }, + response, + context, + ); + const pageB = context.getSelectedPage(); + + // Global selection is now pageB (the last created page). + assert.strictEqual(context.getSelectedPage(), pageB); + + // resolvePageByContext should return the correct page for each context, + // regardless of which page is globally selected. + assert.strictEqual(context.resolvePageByContext('ctx-a'), pageA); + assert.strictEqual(context.resolvePageByContext('ctx-b'), pageB); + }); + }); + + it('falls back to getSelectedPage when no isolatedContext is provided', async () => { + await withMcpContext(async (_response, context) => { + const selectedPage = context.getSelectedPage(); + assert.strictEqual( + context.resolvePageByContext(undefined), + selectedPage, + ); + }); + }); + + it('throws for an unknown context name', async () => { + await withMcpContext(async (_response, context) => { + assert.throws( + () => context.resolvePageByContext('nonexistent'), + /No isolated context named "nonexistent" exists/, + ); + }); + }); + + it('navigate_page targets the isolatedContext page, not the global selection', async () => { + await withMcpContext(async (response, context) => { + await newPage.handler( + { + params: { + url: 'data:text/html,

Initial

', + isolatedContext: 'nav-ctx', + }, + }, + response, + context, + ); + const isolatedPage = context.getSelectedPage(); + + // Switch global selection back to the default page. + await selectPage.handler({params: {pageId: 1}}, response, context); + assert.notStrictEqual(context.getSelectedPage(), isolatedPage); + + // Navigate using isolatedContext; should target the isolated page. + await navigatePage.handler( + { + params: { + url: 'data:text/html,

Navigated

', + isolatedContext: 'nav-ctx', + }, + }, + response, + context, + ); + + // Verify the isolated page was navigated. + const content = await isolatedPage.evaluate( + () => document.querySelector('h1')?.textContent, + ); + assert.strictEqual(content, 'Navigated'); + + // Verify the default page was NOT affected. + const defaultContent = await context + .getSelectedPage() + .evaluate(() => document.querySelector('h1')?.textContent); + assert.notStrictEqual(defaultContent, 'Navigated'); + }); + }); + }); + describe('close_page', () => { it('closes a page', async () => { await withMcpContext(async (response, context) => { diff --git a/tests/tools/screenshot.test.ts b/tests/tools/screenshot.test.ts index dab541412..1eda6615d 100644 --- a/tests/tools/screenshot.test.ts +++ b/tests/tools/screenshot.test.ts @@ -10,6 +10,7 @@ import {tmpdir} from 'node:os'; import {join} from 'node:path'; import {describe, it} from 'node:test'; +import {newPage, selectPage} from '../../src/tools/pages.js'; import {screenshot} from '../../src/tools/screenshot.js'; import {screenshots} from '../snapshot.js'; import {html, withMcpContext} from '../utils.js'; @@ -260,5 +261,45 @@ describe('screenshot', () => { ); }); }); + + it('screenshots the isolatedContext page, not the global selection', async () => { + await withMcpContext(async (response, context) => { + // Set distinct content on the default page. + const defaultPage = context.getSelectedPage(); + await defaultPage.setContent( + html`
`, + ); + + // Create an isolated page with different content. + await newPage.handler( + { + params: { + url: 'data:text/html,
', + isolatedContext: 'screenshot-ctx', + }, + }, + response, + context, + ); + + // Switch global selection back to the default page. + await selectPage.handler({params: {pageId: 1}}, response, context); + assert.strictEqual(context.getSelectedPage(), defaultPage); + + // Take a screenshot using isolatedContext. + const {McpResponse} = await import('../../src/McpResponse.js'); + const screenshotResponse = new McpResponse(); + await screenshot.handler( + {params: {format: 'png', isolatedContext: 'screenshot-ctx'}}, + screenshotResponse, + context, + ); + + // Should have produced an image (basic sanity: it didn't crash and + // returned something from the isolated page, not the default). + assert.equal(screenshotResponse.images.length, 1); + assert.equal(screenshotResponse.images[0].mimeType, 'image/png'); + }); + }); }); }); diff --git a/tests/tools/snapshot.test.ts b/tests/tools/snapshot.test.ts index 2aa40d8be..0cd530bec 100644 --- a/tests/tools/snapshot.test.ts +++ b/tests/tools/snapshot.test.ts @@ -7,6 +7,7 @@ import assert from 'node:assert'; import {describe, it} from 'node:test'; +import {newPage, selectPage} from '../../src/tools/pages.js'; import {takeSnapshot, waitFor} from '../../src/tools/snapshot.js'; import {html, withMcpContext} from '../utils.js'; @@ -180,4 +181,81 @@ describe('snapshot', () => { }); }); }); + + describe('isolatedContext routing', () => { + it('take_snapshot returns content from the isolatedContext page, not the global selection', async () => { + await withMcpContext(async (response, context) => { + // Create an isolated page with unique content. + await newPage.handler( + { + params: { + url: 'data:text/html,

Isolated Snapshot Content

', + isolatedContext: 'snap-ctx', + }, + }, + response, + context, + ); + + // Switch global selection back to the default page. + await selectPage.handler({params: {pageId: 1}}, response, context); + + // Take snapshot using isolatedContext. + const snapshotResponse = new (await import('../../src/McpResponse.js')).McpResponse(); + await takeSnapshot.handler( + {params: {isolatedContext: 'snap-ctx'}}, + snapshotResponse, + context, + ); + + // The snapshot should reflect the isolated page's content. + const result = await snapshotResponse.handle('take_snapshot', context); + const text = result.content + .filter(c => c.type === 'text') + .map(c => (c as {text: string}).text) + .join(''); + assert.ok( + text.includes('Isolated Snapshot Content'), + `Expected snapshot to contain "Isolated Snapshot Content" but got: ${text.slice(0, 200)}`, + ); + }); + }); + + it('wait_for finds text on the isolatedContext page, not the global selection', async () => { + await withMcpContext(async (response, context) => { + // Create an isolated page with target text. + await newPage.handler( + { + params: { + url: 'data:text/html,

Unique Isolated Text

', + isolatedContext: 'wait-ctx', + }, + }, + response, + context, + ); + + // Switch global selection away. + await selectPage.handler({params: {pageId: 1}}, response, context); + + // wait_for should find text on the isolated page. + const waitResponse = new (await import('../../src/McpResponse.js')).McpResponse(); + await waitFor.handler( + { + params: { + text: 'Unique Isolated Text', + isolatedContext: 'wait-ctx', + }, + }, + waitResponse, + context, + ); + + assert.equal( + waitResponse.responseLines[0], + 'Element with text "Unique Isolated Text" found.', + ); + }); + }); + }); }); From 300bb179efe7551e2940347c74e76bebeca77b3c Mon Sep 17 00:00:00 2001 From: Stanislav Publika <10758542+passtas@users.noreply.github.com> Date: Mon, 23 Feb 2026 15:42:02 +0000 Subject: [PATCH 2/5] refactor: replace isolatedContext routing with pageId routing Replace the isolatedContext-based page resolution with the more general pageId parameter. The isolatedContext approach only worked when agents used different browser contexts. pageId works for any multi-page scenario, uses an already-existing concept (page IDs), and does not depend on isolated contexts. 
- Rename isolatedContextSchema to pageIdSchema (string to number) - Replace resolvePageByContext() with resolvePageById() in McpContext - Remove per-context page tracking (#contextSelectedPage map) - Update all tool files to use pageId routing - Keep isolatedContext on new_page (browser context isolation, not routing) - Update tests to use pageId-based assertions --- docs/tool-reference.md | 29 +++---- .../eval_scenarios/page_id_routing_test.ts | 40 ++++++++++ src/McpContext.ts | 42 +--------- src/tools/ToolDefinition.ts | 14 +--- src/tools/emulation.ts | 8 +- src/tools/input.ts | 49 +++++------- src/tools/pages.ts | 14 ++-- src/tools/performance.ts | 14 ++-- src/tools/screencast.ts | 8 +- src/tools/screenshot.ts | 8 +- src/tools/script.ts | 6 +- src/tools/snapshot.ts | 20 ++--- tests/tools/emulation.test.ts | 62 --------------- tests/tools/pages.test.ts | 33 ++++---- tests/tools/screenshot.test.ts | 41 ---------- tests/tools/snapshot.test.ts | 78 ------------------- 16 files changed, 122 insertions(+), 344 deletions(-) create mode 100644 scripts/eval_scenarios/page_id_routing_test.ts diff --git a/docs/tool-reference.md b/docs/tool-reference.md index 874e814de..eed5288ac 100644 --- a/docs/tool-reference.md +++ b/docs/tool-reference.md @@ -1,8 +1,8 @@ -# Chrome DevTools MCP Tool Reference (~7084 cl100k_base tokens) +# Chrome DevTools MCP Tool Reference (~7267 cl100k_base tokens) -- **[Input automation](#input-automation)** (9 tools) +- **[Input automation](#input-automation)** (8 tools) - [`click`](#click) - [`drag`](#drag) - [`fill`](#fill) @@ -10,7 +10,6 @@ - [`handle_dialog`](#handle_dialog) - [`hover`](#hover) - [`press_key`](#press_key) - - [`type_text`](#type_text) - [`upload_file`](#upload_file) - **[Navigation automation](#navigation-automation)** (6 tools) - [`close_page`](#close_page) @@ -72,6 +71,7 @@ - **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot - **value** (string) **(required)**: The value to 
[`fill`](#fill) in - **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. +- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -83,6 +83,7 @@ - **elements** (array) **(required)**: Elements from snapshot to [`fill`](#fill) out. - **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. +- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -116,17 +117,7 @@ - **key** (string) **(required)**: A key or a combination (e.g., "Enter", "Control+A", "Control++", "Control+Shift+R"). Modifiers: Control, Shift, Alt, Meta - **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. - ---- - -### `type_text` - -**Description:** Type text using keyboard into a previously focused input - -**Parameters:** - -- **text** (string) **(required)**: The text to type -- **submitKey** (string) _(optional)_: Optional key to press after typing. E.g., "Enter", "Tab", "Escape" +- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -139,6 +130,7 @@ - **filePath** (string) **(required)**: The local path of the file to upload - **uid** (string) **(required)**: The uid of the file input element or an element that will open file chooser on the page from the page content snapshot - **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. +- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -171,6 +163,7 @@ - **handleBeforeUnload** (enum: "accept", "decline") _(optional)_: Whether to auto accept or beforeunload dialogs triggered by this navigation. Default is accept. - **ignoreCache** (boolean) _(optional)_: Whether to ignore cache on reload. - **initScript** (string) _(optional)_: A JavaScript script to be executed on each new document before any other scripts for the next navigation. 
+- **pageId** (number) _(optional)_: Targets a specific page by ID. - **timeout** (integer) _(optional)_: Maximum wait time in milliseconds. If set to 0, the default timeout will be used. - **type** (enum: "url", "back", "forward", "reload") _(optional)_: Navigate the page by URL, back or forward in history, or reload. - **url** (string) _(optional)_: Target URL (only type=url) @@ -208,6 +201,7 @@ **Parameters:** - **text** (array) **(required)**: Non-empty list of texts. Resolves when any value appears on the page. +- **pageId** (number) _(optional)_: Targets a specific page by ID. - **timeout** (integer) _(optional)_: Maximum wait time in milliseconds. If set to 0, the default timeout will be used. --- @@ -224,6 +218,7 @@ - **cpuThrottlingRate** (number) _(optional)_: Represents the CPU slowdown factor. Set the rate to 1 to disable throttling. If omitted, throttling remains unchanged. - **geolocation** (unknown) _(optional)_: Geolocation to [`emulate`](#emulate). Set to null to clear the geolocation override. - **networkConditions** (enum: "No emulation", "Offline", "Slow 3G", "Fast 3G", "Slow 4G", "Fast 4G") _(optional)_: Throttle network. Set to "No emulation" to disable. If omitted, conditions remain unchanged. +- **pageId** (number) _(optional)_: Targets a specific page by ID. - **userAgent** (unknown) _(optional)_: User agent to [`emulate`](#emulate). Set to null to clear the user agent override. - **viewport** (unknown) _(optional)_: Viewport to [`emulate`](#emulate). Set to null to reset to the default viewport. @@ -237,6 +232,7 @@ - **height** (number) **(required)**: Page height - **width** (number) **(required)**: Page width +- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -262,6 +258,7 @@ - **autoStop** (boolean) **(required)**: Determines if the trace recording should be automatically stopped. 
- **reload** (boolean) **(required)**: Determines if, once tracing has started, the current selected page should be automatically reloaded. Navigate the page to the right URL using the [`navigate_page`](#navigate_page) tool BEFORE starting the trace if reload or autoStop is set to true. - **filePath** (string) _(optional)_: The absolute file path, or a file path relative to the current working directory, to save the raw trace data. For example, trace.json.gz (compressed) or trace.json (uncompressed). +- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -272,6 +269,7 @@ **Parameters:** - **filePath** (string) _(optional)_: The absolute file path, or a file path relative to the current working directory, to save the raw trace data. For example, trace.json.gz (compressed) or trace.json (uncompressed). +- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -332,6 +330,7 @@ so returned values have to be JSON-serializable. }` - **args** (array) _(optional)_: An optional list of arguments to pass to the function. +- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -367,6 +366,7 @@ so returned values have to be JSON-serializable. - **filePath** (string) _(optional)_: The absolute path, or a path relative to the current working directory, to save the screenshot to instead of attaching it to the response. - **format** (enum: "png", "jpeg", "webp") _(optional)_: Type of format to save the screenshot as. Default is "png" - **fullPage** (boolean) _(optional)_: If set to true takes a screenshot of the full page instead of the currently visible viewport. Incompatible with uid. +- **pageId** (number) _(optional)_: Targets a specific page by ID. - **quality** (number) _(optional)_: Compression quality for JPEG and WebP formats (0-100). Higher values mean better quality but larger file sizes. Ignored for PNG format. - **uid** (string) _(optional)_: The uid of an element on the page from the page content snapshot. 
If omitted takes a pages screenshot. @@ -381,6 +381,7 @@ in the DevTools Elements panel (if any). **Parameters:** - **filePath** (string) _(optional)_: The absolute path, or a path relative to the current working directory, to save the snapshot to instead of attaching it to the response. +- **pageId** (number) _(optional)_: Targets a specific page by ID. - **verbose** (boolean) _(optional)_: Whether to include all possible information available in the full a11y tree. Default is false. --- diff --git a/scripts/eval_scenarios/page_id_routing_test.ts b/scripts/eval_scenarios/page_id_routing_test.ts new file mode 100644 index 000000000..1ee27b37d --- /dev/null +++ b/scripts/eval_scenarios/page_id_routing_test.ts @@ -0,0 +1,40 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import assert from 'node:assert'; + +import type {TestScenario} from '../eval_gemini.ts'; + +export const scenario: TestScenario = { + prompt: `Open three new pages in isolated contexts: +- Page A at data:text/html,

Page A

+- Page B at data:text/html,

Page B

+- Page C at data:text/html,

Page C

+Then take screenshots of all three pages in parallel.`, + maxTurns: 8, + expectations: calls => { + // Exactly 3 screenshot calls. + const screenshots = calls.filter(c => c.name === 'take_screenshot'); + assert.strictEqual(screenshots.length, 3, 'Should take 3 screenshots'); + + // Each screenshot must carry a numeric pageId. + for (const ss of screenshots) { + assert.strictEqual( + typeof ss.args.pageId, + 'number', + 'Screenshot should use pageId', + ); + } + + // All pageIds should be distinct (one per page). + const pageIds = new Set(screenshots.map(s => s.args.pageId)); + assert.strictEqual( + pageIds.size, + 3, + 'Each screenshot should target a different page', + ); + }, +}; diff --git a/src/McpContext.ts b/src/McpContext.ts index 6f0920b7d..3efa93e36 100644 --- a/src/McpContext.ts +++ b/src/McpContext.ts @@ -140,8 +140,6 @@ export class McpContext implements Context { #pageToDevToolsPage = new Map(); #selectedPage?: Page; - // Per-context selected page tracking for parallel agent support. - #contextSelectedPage = new Map(); #textSnapshot: TextSnapshot | null = null; #networkCollector: NetworkCollector; #consoleCollector: ConsoleCollector; @@ -518,39 +516,11 @@ export class McpContext implements Context { return page; } - resolvePageByContext(isolatedContext?: string): Page { - if (isolatedContext === undefined) { + resolvePageById(pageId?: number): Page { + if (pageId === undefined) { return this.getSelectedPage(); } - - // Try the per-context selected page first. - const tracked = this.#contextSelectedPage.get(isolatedContext); - if (tracked && !tracked.isClosed()) { - return tracked; - } - - // Fall back: find any non-closed page in the context. - const ctx = this.#isolatedContexts.get(isolatedContext); - if (!ctx) { - throw new Error( - `No isolated context named "${isolatedContext}" exists. 
` + - `Create one first with new_page(isolatedContext: "${isolatedContext}").`, - ); - } - - for (const page of this.#pages) { - if ( - !page.isClosed() && - this.#pageToIsolatedContextName.get(page) === isolatedContext - ) { - this.#contextSelectedPage.set(isolatedContext, page); - return page; - } - } - - throw new Error( - `No open page found in isolated context "${isolatedContext}".`, - ); + return this.getPageById(pageId); } getPageById(pageId: number): Page { @@ -587,12 +557,6 @@ export class McpContext implements Context { void newPage.emulateFocusedPage(true).catch(error => { this.logger('Error turning on focused page emulation', error); }); - - // Track per-context selected page for parallel agent routing. - const contextName = this.#pageToIsolatedContextName.get(newPage); - if (contextName) { - this.#contextSelectedPage.set(contextName, newPage); - } } #updateSelectedPageTimeouts() { diff --git a/src/tools/ToolDefinition.ts b/src/tools/ToolDefinition.ts index 85814038e..d312add6e 100644 --- a/src/tools/ToolDefinition.ts +++ b/src/tools/ToolDefinition.ts @@ -109,7 +109,7 @@ export type Context = Readonly<{ recordedTraces(): TraceResult[]; storeTraceRecording(result: TraceResult): void; getSelectedPage(): Page; - resolvePageByContext(isolatedContext?: string): Page; + resolvePageById(pageId?: number): Page; getDialog(): Dialog | undefined; clearDialog(): void; getPageById(pageId: number): Page; @@ -196,16 +196,8 @@ export function defineTool< export const CLOSE_PAGE_ERROR = 'The last open page cannot be closed. It is fine to keep it open.'; -export const isolatedContextSchema = { - isolatedContext: zod - .string() - .optional() - .describe( - 'The name of the isolated browser context to resolve the page from. ' + - 'When provided, the tool operates on the page belonging to this context ' + - 'instead of the globally selected page. 
' + - 'Use this to avoid race conditions when multiple agents work in parallel.', - ), +export const pageIdSchema = { + pageId: zod.number().optional().describe('Targets a specific page by ID.'), }; export const timeoutSchema = { diff --git a/src/tools/emulation.ts b/src/tools/emulation.ts index 084b311cd..fefe95ef1 100644 --- a/src/tools/emulation.ts +++ b/src/tools/emulation.ts @@ -8,7 +8,7 @@ import {zod, PredefinedNetworkConditions} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool, isolatedContextSchema} from './ToolDefinition.js'; +import {defineTool, pageIdSchema} from './ToolDefinition.js'; const throttlingOptions: [string, ...string[]] = [ 'No emulation', @@ -24,7 +24,7 @@ export const emulate = defineTool({ readOnlyHint: false, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, networkConditions: zod .enum(throttlingOptions) .optional() @@ -105,9 +105,7 @@ export const emulate = defineTool({ ), }, handler: async (request, _response, context) => { - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); await context.emulate(request.params, page); }, }); diff --git a/src/tools/input.ts b/src/tools/input.ts index 32fd0bb67..c8388a70f 100644 --- a/src/tools/input.ts +++ b/src/tools/input.ts @@ -11,7 +11,7 @@ import type {ElementHandle, KeyInput, Page} from '../third_party/index.js'; import {parseKey} from '../utils/keyboard.js'; import {ToolCategory} from './categories.js'; -import {defineTool, isolatedContextSchema} from './ToolDefinition.js'; +import {defineTool, pageIdSchema} from './ToolDefinition.js'; const dblClickSchema = zod .boolean() @@ -90,16 +90,14 @@ export const clickAt = defineTool({ conditions: ['computerVision'], }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, x: zod.number().describe('The x coordinate'), y: zod.number().describe('The y coordinate'), dblClick: dblClickSchema, 
includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); await context.waitForEventsAfterAction(async () => { await page.mouse.click(request.params.x, request.params.y, { clickCount: request.params.dblClick ? 2 : 1, @@ -111,7 +109,7 @@ export const clickAt = defineTool({ : `Successfully clicked at the coordinates`, ); if (request.params.includeSnapshot) { - response.includeSnapshot({ page }); + response.includeSnapshot({page}); } }, }); @@ -209,8 +207,7 @@ async function fillFormElement( const timeoutPerChar = 10; // ms const targetPage = page ?? context.getSelectedPage(); const fillTimeout = - targetPage.getDefaultTimeout() + - value.length * timeoutPerChar; + targetPage.getDefaultTimeout() + value.length * timeoutPerChar; await handle.asLocator().setTimeout(fillTimeout).fill(value); } } catch (error) { @@ -228,7 +225,7 @@ export const fill = defineTool({ readOnlyHint: false, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, uid: zod .string() .describe( @@ -238,9 +235,7 @@ export const fill = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); await context.waitForEventsAfterAction(async () => { await fillFormElement( request.params.uid, @@ -251,7 +246,7 @@ export const fill = defineTool({ }); response.appendResponseLine(`Successfully filled out the element`); if (request.params.includeSnapshot) { - response.includeSnapshot({ page }); + response.includeSnapshot({page}); } }, }); @@ -321,7 +316,7 @@ export const fillForm = defineTool({ readOnlyHint: false, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, elements: zod .array( zod.object({ @@ -333,9 +328,7 @@ export 
const fillForm = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); for (const element of request.params.elements) { await context.waitForEventsAfterAction(async () => { await fillFormElement( @@ -348,7 +341,7 @@ export const fillForm = defineTool({ } response.appendResponseLine(`Successfully filled out the form`); if (request.params.includeSnapshot) { - response.includeSnapshot({ page }); + response.includeSnapshot({page}); } }, }); @@ -361,7 +354,7 @@ export const uploadFile = defineTool({ readOnlyHint: false, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, uid: zod .string() .describe( @@ -383,9 +376,7 @@ export const uploadFile = defineTool({ // a type=file element. In this case, we want to default to // Page.waitForFileChooser() and upload the file this way. try { - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); const [fileChooser] = await Promise.all([ page.waitForFileChooser({timeout: 3000}), handle.asLocator().click(), @@ -398,10 +389,8 @@ export const uploadFile = defineTool({ } } if (request.params.includeSnapshot) { - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); - response.includeSnapshot({ page }); + const page = context.resolvePageById(request.params.pageId); + response.includeSnapshot({page}); } response.appendResponseLine(`File uploaded from ${filePath}.`); } finally { @@ -418,7 +407,7 @@ export const pressKey = defineTool({ readOnlyHint: false, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, key: zod .string() .describe( @@ -427,9 +416,7 @@ export const pressKey = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - const page = 
context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); const tokens = parseKey(request.params.key); const [key, ...modifiers] = tokens; @@ -447,7 +434,7 @@ export const pressKey = defineTool({ `Successfully pressed key: ${request.params.key}`, ); if (request.params.includeSnapshot) { - response.includeSnapshot({ page }); + response.includeSnapshot({page}); } }, }); diff --git a/src/tools/pages.ts b/src/tools/pages.ts index 3896c2d66..b8f6927c3 100644 --- a/src/tools/pages.ts +++ b/src/tools/pages.ts @@ -12,7 +12,7 @@ import {ToolCategory} from './categories.js'; import { CLOSE_PAGE_ERROR, defineTool, - isolatedContextSchema, + pageIdSchema, timeoutSchema, } from './ToolDefinition.js'; @@ -137,7 +137,7 @@ export const navigatePage = defineTool({ readOnlyHint: false, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, type: zod .enum(['url', 'back', 'forward', 'reload']) .optional() @@ -164,9 +164,7 @@ export const navigatePage = defineTool({ ...timeoutSchema, }, handler: async (request, response, context) => { - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); const options = { timeout: request.params.timeout, }; @@ -289,14 +287,12 @@ export const resizePage = defineTool({ readOnlyHint: false, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, width: zod.number().describe('Page width'), height: zod.number().describe('Page height'), }, handler: async (request, response, context) => { - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); try { const browser = page.browser(); diff --git a/src/tools/performance.ts b/src/tools/performance.ts index eff3e0ade..1183f2f40 100644 --- a/src/tools/performance.ts +++ b/src/tools/performance.ts @@ -17,7 +17,7 @@ import { import {ToolCategory} from 
'./categories.js'; import type {Context, Response} from './ToolDefinition.js'; -import {defineTool, isolatedContextSchema} from './ToolDefinition.js'; +import {defineTool, pageIdSchema} from './ToolDefinition.js'; const filePathSchema = zod .string() @@ -34,7 +34,7 @@ export const startTrace = defineTool({ readOnlyHint: false, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, reload: zod .boolean() .describe( @@ -56,9 +56,7 @@ export const startTrace = defineTool({ } context.setIsRunningPerformanceTrace(true); - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); const pageUrlForTracing = page.url(); if (request.params.reload) { @@ -124,16 +122,14 @@ export const stopTrace = defineTool({ readOnlyHint: false, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, filePath: filePathSchema, }, handler: async (request, response, context) => { if (!context.isRunningPerformanceTrace()) { return; } - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); await stopTracingAndAppendOutput( page, response, diff --git a/src/tools/screencast.ts b/src/tools/screencast.ts index 57578fbfb..7d05ab455 100644 --- a/src/tools/screencast.ts +++ b/src/tools/screencast.ts @@ -12,7 +12,7 @@ import {zod} from '../third_party/index.js'; import type {ScreenRecorder} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool, isolatedContextSchema} from './ToolDefinition.js'; +import {defineTool, pageIdSchema} from './ToolDefinition.js'; async function generateTempFilePath(): Promise { const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'chrome-devtools-mcp-')); @@ -29,7 +29,7 @@ export const startScreencast = defineTool({ conditions: ['screencast'], }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, path: zod .string() .optional() @@ -48,9 +48,7 @@ 
export const startScreencast = defineTool({ const filePath = request.params.path ?? (await generateTempFilePath()); const resolvedPath = path.resolve(filePath); - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); let recorder: ScreenRecorder; try { diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts index 24ae0715a..3894853af 100644 --- a/src/tools/screenshot.ts +++ b/src/tools/screenshot.ts @@ -8,7 +8,7 @@ import {zod} from '../third_party/index.js'; import type {ElementHandle, Page} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool, isolatedContextSchema} from './ToolDefinition.js'; +import {defineTool, pageIdSchema} from './ToolDefinition.js'; export const screenshot = defineTool({ name: 'take_screenshot', @@ -19,7 +19,7 @@ export const screenshot = defineTool({ readOnlyHint: false, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, format: zod .enum(['png', 'jpeg', 'webp']) .default('png') @@ -60,9 +60,7 @@ export const screenshot = defineTool({ if (request.params.uid) { pageOrHandle = await context.getElementByUid(request.params.uid); } else { - pageOrHandle = context.resolvePageByContext( - request.params.isolatedContext, - ); + pageOrHandle = context.resolvePageById(request.params.pageId); } const format = request.params.format; diff --git a/src/tools/script.ts b/src/tools/script.ts index 9a7fa47a9..d88c1aa30 100644 --- a/src/tools/script.ts +++ b/src/tools/script.ts @@ -8,7 +8,7 @@ import {zod} from '../third_party/index.js'; import type {Frame, JSHandle, Page} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool, isolatedContextSchema} from './ToolDefinition.js'; +import {defineTool, pageIdSchema} from './ToolDefinition.js'; export const evaluateScript = defineTool({ name: 'evaluate_script', @@ -19,7 +19,7 @@ so returned values have to be 
JSON-serializable.`, readOnlyHint: false, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, function: zod.string().describe( `A JavaScript function declaration to be executed by the tool in the currently selected page. Example without arguments: \`() => { @@ -63,7 +63,7 @@ Example with arguments: \`(el) => { } else { pageOrFrame = [...frames.values()][0] ?? - context.resolvePageByContext(request.params.isolatedContext); + context.resolvePageById(request.params.pageId); } const fn = await pageOrFrame.evaluateHandle( `(${request.params.function})`, diff --git a/src/tools/snapshot.ts b/src/tools/snapshot.ts index f99106f0a..68b7fda15 100644 --- a/src/tools/snapshot.ts +++ b/src/tools/snapshot.ts @@ -7,11 +7,7 @@ import {zod} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import { - defineTool, - isolatedContextSchema, - timeoutSchema, -} from './ToolDefinition.js'; +import {defineTool, pageIdSchema, timeoutSchema} from './ToolDefinition.js'; export const takeSnapshot = defineTool({ name: 'take_snapshot', @@ -24,7 +20,7 @@ in the DevTools Elements panel (if any).`, readOnlyHint: false, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, verbose: zod .boolean() .optional() @@ -39,9 +35,7 @@ in the DevTools Elements panel (if any).`, ), }, handler: async (request, response, context) => { - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); response.includeSnapshot({ verbose: request.params.verbose ?? 
false, filePath: request.params.filePath, @@ -58,7 +52,7 @@ export const waitFor = defineTool({ readOnlyHint: true, }, schema: { - ...isolatedContextSchema, + ...pageIdSchema, text: zod .array(zod.string()) .min(1) @@ -68,9 +62,7 @@ export const waitFor = defineTool({ ...timeoutSchema, }, handler: async (request, response, context) => { - const page = context.resolvePageByContext( - request.params.isolatedContext, - ); + const page = context.resolvePageById(request.params.pageId); await context.waitForTextOnPage( request.params.text, request.params.timeout, @@ -81,6 +73,6 @@ export const waitFor = defineTool({ `Element matching one of ${JSON.stringify(request.params.text)} found.`, ); - response.includeSnapshot({ page }); + response.includeSnapshot({page}); }, }); diff --git a/tests/tools/emulation.test.ts b/tests/tools/emulation.test.ts index 671ba1b23..2aa31dad6 100644 --- a/tests/tools/emulation.test.ts +++ b/tests/tools/emulation.test.ts @@ -8,7 +8,6 @@ import assert from 'node:assert'; import {beforeEach, describe, it} from 'node:test'; import {emulate} from '../../src/tools/emulation.js'; -import {newPage, selectPage} from '../../src/tools/pages.js'; import {serverHooks} from '../server.js'; import {html, withMcpContext} from '../utils.js'; @@ -471,67 +470,6 @@ describe('emulation', () => { }); }); - describe('isolatedContext routing', () => { - beforeEach(() => { - server.addHtmlRoute('/emulate-test', html`

Emulate Test

`); - }); - - it('emulates viewport on the isolatedContext page, not the global selection', async () => { - await withMcpContext(async (response, context) => { - // Create an isolated page. - await newPage.handler( - { - params: { - url: server.baseUrl + '/emulate-test', - isolatedContext: 'emulate-ctx', - }, - }, - response, - context, - ); - const isolatedPage = context.getSelectedPage(); - - // Switch global selection back to the default page. - await selectPage.handler({params: {pageId: 1}}, response, context); - const defaultPage = context.getSelectedPage(); - assert.notStrictEqual(defaultPage, isolatedPage); - - // Emulate viewport on the isolated page via isolatedContext. - await emulate.handler( - { - params: { - isolatedContext: 'emulate-ctx', - viewport: { - width: 390, - height: 844, - isMobile: true, - hasTouch: true, - }, - }, - }, - response, - context, - ); - - // Verify the isolated page received the viewport. - const isolatedViewport = await isolatedPage.evaluate(() => ({ - width: window.innerWidth, - height: window.innerHeight, - hasTouch: navigator.maxTouchPoints > 0, - })); - assert.strictEqual(isolatedViewport.width, 390); - assert.strictEqual(isolatedViewport.height, 844); - assert.strictEqual(isolatedViewport.hasTouch, true); - - // Verify the default page was NOT affected. 
- const defaultViewport = await defaultPage.evaluate(() => ({ - width: window.innerWidth, - })); - assert.notStrictEqual(defaultViewport.width, 390); - }); - }); - }); - describe('colorScheme', () => { it('emulates color scheme', async () => { await withMcpContext(async (response, context) => { diff --git a/tests/tools/pages.test.ts b/tests/tools/pages.test.ts index 0efb04419..3f5007b63 100644 --- a/tests/tools/pages.test.ts +++ b/tests/tools/pages.test.ts @@ -235,10 +235,10 @@ describe('pages', () => { }); }); - describe('resolvePageByContext', () => { + describe('resolvePageById', () => { it('returns the correct page regardless of global selection', async () => { await withMcpContext(async (response, context) => { - // Create two pages in separate isolated contexts with different content. + // Create two pages with different content. await newPage.handler( { params: { @@ -250,6 +250,7 @@ describe('pages', () => { context, ); const pageA = context.getSelectedPage(); + const pageAId = context.getPageId(pageA)!; await newPage.handler( { @@ -262,37 +263,32 @@ describe('pages', () => { context, ); const pageB = context.getSelectedPage(); + const pageBId = context.getPageId(pageB)!; // Global selection is now pageB (the last created page). assert.strictEqual(context.getSelectedPage(), pageB); - // resolvePageByContext should return the correct page for each context, + // resolvePageById should return the correct page for each ID, // regardless of which page is globally selected. 
- assert.strictEqual(context.resolvePageByContext('ctx-a'), pageA); - assert.strictEqual(context.resolvePageByContext('ctx-b'), pageB); + assert.strictEqual(context.resolvePageById(pageAId), pageA); + assert.strictEqual(context.resolvePageById(pageBId), pageB); }); }); - it('falls back to getSelectedPage when no isolatedContext is provided', async () => { + it('falls back to getSelectedPage when no pageId is provided', async () => { await withMcpContext(async (_response, context) => { const selectedPage = context.getSelectedPage(); - assert.strictEqual( - context.resolvePageByContext(undefined), - selectedPage, - ); + assert.strictEqual(context.resolvePageById(undefined), selectedPage); }); }); - it('throws for an unknown context name', async () => { + it('throws for an unknown pageId', async () => { await withMcpContext(async (_response, context) => { - assert.throws( - () => context.resolvePageByContext('nonexistent'), - /No isolated context named "nonexistent" exists/, - ); + assert.throws(() => context.resolvePageById(99999), /No page found/); }); }); - it('navigate_page targets the isolatedContext page, not the global selection', async () => { + it('navigate_page targets the pageId page, not the global selection', async () => { await withMcpContext(async (response, context) => { await newPage.handler( { @@ -305,17 +301,18 @@ describe('pages', () => { context, ); const isolatedPage = context.getSelectedPage(); + const isolatedPageId = context.getPageId(isolatedPage)!; // Switch global selection back to the default page. await selectPage.handler({params: {pageId: 1}}, response, context); assert.notStrictEqual(context.getSelectedPage(), isolatedPage); - // Navigate using isolatedContext; should target the isolated page. + // Navigate using pageId; should target the isolated page. await navigatePage.handler( { params: { url: 'data:text/html,

Navigated

', - isolatedContext: 'nav-ctx', + pageId: isolatedPageId, }, }, response, diff --git a/tests/tools/screenshot.test.ts b/tests/tools/screenshot.test.ts index 1eda6615d..dab541412 100644 --- a/tests/tools/screenshot.test.ts +++ b/tests/tools/screenshot.test.ts @@ -10,7 +10,6 @@ import {tmpdir} from 'node:os'; import {join} from 'node:path'; import {describe, it} from 'node:test'; -import {newPage, selectPage} from '../../src/tools/pages.js'; import {screenshot} from '../../src/tools/screenshot.js'; import {screenshots} from '../snapshot.js'; import {html, withMcpContext} from '../utils.js'; @@ -261,45 +260,5 @@ describe('screenshot', () => { ); }); }); - - it('screenshots the isolatedContext page, not the global selection', async () => { - await withMcpContext(async (response, context) => { - // Set distinct content on the default page. - const defaultPage = context.getSelectedPage(); - await defaultPage.setContent( - html`
`, - ); - - // Create an isolated page with different content. - await newPage.handler( - { - params: { - url: 'data:text/html,
', - isolatedContext: 'screenshot-ctx', - }, - }, - response, - context, - ); - - // Switch global selection back to the default page. - await selectPage.handler({params: {pageId: 1}}, response, context); - assert.strictEqual(context.getSelectedPage(), defaultPage); - - // Take a screenshot using isolatedContext. - const {McpResponse} = await import('../../src/McpResponse.js'); - const screenshotResponse = new McpResponse(); - await screenshot.handler( - {params: {format: 'png', isolatedContext: 'screenshot-ctx'}}, - screenshotResponse, - context, - ); - - // Should have produced an image (basic sanity: it didn't crash and - // returned something from the isolated page, not the default). - assert.equal(screenshotResponse.images.length, 1); - assert.equal(screenshotResponse.images[0].mimeType, 'image/png'); - }); - }); }); }); diff --git a/tests/tools/snapshot.test.ts b/tests/tools/snapshot.test.ts index 0cd530bec..2aa40d8be 100644 --- a/tests/tools/snapshot.test.ts +++ b/tests/tools/snapshot.test.ts @@ -7,7 +7,6 @@ import assert from 'node:assert'; import {describe, it} from 'node:test'; -import {newPage, selectPage} from '../../src/tools/pages.js'; import {takeSnapshot, waitFor} from '../../src/tools/snapshot.js'; import {html, withMcpContext} from '../utils.js'; @@ -181,81 +180,4 @@ describe('snapshot', () => { }); }); }); - - describe('isolatedContext routing', () => { - it('take_snapshot returns content from the isolatedContext page, not the global selection', async () => { - await withMcpContext(async (response, context) => { - // Create an isolated page with unique content. - await newPage.handler( - { - params: { - url: 'data:text/html,

Isolated Snapshot Content

', - isolatedContext: 'snap-ctx', - }, - }, - response, - context, - ); - - // Switch global selection back to the default page. - await selectPage.handler({params: {pageId: 1}}, response, context); - - // Take snapshot using isolatedContext. - const snapshotResponse = new (await import('../../src/McpResponse.js')).McpResponse(); - await takeSnapshot.handler( - {params: {isolatedContext: 'snap-ctx'}}, - snapshotResponse, - context, - ); - - // The snapshot should reflect the isolated page's content. - const result = await snapshotResponse.handle('take_snapshot', context); - const text = result.content - .filter(c => c.type === 'text') - .map(c => (c as {text: string}).text) - .join(''); - assert.ok( - text.includes('Isolated Snapshot Content'), - `Expected snapshot to contain "Isolated Snapshot Content" but got: ${text.slice(0, 200)}`, - ); - }); - }); - - it('wait_for finds text on the isolatedContext page, not the global selection', async () => { - await withMcpContext(async (response, context) => { - // Create an isolated page with target text. - await newPage.handler( - { - params: { - url: 'data:text/html,

Unique Isolated Text

', - isolatedContext: 'wait-ctx', - }, - }, - response, - context, - ); - - // Switch global selection away. - await selectPage.handler({params: {pageId: 1}}, response, context); - - // wait_for should find text on the isolated page. - const waitResponse = new (await import('../../src/McpResponse.js')).McpResponse(); - await waitFor.handler( - { - params: { - text: 'Unique Isolated Text', - isolatedContext: 'wait-ctx', - }, - }, - waitResponse, - context, - ); - - assert.equal( - waitResponse.responseLines[0], - 'Element with text "Unique Isolated Text" found.', - ); - }); - }); - }); }); From 9f42f944bd4b0eee06132b7054f2cfad7b872bbf Mon Sep 17 00:00:00 2001 From: Stanislav Publika <10758542+passtas@users.noreply.github.com> Date: Tue, 24 Feb 2026 11:09:09 +0000 Subject: [PATCH 3/5] refactor: centralize pageId routing via pageScoped annotation and McpPage wrapper - Add pageScoped annotation to ToolDefinition; registerTool() auto-injects pageIdSchema and resolves the page centrally via resolvePageById() - defineTool() wrapper guarantees request.page is always populated, falling back to getSelectedPage() when pageId is omitted - Remove manual ...pageIdSchema spread and resolvePageById() calls from all 15 page-scoped tool handlers - Introduce McpPage class consolidating per-page state (dialog, snapshot, emulation settings, metadata) that was previously scattered across 8 Maps/WeakMaps in McpContext - Store text snapshots per-page on McpPage so parallel agents taking snapshots on different pages no longer clobber each other - Cross-page uid lookup in getElementByUid()/getAXNodeByUid() searches all McpPage instances, enabling uid resolution from any page's snapshot - Update page_id_routing eval to test per-page snapshot isolation and cross-page uid click resolution --- docs/tool-reference.md | 29 +- .../eval_scenarios/page_id_routing_test.ts | 43 ++- src/McpContext.ts | 318 +++++++++++------- src/McpPage.ts | 85 +++++ src/McpResponse.ts | 2 +- src/server.ts | 12 +- 
src/tools/ToolDefinition.ts | 44 ++- src/tools/emulation.ts | 6 +- src/tools/input.ts | 29 +- src/tools/pages.ts | 25 +- src/tools/performance.ts | 10 +- src/tools/screencast.ts | 6 +- src/tools/screenshot.ts | 6 +- src/tools/script.ts | 8 +- src/tools/snapshot.ts | 13 +- src/types.ts | 39 +++ tests/McpContext.test.ts | 144 ++++++++ tests/tools/pages.test.ts | 246 +++++++++++++- 18 files changed, 837 insertions(+), 228 deletions(-) create mode 100644 src/McpPage.ts create mode 100644 src/types.ts diff --git a/docs/tool-reference.md b/docs/tool-reference.md index eed5288ac..4ac0c7d2a 100644 --- a/docs/tool-reference.md +++ b/docs/tool-reference.md @@ -1,8 +1,8 @@ -# Chrome DevTools MCP Tool Reference (~7267 cl100k_base tokens) +# Chrome DevTools MCP Tool Reference (~7472 cl100k_base tokens) -- **[Input automation](#input-automation)** (8 tools) +- **[Input automation](#input-automation)** (9 tools) - [`click`](#click) - [`drag`](#drag) - [`fill`](#fill) @@ -10,6 +10,7 @@ - [`handle_dialog`](#handle_dialog) - [`hover`](#hover) - [`press_key`](#press_key) + - [`type_text`](#type_text) - [`upload_file`](#upload_file) - **[Navigation automation](#navigation-automation)** (6 tools) - [`close_page`](#close_page) @@ -71,7 +72,6 @@ - **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot - **value** (string) **(required)**: The value to [`fill`](#fill) in - **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. -- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -83,7 +83,6 @@ - **elements** (array) **(required)**: Elements from snapshot to [`fill`](#fill) out. - **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. -- **pageId** (number) _(optional)_: Targets a specific page by ID. 
--- @@ -117,7 +116,17 @@ - **key** (string) **(required)**: A key or a combination (e.g., "Enter", "Control+A", "Control++", "Control+Shift+R"). Modifiers: Control, Shift, Alt, Meta - **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. -- **pageId** (number) _(optional)_: Targets a specific page by ID. + +--- + +### `type_text` + +**Description:** Type text using keyboard into a previously focused input + +**Parameters:** + +- **text** (string) **(required)**: The text to type +- **submitKey** (string) _(optional)_: Optional key to press after typing. E.g., "Enter", "Tab", "Escape" --- @@ -130,7 +139,6 @@ - **filePath** (string) **(required)**: The local path of the file to upload - **uid** (string) **(required)**: The uid of the file input element or an element that will open file chooser on the page from the page content snapshot - **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. -- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -163,7 +171,6 @@ - **handleBeforeUnload** (enum: "accept", "decline") _(optional)_: Whether to auto accept or beforeunload dialogs triggered by this navigation. Default is accept. - **ignoreCache** (boolean) _(optional)_: Whether to ignore cache on reload. - **initScript** (string) _(optional)_: A JavaScript script to be executed on each new document before any other scripts for the next navigation. -- **pageId** (number) _(optional)_: Targets a specific page by ID. - **timeout** (integer) _(optional)_: Maximum wait time in milliseconds. If set to 0, the default timeout will be used. - **type** (enum: "url", "back", "forward", "reload") _(optional)_: Navigate the page by URL, back or forward in history, or reload. - **url** (string) _(optional)_: Target URL (only type=url) @@ -201,7 +208,6 @@ **Parameters:** - **text** (array) **(required)**: Non-empty list of texts. 
Resolves when any value appears on the page. -- **pageId** (number) _(optional)_: Targets a specific page by ID. - **timeout** (integer) _(optional)_: Maximum wait time in milliseconds. If set to 0, the default timeout will be used. --- @@ -218,7 +224,6 @@ - **cpuThrottlingRate** (number) _(optional)_: Represents the CPU slowdown factor. Set the rate to 1 to disable throttling. If omitted, throttling remains unchanged. - **geolocation** (unknown) _(optional)_: Geolocation to [`emulate`](#emulate). Set to null to clear the geolocation override. - **networkConditions** (enum: "No emulation", "Offline", "Slow 3G", "Fast 3G", "Slow 4G", "Fast 4G") _(optional)_: Throttle network. Set to "No emulation" to disable. If omitted, conditions remain unchanged. -- **pageId** (number) _(optional)_: Targets a specific page by ID. - **userAgent** (unknown) _(optional)_: User agent to [`emulate`](#emulate). Set to null to clear the user agent override. - **viewport** (unknown) _(optional)_: Viewport to [`emulate`](#emulate). Set to null to reset to the default viewport. @@ -232,7 +237,6 @@ - **height** (number) **(required)**: Page height - **width** (number) **(required)**: Page width -- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -258,7 +262,6 @@ - **autoStop** (boolean) **(required)**: Determines if the trace recording should be automatically stopped. - **reload** (boolean) **(required)**: Determines if, once tracing has started, the current selected page should be automatically reloaded. Navigate the page to the right URL using the [`navigate_page`](#navigate_page) tool BEFORE starting the trace if reload or autoStop is set to true. - **filePath** (string) _(optional)_: The absolute file path, or a file path relative to the current working directory, to save the raw trace data. For example, trace.json.gz (compressed) or trace.json (uncompressed). -- **pageId** (number) _(optional)_: Targets a specific page by ID. 
--- @@ -269,7 +272,6 @@ **Parameters:** - **filePath** (string) _(optional)_: The absolute file path, or a file path relative to the current working directory, to save the raw trace data. For example, trace.json.gz (compressed) or trace.json (uncompressed). -- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -330,7 +332,6 @@ so returned values have to be JSON-serializable. }` - **args** (array) _(optional)_: An optional list of arguments to pass to the function. -- **pageId** (number) _(optional)_: Targets a specific page by ID. --- @@ -366,7 +367,6 @@ so returned values have to be JSON-serializable. - **filePath** (string) _(optional)_: The absolute path, or a path relative to the current working directory, to save the screenshot to instead of attaching it to the response. - **format** (enum: "png", "jpeg", "webp") _(optional)_: Type of format to save the screenshot as. Default is "png" - **fullPage** (boolean) _(optional)_: If set to true takes a screenshot of the full page instead of the currently visible viewport. Incompatible with uid. -- **pageId** (number) _(optional)_: Targets a specific page by ID. - **quality** (number) _(optional)_: Compression quality for JPEG and WebP formats (0-100). Higher values mean better quality but larger file sizes. Ignored for PNG format. - **uid** (string) _(optional)_: The uid of an element on the page from the page content snapshot. If omitted takes a pages screenshot. @@ -381,7 +381,6 @@ in the DevTools Elements panel (if any). **Parameters:** - **filePath** (string) _(optional)_: The absolute path, or a path relative to the current working directory, to save the snapshot to instead of attaching it to the response. -- **pageId** (number) _(optional)_: Targets a specific page by ID. - **verbose** (boolean) _(optional)_: Whether to include all possible information available in the full a11y tree. Default is false. 
--- diff --git a/scripts/eval_scenarios/page_id_routing_test.ts b/scripts/eval_scenarios/page_id_routing_test.ts index 1ee27b37d..99bde60c9 100644 --- a/scripts/eval_scenarios/page_id_routing_test.ts +++ b/scripts/eval_scenarios/page_id_routing_test.ts @@ -9,32 +9,31 @@ import assert from 'node:assert'; import type {TestScenario} from '../eval_gemini.ts'; export const scenario: TestScenario = { - prompt: `Open three new pages in isolated contexts: -- Page A at data:text/html,

Page A

-- Page B at data:text/html,

Page B

-- Page C at data:text/html,

Page C

-Then take screenshots of all three pages in parallel.`, - maxTurns: 8, + prompt: `Open two new pages in isolated contexts: +- Page A (isolatedContext "contextA") at data:text/html, +- Page B (isolatedContext "contextB") at data:text/html, +Then take a snapshot of Page A, take a snapshot of Page B, and then click the button on Page A.`, + maxTurns: 12, expectations: calls => { - // Exactly 3 screenshot calls. - const screenshots = calls.filter(c => c.name === 'take_screenshot'); - assert.strictEqual(screenshots.length, 3, 'Should take 3 screenshots'); - - // Each screenshot must carry a numeric pageId. - for (const ss of screenshots) { + // Should have 2 new_page calls with isolatedContext. + const newPages = calls.filter(c => c.name === 'new_page'); + assert.strictEqual(newPages.length, 2, 'Should open 2 pages'); + for (const np of newPages) { assert.strictEqual( - typeof ss.args.pageId, - 'number', - 'Screenshot should use pageId', + typeof np.args.isolatedContext, + 'string', + 'new_page should use isolatedContext', ); } - // All pageIds should be distinct (one per page). - const pageIds = new Set(screenshots.map(s => s.args.pageId)); - assert.strictEqual( - pageIds.size, - 3, - 'Each screenshot should target a different page', - ); + // Should have at least 2 take_snapshot calls (one per page). + // The model may use pageId directly or select_page before each snapshot. + const snapshots = calls.filter(c => c.name === 'take_snapshot'); + assert.ok(snapshots.length >= 2, 'Should take at least 2 snapshots'); + + // Should have a click call (resolving uid from Page A's snapshot + // even though Page B was snapshotted after). 
+ const clicks = calls.filter(c => c.name === 'click'); + assert.ok(clicks.length >= 1, 'Should click the button on Page A'); }, }; diff --git a/src/McpContext.ts b/src/McpContext.ts index 3efa93e36..90b2e5a09 100644 --- a/src/McpContext.ts +++ b/src/McpContext.ts @@ -14,6 +14,7 @@ import { UniverseManager, urlsEqual, } from './DevtoolsUtils.js'; +import {McpPage} from './McpPage.js'; import type {ListenerMap, UncaughtError} from './PageCollector.js'; import {NetworkCollector, ConsoleCollector} from './PageCollector.js'; import type {DevTools} from './third_party/index.js'; @@ -38,18 +39,24 @@ import {takeSnapshot} from './tools/snapshot.js'; import {CLOSE_PAGE_ERROR} from './tools/ToolDefinition.js'; import type {Context, DevToolsData} from './tools/ToolDefinition.js'; import type {TraceResult} from './trace-processing/parse.js'; +import type { + EmulationSettings, + GeolocationOptions, + TextSnapshot, + TextSnapshotNode, +} from './types.js'; import { ExtensionRegistry, type InstalledExtension, } from './utils/ExtensionRegistry.js'; import {WaitForHelper} from './WaitForHelper.js'; -export interface TextSnapshotNode extends SerializedAXNode { - id: string; - backendNodeId?: number; - loaderId?: string; - children: TextSnapshotNode[]; -} +export type { + EmulationSettings, + GeolocationOptions, + TextSnapshot, + TextSnapshotNode, +} from './types.js'; export interface ExtensionServiceWorker { url: string; @@ -57,31 +64,6 @@ export interface ExtensionServiceWorker { id: string; } -export interface GeolocationOptions { - latitude: number; - longitude: number; -} - -export interface TextSnapshot { - root: TextSnapshotNode; - idToNode: Map; - snapshotId: string; - selectedElementUid?: string; - // It might happen that there is a selected element, but it is not part of the - // snapshot. This flag indicates if there is any selected element. 
- hasSelectedElement: boolean; - verbose: boolean; -} - -interface EmulationSettings { - networkConditions?: string | null; - cpuThrottlingRate?: number | null; - geolocation?: GeolocationOptions | null; - userAgent?: string | null; - colorScheme?: 'dark' | 'light' | null; - viewport?: Viewport | null; -} - interface McpContextOptions { // Whether the DevTools windows are exposed as pages for debugging of DevTools. experimentalDevToolsDebugging: boolean; @@ -129,18 +111,14 @@ export class McpContext implements Context { // Maps LLM-provided isolatedContext name → Puppeteer BrowserContext. #isolatedContexts = new Map(); - // Reverse lookup: Page → isolatedContext name (for snapshot labeling). - // WeakMap so closed pages are garbage-collected automatically. - #pageToIsolatedContextName = new WeakMap(); // Auto-generated name counter for when no name is provided. #nextIsolatedContextId = 1; #pages: Page[] = []; #extensionServiceWorkers: ExtensionServiceWorker[] = []; - #pageToDevToolsPage = new Map(); + #mcpPages = new Map(); #selectedPage?: Page; - #textSnapshot: TextSnapshot | null = null; #networkCollector: NetworkCollector; #consoleCollector: ConsoleCollector; #devtoolsUniverseManager: UniverseManager; @@ -149,10 +127,8 @@ export class McpContext implements Context { #isRunningTrace = false; #screenRecorderData: {recorder: ScreenRecorder; filePath: string} | null = null; - #emulationSettingsMap = new WeakMap(); - #dialog?: Dialog; + #focusedPagePerContext = new Map(); - #pageIdMap = new WeakMap(); #nextPageId = 1; #extensionServiceWorkerMap = new WeakMap(); @@ -164,8 +140,6 @@ export class McpContext implements Context { #locatorClass: typeof Locator; #options: McpContextOptions; - #uniqueBackendNodeIdToMcpId = new Map(); - private constructor( browser: Browser, logger: Debugger, @@ -207,6 +181,10 @@ export class McpContext implements Context { this.#networkCollector.dispose(); this.#consoleCollector.dispose(); this.#devtoolsUniverseManager.dispose(); + for 
(const mcpPage of this.#mcpPages.values()) { + mcpPage.dispose(); + } + this.#mcpPages.clear(); // Isolated contexts are intentionally not closed here. // Either the entire browser will be closed or we disconnect // without destroying browser state. @@ -242,24 +220,32 @@ export class McpContext implements Context { return this.#networkCollector.getIdForResource(request); } - resolveCdpElementId(cdpBackendNodeId: number): string | undefined { + resolveCdpElementId( + cdpBackendNodeId: number, + page?: Page, + ): string | undefined { if (!cdpBackendNodeId) { this.logger('no cdpBackendNodeId'); return; } - if (this.#textSnapshot === null) { + const snapshots = page + ? [this.#mcpPages.get(page)?.textSnapshot].filter(Boolean) + : [...this.#mcpPages.values()].map(mp => mp.textSnapshot).filter(Boolean); + if (!snapshots.length) { this.logger('no text snapshot'); return; } // TODO: index by backendNodeId instead. - const queue = [this.#textSnapshot.root]; - while (queue.length) { - const current = queue.pop()!; - if (current.backendNodeId === cdpBackendNodeId) { - return current.id; - } - for (const child of current.children) { - queue.push(child); + for (const snapshot of snapshots) { + const queue = [snapshot!.root]; + while (queue.length) { + const current = queue.pop()!; + if (current.backendNodeId === cdpBackendNodeId) { + return current.id; + } + for (const child of current.children) { + queue.push(child); + } } } return; @@ -305,7 +291,6 @@ export class McpContext implements Context { this.#isolatedContexts.set(isolatedContextName, ctx); } page = await ctx.newPage(); - this.#pageToIsolatedContextName.set(page, isolatedContextName); } else { page = await this.browser.newPage({background}); } @@ -320,8 +305,16 @@ export class McpContext implements Context { throw new Error(CLOSE_PAGE_ERROR); } const page = this.getPageById(pageId); + const mcpPage = this.#mcpPages.get(page); + if (mcpPage) { + mcpPage.dispose(); + this.#mcpPages.delete(page); + } + const ctx = 
page.browserContext(); + if (this.#focusedPagePerContext.get(ctx) === page) { + this.#focusedPagePerContext.delete(ctx); + } await page.close({runBeforeUnload: false}); - this.#pageToIsolatedContextName.delete(page); } getNetworkRequestById(reqid: number): HTTPRequest { @@ -340,8 +333,8 @@ export class McpContext implements Context { targetPage?: Page, ): Promise { const page = targetPage ?? this.getSelectedPage(); - const currentSettings = this.#emulationSettingsMap.get(page) ?? {}; - const newSettings: EmulationSettings = {...currentSettings}; + const mcpPage = this.#getMcpPage(page); + const newSettings: EmulationSettings = {...mcpPage.emulationSettings}; let timeoutsNeedUpdate = false; if (options.networkConditions !== undefined) { @@ -432,11 +425,9 @@ export class McpContext implements Context { } } - if (Object.keys(newSettings).length) { - this.#emulationSettingsMap.set(page, newSettings); - } else { - this.#emulationSettingsMap.delete(page); - } + mcpPage.emulationSettings = Object.keys(newSettings).length + ? newSettings + : {}; if (timeoutsNeedUpdate) { this.#updateSelectedPageTimeouts(); @@ -444,33 +435,27 @@ export class McpContext implements Context { } getNetworkConditions(): string | null { - const page = this.getSelectedPage(); - return this.#emulationSettingsMap.get(page)?.networkConditions ?? null; + return this.#getSelectedMcpPage().networkConditions; } getCpuThrottlingRate(): number { - const page = this.getSelectedPage(); - return this.#emulationSettingsMap.get(page)?.cpuThrottlingRate ?? 1; + return this.#getSelectedMcpPage().cpuThrottlingRate; } getGeolocation(): GeolocationOptions | null { - const page = this.getSelectedPage(); - return this.#emulationSettingsMap.get(page)?.geolocation ?? null; + return this.#getSelectedMcpPage().geolocation; } getViewport(): Viewport | null { - const page = this.getSelectedPage(); - return this.#emulationSettingsMap.get(page)?.viewport ?? 
null; + return this.#getSelectedMcpPage().viewport; } getUserAgent(): string | null { - const page = this.getSelectedPage(); - return this.#emulationSettingsMap.get(page)?.userAgent ?? null; + return this.#getSelectedMcpPage().userAgent; } getColorScheme(): 'dark' | 'light' | null { - const page = this.getSelectedPage(); - return this.#emulationSettingsMap.get(page)?.colorScheme ?? null; + return this.#getSelectedMcpPage().colorScheme; } setIsRunningPerformanceTrace(x: boolean): void { @@ -495,12 +480,19 @@ export class McpContext implements Context { return this.#options.performanceCrux; } - getDialog(): Dialog | undefined { - return this.#dialog; + getDialog(page?: Page): Dialog | undefined { + const targetPage = page ?? this.#selectedPage; + if (!targetPage) { + return undefined; + } + return this.#mcpPages.get(targetPage)?.dialog; } - clearDialog(): void { - this.#dialog = undefined; + clearDialog(page?: Page): void { + const targetPage = page ?? this.#selectedPage; + if (targetPage) { + this.#mcpPages.get(targetPage)?.clearDialog(); + } } getSelectedPage(): Page { @@ -524,7 +516,7 @@ export class McpContext implements Context { } getPageById(pageId: number): Page { - const page = this.#pages.find(p => this.#pageIdMap.get(p) === pageId); + const page = this.#pages.find(p => this.#mcpPages.get(p)?.id === pageId); if (!page) { throw new Error('No page found'); } @@ -532,27 +524,35 @@ export class McpContext implements Context { } getPageId(page: Page): number | undefined { - return this.#pageIdMap.get(page); + return this.#mcpPages.get(page)?.id; } - #dialogHandler = (dialog: Dialog): void => { - this.#dialog = dialog; - }; + #getMcpPage(page: Page): McpPage { + const mcpPage = this.#mcpPages.get(page); + if (!mcpPage) { + throw new Error('No McpPage found for the given page.'); + } + return mcpPage; + } + + #getSelectedMcpPage(): McpPage { + return this.#getMcpPage(this.getSelectedPage()); + } isPageSelected(page: Page): boolean { return this.#selectedPage === 
page; } selectPage(newPage: Page): void { - const oldPage = this.#selectedPage; - if (oldPage) { - oldPage.off('dialog', this.#dialogHandler); - void oldPage.emulateFocusedPage(false).catch(error => { + const ctx = newPage.browserContext(); + const oldFocused = this.#focusedPagePerContext.get(ctx); + if (oldFocused && oldFocused !== newPage && !oldFocused.isClosed()) { + void oldFocused.emulateFocusedPage(false).catch(error => { this.logger('Error turning off focused page emulation', error); }); } + this.#focusedPagePerContext.set(ctx, newPage); this.#selectedPage = newPage; - newPage.on('dialog', this.#dialogHandler); this.#updateSelectedPageTimeouts(); void newPage.emulateFocusedPage(true).catch(error => { this.logger('Error turning on focused page emulation', error); @@ -579,32 +579,74 @@ export class McpContext implements Context { return page.getDefaultNavigationTimeout(); } + // Linear scan over per-page snapshots. The page count is small (typically + // 2-10) so a reverse index isn't worthwhile given the uid-reuse lifecycle + // complexity it would introduce. getAXNodeByUid(uid: string) { - return this.#textSnapshot?.idToNode.get(uid); + for (const mcpPage of this.#mcpPages.values()) { + const node = mcpPage.textSnapshot?.idToNode.get(uid); + if (node) { + return node; + } + } + return undefined; + } + + assertUidOnSelectedPage(uid: string): void { + for (const [page, mcpPage] of this.#mcpPages.entries()) { + if (mcpPage.textSnapshot?.idToNode.has(uid)) { + const ctx = page.browserContext(); + const contextSelectedPage = this.#focusedPagePerContext.get(ctx); + if (contextSelectedPage !== page) { + const targetId = mcpPage.id; + const selectedId = contextSelectedPage + ? this.#mcpPages.get(contextSelectedPage)?.id + : this.#getSelectedMcpPage().id; + throw new Error( + `Element uid "${uid}" belongs to page ${targetId}, but page ${selectedId} is currently selected. 
` + + `Call select_page with pageId ${targetId} first.`, + ); + } + // Align global #selectedPage for waitForEventsAfterAction etc. + if (this.#selectedPage !== page) { + this.#selectedPage = page; + } + return; + } + } + throw new Error('No such element found in any snapshot.'); } async getElementByUid(uid: string): Promise> { - if (!this.#textSnapshot?.idToNode.size) { + let anySnapshot = false; + // Search across all per-page snapshots for the uid. + for (const mcpPage of this.#mcpPages.values()) { + if (!mcpPage.textSnapshot) { + continue; + } + anySnapshot = true; + const node = mcpPage.textSnapshot.idToNode.get(uid); + if (node) { + const message = `Element with uid ${uid} no longer exists on the page.`; + try { + const handle = await node.elementHandle(); + if (!handle) { + throw new Error(message); + } + return handle; + } catch (error) { + throw new Error(message, { + cause: error, + }); + } + } + } + if (!anySnapshot) { throw new Error( `No snapshot found. Use ${takeSnapshot.name} to capture one.`, ); } - const node = this.#textSnapshot?.idToNode.get(uid); - if (!node) { - throw new Error('No such element found in the snapshot.'); - } - const message = `Element with uid ${uid} no longer exists on the page.`; - try { - const handle = await node.elementHandle(); - if (!handle) { - throw new Error(message); - } - return handle; - } catch (error) { - throw new Error(message, { - cause: error, - }); - } + throw new Error('No such element found in any snapshot.'); } /** @@ -643,12 +685,15 @@ export class McpContext implements Context { } async createPagesSnapshot(): Promise { - const allPages = await this.#getAllPages(); + const {pages: allPages, isolatedContextNames} = await this.#getAllPages(); for (const page of allPages) { - if (!this.#pageIdMap.has(page)) { - this.#pageIdMap.set(page, this.#nextPageId++); + let mcpPage = this.#mcpPages.get(page); + if (!mcpPage) { + mcpPage = new McpPage(page, this.#nextPageId++); + this.#mcpPages.set(page, mcpPage); } + 
mcpPage.isolatedContextName = isolatedContextNames.get(page); } this.#pages = allPages.filter(page => { @@ -670,7 +715,10 @@ export class McpContext implements Context { return this.#pages; } - async #getAllPages(): Promise { + async #getAllPages(): Promise<{ + pages: Page[]; + isolatedContextNames: Map; + }> { const defaultCtx = this.browser.defaultBrowserContext(); const allPages = await this.browser.pages( this.#options.experimentalIncludeAllPages, @@ -693,22 +741,26 @@ export class McpContext implements Context { } } - // Use page.browserContext() to determine each page's context membership. + // Map each page to its isolated context name (if any). + const isolatedContextNames = new Map(); for (const page of allPages) { const ctx = page.browserContext(); const name = contextToName.get(ctx); if (name) { - this.#pageToIsolatedContextName.set(page, name); + isolatedContextNames.set(page, name); } } - return allPages; + return {pages: allPages, isolatedContextNames}; } async detectOpenDevToolsWindows() { this.logger('Detecting open DevTools windows'); - const pages = await this.#getAllPages(); - this.#pageToDevToolsPage = new Map(); + const {pages} = await this.#getAllPages(); + // Clear all devToolsPage references before re-detecting. + for (const mcpPage of this.#mcpPages.values()) { + mcpPage.devToolsPage = undefined; + } for (const devToolsPage of pages) { if (devToolsPage.url().startsWith('devtools://')) { try { @@ -725,7 +777,10 @@ export class McpContext implements Context { // TODO: lookup without a loop. 
for (const page of this.#pages) { if (urlsEqual(page.url(), urlLike)) { - this.#pageToDevToolsPage.set(page, devToolsPage); + const mcpPage = this.#mcpPages.get(page); + if (mcpPage) { + mcpPage.devToolsPage = devToolsPage; + } } } } catch (error) { @@ -750,11 +805,11 @@ export class McpContext implements Context { } getIsolatedContextName(page: Page): string | undefined { - return this.#pageToIsolatedContextName.get(page); + return this.#mcpPages.get(page)?.isolatedContextName; } getDevToolsPage(page: Page): Page | undefined { - return this.#pageToDevToolsPage.get(page); + return this.#mcpPages.get(page)?.devToolsPage; } async getDevToolsData(): Promise { @@ -800,6 +855,7 @@ export class McpContext implements Context { targetPage?: Page, ): Promise { const page = targetPage ?? this.getSelectedPage(); + const mcpPage = this.#getMcpPage(page); const rootNode = await page.accessibility.snapshot({ includeIframes: true, interestingOnly: !verbose, @@ -808,6 +864,8 @@ export class McpContext implements Context { return; } + const {uniqueBackendNodeIdToMcpId} = mcpPage; + const snapshotId = this.#nextSnapshotId++; // Iterate through the whole accessibility node tree and assign node ids that // will be used for the tree serialization and mapping ids back to nodes. @@ -818,13 +876,13 @@ export class McpContext implements Context { let id = ''; // @ts-expect-error untyped loaderId & backendNodeId. const uniqueBackendId = `${node.loaderId}_${node.backendNodeId}`; - if (this.#uniqueBackendNodeIdToMcpId.has(uniqueBackendId)) { + if (uniqueBackendNodeIdToMcpId.has(uniqueBackendId)) { // Re-use MCP exposed ID if the uniqueId is the same. - id = this.#uniqueBackendNodeIdToMcpId.get(uniqueBackendId)!; + id = uniqueBackendNodeIdToMcpId.get(uniqueBackendId)!; } else { // Only generate a new ID if we have not seen the node before. 
id = `${snapshotId}_${idCounter++}`; - this.#uniqueBackendNodeIdToMcpId.set(uniqueBackendId, id); + uniqueBackendNodeIdToMcpId.set(uniqueBackendId, id); } seenUniqueIds.add(uniqueBackendId); @@ -850,31 +908,37 @@ export class McpContext implements Context { }; const rootNodeWithId = assignIds(rootNode); - this.#textSnapshot = { + const snapshot: TextSnapshot = { root: rootNodeWithId, snapshotId: String(snapshotId), idToNode, hasSelectedElement: false, verbose, }; + mcpPage.textSnapshot = snapshot; const data = devtoolsData ?? (await this.getDevToolsData()); if (data?.cdpBackendNodeId) { - this.#textSnapshot.hasSelectedElement = true; - this.#textSnapshot.selectedElementUid = this.resolveCdpElementId( + snapshot.hasSelectedElement = true; + snapshot.selectedElementUid = this.resolveCdpElementId( data?.cdpBackendNodeId, + page, ); } // Clean up unique IDs that we did not see anymore. - for (const key of this.#uniqueBackendNodeIdToMcpId.keys()) { + for (const key of uniqueBackendNodeIdToMcpId.keys()) { if (!seenUniqueIds.has(key)) { - this.#uniqueBackendNodeIdToMcpId.delete(key); + uniqueBackendNodeIdToMcpId.delete(key); } } } - getTextSnapshot(): TextSnapshot | null { - return this.#textSnapshot; + getTextSnapshot(targetPage?: Page): TextSnapshot | null { + const page = targetPage ?? this.#selectedPage; + if (!page) { + return null; + } + return this.#mcpPages.get(page)?.textSnapshot ?? 
null; } async saveTemporaryFile( @@ -988,7 +1052,7 @@ export class McpContext implements Context { }, } as ListenerMap; }); - const pages = await this.#getAllPages(); + const {pages} = await this.#getAllPages(); await this.#networkCollector.init(pages); } diff --git a/src/McpPage.ts b/src/McpPage.ts new file mode 100644 index 000000000..51a30448f --- /dev/null +++ b/src/McpPage.ts @@ -0,0 +1,85 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type {Dialog, Page, Viewport} from './third_party/index.js'; +import type { + EmulationSettings, + GeolocationOptions, + TextSnapshot, +} from './types.js'; + +/** + * Per-page state wrapper. Consolidates dialog, snapshot, emulation, + * and metadata that were previously scattered across Maps in McpContext. + * + * Internal class consumed only by McpContext. Fields are public for direct + * read/write access. The dialog field is private because it requires an + * event listener lifecycle managed by the constructor/dispose pair. + */ +export class McpPage { + readonly page: Page; + readonly id: number; + + // Snapshot + textSnapshot: TextSnapshot | null = null; + uniqueBackendNodeIdToMcpId = new Map(); + + // Emulation + emulationSettings: EmulationSettings = {}; + + // Metadata + isolatedContextName?: string; + devToolsPage?: Page; + + // Dialog + #dialog?: Dialog; + #dialogHandler: (dialog: Dialog) => void; + + constructor(page: Page, id: number) { + this.page = page; + this.id = id; + this.#dialogHandler = (dialog: Dialog): void => { + this.#dialog = dialog; + }; + page.on('dialog', this.#dialogHandler); + } + + get dialog(): Dialog | undefined { + return this.#dialog; + } + + clearDialog(): void { + this.#dialog = undefined; + } + + get networkConditions(): string | null { + return this.emulationSettings.networkConditions ?? null; + } + + get cpuThrottlingRate(): number { + return this.emulationSettings.cpuThrottlingRate ?? 
1; + } + + get geolocation(): GeolocationOptions | null { + return this.emulationSettings.geolocation ?? null; + } + + get viewport(): Viewport | null { + return this.emulationSettings.viewport ?? null; + } + + get userAgent(): string | null { + return this.emulationSettings.userAgent ?? null; + } + + get colorScheme(): 'dark' | 'light' | null { + return this.emulationSettings.colorScheme ?? null; + } + + dispose(): void { + this.page.off('dialog', this.#dialogHandler); + } +} diff --git a/src/McpResponse.ts b/src/McpResponse.ts index aac01db85..33f631d1e 100644 --- a/src/McpResponse.ts +++ b/src/McpResponse.ts @@ -255,7 +255,7 @@ export class McpResponse implements Response { this.#devToolsData, this.#snapshotParams.page, ); - const textSnapshot = context.getTextSnapshot(); + const textSnapshot = context.getTextSnapshot(this.#snapshotParams.page); if (textSnapshot) { const formatter = new SnapshotFormatter(textSnapshot); if (this.#snapshotParams.filePath) { diff --git a/src/server.ts b/src/server.ts index 42d491800..0d9fc2d63 100644 --- a/src/server.ts +++ b/src/server.ts @@ -24,6 +24,7 @@ import { } from './third_party/index.js'; import {ToolCategory} from './tools/categories.js'; import type {ToolDefinition} from './tools/ToolDefinition.js'; +import {pageIdSchema} from './tools/ToolDefinition.js'; import {createTools} from './tools/tools.js'; import {VERSION} from './version.js'; @@ -149,11 +150,15 @@ export async function createMcpServer( ) { return; } + const schema = tool.annotations.pageScoped + ? {...tool.schema, ...pageIdSchema} + : tool.schema; + server.registerTool( tool.name, { description: tool.description, - inputSchema: tool.schema, + inputSchema: schema, annotations: tool.annotations, }, async (params): Promise => { @@ -168,10 +173,13 @@ export async function createMcpServer( const response = serverArgs.slim ? new SlimMcpResponse(serverArgs) : new McpResponse(serverArgs); - + const page = tool.annotations.pageScoped + ? 
context.resolvePageById(params.pageId as number | undefined) + : undefined; await tool.handler( { params, + page, }, response, context, diff --git a/src/tools/ToolDefinition.ts b/src/tools/ToolDefinition.ts index d312add6e..1be979cb5 100644 --- a/src/tools/ToolDefinition.ts +++ b/src/tools/ToolDefinition.ts @@ -5,7 +5,6 @@ */ import type {ParsedArguments} from '../cli.js'; -import type {TextSnapshotNode, GeolocationOptions} from '../McpContext.js'; import {zod} from '../third_party/index.js'; import type { Dialog, @@ -15,6 +14,7 @@ import type { Viewport, } from '../third_party/index.js'; import type {InsightName, TraceResult} from '../trace-processing/parse.js'; +import type {TextSnapshotNode, GeolocationOptions} from '../types.js'; import type {InstalledExtension} from '../utils/ExtensionRegistry.js'; import type {PaginationOptions} from '../utils/types.js'; @@ -33,6 +33,12 @@ export interface ToolDefinition< */ readOnlyHint: boolean; conditions?: string[]; + /** + * If true, the tool operates on a specific page. + * The `pageId` schema field is auto-injected and the resolved + * page is provided via `request.page`. + */ + pageScoped?: boolean; }; schema: Schema; handler: ( @@ -44,6 +50,8 @@ export interface ToolDefinition< export interface Request { params: zod.objectOutputType; + /** Populated centrally for tools with `pageScoped: true`. 
*/ + page?: Page; } export interface ImageContentData { @@ -110,14 +118,15 @@ export type Context = Readonly<{ storeTraceRecording(result: TraceResult): void; getSelectedPage(): Page; resolvePageById(pageId?: number): Page; - getDialog(): Dialog | undefined; - clearDialog(): void; + getDialog(page?: Page): Dialog | undefined; + clearDialog(page?: Page): void; getPageById(pageId: number): Page; newPage(background?: boolean, isolatedContextName?: string): Promise; closePage(pageId: number): Promise; selectPage(page: Page): void; getElementByUid(uid: string): Promise>; getAXNodeByUid(uid: string): TextSnapshotNode | undefined; + assertUidOnSelectedPage(uid: string): void; emulate( options: { networkConditions?: string | null; @@ -160,7 +169,10 @@ export type Context = Readonly<{ /** * Returns a reqid for a cdpRequestId. */ - resolveCdpElementId(cdpBackendNodeId: number): string | undefined; + resolveCdpElementId( + cdpBackendNodeId: number, + page?: Page, + ): string | undefined; getScreenRecorder(): {recorder: ScreenRecorder; filePath: string} | null; setScreenRecorder( data: {recorder: ScreenRecorder; filePath: string} | null, @@ -190,9 +202,33 @@ export function defineTool< | ToolDefinition | ((args?: Args) => ToolDefinition), ) { + if (typeof definition === 'function') { + const factory = definition; + return (args: Args) => { + const tool = factory(args); + wrapPageScopedHandler(tool); + return tool; + }; + } + wrapPageScopedHandler(definition); return definition; } +function wrapPageScopedHandler( + definition: ToolDefinition, +) { + if (definition.annotations.pageScoped) { + const originalHandler = definition.handler; + definition.handler = async (request, response, context) => { + // In production, main.ts resolves request.page centrally before calling + // the handler. This fallback exists for tests that invoke handlers + // directly without going through main.ts. 
+ request.page ??= context.getSelectedPage(); + return originalHandler(request, response, context); + }; + } +} + export const CLOSE_PAGE_ERROR = 'The last open page cannot be closed. It is fine to keep it open.'; diff --git a/src/tools/emulation.ts b/src/tools/emulation.ts index fefe95ef1..b05221f07 100644 --- a/src/tools/emulation.ts +++ b/src/tools/emulation.ts @@ -8,7 +8,7 @@ import {zod, PredefinedNetworkConditions} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool, pageIdSchema} from './ToolDefinition.js'; +import {defineTool} from './ToolDefinition.js'; const throttlingOptions: [string, ...string[]] = [ 'No emulation', @@ -22,9 +22,9 @@ export const emulate = defineTool({ annotations: { category: ToolCategory.EMULATION, readOnlyHint: false, + pageScoped: true, }, schema: { - ...pageIdSchema, networkConditions: zod .enum(throttlingOptions) .optional() @@ -105,7 +105,7 @@ export const emulate = defineTool({ ), }, handler: async (request, _response, context) => { - const page = context.resolvePageById(request.params.pageId); + const page = request.page!; await context.emulate(request.params, page); }, }); diff --git a/src/tools/input.ts b/src/tools/input.ts index c8388a70f..1ddb840e8 100644 --- a/src/tools/input.ts +++ b/src/tools/input.ts @@ -11,7 +11,7 @@ import type {ElementHandle, KeyInput, Page} from '../third_party/index.js'; import {parseKey} from '../utils/keyboard.js'; import {ToolCategory} from './categories.js'; -import {defineTool, pageIdSchema} from './ToolDefinition.js'; +import {defineTool} from './ToolDefinition.js'; const dblClickSchema = zod .boolean() @@ -58,6 +58,7 @@ export const click = defineTool({ }, handler: async (request, response, context) => { const uid = request.params.uid; + context.assertUidOnSelectedPage(uid); const handle = await context.getElementByUid(uid); try { await context.waitForEventsAfterAction(async () => { @@ -88,16 +89,16 @@ export const clickAt = defineTool({ 
category: ToolCategory.INPUT, readOnlyHint: false, conditions: ['computerVision'], + pageScoped: true, }, schema: { - ...pageIdSchema, x: zod.number().describe('The x coordinate'), y: zod.number().describe('The y coordinate'), dblClick: dblClickSchema, includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - const page = context.resolvePageById(request.params.pageId); + const page = request.page!; await context.waitForEventsAfterAction(async () => { await page.mouse.click(request.params.x, request.params.y, { clickCount: request.params.dblClick ? 2 : 1, @@ -131,6 +132,7 @@ export const hover = defineTool({ }, handler: async (request, response, context) => { const uid = request.params.uid; + context.assertUidOnSelectedPage(uid); const handle = await context.getElementByUid(uid); try { await context.waitForEventsAfterAction(async () => { @@ -223,9 +225,9 @@ export const fill = defineTool({ annotations: { category: ToolCategory.INPUT, readOnlyHint: false, + pageScoped: true, }, schema: { - ...pageIdSchema, uid: zod .string() .describe( @@ -235,7 +237,7 @@ export const fill = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - const page = context.resolvePageById(request.params.pageId); + const page = request.page!; await context.waitForEventsAfterAction(async () => { await fillFormElement( request.params.uid, @@ -289,6 +291,7 @@ export const drag = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { + context.assertUidOnSelectedPage(request.params.from_uid); const fromHandle = await context.getElementByUid(request.params.from_uid); const toHandle = await context.getElementByUid(request.params.to_uid); try { @@ -314,9 +317,9 @@ export const fillForm = defineTool({ annotations: { category: ToolCategory.INPUT, readOnlyHint: false, + pageScoped: true, }, schema: { - ...pageIdSchema, elements: zod .array( zod.object({ @@ -328,7 
+331,7 @@ export const fillForm = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - const page = context.resolvePageById(request.params.pageId); + const page = request.page!; for (const element of request.params.elements) { await context.waitForEventsAfterAction(async () => { await fillFormElement( @@ -352,9 +355,9 @@ export const uploadFile = defineTool({ annotations: { category: ToolCategory.INPUT, readOnlyHint: false, + pageScoped: true, }, schema: { - ...pageIdSchema, uid: zod .string() .describe( @@ -376,9 +379,8 @@ export const uploadFile = defineTool({ // a type=file element. In this case, we want to default to // Page.waitForFileChooser() and upload the file this way. try { - const page = context.resolvePageById(request.params.pageId); const [fileChooser] = await Promise.all([ - page.waitForFileChooser({timeout: 3000}), + request.page!.waitForFileChooser({timeout: 3000}), handle.asLocator().click(), ]); await fileChooser.accept([filePath]); @@ -389,8 +391,7 @@ export const uploadFile = defineTool({ } } if (request.params.includeSnapshot) { - const page = context.resolvePageById(request.params.pageId); - response.includeSnapshot({page}); + response.includeSnapshot({page: request.page!}); } response.appendResponseLine(`File uploaded from ${filePath}.`); } finally { @@ -405,9 +406,9 @@ export const pressKey = defineTool({ annotations: { category: ToolCategory.INPUT, readOnlyHint: false, + pageScoped: true, }, schema: { - ...pageIdSchema, key: zod .string() .describe( @@ -416,7 +417,7 @@ export const pressKey = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - const page = context.resolvePageById(request.params.pageId); + const page = request.page!; const tokens = parseKey(request.params.key); const [key, ...modifiers] = tokens; diff --git a/src/tools/pages.ts b/src/tools/pages.ts index b8f6927c3..780f72f36 100644 --- a/src/tools/pages.ts +++ 
b/src/tools/pages.ts @@ -9,12 +9,7 @@ import type {Dialog} from '../third_party/index.js'; import {zod} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import { - CLOSE_PAGE_ERROR, - defineTool, - pageIdSchema, - timeoutSchema, -} from './ToolDefinition.js'; +import {CLOSE_PAGE_ERROR, defineTool, timeoutSchema} from './ToolDefinition.js'; export const listPages = defineTool(args => { return { @@ -135,9 +130,9 @@ export const navigatePage = defineTool({ annotations: { category: ToolCategory.NAVIGATION, readOnlyHint: false, + pageScoped: true, }, schema: { - ...pageIdSchema, type: zod .enum(['url', 'back', 'forward', 'reload']) .optional() @@ -164,7 +159,7 @@ export const navigatePage = defineTool({ ...timeoutSchema, }, handler: async (request, response, context) => { - const page = context.resolvePageById(request.params.pageId); + const page = request.page!; const options = { timeout: request.params.timeout, }; @@ -188,7 +183,7 @@ export const navigatePage = defineTool({ void dialog.dismiss(); } // We are not going to report the dialog like regular dialogs. 
- context.clearDialog(); + context.clearDialog(page); } }; @@ -285,14 +280,14 @@ export const resizePage = defineTool({ annotations: { category: ToolCategory.EMULATION, readOnlyHint: false, + pageScoped: true, }, schema: { - ...pageIdSchema, width: zod.number().describe('Page width'), height: zod.number().describe('Page height'), }, - handler: async (request, response, context) => { - const page = context.resolvePageById(request.params.pageId); + handler: async (request, response, _context) => { + const page = request.page!; try { const browser = page.browser(); @@ -325,6 +320,7 @@ export const handleDialog = defineTool({ annotations: { category: ToolCategory.INPUT, readOnlyHint: false, + pageScoped: true, }, schema: { action: zod @@ -336,7 +332,8 @@ export const handleDialog = defineTool({ .describe('Optional prompt text to enter into the dialog.'), }, handler: async (request, response, context) => { - const dialog = context.getDialog(); + const page = request.page!; + const dialog = context.getDialog(page); if (!dialog) { throw new Error('No open dialog found'); } @@ -364,7 +361,7 @@ export const handleDialog = defineTool({ } } - context.clearDialog(); + context.clearDialog(page); response.setIncludePages(true); }, }); diff --git a/src/tools/performance.ts b/src/tools/performance.ts index 1183f2f40..dd5f45e03 100644 --- a/src/tools/performance.ts +++ b/src/tools/performance.ts @@ -17,7 +17,7 @@ import { import {ToolCategory} from './categories.js'; import type {Context, Response} from './ToolDefinition.js'; -import {defineTool, pageIdSchema} from './ToolDefinition.js'; +import {defineTool} from './ToolDefinition.js'; const filePathSchema = zod .string() @@ -32,9 +32,9 @@ export const startTrace = defineTool({ annotations: { category: ToolCategory.PERFORMANCE, readOnlyHint: false, + pageScoped: true, }, schema: { - ...pageIdSchema, reload: zod .boolean() .describe( @@ -56,7 +56,7 @@ export const startTrace = defineTool({ } 
context.setIsRunningPerformanceTrace(true); - const page = context.resolvePageById(request.params.pageId); + const page = request.page!; const pageUrlForTracing = page.url(); if (request.params.reload) { @@ -120,16 +120,16 @@ export const stopTrace = defineTool({ annotations: { category: ToolCategory.PERFORMANCE, readOnlyHint: false, + pageScoped: true, }, schema: { - ...pageIdSchema, filePath: filePathSchema, }, handler: async (request, response, context) => { if (!context.isRunningPerformanceTrace()) { return; } - const page = context.resolvePageById(request.params.pageId); + const page = request.page!; await stopTracingAndAppendOutput( page, response, diff --git a/src/tools/screencast.ts b/src/tools/screencast.ts index 7d05ab455..96626abf3 100644 --- a/src/tools/screencast.ts +++ b/src/tools/screencast.ts @@ -12,7 +12,7 @@ import {zod} from '../third_party/index.js'; import type {ScreenRecorder} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool, pageIdSchema} from './ToolDefinition.js'; +import {defineTool} from './ToolDefinition.js'; async function generateTempFilePath(): Promise { const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'chrome-devtools-mcp-')); @@ -26,10 +26,10 @@ export const startScreencast = defineTool({ annotations: { category: ToolCategory.DEBUGGING, readOnlyHint: false, + pageScoped: true, conditions: ['screencast'], }, schema: { - ...pageIdSchema, path: zod .string() .optional() @@ -48,7 +48,7 @@ export const startScreencast = defineTool({ const filePath = request.params.path ?? 
(await generateTempFilePath()); const resolvedPath = path.resolve(filePath); - const page = context.resolvePageById(request.params.pageId); + const page = request.page!; let recorder: ScreenRecorder; try { diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts index 3894853af..2064b7cb4 100644 --- a/src/tools/screenshot.ts +++ b/src/tools/screenshot.ts @@ -8,7 +8,7 @@ import {zod} from '../third_party/index.js'; import type {ElementHandle, Page} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool, pageIdSchema} from './ToolDefinition.js'; +import {defineTool} from './ToolDefinition.js'; export const screenshot = defineTool({ name: 'take_screenshot', @@ -17,9 +17,9 @@ export const screenshot = defineTool({ category: ToolCategory.DEBUGGING, // Not read-only due to filePath param. readOnlyHint: false, + pageScoped: true, }, schema: { - ...pageIdSchema, format: zod .enum(['png', 'jpeg', 'webp']) .default('png') @@ -60,7 +60,7 @@ export const screenshot = defineTool({ if (request.params.uid) { pageOrHandle = await context.getElementByUid(request.params.uid); } else { - pageOrHandle = context.resolvePageById(request.params.pageId); + pageOrHandle = request.page!; } const format = request.params.format; diff --git a/src/tools/script.ts b/src/tools/script.ts index d88c1aa30..756886a34 100644 --- a/src/tools/script.ts +++ b/src/tools/script.ts @@ -8,7 +8,7 @@ import {zod} from '../third_party/index.js'; import type {Frame, JSHandle, Page} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool, pageIdSchema} from './ToolDefinition.js'; +import {defineTool} from './ToolDefinition.js'; export const evaluateScript = defineTool({ name: 'evaluate_script', @@ -17,9 +17,9 @@ so returned values have to be JSON-serializable.`, annotations: { category: ToolCategory.DEBUGGING, readOnlyHint: false, + pageScoped: true, }, schema: { - ...pageIdSchema, function: zod.string().describe( `A 
JavaScript function declaration to be executed by the tool in the currently selected page. Example without arguments: \`() => { @@ -61,9 +61,7 @@ Example with arguments: \`(el) => { "Elements from different frames can't be evaluated together.", ); } else { - pageOrFrame = - [...frames.values()][0] ?? - context.resolvePageById(request.params.pageId); + pageOrFrame = [...frames.values()][0] ?? request.page!; } const fn = await pageOrFrame.evaluateHandle( `(${request.params.function})`, diff --git a/src/tools/snapshot.ts b/src/tools/snapshot.ts index 68b7fda15..b8d7a9dfc 100644 --- a/src/tools/snapshot.ts +++ b/src/tools/snapshot.ts @@ -7,7 +7,7 @@ import {zod} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; -import {defineTool, pageIdSchema, timeoutSchema} from './ToolDefinition.js'; +import {defineTool, timeoutSchema} from './ToolDefinition.js'; export const takeSnapshot = defineTool({ name: 'take_snapshot', @@ -18,9 +18,9 @@ in the DevTools Elements panel (if any).`, category: ToolCategory.DEBUGGING, // Not read-only due to filePath param. readOnlyHint: false, + pageScoped: true, }, schema: { - ...pageIdSchema, verbose: zod .boolean() .optional() @@ -34,12 +34,11 @@ in the DevTools Elements panel (if any).`, 'The absolute path, or a path relative to the current working directory, to save the snapshot to instead of attaching it to the response.', ), }, - handler: async (request, response, context) => { - const page = context.resolvePageById(request.params.pageId); + handler: async (request, response) => { response.includeSnapshot({ verbose: request.params.verbose ?? 
false, filePath: request.params.filePath, - page, + page: request.page!, }); }, }); @@ -50,9 +49,9 @@ export const waitFor = defineTool({ annotations: { category: ToolCategory.NAVIGATION, readOnlyHint: true, + pageScoped: true, }, schema: { - ...pageIdSchema, text: zod .array(zod.string()) .min(1) @@ -62,7 +61,7 @@ export const waitFor = defineTool({ ...timeoutSchema, }, handler: async (request, response, context) => { - const page = context.resolvePageById(request.params.pageId); + const page = request.page!; await context.waitForTextOnPage( request.params.text, request.params.timeout, diff --git a/src/types.ts b/src/types.ts new file mode 100644 index 000000000..69dddd2a9 --- /dev/null +++ b/src/types.ts @@ -0,0 +1,39 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type {SerializedAXNode, Viewport} from './third_party/index.js'; + +export interface TextSnapshotNode extends SerializedAXNode { + id: string; + backendNodeId?: number; + loaderId?: string; + children: TextSnapshotNode[]; +} + +export interface GeolocationOptions { + latitude: number; + longitude: number; +} + +export interface TextSnapshot { + root: TextSnapshotNode; + idToNode: Map; + snapshotId: string; + selectedElementUid?: string; + // It might happen that there is a selected element, but it is not part of the + // snapshot. This flag indicates if there is any selected element. 
+ hasSelectedElement: boolean; + verbose: boolean; +} + +export interface EmulationSettings { + networkConditions?: string | null; + cpuThrottlingRate?: number | null; + geolocation?: GeolocationOptions | null; + userAgent?: string | null; + colorScheme?: 'dark' | 'light' | null; + viewport?: Viewport | null; +} diff --git a/tests/McpContext.test.ts b/tests/McpContext.test.ts index 03c51351b..e62a93b92 100644 --- a/tests/McpContext.test.ts +++ b/tests/McpContext.test.ts @@ -101,6 +101,150 @@ describe('McpContext', () => { }, ); }); + it('resolves uid from a non-selected page snapshot', async () => { + await withMcpContext(async (_response, context) => { + // Page 1: set content and snapshot + const page1 = context.getSelectedPage(); + await page1.setContent(html``); + await context.createTextSnapshot(false, undefined, page1); + + // Capture a uid from page1's snapshot (snapshotId=1, button is node 1) + const page1Uid = '1_1'; + const page1Node = context.getAXNodeByUid(page1Uid); + assert.ok(page1Node, 'uid should resolve from page1 snapshot'); + + // Page 2: new page, set content, snapshot + const page2 = await context.newPage(); + context.selectPage(page2); + await page2.setContent(html``); + await context.createTextSnapshot(false, undefined, page2); + + // Page 2 is now selected. Page 1's uid should still resolve. 
+ const node = context.getAXNodeByUid(page1Uid); + assert.ok(node, 'page1 uid should still resolve after page2 snapshot'); + assert.strictEqual(node?.name, 'Page1 Button'); + + // The element should also be retrievable + const element = await context.getElementByUid(page1Uid); + assert.ok(element, 'should get element handle from page1 snapshot uid'); + }); + }); + + describe('assertUidOnSelectedPage', () => { + it('passes for the focused page in an isolated context', async () => { + await withMcpContext(async (_response, context) => { + const page = await context.newPage(false, 'agent-a'); + await page.setContent(html``); + await context.createTextSnapshot(false, undefined, page); + + // page is focused for agent-a context; assertion should pass. + assert.doesNotThrow(() => context.assertUidOnSelectedPage('1_1')); + }); + }); + + it('throws for a non-focused page in the same context', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'agent-a'); + await pageA1.setContent(html``); + await context.createTextSnapshot(false, undefined, pageA1); + const a1Uid = '1_1'; // button on pageA1 + + // Open a second page in the same context (becomes focused). + const pageA2 = await context.newPage(false, 'agent-a'); + await pageA2.setContent(html``); + await context.createTextSnapshot(false, undefined, pageA2); + + // pageA2 is now focused for agent-a; clicking pageA1's uid should throw. + assert.throws( + () => context.assertUidOnSelectedPage(a1Uid), + (err: Error) => { + assert.ok(err.message.includes('belongs to page')); + assert.ok(err.message.includes('currently selected')); + return true; + }, + ); + }); + }); + + it('passes after cross-context select_page race', async () => { + await withMcpContext(async (_response, context) => { + // Set up two pages in separate isolated contexts. 
+ const pageA = await context.newPage(false, 'agent-a'); + await pageA.setContent(html``); + await context.createTextSnapshot(false, undefined, pageA); + const uidA = '1_1'; + + const pageB = await context.newPage(false, 'agent-b'); + await pageB.setContent(html``); + await context.createTextSnapshot(false, undefined, pageB); + const uidB = '2_1'; + + // Simulate race: agent-a selects its page, then agent-b overwrites global. + context.selectPage(pageA); + context.selectPage(pageB); + // Global #selectedPage is now pageB. + + // Agent A's uid should still pass (per-context focus for agent-a is pageA). + assert.doesNotThrow(() => context.assertUidOnSelectedPage(uidA)); + // Agent B's uid should also pass. + assert.doesNotThrow(() => context.assertUidOnSelectedPage(uidB)); + }); + }); + + it('aligns global selectedPage after assertion passes', async () => { + await withMcpContext(async (_response, context) => { + const pageA = await context.newPage(false, 'agent-a'); + await pageA.setContent(html``); + await context.createTextSnapshot(false, undefined, pageA); + const uidA = '1_1'; + + const pageB = await context.newPage(false, 'agent-b'); + await pageB.setContent(html``); + await context.createTextSnapshot(false, undefined, pageB); + + // Global is on pageB after newPage. + assert.strictEqual(context.getSelectedPage(), pageB); + + // Assert uid from pageA; should pass and align global. 
+ context.assertUidOnSelectedPage(uidA); + assert.strictEqual(context.getSelectedPage(), pageA); + }); + }); + + it('throws for nonexistent uid', async () => { + await withMcpContext(async (_response, context) => { + const page = await context.newPage(false, 'agent-a'); + await page.setContent(html``); + await context.createTextSnapshot(false, undefined, page); + + assert.throws(() => context.assertUidOnSelectedPage('nonexistent_99'), { + message: 'No such element found in any snapshot.', + }); + }); + }); + + it('passes for default context page alongside isolated contexts', async () => { + await withMcpContext(async (_response, context) => { + // Default context page (already exists from withMcpContext setup). + const defaultPage = context.getSelectedPage(); + await defaultPage.setContent(html``); + await context.createTextSnapshot(false, undefined, defaultPage); + const defaultUid = '1_1'; + + // Isolated context page. + const isoPage = await context.newPage(false, 'agent-a'); + await isoPage.setContent(html``); + await context.createTextSnapshot(false, undefined, isoPage); + const isoUid = '2_1'; + + // Global is now isoPage. Default context focus is still defaultPage. + // Both should pass via per-context lookup. + assert.doesNotThrow(() => context.assertUidOnSelectedPage(defaultUid)); + assert.doesNotThrow(() => context.assertUidOnSelectedPage(isoUid)); + }); + }); + }); + it('should include network requests in structured content', async t => { await withMcpContext(async (response, context) => { const mockRequest = getMockRequest({ diff --git a/tests/tools/pages.test.ts b/tests/tools/pages.test.ts index 3f5007b63..ecc38e822 100644 --- a/tests/tools/pages.test.ts +++ b/tests/tools/pages.test.ts @@ -301,19 +301,18 @@ describe('pages', () => { context, ); const isolatedPage = context.getSelectedPage(); - const isolatedPageId = context.getPageId(isolatedPage)!; // Switch global selection back to the default page. 
await selectPage.handler({params: {pageId: 1}}, response, context); assert.notStrictEqual(context.getSelectedPage(), isolatedPage); - // Navigate using pageId; should target the isolated page. + // Navigate using page; should target the isolated page. await navigatePage.handler( { params: { url: 'data:text/html,

Navigated

', - pageId: isolatedPageId, }, + page: isolatedPage, }, response, context, @@ -385,6 +384,165 @@ describe('pages', () => { assert.ok(response.includePages); }); }); + it('preserves focus across different browser contexts', async () => { + await withMcpContext(async (response, context) => { + // Create pages in separate isolated contexts. + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-a'}}, + response, + context, + ); + const pageA = context.getSelectedPage(); + const pageAId = context.getPageId(pageA)!; + + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-b'}}, + response, + context, + ); + const pageB = context.getSelectedPage(); + + // Selecting pageB (ctx-b) should not defocus pageA (ctx-a). + assert.strictEqual( + await pageA.evaluate(() => document.hasFocus()), + true, + ); + assert.strictEqual( + await pageB.evaluate(() => document.hasFocus()), + true, + ); + + // Switching back to pageA should preserve pageB's focus. + await selectPage.handler( + {params: {pageId: pageAId}}, + response, + context, + ); + assert.strictEqual( + await pageA.evaluate(() => document.hasFocus()), + true, + ); + assert.strictEqual( + await pageB.evaluate(() => document.hasFocus()), + true, + ); + }); + }); + it('focuses correct same-context page after cross-context interleaving', async () => { + await withMcpContext(async (response, context) => { + // Create 2 pages in ctx-a, 1 in ctx-b. + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-a'}}, + response, + context, + ); + const pageA1 = context.getSelectedPage(); + const pageA1Id = context.getPageId(pageA1)!; + + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-b'}}, + response, + context, + ); + const pageB = context.getSelectedPage(); + + // pageA1 still focused (cross-context select doesn't defocus it). 
+ assert.strictEqual( + await pageA1.evaluate(() => document.hasFocus()), + true, + ); + + // Create second page in ctx-a. This should defocus pageA1, + // even though #selectedPage was pageB (different context). + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-a'}}, + response, + context, + ); + const pageA2 = context.getSelectedPage(); + + // pageA1 and pageA2 share the same BrowserContext. + assert.strictEqual(pageA1.browserContext(), pageA2.browserContext()); + + assert.strictEqual( + await pageA1.evaluate(() => document.hasFocus()), + false, + 'pageA1 should lose focus when pageA2 is created in the same context', + ); + assert.strictEqual( + await pageA2.evaluate(() => document.hasFocus()), + true, + ); + // pageB is unaffected by ctx-a changes. + assert.strictEqual( + await pageB.evaluate(() => document.hasFocus()), + true, + ); + + // Re-selecting pageA1 should grant it focus via the override. + await selectPage.handler( + {params: {pageId: pageA1Id}}, + response, + context, + ); + assert.strictEqual( + await pageA1.evaluate(() => document.hasFocus()), + true, + ); + // pageB still unaffected. + assert.strictEqual( + await pageB.evaluate(() => document.hasFocus()), + true, + ); + }); + }); + it('handles focus correctly after closing the focused page in a context', async () => { + await withMcpContext(async (response, context) => { + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-a'}}, + response, + context, + ); + const pageA1 = context.getSelectedPage(); + + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-a'}}, + response, + context, + ); + const pageA2 = context.getSelectedPage(); + const pageA2Id = context.getPageId(pageA2)!; + + // pageA2 is focused, pageA1 is not. 
+ assert.strictEqual( + await pageA2.evaluate(() => document.hasFocus()), + true, + ); + assert.strictEqual( + await pageA1.evaluate(() => document.hasFocus()), + false, + ); + + // Close pageA2 (the focused page). + await closePage.handler( + {params: {pageId: pageA2Id}}, + response, + context, + ); + + // Selecting pageA1 should work without errors. + const pageA1Id = context.getPageId(pageA1)!; + await selectPage.handler( + {params: {pageId: pageA1Id}}, + response, + context, + ); + assert.strictEqual( + await pageA1.evaluate(() => document.hasFocus()), + true, + ); + }); + }); }); describe('navigate_page', () => { it('navigates to correct page', async () => { @@ -850,6 +1008,88 @@ describe('pages', () => { ); }); }); + it('can handle a dialog on a non-selected page via pageId', async () => { + await withMcpContext(async (response, context) => { + const page1 = context.getSelectedPage(); + await context.newPage(); // page2 is now selected + + const dialogPromise = new Promise(resolve => { + page1.once('dialog', () => { + resolve(); + }); + }); + page1.evaluate(() => { + alert('test'); + }); + await dialogPromise; + + // page1 is not selected, but its dialog should be accessible via page. + await handleDialog.handler( + { + params: { + action: 'accept', + }, + page: page1, + }, + response, + context, + ); + assert.strictEqual(context.getDialog(page1), undefined); + assert.strictEqual( + response.responseLines[0], + 'Successfully accepted the dialog', + ); + }); + }); + it('tracks dialogs independently per page', async () => { + await withMcpContext(async (response, context) => { + const page1 = context.getSelectedPage(); + const page2 = await context.newPage(); + + // Trigger dialog on page1. + const dialog1Promise = new Promise(resolve => { + page1.once('dialog', () => { + resolve(); + }); + }); + page1.evaluate(() => { + alert('dialog1'); + }); + await dialog1Promise; + + // Trigger dialog on page2. 
+ const dialog2Promise = new Promise(resolve => { + page2.once('dialog', () => { + resolve(); + }); + }); + page2.evaluate(() => { + alert('dialog2'); + }); + await dialog2Promise; + + // Both dialogs should be tracked. + assert.ok(context.getDialog(page1)); + assert.ok(context.getDialog(page2)); + + // Handle page1's dialog; page2's should remain. + await handleDialog.handler( + {params: {action: 'accept'}, page: page1}, + response, + context, + ); + assert.strictEqual(context.getDialog(page1), undefined); + assert.ok(context.getDialog(page2)); + + // Handle page2's dialog. + await handleDialog.handler( + {params: {action: 'dismiss'}, page: page2}, + response, + context, + ); + assert.strictEqual(context.getDialog(page2), undefined); + }); + }); }); describe('get_tab_id', () => { From b3b1aa425adc3e0dea157c3f4426e43d1f126c06 Mon Sep 17 00:00:00 2001 From: Stanislav Publika <10758542+passtas@users.noreply.github.com> Date: Wed, 25 Feb 2026 14:23:05 +0000 Subject: [PATCH 4/5] refactor: add request-scoped page routing, assertPageIsFocused, and Context cleanup - Add #requestPage / #resolveTargetPage() on McpContext so data-retrieval methods (console, network, emulation getters, DevTools data) automatically resolve the correct page for pageScoped tool requests under toolMutex. - Mark console and network tools pageScoped: true so they receive pageId routing like other page-aware tools. - Add assertPageIsFocused() for keyboard tools (press_key, type_text, click_at) to detect when a page is not the active page in its browser context and throw an actionable error with the correct pageId. - Merge getElementByUid and assertUidOnSelectedPage into a single method with optional page parameter for scoped search (pageScoped tools) vs cross-page search with context-focus validation (uid-based tools). - Remove unused Context interface methods: resolvePageById, resolveCdpElementId, and 6 emulation getters already removed upstream. 
- Clean up orphaned #mcpPages and #focusedPagePerContext entries in createPagesSnapshot(). - Remove dead code: fillFormElement page parameter made required since all callers now provide it. - Regenerate tool-reference.md. - Add unit tests for page-scoped getElementByUid and context-focus validation, plus eval scenario for assertPageIsFocused recovery flow. --- docs/tool-reference.md | 2 +- .../page_focus_keyboard_test.ts | 60 ++++ src/McpContext.ts | 156 ++++++--- src/server.ts | 55 +-- src/tools/ToolDefinition.ts | 18 +- src/tools/console.ts | 2 + src/tools/input.ts | 17 +- src/tools/memory.ts | 5 +- src/tools/network.ts | 2 + src/tools/screenshot.ts | 5 +- src/tools/script.ts | 2 +- tests/McpContext.test.ts | 70 ++-- tests/tools/pageFocus.test.ts | 312 ++++++++++++++++++ 13 files changed, 582 insertions(+), 124 deletions(-) create mode 100644 scripts/eval_scenarios/page_focus_keyboard_test.ts create mode 100644 tests/tools/pageFocus.test.ts diff --git a/docs/tool-reference.md b/docs/tool-reference.md index 4ac0c7d2a..427d1039f 100644 --- a/docs/tool-reference.md +++ b/docs/tool-reference.md @@ -1,6 +1,6 @@ -# Chrome DevTools MCP Tool Reference (~7472 cl100k_base tokens) +# Chrome DevTools MCP Tool Reference (~7624 cl100k_base tokens) - **[Input automation](#input-automation)** (9 tools) - [`click`](#click) diff --git a/scripts/eval_scenarios/page_focus_keyboard_test.ts b/scripts/eval_scenarios/page_focus_keyboard_test.ts new file mode 100644 index 000000000..eb8d9943f --- /dev/null +++ b/scripts/eval_scenarios/page_focus_keyboard_test.ts @@ -0,0 +1,60 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import assert from 'node:assert'; + +import type {TestScenario} from '../eval_gemini.ts'; + +export const scenario: TestScenario = { + prompt: `Open two pages in the same isolated context "session": +- Page 1 at data:text/html, +- Page 2 at data:text/html,

Other

+ +Now press_key "a" on Page 1 without selecting it first. If you encounter any errors, recover from them.`, + maxTurns: 10, + expectations: calls => { + // Should open 2 pages in the same context. + const newPages = calls.filter(c => c.name === 'new_page'); + assert.strictEqual(newPages.length, 2, 'Should open 2 pages'); + assert.strictEqual(newPages[0].args.isolatedContext, 'session'); + assert.strictEqual(newPages[1].args.isolatedContext, 'session'); + + // Should attempt press_key at least once. + const pressKeys = calls.filter(c => c.name === 'press_key'); + assert.ok(pressKeys.length >= 1, 'Should attempt press_key'); + + // Should call select_page to recover after the error. + const selectPages = calls.filter(c => c.name === 'select_page'); + assert.ok( + selectPages.length >= 1, + 'Should select_page to recover from the focus error', + ); + + const firstPressKeyIndex = calls.indexOf(pressKeys[0]); + const firstSelectPageIndex = calls.indexOf(selectPages[0]); + + if (firstPressKeyIndex < firstSelectPageIndex) { + // Error path: press_key was attempted first and failed. + // Verify recovery: must have a second press_key after select_page. + assert.ok( + pressKeys.length >= 2, + 'Should retry press_key after error recovery', + ); + const lastPressKeyIndex = calls.lastIndexOf(pressKeys.at(-1)!); + assert.ok( + firstSelectPageIndex < lastPressKeyIndex, + 'select_page should precede the successful press_key', + ); + } else { + // Proactive path: model selected page first. + // Verify select_page came before press_key. 
+ assert.ok( + firstSelectPageIndex < firstPressKeyIndex, + 'select_page should precede press_key', + ); + } + }, +}; diff --git a/src/McpContext.ts b/src/McpContext.ts index 90b2e5a09..1f5f5452e 100644 --- a/src/McpContext.ts +++ b/src/McpContext.ts @@ -129,6 +129,8 @@ export class McpContext implements Context { null; #focusedPagePerContext = new Map(); + #requestPage?: Page; + #nextPageId = 1; #extensionServiceWorkerMap = new WeakMap(); @@ -203,8 +205,16 @@ export class McpContext implements Context { return context; } + setRequestPage(page?: Page): void { + this.#requestPage = page; + } + + #resolveTargetPage(): Page { + return this.#requestPage ?? this.getSelectedPage(); + } + resolveCdpRequestId(cdpRequestId: string): number | undefined { - const selectedPage = this.getSelectedPage(); + const selectedPage = this.#resolveTargetPage(); if (!cdpRequestId) { this.logger('no network request'); return; @@ -252,19 +262,19 @@ export class McpContext implements Context { } getNetworkRequests(includePreservedRequests?: boolean): HTTPRequest[] { - const page = this.getSelectedPage(); + const page = this.#resolveTargetPage(); return this.#networkCollector.getData(page, includePreservedRequests); } getConsoleData( includePreservedMessages?: boolean, ): Array { - const page = this.getSelectedPage(); + const page = this.#resolveTargetPage(); return this.#consoleCollector.getData(page, includePreservedMessages); } getDevToolsUniverse(): TargetUniverse | null { - return this.#devtoolsUniverseManager.get(this.getSelectedPage()); + return this.#devtoolsUniverseManager.get(this.#resolveTargetPage()); } getConsoleMessageStableId( @@ -276,7 +286,7 @@ export class McpContext implements Context { getConsoleMessageById( id: number, ): ConsoleMessage | Error | DevTools.AggregatedIssue | UncaughtError { - return this.#consoleCollector.getById(this.getSelectedPage(), id); + return this.#consoleCollector.getById(this.#resolveTargetPage(), id); } async newPage( @@ -318,7 +328,7 @@ export 
class McpContext implements Context { } getNetworkRequestById(reqid: number): HTTPRequest { - return this.#networkCollector.getById(this.getSelectedPage(), reqid); + return this.#networkCollector.getById(this.#resolveTargetPage(), reqid); } async emulate( @@ -435,27 +445,27 @@ export class McpContext implements Context { } getNetworkConditions(): string | null { - return this.#getSelectedMcpPage().networkConditions; + return this.#getMcpPage(this.#resolveTargetPage()).networkConditions; } getCpuThrottlingRate(): number { - return this.#getSelectedMcpPage().cpuThrottlingRate; + return this.#getMcpPage(this.#resolveTargetPage()).cpuThrottlingRate; } getGeolocation(): GeolocationOptions | null { - return this.#getSelectedMcpPage().geolocation; + return this.#getMcpPage(this.#resolveTargetPage()).geolocation; } getViewport(): Viewport | null { - return this.#getSelectedMcpPage().viewport; + return this.#getMcpPage(this.#resolveTargetPage()).viewport; } getUserAgent(): string | null { - return this.#getSelectedMcpPage().userAgent; + return this.#getMcpPage(this.#resolveTargetPage()).userAgent; } getColorScheme(): 'dark' | 'light' | null { - return this.#getSelectedMcpPage().colorScheme; + return this.#getMcpPage(this.#resolveTargetPage()).colorScheme; } setIsRunningPerformanceTrace(x: boolean): void { @@ -481,7 +491,7 @@ export class McpContext implements Context { } getDialog(page?: Page): Dialog | undefined { - const targetPage = page ?? this.#selectedPage; + const targetPage = page ?? this.#requestPage ?? this.#selectedPage; if (!targetPage) { return undefined; } @@ -543,6 +553,19 @@ export class McpContext implements Context { return this.#selectedPage === page; } + assertPageIsFocused(page: Page): void { + const ctx = page.browserContext(); + const focused = this.#focusedPagePerContext.get(ctx); + if (focused && focused !== page) { + const targetId = this.#mcpPages.get(page)?.id ?? '?'; + const focusedId = this.#mcpPages.get(focused)?.id ?? 
'?'; + throw new Error( + `Page ${targetId} is not the active page in its browser context (page ${focusedId} is). ` + + `Call select_page with pageId ${targetId} first.`, + ); + } + } + selectPage(newPage: Page): void { const ctx = newPage.browserContext(); const oldFocused = this.#focusedPagePerContext.get(ctx); @@ -575,7 +598,7 @@ export class McpContext implements Context { } getNavigationTimeout() { - const page = this.getSelectedPage(); + const page = this.#resolveTargetPage(); return page.getDefaultNavigationTimeout(); } @@ -592,12 +615,39 @@ export class McpContext implements Context { return undefined; } - assertUidOnSelectedPage(uid: string): void { - for (const [page, mcpPage] of this.#mcpPages.entries()) { - if (mcpPage.textSnapshot?.idToNode.has(uid)) { - const ctx = page.browserContext(); + async getElementByUid( + uid: string, + page?: Page, + ): Promise> { + if (page) { + // Scoped search: only look in the target page's snapshot. + const mcpPage = this.#mcpPages.get(page); + if (!mcpPage?.textSnapshot) { + throw new Error( + `No snapshot found for page ${mcpPage?.id ?? '?'}. Use ${takeSnapshot.name} to capture one.`, + ); + } + const node = mcpPage.textSnapshot.idToNode.get(uid); + if (!node) { + throw new Error( + `Element uid "${uid}" not found on page ${mcpPage.id}.`, + ); + } + return this.#resolveElementHandle(node, uid); + } + + // Cross-page search with context-focus validation. + let anySnapshot = false; + for (const [searchPage, mcpPage] of this.#mcpPages.entries()) { + if (!mcpPage.textSnapshot) { + continue; + } + anySnapshot = true; + const node = mcpPage.textSnapshot.idToNode.get(uid); + if (node) { + const ctx = searchPage.browserContext(); const contextSelectedPage = this.#focusedPagePerContext.get(ctx); - if (contextSelectedPage !== page) { + if (contextSelectedPage !== searchPage) { const targetId = mcpPage.id; const selectedId = contextSelectedPage ? 
this.#mcpPages.get(contextSelectedPage)?.id @@ -608,37 +658,10 @@ export class McpContext implements Context { ); } // Align global #selectedPage for waitForEventsAfterAction etc. - if (this.#selectedPage !== page) { - this.#selectedPage = page; - } - return; - } - } - throw new Error('No such element found in any snapshot.'); - } - - async getElementByUid(uid: string): Promise> { - let anySnapshot = false; - // Search across all per-page snapshots for the uid. - for (const mcpPage of this.#mcpPages.values()) { - if (!mcpPage.textSnapshot) { - continue; - } - anySnapshot = true; - const node = mcpPage.textSnapshot.idToNode.get(uid); - if (node) { - const message = `Element with uid ${uid} no longer exists on the page.`; - try { - const handle = await node.elementHandle(); - if (!handle) { - throw new Error(message); - } - return handle; - } catch (error) { - throw new Error(message, { - cause: error, - }); + if (this.#selectedPage !== searchPage) { + this.#selectedPage = searchPage; } + return this.#resolveElementHandle(node, uid); } } if (!anySnapshot) { @@ -684,6 +707,24 @@ export class McpContext implements Context { return this.#extensionServiceWorkers; } + async #resolveElementHandle( + node: TextSnapshotNode, + uid: string, + ): Promise> { + const message = `Element with uid ${uid} no longer exists on the page.`; + try { + const handle = await node.elementHandle(); + if (!handle) { + throw new Error(message); + } + return handle; + } catch (error) { + throw new Error(message, { + cause: error, + }); + } + } + async createPagesSnapshot(): Promise { const {pages: allPages, isolatedContextNames} = await this.#getAllPages(); @@ -696,6 +737,21 @@ export class McpContext implements Context { mcpPage.isolatedContextName = isolatedContextNames.get(page); } + // Prune orphaned #mcpPages entries (pages that no longer exist). 
+ const currentPages = new Set(allPages); + for (const [page, mcpPage] of this.#mcpPages) { + if (!currentPages.has(page)) { + mcpPage.dispose(); + this.#mcpPages.delete(page); + } + } + // Prune stale #focusedPagePerContext entries. + for (const [ctx, page] of this.#focusedPagePerContext) { + if (!currentPages.has(page)) { + this.#focusedPagePerContext.delete(ctx); + } + } + this.#pages = allPages.filter(page => { return ( this.#options.experimentalDevToolsDebugging || @@ -815,7 +871,7 @@ export class McpContext implements Context { async getDevToolsData(): Promise { try { this.logger('Getting DevTools UI data'); - const selectedPage = this.getSelectedPage(); + const selectedPage = this.#resolveTargetPage(); const devtoolsPage = this.getDevToolsPage(selectedPage); if (!devtoolsPage) { this.logger('No DevTools page detected'); diff --git a/src/server.ts b/src/server.ts index 0d9fc2d63..7d00b2dfd 100644 --- a/src/server.ts +++ b/src/server.ts @@ -176,31 +176,38 @@ export async function createMcpServer( const page = tool.annotations.pageScoped ? 
context.resolvePageById(params.pageId as number | undefined) : undefined; - await tool.handler( - { - params, - page, - }, - response, - context, - ); - const {content, structuredContent} = await response.handle( - tool.name, - context, - ); - const result: CallToolResult & { - structuredContent?: Record; - } = { - content, - }; - success = true; - if (serverArgs.experimentalStructuredContent) { - result.structuredContent = structuredContent as Record< - string, - unknown - >; + if (page) { + context.setRequestPage(page); + } + try { + await tool.handler( + { + params, + page, + }, + response, + context, + ); + const {content, structuredContent} = await response.handle( + tool.name, + context, + ); + const result: CallToolResult & { + structuredContent?: Record; + } = { + content, + }; + success = true; + if (serverArgs.experimentalStructuredContent) { + result.structuredContent = structuredContent as Record< + string, + unknown + >; + } + return result; + } finally { + context.setRequestPage(undefined); } - return result; } catch (err) { logger(`${tool.name} error:`, err, err?.stack); let errorText = err && 'message' in err ? 
err.message : String(err); diff --git a/src/tools/ToolDefinition.ts b/src/tools/ToolDefinition.ts index 1be979cb5..658fad477 100644 --- a/src/tools/ToolDefinition.ts +++ b/src/tools/ToolDefinition.ts @@ -117,16 +117,15 @@ export type Context = Readonly<{ recordedTraces(): TraceResult[]; storeTraceRecording(result: TraceResult): void; getSelectedPage(): Page; - resolvePageById(pageId?: number): Page; getDialog(page?: Page): Dialog | undefined; clearDialog(page?: Page): void; getPageById(pageId: number): Page; newPage(background?: boolean, isolatedContextName?: string): Promise; closePage(pageId: number): Promise; selectPage(page: Page): void; - getElementByUid(uid: string): Promise>; + assertPageIsFocused(page: Page): void; + getElementByUid(uid: string, page?: Page): Promise>; getAXNodeByUid(uid: string): TextSnapshotNode | undefined; - assertUidOnSelectedPage(uid: string): void; emulate( options: { networkConditions?: string | null; @@ -138,12 +137,6 @@ export type Context = Readonly<{ }, targetPage?: Page, ): Promise; - getNetworkConditions(): string | null; - getCpuThrottlingRate(): number; - getGeolocation(): GeolocationOptions | null; - getViewport(): Viewport | null; - getUserAgent(): string | null; - getColorScheme(): 'dark' | 'light' | null; saveTemporaryFile( data: Uint8Array, mimeType: 'image/png' | 'image/jpeg' | 'image/webp', @@ -166,13 +159,6 @@ export type Context = Readonly<{ * Returns a reqid for a cdpRequestId. */ resolveCdpRequestId(cdpRequestId: string): number | undefined; - /** - * Returns a reqid for a cdpRequestId. 
- */ - resolveCdpElementId( - cdpBackendNodeId: number, - page?: Page, - ): string | undefined; getScreenRecorder(): {recorder: ScreenRecorder; filePath: string} | null; setScreenRecorder( data: {recorder: ScreenRecorder; filePath: string} | null, diff --git a/src/tools/console.ts b/src/tools/console.ts index ace8f6282..1f6aa3771 100644 --- a/src/tools/console.ts +++ b/src/tools/console.ts @@ -44,6 +44,7 @@ export const listConsoleMessages = defineTool({ annotations: { category: ToolCategory.DEBUGGING, readOnlyHint: true, + pageScoped: true, }, schema: { pageSize: zod @@ -92,6 +93,7 @@ export const getConsoleMessage = defineTool({ annotations: { category: ToolCategory.DEBUGGING, readOnlyHint: true, + pageScoped: true, }, schema: { msgid: zod diff --git a/src/tools/input.ts b/src/tools/input.ts index 1ddb840e8..d88b707c4 100644 --- a/src/tools/input.ts +++ b/src/tools/input.ts @@ -58,7 +58,6 @@ export const click = defineTool({ }, handler: async (request, response, context) => { const uid = request.params.uid; - context.assertUidOnSelectedPage(uid); const handle = await context.getElementByUid(uid); try { await context.waitForEventsAfterAction(async () => { @@ -99,6 +98,7 @@ export const clickAt = defineTool({ }, handler: async (request, response, context) => { const page = request.page!; + context.assertPageIsFocused(page); await context.waitForEventsAfterAction(async () => { await page.mouse.click(request.params.x, request.params.y, { clickCount: request.params.dblClick ? 
2 : 1, @@ -132,7 +132,6 @@ export const hover = defineTool({ }, handler: async (request, response, context) => { const uid = request.params.uid; - context.assertUidOnSelectedPage(uid); const handle = await context.getElementByUid(uid); try { await context.waitForEventsAfterAction(async () => { @@ -195,9 +194,9 @@ async function fillFormElement( uid: string, value: string, context: McpContext, - page?: Page, + page: Page, ) { - const handle = await context.getElementByUid(uid); + const handle = await context.getElementByUid(uid, page); try { const aXNode = context.getAXNodeByUid(uid); // We assume that combobox needs to be handled as select if it has @@ -207,9 +206,8 @@ async function fillFormElement( } else { // Increase timeout for longer input values. const timeoutPerChar = 10; // ms - const targetPage = page ?? context.getSelectedPage(); const fillTimeout = - targetPage.getDefaultTimeout() + value.length * timeoutPerChar; + page.getDefaultTimeout() + value.length * timeoutPerChar; await handle.asLocator().setTimeout(fillTimeout).fill(value); } } catch (error) { @@ -259,14 +257,16 @@ export const typeText = defineTool({ annotations: { category: ToolCategory.INPUT, readOnlyHint: false, + pageScoped: true, }, schema: { text: zod.string().describe('The text to type'), submitKey: submitKeySchema, }, handler: async (request, response, context) => { + const page = request.page!; + context.assertPageIsFocused(page); await context.waitForEventsAfterAction(async () => { - const page = context.getSelectedPage(); await page.keyboard.type(request.params.text); if (request.params.submitKey) { await page.keyboard.press(request.params.submitKey as KeyInput); @@ -291,7 +291,6 @@ export const drag = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - context.assertUidOnSelectedPage(request.params.from_uid); const fromHandle = await context.getElementByUid(request.params.from_uid); const toHandle = await 
context.getElementByUid(request.params.to_uid); try { @@ -370,6 +369,7 @@ export const uploadFile = defineTool({ const {uid, filePath} = request.params; const handle = (await context.getElementByUid( uid, + request.page, )) as ElementHandle; try { try { @@ -418,6 +418,7 @@ export const pressKey = defineTool({ }, handler: async (request, response, context) => { const page = request.page!; + context.assertPageIsFocused(page); const tokens = parseKey(request.params.key); const [key, ...modifiers] = tokens; diff --git a/src/tools/memory.ts b/src/tools/memory.ts index d75122bad..dc9252a88 100644 --- a/src/tools/memory.ts +++ b/src/tools/memory.ts @@ -15,14 +15,15 @@ export const takeMemorySnapshot = defineTool({ annotations: { category: ToolCategory.PERFORMANCE, readOnlyHint: true, + pageScoped: true, }, schema: { filePath: zod .string() .describe('A path to a .heapsnapshot file to save the heapsnapshot to.'), }, - handler: async (request, response, context) => { - const page = context.getSelectedPage(); + handler: async (request, response, _context) => { + const page = request.page!; await page.captureHeapSnapshot({ path: request.params.filePath, diff --git a/src/tools/network.ts b/src/tools/network.ts index 9a1d9da7c..10e64cd93 100644 --- a/src/tools/network.ts +++ b/src/tools/network.ts @@ -38,6 +38,7 @@ export const listNetworkRequests = defineTool({ annotations: { category: ToolCategory.NETWORK, readOnlyHint: true, + pageScoped: true, }, schema: { pageSize: zod @@ -92,6 +93,7 @@ export const getNetworkRequest = defineTool({ annotations: { category: ToolCategory.NETWORK, readOnlyHint: false, + pageScoped: true, }, schema: { reqid: zod diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts index 2064b7cb4..4269f6e6f 100644 --- a/src/tools/screenshot.ts +++ b/src/tools/screenshot.ts @@ -58,7 +58,10 @@ export const screenshot = defineTool({ let pageOrHandle: Page | ElementHandle; if (request.params.uid) { - pageOrHandle = await 
context.getElementByUid(request.params.uid); + pageOrHandle = await context.getElementByUid( + request.params.uid, + request.page, + ); } else { pageOrHandle = request.page!; } diff --git a/src/tools/script.ts b/src/tools/script.ts index 756886a34..c0e5f5b61 100644 --- a/src/tools/script.ts +++ b/src/tools/script.ts @@ -50,7 +50,7 @@ Example with arguments: \`(el) => { try { const frames = new Set(); for (const el of request.params.args ?? []) { - const handle = await context.getElementByUid(el.uid); + const handle = await context.getElementByUid(el.uid, request.page); frames.add(handle.frame); args.push(handle); } diff --git a/tests/McpContext.test.ts b/tests/McpContext.test.ts index e62a93b92..a0dd00917 100644 --- a/tests/McpContext.test.ts +++ b/tests/McpContext.test.ts @@ -124,21 +124,22 @@ describe('McpContext', () => { assert.ok(node, 'page1 uid should still resolve after page2 snapshot'); assert.strictEqual(node?.name, 'Page1 Button'); - // The element should also be retrievable - const element = await context.getElementByUid(page1Uid); + // The element should also be retrievable when the target page is provided. + const element = await context.getElementByUid(page1Uid, page1); assert.ok(element, 'should get element handle from page1 snapshot uid'); }); }); - describe('assertUidOnSelectedPage', () => { - it('passes for the focused page in an isolated context', async () => { + describe('getElementByUid context-focus validation', () => { + it('resolves for the focused page in an isolated context', async () => { await withMcpContext(async (_response, context) => { const page = await context.newPage(false, 'agent-a'); await page.setContent(html``); await context.createTextSnapshot(false, undefined, page); - // page is focused for agent-a context; assertion should pass. - assert.doesNotThrow(() => context.assertUidOnSelectedPage('1_1')); + // page is focused for agent-a context; should resolve. 
+ const handle = await context.getElementByUid('1_1'); + void handle.dispose(); }); }); @@ -155,8 +156,8 @@ describe('McpContext', () => { await context.createTextSnapshot(false, undefined, pageA2); // pageA2 is now focused for agent-a; clicking pageA1's uid should throw. - assert.throws( - () => context.assertUidOnSelectedPage(a1Uid), + await assert.rejects( + () => context.getElementByUid(a1Uid), (err: Error) => { assert.ok(err.message.includes('belongs to page')); assert.ok(err.message.includes('currently selected')); @@ -166,7 +167,7 @@ describe('McpContext', () => { }); }); - it('passes after cross-context select_page race', async () => { + it('resolves after cross-context select_page race', async () => { await withMcpContext(async (_response, context) => { // Set up two pages in separate isolated contexts. const pageA = await context.newPage(false, 'agent-a'); @@ -184,14 +185,16 @@ describe('McpContext', () => { context.selectPage(pageB); // Global #selectedPage is now pageB. - // Agent A's uid should still pass (per-context focus for agent-a is pageA). - assert.doesNotThrow(() => context.assertUidOnSelectedPage(uidA)); - // Agent B's uid should also pass. - assert.doesNotThrow(() => context.assertUidOnSelectedPage(uidB)); + // Agent A's uid should still resolve (per-context focus for agent-a is pageA). + const handleA = await context.getElementByUid(uidA); + void handleA.dispose(); + // Agent B's uid should also resolve. + const handleB = await context.getElementByUid(uidB); + void handleB.dispose(); }); }); - it('aligns global selectedPage after assertion passes', async () => { + it('aligns global selectedPage after resolution', async () => { await withMcpContext(async (_response, context) => { const pageA = await context.newPage(false, 'agent-a'); await pageA.setContent(html``); @@ -205,8 +208,9 @@ describe('McpContext', () => { // Global is on pageB after newPage. 
assert.strictEqual(context.getSelectedPage(), pageB); - // Assert uid from pageA; should pass and align global. - context.assertUidOnSelectedPage(uidA); + // Resolve uid from pageA; should pass and align global. + const handle = await context.getElementByUid(uidA); + void handle.dispose(); assert.strictEqual(context.getSelectedPage(), pageA); }); }); @@ -217,13 +221,13 @@ describe('McpContext', () => { await page.setContent(html``); await context.createTextSnapshot(false, undefined, page); - assert.throws(() => context.assertUidOnSelectedPage('nonexistent_99'), { + await assert.rejects(() => context.getElementByUid('nonexistent_99'), { message: 'No such element found in any snapshot.', }); }); }); - it('passes for default context page alongside isolated contexts', async () => { + it('resolves for default context page alongside isolated contexts', async () => { await withMcpContext(async (_response, context) => { // Default context page (already exists from withMcpContext setup). const defaultPage = context.getSelectedPage(); @@ -238,9 +242,33 @@ describe('McpContext', () => { const isoUid = '2_1'; // Global is now isoPage. Default context focus is still defaultPage. - // Both should pass via per-context lookup. - assert.doesNotThrow(() => context.assertUidOnSelectedPage(defaultUid)); - assert.doesNotThrow(() => context.assertUidOnSelectedPage(isoUid)); + // Both should resolve via per-context lookup. 
+ const handleDefault = await context.getElementByUid(defaultUid); + void handleDefault.dispose(); + const handleIso = await context.getElementByUid(isoUid); + void handleIso.dispose(); + }); + }); + + it('scopes search to target page when page is provided', async () => { + await withMcpContext(async (_response, context) => { + const pageA = await context.newPage(false, 'agent-a'); + await pageA.setContent(html``); + await context.createTextSnapshot(false, undefined, pageA); + const uidA = '1_1'; + + const pageB = await context.newPage(false, 'agent-b'); + await pageB.setContent(html``); + await context.createTextSnapshot(false, undefined, pageB); + + // uidA belongs to pageA; searching with pageB should throw. + await assert.rejects(() => context.getElementByUid(uidA, pageB), { + message: /not found on page/, + }); + + // Searching with the correct page should resolve. + const handle = await context.getElementByUid(uidA, pageA); + void handle.dispose(); }); }); }); diff --git a/tests/tools/pageFocus.test.ts b/tests/tools/pageFocus.test.ts new file mode 100644 index 000000000..d266230f8 --- /dev/null +++ b/tests/tools/pageFocus.test.ts @@ -0,0 +1,312 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import assert from 'node:assert'; +import {describe, it} from 'node:test'; + +import type {ParsedArguments} from '../../src/cli.js'; +import {McpResponse} from '../../src/McpResponse.js'; +import {clickAt, pressKey, typeText} from '../../src/tools/input.js'; +import {html, withMcpContext} from '../utils.js'; + +const emptyArgs = {} as ParsedArguments; + +describe('assertPageIsFocused', () => { + describe('McpContext method', () => { + it('passes for the only page in an isolated context', async () => { + await withMcpContext(async (_response, context) => { + const page = await context.newPage(false, 'ctx-a'); + assert.doesNotThrow(() => context.assertPageIsFocused(page)); + }); + }); + + it('throws when a different page is 
focused in the same context', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + const pageA2 = await context.newPage(false, 'ctx-a'); + assert.doesNotThrow(() => context.assertPageIsFocused(pageA2)); + assert.throws( + () => context.assertPageIsFocused(pageA1), + (err: Error) => { + assert.ok(err.message.includes('not the active page')); + assert.ok(err.message.includes('Call select_page')); + return true; + }, + ); + }); + }); + + it('passes after re-selecting the page', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + await context.newPage(false, 'ctx-a'); + assert.throws(() => context.assertPageIsFocused(pageA1)); + context.selectPage(pageA1); + assert.doesNotThrow(() => context.assertPageIsFocused(pageA1)); + }); + }); + + it('does not cross-context interfere', async () => { + await withMcpContext(async (_response, context) => { + const pageA = await context.newPage(false, 'ctx-a'); + const pageB = await context.newPage(false, 'ctx-b'); + assert.doesNotThrow(() => context.assertPageIsFocused(pageA)); + assert.doesNotThrow(() => context.assertPageIsFocused(pageB)); + }); + }); + + it('tracks focus independently per context', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + const pageA2 = await context.newPage(false, 'ctx-a'); + const pageB1 = await context.newPage(false, 'ctx-b'); + const pageB2 = await context.newPage(false, 'ctx-b'); + + // Latest page in each context is focused. + assert.doesNotThrow(() => context.assertPageIsFocused(pageA2)); + assert.doesNotThrow(() => context.assertPageIsFocused(pageB2)); + assert.throws(() => context.assertPageIsFocused(pageA1)); + assert.throws(() => context.assertPageIsFocused(pageB1)); + + // Switch focus within each context independently. 
+ context.selectPage(pageA1); + context.selectPage(pageB1); + assert.doesNotThrow(() => context.assertPageIsFocused(pageA1)); + assert.doesNotThrow(() => context.assertPageIsFocused(pageB1)); + assert.throws(() => context.assertPageIsFocused(pageA2)); + assert.throws(() => context.assertPageIsFocused(pageB2)); + }); + }); + }); + + describe('type_text', () => { + it('throws when targeting a non-focused page', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + await pageA1.setContent(html``); + await pageA1.click('textarea'); + await context.newPage(false, 'ctx-a'); + + await assert.rejects( + () => + typeText.handler( + {params: {text: 'fail'}, page: pageA1}, + new McpResponse(emptyArgs), + context, + ), + (err: Error) => { + assert.ok(err.message.includes('not the active page')); + return true; + }, + ); + }); + }); + + it('succeeds on the focused page', async () => { + await withMcpContext(async (_response, context) => { + const page = await context.newPage(false, 'ctx-a'); + await page.setContent(html``); + await page.click('textarea'); + + const response = new McpResponse(emptyArgs); + await typeText.handler( + {params: {text: 'hello'}, page}, + response, + context, + ); + assert.strictEqual(response.responseLines[0], 'Typed text "hello"'); + assert.strictEqual( + await page.evaluate(() => document.querySelector('textarea')?.value), + 'hello', + ); + }); + }); + + it('succeeds after re-selecting the correct page', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + await pageA1.setContent(html``); + await context.newPage(false, 'ctx-a'); + + await assert.rejects(() => + typeText.handler( + {params: {text: 'fail'}, page: pageA1}, + new McpResponse(emptyArgs), + context, + ), + ); + + context.selectPage(pageA1); + await pageA1.click('textarea'); + + const response = new McpResponse(emptyArgs); + await 
typeText.handler( + {params: {text: 'recovered'}, page: pageA1}, + response, + context, + ); + assert.strictEqual(response.responseLines[0], 'Typed text "recovered"'); + }); + }); + }); + + describe('press_key', () => { + it('throws when targeting a non-focused page', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + await pageA1.setContent(html`
content
`); + await context.newPage(false, 'ctx-a'); + + await assert.rejects( + () => + pressKey.handler( + {params: {key: 'Tab'}, page: pageA1}, + new McpResponse(emptyArgs), + context, + ), + (err: Error) => { + assert.ok(err.message.includes('not the active page')); + return true; + }, + ); + }); + }); + + it('succeeds on the focused page', async () => { + await withMcpContext(async (_response, context) => { + const page = await context.newPage(false, 'ctx-a'); + await page.setContent( + html``, + ); + + const response = new McpResponse(emptyArgs); + await pressKey.handler( + {params: {key: 'Enter'}, page}, + response, + context, + ); + assert.strictEqual( + response.responseLines[0], + 'Successfully pressed key: Enter', + ); + assert.deepStrictEqual(await page.evaluate('logs'), ['Enter']); + }); + }); + }); + + describe('click_at', () => { + it('throws when targeting a non-focused page', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + await pageA1.setContent( + html`
`, + ); + await context.newPage(false, 'ctx-a'); + + await assert.rejects( + () => + clickAt.handler( + {params: {x: 50, y: 50}, page: pageA1}, + new McpResponse(emptyArgs), + context, + ), + (err: Error) => { + assert.ok(err.message.includes('not the active page')); + return true; + }, + ); + }); + }); + + it('succeeds on the focused page', async () => { + await withMcpContext(async (_response, context) => { + const page = await context.newPage(false, 'ctx-a'); + await page.setContent( + html`
`, + ); + + const response = new McpResponse(emptyArgs); + await clickAt.handler( + {params: {x: 50, y: 50}, page}, + response, + context, + ); + assert.strictEqual( + response.responseLines[0], + 'Successfully clicked at the coordinates', + ); + assert.ok(await page.$('text/clicked')); + }); + }); + }); + + describe('cross-context isolation', () => { + it('type_text in one context does not affect another', async () => { + await withMcpContext(async (_response, context) => { + const pageA = await context.newPage(false, 'ctx-a'); + await pageA.setContent(html``); + + const pageB = await context.newPage(false, 'ctx-b'); + await pageB.setContent(html``); + + context.selectPage(pageA); + await pageA.click('textarea'); + await typeText.handler( + {params: {text: 'agent-a'}, page: pageA}, + new McpResponse(emptyArgs), + context, + ); + + context.selectPage(pageB); + await pageB.click('textarea'); + await typeText.handler( + {params: {text: 'agent-b'}, page: pageB}, + new McpResponse(emptyArgs), + context, + ); + + assert.strictEqual( + await pageA.evaluate(() => document.querySelector('textarea')?.value), + 'agent-a', + ); + assert.strictEqual( + await pageB.evaluate(() => document.querySelector('textarea')?.value), + 'agent-b', + ); + }); + }); + + it('switching focus in context A does not break context B', async () => { + await withMcpContext(async (_response, context) => { + await context.newPage(false, 'ctx-a'); + const pageA2 = await context.newPage(false, 'ctx-a'); + await pageA2.setContent(html`
A2
`); + + const pageB = await context.newPage(false, 'ctx-b'); + await pageB.setContent(html``); + + // ctx-a focus is on pageA2, ctx-b focus is on pageB. + await pageB.click('textarea'); + const response = new McpResponse(emptyArgs); + await typeText.handler( + {params: {text: 'still works'}, page: pageB}, + response, + context, + ); + assert.strictEqual( + await pageB.evaluate(() => document.querySelector('textarea')?.value), + 'still works', + ); + }); + }); + }); +}); From fcd6e21ed64ab88c1b57ea896a6f9dd8665f968c Mon Sep 17 00:00:00 2001 From: Stanislav Publika <10758542+passtas@users.noreply.github.com> Date: Thu, 26 Feb 2026 10:05:32 +0000 Subject: [PATCH 5/5] feat: gate pageId routing behind --experimental-page-id-routing flag Add --experimental-page-id-routing CLI flag (default false) to control whether pageId is exposed on page-scoped tools and used for request routing. When disabled, tools behave as before (select_page workflow). - Add serverArgs to eval TestScenario interface so individual evals can pass CLI flags to the MCP server - Add TODO for mutable request state refactoring on McpContext - Add TODO for getSelectedPage removal from Context interface - Stabilize page_focus_keyboard_test eval prompt and expectations --- docs/tool-reference.md | 2 +- scripts/eval_gemini.ts | 5 ++ .../page_focus_keyboard_test.ts | 59 +++++++++---------- .../eval_scenarios/page_id_routing_test.ts | 1 + src/McpContext.ts | 3 + src/cli.ts | 6 ++ src/server.ts | 14 +++-- src/tools/ToolDefinition.ts | 1 + 8 files changed, 54 insertions(+), 37 deletions(-) diff --git a/docs/tool-reference.md b/docs/tool-reference.md index 427d1039f..874e814de 100644 --- a/docs/tool-reference.md +++ b/docs/tool-reference.md @@ -1,6 +1,6 @@ -# Chrome DevTools MCP Tool Reference (~7624 cl100k_base tokens) +# Chrome DevTools MCP Tool Reference (~7084 cl100k_base tokens) - **[Input automation](#input-automation)** (9 tools) - [`click`](#click) diff --git a/scripts/eval_gemini.ts 
b/scripts/eval_gemini.ts index 72640282f..1391bfe61 100644 --- a/scripts/eval_gemini.ts +++ b/scripts/eval_gemini.ts @@ -33,6 +33,8 @@ export interface TestScenario { path: string; htmlContent: string; }; + /** Extra CLI flags passed to the MCP server (e.g. '--experimental-page-id-routing'). */ + serverArgs?: string[]; } async function loadScenario(scenarioPath: string): Promise { @@ -117,6 +119,9 @@ async function runSingleScenario( if (!debug) { args.push('--headless'); } + if (scenario.serverArgs) { + args.push(...scenario.serverArgs); + } transport = new StdioClientTransport({ command: 'node', diff --git a/scripts/eval_scenarios/page_focus_keyboard_test.ts b/scripts/eval_scenarios/page_focus_keyboard_test.ts index eb8d9943f..655b1fec7 100644 --- a/scripts/eval_scenarios/page_focus_keyboard_test.ts +++ b/scripts/eval_scenarios/page_focus_keyboard_test.ts @@ -9,11 +9,12 @@ import assert from 'node:assert'; import type {TestScenario} from '../eval_gemini.ts'; export const scenario: TestScenario = { + serverArgs: ['--experimental-page-id-routing'], prompt: `Open two pages in the same isolated context "session": - Page 1 at data:text/html, - Page 2 at data:text/html,

Other

-Now press_key "a" on Page 1 without selecting it first. If you encounter any errors, recover from them.`, +Now use the press_key tool to type "a" on Page 1 without selecting it first. You must use press_key, not fill or type_text. If you encounter any errors, recover from them.`, maxTurns: 10, expectations: calls => { // Should open 2 pages in the same context. @@ -24,37 +25,35 @@ Now press_key "a" on Page 1 without selecting it first. If you encounter any err // Should attempt press_key at least once. const pressKeys = calls.filter(c => c.name === 'press_key'); - assert.ok(pressKeys.length >= 1, 'Should attempt press_key'); + assert.ok(pressKeys.length >= 1, 'Should attempt press_key at least once'); - // Should call select_page to recover after the error. const selectPages = calls.filter(c => c.name === 'select_page'); - assert.ok( - selectPages.length >= 1, - 'Should select_page to recover from the focus error', - ); - - const firstPressKeyIndex = calls.indexOf(pressKeys[0]); - const firstSelectPageIndex = calls.indexOf(selectPages[0]); - - if (firstPressKeyIndex < firstSelectPageIndex) { - // Error path: press_key was attempted first and failed. - // Verify recovery: must have a second press_key after select_page. - assert.ok( - pressKeys.length >= 2, - 'Should retry press_key after error recovery', - ); - const lastPressKeyIndex = calls.lastIndexOf(pressKeys.at(-1)!); - assert.ok( - firstSelectPageIndex < lastPressKeyIndex, - 'select_page should precede the successful press_key', - ); - } else { - // Proactive path: model selected page first. - // Verify select_page came before press_key. - assert.ok( - firstSelectPageIndex < firstPressKeyIndex, - 'select_page should precede press_key', - ); + + if (selectPages.length > 0) { + const firstPressKeyIndex = calls.indexOf(pressKeys[0]); + const firstSelectPageIndex = calls.indexOf(selectPages[0]); + + if (firstPressKeyIndex < firstSelectPageIndex) { + // Error path: press_key was attempted first and failed. 
+ // Verify recovery: must have a second press_key after select_page. + assert.ok( + pressKeys.length >= 2, + 'Should retry press_key after error recovery', + ); + const lastPressKeyIndex = calls.lastIndexOf(pressKeys.at(-1)!); + assert.ok( + firstSelectPageIndex < lastPressKeyIndex, + 'select_page should precede the successful press_key', + ); + } else { + // Proactive path: model selected page first. + assert.ok( + firstSelectPageIndex < firstPressKeyIndex, + 'select_page should precede press_key', + ); + } } + // If no select_page was called, the model found another recovery path. + // This is acceptable as long as press_key was attempted. }, }; diff --git a/scripts/eval_scenarios/page_id_routing_test.ts b/scripts/eval_scenarios/page_id_routing_test.ts index 99bde60c9..39007a4be 100644 --- a/scripts/eval_scenarios/page_id_routing_test.ts +++ b/scripts/eval_scenarios/page_id_routing_test.ts @@ -9,6 +9,7 @@ import assert from 'node:assert'; import type {TestScenario} from '../eval_gemini.ts'; export const scenario: TestScenario = { + serverArgs: ['--experimental-page-id-routing'], prompt: `Open two new pages in isolated contexts: - Page A (isolatedContext "contextA") at data:text/html, - Page B (isolatedContext "contextB") at data:text/html, diff --git a/src/McpContext.ts b/src/McpContext.ts index 1f5f5452e..cf5597b39 100644 --- a/src/McpContext.ts +++ b/src/McpContext.ts @@ -205,6 +205,9 @@ export class McpContext implements Context { return context; } + // TODO: Refactor away mutable request state (e.g. per-request facade, + // per-request context object, or another approach). Once resolved, the + // global toolMutex could become per-BrowserContext for parallel execution. 
setRequestPage(page?: Page): void { this.#requestPage = page; } diff --git a/src/cli.ts b/src/cli.ts index c585ea4c4..1b3c585aa 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -147,6 +147,12 @@ export const cliOptions = { type: 'boolean', description: `If enabled, ignores errors relative to self-signed and expired certificates. Use with caution.`, }, + experimentalPageIdRouting: { + type: 'boolean', + describe: + 'Whether to expose pageId on page-scoped tools and route requests by page ID.', + hidden: true, + }, experimentalDevtools: { type: 'boolean', describe: 'Whether to enable automation over DevTools targets', diff --git a/src/server.ts b/src/server.ts index 7d00b2dfd..7f984337a 100644 --- a/src/server.ts +++ b/src/server.ts @@ -150,9 +150,10 @@ export async function createMcpServer( ) { return; } - const schema = tool.annotations.pageScoped - ? {...tool.schema, ...pageIdSchema} - : tool.schema; + const schema = + tool.annotations.pageScoped && serverArgs.experimentalPageIdRouting + ? {...tool.schema, ...pageIdSchema} + : tool.schema; server.registerTool( tool.name, @@ -173,9 +174,10 @@ export async function createMcpServer( const response = serverArgs.slim ? new SlimMcpResponse(serverArgs) : new McpResponse(serverArgs); - const page = tool.annotations.pageScoped - ? context.resolvePageById(params.pageId as number | undefined) - : undefined; + const page = + tool.annotations.pageScoped && serverArgs.experimentalPageIdRouting + ? context.resolvePageById(params.pageId as number | undefined) + : undefined; if (page) { context.setRequestPage(page); } diff --git a/src/tools/ToolDefinition.ts b/src/tools/ToolDefinition.ts index 658fad477..8b16e3b28 100644 --- a/src/tools/ToolDefinition.ts +++ b/src/tools/ToolDefinition.ts @@ -116,6 +116,7 @@ export type Context = Readonly<{ isCruxEnabled(): boolean; recordedTraces(): TraceResult[]; storeTraceRecording(result: TraceResult): void; + // TODO: Remove once slim tools are converted to pageScoped: true. 
getSelectedPage(): Page; getDialog(page?: Page): Dialog | undefined; clearDialog(page?: Page): void;