diff --git a/scripts/eval_gemini.ts b/scripts/eval_gemini.ts index 72640282f..1391bfe61 100644 --- a/scripts/eval_gemini.ts +++ b/scripts/eval_gemini.ts @@ -33,6 +33,8 @@ export interface TestScenario { path: string; htmlContent: string; }; + /** Extra CLI flags passed to the MCP server (e.g. '--experimental-page-id-routing'). */ + serverArgs?: string[]; } async function loadScenario(scenarioPath: string): Promise { @@ -117,6 +119,9 @@ async function runSingleScenario( if (!debug) { args.push('--headless'); } + if (scenario.serverArgs) { + args.push(...scenario.serverArgs); + } transport = new StdioClientTransport({ command: 'node', diff --git a/scripts/eval_scenarios/page_focus_keyboard_test.ts b/scripts/eval_scenarios/page_focus_keyboard_test.ts new file mode 100644 index 000000000..655b1fec7 --- /dev/null +++ b/scripts/eval_scenarios/page_focus_keyboard_test.ts @@ -0,0 +1,59 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import assert from 'node:assert'; + +import type {TestScenario} from '../eval_gemini.ts'; + +export const scenario: TestScenario = { + serverArgs: ['--experimental-page-id-routing'], + prompt: `Open two pages in the same isolated context "session": +- Page 1 at data:text/html, +- Page 2 at data:text/html,

Other

+ +Now use the press_key tool to type "a" on Page 1 without selecting it first. You must use press_key, not fill or type_text. If you encounter any errors, recover from them.`, + maxTurns: 10, + expectations: calls => { + // Should open 2 pages in the same context. + const newPages = calls.filter(c => c.name === 'new_page'); + assert.strictEqual(newPages.length, 2, 'Should open 2 pages'); + assert.strictEqual(newPages[0].args.isolatedContext, 'session'); + assert.strictEqual(newPages[1].args.isolatedContext, 'session'); + + // Should attempt press_key at least once. + const pressKeys = calls.filter(c => c.name === 'press_key'); + assert.ok(pressKeys.length >= 1, 'Should attempt press_key at least once'); + + const selectPages = calls.filter(c => c.name === 'select_page'); + + if (selectPages.length > 0) { + const firstPressKeyIndex = calls.indexOf(pressKeys[0]); + const firstSelectPageIndex = calls.indexOf(selectPages[0]); + + if (firstPressKeyIndex < firstSelectPageIndex) { + // Error path: press_key was attempted first and failed. + // Verify recovery: must have a second press_key after select_page. + assert.ok( + pressKeys.length >= 2, + 'Should retry press_key after error recovery', + ); + const lastPressKeyIndex = calls.lastIndexOf(pressKeys.at(-1)!); + assert.ok( + firstSelectPageIndex < lastPressKeyIndex, + 'select_page should precede the successful press_key', + ); + } else { + // Proactive path: model selected page first. + assert.ok( + firstSelectPageIndex < firstPressKeyIndex, + 'select_page should precede press_key', + ); + } + } + // If no select_page was called, the model found another recovery path. + // This is acceptable as long as press_key was attempted. + }, +}; diff --git a/scripts/eval_scenarios/page_id_routing_test.ts b/scripts/eval_scenarios/page_id_routing_test.ts new file mode 100644 index 000000000..39007a4be --- /dev/null +++ b/scripts/eval_scenarios/page_id_routing_test.ts @@ -0,0 +1,40 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import assert from 'node:assert'; + +import type {TestScenario} from '../eval_gemini.ts'; + +export const scenario: TestScenario = { + serverArgs: ['--experimental-page-id-routing'], + prompt: `Open two new pages in isolated contexts: +- Page A (isolatedContext "contextA") at data:text/html, +- Page B (isolatedContext "contextB") at data:text/html, +Then take a snapshot of Page A, take a snapshot of Page B, and then click the button on Page A.`, + maxTurns: 12, + expectations: calls => { + // Should have 2 new_page calls with isolatedContext. + const newPages = calls.filter(c => c.name === 'new_page'); + assert.strictEqual(newPages.length, 2, 'Should open 2 pages'); + for (const np of newPages) { + assert.strictEqual( + typeof np.args.isolatedContext, + 'string', + 'new_page should use isolatedContext', + ); + } + + // Should have at least 2 take_snapshot calls (one per page). + // The model may use pageId directly or select_page before each snapshot. + const snapshots = calls.filter(c => c.name === 'take_snapshot'); + assert.ok(snapshots.length >= 2, 'Should take at least 2 snapshots'); + + // Should have a click call (resolving uid from Page A's snapshot + // even though Page B was snapshotted after). + const clicks = calls.filter(c => c.name === 'click'); + assert.ok(clicks.length >= 1, 'Should click the button on Page A'); + }, +}; diff --git a/src/McpContext.ts b/src/McpContext.ts index 56ea1f5a8..cf5597b39 100644 --- a/src/McpContext.ts +++ b/src/McpContext.ts @@ -14,6 +14,7 @@ import { UniverseManager, urlsEqual, } from './DevtoolsUtils.js'; +import {McpPage} from './McpPage.js'; import type {ListenerMap, UncaughtError} from './PageCollector.js'; import {NetworkCollector, ConsoleCollector} from './PageCollector.js'; import type {DevTools} from './third_party/index.js'; @@ -38,18 +39,24 @@ import {takeSnapshot} from './tools/snapshot.js'; import {CLOSE_PAGE_ERROR} from './tools/ToolDefinition.js'; import type {Context, DevToolsData} from './tools/ToolDefinition.js'; import type {TraceResult} from './trace-processing/parse.js'; +import type { + EmulationSettings, + GeolocationOptions, + TextSnapshot, + TextSnapshotNode, +} from './types.js'; import { ExtensionRegistry, type InstalledExtension, } from './utils/ExtensionRegistry.js'; import {WaitForHelper} from './WaitForHelper.js'; -export interface TextSnapshotNode extends SerializedAXNode { - id: string; - backendNodeId?: number; - loaderId?: string; - children: TextSnapshotNode[]; -} +export type { + EmulationSettings, + GeolocationOptions, + TextSnapshot, + TextSnapshotNode, +} from './types.js'; export interface ExtensionServiceWorker { url: string; @@ -57,31 +64,6 @@ export interface ExtensionServiceWorker { id: string; } -export interface GeolocationOptions { - latitude: number; - longitude: number; -} - -export interface TextSnapshot { - root: TextSnapshotNode; - idToNode: Map; - snapshotId: string; - selectedElementUid?: string; - // It might happen that there is a selected element, but it is not part of the - // snapshot. This flag indicates if there is any selected element. - hasSelectedElement: boolean; - verbose: boolean; -} - -interface EmulationSettings { - networkConditions?: string | null; - cpuThrottlingRate?: number | null; - geolocation?: GeolocationOptions | null; - userAgent?: string | null; - colorScheme?: 'dark' | 'light' | null; - viewport?: Viewport | null; -} - interface McpContextOptions { // Whether the DevTools windows are exposed as pages for debugging of DevTools. experimentalDevToolsDebugging: boolean; @@ -129,18 +111,14 @@ export class McpContext implements Context { // Maps LLM-provided isolatedContext name → Puppeteer BrowserContext. #isolatedContexts = new Map(); - // Reverse lookup: Page → isolatedContext name (for snapshot labeling). - // WeakMap so closed pages are garbage-collected automatically. - #pageToIsolatedContextName = new WeakMap(); // Auto-generated name counter for when no name is provided. #nextIsolatedContextId = 1; #pages: Page[] = []; #extensionServiceWorkers: ExtensionServiceWorker[] = []; - #pageToDevToolsPage = new Map(); + #mcpPages = new Map(); #selectedPage?: Page; - #textSnapshot: TextSnapshot | null = null; #networkCollector: NetworkCollector; #consoleCollector: ConsoleCollector; #devtoolsUniverseManager: UniverseManager; @@ -149,10 +127,10 @@ export class McpContext implements Context { #isRunningTrace = false; #screenRecorderData: {recorder: ScreenRecorder; filePath: string} | null = null; - #emulationSettingsMap = new WeakMap(); - #dialog?: Dialog; + #focusedPagePerContext = new Map(); + + #requestPage?: Page; - #pageIdMap = new WeakMap(); #nextPageId = 1; #extensionServiceWorkerMap = new WeakMap(); @@ -164,8 +142,6 @@ export class McpContext implements Context { #locatorClass: typeof Locator; #options: McpContextOptions; - #uniqueBackendNodeIdToMcpId = new Map(); - private constructor( browser: Browser, logger: Debugger, @@ -207,6 +183,10 @@ export class McpContext implements Context { this.#networkCollector.dispose(); this.#consoleCollector.dispose(); this.#devtoolsUniverseManager.dispose(); + for (const mcpPage of this.#mcpPages.values()) { + mcpPage.dispose(); + } + this.#mcpPages.clear(); // Isolated contexts are intentionally not closed here. // Either the entire browser will be closed or we disconnect // without destroying browser state. @@ -225,8 +205,19 @@ export class McpContext implements Context { return context; } + // TODO: Refactor away mutable request state (e.g. per-request facade, + // per-request context object, or another approach). Once resolved, the + // global toolMutex could become per-BrowserContext for parallel execution. + setRequestPage(page?: Page): void { + this.#requestPage = page; + } + + #resolveTargetPage(): Page { + return this.#requestPage ?? this.getSelectedPage(); + } + resolveCdpRequestId(cdpRequestId: string): number | undefined { - const selectedPage = this.getSelectedPage(); + const selectedPage = this.#resolveTargetPage(); if (!cdpRequestId) { this.logger('no network request'); return; @@ -242,43 +233,51 @@ export class McpContext implements Context { return this.#networkCollector.getIdForResource(request); } - resolveCdpElementId(cdpBackendNodeId: number): string | undefined { + resolveCdpElementId( + cdpBackendNodeId: number, + page?: Page, + ): string | undefined { if (!cdpBackendNodeId) { this.logger('no cdpBackendNodeId'); return; } - if (this.#textSnapshot === null) { + const snapshots = page + ? [this.#mcpPages.get(page)?.textSnapshot].filter(Boolean) + : [...this.#mcpPages.values()].map(mp => mp.textSnapshot).filter(Boolean); + if (!snapshots.length) { this.logger('no text snapshot'); return; } // TODO: index by backendNodeId instead. - const queue = [this.#textSnapshot.root]; - while (queue.length) { - const current = queue.pop()!; - if (current.backendNodeId === cdpBackendNodeId) { - return current.id; - } - for (const child of current.children) { - queue.push(child); + for (const snapshot of snapshots) { + const queue = [snapshot!.root]; + while (queue.length) { + const current = queue.pop()!; + if (current.backendNodeId === cdpBackendNodeId) { + return current.id; + } + for (const child of current.children) { + queue.push(child); + } } } return; } getNetworkRequests(includePreservedRequests?: boolean): HTTPRequest[] { - const page = this.getSelectedPage(); + const page = this.#resolveTargetPage(); return this.#networkCollector.getData(page, includePreservedRequests); } getConsoleData( includePreservedMessages?: boolean, ): Array { - const page = this.getSelectedPage(); + const page = this.#resolveTargetPage(); return this.#consoleCollector.getData(page, includePreservedMessages); } getDevToolsUniverse(): TargetUniverse | null { - return this.#devtoolsUniverseManager.get(this.getSelectedPage()); + return this.#devtoolsUniverseManager.get(this.#resolveTargetPage()); } getConsoleMessageStableId( @@ -290,7 +289,7 @@ export class McpContext implements Context { getConsoleMessageById( id: number, ): ConsoleMessage | Error | DevTools.AggregatedIssue | UncaughtError { - return this.#consoleCollector.getById(this.getSelectedPage(), id); + return this.#consoleCollector.getById(this.#resolveTargetPage(), id); } async newPage( @@ -305,7 +304,6 @@ export class McpContext implements Context { this.#isolatedContexts.set(isolatedContextName, ctx); } page = await ctx.newPage(); - this.#pageToIsolatedContextName.set(page, isolatedContextName); } else { page = await this.browser.newPage({background}); } @@ -320,25 +318,36 @@ export class McpContext implements Context { throw new Error(CLOSE_PAGE_ERROR); } const page = this.getPageById(pageId); + const mcpPage = this.#mcpPages.get(page); + if (mcpPage) { + mcpPage.dispose(); + this.#mcpPages.delete(page); + } + const ctx = page.browserContext(); + if (this.#focusedPagePerContext.get(ctx) === page) { + this.#focusedPagePerContext.delete(ctx); + } await page.close({runBeforeUnload: false}); - this.#pageToIsolatedContextName.delete(page); } getNetworkRequestById(reqid: number): HTTPRequest { - return this.#networkCollector.getById(this.getSelectedPage(), reqid); - } - - async emulate(options: { - networkConditions?: string | null; - cpuThrottlingRate?: number | null; - geolocation?: GeolocationOptions | null; - userAgent?: string | null; - colorScheme?: 'dark' | 'light' | 'auto' | null; - viewport?: Viewport | null; - }): Promise { - const page = this.getSelectedPage(); - const currentSettings = this.#emulationSettingsMap.get(page) ?? {}; - const newSettings: EmulationSettings = {...currentSettings}; + return this.#networkCollector.getById(this.#resolveTargetPage(), reqid); + } + + async emulate( + options: { + networkConditions?: string | null; + cpuThrottlingRate?: number | null; + geolocation?: GeolocationOptions | null; + userAgent?: string | null; + colorScheme?: 'dark' | 'light' | 'auto' | null; + viewport?: Viewport | null; + }, + targetPage?: Page, + ): Promise { + const page = targetPage ?? this.getSelectedPage(); + const mcpPage = this.#getMcpPage(page); + const newSettings: EmulationSettings = {...mcpPage.emulationSettings}; let timeoutsNeedUpdate = false; if (options.networkConditions !== undefined) { @@ -429,11 +438,9 @@ export class McpContext implements Context { } } - if (Object.keys(newSettings).length) { - this.#emulationSettingsMap.set(page, newSettings); - } else { - this.#emulationSettingsMap.delete(page); - } + mcpPage.emulationSettings = Object.keys(newSettings).length + ? newSettings + : {}; if (timeoutsNeedUpdate) { this.#updateSelectedPageTimeouts(); @@ -441,33 +448,27 @@ export class McpContext implements Context { } getNetworkConditions(): string | null { - const page = this.getSelectedPage(); - return this.#emulationSettingsMap.get(page)?.networkConditions ?? null; + return this.#getMcpPage(this.#resolveTargetPage()).networkConditions; } getCpuThrottlingRate(): number { - const page = this.getSelectedPage(); - return this.#emulationSettingsMap.get(page)?.cpuThrottlingRate ?? 1; + return this.#getMcpPage(this.#resolveTargetPage()).cpuThrottlingRate; } getGeolocation(): GeolocationOptions | null { - const page = this.getSelectedPage(); - return this.#emulationSettingsMap.get(page)?.geolocation ?? null; + return this.#getMcpPage(this.#resolveTargetPage()).geolocation; } getViewport(): Viewport | null { - const page = this.getSelectedPage(); - return this.#emulationSettingsMap.get(page)?.viewport ?? null; + return this.#getMcpPage(this.#resolveTargetPage()).viewport; } getUserAgent(): string | null { - const page = this.getSelectedPage(); - return this.#emulationSettingsMap.get(page)?.userAgent ?? null; + return this.#getMcpPage(this.#resolveTargetPage()).userAgent; } getColorScheme(): 'dark' | 'light' | null { - const page = this.getSelectedPage(); - return this.#emulationSettingsMap.get(page)?.colorScheme ?? null; + return this.#getMcpPage(this.#resolveTargetPage()).colorScheme; } setIsRunningPerformanceTrace(x: boolean): void { @@ -492,12 +493,19 @@ export class McpContext implements Context { return this.#options.performanceCrux; } - getDialog(): Dialog | undefined { - return this.#dialog; + getDialog(page?: Page): Dialog | undefined { + const targetPage = page ?? this.#requestPage ?? this.#selectedPage; + if (!targetPage) { + return undefined; + } + return this.#mcpPages.get(targetPage)?.dialog; } - clearDialog(): void { - this.#dialog = undefined; + clearDialog(page?: Page): void { + const targetPage = page ?? this.#selectedPage; + if (targetPage) { + this.#mcpPages.get(targetPage)?.clearDialog(); + } } getSelectedPage(): Page { @@ -513,8 +521,15 @@ export class McpContext implements Context { return page; } + resolvePageById(pageId?: number): Page { + if (pageId === undefined) { + return this.getSelectedPage(); + } + return this.getPageById(pageId); + } + getPageById(pageId: number): Page { - const page = this.#pages.find(p => this.#pageIdMap.get(p) === pageId); + const page = this.#pages.find(p => this.#mcpPages.get(p)?.id === pageId); if (!page) { throw new Error('No page found'); } @@ -522,27 +537,48 @@ export class McpContext implements Context { } getPageId(page: Page): number | undefined { - return this.#pageIdMap.get(page); + return this.#mcpPages.get(page)?.id; } - #dialogHandler = (dialog: Dialog): void => { - this.#dialog = dialog; - }; + #getMcpPage(page: Page): McpPage { + const mcpPage = this.#mcpPages.get(page); + if (!mcpPage) { + throw new Error('No McpPage found for the given page.'); + } + return mcpPage; + } + + #getSelectedMcpPage(): McpPage { + return this.#getMcpPage(this.getSelectedPage()); + } isPageSelected(page: Page): boolean { return this.#selectedPage === page; } + assertPageIsFocused(page: Page): void { + const ctx = page.browserContext(); + const focused = this.#focusedPagePerContext.get(ctx); + if (focused && focused !== page) { + const targetId = this.#mcpPages.get(page)?.id ?? '?'; + const focusedId = this.#mcpPages.get(focused)?.id ?? '?'; + throw new Error( + `Page ${targetId} is not the active page in its browser context (page ${focusedId} is). ` + + `Call select_page with pageId ${targetId} first.`, + ); + } + } + selectPage(newPage: Page): void { - const oldPage = this.#selectedPage; - if (oldPage) { - oldPage.off('dialog', this.#dialogHandler); - void oldPage.emulateFocusedPage(false).catch(error => { + const ctx = newPage.browserContext(); + const oldFocused = this.#focusedPagePerContext.get(ctx); + if (oldFocused && oldFocused !== newPage && !oldFocused.isClosed()) { + void oldFocused.emulateFocusedPage(false).catch(error => { this.logger('Error turning off focused page emulation', error); }); } + this.#focusedPagePerContext.set(ctx, newPage); this.#selectedPage = newPage; - newPage.on('dialog', this.#dialogHandler); this.#updateSelectedPageTimeouts(); void newPage.emulateFocusedPage(true).catch(error => { this.logger('Error turning on focused page emulation', error); @@ -565,36 +601,78 @@ export class McpContext implements Context { } getNavigationTimeout() { - const page = this.getSelectedPage(); + const page = this.#resolveTargetPage(); return page.getDefaultNavigationTimeout(); } + // Linear scan over per-page snapshots. The page count is small (typically + // 2-10) so a reverse index isn't worthwhile given the uid-reuse lifecycle + // complexity it would introduce. getAXNodeByUid(uid: string) { - return this.#textSnapshot?.idToNode.get(uid); - } + for (const mcpPage of this.#mcpPages.values()) { + const node = mcpPage.textSnapshot?.idToNode.get(uid); + if (node) { + return node; + } + } + return undefined; + } + + async getElementByUid( + uid: string, + page?: Page, + ): Promise> { + if (page) { + // Scoped search: only look in the target page's snapshot. + const mcpPage = this.#mcpPages.get(page); + if (!mcpPage?.textSnapshot) { + throw new Error( + `No snapshot found for page ${mcpPage?.id ?? '?'}. Use ${takeSnapshot.name} to capture one.`, + ); + } + const node = mcpPage.textSnapshot.idToNode.get(uid); + if (!node) { + throw new Error( + `Element uid "${uid}" not found on page ${mcpPage.id}.`, + ); + } + return this.#resolveElementHandle(node, uid); + } - async getElementByUid(uid: string): Promise> { - if (!this.#textSnapshot?.idToNode.size) { + // Cross-page search with context-focus validation. + let anySnapshot = false; + for (const [searchPage, mcpPage] of this.#mcpPages.entries()) { + if (!mcpPage.textSnapshot) { + continue; + } + anySnapshot = true; + const node = mcpPage.textSnapshot.idToNode.get(uid); + if (node) { + const ctx = searchPage.browserContext(); + const contextSelectedPage = this.#focusedPagePerContext.get(ctx); + if (contextSelectedPage !== searchPage) { + const targetId = mcpPage.id; + const selectedId = contextSelectedPage + ? this.#mcpPages.get(contextSelectedPage)?.id + : this.#getSelectedMcpPage().id; + throw new Error( + `Element uid "${uid}" belongs to page ${targetId}, but page ${selectedId} is currently selected. ` + + `Call select_page with pageId ${targetId} first.`, + ); + } + // Align global #selectedPage for waitForEventsAfterAction etc. + if (this.#selectedPage !== searchPage) { + this.#selectedPage = searchPage; + } + return this.#resolveElementHandle(node, uid); + } + } + if (!anySnapshot) { throw new Error( `No snapshot found. Use ${takeSnapshot.name} to capture one.`, ); } - const node = this.#textSnapshot?.idToNode.get(uid); - if (!node) { - throw new Error('No such element found in the snapshot.'); - } - const message = `Element with uid ${uid} no longer exists on the page.`; - try { - const handle = await node.elementHandle(); - if (!handle) { - throw new Error(message); - } - return handle; - } catch (error) { - throw new Error(message, { - cause: error, - }); - } + throw new Error('No such element found in any snapshot.'); } /** @@ -632,12 +710,48 @@ export class McpContext implements Context { return this.#extensionServiceWorkers; } + async #resolveElementHandle( + node: TextSnapshotNode, + uid: string, + ): Promise> { + const message = `Element with uid ${uid} no longer exists on the page.`; + try { + const handle = await node.elementHandle(); + if (!handle) { + throw new Error(message); + } + return handle; + } catch (error) { + throw new Error(message, { + cause: error, + }); + } + } + async createPagesSnapshot(): Promise { - const allPages = await this.#getAllPages(); + const {pages: allPages, isolatedContextNames} = await this.#getAllPages(); for (const page of allPages) { - if (!this.#pageIdMap.has(page)) { - this.#pageIdMap.set(page, this.#nextPageId++); + let mcpPage = this.#mcpPages.get(page); + if (!mcpPage) { + mcpPage = new McpPage(page, this.#nextPageId++); + this.#mcpPages.set(page, mcpPage); + } + mcpPage.isolatedContextName = isolatedContextNames.get(page); + } + + // Prune orphaned #mcpPages entries (pages that no longer exist). + const currentPages = new Set(allPages); + for (const [page, mcpPage] of this.#mcpPages) { + if (!currentPages.has(page)) { + mcpPage.dispose(); + this.#mcpPages.delete(page); + } + } + // Prune stale #focusedPagePerContext entries. + for (const [ctx, page] of this.#focusedPagePerContext) { + if (!currentPages.has(page)) { + this.#focusedPagePerContext.delete(ctx); } } @@ -660,7 +774,10 @@ export class McpContext implements Context { return this.#pages; } - async #getAllPages(): Promise { + async #getAllPages(): Promise<{ + pages: Page[]; + isolatedContextNames: Map; + }> { const defaultCtx = this.browser.defaultBrowserContext(); const allPages = await this.browser.pages( this.#options.experimentalIncludeAllPages, @@ -683,22 +800,26 @@ export class McpContext implements Context { } } - // Use page.browserContext() to determine each page's context membership. + // Map each page to its isolated context name (if any). + const isolatedContextNames = new Map(); for (const page of allPages) { const ctx = page.browserContext(); const name = contextToName.get(ctx); if (name) { - this.#pageToIsolatedContextName.set(page, name); + isolatedContextNames.set(page, name); } } - return allPages; + return {pages: allPages, isolatedContextNames}; } async detectOpenDevToolsWindows() { this.logger('Detecting open DevTools windows'); - const pages = await this.#getAllPages(); - this.#pageToDevToolsPage = new Map(); + const {pages} = await this.#getAllPages(); + // Clear all devToolsPage references before re-detecting. + for (const mcpPage of this.#mcpPages.values()) { + mcpPage.devToolsPage = undefined; + } for (const devToolsPage of pages) { if (devToolsPage.url().startsWith('devtools://')) { try { @@ -715,7 +836,10 @@ export class McpContext implements Context { // TODO: lookup without a loop. for (const page of this.#pages) { if (urlsEqual(page.url(), urlLike)) { - this.#pageToDevToolsPage.set(page, devToolsPage); + const mcpPage = this.#mcpPages.get(page); + if (mcpPage) { + mcpPage.devToolsPage = devToolsPage; + } } } } catch (error) { @@ -740,17 +864,17 @@ export class McpContext implements Context { } getIsolatedContextName(page: Page): string | undefined { - return this.#pageToIsolatedContextName.get(page); + return this.#mcpPages.get(page)?.isolatedContextName; } getDevToolsPage(page: Page): Page | undefined { - return this.#pageToDevToolsPage.get(page); + return this.#mcpPages.get(page)?.devToolsPage; } async getDevToolsData(): Promise { try { this.logger('Getting DevTools UI data'); - const selectedPage = this.getSelectedPage(); + const selectedPage = this.#resolveTargetPage(); const devtoolsPage = this.getDevToolsPage(selectedPage); if (!devtoolsPage) { this.logger('No DevTools page detected'); @@ -787,8 +911,10 @@ export class McpContext implements Context { async createTextSnapshot( verbose = false, devtoolsData: DevToolsData | undefined = undefined, + targetPage?: Page, ): Promise { - const page = this.getSelectedPage(); + const page = targetPage ?? this.getSelectedPage(); + const mcpPage = this.#getMcpPage(page); const rootNode = await page.accessibility.snapshot({ includeIframes: true, interestingOnly: !verbose, @@ -797,6 +923,8 @@ export class McpContext implements Context { return; } + const {uniqueBackendNodeIdToMcpId} = mcpPage; + const snapshotId = this.#nextSnapshotId++; // Iterate through the whole accessibility node tree and assign node ids that // will be used for the tree serialization and mapping ids back to nodes. @@ -807,13 +935,13 @@ export class McpContext implements Context { let id = ''; // @ts-expect-error untyped loaderId & backendNodeId. const uniqueBackendId = `${node.loaderId}_${node.backendNodeId}`; - if (this.#uniqueBackendNodeIdToMcpId.has(uniqueBackendId)) { + if (uniqueBackendNodeIdToMcpId.has(uniqueBackendId)) { // Re-use MCP exposed ID if the uniqueId is the same. - id = this.#uniqueBackendNodeIdToMcpId.get(uniqueBackendId)!; + id = uniqueBackendNodeIdToMcpId.get(uniqueBackendId)!; } else { // Only generate a new ID if we have not seen the node before. id = `${snapshotId}_${idCounter++}`; - this.#uniqueBackendNodeIdToMcpId.set(uniqueBackendId, id); + uniqueBackendNodeIdToMcpId.set(uniqueBackendId, id); } seenUniqueIds.add(uniqueBackendId); @@ -839,31 +967,37 @@ export class McpContext implements Context { }; const rootNodeWithId = assignIds(rootNode); - this.#textSnapshot = { + const snapshot: TextSnapshot = { root: rootNodeWithId, snapshotId: String(snapshotId), idToNode, hasSelectedElement: false, verbose, }; + mcpPage.textSnapshot = snapshot; const data = devtoolsData ?? (await this.getDevToolsData()); if (data?.cdpBackendNodeId) { - this.#textSnapshot.hasSelectedElement = true; - this.#textSnapshot.selectedElementUid = this.resolveCdpElementId( + snapshot.hasSelectedElement = true; + snapshot.selectedElementUid = this.resolveCdpElementId( data?.cdpBackendNodeId, + page, ); } // Clean up unique IDs that we did not see anymore. - for (const key of this.#uniqueBackendNodeIdToMcpId.keys()) { + for (const key of uniqueBackendNodeIdToMcpId.keys()) { if (!seenUniqueIds.has(key)) { - this.#uniqueBackendNodeIdToMcpId.delete(key); + uniqueBackendNodeIdToMcpId.delete(key); } } } - getTextSnapshot(): TextSnapshot | null { - return this.#textSnapshot; + getTextSnapshot(targetPage?: Page): TextSnapshot | null { + const page = targetPage ?? this.#selectedPage; + if (!page) { + return null; + } + return this.#mcpPages.get(page)?.textSnapshot ?? null; } async saveTemporaryFile( @@ -939,8 +1073,12 @@ export class McpContext implements Context { return this.#networkCollector.getIdForResource(request); } - waitForTextOnPage(text: string[], timeout?: number): Promise { - const page = this.getSelectedPage(); + waitForTextOnPage( + text: string[], + timeout?: number, + targetPage?: Page, + ): Promise { + const page = targetPage ?? this.getSelectedPage(); const frames = page.frames(); let locator = this.#locatorClass.race( @@ -973,7 +1111,7 @@ export class McpContext implements Context { }, } as ListenerMap; }); - const pages = await this.#getAllPages(); + const {pages} = await this.#getAllPages(); await this.#networkCollector.init(pages); } diff --git a/src/McpPage.ts b/src/McpPage.ts new file mode 100644 index 000000000..51a30448f --- /dev/null +++ b/src/McpPage.ts @@ -0,0 +1,85 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type {Dialog, Page, Viewport} from './third_party/index.js'; +import type { + EmulationSettings, + GeolocationOptions, + TextSnapshot, +} from './types.js'; + +/** + * Per-page state wrapper. Consolidates dialog, snapshot, emulation, + * and metadata that were previously scattered across Maps in McpContext. + * + * Internal class consumed only by McpContext. Fields are public for direct + * read/write access. The dialog field is private because it requires an + * event listener lifecycle managed by the constructor/dispose pair. + */ +export class McpPage { + readonly page: Page; + readonly id: number; + + // Snapshot + textSnapshot: TextSnapshot | null = null; + uniqueBackendNodeIdToMcpId = new Map(); + + // Emulation + emulationSettings: EmulationSettings = {}; + + // Metadata + isolatedContextName?: string; + devToolsPage?: Page; + + // Dialog + #dialog?: Dialog; + #dialogHandler: (dialog: Dialog) => void; + + constructor(page: Page, id: number) { + this.page = page; + this.id = id; + this.#dialogHandler = (dialog: Dialog): void => { + this.#dialog = dialog; + }; + page.on('dialog', this.#dialogHandler); + } + + get dialog(): Dialog | undefined { + return this.#dialog; + } + + clearDialog(): void { + this.#dialog = undefined; + } + + get networkConditions(): string | null { + return this.emulationSettings.networkConditions ?? null; + } + + get cpuThrottlingRate(): number { + return this.emulationSettings.cpuThrottlingRate ?? 1; + } + + get geolocation(): GeolocationOptions | null { + return this.emulationSettings.geolocation ?? null; + } + + get viewport(): Viewport | null { + return this.emulationSettings.viewport ?? null; + } + + get userAgent(): string | null { + return this.emulationSettings.userAgent ?? null; + } + + get colorScheme(): 'dark' | 'light' | null { + return this.emulationSettings.colorScheme ?? null; + } + + dispose(): void { + this.page.off('dialog', this.#dialogHandler); + } +} diff --git a/src/McpResponse.ts b/src/McpResponse.ts index 243cd81cf..33f631d1e 100644 --- a/src/McpResponse.ts +++ b/src/McpResponse.ts @@ -253,8 +253,9 @@ export class McpResponse implements Response { await context.createTextSnapshot( this.#snapshotParams.verbose, this.#devToolsData, + this.#snapshotParams.page, ); - const textSnapshot = context.getTextSnapshot(); + const textSnapshot = context.getTextSnapshot(this.#snapshotParams.page); if (textSnapshot) { const formatter = new SnapshotFormatter(textSnapshot); if (this.#snapshotParams.filePath) { diff --git a/src/cli.ts b/src/cli.ts index c585ea4c4..1b3c585aa 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -147,6 +147,12 @@ export const cliOptions = { type: 'boolean', description: `If enabled, ignores errors relative to self-signed and expired certificates. Use with caution.`, }, + experimentalPageIdRouting: { + type: 'boolean', + describe: + 'Whether to expose pageId on page-scoped tools and route requests by page ID.', + hidden: true, + }, experimentalDevtools: { type: 'boolean', describe: 'Whether to enable automation over DevTools targets', diff --git a/src/server.ts b/src/server.ts index 42d491800..7f984337a 100644 --- a/src/server.ts +++ b/src/server.ts @@ -24,6 +24,7 @@ import { } from './third_party/index.js'; import {ToolCategory} from './tools/categories.js'; import type {ToolDefinition} from './tools/ToolDefinition.js'; +import {pageIdSchema} from './tools/ToolDefinition.js'; import {createTools} from './tools/tools.js'; import {VERSION} from './version.js'; @@ -149,11 +150,16 @@ export async function createMcpServer( ) { return; } + const schema = + tool.annotations.pageScoped && serverArgs.experimentalPageIdRouting + ? {...tool.schema, ...pageIdSchema} + : tool.schema; + server.registerTool( tool.name, { description: tool.description, - inputSchema: tool.schema, + inputSchema: schema, annotations: tool.annotations, }, async (params): Promise => { @@ -168,31 +174,42 @@ export async function createMcpServer( const response = serverArgs.slim ? new SlimMcpResponse(serverArgs) : new McpResponse(serverArgs); - - await tool.handler( - { - params, - }, - response, - context, - ); - const {content, structuredContent} = await response.handle( - tool.name, - context, - ); - const result: CallToolResult & { - structuredContent?: Record; - } = { - content, - }; - success = true; - if (serverArgs.experimentalStructuredContent) { - result.structuredContent = structuredContent as Record< - string, - unknown - >; + const page = + tool.annotations.pageScoped && serverArgs.experimentalPageIdRouting + ? context.resolvePageById(params.pageId as number | undefined) + : undefined; + if (page) { + context.setRequestPage(page); + } + try { + await tool.handler( + { + params, + page, + }, + response, + context, + ); + const {content, structuredContent} = await response.handle( + tool.name, + context, + ); + const result: CallToolResult & { + structuredContent?: Record; + } = { + content, + }; + success = true; + if (serverArgs.experimentalStructuredContent) { + result.structuredContent = structuredContent as Record< + string, + unknown + >; + } + return result; + } finally { + context.setRequestPage(undefined); } - return result; } catch (err) { logger(`${tool.name} error:`, err, err?.stack); let errorText = err && 'message' in err ? err.message : String(err); diff --git a/src/tools/ToolDefinition.ts b/src/tools/ToolDefinition.ts index b2bea87a1..8b16e3b28 100644 --- a/src/tools/ToolDefinition.ts +++ b/src/tools/ToolDefinition.ts @@ -5,7 +5,6 @@ */ import type {ParsedArguments} from '../cli.js'; -import type {TextSnapshotNode, GeolocationOptions} from '../McpContext.js'; import {zod} from '../third_party/index.js'; import type { Dialog, @@ -15,6 +14,7 @@ import type { Viewport, } from '../third_party/index.js'; import type {InsightName, TraceResult} from '../trace-processing/parse.js'; +import type {TextSnapshotNode, GeolocationOptions} from '../types.js'; import type {InstalledExtension} from '../utils/ExtensionRegistry.js'; import type {PaginationOptions} from '../utils/types.js'; @@ -33,6 +33,12 @@ export interface ToolDefinition< */ readOnlyHint: boolean; conditions?: string[]; + /** + * If true, the tool operates on a specific page. + * The `pageId` schema field is auto-injected and the resolved + * page is provided via `request.page`. + */ + pageScoped?: boolean; }; schema: Schema; handler: ( @@ -44,6 +50,8 @@ export interface ToolDefinition< export interface Request { params: zod.objectOutputType; + /** Populated centrally for tools with `pageScoped: true`. */ + page?: Page; } export interface ImageContentData { @@ -54,6 +62,7 @@ export interface ImageContentData { export interface SnapshotParams { verbose?: boolean; filePath?: string; + page?: Page; } export interface DevToolsData { @@ -107,23 +116,28 @@ export type Context = Readonly<{ isCruxEnabled(): boolean; recordedTraces(): TraceResult[]; storeTraceRecording(result: TraceResult): void; + // TODO: Remove once slim tools are converted to pageScoped: true. getSelectedPage(): Page; - getDialog(): Dialog | undefined; - clearDialog(): void; + getDialog(page?: Page): Dialog | undefined; + clearDialog(page?: Page): void; getPageById(pageId: number): Page; newPage(background?: boolean, isolatedContextName?: string): Promise; closePage(pageId: number): Promise; selectPage(page: Page): void; - getElementByUid(uid: string): Promise>; + assertPageIsFocused(page: Page): void; + getElementByUid(uid: string, page?: Page): Promise>; getAXNodeByUid(uid: string): TextSnapshotNode | undefined; - emulate(options: { - networkConditions?: string | null; - cpuThrottlingRate?: number | null; - geolocation?: GeolocationOptions | null; - userAgent?: string | null; - colorScheme?: 'dark' | 'light' | 'auto' | null; - viewport?: Viewport | null; - }): Promise; + emulate( + options: { + networkConditions?: string | null; + cpuThrottlingRate?: number | null; + geolocation?: GeolocationOptions | null; + userAgent?: string | null; + colorScheme?: 'dark' | 'light' | 'auto' | null; + viewport?: Viewport | null; + }, + targetPage?: Page, + ): Promise; saveTemporaryFile( data: Uint8Array, mimeType: 'image/png' | 'image/jpeg' | 'image/webp', @@ -136,16 +150,16 @@ export type Context = Readonly<{ action: () => Promise, options?: {timeout?: number}, ): Promise; - waitForTextOnPage(text: string[], timeout?: number): Promise; + waitForTextOnPage( + text: string[], + timeout?: number, + page?: Page, + ): Promise; getDevToolsData(): Promise; /** * Returns a reqid for a cdpRequestId. */ resolveCdpRequestId(cdpRequestId: string): number | undefined; - /** - * Returns a reqid for a cdpRequestId. - */ - resolveCdpElementId(cdpBackendNodeId: number): string | undefined; getScreenRecorder(): {recorder: ScreenRecorder; filePath: string} | null; setScreenRecorder( data: {recorder: ScreenRecorder; filePath: string} | null, @@ -175,12 +189,40 @@ export function defineTool< | ToolDefinition | ((args?: Args) => ToolDefinition), ) { + if (typeof definition === 'function') { + const factory = definition; + return (args: Args) => { + const tool = factory(args); + wrapPageScopedHandler(tool); + return tool; + }; + } + wrapPageScopedHandler(definition); return definition; } +function wrapPageScopedHandler( + definition: ToolDefinition, +) { + if (definition.annotations.pageScoped) { + const originalHandler = definition.handler; + definition.handler = async (request, response, context) => { + // In production, main.ts resolves request.page centrally before calling + // the handler. This fallback exists for tests that invoke handlers + // directly without going through main.ts. + request.page ??= context.getSelectedPage(); + return originalHandler(request, response, context); + }; + } +} + export const CLOSE_PAGE_ERROR = 'The last open page cannot be closed. It is fine to keep it open.'; +export const pageIdSchema = { + pageId: zod.number().optional().describe('Targets a specific page by ID.'), +}; + export const timeoutSchema = { timeout: zod .number() diff --git a/src/tools/console.ts b/src/tools/console.ts index ace8f6282..1f6aa3771 100644 --- a/src/tools/console.ts +++ b/src/tools/console.ts @@ -44,6 +44,7 @@ export const listConsoleMessages = defineTool({ annotations: { category: ToolCategory.DEBUGGING, readOnlyHint: true, + pageScoped: true, }, schema: { pageSize: zod @@ -92,6 +93,7 @@ export const getConsoleMessage = defineTool({ annotations: { category: ToolCategory.DEBUGGING, readOnlyHint: true, + pageScoped: true, }, schema: { msgid: zod diff --git a/src/tools/emulation.ts b/src/tools/emulation.ts index ea0538fd1..b05221f07 100644 --- a/src/tools/emulation.ts +++ b/src/tools/emulation.ts @@ -22,6 +22,7 @@ export const emulate = defineTool({ annotations: { category: ToolCategory.EMULATION, readOnlyHint: false, + pageScoped: true, }, schema: { networkConditions: zod @@ -104,6 +105,7 @@ export const emulate = defineTool({ ), }, handler: async (request, _response, context) => { - await context.emulate(request.params); + const page = request.page!; + await context.emulate(request.params, page); }, }); diff --git a/src/tools/input.ts b/src/tools/input.ts index 2c338a520..d88b707c4 100644 --- a/src/tools/input.ts +++ b/src/tools/input.ts @@ -7,7 +7,7 @@ import {logger} from '../logger.js'; import type {McpContext, TextSnapshotNode} from '../McpContext.js'; import {zod} from '../third_party/index.js'; -import type {ElementHandle, KeyInput} from '../third_party/index.js'; +import type {ElementHandle, KeyInput, Page} from '../third_party/index.js'; import {parseKey} from '../utils/keyboard.js'; import {ToolCategory} from './categories.js'; @@ -88,6 +88,7 @@ export const clickAt = defineTool({ category: ToolCategory.INPUT, readOnlyHint: false, conditions: ['computerVision'], + pageScoped: true, }, schema: { x: zod.number().describe('The x coordinate'), @@ -96,7 +97,8 @@ export const clickAt = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - const page = context.getSelectedPage(); + const page = request.page!; + context.assertPageIsFocused(page); await context.waitForEventsAfterAction(async () => { await page.mouse.click(request.params.x, request.params.y, { clickCount: request.params.dblClick ? 2 : 1, @@ -108,7 +110,7 @@ export const clickAt = defineTool({ : `Successfully clicked at the coordinates`, ); if (request.params.includeSnapshot) { - response.includeSnapshot(); + response.includeSnapshot({page}); } }, }); @@ -192,8 +194,9 @@ async function fillFormElement( uid: string, value: string, context: McpContext, + page: Page, ) { - const handle = await context.getElementByUid(uid); + const handle = await context.getElementByUid(uid, page); try { const aXNode = context.getAXNodeByUid(uid); // We assume that combobox needs to be handled as select if it has @@ -204,8 +207,7 @@ async function fillFormElement( // Increase timeout for longer input values. const timeoutPerChar = 10; // ms const fillTimeout = - context.getSelectedPage().getDefaultTimeout() + - value.length * timeoutPerChar; + page.getDefaultTimeout() + value.length * timeoutPerChar; await handle.asLocator().setTimeout(fillTimeout).fill(value); } } catch (error) { @@ -221,6 +223,7 @@ export const fill = defineTool({ annotations: { category: ToolCategory.INPUT, readOnlyHint: false, + pageScoped: true, }, schema: { uid: zod @@ -232,16 +235,18 @@ export const fill = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { + const page = request.page!; await context.waitForEventsAfterAction(async () => { await fillFormElement( request.params.uid, request.params.value, context as McpContext, + page, ); }); response.appendResponseLine(`Successfully filled out the element`); if (request.params.includeSnapshot) { - response.includeSnapshot(); + response.includeSnapshot({page}); } }, }); @@ -252,14 +257,16 @@ export const typeText = defineTool({ annotations: { category: ToolCategory.INPUT, readOnlyHint: false, + pageScoped: true, }, schema: { text: zod.string().describe('The text to type'), submitKey: submitKeySchema, }, handler: async (request, response, context) => { + const page = request.page!; + context.assertPageIsFocused(page); await context.waitForEventsAfterAction(async () => { - const page = context.getSelectedPage(); await page.keyboard.type(request.params.text); if (request.params.submitKey) { await page.keyboard.press(request.params.submitKey as KeyInput); @@ -309,6 +316,7 @@ export const fillForm = defineTool({ annotations: { category: ToolCategory.INPUT, readOnlyHint: false, + pageScoped: true, }, schema: { elements: zod @@ -322,18 +330,20 @@ export const fillForm = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { + const page = request.page!; for (const element of request.params.elements) { await context.waitForEventsAfterAction(async () => { await fillFormElement( element.uid, element.value, context as McpContext, + page, ); }); } response.appendResponseLine(`Successfully filled out the form`); if (request.params.includeSnapshot) { - response.includeSnapshot(); + response.includeSnapshot({page}); } }, }); @@ -344,6 +354,7 @@ export const uploadFile = defineTool({ annotations: { category: ToolCategory.INPUT, readOnlyHint: false, + pageScoped: true, }, schema: { uid: zod @@ -358,6 +369,7 @@ export const uploadFile = defineTool({ const {uid, filePath} = request.params; const handle = (await context.getElementByUid( uid, + request.page, )) as ElementHandle; try { try { @@ -367,9 +379,8 @@ export const uploadFile = defineTool({ // a type=file element. In this case, we want to default to // Page.waitForFileChooser() and upload the file this way. try { - const page = context.getSelectedPage(); const [fileChooser] = await Promise.all([ - page.waitForFileChooser({timeout: 3000}), + request.page!.waitForFileChooser({timeout: 3000}), handle.asLocator().click(), ]); await fileChooser.accept([filePath]); @@ -380,7 +391,7 @@ export const uploadFile = defineTool({ } } if (request.params.includeSnapshot) { - response.includeSnapshot(); + response.includeSnapshot({page: request.page!}); } response.appendResponseLine(`File uploaded from ${filePath}.`); } finally { @@ -395,6 +406,7 @@ export const pressKey = defineTool({ annotations: { category: ToolCategory.INPUT, readOnlyHint: false, + pageScoped: true, }, schema: { key: zod @@ -405,7 +417,8 @@ export const pressKey = defineTool({ includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { - const page = context.getSelectedPage(); + const page = request.page!; + context.assertPageIsFocused(page); const tokens = parseKey(request.params.key); const [key, ...modifiers] = tokens; @@ -423,7 +436,7 @@ export const pressKey = defineTool({ `Successfully pressed key: ${request.params.key}`, ); if (request.params.includeSnapshot) { - response.includeSnapshot(); + response.includeSnapshot({page}); } }, }); diff --git a/src/tools/memory.ts b/src/tools/memory.ts index d75122bad..dc9252a88 100644 --- a/src/tools/memory.ts +++ b/src/tools/memory.ts @@ -15,14 +15,15 @@ export const takeMemorySnapshot = defineTool({ annotations: { category: ToolCategory.PERFORMANCE, readOnlyHint: true, + pageScoped: true, }, schema: { filePath: zod .string() .describe('A path to a .heapsnapshot file to save the heapsnapshot to.'), }, - handler: async (request, response, context) => { - const page = context.getSelectedPage(); + handler: async (request, response, _context) => { + const page = request.page!; await page.captureHeapSnapshot({ path: request.params.filePath, diff --git a/src/tools/network.ts b/src/tools/network.ts index 9a1d9da7c..10e64cd93 100644 --- a/src/tools/network.ts +++ b/src/tools/network.ts @@ -38,6 +38,7 @@ export const listNetworkRequests = defineTool({ annotations: { category: ToolCategory.NETWORK, readOnlyHint: true, + pageScoped: true, }, schema: { pageSize: zod @@ -92,6 +93,7 @@ export const getNetworkRequest = defineTool({ annotations: { category: ToolCategory.NETWORK, readOnlyHint: false, + pageScoped: true, }, schema: { reqid: zod diff --git a/src/tools/pages.ts b/src/tools/pages.ts index b3afe6192..780f72f36 100644 --- a/src/tools/pages.ts +++ b/src/tools/pages.ts @@ -130,6 +130,7 @@ export const navigatePage = defineTool({ annotations: { category: ToolCategory.NAVIGATION, readOnlyHint: false, + pageScoped: true, }, schema: { type: zod @@ -158,7 +159,7 @@ export const navigatePage = defineTool({ ...timeoutSchema, }, handler: async (request, response, context) => { - const page = context.getSelectedPage(); + const page = request.page!; const options = { timeout: request.params.timeout, }; @@ -182,7 +183,7 @@ export const navigatePage = defineTool({ void dialog.dismiss(); } // We are not going to report the dialog like regular dialogs. - context.clearDialog(); + context.clearDialog(page); } }; @@ -279,13 +280,14 @@ export const resizePage = defineTool({ annotations: { category: ToolCategory.EMULATION, readOnlyHint: false, + pageScoped: true, }, schema: { width: zod.number().describe('Page width'), height: zod.number().describe('Page height'), }, - handler: async (request, response, context) => { - const page = context.getSelectedPage(); + handler: async (request, response, _context) => { + const page = request.page!; try { const browser = page.browser(); @@ -318,6 +320,7 @@ export const handleDialog = defineTool({ annotations: { category: ToolCategory.INPUT, readOnlyHint: false, + pageScoped: true, }, schema: { action: zod @@ -329,7 +332,8 @@ export const handleDialog = defineTool({ .describe('Optional prompt text to enter into the dialog.'), }, handler: async (request, response, context) => { - const dialog = context.getDialog(); + const page = request.page!; + const dialog = context.getDialog(page); if (!dialog) { throw new Error('No open dialog found'); } @@ -357,7 +361,7 @@ export const handleDialog = defineTool({ } } - context.clearDialog(); + context.clearDialog(page); response.setIncludePages(true); }, }); diff --git a/src/tools/performance.ts b/src/tools/performance.ts index 393d38f15..dd5f45e03 100644 --- a/src/tools/performance.ts +++ b/src/tools/performance.ts @@ -32,6 +32,7 @@ export const startTrace = defineTool({ annotations: { category: ToolCategory.PERFORMANCE, readOnlyHint: false, + pageScoped: true, }, schema: { reload: zod @@ -55,7 +56,7 @@ export const startTrace = defineTool({ } context.setIsRunningPerformanceTrace(true); - const page = context.getSelectedPage(); + const page = request.page!; const pageUrlForTracing = page.url(); if (request.params.reload) { @@ -119,6 +120,7 @@ export const stopTrace = defineTool({ annotations: { category: ToolCategory.PERFORMANCE, readOnlyHint: false, + pageScoped: true, }, schema: { filePath: filePathSchema, @@ -127,7 +129,7 @@ export const stopTrace = defineTool({ if (!context.isRunningPerformanceTrace()) { return; } - const page = context.getSelectedPage(); + const page = request.page!; await stopTracingAndAppendOutput( page, response, diff --git a/src/tools/screencast.ts b/src/tools/screencast.ts index d24d9b0fd..96626abf3 100644 --- a/src/tools/screencast.ts +++ b/src/tools/screencast.ts @@ -26,6 +26,7 @@ export const startScreencast = defineTool({ annotations: { category: ToolCategory.DEBUGGING, readOnlyHint: false, + pageScoped: true, conditions: ['screencast'], }, schema: { @@ -47,7 +48,7 @@ export const startScreencast = defineTool({ const filePath = request.params.path ?? (await generateTempFilePath()); const resolvedPath = path.resolve(filePath); - const page = context.getSelectedPage(); + const page = request.page!; let recorder: ScreenRecorder; try { diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts index 4312c02aa..4269f6e6f 100644 --- a/src/tools/screenshot.ts +++ b/src/tools/screenshot.ts @@ -17,6 +17,7 @@ export const screenshot = defineTool({ category: ToolCategory.DEBUGGING, // Not read-only due to filePath param. readOnlyHint: false, + pageScoped: true, }, schema: { format: zod @@ -57,9 +58,12 @@ export const screenshot = defineTool({ let pageOrHandle: Page | ElementHandle; if (request.params.uid) { - pageOrHandle = await context.getElementByUid(request.params.uid); + pageOrHandle = await context.getElementByUid( + request.params.uid, + request.page, + ); } else { - pageOrHandle = context.getSelectedPage(); + pageOrHandle = request.page!; } const format = request.params.format; diff --git a/src/tools/script.ts b/src/tools/script.ts index f3bc3c3c5..c0e5f5b61 100644 --- a/src/tools/script.ts +++ b/src/tools/script.ts @@ -17,6 +17,7 @@ so returned values have to be JSON-serializable.`, annotations: { category: ToolCategory.DEBUGGING, readOnlyHint: false, + pageScoped: true, }, schema: { function: zod.string().describe( @@ -49,7 +50,7 @@ Example with arguments: \`(el) => { try { const frames = new Set(); for (const el of request.params.args ?? []) { - const handle = await context.getElementByUid(el.uid); + const handle = await context.getElementByUid(el.uid, request.page); frames.add(handle.frame); args.push(handle); } @@ -60,7 +61,7 @@ Example with arguments: \`(el) => { "Elements from different frames can't be evaluated together.", ); } else { - pageOrFrame = [...frames.values()][0] ?? context.getSelectedPage(); + pageOrFrame = [...frames.values()][0] ?? request.page!; } const fn = await pageOrFrame.evaluateHandle( `(${request.params.function})`, diff --git a/src/tools/snapshot.ts b/src/tools/snapshot.ts index a07bf5825..b8d7a9dfc 100644 --- a/src/tools/snapshot.ts +++ b/src/tools/snapshot.ts @@ -18,6 +18,7 @@ in the DevTools Elements panel (if any).`, category: ToolCategory.DEBUGGING, // Not read-only due to filePath param. readOnlyHint: false, + pageScoped: true, }, schema: { verbose: zod @@ -37,6 +38,7 @@ in the DevTools Elements panel (if any).`, response.includeSnapshot({ verbose: request.params.verbose ?? false, filePath: request.params.filePath, + page: request.page!, }); }, }); @@ -47,6 +49,7 @@ export const waitFor = defineTool({ annotations: { category: ToolCategory.NAVIGATION, readOnlyHint: true, + pageScoped: true, }, schema: { text: zod @@ -58,15 +61,17 @@ export const waitFor = defineTool({ ...timeoutSchema, }, handler: async (request, response, context) => { + const page = request.page!; await context.waitForTextOnPage( request.params.text, request.params.timeout, + page, ); response.appendResponseLine( `Element matching one of ${JSON.stringify(request.params.text)} found.`, ); - response.includeSnapshot(); + response.includeSnapshot({page}); }, }); diff --git a/src/types.ts b/src/types.ts new file mode 100644 index 000000000..69dddd2a9 --- /dev/null +++ b/src/types.ts @@ -0,0 +1,39 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type {SerializedAXNode, Viewport} from './third_party/index.js'; + +export interface TextSnapshotNode extends SerializedAXNode { + id: string; + backendNodeId?: number; + loaderId?: string; + children: TextSnapshotNode[]; +} + +export interface GeolocationOptions { + latitude: number; + longitude: number; +} + +export interface TextSnapshot { + root: TextSnapshotNode; + idToNode: Map; + snapshotId: string; + selectedElementUid?: string; + // It might happen that there is a selected element, but it is not part of the + // snapshot. This flag indicates if there is any selected element. + hasSelectedElement: boolean; + verbose: boolean; +} + +export interface EmulationSettings { + networkConditions?: string | null; + cpuThrottlingRate?: number | null; + geolocation?: GeolocationOptions | null; + userAgent?: string | null; + colorScheme?: 'dark' | 'light' | null; + viewport?: Viewport | null; +} diff --git a/tests/McpContext.test.ts b/tests/McpContext.test.ts index 03c51351b..a0dd00917 100644 --- a/tests/McpContext.test.ts +++ b/tests/McpContext.test.ts @@ -101,6 +101,178 @@ describe('McpContext', () => { }, ); }); + it('resolves uid from a non-selected page snapshot', async () => { + await withMcpContext(async (_response, context) => { + // Page 1: set content and snapshot + const page1 = context.getSelectedPage(); + await page1.setContent(html``); + await context.createTextSnapshot(false, undefined, page1); + + // Capture a uid from page1's snapshot (snapshotId=1, button is node 1) + const page1Uid = '1_1'; + const page1Node = context.getAXNodeByUid(page1Uid); + assert.ok(page1Node, 'uid should resolve from page1 snapshot'); + + // Page 2: new page, set content, snapshot + const page2 = await context.newPage(); + context.selectPage(page2); + await page2.setContent(html``); + await context.createTextSnapshot(false, undefined, page2); + + // Page 2 is now selected. Page 1's uid should still resolve. + const node = context.getAXNodeByUid(page1Uid); + assert.ok(node, 'page1 uid should still resolve after page2 snapshot'); + assert.strictEqual(node?.name, 'Page1 Button'); + + // The element should also be retrievable when the target page is provided. + const element = await context.getElementByUid(page1Uid, page1); + assert.ok(element, 'should get element handle from page1 snapshot uid'); + }); + }); + + describe('getElementByUid context-focus validation', () => { + it('resolves for the focused page in an isolated context', async () => { + await withMcpContext(async (_response, context) => { + const page = await context.newPage(false, 'agent-a'); + await page.setContent(html``); + await context.createTextSnapshot(false, undefined, page); + + // page is focused for agent-a context; should resolve. + const handle = await context.getElementByUid('1_1'); + void handle.dispose(); + }); + }); + + it('throws for a non-focused page in the same context', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'agent-a'); + await pageA1.setContent(html``); + await context.createTextSnapshot(false, undefined, pageA1); + const a1Uid = '1_1'; // button on pageA1 + + // Open a second page in the same context (becomes focused). + const pageA2 = await context.newPage(false, 'agent-a'); + await pageA2.setContent(html``); + await context.createTextSnapshot(false, undefined, pageA2); + + // pageA2 is now focused for agent-a; clicking pageA1's uid should throw. + await assert.rejects( + () => context.getElementByUid(a1Uid), + (err: Error) => { + assert.ok(err.message.includes('belongs to page')); + assert.ok(err.message.includes('currently selected')); + return true; + }, + ); + }); + }); + + it('resolves after cross-context select_page race', async () => { + await withMcpContext(async (_response, context) => { + // Set up two pages in separate isolated contexts. + const pageA = await context.newPage(false, 'agent-a'); + await pageA.setContent(html``); + await context.createTextSnapshot(false, undefined, pageA); + const uidA = '1_1'; + + const pageB = await context.newPage(false, 'agent-b'); + await pageB.setContent(html``); + await context.createTextSnapshot(false, undefined, pageB); + const uidB = '2_1'; + + // Simulate race: agent-a selects its page, then agent-b overwrites global. + context.selectPage(pageA); + context.selectPage(pageB); + // Global #selectedPage is now pageB. + + // Agent A's uid should still resolve (per-context focus for agent-a is pageA). + const handleA = await context.getElementByUid(uidA); + void handleA.dispose(); + // Agent B's uid should also resolve. + const handleB = await context.getElementByUid(uidB); + void handleB.dispose(); + }); + }); + + it('aligns global selectedPage after resolution', async () => { + await withMcpContext(async (_response, context) => { + const pageA = await context.newPage(false, 'agent-a'); + await pageA.setContent(html``); + await context.createTextSnapshot(false, undefined, pageA); + const uidA = '1_1'; + + const pageB = await context.newPage(false, 'agent-b'); + await pageB.setContent(html``); + await context.createTextSnapshot(false, undefined, pageB); + + // Global is on pageB after newPage. + assert.strictEqual(context.getSelectedPage(), pageB); + + // Resolve uid from pageA; should pass and align global. + const handle = await context.getElementByUid(uidA); + void handle.dispose(); + assert.strictEqual(context.getSelectedPage(), pageA); + }); + }); + + it('throws for nonexistent uid', async () => { + await withMcpContext(async (_response, context) => { + const page = await context.newPage(false, 'agent-a'); + await page.setContent(html``); + await context.createTextSnapshot(false, undefined, page); + + await assert.rejects(() => context.getElementByUid('nonexistent_99'), { + message: 'No such element found in any snapshot.', + }); + }); + }); + + it('resolves for default context page alongside isolated contexts', async () => { + await withMcpContext(async (_response, context) => { + // Default context page (already exists from withMcpContext setup). + const defaultPage = context.getSelectedPage(); + await defaultPage.setContent(html``); + await context.createTextSnapshot(false, undefined, defaultPage); + const defaultUid = '1_1'; + + // Isolated context page. + const isoPage = await context.newPage(false, 'agent-a'); + await isoPage.setContent(html``); + await context.createTextSnapshot(false, undefined, isoPage); + const isoUid = '2_1'; + + // Global is now isoPage. Default context focus is still defaultPage. + // Both should resolve via per-context lookup. + const handleDefault = await context.getElementByUid(defaultUid); + void handleDefault.dispose(); + const handleIso = await context.getElementByUid(isoUid); + void handleIso.dispose(); + }); + }); + + it('scopes search to target page when page is provided', async () => { + await withMcpContext(async (_response, context) => { + const pageA = await context.newPage(false, 'agent-a'); + await pageA.setContent(html``); + await context.createTextSnapshot(false, undefined, pageA); + const uidA = '1_1'; + + const pageB = await context.newPage(false, 'agent-b'); + await pageB.setContent(html``); + await context.createTextSnapshot(false, undefined, pageB); + + // uidA belongs to pageA; searching with pageB should throw. + await assert.rejects(() => context.getElementByUid(uidA, pageB), { + message: /not found on page/, + }); + + // Searching with the correct page should resolve. + const handle = await context.getElementByUid(uidA, pageA); + void handle.dispose(); + }); + }); + }); + it('should include network requests in structured content', async t => { await withMcpContext(async (response, context) => { const mockRequest = getMockRequest({ diff --git a/tests/tools/pageFocus.test.ts b/tests/tools/pageFocus.test.ts new file mode 100644 index 000000000..d266230f8 --- /dev/null +++ b/tests/tools/pageFocus.test.ts @@ -0,0 +1,312 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import assert from 'node:assert'; +import {describe, it} from 'node:test'; + +import type {ParsedArguments} from '../../src/cli.js'; +import {McpResponse} from '../../src/McpResponse.js'; +import {clickAt, pressKey, typeText} from '../../src/tools/input.js'; +import {html, withMcpContext} from '../utils.js'; + +const emptyArgs = {} as ParsedArguments; + +describe('assertPageIsFocused', () => { + describe('McpContext method', () => { + it('passes for the only page in an isolated context', async () => { + await withMcpContext(async (_response, context) => { + const page = await context.newPage(false, 'ctx-a'); + assert.doesNotThrow(() => context.assertPageIsFocused(page)); + }); + }); + + it('throws when a different page is focused in the same context', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + const pageA2 = await context.newPage(false, 'ctx-a'); + assert.doesNotThrow(() => context.assertPageIsFocused(pageA2)); + assert.throws( + () => context.assertPageIsFocused(pageA1), + (err: Error) => { + assert.ok(err.message.includes('not the active page')); + assert.ok(err.message.includes('Call select_page')); + return true; + }, + ); + }); + }); + + it('passes after re-selecting the page', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + await context.newPage(false, 'ctx-a'); + assert.throws(() => context.assertPageIsFocused(pageA1)); + context.selectPage(pageA1); + assert.doesNotThrow(() => context.assertPageIsFocused(pageA1)); + }); + }); + + it('does not cross-context interfere', async () => { + await withMcpContext(async (_response, context) => { + const pageA = await context.newPage(false, 'ctx-a'); + const pageB = await context.newPage(false, 'ctx-b'); + assert.doesNotThrow(() => context.assertPageIsFocused(pageA)); + assert.doesNotThrow(() => context.assertPageIsFocused(pageB)); + }); + }); + + it('tracks focus independently per context', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + const pageA2 = await context.newPage(false, 'ctx-a'); + const pageB1 = await context.newPage(false, 'ctx-b'); + const pageB2 = await context.newPage(false, 'ctx-b'); + + // Latest page in each context is focused. + assert.doesNotThrow(() => context.assertPageIsFocused(pageA2)); + assert.doesNotThrow(() => context.assertPageIsFocused(pageB2)); + assert.throws(() => context.assertPageIsFocused(pageA1)); + assert.throws(() => context.assertPageIsFocused(pageB1)); + + // Switch focus within each context independently. + context.selectPage(pageA1); + context.selectPage(pageB1); + assert.doesNotThrow(() => context.assertPageIsFocused(pageA1)); + assert.doesNotThrow(() => context.assertPageIsFocused(pageB1)); + assert.throws(() => context.assertPageIsFocused(pageA2)); + assert.throws(() => context.assertPageIsFocused(pageB2)); + }); + }); + }); + + describe('type_text', () => { + it('throws when targeting a non-focused page', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + await pageA1.setContent(html``); + await pageA1.click('textarea'); + await context.newPage(false, 'ctx-a'); + + await assert.rejects( + () => + typeText.handler( + {params: {text: 'fail'}, page: pageA1}, + new McpResponse(emptyArgs), + context, + ), + (err: Error) => { + assert.ok(err.message.includes('not the active page')); + return true; + }, + ); + }); + }); + + it('succeeds on the focused page', async () => { + await withMcpContext(async (_response, context) => { + const page = await context.newPage(false, 'ctx-a'); + await page.setContent(html``); + await page.click('textarea'); + + const response = new McpResponse(emptyArgs); + await typeText.handler( + {params: {text: 'hello'}, page}, + response, + context, + ); + assert.strictEqual(response.responseLines[0], 'Typed text "hello"'); + assert.strictEqual( + await page.evaluate(() => document.querySelector('textarea')?.value), + 'hello', + ); + }); + }); + + it('succeeds after re-selecting the correct page', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + await pageA1.setContent(html``); + await context.newPage(false, 'ctx-a'); + + await assert.rejects(() => + typeText.handler( + {params: {text: 'fail'}, page: pageA1}, + new McpResponse(emptyArgs), + context, + ), + ); + + context.selectPage(pageA1); + await pageA1.click('textarea'); + + const response = new McpResponse(emptyArgs); + await typeText.handler( + {params: {text: 'recovered'}, page: pageA1}, + response, + context, + ); + assert.strictEqual(response.responseLines[0], 'Typed text "recovered"'); + }); + }); + }); + + describe('press_key', () => { + it('throws when targeting a non-focused page', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + await pageA1.setContent(html`
content
`); + await context.newPage(false, 'ctx-a'); + + await assert.rejects( + () => + pressKey.handler( + {params: {key: 'Tab'}, page: pageA1}, + new McpResponse(emptyArgs), + context, + ), + (err: Error) => { + assert.ok(err.message.includes('not the active page')); + return true; + }, + ); + }); + }); + + it('succeeds on the focused page', async () => { + await withMcpContext(async (_response, context) => { + const page = await context.newPage(false, 'ctx-a'); + await page.setContent( + html``, + ); + + const response = new McpResponse(emptyArgs); + await pressKey.handler( + {params: {key: 'Enter'}, page}, + response, + context, + ); + assert.strictEqual( + response.responseLines[0], + 'Successfully pressed key: Enter', + ); + assert.deepStrictEqual(await page.evaluate('logs'), ['Enter']); + }); + }); + }); + + describe('click_at', () => { + it('throws when targeting a non-focused page', async () => { + await withMcpContext(async (_response, context) => { + const pageA1 = await context.newPage(false, 'ctx-a'); + await pageA1.setContent( + html`
`, + ); + await context.newPage(false, 'ctx-a'); + + await assert.rejects( + () => + clickAt.handler( + {params: {x: 50, y: 50}, page: pageA1}, + new McpResponse(emptyArgs), + context, + ), + (err: Error) => { + assert.ok(err.message.includes('not the active page')); + return true; + }, + ); + }); + }); + + it('succeeds on the focused page', async () => { + await withMcpContext(async (_response, context) => { + const page = await context.newPage(false, 'ctx-a'); + await page.setContent( + html`
`, + ); + + const response = new McpResponse(emptyArgs); + await clickAt.handler( + {params: {x: 50, y: 50}, page}, + response, + context, + ); + assert.strictEqual( + response.responseLines[0], + 'Successfully clicked at the coordinates', + ); + assert.ok(await page.$('text/clicked')); + }); + }); + }); + + describe('cross-context isolation', () => { + it('type_text in one context does not affect another', async () => { + await withMcpContext(async (_response, context) => { + const pageA = await context.newPage(false, 'ctx-a'); + await pageA.setContent(html``); + + const pageB = await context.newPage(false, 'ctx-b'); + await pageB.setContent(html``); + + context.selectPage(pageA); + await pageA.click('textarea'); + await typeText.handler( + {params: {text: 'agent-a'}, page: pageA}, + new McpResponse(emptyArgs), + context, + ); + + context.selectPage(pageB); + await pageB.click('textarea'); + await typeText.handler( + {params: {text: 'agent-b'}, page: pageB}, + new McpResponse(emptyArgs), + context, + ); + + assert.strictEqual( + await pageA.evaluate(() => document.querySelector('textarea')?.value), + 'agent-a', + ); + assert.strictEqual( + await pageB.evaluate(() => document.querySelector('textarea')?.value), + 'agent-b', + ); + }); + }); + + it('switching focus in context A does not break context B', async () => { + await withMcpContext(async (_response, context) => { + await context.newPage(false, 'ctx-a'); + const pageA2 = await context.newPage(false, 'ctx-a'); + await pageA2.setContent(html`
A2
`); + + const pageB = await context.newPage(false, 'ctx-b'); + await pageB.setContent(html``); + + // ctx-a focus is on pageA2, ctx-b focus is on pageB. + await pageB.click('textarea'); + const response = new McpResponse(emptyArgs); + await typeText.handler( + {params: {text: 'still works'}, page: pageB}, + response, + context, + ); + assert.strictEqual( + await pageB.evaluate(() => document.querySelector('textarea')?.value), + 'still works', + ); + }); + }); + }); +}); diff --git a/tests/tools/pages.test.ts b/tests/tools/pages.test.ts index 3a740e250..ecc38e822 100644 --- a/tests/tools/pages.test.ts +++ b/tests/tools/pages.test.ts @@ -235,6 +235,104 @@ describe('pages', () => { }); }); + describe('resolvePageById', () => { + it('returns the correct page regardless of global selection', async () => { + await withMcpContext(async (response, context) => { + // Create two pages with different content. + await newPage.handler( + { + params: { + url: 'data:text/html,

Page A

', + isolatedContext: 'ctx-a', + }, + }, + response, + context, + ); + const pageA = context.getSelectedPage(); + const pageAId = context.getPageId(pageA)!; + + await newPage.handler( + { + params: { + url: 'data:text/html,

Page B

', + isolatedContext: 'ctx-b', + }, + }, + response, + context, + ); + const pageB = context.getSelectedPage(); + const pageBId = context.getPageId(pageB)!; + + // Global selection is now pageB (the last created page). + assert.strictEqual(context.getSelectedPage(), pageB); + + // resolvePageById should return the correct page for each ID, + // regardless of which page is globally selected. + assert.strictEqual(context.resolvePageById(pageAId), pageA); + assert.strictEqual(context.resolvePageById(pageBId), pageB); + }); + }); + + it('falls back to getSelectedPage when no pageId is provided', async () => { + await withMcpContext(async (_response, context) => { + const selectedPage = context.getSelectedPage(); + assert.strictEqual(context.resolvePageById(undefined), selectedPage); + }); + }); + + it('throws for an unknown pageId', async () => { + await withMcpContext(async (_response, context) => { + assert.throws(() => context.resolvePageById(99999), /No page found/); + }); + }); + + it('navigate_page targets the pageId page, not the global selection', async () => { + await withMcpContext(async (response, context) => { + await newPage.handler( + { + params: { + url: 'data:text/html,

Initial

', + isolatedContext: 'nav-ctx', + }, + }, + response, + context, + ); + const isolatedPage = context.getSelectedPage(); + + // Switch global selection back to the default page. + await selectPage.handler({params: {pageId: 1}}, response, context); + assert.notStrictEqual(context.getSelectedPage(), isolatedPage); + + // Navigate using page; should target the isolated page. + await navigatePage.handler( + { + params: { + url: 'data:text/html,

Navigated

', + }, + page: isolatedPage, + }, + response, + context, + ); + + // Verify the isolated page was navigated. + const content = await isolatedPage.evaluate( + () => document.querySelector('h1')?.textContent, + ); + assert.strictEqual(content, 'Navigated'); + + // Verify the default page was NOT affected. + const defaultContent = await context + .getSelectedPage() + .evaluate(() => document.querySelector('h1')?.textContent); + assert.notStrictEqual(defaultContent, 'Navigated'); + }); + }); + }); + describe('close_page', () => { it('closes a page', async () => { await withMcpContext(async (response, context) => { @@ -286,6 +384,165 @@ describe('pages', () => { assert.ok(response.includePages); }); }); + it('preserves focus across different browser contexts', async () => { + await withMcpContext(async (response, context) => { + // Create pages in separate isolated contexts. + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-a'}}, + response, + context, + ); + const pageA = context.getSelectedPage(); + const pageAId = context.getPageId(pageA)!; + + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-b'}}, + response, + context, + ); + const pageB = context.getSelectedPage(); + + // Selecting pageB (ctx-b) should not defocus pageA (ctx-a). + assert.strictEqual( + await pageA.evaluate(() => document.hasFocus()), + true, + ); + assert.strictEqual( + await pageB.evaluate(() => document.hasFocus()), + true, + ); + + // Switching back to pageA should preserve pageB's focus. + await selectPage.handler( + {params: {pageId: pageAId}}, + response, + context, + ); + assert.strictEqual( + await pageA.evaluate(() => document.hasFocus()), + true, + ); + assert.strictEqual( + await pageB.evaluate(() => document.hasFocus()), + true, + ); + }); + }); + it('focuses correct same-context page after cross-context interleaving', async () => { + await withMcpContext(async (response, context) => { + // Create 2 pages in ctx-a, 1 in ctx-b. + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-a'}}, + response, + context, + ); + const pageA1 = context.getSelectedPage(); + const pageA1Id = context.getPageId(pageA1)!; + + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-b'}}, + response, + context, + ); + const pageB = context.getSelectedPage(); + + // pageA1 still focused (cross-context select doesn't defocus it). + assert.strictEqual( + await pageA1.evaluate(() => document.hasFocus()), + true, + ); + + // Create second page in ctx-a. This should defocus pageA1, + // even though #selectedPage was pageB (different context). + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-a'}}, + response, + context, + ); + const pageA2 = context.getSelectedPage(); + + // pageA1 and pageA2 share the same BrowserContext. + assert.strictEqual(pageA1.browserContext(), pageA2.browserContext()); + + assert.strictEqual( + await pageA1.evaluate(() => document.hasFocus()), + false, + 'pageA1 should lose focus when pageA2 is created in the same context', + ); + assert.strictEqual( + await pageA2.evaluate(() => document.hasFocus()), + true, + ); + // pageB is unaffected by ctx-a changes. + assert.strictEqual( + await pageB.evaluate(() => document.hasFocus()), + true, + ); + + // Re-selecting pageA1 should grant it focus via the override. + await selectPage.handler( + {params: {pageId: pageA1Id}}, + response, + context, + ); + assert.strictEqual( + await pageA1.evaluate(() => document.hasFocus()), + true, + ); + // pageB still unaffected. + assert.strictEqual( + await pageB.evaluate(() => document.hasFocus()), + true, + ); + }); + }); + it('handles focus correctly after closing the focused page in a context', async () => { + await withMcpContext(async (response, context) => { + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-a'}}, + response, + context, + ); + const pageA1 = context.getSelectedPage(); + + await newPage.handler( + {params: {url: 'about:blank', isolatedContext: 'ctx-a'}}, + response, + context, + ); + const pageA2 = context.getSelectedPage(); + const pageA2Id = context.getPageId(pageA2)!; + + // pageA2 is focused, pageA1 is not. + assert.strictEqual( + await pageA2.evaluate(() => document.hasFocus()), + true, + ); + assert.strictEqual( + await pageA1.evaluate(() => document.hasFocus()), + false, + ); + + // Close pageA2 (the focused page). + await closePage.handler( + {params: {pageId: pageA2Id}}, + response, + context, + ); + + // Selecting pageA1 should work without errors. + const pageA1Id = context.getPageId(pageA1)!; + await selectPage.handler( + {params: {pageId: pageA1Id}}, + response, + context, + ); + assert.strictEqual( + await pageA1.evaluate(() => document.hasFocus()), + true, + ); + }); + }); }); describe('navigate_page', () => { it('navigates to correct page', async () => { @@ -751,6 +1008,88 @@ describe('pages', () => { ); }); }); + it('can handle a dialog on a non-selected page via pageId', async () => { + await withMcpContext(async (response, context) => { + const page1 = context.getSelectedPage(); + await context.newPage(); // page2 is now selected + + const dialogPromise = new Promise(resolve => { + page1.once('dialog', () => { + resolve(); + }); + }); + page1.evaluate(() => { + alert('test'); + }); + await dialogPromise; + + // page1 is not selected, but its dialog should be accessible via page. + await handleDialog.handler( + { + params: { + action: 'accept', + }, + page: page1, + }, + response, + context, + ); + assert.strictEqual(context.getDialog(page1), undefined); + assert.strictEqual( + response.responseLines[0], + 'Successfully accepted the dialog', + ); + }); + }); + it('tracks dialogs independently per page', async () => { + await withMcpContext(async (response, context) => { + const page1 = context.getSelectedPage(); + const page2 = await context.newPage(); + + // Trigger dialog on page1. + const dialog1Promise = new Promise(resolve => { + page1.once('dialog', () => { + resolve(); + }); + }); + page1.evaluate(() => { + alert('dialog1'); + }); + await dialog1Promise; + + // Trigger dialog on page2. + const dialog2Promise = new Promise(resolve => { + page2.once('dialog', () => { + resolve(); + }); + }); + page2.evaluate(() => { + alert('dialog2'); + }); + await dialog2Promise; + + // Both dialogs should be tracked. + assert.ok(context.getDialog(page1)); + assert.ok(context.getDialog(page2)); + + // Handle page1's dialog; page2's should remain. + await handleDialog.handler( + {params: {action: 'accept'}, page: page1}, + response, + context, + ); + assert.strictEqual(context.getDialog(page1), undefined); + assert.ok(context.getDialog(page2)); + + // Handle page2's dialog. + await handleDialog.handler( + {params: {action: 'dismiss'}, page: page2}, + response, + context, + ); + assert.strictEqual(context.getDialog(page2), undefined); + }); + }); }); describe('get_tab_id', () => {