Skip to content

Commit fcd6e21

Browse files
committed
feat: gate pageId routing behind --experimental-page-id-routing flag
Add --experimental-page-id-routing CLI flag (default false) to control whether pageId is exposed on page-scoped tools and used for request routing. When disabled, tools behave as before (select_page workflow).

- Add serverArgs to eval TestScenario interface so individual evals can pass CLI flags to the MCP server
- Add TODO for mutable request state refactoring on McpContext
- Add TODO for getSelectedPage removal from Context interface
- Stabilize page_focus_keyboard_test eval prompt and expectations
1 parent b3b1aa4 commit fcd6e21

8 files changed

Lines changed: 54 additions & 37 deletions

File tree

docs/tool-reference.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<!-- AUTO GENERATED DO NOT EDIT - run 'npm run docs' to update-->
22

3-
# Chrome DevTools MCP Tool Reference (~7624 cl100k_base tokens)
3+
# Chrome DevTools MCP Tool Reference (~7084 cl100k_base tokens)
44

55
- **[Input automation](#input-automation)** (9 tools)
66
- [`click`](#click)

scripts/eval_gemini.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ export interface TestScenario {
3333
path: string;
3434
htmlContent: string;
3535
};
36+
/** Extra CLI flags passed to the MCP server (e.g. '--experimental-page-id-routing'). */
37+
serverArgs?: string[];
3638
}
3739

3840
async function loadScenario(scenarioPath: string): Promise<TestScenario> {
@@ -117,6 +119,9 @@ async function runSingleScenario(
117119
if (!debug) {
118120
args.push('--headless');
119121
}
122+
if (scenario.serverArgs) {
123+
args.push(...scenario.serverArgs);
124+
}
120125

121126
transport = new StdioClientTransport({
122127
command: 'node',

scripts/eval_scenarios/page_focus_keyboard_test.ts

Lines changed: 29 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@ import assert from 'node:assert';
99
import type {TestScenario} from '../eval_gemini.ts';
1010

1111
export const scenario: TestScenario = {
12+
serverArgs: ['--experimental-page-id-routing'],
1213
prompt: `Open two pages in the same isolated context "session":
1314
- Page 1 at data:text/html,<textarea id="ta"></textarea>
1415
- Page 2 at data:text/html,<h1>Other</h1>
1516
16-
Now press_key "a" on Page 1 without selecting it first. If you encounter any errors, recover from them.`,
17+
Now use the press_key tool to type "a" on Page 1 without selecting it first. You must use press_key, not fill or type_text. If you encounter any errors, recover from them.`,
1718
maxTurns: 10,
1819
expectations: calls => {
1920
// Should open 2 pages in the same context.
@@ -24,37 +25,35 @@ Now press_key "a" on Page 1 without selecting it first. If you encounter any err
2425

2526
// Should attempt press_key at least once.
2627
const pressKeys = calls.filter(c => c.name === 'press_key');
27-
assert.ok(pressKeys.length >= 1, 'Should attempt press_key');
28+
assert.ok(pressKeys.length >= 1, 'Should attempt press_key at least once');
2829

29-
// Should call select_page to recover after the error.
3030
const selectPages = calls.filter(c => c.name === 'select_page');
31-
assert.ok(
32-
selectPages.length >= 1,
33-
'Should select_page to recover from the focus error',
34-
);
35-
36-
const firstPressKeyIndex = calls.indexOf(pressKeys[0]);
37-
const firstSelectPageIndex = calls.indexOf(selectPages[0]);
38-
39-
if (firstPressKeyIndex < firstSelectPageIndex) {
40-
// Error path: press_key was attempted first and failed.
41-
// Verify recovery: must have a second press_key after select_page.
42-
assert.ok(
43-
pressKeys.length >= 2,
44-
'Should retry press_key after error recovery',
45-
);
46-
const lastPressKeyIndex = calls.lastIndexOf(pressKeys.at(-1)!);
47-
assert.ok(
48-
firstSelectPageIndex < lastPressKeyIndex,
49-
'select_page should precede the successful press_key',
50-
);
51-
} else {
52-
// Proactive path: model selected page first.
53-
// Verify select_page came before press_key.
54-
assert.ok(
55-
firstSelectPageIndex < firstPressKeyIndex,
56-
'select_page should precede press_key',
57-
);
31+
32+
if (selectPages.length > 0) {
33+
const firstPressKeyIndex = calls.indexOf(pressKeys[0]);
34+
const firstSelectPageIndex = calls.indexOf(selectPages[0]);
35+
36+
if (firstPressKeyIndex < firstSelectPageIndex) {
37+
// Error path: press_key was attempted first and failed.
38+
// Verify recovery: must have a second press_key after select_page.
39+
assert.ok(
40+
pressKeys.length >= 2,
41+
'Should retry press_key after error recovery',
42+
);
43+
const lastPressKeyIndex = calls.lastIndexOf(pressKeys.at(-1)!);
44+
assert.ok(
45+
firstSelectPageIndex < lastPressKeyIndex,
46+
'select_page should precede the successful press_key',
47+
);
48+
} else {
49+
// Proactive path: model selected page first.
50+
assert.ok(
51+
firstSelectPageIndex < firstPressKeyIndex,
52+
'select_page should precede press_key',
53+
);
54+
}
5855
}
56+
// If no select_page was called, the model found another recovery path.
57+
// This is acceptable as long as press_key was attempted.
5958
},
6059
};

scripts/eval_scenarios/page_id_routing_test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import assert from 'node:assert';
99
import type {TestScenario} from '../eval_gemini.ts';
1010

1111
export const scenario: TestScenario = {
12+
serverArgs: ['--experimental-page-id-routing'],
1213
prompt: `Open two new pages in isolated contexts:
1314
- Page A (isolatedContext "contextA") at data:text/html,<button>Click A</button>
1415
- Page B (isolatedContext "contextB") at data:text/html,<button>Click B</button>

src/McpContext.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,9 @@ export class McpContext implements Context {
205205
return context;
206206
}
207207

208+
// TODO: Refactor away mutable request state (e.g. per-request facade,
209+
// per-request context object, or another approach). Once resolved, the
210+
// global toolMutex could become per-BrowserContext for parallel execution.
208211
setRequestPage(page?: Page): void {
209212
this.#requestPage = page;
210213
}

src/cli.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,12 @@ export const cliOptions = {
147147
type: 'boolean',
148148
description: `If enabled, ignores errors relative to self-signed and expired certificates. Use with caution.`,
149149
},
150+
experimentalPageIdRouting: {
151+
type: 'boolean',
152+
describe:
153+
'Whether to expose pageId on page-scoped tools and route requests by page ID.',
154+
hidden: true,
155+
},
150156
experimentalDevtools: {
151157
type: 'boolean',
152158
describe: 'Whether to enable automation over DevTools targets',

src/server.ts

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,10 @@ export async function createMcpServer(
150150
) {
151151
return;
152152
}
153-
const schema = tool.annotations.pageScoped
154-
? {...tool.schema, ...pageIdSchema}
155-
: tool.schema;
153+
const schema =
154+
tool.annotations.pageScoped && serverArgs.experimentalPageIdRouting
155+
? {...tool.schema, ...pageIdSchema}
156+
: tool.schema;
156157

157158
server.registerTool(
158159
tool.name,
@@ -173,9 +174,10 @@ export async function createMcpServer(
173174
const response = serverArgs.slim
174175
? new SlimMcpResponse(serverArgs)
175176
: new McpResponse(serverArgs);
176-
const page = tool.annotations.pageScoped
177-
? context.resolvePageById(params.pageId as number | undefined)
178-
: undefined;
177+
const page =
178+
tool.annotations.pageScoped && serverArgs.experimentalPageIdRouting
179+
? context.resolvePageById(params.pageId as number | undefined)
180+
: undefined;
179181
if (page) {
180182
context.setRequestPage(page);
181183
}

src/tools/ToolDefinition.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ export type Context = Readonly<{
116116
isCruxEnabled(): boolean;
117117
recordedTraces(): TraceResult[];
118118
storeTraceRecording(result: TraceResult): void;
119+
// TODO: Remove once slim tools are converted to pageScoped: true.
119120
getSelectedPage(): Page;
120121
getDialog(page?: Page): Dialog | undefined;
121122
clearDialog(page?: Page): void;

0 commit comments

Comments
 (0)