diff --git a/.gitignore b/.gitignore
index 6043309f0..2ea3f0b63 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,9 @@ yarn-error.log*
 lerna-debug.log*
 .pnpm-debug.log*
 
+trace.json
+trace.json.gz
+
 # Diagnostic reports (https://nodejs.org/api/report.html)
 report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 
diff --git a/GEMINI.md b/GEMINI.md
index fad71d480..25f6ca7e5 100644
--- a/GEMINI.md
+++ b/GEMINI.md
@@ -3,4 +3,15 @@
 - Use only scripts from `package.json` to run commands.
 - Use `npm run build` to run tsc and test build.
 - Use `npm run test` to build and run tests, run all tests to verify correctness.
-- use `npm run test path-to-test.ts` to build and run a single test file, for example, `npm run test tests/McpContext.test.ts`.
+- Use `npm run test path-to-test.ts` to build and run a single test file, for example, `npm run test tests/McpContext.test.ts`.
+- Use `npm run format` to fix formatting and get linting errors.
+
+## Rules for TypeScript
+
+- Do not use `any` type.
+- Do not use `as` keyword for type casting.
+- Do not use `!` operator for type assertion.
+- Do not use `// @ts-ignore` comments.
+- Do not use `// @ts-nocheck` comments.
+- Do not use `// @ts-expect-error` comments.
+- Prefer `for..of` instead of `forEach`.
diff --git a/package-lock.json b/package-lock.json
index 9fdb7d7bc..b37fdcc91 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -13,6 +13,7 @@
       },
       "devDependencies": {
         "@eslint/js": "^9.35.0",
+        "@google/generative-ai": "^0.24.1",
         "@modelcontextprotocol/sdk": "1.25.2",
         "@rollup/plugin-commonjs": "^29.0.0",
         "@rollup/plugin-json": "^6.1.0",
@@ -320,6 +321,16 @@
         "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
       }
     },
+    "node_modules/@google/generative-ai": {
+      "version": "0.24.1",
+      "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz",
+      "integrity": "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
     "node_modules/@hono/node-server": {
       "version": "1.19.7",
       "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.7.tgz",
@@ -4030,9 +4041,9 @@
       }
     },
     "node_modules/hono": {
-      "version": "4.11.3",
-      "resolved": "https://registry.npmjs.org/hono/-/hono-4.11.3.tgz",
-      "integrity": "sha512-PmQi306+M/ct/m5s66Hrg+adPnkD5jiO6IjA7WhWw0gSBSo1EcRegwuI1deZ+wd5pzCGynCcn2DprnE4/yEV4w==",
+      "version": "4.11.4",
+      "resolved": "https://registry.npmjs.org/hono/-/hono-4.11.4.tgz",
+      "integrity": "sha512-U7tt8JsyrxSRKspfhtLET79pU8K+tInj5QZXs1jSugO1Vq5dFj3kmZsRldo29mTBfcjDRVRXrEZ6LS63Cog9ZA==",
       "dev": true,
       "license": "MIT",
       "peer": true,
diff --git a/package.json b/package.json
index 148753e63..fdc42f7c3 100644
--- a/package.json
+++ b/package.json
@@ -21,7 +21,8 @@
     "test:only": "npm run build && node scripts/test.mjs --test-only",
     "test:update-snapshots": "npm run build && node scripts/test.mjs --test-update-snapshots",
     "prepare": "node --experimental-strip-types scripts/prepare.ts",
-    "verify-server-json-version": "node --experimental-strip-types scripts/verify-server-json-version.ts"
+    "verify-server-json-version": "node --experimental-strip-types scripts/verify-server-json-version.ts",
+    "eval": "npm run build && node --experimental-strip-types --test scripts/eval_gemini.ts"
   },
   "files": [
     "build/src",
@@ -39,6 +40,7 @@
   "mcpName": "io.github.ChromeDevTools/chrome-devtools-mcp",
   "devDependencies": {
     "@eslint/js": "^9.35.0",
+    "@google/generative-ai": "^0.24.1",
     "@modelcontextprotocol/sdk": "1.25.2",
     "@rollup/plugin-commonjs": "^29.0.0",
     "@rollup/plugin-json": "^6.1.0",
diff --git a/scripts/eval_gemini.ts b/scripts/eval_gemini.ts
new file mode 100644
index 000000000..8163dc641
--- /dev/null
+++ b/scripts/eval_gemini.ts
@@ -0,0 +1,317 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import fs from 'node:fs';
+import path from 'node:path';
+import {describe, test} from 'node:test';
+import {pathToFileURL} from 'node:url';
+
+import {
+  GoogleGenerativeAI,
+  type FunctionDeclaration,
+  SchemaType,
+} from '@google/generative-ai';
+import {Client} from '@modelcontextprotocol/sdk/client/index.js';
+import {StdioClientTransport} from '@modelcontextprotocol/sdk/client/stdio.js';
+
+const ROOT_DIR = path.resolve(import.meta.dirname, '..');
+const SCENARIOS_DIR = path.join(import.meta.dirname, 'eval_scenarios');
+
+/** JSON Schema keywords removed before a tool schema is sent to Gemini. */
+const STRIPPED_SCHEMA_KEYS = new Set([
+  'default',
+  'additionalProperties',
+  'exclusiveMinimum',
+]);
+
+/** A tool call requested by the model, recorded for the expectations. */
+export interface CapturedFunctionCall {
+  /** Original (unsanitized) MCP tool name. */
+  name: string;
+  /** Arguments supplied by the model; `{}` when they were not an object. */
+  args: Record<string, unknown>;
+}
+
+/** Shape of the `scenario` object that each scenario file exports. */
+export interface TestScenario {
+  /** First user message sent to the model. */
+  prompt: string;
+  /** Maximum number of model turns that are processed. */
+  maxTurns: number;
+  /** Receives every captured tool call and throws when the run is wrong. */
+  expectations: (calls: CapturedFunctionCall[]) => void;
+}
+
+/** Narrows a value to a non-null, non-array object. */
+function isRecord(v: unknown): v is Record<string, unknown> {
+  return typeof v === 'object' && v !== null && !Array.isArray(v);
+}
+
+/** Runtime check that a dynamically imported value has the scenario shape. */
+function isTestScenario(v: unknown): v is TestScenario {
+  return (
+    isRecord(v) &&
+    typeof v.prompt === 'string' &&
+    typeof v.maxTurns === 'number' &&
+    typeof v.expectations === 'function'
+  );
+}
+
+/**
+ * Imports a scenario module and returns its `scenario` export.
+ *
+ * @param scenarioPath - Absolute path of the scenario file.
+ * @throws If the module has no `scenario` export or the export is malformed.
+ */
+async function loadScenario(scenarioPath: string): Promise<TestScenario> {
+  // import() requires a file:// URL for absolute paths on Windows, where the
+  // drive letter would otherwise be read as a URL scheme.
+  const module: unknown = await import(pathToFileURL(scenarioPath).href);
+  const scenario = isRecord(module) ? module.scenario : undefined;
+  if (!scenario) {
+    throw new Error(
+      `Scenario file ${scenarioPath} does not export a 'scenario' object.`,
+    );
+  }
+  if (!isTestScenario(scenario)) {
+    throw new Error(
+      `Scenario file ${scenarioPath} exports a malformed 'scenario' object.`,
+    );
+  }
+  return scenario;
+}
+
+/** Replaces `-` and `.` in an MCP tool name with `_` for use with Gemini. */
+function sanitizeToolName(name: string): string {
+  return name.replace(/[-.]/g, '_');
+}
+
+/**
+ * Returns a deep copy of a schema without the keys in `STRIPPED_SCHEMA_KEYS`.
+ * Arrays are cleaned element by element; other non-objects are returned as is.
+ */
+const cleanSchemaRecursive = (schema: unknown): unknown => {
+  if (Array.isArray(schema)) {
+    return schema.map((item: unknown) => cleanSchemaRecursive(item));
+  }
+  if (!isRecord(schema)) {
+    return schema;
+  }
+
+  const out: Record<string, unknown> = {};
+  for (const [key, value] of Object.entries(schema)) {
+    if (!STRIPPED_SCHEMA_KEYS.has(key)) {
+      out[key] = cleanSchemaRecursive(value);
+    }
+  }
+  return out;
+};
+
+/**
+ * Runs one scenario end to end: starts the built MCP server over stdio,
+ * offers its tools to Gemini, executes the tool calls the model requests for
+ * at most `maxTurns` model turns, then runs the scenario's expectations.
+ *
+ * @param scenarioPath - Path of the scenario file to run.
+ * @param apiKey - Gemini API key.
+ * @throws If the server build is missing, the scenario cannot be loaded, or
+ *   the expectations throw.
+ */
+async function runSingleScenario(
+  scenarioPath: string,
+  apiKey: string,
+): Promise<void> {
+  const absolutePath = path.resolve(scenarioPath);
+  console.log(`\n### Running Scenario: ${absolutePath} ###`);
+
+  let client: Client | undefined;
+  let transport: StdioClientTransport | undefined;
+
+  try {
+    const scenario = await loadScenario(absolutePath);
+
+    // Path to the compiled MCP server
+    const serverPath = path.join(ROOT_DIR, 'build/src/index.js');
+    if (!fs.existsSync(serverPath)) {
+      throw new Error(
+        `MCP server not found at ${serverPath}. Please run 'npm run build' first.`,
+      );
+    }
+
+    // Forward the current environment, skipping undefined entries.
+    const env: Record<string, string> = {};
+    for (const [key, value] of Object.entries(process.env)) {
+      if (value !== undefined) {
+        env[key] = value;
+      }
+    }
+
+    transport = new StdioClientTransport({
+      command: 'node',
+      args: [serverPath],
+      env,
+    });
+
+    client = new Client(
+      {name: 'gemini-eval-client', version: '1.0.0'},
+      {capabilities: {}},
+    );
+
+    await client.connect(transport);
+
+    const toolsResult = await client.listTools();
+    const mcpTools = toolsResult.tools;
+
+    // Convert MCP tools to Gemini function declarations
+    const functionDeclarations: FunctionDeclaration[] = mcpTools.map(tool => ({
+      name: sanitizeToolName(tool.name),
+      description: tool.description?.substring(0, 1024) ?? '',
+      parameters: cleanSchemaRecursive({
+        type: SchemaType.OBJECT,
+        properties:
+          isRecord(tool.inputSchema) && 'properties' in tool.inputSchema
+            ? tool.inputSchema.properties
+            : {},
+        required:
+          isRecord(tool.inputSchema) &&
+          'required' in tool.inputSchema &&
+          Array.isArray(tool.inputSchema.required)
+            ? tool.inputSchema.required
+            : [],
+      }) as FunctionDeclaration['parameters'],
+    }));
+
+    // Keep a map of sanitized names to original names for execution
+    const contentToolsMap = new Map<string, string>();
+    for (const tool of mcpTools) {
+      contentToolsMap.set(sanitizeToolName(tool.name), tool.name);
+    }
+
+    const genAI = new GoogleGenerativeAI(apiKey);
+    const model = genAI.getGenerativeModel({
+      model: 'gemini-3-pro-preview',
+      tools: [{functionDeclarations}],
+    });
+
+    const chat = model.startChat({
+      systemInstruction: {
+        role: 'system',
+        parts: [{text: `Use available tools.`}],
+      },
+    });
+
+    const expectations = scenario.expectations;
+    const allCalls: CapturedFunctionCall[] = [];
+
+    // Execute turns
+    let turnCount = 0;
+    console.log(`\n--- Turn 1 (User) ---`);
+    console.log(scenario.prompt);
+
+    let result = await chat.sendMessage(scenario.prompt);
+    let response = result.response;
+
+    while (turnCount < scenario.maxTurns) {
+      turnCount++;
+      console.log(`\n--- Turn ${turnCount} (Model) ---`);
+      const text = response.text();
+      if (text) {
+        console.log(`Text: ${text}`);
+      }
+
+      const functionCalls = response.functionCalls();
+      if (functionCalls && functionCalls.length > 0) {
+        console.log(
+          `Function Calls: ${JSON.stringify(functionCalls, null, 2)}`,
+        );
+
+        const functionResponses = [];
+        for (const call of functionCalls) {
+          const originalName = contentToolsMap.get(call.name);
+          if (!originalName) {
+            console.error(`Unknown tool called: ${call.name}`);
+            functionResponses.push({
+              functionResponse: {
+                name: call.name,
+                response: {error: `Unknown tool: ${call.name}`},
+              },
+            });
+            continue;
+          }
+
+          const safeArgs = isRecord(call.args) ? call.args : {};
+
+          console.log(
+            `Executing tool: ${originalName} with args: ${JSON.stringify(call.args)}`,
+          );
+
+          allCalls.push({
+            name: originalName,
+            args: safeArgs,
+          });
+
+          try {
+            const toolResult = await client.callTool({
+              name: originalName,
+              arguments: safeArgs,
+            });
+
+            functionResponses.push({
+              functionResponse: {
+                name: call.name,
+                response: {name: call.name, content: toolResult},
+              },
+            });
+          } catch (e) {
+            const errorMessage = e instanceof Error ? e.message : String(e);
+            console.error(`Error executing tool ${originalName}:`, e);
+            functionResponses.push({
+              functionResponse: {
+                name: call.name,
+                response: {error: errorMessage},
+              },
+            });
+          }
+        }
+
+        // Send tool results back
+        console.log(`Sending ${functionResponses.length} tool outputs back...`);
+        result = await chat.sendMessage(functionResponses);
+        response = result.response;
+      } else {
+        console.log('No tool calls. Interaction finished.');
+        break;
+      }
+    }
+
+    console.log('\nVerifying expectations...');
+    expectations(allCalls);
+  } finally {
+    // Close the transport even when closing the client throws.
+    try {
+      await client?.close();
+    } finally {
+      await transport?.close();
+    }
+  }
+}
+
+const apiKey = process.env.GEMINI_API_KEY;
+if (!apiKey) {
+  throw new Error('GEMINI_API_KEY environment variable is required.');
+}
+
+void describe('Gemini Eval Scenarios', () => {
+  const files = fs.readdirSync(SCENARIOS_DIR).filter(file => {
+    return file.endsWith('.ts') || file.endsWith('.js');
+  });
+
+  for (const file of files) {
+    void test(file, async () => {
+      await runSingleScenario(path.join(SCENARIOS_DIR, file), apiKey);
+    });
+  }
+});
diff --git a/scripts/eval_scenarios/navigation_test.ts b/scripts/eval_scenarios/navigation_test.ts
new file mode 100644
index 000000000..6942ab565
--- /dev/null
+++ b/scripts/eval_scenarios/navigation_test.ts
@@ -0,0 +1,21 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+import assert from 'node:assert';
+
+import type {TestScenario} from '../eval_gemini.ts';
+
+export const scenario: TestScenario = {
+  prompt: 'Navigate to https://developers.chrome.com and tell me if it worked.',
+  maxTurns: 1,
+  expectations: calls => {
+    assert.deepStrictEqual(calls, [
+      {
+        name: 'navigate_page',
+        args: {url: 'https://developers.chrome.com'},
+      },
+    ]);
+  },
+};
diff --git a/scripts/eval_scenarios/performance_test.ts b/scripts/eval_scenarios/performance_test.ts
new file mode 100644
index 000000000..c7c7ae2b2
--- /dev/null
+++ b/scripts/eval_scenarios/performance_test.ts
@@ -0,0 +1,20 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+import assert from 'node:assert';
+
+import type {TestScenario} from '../eval_gemini.ts';
+
+export const scenario: TestScenario = {
+  prompt: 'Check the performance of https://developers.chrome.com',
+  maxTurns: 2,
+  expectations: calls => {
+    assert.strictEqual(calls.length, 2);
+    assert.ok(
+      calls[0].name === 'navigate_page' || calls[0].name === 'new_page',
+    );
+    assert.strictEqual(calls[1].name, 'performance_start_trace');
+  },
+};