diff --git a/scripts/eval_gemini.ts b/scripts/eval_gemini.ts index 6a3e9662d..d75a2a532 100644 --- a/scripts/eval_gemini.ts +++ b/scripts/eval_gemini.ts @@ -16,6 +16,7 @@ import {TestServer} from '../build/tests/server.js'; const ROOT_DIR = path.resolve(import.meta.dirname, '..'); const SCENARIOS_DIR = path.join(import.meta.dirname, 'eval_scenarios'); +const SKILL_PATH = path.join(ROOT_DIR, 'skills', 'chrome-devtools', 'SKILL.md'); // Define schema for our test scenarios export interface CapturedFunctionCall { @@ -49,6 +50,7 @@ async function runSingleScenario( server: TestServer, modelId: string, debug: boolean, + includeSkill: boolean, ): Promise { const debugLog = (...args: unknown[]) => { if (debug) { @@ -67,6 +69,17 @@ async function runSingleScenario( const loadedScenario = await loadScenario(absolutePath); const scenario = {...loadedScenario}; + // Prepend skill content if requested + if (includeSkill) { + if (!fs.existsSync(SKILL_PATH)) { + throw new Error( + `Skill file not found at ${SKILL_PATH}. 
Please ensure the skill file exists.`, + ); + } + const skillContent = fs.readFileSync(SKILL_PATH, 'utf-8'); + scenario.prompt = `${skillContent}\n\n---\n\n${scenario.prompt}`; + } + // Append random queryid to avoid caching issues and test distinct runs const randomId = Math.floor(Math.random() * 1000000); scenario.prompt = `${scenario.prompt}\nqueryid=${randomId}`; @@ -180,6 +193,10 @@ async function main() { type: 'boolean', default: false, }, + 'include-skill': { + type: 'boolean', + default: false, + }, }, allowPositionals: true, }); @@ -187,6 +204,7 @@ async function main() { const modelId = values.model; const debug = values.debug; const repeat = values.repeat; + const includeSkill = values['include-skill']; const scenarioFiles = positionals.length > 0 @@ -211,7 +229,14 @@ async function main() { `Running scenario: ${path.relative(ROOT_DIR, scenarioPath)} (Run ${i}/3)`, ); } - await runSingleScenario(scenarioPath, apiKey, server, modelId, debug); + await runSingleScenario( + scenarioPath, + apiKey, + server, + modelId, + debug, + includeSkill, + ); console.log(`✔ ${path.relative(ROOT_DIR, scenarioPath)} (Run ${i})`); successCount++; } catch (e) { diff --git a/skills/chrome-devtools/SKILL.md b/skills/chrome-devtools/SKILL.md new file mode 100644 index 000000000..551c03be8 --- /dev/null +++ b/skills/chrome-devtools/SKILL.md @@ -0,0 +1,44 @@ +--- +name: chrome-devtools +description: Uses Chrome DevTools via MCP for efficient debugging, troubleshooting and browser automation. Use when debugging web pages, automating browser interactions, analyzing performance, or inspecting network requests. +--- + +## Core Concepts + +**Browser lifecycle**: Browser starts automatically on first tool call using a persistent Chrome profile. Configure via CLI args in the MCP server configuration: `npx chrome-devtools-mcp@latest --help`. + +**Page selection**: Tools operate on the currently selected page. 
Use `list_pages` to see available pages, then `select_page` to switch context. + +**Element interaction**: Use `take_snapshot` to get page structure with element `uid`s. Each element has a unique `uid` for interaction. If an element isn't found, take a fresh snapshot - the element may have been removed or the page changed. + +## Workflow Patterns + +### Before interacting with a page + +1. Navigate: `navigate_page` or `new_page` +2. Wait: `wait_for` to ensure content is loaded if you know what to look for. +3. Snapshot: `take_snapshot` to understand page structure +4. Interact: Use element `uid`s from snapshot for `click`, `fill`, etc. + +### Efficient data retrieval + +- Use `filePath` parameter for large outputs (screenshots, snapshots, traces) +- Use pagination (`pageIdx`, `pageSize`) and filtering (`types`) to minimize data +- Set `includeSnapshot: false` on input actions unless you need updated page state + +### Tool selection + +- **Automation/interaction**: `take_snapshot` (text-based, faster, better for automation) +- **Visual inspection**: `take_screenshot` (when user needs to see visual state) +- **Additional details**: `evaluate_script` for data not in accessibility tree + +### Parallel execution + +You can send multiple tool calls in parallel, but maintain correct order: navigate → wait → snapshot → interact. + +## Troubleshooting + +If `chrome-devtools-mcp` is insufficient, guide users to use Chrome DevTools UI: + +- https://developer.chrome.com/docs/devtools +- https://developer.chrome.com/docs/devtools/ai-assistance