Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 32 additions & 23 deletions scripts/generate-docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ interface ToolWithAnnotations extends Tool {
annotations?: {
title?: string;
category?: typeof ToolCategory;
conditions?: string[];
};
}

Expand Down Expand Up @@ -265,31 +266,39 @@ async function generateToolDocumentation(): Promise<void> {
console.log('Generating tool documentation from definitions...');

// Convert ToolDefinitions to ToolWithAnnotations
const toolsWithAnnotations: ToolWithAnnotations[] = tools.map(tool => {
const properties: Record<string, TypeInfo> = {};
const required: string[] = [];

for (const [key, schema] of Object.entries(
tool.schema as unknown as Record<string, ZodSchema>,
)) {
const info = getZodTypeInfo(schema);
properties[key] = info;
if (isRequired(schema)) {
required.push(key);
const toolsWithAnnotations: ToolWithAnnotations[] = tools
.filter(tool => {
if (!tool.annotations.conditions) {
return true;
}
// Only include unconditional tools.
return tool.annotations.conditions.length === 0;
})
.map(tool => {
const properties: Record<string, TypeInfo> = {};
const required: string[] = [];

for (const [key, schema] of Object.entries(
tool.schema as unknown as Record<string, ZodSchema>,
)) {
const info = getZodTypeInfo(schema);
properties[key] = info;
if (isRequired(schema)) {
required.push(key);
}
}
}

return {
name: tool.name,
description: tool.description,
inputSchema: {
type: 'object',
properties,
required,
},
annotations: tool.annotations,
};
});
return {
name: tool.name,
description: tool.description,
inputSchema: {
type: 'object',
properties,
required,
},
annotations: tool.annotations,
};
});

console.log(`Found ${toolsWithAnnotations.length} tools`);

Expand Down
5 changes: 5 additions & 0 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@ export const cliOptions = {
describe: 'Whether to enable automation over DevTools targets',
hidden: true,
},
experimentalVision: {
type: 'boolean',
describe: 'Whether to enable vision tools',
hidden: true,
},
experimentalIncludeAllPages: {
type: 'boolean',
describe:
Expand Down
6 changes: 6 additions & 0 deletions src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ function registerTool(tool: ToolDefinition): void {
) {
return;
}
if (
tool.annotations.conditions?.includes('computerVision') &&
!args.experimentalVision
) {
return;
}
server.registerTool(
tool.name,
{
Expand Down
1 change: 1 addition & 0 deletions src/tools/ToolDefinition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export interface ToolDefinition<
* If true, the tool does not modify its environment.
*/
readOnlyHint: boolean;
conditions?: string[];
};
schema: Schema;
handler: (
Expand Down
39 changes: 35 additions & 4 deletions src/tools/input.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ import {parseKey} from '../utils/keyboard.js';
import {ToolCategory} from './categories.js';
import {defineTool} from './ToolDefinition.js';

/**
 * Optional boolean schema for the `dblClick` tool parameter.
 *
 * Defined once at module level so that the `click` and `click_at` tool
 * schemas reference the same definition and description text.
 */
const dblClickSchema = zod
.boolean()
.optional()
.describe('Set to true for double clicks. Default is false.');

export const click = defineTool({
name: 'click',
description: `Clicks on the provided element`,
Expand All @@ -25,10 +30,7 @@ export const click = defineTool({
.describe(
'The uid of an element on the page from the page content snapshot',
),
dblClick: zod
.boolean()
.optional()
.describe('Set to true for double clicks. Default is false.'),
dblClick: dblClickSchema,
},
handler: async (request, response, context) => {
const uid = request.params.uid;
Expand All @@ -51,6 +53,35 @@ export const click = defineTool({
},
});

/**
 * Tool definition for `click_at`: performs a mouse click (or double click)
 * at the given page coordinates instead of targeting an element by uid.
 *
 * The tool carries the `computerVision` condition in its annotations.
 * After the click, a confirmation line is appended to the response and a
 * snapshot is requested.
 */
export const clickAt = defineTool({
  name: 'click_at',
  description: `Clicks at the provided coordinates`,
  annotations: {
    category: ToolCategory.INPUT,
    readOnlyHint: false,
    conditions: ['computerVision'],
  },
  schema: {
    x: zod.number().describe('The x coordinate'),
    y: zod.number().describe('The y coordinate'),
    dblClick: dblClickSchema,
  },
  handler: async (request, response, context) => {
    const page = context.getSelectedPage();
    const {x, y, dblClick} = request.params;
    // A truthy `dblClick` maps to a click count of 2; otherwise 1.
    const clickCount = dblClick ? 2 : 1;

    await context.waitForEventsAfterAction(async () => {
      await page.mouse.click(x, y, {clickCount});
    });

    const confirmation = dblClick
      ? `Successfully double clicked at the coordinates`
      : `Successfully clicked at the coordinates`;
    response.appendResponseLine(confirmation);
    response.includeSnapshot();
  },
});

export const hover = defineTool({
name: 'hover',
description: `Hover over the provided element`,
Expand Down
24 changes: 22 additions & 2 deletions tests/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,13 @@ import {Client} from '@modelcontextprotocol/sdk/client/index.js';
import {StdioClientTransport} from '@modelcontextprotocol/sdk/client/stdio.js';
import {executablePath} from 'puppeteer';

import type {ToolDefinition} from '../src/tools/ToolDefinition';

describe('e2e', () => {
async function withClient(cb: (client: Client) => Promise<void>) {
async function withClient(
cb: (client: Client) => Promise<void>,
extraArgs: string[] = [],
) {
const transport = new StdioClientTransport({
command: 'node',
args: [
Expand All @@ -22,6 +27,7 @@ describe('e2e', () => {
'--isolated',
'--executable-path',
executablePath(),
...extraArgs,
],
});
const client = new Client(
Expand Down Expand Up @@ -90,8 +96,11 @@ describe('e2e', () => {
continue;
}
const fileTools = await import(`../src/tools/${file}`);
for (const maybeTool of Object.values<object>(fileTools)) {
for (const maybeTool of Object.values<ToolDefinition>(fileTools)) {
if ('name' in maybeTool) {
if (maybeTool.annotations?.conditions?.includes('computerVision')) {
continue;
}
definedNames.push(maybeTool.name);
}
}
Expand All @@ -100,4 +109,15 @@ describe('e2e', () => {
assert.deepStrictEqual(exposedNames, definedNames);
});
});

// With `--experimental-vision` passed to the server, the `click_at` tool
// must appear in the list of tools reported to the client.
it('has experimental vision tools', async () => {
  const visionArgs = ['--experimental-vision'];
  await withClient(async client => {
    const listing = await client.listTools();
    const hasClickAt = listing.tools.some(tool => tool.name === 'click_at');
    assert.ok(hasClickAt);
  }, visionArgs);
});
});
62 changes: 62 additions & 0 deletions tests/tools/input.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
fillForm,
uploadFile,
pressKey,
clickAt,
} from '../../src/tools/input.js';
import {parseKey} from '../../src/utils/keyboard.js';
import {serverHooks} from '../server.js';
Expand Down Expand Up @@ -183,6 +184,67 @@ describe('input', () => {
});
});

// Tests for the coordinate-based `click_at` tool: one single-click case and
// one double-click case. Each case renders a 100px x 100px div whose event
// handler rewrites its own text, invokes the handler directly, and then
// checks both the response line and the resulting page content.
describe('click_at', () => {
it('clicks at coordinates', async () => {
await withMcpContext(async (response, context) => {
const page = context.getSelectedPage();
// The div replaces its text with 'clicked' when it receives a click.
await page.setContent(
html`<div
style="width: 100px; height: 100px; background: red;"
onclick="this.innerText = 'clicked'"
></div>`,
);
await context.createTextSnapshot();
// (50, 50) is intended to land inside the 100px x 100px div above.
// `dblClick` is omitted, so the handler takes its single-click path.
await clickAt.handler(
{
params: {
x: 50,
y: 50,
},
},
response,
context,
);
assert.strictEqual(
response.responseLines[0],
'Successfully clicked at the coordinates',
);
// NOTE(review): the handler calls `response.includeSnapshot()` as a
// method; if this response object exposes the same member, the check
// below is always truthy and does not verify anything — confirm.
assert.ok(response.includeSnapshot);
// The click took effect if an element with the text 'clicked' exists.
assert.ok(await page.$('text/clicked'));
});
});

it('double clicks at coordinates', async () => {
await withMcpContext(async (response, context) => {
const page = context.getSelectedPage();
// The div replaces its text with 'dblclicked' only on a dblclick event.
await page.setContent(
html`<div
style="width: 100px; height: 100px; background: red;"
ondblclick="this.innerText = 'dblclicked'"
></div>`,
);
await context.createTextSnapshot();
// Same coordinates as the single-click case, with `dblClick: true` so
// the handler takes its double-click path.
await clickAt.handler(
{
params: {
x: 50,
y: 50,
dblClick: true,
},
},
response,
context,
);
assert.strictEqual(
response.responseLines[0],
'Successfully double clicked at the coordinates',
);
// NOTE(review): same concern as in the single-click case — this may be
// asserting on a method reference rather than on a flag; confirm.
assert.ok(response.includeSnapshot);
// The dblclick fired if an element with the text 'dblclicked' exists.
assert.ok(await page.$('text/dblclicked'));
});
});
});

describe('fill', () => {
it('fills out an input', async () => {
await withMcpContext(async (response, context) => {
Expand Down