chore: infer tool params (#241)

Moves the `schema.parse` call to the calling side of the handler, so we
don't have to duplicate it everywhere.
This commit is contained in:
Simon Knott 2025-04-22 13:24:38 +02:00 committed by GitHub
parent 9578a5b2af
commit c80f7cf222
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 212 additions and 273 deletions

View File

@ -124,7 +124,7 @@ export class Context {
async run(tool: Tool, params: Record<string, unknown> | undefined) { async run(tool: Tool, params: Record<string, unknown> | undefined) {
// Tab management is done outside of the action() call. // Tab management is done outside of the action() call.
const toolResult = await tool.handle(this, params); const toolResult = await tool.handle(this, tool.schema.inputSchema.parse(params));
const { code, action, waitForNetwork, captureSnapshot, resultOverride } = toolResult; const { code, action, waitForNetwork, captureSnapshot, resultOverride } = toolResult;
const racingAction = action ? () => this._raceAgainstModalDialogs(action) : undefined; const racingAction = action ? () => this._raceAgainstModalDialogs(action) : undefined;

View File

@ -35,7 +35,7 @@ import type { Tool, ToolCapability } from './tools/tool';
import type { Server } from '@modelcontextprotocol/sdk/server/index.js'; import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
import type { LaunchOptions } from 'playwright'; import type { LaunchOptions } from 'playwright';
const snapshotTools: Tool[] = [ const snapshotTools: Tool<any>[] = [
...common(true), ...common(true),
...console, ...console,
...dialogs(true), ...dialogs(true),
@ -48,7 +48,7 @@ const snapshotTools: Tool[] = [
...tabs(true), ...tabs(true),
]; ];
const screenshotTools: Tool[] = [ const screenshotTools: Tool<any>[] = [
...common(false), ...common(false),
...console, ...console,
...dialogs(false), ...dialogs(false),

View File

@ -16,6 +16,7 @@
import { Server } from '@modelcontextprotocol/sdk/server/index.js'; import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { CallToolRequestSchema, ListResourcesRequestSchema, ListToolsRequestSchema, ReadResourceRequestSchema } from '@modelcontextprotocol/sdk/types.js'; import { CallToolRequestSchema, ListResourcesRequestSchema, ListToolsRequestSchema, ReadResourceRequestSchema } from '@modelcontextprotocol/sdk/types.js';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { Context } from './context'; import { Context } from './context';
@ -41,7 +42,13 @@ export function createServerWithTools(options: Options): Server {
}); });
server.setRequestHandler(ListToolsRequestSchema, async () => { server.setRequestHandler(ListToolsRequestSchema, async () => {
return { tools: tools.map(tool => tool.schema) }; return {
tools: tools.map(tool => ({
name: tool.schema.name,
description: tool.schema.description,
inputSchema: zodToJsonSchema(tool.schema.inputSchema)
})),
};
}); });
server.setRequestHandler(ListResourcesRequestSchema, async () => { server.setRequestHandler(ListResourcesRequestSchema, async () => {

View File

@ -15,43 +15,36 @@
*/ */
import { z } from 'zod'; import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema'; import { defineTool, type ToolFactory } from './tool';
import type { Tool, ToolFactory } from './tool'; const wait: ToolFactory = captureSnapshot => defineTool({
const waitSchema = z.object({
time: z.number().describe('The time to wait in seconds'),
});
const wait: ToolFactory = captureSnapshot => ({
capability: 'wait', capability: 'wait',
schema: { schema: {
name: 'browser_wait', name: 'browser_wait',
description: 'Wait for a specified time in seconds', description: 'Wait for a specified time in seconds',
inputSchema: zodToJsonSchema(waitSchema), inputSchema: z.object({
time: z.number().describe('The time to wait in seconds'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = waitSchema.parse(params); await new Promise(f => setTimeout(f, Math.min(10000, params.time * 1000)));
await new Promise(f => setTimeout(f, Math.min(10000, validatedParams.time * 1000)));
return { return {
code: [`// Waited for ${validatedParams.time} seconds`], code: [`// Waited for ${params.time} seconds`],
captureSnapshot, captureSnapshot,
waitForNetwork: false, waitForNetwork: false,
}; };
}, },
}); });
const closeSchema = z.object({}); const close = defineTool({
const close: Tool = {
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_close', name: 'browser_close',
description: 'Close the page', description: 'Close the page',
inputSchema: zodToJsonSchema(closeSchema), inputSchema: z.object({}),
}, },
handle: async context => { handle: async context => {
@ -62,33 +55,29 @@ const close: Tool = {
waitForNetwork: false, waitForNetwork: false,
}; };
}, },
};
const resizeSchema = z.object({
width: z.number().describe('Width of the browser window'),
height: z.number().describe('Height of the browser window'),
}); });
const resize: ToolFactory = captureSnapshot => ({ const resize: ToolFactory = captureSnapshot => defineTool({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_resize', name: 'browser_resize',
description: 'Resize the browser window', description: 'Resize the browser window',
inputSchema: zodToJsonSchema(resizeSchema), inputSchema: z.object({
width: z.number().describe('Width of the browser window'),
height: z.number().describe('Height of the browser window'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = resizeSchema.parse(params);
const tab = context.currentTabOrDie(); const tab = context.currentTabOrDie();
const code = [ const code = [
`// Resize browser window to ${validatedParams.width}x${validatedParams.height}`, `// Resize browser window to ${params.width}x${params.height}`,
`await page.setViewportSize({ width: ${validatedParams.width}, height: ${validatedParams.height} });` `await page.setViewportSize({ width: ${params.width}, height: ${params.height} });`
]; ];
const action = async () => { const action = async () => {
await tab.page.setViewportSize({ width: validatedParams.width, height: validatedParams.height }); await tab.page.setViewportSize({ width: params.width, height: params.height });
}; };
return { return {

View File

@ -15,18 +15,14 @@
*/ */
import { z } from 'zod'; import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema'; import { defineTool } from './tool';
import type { Tool } from './tool'; const console = defineTool({
const consoleSchema = z.object({});
const console: Tool = {
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_console_messages', name: 'browser_console_messages',
description: 'Returns all console messages', description: 'Returns all console messages',
inputSchema: zodToJsonSchema(consoleSchema), inputSchema: z.object({}),
}, },
handle: async context => { handle: async context => {
const messages = await context.currentTabOrDie().console(); const messages = await context.currentTabOrDie().console();
@ -42,7 +38,7 @@ const console: Tool = {
waitForNetwork: false, waitForNetwork: false,
}; };
}, },
}; });
export default [ export default [
console, console,

View File

@ -15,32 +15,27 @@
*/ */
import { z } from 'zod'; import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema'; import { defineTool, type ToolFactory } from './tool';
import type { ToolFactory } from './tool'; const handleDialog: ToolFactory = captureSnapshot => defineTool({
const handleDialogSchema = z.object({
accept: z.boolean().describe('Whether to accept the dialog.'),
promptText: z.string().optional().describe('The text of the prompt in case of a prompt dialog.'),
});
const handleDialog: ToolFactory = captureSnapshot => ({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_handle_dialog', name: 'browser_handle_dialog',
description: 'Handle a dialog', description: 'Handle a dialog',
inputSchema: zodToJsonSchema(handleDialogSchema), inputSchema: z.object({
accept: z.boolean().describe('Whether to accept the dialog.'),
promptText: z.string().optional().describe('The text of the prompt in case of a prompt dialog.'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = handleDialogSchema.parse(params);
const dialogState = context.modalStates().find(state => state.type === 'dialog'); const dialogState = context.modalStates().find(state => state.type === 'dialog');
if (!dialogState) if (!dialogState)
throw new Error('No dialog visible'); throw new Error('No dialog visible');
if (validatedParams.accept) if (params.accept)
await dialogState.dialog.accept(validatedParams.promptText); await dialogState.dialog.accept(params.promptText);
else else
await dialogState.dialog.dismiss(); await dialogState.dialog.dismiss();

View File

@ -15,35 +15,30 @@
*/ */
import { z } from 'zod'; import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema'; import { defineTool, type ToolFactory } from './tool';
import type { ToolFactory } from './tool'; const uploadFile: ToolFactory = captureSnapshot => defineTool({
const uploadFileSchema = z.object({
paths: z.array(z.string()).describe('The absolute paths to the files to upload. Can be a single file or multiple files.'),
});
const uploadFile: ToolFactory = captureSnapshot => ({
capability: 'files', capability: 'files',
schema: { schema: {
name: 'browser_file_upload', name: 'browser_file_upload',
description: 'Upload one or multiple files', description: 'Upload one or multiple files',
inputSchema: zodToJsonSchema(uploadFileSchema), inputSchema: z.object({
paths: z.array(z.string()).describe('The absolute paths to the files to upload. Can be a single file or multiple files.'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = uploadFileSchema.parse(params);
const modalState = context.modalStates().find(state => state.type === 'fileChooser'); const modalState = context.modalStates().find(state => state.type === 'fileChooser');
if (!modalState) if (!modalState)
throw new Error('No file chooser visible'); throw new Error('No file chooser visible');
const code = [ const code = [
`// <internal code to chose files ${validatedParams.paths.join(', ')}`, `// <internal code to chose files ${params.paths.join(', ')}`,
]; ];
const action = async () => { const action = async () => {
await modalState.fileChooser.setFiles(validatedParams.paths); await modalState.fileChooser.setFiles(params.paths);
context.clearModalState(modalState); context.clearModalState(modalState);
}; };

View File

@ -18,16 +18,14 @@ import { fork } from 'child_process';
import path from 'path'; import path from 'path';
import { z } from 'zod'; import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema'; import { defineTool } from './tool';
import type { Tool } from './tool'; const install = defineTool({
const install: Tool = {
capability: 'install', capability: 'install',
schema: { schema: {
name: 'browser_install', name: 'browser_install',
description: 'Install the browser specified in the config. Call this if you get an error about the browser not being installed.', description: 'Install the browser specified in the config. Call this if you get an error about the browser not being installed.',
inputSchema: zodToJsonSchema(z.object({})), inputSchema: z.object({}),
}, },
handle: async context => { handle: async context => {
@ -53,7 +51,7 @@ const install: Tool = {
waitForNetwork: false, waitForNetwork: false,
}; };
}, },
}; });
export default [ export default [
install, install,

View File

@ -15,33 +15,28 @@
*/ */
import { z } from 'zod'; import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema'; import { defineTool, type ToolFactory } from './tool';
import type { ToolFactory } from './tool'; const pressKey: ToolFactory = captureSnapshot => defineTool({
const pressKeySchema = z.object({
key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'),
});
const pressKey: ToolFactory = captureSnapshot => ({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_press_key', name: 'browser_press_key',
description: 'Press a key on the keyboard', description: 'Press a key on the keyboard',
inputSchema: zodToJsonSchema(pressKeySchema), inputSchema: z.object({
key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = pressKeySchema.parse(params);
const tab = context.currentTabOrDie(); const tab = context.currentTabOrDie();
const code = [ const code = [
`// Press ${validatedParams.key}`, `// Press ${params.key}`,
`await page.keyboard.press('${validatedParams.key}');`, `await page.keyboard.press('${params.key}');`,
]; ];
const action = () => tab.page.keyboard.press(validatedParams.key); const action = () => tab.page.keyboard.press(params.key);
return { return {
code, code,

View File

@ -15,31 +15,26 @@
*/ */
import { z } from 'zod'; import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema'; import { defineTool, type ToolFactory } from './tool';
import type { ToolFactory } from './tool'; const navigate: ToolFactory = captureSnapshot => defineTool({
const navigateSchema = z.object({
url: z.string().describe('The URL to navigate to'),
});
const navigate: ToolFactory = captureSnapshot => ({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_navigate', name: 'browser_navigate',
description: 'Navigate to a URL', description: 'Navigate to a URL',
inputSchema: zodToJsonSchema(navigateSchema), inputSchema: z.object({
url: z.string().describe('The URL to navigate to'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = navigateSchema.parse(params);
const tab = await context.ensureTab(); const tab = await context.ensureTab();
await tab.navigate(validatedParams.url); await tab.navigate(params.url);
const code = [ const code = [
`// Navigate to ${validatedParams.url}`, `// Navigate to ${params.url}`,
`await page.goto('${validatedParams.url}');`, `await page.goto('${params.url}');`,
]; ];
return { return {
@ -50,14 +45,12 @@ const navigate: ToolFactory = captureSnapshot => ({
}, },
}); });
const goBackSchema = z.object({}); const goBack: ToolFactory = captureSnapshot => defineTool({
const goBack: ToolFactory = captureSnapshot => ({
capability: 'history', capability: 'history',
schema: { schema: {
name: 'browser_navigate_back', name: 'browser_navigate_back',
description: 'Go back to the previous page', description: 'Go back to the previous page',
inputSchema: zodToJsonSchema(goBackSchema), inputSchema: z.object({}),
}, },
handle: async context => { handle: async context => {
@ -76,14 +69,12 @@ const goBack: ToolFactory = captureSnapshot => ({
}, },
}); });
const goForwardSchema = z.object({}); const goForward: ToolFactory = captureSnapshot => defineTool({
const goForward: ToolFactory = captureSnapshot => ({
capability: 'history', capability: 'history',
schema: { schema: {
name: 'browser_navigate_forward', name: 'browser_navigate_forward',
description: 'Go forward to the next page', description: 'Go forward to the next page',
inputSchema: zodToJsonSchema(goForwardSchema), inputSchema: z.object({}),
}, },
handle: async context => { handle: async context => {
const tab = context.currentTabOrDie(); const tab = context.currentTabOrDie();

View File

@ -18,22 +18,18 @@ import os from 'os';
import path from 'path'; import path from 'path';
import { z } from 'zod'; import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema'; import { defineTool } from './tool';
import { sanitizeForFilePath } from './utils'; import { sanitizeForFilePath } from './utils';
import * as javascript from '../javascript'; import * as javascript from '../javascript';
import type { Tool } from './tool'; const pdf = defineTool({
const pdfSchema = z.object({});
const pdf: Tool = {
capability: 'pdf', capability: 'pdf',
schema: { schema: {
name: 'browser_pdf_save', name: 'browser_pdf_save',
description: 'Save page as PDF', description: 'Save page as PDF',
inputSchema: zodToJsonSchema(pdfSchema), inputSchema: z.object({}),
}, },
handle: async context => { handle: async context => {
@ -52,7 +48,7 @@ const pdf: Tool = {
waitForNetwork: false, waitForNetwork: false,
}; };
}, },
}; });
export default [ export default [
pdf, pdf,

View File

@ -15,18 +15,20 @@
*/ */
import { z } from 'zod'; import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema'; import { defineTool } from './tool';
import * as javascript from '../javascript'; import * as javascript from '../javascript';
import type { Tool } from './tool'; const elementSchema = z.object({
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
});
const screenshot: Tool = { const screenshot = defineTool({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_screen_capture', name: 'browser_screen_capture',
description: 'Take a screenshot of the current page', description: 'Take a screenshot of the current page',
inputSchema: zodToJsonSchema(z.object({})), inputSchema: z.object({}),
}, },
handle: async context => { handle: async context => {
@ -51,33 +53,26 @@ const screenshot: Tool = {
waitForNetwork: false waitForNetwork: false
}; };
}, },
};
const elementSchema = z.object({
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
}); });
const moveMouseSchema = elementSchema.extend({ const moveMouse = defineTool({
x: z.number().describe('X coordinate'),
y: z.number().describe('Y coordinate'),
});
const moveMouse: Tool = {
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_screen_move_mouse', name: 'browser_screen_move_mouse',
description: 'Move mouse to a given position', description: 'Move mouse to a given position',
inputSchema: zodToJsonSchema(moveMouseSchema), inputSchema: elementSchema.extend({
x: z.number().describe('X coordinate'),
y: z.number().describe('Y coordinate'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = moveMouseSchema.parse(params);
const tab = context.currentTabOrDie(); const tab = context.currentTabOrDie();
const code = [ const code = [
`// Move mouse to (${validatedParams.x}, ${validatedParams.y})`, `// Move mouse to (${params.x}, ${params.y})`,
`await page.mouse.move(${validatedParams.x}, ${validatedParams.y});`, `await page.mouse.move(${params.x}, ${params.y});`,
]; ];
const action = () => tab.page.mouse.move(validatedParams.x, validatedParams.y); const action = () => tab.page.mouse.move(params.x, params.y);
return { return {
code, code,
action, action,
@ -85,32 +80,29 @@ const moveMouse: Tool = {
waitForNetwork: false waitForNetwork: false
}; };
}, },
};
const clickSchema = elementSchema.extend({
x: z.number().describe('X coordinate'),
y: z.number().describe('Y coordinate'),
}); });
const click: Tool = { const click = defineTool({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_screen_click', name: 'browser_screen_click',
description: 'Click left mouse button', description: 'Click left mouse button',
inputSchema: zodToJsonSchema(clickSchema), inputSchema: elementSchema.extend({
x: z.number().describe('X coordinate'),
y: z.number().describe('Y coordinate'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = clickSchema.parse(params);
const tab = context.currentTabOrDie(); const tab = context.currentTabOrDie();
const code = [ const code = [
`// Click mouse at coordinates (${validatedParams.x}, ${validatedParams.y})`, `// Click mouse at coordinates (${params.x}, ${params.y})`,
`await page.mouse.move(${validatedParams.x}, ${validatedParams.y});`, `await page.mouse.move(${params.x}, ${params.y});`,
`await page.mouse.down();`, `await page.mouse.down();`,
`await page.mouse.up();`, `await page.mouse.up();`,
]; ];
const action = async () => { const action = async () => {
await tab.page.mouse.move(validatedParams.x, validatedParams.y); await tab.page.mouse.move(params.x, params.y);
await tab.page.mouse.down(); await tab.page.mouse.down();
await tab.page.mouse.up(); await tab.page.mouse.up();
}; };
@ -121,40 +113,37 @@ const click: Tool = {
waitForNetwork: true, waitForNetwork: true,
}; };
}, },
};
const dragSchema = elementSchema.extend({
startX: z.number().describe('Start X coordinate'),
startY: z.number().describe('Start Y coordinate'),
endX: z.number().describe('End X coordinate'),
endY: z.number().describe('End Y coordinate'),
}); });
const drag: Tool = { const drag = defineTool({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_screen_drag', name: 'browser_screen_drag',
description: 'Drag left mouse button', description: 'Drag left mouse button',
inputSchema: zodToJsonSchema(dragSchema), inputSchema: elementSchema.extend({
startX: z.number().describe('Start X coordinate'),
startY: z.number().describe('Start Y coordinate'),
endX: z.number().describe('End X coordinate'),
endY: z.number().describe('End Y coordinate'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = dragSchema.parse(params);
const tab = context.currentTabOrDie(); const tab = context.currentTabOrDie();
const code = [ const code = [
`// Drag mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, ${validatedParams.endY})`, `// Drag mouse from (${params.startX}, ${params.startY}) to (${params.endX}, ${params.endY})`,
`await page.mouse.move(${validatedParams.startX}, ${validatedParams.startY});`, `await page.mouse.move(${params.startX}, ${params.startY});`,
`await page.mouse.down();`, `await page.mouse.down();`,
`await page.mouse.move(${validatedParams.endX}, ${validatedParams.endY});`, `await page.mouse.move(${params.endX}, ${params.endY});`,
`await page.mouse.up();`, `await page.mouse.up();`,
]; ];
const action = async () => { const action = async () => {
await tab.page.mouse.move(validatedParams.startX, validatedParams.startY); await tab.page.mouse.move(params.startX, params.startY);
await tab.page.mouse.down(); await tab.page.mouse.down();
await tab.page.mouse.move(validatedParams.endX, validatedParams.endY); await tab.page.mouse.move(params.endX, params.endY);
await tab.page.mouse.up(); await tab.page.mouse.up();
}; };
@ -165,38 +154,35 @@ const drag: Tool = {
waitForNetwork: true, waitForNetwork: true,
}; };
}, },
};
const typeSchema = z.object({
text: z.string().describe('Text to type into the element'),
submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
}); });
const type: Tool = { const type = defineTool({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_screen_type', name: 'browser_screen_type',
description: 'Type text', description: 'Type text',
inputSchema: zodToJsonSchema(typeSchema), inputSchema: z.object({
text: z.string().describe('Text to type into the element'),
submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = typeSchema.parse(params);
const tab = context.currentTabOrDie(); const tab = context.currentTabOrDie();
const code = [ const code = [
`// Type ${validatedParams.text}`, `// Type ${params.text}`,
`await page.keyboard.type('${validatedParams.text}');`, `await page.keyboard.type('${params.text}');`,
]; ];
const action = async () => { const action = async () => {
await tab.page.keyboard.type(validatedParams.text); await tab.page.keyboard.type(params.text);
if (validatedParams.submit) if (params.submit)
await tab.page.keyboard.press('Enter'); await tab.page.keyboard.press('Enter');
}; };
if (validatedParams.submit) { if (params.submit) {
code.push(`// Submit text`); code.push(`// Submit text`);
code.push(`await page.keyboard.press('Enter');`); code.push(`await page.keyboard.press('Enter');`);
} }
@ -208,7 +194,7 @@ const type: Tool = {
waitForNetwork: true, waitForNetwork: true,
}; };
}, },
}; });
export default [ export default [
screenshot, screenshot,

View File

@ -18,21 +18,20 @@ import path from 'path';
import os from 'os'; import os from 'os';
import { z } from 'zod'; import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema';
import { sanitizeForFilePath } from './utils'; import { sanitizeForFilePath } from './utils';
import { generateLocator } from '../context'; import { generateLocator } from '../context';
import * as javascript from '../javascript'; import * as javascript from '../javascript';
import type * as playwright from 'playwright'; import type * as playwright from 'playwright';
import type { Tool } from './tool'; import { defineTool } from './tool';
const snapshot: Tool = { const snapshot = defineTool({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_snapshot', name: 'browser_snapshot',
description: 'Capture accessibility snapshot of the current page, this is better than screenshot', description: 'Capture accessibility snapshot of the current page, this is better than screenshot',
inputSchema: zodToJsonSchema(z.object({})), inputSchema: z.object({}),
}, },
handle: async context => { handle: async context => {
@ -44,28 +43,27 @@ const snapshot: Tool = {
waitForNetwork: false, waitForNetwork: false,
}; };
}, },
}; });
const elementSchema = z.object({ const elementSchema = z.object({
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'), element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
ref: z.string().describe('Exact target element reference from the page snapshot'), ref: z.string().describe('Exact target element reference from the page snapshot'),
}); });
const click: Tool = { const click = defineTool({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_click', name: 'browser_click',
description: 'Perform click on a web page', description: 'Perform click on a web page',
inputSchema: zodToJsonSchema(elementSchema), inputSchema: elementSchema,
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = elementSchema.parse(params);
const tab = context.currentTabOrDie(); const tab = context.currentTabOrDie();
const locator = tab.snapshotOrDie().refLocator(validatedParams.ref); const locator = tab.snapshotOrDie().refLocator(params.ref);
const code = [ const code = [
`// Click ${validatedParams.element}`, `// Click ${params.element}`,
`await page.${await generateLocator(locator)}.click();` `await page.${await generateLocator(locator)}.click();`
]; ];
@ -76,31 +74,28 @@ const click: Tool = {
waitForNetwork: true, waitForNetwork: true,
}; };
}, },
};
const dragSchema = z.object({
startElement: z.string().describe('Human-readable source element description used to obtain the permission to interact with the element'),
startRef: z.string().describe('Exact source element reference from the page snapshot'),
endElement: z.string().describe('Human-readable target element description used to obtain the permission to interact with the element'),
endRef: z.string().describe('Exact target element reference from the page snapshot'),
}); });
const drag: Tool = { const drag = defineTool({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_drag', name: 'browser_drag',
description: 'Perform drag and drop between two elements', description: 'Perform drag and drop between two elements',
inputSchema: zodToJsonSchema(dragSchema), inputSchema: z.object({
startElement: z.string().describe('Human-readable source element description used to obtain the permission to interact with the element'),
startRef: z.string().describe('Exact source element reference from the page snapshot'),
endElement: z.string().describe('Human-readable target element description used to obtain the permission to interact with the element'),
endRef: z.string().describe('Exact target element reference from the page snapshot'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = dragSchema.parse(params);
const snapshot = context.currentTabOrDie().snapshotOrDie(); const snapshot = context.currentTabOrDie().snapshotOrDie();
const startLocator = snapshot.refLocator(validatedParams.startRef); const startLocator = snapshot.refLocator(params.startRef);
const endLocator = snapshot.refLocator(validatedParams.endRef); const endLocator = snapshot.refLocator(params.endRef);
const code = [ const code = [
`// Drag ${validatedParams.startElement} to ${validatedParams.endElement}`, `// Drag ${params.startElement} to ${params.endElement}`,
`await page.${await generateLocator(startLocator)}.dragTo(page.${await generateLocator(endLocator)});` `await page.${await generateLocator(startLocator)}.dragTo(page.${await generateLocator(endLocator)});`
]; ];
@ -111,23 +106,22 @@ const drag: Tool = {
waitForNetwork: true, waitForNetwork: true,
}; };
}, },
}; });
const hover: Tool = { const hover = defineTool({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_hover', name: 'browser_hover',
description: 'Hover over element on page', description: 'Hover over element on page',
inputSchema: zodToJsonSchema(elementSchema), inputSchema: elementSchema,
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = elementSchema.parse(params);
const snapshot = context.currentTabOrDie().snapshotOrDie(); const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(validatedParams.ref); const locator = snapshot.refLocator(params.ref);
const code = [ const code = [
`// Hover over ${validatedParams.element}`, `// Hover over ${params.element}`,
`await page.${await generateLocator(locator)}.hover();` `await page.${await generateLocator(locator)}.hover();`
]; ];
@ -138,7 +132,7 @@ const hover: Tool = {
waitForNetwork: true, waitForNetwork: true,
}; };
}, },
}; });
const typeSchema = elementSchema.extend({ const typeSchema = elementSchema.extend({
text: z.string().describe('Text to type into the element'), text: z.string().describe('Text to type into the element'),
@ -146,33 +140,32 @@ const typeSchema = elementSchema.extend({
slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'), slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'),
}); });
const type: Tool = { const type = defineTool({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_type', name: 'browser_type',
description: 'Type text into editable element', description: 'Type text into editable element',
inputSchema: zodToJsonSchema(typeSchema), inputSchema: typeSchema,
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = typeSchema.parse(params);
const snapshot = context.currentTabOrDie().snapshotOrDie(); const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(validatedParams.ref); const locator = snapshot.refLocator(params.ref);
const code: string[] = []; const code: string[] = [];
const steps: (() => Promise<void>)[] = []; const steps: (() => Promise<void>)[] = [];
if (validatedParams.slowly) { if (params.slowly) {
code.push(`// Press "${validatedParams.text}" sequentially into "${validatedParams.element}"`); code.push(`// Press "${params.text}" sequentially into "${params.element}"`);
code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(validatedParams.text)});`); code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(params.text)});`);
steps.push(() => locator.pressSequentially(validatedParams.text)); steps.push(() => locator.pressSequentially(params.text));
} else { } else {
code.push(`// Fill "${validatedParams.text}" into "${validatedParams.element}"`); code.push(`// Fill "${params.text}" into "${params.element}"`);
code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(validatedParams.text)});`); code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(params.text)});`);
steps.push(() => locator.fill(validatedParams.text)); steps.push(() => locator.fill(params.text));
} }
if (validatedParams.submit) { if (params.submit) {
code.push(`// Submit text`); code.push(`// Submit text`);
code.push(`await page.${await generateLocator(locator)}.press('Enter');`); code.push(`await page.${await generateLocator(locator)}.press('Enter');`);
steps.push(() => locator.press('Enter')); steps.push(() => locator.press('Enter'));
@ -185,38 +178,37 @@ const type: Tool = {
waitForNetwork: true, waitForNetwork: true,
}; };
}, },
}; });
const selectOptionSchema = elementSchema.extend({ const selectOptionSchema = elementSchema.extend({
values: z.array(z.string()).describe('Array of values to select in the dropdown. This can be a single value or multiple values.'), values: z.array(z.string()).describe('Array of values to select in the dropdown. This can be a single value or multiple values.'),
}); });
const selectOption: Tool = { const selectOption = defineTool({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_select_option', name: 'browser_select_option',
description: 'Select an option in a dropdown', description: 'Select an option in a dropdown',
inputSchema: zodToJsonSchema(selectOptionSchema), inputSchema: selectOptionSchema,
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = selectOptionSchema.parse(params);
const snapshot = context.currentTabOrDie().snapshotOrDie(); const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(validatedParams.ref); const locator = snapshot.refLocator(params.ref);
const code = [ const code = [
`// Select options [${validatedParams.values.join(', ')}] in ${validatedParams.element}`, `// Select options [${params.values.join(', ')}] in ${params.element}`,
`await page.${await generateLocator(locator)}.selectOption(${javascript.formatObject(validatedParams.values)});` `await page.${await generateLocator(locator)}.selectOption(${javascript.formatObject(params.values)});`
]; ];
return { return {
code, code,
action: () => locator.selectOption(validatedParams.values).then(() => {}), action: () => locator.selectOption(params.values).then(() => {}),
captureSnapshot: true, captureSnapshot: true,
waitForNetwork: true, waitForNetwork: true,
}; };
}, },
}; });
const screenshotSchema = z.object({ const screenshotSchema = z.object({
raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'), raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
@ -229,28 +221,27 @@ const screenshotSchema = z.object({
path: ['ref', 'element'] path: ['ref', 'element']
}); });
const screenshot: Tool = { const screenshot = defineTool({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_take_screenshot', name: 'browser_take_screenshot',
description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`, description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
inputSchema: zodToJsonSchema(screenshotSchema), inputSchema: screenshotSchema,
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = screenshotSchema.parse(params);
const tab = context.currentTabOrDie(); const tab = context.currentTabOrDie();
const snapshot = tab.snapshotOrDie(); const snapshot = tab.snapshotOrDie();
const fileType = validatedParams.raw ? 'png' : 'jpeg'; const fileType = params.raw ? 'png' : 'jpeg';
const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + `.${fileType}`; const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + `.${fileType}`;
const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName }; const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName };
const isElementScreenshot = validatedParams.element && validatedParams.ref; const isElementScreenshot = params.element && params.ref;
const code = [ const code = [
`// Screenshot ${isElementScreenshot ? validatedParams.element : 'viewport'} and save it as ${fileName}`, `// Screenshot ${isElementScreenshot ? params.element : 'viewport'} and save it as ${fileName}`,
]; ];
const locator = validatedParams.ref ? snapshot.refLocator(validatedParams.ref) : null; const locator = params.ref ? snapshot.refLocator(params.ref) : null;
if (locator) if (locator)
code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`); code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`);
@ -275,7 +266,7 @@ const screenshot: Tool = {
waitForNetwork: false, waitForNetwork: false,
}; };
} }
}; });
export default [ export default [

View File

@ -15,17 +15,15 @@
*/ */
import { z } from 'zod'; import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema'; import { defineTool, type ToolFactory } from './tool';
import type { ToolFactory, Tool } from './tool'; const listTabs = defineTool({
const listTabs: Tool = {
capability: 'tabs', capability: 'tabs',
schema: { schema: {
name: 'browser_tab_list', name: 'browser_tab_list',
description: 'List browser tabs', description: 'List browser tabs',
inputSchema: zodToJsonSchema(z.object({})), inputSchema: z.object({}),
}, },
handle: async context => { handle: async context => {
@ -42,26 +40,23 @@ const listTabs: Tool = {
}, },
}; };
}, },
};
const selectTabSchema = z.object({
index: z.number().describe('The index of the tab to select'),
}); });
const selectTab: ToolFactory = captureSnapshot => ({ const selectTab: ToolFactory = captureSnapshot => defineTool({
capability: 'tabs', capability: 'tabs',
schema: { schema: {
name: 'browser_tab_select', name: 'browser_tab_select',
description: 'Select a tab by index', description: 'Select a tab by index',
inputSchema: zodToJsonSchema(selectTabSchema), inputSchema: z.object({
index: z.number().describe('The index of the tab to select'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = selectTabSchema.parse(params); await context.selectTab(params.index);
await context.selectTab(validatedParams.index);
const code = [ const code = [
`// <internal code to select tab ${validatedParams.index}>`, `// <internal code to select tab ${params.index}>`,
]; ];
return { return {
@ -72,24 +67,21 @@ const selectTab: ToolFactory = captureSnapshot => ({
}, },
}); });
const newTabSchema = z.object({ const newTab: ToolFactory = captureSnapshot => defineTool({
url: z.string().optional().describe('The URL to navigate to in the new tab. If not provided, the new tab will be blank.'),
});
const newTab: ToolFactory = captureSnapshot => ({
capability: 'tabs', capability: 'tabs',
schema: { schema: {
name: 'browser_tab_new', name: 'browser_tab_new',
description: 'Open a new tab', description: 'Open a new tab',
inputSchema: zodToJsonSchema(newTabSchema), inputSchema: z.object({
url: z.string().optional().describe('The URL to navigate to in the new tab. If not provided, the new tab will be blank.'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = newTabSchema.parse(params);
await context.newTab(); await context.newTab();
if (validatedParams.url) if (params.url)
await context.currentTabOrDie().navigate(validatedParams.url); await context.currentTabOrDie().navigate(params.url);
const code = [ const code = [
`// <internal code to open a new tab>`, `// <internal code to open a new tab>`,
@ -102,24 +94,21 @@ const newTab: ToolFactory = captureSnapshot => ({
}, },
}); });
const closeTabSchema = z.object({ const closeTab: ToolFactory = captureSnapshot => defineTool({
index: z.number().optional().describe('The index of the tab to close. Closes current tab if not provided.'),
});
const closeTab: ToolFactory = captureSnapshot => ({
capability: 'tabs', capability: 'tabs',
schema: { schema: {
name: 'browser_tab_close', name: 'browser_tab_close',
description: 'Close a tab', description: 'Close a tab',
inputSchema: zodToJsonSchema(closeTabSchema), inputSchema: z.object({
index: z.number().optional().describe('The index of the tab to close. Closes current tab if not provided.'),
}),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = closeTabSchema.parse(params); await context.closeTab(params.index);
await context.closeTab(validatedParams.index);
const code = [ const code = [
`// <internal code to close tab ${validatedParams.index}>`, `// <internal code to close tab ${params.index}>`,
]; ];
return { return {
code, code,

View File

@ -15,17 +15,19 @@
*/ */
import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types'; import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types';
import type { JsonSchema7Type } from 'zod-to-json-schema'; import type { z } from 'zod';
import type { Context } from '../context'; import type { Context } from '../context';
import type * as playwright from 'playwright'; import type * as playwright from 'playwright';
export type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install'; export type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install';
export type ToolSchema = { export type ToolSchema<Input extends InputType> = {
name: string; name: string;
description: string; description: string;
inputSchema: JsonSchema7Type; inputSchema: Input;
}; };
type InputType = z.Schema;
export type FileUploadModalState = { export type FileUploadModalState = {
type: 'fileChooser'; type: 'fileChooser';
description: string; description: string;
@ -50,11 +52,15 @@ export type ToolResult = {
resultOverride?: ToolActionResult; resultOverride?: ToolActionResult;
}; };
export type Tool = { export type Tool<Input extends InputType = InputType> = {
capability: ToolCapability; capability: ToolCapability;
schema: ToolSchema; schema: ToolSchema<Input>;
clearsModalState?: ModalState['type']; clearsModalState?: ModalState['type'];
handle: (context: Context, params?: Record<string, any>) => Promise<ToolResult>; handle: (context: Context, params: z.output<Input>) => Promise<ToolResult>;
}; };
export type ToolFactory = (snapshot: boolean) => Tool; export type ToolFactory = (snapshot: boolean) => Tool<any>;
export function defineTool<Input extends InputType>(tool: Tool<Input>): Tool<Input> {
return tool;
}

View File

@ -206,5 +206,5 @@ test('browser_resize', async ({ client }) => {
// Resize browser window to 390x780 // Resize browser window to 390x780
await page.setViewportSize({ width: 390, height: 780 }); await page.setViewportSize({ width: 390, height: 780 });
\`\`\``); \`\`\``);
await expect.poll(() => client.callTool({ name: 'browser_snapshot' })).toContainTextContent('Window size: 390x780'); await expect.poll(() => client.callTool({ name: 'browser_snapshot', arguments: {} })).toContainTextContent('Window size: 390x780');
}); });

View File

@ -26,6 +26,7 @@ test('test reopen browser', async ({ client }) => {
expect(await client.callTool({ expect(await client.callTool({
name: 'browser_close', name: 'browser_close',
arguments: {},
})).toContainTextContent('No open pages available'); })).toContainTextContent('No open pages available');
expect(await client.callTool({ expect(await client.callTool({

View File

@ -41,6 +41,7 @@ test('save as pdf', async ({ client, mcpBrowser }) => {
const response = await client.callTool({ const response = await client.callTool({
name: 'browser_pdf_save', name: 'browser_pdf_save',
arguments: {},
}); });
expect(response).toHaveTextContent(/Save page as.*page-[^:]+.pdf/); expect(response).toHaveTextContent(/Save page as.*page-[^:]+.pdf/);
}); });

View File

@ -32,6 +32,7 @@ async function createTab(client: Client, title: string, body: string) {
test('list initial tabs', async ({ client }) => { test('list initial tabs', async ({ client }) => {
expect(await client.callTool({ expect(await client.callTool({
name: 'browser_tab_list', name: 'browser_tab_list',
arguments: {},
})).toHaveTextContent(`### Open tabs })).toHaveTextContent(`### Open tabs
- 1: (current) [] (about:blank)`); - 1: (current) [] (about:blank)`);
}); });
@ -40,6 +41,7 @@ test('list first tab', async ({ client }) => {
await createTab(client, 'Tab one', 'Body one'); await createTab(client, 'Tab one', 'Body one');
expect(await client.callTool({ expect(await client.callTool({
name: 'browser_tab_list', name: 'browser_tab_list',
arguments: {},
})).toHaveTextContent(`### Open tabs })).toHaveTextContent(`### Open tabs
- 1: [] (about:blank) - 1: [] (about:blank)
- 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)`); - 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)`);

View File

@ -18,6 +18,7 @@
const fs = require('node:fs'); const fs = require('node:fs');
const path = require('node:path'); const path = require('node:path');
const zodToJsonSchema = require('zod-to-json-schema').default;
const commonTools = require('../lib/tools/common').default; const commonTools = require('../lib/tools/common').default;
const consoleTools = require('../lib/tools/console').default; const consoleTools = require('../lib/tools/console').default;
@ -107,11 +108,11 @@ function formatToolForReadme(tool) {
*/ */
/** /**
* @param {import('../src/tools/tool').ToolSchema} schema * @param {import('../src/tools/tool').ToolSchema<any>} schema
* @returns {ParsedToolSchema} * @returns {ParsedToolSchema}
*/ */
function processToolSchema(schema) { function processToolSchema(schema) {
const inputSchema = /** @type {import('zod-to-json-schema').JsonSchema7ObjectType} */ (schema.inputSchema || {}); const inputSchema = /** @type {import('zod-to-json-schema').JsonSchema7ObjectType} */ zodToJsonSchema(schema.inputSchema || {});
if (inputSchema.type !== 'object') if (inputSchema.type !== 'object')
throw new Error(`Tool ${schema.name} input schema is not an object`); throw new Error(`Tool ${schema.name} input schema is not an object`);