chore: infer tool params (#241)

Moves the `schema.parse` call to the calling side of the handler, so we
don't have to duplicate it everywhere.
This commit is contained in:
Simon Knott 2025-04-22 13:24:38 +02:00 committed by GitHub
parent 9578a5b2af
commit c80f7cf222
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 212 additions and 273 deletions

View File

@ -124,7 +124,7 @@ export class Context {
async run(tool: Tool, params: Record<string, unknown> | undefined) {
// Tab management is done outside of the action() call.
const toolResult = await tool.handle(this, params);
const toolResult = await tool.handle(this, tool.schema.inputSchema.parse(params));
const { code, action, waitForNetwork, captureSnapshot, resultOverride } = toolResult;
const racingAction = action ? () => this._raceAgainstModalDialogs(action) : undefined;

View File

@ -35,7 +35,7 @@ import type { Tool, ToolCapability } from './tools/tool';
import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
import type { LaunchOptions } from 'playwright';
const snapshotTools: Tool[] = [
const snapshotTools: Tool<any>[] = [
...common(true),
...console,
...dialogs(true),
@ -48,7 +48,7 @@ const snapshotTools: Tool[] = [
...tabs(true),
];
const screenshotTools: Tool[] = [
const screenshotTools: Tool<any>[] = [
...common(false),
...console,
...dialogs(false),

View File

@ -16,6 +16,7 @@
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { CallToolRequestSchema, ListResourcesRequestSchema, ListToolsRequestSchema, ReadResourceRequestSchema } from '@modelcontextprotocol/sdk/types.js';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { Context } from './context';
@ -41,7 +42,13 @@ export function createServerWithTools(options: Options): Server {
});
server.setRequestHandler(ListToolsRequestSchema, async () => {
return { tools: tools.map(tool => tool.schema) };
return {
tools: tools.map(tool => ({
name: tool.schema.name,
description: tool.schema.description,
inputSchema: zodToJsonSchema(tool.schema.inputSchema)
})),
};
});
server.setRequestHandler(ListResourcesRequestSchema, async () => {

View File

@ -15,43 +15,36 @@
*/
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { defineTool, type ToolFactory } from './tool';
import type { Tool, ToolFactory } from './tool';
const waitSchema = z.object({
time: z.number().describe('The time to wait in seconds'),
});
const wait: ToolFactory = captureSnapshot => ({
const wait: ToolFactory = captureSnapshot => defineTool({
capability: 'wait',
schema: {
name: 'browser_wait',
description: 'Wait for a specified time in seconds',
inputSchema: zodToJsonSchema(waitSchema),
inputSchema: z.object({
time: z.number().describe('The time to wait in seconds'),
}),
},
handle: async (context, params) => {
const validatedParams = waitSchema.parse(params);
await new Promise(f => setTimeout(f, Math.min(10000, validatedParams.time * 1000)));
await new Promise(f => setTimeout(f, Math.min(10000, params.time * 1000)));
return {
code: [`// Waited for ${validatedParams.time} seconds`],
code: [`// Waited for ${params.time} seconds`],
captureSnapshot,
waitForNetwork: false,
};
},
});
const closeSchema = z.object({});
const close: Tool = {
const close = defineTool({
capability: 'core',
schema: {
name: 'browser_close',
description: 'Close the page',
inputSchema: zodToJsonSchema(closeSchema),
inputSchema: z.object({}),
},
handle: async context => {
@ -62,33 +55,29 @@ const close: Tool = {
waitForNetwork: false,
};
},
};
const resizeSchema = z.object({
width: z.number().describe('Width of the browser window'),
height: z.number().describe('Height of the browser window'),
});
const resize: ToolFactory = captureSnapshot => ({
const resize: ToolFactory = captureSnapshot => defineTool({
capability: 'core',
schema: {
name: 'browser_resize',
description: 'Resize the browser window',
inputSchema: zodToJsonSchema(resizeSchema),
inputSchema: z.object({
width: z.number().describe('Width of the browser window'),
height: z.number().describe('Height of the browser window'),
}),
},
handle: async (context, params) => {
const validatedParams = resizeSchema.parse(params);
const tab = context.currentTabOrDie();
const code = [
`// Resize browser window to ${validatedParams.width}x${validatedParams.height}`,
`await page.setViewportSize({ width: ${validatedParams.width}, height: ${validatedParams.height} });`
`// Resize browser window to ${params.width}x${params.height}`,
`await page.setViewportSize({ width: ${params.width}, height: ${params.height} });`
];
const action = async () => {
await tab.page.setViewportSize({ width: validatedParams.width, height: validatedParams.height });
await tab.page.setViewportSize({ width: params.width, height: params.height });
};
return {

View File

@ -15,18 +15,14 @@
*/
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { defineTool } from './tool';
import type { Tool } from './tool';
const consoleSchema = z.object({});
const console: Tool = {
const console = defineTool({
capability: 'core',
schema: {
name: 'browser_console_messages',
description: 'Returns all console messages',
inputSchema: zodToJsonSchema(consoleSchema),
inputSchema: z.object({}),
},
handle: async context => {
const messages = await context.currentTabOrDie().console();
@ -42,7 +38,7 @@ const console: Tool = {
waitForNetwork: false,
};
},
};
});
export default [
console,

View File

@ -15,32 +15,27 @@
*/
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { defineTool, type ToolFactory } from './tool';
import type { ToolFactory } from './tool';
const handleDialogSchema = z.object({
accept: z.boolean().describe('Whether to accept the dialog.'),
promptText: z.string().optional().describe('The text of the prompt in case of a prompt dialog.'),
});
const handleDialog: ToolFactory = captureSnapshot => ({
const handleDialog: ToolFactory = captureSnapshot => defineTool({
capability: 'core',
schema: {
name: 'browser_handle_dialog',
description: 'Handle a dialog',
inputSchema: zodToJsonSchema(handleDialogSchema),
inputSchema: z.object({
accept: z.boolean().describe('Whether to accept the dialog.'),
promptText: z.string().optional().describe('The text of the prompt in case of a prompt dialog.'),
}),
},
handle: async (context, params) => {
const validatedParams = handleDialogSchema.parse(params);
const dialogState = context.modalStates().find(state => state.type === 'dialog');
if (!dialogState)
throw new Error('No dialog visible');
if (validatedParams.accept)
await dialogState.dialog.accept(validatedParams.promptText);
if (params.accept)
await dialogState.dialog.accept(params.promptText);
else
await dialogState.dialog.dismiss();

View File

@ -15,35 +15,30 @@
*/
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { defineTool, type ToolFactory } from './tool';
import type { ToolFactory } from './tool';
const uploadFileSchema = z.object({
paths: z.array(z.string()).describe('The absolute paths to the files to upload. Can be a single file or multiple files.'),
});
const uploadFile: ToolFactory = captureSnapshot => ({
const uploadFile: ToolFactory = captureSnapshot => defineTool({
capability: 'files',
schema: {
name: 'browser_file_upload',
description: 'Upload one or multiple files',
inputSchema: zodToJsonSchema(uploadFileSchema),
inputSchema: z.object({
paths: z.array(z.string()).describe('The absolute paths to the files to upload. Can be a single file or multiple files.'),
}),
},
handle: async (context, params) => {
const validatedParams = uploadFileSchema.parse(params);
const modalState = context.modalStates().find(state => state.type === 'fileChooser');
if (!modalState)
throw new Error('No file chooser visible');
const code = [
`// <internal code to chose files ${validatedParams.paths.join(', ')}`,
`// <internal code to chose files ${params.paths.join(', ')}`,
];
const action = async () => {
await modalState.fileChooser.setFiles(validatedParams.paths);
await modalState.fileChooser.setFiles(params.paths);
context.clearModalState(modalState);
};

View File

@ -18,16 +18,14 @@ import { fork } from 'child_process';
import path from 'path';
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { defineTool } from './tool';
import type { Tool } from './tool';
const install: Tool = {
const install = defineTool({
capability: 'install',
schema: {
name: 'browser_install',
description: 'Install the browser specified in the config. Call this if you get an error about the browser not being installed.',
inputSchema: zodToJsonSchema(z.object({})),
inputSchema: z.object({}),
},
handle: async context => {
@ -53,7 +51,7 @@ const install: Tool = {
waitForNetwork: false,
};
},
};
});
export default [
install,

View File

@ -15,33 +15,28 @@
*/
import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema';
import { defineTool, type ToolFactory } from './tool';
import type { ToolFactory } from './tool';
const pressKeySchema = z.object({
key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'),
});
const pressKey: ToolFactory = captureSnapshot => ({
const pressKey: ToolFactory = captureSnapshot => defineTool({
capability: 'core',
schema: {
name: 'browser_press_key',
description: 'Press a key on the keyboard',
inputSchema: zodToJsonSchema(pressKeySchema),
inputSchema: z.object({
key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'),
}),
},
handle: async (context, params) => {
const validatedParams = pressKeySchema.parse(params);
const tab = context.currentTabOrDie();
const code = [
`// Press ${validatedParams.key}`,
`await page.keyboard.press('${validatedParams.key}');`,
`// Press ${params.key}`,
`await page.keyboard.press('${params.key}');`,
];
const action = () => tab.page.keyboard.press(validatedParams.key);
const action = () => tab.page.keyboard.press(params.key);
return {
code,

View File

@ -15,31 +15,26 @@
*/
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { defineTool, type ToolFactory } from './tool';
import type { ToolFactory } from './tool';
const navigateSchema = z.object({
url: z.string().describe('The URL to navigate to'),
});
const navigate: ToolFactory = captureSnapshot => ({
const navigate: ToolFactory = captureSnapshot => defineTool({
capability: 'core',
schema: {
name: 'browser_navigate',
description: 'Navigate to a URL',
inputSchema: zodToJsonSchema(navigateSchema),
inputSchema: z.object({
url: z.string().describe('The URL to navigate to'),
}),
},
handle: async (context, params) => {
const validatedParams = navigateSchema.parse(params);
const tab = await context.ensureTab();
await tab.navigate(validatedParams.url);
await tab.navigate(params.url);
const code = [
`// Navigate to ${validatedParams.url}`,
`await page.goto('${validatedParams.url}');`,
`// Navigate to ${params.url}`,
`await page.goto('${params.url}');`,
];
return {
@ -50,14 +45,12 @@ const navigate: ToolFactory = captureSnapshot => ({
},
});
const goBackSchema = z.object({});
const goBack: ToolFactory = captureSnapshot => ({
const goBack: ToolFactory = captureSnapshot => defineTool({
capability: 'history',
schema: {
name: 'browser_navigate_back',
description: 'Go back to the previous page',
inputSchema: zodToJsonSchema(goBackSchema),
inputSchema: z.object({}),
},
handle: async context => {
@ -76,14 +69,12 @@ const goBack: ToolFactory = captureSnapshot => ({
},
});
const goForwardSchema = z.object({});
const goForward: ToolFactory = captureSnapshot => ({
const goForward: ToolFactory = captureSnapshot => defineTool({
capability: 'history',
schema: {
name: 'browser_navigate_forward',
description: 'Go forward to the next page',
inputSchema: zodToJsonSchema(goForwardSchema),
inputSchema: z.object({}),
},
handle: async context => {
const tab = context.currentTabOrDie();

View File

@ -18,22 +18,18 @@ import os from 'os';
import path from 'path';
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { defineTool } from './tool';
import { sanitizeForFilePath } from './utils';
import * as javascript from '../javascript';
import type { Tool } from './tool';
const pdfSchema = z.object({});
const pdf: Tool = {
const pdf = defineTool({
capability: 'pdf',
schema: {
name: 'browser_pdf_save',
description: 'Save page as PDF',
inputSchema: zodToJsonSchema(pdfSchema),
inputSchema: z.object({}),
},
handle: async context => {
@ -52,7 +48,7 @@ const pdf: Tool = {
waitForNetwork: false,
};
},
};
});
export default [
pdf,

View File

@ -15,18 +15,20 @@
*/
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { defineTool } from './tool';
import * as javascript from '../javascript';
import type { Tool } from './tool';
const elementSchema = z.object({
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
});
const screenshot: Tool = {
const screenshot = defineTool({
capability: 'core',
schema: {
name: 'browser_screen_capture',
description: 'Take a screenshot of the current page',
inputSchema: zodToJsonSchema(z.object({})),
inputSchema: z.object({}),
},
handle: async context => {
@ -51,33 +53,26 @@ const screenshot: Tool = {
waitForNetwork: false
};
},
};
const elementSchema = z.object({
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
});
const moveMouseSchema = elementSchema.extend({
x: z.number().describe('X coordinate'),
y: z.number().describe('Y coordinate'),
});
const moveMouse: Tool = {
const moveMouse = defineTool({
capability: 'core',
schema: {
name: 'browser_screen_move_mouse',
description: 'Move mouse to a given position',
inputSchema: zodToJsonSchema(moveMouseSchema),
inputSchema: elementSchema.extend({
x: z.number().describe('X coordinate'),
y: z.number().describe('Y coordinate'),
}),
},
handle: async (context, params) => {
const validatedParams = moveMouseSchema.parse(params);
const tab = context.currentTabOrDie();
const code = [
`// Move mouse to (${validatedParams.x}, ${validatedParams.y})`,
`await page.mouse.move(${validatedParams.x}, ${validatedParams.y});`,
`// Move mouse to (${params.x}, ${params.y})`,
`await page.mouse.move(${params.x}, ${params.y});`,
];
const action = () => tab.page.mouse.move(validatedParams.x, validatedParams.y);
const action = () => tab.page.mouse.move(params.x, params.y);
return {
code,
action,
@ -85,32 +80,29 @@ const moveMouse: Tool = {
waitForNetwork: false
};
},
};
const clickSchema = elementSchema.extend({
x: z.number().describe('X coordinate'),
y: z.number().describe('Y coordinate'),
});
const click: Tool = {
const click = defineTool({
capability: 'core',
schema: {
name: 'browser_screen_click',
description: 'Click left mouse button',
inputSchema: zodToJsonSchema(clickSchema),
inputSchema: elementSchema.extend({
x: z.number().describe('X coordinate'),
y: z.number().describe('Y coordinate'),
}),
},
handle: async (context, params) => {
const validatedParams = clickSchema.parse(params);
const tab = context.currentTabOrDie();
const code = [
`// Click mouse at coordinates (${validatedParams.x}, ${validatedParams.y})`,
`await page.mouse.move(${validatedParams.x}, ${validatedParams.y});`,
`// Click mouse at coordinates (${params.x}, ${params.y})`,
`await page.mouse.move(${params.x}, ${params.y});`,
`await page.mouse.down();`,
`await page.mouse.up();`,
];
const action = async () => {
await tab.page.mouse.move(validatedParams.x, validatedParams.y);
await tab.page.mouse.move(params.x, params.y);
await tab.page.mouse.down();
await tab.page.mouse.up();
};
@ -121,40 +113,37 @@ const click: Tool = {
waitForNetwork: true,
};
},
};
const dragSchema = elementSchema.extend({
startX: z.number().describe('Start X coordinate'),
startY: z.number().describe('Start Y coordinate'),
endX: z.number().describe('End X coordinate'),
endY: z.number().describe('End Y coordinate'),
});
const drag: Tool = {
const drag = defineTool({
capability: 'core',
schema: {
name: 'browser_screen_drag',
description: 'Drag left mouse button',
inputSchema: zodToJsonSchema(dragSchema),
inputSchema: elementSchema.extend({
startX: z.number().describe('Start X coordinate'),
startY: z.number().describe('Start Y coordinate'),
endX: z.number().describe('End X coordinate'),
endY: z.number().describe('End Y coordinate'),
}),
},
handle: async (context, params) => {
const validatedParams = dragSchema.parse(params);
const tab = context.currentTabOrDie();
const code = [
`// Drag mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, ${validatedParams.endY})`,
`await page.mouse.move(${validatedParams.startX}, ${validatedParams.startY});`,
`// Drag mouse from (${params.startX}, ${params.startY}) to (${params.endX}, ${params.endY})`,
`await page.mouse.move(${params.startX}, ${params.startY});`,
`await page.mouse.down();`,
`await page.mouse.move(${validatedParams.endX}, ${validatedParams.endY});`,
`await page.mouse.move(${params.endX}, ${params.endY});`,
`await page.mouse.up();`,
];
const action = async () => {
await tab.page.mouse.move(validatedParams.startX, validatedParams.startY);
await tab.page.mouse.move(params.startX, params.startY);
await tab.page.mouse.down();
await tab.page.mouse.move(validatedParams.endX, validatedParams.endY);
await tab.page.mouse.move(params.endX, params.endY);
await tab.page.mouse.up();
};
@ -165,38 +154,35 @@ const drag: Tool = {
waitForNetwork: true,
};
},
};
const typeSchema = z.object({
text: z.string().describe('Text to type into the element'),
submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
});
const type: Tool = {
const type = defineTool({
capability: 'core',
schema: {
name: 'browser_screen_type',
description: 'Type text',
inputSchema: zodToJsonSchema(typeSchema),
inputSchema: z.object({
text: z.string().describe('Text to type into the element'),
submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
}),
},
handle: async (context, params) => {
const validatedParams = typeSchema.parse(params);
const tab = context.currentTabOrDie();
const code = [
`// Type ${validatedParams.text}`,
`await page.keyboard.type('${validatedParams.text}');`,
`// Type ${params.text}`,
`await page.keyboard.type('${params.text}');`,
];
const action = async () => {
await tab.page.keyboard.type(validatedParams.text);
if (validatedParams.submit)
await tab.page.keyboard.type(params.text);
if (params.submit)
await tab.page.keyboard.press('Enter');
};
if (validatedParams.submit) {
if (params.submit) {
code.push(`// Submit text`);
code.push(`await page.keyboard.press('Enter');`);
}
@ -208,7 +194,7 @@ const type: Tool = {
waitForNetwork: true,
};
},
};
});
export default [
screenshot,

View File

@ -18,21 +18,20 @@ import path from 'path';
import os from 'os';
import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema';
import { sanitizeForFilePath } from './utils';
import { generateLocator } from '../context';
import * as javascript from '../javascript';
import type * as playwright from 'playwright';
import type { Tool } from './tool';
import { defineTool } from './tool';
const snapshot: Tool = {
const snapshot = defineTool({
capability: 'core',
schema: {
name: 'browser_snapshot',
description: 'Capture accessibility snapshot of the current page, this is better than screenshot',
inputSchema: zodToJsonSchema(z.object({})),
inputSchema: z.object({}),
},
handle: async context => {
@ -44,28 +43,27 @@ const snapshot: Tool = {
waitForNetwork: false,
};
},
};
});
const elementSchema = z.object({
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
ref: z.string().describe('Exact target element reference from the page snapshot'),
});
const click: Tool = {
const click = defineTool({
capability: 'core',
schema: {
name: 'browser_click',
description: 'Perform click on a web page',
inputSchema: zodToJsonSchema(elementSchema),
inputSchema: elementSchema,
},
handle: async (context, params) => {
const validatedParams = elementSchema.parse(params);
const tab = context.currentTabOrDie();
const locator = tab.snapshotOrDie().refLocator(validatedParams.ref);
const locator = tab.snapshotOrDie().refLocator(params.ref);
const code = [
`// Click ${validatedParams.element}`,
`// Click ${params.element}`,
`await page.${await generateLocator(locator)}.click();`
];
@ -76,31 +74,28 @@ const click: Tool = {
waitForNetwork: true,
};
},
};
const dragSchema = z.object({
startElement: z.string().describe('Human-readable source element description used to obtain the permission to interact with the element'),
startRef: z.string().describe('Exact source element reference from the page snapshot'),
endElement: z.string().describe('Human-readable target element description used to obtain the permission to interact with the element'),
endRef: z.string().describe('Exact target element reference from the page snapshot'),
});
const drag: Tool = {
const drag = defineTool({
capability: 'core',
schema: {
name: 'browser_drag',
description: 'Perform drag and drop between two elements',
inputSchema: zodToJsonSchema(dragSchema),
inputSchema: z.object({
startElement: z.string().describe('Human-readable source element description used to obtain the permission to interact with the element'),
startRef: z.string().describe('Exact source element reference from the page snapshot'),
endElement: z.string().describe('Human-readable target element description used to obtain the permission to interact with the element'),
endRef: z.string().describe('Exact target element reference from the page snapshot'),
}),
},
handle: async (context, params) => {
const validatedParams = dragSchema.parse(params);
const snapshot = context.currentTabOrDie().snapshotOrDie();
const startLocator = snapshot.refLocator(validatedParams.startRef);
const endLocator = snapshot.refLocator(validatedParams.endRef);
const startLocator = snapshot.refLocator(params.startRef);
const endLocator = snapshot.refLocator(params.endRef);
const code = [
`// Drag ${validatedParams.startElement} to ${validatedParams.endElement}`,
`// Drag ${params.startElement} to ${params.endElement}`,
`await page.${await generateLocator(startLocator)}.dragTo(page.${await generateLocator(endLocator)});`
];
@ -111,23 +106,22 @@ const drag: Tool = {
waitForNetwork: true,
};
},
};
});
const hover: Tool = {
const hover = defineTool({
capability: 'core',
schema: {
name: 'browser_hover',
description: 'Hover over element on page',
inputSchema: zodToJsonSchema(elementSchema),
inputSchema: elementSchema,
},
handle: async (context, params) => {
const validatedParams = elementSchema.parse(params);
const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(validatedParams.ref);
const locator = snapshot.refLocator(params.ref);
const code = [
`// Hover over ${validatedParams.element}`,
`// Hover over ${params.element}`,
`await page.${await generateLocator(locator)}.hover();`
];
@ -138,7 +132,7 @@ const hover: Tool = {
waitForNetwork: true,
};
},
};
});
const typeSchema = elementSchema.extend({
text: z.string().describe('Text to type into the element'),
@ -146,33 +140,32 @@ const typeSchema = elementSchema.extend({
slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'),
});
const type: Tool = {
const type = defineTool({
capability: 'core',
schema: {
name: 'browser_type',
description: 'Type text into editable element',
inputSchema: zodToJsonSchema(typeSchema),
inputSchema: typeSchema,
},
handle: async (context, params) => {
const validatedParams = typeSchema.parse(params);
const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(validatedParams.ref);
const locator = snapshot.refLocator(params.ref);
const code: string[] = [];
const steps: (() => Promise<void>)[] = [];
if (validatedParams.slowly) {
code.push(`// Press "${validatedParams.text}" sequentially into "${validatedParams.element}"`);
code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(validatedParams.text)});`);
steps.push(() => locator.pressSequentially(validatedParams.text));
if (params.slowly) {
code.push(`// Press "${params.text}" sequentially into "${params.element}"`);
code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(params.text)});`);
steps.push(() => locator.pressSequentially(params.text));
} else {
code.push(`// Fill "${validatedParams.text}" into "${validatedParams.element}"`);
code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(validatedParams.text)});`);
steps.push(() => locator.fill(validatedParams.text));
code.push(`// Fill "${params.text}" into "${params.element}"`);
code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(params.text)});`);
steps.push(() => locator.fill(params.text));
}
if (validatedParams.submit) {
if (params.submit) {
code.push(`// Submit text`);
code.push(`await page.${await generateLocator(locator)}.press('Enter');`);
steps.push(() => locator.press('Enter'));
@ -185,38 +178,37 @@ const type: Tool = {
waitForNetwork: true,
};
},
};
});
const selectOptionSchema = elementSchema.extend({
values: z.array(z.string()).describe('Array of values to select in the dropdown. This can be a single value or multiple values.'),
});
const selectOption: Tool = {
const selectOption = defineTool({
capability: 'core',
schema: {
name: 'browser_select_option',
description: 'Select an option in a dropdown',
inputSchema: zodToJsonSchema(selectOptionSchema),
inputSchema: selectOptionSchema,
},
handle: async (context, params) => {
const validatedParams = selectOptionSchema.parse(params);
const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(validatedParams.ref);
const locator = snapshot.refLocator(params.ref);
const code = [
`// Select options [${validatedParams.values.join(', ')}] in ${validatedParams.element}`,
`await page.${await generateLocator(locator)}.selectOption(${javascript.formatObject(validatedParams.values)});`
`// Select options [${params.values.join(', ')}] in ${params.element}`,
`await page.${await generateLocator(locator)}.selectOption(${javascript.formatObject(params.values)});`
];
return {
code,
action: () => locator.selectOption(validatedParams.values).then(() => {}),
action: () => locator.selectOption(params.values).then(() => {}),
captureSnapshot: true,
waitForNetwork: true,
};
},
};
});
const screenshotSchema = z.object({
raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
@ -229,28 +221,27 @@ const screenshotSchema = z.object({
path: ['ref', 'element']
});
const screenshot: Tool = {
const screenshot = defineTool({
capability: 'core',
schema: {
name: 'browser_take_screenshot',
description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
inputSchema: zodToJsonSchema(screenshotSchema),
inputSchema: screenshotSchema,
},
handle: async (context, params) => {
const validatedParams = screenshotSchema.parse(params);
const tab = context.currentTabOrDie();
const snapshot = tab.snapshotOrDie();
const fileType = validatedParams.raw ? 'png' : 'jpeg';
const fileType = params.raw ? 'png' : 'jpeg';
const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + `.${fileType}`;
const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName };
const isElementScreenshot = validatedParams.element && validatedParams.ref;
const isElementScreenshot = params.element && params.ref;
const code = [
`// Screenshot ${isElementScreenshot ? validatedParams.element : 'viewport'} and save it as ${fileName}`,
`// Screenshot ${isElementScreenshot ? params.element : 'viewport'} and save it as ${fileName}`,
];
const locator = validatedParams.ref ? snapshot.refLocator(validatedParams.ref) : null;
const locator = params.ref ? snapshot.refLocator(params.ref) : null;
if (locator)
code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`);
@ -275,7 +266,7 @@ const screenshot: Tool = {
waitForNetwork: false,
};
}
};
});
export default [

View File

@ -15,17 +15,15 @@
*/
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { defineTool, type ToolFactory } from './tool';
import type { ToolFactory, Tool } from './tool';
const listTabs: Tool = {
const listTabs = defineTool({
capability: 'tabs',
schema: {
name: 'browser_tab_list',
description: 'List browser tabs',
inputSchema: zodToJsonSchema(z.object({})),
inputSchema: z.object({}),
},
handle: async context => {
@ -42,26 +40,23 @@ const listTabs: Tool = {
},
};
},
};
const selectTabSchema = z.object({
index: z.number().describe('The index of the tab to select'),
});
const selectTab: ToolFactory = captureSnapshot => ({
const selectTab: ToolFactory = captureSnapshot => defineTool({
capability: 'tabs',
schema: {
name: 'browser_tab_select',
description: 'Select a tab by index',
inputSchema: zodToJsonSchema(selectTabSchema),
inputSchema: z.object({
index: z.number().describe('The index of the tab to select'),
}),
},
handle: async (context, params) => {
const validatedParams = selectTabSchema.parse(params);
await context.selectTab(validatedParams.index);
await context.selectTab(params.index);
const code = [
`// <internal code to select tab ${validatedParams.index}>`,
`// <internal code to select tab ${params.index}>`,
];
return {
@ -72,24 +67,21 @@ const selectTab: ToolFactory = captureSnapshot => ({
},
});
const newTabSchema = z.object({
url: z.string().optional().describe('The URL to navigate to in the new tab. If not provided, the new tab will be blank.'),
});
const newTab: ToolFactory = captureSnapshot => ({
const newTab: ToolFactory = captureSnapshot => defineTool({
capability: 'tabs',
schema: {
name: 'browser_tab_new',
description: 'Open a new tab',
inputSchema: zodToJsonSchema(newTabSchema),
inputSchema: z.object({
url: z.string().optional().describe('The URL to navigate to in the new tab. If not provided, the new tab will be blank.'),
}),
},
handle: async (context, params) => {
const validatedParams = newTabSchema.parse(params);
await context.newTab();
if (validatedParams.url)
await context.currentTabOrDie().navigate(validatedParams.url);
if (params.url)
await context.currentTabOrDie().navigate(params.url);
const code = [
`// <internal code to open a new tab>`,
@ -102,24 +94,21 @@ const newTab: ToolFactory = captureSnapshot => ({
},
});
const closeTabSchema = z.object({
index: z.number().optional().describe('The index of the tab to close. Closes current tab if not provided.'),
});
const closeTab: ToolFactory = captureSnapshot => ({
const closeTab: ToolFactory = captureSnapshot => defineTool({
capability: 'tabs',
schema: {
name: 'browser_tab_close',
description: 'Close a tab',
inputSchema: zodToJsonSchema(closeTabSchema),
inputSchema: z.object({
index: z.number().optional().describe('The index of the tab to close. Closes current tab if not provided.'),
}),
},
handle: async (context, params) => {
const validatedParams = closeTabSchema.parse(params);
await context.closeTab(validatedParams.index);
await context.closeTab(params.index);
const code = [
`// <internal code to close tab ${validatedParams.index}>`,
`// <internal code to close tab ${params.index}>`,
];
return {
code,

View File

@ -15,17 +15,19 @@
*/
import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types';
import type { JsonSchema7Type } from 'zod-to-json-schema';
import type { z } from 'zod';
import type { Context } from '../context';
import type * as playwright from 'playwright';
export type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install';
export type ToolSchema = {
export type ToolSchema<Input extends InputType> = {
name: string;
description: string;
inputSchema: JsonSchema7Type;
inputSchema: Input;
};
type InputType = z.Schema;
export type FileUploadModalState = {
type: 'fileChooser';
description: string;
@ -50,11 +52,15 @@ export type ToolResult = {
resultOverride?: ToolActionResult;
};
export type Tool = {
export type Tool<Input extends InputType = InputType> = {
capability: ToolCapability;
schema: ToolSchema;
schema: ToolSchema<Input>;
clearsModalState?: ModalState['type'];
handle: (context: Context, params?: Record<string, any>) => Promise<ToolResult>;
handle: (context: Context, params: z.output<Input>) => Promise<ToolResult>;
};
export type ToolFactory = (snapshot: boolean) => Tool;
export type ToolFactory = (snapshot: boolean) => Tool<any>;
/**
 * Identity helper for declaring a tool: returns `tool` exactly as given.
 *
 * It exists purely for type inference. Passing a tool literal through this
 * generic function lets TypeScript infer `Input` from `tool.schema.inputSchema`,
 * so the `params` argument of `handle` is typed as `z.output<Input>` without
 * the tool author spelling the type out.
 *
 * No validation happens here; the function does not touch the schema at runtime.
 *
 * @param tool - Tool definition whose `schema.inputSchema` determines `Input`.
 * @returns The same `tool` object, unchanged.
 */
export function defineTool<Input extends InputType>(tool: Tool<Input>): Tool<Input> {
return tool;
}

View File

@ -206,5 +206,5 @@ test('browser_resize', async ({ client }) => {
// Resize browser window to 390x780
await page.setViewportSize({ width: 390, height: 780 });
\`\`\``);
await expect.poll(() => client.callTool({ name: 'browser_snapshot' })).toContainTextContent('Window size: 390x780');
await expect.poll(() => client.callTool({ name: 'browser_snapshot', arguments: {} })).toContainTextContent('Window size: 390x780');
});

View File

@ -26,6 +26,7 @@ test('test reopen browser', async ({ client }) => {
expect(await client.callTool({
name: 'browser_close',
arguments: {},
})).toContainTextContent('No open pages available');
expect(await client.callTool({

View File

@ -41,6 +41,7 @@ test('save as pdf', async ({ client, mcpBrowser }) => {
const response = await client.callTool({
name: 'browser_pdf_save',
arguments: {},
});
expect(response).toHaveTextContent(/Save page as.*page-[^:]+.pdf/);
});

View File

@ -32,6 +32,7 @@ async function createTab(client: Client, title: string, body: string) {
test('list initial tabs', async ({ client }) => {
expect(await client.callTool({
name: 'browser_tab_list',
arguments: {},
})).toHaveTextContent(`### Open tabs
- 1: (current) [] (about:blank)`);
});
@ -40,6 +41,7 @@ test('list first tab', async ({ client }) => {
await createTab(client, 'Tab one', 'Body one');
expect(await client.callTool({
name: 'browser_tab_list',
arguments: {},
})).toHaveTextContent(`### Open tabs
- 1: [] (about:blank)
- 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)`);

View File

@ -18,6 +18,7 @@
const fs = require('node:fs');
const path = require('node:path');
const zodToJsonSchema = require('zod-to-json-schema').default;
const commonTools = require('../lib/tools/common').default;
const consoleTools = require('../lib/tools/console').default;
@ -107,11 +108,11 @@ function formatToolForReadme(tool) {
*/
/**
* @param {import('../src/tools/tool').ToolSchema} schema
* @param {import('../src/tools/tool').ToolSchema<any>} schema
* @returns {ParsedToolSchema}
*/
function processToolSchema(schema) {
const inputSchema = /** @type {import('zod-to-json-schema').JsonSchema7ObjectType} */ (schema.inputSchema || {});
const inputSchema = /** @type {import('zod-to-json-schema').JsonSchema7ObjectType} */ zodToJsonSchema(schema.inputSchema || {});
if (inputSchema.type !== 'object')
throw new Error(`Tool ${schema.name} input schema is not an object`);