diff --git a/src/context.ts b/src/context.ts index 3b08461..73e58f3 100644 --- a/src/context.ts +++ b/src/context.ts @@ -124,7 +124,7 @@ export class Context { async run(tool: Tool, params: Record | undefined) { // Tab management is done outside of the action() call. - const toolResult = await tool.handle(this, params); + const toolResult = await tool.handle(this, tool.schema.inputSchema.parse(params)); const { code, action, waitForNetwork, captureSnapshot, resultOverride } = toolResult; const racingAction = action ? () => this._raceAgainstModalDialogs(action) : undefined; diff --git a/src/index.ts b/src/index.ts index f845544..2831c4e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -35,7 +35,7 @@ import type { Tool, ToolCapability } from './tools/tool'; import type { Server } from '@modelcontextprotocol/sdk/server/index.js'; import type { LaunchOptions } from 'playwright'; -const snapshotTools: Tool[] = [ +const snapshotTools: Tool[] = [ ...common(true), ...console, ...dialogs(true), @@ -48,7 +48,7 @@ const snapshotTools: Tool[] = [ ...tabs(true), ]; -const screenshotTools: Tool[] = [ +const screenshotTools: Tool[] = [ ...common(false), ...console, ...dialogs(false), diff --git a/src/server.ts b/src/server.ts index bc5e8e3..2716528 100644 --- a/src/server.ts +++ b/src/server.ts @@ -16,6 +16,7 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js'; import { CallToolRequestSchema, ListResourcesRequestSchema, ListToolsRequestSchema, ReadResourceRequestSchema } from '@modelcontextprotocol/sdk/types.js'; +import { zodToJsonSchema } from 'zod-to-json-schema'; import { Context } from './context'; @@ -41,7 +42,13 @@ export function createServerWithTools(options: Options): Server { }); server.setRequestHandler(ListToolsRequestSchema, async () => { - return { tools: tools.map(tool => tool.schema) }; + return { + tools: tools.map(tool => ({ + name: tool.schema.name, + description: tool.schema.description, + inputSchema: zodToJsonSchema(tool.schema.inputSchema) 
+ })), + }; }); server.setRequestHandler(ListResourcesRequestSchema, async () => { diff --git a/src/tools/common.ts b/src/tools/common.ts index 03b9dc8..269fc0b 100644 --- a/src/tools/common.ts +++ b/src/tools/common.ts @@ -15,43 +15,36 @@ */ import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; +import { defineTool, type ToolFactory } from './tool'; -import type { Tool, ToolFactory } from './tool'; - -const waitSchema = z.object({ - time: z.number().describe('The time to wait in seconds'), -}); - -const wait: ToolFactory = captureSnapshot => ({ +const wait: ToolFactory = captureSnapshot => defineTool({ capability: 'wait', schema: { name: 'browser_wait', description: 'Wait for a specified time in seconds', - inputSchema: zodToJsonSchema(waitSchema), + inputSchema: z.object({ + time: z.number().describe('The time to wait in seconds'), + }), }, handle: async (context, params) => { - const validatedParams = waitSchema.parse(params); - await new Promise(f => setTimeout(f, Math.min(10000, validatedParams.time * 1000))); + await new Promise(f => setTimeout(f, Math.min(10000, params.time * 1000))); return { - code: [`// Waited for ${validatedParams.time} seconds`], + code: [`// Waited for ${params.time} seconds`], captureSnapshot, waitForNetwork: false, }; }, }); -const closeSchema = z.object({}); - -const close: Tool = { +const close = defineTool({ capability: 'core', schema: { name: 'browser_close', description: 'Close the page', - inputSchema: zodToJsonSchema(closeSchema), + inputSchema: z.object({}), }, handle: async context => { @@ -62,33 +55,29 @@ const close: Tool = { waitForNetwork: false, }; }, -}; - -const resizeSchema = z.object({ - width: z.number().describe('Width of the browser window'), - height: z.number().describe('Height of the browser window'), }); -const resize: ToolFactory = captureSnapshot => ({ +const resize: ToolFactory = captureSnapshot => defineTool({ capability: 'core', schema: { name: 'browser_resize', description: 
'Resize the browser window', - inputSchema: zodToJsonSchema(resizeSchema), + inputSchema: z.object({ + width: z.number().describe('Width of the browser window'), + height: z.number().describe('Height of the browser window'), + }), }, handle: async (context, params) => { - const validatedParams = resizeSchema.parse(params); - const tab = context.currentTabOrDie(); const code = [ - `// Resize browser window to ${validatedParams.width}x${validatedParams.height}`, - `await page.setViewportSize({ width: ${validatedParams.width}, height: ${validatedParams.height} });` + `// Resize browser window to ${params.width}x${params.height}`, + `await page.setViewportSize({ width: ${params.width}, height: ${params.height} });` ]; const action = async () => { - await tab.page.setViewportSize({ width: validatedParams.width, height: validatedParams.height }); + await tab.page.setViewportSize({ width: params.width, height: params.height }); }; return { diff --git a/src/tools/console.ts b/src/tools/console.ts index ecc8bb1..d0dc4e8 100644 --- a/src/tools/console.ts +++ b/src/tools/console.ts @@ -15,18 +15,14 @@ */ import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; +import { defineTool } from './tool'; -import type { Tool } from './tool'; - -const consoleSchema = z.object({}); - -const console: Tool = { +const console = defineTool({ capability: 'core', schema: { name: 'browser_console_messages', description: 'Returns all console messages', - inputSchema: zodToJsonSchema(consoleSchema), + inputSchema: z.object({}), }, handle: async context => { const messages = await context.currentTabOrDie().console(); @@ -42,7 +38,7 @@ const console: Tool = { waitForNetwork: false, }; }, -}; +}); export default [ console, diff --git a/src/tools/dialogs.ts b/src/tools/dialogs.ts index 4c08bad..b9cfffc 100644 --- a/src/tools/dialogs.ts +++ b/src/tools/dialogs.ts @@ -15,32 +15,27 @@ */ import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; +import { 
defineTool, type ToolFactory } from './tool'; -import type { ToolFactory } from './tool'; - -const handleDialogSchema = z.object({ - accept: z.boolean().describe('Whether to accept the dialog.'), - promptText: z.string().optional().describe('The text of the prompt in case of a prompt dialog.'), -}); - -const handleDialog: ToolFactory = captureSnapshot => ({ +const handleDialog: ToolFactory = captureSnapshot => defineTool({ capability: 'core', schema: { name: 'browser_handle_dialog', description: 'Handle a dialog', - inputSchema: zodToJsonSchema(handleDialogSchema), + inputSchema: z.object({ + accept: z.boolean().describe('Whether to accept the dialog.'), + promptText: z.string().optional().describe('The text of the prompt in case of a prompt dialog.'), + }), }, handle: async (context, params) => { - const validatedParams = handleDialogSchema.parse(params); const dialogState = context.modalStates().find(state => state.type === 'dialog'); if (!dialogState) throw new Error('No dialog visible'); - if (validatedParams.accept) - await dialogState.dialog.accept(validatedParams.promptText); + if (params.accept) + await dialogState.dialog.accept(params.promptText); else await dialogState.dialog.dismiss(); diff --git a/src/tools/files.ts b/src/tools/files.ts index 816632f..4c733a3 100644 --- a/src/tools/files.ts +++ b/src/tools/files.ts @@ -15,35 +15,30 @@ */ import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; +import { defineTool, type ToolFactory } from './tool'; -import type { ToolFactory } from './tool'; - -const uploadFileSchema = z.object({ - paths: z.array(z.string()).describe('The absolute paths to the files to upload. 
Can be a single file or multiple files.'), -}); - -const uploadFile: ToolFactory = captureSnapshot => ({ +const uploadFile: ToolFactory = captureSnapshot => defineTool({ capability: 'files', schema: { name: 'browser_file_upload', description: 'Upload one or multiple files', - inputSchema: zodToJsonSchema(uploadFileSchema), + inputSchema: z.object({ + paths: z.array(z.string()).describe('The absolute paths to the files to upload. Can be a single file or multiple files.'), + }), }, handle: async (context, params) => { - const validatedParams = uploadFileSchema.parse(params); const modalState = context.modalStates().find(state => state.type === 'fileChooser'); if (!modalState) throw new Error('No file chooser visible'); const code = [ - `// { - await modalState.fileChooser.setFiles(validatedParams.paths); + await modalState.fileChooser.setFiles(params.paths); context.clearModalState(modalState); }; diff --git a/src/tools/install.ts b/src/tools/install.ts index 9f15819..bf69697 100644 --- a/src/tools/install.ts +++ b/src/tools/install.ts @@ -18,16 +18,14 @@ import { fork } from 'child_process'; import path from 'path'; import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; +import { defineTool } from './tool'; -import type { Tool } from './tool'; - -const install: Tool = { +const install = defineTool({ capability: 'install', schema: { name: 'browser_install', description: 'Install the browser specified in the config. 
Call this if you get an error about the browser not being installed.', - inputSchema: zodToJsonSchema(z.object({})), + inputSchema: z.object({}), }, handle: async context => { @@ -53,7 +51,7 @@ const install: Tool = { waitForNetwork: false, }; }, -}; +}); export default [ install, diff --git a/src/tools/keyboard.ts b/src/tools/keyboard.ts index 07eacd9..68cb32c 100644 --- a/src/tools/keyboard.ts +++ b/src/tools/keyboard.ts @@ -15,33 +15,28 @@ */ import { z } from 'zod'; -import zodToJsonSchema from 'zod-to-json-schema'; +import { defineTool, type ToolFactory } from './tool'; -import type { ToolFactory } from './tool'; - -const pressKeySchema = z.object({ - key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'), -}); - -const pressKey: ToolFactory = captureSnapshot => ({ +const pressKey: ToolFactory = captureSnapshot => defineTool({ capability: 'core', schema: { name: 'browser_press_key', description: 'Press a key on the keyboard', - inputSchema: zodToJsonSchema(pressKeySchema), + inputSchema: z.object({ + key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'), + }), }, handle: async (context, params) => { - const validatedParams = pressKeySchema.parse(params); const tab = context.currentTabOrDie(); const code = [ - `// Press ${validatedParams.key}`, - `await page.keyboard.press('${validatedParams.key}');`, + `// Press ${params.key}`, + `await page.keyboard.press('${params.key}');`, ]; - const action = () => tab.page.keyboard.press(validatedParams.key); + const action = () => tab.page.keyboard.press(params.key); return { code, diff --git a/src/tools/navigate.ts b/src/tools/navigate.ts index 0651c7e..e424a11 100644 --- a/src/tools/navigate.ts +++ b/src/tools/navigate.ts @@ -15,31 +15,26 @@ */ import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; +import { defineTool, type ToolFactory } from './tool'; -import type { ToolFactory } from 
'./tool'; - -const navigateSchema = z.object({ - url: z.string().describe('The URL to navigate to'), -}); - -const navigate: ToolFactory = captureSnapshot => ({ +const navigate: ToolFactory = captureSnapshot => defineTool({ capability: 'core', schema: { name: 'browser_navigate', description: 'Navigate to a URL', - inputSchema: zodToJsonSchema(navigateSchema), + inputSchema: z.object({ + url: z.string().describe('The URL to navigate to'), + }), }, handle: async (context, params) => { - const validatedParams = navigateSchema.parse(params); const tab = await context.ensureTab(); - await tab.navigate(validatedParams.url); + await tab.navigate(params.url); const code = [ - `// Navigate to ${validatedParams.url}`, - `await page.goto('${validatedParams.url}');`, + `// Navigate to ${params.url}`, + `await page.goto('${params.url}');`, ]; return { @@ -50,14 +45,12 @@ const navigate: ToolFactory = captureSnapshot => ({ }, }); -const goBackSchema = z.object({}); - -const goBack: ToolFactory = captureSnapshot => ({ +const goBack: ToolFactory = captureSnapshot => defineTool({ capability: 'history', schema: { name: 'browser_navigate_back', description: 'Go back to the previous page', - inputSchema: zodToJsonSchema(goBackSchema), + inputSchema: z.object({}), }, handle: async context => { @@ -76,14 +69,12 @@ const goBack: ToolFactory = captureSnapshot => ({ }, }); -const goForwardSchema = z.object({}); - -const goForward: ToolFactory = captureSnapshot => ({ +const goForward: ToolFactory = captureSnapshot => defineTool({ capability: 'history', schema: { name: 'browser_navigate_forward', description: 'Go forward to the next page', - inputSchema: zodToJsonSchema(goForwardSchema), + inputSchema: z.object({}), }, handle: async context => { const tab = context.currentTabOrDie(); diff --git a/src/tools/pdf.ts b/src/tools/pdf.ts index 66cf02a..1929fe3 100644 --- a/src/tools/pdf.ts +++ b/src/tools/pdf.ts @@ -18,22 +18,18 @@ import os from 'os'; import path from 'path'; import { z } from 
'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; +import { defineTool } from './tool'; import { sanitizeForFilePath } from './utils'; import * as javascript from '../javascript'; -import type { Tool } from './tool'; - -const pdfSchema = z.object({}); - -const pdf: Tool = { +const pdf = defineTool({ capability: 'pdf', schema: { name: 'browser_pdf_save', description: 'Save page as PDF', - inputSchema: zodToJsonSchema(pdfSchema), + inputSchema: z.object({}), }, handle: async context => { @@ -52,7 +48,7 @@ const pdf: Tool = { waitForNetwork: false, }; }, -}; +}); export default [ pdf, diff --git a/src/tools/screen.ts b/src/tools/screen.ts index 0cd9320..7e906b9 100644 --- a/src/tools/screen.ts +++ b/src/tools/screen.ts @@ -15,18 +15,20 @@ */ import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; +import { defineTool } from './tool'; import * as javascript from '../javascript'; -import type { Tool } from './tool'; +const elementSchema = z.object({ + element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'), +}); -const screenshot: Tool = { +const screenshot = defineTool({ capability: 'core', schema: { name: 'browser_screen_capture', description: 'Take a screenshot of the current page', - inputSchema: zodToJsonSchema(z.object({})), + inputSchema: z.object({}), }, handle: async context => { @@ -51,33 +53,26 @@ const screenshot: Tool = { waitForNetwork: false }; }, -}; - -const elementSchema = z.object({ - element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'), }); -const moveMouseSchema = elementSchema.extend({ - x: z.number().describe('X coordinate'), - y: z.number().describe('Y coordinate'), -}); - -const moveMouse: Tool = { +const moveMouse = defineTool({ capability: 'core', schema: { name: 'browser_screen_move_mouse', description: 'Move mouse to a given position', - inputSchema: 
zodToJsonSchema(moveMouseSchema), + inputSchema: elementSchema.extend({ + x: z.number().describe('X coordinate'), + y: z.number().describe('Y coordinate'), + }), }, handle: async (context, params) => { - const validatedParams = moveMouseSchema.parse(params); const tab = context.currentTabOrDie(); const code = [ - `// Move mouse to (${validatedParams.x}, ${validatedParams.y})`, - `await page.mouse.move(${validatedParams.x}, ${validatedParams.y});`, + `// Move mouse to (${params.x}, ${params.y})`, + `await page.mouse.move(${params.x}, ${params.y});`, ]; - const action = () => tab.page.mouse.move(validatedParams.x, validatedParams.y); + const action = () => tab.page.mouse.move(params.x, params.y); return { code, action, @@ -85,32 +80,29 @@ const moveMouse: Tool = { waitForNetwork: false }; }, -}; - -const clickSchema = elementSchema.extend({ - x: z.number().describe('X coordinate'), - y: z.number().describe('Y coordinate'), }); -const click: Tool = { +const click = defineTool({ capability: 'core', schema: { name: 'browser_screen_click', description: 'Click left mouse button', - inputSchema: zodToJsonSchema(clickSchema), + inputSchema: elementSchema.extend({ + x: z.number().describe('X coordinate'), + y: z.number().describe('Y coordinate'), + }), }, handle: async (context, params) => { - const validatedParams = clickSchema.parse(params); const tab = context.currentTabOrDie(); const code = [ - `// Click mouse at coordinates (${validatedParams.x}, ${validatedParams.y})`, - `await page.mouse.move(${validatedParams.x}, ${validatedParams.y});`, + `// Click mouse at coordinates (${params.x}, ${params.y})`, + `await page.mouse.move(${params.x}, ${params.y});`, `await page.mouse.down();`, `await page.mouse.up();`, ]; const action = async () => { - await tab.page.mouse.move(validatedParams.x, validatedParams.y); + await tab.page.mouse.move(params.x, params.y); await tab.page.mouse.down(); await tab.page.mouse.up(); }; @@ -121,40 +113,37 @@ const click: Tool = { waitForNetwork: 
true, }; }, -}; - -const dragSchema = elementSchema.extend({ - startX: z.number().describe('Start X coordinate'), - startY: z.number().describe('Start Y coordinate'), - endX: z.number().describe('End X coordinate'), - endY: z.number().describe('End Y coordinate'), }); -const drag: Tool = { +const drag = defineTool({ capability: 'core', schema: { name: 'browser_screen_drag', description: 'Drag left mouse button', - inputSchema: zodToJsonSchema(dragSchema), + inputSchema: elementSchema.extend({ + startX: z.number().describe('Start X coordinate'), + startY: z.number().describe('Start Y coordinate'), + endX: z.number().describe('End X coordinate'), + endY: z.number().describe('End Y coordinate'), + }), }, handle: async (context, params) => { - const validatedParams = dragSchema.parse(params); const tab = context.currentTabOrDie(); const code = [ - `// Drag mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, ${validatedParams.endY})`, - `await page.mouse.move(${validatedParams.startX}, ${validatedParams.startY});`, + `// Drag mouse from (${params.startX}, ${params.startY}) to (${params.endX}, ${params.endY})`, + `await page.mouse.move(${params.startX}, ${params.startY});`, `await page.mouse.down();`, - `await page.mouse.move(${validatedParams.endX}, ${validatedParams.endY});`, + `await page.mouse.move(${params.endX}, ${params.endY});`, `await page.mouse.up();`, ]; const action = async () => { - await tab.page.mouse.move(validatedParams.startX, validatedParams.startY); + await tab.page.mouse.move(params.startX, params.startY); await tab.page.mouse.down(); - await tab.page.mouse.move(validatedParams.endX, validatedParams.endY); + await tab.page.mouse.move(params.endX, params.endY); await tab.page.mouse.up(); }; @@ -165,38 +154,35 @@ const drag: Tool = { waitForNetwork: true, }; }, -}; - -const typeSchema = z.object({ - text: z.string().describe('Text to type into the element'), - submit: z.boolean().optional().describe('Whether 
to submit entered text (press Enter after)'), }); -const type: Tool = { +const type = defineTool({ capability: 'core', schema: { name: 'browser_screen_type', description: 'Type text', - inputSchema: zodToJsonSchema(typeSchema), + inputSchema: z.object({ + text: z.string().describe('Text to type into the element'), + submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'), + }), }, handle: async (context, params) => { - const validatedParams = typeSchema.parse(params); const tab = context.currentTabOrDie(); const code = [ - `// Type ${validatedParams.text}`, - `await page.keyboard.type('${validatedParams.text}');`, + `// Type ${params.text}`, + `await page.keyboard.type('${params.text}');`, ]; const action = async () => { - await tab.page.keyboard.type(validatedParams.text); - if (validatedParams.submit) + await tab.page.keyboard.type(params.text); + if (params.submit) await tab.page.keyboard.press('Enter'); }; - if (validatedParams.submit) { + if (params.submit) { code.push(`// Submit text`); code.push(`await page.keyboard.press('Enter');`); } @@ -208,7 +194,7 @@ const type: Tool = { waitForNetwork: true, }; }, -}; +}); export default [ screenshot, diff --git a/src/tools/snapshot.ts b/src/tools/snapshot.ts index 5e6ca1d..43b0e9c 100644 --- a/src/tools/snapshot.ts +++ b/src/tools/snapshot.ts @@ -18,21 +18,20 @@ import path from 'path'; import os from 'os'; import { z } from 'zod'; -import zodToJsonSchema from 'zod-to-json-schema'; import { sanitizeForFilePath } from './utils'; import { generateLocator } from '../context'; import * as javascript from '../javascript'; import type * as playwright from 'playwright'; -import type { Tool } from './tool'; +import { defineTool } from './tool'; -const snapshot: Tool = { +const snapshot = defineTool({ capability: 'core', schema: { name: 'browser_snapshot', description: 'Capture accessibility snapshot of the current page, this is better than screenshot', - inputSchema: 
zodToJsonSchema(z.object({})), + inputSchema: z.object({}), }, handle: async context => { @@ -44,28 +43,27 @@ const snapshot: Tool = { waitForNetwork: false, }; }, -}; +}); const elementSchema = z.object({ element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'), ref: z.string().describe('Exact target element reference from the page snapshot'), }); -const click: Tool = { +const click = defineTool({ capability: 'core', schema: { name: 'browser_click', description: 'Perform click on a web page', - inputSchema: zodToJsonSchema(elementSchema), + inputSchema: elementSchema, }, handle: async (context, params) => { - const validatedParams = elementSchema.parse(params); const tab = context.currentTabOrDie(); - const locator = tab.snapshotOrDie().refLocator(validatedParams.ref); + const locator = tab.snapshotOrDie().refLocator(params.ref); const code = [ - `// Click ${validatedParams.element}`, + `// Click ${params.element}`, `await page.${await generateLocator(locator)}.click();` ]; @@ -76,31 +74,28 @@ const click: Tool = { waitForNetwork: true, }; }, -}; - -const dragSchema = z.object({ - startElement: z.string().describe('Human-readable source element description used to obtain the permission to interact with the element'), - startRef: z.string().describe('Exact source element reference from the page snapshot'), - endElement: z.string().describe('Human-readable target element description used to obtain the permission to interact with the element'), - endRef: z.string().describe('Exact target element reference from the page snapshot'), }); -const drag: Tool = { +const drag = defineTool({ capability: 'core', schema: { name: 'browser_drag', description: 'Perform drag and drop between two elements', - inputSchema: zodToJsonSchema(dragSchema), + inputSchema: z.object({ + startElement: z.string().describe('Human-readable source element description used to obtain the permission to interact with the element'), + 
startRef: z.string().describe('Exact source element reference from the page snapshot'), + endElement: z.string().describe('Human-readable target element description used to obtain the permission to interact with the element'), + endRef: z.string().describe('Exact target element reference from the page snapshot'), + }), }, handle: async (context, params) => { - const validatedParams = dragSchema.parse(params); const snapshot = context.currentTabOrDie().snapshotOrDie(); - const startLocator = snapshot.refLocator(validatedParams.startRef); - const endLocator = snapshot.refLocator(validatedParams.endRef); + const startLocator = snapshot.refLocator(params.startRef); + const endLocator = snapshot.refLocator(params.endRef); const code = [ - `// Drag ${validatedParams.startElement} to ${validatedParams.endElement}`, + `// Drag ${params.startElement} to ${params.endElement}`, `await page.${await generateLocator(startLocator)}.dragTo(page.${await generateLocator(endLocator)});` ]; @@ -111,23 +106,22 @@ const drag: Tool = { waitForNetwork: true, }; }, -}; +}); -const hover: Tool = { +const hover = defineTool({ capability: 'core', schema: { name: 'browser_hover', description: 'Hover over element on page', - inputSchema: zodToJsonSchema(elementSchema), + inputSchema: elementSchema, }, handle: async (context, params) => { - const validatedParams = elementSchema.parse(params); const snapshot = context.currentTabOrDie().snapshotOrDie(); - const locator = snapshot.refLocator(validatedParams.ref); + const locator = snapshot.refLocator(params.ref); const code = [ - `// Hover over ${validatedParams.element}`, + `// Hover over ${params.element}`, `await page.${await generateLocator(locator)}.hover();` ]; @@ -138,7 +132,7 @@ const hover: Tool = { waitForNetwork: true, }; }, -}; +}); const typeSchema = elementSchema.extend({ text: z.string().describe('Text to type into the element'), @@ -146,33 +140,32 @@ const typeSchema = elementSchema.extend({ slowly: 
z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'), }); -const type: Tool = { +const type = defineTool({ capability: 'core', schema: { name: 'browser_type', description: 'Type text into editable element', - inputSchema: zodToJsonSchema(typeSchema), + inputSchema: typeSchema, }, handle: async (context, params) => { - const validatedParams = typeSchema.parse(params); const snapshot = context.currentTabOrDie().snapshotOrDie(); - const locator = snapshot.refLocator(validatedParams.ref); + const locator = snapshot.refLocator(params.ref); const code: string[] = []; const steps: (() => Promise)[] = []; - if (validatedParams.slowly) { - code.push(`// Press "${validatedParams.text}" sequentially into "${validatedParams.element}"`); - code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(validatedParams.text)});`); - steps.push(() => locator.pressSequentially(validatedParams.text)); + if (params.slowly) { + code.push(`// Press "${params.text}" sequentially into "${params.element}"`); + code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(params.text)});`); + steps.push(() => locator.pressSequentially(params.text)); } else { - code.push(`// Fill "${validatedParams.text}" into "${validatedParams.element}"`); - code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(validatedParams.text)});`); - steps.push(() => locator.fill(validatedParams.text)); + code.push(`// Fill "${params.text}" into "${params.element}"`); + code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(params.text)});`); + steps.push(() => locator.fill(params.text)); } - if (validatedParams.submit) { + if (params.submit) { code.push(`// Submit text`); code.push(`await page.${await generateLocator(locator)}.press('Enter');`); steps.push(() => locator.press('Enter')); @@ 
-185,38 +178,37 @@ const type: Tool = { waitForNetwork: true, }; }, -}; +}); const selectOptionSchema = elementSchema.extend({ values: z.array(z.string()).describe('Array of values to select in the dropdown. This can be a single value or multiple values.'), }); -const selectOption: Tool = { +const selectOption = defineTool({ capability: 'core', schema: { name: 'browser_select_option', description: 'Select an option in a dropdown', - inputSchema: zodToJsonSchema(selectOptionSchema), + inputSchema: selectOptionSchema, }, handle: async (context, params) => { - const validatedParams = selectOptionSchema.parse(params); const snapshot = context.currentTabOrDie().snapshotOrDie(); - const locator = snapshot.refLocator(validatedParams.ref); + const locator = snapshot.refLocator(params.ref); const code = [ - `// Select options [${validatedParams.values.join(', ')}] in ${validatedParams.element}`, - `await page.${await generateLocator(locator)}.selectOption(${javascript.formatObject(validatedParams.values)});` + `// Select options [${params.values.join(', ')}] in ${params.element}`, + `await page.${await generateLocator(locator)}.selectOption(${javascript.formatObject(params.values)});` ]; return { code, - action: () => locator.selectOption(validatedParams.values).then(() => {}), + action: () => locator.selectOption(params.values).then(() => {}), captureSnapshot: true, waitForNetwork: true, }; }, -}; +}); const screenshotSchema = z.object({ raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'), @@ -229,28 +221,27 @@ const screenshotSchema = z.object({ path: ['ref', 'element'] }); -const screenshot: Tool = { +const screenshot = defineTool({ capability: 'core', schema: { name: 'browser_take_screenshot', description: `Take a screenshot of the current page. 
You can't perform actions based on the screenshot, use browser_snapshot for actions.`, - inputSchema: zodToJsonSchema(screenshotSchema), + inputSchema: screenshotSchema, }, handle: async (context, params) => { - const validatedParams = screenshotSchema.parse(params); const tab = context.currentTabOrDie(); const snapshot = tab.snapshotOrDie(); - const fileType = validatedParams.raw ? 'png' : 'jpeg'; + const fileType = params.raw ? 'png' : 'jpeg'; const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + `.${fileType}`; const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName }; - const isElementScreenshot = validatedParams.element && validatedParams.ref; + const isElementScreenshot = params.element && params.ref; const code = [ - `// Screenshot ${isElementScreenshot ? validatedParams.element : 'viewport'} and save it as ${fileName}`, + `// Screenshot ${isElementScreenshot ? params.element : 'viewport'} and save it as ${fileName}`, ]; - const locator = validatedParams.ref ? snapshot.refLocator(validatedParams.ref) : null; + const locator = params.ref ? 
snapshot.refLocator(params.ref) : null; if (locator) code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`); @@ -275,7 +266,7 @@ const screenshot: Tool = { waitForNetwork: false, }; } -}; +}); export default [ diff --git a/src/tools/tabs.ts b/src/tools/tabs.ts index aed7180..2be1ac4 100644 --- a/src/tools/tabs.ts +++ b/src/tools/tabs.ts @@ -15,17 +15,15 @@ */ import { z } from 'zod'; -import { zodToJsonSchema } from 'zod-to-json-schema'; +import { defineTool, type ToolFactory } from './tool'; -import type { ToolFactory, Tool } from './tool'; - -const listTabs: Tool = { +const listTabs = defineTool({ capability: 'tabs', schema: { name: 'browser_tab_list', description: 'List browser tabs', - inputSchema: zodToJsonSchema(z.object({})), + inputSchema: z.object({}), }, handle: async context => { @@ -42,26 +40,23 @@ const listTabs: Tool = { }, }; }, -}; - -const selectTabSchema = z.object({ - index: z.number().describe('The index of the tab to select'), }); -const selectTab: ToolFactory = captureSnapshot => ({ +const selectTab: ToolFactory = captureSnapshot => defineTool({ capability: 'tabs', schema: { name: 'browser_tab_select', description: 'Select a tab by index', - inputSchema: zodToJsonSchema(selectTabSchema), + inputSchema: z.object({ + index: z.number().describe('The index of the tab to select'), + }), }, handle: async (context, params) => { - const validatedParams = selectTabSchema.parse(params); - await context.selectTab(validatedParams.index); + await context.selectTab(params.index); const code = [ - `// `, + `// `, ]; return { @@ -72,24 +67,21 @@ const selectTab: ToolFactory = captureSnapshot => ({ }, }); -const newTabSchema = z.object({ - url: z.string().optional().describe('The URL to navigate to in the new tab. 
If not provided, the new tab will be blank.'), -}); - -const newTab: ToolFactory = captureSnapshot => ({ +const newTab: ToolFactory = captureSnapshot => defineTool({ capability: 'tabs', schema: { name: 'browser_tab_new', description: 'Open a new tab', - inputSchema: zodToJsonSchema(newTabSchema), + inputSchema: z.object({ + url: z.string().optional().describe('The URL to navigate to in the new tab. If not provided, the new tab will be blank.'), + }), }, handle: async (context, params) => { - const validatedParams = newTabSchema.parse(params); await context.newTab(); - if (validatedParams.url) - await context.currentTabOrDie().navigate(validatedParams.url); + if (params.url) + await context.currentTabOrDie().navigate(params.url); const code = [ `// `, @@ -102,24 +94,21 @@ const newTab: ToolFactory = captureSnapshot => ({ }, }); -const closeTabSchema = z.object({ - index: z.number().optional().describe('The index of the tab to close. Closes current tab if not provided.'), -}); - -const closeTab: ToolFactory = captureSnapshot => ({ +const closeTab: ToolFactory = captureSnapshot => defineTool({ capability: 'tabs', schema: { name: 'browser_tab_close', description: 'Close a tab', - inputSchema: zodToJsonSchema(closeTabSchema), + inputSchema: z.object({ + index: z.number().optional().describe('The index of the tab to close. 
Closes current tab if not provided.'), + }), }, handle: async (context, params) => { - const validatedParams = closeTabSchema.parse(params); - await context.closeTab(validatedParams.index); + await context.closeTab(params.index); const code = [ - `// `, + `// `, ]; return { code, diff --git a/src/tools/tool.ts b/src/tools/tool.ts index d161129..ef9af39 100644 --- a/src/tools/tool.ts +++ b/src/tools/tool.ts @@ -15,17 +15,19 @@ */ import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types'; -import type { JsonSchema7Type } from 'zod-to-json-schema'; +import type { z } from 'zod'; import type { Context } from '../context'; import type * as playwright from 'playwright'; export type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install'; -export type ToolSchema = { +export type ToolSchema<Input extends InputType> = { name: string; description: string; - inputSchema: JsonSchema7Type; + inputSchema: Input; }; +type InputType = z.Schema; + export type FileUploadModalState = { type: 'fileChooser'; description: string; @@ -50,11 +52,15 @@ export type ToolResult = { resultOverride?: ToolActionResult; }; -export type Tool = { +export type Tool<Input extends InputType = InputType> = { capability: ToolCapability; - schema: ToolSchema; + schema: ToolSchema<Input>; clearsModalState?: ModalState['type']; - handle: (context: Context, params?: Record) => Promise<ToolResult>; + handle: (context: Context, params: z.output<Input>) => Promise<ToolResult>; }; -export type ToolFactory = (snapshot: boolean) => Tool; +export type ToolFactory = (snapshot: boolean) => Tool<any>; + +export function defineTool<Input extends InputType>(tool: Tool<Input>): Tool<Input> { + return tool; +} diff --git a/tests/core.spec.ts b/tests/core.spec.ts index cf3ced2..ff415ee 100644 --- a/tests/core.spec.ts +++ b/tests/core.spec.ts @@ -206,5 +206,5 @@ test('browser_resize', async ({ client }) => { // Resize browser window to 390x780 await page.setViewportSize({ width: 390, height: 780 }); \`\`\``); - await expect.poll(() => client.callTool({ name: 'browser_snapshot' })).toContainTextContent('Window 
size: 390x780'); + await expect.poll(() => client.callTool({ name: 'browser_snapshot', arguments: {} })).toContainTextContent('Window size: 390x780'); }); diff --git a/tests/launch.spec.ts b/tests/launch.spec.ts index e296820..dd78725 100644 --- a/tests/launch.spec.ts +++ b/tests/launch.spec.ts @@ -26,6 +26,7 @@ test('test reopen browser', async ({ client }) => { expect(await client.callTool({ name: 'browser_close', + arguments: {}, })).toContainTextContent('No open pages available'); expect(await client.callTool({ diff --git a/tests/pdf.spec.ts b/tests/pdf.spec.ts index 13e8d48..da53e0f 100644 --- a/tests/pdf.spec.ts +++ b/tests/pdf.spec.ts @@ -41,6 +41,7 @@ test('save as pdf', async ({ client, mcpBrowser }) => { const response = await client.callTool({ name: 'browser_pdf_save', + arguments: {}, }); expect(response).toHaveTextContent(/Save page as.*page-[^:]+.pdf/); }); diff --git a/tests/tabs.spec.ts b/tests/tabs.spec.ts index ca18989..5336169 100644 --- a/tests/tabs.spec.ts +++ b/tests/tabs.spec.ts @@ -32,6 +32,7 @@ async function createTab(client: Client, title: string, body: string) { test('list initial tabs', async ({ client }) => { expect(await client.callTool({ name: 'browser_tab_list', + arguments: {}, })).toHaveTextContent(`### Open tabs - 1: (current) [] (about:blank)`); }); @@ -40,6 +41,7 @@ test('list first tab', async ({ client }) => { await createTab(client, 'Tab one', 'Body one'); expect(await client.callTool({ name: 'browser_tab_list', + arguments: {}, })).toHaveTextContent(`### Open tabs - 1: [] (about:blank) - 2: (current) [Tab one] (data:text/html,Tab oneBody one)`); diff --git a/utils/update-readme.js b/utils/update-readme.js index fb76f05..15f9cbf 100644 --- a/utils/update-readme.js +++ b/utils/update-readme.js @@ -18,6 +18,7 @@ const fs = require('node:fs'); const path = require('node:path'); +const zodToJsonSchema = require('zod-to-json-schema').default; const commonTools = require('../lib/tools/common').default; const consoleTools = 
require('../lib/tools/console').default; @@ -107,11 +108,11 @@ function formatToolForReadme(tool) { */ /** - * @param {import('../src/tools/tool').ToolSchema} schema + * @param {import('../src/tools/tool').ToolSchema<any>} schema * @returns {ParsedToolSchema} */ function processToolSchema(schema) { - const inputSchema = /** @type {import('zod-to-json-schema').JsonSchema7ObjectType} */ (schema.inputSchema || {}); + const inputSchema = /** @type {import('zod-to-json-schema').JsonSchema7ObjectType} */ zodToJsonSchema(schema.inputSchema || {}); if (inputSchema.type !== 'object') throw new Error(`Tool ${schema.name} input schema is not an object`);