chore: flatten tool calling, prep for timeout handling (#205)

This commit is contained in:
Pavel Feldman 2025-04-16 19:36:48 -07:00 committed by GitHub
parent cea347d067
commit 7e4a964b0a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 484 additions and 336 deletions

View File

@ -19,7 +19,8 @@ import yaml from 'yaml';
import { waitForCompletion } from './tools/utils'; import { waitForCompletion } from './tools/utils';
import type { ModalState, Tool, ToolResult } from './tools/tool'; import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types';
import type { ModalState, Tool } from './tools/tool';
export type ContextOptions = { export type ContextOptions = {
browserName?: 'chromium' | 'firefox' | 'webkit'; browserName?: 'chromium' | 'firefox' | 'webkit';
@ -31,11 +32,6 @@ export type ContextOptions = {
type PageOrFrameLocator = playwright.Page | playwright.FrameLocator; type PageOrFrameLocator = playwright.Page | playwright.FrameLocator;
type RunOptions = {
captureSnapshot?: boolean;
waitForCompletion?: boolean;
};
export class Context { export class Context {
readonly tools: Tool[]; readonly tools: Tool[];
readonly options: ContextOptions; readonly options: ContextOptions;
@ -75,7 +71,7 @@ export class Context {
return this._tabs; return this._tabs;
} }
currentTab(): Tab { currentTabOrDie(): Tab {
if (!this._currentTab) if (!this._currentTab)
throw new Error('No current snapshot available. Capture a snapshot of navigate to a new location first.'); throw new Error('No current snapshot available. Capture a snapshot of navigate to a new location first.');
return this._currentTab; return this._currentTab;
@ -100,7 +96,7 @@ export class Context {
return this._currentTab!; return this._currentTab!;
} }
async listTabs(): Promise<string> { async listTabsMarkdown(): Promise<string> {
if (!this._tabs.length) if (!this._tabs.length)
return '### No tabs open'; return '### No tabs open';
const lines: string[] = ['### Open tabs']; const lines: string[] = ['### Open tabs'];
@ -115,9 +111,75 @@ export class Context {
} }
async closeTab(index: number | undefined) { async closeTab(index: number | undefined) {
const tab = index === undefined ? this.currentTab() : this._tabs[index - 1]; const tab = index === undefined ? this._currentTab : this._tabs[index - 1];
await tab.page.close(); await tab?.page.close();
return await this.listTabs(); return await this.listTabsMarkdown();
}
async run(tool: Tool, params: Record<string, unknown> | undefined) {
// Tab management is done outside of the action() call.
const toolResult = await tool.handle(this, params);
const { code, action, waitForNetwork, captureSnapshot } = toolResult;
if (!this._currentTab) {
return {
content: [{
type: 'text',
text: 'No open pages available. Use the "browser_navigate" tool to navigate to a page first.',
}],
};
}
const tab = this.currentTabOrDie();
// TODO: race against modal dialogs to resolve clicks.
let actionResult: { content?: (ImageContent | TextContent)[] };
try {
if (waitForNetwork)
actionResult = await waitForCompletion(tab.page, () => action()) ?? undefined;
else
actionResult = await action();
} finally {
if (captureSnapshot)
await tab.captureSnapshot();
}
const result: string[] = [];
result.push(`- Ran Playwright code:
\`\`\`js
${code.join('\n')}
\`\`\`
`);
if (this.modalStates().length) {
result.push(...this.modalStatesMarkdown());
return {
content: [{
type: 'text',
text: result.join('\n'),
}],
};
}
if (this.tabs().length > 1)
result.push(await this.listTabsMarkdown(), '');
if (tab.hasSnapshot()) {
if (this.tabs().length > 1)
result.push('### Current tab');
result.push(tab.snapshotOrDie().text());
}
const content = actionResult?.content ?? [];
return {
content: [
...content,
{
type: 'text',
text: result.join('\n'),
}
],
};
} }
private _onPageCreated(page: playwright.Page) { private _onPageCreated(page: playwright.Page) {
@ -199,17 +261,7 @@ export class Context {
} }
} }
type RunResult = { export class Tab {
code: string[];
images?: ImageContent[];
};
type ImageContent = {
data: string;
mimeType: string;
};
class Tab {
readonly context: Context; readonly context: Context;
readonly page: playwright.Page; readonly page: playwright.Page;
private _console: playwright.ConsoleMessage[] = []; private _console: playwright.ConsoleMessage[] = [];
@ -248,76 +300,11 @@ class Tab {
await this.page.waitForLoadState('load', { timeout: 5000 }).catch(() => {}); await this.page.waitForLoadState('load', { timeout: 5000 }).catch(() => {});
} }
async run(callback: (tab: Tab) => Promise<RunResult>, options?: RunOptions): Promise<ToolResult> { hasSnapshot(): boolean {
let runResult: RunResult | undefined; return !!this._snapshot;
try {
if (options?.waitForCompletion)
runResult = await waitForCompletion(this.page, () => callback(this)) ?? undefined;
else
runResult = await callback(this) ?? undefined;
} finally {
if (options?.captureSnapshot)
this._snapshot = await PageSnapshot.create(this.page);
}
const result: string[] = [];
result.push(`- Ran Playwright code:
\`\`\`js
${runResult.code.join('\n')}
\`\`\`
`);
if (this.context.modalStates().length) {
result.push(...this.context.modalStatesMarkdown());
return {
content: [{
type: 'text',
text: result.join('\n'),
}],
};
}
if (this.context.tabs().length > 1)
result.push(await this.context.listTabs(), '');
if (this._snapshot) {
if (this.context.tabs().length > 1)
result.push('### Current tab');
result.push(this._snapshot.text());
}
const images = runResult.images?.map(image => {
return {
type: 'image' as 'image',
data: image.data,
mimeType: image.mimeType,
};
}) ?? [];
return {
content: [...images, {
type: 'text',
text: result.join('\n'),
}],
};
} }
async runAndWait(callback: (tab: Tab) => Promise<RunResult>, options?: RunOptions): Promise<ToolResult> { snapshotOrDie(): PageSnapshot {
return await this.run(callback, {
waitForCompletion: true,
...options,
});
}
async runAndWaitWithSnapshot(callback: (snapshot: PageSnapshot) => Promise<RunResult>, options?: RunOptions): Promise<ToolResult> {
return await this.run(tab => callback(tab.lastSnapshot()), {
captureSnapshot: true,
waitForCompletion: true,
...options,
});
}
lastSnapshot(): PageSnapshot {
if (!this._snapshot) if (!this._snapshot)
throw new Error('No snapshot available'); throw new Error('No snapshot available');
return this._snapshot; return this._snapshot;
@ -326,6 +313,10 @@ ${runResult.code.join('\n')}
async console(): Promise<playwright.ConsoleMessage[]> { async console(): Promise<playwright.ConsoleMessage[]> {
return this._console; return this._console;
} }
async captureSnapshot() {
this._snapshot = await PageSnapshot.create(this.page);
}
} }
class PageSnapshot { class PageSnapshot {

View File

@ -71,7 +71,7 @@ export function createServerWithTools(options: Options): Server {
} }
try { try {
return await tool.handle(context, request.params.arguments); return await context.run(tool, request.params.arguments);
} catch (error) { } catch (error) {
return { return {
content: [{ type: 'text', text: String(error) }], content: [{ type: 'text', text: String(error) }],

View File

@ -23,41 +23,45 @@ const waitSchema = z.object({
time: z.number().describe('The time to wait in seconds'), time: z.number().describe('The time to wait in seconds'),
}); });
const wait: Tool = { const wait: ToolFactory = captureSnapshot => ({
capability: 'wait', capability: 'wait',
schema: { schema: {
name: 'browser_wait', name: 'browser_wait',
description: 'Wait for a specified time in seconds', description: 'Wait for a specified time in seconds',
inputSchema: zodToJsonSchema(waitSchema), inputSchema: zodToJsonSchema(waitSchema),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = waitSchema.parse(params); const validatedParams = waitSchema.parse(params);
await new Promise(f => setTimeout(f, Math.min(10000, validatedParams.time * 1000))); await new Promise(f => setTimeout(f, Math.min(10000, validatedParams.time * 1000)));
return { return {
content: [{ code: [`// Waited for ${validatedParams.time} seconds`],
type: 'text', action: async () => ({}),
text: `Waited for ${validatedParams.time} seconds`, captureSnapshot,
}], waitForNetwork: false,
}; };
}, },
}; });
const closeSchema = z.object({}); const closeSchema = z.object({});
const close: Tool = { const close: Tool = {
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_close', name: 'browser_close',
description: 'Close the page', description: 'Close the page',
inputSchema: zodToJsonSchema(closeSchema), inputSchema: zodToJsonSchema(closeSchema),
}, },
handle: async context => { handle: async context => {
await context.close(); await context.close();
return { return {
content: [{ code: [`// Internal to close the page`],
type: 'text', action: async () => ({}),
text: `Page closed`, captureSnapshot: false,
}], waitForNetwork: false,
}; };
}, },
}; };
@ -74,25 +78,33 @@ const resize: ToolFactory = captureSnapshot => ({
description: 'Resize the browser window', description: 'Resize the browser window',
inputSchema: zodToJsonSchema(resizeSchema), inputSchema: zodToJsonSchema(resizeSchema),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = resizeSchema.parse(params); const validatedParams = resizeSchema.parse(params);
const tab = context.currentTab(); const tab = context.currentTabOrDie();
return await tab.run(async tab => {
const code = [
`// Resize browser window to ${validatedParams.width}x${validatedParams.height}`,
`await page.setViewportSize({ width: ${validatedParams.width}, height: ${validatedParams.height} });`
];
const action = async () => {
await tab.page.setViewportSize({ width: validatedParams.width, height: validatedParams.height }); await tab.page.setViewportSize({ width: validatedParams.width, height: validatedParams.height });
const code = [ return {};
`// Resize browser window to ${validatedParams.width}x${validatedParams.height}`, };
`await page.setViewportSize({ width: ${validatedParams.width}, height: ${validatedParams.height} });`
]; return {
return { code }; code,
}, { action,
captureSnapshot, captureSnapshot,
}); waitForNetwork: true
};
}, },
}); });
export default (captureSnapshot: boolean) => [ export default (captureSnapshot: boolean) => [
close, close,
wait, wait(captureSnapshot),
resize(captureSnapshot) resize(captureSnapshot)
]; ];

View File

@ -29,13 +29,17 @@ const console: Tool = {
inputSchema: zodToJsonSchema(consoleSchema), inputSchema: zodToJsonSchema(consoleSchema),
}, },
handle: async context => { handle: async context => {
const messages = await context.currentTab().console(); const messages = await context.currentTabOrDie().console();
const log = messages.map(message => `[${message.type().toUpperCase()}] ${message.text()}`).join('\n'); const log = messages.map(message => `[${message.type().toUpperCase()}] ${message.text()}`).join('\n');
return { return {
content: [{ code: [`// <internal code to get console messages>`],
type: 'text', action: async () => {
text: log return {
}], content: [{ type: 'text', text: log }]
};
},
captureSnapshot: false,
waitForNetwork: false,
}; };
}, },
}; };

View File

@ -25,27 +25,35 @@ const uploadFileSchema = z.object({
const uploadFile: ToolFactory = captureSnapshot => ({ const uploadFile: ToolFactory = captureSnapshot => ({
capability: 'files', capability: 'files',
schema: { schema: {
name: 'browser_file_upload', name: 'browser_file_upload',
description: 'Upload one or multiple files', description: 'Upload one or multiple files',
inputSchema: zodToJsonSchema(uploadFileSchema), inputSchema: zodToJsonSchema(uploadFileSchema),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = uploadFileSchema.parse(params); const validatedParams = uploadFileSchema.parse(params);
const tab = context.currentTab(); const modalState = context.modalStates().find(state => state.type === 'fileChooser');
return await tab.runAndWait(async () => { if (!modalState)
const modalState = context.modalStates().find(state => state.type === 'fileChooser'); throw new Error('No file chooser visible');
if (!modalState)
throw new Error('No file chooser visible'); const code = [
`// <internal code to chose files ${validatedParams.paths.join(', ')}`,
];
const action = async () => {
await modalState.fileChooser.setFiles(validatedParams.paths); await modalState.fileChooser.setFiles(validatedParams.paths);
context.clearModalState(modalState); context.clearModalState(modalState);
const code = [ return {};
`// <internal code to chose files ${validatedParams.paths.join(', ')}`, };
];
return { code }; return {
}, { code,
action,
captureSnapshot, captureSnapshot,
}); waitForNetwork: true,
};
}, },
clearsModalState: 'fileChooser', clearsModalState: 'fileChooser',
}); });

View File

@ -48,10 +48,10 @@ const install: Tool = {
}); });
}); });
return { return {
content: [{ code: [`// Browser ${channel} installed`],
type: 'text', action: async () => ({}),
text: `Browser ${channel} installed`, captureSnapshot: false,
}], waitForNetwork: false,
}; };
}, },
}; };

View File

@ -25,23 +25,30 @@ const pressKeySchema = z.object({
const pressKey: ToolFactory = captureSnapshot => ({ const pressKey: ToolFactory = captureSnapshot => ({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_press_key', name: 'browser_press_key',
description: 'Press a key on the keyboard', description: 'Press a key on the keyboard',
inputSchema: zodToJsonSchema(pressKeySchema), inputSchema: zodToJsonSchema(pressKeySchema),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = pressKeySchema.parse(params); const validatedParams = pressKeySchema.parse(params);
return await context.currentTab().runAndWait(async tab => { const tab = context.currentTabOrDie();
await tab.page.keyboard.press(validatedParams.key);
const code = [ const code = [
`// Press ${validatedParams.key}`, `// Press ${validatedParams.key}`,
`await page.keyboard.press('${validatedParams.key}');`, `await page.keyboard.press('${validatedParams.key}');`,
]; ];
return { code };
}, { const action = () => tab.page.keyboard.press(validatedParams.key).then(() => ({}));
return {
code,
action,
captureSnapshot, captureSnapshot,
}); waitForNetwork: true
};
}, },
}); });

View File

@ -25,53 +25,62 @@ const navigateSchema = z.object({
const navigate: ToolFactory = captureSnapshot => ({ const navigate: ToolFactory = captureSnapshot => ({
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_navigate', name: 'browser_navigate',
description: 'Navigate to a URL', description: 'Navigate to a URL',
inputSchema: zodToJsonSchema(navigateSchema), inputSchema: zodToJsonSchema(navigateSchema),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = navigateSchema.parse(params); const validatedParams = navigateSchema.parse(params);
const currentTab = await context.ensureTab(); const tab = await context.ensureTab();
return await currentTab.run(async tab => { await tab.navigate(validatedParams.url);
await tab.navigate(validatedParams.url);
const code = [ const code = [
`// Navigate to ${validatedParams.url}`, `// Navigate to ${validatedParams.url}`,
`await page.goto('${validatedParams.url}');`, `await page.goto('${validatedParams.url}');`,
]; ];
return { code };
}, { return {
code,
action: async () => ({}),
captureSnapshot, captureSnapshot,
}); waitForNetwork: false,
};
}, },
}); });
const goBackSchema = z.object({}); const goBackSchema = z.object({});
const goBack: ToolFactory = snapshot => ({ const goBack: ToolFactory = captureSnapshot => ({
capability: 'history', capability: 'history',
schema: { schema: {
name: 'browser_navigate_back', name: 'browser_navigate_back',
description: 'Go back to the previous page', description: 'Go back to the previous page',
inputSchema: zodToJsonSchema(goBackSchema), inputSchema: zodToJsonSchema(goBackSchema),
}, },
handle: async context => { handle: async context => {
return await context.currentTab().runAndWait(async tab => { const tab = await context.ensureTab();
await tab.page.goBack(); await tab.page.goBack();
const code = [ const code = [
`// Navigate back`, `// Navigate back`,
`await page.goBack();`, `await page.goBack();`,
]; ];
return { code };
}, { return {
captureSnapshot: snapshot, code,
}); action: async () => ({}),
captureSnapshot,
waitForNetwork: false,
};
}, },
}); });
const goForwardSchema = z.object({}); const goForwardSchema = z.object({});
const goForward: ToolFactory = snapshot => ({ const goForward: ToolFactory = captureSnapshot => ({
capability: 'history', capability: 'history',
schema: { schema: {
name: 'browser_navigate_forward', name: 'browser_navigate_forward',
@ -79,16 +88,18 @@ const goForward: ToolFactory = snapshot => ({
inputSchema: zodToJsonSchema(goForwardSchema), inputSchema: zodToJsonSchema(goForwardSchema),
}, },
handle: async context => { handle: async context => {
return await context.currentTab().runAndWait(async tab => { const tab = context.currentTabOrDie();
await tab.page.goForward(); await tab.page.goForward();
const code = [ const code = [
`// Navigate forward`, `// Navigate forward`,
`await page.goForward();`, `await page.goForward();`,
]; ];
return { code }; return {
}, { code,
captureSnapshot: snapshot, action: async () => ({}),
}); captureSnapshot,
waitForNetwork: false,
};
}, },
}); });

View File

@ -21,6 +21,7 @@ import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema'; import { zodToJsonSchema } from 'zod-to-json-schema';
import { sanitizeForFilePath } from './utils'; import { sanitizeForFilePath } from './utils';
import * as javascript from '../javascript';
import type { Tool } from './tool'; import type { Tool } from './tool';
@ -28,20 +29,27 @@ const pdfSchema = z.object({});
const pdf: Tool = { const pdf: Tool = {
capability: 'pdf', capability: 'pdf',
schema: { schema: {
name: 'browser_pdf_save', name: 'browser_pdf_save',
description: 'Save page as PDF', description: 'Save page as PDF',
inputSchema: zodToJsonSchema(pdfSchema), inputSchema: zodToJsonSchema(pdfSchema),
}, },
handle: async context => { handle: async context => {
const tab = context.currentTab(); const tab = context.currentTabOrDie();
const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + '.pdf'; const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + '.pdf';
await tab.page.pdf({ path: fileName });
const code = [
`// Save page as ${fileName}`,
`await page.pdf(${javascript.formatObject({ path: fileName })});`,
];
return { return {
content: [{ code,
type: 'text', action: async () => tab.page.pdf({ path: fileName }).then(() => ({})),
text: `Saved as ${fileName}`, captureSnapshot: false,
}], waitForNetwork: false,
}; };
}, },
}; };

View File

@ -17,6 +17,8 @@
import { z } from 'zod'; import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema'; import { zodToJsonSchema } from 'zod-to-json-schema';
import * as javascript from '../javascript';
import type { Tool } from './tool'; import type { Tool } from './tool';
const screenshot: Tool = { const screenshot: Tool = {
@ -29,9 +31,24 @@ const screenshot: Tool = {
handle: async context => { handle: async context => {
const tab = await context.ensureTab(); const tab = await context.ensureTab();
const screenshot = await tab.page.screenshot({ type: 'jpeg', quality: 50, scale: 'css' }); const options = { type: 'jpeg' as 'jpeg', quality: 50, scale: 'css' as 'css' };
const code = [
`// Take a screenshot of the current page`,
`await page.screenshot(${javascript.formatObject(options)});`,
];
const action = () => tab.page.screenshot(options).then(buffer => {
return {
content: [{ type: 'image' as 'image', data: buffer.toString('base64'), mimeType: 'image/jpeg' }],
};
});
return { return {
content: [{ type: 'image', data: screenshot.toString('base64'), mimeType: 'image/jpeg' }], code,
action,
captureSnapshot: false,
waitForNetwork: false
}; };
}, },
}; };
@ -55,10 +72,17 @@ const moveMouse: Tool = {
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = moveMouseSchema.parse(params); const validatedParams = moveMouseSchema.parse(params);
const tab = context.currentTab(); const tab = context.currentTabOrDie();
await tab.page.mouse.move(validatedParams.x, validatedParams.y); const code = [
`// Move mouse to (${validatedParams.x}, ${validatedParams.y})`,
`await page.mouse.move(${validatedParams.x}, ${validatedParams.y});`,
];
const action = () => tab.page.mouse.move(validatedParams.x, validatedParams.y).then(() => ({}));
return { return {
content: [{ type: 'text', text: `Moved mouse to (${validatedParams.x}, ${validatedParams.y})` }], code,
action,
captureSnapshot: false,
waitForNetwork: false
}; };
}, },
}; };
@ -77,19 +101,26 @@ const click: Tool = {
}, },
handle: async (context, params) => { handle: async (context, params) => {
return await context.currentTab().runAndWait(async tab => { const validatedParams = clickSchema.parse(params);
const validatedParams = clickSchema.parse(params); const tab = context.currentTabOrDie();
const code = [ const code = [
`// Click mouse at coordinates (${validatedParams.x}, ${validatedParams.y})`, `// Click mouse at coordinates (${validatedParams.x}, ${validatedParams.y})`,
`await page.mouse.move(${validatedParams.x}, ${validatedParams.y});`, `await page.mouse.move(${validatedParams.x}, ${validatedParams.y});`,
`await page.mouse.down();`, `await page.mouse.down();`,
`await page.mouse.up();`, `await page.mouse.up();`,
]; ];
const action = async () => {
await tab.page.mouse.move(validatedParams.x, validatedParams.y); await tab.page.mouse.move(validatedParams.x, validatedParams.y);
await tab.page.mouse.down(); await tab.page.mouse.down();
await tab.page.mouse.up(); await tab.page.mouse.up();
return { code }; return {};
}); };
return {
code,
action,
captureSnapshot: false,
waitForNetwork: true,
};
}, },
}; };
@ -102,6 +133,7 @@ const dragSchema = elementSchema.extend({
const drag: Tool = { const drag: Tool = {
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_screen_drag', name: 'browser_screen_drag',
description: 'Drag left mouse button', description: 'Drag left mouse button',
@ -110,20 +142,30 @@ const drag: Tool = {
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = dragSchema.parse(params); const validatedParams = dragSchema.parse(params);
return await context.currentTab().runAndWait(async tab => { const tab = context.currentTabOrDie();
const code = [
`// Drag mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, ${validatedParams.endY})`,
`await page.mouse.move(${validatedParams.startX}, ${validatedParams.startY});`,
`await page.mouse.down();`,
`await page.mouse.move(${validatedParams.endX}, ${validatedParams.endY});`,
`await page.mouse.up();`,
];
const action = async () => {
await tab.page.mouse.move(validatedParams.startX, validatedParams.startY); await tab.page.mouse.move(validatedParams.startX, validatedParams.startY);
await tab.page.mouse.down(); await tab.page.mouse.down();
await tab.page.mouse.move(validatedParams.endX, validatedParams.endY); await tab.page.mouse.move(validatedParams.endX, validatedParams.endY);
await tab.page.mouse.up(); await tab.page.mouse.up();
const code = [ return {};
`// Drag mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, ${validatedParams.endY})`, };
`await page.mouse.move(${validatedParams.startX}, ${validatedParams.startY});`,
`await page.mouse.down();`, return {
`await page.mouse.move(${validatedParams.endX}, ${validatedParams.endY});`, code,
`await page.mouse.up();`, action,
]; captureSnapshot: false,
return { code }; waitForNetwork: true,
}); };
}, },
}; };
@ -134,6 +176,7 @@ const typeSchema = z.object({
const type: Tool = { const type: Tool = {
capability: 'core', capability: 'core',
schema: { schema: {
name: 'browser_screen_type', name: 'browser_screen_type',
description: 'Type text', description: 'Type text',
@ -142,19 +185,31 @@ const type: Tool = {
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = typeSchema.parse(params); const validatedParams = typeSchema.parse(params);
return await context.currentTab().runAndWait(async tab => { const tab = context.currentTabOrDie();
const code = [
`// Type ${validatedParams.text}`, const code = [
`await page.keyboard.type('${validatedParams.text}');`, `// Type ${validatedParams.text}`,
]; `await page.keyboard.type('${validatedParams.text}');`,
];
const action = async () => {
await tab.page.keyboard.type(validatedParams.text); await tab.page.keyboard.type(validatedParams.text);
if (validatedParams.submit) { if (validatedParams.submit)
code.push(`// Submit text`);
code.push(`await page.keyboard.press('Enter');`);
await tab.page.keyboard.press('Enter'); await tab.page.keyboard.press('Enter');
} return {};
return { code }; };
});
if (validatedParams.submit) {
code.push(`// Submit text`);
code.push(`await page.keyboard.press('Enter');`);
}
return {
code,
action,
captureSnapshot: false,
waitForNetwork: true,
};
}, },
}; };

View File

@ -14,17 +14,19 @@
* limitations under the License. * limitations under the License.
*/ */
import path from 'path';
import os from 'os';
import { z } from 'zod'; import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema'; import zodToJsonSchema from 'zod-to-json-schema';
import type * as playwright from 'playwright';
import type { Tool } from './tool';
import path from 'path';
import os from 'os';
import { sanitizeForFilePath } from './utils'; import { sanitizeForFilePath } from './utils';
import { generateLocator } from '../context'; import { generateLocator } from '../context';
import * as javascript from '../javascript'; import * as javascript from '../javascript';
import type * as playwright from 'playwright';
import type { Tool } from './tool';
const snapshot: Tool = { const snapshot: Tool = {
capability: 'core', capability: 'core',
schema: { schema: {
@ -34,11 +36,14 @@ const snapshot: Tool = {
}, },
handle: async context => { handle: async context => {
const tab = await context.ensureTab(); await context.ensureTab();
return await tab.run(async () => {
const code = [`// <internal code to capture accessibility snapshot>`]; return {
return { code }; code: [`// <internal code to capture accessibility snapshot>`],
}, { captureSnapshot: true }); action: async () => ({}),
captureSnapshot: true,
waitForNetwork: false,
};
}, },
}; };
@ -57,15 +62,20 @@ const click: Tool = {
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = elementSchema.parse(params); const validatedParams = elementSchema.parse(params);
return await context.currentTab().runAndWaitWithSnapshot(async snapshot => { const tab = context.currentTabOrDie();
const locator = snapshot.refLocator(validatedParams.ref); const locator = tab.snapshotOrDie().refLocator(validatedParams.ref);
const code = [
`// Click ${validatedParams.element}`, const code = [
`await page.${await generateLocator(locator)}.click();` `// Click ${validatedParams.element}`,
]; `await page.${await generateLocator(locator)}.click();`
await locator.click(); ];
return { code };
}); return {
code,
action: () => locator.click().then(() => ({})),
captureSnapshot: true,
waitForNetwork: true,
};
}, },
}; };
@ -86,16 +96,21 @@ const drag: Tool = {
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = dragSchema.parse(params); const validatedParams = dragSchema.parse(params);
return await context.currentTab().runAndWaitWithSnapshot(async snapshot => { const snapshot = context.currentTabOrDie().snapshotOrDie();
const startLocator = snapshot.refLocator(validatedParams.startRef); const startLocator = snapshot.refLocator(validatedParams.startRef);
const endLocator = snapshot.refLocator(validatedParams.endRef); const endLocator = snapshot.refLocator(validatedParams.endRef);
const code = [
`// Drag ${validatedParams.startElement} to ${validatedParams.endElement}`, const code = [
`await page.${await generateLocator(startLocator)}.dragTo(page.${await generateLocator(endLocator)});` `// Drag ${validatedParams.startElement} to ${validatedParams.endElement}`,
]; `await page.${await generateLocator(startLocator)}.dragTo(page.${await generateLocator(endLocator)});`
await startLocator.dragTo(endLocator); ];
return { code };
}); return {
code,
action: () => startLocator.dragTo(endLocator).then(() => ({})),
captureSnapshot: true,
waitForNetwork: true,
};
}, },
}; };
@ -109,15 +124,20 @@ const hover: Tool = {
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = elementSchema.parse(params); const validatedParams = elementSchema.parse(params);
return await context.currentTab().runAndWaitWithSnapshot(async snapshot => { const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(validatedParams.ref); const locator = snapshot.refLocator(validatedParams.ref);
const code = [
`// Hover over ${validatedParams.element}`, const code = [
`await page.${await generateLocator(locator)}.hover();` `// Hover over ${validatedParams.element}`,
]; `await page.${await generateLocator(locator)}.hover();`
await locator.hover(); ];
return { code };
}); return {
code,
action: () => locator.hover().then(() => ({})),
captureSnapshot: true,
waitForNetwork: true,
};
}, },
}; };
@ -137,26 +157,34 @@ const type: Tool = {
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = typeSchema.parse(params); const validatedParams = typeSchema.parse(params);
return await context.currentTab().runAndWaitWithSnapshot(async snapshot => { const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(validatedParams.ref); const locator = snapshot.refLocator(validatedParams.ref);
const code: string[] = []; const code: string[] = [];
if (validatedParams.slowly) { const steps: (() => Promise<void>)[] = [];
code.push(`// Press "${validatedParams.text}" sequentially into "${validatedParams.element}"`);
code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(validatedParams.text)});`); if (validatedParams.slowly) {
await locator.pressSequentially(validatedParams.text); code.push(`// Press "${validatedParams.text}" sequentially into "${validatedParams.element}"`);
} else { code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(validatedParams.text)});`);
code.push(`// Fill "${validatedParams.text}" into "${validatedParams.element}"`); steps.push(() => locator.pressSequentially(validatedParams.text));
code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(validatedParams.text)});`); } else {
await locator.fill(validatedParams.text); code.push(`// Fill "${validatedParams.text}" into "${validatedParams.element}"`);
} code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(validatedParams.text)});`);
if (validatedParams.submit) { steps.push(() => locator.fill(validatedParams.text));
code.push(`// Submit text`); }
code.push(`await page.${await generateLocator(locator)}.press('Enter');`);
await locator.press('Enter'); if (validatedParams.submit) {
} code.push(`// Submit text`);
return { code }; code.push(`await page.${await generateLocator(locator)}.press('Enter');`);
}); steps.push(() => locator.press('Enter'));
}
return {
code,
action: () => steps.reduce((acc, step) => acc.then(step), Promise.resolve()).then(() => ({})),
captureSnapshot: true,
waitForNetwork: true,
};
}, },
}; };
@ -174,15 +202,20 @@ const selectOption: Tool = {
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = selectOptionSchema.parse(params); const validatedParams = selectOptionSchema.parse(params);
return await context.currentTab().runAndWaitWithSnapshot(async snapshot => { const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(validatedParams.ref); const locator = snapshot.refLocator(validatedParams.ref);
const code = [
`// Select options [${validatedParams.values.join(', ')}] in ${validatedParams.element}`, const code = [
`await page.${await generateLocator(locator)}.selectOption(${javascript.formatObject(validatedParams.values)});` `// Select options [${validatedParams.values.join(', ')}] in ${validatedParams.element}`,
]; `await page.${await generateLocator(locator)}.selectOption(${javascript.formatObject(validatedParams.values)});`
await locator.selectOption(validatedParams.values); ];
return { code };
}); return {
code,
action: () => locator.selectOption(validatedParams.values).then(() => ({})),
captureSnapshot: true,
waitForNetwork: true,
};
}, },
}; };
@ -207,32 +240,41 @@ const screenshot: Tool = {
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = screenshotSchema.parse(params); const validatedParams = screenshotSchema.parse(params);
const tab = context.currentTab(); const tab = context.currentTabOrDie();
const snapshot = tab.snapshotOrDie();
const fileType = validatedParams.raw ? 'png' : 'jpeg'; const fileType = validatedParams.raw ? 'png' : 'jpeg';
const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + `.${fileType}`; const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + `.${fileType}`;
const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName }; const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName };
const isElementScreenshot = validatedParams.element && validatedParams.ref; const isElementScreenshot = validatedParams.element && validatedParams.ref;
return await context.currentTab().runAndWaitWithSnapshot(async snapshot => {
let screenshot: Buffer | undefined; const code = [
const code = [ `// Screenshot ${isElementScreenshot ? validatedParams.element : 'viewport'} and save it as ${fileName}`,
`// Screenshot ${isElementScreenshot ? validatedParams.element : 'viewport'} and save it as ${fileName}`, ];
];
if (isElementScreenshot) { const locator = validatedParams.ref ? snapshot.refLocator(validatedParams.ref) : null;
const locator = snapshot.refLocator(validatedParams.ref!);
code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`); if (locator)
screenshot = await locator.screenshot(options); code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`);
} else { else
code.push(`await page.screenshot(${javascript.formatObject(options)});`); code.push(`await page.screenshot(${javascript.formatObject(options)});`);
screenshot = await tab.page.screenshot(options);
} const action = async () => {
const screenshot = locator ? await locator.screenshot(options) : await tab.page.screenshot(options);
return { return {
code, content: [{
images: [{ type: 'image' as 'image',
data: screenshot.toString('base64'), data: screenshot.toString('base64'),
mimeType: fileType === 'png' ? 'image/png' : 'image/jpeg', mimeType: fileType === 'png' ? 'image/png' : 'image/jpeg',
}] }]
}; };
}); };
return {
code,
action,
captureSnapshot: true,
waitForNetwork: false,
};
} }
}; };

View File

@ -21,17 +21,19 @@ import type { ToolFactory, Tool } from './tool';
const listTabs: Tool = { const listTabs: Tool = {
capability: 'tabs', capability: 'tabs',
schema: { schema: {
name: 'browser_tab_list', name: 'browser_tab_list',
description: 'List browser tabs', description: 'List browser tabs',
inputSchema: zodToJsonSchema(z.object({})), inputSchema: zodToJsonSchema(z.object({})),
}, },
handle: async context => {
handle: async () => {
return { return {
content: [{ code: [`// <internal code to list tabs>`],
type: 'text', action: async () => ({}),
text: await context.listTabs(), captureSnapshot: false,
}], waitForNetwork: false,
}; };
}, },
}; };
@ -42,21 +44,26 @@ const selectTabSchema = z.object({
const selectTab: ToolFactory = captureSnapshot => ({ const selectTab: ToolFactory = captureSnapshot => ({
capability: 'tabs', capability: 'tabs',
schema: { schema: {
name: 'browser_tab_select', name: 'browser_tab_select',
description: 'Select a tab by index', description: 'Select a tab by index',
inputSchema: zodToJsonSchema(selectTabSchema), inputSchema: zodToJsonSchema(selectTabSchema),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = selectTabSchema.parse(params); const validatedParams = selectTabSchema.parse(params);
await context.selectTab(validatedParams.index); await context.selectTab(validatedParams.index);
const currentTab = await context.ensureTab(); const code = [
return await currentTab.run(async () => { `// <internal code to select tab ${validatedParams.index}>`,
const code = [ ];
`// <internal code to select tab ${validatedParams.index}>`,
]; return {
return { code }; code,
}, { captureSnapshot }); action: async () => ({}),
captureSnapshot,
waitForNetwork: false
};
}, },
}); });
@ -64,26 +71,32 @@ const newTabSchema = z.object({
url: z.string().optional().describe('The URL to navigate to in the new tab. If not provided, the new tab will be blank.'), url: z.string().optional().describe('The URL to navigate to in the new tab. If not provided, the new tab will be blank.'),
}); });
const newTab: Tool = { const newTab: ToolFactory = captureSnapshot => ({
capability: 'tabs', capability: 'tabs',
schema: { schema: {
name: 'browser_tab_new', name: 'browser_tab_new',
description: 'Open a new tab', description: 'Open a new tab',
inputSchema: zodToJsonSchema(newTabSchema), inputSchema: zodToJsonSchema(newTabSchema),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = newTabSchema.parse(params); const validatedParams = newTabSchema.parse(params);
await context.newTab(); await context.newTab();
if (validatedParams.url) if (validatedParams.url)
await context.currentTab().navigate(validatedParams.url); await context.currentTabOrDie().navigate(validatedParams.url);
return await context.currentTab().run(async () => {
const code = [ const code = [
`// <internal code to open a new tab>`, `// <internal code to open a new tab>`,
]; ];
return { code }; return {
}, { captureSnapshot: true }); code,
action: async () => ({}),
captureSnapshot,
waitForNetwork: false
};
}, },
}; });
const closeTabSchema = z.object({ const closeTabSchema = z.object({
index: z.number().optional().describe('The index of the tab to close. Closes current tab if not provided.'), index: z.number().optional().describe('The index of the tab to close. Closes current tab if not provided.'),
@ -91,35 +104,31 @@ const closeTabSchema = z.object({
const closeTab: ToolFactory = captureSnapshot => ({ const closeTab: ToolFactory = captureSnapshot => ({
capability: 'tabs', capability: 'tabs',
schema: { schema: {
name: 'browser_tab_close', name: 'browser_tab_close',
description: 'Close a tab', description: 'Close a tab',
inputSchema: zodToJsonSchema(closeTabSchema), inputSchema: zodToJsonSchema(closeTabSchema),
}, },
handle: async (context, params) => { handle: async (context, params) => {
const validatedParams = closeTabSchema.parse(params); const validatedParams = closeTabSchema.parse(params);
await context.closeTab(validatedParams.index); await context.closeTab(validatedParams.index);
const currentTab = context.currentTab(); const code = [
if (currentTab) { `// <internal code to close tab ${validatedParams.index}>`,
return await currentTab.run(async () => { ];
const code = [
`// <internal code to close tab ${validatedParams.index}>`,
];
return { code };
}, { captureSnapshot });
}
return { return {
content: [{ code,
type: 'text', action: async () => ({}),
text: await context.listTabs(), captureSnapshot,
}], waitForNetwork: false
}; };
}, },
}); });
export default (captureSnapshot: boolean) => [ export default (captureSnapshot: boolean) => [
listTabs, listTabs,
newTab, newTab(captureSnapshot),
selectTab(captureSnapshot), selectTab(captureSnapshot),
closeTab(captureSnapshot), closeTab(captureSnapshot),
]; ];

View File

@ -35,8 +35,10 @@ export type FileUploadModalState = {
export type ModalState = FileUploadModalState; export type ModalState = FileUploadModalState;
export type ToolResult = { export type ToolResult = {
content: (ImageContent | TextContent)[]; code: string[];
isError?: boolean; action: () => Promise<{ content?: (ImageContent | TextContent)[] }>;
captureSnapshot: boolean;
waitForNetwork: boolean;
}; };
export type Tool = { export type Tool = {

View File

@ -69,7 +69,6 @@ await page.getByRole('button', { name: 'Submit' }).click();
`); `);
}); });
test('browser_select_option', async ({ client }) => { test('browser_select_option', async ({ client }) => {
await client.callTool({ await client.callTool({
name: 'browser_navigate', name: 'browser_navigate',

View File

@ -26,7 +26,7 @@ test('test reopen browser', async ({ client }) => {
expect(await client.callTool({ expect(await client.callTool({
name: 'browser_close', name: 'browser_close',
})).toHaveTextContent('Page closed'); })).toContainTextContent('No open pages available');
expect(await client.callTool({ expect(await client.callTool({
name: 'browser_navigate', name: 'browser_navigate',

View File

@ -42,5 +42,5 @@ test('save as pdf', async ({ client, mcpBrowser }) => {
const response = await client.callTool({ const response = await client.callTool({
name: 'browser_pdf_save', name: 'browser_pdf_save',
}); });
expect(response).toHaveTextContent(/^Saved as.*page-[^:]+.pdf$/); expect(response).toHaveTextContent(/Save page as.*page-[^:]+.pdf/);
}); });