From d4bc2c78d27a2ba6a847abf757b33617b2840bc8 Mon Sep 17 00:00:00 2001 From: Pavel Feldman Date: Thu, 27 Mar 2025 07:27:34 -0700 Subject: [PATCH] chore: allow taking pixel screenshots in snapshot mode (#44) Ref: https://github.com/microsoft/playwright-mcp/issues/39 --- README.md | 5 +++++ src/context.ts | 2 +- src/index.ts | 1 + src/tools/common.ts | 2 +- src/tools/screenshot.ts | 4 ++-- src/tools/snapshot.ts | 24 +++++++++++++++++++++++- src/tools/utils.ts | 2 +- tests/basic.spec.ts | 3 +++ 8 files changed, 37 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a22000e..ed6092f 100644 --- a/README.md +++ b/README.md @@ -224,6 +224,11 @@ The Playwright MCP provides a set of tools for browser automation. Here are all - Description: Save page as PDF - Parameters: None +- **browser_take_screenshot** + - Description: Capture screenshot of the page + - Parameters: + - `raw` (boolean): Optionally returns lossless PNG screenshot. JPEG by default. + - **browser_wait** - Description: Wait for a specified time in seconds - Parameters: diff --git a/src/context.ts b/src/context.ts index 6828573..5dd5a3e 100644 --- a/src/context.ts +++ b/src/context.ts @@ -58,7 +58,7 @@ export class Context { this._console.length = 0; } - async existingPage(): Promise { + existingPage(): playwright.Page { if (!this._page) throw new Error('Navigate to a location to create a page'); return this._page; diff --git a/src/index.ts b/src/index.ts index a312aff..3530459 100644 --- a/src/index.ts +++ b/src/index.ts @@ -41,6 +41,7 @@ const snapshotTools: Tool[] = [ snapshot.hover, snapshot.type, snapshot.selectOption, + snapshot.screenshot, ...commonTools, ]; diff --git a/src/tools/common.ts b/src/tools/common.ts index cb8ce65..d0ef843 100644 --- a/src/tools/common.ts +++ b/src/tools/common.ts @@ -126,7 +126,7 @@ export const pdf: Tool = { inputSchema: zodToJsonSchema(pdfSchema), }, handle: async context => { - const page = await context.existingPage(); + const page = 
context.existingPage(); const fileName = path.join(os.tmpdir(), `/page-${new Date().toISOString()}.pdf`); await page.pdf({ path: fileName }); return { diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts index 261ac71..cef2ba1 100644 --- a/src/tools/screenshot.ts +++ b/src/tools/screenshot.ts @@ -29,7 +29,7 @@ export const screenshot: Tool = { }, handle: async context => { - const page = await context.existingPage(); + const page = context.existingPage(); const screenshot = await page.screenshot({ type: 'jpeg', quality: 50, scale: 'css' }); return { content: [{ type: 'image', data: screenshot.toString('base64'), mimeType: 'image/jpeg' }], @@ -55,7 +55,7 @@ export const moveMouse: Tool = { handle: async (context, params) => { const validatedParams = moveMouseSchema.parse(params); - const page = await context.existingPage(); + const page = context.existingPage(); await page.mouse.move(validatedParams.x, validatedParams.y); return { content: [{ type: 'text', text: `Moved mouse to (${validatedParams.x}, ${validatedParams.y})` }], diff --git a/src/tools/snapshot.ts b/src/tools/snapshot.ts index a505659..f41794c 100644 --- a/src/tools/snapshot.ts +++ b/src/tools/snapshot.ts @@ -30,7 +30,7 @@ export const snapshot: Tool = { }, handle: async context => { - return await captureAriaSnapshot(await context.existingPage()); + return await captureAriaSnapshot(context.existingPage()); }, }; @@ -132,6 +132,28 @@ export const selectOption: Tool = { }, }; +const screenshotSchema = z.object({ + raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'), +}); + +export const screenshot: Tool = { + schema: { + name: 'browser_take_screenshot', + description: `Take a screenshot of the current page. 
You can't perform actions based on the screenshot, use browser_snapshot for actions.`, + inputSchema: zodToJsonSchema(screenshotSchema), + }, + + handle: async (context, params) => { + const validatedParams = screenshotSchema.parse(params); + const page = context.existingPage(); + const options: playwright.PageScreenshotOptions = validatedParams.raw ? { type: 'png', scale: 'css' } : { type: 'jpeg', quality: 50, scale: 'css' }; + const screenshot = await page.screenshot(options); + return { + content: [{ type: 'image', data: screenshot.toString('base64'), mimeType: validatedParams.raw ? 'image/png' : 'image/jpeg' }], + }; + }, +}; + function refLocator(page: playwright.Page, ref: string): playwright.Locator { return page.locator(`aria-ref=${ref}`); } diff --git a/src/tools/utils.ts b/src/tools/utils.ts index b374f78..11b17e6 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -72,7 +72,7 @@ async function waitForCompletion(page: playwright.Page, callback: () => Promi } export async function runAndWait(context: Context, status: string, callback: (page: playwright.Page) => Promise, snapshot: boolean = false): Promise { - const page = await context.existingPage(); + const page = context.existingPage(); await waitForCompletion(page, () => callback(page)); return snapshot ? captureAriaSnapshot(page, status) : { content: [{ type: 'text', text: status }], diff --git a/tests/basic.spec.ts b/tests/basic.spec.ts index a20bbb2..2476a7b 100644 --- a/tests/basic.spec.ts +++ b/tests/basic.spec.ts @@ -51,6 +51,9 @@ test('test tool list', async ({ server }) => { expect.objectContaining({ name: 'browser_select_option', }), + expect.objectContaining({ + name: 'browser_take_screenshot', + }), expect.objectContaining({ name: 'browser_press_key', }),