chore: allow taking pixel screenshots in snapshot mode (#44)

Ref: https://github.com/microsoft/playwright-mcp/issues/39
This commit is contained in:
Pavel Feldman 2025-03-27 07:27:34 -07:00 committed by GitHub
parent 702fa0bdf3
commit d4bc2c78d2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 37 additions and 6 deletions

View File

@ -224,6 +224,11 @@ The Playwright MCP provides a set of tools for browser automation. Here are all
- Description: Save page as PDF
- Parameters: None
- **browser_take_screenshot**
- Description: Capture screenshot of the page
- Parameters:
- `raw` (boolean, optional): When true, returns a lossless PNG screenshot. Defaults to false, which returns a JPEG.
- **browser_wait**
- Description: Wait for a specified time in seconds
- Parameters:

View File

@ -58,7 +58,7 @@ export class Context {
this._console.length = 0;
}
async existingPage(): Promise<playwright.Page> {
existingPage(): playwright.Page {
if (!this._page)
throw new Error('Navigate to a location to create a page');
return this._page;

View File

@ -41,6 +41,7 @@ const snapshotTools: Tool[] = [
snapshot.hover,
snapshot.type,
snapshot.selectOption,
snapshot.screenshot,
...commonTools,
];

View File

@ -126,7 +126,7 @@ export const pdf: Tool = {
inputSchema: zodToJsonSchema(pdfSchema),
},
handle: async context => {
const page = await context.existingPage();
const page = context.existingPage();
const fileName = path.join(os.tmpdir(), `/page-${new Date().toISOString()}.pdf`);
await page.pdf({ path: fileName });
return {

View File

@ -29,7 +29,7 @@ export const screenshot: Tool = {
},
handle: async context => {
const page = await context.existingPage();
const page = context.existingPage();
const screenshot = await page.screenshot({ type: 'jpeg', quality: 50, scale: 'css' });
return {
content: [{ type: 'image', data: screenshot.toString('base64'), mimeType: 'image/jpeg' }],
@ -55,7 +55,7 @@ export const moveMouse: Tool = {
handle: async (context, params) => {
const validatedParams = moveMouseSchema.parse(params);
const page = await context.existingPage();
const page = context.existingPage();
await page.mouse.move(validatedParams.x, validatedParams.y);
return {
content: [{ type: 'text', text: `Moved mouse to (${validatedParams.x}, ${validatedParams.y})` }],

View File

@ -30,7 +30,7 @@ export const snapshot: Tool = {
},
handle: async context => {
return await captureAriaSnapshot(await context.existingPage());
return await captureAriaSnapshot(context.existingPage());
},
};
@ -132,6 +132,28 @@ export const selectOption: Tool = {
},
};
/**
 * Input schema for the `browser_take_screenshot` tool.
 *
 * `raw` is an optional boolean; its description text is surfaced to clients
 * through the generated JSON schema.
 */
const screenshotSchema = z.object({
  raw: z
      .boolean()
      .optional()
      .describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
});
/**
 * Snapshot-mode tool that captures a pixel screenshot of the current page.
 *
 * The image is returned as base64-encoded `image` content. When `raw` is
 * truthy the capture is a PNG; otherwise it is a JPEG at quality 50. Both
 * variants use CSS-pixel scale.
 */
export const screenshot: Tool = {
  schema: {
    name: 'browser_take_screenshot',
    description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
    inputSchema: zodToJsonSchema(screenshotSchema),
  },

  handle: async (context, params) => {
    // Validate input before touching the page so malformed params fail first.
    const { raw } = screenshotSchema.parse(params);
    const page = context.existingPage();

    let options: playwright.PageScreenshotOptions;
    if (raw)
      options = { type: 'png', scale: 'css' };
    else
      options = { type: 'jpeg', quality: 50, scale: 'css' };

    // The reported MIME type must match the format chosen above.
    const mimeType = raw ? 'image/png' : 'image/jpeg';
    const imageBuffer = await page.screenshot(options);

    return {
      content: [{ type: 'image', data: imageBuffer.toString('base64'), mimeType }],
    };
  },
};
/**
 * Builds a locator for the element identified by `ref`.
 *
 * @param page - Page on which the locator is created.
 * @param ref - Element reference; it is appended verbatim to the `aria-ref=` selector prefix.
 * @returns The locator produced by `page.locator` for that selector.
 */
function refLocator(page: playwright.Page, ref: string): playwright.Locator {
  const selector = `aria-ref=${ref}`;
  return page.locator(selector);
}

View File

@ -72,7 +72,7 @@ async function waitForCompletion<R>(page: playwright.Page, callback: () => Promi
}
export async function runAndWait(context: Context, status: string, callback: (page: playwright.Page) => Promise<any>, snapshot: boolean = false): Promise<ToolResult> {
const page = await context.existingPage();
const page = context.existingPage();
await waitForCompletion(page, () => callback(page));
return snapshot ? captureAriaSnapshot(page, status) : {
content: [{ type: 'text', text: status }],

View File

@ -51,6 +51,9 @@ test('test tool list', async ({ server }) => {
expect.objectContaining({
name: 'browser_select_option',
}),
expect.objectContaining({
name: 'browser_take_screenshot',
}),
expect.objectContaining({
name: 'browser_press_key',
}),