feat: add element screenshot action for snapshots (#182)

This commit is contained in:
Andrei-Daniel Barzu 2025-04-16 20:28:44 +03:00 committed by GitHub
parent e7c7709b33
commit 6d4adfe5c6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 53 additions and 7 deletions

View File

@ -176,6 +176,12 @@ export class Context {
/**
 * Outcome of an action executed against a tab.
 *
 * `code` holds the generated source lines describing the action; the caller
 * joins them with newlines when building the textual response.
 * `images`, when present, carries image payloads that are emitted as image
 * content entries ahead of the text entry in the response.
 */
type RunResult = {
  code: Array<string>;
  images?: Array<ImageContent>;
};
/**
 * A single image payload attached to a run result.
 */
type ImageContent = {
  /** Image bytes encoded as a string (the screenshot tool supplies base64). */
  data: string;
  /** MIME type of the encoded image, e.g. `image/png` or `image/jpeg`. */
  mimeType: string;
};
class Tab {
@ -243,8 +249,16 @@ ${runResult.code.join('\n')}
result.push(this._snapshot.text({ hasFileChooser: !!this._fileChooser }));
}
const images = runResult.images?.map(image => {
return {
type: 'image' as 'image',
data: image.data,
mimeType: image.mimeType,
};
}) ?? [];
return {
content: [{
content: [...images, {
type: 'text',
text: result.join('\n'),
}],

View File

@ -19,6 +19,9 @@ import zodToJsonSchema from 'zod-to-json-schema';
import type * as playwright from 'playwright';
import type { Tool } from './tool';
import path from 'path';
import os from 'os';
import { sanitizeForFilePath } from './utils';
import { generateLocator } from '../context';
import * as javascript from '../javascript';
@ -185,6 +188,13 @@ const selectOption: Tool = {
/**
 * Input schema for the screenshot tool.
 *
 * - `raw`: when true the image is returned as PNG, otherwise as JPEG.
 * - `element` / `ref`: optional pair identifying a snapshot element to
 *   capture instead of the viewport. They must be supplied together; an
 *   empty string counts as "not provided".
 *
 * The pairing rule is enforced with `superRefine` so the validation issue is
 * attached to the field that is actually missing. A zod `path` is a list of
 * segments describing ONE (possibly nested) location, so `['ref', 'element']`
 * would point at the non-existent nested field `ref.element`.
 */
const screenshotSchema = z.object({
  raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
  element: z.string().optional().describe('Human-readable element description used to obtain permission to interact with the element. If not provided, the screenshot will be taken of viewport. If element is provided, ref must be provided too.'),
  ref: z.string().optional().describe('Exact target element reference from the page snapshot. If not provided, the screenshot will be taken of viewport. If ref is provided, element must be provided too.'),
}).superRefine((data, ctx) => {
  const hasElement = !!data.element;
  const hasRef = !!data.ref;
  // Valid when both are present or both are absent.
  if (hasElement === hasRef)
    return;
  ctx.addIssue({
    code: z.ZodIssueCode.custom,
    message: 'Both element and ref must be provided or neither.',
    // Report the issue on whichever half of the pair is missing.
    path: [hasElement ? 'ref' : 'element'],
  });
});
const screenshot: Tool = {
@ -198,14 +208,36 @@ const screenshot: Tool = {
handle: async (context, params) => {
const validatedParams = screenshotSchema.parse(params);
const tab = context.currentTab();
const options: playwright.PageScreenshotOptions = validatedParams.raw ? { type: 'png', scale: 'css' } : { type: 'jpeg', quality: 50, scale: 'css' };
const screenshot = await tab.page.screenshot(options);
return {
content: [{ type: 'image', data: screenshot.toString('base64'), mimeType: validatedParams.raw ? 'image/png' : 'image/jpeg' }],
};
},
const fileType = validatedParams.raw ? 'png' : 'jpeg';
const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + `.${fileType}`;
const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName };
const isElementScreenshot = validatedParams.element && validatedParams.ref;
return await context.currentTab().runAndWaitWithSnapshot(async snapshot => {
let screenshot: Buffer | undefined;
const code = [
`// Screenshot ${isElementScreenshot ? validatedParams.element : 'viewport'}`,
];
if (isElementScreenshot) {
const locator = snapshot.refLocator(validatedParams.ref!);
code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`);
screenshot = await locator.screenshot(options);
} else {
code.push(`await page.screenshot(${javascript.formatObject(options)});`);
screenshot = await tab.page.screenshot(options);
}
code.push(`// Screenshot saved as ${fileName}`);
return {
code,
images: [{
data: screenshot.toString('base64'),
mimeType: fileType === 'png' ? 'image/png' : 'image/jpeg',
}]
};
}, { captureSnapshot: false });
}
};
export default [
snapshot,
click,