2025-03-21 10:58:58 -07:00
/**
 * Copyright (c) Microsoft Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import { z } from 'zod' ;
import zodToJsonSchema from 'zod-to-json-schema' ;
import type * as playwright from 'playwright' ;
import type { Tool } from './tool' ;
2025-04-16 20:28:44 +03:00
import path from 'path' ;
import os from 'os' ;
import { sanitizeForFilePath } from './utils' ;
2025-04-15 19:55:20 +02:00
import { generateLocator } from '../context' ;
import * as javascript from '../javascript' ;
2025-03-21 10:58:58 -07:00
2025-04-04 15:22:00 -07:00
/**
 * `browser_snapshot` tool.
 *
 * Accepts an empty input object. The handler obtains a tab via
 * `context.ensureTab()` and runs an action that performs no page interaction
 * of its own; it only reports a placeholder code line and is run with
 * `captureSnapshot: true`.
 */
const snapshot: Tool = {
  capability: 'core',

  schema: {
    name: 'browser_snapshot',
    description: 'Capture accessibility snapshot of the current page, this is better than screenshot',
    inputSchema: zodToJsonSchema(z.object({})),
  },

  handle: async context => {
    const activeTab = await context.ensureTab();
    // Placeholder shown in place of real page code for this tool.
    const placeholder = `// <internal code to capture accessibility snapshot>`;
    return await activeTab.run(
        async () => ({ code: [placeholder] }),
        { captureSnapshot: true },
    );
  },
};
/**
 * Input shared by the tools that act on a single element:
 * a human-readable description plus the element reference taken from the
 * page snapshot. Both fields are required strings.
 */
const elementSchema = z.object({
  element: z
      .string()
      .describe('Human-readable element description used to obtain permission to interact with the element'),
  ref: z
      .string()
      .describe('Exact target element reference from the page snapshot'),
});
2025-04-04 15:22:00 -07:00
/**
 * `browser_click` tool.
 *
 * Validates the input against `elementSchema`, resolves `ref` to a locator
 * through the snapshot handed to the action, clicks it, and returns the
 * equivalent code lines. `elementSchema.parse` throws on invalid input.
 */
const click: Tool = {
  capability: 'core',

  schema: {
    name: 'browser_click',
    description: 'Perform click on a web page',
    inputSchema: zodToJsonSchema(elementSchema),
  },

  handle: async (context, params) => {
    const { element, ref } = elementSchema.parse(params);

    return await context.currentTab().runAndWaitWithSnapshot(async pageSnapshot => {
      const target = pageSnapshot.refLocator(ref);
      // The locator source is generated before the click is performed.
      const locatorSource = await generateLocator(target);
      await target.click();
      return {
        code: [
          `// Click ${element}`,
          `await page.${locatorSource}.click();`,
        ],
      };
    });
  },
};
/**
 * Input for the drag tool: a description and snapshot reference for the
 * source element, and the same pair for the drop target. All four fields are
 * required strings.
 */
const dragSchema = z.object({
  startElement: z
      .string()
      .describe('Human-readable source element description used to obtain the permission to interact with the element'),
  startRef: z
      .string()
      .describe('Exact source element reference from the page snapshot'),
  endElement: z
      .string()
      .describe('Human-readable target element description used to obtain the permission to interact with the element'),
  endRef: z
      .string()
      .describe('Exact target element reference from the page snapshot'),
});
2025-04-04 15:22:00 -07:00
/**
 * `browser_drag` tool.
 *
 * Validates the input against `dragSchema`, resolves both references to
 * locators through the snapshot handed to the action, drags the source onto
 * the target, and returns the equivalent code lines.
 */
const drag: Tool = {
  capability: 'core',

  schema: {
    name: 'browser_drag',
    description: 'Perform drag and drop between two elements',
    inputSchema: zodToJsonSchema(dragSchema),
  },

  handle: async (context, params) => {
    const { startElement, startRef, endElement, endRef } = dragSchema.parse(params);

    return await context.currentTab().runAndWaitWithSnapshot(async pageSnapshot => {
      const source = pageSnapshot.refLocator(startRef);
      const destination = pageSnapshot.refLocator(endRef);

      // Locator sources are generated (source first, then target) before the drag runs.
      const sourceCode = await generateLocator(source);
      const destinationCode = await generateLocator(destination);

      await source.dragTo(destination);
      return {
        code: [
          `// Drag ${startElement} to ${endElement}`,
          `await page.${sourceCode}.dragTo(page.${destinationCode});`,
        ],
      };
    });
  },
};
2025-04-04 15:22:00 -07:00
/**
 * `browser_hover` tool.
 *
 * Validates the input against `elementSchema`, resolves `ref` to a locator
 * through the snapshot handed to the action, hovers over it, and returns the
 * equivalent code lines.
 */
const hover: Tool = {
  capability: 'core',

  schema: {
    name: 'browser_hover',
    description: 'Hover over element on page',
    inputSchema: zodToJsonSchema(elementSchema),
  },

  handle: async (context, params) => {
    const { element, ref } = elementSchema.parse(params);

    return await context.currentTab().runAndWaitWithSnapshot(async pageSnapshot => {
      const target = pageSnapshot.refLocator(ref);
      // The locator source is generated before the hover is performed.
      const locatorSource = await generateLocator(target);
      await target.hover();
      return {
        code: [
          `// Hover over ${element}`,
          `await page.${locatorSource}.hover();`,
        ],
      };
    });
  },
};
/**
 * Input for the type tool: the `elementSchema` fields plus the text to enter
 * and two optional flags (`submit`, `slowly`).
 */
const typeSchema = elementSchema.extend({
  text: z
      .string()
      .describe('Text to type into the element'),
  submit: z
      .boolean()
      .optional()
      .describe('Whether to submit entered text (press Enter after)'),
  slowly: z
      .boolean()
      .optional()
      .describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'),
});
2025-04-04 15:22:00 -07:00
/**
 * `browser_type` tool.
 *
 * Validates the input against `typeSchema` and enters `text` into the element
 * identified by `ref`: with `pressSequentially` when `slowly` is truthy,
 * otherwise with `fill`. When `submit` is truthy, Enter is pressed on the same
 * locator afterwards. Returns the code lines matching the steps performed.
 */
const type: Tool = {
  capability: 'core',

  schema: {
    name: 'browser_type',
    description: 'Type text into editable element',
    inputSchema: zodToJsonSchema(typeSchema),
  },

  handle: async (context, params) => {
    const { element, ref, text, slowly, submit } = typeSchema.parse(params);

    return await context.currentTab().runAndWaitWithSnapshot(async pageSnapshot => {
      const target = pageSnapshot.refLocator(ref);
      const steps: string[] = [];

      if (!slowly) {
        // Default path: the whole text is filled in one call.
        steps.push(`// Fill "${text}" into "${element}"`);
        steps.push(`await page.${await generateLocator(target)}.fill(${javascript.quote(text)});`);
        await target.fill(text);
      } else {
        steps.push(`// Press "${text}" sequentially into "${element}"`);
        steps.push(`await page.${await generateLocator(target)}.pressSequentially(${javascript.quote(text)});`);
        await target.pressSequentially(text);
      }

      if (submit) {
        steps.push(`// Submit text`);
        // The locator source is generated again here, after the text has been entered.
        steps.push(`await page.${await generateLocator(target)}.press('Enter');`);
        await target.press('Enter');
      }

      return { code: steps };
    });
  },
};
2025-03-26 13:53:56 +09:00
/**
 * Input for the select-option tool: the `elementSchema` fields plus the list
 * of option values to select.
 */
const selectOptionSchema = elementSchema.extend({
  values: z
      .array(z.string())
      .describe('Array of values to select in the dropdown. This can be a single value or multiple values.'),
});
2025-04-04 15:22:00 -07:00
/**
 * `browser_select_option` tool.
 *
 * Validates the input against `selectOptionSchema`, resolves `ref` to a
 * locator through the snapshot handed to the action, selects `values` on it,
 * and returns the equivalent code lines.
 */
const selectOption: Tool = {
  capability: 'core',

  schema: {
    name: 'browser_select_option',
    description: 'Select an option in a dropdown',
    inputSchema: zodToJsonSchema(selectOptionSchema),
  },

  handle: async (context, params) => {
    const { element, ref, values } = selectOptionSchema.parse(params);

    return await context.currentTab().runAndWaitWithSnapshot(async pageSnapshot => {
      const dropdown = pageSnapshot.refLocator(ref);
      // The locator source is generated before the selection is performed.
      const locatorSource = await generateLocator(dropdown);
      const summary = `// Select options [${values.join(', ')}] in ${element}`;
      const statement = `await page.${locatorSource}.selectOption(${javascript.formatObject(values)});`;

      await dropdown.selectOption(values);
      return { code: [summary, statement] };
    });
  },
};
2025-03-27 07:27:34 -07:00
/**
 * Input for the screenshot tool. All fields are optional, but `element` and
 * `ref` must be supplied together: the refinement rejects input where exactly
 * one of them is truthy.
 */
const screenshotSchema = z
    .object({
      raw: z
          .boolean()
          .optional()
          .describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
      element: z
          .string()
          .optional()
          .describe('Human-readable element description used to obtain permission to screenshot the element. If not provided, the screenshot will be taken of viewport. If element is provided, ref must be provided too.'),
      ref: z
          .string()
          .optional()
          .describe('Exact target element reference from the page snapshot. If not provided, the screenshot will be taken of viewport. If ref is provided, element must be provided too.'),
    })
    .refine(
        // Truthiness comparison: an empty string counts as "not provided".
        data => Boolean(data.element) === Boolean(data.ref),
        {
          message: 'Both element and ref must be provided or neither.',
          // NOTE(review): zod appears to treat `path` as a nested property path, so
          // this likely reports the issue at `ref.element` — confirm that is intended.
          path: ['ref', 'element'],
        },
    );
2025-04-04 15:22:00 -07:00
/**
 * `browser_take_screenshot` tool.
 *
 * Validates the input against `screenshotSchema`, then captures either the
 * element identified by `ref` (when both `element` and `ref` are truthy) or
 * the page via `tab.page.screenshot`. The image is written to a file in the
 * OS temp directory (through the `path` screenshot option) and is also
 * returned base64-encoded together with the equivalent code lines.
 *
 * Format: PNG when `raw` is truthy, otherwise JPEG at quality 50.
 */
const screenshot: Tool = {
  capability: 'core',

  schema: {
    name: 'browser_take_screenshot',
    description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
    inputSchema: zodToJsonSchema(screenshotSchema),
  },

  handle: async (context, params) => {
    const { raw, element, ref } = screenshotSchema.parse(params);
    const tab = context.currentTab();

    const fileType = raw ? 'png' : 'jpeg';
    const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + `.${fileType}`;
    const options: playwright.PageScreenshotOptions = {
      type: fileType,
      // `quality` is only set for JPEG output.
      quality: fileType === 'png' ? undefined : 50,
      scale: 'css',
      path: fileName,
    };

    // Defined only when both `element` and `ref` are truthy; carrying the ref
    // itself lets the branch below narrow it without a non-null assertion.
    const elementRef = element && ref ? ref : undefined;

    // Reuse the tab obtained above instead of looking it up a second time.
    return await tab.runAndWaitWithSnapshot(async pageSnapshot => {
      const code = [
        `// Screenshot ${elementRef ? element : 'viewport'} and save it as ${fileName}`,
      ];

      // Named `image` so it does not shadow the `screenshot` tool constant.
      let image: Buffer;
      if (elementRef) {
        const locator = pageSnapshot.refLocator(elementRef);
        code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`);
        image = await locator.screenshot(options);
      } else {
        code.push(`await page.screenshot(${javascript.formatObject(options)});`);
        image = await tab.page.screenshot(options);
      }

      return {
        code,
        images: [{
          data: image.toString('base64'),
          mimeType: fileType === 'png' ? 'image/png' : 'image/jpeg',
        }],
      };
    });
  },
};
2025-04-04 15:22:00 -07:00
2025-04-16 20:28:44 +03:00
2025-04-04 15:22:00 -07:00
// Default export: the tools defined in this file, as a single array.
export default [
snapshot ,
click ,
drag ,
hover ,
type ,
selectOption ,
screenshot ,
] ;