2025-03-21 10:58:58 -07:00
/**
 * Copyright (c) Microsoft Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import { z } from 'zod' ;
import zodToJsonSchema from 'zod-to-json-schema' ;
import type * as playwright from 'playwright' ;
import type { Tool } from './tool' ;
/**
 * Tool `browser_snapshot`: captures an accessibility snapshot of the current page.
 *
 * Takes no input (the schema is an empty object). The handler runs a no-op
 * action through `context.run` with `captureSnapshot: true` and returns
 * whatever that call resolves to.
 */
export const snapshot: Tool = {
  schema: {
    name: 'browser_snapshot',
    description: 'Capture accessibility snapshot of the current page, this is better than screenshot',
    inputSchema: zodToJsonSchema(z.object({})),
  },

  handle: async context => {
    // No page interaction is needed; the snapshot is requested via the options.
    return await context.run(async () => {}, { captureSnapshot: true });
  },
};
/**
 * Input shape shared by tools that act on a single element.
 *
 * - `element`: human-readable description of the element.
 * - `ref`: element reference taken from the page snapshot.
 */
const elementSchema = z.object({
  element: z
      .string()
      .describe('Human-readable element description used to obtain permission to interact with the element'),
  ref: z
      .string()
      .describe('Exact target element reference from the page snapshot'),
});
/**
 * Tool `browser_click`: clicks the element identified by `ref`.
 *
 * `params` is validated with `elementSchema.parse`, which throws on invalid
 * input. The click runs inside `context.runAndWaitWithSnapshot`, with the
 * locator resolved from `context.lastSnapshot()`.
 */
export const click: Tool = {
  schema: {
    name: 'browser_click',
    description: 'Perform click on a web page',
    inputSchema: zodToJsonSchema(elementSchema),
  },

  handle: async (context, params) => {
    const validatedParams = elementSchema.parse(params);
    return await context.runAndWaitWithSnapshot(async () => {
      const locator = context.lastSnapshot().refLocator(validatedParams.ref);
      await locator.click();
    }, {
      status: `Clicked "${validatedParams.element}"`,
    });
  },
};
/**
 * Input shape for drag and drop: a description and a snapshot reference for
 * both the source (`start*`) and the target (`end*`) element.
 */
const dragSchema = z.object({
  // Source element.
  startElement: z
      .string()
      .describe('Human-readable source element description used to obtain the permission to interact with the element'),
  startRef: z
      .string()
      .describe('Exact source element reference from the page snapshot'),

  // Target element.
  endElement: z
      .string()
      .describe('Human-readable target element description used to obtain the permission to interact with the element'),
  endRef: z
      .string()
      .describe('Exact target element reference from the page snapshot'),
});
/**
 * Tool `browser_drag`: drags the element at `startRef` onto the element at `endRef`.
 *
 * `params` is validated with `dragSchema.parse`, which throws on invalid
 * input. Both locators are resolved from `context.lastSnapshot()` and the
 * drag runs inside `context.runAndWaitWithSnapshot`.
 */
export const drag: Tool = {
  schema: {
    name: 'browser_drag',
    description: 'Perform drag and drop between two elements',
    inputSchema: zodToJsonSchema(dragSchema),
  },

  handle: async (context, params) => {
    const validatedParams = dragSchema.parse(params);
    return await context.runAndWaitWithSnapshot(async () => {
      // Resolve both ends against the same snapshot object.
      // NOTE(review): assumes lastSnapshot() is a plain accessor — confirm.
      const lastSnapshot = context.lastSnapshot();
      const startLocator = lastSnapshot.refLocator(validatedParams.startRef);
      const endLocator = lastSnapshot.refLocator(validatedParams.endRef);
      await startLocator.dragTo(endLocator);
    }, {
      status: `Dragged "${validatedParams.startElement}" to "${validatedParams.endElement}"`,
    });
  },
};
/**
 * Tool `browser_hover`: hovers over the element identified by `ref`.
 *
 * `params` is validated with `elementSchema.parse`, which throws on invalid
 * input. The hover runs inside `context.runAndWaitWithSnapshot`, with the
 * locator resolved from `context.lastSnapshot()`.
 */
export const hover: Tool = {
  schema: {
    name: 'browser_hover',
    description: 'Hover over element on page',
    inputSchema: zodToJsonSchema(elementSchema),
  },

  handle: async (context, params) => {
    const validatedParams = elementSchema.parse(params);
    // `return await` matches the other handlers in this file.
    return await context.runAndWaitWithSnapshot(async () => {
      const locator = context.lastSnapshot().refLocator(validatedParams.ref);
      await locator.hover();
    }, {
      status: `Hovered over "${validatedParams.element}"`,
    });
  },
};
/**
 * Input shape for typing text: the shared element fields plus the text to
 * enter and two optional flags.
 *
 * - `text`: text to type into the element.
 * - `submit`: when set, Enter is pressed after the text is entered.
 * - `slowly`: when set, text is typed one character at a time instead of
 *   being filled in at once.
 */
const typeSchema = elementSchema.extend({
  text: z.string().describe('Text to type into the element'),
  submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
  slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'),
});
/**
 * Tool `browser_type`: enters text into the element identified by `ref`.
 *
 * `params` is validated with `typeSchema.parse`, which throws on invalid
 * input. The text is entered with `pressSequentially` when `slowly` is set,
 * otherwise with `fill`. When `submit` is set, Enter is pressed on the same
 * locator afterwards. Everything runs inside `context.runAndWaitWithSnapshot`.
 */
export const type: Tool = {
  schema: {
    name: 'browser_type',
    description: 'Type text into editable element',
    inputSchema: zodToJsonSchema(typeSchema),
  },

  handle: async (context, params) => {
    const validatedParams = typeSchema.parse(params);
    return await context.runAndWaitWithSnapshot(async () => {
      const locator = context.lastSnapshot().refLocator(validatedParams.ref);

      // Key-by-key typing triggers the page's key handlers; fill sets the value at once.
      if (validatedParams.slowly) {
        await locator.pressSequentially(validatedParams.text);
      } else {
        await locator.fill(validatedParams.text);
      }

      if (validatedParams.submit) {
        await locator.press('Enter');
      }
    }, {
      status: `Typed "${validatedParams.text}" into "${validatedParams.element}"`,
    });
  },
};
2025-03-26 13:53:56 +09:00
/**
 * Input shape for selecting dropdown options: the shared element fields plus
 * the list of option values to select.
 */
const selectOptionSchema = elementSchema.extend({
  values: z
      .array(z.string())
      .describe('Array of values to select in the dropdown. This can be a single value or multiple values.'),
});
/**
 * Tool `browser_select_option`: selects the given values in the dropdown
 * identified by `ref`.
 *
 * `params` is validated with `selectOptionSchema.parse`, which throws on
 * invalid input. The selection runs inside `context.runAndWaitWithSnapshot`,
 * with the locator resolved from `context.lastSnapshot()`.
 */
export const selectOption: Tool = {
  schema: {
    name: 'browser_select_option',
    description: 'Select an option in a dropdown',
    inputSchema: zodToJsonSchema(selectOptionSchema),
  },

  handle: async (context, params) => {
    const validatedParams = selectOptionSchema.parse(params);
    return await context.runAndWaitWithSnapshot(async () => {
      const locator = context.lastSnapshot().refLocator(validatedParams.ref);
      await locator.selectOption(validatedParams.values);
    }, {
      status: `Selected option in "${validatedParams.element}"`,
    });
  },
};
2025-03-27 07:27:34 -07:00
/**
 * Input shape for taking a screenshot. The single optional flag `raw`
 * selects uncompressed PNG output; when it is absent or false the result
 * is a JPEG.
 */
const screenshotSchema = z.object({
  raw: z
      .boolean()
      .optional()
      .describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
});
/**
 * Tool `browser_take_screenshot`: captures the current page as an image.
 *
 * `params` is validated with `screenshotSchema.parse`, which throws on
 * invalid input. With `raw` set the page is captured as PNG; otherwise as
 * JPEG at quality 50. Both use CSS-pixel scale. The handler returns a single
 * `image` content item with base64-encoded data and the matching MIME type.
 */
export const screenshot: Tool = {
  schema: {
    name: 'browser_take_screenshot',
    description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
    inputSchema: zodToJsonSchema(screenshotSchema),
  },

  handle: async (context, params) => {
    const validatedParams = screenshotSchema.parse(params);
    // NOTE(review): presumably existingPage() fails when no page is open — confirm.
    const page = context.existingPage();

    // Evaluate the flag once so the capture format and the MIME type cannot diverge.
    const usePng = !!validatedParams.raw;
    const options: playwright.PageScreenshotOptions = usePng
      ? { type: 'png', scale: 'css' }
      : { type: 'jpeg', quality: 50, scale: 'css' };

    // Named imageBuffer so it does not shadow the exported `screenshot` tool.
    const imageBuffer = await page.screenshot(options);
    return {
      content: [{
        type: 'image',
        data: imageBuffer.toString('base64'),
        mimeType: usePng ? 'image/png' : 'image/jpeg',
      }],
    };
  },
};