mirror of
https://github.com/microsoft/playwright-mcp.git
synced 2025-07-23 22:22:28 +08:00
chore: turn vision into capability (#679)
Fixes https://github.com/microsoft/playwright-mcp/issues/420
This commit is contained in:
parent
012c906500
commit
d61aa16fee
351
README.md
351
README.md
@ -193,9 +193,8 @@ Playwright MCP server supports following arguments. They can be provided in the
|
||||
--browser <browser> browser or chrome channel to use, possible
|
||||
values: chrome, firefox, webkit, msedge.
|
||||
--browser-agent <endpoint> Use browser agent (experimental).
|
||||
--caps <caps> comma-separated list of capabilities to enable,
|
||||
possible values: tabs, pdf, history, wait, files,
|
||||
install. Default is all.
|
||||
--caps <caps> comma-separated list of additional capabilities
|
||||
to enable, possible values: vision, pdf.
|
||||
--cdp-endpoint <endpoint> CDP endpoint to connect to.
|
||||
--config <path> path to the configuration file.
|
||||
--device <device> device to emulate, for example: "iPhone 15"
|
||||
@ -227,8 +226,6 @@ Playwright MCP server supports following arguments. They can be provided in the
|
||||
specified, a temporary directory will be created.
|
||||
--viewport-size <size> specify browser viewport size in pixels, for
|
||||
example "1280, 720"
|
||||
--vision Run server that uses screenshots (Aria snapshots
|
||||
are used by default)
|
||||
```
|
||||
|
||||
<!--- End of options generated section -->
|
||||
@ -329,21 +326,14 @@ npx @playwright/mcp@latest --config path/to/config.json
|
||||
host?: string; // Host to bind to (default: localhost)
|
||||
},
|
||||
|
||||
// List of enabled capabilities
|
||||
// List of additional capabilities
|
||||
capabilities?: Array<
|
||||
'core' | // Core browser automation
|
||||
'tabs' | // Tab management
|
||||
'pdf' | // PDF generation
|
||||
'history' | // Browser history
|
||||
'wait' | // Wait utilities
|
||||
'files' | // File handling
|
||||
'install' | // Browser installation
|
||||
'testing' // Testing
|
||||
'pdf' | // PDF generation
|
||||
'vision' // Coordinate-based interactions
|
||||
>;
|
||||
|
||||
// Enable vision mode (screenshots instead of accessibility snapshots)
|
||||
vision?: boolean;
|
||||
|
||||
// Directory for output files
|
||||
outputDir?: string;
|
||||
|
||||
@ -433,42 +423,10 @@ http.createServer(async (req, res) => {
|
||||
|
||||
### Tools
|
||||
|
||||
The tools are available in two modes:
|
||||
|
||||
1. **Snapshot Mode** (default): Uses accessibility snapshots for better performance and reliability
|
||||
2. **Vision Mode**: Uses screenshots for visual-based interactions
|
||||
|
||||
To use Vision Mode, add the `--vision` flag when starting the server:
|
||||
|
||||
```js
|
||||
{
|
||||
"mcpServers": {
|
||||
"playwright": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"@playwright/mcp@latest",
|
||||
"--vision"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Vision Mode works best with the computer use models that are able to interact with elements using
|
||||
X Y coordinate space, based on the provided screenshot.
|
||||
|
||||
<!--- Tools generated by update-readme.js -->
|
||||
|
||||
<details>
|
||||
<summary><b>Interactions</b></summary>
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_snapshot**
|
||||
- Title: Page snapshot
|
||||
- Description: Capture accessibility snapshot of the current page, this is better than screenshot
|
||||
- Parameters: None
|
||||
- Read-only: **true**
|
||||
<summary><b>Core automation</b></summary>
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
@ -483,6 +441,22 @@ X Y coordinate space, based on the provided screenshot.
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_close**
|
||||
- Title: Close browser
|
||||
- Description: Close the page
|
||||
- Parameters: None
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_console_messages**
|
||||
- Title: Get console messages
|
||||
- Description: Returns all console messages
|
||||
- Parameters: None
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_drag**
|
||||
- Title: Drag mouse
|
||||
- Description: Perform drag and drop between two elements
|
||||
@ -495,60 +469,17 @@ X Y coordinate space, based on the provided screenshot.
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_hover**
|
||||
- Title: Hover mouse
|
||||
- Description: Hover over element on page
|
||||
- **browser_evaluate**
|
||||
- Title: Evaluate JavaScript
|
||||
- Description: Evaluate JavaScript expression on page or element
|
||||
- Parameters:
|
||||
- `element` (string): Human-readable element description used to obtain permission to interact with the element
|
||||
- `ref` (string): Exact target element reference from the page snapshot
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_type**
|
||||
- Title: Type text
|
||||
- Description: Type text into editable element
|
||||
- Parameters:
|
||||
- `element` (string): Human-readable element description used to obtain permission to interact with the element
|
||||
- `ref` (string): Exact target element reference from the page snapshot
|
||||
- `text` (string): Text to type into the element
|
||||
- `submit` (boolean, optional): Whether to submit entered text (press Enter after)
|
||||
- `slowly` (boolean, optional): Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.
|
||||
- `function` (string): () => { /* code */ } or (element) => { /* code */ } when element is provided
|
||||
- `element` (string, optional): Human-readable element description used to obtain permission to interact with the element
|
||||
- `ref` (string, optional): Exact target element reference from the page snapshot
|
||||
- Read-only: **false**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_select_option**
|
||||
- Title: Select option
|
||||
- Description: Select an option in a dropdown
|
||||
- Parameters:
|
||||
- `element` (string): Human-readable element description used to obtain permission to interact with the element
|
||||
- `ref` (string): Exact target element reference from the page snapshot
|
||||
- `values` (array): Array of values to select in the dropdown. This can be a single value or multiple values.
|
||||
- Read-only: **false**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_press_key**
|
||||
- Title: Press a key
|
||||
- Description: Press a key on the keyboard
|
||||
- Parameters:
|
||||
- `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
|
||||
- Read-only: **false**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_wait_for**
|
||||
- Title: Wait for
|
||||
- Description: Wait for text to appear or disappear or a specified time to pass
|
||||
- Parameters:
|
||||
- `time` (number, optional): The time to wait in seconds
|
||||
- `text` (string, optional): The text to wait for
|
||||
- `textGone` (string, optional): The text to wait for to disappear
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_file_upload**
|
||||
- Title: Upload files
|
||||
- Description: Upload one or multiple files
|
||||
@ -566,10 +497,15 @@ X Y coordinate space, based on the provided screenshot.
|
||||
- `promptText` (string, optional): The text of the prompt in case of a prompt dialog.
|
||||
- Read-only: **false**
|
||||
|
||||
</details>
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
<details>
|
||||
<summary><b>Navigation</b></summary>
|
||||
- **browser_hover**
|
||||
- Title: Hover mouse
|
||||
- Description: Hover over element on page
|
||||
- Parameters:
|
||||
- `element` (string): Human-readable element description used to obtain permission to interact with the element
|
||||
- `ref` (string): Exact target element reference from the page snapshot
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
@ -596,26 +532,51 @@ X Y coordinate space, based on the provided screenshot.
|
||||
- Parameters: None
|
||||
- Read-only: **true**
|
||||
|
||||
</details>
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
<details>
|
||||
<summary><b>Evaluation</b></summary>
|
||||
- **browser_network_requests**
|
||||
- Title: List network requests
|
||||
- Description: Returns all network requests since loading the page
|
||||
- Parameters: None
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_evaluate**
|
||||
- Title: Evaluate JavaScript
|
||||
- Description: Evaluate JavaScript expression on page or element
|
||||
- **browser_press_key**
|
||||
- Title: Press a key
|
||||
- Description: Press a key on the keyboard
|
||||
- Parameters:
|
||||
- `function` (string): () => { /* code */ } or (element) => { /* code */ } when element is provided
|
||||
- `element` (string, optional): Human-readable element description used to obtain permission to interact with the element
|
||||
- `ref` (string, optional): Exact target element reference from the page snapshot
|
||||
- `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
|
||||
- Read-only: **false**
|
||||
|
||||
</details>
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
<details>
|
||||
<summary><b>Resources</b></summary>
|
||||
- **browser_resize**
|
||||
- Title: Resize browser window
|
||||
- Description: Resize the browser window
|
||||
- Parameters:
|
||||
- `width` (number): Width of the browser window
|
||||
- `height` (number): Height of the browser window
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_select_option**
|
||||
- Title: Select option
|
||||
- Description: Select an option in a dropdown
|
||||
- Parameters:
|
||||
- `element` (string): Human-readable element description used to obtain permission to interact with the element
|
||||
- `ref` (string): Exact target element reference from the page snapshot
|
||||
- `values` (array): Array of values to select in the dropdown. This can be a single value or multiple values.
|
||||
- Read-only: **false**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_snapshot**
|
||||
- Title: Page snapshot
|
||||
- Description: Capture accessibility snapshot of the current page, this is better than screenshot
|
||||
- Parameters: None
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
@ -631,64 +592,41 @@ X Y coordinate space, based on the provided screenshot.
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_pdf_save**
|
||||
- Title: Save as PDF
|
||||
- Description: Save page as PDF
|
||||
- **browser_type**
|
||||
- Title: Type text
|
||||
- Description: Type text into editable element
|
||||
- Parameters:
|
||||
- `filename` (string, optional): File name to save the pdf to. Defaults to `page-{timestamp}.pdf` if not specified.
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_network_requests**
|
||||
- Title: List network requests
|
||||
- Description: Returns all network requests since loading the page
|
||||
- Parameters: None
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_console_messages**
|
||||
- Title: Get console messages
|
||||
- Description: Returns all console messages
|
||||
- Parameters: None
|
||||
- Read-only: **true**
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Utilities</b></summary>
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_install**
|
||||
- Title: Install the browser specified in the config
|
||||
- Description: Install the browser specified in the config. Call this if you get an error about the browser not being installed.
|
||||
- Parameters: None
|
||||
- `element` (string): Human-readable element description used to obtain permission to interact with the element
|
||||
- `ref` (string): Exact target element reference from the page snapshot
|
||||
- `text` (string): Text to type into the element
|
||||
- `submit` (boolean, optional): Whether to submit entered text (press Enter after)
|
||||
- `slowly` (boolean, optional): Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.
|
||||
- Read-only: **false**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_close**
|
||||
- Title: Close browser
|
||||
- Description: Close the page
|
||||
- Parameters: None
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_resize**
|
||||
- Title: Resize browser window
|
||||
- Description: Resize the browser window
|
||||
- **browser_wait_for**
|
||||
- Title: Wait for
|
||||
- Description: Wait for text to appear or disappear or a specified time to pass
|
||||
- Parameters:
|
||||
- `width` (number): Width of the browser window
|
||||
- `height` (number): Height of the browser window
|
||||
- `time` (number, optional): The time to wait in seconds
|
||||
- `text` (string, optional): The text to wait for
|
||||
- `textGone` (string, optional): The text to wait for to disappear
|
||||
- Read-only: **true**
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Tabs</b></summary>
|
||||
<summary><b>Tab management</b></summary>
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_tab_close**
|
||||
- Title: Close a tab
|
||||
- Description: Close a tab
|
||||
- Parameters:
|
||||
- `index` (number, optional): The index of the tab to close. Closes current tab if not provided.
|
||||
- Read-only: **false**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
@ -716,44 +654,29 @@ X Y coordinate space, based on the provided screenshot.
|
||||
- `index` (number): The index of the tab to select
|
||||
- Read-only: **true**
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Browser installation</b></summary>
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_tab_close**
|
||||
- Title: Close a tab
|
||||
- Description: Close a tab
|
||||
- Parameters:
|
||||
- `index` (number, optional): The index of the tab to close. Closes current tab if not provided.
|
||||
- **browser_install**
|
||||
- Title: Install the browser specified in the config
|
||||
- Description: Install the browser specified in the config. Call this if you get an error about the browser not being installed.
|
||||
- Parameters: None
|
||||
- Read-only: **false**
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Vision mode</b></summary>
|
||||
<summary><b>Coordinate-based (opt-in via --caps=vision)</b></summary>
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_screen_capture**
|
||||
- Title: Take a screenshot
|
||||
- Description: Take a screenshot of the current page
|
||||
- Parameters: None
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_screen_move_mouse**
|
||||
- Title: Move mouse
|
||||
- Description: Move mouse to a given position
|
||||
- Parameters:
|
||||
- `element` (string): Human-readable element description used to obtain permission to interact with the element
|
||||
- `x` (number): X coordinate
|
||||
- `y` (number): Y coordinate
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_screen_click**
|
||||
- **browser_mouse_click_xy**
|
||||
- Title: Click
|
||||
- Description: Click left mouse button
|
||||
- Description: Click left mouse button at a given position
|
||||
- Parameters:
|
||||
- `element` (string): Human-readable element description used to obtain permission to interact with the element
|
||||
- `x` (number): X coordinate
|
||||
@ -762,9 +685,9 @@ X Y coordinate space, based on the provided screenshot.
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_screen_drag**
|
||||
- **browser_mouse_drag_xy**
|
||||
- Title: Drag mouse
|
||||
- Description: Drag left mouse button
|
||||
- Description: Drag left mouse button to a given position
|
||||
- Parameters:
|
||||
- `element` (string): Human-readable element description used to obtain permission to interact with the element
|
||||
- `startX` (number): Start X coordinate
|
||||
@ -775,52 +698,28 @@ X Y coordinate space, based on the provided screenshot.
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_screen_type**
|
||||
- Title: Type text
|
||||
- Description: Type text
|
||||
- **browser_mouse_move_xy**
|
||||
- Title: Move mouse
|
||||
- Description: Move mouse to a given position
|
||||
- Parameters:
|
||||
- `text` (string): Text to type into the element
|
||||
- `submit` (boolean, optional): Whether to submit entered text (press Enter after)
|
||||
- Read-only: **false**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_press_key**
|
||||
- Title: Press a key
|
||||
- Description: Press a key on the keyboard
|
||||
- Parameters:
|
||||
- `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
|
||||
- Read-only: **false**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_wait_for**
|
||||
- Title: Wait for
|
||||
- Description: Wait for text to appear or disappear or a specified time to pass
|
||||
- Parameters:
|
||||
- `time` (number, optional): The time to wait in seconds
|
||||
- `text` (string, optional): The text to wait for
|
||||
- `textGone` (string, optional): The text to wait for to disappear
|
||||
- `element` (string): Human-readable element description used to obtain permission to interact with the element
|
||||
- `x` (number): X coordinate
|
||||
- `y` (number): Y coordinate
|
||||
- Read-only: **true**
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
</details>
|
||||
|
||||
- **browser_file_upload**
|
||||
- Title: Upload files
|
||||
- Description: Upload one or multiple files
|
||||
- Parameters:
|
||||
- `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files.
|
||||
- Read-only: **false**
|
||||
<details>
|
||||
<summary><b>PDF generation (opt-in via --caps=pdf)</b></summary>
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_handle_dialog**
|
||||
- Title: Handle a dialog
|
||||
- Description: Handle a dialog
|
||||
- **browser_pdf_save**
|
||||
- Title: Save as PDF
|
||||
- Description: Save page as PDF
|
||||
- Parameters:
|
||||
- `accept` (boolean): Whether to accept the dialog.
|
||||
- `promptText` (string, optional): The text of the prompt in case of a prompt dialog.
|
||||
- Read-only: **false**
|
||||
- `filename` (string, optional): File name to save the pdf to. Defaults to `page-{timestamp}.pdf` if not specified.
|
||||
- Read-only: **true**
|
||||
|
||||
</details>
|
||||
|
||||
|
13
config.d.ts
vendored
13
config.d.ts
vendored
@ -16,7 +16,7 @@
|
||||
|
||||
import type * as playwright from 'playwright';
|
||||
|
||||
export type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install' | 'testing';
|
||||
export type ToolCapability = 'core' | 'core-tabs' | 'core-install' | 'vision' | 'pdf';
|
||||
|
||||
export type Config = {
|
||||
/**
|
||||
@ -85,20 +85,11 @@ export type Config = {
|
||||
/**
|
||||
* List of enabled tool capabilities. Possible values:
|
||||
* - 'core': Core browser automation features.
|
||||
* - 'tabs': Tab management features.
|
||||
* - 'pdf': PDF generation and manipulation.
|
||||
* - 'history': Browser history access.
|
||||
* - 'wait': Wait and timing utilities.
|
||||
* - 'files': File upload/download support.
|
||||
* - 'install': Browser installation utilities.
|
||||
* - 'vision': Coordinate-based interactions.
|
||||
*/
|
||||
capabilities?: ToolCapability[];
|
||||
|
||||
/**
|
||||
* Run server that uses screenshots (Aria snapshots are used by default).
|
||||
*/
|
||||
vision?: boolean;
|
||||
|
||||
/**
|
||||
* Whether to save the Playwright trace of the session into the output directory.
|
||||
*/
|
||||
|
@ -49,7 +49,6 @@ export type CLIOptions = {
|
||||
userAgent?: string;
|
||||
userDataDir?: string;
|
||||
viewportSize?: string;
|
||||
vision?: boolean;
|
||||
};
|
||||
|
||||
const defaultConfig: FullConfig = {
|
||||
@ -185,7 +184,6 @@ export async function configFromCLIOptions(cliOptions: CLIOptions): Promise<Conf
|
||||
host: cliOptions.host,
|
||||
},
|
||||
capabilities: cliOptions.caps?.split(',').map((c: string) => c.trim() as ToolCapability),
|
||||
vision: !!cliOptions.vision,
|
||||
network: {
|
||||
allowedOrigins: cliOptions.allowedOrigins,
|
||||
blockedOrigins: cliOptions.blockedOrigins,
|
||||
|
@ -19,7 +19,7 @@ import { CallToolRequestSchema, ListToolsRequestSchema, Tool as McpTool } from '
|
||||
import { zodToJsonSchema } from 'zod-to-json-schema';
|
||||
|
||||
import { Context } from './context.js';
|
||||
import { snapshotTools, visionTools } from './tools.js';
|
||||
import { allTools } from './tools.js';
|
||||
import { packageJSON } from './package.js';
|
||||
|
||||
import { FullConfig } from './config.js';
|
||||
@ -27,8 +27,7 @@ import { FullConfig } from './config.js';
|
||||
import type { BrowserContextFactory } from './browserContextFactory.js';
|
||||
|
||||
export function createConnection(config: FullConfig, browserContextFactory: BrowserContextFactory): Connection {
|
||||
const allTools = config.vision ? visionTools : snapshotTools;
|
||||
const tools = allTools.filter(tool => !config.capabilities || tool.capability === 'core' || config.capabilities.includes(tool.capability));
|
||||
const tools = allTools.filter(tool => tool.capability.startsWith('core') || config.capabilities?.includes(tool.capability));
|
||||
const context = new Context(tools, config, browserContextFactory);
|
||||
const server = new McpServer({ name: 'Playwright', version: packageJSON.version }, {
|
||||
capabilities: {
|
||||
|
@ -14,7 +14,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { program } from 'commander';
|
||||
import { program, Option } from 'commander';
|
||||
// @ts-ignore
|
||||
import { startTraceViewerServer } from 'playwright-core/lib/server';
|
||||
|
||||
@ -31,7 +31,7 @@ program
|
||||
.option('--block-service-workers', 'block service workers')
|
||||
.option('--browser <browser>', 'browser or chrome channel to use, possible values: chrome, firefox, webkit, msedge.')
|
||||
.option('--browser-agent <endpoint>', 'Use browser agent (experimental).')
|
||||
.option('--caps <caps>', 'comma-separated list of capabilities to enable, possible values: tabs, pdf, history, wait, files, install. Default is all.')
|
||||
.option('--caps <caps>', 'comma-separated list of additional capabilities to enable, possible values: vision, pdf.')
|
||||
.option('--cdp-endpoint <endpoint>', 'CDP endpoint to connect to.')
|
||||
.option('--config <path>', 'path to the configuration file.')
|
||||
.option('--device <device>', 'device to emulate, for example: "iPhone 15"')
|
||||
@ -51,8 +51,13 @@ program
|
||||
.option('--user-agent <ua string>', 'specify user agent string')
|
||||
.option('--user-data-dir <path>', 'path to the user data directory. If not specified, a temporary directory will be created.')
|
||||
.option('--viewport-size <size>', 'specify browser viewport size in pixels, for example "1280, 720"')
|
||||
.option('--vision', 'Run server that uses screenshots (Aria snapshots are used by default)')
|
||||
.addOption(new Option('--vision', 'Legacy option, use --caps=vision instead').hideHelp())
|
||||
.action(async options => {
|
||||
if (options.vision) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error('The --vision option is deprecated, use --caps=vision instead');
|
||||
options.caps = 'vision';
|
||||
}
|
||||
const config = await resolveCLIConfig(options);
|
||||
const httpServer = config.server.port !== undefined ? await startHttpServer(config.server) : undefined;
|
||||
|
||||
|
34
src/tools.ts
34
src/tools.ts
@ -27,39 +27,25 @@ import pdf from './tools/pdf.js';
|
||||
import snapshot from './tools/snapshot.js';
|
||||
import tabs from './tools/tabs.js';
|
||||
import screenshot from './tools/screenshot.js';
|
||||
import vision from './tools/vision.js';
|
||||
import wait from './tools/wait.js';
|
||||
import mouse from './tools/mouse.js';
|
||||
|
||||
import type { Tool } from './tools/tool.js';
|
||||
|
||||
export const snapshotTools: Tool<any>[] = [
|
||||
...common(true),
|
||||
export const allTools: Tool<any>[] = [
|
||||
...common,
|
||||
...console,
|
||||
...dialogs(true),
|
||||
...dialogs,
|
||||
...evaluate,
|
||||
...files(true),
|
||||
...files,
|
||||
...install,
|
||||
...keyboard(true),
|
||||
...navigate(true),
|
||||
...keyboard,
|
||||
...navigate,
|
||||
...network,
|
||||
...mouse,
|
||||
...pdf,
|
||||
...screenshot,
|
||||
...snapshot,
|
||||
...tabs(true),
|
||||
...wait(true),
|
||||
];
|
||||
|
||||
export const visionTools: Tool<any>[] = [
|
||||
...common(false),
|
||||
...console,
|
||||
...dialogs(false),
|
||||
...files(false),
|
||||
...install,
|
||||
...keyboard(false),
|
||||
...navigate(false),
|
||||
...network,
|
||||
...pdf,
|
||||
...tabs(false),
|
||||
...vision,
|
||||
...wait(false),
|
||||
...tabs,
|
||||
...wait,
|
||||
];
|
||||
|
@ -15,7 +15,7 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
const close = defineTool({
|
||||
capability: 'core',
|
||||
@ -38,7 +38,7 @@ const close = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
const resize: ToolFactory = captureSnapshot => defineTool({
|
||||
const resize = defineTool({
|
||||
capability: 'core',
|
||||
schema: {
|
||||
name: 'browser_resize',
|
||||
@ -66,13 +66,13 @@ const resize: ToolFactory = captureSnapshot => defineTool({
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
export default [
|
||||
close,
|
||||
resize(captureSnapshot)
|
||||
resize
|
||||
];
|
||||
|
@ -15,9 +15,9 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
const handleDialog: ToolFactory = captureSnapshot => defineTool({
|
||||
const handleDialog = defineTool({
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
@ -49,7 +49,7 @@ const handleDialog: ToolFactory = captureSnapshot => defineTool({
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
@ -57,6 +57,6 @@ const handleDialog: ToolFactory = captureSnapshot => defineTool({
|
||||
clearsModalState: 'dialog',
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
handleDialog(captureSnapshot),
|
||||
export default [
|
||||
handleDialog,
|
||||
];
|
||||
|
@ -15,10 +15,10 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
const uploadFile: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'files',
|
||||
const uploadFile = defineTool({
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
name: 'browser_file_upload',
|
||||
@ -47,13 +47,13 @@ const uploadFile: ToolFactory = captureSnapshot => defineTool({
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
clearsModalState: 'fileChooser',
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
uploadFile(captureSnapshot),
|
||||
export default [
|
||||
uploadFile,
|
||||
];
|
||||
|
@ -23,7 +23,7 @@ import { defineTool } from './tool.js';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const install = defineTool({
|
||||
capability: 'install',
|
||||
capability: 'core-install',
|
||||
schema: {
|
||||
name: 'browser_install',
|
||||
title: 'Install the browser specified in the config',
|
||||
|
@ -15,9 +15,13 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
|
||||
const pressKey: ToolFactory = captureSnapshot => defineTool({
|
||||
import { defineTool } from './tool.js';
|
||||
import { elementSchema } from './snapshot.js';
|
||||
import { generateLocator } from './utils.js';
|
||||
import * as javascript from '../javascript.js';
|
||||
|
||||
const pressKey = defineTool({
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
@ -43,12 +47,61 @@ const pressKey: ToolFactory = captureSnapshot => defineTool({
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
pressKey(captureSnapshot),
|
||||
const typeSchema = elementSchema.extend({
|
||||
text: z.string().describe('Text to type into the element'),
|
||||
submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
|
||||
slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'),
|
||||
});
|
||||
|
||||
/**
 * `browser_type` tool: enters text into the element addressed by a snapshot ref.
 *
 * The handler prepares two parallel things: `code`, the equivalent Playwright
 * source lines, and `action`, which runs the matching locator calls.
 * `slowly` selects `pressSequentially()` instead of `fill()`; `submit` appends
 * an Enter key press on the same element.
 */
const type = defineTool({
  capability: 'core',
  schema: {
    name: 'browser_type',
    title: 'Type text',
    description: 'Type text into editable element',
    inputSchema: typeSchema,
    type: 'destructive',
  },

  handle: async (context, params) => {
    const snapshot = context.currentTabOrDie().snapshotOrDie();
    const locator = snapshot.refLocator(params);

    // Every generated line targets the same element, so build the locator
    // expression once instead of awaiting generateLocator() per line.
    const target = `page.${await generateLocator(locator)}`;
    const quotedText = javascript.quote(params.text);

    const code: string[] = [];
    const steps: (() => Promise<void>)[] = [];

    if (params.slowly) {
      code.push(`// Press "${params.text}" sequentially into "${params.element}"`);
      code.push(`await ${target}.pressSequentially(${quotedText});`);
      steps.push(() => locator.pressSequentially(params.text));
    } else {
      code.push(`// Fill "${params.text}" into "${params.element}"`);
      code.push(`await ${target}.fill(${quotedText});`);
      steps.push(() => locator.fill(params.text));
    }

    if (params.submit) {
      code.push(`// Submit text`);
      code.push(`await ${target}.press('Enter');`);
      steps.push(() => locator.press('Enter'));
    }

    return {
      code,
      // Steps run strictly in order; a rejected step prevents the later ones.
      action: async () => {
        for (const step of steps)
          await step();
      },
      captureSnapshot: true,
      waitForNetwork: true,
    };
  },
});
|
||||
|
||||
export default [
|
||||
pressKey,
|
||||
type,
|
||||
];
|
||||
|
@ -17,50 +17,14 @@
|
||||
import { z } from 'zod';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
import * as javascript from '../javascript.js';
|
||||
|
||||
const elementSchema = z.object({
|
||||
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
|
||||
});
|
||||
|
||||
const screenshot = defineTool({
|
||||
capability: 'core',
|
||||
const mouseMove = defineTool({
|
||||
capability: 'vision',
|
||||
schema: {
|
||||
name: 'browser_screen_capture',
|
||||
title: 'Take a screenshot',
|
||||
description: 'Take a screenshot of the current page',
|
||||
inputSchema: z.object({}),
|
||||
type: 'readOnly',
|
||||
},
|
||||
|
||||
handle: async context => {
|
||||
const tab = await context.ensureTab();
|
||||
const options = { type: 'jpeg' as 'jpeg', quality: 50, scale: 'css' as 'css' };
|
||||
|
||||
const code = [
|
||||
`// Take a screenshot of the current page`,
|
||||
`await page.screenshot(${javascript.formatObject(options)});`,
|
||||
];
|
||||
|
||||
const action = () => tab.page.screenshot(options).then(buffer => {
|
||||
return {
|
||||
content: [{ type: 'image' as 'image', data: buffer.toString('base64'), mimeType: 'image/jpeg' }],
|
||||
};
|
||||
});
|
||||
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const moveMouse = defineTool({
|
||||
capability: 'core',
|
||||
schema: {
|
||||
name: 'browser_screen_move_mouse',
|
||||
name: 'browser_mouse_move_xy',
|
||||
title: 'Move mouse',
|
||||
description: 'Move mouse to a given position',
|
||||
inputSchema: elementSchema.extend({
|
||||
@ -86,12 +50,12 @@ const moveMouse = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
const click = defineTool({
|
||||
capability: 'core',
|
||||
const mouseClick = defineTool({
|
||||
capability: 'vision',
|
||||
schema: {
|
||||
name: 'browser_screen_click',
|
||||
name: 'browser_mouse_click_xy',
|
||||
title: 'Click',
|
||||
description: 'Click left mouse button',
|
||||
description: 'Click left mouse button at a given position',
|
||||
inputSchema: elementSchema.extend({
|
||||
x: z.number().describe('X coordinate'),
|
||||
y: z.number().describe('Y coordinate'),
|
||||
@ -121,12 +85,12 @@ const click = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
const drag = defineTool({
|
||||
capability: 'core',
|
||||
const mouseDrag = defineTool({
|
||||
capability: 'vision',
|
||||
schema: {
|
||||
name: 'browser_screen_drag',
|
||||
name: 'browser_mouse_drag_xy',
|
||||
title: 'Drag mouse',
|
||||
description: 'Drag left mouse button',
|
||||
description: 'Drag left mouse button to a given position',
|
||||
inputSchema: elementSchema.extend({
|
||||
startX: z.number().describe('Start X coordinate'),
|
||||
startY: z.number().describe('Start Y coordinate'),
|
||||
@ -163,51 +127,8 @@ const drag = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * `browser_screen_type` tool: types text through the page keyboard, i.e. into
 * whatever currently has focus, and optionally presses Enter afterwards.
 *
 * `code` holds the equivalent Playwright source lines; `action` performs the
 * same keyboard calls on the current tab's page.
 */
const type = defineTool({
  capability: 'core',
  schema: {
    name: 'browser_screen_type',
    title: 'Type text',
    description: 'Type text',
    inputSchema: z.object({
      text: z.string().describe('Text to type into the element'),
      submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
    }),
    type: 'destructive',
  },

  handle: async (context, params) => {
    const tab = context.currentTabOrDie();

    const code = [
      `// Type ${params.text}`,
      // Quote through the shared helper: wrapping the raw text in '...' by hand
      // produced an invalid snippet whenever the text contained a quote,
      // a backslash or a line break.
      `await page.keyboard.type(${javascript.quote(params.text)});`,
    ];
    if (params.submit) {
      code.push(`// Submit text`);
      code.push(`await page.keyboard.press('Enter');`);
    }

    const action = async () => {
      await tab.page.keyboard.type(params.text);
      if (params.submit)
        await tab.page.keyboard.press('Enter');
    };

    return {
      code,
      action,
      captureSnapshot: false,
      waitForNetwork: true,
    };
  },
});
|
||||
|
||||
export default [
|
||||
screenshot,
|
||||
moveMouse,
|
||||
click,
|
||||
drag,
|
||||
type,
|
||||
mouseMove,
|
||||
mouseClick,
|
||||
mouseDrag,
|
||||
];
|
@ -15,9 +15,9 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
const navigate: ToolFactory = captureSnapshot => defineTool({
|
||||
const navigate = defineTool({
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
@ -41,14 +41,14 @@ const navigate: ToolFactory = captureSnapshot => defineTool({
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const goBack: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'history',
|
||||
const goBack = defineTool({
|
||||
capability: 'core',
|
||||
schema: {
|
||||
name: 'browser_navigate_back',
|
||||
title: 'Go back',
|
||||
@ -67,14 +67,14 @@ const goBack: ToolFactory = captureSnapshot => defineTool({
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const goForward: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'history',
|
||||
const goForward = defineTool({
|
||||
capability: 'core',
|
||||
schema: {
|
||||
name: 'browser_navigate_forward',
|
||||
title: 'Go forward',
|
||||
@ -91,14 +91,14 @@ const goForward: ToolFactory = captureSnapshot => defineTool({
|
||||
];
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
navigate(captureSnapshot),
|
||||
goBack(captureSnapshot),
|
||||
goForward(captureSnapshot),
|
||||
export default [
|
||||
navigate,
|
||||
goBack,
|
||||
goForward,
|
||||
];
|
||||
|
@ -79,7 +79,7 @@ const screenshot = defineTool({
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot: true,
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
}
|
||||
|
@ -41,7 +41,7 @@ const snapshot = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
const elementSchema = z.object({
|
||||
export const elementSchema = z.object({
|
||||
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
|
||||
ref: z.string().describe('Exact target element reference from the page snapshot'),
|
||||
});
|
||||
@ -144,54 +144,6 @@ const hover = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
// Input schema for the `browser_type` tool: the fields of `elementSchema`
// extended with the text to enter and the optional `submit` / `slowly` flags.
const typeSchema = elementSchema.extend({
|
||||
text: z.string().describe('Text to type into the element'),
|
||||
submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
|
||||
slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'),
|
||||
});
|
||||
|
||||
/**
 * `browser_type` tool: enters text into the element addressed by a snapshot ref.
 *
 * The handler prepares two parallel things: `code`, the equivalent Playwright
 * source lines, and `action`, which runs the matching locator calls.
 * `slowly` selects `pressSequentially()` instead of `fill()`; `submit` appends
 * an Enter key press on the same element.
 */
const type = defineTool({
  capability: 'core',
  schema: {
    name: 'browser_type',
    title: 'Type text',
    description: 'Type text into editable element',
    inputSchema: typeSchema,
    type: 'destructive',
  },

  handle: async (context, params) => {
    const snapshot = context.currentTabOrDie().snapshotOrDie();
    const locator = snapshot.refLocator(params);

    // Every generated line targets the same element, so build the locator
    // expression once instead of awaiting generateLocator() per line.
    const target = `page.${await generateLocator(locator)}`;
    const quotedText = javascript.quote(params.text);

    const code: string[] = [];
    const steps: (() => Promise<void>)[] = [];

    if (params.slowly) {
      code.push(`// Press "${params.text}" sequentially into "${params.element}"`);
      code.push(`await ${target}.pressSequentially(${quotedText});`);
      steps.push(() => locator.pressSequentially(params.text));
    } else {
      code.push(`// Fill "${params.text}" into "${params.element}"`);
      code.push(`await ${target}.fill(${quotedText});`);
      steps.push(() => locator.fill(params.text));
    }

    if (params.submit) {
      code.push(`// Submit text`);
      code.push(`await ${target}.press('Enter');`);
      steps.push(() => locator.press('Enter'));
    }

    return {
      code,
      // Steps run strictly in order; a rejected step prevents the later ones.
      action: async () => {
        for (const step of steps)
          await step();
      },
      captureSnapshot: true,
      waitForNetwork: true,
    };
  },
});
|
||||
|
||||
const selectOptionSchema = elementSchema.extend({
|
||||
values: z.array(z.string()).describe('Array of values to select in the dropdown. This can be a single value or multiple values.'),
|
||||
});
|
||||
@ -229,6 +181,5 @@ export default [
|
||||
click,
|
||||
drag,
|
||||
hover,
|
||||
type,
|
||||
selectOption,
|
||||
];
|
||||
|
@ -15,10 +15,10 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
const listTabs = defineTool({
|
||||
capability: 'tabs',
|
||||
capability: 'core-tabs',
|
||||
|
||||
schema: {
|
||||
name: 'browser_tab_list',
|
||||
@ -44,8 +44,8 @@ const listTabs = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
const selectTab: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'tabs',
|
||||
const selectTab = defineTool({
|
||||
capability: 'core-tabs',
|
||||
|
||||
schema: {
|
||||
name: 'browser_tab_select',
|
||||
@ -65,14 +65,14 @@ const selectTab: ToolFactory = captureSnapshot => defineTool({
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const newTab: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'tabs',
|
||||
const newTab = defineTool({
|
||||
capability: 'core-tabs',
|
||||
|
||||
schema: {
|
||||
name: 'browser_tab_new',
|
||||
@ -94,14 +94,14 @@ const newTab: ToolFactory = captureSnapshot => defineTool({
|
||||
];
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const closeTab: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'tabs',
|
||||
const closeTab = defineTool({
|
||||
capability: 'core-tabs',
|
||||
|
||||
schema: {
|
||||
name: 'browser_tab_close',
|
||||
@ -120,15 +120,15 @@ const closeTab: ToolFactory = captureSnapshot => defineTool({
|
||||
];
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
export default [
|
||||
listTabs,
|
||||
newTab(captureSnapshot),
|
||||
selectTab(captureSnapshot),
|
||||
closeTab(captureSnapshot),
|
||||
newTab,
|
||||
selectTab,
|
||||
closeTab,
|
||||
];
|
||||
|
@ -61,8 +61,6 @@ export type Tool<Input extends InputType = InputType> = {
|
||||
handle: (context: Context, params: z.output<Input>) => Promise<ToolResult>;
|
||||
};
|
||||
|
||||
export type ToolFactory = (snapshot: boolean) => Tool<any>;
|
||||
|
||||
/**
 * Returns the given tool definition unchanged.
 *
 * The function performs no work at runtime; its generic signature ties the
 * argument to `Tool<Input>`, so a definition passed through it is checked
 * against that type for its own `Input`.
 */
export function defineTool<Input extends InputType>(tool: Tool<Input>): Tool<Input> {
|
||||
return tool;
|
||||
}
|
||||
|
@ -15,10 +15,10 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
const wait: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'wait',
|
||||
const wait = defineTool({
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
name: 'browser_wait_for',
|
||||
@ -40,7 +40,7 @@ const wait: ToolFactory = captureSnapshot => defineTool({
|
||||
|
||||
if (params.time) {
|
||||
code.push(`await new Promise(f => setTimeout(f, ${params.time!} * 1000));`);
|
||||
await new Promise(f => setTimeout(f, Math.min(10000, params.time! * 1000)));
|
||||
await new Promise(f => setTimeout(f, Math.min(30000, params.time! * 1000)));
|
||||
}
|
||||
|
||||
const tab = context.currentTabOrDie();
|
||||
@ -59,12 +59,12 @@ const wait: ToolFactory = captureSnapshot => defineTool({
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
wait(captureSnapshot),
|
||||
export default [
|
||||
wait,
|
||||
];
|
||||
|
@ -34,7 +34,6 @@ test('test snapshot tool list', async ({ client }) => {
|
||||
'browser_navigate_forward',
|
||||
'browser_navigate',
|
||||
'browser_network_requests',
|
||||
'browser_pdf_save',
|
||||
'browser_press_key',
|
||||
'browser_resize',
|
||||
'browser_snapshot',
|
||||
@ -47,45 +46,33 @@ test('test snapshot tool list', async ({ client }) => {
|
||||
]));
|
||||
});
|
||||
|
||||
// Checks that the `visionClient` fixture exposes exactly this set of tool names
// (order-insensitive comparison via Set).
test('test vision tool list', async ({ visionClient }) => {
  const expectedToolNames = [
    'browser_close',
    'browser_console_messages',
    'browser_file_upload',
    'browser_handle_dialog',
    'browser_install',
    'browser_navigate_back',
    'browser_navigate_forward',
    'browser_navigate',
    'browser_network_requests',
    'browser_pdf_save',
    'browser_press_key',
    'browser_resize',
    'browser_screen_capture',
    'browser_screen_click',
    'browser_screen_drag',
    'browser_screen_move_mouse',
    'browser_screen_type',
    'browser_tab_close',
    'browser_tab_list',
    'browser_tab_new',
    'browser_tab_select',
    'browser_wait_for',
  ];

  const listing = await visionClient.listTools();
  const actualToolNames = listing.tools.map(tool => tool.name);
  expect(new Set(actualToolNames)).toEqual(new Set(expectedToolNames));
});
|
||||
|
||||
test('test capabilities', async ({ startClient }) => {
|
||||
test('test capabilities (pdf)', async ({ startClient }) => {
|
||||
const { client } = await startClient({
|
||||
args: ['--caps="core"'],
|
||||
args: ['--caps=pdf'],
|
||||
});
|
||||
const { tools } = await client.listTools();
|
||||
const toolNames = tools.map(t => t.name);
|
||||
expect(toolNames).not.toContain('browser_file_upload');
|
||||
expect(toolNames).not.toContain('browser_pdf_save');
|
||||
expect(toolNames).not.toContain('browser_screen_capture');
|
||||
expect(toolNames).not.toContain('browser_screen_click');
|
||||
expect(toolNames).not.toContain('browser_screen_drag');
|
||||
expect(toolNames).not.toContain('browser_screen_move_mouse');
|
||||
expect(toolNames).not.toContain('browser_screen_type');
|
||||
expect(toolNames).toContain('browser_pdf_save');
|
||||
});
|
||||
|
||||
// Starting the server with --caps=vision must expose the coordinate-based mouse tools.
test('test capabilities (vision)', async ({ startClient }) => {
  const { client } = await startClient({ args: ['--caps=vision'] });
  const listing = await client.listTools();
  const toolNames = listing.tools.map(tool => tool.name);

  const visionToolNames = [
    'browser_mouse_move_xy',
    'browser_mouse_click_xy',
    'browser_mouse_drag_xy',
  ];
  for (const name of visionToolNames)
    expect(toolNames).toContain(name);
});
|
||||
|
||||
// The --vision flag must still expose the same coordinate-based mouse tools.
test('support for legacy --vision option', async ({ startClient }) => {
  const { client } = await startClient({ args: ['--vision'] });
  const listing = await client.listTools();
  const toolNames = listing.tools.map(tool => tool.name);

  const visionToolNames = [
    'browser_mouse_move_xy',
    'browser_mouse_click_xy',
    'browser_mouse_drag_xy',
  ];
  for (const name of visionToolNames)
    expect(toolNames).toContain(name);
});
|
||||
|
51
tests/evaluate.spec.ts
Normal file
51
tests/evaluate.spec.ts
Normal file
@ -0,0 +1,51 @@
|
||||
/**
|
||||
* Copyright (c) Microsoft Corporation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { test, expect } from './fixtures.js';
|
||||
|
||||
// Navigates to the hello-world page, then evaluates a page-level function and
// expects the document title in the tool output.
test('browser_evaluate', async ({ client, server }) => {
  const navigation = await client.callTool({
    name: 'browser_navigate',
    arguments: { url: server.HELLO_WORLD },
  });
  expect(navigation).toContainTextContent(`- Page Title: Title`);

  const evaluation = await client.callTool({
    name: 'browser_evaluate',
    arguments: { function: '() => document.title' },
  });
  expect(evaluation).toContainTextContent(`"Title"`);
});
|
||||
|
||||
// Serves a page with a red body, then evaluates a function against the element
// passed by ref and expects its inline background colour in the result.
test('browser_evaluate (element)', async ({ client, server }) => {
  server.setContent('/', `
<body style="background-color: red">Hello, world!</body>
`, 'text/html');

  await client.callTool({
    name: 'browser_navigate',
    arguments: { url: server.PREFIX },
  });

  const evaluation = await client.callTool({
    name: 'browser_evaluate',
    arguments: {
      function: 'element => element.style.backgroundColor',
      element: 'body',
      ref: 'e1',
    },
  });
  expect(evaluation).toContainTextContent(`- Result: "red"`);
});
|
@ -41,7 +41,6 @@ type CDPServer = {
|
||||
|
||||
type TestFixtures = {
|
||||
client: Client;
|
||||
visionClient: Client;
|
||||
startClient: (options?: { clientName?: string, args?: string[], config?: Config }) => Promise<{ client: Client, stderr: () => string }>;
|
||||
wsEndpoint: string;
|
||||
cdpServer: CDPServer;
|
||||
@ -61,11 +60,6 @@ export const test = baseTest.extend<TestFixtures & TestOptions, WorkerFixtures>(
|
||||
await use(client);
|
||||
},
|
||||
|
||||
visionClient: async ({ startClient }, use) => {
|
||||
const { client } = await startClient({ args: ['--vision'] });
|
||||
await use(client);
|
||||
},
|
||||
|
||||
startClient: async ({ mcpHeadless, mcpBrowser, mcpMode }, use, testInfo) => {
|
||||
const userDataDir = mcpMode !== 'docker' ? testInfo.outputPath('user-data-dir') : undefined;
|
||||
const configDir = path.dirname(test.info().config.configFile!);
|
||||
|
@ -19,7 +19,7 @@ import fs from 'fs';
|
||||
import { test, expect } from './fixtures.js';
|
||||
|
||||
test('save as pdf unavailable', async ({ startClient, server }) => {
|
||||
const { client } = await startClient({ args: ['--caps="no-pdf"'] });
|
||||
const { client } = await startClient();
|
||||
await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: { url: server.HELLO_WORLD },
|
||||
@ -32,7 +32,7 @@ test('save as pdf unavailable', async ({ startClient, server }) => {
|
||||
|
||||
test('save as pdf', async ({ startClient, mcpBrowser, server }, testInfo) => {
|
||||
const { client } = await startClient({
|
||||
config: { outputDir: testInfo.outputPath('output') },
|
||||
config: { outputDir: testInfo.outputPath('output'), capabilities: ['pdf'] },
|
||||
});
|
||||
|
||||
test.skip(!!mcpBrowser && !['chromium', 'chrome', 'msedge'].includes(mcpBrowser), 'Save as PDF is only supported in Chromium.');
|
||||
@ -52,7 +52,7 @@ test('save as pdf (filename: output.pdf)', async ({ startClient, mcpBrowser, ser
|
||||
const outputDir = testInfo.outputPath('output');
|
||||
test.skip(!!mcpBrowser && !['chromium', 'chrome', 'msedge'].includes(mcpBrowser), 'Save as PDF is only supported in Chromium.');
|
||||
const { client } = await startClient({
|
||||
config: { outputDir },
|
||||
config: { outputDir, capabilities: ['pdf'] },
|
||||
});
|
||||
|
||||
expect(await client.callTool({
|
||||
|
@ -20,60 +20,20 @@ import fs from 'node:fs'
|
||||
import path from 'node:path'
|
||||
import url from 'node:url'
|
||||
import zodToJsonSchema from 'zod-to-json-schema'
|
||||
|
||||
import commonTools from '../lib/tools/common.js';
|
||||
import consoleTools from '../lib/tools/console.js';
|
||||
import dialogsTools from '../lib/tools/dialogs.js';
|
||||
import evaluateTools from '../lib/tools/evaluate.js';
|
||||
import filesTools from '../lib/tools/files.js';
|
||||
import installTools from '../lib/tools/install.js';
|
||||
import keyboardTools from '../lib/tools/keyboard.js';
|
||||
import navigateTools from '../lib/tools/navigate.js';
|
||||
import networkTools from '../lib/tools/network.js';
|
||||
import pdfTools from '../lib/tools/pdf.js';
|
||||
import snapshotTools from '../lib/tools/snapshot.js';
|
||||
import tabsTools from '../lib/tools/tabs.js';
|
||||
import screenshotTools from '../lib/tools/screenshot.js';
|
||||
import visionTools from '../lib/tools/vision.js';
|
||||
import waitTools from '../lib/tools/wait.js';
|
||||
import { execSync } from 'node:child_process';
|
||||
|
||||
const categories = {
|
||||
'Interactions': [
|
||||
...snapshotTools,
|
||||
...keyboardTools(true),
|
||||
...waitTools(true),
|
||||
...filesTools(true),
|
||||
...dialogsTools(true),
|
||||
],
|
||||
'Navigation': [
|
||||
...navigateTools(true),
|
||||
],
|
||||
'Evaluation': [
|
||||
...evaluateTools,
|
||||
],
|
||||
'Resources': [
|
||||
...screenshotTools,
|
||||
...pdfTools,
|
||||
...networkTools,
|
||||
...consoleTools,
|
||||
],
|
||||
'Utilities': [
|
||||
...installTools,
|
||||
...commonTools(true),
|
||||
],
|
||||
'Tabs': [
|
||||
...tabsTools(true),
|
||||
],
|
||||
'Vision mode': [
|
||||
...visionTools,
|
||||
...keyboardTools(),
|
||||
...waitTools(false),
|
||||
...filesTools(false),
|
||||
...dialogsTools(false),
|
||||
],
|
||||
import { allTools } from '../lib/tools.js';
|
||||
|
||||
const capabilities = {
|
||||
'core': 'Core automation',
|
||||
'core-tabs': 'Tab management',
|
||||
'core-install': 'Browser installation',
|
||||
'vision': 'Coordinate-based (opt-in via --caps=vision)',
|
||||
'pdf': 'PDF generation (opt-in via --caps=pdf)',
|
||||
};
|
||||
|
||||
// Maps each capability's display title to the tools declaring that capability,
// sorted by tool name. Entries follow the key order of `capabilities`.
const toolsByCapability = Object.fromEntries(
  Object.entries(capabilities).map(([capability, title]) => {
    const matchingTools = allTools.filter(tool => tool.capability === capability);
    // filter() returned a fresh array, so sorting in place leaves allTools untouched.
    matchingTools.sort((a, b) => a.schema.name.localeCompare(b.schema.name));
    return [title, matchingTools];
  })
);
|
||||
|
||||
// NOTE: Can be removed when we drop Node.js 18 support and changed to import.meta.filename.
|
||||
const __filename = url.fileURLToPath(import.meta.url);
|
||||
|
||||
@ -139,14 +99,12 @@ async function updateSection(content, startMarker, endMarker, generatedLines) {
|
||||
async function updateTools(content) {
|
||||
console.log('Loading tool information from compiled modules...');
|
||||
|
||||
const totalTools = Object.values(categories).flat().length;
|
||||
console.log(`Found ${totalTools} tools`);
|
||||
|
||||
const generatedLines = /** @type {string[]} */ ([]);
|
||||
for (const [category, categoryTools] of Object.entries(categories)) {
|
||||
generatedLines.push(`<details>\n<summary><b>${category}</b></summary>`);
|
||||
for (const [capability, tools] of Object.entries(toolsByCapability)) {
|
||||
console.log('Updating tools for capability:', capability);
|
||||
generatedLines.push(`<details>\n<summary><b>${capability}</b></summary>`);
|
||||
generatedLines.push('');
|
||||
for (const tool of categoryTools)
|
||||
for (const tool of tools)
|
||||
generatedLines.push(...formatToolForReadme(tool.schema));
|
||||
generatedLines.push(`</details>`);
|
||||
generatedLines.push('');
|
||||
|
Loading…
x
Reference in New Issue
Block a user