diff --git a/README.md b/README.md
index 4b365ea..ee245ed 100644
--- a/README.md
+++ b/README.md
@@ -4,25 +4,22 @@ A Model Context Protocol (MCP) server that provides browser automation capabilit
### Key Features
-- **Fast and lightweight**: Uses Playwright's accessibility tree, not pixel-based input.
-- **LLM-friendly**: No vision models needed, operates purely on structured data.
-- **Deterministic tool application**: Avoids ambiguity common with screenshot-based approaches.
+- **Fast and lightweight**. Uses Playwright's accessibility tree, not pixel-based input.
+- **LLM-friendly**. No vision models needed, operates purely on structured data.
+- **Deterministic tool application**. Avoids ambiguity common with screenshot-based approaches.
-### Use Cases
-
-- Web navigation and form-filling
-- Data extraction from structured content
-- Automated testing driven by LLMs
-- General-purpose browser interaction for agents
+### Requirements
+- Node.js 18 or newer
+- VS Code, Cursor, Windsurf, Claude Desktop or any other MCP client
-[
](https://insiders.vscode.dev/redirect?url=vscode%3Amcp%2Finstall%3F%257B%2522name%2522%253A%2522playwright%2522%252C%2522command%2522%253A%2522npx%2522%252C%2522args%2522%253A%255B%2522%2540playwright%252Fmcp%2540latest%2522%255D%257D) [
](https://insiders.vscode.dev/redirect?url=vscode-insiders%3Amcp%2Finstall%3F%257B%2522name%2522%253A%2522playwright%2522%252C%2522command%2522%253A%2522npx%2522%252C%2522args%2522%253A%255B%2522%2540playwright%252Fmcp%2540latest%2522%255D%257D)
+### Getting started
-### Example config
+First, install the Playwright MCP server with your client. A typical configuration looks like this:
```js
{
@@ -37,20 +34,12 @@ node utils/generate-links.js
}
```
-### Table of Contents
+[
](https://insiders.vscode.dev/redirect?url=vscode%3Amcp%2Finstall%3F%257B%2522name%2522%253A%2522playwright%2522%252C%2522command%2522%253A%2522npx%2522%252C%2522args%2522%253A%255B%2522%2540playwright%252Fmcp%2540latest%2522%255D%257D) [
](https://insiders.vscode.dev/redirect?url=vscode-insiders%3Amcp%2Finstall%3F%257B%2522name%2522%253A%2522playwright%2522%252C%2522command%2522%253A%2522npx%2522%252C%2522args%2522%253A%255B%2522%2540playwright%252Fmcp%2540latest%2522%255D%257D)
-- [Installation in VS Code](#installation-in-vs-code)
-- [Command line](#command-line)
-- [User profile](#user-profile)
-- [Configuration file](#configuration-file)
-- [Running on Linux](#running-on-linux)
-- [Docker](#docker)
-- [Programmatic usage](#programmatic-usage)
-- [Tool modes](#tool-modes)
-### Installation in VS Code
+Install in VS Code
-You can install the Playwright MCP server using the VS Code CLI:
+You can also install the Playwright MCP server using the VS Code CLI:
```bash
# For VS Code
@@ -58,10 +47,68 @@ code --add-mcp '{"name":"playwright","command":"npx","args":["@playwright/mcp@la
```
After installation, the Playwright MCP server will be available for use with your GitHub Copilot agent in VS Code.
+
-### Command line
+
+Install in Cursor
-The Playwright MCP server supports the following command-line options:
+Go to `Cursor Settings` -> `MCP` -> `Add new MCP Server`. Name it to your liking and use the `command` type with the command `npx @playwright/mcp`. You can also verify the config or add command-line arguments by clicking `Edit`.
+
+```js
+{
+ "mcpServers": {
+ "playwright": {
+ "command": "npx",
+ "args": [
+ "@playwright/mcp@latest"
+ ]
+ }
+ }
+}
+```
+
+
+
+Install in Windsurf
+
+Follow the Windsurf MCP [documentation](https://docs.windsurf.com/windsurf/cascade/mcp). Use the following configuration:
+
+```js
+{
+ "mcpServers": {
+ "playwright": {
+ "command": "npx",
+ "args": [
+ "@playwright/mcp@latest"
+ ]
+ }
+ }
+}
+```
+
+
+
+Install in Claude Desktop
+
+Follow the MCP install [guide](https://modelcontextprotocol.io/quickstart/user) and use the following configuration:
+
+```js
+{
+ "mcpServers": {
+ "playwright": {
+ "command": "npx",
+ "args": [
+ "@playwright/mcp@latest"
+ ]
+ }
+ }
+}
+```
+
+
+### Configuration
+
+The Playwright MCP server supports the following arguments. They can be provided in the JSON configuration above, as a part of the `"args"` list:
- `--browser `: Browser or chrome channel to use. Possible values:
- `chrome`, `firefox`, `webkit`, `msedge`
@@ -96,7 +143,15 @@ All the logged in information will be stored in that profile, you can delete it
### Configuration file
-The Playwright MCP server can be configured using a JSON configuration file. Here's the complete configuration format:
+The Playwright MCP server can be configured using a JSON configuration file. You can specify the configuration file
+using the `--config` command line option:
+
+```bash
+npx @playwright/mcp@latest --config path/to/config.json
+```
+
+
+Configuration file schema
```typescript
{
@@ -170,14 +225,9 @@ The Playwright MCP server can be configured using a JSON configuration file. Her
noImageResponses?: boolean;
}
```
+
-You can specify the configuration file using the `--config` command line option:
-
-```bash
-npx @playwright/mcp@latest --config path/to/config.json
-```
-
-### Running on Linux
+### Standalone MCP server
When running headed browser on system w/o display or from worker processes of the IDEs,
run the MCP server from environment with the DISPLAY and pass the `--port` flag to enable SSE transport.
@@ -198,7 +248,8 @@ And then in MCP client config, set the `url` to the SSE endpoint:
}
```
-### Docker
+
+Docker
**NOTE:** The Docker implementation only supports headless chromium at the moment.
@@ -218,8 +269,10 @@ You can build the Docker image yourself.
```
docker build -t mcr.microsoft.com/playwright/mcp .
```
+
-### Programmatic usage
+
+Programmatic usage
```js
import http from 'http';
@@ -238,8 +291,9 @@ http.createServer(async (req, res) => {
// ...
});
```
+
-### Tool modes
+### Tools
The tools are available in two modes:
@@ -265,10 +319,10 @@ To use Vision Mode, add the `--vision` flag when starting the server:
Vision Mode works best with the computer use models that are able to interact with elements using
X Y coordinate space, based on the provided screenshot.
-
-### Snapshot-based Interactions
+
+Interactions
@@ -336,6 +390,80 @@ X Y coordinate space, based on the provided screenshot.
+- **browser_press_key**
+ - Title: Press a key
+ - Description: Press a key on the keyboard
+ - Parameters:
+ - `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
+ - Read-only: **false**
+
+
+
+- **browser_wait_for**
+ - Title: Wait for
+ - Description: Wait for text to appear or disappear or a specified time to pass
+ - Parameters:
+ - `time` (number, optional): The time to wait in seconds
+ - `text` (string, optional): The text to wait for
+ - `textGone` (string, optional): The text to wait for to disappear
+ - Read-only: **true**
+
+
+
+- **browser_file_upload**
+ - Title: Upload files
+ - Description: Upload one or multiple files
+ - Parameters:
+ - `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files.
+ - Read-only: **false**
+
+
+
+- **browser_handle_dialog**
+ - Title: Handle a dialog
+ - Description: Handle a dialog
+ - Parameters:
+ - `accept` (boolean): Whether to accept the dialog.
+ - `promptText` (string, optional): The text of the prompt in case of a prompt dialog.
+ - Read-only: **false**
+
+
+
+
+Navigation
+
+
+
+- **browser_navigate**
+ - Title: Navigate to a URL
+ - Description: Navigate to a URL
+ - Parameters:
+ - `url` (string): The URL to navigate to
+ - Read-only: **false**
+
+
+
+- **browser_navigate_back**
+ - Title: Go back
+ - Description: Go back to the previous page
+ - Parameters: None
+ - Read-only: **true**
+
+
+
+- **browser_navigate_forward**
+ - Title: Go forward
+ - Description: Go forward to the next page
+ - Parameters: None
+ - Read-only: **true**
+
+
+
+
+Resources
+
+
+
- **browser_take_screenshot**
- Title: Take a screenshot
- Description: Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.
@@ -346,7 +474,122 @@ X Y coordinate space, based on the provided screenshot.
- `ref` (string, optional): Exact target element reference from the page snapshot. If not provided, the screenshot will be taken of viewport. If ref is provided, element must be provided too.
- Read-only: **true**
-### Vision-based Interactions
+
+
+- **browser_pdf_save**
+ - Title: Save as PDF
+ - Description: Save page as PDF
+ - Parameters:
+ - `filename` (string, optional): File name to save the pdf to. Defaults to `page-{timestamp}.pdf` if not specified.
+ - Read-only: **true**
+
+
+
+- **browser_network_requests**
+ - Title: List network requests
+ - Description: Returns all network requests since loading the page
+ - Parameters: None
+ - Read-only: **true**
+
+
+
+- **browser_console_messages**
+ - Title: Get console messages
+ - Description: Returns all console messages
+ - Parameters: None
+ - Read-only: **true**
+
+
+
+
+Utilities
+
+
+
+- **browser_install**
+ - Title: Install the browser specified in the config
+ - Description: Install the browser specified in the config. Call this if you get an error about the browser not being installed.
+ - Parameters: None
+ - Read-only: **false**
+
+
+
+- **browser_close**
+ - Title: Close browser
+ - Description: Close the page
+ - Parameters: None
+ - Read-only: **true**
+
+
+
+- **browser_resize**
+ - Title: Resize browser window
+ - Description: Resize the browser window
+ - Parameters:
+ - `width` (number): Width of the browser window
+ - `height` (number): Height of the browser window
+ - Read-only: **true**
+
+
+
+
+Tabs
+
+
+
+- **browser_tab_list**
+ - Title: List tabs
+ - Description: List browser tabs
+ - Parameters: None
+ - Read-only: **true**
+
+
+
+- **browser_tab_new**
+ - Title: Open a new tab
+ - Description: Open a new tab
+ - Parameters:
+ - `url` (string, optional): The URL to navigate to in the new tab. If not provided, the new tab will be blank.
+ - Read-only: **true**
+
+
+
+- **browser_tab_select**
+ - Title: Select a tab
+ - Description: Select a tab by index
+ - Parameters:
+ - `index` (number): The index of the tab to select
+ - Read-only: **true**
+
+
+
+- **browser_tab_close**
+ - Title: Close a tab
+ - Description: Close a tab
+ - Parameters:
+ - `index` (number, optional): The index of the tab to close. Closes current tab if not provided.
+ - Read-only: **false**
+
+
+
+
+Testing
+
+
+
+- **browser_generate_playwright_test**
+ - Title: Generate a Playwright test
+ - Description: Generate a Playwright test for given scenario
+ - Parameters:
+ - `name` (string): The name of the test
+ - `description` (string): The description of the test
+ - `steps` (array): The steps of the test
+ - Read-only: **true**
+
+
+
+
+Vision mode
@@ -401,72 +644,6 @@ X Y coordinate space, based on the provided screenshot.
- `submit` (boolean, optional): Whether to submit entered text (press Enter after)
- Read-only: **false**
-### Tab Management
-
-
-
-- **browser_tab_list**
- - Title: List tabs
- - Description: List browser tabs
- - Parameters: None
- - Read-only: **true**
-
-
-
-- **browser_tab_new**
- - Title: Open a new tab
- - Description: Open a new tab
- - Parameters:
- - `url` (string, optional): The URL to navigate to in the new tab. If not provided, the new tab will be blank.
- - Read-only: **true**
-
-
-
-- **browser_tab_select**
- - Title: Select a tab
- - Description: Select a tab by index
- - Parameters:
- - `index` (number): The index of the tab to select
- - Read-only: **true**
-
-
-
-- **browser_tab_close**
- - Title: Close a tab
- - Description: Close a tab
- - Parameters:
- - `index` (number, optional): The index of the tab to close. Closes current tab if not provided.
- - Read-only: **false**
-
-### Navigation
-
-
-
-- **browser_navigate**
- - Title: Navigate to a URL
- - Description: Navigate to a URL
- - Parameters:
- - `url` (string): The URL to navigate to
- - Read-only: **false**
-
-
-
-- **browser_navigate_back**
- - Title: Go back
- - Description: Go back to the previous page
- - Parameters: None
- - Read-only: **true**
-
-
-
-- **browser_navigate_forward**
- - Title: Go forward
- - Description: Go forward to the next page
- - Parameters: None
- - Read-only: **true**
-
-### Keyboard
-
- **browser_press_key**
@@ -476,46 +653,6 @@ X Y coordinate space, based on the provided screenshot.
- `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
- Read-only: **false**
-### Console
-
-
-
-- **browser_console_messages**
- - Title: Get console messages
- - Description: Returns all console messages
- - Parameters: None
- - Read-only: **true**
-
-### Files and Media
-
-
-
-- **browser_file_upload**
- - Title: Upload files
- - Description: Upload one or multiple files
- - Parameters:
- - `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files.
- - Read-only: **false**
-
-
-
-- **browser_pdf_save**
- - Title: Save as PDF
- - Description: Save page as PDF
- - Parameters:
- - `filename` (string, optional): File name to save the pdf to. Defaults to `page-{timestamp}.pdf` if not specified.
- - Read-only: **true**
-
-### Utilities
-
-
-
-- **browser_close**
- - Title: Close browser
- - Description: Close the page
- - Parameters: None
- - Read-only: **true**
-
- **browser_wait_for**
@@ -529,20 +666,11 @@ X Y coordinate space, based on the provided screenshot.
-- **browser_resize**
- - Title: Resize browser window
- - Description: Resize the browser window
+- **browser_file_upload**
+ - Title: Upload files
+ - Description: Upload one or multiple files
- Parameters:
- - `width` (number): Width of the browser window
- - `height` (number): Height of the browser window
- - Read-only: **true**
-
-
-
-- **browser_install**
- - Title: Install the browser specified in the config
- - Description: Install the browser specified in the config. Call this if you get an error about the browser not being installed.
- - Parameters: None
+ - `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files.
- Read-only: **false**
@@ -555,25 +683,6 @@ X Y coordinate space, based on the provided screenshot.
- `promptText` (string, optional): The text of the prompt in case of a prompt dialog.
- Read-only: **false**
-
-
-- **browser_network_requests**
- - Title: List network requests
- - Description: Returns all network requests since loading the page
- - Parameters: None
- - Read-only: **true**
-
-### Testing
-
-
-
-- **browser_generate_playwright_test**
- - Title: Generate a Playwright test
- - Description: Generate a Playwright test for given scenario
- - Parameters:
- - `name` (string): The name of the test
- - `description` (string): The description of the test
- - `steps` (array): The steps of the test
- - Read-only: **true**
+
diff --git a/src/connection.ts b/src/connection.ts
index 7065387..e29e4ac 100644
--- a/src/connection.ts
+++ b/src/connection.ts
@@ -19,13 +19,13 @@ import { CallToolRequestSchema, ListToolsRequestSchema, Tool as McpTool } from '
import { zodToJsonSchema } from 'zod-to-json-schema';
import { Context, packageJSON } from './context.js';
-import { snapshotTools, screenshotTools } from './tools.js';
+import { snapshotTools, visionTools } from './tools.js';
import type { Config } from '../config.js';
import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js';
export async function createConnection(config: Config): Promise {
- const allTools = config.vision ? screenshotTools : snapshotTools;
+ const allTools = config.vision ? visionTools : snapshotTools;
const tools = allTools.filter(tool => !config.capabilities || tool.capability === 'core' || config.capabilities.includes(tool.capability));
const context = new Context(tools, config);
diff --git a/src/program.ts b/src/program.ts
index ddf699c..77fb50a 100644
--- a/src/program.ts
+++ b/src/program.ts
@@ -31,7 +31,8 @@ program
.option('--executable-path ', 'Path to the browser executable.')
.option('--headless', 'Run browser in headless mode, headed by default')
.option('--device ', 'Device to emulate, for example: "iPhone 15"')
- .option('--user-data-dir ', 'Path to the user data directory')
+ .option('--user-data-dir ', 'Path to the user data directory. If not specified, a temporary directory will be created.')
+ .option('--in-memory', 'Use in-memory storage for user data directory.')
.option('--port ', 'Port to listen on for SSE transport.')
.option('--host ', 'Host to bind server to. Default is localhost. Use 0.0.0.0 to bind to all interfaces.')
.option('--allowed-origins ', 'Semicolon-separated list of origins to allow the browser to request. Default is to allow all.', semicolonSeparatedList)
diff --git a/src/tools.ts b/src/tools.ts
index 8613d92..bd6db0f 100644
--- a/src/tools.ts
+++ b/src/tools.ts
@@ -25,8 +25,10 @@ import network from './tools/network.js';
import pdf from './tools/pdf.js';
import snapshot from './tools/snapshot.js';
import tabs from './tools/tabs.js';
-import screen from './tools/screen.js';
+import screenshot from './tools/screenshot.js';
import testing from './tools/testing.js';
+import vision from './tools/vision.js';
+import wait from './tools/wait.js';
import type { Tool } from './tools/tool.js';
@@ -40,12 +42,14 @@ export const snapshotTools: Tool[] = [
...navigate(true),
...network,
...pdf,
+ ...screenshot,
...snapshot,
...tabs(true),
...testing,
+ ...wait(true),
];
-export const screenshotTools: Tool[] = [
+export const visionTools: Tool[] = [
...common(false),
...console,
...dialogs(false),
@@ -55,7 +59,8 @@ export const screenshotTools: Tool[] = [
...navigate(false),
...network,
...pdf,
- ...screen,
...tabs(false),
...testing,
+ ...vision,
+ ...wait(false),
];
diff --git a/src/tools/common.ts b/src/tools/common.ts
index ca2dab0..d140380 100644
--- a/src/tools/common.ts
+++ b/src/tools/common.ts
@@ -17,54 +17,6 @@
import { z } from 'zod';
import { defineTool, type ToolFactory } from './tool.js';
-const wait: ToolFactory = captureSnapshot => defineTool({
- capability: 'wait',
-
- schema: {
- name: 'browser_wait_for',
- title: 'Wait for',
- description: 'Wait for text to appear or disappear or a specified time to pass',
- inputSchema: z.object({
- time: z.number().optional().describe('The time to wait in seconds'),
- text: z.string().optional().describe('The text to wait for'),
- textGone: z.string().optional().describe('The text to wait for to disappear'),
- }),
- type: 'readOnly',
- },
-
- handle: async (context, params) => {
- if (!params.text && !params.textGone && !params.time)
- throw new Error('Either time, text or textGone must be provided');
-
- const code: string[] = [];
-
- if (params.time) {
- code.push(`await new Promise(f => setTimeout(f, ${params.time!} * 1000));`);
- await new Promise(f => setTimeout(f, Math.min(10000, params.time! * 1000)));
- }
-
- const tab = context.currentTabOrDie();
- const locator = params.text ? tab.page.getByText(params.text).first() : undefined;
- const goneLocator = params.textGone ? tab.page.getByText(params.textGone).first() : undefined;
-
- if (goneLocator) {
- code.push(`await page.getByText(${JSON.stringify(params.textGone)}).first().waitFor({ state: 'hidden' });`);
- await goneLocator.waitFor({ state: 'hidden' });
- }
-
- if (locator) {
- code.push(`await page.getByText(${JSON.stringify(params.text)}).first().waitFor({ state: 'visible' });`);
- await locator.waitFor({ state: 'visible' });
- }
-
- return {
- code,
- captureSnapshot,
- waitForNetwork: false,
- };
- },
-});
-
const close = defineTool({
capability: 'core',
@@ -122,6 +74,5 @@ const resize: ToolFactory = captureSnapshot => defineTool({
export default (captureSnapshot: boolean) => [
close,
- wait(captureSnapshot),
resize(captureSnapshot)
];
diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts
new file mode 100644
index 0000000..db5b7f1
--- /dev/null
+++ b/src/tools/screenshot.ts
@@ -0,0 +1,90 @@
+/**
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { z } from 'zod';
+
+import { defineTool } from './tool.js';
+import * as javascript from '../javascript.js';
+import { outputFile } from '../config.js';
+import { generateLocator } from './utils.js';
+
+import type * as playwright from 'playwright';
+
+const screenshotSchema = z.object({
+ raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
+ filename: z.string().optional().describe('File name to save the screenshot to. Defaults to `page-{timestamp}.{png|jpeg}` if not specified.'),
+ element: z.string().optional().describe('Human-readable element description used to obtain permission to screenshot the element. If not provided, the screenshot will be taken of viewport. If element is provided, ref must be provided too.'),
+ ref: z.string().optional().describe('Exact target element reference from the page snapshot. If not provided, the screenshot will be taken of viewport. If ref is provided, element must be provided too.'),
+}).refine(data => {
+ return !!data.element === !!data.ref;
+}, {
+ message: 'Both element and ref must be provided or neither.',
+ path: ['ref', 'element']
+});
+
+const screenshot = defineTool({
+ capability: 'core',
+ schema: {
+ name: 'browser_take_screenshot',
+ title: 'Take a screenshot',
+ description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
+ inputSchema: screenshotSchema,
+ type: 'readOnly',
+ },
+
+ handle: async (context, params) => {
+ const tab = context.currentTabOrDie();
+ const snapshot = tab.snapshotOrDie();
+ const fileType = params.raw ? 'png' : 'jpeg';
+ const fileName = await outputFile(context.config, params.filename ?? `page-${new Date().toISOString()}.${fileType}`);
+ const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName };
+ const isElementScreenshot = params.element && params.ref;
+
+ const code = [
+ `// Screenshot ${isElementScreenshot ? params.element : 'viewport'} and save it as ${fileName}`,
+ ];
+
+ const locator = params.ref ? snapshot.refLocator(params.ref) : null;
+
+ if (locator)
+ code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`);
+ else
+ code.push(`await page.screenshot(${javascript.formatObject(options)});`);
+
+ const includeBase64 = !context.config.noImageResponses;
+ const action = async () => {
+ const screenshot = locator ? await locator.screenshot(options) : await tab.page.screenshot(options);
+ return {
+ content: includeBase64 ? [{
+ type: 'image' as 'image',
+ data: screenshot.toString('base64'),
+ mimeType: fileType === 'png' ? 'image/png' : 'image/jpeg',
+ }] : []
+ };
+ };
+
+ return {
+ code,
+ action,
+ captureSnapshot: true,
+ waitForNetwork: false,
+ };
+ }
+});
+
+export default [
+ screenshot,
+];
diff --git a/src/tools/snapshot.ts b/src/tools/snapshot.ts
index e6a2d66..576d578 100644
--- a/src/tools/snapshot.ts
+++ b/src/tools/snapshot.ts
@@ -18,9 +18,7 @@ import { z } from 'zod';
import { defineTool } from './tool.js';
import * as javascript from '../javascript.js';
-import { outputFile } from '../config.js';
-
-import type * as playwright from 'playwright';
+import { generateLocator } from './utils.js';
const snapshot = defineTool({
capability: 'core',
@@ -218,72 +216,6 @@ const selectOption = defineTool({
},
});
-const screenshotSchema = z.object({
- raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
- filename: z.string().optional().describe('File name to save the screenshot to. Defaults to `page-{timestamp}.{png|jpeg}` if not specified.'),
- element: z.string().optional().describe('Human-readable element description used to obtain permission to screenshot the element. If not provided, the screenshot will be taken of viewport. If element is provided, ref must be provided too.'),
- ref: z.string().optional().describe('Exact target element reference from the page snapshot. If not provided, the screenshot will be taken of viewport. If ref is provided, element must be provided too.'),
-}).refine(data => {
- return !!data.element === !!data.ref;
-}, {
- message: 'Both element and ref must be provided or neither.',
- path: ['ref', 'element']
-});
-
-const screenshot = defineTool({
- capability: 'core',
- schema: {
- name: 'browser_take_screenshot',
- title: 'Take a screenshot',
- description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
- inputSchema: screenshotSchema,
- type: 'readOnly',
- },
-
- handle: async (context, params) => {
- const tab = context.currentTabOrDie();
- const snapshot = tab.snapshotOrDie();
- const fileType = params.raw ? 'png' : 'jpeg';
- const fileName = await outputFile(context.config, params.filename ?? `page-${new Date().toISOString()}.${fileType}`);
- const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName };
- const isElementScreenshot = params.element && params.ref;
-
- const code = [
- `// Screenshot ${isElementScreenshot ? params.element : 'viewport'} and save it as ${fileName}`,
- ];
-
- const locator = params.ref ? snapshot.refLocator(params.ref) : null;
-
- if (locator)
- code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`);
- else
- code.push(`await page.screenshot(${javascript.formatObject(options)});`);
-
- const includeBase64 = !context.config.noImageResponses;
- const action = async () => {
- const screenshot = locator ? await locator.screenshot(options) : await tab.page.screenshot(options);
- return {
- content: includeBase64 ? [{
- type: 'image' as 'image',
- data: screenshot.toString('base64'),
- mimeType: fileType === 'png' ? 'image/png' : 'image/jpeg',
- }] : []
- };
- };
-
- return {
- code,
- action,
- captureSnapshot: true,
- waitForNetwork: false,
- };
- }
-});
-
-export async function generateLocator(locator: playwright.Locator): Promise {
- return (locator as any)._generateLocatorString();
-}
-
export default [
snapshot,
click,
@@ -291,5 +223,4 @@ export default [
hover,
type,
selectOption,
- screenshot,
];
diff --git a/src/tools/utils.ts b/src/tools/utils.ts
index 6fd16d4..4a0a2a4 100644
--- a/src/tools/utils.ts
+++ b/src/tools/utils.ts
@@ -77,3 +77,7 @@ export function sanitizeForFilePath(s: string) {
return sanitize(s);
return sanitize(s.substring(0, separator)) + '.' + sanitize(s.substring(separator + 1));
}
+
+export async function generateLocator(locator: playwright.Locator): Promise {
+ return (locator as any)._generateLocatorString();
+}
diff --git a/src/tools/screen.ts b/src/tools/vision.ts
similarity index 100%
rename from src/tools/screen.ts
rename to src/tools/vision.ts
diff --git a/src/tools/wait.ts b/src/tools/wait.ts
new file mode 100644
index 0000000..fc8be82
--- /dev/null
+++ b/src/tools/wait.ts
@@ -0,0 +1,70 @@
+/**
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { z } from 'zod';
+import { defineTool, type ToolFactory } from './tool.js';
+
+const wait: ToolFactory = captureSnapshot => defineTool({
+ capability: 'wait',
+
+ schema: {
+ name: 'browser_wait_for',
+ title: 'Wait for',
+ description: 'Wait for text to appear or disappear or a specified time to pass',
+ inputSchema: z.object({
+ time: z.number().optional().describe('The time to wait in seconds'),
+ text: z.string().optional().describe('The text to wait for'),
+ textGone: z.string().optional().describe('The text to wait for to disappear'),
+ }),
+ type: 'readOnly',
+ },
+
+ handle: async (context, params) => {
+ if (!params.text && !params.textGone && !params.time)
+ throw new Error('Either time, text or textGone must be provided');
+
+ const code: string[] = [];
+
+ if (params.time) {
+ code.push(`await new Promise(f => setTimeout(f, ${params.time!} * 1000));`);
+ await new Promise(f => setTimeout(f, Math.min(10000, params.time! * 1000)));
+ }
+
+ const tab = context.currentTabOrDie();
+ const locator = params.text ? tab.page.getByText(params.text).first() : undefined;
+ const goneLocator = params.textGone ? tab.page.getByText(params.textGone).first() : undefined;
+
+ if (goneLocator) {
+ code.push(`await page.getByText(${JSON.stringify(params.textGone)}).first().waitFor({ state: 'hidden' });`);
+ await goneLocator.waitFor({ state: 'hidden' });
+ }
+
+ if (locator) {
+ code.push(`await page.getByText(${JSON.stringify(params.text)}).first().waitFor({ state: 'visible' });`);
+ await locator.waitFor({ state: 'visible' });
+ }
+
+ return {
+ code,
+ captureSnapshot,
+ waitForNetwork: false,
+ };
+ },
+});
+
+export default (captureSnapshot: boolean) => [
+ wait(captureSnapshot),
+];
diff --git a/utils/update-readme.js b/utils/update-readme.js
index d0f6cea..fde7864 100644
--- a/utils/update-readme.js
+++ b/utils/update-readme.js
@@ -32,42 +32,46 @@ import networkTools from '../lib/tools/network.js';
import pdfTools from '../lib/tools/pdf.js';
import snapshotTools from '../lib/tools/snapshot.js';
import tabsTools from '../lib/tools/tabs.js';
-import screenTools from '../lib/tools/screen.js';
+import screenshotTools from '../lib/tools/screenshot.js';
import testTools from '../lib/tools/testing.js';
+import visionTools from '../lib/tools/vision.js';
+import waitTools from '../lib/tools/wait.js';
// Category definitions for tools
const categories = {
- 'Snapshot-based Interactions': [
+ 'Interactions': [
...snapshotTools,
- ],
- 'Vision-based Interactions': [
- ...screenTools
- ],
- 'Tab Management': [
- ...tabsTools(true),
+ ...keyboardTools(true),
+ ...waitTools(true),
+ ...filesTools(true),
+ ...dialogsTools(true),
],
'Navigation': [
...navigateTools(true),
],
- 'Keyboard': [
- ...keyboardTools(true)
- ],
- 'Console': [
- ...consoleTools
- ],
- 'Files and Media': [
- ...filesTools(true),
- ...pdfTools
+ 'Resources': [
+ ...screenshotTools,
+ ...pdfTools,
+ ...networkTools,
+ ...consoleTools,
],
'Utilities': [
- ...commonTools(true),
...installTools,
- ...dialogsTools(true),
- ...networkTools,
+ ...commonTools(true),
+ ],
+ 'Tabs': [
+ ...tabsTools(true),
],
'Testing': [
...testTools,
],
+ 'Vision mode': [
+ ...visionTools,
+ ...keyboardTools(),
+ ...waitTools(false),
+ ...filesTools(false),
+ ...dialogsTools(false),
+ ],
};
// NOTE: Can be removed when we drop Node.js 18 support and changed to import.meta.filename.
@@ -118,9 +122,12 @@ async function updateReadme() {
const generatedLines = /** @type {string[]} */ ([]);
for (const [category, categoryTools] of Object.entries(categories)) {
- generatedLines.push(`### ${category}\n\n`);
+ generatedLines.push(`\n${category}
\n\n`);
+
for (const tool of categoryTools)
generatedLines.push(formatToolForReadme(tool.schema));
+
+ generatedLines.push(` \n\n`);
}
const readmePath = path.join(path.dirname(__filename), '..', 'README.md');