From a1eee8351e8e39d44e846f0ed3ac6a747388d84e Mon Sep 17 00:00:00 2001
From: Pavel Feldman <pavel.feldman@gmail.com>
Date: Mon, 12 May 2025 16:42:47 -0700
Subject: [PATCH] chore: collapse readme (#404)

---
 README.md                          | 463 ++++++++++++++++++-----------
 src/connection.ts                  |   4 +-
 src/program.ts                     |   3 +-
 src/tools.ts                       |  11 +-
 src/tools/common.ts                |  49 ---
 src/tools/screenshot.ts            |  90 ++++++
 src/tools/snapshot.ts              |  71 +----
 src/tools/utils.ts                 |   4 +
 src/tools/{screen.ts => vision.ts} |   0
 src/tools/wait.ts                  |  70 +++++
 utils/update-readme.js             |  49 +--
 11 files changed, 491 insertions(+), 323 deletions(-)
 create mode 100644 src/tools/screenshot.ts
 rename src/tools/{screen.ts => vision.ts} (100%)
 create mode 100644 src/tools/wait.ts
diff --git a/README.md b/README.md
index 4b365ea..ee245ed 100644
--- a/README.md
+++ b/README.md
@@ -4,25 +4,22 @@ A Model Context Protocol (MCP) server that provides browser automation capabilit
 
 ### Key Features
 
-- **Fast and lightweight**: Uses Playwright's accessibility tree, not pixel-based input.
-- **LLM-friendly**: No vision models needed, operates purely on structured data.
-- **Deterministic tool application**: Avoids ambiguity common with screenshot-based approaches.
+- **Fast and lightweight**. Uses Playwright's accessibility tree, not pixel-based input.
+- **LLM-friendly**. No vision models needed, operates purely on structured data.
+- **Deterministic tool application**. Avoids ambiguity common with screenshot-based approaches.
 
-### Use Cases
-
-- Web navigation and form-filling
-- Data extraction from structured content
-- Automated testing driven by LLMs
-- General-purpose browser interaction for agents
+### Requirements
+- Node.js 18 or newer
+- VS Code, Cursor, Windsurf, Claude Desktop or any other MCP client
 
 <!--
 // Generate using:
 node utils/generate-links.js
 -->
 
-[<img src="https://img.shields.io/badge/VS_Code-VS_Code?style=flat-square&label=Install%20Server&color=0098FF" alt="Install in VS Code">](https://insiders.vscode.dev/redirect?url=vscode%3Amcp%2Finstall%3F%257B%2522name%2522%253A%2522playwright%2522%252C%2522command%2522%253A%2522npx%2522%252C%2522args%2522%253A%255B%2522%2540playwright%252Fmcp%2540latest%2522%255D%257D) [<img alt="Install in VS Code Insiders" src="https://img.shields.io/badge/VS_Code_Insiders-VS_Code_Insiders?style=flat-square&label=Install%20Server&color=24bfa5">](https://insiders.vscode.dev/redirect?url=vscode-insiders%3Amcp%2Finstall%3F%257B%2522name%2522%253A%2522playwright%2522%252C%2522command%2522%253A%2522npx%2522%252C%2522args%2522%253A%255B%2522%2540playwright%252Fmcp%2540latest%2522%255D%257D)
+### Getting started
 
-### Example config
+First, install the Playwright MCP server with your client. A typical configuration looks like this:
 
 ```js
 {
@@ -37,20 +34,12 @@ node utils/generate-links.js
 }
 ```
 
-### Table of Contents
+[<img src="https://img.shields.io/badge/VS_Code-VS_Code?style=flat-square&label=Install%20Server&color=0098FF" alt="Install in VS Code">](https://insiders.vscode.dev/redirect?url=vscode%3Amcp%2Finstall%3F%257B%2522name%2522%253A%2522playwright%2522%252C%2522command%2522%253A%2522npx%2522%252C%2522args%2522%253A%255B%2522%2540playwright%252Fmcp%2540latest%2522%255D%257D) [<img alt="Install in VS Code Insiders" src="https://img.shields.io/badge/VS_Code_Insiders-VS_Code_Insiders?style=flat-square&label=Install%20Server&color=24bfa5">](https://insiders.vscode.dev/redirect?url=vscode-insiders%3Amcp%2Finstall%3F%257B%2522name%2522%253A%2522playwright%2522%252C%2522command%2522%253A%2522npx%2522%252C%2522args%2522%253A%255B%2522%2540playwright%252Fmcp%2540latest%2522%255D%257D)
 
-- [Installation in VS Code](#installation-in-vs-code)
-- [Command line](#command-line)
-- [User profile](#user-profile)
-- [Configuration file](#configuration-file)
-- [Running on Linux](#running-on-linux)
-- [Docker](#docker)
-- [Programmatic usage](#programmatic-usage)
-- [Tool modes](#tool-modes)
 
-### Installation in VS Code
+<details><summary><b>Install in VS Code</b></summary>
 
-You can install the Playwright MCP server using the VS Code CLI:
+You can also install the Playwright MCP server using the VS Code CLI:
 
 ```bash
 # For VS Code
@@ -58,10 +47,68 @@ code --add-mcp '{"name":"playwright","command":"npx","args":["@playwright/mcp@la
 ```
 
 After installation, the Playwright MCP server will be available for use with your GitHub Copilot agent in VS Code.
+</details>
 
-### Command line
+<details>
+<summary><b>Install in Cursor</b></summary>
 
-The Playwright MCP server supports the following command-line options:
+Go to `Cursor Settings` -> `MCP` -> `Add new MCP Server`. Name it to your liking and use the `command` type with the command `npx @playwright/mcp`. You can also verify the config or add command-line arguments by clicking `Edit`.
+
+```js
+{
+  "mcpServers": {
+    "playwright": {
+      "command": "npx",
+      "args": [
+        "@playwright/mcp@latest"
+      ]
+    }
+  }
+}
+```
+</details>
+
+<details>
+<summary><b>Install in Windsurf</b></summary>
+
+Follow the Windsurf MCP [documentation](https://docs.windsurf.com/windsurf/cascade/mcp). Use the following configuration:
+
+```js
+{
+  "mcpServers": {
+    "playwright": {
+      "command": "npx",
+      "args": [
+        "@playwright/mcp@latest"
+      ]
+    }
+  }
+}
+```
+</details>
+
+<details>
+<summary><b>Install in Claude Desktop</b></summary>
+
+Follow the MCP install [guide](https://modelcontextprotocol.io/quickstart/user) and use the following configuration:
+
+```js
+{
+  "mcpServers": {
+    "playwright": {
+      "command": "npx",
+      "args": [
+        "@playwright/mcp@latest"
+      ]
+    }
+  }
+}
+```
+</details>
+
+### Configuration
+
+The Playwright MCP server supports the following arguments. They can be provided in the JSON configuration above, as part of the `"args"` list:
 
 - `--browser <browser>`: Browser or chrome channel to use. Possible values:
   - `chrome`, `firefox`, `webkit`, `msedge`
@@ -96,7 +143,15 @@ All the logged in information will be stored in that profile, you can delete it
 
 ### Configuration file
 
-The Playwright MCP server can be configured using a JSON configuration file. Here's the complete configuration format:
+The Playwright MCP server can be configured using a JSON configuration file. You can specify the configuration file
+using the `--config` command line option:
+
+```bash
+npx @playwright/mcp@latest --config path/to/config.json
+```
+
+<details>
+<summary>Configuration file schema</summary>
 
 ```typescript
 {
@@ -170,14 +225,9 @@ The Playwright MCP server can be configured using a JSON configuration file. Her
   noImageResponses?: boolean;
 }
 ```
+</details>
 
-You can specify the configuration file using the `--config` command line option:
-
-```bash
-npx @playwright/mcp@latest --config path/to/config.json
-```
-
-### Running on Linux
+### Standalone MCP server
 
 When running headed browser on system w/o display or from worker processes of the IDEs,
 run the MCP server from environment with the DISPLAY and pass the `--port` flag to enable SSE transport.
@@ -198,7 +248,8 @@ And then in MCP client config, set the `url` to the SSE endpoint:
 }
 ```
 
-### Docker
+<details>
+<summary><b>Docker</b></summary>
 
 **NOTE:** The Docker implementation only supports headless chromium at the moment.
 
@@ -218,8 +269,10 @@ You can build the Docker image yourself.
 ```
 docker build -t mcr.microsoft.com/playwright/mcp .
 ```
+</details>
 
-### Programmatic usage
+<details>
+<summary><b>Programmatic usage</b></summary>
 
 ```js
 import http from 'http';
@@ -238,8 +291,9 @@ http.createServer(async (req, res) => {
   // ...
 });
 ```
+</details>
 
-### Tool modes
+### Tools
 
 The tools are available in two modes:
 
@@ -265,10 +319,10 @@ To use Vision Mode, add the `--vision` flag when starting the server:
 Vision Mode works best with the computer use models that are able to interact with elements using
 X Y coordinate space, based on the provided screenshot.
 
-
 <!--- Generated by update-readme.js -->
 
-### Snapshot-based Interactions
+<details>
+<summary><b>Interactions</b></summary>
 
 <!-- NOTE: This has been generated via update-readme.js -->
 
@@ -336,6 +390,80 @@ X Y coordinate space, based on the provided screenshot.
 
 <!-- NOTE: This has been generated via update-readme.js -->
 
+- **browser_press_key**
+  - Title: Press a key
+  - Description: Press a key on the keyboard
+  - Parameters:
+    - `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
+  - Read-only: **false**
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_wait_for**
+  - Title: Wait for
+  - Description: Wait for text to appear or disappear or a specified time to pass
+  - Parameters:
+    - `time` (number, optional): The time to wait in seconds
+    - `text` (string, optional): The text to wait for
+    - `textGone` (string, optional): The text to wait for to disappear
+  - Read-only: **true**
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_file_upload**
+  - Title: Upload files
+  - Description: Upload one or multiple files
+  - Parameters:
+    - `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files.
+  - Read-only: **false**
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_handle_dialog**
+  - Title: Handle a dialog
+  - Description: Handle a dialog
+  - Parameters:
+    - `accept` (boolean): Whether to accept the dialog.
+    - `promptText` (string, optional): The text of the prompt in case of a prompt dialog.
+  - Read-only: **false**
+
+</details>
+
+<details>
+<summary><b>Navigation</b></summary>
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_navigate**
+  - Title: Navigate to a URL
+  - Description: Navigate to a URL
+  - Parameters:
+    - `url` (string): The URL to navigate to
+  - Read-only: **false**
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_navigate_back**
+  - Title: Go back
+  - Description: Go back to the previous page
+  - Parameters: None
+  - Read-only: **true**
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_navigate_forward**
+  - Title: Go forward
+  - Description: Go forward to the next page
+  - Parameters: None
+  - Read-only: **true**
+
+</details>
+
+<details>
+<summary><b>Resources</b></summary>
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
 - **browser_take_screenshot**
   - Title: Take a screenshot
   - Description: Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.
@@ -346,7 +474,122 @@ X Y coordinate space, based on the provided screenshot.
     - `ref` (string, optional): Exact target element reference from the page snapshot. If not provided, the screenshot will be taken of viewport. If ref is provided, element must be provided too.
   - Read-only: **true**
 
-### Vision-based Interactions
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_pdf_save**
+  - Title: Save as PDF
+  - Description: Save page as PDF
+  - Parameters:
+    - `filename` (string, optional): File name to save the pdf to. Defaults to `page-{timestamp}.pdf` if not specified.
+  - Read-only: **true**
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_network_requests**
+  - Title: List network requests
+  - Description: Returns all network requests since loading the page
+  - Parameters: None
+  - Read-only: **true**
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_console_messages**
+  - Title: Get console messages
+  - Description: Returns all console messages
+  - Parameters: None
+  - Read-only: **true**
+
+</details>
+
+<details>
+<summary><b>Utilities</b></summary>
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_install**
+  - Title: Install the browser specified in the config
+  - Description: Install the browser specified in the config. Call this if you get an error about the browser not being installed.
+  - Parameters: None
+  - Read-only: **false**
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_close**
+  - Title: Close browser
+  - Description: Close the page
+  - Parameters: None
+  - Read-only: **true**
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_resize**
+  - Title: Resize browser window
+  - Description: Resize the browser window
+  - Parameters:
+    - `width` (number): Width of the browser window
+    - `height` (number): Height of the browser window
+  - Read-only: **true**
+
+</details>
+
+<details>
+<summary><b>Tabs</b></summary>
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_tab_list**
+  - Title: List tabs
+  - Description: List browser tabs
+  - Parameters: None
+  - Read-only: **true**
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_tab_new**
+  - Title: Open a new tab
+  - Description: Open a new tab
+  - Parameters:
+    - `url` (string, optional): The URL to navigate to in the new tab. If not provided, the new tab will be blank.
+  - Read-only: **true**
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_tab_select**
+  - Title: Select a tab
+  - Description: Select a tab by index
+  - Parameters:
+    - `index` (number): The index of the tab to select
+  - Read-only: **true**
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_tab_close**
+  - Title: Close a tab
+  - Description: Close a tab
+  - Parameters:
+    - `index` (number, optional): The index of the tab to close. Closes current tab if not provided.
+  - Read-only: **false**
+
+</details>
+
+<details>
+<summary><b>Testing</b></summary>
+
+<!-- NOTE: This has been generated via update-readme.js -->
+
+- **browser_generate_playwright_test**
+  - Title: Generate a Playwright test
+  - Description: Generate a Playwright test for given scenario
+  - Parameters:
+    - `name` (string): The name of the test
+    - `description` (string): The description of the test
+    - `steps` (array): The steps of the test
+  - Read-only: **true**
+
+</details>
+
+<details>
+<summary><b>Vision mode</b></summary>
 
 <!-- NOTE: This has been generated via update-readme.js -->
 
@@ -401,72 +644,6 @@ X Y coordinate space, based on the provided screenshot.
     - `submit` (boolean, optional): Whether to submit entered text (press Enter after)
   - Read-only: **false**
 
-### Tab Management
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_tab_list**
-  - Title: List tabs
-  - Description: List browser tabs
-  - Parameters: None
-  - Read-only: **true**
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_tab_new**
-  - Title: Open a new tab
-  - Description: Open a new tab
-  - Parameters:
-    - `url` (string, optional): The URL to navigate to in the new tab. If not provided, the new tab will be blank.
-  - Read-only: **true**
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_tab_select**
-  - Title: Select a tab
-  - Description: Select a tab by index
-  - Parameters:
-    - `index` (number): The index of the tab to select
-  - Read-only: **true**
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_tab_close**
-  - Title: Close a tab
-  - Description: Close a tab
-  - Parameters:
-    - `index` (number, optional): The index of the tab to close. Closes current tab if not provided.
-  - Read-only: **false**
-
-### Navigation
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_navigate**
-  - Title: Navigate to a URL
-  - Description: Navigate to a URL
-  - Parameters:
-    - `url` (string): The URL to navigate to
-  - Read-only: **false**
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_navigate_back**
-  - Title: Go back
-  - Description: Go back to the previous page
-  - Parameters: None
-  - Read-only: **true**
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_navigate_forward**
-  - Title: Go forward
-  - Description: Go forward to the next page
-  - Parameters: None
-  - Read-only: **true**
-
-### Keyboard
-
 <!-- NOTE: This has been generated via update-readme.js -->
 
 - **browser_press_key**
@@ -476,46 +653,6 @@ X Y coordinate space, based on the provided screenshot.
     - `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
   - Read-only: **false**
 
-### Console
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_console_messages**
-  - Title: Get console messages
-  - Description: Returns all console messages
-  - Parameters: None
-  - Read-only: **true**
-
-### Files and Media
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_file_upload**
-  - Title: Upload files
-  - Description: Upload one or multiple files
-  - Parameters:
-    - `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files.
-  - Read-only: **false**
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_pdf_save**
-  - Title: Save as PDF
-  - Description: Save page as PDF
-  - Parameters:
-    - `filename` (string, optional): File name to save the pdf to. Defaults to `page-{timestamp}.pdf` if not specified.
-  - Read-only: **true**
-
-### Utilities
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_close**
-  - Title: Close browser
-  - Description: Close the page
-  - Parameters: None
-  - Read-only: **true**
-
 <!-- NOTE: This has been generated via update-readme.js -->
 
 - **browser_wait_for**
@@ -529,20 +666,11 @@ X Y coordinate space, based on the provided screenshot.
 
 <!-- NOTE: This has been generated via update-readme.js -->
 
-- **browser_resize**
-  - Title: Resize browser window
-  - Description: Resize the browser window
+- **browser_file_upload**
+  - Title: Upload files
+  - Description: Upload one or multiple files
   - Parameters:
-    - `width` (number): Width of the browser window
-    - `height` (number): Height of the browser window
-  - Read-only: **true**
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_install**
-  - Title: Install the browser specified in the config
-  - Description: Install the browser specified in the config. Call this if you get an error about the browser not being installed.
-  - Parameters: None
+    - `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files.
   - Read-only: **false**
 
 <!-- NOTE: This has been generated via update-readme.js -->
@@ -555,25 +683,6 @@ X Y coordinate space, based on the provided screenshot.
     - `promptText` (string, optional): The text of the prompt in case of a prompt dialog.
   - Read-only: **false**
 
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_network_requests**
-  - Title: List network requests
-  - Description: Returns all network requests since loading the page
-  - Parameters: None
-  - Read-only: **true**
-
-### Testing
-
-<!-- NOTE: This has been generated via update-readme.js -->
-
-- **browser_generate_playwright_test**
-  - Title: Generate a Playwright test
-  - Description: Generate a Playwright test for given scenario
-  - Parameters:
-    - `name` (string): The name of the test
-    - `description` (string): The description of the test
-    - `steps` (array): The steps of the test
-  - Read-only: **true**
+</details>
 
 <!--- End of generated section -->
diff --git a/src/connection.ts b/src/connection.ts
index 7065387..e29e4ac 100644
--- a/src/connection.ts
+++ b/src/connection.ts
@@ -19,13 +19,13 @@ import { CallToolRequestSchema, ListToolsRequestSchema, Tool as McpTool } from '
 import { zodToJsonSchema } from 'zod-to-json-schema';
 
 import { Context, packageJSON } from './context.js';
-import { snapshotTools, screenshotTools } from './tools.js';
+import { snapshotTools, visionTools } from './tools.js';
 
 import type { Config } from '../config.js';
 import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js';
 
 export async function createConnection(config: Config): Promise<Connection> {
-  const allTools = config.vision ? screenshotTools : snapshotTools;
+  const allTools = config.vision ? visionTools : snapshotTools;
   const tools = allTools.filter(tool => !config.capabilities || tool.capability === 'core' || config.capabilities.includes(tool.capability));
 
   const context = new Context(tools, config);
diff --git a/src/program.ts b/src/program.ts
index ddf699c..77fb50a 100644
--- a/src/program.ts
+++ b/src/program.ts
@@ -31,7 +31,8 @@ program
     .option('--executable-path <path>', 'Path to the browser executable.')
     .option('--headless', 'Run browser in headless mode, headed by default')
     .option('--device <device>', 'Device to emulate, for example: "iPhone 15"')
-    .option('--user-data-dir <path>', 'Path to the user data directory')
+    .option('--user-data-dir <path>', 'Path to the user data directory. If not specified, a temporary directory will be created.')
+    .option('--in-memory', 'Use in-memory storage for user data directory.')
     .option('--port <port>', 'Port to listen on for SSE transport.')
     .option('--host <host>', 'Host to bind server to. Default is localhost. Use 0.0.0.0 to bind to all interfaces.')
     .option('--allowed-origins <origins>', 'Semicolon-separated list of origins to allow the browser to request. Default is to allow all.', semicolonSeparatedList)
diff --git a/src/tools.ts b/src/tools.ts
index 8613d92..bd6db0f 100644
--- a/src/tools.ts
+++ b/src/tools.ts
@@ -25,8 +25,10 @@ import network from './tools/network.js';
 import pdf from './tools/pdf.js';
 import snapshot from './tools/snapshot.js';
 import tabs from './tools/tabs.js';
-import screen from './tools/screen.js';
+import screenshot from './tools/screenshot.js';
 import testing from './tools/testing.js';
+import vision from './tools/vision.js';
+import wait from './tools/wait.js';
 
 import type { Tool } from './tools/tool.js';
 
@@ -40,12 +42,14 @@ export const snapshotTools: Tool<any>[] = [
   ...navigate(true),
   ...network,
   ...pdf,
+  ...screenshot,
   ...snapshot,
   ...tabs(true),
   ...testing,
+  ...wait(true),
 ];
 
-export const screenshotTools: Tool<any>[] = [
+export const visionTools: Tool<any>[] = [
   ...common(false),
   ...console,
   ...dialogs(false),
@@ -55,7 +59,8 @@ export const screenshotTools: Tool<any>[] = [
   ...navigate(false),
   ...network,
   ...pdf,
-  ...screen,
   ...tabs(false),
   ...testing,
+  ...vision,
+  ...wait(false),
 ];
diff --git a/src/tools/common.ts b/src/tools/common.ts
index ca2dab0..d140380 100644
--- a/src/tools/common.ts
+++ b/src/tools/common.ts
@@ -17,54 +17,6 @@
 import { z } from 'zod';
 import { defineTool, type ToolFactory } from './tool.js';
 
-const wait: ToolFactory = captureSnapshot => defineTool({
-  capability: 'wait',
-
-  schema: {
-    name: 'browser_wait_for',
-    title: 'Wait for',
-    description: 'Wait for text to appear or disappear or a specified time to pass',
-    inputSchema: z.object({
-      time: z.number().optional().describe('The time to wait in seconds'),
-      text: z.string().optional().describe('The text to wait for'),
-      textGone: z.string().optional().describe('The text to wait for to disappear'),
-    }),
-    type: 'readOnly',
-  },
-
-  handle: async (context, params) => {
-    if (!params.text && !params.textGone && !params.time)
-      throw new Error('Either time, text or textGone must be provided');
-
-    const code: string[] = [];
-
-    if (params.time) {
-      code.push(`await new Promise(f => setTimeout(f, ${params.time!} * 1000));`);
-      await new Promise(f => setTimeout(f, Math.min(10000, params.time! * 1000)));
-    }
-
-    const tab = context.currentTabOrDie();
-    const locator = params.text ? tab.page.getByText(params.text).first() : undefined;
-    const goneLocator = params.textGone ? tab.page.getByText(params.textGone).first() : undefined;
-
-    if (goneLocator) {
-      code.push(`await page.getByText(${JSON.stringify(params.textGone)}).first().waitFor({ state: 'hidden' });`);
-      await goneLocator.waitFor({ state: 'hidden' });
-    }
-
-    if (locator) {
-      code.push(`await page.getByText(${JSON.stringify(params.text)}).first().waitFor({ state: 'visible' });`);
-      await locator.waitFor({ state: 'visible' });
-    }
-
-    return {
-      code,
-      captureSnapshot,
-      waitForNetwork: false,
-    };
-  },
-});
-
 const close = defineTool({
   capability: 'core',
 
@@ -122,6 +74,5 @@ const resize: ToolFactory = captureSnapshot => defineTool({
 
 export default (captureSnapshot: boolean) => [
   close,
-  wait(captureSnapshot),
   resize(captureSnapshot)
 ];
diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts
new file mode 100644
index 0000000..db5b7f1
--- /dev/null
+++ b/src/tools/screenshot.ts
@@ -0,0 +1,90 @@
+/**
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { z } from 'zod';
+
+import { defineTool } from './tool.js';
+import * as javascript from '../javascript.js';
+import { outputFile } from '../config.js';
+import { generateLocator } from './utils.js';
+
+import type * as playwright from 'playwright';
+
+const screenshotSchema = z.object({
+  raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
+  filename: z.string().optional().describe('File name to save the screenshot to. Defaults to `page-{timestamp}.{png|jpeg}` if not specified.'),
+  element: z.string().optional().describe('Human-readable element description used to obtain permission to screenshot the element. If not provided, the screenshot will be taken of viewport. If element is provided, ref must be provided too.'),
+  ref: z.string().optional().describe('Exact target element reference from the page snapshot. If not provided, the screenshot will be taken of viewport. If ref is provided, element must be provided too.'),
+}).refine(data => {
+  return !!data.element === !!data.ref;
+}, {
+  message: 'Both element and ref must be provided or neither.',
+  path: ['ref', 'element']
+});
+
+const screenshot = defineTool({
+  capability: 'core',
+  schema: {
+    name: 'browser_take_screenshot',
+    title: 'Take a screenshot',
+    description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
+    inputSchema: screenshotSchema,
+    type: 'readOnly',
+  },
+
+  handle: async (context, params) => {
+    const tab = context.currentTabOrDie();
+    const snapshot = tab.snapshotOrDie();
+    const fileType = params.raw ? 'png' : 'jpeg';
+    const fileName = await outputFile(context.config, params.filename ?? `page-${new Date().toISOString()}.${fileType}`);
+    const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName };
+    const isElementScreenshot = params.element && params.ref;
+
+    const code = [
+      `// Screenshot ${isElementScreenshot ? params.element : 'viewport'} and save it as ${fileName}`,
+    ];
+
+    const locator = params.ref ? snapshot.refLocator(params.ref) : null;
+
+    if (locator)
+      code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`);
+    else
+      code.push(`await page.screenshot(${javascript.formatObject(options)});`);
+
+    const includeBase64 = !context.config.noImageResponses;
+    const action = async () => {
+      const screenshot = locator ? await locator.screenshot(options) : await tab.page.screenshot(options);
+      return {
+        content: includeBase64 ? [{
+          type: 'image' as 'image',
+          data: screenshot.toString('base64'),
+          mimeType: fileType === 'png' ? 'image/png' : 'image/jpeg',
+        }] : []
+      };
+    };
+
+    return {
+      code,
+      action,
+      captureSnapshot: true,
+      waitForNetwork: false,
+    };
+  }
+});
+
+export default [
+  screenshot,
+];
diff --git a/src/tools/snapshot.ts b/src/tools/snapshot.ts
index e6a2d66..576d578 100644
--- a/src/tools/snapshot.ts
+++ b/src/tools/snapshot.ts
@@ -18,9 +18,7 @@ import { z } from 'zod';
 
 import { defineTool } from './tool.js';
 import * as javascript from '../javascript.js';
-import { outputFile } from '../config.js';
-
-import type * as playwright from 'playwright';
+import { generateLocator } from './utils.js';
 
 const snapshot = defineTool({
   capability: 'core',
@@ -218,72 +216,6 @@ const selectOption = defineTool({
   },
 });
 
-const screenshotSchema = z.object({
-  raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
-  filename: z.string().optional().describe('File name to save the screenshot to. Defaults to `page-{timestamp}.{png|jpeg}` if not specified.'),
-  element: z.string().optional().describe('Human-readable element description used to obtain permission to screenshot the element. If not provided, the screenshot will be taken of viewport. If element is provided, ref must be provided too.'),
-  ref: z.string().optional().describe('Exact target element reference from the page snapshot. If not provided, the screenshot will be taken of viewport. If ref is provided, element must be provided too.'),
-}).refine(data => {
-  return !!data.element === !!data.ref;
-}, {
-  message: 'Both element and ref must be provided or neither.',
-  path: ['ref', 'element']
-});
-
-const screenshot = defineTool({
-  capability: 'core',
-  schema: {
-    name: 'browser_take_screenshot',
-    title: 'Take a screenshot',
-    description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
-    inputSchema: screenshotSchema,
-    type: 'readOnly',
-  },
-
-  handle: async (context, params) => {
-    const tab = context.currentTabOrDie();
-    const snapshot = tab.snapshotOrDie();
-    const fileType = params.raw ? 'png' : 'jpeg';
-    const fileName = await outputFile(context.config, params.filename ?? `page-${new Date().toISOString()}.${fileType}`);
-    const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName };
-    const isElementScreenshot = params.element && params.ref;
-
-    const code = [
-      `// Screenshot ${isElementScreenshot ? params.element : 'viewport'} and save it as ${fileName}`,
-    ];
-
-    const locator = params.ref ? snapshot.refLocator(params.ref) : null;
-
-    if (locator)
-      code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`);
-    else
-      code.push(`await page.screenshot(${javascript.formatObject(options)});`);
-
-    const includeBase64 = !context.config.noImageResponses;
-    const action = async () => {
-      const screenshot = locator ? await locator.screenshot(options) : await tab.page.screenshot(options);
-      return {
-        content: includeBase64 ? [{
-          type: 'image' as 'image',
-          data: screenshot.toString('base64'),
-          mimeType: fileType === 'png' ? 'image/png' : 'image/jpeg',
-        }] : []
-      };
-    };
-
-    return {
-      code,
-      action,
-      captureSnapshot: true,
-      waitForNetwork: false,
-    };
-  }
-});
-
-export async function generateLocator(locator: playwright.Locator): Promise<string> {
-  return (locator as any)._generateLocatorString();
-}
-
 export default [
   snapshot,
   click,
@@ -291,5 +223,4 @@ export default [
   hover,
   type,
   selectOption,
-  screenshot,
 ];
diff --git a/src/tools/utils.ts b/src/tools/utils.ts
index 6fd16d4..4a0a2a4 100644
--- a/src/tools/utils.ts
+++ b/src/tools/utils.ts
@@ -77,3 +77,7 @@ export function sanitizeForFilePath(s: string) {
     return sanitize(s);
   return sanitize(s.substring(0, separator)) + '.' + sanitize(s.substring(separator + 1));
 }
+
+export async function generateLocator(locator: playwright.Locator): Promise<string> { // Resolves to whatever the locator's _generateLocatorString() returns.
+  return (locator as any)._generateLocatorString(); // `any` cast: presumably the method is not on the public Locator type - TODO confirm
+}
diff --git a/src/tools/screen.ts b/src/tools/vision.ts
similarity index 100%
rename from src/tools/screen.ts
rename to src/tools/vision.ts
diff --git a/src/tools/wait.ts b/src/tools/wait.ts
new file mode 100644
index 0000000..fc8be82
--- /dev/null
+++ b/src/tools/wait.ts
@@ -0,0 +1,70 @@
+/**
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { z } from 'zod';
+import { defineTool, type ToolFactory } from './tool.js';
+
+// Tool factory for browser_wait_for: an optional delay, then optional waits on page text.
+const wait: ToolFactory = captureSnapshot => defineTool({
+  capability: 'wait',
+  schema: {
+    name: 'browser_wait_for',
+    title: 'Wait for',
+    description: 'Wait for text to appear or disappear or a specified time to pass',
+    inputSchema: z.object({
+      time: z.number().optional().describe('The time to wait in seconds'),
+      text: z.string().optional().describe('The text to wait for'),
+      textGone: z.string().optional().describe('The text to wait for to disappear'),
+    }),
+    type: 'readOnly',
+  },
+
+  // Performs the requested waits in a fixed order: time, then textGone, then text.
+  handle: async (context, params) => {
+    const { time, text, textGone } = params;
+    if (!(time || text || textGone))
+      throw new Error('Either time, text or textGone must be provided');
+
+    const code: string[] = [];
+    if (time) {
+      // The reported code shows the requested delay; the actual sleep is capped at 10 seconds.
+      code.push(`await new Promise(f => setTimeout(f, ${time} * 1000));`);
+      await new Promise(f => setTimeout(f, Math.min(10000, time * 1000)));
+    }
+
+    const tab = context.currentTabOrDie();
+    const firstMatch = (needle?: string) => (needle ? tab.page.getByText(needle).first() : undefined);
+    const [appearing, vanishing] = [firstMatch(text), firstMatch(textGone)];
+    if (vanishing) {
+      code.push(`await page.getByText(${JSON.stringify(textGone)}).first().waitFor({ state: 'hidden' });`);
+      await vanishing.waitFor({ state: 'hidden' });
+    }
+    if (appearing) {
+      code.push(`await page.getByText(${JSON.stringify(text)}).first().waitFor({ state: 'visible' });`);
+      await appearing.waitFor({ state: 'visible' });
+    }
+
+    return {
+      code,
+      captureSnapshot,
+      waitForNetwork: false,
+    };
+  },
+});
+
+// Default export: builds this module's tool list for the given captureSnapshot flag.
+const tools = (captureSnapshot: boolean) => [wait(captureSnapshot)];
+export default tools;
diff --git a/utils/update-readme.js b/utils/update-readme.js
index d0f6cea..fde7864 100644
--- a/utils/update-readme.js
+++ b/utils/update-readme.js
@@ -32,42 +32,46 @@ import networkTools from '../lib/tools/network.js';
 import pdfTools from '../lib/tools/pdf.js';
 import snapshotTools from '../lib/tools/snapshot.js';
 import tabsTools from '../lib/tools/tabs.js';
-import screenTools from '../lib/tools/screen.js';
+import screenshotTools from '../lib/tools/screenshot.js';
 import testTools from '../lib/tools/testing.js';
+import visionTools from '../lib/tools/vision.js';
+import waitTools from '../lib/tools/wait.js';
 
 // Category definitions for tools
 const categories = {
-  'Snapshot-based Interactions': [
+  'Interactions': [
     ...snapshotTools,
-  ],
-  'Vision-based Interactions': [
-    ...screenTools
-  ],
-  'Tab Management': [
-    ...tabsTools(true),
+    ...keyboardTools(true),
+    ...waitTools(true),
+    ...filesTools(true),
+    ...dialogsTools(true),
   ],
   'Navigation': [
     ...navigateTools(true),
   ],
-  'Keyboard': [
-    ...keyboardTools(true)
-  ],
-  'Console': [
-    ...consoleTools
-  ],
-  'Files and Media': [
-    ...filesTools(true),
-    ...pdfTools
+  'Resources': [
+    ...screenshotTools,
+    ...pdfTools,
+    ...networkTools,
+    ...consoleTools,
   ],
   'Utilities': [
-    ...commonTools(true),
     ...installTools,
-    ...dialogsTools(true),
-    ...networkTools,
+    ...commonTools(true),
+  ],
+  'Tabs': [
+    ...tabsTools(true),
   ],
   'Testing': [
     ...testTools,
   ],
+  'Vision mode': [ // README section listing the vision tools plus the factories called with `false`
+    ...visionTools,
+    ...keyboardTools(false), // explicit flag, consistent with the other factory calls in this list
+    ...waitTools(false),
+    ...filesTools(false),
+    ...dialogsTools(false),
+  ],
 };
 
 // NOTE: Can be removed when we drop Node.js 18 support and changed to import.meta.filename.
@@ -118,9 +122,12 @@ async function updateReadme() {
   const generatedLines = /** @type {string[]} */ ([]);
 
   for (const [category, categoryTools] of Object.entries(categories)) {
-    generatedLines.push(`### ${category}\n\n`);
+    generatedLines.push(`<details>\n<summary><b>${category}</b></summary>\n\n`);
+
     for (const tool of categoryTools)
       generatedLines.push(formatToolForReadme(tool.schema));
+
+    generatedLines.push(`</details>\n\n`);
   }
 
   const readmePath = path.join(path.dirname(__filename), '..', 'README.md');