diff --git a/src/connection.ts b/src/connection.ts index 1ee4c75..513c176 100644 --- a/src/connection.ts +++ b/src/connection.ts @@ -60,13 +60,6 @@ export function createConnection(config: FullConfig, browserContextFactory: Brow if (!tool) return errorResult(`Tool "${request.params.name}" not found`); - - const modalStates = context.modalStates().map(state => state.type); - if (tool.clearsModalState && !modalStates.includes(tool.clearsModalState)) - return errorResult(`The tool "${request.params.name}" can only be used when there is related modal state present.`, ...context.modalStatesMarkdown()); - if (!tool.clearsModalState && modalStates.length) - return errorResult(`Tool "${request.params.name}" does not handle the modal state.`, ...context.modalStatesMarkdown()); - try { return await context.run(tool, request.params.arguments); } catch (error) { diff --git a/src/context.ts b/src/context.ts index 3a19872..6fa4218 100644 --- a/src/context.ts +++ b/src/context.ts @@ -17,19 +17,12 @@ import debug from 'debug'; import * as playwright from 'playwright'; -import { callOnPageNoTrace, waitForCompletion } from './tools/utils.js'; -import { ManualPromise } from './manualPromise.js'; import { Tab } from './tab.js'; -import { outputFile } from './config.js'; -import type { ModalState, Tool, ToolActionResult } from './tools/tool.js'; +import type { Tool } from './tools/tool.js'; import type { FullConfig } from './config.js'; import type { BrowserContextFactory } from './browserContextFactory.js'; -type PendingAction = { - dialogShown: ManualPromise; -}; - const testDebug = debug('pw:mcp:test'); export class Context { @@ -39,9 +32,6 @@ export class Context { private _browserContextFactory: BrowserContextFactory; private _tabs: Tab[] = []; private _currentTab: Tab | undefined; - private _modalStates: (ModalState & { tab: Tab })[] = []; - private _pendingAction: PendingAction | undefined; - private _downloads: { download: playwright.Download, finished: boolean, outputFile: string }[] = []; clientVersion: { name: string; version: string; } | undefined; constructor(tools: Tool[], config: FullConfig, browserContextFactory: BrowserContextFactory) { @@ -51,42 +41,13 @@ export class Context { testDebug('create context'); } - clientSupportsImages(): boolean { - if (this.config.imageResponses === 'omit') - return false; - return true; - } - - modalStates(): ModalState[] { - return this._modalStates; - } - - setModalState(modalState: ModalState, inTab: Tab) { - this._modalStates.push({ ...modalState, tab: inTab }); - } - - clearModalState(modalState: ModalState) { - this._modalStates = this._modalStates.filter(state => state !== modalState); - } - - modalStatesMarkdown(): string[] { - const result: string[] = ['### Modal state']; - if (this._modalStates.length === 0) - result.push('- There is no modal state present'); - for (const state of this._modalStates) { - const tool = this.tools.find(tool => tool.clearsModalState === state.type); - result.push(`- [${state.description}]: can be handled by the "${tool?.schema.name}" tool`); - } - return result; - } - tabs(): Tab[] { return this._tabs; } currentTabOrDie(): Tab { if (!this._currentTab) - throw new Error('No current snapshot available. Capture a snapshot or navigate to a new location first.'); + throw new Error('No open pages available. Use the "browser_navigate" tool to navigate to a page first.'); return this._currentTab; } @@ -109,9 +70,9 @@ export class Context { return this._currentTab!; } - async listTabsMarkdown(): Promise { + async listTabsMarkdown(): Promise { if (!this._tabs.length) - return '### No tabs open'; + return ['### No tabs open']; const lines: string[] = ['### Open tabs']; for (let i = 0; i < this._tabs.length; i++) { const tab = this._tabs[i]; @@ -120,7 +81,7 @@ export class Context { const current = tab === this._currentTab ? ' (current)' : ''; lines.push(`- ${i}:${current} [${title}] (${url})`); } - return lines.join('\n'); + return lines; } async closeTab(index: number | undefined) { @@ -137,28 +98,8 @@ export class Context { if (resultOverride) return resultOverride; - if (!this._currentTab) { - return { - content: [{ - type: 'text', - text: 'No open pages available. Use the "browser_navigate" tool to navigate to a page first.', - }], - }; - } - const tab = this.currentTabOrDie(); - // TODO: race against modal dialogs to resolve clicks. - const actionResult = await this._raceAgainstModalDialogs(async () => { - try { - if (waitForNetwork) - return await waitForCompletion(this, tab, async () => action?.()) ?? undefined; - else - return await action?.() ?? undefined; - } finally { - if (captureSnapshot && !this._javaScriptBlocked()) - await tab.captureSnapshot(); - } - }); + const { actionResult, snapshot } = await tab.run(action || (() => Promise.resolve()), { waitForNetwork, captureSnapshot }); const result: string[] = []; result.push(`### Ran Playwright code @@ -166,8 +107,8 @@ export class Context { ${code.join('\n')} \`\`\``); - if (this.modalStates().length) { - result.push('', ...this.modalStatesMarkdown()); + if (tab.modalStates().length) { + result.push('', ...tab.modalStatesMarkdown()); return { content: [{ type: 'text', @@ -176,37 +117,13 @@ ${code.join('\n')} }; } - const messages = tab.takeRecentConsoleMessages(); - if (messages.length) { - result.push('', `### New console messages`); - for (const message of messages) - result.push(`- ${trim(message.toString(), 100)}`); - } + result.push(...tab.takeRecentConsoleMarkdown()); + result.push(...tab.listDownloadsMarkdown()); - if (this._downloads.length) { - result.push('', '### Downloads'); - for (const entry of this._downloads) { - if (entry.finished) - result.push(`- Downloaded file ${entry.download.suggestedFilename()} to ${entry.outputFile}`); - else - result.push(`- Downloading file ${entry.download.suggestedFilename()} ...`); - } - } - - if (captureSnapshot && tab.hasSnapshot()) { + if (snapshot) { if (this.tabs().length > 1) - result.push('', await this.listTabsMarkdown()); - - if (this.tabs().length > 1) - result.push('', '### Current tab'); - else - result.push('', '### Page state'); - - result.push( - `- Page URL: ${tab.page.url()}`, - `- Page Title: ${await tab.title()}` - ); - result.push(tab.snapshotOrDie().text()); + result.push('', ...(await this.listTabsMarkdown())); + result.push('', snapshot); } const content = actionResult?.content ?? []; @@ -222,58 +139,6 @@ ${code.join('\n')} }; } - async waitForTimeout(time: number) { - if (!this._currentTab || this._javaScriptBlocked()) { - await new Promise(f => setTimeout(f, time)); - return; - } - - await callOnPageNoTrace(this._currentTab.page, page => { - return page.evaluate(() => new Promise(f => setTimeout(f, 1000))); - }); - } - - private async _raceAgainstModalDialogs(action: () => Promise): Promise { - this._pendingAction = { - dialogShown: new ManualPromise(), - }; - - let result: ToolActionResult | undefined; - try { - await Promise.race([ - action().then(r => result = r), - this._pendingAction.dialogShown, - ]); - } finally { - this._pendingAction = undefined; - } - return result; - } - - private _javaScriptBlocked(): boolean { - return this._modalStates.some(state => state.type === 'dialog'); - } - - dialogShown(tab: Tab, dialog: playwright.Dialog) { - this.setModalState({ - type: 'dialog', - description: `"${dialog.type()}" dialog with message "${dialog.message()}"`, - dialog, - }, tab); - this._pendingAction?.dialogShown.resolve(); - } - - async downloadStarted(tab: Tab, download: playwright.Download) { - const entry = { - download, - finished: false, - outputFile: await outputFile(this.config, download.suggestedFilename()) - }; - this._downloads.push(entry); - await download.saveAs(entry.outputFile); - entry.finished = true; - } - private _onPageCreated(page: playwright.Page) { const tab = new Tab(this, page, tab => this._onPageClosed(tab)); this._tabs.push(tab); @@ -282,7 +147,6 @@ ${code.join('\n')} } private _onPageClosed(tab: Tab) { - this._modalStates = this._modalStates.filter(state => state.tab !== tab); const index = this._tabs.indexOf(tab); if (index === -1) return; @@ -353,9 +217,3 @@ ${code.join('\n')} return result; } } - -function trim(text: string, maxLength: number) { - if (text.length <= maxLength) - return text; - return text.slice(0, maxLength) + '...'; -} diff --git a/src/pageSnapshot.ts b/src/pageSnapshot.ts deleted file mode 100644 index 85f2587..0000000 --- a/src/pageSnapshot.ts +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Copyright (c) Microsoft Corporation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import * as playwright from 'playwright'; -import { callOnPageNoTrace } from './tools/utils.js'; - -type PageEx = playwright.Page & { - _snapshotForAI: () => Promise; -}; - -export class PageSnapshot { - private _page: playwright.Page; - private _text!: string; - - constructor(page: playwright.Page) { - this._page = page; - } - - static async create(page: playwright.Page): Promise { - const snapshot = new PageSnapshot(page); - await snapshot._build(); - return snapshot; - } - - text(): string { - return this._text; - } - - private async _build() { - const snapshot = await callOnPageNoTrace(this._page, page => (page as PageEx)._snapshotForAI()); - this._text = [ - `- Page Snapshot:`, - '```yaml', - snapshot, - '```', - ].join('\n'); - } - - refLocator(params: { element: string, ref: string }): playwright.Locator { - return this._page.locator(`aria-ref=${params.ref}`).describe(params.element); - } -} diff --git a/src/tab.ts b/src/tab.ts index 91746bd..8f6ea23 100644 --- a/src/tab.ts +++ b/src/tab.ts @@ -16,20 +16,33 @@ import * as playwright from 'playwright'; -import { PageSnapshot } from './pageSnapshot.js'; -import { callOnPageNoTrace } from './tools/utils.js'; +import { callOnPageNoTrace, waitForCompletion } from './tools/utils.js'; import { logUnhandledError } from './log.js'; +import { ManualPromise } from './manualPromise.js'; +import { ModalState } from './tools/tool.js'; +import { outputFile } from './config.js'; import type { Context } from './context.js'; +import type { ToolActionResult } from './tools/tool.js'; + +type PageEx = playwright.Page & { + _snapshotForAI: () => Promise; +}; + +type PendingAction = { + dialogShown: ManualPromise; +}; export class Tab { readonly context: Context; readonly page: playwright.Page; private _consoleMessages: ConsoleMessage[] = []; private _recentConsoleMessages: ConsoleMessage[] = []; + private _pendingAction: PendingAction | undefined; private _requests: Map = new Map(); - private _snapshot: PageSnapshot | undefined; private _onPageClose: (tab: Tab) => void; + private _modalStates: ModalState[] = []; + private _downloads: { download: playwright.Download, finished: boolean, outputFile: string }[] = []; constructor(context: Context, page: playwright.Page, onPageClose: (tab: Tab) => void) { this.context = context; @@ -41,20 +54,63 @@ export class Tab { page.on('response', response => this._requests.set(response.request(), response)); page.on('close', () => this._onClose()); page.on('filechooser', chooser => { - this.context.setModalState({ + this.setModalState({ type: 'fileChooser', description: 'File chooser', fileChooser: chooser, - }, this); + }); }); - page.on('dialog', dialog => this.context.dialogShown(this, dialog)); + page.on('dialog', dialog => this._dialogShown(dialog)); page.on('download', download => { - void this.context.downloadStarted(this, download); + void this._downloadStarted(download); }); page.setDefaultNavigationTimeout(60000); page.setDefaultTimeout(5000); } + modalStates(): ModalState[] { + return this._modalStates; + } + + setModalState(modalState: ModalState) { + this._modalStates.push(modalState); + } + + clearModalState(modalState: ModalState) { + this._modalStates = this._modalStates.filter(state => state !== modalState); + } + + modalStatesMarkdown(): string[] { + const result: string[] = ['### Modal state']; + if (this._modalStates.length === 0) + result.push('- There is no modal state present'); + for (const state of this._modalStates) { + const tool = this.context.tools.find(tool => tool.clearsModalState === state.type); + result.push(`- [${state.description}]: can be handled by the "${tool?.schema.name}" tool`); + } + return result; + } + + private _dialogShown(dialog: playwright.Dialog) { + this.setModalState({ + type: 'dialog', + description: `"${dialog.type()}" dialog with message "${dialog.message()}"`, + dialog, + }); + this._pendingAction?.dialogShown.resolve(); + } + + private async _downloadStarted(download: playwright.Download) { + const entry = { + download, + finished: false, + outputFile: await outputFile(this.context.config, download.suggestedFilename()) + }; + this._downloads.push(entry); + await download.saveAs(entry.outputFile); + entry.finished = true; + } + private _clearCollectedArtifacts() { this._consoleMessages.length = 0; this._recentConsoleMessages.length = 0; @@ -105,16 +161,6 @@ export class Tab { await this.waitForLoadState('load', { timeout: 5000 }); } - hasSnapshot(): boolean { - return !!this._snapshot; - } - - snapshotOrDie(): PageSnapshot { - if (!this._snapshot) - throw new Error('No snapshot available'); - return this._snapshot; - } - consoleMessages(): ConsoleMessage[] { return this._consoleMessages; } @@ -123,15 +169,102 @@ export class Tab { return this._requests; } - async captureSnapshot() { - this._snapshot = await PageSnapshot.create(this.page); + takeRecentConsoleMarkdown(): string[] { + if (!this._recentConsoleMessages.length) + return []; + const result = this._recentConsoleMessages.map(message => { + return `- ${trim(message.toString(), 100)}`; + }); + return ['', `### New console messages`, ...result]; } - takeRecentConsoleMessages(): ConsoleMessage[] { - const result = this._recentConsoleMessages.slice(); - this._recentConsoleMessages.length = 0; + listDownloadsMarkdown(): string[] { + if (!this._downloads.length) + return []; + + const result: string[] = ['', '### Downloads']; + for (const entry of this._downloads) { + if (entry.finished) + result.push(`- Downloaded file ${entry.download.suggestedFilename()} to ${entry.outputFile}`); + else + result.push(`- Downloading file ${entry.download.suggestedFilename()} ...`); + } return result; } + + async captureSnapshot(): Promise { + const snapshot = await (this.page as PageEx)._snapshotForAI(); + return [ + `### Page state`, + `- Page URL: ${this.page.url()}`, + `- Page Title: ${await this.page.title()}`, + `- Page Snapshot:`, + '```yaml', + snapshot, + '```', + ].join('\n'); + } + + private _javaScriptBlocked(): boolean { + return this._modalStates.some(state => state.type === 'dialog'); + } + + private async _raceAgainstModalDialogs(action: () => Promise): Promise { + this._pendingAction = { + dialogShown: new ManualPromise(), + }; + + let result: R | undefined; + try { + await Promise.race([ + action().then(r => result = r), + this._pendingAction.dialogShown, + ]); + } finally { + this._pendingAction = undefined; + } + return result; + } + + async run(callback: () => Promise, options: { waitForNetwork?: boolean, captureSnapshot?: boolean }): Promise<{ actionResult: ToolActionResult | undefined, snapshot: string | undefined }> { + let snapshot: string | undefined; + const actionResult = await this._raceAgainstModalDialogs(async () => { + try { + if (options.waitForNetwork) + return await waitForCompletion(this, async () => callback?.()) ?? undefined; + else + return await callback?.() ?? undefined; + } finally { + if (options.captureSnapshot && !this._javaScriptBlocked()) + snapshot = await this.captureSnapshot(); + } + }); + return { actionResult, snapshot }; + } + + async refLocator(params: { element: string, ref: string }): Promise { + return (await this.refLocators([params]))[0]; + } + + async refLocators(params: { element: string, ref: string }[]): Promise { + const snapshot = await this.captureSnapshot(); + return params.map(param => { + if (!snapshot.includes(`[ref=${param.ref}]`)) + throw new Error(`Ref ${param.ref} not found in the current page snapshot. Try capturing new snapshot.`); + return this.page.locator(`aria-ref=${param.ref}`).describe(param.element); + }); + } + + async waitForTimeout(time: number) { + if (this._javaScriptBlocked()) { + await new Promise(f => setTimeout(f, time)); + return; + } + + await callOnPageNoTrace(this.page, page => { + return page.evaluate(() => new Promise(f => setTimeout(f, 1000))); + }); + } } export type ConsoleMessage = { @@ -162,3 +295,9 @@ function pageErrorToConsoleMessage(errorOrValue: Error | any): ConsoleMessage { toString: () => String(errorOrValue), }; } + +function trim(text: string, maxLength: number) { + if (text.length <= maxLength) + return text; + return text.slice(0, maxLength) + '...'; +} diff --git a/src/tools/common.ts b/src/tools/common.ts index 5a8e064..614ae3b 100644 --- a/src/tools/common.ts +++ b/src/tools/common.ts @@ -15,7 +15,7 @@ */ import { z } from 'zod'; -import { defineTool } from './tool.js'; +import { defineTabTool, defineTool } from './tool.js'; const close = defineTool({ capability: 'core', @@ -38,7 +38,7 @@ const close = defineTool({ }, }); -const resize = defineTool({ +const resize = defineTabTool({ capability: 'core', schema: { name: 'browser_resize', @@ -51,9 +51,7 @@ const resize = defineTool({ type: 'readOnly', }, - handle: async (context, params) => { - const tab = context.currentTabOrDie(); - + handle: async (tab, params) => { const code = [ `// Resize browser window to ${params.width}x${params.height}`, `await page.setViewportSize({ width: ${params.width}, height: ${params.height} });` diff --git a/src/tools/console.ts b/src/tools/console.ts index 704fa05..02f9aa7 100644 --- a/src/tools/console.ts +++ b/src/tools/console.ts @@ -15,9 +15,9 @@ */ import { z } from 'zod'; -import { defineTool } from './tool.js'; +import { defineTabTool } from './tool.js'; -const console = defineTool({ +const console = defineTabTool({ capability: 'core', schema: { name: 'browser_console_messages', @@ -26,8 +26,8 @@ const console = defineTool({ inputSchema: z.object({}), type: 'readOnly', }, - handle: async context => { - const messages = context.currentTabOrDie().consoleMessages(); + handle: async tab => { + const messages = tab.consoleMessages(); const log = messages.map(message => message.toString()).join('\n'); return { code: [`// `], diff --git a/src/tools/dialogs.ts b/src/tools/dialogs.ts index 5eaf905..4527a47 100644 --- a/src/tools/dialogs.ts +++ b/src/tools/dialogs.ts @@ -15,9 +15,9 @@ */ import { z } from 'zod'; -import { defineTool } from './tool.js'; +import { defineTabTool } from './tool.js'; -const handleDialog = defineTool({ +const handleDialog = defineTabTool({ capability: 'core', schema: { @@ -31,8 +31,8 @@ const handleDialog = defineTool({ type: 'destructive', }, - handle: async (context, params) => { - const dialogState = context.modalStates().find(state => state.type === 'dialog'); + handle: async (tab, params) => { + const dialogState = tab.modalStates().find(state => state.type === 'dialog'); if (!dialogState) throw new Error('No dialog visible'); @@ -41,7 +41,7 @@ const handleDialog = defineTool({ else await dialogState.dialog.dismiss(); - context.clearModalState(dialogState); + tab.clearModalState(dialogState); const code = [ `// `, diff --git a/src/tools/evaluate.ts b/src/tools/evaluate.ts index 73820e5..7097cb8 100644 --- a/src/tools/evaluate.ts +++ b/src/tools/evaluate.ts @@ -16,7 +16,7 @@ import { z } from 'zod'; -import { defineTool } from './tool.js'; +import { defineTabTool } from './tool.js'; import * as javascript from '../javascript.js'; import { generateLocator } from './utils.js'; @@ -28,7 +28,7 @@ const evaluateSchema = z.object({ ref: z.string().optional().describe('Exact target element reference from the page snapshot'), }); -const evaluate = defineTool({ +const evaluate = defineTabTool({ capability: 'core', schema: { name: 'browser_evaluate', @@ -38,14 +38,12 @@ const evaluate = defineTool({ type: 'destructive', }, - handle: async (context, params) => { - const tab = context.currentTabOrDie(); + handle: async (tab, params) => { const code: string[] = []; let locator: playwright.Locator | undefined; if (params.ref && params.element) { - const snapshot = tab.snapshotOrDie(); - locator = snapshot.refLocator({ ref: params.ref, element: params.element }); + locator = await tab.refLocator({ ref: params.ref, element: params.element }); code.push(`await page.${await generateLocator(locator)}.evaluate(${javascript.quote(params.function)});`); } else { code.push(`await page.evaluate(${javascript.quote(params.function)});`); diff --git a/src/tools/files.ts b/src/tools/files.ts index a396cf7..f1f0bdb 100644 --- a/src/tools/files.ts +++ b/src/tools/files.ts @@ -15,9 +15,9 @@ */ import { z } from 'zod'; -import { defineTool } from './tool.js'; +import { defineTabTool } from './tool.js'; -const uploadFile = defineTool({ +const uploadFile = defineTabTool({ capability: 'core', schema: { @@ -30,8 +30,8 @@ const uploadFile = defineTool({ type: 'destructive', }, - handle: async (context, params) => { - const modalState = context.modalStates().find(state => state.type === 'fileChooser'); + handle: async (tab, params) => { + const modalState = tab.modalStates().find(state => state.type === 'fileChooser'); if (!modalState) throw new Error('No file chooser visible'); @@ -41,7 +41,7 @@ const uploadFile = defineTool({ const action = async () => { await modalState.fileChooser.setFiles(params.paths); - context.clearModalState(modalState); + tab.clearModalState(modalState); }; return { diff --git a/src/tools/keyboard.ts b/src/tools/keyboard.ts index 1687ddd..ffb20cc 100644 --- a/src/tools/keyboard.ts +++ b/src/tools/keyboard.ts @@ -16,12 +16,12 @@ import { z } from 'zod'; -import { defineTool } from './tool.js'; +import { defineTabTool } from './tool.js'; import { elementSchema } from './snapshot.js'; import { generateLocator } from './utils.js'; import * as javascript from '../javascript.js'; -const pressKey = defineTool({ +const pressKey = defineTabTool({ capability: 'core', schema: { @@ -34,9 +34,7 @@ const pressKey = defineTool({ type: 'destructive', }, - handle: async (context, params) => { - const tab = context.currentTabOrDie(); - + handle: async (tab, params) => { const code = [ `// Press ${params.key}`, `await page.keyboard.press('${params.key}');`, @@ -59,7 +57,7 @@ const typeSchema = elementSchema.extend({ slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'), }); -const type = defineTool({ +const type = defineTabTool({ capability: 'core', schema: { name: 'browser_type', @@ -69,9 +67,8 @@ const type = defineTool({ type: 'destructive', }, - handle: async (context, params) => { - const snapshot = context.currentTabOrDie().snapshotOrDie(); - const locator = snapshot.refLocator(params); + handle: async (tab, params) => { + const locator = await tab.refLocator(params); const code: string[] = []; const steps: (() => Promise)[] = []; diff --git a/src/tools/mouse.ts b/src/tools/mouse.ts index 9171eb7..8015484 100644 --- a/src/tools/mouse.ts +++ b/src/tools/mouse.ts @@ -15,13 +15,13 @@ */ import { z } from 'zod'; -import { defineTool } from './tool.js'; +import { defineTabTool } from './tool.js'; const elementSchema = z.object({ element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'), }); -const mouseMove = defineTool({ +const mouseMove = defineTabTool({ capability: 'vision', schema: { name: 'browser_mouse_move_xy', @@ -34,8 +34,7 @@ const mouseMove = defineTool({ type: 'readOnly', }, - handle: async (context, params) => { - const tab = context.currentTabOrDie(); + handle: async (tab, params) => { const code = [ `// Move mouse to (${params.x}, ${params.y})`, `await page.mouse.move(${params.x}, ${params.y});`, @@ -50,7 +49,7 @@ const mouseMove = defineTool({ }, }); -const mouseClick = defineTool({ +const mouseClick = defineTabTool({ capability: 'vision', schema: { name: 'browser_mouse_click_xy', @@ -63,8 +62,7 @@ const mouseClick = defineTool({ type: 'destructive', }, - handle: async (context, params) => { - const tab = context.currentTabOrDie(); + handle: async (tab, params) => { const code = [ `// Click mouse at coordinates (${params.x}, ${params.y})`, `await page.mouse.move(${params.x}, ${params.y});`, @@ -85,7 +83,7 @@ const mouseClick = defineTool({ }, }); -const mouseDrag = defineTool({ +const mouseDrag = defineTabTool({ capability: 'vision', schema: { name: 'browser_mouse_drag_xy', @@ -100,9 +98,7 @@ const mouseDrag = defineTool({ type: 'destructive', }, - handle: async (context, params) => { - const tab = context.currentTabOrDie(); - + handle: async (tab, params) => { const code = [ `// Drag mouse from (${params.startX}, ${params.startY}) to (${params.endX}, ${params.endY})`, `await page.mouse.move(${params.startX}, ${params.startY});`, diff --git a/src/tools/navigate.ts b/src/tools/navigate.ts index a210a13..581550f 100644 --- a/src/tools/navigate.ts +++ b/src/tools/navigate.ts @@ -15,7 +15,7 @@ */ import { z } from 'zod'; -import { defineTool } from './tool.js'; +import { defineTool, defineTabTool } from './tool.js'; const navigate = defineTool({ capability: 'core', @@ -47,7 +47,7 @@ const navigate = defineTool({ }, }); -const goBack = defineTool({ +const goBack = defineTabTool({ capability: 'core', schema: { name: 'browser_navigate_back', @@ -57,8 +57,7 @@ const goBack = defineTool({ type: 'readOnly', }, - handle: async context => { - const tab = await context.ensureTab(); + handle: async tab => { await tab.page.goBack(); const code = [ `// Navigate back`, @@ -73,7 +72,7 @@ const goBack = defineTool({ }, }); -const goForward = defineTool({ +const goForward = defineTabTool({ capability: 'core', schema: { name: 'browser_navigate_forward', @@ -82,8 +81,7 @@ const goForward = defineTool({ inputSchema: z.object({}), type: 'readOnly', }, - handle: async context => { - const tab = context.currentTabOrDie(); + handle: async tab => { await tab.page.goForward(); const code = [ `// Navigate forward`, diff --git a/src/tools/network.ts b/src/tools/network.ts index 9e1946c..1874305 100644 --- a/src/tools/network.ts +++ b/src/tools/network.ts @@ -15,11 +15,11 @@ */ import { z } from 'zod'; -import { defineTool } from './tool.js'; +import { defineTabTool } from './tool.js'; import type * as playwright from 'playwright'; -const requests = defineTool({ +const requests = defineTabTool({ capability: 'core', schema: { @@ -30,8 +30,8 @@ const requests = defineTool({ type: 'readOnly', }, - handle: async context => { - const requests = context.currentTabOrDie().requests(); + handle: async tab => { + const requests = tab.requests(); const log = [...requests.entries()].map(([request, response]) => renderRequest(request, response)).join('\n'); return { code: [`// `], diff --git a/src/tools/pdf.ts b/src/tools/pdf.ts index c020f03..0d40a69 100644 --- a/src/tools/pdf.ts +++ b/src/tools/pdf.ts @@ -15,7 +15,7 @@ */ import { z } from 'zod'; -import { defineTool } from './tool.js'; +import { defineTabTool } from './tool.js'; import * as javascript from '../javascript.js'; import { outputFile } from '../config.js'; @@ -24,7 +24,7 @@ const pdfSchema = z.object({ filename: z.string().optional().describe('File name to save the pdf to. Defaults to `page-{timestamp}.pdf` if not specified.'), }); -const pdf = defineTool({ +const pdf = defineTabTool({ capability: 'pdf', schema: { @@ -35,9 +35,8 @@ const pdf = defineTool({ type: 'readOnly', }, - handle: async (context, params) => { - const tab = context.currentTabOrDie(); - const fileName = await outputFile(context.config, params.filename ?? `page-${new Date().toISOString()}.pdf`); + handle: async (tab, params) => { + const fileName = await outputFile(tab.context.config, params.filename ?? `page-${new Date().toISOString()}.pdf`); const code = [ `// Save page as ${fileName}`, diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts index 9544737..317c8c9 100644 --- a/src/tools/screenshot.ts +++ b/src/tools/screenshot.ts @@ -16,7 +16,7 @@ import { z } from 'zod'; -import { defineTool } from './tool.js'; +import { defineTabTool } from './tool.js'; import * as javascript from '../javascript.js'; import { outputFile } from '../config.js'; import { generateLocator } from './utils.js'; @@ -41,7 +41,7 @@ const screenshotSchema = z.object({ path: ['fullPage'] }); -const screenshot = defineTool({ +const screenshot = defineTabTool({ capability: 'core', schema: { name: 'browser_take_screenshot', @@ -51,10 +51,9 @@ const screenshot = defineTool({ type: 'readOnly', }, - handle: async (context, params) => { - const tab = context.currentTabOrDie(); + handle: async (tab, params) => { const fileType = params.raw ? 'png' : 'jpeg'; - const fileName = await outputFile(context.config, params.filename ?? `page-${new Date().toISOString()}.${fileType}`); + const fileName = await outputFile(tab.context.config, params.filename ?? `page-${new Date().toISOString()}.${fileType}`); const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, @@ -70,14 +69,14 @@ const screenshot = defineTool({ ]; // Only get snapshot when element screenshot is needed - const locator = params.ref ? tab.snapshotOrDie().refLocator({ element: params.element || '', ref: params.ref }) : null; + const locator = params.ref ? await tab.refLocator({ element: params.element || '', ref: params.ref }) : null; if (locator) code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`); else code.push(`await page.screenshot(${javascript.formatObject(options)});`); - const includeBase64 = context.clientSupportsImages(); + const includeBase64 = tab.context.config.imageResponses !== 'omit'; const action = async () => { const screenshot = locator ? await locator.screenshot(options) : await tab.page.screenshot(options); return { diff --git a/src/tools/snapshot.ts b/src/tools/snapshot.ts index 1cca749..2534b9b 100644 --- a/src/tools/snapshot.ts +++ b/src/tools/snapshot.ts @@ -16,7 +16,7 @@ import { z } from 'zod'; -import { defineTool } from './tool.js'; +import { defineTabTool, defineTool } from './tool.js'; import * as javascript from '../javascript.js'; import { generateLocator } from './utils.js'; @@ -51,7 +51,7 @@ const clickSchema = elementSchema.extend({ button: z.enum(['left', 'right', 'middle']).optional().describe('Button to click, defaults to left'), }); -const click = defineTool({ +const click = defineTabTool({ capability: 'core', schema: { name: 'browser_click', @@ -61,9 +61,8 @@ const click = defineTool({ type: 'destructive', }, - handle: async (context, params) => { - const tab = context.currentTabOrDie(); - const locator = tab.snapshotOrDie().refLocator(params); + handle: async (tab, params) => { + const locator = await tab.refLocator(params); const button = params.button; const buttonAttr = button ? `{ button: '${button}' }` : ''; @@ -85,7 +84,7 @@ const click = defineTool({ }, }); -const drag = defineTool({ +const drag = defineTabTool({ capability: 'core', schema: { name: 'browser_drag', @@ -100,10 +99,11 @@ const drag = defineTool({ type: 'destructive', }, - handle: async (context, params) => { - const snapshot = context.currentTabOrDie().snapshotOrDie(); - const startLocator = snapshot.refLocator({ ref: params.startRef, element: params.startElement }); - const endLocator = snapshot.refLocator({ ref: params.endRef, element: params.endElement }); + handle: async (tab, params) => { + const [startLocator, endLocator] = await tab.refLocators([ + { ref: params.startRef, element: params.startElement }, + { ref: params.endRef, element: params.endElement }, + ]); const code = [ `// Drag ${params.startElement} to ${params.endElement}`, @@ -119,7 +119,7 @@ const drag = defineTool({ }, }); -const hover = defineTool({ +const hover = defineTabTool({ capability: 'core', schema: { name: 'browser_hover', @@ -129,9 +129,8 @@ const hover = defineTool({ type: 'readOnly', }, - handle: async (context, params) => { - const snapshot = context.currentTabOrDie().snapshotOrDie(); - const locator = snapshot.refLocator(params); + handle: async (tab, params) => { + const locator = await tab.refLocator(params); const code = [ `// Hover over ${params.element}`, @@ -151,7 +150,7 @@ const selectOptionSchema = elementSchema.extend({ values: z.array(z.string()).describe('Array of values to select in the dropdown. This can be a single value or multiple values.'), }); -const selectOption = defineTool({ +const selectOption = defineTabTool({ capability: 'core', schema: { name: 'browser_select_option', @@ -161,9 +160,8 @@ const selectOption = defineTool({ type: 'destructive', }, - handle: async (context, params) => { - const snapshot = context.currentTabOrDie().snapshotOrDie(); - const locator = snapshot.refLocator(params); + handle: async (tab, params) => { + const locator = await tab.refLocator(params); const code = [ `// Select options [${params.values.join(', ')}] in ${params.element}`, diff --git a/src/tools/tabs.ts b/src/tools/tabs.ts index 5256fee..d0659b3 100644 --- a/src/tools/tabs.ts +++ b/src/tools/tabs.ts @@ -37,7 +37,7 @@ const listTabs = defineTool({ resultOverride: { content: [{ type: 'text', - text: await context.listTabsMarkdown(), + text: (await context.listTabsMarkdown()).join('\n'), }], }, }; @@ -85,9 +85,9 @@ const newTab = defineTool({ }, handle: async (context, params) => { - await context.newTab(); + const tab = await context.newTab(); if (params.url) - await context.currentTabOrDie().navigate(params.url); + await tab.navigate(params.url); const code = [ `// `, diff --git a/src/tools/tool.ts b/src/tools/tool.ts index 2f9f5d0..628df7f 100644 --- a/src/tools/tool.ts +++ b/src/tools/tool.ts @@ -19,6 +19,7 @@ import type { z } from 'zod'; import type { Context } from '../context.js'; import type * as playwright from 'playwright'; import type { ToolCapability } from '../../config.js'; +import type { Tab } from '../tab.js'; export type ToolSchema = { name: string; @@ -64,3 +65,25 @@ export type Tool = { export function defineTool(tool: Tool): Tool { return tool; } + +export type TabTool = { + capability: ToolCapability; + schema: ToolSchema; + clearsModalState?: ModalState['type']; + handle: (tab: Tab, params: z.output) => Promise; +}; + +export function defineTabTool(tool: TabTool): Tool { + return { + ...tool, + handle: async (context, params) => { + const tab = context.currentTabOrDie(); + const modalStates = tab.modalStates().map(state => state.type); + if (tool.clearsModalState && !modalStates.includes(tool.clearsModalState)) + throw new Error(`The tool "${tool.schema.name}" can only be used when there is related modal state present.\n` + tab.modalStatesMarkdown().join('\n')); + if (!tool.clearsModalState && modalStates.length) + throw new Error(`Tool "${tool.schema.name}" does not handle the modal state.\n` + tab.modalStatesMarkdown().join('\n')); + return tool.handle(tab, params); + }, + }; +} diff --git a/src/tools/utils.ts b/src/tools/utils.ts index 68929c5..dfacef2 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -18,10 +18,9 @@ import { asLocator } from 'playwright-core/lib/utils'; import type * as playwright from 'playwright'; -import type { Context } from '../context.js'; import type { Tab } from '../tab.js'; -export async function waitForCompletion(context: Context, tab: Tab, callback: () => Promise): Promise { +export async function waitForCompletion(tab: Tab, callback: () => Promise): Promise { const requests = new Set(); let frameNavigated = false; let waitCallback: () => void = () => {}; @@ -65,7 +64,7 @@ export async function waitForCompletion(context: Context, tab: Tab, callback: if (!requests.size && !frameNavigated) waitCallback(); await waitBarrier; - await context.waitForTimeout(1000); + await tab.waitForTimeout(1000); return result; } finally { dispose(); diff --git a/tests/cdp.spec.ts b/tests/cdp.spec.ts index 57ce17a..d64aebb 100644 --- a/tests/cdp.spec.ts +++ b/tests/cdp.spec.ts @@ -41,7 +41,7 @@ test('cdp server reuse tab', async ({ cdpServer, startClient, server }) => { element: 'Hello, world!', ref: 'f0', }, - })).toHaveTextContent(`Error: No current snapshot available. Capture a snapshot or navigate to a new location first.`); + })).toHaveTextContent(`Error: No open pages available. Use the \"browser_navigate\" tool to navigate to a page first.`); expect(await client.callTool({ name: 'browser_snapshot', diff --git a/tests/core.spec.ts b/tests/core.spec.ts index 34b398b..abc6c77 100644 --- a/tests/core.spec.ts +++ b/tests/core.spec.ts @@ -242,7 +242,7 @@ test('old locator error message', async ({ client, server }) => { element: 'Button 2', ref: 'e3', }, - })).toContainTextContent('Ref not found'); + })).toContainTextContent('Ref e3 not found in the current page snapshot. Try capturing new snapshot.'); }); test('visibility: hidden > visible should be shown', { annotation: { type: 'issue', description: 'https://github.com/microsoft/playwright-mcp/issues/535' } }, async ({ client, server }) => { diff --git a/tests/files.spec.ts b/tests/files.spec.ts index 600d392..63f7d9d 100644 --- a/tests/files.spec.ts +++ b/tests/files.spec.ts @@ -38,7 +38,7 @@ test('browser_file_upload', async ({ client, server }, testInfo) => { name: 'browser_file_upload', arguments: { paths: [] }, })).toHaveTextContent(` -The tool "browser_file_upload" can only be used when there is related modal state present. +Error: The tool "browser_file_upload" can only be used when there is related modal state present. ### Modal state - There is no modal state present `.trim()); @@ -88,7 +88,7 @@ The tool "browser_file_upload" can only be used when there is related modal stat }, }); - expect(response).toContainTextContent(`Tool "browser_click" does not handle the modal state. + expect(response).toContainTextContent(`Error: Tool "browser_click" does not handle the modal state. ### Modal state - [File chooser]: can be handled by the "browser_file_upload" tool`); } diff --git a/tests/tabs.spec.ts b/tests/tabs.spec.ts index 174cef4..29394fb 100644 --- a/tests/tabs.spec.ts +++ b/tests/tabs.spec.ts @@ -49,7 +49,7 @@ test('create new tab', async ({ client }) => { - 0: [] (about:blank) - 1: (current) [Tab one] (data:text/html,Tab oneBody one) -### Current tab +### Page state - Page URL: data:text/html,Tab oneBody one - Page Title: Tab one - Page Snapshot: @@ -63,7 +63,7 @@ test('create new tab', async ({ client }) => { - 1: [Tab one] (data:text/html,Tab oneBody one) - 2: (current) [Tab two] (data:text/html,Tab twoBody two) -### Current tab +### Page state - Page URL: data:text/html,Tab twoBody two - Page Title: Tab two - Page Snapshot: @@ -75,23 +75,21 @@ test('create new tab', async ({ client }) => { test('select tab', async ({ client }) => { await createTab(client, 'Tab one', 'Body one'); await createTab(client, 'Tab two', 'Body two'); - expect(await client.callTool({ + + const result = await client.callTool({ name: 'browser_tab_select', arguments: { index: 1, }, - })).toHaveTextContent(` -### Ran Playwright code -\`\`\`js -// -\`\`\` - + }); + expect(result).toContainTextContent(` ### Open tabs - 0: [] (about:blank) - 1: (current) [Tab one] (data:text/html,Tab oneBody one) -- 2: [Tab two] (data:text/html,Tab twoBody two) +- 2: [Tab two] (data:text/html,Tab twoBody two)`); -### Current tab + expect(result).toContainTextContent(` +### Page state - Page URL: data:text/html,Tab oneBody one - Page Title: Tab one - Page Snapshot: @@ -103,22 +101,20 @@ test('select tab', async ({ client }) => { test('close tab', async ({ client }) => { await createTab(client, 'Tab one', 'Body one'); await createTab(client, 'Tab two', 'Body two'); - expect(await client.callTool({ + + const result = await client.callTool({ name: 'browser_tab_close', arguments: { index: 2, }, - })).toHaveTextContent(` -### Ran Playwright code -\`\`\`js -// -\`\`\` - + }); + expect(result).toContainTextContent(` ### Open tabs - 0: [] (about:blank) -- 1: (current) [Tab one] (data:text/html,Tab oneBody one) +- 1: (current) [Tab one] (data:text/html,Tab oneBody one)`); -### Current tab + expect(result).toContainTextContent(` +### Page state - Page URL: data:text/html,Tab oneBody one - Page Title: Tab one - Page Snapshot: