/**
 * Copyright (c) Microsoft Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import * as playwright from 'playwright';
import yaml from 'yaml';

import { waitForCompletion } from './tools/utils';

import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types';
import type { ModalState, Tool } from './tools/tool';

/**
 * Options that decide how the browser context is obtained.
 *
 * Precedence (see `Context._createBrowserContext`): `remoteEndpoint` is tried
 * first, then `cdpEndpoint`, otherwise a persistent context is launched.
 */
export type ContextOptions = {
  /** Browser engine; `chromium` is used when omitted. */
  browserName?: 'chromium' | 'firefox' | 'webkit';
  /** Profile directory handed to `launchPersistentContext`. */
  userDataDir: string;
  /** Forwarded to `launchPersistentContext`, or serialized into the remote endpoint URL. */
  launchOptions?: playwright.LaunchOptions;
  /** Endpoint handed to `chromium.connectOverCDP`. */
  cdpEndpoint?: string;
  /** Endpoint handed to `browserType.connect`. */
  remoteEndpoint?: string;
};

type PageOrFrameLocator = playwright.Page | playwright.FrameLocator;

/**
 * Owns the (lazily created) browser context, the list of open tabs, the
 * current tab and any pending modal states, and runs tools against them.
 */
export class Context {
  readonly tools: Tool[];
  readonly options: ContextOptions;
  private _browser: playwright.Browser | undefined;
  private _browserContext: playwright.BrowserContext | undefined;
  private _tabs: Tab[] = [];
  private _currentTab: Tab | undefined;
  private _modalStates: (ModalState & { tab: Tab })[] = [];

  constructor(tools: Tool[], options: ContextOptions) {
    this.tools = tools;
    this.options = options;
  }

  /** Returns the pending modal states (the stored entries, each tagged with its tab). */
  modalStates(): ModalState[] {
    return this._modalStates;
  }

  /** Records a modal state raised by `inTab`. A copy tagged with the tab is stored. */
  setModalState(modalState: ModalState, inTab: Tab) {
    this._modalStates.push({ ...modalState, tab: inTab });
  }

  /**
   * Removes a modal state by identity. Because `setModalState` stores a copy,
   * `modalState` must be an entry obtained from `modalStates()`.
   */
  clearModalState(modalState: ModalState) {
    this._modalStates = this._modalStates.filter(state => state !== modalState);
  }

  /** Renders the pending modal states as markdown lines, naming the tool whose `clearsModalState` matches each one. */
  modalStatesMarkdown(): string[] {
    const result: string[] = ['### Modal state'];
    for (const state of this._modalStates) {
      const tool = this.tools.find(t => t.clearsModalState === state.type);
      result.push(`- [${state.description}]: can be handled by the "${tool?.schema.name}" tool`);
    }
    return result;
  }

  /** Returns the open tabs in creation order. */
  tabs(): Tab[] {
    return this._tabs;
  }

  /**
   * Returns the current tab.
   *
   * @throws Error when there is no current tab.
   */
  currentTabOrDie(): Tab {
    if (!this._currentTab)
      throw new Error('No current snapshot available. Capture a snapshot or navigate to a new location first.');
    return this._currentTab;
  }

  /** Opens a new page in the browser context and makes its tab current. */
  async newTab(): Promise<Tab> {
    const browserContext = await this._ensureBrowserContext();
    const page = await browserContext.newPage();
    // The Tab is registered by the 'page' listener installed in
    // _ensureBrowserContext; presumably it has fired by the time newPage() resolves.
    this._currentTab = this._tabs.find(t => t.page === page)!;
    return this._currentTab;
  }

  /**
   * Makes the tab at the given 1-based index current and brings it to front.
   *
   * @throws Error when no tab exists at `index`; the current tab is left untouched.
   */
  async selectTab(index: number) {
    const tab = this._tabs[index - 1];
    if (!tab)
      throw new Error(`Tab ${index} does not exist. There are ${this._tabs.length} open tab(s).`);
    this._currentTab = tab;
    await tab.page.bringToFront();
  }

  /** Ensures a browser context exists and, when there is no current tab, opens a page. */
  async ensureTab(): Promise<Tab> {
    const context = await this._ensureBrowserContext();
    if (!this._currentTab)
      await context.newPage();
    return this._currentTab!;
  }

  /** Renders a markdown list of open tabs (1-based), marking the current one. */
  async listTabsMarkdown(): Promise<string> {
    if (!this._tabs.length)
      return '### No tabs open';
    const lines: string[] = ['### Open tabs'];
    for (let i = 0; i < this._tabs.length; i++) {
      const tab = this._tabs[i];
      const title = await tab.page.title();
      const url = tab.page.url();
      const current = tab === this._currentTab ? ' (current)' : '';
      lines.push(`- ${i + 1}:${current} [${title}] (${url})`);
    }
    return lines.join('\n');
  }

  /**
   * Closes the tab at the given 1-based index, or the current tab when
   * `index` is undefined. A missing tab is a no-op.
   *
   * @returns the tab list markdown after the close call.
   */
  async closeTab(index: number | undefined) {
    const tab = index === undefined ? this._currentTab : this._tabs[index - 1];
    await tab?.page.close();
    return await this.listTabsMarkdown();
  }

  /**
   * Runs a tool and renders the outcome as content items.
   *
   * Flow: `tool.handle` produces the code and the action; if there is no
   * current tab afterwards, a hint message is returned. Otherwise the action
   * runs (wrapped in `waitForCompletion` when `waitForNetwork` is set) and a
   * snapshot is captured in `finally` when `captureSnapshot` is set. When
   * modal states are pending, only the code and the modal-state list are
   * returned.
   */
  async run(tool: Tool, params: Record<string, unknown> | undefined) {
    // Tab management is done outside of the action() call.
    const toolResult = await tool.handle(this, params);
    const { code, action, waitForNetwork, captureSnapshot } = toolResult;

    if (!this._currentTab) {
      return {
        content: [{
          type: 'text',
          text: 'No open pages available. Use the "browser_navigate" tool to navigate to a page first.',
        }],
      };
    }

    const tab = this.currentTabOrDie();
    // TODO: race against modal dialogs to resolve clicks.
    let actionResult: { content?: (ImageContent | TextContent)[] } | undefined;
    try {
      if (waitForNetwork)
        actionResult = await waitForCompletion(tab.page, () => action()) ?? undefined;
      else
        actionResult = await action();
    } finally {
      // Snapshot even when the action throws, so the stored snapshot reflects the page after the attempt.
      if (captureSnapshot)
        await tab.captureSnapshot();
    }

    const result: string[] = [];
    result.push(`- Ran Playwright code:
\`\`\`js
${code.join('\n')}
\`\`\`
`);

    if (this.modalStates().length) {
      result.push(...this.modalStatesMarkdown());
      return {
        content: [{
          type: 'text',
          text: result.join('\n'),
        }],
      };
    }

    if (this.tabs().length > 1)
      result.push(await this.listTabsMarkdown(), '');

    if (tab.hasSnapshot()) {
      if (this.tabs().length > 1)
        result.push('### Current tab');
      result.push(tab.snapshotOrDie().text());
    }

    const content = actionResult?.content ?? [];

    return {
      content: [
        ...content,
        {
          type: 'text',
          text: result.join('\n'),
        }
      ],
    };
  }

  /** Wraps a page in a Tab, registers it, and makes it current when there is none. */
  private _onPageCreated(page: playwright.Page) {
    const tab = new Tab(this, page, closedTab => this._onPageClosed(closedTab));
    this._tabs.push(tab);
    if (!this._currentTab)
      this._currentTab = tab;
  }

  /**
   * Drops the tab and its modal states. If it was current, the tab now at the
   * same position (or the last one) becomes current. Closing the last tab
   * closes the whole context.
   */
  private _onPageClosed(tab: Tab) {
    this._modalStates = this._modalStates.filter(state => state.tab !== tab);
    const index = this._tabs.indexOf(tab);
    if (index === -1)
      return;
    this._tabs.splice(index, 1);

    if (this._currentTab === tab)
      this._currentTab = this._tabs[Math.min(index, this._tabs.length - 1)];
    if (this._browserContext && !this._tabs.length)
      void this.close();
  }

  /**
   * Closes the browser context and then the browser (if any). Errors are
   * swallowed; a failure to close the context skips closing the browser.
   */
  async close() {
    if (!this._browserContext)
      return;
    const browserContext = this._browserContext;
    const browser = this._browser;
    // Clear the fields first so the page-close callbacks triggered below do not re-enter close().
    this._browserContext = undefined;
    this._browser = undefined;

    try {
      await browserContext.close();
      await browser?.close();
    } catch {
      // Best-effort shutdown: ignore close errors.
    }
  }

  /** Creates the browser context on first use, adopting existing pages and listening for new ones. */
  private async _ensureBrowserContext() {
    if (!this._browserContext) {
      const context = await this._createBrowserContext();
      this._browser = context.browser;
      this._browserContext = context.browserContext;
      for (const page of this._browserContext.pages())
        this._onPageCreated(page);
      this._browserContext.on('page', page => this._onPageCreated(page));
    }
    return this._browserContext;
  }

  /**
   * Obtains a browser context according to the options: remote endpoint,
   * then CDP endpoint, otherwise a locally launched persistent context.
   */
  private async _createBrowserContext(): Promise<{ browser?: playwright.Browser, browserContext: playwright.BrowserContext }> {
    if (this.options.remoteEndpoint) {
      const url = new URL(this.options.remoteEndpoint);
      if (this.options.browserName)
        url.searchParams.set('browser', this.options.browserName);
      if (this.options.launchOptions)
        url.searchParams.set('launch-options', JSON.stringify(this.options.launchOptions));
      const browser = await playwright[this.options.browserName ?? 'chromium'].connect(String(url));
      const browserContext = await browser.newContext();
      return { browser, browserContext };
    }

    if (this.options.cdpEndpoint) {
      const browser = await playwright.chromium.connectOverCDP(this.options.cdpEndpoint);
      // NOTE(review): assumes the connected browser exposes at least one context — confirm.
      const browserContext = browser.contexts()[0];
      return { browser, browserContext };
    }

    const browserContext = await this._launchPersistentContext();
    return { browserContext };
  }

  /**
   * Launches a persistent context in `options.userDataDir`.
   *
   * @throws Error with an installation hint when the launch error message
   *   contains "Executable doesn't exist"; any other error is rethrown as is.
   */
  private async _launchPersistentContext(): Promise<playwright.BrowserContext> {
    try {
      const browserType = this.options.browserName ? playwright[this.options.browserName] : playwright.chromium;
      return await browserType.launchPersistentContext(this.options.userDataDir, this.options.launchOptions);
    } catch (error: unknown) {
      if (error instanceof Error && error.message.includes('Executable doesn\'t exist'))
        throw new Error(`Browser specified in your config is not installed. Either install it (likely) or change the config.`);
      throw error;
    }
  }
}

/**
 * A single page plus the per-page state tracked for it: buffered console
 * messages and the most recent snapshot.
 */
export class Tab {
  readonly context: Context;
  readonly page: playwright.Page;
  private _console: playwright.ConsoleMessage[] = [];
  private _snapshot: PageSnapshot | undefined;
  private _onPageClose: (tab: Tab) => void;

  /**
   * Subscribes to page events and sets default timeouts (60000 ms for
   * navigation, 5000 ms otherwise).
   *
   * @param onPageClose invoked with this tab when the page closes.
   */
  constructor(context: Context, page: playwright.Page, onPageClose: (tab: Tab) => void) {
    this.context = context;
    this.page = page;
    this._onPageClose = onPageClose;
    page.on('console', event => this._console.push(event));
    page.on('framenavigated', frame => {
      // Only a navigation of a frame without a parent resets the console buffer.
      if (!frame.parentFrame())
        this._console.length = 0;
    });
    page.on('close', () => this._onClose());
    page.on('filechooser', chooser => {
      this.context.setModalState({
        type: 'fileChooser',
        description: 'File chooser',
        fileChooser: chooser,
      }, this);
    });
    page.setDefaultNavigationTimeout(60000);
    page.setDefaultTimeout(5000);
  }

  /** Clears the console buffer and notifies the owner that the page closed. */
  private _onClose() {
    this._console.length = 0;
    this._onPageClose(this);
  }

  /** Navigates to `url`, waiting for `domcontentloaded` and then up to 5 seconds for `load`. */
  async navigate(url: string) {
    await this.page.goto(url, { waitUntil: 'domcontentloaded' });
    // Cap load event to 5 seconds, the page is operational at this point.
    await this.page.waitForLoadState('load', { timeout: 5000 }).catch(() => {});
  }

  /** Whether a snapshot has been captured for this tab. */
  hasSnapshot(): boolean {
    return !!this._snapshot;
  }

  /**
   * Returns the last captured snapshot.
   *
   * @throws Error when no snapshot has been captured.
   */
  snapshotOrDie(): PageSnapshot {
    if (!this._snapshot)
      throw new Error('No snapshot available');
    return this._snapshot;
  }

  /** Returns the buffered console messages (the live internal array, not a copy). */
  async console(): Promise<playwright.ConsoleMessage[]> {
    return this._console;
  }

  /** Captures a fresh snapshot of the page, replacing the previous one. */
  async captureSnapshot() {
    this._snapshot = await PageSnapshot.create(this.page);
  }
}

/**
 * An aria snapshot of a page and its iframes rendered as text, together with
 * the frame locators needed to resolve element refs from that text.
 */
class PageSnapshot {
  // Index 0 is the page; each snapshotted iframe is appended in visit order.
  private _frameLocators: PageOrFrameLocator[] = [];
  private _text!: string;

  constructor() {
  }

  /** Builds a snapshot of `page`. */
  static async create(page: playwright.Page): Promise<PageSnapshot> {
    const snapshot = new PageSnapshot();
    await snapshot._build(page);
    return snapshot;
  }

  /** Returns the rendered snapshot text. */
  text(): string {
    return this._text;
  }

  /** Renders URL, title and the YAML aria snapshot into the text form. */
  private async _build(page: playwright.Page) {
    const yamlDocument = await this._snapshotFrame(page);
    const lines = [];
    lines.push(
        `- Page URL: ${page.url()}`,
        `- Page Title: ${await page.title()}`
    );
    lines.push(
        `- Page Snapshot`,
        '```yaml',
        yamlDocument.toString().trim(),
        '```',
        ''
    );
    this._text = lines.join('\n');
  }

  /**
   * Snapshots one frame and, recursively, its iframes.
   *
   * In frames other than the page (index > 0) the first `[ref=` in each
   * string scalar is prefixed with `f<frameIndex>`, which `refLocator` later
   * parses. Scalars starting with `iframe ` are replaced by a pair whose
   * value is the child frame's snapshot, or an empty string when snapshotting
   * the child fails.
   */
  private async _snapshotFrame(frame: playwright.Page | playwright.FrameLocator) {
    const frameIndex = this._frameLocators.push(frame) - 1;
    const snapshotString = await frame.locator('body').ariaSnapshot({ ref: true });
    const snapshot = yaml.parseDocument(snapshotString);

    const visit = async (node: any): Promise<unknown> => {
      if (yaml.isPair(node)) {
        await Promise.all([
          visit(node.key).then(k => node.key = k),
          visit(node.value).then(v => node.value = v)
        ]);
      } else if (yaml.isSeq(node) || yaml.isMap(node)) {
        node.items = await Promise.all(node.items.map(visit));
      } else if (yaml.isScalar(node)) {
        if (typeof node.value === 'string') {
          const value = node.value;
          if (frameIndex > 0)
            node.value = value.replace('[ref=', `[ref=f${frameIndex}`);
          if (value.startsWith('iframe ')) {
            // Use the unprefixed ref: it is resolved inside the current frame.
            const ref = value.match(/\[ref=(.*)\]/)?.[1];
            if (ref) {
              try {
                const childSnapshot = await this._snapshotFrame(frame.frameLocator(`aria-ref=${ref}`));
                return snapshot.createPair(node.value, childSnapshot);
              } catch {
                return snapshot.createPair(node.value, '');
              }
            }
          }
        }
      }

      return node;
    };
    await visit(snapshot.contents);
    return snapshot;
  }

  /**
   * Resolves a ref from the snapshot text to a locator. Refs of the form
   * `f<digits><rest>` are looked up in the frame with that index; other refs
   * are resolved against the page.
   *
   * @throws Error when the referenced frame is not part of this snapshot.
   */
  refLocator(ref: string): playwright.Locator {
    let frame = this._frameLocators[0];
    const match = ref.match(/^f(\d+)(.*)/);
    if (match) {
      const frameIndex = parseInt(match[1], 10);
      frame = this._frameLocators[frameIndex];
      ref = match[2];
    }

    if (!frame)
      throw new Error(`Frame does not exist. Provide ref from the most current snapshot.`);

    return frame.locator(`aria-ref=${ref}`);
  }
}

/** Returns the locator's generated string form via the private `_generateLocatorString` method. */
export async function generateLocator(locator: playwright.Locator): Promise<string> {
  return (locator as any)._generateLocatorString();
}