diff --git a/README.md b/README.md index f177c28..3f2b70e 100644 --- a/README.md +++ b/README.md @@ -203,6 +203,11 @@ The Playwright MCP provides a set of tools for browser automation. Here are all - `ref` (string): Exact target element reference from the page snapshot - `values` (array): Array of values to select in the dropdown. +- **browser_choose_file** + - Description: Choose one or multiple files to upload + - Parameters: + - `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files. + - **browser_press_key** - Description: Press a key on the keyboard - Parameters: @@ -283,6 +288,11 @@ Vision Mode provides tools for visual-based interactions using screenshots. Here - Parameters: - `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a` +- **browser_choose_file** + - Description: Choose one or multiple files to upload + - Parameters: + - `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files. + - **browser_save_as_pdf** - Description: Save page as PDF - Parameters: None diff --git a/src/context.ts b/src/context.ts index a0658b1..bb4af93 100644 --- a/src/context.ts +++ b/src/context.ts @@ -23,6 +23,7 @@ export class Context { private _page: playwright.Page | undefined; private _console: playwright.ConsoleMessage[] = []; private _createPagePromise: Promise | undefined; + private _fileChooser: playwright.FileChooser | undefined; private _lastSnapshotFrames: playwright.FrameLocator[] = []; constructor(userDataDir: string, launchOptions?: playwright.LaunchOptions) { @@ -41,6 +42,7 @@ export class Context { this._console.length = 0; }); page.on('close', () => this._onPageClose()); + page.on('filechooser', chooser => this._fileChooser = chooser); page.setDefaultNavigationTimeout(60000); page.setDefaultTimeout(5000); this._page = page; @@ -58,6 +60,7 @@ export class Context { this._createPagePromise = undefined; this._browser = undefined; this._page = undefined; + this._fileChooser = undefined; this._console.length = 0; } @@ -77,6 +80,21 @@ export class Context { await this._page.close(); } + async submitFileChooser(paths: string[]) { + if (!this._fileChooser) + throw new Error('No file chooser visible'); + await this._fileChooser.setFiles(paths); + this._fileChooser = undefined; + } + + hasFileChooser() { + return !!this._fileChooser; + } + + clearFileChooser() { + this._fileChooser = undefined; + } + private async _createPage(): Promise<{ browser?: playwright.Browser, page: playwright.Page }> { if (process.env.PLAYWRIGHT_WS_ENDPOINT) { const url = new URL(process.env.PLAYWRIGHT_WS_ENDPOINT); diff --git a/src/index.ts b/src/index.ts index 3530459..ea71f12 100644 --- a/src/index.ts +++ b/src/index.ts @@ -36,6 +36,7 @@ const snapshotTools: Tool[] = [ common.navigate(true), common.goBack(true), common.goForward(true), + common.chooseFile(true), snapshot.snapshot, snapshot.click, snapshot.hover, @@ -49,6 +50,7 @@ const screenshotTools: Tool[] = [ common.navigate(false), common.goBack(false), common.goForward(false), + common.chooseFile(false), screenshot.screenshot, screenshot.moveMouse, screenshot.click, diff --git a/src/tools/common.ts b/src/tools/common.ts index 828cfb7..a04ddaf 100644 --- a/src/tools/common.ts +++ b/src/tools/common.ts @@ -156,3 +156,21 @@ export const close: Tool = { }; }, }; + +const chooseFileSchema = z.object({ + paths: z.array(z.string()).describe('The absolute paths to the files to upload. Can be a single file or multiple files.'), +}); + +export const chooseFile: ToolFactory = snapshot => ({ + schema: { + name: 'browser_choose_file', + description: 'Choose one or multiple files to upload', + inputSchema: zodToJsonSchema(chooseFileSchema), + }, + handle: async (context, params) => { + const validatedParams = chooseFileSchema.parse(params); + return await runAndWait(context, `Chose files ${validatedParams.paths.join(', ')}`, async () => { + await context.submitFileChooser(validatedParams.paths); + }, snapshot); + }, +}); diff --git a/src/tools/utils.ts b/src/tools/utils.ts index 47ede9a..29b6dc9 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -73,23 +73,36 @@ async function waitForCompletion(page: playwright.Page, callback: () => Promi export async function runAndWait(context: Context, status: string, callback: (page: playwright.Page) => Promise, snapshot: boolean = false): Promise { const page = context.existingPage(); + const dismissFileChooser = context.hasFileChooser(); await waitForCompletion(page, () => callback(page)); - return snapshot ? captureAriaSnapshot(context, status) : { + if (dismissFileChooser) + context.clearFileChooser(); + const result: ToolResult = snapshot ? await captureAriaSnapshot(context, status) : { content: [{ type: 'text', text: status }], }; + return result; } export async function captureAriaSnapshot(context: Context, status: string = ''): Promise { const page = context.existingPage(); + const lines = []; + if (status) + lines.push(`${status}`); + lines.push( + '', + `- Page URL: ${page.url()}`, + `- Page Title: ${await page.title()}` + ); + if (context.hasFileChooser()) + lines.push(`- There is a file chooser visible that requires browser_choose_file to be called`); + lines.push( + `- Page Snapshot`, + '```yaml', + await context.allFramesSnapshot(), + '```', + '' + ); return { - content: [{ type: 'text', text: `${status ? `${status}\n` : ''} -- Page URL: ${page.url()} -- Page Title: ${await page.title()} -- Page Snapshot -\`\`\`yaml -${await context.allFramesSnapshot()} -\`\`\` -` - }], + content: [{ type: 'text', text: lines.join('\n') }], }; } diff --git a/tests/basic.spec.ts b/tests/basic.spec.ts index e12a9e0..d945611 100644 --- a/tests/basic.spec.ts +++ b/tests/basic.spec.ts @@ -14,6 +14,7 @@ * limitations under the License. */ +import fs from 'fs/promises'; import { spawn } from 'node:child_process'; import path from 'node:path'; import { test, expect } from './fixtures'; @@ -38,6 +39,9 @@ test('test tool list', async ({ server, visionServer }) => { expect.objectContaining({ name: 'browser_go_forward', }), + expect.objectContaining({ + name: 'browser_choose_file', + }), expect.objectContaining({ name: 'browser_snapshot', }), @@ -455,6 +459,83 @@ test('stitched aria frames', async ({ server }) => { })); }); +test('browser_choose_file', async ({ server }) => { + let response = await server.send({ + jsonrpc: '2.0', + id: 2, + method: 'tools/call', + params: { + name: 'browser_navigate', + arguments: { + url: 'data:text/html,Title', + }, + }, + }); + + expect(response.result.content[0].text).toContain('- textbox [ref=s1e4]'); + + response = await server.send({ + jsonrpc: '2.0', + id: 2, + method: 'tools/call', + params: { + name: 'browser_click', + arguments: { + element: 'Textbox', + ref: 's1e4', + }, + }, + }); + + expect(response.result.content[0].text).toContain('There is a file chooser visible that requires browser_choose_file to be called'); + + const filePath = test.info().outputPath('test.txt'); + await fs.writeFile(filePath, 'Hello, world!'); + response = await server.send({ + jsonrpc: '2.0', + id: 2, + method: 'tools/call', + params: { + name: 'browser_choose_file', + arguments: { + paths: [filePath], + }, + }, + }); + + expect(response.result.content[0].text).not.toContain('There is a file chooser visible that requires browser_choose_file to be called'); + expect(response.result.content[0].text).toContain('textbox [ref=s3e4]: C:\\fakepath\\test.txt'); + + response = await server.send({ + jsonrpc: '2.0', + id: 2, + method: 'tools/call', + params: { + name: 'browser_click', + arguments: { + element: 'Textbox', + ref: 's3e4', + }, + }, + }); + expect(response.result.content[0].text).toContain('There is a file chooser visible that requires browser_choose_file to be called'); + expect(response.result.content[0].text).toContain('button "Button" [ref=s4e5]'); + + response = await server.send({ + jsonrpc: '2.0', + id: 2, + method: 'tools/call', + params: { + name: 'browser_click', + arguments: { + element: 'Button', + ref: 's4e5', + }, + }, + }); + expect(response.result.content[0].text, 'not submitting browser_choose_file dismisses file chooser').not.toContain('There is a file chooser visible that requires browser_choose_file to be called'); +}); + test('sse transport', async () => { const cp = spawn('node', [path.join(__dirname, '../cli.js'), '--port', '0'], { stdio: 'pipe' }); try {