mirror of
https://github.com/microsoft/playwright-mcp.git
synced 2025-07-26 08:32:26 +08:00
feat: browser_choose_file (#52)
Resolves https://github.com/microsoft/playwright-mcp/issues/31.

Prompt used for testing:

```
upload test.txt to dropbox
```

This won't work for file choosers that are opened asynchronously, but let's start with the synchronous variant. I also tested including the file chooser's open state in the snapshot, but that didn't give better results. I also tested with OneDrive and WeTransfer, but somehow our ARIA snapshots for those pages are missing some elements that are crucial for locating the upload buttons.
This commit is contained in:
parent
f033213618
commit
5e200405e5
10
README.md
10
README.md
@ -203,6 +203,11 @@ The Playwright MCP provides a set of tools for browser automation. Here are all
|
||||
- `ref` (string): Exact target element reference from the page snapshot
|
||||
- `values` (array): Array of values to select in the dropdown.
|
||||
|
||||
- **browser_choose_file**
|
||||
- Description: Choose one or multiple files to upload
|
||||
- Parameters:
|
||||
- `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files.
|
||||
|
||||
- **browser_press_key**
|
||||
- Description: Press a key on the keyboard
|
||||
- Parameters:
|
||||
@ -283,6 +288,11 @@ Vision Mode provides tools for visual-based interactions using screenshots. Here
|
||||
- Parameters:
|
||||
- `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
|
||||
|
||||
- **browser_choose_file**
|
||||
- Description: Choose one or multiple files to upload
|
||||
- Parameters:
|
||||
- `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files.
|
||||
|
||||
- **browser_save_as_pdf**
|
||||
- Description: Save page as PDF
|
||||
- Parameters: None
|
||||
|
@ -23,6 +23,7 @@ export class Context {
|
||||
private _page: playwright.Page | undefined;
|
||||
private _console: playwright.ConsoleMessage[] = [];
|
||||
private _createPagePromise: Promise<playwright.Page> | undefined;
|
||||
private _fileChooser: playwright.FileChooser | undefined;
|
||||
private _lastSnapshotFrames: playwright.FrameLocator[] = [];
|
||||
|
||||
constructor(userDataDir: string, launchOptions?: playwright.LaunchOptions) {
|
||||
@ -41,6 +42,7 @@ export class Context {
|
||||
this._console.length = 0;
|
||||
});
|
||||
page.on('close', () => this._onPageClose());
|
||||
page.on('filechooser', chooser => this._fileChooser = chooser);
|
||||
page.setDefaultNavigationTimeout(60000);
|
||||
page.setDefaultTimeout(5000);
|
||||
this._page = page;
|
||||
@ -58,6 +60,7 @@ export class Context {
|
||||
this._createPagePromise = undefined;
|
||||
this._browser = undefined;
|
||||
this._page = undefined;
|
||||
this._fileChooser = undefined;
|
||||
this._console.length = 0;
|
||||
}
|
||||
|
||||
@ -77,6 +80,21 @@ export class Context {
|
||||
await this._page.close();
|
||||
}
|
||||
|
||||
async submitFileChooser(paths: string[]) {
|
||||
if (!this._fileChooser)
|
||||
throw new Error('No file chooser visible');
|
||||
await this._fileChooser.setFiles(paths);
|
||||
this._fileChooser = undefined;
|
||||
}
|
||||
|
||||
hasFileChooser() {
|
||||
return !!this._fileChooser;
|
||||
}
|
||||
|
||||
clearFileChooser() {
|
||||
this._fileChooser = undefined;
|
||||
}
|
||||
|
||||
private async _createPage(): Promise<{ browser?: playwright.Browser, page: playwright.Page }> {
|
||||
if (process.env.PLAYWRIGHT_WS_ENDPOINT) {
|
||||
const url = new URL(process.env.PLAYWRIGHT_WS_ENDPOINT);
|
||||
|
@ -36,6 +36,7 @@ const snapshotTools: Tool[] = [
|
||||
common.navigate(true),
|
||||
common.goBack(true),
|
||||
common.goForward(true),
|
||||
common.chooseFile(true),
|
||||
snapshot.snapshot,
|
||||
snapshot.click,
|
||||
snapshot.hover,
|
||||
@ -49,6 +50,7 @@ const screenshotTools: Tool[] = [
|
||||
common.navigate(false),
|
||||
common.goBack(false),
|
||||
common.goForward(false),
|
||||
common.chooseFile(false),
|
||||
screenshot.screenshot,
|
||||
screenshot.moveMouse,
|
||||
screenshot.click,
|
||||
|
@ -156,3 +156,21 @@ export const close: Tool = {
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
const chooseFileSchema = z.object({
|
||||
paths: z.array(z.string()).describe('The absolute paths to the files to upload. Can be a single file or multiple files.'),
|
||||
});
|
||||
|
||||
export const chooseFile: ToolFactory = snapshot => ({
|
||||
schema: {
|
||||
name: 'browser_choose_file',
|
||||
description: 'Choose one or multiple files to upload',
|
||||
inputSchema: zodToJsonSchema(chooseFileSchema),
|
||||
},
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = chooseFileSchema.parse(params);
|
||||
return await runAndWait(context, `Chose files ${validatedParams.paths.join(', ')}`, async () => {
|
||||
await context.submitFileChooser(validatedParams.paths);
|
||||
}, snapshot);
|
||||
},
|
||||
});
|
||||
|
@ -73,23 +73,36 @@ async function waitForCompletion<R>(page: playwright.Page, callback: () => Promi
|
||||
|
||||
export async function runAndWait(context: Context, status: string, callback: (page: playwright.Page) => Promise<any>, snapshot: boolean = false): Promise<ToolResult> {
|
||||
const page = context.existingPage();
|
||||
const dismissFileChooser = context.hasFileChooser();
|
||||
await waitForCompletion(page, () => callback(page));
|
||||
return snapshot ? captureAriaSnapshot(context, status) : {
|
||||
if (dismissFileChooser)
|
||||
context.clearFileChooser();
|
||||
const result: ToolResult = snapshot ? await captureAriaSnapshot(context, status) : {
|
||||
content: [{ type: 'text', text: status }],
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
export async function captureAriaSnapshot(context: Context, status: string = ''): Promise<ToolResult> {
|
||||
const page = context.existingPage();
|
||||
const lines = [];
|
||||
if (status)
|
||||
lines.push(`${status}`);
|
||||
lines.push(
|
||||
'',
|
||||
`- Page URL: ${page.url()}`,
|
||||
`- Page Title: ${await page.title()}`
|
||||
);
|
||||
if (context.hasFileChooser())
|
||||
lines.push(`- There is a file chooser visible that requires browser_choose_file to be called`);
|
||||
lines.push(
|
||||
`- Page Snapshot`,
|
||||
'```yaml',
|
||||
await context.allFramesSnapshot(),
|
||||
'```',
|
||||
''
|
||||
);
|
||||
return {
|
||||
content: [{ type: 'text', text: `${status ? `${status}\n` : ''}
|
||||
- Page URL: ${page.url()}
|
||||
- Page Title: ${await page.title()}
|
||||
- Page Snapshot
|
||||
\`\`\`yaml
|
||||
${await context.allFramesSnapshot()}
|
||||
\`\`\`
|
||||
`
|
||||
}],
|
||||
content: [{ type: 'text', text: lines.join('\n') }],
|
||||
};
|
||||
}
|
||||
|
@ -14,6 +14,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import fs from 'fs/promises';
|
||||
import { spawn } from 'node:child_process';
|
||||
import path from 'node:path';
|
||||
import { test, expect } from './fixtures';
|
||||
@ -38,6 +39,9 @@ test('test tool list', async ({ server, visionServer }) => {
|
||||
expect.objectContaining({
|
||||
name: 'browser_go_forward',
|
||||
}),
|
||||
expect.objectContaining({
|
||||
name: 'browser_choose_file',
|
||||
}),
|
||||
expect.objectContaining({
|
||||
name: 'browser_snapshot',
|
||||
}),
|
||||
@ -455,6 +459,83 @@ test('stitched aria frames', async ({ server }) => {
|
||||
}));
|
||||
});
|
||||
|
||||
test('browser_choose_file', async ({ server }) => {
|
||||
let response = await server.send({
|
||||
jsonrpc: '2.0',
|
||||
id: 2,
|
||||
method: 'tools/call',
|
||||
params: {
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><input type="file" /><button>Button</button></html>',
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(response.result.content[0].text).toContain('- textbox [ref=s1e4]');
|
||||
|
||||
response = await server.send({
|
||||
jsonrpc: '2.0',
|
||||
id: 2,
|
||||
method: 'tools/call',
|
||||
params: {
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Textbox',
|
||||
ref: 's1e4',
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(response.result.content[0].text).toContain('There is a file chooser visible that requires browser_choose_file to be called');
|
||||
|
||||
const filePath = test.info().outputPath('test.txt');
|
||||
await fs.writeFile(filePath, 'Hello, world!');
|
||||
response = await server.send({
|
||||
jsonrpc: '2.0',
|
||||
id: 2,
|
||||
method: 'tools/call',
|
||||
params: {
|
||||
name: 'browser_choose_file',
|
||||
arguments: {
|
||||
paths: [filePath],
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(response.result.content[0].text).not.toContain('There is a file chooser visible that requires browser_choose_file to be called');
|
||||
expect(response.result.content[0].text).toContain('textbox [ref=s3e4]: C:\\fakepath\\test.txt');
|
||||
|
||||
response = await server.send({
|
||||
jsonrpc: '2.0',
|
||||
id: 2,
|
||||
method: 'tools/call',
|
||||
params: {
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Textbox',
|
||||
ref: 's3e4',
|
||||
},
|
||||
},
|
||||
});
|
||||
expect(response.result.content[0].text).toContain('There is a file chooser visible that requires browser_choose_file to be called');
|
||||
expect(response.result.content[0].text).toContain('button "Button" [ref=s4e5]');
|
||||
|
||||
response = await server.send({
|
||||
jsonrpc: '2.0',
|
||||
id: 2,
|
||||
method: 'tools/call',
|
||||
params: {
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Button',
|
||||
ref: 's4e5',
|
||||
},
|
||||
},
|
||||
});
|
||||
expect(response.result.content[0].text, 'not submitting browser_choose_file dismisses file chooser').not.toContain('There is a file chooser visible that requires browser_choose_file to be called');
|
||||
});
|
||||
|
||||
test('sse transport', async () => {
|
||||
const cp = spawn('node', [path.join(__dirname, '../cli.js'), '--port', '0'], { stdio: 'pipe' });
|
||||
try {
|
||||
|
Loading…
x
Reference in New Issue
Block a user