mirror of
https://github.com/microsoft/playwright-mcp.git
synced 2025-07-26 08:32:26 +08:00
chore: introduce modal states (#204)
This commit is contained in:
parent
6054290d9a
commit
cea347d067
@ -18,7 +18,8 @@ import * as playwright from 'playwright';
|
||||
import yaml from 'yaml';
|
||||
|
||||
import { waitForCompletion } from './tools/utils';
|
||||
import { ToolResult } from './tools/tool';
|
||||
|
||||
import type { ModalState, Tool, ToolResult } from './tools/tool';
|
||||
|
||||
export type ContextOptions = {
|
||||
browserName?: 'chromium' | 'firefox' | 'webkit';
|
||||
@ -33,20 +34,43 @@ type PageOrFrameLocator = playwright.Page | playwright.FrameLocator;
|
||||
type RunOptions = {
|
||||
captureSnapshot?: boolean;
|
||||
waitForCompletion?: boolean;
|
||||
noClearFileChooser?: boolean;
|
||||
};
|
||||
|
||||
export class Context {
|
||||
readonly tools: Tool[];
|
||||
readonly options: ContextOptions;
|
||||
private _browser: playwright.Browser | undefined;
|
||||
private _browserContext: playwright.BrowserContext | undefined;
|
||||
private _tabs: Tab[] = [];
|
||||
private _currentTab: Tab | undefined;
|
||||
private _modalStates: (ModalState & { tab: Tab })[] = [];
|
||||
|
||||
constructor(options: ContextOptions) {
|
||||
constructor(tools: Tool[], options: ContextOptions) {
|
||||
this.tools = tools;
|
||||
this.options = options;
|
||||
}
|
||||
|
||||
/**
 * Returns the modal states currently pending in this context.
 *
 * The internal array itself is returned (not a copy); its elements are the
 * stored entries, which is what `clearModalState` compares against.
 */
modalStates(): ModalState[] {
  const pending = this._modalStates;
  return pending;
}
|
||||
|
||||
/**
 * Records a new pending modal state and remembers which tab raised it.
 *
 * A shallow copy of `modalState` (extended with the tab) is stored, so the
 * stored entry is a different object from the argument.
 *
 * @param modalState Description of the modal state to record.
 * @param inTab Tab in which the modal state occurred.
 */
setModalState(modalState: ModalState, inTab: Tab) {
  const entry = { ...modalState, tab: inTab };
  this._modalStates.push(entry);
}
|
||||
|
||||
/**
 * Removes a pending modal state.
 *
 * Matching is by object identity, so `modalState` must be an entry obtained
 * from `modalStates()`; an equal-looking object that is not the stored entry
 * leaves the list unchanged.
 *
 * @param modalState The stored entry to remove.
 */
clearModalState(modalState: ModalState) {
  const remaining = this._modalStates.filter(candidate => candidate !== modalState);
  this._modalStates = remaining;
}
|
||||
|
||||
/**
 * Renders the pending modal states as markdown lines, starting with a
 * `### Modal state` heading.
 *
 * For every pending state, the tool whose `clearsModalState` matches the
 * state's type is named as the way to handle it. When no registered tool
 * matches, the line says so instead of printing `"undefined"`. When nothing
 * is pending, an explicit line is emitted so the heading is never left bare.
 *
 * @returns The markdown lines; callers join them with newlines.
 */
modalStatesMarkdown(): string[] {
  const result: string[] = ['### Modal state'];
  if (!this._modalStates.length)
    result.push('- There is no modal state present');
  for (const state of this._modalStates) {
    const tool = this.tools.find(candidate => candidate.clearsModalState === state.type);
    if (tool)
      result.push(`- [${state.description}]: can be handled by the "${tool.schema.name}" tool`);
    else
      result.push(`- [${state.description}]: no registered tool can handle this modal state`);
  }
  return result;
}
|
||||
|
||||
/**
 * Returns the tabs tracked by this context.
 *
 * The internal array itself is returned, not a copy.
 */
tabs(): Tab[] {
  const openTabs = this._tabs;
  return openTabs;
}
|
||||
@ -104,6 +128,7 @@ export class Context {
|
||||
}
|
||||
|
||||
private _onPageClosed(tab: Tab) {
|
||||
this._modalStates = this._modalStates.filter(state => state.tab !== tab);
|
||||
const index = this._tabs.indexOf(tab);
|
||||
if (index === -1)
|
||||
return;
|
||||
@ -188,7 +213,6 @@ class Tab {
|
||||
readonly context: Context;
|
||||
readonly page: playwright.Page;
|
||||
private _console: playwright.ConsoleMessage[] = [];
|
||||
private _fileChooser: playwright.FileChooser | undefined;
|
||||
private _snapshot: PageSnapshot | undefined;
|
||||
private _onPageClose: (tab: Tab) => void;
|
||||
|
||||
@ -202,13 +226,18 @@ class Tab {
|
||||
this._console.length = 0;
|
||||
});
|
||||
page.on('close', () => this._onClose());
|
||||
page.on('filechooser', chooser => this._fileChooser = chooser);
|
||||
page.on('filechooser', chooser => {
|
||||
this.context.setModalState({
|
||||
type: 'fileChooser',
|
||||
description: 'File chooser',
|
||||
fileChooser: chooser,
|
||||
}, this);
|
||||
});
|
||||
page.setDefaultNavigationTimeout(60000);
|
||||
page.setDefaultTimeout(5000);
|
||||
}
|
||||
|
||||
private _onClose() {
|
||||
this._fileChooser = undefined;
|
||||
this._console.length = 0;
|
||||
this._onPageClose(this);
|
||||
}
|
||||
@ -222,8 +251,6 @@ class Tab {
|
||||
async run(callback: (tab: Tab) => Promise<RunResult>, options?: RunOptions): Promise<ToolResult> {
|
||||
let runResult: RunResult | undefined;
|
||||
try {
|
||||
if (!options?.noClearFileChooser)
|
||||
this._fileChooser = undefined;
|
||||
if (options?.waitForCompletion)
|
||||
runResult = await waitForCompletion(this.page, () => callback(this)) ?? undefined;
|
||||
else
|
||||
@ -240,13 +267,23 @@ ${runResult.code.join('\n')}
|
||||
\`\`\`
|
||||
`);
|
||||
|
||||
if (this.context.modalStates().length) {
|
||||
result.push(...this.context.modalStatesMarkdown());
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: result.join('\n'),
|
||||
}],
|
||||
};
|
||||
}
|
||||
|
||||
if (this.context.tabs().length > 1)
|
||||
result.push(await this.context.listTabs(), '');
|
||||
|
||||
if (this._snapshot) {
|
||||
if (this.context.tabs().length > 1)
|
||||
result.push('### Current tab');
|
||||
result.push(this._snapshot.text({ hasFileChooser: !!this._fileChooser }));
|
||||
result.push(this._snapshot.text());
|
||||
}
|
||||
|
||||
const images = runResult.images?.map(image => {
|
||||
@ -289,13 +326,6 @@ ${runResult.code.join('\n')}
|
||||
/**
 * Returns this tab's console message buffer.
 *
 * The same array instance is handed out on every call; it is emptied in
 * place when the page closes.
 */
async console(): Promise<playwright.ConsoleMessage[]> {
  const messages = this._console;
  return messages;
}
|
||||
|
||||
async submitFileChooser(paths: string[]) {
|
||||
if (!this._fileChooser)
|
||||
throw new Error('No file chooser visible');
|
||||
await this._fileChooser.setFiles(paths);
|
||||
this._fileChooser = undefined;
|
||||
}
|
||||
}
|
||||
|
||||
class PageSnapshot {
|
||||
@ -311,14 +341,8 @@ class PageSnapshot {
|
||||
return snapshot;
|
||||
}
|
||||
|
||||
text(options: { hasFileChooser: boolean }): string {
|
||||
const results: string[] = [];
|
||||
if (options.hasFileChooser) {
|
||||
results.push('- There is a file chooser visible that requires browser_file_upload to be called');
|
||||
results.push('');
|
||||
}
|
||||
results.push(this._text);
|
||||
return results.join('\n');
|
||||
text(): string {
|
||||
return this._text;
|
||||
}
|
||||
|
||||
private async _build(page: playwright.Page) {
|
||||
|
@ -32,7 +32,7 @@ type Options = ContextOptions & {
|
||||
|
||||
export function createServerWithTools(options: Options): Server {
|
||||
const { name, version, tools, resources } = options;
|
||||
const context = new Context(options);
|
||||
const context = new Context(tools, options);
|
||||
const server = new Server({ name, version }, {
|
||||
capabilities: {
|
||||
tools: {},
|
||||
@ -57,9 +57,21 @@ export function createServerWithTools(options: Options): Server {
|
||||
};
|
||||
}
|
||||
|
||||
const modalStates = context.modalStates().map(state => state.type);
|
||||
if ((tool.clearsModalState && !modalStates.includes(tool.clearsModalState)) ||
|
||||
(!tool.clearsModalState && modalStates.length)) {
|
||||
const text = [
|
||||
`Tool "${request.params.name}" does not handle the modal state.`,
|
||||
...context.modalStatesMarkdown(),
|
||||
].join('\n');
|
||||
return {
|
||||
content: [{ type: 'text', text }],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await tool.handle(context, request.params.arguments);
|
||||
return result;
|
||||
return await tool.handle(context, request.params.arguments);
|
||||
} catch (error) {
|
||||
return {
|
||||
content: [{ type: 'text', text: String(error) }],
|
||||
|
@ -34,16 +34,20 @@ const uploadFile: ToolFactory = captureSnapshot => ({
|
||||
const validatedParams = uploadFileSchema.parse(params);
|
||||
const tab = context.currentTab();
|
||||
return await tab.runAndWait(async () => {
|
||||
await tab.submitFileChooser(validatedParams.paths);
|
||||
const modalState = context.modalStates().find(state => state.type === 'fileChooser');
|
||||
if (!modalState)
|
||||
throw new Error('No file chooser visible');
|
||||
await modalState.fileChooser.setFiles(validatedParams.paths);
|
||||
context.clearModalState(modalState);
|
||||
const code = [
|
||||
`// <internal code to chose files ${validatedParams.paths.join(', ')}`,
|
||||
];
|
||||
return { code };
|
||||
}, {
|
||||
captureSnapshot,
|
||||
noClearFileChooser: true,
|
||||
});
|
||||
},
|
||||
clearsModalState: 'fileChooser',
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
|
@ -17,7 +17,7 @@
|
||||
import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types';
|
||||
import type { JsonSchema7Type } from 'zod-to-json-schema';
|
||||
import type { Context } from '../context';
|
||||
|
||||
import type * as playwright from 'playwright';
|
||||
/** Capability group a tool belongs to. */
export type ToolCapability =
  | 'core'
  | 'tabs'
  | 'pdf'
  | 'history'
  | 'wait'
  | 'files'
  | 'install';
|
||||
|
||||
export type ToolSchema = {
|
||||
@ -26,6 +26,14 @@ export type ToolSchema = {
|
||||
inputSchema: JsonSchema7Type;
|
||||
};
|
||||
|
||||
/**
 * Modal state recorded when a page raises a file chooser.
 */
export type FileUploadModalState = {
  /** Discriminator identifying this kind of modal state. */
  type: 'fileChooser';

  /** Human-readable label used when the state is rendered as markdown. */
  description: string;

  /** The Playwright file chooser that is waiting for files. */
  fileChooser: playwright.FileChooser;
};
|
||||
|
||||
/**
 * Any modal state the context can track. Currently the file chooser state is
 * the only member.
 */
export type ModalState = FileUploadModalState;
|
||||
|
||||
export type ToolResult = {
|
||||
content: (ImageContent | TextContent)[];
|
||||
isError?: boolean;
|
||||
@ -34,6 +42,7 @@ export type ToolResult = {
|
||||
/**
 * A tool exposed by the server.
 */
export type Tool = {
  /** Capability group this tool belongs to. */
  capability: ToolCapability;

  /** Name, description and input schema advertised for the tool. */
  schema: ToolSchema;

  /**
   * Type of modal state this tool resolves, if any. The server refuses to run
   * a tool while a modal state is pending unless the tool declares that it
   * clears that state, and refuses a clearing tool when its state is absent.
   */
  clearsModalState?: ModalState['type'];

  /** Executes the tool against the context with the raw request arguments. */
  handle: (context: Context, params?: Record<string, any>) => Promise<ToolResult>;
};
|
||||
|
||||
|
@ -14,7 +14,6 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import fs from 'fs/promises';
|
||||
import { test, expect } from './fixtures';
|
||||
|
||||
test('browser_navigate', async ({ client }) => {
|
||||
@ -138,63 +137,6 @@ await page.getByRole('listbox').selectOption(['bar', 'baz']);
|
||||
`);
|
||||
});
|
||||
|
||||
test('browser_file_upload', async ({ client }) => {
|
||||
expect(await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><input type="file" /><button>Button</button></html>',
|
||||
},
|
||||
})).toContainTextContent('- textbox [ref=s1e3]');
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Textbox',
|
||||
ref: 's1e3',
|
||||
},
|
||||
})).toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
|
||||
|
||||
const filePath = test.info().outputPath('test.txt');
|
||||
await fs.writeFile(filePath, 'Hello, world!');
|
||||
|
||||
{
|
||||
const response = await client.callTool({
|
||||
name: 'browser_file_upload',
|
||||
arguments: {
|
||||
paths: [filePath],
|
||||
},
|
||||
});
|
||||
|
||||
expect(response).not.toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
|
||||
expect(response).toContainTextContent('textbox [ref=s3e3]: C:\\fakepath\\test.txt');
|
||||
}
|
||||
|
||||
{
|
||||
const response = await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Textbox',
|
||||
ref: 's3e3',
|
||||
},
|
||||
});
|
||||
|
||||
expect(response).toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
|
||||
expect(response).toContainTextContent('button "Button" [ref=s4e4]');
|
||||
}
|
||||
|
||||
{
|
||||
const response = await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Button',
|
||||
ref: 's4e4',
|
||||
},
|
||||
});
|
||||
|
||||
expect(response, 'not submitting browser_file_upload dismisses file chooser').not.toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
|
||||
}
|
||||
});
|
||||
|
||||
test('browser_type', async ({ client }) => {
|
||||
await client.callTool({
|
||||
name: 'browser_navigate',
|
77
tests/files.spec.ts
Normal file
77
tests/files.spec.ts
Normal file
@ -0,0 +1,77 @@
|
||||
/**
|
||||
* Copyright (c) Microsoft Corporation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { test, expect } from './fixtures';
|
||||
import fs from 'fs/promises';
|
||||
|
||||
// Exercises the file chooser modal state: clicking a file input reports the
// modal state, browser_file_upload clears it, and any other tool is rejected
// while a chooser is still pending.
test('browser_file_upload', async ({ client }) => {
  // Small helper so each click step reads as a single line.
  const click = (element: string, ref: string) => client.callTool({
    name: 'browser_click',
    arguments: {
      element,
      ref,
    },
  });

  const navigated = await client.callTool({
    name: 'browser_navigate',
    arguments: {
      url: 'data:text/html,<html><title>Title</title><input type="file" /><button>Button</button></html>',
    },
  });
  expect(navigated).toContainTextContent('- textbox [ref=s1e3]');

  // Clicking the file input raises a file chooser, reported as a modal state.
  const firstClick = await click('Textbox', 's1e3');
  expect(firstClick).toContainTextContent(`### Modal state
- [File chooser]: can be handled by the "browser_file_upload" tool`);

  const filePath = test.info().outputPath('test.txt');
  await fs.writeFile(filePath, 'Hello, world!');

  // Uploading resolves the modal state and the snapshot shows the chosen file.
  const uploaded = await client.callTool({
    name: 'browser_file_upload',
    arguments: {
      paths: [filePath],
    },
  });
  expect(uploaded).not.toContainTextContent('### Modal state');
  expect(uploaded).toContainTextContent('textbox [ref=s3e3]: C:\\fakepath\\test.txt');

  // Clicking the input again raises a new file chooser.
  const secondClick = await click('Textbox', 's3e3');
  expect(secondClick).toContainTextContent('- [File chooser]: can be handled by the \"browser_file_upload\" tool');

  // With the chooser still pending, a tool that does not clear it is refused.
  const rejected = await click('Button', 's4e4');
  expect(rejected).toContainTextContent(`Tool "browser_click" does not handle the modal state.
### Modal state
- [File chooser]: can be handled by the "browser_file_upload" tool`);
});
|
Loading…
x
Reference in New Issue
Block a user