chore: introduce modal states (#204)

This commit is contained in:
Pavel Feldman 2025-04-16 15:21:45 -07:00 committed by GitHub
parent 6054290d9a
commit cea347d067
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 156 additions and 88 deletions

View File

@ -18,7 +18,8 @@ import * as playwright from 'playwright';
import yaml from 'yaml';
import { waitForCompletion } from './tools/utils';
import { ToolResult } from './tools/tool';
import type { ModalState, Tool, ToolResult } from './tools/tool';
export type ContextOptions = {
browserName?: 'chromium' | 'firefox' | 'webkit';
@ -33,20 +34,43 @@ type PageOrFrameLocator = playwright.Page | playwright.FrameLocator;
type RunOptions = {
captureSnapshot?: boolean;
waitForCompletion?: boolean;
noClearFileChooser?: boolean;
};
export class Context {
readonly tools: Tool[];
readonly options: ContextOptions;
private _browser: playwright.Browser | undefined;
private _browserContext: playwright.BrowserContext | undefined;
private _tabs: Tab[] = [];
private _currentTab: Tab | undefined;
private _modalStates: (ModalState & { tab: Tab })[] = [];
constructor(options: ContextOptions) {
constructor(tools: Tool[], options: ContextOptions) {
this.tools = tools;
this.options = options;
}
/**
 * Returns the modal states currently tracked by this context.
 * The returned array is the context's own list, not a copy.
 */
modalStates(): ModalState[] {
  const tracked: ModalState[] = this._modalStates;
  return tracked;
}
/**
 * Registers a modal state as pending for the given tab.
 * The stored entry is a shallow copy of `modalState` tagged with the tab,
 * so it is a different object than the argument passed in.
 */
setModalState(modalState: ModalState, inTab: Tab) {
  const entry: ModalState & { tab: Tab } = { ...modalState, tab: inTab };
  this._modalStates.push(entry);
}
/**
 * Drops the given modal state from the pending list.
 * Matching is by object identity against the stored entries, so callers
 * should pass an entry obtained from `modalStates()`.
 */
clearModalState(modalState: ModalState) {
  const remaining: (ModalState & { tab: Tab })[] = [];
  for (const entry of this._modalStates) {
    if (entry !== modalState)
      remaining.push(entry);
  }
  this._modalStates = remaining;
}
/**
 * Renders the pending modal states as markdown lines.
 *
 * The first line is always the `### Modal state` heading. Each pending state
 * then gets one bullet naming the tool whose `clearsModalState` matches the
 * state's type.
 *
 * @returns The heading followed by one bullet per pending state, or a single
 * "no modal state" bullet when nothing is pending.
 */
modalStatesMarkdown(): string[] {
  const result: string[] = ['### Modal state'];
  // This list is also rendered when a modal-clearing tool is invoked with
  // nothing pending; say so explicitly rather than emitting a bare heading.
  if (this._modalStates.length === 0)
    result.push('- There is no modal state present');
  for (const state of this._modalStates) {
    const tool = this.tools.find(candidate => candidate.clearsModalState === state.type);
    if (tool)
      result.push(`- [${state.description}]: can be handled by the "${tool.schema.name}" tool`);
    else
      // Avoid advertising a tool literally named "undefined".
      result.push(`- [${state.description}]: no registered tool can handle this state`);
  }
  return result;
}
/**
 * Returns the tabs tracked by this context.
 * The returned array is the context's own list, not a copy.
 */
tabs(): Tab[] {
  const openTabs: Tab[] = this._tabs;
  return openTabs;
}
@ -104,6 +128,7 @@ export class Context {
}
private _onPageClosed(tab: Tab) {
this._modalStates = this._modalStates.filter(state => state.tab !== tab);
const index = this._tabs.indexOf(tab);
if (index === -1)
return;
@ -188,7 +213,6 @@ class Tab {
readonly context: Context;
readonly page: playwright.Page;
private _console: playwright.ConsoleMessage[] = [];
private _fileChooser: playwright.FileChooser | undefined;
private _snapshot: PageSnapshot | undefined;
private _onPageClose: (tab: Tab) => void;
@ -202,13 +226,18 @@ class Tab {
this._console.length = 0;
});
page.on('close', () => this._onClose());
page.on('filechooser', chooser => this._fileChooser = chooser);
page.on('filechooser', chooser => {
this.context.setModalState({
type: 'fileChooser',
description: 'File chooser',
fileChooser: chooser,
}, this);
});
page.setDefaultNavigationTimeout(60000);
page.setDefaultTimeout(5000);
}
private _onClose() {
this._fileChooser = undefined;
this._console.length = 0;
this._onPageClose(this);
}
@ -222,8 +251,6 @@ class Tab {
async run(callback: (tab: Tab) => Promise<RunResult>, options?: RunOptions): Promise<ToolResult> {
let runResult: RunResult | undefined;
try {
if (!options?.noClearFileChooser)
this._fileChooser = undefined;
if (options?.waitForCompletion)
runResult = await waitForCompletion(this.page, () => callback(this)) ?? undefined;
else
@ -240,13 +267,23 @@ ${runResult.code.join('\n')}
\`\`\`
`);
if (this.context.modalStates().length) {
result.push(...this.context.modalStatesMarkdown());
return {
content: [{
type: 'text',
text: result.join('\n'),
}],
};
}
if (this.context.tabs().length > 1)
result.push(await this.context.listTabs(), '');
if (this._snapshot) {
if (this.context.tabs().length > 1)
result.push('### Current tab');
result.push(this._snapshot.text({ hasFileChooser: !!this._fileChooser }));
result.push(this._snapshot.text());
}
const images = runResult.images?.map(image => {
@ -289,13 +326,6 @@ ${runResult.code.join('\n')}
/** Returns the tab's buffered console messages (the `_console` list itself). */
async console(): Promise<playwright.ConsoleMessage[]> {
  const messages: playwright.ConsoleMessage[] = this._console;
  return messages;
}
async submitFileChooser(paths: string[]) {
if (!this._fileChooser)
throw new Error('No file chooser visible');
await this._fileChooser.setFiles(paths);
this._fileChooser = undefined;
}
}
class PageSnapshot {
@ -311,14 +341,8 @@ class PageSnapshot {
return snapshot;
}
text(options: { hasFileChooser: boolean }): string {
const results: string[] = [];
if (options.hasFileChooser) {
results.push('- There is a file chooser visible that requires browser_file_upload to be called');
results.push('');
}
results.push(this._text);
return results.join('\n');
text(): string {
return this._text;
}
private async _build(page: playwright.Page) {

View File

@ -32,7 +32,7 @@ type Options = ContextOptions & {
export function createServerWithTools(options: Options): Server {
const { name, version, tools, resources } = options;
const context = new Context(options);
const context = new Context(tools, options);
const server = new Server({ name, version }, {
capabilities: {
tools: {},
@ -57,9 +57,21 @@ export function createServerWithTools(options: Options): Server {
};
}
const modalStates = context.modalStates().map(state => state.type);
if ((tool.clearsModalState && !modalStates.includes(tool.clearsModalState)) ||
(!tool.clearsModalState && modalStates.length)) {
const text = [
`Tool "${request.params.name}" does not handle the modal state.`,
...context.modalStatesMarkdown(),
].join('\n');
return {
content: [{ type: 'text', text }],
isError: true,
};
}
try {
const result = await tool.handle(context, request.params.arguments);
return result;
return await tool.handle(context, request.params.arguments);
} catch (error) {
return {
content: [{ type: 'text', text: String(error) }],

View File

@ -34,16 +34,20 @@ const uploadFile: ToolFactory = captureSnapshot => ({
const validatedParams = uploadFileSchema.parse(params);
const tab = context.currentTab();
return await tab.runAndWait(async () => {
await tab.submitFileChooser(validatedParams.paths);
const modalState = context.modalStates().find(state => state.type === 'fileChooser');
if (!modalState)
throw new Error('No file chooser visible');
await modalState.fileChooser.setFiles(validatedParams.paths);
context.clearModalState(modalState);
const code = [
`// <internal code to chose files ${validatedParams.paths.join(', ')}`,
];
return { code };
}, {
captureSnapshot,
noClearFileChooser: true,
});
},
clearsModalState: 'fileChooser',
});
export default (captureSnapshot: boolean) => [

View File

@ -17,7 +17,7 @@
import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types';
import type { JsonSchema7Type } from 'zod-to-json-schema';
import type { Context } from '../context';
import type * as playwright from 'playwright';
/** Capability group a tool belongs to; each `Tool` declares exactly one via its `capability` field. */
export type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install';
export type ToolSchema = {
@ -26,6 +26,14 @@ export type ToolSchema = {
inputSchema: JsonSchema7Type;
};
/**
 * Modal state recorded when a page raises a file chooser.
 */
export type FileUploadModalState = {
// Discriminator; a tool names the state it resolves through `clearsModalState`.
type: 'fileChooser';
// Human-readable label used when modal states are listed as markdown.
description: string;
// The Playwright file chooser that is waiting for files.
fileChooser: playwright.FileChooser;
};
/** All modal state kinds; currently only the file chooser state exists. */
export type ModalState = FileUploadModalState;
export type ToolResult = {
content: (ImageContent | TextContent)[];
isError?: boolean;
@ -34,6 +42,7 @@ export type ToolResult = {
/**
 * A tool that can be invoked against a `Context`.
 */
export type Tool = {
// Capability group this tool belongs to.
capability: ToolCapability;
// Schema describing the tool, including its input schema.
schema: ToolSchema;
// Type of modal state this tool resolves, if any. While a modal state is
// pending, the dispatcher only lets through a tool whose value matches it.
clearsModalState?: ModalState['type'];
// Runs the tool with the raw request arguments and produces its result.
handle: (context: Context, params?: Record<string, any>) => Promise<ToolResult>;
};

View File

@ -14,7 +14,6 @@
* limitations under the License.
*/
import fs from 'fs/promises';
import { test, expect } from './fixtures';
test('browser_navigate', async ({ client }) => {
@ -138,63 +137,6 @@ await page.getByRole('listbox').selectOption(['bar', 'baz']);
`);
});
test('browser_file_upload', async ({ client }) => {
expect(await client.callTool({
name: 'browser_navigate',
arguments: {
url: 'data:text/html,<html><title>Title</title><input type="file" /><button>Button</button></html>',
},
})).toContainTextContent('- textbox [ref=s1e3]');
expect(await client.callTool({
name: 'browser_click',
arguments: {
element: 'Textbox',
ref: 's1e3',
},
})).toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
const filePath = test.info().outputPath('test.txt');
await fs.writeFile(filePath, 'Hello, world!');
{
const response = await client.callTool({
name: 'browser_file_upload',
arguments: {
paths: [filePath],
},
});
expect(response).not.toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
expect(response).toContainTextContent('textbox [ref=s3e3]: C:\\fakepath\\test.txt');
}
{
const response = await client.callTool({
name: 'browser_click',
arguments: {
element: 'Textbox',
ref: 's3e3',
},
});
expect(response).toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
expect(response).toContainTextContent('button "Button" [ref=s4e4]');
}
{
const response = await client.callTool({
name: 'browser_click',
arguments: {
element: 'Button',
ref: 's4e4',
},
});
expect(response, 'not submitting browser_file_upload dismisses file chooser').not.toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
}
});
test('browser_type', async ({ client }) => {
await client.callTool({
name: 'browser_navigate',

77
tests/files.spec.ts Normal file
View File

@ -0,0 +1,77 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { test, expect } from './fixtures';
import fs from 'fs/promises';
// End-to-end check of the file chooser modal state: it is reported after the
// click that opens it, cleared by browser_file_upload, and blocks other tools
// while pending.
test('browser_file_upload', async ({ client }) => {
// Load a page containing a file input and a button.
expect(await client.callTool({
name: 'browser_navigate',
arguments: {
url: 'data:text/html,<html><title>Title</title><input type="file" /><button>Button</button></html>',
},
})).toContainTextContent('- textbox [ref=s1e3]');
// Clicking the file input is expected to surface the file chooser modal state.
expect(await client.callTool({
name: 'browser_click',
arguments: {
element: 'Textbox',
ref: 's1e3',
},
})).toContainTextContent(`### Modal state
- [File chooser]: can be handled by the "browser_file_upload" tool`);
// Create a real file on disk to upload.
const filePath = test.info().outputPath('test.txt');
await fs.writeFile(filePath, 'Hello, world!');
{
// Uploading should clear the modal state and show the chosen file in the snapshot.
const response = await client.callTool({
name: 'browser_file_upload',
arguments: {
paths: [filePath],
},
});
expect(response).not.toContainTextContent('### Modal state');
expect(response).toContainTextContent('textbox [ref=s3e3]: C:\\fakepath\\test.txt');
}
{
// Clicking the file input again should report the modal state once more.
const response = await client.callTool({
name: 'browser_click',
arguments: {
element: 'Textbox',
ref: 's3e3',
},
});
expect(response).toContainTextContent('- [File chooser]: can be handled by the \"browser_file_upload\" tool');
}
{
// With the file chooser still pending, a tool that does not clear it is
// expected to be rejected with the modal state listing.
const response = await client.callTool({
name: 'browser_click',
arguments: {
element: 'Button',
ref: 's4e4',
},
});
expect(response).toContainTextContent(`Tool "browser_click" does not handle the modal state.
### Modal state
- [File chooser]: can be handled by the "browser_file_upload" tool`);
}
});