chore: introduce modal states (#204)

This commit is contained in:
Pavel Feldman 2025-04-16 15:21:45 -07:00 committed by GitHub
parent 6054290d9a
commit cea347d067
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 156 additions and 88 deletions

View File

@ -18,7 +18,8 @@ import * as playwright from 'playwright';
import yaml from 'yaml'; import yaml from 'yaml';
import { waitForCompletion } from './tools/utils'; import { waitForCompletion } from './tools/utils';
import { ToolResult } from './tools/tool';
import type { ModalState, Tool, ToolResult } from './tools/tool';
export type ContextOptions = { export type ContextOptions = {
browserName?: 'chromium' | 'firefox' | 'webkit'; browserName?: 'chromium' | 'firefox' | 'webkit';
@ -33,20 +34,43 @@ type PageOrFrameLocator = playwright.Page | playwright.FrameLocator;
type RunOptions = { type RunOptions = {
captureSnapshot?: boolean; captureSnapshot?: boolean;
waitForCompletion?: boolean; waitForCompletion?: boolean;
noClearFileChooser?: boolean;
}; };
export class Context { export class Context {
readonly tools: Tool[];
readonly options: ContextOptions; readonly options: ContextOptions;
private _browser: playwright.Browser | undefined; private _browser: playwright.Browser | undefined;
private _browserContext: playwright.BrowserContext | undefined; private _browserContext: playwright.BrowserContext | undefined;
private _tabs: Tab[] = []; private _tabs: Tab[] = [];
private _currentTab: Tab | undefined; private _currentTab: Tab | undefined;
private _modalStates: (ModalState & { tab: Tab })[] = [];
constructor(options: ContextOptions) { constructor(tools: Tool[], options: ContextOptions) {
this.tools = tools;
this.options = options; this.options = options;
} }
/**
 * Lists the modal states currently tracked by this context.
 *
 * @returns The context's internal array itself; no copy is made.
 */
modalStates(): ModalState[] {
  const pending = this._modalStates;
  return pending;
}
/**
 * Records a modal state raised in a tab.
 *
 * A shallow copy of `modalState` is stored with the owning tab attached, so
 * the state can later be dropped when that tab's page closes.
 *
 * @param modalState The modal state to track.
 * @param inTab The tab in which the modal state appeared.
 */
setModalState(modalState: ModalState, inTab: Tab) {
  const tagged = { ...modalState, tab: inTab };
  this._modalStates.push(tagged);
}
/**
 * Stops tracking the given modal state.
 *
 * Entries are matched by object identity, and the tracked entries are copies
 * made by `setModalState`, so pass an object obtained from `modalStates()`.
 * The internal list is replaced with a new array rather than edited in place.
 *
 * @param modalState The tracked entry to remove.
 */
clearModalState(modalState: ModalState) {
  const remaining: (ModalState & { tab: Tab })[] = [];
  for (const entry of this._modalStates) {
    if (entry !== modalState)
      remaining.push(entry);
  }
  this._modalStates = remaining;
}
/**
 * Renders the tracked modal states as markdown lines for a tool response.
 *
 * The first line is always the "### Modal state" heading. Each tracked state
 * adds one bullet naming the tool whose `clearsModalState` matches the
 * state's `type`.
 *
 * @returns The markdown lines, to be joined with newlines by the caller.
 */
modalStatesMarkdown(): string[] {
  const result: string[] = ['### Modal state'];
  // A bare heading reads as a truncated message, so say explicitly that
  // nothing is pending (e.g. a modal-clearing tool called with no modal open).
  if (!this._modalStates.length)
    result.push('- There is no modal state present');
  for (const state of this._modalStates) {
    const tool = this.tools.find(tool => tool.clearsModalState === state.type);
    // Never print "undefined" as a tool name when no registered tool matches.
    result.push(tool
      ? `- [${state.description}]: can be handled by the "${tool.schema.name}" tool`
      : `- [${state.description}]: no available tool can handle this state`);
  }
  return result;
}
tabs(): Tab[] { tabs(): Tab[] {
return this._tabs; return this._tabs;
} }
@ -104,6 +128,7 @@ export class Context {
} }
private _onPageClosed(tab: Tab) { private _onPageClosed(tab: Tab) {
this._modalStates = this._modalStates.filter(state => state.tab !== tab);
const index = this._tabs.indexOf(tab); const index = this._tabs.indexOf(tab);
if (index === -1) if (index === -1)
return; return;
@ -188,7 +213,6 @@ class Tab {
readonly context: Context; readonly context: Context;
readonly page: playwright.Page; readonly page: playwright.Page;
private _console: playwright.ConsoleMessage[] = []; private _console: playwright.ConsoleMessage[] = [];
private _fileChooser: playwright.FileChooser | undefined;
private _snapshot: PageSnapshot | undefined; private _snapshot: PageSnapshot | undefined;
private _onPageClose: (tab: Tab) => void; private _onPageClose: (tab: Tab) => void;
@ -202,13 +226,18 @@ class Tab {
this._console.length = 0; this._console.length = 0;
}); });
page.on('close', () => this._onClose()); page.on('close', () => this._onClose());
page.on('filechooser', chooser => this._fileChooser = chooser); page.on('filechooser', chooser => {
this.context.setModalState({
type: 'fileChooser',
description: 'File chooser',
fileChooser: chooser,
}, this);
});
page.setDefaultNavigationTimeout(60000); page.setDefaultNavigationTimeout(60000);
page.setDefaultTimeout(5000); page.setDefaultTimeout(5000);
} }
private _onClose() { private _onClose() {
this._fileChooser = undefined;
this._console.length = 0; this._console.length = 0;
this._onPageClose(this); this._onPageClose(this);
} }
@ -222,8 +251,6 @@ class Tab {
async run(callback: (tab: Tab) => Promise<RunResult>, options?: RunOptions): Promise<ToolResult> { async run(callback: (tab: Tab) => Promise<RunResult>, options?: RunOptions): Promise<ToolResult> {
let runResult: RunResult | undefined; let runResult: RunResult | undefined;
try { try {
if (!options?.noClearFileChooser)
this._fileChooser = undefined;
if (options?.waitForCompletion) if (options?.waitForCompletion)
runResult = await waitForCompletion(this.page, () => callback(this)) ?? undefined; runResult = await waitForCompletion(this.page, () => callback(this)) ?? undefined;
else else
@ -240,13 +267,23 @@ ${runResult.code.join('\n')}
\`\`\` \`\`\`
`); `);
if (this.context.modalStates().length) {
result.push(...this.context.modalStatesMarkdown());
return {
content: [{
type: 'text',
text: result.join('\n'),
}],
};
}
if (this.context.tabs().length > 1) if (this.context.tabs().length > 1)
result.push(await this.context.listTabs(), ''); result.push(await this.context.listTabs(), '');
if (this._snapshot) { if (this._snapshot) {
if (this.context.tabs().length > 1) if (this.context.tabs().length > 1)
result.push('### Current tab'); result.push('### Current tab');
result.push(this._snapshot.text({ hasFileChooser: !!this._fileChooser })); result.push(this._snapshot.text());
} }
const images = runResult.images?.map(image => { const images = runResult.images?.map(image => {
@ -289,13 +326,6 @@ ${runResult.code.join('\n')}
async console(): Promise<playwright.ConsoleMessage[]> { async console(): Promise<playwright.ConsoleMessage[]> {
return this._console; return this._console;
} }
async submitFileChooser(paths: string[]) {
if (!this._fileChooser)
throw new Error('No file chooser visible');
await this._fileChooser.setFiles(paths);
this._fileChooser = undefined;
}
} }
class PageSnapshot { class PageSnapshot {
@ -311,14 +341,8 @@ class PageSnapshot {
return snapshot; return snapshot;
} }
text(options: { hasFileChooser: boolean }): string { text(): string {
const results: string[] = []; return this._text;
if (options.hasFileChooser) {
results.push('- There is a file chooser visible that requires browser_file_upload to be called');
results.push('');
}
results.push(this._text);
return results.join('\n');
} }
private async _build(page: playwright.Page) { private async _build(page: playwright.Page) {

View File

@ -32,7 +32,7 @@ type Options = ContextOptions & {
export function createServerWithTools(options: Options): Server { export function createServerWithTools(options: Options): Server {
const { name, version, tools, resources } = options; const { name, version, tools, resources } = options;
const context = new Context(options); const context = new Context(tools, options);
const server = new Server({ name, version }, { const server = new Server({ name, version }, {
capabilities: { capabilities: {
tools: {}, tools: {},
@ -57,9 +57,21 @@ export function createServerWithTools(options: Options): Server {
}; };
} }
const modalStates = context.modalStates().map(state => state.type);
if ((tool.clearsModalState && !modalStates.includes(tool.clearsModalState)) ||
(!tool.clearsModalState && modalStates.length)) {
const text = [
`Tool "${request.params.name}" does not handle the modal state.`,
...context.modalStatesMarkdown(),
].join('\n');
return {
content: [{ type: 'text', text }],
isError: true,
};
}
try { try {
const result = await tool.handle(context, request.params.arguments); return await tool.handle(context, request.params.arguments);
return result;
} catch (error) { } catch (error) {
return { return {
content: [{ type: 'text', text: String(error) }], content: [{ type: 'text', text: String(error) }],

View File

@ -34,16 +34,20 @@ const uploadFile: ToolFactory = captureSnapshot => ({
const validatedParams = uploadFileSchema.parse(params); const validatedParams = uploadFileSchema.parse(params);
const tab = context.currentTab(); const tab = context.currentTab();
return await tab.runAndWait(async () => { return await tab.runAndWait(async () => {
await tab.submitFileChooser(validatedParams.paths); const modalState = context.modalStates().find(state => state.type === 'fileChooser');
if (!modalState)
throw new Error('No file chooser visible');
await modalState.fileChooser.setFiles(validatedParams.paths);
context.clearModalState(modalState);
const code = [ const code = [
`// <internal code to chose files ${validatedParams.paths.join(', ')}`, `// <internal code to chose files ${validatedParams.paths.join(', ')}`,
]; ];
return { code }; return { code };
}, { }, {
captureSnapshot, captureSnapshot,
noClearFileChooser: true,
}); });
}, },
clearsModalState: 'fileChooser',
}); });
export default (captureSnapshot: boolean) => [ export default (captureSnapshot: boolean) => [

View File

@ -17,7 +17,7 @@
import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types'; import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types';
import type { JsonSchema7Type } from 'zod-to-json-schema'; import type { JsonSchema7Type } from 'zod-to-json-schema';
import type { Context } from '../context'; import type { Context } from '../context';
import type * as playwright from 'playwright';
export type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install'; export type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install';
export type ToolSchema = { export type ToolSchema = {
@ -26,6 +26,14 @@ export type ToolSchema = {
inputSchema: JsonSchema7Type; inputSchema: JsonSchema7Type;
}; };
/**
 * Modal state recorded while a page has an open file chooser.
 */
export type FileUploadModalState = {
// Discriminant; compared against a tool's `clearsModalState`.
type: 'fileChooser';
// Human-readable label used when the state is listed in tool responses.
description: string;
// The Playwright file chooser that a handling tool sets files on.
fileChooser: playwright.FileChooser;
};
/**
 * Any modal state a context can track; currently only the file chooser variant.
 */
export type ModalState = FileUploadModalState;
export type ToolResult = { export type ToolResult = {
content: (ImageContent | TextContent)[]; content: (ImageContent | TextContent)[];
isError?: boolean; isError?: boolean;
@ -34,6 +42,7 @@ export type ToolResult = {
export type Tool = { export type Tool = {
capability: ToolCapability; capability: ToolCapability;
schema: ToolSchema; schema: ToolSchema;
clearsModalState?: ModalState['type'];
handle: (context: Context, params?: Record<string, any>) => Promise<ToolResult>; handle: (context: Context, params?: Record<string, any>) => Promise<ToolResult>;
}; };

View File

@ -14,7 +14,6 @@
* limitations under the License. * limitations under the License.
*/ */
import fs from 'fs/promises';
import { test, expect } from './fixtures'; import { test, expect } from './fixtures';
test('browser_navigate', async ({ client }) => { test('browser_navigate', async ({ client }) => {
@ -138,63 +137,6 @@ await page.getByRole('listbox').selectOption(['bar', 'baz']);
`); `);
}); });
test('browser_file_upload', async ({ client }) => {
expect(await client.callTool({
name: 'browser_navigate',
arguments: {
url: 'data:text/html,<html><title>Title</title><input type="file" /><button>Button</button></html>',
},
})).toContainTextContent('- textbox [ref=s1e3]');
expect(await client.callTool({
name: 'browser_click',
arguments: {
element: 'Textbox',
ref: 's1e3',
},
})).toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
const filePath = test.info().outputPath('test.txt');
await fs.writeFile(filePath, 'Hello, world!');
{
const response = await client.callTool({
name: 'browser_file_upload',
arguments: {
paths: [filePath],
},
});
expect(response).not.toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
expect(response).toContainTextContent('textbox [ref=s3e3]: C:\\fakepath\\test.txt');
}
{
const response = await client.callTool({
name: 'browser_click',
arguments: {
element: 'Textbox',
ref: 's3e3',
},
});
expect(response).toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
expect(response).toContainTextContent('button "Button" [ref=s4e4]');
}
{
const response = await client.callTool({
name: 'browser_click',
arguments: {
element: 'Button',
ref: 's4e4',
},
});
expect(response, 'not submitting browser_file_upload dismisses file chooser').not.toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
}
});
test('browser_type', async ({ client }) => { test('browser_type', async ({ client }) => {
await client.callTool({ await client.callTool({
name: 'browser_navigate', name: 'browser_navigate',

77
tests/files.spec.ts Normal file
View File

@ -0,0 +1,77 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { test, expect } from './fixtures';
import fs from 'fs/promises';
// End-to-end check of the file chooser modal state: it is reported after the
// file input is clicked, cleared by browser_file_upload, and causes other
// tools to be rejected while it is pending.
test('browser_file_upload', async ({ client }) => {
// Load a minimal page containing a file input and a plain button.
expect(await client.callTool({
name: 'browser_navigate',
arguments: {
url: 'data:text/html,<html><title>Title</title><input type="file" /><button>Button</button></html>',
},
})).toContainTextContent('- textbox [ref=s1e3]');
// Clicking the file input must report the modal state and the tool that handles it.
expect(await client.callTool({
name: 'browser_click',
arguments: {
element: 'Textbox',
ref: 's1e3',
},
})).toContainTextContent(`### Modal state
- [File chooser]: can be handled by the "browser_file_upload" tool`);
// Create a real file on disk to upload.
const filePath = test.info().outputPath('test.txt');
await fs.writeFile(filePath, 'Hello, world!');
// Uploading resolves the modal state; the response shows the chosen file in the input.
{
const response = await client.callTool({
name: 'browser_file_upload',
arguments: {
paths: [filePath],
},
});
expect(response).not.toContainTextContent('### Modal state');
expect(response).toContainTextContent('textbox [ref=s3e3]: C:\\fakepath\\test.txt');
}
// Clicking the input again raises the file chooser modal state a second time.
{
const response = await client.callTool({
name: 'browser_click',
arguments: {
element: 'Textbox',
ref: 's3e3',
},
});
expect(response).toContainTextContent('- [File chooser]: can be handled by the \"browser_file_upload\" tool');
}
// With the modal state still pending, a tool that does not clear it is rejected.
// NOTE(review): ref 's4e4' does not come from an earlier response in this test;
// the call is expected to be refused before the ref is used — confirm.
{
const response = await client.callTool({
name: 'browser_click',
arguments: {
element: 'Button',
ref: 's4e4',
},
});
expect(response).toContainTextContent(`Tool "browser_click" does not handle the modal state.
### Modal state
- [File chooser]: can be handled by the "browser_file_upload" tool`);
}
});