chore: extract loop tools into a separate folder (#755)

This commit is contained in:
Pavel Feldman 2025-07-24 16:22:03 -07:00 committed by GitHub
parent e153ac3b7c
commit ecfa10448b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 296 additions and 155 deletions

View File

@ -41,15 +41,16 @@ export type LLMConversation = {
};
export interface LLMDelegate {
createConversation(task: string, tools: Tool[]): LLMConversation;
createConversation(task: string, tools: Tool[], oneShot: boolean): LLMConversation;
makeApiCall(conversation: LLMConversation): Promise<LLMToolCall[]>;
addToolResults(conversation: LLMConversation, results: Array<{ toolCallId: string; content: string; isError?: boolean }>): void;
checkDoneToolCall(toolCall: LLMToolCall): string | null;
}
export async function runTask(delegate: LLMDelegate, client: Client, task: string): Promise<string> {
export async function runTask(delegate: LLMDelegate, client: Client, task: string, oneShot: boolean = false): Promise<string> {
const { tools } = await client.listTools();
const conversation = delegate.createConversation(task, tools);
const taskContent = oneShot ? `Perform following task: ${task}.` : `Perform following task: ${task}. Once the task is complete, call the "done" tool.`;
const conversation = delegate.createConversation(taskContent, tools, oneShot);
for (let iteration = 0; iteration < 5; ++iteration) {
debug('history')('Making API call for iteration', iteration);
@ -99,8 +100,10 @@ export async function runTask(delegate: LLMDelegate, client: Client, task: strin
}
}
// Add tool results to conversation
delegate.addToolResults(conversation, toolResults);
if (oneShot)
return toolResults.map(result => result.content).join('\n');
else
delegate.addToolResults(conversation, toolResults);
}
throw new Error('Failed to perform step, max attempts reached');

View File

@ -14,38 +14,48 @@
* limitations under the License.
*/
import Anthropic from '@anthropic-ai/sdk';
import type Anthropic from '@anthropic-ai/sdk';
import type { LLMDelegate, LLMConversation, LLMToolCall, LLMTool } from './loop.js';
import type { Tool } from '@modelcontextprotocol/sdk/types.js';
const model = 'claude-sonnet-4-20250514';
export class ClaudeDelegate implements LLMDelegate {
private anthropic = new Anthropic();
private _anthropic: Anthropic | undefined;
createConversation(task: string, tools: Tool[]): LLMConversation {
async anthropic(): Promise<Anthropic> {
if (!this._anthropic) {
const anthropic = await import('@anthropic-ai/sdk');
this._anthropic = new anthropic.Anthropic();
}
return this._anthropic;
}
createConversation(task: string, tools: Tool[], oneShot: boolean): LLMConversation {
const llmTools: LLMTool[] = tools.map(tool => ({
name: tool.name,
description: tool.description || '',
inputSchema: tool.inputSchema,
}));
// Add the "done" tool
llmTools.push({
name: 'done',
description: 'Call this tool when the task is complete.',
inputSchema: {
type: 'object',
properties: {
result: { type: 'string', description: 'The result of the task.' },
if (!oneShot) {
llmTools.push({
name: 'done',
description: 'Call this tool when the task is complete.',
inputSchema: {
type: 'object',
properties: {
result: { type: 'string', description: 'The result of the task.' },
},
required: ['result'],
},
},
});
});
}
return {
messages: [{
role: 'user',
content: `Perform following task: ${task}. Once the task is complete, call the "done" tool.`
content: task
}],
tools: llmTools,
};
@ -119,7 +129,8 @@ export class ClaudeDelegate implements LLMDelegate {
input_schema: tool.inputSchema,
}));
const response = await this.anthropic.messages.create({
const anthropic = await this.anthropic();
const response = await anthropic.messages.create({
model,
max_tokens: 10000,
messages: claudeMessages,

View File

@ -14,39 +14,48 @@
* limitations under the License.
*/
import OpenAI from 'openai';
import type OpenAI from 'openai';
import type { LLMDelegate, LLMConversation, LLMToolCall, LLMTool } from './loop.js';
import type { Tool } from '@modelcontextprotocol/sdk/types.js';
const model = 'gpt-4.1';
export class OpenAIDelegate implements LLMDelegate {
private openai = new OpenAI();
private _openai: OpenAI | undefined;
createConversation(task: string, tools: Tool[]): LLMConversation {
async openai(): Promise<OpenAI> {
if (!this._openai) {
const oai = await import('openai');
this._openai = new oai.OpenAI();
}
return this._openai;
}
createConversation(task: string, tools: Tool[], oneShot: boolean): LLMConversation {
const genericTools: LLMTool[] = tools.map(tool => ({
name: tool.name,
description: tool.description || '',
inputSchema: tool.inputSchema,
}));
// Add the "done" tool
genericTools.push({
name: 'done',
description: 'Call this tool when the task is complete.',
inputSchema: {
type: 'object',
properties: {
result: { type: 'string', description: 'The result of the task.' },
if (!oneShot) {
genericTools.push({
name: 'done',
description: 'Call this tool when the task is complete.',
inputSchema: {
type: 'object',
properties: {
result: { type: 'string', description: 'The result of the task.' },
},
required: ['result'],
},
required: ['result'],
},
});
});
}
return {
messages: [{
role: 'user',
content: `Peform following task: ${task}. Once the task is complete, call the "done" tool.`
content: task
}],
tools: genericTools,
};
@ -108,7 +117,8 @@ export class OpenAIDelegate implements LLMDelegate {
},
}));
const response = await this.openai.chat.completions.create({
const openai = await this.openai();
const response = await openai.chat.completions.create({
model,
messages: openaiMessages,
tools: openaiTools,

View File

@ -1,85 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import dotenv from 'dotenv';
import { z } from 'zod';
import { contextFactory } from '../browserContextFactory.js';
import { BrowserServerBackend } from '../browserServerBackend.js';
import { Context } from '../context.js';
import { logUnhandledError } from '../log.js';
import { InProcessTransport } from '../mcp/inProcessTransport.js';
import * as mcpServer from '../mcp/server.js';
import * as mcpTransport from '../mcp/transport.js';
import { packageJSON } from '../package.js';
import { runTask } from './loop.js';
import { OpenAIDelegate } from './loopOpenAI.js';
import type { FullConfig } from '../config.js';
import type { ServerBackend } from '../mcp/server.js';
const oneToolSchema: mcpServer.ToolSchema<any> = {
name: 'browser',
title: 'Perform a task with the browser',
description: 'Perform a task with the browser. It can click, type, export, capture screenshot, drag, hover, select options, etc.',
inputSchema: z.object({
task: z.string().describe('The task to perform with the browser'),
}),
type: 'readOnly',
};
export async function runOneTool(config: FullConfig) {
dotenv.config();
const serverBackendFactory = () => new OneToolServerBackend(config);
await mcpTransport.start(serverBackendFactory, config.server);
}
class OneToolServerBackend implements ServerBackend {
readonly name = 'Playwright';
readonly version = packageJSON.version;
private _innerClient: Client | undefined;
private _config: FullConfig;
constructor(config: FullConfig) {
this._config = config;
}
async initialize() {
const client = new Client({ name: 'Playwright Proxy', version: '1.0.0' });
const browserContextFactory = contextFactory(this._config.browser);
const server = mcpServer.createServer(new BrowserServerBackend(this._config, browserContextFactory));
await client.connect(new InProcessTransport(server));
await client.ping();
this._innerClient = client;
}
tools(): mcpServer.ToolSchema<any>[] {
return [oneToolSchema];
}
async callTool(schema: mcpServer.ToolSchema<any>, parsedArguments: any): Promise<mcpServer.ToolResponse> {
const delegate = new OpenAIDelegate();
const result = await runTask(delegate, this._innerClient!, parsedArguments.task as string);
return {
content: [{ type: 'text', text: result }],
};
}
serverClosed() {
void Context.disposeAll().catch(logUnhandledError);
}
}

65
src/loopTools/context.ts Normal file
View File

@ -0,0 +1,65 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { contextFactory } from '../browserContextFactory.js';
import { BrowserServerBackend } from '../browserServerBackend.js';
import { Context as BrowserContext } from '../context.js';
import { runTask } from '../loop/loop.js';
import { OpenAIDelegate } from '../loop/loopOpenAI.js';
import { ClaudeDelegate } from '../loop/loopClaude.js';
import { InProcessTransport } from '../mcp/inProcessTransport.js';
import * as mcpServer from '../mcp/server.js';
import type { LLMDelegate } from '../loop/loop.js';
import type { FullConfig } from '../config.js';
export class Context {
readonly config: FullConfig;
private _client: Client;
private _delegate: LLMDelegate;
constructor(config: FullConfig, client: Client) {
this.config = config;
this._client = client;
if (process.env.OPENAI_API_KEY)
this._delegate = new OpenAIDelegate();
else if (process.env.ANTHROPIC_API_KEY)
this._delegate = new ClaudeDelegate();
else
throw new Error('No LLM API key found. Please set OPENAI_API_KEY or ANTHROPIC_API_KEY environment variable.');
}
static async create(config: FullConfig) {
const client = new Client({ name: 'Playwright Proxy', version: '1.0.0' });
const browserContextFactory = contextFactory(config.browser);
const server = mcpServer.createServer(new BrowserServerBackend(config, browserContextFactory));
await client.connect(new InProcessTransport(server));
await client.ping();
return new Context(config, client);
}
async runTask(task: string, oneShot: boolean = false): Promise<mcpServer.ToolResponse> {
const result = await runTask(this._delegate, this._client!, task, oneShot);
return {
content: [{ type: 'text', text: result }],
};
}
async close() {
await BrowserContext.disposeAll();
}
}

63
src/loopTools/main.ts Normal file
View File

@ -0,0 +1,63 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import dotenv from 'dotenv';
import * as mcpServer from '../mcp/server.js';
import * as mcpTransport from '../mcp/transport.js';
import { packageJSON } from '../package.js';
import { Context } from './context.js';
import { perform } from './perform.js';
import { snapshot } from './snapshot.js';
import type { FullConfig } from '../config.js';
import type { ServerBackend } from '../mcp/server.js';
import type { Tool } from './tool.js';
export async function runLoopTools(config: FullConfig) {
dotenv.config();
const serverBackendFactory = () => new LoopToolsServerBackend(config);
await mcpTransport.start(serverBackendFactory, config.server);
}
class LoopToolsServerBackend implements ServerBackend {
readonly name = 'Playwright';
readonly version = packageJSON.version;
private _config: FullConfig;
private _context: Context | undefined;
private _tools: Tool<any>[] = [perform, snapshot];
constructor(config: FullConfig) {
this._config = config;
}
async initialize() {
this._context = await Context.create(this._config);
}
tools(): mcpServer.ToolSchema<any>[] {
return this._tools.map(tool => tool.schema);
}
async callTool(schema: mcpServer.ToolSchema<any>, parsedArguments: any): Promise<mcpServer.ToolResponse> {
const tool = this._tools.find(tool => tool.schema.name === schema.name)!;
return await tool.handle(this._context!, parsedArguments);
}
serverClosed() {
void this._context!.close();
}
}

36
src/loopTools/perform.ts Normal file
View File

@ -0,0 +1,36 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
const performSchema = z.object({
task: z.string().describe('The task to perform with the browser'),
});
export const perform = defineTool({
schema: {
name: 'browser_perform',
title: 'Perform a task with the browser',
description: 'Perform a task with the browser. It can click, type, export, capture screenshot, drag, hover, select options, etc.',
inputSchema: performSchema,
type: 'destructive',
},
handle: async (context, params) => {
return await context.runTask(params.task);
},
});

32
src/loopTools/snapshot.ts Normal file
View File

@ -0,0 +1,32 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
export const snapshot = defineTool({
schema: {
name: 'browser_snapshot',
title: 'Take a snapshot of the browser',
description: 'Take a snapshot of the browser to read what is on the page.',
inputSchema: z.object({}),
type: 'readOnly',
},
handle: async (context, params) => {
return await context.runTask('Capture browser snapshot', true);
},
});

29
src/loopTools/tool.ts Normal file
View File

@ -0,0 +1,29 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import type { z } from 'zod';
import type * as mcpServer from '../mcp/server.js';
import type { Context } from './context.js';
export type Tool<Input extends z.Schema = z.Schema> = {
schema: mcpServer.ToolSchema<Input>;
handle: (context: Context, params: z.output<Input>) => Promise<mcpServer.ToolResponse>;
};
export function defineTool<Input extends z.Schema>(tool: Tool<Input>): Tool<Input> {
return tool;
}

View File

@ -25,6 +25,7 @@ import { runWithExtension } from './extension/main.js';
import { BrowserServerBackend } from './browserServerBackend.js';
import { Context } from './context.js';
import { contextFactory } from './browserContextFactory.js';
import { runLoopTools } from './loopTools/main.js';
program
.version('Version ' + packageJSON.version)
@ -55,6 +56,7 @@ program
.option('--user-data-dir <path>', 'path to the user data directory. If not specified, a temporary directory will be created.')
.option('--viewport-size <size>', 'specify browser viewport size in pixels, for example "1280, 720"')
.addOption(new Option('--extension', 'Connect to a running browser instance (Edge/Chrome only). Requires the "Playwright MCP Bridge" browser extension to be installed.').hideHelp())
.addOption(new Option('--loop-tools', 'Run loop tools').hideHelp())
.addOption(new Option('--vision', 'Legacy option, use --caps=vision instead').hideHelp())
.action(async options => {
const abortController = setupExitWatchdog();
@ -70,6 +72,10 @@ program
await runWithExtension(config, abortController);
return;
}
if (options.loopTools) {
await runLoopTools(config);
return;
}
const browserContextFactory = contextFactory(config.browser);
const serverBackendFactory = () => new BrowserServerBackend(config, browserContextFactory);

View File

@ -20,16 +20,7 @@ import type * as playwright from 'playwright';
import type { ToolCapability } from '../../config.js';
import type { Tab } from '../tab.js';
import type { Response } from '../response.js';
export type ToolSchema<Input extends InputType> = {
name: string;
title: string;
description: string;
inputSchema: Input;
type: 'readOnly' | 'destructive';
};
type InputType = z.Schema;
import type { ToolSchema } from '../mcp/server.js';
export type FileUploadModalState = {
type: 'fileChooser';
@ -45,44 +36,24 @@ export type DialogModalState = {
export type ModalState = FileUploadModalState | DialogModalState;
export type SnapshotContent = {
type: 'snapshot';
snapshot: string;
};
export type TextContent = {
type: 'text';
text: string;
};
export type ImageContent = {
type: 'image';
image: string;
};
export type CodeContent = {
type: 'code';
code: string[];
};
export type Tool<Input extends InputType = InputType> = {
export type Tool<Input extends z.Schema = z.Schema> = {
capability: ToolCapability;
schema: ToolSchema<Input>;
handle: (context: Context, params: z.output<Input>, response: Response) => Promise<void>;
};
export function defineTool<Input extends InputType>(tool: Tool<Input>): Tool<Input> {
export function defineTool<Input extends z.Schema>(tool: Tool<Input>): Tool<Input> {
return tool;
}
export type TabTool<Input extends InputType = InputType> = {
export type TabTool<Input extends z.Schema = z.Schema> = {
capability: ToolCapability;
schema: ToolSchema<Input>;
clearsModalState?: ModalState['type'];
handle: (tab: Tab, params: z.output<Input>, response: Response) => Promise<void>;
};
export function defineTabTool<Input extends InputType>(tool: TabTool<Input>): Tool<Input> {
export function defineTabTool<Input extends z.Schema>(tool: TabTool<Input>): Tool<Input> {
return {
...tool,
handle: async (context, params, response) => {