mirror of
https://github.com/microsoft/playwright-mcp.git
synced 2025-07-25 16:02:26 +08:00
chore: add eval script (#743)
This commit is contained in:
parent
288f1b863b
commit
2c5eac89a8
3
.gitignore
vendored
3
.gitignore
vendored
@ -3,6 +3,7 @@ node_modules/
|
||||
test-results/
|
||||
playwright-report/
|
||||
.vscode/mcp.json
|
||||
|
||||
.idea
|
||||
.DS_Store
|
||||
.env
|
||||
sessions/
|
||||
|
159
package-lock.json
generated
159
package-lock.json
generated
@ -22,6 +22,7 @@
|
||||
"mcp-server-playwright": "cli.js"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@anthropic-ai/sdk": "^0.57.0",
|
||||
"@eslint/eslintrc": "^3.2.0",
|
||||
"@eslint/js": "^9.19.0",
|
||||
"@playwright/test": "1.55.0-alpha-1752701791000",
|
||||
@ -33,15 +34,27 @@
|
||||
"@typescript-eslint/eslint-plugin": "^8.26.1",
|
||||
"@typescript-eslint/parser": "^8.26.1",
|
||||
"@typescript-eslint/utils": "^8.26.1",
|
||||
"dotenv": "^17.2.0",
|
||||
"eslint": "^9.19.0",
|
||||
"eslint-plugin-import": "^2.31.0",
|
||||
"eslint-plugin-notice": "^1.0.0",
|
||||
"openai": "^5.10.2",
|
||||
"typescript": "^5.8.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/sdk": {
|
||||
"version": "0.57.0",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.57.0.tgz",
|
||||
"integrity": "sha512-z5LMy0MWu0+w2hflUgj4RlJr1R+0BxKXL7ldXTO8FasU8fu599STghO+QKwId2dAD0d464aHtU+ChWuRHw4FNw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"anthropic-ai-sdk": "bin/cli"
|
||||
}
|
||||
},
|
||||
"node_modules/@eslint-community/eslint-utils": {
|
||||
"version": "4.5.1",
|
||||
"resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.5.1.tgz",
|
||||
@ -72,9 +85,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@eslint/config-array": {
|
||||
"version": "0.19.2",
|
||||
"resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.19.2.tgz",
|
||||
"integrity": "sha512-GNKqxfHG2ySmJOBSHg7LxeUx4xpuCoFjacmlCoYWEbaPXLwvfIjixRI12xCQZeULksQb23uiA8F40w5TojpV7w==",
|
||||
"version": "0.21.0",
|
||||
"resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.21.0.tgz",
|
||||
"integrity": "sha512-ENIdc4iLu0d93HeYirvKmrzshzofPw6VkZRKQGe9Nv46ZnWUzcF1xV01dcvEg/1wXUR61OmmlSfyeyO7EvjLxQ==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
@ -87,9 +100,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@eslint/config-helpers": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.1.0.tgz",
|
||||
"integrity": "sha512-kLrdPDJE1ckPo94kmPPf9Hfd0DU0Jw6oKYrhe+pwSC0iTUInmTa+w6fw8sGgcfkFJGNdWOUeOaDM4quW4a7OkA==",
|
||||
"version": "0.3.0",
|
||||
"resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.3.0.tgz",
|
||||
"integrity": "sha512-ViuymvFmcJi04qdZeDc2whTHryouGcDlaxPqarTD0ZE10ISpxGUVZGZDx4w01upyIynL3iu6IXH2bS1NhclQMw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
@ -97,9 +110,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@eslint/core": {
|
||||
"version": "0.12.0",
|
||||
"resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.12.0.tgz",
|
||||
"integrity": "sha512-cmrR6pytBuSMTaBweKoGMwu3EiHiEC+DoyupPmlZ0HxBJBtIxwe+j/E4XPIKNx+Q74c8lXKPwYawBf5glsTkHg==",
|
||||
"version": "0.15.1",
|
||||
"resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.15.1.tgz",
|
||||
"integrity": "sha512-bkOp+iumZCCbt1K1CmWf0R9pM5yKpDv+ZXtvSyQpudrI9kuFLp+bM2WOPXImuD/ceQuaa8f5pj93Y7zyECIGNA==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
@ -110,9 +123,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@eslint/eslintrc": {
|
||||
"version": "3.3.0",
|
||||
"resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-3.3.0.tgz",
|
||||
"integrity": "sha512-yaVPAiNAalnCZedKLdR21GOGILMLKPyqSLWaAjQFvYA2i/ciDi8ArYVr69Anohb6cH2Ukhqti4aFnYyPm8wdwQ==",
|
||||
"version": "3.3.1",
|
||||
"resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-3.3.1.tgz",
|
||||
"integrity": "sha512-gtF186CXhIl1p4pJNGZw8Yc6RlshoePRvE0X91oPGb3vZ8pM3qOS9W9NGPat9LziaBV7XrJWGylNQXkGcnM3IQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
@ -134,13 +147,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@eslint/js": {
|
||||
"version": "9.22.0",
|
||||
"resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.22.0.tgz",
|
||||
"integrity": "sha512-vLFajx9o8d1/oL2ZkpMYbkLv8nDB6yaIwFNt7nI4+I80U/z03SxmfOMsLbvWr3p7C+Wnoh//aOu2pQW8cS0HCQ==",
|
||||
"version": "9.31.0",
|
||||
"resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.31.0.tgz",
|
||||
"integrity": "sha512-LOm5OVt7D4qiKCqoiPbA7LWmI+tbw1VbTUowBcUMgQSuM6poJufkFkYDcQpo5KfgD39TnNySV26QjOh7VFpSyw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://eslint.org/donate"
|
||||
}
|
||||
},
|
||||
"node_modules/@eslint/object-schema": {
|
||||
@ -154,13 +170,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@eslint/plugin-kit": {
|
||||
"version": "0.2.7",
|
||||
"resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.2.7.tgz",
|
||||
"integrity": "sha512-JubJ5B2pJ4k4yGxaNLdbjrnk9d/iDz6/q8wOilpIowd6PJPgaxCuHBnBszq7Ce2TyMrywm5r4PnKm6V3iiZF+g==",
|
||||
"version": "0.3.4",
|
||||
"resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.3.4.tgz",
|
||||
"integrity": "sha512-Ul5l+lHEcw3L5+k8POx6r74mxEYKG5kOb6Xpy2gCRW6zweT6TEhAf8vhxGgjhqrd/VO/Dirhsb+1hNpD1ue9hw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@eslint/core": "^0.12.0",
|
||||
"@eslint/core": "^0.15.1",
|
||||
"levn": "^0.4.1"
|
||||
},
|
||||
"engines": {
|
||||
@ -595,9 +611,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
@ -689,9 +705,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/acorn": {
|
||||
"version": "8.14.1",
|
||||
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.1.tgz",
|
||||
"integrity": "sha512-OvQ/2pUDKmgfCg++xsTX1wGxfTaszcHVcTctW4UJB4hibJx2HXxxO5UmVgyjMa+ZDsiaf5wWLXYpRWMmBI0QHg==",
|
||||
"version": "8.15.0",
|
||||
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
|
||||
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
@ -924,9 +940,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
@ -1269,6 +1285,19 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/dotenv": {
|
||||
"version": "17.2.0",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.2.0.tgz",
|
||||
"integrity": "sha512-Q4sgBT60gzd0BB0lSyYD3xM4YxrXA9y4uBDof1JNYGzOXrQdQ6yX+7XIAqoFOGQFOTK1D3Hts5OllpxMDZFONQ==",
|
||||
"dev": true,
|
||||
"license": "BSD-2-Clause",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://dotenvx.com"
|
||||
}
|
||||
},
|
||||
"node_modules/dunder-proto": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
|
||||
@ -1461,20 +1490,20 @@
|
||||
}
|
||||
},
|
||||
"node_modules/eslint": {
|
||||
"version": "9.22.0",
|
||||
"resolved": "https://registry.npmjs.org/eslint/-/eslint-9.22.0.tgz",
|
||||
"integrity": "sha512-9V/QURhsRN40xuHXWjV64yvrzMjcz7ZyNoF2jJFmy9j/SLk0u1OLSZgXi28MrXjymnjEGSR80WCdab3RGMDveQ==",
|
||||
"version": "9.31.0",
|
||||
"resolved": "https://registry.npmjs.org/eslint/-/eslint-9.31.0.tgz",
|
||||
"integrity": "sha512-QldCVh/ztyKJJZLr4jXNUByx3gR+TDYZCRXEktiZoUR3PGy4qCmSbkxcIle8GEwGpb5JBZazlaJ/CxLidXdEbQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@eslint-community/eslint-utils": "^4.2.0",
|
||||
"@eslint-community/regexpp": "^4.12.1",
|
||||
"@eslint/config-array": "^0.19.2",
|
||||
"@eslint/config-helpers": "^0.1.0",
|
||||
"@eslint/core": "^0.12.0",
|
||||
"@eslint/eslintrc": "^3.3.0",
|
||||
"@eslint/js": "9.22.0",
|
||||
"@eslint/plugin-kit": "^0.2.7",
|
||||
"@eslint/config-array": "^0.21.0",
|
||||
"@eslint/config-helpers": "^0.3.0",
|
||||
"@eslint/core": "^0.15.0",
|
||||
"@eslint/eslintrc": "^3.3.1",
|
||||
"@eslint/js": "9.31.0",
|
||||
"@eslint/plugin-kit": "^0.3.1",
|
||||
"@humanfs/node": "^0.16.6",
|
||||
"@humanwhocodes/module-importer": "^1.0.1",
|
||||
"@humanwhocodes/retry": "^0.4.2",
|
||||
@ -1485,9 +1514,9 @@
|
||||
"cross-spawn": "^7.0.6",
|
||||
"debug": "^4.3.2",
|
||||
"escape-string-regexp": "^4.0.0",
|
||||
"eslint-scope": "^8.3.0",
|
||||
"eslint-visitor-keys": "^4.2.0",
|
||||
"espree": "^10.3.0",
|
||||
"eslint-scope": "^8.4.0",
|
||||
"eslint-visitor-keys": "^4.2.1",
|
||||
"espree": "^10.4.0",
|
||||
"esquery": "^1.5.0",
|
||||
"esutils": "^2.0.2",
|
||||
"fast-deep-equal": "^3.1.3",
|
||||
@ -1641,9 +1670,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/eslint-scope": {
|
||||
"version": "8.3.0",
|
||||
"resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-8.3.0.tgz",
|
||||
"integrity": "sha512-pUNxi75F8MJ/GdeKtVLSbYg4ZI34J6C0C7sbL4YOp2exGwen7ZsuBqKzUhXd0qMQ362yET3z+uPwKeg/0C2XCQ==",
|
||||
"version": "8.4.0",
|
||||
"resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-8.4.0.tgz",
|
||||
"integrity": "sha512-sNXOfKCn74rt8RICKMvJS7XKV/Xk9kA7DyJr8mJik3S7Cwgy3qlkkmyS2uQB3jiJg6VNdZd/pDBJu0nvG2NlTg==",
|
||||
"dev": true,
|
||||
"license": "BSD-2-Clause",
|
||||
"dependencies": {
|
||||
@ -1671,9 +1700,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/eslint/node_modules/eslint-visitor-keys": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.0.tgz",
|
||||
"integrity": "sha512-UyLnSehNt62FFhSwjZlHmeokpRK59rcz29j+F1/aDgbkbRTk7wIc9XzdoasMUbRNKDM0qQt/+BJ4BrpFeABemw==",
|
||||
"version": "4.2.1",
|
||||
"resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.1.tgz",
|
||||
"integrity": "sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
@ -1684,15 +1713,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/espree": {
|
||||
"version": "10.3.0",
|
||||
"resolved": "https://registry.npmjs.org/espree/-/espree-10.3.0.tgz",
|
||||
"integrity": "sha512-0QYC8b24HWY8zjRnDTL6RiHfDbAWn63qb4LMj1Z4b076A4une81+z03Kg7l7mn/48PUTqoLptSXez8oknU8Clg==",
|
||||
"version": "10.4.0",
|
||||
"resolved": "https://registry.npmjs.org/espree/-/espree-10.4.0.tgz",
|
||||
"integrity": "sha512-j6PAQ2uUr79PZhBjP5C5fhl8e39FmRnOjsD5lGnWrFU8i2G776tBK7+nP8KuQUTTyAZUwfQqXAgrVH5MbH9CYQ==",
|
||||
"dev": true,
|
||||
"license": "BSD-2-Clause",
|
||||
"dependencies": {
|
||||
"acorn": "^8.14.0",
|
||||
"acorn": "^8.15.0",
|
||||
"acorn-jsx": "^5.3.2",
|
||||
"eslint-visitor-keys": "^4.2.0"
|
||||
"eslint-visitor-keys": "^4.2.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
|
||||
@ -1702,9 +1731,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/espree/node_modules/eslint-visitor-keys": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.0.tgz",
|
||||
"integrity": "sha512-UyLnSehNt62FFhSwjZlHmeokpRK59rcz29j+F1/aDgbkbRTk7wIc9XzdoasMUbRNKDM0qQt/+BJ4BrpFeABemw==",
|
||||
"version": "4.2.1",
|
||||
"resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.1.tgz",
|
||||
"integrity": "sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
@ -3151,6 +3180,28 @@
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"node_modules/openai": {
|
||||
"version": "5.10.2",
|
||||
"resolved": "https://registry.npmjs.org/openai/-/openai-5.10.2.tgz",
|
||||
"integrity": "sha512-n+vi74LzHtvlKcDPn9aApgELGiu5CwhaLG40zxLTlFQdoSJCLACORIPC2uVQ3JEYAbqapM+XyRKFy2Thej7bIw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"openai": "bin/cli"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"ws": "^8.18.0",
|
||||
"zod": "^3.23.8"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"ws": {
|
||||
"optional": true
|
||||
},
|
||||
"zod": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/optionator": {
|
||||
"version": "0.9.4",
|
||||
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
|
||||
|
@ -49,6 +49,7 @@
|
||||
"zod-to-json-schema": "^3.24.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@anthropic-ai/sdk": "^0.57.0",
|
||||
"@eslint/eslintrc": "^3.2.0",
|
||||
"@eslint/js": "^9.19.0",
|
||||
"@playwright/test": "1.55.0-alpha-1752701791000",
|
||||
@ -60,9 +61,11 @@
|
||||
"@typescript-eslint/eslint-plugin": "^8.26.1",
|
||||
"@typescript-eslint/parser": "^8.26.1",
|
||||
"@typescript-eslint/utils": "^8.26.1",
|
||||
"dotenv": "^17.2.0",
|
||||
"eslint": "^9.19.0",
|
||||
"eslint-plugin-import": "^2.31.0",
|
||||
"eslint-plugin-notice": "^1.0.0",
|
||||
"openai": "^5.10.2",
|
||||
"typescript": "^5.8.2"
|
||||
},
|
||||
"bin": {
|
||||
|
@ -17,7 +17,6 @@
|
||||
import { Server as McpServer } from '@modelcontextprotocol/sdk/server/index.js';
|
||||
import { CallToolRequestSchema, ListToolsRequestSchema, Tool as McpTool } from '@modelcontextprotocol/sdk/types.js';
|
||||
import { zodToJsonSchema } from 'zod-to-json-schema';
|
||||
|
||||
import { Context } from './context.js';
|
||||
import { Response } from './response.js';
|
||||
import { allTools } from './tools.js';
|
||||
|
119
src/eval/loopClaude.ts
Normal file
119
src/eval/loopClaude.ts
Normal file
@ -0,0 +1,119 @@
|
||||
/**
|
||||
* Copyright (c) Microsoft Corporation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import Anthropic from '@anthropic-ai/sdk';
|
||||
import debug from 'debug';
|
||||
|
||||
import type { Tool, ImageContent, TextContent } from '@modelcontextprotocol/sdk/types.js';
|
||||
import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
||||
|
||||
const model = 'claude-sonnet-4-20250514';
|
||||
|
||||
export async function runTask(client: Client, task: string): Promise<string | undefined> {
|
||||
const anthropic = new Anthropic();
|
||||
const messages: Anthropic.Messages.MessageParam[] = [];
|
||||
|
||||
const { tools } = await client.listTools();
|
||||
const claudeTools = tools.map(tool => asClaudeDeclaration(tool));
|
||||
|
||||
// Add initial user message
|
||||
messages.push({
|
||||
role: 'user',
|
||||
content: `Perform following task: ${task}.`
|
||||
});
|
||||
|
||||
for (let iteration = 0; iteration < 5; ++iteration) {
|
||||
debug('history')(messages);
|
||||
|
||||
const response = await anthropic.messages.create({
|
||||
model,
|
||||
max_tokens: 10000,
|
||||
messages,
|
||||
tools: claudeTools,
|
||||
});
|
||||
|
||||
const content = response.content;
|
||||
|
||||
const toolUseBlocks = content.filter(block => block.type === 'tool_use');
|
||||
const textBlocks = content.filter(block => block.type === 'text');
|
||||
|
||||
messages.push({
|
||||
role: 'assistant',
|
||||
content: content
|
||||
});
|
||||
|
||||
if (toolUseBlocks.length === 0)
|
||||
return textBlocks.map(block => block.text).join('\n');
|
||||
|
||||
const toolResults: Anthropic.Messages.ToolResultBlockParam[] = [];
|
||||
|
||||
for (const toolUse of toolUseBlocks) {
|
||||
if (toolUse.name === 'done')
|
||||
return JSON.stringify(toolUse.input, null, 2);
|
||||
|
||||
try {
|
||||
debug('tool')(toolUse.name, toolUse.input);
|
||||
const response = await client.callTool({
|
||||
name: toolUse.name,
|
||||
arguments: toolUse.input as any,
|
||||
});
|
||||
const responseContent = (response.content || []) as (TextContent | ImageContent)[];
|
||||
debug('tool')(responseContent);
|
||||
const text = responseContent.filter(part => part.type === 'text').map(part => part.text).join('\n');
|
||||
|
||||
toolResults.push({
|
||||
type: 'tool_result',
|
||||
tool_use_id: toolUse.id,
|
||||
content: text,
|
||||
});
|
||||
} catch (error) {
|
||||
debug('tool')(error);
|
||||
toolResults.push({
|
||||
type: 'tool_result',
|
||||
tool_use_id: toolUse.id,
|
||||
content: `Error while executing tool "${toolUse.name}": ${error instanceof Error ? error.message : String(error)}\n\nPlease try to recover and complete the task.`,
|
||||
is_error: true,
|
||||
});
|
||||
// Skip remaining tool calls for this iteration
|
||||
for (const remainingToolUse of toolUseBlocks.slice(toolUseBlocks.indexOf(toolUse) + 1)) {
|
||||
toolResults.push({
|
||||
type: 'tool_result',
|
||||
tool_use_id: remainingToolUse.id,
|
||||
content: `This tool call is skipped due to previous error.`,
|
||||
is_error: true,
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Add tool results as user message
|
||||
messages.push({
|
||||
role: 'user',
|
||||
content: toolResults
|
||||
});
|
||||
}
|
||||
|
||||
throw new Error('Failed to perform step, max attempts reached');
|
||||
}
|
||||
|
||||
function asClaudeDeclaration(tool: Tool): Anthropic.Messages.Tool {
|
||||
return {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
input_schema: tool.inputSchema,
|
||||
};
|
||||
}
|
105
src/eval/loopOpenAI.ts
Normal file
105
src/eval/loopOpenAI.ts
Normal file
@ -0,0 +1,105 @@
|
||||
/**
|
||||
* Copyright (c) Microsoft Corporation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import OpenAI from 'openai';
|
||||
import debug from 'debug';
|
||||
|
||||
import type { Tool, ImageContent, TextContent } from '@modelcontextprotocol/sdk/types.js';
|
||||
import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
||||
|
||||
const model = 'gpt-4.1';
|
||||
|
||||
export async function runTask(client: Client, task: string): Promise<string | undefined> {
|
||||
const openai = new OpenAI();
|
||||
const messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = [
|
||||
{
|
||||
role: 'user',
|
||||
content: `Peform following task: ${task}. Once the task is complete, call the "done" tool.`
|
||||
}
|
||||
];
|
||||
|
||||
const { tools } = await client.listTools();
|
||||
|
||||
for (let iteration = 0; iteration < 5; ++iteration) {
|
||||
debug('history')(messages);
|
||||
|
||||
const response = await openai.chat.completions.create({
|
||||
model,
|
||||
messages,
|
||||
tools: tools.map(tool => asOpenAIDeclaration(tool)),
|
||||
tool_choice: 'auto'
|
||||
});
|
||||
|
||||
const message = response.choices[0].message;
|
||||
if (!message.tool_calls?.length)
|
||||
return JSON.stringify(message.content, null, 2);
|
||||
|
||||
messages.push({
|
||||
role: 'assistant',
|
||||
tool_calls: message.tool_calls
|
||||
});
|
||||
|
||||
for (const toolCall of message.tool_calls) {
|
||||
const functionCall = toolCall.function;
|
||||
|
||||
if (functionCall.name === 'done')
|
||||
return JSON.stringify(functionCall.arguments, null, 2);
|
||||
|
||||
try {
|
||||
debug('tool')(functionCall.name, functionCall.arguments);
|
||||
const response = await client.callTool({
|
||||
name: functionCall.name,
|
||||
arguments: JSON.parse(functionCall.arguments)
|
||||
});
|
||||
const content = (response.content || []) as (TextContent | ImageContent)[];
|
||||
debug('tool')(content);
|
||||
const text = content.filter(part => part.type === 'text').map(part => part.text).join('\n');
|
||||
messages.push({
|
||||
role: 'tool',
|
||||
tool_call_id: toolCall.id,
|
||||
content: text,
|
||||
});
|
||||
} catch (error) {
|
||||
debug('tool')(error);
|
||||
messages.push({
|
||||
role: 'tool',
|
||||
tool_call_id: toolCall.id,
|
||||
content: `Error while executing tool "${functionCall.name}": ${error instanceof Error ? error.message : String(error)}\n\nPlease try to recover and complete the task.`,
|
||||
});
|
||||
for (const ignoredToolCall of message.tool_calls.slice(message.tool_calls.indexOf(toolCall) + 1)) {
|
||||
messages.push({
|
||||
role: 'tool',
|
||||
tool_call_id: ignoredToolCall.id,
|
||||
content: `This tool call is skipped due to previous error.`,
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
throw new Error('Failed to perform step, max attempts reached');
|
||||
}
|
||||
|
||||
function asOpenAIDeclaration(tool: Tool): OpenAI.Chat.Completions.ChatCompletionTool {
|
||||
return {
|
||||
type: 'function',
|
||||
function: {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.inputSchema,
|
||||
},
|
||||
};
|
||||
}
|
68
src/eval/main.ts
Normal file
68
src/eval/main.ts
Normal file
@ -0,0 +1,68 @@
|
||||
/**
|
||||
* Copyright (c) Microsoft Corporation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/* eslint-disable no-console */
|
||||
|
||||
import path from 'path';
|
||||
import url from 'url';
|
||||
import dotenv from 'dotenv';
|
||||
|
||||
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
|
||||
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
||||
import { program } from 'commander';
|
||||
import { runTask as runTaskOpenAI } from './loopOpenAI.js';
|
||||
import { runTask as runTaskClaude } from './loopClaude.js';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
const __filename = url.fileURLToPath(import.meta.url);
|
||||
|
||||
async function run(runTask: (client: Client, task: string) => Promise<string | undefined>) {
|
||||
const transport = new StdioClientTransport({
|
||||
command: 'node',
|
||||
args: [
|
||||
path.resolve(__filename, '../../../cli.js'),
|
||||
'--save-session',
|
||||
'--output-dir', path.resolve(__filename, '../../../sessions')
|
||||
],
|
||||
stderr: 'inherit',
|
||||
env: process.env as Record<string, string>,
|
||||
});
|
||||
|
||||
const client = new Client({ name: 'test', version: '1.0.0' });
|
||||
await client.connect(transport);
|
||||
await client.ping();
|
||||
|
||||
let lastResult: string | undefined;
|
||||
for (const task of tasks)
|
||||
lastResult = await runTask(client, task);
|
||||
console.log(lastResult);
|
||||
await client.close();
|
||||
}
|
||||
|
||||
const tasks = [
|
||||
'Open https://playwright.dev/',
|
||||
];
|
||||
|
||||
program
|
||||
.option('--model <model>', 'model to use')
|
||||
.action(async options => {
|
||||
if (options.model === 'claude')
|
||||
await run(runTaskClaude);
|
||||
else
|
||||
await run(runTaskOpenAI);
|
||||
});
|
||||
void program.parseAsync(process.argv);
|
@ -14,9 +14,9 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import fs from 'node:fs';
|
||||
import url from 'node:url';
|
||||
import path from 'node:path';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import url from 'url';
|
||||
|
||||
const __filename = url.fileURLToPath(import.meta.url);
|
||||
export const packageJSON = JSON.parse(fs.readFileSync(path.join(path.dirname(__filename), '..', 'package.json'), 'utf8'));
|
||||
|
Loading…
x
Reference in New Issue
Block a user