mirror of
https://github.com/promptfoo/promptfoo.git
synced 2023-08-15 01:10:51 +03:00
Web UI for running evals (#103)
This commit is contained in:
@@ -1,2 +1,5 @@
|
||||
dist
|
||||
venv
|
||||
.aider*
|
||||
src/web/nextui/out
|
||||
src/web/nextui/.next
|
||||
|
||||
161
package-lock.json
generated
161
package-lock.json
generated
@@ -18,6 +18,7 @@
|
||||
"cli-progress": "^3.12.0",
|
||||
"cli-table3": "^0.6.3",
|
||||
"commander": "^10.0.1",
|
||||
"compression": "^1.7.4",
|
||||
"cors": "^2.8.5",
|
||||
"csv-parse": "^5.3.8",
|
||||
"csv-stringify": "^6.3.2",
|
||||
@@ -33,6 +34,7 @@
|
||||
"semver": "^7.5.3",
|
||||
"socket.io": "^4.6.1",
|
||||
"tiny-invariant": "^1.3.1",
|
||||
"uuid": "^9.0.0",
|
||||
"winston": "^3.8.2"
|
||||
},
|
||||
"bin": {
|
||||
@@ -43,6 +45,7 @@
|
||||
"@types/cache-manager": "^4.0.2",
|
||||
"@types/cache-manager-fs-hash": "^0.0.1",
|
||||
"@types/cli-progress": "^3.11.0",
|
||||
"@types/compression": "^1.7.2",
|
||||
"@types/cors": "^2.8.13",
|
||||
"@types/debounce": "^1.2.1",
|
||||
"@types/express": "^4.17.17",
|
||||
@@ -53,6 +56,7 @@
|
||||
"@types/nunjucks": "^3.2.2",
|
||||
"@types/opener": "^1.4.0",
|
||||
"@types/semver": "^7.5.0",
|
||||
"@types/uuid": "^9.0.2",
|
||||
"babel-jest": "^29.5.0",
|
||||
"jest": "^29.5.0",
|
||||
"jest-watch-typeahead": "^2.2.2",
|
||||
@@ -1366,6 +1370,15 @@
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/compression": {
|
||||
"version": "1.7.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/compression/-/compression-1.7.2.tgz",
|
||||
"integrity": "sha512-lwEL4M/uAGWngWFLSG87ZDr2kLrbuR8p7X+QZB1OQlT+qkHsCPDVFnHPyXf4Vyl4yDDorNY+mAhosxkCvppatg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"@types/express": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/connect": {
|
||||
"version": "3.4.35",
|
||||
"resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.35.tgz",
|
||||
@@ -1590,6 +1603,12 @@
|
||||
"resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.2.tgz",
|
||||
"integrity": "sha512-txGIh+0eDFzKGC25zORnswy+br1Ha7hj5cMVwKIU7+s0U2AxxJru/jZSMU6OC9MJWP6+pc/hc6ZjyZShpsyY2g=="
|
||||
},
|
||||
"node_modules/@types/uuid": {
|
||||
"version": "9.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.2.tgz",
|
||||
"integrity": "sha512-kNnC1GFBLuhImSnV7w4njQkUiJi0ZXUycu1rUaouPqiKlXkh77JKgdRnTAp1x5eBwcIwbtI+3otwzuIDEuDoxQ==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/yargs": {
|
||||
"version": "17.0.24",
|
||||
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.24.tgz",
|
||||
@@ -2249,6 +2268,60 @@
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/compressible": {
|
||||
"version": "2.0.18",
|
||||
"resolved": "https://registry.npmjs.org/compressible/-/compressible-2.0.18.tgz",
|
||||
"integrity": "sha512-AF3r7P5dWxL8MxyITRMlORQNaOA2IkAFaTr4k7BUumjPtRpGDTZpl0Pb1XCO6JeDCBdp126Cgs9sMxqSjgYyRg==",
|
||||
"dependencies": {
|
||||
"mime-db": ">= 1.43.0 < 2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/compression": {
|
||||
"version": "1.7.4",
|
||||
"resolved": "https://registry.npmjs.org/compression/-/compression-1.7.4.tgz",
|
||||
"integrity": "sha512-jaSIDzP9pZVS4ZfQ+TzvtiWhdpFhE2RDHz8QJkpX9SIpLq88VueF5jJw6t+6CUQcAoA6t+x89MLrWAqpfDE8iQ==",
|
||||
"dependencies": {
|
||||
"accepts": "~1.3.5",
|
||||
"bytes": "3.0.0",
|
||||
"compressible": "~2.0.16",
|
||||
"debug": "2.6.9",
|
||||
"on-headers": "~1.0.2",
|
||||
"safe-buffer": "5.1.2",
|
||||
"vary": "~1.1.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/compression/node_modules/bytes": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.0.0.tgz",
|
||||
"integrity": "sha512-pMhOfFDPiv9t5jjIXkHosWmkSyQbvsgEVNkz0ERHbuLh2T/7j4Mqqpz523Fe8MVY89KC6Sh/QfS2sM+SjgFDcw==",
|
||||
"engines": {
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/compression/node_modules/debug": {
|
||||
"version": "2.6.9",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
|
||||
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
|
||||
"dependencies": {
|
||||
"ms": "2.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/compression/node_modules/ms": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
|
||||
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
|
||||
},
|
||||
"node_modules/compression/node_modules/safe-buffer": {
|
||||
"version": "5.1.2",
|
||||
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
|
||||
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
|
||||
},
|
||||
"node_modules/concat-map": {
|
||||
"version": "0.0.1",
|
||||
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
|
||||
@@ -4409,6 +4482,14 @@
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/on-headers": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/on-headers/-/on-headers-1.0.2.tgz",
|
||||
"integrity": "sha512-pZAE+FJLoyITytdqK0U5s+FIpjN0JP3OzFi/u8Rx+EV5/W+JTWGXG8xFzevE7AjBfDqHv/8vL8qQsIhHnqRkrA==",
|
||||
"engines": {
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/once": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
|
||||
@@ -5501,6 +5582,14 @@
|
||||
"node": ">= 0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/uuid": {
|
||||
"version": "9.0.0",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz",
|
||||
"integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==",
|
||||
"bin": {
|
||||
"uuid": "dist/bin/uuid"
|
||||
}
|
||||
},
|
||||
"node_modules/v8-compile-cache-lib": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
|
||||
@@ -6791,6 +6880,15 @@
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"@types/compression": {
|
||||
"version": "1.7.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/compression/-/compression-1.7.2.tgz",
|
||||
"integrity": "sha512-lwEL4M/uAGWngWFLSG87ZDr2kLrbuR8p7X+QZB1OQlT+qkHsCPDVFnHPyXf4Vyl4yDDorNY+mAhosxkCvppatg==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"@types/express": "*"
|
||||
}
|
||||
},
|
||||
"@types/connect": {
|
||||
"version": "3.4.35",
|
||||
"resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.35.tgz",
|
||||
@@ -7015,6 +7113,12 @@
|
||||
"resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.2.tgz",
|
||||
"integrity": "sha512-txGIh+0eDFzKGC25zORnswy+br1Ha7hj5cMVwKIU7+s0U2AxxJru/jZSMU6OC9MJWP6+pc/hc6ZjyZShpsyY2g=="
|
||||
},
|
||||
"@types/uuid": {
|
||||
"version": "9.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.2.tgz",
|
||||
"integrity": "sha512-kNnC1GFBLuhImSnV7w4njQkUiJi0ZXUycu1rUaouPqiKlXkh77JKgdRnTAp1x5eBwcIwbtI+3otwzuIDEuDoxQ==",
|
||||
"dev": true
|
||||
},
|
||||
"@types/yargs": {
|
||||
"version": "17.0.24",
|
||||
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.24.tgz",
|
||||
@@ -7513,6 +7617,53 @@
|
||||
"resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz",
|
||||
"integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug=="
|
||||
},
|
||||
"compressible": {
|
||||
"version": "2.0.18",
|
||||
"resolved": "https://registry.npmjs.org/compressible/-/compressible-2.0.18.tgz",
|
||||
"integrity": "sha512-AF3r7P5dWxL8MxyITRMlORQNaOA2IkAFaTr4k7BUumjPtRpGDTZpl0Pb1XCO6JeDCBdp126Cgs9sMxqSjgYyRg==",
|
||||
"requires": {
|
||||
"mime-db": ">= 1.43.0 < 2"
|
||||
}
|
||||
},
|
||||
"compression": {
|
||||
"version": "1.7.4",
|
||||
"resolved": "https://registry.npmjs.org/compression/-/compression-1.7.4.tgz",
|
||||
"integrity": "sha512-jaSIDzP9pZVS4ZfQ+TzvtiWhdpFhE2RDHz8QJkpX9SIpLq88VueF5jJw6t+6CUQcAoA6t+x89MLrWAqpfDE8iQ==",
|
||||
"requires": {
|
||||
"accepts": "~1.3.5",
|
||||
"bytes": "3.0.0",
|
||||
"compressible": "~2.0.16",
|
||||
"debug": "2.6.9",
|
||||
"on-headers": "~1.0.2",
|
||||
"safe-buffer": "5.1.2",
|
||||
"vary": "~1.1.2"
|
||||
},
|
||||
"dependencies": {
|
||||
"bytes": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.0.0.tgz",
|
||||
"integrity": "sha512-pMhOfFDPiv9t5jjIXkHosWmkSyQbvsgEVNkz0ERHbuLh2T/7j4Mqqpz523Fe8MVY89KC6Sh/QfS2sM+SjgFDcw=="
|
||||
},
|
||||
"debug": {
|
||||
"version": "2.6.9",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
|
||||
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
|
||||
"requires": {
|
||||
"ms": "2.0.0"
|
||||
}
|
||||
},
|
||||
"ms": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
|
||||
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
|
||||
},
|
||||
"safe-buffer": {
|
||||
"version": "5.1.2",
|
||||
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
|
||||
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
|
||||
}
|
||||
}
|
||||
},
|
||||
"concat-map": {
|
||||
"version": "0.0.1",
|
||||
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
|
||||
@@ -9121,6 +9272,11 @@
|
||||
"ee-first": "1.1.1"
|
||||
}
|
||||
},
|
||||
"on-headers": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/on-headers/-/on-headers-1.0.2.tgz",
|
||||
"integrity": "sha512-pZAE+FJLoyITytdqK0U5s+FIpjN0JP3OzFi/u8Rx+EV5/W+JTWGXG8xFzevE7AjBfDqHv/8vL8qQsIhHnqRkrA=="
|
||||
},
|
||||
"once": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
|
||||
@@ -9879,6 +10035,11 @@
|
||||
"resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
|
||||
"integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA=="
|
||||
},
|
||||
"uuid": {
|
||||
"version": "9.0.0",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz",
|
||||
"integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg=="
|
||||
},
|
||||
"v8-compile-cache-lib": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
|
||||
|
||||
13
package.json
13
package.json
@@ -25,13 +25,14 @@
|
||||
"promptfoo": "dist/src/main.js"
|
||||
},
|
||||
"scripts": {
|
||||
"tsc": "tsc",
|
||||
"local": "ts-node --esm --files src/main.ts",
|
||||
"install:client": "cd src/web/client && npm install",
|
||||
"install:nextui": "cd src/web/nextui && npm install",
|
||||
"build:clean": "rm -rf dist",
|
||||
"build:client": "cd src/web/client && npm run build && cp -r dist/ ../../../dist/src/web/client",
|
||||
"build:nextui": "cd src/web/nextui && npm run build && cp -r out/ ../../../dist/src/web/nextui",
|
||||
"build:watch": "tsc --watch",
|
||||
"build": "tsc && cp src/*.html dist/src && npm run build:client && chmod +x dist/src/main.js",
|
||||
"prepare": "npm run install:client && npm run build:clean && npm run build",
|
||||
"build": "tsc && cp src/*.html dist/src && npm run build:nextui && chmod +x dist/src/main.js",
|
||||
"prepare": "npm run install:nextui && npm run build:clean && npm run build",
|
||||
"test": "jest",
|
||||
"test:watch": "jest --watch",
|
||||
"format": "prettier -w ."
|
||||
@@ -41,6 +42,7 @@
|
||||
"@types/cache-manager": "^4.0.2",
|
||||
"@types/cache-manager-fs-hash": "^0.0.1",
|
||||
"@types/cli-progress": "^3.11.0",
|
||||
"@types/compression": "^1.7.2",
|
||||
"@types/cors": "^2.8.13",
|
||||
"@types/debounce": "^1.2.1",
|
||||
"@types/express": "^4.17.17",
|
||||
@@ -51,6 +53,7 @@
|
||||
"@types/nunjucks": "^3.2.2",
|
||||
"@types/opener": "^1.4.0",
|
||||
"@types/semver": "^7.5.0",
|
||||
"@types/uuid": "^9.0.2",
|
||||
"babel-jest": "^29.5.0",
|
||||
"jest": "^29.5.0",
|
||||
"jest-watch-typeahead": "^2.2.2",
|
||||
@@ -69,6 +72,7 @@
|
||||
"cli-progress": "^3.12.0",
|
||||
"cli-table3": "^0.6.3",
|
||||
"commander": "^10.0.1",
|
||||
"compression": "^1.7.4",
|
||||
"cors": "^2.8.5",
|
||||
"csv-parse": "^5.3.8",
|
||||
"csv-stringify": "^6.3.2",
|
||||
@@ -84,6 +88,7 @@
|
||||
"semver": "^7.5.3",
|
||||
"socket.io": "^4.6.1",
|
||||
"tiny-invariant": "^1.3.1",
|
||||
"uuid": "^9.0.0",
|
||||
"winston": "^3.8.2"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -385,7 +385,6 @@ class Evaluator {
|
||||
// Set up progress bar...
|
||||
let progressbar: SingleBar | undefined;
|
||||
if (options.showProgressBar) {
|
||||
const totalNumRuns = runEvalOptions.length;
|
||||
const cliProgress = await import('cli-progress');
|
||||
progressbar = new cliProgress.SingleBar(
|
||||
{
|
||||
@@ -394,34 +393,40 @@ class Evaluator {
|
||||
},
|
||||
cliProgress.Presets.shades_classic,
|
||||
);
|
||||
progressbar.start(totalNumRuns, 0, {
|
||||
progressbar.start(runEvalOptions.length, 0, {
|
||||
provider: '',
|
||||
prompt: '',
|
||||
vars: '',
|
||||
});
|
||||
}
|
||||
if (options.progressCallback) {
|
||||
options.progressCallback(0, runEvalOptions.length);
|
||||
}
|
||||
|
||||
// Actually run the eval
|
||||
const results: EvaluateResult[] = [];
|
||||
await async.forEachOfLimit(
|
||||
runEvalOptions,
|
||||
options.maxConcurrency || DEFAULT_MAX_CONCURRENCY,
|
||||
async (options: RunEvalOptions, index: number | string) => {
|
||||
const row = await this.runEval(options);
|
||||
async (evalStep: RunEvalOptions, index: number | string) => {
|
||||
const row = await this.runEval(evalStep);
|
||||
|
||||
results.push(row);
|
||||
|
||||
if (progressbar) {
|
||||
progressbar.increment({
|
||||
provider: options.provider.id(),
|
||||
prompt: options.prompt.raw.slice(0, 10).replace(/\n/g, ' '),
|
||||
vars: Object.entries(options.test.vars || {})
|
||||
provider: evalStep.provider.id(),
|
||||
prompt: evalStep.prompt.raw.slice(0, 10).replace(/\n/g, ' '),
|
||||
vars: Object.entries(evalStep.test.vars || {})
|
||||
.map(([k, v]) => `${k}=${v}`)
|
||||
.join(' ')
|
||||
.slice(0, 10)
|
||||
.replace(/\n/g, ' '),
|
||||
});
|
||||
}
|
||||
if (options.progressCallback) {
|
||||
options.progressCallback(results.length, runEvalOptions.length);
|
||||
}
|
||||
|
||||
// Bookkeeping for table
|
||||
if (typeof index !== 'number') {
|
||||
@@ -441,13 +446,13 @@ class Evaluator {
|
||||
resultText = row.response?.output || row.error || '';
|
||||
}
|
||||
|
||||
const { rowIndex, colIndex } = options;
|
||||
const { rowIndex, colIndex } = evalStep;
|
||||
if (!table.body[rowIndex]) {
|
||||
table.body[rowIndex] = {
|
||||
outputs: [],
|
||||
vars: table.head.vars
|
||||
.map((varName) => {
|
||||
const varValue = options.test.vars?.[varName] || '';
|
||||
const varValue = evalStep.test.vars?.[varName] || '';
|
||||
if (typeof varValue === 'string') {
|
||||
return varValue;
|
||||
}
|
||||
@@ -475,6 +480,9 @@ class Evaluator {
|
||||
if (progressbar) {
|
||||
progressbar.stop();
|
||||
}
|
||||
if (options.progressCallback) {
|
||||
options.progressCallback(runEvalOptions.length, runEvalOptions.length);
|
||||
}
|
||||
|
||||
telemetry.record('eval_ran', {});
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ import providers from './providers';
|
||||
import telemetry from './telemetry';
|
||||
import { evaluate as doEvaluate } from './evaluator';
|
||||
import { loadApiProviders } from './providers';
|
||||
import { readTests, writeOutput } from './util';
|
||||
import { readTests, writeLatestResults, writeOutput } from './util';
|
||||
import type { EvaluateOptions, TestSuite, TestSuiteConfig } from './types';
|
||||
|
||||
export * from './types';
|
||||
@@ -12,6 +12,7 @@ export { generateTable } from './table';
|
||||
|
||||
interface EvaluateTestSuite extends TestSuiteConfig {
|
||||
prompts: string[];
|
||||
writeLatestResults?: boolean;
|
||||
}
|
||||
|
||||
async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions = {}) {
|
||||
@@ -27,12 +28,17 @@ async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions =
|
||||
})),
|
||||
};
|
||||
telemetry.maybeShowNotice();
|
||||
|
||||
const ret = await doEvaluate(constructedTestSuite, options);
|
||||
|
||||
if (testSuite.outputPath) {
|
||||
writeOutput(testSuite.outputPath, ret, testSuite, null);
|
||||
}
|
||||
|
||||
if (testSuite.writeLatestResults) {
|
||||
writeLatestResults(ret, {});
|
||||
}
|
||||
|
||||
await telemetry.send();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@ import {
|
||||
import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
|
||||
import { disableCache, clearCache } from './cache';
|
||||
import { getDirectory } from './esm';
|
||||
import { init } from './web/server';
|
||||
import { startServer } from './web/server';
|
||||
import { checkForUpdates } from './updates';
|
||||
|
||||
import type {
|
||||
@@ -127,7 +127,7 @@ async function main() {
|
||||
name: 'view',
|
||||
});
|
||||
await telemetry.send();
|
||||
init(cmdObj.port);
|
||||
startServer(cmdObj.port);
|
||||
});
|
||||
|
||||
program
|
||||
@@ -424,7 +424,7 @@ async function main() {
|
||||
logger.info('Done.');
|
||||
|
||||
if (cmdObj.view) {
|
||||
init(parseInt(cmdObj.view, 10) || 15500);
|
||||
startServer(parseInt(cmdObj.view, 10) || 15500);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -21,7 +21,12 @@ import type {
|
||||
} from './types';
|
||||
|
||||
export async function loadApiProviders(
|
||||
providerPaths: ProviderId | ProviderId[] | RawProviderConfig[] | ProviderFunction,
|
||||
providerPaths:
|
||||
| ProviderId
|
||||
| ProviderId[]
|
||||
| RawProviderConfig[]
|
||||
| ProviderConfig[]
|
||||
| ProviderFunction,
|
||||
basePath?: string,
|
||||
): Promise<ApiProvider[]> {
|
||||
if (typeof providerPaths === 'string') {
|
||||
@@ -43,9 +48,13 @@ export async function loadApiProviders(
|
||||
id: () => `custom-function-${idx}`,
|
||||
callApi: provider,
|
||||
};
|
||||
} else if (provider.id) {
|
||||
// List of ProviderConfig objects
|
||||
return loadApiProvider((provider as ProviderConfig).id!, provider, basePath);
|
||||
} else {
|
||||
// List of { id: string, config: ProviderConfig } objects
|
||||
const id = Object.keys(provider)[0];
|
||||
const providerObject = provider[id];
|
||||
const providerObject = (provider as RawProviderConfig)[id];
|
||||
const context = { ...providerObject, id: providerObject.id || id };
|
||||
return loadApiProvider(id, context, basePath);
|
||||
}
|
||||
|
||||
@@ -80,6 +80,7 @@ export interface OutputConfig {
|
||||
export interface EvaluateOptions {
|
||||
maxConcurrency?: number;
|
||||
showProgressBar?: boolean;
|
||||
progressCallback?: (progress: number, total: number) => void;
|
||||
generateSuggestions?: boolean;
|
||||
repeat?: number;
|
||||
}
|
||||
@@ -256,7 +257,7 @@ export interface TestSuiteConfig {
|
||||
description?: string;
|
||||
|
||||
// One or more LLM APIs to use, for example: openai:gpt-3.5-turbo, openai:gpt-4, localai:chat:vicuna
|
||||
providers: ProviderId | ProviderId[] | RawProviderConfig[] | ProviderFunction;
|
||||
providers: ProviderId | ProviderId[] | RawProviderConfig[] | ProviderConfig[] | ProviderFunction;
|
||||
|
||||
// One or more prompt files to load
|
||||
prompts: string | string[];
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
module.exports = {
|
||||
env: { browser: true, es2020: true },
|
||||
extends: [
|
||||
'eslint:recommended',
|
||||
'plugin:@typescript-eslint/recommended',
|
||||
'plugin:react-hooks/recommended',
|
||||
],
|
||||
parser: '@typescript-eslint/parser',
|
||||
parserOptions: { ecmaVersion: 'latest', sourceType: 'module' },
|
||||
plugins: ['react-refresh'],
|
||||
rules: {
|
||||
'react-refresh/only-export-components': 'warn',
|
||||
},
|
||||
};
|
||||
24
src/web/client/.gitignore
vendored
24
src/web/client/.gitignore
vendored
@@ -1,24 +0,0 @@
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
pnpm-debug.log*
|
||||
lerna-debug.log*
|
||||
|
||||
node_modules
|
||||
dist
|
||||
dist-ssr
|
||||
*.local
|
||||
|
||||
# Editor directories and files
|
||||
.vscode/*
|
||||
!.vscode/extensions.json
|
||||
.idea
|
||||
.DS_Store
|
||||
*.suo
|
||||
*.ntvs*
|
||||
*.njsproj
|
||||
*.sln
|
||||
*.sw?
|
||||
@@ -1,13 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<link rel="icon" type="image/svg+xml" href="favicon.ico" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>promptfoo web viewer</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
5726
src/web/client/package-lock.json
generated
5726
src/web/client/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -1,39 +0,0 @@
|
||||
{
|
||||
"name": "promptfoo-client",
|
||||
"private": true,
|
||||
"version": "0.0.0",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "tsc && vite build",
|
||||
"lint": "eslint src --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@emotion/react": "^11.11.0",
|
||||
"@emotion/styled": "^11.11.0",
|
||||
"@mui/icons-material": "^5.11.16",
|
||||
"@mui/material": "^5.13.0",
|
||||
"@tanstack/react-table": "^8.9.1",
|
||||
"js-yaml": "^4.1.0",
|
||||
"react": "^18.2.0",
|
||||
"react-dnd": "^16.0.1",
|
||||
"react-dnd-html5-backend": "^16.0.1",
|
||||
"react-dom": "^18.2.0",
|
||||
"socket.io-client": "^4.6.1",
|
||||
"tiny-invariant": "^1.3.1",
|
||||
"zustand": "^4.3.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/react": "^18.0.28",
|
||||
"@types/react-dom": "^18.0.11",
|
||||
"@typescript-eslint/eslint-plugin": "^5.57.1",
|
||||
"@typescript-eslint/parser": "^5.57.1",
|
||||
"@vitejs/plugin-react-swc": "^3.0.0",
|
||||
"eslint": "^8.38.0",
|
||||
"eslint-plugin-react-hooks": "^4.6.0",
|
||||
"eslint-plugin-react-refresh": "^0.3.4",
|
||||
"typescript": "^5.0.2",
|
||||
"vite": "^4.3.2"
|
||||
}
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
body {
|
||||
background-color: var(--background-color);
|
||||
color: var(--text-color);
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
.logo {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 4px;
|
||||
}
|
||||
|
||||
.logo img {
|
||||
width: 30px;
|
||||
}
|
||||
|
||||
.logo span {
|
||||
margin-bottom: 6px;
|
||||
color: var(--text-color);
|
||||
}
|
||||
|
||||
[data-theme='dark'] .logo img {
|
||||
filter: invert(1);
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
import React from 'react';
|
||||
import ReactDOM from 'react-dom/client';
|
||||
import App from './App.tsx';
|
||||
import './index.css';
|
||||
|
||||
ReactDOM.createRoot(document.getElementById('root') as HTMLElement).render(
|
||||
<React.StrictMode>
|
||||
<App />
|
||||
</React.StrictMode>,
|
||||
);
|
||||
1
src/web/client/src/vite-env.d.ts
vendored
1
src/web/client/src/vite-env.d.ts
vendored
@@ -1 +0,0 @@
|
||||
/// <reference types="vite/client" />
|
||||
@@ -1,24 +0,0 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ESNext",
|
||||
"lib": ["DOM", "DOM.Iterable", "ESNext"],
|
||||
"module": "ESNext",
|
||||
"skipLibCheck": true,
|
||||
|
||||
/* Bundler mode */
|
||||
"moduleResolution": "bundler",
|
||||
"allowImportingTsExtensions": true,
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"noEmit": true,
|
||||
"jsx": "react-jsx",
|
||||
|
||||
/* Linting */
|
||||
"strict": true,
|
||||
"noUnusedLocals": true,
|
||||
"noUnusedParameters": true,
|
||||
"noFallthroughCasesInSwitch": true
|
||||
},
|
||||
"include": ["src"],
|
||||
"references": [{ "path": "./tsconfig.node.json" }]
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"composite": true,
|
||||
"skipLibCheck": true,
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"allowSyntheticDefaultImports": true
|
||||
},
|
||||
"include": ["vite.config.ts"]
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
import { defineConfig } from 'vite';
|
||||
import react from '@vitejs/plugin-react-swc';
|
||||
|
||||
// https://vitejs.dev/config/
|
||||
export default defineConfig({
|
||||
plugins: [react()],
|
||||
});
|
||||
3
src/web/nextui/.eslintrc.json
Normal file
3
src/web/nextui/.eslintrc.json
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"extends": "next/core-web-vitals"
|
||||
}
|
||||
35
src/web/nextui/.gitignore
vendored
Normal file
35
src/web/nextui/.gitignore
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.js
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
|
||||
# local env files
|
||||
.env*.local
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
||||
8
src/web/nextui/next.config.js
Normal file
8
src/web/nextui/next.config.js
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
 * Next.js configuration for the web UI.
 *
 * @type {import('next').NextConfig}
 */
const nextConfig = {
  // Always emit/route URLs with a trailing slash.
  trailingSlash: true,
  // Static export build ('standalone' was the alternative left disabled here).
  output: 'export',
};

module.exports = nextConfig;
|
||||
4606
src/web/nextui/package-lock.json
generated
Normal file
4606
src/web/nextui/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
42
src/web/nextui/package.json
Normal file
42
src/web/nextui/package.json
Normal file
@@ -0,0 +1,42 @@
|
||||
{
|
||||
"name": "nextui",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
"start": "next start",
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"@emotion/react": "^11.11.1",
|
||||
"@emotion/styled": "^11.11.0",
|
||||
"@mui/icons-material": "^5.14.3",
|
||||
"@mui/material": "^5.14.4",
|
||||
"@tanstack/react-table": "^8.9.3",
|
||||
"@types/diff": "^5.0.3",
|
||||
"@types/node": "20.4.10",
|
||||
"@types/react": "18.2.20",
|
||||
"@types/react-dom": "18.2.7",
|
||||
"@types/react-syntax-highlighter": "^15.5.7",
|
||||
"@types/uuid": "^9.0.2",
|
||||
"debounce": "^1.2.1",
|
||||
"diff": "^5.1.0",
|
||||
"eslint": "8.47.0",
|
||||
"eslint-config-next": "13.4.13",
|
||||
"js-yaml": "^4.1.0",
|
||||
"next": "13.4.13",
|
||||
"opener": "^1.5.2",
|
||||
"react": "18.2.0",
|
||||
"react-dnd": "^16.0.1",
|
||||
"react-dnd-html5-backend": "^16.0.1",
|
||||
"react-dom": "18.2.0",
|
||||
"react-syntax-highlighter": "^15.5.0",
|
||||
"socket.io": "^4.7.2",
|
||||
"socket.io-client": "^4.7.2",
|
||||
"tiny-invariant": "^1.3.1",
|
||||
"typescript": "5.1.6",
|
||||
"uuid": "^9.0.0",
|
||||
"zustand": "^4.4.1"
|
||||
}
|
||||
}
|
||||
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB |
|
Before Width: | Height: | Size: 1.7 KiB After Width: | Height: | Size: 1.7 KiB |
3
src/web/nextui/src/app/Home.css
Normal file
3
src/web/nextui/src/app/Home.css
Normal file
@@ -0,0 +1,3 @@
|
||||
/* Applies a uniform 2rem margin on all four sides of `.container` elements. */
.container {
|
||||
margin: 2rem;
|
||||
}
|
||||
6
src/web/nextui/src/app/api/route.ts
Normal file
6
src/web/nextui/src/app/api/route.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
import { NextResponse } from 'next/server';
|
||||
|
||||
/**
 * GET handler for this route.
 *
 * Replies with a JSON body shaped `{ data: { message } }`, where `message` is
 * one of two fixed greetings picked at random with equal probability.
 */
export async function GET() {
  const greetings = ['Hello World!', 'Hello mom'];
  const message = greetings[Math.random() < 0.5 ? 0 : 1];
  return NextResponse.json({ data: { message } });
}
|
||||
@@ -7,6 +7,7 @@ nav {
|
||||
}
|
||||
|
||||
.dark-mode-toggle {
|
||||
display: flex;
|
||||
background-color: transparent;
|
||||
border: none;
|
||||
color: var(--text-color);
|
||||
@@ -1,9 +1,7 @@
|
||||
import Logo from './Logo';
|
||||
|
||||
import DarkModeIcon from '@mui/icons-material/DarkMode';
|
||||
import LightModeIcon from '@mui/icons-material/LightMode';
|
||||
|
||||
import './NavBar.css';
|
||||
import './DarkMode.css';
|
||||
|
||||
interface NavbarProps {
|
||||
darkMode: boolean;
|
||||
@@ -12,11 +10,8 @@ interface NavbarProps {
|
||||
|
||||
export default function NavBar({ darkMode, onToggleDarkMode }: NavbarProps) {
|
||||
return (
|
||||
<nav>
|
||||
<Logo />
|
||||
<div className="dark-mode-toggle" onClick={onToggleDarkMode}>
|
||||
{darkMode ? <DarkModeIcon /> : <LightModeIcon />}
|
||||
</div>
|
||||
</nav>
|
||||
<div className="dark-mode-toggle" onClick={onToggleDarkMode}>
|
||||
{darkMode ? <DarkModeIcon /> : <LightModeIcon />}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
32
src/web/nextui/src/app/components/Logo.css
Normal file
32
src/web/nextui/src/app/components/Logo.css
Normal file
@@ -0,0 +1,32 @@
|
||||
/* Logo lockup: an image plus a text span laid out in a row on a rounded,
   tinted background. */
.logo {
  display: flex;
  /* `monospace` is the CSS generic-family keyword. The previous value `mono`
     is not a keyword and would only match a font literally named "mono",
     silently falling back to the inherited/default face everywhere else. */
  font-family: monospace;
  font-size: 0.8rem;
  align-items: center;
  gap: 8px;
  background-color: #f0f0f0;
  padding: 0 2rem 0 0;
  border-radius: 0.5rem;
}

/* Dark theme: darker background behind the logo. */
[data-theme='dark'] .logo {
  background-color: #333;
  border-color: #444;
}

.logo img {
  width: 25px;
  margin-top: 2px;
}

/* Dark theme: invert the logo image colors. */
[data-theme='dark'] .logo img {
  filter: invert(1);
}

.logo span {
  color: var(--text-color);
}

[data-theme='dark'] .logo span {
  color: #f0f0f0;
}
|
||||
33
src/web/nextui/src/app/components/PageShell.css
Normal file
33
src/web/nextui/src/app/components/PageShell.css
Normal file
@@ -0,0 +1,33 @@
|
||||
/* ---- Navigation bar container ---- */
.nav {
  margin-bottom: 1rem;
  background-color: #eee;
  padding: 0.25rem 0 0.25rem 1rem;
}

[data-theme='dark'] .nav {
  background-color: #333;
}

/* ---- Links inside the bar ---- */
.nav a {
  align-self: center;
  color: #000;
  text-decoration: none;
  padding: 0 0.5rem;
}

[data-theme='dark'] .nav a {
  color: #f0f0f0;
}

/* ---- Last <div> in the bar is pushed to the far right ---- */
.nav div:last-child {
  margin-right: 0.5rem;
  margin-left: auto;
}

/* ---- Hover states ---- */
.nav a:hover {
  text-decoration: underline;
}

[data-theme='dark'] .nav a:hover {
  color: #ddd;
}
|
||||
87
src/web/nextui/src/app/components/PageShell.tsx
Normal file
87
src/web/nextui/src/app/components/PageShell.tsx
Normal file
@@ -0,0 +1,87 @@
|
||||
'use client';
|
||||
|
||||
import React from 'react';
|
||||
import Link from 'next/link';
|
||||
import useMediaQuery from '@mui/material/useMediaQuery';
|
||||
import { Stack } from '@mui/material';
|
||||
import { ThemeProvider, createTheme } from '@mui/material/styles';
|
||||
|
||||
import Logo from './Logo';
|
||||
import DarkMode from './DarkMode';
|
||||
|
||||
import './PageShell.css';
|
||||
|
||||
export { PageShell };
|
||||
|
||||
/**
 * Page chrome shared by every page: supplies the MUI theme, renders the
 * navigation bar, and owns the dark-mode state used by both.
 *
 * Dark mode starts from the `prefers-color-scheme` media query and can be
 * flipped by the user through the toggle passed down to `Navigation`.
 *
 * @param children Page content rendered below the navigation bar.
 */
function PageShell({ children }: { children: React.ReactNode }) {
  const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
  const [darkMode, setDarkMode] = React.useState(prefersDarkMode);

  // Rebuild the MUI theme only when the mode actually changes.
  const theme = React.useMemo(
    () =>
      createTheme({
        typography: {
          fontFamily: 'inherit',
        },
        palette: {
          mode: darkMode ? 'dark' : 'light',
        },
      }),
    [darkMode],
  );

  // `useState` reads its argument on the first render only, and the media
  // query may still report `false` at that point (e.g. before hydration).
  // Adopt the preference whenever it resolves or changes so the MUI theme and
  // toggle icon do not drift from the `data-theme` attribute.
  React.useEffect(() => {
    setDarkMode(prefersDarkMode);
  }, [prefersDarkMode]);

  // Single source of truth: mirror the state onto <html data-theme>, which the
  // stylesheets key their dark variants on.
  React.useEffect(() => {
    if (darkMode) {
      document.documentElement.setAttribute('data-theme', 'dark');
    } else {
      document.documentElement.removeAttribute('data-theme');
    }
  }, [darkMode]);

  const toggleDarkMode = () => {
    setDarkMode((current) => !current);
  };

  return (
    <React.StrictMode>
      <ThemeProvider theme={theme}>
        <Layout>
          <Navigation darkMode={darkMode} onToggleDarkMode={toggleDarkMode} />
          <div>{children}</div>
        </Layout>
      </ThemeProvider>
    </React.StrictMode>
  );
}
|
||||
|
||||
/** Outermost wrapper element for the navigation bar and page content. */
function Layout(props: { children: React.ReactNode }) {
  const { children } = props;
  return <div>{children}</div>;
}
|
||||
|
||||
/**
 * Top navigation bar: logo, optional page links, and the dark-mode toggle.
 *
 * The "New Eval" / "View Evals" links are hidden when the
 * `NEXT_PUBLIC_NO_BROWSING` environment variable is set to a non-empty value.
 *
 * @param darkMode - Whether dark mode is currently active.
 * @param onToggleDarkMode - Called when the user clicks the dark-mode toggle.
 */
function Navigation({
  darkMode,
  onToggleDarkMode,
}: {
  darkMode: boolean;
  onToggleDarkMode: () => void;
}) {
  const browsingDisabled = Boolean(process.env.NEXT_PUBLIC_NO_BROWSING);

  return (
    <Stack direction="row" spacing={2} className="nav">
      <Logo />
      {!browsingDisabled && <Link href="/setup">New Eval</Link>}
      {!browsingDisabled && <Link href="/eval">View Evals</Link>}
      <DarkMode darkMode={darkMode} onToggleDarkMode={onToggleDarkMode} />
    </Stack>
  );
}
|
||||
@@ -1,13 +1,16 @@
|
||||
import React from 'react';
|
||||
import Dialog from '@mui/material/Dialog';
|
||||
import DialogTitle from '@mui/material/DialogTitle';
|
||||
import DialogContent from '@mui/material/DialogContent';
|
||||
import DialogActions from '@mui/material/DialogActions';
|
||||
import Box from '@mui/material/Box';
|
||||
import Button from '@mui/material/Button';
|
||||
import Check from '@mui/icons-material/Check';
|
||||
import Dialog from '@mui/material/Dialog';
|
||||
import DialogActions from '@mui/material/DialogActions';
|
||||
import DialogContent from '@mui/material/DialogContent';
|
||||
import DialogTitle from '@mui/material/DialogTitle';
|
||||
import FileCopy from '@mui/icons-material/FileCopy';
|
||||
import IconButton from '@mui/material/IconButton';
|
||||
import Typography from '@mui/material/Typography';
|
||||
|
||||
import { useStore } from './store';
|
||||
import { IconButton, Box } from '@mui/material';
|
||||
import { FileCopy, Check } from '@mui/icons-material';
|
||||
|
||||
interface ConfigModalProps {
|
||||
open: boolean;
|
||||
@@ -5,7 +5,7 @@ import Dialog from '@mui/material/Dialog';
|
||||
import DialogActions from '@mui/material/DialogActions';
|
||||
import DialogContent from '@mui/material/DialogContent';
|
||||
import DialogTitle from '@mui/material/DialogTitle';
|
||||
import TextareaAutosize from '@mui/base/TextareaAutosize';
|
||||
import TextareaAutosize from '@mui/material/TextareaAutosize';
|
||||
import IconButton from '@mui/material/IconButton';
|
||||
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
|
||||
import CheckIcon from '@mui/icons-material/Check';
|
||||
@@ -17,7 +17,7 @@ import TableHead from '@mui/material/TableHead';
|
||||
import TableRow from '@mui/material/TableRow';
|
||||
import Typography from '@mui/material/Typography';
|
||||
|
||||
import type { GradingResult } from '../../../types';
|
||||
import type { GradingResult } from '../../../../../types';
|
||||
|
||||
interface EvalOutputPromptDialogProps {
|
||||
open: boolean;
|
||||
@@ -1,15 +1,3 @@
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
html {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
|
||||
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
|
||||
font-size: 16px;
|
||||
background-color: var(--background-color);
|
||||
color: var(--text-color);
|
||||
}
|
||||
|
||||
table,
|
||||
.divTable {
|
||||
border: 1px solid var(--table-border-color);
|
||||
@@ -20,6 +8,16 @@ table,
|
||||
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
|
||||
ins {
|
||||
background-color: var(--insert-highlight-color);
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
/* Removed text in the "different" diff view. */
del {
  background-color: var(--delete-highlight-color);
  /* `line-through` is the valid keyword; `strikethrough` is not a CSS value,
     so that declaration would be discarded by the browser. */
  text-decoration: line-through;
}
|
||||
|
||||
.tr {
|
||||
display: flex;
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
import * as React from 'react';
|
||||
import { diffSentences, diffJson, diffWords } from 'diff';
|
||||
|
||||
import './index.css';
|
||||
|
||||
@@ -12,13 +13,14 @@ import {
|
||||
import Checkbox from '@mui/material/Checkbox';
|
||||
import FormControlLabel from '@mui/material/FormControlLabel';
|
||||
|
||||
import { useStore } from './store.js';
|
||||
import { useStore } from './store';
|
||||
|
||||
import type { CellContext, VisibilityState } from '@tanstack/table-core';
|
||||
|
||||
import EvalOutputPromptDialog from './EvalOutputPromptDialog';
|
||||
|
||||
import type { EvalRow, EvalRowOutput, FilterMode } from './types.js';
|
||||
import type { EvalRow, EvalRowOutput, FilterMode } from './types';
|
||||
import type { GradingResult } from '../../../../../types';
|
||||
|
||||
import './ResultsTable.css';
|
||||
|
||||
@@ -99,7 +101,9 @@ function EvalOutputCell({
|
||||
rowIndex,
|
||||
promptIndex,
|
||||
onRating,
|
||||
}: PromptOutputProps) {
|
||||
firstOutput,
|
||||
filterMode,
|
||||
}: PromptOutputProps & { firstOutput: EvalRowOutput; filterMode: FilterMode }) {
|
||||
const [openPrompt, setOpen] = React.useState(false);
|
||||
const handlePromptOpen = () => {
|
||||
setOpen(true);
|
||||
@@ -115,6 +119,42 @@ function EvalOutputCell({
|
||||
text = chunks.slice(1).join('---');
|
||||
}
|
||||
|
||||
if (filterMode === 'different' && firstOutput) {
|
||||
let firstOutputText =
|
||||
typeof firstOutput.text === 'string' ? firstOutput.text : JSON.stringify(firstOutput.text);
|
||||
|
||||
if (firstOutputText.includes('---')) {
|
||||
firstOutputText = firstOutputText.split('---').slice(1).join('---');
|
||||
}
|
||||
|
||||
let diffResult;
|
||||
try {
|
||||
// Try parsing the texts as JSON
|
||||
JSON.parse(firstOutputText);
|
||||
JSON.parse(text);
|
||||
// If no errors are thrown, the texts are valid JSON
|
||||
diffResult = diffJson(firstOutputText, text);
|
||||
} catch (error) {
|
||||
// If an error is thrown, the texts are not valid JSON
|
||||
if (firstOutputText.includes('. ') && text.includes('. ')) {
|
||||
// If the texts contain a period, they are considered as prose
|
||||
diffResult = diffSentences(firstOutputText, text);
|
||||
} else {
|
||||
// If the texts do not contain a period, use diffWords
|
||||
diffResult = diffWords(firstOutputText, text);
|
||||
}
|
||||
}
|
||||
text = diffResult
|
||||
.map((part: { added?: boolean; removed?: boolean; value: string }) =>
|
||||
part.added
|
||||
? `<ins>${part.value}</ins>`
|
||||
: part.removed
|
||||
? `<del>${part.value}</del>`
|
||||
: part.value,
|
||||
)
|
||||
.join('');
|
||||
}
|
||||
|
||||
const handleClick = (isPass: boolean) => {
|
||||
onRating(rowIndex, promptIndex, isPass);
|
||||
};
|
||||
@@ -239,7 +279,9 @@ export default function ResultsTable({
|
||||
const numGoodAsserts = head.prompts.map((_, idx) =>
|
||||
body.reduce((acc, row) => {
|
||||
const componentResults = row.outputs[idx].gradingResult?.componentResults;
|
||||
return acc + (componentResults ? componentResults.filter((r) => r.pass).length : 0);
|
||||
return (
|
||||
acc + (componentResults ? componentResults.filter((r: GradingResult) => r.pass).length : 0)
|
||||
);
|
||||
}, 0),
|
||||
);
|
||||
|
||||
@@ -348,6 +390,8 @@ export default function ResultsTable({
|
||||
rowIndex={info.row.index}
|
||||
promptIndex={idx}
|
||||
onRating={handleRating}
|
||||
firstOutput={filteredBody[info.row.index].outputs[0]}
|
||||
filterMode={filterMode}
|
||||
/>
|
||||
),
|
||||
}),
|
||||
@@ -394,13 +438,13 @@ export default function ResultsTable({
|
||||
}}
|
||||
>
|
||||
<thead>
|
||||
{reactTable.getHeaderGroups().map((headerGroup) => (
|
||||
{reactTable.getHeaderGroups().map((headerGroup: any) => (
|
||||
<tr key={headerGroup.id} className="header">
|
||||
{headerGroup.headers.map((header) => {
|
||||
{headerGroup.headers.map((header: any) => {
|
||||
return (
|
||||
<th
|
||||
key={header.id}
|
||||
{...{
|
||||
key: header.id,
|
||||
colSpan: header.colSpan,
|
||||
style: {
|
||||
width: header.getSize(),
|
||||
@@ -424,11 +468,11 @@ export default function ResultsTable({
|
||||
))}
|
||||
</thead>
|
||||
<tbody>
|
||||
{reactTable.getRowModel().rows.map((row, rowIndex) => {
|
||||
{reactTable.getRowModel().rows.map((row: any, rowIndex: any) => {
|
||||
let colBorderDrawn = false;
|
||||
return (
|
||||
<tr key={row.id}>
|
||||
{row.getVisibleCells().map((cell) => {
|
||||
{row.getVisibleCells().map((cell: any) => {
|
||||
const isVariableCol = cell.column.id.startsWith('Variable');
|
||||
const shouldDrawColBorder = !isVariableCol && !colBorderDrawn;
|
||||
if (shouldDrawColBorder) {
|
||||
@@ -437,8 +481,8 @@ export default function ResultsTable({
|
||||
const shouldDrawRowBorder = rowIndex === 0 && !isVariableCol;
|
||||
return (
|
||||
<td
|
||||
key={cell.id}
|
||||
{...{
|
||||
key: cell.id,
|
||||
style: {
|
||||
width: cell.column.getSize(),
|
||||
},
|
||||
@@ -21,13 +21,13 @@ import ShareIcon from '@mui/icons-material/Share';
|
||||
import VisibilityIcon from '@mui/icons-material/Visibility';
|
||||
import { styled } from '@mui/system';
|
||||
|
||||
import ResultsTable from './ResultsTable.js';
|
||||
import ResultsTable from './ResultsTable';
|
||||
import ConfigModal from './ConfigModal';
|
||||
import ShareModal from './ShareModal';
|
||||
import { useStore } from './store.js';
|
||||
import { useStore } from './store';
|
||||
|
||||
import type { VisibilityState } from '@tanstack/table-core';
|
||||
import type { FilterMode } from './types.js';
|
||||
import type { FilterMode } from './types';
|
||||
|
||||
const ResponsiveStack = styled(Stack)(({ theme }) => ({
|
||||
maxWidth: '100%',
|
||||
@@ -171,7 +171,7 @@ export default function ResultsView({ recentFiles, onRecentFileSelected }: Resul
|
||||
}, [head]);
|
||||
|
||||
return (
|
||||
<div>
|
||||
<div style={{ marginLeft: '1rem', marginRight: '1rem' }}>
|
||||
<Paper py="md">
|
||||
<ResponsiveStack direction="row" spacing={4} alignItems="center">
|
||||
<Box>
|
||||
0
src/web/nextui/src/app/eval/index.css
Normal file
0
src/web/nextui/src/app/eval/index.css
Normal file
13
src/web/nextui/src/app/eval/page.css
Normal file
13
src/web/nextui/src/app/eval/page.css
Normal file
@@ -0,0 +1,13 @@
|
||||
body {
|
||||
background-color: var(--background-color);
|
||||
color: var(--text-color);
|
||||
}
|
||||
|
||||
.loading {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 1.5rem;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
height: 9rem;
|
||||
}
|
||||
@@ -1,14 +1,13 @@
|
||||
import * as React from 'react';
|
||||
'use client';
|
||||
|
||||
import useMediaQuery from '@mui/material/useMediaQuery';
|
||||
import { ThemeProvider, createTheme } from '@mui/material/styles';
|
||||
import * as React from 'react';
|
||||
import CircularProgress from '@mui/material/CircularProgress';
|
||||
import { io as SocketIOClient } from 'socket.io-client';
|
||||
|
||||
import ResultsView from './ResultsView.js';
|
||||
import NavBar from './NavBar.js';
|
||||
import { useStore } from './store.js';
|
||||
|
||||
import './App.css';
|
||||
import ResultsView from './ResultsView';
|
||||
import { API_BASE_URL } from '@/util/api';
|
||||
import { useStore } from './store';
|
||||
import './page.css';
|
||||
|
||||
function App() {
|
||||
const { table, setTable, setConfig } = useStore();
|
||||
@@ -16,52 +15,24 @@ function App() {
|
||||
const loadedFromApi = React.useRef(false);
|
||||
const [recentFiles, setRecentFiles] = React.useState<string[]>([]);
|
||||
|
||||
const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
|
||||
const [darkMode, setDarkMode] = React.useState(prefersDarkMode);
|
||||
|
||||
const theme = React.useMemo(
|
||||
() =>
|
||||
createTheme({
|
||||
palette: {
|
||||
mode: darkMode ? 'dark' : 'light',
|
||||
},
|
||||
}),
|
||||
[darkMode],
|
||||
);
|
||||
|
||||
const toggleDarkMode = () => {
|
||||
setDarkMode(!darkMode);
|
||||
if (!darkMode) {
|
||||
document.documentElement.setAttribute('data-theme', 'dark');
|
||||
} else {
|
||||
document.documentElement.removeAttribute('data-theme');
|
||||
}
|
||||
};
|
||||
|
||||
React.useEffect(() => {
|
||||
if (prefersDarkMode) {
|
||||
document.documentElement.setAttribute('data-theme', 'dark');
|
||||
}
|
||||
}, [prefersDarkMode]);
|
||||
|
||||
const fetchRecentFiles = async () => {
|
||||
if (!window.location.href.includes('localhost')) {
|
||||
return;
|
||||
}
|
||||
const resp = await fetch(`http://localhost:15500/results`);
|
||||
const resp = await fetch(`${API_BASE_URL}/results`);
|
||||
const body = await resp.json();
|
||||
setRecentFiles(body.data);
|
||||
};
|
||||
|
||||
const handleRecentFileSelection = async (file: string) => {
|
||||
const resp = await fetch(`http://localhost:15500/results/${file}`);
|
||||
const resp = await fetch(`${API_BASE_URL}/results/${file}`);
|
||||
const body = await resp.json();
|
||||
setTable(body.data.results.table);
|
||||
setConfig(body.data.config);
|
||||
};
|
||||
|
||||
React.useEffect(() => {
|
||||
const fetchEvalData = async (id: string) => {
|
||||
const fetchPublicEvalData = async (id: string) => {
|
||||
if (loadedFromApi.current) {
|
||||
return;
|
||||
}
|
||||
@@ -77,12 +48,12 @@ function App() {
|
||||
setLoaded(true);
|
||||
};
|
||||
|
||||
const socket = SocketIOClient(`http://localhost:15500`);
|
||||
const socket = SocketIOClient(API_BASE_URL);
|
||||
|
||||
const pathMatch = window.location.pathname.match(/\/eval\/([\w:-]+)/);
|
||||
if (pathMatch) {
|
||||
const id = pathMatch[1];
|
||||
fetchEvalData(id);
|
||||
fetchPublicEvalData(id);
|
||||
} else {
|
||||
socket.on('init', (data) => {
|
||||
console.log('Initialized socket connection', data);
|
||||
@@ -105,15 +76,15 @@ function App() {
|
||||
};
|
||||
}, [setTable, setConfig]);
|
||||
|
||||
return (
|
||||
<ThemeProvider theme={theme}>
|
||||
<NavBar darkMode={darkMode} onToggleDarkMode={toggleDarkMode} />
|
||||
{loaded && table ? (
|
||||
<ResultsView recentFiles={recentFiles} onRecentFileSelected={handleRecentFileSelection} />
|
||||
) : (
|
||||
<div>Loading...</div>
|
||||
)}
|
||||
</ThemeProvider>
|
||||
return loaded && table ? (
|
||||
<ResultsView recentFiles={recentFiles} onRecentFileSelected={handleRecentFileSelection} />
|
||||
) : (
|
||||
<div className="loading">
|
||||
<div>
|
||||
<CircularProgress size={22} />
|
||||
</div>
|
||||
<div>Loading eval data</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import create from 'zustand';
|
||||
|
||||
import type { EvalTable, UnifiedConfig } from './types.js';
|
||||
import type { EvalTable, UnifiedConfig } from './types';
|
||||
|
||||
interface TableState {
|
||||
table: EvalTable | null;
|
||||
@@ -1,4 +1,4 @@
|
||||
import { EvaluateResult, TokenUsage } from '../../../types';
|
||||
import type { EvaluateResult, TokenUsage } from '../../../../../types';
|
||||
|
||||
type Prompt = {
|
||||
display: string;
|
||||
@@ -33,4 +33,4 @@ export type EvalTable = {
|
||||
|
||||
export type FilterMode = 'all' | 'failures' | 'different';
|
||||
|
||||
export type { UnifiedConfig } from '../../../types';
|
||||
export type { UnifiedConfig } from '../../../../../types';
|
||||
@@ -1,6 +1,10 @@
|
||||
:root {
|
||||
font-family: system-ui, Avenir, Helvetica, Arial, sans-serif;
|
||||
/* This CSS is common to all pages */
|
||||
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
:root {
|
||||
font-synthesis: none;
|
||||
text-rendering: optimizeLegibility;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
@@ -18,6 +22,8 @@
|
||||
--success-background-color: #d1ffd7;
|
||||
--variable-background-color: #f7f7f7;
|
||||
--header-background-color: #fffdf7;
|
||||
--insert-highlight-color: #d4fcbc;
|
||||
--delete-highlight-color: #fbb6c2;
|
||||
}
|
||||
|
||||
/* Dark mode colors */
|
||||
@@ -33,8 +39,20 @@
|
||||
--success-background-color: #216d2b;
|
||||
--variable-background-color: #333;
|
||||
--header-background-color: #333;
|
||||
--insert-highlight-color: #4f8a34;
|
||||
--delete-highlight-color: #8a3434;
|
||||
}
|
||||
|
||||
html {
|
||||
font-size: calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)));
|
||||
font-size: 16px;
|
||||
background-color: var(--background-color);
|
||||
color: var(--text-color);
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
}
|
||||
25
src/web/nextui/src/app/layout.tsx
Normal file
25
src/web/nextui/src/app/layout.tsx
Normal file
@@ -0,0 +1,25 @@
|
||||
import './globals.css';
|
||||
import type { Metadata } from 'next';
|
||||
import { Roboto } from 'next/font/google';
|
||||
import { PageShell } from './components/PageShell';
|
||||
|
||||
const roboto = Roboto({
|
||||
weight: ['400', '500', '700'],
|
||||
style: ['normal'],
|
||||
subsets: ['latin'],
|
||||
});
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: 'promptfoo',
|
||||
description: 'LLM testing and evaluation',
|
||||
};
|
||||
|
||||
/**
 * Root layout for the Next.js app: sets the document language, applies the
 * Roboto font class to `<body>`, and wraps every page in `PageShell`.
 *
 * @param children - The page being rendered.
 */
export default function RootLayout(props: { children: React.ReactNode }) {
  const { children } = props;
  const bodyClassName = roboto.className;

  return (
    <html lang="en">
      <body className={bodyClassName}>
        <PageShell>{children}</PageShell>
      </body>
    </html>
  );
}
|
||||
7
src/web/nextui/src/app/page.tsx
Normal file
7
src/web/nextui/src/app/page.tsx
Normal file
@@ -0,0 +1,7 @@
|
||||
import { redirect } from 'next/navigation';
|
||||
|
||||
import './Home.css';
|
||||
|
||||
/** Index route: has no content of its own and redirects to `/eval`. */
export default function Page() {
  return redirect('/eval');
}
|
||||
118
src/web/nextui/src/app/setup/AssertsForm.tsx
Normal file
118
src/web/nextui/src/app/setup/AssertsForm.tsx
Normal file
@@ -0,0 +1,118 @@
|
||||
// src/web/nextui/src/app/setup/AssertsForm.tsx
|
||||
import React, { useState } from 'react';
|
||||
import Autocomplete from '@mui/material/Autocomplete';
|
||||
import Box from '@mui/material/Box';
|
||||
import Button from '@mui/material/Button';
|
||||
import Delete from '@mui/icons-material/Delete';
|
||||
import IconButton from '@mui/material/IconButton';
|
||||
import Stack from '@mui/material/Stack';
|
||||
import TextField from '@mui/material/TextField';
|
||||
import Typography from '@mui/material/Typography';
|
||||
import type { Assertion, AssertionType } from '../../../../../types';
|
||||
|
||||
interface AssertsFormProps {
|
||||
onAdd: (asserts: Assertion[]) => void;
|
||||
initialValues: Assertion[];
|
||||
}
|
||||
|
||||
// Positive assertion types offered in the "Type" dropdown, in display order.
const baseAssertTypes: AssertionType[] = [
  'equals',
  'contains',
  'icontains',
  'contains-all',
  'contains-any',
  'starts-with',
  'regex',
  'is-json',
  'contains-json',
  'javascript',
  'python',
  'similar',
  'llm-rubric',
  'webhook',
  'rouge-n',
  'rouge-s',
  'rouge-l',
];

// Full option list: every positive type followed by its `not-` counterpart,
// in the same order.
const assertTypes: AssertionType[] = [
  ...baseAssertTypes,
  ...baseAssertTypes.map((type) => `not-${type}` as AssertionType),
];
|
||||
|
||||
/**
 * Editable list of assertions for a test case.
 *
 * Keeps its own copy of the list (seeded from `initialValues`) and reports the
 * complete updated list through `onAdd` after every add, edit, or removal.
 *
 * @param onAdd - Receives the full assertion list after each change.
 * @param initialValues - Assertions to show initially; a falsy value means none.
 */
const AssertsForm: React.FC<AssertsFormProps> = ({ onAdd, initialValues }) => {
  const [asserts, setAsserts] = useState<Assertion[]>(initialValues || []);

  // Every mutation goes through here so local state and the parent stay in step.
  const commit = (nextAsserts: Assertion[]) => {
    setAsserts(nextAsserts);
    onAdd(nextAsserts);
  };

  // Replace selected fields of the assertion at `index`.
  const patchAssert = (index: number, patch: Partial<Assertion>) => {
    commit(asserts.map((a, i) => (i === index ? { ...a, ...patch } : a)));
  };

  const handleAdd = () => {
    commit([...asserts, { type: 'equals' as AssertionType, value: '' }]);
  };

  const handleRemoveAssert = (indexToRemove: number) => {
    commit(asserts.filter((_, index) => index !== indexToRemove));
  };

  return (
    <>
      <Typography variant="h6">Asserts</Typography>
      <Box my={asserts.length > 0 ? 2 : 0}>
        <Stack direction="column" spacing={2}>
          {asserts.map((assert, index) => (
            <Stack key={index} direction="row" spacing={2} alignItems="center">
              <Autocomplete
                value={assert.type}
                options={assertTypes}
                sx={{ minWidth: 200 }}
                // An assertion must always have a type, so do not offer "clear".
                disableClearable
                onChange={(event, newValue) => {
                  // Guard against a null selection so it is never stored as a type.
                  if (!newValue) {
                    return;
                  }
                  patchAssert(index, { type: newValue as AssertionType });
                }}
                renderInput={(params) => <TextField {...params} label="Type" />}
              />
              <TextField
                label="Value"
                value={assert.value}
                fullWidth
                onChange={(e) => {
                  patchAssert(index, { value: e.target.value });
                }}
              />
              <IconButton onClick={() => handleRemoveAssert(index)} size="small">
                <Delete />
              </IconButton>
            </Stack>
          ))}
        </Stack>
      </Box>
      <Button color="primary" onClick={handleAdd}>
        Add Assert
      </Button>
    </>
  );
};
|
||||
|
||||
export default AssertsForm;
|
||||
77
src/web/nextui/src/app/setup/PromptDialog.tsx
Normal file
77
src/web/nextui/src/app/setup/PromptDialog.tsx
Normal file
@@ -0,0 +1,77 @@
|
||||
import React from 'react';
|
||||
import {
|
||||
Dialog,
|
||||
DialogTitle,
|
||||
DialogContent,
|
||||
DialogActions,
|
||||
TextField,
|
||||
Button,
|
||||
} from '@mui/material';
|
||||
|
||||
interface PromptDialogProps {
|
||||
open: boolean;
|
||||
prompt: string;
|
||||
index: number;
|
||||
onAdd: (prompt: string) => void;
|
||||
onCancel: () => void;
|
||||
}
|
||||
|
||||
/**
 * Modal dialog for entering or editing a single prompt.
 *
 * @param open - Whether the dialog is visible.
 * @param prompt - Text to pre-fill; the draft is reset whenever this changes.
 * @param index - Zero-based prompt index, shown one-based in the title.
 * @param onAdd - Receives the draft text when "Add" or "Add Another" is clicked.
 * @param onCancel - Called to close the dialog (also used after "Add").
 */
const PromptDialog: React.FC<PromptDialogProps> = ({ open, prompt, index, onAdd, onCancel }) => {
  const [draft, setDraft] = React.useState(prompt);
  const inputRef = React.useRef<HTMLInputElement>(null);

  // Keep the draft in step with the prompt chosen by the parent.
  React.useEffect(() => {
    setDraft(prompt);
  }, [prompt]);

  // Hand the draft to the parent, clear it, then either close the dialog or
  // put focus back in the text field for the next prompt.
  const submit = (closeAfterwards: boolean) => {
    onAdd(draft);
    setDraft('');
    if (closeAfterwards) {
      onCancel();
      return;
    }
    if (inputRef.current) {
      inputRef.current.focus();
    }
  };

  const isDraftEmpty = draft.length === 0;

  return (
    <Dialog open={open} onClose={onCancel} fullWidth maxWidth="md">
      <DialogTitle>{`Edit Prompt ${index + 1}`}</DialogTitle>
      <DialogContent>
        <TextField
          value={draft}
          onChange={(event) => setDraft(event.target.value)}
          fullWidth
          margin="normal"
          multiline
          placeholder="The quick brown {{animal1}} jumps over the lazy {{animal2}}."
          helperText="Tip: use the {{varname}} syntax to add variables to your prompt."
          inputRef={inputRef}
        />
      </DialogContent>
      <DialogActions>
        <Button
          onClick={() => submit(true)}
          color="primary"
          variant="contained"
          disabled={isDraftEmpty}
        >
          Add
        </Button>
        <Button
          onClick={() => submit(false)}
          color="primary"
          variant="contained"
          disabled={isDraftEmpty}
        >
          Add Another
        </Button>
        <Button onClick={onCancel} color="secondary">
          Cancel
        </Button>
      </DialogActions>
    </Dialog>
  );
};
|
||||
|
||||
export default PromptDialog;
|
||||
190
src/web/nextui/src/app/setup/PromptsSection.tsx
Normal file
190
src/web/nextui/src/app/setup/PromptsSection.tsx
Normal file
@@ -0,0 +1,190 @@
|
||||
import React, { useState, useRef, useEffect } from 'react';
|
||||
import Button from '@mui/material/Button';
|
||||
import Typography from '@mui/material/Typography';
|
||||
import IconButton from '@mui/material/IconButton';
|
||||
import Table from '@mui/material/Table';
|
||||
import TableBody from '@mui/material/TableBody';
|
||||
import TableCell from '@mui/material/TableCell';
|
||||
import TableContainer from '@mui/material/TableContainer';
|
||||
import TableRow from '@mui/material/TableRow';
|
||||
import Tooltip from '@mui/material/Tooltip';
|
||||
import Stack from '@mui/material/Stack';
|
||||
import Edit from '@mui/icons-material/Edit';
|
||||
import Delete from '@mui/icons-material/Delete';
|
||||
import Publish from '@mui/icons-material/Publish';
|
||||
import Copy from '@mui/icons-material/ContentCopy';
|
||||
|
||||
import PromptDialog from './PromptDialog';
|
||||
import { useStore } from '../../util/store';
|
||||
|
||||
/**
 * "Prompts" section of the eval setup page.
 *
 * Lists the prompts held in the shared store and lets the user add one (typed
 * in a dialog or uploaded from a text file), edit, duplicate, or remove it.
 */
const PromptsSection: React.FC = () => {
  const [promptDialogOpen, setPromptDialogOpen] = useState(false);
  // Index of the prompt being edited; null means the dialog adds a new prompt.
  const [editingPromptIndex, setEditingPromptIndex] = useState<number | null>(null);

  const { prompts, setPrompts } = useStore();
  // NOTE(review): this ref is not attached to any element in this component,
  // so the focus effect below currently never finds an input to focus.
  const newPromptInputRef = useRef<HTMLInputElement>(null);

  useEffect(() => {
    if (editingPromptIndex !== null && editingPromptIndex > 0 && newPromptInputRef.current) {
      newPromptInputRef.current.focus();
    }
  }, [editingPromptIndex]);

  const handleEditPrompt = (index: number) => {
    setEditingPromptIndex(index);
    setPromptDialogOpen(true);
  };

  // Read the chosen file as text and append it as a new prompt.
  const handleAddPromptFromFile = (event: React.ChangeEvent<HTMLInputElement>) => {
    event.stopPropagation();
    event.preventDefault();

    const input = event.target;
    const file = input.files?.[0];
    // Reset the input so that picking the same file again still fires `change`;
    // the File object obtained above stays usable after the reset.
    input.value = '';

    if (file) {
      const reader = new FileReader();
      reader.onload = (e) => {
        const text = e.target?.result?.toString();
        if (text) {
          setPrompts([...prompts, text]);
        }
      };
      reader.readAsText(file);
    }
  };

  const handleDuplicatePrompt = (event: React.MouseEvent, index: number) => {
    // Do not let the click reach the row, which would open the edit dialog.
    event.stopPropagation();
    const duplicatedPrompt = prompts[index];
    setPrompts([...prompts, duplicatedPrompt]);
  };

  const handleChangePrompt = (index: number, newPrompt: string) => {
    setPrompts(prompts.map((p, i) => (i === index ? newPrompt : p)));
  };

  const handleRemovePrompt = (event: React.MouseEvent, indexToRemove: number) => {
    // Do not let the click reach the row, which would open the edit dialog.
    event.stopPropagation();

    if (confirm('Are you sure you want to remove this prompt?')) {
      setPrompts(prompts.filter((_, index) => index !== indexToRemove));
    }
  };

  return (
    <div>
      <Stack direction="row" spacing={2} justifyContent="space-between">
        <Typography variant="h5">Prompts</Typography>
        <div>
          <label htmlFor={`file-input-add-prompt`}>
            <Tooltip title="Upload prompt from file">
              <span>
                <IconButton component="span">
                  <Publish />
                </IconButton>
                <input
                  id={`file-input-add-prompt`}
                  type="file"
                  accept=".txt,.md"
                  onChange={handleAddPromptFromFile}
                  style={{ display: 'none' }}
                />
              </span>
            </Tooltip>
          </label>
          <Button
            color="primary"
            onClick={() => {
              setPromptDialogOpen(true);
            }}
            variant="contained"
          >
            Add Prompt
          </Button>
        </div>
      </Stack>
      <TableContainer>
        <Table>
          <TableBody>
            {prompts.length === 0 ? (
              <TableRow>
                <TableCell colSpan={2} align="center">
                  No prompts added yet.
                </TableCell>
              </TableRow>
            ) : (
              prompts.map((prompt, index) => (
                <TableRow
                  key={index}
                  sx={{
                    '&:hover': {
                      backgroundColor: 'rgba(0, 0, 0, 0.04)',
                      cursor: 'pointer',
                    },
                  }}
                  onClick={() => handleEditPrompt(index)}
                >
                  <TableCell>
                    <Typography variant="body2">
                      {`Prompt #${index + 1}: `}
                      {/* Show at most 250 characters and highlight {{variable}} placeholders. */}
                      {(prompt.length > 250 ? prompt.slice(0, 250) + ' ...' : prompt)
                        .split(/({{\w+}})/g)
                        .map((part, i) =>
                          /{{\w+}}/g.test(part) ? (
                            <span
                              key={i}
                              style={{
                                backgroundColor: 'linen',
                                padding: '0.25rem',
                                borderRadius: '4px',
                              }}
                            >
                              {part}
                            </span>
                          ) : (
                            part
                          ),
                        )}
                    </Typography>
                  </TableCell>
                  <TableCell align="right" sx={{ minWidth: 150 }}>
                    <IconButton onClick={() => handleEditPrompt(index)} size="small">
                      <Edit />
                    </IconButton>
                    <IconButton
                      onClick={(event) => handleDuplicatePrompt(event, index)}
                      size="small"
                    >
                      <Copy />
                    </IconButton>
                    <IconButton onClick={(event) => handleRemovePrompt(event, index)} size="small">
                      <Delete />
                    </IconButton>
                  </TableCell>
                </TableRow>
              ))
            )}
          </TableBody>
        </Table>
      </TableContainer>
      <PromptDialog
        open={promptDialogOpen}
        prompt={editingPromptIndex !== null ? prompts[editingPromptIndex] : ''}
        index={editingPromptIndex !== null ? editingPromptIndex : 0}
        onAdd={(newPrompt) => {
          if (editingPromptIndex !== null) {
            handleChangePrompt(editingPromptIndex, newPrompt);
          } else {
            setPrompts([...prompts, newPrompt]);
          }
          setEditingPromptIndex(null);
        }}
        onCancel={() => {
          setEditingPromptIndex(null);
          setPromptDialogOpen(false);
        }}
      />
    </div>
  );
};
|
||||
|
||||
export default PromptsSection;
|
||||
99
src/web/nextui/src/app/setup/ProviderConfigDialog.tsx
Normal file
99
src/web/nextui/src/app/setup/ProviderConfigDialog.tsx
Normal file
@@ -0,0 +1,99 @@
|
||||
import React from 'react';
|
||||
import {
|
||||
Box,
|
||||
Dialog,
|
||||
DialogTitle,
|
||||
DialogContent,
|
||||
TextField,
|
||||
DialogActions,
|
||||
Button,
|
||||
} from '@mui/material';
|
||||
import { ProviderConfig } from '../../../../../types';
|
||||
|
||||
interface ProviderConfigDialogProps {
|
||||
open: boolean;
|
||||
providerId: string;
|
||||
config: ProviderConfig['config'];
|
||||
onClose: () => void;
|
||||
onSave: (config: ProviderConfig['config']) => void;
|
||||
}
|
||||
|
||||
/**
 * Dialog for editing one provider's configuration object.
 *
 * Each top-level key gets a text field. Numbers, booleans and strings are
 * edited directly; any other value is edited as JSON text. Edits are held
 * locally and only passed to `onSave` when the user clicks "Save".
 *
 * @param open - Whether the dialog is visible.
 * @param providerId - Provider identifier shown in the title.
 * @param config - Configuration to edit; local edits are reset when it changes.
 * @param onClose - Called when the dialog is dismissed or cancelled.
 * @param onSave - Receives the edited configuration.
 */
const ProviderConfigDialog: React.FC<ProviderConfigDialogProps> = ({
  open,
  providerId,
  config,
  onClose,
  onSave,
}) => {
  const [localConfig, setLocalConfig] = React.useState(config);
  // Raw text of JSON-edited fields, keyed by config key. Kept separately so the
  // user can pass through incomplete JSON while typing without losing input.
  const [jsonDrafts, setJsonDrafts] = React.useState<Record<string, string>>({});

  React.useEffect(() => {
    setLocalConfig(config);
    setJsonDrafts({});
  }, [config]);

  const isValidJson = (text: string): boolean => {
    try {
      JSON.parse(text);
      return true;
    } catch {
      return false;
    }
  };

  // Saving is blocked while any JSON field holds text that does not parse.
  const hasInvalidJson = Object.values(jsonDrafts).some((draft) => !isValidJson(draft));

  const handleSave = () => {
    onSave(localConfig);
  };

  // Always remember what was typed; only commit to the config once it parses.
  const handleJsonChange = (key: string, text: string) => {
    setJsonDrafts((drafts) => ({ ...drafts, [key]: text }));
    try {
      const parsed = JSON.parse(text);
      setLocalConfig({ ...localConfig, [key]: parsed });
    } catch {
      // Incomplete or invalid JSON: keep the last valid value in the config.
    }
  };

  return (
    <Dialog open={open} onClose={onClose}>
      <DialogTitle>Edit {providerId}</DialogTitle>
      <DialogContent>
        {Object.keys(localConfig ?? {}).map((key) => {
          const value = localConfig[key];
          let handleChange;

          if (
            typeof value === 'number' ||
            typeof value === 'boolean' ||
            typeof value === 'string'
          ) {
            if (typeof value === 'number') {
              handleChange = (e: React.ChangeEvent<HTMLInputElement>) =>
                setLocalConfig({ ...localConfig, [key]: parseFloat(e.target.value) });
            } else if (typeof value === 'boolean') {
              handleChange = (e: React.ChangeEvent<HTMLInputElement>) =>
                setLocalConfig({ ...localConfig, [key]: e.target.value === 'true' });
            } else {
              handleChange = (e: React.ChangeEvent<HTMLInputElement>) =>
                setLocalConfig({ ...localConfig, [key]: e.target.value });
            }

            return (
              <Box key={key} my={2}>
                <TextField
                  label={key}
                  value={value}
                  onChange={handleChange}
                  fullWidth
                  type={typeof value === 'number' ? 'number' : 'text'}
                />
              </Box>
            );
          } else {
            // Prefer the in-progress text; fall back to the serialized value.
            const draft = jsonDrafts[key];
            const text = draft !== undefined ? draft : JSON.stringify(value);
            const invalid = draft !== undefined && !isValidJson(draft);

            return (
              <Box key={key} my={2}>
                <TextField
                  label={key}
                  value={text}
                  onChange={(e: React.ChangeEvent<HTMLInputElement>) =>
                    handleJsonChange(key, e.target.value)
                  }
                  error={invalid}
                  helperText={invalid ? 'Invalid JSON' : undefined}
                  fullWidth
                  multiline
                  minRows={3}
                />
              </Box>
            );
          }
        })}
      </DialogContent>
      <DialogActions>
        <Button onClick={onClose}>Cancel</Button>
        <Button onClick={handleSave} disabled={hasInvalidJson}>
          Save
        </Button>
      </DialogActions>
    </Dialog>
  );
};
|
||||
|
||||
export default ProviderConfigDialog;
|
||||
149
src/web/nextui/src/app/setup/ProviderSelector.tsx
Normal file
149
src/web/nextui/src/app/setup/ProviderSelector.tsx
Normal file
@@ -0,0 +1,149 @@
|
||||
import React from 'react';
|
||||
import { Autocomplete, Box, Chip, TextField } from '@mui/material';
|
||||
import { ProviderConfig } from '../../../../../types';
|
||||
import ProviderConfigDialog from './ProviderConfigDialog';
|
||||
|
||||
/**
 * Provider options offered by the selector's autocomplete.
 *
 * One Replicate model and the Anthropic models default to a temperature of 0.5;
 * the OpenAI and Azure OpenAI models additionally default `max_tokens` to 1024.
 * The combined list is sorted by provider id.
 */
const defaultProviders: ProviderConfig[] = [
  {
    id: 'replicate:replicate/llama70b-v2-chat:e951f18578850b652510200860fc4ea62b3b16fac280f83ff32282f87bbd2e48',
    config: { temperature: 0.5 },
  },
  ...[
    'anthropic:claude-1',
    'anthropic:claude-1-100k',
    'anthropic:claude-instant-1',
    'anthropic:claude-instant-1-100k',
  ].map((id) => ({ id, config: { temperature: 0.5 } })),
  ...[
    'openai:gpt-3.5-turbo',
    'openai:gpt-3.5-turbo-0301',
    'openai:gpt-3.5-turbo-0613',
    'openai:gpt-3.5-turbo-16k',
    'openai:gpt-3.5-turbo-16k-0613',
    'openai:gpt-4',
    'openai:gpt-4-0314',
    'openai:gpt-4-0613',
    'openai:gpt-4-32k',
    'openai:gpt-4-32k-0314',
  ].map((id) => ({ id, config: { temperature: 0.5, max_tokens: 1024 } })),
  ...[
    'azureopenai:gpt-3.5-turbo',
    'azureopenai:gpt-3.5-turbo-0301',
    'azureopenai:gpt-3.5-turbo-0613',
    'azureopenai:gpt-3.5-turbo-16k',
    'azureopenai:gpt-3.5-turbo-16k-0613',
    'azureopenai:gpt-4',
    'azureopenai:gpt-4-0314',
    'azureopenai:gpt-4-0613',
    'azureopenai:gpt-4-32k',
    'azureopenai:gpt-4-32k-0314',
  ].map((id) => ({ id, config: { temperature: 0.5, max_tokens: 1024 } })),
].sort((left, right) => left.id.localeCompare(right.id));
|
||||
|
||||
/** Props accepted by ProviderSelector. */
interface ProviderSelectorProps {
  /** Currently selected providers. */
  providers: ProviderConfig[];
  /** Invoked with the new provider list whenever the selection or a config changes. */
  onChange: (providers: ProviderConfig[]) => void;
}

/**
 * Multi-select autocomplete of LLM providers.
 *
 * Free-text entries are accepted and wrapped as `{ id }` objects. Clicking a
 * selected chip opens ProviderConfigDialog for that provider, provided it has a
 * config to edit.
 */
const ProviderSelector: React.FC<ProviderSelectorProps> = ({ providers, onChange }) => {
  // Provider whose config dialog is open, or null when no dialog is shown.
  const [selectedProvider, setSelectedProvider] = React.useState<ProviderConfig | null>(null);

  // Text shown for a provider: the raw string, else its id, else a placeholder.
  const getProviderLabel = (provider: string | ProviderConfig) =>
    typeof provider === 'string' ? provider : provider.id || 'Unknown provider';

  // React key for a chip; falls back to the position when the provider has no id.
  const getProviderKey = (provider: string | ProviderConfig, index: number) =>
    typeof provider === 'string' ? provider : provider.id || index;

  const handleProviderClick = (provider: string | ProviderConfig) => {
    if (typeof provider === 'string') {
      alert('Cannot edit custom providers');
      return;
    }
    if (!provider.config) {
      alert('There is no config for this provider');
      return;
    }
    setSelectedProvider(provider as ProviderConfig);
  };

  // Writes the edited config back onto every provider sharing the selected id.
  const handleSave = (config: ProviderConfig['config']) => {
    if (!selectedProvider) {
      return;
    }
    const updatedProviders = providers.map((provider) => {
      if (provider.id !== selectedProvider.id) {
        return provider;
      }
      return { ...provider, config };
    });
    onChange(updatedProviders);
    setSelectedProvider(null);
  };

  return (
    <Box mt={2}>
      <Autocomplete
        multiple
        freeSolo
        options={defaultProviders}
        value={providers}
        onChange={(event, newValue: (string | ProviderConfig)[]) => {
          const normalized = newValue.map((entry) =>
            typeof entry === 'string' ? { id: entry } : entry,
          );
          onChange(normalized);
        }}
        getOptionLabel={(option) => (option ? getProviderLabel(option) : '')}
        renderTags={(value, getTagProps) =>
          value.map((provider, index: number) => (
            <Chip
              variant="outlined"
              label={getProviderLabel(provider)}
              {...getTagProps({ index })}
              key={getProviderKey(provider, index)}
              onClick={() => handleProviderClick(provider)}
            />
          ))
        }
        renderInput={(params) => (
          <TextField
            {...params}
            variant="outlined"
            placeholder="Select LLM providers"
            helperText={providers.length > 0 ? 'Click a provider to configure its settings.' : null}
          />
        )}
      />
      {selectedProvider && selectedProvider.id ? (
        <ProviderConfigDialog
          open={Boolean(selectedProvider)}
          providerId={selectedProvider.id}
          config={selectedProvider.config}
          onClose={() => setSelectedProvider(null)}
          onSave={handleSave}
        />
      ) : null}
    </Box>
  );
};
|
||||
|
||||
export default ProviderSelector;
|
||||
88
src/web/nextui/src/app/setup/RunTestSuiteButton.tsx
Normal file
88
src/web/nextui/src/app/setup/RunTestSuiteButton.tsx
Normal file
@@ -0,0 +1,88 @@
|
||||
'use client';
|
||||
|
||||
import React, { useState } from 'react';
|
||||
import { useRouter } from 'next/navigation';
|
||||
import { Button, CircularProgress } from '@mui/material';
|
||||
|
||||
import { useStore } from '@/util/store';
|
||||
import { API_BASE_URL } from '@/util/api';
|
||||
|
||||
/**
 * Button that submits the current test suite from the store to the eval API
 * and polls the resulting job once per second until it finishes.
 *
 * While a run is active the button is disabled and shows a spinner with the
 * completion percentage. On completion the user is sent to `/eval`; on any
 * failure the error is logged, shown in an alert, and the button is re-enabled.
 */
const RunTestSuiteButton: React.FC = () => {
  const router = useRouter();
  const { description, providers, prompts, testCases } = useStore();
  const [isRunning, setIsRunning] = useState(false);
  const [progressPercent, setProgressPercent] = useState(0);

  const runTestSuite = async () => {
    setIsRunning(true);
    // Do not show the percentage left over from a previous run.
    setProgressPercent(0);

    const testSuite = {
      description,
      providers,
      prompts,
      tests: testCases,
    };

    try {
      const response = await fetch(`${API_BASE_URL}/api/eval`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify(testSuite),
      });

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      const job = await response.json();

      // Poll sequentially inside this try block. Throwing from a
      // `setInterval(async ...)` callback would bypass the catch below, leave
      // the button disabled forever and surface as an unhandled rejection;
      // awaiting each poll also guarantees requests never overlap.
      for (;;) {
        await new Promise((resolve) => setTimeout(resolve, 1000));

        const progressResponse = await fetch(`${API_BASE_URL}/api/eval/${job.id}`);
        if (!progressResponse.ok) {
          throw new Error(`HTTP error! status: ${progressResponse.status}`);
        }

        const progressData = await progressResponse.json();
        if (progressData.status === 'completed') {
          break;
        }
        if (progressData.status === 'failed') {
          throw new Error('Job failed');
        }

        // `total` is 0 until the first progress report; avoid dividing by it.
        const percent =
          progressData.total === 0
            ? 0
            : Math.round((progressData.progress / progressData.total) * 100);
        setProgressPercent(percent);
      }

      setIsRunning(false);
      router.push('/eval');
    } catch (error) {
      console.error(error);
      setIsRunning(false);
      alert(`An error occurred: ${(error as Error).message}`);
    }
  };

  return (
    <Button variant="contained" color="primary" onClick={runTestSuite} disabled={isRunning}>
      {isRunning ? (
        <>
          <CircularProgress size={24} sx={{ marginRight: 2 }} />
          {progressPercent.toFixed(0)}% complete
        </>
      ) : (
        'Run Evaluation'
      )}
    </Button>
  );
};
|
||||
|
||||
export default RunTestSuiteButton;
|
||||
108
src/web/nextui/src/app/setup/TestCaseDialog.tsx
Normal file
108
src/web/nextui/src/app/setup/TestCaseDialog.tsx
Normal file
@@ -0,0 +1,108 @@
|
||||
import React, { useState } from 'react';
|
||||
import {
|
||||
Button,
|
||||
TextField,
|
||||
Box,
|
||||
Dialog,
|
||||
DialogTitle,
|
||||
DialogContent,
|
||||
DialogActions,
|
||||
} from '@mui/material';
|
||||
import VarsForm from './VarsForm';
|
||||
import AssertsForm from './AssertsForm';
|
||||
import type { TestCase } from '../../../../../types';
|
||||
|
||||
/** Props accepted by TestCaseForm. */
interface TestCaseFormProps {
  /** Whether the dialog is shown. */
  open: boolean;
  /** Invoked with the assembled test case; `shouldClose` is false for "Add Another". */
  onAdd: (testCase: TestCase, shouldClose: boolean) => void;
  /** Variable names to render inputs for. */
  varsList: string[];
  /** Test case being edited; when absent the dialog is in "add" mode. */
  initialValues?: TestCase;
  /** Invoked when the dialog is dismissed. */
  onCancel: () => void;
}

/**
 * Dialog for adding a new test case or editing an existing one.
 *
 * Collects variable values (VarsForm) and assertions (AssertsForm) and hands
 * the resulting test case to `onAdd`. After every add/update the local fields
 * are cleared so that "Add Another" starts from an empty form.
 */
const TestCaseForm: React.FC<TestCaseFormProps> = ({
  open,
  onAdd,
  varsList,
  initialValues,
  onCancel,
}) => {
  const [description, setDescription] = useState(initialValues?.description || '');
  const [vars, setVars] = useState(initialValues?.vars || {});
  const [asserts, setAsserts] = useState(initialValues?.assert || []);
  // Bumped after every add so both child forms are remounted with fresh state.
  const [formKey, setFormKey] = useState(0);

  // Re-seed the fields whenever a different test case (or none) is being edited.
  React.useEffect(() => {
    setDescription(initialValues?.description || '');
    setVars(initialValues?.vars || {});
    setAsserts(initialValues?.assert || []);
  }, [initialValues]);

  const handleAdd = (close: boolean) => {
    onAdd(
      {
        description,
        vars,
        assert: asserts,
      },
      close,
    );
    if (close) {
      onCancel();
    }
    setDescription('');
    setVars({});
    setAsserts([]);
    setFormKey((prevKey) => prevKey + 1);
  };

  return (
    <Dialog open={open} onClose={onCancel} fullWidth maxWidth="md">
      <DialogTitle>{initialValues ? 'Edit Test Case' : 'Add Test Case'}</DialogTitle>
      <DialogContent>
        <Box>
          {/*
          <TextField
            label="Description"
            value={description}
            onChange={(e) => setDescription(e.target.value)}
            fullWidth
            margin="normal"
          />
          */}
          {/* Keyed like AssertsForm so "Add Another" clears the inputs explicitly. */}
          <VarsForm
            key={`vars-${formKey}`}
            onAdd={(vars) => setVars(vars)}
            varsList={varsList}
            initialValues={initialValues?.vars as Record<string, string>}
          />
          <AssertsForm
            key={`asserts-${formKey}`}
            onAdd={(asserts) => setAsserts(asserts)}
            initialValues={initialValues?.assert || []}
          />
        </Box>
      </DialogContent>
      <DialogActions>
        <Button onClick={() => handleAdd(true)} color="primary" variant="contained">
          {initialValues ? 'Update Test Case' : 'Add Test Case'}
        </Button>
        {!initialValues && (
          <Button onClick={() => handleAdd(false)} color="primary" variant="contained">
            Add Another
          </Button>
        )}
        <Button onClick={onCancel} color="secondary">
          Cancel
        </Button>
      </DialogActions>
    </Dialog>
  );
};
|
||||
|
||||
export default TestCaseForm;
|
||||
154
src/web/nextui/src/app/setup/TestCasesSection.tsx
Normal file
154
src/web/nextui/src/app/setup/TestCasesSection.tsx
Normal file
@@ -0,0 +1,154 @@
|
||||
import React from 'react';
|
||||
import Button from '@mui/material/Button';
|
||||
import Copy from '@mui/icons-material/ContentCopy';
|
||||
import Delete from '@mui/icons-material/Delete';
|
||||
import Edit from '@mui/icons-material/Edit';
|
||||
import IconButton from '@mui/material/IconButton';
|
||||
import Stack from '@mui/material/Stack';
|
||||
import Table from '@mui/material/Table';
|
||||
import TableBody from '@mui/material/TableBody';
|
||||
import TableCell from '@mui/material/TableCell';
|
||||
import TableContainer from '@mui/material/TableContainer';
|
||||
import TableHead from '@mui/material/TableHead';
|
||||
import TableRow from '@mui/material/TableRow';
|
||||
import Typography from '@mui/material/Typography';
|
||||
|
||||
import TestCaseDialog from './TestCaseDialog';
|
||||
import { useStore } from '../../util/store';
|
||||
|
||||
import type { TestCase } from '../../../../../types';
|
||||
|
||||
/** Props accepted by TestCasesSection. */
interface TestCasesSectionProps {
  /** Variable names passed through to the test case dialog. */
  varsList: string[];
}

/**
 * Table of the test cases held in the store, with controls to add, edit,
 * duplicate and delete them. Adding and editing happen in TestCaseDialog.
 */
const TestCasesSection: React.FC<TestCasesSectionProps> = ({ varsList }) => {
  const { testCases, setTestCases } = useStore();
  // Index of the test case being edited; null means the dialog adds a new one.
  const [editingTestCaseIndex, setEditingTestCaseIndex] = React.useState<number | null>(null);
  const [testCaseDialogOpen, setTestCaseDialogOpen] = React.useState(false);

  // Opens the dialog in edit mode for the given row.
  const openEditor = (index: number) => {
    setEditingTestCaseIndex(index);
    setTestCaseDialogOpen(true);
  };

  // Leaves edit mode and hides the dialog.
  const closeDialog = () => {
    setEditingTestCaseIndex(null);
    setTestCaseDialogOpen(false);
  };

  // Appends the test case, or replaces the one being edited.
  const handleAddTestCase = (testCase: TestCase, shouldClose: boolean) => {
    if (editingTestCaseIndex !== null) {
      const updatedTestCases = testCases.map((existing, position) =>
        position === editingTestCaseIndex ? testCase : existing,
      );
      setTestCases(updatedTestCases);
      setEditingTestCaseIndex(null);
    } else {
      setTestCases([...testCases, testCase]);
    }

    if (shouldClose) {
      setTestCaseDialogOpen(false);
    }
  };

  const handleRemoveTestCase = (event: React.MouseEvent, index: number) => {
    // Keep the click from also triggering the row's edit handler.
    event.stopPropagation();

    if (!confirm('Are you sure you want to delete this test case?')) {
      return;
    }
    setTestCases(testCases.filter((_, position) => position !== index));
  };

  const handleDuplicateTestCase = (event: React.MouseEvent, index: number) => {
    event.stopPropagation();
    // Deep copy via a JSON round trip so the duplicate shares nothing with the source.
    const serialized = JSON.stringify(testCases[index]);
    setTestCases([...testCases, JSON.parse(serialized)]);
  };

  return (
    <>
      <Stack direction="row" spacing={2} justifyContent="space-between">
        <Typography variant="h5">Test Cases</Typography>
        <Button color="primary" onClick={() => setTestCaseDialogOpen(true)} variant="contained">
          Add Test Case
        </Button>
      </Stack>
      <TableContainer>
        <Table>
          <TableHead>
            <TableRow>
              <TableCell>Description</TableCell>
              <TableCell>Assertions</TableCell>
              <TableCell>Variables</TableCell>
              <TableCell align="right"></TableCell>
            </TableRow>
          </TableHead>
          <TableBody>
            {testCases.length === 0 ? (
              <TableRow>
                <TableCell colSpan={4} align="center">
                  No test cases added yet.
                </TableCell>
              </TableRow>
            ) : (
              testCases.map((testCase, index) => (
                <TableRow
                  key={index}
                  sx={{
                    '&:hover': {
                      backgroundColor: 'rgba(0, 0, 0, 0.04)',
                      cursor: 'pointer',
                    },
                  }}
                  onClick={() => openEditor(index)}
                >
                  <TableCell>
                    <Typography variant="body2">
                      {testCase.description || `Test Case #${index + 1}`}
                    </Typography>
                  </TableCell>
                  <TableCell>{testCase.assert?.length || 0} assertions</TableCell>
                  <TableCell>
                    {Object.entries(testCase.vars || {})
                      .map(([k, v]) => k + '=' + v)
                      .join(', ')}
                  </TableCell>
                  <TableCell align="right" sx={{ minWidth: 150 }}>
                    <IconButton onClick={() => openEditor(index)} size="small">
                      <Edit />
                    </IconButton>
                    <IconButton
                      onClick={(event) => handleDuplicateTestCase(event, index)}
                      size="small"
                    >
                      <Copy />
                    </IconButton>
                    <IconButton
                      onClick={(event) => handleRemoveTestCase(event, index)}
                      size="small"
                    >
                      <Delete />
                    </IconButton>
                  </TableCell>
                </TableRow>
              ))
            )}
          </TableBody>
        </Table>
      </TableContainer>
      <TestCaseDialog
        open={testCaseDialogOpen}
        onAdd={handleAddTestCase}
        varsList={varsList}
        initialValues={editingTestCaseIndex !== null ? testCases[editingTestCaseIndex] : undefined}
        onCancel={closeDialog}
      />
    </>
  );
};
|
||||
|
||||
export default TestCasesSection;
|
||||
57
src/web/nextui/src/app/setup/VarsForm.tsx
Normal file
57
src/web/nextui/src/app/setup/VarsForm.tsx
Normal file
@@ -0,0 +1,57 @@
|
||||
import React, { useEffect } from 'react';
|
||||
import { Box, TextField, Typography, Stack } from '@mui/material';
|
||||
|
||||
/** Props accepted by VarsForm. */
interface VarsFormProps {
  /** Invoked with the full variable map after every edit. */
  onAdd: (vars: Record<string, string>) => void;
  /** Names of the variables to render an input for. */
  varsList: string[];
  /** Values used to pre-fill the inputs. */
  initialValues?: Record<string, string>;
}

/**
 * Renders one text field per variable name and reports the complete set of
 * values through `onAdd` whenever a field changes. When there are no variables
 * a hint about the template syntax is shown instead.
 */
const VarsForm: React.FC<VarsFormProps> = ({ onAdd, varsList, initialValues }) => {
  const [vars, setVars] = React.useState<Record<string, string>>(initialValues || {});

  // Rebuild the map from `varsList` whenever the names or the initial values
  // change; names without an initial value start out empty.
  useEffect(() => {
    const seeded = varsList.reduce<Record<string, string>>((collected, name) => {
      collected[name] = initialValues?.[name] || '';
      return collected;
    }, {});
    setVars(seeded);
  }, [varsList, initialValues]);

  // Stores the new value locally and passes the updated map to the parent.
  const updateVar = (varName: string, newValue: string) => {
    const newVars = {
      ...vars,
      [varName]: newValue,
    };
    setVars(newVars);
    onAdd(newVars);
  };

  const hasVars = varsList.length > 0;

  return (
    <Box my={2}>
      <Typography variant="h6" mb={2}>
        Vars
      </Typography>
      {hasVars ? (
        <Stack direction="row" spacing={2} alignItems="center">
          {Object.keys(vars).map((varName, index) => (
            <Stack key={index} direction="row" spacing={2} alignItems="center">
              <TextField
                placeholder={varName}
                label={varName}
                value={vars[varName]}
                fullWidth
                onChange={(e) => updateVar(varName, e.target.value)}
              />
            </Stack>
          ))}
        </Stack>
      ) : (
        <Typography variant="subtitle1" gutterBottom>
          Add variables to your prompt using the {'{{varname}}'} syntax.
        </Typography>
      )}
    </Box>
  );
};
|
||||
|
||||
export default VarsForm;
|
||||
3
src/web/nextui/src/app/setup/page.css
Normal file
3
src/web/nextui/src/app/setup/page.css
Normal file
@@ -0,0 +1,3 @@
|
||||
.yaml-config {
|
||||
font-size: 10px;
|
||||
}
|
||||
156
src/web/nextui/src/app/setup/page.tsx
Normal file
156
src/web/nextui/src/app/setup/page.tsx
Normal file
@@ -0,0 +1,156 @@
|
||||
'use client';
|
||||
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import Link from 'next/link';
|
||||
import yaml from 'js-yaml';
|
||||
import { Light as SyntaxHighlighter } from 'react-syntax-highlighter';
|
||||
import { docco } from 'react-syntax-highlighter/dist/cjs/styles/hljs';
|
||||
import Button from '@mui/material/Button';
|
||||
import Container from '@mui/material/Container';
|
||||
import Typography from '@mui/material/Typography';
|
||||
import Box from '@mui/material/Box';
|
||||
import Stack from '@mui/material/Stack';
|
||||
import Dialog from '@mui/material/Dialog';
|
||||
import DialogActions from '@mui/material/DialogActions';
|
||||
import DialogContent from '@mui/material/DialogContent';
|
||||
import DialogContentText from '@mui/material/DialogContentText';
|
||||
import DialogTitle from '@mui/material/DialogTitle';
|
||||
|
||||
import RunTestSuiteButton from './RunTestSuiteButton';
|
||||
import PromptsSection from './PromptsSection';
|
||||
import TestCasesSection from './TestCasesSection';
|
||||
import ProviderSelector from './ProviderSelector';
|
||||
import { useStore } from '../../util/store';
|
||||
|
||||
import './page.css';
|
||||
|
||||
/**
 * Setup page for an evaluation: lets the user choose providers, prompts and
 * test cases (all held in the shared store), previews the resulting config as
 * YAML, and offers Run and Reset actions.
 */
const EvaluateTestSuiteCreator: React.FC = () => {
  const [yamlString, setYamlString] = useState('');
  const [resetDialogOpen, setResetDialogOpen] = useState(false);

  const {
    description,
    setDescription,
    providers,
    setProviders,
    prompts,
    setPrompts,
    testCases,
    setTestCases,
  } = useStore();

  // Ask the persisted store to rehydrate once the component has mounted.
  useEffect(() => {
    useStore.persist.rehydrate();
  }, []);

  // Keep the YAML preview in sync with the store contents.
  useEffect(() => {
    setYamlString(
      yaml.dump({
        description,
        providers,
        prompts,
        tests: testCases,
      }),
    );
  }, [description, providers, prompts, testCases]);

  // Collects the distinct `{{name}}` placeholders used across all prompts,
  // in order of first appearance.
  const extractVarsFromPrompts = (prompts: string[]): string[] => {
    const placeholder = /{{(\w+)}}/g;
    const names = new Set<string>();

    prompts.forEach((prompt) => {
      for (
        let found = placeholder.exec(prompt);
        found !== null;
        found = placeholder.exec(prompt)
      ) {
        names.add(found[1]);
      }
    });

    return Array.from(names);
  };

  const varsList = extractVarsFromPrompts(prompts);

  const openResetDialog = () => setResetDialogOpen(true);
  const closeResetDialog = () => setResetDialogOpen(false);

  // Clears every store field and the YAML preview, then closes the dialog.
  const handleReset = () => {
    setDescription('');
    setProviders([]);
    setPrompts([]);
    setTestCases([]);
    setYamlString('');
    setResetDialogOpen(false);
  };

  return (
    <Container maxWidth="lg" sx={{ marginTop: '2rem' }}>
      <Stack direction="row" spacing={2} justifyContent="space-between">
        <Typography variant="h4">Set up an evaluation</Typography>
        <Stack direction="row" spacing={2}>
          <RunTestSuiteButton />
          <Button variant="outlined" color="primary" onClick={openResetDialog}>
            Reset
          </Button>
        </Stack>
      </Stack>
      <Box mt={4} />
      {/*
      <Box mt={4}>
        <TextField
          label="Description"
          value={description}
          onChange={(e) => {
            setDescription(e.target.value);
          }}
          fullWidth
          margin="normal"
        />
      </Box>
      */}
      <Box mt={2}>
        <Stack direction="column" spacing={2} justifyContent="space-between">
          <Typography variant="h5">Providers</Typography>
          <ProviderSelector providers={providers} onChange={setProviders} />
        </Stack>
      </Box>
      <Box mt={4} />
      <PromptsSection />
      <Box mt={6} />
      <TestCasesSection varsList={varsList} />
      <Box mt={8}>
        {yamlString && (
          <Box mt={4}>
            <Typography variant="h5" gutterBottom>
              YAML config
            </Typography>
            <Typography variant="body1" gutterBottom>
              This is the evaluation config that is run by promptfoo. See{' '}
              <Link href="https://promptfoo.dev/docs/configuration/guide">configuration docs</Link>{' '}
              to learn more.
            </Typography>
            <SyntaxHighlighter className="yaml-config" language="yaml" style={docco}>
              {yamlString}
            </SyntaxHighlighter>
          </Box>
        )}
      </Box>
      <Dialog
        open={resetDialogOpen}
        onClose={closeResetDialog}
        aria-labelledby="alert-dialog-title"
        aria-describedby="alert-dialog-description"
      >
        <DialogTitle id="alert-dialog-title">{'Confirm Reset'}</DialogTitle>
        <DialogContent>
          <DialogContentText id="alert-dialog-description">
            Are you sure you want to reset all the fields? This action cannot be undone.
          </DialogContentText>
        </DialogContent>
        <DialogActions>
          <Button onClick={closeResetDialog}>Cancel</Button>
          <Button onClick={handleReset} autoFocus>
            Reset
          </Button>
        </DialogActions>
      </Dialog>
    </Container>
  );
};
|
||||
|
||||
export default EvaluateTestSuiteCreator;
|
||||
1
src/web/nextui/src/util/api.ts
Normal file
1
src/web/nextui/src/util/api.ts
Normal file
@@ -0,0 +1 @@
|
||||
/** Base URL that the web UI prefixes to its API request paths. */
export const API_BASE_URL = 'http://localhost:15500';
|
||||
53
src/web/nextui/src/util/store.ts
Normal file
53
src/web/nextui/src/util/store.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
import { create } from 'zustand';
|
||||
import { persist } from 'zustand/middleware';
|
||||
|
||||
import type { Assertion, ProviderConfig, TestCase } from '../../../../types';
|
||||
|
||||
/** Shape of the setup-page store: the test suite fields plus one setter per field. */
export interface State {
  asserts: Assertion[];
  testCases: TestCase[];
  description: string;
  providers: ProviderConfig[];
  prompts: string[];
  setAsserts: (asserts: Assertion[]) => void;
  setTestCases: (testCases: TestCase[]) => void;
  setDescription: (description: string) => void;
  setProviders: (providers: ProviderConfig[]) => void;
  setPrompts: (prompts: string[]) => void;
}

/**
 * Store hook for the evaluation setup state, wrapped in zustand's `persist`
 * middleware under the name 'promptfoo'. Hydration is skipped at creation
 * (`skipHydration: true`).
 */
export const useStore = create<State>()(
  persist(
    (set) => {
      return {
        // Initial values.
        asserts: [],
        testCases: [],
        description: '',
        providers: [],
        prompts: [],

        // Setters: each one replaces exactly one field.
        setAsserts: (asserts) => {
          set({ asserts });
        },
        setTestCases: (testCases) => {
          set({ testCases });
        },
        setDescription: (description) => {
          set({ description });
        },
        setProviders: (providers) => {
          set({ providers });
        },
        setPrompts: (prompts) => {
          set({ prompts });
        },
      };
    },
    {
      name: 'promptfoo',
      skipHydration: true,
    },
  ),
);
|
||||
|
||||
/*
|
||||
export const useStore = create<State>((set) => ({
|
||||
asserts: [],
|
||||
testCases: [],
|
||||
description: '',
|
||||
providers: [],
|
||||
prompts: [],
|
||||
setAsserts: (asserts) => set({ asserts }),
|
||||
setTestCases: (testCases) => set({ testCases }),
|
||||
setDescription: (description) => set({ description }),
|
||||
setProviders: (providers) => set({ providers }),
|
||||
setPrompts: (prompts) => set({ prompts }),
|
||||
}));
|
||||
*/
|
||||
28
src/web/nextui/tsconfig.json
Normal file
28
src/web/nextui/tsconfig.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "es5",
|
||||
"lib": ["dom", "dom.iterable", "esnext"],
|
||||
"allowJs": true,
|
||||
"skipLibCheck": true,
|
||||
"strict": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"noEmit": true,
|
||||
"esModuleInterop": true,
|
||||
"module": "esnext",
|
||||
"moduleResolution": "bundler",
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"jsx": "preserve",
|
||||
"incremental": true,
|
||||
"plugins": [
|
||||
{
|
||||
"name": "next"
|
||||
}
|
||||
],
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
@@ -2,23 +2,37 @@ import fs, { Stats } from 'fs';
|
||||
import path from 'node:path';
|
||||
import readline from 'node:readline';
|
||||
import http from 'node:http';
|
||||
import invariant from 'tiny-invariant';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
|
||||
import debounce from 'debounce';
|
||||
import express from 'express';
|
||||
import cors from 'cors';
|
||||
import compression from 'compression';
|
||||
import opener from 'opener';
|
||||
import { Server as SocketIOServer } from 'socket.io';
|
||||
import promptfoo, { EvaluateSummary } from '../index';
|
||||
|
||||
import logger from '../logger';
|
||||
import { getDirectory } from '../esm';
|
||||
import { getLatestResultsPath, listPreviousResults, readResult } from '../util';
|
||||
|
||||
export function init(port = 15500) {
|
||||
interface Job {
|
||||
status: 'in-progress' | 'completed';
|
||||
progress: number;
|
||||
total: number;
|
||||
result: EvaluateSummary | null;
|
||||
}
|
||||
|
||||
const evalJobs = new Map<string, Job>();
|
||||
|
||||
export function startServer(port = 15500) {
|
||||
const app = express();
|
||||
|
||||
const staticDir = path.join(getDirectory(), 'web', 'client');
|
||||
const staticDir = path.join(getDirectory(), 'web', 'nextui');
|
||||
|
||||
app.use(cors());
|
||||
app.use(compression());
|
||||
app.use(express.json());
|
||||
app.use(express.static(staticDir));
|
||||
|
||||
@@ -55,9 +69,49 @@ export function init(port = 15500) {
|
||||
|
||||
app.get('/results', (req, res) => {
|
||||
const previousResults = listPreviousResults();
|
||||
previousResults.reverse();
|
||||
res.json({ data: previousResults });
|
||||
});
|
||||
|
||||
app.post('/api/eval', (req, res) => {
|
||||
const testSuite = req.body;
|
||||
const id = uuidv4();
|
||||
evalJobs.set(id, { status: 'in-progress', progress: 0, total: 0, result: null });
|
||||
|
||||
promptfoo
|
||||
.evaluate(Object.assign({}, testSuite, { writeLatestResults: true }), {
|
||||
progressCallback: (progress, total) => {
|
||||
const job = evalJobs.get(id);
|
||||
invariant(job, 'Job not found');
|
||||
job.progress = progress;
|
||||
job.total = total;
|
||||
console.log(`Progress: ${progress}/${total}`);
|
||||
},
|
||||
})
|
||||
.then((result) => {
|
||||
const job = evalJobs.get(id);
|
||||
invariant(job, 'Job not found');
|
||||
job.status = 'completed';
|
||||
job.result = result;
|
||||
});
|
||||
|
||||
res.json({ id });
|
||||
});
|
||||
|
||||
app.get('/api/eval/:id', (req, res) => {
|
||||
const id = req.params.id;
|
||||
const job = evalJobs.get(id);
|
||||
if (!job) {
|
||||
res.status(404).json({ error: 'Job not found' });
|
||||
return;
|
||||
}
|
||||
if (job.status === 'completed') {
|
||||
res.json({ status: 'completed', result: job.result });
|
||||
} else {
|
||||
res.json({ status: 'in-progress', progress: job.progress, total: job.total });
|
||||
}
|
||||
});
|
||||
|
||||
app.get('/results/:filename', (req, res) => {
|
||||
const filename = req.params.filename;
|
||||
const safeFilename = path.basename(filename);
|
||||
|
||||
@@ -13,5 +13,5 @@
|
||||
"resolveJsonModule": true
|
||||
},
|
||||
"include": ["src/", "typings/**/*"],
|
||||
"exclude": ["node_modules", "dist", "src/web/client/**/*"]
|
||||
"exclude": ["node_modules", "dist", "src/web/client/**/*", "src/web/nextui/**/*"]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user