Web UI for running evals (#103)

This commit is contained in:
Ian Webster
2023-08-13 20:52:27 -07:00
committed by GitHub
parent 4bb5415b0a
commit 8f6491d583
65 changed files with 6580 additions and 6012 deletions

View File

@@ -1,2 +1,5 @@
dist
venv
.aider*
src/web/nextui/out
src/web/nextui/.next

161
package-lock.json generated
View File

@@ -18,6 +18,7 @@
"cli-progress": "^3.12.0",
"cli-table3": "^0.6.3",
"commander": "^10.0.1",
"compression": "^1.7.4",
"cors": "^2.8.5",
"csv-parse": "^5.3.8",
"csv-stringify": "^6.3.2",
@@ -33,6 +34,7 @@
"semver": "^7.5.3",
"socket.io": "^4.6.1",
"tiny-invariant": "^1.3.1",
"uuid": "^9.0.0",
"winston": "^3.8.2"
},
"bin": {
@@ -43,6 +45,7 @@
"@types/cache-manager": "^4.0.2",
"@types/cache-manager-fs-hash": "^0.0.1",
"@types/cli-progress": "^3.11.0",
"@types/compression": "^1.7.2",
"@types/cors": "^2.8.13",
"@types/debounce": "^1.2.1",
"@types/express": "^4.17.17",
@@ -53,6 +56,7 @@
"@types/nunjucks": "^3.2.2",
"@types/opener": "^1.4.0",
"@types/semver": "^7.5.0",
"@types/uuid": "^9.0.2",
"babel-jest": "^29.5.0",
"jest": "^29.5.0",
"jest-watch-typeahead": "^2.2.2",
@@ -1366,6 +1370,15 @@
"@types/node": "*"
}
},
"node_modules/@types/compression": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/@types/compression/-/compression-1.7.2.tgz",
"integrity": "sha512-lwEL4M/uAGWngWFLSG87ZDr2kLrbuR8p7X+QZB1OQlT+qkHsCPDVFnHPyXf4Vyl4yDDorNY+mAhosxkCvppatg==",
"dev": true,
"dependencies": {
"@types/express": "*"
}
},
"node_modules/@types/connect": {
"version": "3.4.35",
"resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.35.tgz",
@@ -1590,6 +1603,12 @@
"resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.2.tgz",
"integrity": "sha512-txGIh+0eDFzKGC25zORnswy+br1Ha7hj5cMVwKIU7+s0U2AxxJru/jZSMU6OC9MJWP6+pc/hc6ZjyZShpsyY2g=="
},
"node_modules/@types/uuid": {
"version": "9.0.2",
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.2.tgz",
"integrity": "sha512-kNnC1GFBLuhImSnV7w4njQkUiJi0ZXUycu1rUaouPqiKlXkh77JKgdRnTAp1x5eBwcIwbtI+3otwzuIDEuDoxQ==",
"dev": true
},
"node_modules/@types/yargs": {
"version": "17.0.24",
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.24.tgz",
@@ -2249,6 +2268,60 @@
"node": ">=14"
}
},
"node_modules/compressible": {
"version": "2.0.18",
"resolved": "https://registry.npmjs.org/compressible/-/compressible-2.0.18.tgz",
"integrity": "sha512-AF3r7P5dWxL8MxyITRMlORQNaOA2IkAFaTr4k7BUumjPtRpGDTZpl0Pb1XCO6JeDCBdp126Cgs9sMxqSjgYyRg==",
"dependencies": {
"mime-db": ">= 1.43.0 < 2"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/compression": {
"version": "1.7.4",
"resolved": "https://registry.npmjs.org/compression/-/compression-1.7.4.tgz",
"integrity": "sha512-jaSIDzP9pZVS4ZfQ+TzvtiWhdpFhE2RDHz8QJkpX9SIpLq88VueF5jJw6t+6CUQcAoA6t+x89MLrWAqpfDE8iQ==",
"dependencies": {
"accepts": "~1.3.5",
"bytes": "3.0.0",
"compressible": "~2.0.16",
"debug": "2.6.9",
"on-headers": "~1.0.2",
"safe-buffer": "5.1.2",
"vary": "~1.1.2"
},
"engines": {
"node": ">= 0.8.0"
}
},
"node_modules/compression/node_modules/bytes": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.0.0.tgz",
"integrity": "sha512-pMhOfFDPiv9t5jjIXkHosWmkSyQbvsgEVNkz0ERHbuLh2T/7j4Mqqpz523Fe8MVY89KC6Sh/QfS2sM+SjgFDcw==",
"engines": {
"node": ">= 0.8"
}
},
"node_modules/compression/node_modules/debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
"dependencies": {
"ms": "2.0.0"
}
},
"node_modules/compression/node_modules/ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
},
"node_modules/compression/node_modules/safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
},
"node_modules/concat-map": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
@@ -4409,6 +4482,14 @@
"node": ">= 0.8"
}
},
"node_modules/on-headers": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/on-headers/-/on-headers-1.0.2.tgz",
"integrity": "sha512-pZAE+FJLoyITytdqK0U5s+FIpjN0JP3OzFi/u8Rx+EV5/W+JTWGXG8xFzevE7AjBfDqHv/8vL8qQsIhHnqRkrA==",
"engines": {
"node": ">= 0.8"
}
},
"node_modules/once": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
@@ -5501,6 +5582,14 @@
"node": ">= 0.4.0"
}
},
"node_modules/uuid": {
"version": "9.0.0",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz",
"integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==",
"bin": {
"uuid": "dist/bin/uuid"
}
},
"node_modules/v8-compile-cache-lib": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
@@ -6791,6 +6880,15 @@
"@types/node": "*"
}
},
"@types/compression": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/@types/compression/-/compression-1.7.2.tgz",
"integrity": "sha512-lwEL4M/uAGWngWFLSG87ZDr2kLrbuR8p7X+QZB1OQlT+qkHsCPDVFnHPyXf4Vyl4yDDorNY+mAhosxkCvppatg==",
"dev": true,
"requires": {
"@types/express": "*"
}
},
"@types/connect": {
"version": "3.4.35",
"resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.35.tgz",
@@ -7015,6 +7113,12 @@
"resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.2.tgz",
"integrity": "sha512-txGIh+0eDFzKGC25zORnswy+br1Ha7hj5cMVwKIU7+s0U2AxxJru/jZSMU6OC9MJWP6+pc/hc6ZjyZShpsyY2g=="
},
"@types/uuid": {
"version": "9.0.2",
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.2.tgz",
"integrity": "sha512-kNnC1GFBLuhImSnV7w4njQkUiJi0ZXUycu1rUaouPqiKlXkh77JKgdRnTAp1x5eBwcIwbtI+3otwzuIDEuDoxQ==",
"dev": true
},
"@types/yargs": {
"version": "17.0.24",
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.24.tgz",
@@ -7513,6 +7617,53 @@
"resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz",
"integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug=="
},
"compressible": {
"version": "2.0.18",
"resolved": "https://registry.npmjs.org/compressible/-/compressible-2.0.18.tgz",
"integrity": "sha512-AF3r7P5dWxL8MxyITRMlORQNaOA2IkAFaTr4k7BUumjPtRpGDTZpl0Pb1XCO6JeDCBdp126Cgs9sMxqSjgYyRg==",
"requires": {
"mime-db": ">= 1.43.0 < 2"
}
},
"compression": {
"version": "1.7.4",
"resolved": "https://registry.npmjs.org/compression/-/compression-1.7.4.tgz",
"integrity": "sha512-jaSIDzP9pZVS4ZfQ+TzvtiWhdpFhE2RDHz8QJkpX9SIpLq88VueF5jJw6t+6CUQcAoA6t+x89MLrWAqpfDE8iQ==",
"requires": {
"accepts": "~1.3.5",
"bytes": "3.0.0",
"compressible": "~2.0.16",
"debug": "2.6.9",
"on-headers": "~1.0.2",
"safe-buffer": "5.1.2",
"vary": "~1.1.2"
},
"dependencies": {
"bytes": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.0.0.tgz",
"integrity": "sha512-pMhOfFDPiv9t5jjIXkHosWmkSyQbvsgEVNkz0ERHbuLh2T/7j4Mqqpz523Fe8MVY89KC6Sh/QfS2sM+SjgFDcw=="
},
"debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
"requires": {
"ms": "2.0.0"
}
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
},
"safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
}
}
},
"concat-map": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
@@ -9121,6 +9272,11 @@
"ee-first": "1.1.1"
}
},
"on-headers": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/on-headers/-/on-headers-1.0.2.tgz",
"integrity": "sha512-pZAE+FJLoyITytdqK0U5s+FIpjN0JP3OzFi/u8Rx+EV5/W+JTWGXG8xFzevE7AjBfDqHv/8vL8qQsIhHnqRkrA=="
},
"once": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
@@ -9879,6 +10035,11 @@
"resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
"integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA=="
},
"uuid": {
"version": "9.0.0",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz",
"integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg=="
},
"v8-compile-cache-lib": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",

View File

@@ -25,13 +25,14 @@
"promptfoo": "dist/src/main.js"
},
"scripts": {
"tsc": "tsc",
"local": "ts-node --esm --files src/main.ts",
"install:client": "cd src/web/client && npm install",
"install:nextui": "cd src/web/nextui && npm install",
"build:clean": "rm -rf dist",
"build:client": "cd src/web/client && npm run build && cp -r dist/ ../../../dist/src/web/client",
"build:nextui": "cd src/web/nextui && npm run build && cp -r out/ ../../../dist/src/web/nextui",
"build:watch": "tsc --watch",
"build": "tsc && cp src/*.html dist/src && npm run build:client && chmod +x dist/src/main.js",
"prepare": "npm run install:client && npm run build:clean && npm run build",
"build": "tsc && cp src/*.html dist/src && npm run build:nextui && chmod +x dist/src/main.js",
"prepare": "npm run install:nextui && npm run build:clean && npm run build",
"test": "jest",
"test:watch": "jest --watch",
"format": "prettier -w ."
@@ -41,6 +42,7 @@
"@types/cache-manager": "^4.0.2",
"@types/cache-manager-fs-hash": "^0.0.1",
"@types/cli-progress": "^3.11.0",
"@types/compression": "^1.7.2",
"@types/cors": "^2.8.13",
"@types/debounce": "^1.2.1",
"@types/express": "^4.17.17",
@@ -51,6 +53,7 @@
"@types/nunjucks": "^3.2.2",
"@types/opener": "^1.4.0",
"@types/semver": "^7.5.0",
"@types/uuid": "^9.0.2",
"babel-jest": "^29.5.0",
"jest": "^29.5.0",
"jest-watch-typeahead": "^2.2.2",
@@ -69,6 +72,7 @@
"cli-progress": "^3.12.0",
"cli-table3": "^0.6.3",
"commander": "^10.0.1",
"compression": "^1.7.4",
"cors": "^2.8.5",
"csv-parse": "^5.3.8",
"csv-stringify": "^6.3.2",
@@ -84,6 +88,7 @@
"semver": "^7.5.3",
"socket.io": "^4.6.1",
"tiny-invariant": "^1.3.1",
"uuid": "^9.0.0",
"winston": "^3.8.2"
}
}

View File

@@ -385,7 +385,6 @@ class Evaluator {
// Set up progress bar...
let progressbar: SingleBar | undefined;
if (options.showProgressBar) {
const totalNumRuns = runEvalOptions.length;
const cliProgress = await import('cli-progress');
progressbar = new cliProgress.SingleBar(
{
@@ -394,34 +393,40 @@ class Evaluator {
},
cliProgress.Presets.shades_classic,
);
progressbar.start(totalNumRuns, 0, {
progressbar.start(runEvalOptions.length, 0, {
provider: '',
prompt: '',
vars: '',
});
}
if (options.progressCallback) {
options.progressCallback(0, runEvalOptions.length);
}
// Actually run the eval
const results: EvaluateResult[] = [];
await async.forEachOfLimit(
runEvalOptions,
options.maxConcurrency || DEFAULT_MAX_CONCURRENCY,
async (options: RunEvalOptions, index: number | string) => {
const row = await this.runEval(options);
async (evalStep: RunEvalOptions, index: number | string) => {
const row = await this.runEval(evalStep);
results.push(row);
if (progressbar) {
progressbar.increment({
provider: options.provider.id(),
prompt: options.prompt.raw.slice(0, 10).replace(/\n/g, ' '),
vars: Object.entries(options.test.vars || {})
provider: evalStep.provider.id(),
prompt: evalStep.prompt.raw.slice(0, 10).replace(/\n/g, ' '),
vars: Object.entries(evalStep.test.vars || {})
.map(([k, v]) => `${k}=${v}`)
.join(' ')
.slice(0, 10)
.replace(/\n/g, ' '),
});
}
if (options.progressCallback) {
options.progressCallback(results.length, runEvalOptions.length);
}
// Bookkeeping for table
if (typeof index !== 'number') {
@@ -441,13 +446,13 @@ class Evaluator {
resultText = row.response?.output || row.error || '';
}
const { rowIndex, colIndex } = options;
const { rowIndex, colIndex } = evalStep;
if (!table.body[rowIndex]) {
table.body[rowIndex] = {
outputs: [],
vars: table.head.vars
.map((varName) => {
const varValue = options.test.vars?.[varName] || '';
const varValue = evalStep.test.vars?.[varName] || '';
if (typeof varValue === 'string') {
return varValue;
}
@@ -475,6 +480,9 @@ class Evaluator {
if (progressbar) {
progressbar.stop();
}
if (options.progressCallback) {
options.progressCallback(runEvalOptions.length, runEvalOptions.length);
}
telemetry.record('eval_ran', {});

View File

@@ -3,7 +3,7 @@ import providers from './providers';
import telemetry from './telemetry';
import { evaluate as doEvaluate } from './evaluator';
import { loadApiProviders } from './providers';
import { readTests, writeOutput } from './util';
import { readTests, writeLatestResults, writeOutput } from './util';
import type { EvaluateOptions, TestSuite, TestSuiteConfig } from './types';
export * from './types';
@@ -12,6 +12,7 @@ export { generateTable } from './table';
interface EvaluateTestSuite extends TestSuiteConfig {
prompts: string[];
writeLatestResults?: boolean;
}
async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions = {}) {
@@ -27,12 +28,17 @@ async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions =
})),
};
telemetry.maybeShowNotice();
const ret = await doEvaluate(constructedTestSuite, options);
if (testSuite.outputPath) {
writeOutput(testSuite.outputPath, ret, testSuite, null);
}
if (testSuite.writeLatestResults) {
writeLatestResults(ret, {});
}
await telemetry.send();
return ret;
}

View File

@@ -24,7 +24,7 @@ import {
import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
import { disableCache, clearCache } from './cache';
import { getDirectory } from './esm';
import { init } from './web/server';
import { startServer } from './web/server';
import { checkForUpdates } from './updates';
import type {
@@ -127,7 +127,7 @@ async function main() {
name: 'view',
});
await telemetry.send();
init(cmdObj.port);
startServer(cmdObj.port);
});
program
@@ -424,7 +424,7 @@ async function main() {
logger.info('Done.');
if (cmdObj.view) {
init(parseInt(cmdObj.view, 10) || 15500);
startServer(parseInt(cmdObj.view, 10) || 15500);
}
});

View File

@@ -21,7 +21,12 @@ import type {
} from './types';
export async function loadApiProviders(
providerPaths: ProviderId | ProviderId[] | RawProviderConfig[] | ProviderFunction,
providerPaths:
| ProviderId
| ProviderId[]
| RawProviderConfig[]
| ProviderConfig[]
| ProviderFunction,
basePath?: string,
): Promise<ApiProvider[]> {
if (typeof providerPaths === 'string') {
@@ -43,9 +48,13 @@ export async function loadApiProviders(
id: () => `custom-function-${idx}`,
callApi: provider,
};
} else if (provider.id) {
// List of ProviderConfig objects
return loadApiProvider((provider as ProviderConfig).id!, provider, basePath);
} else {
// List of { id: string, config: ProviderConfig } objects
const id = Object.keys(provider)[0];
const providerObject = provider[id];
const providerObject = (provider as RawProviderConfig)[id];
const context = { ...providerObject, id: providerObject.id || id };
return loadApiProvider(id, context, basePath);
}

View File

@@ -80,6 +80,7 @@ export interface OutputConfig {
export interface EvaluateOptions {
maxConcurrency?: number;
showProgressBar?: boolean;
progressCallback?: (progress: number, total: number) => void;
generateSuggestions?: boolean;
repeat?: number;
}
@@ -256,7 +257,7 @@ export interface TestSuiteConfig {
description?: string;
// One or more LLM APIs to use, for example: openai:gpt-3.5-turbo, openai:gpt-4, localai:chat:vicuna
providers: ProviderId | ProviderId[] | RawProviderConfig[] | ProviderFunction;
providers: ProviderId | ProviderId[] | RawProviderConfig[] | ProviderConfig[] | ProviderFunction;
// One or more prompt files to load
prompts: string | string[];

View File

@@ -1,14 +0,0 @@
module.exports = {
env: { browser: true, es2020: true },
extends: [
'eslint:recommended',
'plugin:@typescript-eslint/recommended',
'plugin:react-hooks/recommended',
],
parser: '@typescript-eslint/parser',
parserOptions: { ecmaVersion: 'latest', sourceType: 'module' },
plugins: ['react-refresh'],
rules: {
'react-refresh/only-export-components': 'warn',
},
};

View File

@@ -1,24 +0,0 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

View File

@@ -1,13 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="favicon.ico" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>promptfoo web viewer</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -1,39 +0,0 @@
{
"name": "promptfoo-client",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc && vite build",
"lint": "eslint src --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
"preview": "vite preview"
},
"dependencies": {
"@emotion/react": "^11.11.0",
"@emotion/styled": "^11.11.0",
"@mui/icons-material": "^5.11.16",
"@mui/material": "^5.13.0",
"@tanstack/react-table": "^8.9.1",
"js-yaml": "^4.1.0",
"react": "^18.2.0",
"react-dnd": "^16.0.1",
"react-dnd-html5-backend": "^16.0.1",
"react-dom": "^18.2.0",
"socket.io-client": "^4.6.1",
"tiny-invariant": "^1.3.1",
"zustand": "^4.3.8"
},
"devDependencies": {
"@types/react": "^18.0.28",
"@types/react-dom": "^18.0.11",
"@typescript-eslint/eslint-plugin": "^5.57.1",
"@typescript-eslint/parser": "^5.57.1",
"@vitejs/plugin-react-swc": "^3.0.0",
"eslint": "^8.38.0",
"eslint-plugin-react-hooks": "^4.6.0",
"eslint-plugin-react-refresh": "^0.3.4",
"typescript": "^5.0.2",
"vite": "^4.3.2"
}
}

View File

@@ -1,4 +0,0 @@
body {
background-color: var(--background-color);
color: var(--text-color);
}

View File

@@ -1,18 +0,0 @@
.logo {
display: flex;
align-items: center;
gap: 4px;
}
.logo img {
width: 30px;
}
.logo span {
margin-bottom: 6px;
color: var(--text-color);
}
[data-theme='dark'] .logo img {
filter: invert(1);
}

View File

@@ -1,10 +0,0 @@
import React from 'react';
import ReactDOM from 'react-dom/client';
import App from './App.tsx';
import './index.css';
ReactDOM.createRoot(document.getElementById('root') as HTMLElement).render(
<React.StrictMode>
<App />
</React.StrictMode>,
);

View File

@@ -1 +0,0 @@
/// <reference types="vite/client" />

View File

@@ -1,24 +0,0 @@
{
"compilerOptions": {
"target": "ESNext",
"lib": ["DOM", "DOM.Iterable", "ESNext"],
"module": "ESNext",
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"resolveJsonModule": true,
"isolatedModules": true,
"noEmit": true,
"jsx": "react-jsx",
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true
},
"include": ["src"],
"references": [{ "path": "./tsconfig.node.json" }]
}

View File

@@ -1,10 +0,0 @@
{
"compilerOptions": {
"composite": true,
"skipLibCheck": true,
"module": "ESNext",
"moduleResolution": "bundler",
"allowSyntheticDefaultImports": true
},
"include": ["vite.config.ts"]
}

View File

@@ -1,7 +0,0 @@
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react-swc';
// https://vitejs.dev/config/
export default defineConfig({
plugins: [react()],
});

View File

@@ -0,0 +1,3 @@
{
"extends": "next/core-web-vitals"
}

35
src/web/nextui/.gitignore vendored Normal file
View File

@@ -0,0 +1,35 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# local env files
.env*.local
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts

View File

@@ -0,0 +1,8 @@
/** @type {import('next').NextConfig} */
const nextConfig = {
//output: 'standalone',
output: 'export',
trailingSlash: true,
};
module.exports = nextConfig;

4606
src/web/nextui/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,42 @@
{
"name": "nextui",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"@emotion/react": "^11.11.1",
"@emotion/styled": "^11.11.0",
"@mui/icons-material": "^5.14.3",
"@mui/material": "^5.14.4",
"@tanstack/react-table": "^8.9.3",
"@types/diff": "^5.0.3",
"@types/node": "20.4.10",
"@types/react": "18.2.20",
"@types/react-dom": "18.2.7",
"@types/react-syntax-highlighter": "^15.5.7",
"@types/uuid": "^9.0.2",
"debounce": "^1.2.1",
"diff": "^5.1.0",
"eslint": "8.47.0",
"eslint-config-next": "13.4.13",
"js-yaml": "^4.1.0",
"next": "13.4.13",
"opener": "^1.5.2",
"react": "18.2.0",
"react-dnd": "^16.0.1",
"react-dnd-html5-backend": "^16.0.1",
"react-dom": "18.2.0",
"react-syntax-highlighter": "^15.5.0",
"socket.io": "^4.7.2",
"socket.io-client": "^4.7.2",
"tiny-invariant": "^1.3.1",
"typescript": "5.1.6",
"uuid": "^9.0.0",
"zustand": "^4.4.1"
}
}

View File

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 15 KiB

View File

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

View File

@@ -0,0 +1,3 @@
.container {
margin: 2rem;
}

View File

@@ -0,0 +1,6 @@
import { NextResponse } from 'next/server';
/**
 * Sample GET route handler.
 *
 * Responds with JSON shaped as `{ data: { message } }`, where `message` is
 * picked at random (roughly 50/50) between two fixed greetings.
 */
export async function GET() {
  const greetings = ['Hello World!', 'Hello mom'];
  const message = Math.random() < 0.5 ? greetings[0] : greetings[1];
  return NextResponse.json({ data: { message } });
}

View File

@@ -7,6 +7,7 @@ nav {
}
.dark-mode-toggle {
display: flex;
background-color: transparent;
border: none;
color: var(--text-color);

View File

@@ -1,9 +1,7 @@
import Logo from './Logo';
import DarkModeIcon from '@mui/icons-material/DarkMode';
import LightModeIcon from '@mui/icons-material/LightMode';
import './NavBar.css';
import './DarkMode.css';
interface NavbarProps {
darkMode: boolean;
@@ -12,11 +10,8 @@ interface NavbarProps {
export default function NavBar({ darkMode, onToggleDarkMode }: NavbarProps) {
return (
<nav>
<Logo />
<div className="dark-mode-toggle" onClick={onToggleDarkMode}>
{darkMode ? <DarkModeIcon /> : <LightModeIcon />}
</div>
</nav>
<div className="dark-mode-toggle" onClick={onToggleDarkMode}>
{darkMode ? <DarkModeIcon /> : <LightModeIcon />}
</div>
);
}

View File

@@ -0,0 +1,32 @@
/* Logo badge shown in the navigation bar: icon plus name on a rounded pill. */
.logo {
  display: flex;
  /* `monospace` is the CSS generic family keyword. The previous value `mono`
     is treated as the name of a specific font, which most systems lack. */
  font-family: monospace;
  font-size: 0.8rem;
  align-items: center;
  gap: 8px;
  background-color: #f0f0f0;
  padding: 0 2rem 0 0;
  border-radius: 0.5rem;
}
[data-theme='dark'] .logo {
background-color: #333;
border-color: #444;
}
.logo img {
width: 25px;
margin-top: 2px;
}
[data-theme='dark'] .logo img {
filter: invert(1);
}
.logo span {
color: var(--text-color);
}
[data-theme='dark'] .logo span {
color: #f0f0f0;
}

View File

@@ -0,0 +1,33 @@
.nav {
padding: 0.25rem 0 0.25rem 1rem;
background-color: #eee;
margin-bottom: 1rem;
}
[data-theme='dark'] .nav {
background-color: #333;
}
.nav a {
padding: 0 0.5rem;
text-decoration: none;
color: #000;
align-self: center;
}
[data-theme='dark'] .nav a {
color: #f0f0f0;
}
.nav div:last-child {
margin-left: auto;
margin-right: 0.5rem;
}
.nav a:hover {
text-decoration: underline;
}
[data-theme='dark'] .nav a:hover {
color: #ddd;
}

View File

@@ -0,0 +1,87 @@
'use client';
import React from 'react';
import Link from 'next/link';
import useMediaQuery from '@mui/material/useMediaQuery';
import { Stack } from '@mui/material';
import { ThemeProvider, createTheme } from '@mui/material/styles';
import Logo from './Logo';
import DarkMode from './DarkMode';
import './PageShell.css';
export { PageShell };
/**
 * Shared page chrome: wraps `children` in the MUI theme provider and renders
 * the navigation bar above them.
 *
 * Dark mode is tracked in `darkMode` state, which drives both the MUI palette
 * and the `data-theme` attribute on `<html>` that the stylesheets key off.
 *
 * @param children Page content rendered below the navigation bar.
 */
function PageShell({ children }: { children: React.ReactNode }) {
  const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
  const [darkMode, setDarkMode] = React.useState(prefersDarkMode);

  const theme = React.useMemo(
    () =>
      createTheme({
        typography: {
          fontFamily: 'inherit',
        },
        palette: {
          mode: darkMode ? 'dark' : 'light',
        },
      }),
    [darkMode],
  );

  // Only flips state; the effect below keeps the DOM attribute in sync, so the
  // attribute and the MUI palette cannot drift apart.
  const toggleDarkMode = () => {
    setDarkMode((previous) => !previous);
  };

  // `useState` reads its initial value only once, but `useMediaQuery` may
  // report `false` on the first render and resolve the real preference on a
  // later one (see the MUI useMediaQuery docs). Follow the media query so the
  // theme and toggle icon match the system preference. A later change of the
  // system preference replaces a manual toggle.
  React.useEffect(() => {
    setDarkMode(prefersDarkMode);
  }, [prefersDarkMode]);

  // Single place that mirrors `darkMode` onto <html data-theme="dark">,
  // including removing the attribute when dark mode is switched off.
  React.useEffect(() => {
    if (darkMode) {
      document.documentElement.setAttribute('data-theme', 'dark');
    } else {
      document.documentElement.removeAttribute('data-theme');
    }
  }, [darkMode]);

  return (
    <React.StrictMode>
      <ThemeProvider theme={theme}>
        <Layout>
          <Navigation darkMode={darkMode} onToggleDarkMode={toggleDarkMode} />
          <div>{children}</div>
        </Layout>
      </ThemeProvider>
    </React.StrictMode>
  );
}
/**
 * Outermost wrapper for the page: renders its children inside a plain `<div>`.
 */
function Layout(props: { children: React.ReactNode }) {
  const { children } = props;
  return <div>{children}</div>;
}
/**
 * Top navigation bar: logo, optional page links, and the dark-mode toggle.
 *
 * The "New Eval" / "View Evals" links are omitted when the
 * `NEXT_PUBLIC_NO_BROWSING` environment variable is set to a truthy value.
 */
function Navigation({
  darkMode,
  onToggleDarkMode,
}: {
  darkMode: boolean;
  onToggleDarkMode: () => void;
}) {
  const browsingLinks = process.env.NEXT_PUBLIC_NO_BROWSING ? null : (
    <>
      <Link href="/setup">New Eval</Link>
      <Link href="/eval">View Evals</Link>
    </>
  );

  return (
    <Stack direction="row" spacing={2} className="nav">
      <Logo />
      {browsingLinks}
      <DarkMode darkMode={darkMode} onToggleDarkMode={onToggleDarkMode} />
    </Stack>
  );
}

View File

@@ -1,13 +1,16 @@
import React from 'react';
import Dialog from '@mui/material/Dialog';
import DialogTitle from '@mui/material/DialogTitle';
import DialogContent from '@mui/material/DialogContent';
import DialogActions from '@mui/material/DialogActions';
import Box from '@mui/material/Box';
import Button from '@mui/material/Button';
import Check from '@mui/icons-material/Check';
import Dialog from '@mui/material/Dialog';
import DialogActions from '@mui/material/DialogActions';
import DialogContent from '@mui/material/DialogContent';
import DialogTitle from '@mui/material/DialogTitle';
import FileCopy from '@mui/icons-material/FileCopy';
import IconButton from '@mui/material/IconButton';
import Typography from '@mui/material/Typography';
import { useStore } from './store';
import { IconButton, Box } from '@mui/material';
import { FileCopy, Check } from '@mui/icons-material';
interface ConfigModalProps {
open: boolean;

View File

@@ -5,7 +5,7 @@ import Dialog from '@mui/material/Dialog';
import DialogActions from '@mui/material/DialogActions';
import DialogContent from '@mui/material/DialogContent';
import DialogTitle from '@mui/material/DialogTitle';
import TextareaAutosize from '@mui/base/TextareaAutosize';
import TextareaAutosize from '@mui/material/TextareaAutosize';
import IconButton from '@mui/material/IconButton';
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
import CheckIcon from '@mui/icons-material/Check';
@@ -17,7 +17,7 @@ import TableHead from '@mui/material/TableHead';
import TableRow from '@mui/material/TableRow';
import Typography from '@mui/material/Typography';
import type { GradingResult } from '../../../types';
import type { GradingResult } from '../../../../../types';
interface EvalOutputPromptDialogProps {
open: boolean;

View File

@@ -1,15 +1,3 @@
* {
box-sizing: border-box;
}
html {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif,
'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
font-size: 16px;
background-color: var(--background-color);
color: var(--text-color);
}
table,
.divTable {
border: 1px solid var(--table-border-color);
@@ -20,6 +8,16 @@ table,
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
ins {
background-color: var(--insert-highlight-color);
text-decoration: none;
}
/* Removed text in the diff view: highlighted and struck through.
   `strikethrough` is not a valid text-decoration value, so the declaration
   was dropped by the browser; `line-through` is the correct keyword. */
del {
  background-color: var(--delete-highlight-color);
  text-decoration: line-through;
}
.tr {
display: flex;
}

View File

@@ -1,4 +1,5 @@
import * as React from 'react';
import { diffSentences, diffJson, diffWords } from 'diff';
import './index.css';
@@ -12,13 +13,14 @@ import {
import Checkbox from '@mui/material/Checkbox';
import FormControlLabel from '@mui/material/FormControlLabel';
import { useStore } from './store.js';
import { useStore } from './store';
import type { CellContext, VisibilityState } from '@tanstack/table-core';
import EvalOutputPromptDialog from './EvalOutputPromptDialog';
import type { EvalRow, EvalRowOutput, FilterMode } from './types.js';
import type { EvalRow, EvalRowOutput, FilterMode } from './types';
import type { GradingResult } from '../../../../../types';
import './ResultsTable.css';
@@ -99,7 +101,9 @@ function EvalOutputCell({
rowIndex,
promptIndex,
onRating,
}: PromptOutputProps) {
firstOutput,
filterMode,
}: PromptOutputProps & { firstOutput: EvalRowOutput; filterMode: FilterMode }) {
const [openPrompt, setOpen] = React.useState(false);
const handlePromptOpen = () => {
setOpen(true);
@@ -115,6 +119,42 @@ function EvalOutputCell({
text = chunks.slice(1).join('---');
}
if (filterMode === 'different' && firstOutput) {
let firstOutputText =
typeof firstOutput.text === 'string' ? firstOutput.text : JSON.stringify(firstOutput.text);
if (firstOutputText.includes('---')) {
firstOutputText = firstOutputText.split('---').slice(1).join('---');
}
let diffResult;
try {
// Try parsing the texts as JSON
JSON.parse(firstOutputText);
JSON.parse(text);
// If no errors are thrown, the texts are valid JSON
diffResult = diffJson(firstOutputText, text);
} catch (error) {
// If an error is thrown, the texts are not valid JSON
if (firstOutputText.includes('. ') && text.includes('. ')) {
// If the texts contain a period, they are considered as prose
diffResult = diffSentences(firstOutputText, text);
} else {
// If the texts do not contain a period, use diffWords
diffResult = diffWords(firstOutputText, text);
}
}
text = diffResult
.map((part: { added?: boolean; removed?: boolean; value: string }) =>
part.added
? `<ins>${part.value}</ins>`
: part.removed
? `<del>${part.value}</del>`
: part.value,
)
.join('');
}
const handleClick = (isPass: boolean) => {
onRating(rowIndex, promptIndex, isPass);
};
@@ -239,7 +279,9 @@ export default function ResultsTable({
const numGoodAsserts = head.prompts.map((_, idx) =>
body.reduce((acc, row) => {
const componentResults = row.outputs[idx].gradingResult?.componentResults;
return acc + (componentResults ? componentResults.filter((r) => r.pass).length : 0);
return (
acc + (componentResults ? componentResults.filter((r: GradingResult) => r.pass).length : 0)
);
}, 0),
);
@@ -348,6 +390,8 @@ export default function ResultsTable({
rowIndex={info.row.index}
promptIndex={idx}
onRating={handleRating}
firstOutput={filteredBody[info.row.index].outputs[0]}
filterMode={filterMode}
/>
),
}),
@@ -394,13 +438,13 @@ export default function ResultsTable({
}}
>
<thead>
{reactTable.getHeaderGroups().map((headerGroup) => (
{reactTable.getHeaderGroups().map((headerGroup: any) => (
<tr key={headerGroup.id} className="header">
{headerGroup.headers.map((header) => {
{headerGroup.headers.map((header: any) => {
return (
<th
key={header.id}
{...{
key: header.id,
colSpan: header.colSpan,
style: {
width: header.getSize(),
@@ -424,11 +468,11 @@ export default function ResultsTable({
))}
</thead>
<tbody>
{reactTable.getRowModel().rows.map((row, rowIndex) => {
{reactTable.getRowModel().rows.map((row: any, rowIndex: any) => {
let colBorderDrawn = false;
return (
<tr key={row.id}>
{row.getVisibleCells().map((cell) => {
{row.getVisibleCells().map((cell: any) => {
const isVariableCol = cell.column.id.startsWith('Variable');
const shouldDrawColBorder = !isVariableCol && !colBorderDrawn;
if (shouldDrawColBorder) {
@@ -437,8 +481,8 @@ export default function ResultsTable({
const shouldDrawRowBorder = rowIndex === 0 && !isVariableCol;
return (
<td
key={cell.id}
{...{
key: cell.id,
style: {
width: cell.column.getSize(),
},

View File

@@ -21,13 +21,13 @@ import ShareIcon from '@mui/icons-material/Share';
import VisibilityIcon from '@mui/icons-material/Visibility';
import { styled } from '@mui/system';
import ResultsTable from './ResultsTable.js';
import ResultsTable from './ResultsTable';
import ConfigModal from './ConfigModal';
import ShareModal from './ShareModal';
import { useStore } from './store.js';
import { useStore } from './store';
import type { VisibilityState } from '@tanstack/table-core';
import type { FilterMode } from './types.js';
import type { FilterMode } from './types';
const ResponsiveStack = styled(Stack)(({ theme }) => ({
maxWidth: '100%',
@@ -171,7 +171,7 @@ export default function ResultsView({ recentFiles, onRecentFileSelected }: Resul
}, [head]);
return (
<div>
<div style={{ marginLeft: '1rem', marginRight: '1rem' }}>
<Paper py="md">
<ResponsiveStack direction="row" spacing={4} alignItems="center">
<Box>

View File

View File

@@ -0,0 +1,13 @@
/* Page-level colors come from the --background-color and --text-color custom
   properties, which are not defined in this stylesheet. */
body {
background-color: var(--background-color);
color: var(--text-color);
}
/* Fixed-height flex column that centers its children on both axes and
   separates them with a 1.5rem gap. */
.loading {
display: flex;
flex-direction: column;
gap: 1.5rem;
justify-content: center;
align-items: center;
height: 9rem;
}

View File

@@ -1,14 +1,13 @@
import * as React from 'react';
'use client';
import useMediaQuery from '@mui/material/useMediaQuery';
import { ThemeProvider, createTheme } from '@mui/material/styles';
import * as React from 'react';
import CircularProgress from '@mui/material/CircularProgress';
import { io as SocketIOClient } from 'socket.io-client';
import ResultsView from './ResultsView.js';
import NavBar from './NavBar.js';
import { useStore } from './store.js';
import './App.css';
import ResultsView from './ResultsView';
import { API_BASE_URL } from '@/util/api';
import { useStore } from './store';
import './page.css';
function App() {
const { table, setTable, setConfig } = useStore();
@@ -16,52 +15,24 @@ function App() {
const loadedFromApi = React.useRef(false);
const [recentFiles, setRecentFiles] = React.useState<string[]>([]);
const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
const [darkMode, setDarkMode] = React.useState(prefersDarkMode);
const theme = React.useMemo(
() =>
createTheme({
palette: {
mode: darkMode ? 'dark' : 'light',
},
}),
[darkMode],
);
const toggleDarkMode = () => {
setDarkMode(!darkMode);
if (!darkMode) {
document.documentElement.setAttribute('data-theme', 'dark');
} else {
document.documentElement.removeAttribute('data-theme');
}
};
React.useEffect(() => {
if (prefersDarkMode) {
document.documentElement.setAttribute('data-theme', 'dark');
}
}, [prefersDarkMode]);
const fetchRecentFiles = async () => {
if (!window.location.href.includes('localhost')) {
return;
}
const resp = await fetch(`http://localhost:15500/results`);
const resp = await fetch(`${API_BASE_URL}/results`);
const body = await resp.json();
setRecentFiles(body.data);
};
const handleRecentFileSelection = async (file: string) => {
const resp = await fetch(`http://localhost:15500/results/${file}`);
const resp = await fetch(`${API_BASE_URL}/results/${file}`);
const body = await resp.json();
setTable(body.data.results.table);
setConfig(body.data.config);
};
React.useEffect(() => {
const fetchEvalData = async (id: string) => {
const fetchPublicEvalData = async (id: string) => {
if (loadedFromApi.current) {
return;
}
@@ -77,12 +48,12 @@ function App() {
setLoaded(true);
};
const socket = SocketIOClient(`http://localhost:15500`);
const socket = SocketIOClient(API_BASE_URL);
const pathMatch = window.location.pathname.match(/\/eval\/([\w:-]+)/);
if (pathMatch) {
const id = pathMatch[1];
fetchEvalData(id);
fetchPublicEvalData(id);
} else {
socket.on('init', (data) => {
console.log('Initialized socket connection', data);
@@ -105,15 +76,15 @@ function App() {
};
}, [setTable, setConfig]);
return (
<ThemeProvider theme={theme}>
<NavBar darkMode={darkMode} onToggleDarkMode={toggleDarkMode} />
{loaded && table ? (
<ResultsView recentFiles={recentFiles} onRecentFileSelected={handleRecentFileSelection} />
) : (
<div>Loading...</div>
)}
</ThemeProvider>
return loaded && table ? (
<ResultsView recentFiles={recentFiles} onRecentFileSelected={handleRecentFileSelection} />
) : (
<div className="loading">
<div>
<CircularProgress size={22} />
</div>
<div>Loading eval data</div>
</div>
);
}

View File

@@ -1,6 +1,6 @@
import create from 'zustand';
import type { EvalTable, UnifiedConfig } from './types.js';
import type { EvalTable, UnifiedConfig } from './types';
interface TableState {
table: EvalTable | null;

View File

@@ -1,4 +1,4 @@
import { EvaluateResult, TokenUsage } from '../../../types';
import type { EvaluateResult, TokenUsage } from '../../../../../types';
type Prompt = {
display: string;
@@ -33,4 +33,4 @@ export type EvalTable = {
export type FilterMode = 'all' | 'failures' | 'different';
export type { UnifiedConfig } from '../../../types';
export type { UnifiedConfig } from '../../../../../types';

View File

@@ -1,6 +1,10 @@
:root {
font-family: system-ui, Avenir, Helvetica, Arial, sans-serif;
/* This CSS is common to all pages */
* {
box-sizing: border-box;
}
:root {
font-synthesis: none;
text-rendering: optimizeLegibility;
-webkit-font-smoothing: antialiased;
@@ -18,6 +22,8 @@
--success-background-color: #d1ffd7;
--variable-background-color: #f7f7f7;
--header-background-color: #fffdf7;
--insert-highlight-color: #d4fcbc;
--delete-highlight-color: #fbb6c2;
}
/* Dark mode colors */
@@ -33,8 +39,20 @@
--success-background-color: #216d2b;
--variable-background-color: #333;
--header-background-color: #333;
--insert-highlight-color: #4f8a34;
--delete-highlight-color: #8a3434;
}
html {
font-size: calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)));
font-size: 16px;
background-color: var(--background-color);
color: var(--text-color);
}
body {
margin: 0;
}
* {
box-sizing: border-box;
}

View File

@@ -0,0 +1,25 @@
import './globals.css';
import type { Metadata } from 'next';
import { Roboto } from 'next/font/google';
import { PageShell } from './components/PageShell';
// Roboto from next/font/google: normal style, weights 400/500/700, latin subset.
// The resulting className is applied to <body> in RootLayout below.
const roboto = Roboto({
weight: ['400', '500', '700'],
style: ['normal'],
subsets: ['latin'],
});
// Title and description exported using Next's `Metadata` type.
export const metadata: Metadata = {
title: 'promptfoo',
description: 'LLM testing and evaluation',
};
/**
 * Root layout: renders the <html>/<body> skeleton, applies the Roboto font
 * class to the body and wraps the page content in PageShell.
 *
 * @param props.children Page content to render inside the shell.
 */
export default function RootLayout(props: { children: React.ReactNode }) {
  const { children } = props;
  const bodyClassName = roboto.className;

  return (
    <html lang="en">
      <body className={bodyClassName}>
        <PageShell>{children}</PageShell>
      </body>
    </html>
  );
}

View File

@@ -0,0 +1,7 @@
import { redirect } from 'next/navigation';
import './Home.css';
// Index page: renders nothing of its own and immediately redirects to /eval.
export default function Page() {
redirect('/eval');
}

View File

@@ -0,0 +1,118 @@
// src/components/AssertsForm.tsx
import React, { useState } from 'react';
import Autocomplete from '@mui/material/Autocomplete';
import Box from '@mui/material/Box';
import Button from '@mui/material/Button';
import Delete from '@mui/icons-material/Delete';
import IconButton from '@mui/material/IconButton';
import Stack from '@mui/material/Stack';
import TextField from '@mui/material/TextField';
import Typography from '@mui/material/Typography';
import type { Assertion, AssertionType } from '../../../../../types';
// Props for AssertsForm.
interface AssertsFormProps {
// Called with the complete, updated assertion list after every add, remove or edit.
onAdd: (asserts: Assertion[]) => void;
// Assertions used to seed the form's local state.
initialValues: Assertion[];
}
// Positive assertion types, in the order they are offered in the type picker.
const baseAssertTypes: AssertionType[] = [
  'equals',
  'contains',
  'icontains',
  'contains-all',
  'contains-any',
  'starts-with',
  'regex',
  'is-json',
  'contains-json',
  'javascript',
  'python',
  'similar',
  'llm-rubric',
  'webhook',
  'rouge-n',
  'rouge-s',
  'rouge-l',
];

// Full option list: every positive type first, then the `not-` prefixed
// counterpart of each one in the same relative order.
const assertTypes: AssertionType[] = [
  ...baseAssertTypes,
  ...baseAssertTypes.map((type) => `not-${type}` as AssertionType),
];
/**
 * Editable list of assertions for a single test case.
 *
 * Keeps a local copy of the assertion list (seeded from `initialValues`) and
 * reports every change to the parent through `onAdd`, which always receives
 * the complete updated list rather than only the changed item.
 */
const AssertsForm: React.FC<AssertsFormProps> = ({ onAdd, initialValues }) => {
  const [asserts, setAsserts] = useState<Assertion[]>(initialValues || []);

  // Update local state and notify the parent together so the two never drift.
  const commitAsserts = (nextAsserts: Assertion[]) => {
    setAsserts(nextAsserts);
    onAdd(nextAsserts);
  };

  const handleAdd = () => {
    commitAsserts([...asserts, { type: 'equals' as AssertionType, value: '' }]);
  };

  const handleRemoveAssert = (indexToRemove: number) => {
    commitAsserts(asserts.filter((_, index) => index !== indexToRemove));
  };

  const handleTypeChange = (indexToChange: number, newType: AssertionType | null) => {
    // An assertion without a type is meaningless; ignore a cleared selection
    // instead of storing `null` as the type.
    if (!newType) {
      return;
    }
    commitAsserts(asserts.map((a, i) => (i === indexToChange ? { ...a, type: newType } : a)));
  };

  const handleValueChange = (indexToChange: number, newValue: string) => {
    commitAsserts(asserts.map((a, i) => (i === indexToChange ? { ...a, value: newValue } : a)));
  };

  return (
    <>
      <Typography variant="h6">Asserts</Typography>
      <Box my={asserts.length > 0 ? 2 : 0}>
        <Stack direction="column" spacing={2}>
          {asserts.map((assert, index) => (
            <Stack key={index} direction="row" spacing={2} alignItems="center">
              <Autocomplete
                value={assert.type}
                options={assertTypes}
                sx={{ minWidth: 200 }}
                // The type is required, so do not offer a clear button.
                disableClearable
                onChange={(event, newValue) => handleTypeChange(index, newValue)}
                renderInput={(params) => <TextField {...params} label="Type" />}
              />
              <TextField
                label="Value"
                value={assert.value}
                fullWidth
                onChange={(e) => handleValueChange(index, e.target.value)}
              />
              <IconButton onClick={() => handleRemoveAssert(index)} size="small">
                <Delete />
              </IconButton>
            </Stack>
          ))}
        </Stack>
      </Box>
      <Button color="primary" onClick={handleAdd}>
        Add Assert
      </Button>
    </>
  );
};
export default AssertsForm;

View File

@@ -0,0 +1,77 @@
import React from 'react';
import {
Dialog,
DialogTitle,
DialogContent,
DialogActions,
TextField,
Button,
} from '@mui/material';
/** Props for PromptDialog. */
interface PromptDialogProps {
  /** Whether the dialog is shown. */
  open: boolean;
  /** Text used to seed the editor; the editor is re-seeded whenever it changes. */
  prompt: string;
  /** Zero-based prompt index; shown one-based in the dialog title. */
  index: number;
  /** Receives the edited prompt text when either "Add" button is pressed. */
  onAdd: (prompt: string) => void;
  /** Called when the dialog is dismissed, and after "Add" to close it. */
  onCancel: () => void;
}

/**
 * Modal editor for a single prompt.
 *
 * "Add" submits the text and closes the dialog; "Add Another" submits the
 * text, clears the editor and returns focus to it. Both buttons are disabled
 * while the editor is empty.
 */
const PromptDialog: React.FC<PromptDialogProps> = (props) => {
  const { open, prompt, index, onAdd, onCancel } = props;
  const [editingPrompt, setEditingPrompt] = React.useState(prompt);
  const textFieldRef = React.useRef<HTMLInputElement>(null);

  // Keep the editor in sync when the parent switches to a different prompt.
  React.useEffect(() => {
    setEditingPrompt(prompt);
  }, [prompt]);

  const handleAdd = (close: boolean) => {
    onAdd(editingPrompt);
    setEditingPrompt('');
    if (close) {
      onCancel();
      return;
    }
    if (textFieldRef.current) {
      textFieldRef.current.focus();
    }
  };

  const submitAndClose = () => handleAdd(true);
  const submitAndContinue = () => handleAdd(false);
  const isEmpty = !editingPrompt.length;

  return (
    <Dialog open={open} onClose={onCancel} fullWidth maxWidth="md">
      <DialogTitle>{`Edit Prompt ${index + 1}`}</DialogTitle>
      <DialogContent>
        <TextField
          value={editingPrompt}
          onChange={(changeEvent) => setEditingPrompt(changeEvent.target.value)}
          fullWidth
          margin="normal"
          multiline
          placeholder="The quick brown {{animal1}} jumps over the lazy {{animal2}}."
          helperText="Tip: use the {{varname}} syntax to add variables to your prompt."
          inputRef={textFieldRef}
        />
      </DialogContent>
      <DialogActions>
        <Button onClick={submitAndClose} color="primary" variant="contained" disabled={isEmpty}>
          Add
        </Button>
        <Button onClick={submitAndContinue} color="primary" variant="contained" disabled={isEmpty}>
          Add Another
        </Button>
        <Button onClick={onCancel} color="secondary">
          Cancel
        </Button>
      </DialogActions>
    </Dialog>
  );
};
export default PromptDialog;

View File

@@ -0,0 +1,190 @@
import React, { useState, useRef, useEffect } from 'react';
import Button from '@mui/material/Button';
import Typography from '@mui/material/Typography';
import IconButton from '@mui/material/IconButton';
import Table from '@mui/material/Table';
import TableBody from '@mui/material/TableBody';
import TableCell from '@mui/material/TableCell';
import TableContainer from '@mui/material/TableContainer';
import TableRow from '@mui/material/TableRow';
import Tooltip from '@mui/material/Tooltip';
import Stack from '@mui/material/Stack';
import Edit from '@mui/icons-material/Edit';
import Delete from '@mui/icons-material/Delete';
import Publish from '@mui/icons-material/Publish';
import Copy from '@mui/icons-material/ContentCopy';
import PromptDialog from './PromptDialog';
import { useStore } from '../../util/store';
/**
 * "Prompts" section of the eval setup page.
 *
 * Lists the prompts held in the shared store and lets the user add (typed or
 * uploaded from a file), edit, duplicate and remove them. Editing and adding
 * both go through PromptDialog; `editingPromptIndex` is `null` while adding.
 */
const PromptsSection: React.FC = () => {
  const [promptDialogOpen, setPromptDialogOpen] = useState(false);
  const [editingPromptIndex, setEditingPromptIndex] = useState<number | null>(null);
  const { prompts, setPrompts } = useStore();
  // NOTE(review): this ref is not attached to any element rendered here, so the
  // effect below never finds a node to focus. Confirm whether both can be removed.
  const newPromptInputRef = useRef<HTMLInputElement>(null);

  useEffect(() => {
    if (editingPromptIndex !== null && editingPromptIndex > 0 && newPromptInputRef.current) {
      newPromptInputRef.current.focus();
    }
  }, [editingPromptIndex]);

  const handleEditPrompt = (index: number) => {
    setEditingPromptIndex(index);
    setPromptDialogOpen(true);
  };

  const handleAddPromptFromFile = (event: React.ChangeEvent<HTMLInputElement>) => {
    event.stopPropagation();
    event.preventDefault();

    const input = event.target;
    const file = input.files?.[0];
    // Reset the input so choosing the same file again still fires `change`.
    // The File object obtained above stays readable after the reset.
    input.value = '';

    if (file) {
      const reader = new FileReader();
      reader.onload = (e) => {
        const text = e.target?.result?.toString();
        if (text) {
          setPrompts([...prompts, text]);
        }
      };
      reader.readAsText(file);
    }
  };

  const handleDuplicatePrompt = (event: React.MouseEvent, index: number) => {
    // The row itself opens the editor on click; keep the click local to the button.
    event.stopPropagation();
    const duplicatedPrompt = prompts[index];
    setPrompts([...prompts, duplicatedPrompt]);
  };

  const handleChangePrompt = (index: number, newPrompt: string) => {
    setPrompts(prompts.map((p, i) => (i === index ? newPrompt : p)));
  };

  const handleRemovePrompt = (event: React.MouseEvent, indexToRemove: number) => {
    event.stopPropagation();

    if (confirm('Are you sure you want to remove this prompt?')) {
      setPrompts(prompts.filter((_, index) => index !== indexToRemove));
    }
  };

  return (
    <div>
      <Stack direction="row" spacing={2} justifyContent="space-between">
        <Typography variant="h5">Prompts</Typography>
        <div>
          <label htmlFor={`file-input-add-prompt`}>
            <Tooltip title="Upload prompt from file">
              <span>
                <IconButton component="span">
                  <Publish />
                </IconButton>
                <input
                  id={`file-input-add-prompt`}
                  type="file"
                  accept=".txt,.md"
                  onChange={handleAddPromptFromFile}
                  style={{ display: 'none' }}
                />
              </span>
            </Tooltip>
          </label>
          <Button
            color="primary"
            onClick={() => {
              setPromptDialogOpen(true);
            }}
            variant="contained"
          >
            Add Prompt
          </Button>
        </div>
      </Stack>
      <TableContainer>
        <Table>
          <TableBody>
            {prompts.length === 0 ? (
              <TableRow>
                <TableCell colSpan={2} align="center">
                  No prompts added yet.
                </TableCell>
              </TableRow>
            ) : (
              prompts.map((prompt, index) => (
                <TableRow
                  key={index}
                  sx={{
                    '&:hover': {
                      backgroundColor: 'rgba(0, 0, 0, 0.04)',
                      cursor: 'pointer',
                    },
                  }}
                  onClick={() => handleEditPrompt(index)}
                >
                  <TableCell>
                    <Typography variant="body2">
                      {`Prompt #${index + 1}: `}
                      {/* Show at most 250 characters and highlight {{variable}} tokens.
                          Splitting on a capturing group keeps the tokens as their own parts. */}
                      {(prompt.length > 250 ? prompt.slice(0, 250) + ' ...' : prompt)
                        .split(/({{\w+}})/g)
                        .map((part, i) =>
                          /{{\w+}}/.test(part) ? (
                            <span
                              key={i}
                              style={{
                                backgroundColor: 'linen',
                                padding: '0.25rem',
                                borderRadius: '4px',
                              }}
                            >
                              {part}
                            </span>
                          ) : (
                            part
                          ),
                        )}
                    </Typography>
                  </TableCell>
                  <TableCell align="right" sx={{ minWidth: 150 }}>
                    <IconButton
                      onClick={(event) => {
                        // Avoid also triggering the row's own click handler.
                        event.stopPropagation();
                        handleEditPrompt(index);
                      }}
                      size="small"
                    >
                      <Edit />
                    </IconButton>
                    <IconButton
                      onClick={(event) => handleDuplicatePrompt(event, index)}
                      size="small"
                    >
                      <Copy />
                    </IconButton>
                    <IconButton onClick={(event) => handleRemovePrompt(event, index)} size="small">
                      <Delete />
                    </IconButton>
                  </TableCell>
                </TableRow>
              ))
            )}
          </TableBody>
        </Table>
      </TableContainer>
      <PromptDialog
        open={promptDialogOpen}
        prompt={editingPromptIndex !== null ? prompts[editingPromptIndex] : ''}
        index={editingPromptIndex !== null ? editingPromptIndex : 0}
        onAdd={(newPrompt) => {
          if (editingPromptIndex !== null) {
            handleChangePrompt(editingPromptIndex, newPrompt);
          } else {
            setPrompts([...prompts, newPrompt]);
          }
          // After the first submit the dialog is in "add" mode, so a following
          // "Add Another" appends instead of overwriting the edited prompt.
          setEditingPromptIndex(null);
        }}
        onCancel={() => {
          setEditingPromptIndex(null);
          setPromptDialogOpen(false);
        }}
      />
    </div>
  );
};
export default PromptsSection;

View File

@@ -0,0 +1,99 @@
import React from 'react';
import {
Box,
Dialog,
DialogTitle,
DialogContent,
TextField,
DialogActions,
Button,
} from '@mui/material';
import { ProviderConfig } from '../../../../../types';
/** Props for ProviderConfigDialog. */
interface ProviderConfigDialogProps {
  /** Whether the dialog is shown. */
  open: boolean;
  /** Provider id, shown in the dialog title. */
  providerId: string;
  /** Config object to edit; local state is re-seeded whenever it changes. */
  config: ProviderConfig['config'];
  /** Called when the dialog is dismissed or "Cancel" is pressed. */
  onClose: () => void;
  /** Receives the edited config when "Save" is pressed. */
  onSave: (config: ProviderConfig['config']) => void;
}

/** Returns true when `text` can be parsed as JSON. */
const isParsableJson = (text: string): boolean => {
  try {
    JSON.parse(text);
    return true;
  } catch {
    return false;
  }
};

/**
 * Dialog that edits a provider's config one key at a time.
 *
 * Number, boolean and string values get a plain text field. Any other value is
 * edited as JSON text: the raw text is kept as a per-key draft so the user can
 * type through intermediate invalid states, and the config is only updated
 * when the draft parses. "Save" is disabled while any draft is invalid.
 */
const ProviderConfigDialog: React.FC<ProviderConfigDialogProps> = ({
  open,
  providerId,
  config,
  onClose,
  onSave,
}) => {
  const [localConfig, setLocalConfig] = React.useState(config);
  // Raw text of JSON-edited fields, keyed by config key.
  const [jsonDrafts, setJsonDrafts] = React.useState<Record<string, string>>({});

  React.useEffect(() => {
    setLocalConfig(config);
    setJsonDrafts({});
  }, [config]);

  const hasInvalidDraft = Object.values(jsonDrafts).some((draft) => !isParsableJson(draft));

  const handleSave = () => {
    onSave(localConfig);
  };

  const handleJsonChange = (key: string, text: string) => {
    setJsonDrafts((previous) => ({ ...previous, [key]: text }));

    let parsed;
    try {
      parsed = JSON.parse(text);
    } catch {
      // Not valid JSON yet: keep the last valid value until the draft parses.
      return;
    }
    setLocalConfig({ ...localConfig, [key]: parsed });
  };

  return (
    <Dialog open={open} onClose={onClose}>
      <DialogTitle>Edit {providerId}</DialogTitle>
      <DialogContent>
        {/* Tolerate a missing config instead of throwing inside Object.keys. */}
        {Object.keys(localConfig || {}).map((key) => {
          const value = localConfig[key];
          let handleChange;

          if (
            typeof value === 'number' ||
            typeof value === 'boolean' ||
            typeof value === 'string'
          ) {
            if (typeof value === 'number') {
              handleChange = (e: React.ChangeEvent<HTMLInputElement>) =>
                setLocalConfig({ ...localConfig, [key]: parseFloat(e.target.value) });
            } else if (typeof value === 'boolean') {
              handleChange = (e: React.ChangeEvent<HTMLInputElement>) =>
                setLocalConfig({ ...localConfig, [key]: e.target.value === 'true' });
            } else {
              handleChange = (e: React.ChangeEvent<HTMLInputElement>) =>
                setLocalConfig({ ...localConfig, [key]: e.target.value });
            }

            return (
              <Box key={key} my={2}>
                <TextField
                  label={key}
                  value={value}
                  onChange={handleChange}
                  fullWidth
                  type={typeof value === 'number' ? 'number' : 'text'}
                />
              </Box>
            );
          } else {
            const draft = jsonDrafts[key];
            const text = draft !== undefined ? draft : JSON.stringify(value);
            const isInvalid = draft !== undefined && !isParsableJson(draft);

            return (
              <Box key={key} my={2}>
                <TextField
                  label={key}
                  value={text}
                  onChange={(e: React.ChangeEvent<HTMLInputElement>) =>
                    handleJsonChange(key, e.target.value)
                  }
                  error={isInvalid}
                  helperText={isInvalid ? 'Invalid JSON' : undefined}
                  fullWidth
                  multiline
                  minRows={3}
                />
              </Box>
            );
          }
        })}
      </DialogContent>
      <DialogActions>
        <Button onClick={onClose}>Cancel</Button>
        <Button onClick={handleSave} disabled={hasInvalidDraft}>
          Save
        </Button>
      </DialogActions>
    </Dialog>
  );
};
export default ProviderConfigDialog;

View File

@@ -0,0 +1,149 @@
import React from 'react';
import { Autocomplete, Box, Chip, TextField } from '@mui/material';
import { ProviderConfig } from '../../../../../types';
import ProviderConfigDialog from './ProviderConfigDialog';
// Model names offered under both the `openai:` and `azureopenai:` prefixes.
const openAiStyleModelNames = [
  'gpt-3.5-turbo',
  'gpt-3.5-turbo-0301',
  'gpt-3.5-turbo-0613',
  'gpt-3.5-turbo-16k',
  'gpt-3.5-turbo-16k-0613',
  'gpt-4',
  'gpt-4-0314',
  'gpt-4-0613',
  'gpt-4-32k',
  'gpt-4-32k-0314',
];

// Anthropic provider ids offered by default.
const anthropicProviderIds = [
  'anthropic:claude-1',
  'anthropic:claude-1-100k',
  'anthropic:claude-instant-1',
  'anthropic:claude-instant-1-100k',
];

// Built-in provider suggestions, each with its own starter config object,
// sorted alphabetically by id.
const defaultProviders: ProviderConfig[] = [
  {
    id: 'replicate:replicate/llama70b-v2-chat:e951f18578850b652510200860fc4ea62b3b16fac280f83ff32282f87bbd2e48',
    config: { temperature: 0.5 },
  },
  ...anthropicProviderIds.map((id) => ({ id, config: { temperature: 0.5 } })),
  ...openAiStyleModelNames.map((model) => ({
    id: `openai:${model}`,
    config: { temperature: 0.5, max_tokens: 1024 },
  })),
  ...openAiStyleModelNames.map((model) => ({
    id: `azureopenai:${model}`,
    config: { temperature: 0.5, max_tokens: 1024 },
  })),
].sort((left, right) => left.id.localeCompare(right.id));
/** Props for ProviderSelector. */
interface ProviderSelectorProps {
  /** Currently selected providers. */
  providers: ProviderConfig[];
  /** Receives the full updated provider list after any selection or config change. */
  onChange: (providers: ProviderConfig[]) => void;
}

/**
 * Multi-select for LLM providers.
 *
 * Suggests the built-in `defaultProviders`, accepts free-typed ids (stored as
 * `{ id }` without a config) and opens ProviderConfigDialog when the chip of a
 * provider that has a config is clicked.
 */
const ProviderSelector: React.FC<ProviderSelectorProps> = ({ providers, onChange }) => {
  const [selectedProvider, setSelectedProvider] = React.useState<ProviderConfig | null>(null);

  const getProviderLabel = (provider: string | ProviderConfig) => {
    if (typeof provider === 'string') {
      return provider;
    }
    return provider.id || 'Unknown provider';
  };

  const getProviderKey = (provider: string | ProviderConfig, index: number) => {
    if (typeof provider === 'string') {
      return provider;
    }
    return provider.id || index;
  };

  // Compare by id instead of object identity, so a provider that is not the
  // same instance as an entry in `defaultProviders` is still recognised as
  // already selected. The option may be a free-typed string, hence the wider type.
  const isSameProvider = (option: string | ProviderConfig, value: string | ProviderConfig) => {
    const optionId = typeof option === 'string' ? option : option.id;
    const valueId = typeof value === 'string' ? value : value.id;
    return optionId !== undefined && optionId === valueId;
  };

  const handleProviderClick = (provider: string | ProviderConfig) => {
    if (typeof provider === 'string') {
      alert('Cannot edit custom providers');
    } else if (!provider.config) {
      alert('There is no config for this provider');
    } else {
      setSelectedProvider(provider as ProviderConfig);
    }
  };

  const handleSave = (config: ProviderConfig['config']) => {
    if (selectedProvider) {
      const updatedProviders = providers.map((provider) =>
        provider.id === selectedProvider.id ? { ...provider, config } : provider,
      );
      onChange(updatedProviders);
      setSelectedProvider(null);
    }
  };

  return (
    <Box mt={2}>
      <Autocomplete
        multiple
        freeSolo
        options={defaultProviders}
        value={providers}
        isOptionEqualToValue={isSameProvider}
        onChange={(event, newValue: (string | ProviderConfig)[]) => {
          // Free-typed entries arrive as plain strings; normalise them to objects.
          onChange(newValue.map((value) => (typeof value === 'string' ? { id: value } : value)));
        }}
        getOptionLabel={(option) => {
          if (!option) {
            return '';
          }
          return getProviderLabel(option);
        }}
        renderTags={(value, getTagProps) =>
          value.map((provider, index: number) => {
            const label = getProviderLabel(provider);
            const key = getProviderKey(provider, index);

            return (
              <Chip
                variant="outlined"
                label={label}
                {...getTagProps({ index })}
                key={key}
                onClick={() => handleProviderClick(provider)}
              />
            );
          })
        }
        renderInput={(params) => (
          <TextField
            {...params}
            variant="outlined"
            placeholder="Select LLM providers"
            helperText={providers.length > 0 ? 'Click a provider to configure its settings.' : null}
          />
        )}
      />
      {selectedProvider && selectedProvider.id && (
        <ProviderConfigDialog
          open={!!selectedProvider}
          providerId={selectedProvider.id}
          config={selectedProvider.config}
          onClose={() => setSelectedProvider(null)}
          onSave={handleSave}
        />
      )}
    </Box>
  );
};
export default ProviderSelector;

View File

@@ -0,0 +1,88 @@
'use client';
import React, { useState } from 'react';
import { useRouter } from 'next/navigation';
import { Button, CircularProgress } from '@mui/material';
import { useStore } from '@/util/store';
import { API_BASE_URL } from '@/util/api';
/**
 * Button that submits the test suite held in the store and tracks its progress.
 *
 * POSTs the suite to `/api/eval`, then polls `/api/eval/<job id>` once a second.
 * While the job runs the button is disabled and shows the completion
 * percentage; on completion it navigates to `/eval`. Any failure (submit,
 * poll request, or a job reported as failed) stops polling, re-enables the
 * button and is shown to the user.
 */
const RunTestSuiteButton: React.FC = () => {
  const router = useRouter();
  const { description, providers, prompts, testCases } = useStore();
  const [isRunning, setIsRunning] = useState(false);
  const [progressPercent, setProgressPercent] = useState(0);
  // Handle of the active polling interval, or null when not polling.
  const pollTimerRef = React.useRef<ReturnType<typeof setInterval> | null>(null);

  const stopPolling = () => {
    if (pollTimerRef.current !== null) {
      clearInterval(pollTimerRef.current);
      pollTimerRef.current = null;
    }
  };

  // Do not keep polling after the button has been unmounted.
  React.useEffect(() => {
    return () => stopPolling();
  }, []);

  const reportFailure = (error: unknown) => {
    console.error(error);
    stopPolling();
    setIsRunning(false);
    alert(`An error occurred: ${error instanceof Error ? error.message : String(error)}`);
  };

  const pollJob = async (jobId: string) => {
    // Errors must be handled here: this runs from a timer callback, so nothing
    // upstream would catch a rejection.
    try {
      const progressResponse = await fetch(`${API_BASE_URL}/api/eval/${jobId}`);
      if (!progressResponse.ok) {
        throw new Error(`HTTP error! status: ${progressResponse.status}`);
      }

      const progressData = await progressResponse.json();

      // A slow request can finish after polling was already stopped by an
      // earlier response; ignore it so completion is handled only once.
      if (pollTimerRef.current === null) {
        return;
      }

      if (progressData.status === 'completed') {
        stopPolling();
        setIsRunning(false);
        router.push('/eval');
      } else if (progressData.status === 'failed') {
        throw new Error('Job failed');
      } else {
        const percent =
          progressData.total === 0
            ? 0
            : Math.round((progressData.progress / progressData.total) * 100);
        setProgressPercent(percent);
      }
    } catch (error) {
      // Report only the first failure when several polls were in flight.
      if (pollTimerRef.current === null) {
        return;
      }
      reportFailure(error);
    }
  };

  const runTestSuite = async () => {
    setIsRunning(true);
    // Start from zero so a previous run's percentage is never shown.
    setProgressPercent(0);

    const testSuite = {
      description,
      providers,
      prompts,
      tests: testCases,
    };

    try {
      const response = await fetch(`${API_BASE_URL}/api/eval`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify(testSuite),
      });

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      const job = await response.json();

      pollTimerRef.current = setInterval(() => {
        void pollJob(job.id);
      }, 1000);
    } catch (error) {
      reportFailure(error);
    }
  };

  return (
    <Button variant="contained" color="primary" onClick={runTestSuite} disabled={isRunning}>
      {isRunning ? (
        <>
          <CircularProgress size={24} sx={{ marginRight: 2 }} />
          {progressPercent.toFixed(0)}% complete
        </>
      ) : (
        'Run Evaluation'
      )}
    </Button>
  );
};
export default RunTestSuiteButton;

View File

@@ -0,0 +1,108 @@
import React, { useState } from 'react';
import {
Button,
TextField,
Box,
Dialog,
DialogTitle,
DialogContent,
DialogActions,
} from '@mui/material';
import VarsForm from './VarsForm';
import AssertsForm from './AssertsForm';
import type { TestCase } from '../../../../../types';
/** Props for TestCaseForm. */
interface TestCaseFormProps {
  /** Whether the dialog is shown. */
  open: boolean;
  /** Receives the assembled test case and whether the dialog should close afterwards. */
  onAdd: (testCase: TestCase, shouldClose: boolean) => void;
  /** Variable names forwarded to VarsForm. */
  varsList: string[];
  /** Existing test case to edit; when absent the dialog is in "add" mode. */
  initialValues?: TestCase;
  /** Called when the dialog is dismissed, and after a closing submit. */
  onCancel: () => void;
}

/**
 * Dialog for adding or editing a single test case (variables and assertions).
 *
 * The description is kept in state and included in the submitted test case,
 * but no input for it is currently rendered.
 */
const TestCaseForm: React.FC<TestCaseFormProps> = (props) => {
  const { open, onAdd, varsList, initialValues, onCancel } = props;
  const isEditing = Boolean(initialValues);

  const [description, setDescription] = useState(initialValues?.description || '');
  const [vars, setVars] = useState(initialValues?.vars || {});
  const [asserts, setAsserts] = useState(initialValues?.assert || []);
  // Bumped after each submit to remount AssertsForm and clear its local state.
  const [assertsFormKey, setAssertsFormKey] = useState(0);

  // Re-seed the form whenever a different test case (or none) is being edited.
  React.useEffect(() => {
    setDescription(initialValues?.description || '');
    setVars(initialValues?.vars || {});
    setAsserts(initialValues?.assert || []);
  }, [initialValues]);

  const handleAdd = (close: boolean) => {
    const testCase = {
      description,
      vars,
      assert: asserts,
    };
    onAdd(testCase, close);
    if (close) {
      onCancel();
    }
    setDescription('');
    setVars({});
    setAsserts([]);
    setAssertsFormKey((previousKey) => previousKey + 1);
  };

  const submitAndClose = () => handleAdd(true);
  const submitAndContinue = () => handleAdd(false);

  return (
    <Dialog open={open} onClose={onCancel} fullWidth maxWidth="md">
      <DialogTitle>{isEditing ? 'Edit Test Case' : 'Add Test Case'}</DialogTitle>
      <DialogContent>
        <Box>
          <VarsForm
            onAdd={(updatedVars) => setVars(updatedVars)}
            varsList={varsList}
            initialValues={initialValues?.vars as Record<string, string>}
          />
          <AssertsForm
            key={assertsFormKey}
            onAdd={(updatedAsserts) => setAsserts(updatedAsserts)}
            initialValues={initialValues?.assert || []}
          />
        </Box>
      </DialogContent>
      <DialogActions>
        <Button onClick={submitAndClose} color="primary" variant="contained">
          {isEditing ? 'Update Test Case' : 'Add Test Case'}
        </Button>
        {!isEditing && (
          <Button onClick={submitAndContinue} color="primary" variant="contained">
            Add Another
          </Button>
        )}
        <Button onClick={onCancel} color="secondary">
          Cancel
        </Button>
      </DialogActions>
    </Dialog>
  );
};
export default TestCaseForm;

View File

@@ -0,0 +1,154 @@
import React from 'react';
import Button from '@mui/material/Button';
import Copy from '@mui/icons-material/ContentCopy';
import Delete from '@mui/icons-material/Delete';
import Edit from '@mui/icons-material/Edit';
import IconButton from '@mui/material/IconButton';
import Stack from '@mui/material/Stack';
import Table from '@mui/material/Table';
import TableBody from '@mui/material/TableBody';
import TableCell from '@mui/material/TableCell';
import TableContainer from '@mui/material/TableContainer';
import TableHead from '@mui/material/TableHead';
import TableRow from '@mui/material/TableRow';
import Typography from '@mui/material/Typography';
import TestCaseDialog from './TestCaseDialog';
import { useStore } from '../../util/store';
import type { TestCase } from '../../../../../types';
/** Props for TestCasesSection. */
interface TestCasesSectionProps {
  /** Variable names forwarded to the test case dialog. */
  varsList: string[];
}

/**
 * "Test Cases" section of the eval setup page.
 *
 * Shows the test cases held in the shared store in a table and lets the user
 * add, edit, duplicate and delete them. `editingTestCaseIndex` is `null` while
 * a new test case is being added.
 */
const TestCasesSection: React.FC<TestCasesSectionProps> = ({ varsList }) => {
  const { testCases, setTestCases } = useStore();
  const [editingTestCaseIndex, setEditingTestCaseIndex] = React.useState<number | null>(null);
  const [testCaseDialogOpen, setTestCaseDialogOpen] = React.useState(false);

  // Open the dialog in edit mode for the given row.
  const openEditor = (index: number) => {
    setEditingTestCaseIndex(index);
    setTestCaseDialogOpen(true);
  };

  const closeDialog = () => {
    setEditingTestCaseIndex(null);
    setTestCaseDialogOpen(false);
  };

  const handleAddTestCase = (testCase: TestCase, shouldClose: boolean) => {
    if (editingTestCaseIndex !== null) {
      // Replace the edited entry in place, then leave edit mode.
      setTestCases(testCases.map((tc, index) => (index === editingTestCaseIndex ? testCase : tc)));
      setEditingTestCaseIndex(null);
    } else {
      setTestCases([...testCases, testCase]);
    }

    if (shouldClose) {
      setTestCaseDialogOpen(false);
    }
  };

  const handleRemoveTestCase = (event: React.MouseEvent, index: number) => {
    // Keep the click from reaching the row, which would open the editor.
    event.stopPropagation();

    const confirmed = confirm('Are you sure you want to delete this test case?');
    if (!confirmed) {
      return;
    }
    setTestCases(testCases.filter((_, i) => i !== index));
  };

  const handleDuplicateTestCase = (event: React.MouseEvent, index: number) => {
    event.stopPropagation();
    // Deep copy via a JSON round-trip so the duplicate shares nothing with the original.
    const serialized = JSON.stringify(testCases[index]);
    setTestCases([...testCases, JSON.parse(serialized)]);
  };

  const rowHoverStyle = {
    '&:hover': {
      backgroundColor: 'rgba(0, 0, 0, 0.04)',
      cursor: 'pointer',
    },
  };

  const emptyRow = (
    <TableRow>
      <TableCell colSpan={4} align="center">
        No test cases added yet.
      </TableCell>
    </TableRow>
  );

  return (
    <>
      <Stack direction="row" spacing={2} justifyContent="space-between">
        <Typography variant="h5">Test Cases</Typography>
        <Button color="primary" onClick={() => setTestCaseDialogOpen(true)} variant="contained">
          Add Test Case
        </Button>
      </Stack>
      <TableContainer>
        <Table>
          <TableHead>
            <TableRow>
              <TableCell>Description</TableCell>
              <TableCell>Assertions</TableCell>
              <TableCell>Variables</TableCell>
              <TableCell align="right"></TableCell>
            </TableRow>
          </TableHead>
          <TableBody>
            {testCases.length === 0
              ? emptyRow
              : testCases.map((testCase, index) => (
                  <TableRow key={index} sx={rowHoverStyle} onClick={() => openEditor(index)}>
                    <TableCell>
                      <Typography variant="body2">
                        {testCase.description || `Test Case #${index + 1}`}
                      </Typography>
                    </TableCell>
                    <TableCell>{testCase.assert?.length || 0} assertions</TableCell>
                    <TableCell>
                      {Object.entries(testCase.vars || {})
                        .map(([k, v]) => k + '=' + v)
                        .join(', ')}
                    </TableCell>
                    <TableCell align="right" sx={{ minWidth: 150 }}>
                      <IconButton onClick={() => openEditor(index)} size="small">
                        <Edit />
                      </IconButton>
                      <IconButton
                        onClick={(event) => handleDuplicateTestCase(event, index)}
                        size="small"
                      >
                        <Copy />
                      </IconButton>
                      <IconButton
                        onClick={(event) => handleRemoveTestCase(event, index)}
                        size="small"
                      >
                        <Delete />
                      </IconButton>
                    </TableCell>
                  </TableRow>
                ))}
          </TableBody>
        </Table>
      </TableContainer>
      <TestCaseDialog
        open={testCaseDialogOpen}
        onAdd={handleAddTestCase}
        varsList={varsList}
        initialValues={editingTestCaseIndex !== null ? testCases[editingTestCaseIndex] : undefined}
        onCancel={closeDialog}
      />
    </>
  );
};
export default TestCasesSection;

View File

@@ -0,0 +1,57 @@
import React, { useEffect } from 'react';
import { Box, TextField, Typography, Stack } from '@mui/material';
/** Props for VarsForm. */
interface VarsFormProps {
  /** Called with the complete variable map after every edit. */
  onAdd: (vars: Record<string, string>) => void;
  /** Names of the variables to render an input for. */
  varsList: string[];
  /** Optional starting values, keyed by variable name. */
  initialValues?: Record<string, string>;
}

/**
 * Form with one text field per variable name in `varsList`.
 *
 * The local value map is rebuilt whenever `varsList` or `initialValues`
 * changes: each listed variable takes its initial value, or an empty string.
 * When there are no variables a hint about the template syntax is shown.
 */
const VarsForm: React.FC<VarsFormProps> = (props) => {
  const { onAdd, varsList, initialValues } = props;
  const [vars, setVars] = React.useState<Record<string, string>>(initialValues || {});

  useEffect(() => {
    const seededVars = varsList.reduce<Record<string, string>>((accumulator, name) => {
      accumulator[name] = initialValues?.[name] || '';
      return accumulator;
    }, {});
    setVars(seededVars);
  }, [varsList, initialValues]);

  // Store the new value locally and hand the full map to the parent.
  const updateVar = (varName: string, newValue: string) => {
    const newVars = {
      ...vars,
      [varName]: newValue,
    };
    setVars(newVars);
    onAdd(newVars);
  };

  let content;
  if (varsList.length > 0) {
    content = (
      <Stack direction="row" spacing={2} alignItems="center">
        {Object.keys(vars).map((varName, index) => (
          <Stack key={index} direction="row" spacing={2} alignItems="center">
            <TextField
              placeholder={varName}
              label={varName}
              value={vars[varName]}
              fullWidth
              onChange={(changeEvent) => updateVar(varName, changeEvent.target.value)}
            />
          </Stack>
        ))}
      </Stack>
    );
  } else {
    content = (
      <Typography variant="subtitle1" gutterBottom>
        Add variables to your prompt using the {'{{varname}}'} syntax.
      </Typography>
    );
  }

  return (
    <Box my={2}>
      <Typography variant="h6" mb={2}>
        Vars
      </Typography>
      {content}
    </Box>
  );
};
export default VarsForm;

View File

@@ -0,0 +1,3 @@
/* Small fixed font size for elements with the yaml-config class.
   NOTE(review): presumably the generated YAML preview on the setup page — confirm where this class is applied. */
.yaml-config {
font-size: 10px;
}

View File

@@ -0,0 +1,156 @@
'use client';
import React, { useState, useEffect } from 'react';
import Link from 'next/link';
import yaml from 'js-yaml';
import { Light as SyntaxHighlighter } from 'react-syntax-highlighter';
import { docco } from 'react-syntax-highlighter/dist/cjs/styles/hljs';
import Button from '@mui/material/Button';
import Container from '@mui/material/Container';
import Typography from '@mui/material/Typography';
import Box from '@mui/material/Box';
import Stack from '@mui/material/Stack';
import Dialog from '@mui/material/Dialog';
import DialogActions from '@mui/material/DialogActions';
import DialogContent from '@mui/material/DialogContent';
import DialogContentText from '@mui/material/DialogContentText';
import DialogTitle from '@mui/material/DialogTitle';
import RunTestSuiteButton from './RunTestSuiteButton';
import PromptsSection from './PromptsSection';
import TestCasesSection from './TestCasesSection';
import ProviderSelector from './ProviderSelector';
import { useStore } from '../../util/store';
import './page.css';
/**
 * Collects the distinct template variable names referenced by a set of prompts.
 *
 * Matches `{{name}}` and also `{{ name }}` (whitespace inside the braces).
 *
 * @param prompts - Prompt template strings to scan.
 * @returns Unique variable names in first-seen order.
 */
function extractVarsFromPrompts(prompts: string[]): string[] {
  const varsSet = new Set<string>();
  prompts.forEach((prompt) => {
    // A /g regex carries `lastIndex` state between exec() calls; create it per
    // prompt so scanning always starts at the beginning of each string.
    const varRegex = /\{\{\s*(\w+)\s*\}\}/g;
    let match;
    while ((match = varRegex.exec(prompt)) !== null) {
      varsSet.add(match[1]);
    }
  });
  return Array.from(varsSet);
}

/**
 * Page component for building an evaluation config: pick providers, write
 * prompts, define test cases, and preview the resulting YAML.
 *
 * All form data lives in the shared store; this component only keeps the
 * rendered YAML string and the reset-dialog visibility as local state.
 */
const EvaluateTestSuiteCreator: React.FC = () => {
  const [yamlString, setYamlString] = useState('');
  const [resetDialogOpen, setResetDialogOpen] = useState(false);

  const {
    description,
    setDescription,
    providers,
    setProviders,
    prompts,
    setPrompts,
    testCases,
    setTestCases,
  } = useStore();

  // The store is created with skipHydration, so persisted state has to be
  // loaded explicitly once the component has mounted on the client.
  useEffect(() => {
    useStore.persist.rehydrate();
  }, []);

  // Regenerate the YAML preview whenever any part of the test suite changes.
  useEffect(() => {
    const testSuite = {
      description,
      providers,
      prompts,
      tests: testCases,
    };
    setYamlString(yaml.dump(testSuite));
  }, [description, providers, prompts, testCases]);

  // Memoized so the array identity only changes when the prompts change;
  // consumers that use it as a hook dependency then do not re-run on every render.
  const varsList = React.useMemo(() => extractVarsFromPrompts(prompts), [prompts]);

  const handleReset = () => {
    setDescription('');
    setProviders([]);
    setPrompts([]);
    setTestCases([]);
    setYamlString('');
    setResetDialogOpen(false);
  };

  return (
    <Container maxWidth="lg" sx={{ marginTop: '2rem' }}>
      <Stack direction="row" spacing={2} justifyContent="space-between">
        <Typography variant="h4">Set up an evaluation</Typography>
        <Stack direction="row" spacing={2}>
          <RunTestSuiteButton />
          <Button variant="outlined" color="primary" onClick={() => setResetDialogOpen(true)}>
            Reset
          </Button>
        </Stack>
      </Stack>
      <Box mt={4} />
      {/*
      <Box mt={4}>
        <TextField
          label="Description"
          value={description}
          onChange={(e) => {
            setDescription(e.target.value);
          }}
          fullWidth
          margin="normal"
        />
      </Box>
      */}
      <Box mt={2}>
        <Stack direction="column" spacing={2} justifyContent="space-between">
          <Typography variant="h5">Providers</Typography>
          <ProviderSelector providers={providers} onChange={setProviders} />
        </Stack>
      </Box>
      <Box mt={4} />
      <PromptsSection />
      <Box mt={6} />
      <TestCasesSection varsList={varsList} />
      <Box mt={8}>
        {yamlString && (
          <Box mt={4}>
            <Typography variant="h5" gutterBottom>
              YAML config
            </Typography>
            <Typography variant="body1" gutterBottom>
              This is the evaluation config that is run by promptfoo. See{' '}
              <Link href="https://promptfoo.dev/docs/configuration/guide">configuration docs</Link>{' '}
              to learn more.
            </Typography>
            <SyntaxHighlighter className="yaml-config" language="yaml" style={docco}>
              {yamlString}
            </SyntaxHighlighter>
          </Box>
        )}
      </Box>
      <Dialog
        open={resetDialogOpen}
        onClose={() => setResetDialogOpen(false)}
        aria-labelledby="alert-dialog-title"
        aria-describedby="alert-dialog-description"
      >
        <DialogTitle id="alert-dialog-title">{'Confirm Reset'}</DialogTitle>
        <DialogContent>
          <DialogContentText id="alert-dialog-description">
            Are you sure you want to reset all the fields? This action cannot be undone.
          </DialogContentText>
        </DialogContent>
        <DialogActions>
          <Button onClick={() => setResetDialogOpen(false)}>Cancel</Button>
          <Button onClick={handleReset} autoFocus>
            Reset
          </Button>
        </DialogActions>
      </Dialog>
    </Container>
  );
};
export default EvaluateTestSuiteCreator;

View File

@@ -0,0 +1 @@
/** Base URL of the local API server that the web UI sends requests to. */
export const API_BASE_URL = 'http://localhost:15500';

View File

@@ -0,0 +1,53 @@
import { create } from 'zustand';
import { persist } from 'zustand/middleware';
import type { Assertion, ProviderConfig, TestCase } from '../../../../types';
/**
 * Shape of the eval-setup store: the data fields that make up a test suite,
 * plus one setter per field. Each setter replaces the whole value of its field.
 */
export interface State {
// Data fields.
asserts: Assertion[];
testCases: TestCase[];
description: string;
providers: ProviderConfig[];
prompts: string[];
// Setters: each one overwrites the corresponding field with the given value.
setAsserts: (asserts: Assertion[]) => void;
setTestCases: (testCases: TestCase[]) => void;
setDescription: (description: string) => void;
setProviders: (providers: ProviderConfig[]) => void;
setPrompts: (prompts: string[]) => void;
}
/**
 * Store hook for the eval-setup form, wrapped in zustand's `persist`
 * middleware so its contents are saved under the storage key 'promptfoo'.
 *
 * `skipHydration: true` is set, so saved state is presumably not loaded
 * automatically (confirm against the zustand persist docs); the page
 * component calls `useStore.persist.rehydrate()` on mount to load it.
 */
export const useStore = create<State>()(
persist(
(set) => ({
// Initial (empty) values for every field.
asserts: [],
testCases: [],
description: '',
providers: [],
prompts: [],
// Each setter replaces its field wholesale.
setAsserts: (asserts) => set({ asserts }),
setTestCases: (testCases) => set({ testCases }),
setDescription: (description) => set({ description }),
setProviders: (providers) => set({ providers }),
setPrompts: (prompts) => set({ prompts }),
}),
{
// Key under which the persist middleware stores the serialized state.
name: 'promptfoo',
// Hydration is requested explicitly by the page component on mount.
skipHydration: true,
},
),
);
/*
export const useStore = create<State>((set) => ({
asserts: [],
testCases: [],
description: '',
providers: [],
prompts: [],
setAsserts: (asserts) => set({ asserts }),
setTestCases: (testCases) => set({ testCases }),
setDescription: (description) => set({ description }),
setProviders: (providers) => set({ providers }),
setPrompts: (prompts) => set({ prompts }),
}));
*/

View File

@@ -0,0 +1,28 @@
{
"compilerOptions": {
"target": "es5",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"forceConsistentCasingInFileNames": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": ["./src/*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules"]
}

View File

@@ -2,23 +2,37 @@ import fs, { Stats } from 'fs';
import path from 'node:path';
import readline from 'node:readline';
import http from 'node:http';
import invariant from 'tiny-invariant';
import { v4 as uuidv4 } from 'uuid';
import debounce from 'debounce';
import express from 'express';
import cors from 'cors';
import compression from 'compression';
import opener from 'opener';
import { Server as SocketIOServer } from 'socket.io';
import promptfoo, { EvaluateSummary } from '../index';
import logger from '../logger';
import { getDirectory } from '../esm';
import { getLatestResultsPath, listPreviousResults, readResult } from '../util';
export function init(port = 15500) {
// State of one evaluation started through POST /api/eval.
interface Job {
// 'in-progress' from creation until the evaluation promise resolves, then 'completed'.
status: 'in-progress' | 'completed';
// Most recent values reported through the evaluation's progressCallback (both start at 0).
progress: number;
total: number;
// Evaluation summary; null until the job has completed.
result: EvaluateSummary | null;
}
// Jobs keyed by their generated UUID. Kept in process memory only.
// NOTE(review): no eviction is visible here, so entries appear to accumulate — confirm.
const evalJobs = new Map<string, Job>();
export function startServer(port = 15500) {
const app = express();
const staticDir = path.join(getDirectory(), 'web', 'client');
const staticDir = path.join(getDirectory(), 'web', 'nextui');
app.use(cors());
app.use(compression());
app.use(express.json());
app.use(express.static(staticDir));
@@ -55,9 +69,49 @@ export function init(port = 15500) {
// Lists previously saved results in the reverse of the order returned by listPreviousResults().
app.get('/results', (req, res) => {
  const data = listPreviousResults();
  data.reverse();
  res.json({ data });
});
// Starts an evaluation in the background and immediately responds with a job
// id; clients poll GET /api/eval/:id with that id for progress and the result.
app.post('/api/eval', (req, res) => {
  const testSuite = req.body;
  const id = uuidv4();
  evalJobs.set(id, { status: 'in-progress', progress: 0, total: 0, result: null });

  promptfoo
    .evaluate(Object.assign({}, testSuite, { writeLatestResults: true }), {
      progressCallback: (progress, total) => {
        const job = evalJobs.get(id);
        if (!job) {
          // The job was removed after a failure; ignore late progress reports
          // instead of throwing inside the evaluator.
          return;
        }
        job.progress = progress;
        job.total = total;
        console.log(`Progress: ${progress}/${total}`);
      },
    })
    .then((result) => {
      const job = evalJobs.get(id);
      invariant(job, 'Job not found');
      job.status = 'completed';
      job.result = result;
    })
    .catch((err) => {
      // Without this handler a failed evaluation is an unhandled promise
      // rejection and the job would report 'in-progress' forever. Log the
      // failure and drop the job so pollers receive a 404 instead.
      logger.error(`Evaluation job ${id} failed: ${err}`);
      evalJobs.delete(id);
    });

  // Respond right away; the evaluation keeps running after the response is sent.
  res.json({ id });
});
// Poll endpoint: reports progress for a running job or the result of a finished one.
app.get('/api/eval/:id', (req, res) => {
  const { id } = req.params;
  const job = evalJobs.get(id);

  // Unknown id.
  if (!job) {
    res.status(404).json({ error: 'Job not found' });
    return;
  }

  // Still running: report how far along the evaluation is.
  if (job.status !== 'completed') {
    res.json({ status: 'in-progress', progress: job.progress, total: job.total });
    return;
  }

  res.json({ status: 'completed', result: job.result });
});
app.get('/results/:filename', (req, res) => {
const filename = req.params.filename;
const safeFilename = path.basename(filename);

View File

@@ -13,5 +13,5 @@
"resolveJsonModule": true
},
"include": ["src/", "typings/**/*"],
"exclude": ["node_modules", "dist", "src/web/client/**/*"]
"exclude": ["node_modules", "dist", "src/web/client/**/*", "src/web/nextui/**/*"]
}