Add support for loading test cases from file/directory path (#88)

This commit is contained in:
Ian Webster
2023-07-23 21:46:38 -07:00
committed by GitHub
parent 4f9f6e93cd
commit c8a8bb25cf
9 changed files with 206 additions and 19 deletions

View File

@@ -0,0 +1,7 @@
This example shows how you can use file paths and wildcards (globs) to simplify your `promptfooconfig.yaml`.
Run:
```
promptfoo eval
```

View File

@@ -0,0 +1,20 @@
prompts: prompts.txt
providers: openai:gpt-3.5-turbo
# The defaultTest configuration is applied to every test case in this config.
defaultTest:
options:
suffix: Be extremely concise
# Loads & runs all test cases matching these filepaths
tests:
# You can supply an exact filepath
- tests/tests2.yaml
# Or a glob (wildcard)
- tests/*
# Mix and match with actual test cases
- vars:
topic: the internet
content_type: witty tweets

View File

@@ -0,0 +1,3 @@
Generate a list of creative {{content_type}} about {{topic}}.
---
Imagine you are an expert in {{topic}}. What {{content_type}} would you recommend writing about?

View File

@@ -0,0 +1,24 @@
# This file contains a list of test cases. They're added alongside test cases
# from tests2.yaml in the example promptfooconfig.yaml.
- vars:
topic: artificial intelligence
content_type: blog post ideas
assert:
- type: javascript
value: output.length > 100
- vars:
topic: climate change
content_type: policy proposals
- vars:
topic: vegetarian cooking
content_type: recipe ideas
- vars:
topic: cybersecurity
content_type: educational topics
- vars:
topic: remote work
content_type: challenges
- vars:
topic: quantum computing
content_type: use cases

View File

@@ -0,0 +1,25 @@
# This file contains a list of test cases. They're added alongside test cases
# from tests1.yaml in the example promptfooconfig.yaml.
- vars:
topic: alien invasions
content_type: survival tips
assert:
- type: javascript
value: output.length > 100
- vars:
topic: time travel
content_type: tourist attractions
- vars:
topic: zombie apocalypse
content_type: home renovation ideas
- vars:
topic: unicorn breeding
content_type: best practices
- vars:
topic: telepathy for beginners
content_type: etiquette rules
- vars:
topic: fairy dust
content_type: cleaning hacks
- vars: ../vars/vars_*.yaml

View File

@@ -0,0 +1,2 @@
topic: bananas
content_type: musings of an evil genius

View File

@@ -188,6 +188,9 @@ export interface TestCase {
// Key-value pairs to substitute in the prompt
vars?: Record<string, string | string[] | object>;
// Optional filepath or glob pattern to load vars from
loadVars?: string | string[];
// Optional list of automatic checks to run on the LLM output
assert?: Assertion[];
@@ -238,7 +241,7 @@ export interface TestSuiteConfig {
prompts: string | string[];
// Path to a test file, OR a list of test file paths/globs and/or inline LLM prompt variations (aka "test case")
tests: string | TestCase[];
tests: string | string[] | TestCase[];
// Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
defaultTest?: Omit<TestCase, 'description'>;

View File

@@ -224,7 +224,31 @@ export async function fetchCsvFromGoogleSheet(url: string): Promise<string> {
return csvData;
}
export async function readVars(varsPath: string, basePath: string = ''): Promise<CsvRow[]> {
/**
 * Loads variables from one or more YAML files and merges them into one object.
 *
 * Each entry of `pathOrGlobs` is resolved against `basePath` and expanded with
 * `globSync`. Every matching file is parsed as YAML and shallow-merged into the
 * result with `Object.assign`, so when two files define the same top-level key
 * the file processed later wins.
 *
 * @param pathOrGlobs - A single file path / glob pattern, or a list of them.
 * @param basePath - Directory that relative paths are resolved against.
 * @returns The merged variables from all matched files (empty if nothing matched).
 * @throws Error if a matched file parses to something other than a YAML mapping
 *   (for example a bare scalar or a list).
 */
export async function readVarsFiles(
  pathOrGlobs: string | string[],
  basePath: string = '',
): Promise<Record<string, string | string[] | object>> {
  // Normalize to a list without reassigning the caller-visible parameter.
  const patterns = typeof pathOrGlobs === 'string' ? [pathOrGlobs] : pathOrGlobs;

  const ret: Record<string, string | string[] | object> = {};
  for (const pattern of patterns) {
    const resolvedPath = path.resolve(basePath, pattern);
    // path.resolve produces backslash-separated paths on Windows; without this
    // option glob would interpret those backslashes as escape characters.
    const paths = globSync(resolvedPath, { windowsPathsNoEscape: true });
    for (const p of paths) {
      const yamlData = yaml.load(fs.readFileSync(p, 'utf-8'));
      if (yamlData == null) {
        // Empty file: nothing to merge.
        continue;
      }
      if (typeof yamlData !== 'object' || Array.isArray(yamlData)) {
        // Object.assign would otherwise copy string characters / array indices
        // into the result as bogus variables named "0", "1", ...
        throw new Error(
          `Expected vars file ${p} to contain a YAML mapping of variable names to values`,
        );
      }
      Object.assign(ret, yamlData);
    }
  }
  return ret;
}
export async function readTestsFile(varsPath: string, basePath: string = ''): Promise<CsvRow[]> {
// Reads a CSV, JSON, or YAML file of TESTS or test equivalents (not just vars).
// The `varsPath` parameter name is a holdover from this function's former name, `readVars`.
const resolvedVarsPath = path.resolve(basePath, varsPath);
const fileExtension = parsePath(varsPath).ext.slice(1);
let rows: CsvRow[] = [];
@@ -246,25 +270,53 @@ export async function readVars(varsPath: string, basePath: string = ''): Promise
}
export async function readTests(
tests: string | TestCase[] | undefined,
tests: string | string[] | TestCase[] | undefined,
basePath: string = '',
): Promise<TestCase[]> {
if (!tests) {
return [];
}
const ret: TestCase[] = [];
const loadTestsFromGlob = async (loadTestsGlob: string) => {
const resolvedPath = path.resolve(basePath, loadTestsGlob);
const testFiles = globSync(resolvedPath);
for (const testFile of testFiles) {
const testFileContent = yaml.load(fs.readFileSync(testFile, 'utf-8')) as TestCase[];
for (const testCase of testFileContent) {
if (typeof testCase.vars === 'string' || Array.isArray(testCase.vars)) {
const testcaseBasePath = path.dirname(testFile);
testCase.vars = await readVarsFiles(testCase.vars, testcaseBasePath);
}
}
ret.push(...testFileContent);
}
};
if (typeof tests === 'string') {
// It's a filepath, load from CSV
const vars = await readVars(tests, basePath);
return vars.map((row, idx) => {
const test = testCaseFromCsvRow(row);
test.description = `Row #${idx + 1}`;
return test;
});
if (tests.endsWith('yaml') || tests.endsWith('yml')) {
// Load testcase config from yaml
await loadTestsFromGlob(tests);
} else {
// Legacy load CSV
const vars = await readTestsFile(tests, basePath);
return vars.map((row, idx) => {
const test = testCaseFromCsvRow(row);
test.description = `Row #${idx + 1}`;
return test;
});
}
} else if (Array.isArray(tests)) {
for (const maybeTestsGlob of tests) {
if (typeof maybeTestsGlob === 'string') {
// Assume it's a filepath
await loadTestsFromGlob(maybeTestsGlob);
} else {
// Assume it's a full test case
ret.push(maybeTestsGlob);
}
}
}
// Some validation of the shape of tests
for (const test of tests) {
for (const test of ret) {
if (!test.assert && !test.vars) {
throw new Error(
`Test case must have either "assert" or "vars" property. Instead got ${JSON.stringify(
@@ -276,7 +328,7 @@ export async function readTests(
}
}
return tests;
return ret;
}
export function writeOutput(

View File

@@ -5,7 +5,7 @@ import yaml from 'js-yaml';
import { globSync } from 'glob';
import {
readVars,
readTestsFile,
readPrompts,
writeOutput,
readTests,
@@ -137,7 +137,7 @@ describe('util', () => {
(fs.readFileSync as jest.Mock).mockReturnValue('var1,var2\nvalue1,value2');
const varsPath = 'vars.csv';
const result = await readVars(varsPath);
const result = await readTestsFile(varsPath);
expect(fs.readFileSync).toHaveBeenCalledTimes(1);
expect(result).toEqual([{ var1: 'value1', var2: 'value2' }]);
@@ -147,7 +147,7 @@ describe('util', () => {
(fs.readFileSync as jest.Mock).mockReturnValue('[{"var1": "value1", "var2": "value2"}]');
const varsPath = 'vars.json';
const result = await readVars(varsPath);
const result = await readTestsFile(varsPath);
expect(fs.readFileSync).toHaveBeenCalledTimes(1);
expect(result).toEqual([{ var1: 'value1', var2: 'value2' }]);
@@ -157,7 +157,7 @@ describe('util', () => {
(fs.readFileSync as jest.Mock).mockReturnValue('- var1: value1\n var2: value2');
const varsPath = 'vars.yaml';
const result = await readVars(varsPath);
const result = await readTestsFile(varsPath);
expect(fs.readFileSync).toHaveBeenCalledTimes(1);
expect(result).toEqual([{ var1: 'value1', var2: 'value2' }]);
@@ -447,4 +447,55 @@ describe('readTests', () => {
expect(result).toEqual(input);
});
test('readTests with string array input (paths to test configs)', async () => {
  // Contents of the two YAML test-config files; each holds a single test case.
  const firstFileCases = [
    {
      description: 'Test 1',
      vars: { var1: 'value1', var2: 'value2' },
      assert: [{ type: 'equals', value: 'value1' }],
    },
  ];
  const secondFileCases = [
    {
      description: 'Test 2',
      vars: { var1: 'value3', var2: 'value4' },
      assert: [{ type: 'contains-json', value: 'value3' }],
    },
  ];

  // Successive file reads return the first file's YAML, then the second's.
  const readFileMock = fs.readFileSync as jest.Mock;
  readFileMock.mockReturnValueOnce(yaml.dump(firstFileCases));
  readFileMock.mockReturnValueOnce(yaml.dump(secondFileCases));
  // Every glob pattern expands to exactly itself.
  (globSync as jest.Mock).mockImplementation((pattern) => [pattern]);

  const loaded = await readTests(['test1.yaml', 'test2.yaml']);

  // One read per file, and the cases come back in file order.
  expect(fs.readFileSync).toHaveBeenCalledTimes(2);
  expect(loaded).toEqual([...firstFileCases, ...secondFileCases]);
});
test('readTests with vars glob input (paths to vars configs)', async () => {
  // The test case refers to its vars by file path instead of inlining them.
  const casesInFile = [
    {
      description: 'Test 1',
      vars: 'vars1.yaml',
      assert: [{ type: 'equals', value: 'value1' }],
    },
  ];
  const varsInFile = { var1: 'value1', var2: 'value2' };

  // First read yields the test-config YAML, second read yields the vars YAML.
  const readFileMock = fs.readFileSync as jest.Mock;
  readFileMock.mockReturnValueOnce(yaml.dump(casesInFile));
  readFileMock.mockReturnValueOnce(yaml.dump(varsInFile));
  // Every glob pattern expands to exactly itself.
  (globSync as jest.Mock).mockImplementation((pattern) => [pattern]);

  const loaded = await readTests(['test1.yaml']);

  // The vars path must have been replaced by the contents of the vars file.
  expect(fs.readFileSync).toHaveBeenCalledTimes(2);
  expect(loaded).toEqual([{ ...casesInFile[0], vars: varsInFile }]);
});
});