mirror of
https://github.com/promptfoo/promptfoo.git
synced 2023-08-15 01:10:51 +03:00
Add support for loading test cases from file/directory path (#88)
This commit is contained in:
7
examples/separate-test-configs/README.md
Normal file
7
examples/separate-test-configs/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
This example shows how you can use file paths and wildcards (globs) to simplify your `promptfooconfig.yaml`.
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
promptfoo eval
|
||||
```
|
||||
20
examples/separate-test-configs/promptfooconfig.yaml
Normal file
20
examples/separate-test-configs/promptfooconfig.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
prompts: prompts.txt
|
||||
providers: openai:gpt-3.5-turbo
|
||||
|
||||
# The defaultTest configuration is applied to every test case in this config.
|
||||
defaultTest:
|
||||
options:
|
||||
suffix: Be extremely concise
|
||||
|
||||
# Loads & runs all test cases matching these filepaths
|
||||
tests:
|
||||
# You can supply an exact filepath
|
||||
- tests/tests2.yaml
|
||||
|
||||
# Or a glob (wildcard)
|
||||
- tests/*
|
||||
|
||||
# Mix and match with actual test cases
|
||||
- vars:
|
||||
topic: the internet
|
||||
content_type: witty tweets
|
||||
3
examples/separate-test-configs/prompts.txt
Normal file
3
examples/separate-test-configs/prompts.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
Generate a list of creative {{content_type}} about {{topic}}.
|
||||
---
|
||||
Imagine you are an expert in {{topic}}. What {{content_type}} would you recommend writing about?
|
||||
24
examples/separate-test-configs/tests/tests1.yaml
Normal file
24
examples/separate-test-configs/tests/tests1.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
# This file contains a list of test cases. They're added alongside test cases
|
||||
# from tests2.yaml in the example promptfooconfig.yaml.
|
||||
|
||||
- vars:
|
||||
topic: artificial intelligence
|
||||
content_type: blog post ideas
|
||||
assert:
|
||||
- type: javascript
|
||||
value: output.length > 100
|
||||
- vars:
|
||||
topic: climate change
|
||||
content_type: policy proposals
|
||||
- vars:
|
||||
topic: vegetarian cooking
|
||||
content_type: recipe ideas
|
||||
- vars:
|
||||
topic: cybersecurity
|
||||
content_type: educational topics
|
||||
- vars:
|
||||
topic: remote work
|
||||
content_type: challenges
|
||||
- vars:
|
||||
topic: quantum computing
|
||||
content_type: use cases
|
||||
25
examples/separate-test-configs/tests/tests2.yaml
Normal file
25
examples/separate-test-configs/tests/tests2.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
# This file contains a list of test cases. They're added alongside test cases
|
||||
# from tests1.yaml in the example promptfooconfig.yaml.
|
||||
|
||||
- vars:
|
||||
topic: alien invasions
|
||||
content_type: survival tips
|
||||
assert:
|
||||
- type: javascript
|
||||
value: output.length > 100
|
||||
- vars:
|
||||
topic: time travel
|
||||
content_type: tourist attractions
|
||||
- vars:
|
||||
topic: zombie apocalypse
|
||||
content_type: home renovation ideas
|
||||
- vars:
|
||||
topic: unicorn breeding
|
||||
content_type: best practices
|
||||
- vars:
|
||||
topic: telepathy for beginners
|
||||
content_type: etiquette rules
|
||||
- vars:
|
||||
topic: fairy dust
|
||||
content_type: cleaning hacks
|
||||
- vars: ../vars/vars_*.yaml
|
||||
2
examples/separate-test-configs/vars/vars_extra.yaml
Normal file
2
examples/separate-test-configs/vars/vars_extra.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
topic: bananas
|
||||
content_type: musings of an evil genius
|
||||
@@ -188,6 +188,9 @@ export interface TestCase {
|
||||
// Key-value pairs to substitute in the prompt
|
||||
vars?: Record<string, string | string[] | object>;
|
||||
|
||||
// Optional filepath or glob pattern to load vars from
|
||||
loadVars?: string | string[];
|
||||
|
||||
// Optional list of automatic checks to run on the LLM output
|
||||
assert?: Assertion[];
|
||||
|
||||
@@ -238,7 +241,7 @@ export interface TestSuiteConfig {
|
||||
prompts: string | string[];
|
||||
|
||||
// Path to a test file, OR list of LLM prompt variations (aka "test case")
|
||||
tests: string | TestCase[];
|
||||
tests: string | string[] | TestCase[];
|
||||
|
||||
// Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
|
||||
defaultTest?: Omit<TestCase, 'description'>;
|
||||
|
||||
80
src/util.ts
80
src/util.ts
@@ -224,7 +224,31 @@ export async function fetchCsvFromGoogleSheet(url: string): Promise<string> {
|
||||
return csvData;
|
||||
}
|
||||
|
||||
export async function readVars(varsPath: string, basePath: string = ''): Promise<CsvRow[]> {
|
||||
/**
 * Loads prompt variables from one or more YAML files and merges them into a
 * single object.
 *
 * Each entry of `pathOrGlobs` is resolved against `basePath` and expanded with
 * `globSync`. Every matching file is parsed as YAML and its top-level keys are
 * shallow-merged into the result via `Object.assign`, so when two files define
 * the same key, the file processed later overrides the earlier one.
 *
 * @param pathOrGlobs - A file path or glob pattern, or a list of them.
 * @param basePath - Directory that each entry is resolved against.
 * @returns The merged key/value pairs; an empty object when nothing matches.
 * @throws Error when a matched file's top-level YAML value is not a mapping
 *   (for example a bare string or a list).
 */
export async function readVarsFiles(
  pathOrGlobs: string | string[],
  basePath: string = '',
): Promise<Record<string, string | string[] | object>> {
  // Normalize to a list without reassigning the caller-visible parameter.
  const patterns = typeof pathOrGlobs === 'string' ? [pathOrGlobs] : pathOrGlobs;

  const ret: Record<string, string | string[] | object> = {};
  for (const pattern of patterns) {
    const resolvedPath = path.resolve(basePath, pattern);

    for (const p of globSync(resolvedPath)) {
      const yamlData: unknown = yaml.load(fs.readFileSync(p, 'utf-8'));

      if (yamlData == null) {
        // Empty document: nothing to merge (Object.assign would ignore it anyway).
        continue;
      }
      if (typeof yamlData !== 'object' || Array.isArray(yamlData)) {
        // Object.assign would otherwise spread a string's characters or an
        // array's indices into the result as keys "0", "1", ...
        throw new Error(
          `Expected vars file ${p} to contain a YAML mapping of variable names to values, but got ${
            Array.isArray(yamlData) ? 'a list' : typeof yamlData
          }`,
        );
      }
      Object.assign(ret, yamlData);
    }
  }

  return ret;
}
|
||||
|
||||
export async function readTestsFile(varsPath: string, basePath: string = ''): Promise<CsvRow[]> {
|
||||
// This function is confusingly named - it reads a CSV, JSON, or YAML file of
|
||||
// TESTS or test equivalents.
|
||||
const resolvedVarsPath = path.resolve(basePath, varsPath);
|
||||
const fileExtension = parsePath(varsPath).ext.slice(1);
|
||||
let rows: CsvRow[] = [];
|
||||
@@ -246,25 +270,53 @@ export async function readVars(varsPath: string, basePath: string = ''): Promise
|
||||
}
|
||||
|
||||
export async function readTests(
|
||||
tests: string | TestCase[] | undefined,
|
||||
tests: string | string[] | TestCase[] | undefined,
|
||||
basePath: string = '',
|
||||
): Promise<TestCase[]> {
|
||||
if (!tests) {
|
||||
return [];
|
||||
}
|
||||
const ret: TestCase[] = [];
|
||||
|
||||
const loadTestsFromGlob = async (loadTestsGlob: string) => {
|
||||
const resolvedPath = path.resolve(basePath, loadTestsGlob);
|
||||
const testFiles = globSync(resolvedPath);
|
||||
for (const testFile of testFiles) {
|
||||
const testFileContent = yaml.load(fs.readFileSync(testFile, 'utf-8')) as TestCase[];
|
||||
for (const testCase of testFileContent) {
|
||||
if (typeof testCase.vars === 'string' || Array.isArray(testCase.vars)) {
|
||||
const testcaseBasePath = path.dirname(testFile);
|
||||
testCase.vars = await readVarsFiles(testCase.vars, testcaseBasePath);
|
||||
}
|
||||
}
|
||||
ret.push(...testFileContent);
|
||||
}
|
||||
};
|
||||
|
||||
if (typeof tests === 'string') {
|
||||
// It's a filepath, load from CSV
|
||||
const vars = await readVars(tests, basePath);
|
||||
return vars.map((row, idx) => {
|
||||
const test = testCaseFromCsvRow(row);
|
||||
test.description = `Row #${idx + 1}`;
|
||||
return test;
|
||||
});
|
||||
if (tests.endsWith('yaml') || tests.endsWith('yml')) {
|
||||
// Load testcase config from yaml
|
||||
await loadTestsFromGlob(tests);
|
||||
} else {
|
||||
// Legacy load CSV
|
||||
const vars = await readTestsFile(tests, basePath);
|
||||
return vars.map((row, idx) => {
|
||||
const test = testCaseFromCsvRow(row);
|
||||
test.description = `Row #${idx + 1}`;
|
||||
return test;
|
||||
});
|
||||
}
|
||||
} else if (Array.isArray(tests)) {
|
||||
for (const maybeTestsGlob of tests) {
|
||||
if (typeof maybeTestsGlob === 'string') {
|
||||
// Assume it's a filepath
|
||||
await loadTestsFromGlob(maybeTestsGlob);
|
||||
} else {
|
||||
// Assume it's a full test case
|
||||
ret.push(maybeTestsGlob);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Some validation of the shape of tests
|
||||
for (const test of tests) {
|
||||
for (const test of ret) {
|
||||
if (!test.assert && !test.vars) {
|
||||
throw new Error(
|
||||
`Test case must have either "assert" or "vars" property. Instead got ${JSON.stringify(
|
||||
@@ -276,7 +328,7 @@ export async function readTests(
|
||||
}
|
||||
}
|
||||
|
||||
return tests;
|
||||
return ret;
|
||||
}
|
||||
|
||||
export function writeOutput(
|
||||
|
||||
@@ -5,7 +5,7 @@ import yaml from 'js-yaml';
|
||||
import { globSync } from 'glob';
|
||||
|
||||
import {
|
||||
readVars,
|
||||
readTestsFile,
|
||||
readPrompts,
|
||||
writeOutput,
|
||||
readTests,
|
||||
@@ -137,7 +137,7 @@ describe('util', () => {
|
||||
(fs.readFileSync as jest.Mock).mockReturnValue('var1,var2\nvalue1,value2');
|
||||
const varsPath = 'vars.csv';
|
||||
|
||||
const result = await readVars(varsPath);
|
||||
const result = await readTestsFile(varsPath);
|
||||
|
||||
expect(fs.readFileSync).toHaveBeenCalledTimes(1);
|
||||
expect(result).toEqual([{ var1: 'value1', var2: 'value2' }]);
|
||||
@@ -147,7 +147,7 @@ describe('util', () => {
|
||||
(fs.readFileSync as jest.Mock).mockReturnValue('[{"var1": "value1", "var2": "value2"}]');
|
||||
const varsPath = 'vars.json';
|
||||
|
||||
const result = await readVars(varsPath);
|
||||
const result = await readTestsFile(varsPath);
|
||||
|
||||
expect(fs.readFileSync).toHaveBeenCalledTimes(1);
|
||||
expect(result).toEqual([{ var1: 'value1', var2: 'value2' }]);
|
||||
@@ -157,7 +157,7 @@ describe('util', () => {
|
||||
(fs.readFileSync as jest.Mock).mockReturnValue('- var1: value1\n var2: value2');
|
||||
const varsPath = 'vars.yaml';
|
||||
|
||||
const result = await readVars(varsPath);
|
||||
const result = await readTestsFile(varsPath);
|
||||
|
||||
expect(fs.readFileSync).toHaveBeenCalledTimes(1);
|
||||
expect(result).toEqual([{ var1: 'value1', var2: 'value2' }]);
|
||||
@@ -447,4 +447,55 @@ describe('readTests', () => {
|
||||
|
||||
expect(result).toEqual(input);
|
||||
});
|
||||
|
||||
test('readTests with string array input (paths to test configs)', async () => {
|
||||
const testsPaths = ['test1.yaml', 'test2.yaml'];
|
||||
const test1Content = [
|
||||
{
|
||||
description: 'Test 1',
|
||||
vars: { var1: 'value1', var2: 'value2' },
|
||||
assert: [{ type: 'equals', value: 'value1' }],
|
||||
},
|
||||
];
|
||||
const test2Content = [
|
||||
{
|
||||
description: 'Test 2',
|
||||
vars: { var1: 'value3', var2: 'value4' },
|
||||
assert: [{ type: 'contains-json', value: 'value3' }],
|
||||
},
|
||||
];
|
||||
(fs.readFileSync as jest.Mock)
|
||||
.mockReturnValueOnce(yaml.dump(test1Content))
|
||||
.mockReturnValueOnce(yaml.dump(test2Content));
|
||||
(globSync as jest.Mock).mockImplementation((pathOrGlob) => [pathOrGlob]);
|
||||
|
||||
const result = await readTests(testsPaths);
|
||||
|
||||
expect(fs.readFileSync).toHaveBeenCalledTimes(2);
|
||||
expect(result).toEqual([...test1Content, ...test2Content]);
|
||||
});
|
||||
|
||||
test('readTests with vars glob input (paths to vars configs)', async () => {
|
||||
const testsPaths = ['test1.yaml'];
|
||||
const test1Content = [
|
||||
{
|
||||
description: 'Test 1',
|
||||
vars: 'vars1.yaml',
|
||||
assert: [{ type: 'equals', value: 'value1' }],
|
||||
},
|
||||
];
|
||||
const vars1Content = {
|
||||
var1: 'value1',
|
||||
var2: 'value2',
|
||||
};
|
||||
(fs.readFileSync as jest.Mock)
|
||||
.mockReturnValueOnce(yaml.dump(test1Content))
|
||||
.mockReturnValueOnce(yaml.dump(vars1Content));
|
||||
(globSync as jest.Mock).mockImplementation((pathOrGlob) => [pathOrGlob]);
|
||||
|
||||
const result = await readTests(testsPaths);
|
||||
|
||||
expect(fs.readFileSync).toHaveBeenCalledTimes(2);
|
||||
expect(result).toEqual([Object.assign({}, test1Content[0], {vars: vars1Content})]);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user