mirror of
https://github.com/promptfoo/promptfoo.git
synced 2023-08-15 01:10:51 +03:00
theories -> scenarios
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
prompts: prompts.txt
|
||||
providers: [openai:gpt-3.5-turbo, openai:gpt-4]
|
||||
theories:
|
||||
- dataSet:
|
||||
scenarios:
|
||||
- config:
|
||||
- vars:
|
||||
language: Spanish
|
||||
expectedHelloWorld: 'Hola mundo'
|
||||
|
||||
@@ -280,9 +280,13 @@ ${renderedValue}`,
|
||||
if (baseType === 'python') {
|
||||
try {
|
||||
const { execSync } = require('child_process');
|
||||
const escapedOutput = output.replace(/'/g, "\\'").replace(/"/g, '\\"');;
|
||||
const escapedOutput = output.replace(/'/g, "\\'").replace(/"/g, '\\"');
|
||||
const escapedContext = JSON.stringify(context).replace(/'/g, "\\'").replace(/"/g, '\\"');
|
||||
const result = execSync(`python -c "import json; import math; import os; import sys; import re; import datetime; import random; import collections; output='${escapedOutput}'; context='${escapedContext}'; print(json.dumps(${assertion.value}))"`).toString().trim();
|
||||
const result = execSync(
|
||||
`python -c "import json; import math; import os; import sys; import re; import datetime; import random; import collections; output='${escapedOutput}'; context='${escapedContext}'; print(json.dumps(${assertion.value}))"`,
|
||||
)
|
||||
.toString()
|
||||
.trim();
|
||||
if (result === 'true') {
|
||||
pass = true;
|
||||
score = 1.0;
|
||||
@@ -295,7 +299,9 @@ ${renderedValue}`,
|
||||
pass = true;
|
||||
score = parseFloat(result);
|
||||
if (isNaN(score)) {
|
||||
throw new Error('Python code must return a boolean, number, or {pass, score, reason} object');
|
||||
throw new Error(
|
||||
'Python code must return a boolean, number, or {pass, score, reason} object',
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
|
||||
@@ -258,7 +258,7 @@ class Evaluator {
|
||||
let tests = (
|
||||
testSuite.tests && testSuite.tests.length > 0
|
||||
? testSuite.tests
|
||||
: testSuite.theories
|
||||
: testSuite.scenarios
|
||||
? []
|
||||
: [
|
||||
{
|
||||
@@ -270,10 +270,10 @@ class Evaluator {
|
||||
return Object.assign(finalTestCase, test);
|
||||
});
|
||||
|
||||
//build theories and add to tests
|
||||
if (testSuite.theories && testSuite.theories.length > 0) {
|
||||
for (const theory of testSuite.theories) {
|
||||
for (const data of theory.dataSet) {
|
||||
//build scenarios and add to tests
|
||||
if (testSuite.scenarios && testSuite.scenarios.length > 0) {
|
||||
for (const theory of testSuite.scenarios) {
|
||||
for (const data of theory.config) {
|
||||
//merge defaultTest with TheoryData
|
||||
const theoryTests = (
|
||||
theory.tests || [
|
||||
|
||||
@@ -281,7 +281,7 @@ async function main() {
|
||||
prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts,
|
||||
providers: cmdObj.providers || fileConfig.providers || defaultConfig.providers,
|
||||
tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests,
|
||||
theories: fileConfig.theories || defaultConfig.theories,
|
||||
scenarios: fileConfig.scenarios || defaultConfig.scenarios,
|
||||
sharing:
|
||||
process.env.PROMPTFOO_DISABLE_SHARING === '1'
|
||||
? false
|
||||
@@ -313,8 +313,8 @@ async function main() {
|
||||
);
|
||||
|
||||
//parse testCases for each theory
|
||||
if (fileConfig.theories) {
|
||||
for (const theory of fileConfig.theories) {
|
||||
if (fileConfig.scenarios) {
|
||||
for (const theory of fileConfig.scenarios) {
|
||||
const parsedTheoryTests: TestCase[] = await readTests(
|
||||
theory.tests,
|
||||
cmdObj.tests ? undefined : basePath,
|
||||
@@ -347,7 +347,7 @@ async function main() {
|
||||
providers: parsedProviders,
|
||||
providerPromptMap: parsedProviderPromptMap,
|
||||
tests: parsedTests,
|
||||
theories: config.theories,
|
||||
scenarios: config.scenarios,
|
||||
defaultTest,
|
||||
};
|
||||
|
||||
|
||||
12
src/types.ts
12
src/types.ts
@@ -202,12 +202,12 @@ export interface TestCase {
|
||||
options?: PromptConfig & OutputConfig & GradingConfig;
|
||||
}
|
||||
|
||||
export interface Theory {
|
||||
export interface Scenario {
|
||||
// Optional description of what you're testing
|
||||
description?: string;
|
||||
|
||||
// Default test case config
|
||||
dataSet: Partial<TestCase>[];
|
||||
config: Partial<TestCase>[];
|
||||
|
||||
// Optional list of automatic checks to run on the LLM output
|
||||
tests: TestCase[];
|
||||
@@ -236,8 +236,8 @@ export interface TestSuite {
|
||||
// Test cases
|
||||
tests?: TestCase[];
|
||||
|
||||
// Theories
|
||||
theories?: Theory[];
|
||||
// scenarios
|
||||
scenarios?: Scenario[];
|
||||
|
||||
// Default test case config
|
||||
defaultTest?: Partial<TestCase>;
|
||||
@@ -263,8 +263,8 @@ export interface TestSuiteConfig {
|
||||
// Path to a test file, OR list of LLM prompt variations (aka "test case")
|
||||
tests: string | string[] | TestCase[];
|
||||
|
||||
// Theories, groupings of data and tests to be evaluated
|
||||
theories?: Theory[];
|
||||
// Scenarios, groupings of data and tests to be evaluated
|
||||
scenarios?: Scenario[];
|
||||
|
||||
// Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
|
||||
defaultTest?: Omit<TestCase, 'description'>;
|
||||
|
||||
@@ -416,7 +416,7 @@ describe('evaluator', () => {
|
||||
expect(summary.results[0].response?.output).toBe('Test output');
|
||||
});
|
||||
|
||||
test('evaluate with theories', async () => {
|
||||
test('evaluate with scenarios', async () => {
|
||||
const mockApiProvider: ApiProvider = {
|
||||
id: jest.fn().mockReturnValue('test-provider'),
|
||||
callApi: jest
|
||||
@@ -434,9 +434,9 @@ describe('evaluator', () => {
|
||||
const testSuite: TestSuite = {
|
||||
providers: [mockApiProvider],
|
||||
prompts: [toPrompt('Test prompt {{ language }}')],
|
||||
theories: [
|
||||
scenarios: [
|
||||
{
|
||||
dataSet: [
|
||||
config: [
|
||||
{
|
||||
vars: {
|
||||
language: 'Spanish',
|
||||
@@ -473,7 +473,7 @@ describe('evaluator', () => {
|
||||
expect(summary.results[1].response?.output).toBe('Bonjour le monde');
|
||||
});
|
||||
|
||||
test('evaluate with theories and multiple vars', async () => {
|
||||
test('evaluate with scenarios and multiple vars', async () => {
|
||||
const mockApiProvider: ApiProvider = {
|
||||
id: jest.fn().mockReturnValue('test-provider'),
|
||||
callApi: jest
|
||||
@@ -498,9 +498,9 @@ describe('evaluator', () => {
|
||||
const testSuite: TestSuite = {
|
||||
providers: [mockApiProvider],
|
||||
prompts: [toPrompt('Test prompt {{ language }} {{ greeting }}')],
|
||||
theories: [
|
||||
scenarios: [
|
||||
{
|
||||
dataSet: [
|
||||
config: [
|
||||
{
|
||||
vars: {
|
||||
language: ['Spanish', 'French'],
|
||||
|
||||
Reference in New Issue
Block a user