theories -> scenarios

Ian Webster
2023-07-27 22:31:33 -07:00
parent 1e36a4f7f4
commit 81fdbe9a2d
6 changed files with 32 additions and 26 deletions

View File

@@ -1,7 +1,7 @@
 prompts: prompts.txt
 providers: [openai:gpt-3.5-turbo, openai:gpt-4]
-theories:
-  - dataSet:
+scenarios:
+  - config:
       - vars:
           language: Spanish
           expectedHelloWorld: 'Hola mundo'
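Note: the renamed keys map one-to-one onto the `Scenario` interface introduced later in this commit; `scenarios` is a list, and each entry's `config` is a list of variable sets. A minimal sketch of the parsed form of this config, assuming the `TestSuiteConfig` shape from the types change below (the scenario's `tests` list is outside this hunk, so it is left empty here):

// Sketch only, not the commit's code.
const parsed: Partial<TestSuiteConfig> = {
  prompts: 'prompts.txt',
  providers: ['openai:gpt-3.5-turbo', 'openai:gpt-4'],
  scenarios: [
    {
      config: [{ vars: { language: 'Spanish', expectedHelloWorld: 'Hola mundo' } }],
      tests: [], // elided in the hunk above
    },
  ],
};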

View File

@@ -280,9 +280,13 @@ ${renderedValue}`,
   if (baseType === 'python') {
     try {
       const { execSync } = require('child_process');
-      const escapedOutput = output.replace(/'/g, "\\'").replace(/"/g, '\\"');;
+      const escapedOutput = output.replace(/'/g, "\\'").replace(/"/g, '\\"');
       const escapedContext = JSON.stringify(context).replace(/'/g, "\\'").replace(/"/g, '\\"');
-      const result = execSync(`python -c "import json; import math; import os; import sys; import re; import datetime; import random; import collections; output='${escapedOutput}'; context='${escapedContext}'; print(json.dumps(${assertion.value}))"`).toString().trim();
+      const result = execSync(
+        `python -c "import json; import math; import os; import sys; import re; import datetime; import random; import collections; output='${escapedOutput}'; context='${escapedContext}'; print(json.dumps(${assertion.value}))"`,
+      )
+        .toString()
+        .trim();
       if (result === 'true') {
         pass = true;
         score = 1.0;
@@ -295,7 +299,9 @@ ${renderedValue}`,
         pass = true;
         score = parseFloat(result);
         if (isNaN(score)) {
-          throw new Error('Python code must return a boolean, number, or {pass, score, reason} object');
+          throw new Error(
+            'Python code must return a boolean, number, or {pass, score, reason} object',
+          );
         }
       }
     } catch (err) {
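Note: the one-liner handed to `python -c` binds the (escaped) LLM output and test context as Python string variables, evaluates `assertion.value` as a Python expression, and prints the JSON-encoded result; the branches above then decode that printed string. A hedged sketch of the decoding, mirroring the visible branches (the 'false' case sits in lines elided between the two hunks and is assumed):

// Sketch only, not the commit's code.
function interpretPythonResult(result: string): { pass: boolean; score: number } {
  if (result === 'true') {
    return { pass: true, score: 1.0 };
  }
  if (result === 'false') {
    // Assumed branch; it falls in the elided lines between the hunks above.
    return { pass: false, score: 0.0 };
  }
  const score = parseFloat(result);
  if (isNaN(score)) {
    throw new Error('Python code must return a boolean, number, or {pass, score, reason} object');
  }
  return { pass: true, score };
}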

View File

@@ -258,7 +258,7 @@ class Evaluator {
     let tests = (
       testSuite.tests && testSuite.tests.length > 0
         ? testSuite.tests
-        : testSuite.theories
+        : testSuite.scenarios
         ? []
         : [
             {
@@ -270,10 +270,10 @@ class Evaluator {
       return Object.assign(finalTestCase, test);
     });
 
-    //build theories and add to tests
-    if (testSuite.theories && testSuite.theories.length > 0) {
-      for (const theory of testSuite.theories) {
-        for (const data of theory.dataSet) {
+    //build scenarios and add to tests
+    if (testSuite.scenarios && testSuite.scenarios.length > 0) {
+      for (const theory of testSuite.scenarios) {
+        for (const data of theory.config) {
           //merge defaultTest with TheoryData
           const theoryTests = (
             theory.tests || [
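Note: the nested loops cross every `config` entry of a scenario with every one of its tests, with `defaultTest` merged underneath. A hedged sketch of the expansion, using hypothetical local types in place of the real ones:

// Sketch only, not the commit's code.
type Vars = Record<string, string | string[]>;
interface SketchTestCase {
  vars?: Vars;
}
interface SketchScenario {
  config: Partial<SketchTestCase>[];
  tests: SketchTestCase[];
}

function expandScenario(
  scenario: SketchScenario,
  defaultTest: Partial<SketchTestCase> = {},
): SketchTestCase[] {
  const out: SketchTestCase[] = [];
  for (const data of scenario.config) {
    for (const test of scenario.tests) {
      // Later spreads win: config data overrides defaultTest, the test overrides both.
      out.push({
        ...defaultTest,
        ...data,
        ...test,
        vars: { ...defaultTest.vars, ...data.vars, ...test.vars },
      });
    }
  }
  return out;
}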

View File

@@ -281,7 +281,7 @@ async function main() {
     prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts,
     providers: cmdObj.providers || fileConfig.providers || defaultConfig.providers,
     tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests,
-    theories: fileConfig.theories || defaultConfig.theories,
+    scenarios: fileConfig.scenarios || defaultConfig.scenarios,
     sharing:
       process.env.PROMPTFOO_DISABLE_SHARING === '1'
         ? false
@@ -313,8 +313,8 @@ async function main() {
   );
 
   //parse testCases for each theory
-  if (fileConfig.theories) {
-    for (const theory of fileConfig.theories) {
+  if (fileConfig.scenarios) {
+    for (const theory of fileConfig.scenarios) {
       const parsedTheoryTests: TestCase[] = await readTests(
         theory.tests,
         cmdObj.tests ? undefined : basePath,
@@ -347,7 +347,7 @@ async function main() {
     providers: parsedProviders,
     providerPromptMap: parsedProviderPromptMap,
     tests: parsedTests,
-    theories: config.theories,
+    scenarios: config.scenarios,
     defaultTest,
   };

View File

@@ -202,12 +202,12 @@ export interface TestCase {
   options?: PromptConfig & OutputConfig & GradingConfig;
 }
 
-export interface Theory {
+export interface Scenario {
   // Optional description of what you're testing
   description?: string;
 
   // Default test case config
-  dataSet: Partial<TestCase>[];
+  config: Partial<TestCase>[];
 
   // Optional list of automatic checks to run on the LLM output
   tests: TestCase[];
@@ -236,8 +236,8 @@ export interface TestSuite {
   // Test cases
   tests?: TestCase[];
 
-  // Theories
-  theories?: Theory[];
+  // scenarios
+  scenarios?: Scenario[];
 
   // Default test case config
   defaultTest?: Partial<TestCase>;
@@ -263,8 +263,8 @@ export interface TestSuiteConfig {
   // Path to a test file, OR list of LLM prompt variations (aka "test case")
   tests: string | string[] | TestCase[];
 
-  // Theories, groupings of data and tests to be evaluated
-  theories?: Theory[];
+  // Scenarios, groupings of data and tests to be evaluated
+  scenarios?: Scenario[];
 
   // Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
   defaultTest?: Omit<TestCase, 'description'>;
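Note: under the renamed interface, a scenario pairs several variable sets (`config`) with one shared list of `tests`. A hedged example instance; the `assert` field and the `icontains` assertion type are existing promptfoo conventions that this hunk does not show:

// Sketch only; values are illustrative.
const helloWorld: Scenario = {
  description: 'Say hello world in each language',
  config: [
    { vars: { language: 'Spanish', expectedHelloWorld: 'Hola mundo' } },
    { vars: { language: 'French', expectedHelloWorld: 'Bonjour le monde' } },
  ],
  tests: [
    // Runs once per entry in `config`; assertion shape assumed.
    { assert: [{ type: 'icontains', value: '{{ expectedHelloWorld }}' }] },
  ],
};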

View File

@@ -416,7 +416,7 @@ describe('evaluator', () => {
     expect(summary.results[0].response?.output).toBe('Test output');
   });
 
-  test('evaluate with theories', async () => {
+  test('evaluate with scenarios', async () => {
     const mockApiProvider: ApiProvider = {
       id: jest.fn().mockReturnValue('test-provider'),
       callApi: jest
@@ -434,9 +434,9 @@ describe('evaluator', () => {
     const testSuite: TestSuite = {
       providers: [mockApiProvider],
       prompts: [toPrompt('Test prompt {{ language }}')],
-      theories: [
+      scenarios: [
         {
-          dataSet: [
+          config: [
             {
               vars: {
                 language: 'Spanish',
@@ -473,7 +473,7 @@ describe('evaluator', () => {
     expect(summary.results[1].response?.output).toBe('Bonjour le monde');
   });
 
-  test('evaluate with theories and multiple vars', async () => {
+  test('evaluate with scenarios and multiple vars', async () => {
     const mockApiProvider: ApiProvider = {
       id: jest.fn().mockReturnValue('test-provider'),
       callApi: jest
@@ -498,9 +498,9 @@ describe('evaluator', () => {
     const testSuite: TestSuite = {
       providers: [mockApiProvider],
       prompts: [toPrompt('Test prompt {{ language }} {{ greeting }}')],
-      theories: [
+      scenarios: [
         {
-          dataSet: [
+          config: [
             {
               vars: {
                 language: ['Spanish', 'French'],
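Note: this second test gives the scenario `config` array-valued vars, so one entry fans out into several test cases. The expansion itself is outside the diff; a hedged sketch of what it presumably does:

// Sketch only, not the commit's code: expand array-valued vars into all combinations.
function expandVars(vars: Record<string, string | string[]>): Record<string, string>[] {
  return Object.entries(vars).reduce<Record<string, string>[]>(
    (combos, [key, value]) => {
      const values = Array.isArray(value) ? value : [value];
      return combos.flatMap((combo) => values.map((v) => ({ ...combo, [key]: v })));
    },
    [{}],
  );
}

// expandVars({ language: ['Spanish', 'French'], greeting: ['Hello', 'Hi'] })
// yields four cases: Spanish/Hello, Spanish/Hi, French/Hello, French/Hi.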