theories -> scenarios

Ian Webster
2023-07-27 22:31:33 -07:00
parent 1e36a4f7f4
commit 81fdbe9a2d
6 changed files with 32 additions and 26 deletions

View File

@@ -1,7 +1,7 @@
 prompts: prompts.txt
 providers: [openai:gpt-3.5-turbo, openai:gpt-4]
-theories:
-  - dataSet:
+scenarios:
+  - config:
       - vars:
           language: Spanish
           expectedHelloWorld: 'Hola mundo'
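Note: the renamed keys map one-to-one onto the `Scenario` interface introduced later in this commit; `scenarios` is a list, and each entry's `config` is a list of variable sets. A minimal sketch of the parsed form of this config, assuming the `TestSuiteConfig` shape from the types change below (the scenario's `tests` list is outside this hunk, so it is left empty here):

// Sketch only, not the commit's code.
const parsed: Partial<TestSuiteConfig> = {
  prompts: 'prompts.txt',
  providers: ['openai:gpt-3.5-turbo', 'openai:gpt-4'],
  scenarios: [
    {
      config: [{ vars: { language: 'Spanish', expectedHelloWorld: 'Hola mundo' } }],
      tests: [], // elided in the hunk above
    },
  ],
};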

View File

@@ -280,9 +280,13 @@ ${renderedValue}`,
   if (baseType === 'python') {
     try {
       const { execSync } = require('child_process');
-      const escapedOutput = output.replace(/'/g, "\\'").replace(/"/g, '\\"');;
+      const escapedOutput = output.replace(/'/g, "\\'").replace(/"/g, '\\"');
       const escapedContext = JSON.stringify(context).replace(/'/g, "\\'").replace(/"/g, '\\"');
-      const result = execSync(`python -c "import json; import math; import os; import sys; import re; import datetime; import random; import collections; output='${escapedOutput}'; context='${escapedContext}'; print(json.dumps(${assertion.value}))"`).toString().trim();
+      const result = execSync(
+        `python -c "import json; import math; import os; import sys; import re; import datetime; import random; import collections; output='${escapedOutput}'; context='${escapedContext}'; print(json.dumps(${assertion.value}))"`,
+      )
+        .toString()
+        .trim();
       if (result === 'true') {
         pass = true;
         score = 1.0;
@@ -295,7 +299,9 @@ ${renderedValue}`,
         pass = true;
         score = parseFloat(result);
         if (isNaN(score)) {
-          throw new Error('Python code must return a boolean, number, or {pass, score, reason} object');
+          throw new Error(
+            'Python code must return a boolean, number, or {pass, score, reason} object',
+          );
         }
       }
     } catch (err) {
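Note: the one-liner handed to `python -c` binds the (escaped) LLM output and test context as Python string variables, evaluates `assertion.value` as a Python expression, and prints the JSON-encoded result; the branches above then decode that printed string. A hedged sketch of the decoding, mirroring the visible branches (the 'false' case sits in lines elided between the two hunks and is assumed):

// Sketch only, not the commit's code.
function interpretPythonResult(result: string): { pass: boolean; score: number } {
  if (result === 'true') {
    return { pass: true, score: 1.0 };
  }
  if (result === 'false') {
    // Assumed branch; it falls in the elided lines between the hunks above.
    return { pass: false, score: 0.0 };
  }
  const score = parseFloat(result);
  if (isNaN(score)) {
    throw new Error('Python code must return a boolean, number, or {pass, score, reason} object');
  }
  return { pass: true, score };
}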

View File

@@ -258,7 +258,7 @@ class Evaluator {
     let tests = (
       testSuite.tests && testSuite.tests.length > 0
         ? testSuite.tests
-        : testSuite.theories
+        : testSuite.scenarios
         ? []
         : [
             {
@@ -270,10 +270,10 @@ class Evaluator {
       return Object.assign(finalTestCase, test);
     });
 
-    //build theories and add to tests
-    if (testSuite.theories && testSuite.theories.length > 0) {
-      for (const theory of testSuite.theories) {
-        for (const data of theory.dataSet) {
+    //build scenarios and add to tests
+    if (testSuite.scenarios && testSuite.scenarios.length > 0) {
+      for (const theory of testSuite.scenarios) {
+        for (const data of theory.config) {
           //merge defaultTest with TheoryData
           const theoryTests = (
             theory.tests || [
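Note: the nested loops cross every `config` entry of a scenario with every one of its tests, with `defaultTest` merged underneath. A hedged sketch of the expansion, using hypothetical local types in place of the real ones:

// Sketch only, not the commit's code.
type Vars = Record<string, string | string[]>;
interface SketchTestCase {
  vars?: Vars;
}
interface SketchScenario {
  config: Partial<SketchTestCase>[];
  tests: SketchTestCase[];
}

function expandScenario(
  scenario: SketchScenario,
  defaultTest: Partial<SketchTestCase> = {},
): SketchTestCase[] {
  const out: SketchTestCase[] = [];
  for (const data of scenario.config) {
    for (const test of scenario.tests) {
      // Later spreads win: config data overrides defaultTest, the test overrides both.
      out.push({
        ...defaultTest,
        ...data,
        ...test,
        vars: { ...defaultTest.vars, ...data.vars, ...test.vars },
      });
    }
  }
  return out;
}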

View File

@@ -281,7 +281,7 @@ async function main() {
     prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts,
     providers: cmdObj.providers || fileConfig.providers || defaultConfig.providers,
     tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests,
-    theories: fileConfig.theories || defaultConfig.theories,
+    scenarios: fileConfig.scenarios || defaultConfig.scenarios,
     sharing:
       process.env.PROMPTFOO_DISABLE_SHARING === '1'
         ? false
@@ -313,8 +313,8 @@ async function main() {
   );
 
   //parse testCases for each theory
-  if (fileConfig.theories) {
-    for (const theory of fileConfig.theories) {
+  if (fileConfig.scenarios) {
+    for (const theory of fileConfig.scenarios) {
       const parsedTheoryTests: TestCase[] = await readTests(
         theory.tests,
         cmdObj.tests ? undefined : basePath,
@@ -347,7 +347,7 @@ async function main() {
     providers: parsedProviders,
     providerPromptMap: parsedProviderPromptMap,
     tests: parsedTests,
-    theories: config.theories,
+    scenarios: config.scenarios,
     defaultTest,
   };

View File

@@ -202,12 +202,12 @@ export interface TestCase {
   options?: PromptConfig & OutputConfig & GradingConfig;
 }
 
-export interface Theory {
+export interface Scenario {
   // Optional description of what you're testing
   description?: string;
 
   // Default test case config
-  dataSet: Partial<TestCase>[];
+  config: Partial<TestCase>[];
 
   // Optional list of automatic checks to run on the LLM output
   tests: TestCase[];
@@ -236,8 +236,8 @@ export interface TestSuite {
   // Test cases
   tests?: TestCase[];
 
-  // Theories
-  theories?: Theory[];
+  // scenarios
+  scenarios?: Scenario[];
 
   // Default test case config
   defaultTest?: Partial<TestCase>;
@@ -263,8 +263,8 @@ export interface TestSuiteConfig {
   // Path to a test file, OR list of LLM prompt variations (aka "test case")
   tests: string | string[] | TestCase[];
 
-  // Theories, groupings of data and tests to be evaluated
-  theories?: Theory[];
+  // Scenarios, groupings of data and tests to be evaluated
+  scenarios?: Scenario[];
 
   // Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
   defaultTest?: Omit<TestCase, 'description'>;
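Note: under the renamed interface, a scenario pairs several variable sets (`config`) with one shared list of `tests`. A hedged example instance; the `assert` field and the `icontains` assertion type are existing promptfoo conventions that this hunk does not show:

// Sketch only; values are illustrative.
const helloWorld: Scenario = {
  description: 'Say hello world in each language',
  config: [
    { vars: { language: 'Spanish', expectedHelloWorld: 'Hola mundo' } },
    { vars: { language: 'French', expectedHelloWorld: 'Bonjour le monde' } },
  ],
  tests: [
    // Runs once per entry in `config`; assertion shape assumed.
    { assert: [{ type: 'icontains', value: '{{ expectedHelloWorld }}' }] },
  ],
};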

View File

@@ -416,7 +416,7 @@ describe('evaluator', () => {
     expect(summary.results[0].response?.output).toBe('Test output');
   });
 
-  test('evaluate with theories', async () => {
+  test('evaluate with scenarios', async () => {
     const mockApiProvider: ApiProvider = {
       id: jest.fn().mockReturnValue('test-provider'),
       callApi: jest
@@ -434,9 +434,9 @@ describe('evaluator', () => {
     const testSuite: TestSuite = {
       providers: [mockApiProvider],
       prompts: [toPrompt('Test prompt {{ language }}')],
-      theories: [
+      scenarios: [
         {
-          dataSet: [
+          config: [
             {
               vars: {
                 language: 'Spanish',
@@ -473,7 +473,7 @@ describe('evaluator', () => {
     expect(summary.results[1].response?.output).toBe('Bonjour le monde');
   });
 
-  test('evaluate with theories and multiple vars', async () => {
+  test('evaluate with scenarios and multiple vars', async () => {
     const mockApiProvider: ApiProvider = {
       id: jest.fn().mockReturnValue('test-provider'),
       callApi: jest
@@ -498,9 +498,9 @@ describe('evaluator', () => {
     const testSuite: TestSuite = {
       providers: [mockApiProvider],
       prompts: [toPrompt('Test prompt {{ language }} {{ greeting }}')],
-      theories: [
+      scenarios: [
         {
-          dataSet: [
+          config: [
             {
               vars: {
                 language: ['Spanish', 'French'],
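Note: this second test gives the scenario `config` array-valued vars, so one entry fans out into several test cases. The expansion itself is outside the diff; a hedged sketch of what it presumably does:

// Sketch only, not the commit's code: expand array-valued vars into all combinations.
function expandVars(vars: Record<string, string | string[]>): Record<string, string>[] {
  return Object.entries(vars).reduce<Record<string, string>[]>(
    (combos, [key, value]) => {
      const values = Array.isArray(value) ? value : [value];
      return combos.flatMap((combo) => values.map((v) => ({ ...combo, [key]: v })));
    },
    [{}],
  );
}

// expandVars({ language: ['Spanish', 'French'], greeting: ['Hello', 'Hi'] })
// yields four cases: Spanish/Hello, Spanish/Hi, French/Hello, French/Hi.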