Add support for loading test cases from file/directory path (#88)

2023-08-15 01:10:51 +03:00 · 2023-07-23 21:46:38 -07:00
parent 4f9f6e93cd
commit c8a8bb25cf
9 changed files with 206 additions and 19 deletions
--- a/examples/separate-test-configs/README.md
+++ b/examples/separate-test-configs/README.md
@@ -0,0 +1,7 @@
+This example shows how you can use file paths and wildcards (globs) to simplify your `promptfooconfig.yaml`.
+
+Run:
+
+```
+promptfoo eval
+```
--- a/examples/separate-test-configs/promptfooconfig.yaml
+++ b/examples/separate-test-configs/promptfooconfig.yaml
@@ -0,0 +1,20 @@
+prompts: prompts.txt
+providers: openai:gpt-3.5-turbo
+
+# The defaultTest configuration is applied to every test case in this config.
+defaultTest:
+  options:
+    suffix: Be extremely concise
+
+# Loads & runs all test cases matching these filepaths
+tests:
+  # You can supply an exact filepath
+  - tests/tests2.yaml
+
+  # Or a glob (wildcard)
+  - tests/*
+
+  # Mix and match with actual test cases
+  - vars:
+      topic: the internet
+      content_type: witty tweets
--- a/examples/separate-test-configs/prompts.txt
+++ b/examples/separate-test-configs/prompts.txt
@@ -0,0 +1,3 @@
+Generate a list of creative {{content_type}} about {{topic}}.
+---
+Imagine you are an expert in {{topic}}. What {{content_type}} would you recommend writing about?
--- a/examples/separate-test-configs/tests/tests1.yaml
+++ b/examples/separate-test-configs/tests/tests1.yaml
@@ -0,0 +1,24 @@
+# This file contains a list of test cases. They're added alongside test cases
+# from tests2.yaml in the example promptfooconfig.yaml.
+
+- vars:
+    topic: artificial intelligence
+    content_type: blog post ideas
+  assert:
+    - type: javascript
+      value: output.length > 100
+- vars:
+    topic: climate change
+    content_type: policy proposals
+- vars:
+    topic: vegetarian cooking
+    content_type: recipe ideas
+- vars:
+    topic: cybersecurity
+    content_type: educational topics
+- vars:
+    topic: remote work
+    content_type: challenges
+- vars:
+    topic: quantum computing
+    content_type: use cases
--- a/examples/separate-test-configs/tests/tests2.yaml
+++ b/examples/separate-test-configs/tests/tests2.yaml
@@ -0,0 +1,25 @@
+# This file contains a list of test cases. They're added alongside test cases
+# from tests1.yaml in the example promptfooconfig.yaml.
+
+- vars:
+    topic: alien invasions
+    content_type: survival tips
+  assert:
+    - type: javascript
+      value: output.length > 100
+- vars:
+    topic: time travel
+    content_type: tourist attractions
+- vars:
+    topic: zombie apocalypse
+    content_type: home renovation ideas
+- vars:
+    topic: unicorn breeding
+    content_type: best practices
+- vars:
+    topic: telepathy for beginners
+    content_type: etiquette rules
+- vars:
+    topic: fairy dust
+    content_type: cleaning hacks
+- vars: ../vars/vars_*.yaml
--- a/examples/separate-test-configs/vars/vars_extra.yaml
+++ b/examples/separate-test-configs/vars/vars_extra.yaml
@@ -0,0 +1,2 @@
+topic: bananas
+content_type: musings of an evil genius
--- a/src/types.ts
+++ b/src/types.ts
@@ -188,6 +188,9 @@ export interface TestCase {
  // Key-value pairs to substitute in the prompt
  vars?: Record<string, string | string[] | object>;

+  // Optional filepath or glob pattern to load vars from
+  loadVars?: string | string[];
+
  // Optional list of automatic checks to run on the LLM output
  assert?: Assertion[];

@@ -238,7 +241,7 @@ export interface TestSuiteConfig {
  prompts: string | string[];

  // Path to a test file, OR list of LLM prompt variations (aka "test case")
-  tests: string | TestCase[];
+  tests: string | string[] | TestCase[];

  // Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
  defaultTest?: Omit<TestCase, 'description'>;
--- a/src/util.ts
+++ b/src/util.ts
@@ -224,7 +224,31 @@ export async function fetchCsvFromGoogleSheet(url: string): Promise<string> {
  return csvData;
 }

-export async function readVars(varsPath: string, basePath: string = ''): Promise<CsvRow[]> {
+export async function readVarsFiles(
+  pathOrGlobs: string | string[],
+  basePath: string = '',
+): Promise<Record<string, string | string[] | object>> {
+  if (typeof pathOrGlobs === 'string') {
+    pathOrGlobs = [pathOrGlobs];
+  }
+
+  const ret: Record<string, string | string[] | object> = {};
+  for (const pathOrGlob of pathOrGlobs) {
+    const resolvedPath = path.resolve(basePath, pathOrGlob);
+    const paths = globSync(resolvedPath);
+
+    for (const p of paths) {
+      const yamlData = yaml.load(fs.readFileSync(p, 'utf-8'));
+      Object.assign(ret, yamlData);
+    }
+  }
+
+  return ret;
+}
+
+export async function readTestsFile(varsPath: string, basePath: string = ''): Promise<CsvRow[]> {
+  // This function is confusingly named - it reads a CSV, JSON, or YAML file of
+  // TESTS or test equivalents.
  const resolvedVarsPath = path.resolve(basePath, varsPath);
  const fileExtension = parsePath(varsPath).ext.slice(1);
  let rows: CsvRow[] = [];
@@ -246,25 +270,53 @@ export async function readVars(varsPath: string, basePath: string = ''): Promise
 }

 export async function readTests(
-  tests: string | TestCase[] | undefined,
+  tests: string | string[] | TestCase[] | undefined,
  basePath: string = '',
 ): Promise<TestCase[]> {
-  if (!tests) {
-    return [];
-  }
+  const ret: TestCase[] = [];
+
+  const loadTestsFromGlob = async (loadTestsGlob: string) => {
+    const resolvedPath = path.resolve(basePath, loadTestsGlob);
+    const testFiles = globSync(resolvedPath);
+    for (const testFile of testFiles) {
+      const testFileContent = yaml.load(fs.readFileSync(testFile, 'utf-8')) as TestCase[];
+      for (const testCase of testFileContent) {
+        if (typeof testCase.vars === 'string' || Array.isArray(testCase.vars)) {
+          const testcaseBasePath = path.dirname(testFile);
+          testCase.vars = await readVarsFiles(testCase.vars, testcaseBasePath);
+        }
+      }
+      ret.push(...testFileContent);
+    }
+  };

  if (typeof tests === 'string') {
-    // It's a filepath, load from CSV
-    const vars = await readVars(tests, basePath);
-    return vars.map((row, idx) => {
-      const test = testCaseFromCsvRow(row);
-      test.description = `Row #${idx + 1}`;
-      return test;
-    });
+    if (tests.endsWith('yaml') || tests.endsWith('yml')) {
+      // Load testcase config from yaml
+      await loadTestsFromGlob(tests);
+    } else {
+      // Legacy load CSV
+      const vars = await readTestsFile(tests, basePath);
+      return vars.map((row, idx) => {
+        const test = testCaseFromCsvRow(row);
+        test.description = `Row #${idx + 1}`;
+        return test;
+      });
+    }
+  } else if (Array.isArray(tests)) {
+    for (const maybeTestsGlob of tests) {
+      if (typeof maybeTestsGlob === 'string') {
+        // Assume it's a filepath
+        await loadTestsFromGlob(maybeTestsGlob);
+      } else {
+        // Assume it's a full test case
+        ret.push(maybeTestsGlob);
+      }
+    }
  }

  // Some validation of the shape of tests
-  for (const test of tests) {
+  for (const test of ret) {
    if (!test.assert && !test.vars) {
      throw new Error(
        `Test case must have either "assert" or "vars" property. Instead got ${JSON.stringify(
@@ -276,7 +328,7 @@ export async function readTests(
    }
  }

-  return tests;
+  return ret;
 }

 export function writeOutput(
--- a/test/util.test.ts
+++ b/test/util.test.ts
@@ -5,7 +5,7 @@ import yaml from 'js-yaml';
 import { globSync } from 'glob';

 import {
-  readVars,
+  readTestsFile,
  readPrompts,
  writeOutput,
  readTests,
@@ -137,7 +137,7 @@ describe('util', () => {
    (fs.readFileSync as jest.Mock).mockReturnValue('var1,var2\nvalue1,value2');
    const varsPath = 'vars.csv';

-    const result = await readVars(varsPath);
+    const result = await readTestsFile(varsPath);

    expect(fs.readFileSync).toHaveBeenCalledTimes(1);
    expect(result).toEqual([{ var1: 'value1', var2: 'value2' }]);
@@ -147,7 +147,7 @@ describe('util', () => {
    (fs.readFileSync as jest.Mock).mockReturnValue('[{"var1": "value1", "var2": "value2"}]');
    const varsPath = 'vars.json';

-    const result = await readVars(varsPath);
+    const result = await readTestsFile(varsPath);

    expect(fs.readFileSync).toHaveBeenCalledTimes(1);
    expect(result).toEqual([{ var1: 'value1', var2: 'value2' }]);
@@ -157,7 +157,7 @@ describe('util', () => {
    (fs.readFileSync as jest.Mock).mockReturnValue('- var1: value1\n  var2: value2');
    const varsPath = 'vars.yaml';

-    const result = await readVars(varsPath);
+    const result = await readTestsFile(varsPath);

    expect(fs.readFileSync).toHaveBeenCalledTimes(1);
    expect(result).toEqual([{ var1: 'value1', var2: 'value2' }]);
@@ -447,4 +447,55 @@ describe('readTests', () => {

    expect(result).toEqual(input);
  });
+
+  test('readTests with string array input (paths to test configs)', async () => {
+    const testsPaths = ['test1.yaml', 'test2.yaml'];
+    const test1Content = [
+      {
+        description: 'Test 1',
+        vars: { var1: 'value1', var2: 'value2' },
+        assert: [{ type: 'equals', value: 'value1' }],
+      },
+    ];
+    const test2Content = [
+      {
+        description: 'Test 2',
+        vars: { var1: 'value3', var2: 'value4' },
+        assert: [{ type: 'contains-json', value: 'value3' }],
+      },
+    ];
+    (fs.readFileSync as jest.Mock)
+      .mockReturnValueOnce(yaml.dump(test1Content))
+      .mockReturnValueOnce(yaml.dump(test2Content));
+    (globSync as jest.Mock).mockImplementation((pathOrGlob) => [pathOrGlob]);
+
+    const result = await readTests(testsPaths);
+
+    expect(fs.readFileSync).toHaveBeenCalledTimes(2);
+    expect(result).toEqual([...test1Content, ...test2Content]);
+  });
+
+  test('readTests with vars glob input (paths to vars configs)', async () => {
+    const testsPaths = ['test1.yaml'];
+    const test1Content = [
+      {
+        description: 'Test 1',
+        vars: 'vars1.yaml',
+        assert: [{ type: 'equals', value: 'value1' }],
+      },
+    ];
+    const vars1Content = {
+      var1: 'value1',
+      var2: 'value2',
+    };
+    (fs.readFileSync as jest.Mock)
+      .mockReturnValueOnce(yaml.dump(test1Content))
+      .mockReturnValueOnce(yaml.dump(vars1Content));
+    (globSync as jest.Mock).mockImplementation((pathOrGlob) => [pathOrGlob]);
+
+    const result = await readTests(testsPaths);
+
+    expect(fs.readFileSync).toHaveBeenCalledTimes(2);
+    expect(result).toEqual([Object.assign({}, test1Content[0], {vars: vars1Content})]);
+  });
 });