Mirror of https://github.com/promptfoo/promptfoo.git, synced 2023-08-15 01:10:51 +03:00
Add webhook assert type
README.md (31 lines changed)
@@ -87,21 +87,22 @@ tests:
 See [Test assertions](https://promptfoo.dev/docs/configuration/expected-outputs) for full details.

-| Assertion Type   | Returns true if...                                                          |
-|------------------|---------------------------------------------------------------------------|
-| `equals`         | output matches exactly                                                      |
-| `contains`       | output contains substring                                                   |
-| `icontains`      | output contains substring, case insensitive                                 |
-| `regex`          | output matches regex                                                        |
-| `contains-some`  | output contains some in list of substrings                                  |
-| `contains-all`   | output contains all list of substrings                                      |
-| `is-json`        | output is valid json                                                        |
-| `contains-json`  | output contains valid json                                                  |
-| `javascript`     | provided Javascript function validates the output                           |
-| `similar`        | embeddings and cosine similarity are above a threshold                      |
-| `llm-rubric`     | LLM output matches a given rubric, using a Language Model to grade output   |
+| Assertion Type  | Returns true if...                                                         |
+| --------------- | -------------------------------------------------------------------------- |
+| `equals`        | output matches exactly                                                     |
+| `contains`      | output contains substring                                                  |
+| `icontains`     | output contains substring, case insensitive                                |
+| `regex`         | output matches regex                                                       |
+| `contains-some` | output contains some in list of substrings                                 |
+| `contains-all`  | output contains all list of substrings                                     |
+| `is-json`       | output is valid json                                                       |
+| `contains-json` | output contains valid json                                                 |
+| `javascript`    | provided Javascript function validates the output                         |
+| `webhook`       | provided webhook returns `{pass: true}`                                    |
+| `similar`       | embeddings and cosine similarity are above a threshold                     |
+| `llm-rubric`    | LLM output matches a given rubric, using a Language Model to grade output  |

 Every test type can be negated by prepending `not-`. For example, `not-equals` or `not-regex`.

 ### Tests from spreadsheet
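The new `webhook` row only names the contract. Judging from the implementation added later in this commit, the payload exchanged looks roughly like the sketch below; the type names are illustrative and not part of promptfoo's exported API.

```typescript
// Illustrative types only: the JSON promptfoo POSTs to the webhook and the
// JSON it expects back (see the assertions.ts hunk further down).
interface WebhookRequestBody {
  output: string; // the LLM output being graded
  context: {
    vars: Record<string, string>; // variables from the test case
  };
}

interface WebhookResponseBody {
  pass: boolean; // respond with { pass: true } to make the assertion succeed
}
```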
@@ -198,7 +199,7 @@ You can also use `promptfoo` as a library in your project by importing the `eval
 }

 interface Assertion {
-  type: 'equality' | 'is-json' | 'contains-json' | 'function' | 'similarity' | 'llm-rubric';
+  type: string;
   value?: string;
   threshold?: number; // For similarity assertions
   provider?: ApiProvider; // For assertions that require an LLM provider
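With `type` widened to a plain `string`, a webhook assertion object (or its `not-` negation) can be written directly. A minimal sketch, assuming the `Assertion` shape from the README excerpt above; the URL is a placeholder.

```typescript
// Assumes the Assertion interface shown in the README excerpt above.
interface Assertion {
  type: string;
  value?: string;
  threshold?: number; // For similarity assertions
}

const webhookAssertion: Assertion = {
  type: 'webhook',
  value: 'https://example.com/webhook', // the output is POSTed to this URL
};

// Prepending `not-` inverts the check: this one passes only when the
// webhook responds with { pass: false }.
const notWebhookAssertion: Assertion = {
  type: 'not-webhook',
  value: 'https://example.com/webhook',
};
```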
@@ -2,7 +2,7 @@ import invariant from 'tiny-invariant';
 import nunjucks from 'nunjucks';

 import { DefaultEmbeddingProvider, DefaultGradingProvider } from './providers/openai';
-import { cosineSimilarity } from './util';
+import { cosineSimilarity, fetchWithTimeout } from './util';
 import { loadApiProvider } from './providers';
 import { DEFAULT_GRADING_PROMPT } from './prompts';
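The `fetchWithTimeout` helper itself is not shown in this diff; only its import is. A minimal sketch of a helper with the signature the assertion code relies on (URL, request options, timeout in milliseconds), assuming node-fetch; the real implementation in src/util.ts may differ.

```typescript
// Hypothetical sketch of fetchWithTimeout. It rejects once the timeout
// elapses rather than aborting the underlying socket; the webhook assertion
// catches that rejection and reports it as a "Webhook error".
import fetch, { RequestInit, Response } from 'node-fetch';

export function fetchWithTimeout(
  url: string,
  options: RequestInit,
  timeoutMs: number,
): Promise<Response> {
  return Promise.race([
    fetch(url, options),
    new Promise<Response>((_, reject) =>
      setTimeout(() => reject(new Error(`Request timed out after ${timeoutMs} ms`)), timeoutMs),
    ),
  ]);
}
```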
@@ -206,6 +206,48 @@ ${assertion.value}`,
     return matchesLlmRubric(assertion.value, output, test.options);
   }

+  if (baseType === 'webhook') {
+    invariant(assertion.value, '"webhook" assertion type must have a URL value');
+    invariant(
+      typeof assertion.value === 'string',
+      '"webhook" assertion type must have a URL value',
+    );
+
+    try {
+      const context = {
+        vars: test.vars || {},
+      };
+      const response = await fetchWithTimeout(
+        assertion.value,
+        {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+          },
+          body: JSON.stringify({ output, context }),
+        },
+        process.env.WEBHOOK_TIMEOUT ? parseInt(process.env.WEBHOOK_TIMEOUT, 10) : 5000,
+      );
+
+      if (!response.ok) {
+        throw new Error(`Webhook response status: ${response.status}`);
+      }
+
+      const jsonResponse = await response.json();
+      pass = jsonResponse.pass !== inverse;
+    } catch (err) {
+      return {
+        pass: false,
+        reason: `Webhook error: ${(err as Error).message}`,
+      };
+    }
+
+    return {
+      pass,
+      reason: pass ? 'Assertion passed' : `Webhook returned ${inverse ? 'true' : 'false'}`,
+    };
+  }
+
   throw new Error('Unknown assertion type: ' + assertion.type);
 }
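A webhook that satisfies this handler only needs to accept the POSTed `{ output, context }` JSON and answer with `{ pass: boolean }` within the timeout (5 seconds by default, overridable via `WEBHOOK_TIMEOUT`). Below is a minimal example endpoint, not part of this commit; the grading rule and port are placeholders.

```typescript
// Minimal example endpoint: read the JSON body promptfoo POSTs and reply
// with a pass/fail verdict.
import * as http from 'http';

const server = http.createServer((req, res) => {
  let body = '';
  req.on('data', (chunk) => (body += chunk));
  req.on('end', () => {
    const { output, context } = JSON.parse(body) as {
      output: string;
      context: { vars: Record<string, string> };
    };
    // Placeholder grading rule: pass if the output mentions one of the vars.
    const pass = Object.values(context.vars).some((v) => output.includes(v));
    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({ pass }));
  });
});

server.listen(3000);
```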
src/main.ts (12 lines changed)
@@ -154,8 +154,16 @@ async function main() {
       'This suffix is append to every prompt',
       config.defaultTest?.options?.suffix,
     )
-    .option('--no-write', 'Do not write results to promptfoo directory', config?.commandLineOptions?.write)
-    .option('--no-cache', 'Do not read or write results to disk cache', config?.commandLineOptions?.cache)
+    .option(
+      '--no-write',
+      'Do not write results to promptfoo directory',
+      config?.commandLineOptions?.write,
+    )
+    .option(
+      '--no-cache',
+      'Do not read or write results to disk cache',
+      config?.commandLineOptions?.cache,
+    )
     .option('--grader', 'Model that will grade outputs', config?.commandLineOptions?.grader)
     .option('--verbose', 'Show debug logs', config?.commandLineOptions?.verbose)
     .option('--view [port]', 'View in browser ui')
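Aside from the re-wrapping, nothing changes here. The `--no-write`/`--no-cache` flags follow commander's negatable-option convention: the flag name starts with `--no-`, but the option it controls is the positive name, which is why the defaults come from `config?.commandLineOptions?.write` and `.cache`. A standalone sketch of that behavior (not promptfoo code):

```typescript
// Commander's `--no-<name>` flags define a boolean option under the positive
// name; passing the flag flips it to false.
import { Command } from 'commander';

const program = new Command();
program
  .option('--no-write', 'Do not write results to promptfoo directory')
  .option('--no-cache', 'Do not read or write results to disk cache');

program.parse(['node', 'cli', '--no-cache']);

const { write, cache } = program.opts();
console.log(write); // true  (flag not passed; negatable options default to true)
console.log(cache); // false (--no-cache was passed)
```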
@@ -124,7 +124,8 @@ type BaseAssertionTypes =
   | 'contains-json'
   | 'javascript'
   | 'similar'
-  | 'llm-rubric';
+  | 'llm-rubric'
+  | 'webhook';

 type NotPrefixed<T extends string> = `not-${T}`;
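Adding `'webhook'` to `BaseAssertionTypes` also makes `not-webhook` available through the `NotPrefixed` template-literal type. An abridged, illustrative reconstruction of how the full union is presumably composed (the surrounding types file is not fully shown here):

```typescript
// Abridged for illustration; the actual union in the repo has more members.
type BaseAssertionTypes = 'equals' | 'javascript' | 'llm-rubric' | 'webhook';

type NotPrefixed<T extends string> = `not-${T}`;

// Presumed composition: every base type plus its negated variant.
type AssertionType = BaseAssertionTypes | NotPrefixed<BaseAssertionTypes>;

const negated: AssertionType = 'not-webhook'; // type-checks
```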
@@ -1,3 +1,4 @@
+import { Response } from 'node-fetch';
 import {
   runAssertions,
   runAssertion,
@@ -5,12 +6,12 @@ import {
   matchesLlmRubric,
   assertionFromString,
 } from '../src/assertions';
+import * as util from '../src/util';
 import { DefaultEmbeddingProvider } from '../src/providers/openai';
 import type {
   Assertion,
   ApiProvider,
   AtomicTestCase,
   TestCase,
   GradingConfig,
   ProviderResponse,
   GradingResult,
@@ -393,6 +394,64 @@ describe('runAssertion', () => {
     expect(result.pass).toBeFalsy();
     expect(result.reason).toBe('Expected output to not match regex "\\d{3}-\\d{2}-\\d{4}"');
   });
+
+  // Tests for webhook assertion
+  const webhookAssertion: Assertion = {
+    type: 'webhook',
+    value: 'https://example.com/webhook',
+  };
+
+  it('should pass when the webhook assertion passes', async () => {
+    const output = 'Expected output';
+
+    jest
+      .spyOn(util, 'fetchWithTimeout')
+      .mockImplementation(() =>
+        Promise.resolve(new Response(JSON.stringify({ pass: true }), { status: 200 })),
+      );
+
+    const result: GradingResult = await runAssertion(
+      webhookAssertion,
+      {} as AtomicTestCase,
+      output,
+    );
+    expect(result.pass).toBeTruthy();
+    expect(result.reason).toBe('Assertion passed');
+  });
+
+  it('should fail when the webhook assertion fails', async () => {
+    const output = 'Different output';
+
+    jest
+      .spyOn(util, 'fetchWithTimeout')
+      .mockImplementation(() =>
+        Promise.resolve(new Response(JSON.stringify({ pass: false }), { status: 200 })),
+      );
+
+    const result: GradingResult = await runAssertion(
+      webhookAssertion,
+      {} as AtomicTestCase,
+      output,
+    );
+    expect(result.pass).toBeFalsy();
+    expect(result.reason).toBe('Webhook returned false');
+  });
+
+  it('should fail when the webhook returns an error', async () => {
+    const output = 'Expected output';
+
+    jest
+      .spyOn(util, 'fetchWithTimeout')
+      .mockImplementation(() => Promise.resolve(new Response('', { status: 500 })));
+
+    const result: GradingResult = await runAssertion(
+      webhookAssertion,
+      {} as AtomicTestCase,
+      output,
+    );
+    expect(result.pass).toBeFalsy();
+    expect(result.reason).toBe('Webhook error: Webhook response status: 500');
+  });
 });

 describe('assertionFromString', () => {