Add webhook assert type

This commit is contained in:
Ian Webster
2023-06-10 13:16:38 -07:00
parent 5b456ec790
commit d2b093ab1e
5 changed files with 131 additions and 20 deletions

View File

@@ -87,21 +87,22 @@ tests:
See [Test assertions](https://promptfoo.dev/docs/configuration/expected-outputs) for full details.
| Assertion Type | Returns true if... |
|------------------|---------------------------------------------------------------------------|
| `equals` | output matches exactly |
| `contains` | output contains substring |
| `icontains` | output contains substring, case insensitive |
| `regex` | output matches regex |
| `contains-some` | output contains some in list of substrings |
| `contains-all` | output contains all list of substrings |
| `is-json` | output is valid json |
| `contains-json` | output contains valid json |
| `javascript` | provided Javascript function validates the output |
| `similar` | embeddings and cosine similarity are above a threshold |
| `llm-rubric` | LLM output matches a given rubric, using a Language Model to grade output |
| Assertion Type | Returns true if... |
| --------------- | ------------------------------------------------------------------------- |
| `equals` | output matches exactly |
| `contains` | output contains substring |
| `icontains` | output contains substring, case insensitive |
| `regex` | output matches regex |
| `contains-some` | output contains some in list of substrings |
| `contains-all` | output contains all list of substrings |
| `is-json` | output is valid json |
| `contains-json` | output contains valid json |
| `javascript`    | provided JavaScript function validates the output                         |
| `webhook`       | provided webhook returns `{pass: true}`                                   |
| `similar` | embeddings and cosine similarity are above a threshold |
| `llm-rubric` | LLM output matches a given rubric, using a Language Model to grade output |
Every test type can be negated by prepending `not-`. For example, `not-equals` or `not-regex`.
Every test type can be negated by prepending `not-`. For example, `not-equals` or `not-regex`.
### Tests from spreadsheet
@@ -198,7 +199,7 @@ You can also use `promptfoo` as a library in your project by importing the `eval
}
interface Assertion {
type: 'equality' | 'is-json' | 'contains-json' | 'function' | 'similarity' | 'llm-rubric';
type: string;
value?: string;
threshold?: number; // For similarity assertions
provider?: ApiProvider; // For assertions that require an LLM provider

View File

@@ -2,7 +2,7 @@ import invariant from 'tiny-invariant';
import nunjucks from 'nunjucks';
import { DefaultEmbeddingProvider, DefaultGradingProvider } from './providers/openai';
import { cosineSimilarity } from './util';
import { cosineSimilarity, fetchWithTimeout } from './util';
import { loadApiProvider } from './providers';
import { DEFAULT_GRADING_PROMPT } from './prompts';
@@ -206,6 +206,48 @@ ${assertion.value}`,
return matchesLlmRubric(assertion.value, output, test.options);
}
if (baseType === 'webhook') {
invariant(assertion.value, '"webhook" assertion type must have a URL value');
invariant(
typeof assertion.value === 'string',
'"webhook" assertion type must have a URL value',
);
try {
const context = {
vars: test.vars || {},
};
const response = await fetchWithTimeout(
assertion.value,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ output, context }),
},
process.env.WEBHOOK_TIMEOUT ? parseInt(process.env.WEBHOOK_TIMEOUT, 10) : 5000,
);
if (!response.ok) {
throw new Error(`Webhook response status: ${response.status}`);
}
const jsonResponse = await response.json();
pass = jsonResponse.pass !== inverse;
} catch (err) {
return {
pass: false,
reason: `Webhook error: ${(err as Error).message}`,
};
}
return {
pass,
reason: pass ? 'Assertion passed' : `Webhook returned ${inverse ? 'true' : 'false'}`,
};
}
throw new Error('Unknown assertion type: ' + assertion.type);
}

View File

@@ -154,8 +154,16 @@ async function main() {
'This suffix is append to every prompt',
config.defaultTest?.options?.suffix,
)
.option('--no-write', 'Do not write results to promptfoo directory', config?.commandLineOptions?.write)
.option('--no-cache', 'Do not read or write results to disk cache', config?.commandLineOptions?.cache)
.option(
'--no-write',
'Do not write results to promptfoo directory',
config?.commandLineOptions?.write,
)
.option(
'--no-cache',
'Do not read or write results to disk cache',
config?.commandLineOptions?.cache,
)
.option('--grader', 'Model that will grade outputs', config?.commandLineOptions?.grader)
.option('--verbose', 'Show debug logs', config?.commandLineOptions?.verbose)
.option('--view [port]', 'View in browser ui')

View File

@@ -124,7 +124,8 @@ type BaseAssertionTypes =
| 'contains-json'
| 'javascript'
| 'similar'
| 'llm-rubric';
| 'llm-rubric'
| 'webhook';
type NotPrefixed<T extends string> = `not-${T}`;

View File

@@ -1,3 +1,4 @@
import { Response } from 'node-fetch';
import {
runAssertions,
runAssertion,
@@ -5,12 +6,12 @@ import {
matchesLlmRubric,
assertionFromString,
} from '../src/assertions';
import * as util from '../src/util';
import { DefaultEmbeddingProvider } from '../src/providers/openai';
import type {
Assertion,
ApiProvider,
AtomicTestCase,
TestCase,
GradingConfig,
ProviderResponse,
GradingResult,
@@ -393,6 +394,64 @@ describe('runAssertion', () => {
expect(result.pass).toBeFalsy();
expect(result.reason).toBe('Expected output to not match regex "\\d{3}-\\d{2}-\\d{4}"');
});
// Tests for webhook assertion
const webhookAssertion: Assertion = {
type: 'webhook',
value: 'https://example.com/webhook',
};
it('should pass when the webhook assertion passes', async () => {
const output = 'Expected output';
jest
.spyOn(util, 'fetchWithTimeout')
.mockImplementation(() =>
Promise.resolve(new Response(JSON.stringify({ pass: true }), { status: 200 })),
);
const result: GradingResult = await runAssertion(
webhookAssertion,
{} as AtomicTestCase,
output,
);
expect(result.pass).toBeTruthy();
expect(result.reason).toBe('Assertion passed');
});
it('should fail when the webhook assertion fails', async () => {
const output = 'Different output';
jest
.spyOn(util, 'fetchWithTimeout')
.mockImplementation(() =>
Promise.resolve(new Response(JSON.stringify({ pass: false }), { status: 200 })),
);
const result: GradingResult = await runAssertion(
webhookAssertion,
{} as AtomicTestCase,
output,
);
expect(result.pass).toBeFalsy();
expect(result.reason).toBe('Webhook returned false');
});
it('should fail when the webhook returns an error', async () => {
const output = 'Expected output';
jest
.spyOn(util, 'fetchWithTimeout')
.mockImplementation(() => Promise.resolve(new Response('', { status: 500 })));
const result: GradingResult = await runAssertion(
webhookAssertion,
{} as AtomicTestCase,
output,
);
expect(result.pass).toBeFalsy();
expect(result.reason).toBe('Webhook error: Webhook response status: 500');
});
});
describe('assertionFromString', () => {