Mirror of https://github.com/promptfoo/promptfoo.git, synced 2023-08-15 01:10:51 +03:00
Add webhook assert type
README.md (31 lines changed)
@@ -87,21 +87,22 @@ tests:
 See [Test assertions](https://promptfoo.dev/docs/configuration/expected-outputs) for full details.

-| Assertion Type   | Returns true if...                                                          |
-|------------------|---------------------------------------------------------------------------|
-| `equals`         | output matches exactly                                                      |
-| `contains`       | output contains substring                                                   |
-| `icontains`      | output contains substring, case insensitive                                 |
-| `regex`          | output matches regex                                                        |
-| `contains-some`  | output contains some in list of substrings                                  |
-| `contains-all`   | output contains all list of substrings                                      |
-| `is-json`        | output is valid json                                                        |
-| `contains-json`  | output contains valid json                                                  |
-| `javascript`     | provided Javascript function validates the output                           |
-| `similar`        | embeddings and cosine similarity are above a threshold                      |
-| `llm-rubric`     | LLM output matches a given rubric, using a Language Model to grade output   |
+| Assertion Type  | Returns true if...                                                         |
+| --------------- | -------------------------------------------------------------------------- |
+| `equals`        | output matches exactly                                                     |
+| `contains`      | output contains substring                                                  |
+| `icontains`     | output contains substring, case insensitive                                |
+| `regex`         | output matches regex                                                       |
+| `contains-some` | output contains some in list of substrings                                 |
+| `contains-all`  | output contains all list of substrings                                     |
+| `is-json`       | output is valid json                                                       |
+| `contains-json` | output contains valid json                                                 |
+| `javascript`    | provided Javascript function validates the output                         |
+| `webhook`       | provided webhook returns `{pass: true}`                                    |
+| `similar`       | embeddings and cosine similarity are above a threshold                     |
+| `llm-rubric`    | LLM output matches a given rubric, using a Language Model to grade output  |

 Every test type can be negated by prepending `not-`. For example, `not-equals` or `not-regex`.

 ### Tests from spreadsheet
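The new `webhook` row only names the contract. Judging from the implementation added later in this commit, the payload exchanged looks roughly like the sketch below; the type names are illustrative and not part of promptfoo's exported API.

```typescript
// Illustrative types only: the JSON promptfoo POSTs to the webhook and the
// JSON it expects back (see the assertions.ts hunk further down).
interface WebhookRequestBody {
  output: string; // the LLM output being graded
  context: {
    vars: Record<string, string>; // variables from the test case
  };
}

interface WebhookResponseBody {
  pass: boolean; // respond with { pass: true } to make the assertion succeed
}
```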
@@ -198,7 +199,7 @@ You can also use `promptfoo` as a library in your project by importing the `eval
 }

 interface Assertion {
-  type: 'equality' | 'is-json' | 'contains-json' | 'function' | 'similarity' | 'llm-rubric';
+  type: string;
   value?: string;
   threshold?: number; // For similarity assertions
   provider?: ApiProvider; // For assertions that require an LLM provider
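With `type` widened to a plain `string`, a webhook assertion object (or its `not-` negation) can be written directly. A minimal sketch, assuming the `Assertion` shape from the README excerpt above; the URL is a placeholder.

```typescript
// Assumes the Assertion interface shown in the README excerpt above.
interface Assertion {
  type: string;
  value?: string;
  threshold?: number; // For similarity assertions
}

const webhookAssertion: Assertion = {
  type: 'webhook',
  value: 'https://example.com/webhook', // the output is POSTed to this URL
};

// Prepending `not-` inverts the check: this one passes only when the
// webhook responds with { pass: false }.
const notWebhookAssertion: Assertion = {
  type: 'not-webhook',
  value: 'https://example.com/webhook',
};
```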
@@ -2,7 +2,7 @@ import invariant from 'tiny-invariant';
 import nunjucks from 'nunjucks';

 import { DefaultEmbeddingProvider, DefaultGradingProvider } from './providers/openai';
-import { cosineSimilarity } from './util';
+import { cosineSimilarity, fetchWithTimeout } from './util';
 import { loadApiProvider } from './providers';
 import { DEFAULT_GRADING_PROMPT } from './prompts';
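The `fetchWithTimeout` helper itself is not shown in this diff; only its import is. A minimal sketch of a helper with the signature the assertion code relies on (URL, request options, timeout in milliseconds), assuming node-fetch; the real implementation in src/util.ts may differ.

```typescript
// Hypothetical sketch of fetchWithTimeout. It rejects once the timeout
// elapses rather than aborting the underlying socket; the webhook assertion
// catches that rejection and reports it as a "Webhook error".
import fetch, { RequestInit, Response } from 'node-fetch';

export function fetchWithTimeout(
  url: string,
  options: RequestInit,
  timeoutMs: number,
): Promise<Response> {
  return Promise.race([
    fetch(url, options),
    new Promise<Response>((_, reject) =>
      setTimeout(() => reject(new Error(`Request timed out after ${timeoutMs} ms`)), timeoutMs),
    ),
  ]);
}
```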
@@ -206,6 +206,48 @@ ${assertion.value}`,
     return matchesLlmRubric(assertion.value, output, test.options);
   }

+  if (baseType === 'webhook') {
+    invariant(assertion.value, '"webhook" assertion type must have a URL value');
+    invariant(
+      typeof assertion.value === 'string',
+      '"webhook" assertion type must have a URL value',
+    );
+
+    try {
+      const context = {
+        vars: test.vars || {},
+      };
+      const response = await fetchWithTimeout(
+        assertion.value,
+        {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+          },
+          body: JSON.stringify({ output, context }),
+        },
+        process.env.WEBHOOK_TIMEOUT ? parseInt(process.env.WEBHOOK_TIMEOUT, 10) : 5000,
+      );
+
+      if (!response.ok) {
+        throw new Error(`Webhook response status: ${response.status}`);
+      }
+
+      const jsonResponse = await response.json();
+      pass = jsonResponse.pass !== inverse;
+    } catch (err) {
+      return {
+        pass: false,
+        reason: `Webhook error: ${(err as Error).message}`,
+      };
+    }
+
+    return {
+      pass,
+      reason: pass ? 'Assertion passed' : `Webhook returned ${inverse ? 'true' : 'false'}`,
+    };
+  }
+
   throw new Error('Unknown assertion type: ' + assertion.type);
 }
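A webhook that satisfies this handler only needs to accept the POSTed `{ output, context }` JSON and answer with `{ pass: boolean }` within the timeout (5 seconds by default, overridable via `WEBHOOK_TIMEOUT`). Below is a minimal example endpoint, not part of this commit; the grading rule and port are placeholders.

```typescript
// Minimal example endpoint: read the JSON body promptfoo POSTs and reply
// with a pass/fail verdict.
import * as http from 'http';

const server = http.createServer((req, res) => {
  let body = '';
  req.on('data', (chunk) => (body += chunk));
  req.on('end', () => {
    const { output, context } = JSON.parse(body) as {
      output: string;
      context: { vars: Record<string, string> };
    };
    // Placeholder grading rule: pass if the output mentions one of the vars.
    const pass = Object.values(context.vars).some((v) => output.includes(v));
    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({ pass }));
  });
});

server.listen(3000);
```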
src/main.ts (12 lines changed)
@@ -154,8 +154,16 @@ async function main() {
       'This suffix is append to every prompt',
       config.defaultTest?.options?.suffix,
     )
-    .option('--no-write', 'Do not write results to promptfoo directory', config?.commandLineOptions?.write)
-    .option('--no-cache', 'Do not read or write results to disk cache', config?.commandLineOptions?.cache)
+    .option(
+      '--no-write',
+      'Do not write results to promptfoo directory',
+      config?.commandLineOptions?.write,
+    )
+    .option(
+      '--no-cache',
+      'Do not read or write results to disk cache',
+      config?.commandLineOptions?.cache,
+    )
     .option('--grader', 'Model that will grade outputs', config?.commandLineOptions?.grader)
     .option('--verbose', 'Show debug logs', config?.commandLineOptions?.verbose)
     .option('--view [port]', 'View in browser ui')
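Aside from the re-wrapping, nothing changes here. The `--no-write`/`--no-cache` flags follow commander's negatable-option convention: the flag name starts with `--no-`, but the option it controls is the positive name, which is why the defaults come from `config?.commandLineOptions?.write` and `.cache`. A standalone sketch of that behavior (not promptfoo code):

```typescript
// Commander's `--no-<name>` flags define a boolean option under the positive
// name; passing the flag flips it to false.
import { Command } from 'commander';

const program = new Command();
program
  .option('--no-write', 'Do not write results to promptfoo directory')
  .option('--no-cache', 'Do not read or write results to disk cache');

program.parse(['node', 'cli', '--no-cache']);

const { write, cache } = program.opts();
console.log(write); // true  (flag not passed; negatable options default to true)
console.log(cache); // false (--no-cache was passed)
```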
@@ -124,7 +124,8 @@ type BaseAssertionTypes =
   | 'contains-json'
   | 'javascript'
   | 'similar'
-  | 'llm-rubric';
+  | 'llm-rubric'
+  | 'webhook';

 type NotPrefixed<T extends string> = `not-${T}`;
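Adding `'webhook'` to `BaseAssertionTypes` also makes `not-webhook` available through the `NotPrefixed` template-literal type. An abridged, illustrative reconstruction of how the full union is presumably composed (the surrounding types file is not fully shown here):

```typescript
// Abridged for illustration; the actual union in the repo has more members.
type BaseAssertionTypes = 'equals' | 'javascript' | 'llm-rubric' | 'webhook';

type NotPrefixed<T extends string> = `not-${T}`;

// Presumed composition: every base type plus its negated variant.
type AssertionType = BaseAssertionTypes | NotPrefixed<BaseAssertionTypes>;

const negated: AssertionType = 'not-webhook'; // type-checks
```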
@@ -1,3 +1,4 @@
+import { Response } from 'node-fetch';
 import {
   runAssertions,
   runAssertion,
@@ -5,12 +6,12 @@ import {
   matchesLlmRubric,
   assertionFromString,
 } from '../src/assertions';
+import * as util from '../src/util';
 import { DefaultEmbeddingProvider } from '../src/providers/openai';
 import type {
   Assertion,
   ApiProvider,
   AtomicTestCase,
   TestCase,
   GradingConfig,
   ProviderResponse,
   GradingResult,
@@ -393,6 +394,64 @@ describe('runAssertion', () => {
     expect(result.pass).toBeFalsy();
     expect(result.reason).toBe('Expected output to not match regex "\\d{3}-\\d{2}-\\d{4}"');
   });
+
+  // Tests for webhook assertion
+  const webhookAssertion: Assertion = {
+    type: 'webhook',
+    value: 'https://example.com/webhook',
+  };
+
+  it('should pass when the webhook assertion passes', async () => {
+    const output = 'Expected output';
+
+    jest
+      .spyOn(util, 'fetchWithTimeout')
+      .mockImplementation(() =>
+        Promise.resolve(new Response(JSON.stringify({ pass: true }), { status: 200 })),
+      );
+
+    const result: GradingResult = await runAssertion(
+      webhookAssertion,
+      {} as AtomicTestCase,
+      output,
+    );
+    expect(result.pass).toBeTruthy();
+    expect(result.reason).toBe('Assertion passed');
+  });
+
+  it('should fail when the webhook assertion fails', async () => {
+    const output = 'Different output';
+
+    jest
+      .spyOn(util, 'fetchWithTimeout')
+      .mockImplementation(() =>
+        Promise.resolve(new Response(JSON.stringify({ pass: false }), { status: 200 })),
+      );
+
+    const result: GradingResult = await runAssertion(
+      webhookAssertion,
+      {} as AtomicTestCase,
+      output,
+    );
+    expect(result.pass).toBeFalsy();
+    expect(result.reason).toBe('Webhook returned false');
+  });
+
+  it('should fail when the webhook returns an error', async () => {
+    const output = 'Expected output';
+
+    jest
+      .spyOn(util, 'fetchWithTimeout')
+      .mockImplementation(() => Promise.resolve(new Response('', { status: 500 })));
+
+    const result: GradingResult = await runAssertion(
+      webhookAssertion,
+      {} as AtomicTestCase,
+      output,
+    );
+    expect(result.pass).toBeFalsy();
+    expect(result.reason).toBe('Webhook error: Webhook response status: 500');
+  });
 });

 describe('assertionFromString', () => {