add evaluations
This commit is contained in:
31
src/server/utils/evaluateOutput.ts
Normal file
31
src/server/utils/evaluateOutput.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
import { type Evaluation, type ModelOutput, type TestScenario } from "@prisma/client";
|
||||
import { type ChatCompletion } from "openai/resources/chat";
|
||||
import { type VariableMap, fillTemplate } from "./fillTemplate";
|
||||
|
||||
/**
 * Checks a single model output against one evaluation rule.
 *
 * The message of the first choice in the stored chat completion is serialized
 * with `JSON.stringify`. The evaluation's `matchString`, with its
 * `{{variable}}` placeholders filled from the scenario's variable values, is
 * then applied to that text as a regular-expression pattern.
 *
 * @param modelOutput - Stored output; its `output` field is read as a chat completion.
 * @param scenario - Scenario whose `variableValues` fill the match template.
 * @param evaluation - Rule supplying `matchString` and `matchType`.
 * @returns `true` when the rule passes. `false` when the output has no
 *   message, or when `matchType` is not one of the kinds handled here.
 * @throws SyntaxError when the filled template is not a valid regular expression.
 */
export const evaluateOutput = (
  modelOutput: ModelOutput,
  scenario: TestScenario,
  evaluation: Evaluation
): boolean => {
  // The stored JSON may be null, so read it defensively instead of throwing.
  const completion = modelOutput.output as unknown as ChatCompletion | null;
  const message = completion?.choices?.[0]?.message;

  if (!message) return false;

  const stringifiedMessage = JSON.stringify(message);
  const matchRegex = fillTemplate(evaluation.matchString, scenario.variableValues as VariableMap);

  switch (evaluation.matchType) {
    case "CONTAINS":
      return stringifiedMessage.match(matchRegex) !== null;
    case "DOES_NOT_CONTAIN":
      return stringifiedMessage.match(matchRegex) === null;
    default:
      // An unrecognised match type never passes; this keeps the declared
      // boolean return type honest instead of leaking `undefined`.
      return false;
  }
};
|
||||
91
src/server/utils/evaluations.ts
Normal file
91
src/server/utils/evaluations.ts
Normal file
@@ -0,0 +1,91 @@
|
||||
import { type Evaluation } from "@prisma/client";
|
||||
import { prisma } from "../db";
|
||||
import { evaluateOutput } from "./evaluateOutput";
|
||||
|
||||
/**
 * Recomputes and stores the result of every evaluation in a prompt variant's
 * experiment, for that one variant.
 *
 * Does nothing when no variant with the given id exists. Otherwise, for each
 * evaluation of the variant's experiment, the variant's model outputs are run
 * through `evaluateOutput` and the pass/fail counts are upserted into
 * `evaluationResult`, keyed by (evaluationId, promptVariantId).
 *
 * @param variantId - Id of the prompt variant to re-score.
 */
export const reevaluateVariant = async (variantId: string) => {
  const variant = await prisma.promptVariant.findUnique({
    where: { id: variantId },
  });
  if (!variant) return;

  const evaluations = await prisma.evaluation.findMany({
    where: { experimentId: variant.experimentId },
  });

  // Only outputs that belong to visible scenarios are scored, so passes and
  // failures are counted over the same population. Counting passes over all
  // outputs while deriving failures from the number of visible scenarios could
  // produce a negative failCount.
  const modelOutputs = await prisma.modelOutput.findMany({
    where: { promptVariantId: variantId, testScenario: { visible: true } },
    include: { testScenario: true },
  });

  await Promise.all(
    evaluations.map(async (evaluation) => {
      const passCount = modelOutputs.filter((output) =>
        evaluateOutput(output, output.testScenario, evaluation)
      ).length;
      const failCount = modelOutputs.length - passCount;

      await prisma.evaluationResult.upsert({
        where: {
          evaluationId_promptVariantId: {
            evaluationId: evaluation.id,
            promptVariantId: variantId,
          },
        },
        create: {
          evaluationId: evaluation.id,
          promptVariantId: variantId,
          passCount,
          failCount,
        },
        update: {
          passCount,
          failCount,
        },
      });
    })
  );
};
|
||||
|
||||
/**
 * Recomputes and stores the result of one evaluation for every visible prompt
 * variant in the evaluation's experiment.
 *
 * Model outputs are loaded once for all visible variants (restricted to
 * visible test scenarios). Each variant's outputs are then scored with
 * `evaluateOutput`, and the pass/fail counts are upserted into
 * `evaluationResult`, keyed by (evaluationId, promptVariantId).
 *
 * @param evaluation - The evaluation to re-run.
 */
export const reevaluateEvaluation = async (evaluation: Evaluation) => {
  const visibleVariants = await prisma.promptVariant.findMany({
    where: { experimentId: evaluation.experimentId, visible: true },
  });
  const variantIds = visibleVariants.map(({ id }) => id);

  const allOutputs = await prisma.modelOutput.findMany({
    where: { promptVariantId: { in: variantIds }, testScenario: { visible: true } },
    include: { testScenario: true },
  });

  const pendingUpserts = visibleVariants.map(async ({ id: promptVariantId }) => {
    // Tally passes and failures for this variant in a single pass.
    let passCount = 0;
    let failCount = 0;
    for (const output of allOutputs) {
      if (output.promptVariantId !== promptVariantId) continue;
      if (evaluateOutput(output, output.testScenario, evaluation)) {
        passCount += 1;
      } else {
        failCount += 1;
      }
    }

    const key = { evaluationId: evaluation.id, promptVariantId };
    const counts = { passCount, failCount };

    await prisma.evaluationResult.upsert({
      where: { evaluationId_promptVariantId: key },
      create: { ...key, ...counts },
      update: counts,
    });
  });

  await Promise.all(pendingUpserts);
};
|
||||
@@ -2,17 +2,21 @@ import { type JSONSerializable } from "../types";
|
||||
|
||||
export type VariableMap = Record<string, string>;
|
||||
|
||||
export default function fillTemplate<T extends JSONSerializable>(
|
||||
/**
 * Replaces every `{{ name }}` placeholder in a template string with the
 * matching entry from `variables`.
 *
 * Placeholder names consist of word characters and may be padded with
 * whitespace inside the braces. A placeholder whose variable is missing or
 * falsy is replaced with the empty string.
 *
 * @param template - Text containing zero or more `{{ name }}` placeholders.
 * @param variables - Values to substitute, keyed by placeholder name.
 * @returns The template with all placeholders substituted.
 */
export function fillTemplate(template: string, variables: VariableMap): string {
  return template.replace(/{{\s*(\w+)\s*}}/g, (_, key: string) => {
    // Only consult own properties: a plain lookup would resolve names such as
    // "constructor" or "toString" through the prototype chain and splice the
    // source text of a built-in function into the output.
    const value = Object.prototype.hasOwnProperty.call(variables, key)
      ? variables[key]
      : undefined;
    return value || "";
  });
}
|
||||
|
||||
export function fillTemplateJson<T extends JSONSerializable>(
|
||||
template: T,
|
||||
variables: VariableMap
|
||||
): T {
|
||||
if (typeof template === "string") {
|
||||
return template.replace(/{{\s*(\w+)\s*}}/g, (_, key: string) => variables[key] || "") as T;
|
||||
return fillTemplate(template, variables) as T;
|
||||
} else if (Array.isArray(template)) {
|
||||
return template.map((item) => fillTemplate(item, variables)) as T;
|
||||
return template.map((item) => fillTemplateJson(item, variables)) as T;
|
||||
} else if (typeof template === "object" && template !== null) {
|
||||
return Object.keys(template).reduce((acc, key) => {
|
||||
acc[key] = fillTemplate(template[key] as JSONSerializable, variables);
|
||||
acc[key] = fillTemplateJson(template[key] as JSONSerializable, variables);
|
||||
return acc;
|
||||
}, {} as { [key: string]: JSONSerializable } & T);
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user