Cache eval outputs and add GPT-4 eval
This commit is contained in:
@@ -4,7 +4,7 @@ import { runOneEval } from "./runOneEval";
|
||||
import { type Scenario } from "~/components/OutputsTable/types";
|
||||
|
||||
const saveResult = async (evaluation: Evaluation, scenario: Scenario, modelOutput: ModelOutput) => {
|
||||
const result = runOneEval(evaluation, scenario, modelOutput);
|
||||
const result = await runOneEval(evaluation, scenario, modelOutput);
|
||||
return await prisma.outputEvaluation.upsert({
|
||||
where: {
|
||||
modelOutputId_evaluationId: {
|
||||
@@ -15,10 +15,10 @@ const saveResult = async (evaluation: Evaluation, scenario: Scenario, modelOutpu
|
||||
create: {
|
||||
modelOutputId: modelOutput.id,
|
||||
evaluationId: evaluation.id,
|
||||
result,
|
||||
...result,
|
||||
},
|
||||
update: {
|
||||
result,
|
||||
...result,
|
||||
},
|
||||
});
|
||||
};
|
||||
@@ -35,43 +35,6 @@ export const runEvalsForOutput = async (
|
||||
await Promise.all(
|
||||
evaluations.map(async (evaluation) => await saveResult(evaluation, scenario, modelOutput)),
|
||||
);
|
||||
|
||||
// const cells = await prisma.scenarioVariantCell.findMany({
|
||||
// where: {
|
||||
// promptVariantId: variantId,
|
||||
// retrievalStatus: "COMPLETE",
|
||||
// testScenario: { visible: true },
|
||||
// },
|
||||
// include: { testScenario: true, modelOutput: { include: { OutputEvaluation: true } } },
|
||||
// });
|
||||
|
||||
// await Promise.all(
|
||||
// evaluations.map(async (evaluation) => {
|
||||
// const passCount = cells.filter((cell) =>
|
||||
// runOneEval(cell.modelOutput as ModelOutput, cell.testScenario, evaluation),
|
||||
// ).length;
|
||||
// const failCount = cells.length - passCount;
|
||||
|
||||
// await prisma.evaluationResult.upsert({
|
||||
// where: {
|
||||
// evaluationId_promptVariantId: {
|
||||
// evaluationId: evaluation.id,
|
||||
// promptVariantId: variantId,
|
||||
// },
|
||||
// },
|
||||
// create: {
|
||||
// evaluationId: evaluation.id,
|
||||
// promptVariantId: variantId,
|
||||
// passCount,
|
||||
// failCount,
|
||||
// },
|
||||
// update: {
|
||||
// passCount,
|
||||
// failCount,
|
||||
// },
|
||||
// });
|
||||
// }),
|
||||
// );
|
||||
};
|
||||
|
||||
export const runAllEvals = async (experimentId: string) => {
|
||||
@@ -113,42 +76,4 @@ export const runAllEvals = async (experimentId: string) => {
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
// const cells = await prisma.scenarioVariantCell.findMany({
|
||||
// where: {
|
||||
// promptVariantId: { in: variants.map((v) => v.id) },
|
||||
// testScenario: { visible: true },
|
||||
// statusCode: { notIn: [429] },
|
||||
// },
|
||||
// include: { testScenario: true, modelOutput: true },
|
||||
// });
|
||||
|
||||
// await Promise.all(
|
||||
// variants.map(async (variant) => {
|
||||
// const variantCells = cells.filter((cell) => cell.promptVariantId === variant.id);
|
||||
// const passCount = variantCells.filter((cell) =>
|
||||
// runOneEval(cell.modelOutput as ModelOutput, cell.testScenario, evaluation),
|
||||
// ).length;
|
||||
// const failCount = variantCells.length - passCount;
|
||||
|
||||
// await prisma.evaluationResult.upsert({
|
||||
// where: {
|
||||
// evaluationId_promptVariantId: {
|
||||
// evaluationId: evaluation.id,
|
||||
// promptVariantId: variant.id,
|
||||
// },
|
||||
// },
|
||||
// create: {
|
||||
// evaluationId: evaluation.id,
|
||||
// promptVariantId: variant.id,
|
||||
// passCount,
|
||||
// failCount,
|
||||
// },
|
||||
// update: {
|
||||
// passCount,
|
||||
// failCount,
|
||||
// },
|
||||
// });
|
||||
// }),
|
||||
// );
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user