129 lines
3.3 KiB
TypeScript
129 lines
3.3 KiB
TypeScript
import { prisma } from "~/server/db";
|
|
import { generateNewCell } from "~/server/utils/generateNewCell";
|
|
import dedent from "dedent";
|
|
import { execSync } from "child_process";
|
|
import fs from "fs";
|
|
import { promptConstructorVersion } from "~/promptConstructor/version";
|
|
|
|
const defaultId = "11111111-1111-1111-1111-111111111112";
|
|
|
|
await prisma.organization.deleteMany({
|
|
where: { id: defaultId },
|
|
});
|
|
|
|
// If there's an existing org, just seed into it
|
|
const org =
|
|
(await prisma.organization.findFirst({})) ??
|
|
(await prisma.organization.create({
|
|
data: { id: defaultId },
|
|
}));
|
|
|
|
// Clone the repo from git@github.com:microsoft/AGIEval.git into a tmp dir if it doesn't exist
|
|
const tmpDir = "/tmp/agi-eval";
|
|
if (!fs.existsSync(tmpDir)) {
|
|
execSync(`git clone git@github.com:microsoft/AGIEval.git ${tmpDir}`);
|
|
}
|
|
|
|
const datasets = [
|
|
"sat-en",
|
|
"sat-math",
|
|
"lsat-rc",
|
|
"lsat-ar",
|
|
"aqua-rat",
|
|
"logiqa-en",
|
|
"lsat-lr",
|
|
"math",
|
|
];
|
|
|
|
type Scenario = {
|
|
passage: string | null;
|
|
question: string;
|
|
options: string[] | null;
|
|
label: string;
|
|
};
|
|
|
|
for (const dataset of datasets) {
|
|
const experimentName = `AGI-Eval: ${dataset}`;
|
|
const oldExperiment = await prisma.experiment.findFirst({
|
|
where: {
|
|
label: experimentName,
|
|
organizationId: org.id,
|
|
},
|
|
});
|
|
if (oldExperiment) {
|
|
await prisma.experiment.deleteMany({
|
|
where: { id: oldExperiment.id },
|
|
});
|
|
}
|
|
|
|
const experiment = await prisma.experiment.create({
|
|
data: {
|
|
id: oldExperiment?.id ?? undefined,
|
|
label: experimentName,
|
|
organizationId: org.id,
|
|
},
|
|
});
|
|
|
|
const scenarios: Scenario[] = fs
|
|
.readFileSync(`${tmpDir}/data/v1/${dataset}.jsonl`, "utf8")
|
|
.split("\n")
|
|
.filter((line) => line.length > 0)
|
|
.map((line) => JSON.parse(line) as Scenario);
|
|
console.log("scenarios", scenarios.length);
|
|
|
|
await prisma.testScenario.createMany({
|
|
data: scenarios.slice(0, 30).map((scenario, i) => ({
|
|
experimentId: experiment.id,
|
|
sortIndex: i,
|
|
variableValues: {
|
|
passage: scenario.passage,
|
|
question: scenario.question,
|
|
options: scenario.options?.join("\n"),
|
|
label: scenario.label,
|
|
},
|
|
})),
|
|
});
|
|
|
|
await prisma.templateVariable.createMany({
|
|
data: ["passage", "question", "options", "label"].map((label) => ({
|
|
experimentId: experiment.id,
|
|
label,
|
|
})),
|
|
});
|
|
|
|
await prisma.promptVariant.createMany({
|
|
data: [
|
|
{
|
|
experimentId: experiment.id,
|
|
label: "Prompt Variant 1",
|
|
sortIndex: 0,
|
|
model: "gpt-3.5-turbo-0613",
|
|
modelProvider: "openai/ChatCompletion",
|
|
promptConstructorVersion,
|
|
promptConstructor: dedent`
|
|
definePrompt("openai/ChatCompletion", {
|
|
model: "gpt-3.5-turbo-0613",
|
|
messages: [
|
|
{
|
|
role: "user",
|
|
content: \`Passage: ${"$"}{scenario.passage}\n\nQuestion: ${"$"}{scenario.question}\n\nOptions: ${"$"}{scenario.options}\n\n Respond with just the letter of the best option in the format Answer: (A).\`
|
|
}
|
|
],
|
|
temperature: 0,
|
|
})`,
|
|
},
|
|
],
|
|
});
|
|
|
|
await prisma.evaluation.createMany({
|
|
data: [
|
|
{
|
|
experimentId: experiment.id,
|
|
label: "Eval",
|
|
evalType: "CONTAINS",
|
|
value: "Answer: ({{label}})",
|
|
},
|
|
],
|
|
});
|
|
}
|