Add scenario editing modal, twitter sentiment seeding (#101)
* testing agi-eval benchmark * Add scenario modal editor * Add initial values to ScenarioEditorModal * Add seedTwitterSentiment.ts --------- Co-authored-by: Kyle Corbitt <kyle@corbt.com>
This commit is contained in:
127
prisma/seedAgiEval.ts
Normal file
127
prisma/seedAgiEval.ts
Normal file
@@ -0,0 +1,127 @@
|
||||
import { prisma } from "~/server/db";
|
||||
import { generateNewCell } from "~/server/utils/generateNewCell";
|
||||
import dedent from "dedent";
|
||||
import { execSync } from "child_process";
|
||||
import fs from "fs";
|
||||
|
||||
const defaultId = "11111111-1111-1111-1111-111111111112";
|
||||
|
||||
await prisma.organization.deleteMany({
|
||||
where: { id: defaultId },
|
||||
});
|
||||
|
||||
// If there's an existing org, just seed into it
|
||||
const org =
|
||||
(await prisma.organization.findFirst({})) ??
|
||||
(await prisma.organization.create({
|
||||
data: { id: defaultId },
|
||||
}));
|
||||
|
||||
// Clone the repo from git@github.com:microsoft/AGIEval.git into a tmp dir if it doesn't exist
|
||||
const tmpDir = "/tmp/agi-eval";
|
||||
if (!fs.existsSync(tmpDir)) {
|
||||
execSync(`git clone git@github.com:microsoft/AGIEval.git ${tmpDir}`);
|
||||
}
|
||||
|
||||
const datasets = [
|
||||
"sat-en",
|
||||
"sat-math",
|
||||
"lsat-rc",
|
||||
"lsat-ar",
|
||||
"aqua-rat",
|
||||
"logiqa-en",
|
||||
"lsat-lr",
|
||||
"math",
|
||||
];
|
||||
|
||||
type Scenario = {
|
||||
passage: string | null;
|
||||
question: string;
|
||||
options: string[] | null;
|
||||
label: string;
|
||||
};
|
||||
|
||||
for (const dataset of datasets) {
|
||||
const experimentName = `AGI-Eval: ${dataset}`;
|
||||
const oldExperiment = await prisma.experiment.findFirst({
|
||||
where: {
|
||||
label: experimentName,
|
||||
organizationId: org.id,
|
||||
},
|
||||
});
|
||||
if (oldExperiment) {
|
||||
await prisma.experiment.deleteMany({
|
||||
where: { id: oldExperiment.id },
|
||||
});
|
||||
}
|
||||
|
||||
const experiment = await prisma.experiment.create({
|
||||
data: {
|
||||
id: oldExperiment?.id ?? undefined,
|
||||
label: experimentName,
|
||||
organizationId: org.id,
|
||||
},
|
||||
});
|
||||
|
||||
const scenarios: Scenario[] = fs
|
||||
.readFileSync(`${tmpDir}/data/v1/${dataset}.jsonl`, "utf8")
|
||||
.split("\n")
|
||||
.filter((line) => line.length > 0)
|
||||
.map((line) => JSON.parse(line) as Scenario);
|
||||
console.log("scenarios", scenarios.length);
|
||||
|
||||
await prisma.testScenario.createMany({
|
||||
data: scenarios.slice(0, 30).map((scenario, i) => ({
|
||||
experimentId: experiment.id,
|
||||
sortIndex: i,
|
||||
variableValues: {
|
||||
passage: scenario.passage,
|
||||
question: scenario.question,
|
||||
options: scenario.options?.join("\n"),
|
||||
label: scenario.label,
|
||||
},
|
||||
})),
|
||||
});
|
||||
|
||||
await prisma.templateVariable.createMany({
|
||||
data: ["passage", "question", "options", "label"].map((label) => ({
|
||||
experimentId: experiment.id,
|
||||
label,
|
||||
})),
|
||||
});
|
||||
|
||||
await prisma.promptVariant.createMany({
|
||||
data: [
|
||||
{
|
||||
experimentId: experiment.id,
|
||||
label: "Prompt Variant 1",
|
||||
sortIndex: 0,
|
||||
model: "gpt-3.5-turbo-0613",
|
||||
modelProvider: "openai/ChatCompletion",
|
||||
constructFnVersion: 1,
|
||||
constructFn: dedent`
|
||||
definePrompt("openai/ChatCompletion", {
|
||||
model: "gpt-3.5-turbo-0613",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: \`Passage: ${"$"}{scenario.passage}\n\nQuestion: ${"$"}{scenario.question}\n\nOptions: ${"$"}{scenario.options}\n\n Respond with just the letter of the best option in the format Answer: (A).\`
|
||||
}
|
||||
],
|
||||
temperature: 0,
|
||||
})`,
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
await prisma.evaluation.createMany({
|
||||
data: [
|
||||
{
|
||||
experimentId: experiment.id,
|
||||
label: "Eval",
|
||||
evalType: "CONTAINS",
|
||||
value: "Answer: ({{label}})",
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
Reference in New Issue
Block a user