Fix seeds and update eval field names

2023-07-17 14:14:20 -07:00
parent 6b84a59372
commit 54369dba54
18 changed files with 136 additions and 80 deletions
--- a/package.json
+++ b/package.json
@@ -16,7 +16,8 @@
    "postinstall": "prisma generate",
    "lint": "next lint",
    "start": "next start",
-    "codegen": "tsx src/codegen/export-openai-types.ts"
+    "codegen": "tsx src/codegen/export-openai-types.ts",
+    "seed": "tsx prisma/seed.ts"
  },
  "dependencies": {
    "@babel/preset-typescript": "^7.22.5",
@@ -49,7 +50,7 @@
    "immer": "^10.0.2",
    "isolated-vm": "^4.5.0",
    "json-stringify-pretty-compact": "^4.0.0",
-    "lodash": "^4.17.21",
+    "lodash-es": "^4.17.21",
    "next": "^13.4.2",
    "next-auth": "^4.22.1",
    "nextjs-routes": "^2.0.1",
@@ -77,7 +78,7 @@
    "@types/cors": "^2.8.13",
    "@types/eslint": "^8.37.0",
    "@types/express": "^4.17.17",
-    "@types/lodash": "^4.14.195",
+    "@types/lodash-es": "^4.17.8",
    "@types/node": "^18.16.0",
    "@types/pluralize": "^0.0.30",
    "@types/react": "^18.2.6",
@@ -100,6 +101,6 @@
    "initVersion": "7.14.0"
  },
  "prisma": {
-    "seed": "tsx prisma/seed.ts"
+    "seed": "pnpm seed"
  }
 }
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -95,7 +95,7 @@ dependencies:
  json-stringify-pretty-compact:
    specifier: ^4.0.0
    version: 4.0.0
-  lodash:
+  lodash-es:
    specifier: ^4.17.21
    version: 4.17.21
  next:
@@ -175,9 +175,9 @@ devDependencies:
  '@types/express':
    specifier: ^4.17.17
    version: 4.17.17
-  '@types/lodash':
-    specifier: ^4.14.195
-    version: 4.14.195
+  '@types/lodash-es':
+    specifier: ^4.17.8
+    version: 4.17.8
  '@types/node':
    specifier: ^18.16.0
    version: 18.16.0
@@ -2753,6 +2753,12 @@ packages:
    resolution: {integrity: sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==}
    dev: true

+  /@types/lodash-es@4.17.8:
+    resolution: {integrity: sha512-euY3XQcZmIzSy7YH5+Unb3b2X12Wtk54YWINBvvGQ5SmMvwb11JQskGsfkH/5HXK77Kr8GF0wkVDIxzAisWtog==}
+    dependencies:
+      '@types/lodash': 4.14.195
+    dev: true
+
  /@types/lodash.mergewith@4.6.7:
    resolution: {integrity: sha512-3m+lkO5CLRRYU0fhGRp7zbsGi6+BZj0uTVSwvcKU+nSlhjA9/QRNfuSGnD2mX6hQA7ZbmcCkzk5h4ZYGOtk14A==}
    dependencies:
@@ -5379,6 +5385,10 @@ packages:
      p-locate: 5.0.0
    dev: true

+  /lodash-es@4.17.21:
+    resolution: {integrity: sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==}
+    dev: false
+
  /lodash.merge@4.6.2:
    resolution: {integrity: sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==}
    dev: true
--- a/prisma/migrations/20230717203031_add_gpt4_eval/migration.sql
+++ b/prisma/migrations/20230717203031_add_gpt4_eval/migration.sql
@@ -0,0 +1,24 @@
+/*
+  Warnings:
+
+  - You are about to rename the column `matchString` on the `Evaluation` table. If there is any code or views referring to the old name, they will break.
+  - You are about to rename the column `matchType` on the `Evaluation` table. If there is any code or views referring to the old name, they will break.
+  - You are about to rename the column `name` on the `Evaluation` table. If there is any code or views referring to the old name, they will break.
+  - You are about to rename the enum `EvaluationMatchType` to `EvalType`. If there is any code or views referring to the old name, they will break.
+*/
+
+-- RenameEnum
+ALTER TYPE "EvaluationMatchType" RENAME TO "EvalType";
+
+-- AlterTable
+ALTER TABLE "Evaluation" RENAME COLUMN "matchString" TO "value";
+ALTER TABLE "Evaluation" RENAME COLUMN "matchType" TO "evalType";
+ALTER TABLE "Evaluation" RENAME COLUMN "name" TO "label";
+
+-- AlterColumnType
+ALTER TABLE "Evaluation" ALTER COLUMN "evalType" TYPE "EvalType" USING "evalType"::text::"EvalType";
+
+-- SetNotNullConstraint
+ALTER TABLE "Evaluation" ALTER COLUMN "evalType" SET NOT NULL;
+ALTER TABLE "Evaluation" ALTER COLUMN "label" SET NOT NULL;
+ALTER TABLE "Evaluation" ALTER COLUMN "value" SET NOT NULL;
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -2,8 +2,7 @@
 // learn more about it in the docs: https://pris.ly/d/prisma-schema

 generator client {
-    provider        = "prisma-client-js"
-    previewFeatures = ["jsonProtocol"]
+    provider = "prisma-client-js"
 }

 datasource db {
@@ -130,7 +129,7 @@ model ModelOutput {
    @@index([inputHash])
 }

-enum EvaluationMatchType {
+enum EvalType {
    CONTAINS
    DOES_NOT_CONTAIN
 }
@@ -138,9 +137,9 @@ enum EvaluationMatchType {
 model Evaluation {
    id String @id @default(uuid()) @db.Uuid

-    name        String
-    matchString String
-    matchType   EvaluationMatchType
+    label    String
+    evalType EvalType
+    value    String

    experimentId String     @db.Uuid
    experiment   Experiment @relation(fields: [experimentId], references: [id], onDelete: Cascade)
--- a/prisma/seed.ts
+++ b/prisma/seed.ts
@@ -1,4 +1,6 @@
 import { prisma } from "~/server/db";
+import dedent from "dedent";
+import { generateNewCell } from "~/server/utils/generateNewCell";

 const experimentId = "11111111-1111-1111-1111-111111111111";

@@ -9,7 +11,7 @@ await prisma.experiment.deleteMany({
  },
 });

-const experiment = await prisma.experiment.create({
+await prisma.experiment.create({
  data: {
    id: experimentId,
    label: "Country Capitals Example",
@@ -37,28 +39,34 @@ await prisma.promptVariant.createMany({
      label: "Prompt Variant 1",
      sortIndex: 0,
      model: "gpt-3.5-turbo-0613",
-      constructFn: `prompt = {
-        model: "gpt-3.5-turbo-0613",
-        messages: [{ role: "user", content: "What is the capital of {{country}}?" }],
-        temperature: 0,
-      }`,
+      constructFn: dedent`
+        prompt = {
+          model: "gpt-3.5-turbo-0613",
+          messages: [
+            {
+              role: "user",
+              content: \`What is the capital of ${"$"}{scenario.country}?\`
+            }
+          ],
+          temperature: 0,
+        }`,
    },
    {
      experimentId,
      label: "Prompt Variant 2",
      sortIndex: 1,
      model: "gpt-3.5-turbo-0613",
-      constructFn: `prompt = {
-        model: "gpt-3.5-turbo-0613",
-        messages: [
-          {
-            role: "user",
-            content:
-              "What is the capital of {{country}}? Return just the city name and nothing else.",
-          },
-        ],
-        temperature: 0,
-      }`,
+      constructFn: dedent`
+        prompt = {
+          model: "gpt-3.5-turbo-0613",
+          messages: [
+            {
+              role: "user",
+              content: \`What is the capital of ${"$"}{scenario.country}? Return just the city name and nothing else.\`
+            }
+          ],
+          temperature: 0,
+        }`,
    },
  ],
 });
@@ -109,3 +117,26 @@ await prisma.testScenario.createMany({
    },
  ],
 });
+
+const variants = await prisma.promptVariant.findMany({
+  where: {
+    experimentId,
+  },
+});
+
+const scenarios = await prisma.testScenario.findMany({
+  where: {
+    experimentId,
+  },
+});
+
+await Promise.all(
+  variants
+    .flatMap((variant) =>
+      scenarios.map((scenario) => ({
+        promptVariantId: variant.id,
+        testScenarioId: scenario.id,
+      })),
+    )
+    .map((cell) => generateNewCell(cell.promptVariantId, cell.testScenarioId)),
+);
--- a/prisma/seedDemo.ts
+++ b/prisma/seedDemo.ts
@@ -183,9 +183,9 @@ await prisma.templateVariable.createMany({
 await prisma.evaluation.create({
  data: {
    experimentId: redditExperiment.id,
-    name: "Relevance Accuracy",
-    matchType: "CONTAINS",
-    matchString: '"{{relevance}}"',
+    label: "Relevance Accuracy",
+    evalType: "CONTAINS",
+    value: '"{{relevance}}"',
  },
 });

@@ -1124,12 +1124,3 @@ await prisma.testScenario.createMany({
    variableValues: vars,
  })),
 });
-
-// await prisma.evaluation.create({
-//   data: {
-//     experimentId: redditExperiment.id,
-//     name: "Scores Match",
-//     matchType: "CONTAINS",
-//     matchString: "{{score}}",
-//   },
-// });
--- a/src/codegen/export-openai-types.ts
+++ b/src/codegen/export-openai-types.ts
@@ -2,7 +2,7 @@ import fs from "fs";
 import path from "path";
 import openapiTS, { type OpenAPI3 } from "openapi-typescript";
 import YAML from "yaml";
-import _ from "lodash";
+import { pick } from "lodash-es";
 import assert from "assert";

 const OPENAPI_URL =
@@ -31,7 +31,7 @@ modelProperty.oneOf = undefined;

 delete schema["paths"];
 assert(schema.components?.schemas);
-schema.components.schemas = _.pick(schema.components?.schemas, [
+schema.components.schemas = pick(schema.components?.schemas, [
  "CreateChatCompletionRequest",
  "ChatCompletionRequestMessage",
  "ChatCompletionFunctions",
--- a/src/components/OutputsTable/EditEvaluations.tsx
+++ b/src/components/OutputsTable/EditEvaluations.tsx
@@ -12,13 +12,13 @@ import {
  Select,
  FormHelperText,
 } from "@chakra-ui/react";
-import { type Evaluation, EvaluationMatchType } from "@prisma/client";
+import { type Evaluation, EvalType } from "@prisma/client";
 import { useCallback, useState } from "react";
 import { BsPencil, BsX } from "react-icons/bs";
 import { api } from "~/utils/api";
 import { useExperiment, useHandledAsyncCallback } from "~/utils/hooks";

-type EvalValues = Pick<Evaluation, "name" | "matchString" | "matchType">;
+type EvalValues = Pick<Evaluation, "label" | "value" | "evalType">;

 export function EvaluationEditor(props: {
  evaluation: Evaluation | null;
@@ -27,9 +27,9 @@ export function EvaluationEditor(props: {
  onCancel: () => void;
 }) {
  const [values, setValues] = useState<EvalValues>({
-    name: props.evaluation?.name ?? props.defaultName ?? "",
-    matchString: props.evaluation?.matchString ?? "",
-    matchType: props.evaluation?.matchType ?? "CONTAINS",
+    label: props.evaluation?.label ?? props.defaultName ?? "",
+    value: props.evaluation?.value ?? "",
+    evalType: props.evaluation?.evalType ?? "CONTAINS",
  });

  return (
@@ -39,7 +39,7 @@ export function EvaluationEditor(props: {
          <FormLabel fontSize="sm">Evaluation Name</FormLabel>
          <Input
            size="sm"
-            value={values.name}
+            value={values.label}
            onChange={(e) => setValues((values) => ({ ...values, label: e.target.value }))}
          />
        </FormControl>
@@ -47,15 +47,15 @@ export function EvaluationEditor(props: {
          <FormLabel fontSize="sm">Match Type</FormLabel>
          <Select
            size="sm"
-            value={values.matchType}
+            value={values.evalType}
            onChange={(e) =>
              setValues((values) => ({
                ...values,
-                matchType: e.target.value as EvaluationMatchType,
+                evalType: e.target.value as EvalType,
              }))
            }
          >
-            {Object.values(EvaluationMatchType).map((type) => (
+            {Object.values(EvalType).map((type) => (
              <option key={type} value={type}>
                {type}
              </option>
@@ -67,8 +67,8 @@ export function EvaluationEditor(props: {
        <FormLabel fontSize="sm">Match String</FormLabel>
        <Input
          size="sm"
-          value={values.matchString}
-          onChange={(e) => setValues((values) => ({ ...values, matchString: e.target.value }))}
+          value={values.value}
+          onChange={(e) => setValues((values) => ({ ...values, value: e.target.value }))}
        />
        <FormHelperText>
          This string will be interpreted as a regex and checked against each model output.
@@ -156,9 +156,9 @@ export default function EditEvaluations() {
                align="center"
                key={evaluation.id}
              >
-                <Text fontWeight="bold">{evaluation.name}</Text>
+                <Text fontWeight="bold">{evaluation.label}</Text>
                <Text flex={1}>
-                  {evaluation.matchType}: &quot;{evaluation.matchString}&quot;
+                  {evaluation.evalType}: &quot;{evaluation.value}&quot;
                </Text>
                <Button
                  variant="unstyled"
--- a/src/components/OutputsTable/OutputCell/OutputStats.tsx
+++ b/src/components/OutputsTable/OutputCell/OutputStats.tsx
@@ -42,7 +42,7 @@ export const OutputStats = ({
          const passed = evaluateOutput(modelOutput, scenario, evaluation);
          return (
            <HStack spacing={0} key={evaluation.id}>
-              <Text>{evaluation.name}</Text>
+              <Text>{evaluation.label}</Text>
              <Icon
                as={passed ? BsCheck : BsX}
                color={passed ? "green.500" : "red.500"}
--- a/src/components/OutputsTable/ScenarioEditor.tsx
+++ b/src/components/OutputsTable/ScenarioEditor.tsx
@@ -1,6 +1,6 @@
 import { type DragEvent } from "react";
 import { api } from "~/utils/api";
-import { isEqual } from "lodash";
+import { isEqual } from "lodash-es";
 import { type Scenario } from "./types";
 import { useExperiment, useHandledAsyncCallback } from "~/utils/hooks";
 import { useState } from "react";
--- a/src/components/OutputsTable/VariantStats.tsx
+++ b/src/components/OutputsTable/VariantStats.tsx
@@ -47,7 +47,7 @@ export default function VariantStats(props: { variant: PromptVariant }) {
          const passedFrac = result.passCount / (result.passCount + result.failCount);
          return (
            <HStack key={result.id}>
-              <Text>{result.evaluation.name}</Text>
+              <Text>{result.evaluation.label}</Text>
              <Text color={scale(passedFrac).hex()} fontWeight="bold">
                {(passedFrac * 100).toFixed(1)}%
              </Text>
--- a/src/server/api/autogen.ts
+++ b/src/server/api/autogen.ts
@@ -1,7 +1,7 @@
 import { type CompletionCreateParams } from "openai/resources/chat";
 import { prisma } from "../db";
 import { openai } from "../utils/openai";
-import { pick } from "lodash";
+import { pick } from "lodash-es";

 type AxiosError = {
  response?: {
--- a/src/server/api/routers/evaluations.router.ts
+++ b/src/server/api/routers/evaluations.router.ts
@@ -1,4 +1,4 @@
-import { EvaluationMatchType } from "@prisma/client";
+import { EvalType } from "@prisma/client";
 import { z } from "zod";
 import { createTRPCRouter, publicProcedure } from "~/server/api/trpc";
 import { prisma } from "~/server/db";
@@ -18,18 +18,18 @@ export const evaluationsRouter = createTRPCRouter({
    .input(
      z.object({
        experimentId: z.string(),
-        name: z.string(),
-        matchString: z.string(),
-        matchType: z.nativeEnum(EvaluationMatchType),
+        label: z.string(),
+        value: z.string(),
+        evalType: z.nativeEnum(EvalType),
      }),
    )
    .mutation(async ({ input }) => {
      const evaluation = await prisma.evaluation.create({
        data: {
          experimentId: input.experimentId,
-          name: input.name,
-          matchString: input.matchString,
-          matchType: input.matchType,
+          label: input.label,
+          value: input.value,
+          evalType: input.evalType,
        },
      });
      await reevaluateEvaluation(evaluation);
@@ -41,8 +41,8 @@ export const evaluationsRouter = createTRPCRouter({
        id: z.string(),
        updates: z.object({
          name: z.string().optional(),
-          matchString: z.string().optional(),
-          matchType: z.nativeEnum(EvaluationMatchType).optional(),
+          value: z.string().optional(),
+          evalType: z.nativeEnum(EvalType).optional(),
        }),
      }),
    )
@@ -50,9 +50,9 @@ export const evaluationsRouter = createTRPCRouter({
      await prisma.evaluation.update({
        where: { id: input.id },
        data: {
-          name: input.updates.name,
-          matchString: input.updates.matchString,
-          matchType: input.updates.matchType,
+          label: input.updates.name,
+          value: input.updates.value,
+          evalType: input.updates.evalType,
        },
      });
      await reevaluateEvaluation(
--- a/src/server/api/routers/promptVariants.router.ts
+++ b/src/server/api/routers/promptVariants.router.ts
@@ -1,5 +1,5 @@
 import dedent from "dedent";
-import { isObject } from "lodash";
+import { isObject } from "lodash-es";
 import { z } from "zod";
 import { createTRPCRouter, publicProcedure } from "~/server/api/trpc";
 import { prisma } from "~/server/db";
--- a/src/server/utils/evaluateOutput.ts
+++ b/src/server/utils/evaluateOutput.ts
@@ -14,11 +14,11 @@ export const evaluateOutput = (

  const stringifiedMessage = message.content ?? JSON.stringify(message.function_call);

-  const matchRegex = fillTemplate(evaluation.matchString, scenario.variableValues as VariableMap);
+  const matchRegex = fillTemplate(evaluation.value, scenario.variableValues as VariableMap);

  let match;

-  switch (evaluation.matchType) {
+  switch (evaluation.evalType) {
    case "CONTAINS":
      match = stringifiedMessage.match(matchRegex) !== null;
      break;
--- a/src/server/utils/getCompletion.ts
+++ b/src/server/utils/getCompletion.ts
@@ -1,5 +1,5 @@
 /* eslint-disable @typescript-eslint/no-unsafe-call */
-import { isObject } from "lodash";
+import { isObject } from "lodash-es";
 import { Prisma } from "@prisma/client";
 import { streamChatCompletion } from "./openai";
 import { wsConnection } from "~/utils/wsConnection";
--- a/src/server/utils/openai.ts
+++ b/src/server/utils/openai.ts
@@ -1,4 +1,4 @@
-import { omit } from "lodash";
+import { omit } from "lodash-es";
 import { env } from "~/env.mjs";

 import OpenAI from "openai";
--- a/src/server/utils/shouldStream.ts
+++ b/src/server/utils/shouldStream.ts
@@ -1,4 +1,4 @@
-import { isObject } from "lodash";
+import { isObject } from "lodash-es";
 import { type JSONSerializable } from "../types";

 export const shouldStream = (config: JSONSerializable): boolean => {