cache eval outputs and add gpt4 eval

2023-07-17 17:55:36 -07:00
parent 011b12abb9
commit 7d41e94ca2
8 changed files with 168 additions and 115 deletions
--- a/src/components/OutputsTable/EditEvaluations.tsx
+++ b/src/components/OutputsTable/EditEvaluations.tsx
@@ -11,12 +11,14 @@ import {
  FormLabel,
  Select,
  FormHelperText,
+  Code,
 } from "@chakra-ui/react";
 import { type Evaluation, EvalType } from "@prisma/client";
 import { useCallback, useState } from "react";
 import { BsPencil, BsX } from "react-icons/bs";
 import { api } from "~/utils/api";
 import { useExperiment, useHandledAsyncCallback } from "~/utils/hooks";
+import AutoResizeTextArea from "../AutoResizeTextArea";

 type EvalValues = Pick<Evaluation, "label" | "value" | "evalType">;

@@ -36,7 +38,7 @@ export function EvaluationEditor(props: {
    <VStack borderTopWidth={1} borderColor="gray.200" py={4}>
      <HStack w="100%">
        <FormControl flex={1}>
-          <FormLabel fontSize="sm">Evaluation Name</FormLabel>
+          <FormLabel fontSize="sm">Eval Name</FormLabel>
          <Input
            size="sm"
            value={values.label}
@@ -44,7 +46,7 @@ export function EvaluationEditor(props: {
          />
        </FormControl>
        <FormControl flex={1}>
-          <FormLabel fontSize="sm">Match Type</FormLabel>
+          <FormLabel fontSize="sm">Eval Type</FormLabel>
          <Select
            size="sm"
            value={values.evalType}
@@ -63,17 +65,37 @@ export function EvaluationEditor(props: {
          </Select>
        </FormControl>
      </HStack>
-      <FormControl>
-        <FormLabel fontSize="sm">Match String</FormLabel>
-        <Input
-          size="sm"
-          value={values.value}
-          onChange={(e) => setValues((values) => ({ ...values, value: e.target.value }))}
-        />
-        <FormHelperText>
-          This string will be interpreted as a regex and checked against each model output.
-        </FormHelperText>
-      </FormControl>
+      {["CONTAINS", "DOES_NOT_CONTAIN"].includes(values.evalType) && (
+        <FormControl>
+          <FormLabel fontSize="sm">Match String</FormLabel>
+          <Input
+            size="sm"
+            value={values.value}
+            onChange={(e) => setValues((values) => ({ ...values, value: e.target.value }))}
+          />
+          <FormHelperText>
+            This string will be interpreted as a regex and checked against each model output. You
+            can include scenario variables using <Code>{"{{curly_braces}}"}</Code>
+          </FormHelperText>
+        </FormControl>
+      )}
+      {values.evalType === "GPT4_EVAL" && (
+        <FormControl pt={2}>
+          <FormLabel fontSize="sm">GPT4 Instructions</FormLabel>
+          <AutoResizeTextArea
+            size="sm"
+            value={values.value}
+            onChange={(e) => setValues((values) => ({ ...values, value: e.target.value }))}
+            minRows={3}
+          />
+          <FormHelperText>
+            Give instructions to GPT-4 for how to evaluate your prompt. It will have access to the
+            full scenario as well as the output it is evaluating. It will <strong>not</strong> have
+            access to the specific prompt variant, so be sure to be clear about the task you want it
+            to perform.
+          </FormHelperText>
+        </FormControl>
+      )}
      <HStack alignSelf="flex-end">
        <Button size="sm" onClick={props.onCancel} colorScheme="gray">
          Cancel
--- a/src/components/OutputsTable/EditScenarioVars.tsx
+++ b/src/components/OutputsTable/EditScenarioVars.tsx
@@ -1,4 +1,4 @@
-import { Text, Button, HStack, Heading, Icon, Input, Stack, Code } from "@chakra-ui/react";
+import { Text, Button, HStack, Heading, Icon, Input, Stack } from "@chakra-ui/react";
 import { useState } from "react";
 import { BsCheck, BsX } from "react-icons/bs";
 import { api } from "~/utils/api";
@@ -36,8 +36,7 @@ export default function EditScenarioVars() {
      <Heading size="sm">Scenario Variables</Heading>
      <Stack spacing={2}>
        <Text fontSize="sm">
-          Scenario variables can be used in your prompt variants as well as evaluations. Reference
-          them using <Code>{"{{curly_braces}}"}</Code>.
+          Scenario variables can be used in your prompt variants as well as evaluations.
        </Text>
        <HStack spacing={0}>
          <Input
--- a/src/components/OutputsTable/OutputCell/OutputStats.tsx
+++ b/src/components/OutputsTable/OutputCell/OutputStats.tsx
@@ -2,7 +2,7 @@ import { type SupportedModel } from "~/server/types";
 import { type Scenario } from "../types";
 import { type RouterOutputs } from "~/utils/api";
 import { calculateTokenCost } from "~/utils/calculateTokenCost";
-import { HStack, Icon, Text } from "@chakra-ui/react";
+import { HStack, Icon, Text, Tooltip } from "@chakra-ui/react";
 import { BsCheck, BsClock, BsCurrencyDollar, BsX } from "react-icons/bs";
 import { CostTooltip } from "~/components/tooltip/CostTooltip";

@@ -36,14 +36,20 @@ export const OutputStats = ({
        {modelOutput.outputEvaluation.map((evaluation) => {
          const passed = evaluation.result > 0.5;
          return (
-            <HStack spacing={0} key={evaluation.id}>
-              <Text>{evaluation.evaluation.label}</Text>
-              <Icon
-                as={passed ? BsCheck : BsX}
-                color={passed ? "green.500" : "red.500"}
-                boxSize={6}
-              />
-            </HStack>
+            <Tooltip
+              isDisabled={!evaluation.details}
+              label={evaluation.details}
+              key={evaluation.id}
+            >
+              <HStack spacing={0}>
+                <Text>{evaluation.evaluation.label}</Text>
+                <Icon
+                  as={passed ? BsCheck : BsX}
+                  color={passed ? "green.500" : "red.500"}
+                  boxSize={6}
+                />
+              </HStack>
+            </Tooltip>
          );
        })}
      </HStack>