cache eval outputs and add gpt4 eval
This commit is contained in:
@@ -11,12 +11,14 @@ import {
|
||||
FormLabel,
|
||||
Select,
|
||||
FormHelperText,
|
||||
Code,
|
||||
} from "@chakra-ui/react";
|
||||
import { type Evaluation, EvalType } from "@prisma/client";
|
||||
import { useCallback, useState } from "react";
|
||||
import { BsPencil, BsX } from "react-icons/bs";
|
||||
import { api } from "~/utils/api";
|
||||
import { useExperiment, useHandledAsyncCallback } from "~/utils/hooks";
|
||||
import AutoResizeTextArea from "../AutoResizeTextArea";
|
||||
|
||||
// The subset of Evaluation fields held in the editor's form state: label, value and evalType.
type EvalValues = Pick<Evaluation, "label" | "value" | "evalType">;
|
||||
|
||||
@@ -36,7 +38,7 @@ export function EvaluationEditor(props: {
|
||||
<VStack borderTopWidth={1} borderColor="gray.200" py={4}>
|
||||
<HStack w="100%">
|
||||
<FormControl flex={1}>
|
||||
<FormLabel fontSize="sm">Evaluation Name</FormLabel>
|
||||
<FormLabel fontSize="sm">Eval Name</FormLabel>
|
||||
<Input
|
||||
size="sm"
|
||||
value={values.label}
|
||||
@@ -44,7 +46,7 @@ export function EvaluationEditor(props: {
|
||||
/>
|
||||
</FormControl>
|
||||
<FormControl flex={1}>
|
||||
<FormLabel fontSize="sm">Match Type</FormLabel>
|
||||
<FormLabel fontSize="sm">Eval Type</FormLabel>
|
||||
<Select
|
||||
size="sm"
|
||||
value={values.evalType}
|
||||
@@ -63,17 +65,37 @@ export function EvaluationEditor(props: {
|
||||
</Select>
|
||||
</FormControl>
|
||||
</HStack>
|
||||
<FormControl>
|
||||
<FormLabel fontSize="sm">Match String</FormLabel>
|
||||
<Input
|
||||
size="sm"
|
||||
value={values.value}
|
||||
onChange={(e) => setValues((values) => ({ ...values, value: e.target.value }))}
|
||||
/>
|
||||
<FormHelperText>
|
||||
This string will be interpreted as a regex and checked against each model output.
|
||||
</FormHelperText>
|
||||
</FormControl>
|
||||
{["CONTAINS", "DOES_NOT_CONTAIN"].includes(values.evalType) && (
|
||||
<FormControl>
|
||||
<FormLabel fontSize="sm">Match String</FormLabel>
|
||||
<Input
|
||||
size="sm"
|
||||
value={values.value}
|
||||
onChange={(e) => setValues((values) => ({ ...values, value: e.target.value }))}
|
||||
/>
|
||||
<FormHelperText>
|
||||
This string will be interpreted as a regex and checked against each model output. You
|
||||
can include scenario variables using <Code>{"{{curly_braces}}"}</Code>
|
||||
</FormHelperText>
|
||||
</FormControl>
|
||||
)}
|
||||
{values.evalType === "GPT4_EVAL" && (
|
||||
<FormControl pt={2}>
|
||||
<FormLabel fontSize="sm">GPT4 Instructions</FormLabel>
|
||||
<AutoResizeTextArea
|
||||
size="sm"
|
||||
value={values.value}
|
||||
onChange={(e) => setValues((values) => ({ ...values, value: e.target.value }))}
|
||||
minRows={3}
|
||||
/>
|
||||
<FormHelperText>
|
||||
Give instructions to GPT-4 for how to evaluate your prompt. It will have access to the
|
||||
full scenario as well as the output it is evaluating. It will <strong>not</strong> have
|
||||
access to the specific prompt variant, so be sure to be clear about the task you want it
|
||||
to perform.
|
||||
</FormHelperText>
|
||||
</FormControl>
|
||||
)}
|
||||
<HStack alignSelf="flex-end">
|
||||
<Button size="sm" onClick={props.onCancel} colorScheme="gray">
|
||||
Cancel
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Text, Button, HStack, Heading, Icon, Input, Stack, Code } from "@chakra-ui/react";
|
||||
import { Text, Button, HStack, Heading, Icon, Input, Stack } from "@chakra-ui/react";
|
||||
import { useState } from "react";
|
||||
import { BsCheck, BsX } from "react-icons/bs";
|
||||
import { api } from "~/utils/api";
|
||||
@@ -36,8 +36,7 @@ export default function EditScenarioVars() {
|
||||
<Heading size="sm">Scenario Variables</Heading>
|
||||
<Stack spacing={2}>
|
||||
<Text fontSize="sm">
|
||||
Scenario variables can be used in your prompt variants as well as evaluations. Reference
|
||||
them using <Code>{"{{curly_braces}}"}</Code>.
|
||||
Scenario variables can be used in your prompt variants as well as evaluations.
|
||||
</Text>
|
||||
<HStack spacing={0}>
|
||||
<Input
|
||||
|
||||
@@ -2,7 +2,7 @@ import { type SupportedModel } from "~/server/types";
|
||||
import { type Scenario } from "../types";
|
||||
import { type RouterOutputs } from "~/utils/api";
|
||||
import { calculateTokenCost } from "~/utils/calculateTokenCost";
|
||||
import { HStack, Icon, Text } from "@chakra-ui/react";
|
||||
import { HStack, Icon, Text, Tooltip } from "@chakra-ui/react";
|
||||
import { BsCheck, BsClock, BsCurrencyDollar, BsX } from "react-icons/bs";
|
||||
import { CostTooltip } from "~/components/tooltip/CostTooltip";
|
||||
|
||||
@@ -36,14 +36,20 @@ export const OutputStats = ({
|
||||
{modelOutput.outputEvaluation.map((evaluation) => {
|
||||
const passed = evaluation.result > 0.5;
|
||||
return (
|
||||
<HStack spacing={0} key={evaluation.id}>
|
||||
<Text>{evaluation.evaluation.label}</Text>
|
||||
<Icon
|
||||
as={passed ? BsCheck : BsX}
|
||||
color={passed ? "green.500" : "red.500"}
|
||||
boxSize={6}
|
||||
/>
|
||||
</HStack>
|
||||
<Tooltip
|
||||
isDisabled={!evaluation.details}
|
||||
label={evaluation.details}
|
||||
key={evaluation.id}
|
||||
>
|
||||
<HStack spacing={0}>
|
||||
<Text>{evaluation.evaluation.label}</Text>
|
||||
<Icon
|
||||
as={passed ? BsCheck : BsX}
|
||||
color={passed ? "green.500" : "red.500"}
|
||||
boxSize={6}
|
||||
/>
|
||||
</HStack>
|
||||
</Tooltip>
|
||||
);
|
||||
})}
|
||||
</HStack>
|
||||
|
||||
Reference in New Issue
Block a user