import { Text, Button, HStack, Heading, Icon, Input, Stack, VStack, FormControl, FormLabel, Select, FormHelperText, } from "@chakra-ui/react";
import { type Evaluation, EvaluationMatchType } from "@prisma/client";
import { useCallback, useState } from "react";
import { BsPencil, BsX } from "react-icons/bs";
import { api } from "~/utils/api";
import { useExperiment, useHandledAsyncCallback } from "~/utils/hooks";

/**
 * Values edited by the evaluation form and handed to `onSave`.
 *
 * NOTE(review): `Pick` has no type arguments in this copy of the file. Judging by
 * the form state below, the intended fields are presumably name, matchString and
 * matchType of Evaluation - confirm against the original source.
 */
type EvalValues = Pick;

/**
 * Form for creating or editing a single evaluation.
 *
 * The form state is seeded from `props.evaluation`; when that is null the name
 * falls back to `props.defaultName` (then to an empty string), the match string
 * to an empty string and the match type to "CONTAINS". Because the seed is the
 * initial argument of useState, it is only read on the first render.
 *
 * @param props.evaluation  Existing evaluation to edit, or null for a new one.
 * @param props.defaultName Name used when there is no existing evaluation.
 * @param props.onSave      Callback typed to receive the evaluation id (or undefined) and the edited values.
 * @param props.onCancel    Callback typed to take no arguments.
 *
 * NOTE(review): the element tags of the returned markup are not present in this
 * copy, so where onSave and onCancel are wired up cannot be seen here.
 * NOTE(review): the helper text states the match string is interpreted as a
 * regex, while the default match type is "CONTAINS" - confirm the wording is
 * right for every match type.
 */
export function EvaluationEditor(props: {
  evaluation: Evaluation | null;
  defaultName?: string;
  onSave: (id: string | undefined, vals: EvalValues) => void;
  onCancel: () => void;
}) {
  // Local, uncommitted copy of the editable fields.
  const [values, setValues] = useState({
    name: props.evaluation?.name ?? props.defaultName ?? "",
    matchString: props.evaluation?.matchString ?? "",
    matchType: props.evaluation?.matchType ?? "CONTAINS",
  });

  // The two visible change handlers merge the new input value into the previous
  // state through a functional updater (name and matchString respectively).
  return ( Evaluation Name setValues((values) => ({ ...values, name: e.target.value }))} /> Match Type Match String setValues((values) => ({ ...values, matchString: e.target.value }))} /> This string will be interpreted as a regex and checked against each model output. );
}

/**
 * Lists the evaluations of the current experiment and lets the user create,
 * edit and delete them.
 *
 * NOTE(review): the element tags of the returned markup are not present in this
 * copy; only the embedded expressions and text remain, so the exact layout and
 * the wiring of onDelete, onSave and onCancel cannot be seen here.
 */
export default function EditEvaluations() {
  const experiment = useExperiment();

  // Query is issued with an empty experimentId while the experiment has no id yet;
  // the result falls back to an empty list while no data is available.
  const evaluations = api.evaluations.list.useQuery({ experimentId: experiment.data?.id ?? "" }).data ?? [];

  // Which editor is open: compared below against an evaluation id, against the
  // literal "new", and against null (presumably meaning no editor is open).
  const [editingId, setEditingId] = useState(null);

  const utils = api.useContext();
  const createMutation = api.evaluations.create.useMutation();
  const updateMutation = api.evaluations.update.useMutation();
  const deleteMutation = api.evaluations.delete.useMutation();

  // Deletes one evaluation, then invalidates the evaluation list and the
  // prompt-variant stats queries.
  // NOTE(review): the dependency list is empty although the callback reads
  // deleteMutation and utils - confirm how useHandledAsyncCallback uses it.
  const [onDelete] = useHandledAsyncCallback(async (id: string) => {
    await deleteMutation.mutateAsync({ id });
    await utils.evaluations.list.invalidate();
    await utils.promptVariants.stats.invalidate();
  }, []);

  // Closes the editor first, then updates the evaluation when an id is given or
  // creates a new one for the current experiment otherwise, and finally
  // invalidates the same two queries as onDelete. Does nothing further when the
  // experiment id is not available.
  // NOTE(review): the dependency list is empty although the callback reads
  // experiment.data; if useHandledAsyncCallback memoizes like useCallback, the
  // closure may keep seeing the experiment value from the first render and
  // return early - confirm and consider listing the dependencies.
  const [onSave] = useHandledAsyncCallback(async (id: string | undefined, vals: EvalValues) => {
    setEditingId(null);
    if (!experiment.data?.id) return;
    if (id) {
      await updateMutation.mutateAsync({ id, updates: vals, });
    } else {
      await createMutation.mutateAsync({ experimentId: experiment.data.id, ...vals, });
    }
    await utils.evaluations.list.invalidate();
    await utils.promptVariants.stats.invalidate();
  }, []);

  // Closes whichever editor is open without saving.
  const onCancel = useCallback(() => {
    setEditingId(null);
  }, []);

  // Each evaluation renders one branch when its id equals editingId and another
  // (showing name, match type and match string) otherwise; two further sections
  // are conditional on editingId being null and on it being "new".
  return ( Evaluations Evaluations allow you to compare prompt performance in an automated way. {evaluations.map((evaluation) => editingId == evaluation.id ? ( ) : ( {evaluation.name} {evaluation.matchType}: "{evaluation.matchString}" ), )} {editingId == null && ( )} {editingId == "new" && ( )} );
}