Compare commits
9 Commits
scenario-s...function-p

| Author | SHA1 | Date |
|---|---|---|
|  | 328cd4f5e6 |  |
|  | 26b6fa4f0c |  |
|  | 807665fdc1 |  |
|  | d6597d2c8a |  |
|  | 566d67bf48 |  |
|  | d4fb8b689a |  |
|  | 98b231c8bd |  |
|  | 45afb1f1f4 |  |
|  | 223b990005 |  |
@@ -1,2 +1,2 @@
src/codegen/openai.schema.json
*.schema.json
pnpm-lock.yaml

@@ -45,6 +45,7 @@ Natively supports [OpenAI function calls](https://openai.com/blog/function-calli

- All models available through the OpenAI [chat completion API](https://platform.openai.com/docs/guides/gpt/chat-completions-api)
- Llama2 [7b chat](https://replicate.com/a16z-infra/llama7b-v2-chat), [13b chat](https://replicate.com/a16z-infra/llama13b-v2-chat), [70b chat](https://replicate.com/replicate/llama70b-v2-chat).
- Anthropic's [Claude 1 Instant](https://www.anthropic.com/index/introducing-claude) and [Claude 2](https://www.anthropic.com/index/claude-2)

## Running Locally

@@ -21,6 +21,7 @@
    "check": "concurrently 'pnpm lint' 'pnpm tsc' 'pnpm prettier . --check'"
  },
  "dependencies": {
    "@anthropic-ai/sdk": "^0.5.8",
    "@apidevtools/json-schema-ref-parser": "^10.1.0",
    "@babel/preset-typescript": "^7.22.5",
    "@babel/standalone": "^7.22.9",

21 pnpm-lock.yaml generated

@@ -1,10 +1,13 @@
lockfileVersion: '6.0'
lockfileVersion: '6.1'

settings:
  autoInstallPeers: true
  excludeLinksFromLockfile: false

dependencies:
  '@anthropic-ai/sdk':
    specifier: ^0.5.8
    version: 0.5.8
  '@apidevtools/json-schema-ref-parser':
    specifier: ^10.1.0
    version: 10.1.0

@@ -298,6 +301,22 @@ packages:
      '@jridgewell/gen-mapping': 0.3.3
      '@jridgewell/trace-mapping': 0.3.18

  /@anthropic-ai/sdk@0.5.8:
    resolution: {integrity: sha512-iHenjcE2Q/az6VZiP1DueOSvKNRmxsly6Rx2yjJBoy7OBYVFGVjEdgs2mPQHtTX0ibKAR7tPq6F6MQbKDPWcKg==}
    dependencies:
      '@types/node': 18.16.0
      '@types/node-fetch': 2.6.4
      abort-controller: 3.0.0
      agentkeepalive: 4.3.0
      digest-fetch: 1.3.0
      form-data-encoder: 1.7.2
      formdata-node: 4.4.1
      node-fetch: 2.6.12
    transitivePeerDependencies:
      - encoding
      - supports-color
    dev: false

  /@apidevtools/json-schema-ref-parser@10.1.0:
    resolution: {integrity: sha512-3e+viyMuXdrcK8v5pvP+SDoAQ77FH6OyRmuK48SZKmdHJRFm87RsSs8qm6kP39a/pOPURByJw+OXzQIqcfmKtA==}
    engines: {node: '>= 16'}
@@ -0,0 +1,52 @@
-- DropForeignKey
ALTER TABLE "ModelOutput" DROP CONSTRAINT "ModelOutput_scenarioVariantCellId_fkey";

-- DropForeignKey
ALTER TABLE "OutputEvaluation" DROP CONSTRAINT "OutputEvaluation_modelOutputId_fkey";

-- DropIndex
DROP INDEX "OutputEvaluation_modelOutputId_evaluationId_key";

-- AlterTable
ALTER TABLE "OutputEvaluation" RENAME COLUMN "modelOutputId" TO "modelResponseId";

-- AlterTable
ALTER TABLE "ScenarioVariantCell" DROP COLUMN "retryTime",
DROP COLUMN "statusCode",
ADD COLUMN "jobQueuedAt" TIMESTAMP(3),
ADD COLUMN "jobStartedAt" TIMESTAMP(3);

ALTER TABLE "ModelOutput" RENAME TO "ModelResponse";

ALTER TABLE "ModelResponse"
ADD COLUMN "requestedAt" TIMESTAMP(3),
ADD COLUMN "receivedAt" TIMESTAMP(3),
ADD COLUMN "statusCode" INTEGER,
ADD COLUMN "errorMessage" TEXT,
ADD COLUMN "retryTime" TIMESTAMP(3),
ADD COLUMN "outdated" BOOLEAN NOT NULL DEFAULT false;

-- 3. Remove the unnecessary column
ALTER TABLE "ModelResponse"
DROP COLUMN "timeToComplete";

-- AlterTable
ALTER TABLE "ModelResponse" RENAME CONSTRAINT "ModelOutput_pkey" TO "ModelResponse_pkey";
ALTER TABLE "ModelResponse" ALTER COLUMN "output" DROP NOT NULL;

-- DropIndex
DROP INDEX "ModelOutput_scenarioVariantCellId_key";

-- AddForeignKey
ALTER TABLE "ModelResponse" ADD CONSTRAINT "ModelResponse_scenarioVariantCellId_fkey" FOREIGN KEY ("scenarioVariantCellId") REFERENCES "ScenarioVariantCell"("id") ON DELETE CASCADE ON UPDATE CASCADE;

-- RenameIndex
ALTER INDEX "ModelOutput_inputHash_idx" RENAME TO "ModelResponse_inputHash_idx";

-- CreateIndex
CREATE UNIQUE INDEX "OutputEvaluation_modelResponseId_evaluationId_key" ON "OutputEvaluation"("modelResponseId", "evaluationId");

-- AddForeignKey
ALTER TABLE "OutputEvaluation" ADD CONSTRAINT "OutputEvaluation_modelResponseId_fkey" FOREIGN KEY ("modelResponseId") REFERENCES "ModelResponse"("id") ON DELETE CASCADE ON UPDATE CASCADE;
@@ -90,12 +90,11 @@ enum CellRetrievalStatus {
model ScenarioVariantCell {
  id String @id @default(uuid()) @db.Uuid

  statusCode Int?
  errorMessage String?
  retryTime DateTime?
  retrievalStatus CellRetrievalStatus @default(COMPLETE)

  modelOutput ModelOutput?
  jobQueuedAt DateTime?
  jobStartedAt DateTime?
  modelResponses ModelResponse[]
  errorMessage String? // Contains errors that occurred independently of model responses

  promptVariantId String @db.Uuid
  promptVariant PromptVariant @relation(fields: [promptVariantId], references: [id], onDelete: Cascade)
@@ -110,15 +109,20 @@ model ScenarioVariantCell {
  @@unique([promptVariantId, testScenarioId])
}

model ModelOutput {
model ModelResponse {
  id String @id @default(uuid()) @db.Uuid

  inputHash String
  output Json
  timeToComplete Int @default(0)
  cost Float?
  promptTokens Int?
  completionTokens Int?
  inputHash String
  requestedAt DateTime?
  receivedAt DateTime?
  output Json?
  cost Float?
  promptTokens Int?
  completionTokens Int?
  statusCode Int?
  errorMessage String?
  retryTime DateTime?
  outdated Boolean @default(false)

  createdAt DateTime @default(now())
  updatedAt DateTime @updatedAt
@@ -127,7 +131,6 @@ model ModelOutput {
  scenarioVariantCell ScenarioVariantCell @relation(fields: [scenarioVariantCellId], references: [id], onDelete: Cascade)
  outputEvaluations OutputEvaluation[]

  @@unique([scenarioVariantCellId])
  @@index([inputHash])
}

@@ -159,8 +162,8 @@ model OutputEvaluation {
  result Float
  details String?

  modelOutputId String @db.Uuid
  modelOutput ModelOutput @relation(fields: [modelOutputId], references: [id], onDelete: Cascade)
  modelResponseId String @db.Uuid
  modelResponse ModelResponse @relation(fields: [modelResponseId], references: [id], onDelete: Cascade)

  evaluationId String @db.Uuid
  evaluation Evaluation @relation(fields: [evaluationId], references: [id], onDelete: Cascade)
@@ -168,7 +171,7 @@ model OutputEvaluation {
  createdAt DateTime @default(now())
  updatedAt DateTime @updatedAt

  @@unique([modelOutputId, evaluationId])
  @@unique([modelResponseId, evaluationId])
}

model Organization {
@@ -36,17 +36,9 @@ export const DeleteButton = () => {

  return (
    <>
      <Button
        size="sm"
        variant={{ base: "outline", lg: "ghost" }}
        colorScheme="red"
        fontWeight="normal"
        onClick={onOpen}
      >
      <Button size="sm" variant="ghost" colorScheme="red" fontWeight="normal" onClick={onOpen}>
        <Icon as={BsTrash} boxSize={4} />
        <Text display={{ base: "none", lg: "block" }} ml={2}>
          Delete Experiment
        </Text>
        <Text ml={2}>Delete Experiment</Text>
      </Button>

      <AlertDialog isOpen={isOpen} leastDestructiveRef={cancelRef} onClose={onClose}>
19 src/components/OutputsTable/OutputCell/CellContent.tsx Normal file

@@ -0,0 +1,19 @@
import { type StackProps, VStack } from "@chakra-ui/react";
import { CellOptions } from "./CellOptions";

export const CellContent = ({
  hardRefetch,
  hardRefetching,
  children,
  ...props
}: {
  hardRefetch: () => void;
  hardRefetching: boolean;
} & StackProps) => (
  <VStack w="full" alignItems="flex-start" {...props}>
    <CellOptions refetchingOutput={hardRefetching} refetchOutput={hardRefetch} />
    <VStack w="full" alignItems="flex-start" maxH={500} overflowY="auto">
      {children}
    </VStack>
  </VStack>
);
@@ -1,4 +1,4 @@
import { Button, HStack, Icon, Tooltip } from "@chakra-ui/react";
import { Button, HStack, Icon, Spinner, Tooltip } from "@chakra-ui/react";
import { BsArrowClockwise } from "react-icons/bs";
import { useExperimentAccess } from "~/utils/hooks";

@@ -12,7 +12,7 @@ export const CellOptions = ({
  const { canModify } = useExperimentAccess();
  return (
    <HStack justifyContent="flex-end" w="full">
      {!refetchingOutput && canModify && (
      {canModify && (
        <Tooltip label="Refetch output" aria-label="refetch output">
          <Button
            size="xs"
@@ -28,7 +28,7 @@ export const CellOptions = ({
            onClick={refetchOutput}
            aria-label="refetch output"
          >
            <Icon as={BsArrowClockwise} boxSize={4} />
            <Icon as={refetchingOutput ? Spinner : BsArrowClockwise} boxSize={4} />
          </Button>
        </Tooltip>
      )}
@@ -1,16 +1,19 @@
import { api } from "~/utils/api";
import { type PromptVariant, type Scenario } from "../types";
import { Spinner, Text, Center, VStack } from "@chakra-ui/react";
import { Text, VStack } from "@chakra-ui/react";
import { useExperiment, useHandledAsyncCallback } from "~/utils/hooks";
import SyntaxHighlighter from "react-syntax-highlighter";
import { docco } from "react-syntax-highlighter/dist/cjs/styles/hljs";
import stringify from "json-stringify-pretty-compact";
import { type ReactElement, useState, useEffect } from "react";
import { type ReactElement, useState, useEffect, Fragment } from "react";
import useSocket from "~/utils/useSocket";
import { OutputStats } from "./OutputStats";
import { ErrorHandler } from "./ErrorHandler";
import { CellOptions } from "./CellOptions";
import { RetryCountdown } from "./RetryCountdown";
import frontendModelProviders from "~/modelProviders/frontendModelProviders";
import { ResponseLog } from "./ResponseLog";
import { CellContent } from "./CellContent";

const WAITING_MESSAGE_INTERVAL = 20000;

export default function OutputCell({
  scenario,
@@ -60,51 +63,97 @@ export default function OutputCell({

  const awaitingOutput =
    !cell ||
    !cell.evalsComplete ||
    cell.retrievalStatus === "PENDING" ||
    cell.retrievalStatus === "IN_PROGRESS" ||
    hardRefetching;
  useEffect(() => setRefetchInterval(awaitingOutput ? 1000 : 0), [awaitingOutput]);

  const modelOutput = cell?.modelOutput;

  // TODO: disconnect from socket if we're not streaming anymore
  const streamedMessage = useSocket<OutputSchema>(cell?.id);

  if (!vars) return null;

  if (disabledReason) return <Text color="gray.500">{disabledReason}</Text>;

  if (awaitingOutput && !streamedMessage)
    return (
      <Center h="100%" w="100%">
        <Spinner />
      </Center>
    );

  if (!cell && !fetchingOutput)
    return (
      <VStack>
        <CellOptions refetchingOutput={hardRefetching} refetchOutput={hardRefetch} />
      <CellContent hardRefetching={hardRefetching} hardRefetch={hardRefetch}>
        <Text color="gray.500">Error retrieving output</Text>
      </VStack>
      </CellContent>
    );

  if (cell && cell.errorMessage) {
    return (
      <VStack>
        <CellOptions refetchingOutput={hardRefetching} refetchOutput={hardRefetch} />
        <ErrorHandler cell={cell} refetchOutput={hardRefetch} />
      </VStack>
      <CellContent hardRefetching={hardRefetching} hardRefetch={hardRefetch}>
        <Text color="red.500">{cell.errorMessage}</Text>
      </CellContent>
    );
  }

  const normalizedOutput = modelOutput
    ? provider.normalizeOutput(modelOutput.output)
  if (disabledReason) return <Text color="gray.500">{disabledReason}</Text>;

  const mostRecentResponse = cell?.modelResponses[cell.modelResponses.length - 1];
  const showLogs = !streamedMessage && !mostRecentResponse?.output;

  if (showLogs)
    return (
      <CellContent
        hardRefetching={hardRefetching}
        hardRefetch={hardRefetch}
        alignItems="flex-start"
        fontFamily="inconsolata, monospace"
        spacing={0}
      >
        {cell?.jobQueuedAt && <ResponseLog time={cell.jobQueuedAt} title="Job queued" />}
        {cell?.jobStartedAt && <ResponseLog time={cell.jobStartedAt} title="Job started" />}
        {cell?.modelResponses?.map((response) => {
          let numWaitingMessages = 0;
          const relativeWaitingTime = response.receivedAt
            ? response.receivedAt.getTime()
            : Date.now();
          if (response.requestedAt) {
            numWaitingMessages = Math.floor(
              (relativeWaitingTime - response.requestedAt.getTime()) / WAITING_MESSAGE_INTERVAL,
            );
          }
          return (
            <Fragment key={response.id}>
              {response.requestedAt && (
                <ResponseLog time={response.requestedAt} title="Request sent to API" />
              )}
              {response.requestedAt &&
                Array.from({ length: numWaitingMessages }, (_, i) => (
                  <ResponseLog
                    key={`waiting-${i}`}
                    // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
                    time={new Date(response.requestedAt!.getTime() + i * WAITING_MESSAGE_INTERVAL)}
                    title="Waiting for response"
                  />
                ))}
              {response.receivedAt && (
                <ResponseLog
                  time={response.receivedAt}
                  title="Response received from API"
                  message={`statusCode: ${response.statusCode ?? ""}\n ${
                    response.errorMessage ?? ""
                  }`}
                />
              )}
            </Fragment>
          );
        }) ?? null}
        {mostRecentResponse?.retryTime && (
          <RetryCountdown retryTime={mostRecentResponse.retryTime} />
        )}
      </CellContent>
    );

  const normalizedOutput = mostRecentResponse?.output
    ? provider.normalizeOutput(mostRecentResponse?.output)
    : streamedMessage
    ? provider.normalizeOutput(streamedMessage)
    : null;

  if (modelOutput && normalizedOutput?.type === "json") {
  if (mostRecentResponse?.output && normalizedOutput?.type === "json") {
    return (
      <VStack
        w="100%"
@@ -114,8 +163,13 @@ export default function OutputCell({
        overflowX="hidden"
        justifyContent="space-between"
      >
        <VStack w="full" flex={1} spacing={0}>
          <CellOptions refetchingOutput={hardRefetching} refetchOutput={hardRefetch} />
        <CellContent
          hardRefetching={hardRefetching}
          hardRefetch={hardRefetch}
          w="full"
          flex={1}
          spacing={0}
        >
          <SyntaxHighlighter
            customStyle={{ overflowX: "unset", width: "100%", flex: 1 }}
            language="json"
@@ -127,8 +181,8 @@ export default function OutputCell({
          >
            {stringify(normalizedOutput.value, { maxLength: 40 })}
          </SyntaxHighlighter>
        </VStack>
          <OutputStats modelOutput={modelOutput} scenario={scenario} />
        </CellContent>
        <OutputStats modelResponse={mostRecentResponse} scenario={scenario} />
      </VStack>
    );
  }
@@ -138,10 +192,13 @@ export default function OutputCell({
  return (
    <VStack w="100%" h="100%" justifyContent="space-between" whiteSpace="pre-wrap">
      <VStack w="full" alignItems="flex-start" spacing={0}>
        <CellOptions refetchingOutput={hardRefetching} refetchOutput={hardRefetch} />
        <Text>{contentToDisplay}</Text>
      <CellContent hardRefetching={hardRefetching} hardRefetch={hardRefetch}>
        <Text>{contentToDisplay}</Text>
      </CellContent>
      </VStack>
      {modelOutput && <OutputStats modelOutput={modelOutput} scenario={scenario} />}
      {mostRecentResponse?.output && (
        <OutputStats modelResponse={mostRecentResponse} scenario={scenario} />
      )}
    </VStack>
  );
}
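A note on the waiting-message arithmetic in `OutputCell` above: one "Waiting for response" entry is logged for each full `WAITING_MESSAGE_INTERVAL` that elapses between `requestedAt` and `receivedAt` (or the current time, while the request is still pending). A quick worked example with hypothetical timestamps:

```ts
// Hypothetical timestamps, illustrating the Math.floor(...) calculation above:
const WAITING_MESSAGE_INTERVAL = 20000; // same constant as in OutputCell
const requestedAt = new Date("2023-08-01T00:00:00Z").getTime();
const receivedAt = new Date("2023-08-01T00:01:05Z").getTime(); // 65s later
const numWaitingMessages = Math.floor((receivedAt - requestedAt) / WAITING_MESSAGE_INTERVAL);
console.log(numWaitingMessages); // => 3, logged at +0s, +20s, and +40s
```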
@@ -7,28 +7,32 @@ import { CostTooltip } from "~/components/tooltip/CostTooltip";
const SHOW_TIME = true;

export const OutputStats = ({
  modelOutput,
  modelResponse,
}: {
  modelOutput: NonNullable<
    NonNullable<RouterOutputs["scenarioVariantCells"]["get"]>["modelOutput"]
  modelResponse: NonNullable<
    NonNullable<RouterOutputs["scenarioVariantCells"]["get"]>["modelResponses"][0]
  >;
  scenario: Scenario;
}) => {
  const timeToComplete = modelOutput.timeToComplete;
  const timeToComplete =
    modelResponse.receivedAt && modelResponse.requestedAt
      ? modelResponse.receivedAt.getTime() - modelResponse.requestedAt.getTime()
      : 0;

  const promptTokens = modelOutput.promptTokens;
  const completionTokens = modelOutput.completionTokens;
  const promptTokens = modelResponse.promptTokens;
  const completionTokens = modelResponse.completionTokens;

  return (
    <HStack w="full" align="center" color="gray.500" fontSize="2xs" mt={{ base: 0, md: 1 }}>
      <HStack flex={1}>
        {modelOutput.outputEvaluations.map((evaluation) => {
        {modelResponse.outputEvaluations.map((evaluation) => {
          const passed = evaluation.result > 0.5;
          return (
            <Tooltip
              isDisabled={!evaluation.details}
              label={evaluation.details}
              key={evaluation.id}
              shouldWrapChildren
            >
              <HStack spacing={0}>
                <Text>{evaluation.evaluation.label}</Text>
@@ -42,15 +46,15 @@ export const OutputStats = ({
          );
        })}
      </HStack>
      {modelOutput.cost && (
      {modelResponse.cost && (
        <CostTooltip
          promptTokens={promptTokens}
          completionTokens={completionTokens}
          cost={modelOutput.cost}
          cost={modelResponse.cost}
        >
          <HStack spacing={0}>
            <Icon as={BsCurrencyDollar} />
            <Text mr={1}>{modelOutput.cost.toFixed(3)}</Text>
            <Text mr={1}>{modelResponse.cost.toFixed(3)}</Text>
          </HStack>
        </CostTooltip>
      )}
22 src/components/OutputsTable/OutputCell/ResponseLog.tsx Normal file

@@ -0,0 +1,22 @@
import { HStack, VStack, Text } from "@chakra-ui/react";
import dayjs from "dayjs";

export const ResponseLog = ({
  time,
  title,
  message,
}: {
  time: Date;
  title: string;
  message?: string;
}) => {
  return (
    <VStack spacing={0} alignItems="flex-start">
      <HStack>
        <Text>{dayjs(time).format("HH:mm:ss")}</Text>
        <Text>{title}</Text>
      </HStack>
      {message && <Text pl={4}>{message}</Text>}
    </VStack>
  );
};
@@ -1,21 +1,12 @@
import { type ScenarioVariantCell } from "@prisma/client";
import { VStack, Text } from "@chakra-ui/react";
import { Text } from "@chakra-ui/react";
import { useEffect, useState } from "react";
import pluralize from "pluralize";

export const ErrorHandler = ({
  cell,
  refetchOutput,
}: {
  cell: ScenarioVariantCell;
  refetchOutput: () => void;
}) => {
export const RetryCountdown = ({ retryTime }: { retryTime: Date }) => {
  const [msToWait, setMsToWait] = useState(0);

  useEffect(() => {
    if (!cell.retryTime) return;

    const initialWaitTime = cell.retryTime.getTime() - Date.now();
    const initialWaitTime = retryTime.getTime() - Date.now();
    const msModuloOneSecond = initialWaitTime % 1000;
    let remainingTime = initialWaitTime - msModuloOneSecond;
    setMsToWait(remainingTime);
@@ -36,18 +27,13 @@ export const ErrorHandler = ({
      clearInterval(interval);
      clearTimeout(timeout);
    };
  }, [cell.retryTime, cell.statusCode, setMsToWait, refetchOutput]);
  }, [retryTime]);

  if (msToWait <= 0) return null;

  return (
    <VStack w="full">
      <Text color="red.600" wordBreak="break-word">
        {cell.errorMessage}
      </Text>
      {msToWait > 0 && (
        <Text color="red.600" fontSize="sm">
          Retrying in {pluralize("second", Math.ceil(msToWait / 1000), true)}...
        </Text>
      )}
    </VStack>
    <Text color="red.600" fontSize="sm">
      Retrying in {pluralize("second", Math.ceil(msToWait / 1000), true)}...
    </Text>
  );
};
@@ -21,17 +21,14 @@ export default function VariantStats(props: { variant: PromptVariant }) {
      completionTokens: 0,
      scenarioCount: 0,
      outputCount: 0,
      awaitingRetrievals: false,
      awaitingEvals: false,
    },
    refetchInterval,
    },
  );

  // Poll every two seconds while we are waiting for LLM retrievals to finish
  useEffect(
    () => setRefetchInterval(data.awaitingRetrievals ? 2000 : 0),
    [data.awaitingRetrievals],
  );
  useEffect(() => setRefetchInterval(data.awaitingEvals ? 5000 : 0), [data.awaitingEvals]);

  const [passColor, neutralColor, failColor] = useToken("colors", [
    "green.500",
@@ -69,7 +66,7 @@ export default function VariantStats(props: { variant: PromptVariant }) {
        );
      })}
    </HStack>
    {data.overallCost && !data.awaitingRetrievals && (
    {data.overallCost && (
      <CostTooltip
        promptTokens={data.promptTokens}
        completionTokens={data.completionTokens}
@@ -9,15 +9,32 @@ import { ScenariosHeader } from "./ScenariosHeader";
import { borders } from "./styles";
import { useScenarios } from "~/utils/hooks";
import ScenarioPaginator from "./ScenarioPaginator";
import { Fragment } from "react";
import { Fragment, useEffect, useState } from "react";

export default function OutputsTable({ experimentId }: { experimentId: string | undefined }) {
export default function OutputsTable({
  experimentId,
  func,
}: {
  experimentId: string | undefined;
  func: () => void;
}) {
  const variants = api.promptVariants.list.useQuery(
    { experimentId: experimentId as string },
    { enabled: !!experimentId },
  );

  const scenarios = useScenarios();
  const [newFunc, setNewFunc] = useState<() => void | null>();

  useEffect(() => {
    console.log('func', func)
    if (func) {
      setNewFunc(prev => {
        console.log('Setting newFunc from', prev, 'to', func);
        return func;
      });
    }
  }, [func]);

  if (!variants.data || !scenarios.data) return null;

@@ -33,7 +50,7 @@ export default function OutputsTable({ experimentId }: { experimentId: string |
    <Grid
      pt={4}
      pb={24}
      pl={4}
      pl={8}
      display="grid"
      gridTemplateColumns={`250px repeat(${variants.data.length}, minmax(300px, 1fr)) auto`}
      sx={{
@@ -46,13 +63,14 @@ export default function OutputsTable({ experimentId }: { experimentId: string |
      <GridItem rowSpan={variantHeaderRows}>
        <AddVariantButton />
      </GridItem>

      {newFunc && newFunc.toString()}
      {variants.data.map((variant, i) => {
        const sharedProps: GridItemProps = {
          ...borders,
          colStart: i + 2,
          borderLeftWidth: i === 0 ? 1 : 0,
          marginLeft: i === 0 ? "-1px" : 0,
          backgroundColor: "gray.100",
        };
        return (
          <Fragment key={variant.uiId}>
@@ -1,11 +1,4 @@
import { type GridItemProps, type SystemStyleObject } from "@chakra-ui/react";

export const stickyHeaderStyle: SystemStyleObject = {
  position: "sticky",
  top: "0",
  backgroundColor: "#fff",
  zIndex: 10,
};
import { type GridItemProps } from "@chakra-ui/react";

export const borders: GridItemProps = {
  borderRightWidth: 1,
@@ -6,7 +6,6 @@ import { useExperimentAccess, useHandledAsyncCallback } from "~/utils/hooks";
import { HStack, Icon, Text, GridItem, type GridItemProps } from "@chakra-ui/react"; // Changed here
import { cellPadding, headerMinHeight } from "../constants";
import AutoResizeTextArea from "../AutoResizeTextArea";
import { stickyHeaderStyle } from "../OutputsTable/styles";
import VariantHeaderMenuButton from "./VariantHeaderMenuButton";

export default function VariantHeader(
@@ -53,7 +52,17 @@ export default function VariantHeader(

  if (!canModify) {
    return (
      <GridItem padding={0} sx={stickyHeaderStyle} borderTopWidth={1} {...gridItemProps}>
      <GridItem
        padding={0}
        sx={{
          position: "sticky",
          top: "0",
          // Ensure that the menu always appears above the sticky header of other variants
          zIndex: menuOpen ? "dropdown" : 10,
        }}
        borderTopWidth={1}
        {...gridItemProps}
      >
        <Text fontSize={16} fontWeight="bold" px={cellPadding.x} py={cellPadding.y}>
          {variant.label}
        </Text>
@@ -65,15 +74,16 @@ export default function VariantHeader(
    <GridItem
      padding={0}
      sx={{
        ...stickyHeaderStyle,
        position: "sticky",
        top: "0",
        // Ensure that the menu always appears above the sticky header of other variants
        zIndex: menuOpen ? "dropdown" : stickyHeaderStyle.zIndex,
        zIndex: menuOpen ? "dropdown" : 10,
      }}
      borderTopWidth={1}
      {...gridItemProps}
    >
      <HStack
        spacing={4}
        spacing={2}
        alignItems="flex-start"
        minH={headerMinHeight}
        draggable={!isInputHovered}
@@ -92,7 +102,8 @@ export default function VariantHeader(
          setIsDragTarget(false);
        }}
        onDrop={onReorder}
        backgroundColor={isDragTarget ? "gray.100" : "transparent"}
        backgroundColor={isDragTarget ? "gray.200" : "gray.100"}
        h="full"
      >
        <Icon
          as={RiDraggable}
@@ -24,7 +24,7 @@ export const HeaderButtons = () => {
        colorScheme={canModify ? undefined : "orange"}
        bgColor={canModify ? undefined : "orange.400"}
        minW={0}
        variant={canModify ? "ghost" : "solid"}
        variant={{ base: "solid", md: canModify ? "ghost" : "solid" }}
      >
        {isForking ? <Spinner boxSize={5} /> : <Icon as={TbGitFork} boxSize={5} />}
        <Text ml={2}>Fork</Text>
@@ -18,6 +18,7 @@ export const env = createEnv({
    GITHUB_CLIENT_SECRET: z.string().min(1),
    OPENAI_API_KEY: z.string().min(1),
    REPLICATE_API_TOKEN: z.string().default("placeholder"),
    ANTHROPIC_API_KEY: z.string().default("placeholder"),
  },

  /**
@@ -44,6 +45,7 @@ export const env = createEnv({
    GITHUB_CLIENT_ID: process.env.GITHUB_CLIENT_ID,
    GITHUB_CLIENT_SECRET: process.env.GITHUB_CLIENT_SECRET,
    REPLICATE_API_TOKEN: process.env.REPLICATE_API_TOKEN,
    ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY,
  },
  /**
   * Run `build` or `dev` with `SKIP_ENV_VALIDATION` to skip env validation.
69 src/modelProviders/anthropic/codegen/codegen.ts Normal file

@@ -0,0 +1,69 @@
/* eslint-disable @typescript-eslint/no-var-requires */

import YAML from "yaml";
import fs from "fs";
import path from "path";
import { openapiSchemaToJsonSchema } from "@openapi-contrib/openapi-schema-to-json-schema";
import $RefParser from "@apidevtools/json-schema-ref-parser";
import { type JSONObject } from "superjson/dist/types";
import assert from "assert";
import { type JSONSchema4Object } from "json-schema";
import { isObject } from "lodash-es";

// @ts-expect-error for some reason missing from types
import parserEstree from "prettier/plugins/estree";
import parserBabel from "prettier/plugins/babel";
import prettier from "prettier/standalone";

const OPENAPI_URL =
  "https://raw.githubusercontent.com/tryAGI/Anthropic/1c0871e861de60a4c3a843cb90e17d63e86c234a/docs/openapi.yaml";

// Fetch the openapi document
const response = await fetch(OPENAPI_URL);
const openApiYaml = await response.text();

// Parse the yaml document
let schema = YAML.parse(openApiYaml) as JSONObject;
schema = openapiSchemaToJsonSchema(schema);

const jsonSchema = await $RefParser.dereference(schema);

assert("components" in jsonSchema);
const completionRequestSchema = jsonSchema.components.schemas
  .CreateCompletionRequest as JSONSchema4Object;

// We need to do a bit of surgery here since the Monaco editor doesn't like
// the fact that the schema says `model` can be either a string or an enum,
// and displays a warning in the editor. Let's stick with just an enum for
// now and drop the string option.
assert(
  "properties" in completionRequestSchema &&
    isObject(completionRequestSchema.properties) &&
    "model" in completionRequestSchema.properties &&
    isObject(completionRequestSchema.properties.model),
);

const modelProperty = completionRequestSchema.properties.model;
assert(
  "oneOf" in modelProperty &&
    Array.isArray(modelProperty.oneOf) &&
    modelProperty.oneOf.length === 2 &&
    isObject(modelProperty.oneOf[1]) &&
    "enum" in modelProperty.oneOf[1],
  "Expected model to have oneOf length of 2",
);
modelProperty.type = "string";
modelProperty.enum = modelProperty.oneOf[1].enum;
delete modelProperty["oneOf"];

// Get the directory of the current script
const currentDirectory = path.dirname(import.meta.url).replace("file://", "");

// Write the JSON schema to a file in the current directory
fs.writeFileSync(
  path.join(currentDirectory, "input.schema.json"),
  await prettier.format(JSON.stringify(completionRequestSchema, null, 2), {
    parser: "json",
    plugins: [parserBabel, parserEstree],
  }),
);
129 src/modelProviders/anthropic/codegen/input.schema.json Normal file

@@ -0,0 +1,129 @@
{
  "type": "object",
  "properties": {
    "model": {
      "description": "The model that will complete your prompt.\nAs we improve Claude, we develop new versions of it that you can query.\nThis parameter controls which version of Claude answers your request.\nRight now we are offering two model families: Claude, and Claude Instant.\nYou can use them by setting model to \"claude-2\" or \"claude-instant-1\", respectively.\nSee models for additional details.\n",
      "x-oaiTypeLabel": "string",
      "type": "string",
      "enum": [
        "claude-2",
        "claude-2.0",
        "claude-instant-1",
        "claude-instant-1.1"
      ]
    },
    "prompt": {
      "description": "The prompt that you want Claude to complete.\n\nFor proper response generation you will need to format your prompt as follows:\n\\n\\nHuman: ${userQuestion}\\n\\nAssistant:\nSee our comments on prompts for more context.\n",
      "default": "<|endoftext|>",
      "nullable": true,
      "oneOf": [
        {
          "type": "string",
          "default": "",
          "example": "This is a test."
        },
        {
          "type": "array",
          "items": {
            "type": "string",
            "default": "",
            "example": "This is a test."
          }
        },
        {
          "type": "array",
          "minItems": 1,
          "items": {
            "type": "integer"
          },
          "example": "[1212, 318, 257, 1332, 13]"
        },
        {
          "type": "array",
          "minItems": 1,
          "items": {
            "type": "array",
            "minItems": 1,
            "items": {
              "type": "integer"
            }
          },
          "example": "[[1212, 318, 257, 1332, 13]]"
        }
      ]
    },
    "max_tokens_to_sample": {
      "type": "integer",
      "minimum": 1,
      "default": 256,
      "example": 256,
      "nullable": true,
      "description": "The maximum number of tokens to generate before stopping.\n\nNote that our models may stop before reaching this maximum. This parameter only specifies the absolute maximum number of tokens to generate.\n"
    },
    "temperature": {
      "type": "number",
      "minimum": 0,
      "maximum": 1,
      "default": 1,
      "example": 1,
      "nullable": true,
      "description": "Amount of randomness injected into the response.\n\nDefaults to 1. Ranges from 0 to 1. Use temp closer to 0 for analytical / multiple choice, and closer to 1 for creative and generative tasks.\n"
    },
    "top_p": {
      "type": "number",
      "minimum": 0,
      "maximum": 1,
      "default": 1,
      "example": 1,
      "nullable": true,
      "description": "Use nucleus sampling.\n\nIn nucleus sampling, we compute the cumulative distribution over all the options \nfor each subsequent token in decreasing probability order and cut it off once \nit reaches a particular probability specified by top_p. You should either alter temperature or top_p, but not both.\n"
    },
    "top_k": {
      "type": "number",
      "minimum": 0,
      "default": 5,
      "example": 5,
      "nullable": true,
      "description": "Only sample from the top K options for each subsequent token.\n\nUsed to remove \"long tail\" low probability responses. Learn more technical details here.\n"
    },
    "stream": {
      "description": "Whether to incrementally stream the response using server-sent events.\nSee this guide to SSE events for details.type: boolean\n",
      "nullable": true,
      "default": false
    },
    "stop_sequences": {
      "description": "Sequences that will cause the model to stop generating completion text.\nOur models stop on \"\\n\\nHuman:\", and may include additional built-in stop sequences in the future. By providing the stop_sequences parameter, you may include additional strings that will cause the model to stop generating.\n",
      "default": null,
      "nullable": true,
      "oneOf": [
        {
          "type": "string",
          "default": "<|endoftext|>",
          "example": "\n",
          "nullable": true
        },
        {
          "type": "array",
          "minItems": 1,
          "maxItems": 4,
          "items": {
            "type": "string",
            "example": "[\"\\n\"]"
          }
        }
      ]
    },
    "metadata": {
      "type": "object",
      "properties": {
        "user_id": {
          "type": "string",
          "example": "13803d75-b4b5-4c3e-b2a2-6f21399b021b",
          "description": "An external identifier for the user who is associated with the request.\n\nThis should be a uuid, hash value, or other opaque identifier. Anthropic may use this id to help detect abuse. \nDo not include any identifying information such as name, email address, or phone number.\n"
        }
      },
      "description": "An object describing metadata about the request.\n"
    }
  },
  "required": ["model", "prompt", "max_tokens_to_sample"]
}
40 src/modelProviders/anthropic/frontend.ts Normal file

@@ -0,0 +1,40 @@
import { type Completion } from "@anthropic-ai/sdk/resources";
import { type SupportedModel } from ".";
import { type FrontendModelProvider } from "../types";
import { refinementActions } from "./refinementActions";

const frontendModelProvider: FrontendModelProvider<SupportedModel, Completion> = {
  name: "Replicate Llama2",

  models: {
    "claude-2.0": {
      name: "Claude 2.0",
      contextWindow: 100000,
      promptTokenPrice: 11.02 / 1000000,
      completionTokenPrice: 32.68 / 1000000,
      speed: "medium",
      provider: "anthropic",
      learnMoreUrl: "https://www.anthropic.com/product",
    },
    "claude-instant-1.1": {
      name: "Claude Instant 1.1",
      contextWindow: 100000,
      promptTokenPrice: 1.63 / 1000000,
      completionTokenPrice: 5.51 / 1000000,
      speed: "fast",
      provider: "anthropic",
      learnMoreUrl: "https://www.anthropic.com/product",
    },
  },

  refinementActions,

  normalizeOutput: (output) => {
    return {
      type: "text",
      value: output.completion,
    };
  },
};

export default frontendModelProvider;
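The prices in `frontend.ts` above are dollars per single token. A quick cost check against those numbers (token counts are illustrative):

```ts
// Illustration only: claude-2.0 pricing from frontend.ts above.
const promptTokenPrice = 11.02 / 1_000_000; // $ per prompt token
const completionTokenPrice = 32.68 / 1_000_000; // $ per completion token

// A call that uses 1,000 prompt tokens and 500 completion tokens:
const cost = 1000 * promptTokenPrice + 500 * completionTokenPrice;
console.log(cost.toFixed(5)); // => 0.02736 ($0.01102 + $0.01634)
```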
86 src/modelProviders/anthropic/getCompletion.ts Normal file

@@ -0,0 +1,86 @@
import { env } from "~/env.mjs";
import { type CompletionResponse } from "../types";

import Anthropic, { APIError } from "@anthropic-ai/sdk";
import { type Completion, type CompletionCreateParams } from "@anthropic-ai/sdk/resources";
import { isObject, isString } from "lodash-es";

const anthropic = new Anthropic({
  apiKey: env.ANTHROPIC_API_KEY,
});

export async function getCompletion(
  input: CompletionCreateParams,
  onStream: ((partialOutput: Completion) => void) | null,
): Promise<CompletionResponse<Completion>> {
  const start = Date.now();
  let finalCompletion: Completion | null = null;

  try {
    if (onStream) {
      const resp = await anthropic.completions.create(
        { ...input, stream: true },
        {
          maxRetries: 0,
        },
      );

      for await (const part of resp) {
        if (finalCompletion === null) {
          finalCompletion = part;
        } else {
          finalCompletion = { ...part, completion: finalCompletion.completion + part.completion };
        }
        onStream(finalCompletion);
      }
      if (!finalCompletion) {
        return {
          type: "error",
          message: "Streaming failed to return a completion",
          autoRetry: false,
        };
      }
    } else {
      const resp = await anthropic.completions.create(
        { ...input, stream: false },
        {
          maxRetries: 0,
        },
      );
      finalCompletion = resp;
    }
    const timeToComplete = Date.now() - start;

    return {
      type: "success",
      statusCode: 200,
      value: finalCompletion,
      timeToComplete,
    };
  } catch (error: unknown) {
    console.log("CAUGHT ERROR", error);
    if (error instanceof APIError) {
      const message =
        isObject(error.error) &&
        "error" in error.error &&
        isObject(error.error.error) &&
        "message" in error.error.error &&
        isString(error.error.error.message)
          ? error.error.error.message
          : error.message;

      return {
        type: "error",
        message,
        autoRetry: error.status === 429 || error.status === 503,
        statusCode: error.status,
      };
    } else {
      return {
        type: "error",
        message: (error as Error).message,
        autoRetry: true,
      };
    }
  }
}
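A minimal sketch of calling this helper directly (a hypothetical caller with assumed argument values; in the app the real call site is `provider.getCompletion(...)` in `queryModel.task.ts` further down):

```ts
import { getCompletion } from "~/modelProviders/anthropic/getCompletion";

// Hypothetical invocation; model, prompt, and token budget are assumptions.
const result = await getCompletion(
  {
    model: "claude-instant-1.1",
    prompt: "\n\nHuman: Say hello\n\nAssistant:",
    max_tokens_to_sample: 256,
  },
  // Pass null instead of a callback to disable streaming.
  (partial) => console.log(partial.completion),
);

if (result.type === "success") {
  console.log(result.value.completion, `took ${result.timeToComplete}ms`);
} else {
  console.error(result.message, "will auto-retry:", result.autoRetry);
}
```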
34 src/modelProviders/anthropic/index.ts Normal file

@@ -0,0 +1,34 @@
import { type JSONSchema4 } from "json-schema";
import { type ModelProvider } from "../types";
import inputSchema from "./codegen/input.schema.json";
import { getCompletion } from "./getCompletion";
import frontendModelProvider from "./frontend";
import type { Completion, CompletionCreateParams } from "@anthropic-ai/sdk/resources";

const supportedModels = ["claude-2.0", "claude-instant-1.1"] as const;

export type SupportedModel = (typeof supportedModels)[number];

export type AnthropicProvider = ModelProvider<SupportedModel, CompletionCreateParams, Completion>;

const modelProvider: AnthropicProvider = {
  getModel: (input) => {
    if (supportedModels.includes(input.model as SupportedModel))
      return input.model as SupportedModel;

    const modelMaps: Record<string, SupportedModel> = {
      "claude-2": "claude-2.0",
      "claude-instant-1": "claude-instant-1.1",
    };

    if (input.model in modelMaps) return modelMaps[input.model] as SupportedModel;

    return null;
  },
  inputSchema: inputSchema as JSONSchema4,
  canStream: true,
  getCompletion,
  ...frontendModelProvider,
};

export default modelProvider;
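How `getModel` resolves model aliases, sketched with abbreviated inputs (values here are illustrative):

```ts
import modelProvider from "~/modelProviders/anthropic";

// Inputs abbreviated to the fields that matter here (values illustrative):
const base = { prompt: "\n\nHuman: hi\n\nAssistant:", max_tokens_to_sample: 16 };

modelProvider.getModel({ ...base, model: "claude-2" }); // => "claude-2.0"
modelProvider.getModel({ ...base, model: "claude-instant-1" }); // => "claude-instant-1.1"
modelProvider.getModel({ ...base, model: "claude-2.0" }); // => "claude-2.0" (already pinned)
modelProvider.getModel({ ...base, model: "gpt-4" }); // => null (not an Anthropic model)
```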
3 src/modelProviders/anthropic/refinementActions.ts Normal file

@@ -0,0 +1,3 @@
import { type RefinementAction } from "../types";

export const refinementActions: Record<string, RefinementAction> = {};
@@ -1,15 +1,15 @@
import openaiChatCompletionFrontend from "./openai-ChatCompletion/frontend";
import replicateLlama2Frontend from "./replicate-llama2/frontend";
import anthropicFrontend from "./anthropic/frontend";
import { type SupportedProvider, type FrontendModelProvider } from "./types";

// TODO: make sure we get a typescript error if you forget to add a provider here

// Keep attributes here that need to be accessible from the frontend. We can't
// just include them in the default `modelProviders` object because it has some
// transient dependencies that can only be imported on the server.
const frontendModelProviders: Record<SupportedProvider, FrontendModelProvider<any, any>> = {
  "openai/ChatCompletion": openaiChatCompletionFrontend,
  "replicate/llama2": replicateLlama2Frontend,
  anthropic: anthropicFrontend,
};

export default frontendModelProviders;
@@ -1,10 +1,12 @@
import openaiChatCompletion from "./openai-ChatCompletion";
import replicateLlama2 from "./replicate-llama2";
import anthropic from "./anthropic";
import { type SupportedProvider, type ModelProvider } from "./types";

const modelProviders: Record<SupportedProvider, ModelProvider<any, any, any>> = {
  "openai/ChatCompletion": openaiChatCompletion,
  "replicate/llama2": replicateLlama2,
  anthropic,
};

export default modelProviders;
@@ -120,7 +120,6 @@ export async function getCompletion(
      cost,
    };
  } catch (error: unknown) {
    console.error("ERROR IS", error);
    if (error instanceof APIError) {
      return {
        type: "error",
@@ -6,6 +6,7 @@ import { z } from "zod";
export const ZodSupportedProvider = z.union([
  z.literal("openai/ChatCompletion"),
  z.literal("replicate/llama2"),
  z.literal("anthropic"),
]);

export type SupportedProvider = z.infer<typeof ZodSupportedProvider>;
@@ -61,6 +61,14 @@ export default function Experiment() {

  const canModify = experiment.data?.access.canModify ?? false;

  const y = "5"
  const z = {abc: "123"}

  const func = () => {
    const u = 12;
    const m = `hello ${y} ${z.abc} ${u} world`;
  }

  return (
    <AppShell title={experiment.data?.label}>
      <VStack h="full">
@@ -106,7 +114,7 @@ export default function Experiment() {
        </Flex>
        <ExperimentSettingsDrawer />
        <Box w="100%" overflowX="auto" flex={1}>
          <OutputsTable experimentId={router.query.id as string | undefined} />
          <OutputsTable experimentId={router.query.id as string | undefined} func={func} />
        </Box>
      </VStack>
    </AppShell>
@@ -2,7 +2,7 @@ import { EvalType } from "@prisma/client";
import { z } from "zod";
import { createTRPCRouter, protectedProcedure, publicProcedure } from "~/server/api/trpc";
import { prisma } from "~/server/db";
import { runAllEvals } from "~/server/utils/evaluations";
import { queueRunNewEval } from "~/server/tasks/runNewEval.task";
import { requireCanModifyExperiment, requireCanViewExperiment } from "~/utils/accessControl";

export const evaluationsRouter = createTRPCRouter({
@@ -40,9 +40,7 @@ export const evaluationsRouter = createTRPCRouter({
        },
      });

      // TODO: this may be a bad UX for slow evals (eg. GPT-4 evals) Maybe need
      // to kick off a background job or something instead
      await runAllEvals(input.experimentId);
      await queueRunNewEval(input.experimentId);
    }),

  update: protectedProcedure
@@ -78,7 +76,7 @@ export const evaluationsRouter = createTRPCRouter({
      });
      // Re-run all evals. Other eval results will already be cached, so this
      // should only re-run the updated one.
      await runAllEvals(evaluation.experimentId);
      await queueRunNewEval(experimentId);
    }),

  delete: protectedProcedure
@@ -118,7 +118,7 @@ export const experimentsRouter = createTRPCRouter({
          },
        },
        include: {
          modelOutput: {
          modelResponses: {
            include: {
              outputEvaluations: true,
            },
@@ -177,11 +177,11 @@ export const experimentsRouter = createTRPCRouter({
      }

      const cellsToCreate: Prisma.ScenarioVariantCellCreateManyInput[] = [];
      const modelOutputsToCreate: Prisma.ModelOutputCreateManyInput[] = [];
      const modelResponsesToCreate: Prisma.ModelResponseCreateManyInput[] = [];
      const outputEvaluationsToCreate: Prisma.OutputEvaluationCreateManyInput[] = [];
      for (const cell of existingCells) {
        const newCellId = uuidv4();
        const { modelOutput, ...cellData } = cell;
        const { modelResponses, ...cellData } = cell;
        cellsToCreate.push({
          ...cellData,
          id: newCellId,
@@ -189,20 +189,20 @@ export const experimentsRouter = createTRPCRouter({
          testScenarioId: existingToNewScenarioIds.get(cell.testScenarioId) ?? "",
          prompt: (cell.prompt as Prisma.InputJsonValue) ?? undefined,
        });
        if (modelOutput) {
          const newModelOutputId = uuidv4();
          const { outputEvaluations, ...modelOutputData } = modelOutput;
          modelOutputsToCreate.push({
            ...modelOutputData,
            id: newModelOutputId,
        for (const modelResponse of modelResponses) {
          const newModelResponseId = uuidv4();
          const { outputEvaluations, ...modelResponseData } = modelResponse;
          modelResponsesToCreate.push({
            ...modelResponseData,
            id: newModelResponseId,
            scenarioVariantCellId: newCellId,
            output: (modelOutput.output as Prisma.InputJsonValue) ?? undefined,
            output: (modelResponse.output as Prisma.InputJsonValue) ?? undefined,
          });
          for (const evaluation of outputEvaluations) {
            outputEvaluationsToCreate.push({
              ...evaluation,
              id: uuidv4(),
              modelOutputId: newModelOutputId,
              modelResponseId: newModelResponseId,
              evaluationId: existingToNewEvaluationIds.get(evaluation.evaluationId) ?? "",
            });
          }
@@ -245,8 +245,8 @@ export const experimentsRouter = createTRPCRouter({
        prisma.scenarioVariantCell.createMany({
          data: cellsToCreate,
        }),
        prisma.modelOutput.createMany({
          data: modelOutputsToCreate,
        prisma.modelResponse.createMany({
          data: modelResponsesToCreate,
        }),
        prisma.evaluation.createMany({
          data: evaluationsToCreate,
@@ -1,6 +1,7 @@
import { z } from "zod";
import { createTRPCRouter, protectedProcedure, publicProcedure } from "~/server/api/trpc";
import { prisma } from "~/server/db";
import { Prisma } from "@prisma/client";
import { generateNewCell } from "~/server/utils/generateNewCell";
import userError from "~/server/utils/error";
import { recordExperimentUpdated } from "~/server/utils/recordExperimentUpdated";
@@ -51,7 +52,9 @@ export const promptVariantsRouter = createTRPCRouter({
        id: true,
      },
      where: {
        modelOutput: {
        modelResponse: {
          outdated: false,
          output: { not: Prisma.AnyNull },
          scenarioVariantCell: {
            promptVariant: {
              id: input.variantId,
@@ -93,14 +96,23 @@ export const promptVariantsRouter = createTRPCRouter({
        where: {
          promptVariantId: input.variantId,
          testScenario: { visible: true },
          modelOutput: {
            is: {},
          modelResponses: {
            some: {
              outdated: false,
              output: {
                not: Prisma.AnyNull,
              },
            },
          },
        },
      });

      const overallTokens = await prisma.modelOutput.aggregate({
      const overallTokens = await prisma.modelResponse.aggregate({
        where: {
          outdated: false,
          output: {
            not: Prisma.AnyNull,
          },
          scenarioVariantCell: {
            promptVariantId: input.variantId,
            testScenario: {
@@ -118,16 +130,9 @@ export const promptVariantsRouter = createTRPCRouter({
      const promptTokens = overallTokens._sum?.promptTokens ?? 0;
      const completionTokens = overallTokens._sum?.completionTokens ?? 0;

      const awaitingRetrievals = !!(await prisma.scenarioVariantCell.findFirst({
        where: {
          promptVariantId: input.variantId,
          testScenario: { visible: true },
          // Check if is PENDING or IN_PROGRESS
          retrievalStatus: {
            in: ["PENDING", "IN_PROGRESS"],
          },
        },
      }));
      const awaitingEvals = !!evalResults.find(
        (result) => result.totalCount < scenarioCount * evals.length,
      );

      return {
        evalResults,
@@ -136,7 +141,7 @@ export const promptVariantsRouter = createTRPCRouter({
        overallCost: overallTokens._sum?.cost ?? 0,
        scenarioCount,
        outputCount,
        awaitingRetrievals,
        awaitingEvals,
      };
    }),
@@ -19,27 +19,45 @@ export const scenarioVariantCellsRouter = createTRPCRouter({
      });
      await requireCanViewExperiment(experimentId, ctx);

      return await prisma.scenarioVariantCell.findUnique({
        where: {
          promptVariantId_testScenarioId: {
            promptVariantId: input.variantId,
            testScenarioId: input.scenarioId,
      const [cell, numTotalEvals] = await prisma.$transaction([
        prisma.scenarioVariantCell.findUnique({
          where: {
            promptVariantId_testScenarioId: {
              promptVariantId: input.variantId,
              testScenarioId: input.scenarioId,
            },
          },
          },
        include: {
          modelOutput: {
            include: {
              outputEvaluations: {
                include: {
                  evaluation: {
                    select: { label: true },
          include: {
            modelResponses: {
              where: {
                outdated: false,
              },
              include: {
                outputEvaluations: {
                  include: {
                    evaluation: {
                      select: { label: true },
                    },
                  },
                },
              },
            },
          },
        },
      });
        }),
        prisma.evaluation.count({
          where: { experimentId },
        }),
      ]);

      if (!cell) return null;

      const lastResponse = cell.modelResponses?.[cell.modelResponses?.length - 1];
      const evalsComplete = lastResponse?.outputEvaluations?.length === numTotalEvals;

      return {
        ...cell,
        evalsComplete,
      };
    }),
  forceRefetch: protectedProcedure
    .input(
@@ -62,7 +80,6 @@ export const scenarioVariantCellsRouter = createTRPCRouter({
          testScenarioId: input.scenarioId,
        },
      },
      include: { modelOutput: true },
    });

    if (!cell) {
@@ -70,12 +87,12 @@ export const scenarioVariantCellsRouter = createTRPCRouter({
      return;
    }

    if (cell.modelOutput) {
      // TODO: Maybe keep these around to show previous generations?
      await prisma.modelOutput.delete({
        where: { id: cell.modelOutput.id },
      });
    }
    await prisma.modelResponse.updateMany({
      where: { scenarioVariantCellId: cell.id },
      data: {
        outdated: true,
      },
    });

    await queueQueryModel(cell.id, true);
  }),
@@ -7,9 +7,9 @@ function defineTask<TPayload>(
  taskIdentifier: string,
  taskHandler: (payload: TPayload, helpers: Helpers) => Promise<void>,
) {
  const enqueue = async (payload: TPayload) => {
  const enqueue = async (payload: TPayload, runAt?: Date) => {
    console.log("Enqueuing task", taskIdentifier, payload);
    await quickAddJob({ connectionString: env.DATABASE_URL }, taskIdentifier, payload);
    await quickAddJob({ connectionString: env.DATABASE_URL }, taskIdentifier, payload, { runAt });
  };

  const handler = (payload: TPayload, helpers: Helpers) => {
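With the new optional `runAt` argument, callers can schedule a job for a future time instead of running it immediately (graphile-worker's `quickAddJob` accepts `runAt` in its options object). A sketch of both call shapes using the `QueryModelJob` payload defined below (the id is a placeholder):

```ts
// Placeholder for illustration; cellId would be a real ScenarioVariantCell id.
const cellId = "00000000-0000-0000-0000-000000000000";

// Run as soon as a worker picks it up (runAt omitted):
await queryModel.enqueue({ cellId, stream: false, numPreviousTries: 0 });

// Schedule a retry roughly five seconds from now:
await queryModel.enqueue(
  { cellId, stream: false, numPreviousTries: 1 },
  new Date(Date.now() + 5000),
);
```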
@@ -6,15 +6,15 @@ import { wsConnection } from "~/utils/wsConnection";
import { runEvalsForOutput } from "../utils/evaluations";
import hashPrompt from "../utils/hashPrompt";
import parseConstructFn from "../utils/parseConstructFn";
import { sleep } from "../utils/sleep";
import defineTask from "./defineTask";

export type QueryModelJob = {
  cellId: string;
  stream: boolean;
  numPreviousTries: number;
};

const MAX_AUTO_RETRIES = 10;
const MAX_AUTO_RETRIES = 50;
const MIN_DELAY = 500; // milliseconds
const MAX_DELAY = 15000; // milliseconds
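The body of `calculateDelay` falls outside the hunks shown here. A plausible reading consistent with `MIN_DELAY`, `MAX_DELAY`, and the per-retry delays used below is a capped exponential backoff; the following is an assumption, not the committed implementation:

```ts
// Assumed reconstruction: the committed calculateDelay body is not shown in
// this diff. Doubles the delay per attempt, clamped between MIN_DELAY and MAX_DELAY.
function calculateDelaySketch(numPreviousTries: number): number {
  const delay = MIN_DELAY * Math.pow(2, numPreviousTries);
  return Math.min(Math.max(delay, MIN_DELAY), MAX_DELAY);
}
// numPreviousTries: 0 -> 500 ms, 1 -> 1 s, 2 -> 2 s, 3 -> 4 s, 4 -> 8 s, 5+ -> capped at 15 s
```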
@@ -26,20 +26,12 @@ function calculateDelay(numPreviousTries: number): number {
|
||||
|
||||
export const queryModel = defineTask<QueryModelJob>("queryModel", async (task) => {
|
||||
console.log("RUNNING TASK", task);
|
||||
const { cellId, stream } = task;
|
||||
const { cellId, stream, numPreviousTries } = task;
|
||||
const cell = await prisma.scenarioVariantCell.findUnique({
|
||||
where: { id: cellId },
|
||||
include: { modelOutput: true },
|
||||
include: { modelResponses: true },
|
||||
});
|
||||
if (!cell) {
|
||||
await prisma.scenarioVariantCell.update({
|
||||
where: { id: cellId },
|
||||
data: {
|
||||
statusCode: 404,
|
||||
errorMessage: "Cell not found",
|
||||
retrievalStatus: "ERROR",
|
||||
},
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -51,6 +43,7 @@ export const queryModel = defineTask<QueryModelJob>("queryModel", async (task) =
|
||||
where: { id: cellId },
|
||||
data: {
|
||||
retrievalStatus: "IN_PROGRESS",
|
||||
jobStartedAt: new Date(),
|
||||
},
|
||||
});
|
||||
|
||||
@@ -61,7 +54,6 @@ export const queryModel = defineTask<QueryModelJob>("queryModel", async (task) =
|
||||
await prisma.scenarioVariantCell.update({
|
||||
where: { id: cellId },
|
||||
data: {
|
||||
statusCode: 404,
|
||||
errorMessage: "Prompt Variant not found",
|
||||
retrievalStatus: "ERROR",
|
||||
},
|
||||
@@ -76,7 +68,6 @@ export const queryModel = defineTask<QueryModelJob>("queryModel", async (task) =
|
||||
await prisma.scenarioVariantCell.update({
|
||||
where: { id: cellId },
|
||||
data: {
|
||||
statusCode: 404,
|
||||
errorMessage: "Scenario not found",
|
||||
retrievalStatus: "ERROR",
|
||||
},
|
||||
@@ -90,7 +81,6 @@ export const queryModel = defineTask<QueryModelJob>("queryModel", async (task) =
     await prisma.scenarioVariantCell.update({
       where: { id: cellId },
       data: {
-        statusCode: 400,
         errorMessage: prompt.error,
         retrievalStatus: "ERROR",
       },
@@ -106,58 +96,79 @@ export const queryModel = defineTask<QueryModelJob>("queryModel", async (task) =
       }
     : null;

-  for (let i = 0; true; i++) {
-    const response = await provider.getCompletion(prompt.modelInput, onStream);
-    if (response.type === "success") {
-      const inputHash = hashPrompt(prompt);
-
-      const modelOutput = await prisma.modelOutput.create({
-        data: {
-          scenarioVariantCellId: cellId,
-          inputHash,
-          output: response.value as Prisma.InputJsonObject,
-          timeToComplete: response.timeToComplete,
-          promptTokens: response.promptTokens,
-          completionTokens: response.completionTokens,
-          cost: response.cost,
-        },
-      });
-
-      await prisma.scenarioVariantCell.update({
-        where: { id: cellId },
-        data: {
-          statusCode: response.statusCode,
-          retrievalStatus: "COMPLETE",
-        },
-      });
-
-      await runEvalsForOutput(variant.experimentId, scenario, modelOutput);
-      break;
-    } else {
-      const shouldRetry = response.autoRetry && i < MAX_AUTO_RETRIES;
-      const delay = calculateDelay(i);
-
-      await prisma.scenarioVariantCell.update({
-        where: { id: cellId },
-        data: {
-          errorMessage: response.message,
-          statusCode: response.statusCode,
-          retryTime: shouldRetry ? new Date(Date.now() + delay) : null,
-          retrievalStatus: "ERROR",
-        },
-      });
-
-      if (shouldRetry) {
-        await sleep(delay);
-      } else {
-        break;
-      }
-    }
-  }
+  const inputHash = hashPrompt(prompt);
+
+  let modelResponse = await prisma.modelResponse.create({
+    data: {
+      inputHash,
+      scenarioVariantCellId: cellId,
+      requestedAt: new Date(),
+    },
+  });
+  const response = await provider.getCompletion(prompt.modelInput, onStream);
+  if (response.type === "success") {
+    modelResponse = await prisma.modelResponse.update({
+      where: { id: modelResponse.id },
+      data: {
+        output: response.value as Prisma.InputJsonObject,
+        statusCode: response.statusCode,
+        receivedAt: new Date(),
+        promptTokens: response.promptTokens,
+        completionTokens: response.completionTokens,
+        cost: response.cost,
+      },
+    });
+
+    await prisma.scenarioVariantCell.update({
+      where: { id: cellId },
+      data: {
+        retrievalStatus: "COMPLETE",
+      },
+    });
+
+    await runEvalsForOutput(variant.experimentId, scenario, modelResponse, prompt.modelProvider);
+  } else {
+    const shouldRetry = response.autoRetry && numPreviousTries < MAX_AUTO_RETRIES;
+    const delay = calculateDelay(numPreviousTries);
+    const retryTime = new Date(Date.now() + delay);
+
+    await prisma.modelResponse.update({
+      where: { id: modelResponse.id },
+      data: {
+        statusCode: response.statusCode,
+        errorMessage: response.message,
+        receivedAt: new Date(),
+        retryTime: shouldRetry ? retryTime : null,
+      },
+    });
+
+    if (shouldRetry) {
+      await queryModel.enqueue(
+        {
+          cellId,
+          stream,
+          numPreviousTries: numPreviousTries + 1,
+        },
+        retryTime,
+      );
+      await prisma.scenarioVariantCell.update({
+        where: { id: cellId },
+        data: {
+          retrievalStatus: "PENDING",
+        },
+      });
+    }
+  }
 });

 export const queueQueryModel = async (cellId: string, stream: boolean) => {
   console.log("queueQueryModel", cellId, stream);
   await Promise.all([
     prisma.scenarioVariantCell.update({
       where: {
@@ -166,10 +177,9 @@ export const queueQueryModel = async (cellId: string, stream: boolean) => {
       data: {
         retrievalStatus: "PENDING",
         errorMessage: null,
+        jobQueuedAt: new Date(),
       },
     }),
-
-    await queryModel.enqueue({ cellId, stream }),
-    console.log("queued"),
+    queryModel.enqueue({ cellId, stream, numPreviousTries: 0 }),
   ]);
 };
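Note: the rewrite above replaces the old in-process retry loop (`for (let i = 0; ...)` plus `sleep`) with scheduled re-enqueues: a failed attempt records `retryTime` on the `ModelResponse` and re-adds the job with `runAt: retryTime`, freeing the worker slot between attempts. The branches also imply the shape of `provider.getCompletion`'s result; this union is inferred from the fields read here, not copied from the providers module:

```ts
// Inferred from usage in queryModel.task.ts; the real type may carry more fields.
type CompletionResult =
  | {
      type: "success";
      value: unknown; // persisted as the ModelResponse output JSON
      statusCode: number;
      promptTokens: number;
      completionTokens: number;
      cost: number;
    }
  | {
      type: "error";
      message: string;
      statusCode?: number;
      autoRetry: boolean; // drives the re-enqueue-with-runAt path above
    };
```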
src/server/tasks/runNewEval.task.ts (new file, 17 lines)
@@ -0,0 +1,17 @@
+import { runAllEvals } from "../utils/evaluations";
+import defineTask from "./defineTask";
+
+export type RunNewEvalJob = {
+  experimentId: string;
+};
+
+// When a new eval is created, we want to run it on all existing outputs, but return the new eval first
+export const runNewEval = defineTask<RunNewEvalJob>("runNewEval", async (task) => {
+  console.log("RUNNING TASK", task);
+  const { experimentId } = task;
+  await runAllEvals(experimentId);
+});
+
+export const queueRunNewEval = async (experimentId: string) => {
+  await runNewEval.enqueue({ experimentId });
+};
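Note: the comment in this new file captures the design intent: creating an evaluation should return to the client immediately while the backfill over existing responses runs in the worker. A sketch of a caller (the surrounding mutation is hypothetical; `evalType` and `value` are fields used elsewhere in this diff):

```ts
// Hypothetical mutation body: persist the new eval, enqueue the backfill,
// and return without waiting for runAllEvals to finish.
const evaluation = await prisma.evaluation.create({
  data: { experimentId, evalType, value },
});
await queueRunNewEval(experimentId);
return evaluation;
```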
@@ -3,10 +3,11 @@ import "dotenv/config";

 import { env } from "~/env.mjs";
 import { queryModel } from "./queryModel.task";
+import { runNewEval } from "./runNewEval.task";

 console.log("Starting worker");

-const registeredTasks = [queryModel];
+const registeredTasks = [queryModel, runNewEval];

 const taskList = registeredTasks.reduce((acc, task) => {
   acc[task.task.identifier] = task.task.handler;
@@ -16,7 +17,7 @@ const taskList = registeredTasks.reduce((acc, task) => {
 // Run a worker to execute jobs:
 const runner = await run({
   connectionString: env.DATABASE_URL,
-  concurrency: 20,
+  concurrency: 50,
   // Install signal handlers for graceful shutdown on SIGINT, SIGTERM, etc
   noHandleSignals: false,
   pollInterval: 1000,
@@ -1,19 +1,25 @@
-import { type ModelOutput, type Evaluation } from "@prisma/client";
+import { type ModelResponse, type Evaluation, Prisma } from "@prisma/client";
 import { prisma } from "../db";
 import { runOneEval } from "./runOneEval";
 import { type Scenario } from "~/components/OutputsTable/types";
+import { type SupportedProvider } from "~/modelProviders/types";

-const saveResult = async (evaluation: Evaluation, scenario: Scenario, modelOutput: ModelOutput) => {
-  const result = await runOneEval(evaluation, scenario, modelOutput);
+const runAndSaveEval = async (
+  evaluation: Evaluation,
+  scenario: Scenario,
+  modelResponse: ModelResponse,
+  provider: SupportedProvider,
+) => {
+  const result = await runOneEval(evaluation, scenario, modelResponse, provider);
   return await prisma.outputEvaluation.upsert({
     where: {
-      modelOutputId_evaluationId: {
-        modelOutputId: modelOutput.id,
+      modelResponseId_evaluationId: {
+        modelResponseId: modelResponse.id,
         evaluationId: evaluation.id,
       },
     },
     create: {
-      modelOutputId: modelOutput.id,
+      modelResponseId: modelResponse.id,
       evaluationId: evaluation.id,
       ...result,
     },
@@ -26,20 +32,28 @@ const saveResult = async (evaluation: Evaluation, scenario: Scenario, modelOutpu
 export const runEvalsForOutput = async (
   experimentId: string,
   scenario: Scenario,
-  modelOutput: ModelOutput,
+  modelResponse: ModelResponse,
+  provider: SupportedProvider,
 ) => {
   const evaluations = await prisma.evaluation.findMany({
     where: { experimentId },
   });

   await Promise.all(
-    evaluations.map(async (evaluation) => await saveResult(evaluation, scenario, modelOutput)),
+    evaluations.map(
+      async (evaluation) => await runAndSaveEval(evaluation, scenario, modelResponse, provider),
+    ),
   );
 };

 // Will not run eval-output pairs that already exist in the database
 export const runAllEvals = async (experimentId: string) => {
-  const outputs = await prisma.modelOutput.findMany({
+  const outputs = await prisma.modelResponse.findMany({
     where: {
+      outdated: false,
+      output: {
+        not: Prisma.AnyNull,
+      },
       scenarioVariantCell: {
         promptVariant: {
           experimentId,
@@ -54,6 +68,7 @@ export const runAllEvals = async (experimentId: string) => {
       scenarioVariantCell: {
         include: {
           testScenario: true,
+          promptVariant: true,
         },
       },
       outputEvaluations: true,
@@ -65,13 +80,18 @@ export const runAllEvals = async (experimentId: string) => {

   await Promise.all(
     outputs.map(async (output) => {
-      const unrunEvals = evals.filter(
+      const evalsToBeRun = evals.filter(
        (evaluation) => !output.outputEvaluations.find((e) => e.evaluationId === evaluation.id),
       );

       await Promise.all(
-        unrunEvals.map(async (evaluation) => {
-          await saveResult(evaluation, output.scenarioVariantCell.testScenario, output);
+        evalsToBeRun.map(async (evaluation) => {
+          await runAndSaveEval(
+            evaluation,
+            output.scenarioVariantCell.testScenario,
+            output,
+            output.scenarioVariantCell.promptVariant.modelProvider as SupportedProvider,
+          );
         }),
       );
     }),
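Note: `Prisma` is now imported as a value (not just a type) because `Prisma.AnyNull` is needed at runtime. For `Json` columns Prisma distinguishes a database NULL (`Prisma.DbNull`) from a stored JSON `null` (`Prisma.JsonNull`); `Prisma.AnyNull` matches either, so the filter skips responses that never received an output. A small illustration:

```ts
// Keep only responses whose output column holds a real JSON payload,
// excluding both database NULLs and stored JSON nulls.
const responsesWithOutput = await prisma.modelResponse.findMany({
  where: { output: { not: Prisma.AnyNull } },
});
```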
@@ -1,4 +1,4 @@
-import { type Prisma } from "@prisma/client";
+import { Prisma } from "@prisma/client";
 import { prisma } from "../db";
 import parseConstructFn from "./parseConstructFn";
 import { type JsonObject } from "type-fest";
@@ -35,7 +35,7 @@ export const generateNewCell = async (
       },
     },
     include: {
-      modelOutput: true,
+      modelResponses: true,
     },
   });
@@ -51,8 +51,6 @@ export const generateNewCell = async (
       data: {
         promptVariantId: variantId,
         testScenarioId: scenarioId,
-        statusCode: 400,
-        errorMessage: parsedConstructFn.error,
         retrievalStatus: "ERROR",
       },
     });
@@ -69,36 +67,55 @@ export const generateNewCell = async (
       retrievalStatus: "PENDING",
     },
     include: {
-      modelOutput: true,
+      modelResponses: true,
     },
   });

-  const matchingModelOutput = await prisma.modelOutput.findFirst({
-    where: { inputHash },
+  const matchingModelResponse = await prisma.modelResponse.findFirst({
+    where: {
+      inputHash,
+      output: {
+        not: Prisma.AnyNull,
+      },
+    },
+    orderBy: {
+      receivedAt: "desc",
+    },
+    include: {
+      scenarioVariantCell: true,
+    },
     take: 1,
   });

-  if (matchingModelOutput) {
-    const newModelOutput = await prisma.modelOutput.create({
+  if (matchingModelResponse) {
+    const newModelResponse = await prisma.modelResponse.create({
       data: {
-        ...omit(matchingModelOutput, ["id"]),
+        ...omit(matchingModelResponse, ["id", "scenarioVariantCell"]),
         scenarioVariantCellId: cell.id,
-        output: matchingModelOutput.output as Prisma.InputJsonValue,
+        output: matchingModelResponse.output as Prisma.InputJsonValue,
       },
     });

     await prisma.scenarioVariantCell.update({
       where: { id: cell.id },
-      data: { retrievalStatus: "COMPLETE" },
+      data: {
+        retrievalStatus: "COMPLETE",
+        jobStartedAt: matchingModelResponse.scenarioVariantCell.jobStartedAt,
+        jobQueuedAt: matchingModelResponse.scenarioVariantCell.jobQueuedAt,
+      },
     });

     // Copy over all eval results as well
     await Promise.all(
       (
-        await prisma.outputEvaluation.findMany({ where: { modelOutputId: matchingModelOutput.id } })
+        await prisma.outputEvaluation.findMany({
+          where: { modelResponseId: matchingModelResponse.id },
+        })
       ).map(async (evaluation) => {
         await prisma.outputEvaluation.create({
           data: {
             ...omit(evaluation, ["id"]),
-            modelOutputId: newModelOutput.id,
+            modelResponseId: newModelResponse.id,
           },
         });
       }),
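Note: the reuse path above looks up the newest non-null `ModelResponse` with the same `inputHash` and clones it (plus its evaluations) instead of re-querying the model. `hashPrompt` is imported but not shown in this diff; a deterministic digest over the constructed input would fit. The sketch below is an assumption about its shape, not the actual implementation:

```ts
import { createHash } from "crypto";

// Assumed sketch: a stable hash of the constructed prompt so identical inputs
// across cells can share a cached ModelResponse. JSON.stringify is only
// deterministic when key order is stable, so a real implementation would
// likely canonicalize the object first.
function hashPrompt(prompt: { modelProvider: string; modelInput: unknown }): string {
  return createHash("sha256")
    .update(JSON.stringify([prompt.modelProvider, prompt.modelInput]))
    .digest("hex");
}
```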
@@ -1,13 +1,14 @@
-import { type Evaluation, type ModelOutput, type TestScenario } from "@prisma/client";
-import { type ChatCompletion } from "openai/resources/chat";
+import { type Evaluation, type ModelResponse, type TestScenario } from "@prisma/client";
 import { type VariableMap, fillTemplate, escapeRegExp, escapeQuotes } from "./fillTemplate";
 import { openai } from "./openai";
 import dedent from "dedent";
+import modelProviders from "~/modelProviders/modelProviders";
+import { type SupportedProvider } from "~/modelProviders/types";

 export const runGpt4Eval = async (
   evaluation: Evaluation,
   scenario: TestScenario,
-  message: ChatCompletion.Choice.Message,
+  stringifiedOutput: string,
 ): Promise<{ result: number; details: string }> => {
   const output = await openai.chat.completions.create({
     model: "gpt-4-0613",
@@ -26,11 +27,7 @@ export const runGpt4Eval = async (
       },
       {
         role: "user",
-        content: `The full output of the simpler message:\n---\n${JSON.stringify(
-          message.content ?? message.function_call,
-          null,
-          2,
-        )}`,
+        content: `The full output of the simpler message:\n---\n${stringifiedOutput}`,
       },
     ],
     function_call: {
@@ -70,15 +67,16 @@ export const runGpt4Eval = async (
 export const runOneEval = async (
   evaluation: Evaluation,
   scenario: TestScenario,
-  modelOutput: ModelOutput,
+  modelResponse: ModelResponse,
+  provider: SupportedProvider,
 ): Promise<{ result: number; details?: string }> => {
-  const output = modelOutput.output as unknown as ChatCompletion;
-
-  const message = output?.choices?.[0]?.message;
+  const modelProvider = modelProviders[provider];
+  const message = modelProvider.normalizeOutput(modelResponse.output);

   if (!message) return { result: 0 };

-  const stringifiedMessage = message.content ?? JSON.stringify(message.function_call);
+  const stringifiedOutput =
+    message.type === "json" ? JSON.stringify(message.value, null, 2) : message.value;

   const matchRegex = escapeRegExp(
     fillTemplate(escapeQuotes(evaluation.value), scenario.variableValues as VariableMap),
@@ -86,10 +84,10 @@ export const runOneEval = async (
   switch (evaluation.evalType) {
     case "CONTAINS":
-      return { result: stringifiedMessage.match(matchRegex) !== null ? 1 : 0 };
+      return { result: stringifiedOutput.match(matchRegex) !== null ? 1 : 0 };
     case "DOES_NOT_CONTAIN":
-      return { result: stringifiedMessage.match(matchRegex) === null ? 1 : 0 };
+      return { result: stringifiedOutput.match(matchRegex) === null ? 1 : 0 };
     case "GPT4_EVAL":
-      return await runGpt4Eval(evaluation, scenario, message);
+      return await runGpt4Eval(evaluation, scenario, stringifiedOutput);
   }
 };
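Note: routing output handling through `modelProviders[provider].normalizeOutput` removes the hard dependency on OpenAI's `ChatCompletion` shape, which is presumably what lets the non-OpenAI providers share the eval code. The normalized message type below is inferred from how it is consumed here (`message.type === "json"` versus a plain string), not copied from the providers module:

```ts
// Inferred normalized shape: a provider returns either plain text or a JSON
// payload (e.g. a function call), and evals stringify it accordingly.
type NormalizedOutput =
  | { type: "text"; value: string }
  | { type: "json"; value: unknown };

const stringifyOutput = (message: NormalizedOutput): string =>
  message.type === "json" ? JSON.stringify(message.value, null, 2) : message.value;
```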