Compare commits

...

5 Commits

Author         SHA1        Message                                                          Date
David Corbitt  4e176088e9  Auto-resize project menu width                                   2023-08-10 22:46:01 -07:00
David Corbitt  3cec1f7786  Merge branch 'main' into logged-calls                            2023-08-10 22:19:16 -07:00
David Corbitt  b3d8f96fa8  Merge branch 'main' into logged-calls                            2023-08-10 21:49:31 -07:00
David Corbitt  54d97ddfa8  Add getUsage function                                            2023-08-10 19:51:36 -07:00
David Corbitt  1f8e3b820f  Rename prompt and completion tokens to input and output tokens  2023-08-10 19:49:18 -07:00
13 changed files with 92 additions and 55 deletions

View File

@@ -0,0 +1,15 @@
+/*
+  Warnings:
+
+  - You are about to rename the column `completionTokens` to `outputTokens` on the `ModelResponse` table.
+  - You are about to rename the column `promptTokens` to `inputTokens` on the `ModelResponse` table.
+
+*/
+-- Rename completionTokens to outputTokens
+ALTER TABLE "ModelResponse"
+RENAME COLUMN "completionTokens" TO "outputTokens";
+
+-- Rename promptTokens to inputTokens
+ALTER TABLE "ModelResponse"
+RENAME COLUMN "promptTokens" TO "inputTokens";

View File

@@ -117,8 +117,8 @@ model ModelResponse {
receivedAt DateTime?
output Json?
cost Float?
- promptTokens     Int?
- completionTokens Int?
+ inputTokens      Int?
+ outputTokens     Int?
statusCode Int?
errorMessage String?
retryTime DateTime?

View File

@@ -19,8 +19,8 @@ export const OutputStats = ({
? modelResponse.receivedAt.getTime() - modelResponse.requestedAt.getTime()
: 0;
- const promptTokens = modelResponse.promptTokens;
- const completionTokens = modelResponse.completionTokens;
+ const inputTokens = modelResponse.inputTokens;
+ const outputTokens = modelResponse.outputTokens;
return (
<HStack
@@ -55,8 +55,8 @@ export const OutputStats = ({
</HStack>
{modelResponse.cost && (
<CostTooltip
- promptTokens={promptTokens}
- completionTokens={completionTokens}
+ inputTokens={inputTokens}
+ outputTokens={outputTokens}
cost={modelResponse.cost}
>
<HStack spacing={0}>

View File

@@ -17,8 +17,8 @@ export default function VariantStats(props: { variant: PromptVariant }) {
initialData: {
evalResults: [],
overallCost: 0,
- promptTokens: 0,
- completionTokens: 0,
+ inputTokens: 0,
+ outputTokens: 0,
scenarioCount: 0,
outputCount: 0,
awaitingEvals: false,
@@ -68,8 +68,8 @@ export default function VariantStats(props: { variant: PromptVariant }) {
</HStack>
{data.overallCost && (
<CostTooltip
- promptTokens={data.promptTokens}
- completionTokens={data.completionTokens}
+ inputTokens={data.inputTokens}
+ outputTokens={data.outputTokens}
cost={data.overallCost}
>
<HStack spacing={0} align="center" color="gray.500">

View File

@@ -90,7 +90,7 @@ export default function ProjectMenu() {
</HStack>
</NavSidebarOption>
</PopoverTrigger>
- <PopoverContent _focusVisible={{ outline: "unset" }} ml={-1} minW={0} w="full">
+ <PopoverContent _focusVisible={{ outline: "unset" }} ml={-1} w="auto" minW={100} maxW={280}>
<VStack alignItems="flex-start" spacing={2} py={4} px={2}>
<Text color="gray.500" fontSize="xs" fontWeight="bold" pb={1}>
PROJECTS
@@ -150,6 +150,7 @@ const ProjectOption = ({
_hover={gearHovered ? undefined : { bgColor: "gray.200", textDecoration: "none" }}
p={2}
borderRadius={4}
+ spacing={4}
>
<Text>{proj.name}</Text>
<IconButton

View File

@@ -2,14 +2,14 @@ import { HStack, Icon, Text, Tooltip, type TooltipProps, VStack, Divider } from "@chakra-ui/react";
import { BsCurrencyDollar } from "react-icons/bs";
type CostTooltipProps = {
- promptTokens: number | null;
- completionTokens: number | null;
+ inputTokens: number | null;
+ outputTokens: number | null;
cost: number;
} & TooltipProps;
export const CostTooltip = ({
- promptTokens,
- completionTokens,
+ inputTokens,
+ outputTokens,
cost,
children,
...props
@@ -36,12 +36,12 @@ export const CostTooltip = ({
<HStack>
<VStack w="28" spacing={1}>
<Text>Prompt</Text>
- <Text>{promptTokens ?? 0}</Text>
+ <Text>{inputTokens ?? 0}</Text>
</VStack>
<Divider borderColor="gray.200" h={8} orientation="vertical" />
<VStack w="28" spacing={1}>
<Text whiteSpace="nowrap">Completion</Text>
- <Text>{completionTokens ?? 0}</Text>
+ <Text>{outputTokens ?? 0}</Text>
</VStack>
</HStack>
</VStack>

View File

@@ -28,6 +28,10 @@ const modelProvider: AnthropicProvider = {
inputSchema: inputSchema as JSONSchema4,
canStream: true,
getCompletion,
+ getUsage: (input, output) => {
+   // TODO: add usage logic
+   return null;
+ },
...frontendModelProvider,
};
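
The Anthropic provider stubs getUsage out, likely because Anthropic's completion responses did not report token counts at the time. Purely as a hypothetical sketch (nothing here is in the diff: the prompt/completion field names and the use of Anthropic's standalone tokenizer package are assumptions), the stub could count tokens client-side:

import { countTokens } from "@anthropic-ai/tokenizer";

// Hypothetical fragment for the provider object above; assumes the input
// carries a `prompt` string and the output a `completion` string, as in
// Anthropic's older text-completion API shape.
getUsage: (input: { prompt: string }, output: { completion: string }) => ({
  inputTokens: countTokens(input.prompt),
  outputTokens: countTokens(output.completion),
}),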

View File

@@ -4,11 +4,9 @@ import {
type ChatCompletion,
type CompletionCreateParams,
} from "openai/resources/chat";
-import { countOpenAIChatTokens } from "~/utils/countTokens";
import { type CompletionResponse } from "../types";
import { isArray, isString, omit } from "lodash-es";
import { openai } from "~/server/utils/openai";
-import { truthyFilter } from "~/utils/utils";
import { APIError } from "openai";
import frontendModelProvider from "./frontend";
import modelProvider, { type SupportedModel } from ".";
@@ -60,9 +58,6 @@ export async function getCompletion(
): Promise<CompletionResponse<ChatCompletion>> {
const start = Date.now();
let finalCompletion: ChatCompletion | null = null;
- let promptTokens: number | undefined = undefined;
- let completionTokens: number | undefined = undefined;
const modelName = modelProvider.getModel(input) as SupportedModel;
try {
if (onStream) {
@@ -86,16 +81,6 @@ export async function getCompletion(
autoRetry: false,
};
}
- try {
-   promptTokens = countOpenAIChatTokens(modelName, input.messages);
-   completionTokens = countOpenAIChatTokens(
-     modelName,
-     finalCompletion.choices.map((c) => c.message).filter(truthyFilter),
-   );
- } catch (err) {
-   // TODO handle this, library seems like maybe it doesn't work with function calls?
-   console.error(err);
- }
} else {
const resp = await openai.chat.completions.create(
{ ...input, stream: false },
@@ -104,25 +89,14 @@ export async function getCompletion(
},
);
finalCompletion = resp;
- promptTokens = resp.usage?.prompt_tokens ?? 0;
- completionTokens = resp.usage?.completion_tokens ?? 0;
}
const timeToComplete = Date.now() - start;
- const { promptTokenPrice, completionTokenPrice } = frontendModelProvider.models[modelName];
- let cost = undefined;
- if (promptTokenPrice && completionTokenPrice && promptTokens && completionTokens) {
-   cost = promptTokens * promptTokenPrice + completionTokens * completionTokenPrice;
- }
return {
type: "success",
statusCode: 200,
value: finalCompletion,
timeToComplete,
- promptTokens,
- completionTokens,
- cost,
};
} catch (error: unknown) {
if (error instanceof APIError) {

View File

@@ -4,6 +4,8 @@ import inputSchema from "./codegen/input.schema.json";
import { type ChatCompletion, type CompletionCreateParams } from "openai/resources/chat";
import { getCompletion } from "./getCompletion";
import frontendModelProvider from "./frontend";
+import { countOpenAIChatTokens } from "~/utils/countTokens";
+import { truthyFilter } from "~/utils/utils";
const supportedModels = [
"gpt-4-0613",
@@ -39,6 +41,41 @@ const modelProvider: OpenaiChatModelProvider = {
inputSchema: inputSchema as JSONSchema4,
canStream: true,
getCompletion,
+ getUsage: (input, output) => {
+   if (output.choices.length === 0) return null;
+   const model = modelProvider.getModel(input);
+   if (!model) return null;
+   let inputTokens: number;
+   let outputTokens: number;
+   if (output.usage) {
+     inputTokens = output.usage.prompt_tokens;
+     outputTokens = output.usage.completion_tokens;
+   } else {
+     try {
+       inputTokens = countOpenAIChatTokens(model, input.messages);
+       outputTokens = countOpenAIChatTokens(
+         model,
+         output.choices.map((c) => c.message).filter(truthyFilter),
+       );
+     } catch (err) {
+       inputTokens = 0;
+       outputTokens = 0;
+       // TODO: handle this properly; the token-counting library may not support function calls.
+       console.error(err);
+     }
+   }
+   const { promptTokenPrice, completionTokenPrice } = frontendModelProvider.models[model];
+   let cost = undefined;
+   if (promptTokenPrice && completionTokenPrice && inputTokens && outputTokens) {
+     cost = inputTokens * promptTokenPrice + outputTokens * completionTokenPrice;
+   }
+   return { inputTokens, outputTokens, cost };
+ },
...frontendModelProvider,
};
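
countOpenAIChatTokens comes from ~/utils/countTokens, which is not part of this diff. A minimal sketch of what such a helper might look like, assuming the js-tiktoken package (the real helper's implementation and exact signature are not shown here):

import { encodingForModel, type TiktokenModel } from "js-tiktoken";

// Rough count: sums content tokens only. A faithful implementation would also
// add OpenAI's per-message overhead tokens and handle function-call payloads,
// which may be why the catch branch above exists.
export function countOpenAIChatTokens(
  model: TiktokenModel,
  messages: { content?: string | null }[],
): number {
  const enc = encodingForModel(model);
  return messages.reduce(
    (sum, m) => sum + (m.content ? enc.encode(m.content).length : 0),
    0,
  );
}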

View File

@@ -75,6 +75,10 @@ const modelProvider: ReplicateLlama2Provider = {
},
canStream: true,
getCompletion,
+ getUsage: (input, output) => {
+   // TODO: add usage logic
+   return null;
+ },
...frontendModelProvider,
};

View File

@@ -43,9 +43,6 @@ export type CompletionResponse<T> =
value: T;
timeToComplete: number;
statusCode: number;
- promptTokens?: number;
- completionTokens?: number;
- cost?: number;
};
export type ModelProvider<SupportedModels extends string, InputSchema, OutputSchema> = {
@@ -56,6 +53,10 @@ export type ModelProvider<SupportedModels extends string, InputSchema, OutputSchema> = {
input: InputSchema,
onStream: ((partialOutput: OutputSchema) => void) | null,
) => Promise<CompletionResponse<OutputSchema>>;
+ getUsage: (
+   input: InputSchema,
+   output: OutputSchema,
+ ) => { gpuRuntime?: number; inputTokens?: number; outputTokens?: number; cost?: number } | null;
// This is just a convenience for type inference, don't use it at runtime
_outputSchema?: OutputSchema | null;
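
Taken together, getCompletion produces the raw output and getUsage derives token counts and cost from it after the fact. A minimal sketch of how a caller can wire the two together (names here are illustrative; the real call site is the queryModel task shown further down):

// Sketch only: generic over a provider's input/output schema types.
async function completeWithUsage<TIn, TOut>(
  provider: ModelProvider<string, TIn, TOut>,
  input: TIn,
) {
  const response = await provider.getCompletion(input, null);
  if (response.type !== "success") return { response, usage: null };
  // getUsage is a pure function of input and output, so it can run after the
  // request completes, including for streamed responses with no usage payload.
  const usage = provider.getUsage(input, response.value);
  return { response, usage };
}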

View File

@@ -123,13 +123,13 @@ export const promptVariantsRouter = createTRPCRouter({
},
_sum: {
cost: true,
- promptTokens: true,
- completionTokens: true,
+ inputTokens: true,
+ outputTokens: true,
},
});
- const promptTokens = overallTokens._sum?.promptTokens ?? 0;
- const completionTokens = overallTokens._sum?.completionTokens ?? 0;
+ const inputTokens = overallTokens._sum?.inputTokens ?? 0;
+ const outputTokens = overallTokens._sum?.outputTokens ?? 0;
const awaitingEvals = !!evalResults.find(
(result) => result.totalCount < scenarioCount * evals.length,
@@ -137,8 +137,8 @@ export const promptVariantsRouter = createTRPCRouter({
return {
evalResults,
- promptTokens,
- completionTokens,
+ inputTokens,
+ outputTokens,
overallCost: overallTokens._sum?.cost ?? 0,
scenarioCount,
outputCount,

View File

@@ -110,15 +110,16 @@ export const queryModel = defineTask<QueryModelJob>("queryModel", async (task) => {
});
const response = await provider.getCompletion(prompt.modelInput, onStream);
if (response.type === "success") {
+ const usage = provider.getUsage(prompt.modelInput, response.value);
modelResponse = await prisma.modelResponse.update({
where: { id: modelResponse.id },
data: {
output: response.value as Prisma.InputJsonObject,
statusCode: response.statusCode,
receivedAt: new Date(),
- promptTokens: response.promptTokens,
- completionTokens: response.completionTokens,
- cost: response.cost,
+ inputTokens: usage?.inputTokens,
+ outputTokens: usage?.outputTokens,
+ cost: usage?.cost,
},
});