Compare commits

5 Commits: priorities ... project-me

| Author | SHA1 | Date |
|---|---|---|
| | 4e176088e9 | |
| | 3cec1f7786 | |
| | b3d8f96fa8 | |
| | 54d97ddfa8 | |
| | 1f8e3b820f | |
```diff
@@ -0,0 +1,15 @@
+/*
+  Warnings:
+
+  - You are about to rename the column `completionTokens` to `outputTokens` on the `ModelResponse` table.
+  - You are about to rename the column `promptTokens` to `inputTokens` on the `ModelResponse` table.
+
+*/
+
+-- Rename completionTokens to outputTokens
+ALTER TABLE "ModelResponse"
+RENAME COLUMN "completionTokens" TO "outputTokens";
+
+-- Rename promptTokens to inputTokens
+ALTER TABLE "ModelResponse"
+RENAME COLUMN "promptTokens" TO "inputTokens";
```
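Because the migration uses `RENAME COLUMN` rather than dropping and re-adding columns, existing token counts survive the migration. A minimal sketch of a post-migration sanity check, assuming the Prisma client has been regenerated against the updated schema:

```typescript
// Hypothetical sanity check, not part of this diff: read the renamed
// fields back through the regenerated Prisma client.
import { PrismaClient } from "@prisma/client";

const prisma = new PrismaClient();

async function checkRename() {
  const row = await prisma.modelResponse.findFirst({
    select: { inputTokens: true, outputTokens: true },
  });
  console.log("inputTokens:", row?.inputTokens, "outputTokens:", row?.outputTokens);
}

checkRename().finally(() => prisma.$disconnect());
```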
```diff
@@ -117,8 +117,8 @@ model ModelResponse {
   receivedAt       DateTime?
   output           Json?
   cost             Float?
-  promptTokens     Int?
-  completionTokens Int?
+  inputTokens      Int?
+  outputTokens     Int?
   statusCode       Int?
   errorMessage     String?
   retryTime        DateTime?
```
```diff
@@ -19,8 +19,8 @@ export const OutputStats = ({
     ? modelResponse.receivedAt.getTime() - modelResponse.requestedAt.getTime()
     : 0;
 
-  const promptTokens = modelResponse.promptTokens;
-  const completionTokens = modelResponse.completionTokens;
+  const inputTokens = modelResponse.inputTokens;
+  const outputTokens = modelResponse.outputTokens;
 
   return (
     <HStack
@@ -55,8 +55,8 @@ export const OutputStats = ({
       </HStack>
       {modelResponse.cost && (
         <CostTooltip
-          promptTokens={promptTokens}
-          completionTokens={completionTokens}
+          inputTokens={inputTokens}
+          outputTokens={outputTokens}
          cost={modelResponse.cost}
        >
          <HStack spacing={0}>
```
```diff
@@ -17,8 +17,8 @@ export default function VariantStats(props: { variant: PromptVariant }) {
     initialData: {
       evalResults: [],
       overallCost: 0,
-      promptTokens: 0,
-      completionTokens: 0,
+      inputTokens: 0,
+      outputTokens: 0,
       scenarioCount: 0,
       outputCount: 0,
       awaitingEvals: false,
@@ -68,8 +68,8 @@ export default function VariantStats(props: { variant: PromptVariant }) {
       </HStack>
       {data.overallCost && (
         <CostTooltip
-          promptTokens={data.promptTokens}
-          completionTokens={data.completionTokens}
+          inputTokens={data.inputTokens}
+          outputTokens={data.outputTokens}
          cost={data.overallCost}
        >
          <HStack spacing={0} align="center" color="gray.500">
```
```diff
@@ -90,7 +90,7 @@ export default function ProjectMenu() {
         </HStack>
       </NavSidebarOption>
     </PopoverTrigger>
-    <PopoverContent _focusVisible={{ outline: "unset" }} ml={-1} minW={0} w="full">
+    <PopoverContent _focusVisible={{ outline: "unset" }} ml={-1} w="auto" minW={100} maxW={280}>
      <VStack alignItems="flex-start" spacing={2} py={4} px={2}>
        <Text color="gray.500" fontSize="xs" fontWeight="bold" pb={1}>
          PROJECTS
@@ -150,6 +150,7 @@ const ProjectOption = ({
       _hover={gearHovered ? undefined : { bgColor: "gray.200", textDecoration: "none" }}
       p={2}
       borderRadius={4}
+      spacing={4}
     >
       <Text>{proj.name}</Text>
       <IconButton
```
```diff
@@ -2,14 +2,14 @@ import { HStack, Icon, Text, Tooltip, type TooltipProps, VStack, Divider } from
 import { BsCurrencyDollar } from "react-icons/bs";
 
 type CostTooltipProps = {
-  promptTokens: number | null;
-  completionTokens: number | null;
+  inputTokens: number | null;
+  outputTokens: number | null;
   cost: number;
 } & TooltipProps;
 
 export const CostTooltip = ({
-  promptTokens,
-  completionTokens,
+  inputTokens,
+  outputTokens,
   cost,
   children,
   ...props
@@ -36,12 +36,12 @@ export const CostTooltip = ({
       <HStack>
         <VStack w="28" spacing={1}>
           <Text>Prompt</Text>
-          <Text>{promptTokens ?? 0}</Text>
+          <Text>{inputTokens ?? 0}</Text>
         </VStack>
         <Divider borderColor="gray.200" h={8} orientation="vertical" />
         <VStack w="28" spacing={1}>
           <Text whiteSpace="nowrap">Completion</Text>
-          <Text>{completionTokens ?? 0}</Text>
+          <Text>{outputTokens ?? 0}</Text>
         </VStack>
       </HStack>
     </VStack>
```
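A minimal sketch of a `CostTooltip` call site after the prop rename; the import path, token counts, and cost are invented for illustration:

```tsx
import { Text } from "@chakra-ui/react";
import { CostTooltip } from "./CostTooltip"; // import path assumed

// Hypothetical usage after the rename; values are made up.
export const Example = () => (
  <CostTooltip inputTokens={120} outputTokens={45} cost={0.0021}>
    <Text>$0.0021</Text>
  </CostTooltip>
);
```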
```diff
@@ -28,6 +28,10 @@ const modelProvider: AnthropicProvider = {
   inputSchema: inputSchema as JSONSchema4,
   canStream: true,
   getCompletion,
+  getUsage: (input, output) => {
+    // TODO: add usage logic
+    return null;
+  },
   ...frontendModelProvider,
 };
 
```
```diff
@@ -4,11 +4,9 @@ import {
   type ChatCompletion,
   type CompletionCreateParams,
 } from "openai/resources/chat";
-import { countOpenAIChatTokens } from "~/utils/countTokens";
 import { type CompletionResponse } from "../types";
 import { isArray, isString, omit } from "lodash-es";
 import { openai } from "~/server/utils/openai";
-import { truthyFilter } from "~/utils/utils";
 import { APIError } from "openai";
 import frontendModelProvider from "./frontend";
 import modelProvider, { type SupportedModel } from ".";
@@ -60,9 +58,6 @@ export async function getCompletion(
 ): Promise<CompletionResponse<ChatCompletion>> {
   const start = Date.now();
   let finalCompletion: ChatCompletion | null = null;
-  let promptTokens: number | undefined = undefined;
-  let completionTokens: number | undefined = undefined;
-  const modelName = modelProvider.getModel(input) as SupportedModel;
 
   try {
     if (onStream) {
@@ -86,16 +81,6 @@ export async function getCompletion(
         autoRetry: false,
       };
     }
-    try {
-      promptTokens = countOpenAIChatTokens(modelName, input.messages);
-      completionTokens = countOpenAIChatTokens(
-        modelName,
-        finalCompletion.choices.map((c) => c.message).filter(truthyFilter),
-      );
-    } catch (err) {
-      // TODO handle this, library seems like maybe it doesn't work with function calls?
-      console.error(err);
-    }
   } else {
     const resp = await openai.chat.completions.create(
       { ...input, stream: false },
@@ -104,25 +89,14 @@ export async function getCompletion(
       },
     );
     finalCompletion = resp;
-    promptTokens = resp.usage?.prompt_tokens ?? 0;
-    completionTokens = resp.usage?.completion_tokens ?? 0;
   }
   const timeToComplete = Date.now() - start;
 
-  const { promptTokenPrice, completionTokenPrice } = frontendModelProvider.models[modelName];
-  let cost = undefined;
-  if (promptTokenPrice && completionTokenPrice && promptTokens && completionTokens) {
-    cost = promptTokens * promptTokenPrice + completionTokens * completionTokenPrice;
-  }
-
   return {
     type: "success",
     statusCode: 200,
     value: finalCompletion,
     timeToComplete,
-    promptTokens,
-    completionTokens,
-    cost,
   };
 } catch (error: unknown) {
   if (error instanceof APIError) {
```
```diff
@@ -4,6 +4,8 @@ import inputSchema from "./codegen/input.schema.json";
 import { type ChatCompletion, type CompletionCreateParams } from "openai/resources/chat";
 import { getCompletion } from "./getCompletion";
 import frontendModelProvider from "./frontend";
+import { countOpenAIChatTokens } from "~/utils/countTokens";
+import { truthyFilter } from "~/utils/utils";
 
 const supportedModels = [
   "gpt-4-0613",
@@ -39,6 +41,41 @@ const modelProvider: OpenaiChatModelProvider = {
   inputSchema: inputSchema as JSONSchema4,
   canStream: true,
   getCompletion,
+  getUsage: (input, output) => {
+    if (output.choices.length === 0) return null;
+
+    const model = modelProvider.getModel(input);
+    if (!model) return null;
+
+    let inputTokens: number;
+    let outputTokens: number;
+
+    if (output.usage) {
+      inputTokens = output.usage.prompt_tokens;
+      outputTokens = output.usage.completion_tokens;
+    } else {
+      try {
+        inputTokens = countOpenAIChatTokens(model, input.messages);
+        outputTokens = countOpenAIChatTokens(
+          model,
+          output.choices.map((c) => c.message).filter(truthyFilter),
+        );
+      } catch (err) {
+        inputTokens = 0;
+        outputTokens = 0;
+        // TODO handle this, library seems like maybe it doesn't work with function calls?
+        console.error(err);
+      }
+    }
+
+    const { promptTokenPrice, completionTokenPrice } = frontendModelProvider.models[model];
+    let cost = undefined;
+    if (promptTokenPrice && completionTokenPrice && inputTokens && outputTokens) {
+      cost = inputTokens * promptTokenPrice + outputTokens * completionTokenPrice;
+    }
+
+    return { inputTokens: inputTokens, outputTokens: outputTokens, cost };
+  },
   ...frontendModelProvider,
 };
 
```
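One subtlety in the pricing block above: the `inputTokens && outputTokens` guard treats a zero count as falsy, so when token counting fails and both counts are set to 0, `cost` stays `undefined` rather than becoming 0. A standalone sketch of that behavior, with invented per-token prices:

```typescript
// Standalone illustration of the cost guard above; prices are invented.
const promptTokenPrice = 0.00003;
const completionTokenPrice = 0.00006;

function computeCost(inputTokens: number, outputTokens: number): number | undefined {
  // Zero token counts (e.g. after a counting failure) short-circuit the
  // guard, leaving cost undefined instead of 0.
  if (promptTokenPrice && completionTokenPrice && inputTokens && outputTokens) {
    return inputTokens * promptTokenPrice + outputTokens * completionTokenPrice;
  }
  return undefined;
}

console.log(computeCost(12, 3)); // ≈ 0.00054
console.log(computeCost(0, 0)); // undefined
```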
```diff
@@ -75,6 +75,10 @@ const modelProvider: ReplicateLlama2Provider = {
   },
   canStream: true,
   getCompletion,
+  getUsage: (input, output) => {
+    // TODO: add usage logic
+    return null;
+  },
   ...frontendModelProvider,
 };
 
```
```diff
@@ -43,9 +43,6 @@ export type CompletionResponse<T> =
       value: T;
       timeToComplete: number;
       statusCode: number;
-      promptTokens?: number;
-      completionTokens?: number;
-      cost?: number;
     };
 
 export type ModelProvider<SupportedModels extends string, InputSchema, OutputSchema> = {
@@ -56,6 +53,10 @@ export type ModelProvider<SupportedModels extends string, InputSchema, OutputSchema> = {
     input: InputSchema,
     onStream: ((partialOutput: OutputSchema) => void) | null,
   ) => Promise<CompletionResponse<OutputSchema>>;
+  getUsage: (
+    input: InputSchema,
+    output: OutputSchema,
+  ) => { gpuRuntime?: number; inputTokens?: number; outputTokens?: number; cost?: number } | null;
 
   // This is just a convenience for type inference, don't use it at runtime
   _outputSchema?: OutputSchema | null;
```
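A minimal sketch of what the new `getUsage` contract asks of a provider. Only the return-type field names come from the diff; the input/output shapes are invented stand-ins for a provider's `InputSchema`/`OutputSchema` type parameters:

```typescript
// Sketch only: ExampleInput and ExampleOutput are hypothetical schemas.
type ExampleInput = { messages: { role: string; content: string }[] };
type ExampleOutput = { usage?: { input: number; output: number } };

type Usage = { gpuRuntime?: number; inputTokens?: number; outputTokens?: number; cost?: number };

const getUsage = (input: ExampleInput, output: ExampleOutput): Usage | null => {
  // Providers that cannot report usage yet may simply return null,
  // as the Anthropic and Replicate stubs in this diff do.
  if (!output.usage) return null;
  return { inputTokens: output.usage.input, outputTokens: output.usage.output };
};
```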
```diff
@@ -123,13 +123,13 @@ export const promptVariantsRouter = createTRPCRouter({
       },
       _sum: {
         cost: true,
-        promptTokens: true,
-        completionTokens: true,
+        inputTokens: true,
+        outputTokens: true,
       },
     });
 
-    const promptTokens = overallTokens._sum?.promptTokens ?? 0;
-    const completionTokens = overallTokens._sum?.completionTokens ?? 0;
+    const inputTokens = overallTokens._sum?.inputTokens ?? 0;
+    const outputTokens = overallTokens._sum?.outputTokens ?? 0;
 
     const awaitingEvals = !!evalResults.find(
       (result) => result.totalCount < scenarioCount * evals.length,
@@ -137,8 +137,8 @@ export const promptVariantsRouter = createTRPCRouter({
 
     return {
       evalResults,
-      promptTokens,
-      completionTokens,
+      inputTokens,
+      outputTokens,
       overallCost: overallTokens._sum?.cost ?? 0,
       scenarioCount,
       outputCount,
```
```diff
@@ -110,15 +110,16 @@ export const queryModel = defineTask<QueryModelJob>("queryModel", async (task) => {
   });
   const response = await provider.getCompletion(prompt.modelInput, onStream);
   if (response.type === "success") {
+    const usage = provider.getUsage(prompt.modelInput, response.value);
     modelResponse = await prisma.modelResponse.update({
       where: { id: modelResponse.id },
       data: {
         output: response.value as Prisma.InputJsonObject,
         statusCode: response.statusCode,
         receivedAt: new Date(),
-        promptTokens: response.promptTokens,
-        completionTokens: response.completionTokens,
-        cost: response.cost,
+        inputTokens: usage?.inputTokens,
+        outputTokens: usage?.outputTokens,
+        cost: usage?.cost,
       },
     });
 
```