move app to app/ subdir

Kyle Corbitt
2023-08-05 10:00:10 -07:00
parent 7707d451e0
commit 21ef67ed4c
203 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,69 @@
/* eslint-disable @typescript-eslint/no-var-requires */
import YAML from "yaml";
import fs from "fs";
import path from "path";
import { openapiSchemaToJsonSchema } from "@openapi-contrib/openapi-schema-to-json-schema";
import $RefParser from "@apidevtools/json-schema-ref-parser";
import { type JSONObject } from "superjson/dist/types";
import assert from "assert";
import { type JSONSchema4Object } from "json-schema";
import { isObject } from "lodash-es";
// @ts-expect-error for some reason missing from types
import parserEstree from "prettier/plugins/estree";
import parserBabel from "prettier/plugins/babel";
import prettier from "prettier/standalone";
const OPENAPI_URL =
"https://raw.githubusercontent.com/tryAGI/Anthropic/1c0871e861de60a4c3a843cb90e17d63e86c234a/docs/openapi.yaml";
// Fetch the openapi document
const response = await fetch(OPENAPI_URL);
const openApiYaml = await response.text();
// Parse the yaml document
let schema = YAML.parse(openApiYaml) as JSONObject;
schema = openapiSchemaToJsonSchema(schema);
const jsonSchema = await $RefParser.dereference(schema);
assert("components" in jsonSchema);
const completionRequestSchema = jsonSchema.components.schemas
.CreateCompletionRequest as JSONSchema4Object;
// We need to do a bit of surgery here since the Monaco editor doesn't like
// the fact that the schema says `model` can be either a string or an enum,
// and displays a warning in the editor. Let's stick with just an enum for
// now and drop the string option.
assert(
"properties" in completionRequestSchema &&
isObject(completionRequestSchema.properties) &&
"model" in completionRequestSchema.properties &&
isObject(completionRequestSchema.properties.model),
);
const modelProperty = completionRequestSchema.properties.model;
assert(
"oneOf" in modelProperty &&
Array.isArray(modelProperty.oneOf) &&
modelProperty.oneOf.length === 2 &&
isObject(modelProperty.oneOf[1]) &&
"enum" in modelProperty.oneOf[1],
"Expected model to have oneOf length of 2",
);
modelProperty.type = "string";
modelProperty.enum = modelProperty.oneOf[1].enum;
delete modelProperty["oneOf"];
// Get the directory of the current script
const currentDirectory = path.dirname(import.meta.url).replace("file://", "");
// Write the JSON schema to a file in the current directory
fs.writeFileSync(
path.join(currentDirectory, "input.schema.json"),
await prettier.format(JSON.stringify(completionRequestSchema, null, 2), {
parser: "json",
plugins: [parserBabel, parserEstree],
}),
);
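As a reference for the "surgery" above, here is a sketch of the `model` property before and after the script rewrites it. The pre-rewrite shape is assumed from the assertions in the script; the enum values are the ones that end up in the generated input.schema.json below.

// Assumed shape before the rewrite (a free-form string OR an enum, per the assertions above):
const modelBefore = {
  description: "The model that will complete your prompt.",
  oneOf: [
    { type: "string" },
    {
      type: "string",
      enum: ["claude-2", "claude-2.0", "claude-instant-1", "claude-instant-1.1"],
    },
  ],
};

// Shape after the rewrite: the string variant is dropped and the enum is promoted,
// which keeps Monaco from warning about the ambiguous oneOf.
const modelAfter = {
  description: "The model that will complete your prompt.",
  type: "string",
  enum: ["claude-2", "claude-2.0", "claude-instant-1", "claude-instant-1.1"],
};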

View File

@@ -0,0 +1,63 @@
{
"type": "object",
"properties": {
"model": {
"description": "The model that will complete your prompt.",
"x-oaiTypeLabel": "string",
"type": "string",
"enum": [
"claude-2",
"claude-2.0",
"claude-instant-1",
"claude-instant-1.1"
]
},
"prompt": {
"description": "The prompt that you want Claude to complete.\n\nFor proper response generation you will need to format your prompt as follows:\n\"\\n\\nHuman: all instructions for the assistant\\n\\nAssistant:\". The prompt string should begin with the characters \"Human:\" and end with \"Assistant:\".",
"default": "<|endoftext|>",
"example": "\\n\\nHuman: What is the correct translation of ${scenario.input}? I would like a long analysis followed by a short answer.\\n\\nAssistant:",
"type": "string"
},
"max_tokens_to_sample": {
"type": "integer",
"minimum": 1,
"default": 256,
"nullable": true,
"description": "The maximum number of tokens to generate before stopping."
},
"temperature": {
"type": "number",
"minimum": 0,
"maximum": 1,
"nullable": true,
"description": "Amount of randomness injected into the response.\n\nDefaults to 1."
},
"top_p": {
"type": "number",
"minimum": 0,
"maximum": 1,
"nullable": true,
"description": "Use nucleus sampling.\n\nYou should either alter temperature or top_p, but not both.\n"
},
"top_k": {
"type": "number",
"minimum": 0,
"default": 5,
"nullable": true,
"description": "Only sample from the top K options for each subsequent token."
},
"stream": {
"description": "Whether to incrementally stream the response using server-sent events.",
"type": "boolean",
"nullable": true,
"default": false
},
"stop_sequences": {
"description": "Sequences that will cause the model to stop generating completion text.\nBy default, our models stop on \"\\n\\nHuman:\".",
"default": null,
"nullable": true,
"type": "array"
}
},
"required": ["model", "prompt", "max_tokens_to_sample"]
}
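For illustration, a hand-written request body that should validate against this generated schema (hypothetical values, not generated output):

import { type CompletionCreateParams } from "@anthropic-ai/sdk/resources";

// Hypothetical input covering the three required fields plus temperature.
const exampleInput: CompletionCreateParams = {
  model: "claude-2.0",
  prompt: "\n\nHuman: What is the capital of France?\n\nAssistant:",
  max_tokens_to_sample: 256,
  temperature: 0.7,
};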

View File

@@ -0,0 +1,42 @@
import { type Completion } from "@anthropic-ai/sdk/resources";
import { type SupportedModel } from ".";
import { type FrontendModelProvider } from "../types";
import { refinementActions } from "./refinementActions";
const frontendModelProvider: FrontendModelProvider<SupportedModel, Completion> = {
name: "Replicate Llama2",
models: {
"claude-2.0": {
name: "Claude 2.0",
contextWindow: 100000,
promptTokenPrice: 11.02 / 1000000,
completionTokenPrice: 32.68 / 1000000,
speed: "medium",
provider: "anthropic/completion",
learnMoreUrl: "https://www.anthropic.com/product",
apiDocsUrl: "https://docs.anthropic.com/claude/reference/complete_post",
},
"claude-instant-1.1": {
name: "Claude Instant 1.1",
contextWindow: 100000,
promptTokenPrice: 1.63 / 1000000,
completionTokenPrice: 5.51 / 1000000,
speed: "fast",
provider: "anthropic/completion",
learnMoreUrl: "https://www.anthropic.com/product",
apiDocsUrl: "https://docs.anthropic.com/claude/reference/complete_post",
},
},
refinementActions,
normalizeOutput: (output) => {
return {
type: "text",
value: output.completion,
};
},
};
export default frontendModelProvider;

View File

@@ -0,0 +1,86 @@
import { env } from "~/env.mjs";
import { type CompletionResponse } from "../types";
import Anthropic, { APIError } from "@anthropic-ai/sdk";
import { type Completion, type CompletionCreateParams } from "@anthropic-ai/sdk/resources";
import { isObject, isString } from "lodash-es";
const anthropic = new Anthropic({
apiKey: env.ANTHROPIC_API_KEY,
});
export async function getCompletion(
input: CompletionCreateParams,
onStream: ((partialOutput: Completion) => void) | null,
): Promise<CompletionResponse<Completion>> {
const start = Date.now();
let finalCompletion: Completion | null = null;
try {
if (onStream) {
const resp = await anthropic.completions.create(
{ ...input, stream: true },
{
maxRetries: 0,
},
);
for await (const part of resp) {
if (finalCompletion === null) {
finalCompletion = part;
} else {
finalCompletion = { ...part, completion: finalCompletion.completion + part.completion };
}
onStream(finalCompletion);
}
if (!finalCompletion) {
return {
type: "error",
message: "Streaming failed to return a completion",
autoRetry: false,
};
}
} else {
const resp = await anthropic.completions.create(
{ ...input, stream: false },
{
maxRetries: 0,
},
);
finalCompletion = resp;
}
const timeToComplete = Date.now() - start;
return {
type: "success",
statusCode: 200,
value: finalCompletion,
timeToComplete,
};
} catch (error: unknown) {
console.log("CAUGHT ERROR", error);
if (error instanceof APIError) {
const message =
isObject(error.error) &&
"error" in error.error &&
isObject(error.error.error) &&
"message" in error.error.error &&
isString(error.error.error.message)
? error.error.error.message
: error.message;
return {
type: "error",
message,
autoRetry: error.status === 429 || error.status === 503,
statusCode: error.status,
};
} else {
return {
type: "error",
message: (error as Error).message,
autoRetry: true,
};
}
}
}
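A hypothetical call site for getCompletion (assuming ANTHROPIC_API_KEY is set and getCompletion is imported from this module); when an onStream callback is supplied, the helper streams and accumulates partial completions as shown above:

const response = await getCompletion(
  {
    model: "claude-2.0",
    prompt: "\n\nHuman: Say hello.\n\nAssistant:",
    max_tokens_to_sample: 64,
  },
  // Called with the accumulated completion after each streamed chunk.
  (partial) => console.log("so far:", partial.completion),
);
if (response.type === "success") {
  console.log("final:", response.value.completion, `(${response.timeToComplete}ms)`);
} else {
  console.error("request failed:", response.message);
}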

View File

@@ -0,0 +1,34 @@
import { type JSONSchema4 } from "json-schema";
import { type ModelProvider } from "../types";
import inputSchema from "./codegen/input.schema.json";
import { getCompletion } from "./getCompletion";
import frontendModelProvider from "./frontend";
import type { Completion, CompletionCreateParams } from "@anthropic-ai/sdk/resources";
const supportedModels = ["claude-2.0", "claude-instant-1.1"] as const;
export type SupportedModel = (typeof supportedModels)[number];
export type AnthropicProvider = ModelProvider<SupportedModel, CompletionCreateParams, Completion>;
const modelProvider: AnthropicProvider = {
getModel: (input) => {
if (supportedModels.includes(input.model as SupportedModel))
return input.model as SupportedModel;
const modelMaps: Record<string, SupportedModel> = {
"claude-2": "claude-2.0",
"claude-instant-1": "claude-instant-1.1",
};
if (input.model in modelMaps) return modelMaps[input.model] as SupportedModel;
return null;
},
inputSchema: inputSchema as JSONSchema4,
canStream: true,
getCompletion,
...frontendModelProvider,
};
export default modelProvider;
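A quick sketch of how getModel normalizes model names (hypothetical calls, minimal inputs shown):

// Versionless names are mapped to the specific supported version; unknown names yield null.
modelProvider.getModel({ model: "claude-2", prompt: "", max_tokens_to_sample: 1 }); // -> "claude-2.0"
modelProvider.getModel({ model: "claude-instant-1", prompt: "", max_tokens_to_sample: 1 }); // -> "claude-instant-1.1"
modelProvider.getModel({ model: "claude-1", prompt: "", max_tokens_to_sample: 1 }); // -> null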

View File

@@ -0,0 +1,3 @@
import { type RefinementAction } from "../types";
export const refinementActions: Record<string, RefinementAction> = {};

View File

@@ -0,0 +1,15 @@
import openaiChatCompletionFrontend from "./openai-ChatCompletion/frontend";
import replicateLlama2Frontend from "./replicate-llama2/frontend";
import anthropicFrontend from "./anthropic-completion/frontend";
import { type SupportedProvider, type FrontendModelProvider } from "./types";
// Keep attributes here that need to be accessible from the frontend. We can't
// just include them in the default `modelProviders` object because it has some
// transitive dependencies that can only be imported on the server.
const frontendModelProviders: Record<SupportedProvider, FrontendModelProvider<any, any>> = {
"openai/ChatCompletion": openaiChatCompletionFrontend,
"replicate/llama2": replicateLlama2Frontend,
"anthropic/completion": anthropicFrontend,
};
export default frontendModelProviders;
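A hypothetical frontend usage, relying only on the attributes exposed through this map (the import path is illustrative):

import frontendModelProviders from "~/modelProviders/frontendModelProviders";

const anthropic = frontendModelProviders["anthropic/completion"];
console.log(anthropic.name, Object.keys(anthropic.models)); // provider name and its supported model ids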

View File

@@ -0,0 +1,36 @@
import { type JSONSchema4Object } from "json-schema";
import modelProviders from "./modelProviders";
import { compile } from "json-schema-to-typescript";
import dedent from "dedent";
export default async function generateTypes() {
const combinedSchema = {
type: "object",
properties: {} as Record<string, JSONSchema4Object>,
};
Object.entries(modelProviders).forEach(([id, provider]) => {
combinedSchema.properties[id] = provider.inputSchema;
});
const promptTypes = (
await compile(combinedSchema as JSONSchema4Object, "PromptTypes", {
additionalProperties: false,
bannerComment: dedent`
/**
* This type map defines the input types for each model provider.
*/
`,
})
).replace(/export interface PromptTypes/g, "interface PromptTypes");
return dedent`
${promptTypes}
declare function definePrompt<T extends keyof PromptTypes>(modelProvider: T, input: PromptTypes[T])
`;
}
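For reference, a sketch of the kind of declaration string generateTypes returns; the property bodies below are illustrative placeholders, since the real ones are compiled from each provider's inputSchema:

/**
 * This type map defines the input types for each model provider.
 */
interface PromptTypes {
  // Illustrative members only; the generated interfaces are much larger.
  "openai/ChatCompletion"?: { model: string; messages: { role: string; content?: string }[] };
  "replicate/llama2"?: { model: string; prompt: string };
  "anthropic/completion"?: { model: string; prompt: string; max_tokens_to_sample: number };
}

declare function definePrompt<T extends keyof PromptTypes>(modelProvider: T, input: PromptTypes[T])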

View File

@@ -0,0 +1,12 @@
import openaiChatCompletion from "./openai-ChatCompletion";
import replicateLlama2 from "./replicate-llama2";
import anthropicCompletion from "./anthropic-completion";
import { type SupportedProvider, type ModelProvider } from "./types";
const modelProviders: Record<SupportedProvider, ModelProvider<any, any, any>> = {
"openai/ChatCompletion": openaiChatCompletion,
"replicate/llama2": replicateLlama2,
"anthropic/completion": anthropicCompletion,
};
export default modelProviders;

View File

@@ -0,0 +1,77 @@
/* eslint-disable @typescript-eslint/no-var-requires */
import YAML from "yaml";
import fs from "fs";
import path from "path";
import { openapiSchemaToJsonSchema } from "@openapi-contrib/openapi-schema-to-json-schema";
import $RefParser from "@apidevtools/json-schema-ref-parser";
import { type JSONObject } from "superjson/dist/types";
import assert from "assert";
import { type JSONSchema4Object } from "json-schema";
import { isObject } from "lodash-es";
// @ts-expect-error for some reason missing from types
import parserEstree from "prettier/plugins/estree";
import parserBabel from "prettier/plugins/babel";
import prettier from "prettier/standalone";
const OPENAPI_URL =
"https://raw.githubusercontent.com/openai/openai-openapi/0c432eb66fd0c758fd8b9bd69db41c1096e5f4db/openapi.yaml";
// Fetch the openapi document
const response = await fetch(OPENAPI_URL);
const openApiYaml = await response.text();
// Parse the yaml document
let schema = YAML.parse(openApiYaml) as JSONObject;
schema = openapiSchemaToJsonSchema(schema);
const jsonSchema = await $RefParser.dereference(schema);
assert("components" in jsonSchema);
const completionRequestSchema = jsonSchema.components.schemas
.CreateChatCompletionRequest as JSONSchema4Object;
// We need to do a bit of surgery here since the Monaco editor doesn't like
// the fact that the schema says `model` can be either a string or an enum,
// and displays a warning in the editor. Let's stick with just an enum for
// now and drop the string option.
assert(
"properties" in completionRequestSchema &&
isObject(completionRequestSchema.properties) &&
"model" in completionRequestSchema.properties &&
isObject(completionRequestSchema.properties.model),
);
const modelProperty = completionRequestSchema.properties.model;
assert(
"oneOf" in modelProperty &&
Array.isArray(modelProperty.oneOf) &&
modelProperty.oneOf.length === 2 &&
isObject(modelProperty.oneOf[1]) &&
"enum" in modelProperty.oneOf[1],
"Expected model to have oneOf length of 2",
);
modelProperty.type = "string";
modelProperty.enum = modelProperty.oneOf[1].enum;
delete modelProperty["oneOf"];
// The default of "inf" confuses the Typescript generator, so can just remove it
assert(
"max_tokens" in completionRequestSchema.properties &&
isObject(completionRequestSchema.properties.max_tokens) &&
"default" in completionRequestSchema.properties.max_tokens,
);
delete completionRequestSchema.properties.max_tokens["default"];
// Get the directory of the current script
const currentDirectory = path.dirname(import.meta.url).replace("file://", "");
// Write the JSON schema to a file in the current directory
fs.writeFileSync(
path.join(currentDirectory, "input.schema.json"),
await prettier.format(JSON.stringify(completionRequestSchema, null, 2), {
parser: "json",
plugins: [parserBabel, parserEstree],
}),
);

View File

@@ -0,0 +1,185 @@
{
"type": "object",
"properties": {
"model": {
"description": "ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility) table for details on which models work with the Chat API.",
"example": "gpt-3.5-turbo",
"type": "string",
"enum": [
"gpt-4",
"gpt-4-0613",
"gpt-4-32k",
"gpt-4-32k-0613",
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613"
]
},
"messages": {
"description": "A list of messages comprising the conversation so far. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb).",
"type": "array",
"minItems": 1,
"items": {
"type": "object",
"properties": {
"role": {
"type": "string",
"enum": ["system", "user", "assistant", "function"],
"description": "The role of the messages author. One of `system`, `user`, `assistant`, or `function`."
},
"content": {
"type": "string",
"description": "The contents of the message. `content` is required for all messages except assistant messages with function calls."
},
"name": {
"type": "string",
"description": "The name of the author of this message. `name` is required if role is `function`, and it should be the name of the function whose response is in the `content`. May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters."
},
"function_call": {
"type": "object",
"description": "The name and arguments of a function that should be called, as generated by the model.",
"properties": {
"name": {
"type": "string",
"description": "The name of the function to call."
},
"arguments": {
"type": "string",
"description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function."
}
}
}
},
"required": ["role"]
}
},
"functions": {
"description": "A list of functions the model may generate JSON inputs for.",
"type": "array",
"minItems": 1,
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."
},
"description": {
"type": "string",
"description": "The description of what the function does."
},
"parameters": {
"type": "object",
"description": "The parameters the functions accepts, described as a JSON Schema object. See the [guide](/docs/guides/gpt/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.",
"additionalProperties": true
}
},
"required": ["name"]
}
},
"function_call": {
"description": "Controls how the model responds to function calls. \"none\" means the model does not call a function, and responds to the end-user. \"auto\" means the model can pick between an end-user or calling a function. Specifying a particular function via `{\"name\":\\ \"my_function\"}` forces the model to call that function. \"none\" is the default when no functions are present. \"auto\" is the default if functions are present.",
"oneOf": [
{
"type": "string",
"enum": ["none", "auto"]
},
{
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the function to call."
}
},
"required": ["name"]
}
]
},
"temperature": {
"type": "number",
"minimum": 0,
"maximum": 2,
"default": 1,
"example": 1,
"nullable": true,
"description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.\n"
},
"top_p": {
"type": "number",
"minimum": 0,
"maximum": 1,
"default": 1,
"example": 1,
"nullable": true,
"description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n\nWe generally recommend altering this or `temperature` but not both.\n"
},
"n": {
"type": "integer",
"minimum": 1,
"maximum": 128,
"default": 1,
"example": 1,
"nullable": true,
"description": "How many chat completion choices to generate for each input message."
},
"stream": {
"description": "If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) as they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb).\n",
"type": "boolean",
"nullable": true,
"default": false
},
"stop": {
"description": "Up to 4 sequences where the API will stop generating further tokens.\n",
"default": null,
"oneOf": [
{
"type": "string",
"nullable": true
},
{
"type": "array",
"minItems": 1,
"maxItems": 4,
"items": {
"type": "string"
}
}
]
},
"max_tokens": {
"description": "The maximum number of [tokens](/tokenizer) to generate in the chat completion.\n\nThe total length of input tokens and generated tokens is limited by the model's context length. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) for counting tokens.\n",
"type": "integer"
},
"presence_penalty": {
"type": "number",
"default": 0,
"minimum": -2,
"maximum": 2,
"nullable": true,
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\n[See more information about frequency and presence penalties.](/docs/api-reference/parameter-details)\n"
},
"frequency_penalty": {
"type": "number",
"default": 0,
"minimum": -2,
"maximum": 2,
"nullable": true,
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\n[See more information about frequency and presence penalties.](/docs/api-reference/parameter-details)\n"
},
"logit_bias": {
"type": "object",
"x-oaiTypeLabel": "map",
"default": null,
"nullable": true,
"description": "Modify the likelihood of specified tokens appearing in the completion.\n\nAccepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.\n"
},
"user": {
"type": "string",
"example": "user-1234",
"description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n"
}
},
"required": ["model", "messages"]
}
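For illustration, a hand-written request body that should validate against this schema, including a forced function call (hypothetical values):

import { type CompletionCreateParams } from "openai/resources/chat";

// Hypothetical chat input: the required model and messages, plus a function the model must call.
const exampleInput: CompletionCreateParams = {
  model: "gpt-3.5-turbo-0613",
  messages: [
    { role: "system", content: "Evaluate sentiment." },
    { role: "user", content: "I love this library!" },
  ],
  functions: [
    {
      name: "extract_sentiment",
      parameters: {
        type: "object",
        properties: { sentiment: { type: "string", description: "one of positive/negative/neutral" } },
      },
    },
  ],
  function_call: { name: "extract_sentiment" },
  temperature: 0,
};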

View File

@@ -0,0 +1,87 @@
import { type JsonValue } from "type-fest";
import { type SupportedModel } from ".";
import { type FrontendModelProvider } from "../types";
import { type ChatCompletion } from "openai/resources/chat";
import { refinementActions } from "./refinementActions";
const frontendModelProvider: FrontendModelProvider<SupportedModel, ChatCompletion> = {
name: "OpenAI ChatCompletion",
models: {
"gpt-4-0613": {
name: "GPT-4",
contextWindow: 8192,
promptTokenPrice: 0.00003,
completionTokenPrice: 0.00006,
speed: "medium",
provider: "openai/ChatCompletion",
learnMoreUrl: "https://openai.com/gpt-4",
},
"gpt-4-32k-0613": {
name: "GPT-4 32k",
contextWindow: 32768,
promptTokenPrice: 0.00006,
completionTokenPrice: 0.00012,
speed: "medium",
provider: "openai/ChatCompletion",
learnMoreUrl: "https://openai.com/gpt-4",
},
"gpt-3.5-turbo-0613": {
name: "GPT-3.5 Turbo",
contextWindow: 4096,
promptTokenPrice: 0.0000015,
completionTokenPrice: 0.000002,
speed: "fast",
provider: "openai/ChatCompletion",
learnMoreUrl: "https://platform.openai.com/docs/guides/gpt/chat-completions-api",
},
"gpt-3.5-turbo-16k-0613": {
name: "GPT-3.5 Turbo 16k",
contextWindow: 16384,
promptTokenPrice: 0.000003,
completionTokenPrice: 0.000004,
speed: "fast",
provider: "openai/ChatCompletion",
learnMoreUrl: "https://platform.openai.com/docs/guides/gpt/chat-completions-api",
},
},
refinementActions,
normalizeOutput: (output) => {
const message = output.choices[0]?.message;
if (!message)
return {
type: "json",
value: output as unknown as JsonValue,
};
if (message.content) {
return {
type: "text",
value: message.content,
};
} else if (message.function_call) {
let args = message.function_call.arguments ?? "";
try {
args = JSON.parse(args);
} catch (e) {
// Ignore
}
return {
type: "json",
value: {
...message.function_call,
arguments: args,
},
};
} else {
return {
type: "json",
value: message as unknown as JsonValue,
};
}
},
};
export default frontendModelProvider;
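A worked sketch of the two common paths through normalizeOutput above (object shapes trimmed to the fields it actually reads; values are hypothetical):

// A plain text reply normalizes to a text output:
const textReply = { choices: [{ message: { role: "assistant", content: "Hello!" } }] };
// normalizeOutput(textReply) -> { type: "text", value: "Hello!" }

// A function call normalizes to JSON, with arguments parsed when they are valid JSON:
const functionReply = {
  choices: [
    { message: { role: "assistant", function_call: { name: "score_post", arguments: '{"score":2}' } } },
  ],
};
// normalizeOutput(functionReply) -> { type: "json", value: { name: "score_post", arguments: { score: 2 } } }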

View File

@@ -0,0 +1,152 @@
/* eslint-disable @typescript-eslint/no-unsafe-call */
import {
type ChatCompletionChunk,
type ChatCompletion,
type CompletionCreateParams,
} from "openai/resources/chat";
import { countOpenAIChatTokens } from "~/utils/countTokens";
import { type CompletionResponse } from "../types";
import { isArray, isString, omit } from "lodash-es";
import { openai } from "~/server/utils/openai";
import { truthyFilter } from "~/utils/utils";
import { APIError } from "openai";
import frontendModelProvider from "./frontend";
import modelProvider, { type SupportedModel } from ".";
const mergeStreamedChunks = (
base: ChatCompletion | null,
chunk: ChatCompletionChunk,
): ChatCompletion => {
if (base === null) {
return mergeStreamedChunks({ ...chunk, choices: [] }, chunk);
}
const choices = [...base.choices];
for (const choice of chunk.choices) {
const baseChoice = choices.find((c) => c.index === choice.index);
if (baseChoice) {
baseChoice.finish_reason = choice.finish_reason ?? baseChoice.finish_reason;
baseChoice.message = baseChoice.message ?? { role: "assistant" };
if (choice.delta?.content)
baseChoice.message.content =
((baseChoice.message.content as string) ?? "") + (choice.delta.content ?? "");
if (choice.delta?.function_call) {
const fnCall = baseChoice.message.function_call ?? {};
fnCall.name =
((fnCall.name as string) ?? "") + ((choice.delta.function_call.name as string) ?? "");
fnCall.arguments =
((fnCall.arguments as string) ?? "") +
((choice.delta.function_call.arguments as string) ?? "");
}
} else {
// @ts-expect-error the types are correctly telling us that finish_reason
// could be null, but we don't want to fix that right now.
choices.push({ ...omit(choice, "delta"), message: { role: "assistant", ...choice.delta } });
}
}
const merged: ChatCompletion = {
...base,
choices,
};
return merged;
};
export async function getCompletion(
input: CompletionCreateParams,
onStream: ((partialOutput: ChatCompletion) => void) | null,
): Promise<CompletionResponse<ChatCompletion>> {
const start = Date.now();
let finalCompletion: ChatCompletion | null = null;
let promptTokens: number | undefined = undefined;
let completionTokens: number | undefined = undefined;
const modelName = modelProvider.getModel(input) as SupportedModel;
try {
if (onStream) {
console.log("got started");
const resp = await openai.chat.completions.create(
{ ...input, stream: true },
{
maxRetries: 0,
},
);
for await (const part of resp) {
console.log("got part", part);
finalCompletion = mergeStreamedChunks(finalCompletion, part);
onStream(finalCompletion);
}
console.log("got final", finalCompletion);
if (!finalCompletion) {
return {
type: "error",
message: "Streaming failed to return a completion",
autoRetry: false,
};
}
try {
promptTokens = countOpenAIChatTokens(modelName, input.messages);
completionTokens = countOpenAIChatTokens(
modelName,
finalCompletion.choices.map((c) => c.message).filter(truthyFilter),
);
} catch (err) {
// TODO: handle this; the token-counting library may not support function calls.
console.error(err);
}
} else {
const resp = await openai.chat.completions.create(
{ ...input, stream: false },
{
maxRetries: 0,
},
);
finalCompletion = resp;
promptTokens = resp.usage?.prompt_tokens ?? 0;
completionTokens = resp.usage?.completion_tokens ?? 0;
}
const timeToComplete = Date.now() - start;
const { promptTokenPrice, completionTokenPrice } = frontendModelProvider.models[modelName];
let cost = undefined;
if (promptTokenPrice && completionTokenPrice && promptTokens && completionTokens) {
cost = promptTokens * promptTokenPrice + completionTokens * completionTokenPrice;
}
return {
type: "success",
statusCode: 200,
value: finalCompletion,
timeToComplete,
promptTokens,
completionTokens,
cost,
};
} catch (error: unknown) {
if (error instanceof APIError) {
// The types from the sdk are wrong
const rawMessage = error.message as string | string[];
// If the message is not a string, stringify it
const message = isString(rawMessage)
? rawMessage
: isArray(rawMessage)
? rawMessage.map((m) => m.toString()).join("\n")
: (rawMessage as any).toString();
return {
type: "error",
message,
autoRetry: error.status === 429 || error.status === 503,
statusCode: error.status,
};
} else {
console.error(error);
return {
type: "error",
message: (error as Error).message,
autoRetry: true,
};
}
}
}
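Two small worked examples for the file above: how mergeStreamedChunks accumulates deltas, and how the cost figure is computed. Both use hypothetical numbers; the token prices come from frontend.ts above.

// Streaming merge: chunk fields trimmed to what the merge actually reads.
const chunk1 = { choices: [{ index: 0, delta: { role: "assistant", content: "Hel" } }] };
const chunk2 = { choices: [{ index: 0, delta: { content: "lo!" }, finish_reason: "stop" }] };
// Merging chunk1 then chunk2 yields choices of:
//   [{ index: 0, finish_reason: "stop", message: { role: "assistant", content: "Hello!" } }]

// Cost: for gpt-3.5-turbo-0613, 1000 prompt tokens and 500 completion tokens would cost
//   1000 * 0.0000015 + 500 * 0.000002 = 0.0015 + 0.001 = $0.0025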

View File

@@ -0,0 +1,45 @@
import { type JSONSchema4 } from "json-schema";
import { type ModelProvider } from "../types";
import inputSchema from "./codegen/input.schema.json";
import { type ChatCompletion, type CompletionCreateParams } from "openai/resources/chat";
import { getCompletion } from "./getCompletion";
import frontendModelProvider from "./frontend";
const supportedModels = [
"gpt-4-0613",
"gpt-4-32k-0613",
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613",
] as const;
export type SupportedModel = (typeof supportedModels)[number];
export type OpenaiChatModelProvider = ModelProvider<
SupportedModel,
CompletionCreateParams,
ChatCompletion
>;
const modelProvider: OpenaiChatModelProvider = {
getModel: (input) => {
if (supportedModels.includes(input.model as SupportedModel))
return input.model as SupportedModel;
const modelMaps: Record<string, SupportedModel> = {
"gpt-4": "gpt-4-0613",
"gpt-4-32k": "gpt-4-32k-0613",
"gpt-3.5-turbo": "gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613",
};
if (input.model in modelMaps) return modelMaps[input.model] as SupportedModel;
return null;
},
inputSchema: inputSchema as JSONSchema4,
canStream: true,
getCompletion,
...frontendModelProvider,
};
export default modelProvider;

View File

@@ -0,0 +1,279 @@
import { TfiThought } from "react-icons/tfi";
import { type RefinementAction } from "../types";
import { VscJson } from "react-icons/vsc";
export const refinementActions: Record<string, RefinementAction> = {
"Add chain of thought": {
icon: TfiThought,
description: "Asking the model to plan its answer can increase accuracy.",
instructions: `Adding chain of thought means asking the model to think about its answer before it gives it to you. This is useful for getting more accurate answers. Do not add an assistant message.
This is what a prompt looks like before adding chain of thought:
definePrompt("openai/ChatCompletion", {
model: "gpt-4",
stream: true,
messages: [
{
role: "system",
content: \`Evaluate sentiment.\`,
},
{
role: "user",
content: \`This is the user's message: \${scenario.user_message}. Return "positive" or "negative" or "neutral"\`,
},
],
});
This is what one looks like after adding chain of thought:
definePrompt("openai/ChatCompletion", {
model: "gpt-4",
stream: true,
messages: [
{
role: "system",
content: \`Evaluate sentiment.\`,
},
{
role: "user",
content: \`This is the user's message: \${scenario.user_message}. Return "positive" or "negative" or "neutral". Explain your answer before you give a score, then return the score on a new line.\`,
},
],
});
Here's another example:
Before:
definePrompt("openai/ChatCompletion", {
model: "gpt-3.5-turbo",
messages: [
{
role: "user",
content: \`Title: \${scenario.title}
Body: \${scenario.body}
Need: \${scenario.need}
Rate likelihood on 1-3 scale.\`,
},
],
temperature: 0,
functions: [
{
name: "score_post",
parameters: {
type: "object",
properties: {
score: {
type: "number",
},
},
},
},
],
function_call: {
name: "score_post",
},
});
After:
definePrompt("openai/ChatCompletion", {
model: "gpt-3.5-turbo",
messages: [
{
role: "user",
content: \`Title: \${scenario.title}
Body: \${scenario.body}
Need: \${scenario.need}
Rate likelihood on 1-3 scale. Provide an explanation, but always provide a score afterward.\`,
},
],
temperature: 0,
functions: [
{
name: "score_post",
parameters: {
type: "object",
properties: {
explanation: {
type: "string",
},
score: {
type: "number",
},
},
},
},
],
function_call: {
name: "score_post",
},
});
Add chain of thought to the original prompt.`,
},
"Convert to function call": {
icon: VscJson,
description: "Use function calls to get output from the model in a more structured way.",
instructions: `OpenAI functions are a specialized way for an LLM to return output.
This is what a prompt looks like before adding a function:
definePrompt("openai/ChatCompletion", {
model: "gpt-4",
stream: true,
messages: [
{
role: "system",
content: \`Evaluate sentiment.\`,
},
{
role: "user",
content: \`This is the user's message: \${scenario.user_message}. Return "positive" or "negative" or "neutral"\`,
},
],
});
This is what one looks like after adding a function:
definePrompt("openai/ChatCompletion", {
model: "gpt-4",
stream: true,
messages: [
{
role: "system",
content: "Evaluate sentiment.",
},
{
role: "user",
content: scenario.user_message,
},
],
functions: [
{
name: "extract_sentiment",
parameters: {
type: "object", // parameters must always be an object with a properties key
properties: { // properties key is required
sentiment: {
type: "string",
description: "one of positive/negative/neutral",
},
},
},
},
],
function_call: {
name: "extract_sentiment",
},
});
Here's another example of adding a function:
Before:
definePrompt("openai/ChatCompletion", {
model: "gpt-3.5-turbo",
messages: [
{
role: "user",
content: \`Here is the title and body of a reddit post I am interested in:
title: \${scenario.title}
body: \${scenario.body}
On a scale from 1 to 3, how likely is it that the person writing this post has the following need? If you are not sure, make your best guess, or answer 1.
Need: \${scenario.need}
Answer one integer between 1 and 3.\`,
},
],
temperature: 0,
});
After:
definePrompt("openai/ChatCompletion", {
model: "gpt-3.5-turbo",
messages: [
{
role: "user",
content: \`Title: \${scenario.title}
Body: \${scenario.body}
Need: \${scenario.need}
Rate likelihood on 1-3 scale.\`,
},
],
temperature: 0,
functions: [
{
name: "score_post",
parameters: {
type: "object",
properties: {
score: {
type: "number",
},
},
},
},
],
function_call: {
name: "score_post",
},
});
Another example
Before:
definePrompt("openai/ChatCompletion", {
model: "gpt-3.5-turbo",
stream: true,
messages: [
{
role: "system",
content: \`Write 'Start experimenting!' in \${scenario.language}\`,
},
],
});
After:
definePrompt("openai/ChatCompletion", {
model: "gpt-3.5-turbo",
messages: [
{
role: "system",
content: \`Write 'Start experimenting!' in \${scenario.language}\`,
},
],
functions: [
{
name: "write_in_language",
parameters: {
type: "object",
properties: {
text: {
type: "string",
},
},
},
},
],
function_call: {
name: "write_in_language",
},
});
Add an OpenAI function that takes one or more nested parameters that match the expected output from this prompt.`,
},
};

View File

@@ -0,0 +1,45 @@
import { type SupportedModel, type ReplicateLlama2Output } from ".";
import { type FrontendModelProvider } from "../types";
import { refinementActions } from "./refinementActions";
const frontendModelProvider: FrontendModelProvider<SupportedModel, ReplicateLlama2Output> = {
name: "Replicate Llama2",
models: {
"7b-chat": {
name: "LLama 2 7B Chat",
contextWindow: 4096,
pricePerSecond: 0.0023,
speed: "fast",
provider: "replicate/llama2",
learnMoreUrl: "https://replicate.com/a16z-infra/llama7b-v2-chat",
},
"13b-chat": {
name: "LLama 2 13B Chat",
contextWindow: 4096,
pricePerSecond: 0.0023,
speed: "medium",
provider: "replicate/llama2",
learnMoreUrl: "https://replicate.com/a16z-infra/llama13b-v2-chat",
},
"70b-chat": {
name: "LLama 2 70B Chat",
contextWindow: 4096,
pricePerSecond: 0.0032,
speed: "slow",
provider: "replicate/llama2",
learnMoreUrl: "https://replicate.com/replicate/llama70b-v2-chat",
},
},
refinementActions,
normalizeOutput: (output) => {
return {
type: "text",
value: output.join(""),
};
},
};
export default frontendModelProvider;

View File

@@ -0,0 +1,60 @@
import { env } from "~/env.mjs";
import { type ReplicateLlama2Input, type ReplicateLlama2Output } from ".";
import { type CompletionResponse } from "../types";
import Replicate from "replicate";
const replicate = new Replicate({
auth: env.REPLICATE_API_TOKEN || "",
});
const modelIds: Record<ReplicateLlama2Input["model"], string> = {
"7b-chat": "4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc",
"13b-chat": "2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
"70b-chat": "2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1",
};
export async function getCompletion(
input: ReplicateLlama2Input,
onStream: ((partialOutput: string[]) => void) | null,
): Promise<CompletionResponse<ReplicateLlama2Output>> {
const start = Date.now();
const { model, ...rest } = input;
try {
const prediction = await replicate.predictions.create({
version: modelIds[model],
input: rest,
});
const interval = onStream
? // eslint-disable-next-line @typescript-eslint/no-misused-promises
setInterval(async () => {
const partialPrediction = await replicate.predictions.get(prediction.id);
if (partialPrediction.output) onStream(partialPrediction.output as ReplicateLlama2Output);
}, 500)
: null;
const resp = await replicate.wait(prediction, {});
if (interval) clearInterval(interval);
const timeToComplete = Date.now() - start;
if (resp.error) throw new Error(resp.error as string);
return {
type: "success",
statusCode: 200,
value: resp.output as ReplicateLlama2Output,
timeToComplete,
};
} catch (error: unknown) {
console.error("ERROR IS", error);
return {
type: "error",
message: (error as Error).message,
autoRetry: true,
};
}
}
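A hypothetical call site (assuming REPLICATE_API_TOKEN is configured and getCompletion is imported from this module); the onStream callback receives the partial output fetched by the 500ms polling loop above:

const response = await getCompletion(
  { model: "13b-chat", prompt: "Say hello in French." },
  (partial) => console.log("output so far:", partial.join("")),
);
if (response.type === "success") {
  console.log("final:", response.value.join(""), `(${response.timeToComplete}ms)`);
} else {
  console.error("request failed:", response.message);
}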

View File

@@ -0,0 +1,81 @@
import { type ModelProvider } from "../types";
import frontendModelProvider from "./frontend";
import { getCompletion } from "./getCompletion";
const supportedModels = ["7b-chat", "13b-chat", "70b-chat"] as const;
export type SupportedModel = (typeof supportedModels)[number];
export type ReplicateLlama2Input = {
model: SupportedModel;
prompt: string;
max_length?: number;
temperature?: number;
top_p?: number;
repetition_penalty?: number;
debug?: boolean;
};
export type ReplicateLlama2Output = string[];
export type ReplicateLlama2Provider = ModelProvider<
SupportedModel,
ReplicateLlama2Input,
ReplicateLlama2Output
>;
const modelProvider: ReplicateLlama2Provider = {
getModel: (input) => {
if (supportedModels.includes(input.model)) return input.model;
return null;
},
inputSchema: {
type: "object",
properties: {
model: {
type: "string",
enum: supportedModels as unknown as string[],
},
system_prompt: {
type: "string",
description:
"System prompt to send to Llama v2. This is prepended to the prompt and helps guide system behavior.",
},
prompt: {
type: "string",
description: "Prompt to send to Llama v2.",
},
max_new_tokens: {
type: "number",
description:
"Maximum number of tokens to generate. A word is generally 2-3 tokens (minimum: 1)",
},
temperature: {
type: "number",
description:
"Adjusts randomness of outputs, 0.1 is a good starting value. (minimum: 0.01; maximum: 5)",
},
top_p: {
type: "number",
description:
"When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens (minimum: 0.01; maximum: 1)",
},
repetition_penalty: {
type: "number",
description:
"Penalty for repeated words in generated text; 1 is no penalty, values greater than 1 discourage repetition, less than 1 encourage it. (minimum: 0.01; maximum: 5)",
},
debug: {
type: "boolean",
description: "provide debugging output in logs",
},
},
required: ["model", "prompt"],
},
canStream: true,
getCompletion,
...frontendModelProvider,
};
export default modelProvider;

View File

@@ -0,0 +1,3 @@
import { type RefinementAction } from "../types";
export const refinementActions: Record<string, RefinementAction> = {};

View File

@@ -0,0 +1,72 @@
import { type JSONSchema4 } from "json-schema";
import { type IconType } from "react-icons";
import { type JsonValue } from "type-fest";
import { z } from "zod";
export const ZodSupportedProvider = z.union([
z.literal("openai/ChatCompletion"),
z.literal("replicate/llama2"),
z.literal("anthropic/completion"),
]);
export type SupportedProvider = z.infer<typeof ZodSupportedProvider>;
export type Model = {
name: string;
contextWindow: number;
promptTokenPrice?: number;
completionTokenPrice?: number;
pricePerSecond?: number;
speed: "fast" | "medium" | "slow";
provider: SupportedProvider;
description?: string;
learnMoreUrl?: string;
apiDocsUrl?: string;
};
export type ProviderModel = { provider: z.infer<typeof ZodSupportedProvider>; model: string };
export type RefinementAction = { icon?: IconType; description: string; instructions: string };
export type FrontendModelProvider<SupportedModels extends string, OutputSchema> = {
name: string;
models: Record<SupportedModels, Model>;
refinementActions?: Record<string, RefinementAction>;
normalizeOutput: (output: OutputSchema) => NormalizedOutput;
};
export type CompletionResponse<T> =
| { type: "error"; message: string; autoRetry: boolean; statusCode?: number }
| {
type: "success";
value: T;
timeToComplete: number;
statusCode: number;
promptTokens?: number;
completionTokens?: number;
cost?: number;
};
export type ModelProvider<SupportedModels extends string, InputSchema, OutputSchema> = {
getModel: (input: InputSchema) => SupportedModels | null;
canStream: boolean;
inputSchema: JSONSchema4;
getCompletion: (
input: InputSchema,
onStream: ((partialOutput: OutputSchema) => void) | null,
) => Promise<CompletionResponse<OutputSchema>>;
// This is just a convenience for type inference; don't use it at runtime
_outputSchema?: OutputSchema | null;
} & FrontendModelProvider<SupportedModels, OutputSchema>;
export type NormalizedOutput =
| {
type: "text";
value: string;
}
| {
type: "json";
value: JsonValue;
};
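Finally, a minimal sketch of consuming these types (the helper name is hypothetical): the `type` discriminant narrows CompletionResponse, and NormalizedOutput is the shape the frontend renders.

function logResponse<T>(
  response: CompletionResponse<T>,
  normalize: (value: T) => NormalizedOutput,
) {
  if (response.type === "error") {
    console.error(response.message, "retryable:", response.autoRetry);
    return;
  }
  const output = normalize(response.value);
  // Text outputs render directly; JSON outputs are pretty-printed.
  console.log(output.type === "text" ? output.value : JSON.stringify(output.value, null, 2));
}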