Add openpipe/Chat provider with Open-Orca/OpenOrcaxOpenChat-Preview2-13B model (#163)

* Display 4 decimal places in ModelStatsCard

* Add openpipe-chat provider
Authored by arcticfly on 2023-08-16 22:37:37 -07:00; committed by GitHub
parent ad7665664a
commit c6ec901374
10 changed files with 303 additions and 1 deletion

View File: ModelStatsCard.tsx

@@ -87,7 +87,7 @@ export const ModelStatsCard = ({
label="Price" label="Price"
info={ info={
<Text> <Text>
${model.pricePerSecond.toFixed(3)} ${model.pricePerSecond.toFixed(4)}
<Text color="gray.500"> / second</Text> <Text color="gray.500"> / second</Text>
</Text> </Text>
} }

View File: frontendModelProviders.ts

@@ -1,6 +1,7 @@
 import openaiChatCompletionFrontend from "./openai-ChatCompletion/frontend";
 import replicateLlama2Frontend from "./replicate-llama2/frontend";
 import anthropicFrontend from "./anthropic-completion/frontend";
+import openpipeFrontend from "./openpipe-chat/frontend";
 import { type SupportedProvider, type FrontendModelProvider } from "./types";

 // Keep attributes here that need to be accessible from the frontend. We can't
@@ -10,6 +11,7 @@ const frontendModelProviders: Record<SupportedProvider, FrontendModelProvider<an
   "openai/ChatCompletion": openaiChatCompletionFrontend,
   "replicate/llama2": replicateLlama2Frontend,
   "anthropic/completion": anthropicFrontend,
+  "openpipe/Chat": openpipeFrontend,
 };

 export default frontendModelProviders;

View File: modelProviders/index.ts

@@ -1,12 +1,14 @@
 import openaiChatCompletion from "./openai-ChatCompletion";
 import replicateLlama2 from "./replicate-llama2";
 import anthropicCompletion from "./anthropic-completion";
+import openpipeChatCompletion from "./openpipe-chat";
 import { type SupportedProvider, type ModelProvider } from "./types";

 const modelProviders: Record<SupportedProvider, ModelProvider<any, any, any>> = {
   "openai/ChatCompletion": openaiChatCompletion,
   "replicate/llama2": replicateLlama2,
   "anthropic/completion": anthropicCompletion,
+  "openpipe/Chat": openpipeChatCompletion,
 };

 export default modelProviders;
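
(Editor's illustration, not part of the commit.) With this registry entry in place, the new provider is reachable through the same lookup path as the existing ones, keyed by its "openpipe/Chat" provider ID — a minimal sketch, assuming the registry module is imported directly:

import modelProviders from ".";

// Hypothetical lookup: the SupportedProvider key selects the provider implementation.
const provider = modelProviders["openpipe/Chat"];
console.log(provider.canStream); // true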

View File: openpipe-chat/frontend.ts

@@ -0,0 +1,26 @@
import { type OpenpipeChatOutput, type SupportedModel } from ".";
import { type FrontendModelProvider } from "../types";
import { refinementActions } from "./refinementActions";
import { templateOpenOrcaPrompt } from "./templatePrompt";

const frontendModelProvider: FrontendModelProvider<SupportedModel, OpenpipeChatOutput> = {
  name: "OpenPipe Chat",
  models: {
    "Open-Orca/OpenOrcaxOpenChat-Preview2-13B": {
      name: "OpenOrcaxOpenChat-Preview2-13B",
      contextWindow: 4096,
      pricePerSecond: 0.0003,
      speed: "medium",
      provider: "openpipe/Chat",
      learnMoreUrl: "https://huggingface.co/Open-Orca/OpenOrcaxOpenChat-Preview2-13B",
      templatePrompt: templateOpenOrcaPrompt,
    },
  },
  refinementActions,
  normalizeOutput: (output) => ({ type: "text", value: output }),
};

export default frontendModelProvider;

View File: openpipe-chat/getCompletion.ts

@@ -0,0 +1,104 @@
/* eslint-disable @typescript-eslint/no-unsafe-call */
import { isArray, isString } from "lodash-es";
import OpenAI, { APIError } from "openai";
import { type CompletionResponse } from "../types";
import { type OpenpipeChatInput, type OpenpipeChatOutput } from ".";
import frontendModelProvider from "./frontend";

const modelEndpoints: Record<OpenpipeChatInput["model"], string> = {
  "Open-Orca/OpenOrcaxOpenChat-Preview2-13B": "https://5ef82gjxk8kdys-8000.proxy.runpod.net/v1",
};

export async function getCompletion(
  input: OpenpipeChatInput,
  onStream: ((partialOutput: OpenpipeChatOutput) => void) | null,
): Promise<CompletionResponse<OpenpipeChatOutput>> {
  const { model, messages, ...rest } = input;

  // Convert the chat messages into the model's single-string prompt format.
  const templatedPrompt = frontendModelProvider.models[model].templatePrompt?.(messages);

  if (!templatedPrompt) {
    return {
      type: "error",
      message: "Failed to generate prompt",
      autoRetry: false,
    };
  }

  // Point the OpenAI SDK at the self-hosted endpoint for this model.
  const openai = new OpenAI({
    baseURL: modelEndpoints[model],
  });

  const start = Date.now();
  let finalCompletion: OpenpipeChatOutput = "";

  try {
    if (onStream) {
      const resp = await openai.completions.create(
        { model, prompt: templatedPrompt, ...rest, stream: true },
        {
          maxRetries: 0,
        },
      );
      for await (const part of resp) {
        finalCompletion += part.choices[0]?.text ?? "";
        onStream(finalCompletion);
      }
      if (!finalCompletion) {
        return {
          type: "error",
          message: "Streaming failed to return a completion",
          autoRetry: false,
        };
      }
    } else {
      const resp = await openai.completions.create(
        { model, prompt: templatedPrompt, ...rest, stream: false },
        {
          maxRetries: 0,
        },
      );
      finalCompletion = resp.choices[0]?.text || "";
      if (!finalCompletion) {
        return {
          type: "error",
          message: "Failed to return a completion",
          autoRetry: false,
        };
      }
    }
    const timeToComplete = Date.now() - start;

    return {
      type: "success",
      statusCode: 200,
      value: finalCompletion,
      timeToComplete,
    };
  } catch (error: unknown) {
    if (error instanceof APIError) {
      // The types from the SDK are wrong
      const rawMessage = error.message as string | string[];
      // If the message is not a string, stringify it
      const message = isString(rawMessage)
        ? rawMessage
        : isArray(rawMessage)
        ? rawMessage.map((m) => m.toString()).join("\n")
        : (rawMessage as any).toString();
      return {
        type: "error",
        message,
        autoRetry: error.status === 429 || error.status === 503,
        statusCode: error.status,
      };
    } else {
      console.error(error);
      return {
        type: "error",
        message: (error as Error).message,
        autoRetry: true,
      };
    }
  }
}
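
(Editor's illustration, not part of the commit.) A minimal sketch of calling getCompletion from an async context; the hypothetical caller just logs streamed progress and the final result:

import { getCompletion } from "./getCompletion";

const response = await getCompletion(
  {
    model: "Open-Orca/OpenOrcaxOpenChat-Preview2-13B",
    messages: [{ role: "user", content: "Hello!" }],
    max_tokens: 128,
  },
  // The streaming callback receives the accumulated partial output after each chunk.
  (partial) => console.log(`received ${partial.length} chars so far`),
);

if (response.type === "success") {
  console.log(response.value, `(${response.timeToComplete} ms)`);
} else {
  console.error(response.message);
}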

View File: openpipe-chat/index.ts

@@ -0,0 +1,50 @@
import { type JSONSchema4 } from "json-schema";
import { type ModelProvider } from "../types";
import inputSchema from "./input.schema.json";
import { getCompletion } from "./getCompletion";
import frontendModelProvider from "./frontend";

const supportedModels = ["Open-Orca/OpenOrcaxOpenChat-Preview2-13B"] as const;

export type SupportedModel = (typeof supportedModels)[number];

export type OpenpipeChatInput = {
  model: SupportedModel;
  messages: {
    role: "system" | "user" | "assistant";
    content: string;
  }[];
  temperature?: number;
  top_p?: number;
  stop?: string[] | string;
  max_tokens?: number;
  presence_penalty?: number;
  frequency_penalty?: number;
};

export type OpenpipeChatOutput = string;

export type OpenpipeChatModelProvider = ModelProvider<
  SupportedModel,
  OpenpipeChatInput,
  OpenpipeChatOutput
>;

const modelProvider: OpenpipeChatModelProvider = {
  getModel: (input) => {
    if (supportedModels.includes(input.model as SupportedModel))
      return input.model as SupportedModel;
    return null;
  },
  inputSchema: inputSchema as JSONSchema4,
  canStream: true,
  getCompletion,
  getUsage: (input, output) => {
    // TODO: Implement this
    return null;
  },
  ...frontendModelProvider,
};

export default modelProvider;
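
(Editor's illustration, not part of the commit.) getModel acts as the provider's gatekeeper: it narrows an arbitrary input's model string to the typed SupportedModel, or returns null so the registry can fall through to another provider — a hypothetical check:

import openpipeChatCompletion from ".";

const model = openpipeChatCompletion.getModel({
  model: "Open-Orca/OpenOrcaxOpenChat-Preview2-13B",
  messages: [{ role: "user", content: "hi" }],
});
// model === "Open-Orca/OpenOrcaxOpenChat-Preview2-13B"; an unrecognized model would yield null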

View File: openpipe-chat/input.schema.json

@@ -0,0 +1,88 @@
{
  "type": "object",
  "properties": {
    "model": {
      "description": "ID of the model to use.",
      "example": "Open-Orca/OpenOrcaxOpenChat-Preview2-13B",
      "type": "string",
      "enum": ["Open-Orca/OpenOrcaxOpenChat-Preview2-13B"]
    },
    "messages": {
      "description": "A list of messages comprising the conversation so far.",
      "type": "array",
      "minItems": 1,
      "items": {
        "type": "object",
        "properties": {
          "role": {
            "type": "string",
            "enum": ["system", "user", "assistant"],
            "description": "The role of the messages author. One of `system`, `user`, or `assistant`."
          },
          "content": {
            "type": "string",
            "description": "The contents of the message. `content` is required for all messages."
          }
        },
        "required": ["role", "content"]
      }
    },
    "temperature": {
      "type": "number",
      "minimum": 0,
      "maximum": 2,
      "default": 1,
      "example": 1,
      "nullable": true,
      "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.\n"
    },
    "top_p": {
      "type": "number",
      "minimum": 0,
      "maximum": 1,
      "default": 1,
      "example": 1,
      "nullable": true,
      "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n\nWe generally recommend altering this or `temperature` but not both.\n"
    },
    "stop": {
      "description": "Up to 4 sequences where the API will stop generating further tokens.\n",
      "default": null,
      "oneOf": [
        {
          "type": "string",
          "nullable": true
        },
        {
          "type": "array",
          "minItems": 1,
          "maxItems": 4,
          "items": {
            "type": "string"
          }
        }
      ]
    },
    "max_tokens": {
      "description": "The maximum number of [tokens](/tokenizer) to generate in the chat completion.\n\nThe total length of input tokens and generated tokens is limited by the model's context length. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) for counting tokens.\n",
      "type": "integer"
    },
    "presence_penalty": {
      "type": "number",
      "default": 0,
      "minimum": -2,
      "maximum": 2,
      "nullable": true,
      "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\n[See more information about frequency and presence penalties.](/docs/api-reference/parameter-details)\n"
    },
    "frequency_penalty": {
      "type": "number",
      "default": 0,
      "minimum": -2,
      "maximum": 2,
      "nullable": true,
      "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\n[See more information about frequency and presence penalties.](/docs/api-reference/parameter-details)\n"
    }
  },
  "required": ["model", "messages"]
}
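
(Editor's illustration, not part of the commit.) A hypothetical request body that validates against this schema, expressed as the OpenpipeChatInput type it mirrors — only `model` and `messages` are required:

import { type OpenpipeChatInput } from ".";

const input: OpenpipeChatInput = {
  model: "Open-Orca/OpenOrcaxOpenChat-Preview2-13B",
  messages: [{ role: "user", content: "Summarize the OpenOrca dataset in one sentence." }],
  temperature: 0.2,
  max_tokens: 256,
  stop: "<|end_of_turn|>", // `stop` accepts a single string or up to 4 sequences
};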

View File: openpipe-chat/refinementActions.ts

@@ -0,0 +1,3 @@
import { type RefinementAction } from "../types";
export const refinementActions: Record<string, RefinementAction> = {};

View File: openpipe-chat/templatePrompt.ts

@@ -0,0 +1,24 @@
import { type OpenpipeChatInput } from ".";

export const templateOpenOrcaPrompt = (messages: OpenpipeChatInput["messages"]) => {
  const splitter = "<|end_of_turn|>"; // end-of-turn token expected by the model

  // System messages are folded into "User:" turns; assistant messages become "Assistant:" turns.
  const formattedMessages = messages.map((message) => {
    if (message.role === "system" || message.role === "user") {
      return "User: " + message.content;
    } else {
      return "Assistant: " + message.content;
    }
  });

  let prompt = formattedMessages.join(splitter);

  // Ensure the prompt ends with an open "Assistant:" turn for the model to complete
  const lastUserIndex = prompt.lastIndexOf("User:");
  const lastAssistantIndex = prompt.lastIndexOf("Assistant:");
  if (lastUserIndex > lastAssistantIndex) {
    prompt += splitter + "Assistant:";
  }

  return prompt;
};
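
(Editor's illustration, not part of the commit.) Tracing templateOpenOrcaPrompt on a two-message conversation shows the format: the system message is rendered as a User turn, and the trailing "Assistant:" marker is appended because the last turn is a user turn:

import { templateOpenOrcaPrompt } from "./templatePrompt";

const prompt = templateOpenOrcaPrompt([
  { role: "system", content: "You are a concise assistant." },
  { role: "user", content: "What is 2 + 2?" },
]);
// prompt === "User: You are a concise assistant.<|end_of_turn|>User: What is 2 + 2?<|end_of_turn|>Assistant:"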

View File: types.ts

@@ -2,11 +2,13 @@ import { type JSONSchema4 } from "json-schema";
 import { type IconType } from "react-icons";
 import { type JsonValue } from "type-fest";
 import { z } from "zod";
+import { type OpenpipeChatInput } from "./openpipe-chat";

 export const ZodSupportedProvider = z.union([
   z.literal("openai/ChatCompletion"),
   z.literal("replicate/llama2"),
   z.literal("anthropic/completion"),
+  z.literal("openpipe/Chat"),
 ]);

 export type SupportedProvider = z.infer<typeof ZodSupportedProvider>;
@@ -22,6 +24,7 @@ export type Model = {
   description?: string;
   learnMoreUrl?: string;
   apiDocsUrl?: string;
+  templatePrompt?: (initialPrompt: OpenpipeChatInput["messages"]) => string;
 };

 export type ProviderModel = { provider: z.infer<typeof ZodSupportedProvider>; model: string };