Add openpipe/Chat provider with Open-Orca/OpenOrcaxOpenChat-Preview2-13B model (#163)
* Display 4 decimal places in ModelStatsCard
* Add openpipe-chat provider
@@ -87,7 +87,7 @@ export const ModelStatsCard = ({
       label="Price"
       info={
         <Text>
-          ${model.pricePerSecond.toFixed(3)}
+          ${model.pricePerSecond.toFixed(4)}
           <Text color="gray.500"> / second</Text>
         </Text>
       }
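Why the extra decimal place matters: at the new model's price of $0.0003 per second, toFixed(3) rounds the displayed price to zero. A quick illustration (hypothetical values, not part of the diff):

// With three decimal places the per-second price renders as zero:
(0.0003).toFixed(3); // => "0.000"
// Four decimal places make the real price visible:
(0.0003).toFixed(4); // => "0.0003"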
@@ -1,6 +1,7 @@
 import openaiChatCompletionFrontend from "./openai-ChatCompletion/frontend";
 import replicateLlama2Frontend from "./replicate-llama2/frontend";
 import anthropicFrontend from "./anthropic-completion/frontend";
+import openpipeFrontend from "./openpipe-chat/frontend";
 import { type SupportedProvider, type FrontendModelProvider } from "./types";

 // Keep attributes here that need to be accessible from the frontend. We can't
@@ -10,6 +11,7 @@ const frontendModelProviders: Record<SupportedProvider, FrontendModelProvider<an
   "openai/ChatCompletion": openaiChatCompletionFrontend,
   "replicate/llama2": replicateLlama2Frontend,
   "anthropic/completion": anthropicFrontend,
+  "openpipe/Chat": openpipeFrontend,
 };

 export default frontendModelProviders;
@@ -1,12 +1,14 @@
 import openaiChatCompletion from "./openai-ChatCompletion";
 import replicateLlama2 from "./replicate-llama2";
 import anthropicCompletion from "./anthropic-completion";
+import openpipeChatCompletion from "./openpipe-chat";
 import { type SupportedProvider, type ModelProvider } from "./types";

 const modelProviders: Record<SupportedProvider, ModelProvider<any, any, any>> = {
   "openai/ChatCompletion": openaiChatCompletion,
   "replicate/llama2": replicateLlama2,
   "anthropic/completion": anthropicCompletion,
+  "openpipe/Chat": openpipeChatCompletion,
 };

 export default modelProviders;
app/src/modelProviders/openpipe-chat/frontend.ts (new file, 26 lines)
@@ -0,0 +1,26 @@
import { type OpenpipeChatOutput, type SupportedModel } from ".";
import { type FrontendModelProvider } from "../types";
import { refinementActions } from "./refinementActions";
import { templateOpenOrcaPrompt } from "./templatePrompt";

const frontendModelProvider: FrontendModelProvider<SupportedModel, OpenpipeChatOutput> = {
  name: "OpenPipe Chat",

  models: {
    "Open-Orca/OpenOrcaxOpenChat-Preview2-13B": {
      name: "OpenOrcaxOpenChat-Preview2-13B",
      contextWindow: 4096,
      pricePerSecond: 0.0003,
      speed: "medium",
      provider: "openpipe/Chat",
      learnMoreUrl: "https://huggingface.co/Open-Orca/OpenOrcaxOpenChat-Preview2-13B",
      templatePrompt: templateOpenOrcaPrompt,
    },
  },

  refinementActions,

  normalizeOutput: (output) => ({ type: "text", value: output }),
};

export default frontendModelProvider;
app/src/modelProviders/openpipe-chat/getCompletion.ts (new file, 104 lines)
@@ -0,0 +1,104 @@
/* eslint-disable @typescript-eslint/no-unsafe-call */
import { isArray, isString } from "lodash-es";
import OpenAI, { APIError } from "openai";

import { type CompletionResponse } from "../types";
import { type OpenpipeChatInput, type OpenpipeChatOutput } from ".";
import frontendModelProvider from "./frontend";

const modelEndpoints: Record<OpenpipeChatInput["model"], string> = {
  "Open-Orca/OpenOrcaxOpenChat-Preview2-13B": "https://5ef82gjxk8kdys-8000.proxy.runpod.net/v1",
};

export async function getCompletion(
  input: OpenpipeChatInput,
  onStream: ((partialOutput: OpenpipeChatOutput) => void) | null,
): Promise<CompletionResponse<OpenpipeChatOutput>> {
  const { model, messages, ...rest } = input;

  const templatedPrompt = frontendModelProvider.models[model].templatePrompt?.(messages);

  if (!templatedPrompt) {
    return {
      type: "error",
      message: "Failed to generate prompt",
      autoRetry: false,
    };
  }

  const openai = new OpenAI({
    baseURL: modelEndpoints[model],
  });
  const start = Date.now();
  let finalCompletion: OpenpipeChatOutput = "";

  try {
    if (onStream) {
      const resp = await openai.completions.create(
        { model, prompt: templatedPrompt, ...rest, stream: true },
        {
          maxRetries: 0,
        },
      );

      for await (const part of resp) {
        finalCompletion += part.choices[0]?.text ?? "";
        onStream(finalCompletion);
      }
      if (!finalCompletion) {
        return {
          type: "error",
          message: "Streaming failed to return a completion",
          autoRetry: false,
        };
      }
    } else {
      const resp = await openai.completions.create(
        { model, prompt: templatedPrompt, ...rest, stream: false },
        {
          maxRetries: 0,
        },
      );
      finalCompletion = resp.choices[0]?.text || "";
      if (!finalCompletion) {
        return {
          type: "error",
          message: "Failed to return a completion",
          autoRetry: false,
        };
      }
    }
    const timeToComplete = Date.now() - start;

    return {
      type: "success",
      statusCode: 200,
      value: finalCompletion,
      timeToComplete,
    };
  } catch (error: unknown) {
    if (error instanceof APIError) {
      // The types from the SDK are wrong
      const rawMessage = error.message as string | string[];
      // If the message is not a string, stringify it
      const message = isString(rawMessage)
        ? rawMessage
        : isArray(rawMessage)
        ? rawMessage.map((m) => m.toString()).join("\n")
        : (rawMessage as any).toString();
      return {
        type: "error",
        message,
        autoRetry: error.status === 429 || error.status === 503,
        statusCode: error.status,
      };
    } else {
      console.error(error);
      return {
        type: "error",
        message: (error as Error).message,
        autoRetry: true,
      };
    }
  }
}
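To see how the pieces fit together, here is a minimal sketch of a caller invoking getCompletion with streaming enabled. The messages, sampling parameters, and logging are illustrative assumptions, not part of this commit:

import { getCompletion } from "./getCompletion";

// Hypothetical usage: stream a completion, logging each partial output as it arrives.
const response = await getCompletion(
  {
    model: "Open-Orca/OpenOrcaxOpenChat-Preview2-13B",
    messages: [{ role: "user", content: "Explain nucleus sampling in one sentence." }],
    temperature: 0.7,
    max_tokens: 128,
  },
  (partial) => console.log(partial), // pass null instead of a callback to disable streaming
);

if (response.type === "success") {
  console.log(`Completed in ${response.timeToComplete}ms`);
}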
app/src/modelProviders/openpipe-chat/index.ts (new file, 50 lines)
@@ -0,0 +1,50 @@
import { type JSONSchema4 } from "json-schema";
import { type ModelProvider } from "../types";
import inputSchema from "./input.schema.json";
import { getCompletion } from "./getCompletion";
import frontendModelProvider from "./frontend";

const supportedModels = ["Open-Orca/OpenOrcaxOpenChat-Preview2-13B"] as const;

export type SupportedModel = (typeof supportedModels)[number];

export type OpenpipeChatInput = {
  model: SupportedModel;
  messages: {
    role: "system" | "user" | "assistant";
    content: string;
  }[];
  temperature?: number;
  top_p?: number;
  stop?: string[] | string;
  max_tokens?: number;
  presence_penalty?: number;
  frequency_penalty?: number;
};

export type OpenpipeChatOutput = string;

export type OpenpipeChatModelProvider = ModelProvider<
  SupportedModel,
  OpenpipeChatInput,
  OpenpipeChatOutput
>;

const modelProvider: OpenpipeChatModelProvider = {
  getModel: (input) => {
    if (supportedModels.includes(input.model as SupportedModel))
      return input.model as SupportedModel;

    return null;
  },
  inputSchema: inputSchema as JSONSchema4,
  canStream: true,
  getCompletion,
  getUsage: (input, output) => {
    // TODO: Implement this
    return null;
  },
  ...frontendModelProvider,
};

export default modelProvider;
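A quick sketch of how getModel routing behaves, assuming this import path (illustrative, not part of the commit):

import modelProvider from ".";

// A recognized model id passes through typed as SupportedModel;
// an unsupported id would return null.
const model = modelProvider.getModel({
  model: "Open-Orca/OpenOrcaxOpenChat-Preview2-13B",
  messages: [{ role: "user", content: "hello" }],
});
// => "Open-Orca/OpenOrcaxOpenChat-Preview2-13B"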
app/src/modelProviders/openpipe-chat/input.schema.json (new file, 88 lines)
@@ -0,0 +1,88 @@
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"description": "ID of the model to use.",
|
||||
"example": "Open-Orca/OpenOrcaxOpenChat-Preview2-13B",
|
||||
"type": "string",
|
||||
"enum": ["Open-Orca/OpenOrcaxOpenChat-Preview2-13B"]
|
||||
},
|
||||
"messages": {
|
||||
"description": "A list of messages comprising the conversation so far.",
|
||||
"type": "array",
|
||||
"minItems": 1,
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"role": {
|
||||
"type": "string",
|
||||
"enum": ["system", "user", "assistant"],
|
||||
"description": "The role of the messages author. One of `system`, `user`, or `assistant`."
|
||||
},
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "The contents of the message. `content` is required for all messages."
|
||||
}
|
||||
},
|
||||
"required": ["role", "content"]
|
||||
}
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 2,
|
||||
"default": 1,
|
||||
"example": 1,
|
||||
"nullable": true,
|
||||
"description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.\n"
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1,
|
||||
"default": 1,
|
||||
"example": 1,
|
||||
"nullable": true,
|
||||
"description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n\nWe generally recommend altering this or `temperature` but not both.\n"
|
||||
},
|
||||
"stop": {
|
||||
"description": "Up to 4 sequences where the API will stop generating further tokens.\n",
|
||||
"default": null,
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"nullable": true
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"minItems": 1,
|
||||
"maxItems": 4,
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"max_tokens": {
|
||||
"description": "The maximum number of [tokens](/tokenizer) to generate in the chat completion.\n\nThe total length of input tokens and generated tokens is limited by the model's context length. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) for counting tokens.\n",
|
||||
"type": "integer"
|
||||
},
|
||||
"presence_penalty": {
|
||||
"type": "number",
|
||||
"default": 0,
|
||||
"minimum": -2,
|
||||
"maximum": 2,
|
||||
"nullable": true,
|
||||
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\n[See more information about frequency and presence penalties.](/docs/api-reference/parameter-details)\n"
|
||||
},
|
||||
"frequency_penalty": {
|
||||
"type": "number",
|
||||
"default": 0,
|
||||
"minimum": -2,
|
||||
"maximum": 2,
|
||||
"nullable": true,
|
||||
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\n[See more information about frequency and presence penalties.](/docs/api-reference/parameter-details)\n"
|
||||
}
|
||||
},
|
||||
"required": ["model", "messages"]
|
||||
}
|
||||
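For reference, a hypothetical request body that satisfies both this schema and the OpenpipeChatInput type (the import path is an assumption, not part of the commit):

import { type OpenpipeChatInput } from "./openpipe-chat";

const exampleInput: OpenpipeChatInput = {
  model: "Open-Orca/OpenOrcaxOpenChat-Preview2-13B",
  messages: [
    { role: "system", content: "You are a helpful assistant." },
    { role: "user", content: "Summarize the OpenOrca dataset in one sentence." },
  ],
  temperature: 0.2, // low temperature for focused output; alter this or top_p, not both
  max_tokens: 256,
  stop: "<|end_of_turn|>",
};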
app/src/modelProviders/openpipe-chat/refinementActions.ts (new file, 3 lines)
@@ -0,0 +1,3 @@
import { type RefinementAction } from "../types";

export const refinementActions: Record<string, RefinementAction> = {};
app/src/modelProviders/openpipe-chat/templatePrompt.ts (new file, 24 lines)
@@ -0,0 +1,24 @@
import { type OpenpipeChatInput } from ".";

export const templateOpenOrcaPrompt = (messages: OpenpipeChatInput["messages"]) => {
  const splitter = "<|end_of_turn|>"; // end-of-turn token separating conversation turns

  const formattedMessages = messages.map((message) => {
    if (message.role === "system" || message.role === "user") {
      return "User: " + message.content;
    } else {
      return "Assistant: " + message.content;
    }
  });

  let prompt = formattedMessages.join(splitter);

  // Ensure that the prompt ends with an assistant message
  const lastUserIndex = prompt.lastIndexOf("User:");
  const lastAssistantIndex = prompt.lastIndexOf("Assistant:");
  if (lastUserIndex > lastAssistantIndex) {
    prompt += splitter + "Assistant:";
  }

  return prompt;
};
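To make the templating concrete, a worked example (hypothetical input, not part of the commit): system messages are folded into User turns, and a trailing "Assistant:" is appended so the model completes the assistant's reply.

import { templateOpenOrcaPrompt } from "./templatePrompt";

templateOpenOrcaPrompt([
  { role: "system", content: "Be concise." },
  { role: "user", content: "Hi!" },
]);
// => "User: Be concise.<|end_of_turn|>User: Hi!<|end_of_turn|>Assistant:"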
@@ -2,11 +2,13 @@ import { type JSONSchema4 } from "json-schema";
 import { type IconType } from "react-icons";
 import { type JsonValue } from "type-fest";
 import { z } from "zod";
+import { type OpenpipeChatInput } from "./openpipe-chat";

 export const ZodSupportedProvider = z.union([
   z.literal("openai/ChatCompletion"),
   z.literal("replicate/llama2"),
   z.literal("anthropic/completion"),
+  z.literal("openpipe/Chat"),
 ]);

 export type SupportedProvider = z.infer<typeof ZodSupportedProvider>;
@@ -22,6 +24,7 @@ export type Model = {
   description?: string;
   learnMoreUrl?: string;
   apiDocsUrl?: string;
+  templatePrompt?: (initialPrompt: OpenpipeChatInput["messages"]) => string;
 };

 export type ProviderModel = { provider: z.infer<typeof ZodSupportedProvider>; model: string };