replicate/llama2 provider

Still need to fix the types but it runs
2023-07-20 19:55:03 -07:00
parent 332a2101c0
commit 847753c32b
14 changed files with 202 additions and 29 deletions
--- a/package.json
+++ b/package.json
@@ -73,6 +73,7 @@
    "react-syntax-highlighter": "^15.5.0",
    "react-textarea-autosize": "^8.5.0",
    "recast": "^0.23.3",
+    "replicate": "^0.12.3",
    "socket.io": "^4.7.1",
    "socket.io-client": "^4.7.1",
    "superjson": "1.12.2",
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -161,6 +161,9 @@ dependencies:
  recast:
    specifier: ^0.23.3
    version: 0.23.3
+  replicate:
+    specifier: ^0.12.3
+    version: 0.12.3
  socket.io:
    specifier: ^4.7.1
    version: 4.7.1
@@ -6988,6 +6991,11 @@ packages:
      functions-have-names: 1.2.3
    dev: true

+  /replicate@0.12.3:
+    resolution: {integrity: sha512-HVWKPoVhWVTONlWk+lUXmq9Vy2J8MxBJMtDBQq3dA5uq71ZzKTh0xvJfvzW4+VLBjhBeL7tkdua6hZJmKfzAPQ==}
+    engines: {git: '>=2.11.0', node: '>=16.6.0', npm: '>=7.19.0', yarn: '>=1.7.0'}
+    dev: false
+
  /require-directory@2.1.1:
    resolution: {integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==}
    engines: {node: '>=0.10.0'}
--- a/src/components/OutputsTable/OutputCell/OutputCell.tsx
+++ b/src/components/OutputsTable/OutputCell/OutputCell.tsx
@@ -88,9 +88,11 @@ export default function OutputCell({
  }

  const normalizedOutput = modelOutput
-    ? provider.normalizeOutput(modelOutput.output as unknown as OutputSchema)
+    ? // @ts-expect-error TODO FIX ASAP
+      provider.normalizeOutput(modelOutput.output as unknown as OutputSchema)
    : streamedMessage
-    ? provider.normalizeOutput(streamedMessage)
+    ? // @ts-expect-error TODO FIX ASAP
+      provider.normalizeOutput(streamedMessage)
    : null;

  if (modelOutput && normalizedOutput?.type === "json") {
--- a/src/env.mjs
+++ b/src/env.mjs
@@ -17,6 +17,7 @@ export const env = createEnv({
      .transform((val) => val.toLowerCase() === "true"),
    GITHUB_CLIENT_ID: z.string().min(1),
    GITHUB_CLIENT_SECRET: z.string().min(1),
+    REPLICATE_API_TOKEN: z.string().min(1),
  },

  /**
@@ -42,6 +43,7 @@ export const env = createEnv({
    NEXT_PUBLIC_SOCKET_URL: process.env.NEXT_PUBLIC_SOCKET_URL,
    GITHUB_CLIENT_ID: process.env.GITHUB_CLIENT_ID,
    GITHUB_CLIENT_SECRET: process.env.GITHUB_CLIENT_SECRET,
+    REPLICATE_API_TOKEN: process.env.REPLICATE_API_TOKEN,
  },
  /**
   * Run `build` or `dev` with `SKIP_ENV_VALIDATION` to skip env validation.
--- a/src/modelProviders/modelProviders.ts
+++ b/src/modelProviders/modelProviders.ts
@@ -1,7 +1,9 @@
 import openaiChatCompletion from "./openai-ChatCompletion";
+import replicateLlama2 from "./replicate-llama2";

 const modelProviders = {
  "openai/ChatCompletion": openaiChatCompletion,
+  "replicate/llama2": replicateLlama2,
 } as const;

 export default modelProviders;
--- a/src/modelProviders/modelProvidersFrontend.ts
+++ b/src/modelProviders/modelProvidersFrontend.ts
@@ -1,10 +1,14 @@
-import modelProviderFrontend from "./openai-ChatCompletion/frontend";
+import openaiChatCompletionFrontend from "./openai-ChatCompletion/frontend";
+import replicateLlama2Frontend from "./replicate-llama2/frontend";
+
+// TODO: make sure we get a typescript error if you forget to add a provider here

 // Keep attributes here that need to be accessible from the frontend. We can't
 // just include them in the default `modelProviders` object because it has some
 // transient dependencies that can only be imported on the server.
 const modelProvidersFrontend = {
-  "openai/ChatCompletion": modelProviderFrontend,
+  "openai/ChatCompletion": openaiChatCompletionFrontend,
+  "replicate/llama2": replicateLlama2Frontend,
 } as const;

 export default modelProvidersFrontend;
--- a/src/modelProviders/replicate-llama2/frontend.ts
+++ b/src/modelProviders/replicate-llama2/frontend.ts
@@ -0,0 +1,13 @@
+import { type ReplicateLlama2Provider } from ".";
+import { type ModelProviderFrontend } from "../types";
+
+const modelProviderFrontend: ModelProviderFrontend<ReplicateLlama2Provider> = {
+  normalizeOutput: (output) => {
+    return {
+      type: "text",
+      value: output.join(""),
+    };
+  },
+};
+
+export default modelProviderFrontend;
--- a/src/modelProviders/replicate-llama2/getCompletion.ts
+++ b/src/modelProviders/replicate-llama2/getCompletion.ts
@@ -0,0 +1,62 @@
+import { env } from "~/env.mjs";
+import { type ReplicateLlama2Input, type ReplicateLlama2Output } from ".";
+import { type CompletionResponse } from "../types";
+import Replicate from "replicate";
+
+const replicate = new Replicate({
+  auth: env.REPLICATE_API_TOKEN || "",
+});
+
+const modelIds: Record<ReplicateLlama2Input["model"], string> = {
+  "7b-chat": "3725a659b5afff1a0ba9bead5fac3899d998feaad00e07032ca2b0e35eb14f8a",
+  "13b-chat": "5c785d117c5bcdd1928d5a9acb1ffa6272d6cf13fcb722e90886a0196633f9d3",
+  "70b-chat": "e951f18578850b652510200860fc4ea62b3b16fac280f83ff32282f87bbd2e48",
+};
+
+export async function getCompletion(
+  input: ReplicateLlama2Input,
+  onStream: ((partialOutput: string[]) => void) | null,
+): Promise<CompletionResponse<ReplicateLlama2Output>> {
+  const start = Date.now();
+
+  const { model, stream, ...rest } = input;
+
+  try {
+    const prediction = await replicate.predictions.create({
+      version: modelIds[model],
+      input: rest,
+    });
+
+    console.log("stream?", onStream);
+
+    const interval = onStream
+      ? // eslint-disable-next-line @typescript-eslint/no-misused-promises
+        setInterval(async () => {
+          const partialPrediction = await replicate.predictions.get(prediction.id);
+
+          if (partialPrediction.output) onStream(partialPrediction.output as ReplicateLlama2Output);
+        }, 500)
+      : null;
+
+    const resp = await replicate.wait(prediction, {});
+    if (interval) clearInterval(interval);
+
+    const timeToComplete = Date.now() - start;
+
+    if (resp.error) throw new Error(resp.error as string);
+
+    return {
+      type: "success",
+      statusCode: 200,
+      value: resp.output as ReplicateLlama2Output,
+      timeToComplete,
+    };
+  } catch (error: unknown) {
+    console.error("ERROR IS", error);
+    return {
+      type: "error",
+      message: (error as Error).message,
+      autoRetry: true,
+    };
+  }
+}
--- a/src/modelProviders/replicate-llama2/index.ts
+++ b/src/modelProviders/replicate-llama2/index.ts
@@ -0,0 +1,74 @@
+import { type ModelProvider } from "../types";
+import { getCompletion } from "./getCompletion";
+
+const supportedModels = ["7b-chat", "13b-chat", "70b-chat"] as const;
+
+type SupportedModel = (typeof supportedModels)[number];
+
+export type ReplicateLlama2Input = {
+  model: SupportedModel;
+  prompt: string;
+  stream?: boolean;
+  max_length?: number;
+  temperature?: number;
+  top_p?: number;
+  repetition_penalty?: number;
+  debug?: boolean;
+};
+
+export type ReplicateLlama2Output = string[];
+
+export type ReplicateLlama2Provider = ModelProvider<
+  SupportedModel,
+  ReplicateLlama2Input,
+  ReplicateLlama2Output
+>;
+
+const modelProvider: ReplicateLlama2Provider = {
+  name: "OpenAI ChatCompletion",
+  models: {
+    "7b-chat": {},
+    "13b-chat": {},
+    "70b-chat": {},
+  },
+  getModel: (input) => {
+    if (supportedModels.includes(input.model)) return input.model;
+
+    return null;
+  },
+  inputSchema: {
+    type: "object",
+    properties: {
+      model: {
+        type: "string",
+        enum: supportedModels as unknown as string[],
+      },
+      prompt: {
+        type: "string",
+      },
+      stream: {
+        type: "boolean",
+      },
+      max_length: {
+        type: "number",
+      },
+      temperature: {
+        type: "number",
+      },
+      top_p: {
+        type: "number",
+      },
+      repetition_penalty: {
+        type: "number",
+      },
+      debug: {
+        type: "boolean",
+      },
+    },
+    required: ["model", "prompt"],
+  },
+  shouldStream: (input) => input.stream ?? false,
+  getCompletion,
+};
+
+export default modelProvider;
--- a/src/modelProviders/types.ts
+++ b/src/modelProviders/types.ts
@@ -2,8 +2,8 @@ import { type JSONSchema4 } from "json-schema";
 import { type JsonValue } from "type-fest";

 type ModelProviderModel = {
-  name: string;
-  learnMore: string;
+  name?: string;
+  learnMore?: string;
 };

 export type CompletionResponse<T> =
--- a/src/server/api/routers/experiments.router.ts
+++ b/src/server/api/routers/experiments.router.ts
@@ -109,8 +109,7 @@ export const experimentsRouter = createTRPCRouter({
          constructFn: dedent`
          /**
           * Use Javascript to define an OpenAI chat completion
-           * (https://platform.openai.com/docs/api-reference/chat/create) and
-           * assign it to the \`prompt\` variable.
+           * (https://platform.openai.com/docs/api-reference/chat/create).
           *
           * You have access to the current scenario in the \`scenario\`
           * variable.
--- a/src/server/scripts/replicate-test.ts
+++ b/src/server/scripts/replicate-test.ts
@@ -1,26 +1,26 @@
-// /* eslint-disable */
+/* eslint-disable */

-// import "dotenv/config";
-// import Replicate from "replicate";
+import "dotenv/config";
+import Replicate from "replicate";

-// const replicate = new Replicate({
-//   auth: process.env.REPLICATE_API_TOKEN || "",
-// });
+const replicate = new Replicate({
+  auth: process.env.REPLICATE_API_TOKEN || "",
+});

-// console.log("going to run");
-// const prediction = await replicate.predictions.create({
-//   version: "e951f18578850b652510200860fc4ea62b3b16fac280f83ff32282f87bbd2e48",
-//   input: {
-//     prompt: "...",
-//   },
-// });
+console.log("going to run");
+const prediction = await replicate.predictions.create({
+  version: "3725a659b5afff1a0ba9bead5fac3899d998feaad00e07032ca2b0e35eb14f8a",
+  input: {
+    prompt: "...",
+  },
+});

-// console.log("waiting");
-// setInterval(() => {
-//   replicate.predictions.get(prediction.id).then((prediction) => {
-//     console.log(prediction.output);
-//   });
-// }, 500);
-// // const output = await replicate.wait(prediction, {});
+console.log("waiting");
+setInterval(() => {
+  replicate.predictions.get(prediction.id).then((prediction) => {
+    console.log(prediction);
+  });
+}, 500);
+// const output = await replicate.wait(prediction, {});

-// // console.log(output);
+// console.log(output);
--- a/src/server/tasks/queryLLM.task.ts
+++ b/src/server/tasks/queryLLM.task.ts
@@ -99,6 +99,7 @@ export const queryLLM = defineTask<queryLLMJob>("queryLLM", async (task) => {

  const provider = modelProviders[prompt.modelProvider];

+  // @ts-expect-error TODO FIX ASAP
  const streamingChannel = provider.shouldStream(prompt.modelInput) ? generateChannel() : null;

  if (streamingChannel) {
@@ -115,6 +116,8 @@ export const queryLLM = defineTask<queryLLMJob>("queryLLM", async (task) => {
    : null;

  for (let i = 0; true; i++) {
+    // @ts-expect-error TODO FIX ASAP
+
    const response = await provider.getCompletion(prompt.modelInput, onStream);
    if (response.type === "success") {
      const inputHash = hashPrompt(prompt);
--- a/src/server/utils/parseConstructFn.ts
+++ b/src/server/utils/parseConstructFn.ts
@@ -70,6 +70,7 @@ export default async function parseConstructFn(
    // We've validated the JSON schema so this should be safe
    const input = prompt.input as Parameters<(typeof provider)["getModel"]>[0];

+    // @ts-expect-error TODO FIX ASAP
    const model = provider.getModel(input);
    if (!model) {
      return {
@@ -79,6 +80,8 @@ export default async function parseConstructFn(

    return {
      modelProvider: prompt.modelProvider as keyof typeof modelProviders,
+      // @ts-expect-error TODO FIX ASAP
+
      model,
      modelInput: input,
    };