Better streaming

- Always stream the visible scenarios, if the modelProvider supports it
- Never stream the invisible scenarios

Also actually runs our query tasks in a background worker, which we weren't quite doing before.
@@ -9,7 +9,6 @@ export type SupportedModel = (typeof supportedModels)[number];
 export type ReplicateLlama2Input = {
   model: SupportedModel;
   prompt: string;
-  stream?: boolean;
   max_length?: number;
   temperature?: number;
   top_p?: number;
@@ -47,10 +46,6 @@ const modelProvider: ReplicateLlama2Provider = {
         type: "string",
         description: "Prompt to send to Llama v2.",
       },
-      stream: {
-        type: "boolean",
-        description: "Whether to stream output from Llama v2.",
-      },
       max_new_tokens: {
         type: "number",
         description:
@@ -78,7 +73,7 @@ const modelProvider: ReplicateLlama2Provider = {
     },
     required: ["model", "prompt"],
   },
-  shouldStream: (input) => input.stream ?? false,
+  canStream: true,
   getCompletion,
   ...frontendModelProvider,
 };
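With this change the provider only advertises that it is capable of streaming (`canStream: true`); whether a given request actually streams is decided by the caller, based on scenario visibility, rather than by a per-input `stream` flag. A minimal caller-side sketch of that decision, assuming hypothetical names (`ModelProvider`, `runQueryTask`, `isVisible`, `onStream` are not from this diff):

```ts
// Sketch of the caller-side streaming decision described in the commit
// message: always stream visible scenarios when the provider supports it,
// never stream invisible ones. All names here are hypothetical.
type ModelProvider<TInput> = {
  canStream: boolean;
  getCompletion: (
    input: TInput,
    onStream?: (partialOutput: string) => void,
  ) => Promise<string>;
};

async function runQueryTask<TInput>(
  provider: ModelProvider<TInput>,
  input: TInput,
  isVisible: boolean,
  onStream: (partialOutput: string) => void,
): Promise<string> {
  // Stream only when the scenario is visible AND the provider can stream.
  const stream = isVisible && provider.canStream;
  return provider.getCompletion(input, stream ? onStream : undefined);
}
```

Keeping the capability as a static flag on the provider lets the background worker make the visible/invisible decision without inspecting the input payload.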