Better streaming

- Always stream the visible scenarios, if the modelProvider supports it (see the sketch below this list)
- Never stream the invisible scenarios
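
A minimal sketch of that decision (not code from this commit; `canStream` is the provider flag introduced in the diff below, and `scenarioIsVisible` is an assumed stand-in for however the UI tracks visibility):

```ts
// Hypothetical helper illustrating the new streaming rule: invisible
// scenarios never stream, and visible ones stream only when the model
// provider is capable of it (`canStream`, added in the diff below).
function shouldStreamScenario(
  provider: { canStream: boolean },
  scenarioIsVisible: boolean,
): boolean {
  return scenarioIsVisible && provider.canStream;
}
```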

This change also makes our query tasks actually run in a background worker, which we weren't quite doing before.
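
As a rough illustration of that change (the queue API and task shape here are assumptions, not the project's actual interfaces):

```ts
// Hypothetical sketch of moving query tasks off the request path and into a
// background worker. `enqueue` and `dequeue` stand in for whatever queue
// mechanism the project actually uses.
type QueryTask = { cellId: string; input: Record<string, unknown> };

// Before this commit, the query effectively ran inline with the request.
// Now the handler only enqueues the task and returns immediately...
async function handleRequest(
  task: QueryTask,
  enqueue: (t: QueryTask) => Promise<void>,
): Promise<void> {
  await enqueue(task);
}

// ...and a separate worker process picks tasks up and runs the queries.
async function workerLoop(
  dequeue: () => Promise<QueryTask>,
  runQuery: (t: QueryTask) => Promise<void>,
): Promise<void> {
  for (;;) {
    await runQuery(await dequeue());
  }
}
```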

Author: Kyle Corbitt
Date: 2023-07-24 18:34:30 -07:00
Parent: d6b97b29f7
Commit: e1cbeccb90
25 changed files with 152 additions and 153 deletions


@@ -9,7 +9,6 @@ export type SupportedModel = (typeof supportedModels)[number];
 export type ReplicateLlama2Input = {
   model: SupportedModel;
   prompt: string;
-  stream?: boolean;
   max_length?: number;
   temperature?: number;
   top_p?: number;
@@ -47,10 +46,6 @@ const modelProvider: ReplicateLlama2Provider = {
         type: "string",
         description: "Prompt to send to Llama v2.",
       },
-      stream: {
-        type: "boolean",
-        description: "Whether to stream output from Llama v2.",
-      },
       max_new_tokens: {
         type: "number",
         description:
@@ -78,7 +73,7 @@ const modelProvider: ReplicateLlama2Provider = {
     },
     required: ["model", "prompt"],
   },
-  shouldStream: (input) => input.stream ?? false,
+  canStream: true,
   getCompletion,
   ...frontendModelProvider,
 };
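
In effect, the last hunk changes the provider contract from a per-request predicate to a static capability flag: whether a completion streams is no longer decided by the request payload, but by the caller based on scenario visibility. A rough before/after of just that slice of the provider type (reconstructed for illustration; the real `ReplicateLlama2Provider` has more members):

```ts
// Illustrative slice of the provider contract, reconstructed from the diff
// above; only the streaming-related member is shown.

// Before: each request's own input decided whether to stream.
type ProviderBefore<TInput extends { stream?: boolean }> = {
  shouldStream: (input: TInput) => boolean;
};

// After: the provider just declares that it CAN stream. Whether a given
// scenario actually streams is up to the caller (visible scenarios only).
type ProviderAfter = {
  canStream: boolean;
};
```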