diff --git a/app/.env.example b/app/.env.example
index 33ba9ae..4376905 100644
--- a/app/.env.example
+++ b/app/.env.example
@@ -40,3 +40,8 @@ SMTP_HOST="placeholder"
SMTP_PORT="placeholder"
SMTP_LOGIN="placeholder"
SMTP_PASSWORD="placeholder"
+
+# Azure credentials are necessary for uploading large training data files
+AZURE_STORAGE_ACCOUNT_NAME="placeholder"
+AZURE_STORAGE_ACCOUNT_KEY="placeholder"
+AZURE_STORAGE_CONTAINER_NAME="placeholder"
diff --git a/app/package.json b/app/package.json
index 7a2a3b9..821277a 100644
--- a/app/package.json
+++ b/app/package.json
@@ -26,6 +26,8 @@
"dependencies": {
"@anthropic-ai/sdk": "^0.5.8",
"@apidevtools/json-schema-ref-parser": "^10.1.0",
+ "@azure/identity": "^3.3.0",
+ "@azure/storage-blob": "12.15.0",
"@babel/standalone": "^7.22.9",
"@chakra-ui/anatomy": "^2.2.0",
"@chakra-ui/next-js": "^2.1.4",
@@ -69,6 +71,7 @@
"jsonschema": "^1.4.1",
"kysely": "^0.26.1",
"kysely-codegen": "^0.10.1",
+ "llama-tokenizer-js": "^1.1.3",
"lodash-es": "^4.17.21",
"lucide-react": "^0.265.0",
"marked": "^7.0.3",
diff --git a/app/prisma/migrations/20230907120707_add_dataset_file_upload/migration.sql b/app/prisma/migrations/20230907120707_add_dataset_file_upload/migration.sql
new file mode 100644
index 0000000..8a2e6a0
--- /dev/null
+++ b/app/prisma/migrations/20230907120707_add_dataset_file_upload/migration.sql
@@ -0,0 +1,23 @@
+-- CreateEnum
+CREATE TYPE "DatasetFileUploadStatus" AS ENUM ('PENDING', 'DOWNLOADING', 'PROCESSING', 'SAVING', 'COMPLETE', 'ERROR');
+
+-- CreateTable
+CREATE TABLE "DatasetFileUpload" (
+ "id" UUID NOT NULL,
+ "datasetId" UUID NOT NULL,
+ "blobName" TEXT NOT NULL,
+ "fileName" TEXT NOT NULL,
+ "fileSize" INTEGER NOT NULL,
+ "progress" INTEGER NOT NULL DEFAULT 0,
+ "status" "DatasetFileUploadStatus" NOT NULL DEFAULT 'PENDING',
+ "uploadedAt" TIMESTAMP(3) NOT NULL,
+ "visible" BOOLEAN NOT NULL DEFAULT true,
+ "errorMessage" TEXT,
+ "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ "updatedAt" TIMESTAMP(3) NOT NULL,
+
+ CONSTRAINT "DatasetFileUpload_pkey" PRIMARY KEY ("id")
+);
+
+-- AddForeignKey
+ALTER TABLE "DatasetFileUpload" ADD CONSTRAINT "DatasetFileUpload_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset"("id") ON DELETE CASCADE ON UPDATE CASCADE;
diff --git a/app/prisma/schema.prisma b/app/prisma/schema.prisma
index feaf327..d378a8a 100644
--- a/app/prisma/schema.prisma
+++ b/app/prisma/schema.prisma
@@ -176,12 +176,41 @@ model OutputEvaluation {
@@unique([modelResponseId, evaluationId])
}
+
+enum DatasetFileUploadStatus {
+ PENDING
+ DOWNLOADING
+ PROCESSING
+ SAVING
+ COMPLETE
+ ERROR
+}
+
+model DatasetFileUpload {
+ id String @id @default(uuid()) @db.Uuid
+
+ datasetId String @db.Uuid
+ dataset Dataset @relation(fields: [datasetId], references: [id], onDelete: Cascade)
+ blobName String
+ fileName String
+ fileSize Int
+ progress Int @default(0) // Percentage
+ status DatasetFileUploadStatus @default(PENDING)
+ uploadedAt DateTime
+ visible Boolean @default(true)
+ errorMessage String?
+
+ createdAt DateTime @default(now())
+ updatedAt DateTime @updatedAt
+}
+
model Dataset {
id String @id @default(uuid()) @db.Uuid
name String
datasetEntries DatasetEntry[]
fineTunes FineTune[]
+ datasetFileUploads DatasetFileUpload[]
trainingRatio Float @default(0.8)
projectId String @db.Uuid
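
For context, a minimal sketch (not part of the diff) of how the new model is driven through the generated Prisma client; the function and values are hypothetical:

```ts
import { PrismaClient } from "@prisma/client";

const prisma = new PrismaClient();

// Create the row the background task will update, then read back the
// status/progress fields the UI polls.
async function demo(datasetId: string) {
  const upload = await prisma.datasetFileUpload.create({
    data: {
      datasetId,
      blobName: "training-data-1234.jsonl",
      fileName: "training-data.jsonl",
      fileSize: 2048,
      uploadedAt: new Date(),
    },
  });

  const { status, progress } = await prisma.datasetFileUpload.findUniqueOrThrow({
    where: { id: upload.id },
  });
  console.log(status, progress); // "PENDING" 0 until the worker picks it up
}
```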
diff --git a/app/src/components/datasets/FileUploadCard.tsx b/app/src/components/datasets/FileUploadCard.tsx
new file mode 100644
index 0000000..4d48cd2
--- /dev/null
+++ b/app/src/components/datasets/FileUploadCard.tsx
@@ -0,0 +1,61 @@
+import { VStack, HStack, Button, Text, Card, Progress, IconButton } from "@chakra-ui/react";
+import { BsX } from "react-icons/bs";
+
+import { type RouterOutputs, api } from "~/utils/api";
+import { useHandledAsyncCallback } from "~/utils/hooks";
+import { formatFileSize } from "~/utils/utils";
+
+type FileUpload = RouterOutputs["datasets"]["listFileUploads"][0];
+
+const FileUploadCard = ({ fileUpload }: { fileUpload: FileUpload }) => {
+ const { id, fileName, fileSize, progress, status, errorMessage } = fileUpload;
+
+ const utils = api.useContext();
+
+ const hideFileUploadMutation = api.datasets.hideFileUpload.useMutation();
+ const [hideFileUpload, hidingInProgress] = useHandledAsyncCallback(async () => {
+ await hideFileUploadMutation.mutateAsync({ fileUploadId: id });
+ await utils.datasets.listFileUploads.invalidate();
+ }, [id, hideFileUploadMutation, utils]);
+
+ const [refreshDatasetEntries] = useHandledAsyncCallback(async () => {
+ await utils.datasetEntries.list.invalidate();
+ }, [utils]);
+
+  return (
+    <Card w="full" p={4}>
+      <VStack w="full" alignItems="flex-start" spacing={2}>
+        <HStack w="full" justifyContent="space-between">
+          <Text fontWeight="bold">
+            Uploading {fileName} ({formatFileSize(fileSize, 2)})
+          </Text>
+          <HStack spacing={2}>
+            {status === "COMPLETE" && (
+              <Button size="xs" onClick={refreshDatasetEntries}>
+                Refresh
+              </Button>
+            )}
+            <IconButton
+              aria-label="Hide file upload"
+              icon={<BsX />}
+              size="xs"
+              variant="ghost"
+              isLoading={hidingInProgress}
+              onClick={hideFileUpload}
+            />
+          </HStack>
+        </HStack>
+        <Progress w="full" value={progress} />
+        <Text fontSize="sm" color={errorMessage ? "red.500" : "gray.500"}>
+          {errorMessage ? errorMessage : `${status} (${progress}%)`}
+        </Text>
+      </VStack>
+    </Card>
+  );
+};
+
+export default FileUploadCard;
diff --git a/app/src/components/datasets/ImportDataButton.tsx b/app/src/components/datasets/ImportDataButton.tsx
index e179982..ea6d82c 100644
--- a/app/src/components/datasets/ImportDataButton.tsx
+++ b/app/src/components/datasets/ImportDataButton.tsx
@@ -1,4 +1,4 @@
-import { useState, useEffect, useRef } from "react";
+import { useState, useEffect, useRef, useCallback } from "react";
import {
Modal,
ModalOverlay,
@@ -16,13 +16,15 @@ import {
useDisclosure,
type UseDisclosureReturn,
} from "@chakra-ui/react";
+import pluralize from "pluralize";
import { AiOutlineCloudUpload, AiOutlineFile } from "react-icons/ai";
import { useDataset, useHandledAsyncCallback } from "~/utils/hooks";
import { api } from "~/utils/api";
import ActionButton from "../ActionButton";
import { validateTrainingRows, type TrainingRow, parseJSONL } from "./validateTrainingRows";
-import pluralize from "pluralize";
+import { uploadDatasetEntryFile } from "~/utils/azure/website";
+import { formatFileSize } from "~/utils/utils";
const ImportDataButton = () => {
const disclosure = useDisclosure();
@@ -48,6 +50,7 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
const [validationError, setValidationError] = useState<string | null>(null);
const [trainingRows, setTrainingRows] = useState<TrainingRow[] | null>(null);
+ const [file, setFile] = useState<File | null>(null);
const fileInputRef = useRef<HTMLInputElement>(null);
@@ -67,6 +70,14 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
};
const processFile = (file: File) => {
+ setFile(file);
+
+ // skip reading if file is larger than 10MB
+ if (file.size > 10000000) {
+ setTrainingRows(null);
+ return;
+ }
+
const reader = new FileReader();
reader.onload = (e: ProgressEvent<FileReader>) => {
const content = e.target?.result as string;
@@ -83,7 +94,6 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
setTrainingRows(parsedJSONL);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
- console.log("e is", e);
setValidationError("Unable to parse JSONL file: " + (e.message as string));
setTrainingRows(null);
return;
@@ -92,28 +102,38 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
reader.readAsText(file);
};
+ const resetState = useCallback(() => {
+ setValidationError(null);
+ setTrainingRows(null);
+ setFile(null);
+ }, [setValidationError, setTrainingRows, setFile]);
+
useEffect(() => {
if (disclosure.isOpen) {
- setTrainingRows(null);
- setValidationError(null);
+ resetState();
}
- }, [disclosure.isOpen]);
+ }, [disclosure.isOpen, resetState]);
+
+ const triggerFileDownloadMutation = api.datasets.triggerFileDownload.useMutation();
const utils = api.useContext();
- const sendJSONLMutation = api.datasetEntries.create.useMutation();
-
const [sendJSONL, sendingInProgress] = useHandledAsyncCallback(async () => {
- if (!dataset || !trainingRows) return;
+ if (!dataset || !file) return;
- await sendJSONLMutation.mutateAsync({
+ const blobName = await uploadDatasetEntryFile(file);
+
+ await triggerFileDownloadMutation.mutateAsync({
datasetId: dataset.id,
- jsonl: JSON.stringify(trainingRows),
+ blobName,
+ fileName: file.name,
+ fileSize: file.size,
});
- await utils.datasetEntries.list.invalidate();
+ await utils.datasets.listFileUploads.invalidate();
+
disclosure.onClose();
- }, [dataset, trainingRows, sendJSONLMutation]);
+ }, [dataset, trainingRows, triggerFileDownloadMutation, file, utils]);
return (
@@ -127,7 +147,28 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
- {!trainingRows && !validationError && (
+ {validationError && (
+   <VStack w="full" spacing={4}>
+     <Text fontSize="l" fontWeight="bold" color="red.500">
+       Error
+     </Text>
+     <Text>{validationError}</Text>
+     <Button onClick={resetState}>
+       Try again
+     </Button>
+   </VStack>
+ )}
+ {!validationError && !file && (
)}
- {validationError && (
-   <VStack w="full" spacing={4}>
-     <Text fontSize="l" fontWeight="bold" color="red.500">
-       Error
-     </Text>
-     <Text>{validationError}</Text>
-     <Button
-       onClick={() => setValidationError(null)}
-     >
-       Try again
-     </Button>
-   </VStack>
- )}
- {trainingRows && !validationError && (
+ {!validationError && file && (
    <VStack w="full" spacing={4}>
-     <Text fontWeight="bold">Success</Text>
-     <Text>
-       We'll upload {trainingRows.length}{" "}
-       {pluralize("row", trainingRows.length)} into {dataset?.name}.{" "}
-     </Text>
+     {trainingRows ? (
+       <>
+         <Text fontWeight="bold">Success</Text>
+         <Text>
+           We'll upload {trainingRows.length}{" "}
+           {pluralize("row", trainingRows.length)} into {dataset?.name}.{" "}
+         </Text>
+       </>
+     ) : (
+       <>
+         <Text fontWeight="bold">{file.name}</Text>
+         <Text>{formatFileSize(file.size)}</Text>
+       </>
+     )}
      <Text
color="gray.500"
_hover={{ color: "orange.400" }}
cursor="pointer"
- onClick={() => setTrainingRows(null)}
+ onClick={resetState}
>
Change file
@@ -224,7 +255,7 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
onClick={sendJSONL}
isLoading={sendingInProgress}
minW={24}
- isDisabled={!trainingRows || !!validationError}
+ isDisabled={!file || !!validationError}
>
Upload
diff --git a/app/src/env.mjs b/app/src/env.mjs
index e47966b..c4a00eb 100644
--- a/app/src/env.mjs
+++ b/app/src/env.mjs
@@ -26,6 +26,9 @@ export const env = createEnv({
SMTP_PORT: z.string().default("placeholder"),
SMTP_LOGIN: z.string().default("placeholder"),
SMTP_PASSWORD: z.string().default("placeholder"),
+ AZURE_STORAGE_ACCOUNT_NAME: z.string().default("placeholder"),
+ AZURE_STORAGE_ACCOUNT_KEY: z.string().default("placeholder"),
+ AZURE_STORAGE_CONTAINER_NAME: z.string().default("placeholder"),
WORKER_CONCURRENCY: z
.string()
.default("10")
@@ -72,6 +75,9 @@ export const env = createEnv({
SMTP_PORT: process.env.SMTP_PORT,
SMTP_LOGIN: process.env.SMTP_LOGIN,
SMTP_PASSWORD: process.env.SMTP_PASSWORD,
+ AZURE_STORAGE_ACCOUNT_NAME: process.env.AZURE_STORAGE_ACCOUNT_NAME,
+ AZURE_STORAGE_ACCOUNT_KEY: process.env.AZURE_STORAGE_ACCOUNT_KEY,
+ AZURE_STORAGE_CONTAINER_NAME: process.env.AZURE_STORAGE_CONTAINER_NAME,
WORKER_CONCURRENCY: process.env.WORKER_CONCURRENCY,
WORKER_MAX_POOL_SIZE: process.env.WORKER_MAX_POOL_SIZE,
},
diff --git a/app/src/pages/datasets/[id].tsx b/app/src/pages/datasets/[id].tsx
index bb88f86..afe3b92 100644
--- a/app/src/pages/datasets/[id].tsx
+++ b/app/src/pages/datasets/[id].tsx
@@ -28,6 +28,7 @@ import ExperimentButton from "~/components/datasets/ExperimentButton";
import ImportDataButton from "~/components/datasets/ImportDataButton";
import DownloadButton from "~/components/datasets/ExportButton";
import DeleteButton from "~/components/datasets/DeleteButton";
+import FileUploadCard from "~/components/datasets/FileUploadCard";
export default function Dataset() {
const utils = api.useContext();
@@ -40,6 +41,19 @@ export default function Dataset() {
setName(dataset.data?.name || "");
}, [dataset.data?.name]);
+ const [fileUploadsRefetchInterval, setFileUploadsRefetchInterval] = useState(500);
+ const fileUploads = api.datasets.listFileUploads.useQuery(
+ { datasetId: dataset.data?.id as string },
+ { enabled: !!dataset.data?.id, refetchInterval: fileUploadsRefetchInterval },
+ );
+ useEffect(() => {
+ if (fileUploads?.data?.some((fu) => fu.status !== "COMPLETE" && fu.status !== "ERROR")) {
+ setFileUploadsRefetchInterval(500);
+ } else {
+ setFileUploadsRefetchInterval(0);
+ }
+ }, [fileUploads]);
+
useEffect(() => {
useAppStore.getState().sharedArgumentsEditor.loadMonaco().catch(console.error);
}, []);
@@ -101,6 +115,13 @@ export default function Dataset() {
+
+      <VStack w="full" alignItems="flex-start" spacing={4}>
+        {fileUploads?.data?.map((upload) => (
+          <FileUploadCard key={upload.id} fileUpload={upload} />
+        ))}
+      </VStack>
+
diff --git a/app/src/server/api/routers/datasetEntries.router.ts b/app/src/server/api/routers/datasetEntries.router.ts
index aa8c459..b0baaca 100644
--- a/app/src/server/api/routers/datasetEntries.router.ts
+++ b/app/src/server/api/routers/datasetEntries.router.ts
@@ -1,4 +1,3 @@
-import { type Prisma } from "@prisma/client";
import { z } from "zod";
import { v4 as uuidv4 } from "uuid";
import {
@@ -7,18 +6,18 @@ import {
type CreateChatCompletionRequestMessage,
} from "openai/resources/chat";
import { TRPCError } from "@trpc/server";
-import { shuffle } from "lodash-es";
import archiver from "archiver";
+import { WritableStreamBuffer } from "stream-buffers";
import { createTRPCRouter, protectedProcedure } from "~/server/api/trpc";
import { prisma } from "~/server/db";
import { requireCanModifyProject, requireCanViewProject } from "~/utils/accessControl";
import { error, success } from "~/utils/errorHandling/standardResponses";
import { countOpenAIChatTokens } from "~/utils/countTokens";
-import { type TrainingRow, validateTrainingRows } from "~/components/datasets/validateTrainingRows";
+import { type TrainingRow } from "~/components/datasets/validateTrainingRows";
import hashObject from "~/server/utils/hashObject";
import { type JsonValue } from "type-fest";
-import { WritableStreamBuffer } from "stream-buffers";
+import { formatEntriesFromTrainingRows } from "~/server/utils/createEntriesFromTrainingRows";
export const datasetEntriesRouter = createTRPCRouter({
list: protectedProcedure
@@ -100,7 +99,6 @@ export const datasetEntriesRouter = createTRPCRouter({
})
.optional(),
loggedCallIds: z.string().array().optional(),
- jsonl: z.string().optional(),
}),
)
.mutation(async ({ input, ctx }) => {
@@ -121,104 +119,48 @@ export const datasetEntriesRouter = createTRPCRouter({
return error("No datasetId or newDatasetParams provided");
}
- if (!input.loggedCallIds && !input.jsonl) {
- return error("No loggedCallIds or jsonl provided");
+ if (!input.loggedCallIds) {
+ return error("No loggedCallIds provided");
}
- let trainingRows: TrainingRow[];
-
- if (input.loggedCallIds) {
- const loggedCalls = await prisma.loggedCall.findMany({
- where: {
- id: {
- in: input.loggedCallIds,
- },
- modelResponse: {
- isNot: null,
+ const loggedCalls = await prisma.loggedCall.findMany({
+ where: {
+ id: {
+ in: input.loggedCallIds,
+ },
+ modelResponse: {
+ isNot: null,
+ },
+ },
+ include: {
+ modelResponse: {
+ select: {
+ reqPayload: true,
+ respPayload: true,
+ inputTokens: true,
+ outputTokens: true,
},
},
- include: {
- modelResponse: {
- select: {
- reqPayload: true,
- respPayload: true,
- inputTokens: true,
- outputTokens: true,
- },
- },
- },
- orderBy: { createdAt: "desc" },
- });
+ },
+ orderBy: { createdAt: "desc" },
+ });
- trainingRows = loggedCalls.map((loggedCall) => {
- const inputMessages = (
- loggedCall.modelResponse?.reqPayload as unknown as CompletionCreateParams
- ).messages;
- let output: ChatCompletion.Choice.Message | undefined = undefined;
- const resp = loggedCall.modelResponse?.respPayload as unknown as
- | ChatCompletion
- | undefined;
- if (resp && resp.choices?.[0]) {
- output = resp.choices[0].message;
- }
- return {
- input: inputMessages as unknown as CreateChatCompletionRequestMessage[],
- output: output as unknown as CreateChatCompletionRequestMessage,
- };
- });
- } else {
- trainingRows = JSON.parse(input.jsonl as string) as TrainingRow[];
- const validationError = validateTrainingRows(trainingRows);
- if (validationError) {
- return error(`Invalid JSONL: ${validationError}`);
+ const trainingRows = loggedCalls.map((loggedCall) => {
+ const inputMessages = (
+ loggedCall.modelResponse?.reqPayload as unknown as CompletionCreateParams
+ ).messages;
+ let output: ChatCompletion.Choice.Message | undefined = undefined;
+ const resp = loggedCall.modelResponse?.respPayload as unknown as ChatCompletion | undefined;
+ if (resp && resp.choices?.[0]) {
+ output = resp.choices[0].message;
}
- }
+ return {
+ input: inputMessages as unknown as CreateChatCompletionRequestMessage[],
+ output: output as unknown as CreateChatCompletionRequestMessage,
+ };
+ });
- const [existingTrainingCount, existingTestingCount] = await prisma.$transaction([
- prisma.datasetEntry.count({
- where: {
- datasetId,
- type: "TRAIN",
- },
- }),
- prisma.datasetEntry.count({
- where: {
- datasetId,
- type: "TEST",
- },
- }),
- ]);
-
- const newTotalEntries = existingTrainingCount + existingTestingCount + trainingRows.length;
- const numTrainingToAdd = Math.floor(trainingRatio * newTotalEntries) - existingTrainingCount;
- const numTestingToAdd = trainingRows.length - numTrainingToAdd;
- const typesToAssign = shuffle([
- ...Array(numTrainingToAdd).fill("TRAIN"),
- ...Array(numTestingToAdd).fill("TEST"),
- ]);
- const datasetEntriesToCreate: Prisma.DatasetEntryCreateManyInput[] = [];
- for (const row of trainingRows) {
- let outputTokens = 0;
- if (row.output) {
- outputTokens = countOpenAIChatTokens("gpt-4-0613", [
- row.output as unknown as ChatCompletion.Choice.Message,
- ]);
- }
- datasetEntriesToCreate.push({
- datasetId: datasetId,
- input: row.input as unknown as Prisma.InputJsonValue,
- output: (row.output as unknown as Prisma.InputJsonValue) ?? {
- role: "assistant",
- content: "",
- },
- inputTokens: countOpenAIChatTokens(
- "gpt-4-0613",
- row.input as unknown as CreateChatCompletionRequestMessage[],
- ),
- outputTokens,
- type: typesToAssign.pop() as "TRAIN" | "TEST",
- });
- }
+ const datasetEntriesToCreate = await formatEntriesFromTrainingRows(datasetId, trainingRows);
// Ensure dataset and dataset entries are created atomically
await prisma.$transaction([
@@ -239,7 +181,6 @@ export const datasetEntriesRouter = createTRPCRouter({
return success(datasetId);
}),
-
update: protectedProcedure
.input(
z.object({
diff --git a/app/src/server/api/routers/datasets.router.ts b/app/src/server/api/routers/datasets.router.ts
index 596717e..af86a8e 100644
--- a/app/src/server/api/routers/datasets.router.ts
+++ b/app/src/server/api/routers/datasets.router.ts
@@ -1,8 +1,11 @@
import { z } from "zod";
+
import { createTRPCRouter, protectedProcedure } from "~/server/api/trpc";
import { prisma } from "~/server/db";
import { requireCanModifyProject, requireCanViewProject } from "~/utils/accessControl";
import { success } from "~/utils/errorHandling/standardResponses";
+import { generateServiceClientUrl } from "~/utils/azure/server";
+import { queueImportDatasetEntries } from "~/server/tasks/importDatasetEntries.task";
export const datasetsRouter = createTRPCRouter({
get: protectedProcedure.input(z.object({ id: z.string() })).query(async ({ input, ctx }) => {
@@ -94,4 +97,73 @@ export const datasetsRouter = createTRPCRouter({
return success("Dataset deleted");
}),
+ getServiceClientUrl: protectedProcedure
+ .input(z.object({ projectId: z.string() }))
+ .query(async ({ input, ctx }) => {
+    // The user must be able to modify the project to request an upload SAS token
+ await requireCanModifyProject(input.projectId, ctx);
+ return generateServiceClientUrl();
+ }),
+ triggerFileDownload: protectedProcedure
+ .input(
+ z.object({
+ datasetId: z.string(),
+ blobName: z.string(),
+ fileName: z.string(),
+ fileSize: z.number(),
+ }),
+ )
+ .mutation(async ({ input, ctx }) => {
+ const { projectId } = await prisma.dataset.findUniqueOrThrow({
+ where: { id: input.datasetId },
+ });
+ await requireCanViewProject(projectId, ctx);
+
+ const { id } = await prisma.datasetFileUpload.create({
+ data: {
+ datasetId: input.datasetId,
+ blobName: input.blobName,
+ status: "PENDING",
+ fileName: input.fileName,
+ fileSize: input.fileSize,
+ uploadedAt: new Date(),
+ },
+ });
+
+ await queueImportDatasetEntries(id);
+ }),
+ listFileUploads: protectedProcedure
+ .input(z.object({ datasetId: z.string() }))
+ .query(async ({ input, ctx }) => {
+ const { projectId } = await prisma.dataset.findUniqueOrThrow({
+ where: { id: input.datasetId },
+ });
+ await requireCanViewProject(projectId, ctx);
+
+ return await prisma.datasetFileUpload.findMany({
+ where: {
+ datasetId: input.datasetId,
+ visible: true,
+ },
+ orderBy: { createdAt: "desc" },
+ });
+ }),
+ hideFileUpload: protectedProcedure
+ .input(z.object({ fileUploadId: z.string() }))
+ .mutation(async ({ input, ctx }) => {
+ const { datasetId } = await prisma.datasetFileUpload.findUniqueOrThrow({
+ where: { id: input.fileUploadId },
+ });
+ const { projectId } = await prisma.dataset.findUniqueOrThrow({
+ where: { id: datasetId },
+ });
+ await requireCanModifyProject(projectId, ctx);
+
+ await prisma.datasetFileUpload.update({
+ where: { id: input.fileUploadId },
+ data: {
+ visible: false,
+ },
+ });
+ }),
});
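
Taken together, the new endpoints form a four-step flow. A sketch of how the client side drives them, mirroring what ImportDataButton does (identifiers as in this diff, wiring hypothetical):

```ts
// 1. uploadDatasetEntryFile internally calls datasets.getServiceClientUrl for a
//    short-lived SAS URL and uploads the blob (see app/src/utils/azure/website.ts).
const blobName = await uploadDatasetEntryFile(file);

// 2. Register the blob so the server can download and import it in the background.
await triggerFileDownloadMutation.mutateAsync({
  datasetId,
  blobName,
  fileName: file.name,
  fileSize: file.size,
});

// 3. Poll until every upload reaches COMPLETE or ERROR (the dataset page does
//    this with a 500ms refetchInterval).
const uploads = await utils.datasets.listFileUploads.fetch({ datasetId });

// 4. Finished or failed uploads are soft-deleted from the UI via datasets.hideFileUpload.
```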
diff --git a/app/src/server/tasks/importDatasetEntries.task.ts b/app/src/server/tasks/importDatasetEntries.task.ts
new file mode 100644
index 0000000..81059e3
--- /dev/null
+++ b/app/src/server/tasks/importDatasetEntries.task.ts
@@ -0,0 +1,132 @@
+import { type DatasetFileUpload } from "@prisma/client";
+import { prisma } from "~/server/db";
+import defineTask from "./defineTask";
+import { downloadBlobToString } from "~/utils/azure/server";
+import {
+ type TrainingRow,
+ validateTrainingRows,
+ parseJSONL,
+} from "~/components/datasets/validateTrainingRows";
+import { formatEntriesFromTrainingRows } from "~/server/utils/createEntriesFromTrainingRows";
+
+export type ImportDatasetEntriesJob = {
+ datasetFileUploadId: string;
+};
+
+export const importDatasetEntries = defineTask<ImportDatasetEntriesJob>(
+ "importDatasetEntries",
+ async (task) => {
+ const { datasetFileUploadId } = task;
+ const datasetFileUpload = await prisma.datasetFileUpload.findUnique({
+ where: { id: datasetFileUploadId },
+ });
+ if (!datasetFileUpload) {
+ await prisma.datasetFileUpload.update({
+ where: { id: datasetFileUploadId },
+ data: {
+ errorMessage: "Dataset File Upload not found",
+ status: "ERROR",
+ },
+ });
+ return;
+ }
+ await prisma.datasetFileUpload.update({
+ where: { id: datasetFileUploadId },
+ data: {
+ status: "DOWNLOADING",
+ progress: 5,
+ },
+ });
+
+ const jsonlStr = await downloadBlobToString(datasetFileUpload.blobName);
+ const trainingRows = parseJSONL(jsonlStr) as TrainingRow[];
+ const validationError = validateTrainingRows(trainingRows);
+ if (validationError) {
+ await prisma.datasetFileUpload.update({
+ where: { id: datasetFileUploadId },
+ data: {
+ errorMessage: `Invalid JSONL: ${validationError}`,
+ status: "ERROR",
+ },
+ });
+ return;
+ }
+
+ await prisma.datasetFileUpload.update({
+ where: { id: datasetFileUploadId },
+ data: {
+ status: "PROCESSING",
+ progress: 30,
+ },
+ });
+
+ const updateCallback = async (progress: number) => {
+ await prisma.datasetFileUpload.update({
+ where: { id: datasetFileUploadId },
+ data: {
+ progress: 30 + Math.floor((progress / trainingRows.length) * 69),
+ },
+ });
+ };
+
+ let datasetEntriesToCreate;
+ try {
+ datasetEntriesToCreate = await formatEntriesFromTrainingRows(
+ datasetFileUpload.datasetId,
+ trainingRows,
+ updateCallback,
+ 500,
+ );
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ } catch (e: any) {
+ await prisma.datasetFileUpload.update({
+ where: { id: datasetFileUploadId },
+ data: {
+ errorMessage: `Error formatting rows: ${e.message as string}`,
+ status: "ERROR",
+ },
+ });
+ return;
+ }
+
+ await prisma.datasetFileUpload.update({
+ where: { id: datasetFileUploadId },
+ data: {
+ status: "SAVING",
+ progress: 99,
+ },
+ });
+
+ await prisma.datasetEntry.createMany({
+ data: datasetEntriesToCreate,
+ });
+
+ await prisma.datasetFileUpload.update({
+ where: { id: datasetFileUploadId },
+ data: {
+ status: "COMPLETE",
+ progress: 100,
+ },
+ });
+ },
+);
+
+export const queueImportDatasetEntries = async (datasetFileUploadId: string) => {
+ await Promise.all([
+ prisma.datasetFileUpload.update({
+ where: {
+ id: datasetFileUploadId,
+ },
+ data: {
+ errorMessage: null,
+ status: "PENDING",
+ },
+ }),
+
+ importDatasetEntries.enqueue({ datasetFileUploadId }),
+ ]);
+};
diff --git a/app/src/server/tasks/worker.ts b/app/src/server/tasks/worker.ts
index 1daff35..326c505 100644
--- a/app/src/server/tasks/worker.ts
+++ b/app/src/server/tasks/worker.ts
@@ -5,10 +5,11 @@ import "../../../sentry.server.config";
import { env } from "~/env.mjs";
import { queryModel } from "./queryModel.task";
import { runNewEval } from "./runNewEval.task";
+import { importDatasetEntries } from "./importDatasetEntries.task";
console.log("Starting worker");
-const registeredTasks = [queryModel, runNewEval];
+const registeredTasks = [queryModel, runNewEval, importDatasetEntries];
const taskList = registeredTasks.reduce((acc, task) => {
acc[task.task.identifier] = task.task.handler;
diff --git a/app/src/server/utils/createEntriesFromTrainingRows.ts b/app/src/server/utils/createEntriesFromTrainingRows.ts
new file mode 100644
index 0000000..7b4197a
--- /dev/null
+++ b/app/src/server/utils/createEntriesFromTrainingRows.ts
@@ -0,0 +1,70 @@
+import { type Prisma } from "@prisma/client";
+import { shuffle } from "lodash-es";
+import {
+ type CreateChatCompletionRequestMessage,
+ type ChatCompletion,
+} from "openai/resources/chat";
+
+import { prisma } from "~/server/db";
+import { type TrainingRow } from "~/components/datasets/validateTrainingRows";
+import { countLlamaChatTokens } from "~/utils/countTokens";
+
+export const formatEntriesFromTrainingRows = async (
+ datasetId: string,
+ trainingRows: TrainingRow[],
+  updateCallback?: (progress: number) => Promise<void>,
+ updateFrequency = 1000,
+) => {
+ const [dataset, existingTrainingCount, existingTestingCount] = await prisma.$transaction([
+ prisma.dataset.findUnique({ where: { id: datasetId } }),
+ prisma.datasetEntry.count({
+ where: {
+ datasetId,
+ type: "TRAIN",
+ },
+ }),
+ prisma.datasetEntry.count({
+ where: {
+ datasetId,
+ type: "TEST",
+ },
+ }),
+ ]);
+
+ const trainingRatio = dataset?.trainingRatio ?? 0.8;
+
+ const newTotalEntries = existingTrainingCount + existingTestingCount + trainingRows.length;
+ const numTrainingToAdd = Math.floor(trainingRatio * newTotalEntries) - existingTrainingCount;
+ const numTestingToAdd = trainingRows.length - numTrainingToAdd;
+ const typesToAssign = shuffle([
+ ...Array(numTrainingToAdd).fill("TRAIN"),
+ ...Array(numTestingToAdd).fill("TEST"),
+ ]);
+ const datasetEntriesToCreate: Prisma.DatasetEntryCreateManyInput[] = [];
+ let i = 0;
+ for (const row of trainingRows) {
+ if (updateCallback && i % updateFrequency === 0) await updateCallback(i);
+ let outputTokens = 0;
+ if (row.output) {
+ outputTokens = countLlamaChatTokens([row.output as unknown as ChatCompletion.Choice.Message]);
+ }
+ datasetEntriesToCreate.push({
+ datasetId: datasetId,
+ input: row.input as unknown as Prisma.InputJsonValue,
+ output: (row.output as unknown as Prisma.InputJsonValue) ?? {
+ role: "assistant",
+ content: "",
+ },
+ inputTokens: countLlamaChatTokens(
+ row.input as unknown as CreateChatCompletionRequestMessage[],
+ ),
+ outputTokens,
+ type: typesToAssign.pop() as "TRAIN" | "TEST",
+ });
+ i++;
+ }
+
+ return datasetEntriesToCreate;
+};
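
The split math targets the dataset-wide trainingRatio rather than splitting only the new rows. A worked example with hypothetical counts:

```ts
// Existing dataset: 80 TRAIN + 20 TEST entries, trainingRatio 0.8.
// Importing 100 new rows:
const newTotalEntries = 80 + 20 + 100; // 200
const numTrainingToAdd = Math.floor(0.8 * 200) - 80; // 160 - 80 = 80
const numTestingToAdd = 100 - numTrainingToAdd; // 20
// The combined dataset lands at 160/200 = 0.8 TRAIN, as configured.
```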
diff --git a/app/src/utils/azure/server.ts b/app/src/utils/azure/server.ts
new file mode 100644
index 0000000..e82b03c
--- /dev/null
+++ b/app/src/utils/azure/server.ts
@@ -0,0 +1,71 @@
+import {
+ BlobServiceClient,
+ generateAccountSASQueryParameters,
+ AccountSASPermissions,
+ AccountSASServices,
+ AccountSASResourceTypes,
+ StorageSharedKeyCredential,
+ SASProtocol,
+} from "@azure/storage-blob";
+import { DefaultAzureCredential } from "@azure/identity";
+
+const accountName = process.env.AZURE_STORAGE_ACCOUNT_NAME;
+if (!accountName) throw Error("Azure Storage accountName not found");
+const accountKey = process.env.AZURE_STORAGE_ACCOUNT_KEY;
+if (!accountKey) throw Error("Azure Storage accountKey not found");
+const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME;
+if (!containerName) throw Error("Azure Storage containerName not found");
+
+const sharedKeyCredential = new StorageSharedKeyCredential(accountName, accountKey);
+
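+// Note: SAS tokens below are signed with the storage account's shared key,
+// while the server-side service client authenticates separately via
+// DefaultAzureCredential (e.g. a managed identity or developer login).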
+const blobServiceClient = new BlobServiceClient(
+ `https://${accountName}.blob.core.windows.net`,
+ new DefaultAzureCredential(),
+);
+
+const containerClient = blobServiceClient.getContainerClient(containerName);
+
+export const generateServiceClientUrl = () => {
+ const sasOptions = {
+ services: AccountSASServices.parse("b").toString(), // blobs
+ resourceTypes: AccountSASResourceTypes.parse("sco").toString(), // service, container, object
+ permissions: AccountSASPermissions.parse("w"), // write permissions
+ protocol: SASProtocol.Https,
+ startsOn: new Date(),
+ expiresOn: new Date(new Date().valueOf() + 10 * 60 * 1000), // 10 minutes
+ };
+ let sasToken = generateAccountSASQueryParameters(sasOptions, sharedKeyCredential).toString();
+
+ // remove leading "?"
+ sasToken = sasToken[0] === "?" ? sasToken.substring(1) : sasToken;
+ return {
+ serviceClientUrl: `https://${accountName}.blob.core.windows.net?${sasToken}`,
+ containerName,
+ };
+};
+
+export async function downloadBlobToString(blobName: string) {
+ const blobClient = containerClient.getBlobClient(blobName);
+
+ const downloadResponse = await blobClient.download();
+
+ if (!downloadResponse) throw Error("error downloading blob");
+ if (!downloadResponse.readableStreamBody)
+ throw Error("downloadResponse.readableStreamBody not found");
+
+ const downloaded = await streamToBuffer(downloadResponse.readableStreamBody);
+ return downloaded.toString();
+}
+
+async function streamToBuffer(readableStream: NodeJS.ReadableStream): Promise {
+ return new Promise((resolve, reject) => {
+ const chunks: Uint8Array[] = [];
+ readableStream.on("data", (data: ArrayBuffer) => {
+ chunks.push(data instanceof Buffer ? data : Buffer.from(data));
+ });
+ readableStream.on("end", () => {
+ resolve(Buffer.concat(chunks));
+ });
+ readableStream.on("error", reject);
+ });
+}
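
The write-only, 10-minute SAS keeps the account key on the server; the browser only ever sees the signed URL. A hypothetical consumer of generateServiceClientUrl's return value:

```ts
import { BlobServiceClient } from "@azure/storage-blob";

// Values as returned by the tRPC query; the URL carries the SAS token.
declare const serviceClientUrl: string;
declare const containerName: string;

const containerClient = new BlobServiceClient(serviceClientUrl).getContainerClient(containerName);
// Write permission only: uploads succeed, reads and listing are rejected.
await containerClient.getBlockBlobClient("example.jsonl").uploadData(new Blob(["{}"]));
```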
diff --git a/app/src/utils/azure/website.ts b/app/src/utils/azure/website.ts
new file mode 100644
index 0000000..c64024d
--- /dev/null
+++ b/app/src/utils/azure/website.ts
@@ -0,0 +1,30 @@
+import { BlobServiceClient } from "@azure/storage-blob";
+import { v4 as uuidv4 } from "uuid";
+
+import { useAppStore } from "~/state/store";
+
+export const uploadDatasetEntryFile = async (file: File) => {
+ const { selectedProjectId: projectId, api } = useAppStore.getState();
+ if (!projectId) throw Error("projectId not found");
+ if (!api) throw Error("api not initialized");
+ const { serviceClientUrl, containerName } = await api.client.datasets.getServiceClientUrl.query({
+ projectId,
+ });
+
+ const blobServiceClient = new BlobServiceClient(serviceClientUrl);
+ // create container client
+ const containerClient = blobServiceClient.getContainerClient(containerName);
+
+ // base name without extension
+ const basename = file.name.split("/").pop()?.split(".").shift();
+ if (!basename) throw Error("basename not found");
+
+ const blobName = `${basename}-${uuidv4()}.jsonl`;
+ // create blob client
+ const blobClient = containerClient.getBlockBlobClient(blobName);
+
+ // upload file
+ await blobClient.uploadData(file);
+
+ return blobName;
+};
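
Note the blob-name derivation: the basename is cut at the first dot, so everything after it is dropped. A usage sketch with a hypothetical file:

```ts
const blobName = await uploadDatasetEntryFile(file);
// e.g. "report-1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed.jsonl" for a file named "report.v2.jsonl"
```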
diff --git a/app/src/utils/countTokens.ts b/app/src/utils/countTokens.ts
index 2a4ff14..8181bb1 100644
--- a/app/src/utils/countTokens.ts
+++ b/app/src/utils/countTokens.ts
@@ -1,5 +1,7 @@
import { type ChatCompletion } from "openai/resources/chat";
import { GPTTokens } from "gpt-tokens";
+import llamaTokenizer from "llama-tokenizer-js";
+
import { type SupportedModel } from "~/modelProviders/openai-ChatCompletion";
interface GPTTokensMessageItem {
@@ -22,3 +24,11 @@ export const countOpenAIChatTokens = (
messages: reformattedMessages as unknown as GPTTokensMessageItem[],
}).usedTokens;
};
+
+export const countLlamaChatTokens = (messages: ChatCompletion.Choice.Message[]) => {
+ const stringToTokenize = messages
+ .map((message) => message.content || JSON.stringify(message.function_call))
+ .join("\n");
+ const tokens = llamaTokenizer.encode(stringToTokenize);
+ return tokens.length;
+};
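
A usage sketch for the new Llama token counter; the exact count depends on the llama-tokenizer-js vocabulary, so the number shown is illustrative:

```ts
import { type ChatCompletion } from "openai/resources/chat";
import { countLlamaChatTokens } from "~/utils/countTokens";

const messages = [
  { role: "assistant", content: "Hello there!" },
] as ChatCompletion.Choice.Message[];

console.log(countLlamaChatTokens(messages)); // e.g. 5
```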
diff --git a/app/src/utils/utils.ts b/app/src/utils/utils.ts
index 6688013..0c51be8 100644
--- a/app/src/utils/utils.ts
+++ b/app/src/utils/utils.ts
@@ -52,3 +52,18 @@ export const parseableToFunctionCall = (str: string) => {
return true;
};
+
+export const formatFileSize = (bytes: number, decimals = 2) => {
+ if (bytes === 0) return "0 Bytes";
+
+ const k = 1024;
+ const dm = decimals < 0 ? 0 : decimals;
+ const sizes = ["Bytes", "KB", "MB", "GB", "TB"];
+
+ for (const size of sizes) {
+ if (bytes < k) return `${parseFloat(bytes.toFixed(dm))} ${size}`;
+ bytes /= k;
+ }
+
+ return "> 1024 TB";
+};
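
Sample outputs of the new helper (base-1024 units):

```ts
formatFileSize(0);           // "0 Bytes"
formatFileSize(1536);        // "1.5 KB"
formatFileSize(10_000_000);  // "9.54 MB" (the 10MB client-side read cutoff in ImportDataButton)
formatFileSize(1048576, 0);  // "1 MB"
```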
diff --git a/app/tsconfig.json b/app/tsconfig.json
index 32c81d2..039b1d2 100644
--- a/app/tsconfig.json
+++ b/app/tsconfig.json
@@ -19,7 +19,9 @@
"baseUrl": ".",
"paths": {
"~/*": ["./src/*"]
- }
+ },
+ "typeRoots": ["./types", "./node_modules/@types"],
+ "types": ["llama-tokenizer-js", "node"]
},
"include": [
".eslintrc.cjs",
diff --git a/app/types/llama-tokenizer-js/index.d.ts b/app/types/llama-tokenizer-js/index.d.ts
new file mode 100644
index 0000000..f96a650
--- /dev/null
+++ b/app/types/llama-tokenizer-js/index.d.ts
@@ -0,0 +1,7 @@
+declare module "llama-tokenizer-js" {
+  const llamaTokenizer: {
+    encode(input: string): number[];
+    decode(input: number[]): string;
+  };
+  export default llamaTokenizer;
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index e958ffe..50e334d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -14,6 +14,12 @@ importers:
'@apidevtools/json-schema-ref-parser':
specifier: ^10.1.0
version: 10.1.0
+ '@azure/identity':
+ specifier: ^3.3.0
+ version: 3.3.0
+ '@azure/storage-blob':
+ specifier: 12.15.0
+ version: 12.15.0
'@babel/standalone':
specifier: ^7.22.9
version: 7.22.9
@@ -143,6 +149,9 @@ importers:
kysely-codegen:
specifier: ^0.10.1
version: 0.10.1(kysely@0.26.1)(pg@8.11.2)
+ llama-tokenizer-js:
+ specifier: ^1.1.3
+ version: 1.1.3
lodash-es:
specifier: ^4.17.21
version: 4.17.21
@@ -465,6 +474,184 @@ packages:
js-yaml: 4.1.0
dev: true
+ /@azure/abort-controller@1.1.0:
+ resolution: {integrity: sha512-TrRLIoSQVzfAJX9H1JeFjzAoDGcoK1IYX1UImfceTZpsyYfWr09Ss1aHW1y5TrrR3iq6RZLBwJ3E24uwPhwahw==}
+ engines: {node: '>=12.0.0'}
+ dependencies:
+ tslib: 2.6.1
+ dev: false
+
+ /@azure/core-auth@1.5.0:
+ resolution: {integrity: sha512-udzoBuYG1VBoHVohDTrvKjyzel34zt77Bhp7dQntVGGD0ehVq48owENbBG8fIgkHRNUBQH5k1r0hpoMu5L8+kw==}
+ engines: {node: '>=14.0.0'}
+ dependencies:
+ '@azure/abort-controller': 1.1.0
+ '@azure/core-util': 1.4.0
+ tslib: 2.6.1
+ dev: false
+
+ /@azure/core-client@1.7.3:
+ resolution: {integrity: sha512-kleJ1iUTxcO32Y06dH9Pfi9K4U+Tlb111WXEnbt7R/ne+NLRwppZiTGJuTD5VVoxTMK5NTbEtm5t2vcdNCFe2g==}
+ engines: {node: '>=14.0.0'}
+ dependencies:
+ '@azure/abort-controller': 1.1.0
+ '@azure/core-auth': 1.5.0
+ '@azure/core-rest-pipeline': 1.12.0
+ '@azure/core-tracing': 1.0.1
+ '@azure/core-util': 1.4.0
+ '@azure/logger': 1.0.4
+ tslib: 2.6.1
+ transitivePeerDependencies:
+ - supports-color
+ dev: false
+
+ /@azure/core-http@3.0.3:
+ resolution: {integrity: sha512-QMib3wXotJMFhHgmJBPUF9YsyErw34H0XDFQd9CauH7TPB+RGcyl9Ayy7iURtJB04ngXhE6YwrQsWDXlSLrilg==}
+ engines: {node: '>=14.0.0'}
+ dependencies:
+ '@azure/abort-controller': 1.1.0
+ '@azure/core-auth': 1.5.0
+ '@azure/core-tracing': 1.0.0-preview.13
+ '@azure/core-util': 1.4.0
+ '@azure/logger': 1.0.4
+ '@types/node-fetch': 2.6.4
+ '@types/tunnel': 0.0.3
+ form-data: 4.0.0
+ node-fetch: 2.6.12(encoding@0.1.13)
+ process: 0.11.10
+ tslib: 2.6.1
+ tunnel: 0.0.6
+ uuid: 8.3.2
+ xml2js: 0.5.0
+ transitivePeerDependencies:
+ - encoding
+ dev: false
+
+ /@azure/core-lro@2.5.4:
+ resolution: {integrity: sha512-3GJiMVH7/10bulzOKGrrLeG/uCBH/9VtxqaMcB9lIqAeamI/xYQSHJL/KcsLDuH+yTjYpro/u6D/MuRe4dN70Q==}
+ engines: {node: '>=14.0.0'}
+ dependencies:
+ '@azure/abort-controller': 1.1.0
+ '@azure/core-util': 1.4.0
+ '@azure/logger': 1.0.4
+ tslib: 2.6.1
+ dev: false
+
+ /@azure/core-paging@1.5.0:
+ resolution: {integrity: sha512-zqWdVIt+2Z+3wqxEOGzR5hXFZ8MGKK52x4vFLw8n58pR6ZfKRx3EXYTxTaYxYHc/PexPUTyimcTWFJbji9Z6Iw==}
+ engines: {node: '>=14.0.0'}
+ dependencies:
+ tslib: 2.6.1
+ dev: false
+
+ /@azure/core-rest-pipeline@1.12.0:
+ resolution: {integrity: sha512-+MnSB0vGZjszSzr5AW8z93/9fkDu2RLtWmAN8gskURq7EW2sSwqy8jZa0V26rjuBVkwhdA3Hw8z3VWoeBUOw+A==}
+ engines: {node: '>=14.0.0'}
+ dependencies:
+ '@azure/abort-controller': 1.1.0
+ '@azure/core-auth': 1.5.0
+ '@azure/core-tracing': 1.0.1
+ '@azure/core-util': 1.4.0
+ '@azure/logger': 1.0.4
+ form-data: 4.0.0
+ http-proxy-agent: 5.0.0
+ https-proxy-agent: 5.0.1
+ tslib: 2.6.1
+ transitivePeerDependencies:
+ - supports-color
+ dev: false
+
+ /@azure/core-tracing@1.0.0-preview.13:
+ resolution: {integrity: sha512-KxDlhXyMlh2Jhj2ykX6vNEU0Vou4nHr025KoSEiz7cS3BNiHNaZcdECk/DmLkEB0as5T7b/TpRcehJ5yV6NeXQ==}
+ engines: {node: '>=12.0.0'}
+ dependencies:
+ '@opentelemetry/api': 1.4.1
+ tslib: 2.6.1
+ dev: false
+
+ /@azure/core-tracing@1.0.1:
+ resolution: {integrity: sha512-I5CGMoLtX+pI17ZdiFJZgxMJApsK6jjfm85hpgp3oazCdq5Wxgh4wMr7ge/TTWW1B5WBuvIOI1fMU/FrOAMKrw==}
+ engines: {node: '>=12.0.0'}
+ dependencies:
+ tslib: 2.6.1
+ dev: false
+
+ /@azure/core-util@1.4.0:
+ resolution: {integrity: sha512-eGAyJpm3skVQoLiRqm/xPa+SXi/NPDdSHMxbRAz2lSprd+Zs+qrpQGQQ2VQ3Nttu+nSZR4XoYQC71LbEI7jsig==}
+ engines: {node: '>=14.0.0'}
+ dependencies:
+ '@azure/abort-controller': 1.1.0
+ tslib: 2.6.1
+ dev: false
+
+ /@azure/identity@3.3.0:
+ resolution: {integrity: sha512-gISa/dAAxrWt6F2WiDXZY0y2xY4MLlN2wkNW4cPuq5OgPQKLSkxLc4I2WR04puTfZyQZnpXbAapAMEj1b96fgg==}
+ engines: {node: '>=14.0.0'}
+ dependencies:
+ '@azure/abort-controller': 1.1.0
+ '@azure/core-auth': 1.5.0
+ '@azure/core-client': 1.7.3
+ '@azure/core-rest-pipeline': 1.12.0
+ '@azure/core-tracing': 1.0.1
+ '@azure/core-util': 1.4.0
+ '@azure/logger': 1.0.4
+ '@azure/msal-browser': 2.38.2
+ '@azure/msal-common': 13.3.0
+ '@azure/msal-node': 1.18.3
+ events: 3.3.0
+ jws: 4.0.0
+ open: 8.4.2
+ stoppable: 1.1.0
+ tslib: 2.6.1
+ uuid: 8.3.2
+ transitivePeerDependencies:
+ - supports-color
+ dev: false
+
+ /@azure/logger@1.0.4:
+ resolution: {integrity: sha512-ustrPY8MryhloQj7OWGe+HrYx+aoiOxzbXTtgblbV3xwCqpzUK36phH3XNHQKj3EPonyFUuDTfR3qFhTEAuZEg==}
+ engines: {node: '>=14.0.0'}
+ dependencies:
+ tslib: 2.6.1
+ dev: false
+
+ /@azure/msal-browser@2.38.2:
+ resolution: {integrity: sha512-71BeIn2we6LIgMplwCSaMq5zAwmalyJR3jFcVOZxNVfQ1saBRwOD+P77nLs5vrRCedVKTq8RMFhIOdpMLNno0A==}
+ engines: {node: '>=0.8.0'}
+ dependencies:
+ '@azure/msal-common': 13.3.0
+ dev: false
+
+ /@azure/msal-common@13.3.0:
+ resolution: {integrity: sha512-/VFWTicjcJbrGp3yQP7A24xU95NiDMe23vxIU1U6qdRPFsprMDNUohMudclnd+WSHE4/McqkZs/nUU3sAKkVjg==}
+ engines: {node: '>=0.8.0'}
+ dev: false
+
+ /@azure/msal-node@1.18.3:
+ resolution: {integrity: sha512-lI1OsxNbS/gxRD4548Wyj22Dk8kS7eGMwD9GlBZvQmFV8FJUXoXySL1BiNzDsHUE96/DS/DHmA+F73p1Dkcktg==}
+ engines: {node: 10 || 12 || 14 || 16 || 18}
+ dependencies:
+ '@azure/msal-common': 13.3.0
+ jsonwebtoken: 9.0.2
+ uuid: 8.3.2
+ dev: false
+
+ /@azure/storage-blob@12.15.0:
+ resolution: {integrity: sha512-e7JBKLOFi0QVJqqLzrjx1eL3je3/Ug2IQj24cTM9b85CsnnFjLGeGjJVIjbGGZaytewiCEG7r3lRwQX7fKj0/w==}
+ engines: {node: '>=14.0.0'}
+ dependencies:
+ '@azure/abort-controller': 1.1.0
+ '@azure/core-http': 3.0.3
+ '@azure/core-lro': 2.5.4
+ '@azure/core-paging': 1.5.0
+ '@azure/core-tracing': 1.0.0-preview.13
+ '@azure/logger': 1.0.4
+ events: 3.3.0
+ tslib: 2.6.1
+ transitivePeerDependencies:
+ - encoding
+ dev: false
+
/@babel/code-frame@7.22.10:
resolution: {integrity: sha512-/KKIMG4UEL35WmI9OlvMhurwtytjvXoFcGNrOvyG9zIzA8YmPjVtIZUf7b05+TPO7G7/GEmLHDaoCgACHl9hhA==}
engines: {node: '>=6.9.0'}
@@ -2602,6 +2789,11 @@ packages:
openapi-typescript: 5.4.1
dev: true
+ /@opentelemetry/api@1.4.1:
+ resolution: {integrity: sha512-O2yRJce1GOc6PAy3QxFM4NzFiWzvScDC1/5ihYBL6BUEVdq0XMWN01sppE+H6bBXbaFYipjwFLEWLg5PaSOThA==}
+ engines: {node: '>=8.0.0'}
+ dev: false
+
/@panva/hkdf@1.1.1:
resolution: {integrity: sha512-dhPeilub1NuIG0X5Kvhh9lH4iW3ZsHlnzwgwbOlgwQ2wG1IqFzsgHqmKPk3WzsdWAeaxKJxgM0+W433RmN45GA==}
dev: false
@@ -2916,6 +3108,11 @@ packages:
use-sync-external-store: 1.2.0(react@18.2.0)
dev: false
+ /@tootallnate/once@2.0.0:
+ resolution: {integrity: sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==}
+ engines: {node: '>= 10'}
+ dev: false
+
/@trpc/client@10.26.0(@trpc/server@10.26.0):
resolution: {integrity: sha512-ojHxQFIE97rBEGPK8p1ijbzo0T1IdEBoJ9fFSgWWL9FMuEEA/DNQ9s0uuiOrDKhCCdTFT1unfRharoJhB2/O2w==}
peerDependencies:
@@ -3333,6 +3530,12 @@ packages:
resolution: {integrity: sha512-Q5vtl1W5ue16D+nIaW8JWebSSraJVlK+EthKn7e7UcD4KWsaSJ8BqGPXNaPghgtcn/fhvrN17Tv8ksUsQpiplw==}
dev: false
+ /@types/tunnel@0.0.3:
+ resolution: {integrity: sha512-sOUTGn6h1SfQ+gbgqC364jLFBw2lnFqkgF3q0WovEHRLMrVD1sd5aufqi/aJObLekJO+Aq5z646U4Oxy6shXMA==}
+ dependencies:
+ '@types/node': 20.4.10
+ dev: false
+
/@types/unist@2.0.7:
resolution: {integrity: sha512-cputDpIbFgLUaGQn6Vqg3/YsJwxUwHLO13v3i5ouxT4lat0khip9AEWxtERujXV9wxIB1EyF97BSJFt6vpdI8g==}
dev: false
@@ -4102,6 +4305,10 @@ packages:
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
dev: false
+ /buffer-equal-constant-time@1.0.1:
+ resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==}
+ dev: false
+
/buffer-from@0.1.2:
resolution: {integrity: sha512-RiWIenusJsmI2KcvqQABB83tLxCByE3upSP8QU3rJDMVFGPWLvPQJt/O1Su9moRWeH7d+Q2HYb68f6+v+tw2vg==}
dev: false
@@ -4707,6 +4914,11 @@ packages:
resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==}
dev: true
+ /define-lazy-prop@2.0.0:
+ resolution: {integrity: sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==}
+ engines: {node: '>=8'}
+ dev: false
+
/define-properties@1.2.0:
resolution: {integrity: sha512-xvqAVKGfT1+UAvPwKTVw/njhdQ8ZhXK4lI0bCIuCMrp2up9nPnaDftrLtmpTazqd1o+UY4zgzU+avtMbDP+ldA==}
engines: {node: '>= 0.4'}
@@ -4818,6 +5030,12 @@ packages:
safer-buffer: 2.1.2
dev: false
+ /ecdsa-sig-formatter@1.0.11:
+ resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==}
+ dependencies:
+ safe-buffer: 5.2.1
+ dev: false
+
/ee-first@1.1.1:
resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==}
dev: false
@@ -6061,6 +6279,17 @@ packages:
toidentifier: 1.0.1
dev: false
+ /http-proxy-agent@5.0.0:
+ resolution: {integrity: sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==}
+ engines: {node: '>= 6'}
+ dependencies:
+ '@tootallnate/once': 2.0.0
+ agent-base: 6.0.2
+ debug: 4.3.4
+ transitivePeerDependencies:
+ - supports-color
+ dev: false
+
/http-signature@1.2.0:
resolution: {integrity: sha512-CAbnr6Rz4CYQkLYUtSNXxQPUH2gK8f3iWexVlsnMeD+GjlsQ0Xsy1cOX+mN3dtxYomRy21CiOzU8Uhw6OwncEQ==}
engines: {node: '>=0.8', npm: '>=1.3.7'}
@@ -6256,6 +6485,12 @@ packages:
resolution: {integrity: sha512-RGdriMmQQvZ2aqaQq3awNA6dCGtKpiDFcOzrTWrDAT2MiWrKQVPmxLGHl7Y2nNu6led0kEyoX0enY0qXYsv9zw==}
dev: false
+ /is-docker@2.2.1:
+ resolution: {integrity: sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==}
+ engines: {node: '>=8'}
+ hasBin: true
+ dev: false
+
/is-extglob@2.1.1:
resolution: {integrity: sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==}
engines: {node: '>=0.10.0'}
@@ -6370,6 +6605,13 @@ packages:
engines: {node: '>=12.13'}
dev: false
+ /is-wsl@2.2.0:
+ resolution: {integrity: sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==}
+ engines: {node: '>=8'}
+ dependencies:
+ is-docker: 2.2.1
+ dev: false
+
/isarray@0.0.1:
resolution: {integrity: sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==}
dev: false
@@ -6399,7 +6641,7 @@ packages:
resolution: {integrity: sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg==}
engines: {node: '>= 10.13.0'}
dependencies:
- '@types/node': 20.4.10
+ '@types/node': 18.16.0
merge-stream: 2.0.0
supports-color: 8.1.1
@@ -6514,6 +6756,22 @@ packages:
resolution: {integrity: sha512-S6cATIPVv1z0IlxdN+zUk5EPjkGCdnhN4wVSBlvoUO1tOLJootbo9CquNJmbIh4yikWHiUedhRYrNPn1arpEmQ==}
dev: false
+ /jsonwebtoken@9.0.2:
+ resolution: {integrity: sha512-PRp66vJ865SSqOlgqS8hujT5U4AOgMfhrwYIuIhfKaoSCZcirrmASQr8CX7cUg+RMih+hgznrjp99o+W4pJLHQ==}
+ engines: {node: '>=12', npm: '>=6'}
+ dependencies:
+ jws: 3.2.2
+ lodash.includes: 4.3.0
+ lodash.isboolean: 3.0.3
+ lodash.isinteger: 4.0.4
+ lodash.isnumber: 3.0.3
+ lodash.isplainobject: 4.0.6
+ lodash.isstring: 4.0.1
+ lodash.once: 4.1.1
+ ms: 2.1.3
+ semver: 7.5.4
+ dev: false
+
/jsprim@1.4.2:
resolution: {integrity: sha512-P2bSOMAc/ciLz6DzgjVlGJP9+BrJWu5UDGK70C2iweC5QBIeFf0ZXRvGjEj2uYgrY2MkAAhsSWHDWlFtEroZWw==}
engines: {node: '>=0.6.0'}
@@ -6534,6 +6792,36 @@ packages:
object.values: 1.1.6
dev: true
+ /jwa@1.4.1:
+ resolution: {integrity: sha512-qiLX/xhEEFKUAJ6FiBMbes3w9ATzyk5W7Hvzpa/SLYdxNtng+gcurvrI7TbACjIXlsJyr05/S1oUhZrc63evQA==}
+ dependencies:
+ buffer-equal-constant-time: 1.0.1
+ ecdsa-sig-formatter: 1.0.11
+ safe-buffer: 5.2.1
+ dev: false
+
+ /jwa@2.0.0:
+ resolution: {integrity: sha512-jrZ2Qx916EA+fq9cEAeCROWPTfCwi1IVHqT2tapuqLEVVDKFDENFw1oL+MwrTvH6msKxsd1YTDVw6uKEcsrLEA==}
+ dependencies:
+ buffer-equal-constant-time: 1.0.1
+ ecdsa-sig-formatter: 1.0.11
+ safe-buffer: 5.2.1
+ dev: false
+
+ /jws@3.2.2:
+ resolution: {integrity: sha512-YHlZCB6lMTllWDtSPHz/ZXTsi8S00usEV6v1tjq8tOUZzw7DpSDWVXjXDre6ed1w/pd495ODpHZYSdkRTsa0HA==}
+ dependencies:
+ jwa: 1.4.1
+ safe-buffer: 5.2.1
+ dev: false
+
+ /jws@4.0.0:
+ resolution: {integrity: sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==}
+ dependencies:
+ jwa: 2.0.0
+ safe-buffer: 5.2.1
+ dev: false
+
/kysely-codegen@0.10.1(kysely@0.26.1)(pg@8.11.2):
resolution: {integrity: sha512-8Bslh952gN5gtucRv4jTZDFD18RBioS6M50zHfe5kwb5iSyEAunU4ZYMdHzkHraa4zxjg5/183XlOryBCXLRIw==}
hasBin: true
@@ -6605,6 +6893,10 @@ packages:
resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==}
dev: false
+ /llama-tokenizer-js@1.1.3:
+ resolution: {integrity: sha512-+BUgsLCXVQJkjiD/t7PdESLn+yXJIRX/BJfwzVVYfKZ9aN3gsP9xoadBZxKnCxGz2Slby+S7x41gUr2TKNaS4Q==}
+ dev: false
+
/loader-runner@4.3.0:
resolution: {integrity: sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg==}
engines: {node: '>=6.11.5'}
@@ -6660,10 +6952,30 @@ packages:
resolution: {integrity: sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g==}
dev: false
+ /lodash.includes@4.3.0:
+ resolution: {integrity: sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==}
+ dev: false
+
+ /lodash.isboolean@3.0.3:
+ resolution: {integrity: sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==}
+ dev: false
+
+ /lodash.isinteger@4.0.4:
+ resolution: {integrity: sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==}
+ dev: false
+
+ /lodash.isnumber@3.0.3:
+ resolution: {integrity: sha512-QYqzpfwO3/CWf3XP+Z+tkQsfaLL/EnUlXWVkIk5FUPc4sBdTehEqZONuyRt2P67PXAk+NXmTBcc97zw9t1FQrw==}
+ dev: false
+
/lodash.isplainobject@4.0.6:
resolution: {integrity: sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==}
dev: false
+ /lodash.isstring@4.0.1:
+ resolution: {integrity: sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==}
+ dev: false
+
/lodash.merge@4.6.2:
resolution: {integrity: sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==}
dev: true
@@ -6672,6 +6984,10 @@ packages:
resolution: {integrity: sha512-GK3g5RPZWTRSeLSpgP8Xhra+pnjBC56q9FZYe1d5RN3TJ35dbkGy3YqBSMbyCrlbi+CM9Z3Jk5yTL7RCsqboyQ==}
dev: false
+ /lodash.once@4.1.1:
+ resolution: {integrity: sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==}
+ dev: false
+
/lodash.union@4.6.0:
resolution: {integrity: sha512-c4pB2CdGrGdjMKYLA+XiRDO7Y0PRQbm/Gzg8qMj+QH+pFVAoTp5sBpO0odL3FjoPCGjK96p6qsP+yQoiLoOBcw==}
dev: false
@@ -7177,6 +7493,15 @@ packages:
dependencies:
wrappy: 1.0.2
+ /open@8.4.2:
+ resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==}
+ engines: {node: '>=12'}
+ dependencies:
+ define-lazy-prop: 2.0.0
+ is-docker: 2.2.1
+ is-wsl: 2.2.0
+ dev: false
+
/openai@3.3.0:
resolution: {integrity: sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==}
dependencies:
@@ -7627,6 +7952,11 @@ packages:
resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==}
dev: false
+ /process@0.11.10:
+ resolution: {integrity: sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==}
+ engines: {node: '>= 0.6.0'}
+ dev: false
+
/progress@2.0.3:
resolution: {integrity: sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==}
engines: {node: '>=0.4.0'}
@@ -8272,6 +8602,10 @@ packages:
yoga-wasm-web: 0.3.3
dev: false
+ /sax@1.2.4:
+ resolution: {integrity: sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==}
+ dev: false
+
/scheduler@0.23.0:
resolution: {integrity: sha512-CtuThmgHNg7zIZWAXi3AsyIzA3n4xx7aNyjwC2VJldO2LMVDhFK+63xGqq6CsJH4rTAt6/M+N4GhZiDYPx9eUw==}
dependencies:
@@ -8296,7 +8630,6 @@ packages:
hasBin: true
dependencies:
lru-cache: 6.0.0
- dev: true
/send@0.18.0:
resolution: {integrity: sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==}
@@ -8504,6 +8837,11 @@ packages:
resolution: {integrity: sha512-Rz6yejtVyWnVjC1RFvNmYL10kgjC49EOghxWn0RFqlCHGFpQx+Xe7yW3I4ceK1SGrWIGMjD5Kbue8W/udkbMJg==}
dev: true
+ /stoppable@1.1.0:
+ resolution: {integrity: sha512-KXDYZ9dszj6bzvnEMRYvxgeTHU74QBFL54XKtP3nyMuJ81CFYtABZ3bAzL2EdFUaEwJOBOgENyFj3R7oTzDyyw==}
+ engines: {node: '>=4', npm: '>=6'}
+ dev: false
+
/stream-buffers@3.0.2:
resolution: {integrity: sha512-DQi1h8VEBA/lURbSwFtEHnSTb9s2/pwLEaFuNhXwy1Dx3Sa0lOuYT2yNUr4/j2fs8oCAMANtrZ5OrPZtyVs3MQ==}
engines: {node: '>= 0.10.0'}
@@ -8876,6 +9214,11 @@ packages:
safe-buffer: 5.2.1
dev: false
+ /tunnel@0.0.6:
+ resolution: {integrity: sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==}
+ engines: {node: '>=0.6.11 <=0.7.0 || >=0.7.3'}
+ dev: false
+
/tweetnacl@0.14.5:
resolution: {integrity: sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA==}
dev: false
@@ -9464,6 +9807,19 @@ packages:
optional: true
dev: false
+ /xml2js@0.5.0:
+ resolution: {integrity: sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==}
+ engines: {node: '>=4.0.0'}
+ dependencies:
+ sax: 1.2.4
+ xmlbuilder: 11.0.1
+ dev: false
+
+ /xmlbuilder@11.0.1:
+ resolution: {integrity: sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==}
+ engines: {node: '>=4.0'}
+ dev: false
+
/xmlhttprequest-ssl@2.0.0:
resolution: {integrity: sha512-QKxVRxiRACQcVuQEYFsI1hhkrMlrXHPegbbd1yn9UHOmRxY+si12nQYzri3vbzt8VdTTRviqcKxcyllFas5z2A==}
engines: {node: '>=0.4.0'}