diff --git a/app/.env.example b/app/.env.example index 33ba9ae..4376905 100644 --- a/app/.env.example +++ b/app/.env.example @@ -40,3 +40,8 @@ SMTP_HOST="placeholder" SMTP_PORT="placeholder" SMTP_LOGIN="placeholder" SMTP_PASSWORD="placeholder" + +# Azure credentials are necessary for uploading large training data files +AZURE_STORAGE_ACCOUNT_NAME="placeholder" +AZURE_STORAGE_ACCOUNT_KEY="placeholder" +AZURE_STORAGE_CONTAINER_NAME="placeholder" diff --git a/app/package.json b/app/package.json index 7a2a3b9..821277a 100644 --- a/app/package.json +++ b/app/package.json @@ -26,6 +26,8 @@ "dependencies": { "@anthropic-ai/sdk": "^0.5.8", "@apidevtools/json-schema-ref-parser": "^10.1.0", + "@azure/identity": "^3.3.0", + "@azure/storage-blob": "12.15.0", "@babel/standalone": "^7.22.9", "@chakra-ui/anatomy": "^2.2.0", "@chakra-ui/next-js": "^2.1.4", @@ -69,6 +71,7 @@ "jsonschema": "^1.4.1", "kysely": "^0.26.1", "kysely-codegen": "^0.10.1", + "llama-tokenizer-js": "^1.1.3", "lodash-es": "^4.17.21", "lucide-react": "^0.265.0", "marked": "^7.0.3", diff --git a/app/prisma/migrations/20230907120707_add_dataset_file_upload/migration.sql b/app/prisma/migrations/20230907120707_add_dataset_file_upload/migration.sql new file mode 100644 index 0000000..8a2e6a0 --- /dev/null +++ b/app/prisma/migrations/20230907120707_add_dataset_file_upload/migration.sql @@ -0,0 +1,23 @@ +-- CreateEnum +CREATE TYPE "DatasetFileUploadStatus" AS ENUM ('PENDING', 'DOWNLOADING', 'PROCESSING', 'SAVING', 'COMPLETE', 'ERROR'); + +-- CreateTable +CREATE TABLE "DatasetFileUpload" ( + "id" UUID NOT NULL, + "datasetId" UUID NOT NULL, + "blobName" TEXT NOT NULL, + "fileName" TEXT NOT NULL, + "fileSize" INTEGER NOT NULL, + "progress" INTEGER NOT NULL DEFAULT 0, + "status" "DatasetFileUploadStatus" NOT NULL DEFAULT 'PENDING', + "uploadedAt" TIMESTAMP(3) NOT NULL, + "visible" BOOLEAN NOT NULL DEFAULT true, + "errorMessage" TEXT, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updatedAt" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "DatasetFileUpload_pkey" PRIMARY KEY ("id") +); + +-- AddForeignKey +ALTER TABLE "DatasetFileUpload" ADD CONSTRAINT "DatasetFileUpload_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/app/prisma/schema.prisma b/app/prisma/schema.prisma index feaf327..d378a8a 100644 --- a/app/prisma/schema.prisma +++ b/app/prisma/schema.prisma @@ -176,12 +176,41 @@ model OutputEvaluation { @@unique([modelResponseId, evaluationId]) } + +enum DatasetFileUploadStatus { + PENDING + DOWNLOADING + PROCESSING + SAVING + COMPLETE + ERROR +} + +model DatasetFileUpload { + id String @id @default(uuid()) @db.Uuid + + datasetId String @db.Uuid + dataset Dataset @relation(fields: [datasetId], references: [id], onDelete: Cascade) + blobName String + fileName String + fileSize Int + progress Int @default(0) // Percentage + status DatasetFileUploadStatus @default(PENDING) + uploadedAt DateTime + visible Boolean @default(true) + errorMessage String? 
+ + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt +} + model Dataset { id String @id @default(uuid()) @db.Uuid name String datasetEntries DatasetEntry[] fineTunes FineTune[] + datasetFileUploads DatasetFileUpload[] trainingRatio Float @default(0.8) projectId String @db.Uuid diff --git a/app/src/components/datasets/FileUploadCard.tsx b/app/src/components/datasets/FileUploadCard.tsx new file mode 100644 index 0000000..4d48cd2 --- /dev/null +++ b/app/src/components/datasets/FileUploadCard.tsx @@ -0,0 +1,61 @@ +import { VStack, HStack, Button, Text, Card, Progress, IconButton } from "@chakra-ui/react"; +import { BsX } from "react-icons/bs"; + +import { type RouterOutputs, api } from "~/utils/api"; +import { useHandledAsyncCallback } from "~/utils/hooks"; +import { formatFileSize } from "~/utils/utils"; + +type FileUpload = RouterOutputs["datasets"]["listFileUploads"][0]; + +const FileUploadCard = ({ fileUpload }: { fileUpload: FileUpload }) => { + const { id, fileName, fileSize, progress, status, errorMessage } = fileUpload; + + const utils = api.useContext(); + + const hideFileUploadMutation = api.datasets.hideFileUpload.useMutation(); + const [hideFileUpload, hidingInProgress] = useHandledAsyncCallback(async () => { + await hideFileUploadMutation.mutateAsync({ fileUploadId: id }); + await utils.datasets.listFileUploads.invalidate(); + }, [id, hideFileUploadMutation, utils]); + + const [refreshDatasetEntries] = useHandledAsyncCallback(async () => { + await utils.datasetEntries.list.invalidate(); + }, [utils]); + + return ( + <Card w="full"> + <VStack w="full" alignItems="flex-start" p={4}> + <HStack w="full" justifyContent="space-between"> + <Text fontWeight="bold"> + Uploading {fileName} ({formatFileSize(fileSize, 2)}) + </Text> + <HStack spacing={1}> + {status === "COMPLETE" && ( + <Button variant="ghost" size="sm" onClick={refreshDatasetEntries}> + Refresh + </Button> + )} + <IconButton aria-label="Hide file upload" icon={<BsX />} variant="ghost" size="sm" isLoading={hidingInProgress} onClick={hideFileUpload} /> + </HStack> + </HStack> + <Text fontSize="xs" color="gray.500"> + {errorMessage ? errorMessage : `${status} (${progress}%)`} + </Text> + <Progress w="full" value={progress} /> + </VStack> + </Card> + ); +}; + +export default FileUploadCard; diff --git a/app/src/components/datasets/ImportDataButton.tsx b/app/src/components/datasets/ImportDataButton.tsx index e179982..ea6d82c 100644 --- a/app/src/components/datasets/ImportDataButton.tsx +++ b/app/src/components/datasets/ImportDataButton.tsx @@ -1,4 +1,4 @@ -import { useState, useEffect, useRef } from "react"; +import { useState, useEffect, useRef, useCallback } from "react"; import { Modal, ModalOverlay, @@ -16,13 +16,15 @@ import { useDisclosure, type UseDisclosureReturn, } from "@chakra-ui/react"; +import pluralize from "pluralize"; import { AiOutlineCloudUpload, AiOutlineFile } from "react-icons/ai"; import { useDataset, useHandledAsyncCallback } from "~/utils/hooks"; import { api } from "~/utils/api"; import ActionButton from "../ActionButton"; import { validateTrainingRows, type TrainingRow, parseJSONL } from "./validateTrainingRows"; -import pluralize from "pluralize"; +import { uploadDatasetEntryFile } from "~/utils/azure/website"; +import { formatFileSize } from "~/utils/utils"; const ImportDataButton = () => { const disclosure = useDisclosure(); @@ -48,6 +50,7 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => const [validationError, setValidationError] = useState<string | null>(null); const [trainingRows, setTrainingRows] = useState<TrainingRow[] | null>(null); + const [file, setFile] = useState<File | null>(null); const fileInputRef = useRef<HTMLInputElement>(null); @@ -67,6 +70,14 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => }; const processFile = (file: File) => { + setFile(file); + + // skip reading if file is larger than 10MB + if (file.size > 10000000) { + setTrainingRows(null); + return; + } + const reader = new FileReader(); reader.onload = (e: ProgressEvent<FileReader>) => { const content = e.target?.result
as string; @@ -83,7 +94,6 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => setTrainingRows(parsedJSONL); // eslint-disable-next-line @typescript-eslint/no-explicit-any } catch (e: any) { - console.log("e is", e); setValidationError("Unable to parse JSONL file: " + (e.message as string)); setTrainingRows(null); return; @@ -92,28 +102,38 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => reader.readAsText(file); }; + const resetState = useCallback(() => { + setValidationError(null); + setTrainingRows(null); + setFile(null); + }, [setValidationError, setTrainingRows, setFile]); + useEffect(() => { if (disclosure.isOpen) { - setTrainingRows(null); - setValidationError(null); + resetState(); } - }, [disclosure.isOpen]); + }, [disclosure.isOpen, resetState]); + + const triggerFileDownloadMutation = api.datasets.triggerFileDownload.useMutation(); const utils = api.useContext(); - const sendJSONLMutation = api.datasetEntries.create.useMutation(); - const [sendJSONL, sendingInProgress] = useHandledAsyncCallback(async () => { - if (!dataset || !trainingRows) return; + if (!dataset || !file) return; - await sendJSONLMutation.mutateAsync({ + const blobName = await uploadDatasetEntryFile(file); + + await triggerFileDownloadMutation.mutateAsync({ datasetId: dataset.id, - jsonl: JSON.stringify(trainingRows), + blobName, + fileName: file.name, + fileSize: file.size, }); - await utils.datasetEntries.list.invalidate(); + await utils.datasets.listFileUploads.invalidate(); + disclosure.onClose(); - }, [dataset, trainingRows, sendJSONLMutation]); + }, [dataset, trainingRows, triggerFileDownloadMutation, file, utils]); return ( @@ -127,7 +147,28 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => - {!trainingRows && !validationError && ( + {validationError && ( + + + + + Error + + {validationError} + + + Try again + + + )} + {!validationError && !file && ( )} - {validationError && ( - - - - - Error - - {validationError} - - setValidationError(null)} - > - Try again - - - )} - {trainingRows && !validationError && ( + {!validationError && file && ( - - Success - - - We'll upload {trainingRows.length}{" "} - {pluralize("row", trainingRows.length)} into {dataset?.name}.{" "} - + {trainingRows ? 
( + <> + + Success + + + We'll upload {trainingRows.length}{" "} + {pluralize("row", trainingRows.length)} into {dataset?.name}.{" "} + + + ) : ( + <> + + {file.name} + + {formatFileSize(file.size)} + + )} color="gray.500" _hover={{ color: "orange.400" }} cursor="pointer" - onClick={() => setTrainingRows(null)} + onClick={resetState} > Change file @@ -224,7 +255,7 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => onClick={sendJSONL} isLoading={sendingInProgress} minW={24} - isDisabled={!trainingRows || !!validationError} + isDisabled={!file || !!validationError} > Upload diff --git a/app/src/env.mjs b/app/src/env.mjs index e47966b..c4a00eb 100644 --- a/app/src/env.mjs +++ b/app/src/env.mjs @@ -26,6 +26,9 @@ export const env = createEnv({ SMTP_PORT: z.string().default("placeholder"), SMTP_LOGIN: z.string().default("placeholder"), SMTP_PASSWORD: z.string().default("placeholder"), + AZURE_STORAGE_ACCOUNT_NAME: z.string().default("placeholder"), + AZURE_STORAGE_ACCOUNT_KEY: z.string().default("placeholder"), + AZURE_STORAGE_CONTAINER_NAME: z.string().default("placeholder"), WORKER_CONCURRENCY: z .string() .default("10") @@ -72,6 +75,9 @@ export const env = createEnv({ SMTP_PORT: process.env.SMTP_PORT, SMTP_LOGIN: process.env.SMTP_LOGIN, SMTP_PASSWORD: process.env.SMTP_PASSWORD, + AZURE_STORAGE_ACCOUNT_NAME: process.env.AZURE_STORAGE_ACCOUNT_NAME, + AZURE_STORAGE_ACCOUNT_KEY: process.env.AZURE_STORAGE_ACCOUNT_KEY, + AZURE_STORAGE_CONTAINER_NAME: process.env.AZURE_STORAGE_CONTAINER_NAME, WORKER_CONCURRENCY: process.env.WORKER_CONCURRENCY, WORKER_MAX_POOL_SIZE: process.env.WORKER_MAX_POOL_SIZE, }, diff --git a/app/src/pages/datasets/[id].tsx b/app/src/pages/datasets/[id].tsx index bb88f86..afe3b92 100644 --- a/app/src/pages/datasets/[id].tsx +++ b/app/src/pages/datasets/[id].tsx @@ -28,6 +28,7 @@ import ExperimentButton from "~/components/datasets/ExperimentButton"; import ImportDataButton from "~/components/datasets/ImportDataButton"; import DownloadButton from "~/components/datasets/ExportButton"; import DeleteButton from "~/components/datasets/DeleteButton"; +import FileUploadCard from "~/components/datasets/FileUploadCard"; export default function Dataset() { const utils = api.useContext(); @@ -40,6 +41,19 @@ export default function Dataset() { setName(dataset.data?.name || ""); }, [dataset.data?.name]); + const [fileUploadsRefetchInterval, setFileUploadsRefetchInterval] = useState(500); + const fileUploads = api.datasets.listFileUploads.useQuery( + { datasetId: dataset.data?.id as string }, + { enabled: !!dataset.data?.id, refetchInterval: fileUploadsRefetchInterval }, + ); + useEffect(() => { + if (fileUploads?.data?.some((fu) => fu.status !== "COMPLETE" && fu.status !== "ERROR")) { + setFileUploadsRefetchInterval(500); + } else { + setFileUploadsRefetchInterval(0); + } + }, [fileUploads]); + useEffect(() => { useAppStore.getState().sharedArgumentsEditor.loadMonaco().catch(console.error); }, []); @@ -101,6 +115,13 @@ export default function Dataset() { + + + {fileUploads?.data?.map((upload) => ( + <FileUploadCard key={upload.id} fileUpload={upload} /> + ))} + + diff --git a/app/src/server/api/routers/datasetEntries.router.ts b/app/src/server/api/routers/datasetEntries.router.ts index aa8c459..b0baaca 100644 --- a/app/src/server/api/routers/datasetEntries.router.ts +++ b/app/src/server/api/routers/datasetEntries.router.ts @@ -1,4 +1,3 @@ -import { type Prisma } from "@prisma/client"; import { z } from "zod"; import { v4 as uuidv4 } from "uuid"; import { @@ -7,18 +6,18 @@ import { type
CreateChatCompletionRequestMessage, } from "openai/resources/chat"; import { TRPCError } from "@trpc/server"; -import { shuffle } from "lodash-es"; import archiver from "archiver"; +import { WritableStreamBuffer } from "stream-buffers"; import { createTRPCRouter, protectedProcedure } from "~/server/api/trpc"; import { prisma } from "~/server/db"; import { requireCanModifyProject, requireCanViewProject } from "~/utils/accessControl"; import { error, success } from "~/utils/errorHandling/standardResponses"; import { countOpenAIChatTokens } from "~/utils/countTokens"; -import { type TrainingRow, validateTrainingRows } from "~/components/datasets/validateTrainingRows"; +import { type TrainingRow } from "~/components/datasets/validateTrainingRows"; import hashObject from "~/server/utils/hashObject"; import { type JsonValue } from "type-fest"; -import { WritableStreamBuffer } from "stream-buffers"; +import { formatEntriesFromTrainingRows } from "~/server/utils/createEntriesFromTrainingRows"; export const datasetEntriesRouter = createTRPCRouter({ list: protectedProcedure @@ -100,7 +99,6 @@ export const datasetEntriesRouter = createTRPCRouter({ }) .optional(), loggedCallIds: z.string().array().optional(), - jsonl: z.string().optional(), }), ) .mutation(async ({ input, ctx }) => { @@ -121,104 +119,48 @@ export const datasetEntriesRouter = createTRPCRouter({ return error("No datasetId or newDatasetParams provided"); } - if (!input.loggedCallIds && !input.jsonl) { - return error("No loggedCallIds or jsonl provided"); + if (!input.loggedCallIds) { + return error("No loggedCallIds provided"); } - let trainingRows: TrainingRow[]; - - if (input.loggedCallIds) { - const loggedCalls = await prisma.loggedCall.findMany({ - where: { - id: { - in: input.loggedCallIds, - }, - modelResponse: { - isNot: null, + const loggedCalls = await prisma.loggedCall.findMany({ + where: { + id: { + in: input.loggedCallIds, + }, + modelResponse: { + isNot: null, + }, + }, + include: { + modelResponse: { + select: { + reqPayload: true, + respPayload: true, + inputTokens: true, + outputTokens: true, }, }, - include: { - modelResponse: { - select: { - reqPayload: true, - respPayload: true, - inputTokens: true, - outputTokens: true, - }, - }, - }, - orderBy: { createdAt: "desc" }, - }); + }, + orderBy: { createdAt: "desc" }, + }); - trainingRows = loggedCalls.map((loggedCall) => { - const inputMessages = ( - loggedCall.modelResponse?.reqPayload as unknown as CompletionCreateParams - ).messages; - let output: ChatCompletion.Choice.Message | undefined = undefined; - const resp = loggedCall.modelResponse?.respPayload as unknown as - | ChatCompletion - | undefined; - if (resp && resp.choices?.[0]) { - output = resp.choices[0].message; - } - return { - input: inputMessages as unknown as CreateChatCompletionRequestMessage[], - output: output as unknown as CreateChatCompletionRequestMessage, - }; - }); - } else { - trainingRows = JSON.parse(input.jsonl as string) as TrainingRow[]; - const validationError = validateTrainingRows(trainingRows); - if (validationError) { - return error(`Invalid JSONL: ${validationError}`); + const trainingRows = loggedCalls.map((loggedCall) => { + const inputMessages = ( + loggedCall.modelResponse?.reqPayload as unknown as CompletionCreateParams + ).messages; + let output: ChatCompletion.Choice.Message | undefined = undefined; + const resp = loggedCall.modelResponse?.respPayload as unknown as ChatCompletion | undefined; + if (resp && resp.choices?.[0]) { + output = resp.choices[0].message; } - } + return { + 
input: inputMessages as unknown as CreateChatCompletionRequestMessage[], + output: output as unknown as CreateChatCompletionRequestMessage, + }; + }); - const [existingTrainingCount, existingTestingCount] = await prisma.$transaction([ - prisma.datasetEntry.count({ - where: { - datasetId, - type: "TRAIN", - }, - }), - prisma.datasetEntry.count({ - where: { - datasetId, - type: "TEST", - }, - }), - ]); - - const newTotalEntries = existingTrainingCount + existingTestingCount + trainingRows.length; - const numTrainingToAdd = Math.floor(trainingRatio * newTotalEntries) - existingTrainingCount; - const numTestingToAdd = trainingRows.length - numTrainingToAdd; - const typesToAssign = shuffle([ - ...Array(numTrainingToAdd).fill("TRAIN"), - ...Array(numTestingToAdd).fill("TEST"), - ]); - const datasetEntriesToCreate: Prisma.DatasetEntryCreateManyInput[] = []; - for (const row of trainingRows) { - let outputTokens = 0; - if (row.output) { - outputTokens = countOpenAIChatTokens("gpt-4-0613", [ - row.output as unknown as ChatCompletion.Choice.Message, - ]); - } - datasetEntriesToCreate.push({ - datasetId: datasetId, - input: row.input as unknown as Prisma.InputJsonValue, - output: (row.output as unknown as Prisma.InputJsonValue) ?? { - role: "assistant", - content: "", - }, - inputTokens: countOpenAIChatTokens( - "gpt-4-0613", - row.input as unknown as CreateChatCompletionRequestMessage[], - ), - outputTokens, - type: typesToAssign.pop() as "TRAIN" | "TEST", - }); - } + const datasetEntriesToCreate = await formatEntriesFromTrainingRows(datasetId, trainingRows); // Ensure dataset and dataset entries are created atomically await prisma.$transaction([ @@ -239,7 +181,6 @@ export const datasetEntriesRouter = createTRPCRouter({ return success(datasetId); }), - update: protectedProcedure .input( z.object({ diff --git a/app/src/server/api/routers/datasets.router.ts b/app/src/server/api/routers/datasets.router.ts index 596717e..af86a8e 100644 --- a/app/src/server/api/routers/datasets.router.ts +++ b/app/src/server/api/routers/datasets.router.ts @@ -1,8 +1,11 @@ import { z } from "zod"; + import { createTRPCRouter, protectedProcedure } from "~/server/api/trpc"; import { prisma } from "~/server/db"; import { requireCanModifyProject, requireCanViewProject } from "~/utils/accessControl"; import { success } from "~/utils/errorHandling/standardResponses"; +import { generateServiceClientUrl } from "~/utils/azure/server"; +import { queueImportDatasetEntries } from "~/server/tasks/importDatasetEntries.task"; export const datasetsRouter = createTRPCRouter({ get: protectedProcedure.input(z.object({ id: z.string() })).query(async ({ input, ctx }) => { @@ -94,4 +97,73 @@ export const datasetsRouter = createTRPCRouter({ return success("Dataset deleted"); }), + getServiceClientUrl: protectedProcedure + .input(z.object({ projectId: z.string() })) + .query(async ({ input, ctx }) => { + // The user must at least be authenticated to get a SAS token + await requireCanModifyProject(input.projectId, ctx); + return generateServiceClientUrl(); + }), + triggerFileDownload: protectedProcedure + .input( + z.object({ + datasetId: z.string(), + blobName: z.string(), + fileName: z.string(), + fileSize: z.number(), + }), + ) + .mutation(async ({ input, ctx }) => { + const { projectId } = await prisma.dataset.findUniqueOrThrow({ + where: { id: input.datasetId }, + }); + await requireCanViewProject(projectId, ctx); + + const { id } = await prisma.datasetFileUpload.create({ + data: { + datasetId: input.datasetId, + blobName: input.blobName, + 
status: "PENDING", + fileName: input.fileName, + fileSize: input.fileSize, + uploadedAt: new Date(), + }, + }); + + await queueImportDatasetEntries(id); + }), + listFileUploads: protectedProcedure + .input(z.object({ datasetId: z.string() })) + .query(async ({ input, ctx }) => { + const { projectId } = await prisma.dataset.findUniqueOrThrow({ + where: { id: input.datasetId }, + }); + await requireCanViewProject(projectId, ctx); + + return await prisma.datasetFileUpload.findMany({ + where: { + datasetId: input.datasetId, + visible: true, + }, + orderBy: { createdAt: "desc" }, + }); + }), + hideFileUpload: protectedProcedure + .input(z.object({ fileUploadId: z.string() })) + .mutation(async ({ input, ctx }) => { + const { datasetId } = await prisma.datasetFileUpload.findUniqueOrThrow({ + where: { id: input.fileUploadId }, + }); + const { projectId } = await prisma.dataset.findUniqueOrThrow({ + where: { id: datasetId }, + }); + await requireCanModifyProject(projectId, ctx); + + await prisma.datasetFileUpload.update({ + where: { id: input.fileUploadId }, + data: { + visible: false, + }, + }); + }), }); diff --git a/app/src/server/tasks/importDatasetEntries.task.ts b/app/src/server/tasks/importDatasetEntries.task.ts new file mode 100644 index 0000000..81059e3 --- /dev/null +++ b/app/src/server/tasks/importDatasetEntries.task.ts @@ -0,0 +1,132 @@ +import { type DatasetFileUpload } from "@prisma/client"; +import { prisma } from "~/server/db"; +import defineTask from "./defineTask"; +import { downloadBlobToString } from "~/utils/azure/server"; +import { + type TrainingRow, + validateTrainingRows, + parseJSONL, +} from "~/components/datasets/validateTrainingRows"; +import { formatEntriesFromTrainingRows } from "~/server/utils/createEntriesFromTrainingRows"; + +export type ImportDatasetEntriesJob = { + datasetFileUploadId: string; +}; + +export const importDatasetEntries = defineTask( + "importDatasetEntries", + async (task) => { + const { datasetFileUploadId } = task; + const datasetFileUpload = await prisma.datasetFileUpload.findUnique({ + where: { id: datasetFileUploadId }, + }); + if (!datasetFileUpload) { + await prisma.datasetFileUpload.update({ + where: { id: datasetFileUploadId }, + data: { + errorMessage: "Dataset File Upload not found", + status: "ERROR", + }, + }); + return; + } + await prisma.datasetFileUpload.update({ + where: { id: datasetFileUploadId }, + data: { + status: "DOWNLOADING", + progress: 5, + }, + }); + + const jsonlStr = await downloadBlobToString(datasetFileUpload.blobName); + const trainingRows = parseJSONL(jsonlStr) as TrainingRow[]; + const validationError = validateTrainingRows(trainingRows); + if (validationError) { + await prisma.datasetFileUpload.update({ + where: { id: datasetFileUploadId }, + data: { + errorMessage: `Invalid JSONL: ${validationError}`, + status: "ERROR", + }, + }); + return; + } + + await prisma.datasetFileUpload.update({ + where: { id: datasetFileUploadId }, + data: { + status: "PROCESSING", + progress: 30, + }, + }); + + const updatePromises: Promise[] = []; + + const updateCallback = async (progress: number) => { + await prisma.datasetFileUpload.update({ + where: { id: datasetFileUploadId }, + data: { + progress: 30 + Math.floor((progress / trainingRows.length) * 69), + }, + }); + }; + + let datasetEntriesToCreate; + try { + datasetEntriesToCreate = await formatEntriesFromTrainingRows( + datasetFileUpload.datasetId, + trainingRows, + updateCallback, + 500, + ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (e: 
any) { + await prisma.datasetFileUpload.update({ + where: { id: datasetFileUploadId }, + data: { + errorMessage: `Error formatting rows: ${e.message as string}`, + status: "ERROR", + }, + }); + return; + } + + await Promise.all(updatePromises); + + await prisma.datasetFileUpload.update({ + where: { id: datasetFileUploadId }, + data: { + status: "SAVING", + progress: 99, + }, + }); + + await prisma.datasetEntry.createMany({ + data: datasetEntriesToCreate, + }); + + await prisma.datasetFileUpload.update({ + where: { id: datasetFileUploadId }, + data: { + status: "COMPLETE", + progress: 100, + }, + }); + }, +); + +export const queueImportDatasetEntries = async (datasetFileUploadId: string) => { + await Promise.all([ + prisma.datasetFileUpload.update({ + where: { + id: datasetFileUploadId, + }, + data: { + errorMessage: null, + status: "PENDING", + }, + }), + + importDatasetEntries.enqueue({ datasetFileUploadId }), + ]); }; diff --git a/app/src/server/tasks/worker.ts b/app/src/server/tasks/worker.ts index 1daff35..326c505 100644 --- a/app/src/server/tasks/worker.ts +++ b/app/src/server/tasks/worker.ts @@ -5,10 +5,11 @@ import "../../../sentry.server.config"; import { env } from "~/env.mjs"; import { queryModel } from "./queryModel.task"; import { runNewEval } from "./runNewEval.task"; +import { importDatasetEntries } from "./importDatasetEntries.task"; console.log("Starting worker"); -const registeredTasks = [queryModel, runNewEval]; +const registeredTasks = [queryModel, runNewEval, importDatasetEntries]; const taskList = registeredTasks.reduce((acc, task) => { acc[task.task.identifier] = task.task.handler; diff --git a/app/src/server/utils/createEntriesFromTrainingRows.ts b/app/src/server/utils/createEntriesFromTrainingRows.ts new file mode 100644 index 0000000..7b4197a --- /dev/null +++ b/app/src/server/utils/createEntriesFromTrainingRows.ts @@ -0,0 +1,68 @@ +import { type Prisma } from "@prisma/client"; +import { shuffle } from "lodash-es"; +import { + type CreateChatCompletionRequestMessage, + type ChatCompletion, +} from "openai/resources/chat"; + +import { prisma } from "~/server/db"; +import { type TrainingRow } from "~/components/datasets/validateTrainingRows"; +import { countLlamaChatTokens } from "~/utils/countTokens"; + +export const formatEntriesFromTrainingRows = async ( + datasetId: string, + trainingRows: TrainingRow[], + updateCallback?: (progress: number) => Promise<void>, + updateFrequency = 1000, +) => { + const [dataset, existingTrainingCount, existingTestingCount] = await prisma.$transaction([ + prisma.dataset.findUnique({ where: { id: datasetId } }), + prisma.datasetEntry.count({ + where: { + datasetId, + type: "TRAIN", + }, + }), + prisma.datasetEntry.count({ + where: { + datasetId, + type: "TEST", + }, + }), + ]); + + const trainingRatio = dataset?.trainingRatio ??
0.8; + + const newTotalEntries = existingTrainingCount + existingTestingCount + trainingRows.length; + const numTrainingToAdd = Math.floor(trainingRatio * newTotalEntries) - existingTrainingCount; + const numTestingToAdd = trainingRows.length - numTrainingToAdd; + const typesToAssign = shuffle([ + ...Array(numTrainingToAdd).fill("TRAIN"), + ...Array(numTestingToAdd).fill("TEST"), + ]); + const datasetEntriesToCreate: Prisma.DatasetEntryCreateManyInput[] = []; + let i = 0; + for (const row of trainingRows) { + if (updateCallback && i % updateFrequency === 0) await updateCallback(i); + let outputTokens = 0; + if (row.output) { + outputTokens = countLlamaChatTokens([row.output as unknown as ChatCompletion.Choice.Message]); + } + datasetEntriesToCreate.push({ + datasetId: datasetId, + input: row.input as unknown as Prisma.InputJsonValue, + output: (row.output as unknown as Prisma.InputJsonValue) ?? { + role: "assistant", + content: "", + }, + inputTokens: countLlamaChatTokens( + row.input as unknown as CreateChatCompletionRequestMessage[], + ), + outputTokens, + type: typesToAssign.pop() as "TRAIN" | "TEST", + }); + i++; + } + + return datasetEntriesToCreate; +}; diff --git a/app/src/utils/azure/server.ts b/app/src/utils/azure/server.ts new file mode 100644 index 0000000..e82b03c --- /dev/null +++ b/app/src/utils/azure/server.ts @@ -0,0 +1,71 @@ +import { + BlobServiceClient, + generateAccountSASQueryParameters, + AccountSASPermissions, + AccountSASServices, + AccountSASResourceTypes, + StorageSharedKeyCredential, + SASProtocol, +} from "@azure/storage-blob"; +import { DefaultAzureCredential } from "@azure/identity"; + +const accountName = process.env.AZURE_STORAGE_ACCOUNT_NAME; +if (!accountName) throw Error("Azure Storage accountName not found"); +const accountKey = process.env.AZURE_STORAGE_ACCOUNT_KEY; +if (!accountKey) throw Error("Azure Storage accountKey not found"); +const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME; +if (!containerName) throw Error("Azure Storage containerName not found"); + +const sharedKeyCredential = new StorageSharedKeyCredential(accountName, accountKey); + +const blobServiceClient = new BlobServiceClient( + `https://${accountName}.blob.core.windows.net`, + new DefaultAzureCredential(), +); + +const containerClient = blobServiceClient.getContainerClient(containerName); + +export const generateServiceClientUrl = () => { + const sasOptions = { + services: AccountSASServices.parse("b").toString(), // blobs + resourceTypes: AccountSASResourceTypes.parse("sco").toString(), // service, container, object + permissions: AccountSASPermissions.parse("w"), // write permissions + protocol: SASProtocol.Https, + startsOn: new Date(), + expiresOn: new Date(new Date().valueOf() + 10 * 60 * 1000), // 10 minutes + }; + let sasToken = generateAccountSASQueryParameters(sasOptions, sharedKeyCredential).toString(); + + // remove leading "?" + sasToken = sasToken[0] === "?" ?
sasToken.substring(1) : sasToken; + return { + serviceClientUrl: `https://${accountName}.blob.core.windows.net?${sasToken}`, + containerName, + }; +}; + +export async function downloadBlobToString(blobName: string) { + const blobClient = containerClient.getBlobClient(blobName); + + const downloadResponse = await blobClient.download(); + + if (!downloadResponse) throw Error("error downloading blob"); + if (!downloadResponse.readableStreamBody) + throw Error("downloadResponse.readableStreamBody not found"); + + const downloaded = await streamToBuffer(downloadResponse.readableStreamBody); + return downloaded.toString(); +} + +async function streamToBuffer(readableStream: NodeJS.ReadableStream): Promise<Buffer> { + return new Promise((resolve, reject) => { + const chunks: Uint8Array[] = []; + readableStream.on("data", (data: ArrayBuffer) => { + chunks.push(data instanceof Buffer ? data : Buffer.from(data)); + }); + readableStream.on("end", () => { + resolve(Buffer.concat(chunks)); + }); + readableStream.on("error", reject); + }); +} diff --git a/app/src/utils/azure/website.ts b/app/src/utils/azure/website.ts new file mode 100644 index 0000000..c64024d --- /dev/null +++ b/app/src/utils/azure/website.ts @@ -0,0 +1,30 @@ +import { BlobServiceClient } from "@azure/storage-blob"; +import { v4 as uuidv4 } from "uuid"; + +import { useAppStore } from "~/state/store"; + +export const uploadDatasetEntryFile = async (file: File) => { + const { selectedProjectId: projectId, api } = useAppStore.getState(); + if (!projectId) throw Error("projectId not found"); + if (!api) throw Error("api not initialized"); + const { serviceClientUrl, containerName } = await api.client.datasets.getServiceClientUrl.query({ + projectId, + }); + + const blobServiceClient = new BlobServiceClient(serviceClientUrl); + // create container client + const containerClient = blobServiceClient.getContainerClient(containerName); + + // base name without extension + const basename = file.name.split("/").pop()?.split(".").shift(); + if (!basename) throw Error("basename not found"); + + const blobName = `${basename}-${uuidv4()}.jsonl`; + // create blob client + const blobClient = containerClient.getBlockBlobClient(blobName); + + // upload file + await blobClient.uploadData(file); + + return blobName; +}; diff --git a/app/src/utils/countTokens.ts b/app/src/utils/countTokens.ts index 2a4ff14..8181bb1 100644 --- a/app/src/utils/countTokens.ts +++ b/app/src/utils/countTokens.ts @@ -1,5 +1,7 @@ import { type ChatCompletion } from "openai/resources/chat"; import { GPTTokens } from "gpt-tokens"; +import llamaTokenizer from "llama-tokenizer-js"; + import { type SupportedModel } from "~/modelProviders/openai-ChatCompletion"; interface GPTTokensMessageItem { @@ -22,3 +24,11 @@ export const countOpenAIChatTokens = ( messages: reformattedMessages as unknown as GPTTokensMessageItem[], }).usedTokens; }; + +export const countLlamaChatTokens = (messages: ChatCompletion.Choice.Message[]) => { + const stringToTokenize = messages + .map((message) => message.content || JSON.stringify(message.function_call)) + .join("\n"); + const tokens = llamaTokenizer.encode(stringToTokenize); + return tokens.length; +}; diff --git a/app/src/utils/utils.ts b/app/src/utils/utils.ts index 6688013..0c51be8 100644 --- a/app/src/utils/utils.ts +++ b/app/src/utils/utils.ts @@ -52,3 +52,18 @@ export const parseableToFunctionCall = (str: string) => { return true; }; + +export const formatFileSize = (bytes: number, decimals = 2) => { + if (bytes === 0) return "0 Bytes"; + + const k = 1024;
+ const dm = decimals < 0 ? 0 : decimals; + const sizes = ["Bytes", "KB", "MB", "GB", "TB"]; + + for (const size of sizes) { + if (bytes < k) return `${parseFloat(bytes.toFixed(dm))} ${size}`; + bytes /= k; + } + + return "> 1024 TB"; +}; diff --git a/app/tsconfig.json b/app/tsconfig.json index 32c81d2..039b1d2 100644 --- a/app/tsconfig.json +++ b/app/tsconfig.json @@ -19,7 +19,9 @@ "baseUrl": ".", "paths": { "~/*": ["./src/*"] - } + }, + "typeRoots": ["./types", "./node_modules/@types"], + "types": ["llama-tokenizer-js", "node"] }, "include": [ ".eslintrc.cjs", diff --git a/app/types/llama-tokenizer-js/index.d.ts b/app/types/llama-tokenizer-js/index.d.ts new file mode 100644 index 0000000..f96a650 --- /dev/null +++ b/app/types/llama-tokenizer-js/index.d.ts @@ -0,0 +1,4 @@ +declare module "llama-tokenizer-js" { + const llamaTokenizer: { encode(input: string): number[]; decode(input: number[]): string }; + export default llamaTokenizer; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e958ffe..50e334d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -14,6 +14,12 @@ importers: '@apidevtools/json-schema-ref-parser': specifier: ^10.1.0 version: 10.1.0 + '@azure/identity': + specifier: ^3.3.0 + version: 3.3.0 + '@azure/storage-blob': + specifier: 12.15.0 + version: 12.15.0 '@babel/standalone': specifier: ^7.22.9 version: 7.22.9 @@ -143,6 +149,9 @@ importers: kysely-codegen: specifier: ^0.10.1 version: 0.10.1(kysely@0.26.1)(pg@8.11.2) + llama-tokenizer-js: + specifier: ^1.1.3 + version: 1.1.3 lodash-es: specifier: ^4.17.21 version: 4.17.21 @@ -465,6 +474,184 @@ packages: js-yaml: 4.1.0 dev: true + /@azure/abort-controller@1.1.0: + resolution: {integrity: sha512-TrRLIoSQVzfAJX9H1JeFjzAoDGcoK1IYX1UImfceTZpsyYfWr09Ss1aHW1y5TrrR3iq6RZLBwJ3E24uwPhwahw==} + engines: {node: '>=12.0.0'} + dependencies: + tslib: 2.6.1 + dev: false + + /@azure/core-auth@1.5.0: + resolution: {integrity: sha512-udzoBuYG1VBoHVohDTrvKjyzel34zt77Bhp7dQntVGGD0ehVq48owENbBG8fIgkHRNUBQH5k1r0hpoMu5L8+kw==} + engines: {node: '>=14.0.0'} + dependencies: + '@azure/abort-controller': 1.1.0 + '@azure/core-util': 1.4.0 + tslib: 2.6.1 + dev: false + + /@azure/core-client@1.7.3: + resolution: {integrity: sha512-kleJ1iUTxcO32Y06dH9Pfi9K4U+Tlb111WXEnbt7R/ne+NLRwppZiTGJuTD5VVoxTMK5NTbEtm5t2vcdNCFe2g==} + engines: {node: '>=14.0.0'} + dependencies: + '@azure/abort-controller': 1.1.0 + '@azure/core-auth': 1.5.0 + '@azure/core-rest-pipeline': 1.12.0 + '@azure/core-tracing': 1.0.1 + '@azure/core-util': 1.4.0 + '@azure/logger': 1.0.4 + tslib: 2.6.1 + transitivePeerDependencies: + - supports-color + dev: false + + /@azure/core-http@3.0.3: + resolution: {integrity: sha512-QMib3wXotJMFhHgmJBPUF9YsyErw34H0XDFQd9CauH7TPB+RGcyl9Ayy7iURtJB04ngXhE6YwrQsWDXlSLrilg==} + engines: {node: '>=14.0.0'} + dependencies: + '@azure/abort-controller': 1.1.0 + '@azure/core-auth': 1.5.0 + '@azure/core-tracing': 1.0.0-preview.13 + '@azure/core-util': 1.4.0 + '@azure/logger': 1.0.4 + '@types/node-fetch': 2.6.4 + '@types/tunnel': 0.0.3 + form-data: 4.0.0 + node-fetch: 2.6.12(encoding@0.1.13) + process: 0.11.10 + tslib: 2.6.1 + tunnel: 0.0.6 + uuid: 8.3.2 + xml2js: 0.5.0 + transitivePeerDependencies: + - encoding + dev: false + + /@azure/core-lro@2.5.4: + resolution: {integrity: sha512-3GJiMVH7/10bulzOKGrrLeG/uCBH/9VtxqaMcB9lIqAeamI/xYQSHJL/KcsLDuH+yTjYpro/u6D/MuRe4dN70Q==} + engines: {node: '>=14.0.0'} + dependencies: + '@azure/abort-controller': 1.1.0 + '@azure/core-util': 1.4.0 + '@azure/logger': 1.0.4 + tslib: 2.6.1 + dev: false + + /@azure/core-paging@1.5.0: + resolution: {integrity:
sha512-zqWdVIt+2Z+3wqxEOGzR5hXFZ8MGKK52x4vFLw8n58pR6ZfKRx3EXYTxTaYxYHc/PexPUTyimcTWFJbji9Z6Iw==} + engines: {node: '>=14.0.0'} + dependencies: + tslib: 2.6.1 + dev: false + + /@azure/core-rest-pipeline@1.12.0: + resolution: {integrity: sha512-+MnSB0vGZjszSzr5AW8z93/9fkDu2RLtWmAN8gskURq7EW2sSwqy8jZa0V26rjuBVkwhdA3Hw8z3VWoeBUOw+A==} + engines: {node: '>=14.0.0'} + dependencies: + '@azure/abort-controller': 1.1.0 + '@azure/core-auth': 1.5.0 + '@azure/core-tracing': 1.0.1 + '@azure/core-util': 1.4.0 + '@azure/logger': 1.0.4 + form-data: 4.0.0 + http-proxy-agent: 5.0.0 + https-proxy-agent: 5.0.1 + tslib: 2.6.1 + transitivePeerDependencies: + - supports-color + dev: false + + /@azure/core-tracing@1.0.0-preview.13: + resolution: {integrity: sha512-KxDlhXyMlh2Jhj2ykX6vNEU0Vou4nHr025KoSEiz7cS3BNiHNaZcdECk/DmLkEB0as5T7b/TpRcehJ5yV6NeXQ==} + engines: {node: '>=12.0.0'} + dependencies: + '@opentelemetry/api': 1.4.1 + tslib: 2.6.1 + dev: false + + /@azure/core-tracing@1.0.1: + resolution: {integrity: sha512-I5CGMoLtX+pI17ZdiFJZgxMJApsK6jjfm85hpgp3oazCdq5Wxgh4wMr7ge/TTWW1B5WBuvIOI1fMU/FrOAMKrw==} + engines: {node: '>=12.0.0'} + dependencies: + tslib: 2.6.1 + dev: false + + /@azure/core-util@1.4.0: + resolution: {integrity: sha512-eGAyJpm3skVQoLiRqm/xPa+SXi/NPDdSHMxbRAz2lSprd+Zs+qrpQGQQ2VQ3Nttu+nSZR4XoYQC71LbEI7jsig==} + engines: {node: '>=14.0.0'} + dependencies: + '@azure/abort-controller': 1.1.0 + tslib: 2.6.1 + dev: false + + /@azure/identity@3.3.0: + resolution: {integrity: sha512-gISa/dAAxrWt6F2WiDXZY0y2xY4MLlN2wkNW4cPuq5OgPQKLSkxLc4I2WR04puTfZyQZnpXbAapAMEj1b96fgg==} + engines: {node: '>=14.0.0'} + dependencies: + '@azure/abort-controller': 1.1.0 + '@azure/core-auth': 1.5.0 + '@azure/core-client': 1.7.3 + '@azure/core-rest-pipeline': 1.12.0 + '@azure/core-tracing': 1.0.1 + '@azure/core-util': 1.4.0 + '@azure/logger': 1.0.4 + '@azure/msal-browser': 2.38.2 + '@azure/msal-common': 13.3.0 + '@azure/msal-node': 1.18.3 + events: 3.3.0 + jws: 4.0.0 + open: 8.4.2 + stoppable: 1.1.0 + tslib: 2.6.1 + uuid: 8.3.2 + transitivePeerDependencies: + - supports-color + dev: false + + /@azure/logger@1.0.4: + resolution: {integrity: sha512-ustrPY8MryhloQj7OWGe+HrYx+aoiOxzbXTtgblbV3xwCqpzUK36phH3XNHQKj3EPonyFUuDTfR3qFhTEAuZEg==} + engines: {node: '>=14.0.0'} + dependencies: + tslib: 2.6.1 + dev: false + + /@azure/msal-browser@2.38.2: + resolution: {integrity: sha512-71BeIn2we6LIgMplwCSaMq5zAwmalyJR3jFcVOZxNVfQ1saBRwOD+P77nLs5vrRCedVKTq8RMFhIOdpMLNno0A==} + engines: {node: '>=0.8.0'} + dependencies: + '@azure/msal-common': 13.3.0 + dev: false + + /@azure/msal-common@13.3.0: + resolution: {integrity: sha512-/VFWTicjcJbrGp3yQP7A24xU95NiDMe23vxIU1U6qdRPFsprMDNUohMudclnd+WSHE4/McqkZs/nUU3sAKkVjg==} + engines: {node: '>=0.8.0'} + dev: false + + /@azure/msal-node@1.18.3: + resolution: {integrity: sha512-lI1OsxNbS/gxRD4548Wyj22Dk8kS7eGMwD9GlBZvQmFV8FJUXoXySL1BiNzDsHUE96/DS/DHmA+F73p1Dkcktg==} + engines: {node: 10 || 12 || 14 || 16 || 18} + dependencies: + '@azure/msal-common': 13.3.0 + jsonwebtoken: 9.0.2 + uuid: 8.3.2 + dev: false + + /@azure/storage-blob@12.15.0: + resolution: {integrity: sha512-e7JBKLOFi0QVJqqLzrjx1eL3je3/Ug2IQj24cTM9b85CsnnFjLGeGjJVIjbGGZaytewiCEG7r3lRwQX7fKj0/w==} + engines: {node: '>=14.0.0'} + dependencies: + '@azure/abort-controller': 1.1.0 + '@azure/core-http': 3.0.3 + '@azure/core-lro': 2.5.4 + '@azure/core-paging': 1.5.0 + '@azure/core-tracing': 1.0.0-preview.13 + '@azure/logger': 1.0.4 + events: 3.3.0 + tslib: 2.6.1 + transitivePeerDependencies: + - encoding + dev: false + 
/@babel/code-frame@7.22.10: resolution: {integrity: sha512-/KKIMG4UEL35WmI9OlvMhurwtytjvXoFcGNrOvyG9zIzA8YmPjVtIZUf7b05+TPO7G7/GEmLHDaoCgACHl9hhA==} engines: {node: '>=6.9.0'} @@ -2602,6 +2789,11 @@ packages: openapi-typescript: 5.4.1 dev: true + /@opentelemetry/api@1.4.1: + resolution: {integrity: sha512-O2yRJce1GOc6PAy3QxFM4NzFiWzvScDC1/5ihYBL6BUEVdq0XMWN01sppE+H6bBXbaFYipjwFLEWLg5PaSOThA==} + engines: {node: '>=8.0.0'} + dev: false + /@panva/hkdf@1.1.1: resolution: {integrity: sha512-dhPeilub1NuIG0X5Kvhh9lH4iW3ZsHlnzwgwbOlgwQ2wG1IqFzsgHqmKPk3WzsdWAeaxKJxgM0+W433RmN45GA==} dev: false @@ -2916,6 +3108,11 @@ packages: use-sync-external-store: 1.2.0(react@18.2.0) dev: false + /@tootallnate/once@2.0.0: + resolution: {integrity: sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==} + engines: {node: '>= 10'} + dev: false + /@trpc/client@10.26.0(@trpc/server@10.26.0): resolution: {integrity: sha512-ojHxQFIE97rBEGPK8p1ijbzo0T1IdEBoJ9fFSgWWL9FMuEEA/DNQ9s0uuiOrDKhCCdTFT1unfRharoJhB2/O2w==} peerDependencies: @@ -3333,6 +3530,12 @@ packages: resolution: {integrity: sha512-Q5vtl1W5ue16D+nIaW8JWebSSraJVlK+EthKn7e7UcD4KWsaSJ8BqGPXNaPghgtcn/fhvrN17Tv8ksUsQpiplw==} dev: false + /@types/tunnel@0.0.3: + resolution: {integrity: sha512-sOUTGn6h1SfQ+gbgqC364jLFBw2lnFqkgF3q0WovEHRLMrVD1sd5aufqi/aJObLekJO+Aq5z646U4Oxy6shXMA==} + dependencies: + '@types/node': 20.4.10 + dev: false + /@types/unist@2.0.7: resolution: {integrity: sha512-cputDpIbFgLUaGQn6Vqg3/YsJwxUwHLO13v3i5ouxT4lat0khip9AEWxtERujXV9wxIB1EyF97BSJFt6vpdI8g==} dev: false @@ -4102,6 +4305,10 @@ packages: resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==} dev: false + /buffer-equal-constant-time@1.0.1: + resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==} + dev: false + /buffer-from@0.1.2: resolution: {integrity: sha512-RiWIenusJsmI2KcvqQABB83tLxCByE3upSP8QU3rJDMVFGPWLvPQJt/O1Su9moRWeH7d+Q2HYb68f6+v+tw2vg==} dev: false @@ -4707,6 +4914,11 @@ packages: resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==} dev: true + /define-lazy-prop@2.0.0: + resolution: {integrity: sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==} + engines: {node: '>=8'} + dev: false + /define-properties@1.2.0: resolution: {integrity: sha512-xvqAVKGfT1+UAvPwKTVw/njhdQ8ZhXK4lI0bCIuCMrp2up9nPnaDftrLtmpTazqd1o+UY4zgzU+avtMbDP+ldA==} engines: {node: '>= 0.4'} @@ -4818,6 +5030,12 @@ packages: safer-buffer: 2.1.2 dev: false + /ecdsa-sig-formatter@1.0.11: + resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==} + dependencies: + safe-buffer: 5.2.1 + dev: false + /ee-first@1.1.1: resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==} dev: false @@ -6061,6 +6279,17 @@ packages: toidentifier: 1.0.1 dev: false + /http-proxy-agent@5.0.0: + resolution: {integrity: sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==} + engines: {node: '>= 6'} + dependencies: + '@tootallnate/once': 2.0.0 + agent-base: 6.0.2 + debug: 4.3.4 + transitivePeerDependencies: + - supports-color + dev: false + /http-signature@1.2.0: resolution: {integrity: sha512-CAbnr6Rz4CYQkLYUtSNXxQPUH2gK8f3iWexVlsnMeD+GjlsQ0Xsy1cOX+mN3dtxYomRy21CiOzU8Uhw6OwncEQ==} engines: 
{node: '>=0.8', npm: '>=1.3.7'} @@ -6256,6 +6485,12 @@ packages: resolution: {integrity: sha512-RGdriMmQQvZ2aqaQq3awNA6dCGtKpiDFcOzrTWrDAT2MiWrKQVPmxLGHl7Y2nNu6led0kEyoX0enY0qXYsv9zw==} dev: false + /is-docker@2.2.1: + resolution: {integrity: sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==} + engines: {node: '>=8'} + hasBin: true + dev: false + /is-extglob@2.1.1: resolution: {integrity: sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==} engines: {node: '>=0.10.0'} @@ -6370,6 +6605,13 @@ packages: engines: {node: '>=12.13'} dev: false + /is-wsl@2.2.0: + resolution: {integrity: sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==} + engines: {node: '>=8'} + dependencies: + is-docker: 2.2.1 + dev: false + /isarray@0.0.1: resolution: {integrity: sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==} dev: false @@ -6399,7 +6641,7 @@ packages: resolution: {integrity: sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg==} engines: {node: '>= 10.13.0'} dependencies: - '@types/node': 20.4.10 + '@types/node': 18.16.0 merge-stream: 2.0.0 supports-color: 8.1.1 @@ -6514,6 +6756,22 @@ packages: resolution: {integrity: sha512-S6cATIPVv1z0IlxdN+zUk5EPjkGCdnhN4wVSBlvoUO1tOLJootbo9CquNJmbIh4yikWHiUedhRYrNPn1arpEmQ==} dev: false + /jsonwebtoken@9.0.2: + resolution: {integrity: sha512-PRp66vJ865SSqOlgqS8hujT5U4AOgMfhrwYIuIhfKaoSCZcirrmASQr8CX7cUg+RMih+hgznrjp99o+W4pJLHQ==} + engines: {node: '>=12', npm: '>=6'} + dependencies: + jws: 3.2.2 + lodash.includes: 4.3.0 + lodash.isboolean: 3.0.3 + lodash.isinteger: 4.0.4 + lodash.isnumber: 3.0.3 + lodash.isplainobject: 4.0.6 + lodash.isstring: 4.0.1 + lodash.once: 4.1.1 + ms: 2.1.3 + semver: 7.5.4 + dev: false + /jsprim@1.4.2: resolution: {integrity: sha512-P2bSOMAc/ciLz6DzgjVlGJP9+BrJWu5UDGK70C2iweC5QBIeFf0ZXRvGjEj2uYgrY2MkAAhsSWHDWlFtEroZWw==} engines: {node: '>=0.6.0'} @@ -6534,6 +6792,36 @@ packages: object.values: 1.1.6 dev: true + /jwa@1.4.1: + resolution: {integrity: sha512-qiLX/xhEEFKUAJ6FiBMbes3w9ATzyk5W7Hvzpa/SLYdxNtng+gcurvrI7TbACjIXlsJyr05/S1oUhZrc63evQA==} + dependencies: + buffer-equal-constant-time: 1.0.1 + ecdsa-sig-formatter: 1.0.11 + safe-buffer: 5.2.1 + dev: false + + /jwa@2.0.0: + resolution: {integrity: sha512-jrZ2Qx916EA+fq9cEAeCROWPTfCwi1IVHqT2tapuqLEVVDKFDENFw1oL+MwrTvH6msKxsd1YTDVw6uKEcsrLEA==} + dependencies: + buffer-equal-constant-time: 1.0.1 + ecdsa-sig-formatter: 1.0.11 + safe-buffer: 5.2.1 + dev: false + + /jws@3.2.2: + resolution: {integrity: sha512-YHlZCB6lMTllWDtSPHz/ZXTsi8S00usEV6v1tjq8tOUZzw7DpSDWVXjXDre6ed1w/pd495ODpHZYSdkRTsa0HA==} + dependencies: + jwa: 1.4.1 + safe-buffer: 5.2.1 + dev: false + + /jws@4.0.0: + resolution: {integrity: sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==} + dependencies: + jwa: 2.0.0 + safe-buffer: 5.2.1 + dev: false + /kysely-codegen@0.10.1(kysely@0.26.1)(pg@8.11.2): resolution: {integrity: sha512-8Bslh952gN5gtucRv4jTZDFD18RBioS6M50zHfe5kwb5iSyEAunU4ZYMdHzkHraa4zxjg5/183XlOryBCXLRIw==} hasBin: true @@ -6605,6 +6893,10 @@ packages: resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==} dev: false + /llama-tokenizer-js@1.1.3: + resolution: {integrity: sha512-+BUgsLCXVQJkjiD/t7PdESLn+yXJIRX/BJfwzVVYfKZ9aN3gsP9xoadBZxKnCxGz2Slby+S7x41gUr2TKNaS4Q==} + dev: false + 
/loader-runner@4.3.0: resolution: {integrity: sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg==} engines: {node: '>=6.11.5'} @@ -6660,10 +6952,30 @@ packages: resolution: {integrity: sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g==} dev: false + /lodash.includes@4.3.0: + resolution: {integrity: sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==} + dev: false + + /lodash.isboolean@3.0.3: + resolution: {integrity: sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==} + dev: false + + /lodash.isinteger@4.0.4: + resolution: {integrity: sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==} + dev: false + + /lodash.isnumber@3.0.3: + resolution: {integrity: sha512-QYqzpfwO3/CWf3XP+Z+tkQsfaLL/EnUlXWVkIk5FUPc4sBdTehEqZONuyRt2P67PXAk+NXmTBcc97zw9t1FQrw==} + dev: false + /lodash.isplainobject@4.0.6: resolution: {integrity: sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==} dev: false + /lodash.isstring@4.0.1: + resolution: {integrity: sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==} + dev: false + /lodash.merge@4.6.2: resolution: {integrity: sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==} dev: true @@ -6672,6 +6984,10 @@ packages: resolution: {integrity: sha512-GK3g5RPZWTRSeLSpgP8Xhra+pnjBC56q9FZYe1d5RN3TJ35dbkGy3YqBSMbyCrlbi+CM9Z3Jk5yTL7RCsqboyQ==} dev: false + /lodash.once@4.1.1: + resolution: {integrity: sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==} + dev: false + /lodash.union@4.6.0: resolution: {integrity: sha512-c4pB2CdGrGdjMKYLA+XiRDO7Y0PRQbm/Gzg8qMj+QH+pFVAoTp5sBpO0odL3FjoPCGjK96p6qsP+yQoiLoOBcw==} dev: false @@ -7177,6 +7493,15 @@ packages: dependencies: wrappy: 1.0.2 + /open@8.4.2: + resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==} + engines: {node: '>=12'} + dependencies: + define-lazy-prop: 2.0.0 + is-docker: 2.2.1 + is-wsl: 2.2.0 + dev: false + /openai@3.3.0: resolution: {integrity: sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==} dependencies: @@ -7627,6 +7952,11 @@ packages: resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==} dev: false + /process@0.11.10: + resolution: {integrity: sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==} + engines: {node: '>= 0.6.0'} + dev: false + /progress@2.0.3: resolution: {integrity: sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==} engines: {node: '>=0.4.0'} @@ -8272,6 +8602,10 @@ packages: yoga-wasm-web: 0.3.3 dev: false + /sax@1.2.4: + resolution: {integrity: sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==} + dev: false + /scheduler@0.23.0: resolution: {integrity: sha512-CtuThmgHNg7zIZWAXi3AsyIzA3n4xx7aNyjwC2VJldO2LMVDhFK+63xGqq6CsJH4rTAt6/M+N4GhZiDYPx9eUw==} dependencies: @@ -8296,7 +8630,6 @@ packages: hasBin: true dependencies: lru-cache: 6.0.0 - dev: true /send@0.18.0: resolution: {integrity: sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==} @@ -8504,6 +8837,11 @@ 
packages: resolution: {integrity: sha512-Rz6yejtVyWnVjC1RFvNmYL10kgjC49EOghxWn0RFqlCHGFpQx+Xe7yW3I4ceK1SGrWIGMjD5Kbue8W/udkbMJg==} dev: true + /stoppable@1.1.0: + resolution: {integrity: sha512-KXDYZ9dszj6bzvnEMRYvxgeTHU74QBFL54XKtP3nyMuJ81CFYtABZ3bAzL2EdFUaEwJOBOgENyFj3R7oTzDyyw==} + engines: {node: '>=4', npm: '>=6'} + dev: false + /stream-buffers@3.0.2: resolution: {integrity: sha512-DQi1h8VEBA/lURbSwFtEHnSTb9s2/pwLEaFuNhXwy1Dx3Sa0lOuYT2yNUr4/j2fs8oCAMANtrZ5OrPZtyVs3MQ==} engines: {node: '>= 0.10.0'} @@ -8876,6 +9214,11 @@ packages: safe-buffer: 5.2.1 dev: false + /tunnel@0.0.6: + resolution: {integrity: sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==} + engines: {node: '>=0.6.11 <=0.7.0 || >=0.7.3'} + dev: false + /tweetnacl@0.14.5: resolution: {integrity: sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA==} dev: false @@ -9464,6 +9807,19 @@ packages: optional: true dev: false + /xml2js@0.5.0: + resolution: {integrity: sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==} + engines: {node: '>=4.0.0'} + dependencies: + sax: 1.2.4 + xmlbuilder: 11.0.1 + dev: false + + /xmlbuilder@11.0.1: + resolution: {integrity: sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==} + engines: {node: '>=4.0'} + dev: false + /xmlhttprequest-ssl@2.0.0: resolution: {integrity: sha512-QKxVRxiRACQcVuQEYFsI1hhkrMlrXHPegbbd1yn9UHOmRxY+si12nQYzri3vbzt8VdTTRviqcKxcyllFas5z2A==} engines: {node: '>=0.4.0'}
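
A few notes on the mechanics above, with short TypeScript sketches. Anything named in these sketches that does not appear in the diff is illustrative only.

The split in `formatEntriesFromTrainingRows` enforces `trainingRatio` over the whole dataset rather than splitting each upload independently: it computes how many TRAIN rows the grown dataset should contain, subtracts the TRAIN rows it already has, and assigns the remainder of the incoming rows to TEST. A worked example mirroring the arithmetic in the diff:

```ts
const trainingRatio = 0.8;
const existingTrainingCount = 80; // TRAIN rows already in the dataset
const existingTestingCount = 20; // TEST rows already in the dataset
const incoming = 100; // rows in the uploaded JSONL file

const newTotalEntries = existingTrainingCount + existingTestingCount + incoming; // 200
const numTrainingToAdd = Math.floor(trainingRatio * newTotalEntries) - existingTrainingCount; // 160 - 80 = 80
const numTestingToAdd = incoming - numTrainingToAdd; // 100 - 80 = 20
```

One edge case worth a look: if the existing dataset sits far from the target ratio, `numTestingToAdd` can go negative (50 TRAIN, 50 TEST, and 10 incoming rows gives `Math.floor(0.8 * 110) - 50 = 38` TRAIN rows to add out of only 10, hence -28 TEST), and `Array(-28)` throws a RangeError. Clamping both counts to the range [0, trainingRows.length] would make that path safe.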
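`generateServiceClientUrl` hands the browser an account-level SAS token (services "b", resource types "sco", write permission), which is valid for any container in the storage account. If a tighter scope is wanted, a container-level SAS limited to the single upload container is the usual alternative. A sketch, reusing the `accountName`, `containerName`, and `sharedKeyCredential` already set up in `app/src/utils/azure/server.ts` (the `generateContainerScopedUrl` name is hypothetical); `uploadDatasetEntryFile` should work unchanged, since the client only performs a block-blob upload inside that container:

```ts
import {
  ContainerSASPermissions,
  SASProtocol,
  StorageSharedKeyCredential,
  generateBlobSASQueryParameters,
} from "@azure/storage-blob";

export const generateContainerScopedUrl = (
  accountName: string,
  containerName: string,
  sharedKeyCredential: StorageSharedKeyCredential,
) => {
  const sasToken = generateBlobSASQueryParameters(
    {
      containerName, // omitting blobName scopes the token to the whole container
      permissions: ContainerSASPermissions.parse("cw"), // create + write blobs only
      protocol: SASProtocol.Https,
      startsOn: new Date(),
      expiresOn: new Date(Date.now() + 10 * 60 * 1000), // same 10-minute window as the diff
    },
    sharedKeyCredential,
  ).toString();
  return {
    serviceClientUrl: `https://${accountName}.blob.core.windows.net?${sasToken}`,
    containerName,
  };
};
```

Separately, the worker's `BlobServiceClient` authenticates with `DefaultAzureCredential` while SAS minting uses the shared key, so the worker environment needs working credentials for both paths (or the download client could simply reuse the `sharedKeyCredential` defined directly above it).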
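`countLlamaChatTokens` consumes `llama-tokenizer-js` through its default export, a singleton exposing `encode`/`decode`, which is the shape the ambient declaration in `app/types/llama-tokenizer-js` describes. A quick smoke test of that assumption (output values are indicative, not guaranteed):

```ts
import llamaTokenizer from "llama-tokenizer-js";

const ids = llamaTokenizer.encode("hello world"); // number[]
console.log(ids.length); // the count countLlamaChatTokens returns for this string
console.log(llamaTokenizer.decode(ids)); // decodes back to roughly the input text
```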
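`formatFileSize` walks the unit ladder by repeated division by 1024 and trims trailing zeros via `parseFloat(bytes.toFixed(dm))`. Sample outputs, computed from the implementation above:

```ts
import { formatFileSize } from "~/utils/utils";

formatFileSize(0); // "0 Bytes"
formatFileSize(1536, 1); // "1.5 KB"
formatFileSize(10_000_000); // "9.54 MB", i.e. the client-side 10 MB read cutoff in ImportDataButton
```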