Upload training data through Azure Blob Storage

David Corbitt
2023-09-07 05:12:26 -07:00
parent 4fddc80dc5
commit 071ce47411
20 changed files with 1029 additions and 146 deletions

View File

@@ -40,3 +40,8 @@ SMTP_HOST="placeholder"
SMTP_PORT="placeholder"
SMTP_LOGIN="placeholder"
SMTP_PASSWORD="placeholder"
# Azure credentials are necessary for uploading large training data files
AZURE_STORAGE_ACCOUNT_NAME="placeholder"
AZURE_STORAGE_ACCOUNT_KEY="placeholder"
AZURE_STORAGE_CONTAINER_NAME="placeholder"

View File

@@ -26,6 +26,8 @@
"dependencies": {
"@anthropic-ai/sdk": "^0.5.8",
"@apidevtools/json-schema-ref-parser": "^10.1.0",
"@azure/identity": "^3.3.0",
"@azure/storage-blob": "12.15.0",
"@babel/standalone": "^7.22.9",
"@chakra-ui/anatomy": "^2.2.0",
"@chakra-ui/next-js": "^2.1.4",
@@ -69,6 +71,7 @@
"jsonschema": "^1.4.1",
"kysely": "^0.26.1",
"kysely-codegen": "^0.10.1",
"llama-tokenizer-js": "^1.1.3",
"lodash-es": "^4.17.21",
"lucide-react": "^0.265.0",
"marked": "^7.0.3",

View File

@@ -0,0 +1,23 @@
-- CreateEnum
CREATE TYPE "DatasetFileUploadStatus" AS ENUM ('PENDING', 'DOWNLOADING', 'PROCESSING', 'SAVING', 'COMPLETE', 'ERROR');
-- CreateTable
CREATE TABLE "DatasetFileUpload" (
"id" UUID NOT NULL,
"datasetId" UUID NOT NULL,
"blobName" TEXT NOT NULL,
"fileName" TEXT NOT NULL,
"fileSize" INTEGER NOT NULL,
"progress" INTEGER NOT NULL DEFAULT 0,
"status" "DatasetFileUploadStatus" NOT NULL DEFAULT 'PENDING',
"uploadedAt" TIMESTAMP(3) NOT NULL,
"visible" BOOLEAN NOT NULL DEFAULT true,
"errorMessage" TEXT,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
CONSTRAINT "DatasetFileUpload_pkey" PRIMARY KEY ("id")
);
-- AddForeignKey
ALTER TABLE "DatasetFileUpload" ADD CONSTRAINT "DatasetFileUpload_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@@ -176,12 +176,41 @@ model OutputEvaluation {
@@unique([modelResponseId, evaluationId])
}
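// Status lifecycle, as driven by the import task later in this diff:
// PENDING -> DOWNLOADING -> PROCESSING -> SAVING -> COMPLETE, with ERROR possible at any step.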
enum DatasetFileUploadStatus {
PENDING
DOWNLOADING
PROCESSING
SAVING
COMPLETE
ERROR
}
model DatasetFileUpload {
id String @id @default(uuid()) @db.Uuid
datasetId String @db.Uuid
dataset Dataset @relation(fields: [datasetId], references: [id], onDelete: Cascade)
blobName String
fileName String
fileSize Int
progress Int @default(0) // Percentage
status DatasetFileUploadStatus @default(PENDING)
uploadedAt DateTime
visible Boolean @default(true)
errorMessage String?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
}
model Dataset {
id String @id @default(uuid()) @db.Uuid
name String
datasetEntries DatasetEntry[]
fineTunes FineTune[]
datasetFileUploads DatasetFileUpload[]
trainingRatio Float @default(0.8)
projectId String @db.Uuid

View File

@@ -0,0 +1,61 @@
import { VStack, HStack, Button, Text, Card, Progress, IconButton } from "@chakra-ui/react";
import { BsX } from "react-icons/bs";
import { type RouterOutputs, api } from "~/utils/api";
import { useHandledAsyncCallback } from "~/utils/hooks";
import { formatFileSize } from "~/utils/utils";
type FileUpload = RouterOutputs["datasets"]["listFileUploads"][0];
const FileUploadCard = ({ fileUpload }: { fileUpload: FileUpload }) => {
const { id, fileName, fileSize, progress, status, errorMessage } = fileUpload;
const utils = api.useContext();
const hideFileUploadMutation = api.datasets.hideFileUpload.useMutation();
const [hideFileUpload, hidingInProgress] = useHandledAsyncCallback(async () => {
await hideFileUploadMutation.mutateAsync({ fileUploadId: id });
await utils.datasets.listFileUploads.invalidate();
}, [id, hideFileUploadMutation, utils]);
const [refreshDatasetEntries] = useHandledAsyncCallback(async () => {
await utils.datasetEntries.list.invalidate();
}, [utils]);
return (
<Card w="full">
<VStack w="full" alignItems="flex-start" p={4}>
<HStack w="full" justifyContent="space-between">
<Text fontWeight="bold">
Uploading {fileName} ({formatFileSize(fileSize, 2)})
</Text>
<HStack spacing={0}>
{status === "COMPLETE" && (
<Button variant="ghost" onClick={refreshDatasetEntries} color="orange.400" size="xs">
Refresh Table
</Button>
)}
<IconButton
aria-label="Hide file upload"
as={BsX}
boxSize={6}
minW={0}
variant="ghost"
isLoading={hidingInProgress}
onClick={hideFileUpload}
cursor="pointer"
/>
</HStack>
</HStack>
<Text alignSelf="center" fontSize="xs">
{errorMessage ? errorMessage : `${status} (${progress}%)`}
</Text>
<Progress w="full" value={progress} borderRadius={2} />
</VStack>
</Card>
);
};
export default FileUploadCard;

View File

@@ -1,4 +1,4 @@
import { useState, useEffect, useRef } from "react";
import { useState, useEffect, useRef, useCallback } from "react";
import {
Modal,
ModalOverlay,
@@ -16,13 +16,15 @@ import {
useDisclosure,
type UseDisclosureReturn,
} from "@chakra-ui/react";
import pluralize from "pluralize";
import { AiOutlineCloudUpload, AiOutlineFile } from "react-icons/ai";
import { useDataset, useHandledAsyncCallback } from "~/utils/hooks";
import { api } from "~/utils/api";
import ActionButton from "../ActionButton";
import { validateTrainingRows, type TrainingRow, parseJSONL } from "./validateTrainingRows";
import pluralize from "pluralize";
import { uploadDatasetEntryFile } from "~/utils/azure/website";
import { formatFileSize } from "~/utils/utils";
const ImportDataButton = () => {
const disclosure = useDisclosure();
@@ -48,6 +50,7 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
const [validationError, setValidationError] = useState<string | null>(null);
const [trainingRows, setTrainingRows] = useState<TrainingRow[] | null>(null);
const [file, setFile] = useState<File | null>(null);
const fileInputRef = useRef<HTMLInputElement>(null);
@@ -67,6 +70,14 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
};
const processFile = (file: File) => {
setFile(file);
// skip reading if file is larger than 10MB
if (file.size > 10000000) {
setTrainingRows(null);
return;
}
const reader = new FileReader();
reader.onload = (e: ProgressEvent<FileReader>) => {
const content = e.target?.result as string;
@@ -83,7 +94,6 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
setTrainingRows(parsedJSONL);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
console.log("e is", e);
setValidationError("Unable to parse JSONL file: " + (e.message as string));
setTrainingRows(null);
return;
@@ -92,28 +102,38 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
reader.readAsText(file);
};
const resetState = useCallback(() => {
setValidationError(null);
setTrainingRows(null);
setFile(null);
}, [setValidationError, setTrainingRows, setFile]);
useEffect(() => {
if (disclosure.isOpen) {
setTrainingRows(null);
setValidationError(null);
resetState();
}
}, [disclosure.isOpen]);
}, [disclosure.isOpen, resetState]);
const triggerFileDownloadMutation = api.datasets.triggerFileDownload.useMutation();
const utils = api.useContext();
const sendJSONLMutation = api.datasetEntries.create.useMutation();
const [sendJSONL, sendingInProgress] = useHandledAsyncCallback(async () => {
if (!dataset || !trainingRows) return;
if (!dataset || !file) return;
await sendJSONLMutation.mutateAsync({
const blobName = await uploadDatasetEntryFile(file);
await triggerFileDownloadMutation.mutateAsync({
datasetId: dataset.id,
jsonl: JSON.stringify(trainingRows),
blobName,
fileName: file.name,
fileSize: file.size,
});
await utils.datasetEntries.list.invalidate();
await utils.datasets.listFileUploads.invalidate();
disclosure.onClose();
}, [dataset, trainingRows, sendJSONLMutation]);
}, [dataset, trainingRows, triggerFileDownloadMutation, file, utils]);
return (
<Modal size={{ base: "xl", md: "2xl" }} {...disclosure}>
@@ -127,7 +147,28 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
<ModalCloseButton />
<ModalBody maxW="unset" p={8}>
<Box w="full" aspectRatio={1.5}>
{!trainingRows && !validationError && (
{validationError && (
<VStack w="full" h="full" justifyContent="center" spacing={8}>
<Icon as={AiOutlineFile} boxSize={24} color="gray.300" />
<VStack w="full">
<Text fontSize={32} color="gray.500" fontWeight="bold">
Error
</Text>
<Text color="gray.500">{validationError}</Text>
</VStack>
<Text
as="span"
textDecor="underline"
color="gray.500"
_hover={{ color: "orange.400" }}
cursor="pointer"
onClick={resetState}
>
Try again
</Text>
</VStack>
)}
{!validationError && !file && (
<VStack
w="full"
h="full"
@@ -167,38 +208,28 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
</Text>
</VStack>
)}
{validationError && (
<VStack w="full" h="full" justifyContent="center" spacing={8}>
<Icon as={AiOutlineFile} boxSize={24} color="gray.300" />
<VStack w="full">
<Text fontSize={32} color="gray.500" fontWeight="bold">
Error
</Text>
<Text color="gray.500">{validationError}</Text>
</VStack>
<Text
as="span"
textDecor="underline"
color="gray.500"
_hover={{ color: "orange.400" }}
cursor="pointer"
onClick={() => setValidationError(null)}
>
Try again
</Text>
</VStack>
)}
{trainingRows && !validationError && (
{!validationError && file && (
<VStack w="full" h="full" justifyContent="center" spacing={8}>
<JsonFileIcon />
<VStack w="full">
<Text fontSize={32} color="gray.500" fontWeight="bold">
Success
</Text>
<Text color="gray.500">
We'll upload <b>{trainingRows.length}</b>{" "}
{pluralize("row", trainingRows.length)} into <b>{dataset?.name}</b>.{" "}
</Text>
{trainingRows ? (
<>
<Text fontSize={32} color="gray.500" fontWeight="bold">
Success
</Text>
<Text color="gray.500">
We'll upload <b>{trainingRows.length}</b>{" "}
{pluralize("row", trainingRows.length)} into <b>{dataset?.name}</b>.{" "}
</Text>
</>
) : (
<>
<Text fontSize={32} color="gray.500" fontWeight="bold">
{file.name}
</Text>
<Text color="gray.500">{formatFileSize(file.size)}</Text>
</>
)}
</VStack>
<Text
as="span"
@@ -206,7 +237,7 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
color="gray.500"
_hover={{ color: "orange.400" }}
cursor="pointer"
onClick={() => setTrainingRows(null)}
onClick={resetState}
>
Change file
</Text>
@@ -224,7 +255,7 @@ const ImportDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) =>
onClick={sendJSONL}
isLoading={sendingInProgress}
minW={24}
isDisabled={!trainingRows || !!validationError}
isDisabled={!file || !!validationError}
>
Upload
</Button>

View File

@@ -26,6 +26,9 @@ export const env = createEnv({
SMTP_PORT: z.string().default("placeholder"),
SMTP_LOGIN: z.string().default("placeholder"),
SMTP_PASSWORD: z.string().default("placeholder"),
AZURE_STORAGE_ACCOUNT_NAME: z.string().default("placeholder"),
AZURE_STORAGE_ACCOUNT_KEY: z.string().default("placeholder"),
AZURE_STORAGE_CONTAINER_NAME: z.string().default("placeholder"),
WORKER_CONCURRENCY: z
.string()
.default("10")
@@ -72,6 +75,9 @@ export const env = createEnv({
SMTP_PORT: process.env.SMTP_PORT,
SMTP_LOGIN: process.env.SMTP_LOGIN,
SMTP_PASSWORD: process.env.SMTP_PASSWORD,
AZURE_STORAGE_ACCOUNT_NAME: process.env.AZURE_STORAGE_ACCOUNT_NAME,
AZURE_STORAGE_ACCOUNT_KEY: process.env.AZURE_STORAGE_ACCOUNT_KEY,
AZURE_STORAGE_CONTAINER_NAME: process.env.AZURE_STORAGE_CONTAINER_NAME,
WORKER_CONCURRENCY: process.env.WORKER_CONCURRENCY,
WORKER_MAX_POOL_SIZE: process.env.WORKER_MAX_POOL_SIZE,
},

View File

@@ -28,6 +28,7 @@ import ExperimentButton from "~/components/datasets/ExperimentButton";
import ImportDataButton from "~/components/datasets/ImportDataButton";
import DownloadButton from "~/components/datasets/ExportButton";
import DeleteButton from "~/components/datasets/DeleteButton";
import FileUploadCard from "~/components/datasets/FileUploadCard";
export default function Dataset() {
const utils = api.useContext();
@@ -40,6 +41,19 @@ export default function Dataset() {
setName(dataset.data?.name || "");
}, [dataset.data?.name]);
const [fileUploadsRefetchInterval, setFileUploadsRefetchInterval] = useState<number>(500);
const fileUploads = api.datasets.listFileUploads.useQuery(
{ datasetId: dataset.data?.id as string },
{ enabled: !!dataset.data?.id, refetchInterval: fileUploadsRefetchInterval },
);
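// Poll every 500ms while any upload is still in flight; setting refetchInterval to 0 disables polling once all uploads are COMPLETE or ERROR.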
useEffect(() => {
if (fileUploads?.data?.some((fu) => fu.status !== "COMPLETE" && fu.status !== "ERROR")) {
setFileUploadsRefetchInterval(500);
} else {
setFileUploadsRefetchInterval(0);
}
}, [fileUploads]);
useEffect(() => {
useAppStore.getState().sharedArgumentsEditor.loadMonaco().catch(console.error);
}, []);
@@ -101,6 +115,13 @@ export default function Dataset() {
<DatasetHeaderButtons openDrawer={drawerDisclosure.onOpen} />
</PageHeaderContainer>
<VStack px={8} py={8} alignItems="flex-start" spacing={4} w="full">
<HStack w="full">
<VStack w="full">
{fileUploads?.data?.map((upload) => (
<FileUploadCard key={upload.id} fileUpload={upload} />
))}
</VStack>
</HStack>
<HStack w="full" justifyContent="flex-end">
<FineTuneButton />
<ImportDataButton />

View File

@@ -1,4 +1,3 @@
import { type Prisma } from "@prisma/client";
import { z } from "zod";
import { v4 as uuidv4 } from "uuid";
import {
@@ -7,18 +6,18 @@ import {
type CreateChatCompletionRequestMessage,
} from "openai/resources/chat";
import { TRPCError } from "@trpc/server";
import { shuffle } from "lodash-es";
import archiver from "archiver";
import { WritableStreamBuffer } from "stream-buffers";
import { createTRPCRouter, protectedProcedure } from "~/server/api/trpc";
import { prisma } from "~/server/db";
import { requireCanModifyProject, requireCanViewProject } from "~/utils/accessControl";
import { error, success } from "~/utils/errorHandling/standardResponses";
import { countOpenAIChatTokens } from "~/utils/countTokens";
import { type TrainingRow, validateTrainingRows } from "~/components/datasets/validateTrainingRows";
import { type TrainingRow } from "~/components/datasets/validateTrainingRows";
import hashObject from "~/server/utils/hashObject";
import { type JsonValue } from "type-fest";
import { WritableStreamBuffer } from "stream-buffers";
import { formatEntriesFromTrainingRows } from "~/server/utils/createEntriesFromTrainingRows";
export const datasetEntriesRouter = createTRPCRouter({
list: protectedProcedure
@@ -100,7 +99,6 @@ export const datasetEntriesRouter = createTRPCRouter({
})
.optional(),
loggedCallIds: z.string().array().optional(),
jsonl: z.string().optional(),
}),
)
.mutation(async ({ input, ctx }) => {
@@ -121,104 +119,48 @@ export const datasetEntriesRouter = createTRPCRouter({
return error("No datasetId or newDatasetParams provided");
}
if (!input.loggedCallIds && !input.jsonl) {
return error("No loggedCallIds or jsonl provided");
if (!input.loggedCallIds) {
return error("No loggedCallIds provided");
}
let trainingRows: TrainingRow[];
if (input.loggedCallIds) {
const loggedCalls = await prisma.loggedCall.findMany({
where: {
id: {
in: input.loggedCallIds,
},
modelResponse: {
isNot: null,
const loggedCalls = await prisma.loggedCall.findMany({
where: {
id: {
in: input.loggedCallIds,
},
modelResponse: {
isNot: null,
},
},
include: {
modelResponse: {
select: {
reqPayload: true,
respPayload: true,
inputTokens: true,
outputTokens: true,
},
},
include: {
modelResponse: {
select: {
reqPayload: true,
respPayload: true,
inputTokens: true,
outputTokens: true,
},
},
},
orderBy: { createdAt: "desc" },
});
},
orderBy: { createdAt: "desc" },
});
trainingRows = loggedCalls.map((loggedCall) => {
const inputMessages = (
loggedCall.modelResponse?.reqPayload as unknown as CompletionCreateParams
).messages;
let output: ChatCompletion.Choice.Message | undefined = undefined;
const resp = loggedCall.modelResponse?.respPayload as unknown as
| ChatCompletion
| undefined;
if (resp && resp.choices?.[0]) {
output = resp.choices[0].message;
}
return {
input: inputMessages as unknown as CreateChatCompletionRequestMessage[],
output: output as unknown as CreateChatCompletionRequestMessage,
};
});
} else {
trainingRows = JSON.parse(input.jsonl as string) as TrainingRow[];
const validationError = validateTrainingRows(trainingRows);
if (validationError) {
return error(`Invalid JSONL: ${validationError}`);
const trainingRows = loggedCalls.map((loggedCall) => {
const inputMessages = (
loggedCall.modelResponse?.reqPayload as unknown as CompletionCreateParams
).messages;
let output: ChatCompletion.Choice.Message | undefined = undefined;
const resp = loggedCall.modelResponse?.respPayload as unknown as ChatCompletion | undefined;
if (resp && resp.choices?.[0]) {
output = resp.choices[0].message;
}
}
return {
input: inputMessages as unknown as CreateChatCompletionRequestMessage[],
output: output as unknown as CreateChatCompletionRequestMessage,
};
});
const [existingTrainingCount, existingTestingCount] = await prisma.$transaction([
prisma.datasetEntry.count({
where: {
datasetId,
type: "TRAIN",
},
}),
prisma.datasetEntry.count({
where: {
datasetId,
type: "TEST",
},
}),
]);
const newTotalEntries = existingTrainingCount + existingTestingCount + trainingRows.length;
const numTrainingToAdd = Math.floor(trainingRatio * newTotalEntries) - existingTrainingCount;
const numTestingToAdd = trainingRows.length - numTrainingToAdd;
const typesToAssign = shuffle([
...Array(numTrainingToAdd).fill("TRAIN"),
...Array(numTestingToAdd).fill("TEST"),
]);
const datasetEntriesToCreate: Prisma.DatasetEntryCreateManyInput[] = [];
for (const row of trainingRows) {
let outputTokens = 0;
if (row.output) {
outputTokens = countOpenAIChatTokens("gpt-4-0613", [
row.output as unknown as ChatCompletion.Choice.Message,
]);
}
datasetEntriesToCreate.push({
datasetId: datasetId,
input: row.input as unknown as Prisma.InputJsonValue,
output: (row.output as unknown as Prisma.InputJsonValue) ?? {
role: "assistant",
content: "",
},
inputTokens: countOpenAIChatTokens(
"gpt-4-0613",
row.input as unknown as CreateChatCompletionRequestMessage[],
),
outputTokens,
type: typesToAssign.pop() as "TRAIN" | "TEST",
});
}
const datasetEntriesToCreate = await formatEntriesFromTrainingRows(datasetId, trainingRows);
// Ensure dataset and dataset entries are created atomically
await prisma.$transaction([
@@ -239,7 +181,6 @@ export const datasetEntriesRouter = createTRPCRouter({
return success(datasetId);
}),
update: protectedProcedure
.input(
z.object({

View File

@@ -1,8 +1,11 @@
import { z } from "zod";
import { createTRPCRouter, protectedProcedure } from "~/server/api/trpc";
import { prisma } from "~/server/db";
import { requireCanModifyProject, requireCanViewProject } from "~/utils/accessControl";
import { success } from "~/utils/errorHandling/standardResponses";
import { generateServiceClientUrl } from "~/utils/azure/server";
import { queueImportDatasetEntries } from "~/server/tasks/importDatasetEntries.task";
export const datasetsRouter = createTRPCRouter({
get: protectedProcedure.input(z.object({ id: z.string() })).query(async ({ input, ctx }) => {
@@ -94,4 +97,73 @@ export const datasetsRouter = createTRPCRouter({
return success("Dataset deleted");
}),
getServiceClientUrl: protectedProcedure
.input(z.object({ projectId: z.string() }))
.query(async ({ input, ctx }) => {
// A write SAS token is only issued to users who can modify the project
await requireCanModifyProject(input.projectId, ctx);
return generateServiceClientUrl();
}),
triggerFileDownload: protectedProcedure
.input(
z.object({
datasetId: z.string(),
blobName: z.string(),
fileName: z.string(),
fileSize: z.number(),
}),
)
.mutation(async ({ input, ctx }) => {
const { projectId } = await prisma.dataset.findUniqueOrThrow({
where: { id: input.datasetId },
});
await requireCanViewProject(projectId, ctx);
const { id } = await prisma.datasetFileUpload.create({
data: {
datasetId: input.datasetId,
blobName: input.blobName,
status: "PENDING",
fileName: input.fileName,
fileSize: input.fileSize,
uploadedAt: new Date(),
},
});
await queueImportDatasetEntries(id);
}),
listFileUploads: protectedProcedure
.input(z.object({ datasetId: z.string() }))
.query(async ({ input, ctx }) => {
const { projectId } = await prisma.dataset.findUniqueOrThrow({
where: { id: input.datasetId },
});
await requireCanViewProject(projectId, ctx);
return await prisma.datasetFileUpload.findMany({
where: {
datasetId: input.datasetId,
visible: true,
},
orderBy: { createdAt: "desc" },
});
}),
hideFileUpload: protectedProcedure
.input(z.object({ fileUploadId: z.string() }))
.mutation(async ({ input, ctx }) => {
const { datasetId } = await prisma.datasetFileUpload.findUniqueOrThrow({
where: { id: input.fileUploadId },
});
const { projectId } = await prisma.dataset.findUniqueOrThrow({
where: { id: datasetId },
});
await requireCanModifyProject(projectId, ctx);
await prisma.datasetFileUpload.update({
where: { id: input.fileUploadId },
data: {
visible: false,
},
});
}),
});
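// Flow summary for this router, as wired up in this diff:
// 1. getServiceClientUrl hands the browser a short-lived write SAS URL.
// 2. The browser uploads the JSONL blob directly to Azure, then calls triggerFileDownload
//    with the resulting blobName, which creates a DatasetFileUpload row and queues the import task.
// 3. The dataset page polls listFileUploads to render progress cards; hideFileUpload
//    dismisses a card by setting visible to false.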

View File

@@ -0,0 +1,132 @@
import { prisma } from "~/server/db";
import defineTask from "./defineTask";
import { downloadBlobToString } from "~/utils/azure/server";
import {
type TrainingRow,
validateTrainingRows,
parseJSONL,
} from "~/components/datasets/validateTrainingRows";
import { formatEntriesFromTrainingRows } from "~/server/utils/createEntriesFromTrainingRows";
export type ImportDatasetEntriesJob = {
datasetFileUploadId: string;
};
export const importDatasetEntries = defineTask<ImportDatasetEntriesJob>(
"importDatasetEntries",
async (task) => {
const { datasetFileUploadId } = task;
const datasetFileUpload = await prisma.datasetFileUpload.findUnique({
where: { id: datasetFileUploadId },
});
if (!datasetFileUpload) {
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
errorMessage: "Dataset File Upload not found",
status: "ERROR",
},
});
return;
}
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
status: "DOWNLOADING",
progress: 5,
},
});
const jsonlStr = await downloadBlobToString(datasetFileUpload.blobName);
const trainingRows = parseJSONL(jsonlStr) as TrainingRow[];
const validationError = validateTrainingRows(trainingRows);
if (validationError) {
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
errorMessage: `Invalid JSONL: ${validationError}`,
status: "ERROR",
},
});
return;
}
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
status: "PROCESSING",
progress: 30,
},
});
const updateCallback = async (progress: number) => {
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
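// DOWNLOADING pinned progress at 5 and SAVING takes over at 99, so per-row progress
// is scaled into the 69-point band between PROCESSING's start (30) and 99.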
progress: 30 + Math.floor((progress / trainingRows.length) * 69),
},
});
};
let datasetEntriesToCreate;
try {
datasetEntriesToCreate = await formatEntriesFromTrainingRows(
datasetFileUpload.datasetId,
trainingRows,
updateCallback,
500,
);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
errorMessage: `Error formatting rows: ${e.message as string}`,
status: "ERROR",
},
});
return;
}
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
status: "SAVING",
progress: 99,
},
});
await prisma.datasetEntry.createMany({
data: datasetEntriesToCreate,
});
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
status: "COMPLETE",
progress: 100,
},
});
},
);
export const queueImportDatasetEntries = async (datasetFileUploadId: string) => {
await Promise.all([
prisma.datasetFileUpload.update({
where: {
id: datasetFileUploadId,
},
data: {
errorMessage: null,
status: "PENDING",
},
}),
importDatasetEntries.enqueue({ datasetFileUploadId }),
]);
};

View File

@@ -5,10 +5,11 @@ import "../../../sentry.server.config";
import { env } from "~/env.mjs";
import { queryModel } from "./queryModel.task";
import { runNewEval } from "./runNewEval.task";
import { importDatasetEntries } from "./importDatasetEntries.task";
console.log("Starting worker");
const registeredTasks = [queryModel, runNewEval];
const registeredTasks = [queryModel, runNewEval, importDatasetEntries];
const taskList = registeredTasks.reduce((acc, task) => {
acc[task.task.identifier] = task.task.handler;

View File

@@ -0,0 +1,70 @@
import { type Prisma } from "@prisma/client";
import { shuffle } from "lodash-es";
import {
type CreateChatCompletionRequestMessage,
type ChatCompletion,
} from "openai/resources/chat";
import { prisma } from "~/server/db";
import { type TrainingRow } from "~/components/datasets/validateTrainingRows";
import { countLlamaChatTokens } from "~/utils/countTokens";
export const formatEntriesFromTrainingRows = async (
datasetId: string,
trainingRows: TrainingRow[],
updateCallback?: (progress: number) => Promise<void>,
updateFrequency = 1000,
) => {
const [dataset, existingTrainingCount, existingTestingCount] = await prisma.$transaction([
prisma.dataset.findUnique({ where: { id: datasetId } }),
prisma.datasetEntry.count({
where: {
datasetId,
type: "TRAIN",
},
}),
prisma.datasetEntry.count({
where: {
datasetId,
type: "TEST",
},
}),
]);
const trainingRatio = dataset?.trainingRatio ?? 0.8;
const newTotalEntries = existingTrainingCount + existingTestingCount + trainingRows.length;
const numTrainingToAdd = Math.floor(trainingRatio * newTotalEntries) - existingTrainingCount;
const numTestingToAdd = trainingRows.length - numTrainingToAdd;
const typesToAssign = shuffle([
...Array(numTrainingToAdd).fill("TRAIN"),
...Array(numTestingToAdd).fill("TEST"),
]);
const datasetEntriesToCreate: Prisma.DatasetEntryCreateManyInput[] = [];
let i = 0;
for (const row of trainingRows) {
if (updateCallback && i % updateFrequency === 0) await updateCallback(i);
let outputTokens = 0;
if (row.output) {
outputTokens = countLlamaChatTokens([row.output as unknown as ChatCompletion.Choice.Message]);
}
datasetEntriesToCreate.push({
datasetId: datasetId,
input: row.input as unknown as Prisma.InputJsonValue,
output: (row.output as unknown as Prisma.InputJsonValue) ?? {
role: "assistant",
content: "",
},
inputTokens: countLlamaChatTokens(
row.input as unknown as CreateChatCompletionRequestMessage[],
),
outputTokens,
type: typesToAssign.pop() as "TRAIN" | "TEST",
});
i++;
}
return datasetEntriesToCreate;
};
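// A worked example of the split arithmetic above (hypothetical counts, not from this diff):
// existing: 80 TRAIN + 20 TEST; incoming trainingRows.length = 100; trainingRatio = 0.8
// newTotalEntries  = 80 + 20 + 100         = 200
// numTrainingToAdd = floor(0.8 * 200) - 80 = 80
// numTestingToAdd  = 100 - 80              = 20
// The dataset ends at 160 TRAIN / 40 TEST, preserving the 0.8 ratio.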

View File

@@ -0,0 +1,71 @@
import {
BlobServiceClient,
generateAccountSASQueryParameters,
AccountSASPermissions,
AccountSASServices,
AccountSASResourceTypes,
StorageSharedKeyCredential,
SASProtocol,
} from "@azure/storage-blob";
import { DefaultAzureCredential } from "@azure/identity";
const accountName = process.env.AZURE_STORAGE_ACCOUNT_NAME;
if (!accountName) throw Error("Azure Storage accountName not found");
const accountKey = process.env.AZURE_STORAGE_ACCOUNT_KEY;
if (!accountKey) throw Error("Azure Storage accountKey not found");
const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME;
if (!containerName) throw Error("Azure Storage containerName not found");
const sharedKeyCredential = new StorageSharedKeyCredential(accountName, accountKey);
const blobServiceClient = new BlobServiceClient(
`https://${accountName}.blob.core.windows.net`,
new DefaultAzureCredential(),
);
const containerClient = blobServiceClient.getContainerClient(containerName);
export const generateServiceClientUrl = () => {
const sasOptions = {
services: AccountSASServices.parse("b").toString(), // blobs
resourceTypes: AccountSASResourceTypes.parse("sco").toString(), // service, container, object
permissions: AccountSASPermissions.parse("w"), // write permissions
protocol: SASProtocol.Https,
startsOn: new Date(),
expiresOn: new Date(new Date().valueOf() + 10 * 60 * 1000), // 10 minutes
};
let sasToken = generateAccountSASQueryParameters(sasOptions, sharedKeyCredential).toString();
// remove leading "?"
sasToken = sasToken[0] === "?" ? sasToken.substring(1) : sasToken;
return {
serviceClientUrl: `https://${accountName}.blob.core.windows.net?${sasToken}`,
containerName,
};
};
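// Shape of the returned value (hypothetical token values; ss=b, srt=sco, and sp=w are the
// standard SAS query parameters produced by the parse calls above):
// {
//   serviceClientUrl: "https://<account>.blob.core.windows.net?sv=...&ss=b&srt=sco&sp=w&se=...&sig=...",
//   containerName: "<container>",
// }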
export async function downloadBlobToString(blobName: string) {
const blobClient = containerClient.getBlobClient(blobName);
const downloadResponse = await blobClient.download();
if (!downloadResponse) throw Error("error downloading blob");
if (!downloadResponse.readableStreamBody)
throw Error("downloadResponse.readableStreamBody not found");
const downloaded = await streamToBuffer(downloadResponse.readableStreamBody);
return downloaded.toString();
}
async function streamToBuffer(readableStream: NodeJS.ReadableStream): Promise<Buffer> {
return new Promise((resolve, reject) => {
const chunks: Uint8Array[] = [];
readableStream.on("data", (data: ArrayBuffer) => {
chunks.push(data instanceof Buffer ? data : Buffer.from(data));
});
readableStream.on("end", () => {
resolve(Buffer.concat(chunks));
});
readableStream.on("error", reject);
});
}

View File

@@ -0,0 +1,30 @@
import { BlobServiceClient } from "@azure/storage-blob";
import { v4 as uuidv4 } from "uuid";
import { useAppStore } from "~/state/store";
export const uploadDatasetEntryFile = async (file: File) => {
const { selectedProjectId: projectId, api } = useAppStore.getState();
if (!projectId) throw Error("projectId not found");
if (!api) throw Error("api not initialized");
const { serviceClientUrl, containerName } = await api.client.datasets.getServiceClientUrl.query({
projectId,
});
const blobServiceClient = new BlobServiceClient(serviceClientUrl);
// create container client
const containerClient = blobServiceClient.getContainerClient(containerName);
// base name without extension
const basename = file.name.split("/").pop()?.split(".").shift();
if (!basename) throw Error("basename not found");
const blobName = `${basename}-${uuidv4()}.jsonl`;
// create blob client
const blobClient = containerClient.getBlockBlobClient(blobName);
// upload file
await blobClient.uploadData(file);
return blobName;
};
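// Call-site sketch (the import modal earlier in this diff does exactly this):
// const blobName = await uploadDatasetEntryFile(file);
// await triggerFileDownloadMutation.mutateAsync({
//   datasetId: dataset.id, blobName, fileName: file.name, fileSize: file.size,
// });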

View File

@@ -1,5 +1,7 @@
import { type ChatCompletion } from "openai/resources/chat";
import { GPTTokens } from "gpt-tokens";
import llamaTokenizer from "llama-tokenizer-js";
import { type SupportedModel } from "~/modelProviders/openai-ChatCompletion";
interface GPTTokensMessageItem {
@@ -22,3 +24,11 @@ export const countOpenAIChatTokens = (
messages: reformattedMessages as unknown as GPTTokensMessageItem[],
}).usedTokens;
};
export const countLlamaChatTokens = (messages: ChatCompletion.Choice.Message[]) => {
const stringToTokenize = messages
.map((message) => message.content || JSON.stringify(message.function_call))
.join("\n");
const tokens = llamaTokenizer.encode(stringToTokenize);
return tokens.length;
};
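// Usage sketch (the token count is illustrative; it depends on the tokenizer version):
// const inputTokens = countLlamaChatTokens([
//   { role: "user", content: "Hello there!" } as unknown as ChatCompletion.Choice.Message,
// ]);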

View File

@@ -52,3 +52,18 @@ export const parseableToFunctionCall = (str: string) => {
return true;
};
export const formatFileSize = (bytes: number, decimals = 2) => {
if (bytes === 0) return "0 Bytes";
const k = 1024;
const dm = decimals < 0 ? 0 : decimals;
const sizes = ["Bytes", "KB", "MB", "GB", "TB"];
for (const size of sizes) {
if (bytes < k) return `${parseFloat(bytes.toFixed(dm))} ${size}`;
bytes /= k;
}
return "> 1024 TB";
};
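// Expected outputs (binary units, so 1 KB = 1024 bytes):
// formatFileSize(0)          -> "0 Bytes"
// formatFileSize(1536)       -> "1.5 KB"
// formatFileSize(10_000_000) -> "9.54 MB"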

View File

@@ -19,7 +19,9 @@
"baseUrl": ".",
"paths": {
"~/*": ["./src/*"]
}
},
"typeRoots": ["./types", "./node_modules/@types"],
"types": ["llama-tokenizer-js", "node"]
},
"include": [
".eslintrc.cjs",

View File

@@ -0,0 +1,4 @@
declare module "llama-tokenizer-js" {
export function encode(input: string): number[];
export function decode(input: number[]): string;
}

pnpm-lock.yaml (generated, 360 changes)
View File

@@ -14,6 +14,12 @@ importers:
'@apidevtools/json-schema-ref-parser':
specifier: ^10.1.0
version: 10.1.0
'@azure/identity':
specifier: ^3.3.0
version: 3.3.0
'@azure/storage-blob':
specifier: 12.15.0
version: 12.15.0
'@babel/standalone':
specifier: ^7.22.9
version: 7.22.9
@@ -143,6 +149,9 @@ importers:
kysely-codegen:
specifier: ^0.10.1
version: 0.10.1(kysely@0.26.1)(pg@8.11.2)
llama-tokenizer-js:
specifier: ^1.1.3
version: 1.1.3
lodash-es:
specifier: ^4.17.21
version: 4.17.21
@@ -465,6 +474,184 @@ packages:
js-yaml: 4.1.0
dev: true
/@azure/abort-controller@1.1.0:
resolution: {integrity: sha512-TrRLIoSQVzfAJX9H1JeFjzAoDGcoK1IYX1UImfceTZpsyYfWr09Ss1aHW1y5TrrR3iq6RZLBwJ3E24uwPhwahw==}
engines: {node: '>=12.0.0'}
dependencies:
tslib: 2.6.1
dev: false
/@azure/core-auth@1.5.0:
resolution: {integrity: sha512-udzoBuYG1VBoHVohDTrvKjyzel34zt77Bhp7dQntVGGD0ehVq48owENbBG8fIgkHRNUBQH5k1r0hpoMu5L8+kw==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-util': 1.4.0
tslib: 2.6.1
dev: false
/@azure/core-client@1.7.3:
resolution: {integrity: sha512-kleJ1iUTxcO32Y06dH9Pfi9K4U+Tlb111WXEnbt7R/ne+NLRwppZiTGJuTD5VVoxTMK5NTbEtm5t2vcdNCFe2g==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-auth': 1.5.0
'@azure/core-rest-pipeline': 1.12.0
'@azure/core-tracing': 1.0.1
'@azure/core-util': 1.4.0
'@azure/logger': 1.0.4
tslib: 2.6.1
transitivePeerDependencies:
- supports-color
dev: false
/@azure/core-http@3.0.3:
resolution: {integrity: sha512-QMib3wXotJMFhHgmJBPUF9YsyErw34H0XDFQd9CauH7TPB+RGcyl9Ayy7iURtJB04ngXhE6YwrQsWDXlSLrilg==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-auth': 1.5.0
'@azure/core-tracing': 1.0.0-preview.13
'@azure/core-util': 1.4.0
'@azure/logger': 1.0.4
'@types/node-fetch': 2.6.4
'@types/tunnel': 0.0.3
form-data: 4.0.0
node-fetch: 2.6.12(encoding@0.1.13)
process: 0.11.10
tslib: 2.6.1
tunnel: 0.0.6
uuid: 8.3.2
xml2js: 0.5.0
transitivePeerDependencies:
- encoding
dev: false
/@azure/core-lro@2.5.4:
resolution: {integrity: sha512-3GJiMVH7/10bulzOKGrrLeG/uCBH/9VtxqaMcB9lIqAeamI/xYQSHJL/KcsLDuH+yTjYpro/u6D/MuRe4dN70Q==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-util': 1.4.0
'@azure/logger': 1.0.4
tslib: 2.6.1
dev: false
/@azure/core-paging@1.5.0:
resolution: {integrity: sha512-zqWdVIt+2Z+3wqxEOGzR5hXFZ8MGKK52x4vFLw8n58pR6ZfKRx3EXYTxTaYxYHc/PexPUTyimcTWFJbji9Z6Iw==}
engines: {node: '>=14.0.0'}
dependencies:
tslib: 2.6.1
dev: false
/@azure/core-rest-pipeline@1.12.0:
resolution: {integrity: sha512-+MnSB0vGZjszSzr5AW8z93/9fkDu2RLtWmAN8gskURq7EW2sSwqy8jZa0V26rjuBVkwhdA3Hw8z3VWoeBUOw+A==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-auth': 1.5.0
'@azure/core-tracing': 1.0.1
'@azure/core-util': 1.4.0
'@azure/logger': 1.0.4
form-data: 4.0.0
http-proxy-agent: 5.0.0
https-proxy-agent: 5.0.1
tslib: 2.6.1
transitivePeerDependencies:
- supports-color
dev: false
/@azure/core-tracing@1.0.0-preview.13:
resolution: {integrity: sha512-KxDlhXyMlh2Jhj2ykX6vNEU0Vou4nHr025KoSEiz7cS3BNiHNaZcdECk/DmLkEB0as5T7b/TpRcehJ5yV6NeXQ==}
engines: {node: '>=12.0.0'}
dependencies:
'@opentelemetry/api': 1.4.1
tslib: 2.6.1
dev: false
/@azure/core-tracing@1.0.1:
resolution: {integrity: sha512-I5CGMoLtX+pI17ZdiFJZgxMJApsK6jjfm85hpgp3oazCdq5Wxgh4wMr7ge/TTWW1B5WBuvIOI1fMU/FrOAMKrw==}
engines: {node: '>=12.0.0'}
dependencies:
tslib: 2.6.1
dev: false
/@azure/core-util@1.4.0:
resolution: {integrity: sha512-eGAyJpm3skVQoLiRqm/xPa+SXi/NPDdSHMxbRAz2lSprd+Zs+qrpQGQQ2VQ3Nttu+nSZR4XoYQC71LbEI7jsig==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
tslib: 2.6.1
dev: false
/@azure/identity@3.3.0:
resolution: {integrity: sha512-gISa/dAAxrWt6F2WiDXZY0y2xY4MLlN2wkNW4cPuq5OgPQKLSkxLc4I2WR04puTfZyQZnpXbAapAMEj1b96fgg==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-auth': 1.5.0
'@azure/core-client': 1.7.3
'@azure/core-rest-pipeline': 1.12.0
'@azure/core-tracing': 1.0.1
'@azure/core-util': 1.4.0
'@azure/logger': 1.0.4
'@azure/msal-browser': 2.38.2
'@azure/msal-common': 13.3.0
'@azure/msal-node': 1.18.3
events: 3.3.0
jws: 4.0.0
open: 8.4.2
stoppable: 1.1.0
tslib: 2.6.1
uuid: 8.3.2
transitivePeerDependencies:
- supports-color
dev: false
/@azure/logger@1.0.4:
resolution: {integrity: sha512-ustrPY8MryhloQj7OWGe+HrYx+aoiOxzbXTtgblbV3xwCqpzUK36phH3XNHQKj3EPonyFUuDTfR3qFhTEAuZEg==}
engines: {node: '>=14.0.0'}
dependencies:
tslib: 2.6.1
dev: false
/@azure/msal-browser@2.38.2:
resolution: {integrity: sha512-71BeIn2we6LIgMplwCSaMq5zAwmalyJR3jFcVOZxNVfQ1saBRwOD+P77nLs5vrRCedVKTq8RMFhIOdpMLNno0A==}
engines: {node: '>=0.8.0'}
dependencies:
'@azure/msal-common': 13.3.0
dev: false
/@azure/msal-common@13.3.0:
resolution: {integrity: sha512-/VFWTicjcJbrGp3yQP7A24xU95NiDMe23vxIU1U6qdRPFsprMDNUohMudclnd+WSHE4/McqkZs/nUU3sAKkVjg==}
engines: {node: '>=0.8.0'}
dev: false
/@azure/msal-node@1.18.3:
resolution: {integrity: sha512-lI1OsxNbS/gxRD4548Wyj22Dk8kS7eGMwD9GlBZvQmFV8FJUXoXySL1BiNzDsHUE96/DS/DHmA+F73p1Dkcktg==}
engines: {node: 10 || 12 || 14 || 16 || 18}
dependencies:
'@azure/msal-common': 13.3.0
jsonwebtoken: 9.0.2
uuid: 8.3.2
dev: false
/@azure/storage-blob@12.15.0:
resolution: {integrity: sha512-e7JBKLOFi0QVJqqLzrjx1eL3je3/Ug2IQj24cTM9b85CsnnFjLGeGjJVIjbGGZaytewiCEG7r3lRwQX7fKj0/w==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-http': 3.0.3
'@azure/core-lro': 2.5.4
'@azure/core-paging': 1.5.0
'@azure/core-tracing': 1.0.0-preview.13
'@azure/logger': 1.0.4
events: 3.3.0
tslib: 2.6.1
transitivePeerDependencies:
- encoding
dev: false
/@babel/code-frame@7.22.10:
resolution: {integrity: sha512-/KKIMG4UEL35WmI9OlvMhurwtytjvXoFcGNrOvyG9zIzA8YmPjVtIZUf7b05+TPO7G7/GEmLHDaoCgACHl9hhA==}
engines: {node: '>=6.9.0'}
@@ -2602,6 +2789,11 @@ packages:
openapi-typescript: 5.4.1
dev: true
/@opentelemetry/api@1.4.1:
resolution: {integrity: sha512-O2yRJce1GOc6PAy3QxFM4NzFiWzvScDC1/5ihYBL6BUEVdq0XMWN01sppE+H6bBXbaFYipjwFLEWLg5PaSOThA==}
engines: {node: '>=8.0.0'}
dev: false
/@panva/hkdf@1.1.1:
resolution: {integrity: sha512-dhPeilub1NuIG0X5Kvhh9lH4iW3ZsHlnzwgwbOlgwQ2wG1IqFzsgHqmKPk3WzsdWAeaxKJxgM0+W433RmN45GA==}
dev: false
@@ -2916,6 +3108,11 @@ packages:
use-sync-external-store: 1.2.0(react@18.2.0)
dev: false
/@tootallnate/once@2.0.0:
resolution: {integrity: sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==}
engines: {node: '>= 10'}
dev: false
/@trpc/client@10.26.0(@trpc/server@10.26.0):
resolution: {integrity: sha512-ojHxQFIE97rBEGPK8p1ijbzo0T1IdEBoJ9fFSgWWL9FMuEEA/DNQ9s0uuiOrDKhCCdTFT1unfRharoJhB2/O2w==}
peerDependencies:
@@ -3333,6 +3530,12 @@ packages:
resolution: {integrity: sha512-Q5vtl1W5ue16D+nIaW8JWebSSraJVlK+EthKn7e7UcD4KWsaSJ8BqGPXNaPghgtcn/fhvrN17Tv8ksUsQpiplw==}
dev: false
/@types/tunnel@0.0.3:
resolution: {integrity: sha512-sOUTGn6h1SfQ+gbgqC364jLFBw2lnFqkgF3q0WovEHRLMrVD1sd5aufqi/aJObLekJO+Aq5z646U4Oxy6shXMA==}
dependencies:
'@types/node': 20.4.10
dev: false
/@types/unist@2.0.7:
resolution: {integrity: sha512-cputDpIbFgLUaGQn6Vqg3/YsJwxUwHLO13v3i5ouxT4lat0khip9AEWxtERujXV9wxIB1EyF97BSJFt6vpdI8g==}
dev: false
@@ -4102,6 +4305,10 @@ packages:
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
dev: false
/buffer-equal-constant-time@1.0.1:
resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==}
dev: false
/buffer-from@0.1.2:
resolution: {integrity: sha512-RiWIenusJsmI2KcvqQABB83tLxCByE3upSP8QU3rJDMVFGPWLvPQJt/O1Su9moRWeH7d+Q2HYb68f6+v+tw2vg==}
dev: false
@@ -4707,6 +4914,11 @@ packages:
resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==}
dev: true
/define-lazy-prop@2.0.0:
resolution: {integrity: sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==}
engines: {node: '>=8'}
dev: false
/define-properties@1.2.0:
resolution: {integrity: sha512-xvqAVKGfT1+UAvPwKTVw/njhdQ8ZhXK4lI0bCIuCMrp2up9nPnaDftrLtmpTazqd1o+UY4zgzU+avtMbDP+ldA==}
engines: {node: '>= 0.4'}
@@ -4818,6 +5030,12 @@ packages:
safer-buffer: 2.1.2
dev: false
/ecdsa-sig-formatter@1.0.11:
resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==}
dependencies:
safe-buffer: 5.2.1
dev: false
/ee-first@1.1.1:
resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==}
dev: false
@@ -6061,6 +6279,17 @@ packages:
toidentifier: 1.0.1
dev: false
/http-proxy-agent@5.0.0:
resolution: {integrity: sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==}
engines: {node: '>= 6'}
dependencies:
'@tootallnate/once': 2.0.0
agent-base: 6.0.2
debug: 4.3.4
transitivePeerDependencies:
- supports-color
dev: false
/http-signature@1.2.0:
resolution: {integrity: sha512-CAbnr6Rz4CYQkLYUtSNXxQPUH2gK8f3iWexVlsnMeD+GjlsQ0Xsy1cOX+mN3dtxYomRy21CiOzU8Uhw6OwncEQ==}
engines: {node: '>=0.8', npm: '>=1.3.7'}
@@ -6256,6 +6485,12 @@ packages:
resolution: {integrity: sha512-RGdriMmQQvZ2aqaQq3awNA6dCGtKpiDFcOzrTWrDAT2MiWrKQVPmxLGHl7Y2nNu6led0kEyoX0enY0qXYsv9zw==}
dev: false
/is-docker@2.2.1:
resolution: {integrity: sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==}
engines: {node: '>=8'}
hasBin: true
dev: false
/is-extglob@2.1.1:
resolution: {integrity: sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==}
engines: {node: '>=0.10.0'}
@@ -6370,6 +6605,13 @@ packages:
engines: {node: '>=12.13'}
dev: false
/is-wsl@2.2.0:
resolution: {integrity: sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==}
engines: {node: '>=8'}
dependencies:
is-docker: 2.2.1
dev: false
/isarray@0.0.1:
resolution: {integrity: sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==}
dev: false
@@ -6399,7 +6641,7 @@ packages:
resolution: {integrity: sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg==}
engines: {node: '>= 10.13.0'}
dependencies:
'@types/node': 20.4.10
'@types/node': 18.16.0
merge-stream: 2.0.0
supports-color: 8.1.1
@@ -6514,6 +6756,22 @@ packages:
resolution: {integrity: sha512-S6cATIPVv1z0IlxdN+zUk5EPjkGCdnhN4wVSBlvoUO1tOLJootbo9CquNJmbIh4yikWHiUedhRYrNPn1arpEmQ==}
dev: false
/jsonwebtoken@9.0.2:
resolution: {integrity: sha512-PRp66vJ865SSqOlgqS8hujT5U4AOgMfhrwYIuIhfKaoSCZcirrmASQr8CX7cUg+RMih+hgznrjp99o+W4pJLHQ==}
engines: {node: '>=12', npm: '>=6'}
dependencies:
jws: 3.2.2
lodash.includes: 4.3.0
lodash.isboolean: 3.0.3
lodash.isinteger: 4.0.4
lodash.isnumber: 3.0.3
lodash.isplainobject: 4.0.6
lodash.isstring: 4.0.1
lodash.once: 4.1.1
ms: 2.1.3
semver: 7.5.4
dev: false
/jsprim@1.4.2:
resolution: {integrity: sha512-P2bSOMAc/ciLz6DzgjVlGJP9+BrJWu5UDGK70C2iweC5QBIeFf0ZXRvGjEj2uYgrY2MkAAhsSWHDWlFtEroZWw==}
engines: {node: '>=0.6.0'}
@@ -6534,6 +6792,36 @@ packages:
object.values: 1.1.6
dev: true
/jwa@1.4.1:
resolution: {integrity: sha512-qiLX/xhEEFKUAJ6FiBMbes3w9ATzyk5W7Hvzpa/SLYdxNtng+gcurvrI7TbACjIXlsJyr05/S1oUhZrc63evQA==}
dependencies:
buffer-equal-constant-time: 1.0.1
ecdsa-sig-formatter: 1.0.11
safe-buffer: 5.2.1
dev: false
/jwa@2.0.0:
resolution: {integrity: sha512-jrZ2Qx916EA+fq9cEAeCROWPTfCwi1IVHqT2tapuqLEVVDKFDENFw1oL+MwrTvH6msKxsd1YTDVw6uKEcsrLEA==}
dependencies:
buffer-equal-constant-time: 1.0.1
ecdsa-sig-formatter: 1.0.11
safe-buffer: 5.2.1
dev: false
/jws@3.2.2:
resolution: {integrity: sha512-YHlZCB6lMTllWDtSPHz/ZXTsi8S00usEV6v1tjq8tOUZzw7DpSDWVXjXDre6ed1w/pd495ODpHZYSdkRTsa0HA==}
dependencies:
jwa: 1.4.1
safe-buffer: 5.2.1
dev: false
/jws@4.0.0:
resolution: {integrity: sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==}
dependencies:
jwa: 2.0.0
safe-buffer: 5.2.1
dev: false
/kysely-codegen@0.10.1(kysely@0.26.1)(pg@8.11.2):
resolution: {integrity: sha512-8Bslh952gN5gtucRv4jTZDFD18RBioS6M50zHfe5kwb5iSyEAunU4ZYMdHzkHraa4zxjg5/183XlOryBCXLRIw==}
hasBin: true
@@ -6605,6 +6893,10 @@ packages:
resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==}
dev: false
/llama-tokenizer-js@1.1.3:
resolution: {integrity: sha512-+BUgsLCXVQJkjiD/t7PdESLn+yXJIRX/BJfwzVVYfKZ9aN3gsP9xoadBZxKnCxGz2Slby+S7x41gUr2TKNaS4Q==}
dev: false
/loader-runner@4.3.0:
resolution: {integrity: sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg==}
engines: {node: '>=6.11.5'}
@@ -6660,10 +6952,30 @@ packages:
resolution: {integrity: sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g==}
dev: false
/lodash.includes@4.3.0:
resolution: {integrity: sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==}
dev: false
/lodash.isboolean@3.0.3:
resolution: {integrity: sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==}
dev: false
/lodash.isinteger@4.0.4:
resolution: {integrity: sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==}
dev: false
/lodash.isnumber@3.0.3:
resolution: {integrity: sha512-QYqzpfwO3/CWf3XP+Z+tkQsfaLL/EnUlXWVkIk5FUPc4sBdTehEqZONuyRt2P67PXAk+NXmTBcc97zw9t1FQrw==}
dev: false
/lodash.isplainobject@4.0.6:
resolution: {integrity: sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==}
dev: false
/lodash.isstring@4.0.1:
resolution: {integrity: sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==}
dev: false
/lodash.merge@4.6.2:
resolution: {integrity: sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==}
dev: true
@@ -6672,6 +6984,10 @@ packages:
resolution: {integrity: sha512-GK3g5RPZWTRSeLSpgP8Xhra+pnjBC56q9FZYe1d5RN3TJ35dbkGy3YqBSMbyCrlbi+CM9Z3Jk5yTL7RCsqboyQ==}
dev: false
/lodash.once@4.1.1:
resolution: {integrity: sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==}
dev: false
/lodash.union@4.6.0:
resolution: {integrity: sha512-c4pB2CdGrGdjMKYLA+XiRDO7Y0PRQbm/Gzg8qMj+QH+pFVAoTp5sBpO0odL3FjoPCGjK96p6qsP+yQoiLoOBcw==}
dev: false
@@ -7177,6 +7493,15 @@ packages:
dependencies:
wrappy: 1.0.2
/open@8.4.2:
resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==}
engines: {node: '>=12'}
dependencies:
define-lazy-prop: 2.0.0
is-docker: 2.2.1
is-wsl: 2.2.0
dev: false
/openai@3.3.0:
resolution: {integrity: sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==}
dependencies:
@@ -7627,6 +7952,11 @@ packages:
resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==}
dev: false
/process@0.11.10:
resolution: {integrity: sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==}
engines: {node: '>= 0.6.0'}
dev: false
/progress@2.0.3:
resolution: {integrity: sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==}
engines: {node: '>=0.4.0'}
@@ -8272,6 +8602,10 @@ packages:
yoga-wasm-web: 0.3.3
dev: false
/sax@1.2.4:
resolution: {integrity: sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==}
dev: false
/scheduler@0.23.0:
resolution: {integrity: sha512-CtuThmgHNg7zIZWAXi3AsyIzA3n4xx7aNyjwC2VJldO2LMVDhFK+63xGqq6CsJH4rTAt6/M+N4GhZiDYPx9eUw==}
dependencies:
@@ -8296,7 +8630,6 @@ packages:
hasBin: true
dependencies:
lru-cache: 6.0.0
dev: true
/send@0.18.0:
resolution: {integrity: sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==}
@@ -8504,6 +8837,11 @@ packages:
resolution: {integrity: sha512-Rz6yejtVyWnVjC1RFvNmYL10kgjC49EOghxWn0RFqlCHGFpQx+Xe7yW3I4ceK1SGrWIGMjD5Kbue8W/udkbMJg==}
dev: true
/stoppable@1.1.0:
resolution: {integrity: sha512-KXDYZ9dszj6bzvnEMRYvxgeTHU74QBFL54XKtP3nyMuJ81CFYtABZ3bAzL2EdFUaEwJOBOgENyFj3R7oTzDyyw==}
engines: {node: '>=4', npm: '>=6'}
dev: false
/stream-buffers@3.0.2:
resolution: {integrity: sha512-DQi1h8VEBA/lURbSwFtEHnSTb9s2/pwLEaFuNhXwy1Dx3Sa0lOuYT2yNUr4/j2fs8oCAMANtrZ5OrPZtyVs3MQ==}
engines: {node: '>= 0.10.0'}
@@ -8876,6 +9214,11 @@ packages:
safe-buffer: 5.2.1
dev: false
/tunnel@0.0.6:
resolution: {integrity: sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==}
engines: {node: '>=0.6.11 <=0.7.0 || >=0.7.3'}
dev: false
/tweetnacl@0.14.5:
resolution: {integrity: sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA==}
dev: false
@@ -9464,6 +9807,19 @@ packages:
optional: true
dev: false
/xml2js@0.5.0:
resolution: {integrity: sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==}
engines: {node: '>=4.0.0'}
dependencies:
sax: 1.2.4
xmlbuilder: 11.0.1
dev: false
/xmlbuilder@11.0.1:
resolution: {integrity: sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==}
engines: {node: '>=4.0'}
dev: false
/xmlhttprequest-ssl@2.0.0:
resolution: {integrity: sha512-QKxVRxiRACQcVuQEYFsI1hhkrMlrXHPegbbd1yn9UHOmRxY+si12nQYzri3vbzt8VdTTRviqcKxcyllFas5z2A==}
engines: {node: '>=0.4.0'}