Compare commits

...

13 Commits

Author         SHA1        Message                                          Date
David Corbitt  ffeed77ab4  Remove advanced options                          2023-09-07 09:19:20 -07:00
David Corbitt  5de1d293d6  Remove unused variable                           2023-09-07 09:16:42 -07:00
David Corbitt  54b4b1edab  Remove spinner                                   2023-09-07 09:16:27 -07:00
David Corbitt  93a967c6ac  Adjust status text                               2023-09-07 09:15:12 -07:00
David Corbitt  ed25dcce39  Add FileUploadsCard in bottom right corner       2023-09-07 09:03:32 -07:00
David Corbitt  c9a53c2b94  Fix lint                                         2023-09-07 07:40:40 -07:00
David Corbitt  91f1b2a8ec  Delete rows in chunks                            2023-09-07 07:40:21 -07:00
David Corbitt  ba9221c093  Give progress updates on downloads               2023-09-07 05:57:35 -07:00
David Corbitt  071ce47411  Upload training data through Azure Blob Storage  2023-09-07 05:12:26 -07:00
David Corbitt  4fddc80dc5  Remove consoles                                  2023-09-06 22:01:50 -07:00
David Corbitt  5aadf3c2ba  Add dataset entry deletion and export            2023-09-06 22:01:39 -07:00
David Corbitt  7c4ab151a4  Add upload data modal                            2023-09-06 09:24:41 -07:00
David Corbitt  c5c8dbf65e  Remove console                                   2023-09-05 20:48:36 -07:00
26 changed files with 1805 additions and 80 deletions

View File

@@ -40,3 +40,8 @@ SMTP_HOST="placeholder"
SMTP_PORT="placeholder"
SMTP_LOGIN="placeholder"
SMTP_PASSWORD="placeholder"
# Azure credentials are necessary for uploading large training data files
AZURE_STORAGE_ACCOUNT_NAME="placeholder"
AZURE_STORAGE_ACCOUNT_KEY="placeholder"
AZURE_STORAGE_CONTAINER_NAME="placeholder"

View File

@@ -26,6 +26,8 @@
"dependencies": {
"@anthropic-ai/sdk": "^0.5.8",
"@apidevtools/json-schema-ref-parser": "^10.1.0",
"@azure/identity": "^3.3.0",
"@azure/storage-blob": "12.15.0",
"@babel/standalone": "^7.22.9",
"@chakra-ui/anatomy": "^2.2.0",
"@chakra-ui/next-js": "^2.1.4",
@@ -69,6 +71,7 @@
"jsonschema": "^1.4.1",
"kysely": "^0.26.1",
"kysely-codegen": "^0.10.1",
"llama-tokenizer-js": "^1.1.3",
"lodash-es": "^4.17.21",
"lucide-react": "^0.265.0",
"marked": "^7.0.3",

View File

@@ -0,0 +1,5 @@
-- AlterTable
ALTER TABLE "DatasetEntry" ALTER COLUMN "loggedCallId" DROP NOT NULL,
ALTER COLUMN "inputTokens" DROP DEFAULT,
ALTER COLUMN "outputTokens" DROP DEFAULT,
ALTER COLUMN "type" DROP DEFAULT;

View File

@@ -0,0 +1,23 @@
-- CreateEnum
CREATE TYPE "DatasetFileUploadStatus" AS ENUM ('PENDING', 'DOWNLOADING', 'PROCESSING', 'SAVING', 'COMPLETE', 'ERROR');
-- CreateTable
CREATE TABLE "DatasetFileUpload" (
"id" UUID NOT NULL,
"datasetId" UUID NOT NULL,
"blobName" TEXT NOT NULL,
"fileName" TEXT NOT NULL,
"fileSize" INTEGER NOT NULL,
"progress" INTEGER NOT NULL DEFAULT 0,
"status" "DatasetFileUploadStatus" NOT NULL DEFAULT 'PENDING',
"uploadedAt" TIMESTAMP(3) NOT NULL,
"visible" BOOLEAN NOT NULL DEFAULT true,
"errorMessage" TEXT,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
CONSTRAINT "DatasetFileUpload_pkey" PRIMARY KEY ("id")
);
-- AddForeignKey
ALTER TABLE "DatasetFileUpload" ADD CONSTRAINT "DatasetFileUpload_datasetId_fkey" FOREIGN KEY ("datasetId") REFERENCES "Dataset"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@@ -176,12 +176,41 @@ model OutputEvaluation {
@@unique([modelResponseId, evaluationId])
}
enum DatasetFileUploadStatus {
PENDING
DOWNLOADING
PROCESSING
SAVING
COMPLETE
ERROR
}
model DatasetFileUpload {
id String @id @default(uuid()) @db.Uuid
datasetId String @db.Uuid
dataset Dataset @relation(fields: [datasetId], references: [id], onDelete: Cascade)
blobName String
fileName String
fileSize Int
progress Int @default(0) // Percentage
status DatasetFileUploadStatus @default(PENDING)
uploadedAt DateTime
visible Boolean @default(true)
errorMessage String?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
}
model Dataset {
id String @id @default(uuid()) @db.Uuid
name String
datasetEntries DatasetEntry[]
fineTunes FineTune[]
datasetFileUploads DatasetFileUpload[]
trainingRatio Float @default(0.8)
projectId String @db.Uuid
@@ -199,8 +228,8 @@ enum DatasetEntryType {
model DatasetEntry {
id String @id @default(uuid()) @db.Uuid
-  loggedCallId String     @db.Uuid
-  loggedCall   LoggedCall  @relation(fields: [loggedCallId], references: [id], onDelete: Cascade)
+  loggedCallId String?     @db.Uuid
+  loggedCall   LoggedCall? @relation(fields: [loggedCallId], references: [id], onDelete: Cascade)
input Json @default("[]")
output Json?

View File

@@ -7,12 +7,14 @@ import { BetaModal } from "./BetaModal";
const ActionButton = ({
icon,
iconBoxSize = 3.5,
label,
requireBeta = false,
onClick,
...buttonProps
}: {
icon: IconType;
iconBoxSize?: number;
label: string;
requireBeta?: boolean;
onClick?: () => void;
@@ -39,7 +41,9 @@ const ActionButton = ({
{...buttonProps}
>
<HStack spacing={1}>
{icon && <Icon as={icon} color={requireBeta ? "orange.400" : undefined} />}
{icon && (
<Icon as={icon} boxSize={iconBoxSize} color={requireBeta ? "orange.400" : undefined} />
)}
<Text display={{ base: "none", md: "flex" }}>{label}</Text>
</HStack>
</Button>

View File

@@ -0,0 +1,107 @@
import {
Modal,
ModalOverlay,
ModalContent,
ModalHeader,
ModalCloseButton,
ModalBody,
ModalFooter,
HStack,
VStack,
Icon,
Text,
Button,
useDisclosure,
type UseDisclosureReturn,
} from "@chakra-ui/react";
import { BsTrash } from "react-icons/bs";
import { useHandledAsyncCallback, useDataset } from "~/utils/hooks";
import { api } from "~/utils/api";
import { useAppStore } from "~/state/store";
import ActionButton from "../ActionButton";
import { maybeReportError } from "~/utils/errorHandling/maybeReportError";
import pluralize from "pluralize";
const DeleteButton = () => {
const selectedIds = useAppStore((s) => s.selectedDatasetEntries.selectedIds);
const disclosure = useDisclosure();
return (
<>
<ActionButton
onClick={disclosure.onOpen}
label="Delete"
icon={BsTrash}
isDisabled={selectedIds.size === 0}
requireBeta
/>
<DeleteDatasetEntriesModal disclosure={disclosure} />
</>
);
};
export default DeleteButton;
const DeleteDatasetEntriesModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => {
const dataset = useDataset().data;
const selectedIds = useAppStore((s) => s.selectedDatasetEntries.selectedIds);
const clearSelectedIds = useAppStore((s) => s.selectedDatasetEntries.clearSelectedIds);
const deleteRowsMutation = api.datasetEntries.delete.useMutation();
const utils = api.useContext();
const [deleteRows, deletionInProgress] = useHandledAsyncCallback(async () => {
if (!dataset?.id || !selectedIds.size) return;
// divide selectedIds into chunks of 15000 to reduce request size
const chunkSize = 15000;
const idsArray = Array.from(selectedIds);
for (let i = 0; i < idsArray.length; i += chunkSize) {
const response = await deleteRowsMutation.mutateAsync({
ids: idsArray.slice(i, i + chunkSize),
});
if (maybeReportError(response)) return;
}
await utils.datasetEntries.list.invalidate();
disclosure.onClose();
clearSelectedIds();
}, [deleteRowsMutation, dataset, selectedIds, utils]);
return (
<Modal size={{ base: "xl", md: "2xl" }} {...disclosure}>
<ModalOverlay />
<ModalContent w={1200}>
<ModalHeader>
<HStack>
<Icon as={BsTrash} />
<Text>Delete Logs</Text>
</HStack>
</ModalHeader>
<ModalCloseButton />
<ModalBody maxW="unset">
<VStack w="full" spacing={8} pt={4} alignItems="flex-start">
<Text>
Are you sure you want to delete the <b>{selectedIds.size}</b>{" "}
{pluralize("row", selectedIds.size)} you've selected?
</Text>
</VStack>
</ModalBody>
<ModalFooter>
<HStack>
<Button colorScheme="gray" onClick={disclosure.onClose} minW={24}>
Cancel
</Button>
<Button colorScheme="red" onClick={deleteRows} isLoading={deletionInProgress} minW={24}>
Delete
</Button>
</HStack>
</ModalFooter>
</ModalContent>
</Modal>
);
};

View File

@@ -0,0 +1,182 @@
import { useState, useEffect } from "react";
import {
Modal,
ModalOverlay,
ModalContent,
ModalHeader,
ModalCloseButton,
ModalBody,
ModalFooter,
HStack,
VStack,
Icon,
Text,
Button,
Checkbox,
NumberInput,
NumberInputField,
NumberInputStepper,
NumberIncrementStepper,
NumberDecrementStepper,
Collapse,
Flex,
useDisclosure,
type UseDisclosureReturn,
} from "@chakra-ui/react";
import { AiOutlineDownload } from "react-icons/ai";
import { useHandledAsyncCallback, useDataset } from "~/utils/hooks";
import { api } from "~/utils/api";
import { useAppStore } from "~/state/store";
import ActionButton from "../ActionButton";
import { FiChevronUp, FiChevronDown } from "react-icons/fi";
import InfoCircle from "../InfoCircle";
const ExportButton = () => {
const selectedIds = useAppStore((s) => s.selectedDatasetEntries.selectedIds);
const disclosure = useDisclosure();
return (
<>
<ActionButton
onClick={disclosure.onOpen}
label="Download"
icon={AiOutlineDownload}
isDisabled={selectedIds.size === 0}
requireBeta
/>
<ExportDatasetEntriesModal disclosure={disclosure} />
</>
);
};
export default ExportButton;
const ExportDatasetEntriesModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => {
const dataset = useDataset().data;
const selectedIds = useAppStore((s) => s.selectedDatasetEntries.selectedIds);
const clearSelectedIds = useAppStore((s) => s.selectedDatasetEntries.clearSelectedIds);
const [testingSplit, setTestingSplit] = useState(10);
const [removeDuplicates, setRemoveDuplicates] = useState(false);
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
useEffect(() => {
if (disclosure.isOpen) {
setTestingSplit(10);
setRemoveDuplicates(false);
}
}, [disclosure.isOpen]);
const exportDataMutation = api.datasetEntries.export.useMutation();
const [exportData, exportInProgress] = useHandledAsyncCallback(async () => {
if (!dataset?.id || !selectedIds.size || !testingSplit) return;
const response = await exportDataMutation.mutateAsync({
datasetId: dataset.id,
datasetEntryIds: Array.from(selectedIds),
testingSplit,
removeDuplicates,
});
// the response is a base64-encoded zip archive, so use the zip MIME type
const dataUrl = `data:application/zip;base64,${response}`;
const blob = await fetch(dataUrl).then((res) => res.blob());
const url = URL.createObjectURL(blob);
const a = document.createElement("a");
a.href = url;
a.download = `data.zip`;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
disclosure.onClose();
clearSelectedIds();
}, [exportDataMutation, dataset, selectedIds, testingSplit, removeDuplicates]);
return (
<Modal size={{ base: "xl", md: "2xl" }} {...disclosure}>
<ModalOverlay />
<ModalContent w={1200}>
<ModalHeader>
<HStack>
<Icon as={AiOutlineDownload} />
<Text>Export Logs</Text>
</HStack>
</ModalHeader>
<ModalCloseButton />
<ModalBody maxW="unset">
<VStack w="full" spacing={8} pt={4} alignItems="flex-start">
<Text>
We'll export the <b>{selectedIds.size}</b> rows you have selected in the OpenAI
training format.
</Text>
<VStack alignItems="flex-start" spacing={4}>
<Flex
flexDir={{ base: "column", md: "row" }}
alignItems={{ base: "flex-start", md: "center" }}
>
<HStack w={48} alignItems="center" spacing={1}>
<Text fontWeight="bold">Testing Split:</Text>
<InfoCircle tooltipText="The percent of your logs that will be reserved for testing and saved in another file. Logs are split randomly." />
</HStack>
<HStack>
<NumberInput
defaultValue={10}
onChange={(_, num) => setTestingSplit(num)}
min={1}
max={100}
w={48}
>
<NumberInputField />
<NumberInputStepper>
<NumberIncrementStepper />
<NumberDecrementStepper />
</NumberInputStepper>
</NumberInput>
</HStack>
</Flex>
</VStack>
<VStack alignItems="flex-start" spacing={0}>
<Button
variant="unstyled"
color="blue.600"
onClick={() => setShowAdvancedOptions(!showAdvancedOptions)}
>
<HStack>
<Text>Advanced Options</Text>
<Icon as={showAdvancedOptions ? FiChevronUp : FiChevronDown} />
</HStack>
</Button>
<Collapse in={showAdvancedOptions} unmountOnExit={true}>
<VStack align="stretch" pt={4}>
<HStack>
<Checkbox
colorScheme="blue"
isChecked={removeDuplicates}
onChange={(e) => setRemoveDuplicates(e.target.checked)}
>
<Text>Remove duplicates</Text>
</Checkbox>
<InfoCircle tooltipText="To avoid overfitting and speed up training, automatically deduplicate logs with matching input and output." />
</HStack>
</VStack>
</Collapse>
</VStack>
</VStack>
</ModalBody>
<ModalFooter>
<HStack>
<Button colorScheme="gray" onClick={disclosure.onClose} minW={24}>
Cancel
</Button>
<Button colorScheme="blue" onClick={exportData} isLoading={exportInProgress} minW={24}>
Download
</Button>
</HStack>
</ModalFooter>
</ModalContent>
</Modal>
);
};

View File

@@ -0,0 +1,148 @@
import { useState, useEffect } from "react";
import { VStack, HStack, Button, Text, Progress, IconButton, Portal } from "@chakra-ui/react";
import { BsX } from "react-icons/bs";
import { type RouterOutputs, api } from "~/utils/api";
import { useDataset, useHandledAsyncCallback } from "~/utils/hooks";
import { formatFileSize } from "~/utils/utils";
type FileUpload = RouterOutputs["datasets"]["listFileUploads"][0];
const FileUploadsCard = () => {
const dataset = useDataset();
const [fileUploadsRefetchInterval, setFileUploadsRefetchInterval] = useState<number>(500);
const fileUploads = api.datasets.listFileUploads.useQuery(
{ datasetId: dataset.data?.id as string },
{ enabled: !!dataset.data?.id, refetchInterval: fileUploadsRefetchInterval },
);
useEffect(() => {
if (fileUploads?.data?.some((fu) => fu.status !== "COMPLETE" && fu.status !== "ERROR")) {
setFileUploadsRefetchInterval(500);
} else {
setFileUploadsRefetchInterval(15000);
}
}, [fileUploads]);
const utils = api.useContext();
const hideFileUploadsMutation = api.datasets.hideFileUploads.useMutation();
const [hideAllFileUploads] = useHandledAsyncCallback(async () => {
if (!fileUploads.data?.length) return;
await hideFileUploadsMutation.mutateAsync({
fileUploadIds: fileUploads.data.map((upload) => upload.id),
});
await utils.datasets.listFileUploads.invalidate();
}, [hideFileUploadsMutation, fileUploads.data, utils]);
if (!fileUploads.data?.length) return null;
return (
<Portal>
<VStack
w={72}
borderRadius={8}
position="fixed"
bottom={8}
right={8}
overflow="hidden"
borderWidth={1}
boxShadow="0 0 40px 4px rgba(0, 0, 0, 0.1)"
minW={0}
bgColor="white"
>
<HStack p={4} w="full" bgColor="gray.200" justifyContent="space-between">
<Text fontWeight="bold">Uploads</Text>
<IconButton
aria-label="Close uploads"
as={BsX}
boxSize={6}
minW={0}
variant="ghost"
onClick={hideAllFileUploads}
cursor="pointer"
/>
</HStack>
{fileUploads?.data?.map((upload) => <FileUploadRow key={upload.id} fileUpload={upload} />)}
</VStack>
</Portal>
);
};
export default FileUploadsCard;
const FileUploadRow = ({ fileUpload }: { fileUpload: FileUpload }) => {
const { id, fileName, fileSize, progress, status, errorMessage } = fileUpload;
const utils = api.useContext();
const hideFileUploadsMutation = api.datasets.hideFileUploads.useMutation();
const [hideFileUpload, hidingInProgress] = useHandledAsyncCallback(async () => {
await hideFileUploadsMutation.mutateAsync({ fileUploadIds: [id] });
}, [id, hideFileUploadsMutation, utils]);
const [refreshDatasetEntries] = useHandledAsyncCallback(async () => {
await hideFileUploadsMutation.mutateAsync({ fileUploadIds: [id] });
await utils.datasets.listFileUploads.invalidate();
await utils.datasetEntries.list.invalidate();
}, [id, hideFileUploadsMutation, utils]);
return (
<VStack w="full" alignItems="flex-start" p={4} borderBottomWidth={1}>
<HStack w="full" justifyContent="space-between" alignItems="flex-start">
<VStack alignItems="flex-start" spacing={0}>
<Text fontWeight="bold">{fileName}</Text>
<Text fontSize="xs">({formatFileSize(fileSize, 2)})</Text>
</VStack>
<HStack spacing={0}>
{status === "COMPLETE" ? (
<Button variant="ghost" onClick={refreshDatasetEntries} color="orange.400" size="xs">
Refresh Table
</Button>
) : (
<IconButton
aria-label="Hide file upload"
as={BsX}
boxSize={6}
minW={0}
variant="ghost"
isLoading={hidingInProgress}
onClick={hideFileUpload}
cursor="pointer"
/>
)}
</HStack>
</HStack>
{errorMessage ? (
<Text alignSelf="center" pt={2}>
{errorMessage}
</Text>
) : (
<>
<Text alignSelf="center" fontSize="xs">
{getStatusText(status)}
</Text>
<Progress w="full" value={progress} borderRadius={2} />
</>
)}
</VStack>
);
};
const getStatusText = (status: FileUpload["status"]) => {
switch (status) {
case "PENDING":
return "Pending";
case "DOWNLOADING":
return "Downloading to Server";
case "PROCESSING":
return "Processing";
case "SAVING":
return "Saving";
case "COMPLETE":
return "Complete";
case "ERROR":
return "Error";
}
};

View File

@@ -22,10 +22,9 @@ import { useRouter } from "next/router";
import { useDataset, useDatasetEntries, useHandledAsyncCallback } from "~/utils/hooks";
import { api } from "~/utils/api";
import { useAppStore } from "~/state/store";
import ActionButton from "../ActionButton";
import InputDropdown from "../InputDropdown";
-import { FiChevronDown } from "react-icons/fi";
+// import { FiChevronDown } from "react-icons/fi";
const SUPPORTED_BASE_MODELS = ["llama2-7b", "llama2-13b", "llama2-70b", "gpt-3.5-turbo"];
@@ -53,7 +52,6 @@ const FineTuneButton = () => {
export default FineTuneButton;
const FineTuneModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => {
const selectedProjectId = useAppStore((s) => s.selectedProjectId);
const dataset = useDataset().data;
const datasetEntries = useDatasetEntries().data;
@@ -73,7 +71,7 @@ const FineTuneModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => {
const createFineTuneMutation = api.fineTunes.create.useMutation();
const [createFineTune, creationInProgress] = useHandledAsyncCallback(async () => {
-    if (!selectedProjectId || !modelSlug || !selectedBaseModel || !dataset) return;
+    if (!modelSlug || !selectedBaseModel || !dataset) return;
await createFineTuneMutation.mutateAsync({
slug: modelSlug,
baseModel: selectedBaseModel,
@@ -83,7 +81,7 @@ const FineTuneModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => {
await utils.fineTunes.list.invalidate();
await router.push({ pathname: "/fine-tunes" });
disclosure.onClose();
-  }, [createFineTuneMutation, selectedProjectId, modelSlug, selectedBaseModel]);
+  }, [createFineTuneMutation, modelSlug, selectedBaseModel]);
return (
<Modal size={{ base: "xl", md: "2xl" }} {...disclosure}>
@@ -133,12 +131,12 @@ const FineTuneModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => {
/>
</HStack>
</VStack>
-          <Button variant="unstyled" color="blue.600">
+          {/* <Button variant="unstyled" color="blue.600">
            <HStack>
              <Text>Advanced Options</Text>
              <Icon as={FiChevronDown} />
            </HStack>
-          </Button>
+          </Button> */}
</VStack>
</ModalBody>
<ModalFooter>

View File

@@ -0,0 +1,276 @@
import { useState, useEffect, useRef, useCallback } from "react";
import {
Modal,
ModalOverlay,
ModalContent,
ModalHeader,
ModalCloseButton,
ModalBody,
ModalFooter,
HStack,
VStack,
Icon,
Text,
Button,
Box,
useDisclosure,
type UseDisclosureReturn,
} from "@chakra-ui/react";
import pluralize from "pluralize";
import { AiOutlineCloudUpload, AiOutlineFile } from "react-icons/ai";
import { useDataset, useHandledAsyncCallback } from "~/utils/hooks";
import { api } from "~/utils/api";
import ActionButton from "../ActionButton";
import { validateTrainingRows, type TrainingRow, parseJSONL } from "./validateTrainingRows";
import { uploadDatasetEntryFile } from "~/utils/azure/website";
import { formatFileSize } from "~/utils/utils";
const UploadDataButton = () => {
const disclosure = useDisclosure();
return (
<>
<ActionButton
onClick={disclosure.onOpen}
label="Upload Data"
icon={AiOutlineCloudUpload}
iconBoxSize={4}
requireBeta
/>
<UploadDataModal disclosure={disclosure} />
</>
);
};
export default UploadDataButton;
const UploadDataModal = ({ disclosure }: { disclosure: UseDisclosureReturn }) => {
const dataset = useDataset().data;
const [validationError, setValidationError] = useState<string | null>(null);
const [trainingRows, setTrainingRows] = useState<TrainingRow[] | null>(null);
const [file, setFile] = useState<File | null>(null);
const fileInputRef = useRef<HTMLInputElement>(null);
const handleFileDrop = (e: React.DragEvent<HTMLDivElement>) => {
e.preventDefault();
const files = e.dataTransfer.files;
if (files.length > 0) {
processFile(files[0] as File);
}
};
const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
const files = e.target.files;
if (files && files.length > 0) {
processFile(files[0] as File);
}
};
const processFile = (file: File) => {
setFile(file);
// skip reading if file is larger than 10MB
if (file.size > 10000000) {
setTrainingRows(null);
return;
}
const reader = new FileReader();
reader.onload = (e: ProgressEvent<FileReader>) => {
const content = e.target?.result as string;
// Process the content, e.g., set to state
let parsedJSONL;
try {
parsedJSONL = parseJSONL(content) as TrainingRow[];
const validationError = validateTrainingRows(parsedJSONL);
if (validationError) {
setValidationError(validationError);
setTrainingRows(null);
return;
}
setTrainingRows(parsedJSONL);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
setValidationError("Unable to parse JSONL file: " + (e.message as string));
setTrainingRows(null);
return;
}
};
reader.readAsText(file);
};
const resetState = useCallback(() => {
setValidationError(null);
setTrainingRows(null);
setFile(null);
}, [setValidationError, setTrainingRows, setFile]);
useEffect(() => {
if (disclosure.isOpen) {
resetState();
}
}, [disclosure.isOpen, resetState]);
const triggerFileDownloadMutation = api.datasets.triggerFileDownload.useMutation();
const utils = api.useContext();
const [sendJSONL, sendingInProgress] = useHandledAsyncCallback(async () => {
if (!dataset || !file) return;
const blobName = await uploadDatasetEntryFile(file);
await triggerFileDownloadMutation.mutateAsync({
datasetId: dataset.id,
blobName,
fileName: file.name,
fileSize: file.size,
});
await utils.datasets.listFileUploads.invalidate();
disclosure.onClose();
}, [dataset, trainingRows, triggerFileDownloadMutation, file, utils]);
return (
<Modal size={{ base: "xl", md: "2xl" }} {...disclosure}>
<ModalOverlay />
<ModalContent w={1200}>
<ModalHeader>
<HStack>
<Text>Upload Training Logs</Text>
</HStack>
</ModalHeader>
<ModalCloseButton />
<ModalBody maxW="unset" p={8}>
<Box w="full" aspectRatio={1.5}>
{validationError && (
<VStack w="full" h="full" justifyContent="center" spacing={8}>
<Icon as={AiOutlineFile} boxSize={24} color="gray.300" />
<VStack w="full">
<Text fontSize={32} color="gray.500" fontWeight="bold">
Error
</Text>
<Text color="gray.500">{validationError}</Text>
</VStack>
<Text
as="span"
textDecor="underline"
color="gray.500"
_hover={{ color: "orange.400" }}
cursor="pointer"
onClick={resetState}
>
Try again
</Text>
</VStack>
)}
{!validationError && !file && (
<VStack
w="full"
h="full"
stroke="gray.300"
justifyContent="center"
borderRadius={8}
sx={{
"background-image": `url("data:image/svg+xml,%3csvg width='100%25' height='100%25' xmlns='http://www.w3.org/2000/svg'%3e%3crect x='2%25' y='2%25' width='96%25' height='96%25' fill='none' stroke='%23eee' stroke-width='4' stroke-dasharray='6%2c 14' stroke-dashoffset='0' stroke-linecap='square' rx='8' ry='8'/%3e%3c/svg%3e")`,
}}
onDragOver={(e) => e.preventDefault()}
onDrop={handleFileDrop}
>
<JsonFileIcon />
<Icon as={AiOutlineCloudUpload} boxSize={24} color="gray.300" />
<Text fontSize={32} color="gray.500" fontWeight="bold">
Drag & Drop
</Text>
<Text color="gray.500">
your .jsonl file here, or{" "}
<input
type="file"
ref={fileInputRef}
onChange={handleFileChange}
style={{ display: "none" }}
accept=".jsonl"
/>
<Text
as="span"
textDecor="underline"
_hover={{ color: "orange.400" }}
cursor="pointer"
onClick={() => fileInputRef.current?.click()}
>
browse
</Text>
</Text>
</VStack>
)}
{!validationError && file && (
<VStack w="full" h="full" justifyContent="center" spacing={8}>
<JsonFileIcon />
<VStack w="full">
{trainingRows ? (
<>
<Text fontSize={32} color="gray.500" fontWeight="bold">
Success
</Text>
<Text color="gray.500">
We'll upload <b>{trainingRows.length}</b>{" "}
{pluralize("row", trainingRows.length)} into <b>{dataset?.name}</b>.{" "}
</Text>
</>
) : (
<>
<Text fontSize={32} color="gray.500" fontWeight="bold">
{file.name}
</Text>
<Text color="gray.500">{formatFileSize(file.size)}</Text>
</>
)}
</VStack>
<Text
as="span"
textDecor="underline"
color="gray.500"
_hover={{ color: "orange.400" }}
cursor="pointer"
onClick={resetState}
>
Change file
</Text>
</VStack>
)}
</Box>
</ModalBody>
<ModalFooter>
<HStack>
<Button colorScheme="gray" onClick={disclosure.onClose} minW={24}>
Cancel
</Button>
<Button
colorScheme="orange"
onClick={sendJSONL}
isLoading={sendingInProgress}
minW={24}
isDisabled={!file || !!validationError}
>
Upload
</Button>
</HStack>
</ModalFooter>
</ModalContent>
</Modal>
);
};
const JsonFileIcon = () => (
<Box position="relative" display="flex" alignItems="center" justifyContent="center">
<Icon as={AiOutlineFile} boxSize={24} color="gray.300" />
<Text position="absolute" color="orange.400" fontWeight="bold" fontSize={12} pt={4}>
JSONL
</Text>
</Box>
);

View File

@@ -0,0 +1,71 @@
import { type CreateChatCompletionRequestMessage } from "openai/resources/chat";
export type TrainingRow = {
input: CreateChatCompletionRequestMessage[];
output?: CreateChatCompletionRequestMessage;
};
export const parseJSONL = (jsonlString: string): unknown[] => {
const lines = jsonlString.trim().split("\n");
let lineNumber = 0;
const parsedLines = [];
try {
for (const line of lines) {
lineNumber++;
parsedLines.push(JSON.parse(line));
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
throw new Error(`Error parsing line ${lineNumber}: ${e.message as string}`);
}
return parsedLines;
};
export const validateTrainingRows = (rows: unknown): string | null => {
if (!Array.isArray(rows)) return "training data is not an array";
for (let i = 0; i < rows.length; i++) {
const row = rows[i] as TrainingRow;
let errorMessage: string | null = null;
try {
errorMessage = validateTrainingRow(row);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (error: any) {
errorMessage = error.message;
}
if (errorMessage) return `row ${i + 1}: ${errorMessage}`;
}
return null;
};
const validateTrainingRow = (row: TrainingRow): string | null => {
if (!row) return "empty row";
if (!row.input) return "missing input";
// Validate input
if (!Array.isArray(row.input)) return "input is not an array";
if ((row.input as unknown[]).some((x) => typeof x !== "object"))
return "input contains invalid item";
if (row.input.some((x) => !x)) return "input contains empty item";
if (row.input.some((x) => !x.content && !x.function_call))
return "input contains item with no content or function_call";
if (row.input.some((x) => x.function_call && !x.function_call.arguments))
return "input contains item with function_call but no arguments";
if (row.input.some((x) => x.function_call && !x.function_call.name))
return "input contains item with function_call but no name";
// Validate output
if (row.output) {
if (typeof row.output !== "object") return "output is not an object";
if (!row.output.content && !row.output.function_call)
return "output contains no content or function_call";
if (row.output.function_call && !row.output.function_call.arguments)
return "output contains function_call but no arguments";
if (row.output.function_call && !row.output.function_call.name)
return "output contains function_call but no name";
}
return null;
};

View File

@@ -26,6 +26,9 @@ export const env = createEnv({
SMTP_PORT: z.string().default("placeholder"),
SMTP_LOGIN: z.string().default("placeholder"),
SMTP_PASSWORD: z.string().default("placeholder"),
AZURE_STORAGE_ACCOUNT_NAME: z.string().default("placeholder"),
AZURE_STORAGE_ACCOUNT_KEY: z.string().default("placeholder"),
AZURE_STORAGE_CONTAINER_NAME: z.string().default("placeholder"),
WORKER_CONCURRENCY: z
.string()
.default("10")
@@ -72,6 +75,9 @@ export const env = createEnv({
SMTP_PORT: process.env.SMTP_PORT,
SMTP_LOGIN: process.env.SMTP_LOGIN,
SMTP_PASSWORD: process.env.SMTP_PASSWORD,
AZURE_STORAGE_ACCOUNT_NAME: process.env.AZURE_STORAGE_ACCOUNT_NAME,
AZURE_STORAGE_ACCOUNT_KEY: process.env.AZURE_STORAGE_ACCOUNT_KEY,
AZURE_STORAGE_CONTAINER_NAME: process.env.AZURE_STORAGE_CONTAINER_NAME,
WORKER_CONCURRENCY: process.env.WORKER_CONCURRENCY,
WORKER_MAX_POOL_SIZE: process.env.WORKER_MAX_POOL_SIZE,
},

View File

@@ -10,9 +10,9 @@ import {
useDisclosure,
} from "@chakra-ui/react";
import Link from "next/link";
import { useState, useEffect } from "react";
import { AiOutlineDatabase } from "react-icons/ai";
import AppShell from "~/components/nav/AppShell";
import { api } from "~/utils/api";
import { useDataset, useHandledAsyncCallback } from "~/utils/hooks";
@@ -25,6 +25,10 @@ import DatasetEntryPaginator from "~/components/datasets/DatasetEntryPaginator";
import { useAppStore } from "~/state/store";
import FineTuneButton from "~/components/datasets/FineTuneButton";
import ExperimentButton from "~/components/datasets/ExperimentButton";
import UploadDataButton from "~/components/datasets/UploadDataButton";
// import DownloadButton from "~/components/datasets/DownloadButton";
import DeleteButton from "~/components/datasets/DeleteButton";
import FileUploadsCard from "~/components/datasets/FileUploadsCard";
export default function Dataset() {
const utils = api.useContext();
@@ -100,12 +104,16 @@ export default function Dataset() {
<VStack px={8} py={8} alignItems="flex-start" spacing={4} w="full">
<HStack w="full" justifyContent="flex-end">
<FineTuneButton />
<UploadDataButton />
<ExperimentButton />
{/* <DownloadButton /> */}
<DeleteButton />
</HStack>
<DatasetEntriesTable />
<DatasetEntryPaginator />
</VStack>
</VStack>
<FileUploadsCard />
</AppShell>
<DatasetConfigurationDrawer disclosure={drawerDisclosure} />
</>

View File

@@ -1,4 +1,3 @@
import { type Prisma } from "@prisma/client";
import { z } from "zod";
import { v4 as uuidv4 } from "uuid";
import {
@@ -7,13 +6,18 @@ import {
type CreateChatCompletionRequestMessage,
} from "openai/resources/chat";
import { TRPCError } from "@trpc/server";
import { shuffle } from "lodash-es";
import archiver from "archiver";
import { WritableStreamBuffer } from "stream-buffers";
import { createTRPCRouter, protectedProcedure } from "~/server/api/trpc";
import { prisma } from "~/server/db";
import { requireCanModifyProject, requireCanViewProject } from "~/utils/accessControl";
import { error, success } from "~/utils/errorHandling/standardResponses";
import { countOpenAIChatTokens } from "~/utils/countTokens";
import { type TrainingRow } from "~/components/datasets/validateTrainingRows";
import hashObject from "~/server/utils/hashObject";
import { type JsonValue } from "type-fest";
import { formatEntriesFromTrainingRows } from "~/server/utils/createEntriesFromTrainingRows";
export const datasetEntriesRouter = createTRPCRouter({
list: protectedProcedure
@@ -94,7 +98,7 @@ export const datasetEntriesRouter = createTRPCRouter({
name: z.string(),
})
.optional(),
-        loggedCallIds: z.string().array(),
+        loggedCallIds: z.string().array().optional(),
}),
)
.mutation(async ({ input, ctx }) => {
@@ -115,50 +119,33 @@ export const datasetEntriesRouter = createTRPCRouter({
return error("No datasetId or newDatasetParams provided");
}
-      const [loggedCalls, existingTrainingCount, existingTestingCount] = await prisma.$transaction([
-        prisma.loggedCall.findMany({
-          where: {
-            id: {
-              in: input.loggedCallIds,
-            },
-            modelResponse: {
-              isNot: null,
-            },
-          },
-          include: {
-            modelResponse: {
-              select: {
-                reqPayload: true,
-                respPayload: true,
-                inputTokens: true,
-                outputTokens: true,
-              },
-            },
-          },
-        }),
-        prisma.datasetEntry.count({
-          where: {
-            datasetId,
-            type: "TRAIN",
-          },
-        }),
-        prisma.datasetEntry.count({
-          where: {
-            datasetId,
-            type: "TEST",
-          },
-        }),
-      ]);
+      if (!input.loggedCallIds) {
+        return error("No loggedCallIds provided");
+      }
+      const loggedCalls = await prisma.loggedCall.findMany({
+        where: {
+          id: {
+            in: input.loggedCallIds,
+          },
+          modelResponse: {
+            isNot: null,
+          },
+        },
+        include: {
+          modelResponse: {
+            select: {
+              reqPayload: true,
+              respPayload: true,
+              inputTokens: true,
+              outputTokens: true,
+            },
+          },
+        },
+        orderBy: { createdAt: "desc" },
+      });
-      const shuffledLoggedCalls = shuffle(loggedCalls);
-      const totalEntries = existingTrainingCount + existingTestingCount + loggedCalls.length;
-      const numTrainingToAdd = Math.floor(trainingRatio * totalEntries) - existingTrainingCount;
-      const datasetEntriesToCreate: Prisma.DatasetEntryCreateManyInput[] = [];
-      let i = 0;
-      for (const loggedCall of shuffledLoggedCalls) {
+      const trainingRows = loggedCalls.map((loggedCall) => {
        const inputMessages = (
          loggedCall.modelResponse?.reqPayload as unknown as CompletionCreateParams
        ).messages;
@@ -166,24 +153,14 @@
        const resp = loggedCall.modelResponse?.respPayload as unknown as ChatCompletion | undefined;
        if (resp && resp.choices?.[0]) {
          output = resp.choices[0].message;
-        } else {
-          output = {
-            role: "assistant",
-            content: "",
-          };
-        }
-        datasetEntriesToCreate.push({
-          datasetId,
-          loggedCallId: loggedCall.id,
-          input: inputMessages as unknown as Prisma.InputJsonValue,
-          output: output as unknown as Prisma.InputJsonValue,
-          inputTokens: loggedCall.modelResponse?.inputTokens || 0,
-          outputTokens: loggedCall.modelResponse?.outputTokens || 0,
-          type: i < numTrainingToAdd ? "TRAIN" : "TEST",
-        });
-        i++;
-      }
+        }
+        return {
+          input: inputMessages as unknown as CreateChatCompletionRequestMessage[],
+          output: output as unknown as CreateChatCompletionRequestMessage,
+        };
+      });
+      const datasetEntriesToCreate = await formatEntriesFromTrainingRows(datasetId, trainingRows);
// Ensure dataset and dataset entries are created atomically
await prisma.$transaction([
@@ -198,13 +175,12 @@ export const datasetEntriesRouter = createTRPCRouter({
},
}),
prisma.datasetEntry.createMany({
-        data: shuffle(datasetEntriesToCreate),
+        data: datasetEntriesToCreate,
}),
]);
return success(datasetId);
}),
update: protectedProcedure
.input(
z.object({
@@ -242,7 +218,8 @@ export const datasetEntriesRouter = createTRPCRouter({
let parsedOutput = undefined;
let outputTokens = undefined;
-      if (input.updates.output) {
+      // The client might send "null" as a string, so we need to check for that
+      if (input.updates.output && input.updates.output !== "null") {
parsedOutput = JSON.parse(input.updates.output);
outputTokens = countOpenAIChatTokens("gpt-4-0613", [
parsedOutput as unknown as ChatCompletion.Choice.Message,
@@ -293,4 +270,68 @@ export const datasetEntriesRouter = createTRPCRouter({
return success("Dataset entries deleted");
}),
export: protectedProcedure
.input(
z.object({
datasetId: z.string(),
datasetEntryIds: z.string().array(),
testingSplit: z.number(),
removeDuplicates: z.boolean(),
}),
)
.mutation(async ({ input, ctx }) => {
const { projectId } = await prisma.dataset.findUniqueOrThrow({
where: { id: input.datasetId },
});
await requireCanViewProject(projectId, ctx);
const datasetEntries = await ctx.prisma.datasetEntry.findMany({
where: {
id: {
in: input.datasetEntryIds,
},
},
});
let rows: TrainingRow[] = datasetEntries.map((entry) => ({
input: entry.input as unknown as CreateChatCompletionRequestMessage[],
output: entry.output as unknown as CreateChatCompletionRequestMessage,
}));
if (input.removeDuplicates) {
const deduplicatedRows = [];
const rowHashSet = new Set<string>();
for (const row of rows) {
const rowHash = hashObject(row as unknown as JsonValue);
if (!rowHashSet.has(rowHash)) {
rowHashSet.add(rowHash);
deduplicatedRows.push(row);
}
}
rows = deduplicatedRows;
}
const splitIndex = Math.floor((rows.length * input.testingSplit) / 100);
const testingData = rows.slice(0, splitIndex);
const trainingData = rows.slice(splitIndex);
// Convert arrays to JSONL format
const trainingDataJSONL = trainingData.map((item) => JSON.stringify(item)).join("\n");
const testingDataJSONL = testingData.map((item) => JSON.stringify(item)).join("\n");
const output = new WritableStreamBuffer();
const archive = archiver("zip");
archive.pipe(output);
archive.append(trainingDataJSONL, { name: "train.jsonl" });
archive.append(testingDataJSONL, { name: "test.jsonl" });
await archive.finalize();
// Convert buffer to base64 (getContents() returns false when the buffer is empty)
const contents = output.getContents();
if (!contents) throw new Error("failed to read archive contents");
return contents.toString("base64");
}),
});

View File

@@ -1,8 +1,11 @@
import { z } from "zod";
import { createTRPCRouter, protectedProcedure } from "~/server/api/trpc";
import { prisma } from "~/server/db";
import { requireCanModifyProject, requireCanViewProject } from "~/utils/accessControl";
-import { success } from "~/utils/errorHandling/standardResponses";
+import { error, success } from "~/utils/errorHandling/standardResponses";
import { generateServiceClientUrl } from "~/utils/azure/server";
import { queueImportDatasetEntries } from "~/server/tasks/importDatasetEntries.task";
export const datasetsRouter = createTRPCRouter({
get: protectedProcedure.input(z.object({ id: z.string() })).query(async ({ input, ctx }) => {
@@ -94,4 +97,87 @@ export const datasetsRouter = createTRPCRouter({
return success("Dataset deleted");
}),
getServiceClientUrl: protectedProcedure
.input(z.object({ projectId: z.string() }))
.query(async ({ input, ctx }) => {
// The user must be able to modify the project to request an upload SAS token
await requireCanModifyProject(input.projectId, ctx);
return generateServiceClientUrl();
}),
triggerFileDownload: protectedProcedure
.input(
z.object({
datasetId: z.string(),
blobName: z.string(),
fileName: z.string(),
fileSize: z.number(),
}),
)
.mutation(async ({ input, ctx }) => {
const { projectId } = await prisma.dataset.findUniqueOrThrow({
where: { id: input.datasetId },
});
await requireCanViewProject(projectId, ctx);
const { id } = await prisma.datasetFileUpload.create({
data: {
datasetId: input.datasetId,
blobName: input.blobName,
status: "PENDING",
fileName: input.fileName,
fileSize: input.fileSize,
uploadedAt: new Date(),
},
});
await queueImportDatasetEntries(id);
}),
listFileUploads: protectedProcedure
.input(z.object({ datasetId: z.string() }))
.query(async ({ input, ctx }) => {
const { projectId } = await prisma.dataset.findUniqueOrThrow({
where: { id: input.datasetId },
});
await requireCanViewProject(projectId, ctx);
return await prisma.datasetFileUpload.findMany({
where: {
datasetId: input.datasetId,
visible: true,
},
orderBy: { createdAt: "desc" },
});
}),
hideFileUploads: protectedProcedure
.input(z.object({ fileUploadIds: z.string().array() }))
.mutation(async ({ input, ctx }) => {
if (!input.fileUploadIds.length) return error("No file upload ids provided");
const {
dataset: { projectId, id: datasetId },
} = await prisma.datasetFileUpload.findUniqueOrThrow({
where: { id: input.fileUploadIds[0] },
select: {
dataset: {
select: {
id: true,
projectId: true,
},
},
},
});
await requireCanModifyProject(projectId, ctx);
await prisma.datasetFileUpload.updateMany({
where: {
id: {
in: input.fileUploadIds,
},
datasetId,
},
data: {
visible: false,
},
});
}),
});

View File

@@ -0,0 +1,152 @@
import { prisma } from "~/server/db";
import defineTask from "./defineTask";
import { downloadBlobToString } from "~/utils/azure/server";
import {
type TrainingRow,
validateTrainingRows,
parseJSONL,
} from "~/components/datasets/validateTrainingRows";
import { formatEntriesFromTrainingRows } from "~/server/utils/createEntriesFromTrainingRows";
export type ImportDatasetEntriesJob = {
datasetFileUploadId: string;
};
export const importDatasetEntries = defineTask<ImportDatasetEntriesJob>(
"importDatasetEntries",
async (task) => {
const { datasetFileUploadId } = task;
const datasetFileUpload = await prisma.datasetFileUpload.findUnique({
where: { id: datasetFileUploadId },
});
if (!datasetFileUpload) {
      // The upload record no longer exists, so there is nothing to mark as errored
return;
}
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
status: "DOWNLOADING",
progress: 5,
},
});
const onBlobDownloadProgress = async (progress: number) => {
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
progress: 5 + Math.floor((progress / datasetFileUpload.fileSize) * 25),
},
});
};
const jsonlStr = await downloadBlobToString(datasetFileUpload.blobName, onBlobDownloadProgress);
let trainingRows: TrainingRow[] = [];
let validationError: string | null = null;
try {
trainingRows = parseJSONL(jsonlStr) as TrainingRow[];
validationError = validateTrainingRows(trainingRows);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
validationError = e.message;
}
if (validationError) {
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
errorMessage: `Invalid JSONL: ${validationError}`,
status: "ERROR",
},
});
return;
}
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
status: "PROCESSING",
progress: 30,
},
});
const updateCallback = async (progress: number) => {
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
progress: 30 + Math.floor((progress / trainingRows.length) * 69),
},
});
};
let datasetEntriesToCreate;
try {
datasetEntriesToCreate = await formatEntriesFromTrainingRows(
datasetFileUpload.datasetId,
trainingRows,
updateCallback,
500,
);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
errorMessage: `Error formatting rows: ${e.message as string}`,
status: "ERROR",
visible: true,
},
});
return;
}
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
status: "SAVING",
progress: 99,
},
});
await prisma.datasetEntry.createMany({
data: datasetEntriesToCreate,
});
await prisma.datasetFileUpload.update({
where: { id: datasetFileUploadId },
data: {
status: "COMPLETE",
progress: 100,
visible: true,
},
});
},
);
export const queueImportDatasetEntries = async (datasetFileUploadId: string) => {
await Promise.all([
prisma.datasetFileUpload.update({
where: {
id: datasetFileUploadId,
},
data: {
errorMessage: null,
status: "PENDING",
},
}),
importDatasetEntries.enqueue({ datasetFileUploadId }),
]);
};

View File

@@ -5,10 +5,11 @@ import "../../../sentry.server.config";
import { env } from "~/env.mjs";
import { queryModel } from "./queryModel.task";
import { runNewEval } from "./runNewEval.task";
import { importDatasetEntries } from "./importDatasetEntries.task";
console.log("Starting worker");
-const registeredTasks = [queryModel, runNewEval];
+const registeredTasks = [queryModel, runNewEval, importDatasetEntries];
const taskList = registeredTasks.reduce((acc, task) => {
acc[task.task.identifier] = task.task.handler;

View File

@@ -0,0 +1,70 @@
import { type Prisma } from "@prisma/client";
import { shuffle } from "lodash-es";
import {
type CreateChatCompletionRequestMessage,
type ChatCompletion,
} from "openai/resources/chat";
import { prisma } from "~/server/db";
import { type TrainingRow } from "~/components/datasets/validateTrainingRows";
import { countLlamaChatTokens } from "~/utils/countTokens";
export const formatEntriesFromTrainingRows = async (
datasetId: string,
trainingRows: TrainingRow[],
updateCallback?: (progress: number) => Promise<void>,
updateFrequency = 1000,
) => {
const [dataset, existingTrainingCount, existingTestingCount] = await prisma.$transaction([
prisma.dataset.findUnique({ where: { id: datasetId } }),
prisma.datasetEntry.count({
where: {
datasetId,
type: "TRAIN",
},
}),
prisma.datasetEntry.count({
where: {
datasetId,
type: "TEST",
},
}),
]);
const trainingRatio = dataset?.trainingRatio ?? 0.8;
const newTotalEntries = existingTrainingCount + existingTestingCount + trainingRows.length;
const numTrainingToAdd = Math.floor(trainingRatio * newTotalEntries) - existingTrainingCount;
const numTestingToAdd = trainingRows.length - numTrainingToAdd;
const typesToAssign = shuffle([
...Array(numTrainingToAdd).fill("TRAIN"),
...Array(numTestingToAdd).fill("TEST"),
]);
const datasetEntriesToCreate: Prisma.DatasetEntryCreateManyInput[] = [];
let i = 0;
for (const row of trainingRows) {
if (updateCallback && i % updateFrequency === 0) await updateCallback(i);
let outputTokens = 0;
if (row.output) {
outputTokens = countLlamaChatTokens([row.output as unknown as ChatCompletion.Choice.Message]);
}
// console.log("outputTokens", outputTokens);
datasetEntriesToCreate.push({
datasetId: datasetId,
input: row.input as unknown as Prisma.InputJsonValue,
output: (row.output as unknown as Prisma.InputJsonValue) ?? {
role: "assistant",
content: "",
},
inputTokens: countLlamaChatTokens(
row.input as unknown as CreateChatCompletionRequestMessage[],
),
outputTokens,
type: typesToAssign.pop() as "TRAIN" | "TEST",
});
i++;
}
return datasetEntriesToCreate;
};

View File

@@ -0,0 +1,94 @@
import {
BlobServiceClient,
generateAccountSASQueryParameters,
AccountSASPermissions,
AccountSASServices,
AccountSASResourceTypes,
StorageSharedKeyCredential,
SASProtocol,
} from "@azure/storage-blob";
import { DefaultAzureCredential } from "@azure/identity";
const accountName = process.env.AZURE_STORAGE_ACCOUNT_NAME;
if (!accountName) throw Error("Azure Storage accountName not found");
const accountKey = process.env.AZURE_STORAGE_ACCOUNT_KEY;
if (!accountKey) throw Error("Azure Storage accountKey not found");
const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME;
if (!containerName) throw Error("Azure Storage containerName not found");
const sharedKeyCredential = new StorageSharedKeyCredential(accountName, accountKey);
const blobServiceClient = new BlobServiceClient(
`https://${accountName}.blob.core.windows.net`,
new DefaultAzureCredential(),
);
const containerClient = blobServiceClient.getContainerClient(containerName);
export const generateServiceClientUrl = () => {
const sasOptions = {
services: AccountSASServices.parse("b").toString(), // blobs
resourceTypes: AccountSASResourceTypes.parse("sco").toString(), // service, container, object
permissions: AccountSASPermissions.parse("w"), // write permissions
protocol: SASProtocol.Https,
startsOn: new Date(),
expiresOn: new Date(new Date().valueOf() + 10 * 60 * 1000), // 10 minutes
};
let sasToken = generateAccountSASQueryParameters(sasOptions, sharedKeyCredential).toString();
// remove leading "?"
sasToken = sasToken[0] === "?" ? sasToken.substring(1) : sasToken;
return {
serviceClientUrl: `https://${accountName}.blob.core.windows.net?${sasToken}`,
containerName,
};
};
export async function downloadBlobToString(
blobName: string,
onProgress?: (progress: number) => Promise<void>,
chunkInterval?: number,
) {
const blobClient = containerClient.getBlobClient(blobName);
const downloadResponse = await blobClient.download();
if (!downloadResponse) throw Error("error downloading blob");
if (!downloadResponse.readableStreamBody)
throw Error("downloadResponse.readableStreamBody not found");
const downloaded = await streamToBuffer(
downloadResponse.readableStreamBody,
onProgress,
chunkInterval,
);
return downloaded.toString();
}
async function streamToBuffer(
readableStream: NodeJS.ReadableStream,
onProgress?: (progress: number) => Promise<void>,
chunkInterval = 1048576, // send progress every 1MB
): Promise<Buffer> {
return new Promise((resolve, reject) => {
const chunks: Uint8Array[] = [];
let bytesDownloaded = 0;
let lastReportedByteCount = 0;
readableStream.on("data", (data: ArrayBuffer) => {
chunks.push(data instanceof Buffer ? data : Buffer.from(data));
bytesDownloaded += data.byteLength;
if (onProgress && bytesDownloaded - lastReportedByteCount >= chunkInterval) {
void onProgress(bytesDownloaded); // progress in Bytes
lastReportedByteCount = bytesDownloaded;
}
});
readableStream.on("end", () => {
resolve(Buffer.concat(chunks));
});
readableStream.on("error", reject);
});
}

View File

@@ -0,0 +1,30 @@
import { BlobServiceClient } from "@azure/storage-blob";
import { v4 as uuidv4 } from "uuid";
import { useAppStore } from "~/state/store";
export const uploadDatasetEntryFile = async (file: File) => {
const { selectedProjectId: projectId, api } = useAppStore.getState();
if (!projectId) throw Error("projectId not found");
if (!api) throw Error("api not initialized");
const { serviceClientUrl, containerName } = await api.client.datasets.getServiceClientUrl.query({
projectId,
});
const blobServiceClient = new BlobServiceClient(serviceClientUrl);
// create container client
const containerClient = blobServiceClient.getContainerClient(containerName);
// base name without extension
const basename = file.name.split("/").pop()?.split(".").shift();
if (!basename) throw Error("basename not found");
const blobName = `${basename}-${uuidv4()}.jsonl`;
// create blob client
const blobClient = containerClient.getBlockBlobClient(blobName);
// upload file
await blobClient.uploadData(file);
return blobName;
};

View File

@@ -1,5 +1,7 @@
import { type ChatCompletion } from "openai/resources/chat";
import { GPTTokens } from "gpt-tokens";
import llamaTokenizer from "llama-tokenizer-js";
import { type SupportedModel } from "~/modelProviders/openai-ChatCompletion";
interface GPTTokensMessageItem {
@@ -22,3 +24,11 @@ export const countOpenAIChatTokens = (
messages: reformattedMessages as unknown as GPTTokensMessageItem[],
}).usedTokens;
};
export const countLlamaChatTokens = (messages: ChatCompletion.Choice.Message[]) => {
const stringToTokenize = messages
.map((message) => message.content || JSON.stringify(message.function_call))
.join("\n");
const tokens = llamaTokenizer.encode(stringToTokenize);
return tokens.length;
};

View File

@@ -20,7 +20,6 @@ export const parseableToFunctionCall = (str: string) => {
} catch {
return false;
}
console.log("remove me");
// Check if the parsedJSON is an object and not null
if (typeof parsedJSON !== "object" || parsedJSON === null) {
@@ -53,3 +52,18 @@ export const parseableToFunctionCall = (str: string) => {
return true;
};
export const formatFileSize = (bytes: number, decimals = 2) => {
if (bytes === 0) return "0 Bytes";
const k = 1024;
const dm = decimals < 0 ? 0 : decimals;
const sizes = ["Bytes", "KB", "MB", "GB", "TB"];
for (const size of sizes) {
if (bytes < k) return `${parseFloat(bytes.toFixed(dm))} ${size}`;
bytes /= k;
}
return "> 1024 TB";
};

View File

@@ -19,7 +19,9 @@
"baseUrl": ".",
"paths": {
"~/*": ["./src/*"]
-    }
+    },
+    "typeRoots": ["./types", "./node_modules/@types"],
+    "types": ["llama-tokenizer-js", "node"]
},
"include": [
".eslintrc.cjs",

View File

@@ -0,0 +1,4 @@
declare module "llama-tokenizer-js" {
export function encode(input: string): number[];
export function decode(input: number[]): string;
}

pnpm-lock.yaml (generated, 360 changed lines)
View File

@@ -14,6 +14,12 @@ importers:
'@apidevtools/json-schema-ref-parser':
specifier: ^10.1.0
version: 10.1.0
'@azure/identity':
specifier: ^3.3.0
version: 3.3.0
'@azure/storage-blob':
specifier: 12.15.0
version: 12.15.0
'@babel/standalone':
specifier: ^7.22.9
version: 7.22.9
@@ -143,6 +149,9 @@ importers:
kysely-codegen:
specifier: ^0.10.1
version: 0.10.1(kysely@0.26.1)(pg@8.11.2)
llama-tokenizer-js:
specifier: ^1.1.3
version: 1.1.3
lodash-es:
specifier: ^4.17.21
version: 4.17.21
@@ -465,6 +474,184 @@ packages:
js-yaml: 4.1.0
dev: true
/@azure/abort-controller@1.1.0:
resolution: {integrity: sha512-TrRLIoSQVzfAJX9H1JeFjzAoDGcoK1IYX1UImfceTZpsyYfWr09Ss1aHW1y5TrrR3iq6RZLBwJ3E24uwPhwahw==}
engines: {node: '>=12.0.0'}
dependencies:
tslib: 2.6.1
dev: false
/@azure/core-auth@1.5.0:
resolution: {integrity: sha512-udzoBuYG1VBoHVohDTrvKjyzel34zt77Bhp7dQntVGGD0ehVq48owENbBG8fIgkHRNUBQH5k1r0hpoMu5L8+kw==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-util': 1.4.0
tslib: 2.6.1
dev: false
/@azure/core-client@1.7.3:
resolution: {integrity: sha512-kleJ1iUTxcO32Y06dH9Pfi9K4U+Tlb111WXEnbt7R/ne+NLRwppZiTGJuTD5VVoxTMK5NTbEtm5t2vcdNCFe2g==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-auth': 1.5.0
'@azure/core-rest-pipeline': 1.12.0
'@azure/core-tracing': 1.0.1
'@azure/core-util': 1.4.0
'@azure/logger': 1.0.4
tslib: 2.6.1
transitivePeerDependencies:
- supports-color
dev: false
/@azure/core-http@3.0.3:
resolution: {integrity: sha512-QMib3wXotJMFhHgmJBPUF9YsyErw34H0XDFQd9CauH7TPB+RGcyl9Ayy7iURtJB04ngXhE6YwrQsWDXlSLrilg==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-auth': 1.5.0
'@azure/core-tracing': 1.0.0-preview.13
'@azure/core-util': 1.4.0
'@azure/logger': 1.0.4
'@types/node-fetch': 2.6.4
'@types/tunnel': 0.0.3
form-data: 4.0.0
node-fetch: 2.6.12(encoding@0.1.13)
process: 0.11.10
tslib: 2.6.1
tunnel: 0.0.6
uuid: 8.3.2
xml2js: 0.5.0
transitivePeerDependencies:
- encoding
dev: false
/@azure/core-lro@2.5.4:
resolution: {integrity: sha512-3GJiMVH7/10bulzOKGrrLeG/uCBH/9VtxqaMcB9lIqAeamI/xYQSHJL/KcsLDuH+yTjYpro/u6D/MuRe4dN70Q==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-util': 1.4.0
'@azure/logger': 1.0.4
tslib: 2.6.1
dev: false
/@azure/core-paging@1.5.0:
resolution: {integrity: sha512-zqWdVIt+2Z+3wqxEOGzR5hXFZ8MGKK52x4vFLw8n58pR6ZfKRx3EXYTxTaYxYHc/PexPUTyimcTWFJbji9Z6Iw==}
engines: {node: '>=14.0.0'}
dependencies:
tslib: 2.6.1
dev: false
/@azure/core-rest-pipeline@1.12.0:
resolution: {integrity: sha512-+MnSB0vGZjszSzr5AW8z93/9fkDu2RLtWmAN8gskURq7EW2sSwqy8jZa0V26rjuBVkwhdA3Hw8z3VWoeBUOw+A==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-auth': 1.5.0
'@azure/core-tracing': 1.0.1
'@azure/core-util': 1.4.0
'@azure/logger': 1.0.4
form-data: 4.0.0
http-proxy-agent: 5.0.0
https-proxy-agent: 5.0.1
tslib: 2.6.1
transitivePeerDependencies:
- supports-color
dev: false
/@azure/core-tracing@1.0.0-preview.13:
resolution: {integrity: sha512-KxDlhXyMlh2Jhj2ykX6vNEU0Vou4nHr025KoSEiz7cS3BNiHNaZcdECk/DmLkEB0as5T7b/TpRcehJ5yV6NeXQ==}
engines: {node: '>=12.0.0'}
dependencies:
'@opentelemetry/api': 1.4.1
tslib: 2.6.1
dev: false
/@azure/core-tracing@1.0.1:
resolution: {integrity: sha512-I5CGMoLtX+pI17ZdiFJZgxMJApsK6jjfm85hpgp3oazCdq5Wxgh4wMr7ge/TTWW1B5WBuvIOI1fMU/FrOAMKrw==}
engines: {node: '>=12.0.0'}
dependencies:
tslib: 2.6.1
dev: false
/@azure/core-util@1.4.0:
resolution: {integrity: sha512-eGAyJpm3skVQoLiRqm/xPa+SXi/NPDdSHMxbRAz2lSprd+Zs+qrpQGQQ2VQ3Nttu+nSZR4XoYQC71LbEI7jsig==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
tslib: 2.6.1
dev: false
/@azure/identity@3.3.0:
resolution: {integrity: sha512-gISa/dAAxrWt6F2WiDXZY0y2xY4MLlN2wkNW4cPuq5OgPQKLSkxLc4I2WR04puTfZyQZnpXbAapAMEj1b96fgg==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-auth': 1.5.0
'@azure/core-client': 1.7.3
'@azure/core-rest-pipeline': 1.12.0
'@azure/core-tracing': 1.0.1
'@azure/core-util': 1.4.0
'@azure/logger': 1.0.4
'@azure/msal-browser': 2.38.2
'@azure/msal-common': 13.3.0
'@azure/msal-node': 1.18.3
events: 3.3.0
jws: 4.0.0
open: 8.4.2
stoppable: 1.1.0
tslib: 2.6.1
uuid: 8.3.2
transitivePeerDependencies:
- supports-color
dev: false
/@azure/logger@1.0.4:
resolution: {integrity: sha512-ustrPY8MryhloQj7OWGe+HrYx+aoiOxzbXTtgblbV3xwCqpzUK36phH3XNHQKj3EPonyFUuDTfR3qFhTEAuZEg==}
engines: {node: '>=14.0.0'}
dependencies:
tslib: 2.6.1
dev: false
/@azure/msal-browser@2.38.2:
resolution: {integrity: sha512-71BeIn2we6LIgMplwCSaMq5zAwmalyJR3jFcVOZxNVfQ1saBRwOD+P77nLs5vrRCedVKTq8RMFhIOdpMLNno0A==}
engines: {node: '>=0.8.0'}
dependencies:
'@azure/msal-common': 13.3.0
dev: false
/@azure/msal-common@13.3.0:
resolution: {integrity: sha512-/VFWTicjcJbrGp3yQP7A24xU95NiDMe23vxIU1U6qdRPFsprMDNUohMudclnd+WSHE4/McqkZs/nUU3sAKkVjg==}
engines: {node: '>=0.8.0'}
dev: false
/@azure/msal-node@1.18.3:
resolution: {integrity: sha512-lI1OsxNbS/gxRD4548Wyj22Dk8kS7eGMwD9GlBZvQmFV8FJUXoXySL1BiNzDsHUE96/DS/DHmA+F73p1Dkcktg==}
engines: {node: 10 || 12 || 14 || 16 || 18}
dependencies:
'@azure/msal-common': 13.3.0
jsonwebtoken: 9.0.2
uuid: 8.3.2
dev: false
/@azure/storage-blob@12.15.0:
resolution: {integrity: sha512-e7JBKLOFi0QVJqqLzrjx1eL3je3/Ug2IQj24cTM9b85CsnnFjLGeGjJVIjbGGZaytewiCEG7r3lRwQX7fKj0/w==}
engines: {node: '>=14.0.0'}
dependencies:
'@azure/abort-controller': 1.1.0
'@azure/core-http': 3.0.3
'@azure/core-lro': 2.5.4
'@azure/core-paging': 1.5.0
'@azure/core-tracing': 1.0.0-preview.13
'@azure/logger': 1.0.4
events: 3.3.0
tslib: 2.6.1
transitivePeerDependencies:
- encoding
dev: false
/@babel/code-frame@7.22.10:
resolution: {integrity: sha512-/KKIMG4UEL35WmI9OlvMhurwtytjvXoFcGNrOvyG9zIzA8YmPjVtIZUf7b05+TPO7G7/GEmLHDaoCgACHl9hhA==}
engines: {node: '>=6.9.0'}
@@ -2602,6 +2789,11 @@ packages:
openapi-typescript: 5.4.1
dev: true
/@opentelemetry/api@1.4.1:
resolution: {integrity: sha512-O2yRJce1GOc6PAy3QxFM4NzFiWzvScDC1/5ihYBL6BUEVdq0XMWN01sppE+H6bBXbaFYipjwFLEWLg5PaSOThA==}
engines: {node: '>=8.0.0'}
dev: false
/@panva/hkdf@1.1.1:
resolution: {integrity: sha512-dhPeilub1NuIG0X5Kvhh9lH4iW3ZsHlnzwgwbOlgwQ2wG1IqFzsgHqmKPk3WzsdWAeaxKJxgM0+W433RmN45GA==}
dev: false
@@ -2916,6 +3108,11 @@ packages:
use-sync-external-store: 1.2.0(react@18.2.0)
dev: false
/@tootallnate/once@2.0.0:
resolution: {integrity: sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==}
engines: {node: '>= 10'}
dev: false
/@trpc/client@10.26.0(@trpc/server@10.26.0):
resolution: {integrity: sha512-ojHxQFIE97rBEGPK8p1ijbzo0T1IdEBoJ9fFSgWWL9FMuEEA/DNQ9s0uuiOrDKhCCdTFT1unfRharoJhB2/O2w==}
peerDependencies:
@@ -3333,6 +3530,12 @@ packages:
resolution: {integrity: sha512-Q5vtl1W5ue16D+nIaW8JWebSSraJVlK+EthKn7e7UcD4KWsaSJ8BqGPXNaPghgtcn/fhvrN17Tv8ksUsQpiplw==}
dev: false
/@types/tunnel@0.0.3:
resolution: {integrity: sha512-sOUTGn6h1SfQ+gbgqC364jLFBw2lnFqkgF3q0WovEHRLMrVD1sd5aufqi/aJObLekJO+Aq5z646U4Oxy6shXMA==}
dependencies:
'@types/node': 20.4.10
dev: false
/@types/unist@2.0.7:
resolution: {integrity: sha512-cputDpIbFgLUaGQn6Vqg3/YsJwxUwHLO13v3i5ouxT4lat0khip9AEWxtERujXV9wxIB1EyF97BSJFt6vpdI8g==}
dev: false
@@ -4102,6 +4305,10 @@ packages:
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
dev: false
/buffer-equal-constant-time@1.0.1:
resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==}
dev: false
/buffer-from@0.1.2:
resolution: {integrity: sha512-RiWIenusJsmI2KcvqQABB83tLxCByE3upSP8QU3rJDMVFGPWLvPQJt/O1Su9moRWeH7d+Q2HYb68f6+v+tw2vg==}
dev: false
@@ -4707,6 +4914,11 @@ packages:
resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==}
dev: true
/define-lazy-prop@2.0.0:
resolution: {integrity: sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==}
engines: {node: '>=8'}
dev: false
/define-properties@1.2.0:
resolution: {integrity: sha512-xvqAVKGfT1+UAvPwKTVw/njhdQ8ZhXK4lI0bCIuCMrp2up9nPnaDftrLtmpTazqd1o+UY4zgzU+avtMbDP+ldA==}
engines: {node: '>= 0.4'}
@@ -4818,6 +5030,12 @@ packages:
safer-buffer: 2.1.2
dev: false
/ecdsa-sig-formatter@1.0.11:
resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==}
dependencies:
safe-buffer: 5.2.1
dev: false
/ee-first@1.1.1:
resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==}
dev: false
@@ -6061,6 +6279,17 @@ packages:
toidentifier: 1.0.1
dev: false
/http-proxy-agent@5.0.0:
resolution: {integrity: sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==}
engines: {node: '>= 6'}
dependencies:
'@tootallnate/once': 2.0.0
agent-base: 6.0.2
debug: 4.3.4
transitivePeerDependencies:
- supports-color
dev: false
/http-signature@1.2.0:
resolution: {integrity: sha512-CAbnr6Rz4CYQkLYUtSNXxQPUH2gK8f3iWexVlsnMeD+GjlsQ0Xsy1cOX+mN3dtxYomRy21CiOzU8Uhw6OwncEQ==}
engines: {node: '>=0.8', npm: '>=1.3.7'}
@@ -6256,6 +6485,12 @@ packages:
resolution: {integrity: sha512-RGdriMmQQvZ2aqaQq3awNA6dCGtKpiDFcOzrTWrDAT2MiWrKQVPmxLGHl7Y2nNu6led0kEyoX0enY0qXYsv9zw==}
dev: false
/is-docker@2.2.1:
resolution: {integrity: sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==}
engines: {node: '>=8'}
hasBin: true
dev: false
/is-extglob@2.1.1:
resolution: {integrity: sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==}
engines: {node: '>=0.10.0'}
@@ -6370,6 +6605,13 @@ packages:
engines: {node: '>=12.13'}
dev: false
/is-wsl@2.2.0:
resolution: {integrity: sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==}
engines: {node: '>=8'}
dependencies:
is-docker: 2.2.1
dev: false
/isarray@0.0.1:
resolution: {integrity: sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==}
dev: false
@@ -6399,7 +6641,7 @@ packages:
resolution: {integrity: sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg==}
engines: {node: '>= 10.13.0'}
dependencies:
-      '@types/node': 20.4.10
+      '@types/node': 18.16.0
merge-stream: 2.0.0
supports-color: 8.1.1
@@ -6514,6 +6756,22 @@ packages:
resolution: {integrity: sha512-S6cATIPVv1z0IlxdN+zUk5EPjkGCdnhN4wVSBlvoUO1tOLJootbo9CquNJmbIh4yikWHiUedhRYrNPn1arpEmQ==}
dev: false
/jsonwebtoken@9.0.2:
resolution: {integrity: sha512-PRp66vJ865SSqOlgqS8hujT5U4AOgMfhrwYIuIhfKaoSCZcirrmASQr8CX7cUg+RMih+hgznrjp99o+W4pJLHQ==}
engines: {node: '>=12', npm: '>=6'}
dependencies:
jws: 3.2.2
lodash.includes: 4.3.0
lodash.isboolean: 3.0.3
lodash.isinteger: 4.0.4
lodash.isnumber: 3.0.3
lodash.isplainobject: 4.0.6
lodash.isstring: 4.0.1
lodash.once: 4.1.1
ms: 2.1.3
semver: 7.5.4
dev: false
/jsprim@1.4.2:
resolution: {integrity: sha512-P2bSOMAc/ciLz6DzgjVlGJP9+BrJWu5UDGK70C2iweC5QBIeFf0ZXRvGjEj2uYgrY2MkAAhsSWHDWlFtEroZWw==}
engines: {node: '>=0.6.0'}
@@ -6534,6 +6792,36 @@ packages:
object.values: 1.1.6
dev: true
/jwa@1.4.1:
resolution: {integrity: sha512-qiLX/xhEEFKUAJ6FiBMbes3w9ATzyk5W7Hvzpa/SLYdxNtng+gcurvrI7TbACjIXlsJyr05/S1oUhZrc63evQA==}
dependencies:
buffer-equal-constant-time: 1.0.1
ecdsa-sig-formatter: 1.0.11
safe-buffer: 5.2.1
dev: false
/jwa@2.0.0:
resolution: {integrity: sha512-jrZ2Qx916EA+fq9cEAeCROWPTfCwi1IVHqT2tapuqLEVVDKFDENFw1oL+MwrTvH6msKxsd1YTDVw6uKEcsrLEA==}
dependencies:
buffer-equal-constant-time: 1.0.1
ecdsa-sig-formatter: 1.0.11
safe-buffer: 5.2.1
dev: false
/jws@3.2.2:
resolution: {integrity: sha512-YHlZCB6lMTllWDtSPHz/ZXTsi8S00usEV6v1tjq8tOUZzw7DpSDWVXjXDre6ed1w/pd495ODpHZYSdkRTsa0HA==}
dependencies:
jwa: 1.4.1
safe-buffer: 5.2.1
dev: false
/jws@4.0.0:
resolution: {integrity: sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==}
dependencies:
jwa: 2.0.0
safe-buffer: 5.2.1
dev: false
/kysely-codegen@0.10.1(kysely@0.26.1)(pg@8.11.2):
resolution: {integrity: sha512-8Bslh952gN5gtucRv4jTZDFD18RBioS6M50zHfe5kwb5iSyEAunU4ZYMdHzkHraa4zxjg5/183XlOryBCXLRIw==}
hasBin: true
@@ -6605,6 +6893,10 @@ packages:
resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==}
dev: false
/llama-tokenizer-js@1.1.3:
resolution: {integrity: sha512-+BUgsLCXVQJkjiD/t7PdESLn+yXJIRX/BJfwzVVYfKZ9aN3gsP9xoadBZxKnCxGz2Slby+S7x41gUr2TKNaS4Q==}
dev: false
/loader-runner@4.3.0:
resolution: {integrity: sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg==}
engines: {node: '>=6.11.5'}
@@ -6660,10 +6952,30 @@ packages:
resolution: {integrity: sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g==}
dev: false
/lodash.includes@4.3.0:
resolution: {integrity: sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==}
dev: false
/lodash.isboolean@3.0.3:
resolution: {integrity: sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==}
dev: false
/lodash.isinteger@4.0.4:
resolution: {integrity: sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==}
dev: false
/lodash.isnumber@3.0.3:
resolution: {integrity: sha512-QYqzpfwO3/CWf3XP+Z+tkQsfaLL/EnUlXWVkIk5FUPc4sBdTehEqZONuyRt2P67PXAk+NXmTBcc97zw9t1FQrw==}
dev: false
/lodash.isplainobject@4.0.6:
resolution: {integrity: sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==}
dev: false
/lodash.isstring@4.0.1:
resolution: {integrity: sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==}
dev: false
/lodash.merge@4.6.2:
resolution: {integrity: sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==}
dev: true
@@ -6672,6 +6984,10 @@ packages:
resolution: {integrity: sha512-GK3g5RPZWTRSeLSpgP8Xhra+pnjBC56q9FZYe1d5RN3TJ35dbkGy3YqBSMbyCrlbi+CM9Z3Jk5yTL7RCsqboyQ==}
dev: false
/lodash.once@4.1.1:
resolution: {integrity: sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==}
dev: false
/lodash.union@4.6.0:
resolution: {integrity: sha512-c4pB2CdGrGdjMKYLA+XiRDO7Y0PRQbm/Gzg8qMj+QH+pFVAoTp5sBpO0odL3FjoPCGjK96p6qsP+yQoiLoOBcw==}
dev: false
@@ -7177,6 +7493,15 @@ packages:
dependencies:
wrappy: 1.0.2
/open@8.4.2:
resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==}
engines: {node: '>=12'}
dependencies:
define-lazy-prop: 2.0.0
is-docker: 2.2.1
is-wsl: 2.2.0
dev: false
/openai@3.3.0:
resolution: {integrity: sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==}
dependencies:
@@ -7627,6 +7952,11 @@ packages:
resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==}
dev: false
/process@0.11.10:
resolution: {integrity: sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==}
engines: {node: '>= 0.6.0'}
dev: false
/progress@2.0.3:
resolution: {integrity: sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==}
engines: {node: '>=0.4.0'}
@@ -8272,6 +8602,10 @@ packages:
yoga-wasm-web: 0.3.3
dev: false
/sax@1.2.4:
resolution: {integrity: sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==}
dev: false
/scheduler@0.23.0:
resolution: {integrity: sha512-CtuThmgHNg7zIZWAXi3AsyIzA3n4xx7aNyjwC2VJldO2LMVDhFK+63xGqq6CsJH4rTAt6/M+N4GhZiDYPx9eUw==}
dependencies:
@@ -8296,7 +8630,6 @@ packages:
hasBin: true
dependencies:
lru-cache: 6.0.0
-    dev: true
/send@0.18.0:
resolution: {integrity: sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==}
@@ -8504,6 +8837,11 @@ packages:
resolution: {integrity: sha512-Rz6yejtVyWnVjC1RFvNmYL10kgjC49EOghxWn0RFqlCHGFpQx+Xe7yW3I4ceK1SGrWIGMjD5Kbue8W/udkbMJg==}
dev: true
/stoppable@1.1.0:
resolution: {integrity: sha512-KXDYZ9dszj6bzvnEMRYvxgeTHU74QBFL54XKtP3nyMuJ81CFYtABZ3bAzL2EdFUaEwJOBOgENyFj3R7oTzDyyw==}
engines: {node: '>=4', npm: '>=6'}
dev: false
/stream-buffers@3.0.2:
resolution: {integrity: sha512-DQi1h8VEBA/lURbSwFtEHnSTb9s2/pwLEaFuNhXwy1Dx3Sa0lOuYT2yNUr4/j2fs8oCAMANtrZ5OrPZtyVs3MQ==}
engines: {node: '>= 0.10.0'}
@@ -8876,6 +9214,11 @@ packages:
safe-buffer: 5.2.1
dev: false
/tunnel@0.0.6:
resolution: {integrity: sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==}
engines: {node: '>=0.6.11 <=0.7.0 || >=0.7.3'}
dev: false
/tweetnacl@0.14.5:
resolution: {integrity: sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA==}
dev: false
@@ -9464,6 +9807,19 @@ packages:
optional: true
dev: false
/xml2js@0.5.0:
resolution: {integrity: sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==}
engines: {node: '>=4.0.0'}
dependencies:
sax: 1.2.4
xmlbuilder: 11.0.1
dev: false
/xmlbuilder@11.0.1:
resolution: {integrity: sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==}
engines: {node: '>=4.0'}
dev: false
/xmlhttprequest-ssl@2.0.0:
resolution: {integrity: sha512-QKxVRxiRACQcVuQEYFsI1hhkrMlrXHPegbbd1yn9UHOmRxY+si12nQYzri3vbzt8VdTTRviqcKxcyllFas5z2A==}
engines: {node: '>=0.4.0'}