Add useful datasets (#213)

* Create dataset from request logs

* Move drawer expansion logic out of app state

* Add empty dataset page

* Properly handle zero dataset state

* Add DatasetEntriesTable

* Open DatasetEntryEditorDrawer on row click

* Add editable messages

* Change Request Logs link to be a span

* Add FunctionCallEditor

* Change styling around

* Stop logging variant stats after a while

* Change FunctionCallEditor widths

* Record input tokens even on errored calls

* Allow user to add messages

* Allow changing from empty text to function call

* Fix some data layout issues

* Default to empty output

* Update arguments on blur

* Add beta flag to datasets tab

* Remove unused import

* Save training and testing datasets on fine tune

* Add DatasetEntryType

* Condense migrations

* Add index to datasetEntry

* Add datasetEntry index

* Fix types

* Enable scrolling beyond last line in VariantEditor

* Divide new dataset entries exactly along training/testing ratio
This commit is contained in:
arcticfly
2023-09-05 15:55:31 -07:00
committed by GitHub
parent 6153ebda41
commit 422a6ff4c6
57 changed files with 1924 additions and 233 deletions

View File

@@ -0,0 +1,27 @@
/*
Warnings:
- Added the required column `input` to the `DatasetEntry` table without a default value. This is not possible if the table is not empty.
- Added the required column `inputTokens` to the `DatasetEntry` table without a default value. This is not possible if the table is not empty.
- Added the required column `outputTokens` to the `DatasetEntry` table without a default value. This is not possible if the table is not empty.
- Added the required column `type` to the `DatasetEntry` table without a default value. This is not possible if the table is not empty.
*/
-- CreateEnum
CREATE TYPE "DatasetEntryType" AS ENUM ('TRAIN', 'TEST');
-- AlterTable
ALTER TABLE "Dataset" ADD COLUMN "trainingRatio" DOUBLE PRECISION NOT NULL DEFAULT 0.8;
-- AlterTable
ALTER TABLE "DatasetEntry" ADD COLUMN "input" JSONB NOT NULL,
ADD COLUMN "inputTokens" INTEGER NOT NULL,
ADD COLUMN "output" JSONB,
ADD COLUMN "outputTokens" INTEGER NOT NULL,
ADD COLUMN "type" "DatasetEntryType" NOT NULL;
-- CreateIndex
CREATE INDEX "DatasetEntry_datasetId_createdAt_id_idx" ON "DatasetEntry"("datasetId", "createdAt", "id");
-- CreateIndex
CREATE INDEX "DatasetEntry_datasetId_type_idx" ON "DatasetEntry"("datasetId", "type");

View File

@@ -179,9 +179,10 @@ model OutputEvaluation {
model Dataset {
id String @id @default(uuid()) @db.Uuid
name String
datasetEntries DatasetEntry[]
fineTunes FineTune[]
name String
datasetEntries DatasetEntry[]
fineTunes FineTune[]
trainingRatio Float @default(0.8)
projectId String @db.Uuid
project Project @relation(fields: [projectId], references: [id], onDelete: Cascade)
@@ -190,17 +191,32 @@ model Dataset {
updatedAt DateTime @updatedAt
}
enum DatasetEntryType {
TRAIN
TEST
}
model DatasetEntry {
id String @id @default(uuid()) @db.Uuid
loggedCallId String @db.Uuid
loggedCall LoggedCall @relation(fields: [loggedCallId], references: [id], onDelete: Cascade)
input Json
output Json?
inputTokens Int
outputTokens Int
type DatasetEntryType
datasetId String @db.Uuid
dataset Dataset? @relation(fields: [datasetId], references: [id], onDelete: Cascade)
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@index([datasetId, createdAt, id])
@@index([datasetId, type])
}
model Project {
@@ -452,7 +468,7 @@ model FineTune {
deploymentFinishedAt DateTime?
datasetId String @db.Uuid
dataset Dataset @relation(fields: [datasetId], references: [id], onDelete: Cascade)
dataset Dataset @relation(fields: [datasetId], references: [id], onDelete: Cascade)
projectId String @db.Uuid
project Project @relation(fields: [projectId], references: [id], onDelete: Cascade)

View File

@@ -1,5 +1,4 @@
import { prisma } from "~/server/db";
import { generateNewCell } from "~/server/utils/generateNewCell";
import dedent from "dedent";
import { execSync } from "child_process";
import fs from "fs";

View File

@@ -108,7 +108,7 @@ const MODEL_RESPONSE_TEMPLATES: {
inputTokens: 236,
outputTokens: 5,
finishReason: "stop",
tags: [{ name: "prompt_id", value: "define_func" }],
tags: [{ name: "prompt_id", value: "add_scenario" }],
},
{
reqPayload: {
@@ -311,7 +311,7 @@ const MODEL_RESPONSE_TEMPLATES: {
outputTokens: 108,
finishReason: "stop",
tags: [
{ name: "prompt_id", value: "chatcmpl-7" },
{ name: "prompt_id", value: "define_func" },
{ name: "some_other_tag", value: "some_other_value" },
],
},