Add useful datasets (#213)

* Create dataset from request logs
* Move drawer expansion logic out of app state
* Add empty dataset page
* Properly handle zero dataset state
* Add DatasetEntriesTable
* Open DatasetEntryEditorDrawer on row click
* Add editable messages
* Change Request Logs link to be a span
* Add FunctionCallEditor
* Change styling around
* Stop logging variant stats after a while
* Change FunctionCallEditor widths
* Record input tokens even on errored calls
* Allow user to add messages
* Allow changing from empty text to function call
* Fix some data layout issues
* Default to empty output
* Update arguments on blur
* Add beta flag to datasets tab
* Remove unused import
* Save training and testing datasets on fine tune
* Add DatasetEntryType
* Condense migrations
* Add index to datasetEntry
* Add datasetEntry index
* Fix types
* Enable scrolling beyond last line in VariantEditor
* Divide new dataset entries exactly along training/testing ratio
2023-09-05 15:55:31 -07:00
parent 6153ebda41
commit 422a6ff4c6
57 changed files with 1924 additions and 233 deletions
--- a/app/prisma/migrations/20230904234505_revamp_dataset_entry/migration.sql
+++ b/app/prisma/migrations/20230904234505_revamp_dataset_entry/migration.sql
@@ -0,0 +1,27 @@
+/*
+  Warnings:
+
+  - Added the required column `input` to the `DatasetEntry` table without a default value. This is not possible if the table is not empty.
+  - Added the required column `inputTokens` to the `DatasetEntry` table without a default value. This is not possible if the table is not empty.
+  - Added the required column `outputTokens` to the `DatasetEntry` table without a default value. This is not possible if the table is not empty.
+  - Added the required column `type` to the `DatasetEntry` table without a default value. This is not possible if the table is not empty.
+
+*/
+-- CreateEnum: label type for a dataset entry; the only allowed values are 'TRAIN' and 'TEST'.
+CREATE TYPE "DatasetEntryType" AS ENUM ('TRAIN', 'TEST');
+
+-- AlterTable: add "trainingRatio" to "Dataset"; it is NOT NULL with a default of 0.8, so existing rows receive 0.8.
+ALTER TABLE "Dataset" ADD COLUMN     "trainingRatio" DOUBLE PRECISION NOT NULL DEFAULT 0.8;
+
+-- AlterTable: add JSON payload, token-count and type columns to "DatasetEntry"; every column except "output" is NOT NULL with no default, so this fails on a non-empty table (see Warnings above).
+ALTER TABLE "DatasetEntry" ADD COLUMN     "input" JSONB NOT NULL,
+ADD COLUMN     "inputTokens" INTEGER NOT NULL,
+ADD COLUMN     "output" JSONB,
+ADD COLUMN     "outputTokens" INTEGER NOT NULL,
+ADD COLUMN     "type" "DatasetEntryType" NOT NULL;
+
+-- CreateIndex: composite index on ("datasetId", "createdAt", "id"); presumably for listing a dataset's entries in creation order - confirm against the queries that use it.
+CREATE INDEX "DatasetEntry_datasetId_createdAt_id_idx" ON "DatasetEntry"("datasetId", "createdAt", "id");
+
+-- CreateIndex: composite index on ("datasetId", "type"); presumably for selecting a dataset's TRAIN or TEST entries - confirm against the queries that use it.
+CREATE INDEX "DatasetEntry_datasetId_type_idx" ON "DatasetEntry"("datasetId", "type");
--- a/app/prisma/schema.prisma
+++ b/app/prisma/schema.prisma
@@ -179,9 +179,10 @@ model OutputEvaluation {
 model Dataset {
    id String @id @default(uuid()) @db.Uuid

-    name           String
-    datasetEntries DatasetEntry[]
-    fineTunes      FineTune[]
+    name                String
+    datasetEntries      DatasetEntry[]
+    fineTunes           FineTune[]
+    trainingRatio  Float @default(0.8)

    projectId String  @db.Uuid
    project   Project @relation(fields: [projectId], references: [id], onDelete: Cascade)
@@ -190,17 +191,32 @@ model Dataset {
    updatedAt DateTime @updatedAt
 }

+enum DatasetEntryType {
+    TRAIN // presumably marks an entry as part of the training split (cf. Dataset.trainingRatio) - confirm
+    TEST // presumably marks an entry as part of the testing split - confirm
+}
+
 model DatasetEntry {
    id String @id @default(uuid()) @db.Uuid

    loggedCallId String @db.Uuid
    loggedCall   LoggedCall @relation(fields: [loggedCallId], references: [id], onDelete: Cascade)

+    input        Json // required JSON payload; NOTE(review): its shape is not defined in this schema - confirm against the code that writes it
+    output       Json? // optional JSON payload; the column is nullable
+    inputTokens  Int // required token count for `input`; presumably computed by the writer - confirm
+    outputTokens Int // required token count for `output`; presumably computed by the writer - confirm
+
+    type DatasetEntryType // required; one of TRAIN or TEST
+
    datasetId String   @db.Uuid
    dataset   Dataset? @relation(fields: [datasetId], references: [id], onDelete: Cascade)

    createdAt DateTime @default(now())
    updatedAt DateTime @updatedAt
+
+    @@index([datasetId, createdAt, id]) // composite index; presumably for listing a dataset's entries in creation order - confirm
+    @@index([datasetId, type]) // composite index; presumably for selecting a dataset's entries by TRAIN/TEST - confirm
 }

 model Project {
@@ -452,7 +468,7 @@ model FineTune {
    deploymentFinishedAt DateTime?

    datasetId String   @db.Uuid
-    dataset   Dataset @relation(fields: [datasetId], references: [id], onDelete: Cascade)
+    dataset    Dataset @relation(fields: [datasetId], references: [id], onDelete: Cascade)

    projectId String  @db.Uuid
    project   Project @relation(fields: [projectId], references: [id], onDelete: Cascade)
--- a/app/prisma/seedAgiEval.ts
+++ b/app/prisma/seedAgiEval.ts
@@ -1,5 +1,4 @@
 import { prisma } from "~/server/db";
-import { generateNewCell } from "~/server/utils/generateNewCell";
 import dedent from "dedent";
 import { execSync } from "child_process";
 import fs from "fs";
--- a/app/prisma/seedDashboard.ts
+++ b/app/prisma/seedDashboard.ts
@@ -108,7 +108,7 @@ const MODEL_RESPONSE_TEMPLATES: {
    inputTokens: 236,
    outputTokens: 5,
    finishReason: "stop",
-    tags: [{ name: "prompt_id", value: "define_func" }],
+    tags: [{ name: "prompt_id", value: "add_scenario" }],
  },
  {
    reqPayload: {
@@ -311,7 +311,7 @@ const MODEL_RESPONSE_TEMPLATES: {
    outputTokens: 108,
    finishReason: "stop",
    tags: [
-      { name: "prompt_id", value: "chatcmpl-7" },
+      { name: "prompt_id", value: "define_func" },
      { name: "some_other_tag", value: "some_other_value" },
    ],
  },