Add useful datasets (#213)
* Create dataset from request logs
* Move drawer expansion logic out of app state
* Add empty dataset page
* Properly handle zero dataset state
* Add DatasetEntriesTable
* Open DatasetEntryEditorDrawer on row click
* Add editable messages
* Change Request Logs link to be a span
* Add FunctionCallEditor
* Change styling around
* Stop logging variant stats after a while
* Change FunctionCallEditor widths
* Record input tokens even on errored calls
* Allow user to add messages
* Allow changing from empty text to function call
* Fix some data layout issues
* Default to empty output
* Update arguments on blur
* Add beta flag to datasets tab
* Remove unused import
* Save training and testing datasets on fine tune
* Add DatasetEntryType
* Condense migrations
* Add index to datasetEntry
* Add datasetEntry index
* Fix types
* Enable scrolling beyond last line in VariantEditor
* Divide new dataset entries exactly along training/testing ratio
This commit is contained in:
@@ -0,0 +1,27 @@
|
||||
-- Migration summary: creates the "DatasetEntryType" enum, adds "trainingRatio" to
-- "Dataset", adds five columns to "DatasetEntry", and creates two indexes on "DatasetEntry".
/*
|
||||
Warnings:
|
||||
|
||||
- Added the required column `input` to the `DatasetEntry` table without a default value. This is not possible if the table is not empty.
|
||||
- Added the required column `inputTokens` to the `DatasetEntry` table without a default value. This is not possible if the table is not empty.
|
||||
- Added the required column `outputTokens` to the `DatasetEntry` table without a default value. This is not possible if the table is not empty.
|
||||
- Added the required column `type` to the `DatasetEntry` table without a default value. This is not possible if the table is not empty.
|
||||
|
||||
*/
|
||||
-- CreateEnum
|
||||
-- Two-valued enum (TRAIN, TEST); it is the type of the "DatasetEntry"."type" column added below.
CREATE TYPE "DatasetEntryType" AS ENUM ('TRAIN', 'TEST');
|
||||
|
||||
-- AlterTable
|
||||
-- NOT NULL with DEFAULT 0.8, so this column can be added to a table that already has rows.
ALTER TABLE "Dataset" ADD COLUMN "trainingRatio" DOUBLE PRECISION NOT NULL DEFAULT 0.8;
|
||||
|
||||
-- AlterTable
|
||||
-- "input", "inputTokens", "outputTokens" and "type" are NOT NULL with no DEFAULT; per the
-- warnings at the top of this file, that is not possible if "DatasetEntry" is not empty.
-- "output" is the only nullable column added here.
ALTER TABLE "DatasetEntry" ADD COLUMN "input" JSONB NOT NULL,
|
||||
ADD COLUMN "inputTokens" INTEGER NOT NULL,
|
||||
ADD COLUMN "output" JSONB,
|
||||
ADD COLUMN "outputTokens" INTEGER NOT NULL,
|
||||
ADD COLUMN "type" "DatasetEntryType" NOT NULL;
|
||||
|
||||
-- CreateIndex
|
||||
-- Composite index on (datasetId, createdAt, id).
-- NOTE(review): presumably backs listing a dataset's entries in creation order; confirm against the queries.
CREATE INDEX "DatasetEntry_datasetId_createdAt_id_idx" ON "DatasetEntry"("datasetId", "createdAt", "id");
|
||||
|
||||
-- CreateIndex
|
||||
-- Composite index on (datasetId, type).
-- NOTE(review): presumably backs selecting a dataset's TRAIN or TEST entries; confirm against the queries.
CREATE INDEX "DatasetEntry_datasetId_type_idx" ON "DatasetEntry"("datasetId", "type");
|
||||
@@ -179,9 +179,10 @@ model OutputEvaluation {
|
||||
model Dataset {
|
||||
id String @id @default(uuid()) @db.Uuid
|
||||
|
||||
name String
|
||||
datasetEntries DatasetEntry[]
|
||||
fineTunes FineTune[]
|
||||
name String
|
||||
datasetEntries DatasetEntry[]
|
||||
fineTunes FineTune[]
|
||||
trainingRatio Float @default(0.8)
|
||||
|
||||
projectId String @db.Uuid
|
||||
project Project @relation(fields: [projectId], references: [id], onDelete: Cascade)
|
||||
@@ -190,17 +191,32 @@ model Dataset {
|
||||
updatedAt DateTime @updatedAt
|
||||
}
|
||||
|
||||
// Type of the DatasetEntry.type field: every entry is tagged either TRAIN or TEST.
enum DatasetEntryType {
|
||||
TRAIN
|
||||
TEST
|
||||
}
|
||||
|
||||
// A single entry of a Dataset. Each entry references one LoggedCall and one Dataset,
// stores an input/output JSON pair with token counts, and is tagged TRAIN or TEST.
model DatasetEntry {
|
||||
id String @id @default(uuid()) @db.Uuid
|
||||
|
||||
loggedCallId String @db.Uuid
|
||||
// Deleting the referenced LoggedCall also deletes this entry (onDelete: Cascade).
loggedCall LoggedCall @relation(fields: [loggedCallId], references: [id], onDelete: Cascade)
|
||||
|
||||
// input is required; output is optional (nullable).
input Json
|
||||
output Json?
|
||||
// Both token counts are required and have no default.
inputTokens Int
|
||||
outputTokens Int
|
||||
|
||||
// Required, no default: TRAIN or TEST (see DatasetEntryType).
type DatasetEntryType
|
||||
|
||||
datasetId String @db.Uuid
|
||||
// Deleting the parent Dataset also deletes this entry (onDelete: Cascade).
// NOTE(review): the relation is declared optional (`Dataset?`) although `datasetId` is a
// required field; confirm this is intended.
dataset Dataset? @relation(fields: [datasetId], references: [id], onDelete: Cascade)
|
||||
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
|
||||
// Composite indexes on (datasetId, createdAt, id) and on (datasetId, type).
@@index([datasetId, createdAt, id])
|
||||
@@index([datasetId, type])
|
||||
}
|
||||
|
||||
model Project {
|
||||
@@ -452,7 +468,7 @@ model FineTune {
|
||||
deploymentFinishedAt DateTime?
|
||||
|
||||
datasetId String @db.Uuid
|
||||
dataset Dataset @relation(fields: [datasetId], references: [id], onDelete: Cascade)
|
||||
dataset Dataset @relation(fields: [datasetId], references: [id], onDelete: Cascade)
|
||||
|
||||
projectId String @db.Uuid
|
||||
project Project @relation(fields: [projectId], references: [id], onDelete: Cascade)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import { prisma } from "~/server/db";
|
||||
import { generateNewCell } from "~/server/utils/generateNewCell";
|
||||
import dedent from "dedent";
|
||||
import { execSync } from "child_process";
|
||||
import fs from "fs";
|
||||
|
||||
@@ -108,7 +108,7 @@ const MODEL_RESPONSE_TEMPLATES: {
|
||||
inputTokens: 236,
|
||||
outputTokens: 5,
|
||||
finishReason: "stop",
|
||||
tags: [{ name: "prompt_id", value: "define_func" }],
|
||||
tags: [{ name: "prompt_id", value: "add_scenario" }],
|
||||
},
|
||||
{
|
||||
reqPayload: {
|
||||
@@ -311,7 +311,7 @@ const MODEL_RESPONSE_TEMPLATES: {
|
||||
outputTokens: 108,
|
||||
finishReason: "stop",
|
||||
tags: [
|
||||
{ name: "prompt_id", value: "chatcmpl-7" },
|
||||
{ name: "prompt_id", value: "define_func" },
|
||||
{ name: "some_other_tag", value: "some_other_value" },
|
||||
],
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user