commit 2915aadce934dffe4fcfc35963e80cc27a88dfb0
Author: TCUDIKEL
Date:   Sun Apr 27 18:27:13 2025 +0300

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..b470af7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__
+.aider*
+.idea
diff --git a/Dockerfile b/Dockerfile
new file mode 100755
index 0000000..d97d088
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+RUN mkdir -p static
+
+COPY static/index.html static/
+
+EXPOSE 8000
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
diff --git a/benchmarks.py b/benchmarks.py
new file mode 100755
index 0000000..dc64762
--- /dev/null
+++ b/benchmarks.py
@@ -0,0 +1,25 @@
+from fastapi import APIRouter, HTTPException
+from typing import List, Dict, Any
+from data.sample_data import benchmark_data
+
+router = APIRouter()
+
+
+@router.get("/")
+async def get_benchmarks() -> Dict[str, List[Dict[str, Any]]]:
+    """
+    Get all benchmarks data
+    """
+    return benchmark_data
+
+
+@router.get("/{benchmark_id}")
+async def get_benchmark(benchmark_id: str) -> Dict[str, Any]:
+    """
+    Get a specific benchmark by ID
+    """
+    for benchmark in benchmark_data["benchmarks"]:
+        if benchmark["id"] == benchmark_id:
+            return benchmark
+
+    raise HTTPException(status_code=404, detail=f"Benchmark with ID {benchmark_id} not found")
diff --git a/data/__init__.py b/data/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/data/sample_data.py b/data/sample_data.py
new file mode 100755
index 0000000..c8e4edb
--- /dev/null
+++ b/data/sample_data.py
@@ -0,0 +1,81 @@
+benchmark_data = {
+    "benchmarks": [
+        {
+            "id": "benchmark-1",
+            "name": "General Knowledge Benchmark",
+            "date": "2025-03-10",
+            "questions": [
+                {
+                    "id": "q1",
+                    "text": "What is RAG in AI?",
+                    "answers": [
+                        {
+                            "model": "Base Model A",
+                            "response": "RAG stands for Retrieval Augmented Generation. It's a technique that combines a retrieval system with a text generation model. The retrieval system finds relevant documents from a corpus, and the generation model uses these documents as context to produce more accurate and informed responses.",
+                            "metrics": {"accuracy": 0.92, "latency": "120ms"}
+                        },
+                        {
+                            "model": "Fine-tuned Model A",
+                            "response": "Retrieval Augmented Generation (RAG) is a hybrid AI approach that enhances language models by retrieving relevant information from external knowledge sources before generating a response. This allows models to access up-to-date information beyond their training data and provide more accurate, contextually relevant answers with citations.",
+                            "metrics": {"accuracy": 0.95, "latency": "140ms"}
+                        },
+                        {
+                            "model": "Base Model B",
+                            "response": "RAG (Retrieval Augmented Generation) combines information retrieval with text generation. The model first retrieves relevant documents from a knowledge base and then uses these documents to generate an informed response. This approach helps overcome knowledge limitations in pretrained models and reduces hallucinations.",
+                            "metrics": {"accuracy": 0.88, "latency": "95ms"}
+                        }
+                    ]
+                },
+                {
+                    "id": "q2",
+                    "text": "How does fine-tuning differ from RAG?",
+                    "answers": [
+                        {
+                            "model": "Base Model A",
+                            "response": "Fine-tuning modifies the model's parameters through additional training on domain-specific data, permanently changing the model's behavior. RAG, on the other hand, augments a model's responses at inference time by retrieving and incorporating external information without changing the model itself. Fine-tuning is about adapting the model, while RAG is about enhancing its responses with external knowledge.",
+                            "metrics": {"accuracy": 0.89, "latency": "110ms"}
+                        },
+                        {
+                            "model": "Fine-tuned Model A",
+                            "response": "Fine-tuning and RAG represent different approaches to improving LLM capabilities. Fine-tuning updates the model's weights through additional training on specific datasets, essentially teaching the model new information that becomes part of its parameters. RAG keeps the model unchanged but supplements it with a retrieval system that fetches relevant information at query time. Fine-tuning is a training-time solution requiring model weight updates, while RAG is an inference-time solution that keeps the base model intact.",
+                            "metrics": {"accuracy": 0.94, "latency": "135ms"}
+                        },
+                        {
+                            "model": "Base Model B",
+                            "response": "Fine-tuning alters a model's weights by training on specific data, embedding knowledge directly into the model. RAG maintains the original model but adds a retrieval component that pulls relevant information from external sources during inference. Fine-tuning is more computationally intensive initially but faster at inference, while RAG requires less upfront computation but more resources during each query.",
+                            "metrics": {"accuracy": 0.90, "latency": "100ms"}
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "id": "benchmark-2",
+            "name": "Domain-Specific Benchmark",
+            "date": "2025-03-15",
+            "questions": [
+                {
+                    "id": "q1",
+                    "text": "Explain how RLHF works in LLM training",
+                    "answers": [
+                        {
+                            "model": "Base Model A",
+                            "response": "RLHF (Reinforcement Learning from Human Feedback) works by first training a reward model on human preference data, then using this reward model to guide policy optimization of the language model through reinforcement learning. The process typically involves collecting human feedback on model outputs, training a reward model to predict human preferences, and then optimizing the language model to maximize this reward function.",
+                            "metrics": {"accuracy": 0.87, "latency": "115ms"}
+                        },
+                        {
+                            "model": "Fine-tuned Model A",
+                            "response": "RLHF (Reinforcement Learning from Human Feedback) is a multi-stage process for aligning language models with human preferences. First, a base language model is trained via supervised learning. Next, human evaluators rate model outputs to create a dataset of preferences. A reward model is trained on this dataset to predict human preferences. Finally, the language model is fine-tuned using reinforcement learning (typically PPO) to maximize the reward predicted by this model, effectively aligning it with human values and reducing harmful outputs.",
+                            "metrics": {"accuracy": 0.93, "latency": "155ms"}
+                        },
+                        {
+                            "model": "Base Model B",
+                            "response": "RLHF uses reinforcement learning techniques with human feedback as the reward signal. The process begins with supervised fine-tuning, followed by collecting human feedback on model outputs. This feedback trains a reward model that can predict human preferences. The language model is then optimized using reinforcement learning algorithms (typically PPO) to maximize the reward predicted by the reward model, effectively teaching the model to generate outputs that humans prefer.",
+                            "metrics": {"accuracy": 0.85, "latency": "105ms"}
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100755
index 0000000..c980228
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,19 @@
+version: '3.8'
+
+services:
+  web:
+    build: .
+    ports:
+      - "8000:8000"
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/api/benchmarks"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 10s
+    deploy:
+      resources:
+        limits:
+          cpus: '1'
+          memory: 512M
diff --git a/main.py b/main.py
new file mode 100755
index 0000000..aa1af09
--- /dev/null
+++ b/main.py
@@ -0,0 +1,56 @@
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+import os
+import benchmarks
+
+app = FastAPI(title="RAG Benchmark Dashboard")
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, replace with specific origins
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# API routes
+app.include_router(benchmarks.router, prefix="/api/benchmarks", tags=["benchmarks"])
+
+# Define paths
+ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
+STATIC_DIR = os.path.join(ROOT_DIR, "static")
+
+# Mount static files directory
+if os.path.exists(STATIC_DIR):
+    app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
+
+# Place your new vanilla JS dashboard here
+DASHBOARD_FILE = os.path.join(STATIC_DIR, "index.html")
+
+
+@app.get("/", tags=["dashboard"])
+async def serve_root_dashboard():
+    if os.path.exists(DASHBOARD_FILE):
+        return FileResponse(DASHBOARD_FILE)
+    else:
+        raise HTTPException(status_code=404, detail="Dashboard file not found")
+
+
+@app.get("/{full_path:path}", tags=["dashboard"])
+async def serve_dashboard_for_path(full_path: str):
+    # Don't catch API routes
+    if full_path.startswith("api/"):
+        raise HTTPException(status_code=404, detail="Not found")
+
+    # Don't catch static files
+    if full_path.startswith("static/"):
+        raise HTTPException(status_code=404, detail="Not found")
+
+    # Serve the same dashboard.html for any other path (SPA routing)
+    if os.path.exists(DASHBOARD_FILE):
+        return FileResponse(DASHBOARD_FILE)
+    else:
+        raise HTTPException(status_code=404, detail="Dashboard file not found")
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100755
index 0000000..5d402e1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+fastapi==0.100.0
+uvicorn==0.22.0
+python-multipart==0.0.6
\ No newline at end of file
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..06d74ec
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+uvicorn main:app --host 0.0.0.0 --port 8888 --reload
\ No newline at end of file
diff --git a/static/index.html b/static/index.html
new file mode 100755
index 0000000..a236999
--- /dev/null
+++ b/static/index.html
@@ -0,0 +1,711 @@
[The 711 added lines of static/index.html (a single-page dashboard titled "GenAI Model Benchmark Dashboard") are not recoverable from this extract; the HTML markup was stripped, leaving only "+" line markers.]