commit 2915aadce934dffe4fcfc35963e80cc27a88dfb0
Author: TCUDIKEL
Date:   Sun Apr 27 18:27:13 2025 +0300

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..b470af7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__
+.aider*
+.idea
diff --git a/Dockerfile b/Dockerfile
new file mode 100755
index 0000000..d97d088
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+RUN mkdir -p static
+
+COPY static/index.html static/
+
+EXPOSE 8000
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
diff --git a/benchmarks.py b/benchmarks.py
new file mode 100755
index 0000000..dc64762
--- /dev/null
+++ b/benchmarks.py
@@ -0,0 +1,25 @@
+from fastapi import APIRouter, HTTPException
+from typing import List, Dict, Any
+from data.sample_data import benchmark_data
+
+router = APIRouter()
+
+
+@router.get("/")
+async def get_benchmarks() -> Dict[str, List[Dict[str, Any]]]:
+    """
+    Get all benchmarks data
+    """
+    return benchmark_data
+
+
+@router.get("/{benchmark_id}")
+async def get_benchmark(benchmark_id: str) -> Dict[str, Any]:
+    """
+    Get a specific benchmark by ID
+    """
+    for benchmark in benchmark_data["benchmarks"]:
+        if benchmark["id"] == benchmark_id:
+            return benchmark
+
+    raise HTTPException(status_code=404, detail=f"Benchmark with ID {benchmark_id} not found")
diff --git a/data/__init__.py b/data/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/data/sample_data.py b/data/sample_data.py
new file mode 100755
index 0000000..c8e4edb
--- /dev/null
+++ b/data/sample_data.py
@@ -0,0 +1,81 @@
+benchmark_data = {
+    "benchmarks": [
+        {
+            "id": "benchmark-1",
+            "name": "General Knowledge Benchmark",
+            "date": "2025-03-10",
+            "questions": [
+                {
+                    "id": "q1",
+                    "text": "What is RAG in AI?",
+                    "answers": [
+                        {
+                            "model": "Base Model A",
+                            "response": "RAG stands for Retrieval Augmented Generation. It's a technique that combines a retrieval system with a text generation model. The retrieval system finds relevant documents from a corpus, and the generation model uses these documents as context to produce more accurate and informed responses.",
+                            "metrics": {"accuracy": 0.92, "latency": "120ms"}
+                        },
+                        {
+                            "model": "Fine-tuned Model A",
+                            "response": "Retrieval Augmented Generation (RAG) is a hybrid AI approach that enhances language models by retrieving relevant information from external knowledge sources before generating a response. This allows models to access up-to-date information beyond their training data and provide more accurate, contextually relevant answers with citations.",
+                            "metrics": {"accuracy": 0.95, "latency": "140ms"}
+                        },
+                        {
+                            "model": "Base Model B",
+                            "response": "RAG (Retrieval Augmented Generation) combines information retrieval with text generation. The model first retrieves relevant documents from a knowledge base and then uses these documents to generate an informed response. This approach helps overcome knowledge limitations in pretrained models and reduces hallucinations.",
+                            "metrics": {"accuracy": 0.88, "latency": "95ms"}
+                        }
+                    ]
+                },
+                {
+                    "id": "q2",
+                    "text": "How does fine-tuning differ from RAG?",
+                    "answers": [
+                        {
+                            "model": "Base Model A",
+                            "response": "Fine-tuning modifies the model's parameters through additional training on domain-specific data, permanently changing the model's behavior. RAG, on the other hand, augments a model's responses at inference time by retrieving and incorporating external information without changing the model itself. Fine-tuning is about adapting the model, while RAG is about enhancing its responses with external knowledge.",
+                            "metrics": {"accuracy": 0.89, "latency": "110ms"}
+                        },
+                        {
+                            "model": "Fine-tuned Model A",
+                            "response": "Fine-tuning and RAG represent different approaches to improving LLM capabilities. Fine-tuning updates the model's weights through additional training on specific datasets, essentially teaching the model new information that becomes part of its parameters. RAG keeps the model unchanged but supplements it with a retrieval system that fetches relevant information at query time. Fine-tuning is a training-time solution requiring model weight updates, while RAG is an inference-time solution that keeps the base model intact.",
+                            "metrics": {"accuracy": 0.94, "latency": "135ms"}
+                        },
+                        {
+                            "model": "Base Model B",
+                            "response": "Fine-tuning alters a model's weights by training on specific data, embedding knowledge directly into the model. RAG maintains the original model but adds a retrieval component that pulls relevant information from external sources during inference. Fine-tuning is more computationally intensive initially but faster at inference, while RAG requires less upfront computation but more resources during each query.",
+                            "metrics": {"accuracy": 0.90, "latency": "100ms"}
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "id": "benchmark-2",
+            "name": "Domain-Specific Benchmark",
+            "date": "2025-03-15",
+            "questions": [
+                {
+                    "id": "q1",
+                    "text": "Explain how RLHF works in LLM training",
+                    "answers": [
+                        {
+                            "model": "Base Model A",
+                            "response": "RLHF (Reinforcement Learning from Human Feedback) works by first training a reward model on human preference data, then using this reward model to guide policy optimization of the language model through reinforcement learning. The process typically involves collecting human feedback on model outputs, training a reward model to predict human preferences, and then optimizing the language model to maximize this reward function.",
+                            "metrics": {"accuracy": 0.87, "latency": "115ms"}
+                        },
+                        {
+                            "model": "Fine-tuned Model A",
+                            "response": "RLHF (Reinforcement Learning from Human Feedback) is a multi-stage process for aligning language models with human preferences. First, a base language model is trained via supervised learning. Next, human evaluators rate model outputs to create a dataset of preferences. A reward model is trained on this dataset to predict human preferences. Finally, the language model is fine-tuned using reinforcement learning (typically PPO) to maximize the reward predicted by this model, effectively aligning it with human values and reducing harmful outputs.",
+                            "metrics": {"accuracy": 0.93, "latency": "155ms"}
+                        },
+                        {
+                            "model": "Base Model B",
+                            "response": "RLHF uses reinforcement learning techniques with human feedback as the reward signal. The process begins with supervised fine-tuning, followed by collecting human feedback on model outputs. This feedback trains a reward model that can predict human preferences. The language model is then optimized using reinforcement learning algorithms (typically PPO) to maximize the reward predicted by the reward model, effectively teaching the model to generate outputs that humans prefer.",
+                            "metrics": {"accuracy": 0.85, "latency": "105ms"}
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100755
index 0000000..c980228
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,19 @@
+version: '3.8'
+
+services:
+  web:
+    build: .
+    ports:
+      - "8000:8000"
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/api/benchmarks"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 10s
+    deploy:
+      resources:
+        limits:
+          cpus: '1'
+          memory: 512M
diff --git a/main.py b/main.py
new file mode 100755
index 0000000..aa1af09
--- /dev/null
+++ b/main.py
@@ -0,0 +1,56 @@
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+import os
+import benchmarks
+
+app = FastAPI(title="RAG Benchmark Dashboard")
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, replace with specific origins
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# API routes
+app.include_router(benchmarks.router, prefix="/api/benchmarks", tags=["benchmarks"])
+
+# Define paths
+ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
+STATIC_DIR = os.path.join(ROOT_DIR, "static")
+
+# Mount static files directory
+if os.path.exists(STATIC_DIR):
+    app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
+
+# Place your new vanilla JS dashboard here
+DASHBOARD_FILE = os.path.join(STATIC_DIR, "index.html")
+
+
+@app.get("/", tags=["dashboard"])
+async def serve_root_dashboard():
+    if os.path.exists(DASHBOARD_FILE):
+        return FileResponse(DASHBOARD_FILE)
+    else:
+        raise HTTPException(status_code=404, detail="Dashboard file not found")
+
+
+@app.get("/{full_path:path}", tags=["dashboard"])
+async def serve_dashboard_for_path(full_path: str):
+    # Don't catch API routes
+    if full_path.startswith("api/"):
+        raise HTTPException(status_code=404, detail="Not found")
+
+    # Don't catch static files
+    if full_path.startswith("static/"):
+        raise HTTPException(status_code=404, detail="Not found")
+
+    # Serve the same dashboard.html for any other path (SPA routing)
+    if os.path.exists(DASHBOARD_FILE):
+        return FileResponse(DASHBOARD_FILE)
+    else:
+        raise HTTPException(status_code=404, detail="Dashboard file not found")
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100755
index 0000000..5d402e1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+fastapi==0.100.0
+uvicorn==0.22.0
+python-multipart==0.0.6
\ No newline at end of file
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..06d74ec
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+uvicorn main:app --host 0.0.0.0 --port 8888 --reload
\ No newline at end of file
diff --git a/static/index.html b/static/index.html
new file mode 100755
index 0000000..a236999
--- /dev/null
+++ b/static/index.html
@@ -0,0 +1,711 @@
[The 711 added lines of static/index.html (a single-page dashboard titled "GenAI Model Benchmark Dashboard") are not recoverable from this extract; the HTML markup was stripped, leaving only "+" line markers.]