added simple rag, choose chunk size, evaluation

This commit is contained in:
nird
2024-07-14 19:57:45 +03:00
parent bb68bfaad6
commit 362f2cd0f6
9 changed files with 694 additions and 0 deletions

1
.gitignore vendored Normal file

@@ -0,0 +1 @@
.env

README.md

@@ -1,3 +1,7 @@
# RAG Techniques Demonstration 🚀
This repository showcases various advanced techniques for Retrieval-Augmented Generation (RAG) systems. RAG systems combine information retrieval with generative models to provide accurate and contextually rich responses. Explore the techniques below to enhance the performance and capabilities of your RAG systems.
@@ -116,3 +120,4 @@ Using NLP techniques to identify topic boundaries or coherent sections within do
- Providing transparency in the retrieval process by explaining why certain pieces of information were retrieved and how they relate to the query.
- Enhancing user trust and providing opportunities for system refinement.

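The explainable-retrieval notes above describe the technique only in prose. As a rough illustration, here is a minimal sketch of one way it could look with llama_index (the explain_retrieval helper and its prompt wording are illustrative assumptions; this commit does not implement the technique):

from llama_index.core import VectorStoreIndex
from llama_index.llms.openai import OpenAI

def explain_retrieval(index: VectorStoreIndex, query: str, top_k: int = 3):
    """Hypothetical helper: retrieve the top-k chunks, then ask an LLM to justify each hit."""
    llm = OpenAI(model="gpt-4o", temperature=0)
    retriever = index.as_retriever(similarity_top_k=top_k)
    for hit in retriever.retrieve(query):
        explanation = llm.complete(
            f"Query: {query}\n"
            f"Retrieved passage: {hit.node.get_content()}\n"
            "In one sentence, explain why this passage was retrieved for the query."
        )
        print(f"score={hit.score or 0.0:.2f} | {explanation.text.strip()}")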
Binary file not shown.

Binary file not shown.

256
choose_chunk_size.ipynb Normal file

@@ -0,0 +1,256 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import libraries and environment variables"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
"import random\n",
"\n",
"nest_asyncio.apply()\n",
"from dotenv import load_dotenv\n",
"\n",
"from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n",
"from llama_index.core.prompts import PromptTemplate\n",
"\n",
"from llama_index.core.evaluation import (\n",
" DatasetGenerator,\n",
" FaithfulnessEvaluator,\n",
" RelevancyEvaluator\n",
")\n",
"from llama_index.llms.openai import OpenAI\n",
"\n",
"import openai\n",
"import time\n",
"import os\n",
"load_dotenv()\n",
"openai.api_key = os.getenv(\"OPENAI_API_KEY\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read Docs"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"data_dir = \"data\"\n",
"documents = SimpleDirectoryReader(data_dir).load_data()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create evaluation questions and pick k out of them"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"num_eval_questions = 25\n",
"\n",
"eval_documents = documents[0:20]\n",
"data_generator = DatasetGenerator.from_documents(eval_documents)\n",
"eval_questions = data_generator.generate_questions_from_nodes()\n",
"k_eval_questions = random.sample(eval_questions, num_eval_questions)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Define metrics evaluators and modify llama_index faithfullness evaluator prompt to rely on the context "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# We will use GPT-4 for evaluating the responses\n",
"gpt4 = OpenAI(temperature=0, model=\"gpt-4o\")\n",
"\n",
"# Define service context for GPT-4 for evaluation\n",
"service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4)\n",
"\n",
"# Define Faithfulness and Relevancy Evaluators which are based on GPT-4\n",
"faithfulness_gpt4 = FaithfulnessEvaluator(service_context=service_context_gpt4)\n",
"\n",
"faithfulness_new_prompt_template = PromptTemplate(\"\"\" Please tell if a given piece of information is directly supported by the context.\n",
" You need to answer with either YES or NO.\n",
" Answer YES if any part of the context explicitly supports the information, even if most of the context is unrelated. If the context does not explicitly support the information, answer NO. Some examples are provided below.\n",
"\n",
" Information: Apple pie is generally double-crusted.\n",
" Context: An apple pie is a fruit pie in which the principal filling ingredient is apples.\n",
" Apple pie is often served with whipped cream, ice cream ('apple pie à la mode'), custard, or cheddar cheese.\n",
" It is generally double-crusted, with pastry both above and below the filling; the upper crust may be solid or latticed (woven of crosswise strips).\n",
" Answer: YES\n",
"\n",
" Information: Apple pies taste bad.\n",
" Context: An apple pie is a fruit pie in which the principal filling ingredient is apples.\n",
" Apple pie is often served with whipped cream, ice cream ('apple pie à la mode'), custard, or cheddar cheese.\n",
" It is generally double-crusted, with pastry both above and below the filling; the upper crust may be solid or latticed (woven of crosswise strips).\n",
" Answer: NO\n",
"\n",
" Information: Paris is the capital of France.\n",
" Context: This document describes a day trip in Paris. You will visit famous landmarks like the Eiffel Tower, the Louvre Museum, and Notre-Dame Cathedral.\n",
" Answer: NO\n",
"\n",
" Information: {query_str}\n",
" Context: {context_str}\n",
" Answer:\n",
"\n",
" \"\"\")\n",
"\n",
"faithfulness_gpt4.update_prompts({\"your_prompt_key\": faithfulness_new_prompt_template}) # Update the prompts dictionary with the new prompt template\n",
"relevancy_gpt4 = RelevancyEvaluator(service_context=service_context_gpt4)"
]
},
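{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sanity check (editor's addition, not in the original commit): the evaluator's\n",
"# prompt dict should now hold the stricter template under \"eval_template\".\n",
"print(faithfulness_gpt4.get_prompts()[\"eval_template\"].get_template()[:120])"
]
},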
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Function to evaluate metrics for each chunk size"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Define function to calculate average response time, average faithfulness and average relevancy metrics for given chunk size\n",
"# We use GPT-3.5-Turbo to generate response and GPT-4 to evaluate it.\n",
"def evaluate_response_time_and_accuracy(chunk_size, eval_questions):\n",
" \"\"\"\n",
" Evaluate the average response time, faithfulness, and relevancy of responses generated by GPT-3.5-turbo for a given chunk size.\n",
" \n",
" Parameters:\n",
" chunk_size (int): The size of data chunks being processed.\n",
" \n",
" Returns:\n",
" tuple: A tuple containing the average response time, faithfulness, and relevancy metrics.\n",
" \"\"\"\n",
"\n",
" total_response_time = 0\n",
" total_faithfulness = 0\n",
" total_relevancy = 0\n",
"\n",
" # create vector index\n",
" llm = OpenAI(model=\"gpt-3.5-turbo\")\n",
"\n",
" service_context = ServiceContext.from_defaults(llm=llm, chunk_size=chunk_size, chunk_overlap=chunk_size//5) \n",
" vector_index = VectorStoreIndex.from_documents(\n",
" eval_documents, service_context=service_context\n",
" )\n",
" # build query engine\n",
" query_engine = vector_index.as_query_engine(similarity_top_k=5)\n",
" num_questions = len(eval_questions)\n",
"\n",
" # Iterate over each question in eval_questions to compute metrics.\n",
" # While BatchEvalRunner can be used for faster evaluations (see: https://docs.llamaindex.ai/en/latest/examples/evaluation/batch_eval.html),\n",
" # we're using a loop here to specifically measure response time for different chunk sizes.\n",
" for question in eval_questions:\n",
" start_time = time.time()\n",
" response_vector = query_engine.query(question)\n",
" elapsed_time = time.time() - start_time\n",
" \n",
" faithfulness_result = faithfulness_gpt4.evaluate_response(\n",
" response=response_vector\n",
" ).passing\n",
" \n",
" relevancy_result = relevancy_gpt4.evaluate_response(\n",
" query=question, response=response_vector\n",
" ).passing\n",
"\n",
" total_response_time += elapsed_time\n",
" total_faithfulness += faithfulness_result\n",
" total_relevancy += relevancy_result\n",
"\n",
" average_response_time = total_response_time / num_questions\n",
" average_faithfulness = total_faithfulness / num_questions\n",
" average_relevancy = total_relevancy / num_questions\n",
"\n",
" return average_response_time, average_faithfulness, average_relevancy"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Test different chunk sizes "
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\N7\\AppData\\Local\\Temp\\ipykernel_22672\\1178342312.py:21: DeprecationWarning: Call to deprecated class method from_defaults. (ServiceContext is deprecated, please use `llama_index.settings.Settings` instead.) -- Deprecated since version 0.10.0.\n",
" service_context = ServiceContext.from_defaults(llm=llm, chunk_size=chunk_size, chunk_overlap=chunk_size//5)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Chunk size 128 - Average Response time: 1.35s, Average Faithfulness: 1.00, Average Relevancy: 1.00\n",
"Chunk size 256 - Average Response time: 1.31s, Average Faithfulness: 1.00, Average Relevancy: 1.00\n"
]
}
],
"source": [
"chunk_sizes = [128, 256]\n",
"\n",
"for chunk_size in chunk_sizes:\n",
" avg_response_time, avg_faithfulness, avg_relevancy = evaluate_response_time_and_accuracy(chunk_size, k_eval_questions)\n",
" print(f\"Chunk size {chunk_size} - Average Response time: {avg_response_time:.2f}s, Average Faithfulness: {avg_faithfulness:.2f}, Average Relevancy: {avg_relevancy:.2f}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
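The stderr output in the run above flags ServiceContext as deprecated in favor of llama_index.core.Settings. For reference, a minimal sketch of the same per-chunk-size setup on the newer Settings API (an assumed port, not part of this commit):

from llama_index.core import Settings, VectorStoreIndex
from llama_index.llms.openai import OpenAI

# Global settings replace ServiceContext.from_defaults(...)
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.chunk_size = 128          # one of the swept sizes, e.g. 128 or 256
Settings.chunk_overlap = 128 // 5  # same 20% overlap as the notebook

vector_index = VectorStoreIndex.from_documents(eval_documents)
query_engine = vector_index.as_query_engine(similarity_top_k=5)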

178
data/q_a.json Normal file

@@ -0,0 +1,178 @@
[
{
"question": "What does climate change refer to?",
"answer": "Climate change refers to significant, long-term changes in the global climate."
},
{
"question": "What encompasses the planet's overall weather patterns?",
"answer": "The term 'global climate' encompasses the planet's overall weather patterns, including temperature, precipitation, and wind patterns, over an extended period."
},
{
"question": "What activities have significantly contributed to climate change over the past century?",
"answer": "Human activities, particularly the burning of fossil fuels and deforestation, have significantly contributed to climate change."
},
{
"question": "How many cycles of glacial advance and retreat have occurred over the past 650,000 years?",
"answer": "There have been seven cycles of glacial advance and retreat over the past 650,000 years."
},
{
"question": "What marked the beginning of the modern climate era and human civilization?",
"answer": "The abrupt end of the last ice age about 11,700 years ago marked the beginning of the modern climate era and human civilization."
},
{
"question": "What small variations are most climate changes attributed to?",
"answer": "Most of these climate changes are attributed to very small variations in Earth's orbit that change the amount of solar energy our planet receives."
},
{
"question": "What is the primary cause of recent climate change?",
"answer": "The primary cause of recent climate change is the increase in greenhouse gases in the atmosphere."
},
{
"question": "What are some examples of greenhouse gases?",
"answer": "Examples of greenhouse gases include carbon dioxide (CO2), methane (CH4), and nitrous oxide (N2O)."
},
{
"question": "What essential effect do greenhouse gases have on Earth?",
"answer": "Greenhouse gases create a 'greenhouse effect,' which is essential for life on Earth as it keeps the planet warm enough to support life."
},
{
"question": "How has human activity affected the greenhouse effect?",
"answer": "Human activities have intensified the natural greenhouse effect, leading to a warmer climate."
},
{
"question": "What releases large amounts of CO2 into the atmosphere?",
"answer": "Burning fossil fuels for energy releases large amounts of CO2 into the atmosphere."
},
{
"question": "What significant event marked the beginning of a notable increase in fossil fuel consumption?",
"answer": "The industrial revolution marked the beginning of a significant increase in fossil fuel consumption."
},
{
"question": "Which fossil fuel is the most carbon-intensive?",
"answer": "Coal is the most carbon-intensive fossil fuel."
},
{
"question": "What is coal primarily used for, and why is it significant in terms of emissions?",
"answer": "Coal is primarily used for electricity generation and is a major source of CO2 emissions."
},
{
"question": "What are the primary uses of oil?",
"answer": "Oil is used primarily for transportation fuels, such as gasoline and diesel."
},
{
"question": "What environmental issues does the combustion of oil products contribute to?",
"answer": "The combustion of oil products releases significant amounts of CO2 and other pollutants, contributing to climate change and air quality issues."
},
{
"question": "Why is natural gas considered a 'bridge fuel' to a lower-carbon future?",
"answer": "Natural gas is considered a 'bridge fuel' because it is the least carbon-intensive fossil fuel."
},
{
"question": "What is a potent greenhouse gas released during natural gas extraction and use?",
"answer": "Methane, a potent greenhouse gas, is released during natural gas extraction and use."
},
{
"question": "How do forests act as carbon sinks?",
"answer": "Forests act as carbon sinks by absorbing CO2 from the atmosphere."
},
{
"question": "What happens when trees are cut down in terms of carbon?",
"answer": "When trees are cut down, the stored carbon is released back into the atmosphere, exacerbating the greenhouse effect."
},
{
"question": "Why are tropical rainforests important for carbon storage?",
"answer": "Tropical rainforests are particularly important for carbon storage because they absorb significant amounts of CO2."
},
{
"question": "What regions are known for significant tropical deforestation?",
"answer": "The Amazon, Congo Basin, and Southeast Asia are known for significant tropical deforestation."
},
{
"question": "What roles do boreal forests play in sequestering carbon?",
"answer": "Boreal forests play a crucial role in sequestering carbon by absorbing CO2 from the atmosphere."
},
{
"question": "How does agriculture contribute to climate change?",
"answer": "Agriculture contributes to climate change through methane emissions from livestock, rice paddies, and the use of synthetic fertilizers."
},
{
"question": "What is a major source of methane emissions in agriculture?",
"answer": "Ruminant animals, such as cows and sheep, produce methane during digestion, which is a major source of methane emissions in agriculture."
},
{
"question": "How do flooded rice paddies contribute to methane production?",
"answer": "Flooded rice paddies create anaerobic conditions that lead to methane production."
},
{
"question": "What agricultural practice releases nitrous oxide, a potent greenhouse gas?",
"answer": "The use of synthetic fertilizers in agriculture releases nitrous oxide, a potent greenhouse gas."
},
{
"question": "What has been the increase in global temperatures since the late 19th century?",
"answer": "Global temperatures have risen by about 1.2 degrees Celsius (2.2 degrees Fahrenheit) since the late 19th century."
},
{
"question": "What are heatwaves, and how are they changing due to climate change?",
"answer": "Heatwaves are becoming more frequent and severe due to climate change, posing risks to human health, agriculture, and infrastructure."
},
{
"question": "How is climate change altering the timing and length of seasons?",
"answer": "Climate change is altering the timing and length of seasons, affecting ecosystems and human activities."
},
{
"question": "What has been the rise in sea levels over the past century?",
"answer": "Sea levels have risen by about 20 centimeters (8 inches) in the past century."
},
{
"question": "How does polar ice melt contribute to rising sea levels?",
"answer": "Warmer temperatures are causing polar ice caps and glaciers to melt, contributing to rising sea levels."
},
{
"question": "What is the impact of glacial retreat on water supplies?",
"answer": "Glacial retreat affects water supplies for millions of people, particularly in regions dependent on glacial meltwater."
},
{
"question": "What are some of the impacts of rising sea levels on coastal regions?",
"answer": "Rising sea levels and increased storm surges are accelerating coastal erosion, threatening homes, infrastructure, and ecosystems."
},
{
"question": "What extreme weather events are linked to climate change?",
"answer": "Climate change is linked to an increase in the frequency and severity of extreme weather events, such as hurricanes, heatwaves, droughts, and heavy rainfall."
},
{
"question": "How do warmer ocean temperatures affect hurricanes and typhoons?",
"answer": "Warmer ocean temperatures can intensify hurricanes and typhoons, leading to more destructive storms."
},
{
"question": "What is causing more frequent and severe droughts?",
"answer": "Increased temperatures and changing precipitation patterns are contributing to more frequent and severe droughts."
},
{
"question": "How is ocean acidification affecting marine life?",
"answer": "Increased CO2 levels in the atmosphere lead to higher concentrations of CO2 in the oceans, causing the water to become more acidic, which can harm marine life."
},
{
"question": "What is happening to coral reefs due to ocean acidification and warming waters?",
"answer": "Ocean acidification and warming waters contribute to coral bleaching and mortality, threatening biodiversity and fisheries."
},
{
"question": "How do renewable energy sources help mitigate climate change?",
"answer": "Transitioning to renewable energy sources, such as wind, solar, and hydroelectric power, reduces greenhouse gas emissions and is sustainable in the long term."
},
{
"question": "What are the benefits of solar power?",
"answer": "Solar power harnesses energy from the sun using photovoltaic cells or solar thermal systems, providing a versatile and scalable solution for reducing carbon emissions."
},
{
"question": "How does wind power generate electricity?",
"answer": "Wind power generates electricity using wind turbines, which is one of the fastest-growing renewable energy sources with significant potential for large-scale deployment."
},
{
"question": "What is hydroelectric power, and how does it generate electricity?",
"answer": "Hydroelectric power generates electricity by harnessing the energy of flowing water, a mature and widely used technology."
},
{
"question": "How can improving energy efficiency reduce emissions?",
"answer": "Improving energy efficiency in buildings, transportation, and industry can significantly reduce greenhouse gas emissions and lower energy costs."
}
]

104
evalute_rag.py Normal file

@@ -0,0 +1,104 @@
import nest_asyncio
nest_asyncio.apply()
from llama_index.core import ServiceContext
from llama_index.core.prompts import PromptTemplate
from llama_index.core.evaluation import BatchEvalRunner
from llama_index.core.evaluation import (
FaithfulnessEvaluator,
RelevancyEvaluator,
CorrectnessEvaluator
)
from llama_index.llms.openai import OpenAI
async def evaluate_rag(vector_index, questions, ground_truth_answers):
    """
    Evaluate the RAG model on a set of questions and ground truth answers.

    Args:
        vector_index: The vector index whose query engine is evaluated.
        questions: List of questions to evaluate the RAG model on.
        ground_truth_answers: List of ground truth answers for the questions.

    Returns:
        Dictionary containing the evaluation results for faithfulness, relevancy, and correctness.
    """
    gpt4 = OpenAI(temperature=0, model="gpt-4o")
    service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4)
    faithfulness_gpt4 = FaithfulnessEvaluator(service_context=service_context_gpt4)
    relevancy_gpt4 = RelevancyEvaluator(service_context=service_context_gpt4)
    correctness_gpt4 = CorrectnessEvaluator(llm=gpt4)

    faithfulness_new_prompt_template = PromptTemplate("""Please tell if a given piece of information is directly supported by the context.
    You need to answer with either YES or NO.
    Answer YES if any part of the context explicitly supports the information, even if most of the context is unrelated. If the context does not explicitly support the information, answer NO. Some examples are provided below.

    Information: Apple pie is generally double-crusted.
    Context: An apple pie is a fruit pie in which the principal filling ingredient is apples.
    Apple pie is often served with whipped cream, ice cream ('apple pie à la mode'), custard, or cheddar cheese.
    It is generally double-crusted, with pastry both above and below the filling; the upper crust may be solid or latticed (woven of crosswise strips).
    Answer: YES

    Information: Apple pies taste bad.
    Context: An apple pie is a fruit pie in which the principal filling ingredient is apples.
    Apple pie is often served with whipped cream, ice cream ('apple pie à la mode'), custard, or cheddar cheese.
    It is generally double-crusted, with pastry both above and below the filling; the upper crust may be solid or latticed (woven of crosswise strips).
    Answer: NO

    Information: Paris is the capital of France.
    Context: This document describes a day trip in Paris. You will visit famous landmarks like the Eiffel Tower, the Louvre Museum, and Notre-Dame Cathedral.
    Answer: NO

    Information: {query_str}
    Context: {context_str}
    Answer:
    """)
    # Swap in the stricter template under the evaluator's "eval_template" prompt key
    faithfulness_gpt4.update_prompts({"eval_template": faithfulness_new_prompt_template})

    runner = BatchEvalRunner(
        {"faithfulness": faithfulness_gpt4, "relevancy": relevancy_gpt4, "correctness": correctness_gpt4},
        workers=8,
    )
    eval_results = await runner.aevaluate_queries(
        vector_index.as_query_engine(llm=gpt4), queries=questions, reference=ground_truth_answers
    )
    return eval_results


def get_eval_results(key, eval_results):
    """
    Get the evaluation results for a specific metric.

    Args:
        key: Metric to get the results for (faithfulness, relevancy, correctness).
        eval_results: Dictionary containing the evaluation results for faithfulness, relevancy, and correctness.

    Returns:
        Score for the specified metric.
    """
    results = eval_results[key]
    if isinstance(results, float):
        # If the result is already a float (as for an aggregated "correctness" score)
        score = results
    else:
        # For metrics that return a list of EvaluationResult objects, use the passing rate
        correct = sum(1 for result in results if result.passing)
        score = correct / len(results)
    print(f"{key.capitalize()} Score: {score:.2f}")
    return score
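For reference, a minimal driver sketch for these helpers outside a notebook (the flow mirrors simple_rag.ipynb below; plain asyncio.run replaces the notebook's top-level await and works here because nest_asyncio has already patched the event loop):

import asyncio
import json

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from evalute_rag import evaluate_rag, get_eval_results

documents = SimpleDirectoryReader("data").load_data()
vector_index = VectorStoreIndex.from_documents(documents)

with open("data/q_a.json", "r", encoding="utf-8") as f:
    q_a = json.load(f)

eval_results = asyncio.run(evaluate_rag(
    vector_index,
    [qa["question"] for qa in q_a],
    [qa["answer"] for qa in q_a],
))
for metric in ("faithfulness", "relevancy", "correctness"):
    get_eval_results(metric, eval_results)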

150
simple_rag.ipynb Normal file

@@ -0,0 +1,150 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import libraries and environment variables"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
"import json\n",
"\n",
"nest_asyncio.apply()\n",
"from dotenv import load_dotenv\n",
"\n",
"from evalute_rag import *\n",
"import openai\n",
"import os\n",
"\n",
"load_dotenv()\n",
"openai.api_key = os.getenv(\"OPENAI_API_KEY\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read Docs"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"data_dir = \"data\"\n",
"documents = SimpleDirectoryReader(data_dir).load_data()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Index docs"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"vector_index = VectorStoreIndex.from_documents(documents, chunk_size=256,chunk_overlap=64)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read Q&A file"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"q_a_file_name = \"data/q_a.json\"\n",
"with open(q_a_file_name, \"r\", encoding=\"utf-8\") as json_file:\n",
" q_a = json.load(json_file)\n",
"\n",
"questions = [qa[\"question\"] for qa in q_a]\n",
"ground_truth_answers = [qa[\"answer\"] for qa in q_a]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Evaluate results"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"eval_results = await evaluate_rag(vector_index, questions, ground_truth_answers)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Display metrics"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Faithfulness Score: 0.93\n",
"Relevancy Score: 0.93\n",
"Correctness Score: 0.93\n"
]
}
],
"source": [
"faithfulness_score = get_eval_results(\"faithfulness\", eval_results)\n",
"relevancy_score = get_eval_results(\"relevancy\", eval_results)\n",
"correctness_score = get_eval_results(\"correctness\", eval_results)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}