mirror of https://github.com/NirDiamant/RAG_Techniques.git, synced 2025-04-07 00:48:52 +03:00
Added context window enrichment method and semantic chunking
4
.gitignore
vendored
@@ -8,3 +8,7 @@
!*jpeg
!LICENSE
!*.gif
!all_rag_techniques
!data
!evaluation
0
__init__.py
Normal file
0
all_rag_techniques/__init__.py
Normal file
311
all_rag_techniques/context_enrichment_window_around_chunk.ipynb
Normal file
@@ -0,0 +1,311 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Import libraries and environment variables"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"c:\\Users\\N7\\PycharmProjects\\llm_tasks\\RAG_TECHNIQUES\\.venv\\Lib\\site-packages\\deepeval\\__init__.py:42: UserWarning: You are using deepeval version 0.21.65, however version 0.21.67 is available. You should consider upgrading via the \"pip install --upgrade deepeval\" command.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..'))) # Add the parent directory to the path sicnce we work with notebooks\n",
|
||||
"from helper_functions import *\n",
|
||||
"from evaluation.evalute_rag import *\n",
|
||||
"\n",
|
||||
"# Load environment variables from a .env file\n",
|
||||
"load_dotenv()\n",
|
||||
"\n",
|
||||
"# Set the OpenAI API key environment variable\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = os.getenv('OPENAI_API_KEY')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define path to PDF"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"path = \"../data/Understanding_Climate_Change.pdf\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Read PDF to string"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"content = read_pdf_to_string(path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Function to split text into chunks with metadata of the chunk chronological index"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def split_text_to_chunks_with_indices(text: str, chunk_size: int, chunk_overlap: int) -> List[Document]:\n",
|
||||
" chunks = []\n",
|
||||
" start = 0\n",
|
||||
" while start < len(text):\n",
|
||||
" end = start + chunk_size\n",
|
||||
" chunk = text[start:end]\n",
|
||||
" chunks.append(Document(page_content=chunk, metadata={\"index\": len(chunks), \"text\": text}))\n",
|
||||
" start += chunk_size - chunk_overlap\n",
|
||||
" return chunks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Split our document accordingly"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chunks_size = 200\n",
|
||||
"chunk_overlap = 100\n",
|
||||
"docs = split_text_to_chunks_with_indices(content, chunks_size, chunk_overlap)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create vector store and retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"vectorstore = FAISS.from_documents(docs, embeddings)\n",
|
||||
"chunks_query_retriever = vectorstore.as_retriever(search_kwargs={\"k\": 2})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Function to draw the k<sup>th</sup> chunk (in the original order) from the vector store \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_chunk_by_index(vectorstore, target_index: int):\n",
|
||||
" \"\"\"\n",
|
||||
" Retrieve a chunk from the vectorstore based on its index in the metadata.\n",
|
||||
" \n",
|
||||
" Args:\n",
|
||||
" vectorstore (VectorStore): The vectorstore containing the chunks.\n",
|
||||
" target_index (int): The index of the chunk to retrieve.\n",
|
||||
" debug (bool): If True, print debug information.\n",
|
||||
" \n",
|
||||
" Returns:\n",
|
||||
" Optional[Document]: The retrieved chunk as a Document object, or None if not found.\n",
|
||||
" \"\"\"\n",
|
||||
" # Retrieve all documents from the vectorstore\n",
|
||||
" all_docs = vectorstore.similarity_search(\"\", k=vectorstore.index.ntotal)\n",
|
||||
" \n",
|
||||
" # Search for the document with the matching index\n",
|
||||
" for doc in all_docs:\n",
|
||||
" # Check if 'index' is in metadata and matches the target index\n",
|
||||
" if 'index' in doc.metadata and doc.metadata['index'] == target_index:\n",
|
||||
" return doc\n",
|
||||
" \n",
|
||||
" # If we've gone through all documents and haven't found a match, return None\n",
|
||||
" return None"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Check the function"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Understanding Climate Change \n",
|
||||
"Chapter 1: Introduction to Climate Change \n",
|
||||
"Climate change refers to significant, long-term changes in the global climate. The term \n",
|
||||
"\"global climate\" encompasses the plane\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chunk = get_chunk_by_index(vectorstore, 0)\n",
|
||||
"print(chunk.page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Function that retrieves from the vector stroe based on semantic similarity and then pads each retrieved chunk with its num_neighbors before and after, taking into account the chunk overlap to construct a meaningful wide window arround it"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def retrieve_with_context_overlap(vectorstore, query: str, k: int = 2, num_neighbors: int = 2, chunk_size: int = 200, chunk_overlap: int = 100) -> List[str]:\n",
|
||||
" \"\"\"\n",
|
||||
" Retrieve chunks based on a query, then fetch neighboring chunks and concatenate them, \n",
|
||||
" accounting for overlap and correct indexing.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" vectorstore (VectorStore): The vectorstore containing the chunks.\n",
|
||||
" query (str): The query to search for relevant chunks.\n",
|
||||
" k (int): The number of relevant chunks to retrieve.\n",
|
||||
" num_neighbors (int): The number of chunks to retrieve before and after each relevant chunk.\n",
|
||||
" chunk_size (int): The size of each chunk when originally split.\n",
|
||||
" chunk_overlap (int): The overlap between chunks when originally split.\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" List[str]: List of concatenated chunk sequences, each centered on a relevant chunk.\n",
|
||||
" \"\"\"\n",
|
||||
" retriever = vectorstore.as_retriever(search_kwargs={\"k\": k})\n",
|
||||
" relevant_chunks = retriever.get_relevant_documents(query)\n",
|
||||
"\n",
|
||||
" result_sequences = []\n",
|
||||
"\n",
|
||||
" for chunk in relevant_chunks:\n",
|
||||
" current_index = chunk.metadata.get('index')\n",
|
||||
" if current_index is None:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" # Collect neighboring chunks\n",
|
||||
" neighbor_chunks = [chunk] # Include the current chunk\n",
|
||||
" for i in range(1, num_neighbors + 1):\n",
|
||||
" prev_chunk = get_chunk_by_index(vectorstore, current_index - i)\n",
|
||||
" if prev_chunk:\n",
|
||||
" neighbor_chunks.insert(0, prev_chunk)\n",
|
||||
" next_chunk = get_chunk_by_index(vectorstore, current_index + i)\n",
|
||||
" if next_chunk:\n",
|
||||
" neighbor_chunks.append(next_chunk)\n",
|
||||
"\n",
|
||||
" # Sort chunks by their index\n",
|
||||
" neighbor_chunks.sort(key=lambda x: x.metadata.get('index', 0))\n",
|
||||
"\n",
|
||||
" # Concatenate chunks accounting for overlap\n",
|
||||
" concatenated_text = neighbor_chunks[0].page_content\n",
|
||||
" for i in range(1, len(neighbor_chunks)):\n",
|
||||
" current_chunk = neighbor_chunks[i].page_content\n",
|
||||
" overlap_start = chunk_size - chunk_overlap\n",
|
||||
" concatenated_text += current_chunk[overlap_start:]\n",
|
||||
"\n",
|
||||
" result_sequences.append(concatenated_text)\n",
|
||||
"\n",
|
||||
" return result_sequences"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Comparing regular retrival and retrival with context window"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"climate change\"\n",
|
||||
"context = chunks_query_retriever.get_relevant_documents(query)\n",
|
||||
"context_pages_content = [doc.page_content for doc in context]\n",
|
||||
"\n",
|
||||
"print(\"regular retrieval:\\n\")\n",
|
||||
"show_context(context_pages_content)\n",
|
||||
"\n",
|
||||
"sequences = retrieve_with_context_overlap(vectorstore, query)\n",
|
||||
"print(\"retrieval with context overlap:\\n\")\n",
|
||||
"show_context(sequences)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
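A minimal end-to-end sketch of the context-enrichment flow implemented in the notebook above, assuming its functions (read_pdf_to_string, split_text_to_chunks_with_indices, retrieve_with_context_overlap) are in scope and OPENAI_API_KEY is set; the query string and import paths mirror the repo but are illustrative, not part of the commit:

# Hedged sketch: reuses the functions defined in the notebook above.
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS  # same import path helper_functions.py uses

chunk_size, chunk_overlap = 200, 100
content = read_pdf_to_string("../data/Understanding_Climate_Change.pdf")
docs = split_text_to_chunks_with_indices(content, chunk_size, chunk_overlap)
vectorstore = FAISS.from_documents(docs, OpenAIEmbeddings())

# Each returned string is a relevant chunk widened by up to num_neighbors chunks
# on each side, with the 100-character overlaps stripped so no text repeats.
windows = retrieve_with_context_overlap(
    vectorstore, "climate change", k=2, num_neighbors=2,
    chunk_size=chunk_size, chunk_overlap=chunk_overlap,
)
for window in windows:
    print(len(window), window[:80])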
172
all_rag_techniques/semantic_chunking.ipynb
Normal file
@@ -0,0 +1,172 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Import libraries "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"import fitz\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..'))) # Add the parent directory to the path sicnce we work with notebooks\n",
|
||||
"from helper_functions import *\n",
|
||||
"from evaluation.evalute_rag import *\n",
|
||||
"\n",
|
||||
"from langchain_experimental.text_splitter import SemanticChunker\n",
|
||||
"from langchain_openai.embeddings import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"# Load environment variables from a .env file\n",
|
||||
"load_dotenv()\n",
|
||||
"\n",
|
||||
"# Set the OpenAI API key environment variable\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define file path"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"path = \"../data/Understanding_Climate_Change.pdf\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Read PDF to string"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Open the PDF document located at the specified path\n",
|
||||
"doc = fitz.open(path)\n",
|
||||
"\n",
|
||||
"content = \"\"\n",
|
||||
"\n",
|
||||
"# Iterate over each page in the document\n",
|
||||
"for page_num in range(len(doc)):\n",
|
||||
" # Get the current page\n",
|
||||
" page = doc[page_num]\n",
|
||||
" # Extract the text content from the current page and append it to the content string\n",
|
||||
" content += page.get_text()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Breakpoint types: \n",
|
||||
"* 'percentile': all differences between sentences are calculated, and then any difference greater than the X percentile is split.\n",
|
||||
"* 'standard_deviation': any difference greater than X standard deviations is split.\n",
|
||||
"* 'interquartile': the interquartile distance is used to split chunks."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_splitter = SemanticChunker(OpenAIEmbeddings(), breakpoint_threshold_type='percentile', breakpoint_threshold_amount=90) # chose which embeddings and breakpoint type and threshold to use"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Split original text to semantic chunks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = text_splitter.create_documents([content])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create vector store and retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"vectorstore = FAISS.from_documents(docs, embeddings)\n",
|
||||
"chunks_query_retriever = vectorstore.as_retriever(search_kwargs={\"k\": 2})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test the retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_query = \"What is the main cause of climate change?\"\n",
|
||||
"context = retrieve_context_per_question(test_query, chunks_query_retriever)\n",
|
||||
"show_context(context)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
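A short sketch of the three breakpoint strategies listed in the notebook above, assuming langchain_experimental is installed, OPENAI_API_KEY is set, and content holds the PDF text read earlier; the threshold values are illustrative, not values fixed by the commit:

# Hedged sketch of SemanticChunker breakpoint types; thresholds are examples only.
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

# Split wherever the sentence-to-sentence embedding distance exceeds the 90th percentile.
percentile_splitter = SemanticChunker(
    embeddings, breakpoint_threshold_type="percentile", breakpoint_threshold_amount=90)

# Split wherever the distance is more than 3 standard deviations above the mean.
std_splitter = SemanticChunker(
    embeddings, breakpoint_threshold_type="standard_deviation", breakpoint_threshold_amount=3)

# Split using the interquartile range of the distances.
iqr_splitter = SemanticChunker(
    embeddings, breakpoint_threshold_type="interquartile", breakpoint_threshold_amount=1.5)

docs = percentile_splitter.create_documents([content])
print(len(docs), docs[0].page_content[:80])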
@@ -9,22 +9,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()\n",
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"from evalute_rag import *\n",
|
||||
"import openai\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..'))) # Add the parent directory to the path sicnce we work with notebooks\n",
|
||||
"from helper_functions import *\n",
|
||||
"from evaluation.evalute_rag import *\n",
|
||||
"\n",
|
||||
"# Load environment variables from a .env file\n",
|
||||
"load_dotenv()\n",
|
||||
"openai.api_key = os.getenv(\"OPENAI_API_KEY\")"
|
||||
"\n",
|
||||
"# Set the OpenAI API key environment variable\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = os.getenv('OPENAI_API_KEY')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -36,49 +38,98 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data_dir = \"data\"\n",
|
||||
"documents = SimpleDirectoryReader(data_dir).load_data()"
|
||||
"path = \"../data/Understanding_Climate_Change.pdf\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Index docs"
|
||||
"### Encode document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_index = VectorStoreIndex.from_documents(documents, chunk_size=256,chunk_overlap=64)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Read Q&A file"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"q_a_file_name = \"data/q_a.json\"\n",
|
||||
"with open(q_a_file_name, \"r\", encoding=\"utf-8\") as json_file:\n",
|
||||
" q_a = json.load(json_file)\n",
|
||||
"def encode_pdf(path, chunk_size=1000, chunk_overlap=200):\n",
|
||||
" \"\"\"\n",
|
||||
" Encodes a PDF book into a vector store using OpenAI embeddings.\n",
|
||||
"\n",
|
||||
"questions = [qa[\"question\"] for qa in q_a]\n",
|
||||
"ground_truth_answers = [qa[\"answer\"] for qa in q_a]"
|
||||
" Args:\n",
|
||||
" path: The path to the PDF file.\n",
|
||||
" chunk_size: The desired size of each text chunk.\n",
|
||||
" chunk_overlap: The amount of overlap between consecutive chunks.\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" A FAISS vector store containing the encoded book content.\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" # Load PDF documents\n",
|
||||
" loader = PyPDFLoader(path)\n",
|
||||
" documents = loader.load()\n",
|
||||
"\n",
|
||||
" # Split documents into chunks\n",
|
||||
" text_splitter = RecursiveCharacterTextSplitter(\n",
|
||||
" chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len\n",
|
||||
" )\n",
|
||||
" texts = text_splitter.split_documents(documents)\n",
|
||||
" cleaned_texts = replace_t_with_space(texts)\n",
|
||||
"\n",
|
||||
" # Create embeddings and vector store\n",
|
||||
" embeddings = OpenAIEmbeddings()\n",
|
||||
" vectorstore = FAISS.from_documents(cleaned_texts, embeddings)\n",
|
||||
"\n",
|
||||
" return vectorstore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chunks_vector_store = encode_pdf(path, chunk_size=1000, chunk_overlap=200)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chunks_query_retriever = chunks_vector_store.as_retriever(search_kwargs={\"k\": 2})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_query = \"What is the main cause of climate change?\"\n",
|
||||
"context = retrieve_context_per_question(test_query, chunks_query_retriever)\n",
|
||||
"show_context(context)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -90,39 +141,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"eval_results = await evaluate_rag(vector_index, questions, ground_truth_answers)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Display metrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Faithfulness Score: 0.93\n",
|
||||
"Relevancy Score: 0.93\n",
|
||||
"Correctness Score: 0.93\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"faithfulness_score = get_eval_results(\"faithfulness\", eval_results)\n",
|
||||
"relevancy_score = get_eval_results(\"relevancy\", eval_results)\n",
|
||||
"correctness_score = get_eval_results(\"correctness\", eval_results)"
|
||||
"evaluate_rag(chunks_query_retriever)"
|
||||
]
|
||||
}
|
||||
],
0
data/__init__.py
Normal file
0
evaluation/__init__.py
Normal file
322
evaluation/define_evaluation_metrics.ipynb
Normal file
@@ -0,0 +1,322 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import ChatOpenAI \n",
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain.evaluation import load_evaluator\n",
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"\n",
|
||||
"# from langchain.evaluation.criteria import {\n",
|
||||
"# CriteriaEvalChain,\n",
|
||||
"# LabeledCriteriaEvalChain\n",
|
||||
"# }\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"# from sklearn.metrics.pairwise import cosine_similarity\n",
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"load_dotenv()\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = os.getenv('OPENAI_API_KEY')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0, model_name=\"gpt-4o\", max_tokens=4000)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 129,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class ResultScore(BaseModel):\n",
|
||||
" score: float = Field(..., description=\"The score of the result, ranging from 0 to 1 where 1 is the best possible score.\")\n",
|
||||
" # explanation: str = Field(..., description=\"An extensive explanation of the score.\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"correctness_prompt = PromptTemplate(\n",
|
||||
"input_variables=[\"question\", \"ground_truth\", \"generated_answer\"],\n",
|
||||
"template=\"\"\"\n",
|
||||
"Question: {question}\n",
|
||||
"Ground Truth: {ground_truth}\n",
|
||||
"Generated Answer: {generated_answer}\n",
|
||||
"\n",
|
||||
"Evaluate the correctness of the generated answer compared to the ground truth.\n",
|
||||
"Score from 0 to 1, where 1 is perfectly correct and 0 is completely incorrect.\n",
|
||||
"any score between 0 and 1 is acceptable and depends on how correct the generated answer is.\n",
|
||||
"\n",
|
||||
"Score:\n",
|
||||
"\"\"\"\n",
|
||||
")\n",
|
||||
"correctness_chain = correctness_prompt | llm.with_structured_output(ResultScore)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def evaluate_correctness(question, ground_truth, generated_answer):\n",
|
||||
" \"\"\"Evaluates the correctness of the generated answer compared to the ground truth.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" question: The question.\n",
|
||||
" ground_truth: The ground truth answer.\n",
|
||||
" generated_answer: The generated answer.\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" A float between 0 and 1, where 1 is the best possible score.\n",
|
||||
" \"\"\"\n",
|
||||
" result = correctness_chain.invoke({\"question\": question, \"ground_truth\": ground_truth, \"generated_answer\": generated_answer})\n",
|
||||
" return result.score\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# test create_correctness_chain\n",
|
||||
"question = \"What is the capital of France and Spain?\"\n",
|
||||
"ground_truth = \"Paris and Barcelona\"\n",
|
||||
"generated_answer = \"Paris\"\n",
|
||||
"score = evaluate_correctness(question, ground_truth, generated_answer)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0.5"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"faithfulness_prompt = PromptTemplate(\n",
|
||||
"input_variables=[\"question\",\"context\", \"generated_answer\"],\n",
|
||||
"template=\"\"\"\n",
|
||||
"Question: {question}\n",
|
||||
"Context: {context}\n",
|
||||
"Generated Answer: {generated_answer}\n",
|
||||
"\n",
|
||||
"Evaluate if the generate answer to the question can be deduced from the context.\n",
|
||||
"Score of 0 or 1, where 1 is perfectly faithful *AND CAN BE DERIVED FROM THE CONTEXT* and 0 otherwise.\n",
|
||||
"you don't mind if the answer is correct, all you care about is if the answer can be deduced from the context.\n",
|
||||
"\n",
|
||||
"example:\n",
|
||||
"Question: What are the capitals of France and Spain?\n",
|
||||
"Context: Paris is the capital of France and Madrid is the capital of Spain.\n",
|
||||
"Generated Answer: Paris\n",
|
||||
"in this case the generated answer is faithful to the context so the score should be *1*.\n",
|
||||
"\n",
|
||||
"example:\n",
|
||||
"Question: What are the capital cities of France and Spain?\n",
|
||||
"Context: London is the capital of France and Barcelona is the capital of Spain.\n",
|
||||
"Generated Answer: London and Barcelona.\n",
|
||||
"in this case the generated answer is faithful to the context so the score should be *1*.\n",
|
||||
"\n",
|
||||
"example:\n",
|
||||
"Question: What are the capital cities of France and Spain?\n",
|
||||
"Context: Paris is the capital of France and Madrid is the capital of Spain.\n",
|
||||
"Generated Answer: Paris.\n",
|
||||
"in this case the generated answer is faithful to the context so the score should be *1*.\n",
|
||||
"\n",
|
||||
"exmaple:\n",
|
||||
"Question: What are the capitals of France and Spain?\n",
|
||||
"Context: London is the capital of France and Madrid is the Capital of Spain.\n",
|
||||
"Generated Answer: Paris and Madrid.\n",
|
||||
"in this case the generated answer is based on the pretrained knowledge of the llm and is not faithful to the context so the score should be *0*.\n",
|
||||
"\n",
|
||||
"example:\n",
|
||||
"Question: What is the capital of France and Spain?\n",
|
||||
"Context: Monkeys like to eat bananas.\n",
|
||||
"Generated Answer: Paris and Madrid.\n",
|
||||
"in this case the generated answer is not based on the context so the score should be *0*.\n",
|
||||
"\n",
|
||||
"example:\n",
|
||||
"Question: What is the capital of France?\n",
|
||||
"Context: Paris.\n",
|
||||
"Generated Answer: Paris.\n",
|
||||
"in this case the context doesn't specify that Paris is the capital of France, and it cannot be deduced from the context, so the score should be *0*.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Example:\n",
|
||||
"Question: What is 2+2?\n",
|
||||
"Context: 4.\n",
|
||||
"Generated Answer: 4.\n",
|
||||
"In this case, the context states '4', but it does not provide information to deduce the answer to 'What is 2+2?', so the score should be *0*.\n",
|
||||
"\"\"\"\n",
|
||||
")\n",
|
||||
"faithfulness_chain = faithfulness_prompt | llm.with_structured_output(ResultScore)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 83,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def evaluate_faithfulness(question, context, generated_answer):\n",
|
||||
" \"\"\"Evaluates if the generate answer to the question can be deduced from the context.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" question: The question.\n",
|
||||
" context: The context.\n",
|
||||
" generated_answer: The generated answer.\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" A float between 0 and 1, where 1 is the best possible score.\n",
|
||||
" \"\"\"\n",
|
||||
" result = faithfulness_chain.invoke({\"question\": question, \"context\": context, \"generated_answer\": generated_answer})\n",
|
||||
" return result.score, result.explanation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 86,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.0\n",
|
||||
"The context states '6', but it does not provide information to deduce the answer to 'What is 3+3?'. The answer is correct, but it cannot be derived from the context.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# test create_faithfulness_chain\n",
|
||||
"question = \"what is 3+3?\"\n",
|
||||
"context = \"6\"\n",
|
||||
"generated_answer = \"6\"\n",
|
||||
"score, explanation = evaluate_faithfulness(question, context, generated_answer)\n",
|
||||
"print(score)\n",
|
||||
"print(explanation)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 130,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate\n",
|
||||
"\n",
|
||||
"relevancy_score_prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"question\", \"contexts\"],\n",
|
||||
" template=\"\"\"\n",
|
||||
"Q: {question}\n",
|
||||
"Docs: {contexts}\n",
|
||||
"\n",
|
||||
"Score each doc's relevance:\n",
|
||||
"0.00 - Irrelevant: No relation to the question\n",
|
||||
"0.33 - Somewhat relevant: Contains related keywords or concepts\n",
|
||||
"0.66 - Relevant: Partially answers or strongly implies the answer\n",
|
||||
"1.00 - Highly relevant: Directly and fully answers the question\n",
|
||||
"\n",
|
||||
"Consider: Relevance, Directness, Completeness, Accuracy\n",
|
||||
"\n",
|
||||
"Final Score: [Average of all scores]\n",
|
||||
"\"\"\"\n",
|
||||
")\n",
|
||||
"ratio_of_relevant_docs_chain = ratio_of_relevant_docs_prompt | llm.with_structured_output(ResultScore)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 131,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def evaluate_ratio_of_relevant_docs(question, contexts):\n",
|
||||
" \"\"\"Evaluates the ratio of relevant documents in the contexts to the question.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" question: The question.\n",
|
||||
" contexts: A list of documents.\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" A float between 0 and 1, where 1 is the best possible score.\n",
|
||||
" \"\"\"\n",
|
||||
" result = ratio_of_relevant_docs_chain.invoke({\"question\": question, \"contexts\": contexts})\n",
|
||||
" return result.score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 132,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# test create_ratio_of_relevant_docs_chain\n",
|
||||
"question = \"What is the capital of France?\"\n",
|
||||
"contexts = [\"Paris.\", \"i was traveling in France.\"]\n",
|
||||
"score = evaluate_ratio_of_relevant_docs(question, contexts)\n",
|
||||
"# score, explanation = evaluate_ratio_of_relevant_docs(question, contexts)\n",
|
||||
"print(score)\n",
|
||||
"# print(explanation)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
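A hedged sketch of how the three metric chains defined in this notebook (evaluate_correctness, evaluate_faithfulness, evaluate_ratio_of_relevant_docs) could be applied together over a small evaluation set; the sample data below is made up for illustration:

# Hedged sketch: toy data; assumes the evaluation functions defined above are in scope.
samples = [
    {
        "question": "What is the capital of France?",
        "ground_truth": "Paris",
        "generated_answer": "Paris",
        "contexts": ["Paris is the capital of France."],
    },
]

for sample in samples:
    correctness = evaluate_correctness(
        sample["question"], sample["ground_truth"], sample["generated_answer"])
    faithfulness, reason = evaluate_faithfulness(
        sample["question"], " ".join(sample["contexts"]), sample["generated_answer"])
    relevancy = evaluate_ratio_of_relevant_docs(sample["question"], sample["contexts"])
    print(f"correctness={correctness:.2f} faithfulness={faithfulness:.2f} relevancy={relevancy:.2f}")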
209
evaluation/evaluation_deep_eval.ipynb
Normal file
@@ -0,0 +1,209 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from deepeval import evaluate\n",
|
||||
"from deepeval.metrics import GEval, FaithfulnessMetric, ContextualRelevancyMetric\n",
|
||||
"from deepeval.test_case import LLMTestCase, LLMTestCaseParams"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test Correctness"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"correctness_metric = GEval(\n",
|
||||
" name=\"Correctness\",\n",
|
||||
" model=\"gpt-4o\",\n",
|
||||
" evaluation_params=[\n",
|
||||
" LLMTestCaseParams.EXPECTED_OUTPUT,\n",
|
||||
" LLMTestCaseParams.ACTUAL_OUTPUT],\n",
|
||||
" evaluation_steps=[\n",
|
||||
" \"Determine whether the actual output is factually correct based on the expected output.\"\n",
|
||||
" ],\n",
|
||||
"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"gt_answer = \"Madrid is the capital of Spain.\"\n",
|
||||
"pred_answer = \"MadriD.\"\n",
|
||||
"\n",
|
||||
"test_case_correctness = LLMTestCase(\n",
|
||||
" input=\"What is the capital of Spain?\",\n",
|
||||
" expected_output=gt_answer,\n",
|
||||
" actual_output=pred_answer,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"correctness_metric.measure(test_case_correctness)\n",
|
||||
"print(correctness_metric.score)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test faithfulness"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"question = \"what is 3+3?\"\n",
|
||||
"context = [\"6\"]\n",
|
||||
"generated_answer = \"6\"\n",
|
||||
"\n",
|
||||
"faithfulness_metric = FaithfulnessMetric(\n",
|
||||
" threshold=0.7,\n",
|
||||
" model=\"gpt-4\",\n",
|
||||
" include_reason=False\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"test_case = LLMTestCase(\n",
|
||||
" input = question,\n",
|
||||
" actual_output=generated_answer,\n",
|
||||
" retrieval_context=context\n",
|
||||
"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"faithfulness_metric.measure(test_case)\n",
|
||||
"print(faithfulness_metric.score)\n",
|
||||
"print(faithfulness_metric.reason)\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test contextual relevancy "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"actual_output = \"then go somewhere else.\"\n",
|
||||
"retrieval_context = [\"this is a test context\",\"mike is a cat\",\"if the shoes don't fit, then go somewhere else.\"]\n",
|
||||
"gt_answer = \"if the shoes don't fit, then go somewhere else.\"\n",
|
||||
"\n",
|
||||
"relevance_metric = ContextualRelevancyMetric(\n",
|
||||
" threshold=1,\n",
|
||||
" model=\"gpt-4\",\n",
|
||||
" include_reason=True\n",
|
||||
")\n",
|
||||
"relevance_test_case = LLMTestCase(\n",
|
||||
" input=\"What if these shoes don't fit?\",\n",
|
||||
" actual_output=actual_output,\n",
|
||||
" retrieval_context=retrieval_context,\n",
|
||||
" expected_output=gt_answer,\n",
|
||||
"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"relevance_metric.measure(relevance_test_case)\n",
|
||||
"print(relevance_metric.score)\n",
|
||||
"print(relevance_metric.reason)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"new_test_case = LLMTestCase(\n",
|
||||
" input=\"What is the capital of Spain?\",\n",
|
||||
" expected_output=\"Madrid is the capital of Spain.\",\n",
|
||||
" actual_output=\"MadriD.\",\n",
|
||||
" retrieval_context=[\"Madrid is the capital of Spain.\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test two different cases together with several metrics together"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"evaluate(\n",
|
||||
" test_cases=[relevance_test_case, new_test_case],\n",
|
||||
" metrics=[correctness_metric, faithfulness_metric, relevance_metric]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Funcion to create multiple LLMTestCases based on four lists: \n",
|
||||
"* Questions\n",
|
||||
"* Ground Truth Answers\n",
|
||||
"* Generated Answers\n",
|
||||
"* Retrieved Documents - Each element is a list"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def create_deep_eval_test_cases(questions, gt_answers, generated_answers, retrieved_documents):\n",
|
||||
" return [\n",
|
||||
" LLMTestCase(\n",
|
||||
" input=question,\n",
|
||||
" expected_output=gt_answer,\n",
|
||||
" actual_output=generated_answer,\n",
|
||||
" retrieval_context=retrieved_document\n",
|
||||
" )\n",
|
||||
" for question, gt_answer, generated_answer, retrieved_document in zip(\n",
|
||||
" questions, gt_answers, generated_answers, retrieved_documents\n",
|
||||
" )\n",
|
||||
" ]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
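A brief sketch wiring this notebook's pieces together: building test cases with create_deep_eval_test_cases and scoring them with the metrics defined above; the lists are toy data, not results from the repository:

# Hedged sketch: assumes correctness_metric, faithfulness_metric and relevance_metric
# from the cells above are in scope; data is illustrative only.
questions = ["What is the capital of Spain?"]
gt_answers = ["Madrid is the capital of Spain."]
generated_answers = ["Madrid."]
retrieved_documents = [["Madrid is the capital of Spain."]]

test_cases = create_deep_eval_test_cases(
    questions, gt_answers, generated_answers, retrieved_documents)

evaluate(
    test_cases=test_cases,
    metrics=[correctness_metric, faithfulness_metric, relevance_metric],
)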
124
evaluation/evalute_rag.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""
|
||||
RAG Evaluation Script
|
||||
|
||||
This script evaluates the performance of a Retrieval-Augmented Generation (RAG) system
|
||||
using various metrics from the deepeval library.
|
||||
|
||||
Dependencies:
|
||||
- deepeval
|
||||
- langchain_openai
|
||||
- json
|
||||
|
||||
Custom modules:
|
||||
- helper_functions (for RAG-specific operations)
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import List, Tuple
|
||||
|
||||
from deepeval import evaluate
|
||||
from deepeval.metrics import GEval, FaithfulnessMetric, ContextualRelevancyMetric
|
||||
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from helper_functions import (
|
||||
create_question_answer_from_context_chain,
|
||||
answer_question_from_context,
|
||||
retrieve_context_per_question
|
||||
)
|
||||
|
||||
def create_deep_eval_test_cases(
|
||||
questions: List[str],
|
||||
gt_answers: List[str],
|
||||
generated_answers: List[str],
|
||||
retrieved_documents: List[str]
|
||||
) -> List[LLMTestCase]:
|
||||
"""
|
||||
Create a list of LLMTestCase objects for evaluation.
|
||||
|
||||
Args:
|
||||
questions (List[str]): List of input questions.
|
||||
gt_answers (List[str]): List of ground truth answers.
|
||||
generated_answers (List[str]): List of generated answers.
|
||||
retrieved_documents (List[str]): List of retrieved documents.
|
||||
|
||||
Returns:
|
||||
List[LLMTestCase]: List of LLMTestCase objects.
|
||||
"""
|
||||
return [
|
||||
LLMTestCase(
|
||||
input=question,
|
||||
expected_output=gt_answer,
|
||||
actual_output=generated_answer,
|
||||
retrieval_context=retrieved_document
|
||||
)
|
||||
for question, gt_answer, generated_answer, retrieved_document in zip(
|
||||
questions, gt_answers, generated_answers, retrieved_documents
|
||||
)
|
||||
]
|
||||
|
||||
# Define evaluation metrics
|
||||
correctness_metric = GEval(
|
||||
name="Correctness",
|
||||
model="gpt-4o",
|
||||
evaluation_params=[
|
||||
LLMTestCaseParams.EXPECTED_OUTPUT,
|
||||
LLMTestCaseParams.ACTUAL_OUTPUT
|
||||
],
|
||||
evaluation_steps=[
|
||||
"Determine whether the actual output is factually correct based on the expected output."
|
||||
],
|
||||
)
|
||||
|
||||
faithfulness_metric = FaithfulnessMetric(
|
||||
threshold=0.7,
|
||||
model="gpt-4",
|
||||
include_reason=False
|
||||
)
|
||||
|
||||
relevance_metric = ContextualRelevancyMetric(
|
||||
threshold=1,
|
||||
model="gpt-4",
|
||||
include_reason=True
|
||||
)
|
||||
|
||||
def evaluate_rag(chunks_query_retriever, num_questions: int = 5) -> None:
|
||||
"""
|
||||
Evaluate the RAG system using predefined metrics.
|
||||
|
||||
Args:
|
||||
chunks_query_retriever: Function to retrieve context chunks for a given query.
|
||||
num_questions (int): Number of questions to evaluate (default: 5).
|
||||
"""
|
||||
llm = ChatOpenAI(temperature=0, model_name="gpt-4o", max_tokens=2000)
|
||||
question_answer_from_context_chain = create_question_answer_from_context_chain(llm)
|
||||
|
||||
# Load questions and answers from JSON file
|
||||
q_a_file_name = "../data/q_a.json"
|
||||
with open(q_a_file_name, "r", encoding="utf-8") as json_file:
|
||||
q_a = json.load(json_file)
|
||||
|
||||
questions = [qa["question"] for qa in q_a][:num_questions]
|
||||
ground_truth_answers = [qa["answer"] for qa in q_a][:num_questions]
|
||||
generated_answers = []
|
||||
retrieved_documents = []
|
||||
|
||||
# Generate answers and retrieve documents for each question
|
||||
for question in questions:
|
||||
context = retrieve_context_per_question(question, chunks_query_retriever)
|
||||
retrieved_documents.append(context)
|
||||
context_string = " ".join(context)
|
||||
result = answer_question_from_context(question, context_string, question_answer_from_context_chain)
|
||||
generated_answers.append(result["answer"])
|
||||
|
||||
# Create test cases and evaluate
|
||||
test_cases = create_deep_eval_test_cases(questions, ground_truth_answers, generated_answers, retrieved_documents)
|
||||
evaluate(
|
||||
test_cases=test_cases,
|
||||
metrics=[correctness_metric, faithfulness_metric, relevance_metric]
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Add any necessary setup or configuration here
|
||||
# Example: evaluate_rag(your_chunks_query_retriever_function)
|
||||
pass
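A hedged usage sketch for this script, mirroring how the notebooks drive it; it assumes the code is run from a subdirectory of the repo (so that ../data/q_a.json and ../data/Understanding_Climate_Change.pdf resolve) and that OPENAI_API_KEY is set:

# Hedged sketch: not part of the commit; paths assume running from a repo subfolder,
# as the notebooks do.
import os
import sys

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))  # add the repo root to the path

from helper_functions import encode_pdf
from evaluation.evalute_rag import evaluate_rag

chunks_vector_store = encode_pdf("../data/Understanding_Climate_Change.pdf", chunk_size=1000, chunk_overlap=200)
chunks_query_retriever = chunks_vector_store.as_retriever(search_kwargs={"k": 2})
evaluate_rag(chunks_query_retriever, num_questions=5)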
156
helper_functions.py
Normal file
@@ -0,0 +1,156 @@
|
||||
from langchain.document_loaders import PyPDFLoader
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from langchain.vectorstores import FAISS
|
||||
from langchain_core.pydantic_v1 import BaseModel, Field
|
||||
from langchain import PromptTemplate
|
||||
import fitz
|
||||
|
||||
|
||||
|
||||
|
||||
def replace_t_with_space(list_of_documents):
|
||||
"""
|
||||
Replaces all tab characters ('\t') with spaces in the page content of each document.
|
||||
|
||||
Args:
|
||||
list_of_documents: A list of document objects, each with a 'page_content' attribute.
|
||||
|
||||
Returns:
|
||||
The modified list of documents with tab characters replaced by spaces.
|
||||
"""
|
||||
|
||||
for doc in list_of_documents:
|
||||
doc.page_content = doc.page_content.replace('\t', ' ') # Replace tabs with spaces
|
||||
return list_of_documents
|
||||
|
||||
|
||||
|
||||
|
||||
def encode_pdf(path, chunk_size=1000, chunk_overlap=200):
|
||||
"""
|
||||
Encodes a PDF book into a vector store using OpenAI embeddings.
|
||||
|
||||
Args:
|
||||
path: The path to the PDF file.
|
||||
chunk_size: The desired size of each text chunk.
|
||||
chunk_overlap: The amount of overlap between consecutive chunks.
|
||||
|
||||
Returns:
|
||||
A FAISS vector store containing the encoded book content.
|
||||
"""
|
||||
|
||||
# Load PDF documents
|
||||
loader = PyPDFLoader(path)
|
||||
documents = loader.load()
|
||||
|
||||
# Split documents into chunks
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len
|
||||
)
|
||||
texts = text_splitter.split_documents(documents)
|
||||
cleaned_texts = replace_t_with_space(texts)
|
||||
|
||||
# Create embeddings and vector store
|
||||
embeddings = OpenAIEmbeddings()
|
||||
vectorstore = FAISS.from_documents(cleaned_texts, embeddings)
|
||||
|
||||
return vectorstore
|
||||
|
||||
|
||||
def retrieve_context_per_question(question, chunks_query_retriever):
|
||||
"""
|
||||
Retrieves relevant context for a given question using the chunks query retriever.
|
||||
|
||||
Args:
|
||||
question: The question for which to retrieve context.
chunks_query_retriever: The retriever used to fetch relevant document chunks.
|
||||
|
||||
Returns:
|
||||
A list containing the page content of each relevant document.
|
||||
"""
|
||||
|
||||
# Retrieve relevant documents for the given question
|
||||
docs = chunks_query_retriever.get_relevant_documents(question)
|
||||
|
||||
# Concatenate document content
|
||||
# context = " ".join(doc.page_content for doc in docs)
|
||||
context = [doc.page_content for doc in docs]
|
||||
|
||||
|
||||
return context
|
||||
|
||||
class QuestionAnswerFromContext(BaseModel):
|
||||
"""
|
||||
Model to generate an answer to a query based on a given context.
|
||||
|
||||
Attributes:
|
||||
answer_based_on_content (str): The generated answer based on the context.
|
||||
"""
|
||||
answer_based_on_content: str = Field(description="Generates an answer to a query based on a given context.")
|
||||
|
||||
def create_question_answer_from_context_chain(llm):
|
||||
|
||||
# Initialize the ChatOpenAI model with specific parameters
|
||||
question_answer_from_context_llm = llm
|
||||
|
||||
# Define the prompt template for chain-of-thought reasoning
|
||||
question_answer_prompt_template = """
|
||||
For the question below, provide a concise but sufficient answer based ONLY on the provided context:
|
||||
{context}
|
||||
Question
|
||||
{question}
|
||||
"""
|
||||
|
||||
# Create a PromptTemplate object with the specified template and input variables
|
||||
question_answer_from_context_prompt = PromptTemplate(
|
||||
template=question_answer_prompt_template,
|
||||
input_variables=["context", "question"],
|
||||
)
|
||||
|
||||
# Create a chain by combining the prompt template and the language model
|
||||
question_answer_from_context_cot_chain = question_answer_from_context_prompt | question_answer_from_context_llm.with_structured_output(QuestionAnswerFromContext)
|
||||
return question_answer_from_context_cot_chain
|
||||
|
||||
|
||||
|
||||
def answer_question_from_context(question, context, question_answer_from_context_chain):
|
||||
"""
|
||||
Answer a question using the given context by invoking a chain of reasoning.
|
||||
|
||||
Args:
|
||||
question: The question to be answered.
|
||||
context: The context to be used for answering the question.
question_answer_from_context_chain: The chain used to generate the answer from the context.
|
||||
|
||||
Returns:
|
||||
A dictionary containing the answer, context, and question.
|
||||
"""
|
||||
input_data = {
|
||||
"question": question,
|
||||
"context": context
|
||||
}
|
||||
print("Answering the question from the retrieved context...")
|
||||
|
||||
output = question_answer_from_context_chain.invoke(input_data)
|
||||
answer = output.answer_based_on_content
|
||||
return {"answer": answer, "context": context, "question": question}
|
||||
|
||||
|
||||
def show_context(context):
|
||||
for i, c in enumerate(context):
|
||||
print(f"Context {i+1}:")
|
||||
print(c)
|
||||
print("\n")
|
||||
|
||||
def read_pdf_to_string(path):
|
||||
# Open the PDF document located at the specified path
|
||||
doc = fitz.open(path)
|
||||
content = ""
|
||||
# Iterate over each page in the document
|
||||
for page_num in range(len(doc)):
|
||||
# Get the current page
|
||||
page = doc[page_num]
|
||||
# Extract the text content from the current page and append it to the content string
|
||||
content += page.get_text()
|
||||
return content
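A hedged sketch chaining the helpers in this module into a minimal RAG round trip; the question, paths and model parameters are illustrative (the model settings mirror evalute_rag.py), and OPENAI_API_KEY must be set:

# Hedged sketch: minimal question-answering round trip using only functions from this module.
from langchain_openai import ChatOpenAI

vectorstore = encode_pdf("data/Understanding_Climate_Change.pdf", chunk_size=1000, chunk_overlap=200)
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

question = "What is the main cause of climate change?"
context = retrieve_context_per_question(question, retriever)  # list of page contents
show_context(context)

llm = ChatOpenAI(temperature=0, model_name="gpt-4o", max_tokens=2000)
qa_chain = create_question_answer_from_context_chain(llm)
result = answer_question_from_context(question, " ".join(context), qa_chain)
print(result["answer"])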