Merge branch 'main' of https://github.com/NirDiamant/RAG_TECHNIQUES

2025-04-07 00:48:52 +03:00 · 2025-01-02 20:51:21 +02:00
parent 59727e8893 9312170e6a
commit 885c1171f9
2 changed files with 23 additions and 22 deletions
--- a/all_rag_techniques/fusion_retrieval.ipynb
+++ b/all_rag_techniques/fusion_retrieval.ipynb
@@ -247,6 +247,9 @@
    "    Returns:\n",
    "    List[Document]: The top k documents based on the combined scores.\n",
    "    \"\"\"\n",
+    "    \n",
+    "    epsilon = 1e-8\n",
+    "\n",
    "    # Step 1: Get all documents from the vectorstore\n",
    "    all_docs = vectorstore.similarity_search(\"\", k=vectorstore.index.ntotal)\n",
    "\n",
@@ -258,9 +261,9 @@
    "    \n",
    "    # Step 4: Normalize scores\n",
    "    vector_scores = np.array([score for _, score in vector_results])\n",
-    "    vector_scores = 1 - (vector_scores - np.min(vector_scores)) / (np.max(vector_scores) - np.min(vector_scores))\n",
+    "    vector_scores = 1 - (vector_scores - np.min(vector_scores)) / (np.max(vector_scores) - np.min(vector_scores) + epsilon)\n",
    "\n",
-    "    bm25_scores = (bm25_scores - np.min(bm25_scores)) / (np.max(bm25_scores) - np.min(bm25_scores))\n",
+    "    bm25_scores = (bm25_scores - np.min(bm25_scores)) / (np.max(bm25_scores) -  np.min(bm25_scores) + epsilon)\n",
    "\n",
    "    # Step 5: Combine scores\n",
    "    combined_scores = alpha * vector_scores + (1 - alpha) * bm25_scores  \n",
--- a/all_rag_techniques/simple_rag.ipynb
+++ b/all_rag_techniques/simple_rag.ipynb
@@ -78,33 +78,27 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Users\\N7\\PycharmProjects\\llm_tasks\\RAG_TECHNIQUES\\.venv\\Lib\\site-packages\\deepeval\\__init__.py:45: UserWarning: You are using deepeval version 0.21.70, however version 0.21.73 is available. You should consider upgrading via the \"pip install --upgrade deepeval\" command.\n",
-      "  warnings.warn(\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "\n",
-    "sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..'))) # Add the parent directory to the path since we work with notebooks\n",
-    "from helper_functions import *\n",
-    "from evaluation.evalute_rag import *\n",
-    "\n",
    "# Load environment variables from a .env file\n",
    "load_dotenv()\n",
    "\n",
-    "# Set the OpenAI API key environment variable\n",
-    "os.environ[\"OPENAI_API_KEY\"] = os.getenv('OPENAI_API_KEY')"
+    "# Set the OpenAI API key environment variable (comment out if not using OpenAI)\n",
+    "if not os.getenv('OPENAI_API_KEY'):\n",
+    "    os.environ[\"OPENAI_API_KEY\"] = input(\"Please enter your OpenAI API key: \")\n",
+    "else:\n",
+    "    os.environ[\"OPENAI_API_KEY\"] = os.getenv('OPENAI_API_KEY')\n",
+    "\n",
+    "sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..'))) # Add the parent directory to the path since we work with notebooks\n",
+    "from helper_functions import *\n",
+    "from evaluation.evalute_rag import *\n"
   ]
  },
  {
@@ -132,7 +126,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -160,8 +154,11 @@
    "    texts = text_splitter.split_documents(documents)\n",
    "    cleaned_texts = replace_t_with_space(texts)\n",
    "\n",
-    "    # Create embeddings and vector store\n",
+    "    # Create embeddings (Tested with OpenAI and Amazon Bedrock)\n",
    "    embeddings = get_langchain_embedding_provider(EmbeddingProvider.OPENAI)\n",
+    "    #embeddings = get_langchain_embedding_provider(EmbeddingProvider.AMAZON_BEDROCK)\n",
+    "\n",
+    "    # Create vector store\n",
    "    vectorstore = FAISS.from_documents(cleaned_texts, embeddings)\n",
    "\n",
    "    return vectorstore"
@@ -271,6 +268,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "# Note: this currently works with OpenAI only\n",
    "evaluate_rag(chunks_query_retriever)"
   ]
  }
@@ -291,7 +289,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.12.0"
+   "version": "3.12.3"
  }
 },
 "nbformat": 4,