mirror of
https://github.com/NirDiamant/RAG_Techniques.git
synced 2025-04-07 00:48:52 +03:00
Merge branch 'main' of https://github.com/NirDiamant/RAG_TECHNIQUES
This commit is contained in:
@@ -247,6 +247,9 @@
|
||||
" Returns:\n",
|
||||
" List[Document]: The top k documents based on the combined scores.\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" epsilon = 1e-8\n",
|
||||
"\n",
|
||||
" # Step 1: Get all documents from the vectorstore\n",
|
||||
" all_docs = vectorstore.similarity_search(\"\", k=vectorstore.index.ntotal)\n",
|
||||
"\n",
|
||||
@@ -258,9 +261,9 @@
|
||||
" \n",
|
||||
" # Step 4: Normalize scores\n",
|
||||
" vector_scores = np.array([score for _, score in vector_results])\n",
|
||||
" vector_scores = 1 - (vector_scores - np.min(vector_scores)) / (np.max(vector_scores) - np.min(vector_scores))\n",
|
||||
" vector_scores = 1 - (vector_scores - np.min(vector_scores)) / (np.max(vector_scores) - np.min(vector_scores) + epsilon)\n",
|
||||
"\n",
|
||||
" bm25_scores = (bm25_scores - np.min(bm25_scores)) / (np.max(bm25_scores) - np.min(bm25_scores))\n",
|
||||
" bm25_scores = (bm25_scores - np.min(bm25_scores)) / (np.max(bm25_scores) - np.min(bm25_scores) + epsilon)\n",
|
||||
"\n",
|
||||
" # Step 5: Combine scores\n",
|
||||
" combined_scores = alpha * vector_scores + (1 - alpha) * bm25_scores \n",
|
||||
|
||||
@@ -78,33 +78,27 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"c:\\Users\\N7\\PycharmProjects\\llm_tasks\\RAG_TECHNIQUES\\.venv\\Lib\\site-packages\\deepeval\\__init__.py:45: UserWarning: You are using deepeval version 0.21.70, however version 0.21.73 is available. You should consider upgrading via the \"pip install --upgrade deepeval\" command.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..'))) # Add the parent directory to the path since we work with notebooks\n",
|
||||
"from helper_functions import *\n",
|
||||
"from evaluation.evalute_rag import *\n",
|
||||
"\n",
|
||||
"# Load environment variables from a .env file\n",
|
||||
"load_dotenv()\n",
|
||||
"\n",
|
||||
"# Set the OpenAI API key environment variable\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = os.getenv('OPENAI_API_KEY')"
|
||||
"# Set the OpenAI API key environment variable (comment out if not using OpenAI)\n",
|
||||
"if not os.getenv('OPENAI_API_KEY'):\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = input(\"Please enter your OpenAI API key: \")\n",
|
||||
"else:\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..'))) # Add the parent directory to the path since we work with notebooks\n",
|
||||
"from helper_functions import *\n",
|
||||
"from evaluation.evalute_rag import *\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -132,7 +126,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -160,8 +154,11 @@
|
||||
" texts = text_splitter.split_documents(documents)\n",
|
||||
" cleaned_texts = replace_t_with_space(texts)\n",
|
||||
"\n",
|
||||
" # Create embeddings and vector store\n",
|
||||
" # Create embeddings (Tested with OpenAI and Amazon Bedrock)\n",
|
||||
" embeddings = get_langchain_embedding_provider(EmbeddingProvider.OPENAI)\n",
|
||||
" #embeddings = get_langchain_embedding_provider(EmbeddingProvider.AMAZON_BEDROCK)\n",
|
||||
"\n",
|
||||
" # Create vector store\n",
|
||||
" vectorstore = FAISS.from_documents(cleaned_texts, embeddings)\n",
|
||||
"\n",
|
||||
" return vectorstore"
|
||||
@@ -271,6 +268,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Note - this currently works with OPENAI only\n",
|
||||
"evaluate_rag(chunks_query_retriever)"
|
||||
]
|
||||
}
|
||||
@@ -291,7 +289,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.0"
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user