This commit is contained in:
nird
2025-01-02 20:51:21 +02:00
2 changed files with 23 additions and 22 deletions

View File

@@ -247,6 +247,9 @@
" Returns:\n",
" List[Document]: The top k documents based on the combined scores.\n",
" \"\"\"\n",
" \n",
" epsilon = 1e-8\n",
"\n",
" # Step 1: Get all documents from the vectorstore\n",
" all_docs = vectorstore.similarity_search(\"\", k=vectorstore.index.ntotal)\n",
"\n",
@@ -258,9 +261,9 @@
" \n",
" # Step 4: Normalize scores\n",
" vector_scores = np.array([score for _, score in vector_results])\n",
" vector_scores = 1 - (vector_scores - np.min(vector_scores)) / (np.max(vector_scores) - np.min(vector_scores))\n",
" vector_scores = 1 - (vector_scores - np.min(vector_scores)) / (np.max(vector_scores) - np.min(vector_scores) + epsilon)\n",
"\n",
" bm25_scores = (bm25_scores - np.min(bm25_scores)) / (np.max(bm25_scores) - np.min(bm25_scores))\n",
" bm25_scores = (bm25_scores - np.min(bm25_scores)) / (np.max(bm25_scores) - np.min(bm25_scores) + epsilon)\n",
"\n",
" # Step 5: Combine scores\n",
" combined_scores = alpha * vector_scores + (1 - alpha) * bm25_scores \n",

View File

@@ -78,33 +78,27 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\N7\\PycharmProjects\\llm_tasks\\RAG_TECHNIQUES\\.venv\\Lib\\site-packages\\deepeval\\__init__.py:45: UserWarning: You are using deepeval version 0.21.70, however version 0.21.73 is available. You should consider upgrading via the \"pip install --upgrade deepeval\" command.\n",
" warnings.warn(\n"
]
}
],
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"from dotenv import load_dotenv\n",
"\n",
"\n",
"sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..'))) # Add the parent directory to the path since we work with notebooks\n",
"from helper_functions import *\n",
"from evaluation.evalute_rag import *\n",
"\n",
"# Load environment variables from a .env file\n",
"load_dotenv()\n",
"\n",
"# Set the OpenAI API key environment variable\n",
"os.environ[\"OPENAI_API_KEY\"] = os.getenv('OPENAI_API_KEY')"
"# Set the OpenAI API key environment variable (comment out if not using OpenAI)\n",
"if not os.getenv('OPENAI_API_KEY'):\n",
" os.environ[\"OPENAI_API_KEY\"] = input(\"Please enter your OpenAI API key: \")\n",
"else:\n",
" os.environ[\"OPENAI_API_KEY\"] = os.getenv('OPENAI_API_KEY')\n",
"\n",
"sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..'))) # Add the parent directory to the path since we work with notebooks\n",
"from helper_functions import *\n",
"from evaluation.evalute_rag import *\n"
]
},
{
@@ -132,7 +126,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -160,8 +154,11 @@
" texts = text_splitter.split_documents(documents)\n",
" cleaned_texts = replace_t_with_space(texts)\n",
"\n",
" # Create embeddings and vector store\n",
" # Create embeddings (Tested with OpenAI and Amazon Bedrock)\n",
" embeddings = get_langchain_embedding_provider(EmbeddingProvider.OPENAI)\n",
" #embeddings = get_langchain_embedding_provider(EmbeddingProvider.AMAZON_BEDROCK)\n",
"\n",
" # Create vector store\n",
" vectorstore = FAISS.from_documents(cleaned_texts, embeddings)\n",
"\n",
" return vectorstore"
@@ -271,6 +268,7 @@
"metadata": {},
"outputs": [],
"source": [
"#Note - this currently works with OPENAI only\n",
"evaluate_rag(chunks_query_retriever)"
]
}
@@ -291,7 +289,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
"version": "3.12.3"
}
},
"nbformat": 4,