files restructured

2024-06-08 14:12:54 +03:00 · 2023-11-21 16:11:29 -08:00
parent 81405f83ae
commit bff4c0941a
93 changed files with 26 additions and 5 deletions
--- a/codes/py_scripts/init.py
+++ b/codes/py_scripts/init.py
--- a/codes/py_scripts/config_loader.py
+++ b/codes/py_scripts/config_loader.py
@@ -1,10 +1,10 @@
 import yaml
 import os

-with open('../config.yaml', 'r') as f:
+with open('config.yaml', 'r') as f:
    config_data = yaml.safe_load(f)
        
-with open('../system_prompts.yaml', 'r') as f:
+with open('system_prompts.yaml', 'r') as f:
    system_prompts = yaml.safe_load(f)
    
 if 'GPT_CONFIG_FILE' in config_data:
--- a/codes/py_scripts/prompt_based_text_generation/BioGPT/text_generation_BioGPT.py
+++ b/codes/py_scripts/prompt_based_text_generation/BioGPT/text_generation_BioGPT.py
--- a/codes/py_scripts/prompt_based_text_generation/BioGPT/true_false_generation_BioGPT_HuggingFace.py
+++ b/codes/py_scripts/prompt_based_text_generation/BioGPT/true_false_generation_BioGPT_HuggingFace.py
--- a/codes/py_scripts/prompt_based_text_generation/GPT/pycache/gpt_utility.cpython-310.pyc
+++ b/codes/py_scripts/prompt_based_text_generation/GPT/pycache/gpt_utility.cpython-310.pyc
--- a/codes/py_scripts/prompt_based_text_generation/GPT/gpt_utility.py
+++ b/codes/py_scripts/prompt_based_text_generation/GPT/gpt_utility.py
--- a/codes/py_scripts/prompt_based_text_generation/GPT/mcq_qa_GPT_openai.py
+++ b/codes/py_scripts/prompt_based_text_generation/GPT/mcq_qa_GPT_openai.py
--- a/codes/py_scripts/prompt_based_text_generation/GPT/true_false_generation_GPT_openai.py
+++ b/codes/py_scripts/prompt_based_text_generation/GPT/true_false_generation_GPT_openai.py
--- a/codes/py_scripts/prompt_based_text_generation/Galactica/text_generation_Galactica.py
+++ b/codes/py_scripts/prompt_based_text_generation/Galactica/text_generation_Galactica.py
--- a/codes/py_scripts/prompt_based_text_generation/Galactica/true_false_generation_Galactica_HuggingFace.py
+++ b/codes/py_scripts/prompt_based_text_generation/Galactica/true_false_generation_Galactica_HuggingFace.py
--- a/codes/py_scripts/prompt_based_text_generation/Llama/mcq_qa_Llama_HuggingFace.py
+++ b/codes/py_scripts/prompt_based_text_generation/Llama/mcq_qa_Llama_HuggingFace.py
--- a/codes/py_scripts/prompt_based_text_generation/Llama/text_generation_LlamaCpp.py
+++ b/codes/py_scripts/prompt_based_text_generation/Llama/text_generation_LlamaCpp.py
--- a/codes/py_scripts/prompt_based_text_generation/Llama/text_generation_Llama_HuggingFace.py
+++ b/codes/py_scripts/prompt_based_text_generation/Llama/text_generation_Llama_HuggingFace.py
--- a/codes/py_scripts/prompt_based_text_generation/Llama/text_generation_Llama_base_model.py
+++ b/codes/py_scripts/prompt_based_text_generation/Llama/text_generation_Llama_base_model.py
--- a/codes/py_scripts/prompt_based_text_generation/Llama/true_false_generation_Llama_HuggingFace.py
+++ b/codes/py_scripts/prompt_based_text_generation/Llama/true_false_generation_Llama_HuggingFace.py
--- a/codes/py_scripts/prompt_based_text_generation/Llama/true_false_generation_Llama_HuggingFace_iteration.py
+++ b/codes/py_scripts/prompt_based_text_generation/Llama/true_false_generation_Llama_HuggingFace_iteration.py
--- a/codes/py_scripts/prompt_based_text_generation/PubMedBERT/text_generation_PubMedBERT.py
+++ b/codes/py_scripts/prompt_based_text_generation/PubMedBERT/text_generation_PubMedBERT.py
--- a/codes/py_scripts/pycache/init.cpython-310.pyc
+++ b/codes/py_scripts/pycache/init.cpython-310.pyc
--- a/codes/py_scripts/pycache/config_loader.cpython-310.pyc
+++ b/codes/py_scripts/pycache/config_loader.cpython-310.pyc
--- a/codes/py_scripts/rag_based_text_generation/GPT/archives/drug_repurposing_archive.py
+++ b/codes/py_scripts/rag_based_text_generation/GPT/archives/drug_repurposing_archive.py
--- a/codes/py_scripts/rag_based_text_generation/GPT/archives/graph_traversal.py
+++ b/codes/py_scripts/rag_based_text_generation/GPT/archives/graph_traversal.py
--- a/codes/py_scripts/rag_based_text_generation/GPT/archives/mcq_qa_GPT_openai_archive.py
+++ b/codes/py_scripts/rag_based_text_generation/GPT/archives/mcq_qa_GPT_openai_archive.py
--- a/codes/py_scripts/rag_based_text_generation/GPT/archives/mcq_qa_GPT_openai_archive_2.py
+++ b/codes/py_scripts/rag_based_text_generation/GPT/archives/mcq_qa_GPT_openai_archive_2.py
--- a/codes/py_scripts/rag_based_text_generation/GPT/archives/one_hop_traversal_hyperparameter_tuning_archive.py
+++ b/codes/py_scripts/rag_based_text_generation/GPT/archives/one_hop_traversal_hyperparameter_tuning_archive.py
--- a/codes/py_scripts/rag_based_text_generation/GPT/archives/two_hop_traversal_hyperparameter_tuning_archive.py
+++ b/codes/py_scripts/rag_based_text_generation/GPT/archives/two_hop_traversal_hyperparameter_tuning_archive.py
--- a/codes/py_scripts/rag_based_text_generation/GPT/drug_repurposing.py
+++ b/codes/py_scripts/rag_based_text_generation/GPT/drug_repurposing.py
--- a/codes/py_scripts/rag_based_text_generation/GPT/mcq_qa_GPT_openai.py
+++ b/codes/py_scripts/rag_based_text_generation/GPT/mcq_qa_GPT_openai.py
--- a/codes/py_scripts/rag_based_text_generation/GPT/one_hop_traversal_hyperparameter_tuning.py
+++ b/codes/py_scripts/rag_based_text_generation/GPT/one_hop_traversal_hyperparameter_tuning.py
--- a/codes/py_scripts/rag_based_text_generation/GPT/text_generation_GPT_openai.py
+++ b/codes/py_scripts/rag_based_text_generation/GPT/text_generation_GPT_openai.py
--- a/codes/py_scripts/rag_based_text_generation/GPT/true_false_generation_GPT_openai.py
+++ b/codes/py_scripts/rag_based_text_generation/GPT/true_false_generation_GPT_openai.py
--- a/codes/py_scripts/rag_based_text_generation/GPT/two_hop_traversal_hyperparameter_tuning.py
+++ b/codes/py_scripts/rag_based_text_generation/GPT/two_hop_traversal_hyperparameter_tuning.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/archives/mcq_qa_Llama_HuggingFace_archive.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/archives/mcq_qa_Llama_HuggingFace_archive.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/archives/mcq_qa_Llama_HuggingFace_archive_2.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/archives/mcq_qa_Llama_HuggingFace_archive_2.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/drug_repurposing.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/drug_repurposing.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/mcq_qa_Llama_HuggingFace.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/mcq_qa_Llama_HuggingFace.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/text_generation_Llama_HuggingFace.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/text_generation_Llama_HuggingFace.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/true_false_generation_Llama_HuggingFace.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/true_false_generation_Llama_HuggingFace.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/true_false_generation_Llama_HuggingFace_archive.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/node_retrieval_based_rag/true_false_generation_Llama_HuggingFace_archive.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/rag_using_RetrievalQA.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/rag_using_RetrievalQA.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/rag_with_retrieval_threshold_HuggingFace.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/rag_with_retrieval_threshold_HuggingFace.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/rag_with_retrieval_threshold_LlamaCpp_GPU.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/rag_with_retrieval_threshold_LlamaCpp_GPU.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/rag_with_retrieval_threshold_Ollama_and_LlamaCpp.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/rag_with_retrieval_threshold_Ollama_and_LlamaCpp.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/rag_with_retrieval_threshold_for_true_false_generation_Llama_HuggingFace.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/rag_with_retrieval_threshold_for_true_false_generation_Llama_HuggingFace.py
--- a/codes/py_scripts/rag_based_text_generation/Llama/test_huggingface_with_langchain.py
+++ b/codes/py_scripts/rag_based_text_generation/Llama/test_huggingface_with_langchain.py
--- a/codes/py_scripts/retrieval_analysis/check_true_false_questions_have_spoke_context.py
+++ b/codes/py_scripts/retrieval_analysis/check_true_false_questions_have_spoke_context.py
--- a/codes/py_scripts/retrieval_analysis/estimation_of_retrieval_score_for_sentence_embedding_model.py
+++ b/codes/py_scripts/retrieval_analysis/estimation_of_retrieval_score_for_sentence_embedding_model.py
--- a/codes/run_setup.py
+++ b/codes/run_setup.py
@@ -0,0 +1,14 @@
+import os
+from kg_rag.codes.utility import config_data
+
+
+print("")
+print("Starting to set up KG-RAG ...")
+print("")
+
+if os.path.exists(config_data["VECTOR_DB_PATH"]):
+    print("vectorDB already exists!")
+else:
+    print("Creating vectorDB ...")
+    from kg_rag.codes.py_scripts.vectorDB.create_vectordb import create_vectordb
+    create_vectordb()
--- a/codes/py_scripts/utility.py
+++ b/codes/py_scripts/utility.py
@@ -12,7 +12,7 @@ from langchain.vectorstores import Chroma
 from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TextStreamer, GPTQConfig
-from py_scripts.config_loader import *
+from kg_rag.codes.config_loader import *



--- a/codes/py_scripts/vectorDB/init.py
+++ b/codes/py_scripts/vectorDB/init.py
--- a/codes/py_scripts/vectorDB/create_vectordb.py
+++ b/codes/py_scripts/vectorDB/create_vectordb.py
@@ -1,5 +1,5 @@
 import pickle
-from py_scripts.utility import RecursiveCharacterTextSplitter, Chroma, SentenceTransformerEmbeddings, config_data, time
+from kg_rag.codes.utility import RecursiveCharacterTextSplitter, Chroma, SentenceTransformerEmbeddings, config_data, time


 DATA_PATH = config_data["VECTOR_DB_DISEASE_ENTITY_PATH"]
--- a/config.yaml
+++ b/config.yaml
@@ -10,4 +10,11 @@ VECTOR_DB_SENTENCE_EMBEDDING_MODEL : 'sentence-transformers/all-MiniLM-L6-v2'
 # Just note that, this assumes your GPT config file is in the $HOME path, if not, change it accordingly
 # Also, GPT '.env' file should contain values for API_KEY, API_VERSION, RESOURCE_ENDPOINT. We are not including those parameters in this yaml file
 GPT_CONFIG_FILE : '$HOME/.gpt_config.env'
-GPT_API_TYPE : 'azure'
+GPT_API_TYPE : 'azure'
+
+# Llama model name (refer to Hugging Face for the exact name of the model version you would like to use; also make sure you have been granted permission to use the model)
+LLAMA_MODEL_NAME : 'meta-llama/Llama-2-13b-chat-hf'
+LLAMA_MODEL_BRANCH : 'main'
+
+# Path for caching LLM model files (when a model is downloaded from Hugging Face, it is saved to this path)
+LLM_CACHE_DIR : '/data/somank/llm_data/llm_models/huggingface'
--- a/codes/notebooks/GPT_models/chat_completion.ipynb
+++ b/codes/notebooks/GPT_models/chat_completion.ipynb
--- a/codes/notebooks/GPT_models/graph_traversal.ipynb
+++ b/codes/notebooks/GPT_models/graph_traversal.ipynb
--- a/codes/notebooks/GPT_models/run_file_on_chat_completion.ipynb
+++ b/codes/notebooks/GPT_models/run_file_on_chat_completion.ipynb
--- a/codes/notebooks/analysis/drug_repurposing_analysis.ipynb
+++ b/codes/notebooks/analysis/drug_repurposing_analysis.ipynb
--- a/codes/notebooks/analysis/drug_repurposing_analysis_archive.ipynb
+++ b/codes/notebooks/analysis/drug_repurposing_analysis_archive.ipynb
--- a/codes/notebooks/analysis/drug_repurposing_analysis_v2.ipynb
+++ b/codes/notebooks/analysis/drug_repurposing_analysis_v2.ipynb
--- a/codes/notebooks/analysis/galactica_response_manual_analysis.ipynb
+++ b/codes/notebooks/analysis/galactica_response_manual_analysis.ipynb
--- a/codes/notebooks/analysis/hyper_param_tuning_drug_repurpose_data_archive.ipynb
+++ b/codes/notebooks/analysis/hyper_param_tuning_drug_repurpose_data_archive.ipynb
--- a/codes/notebooks/analysis/hyper_param_tuning_for_two_hop_traversal.ipynb
+++ b/codes/notebooks/analysis/hyper_param_tuning_for_two_hop_traversal.ipynb
--- a/codes/notebooks/analysis/hyper_param_tuning_for_two_hop_traversal_archive.ipynb
+++ b/codes/notebooks/analysis/hyper_param_tuning_for_two_hop_traversal_archive.ipynb
--- a/codes/notebooks/analysis/hyper_param_tuning_for_two_hop_traversal_comparative_analysis.ipynb
+++ b/codes/notebooks/analysis/hyper_param_tuning_for_two_hop_traversal_comparative_analysis.ipynb
--- a/codes/notebooks/analysis/hyper_param_tuning_one_hop_traversal.ipynb
+++ b/codes/notebooks/analysis/hyper_param_tuning_one_hop_traversal.ipynb
--- a/codes/notebooks/analysis/hyper_param_tuning_one_hop_traversal_comparative_analysis.ipynb
+++ b/codes/notebooks/analysis/hyper_param_tuning_one_hop_traversal_comparative_analysis.ipynb
--- a/codes/notebooks/analysis/test_question_baseline_distribution.ipynb
+++ b/codes/notebooks/analysis/test_question_baseline_distribution.ipynb
--- a/codes/notebooks/analysis/test_question_response_analysis_galactica.ipynb
+++ b/codes/notebooks/analysis/test_question_response_analysis_galactica.ipynb
--- a/codes/notebooks/analysis/test_question_response_analysis_llama.ipynb
+++ b/codes/notebooks/analysis/test_question_response_analysis_llama.ipynb
--- a/codes/notebooks/analysis/test_question_response_comparative_analysis_llama.ipynb
+++ b/codes/notebooks/analysis/test_question_response_comparative_analysis_llama.ipynb
--- a/codes/notebooks/analysis/test_questions_binary_response_analysys_llama.ipynb
+++ b/codes/notebooks/analysis/test_questions_binary_response_analysys_llama.ipynb
--- a/codes/notebooks/analysis/test_questions_binary_response_comparative_analysys.ipynb
+++ b/codes/notebooks/analysis/test_questions_binary_response_comparative_analysys.ipynb
--- a/codes/notebooks/analysis/test_questions_mcq_response_comparative_analysys_llama.ipynb
+++ b/codes/notebooks/analysis/test_questions_mcq_response_comparative_analysys_llama.ipynb
--- a/codes/notebooks/benchmark_datasets/bioasq.ipynb
+++ b/codes/notebooks/benchmark_datasets/bioasq.ipynb
--- a/codes/notebooks/benchmark_datasets/checking_the_spoke_mapping_of_true_false_questions.ipynb
+++ b/codes/notebooks/benchmark_datasets/checking_the_spoke_mapping_of_true_false_questions.ipynb
--- a/codes/notebooks/benchmark_datasets/combine_monarch_robokop.ipynb
+++ b/codes/notebooks/benchmark_datasets/combine_monarch_robokop.ipynb
--- a/codes/notebooks/benchmark_datasets/create_associations_from_mondo_for_test_questions.ipynb
+++ b/codes/notebooks/benchmark_datasets/create_associations_from_mondo_for_test_questions.ipynb
--- a/codes/notebooks/benchmark_datasets/create_questions_from_disgnet.ipynb
+++ b/codes/notebooks/benchmark_datasets/create_questions_from_disgnet.ipynb
--- a/codes/notebooks/benchmark_datasets/create_test_questions.ipynb
+++ b/codes/notebooks/benchmark_datasets/create_test_questions.ipynb
--- a/codes/notebooks/benchmark_datasets/disgenet.ipynb
+++ b/codes/notebooks/benchmark_datasets/disgenet.ipynb
--- a/codes/notebooks/benchmark_datasets/disgenet_api.ipynb
+++ b/codes/notebooks/benchmark_datasets/disgenet_api.ipynb
--- a/codes/notebooks/benchmark_datasets/disgenet_sqlite.ipynb
+++ b/codes/notebooks/benchmark_datasets/disgenet_sqlite.ipynb
--- a/codes/notebooks/benchmark_datasets/graph_traversal/one_hop_data.ipynb
+++ b/codes/notebooks/benchmark_datasets/graph_traversal/one_hop_data.ipynb
--- a/codes/notebooks/benchmark_datasets/graph_traversal/two_hop_data.ipynb
+++ b/codes/notebooks/benchmark_datasets/graph_traversal/two_hop_data.ipynb
--- a/codes/notebooks/benchmark_datasets/medhop.ipynb
+++ b/codes/notebooks/benchmark_datasets/medhop.ipynb
--- a/codes/notebooks/benchmark_datasets/medmcqa.ipynb
+++ b/codes/notebooks/benchmark_datasets/medmcqa.ipynb
--- a/codes/notebooks/benchmark_datasets/monarch_api.ipynb
+++ b/codes/notebooks/benchmark_datasets/monarch_api.ipynb
--- a/codes/notebooks/benchmark_datasets/mondo.ipynb
+++ b/codes/notebooks/benchmark_datasets/mondo.ipynb
--- a/codes/notebooks/benchmark_datasets/ordo.ipynb
+++ b/codes/notebooks/benchmark_datasets/ordo.ipynb
--- a/codes/notebooks/benchmark_datasets/robokop.ipynb
+++ b/codes/notebooks/benchmark_datasets/robokop.ipynb
--- a/codes/notebooks/benchmark_datasets/semmedDB_TREATS_data.ipynb
+++ b/codes/notebooks/benchmark_datasets/semmedDB_TREATS_data.ipynb
--- a/codes/notebooks/benchmark_datasets/semmedDB_disease_disease_data.ipynb
+++ b/codes/notebooks/benchmark_datasets/semmedDB_disease_disease_data.ipynb
--- a/codes/notebooks/benchmark_datasets/two_hop_mcq_disgenet_sqlite.ipynb
+++ b/codes/notebooks/benchmark_datasets/two_hop_mcq_disgenet_sqlite.ipynb
--- a/codes/notebooks/benchmark_datasets/two_hop_questions_true_false.ipynb
+++ b/codes/notebooks/benchmark_datasets/two_hop_questions_true_false.ipynb
--- a/codes/notebooks/benchmark_datasets/utility.py
+++ b/codes/notebooks/benchmark_datasets/utility.py