files restructured

This commit is contained in:
Karthik Soman
2023-11-21 16:11:29 -08:00
parent 81405f83ae
commit bff4c0941a
93 changed files with 26 additions and 5 deletions

View File

@@ -1,10 +1,10 @@
import yaml
import os
with open('../config.yaml', 'r') as f:
with open('config.yaml', 'r') as f:
config_data = yaml.safe_load(f)
with open('../system_prompts.yaml', 'r') as f:
with open('system_prompts.yaml', 'r') as f:
system_prompts = yaml.safe_load(f)
if 'GPT_CONFIG_FILE' in config_data:

14
codes/run_setup.py Normal file
View File

@@ -0,0 +1,14 @@
"""Set-up script for KG-RAG: create the vector database unless it already exists."""
import os

from kg_rag.codes.utility import config_data

# Print the start-up banner, padded by an empty line before and after.
for banner_line in ("", "Starting to set up KG-RAG ...", ""):
    print(banner_line)

# Path of the vector database, read from the loaded configuration.
vector_db_path = config_data["VECTOR_DB_PATH"]

if not os.path.exists(vector_db_path):
    print("Creating vectorDB ...")
    # Imported only on this branch, so the creation module is loaded
    # solely when the database actually has to be built.
    from kg_rag.codes.py_scripts.vectorDB.create_vectordb import create_vectordb

    create_vectordb()
else:
    print("vectorDB already exists!")

View File

@@ -12,7 +12,7 @@ from langchain.vectorstores import Chroma
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TextStreamer, GPTQConfig
from py_scripts.config_loader import *
from kg_rag.codes.config_loader import *

View File

@@ -1,5 +1,5 @@
import pickle
from py_scripts.utility import RecursiveCharacterTextSplitter, Chroma, SentenceTransformerEmbeddings, config_data, time
from kg_rag.codes.utility import RecursiveCharacterTextSplitter, Chroma, SentenceTransformerEmbeddings, config_data, time
DATA_PATH = config_data["VECTOR_DB_DISEASE_ENTITY_PATH"]

View File

@@ -10,4 +10,11 @@ VECTOR_DB_SENTENCE_EMBEDDING_MODEL : 'sentence-transformers/all-MiniLM-L6-v2'
# Note: this assumes your GPT config file is in your $HOME directory; if it is not, change the path accordingly.
# Also, the GPT '.env' file should contain values for API_KEY, API_VERSION and RESOURCE_ENDPOINT. Those parameters are not included in this YAML file.
GPT_CONFIG_FILE : '$HOME/.gpt_config.env'
GPT_API_TYPE : 'azure'
GPT_API_TYPE : 'azure'
# Llama model name (refer to Hugging Face for the correct name of the model version you would like to use, and make sure you have the right permission to use the model)
LLAMA_MODEL_NAME : 'meta-llama/Llama-2-13b-chat-hf'
LLAMA_MODEL_BRANCH : 'main'
# Path for caching LLM model files (when the model is downloaded from Hugging Face, it is saved in this path)
LLM_CACHE_DIR : '/data/somank/llm_data/llm_models/huggingface'