Mirror of https://github.com/hhy-huang/HiRAG.git (synced 2025-09-16 23:52:00 +03:00)

Merge branch 'main' of https://github.com/hhy-huang/HiRAG
@@ -99,7 +99,8 @@ graph_func = HiRAG(
     embedding_func_max_async=config['hirag']['embedding_func_max_async'],
     enable_naive_rag=config['hirag']['enable_naive_rag'])

-with open("./web3_test/txtWhitePapers/aave-v2-whitepaper.pdf.txt") as f:
+# comment this if the working directory has already been indexed
+with open("your .txt file path") as f:
     graph_func.insert(f.read())

 print("Perform hi search:")
@@ -99,7 +99,8 @@ graph_func = HiRAG(
     embedding_func_max_async=config['hirag']['embedding_func_max_async'],
     enable_naive_rag=config['hirag']['enable_naive_rag'])

-with open("./web3_test/txtWhitePapers/aave-v2-whitepaper.pdf.txt") as f:
+# comment this if the working directory has already been indexed
+with open("your .txt file path") as f:
     graph_func.insert(f.read())

 print("Perform hi search:")
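The two identical hunks above apply the same change: the hard-coded AAVE whitepaper path is replaced by a placeholder .txt path plus a reminder comment, so the insert step is easy to skip once a working directory has already been indexed. For context, every HiRAG setting in these example scripts is read from a config['hirag'] dictionary. The short sketch below shows one way that dictionary might be loaded; the config.yaml filename and the PyYAML import are illustrative assumptions, not part of this commit.

import yaml  # assumption: PyYAML is installed

# Load the settings that the scripts reference as config['hirag'][...].
with open("config.yaml") as f:  # assumed filename, not shown in this commit
    config = yaml.safe_load(f)

# Keys that appear in the hunks on this page.
print(config['hirag']['working_dir'])
print(config['hirag']['enable_naive_rag'])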
@@ -97,7 +97,7 @@ async def OPENAI_model_if_cache(

 graph_func = HiRAG(working_dir=config['hirag']['working_dir'],
                    enable_llm_cache=config['hirag']['enable_llm_cache'],
-                   embedding_func=GLM_embedding,
+                   embedding_func=OPENAI_embedding,
                    best_model_func=OPENAI_model_if_cache,
                    cheap_model_func=OPENAI_model_if_cache,
                    enable_hierachical_mode=config['hirag']['enable_hierachical_mode'],
@@ -105,9 +105,10 @@ graph_func = HiRAG(working_dir=config['hirag']['working_dir'],
                    embedding_func_max_async=config['hirag']['embedding_func_max_async'],
                    enable_naive_rag=config['hirag']['enable_naive_rag'])

-# with open("./web3_test/txtWhitePapers/aave-v2-whitepaper.pdf.txt") as f:
-# graph_func.insert(f.read())
+# comment this if the working directory has already been indexed
+with open("your .txt file path") as f:
+    graph_func.insert(f.read())


 print("Perform hi search:")
-print(graph_func.query("Please introduce Amazon.", param=QueryParam(mode="hi")))
+print(graph_func.query("What are the top themes in this story?", param=QueryParam(mode="hi")))
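Taken together, the two hunks above switch this script's embedding function from GLM_embedding to OPENAI_embedding, activate the placeholder insert block, and change the demo question. A minimal end-to-end sketch of the resulting flow follows. It is a sketch, not the committed file: config, OPENAI_embedding and OPENAI_model_if_cache are assumed to be defined earlier in the script (outside the hunks), the ./my_corpus.txt path is a placeholder, and only the keyword arguments visible in the hunks are passed.

from hirag import HiRAG, QueryParam

# config, OPENAI_embedding and OPENAI_model_if_cache come from earlier in the
# script, as the hunk headers indicate; they are not reproduced here.
graph_func = HiRAG(working_dir=config['hirag']['working_dir'],
                   enable_llm_cache=config['hirag']['enable_llm_cache'],
                   embedding_func=OPENAI_embedding,
                   best_model_func=OPENAI_model_if_cache,
                   cheap_model_func=OPENAI_model_if_cache,
                   enable_hierachical_mode=config['hirag']['enable_hierachical_mode'],
                   embedding_func_max_async=config['hirag']['embedding_func_max_async'],
                   enable_naive_rag=config['hirag']['enable_naive_rag'])

# Index the corpus once; comment this out if the working directory is already built.
with open("./my_corpus.txt") as f:  # placeholder path
    graph_func.insert(f.read())

# Hierarchical ("hi") retrieval, as in the diff.
print("Perform hi search:")
print(graph_func.query("What are the top themes in this story?",
                       param=QueryParam(mode="hi")))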
@@ -1 +1,5 @@
 from .hirag import HiRAG, QueryParam
+
+__version__ = "0.1.0"
+__author__ = "Haoyu Huang"
+__url__ = "https://github.com/hhy-huang/HiRAG"
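The new dunder attributes make the package self-describing. A small usage sketch, assuming the package is importable; it relies only on names exported by hirag/__init__.py:

import hirag
from hirag import HiRAG, QueryParam  # existing re-exports from hirag/__init__.py

print(hirag.__version__)  # "0.1.0"
print(hirag.__author__)   # "Haoyu Huang"
print(hirag.__url__)      # "https://github.com/hhy-huang/HiRAG"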
hirag/_op.py (+13 lines)
@@ -1410,6 +1410,19 @@ async def _build_hierarchical_query_context(
     for i, t in enumerate(use_text_units):
         text_units_section_list.append([i, t["content"]])
     text_units_context = list_of_list_to_csv(text_units_section_list)
+
+    # display reference info
+    entities = [n["entity_name"] for n in node_datas]
+    communities = [(c["level"], c["title"]) for c in use_communities]
+    chunks = [(t["full_doc_id"], t["chunk_order_index"]) for t in use_text_units]
+
+    references_context = (
+        f"Entities ({len(entities)}): {entities}\n\n"
+        f"Communities (level, cluster_id) ({len(communities)}): {communities}\n\n"
+        f"Chunks (doc_id, chunk_index) ({len(chunks)}): {chunks}\n"
+    )
+
+    logging.info(f"====== References ======:\n{references_context}")
     return f"""
 -----Backgrounds-----
 ```csv
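The block added to _build_hierarchical_query_context only collects provenance for the answer: entity names, (level, cluster_id) pairs for the retrieved communities, and (doc_id, chunk_index) pairs for the text chunks, then logs them before the prompt is assembled. The standalone sketch below reproduces that formatting with made-up sample values, purely to show what the logged reference block looks like.

import logging

logging.basicConfig(level=logging.INFO)

# Made-up stand-ins for node_datas, use_communities and use_text_units.
entities = ["AAVE", "FLASH LOAN"]
communities = [(0, "Lending protocols"), (1, "Flash loans")]
chunks = [("doc-0", 3), ("doc-0", 7)]

references_context = (
    f"Entities ({len(entities)}): {entities}\n\n"
    f"Communities (level, cluster_id) ({len(communities)}): {communities}\n\n"
    f"Chunks (doc_id, chunk_index) ({len(chunks)}): {chunks}\n"
)

# Same log line as the new code in hirag/_op.py.
logging.info(f"====== References ======:\n{references_context}")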
@@ -21,3 +21,4 @@ tqdm==4.66.5
 transformers==4.47.1
 umap_learn==0.5.6
 xxhash==3.5.0
+future==1.0.0