Improve entity content formatting

This commit is contained in:
yashshah035
2025-03-12 08:03:08 +05:30
parent 49fd327b5e
commit bce9c5a8cb
3 changed files with 17 additions and 3 deletions

2
.gitignore vendored
View File

@@ -24,3 +24,5 @@ examples/output/
.DS_Store
# Ignore all .ini files so config.ini stays out of the repo
*.ini
build/
minirag-venv/

View File

@@ -10,3 +10,4 @@ tiktoken
torch
tqdm
uvicorn
json_repair

View File

@@ -370,6 +370,16 @@ async def extract_entities(
for dp in all_entities_data
}
await entity_vdb.upsert(data_for_vdb)
if entity_vdb is not None:
data_for_vdb = {
compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
"content": dp["entity_name"] + " " + dp["description"],
"entity_name": dp["entity_name"],
}
for dp in all_entities_data
}
await entity_vdb.upsert(data_for_vdb)
if entity_name_vdb is not None:
data_for_vdb = {
compute_mdhash_id(dp["entity_name"], prefix="Ename-"): {
@@ -386,12 +396,13 @@ async def extract_entities(
"src_id": dp["src_id"],
"tgt_id": dp["tgt_id"],
"content": dp["keywords"]
+ dp["src_id"]
+ dp["tgt_id"]
+ dp["description"],
+ " " + dp["src_id"]
+ " " + dp["tgt_id"]
+ " " + dp["description"],
}
for dp in all_relationships_data
}
await relationships_vdb.upsert(data_for_vdb)
return knowledge_graph_inst