From 36c7b5fd367a5bc633cfdd06b83516cab6b74413 Mon Sep 17 00:00:00 2001
From: yashshah035
Date: Tue, 25 Feb 2025 11:00:51 +0530
Subject: [PATCH] adding graph visual examples

---
Review notes: the line structure of this patch was restored and both files
were revised (see the comments in the diff). The blob ids on the "index"
lines predate that revision.

 graph-visuals/graph_with_html.py  |  43 ++++++++
 graph-visuals/graph_with_neo4j.py | 154 ++++++++++++++++++++++++++++++
 2 files changed, 197 insertions(+)
 create mode 100644 graph-visuals/graph_with_html.py
 create mode 100644 graph-visuals/graph_with_neo4j.py

diff --git a/graph-visuals/graph_with_html.py b/graph-visuals/graph_with_html.py
new file mode 100644
index 0000000..dc9b87a
--- /dev/null
+++ b/graph-visuals/graph_with_html.py
@@ -0,0 +1,43 @@
+"""Render a GraphML knowledge graph as an interactive HTML page with pyvis.
+
+Reads ./LiHua-World/graph_chunk_entity_relation.graphml, gives every node a
+random color, copies any "description" attribute into "title", and writes
+the result to knowledge_graph.html.
+"""
+
+import random
+
+import pipmaster as pm
+
+# Install the visualisation dependencies if pipmaster reports them missing;
+# the imports below must therefore come after these checks.
+if not pm.is_installed("pyvis"):
+    pm.install("pyvis")
+if not pm.is_installed("networkx"):
+    pm.install("networkx")
+
+import networkx as nx
+from pyvis.network import Network
+
+# Load the GraphML file
+G = nx.read_graphml("./LiHua-World/graph_chunk_entity_relation.graphml")
+
+# Create a Pyvis network
+net = Network(height="100vh", notebook=True)
+
+# Convert NetworkX graph to Pyvis network
+net.from_nx(G)
+
+# Add colors and title to nodes
+for node in net.nodes:
+    node["color"] = "#{:06x}".format(random.randint(0, 0xFFFFFF))
+    if "description" in node:
+        node["title"] = node["description"]
+
+# Add title to edges
+for edge in net.edges:
+    if "description" in edge:
+        edge["title"] = edge["description"]
+
+# Save and display the network
+net.show("knowledge_graph.html")
diff --git a/graph-visuals/graph_with_neo4j.py b/graph-visuals/graph_with_neo4j.py
new file mode 100644
index 0000000..e7441d1
--- /dev/null
+++ b/graph-visuals/graph_with_neo4j.py
@@ -0,0 +1,154 @@
+"""Import a GraphML knowledge graph into Neo4j.
+
+Converts WORKING_DIR/graph_chunk_entity_relation.graphml to JSON with
+minirag.utils.xml_to_json, saves it as WORKING_DIR/graph_data.json, and
+writes the nodes and edges to Neo4j in batches. The Cypher statements
+call apoc.create.* procedures.
+"""
+
+import os
+import json
+from minirag.utils import xml_to_json
+from neo4j import GraphDatabase
+
+# Constants
+WORKING_DIR = "./LiHua-World"
+BATCH_SIZE_NODES = 500
+BATCH_SIZE_EDGES = 100
+
+# Neo4j connection credentials. Each one can be overridden with the
+# environment variable of the same name; the literals are the fallbacks.
+NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
+NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
+NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "your_password")
+
+
+def convert_xml_to_json(xml_path, output_path):
+    """Convert the XML file at xml_path to JSON and save it to output_path.
+
+    Returns the converted data, or None (after printing a message) when
+    xml_path does not exist or xml_to_json returns a falsy value.
+    """
+    if not os.path.exists(xml_path):
+        print(f"Error: File not found - {xml_path}")
+        return None
+
+    json_data = xml_to_json(xml_path)
+    if not json_data:
+        print("Failed to create JSON data")
+        return None
+
+    with open(output_path, "w", encoding="utf-8") as f:
+        json.dump(json_data, f, ensure_ascii=False, indent=2)
+    print(f"JSON file created: {output_path}")
+    return json_data
+
+
+def process_in_batches(tx, query, data, batch_size):
+    """Run query once for every batch_size-sized slice of data.
+
+    A slice is passed as the $nodes parameter when the query references
+    $nodes, and as $edges otherwise.
+    """
+    # Look for the parameter reference itself: the bare word "nodes" could
+    # also appear in an edge query, e.g. as an alias or inside a comment.
+    param_name = "nodes" if "$nodes" in query else "edges"
+    for start in range(0, len(data), batch_size):
+        batch = data[start : start + batch_size]
+        # Consume each result so a failing batch raises at this call.
+        tx.run(query, {param_name: batch}).consume()
+
+
+def main():
+    """Convert the GraphML export to JSON and load it into Neo4j."""
+    # Paths
+    xml_file = os.path.join(WORKING_DIR, "graph_chunk_entity_relation.graphml")
+    json_file = os.path.join(WORKING_DIR, "graph_data.json")
+
+    # Convert XML to JSON
+    json_data = convert_xml_to_json(xml_file, json_file)
+    if json_data is None:
+        return
+
+    # Load nodes and edges
+    nodes = json_data.get("nodes", [])
+    edges = json_data.get("edges", [])
+
+    # Neo4j queries
+    # NOTE(review): the MERGE matches on the :Entity label that the same
+    # statement removes again, so re-running the import presumably creates
+    # duplicate nodes - confirm before relying on idempotence.
+    create_nodes_query = """
+    UNWIND $nodes AS node
+    MERGE (e:Entity {id: node.id})
+    SET e.entity_type = node.entity_type,
+        e.description = node.description,
+        e.source_id = node.source_id,
+        e.displayName = node.id
+    REMOVE e:Entity
+    WITH e, node
+    CALL apoc.create.addLabels(e, [node.id]) YIELD node AS labeledNode
+    RETURN count(*)
+    """
+
+    create_edges_query = """
+    UNWIND $edges AS edge
+    MATCH (source {id: edge.source})
+    MATCH (target {id: edge.target})
+    WITH source, target, edge,
+         CASE
+            WHEN edge.keywords CONTAINS 'lead' THEN 'lead'
+            WHEN edge.keywords CONTAINS 'participate' THEN 'participate'
+            WHEN edge.keywords CONTAINS 'uses' THEN 'uses'
+            WHEN edge.keywords CONTAINS 'located' THEN 'located'
+            WHEN edge.keywords CONTAINS 'occurs' THEN 'occurs'
+            ELSE REPLACE(SPLIT(edge.keywords, ',')[0], '\"', '')
+         END AS relType
+    CALL apoc.create.relationship(source, relType, {
+      weight: edge.weight,
+      description: edge.description,
+      keywords: edge.keywords,
+      source_id: edge.source_id
+    }, target) YIELD rel
+    RETURN count(*)
+    """
+
+    # MATCH (n) is unfiltered: this touches every node in the database,
+    # not only the ones written above.
+    set_displayname_and_labels_query = """
+    MATCH (n)
+    SET n.displayName = n.id
+    WITH n
+    CALL apoc.create.setLabels(n, [n.entity_type]) YIELD node
+    RETURN count(*)
+    """
+
+    # Create a Neo4j driver
+    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
+
+    try:
+        # Execute queries in batches
+        with driver.session() as session:
+            # Insert nodes in batches
+            session.execute_write(
+                process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES
+            )
+
+            # Insert edges in batches
+            session.execute_write(
+                process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES
+            )
+
+            # Set displayName and labels; consume the result so a
+            # server-side failure is raised inside this try block.
+            session.run(set_displayname_and_labels_query).consume()
+
+    except Exception as e:
+        print(f"Error occurred: {e}")
+
+    finally:
+        driver.close()
+
+
+if __name__ == "__main__":
+    main()