mirror of
				https://github.com/BaranziniLab/KG_RAG.git
				synced 2024-06-08 14:12:54 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			449 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			449 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| {
 | |
|  "cells": [
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 2,
 | |
|    "id": "d153feab",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "import ast\n",
 | |
|     "\n",
 | |
|     "import requests\n",
 | |
|     "import pandas as pd\n"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 8,
 | |
|    "id": "00e1c456",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "config_data = {\n",
 | |
|     "    'BASE_URI' : 'https://spoke.rbvi.ucsf.edu',\n",
 | |
|     "    'cutoff_Compound_max_phase' : 3,\n",
 | |
|     "    'cutoff_Protein_source' : ['SwissProt'],\n",
 | |
|     "    'cutoff_DaG_diseases_sources' : ['knowledge', 'experiments'],\n",
 | |
|     "    'cutoff_DaG_textmining' : 3,\n",
 | |
|     "    'cutoff_CtD_phase' : 3,\n",
 | |
|     "    'cutoff_PiP_confidence' : 0.7,\n",
 | |
|     "    'cutoff_ACTeG_level' : ['Low', 'Medium', 'High']    \n",
 | |
|     "}\n"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 9,
 | |
|    "id": "dcf1b00e",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "def get_spoke_api_resp(base_uri, end_point, params=None):\n",
 | |
|     "    uri = base_uri + end_point\n",
 | |
|     "    if params:\n",
 | |
|     "        return requests.get(uri, params=params)\n",
 | |
|     "    else:\n",
 | |
|     "        return requests.get(uri)\n",
 | |
|     "\n",
 | |
|     "    \n",
 | |
|     "def get_context_using_spoke_api(node_value):\n",
 | |
|     "    type_end_point = \"/api/v1/types\"\n",
 | |
|     "    result = get_spoke_api_resp(config_data['BASE_URI'], type_end_point)\n",
 | |
|     "    data_spoke_types = result.json()\n",
 | |
|     "    node_types = list(data_spoke_types[\"nodes\"].keys())\n",
 | |
|     "    edge_types = list(data_spoke_types[\"edges\"].keys())\n",
 | |
|     "    node_types_to_remove = [\"DatabaseTimestamp\", \"Version\"]\n",
 | |
|     "    filtered_node_types = [node_type for node_type in node_types if node_type not in node_types_to_remove]\n",
 | |
|     "    api_params = {\n",
 | |
|     "        'node_filters' : filtered_node_types,\n",
 | |
|     "        'edge_filters': edge_types,\n",
 | |
|     "        'cutoff_Compound_max_phase': config_data['cutoff_Compound_max_phase'],\n",
 | |
|     "        'cutoff_Protein_source': config_data['cutoff_Protein_source'],\n",
 | |
|     "        'cutoff_DaG_diseases_sources': config_data['cutoff_DaG_diseases_sources'],\n",
 | |
|     "        'cutoff_DaG_textmining': config_data['cutoff_DaG_textmining'],\n",
 | |
|     "        'cutoff_CtD_phase': config_data['cutoff_CtD_phase'],\n",
 | |
|     "        'cutoff_PiP_confidence': config_data['cutoff_PiP_confidence'],\n",
 | |
|     "        'cutoff_ACTeG_level': config_data['cutoff_ACTeG_level']\n",
 | |
|     "    }\n",
 | |
|     "    node_type = \"Disease\"\n",
 | |
|     "    attribute = \"name\"\n",
 | |
|     "    nbr_end_point = \"/api/v1/neighborhood/{}/{}/{}\".format(node_type, attribute, node_value)\n",
 | |
|     "    result = get_spoke_api_resp(config_data['BASE_URI'], nbr_end_point, params=api_params)\n",
 | |
|     "    node_context = result.json()\n",
 | |
|     "    nbr_nodes = []\n",
 | |
|     "    nbr_edges = []\n",
 | |
|     "    for item in node_context:\n",
 | |
|     "        if \"_\" not in item[\"data\"][\"neo4j_type\"]:\n",
 | |
|     "            try:\n",
 | |
|     "                if item[\"data\"][\"neo4j_type\"] == \"Protein\":\n",
 | |
|     "                    nbr_nodes.append((item[\"data\"][\"neo4j_type\"], item[\"data\"][\"id\"], item[\"data\"][\"properties\"][\"description\"]))\n",
 | |
|     "                else:\n",
 | |
|     "                    nbr_nodes.append((item[\"data\"][\"neo4j_type\"], item[\"data\"][\"id\"], item[\"data\"][\"properties\"][\"name\"]))\n",
 | |
|     "            except:\n",
 | |
|     "                nbr_nodes.append((item[\"data\"][\"neo4j_type\"], item[\"data\"][\"id\"], item[\"data\"][\"properties\"][\"identifier\"]))\n",
 | |
|     "        elif \"_\" in item[\"data\"][\"neo4j_type\"]:\n",
 | |
|     "            try:\n",
 | |
|     "                provenance = \", \".join(item[\"data\"][\"properties\"][\"sources\"])\n",
 | |
|     "            except:\n",
 | |
|     "                try:\n",
 | |
|     "                    provenance = item[\"data\"][\"properties\"][\"source\"]\n",
 | |
|     "                    if isinstance(provenance, list):\n",
 | |
|     "                        provenance = \", \".join(provenance)                    \n",
 | |
|     "                except:\n",
 | |
|     "                    try:                    \n",
 | |
|     "                        preprint_list = ast.literal_eval(item[\"data\"][\"properties\"][\"preprint_list\"])\n",
 | |
|     "                        if len(preprint_list) > 0:                                                    \n",
 | |
|     "                            provenance = \", \".join(preprint_list)\n",
 | |
|     "                        else:\n",
 | |
|     "                            pmid_list = ast.literal_eval(item[\"data\"][\"properties\"][\"pmid_list\"])\n",
 | |
|     "                            pmid_list = list(map(lambda x:\"pubmedId:\"+x, pmid_list))\n",
 | |
|     "                            if len(pmid_list) > 0:\n",
 | |
|     "                                provenance = \", \".join(pmid_list)\n",
 | |
|     "                            else:\n",
 | |
|     "                                provenance = \"Based on data from Institute For Systems Biology (ISB)\"\n",
 | |
|     "                    except:                                \n",
 | |
|     "                        provenance = \"SPOKE-KG\"                                    \n",
 | |
|     "            nbr_edges.append((item[\"data\"][\"source\"], item[\"data\"][\"neo4j_type\"], item[\"data\"][\"target\"], provenance))\n",
 | |
|     "    nbr_nodes_df = pd.DataFrame(nbr_nodes, columns=[\"node_type\", \"node_id\", \"node_name\"])\n",
 | |
|     "    nbr_edges_df = pd.DataFrame(nbr_edges, columns=[\"source\", \"edge_type\", \"target\", \"provenance\"])\n",
 | |
|     "    merge_1 = pd.merge(nbr_edges_df, nbr_nodes_df, left_on=\"source\", right_on=\"node_id\").drop(\"node_id\", axis=1)\n",
 | |
|     "    merge_1.loc[:,\"node_name\"] = merge_1.node_type + \" \" + merge_1.node_name\n",
 | |
|     "    merge_1.drop([\"source\", \"node_type\"], axis=1, inplace=True)\n",
 | |
|     "    merge_1 = merge_1.rename(columns={\"node_name\":\"source\"})\n",
 | |
|     "    merge_2 = pd.merge(merge_1, nbr_nodes_df, left_on=\"target\", right_on=\"node_id\").drop(\"node_id\", axis=1)\n",
 | |
|     "    merge_2.loc[:,\"node_name\"] = merge_2.node_type + \" \" + merge_2.node_name\n",
 | |
|     "    merge_2.drop([\"target\", \"node_type\"], axis=1, inplace=True)\n",
 | |
|     "    merge_2 = merge_2.rename(columns={\"node_name\":\"target\"})\n",
 | |
|     "    merge_2 = merge_2[[\"source\", \"edge_type\", \"target\", \"provenance\"]]\n",
 | |
|     "    merge_2.loc[:, \"predicate\"] = merge_2.edge_type.apply(lambda x:x.split(\"_\")[0])\n",
 | |
|     "    merge_2.loc[:, \"context\"] =  merge_2.source + \" \" + merge_2.predicate.str.lower() + \" \" + merge_2.target + \" and Provenance of this association is \" + merge_2.provenance + \". \"\n",
 | |
|     "    context = merge_2['context'].str.cat(sep=' ')\n",
 | |
|     "    context += node_value + \" has a \" + node_context[0][\"data\"][\"properties\"][\"source\"] + \" identifier of \" + node_context[0][\"data\"][\"properties\"][\"identifier\"] + \" and Provenance of this association is \" + node_context[0][\"data\"][\"properties\"][\"source\"] + \".\"\n",
 | |
|     "    return context\n"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 161,
 | |
|    "id": "5b7bd91c",
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "name": "stdout",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "CPU times: user 68.7 ms, sys: 6.93 ms, total: 75.6 ms\n",
 | |
|       "Wall time: 286 ms\n"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "124"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 161,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "%%time\n",
 | |
|     "\n",
 | |
|     "node_value = 'giant cell glioblastoma'\n",
 | |
|     "\n",
 | |
|     "type_end_point = \"/api/v1/types\"\n",
 | |
|     "result = get_spoke_api_resp(config_data['BASE_URI'], type_end_point)\n",
 | |
|     "data_spoke_types = result.json()\n",
 | |
|     "node_types = list(data_spoke_types[\"nodes\"].keys())\n",
 | |
|     "edge_types = list(data_spoke_types[\"edges\"].keys())\n",
 | |
|     "node_types_to_remove = [\"DatabaseTimestamp\", \"Version\"]\n",
 | |
|     "filtered_node_types = [node_type for node_type in node_types if node_type not in node_types_to_remove]\n",
 | |
|     "api_params = {\n",
 | |
|     "    'node_filters' : filtered_node_types,\n",
 | |
|     "    'edge_filters': edge_types,\n",
 | |
|     "    'cutoff_Compound_max_phase': config_data['cutoff_Compound_max_phase'],\n",
 | |
|     "    'cutoff_Protein_source': config_data['cutoff_Protein_source'],\n",
 | |
|     "    'cutoff_DaG_diseases_sources': config_data['cutoff_DaG_diseases_sources'],\n",
 | |
|     "    'cutoff_DaG_textmining': config_data['cutoff_DaG_textmining'],\n",
 | |
|     "    'cutoff_CtD_phase': config_data['cutoff_CtD_phase'],\n",
 | |
|     "    'cutoff_PiP_confidence': config_data['cutoff_PiP_confidence'],\n",
 | |
|     "    'cutoff_ACTeG_level': config_data['cutoff_ACTeG_level'],\n",
 | |
|     "    'depth' : 1\n",
 | |
|     "}\n",
 | |
|     "node_type = \"Disease\"\n",
 | |
|     "attribute = \"name\"\n",
 | |
|     "nbr_end_point = \"/api/v1/neighborhood/{}/{}/{}\".format(node_type, attribute, node_value)\n",
 | |
|     "result = get_spoke_api_resp(config_data['BASE_URI'], nbr_end_point, params=api_params)\n",
 | |
|     "node_context = result.json()\n",
 | |
|     "len(node_context)\n"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 143,
 | |
|    "id": "100577db",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "edge_evidence = False\n",
 | |
|     "\n",
 | |
|     "nbr_nodes = []\n",
 | |
|     "nbr_edges = []\n",
 | |
|     "for item in node_context:\n",
 | |
|     "    if \"_\" not in item[\"data\"][\"neo4j_type\"]:\n",
 | |
|     "        try:\n",
 | |
|     "            if item[\"data\"][\"neo4j_type\"] == \"Protein\":\n",
 | |
|     "                nbr_nodes.append((item[\"data\"][\"neo4j_type\"], item[\"data\"][\"id\"], item[\"data\"][\"properties\"][\"description\"]))\n",
 | |
|     "            else:\n",
 | |
|     "                nbr_nodes.append((item[\"data\"][\"neo4j_type\"], item[\"data\"][\"id\"], item[\"data\"][\"properties\"][\"name\"]))\n",
 | |
|     "        except:\n",
 | |
|     "            nbr_nodes.append((item[\"data\"][\"neo4j_type\"], item[\"data\"][\"id\"], item[\"data\"][\"properties\"][\"identifier\"]))\n",
 | |
|     "    elif \"_\" in item[\"data\"][\"neo4j_type\"]:\n",
 | |
|     "        try:\n",
 | |
|     "            provenance = \", \".join(item[\"data\"][\"properties\"][\"sources\"])\n",
 | |
|     "        except:\n",
 | |
|     "            try:\n",
 | |
|     "                provenance = item[\"data\"][\"properties\"][\"source\"]\n",
 | |
|     "                if isinstance(provenance, list):\n",
 | |
|     "                    provenance = \", \".join(provenance)                    \n",
 | |
|     "            except:\n",
 | |
|     "                try:                    \n",
 | |
|     "                    preprint_list = ast.literal_eval(item[\"data\"][\"properties\"][\"preprint_list\"])\n",
 | |
|     "                    if len(preprint_list) > 0:                                                    \n",
 | |
|     "                        provenance = \", \".join(preprint_list)\n",
 | |
|     "                    else:\n",
 | |
|     "                        pmid_list = ast.literal_eval(item[\"data\"][\"properties\"][\"pmid_list\"])\n",
 | |
|     "                        pmid_list = list(map(lambda x:\"pubmedId:\"+x, pmid_list))\n",
 | |
|     "                        if len(pmid_list) > 0:\n",
 | |
|     "                            provenance = \", \".join(pmid_list)\n",
 | |
|     "                        else:\n",
 | |
|     "                            provenance = \"Based on data from Institute For Systems Biology (ISB)\"\n",
 | |
|     "                except:                                \n",
 | |
|     "                    provenance = \"SPOKE-KG\"     \n",
 | |
|     "        try:\n",
 | |
|     "            evidence = item[\"data\"][\"properties\"]\n",
 | |
|     "        except:\n",
 | |
|     "            evidence = None\n",
 | |
|     "        nbr_edges.append((item[\"data\"][\"source\"], item[\"data\"][\"neo4j_type\"], item[\"data\"][\"target\"], provenance, evidence))\n",
 | |
|     "nbr_nodes_df = pd.DataFrame(nbr_nodes, columns=[\"node_type\", \"node_id\", \"node_name\"])\n",
 | |
|     "nbr_edges_df = pd.DataFrame(nbr_edges, columns=[\"source\", \"edge_type\", \"target\", \"provenance\", \"evidence\"])\n",
 | |
|     "merge_1 = pd.merge(nbr_edges_df, nbr_nodes_df, left_on=\"source\", right_on=\"node_id\").drop(\"node_id\", axis=1)\n",
 | |
|     "merge_1.loc[:,\"node_name\"] = merge_1.node_type + \" \" + merge_1.node_name\n",
 | |
|     "merge_1.drop([\"source\", \"node_type\"], axis=1, inplace=True)\n",
 | |
|     "merge_1 = merge_1.rename(columns={\"node_name\":\"source\"})\n",
 | |
|     "merge_2 = pd.merge(merge_1, nbr_nodes_df, left_on=\"target\", right_on=\"node_id\").drop(\"node_id\", axis=1)\n",
 | |
|     "merge_2.loc[:,\"node_name\"] = merge_2.node_type + \" \" + merge_2.node_name\n",
 | |
|     "merge_2.drop([\"target\", \"node_type\"], axis=1, inplace=True)\n",
 | |
|     "merge_2 = merge_2.rename(columns={\"node_name\":\"target\"})\n",
 | |
|     "merge_2 = merge_2[[\"source\", \"edge_type\", \"target\", \"provenance\", \"evidence\"]]\n",
 | |
|     "merge_2.loc[:, \"predicate\"] = merge_2.edge_type.apply(lambda x:x.split(\"_\")[0])\n",
 | |
|     "#     if edge_evidence:\n",
 | |
|     "#         merge_2.loc[:, \"context\"] =  merge_2.source + \" \" + merge_2.predicate.str.lower() + \" \" + merge_2.target + \" and Provenance of this association is \" + merge_2.provenance + \" and attributes associated with this association is in the following JSON format:\\n \" + merge_2.evidence.astype('str') + \"\\n\\n\"\n",
 | |
|     "#     else:\n",
 | |
|     "merge_2.loc[:, \"context\"] =  merge_2.source + \" \" + merge_2.predicate.str.lower() + \" \" + merge_2.target + \" and Provenance of this association is \" + merge_2.provenance + \". \"\n",
 | |
|     "context = merge_2.context.str.cat(sep=' ')\n",
 | |
|     "context += node_value + \" has a \" + node_context[0][\"data\"][\"properties\"][\"source\"] + \" identifier of \" + node_context[0][\"data\"][\"properties\"][\"identifier\"] + \" and Provenance of this is from \" + node_context[0][\"data\"][\"properties\"][\"source\"] + \".\"\n",
 | |
|     "\n"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 154,
 | |
|    "id": "490adee9",
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "'Disease secondary progressive multiple sclerosis isa Disease multiple sclerosis and Provenance of this association is Disease Ontology. '"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 154,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "high_similarity_context = ['Disease multiple sclerosis associates Gene HLA-DQA1 and Provenance of this association is GWAS. ',\n",
 | |
|     "                          'Disease multiple sclerosis associates Gene HLA-DRB1 and Provenance of this association is DISEASES. ',\n",
 | |
|     "                          'Disease multiple sclerosis associates Gene ATXN1 and Provenance of this association is GWAS. ']\n",
 | |
|     "\n",
 | |
|     "merge_2[merge_2.context.isin(high_similarity_context)]\n",
 | |
|     "merge_2.context.values[0]\n"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 134,
 | |
|    "id": "e3db5024",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "high_similarity_context = ['Disease multiple sclerosis associates Gene HLA-DQA1 and Provenance of this association is GWAS.',\n",
 | |
|     "                          'Disease multiple sclerosis associates Gene HLA-DRB1 and Provenance of this association is DISEASES.',\n",
 | |
|     "                          'Disease multiple sclerosis associates Gene ATXN1 and Provenance of this association is GWAS.']\n",
 | |
|     "high_similarity_context.append('Gene Xs sds associates with Disease multiple sclerosis and Provenance of this association is GWAS.')\n",
 | |
|     "node_name = 'multiple sclerosis'\n",
 | |
|     "node_types = nbr_nodes_df.node_type.unique()\n",
 | |
|     "\n",
 | |
|     "\n",
 | |
|     "nodes = list(filter(None, list(map(lambda x:x if '_' not in x['data']['neo4j_type'] else None, node_context))))\n",
 | |
|     "edges = list(filter(None, list(map(lambda x:x if '_' in x['data']['neo4j_type'] else None, node_context))))\n",
 | |
|     "\n",
 | |
|     "source_node_id = list(map(lambda x:x['data']['id'] if x['data']['properties']['name'] == node_name else None, nodes))[0]\n"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 132,
 | |
|    "id": "c7b6cf3f",
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "name": "stdout",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "(array([0]),)\n",
 | |
|       "(array([4]),)\n"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "'HLA-DQA1 and Provenance of this association is GWAS.'"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 132,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "import numpy as np\n",
 | |
|     "sentence = high_similarity_context[0]\n",
 | |
|     "\n",
 | |
|     "for node_type in node_types:\n",
 | |
|     "    if node_type in sentence:        \n",
 | |
|     "        print(np.where(node_type == np.array(sentence.split(' '))))\n",
 | |
|     "\n",
 | |
|     "sentence.split('Gene ')[-1]"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 133,
 | |
|    "id": "768d83c6",
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "['Disease multiple sclerosis associates Gene HLA-DQA1 and Provenance of this association is GWAS.',\n",
 | |
|        " 'Disease multiple sclerosis associates Gene HLA-DRB1 and Provenance of this association is DISEASES.',\n",
 | |
|        " 'Disease multiple sclerosis associates Gene ATXN1 and Provenance of this association is GWAS.']"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 133,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "\n"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 45,
 | |
|    "id": "31239a88",
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "{'diseases_identifiers': ['https://diseases.jensenlab.org/Entity?documents=10&type1=9606&id1=ENSP00000369889&type2=-26&id2=DOID:0080044',\n",
 | |
|        "  'MedlinePlus'],\n",
 | |
|        " 'diseases_scores': ['6.503', 'CURATED'],\n",
 | |
|        " 'sources': ['DISEASES'],\n",
 | |
|        " 'diseases_sources': ['textmining', 'knowledge'],\n",
 | |
|        " 'diseases_confidences': [3.252, 5.0]}"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 45,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "item[\"data\"][\"properties\"]"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 104,
 | |
|    "id": "019ceba5",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "source = 'multiple sclerosis'\n",
 | |
|     "target = 'COL2A1'\n",
 | |
|     "\n",
 | |
|     "nodes = list(filter(None, list(map(lambda x:x if '_' not in x['data']['neo4j_type'] else None, node_context))))\n",
 | |
|     "edges = list(filter(None, list(map(lambda x:x if '_' in x['data']['neo4j_type'] else None, node_context))))\n"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 37,
 | |
|    "id": "84481154",
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "152375"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 37,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "# def get_node_id(inp_node)\n",
 | |
|     "\n",
 | |
|     "list(map(lambda x:x['data']['id'] if x['data']['properties']['name'] == source else None, nodes))[0]\n"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "id": "3e811b4a",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": []
 | |
|   }
 | |
|  ],
 | |
|  "metadata": {
 | |
|   "kernelspec": {
 | |
|    "display_name": "Python 3 (ipykernel)",
 | |
|    "language": "python",
 | |
|    "name": "python3"
 | |
|   },
 | |
|   "language_info": {
 | |
|    "codemirror_mode": {
 | |
|     "name": "ipython",
 | |
|     "version": 3
 | |
|    },
 | |
|    "file_extension": ".py",
 | |
|    "mimetype": "text/x-python",
 | |
|    "name": "python",
 | |
|    "nbconvert_exporter": "python",
 | |
|    "pygments_lexer": "ipython3",
 | |
|    "version": "3.10.9"
 | |
|   }
 | |
|  },
 | |
|  "nbformat": 4,
 | |
|  "nbformat_minor": 5
 | |
| }
 | 
