adding new notebooks

This commit is contained in:
root
2024-03-20 03:25:20 -07:00
parent 08fea5eb6d
commit af79e2400e
16 changed files with 2430 additions and 42 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
logs/
data/results/cypher_rag_output_2M_spoke.csv
test_notebooks/create_node_context_file_for_2M_spoke_graph.ipynb
notebooks/cypher_rag_using_langchain_2M.ipynb

View File

@@ -0,0 +1,323 @@
text,node_hits
LIRAGLUTIDE TREATS OBESITY,obesity
disease ontology identifier for central diabetes insipidus is doid:350,central diabetes insipidus
"Xeroderma pigmentosum, group G is not associated with Gene ERCC5",xeroderma pigmentosum
cherubism is not a autosomal dominant disease,cherubism
MASA SYNDROME (DISORDER) IS NOT ASSOCIATED WITH GENE L1CAM,MASA syndrome
CONGENITAL GENERALIZED LIPODYSTROPHY TYPE 2 ASSOCIATES GENE BSCL2,congenital generalized lipodystrophy type 2
PRASTERONE TREATS OBESITY,obesity
CONGENITAL CONTRACTURAL ARACHNODACTYLY ASSOCIATES GENE FBN2,congenital contractural arachnodactyly
PAROXYSMAL NONKINESIGENIC DYSKINESIA 1 IS NOT ASSOCIATED WITH GENE PNKD,paroxysmal nonkinesigenic dyskinesia 1
Acute intermittent porphyria is not associated with Gene HMBS,acute intermittent porphyria
Disease ontology identifier for gray platelet syndrome is DOID:0111044,gray platelet syndrome
Hyperargininemia is not associated with Gene ARG1,hyperargininemia
DISEASE ONTOLOGY IDENTIFIER FOR MARFAN SYNDROME IS DOID:0060055,Marfan syndrome
FACTOR IX TREATS HEMOPHILIA B,hemophilia B
DISEASE ONTOLOGY IDENTIFIER FOR MOSAIC VARIEGATED ANEUPLOIDY SYNDROME 1 IS DOID:0080141,mosaic variegated aneuploidy syndrome 1
noonan syndrome associates gene kras,Noonan syndrome
L-2-HYDROXYGLUTARIC ACIDURIA associates Gene L2HGDH,L-2-hydroxyglutaric aciduria
AZITHROMYCIN TREATS CYSTIC FIBROSIS,cystic fibrosis
disease ontology identifier for smith-magenis syndrome is doid:12271,Smith-Magenis syndrome
COFFIN-SIRIS SYNDROME IS A SYNDROMIC DISEASE,Coffin-Siris syndrome
antithrombin iii deficiency is not associated with gene serpinc1,antithrombin III deficiency
DEHYDROEPIANDROSTERONE TREATS OBESITY,obesity
MULIBREY NANISM IS A SYNDROMIC DISEASE,mulibrey nanism
cleidocranial dysplasia associates gene runx2,cleidocranial dysplasia
ASPARTYLGLUCOSAMINURIA IS NOT ASSOCIATED WITH GENE AGA,aspartylglucosaminuria
Brachydactyly type C is not associated with Gene GDF5,brachydactyly type C
Wolman Disease associates Gene LIPA,Wolman disease
adenine phosphoribosyltransferase deficiency associates gene aprt,adenine phosphoribosyltransferase deficiency
Neurofibromatosis 2 is not associated with Gene NF2,neurofibromatosis 2
HARTNUP DISEASE IS A INBORN DISORDER OF AMINO ACID TRANSPORT,Hartnup disease
campomelic dysplasia associates gene sox9,campomelic dysplasia
DOYNE HONEYCOMB RETINAL DYSTROPHY associates Gene EFEMP1,Doyne honeycomb retinal dystrophy
enhanced S-cone syndrome is not a vitreoretinal degeneration,enhanced S-cone syndrome
melanoma associates Gene BRAF,melanoma
congenital contractural arachnodactyly is not a congenital nervous system disorder,congenital contractural arachnodactyly
CONGENITAL AMEGAKARYOCYTIC THROMBOCYTOPENIA IS NOT ASSOCIATED WITH GENE MPL,congenital amegakaryocytic thrombocytopenia
LEIGH DISEASE ASSOCIATES GENE SURF1,Leigh disease
cystic fibrosis is a respiratory system disorder,cystic fibrosis
Neurofibromatosis 1 associates Gene NF1,neurofibromatosis 1
"Robinow syndrome, autosomal recessive associates Gene ROR2",Robinow syndrome
polycythemia vera is not associated with gene jak2,polycythemia vera
PSEUDOPSEUDOHYPOPARATHYROIDISM ASSOCIATES GENE GNAS,pseudopseudohypoparathyroidism
Rothmund-Thomson syndrome is not associated with Gene RECQL4,Rothmund-Thomson syndrome
Alveolar rhabdomyosarcoma associates Gene PAX3,alveolar rhabdomyosarcoma
ellis-van creveld syndrome associates gene evc2,Ellis-Van Creveld syndrome
Coffin-Lowry syndrome is not a X-linked syndromic intellectual disability,Coffin-Lowry syndrome
CHERUBISM ASSOCIATES GENE SH3BP2,cherubism
agalsidase alfa treats fabry disease,Fabry disease
disease ontology identifier for lesch-nyhan syndrome is doid:1919,Lesch-Nyhan syndrome
disease ontology identifier for autosomal dominant hypophosphatemic rickets is doid:0050948,autosomal dominant hypophosphatemic rickets
nail-patella syndrome associates gene lmx1b,nail-patella syndrome
mulibrey nanism is not associated with gene trim37,mulibrey nanism
MYOCLONIC DYSTONIA IS NOT ASSOCIATED WITH GENE SGCE,myoclonic dystonia
pseudoachondroplasia associates gene comp,pseudoachondroplasia
EVEROLIMUS TREATS TUBEROUS SCLEROSIS,tuberous sclerosis
DiGeorge syndrome is a congenital T-cell immunodeficiency,DiGeorge syndrome
INSULIN TREATS OBESITY,obesity
disease ontology identifier for mastocytosis is doid:0060768,mastocytosis
bevacizumab treats hereditary hemorrhagic telangiectasia,hereditary hemorrhagic telangiectasia
Disease ontology identifier for Farber lipogranulomatosis is DOID:0050464,Farber lipogranulomatosis
DENYS-DRASH SYNDROME IS NOT ASSOCIATED WITH GENE WT1,Denys-Drash syndrome
DISEASE ONTOLOGY IDENTIFIER FOR NORRIE DISEASE IS DOID:0060599,Norrie disease
"XERODERMA PIGMENTOSUM, COMPLEMENTATION GROUP E ASSOCIATES GENE DDB2",xeroderma pigmentosum
Disease ontology identifier for popliteal pterygium syndrome is DOID:0050756,popliteal pterygium syndrome
Ornithine carbamoyltransferase deficiency associates Gene OTC,ornithine carbamoyltransferase deficiency
Wiskott-Aldrich Syndrome is not associated with Gene WAS,Wiskott-Aldrich syndrome
Tangier Disease is not associated with Gene ABCA1,Tangier disease
disease ontology identifier for omenn syndrome is doid:3633,Omenn syndrome
LONG QT SYNDROME 1 ASSOCIATES GENE KCNQ1,long QT syndrome 1
allan-herndon-dudley syndrome (ahds) is not associated with gene slc16a2,Allan-Herndon-Dudley syndrome
ETHYLMALONIC ENCEPHALOPATHY IS NOT ASSOCIATED WITH GENE ETHE1,ethylmalonic encephalopathy
Hemophilia B is not associated with Gene F9,hemophilia B
Mowat-Wilson syndrome is not associated with Gene ZEB2,Mowat-Wilson syndrome
HARTNUP DISEASE IS NOT ASSOCIATED WITH GENE SLC6A19,Hartnup disease
Disease ontology identifier for campomelic dysplasia is DOID:0050463,campomelic dysplasia
"HEMOCHROMATOSIS, TYPE 4 ASSOCIATES GENE SLC40A1",hemochromatosis
disease ontology identifier for rothmund-thomson syndrome is doid:2732,Rothmund-Thomson syndrome
Autosomal Recessive Polycystic Kidney Disease associates Gene PKHD1,autosomal recessive polycystic kidney disease
adenine phosphoribosyltransferase deficiency is a inborn disorder of amino acid metabolism,adenine phosphoribosyltransferase deficiency
Angelman syndrome is a syndromic disease,Angelman syndrome
Tay-Sachs disease is not a eye degenerative disorder,Tay-Sachs disease
spinocerebellar ataxia type 5 associates gene sptbn2,spinocerebellar ataxia type 5
rapp-hodgkin syndrome is a autosomal dominant disease,Rapp-Hodgkin syndrome
sandhoff disease is not associated with gene hexb,Sandhoff disease
johanson-blizzard syndrome associates gene ubr1,Johanson-Blizzard syndrome
maple syrup urine disease associates gene dbt,maple syrup urine disease
laron syndrome is a autosomal recessive disease,Laron syndrome
popliteal pterygium syndrome is not associated with gene irf6,popliteal pterygium syndrome
PITT-HOPKINS SYNDROME is not associated with Gene TCF4,Pitt-Hopkins syndrome
Alkaptonuria associates Gene HGD,alkaptonuria
Hereditary hemorrhagic telangiectasia associates Gene ENG,hereditary hemorrhagic telangiectasia
MASTOCYTOSIS IS NOT ASSOCIATED WITH GENE KIT,mastocytosis
macrolide antibiotics treats cystic fibrosis,cystic fibrosis
Very long chain acyl-CoA dehydrogenase deficiency is not associated with Gene ACADVL,very long chain acyl-CoA dehydrogenase deficiency
KUFOR-RAKEB SYNDROME associates Gene ATP13A2,Kufor-Rakeb syndrome
protein-tyrosine kinase inhibitor treats sarcoma,sarcoma
autosomal dominant hypophosphatemic rickets associates gene fgf23,autosomal dominant hypophosphatemic rickets
WAARDENBURG SYNDROME TYPE 1 ASSOCIATES GENE PAX3,Waardenburg syndrome type 1
Cystic Fibrosis associates Gene CFTR,cystic fibrosis
WHIM syndrome is not associated with Gene CXCR4,WHIM syndrome
tuberous sclerosis is a autosomal dominant disease,tuberous sclerosis
CHOROIDEREMIA IS NOT ASSOCIATED WITH GENE CHM,choroideremia
smith-lemli-opitz syndrome is not associated with gene dhcr7,Smith-Lemli-Opitz syndrome
LATE-ONSET RETINAL DEGENERATION (disorder) associates Gene C1QTNF5,late-onset retinal degeneration
ibuprofen treats cystic fibrosis,cystic fibrosis
JUVENILE POLYPOSIS SYNDROME ASSOCIATES GENE SMAD4,juvenile polyposis syndrome
alpha-galactosidase treats fabry disease,Fabry disease
pembrolizumab treats melanoma,melanoma
PIEBALDISM ASSOCIATES GENE KIT,piebaldism
"Charcot-Marie-Tooth Disease, Type Ib associates Gene MPZ",Charcot-Marie-Tooth disease
DiGeorge Syndrome is not associated with Gene TBX1,DiGeorge syndrome
Fibrodysplasia Ossificans Progressiva associates Gene ACVR1,fibrodysplasia ossificans progressiva
Omenn Syndrome associates Gene RAG2,Omenn syndrome
Juvenile Spinal Muscular Atrophy associates Gene SMN1,juvenile spinal muscular atrophy
antibiotics treats cystic fibrosis,cystic fibrosis
HEREDITARY HEMORRHAGIC TELANGIECTASIA IS A AUTOSOMAL DOMINANT DISEASE,hereditary hemorrhagic telangiectasia
Fabry Disease associates Gene GLA,Fabry disease
GYRATE ATROPHY ASSOCIATES GENE OAT,gyrate atrophy
"Bernard-Soulier syndrome is a inherited bleeding disorder, platelet-type",Bernard-Soulier syndrome
VITELLIFORM MACULAR DYSTROPHY ASSOCIATES GENE BEST1,vitelliform macular dystrophy
Gray Platelet Syndrome is not associated with Gene NBEAL2,gray platelet syndrome
Coffin-Siris syndrome associates Gene ARID1B,Coffin-Siris syndrome
burkitt lymphoma is a neoplasm of mature b-cells,Burkitt lymphoma
multiple endocrine neoplasia type 2b associates gene ret,multiple endocrine neoplasia type 2B
anesthetics treats pheochromocytoma,pheochromocytoma
PSEUDOACHONDROPLASIA IS A OSTEOCHONDRODYSPLASIA,pseudoachondroplasia
Pierson syndrome is not a autosomal recessive disease,Pierson syndrome
costello syndrome (disorder) is not associated with gene hras,Costello syndrome
familial mediterranean fever associates gene mefv,familial Mediterranean fever
Jervell-Lange Nielsen Syndrome is not associated with Gene KCNQ1,Jervell-Lange Nielsen syndrome
argininosuccinic aciduria associates gene asl,argininosuccinic aciduria
nance-horan syndrome associates gene nhs,Nance-Horan syndrome
li-fraumeni syndrome is a autosomal dominant disease,Li-Fraumeni syndrome
CANAVAN DISEASE IS A INBORN AMINOACYLASE DEFICIENCY,Canavan disease
TETRALOGY OF FALLOT IS NOT A HEREDITARY DISEASE,tetralogy of Fallot
Disease ontology identifier for fibrodysplasia ossificans progressiva is DOID:13374,fibrodysplasia ossificans progressiva
penicillamine treats cystinuria,cystinuria
LONG QT SYNDROME 3 associates Gene SCN5A,long QT syndrome 3
"xeroderma pigmentosum, complementation group c associates gene xpc",xeroderma pigmentosum
Laron Syndrome associates Gene GHR,Laron syndrome
DISEASE ONTOLOGY IDENTIFIER FOR PHEOCHROMOCYTOMA IS DOID:14692,pheochromocytoma
CYSTINURIA IS NOT ASSOCIATED WITH GENE SLC3A1,cystinuria
loeys-dietz syndrome associates gene tgfbr1,Loeys-Dietz syndrome
ANTIBIOTICS TREATS OBESITY,obesity
Disease ontology identifier for ethylmalonic encephalopathy is DOID:0060640,ethylmalonic encephalopathy
Johanson-Blizzard syndrome is a congenital nervous system disorder,Johanson-Blizzard syndrome
peutz-jeghers syndrome associates gene stk11,Peutz-Jeghers syndrome
Sandhoff disease is a eye degenerative disorder,Sandhoff disease
Coffin-Lowry syndrome associates Gene RPS6KA3,Coffin-Lowry syndrome
Leigh Disease associates Gene NDUFS4,Leigh disease
choroideremia is not a X-linked disease,choroideremia
Bernard-Soulier Syndrome associates Gene GP1BB,Bernard-Soulier syndrome
Lafora Disease is not associated with Gene NHLRC1,Lafora disease
IMMUNOSUPPRESSIVE AGENTS TREATS CROHN'S DISEASE,Crohn's disease
LAFORA DISEASE ASSOCIATES GENE EPM2A,Lafora disease
Pheochromocytoma is not associated with Gene RET,pheochromocytoma
Brugada Syndrome (disorder) associates Gene SCN5A,Brugada syndrome
Greig cephalopolysyndactyly syndrome associates Gene GLI3,Greig cephalopolysyndactyly syndrome
vaccines treats melanoma,melanoma
Mucopolysaccharidosis II associates Gene IDS,mucopolysaccharidosis II
RAPP-HODGKIN SYNDROME associates Gene TP63,Rapp-Hodgkin syndrome
Spinocerebellar Ataxia Type 6 (disorder) associates Gene CACNA1A,spinocerebellar ataxia type 6
Saethre-Chotzen Syndrome is not associated with Gene TWIST1,Saethre-Chotzen syndrome
Loeys-Dietz Syndrome associates Gene TGFBR2,Loeys-Dietz syndrome
Ulnar-mammary syndrome is not associated with Gene TBX3,ulnar-mammary syndrome
Marfan Syndrome associates Gene FBN1,Marfan syndrome
noonan syndrome associates gene raf1,Noonan syndrome
norrie disease associates gene ndp,Norrie disease
biotinidase deficiency is not a multiple carboxylase deficiency,biotinidase deficiency
burkitt lymphoma is not associated with gene myc,Burkitt lymphoma
hyperkalemic periodic paralysis is not a familial periodic paralysis,hyperkalemic periodic paralysis
TUBEROUS SCLEROSIS ASSOCIATES GENE TSC2,tuberous sclerosis
melphalan treats melanoma,melanoma
Disease ontology identifier for beta-mannosidosis is DOID:0111136,beta-mannosidosis
pitt-hopkins syndrome is a syndromic disease,Pitt-Hopkins syndrome
Fatal Familial Insomnia is not associated with Gene PRNP,fatal familial insomnia
FABRY DISEASE IS NOT A DEVELOPMENTAL ANOMALY OF METABOLIC ORIGIN,Fabry disease
fatal familial insomnia is not a insomnia,fatal familial insomnia
MUENKE SYNDROME ASSOCIATES GENE FGFR3,Muenke Syndrome
FARBER LIPOGRANULOMATOSIS IS NOT ASSOCIATED WITH GENE ASAH1,Farber lipogranulomatosis
greig cephalopolysyndactyly syndrome is not a syndromic disease,Greig cephalopolysyndactyly syndrome
Kleefstra syndrome 1 is a Kleefstra syndrome,Kleefstra syndrome 1
mowat-wilson syndrome is a syndromic intellectual disability,Mowat-Wilson syndrome
METRONIDAZOLE TREATS CROHN'S DISEASE,Crohn's disease
Cystinuria is not associated with Gene SLC7A9,cystinuria
Disease ontology identifier for Smith-Lemli-Opitz syndrome is DOID:0080026,Smith-Lemli-Opitz syndrome
alpha-d-galactosidase enzyme treats fabry disease,Fabry disease
Tay-Sachs Disease associates Gene HEXA,Tay-Sachs disease
BIOTINIDASE DEFICIENCY IS NOT ASSOCIATED WITH GENE BTD,biotinidase deficiency
DISEASE ONTOLOGY IDENTIFIER FOR ATAXIA TELANGIECTASIA IS DOID:0060010,ataxia telangiectasia
Disease ontology identifier for Timothy syndrome is DOID:0060173,Timothy syndrome
multiple endocrine neoplasia type 2b is a autosomal dominant disease,multiple endocrine neoplasia type 2B
WOLCOTT-RALLISON SYNDROME IS A SYNDROMIC DISEASE,Wolcott-Rallison syndrome
Disease ontology identifier for cystinuria is DOID:9266,cystinuria
POLYCYSTIC KIDNEY DISEASE 1 ASSOCIATES GENE PKD1,polycystic kidney disease 1
Refsum Disease associates Gene PHYH,Refsum disease
Nijmegen breakage syndrome is a autosomal recessive disease,Nijmegen breakage syndrome
Pierson syndrome is not associated with Gene LAMB2,Pierson syndrome
holt-oram syndrome is a autosomal dominant disease,Holt-Oram syndrome
Fragile X Syndrome associates Gene FMR1,fragile X syndrome
vitelliform macular dystrophy is not a macular degeneration,vitelliform macular dystrophy
Pfeiffer Syndrome associates Gene FGFR2,Pfeiffer syndrome
Alexander Disease associates Gene GFAP,Alexander disease
"basal ganglia disease, biotin-responsive associates gene slc19a3",basal ganglia disease
Progeria associates Gene LMNA,progeria
infantile hypophosphatasia associates gene alpl,infantile hypophosphatasia
"xeroderma pigmentosum, group b associates gene ercc3",xeroderma pigmentosum
Microvillus inclusion disease is not associated with Gene MYO5B,microvillus inclusion disease
Smith-Magenis syndrome associates Gene RAI1,Smith-Magenis syndrome
LIVER CARCINOMA IS NOT ASSOCIATED WITH GENE MET,liver carcinoma
"fanconi anemia, complementation group d2 is not associated with gene fancd2",Fanconi anemia
tetralogy of fallot associates gene zfpm2,tetralogy of Fallot
ARGIPRESSIN TREATS CENTRAL DIABETES INSIPIDUS,central diabetes insipidus
noonan syndrome associates gene sos1,Noonan syndrome
Denys-Drash syndrome is a autosomal dominant disease,Denys-Drash syndrome
familial Mediterranean fever is not a primary immunodeficiency due to a genetic defect in innate immunity,familial Mediterranean fever
Disease ontology identifier for ornithine carbamoyltransferase deficiency is DOID:9271,ornithine carbamoyltransferase deficiency
Achondroplasia is not a osteochondrodysplasia,achondroplasia
unverricht-lundborg syndrome is not associated with gene cstb,Unverricht-Lundborg syndrome
alpha-Mannosidosis associates Gene MAN2B1,alpha-mannosidosis
6-MERCAPTOPURINE TREATS CROHN'S DISEASE,Crohn's disease
enhanced s-cone syndrome is not associated with gene nr2e3,enhanced S-cone syndrome
ADRENAL CORTEX HORMONES TREATS CROHN'S DISEASE,Crohn's disease
BIETTI CRYSTALLINE CORNEORETINAL DYSTROPHY is not associated with Gene CYP4V2,Bietti crystalline corneoretinal dystrophy
AGALSIDASE BETA TREATS FABRY DISEASE,Fabry disease
HEMOPHILIA B IS A HEMORRHAGIC DISEASE,hemophilia B
Li-Fraumeni Syndrome is not associated with Gene TP53,Li-Fraumeni syndrome
"xeroderma pigmentosum, group f associates gene ercc4",xeroderma pigmentosum
WOLMAN DISEASE IS A LYSOSOMAL ACID LIPASE DEFICIENCY,Wolman disease
alveolar rhabdomyosarcoma is not a rhabdomyosarcoma,alveolar rhabdomyosarcoma
Aniridia is not associated with Gene PAX6,aniridia
argininosuccinic aciduria is a amino acid metabolism disease,argininosuccinic aciduria
"charcot-marie-tooth disease, type 4c associates gene sh3tc2",Charcot-Marie-Tooth disease
Werner Syndrome associates Gene WRN,Werner syndrome
AMILORIDE TREATS CYSTIC FIBROSIS,cystic fibrosis
nail-patella syndrome is a autosomal dominant disease,nail-patella syndrome
"TIBIAL MUSCULAR DYSTROPHY, TARDIVE associates Gene TTN",tibial muscular dystrophy
Chediak-Higashi Syndrome associates Gene LYST,Chediak-Higashi syndrome
juvenile myoclonic epilepsy is not associated with gene efhc1,juvenile myoclonic epilepsy
UNVERRICHT-LUNDBORG SYNDROME IS A MOVEMENT DISORDER,Unverricht-Lundborg syndrome
immune checkpoint inhibitors treats melanoma,melanoma
hyperkalemic periodic paralysis is not associated with gene scn4a,hyperkalemic periodic paralysis
Disease ontology identifier for Doyne honeycomb retinal dystrophy is DOID:0081055,Doyne honeycomb retinal dystrophy
"Charcot-Marie-Tooth disease, Type 4B1 associates Gene MTMR2",Charcot-Marie-Tooth disease
disease ontology identifier for fragile x syndrome is doid:14261,fragile X syndrome
Nijmegen Breakage Syndrome is not associated with Gene NBN,Nijmegen breakage syndrome
MERCAPTOPURINE TREATS CROHN'S DISEASE,Crohn's disease
Alexander disease is a leukodystrophy,Alexander disease
disease ontology identifier for werner syndrome is doid:0050466,Werner syndrome
spinocerebellar ataxia type 1 is not associated with gene atxn1,spinocerebellar ataxia type 1
ACRODERMATITIS ENTEROPATHICA IS NOT A INBORN METAL METABOLISM DISORDER,acrodermatitis enteropathica
Adrenoleukodystrophy associates Gene ABCD1,adrenoleukodystrophy
"Xeroderma pigmentosum, group A associates Gene XPA",xeroderma pigmentosum
Wolcott-Rallison syndrome associates Gene EIF2AK3,Wolcott-Rallison syndrome
Mucopolysaccharidosis VI associates Gene ARSB,mucopolysaccharidosis VI
very long chain acyl-coa dehydrogenase deficiency is not a disorder of fatty acid oxidation and ketogenesis,very long chain acyl-CoA dehydrogenase deficiency
OBESITY IS NOT ASSOCIATED WITH GENE PPARG,obesity
angelman syndrome is not associated with gene ube3a,Angelman syndrome
pelizaeus-merzbacher disease associates gene plp1,Pelizaeus-Merzbacher disease
Ellis-van Creveld syndrome is not a heart disorder,Ellis-Van Creveld syndrome
kleefstra syndrome 1 is not associated with gene ehmt1,Kleefstra syndrome 1
COLCHICINE TREATS FAMILIAL MEDITERRANEAN FEVER,familial Mediterranean fever
"CHARCOT-MARIE-TOOTH DISEASE, TYPE 4J associates Gene FIG4",Charcot-Marie-Tooth disease
sitosterolemia is not associated with gene abcg8,sitosterolemia
Holt-Oram syndrome is not associated with Gene TBX5,Holt-Oram syndrome
OBESITY IS NOT ASSOCIATED WITH GENE MC4R,obesity
Disease ontology identifier for Lafora disease is DOID:3534,Lafora disease
Achondroplasia is not associated with Gene FGFR3,achondroplasia
BETA-MANNOSIDOSIS IS NOT ASSOCIATED WITH GENE MANBA,beta-mannosidosis
BORJESON-FORSSMAN-LEHMANN SYNDROME IS NOT A X-LINKED SYNDROMIC INTELLECTUAL DISABILITY,Borjeson-Forssman-Lehmann syndrome
Disease ontology identifier for spinocerebellar ataxia type 1 is DOID:0050954,spinocerebellar ataxia type 1
Disease ontology identifier for Tangier disease is DOID:1388,Tangier disease
sarcoma is a cancer,sarcoma
DISEASE ONTOLOGY IDENTIFIER FOR JUVENILE MYOCLONIC EPILEPSY IS DOID:4890,juvenile myoclonic epilepsy
Disease ontology identifier for aniridia is DOID:12704,aniridia
CHARGE Syndrome is not associated with Gene CHD7,CHARGE syndrome
Creutzfeldt-Jakob disease is not associated with Gene PRNP,Creutzfeldt-Jakob disease
central diabetes insipidus associates gene avp,central diabetes insipidus
Chediak-Higashi syndrome is a congenital nervous system disorder,Chediak-Higashi syndrome
DISEASE ONTOLOGY IDENTIFIER FOR PELIZAEUS-MERZBACHER DISEASE IS DOID:5688,Pelizaeus-Merzbacher disease
Borjeson-Forssman-Lehmann syndrome is not associated with Gene PHF6,Borjeson-Forssman-Lehmann syndrome
Juvenile polyposis syndrome associates Gene BMPR1A,juvenile polyposis syndrome
enoxaparin treats obesity,obesity
Ataxia Telangiectasia associates Gene ATM,ataxia telangiectasia
Mosaic variegated aneuploidy syndrome 1 associates Gene BUB1B,mosaic variegated aneuploidy syndrome 1
bernard-soulier syndrome associates gene gp1ba,Bernard-Soulier syndrome
PSEUDOXANTHOMA ELASTICUM ASSOCIATES GENE ABCC6,pseudoxanthoma elasticum
ALSTROM SYNDROME ASSOCIATES GENE ALMS1,Alstrom syndrome
osteosarcoma is a sarcoma,osteosarcoma
"XERODERMA PIGMENTOSUM, COMPLEMENTATION GROUP D ASSOCIATES GENE ERCC2",xeroderma pigmentosum
DISEASE ONTOLOGY IDENTIFIER FOR PSEUDOPSEUDOHYPOPARATHYROIDISM IS DOID:4183,pseudopseudohypoparathyroidism
Hajdu-Cheney Syndrome associates Gene NOTCH2,Hajdu-Cheney syndrome
X-linked agammaglobulinemia associates Gene BTK,X-linked agammaglobulinemia
prothrombin complex concentrates treats hemophilia b,hemophilia B
DISEASE ONTOLOGY IDENTIFIER FOR ADRENOLEUKODYSTROPHY IS DOID:0060844,adrenoleukodystrophy
nitisinone treats alkaptonuria,alkaptonuria
l-2-hydroxyglutaric aciduria is not a 2-hydroxyglutaric aciduria,L-2-hydroxyglutaric aciduria
alkaptonuria is not a disorder of tyrosine metabolism,alkaptonuria
Sarcoma associates Gene TP53,sarcoma
ACRODERMATITIS ENTEROPATHICA ASSOCIATES GENE SLC39A4,acrodermatitis enteropathica
Disease ontology identifier for spinocerebellar ataxia type 5 is DOID:0050882,spinocerebellar ataxia type 5
canavan disease associates gene aspa,Canavan disease
disease ontology identifier for sitosterolemia is doid:0090019,sitosterolemia
Liver carcinoma associates Gene TP53,liver carcinoma
Variant rs2476601 associates Rheumatoid Arthritis,rheumatoid arthritis
Osteosarcoma is not associated with Gene TP53,osteosarcoma
EPISODIC ATAXIA TYPE 2 (DISORDER) IS NOT ASSOCIATED WITH GENE CACNA1A,episodic ataxia type 2
lesch-nyhan syndrome associates gene hprt1,Lesch-Nyhan syndrome
DISEASE ONTOLOGY IDENTIFIER FOR WISKOTT-ALDRICH SYNDROME IS DOID:9169,Wiskott-Aldrich syndrome
Alstrom syndrome is a ciliopathy,Alstrom syndrome
Carney Complex is not associated with Gene PRKAR1A,Carney complex
DISEASE ONTOLOGY IDENTIFIER FOR CONGENITAL GENERALIZED LIPODYSTROPHY TYPE 2 IS DOID:10588,congenital generalized lipodystrophy type 2
brachydactyly type c is a brachydactyly,brachydactyly type C
noonan syndrome is a multiple congenital anomalies/dysmorphic syndrome-variable intellectual disability syndrome,Noonan syndrome
Disease ontology identifier for Nance-Horan syndrome is DOID:0050771,Nance-Horan syndrome
Carney complex is a autosomal dominant disease,Carney complex
immune checkpoint inhibitor treats melanoma,melanoma
antineoplastic agents treats osteosarcoma,osteosarcoma
timothy syndrome associates gene cacna1c,Timothy syndrome
piebaldism is a autosomal dominant disease,piebaldism
Disease ontology identifier for Loeys-Dietz syndrome is DOID:0060745,Loeys-Dietz syndrome
NOONAN SYNDROME ASSOCIATES GENE PTPN11,Noonan syndrome
1 text node_hits
2 LIRAGLUTIDE TREATS OBESITY obesity
3 disease ontology identifier for central diabetes insipidus is doid:350 central diabetes insipidus
4 Xeroderma pigmentosum, group G is not associated with Gene ERCC5 xeroderma pigmentosum
5 cherubism is not a autosomal dominant disease cherubism
6 MASA SYNDROME (DISORDER) IS NOT ASSOCIATED WITH GENE L1CAM MASA syndrome
7 CONGENITAL GENERALIZED LIPODYSTROPHY TYPE 2 ASSOCIATES GENE BSCL2 congenital generalized lipodystrophy type 2
8 PRASTERONE TREATS OBESITY obesity
9 CONGENITAL CONTRACTURAL ARACHNODACTYLY ASSOCIATES GENE FBN2 congenital contractural arachnodactyly
10 PAROXYSMAL NONKINESIGENIC DYSKINESIA 1 IS NOT ASSOCIATED WITH GENE PNKD paroxysmal nonkinesigenic dyskinesia 1
11 Acute intermittent porphyria is not associated with Gene HMBS acute intermittent porphyria
12 Disease ontology identifier for gray platelet syndrome is DOID:0111044 gray platelet syndrome
13 Hyperargininemia is not associated with Gene ARG1 hyperargininemia
14 DISEASE ONTOLOGY IDENTIFIER FOR MARFAN SYNDROME IS DOID:0060055 Marfan syndrome
15 FACTOR IX TREATS HEMOPHILIA B hemophilia B
16 DISEASE ONTOLOGY IDENTIFIER FOR MOSAIC VARIEGATED ANEUPLOIDY SYNDROME 1 IS DOID:0080141 mosaic variegated aneuploidy syndrome 1
17 noonan syndrome associates gene kras Noonan syndrome
18 L-2-HYDROXYGLUTARIC ACIDURIA associates Gene L2HGDH L-2-hydroxyglutaric aciduria
19 AZITHROMYCIN TREATS CYSTIC FIBROSIS cystic fibrosis
20 disease ontology identifier for smith-magenis syndrome is doid:12271 Smith-Magenis syndrome
21 COFFIN-SIRIS SYNDROME IS A SYNDROMIC DISEASE Coffin-Siris syndrome
22 antithrombin iii deficiency is not associated with gene serpinc1 antithrombin III deficiency
23 DEHYDROEPIANDROSTERONE TREATS OBESITY obesity
24 MULIBREY NANISM IS A SYNDROMIC DISEASE mulibrey nanism
25 cleidocranial dysplasia associates gene runx2 cleidocranial dysplasia
26 ASPARTYLGLUCOSAMINURIA IS NOT ASSOCIATED WITH GENE AGA aspartylglucosaminuria
27 Brachydactyly type C is not associated with Gene GDF5 brachydactyly type C
28 Wolman Disease associates Gene LIPA Wolman disease
29 adenine phosphoribosyltransferase deficiency associates gene aprt adenine phosphoribosyltransferase deficiency
30 Neurofibromatosis 2 is not associated with Gene NF2 neurofibromatosis 2
31 HARTNUP DISEASE IS A INBORN DISORDER OF AMINO ACID TRANSPORT Hartnup disease
32 campomelic dysplasia associates gene sox9 campomelic dysplasia
33 DOYNE HONEYCOMB RETINAL DYSTROPHY associates Gene EFEMP1 Doyne honeycomb retinal dystrophy
34 enhanced S-cone syndrome is not a vitreoretinal degeneration enhanced S-cone syndrome
35 melanoma associates Gene BRAF melanoma
36 congenital contractural arachnodactyly is not a congenital nervous system disorder congenital contractural arachnodactyly
37 CONGENITAL AMEGAKARYOCYTIC THROMBOCYTOPENIA IS NOT ASSOCIATED WITH GENE MPL congenital amegakaryocytic thrombocytopenia
38 LEIGH DISEASE ASSOCIATES GENE SURF1 Leigh disease
39 cystic fibrosis is a respiratory system disorder cystic fibrosis
40 Neurofibromatosis 1 associates Gene NF1 neurofibromatosis 1
41 Robinow syndrome, autosomal recessive associates Gene ROR2 Robinow syndrome
42 polycythemia vera is not associated with gene jak2 polycythemia vera
43 PSEUDOPSEUDOHYPOPARATHYROIDISM ASSOCIATES GENE GNAS pseudopseudohypoparathyroidism
44 Rothmund-Thomson syndrome is not associated with Gene RECQL4 Rothmund-Thomson syndrome
45 Alveolar rhabdomyosarcoma associates Gene PAX3 alveolar rhabdomyosarcoma
46 ellis-van creveld syndrome associates gene evc2 Ellis-Van Creveld syndrome
47 Coffin-Lowry syndrome is not a X-linked syndromic intellectual disability Coffin-Lowry syndrome
48 CHERUBISM ASSOCIATES GENE SH3BP2 cherubism
49 agalsidase alfa treats fabry disease Fabry disease
50 disease ontology identifier for lesch-nyhan syndrome is doid:1919 Lesch-Nyhan syndrome
51 disease ontology identifier for autosomal dominant hypophosphatemic rickets is doid:0050948 autosomal dominant hypophosphatemic rickets
52 nail-patella syndrome associates gene lmx1b nail-patella syndrome
53 mulibrey nanism is not associated with gene trim37 mulibrey nanism
54 MYOCLONIC DYSTONIA IS NOT ASSOCIATED WITH GENE SGCE myoclonic dystonia
55 pseudoachondroplasia associates gene comp pseudoachondroplasia
56 EVEROLIMUS TREATS TUBEROUS SCLEROSIS tuberous sclerosis
57 DiGeorge syndrome is a congenital T-cell immunodeficiency DiGeorge syndrome
58 INSULIN TREATS OBESITY obesity
59 disease ontology identifier for mastocytosis is doid:0060768 mastocytosis
60 bevacizumab treats hereditary hemorrhagic telangiectasia hereditary hemorrhagic telangiectasia
61 Disease ontology identifier for Farber lipogranulomatosis is DOID:0050464 Farber lipogranulomatosis
62 DENYS-DRASH SYNDROME IS NOT ASSOCIATED WITH GENE WT1 Denys-Drash syndrome
63 DISEASE ONTOLOGY IDENTIFIER FOR NORRIE DISEASE IS DOID:0060599 Norrie disease
64 XERODERMA PIGMENTOSUM, COMPLEMENTATION GROUP E ASSOCIATES GENE DDB2 xeroderma pigmentosum
65 Disease ontology identifier for popliteal pterygium syndrome is DOID:0050756 popliteal pterygium syndrome
66 Ornithine carbamoyltransferase deficiency associates Gene OTC ornithine carbamoyltransferase deficiency
67 Wiskott-Aldrich Syndrome is not associated with Gene WAS Wiskott-Aldrich syndrome
68 Tangier Disease is not associated with Gene ABCA1 Tangier disease
69 disease ontology identifier for omenn syndrome is doid:3633 Omenn syndrome
70 LONG QT SYNDROME 1 ASSOCIATES GENE KCNQ1 long QT syndrome 1
71 allan-herndon-dudley syndrome (ahds) is not associated with gene slc16a2 Allan-Herndon-Dudley syndrome
72 ETHYLMALONIC ENCEPHALOPATHY IS NOT ASSOCIATED WITH GENE ETHE1 ethylmalonic encephalopathy
73 Hemophilia B is not associated with Gene F9 hemophilia B
74 Mowat-Wilson syndrome is not associated with Gene ZEB2 Mowat-Wilson syndrome
75 HARTNUP DISEASE IS NOT ASSOCIATED WITH GENE SLC6A19 Hartnup disease
76 Disease ontology identifier for campomelic dysplasia is DOID:0050463 campomelic dysplasia
77 HEMOCHROMATOSIS, TYPE 4 ASSOCIATES GENE SLC40A1 hemochromatosis
78 disease ontology identifier for rothmund-thomson syndrome is doid:2732 Rothmund-Thomson syndrome
79 Autosomal Recessive Polycystic Kidney Disease associates Gene PKHD1 autosomal recessive polycystic kidney disease
80 adenine phosphoribosyltransferase deficiency is a inborn disorder of amino acid metabolism adenine phosphoribosyltransferase deficiency
81 Angelman syndrome is a syndromic disease Angelman syndrome
82 Tay-Sachs disease is not a eye degenerative disorder Tay-Sachs disease
83 spinocerebellar ataxia type 5 associates gene sptbn2 spinocerebellar ataxia type 5
84 rapp-hodgkin syndrome is a autosomal dominant disease Rapp-Hodgkin syndrome
85 sandhoff disease is not associated with gene hexb Sandhoff disease
86 johanson-blizzard syndrome associates gene ubr1 Johanson-Blizzard syndrome
87 maple syrup urine disease associates gene dbt maple syrup urine disease
88 laron syndrome is a autosomal recessive disease Laron syndrome
89 popliteal pterygium syndrome is not associated with gene irf6 popliteal pterygium syndrome
90 PITT-HOPKINS SYNDROME is not associated with Gene TCF4 Pitt-Hopkins syndrome
91 Alkaptonuria associates Gene HGD alkaptonuria
92 Hereditary hemorrhagic telangiectasia associates Gene ENG hereditary hemorrhagic telangiectasia
93 MASTOCYTOSIS IS NOT ASSOCIATED WITH GENE KIT mastocytosis
94 macrolide antibiotics treats cystic fibrosis cystic fibrosis
95 Very long chain acyl-CoA dehydrogenase deficiency is not associated with Gene ACADVL very long chain acyl-CoA dehydrogenase deficiency
96 KUFOR-RAKEB SYNDROME associates Gene ATP13A2 Kufor-Rakeb syndrome
97 protein-tyrosine kinase inhibitor treats sarcoma sarcoma
98 autosomal dominant hypophosphatemic rickets associates gene fgf23 autosomal dominant hypophosphatemic rickets
99 WAARDENBURG SYNDROME TYPE 1 ASSOCIATES GENE PAX3 Waardenburg syndrome type 1
100 Cystic Fibrosis associates Gene CFTR cystic fibrosis
101 WHIM syndrome is not associated with Gene CXCR4 WHIM syndrome
102 tuberous sclerosis is a autosomal dominant disease tuberous sclerosis
103 CHOROIDEREMIA IS NOT ASSOCIATED WITH GENE CHM choroideremia
104 smith-lemli-opitz syndrome is not associated with gene dhcr7 Smith-Lemli-Opitz syndrome
105 LATE-ONSET RETINAL DEGENERATION (disorder) associates Gene C1QTNF5 late-onset retinal degeneration
106 ibuprofen treats cystic fibrosis cystic fibrosis
107 JUVENILE POLYPOSIS SYNDROME ASSOCIATES GENE SMAD4 juvenile polyposis syndrome
108 alpha-galactosidase treats fabry disease Fabry disease
109 pembrolizumab treats melanoma melanoma
110 PIEBALDISM ASSOCIATES GENE KIT piebaldism
111 Charcot-Marie-Tooth Disease, Type Ib associates Gene MPZ Charcot-Marie-Tooth disease
112 DiGeorge Syndrome is not associated with Gene TBX1 DiGeorge syndrome
113 Fibrodysplasia Ossificans Progressiva associates Gene ACVR1 fibrodysplasia ossificans progressiva
114 Omenn Syndrome associates Gene RAG2 Omenn syndrome
115 Juvenile Spinal Muscular Atrophy associates Gene SMN1 juvenile spinal muscular atrophy
116 antibiotics treats cystic fibrosis cystic fibrosis
117 HEREDITARY HEMORRHAGIC TELANGIECTASIA IS A AUTOSOMAL DOMINANT DISEASE hereditary hemorrhagic telangiectasia
118 Fabry Disease associates Gene GLA Fabry disease
119 GYRATE ATROPHY ASSOCIATES GENE OAT gyrate atrophy
120 Bernard-Soulier syndrome is a inherited bleeding disorder, platelet-type Bernard-Soulier syndrome
121 VITELLIFORM MACULAR DYSTROPHY ASSOCIATES GENE BEST1 vitelliform macular dystrophy
122 Gray Platelet Syndrome is not associated with Gene NBEAL2 gray platelet syndrome
123 Coffin-Siris syndrome associates Gene ARID1B Coffin-Siris syndrome
124 burkitt lymphoma is a neoplasm of mature b-cells Burkitt lymphoma
125 multiple endocrine neoplasia type 2b associates gene ret multiple endocrine neoplasia type 2B
126 anesthetics treats pheochromocytoma pheochromocytoma
127 PSEUDOACHONDROPLASIA IS A OSTEOCHONDRODYSPLASIA pseudoachondroplasia
128 Pierson syndrome is not a autosomal recessive disease Pierson syndrome
129 costello syndrome (disorder) is not associated with gene hras Costello syndrome
130 familial mediterranean fever associates gene mefv familial Mediterranean fever
131 Jervell-Lange Nielsen Syndrome is not associated with Gene KCNQ1 Jervell-Lange Nielsen syndrome
132 argininosuccinic aciduria associates gene asl argininosuccinic aciduria
133 nance-horan syndrome associates gene nhs Nance-Horan syndrome
134 li-fraumeni syndrome is a autosomal dominant disease Li-Fraumeni syndrome
135 CANAVAN DISEASE IS A INBORN AMINOACYLASE DEFICIENCY Canavan disease
136 TETRALOGY OF FALLOT IS NOT A HEREDITARY DISEASE tetralogy of Fallot
137 Disease ontology identifier for fibrodysplasia ossificans progressiva is DOID:13374 fibrodysplasia ossificans progressiva
138 penicillamine treats cystinuria cystinuria
139 LONG QT SYNDROME 3 associates Gene SCN5A long QT syndrome 3
140 xeroderma pigmentosum, complementation group c associates gene xpc xeroderma pigmentosum
141 Laron Syndrome associates Gene GHR Laron syndrome
142 DISEASE ONTOLOGY IDENTIFIER FOR PHEOCHROMOCYTOMA IS DOID:14692 pheochromocytoma
143 CYSTINURIA IS NOT ASSOCIATED WITH GENE SLC3A1 cystinuria
144 loeys-dietz syndrome associates gene tgfbr1 Loeys-Dietz syndrome
145 ANTIBIOTICS TREATS OBESITY obesity
146 Disease ontology identifier for ethylmalonic encephalopathy is DOID:0060640 ethylmalonic encephalopathy
147 Johanson-Blizzard syndrome is a congenital nervous system disorder Johanson-Blizzard syndrome
148 peutz-jeghers syndrome associates gene stk11 Peutz-Jeghers syndrome
149 Sandhoff disease is a eye degenerative disorder Sandhoff disease
150 Coffin-Lowry syndrome associates Gene RPS6KA3 Coffin-Lowry syndrome
151 Leigh Disease associates Gene NDUFS4 Leigh disease
152 choroideremia is not a X-linked disease choroideremia
153 Bernard-Soulier Syndrome associates Gene GP1BB Bernard-Soulier syndrome
154 Lafora Disease is not associated with Gene NHLRC1 Lafora disease
155 IMMUNOSUPPRESSIVE AGENTS TREATS CROHN'S DISEASE Crohn's disease
156 LAFORA DISEASE ASSOCIATES GENE EPM2A Lafora disease
157 Pheochromocytoma is not associated with Gene RET pheochromocytoma
158 Brugada Syndrome (disorder) associates Gene SCN5A Brugada syndrome
159 Greig cephalopolysyndactyly syndrome associates Gene GLI3 Greig cephalopolysyndactyly syndrome
160 vaccines treats melanoma melanoma
161 Mucopolysaccharidosis II associates Gene IDS mucopolysaccharidosis II
162 RAPP-HODGKIN SYNDROME associates Gene TP63 Rapp-Hodgkin syndrome
163 Spinocerebellar Ataxia Type 6 (disorder) associates Gene CACNA1A spinocerebellar ataxia type 6
164 Saethre-Chotzen Syndrome is not associated with Gene TWIST1 Saethre-Chotzen syndrome
165 Loeys-Dietz Syndrome associates Gene TGFBR2 Loeys-Dietz syndrome
166 Ulnar-mammary syndrome is not associated with Gene TBX3 ulnar-mammary syndrome
167 Marfan Syndrome associates Gene FBN1 Marfan syndrome
168 noonan syndrome associates gene raf1 Noonan syndrome
169 norrie disease associates gene ndp Norrie disease
170 biotinidase deficiency is not a multiple carboxylase deficiency biotinidase deficiency
171 burkitt lymphoma is not associated with gene myc Burkitt lymphoma
172 hyperkalemic periodic paralysis is not a familial periodic paralysis hyperkalemic periodic paralysis
173 TUBEROUS SCLEROSIS ASSOCIATES GENE TSC2 tuberous sclerosis
174 melphalan treats melanoma melanoma
175 Disease ontology identifier for beta-mannosidosis is DOID:0111136 beta-mannosidosis
176 pitt-hopkins syndrome is a syndromic disease Pitt-Hopkins syndrome
177 Fatal Familial Insomnia is not associated with Gene PRNP fatal familial insomnia
178 FABRY DISEASE IS NOT A DEVELOPMENTAL ANOMALY OF METABOLIC ORIGIN Fabry disease
179 fatal familial insomnia is not a insomnia fatal familial insomnia
180 MUENKE SYNDROME ASSOCIATES GENE FGFR3 Muenke Syndrome
181 FARBER LIPOGRANULOMATOSIS IS NOT ASSOCIATED WITH GENE ASAH1 Farber lipogranulomatosis
182 greig cephalopolysyndactyly syndrome is not a syndromic disease Greig cephalopolysyndactyly syndrome
183 Kleefstra syndrome 1 is a Kleefstra syndrome Kleefstra syndrome 1
184 mowat-wilson syndrome is a syndromic intellectual disability Mowat-Wilson syndrome
185 METRONIDAZOLE TREATS CROHN'S DISEASE Crohn's disease
186 Cystinuria is not associated with Gene SLC7A9 cystinuria
187 Disease ontology identifier for Smith-Lemli-Opitz syndrome is DOID:0080026 Smith-Lemli-Opitz syndrome
188 alpha-d-galactosidase enzyme treats fabry disease Fabry disease
189 Tay-Sachs Disease associates Gene HEXA Tay-Sachs disease
190 BIOTINIDASE DEFICIENCY IS NOT ASSOCIATED WITH GENE BTD biotinidase deficiency
191 DISEASE ONTOLOGY IDENTIFIER FOR ATAXIA TELANGIECTASIA IS DOID:0060010 ataxia telangiectasia
192 Disease ontology identifier for Timothy syndrome is DOID:0060173 Timothy syndrome
193 multiple endocrine neoplasia type 2b is a autosomal dominant disease multiple endocrine neoplasia type 2B
194 WOLCOTT-RALLISON SYNDROME IS A SYNDROMIC DISEASE Wolcott-Rallison syndrome
195 Disease ontology identifier for cystinuria is DOID:9266 cystinuria
196 POLYCYSTIC KIDNEY DISEASE 1 ASSOCIATES GENE PKD1 polycystic kidney disease 1
197 Refsum Disease associates Gene PHYH Refsum disease
198 Nijmegen breakage syndrome is a autosomal recessive disease Nijmegen breakage syndrome
199 Pierson syndrome is not associated with Gene LAMB2 Pierson syndrome
200 holt-oram syndrome is a autosomal dominant disease Holt-Oram syndrome
201 Fragile X Syndrome associates Gene FMR1 fragile X syndrome
202 vitelliform macular dystrophy is not a macular degeneration vitelliform macular dystrophy
203 Pfeiffer Syndrome associates Gene FGFR2 Pfeiffer syndrome
204 Alexander Disease associates Gene GFAP Alexander disease
205 basal ganglia disease, biotin-responsive associates gene slc19a3 basal ganglia disease
206 Progeria associates Gene LMNA progeria
207 infantile hypophosphatasia associates gene alpl infantile hypophosphatasia
208 xeroderma pigmentosum, group b associates gene ercc3 xeroderma pigmentosum
209 Microvillus inclusion disease is not associated with Gene MYO5B microvillus inclusion disease
210 Smith-Magenis syndrome associates Gene RAI1 Smith-Magenis syndrome
211 LIVER CARCINOMA IS NOT ASSOCIATED WITH GENE MET liver carcinoma
212 fanconi anemia, complementation group d2 is not associated with gene fancd2 Fanconi anemia
213 tetralogy of fallot associates gene zfpm2 tetralogy of Fallot
214 ARGIPRESSIN TREATS CENTRAL DIABETES INSIPIDUS central diabetes insipidus
215 noonan syndrome associates gene sos1 Noonan syndrome
216 Denys-Drash syndrome is a autosomal dominant disease Denys-Drash syndrome
217 familial Mediterranean fever is not a primary immunodeficiency due to a genetic defect in innate immunity familial Mediterranean fever
218 Disease ontology identifier for ornithine carbamoyltransferase deficiency is DOID:9271 ornithine carbamoyltransferase deficiency
219 Achondroplasia is not a osteochondrodysplasia achondroplasia
220 unverricht-lundborg syndrome is not associated with gene cstb Unverricht-Lundborg syndrome
221 alpha-Mannosidosis associates Gene MAN2B1 alpha-mannosidosis
222 6-MERCAPTOPURINE TREATS CROHN'S DISEASE Crohn's disease
223 enhanced s-cone syndrome is not associated with gene nr2e3 enhanced S-cone syndrome
224 ADRENAL CORTEX HORMONES TREATS CROHN'S DISEASE Crohn's disease
225 BIETTI CRYSTALLINE CORNEORETINAL DYSTROPHY is not associated with Gene CYP4V2 Bietti crystalline corneoretinal dystrophy
226 AGALSIDASE BETA TREATS FABRY DISEASE Fabry disease
227 HEMOPHILIA B IS A HEMORRHAGIC DISEASE hemophilia B
228 Li-Fraumeni Syndrome is not associated with Gene TP53 Li-Fraumeni syndrome
229 xeroderma pigmentosum, group f associates gene ercc4 xeroderma pigmentosum
230 WOLMAN DISEASE IS A LYSOSOMAL ACID LIPASE DEFICIENCY Wolman disease
231 alveolar rhabdomyosarcoma is not a rhabdomyosarcoma alveolar rhabdomyosarcoma
232 Aniridia is not associated with Gene PAX6 aniridia
233 argininosuccinic aciduria is a amino acid metabolism disease argininosuccinic aciduria
234 charcot-marie-tooth disease, type 4c associates gene sh3tc2 Charcot-Marie-Tooth disease
235 Werner Syndrome associates Gene WRN Werner syndrome
236 AMILORIDE TREATS CYSTIC FIBROSIS cystic fibrosis
237 nail-patella syndrome is a autosomal dominant disease nail-patella syndrome
238 TIBIAL MUSCULAR DYSTROPHY, TARDIVE associates Gene TTN tibial muscular dystrophy
239 Chediak-Higashi Syndrome associates Gene LYST Chediak-Higashi syndrome
240 juvenile myoclonic epilepsy is not associated with gene efhc1 juvenile myoclonic epilepsy
241 UNVERRICHT-LUNDBORG SYNDROME IS A MOVEMENT DISORDER Unverricht-Lundborg syndrome
242 immune checkpoint inhibitors treats melanoma melanoma
243 hyperkalemic periodic paralysis is not associated with gene scn4a hyperkalemic periodic paralysis
244 Disease ontology identifier for Doyne honeycomb retinal dystrophy is DOID:0081055 Doyne honeycomb retinal dystrophy
245 Charcot-Marie-Tooth disease, Type 4B1 associates Gene MTMR2 Charcot-Marie-Tooth disease
246 disease ontology identifier for fragile x syndrome is doid:14261 fragile X syndrome
247 Nijmegen Breakage Syndrome is not associated with Gene NBN Nijmegen breakage syndrome
248 MERCAPTOPURINE TREATS CROHN'S DISEASE Crohn's disease
249 Alexander disease is a leukodystrophy Alexander disease
250 disease ontology identifier for werner syndrome is doid:0050466 Werner syndrome
251 spinocerebellar ataxia type 1 is not associated with gene atxn1 spinocerebellar ataxia type 1
252 ACRODERMATITIS ENTEROPATHICA IS NOT A INBORN METAL METABOLISM DISORDER acrodermatitis enteropathica
253 Adrenoleukodystrophy associates Gene ABCD1 adrenoleukodystrophy
254 Xeroderma pigmentosum, group A associates Gene XPA xeroderma pigmentosum
255 Wolcott-Rallison syndrome associates Gene EIF2AK3 Wolcott-Rallison syndrome
256 Mucopolysaccharidosis VI associates Gene ARSB mucopolysaccharidosis VI
257 very long chain acyl-coa dehydrogenase deficiency is not a disorder of fatty acid oxidation and ketogenesis very long chain acyl-CoA dehydrogenase deficiency
258 OBESITY IS NOT ASSOCIATED WITH GENE PPARG obesity
259 angelman syndrome is not associated with gene ube3a Angelman syndrome
260 pelizaeus-merzbacher disease associates gene plp1 Pelizaeus-Merzbacher disease
261 Ellis-van Creveld syndrome is not a heart disorder Ellis-Van Creveld syndrome
262 kleefstra syndrome 1 is not associated with gene ehmt1 Kleefstra syndrome 1
263 COLCHICINE TREATS FAMILIAL MEDITERRANEAN FEVER familial Mediterranean fever
264 CHARCOT-MARIE-TOOTH DISEASE, TYPE 4J associates Gene FIG4 Charcot-Marie-Tooth disease
265 sitosterolemia is not associated with gene abcg8 sitosterolemia
266 Holt-Oram syndrome is not associated with Gene TBX5 Holt-Oram syndrome
267 OBESITY IS NOT ASSOCIATED WITH GENE MC4R obesity
268 Disease ontology identifier for Lafora disease is DOID:3534 Lafora disease
269 Achondroplasia is not associated with Gene FGFR3 achondroplasia
270 BETA-MANNOSIDOSIS IS NOT ASSOCIATED WITH GENE MANBA beta-mannosidosis
271 BORJESON-FORSSMAN-LEHMANN SYNDROME IS NOT A X-LINKED SYNDROMIC INTELLECTUAL DISABILITY Borjeson-Forssman-Lehmann syndrome
272 Disease ontology identifier for spinocerebellar ataxia type 1 is DOID:0050954 spinocerebellar ataxia type 1
273 Disease ontology identifier for Tangier disease is DOID:1388 Tangier disease
274 sarcoma is a cancer sarcoma
275 DISEASE ONTOLOGY IDENTIFIER FOR JUVENILE MYOCLONIC EPILEPSY IS DOID:4890 juvenile myoclonic epilepsy
276 Disease ontology identifier for aniridia is DOID:12704 aniridia
277 CHARGE Syndrome is not associated with Gene CHD7 CHARGE syndrome
278 Creutzfeldt-Jakob disease is not associated with Gene PRNP Creutzfeldt-Jakob disease
279 central diabetes insipidus associates gene avp central diabetes insipidus
280 Chediak-Higashi syndrome is a congenital nervous system disorder Chediak-Higashi syndrome
281 DISEASE ONTOLOGY IDENTIFIER FOR PELIZAEUS-MERZBACHER DISEASE IS DOID:5688 Pelizaeus-Merzbacher disease
282 Borjeson-Forssman-Lehmann syndrome is not associated with Gene PHF6 Borjeson-Forssman-Lehmann syndrome
283 Juvenile polyposis syndrome associates Gene BMPR1A juvenile polyposis syndrome
284 enoxaparin treats obesity obesity
285 Ataxia Telangiectasia associates Gene ATM ataxia telangiectasia
286 Mosaic variegated aneuploidy syndrome 1 associates Gene BUB1B mosaic variegated aneuploidy syndrome 1
287 bernard-soulier syndrome associates gene gp1ba Bernard-Soulier syndrome
288 PSEUDOXANTHOMA ELASTICUM ASSOCIATES GENE ABCC6 pseudoxanthoma elasticum
289 ALSTROM SYNDROME ASSOCIATES GENE ALMS1 Alstrom syndrome
290 osteosarcoma is a sarcoma osteosarcoma
291 XERODERMA PIGMENTOSUM, COMPLEMENTATION GROUP D ASSOCIATES GENE ERCC2 xeroderma pigmentosum
292 DISEASE ONTOLOGY IDENTIFIER FOR PSEUDOPSEUDOHYPOPARATHYROIDISM IS DOID:4183 pseudopseudohypoparathyroidism
293 Hajdu-Cheney Syndrome associates Gene NOTCH2 Hajdu-Cheney syndrome
294 X-linked agammaglobulinemia associates Gene BTK X-linked agammaglobulinemia
295 prothrombin complex concentrates treats hemophilia b hemophilia B
296 DISEASE ONTOLOGY IDENTIFIER FOR ADRENOLEUKODYSTROPHY IS DOID:0060844 adrenoleukodystrophy
297 nitisinone treats alkaptonuria alkaptonuria
298 l-2-hydroxyglutaric aciduria is not a 2-hydroxyglutaric aciduria L-2-hydroxyglutaric aciduria
299 alkaptonuria is not a disorder of tyrosine metabolism alkaptonuria
300 Sarcoma associates Gene TP53 sarcoma
301 ACRODERMATITIS ENTEROPATHICA ASSOCIATES GENE SLC39A4 acrodermatitis enteropathica
302 Disease ontology identifier for spinocerebellar ataxia type 5 is DOID:0050882 spinocerebellar ataxia type 5
303 canavan disease associates gene aspa Canavan disease
304 disease ontology identifier for sitosterolemia is doid:0090019 sitosterolemia
305 Liver carcinoma associates Gene TP53 liver carcinoma
306 Variant rs2476601 associates Rheumatoid Arthritis rheumatoid arthritis
307 Osteosarcoma is not associated with Gene TP53 osteosarcoma
308 EPISODIC ATAXIA TYPE 2 (DISORDER) IS NOT ASSOCIATED WITH GENE CACNA1A episodic ataxia type 2
309 lesch-nyhan syndrome associates gene hprt1 Lesch-Nyhan syndrome
310 DISEASE ONTOLOGY IDENTIFIER FOR WISKOTT-ALDRICH SYNDROME IS DOID:9169 Wiskott-Aldrich syndrome
311 Alstrom syndrome is a ciliopathy Alstrom syndrome
312 Carney Complex is not associated with Gene PRKAR1A Carney complex
313 DISEASE ONTOLOGY IDENTIFIER FOR CONGENITAL GENERALIZED LIPODYSTROPHY TYPE 2 IS DOID:10588 congenital generalized lipodystrophy type 2
314 brachydactyly type c is a brachydactyly brachydactyly type C
315 noonan syndrome is a multiple congenital anomalies/dysmorphic syndrome-variable intellectual disability syndrome Noonan syndrome
316 Disease ontology identifier for Nance-Horan syndrome is DOID:0050771 Nance-Horan syndrome
317 Carney complex is a autosomal dominant disease Carney complex
318 immune checkpoint inhibitor treats melanoma melanoma
319 antineoplastic agents treats osteosarcoma osteosarcoma
320 timothy syndrome associates gene cacna1c Timothy syndrome
321 piebaldism is a autosomal dominant disease piebaldism
322 Disease ontology identifier for Loeys-Dietz syndrome is DOID:0060745 Loeys-Dietz syndrome
323 NOONAN SYNDROME ASSOCIATES GENE PTPN11 Noonan syndrome

View File

@@ -26,12 +26,13 @@ SAVE_PATH = config_data["SAVE_RESULTS_PATH"]
CHAT_DEPLOYMENT_ID = CHAT_MODEL_ID
save_name = "_".join(CHAT_MODEL_ID.split("-"))+"_two_hop_mcq_from_monarch_and_robokop_response.csv"
save_name = "_".join(CHAT_MODEL_ID.split("-"))+"_kg_rag_based_mcq_from_monarch_and_robokop_response.csv"
vectorstore = load_chroma(VECTOR_DB_PATH, SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL)
embedding_function_for_context_retrieval = load_sentence_transformer(SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL)
node_context_df = pd.read_csv(NODE_CONTEXT_PATH)
edge_evidence = False
def main():
start_time = time.time()
@@ -39,7 +40,7 @@ def main():
answer_list = []
for index, row in question_df.iterrows():
question = row["text"]
context = retrieve_context(row["text"], vectorstore, embedding_function_for_context_retrieval, node_context_df, CONTEXT_VOLUME, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY)
context = retrieve_context(row["text"], vectorstore, embedding_function_for_context_retrieval, node_context_df, CONTEXT_VOLUME, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY, edge_evidence)
enriched_prompt = "Context: "+ context + "\n" + "Question: "+ question
output = get_GPT_response(enriched_prompt, SYSTEM_PROMPT, CHAT_MODEL_ID, CHAT_DEPLOYMENT_ID, temperature=TEMPERATURE)
answer_list.append((row["text"], row["correct_node"], output))

View File

@@ -31,6 +31,7 @@ CHAT_DEPLOYMENT_ID = CHAT_MODEL_ID
vectorstore = load_chroma(VECTOR_DB_PATH, SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL)
node_context_df = pd.read_csv(NODE_CONTEXT_PATH)
edge_evidence = False
def main():
start_time = time.time()
@@ -41,7 +42,7 @@ def main():
answer_list = []
for index, row in question_df.iterrows():
question = row["text"]
context = retrieve_context(question, vectorstore, embedding_function_for_context_retrieval, node_context_df, context_volume, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY)
context = retrieve_context(question, vectorstore, embedding_function_for_context_retrieval, node_context_df, context_volume, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY, edge_evidence)
enriched_prompt = "Context: "+ context + "\n" + "Question: " + question
output = get_GPT_response(enriched_prompt, SYSTEM_PROMPT, CHAT_MODEL_ID, CHAT_DEPLOYMENT_ID, temperature=TEMPERATURE)
if not output:

View File

@@ -24,12 +24,13 @@ CONTEXT_VOLUME = 100
CHAT_DEPLOYMENT_ID = CHAT_MODEL_ID
save_name = "_".join(CHAT_MODEL_ID.split("-"))+"_one_hop_true_false_binary_response.csv"
save_name = "_".join(CHAT_MODEL_ID.split("-"))+"_kg_rag_based_true_false_binary_response.csv"
vectorstore = load_chroma(VECTOR_DB_PATH, SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL)
embedding_function_for_context_retrieval = load_sentence_transformer(SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL)
node_context_df = pd.read_csv(NODE_CONTEXT_PATH)
edge_evidence = False
def main():
start_time = time.time()
@@ -37,7 +38,7 @@ def main():
answer_list = []
for index, row in question_df.iterrows():
question = row["text"]
context = retrieve_context(row["text"], vectorstore, embedding_function_for_context_retrieval, node_context_df, CONTEXT_VOLUME, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY)
context = retrieve_context(row["text"], vectorstore, embedding_function_for_context_retrieval, node_context_df, CONTEXT_VOLUME, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY, edge_evidence)
enriched_prompt = "Context: "+ context + "\n" + "Question: "+ question
output = get_GPT_response(enriched_prompt, SYSTEM_PROMPT, CHAT_MODEL_ID, CHAT_DEPLOYMENT_ID, temperature=TEMPERATURE)
answer_list.append((row["text"], row["label"], output))

View File

@@ -31,6 +31,7 @@ SAVE_PATH = config_data["SAVE_RESULTS_PATH"]
vectorstore = load_chroma(VECTOR_DB_PATH, SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL)
embedding_function_for_context_retrieval = load_sentence_transformer(SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL)
node_context_df = pd.read_csv(NODE_CONTEXT_PATH)
edge_evidence = False
def main():
start_time = time.time()
@@ -40,7 +41,7 @@ def main():
answer_list = []
for index, row in question_df.iterrows():
question = row["text"]
context = retrieve_context(question, vectorstore, embedding_function_for_context_retrieval, node_context_df, context_volume, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY)
context = retrieve_context(question, vectorstore, embedding_function_for_context_retrieval, node_context_df, context_volume, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY, edge_evidence)
enriched_prompt = "Context: "+ context + "\n" + "Question: " + question
output = get_GPT_response(enriched_prompt, system_prompt, CHAT_MODEL_ID, CHAT_DEPLOYMENT_ID, temperature=temperature)
if not output:

View File

@@ -22,7 +22,7 @@ MODEL_NAME = config_data["LLAMA_MODEL_NAME"]
BRANCH_NAME = config_data["LLAMA_MODEL_BRANCH"]
CACHE_DIR = config_data["LLM_CACHE_DIR"]
save_name = "_".join(MODEL_NAME.split("/")[-1].split("-"))+"_two_hop_mcq_from_monarch_and_robokop_response.csv"
save_name = "_".join(MODEL_NAME.split("/")[-1].split("-"))+"_kg_rag_based_mcq_from_monarch_and_robokop_response.csv"
INSTRUCTION = "Context:\n\n{context} \n\nQuestion: {question}"
@@ -30,6 +30,7 @@ INSTRUCTION = "Context:\n\n{context} \n\nQuestion: {question}"
vectorstore = load_chroma(VECTOR_DB_PATH, SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL)
embedding_function_for_context_retrieval = load_sentence_transformer(SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL)
node_context_df = pd.read_csv(NODE_CONTEXT_PATH)
edge_evidence = False
@@ -43,7 +44,7 @@ def main():
answer_list = []
for index, row in question_df.iterrows():
question = row["text"]
context = retrieve_context(question, vectorstore, embedding_function_for_context_retrieval, node_context_df, CONTEXT_VOLUME, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY)
context = retrieve_context(question, vectorstore, embedding_function_for_context_retrieval, node_context_df, CONTEXT_VOLUME, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY, edge_evidence)
output = llm_chain.run(context=context, question=question)
answer_list.append((row["text"], row["correct_node"], output))
answer_df = pd.DataFrame(answer_list, columns=["question", "correct_answer", "llm_answer"])

View File

@@ -0,0 +1,61 @@
'''
This script takes the MCQ-style questions from a CSV file and saves the model responses as another CSV file.
This script uses the BioMedGPT-LM-7B model ('PharMolix/BioMedGPT-LM-7B'), loaded through the `llama_model` helper.
Before running this script, make sure to configure the file paths in the config.yaml file.
'''
from langchain import PromptTemplate, LLMChain
from kg_rag.utility import *
# NOTE(review): `config_data`, `system_prompts`, `pd`, `os` and `time` are not imported explicitly here;
# presumably they all come from the star import of `kg_rag.utility` above - confirm.
# CSV of MCQ questions; main() reads its "text" and "correct_node" columns.
QUESTION_PATH = config_data["MCQ_PATH"]
# System prompt combined with INSTRUCTION via get_prompt() in main().
SYSTEM_PROMPT = system_prompts["MCQ_QUESTION"]
# The next three values are forwarded unchanged to retrieve_context(); the config values are cast to int/float here.
CONTEXT_VOLUME = int(config_data["CONTEXT_VOLUME"])
QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD = float(config_data["QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD"])
QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY = float(config_data["QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY"])
VECTOR_DB_PATH = config_data["VECTOR_DB_PATH"]
NODE_CONTEXT_PATH = config_data["NODE_CONTEXT_PATH"]
SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL = config_data["SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL"]
SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL = config_data["SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL"]
# Directory the response CSV is written into.
SAVE_PATH = config_data["SAVE_RESULTS_PATH"]
# Model name and branch are hard-coded in this script rather than read from config.yaml.
MODEL_NAME = 'PharMolix/BioMedGPT-LM-7B'
BRANCH_NAME = 'main'
CACHE_DIR = config_data["LLM_CACHE_DIR"]
# Output file name: last path component of MODEL_NAME with "-" replaced by "_", plus a fixed suffix
# (i.e. "BioMedGPT_LM_7B_kg_rag_based_mcq_from_monarch_and_robokop_response.csv").
save_name = "_".join(MODEL_NAME.split("/")[-1].split("-"))+"_kg_rag_based_mcq_from_monarch_and_robokop_response.csv"
# Prompt body; {context} and {question} are filled in per question by the LLM chain.
INSTRUCTION = "Context:\n\n{context} \n\nQuestion: {question}"
# The resources below are loaded at import time (module-level side effects), before main() runs.
vectorstore = load_chroma(VECTOR_DB_PATH, SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL)
embedding_function_for_context_retrieval = load_sentence_transformer(SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL)
node_context_df = pd.read_csv(NODE_CONTEXT_PATH)
# Passed as the last argument to retrieve_context(); presumably toggles edge-level evidence in the
# retrieved context - TODO confirm against kg_rag.utility.retrieve_context.
edge_evidence = False
def main():
    """Answer a sample of MCQ questions with retrieved context and save the responses.

    Loads the model through ``llama_model``, builds an ``LLMChain`` from the
    system prompt and ``INSTRUCTION``, draws 50 questions from ``QUESTION_PATH``
    with a fixed ``random_state``, retrieves context for each question via
    ``retrieve_context``, runs the chain, and writes one row per question
    (question, correct answer, model answer) to ``SAVE_PATH/save_name``.
    Prints the elapsed wall-clock time in minutes when done.
    """
    started_at = time.time()

    # The model is loaded before the prompt is assembled, as in the original flow.
    llm = llama_model(MODEL_NAME, BRANCH_NAME, CACHE_DIR)
    prompt = PromptTemplate(
        template=get_prompt(INSTRUCTION, SYSTEM_PROMPT),
        input_variables=["context", "question"],
    )
    llm_chain = LLMChain(prompt=prompt, llm=llm)

    # A fixed random_state keeps the 50-question subset identical across runs.
    sampled_questions = pd.read_csv(QUESTION_PATH).sample(50, random_state=40)

    records = []
    for _, row in sampled_questions.iterrows():
        question = row["text"]
        context = retrieve_context(
            question,
            vectorstore,
            embedding_function_for_context_retrieval,
            node_context_df,
            CONTEXT_VOLUME,
            QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD,
            QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY,
            edge_evidence,
        )
        llm_answer = llm_chain.run(context=context, question=question)
        records.append((question, row["correct_node"], llm_answer))

    answer_df = pd.DataFrame(records, columns=["question", "correct_answer", "llm_answer"])
    output_path = os.path.join(SAVE_PATH, save_name)
    answer_df.to_csv(output_path, index=False, header=True)

    elapsed_minutes = (time.time() - started_at) / 60
    print("Completed in {} min".format(elapsed_minutes))


if __name__ == "__main__":
    main()

View File

@@ -22,8 +22,9 @@ MODEL_NAME = config_data["LLAMA_MODEL_NAME"]
BRANCH_NAME = config_data["LLAMA_MODEL_BRANCH"]
CACHE_DIR = config_data["LLM_CACHE_DIR"]
CONTEXT_VOLUME = 100
edge_evidence = False
save_name = "_".join(MODEL_NAME.split("/")[-1].split("-"))+"_one_hop_true_false_binary_response.csv"
save_name = "_".join(MODEL_NAME.split("/")[-1].split("-"))+"_kg_rag_based_true_false_binary_response.csv"
INSTRUCTION = "Context:\n\n{context} \n\nQuestion: {question}"
@@ -43,7 +44,7 @@ def main():
answer_list = []
for index, row in question_df.iterrows():
question = row["text"]
context = retrieve_context(question, vectorstore, embedding_function_for_context_retrieval, node_context_df, CONTEXT_VOLUME, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY)
context = retrieve_context(question, vectorstore, embedding_function_for_context_retrieval, node_context_df, CONTEXT_VOLUME, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY, edge_evidence)
output = llm_chain.run(context=context, question=question)
answer_list.append((row["text"], row["label"], output))
answer_df = pd.DataFrame(answer_list, columns=["question", "label", "llm_answer"])

View File

@@ -0,0 +1,153 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "945c420e-bb44-4ffb-b899-e049caf0d918",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.chdir('..')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f2bdefb3-3e59-409a-81b4-2e9ffbdfdb1a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/root/anaconda3/envs/kg_rag_test_2/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import pandas as pd\n",
"from kg_rag.utility import *\n",
"from tqdm import tqdm\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "19fc98b9-64a8-40c0-9e5a-92b4392e6969",
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('data/dataset_for_entity_retrieval_accuracy_analysis.csv')\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "2851be4c-2a76-4f6d-b5f4-118e8122b155",
"metadata": {},
"outputs": [],
"source": [
"VECTOR_DB_PATH = config_data[\"VECTOR_DB_PATH\"]\n",
"SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL = config_data[\"SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL\"]\n",
"\n",
"vectorstore = load_chroma(VECTOR_DB_PATH, SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL)\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "7255fbab-d8b4-43a3-b870-9d67ad79d061",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"322it [00:05, 56.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 4.74 s, sys: 896 ms, total: 5.64 s\n",
"Wall time: 5.73 s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"%%time\n",
"\n",
"correct_retrieval = 0\n",
"\n",
"for index, row in tqdm(data.iterrows()):\n",
" question = row['text']\n",
" entities = disease_entity_extractor_v2(question) \n",
" for entity in entities:\n",
" node_search_result = vectorstore.similarity_search_with_score(entity, k=1)\n",
" if node_search_result[0][0].page_content == row['node_hits']:\n",
" correct_retrieval += 1 \n",
" break\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2f997335-bff7-431c-bbd8-608513eddcc7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Retrieval accuracy is 99.7%\n"
]
}
],
"source": [
"retrieval_accuracy = 100*correct_retrieval/data.shape[0]\n",
"print(f'Retrieval accuracy is {round(retrieval_accuracy,1)}%')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "afe971ab-b8b9-4c88-9657-c588813b412f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -2,28 +2,31 @@
"cells": [
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 1,
"id": "3d3dca32-b77f-471d-b834-20ac795f9f17",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.chdir('..')"
"os.chdir('..')\n"
]
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 2,
"id": "9da344d2-8e45-4574-aa19-4ad76c566101",
"metadata": {},
"outputs": [],
"source": [
"from kg_rag.utility import *\n"
"from IPython.display import clear_output\n",
"from kg_rag.utility import *\n",
"\n",
"clear_output()\n"
]
},
{
"cell_type": "code",
"execution_count": 79,
"execution_count": 3,
"id": "b44bf274-41d1-4153-a65e-bfb9b90ebcc6",
"metadata": {},
"outputs": [],
@@ -57,7 +60,7 @@
},
{
"cell_type": "code",
"execution_count": 80,
"execution_count": 4,
"id": "33c0771d-e6be-406b-9b17-51f6377bcb6a",
"metadata": {},
"outputs": [],
@@ -77,7 +80,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "bbbdb428-6d01-43f2-9e58-b919e7a68736",
"metadata": {},
"outputs": [],
@@ -231,18 +234,18 @@
},
{
"cell_type": "code",
"execution_count": 113,
"execution_count": 13,
"id": "2a1c9337-fd39-45b0-b12a-6de9b5971b9e",
"metadata": {},
"outputs": [],
"source": [
"\n",
"question = 'Which drugs are contraindicated in obsessive compulsive disorder?'\n"
"question = 'Does drug dependence have any genetic factors? Do you have any statistical evidence from trustworthy sources for this?'\n"
]
},
{
"cell_type": "code",
"execution_count": 114,
"execution_count": 14,
"id": "e6852cb3-8bf9-408b-ab65-492b75c690ed",
"metadata": {},
"outputs": [
@@ -250,7 +253,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"The drugs that are contraindicated in obsessive-compulsive disorder include Orphenadrine, PHENYLPROPANOLAMINE POLISTIREX, CHLORPHENIRAMINE POLISTIREX, CODEINE POLISTIREX, Ibuprofen Lysine, Benzoic Acid, (R)-3-(1-Hydroxy-2-(methylamino)ethyl)phenol 2,3-dihydroxysuccinate, Dexibuprofen, Phenacetin, Pheniramine Maleate, Pyrilamine, Butalbital, Propoxyphene, Ibuprofen, Orphenadrine Citrate, Phenobarbital, Caffeine, Caffeine Citrate, 2-Amino-1-phenyl-1-propanol, Thiamine, Dihydrocodeine tartrate, Chlorpheniramine, (1S,2R)-1-benzyl-3-(dimethylamino)-2-methyl-1-phenylpropyl propanoate naphtalene-2-sulfonic acid, Phenylephrine, Ergotamine, Codeine, Riboflavin, and Pheniramine. The provenance of this information is DrugCentral.\n",
"Yes, drug dependence does have genetic factors. This is evidenced by the association of drug dependence with genes KAT2B and SLC25A16. The statistical evidence comes from Genome-Wide Association Studies (GWAS), with p-values of 4e-10 and 1e-09 respectively, indicating a statistically significant association.\n",
"\n"
]
}
@@ -264,7 +267,7 @@
},
{
"cell_type": "code",
"execution_count": 115,
"execution_count": 15,
"id": "b8079bc6-d309-4c88-9440-376aa43d972e",
"metadata": {},
"outputs": [
@@ -272,7 +275,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"There are no specific drugs that are universally contraindicated in obsessive-compulsive disorder (OCD). However, certain medications like benzodiazepines and atypical antipsychotics may potentially worsen OCD symptoms. The choice of medication always depends on the individual's overall health, the severity of their symptoms, and their response to treatment.\n",
"Yes, drug dependence does have genetic factors. According to the National Institute on Drug Abuse, genetics account for about 40-60% of a person's vulnerability to drug addiction.\n",
"\n"
]
}
@@ -286,7 +289,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "d44098cc-ca4c-4ffa-a5ca-e273a029cb67",
"id": "16e1ae6d-d0e0-4b42-a8fe-517033fb9960",
"metadata": {},
"outputs": [],
"source": []

2
pid_info.sh Normal file
View File

@@ -0,0 +1,2 @@
# Report PID, start time, command name and elapsed time for the tracked processes.
#
# Usage: pid_info.sh [pid[,pid...]]
#   With no argument, the two PIDs this script originally tracked are used.
#
# Processes are selected with `-p`, which matches the PID column exactly. Piping the
# whole process table through grep also matched any line that merely contained the
# digits (for example PID 120342, or a time field).
pids="${1:-20342,9628}"
# The trailing '=' on each column name suppresses the header line, so the output has
# the same shape as the grep-filtered version (rows only). Rows come back in ps order.
ps -o pid=,lstart=,comm=,etime= -p "$pids"

View File

@@ -30,30 +30,30 @@ KG_RAG_BASED_TEXT_GENERATION: |
TRUE_FALSE_QUESTION: |
You are an expert biomedical researcher. For answering the Question at the end, you need to first read the Context provided.
Based on that Context, provide your answer in the following JSON format:
{
{{
"answer": "True"
}
}}
OR
{
{{
"answer": "False"
}
}}
TRUE_FALSE_QUESTION_PROMPT_BASED: |
You are an expert biomedical researcher. Please provide your answer in the following JSON format for the Question asked:
{
{{
"answer": "True"
}
}}
OR
{
{{
"answer": "False"
}
}}
# MCQ Question
MCQ_QUESTION: |
You are an expert biomedical researcher. For answering the Question at the end, you need to first read the Context provided.
Based on that Context, provide your answer in the following JSON format for the Question asked.
{
{{
"answer": <correct answer>
}
}}
MCQ_QUESTION_PROMPT_BASED: |
You are an expert biomedical researcher. Please provide your answer in the following JSON format for the Question asked:
{{

View File

@@ -42,7 +42,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [03:24<00:00, 68.06s/it]\n",
"Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:19<00:00, 6.56s/it]\n",
"/root/anaconda3/envs/kg_rag_test_2/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:362: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`. This was detected when initializing the generation config instance, which means the corresponding file may hold incorrect parameterization and should be fixed.\n",
" warnings.warn(\n",
"/root/anaconda3/envs/kg_rag_test_2/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:367: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.6` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`. This was detected when initializing the generation config instance, which means the corresponding file may hold incorrect parameterization and should be fixed.\n",
@@ -53,8 +53,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 25.6 s, sys: 51.1 s, total: 1min 16s\n",
"Wall time: 3min 31s\n"
"CPU times: user 18.9 s, sys: 26.3 s, total: 45.3 s\n",
"Wall time: 26 s\n"
]
}
],
@@ -74,7 +74,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 7,
"id": "0a28f1ce-5cc5-4a17-84d7-b0dda29815a5",
"metadata": {},
"outputs": [],
@@ -84,12 +84,12 @@
"Question:alpha-Mannosidosis associates Gene MAN2B1\n",
"Answer:\n",
"'''\n",
"text = ['Is it PNPLA3 or HLA-B that has a significant association with the disease liver benign neoplasm?']\n"
"text = [\"Out of the given list, which Gene is associated with psoriasis and Takayasu's arteritis. Given list is: SHTN1, HLA-B, SLC14A2, BTBD9, DTNB\"]\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"id": "f59eeb37-57dd-42ae-b9ff-f8442eb613a7",
"metadata": {},
"outputs": [
@@ -97,8 +97,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"A meta-analysis of 11,000 patients with hepatocellular carcinoma.\n",
"Hepatocellular carcinoma (HCC) is the most common primary liver cancer and the third leading cause of cancer-related deaths worldwide. D The incidence of\n"
"P1, TNFAIP3, TNIP1, TNIP3, TNFAIP2, TNFAIP6, TNFAIP7, TNFAIP8, TNFAIP9, TNFAIP10\n"
]
}
],

View File

@@ -0,0 +1,195 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b33a915d-cc1d-4102-a2ee-159c02e6c579",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.chdir('..')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "57c0a1b8-e339-4f6b-941e-7af7b902de7c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/root/anaconda3/envs/kg_rag_test_2/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"from langchain import PromptTemplate, LLMChain\n",
"from kg_rag.utility import *\n",
"from tqdm import tqdm\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2672548d-7d25-4f3c-94d1-d19206049076",
"metadata": {},
"outputs": [],
"source": [
"# --- Paths read from the project config ---\n",
"QUESTION_PATH = config_data[\"MCQ_PATH\"]\n",
"VECTOR_DB_PATH = config_data[\"VECTOR_DB_PATH\"]\n",
"NODE_CONTEXT_PATH = config_data[\"NODE_CONTEXT_PATH\"]\n",
"SAVE_PATH = config_data[\"SAVE_RESULTS_PATH\"]\n",
"CACHE_DIR = config_data[\"LLM_CACHE_DIR\"]\n",
"\n",
"# --- System prompt for the MCQ task ---\n",
"SYSTEM_PROMPT = system_prompts[\"MCQ_QUESTION\"]\n",
"\n",
"# --- Context retrieval settings (config values are cast to numbers here) ---\n",
"CONTEXT_VOLUME = int(config_data[\"CONTEXT_VOLUME\"])\n",
"QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD = float(config_data[\"QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD\"])\n",
"QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY = float(config_data[\"QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY\"])\n",
"\n",
"# --- Sentence-embedding models: one for node lookup, one for context ranking ---\n",
"SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL = config_data[\"SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL\"]\n",
"SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL = config_data[\"SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL\"]\n",
"\n",
"# --- LLM under test (hard-coded for this notebook) ---\n",
"MODEL_NAME = 'PharMolix/BioMedGPT-LM-7B'\n",
"BRANCH_NAME = 'main'\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c753b053-be44-4ddb-8d55-3bf434428954",
"metadata": {},
"outputs": [],
"source": [
"# Prompt body; the {context} and {question} placeholders are filled per question.\n",
"INSTRUCTION = \"Context:\\n\\n{context} \\n\\nQuestion: {question}\"\n",
"\n",
"# Flag passed through to retrieve_context() in the evaluation loop.\n",
"edge_evidence = False\n",
"\n",
"# Retrieval resources: node vector store, context embedding model, node-context table.\n",
"vectorstore = load_chroma(VECTOR_DB_PATH, SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL)\n",
"embedding_function_for_context_retrieval = load_sentence_transformer(SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL)\n",
"node_context_df = pd.read_csv(NODE_CONTEXT_PATH)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f18c9efb-556c-4b37-8b00-e06a73a19f86",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:19<00:00, 6.66s/it]\n",
"/root/anaconda3/envs/kg_rag_test_2/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:362: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`. This was detected when initializing the generation config instance, which means the corresponding file may hold incorrect parameterization and should be fixed.\n",
" warnings.warn(\n",
"/root/anaconda3/envs/kg_rag_test_2/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:367: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.6` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`. This was detected when initializing the generation config instance, which means the corresponding file may hold incorrect parameterization and should be fixed.\n",
" warnings.warn(\n"
]
}
],
"source": [
"# Load the model first (slow: checkpoint shards are read from CACHE_DIR).\n",
"llm = llama_model(MODEL_NAME, BRANCH_NAME, CACHE_DIR)\n",
"\n",
"# Combine the instruction and system prompt into one template with two inputs.\n",
"template = get_prompt(INSTRUCTION, SYSTEM_PROMPT)\n",
"prompt = PromptTemplate(\n",
"    template=template,\n",
"    input_variables=[\"context\", \"question\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0370d703-4e18-4c78-9e9a-2030b498253e",
"metadata": {},
"outputs": [],
"source": [
"# Chain that formats the prompt and sends it to the loaded model.\n",
"llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
"\n",
"# Full MCQ question set; the evaluation cell reads the 'text' and 'correct_node' columns.\n",
"question_df = pd.read_csv(QUESTION_PATH)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "275f4171-3be7-46ca-bf16-18160ce72f3b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"Out of the given list, which Gene is associated with psoriasis and Takayasu's arteritis. Given list is: SHTN1, HLA-B, SLC14A2, BTBD9, DTNB\""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"question_df.iloc[0].text"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "cc5a65fb-6bd3-4948-84e5-f404af83d3f7",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"0it [00:00, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (4135 > 2048). Running this sequence through the model will result in indexing errors\n",
"This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.\n",
"0it [04:19, ?it/s]\n",
"\n",
"KeyboardInterrupt\n",
"\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# Evaluate on a fixed random subset of the questions.\n",
"# The sample gets its own name so `question_df` keeps the full set and re-running\n",
"# this cell always draws from the same population. min() avoids the ValueError that\n",
"# DataFrame.sample raises when asked for more rows than exist.\n",
"N_QUESTIONS = 50\n",
"question_sample_df = question_df.sample(min(N_QUESTIONS, len(question_df)), random_state=40)\n",
"\n",
"answer_list = []\n",
"# total= gives tqdm a real progress bar; iterrows() is a generator with no length.\n",
"for _, row in tqdm(question_sample_df.iterrows(), total=len(question_sample_df)):\n",
"    question = row[\"text\"]\n",
"    context = retrieve_context(question, vectorstore, embedding_function_for_context_retrieval, node_context_df, CONTEXT_VOLUME, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY, edge_evidence)\n",
"    output = llm_chain.run(context=context, question=question)\n",
"    # No print()/input() here: the interactive pause blocked unattended runs, and\n",
"    # every answer is kept in answer_df for inspection afterwards.\n",
"    answer_list.append((question, row[\"correct_node\"], output))\n",
"\n",
"# One row per evaluated question: question text, expected answer, raw LLM output.\n",
"answer_df = pd.DataFrame(answer_list, columns=[\"question\", \"correct_answer\", \"llm_answer\"])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "94eb325d-17d4-4013-907d-7a38dabaea56",
"metadata": {},
"outputs": [],
"source": [
"# `save_name` is not defined in any cell of this notebook, so define the output file\n",
"# name here, derived from the model id (NOTE(review): adjust to the project's naming\n",
"# convention if one exists).\n",
"save_name = MODEL_NAME.split('/')[-1] + '_kg_rag_based_mcq_response.csv'\n",
"answer_df.to_csv(os.path.join(SAVE_PATH, save_name), index=False, header=True)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}