From 8d5fcb5713bc3dc05d5c0e16b90fe196e8327ccb Mon Sep 17 00:00:00 2001
From: "E.C. Wood" <wooderi@stanford.edu>
Date: Wed, 12 Jul 2023 13:25:18 -0700
Subject: [PATCH 001/117] #316 starting to piece together how UMLS is
 structured based on MySQL and umls2rdf code to scope out this issue

---
 misc-tools/mysql_table_to_md_table.sh |   5 +
 understanding_umls.md                 | 599 ++++++++++++++++++++++++++
 2 files changed, 604 insertions(+)
 create mode 100755 misc-tools/mysql_table_to_md_table.sh
 create mode 100644 understanding_umls.md

diff --git a/misc-tools/mysql_table_to_md_table.sh b/misc-tools/mysql_table_to_md_table.sh
new file mode 100755
index 00000000..673b3e4d
--- /dev/null
+++ b/misc-tools/mysql_table_to_md_table.sh
@@ -0,0 +1,5 @@
+sed -i -E "s/\+(-)+/\|--/g" umls_table.txt
+sed -i -E "s/^\|( )*//g" umls_table.txt
+sed -i -E "s/--\+$/--/g" umls_table.txt
+sed -i -E "s/( )+/ /g" umls_table.txt
+sed -i -E "s/<|>//g" umls_table.txt
diff --git a/understanding_umls.md b/understanding_umls.md
new file mode 100644
index 00000000..02baabd0
--- /dev/null
+++ b/understanding_umls.md
@@ -0,0 +1,599 @@
+## UMLS MySQL Walk Through
+
+# RegEx MySQL Table -> Markdown Table
+1. Replace: `\+(-)+` With: `\|--`
+2. Replace: `^\|( )*` With: Nothing
+3. Replace: `--\+$` With: `--`
+4. Replace: `( )+` With: ` `
+5. Replace: `<|>` With: Nothing
+
+# Tables
+```
+mysql> show tables;
+```
+
+Tables_in_umls |
+--|
+AMBIGLUI |
+AMBIGSUI |
+DELETEDCUI |
+DELETEDLUI |
+DELETEDSUI |
+MERGEDCUI |
+MERGEDLUI |
+MRAUI |
+MRCOLS |
+MRCONSO |
+MRCUI |
+MRDEF |
+MRDOC |
+MRFILES |
+MRHIER |
+MRHIST |
+MRMAP |
+MRRANK |
+MRREL |
+MRSAB |
+MRSAT |
+MRSMAP |
+MRSTY |
+MRXNS_ENG |
+MRXNW_ENG |
+MRXW_BAQ |
+MRXW_CHI |
+MRXW_CZE |
+MRXW_DAN |
+MRXW_DUT |
+MRXW_ENG |
+MRXW_EST |
+MRXW_FIN |
+MRXW_FRE |
+MRXW_GER |
+MRXW_GRE |
+MRXW_HEB |
+MRXW_HUN |
+MRXW_ITA |
+MRXW_JPN |
+MRXW_KOR |
+MRXW_LAV |
+MRXW_NOR |
+MRXW_POL |
+MRXW_POR |
+MRXW_RUS |
+MRXW_SCR |
+MRXW_SPA |
+MRXW_SWE |
+MRXW_TUR |
+
+```
+mysql> select * from MRCUI limit 10;
+```
+CUI1 | VER | REL | RELA | MAPREASON | CUI2 | MAPIN |
+--|--|--|--|--|--|--
+C0000002 | 2000AC | SY | NULL | NULL | C0007404 | Y |
+C0000003 | 1999AA | SY | NULL | NULL | C0010504 | Y |
+C0000024 | 1993AA | SY | NULL | NULL | C0043791 | Y |
+C0000105 | 1995AA | SY | NULL | NULL | C0001964 | Y |
+C0000136 | 1993AA | DEL | NULL | NULL | NULL | NULL |
+C0000140 | 1993AA | DEL | NULL | NULL | NULL | NULL |
+C0000158 | 1993AA | DEL | NULL | NULL | NULL | NULL |
+C0000164 | 2003AB | RO | NULL | NULL | C0000163 | Y |
+C0000177 | 1993AA | SY | NULL | NULL | C0014924 | Y |
+C0000219 | 1993AA | DEL | NULL | NULL | NULL | NULL |
+
+```
+mysql> select * from MRCOLS;
+```
+
+COL | DES | REF | MIN | AV | MAX | FIL | DTY |
+--|--|--|--|--|--|--|--
+ATNL | Attribute name list for a source. | NULL | 0 | 69.84 | 1178 | MRSAB.RRF | varchar(4000) |
+ATN | Attribute name | NULL | 2 | 10.38 | 62 | MRSAT.RRF | varchar(100) |
+ATUI | Unique identifier for attribute. | NULL | 10 | 10.64 | 11 | MRSTY.RRF | varchar(11) |
+ATUI | Unique identifier for attribute. | NULL | 10 | 10.85 | 11 | MRSAT.RRF | varchar(11) |
+ATUI | Unique identifier for attribute. | NULL | 10 | 10.86 | 11 | MRDEF.RRF | varchar(11) |
+ATV | Attribute value | NULL | 1 | 12.69 | 35985 | MRSAT.RRF | varchar(65000) |
+AUI1 | Unique identifier for first atom | NULL | 0 | 8.52 | 9 | MRREL.RRF | varchar(9) |
+AUI1 | Unique identifier for first atom | NULL | 8 | 8.54 | 9 | MRAUI.RRF | varchar(9) |
+AUI2 | Unique identifier for second atom | NULL | 0 | 8.52 | 9 | MRREL.RRF | varchar(9) |
+AUI2 | Unique identifier for second atom | NULL | 8 | 8.54 | 9 | MRAUI.RRF | varchar(9) |
+AUI | Unique identifier for atom | NULL | 8 | 8.58 | 9 | MRHIER.RRF | varchar(9) |
+AUI | Unique identifier for atom | NULL | 8 | 8.74 | 9 | MRDEF.RRF | varchar(9) |
+AUI | Unique identifier for atom | NULL | 8 | 8.77 | 9 | MRCONSO.RRF | varchar(9) |
+AV | Average Length, Characters | NULL | 4 | 4.12 | 6 | MRCOLS.RRF | numeric(5,2) |
+BTS | Size in Bytes | NULL | 1 | 7.19 | 10 | MRFILES.RRF | integer |
+CENC | Character encoding of a source as specified by IANA | NULL | 5 | 5.00 | 5 | MRSAB.RRF | varchar(20) |
+CFR | CUI frequency for a source | NULL | 1 | 4.18 | 6 | MRSAB.RRF | integer |
+CHANGEKEY | CONCEPTSTATUS (if history relates to a SNOMED CT concept) or DESCRIPTIONSTATUS (if history relates to a SNOMED CT atom or "description") | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(1000) |
+CHANGETYPE | Source asserted code for type of change | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(1000) |
+CHANGEVAL | SNOMED CT CONCEPTSTATUS or DESCRIPTIONSTATUS value after the change took place | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(1000) |
+CLS | Number of columns | NULL | 1 | 1.12 | 2 | MRFILES.RRF | integer |
+CODE | Unique Identifier or code for string in source | NULL | 0 | 4.46 | 56 | MRSAT.RRF | varchar(100) |
+CODE | Unique Identifier or code for string in source | NULL | 1 | 7.50 | 95 | MRCONSO.RRF | varchar(100) |
+COL | Column or data element name | NULL | 2 | 3.71 | 11 | MRCOLS.RRF | varchar(20) |
+CUI1 | Unique identifier for first concept | NULL | 8 | 8.00 | 8 | MRAUI.RRF | char(8) |
+CUI1 | Unique identifier for first concept | NULL | 8 | 8.00 | 8 | MRCUI.RRF | char(8) |
+CUI1 | Unique identifier for first concept | NULL | 8 | 8.00 | 8 | MRREL.RRF | char(8) |
+CUI2 | Unique identifier for second concept | NULL | 0 | 3.33 | 8 | MRCUI.RRF | char(8) |
+CUI2 | Unique identifier for second concept | NULL | 8 | 8.00 | 8 | MRAUI.RRF | char(8) |
+CUI2 | Unique identifier for second concept | NULL | 8 | 8.00 | 8 | MRREL.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 0 | 0.00 | 0 | MRHIST.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | AMBIGLUI.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | AMBIGSUI.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | CHANGE/MERGEDCUI.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRCONSO.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRDEF.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRHIER.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRSAT.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRSTY.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXNS_ENG.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXNW_ENG.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_ARA.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_BAQ.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_CHI.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_CZE.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_DAN.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_DUT.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_ENG.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_EST.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_FIN.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_FRE.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_GER.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_GRE.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_HEB.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_HUN.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_ITA.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_JPN.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_KOR.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_LAV.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_NOR.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_POL.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_POR.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_RUS.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_SCR.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_SPA.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_SWE.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_TUR.RRF | char(8) |
+CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_UKR.RRF | char(8) |
+CURVER | Current Version flag | NULL | 1 | 1.00 | 1 | MRSAB.RRF | char(1) |
+CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRDEF.RRF | varchar(50) |
+CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRHIER.RRF | varchar(50) |
+CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(50) |
+CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(50) |
+CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRREL.RRF | varchar(50) |
+CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRSAT.RRF | varchar(50) |
+CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRSMAP.RRF | varchar(50) |
+CVF | Content view flag | NULL | 0 | 1.22 | 5 | MRCONSO.RRF | varchar(50) |
+CVF | Content view flag | NULL | 0 | 2.13 | 5 | MRSTY.RRF | varchar(50) |
+CXN | The context number if the atom has multiple contexts | NULL | 1 | 2.17 | 5 | MRHIER.RRF | integer |
+CXTY | Context type for a source | NULL | 0 | 5.14 | 13 | MRSAB.RRF | varchar(50) |
+DEF | Definition | NULL | 1 | 232.23 | 10939 | MRDEF.RRF | varchar(16000) |
+DES | Descriptive Name | NULL | 5 | 28.81 | 136 | MRCOLS.RRF | varchar(200) |
+DES | Descriptive Name | NULL | 8 | 18.25 | 42 | MRFILES.RRF | varchar(200) |
+DIR | Source asserted directionality flag | NULL | 0 | 0.13 | 1 | MRREL.RRF | varchar(1) |
+DOCKEY | Key to be documented | NULL | 2 | 3.65 | 8 | MRDOC.RRF | varchar(50) |
+DTY | SQL-92 data type for this column | NULL | 7 | 10.02 | 14 | MRCOLS.RRF | varchar(20) |
+EXPL | Detailed explanation | NULL | 0 | 26.57 | 941 | MRDOC.RRF | varchar(1000) |
+FIL | Physical FILENAME | NULL | 9 | 10.99 | 21 | MRCOLS.RRF | varchar(50) |
+FIL | Physical FILENAME | NULL | 9 | 12.12 | 21 | MRFILES.RRF | varchar(50) |
+FMT | Comma separated list of COL | NULL | 7 | 29.69 | 190 | MRFILES.RRF | varchar(300) |
+FROMEXPR | The expression that a mapping is mapped from | NULL | 1 | 6.93 | 9 | MRSMAP.RRF | varchar(4000) |
+FROMEXPR | The expression that a mapping is mapped from | NULL | 1 | 8.29 | 18 | MRMAP.RRF | varchar(4000) |
+FROMID | Metathesaurus identifier for the entity being mapped from | NULL | 1 | 7.31 | 18 | MRMAP.RRF | varchar(50) |
+FROMRES | Restriction applicable to the entity being mapped from | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(4000) |
+FROMRULE | Machine processible rule applicable to the entity being mapped from | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(4000) |
+FROMSID | Source asserted identifier for the entity being mapped from | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(50) |
+FROMTYPE | The type of expression that a mapping is mapped from | NULL | 3 | 3.98 | 4 | MRSMAP.RRF | varchar(50) |
+FROMTYPE | The type of expression that a mapping is mapped from | NULL | 3 | 3.99 | 4 | MRMAP.RRF | varchar(50) |
+HCD | Source asserted hierarchical number or code of context member (if it exists) | NULL | 0 | 0.48 | 51 | MRHIER.RRF | varchar(100) |
+IMETA | Version of the Metathesaurus that a source was added | NULL | 6 | 6.00 | 6 | MRSAB.RRF | varchar(10) |
+ISPREF | Indicates whether AUI is preferred | NULL | 1 | 1.00 | 1 | MRCONSO.RRF | char(1) |
+LAT | Language of Term(s) | NULL | 0 | 0.00 | 0 | CHANGE/DELETEDSUI.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 0 | 2.97 | 3 | MRSAB.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRCONSO.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXNS_ENG.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXNW_ENG.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_ARA.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_BAQ.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_CHI.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_CZE.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_DAN.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_DUT.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_ENG.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_EST.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_FIN.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_FRE.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_GER.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_GRE.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_HEB.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_HUN.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_ITA.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_JPN.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_KOR.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_LAV.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_NOR.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_POL.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_POR.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_RUS.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_SCR.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_SPA.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_SWE.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_TUR.RRF | char(3) |
+LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_UKR.RRF | char(3) |
+LUI | Unique identifier for term | NULL | 0 | 0.00 | 0 | CHANGE/MERGEDLUI.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 0 | 4.50 | 9 | MRSAT.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.00 | 8 | MRXW_BAQ.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.00 | 8 | MRXW_DAN.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.00 | 8 | MRXW_FIN.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.00 | 8 | MRXW_HEB.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.17 | 9 | MRXW_SCR.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.20 | 9 | MRXW_JPN.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.22 | 9 | AMBIGLUI.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.43 | 9 | MRXW_ENG.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.44 | 9 | MRCONSO.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.44 | 9 | MRXNS_ENG.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.44 | 9 | MRXNW_ENG.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.48 | 9 | MRXW_CZE.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.52 | 9 | MRXW_DUT.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.58 | 9 | MRXW_GER.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.67 | 9 | MRXW_SPA.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.77 | 9 | MRXW_POR.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.77 | 9 | MRXW_RUS.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.78 | 9 | MRXW_ITA.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.78 | 9 | MRXW_POL.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.80 | 9 | MRXW_FRE.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.82 | 9 | MRXW_SWE.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.90 | 9 | MRXW_KOR.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.98 | 9 | MRXW_NOR.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.99 | 9 | MRXW_HUN.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 8 | 8.99 | 9 | MRXW_LAV.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 9 | 9.00 | 9 | MRXW_ARA.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 9 | 9.00 | 9 | MRXW_CHI.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 9 | 9.00 | 9 | MRXW_EST.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 9 | 9.00 | 9 | MRXW_GRE.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 9 | 9.00 | 9 | MRXW_TUR.RRF | varchar(10) |
+LUI | Unique identifier for term | NULL | 9 | 9.00 | 9 | MRXW_UKR.RRF | varchar(10) |
+MAPATN | Mapping attribute name (for future use) | NULL | 0 | 2.82 | 6 | MRMAP.RRF | varchar(20) |
+MAPATV | Mapping attribute value (for future use) | NULL | 0 | 0.00 | 1 | MRMAP.RRF | varchar(4000) |
+MAPID | Metathesaurus asserted identifier for mapping | NULL | 10 | 10.98 | 11 | MRSMAP.RRF | varchar(50) |
+MAPID | Metathesaurus asserted identifier for mapping | NULL | 10 | 10.99 | 11 | MRMAP.RRF | varchar(50) |
+MAPIN | Mapping in current subset | NULL | 0 | 0.42 | 1 | MRCUI.RRF | char(1) |
+MAPIN | Mapping in current subset | NULL | 1 | 1.00 | 1 | MRAUI.RRF | char(1) |
+MAPRANK | Order in which mappings in a subset should be applied | NULL | 0 | 0.49 | 2 | MRMAP.RRF | integer |
+MAPREASON | Reason for mapping | NULL | 0 | 0.00 | 4 | MRCUI.RRF | varchar(4000) |
+MAPREASON | Reason for mapping | NULL | 4 | 4.00 | 4 | MRAUI.RRF | varchar(4000) |
+MAPRES | Restriction applicable to this mapping | NULL | 0 | 34.78 | 429 | MRMAP.RRF | varchar(4000) |
+MAPRULE | Machine processible rule applicable to this mapping | NULL | 0 | 9.57 | 336 | MRMAP.RRF | varchar(4000) |
+MAPSETCUI | CUI of the map set | NULL | 8 | 8.00 | 8 | MRMAP.RRF | char(8) |
+MAPSETCUI | CUI of the map set | NULL | 8 | 8.00 | 8 | MRSMAP.RRF | char(8) |
+MAPSETSAB | SAB of the map set | NULL | 3 | 10.60 | 13 | MRSMAP.RRF | varchar(40) |
+MAPSETSAB | SAB of the map set | NULL | 3 | 10.71 | 13 | MRMAP.RRF | varchar(40) |
+MAPSID | Source asserted identifier for mapping | NULL | 0 | 0.00 | 0 | MRSMAP.RRF | varchar(50) |
+MAPSID | Source asserted identifier for mapping | NULL | 0 | 0.01 | 36 | MRMAP.RRF | varchar(50) |
+MAPSUBSETID | Map subset identifier used to identify a subset of related mappings within a map set | NULL | 0 | 0.49 | 1 | MRMAP.RRF | varchar(10) |
+MAPTYPE | Type of mapping | NULL | 0 | 4.26 | 9 | MRMAP.RRF | varchar(50) |
+MAX | Maximum Length | NULL | 1 | 1.37 | 5 | MRCOLS.RRF | integer |
+METAUI | Metathesaurus asserted unique identifier | NULL | 0 | 7.85 | 10 | MRSAT.RRF | varchar(50) |
+MIN | Minimum Length | NULL | 1 | 1.02 | 2 | MRCOLS.RRF | integer |
+NSTR | Normalized string | NULL | 1 | 38.86 | 2460 | MRXNS_ENG.RRF | varchar(3000) |
+NWD | Normalized word | NULL | 1 | 6.55 | 80 | MRXNW_ENG.RRF | varchar(100) |
+PAUI | Unique identifier for parent atom | NULL | 0 | 8.46 | 9 | MRHIER.RRF | varchar(9) |
+PCUI | Concept unique identifier in the previous Metathesaurus | NULL | 8 | 8.00 | 8 | CHANGE/DELETEDCUI.RRF | char(8) |
+PCUI | Concept unique identifier in the previous Metathesaurus | NULL | 8 | 8.00 | 8 | CHANGE/MERGEDCUI.RRF | char(8) |
+PLUI | Lexical unique identifier in the previous Metathesaurus | NULL | 0 | 0.00 | 0 | CHANGE/DELETEDLUI.RRF | varchar(10) |
+PLUI | Lexical unique identifier in the previous Metathesaurus | NULL | 0 | 0.00 | 0 | CHANGE/MERGEDLUI.RRF | varchar(10) |
+PSTR | Preferred name in the previous Metathesaurus | NULL | 0 | 0.00 | 0 | CHANGE/DELETEDLUI.RRF | varchar(3000) |
+PSTR | Preferred name in the previous Metathesaurus | NULL | 0 | 0.00 | 0 | CHANGE/DELETEDSUI.RRF | varchar(3000) |
+PSTR | Preferred name in the previous Metathesaurus | NULL | 4 | 4.00 | 4 | CHANGE/DELETEDCUI.RRF | varchar(3000) |
+PSUI | String unique identifier in the previous Metathesaurus | NULL | 0 | 0.00 | 0 | CHANGE/DELETEDSUI.RRF | varchar(10) |
+PTR | Path to root | NULL | 0 | 103.81 | 345 | MRHIER.RRF | varchar(1000) |
+RANK | Termgroup ranking | NULL | 4 | 4.00 | 4 | MRRANK.RRF | integer |
+RCUI | Unique identifier for root SRC concept | NULL | 8 | 8.00 | 8 | MRSAB.RRF | char(8) |
+REASON | Explanation of change, if present | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(1000) |
+REF | Documentation Section Number | NULL | 0 | 0.00 | 0 | MRCOLS.RRF | varchar(20) |
+RELA | Additional relationship label | NULL | 0 | 0.00 | 0 | MRAUI.RRF | varchar(100) |
+RELA | Additional relationship label | NULL | 0 | 0.00 | 0 | MRCUI.RRF | varchar(100) |
+RELA | Additional relationship label | NULL | 0 | 10.69 | 54 | MRREL.RRF | varchar(100) |
+RELA | Additional relationship label | NULL | 0 | 14.07 | 37 | MRMAP.RRF | varchar(100) |
+RELA | Additional relationship label | NULL | 0 | 19.91 | 37 | MRSMAP.RRF | varchar(100) |
+RELA | Additional relationship label | NULL | 0 | 2.71 | 12 | MRHIER.RRF | varchar(100) |
+REL | Relationship label | NULL | 0 | 0.00 | 0 | MRAUI.RRF | varchar(4) |
+REL | Relationship label | NULL | 2 | 2.00 | 2 | MRMAP.RRF | varchar(4) |
+REL | Relationship label | NULL | 2 | 2.00 | 2 | MRSMAP.RRF | varchar(4) |
+REL | Relationship label | NULL | 2 | 2.24 | 3 | MRREL.RRF | varchar(4) |
+REL | Relationship label | NULL | 2 | 2.65 | 4 | MRCUI.RRF | varchar(4) |
+RG | Relationship group | NULL | 0 | 0.06 | 2 | MRREL.RRF | varchar(10) |
+RMETA | Version of the Metathesaurus where a version is removed | NULL | 0 | 0.09 | 6 | MRSAB.RRF | varchar(10) |
+RSAB | Root source abbreviation | NULL | 2 | 5.94 | 15 | MRSAB.RRF | varchar(40) |
+RUI | Unique identifier for relationship | NULL | 9 | 9.82 | 10 | MRREL.RRF | varchar(10) |
+RWS | Number of rows | NULL | 1 | 5.56 | 8 | MRFILES.RRF | integer |
+SABIN | Source in current subset | NULL | 1 | 1.00 | 1 | MRSAB.RRF | char(1) |
+SAB | Source abbreviation | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(40) |
+SAB | Source abbreviation | NULL | 2 | 4.12 | 11 | MRDEF.RRF | varchar(40) |
+SAB | Source abbreviation | NULL | 2 | 5.31 | 15 | MRRANK.RRF | varchar(40) |
+SAB | Source abbreviation | NULL | 2 | 5.48 | 15 | MRREL.RRF | varchar(40) |
+SAB | Source abbreviation | NULL | 2 | 5.70 | 15 | MRCONSO.RRF | varchar(40) |
+SAB | Source abbreviation | NULL | 2 | 5.75 | 13 | MRSAT.RRF | varchar(40) |
+SAB | Source abbreviation | NULL | 2 | 7.90 | 13 | MRHIER.RRF | varchar(40) |
+SATUI | Source asserted attribute identifier | NULL | 0 | 0.47 | 16 | MRDEF.RRF | varchar(50) |
+SATUI | Source asserted attribute identifier | NULL | 0 | 3.24 | 36 | MRSAT.RRF | varchar(50) |
+SAUI | Source asserted atom identifier | NULL | 0 | 1.73 | 18 | MRCONSO.RRF | varchar(100) |
+SCC | Content contact info for a source | NULL | 0 | 152.05 | 332 | MRSAB.RRF | varchar(1000) |
+SCIT | Source citation | NULL | 54 | 164.09 | 674 | MRSAB.RRF | varchar(4000) |
+SCUI | Source asserted concept identifier | NULL | 0 | 5.28 | 95 | MRCONSO.RRF | varchar(100) |
+SDUI | Source asserted descriptor identifier | NULL | 0 | 2.73 | 13 | MRCONSO.RRF | varchar(100) |
+SF | Source Family | NULL | 2 | 4.20 | 13 | MRSAB.RRF | varchar(40) |
+SLC | License contact info for a source | NULL | 12 | 167.35 | 346 | MRSAB.RRF | varchar(1000) |
+SL | Source of relationship labels | NULL | 2 | 5.48 | 15 | MRREL.RRF | varchar(40) |
+SON | Source Official Name | NULL | 10 | 48.65 | 145 | MRSAB.RRF | varchar(3000) |
+SOURCEUI | Source asserted unique identifier | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(50) |
+SRL | Source Restriction Level | NULL | 1 | 1.00 | 1 | MRCONSO.RRF | integer |
+SRL | Source Restriction Level | NULL | 1 | 1.00 | 1 | MRSAB.RRF | integer |
+SRUI | Source attributed relationship identifier | NULL | 0 | 1.20 | 36 | MRREL.RRF | varchar(50) |
+SSN | Source short name | NULL | 3 | 26.96 | 89 | MRSAB.RRF | varchar(3000) |
+STN | Semantic type tree number | NULL | 1 | 7.85 | 14 | MRSTY.RRF | varchar(100) |
+STR | String | NULL | 1 | 38.20 | 2930 | MRCONSO.RRF | varchar(3000) |
+STT | String type | NULL | 2 | 2.01 | 3 | MRCONSO.RRF | varchar(3) |
+STYPE1 | The name of the column in MRCONSO.RRF that contains the first identifier to which the relationship is attached | NULL | 3 | 3.62 | 4 | MRREL.RRF | varchar(50) |
+STYPE2 | The name of the column in MRCONSO.RRF that contains the second identifier to which the relationship is attached | NULL | 3 | 3.62 | 4 | MRREL.RRF | varchar(50) |
+STYPE | The name of the column in MRCONSO.RRF or MRREL.RRF that contains the identifier to which the attribute is attached | NULL | 3 | 3.25 | 4 | MRSAT.RRF | varchar(50) |
+STY | Semantic type | NULL | 4 | 17.65 | 39 | MRSTY.RRF | varchar(50) |
+SUI | Unique identifier for string | NULL | 0 | 4.57 | 9 | MRSAT.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.00 | 8 | MRXW_BAQ.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.00 | 8 | MRXW_DAN.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.00 | 8 | MRXW_FIN.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.00 | 8 | MRXW_HEB.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.35 | 9 | AMBIGSUI.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.35 | 9 | MRXW_JPN.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.53 | 9 | MRXW_DUT.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.58 | 9 | MRCONSO.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.61 | 9 | MRXW_GER.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.64 | 9 | MRXNS_ENG.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.67 | 9 | MRXNW_ENG.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.67 | 9 | MRXW_ENG.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.71 | 9 | MRXW_SPA.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.79 | 9 | MRXW_POR.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.79 | 9 | MRXW_RUS.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.82 | 9 | MRXW_ITA.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.84 | 9 | MRXW_SWE.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.85 | 9 | MRXW_CZE.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.85 | 9 | MRXW_FRE.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.98 | 9 | MRXW_NOR.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 8 | 8.99 | 9 | MRXW_HUN.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_ARA.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_CHI.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_EST.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_GRE.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_KOR.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_LAV.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_POL.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_SCR.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_TUR.RRF | varchar(10) |
+SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_UKR.RRF | varchar(10) |
+SUPPRESS | Suppressible flag | NULL | 1 | 1.00 | 1 | MRCONSO.RRF | char(1) |
+SUPPRESS | Suppressible flag | NULL | 1 | 1.00 | 1 | MRDEF.RRF | char(1) |
+SUPPRESS | Suppressible flag | NULL | 1 | 1.00 | 1 | MRRANK.RRF | char(1) |
+SUPPRESS | Suppressible flag | NULL | 1 | 1.00 | 1 | MRREL.RRF | char(1) |
+SUPPRESS | Suppressible flag | NULL | 1 | 1.00 | 1 | MRSAT.RRF | char(1) |
+SVER | Release date or version number of a source | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(20) |
+SVER | Release date or version number of a source | NULL | 0 | 5.08 | 15 | MRSAB.RRF | varchar(20) |
+TFR | Term frequency for a source | NULL | 1 | 4.41 | 7 | MRSAB.RRF | integer |
+TOEXPR | The expression that a mapping is mapped to | NULL | 0 | 6.03 | 242 | MRMAP.RRF | varchar(4000) |
+TOEXPR | The expression that a mapping is mapped to | NULL | 1 | 6.92 | 242 | MRSMAP.RRF | varchar(4000) |
+TOID | Metathesaurus identifier for the entity being mapped to | NULL | 0 | 5.18 | 18 | MRMAP.RRF | varchar(50) |
+TORES | Restriction applicable to the entity being mapped to | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(4000) |
+TORULE | Machine processible rule applicable to the entity being mapped to | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(4000) |
+TOSID | Source asserted identifier for the entity being mapped to | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(50) |
+TOTYPE | The type of expression that a mapping is mapped to | NULL | 0 | 3.98 | 23 | MRMAP.RRF | varchar(50) |
+TOTYPE | The type of expression that a mapping is mapped to | NULL | 4 | 4.36 | 22 | MRSMAP.RRF | varchar(50) |
+TS | Term status | NULL | 1 | 1.00 | 1 | MRCONSO.RRF | char(1) |
+TTYL | Term type list for a source | NULL | 0 | 11.76 | 86 | MRSAB.RRF | varchar(400) |
+TTY | Term type in source | NULL | 2 | 2.35 | 11 | MRCONSO.RRF | varchar(20) |
+TTY | Term type in source | NULL | 2 | 2.58 | 11 | MRRANK.RRF | varchar(20) |
+TUI | Unique identifier of Semantic type | NULL | 4 | 4.00 | 4 | MRSTY.RRF | char(4) |
+TYPE | Type of information | NULL | 3 | 13.14 | 21 | MRDOC.RRF | varchar(50) |
+VALUE | Value | NULL | 0 | 15.98 | 62 | MRDOC.RRF | varchar(200) |
+VCUI | Unique identifier for versioned SRC concept | NULL | 0 | 7.71 | 8 | MRSAB.RRF | char(8) |
+VEND | Valid end date for a source | NULL | 0 | 0.00 | 0 | MRSAB.RRF | char(8) |
+VER | Last release version in which CUI1 was valid | NULL | 6 | 6.00 | 6 | MRAUI.RRF | varchar(10) |
+VER | Last release version in which CUI1 was valid | NULL | 6 | 6.00 | 6 | MRCUI.RRF | varchar(10) |
+VSAB | Versioned source abbreviation | NULL | 3 | 11.35 | 24 | MRSAB.RRF | varchar(40) |
+VSTART | Valid start date for a source | NULL | 0 | 0.00 | 0 | MRSAB.RRF | char(8) |
+WD | Word in lower-case | NULL | 1 | 10.53 | 54 | MRXW_FIN.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 2.90 | 38 | MRXW_KOR.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 3.65 | 68 | MRXW_CHI.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 4.58 | 35 | MRXW_EST.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 5.23 | 37 | MRXW_TUR.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 5.47 | 22 | MRXW_ARA.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 5.71 | 38 | MRXW_POR.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 5.91 | 38 | MRXW_ITA.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 6.12 | 19 | MRXW_HEB.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 6.13 | 24 | MRXW_UKR.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 6.23 | 80 | MRXW_ENG.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 6.38 | 25 | MRXW_DAN.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 6.67 | 46 | MRXW_SPA.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 6.83 | 39 | MRXW_FRE.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 7.14 | 40 | MRXW_RUS.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 7.17 | 18 | MRXW_BAQ.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 7.50 | 34 | MRXW_GRE.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 7.55 | 48 | MRXW_POL.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 7.57 | 52 | MRXW_CZE.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 7.89 | 51 | MRXW_DUT.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 7.97 | 27 | MRXW_HUN.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 7.98 | 29 | MRXW_LAV.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 8.02 | 37 | MRXW_SCR.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 8.37 | 41 | MRXW_GER.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 8.61 | 39 | MRXW_SWE.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 8.91 | 85 | MRXW_JPN.RRF | varchar(500) |
+WD | Word in lower-case | NULL | 1 | 9.11 | 44 | MRXW_NOR.RRF | varchar(500) |
+
+```
+mysql> select * from MRREL limit 10;
+```
+CUI1 | AUI1 | STYPE1 | REL | CUI2 | AUI2 | STYPE2 | RELA | RUI | SRUI | SAB | SL | RG | DIR | SUPPRESS | CVF |
+--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--
+C0236642 | A0001895 | AUI | RB | C0270715 | A1389616 | AUI | NULL | R00689636 | NULL | AOD | AOD | NULL | NULL | N | NULL |
+C0003787 | A0002112 | AUI | RB | C0037728 | A1397168 | AUI | NULL | R00689637 | NULL | AOD | AOD | NULL | NULL | N | NULL |
+C0018090 | A0002644 | AUI | RB | C0032636 | A0103514 | AUI | NULL | R00689638 | NULL | AOD | AOD | NULL | NULL | N | NULL |
+C0039194 | A0003844 | AUI | RB | C0024264 | A0483067 | AUI | NULL | R00689639 | NULL | AOD | AOD | NULL | NULL | N | NULL |
+C0004561 | A0003849 | AUI | RB | C0024264 | A0483067 | AUI | NULL | R00689640 | NULL | AOD | AOD | NULL | NULL | N | NULL |
+C0022801 | A0006210 | AUI | RB | C0035287 | A0488404 | AUI | NULL | R00689641 | NULL | AOD | AOD | NULL | NULL | N | NULL |
+C0022801 | A0006210 | AUI | RB | C0227525 | A1182577 | AUI | NULL | R00689642 | NULL | AOD | AOD | NULL | NULL | N | NULL |
+C0022801 | A0006210 | AUI | RB | C0449475 | A1182637 | AUI | NULL | R00689643 | NULL | AOD | AOD | NULL | NULL | N | NULL |
+C0034143 | A0006342 | AUI | RB | C0682702 | A1389183 | AUI | NULL | R00689644 | NULL | AOD | AOD | NULL | NULL | N | NULL |
+C0221406 | A0009638 | AUI | RB | C0020635 | A1393940 | AUI | NULL | R00689645 | NULL | AOD | AOD | NULL | NULL | N | NULL |
+
+```
+mysql> select * from MRDOC limit 10;
+```
+DOCKEY | VALUE | TYPE | EXPL |
+--|--|--|--
+ATN | AAL_TERM | expanded_form | AAL term |
+ATN | ACCEPTABILITYID | expanded_form | Acceptability Id |
+ATN | ACCEPTED_THERAPEUTIC_USE_FOR | expanded_form | Accepted therapeutic use for |
+ATN | ACTIVE | expanded_form | Active |
+ATN | ADDED_MEANING | expanded_form | Additional descriptive information |
+ATN | ADDITIONAL_GUIDELINE | expanded_form | Additional explanatory text that is applicable to a concept (code/heading/subheading). |
+ATN | ADDON_CODE | expanded_form | A "T" in this field indicates that it is an "Add-on" code, i.e. it is commonly carried out in addition to the primary procedure performed |
+ATN | AGR | expanded_form | Alliance of Genome Resources |
+ATN | AMBIGUITY_FLAG | expanded_form | Source atom ambiguity flag |
+ATN | AMT | expanded_form | AOT uses MeSH term |
+
+
+```
+mysql> select * from MRSMAP limit 10;
+```
+MAPSETCUI | MAPSETSAB | MAPID | MAPSID | FROMEXPR | FROMTYPE | REL | RELA | TOEXPR | TOTYPE | CVF |
+--|--|--|--|--|--|--|--|--|--|--
+C1306694 | MTH | AT102971857 | NULL | C0264643 | CUI | SY | NULL | Hypertension, Renovascular AND Hypertension, Malignant | BOOLEAN_EXPRESSION_STR | NULL |
+C1306694 | MTH | AT102971858 | NULL | C0276253 | CUI | SY | NULL | Pneumonia AND Cytomegalovirus Infections | BOOLEAN_EXPRESSION_STR | NULL |
+C1306694 | MTH | AT102971859 | NULL | C0409780 | CUI | SY | NULL | Synovitis AND Hand | BOOLEAN_EXPRESSION_STR | NULL |
+C1306694 | MTH | AT102971861 | NULL | C1706094 | CUI | SY | NULL | Adhesives AND Denture Retention | BOOLEAN_EXPRESSION_STR | NULL |
+C1306694 | MTH | AT102971862 | NULL | C1706094 | CUI | SY | NULL | Dental Cements AND Orthodontics | BOOLEAN_EXPRESSION_STR | NULL |
+C1306694 | MTH | AT102971863 | NULL | C0180739 | CUI | RN | NULL | Enteral Nutrition/instrumentation | BOOLEAN_EXPRESSION_STR | NULL |
+C1306694 | MTH | AT102971864 | NULL | C1533661 | CUI | SY | NULL | Arthroscopy AND Wrist Joint | BOOLEAN_EXPRESSION_STR | NULL |
+C1306694 | MTH | AT110677869 | NULL | C1962918 | CUI | RN | NULL | Wheelchairs AND Equipment and Supplies | BOOLEAN_EXPRESSION_STR | NULL |
+C1306694 | MTH | AT110677871 | NULL | C1855348 | CUI | RU | NULL | Glomerulonephritis | BOOLEAN_EXPRESSION_STR | NULL |
+C1306694 | MTH | AT110677872 | NULL | C1855348 | CUI | RU | NULL | Marfan Syndrome | BOOLEAN_EXPRESSION_STR | NULL |
+
+```
+mysql> select * from MRSTY limit 10;
+```
+CUI | TUI | STN | STY | ATUI | CVF |
+--|--|--|--|--|--
+C0541479 | T104 | A1.4.1.2 | Chemical Viewed Structurally | AT07863944 | NULL |
+C0541480 | T104 | A1.4.1.2 | Chemical Viewed Structurally | AT07863945 | NULL |
+C0541481 | T104 | A1.4.1.2 | Chemical Viewed Structurally | AT07863946 | NULL |
+C0070474 | T104 | A1.4.1.2 | Chemical Viewed Structurally | AT07863947 | 256 |
+C0541516 | T104 | A1.4.1.2 | Chemical Viewed Structurally | AT07863948 | NULL |
+C0678461 | T104 | A1.4.1.2 | Chemical Viewed Structurally | AT07863949 | NULL |
+C0678462 | T104 | A1.4.1.2 | Chemical Viewed Structurally | AT07863950 | 256 |
+C0678518 | T104 | A1.4.1.2 | Chemical Viewed Structurally | AT07863951 | 256 |
+C0678519 | T104 | A1.4.1.2 | Chemical Viewed Structurally | AT07863952 | 256 |
+C0678520 | T104 | A1.4.1.2 | Chemical Viewed Structurally | AT07863953 | 256 |
+
+```
+mysql> select * from MRAUI limit 10;
+```
+AUI1 | CUI1 | VER | REL | RELA | MAPREASON | AUI2 | CUI2 | MAPIN |
+--|--|--|--|--|--|--|--|--
+A0000039 | C1411876 | 2022AA | NULL | NULL | move | A0000039 | C0869474 | Y |
+A0000049 | C0003910 | 2005AB | NULL | NULL | move | A0000049 | C0236828 | Y |
+A0000080 | C0003477 | 2011AA | NULL | NULL | move | A0000080 | C1527281 | Y |
+A0000087 | C0596170 | 2008AB | NULL | NULL | move | A0000087 | C2267227 | Y |
+A0000088 | C0596170 | 2008AB | NULL | NULL | move | A0000088 | C2267227 | Y |
+A0000090 | C0596170 | 2008AB | NULL | NULL | move | A0000090 | C2267227 | Y |
+A0000091 | C0596170 | 2008AB | NULL | NULL | move | A0000091 | C2267227 | Y |
+A0000092 | C0596170 | 2008AB | NULL | NULL | move | A0000092 | C2267227 | Y |
+A0000230 | C0029220 | 2007AA | NULL | NULL | move | A0000230 | C0236748 | Y |
+A0000231 | C0029220 | 2007AA | NULL | NULL | move | A0000231 | C0236748 | Y |
+
+```
+mysql> select * from MRCONSO limit 10;
+```
+CUI | LAT | TS | LUI | STT | SUI | ISPREF | AUI | SAUI | SCUI | SDUI | SAB | TTY | CODE | STR | SRL | SUPPRESS | CVF |
+--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--
+C0026106 | ENG | S | L0026106 | PF | S0000001 | N | A0000002 | NULL | NULL | NULL | ICD10 | HT | F70 | Mild mental retardation | 3 | N | 256 |
+C0026106 | ENG | S | L0026106 | PF | S0000001 | N | A0000003 | NULL | NULL | NULL | ICD10AM | HT | F70 | Mild mental retardation | 3 | N | 256 |
+C0026351 | ENG | S | L0026351 | PF | S0000002 | N | A0000008 | NULL | NULL | NULL | ICD10 | HT | F71 | Moderate mental retardation | 3 | N | NULL |
+C0026351 | ENG | S | L0026351 | PF | S0000002 | N | A0000009 | NULL | NULL | NULL | ICD10AM | HT | F71 | Moderate mental retardation | 3 | N | NULL |
+C0036857 | ENG | S | L0036857 | PF | S0000003 | N | A0000014 | NULL | NULL | NULL | ICD10 | HT | F72 | Severe mental retardation | 3 | N | 256 |
+C0036857 | ENG | S | L0036857 | PF | S0000003 | N | A0000015 | NULL | NULL | NULL | ICD10AM | HT | F72 | Severe mental retardation | 3 | N | 256 |
+C0020796 | ENG | S | L0033296 | PF | S0000004 | N | A0000020 | NULL | NULL | NULL | ICD10 | HT | F73 | Profound mental retardation | 3 | N | 256 |
+C0020796 | ENG | S | L0033296 | PF | S0000004 | N | A0000021 | NULL | NULL | NULL | ICD10AM | HT | F73 | Profound mental retardation | 3 | N | 256 |
+C0025362 | ENG | S | L0080273 | PF | S0000005 | N | A0000026 | NULL | NULL | NULL | ICD10 | HT | F79 | Unspecified mental retardation | 3 | N | 256 |
+C0025362 | ENG | S | L0080273 | PF | S0000005 | N | A0000027 | NULL | NULL | NULL | ICD10AM | HT | F79 | Unspecified mental retardation | 3 | N | 256 |
+
+```
+mysql> select * from MRFILES;
+```
+FIL | DES | FMT | CLS | RWS | BTS |
+--|--|--|--|--|--
+AMBIGLUI.RRF | Ambiguous term identifiers | LUI,CUI | 2 | 301093 | 5788399 |
+AMBIGSUI.RRF | Ambiguous string identifiers | SUI,CUI | 2 | 207867 | 4022457 |
+CHANGE/DELETEDCUI.RRF | Deleted concepts | PCUI,PSTR | 2 | 1426698 | 21400470 |
+CHANGE/DELETEDLUI.RRF | Deleted terms | PLUI,PSTR | 2 | 0 | 0 |
+CHANGE/DELETEDSUI.RRF | Deleted strings | PSUI,LAT,PSTR | 3 | 0 | 0 |
+CHANGE/MERGEDCUI.RRF | Merged concepts | PCUI,CUI | 2 | 1536 | 29184 |
+CHANGE/MERGEDLUI.RRF | Merged terms | PLUI,LUI | 2 | 0 | 0 |
+MRAUI.RRF | AUI History | AUI1,CUI1,VER,REL,RELA,MAPREASON,AUI2,CUI2,MAPIN | 9 | 293552 | 15877630 |
+MRCOLS.RRF | Attribute Relation | COL,DES,REF,MIN,AV,MAX,FIL,DTY | 8 | 339 | 23403 |
+MRCONSO.RRF | Concept names and sources | CUI,LAT,TS,LUI,STT,SUI,ISPREF,AUI,SAUI,SCUI,SDUI,SAB,TTY,CODE,STR,SRL,SUPPRESS,CVF | 18 | 13501908 | 1737065435 |
+MRCUI.RRF | CUI History | CUI1,VER,REL,RELA,MAPREASON,CUI2,MAPIN | 7 | 2716556 | 77130698 |
+MRDEF.RRF | Definitions | CUI,AUI,ATUI,SATUI,SAB,DEF,SUPPRESS,CVF | 8 | 425261 | 118551841 |
+MRDOC.RRF | Typed key value metadata map | DOCKEY,VALUE,TYPE,EXPL | 4 | 3396 | 218481 |
+MRFILES.RRF | Relation Relation | FIL,DES,FMT,CLS,RWS,BTS | 6 | 52 | 4208 |
+MRHIER.RRF | Computable hierarchies | CUI,AUI,CXN,PAUI,SAB,RELA,PTR,HCD,CVF | 9 | 31893483 | 4851178506 |
+MRHIST.RRF | Source-asserted history | CUI,SOURCEUI,SAB,SVER,CHANGETYPE,CHANGEKEY,CHANGEVAL,REASON,CVF | 9 | 0 | 0 |
+MRMAP.RRF | Mappings | MAPSETCUI,MAPSETSAB,MAPSUBSETID,MAPRANK,MAPID,MAPSID,FROMID,FROMSID,FROMEXPR,FROMTYPE,FROMRULE,FROMRES,REL,RELA,TOID,TOSID,TOEXPR,TOTYPE,TORULE,TORES,MAPRULE,MAPRES,MAPTYPE,MAPATN,MAPATV,CVF | 26 | 810346 | 129610753 |
+MRRANK.RRF | Concept Name Ranking | RANK,SAB,TTY,SUPPRESS | 4 | 683 | 12217 |
+MRREL.RRF | Related Concepts | CUI1,AUI1,STYPE1,REL,CUI2,AUI2,STYPE2,RELA,RUI,SRUI,SAB,SL,RG,DIR,SUPPRESS,CVF | 16 | 43842950 | 4093351915 |
+MRSAB.RRF | Source Metadata | VCUI,RCUI,VSAB,RSAB,SON,SF,SVER,VSTART,VEND,IMETA,RMETA,SLC,SCC,SRL,TFR,CFR,CXTY,TTYL,ATNL,LAT,CENC,CURVER,SABIN,SSN,SCIT | 25 | 192 | 142036 |
+MRSAT.RRF | Simple Concept, Term and String Attributes | CUI,LUI,SUI,METAUI,STYPE,CODE,ATUI,SATUI,ATN,SAB,ATV,SUPPRESS,CVF | 13 | 65915853 | 5967696352 |
+MRSMAP.RRF | Simple Mappings | MAPSETCUI,MAPSETSAB,MAPID,MAPSID,FROMEXPR,FROMTYPE,REL,RELA,TOEXPR,TOTYPE,CVF | 11 | 416075 | 35648007 |
+MRSTY.RRF | Semantic Types | CUI,TUI,STN,STY,ATUI,CVF | 6 | 3476668 | 199142173 |
+MRXNS_ENG.RRF | Normalized String Index | LAT,NSTR,CUI,LUI,SUI | 5 | 12150129 | 886221009 |
+MRXNW_ENG.RRF | Normalized Word Index | LAT,NWD,CUI,LUI,SUI | 5 | 39785958 | 1617668497 |
+MRXW_ARA.RRF | Arabic Word Index | LAT,WD,CUI,LUI,SUI | 5 | 290245 | 13322541 |
+MRXW_BAQ.RRF | Basque Word Index | LAT,WD,CUI,LUI,SUI | 5 | 2669 | 107206 |
+MRXW_CHI.RRF | Chinese Word Index | LAT,WD,CUI,LUI,SUI | 5 | 601700 | 27220291 |
+MRXW_CZE.RRF | Czech Word Index | LAT,WD,CUI,LUI,SUI | 5 | 477599 | 20363847 |
+MRXW_DAN.RRF | Danish Word Index | LAT,WD,CUI,LUI,SUI | 5 | 2466 | 97114 |
+MRXW_DUT.RRF | Dutch Word Index | LAT,WD,CUI,LUI,SUI | 5 | 1101850 | 46227836 |
+MRXW_ENG.RRF | English Word Index | LAT,WD,CUI,LUI,SUI | 5 | 39223696 | 1581848830 |
+MRXW_EST.RRF | Estonian Word Index | LAT,WD,CUI,LUI,SUI | 5 | 226586 | 8986331 |
+MRXW_FIN.RRF | Finnish Word Index | LAT,WD,CUI,LUI,SUI | 5 | 42922 | 1875628 |
+MRXW_FRE.RRF | French Word Index | LAT,WD,CUI,LUI,SUI | 5 | 2426179 | 101219317 |
+MRXW_GER.RRF | German Word Index | LAT,WD,CUI,LUI,SUI | 5 | 799432 | 34054417 |
+MRXW_GRE.RRF | Greek Word Index | LAT,WD,CUI,LUI,SUI | 5 | 274018 | 13634628 |
+MRXW_HEB.RRF | Hebrew Word Index | LAT,WD,CUI,LUI,SUI | 5 | 1617 | 63262 |
+MRXW_HUN.RRF | Hungarian Word Index | LAT,WD,CUI,LUI,SUI | 5 | 241751 | 10526508 |
+MRXW_ITA.RRF | Italian Word Index | LAT,WD,CUI,LUI,SUI | 5 | 1199574 | 48609396 |
+MRXW_JPN.RRF | Japanese Word Index | LAT,WD,CUI,LUI,SUI | 5 | 282000 | 16758359 |
+MRXW_KOR.RRF | Korean Word Index | LAT,WD,CUI,LUI,SUI | 5 | 460600 | 19847488 |
+MRXW_LAV.RRF | Latvian Word Index | LAT,WD,CUI,LUI,SUI | 5 | 230914 | 10092516 |
+MRXW_NOR.RRF | Norwegian Word Index | LAT,WD,CUI,LUI,SUI | 5 | 125266 | 5530491 |
+MRXW_POL.RRF | Polish Word Index | LAT,WD,CUI,LUI,SUI | 5 | 425767 | 18182640 |
+MRXW_POR.RRF | Portuguese Word Index | LAT,WD,CUI,LUI,SUI | 5 | 1498232 | 60641784 |
+MRXW_RUS.RRF | Russian Word Index | LAT,WD,CUI,LUI,SUI | 5 | 1111315 | 52355773 |
+MRXW_SCR.RRF | Croatian Word Index | LAT,WD,CUI,LUI,SUI | 5 | 24050 | 1017136 |
+MRXW_SPA.RRF | Spanish Word Index | LAT,WD,CUI,LUI,SUI | 5 | 9117253 | 375647723 |
+MRXW_SWE.RRF | Swedish Word Index | LAT,WD,CUI,LUI,SUI | 5 | 288913 | 12571621 |
+MRXW_TUR.RRF | Turkish Word Index | LAT,WD,CUI,LUI,SUI | 5 | 419840 | 17046315 |
+MRXW_UKR.RRF | Ukrainian Word Index | LAT,WD,CUI,LUI,SUI | 5 | 25840 | 1210231 |
+
+```
+mysql> select * from MRSAB limit 10;
+```
+VCUI | RCUI | VSAB | RSAB | SON | SF | SVER | VSTART | VEND | IMETA | RMETA | SLC | SCC | SRL | TFR | CFR | CXTY | TTYL | ATNL | LAT | CENC | CURVER | SABIN | SSN | SCIT |
+--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--
+C1140092 | C1140091 | AIR93 | AIR | AI/RHEUM, 1993 | AIR | 1993 | NULL | NULL | 1995AA | NULL | May Cheh;;Lister Hill National Center for Biomedical Communications, National Library of Medicine;Building 38A, Room 9E902;8600 Rockville Pike;Bethesda;MD;;20894;;;cheh@nlm.nih.gov; | May Cheh;;Lister Hill National Center for Biomedical Communications, National Library of Medicine;Building 38A, Room 9E902;8600 Rockville Pike;Bethesda;MD;;20894;;;cheh@nlm.nih.gov; | 0 | 685 | 630 | FULL-MULTIPLE | DI,FI,HT,SY | NULL | ENG | UTF-8 | Y | Y | AI/RHEUM | ;;;;AI/RHEUM;;;;;National Library of Medicine, Lister Hill Center;1993;;Bethesda, MD;;;;;; |
+C2366569 | C1140170 | ALT2009 | ALT | Alternative Billing Concepts, 2009 | ALT | 2009 | NULL | NULL | 2009AA | NULL | ;;ABC Coding Solutions - Alternative Link;6121 Indian School Road NE;Suite 131;Albuquerque;NM;United States;87110;1-877-621-5465;1-505-875-0002;Legal@ABCcodes.com; | Bernd G. Lucks;Chief Operating Officer;ABC Coding Solutions - Alternative Link;6121 Indian School Road NE;Suite 131;Albuquerque;NM;United States;87110;1-505-875-0001 ext. 202;;bernd.lucks@ABCcodes.com; | 3 | 4669 | 4613 | FULL | HT,PT | DATE_CREATED,DATE_LAST_MODIFIED,SOURCE_UI | ENG | UTF-8 | Y | Y | Alternative Billing Concepts | ;;;;ABC Codes and Terminology;;;9th;Albuquerque, NM;ABC Coding Solutions - Alternative Link;2009;;;;;;ENG;; |
+C1140163 | C1140162 | AOD2000 | AOD | Alcohol and Other Drug Thesaurus, 2000 | AOD | 2000 | NULL | NULL | 2002AC | NULL | Nancy Winstanley;;NIAAA Library c/o CSR Incorporated;2107 Wilson Blvd., Suite 1000;;Arlington;VA;;22201;703-741-7147;; e-mail: nwinstanley@csrincorporated.com;; | Dagobert Soergel;;;;;;;;;301-405-2037;;ds52@umail.umd.edu; | 0 | 20685 | 15915 | FULL | DE,DS,ES,ET,EX,FN,NP,NS,NX,XD | HN,SOS | ENG | UTF-8 | Y | Y | Alcohol and Other Drug Thesaurus | ;;;;Alcohol and Other Drug Thesaurus: A Guide to Concepts and Terminology in Substance Abuse and Addiction;;;3rd. ed. [4 Volumes.];Bethesda, MD;National Institute on Alcohol Abuse and Alcoholism (NIAAA) and Center for Substance Abuse Prevention (CSAP);2000;;;;;;ENG;; |
+C1704486 | C1704485 | AOT2003 | AOT | Authorized Osteopathic Thesaurus, 2003 | AOT | 2003 | NULL | NULL | 2006AD | NULL | ;;;;;Chevy Chase;MD;;;;;;http://www.aacom.org/InfoFor/educators/Pages/thesaurus.aspx | ;;American Association of Colleges of Osteopathic Medicine ;5550 Friendship Boulevard ;Suite 310;Chevy Chase;MD;United States;20815-7231;301-968-4100;301-968-4101;;http://www.aacom.org/InfoFor/educators/Pages/thesaurus.aspx | 0 | 471 | 276 | FULL-MULTIPLE | ET,PT | AMT | ENG | UTF-8 | Y | Y | Authorized Osteopathic Thesaurus | ;;;;Authorized Osteopathic Thesaurus;;;;Chevy Chase, MD;Educational Council of Osteopathic Principles of the American Association of Colleges of Osteopathic Medicine;2004;;;;;http://www.aacom.org/InfoFor/educators/Pages/thesaurus.aspx;ENG;; |
+C5777091 | C4722517 | ATC_2022_23_03_06 | ATC | Anatomical Therapeutic Chemical Classification System, ATC_2022 | ATC | ATC_2022 | NULL | NULL | 2023AA | NULL | ;;WHO Collaborating Centre for Drug Statistics Methodology;Norwegian Institute of Public Health;P.O.Box 4404 Nydalen;Oslo;;Norway;0403;+47 21 07 81 60;+47 21 07 81 46;whocc@fhi.no;http://www.whocc.no/copyright_disclaimer/ | ;;WHO Collaborating Centre for Drug Statistics Methodology;Norwegian Institute of Public Health;P.O.Box 4404 Nydalen;Oslo;;Norway;0403;+47 21 07 81 60;+47 21 07 81 46;whocc@fhi.no;http://www.whocc.no/ | 0 | 7210 | 5794 | FULL | IN,PT,RXN_IN,RXN_PT | ATC_LEVEL,IS_DRUG_CLASS | ENG | UTF-8 | Y | Y | Anatomical Therapeutic Chemical Classification System | ;;WHO Collaborating Centre for Drug Statistics Methodology;;Anatomical Therapeutic Chemical (ATC) classification system;;;2022;Oslo, Norway;WHO Collaborating Centre for Drug Statistics Methodology;;;;;;http://www.whocc.no/copyright_disclaimer/;;; |
+C1140164 | C1140165 | BI98 | BI | Beth Israel Vocabulary, 1.0 | BI | 1.0 | NULL | NULL | 1999AA | NULL | Daniel Z. Sands, M.D., M.P.H.;Clinical Systems Integration Architect;Center for Clinical Computing,Beth Israel Deaconess Medical Center,Harvard University;330 Brookline Avenue;;Boston;MA;United States;02215;617-667-1510;810-592-0716; e-mail: dsands@bidmc.Harvard.edu; | Howard Goldberg, MD.;;;;;;;;;;;hgoldber@bidmc.harvard.edu; | 2 | 1250 | 937 | NULL | AB,PT,RT,SY | NULL | ENG | UTF-8 | Y | Y | Beth Israel Problem List | Howard Goldberg, MD;;;;Beth Israel OMR Clinical Problem List Vocabulary;;;Version 1.0;Boston, MA;Beth Israel Deaconess Medical Center;1999;;;;;;ENG;; |
+C4550264 | C3251798 | CCC2_5_2018 | CCC | Clinical Care Classification, 2_5_2018 | CCC | 2_5_2018 | NULL | NULL | 2018AA | NULL | Dr. Virginia K. Saba;CEO & President;SabaCare,Inc;;;Arlington;VA;United States;;703-521-6132;703-521-3866;vsaba@att.net;http://www.sabacare.com/; | Dr. Virginia K. Saba;CEO & President;SabaCare,Inc;;;Arlington;VA;United States;;703-521-6132;703-521-3866;vsaba@att.net;http://www.sabacare.com/; | 1 | 410 | 405 | FULL-MULTIPLE | HT,MP,MTH_HT,PT | NULL | ENG | UTF-8 | Y | Y | Clinical Care Classification | ;;SabaCare,Inc.;;Clinical Care Classification (CCC) System;;;2.5;;;;January 10, 2018;;;;;ENG;; |
+C1140221 | C1140220 | CCPSS99 | CCPSS | Canonical Clinical Problem Statement System, 1999 | CCPSS | 1999 | NULL | NULL | 2000AA | NULL | Steven Brown, M.D.;Associate Professor, Biomedical Informatics;Eskind Biomedical Library, Vanderbilt University Medical Center;2209 Garland Ave;Room 442;Nashville;TN;United States;37232-8340;(615) 321-6335;;sbrown@vumclib.mc.vanderbilt.edu; | Steven Brown, MD;;Department of Biomedical Informatics Vanderbilt University;;;;;;;;;; | 3 | 15777 | 15245 | NULL | MP,PT,TX | CCF | ENG | UTF-8 | Y | Y | Clinical Problem Statements | ;;;;Canonical Clincial Problem Statement System;;;Version 1.0;;;June 23, 1999;;;;;;ENG;Contact: sbrown@vumclib.mc.vanderbilt.edu; |
+C1541964 | C1140228 | CCS2005 | CCS | Clinical Classifications Software, 2005 | CCS | 2005 | NULL | NULL | 2005AC | NULL | Anne Elixhauser, Ph.D.;Senior Research Scientist;Agency for Healthcare Research and Quality;540 Gaither Road;;Rockville;MD;United States;20850;(301) 427-1411, 1-800-358-9295;(301) 594-1430;AElixhau@AHRQ.gov; | Anne Elixhauser, Ph.D.;Senior Research Scientist;Agency for Healthcare Research and Quality;540 Gaither Road;;Rockville;MD;United States;20850;1-800-358-9295;(301)-594-1430;AElixhau@AHRQ.gov; | 0 | 1617 | 1109 | FULL | HT,MD,MV,SD,SP,XM | CCI,FROMRSAB,FROMVSAB,MAPSETRSAB,MAPSETVERSION,MAPSETVSAB,MTH_MAPFROMCOMPLEXITY,MTH_MAPFROMEXHAUSTIVE,MTH_MAPSETCOMPLEXITY,MTH_MAPTOCOMPLEXITY,MTH_MAPTOEXHAUSTIVE,SOS,TORSAB,TOVSAB | ENG | UTF-8 | Y | Y | Clinical Classifications Software | ;;Agency for Healthcare Research and Quality (AHRQ);;Clinical Classifications Software (CCS);;;;;;April 2005;;Rockville,MD;;; http://www.hcup-us.ahrq.gov/toolssoftware/ccs/ccs.jsp;ENG;Phone: 301-594-1364.; |
+C5770268 | C5400755 | CCSR_ICD10CM_2023 | CCSR_ICD10CM | Clinical Classifications Software Refined for ICD-10-CM, 2023 | CCS | 2023 | NULL | NULL | 2023AA | NULL | ;;Agency for Healthcare Research and Quality;5600 Fishers Lane;Mail Stop 07N94A;Rockville;MD;United States;20857;1-866-290-HCUP;(301) 594-1430;hcup@ahrq.gov;https://www.hcup-us.ahrq.gov/toolssoftware/ccsr/ccs_refined.jsp; | ;;Agency for Healthcare Research and Quality;5600 Fishers Lane;Mail Stop 07N94A;Rockville;MD;United States;20857;1-866-290-HCUP;(301)-594-1430;hcup@ahrq.gov;https://www.hcup-us.ahrq.gov/toolssoftware/ccsr/ccs_refined.jsp; | 0 | 546 | 545 | NULL | SD,XM | FROMRSAB,FROMVSAB,MAPSETRSAB,MAPSETVERSION,MAPSETVSAB,MTH_MAPFROMCOMPLEXITY,MTH_MAPFROMEXHAUSTIVE,MTH_MAPSETCOMPLEXITY,MTH_MAPTOCOMPLEXITY,MTH_MAPTOEXHAUSTIVE,TORSAB,TOVSAB | ENG | UTF-8 | Y | Y | Clinical Classifications Software Refined for ICD-10-CM | ;;Healthcare Cost and Utilization Project (HCUP);;Clinical Classifications Software Refined for ICD-10-CM;;;;;Agency for Healthcare Research and Quality (AHRQ);;November 2022;Rockville, MD;;;;ENG;; |

From 13fd4e1ca8899dafad1eca7a2fabe333e42fd665 Mon Sep 17 00:00:00 2001
From: "E.C. Wood" <wooderi@stanford.edu>
Date: Wed, 12 Jul 2023 13:56:03 -0700
Subject: [PATCH 002/117] #316 found where IDs from other sources are coming
 from

---
 understanding_umls.md | 505 +++++++++++++-----------------------------
 1 file changed, 159 insertions(+), 346 deletions(-)

diff --git a/understanding_umls.md b/understanding_umls.md
index 02baabd0..104b01ea 100644
--- a/understanding_umls.md
+++ b/understanding_umls.md
@@ -65,6 +65,16 @@ MRXW_SPA |
 MRXW_SWE |
 MRXW_TUR |
 
+Tables that `umls2rdf.py` uses:
+- MRSTY
+- MRCONSO
+- MRSAB
+- MRREL
+- MRDEF
+- MRSAT
+- MRRANK
+- MRDOC
+
 ```
 mysql> select * from MRCUI limit 10;
 ```
@@ -81,352 +91,6 @@ C0000164 | 2003AB | RO | NULL | NULL | C0000163 | Y |
 C0000177 | 1993AA | SY | NULL | NULL | C0014924 | Y |
 C0000219 | 1993AA | DEL | NULL | NULL | NULL | NULL |
 
-```
-mysql> select * from MRCOLS;
-```
-
-COL | DES | REF | MIN | AV | MAX | FIL | DTY |
---|--|--|--|--|--|--|--
-ATNL | Attribute name list for a source. | NULL | 0 | 69.84 | 1178 | MRSAB.RRF | varchar(4000) |
-ATN | Attribute name | NULL | 2 | 10.38 | 62 | MRSAT.RRF | varchar(100) |
-ATUI | Unique identifier for attribute. | NULL | 10 | 10.64 | 11 | MRSTY.RRF | varchar(11) |
-ATUI | Unique identifier for attribute. | NULL | 10 | 10.85 | 11 | MRSAT.RRF | varchar(11) |
-ATUI | Unique identifier for attribute. | NULL | 10 | 10.86 | 11 | MRDEF.RRF | varchar(11) |
-ATV | Attribute value | NULL | 1 | 12.69 | 35985 | MRSAT.RRF | varchar(65000) |
-AUI1 | Unique identifier for first atom | NULL | 0 | 8.52 | 9 | MRREL.RRF | varchar(9) |
-AUI1 | Unique identifier for first atom | NULL | 8 | 8.54 | 9 | MRAUI.RRF | varchar(9) |
-AUI2 | Unique identifier for second atom | NULL | 0 | 8.52 | 9 | MRREL.RRF | varchar(9) |
-AUI2 | Unique identifier for second atom | NULL | 8 | 8.54 | 9 | MRAUI.RRF | varchar(9) |
-AUI | Unique identifier for atom | NULL | 8 | 8.58 | 9 | MRHIER.RRF | varchar(9) |
-AUI | Unique identifier for atom | NULL | 8 | 8.74 | 9 | MRDEF.RRF | varchar(9) |
-AUI | Unique identifier for atom | NULL | 8 | 8.77 | 9 | MRCONSO.RRF | varchar(9) |
-AV | Average Length, Characters | NULL | 4 | 4.12 | 6 | MRCOLS.RRF | numeric(5,2) |
-BTS | Size in Bytes | NULL | 1 | 7.19 | 10 | MRFILES.RRF | integer |
-CENC | Character encoding of a source as specified by IANA | NULL | 5 | 5.00 | 5 | MRSAB.RRF | varchar(20) |
-CFR | CUI frequency for a source | NULL | 1 | 4.18 | 6 | MRSAB.RRF | integer |
-CHANGEKEY | CONCEPTSTATUS (if history relates to a SNOMED CT concept) or DESCRIPTIONSTATUS (if history relates to a SNOMED CT atom or "description") | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(1000) |
-CHANGETYPE | Source asserted code for type of change | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(1000) |
-CHANGEVAL | SNOMED CT CONCEPTSTATUS or DESCRIPTIONSTATUS value after the change took place | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(1000) |
-CLS | Number of columns | NULL | 1 | 1.12 | 2 | MRFILES.RRF | integer |
-CODE | Unique Identifier or code for string in source | NULL | 0 | 4.46 | 56 | MRSAT.RRF | varchar(100) |
-CODE | Unique Identifier or code for string in source | NULL | 1 | 7.50 | 95 | MRCONSO.RRF | varchar(100) |
-COL | Column or data element name | NULL | 2 | 3.71 | 11 | MRCOLS.RRF | varchar(20) |
-CUI1 | Unique identifier for first concept | NULL | 8 | 8.00 | 8 | MRAUI.RRF | char(8) |
-CUI1 | Unique identifier for first concept | NULL | 8 | 8.00 | 8 | MRCUI.RRF | char(8) |
-CUI1 | Unique identifier for first concept | NULL | 8 | 8.00 | 8 | MRREL.RRF | char(8) |
-CUI2 | Unique identifier for second concept | NULL | 0 | 3.33 | 8 | MRCUI.RRF | char(8) |
-CUI2 | Unique identifier for second concept | NULL | 8 | 8.00 | 8 | MRAUI.RRF | char(8) |
-CUI2 | Unique identifier for second concept | NULL | 8 | 8.00 | 8 | MRREL.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 0 | 0.00 | 0 | MRHIST.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | AMBIGLUI.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | AMBIGSUI.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | CHANGE/MERGEDCUI.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRCONSO.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRDEF.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRHIER.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRSAT.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRSTY.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXNS_ENG.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXNW_ENG.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_ARA.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_BAQ.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_CHI.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_CZE.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_DAN.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_DUT.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_ENG.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_EST.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_FIN.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_FRE.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_GER.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_GRE.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_HEB.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_HUN.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_ITA.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_JPN.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_KOR.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_LAV.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_NOR.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_POL.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_POR.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_RUS.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_SCR.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_SPA.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_SWE.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_TUR.RRF | char(8) |
-CUI | Unique identifier for concept | NULL | 8 | 8.00 | 8 | MRXW_UKR.RRF | char(8) |
-CURVER | Current Version flag | NULL | 1 | 1.00 | 1 | MRSAB.RRF | char(1) |
-CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRDEF.RRF | varchar(50) |
-CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRHIER.RRF | varchar(50) |
-CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(50) |
-CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(50) |
-CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRREL.RRF | varchar(50) |
-CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRSAT.RRF | varchar(50) |
-CVF | Content view flag | NULL | 0 | 0.00 | 0 | MRSMAP.RRF | varchar(50) |
-CVF | Content view flag | NULL | 0 | 1.22 | 5 | MRCONSO.RRF | varchar(50) |
-CVF | Content view flag | NULL | 0 | 2.13 | 5 | MRSTY.RRF | varchar(50) |
-CXN | The context number if the atom has multiple contexts | NULL | 1 | 2.17 | 5 | MRHIER.RRF | integer |
-CXTY | Context type for a source | NULL | 0 | 5.14 | 13 | MRSAB.RRF | varchar(50) |
-DEF | Definition | NULL | 1 | 232.23 | 10939 | MRDEF.RRF | varchar(16000) |
-DES | Descriptive Name | NULL | 5 | 28.81 | 136 | MRCOLS.RRF | varchar(200) |
-DES | Descriptive Name | NULL | 8 | 18.25 | 42 | MRFILES.RRF | varchar(200) |
-DIR | Source asserted directionality flag | NULL | 0 | 0.13 | 1 | MRREL.RRF | varchar(1) |
-DOCKEY | Key to be documented | NULL | 2 | 3.65 | 8 | MRDOC.RRF | varchar(50) |
-DTY | SQL-92 data type for this column | NULL | 7 | 10.02 | 14 | MRCOLS.RRF | varchar(20) |
-EXPL | Detailed explanation | NULL | 0 | 26.57 | 941 | MRDOC.RRF | varchar(1000) |
-FIL | Physical FILENAME | NULL | 9 | 10.99 | 21 | MRCOLS.RRF | varchar(50) |
-FIL | Physical FILENAME | NULL | 9 | 12.12 | 21 | MRFILES.RRF | varchar(50) |
-FMT | Comma separated list of COL | NULL | 7 | 29.69 | 190 | MRFILES.RRF | varchar(300) |
-FROMEXPR | The expression that a mapping is mapped from | NULL | 1 | 6.93 | 9 | MRSMAP.RRF | varchar(4000) |
-FROMEXPR | The expression that a mapping is mapped from | NULL | 1 | 8.29 | 18 | MRMAP.RRF | varchar(4000) |
-FROMID | Metathesaurus identifier for the entity being mapped from | NULL | 1 | 7.31 | 18 | MRMAP.RRF | varchar(50) |
-FROMRES | Restriction applicable to the entity being mapped from | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(4000) |
-FROMRULE | Machine processible rule applicable to the entity being mapped from | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(4000) |
-FROMSID | Source asserted identifier for the entity being mapped from | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(50) |
-FROMTYPE | The type of expression that a mapping is mapped from | NULL | 3 | 3.98 | 4 | MRSMAP.RRF | varchar(50) |
-FROMTYPE | The type of expression that a mapping is mapped from | NULL | 3 | 3.99 | 4 | MRMAP.RRF | varchar(50) |
-HCD | Source asserted hierarchical number or code of context member (if it exists) | NULL | 0 | 0.48 | 51 | MRHIER.RRF | varchar(100) |
-IMETA | Version of the Metathesaurus that a source was added | NULL | 6 | 6.00 | 6 | MRSAB.RRF | varchar(10) |
-ISPREF | Indicates whether AUI is preferred | NULL | 1 | 1.00 | 1 | MRCONSO.RRF | char(1) |
-LAT | Language of Term(s) | NULL | 0 | 0.00 | 0 | CHANGE/DELETEDSUI.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 0 | 2.97 | 3 | MRSAB.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRCONSO.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXNS_ENG.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXNW_ENG.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_ARA.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_BAQ.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_CHI.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_CZE.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_DAN.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_DUT.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_ENG.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_EST.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_FIN.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_FRE.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_GER.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_GRE.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_HEB.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_HUN.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_ITA.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_JPN.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_KOR.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_LAV.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_NOR.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_POL.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_POR.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_RUS.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_SCR.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_SPA.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_SWE.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_TUR.RRF | char(3) |
-LAT | Language of Term(s) | NULL | 3 | 3.00 | 3 | MRXW_UKR.RRF | char(3) |
-LUI | Unique identifier for term | NULL | 0 | 0.00 | 0 | CHANGE/MERGEDLUI.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 0 | 4.50 | 9 | MRSAT.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.00 | 8 | MRXW_BAQ.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.00 | 8 | MRXW_DAN.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.00 | 8 | MRXW_FIN.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.00 | 8 | MRXW_HEB.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.17 | 9 | MRXW_SCR.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.20 | 9 | MRXW_JPN.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.22 | 9 | AMBIGLUI.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.43 | 9 | MRXW_ENG.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.44 | 9 | MRCONSO.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.44 | 9 | MRXNS_ENG.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.44 | 9 | MRXNW_ENG.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.48 | 9 | MRXW_CZE.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.52 | 9 | MRXW_DUT.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.58 | 9 | MRXW_GER.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.67 | 9 | MRXW_SPA.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.77 | 9 | MRXW_POR.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.77 | 9 | MRXW_RUS.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.78 | 9 | MRXW_ITA.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.78 | 9 | MRXW_POL.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.80 | 9 | MRXW_FRE.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.82 | 9 | MRXW_SWE.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.90 | 9 | MRXW_KOR.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.98 | 9 | MRXW_NOR.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.99 | 9 | MRXW_HUN.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 8 | 8.99 | 9 | MRXW_LAV.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 9 | 9.00 | 9 | MRXW_ARA.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 9 | 9.00 | 9 | MRXW_CHI.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 9 | 9.00 | 9 | MRXW_EST.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 9 | 9.00 | 9 | MRXW_GRE.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 9 | 9.00 | 9 | MRXW_TUR.RRF | varchar(10) |
-LUI | Unique identifier for term | NULL | 9 | 9.00 | 9 | MRXW_UKR.RRF | varchar(10) |
-MAPATN | Mapping attribute name (for future use) | NULL | 0 | 2.82 | 6 | MRMAP.RRF | varchar(20) |
-MAPATV | Mapping attribute value (for future use) | NULL | 0 | 0.00 | 1 | MRMAP.RRF | varchar(4000) |
-MAPID | Metathesaurus asserted identifier for mapping | NULL | 10 | 10.98 | 11 | MRSMAP.RRF | varchar(50) |
-MAPID | Metathesaurus asserted identifier for mapping | NULL | 10 | 10.99 | 11 | MRMAP.RRF | varchar(50) |
-MAPIN | Mapping in current subset | NULL | 0 | 0.42 | 1 | MRCUI.RRF | char(1) |
-MAPIN | Mapping in current subset | NULL | 1 | 1.00 | 1 | MRAUI.RRF | char(1) |
-MAPRANK | Order in which mappings in a subset should be applied | NULL | 0 | 0.49 | 2 | MRMAP.RRF | integer |
-MAPREASON | Reason for mapping | NULL | 0 | 0.00 | 4 | MRCUI.RRF | varchar(4000) |
-MAPREASON | Reason for mapping | NULL | 4 | 4.00 | 4 | MRAUI.RRF | varchar(4000) |
-MAPRES | Restriction applicable to this mapping | NULL | 0 | 34.78 | 429 | MRMAP.RRF | varchar(4000) |
-MAPRULE | Machine processible rule applicable to this mapping | NULL | 0 | 9.57 | 336 | MRMAP.RRF | varchar(4000) |
-MAPSETCUI | CUI of the map set | NULL | 8 | 8.00 | 8 | MRMAP.RRF | char(8) |
-MAPSETCUI | CUI of the map set | NULL | 8 | 8.00 | 8 | MRSMAP.RRF | char(8) |
-MAPSETSAB | SAB of the map set | NULL | 3 | 10.60 | 13 | MRSMAP.RRF | varchar(40) |
-MAPSETSAB | SAB of the map set | NULL | 3 | 10.71 | 13 | MRMAP.RRF | varchar(40) |
-MAPSID | Source asserted identifier for mapping | NULL | 0 | 0.00 | 0 | MRSMAP.RRF | varchar(50) |
-MAPSID | Source asserted identifier for mapping | NULL | 0 | 0.01 | 36 | MRMAP.RRF | varchar(50) |
-MAPSUBSETID | Map subset identifier used to identify a subset of related mappings within a map set | NULL | 0 | 0.49 | 1 | MRMAP.RRF | varchar(10) |
-MAPTYPE | Type of mapping | NULL | 0 | 4.26 | 9 | MRMAP.RRF | varchar(50) |
-MAX | Maximum Length | NULL | 1 | 1.37 | 5 | MRCOLS.RRF | integer |
-METAUI | Metathesaurus asserted unique identifier | NULL | 0 | 7.85 | 10 | MRSAT.RRF | varchar(50) |
-MIN | Minimum Length | NULL | 1 | 1.02 | 2 | MRCOLS.RRF | integer |
-NSTR | Normalized string | NULL | 1 | 38.86 | 2460 | MRXNS_ENG.RRF | varchar(3000) |
-NWD | Normalized word | NULL | 1 | 6.55 | 80 | MRXNW_ENG.RRF | varchar(100) |
-PAUI | Unique identifier for parent atom | NULL | 0 | 8.46 | 9 | MRHIER.RRF | varchar(9) |
-PCUI | Concept unique identifier in the previous Metathesaurus | NULL | 8 | 8.00 | 8 | CHANGE/DELETEDCUI.RRF | char(8) |
-PCUI | Concept unique identifier in the previous Metathesaurus | NULL | 8 | 8.00 | 8 | CHANGE/MERGEDCUI.RRF | char(8) |
-PLUI | Lexical unique identifier in the previous Metathesaurus | NULL | 0 | 0.00 | 0 | CHANGE/DELETEDLUI.RRF | varchar(10) |
-PLUI | Lexical unique identifier in the previous Metathesaurus | NULL | 0 | 0.00 | 0 | CHANGE/MERGEDLUI.RRF | varchar(10) |
-PSTR | Preferred name in the previous Metathesaurus | NULL | 0 | 0.00 | 0 | CHANGE/DELETEDLUI.RRF | varchar(3000) |
-PSTR | Preferred name in the previous Metathesaurus | NULL | 0 | 0.00 | 0 | CHANGE/DELETEDSUI.RRF | varchar(3000) |
-PSTR | Preferred name in the previous Metathesaurus | NULL | 4 | 4.00 | 4 | CHANGE/DELETEDCUI.RRF | varchar(3000) |
-PSUI | String unique identifier in the previous Metathesaurus | NULL | 0 | 0.00 | 0 | CHANGE/DELETEDSUI.RRF | varchar(10) |
-PTR | Path to root | NULL | 0 | 103.81 | 345 | MRHIER.RRF | varchar(1000) |
-RANK | Termgroup ranking | NULL | 4 | 4.00 | 4 | MRRANK.RRF | integer |
-RCUI | Unique identifier for root SRC concept | NULL | 8 | 8.00 | 8 | MRSAB.RRF | char(8) |
-REASON | Explanation of change, if present | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(1000) |
-REF | Documentation Section Number | NULL | 0 | 0.00 | 0 | MRCOLS.RRF | varchar(20) |
-RELA | Additional relationship label | NULL | 0 | 0.00 | 0 | MRAUI.RRF | varchar(100) |
-RELA | Additional relationship label | NULL | 0 | 0.00 | 0 | MRCUI.RRF | varchar(100) |
-RELA | Additional relationship label | NULL | 0 | 10.69 | 54 | MRREL.RRF | varchar(100) |
-RELA | Additional relationship label | NULL | 0 | 14.07 | 37 | MRMAP.RRF | varchar(100) |
-RELA | Additional relationship label | NULL | 0 | 19.91 | 37 | MRSMAP.RRF | varchar(100) |
-RELA | Additional relationship label | NULL | 0 | 2.71 | 12 | MRHIER.RRF | varchar(100) |
-REL | Relationship label | NULL | 0 | 0.00 | 0 | MRAUI.RRF | varchar(4) |
-REL | Relationship label | NULL | 2 | 2.00 | 2 | MRMAP.RRF | varchar(4) |
-REL | Relationship label | NULL | 2 | 2.00 | 2 | MRSMAP.RRF | varchar(4) |
-REL | Relationship label | NULL | 2 | 2.24 | 3 | MRREL.RRF | varchar(4) |
-REL | Relationship label | NULL | 2 | 2.65 | 4 | MRCUI.RRF | varchar(4) |
-RG | Relationship group | NULL | 0 | 0.06 | 2 | MRREL.RRF | varchar(10) |
-RMETA | Version of the Metathesaurus where a version is removed | NULL | 0 | 0.09 | 6 | MRSAB.RRF | varchar(10) |
-RSAB | Root source abbreviation | NULL | 2 | 5.94 | 15 | MRSAB.RRF | varchar(40) |
-RUI | Unique identifier for relationship | NULL | 9 | 9.82 | 10 | MRREL.RRF | varchar(10) |
-RWS | Number of rows | NULL | 1 | 5.56 | 8 | MRFILES.RRF | integer |
-SABIN | Source in current subset | NULL | 1 | 1.00 | 1 | MRSAB.RRF | char(1) |
-SAB | Source abbreviation | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(40) |
-SAB | Source abbreviation | NULL | 2 | 4.12 | 11 | MRDEF.RRF | varchar(40) |
-SAB | Source abbreviation | NULL | 2 | 5.31 | 15 | MRRANK.RRF | varchar(40) |
-SAB | Source abbreviation | NULL | 2 | 5.48 | 15 | MRREL.RRF | varchar(40) |
-SAB | Source abbreviation | NULL | 2 | 5.70 | 15 | MRCONSO.RRF | varchar(40) |
-SAB | Source abbreviation | NULL | 2 | 5.75 | 13 | MRSAT.RRF | varchar(40) |
-SAB | Source abbreviation | NULL | 2 | 7.90 | 13 | MRHIER.RRF | varchar(40) |
-SATUI | Source asserted attribute identifier | NULL | 0 | 0.47 | 16 | MRDEF.RRF | varchar(50) |
-SATUI | Source asserted attribute identifier | NULL | 0 | 3.24 | 36 | MRSAT.RRF | varchar(50) |
-SAUI | Source asserted atom identifier | NULL | 0 | 1.73 | 18 | MRCONSO.RRF | varchar(100) |
-SCC | Content contact info for a source | NULL | 0 | 152.05 | 332 | MRSAB.RRF | varchar(1000) |
-SCIT | Source citation | NULL | 54 | 164.09 | 674 | MRSAB.RRF | varchar(4000) |
-SCUI | Source asserted concept identifier | NULL | 0 | 5.28 | 95 | MRCONSO.RRF | varchar(100) |
-SDUI | Source asserted descriptor identifier | NULL | 0 | 2.73 | 13 | MRCONSO.RRF | varchar(100) |
-SF | Source Family | NULL | 2 | 4.20 | 13 | MRSAB.RRF | varchar(40) |
-SLC | License contact info for a source | NULL | 12 | 167.35 | 346 | MRSAB.RRF | varchar(1000) |
-SL | Source of relationship labels | NULL | 2 | 5.48 | 15 | MRREL.RRF | varchar(40) |
-SON | Source Official Name | NULL | 10 | 48.65 | 145 | MRSAB.RRF | varchar(3000) |
-SOURCEUI | Source asserted unique identifier | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(50) |
-SRL | Source Restriction Level | NULL | 1 | 1.00 | 1 | MRCONSO.RRF | integer |
-SRL | Source Restriction Level | NULL | 1 | 1.00 | 1 | MRSAB.RRF | integer |
-SRUI | Source attributed relationship identifier | NULL | 0 | 1.20 | 36 | MRREL.RRF | varchar(50) |
-SSN | Source short name | NULL | 3 | 26.96 | 89 | MRSAB.RRF | varchar(3000) |
-STN | Semantic type tree number | NULL | 1 | 7.85 | 14 | MRSTY.RRF | varchar(100) |
-STR | String | NULL | 1 | 38.20 | 2930 | MRCONSO.RRF | varchar(3000) |
-STT | String type | NULL | 2 | 2.01 | 3 | MRCONSO.RRF | varchar(3) |
-STYPE1 | The name of the column in MRCONSO.RRF that contains the first identifier to which the relationship is attached | NULL | 3 | 3.62 | 4 | MRREL.RRF | varchar(50) |
-STYPE2 | The name of the column in MRCONSO.RRF that contains the second identifier to which the relationship is attached | NULL | 3 | 3.62 | 4 | MRREL.RRF | varchar(50) |
-STYPE | The name of the column in MRCONSO.RRF or MRREL.RRF that contains the identifier to which the attribute is attached | NULL | 3 | 3.25 | 4 | MRSAT.RRF | varchar(50) |
-STY | Semantic type | NULL | 4 | 17.65 | 39 | MRSTY.RRF | varchar(50) |
-SUI | Unique identifier for string | NULL | 0 | 4.57 | 9 | MRSAT.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.00 | 8 | MRXW_BAQ.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.00 | 8 | MRXW_DAN.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.00 | 8 | MRXW_FIN.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.00 | 8 | MRXW_HEB.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.35 | 9 | AMBIGSUI.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.35 | 9 | MRXW_JPN.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.53 | 9 | MRXW_DUT.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.58 | 9 | MRCONSO.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.61 | 9 | MRXW_GER.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.64 | 9 | MRXNS_ENG.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.67 | 9 | MRXNW_ENG.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.67 | 9 | MRXW_ENG.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.71 | 9 | MRXW_SPA.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.79 | 9 | MRXW_POR.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.79 | 9 | MRXW_RUS.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.82 | 9 | MRXW_ITA.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.84 | 9 | MRXW_SWE.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.85 | 9 | MRXW_CZE.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.85 | 9 | MRXW_FRE.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.98 | 9 | MRXW_NOR.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 8 | 8.99 | 9 | MRXW_HUN.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_ARA.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_CHI.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_EST.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_GRE.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_KOR.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_LAV.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_POL.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_SCR.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_TUR.RRF | varchar(10) |
-SUI | Unique identifier for string | NULL | 9 | 9.00 | 9 | MRXW_UKR.RRF | varchar(10) |
-SUPPRESS | Suppressible flag | NULL | 1 | 1.00 | 1 | MRCONSO.RRF | char(1) |
-SUPPRESS | Suppressible flag | NULL | 1 | 1.00 | 1 | MRDEF.RRF | char(1) |
-SUPPRESS | Suppressible flag | NULL | 1 | 1.00 | 1 | MRRANK.RRF | char(1) |
-SUPPRESS | Suppressible flag | NULL | 1 | 1.00 | 1 | MRREL.RRF | char(1) |
-SUPPRESS | Suppressible flag | NULL | 1 | 1.00 | 1 | MRSAT.RRF | char(1) |
-SVER | Release date or version number of a source | NULL | 0 | 0.00 | 0 | MRHIST.RRF | varchar(20) |
-SVER | Release date or version number of a source | NULL | 0 | 5.08 | 15 | MRSAB.RRF | varchar(20) |
-TFR | Term frequency for a source | NULL | 1 | 4.41 | 7 | MRSAB.RRF | integer |
-TOEXPR | The expression that a mapping is mapped to | NULL | 0 | 6.03 | 242 | MRMAP.RRF | varchar(4000) |
-TOEXPR | The expression that a mapping is mapped to | NULL | 1 | 6.92 | 242 | MRSMAP.RRF | varchar(4000) |
-TOID | Metathesaurus identifier for the entity being mapped to | NULL | 0 | 5.18 | 18 | MRMAP.RRF | varchar(50) |
-TORES | Restriction applicable to the entity being mapped to | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(4000) |
-TORULE | Machine processible rule applicable to the entity being mapped to | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(4000) |
-TOSID | Source asserted identifier for the entity being mapped to | NULL | 0 | 0.00 | 0 | MRMAP.RRF | varchar(50) |
-TOTYPE | The type of expression that a mapping is mapped to | NULL | 0 | 3.98 | 23 | MRMAP.RRF | varchar(50) |
-TOTYPE | The type of expression that a mapping is mapped to | NULL | 4 | 4.36 | 22 | MRSMAP.RRF | varchar(50) |
-TS | Term status | NULL | 1 | 1.00 | 1 | MRCONSO.RRF | char(1) |
-TTYL | Term type list for a source | NULL | 0 | 11.76 | 86 | MRSAB.RRF | varchar(400) |
-TTY | Term type in source | NULL | 2 | 2.35 | 11 | MRCONSO.RRF | varchar(20) |
-TTY | Term type in source | NULL | 2 | 2.58 | 11 | MRRANK.RRF | varchar(20) |
-TUI | Unique identifier of Semantic type | NULL | 4 | 4.00 | 4 | MRSTY.RRF | char(4) |
-TYPE | Type of information | NULL | 3 | 13.14 | 21 | MRDOC.RRF | varchar(50) |
-VALUE | Value | NULL | 0 | 15.98 | 62 | MRDOC.RRF | varchar(200) |
-VCUI | Unique identifier for versioned SRC concept | NULL | 0 | 7.71 | 8 | MRSAB.RRF | char(8) |
-VEND | Valid end date for a source | NULL | 0 | 0.00 | 0 | MRSAB.RRF | char(8) |
-VER | Last release version in which CUI1 was valid | NULL | 6 | 6.00 | 6 | MRAUI.RRF | varchar(10) |
-VER | Last release version in which CUI1 was valid | NULL | 6 | 6.00 | 6 | MRCUI.RRF | varchar(10) |
-VSAB | Versioned source abbreviation | NULL | 3 | 11.35 | 24 | MRSAB.RRF | varchar(40) |
-VSTART | Valid start date for a source | NULL | 0 | 0.00 | 0 | MRSAB.RRF | char(8) |
-WD | Word in lower-case | NULL | 1 | 10.53 | 54 | MRXW_FIN.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 2.90 | 38 | MRXW_KOR.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 3.65 | 68 | MRXW_CHI.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 4.58 | 35 | MRXW_EST.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 5.23 | 37 | MRXW_TUR.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 5.47 | 22 | MRXW_ARA.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 5.71 | 38 | MRXW_POR.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 5.91 | 38 | MRXW_ITA.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 6.12 | 19 | MRXW_HEB.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 6.13 | 24 | MRXW_UKR.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 6.23 | 80 | MRXW_ENG.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 6.38 | 25 | MRXW_DAN.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 6.67 | 46 | MRXW_SPA.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 6.83 | 39 | MRXW_FRE.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 7.14 | 40 | MRXW_RUS.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 7.17 | 18 | MRXW_BAQ.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 7.50 | 34 | MRXW_GRE.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 7.55 | 48 | MRXW_POL.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 7.57 | 52 | MRXW_CZE.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 7.89 | 51 | MRXW_DUT.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 7.97 | 27 | MRXW_HUN.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 7.98 | 29 | MRXW_LAV.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 8.02 | 37 | MRXW_SCR.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 8.37 | 41 | MRXW_GER.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 8.61 | 39 | MRXW_SWE.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 8.91 | 85 | MRXW_JPN.RRF | varchar(500) |
-WD | Word in lower-case | NULL | 1 | 9.11 | 44 | MRXW_NOR.RRF | varchar(500) |
-
 ```
 mysql> select * from MRREL limit 10;
 ```
@@ -597,3 +261,152 @@ C4550264 | C3251798 | CCC2_5_2018 | CCC | Clinical Care Classification, 2_5_2018
 C1140221 | C1140220 | CCPSS99 | CCPSS | Canonical Clinical Problem Statement System, 1999 | CCPSS | 1999 | NULL | NULL | 2000AA | NULL | Steven Brown, M.D.;Associate Professor, Biomedical Informatics;Eskind Biomedical Library, Vanderbilt University Medical Center;2209 Garland Ave;Room 442;Nashville;TN;United States;37232-8340;(615) 321-6335;;sbrown@vumclib.mc.vanderbilt.edu; | Steven Brown, MD;;Department of Biomedical Informatics Vanderbilt University;;;;;;;;;; | 3 | 15777 | 15245 | NULL | MP,PT,TX | CCF | ENG | UTF-8 | Y | Y | Clinical Problem Statements | ;;;;Canonical Clincial Problem Statement System;;;Version 1.0;;;June 23, 1999;;;;;;ENG;Contact: sbrown@vumclib.mc.vanderbilt.edu; |
 C1541964 | C1140228 | CCS2005 | CCS | Clinical Classifications Software, 2005 | CCS | 2005 | NULL | NULL | 2005AC | NULL | Anne Elixhauser, Ph.D.;Senior Research Scientist;Agency for Healthcare Research and Quality;540 Gaither Road;;Rockville;MD;United States;20850;(301) 427-1411, 1-800-358-9295;(301) 594-1430;AElixhau@AHRQ.gov; | Anne Elixhauser, Ph.D.;Senior Research Scientist;Agency for Healthcare Research and Quality;540 Gaither Road;;Rockville;MD;United States;20850;1-800-358-9295;(301)-594-1430;AElixhau@AHRQ.gov; | 0 | 1617 | 1109 | FULL | HT,MD,MV,SD,SP,XM | CCI,FROMRSAB,FROMVSAB,MAPSETRSAB,MAPSETVERSION,MAPSETVSAB,MTH_MAPFROMCOMPLEXITY,MTH_MAPFROMEXHAUSTIVE,MTH_MAPSETCOMPLEXITY,MTH_MAPTOCOMPLEXITY,MTH_MAPTOEXHAUSTIVE,SOS,TORSAB,TOVSAB | ENG | UTF-8 | Y | Y | Clinical Classifications Software | ;;Agency for Healthcare Research and Quality (AHRQ);;Clinical Classifications Software (CCS);;;;;;April 2005;;Rockville,MD;;; http://www.hcup-us.ahrq.gov/toolssoftware/ccs/ccs.jsp;ENG;Phone: 301-594-1364.; |
 C5770268 | C5400755 | CCSR_ICD10CM_2023 | CCSR_ICD10CM | Clinical Classifications Software Refined for ICD-10-CM, 2023 | CCS | 2023 | NULL | NULL | 2023AA | NULL | ;;Agency for Healthcare Research and Quality;5600 Fishers Lane;Mail Stop 07N94A;Rockville;MD;United States;20857;1-866-290-HCUP;(301) 594-1430;hcup@ahrq.gov;https://www.hcup-us.ahrq.gov/toolssoftware/ccsr/ccs_refined.jsp; | ;;Agency for Healthcare Research and Quality;5600 Fishers Lane;Mail Stop 07N94A;Rockville;MD;United States;20857;1-866-290-HCUP;(301)-594-1430;hcup@ahrq.gov;https://www.hcup-us.ahrq.gov/toolssoftware/ccsr/ccs_refined.jsp; | 0 | 546 | 545 | NULL | SD,XM | FROMRSAB,FROMVSAB,MAPSETRSAB,MAPSETVERSION,MAPSETVSAB,MTH_MAPFROMCOMPLEXITY,MTH_MAPFROMEXHAUSTIVE,MTH_MAPSETCOMPLEXITY,MTH_MAPTOCOMPLEXITY,MTH_MAPTOEXHAUSTIVE,TORSAB,TOVSAB | ENG | UTF-8 | Y | Y | Clinical Classifications Software Refined for ICD-10-CM | ;;Healthcare Cost and Utilization Project (HCUP);;Clinical Classifications Software Refined for ICD-10-CM;;;;;Agency for Healthcare Research and Quality (AHRQ);;November 2022;Rockville, MD;;;;ENG;; |
+
+
+
+```
+mysql> select * from MRDEF limit 10;
+```
+CUI | AUI | ATUI | SATUI | SAB | DEF | SUPPRESS | CVF |
+--|--|--|--|--|--|--|--
+C0007662 | A15587413 | AT100258389 | NULL | MSH | Areas set apart as burial grounds. | N | NULL |
+C0031705 | A0101053 | AT100258390 | NULL | MSH | A non-metal element that has the atomic symbol P, atomic number 15, and atomic weight 31. It is an essential element that takes part in a broad variety of biochemical reactions. | N | NULL |
+C0319858 | A15585286 | AT100258391 | NULL | MSH | A genus of ectomycorrhizae basidiomycetous fungi in the family Cortinariaceae. Some species are poisonous. | N | NULL |
+C0026655 | A0088287 | AT100258392 | NULL | MSH | A republic in southern Africa, south of TANZANIA, east of ZAMBIA and ZIMBABWE, bordered on the west by the Indian Ocean. Its capital is Maputo. It was formerly called Portuguese East Africa. | N | NULL |
+C2350764 | A26632051 | AT100258393 | NULL | MSH | The flow of ions into or out of cells that cause EXCITATORY POSTSYNAPTIC POTENTIALS. | N | NULL |
+C2350395 | A15587282 | AT100258394 | NULL | MSH | Timing the acquisition of imaging data to specific points in the cardiac cycle to minimize image blurring and other motion artifacts. | N | NULL |
+C2350340 | A26678303 | AT100258395 | NULL | MSH | The ion flow that effects the POSTSYNAPTIC POTENTIAL. | N | NULL |
+C0073209 | A12983302 | AT100258396 | NULL | MSH | A PROTEIN-SERINE-THREONINE KINASE that is found in PHOTORECEPTOR CELLS. It mediates light-dependent PHOSPHORYLATION of RHODOPSIN and plays an important role in PHOTOTRANSDUCTION. | N | NULL |
+C0872279 | A15585197 | AT100258397 | NULL | MSH | A type of strength-building exercise program that requires the body muscle to exert a force against some form of resistance, such as weight, stretch bands, water, or immovable objects. Resistance exercise is a combination of static and dynamic contractions involving shortening and lengthening of skeletal muscles. | N | NULL |
+C2350288 | A26632695 | AT100258398 | NULL | MSH | The duration of time from initiation to discontinuation of drug therapy. | N | NULL |
+
+```
+mysql> select * from MRDEF where SAB != "MSH" limit 10;
+```
+CUI | AUI | ATUI | SATUI | SAB | DEF | SUPPRESS | CVF |
+--|--|--|--|--|--|--|--
+C1965760 | A15884584 | AT104406511 | NULL | ALT | Mapping the practitioner type or specialty to a non-specified emergency or non-emergency transportation, travel, or delivery expense or service code. Use associated HCPCS II codes to bill for expense(s) or service(s). This code is used for scope-of-practice mapping, not for billing. | N | NULL |
+C2366573 | A15884545 | AT104406512 | NULL | ALT | Mapping the practitioner type or specialty to a non-specified physician service or procedure code. Use associated HCPCS II codes to bill for physician service(s). This code is used for scope-of-practice mapping, not for billing. | N | NULL |
+C2366625 | A15884463 | AT104406513 | NULL | ALT | Mapping the practitioner type or specialty to a nutritional therapy service code. Use associated HCPCS II codes to bill for nutrition service(s). This code is used for scope-of-practice mapping, not for billing. | N | NULL |
+C2366594 | A15884632 | AT104406514 | NULL | ALT | Mapping the practitioner type or specialty to a wound care an/or therapy service code. Use associated HCPCS II codes to bill for wound care service(s). This code is used for scope-of-practice mapping, not for billing. | N | NULL |
+C2366609 | A15884637 | AT104406515 | NULL | ALT | Mapping the practitioner type or specialty to a stabilizing, traction and/or restraining device or equipment code. Use associated HCPCS II codes to bill for stabilizing, traction or restraining device(s) or equipment. This code is used for scope-of-practice mapping, not for billing. | N | NULL |
+C1535681 | A15884507 | AT104406516 | NULL | ALT | Mapping the practitioner type or specialty to a non-specified gastroenterology procedure. Use CPT® and/or HCPCS II codes to bill for all gastroenterology service(s). This code is used for scope-of-practice mapping, not for billing. | N | NULL |
+C2366582 | A15884518 | AT104406517 | NULL | ALT | Mapping the practitioner type or specialty to a dental service adjunctive general code. Use associated HCPCS II codes to bill for dental service(s). This code is used for scope-of-practice mapping, not for billing. | N | NULL |
+C2366655 | A15884685 | AT104406518 | NULL | ALT | Mapping the practitioner type or specialty to a prescription documentation service code. Use associated HCPCS II codes to bill for documentation service(s). This code is used for scope-of-practice mapping, not for billing. | N | NULL |
+C2366632 | A15884500 | AT104406519 | NULL | ALT | Mapping the practitioner type or specialty to a vision rehab service code. Use associated HCPCS II codes to bill for vision service(s). This code is used for scope-of-practice mapping, not for billing. | N | NULL |
+C1535683 | A15884654 | AT104406520 | NULL | ALT | Mapping the practitioner type or specialty to a non-specified diagnostic infusion procedure. Use CPT® and/or HCPCS II codes to bill for all infusion service(s). This code is used for scope-of-practice mapping, not for billing. | N | NULL |
+
+```
+mysql> select * from MRDEF where SAB != "MSH" and SAB != "ALT" limit 10;
+```
+CUI | AUI | ATUI | SATUI | SAB | DEF | SUPPRESS | CVF |
+--|--|--|--|--|--|--|--
+C0032226 | A18556325 | AT130670828 | NULL | CHV | disease causing increase of the fluid amount in the chest wall cavity | N | NULL |
+C0032226 | A18593399 | AT130670829 | NULL | CHV | disease causing increase of the fluid amount in the chest wall cavity | N | NULL |
+C0032226 | A18649215 | AT130670830 | NULL | CHV | disease causing increase of the fluid amount in the chest wall cavity | N | NULL |
+C0078049 | A18558170 | AT130670831 | NULL | CHV | a substance used to prevent chickenpox | N | NULL |
+C0078049 | A18576590 | AT130670832 | NULL | CHV | a substance used to prevent chickenpox | N | NULL |
+C0078049 | A18632385 | AT130670833 | NULL | CHV | a substance used to prevent chickenpox | N | NULL |
+C0078049 | A18688022 | AT130670834 | NULL | CHV | a substance used to prevent chickenpox | N | NULL |
+C0543431 | A18565798 | AT130670835 | NULL | CHV | a unit of radiation dose | N | NULL |
+C0556645 | A18566010 | AT130670836 | NULL | CHV | a unit of radiation dose | N | NULL |
+C0560132 | A18566104 | AT130670837 | NULL | CHV | a unit of radiation dose | N | NULL |
+
+```
+mysql> select SAB, count(*) from MRDEF group by SAB;
+```
+SAB | count(*) |
+--|--
+AIR | 160 |
+ALT | 4281 |
+AOT | 240 |
+CCC | 408 |
+CHV | 2657 |
+CSP | 8265 |
+FMA | 2147 |
+GO | 43648 |
+HL7V3.0 | 8270 |
+HPO | 14040 |
+ICF | 767 |
+ICF-CY | 906 |
+JABL | 724 |
+LNC | 511 |
+MCM | 18 |
+MDR | 230 |
+MDRARA | 230 |
+MDRBPO | 230 |
+MDRCZE | 230 |
+MDRDUT | 230 |
+MDRFRE | 230 |
+MDRGER | 230 |
+MDRGRE | 230 |
+MDRHUN | 230 |
+MDRITA | 230 |
+MDRJPN | 230 |
+MDRKOR | 230 |
+MDRLAV | 230 |
+MDRPOL | 230 |
+MDRPOR | 230 |
+MDRRUS | 230 |
+MDRSPA | 230 |
+MDRSWE | 230 |
+MEDLINEPLUS | 1023 |
+MSH | 32702 |
+MSHCZE | 22345 |
+MSHFRE | 138 |
+MSHNOR | 7460 |
+MSHPOR | 30811 |
+MSHSCR | 1 |
+MSHSPA | 30647 |
+MSHSWE | 17142 |
+NANDA-I | 304 |
+NCI | 137609 |
+NEU | 2660 |
+NIC | 602 |
+NOC | 581 |
+NUCCHCPT | 589 |
+OMS | 134 |
+ORPHANET | 6669 |
+PDQ | 6356 |
+PNDS | 265 |
+PSY | 2212 |
+SCTSPA | 7511 |
+SNOMEDCT_US | 9413 |
+SPN | 4204 |
+UMD | 12259 |
+UWDA | 442 |
+
+```
+mysql> select * from MRSAT limit 10;
+```
+CUI | LUI | SUI | METAUI | STYPE | CODE | ATUI | SATUI | ATN | SAB | ATV | SUPPRESS | CVF |
+--|--|--|--|--|--|--|--|--|--|--|--|--
+C0002797 | NULL | NULL | NULL | CUI | NULL | AT00000003 | NULL | DA | MTH | 19900930 | N | NULL |
+C0002804 | NULL | NULL | NULL | CUI | NULL | AT00000004 | NULL | DA | MTH | 19900930 | N | NULL |
+C0197800 | NULL | NULL | NULL | CUI | NULL | AT00000007 | NULL | DA | MTH | 19940412 | N | NULL |
+C0002808 | NULL | NULL | NULL | CUI | NULL | AT00000008 | NULL | DA | MTH | 19900930 | N | NULL |
+C0002810 | NULL | NULL | NULL | CUI | NULL | AT00000009 | NULL | DA | MTH | 19900930 | N | NULL |
+C0002811 | NULL | NULL | NULL | CUI | NULL | AT00000010 | NULL | DA | MTH | 19900930 | N | NULL |
+C0197801 | NULL | NULL | NULL | CUI | NULL | AT00000011 | NULL | DA | MTH | 19940412 | N | NULL |
+C0002812 | NULL | NULL | NULL | CUI | NULL | AT00000012 | NULL | DA | MTH | 19900930 | N | NULL |
+C0002813 | NULL | NULL | NULL | CUI | NULL | AT00000013 | NULL | DA | MTH | 19900930 | N | NULL |
+C0197803 | NULL | NULL | NULL | CUI | NULL | AT00000014 | NULL | DA | MTH | 19940412 | N | NULL |
+
+```
+mysql> select * from MRSAT where SAB != "MTH" limit 10;
+```
+CUI | LUI | SUI | METAUI | STYPE | CODE | ATUI | SATUI | ATN | SAB | ATV | SUPPRESS | CVF |
+--|--|--|--|--|--|--|--|--|--|--|--|--
+C0226631 | L7947353 | S9261161 | A15487314 | AUI | 77500 | AT100000001 | NULL | LANGUAGE | FMA | Latin | N | NULL |
+C0226476 | L1658590 | S1869222 | A15487357 | AUI | 43921 | AT100000002 | NULL | LANGUAGE | FMA | Latin | N | NULL |
+C0226476 | L1658578 | S1869210 | A15487358 | AUI | 43921 | AT100000003 | NULL | LANGUAGE | FMA | Latin | N | NULL |
+C1184758 | L7921465 | S9257177 | A15487423 | AUI | 75484 | AT100000004 | NULL | LANGUAGE | FMA | Latin | N | NULL |
+C0224224 | L7917062 | S9255685 | A15487425 | AUI | 46777 | AT100000005 | NULL | LANGUAGE | FMA | Latin | N | NULL |
+C1306642 | L7941514 | S9244381 | A15487435 | AUI | 71875 | AT100000006 | NULL | LANGUAGE | FMA | Latin | N | NULL |
+C0227302 | L7921706 | S9259748 | A15487449 | AUI | 14929 | AT100000007 | NULL | LANGUAGE | FMA | Latin | N | NULL |
+C0694589 | L1456954 | S1742895 | A15487461 | AUI | 67962 | AT100000008 | NULL | LANGUAGE | FMA | Latin | N | NULL |
+C0152374 | L1457021 | S1742970 | A15487464 | AUI | 72455 | AT100000009 | NULL | LANGUAGE | FMA | Latin | N | NULL |
+C0224086 | L7915107 | S9234531 | A15487481 | AUI | 9719 | AT100000010 | NULL | LANGUAGE | FMA | Latin | N | NULL |
+
+As you can see from the image below, the `CODE` column of the table corresponds to the FMA ID for that node.
+![image](https://github.com/RTXteam/RTX-KG2/assets/36611732/c3a043fc-6e29-47c9-9598-f5b67dbec917)

From ed24438693bd0f78a675acdcb1e72bfa5adea79e Mon Sep 17 00:00:00 2001
From: "E.C. Wood" <wooderi@stanford.edu>
Date: Wed, 12 Jul 2023 14:09:56 -0700
Subject: [PATCH 003/117] #316 added more table info

---
 understanding_umls.md | 125 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 124 insertions(+), 1 deletion(-)

diff --git a/understanding_umls.md b/understanding_umls.md
index 104b01ea..a421fd25 100644
--- a/understanding_umls.md
+++ b/understanding_umls.md
@@ -408,5 +408,128 @@ C0694589 | L1456954 | S1742895 | A15487461 | AUI | 67962 | AT100000008 | NULL |
 C0152374 | L1457021 | S1742970 | A15487464 | AUI | 72455 | AT100000009 | NULL | LANGUAGE | FMA | Latin | N | NULL |
 C0224086 | L7915107 | S9234531 | A15487481 | AUI | 9719 | AT100000010 | NULL | LANGUAGE | FMA | Latin | N | NULL |
 
-As you can see from the image below, the `CODE` column of the table corresponds to the FMA ID for that node.
+As you can see from the image below (from `KG2.8.3pre`), the `CODE` column of the table corresponds to the FMA ID for that node.
 ![image](https://github.com/RTXteam/RTX-KG2/assets/36611732/c3a043fc-6e29-47c9-9598-f5b67dbec917)
+
+```
+mysql> select SAB, count(*) from MRSAT group by SAB;
+```
+SAB | count(*) |
+--|--
+ALT | 13272 |
+AOD | 6054 |
+AOT | 27 |
+ATC | 7860 |
+CCPSS | 15716 |
+CCS | 23453 |
+CCSR_ICD10CM | 12 |
+CCSR_ICD10PCS | 12 |
+CDT | 1275 |
+CHV | 877774 |
+CPT | 249691 |
+CSP | 23251 |
+CVX | 2755 |
+DRUGBANK | 10459 |
+FMA | 284369 |
+GO | 168004 |
+GS | 76415 |
+HCDT | 7983 |
+HCPCS | 66036 |
+HCPT | 105273 |
+HGNC | 810883 |
+HL7V2.5 | 16770 |
+HL7V3.0 | 38386 |
+HPO | 29796 |
+ICD10AM | 61299 |
+ICD10CM | 101898 |
+ICD10PCS | 79341 |
+ICD9CM | 10190 |
+ICF | 13822 |
+ICF-CY | 386 |
+ICNP | 1955 |
+ICPC | 1318 |
+ICPC2EENG | 1175 |
+ICPC2ICD10ENG | 81849 |
+ICPC2P | 29636 |
+JABL | 490 |
+KCD5 | 76 |
+LCH_NW | 13 |
+LNC | 2417573 |
+MDR | 1045184 |
+MDRARA | 1045184 |
+MDRBPO | 1045184 |
+MDRCZE | 1045184 |
+MDRDUT | 1045184 |
+MDRFRE | 1045184 |
+MDRGER | 1045184 |
+MDRGRE | 1045184 |
+MDRHUN | 1045184 |
+MDRITA | 1045184 |
+MDRJPN | 780993 |
+MDRKOR | 1045184 |
+MDRLAV | 1045184 |
+MDRPOL | 1045184 |
+MDRPOR | 1045184 |
+MDRRUS | 1045184 |
+MDRSPA | 1045184 |
+MDRSWE | 1045184 |
+MED-RT | 95999 |
+MEDCIN | 1355208 |
+MEDLINEPLUS | 8173 |
+MMSL | 242812 |
+MMX | 412325 |
+MSH | 4841113 |
+MSHCZE | 81443 |
+MSHFRE | 17 |
+MSHITA | 59531 |
+MSHLAV | 1191 |
+MSHNOR | 62205 |
+MSHPOR | 107509 |
+MSHSCR | 8069 |
+MSHSPA | 95836 |
+MTH | 9493363 |
+MTHMST | 1908 |
+MTHSPL | 3345744 |
+MVX | 411 |
+NANDA-I | 1879 |
+NCBI | 2034978 |
+NCI | 158863 |
+NDDF | 71219 |
+NEU | 8194 |
+NIC | 3023 |
+NOC | 15731 |
+NUCCHCPT | 522 |
+OMIM | 204484 |
+OMS | 21 |
+PDQ | 55017 |
+PNDS | 59 |
+PPAC | 813 |
+PSY | 8563 |
+RCD | 175408 |
+RXNORM | 2126399 |
+SCTSPA | 5843064 |
+SNMI | 85848 |
+SNOMEDCT_US | 9779615 |
+SNOMEDCT_VET | 457028 |
+SPN | 19052 |
+UMD | 46357 |
+USP | 8802 |
+USPMG | 1609 |
+UWDA | 61526 |
+VANDF | 349254 |
+
+```
+mysql> select * from MRRANK limit 10;
+```
+MRRANK_RANK | SAB | TTY | SUPPRESS |
+--|--|--|--
+266 | AIR | DI | N |
+267 | AIR | FI | N |
+264 | AIR | HT | N |
+265 | AIR | SY | N |
+364 | ALT | HT | N |
+365 | ALT | PT | N |
+282 | AOD | DE | N |
+281 | AOD | DS | N |
+277 | AOD | ES | N |
+278 | AOD | ET | N |

From a8fd56ce21b99fbbc412e403a0dbe7118f1ca821 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Sun, 6 Aug 2023 04:27:54 -0700
Subject: [PATCH 004/117] #316 add more research to file

---
 understanding_umls.md | 112 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)

diff --git a/understanding_umls.md b/understanding_umls.md
index a421fd25..bbc124ca 100644
--- a/understanding_umls.md
+++ b/understanding_umls.md
@@ -533,3 +533,115 @@ MRRANK_RANK | SAB | TTY | SUPPRESS |
 281 | AOD | DS | N |
 277 | AOD | ES | N |
 278 | AOD | ET | N |
+
+# Studying `umls2rdf.py`
+
+Tables Used:
+- `MRSTY`
+- `MRCONSO`
+- `MRSAB`
+- `MRREL`
+- `MRDEF`
+- `MRSAT`
+- `MRRANK`
+- `MRDOC`
+
+## `MRSTY`
+
+**What is taken?**
+
+This table is accessed twice, once on line 143 and once on line 573. At the line 143 accession, the distinct columns `TUI`, `STN`, and `STY` are taken. At the line 573 accession, all of the columns from `MRSTY` are taken, which consists of `CUI`, `TUI`, `STN`, `STY`, `ATUI`, `CVF`.
+
+**What does this table contain?**
+
+**How does `umls2rdf.py` use this table?**
+
+## `MRCONSO`
+
+**What is taken?**
+
+This table is accessed once, on line 491. All of the columns are taken, which consists of `CUI`, `LAT`, `TS`, `LUI`, `STT`, `SUI`, `ISPREF`, `AUI`, `SAUI`, `SCUI`, `SDUI`, `SAB`, `TTY`, `CODE`, `STR`, `SRL`, `SUPPRESS`, and `CVF`.
+
+**What does this table contain?**
+
+**How does `umls2rdf.py` use this table?**
+
+## `MRSAB`
+
+**What is taken?**
+
+This table is accessed once, on line 496. All of the columns are taken, which consists of `VCUI`, `RCUI`, `VSAB`, `RSAB`, `SON`, `SF`, `SVER`, `VSTART`, `VEND`, `IMETA`, `RMETA`, `SLC`, `SCC`, `SRL`, `TFR`, `CFR`, `CXTY`, `TTYL`, `ATNL`, `LAT`, `CENC`, `CURVER`, `SABIN`, `SSN`, and `SCIT`.
+
+**What does this table contain?**
+
+**How does `umls2rdf.py` use this table?**
+
+A limit of 1 is placed on this `scan` (per ontology code).
+
+## `MRREL`
+
+**What is taken?**
+
+This table is accessed once, on line 527. All of the columns are taken, which consists of `CUI1`, `AUI1`, `STYPE1`, `REL`, `CUI2`, `AUI2`, `STYPE2`, `RELA`, `RUI`, `SRUI`, `SAB`, `SL`, `RG`, `DIR`, `SUPPRESS`, and `CVF`.
+
+**What does this table contain?**
+
+**How does `umls2rdf.py` use this table?**
+
+## `MRDEF`
+
+**What is taken?**
+
+This table is accessed once, on line 538. All of the columns are taken, which consists of `CUI`, `AUI`, `ATUI`, `SATUI`, `SAB`, `DEF`, `SUPPRESS`, and `CVF`.
+
+**What does this table contain?**
+
+**How does `umls2rdf.py` use this table?**
+
+## `MRSAT`
+
+**What is taken?**
+
+This table is accessed once, on line 549. All of the columns are taken, which consists of `CUI`, `LUI`, `SUI`, `METAUI`, `STYPE`, `CODE`, `ATUI`, `SATUI`, `ATN`, `SAB`, `ATV`, `SUPPRESS`, and `CVF`.
+
+**What does this table contain?**
+
+**How does `umls2rdf.py` use this table?**
+
+## `MRRANK`
+
+**What is taken?**
+
+This table is accessed once, on line 560. All of the columns are taken, which consists of `MRRANK_RANK`, `SAB`, `TTY`, and `SUPPRESS`.
+
+**What does this table contain?**
+
+**How does `umls2rdf.py` use this table?**
+
+## `MRDOC`
+
+**What is taken?**
+
+This table is accessed once, on line 742. All of the columns are taken, which consists of `DOCKEY`, `VALUE`, `TYPE`, `EXPL`
+
+**What does this table contain?**
+
+**How does `umls2rdf.py` use this table?**
+
+## Assorted Notes
+
+- The table is filtered based on which ontology's ttl file is being generated at the time. This is done through the `scan` function, which is the actual function that sends the query to MySQL. Thus, this does not create redundancy; instead, it ensures that only the ontologies we care about are ever queried. The filtering is done on lines 222 through 227, where the `filt` parameter is passed into the `WHERE` clause of the MySQL statement.
+
+## To Do
+
+1. Determine which columns are actually making their way into the `TTL` files by examining the `TTL` files.
+
+2. Decide on join points/concatenation between these tables. Ideally, we will be able to implement a streaming solution like with SemMedDB, where each row has everything we need to know about that CUI. With extra information (such as `MRDOC` content), we may have to create a supplementary file, but it should be pretty small.
+
+3. Implement MySQL querying as decided on in step 2.
+
+4. Run timing tests on the solution implemented in step 3. We need to determine whether this will reduce the roughly 14 hours of ETL time currently required (though a good chunk of that is load in). We probably want under 2-3 hours of MySQL time to make this a worthwhile change.
+
+5. Repeat steps 3 and 4 until timing is desirable.
+
+6. Evaluate whether the content is sufficiently comparable to what is currently in KG2.
\ No newline at end of file

From fa0882f0a591c5b61cdea713bc835c8a81951d36 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 7 Aug 2023 15:05:58 -0700
Subject: [PATCH 005/117] #316 add links to make finding info easier

---
 understanding_umls.md | 92 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 1 deletion(-)

diff --git a/understanding_umls.md b/understanding_umls.md
index bbc124ca..5099b504 100644
--- a/understanding_umls.md
+++ b/understanding_umls.md
@@ -552,6 +552,10 @@ Tables Used:
 
 This table is accessed twice, once on line 143 and once on line 573. At the line 143 accession, the distinct columns `TUI`, `STN`, and `STY` are taken. At the line 573 accession, all of the columns from `MRSTY` are taken, which consists of `CUI`, `TUI`, `STN`, `STY`, `ATUI`, `CVF`.
 
+**What do these columns mean?**
+
+See [here](https://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.Tf/).
+
 **What does this table contain?**
 
 **How does `umls2rdf.py` use this table?**
@@ -562,6 +566,10 @@ This table is accessed twice, once on line 143 and once on line 573. At the line
 
 This table is accessed once, on line 491. All of the columns are taken, which consists of `CUI`, `LAT`, `TS`, `LUI`, `STT`, `SUI`, `ISPREF`, `AUI`, `SAUI`, `SCUI`, `SDUI`, `SAB`, `TTY`, `CODE`, `STR`, `SRL`, `SUPPRESS`, and `CVF`.
 
+**What do these columns mean?**
+
+See [here](https://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.concept_names_and_sources_file_mr/).
+
 **What does this table contain?**
 
 **How does `umls2rdf.py` use this table?**
@@ -572,6 +580,10 @@ This table is accessed once, on line 491. All of the columns are taken, which co
 
 This table is accessed once, on line 496. All of the columns are taken, which consists of `VCUI`, `RCUI`, `VSAB`, `RSAB`, `SON`, `SF`, `SVER`, `VSTART`, `VEND`, `IMETA`, `RMETA`, `SLC`, `SCC`, `SRL`, `TFR`, `CFR`, `CXTY`, `TTYL`, `ATNL`, `LAT`, `CENC`, `CURVER`, `SABIN`, `SSN`, and `SCIT`.
 
+**What do these columns mean?**
+
+See [here](https://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.source_information_file_mrsab_rrf/).
+
 **What does this table contain?**
 
 **How does `umls2rdf.py` use this table?**
@@ -584,6 +596,10 @@ A limit of 1 is placed on this `scan` (per ontology code).
 
 This table is accessed once, on line 527. All of the columns are taken, which consists of `CUI1`, `AUI1`, `STYPE1`, `REL`, `CUI2`, `AUI2`, `STYPE2`, `RELA`, `RUI`, `SRUI`, `SAB`, `SL`, `RG`, `DIR`, `SUPPRESS`, and `CVF`.
 
+**What do these columns mean?**
+
+See [here](https://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.related_concepts_file_mrrel_rrf/).
+
 **What does this table contain?**
 
 **How does `umls2rdf.py` use this table?**
@@ -594,6 +610,10 @@ This table is accessed once, on line 527. All of the columns are taken, which co
 
 This table is accessed once, on line 538. All of the columns are taken, which consists of `CUI`, `AUI`, `ATUI`, `SATUI`, `SAB`, `DEF`, `SUPPRESS`, and `CVF`.
 
+**What do these columns mean?**
+
+See [here](https://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.definitions_file_mrdef_rrf/).
+
 **What does this table contain?**
 
 **How does `umls2rdf.py` use this table?**
@@ -604,6 +624,10 @@ This table is accessed once, on line 538. All of the columns are taken, which co
 
 This table is accessed once, on line 549. All of the columns are taken, which consists of `CUI`, `LUI`, `SUI`, `METAUI`, `STYPE`, `CODE`, `ATUI`, `SATUI`, `ATN`, `SAB`, `ATV`, `SUPPRESS`, and `CVF`.
 
+**What do these columns mean?**
+
+See [here](https://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.simple_concept_and_atom_attribute/).
+
 **What does this table contain?**
 
 **How does `umls2rdf.py` use this table?**
@@ -614,6 +638,10 @@ This table is accessed once, on line 549. All of the columns are taken, which co
 
 This table is accessed once, on line 560. All of the columns are taken, which consists of `MRRANK_RANK`, `SAB`, `TTY`, and `SUPPRESS`.
 
+**What do these columns mean?**
+
+See [here](https://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.T.concept_name_ranking_file_mrrank/).
+
 **What does this table contain?**
 
 **How does `umls2rdf.py` use this table?**
@@ -624,6 +652,10 @@ This table is accessed once, on line 560. All of the columns are taken, which co
 
 This table is accessed once, on line 742. All of the columns are taken, which consists of `DOCKEY`, `VALUE`, `TYPE`, `EXPL`
 
+**What do these columns mean?**
+
+See [here](https://www.ncbi.nlm.nih.gov/books/NBK9685/table/ch03.Te/).
+
 **What does this table contain?**
 
 **How does `umls2rdf.py` use this table?**
@@ -644,4 +676,62 @@ This table is accessed once, on line 742. All of the columns are taken, which co
 
 5. Repeat steps 3 and 4 until timing is desirable.
 
-6. Evaluate whether the content is sufficiently comparable to what is currently in KG2.
\ No newline at end of file
+6. Evaluate whether the content is sufficiently comparable to what is currently in KG2.
+
+### Step 1
+
+Example: `umls-atc.ttl`
+
+```
+<http://purl.bioontology.org/ontology/ATC/C03AH01> a owl:Class ;
+        skos:prefLabel """chlorothiazide, combinations"""@en ;
+        skos:notation """C03AH01"""^^xsd:string ;
+        rdfs:subClassOf <http://purl.bioontology.org/ontology/ATC/C03AH> ;
+        <http://purl.bioontology.org/ontology/ATC/ATC_LEVEL> """5"""^^xsd:string ;
+        UMLS:has_cui """C3652440"""^^xsd:string ;
+        UMLS:has_tui """T109"""^^xsd:string ;
+        UMLS:has_tui """T121"""^^xsd:string ;
+        UMLS:has_sty <http://purl.bioontology.org/ontology/STY/T109> ;
+        UMLS:has_sty <http://purl.bioontology.org/ontology/STY/T121> ;
+```
+
+Example: `umls-chv.ttl`
+```
+<http://purl.bioontology.org/ontology/CHV/0000050974> a owl:Class ;
+        skos:prefLabel """synthesis"""@en ;
+        skos:notation """0000050974"""^^xsd:string ;
+        skos:definition """the combining of separate elements or substances to form a coherent whole"""@en ;
+        <http://purl.bioontology.org/ontology/CHV/COMBO_SCORE> """0.413096903"""^^xsd:string ;
+        <http://purl.bioontology.org/ontology/CHV/COMBO_SCORE_NO_TOP_WORDS> """0.413096903"""^^xsd:string ;
+        <http://purl.bioontology.org/ontology/CHV/CONTEXT_SCORE> """0.4381"""^^xsd:string ;
+        <http://purl.bioontology.org/ontology/CHV/CUI_SCORE> """0.4034"""^^xsd:string ;
+        <http://purl.bioontology.org/ontology/CHV/DISPARAGED> """no"""^^xsd:string ;
+        <http://purl.bioontology.org/ontology/CHV/FREQUENCY> """0.397790709"""^^xsd:string ;
+        UMLS:has_cui """C0220781"""^^xsd:string ;
+        UMLS:has_tui """T038"""^^xsd:string ;
+        UMLS:has_sty <http://purl.bioontology.org/ontology/STY/T038> ;
+```
+
+Example: `umls-drugbank.ttl`
+```
+<http://purl.bioontology.org/ontology/DRUGBANK/DB09085> a owl:Class ;
+        skos:prefLabel """Tetracaine"""@en ;
+        skos:notation """DB09085"""^^xsd:string ;
+        skos:altLabel """2-(Dimethylamino)ethyl p-(butylamino)benzoate"""@en , """2-(dimethylamino)ethyl 4-(butylamino)benzoate"""@en , """Amethocaine"""@en , """Amethocaine HCl"""@en , """Dicaine"""@en , """Diäthylaminoäthanol ester der p-butylaminobenzösäure"""@en , """Medihaler-Tetracaine"""@en , """Metraspray"""@en , """Tetracaine HCl"""@en , """Tetracaína"""@en , """Tétracaïne"""@en , """p-(butylamino)benzoic acid β-(dimethylamino)ethyl ester"""@en , """p-Butylaminobenzoyl-2-dimethylaminoethanol"""@en ;
+        <http://purl.bioontology.org/ontology/DRUGBANK/FDA_UNII_CODE> """0619F35CGV"""^^xsd:string ;
+        UMLS:has_cui """C0039629"""^^xsd:string ;
+        UMLS:has_cui """C0304456"""^^xsd:string ;
+        UMLS:has_cui """C0702211"""^^xsd:string ;
+        UMLS:has_cui """C4292382"""^^xsd:string ;
+        UMLS:has_cui """C4292391"""^^xsd:string ;
+        UMLS:has_tui """T109"""^^xsd:string ;
+        UMLS:has_tui """T121"""^^xsd:string ;
+        UMLS:has_sty <http://purl.bioontology.org/ontology/STY/T109> ;
+        UMLS:has_sty <http://purl.bioontology.org/ontology/STY/T121> ;
+```
+
+I am currently trying to find where `FDA_UNII_CODE` is in the data. I know that it is an attribute per https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html. 
+
+It looks like, per running `select * from MRSAT where SAB="DRUGBANK" limit 20;`, the name of the attribute is in the `ATN` column and the value is in the `ATV` column.
+
+This link discusses each of the `MRREL` types: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/abbreviations.html.
\ No newline at end of file

From d212e9470716fc753f69230ecc8179ff72f27922 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 7 Aug 2023 15:08:27 -0700
Subject: [PATCH 006/117] #316 add the mysql to md script directly into the
 file

---
 understanding_umls.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/understanding_umls.md b/understanding_umls.md
index 5099b504..3b7cf8cf 100644
--- a/understanding_umls.md
+++ b/understanding_umls.md
@@ -6,6 +6,14 @@
 3. Replace: `--\+$` With: `--`
 4. Replace: `( )+` With: ` `
 
+Script used:
+```
+sed -i -E "s/\+(-)+/\|--/g" umls_table.txt
+sed -i -E "s/^\|( )*//g" umls_table.txt
+sed -i -E "s/--\+$/--/g" umls_table.txt
+sed -i -E "s/( )+/ /g" umls_table.txt
+sed -i -E "s/<|>//g" umls_table.txt
+```
 
 # Tables
 ```

From 465559044bdce0d160f5c9e0ad25c8f790e36b30 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 7 Aug 2023 15:55:57 -0700
Subject: [PATCH 007/117] #316 first attempt at a join

---
 understanding_umls.md | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/understanding_umls.md b/understanding_umls.md
index 3b7cf8cf..2de1d850 100644
--- a/understanding_umls.md
+++ b/understanding_umls.md
@@ -742,4 +742,24 @@ I am currently trying to find where `FDA_UNII_CODE` is in the data. I know that
 
 It looks like, per running `select * from MRSAT where SAB="DRUGBANK" limit 20;`, the name of the attribute is in the `ATN` column and the value is in the `ATV` column.
 
-This link discusses each of the `MRREL` types: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/abbreviations.html.
\ No newline at end of file
+This link discusses each of the `MRREL` types: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/abbreviations.html.
+
+```
+select * from MRCONSO con left join MRSAT sat on con.CODE=sat.CODE where con.SAB="DRUGBANK" limit 10;
+```
+
+CUI | LAT | TS | LUI | STT | SUI | ISPREF | AUI | SAUI | SCUI | SDUI | SAB | TTY | CODE | STR | SRL | SUPPRESS | CVF | CUI | LUI | SUI | METAUI | STYPE | CODE | ATUI | SATUI | ATN | SAB | ATV | SUPPRESS | CVF |
+--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--
+C0039601 | ENG | S | L13409149 | VO | S16395464 | Y | A27406646 | NULL | DB00624 | NULL | DRUGBANK | FSY | DB00624 | Testosteronum | 0 | N | 256 | C0039601 | L0039601 | S0092451 | A27059293 | SCUI | DB00624 | AT215745781 | NULL | SID | DRUGBANK | DB05275 | N | NULL |
+C0039925 | ENG | S | L13409165 | PF | S16395565 | Y | A27406649 | NULL | DB00599 | NULL | DRUGBANK | FSY | DB00599 | Tiopentale | 0 | N | 256 | C0039925 | L0039925 | S0093293 | A27062921 | SCUI | DB00599 | AT215745786 | NULL | FDA_UNII_CODE | DRUGBANK | JI8Z5M7NA3 | N | NULL |
+C0004057 | ENG | S | L13415345 | PF | S16396444 | Y | A27406659 | NULL | DB00945 | NULL | DRUGBANK | FSY | DB00945 | ácido acetilsalicílico | 0 | N | 256 | C0004057 | L0001063 | S0584084 | A27066872 | SCUI | DB00945 | AT215746200 | NULL | SID | DRUGBANK | EXPT00475 | N | NULL |
+C0004057 | ENG | S | L13415345 | PF | S16396444 | Y | A27406659 | NULL | DB00945 | NULL | DRUGBANK | FSY | DB00945 | ácido acetilsalicílico | 0 | N | 256 | C0004057 | L0001063 | S0584084 | A27066872 | SCUI | DB00945 | AT215745697 | NULL | FDA_UNII_CODE | DRUGBANK | R16CO5Y76E | N | NULL |
+C0006491 | ENG | S | L13413033 | PF | S16390917 | Y | A27406692 | NULL | DB00611 | NULL | DRUGBANK | FSY | DB00611 | Butorphanolum | 0 | N | 256 | C0006491 | L0006491 | S0021116 | A27064721 | SCUI | DB00611 | AT215745783 | NULL | FDA_UNII_CODE | DRUGBANK | QV897JC36D | N | NULL |
+C0007735 | ENG | S | L13409541 | PF | S16391091 | Y | A27406763 | NULL | DB00456 | NULL | DRUGBANK | FSY | DB00456 | Cefalotina | 0 | N | 256 | C0007735 | L0007540 | S0023182 | A27055419 | SCUI | DB00456 | AT215745827 | NULL | SID | DRUGBANK | EXPT00946 | N | NULL |
+C0061323 | ENG | P | L0061323 | VO | S16392549 | Y | A27406770 | NULL | DB00222 | NULL | DRUGBANK | FSY | DB00222 | Glimépiride | 0 | N | 256 | C0061323 | L0061323 | S1325002 | A27055170 | SCUI | DB00222 | AT215745888 | NULL | SID | DRUGBANK | APRD00381 | N | NULL |
+C0064113 | ENG | S | L13414126 | PF | S16392995 | Y | A27406772 | NULL | DB01167 | NULL | DRUGBANK | FSY | DB01167 | Itraconazol | 0 | N | 256 | C0064113 | L0064113 | S0170262 | A27068928 | SCUI | DB01167 | AT215746145 | NULL | SID | DRUGBANK | APRD00040 | N | NULL |
+C0064113 | ENG | S | L13414126 | PF | S16392995 | Y | A27406772 | NULL | DB01167 | NULL | DRUGBANK | FSY | DB01167 | Itraconazol | 0 | N | 256 | C0064113 | L0064113 | S0170262 | A27068928 | SCUI | DB01167 | AT215746144 | NULL | FDA_UNII_CODE | DRUGBANK | 304NUG5GF4 | N | NULL |
+C0010927 | ENG | S | L13413100 | PF | S16391572 | Y | A27406779 | NULL | DB00851 | NULL | DRUGBANK | FSY | DB00851 | Dacarbazin | 0 | N | 256 | C0010927 | L0010927 | S0030020 | A27063198 | SCUI | DB00851 | AT215746222 | NULL | SID | DRUGBANK | APRD00331 | N | NULL |
+
+
+For some reason, some (all?) of the names are in latin.
\ No newline at end of file

From 4e30007b0ae833a425e0cc32e3cd5f4bf4db93bb Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 7 Aug 2023 16:30:00 -0700
Subject: [PATCH 008/117] #316 improving the query

---
 understanding_umls.md | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/understanding_umls.md b/understanding_umls.md
index 2de1d850..8515255c 100644
--- a/understanding_umls.md
+++ b/understanding_umls.md
@@ -762,4 +762,42 @@ C0064113 | ENG | S | L13414126 | PF | S16392995 | Y | A27406772 | NULL | DB01167
 C0010927 | ENG | S | L13413100 | PF | S16391572 | Y | A27406779 | NULL | DB00851 | NULL | DRUGBANK | FSY | DB00851 | Dacarbazin | 0 | N | 256 | C0010927 | L0010927 | S0030020 | A27063198 | SCUI | DB00851 | AT215746222 | NULL | SID | DRUGBANK | APRD00331 | N | NULL |
 
 
-For some reason, some (all?) of the names are in latin.
\ No newline at end of file
+For some reason, some (all?) of the names are in Latin.
+
+```
+select con.CUI, con.CODE, con.ISPREF, con.STR, sat.ATN, sat.ATV from MRCONSO con left join MRSAT sat on con.CODE=sat.CODE where con.SAB="DRUGBANK" limit 10;
+```
+CUI | CODE | ISPREF | STR | ATN | ATV |
+--|--|--|--|--|--
+C1948374 | DB08906 | Y | Fluticasonum furoas | FDA_UNII_CODE | JS86977WNV |
+C2930696 | DB08895 | Y | Tofacitinibum | FDA_UNII_CODE | 87LA6FU830 |
+C1948374 | DB08906 | Y | Furoato de fluticasona | FDA_UNII_CODE | JS86977WNV |
+C1948374 | DB08906 | Y | Furoate de fluticasone | FDA_UNII_CODE | JS86977WNV |
+C0042665 | DB09185 | Y | Viloxazina | FDA_UNII_CODE | 5I5Y2789ZF |
+C0123163 | DB09081 | Y | idébénone | FDA_UNII_CODE | HB6PN45W4J |
+C0042665 | DB09185 | Y | Viloxazinum | FDA_UNII_CODE | 5I5Y2789ZF |
+C0068700 | DB09220 | Y | Nicorandilum | FDA_UNII_CODE | 260456HAM0 |
+C0037659 | DB09099 | Y | Somatostatine | FDA_UNII_CODE | 6E20216Q0L |
+C0037659 | DB09099 | Y | Somatostatinum | FDA_UNII_CODE | 6E20216Q0L |
+
+```
+select con.CODE, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.ISPREF, '|', con.STR) SEPARATOR '\t'), GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', sat.ATV) SEPARATOR '\t') from MRCONSO con left join MRSAT sat on con.CODE=sat.CODE where con.SAB="DRUGBANK" GROUP BY con.CODE limit 10;
+```
+
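+**NEED TO INCREASE MAX GROUP_CONCAT LENGTH FIRST** (e.g. `SET group_concat_max_len=1000000000;`); otherwise MySQL truncates the concatenated values and only reports a warning.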
+
+**Had to escape the `|` separators as `\|` for the output to display as a Markdown table**
+
+
+CODE | GROUP_CONCAT(DISTINCT con.CUI) | GROUP_CONCAT(DISTINCT CONCAT(con.ISPREF, '\|', con.STR) SEPARATOR '\t') | GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '\|', sat.ATV) SEPARATOR '\t') |
+--|--|--|--
+DB00001 | C0378366,C0772394 | N\|Desulfatohirudin	N\|Lepirudin	Y\|Hirudin variant-1	Y\|Lepirudin recombinant	Y\|R-hirudin	Y\|[Leu1, Thr2]-63-desulfohirudin | FDA_UNII_CODE\|Y43GF64R34	RXAUI\|12740240	RXAUI\|12740241	RXAUI\|12740242	RXAUI\|8321260	RXAUI\|8471541	RXAUI\|8599806	RXCUI\|114934	RXCUI\|237057	SID\|BIOD00024	SID\|BTD00024 |
+DB00002 | C0995188 | N\|Cetuximab	Y\|Cétuximab	Y\|Cetuximabum | FDA_UNII_CODE\|PQX0D8J21J	RXAUI\|8473993	RXAUI\|8692140	RXAUI\|8692141	RXCUI\|318341	SID\|BIOD00071	SID\|BTD00071 |
+DB00003 | C1135662 | N\|Dornase alfa	Y\|Deoxyribonuclease (human clone 18-1 protein moiety)	Y\|Dornasa alfa	Y\|Dornase alfa, recombinant	Y\|Dornase alpha	Y\|Recombinant deoxyribonuclease (DNAse) | FDA_UNII_CODE\|953A26OA1Y	RXAUI\|10778765	RXAUI\|8278645	RXAUI\|8326085	RXAUI\|8339777	RXAUI\|8376403	RXAUI\|8686775	RXCUI\|337623	SID\|BIOD00001	SID\|BTD00001 |
+DB00004 | C0717670,C1383469 | N\|Denileukin diftitox	Y\|Denileukin	Y\|Interleukin-2/diptheria toxin fusion protein | FDA_UNII_CODE\|25E79B5CTM	RXAUI\|10333971	RXAUI\|10333972	RXAUI\|8331268	RXCUI\|214470	RXCUI\|451876	SID\|BIOD00084	SID\|BTD00084 |
+DB00005 | C0717758,C4291381,C4542001,C5135562 | N\|Etanercept	N\|etanercept-szzs	N\|etanercept-ykro	Y\|Recombinant human TNF	Y\|rhu TNFR:Fc	Y\|rhu-TNFR:Fc	Y\|TNFR-Immunoadhesin | FDA_UNII_CODE\|OP401G7OJC	RXAUI\|11350310	RXAUI\|11350311	RXAUI\|11350312	RXAUI\|11350313	RXAUI\|11350314	RXAUI\|8622888	RXAUI\|9712732	RXCUI\|1995554	RXCUI\|2103480	RXCUI\|214555	RXCUI\|2462511	SID\|BIOD00052	SID\|BTD00052 |
+DB00006 | C0168273 | N\|Bivalirudin	Y\|Bivalirudina	Y\|Bivalirudinum | FDA_UNII_CODE\|TN9BEX005G	RXAUI\|8657293	RXAUI\|8715166	RXAUI\|8715167	RXCUI\|60819	SID\|BIOD00076	SID\|BTD00076	SID\|DB02351	SID\|EXPT03302 |
+DB00007 | C0085272 | N\|Leuprolide	N\|Leuprorelin	Y\|Leuprorelina	Y\|Leuproreline	Y\|Leuprorelinum | FDA_UNII_CODE\|EFY6W0M8TG	RXAUI\|10785183	RXAUI\|10785184	RXAUI\|10785185	RXAUI\|8540224	RXAUI\|8646100	RXCUI\|42375	SID\|BIOD00009	SID\|BTD00009 |
+DB00008 | C0391001 | N\|Peginterferon alfa-2a	Y\|PEG-IFN alfa-2A	Y\|PEG-Interferon alfa-2A	Y\|Pegylated Interfeaon alfa-2A	Y\|Pegylated interferon alfa-2a	Y\|Pegylated interferon alpha-2a	Y\|Pegylated-interferon alfa 2a | FDA_UNII_CODE\|Q46947FE7K	RXAUI\|11350315	RXAUI\|11350316	RXAUI\|11350317	RXAUI\|11350318	RXAUI\|11350319	RXAUI\|8672645	RXAUI\|8731057	RXCUI\|120608	SID\|BIOD00043	SID\|BTD00043 |
+DB00009 | C0032143 | N\|Alteplase	N\|Tissue plasminogen activator	Y\|Alteplasa	Y\|Alteplase (genetical recombination)	Y\|Alteplase, recombinant	Y\|Alteplase,recombinant	Y\|Plasminogen activator (human tissue-type protein moiety)	Y\|rt-PA	Y\|t-PA	Y\|t-plasminogen activator	Y\|Tissue plasminogen activator alteplase	Y\|Tissue plasminogen activator, recombinant	Y\|tPA | FDA_UNII_CODE\|1RXS4UE564	RXAUI\|10778766	RXAUI\|8368173	RXAUI\|8383242	RXAUI\|8543112	RXAUI\|8578376	RXAUI\|9193634	RXAUI\|9193635	RXAUI\|9193636	RXAUI\|9193637	RXAUI\|9193638	RXAUI\|9193639	RXAUI\|9193640	RXAUI\|9193641	RXCUI\|8410	SID\|BIOD00050	SID\|BTD00050 |
+DB00010 | C0142046 | N\|Sermorelin | FDA_UNII_CODE\|89243S03TE	RXAUI\|8619290	RXCUI\|56188	SID\|BIOD00033	SID\|BTD00033 |

From 7b76d7cc6e3f1487816f46090c66c87502adc831 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 7 Aug 2023 16:37:25 -0700
Subject: [PATCH 009/117] #316 more ttl snippets for context

---
 understanding_umls.md | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/understanding_umls.md b/understanding_umls.md
index 8515255c..2fa8a089 100644
--- a/understanding_umls.md
+++ b/understanding_umls.md
@@ -801,3 +801,40 @@ DB00007 | C0085272 | N\|Leuprolide	N\|Leuprorelin	Y\|Leuprorelina	Y\|Leuprorelin
 DB00008 | C0391001 | N\|Peginterferon alfa-2a	Y\|PEG-IFN alfa-2A	Y\|PEG-Interferon alfa-2A	Y\|Pegylated Interfeaon alfa-2A	Y\|Pegylated interferon alfa-2a	Y\|Pegylated interferon alpha-2a	Y\|Pegylated-interferon alfa 2a | FDA_UNII_CODE\|Q46947FE7K	RXAUI\|11350315	RXAUI\|11350316	RXAUI\|11350317	RXAUI\|11350318	RXAUI\|11350319	RXAUI\|8672645	RXAUI\|8731057	RXCUI\|120608	SID\|BIOD00043	SID\|BTD00043 |
 DB00009 | C0032143 | N\|Alteplase	N\|Tissue plasminogen activator	Y\|Alteplasa	Y\|Alteplase (genetical recombination)	Y\|Alteplase, recombinant	Y\|Alteplase,recombinant	Y\|Plasminogen activator (human tissue-type protein moiety)	Y\|rt-PA	Y\|t-PA	Y\|t-plasminogen activator	Y\|Tissue plasminogen activator alteplase	Y\|Tissue plasminogen activator, recombinant	Y\|tPA | FDA_UNII_CODE\|1RXS4UE564	RXAUI\|10778766	RXAUI\|8368173	RXAUI\|8383242	RXAUI\|8543112	RXAUI\|8578376	RXAUI\|9193634	RXAUI\|9193635	RXAUI\|9193636	RXAUI\|9193637	RXAUI\|9193638	RXAUI\|9193639	RXAUI\|9193640	RXAUI\|9193641	RXCUI\|8410	SID\|BIOD00050	SID\|BTD00050 |
 DB00010 | C0142046 | N\|Sermorelin | FDA_UNII_CODE\|89243S03TE	RXAUI\|8619290	RXCUI\|56188	SID\|BIOD00033	SID\|BTD00033 |
+
+Here is that first element in the `TTL` file:
+```
+<http://purl.bioontology.org/ontology/DRUGBANK/DB00001> a owl:Class ;
+        skos:prefLabel """Lepirudin"""@en ;
+        skos:notation """DB00001"""^^xsd:string ;
+        skos:altLabel """Desulfatohirudin"""@en , """Hirudin variant-1"""@en , """Lepirudin recombinant"""@en , """R-hirudin"""@en , """[Leu1, Thr2]-63-desulfohirudin"""@en ;
+        <http://purl.bioontology.org/ontology/DRUGBANK/SID> """BIOD00024"""^^xsd:string ;
+        <http://purl.bioontology.org/ontology/DRUGBANK/FDA_UNII_CODE> """Y43GF64R34"""^^xsd:string ;
+        <http://purl.bioontology.org/ontology/DRUGBANK/SID> """BTD00024"""^^xsd:string ;
+        UMLS:has_cui """C0378366"""^^xsd:string ;
+        UMLS:has_cui """C0772394"""^^xsd:string ;
+        UMLS:has_tui """T116"""^^xsd:string ;
+        UMLS:has_tui """T121"""^^xsd:string ;
+        UMLS:has_sty <http://purl.bioontology.org/ontology/STY/T116> ;
+        UMLS:has_sty <http://purl.bioontology.org/ontology/STY/T121> ;
+```
+
+Here is `DB00009` in the `TTL` file:
+```
+<http://purl.bioontology.org/ontology/DRUGBANK/DB00009> a owl:Class ;
+        skos:prefLabel """Alteplase"""@en ;
+        skos:notation """DB00009"""^^xsd:string ;
+        skos:altLabel """Alteplasa"""@en , """Alteplase (genetical recombination)"""@en , """Alteplase, recombinant"""@en , """Alteplase,recombinant"""@en , """Plasminogen activator (human tissue-type protein moiety)"""@en , """Tissue plasminogen activator"""@en , """Tissue plasminogen activator alteplase"""@en , """Tissue plasminogen activator, recombinant"""@en , """rt-PA"""@en , """t-PA"""@en , """t-plasminogen activator"""@en , """tPA"""@en ;
+        <http://purl.bioontology.org/ontology/DRUGBANK/FDA_UNII_CODE> """1RXS4UE564"""^^xsd:string ;
+        <http://purl.bioontology.org/ontology/DRUGBANK/SID> """BIOD00050"""^^xsd:string ;
+        <http://purl.bioontology.org/ontology/DRUGBANK/SID> """BTD00050"""^^xsd:string ;
+        UMLS:has_cui """C0032143"""^^xsd:string ;
+        UMLS:has_tui """T116"""^^xsd:string ;
+        UMLS:has_tui """T121"""^^xsd:string ;
+        UMLS:has_tui """T126"""^^xsd:string ;
+        UMLS:has_sty <http://purl.bioontology.org/ontology/STY/T116> ;
+        UMLS:has_sty <http://purl.bioontology.org/ontology/STY/T121> ;
+        UMLS:has_sty <http://purl.bioontology.org/ontology/STY/T126> ;
+```
+
+I need to look more into how to tell which name is the correct name.
\ No newline at end of file

From a819ef9c7ca78103ab6960e814f80168c9faf420 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 7 Aug 2023 20:09:27 -0700
Subject: [PATCH 010/117] #316 UMLS source predicates

---
 understanding_umls.md | 123 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 122 insertions(+), 1 deletion(-)

diff --git a/understanding_umls.md b/understanding_umls.md
index 2fa8a089..ce7019d2 100644
--- a/understanding_umls.md
+++ b/understanding_umls.md
@@ -837,4 +837,125 @@ Here is `DB00009` in the `TTL` file:
         UMLS:has_sty <http://purl.bioontology.org/ontology/STY/T126> ;
 ```
 
-I need to look more into how to tell which name is the correct name.
\ No newline at end of file
+I need to look more into how to tell which name is the correct name.
+
+UMLS Source Predicates:
+```
+﻿[
+  {
+    "e.source_predicate": "UMLS:RB",
+    "e.primary_knowledge_source": "infores:umls-metathesaurus",
+    "count(e)": 235110
+  },
+  {
+    "e.source_predicate": "UMLS:RO",
+    "e.primary_knowledge_source": "infores:umls-metathesaurus",
+    "count(e)": 722308
+  },
+  {
+    "e.source_predicate": "UMLS:related_to",
+    "e.primary_knowledge_source": "infores:medlineplus",
+    "count(e)": 5658
+  },
+  {
+    "e.source_predicate": "UMLS:RQ",
+    "e.primary_knowledge_source": "infores:medlineplus",
+    "count(e)": 3224
+  },
+  {
+    "e.source_predicate": "UMLS:SY",
+    "e.primary_knowledge_source": "infores:medlineplus",
+    "count(e)": 932
+  },
+  {
+    "e.source_predicate": "UMLS:mapped_to",
+    "e.primary_knowledge_source": "infores:medlineplus",
+    "count(e)": 1008
+  },
+  {
+    "e.source_predicate": "UMLS:exhibited_by",
+    "e.primary_knowledge_source": "infores:umls-metathesaurus",
+    "count(e)": 2332
+  },
+  {
+    "e.source_predicate": "UMLS:has_structural_class",
+    "e.primary_knowledge_source": "infores:medrt-umls",
+    "count(e)": 4
+  },
+  {
+    "e.source_predicate": "UMLS:has_mapping_qualifier",
+    "e.primary_knowledge_source": "infores:medlineplus",
+    "count(e)": 42
+  },
+  {
+    "e.source_predicate": "UMLS:measures",
+    "e.primary_knowledge_source": "infores:umls-metathesaurus",
+    "count(e)": 406
+  },
+  {
+    "e.source_predicate": "UMLS:owning_subsection_of",
+    "e.primary_knowledge_source": "infores:hl7-umls",
+    "count(e)": 84
+  },
+  {
+    "e.source_predicate": "UMLS:has_supported_concept_property",
+    "e.primary_knowledge_source": "infores:hl7-umls",
+    "count(e)": 738
+  },
+  {
+    "e.source_predicate": "UMLS:has_supported_concept_relationship",
+    "e.primary_knowledge_source": "infores:hl7-umls",
+    "count(e)": 648
+  },
+  {
+    "e.source_predicate": "UMLS:class_code_classified_by",
+    "e.primary_knowledge_source": "infores:hl7-umls",
+    "count(e)": 122
+  },
+  {
+    "e.source_predicate": "UMLS:owning_section_of",
+    "e.primary_knowledge_source": "infores:hl7-umls",
+    "count(e)": 18
+  },
+  {
+    "e.source_predicate": "UMLS:has_context_binding",
+    "e.primary_knowledge_source": "infores:hl7-umls",
+    "count(e)": 134
+  },
+  {
+    "e.source_predicate": "UMLS:may_be_qualified_by",
+    "e.primary_knowledge_source": "infores:hl7-umls",
+    "count(e)": 40
+  },
+  {
+    "e.source_predicate": "UMLS:larger_than",
+    "e.primary_knowledge_source": "infores:hl7-umls",
+    "count(e)": 2
+  },
+  {
+    "e.source_predicate": "UMLS:component_of",
+    "e.primary_knowledge_source": "infores:hl7-umls",
+    "count(e)": 28
+  },
+  {
+    "e.source_predicate": "UMLS:has_component",
+    "e.primary_knowledge_source": "infores:hl7-umls",
+    "count(e)": 18
+  },
+  {
+    "e.source_predicate": "UMLS:has_owning_affiliate",
+    "e.primary_knowledge_source": "infores:hl7-umls",
+    "count(e)": 2
+  },
+  {
+    "e.source_predicate": "UMLS:has_physiologic_effect",
+    "e.primary_knowledge_source": "infores:medrt-umls",
+    "count(e)": 2
+  },
+  {
+    "e.source_predicate": "UMLS:has_form",
+    "e.primary_knowledge_source": "infores:umls-metathesaurus",
+    "count(e)": 2
+  }
+]
+```
\ No newline at end of file

From 90d9b8c1865cfb33d06310592d1ef6169858707e Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Tue, 8 Aug 2023 11:32:11 -0700
Subject: [PATCH 011/117] #316 adding more to query development

---
 understanding_umls.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/understanding_umls.md b/understanding_umls.md
index ce7019d2..e0e48b6a 100644
--- a/understanding_umls.md
+++ b/understanding_umls.md
@@ -841,7 +841,7 @@ I need to look more into how to tell which name is the correct name.
 
 UMLS Source Predicates:
 ```
-﻿[
+[
   {
     "e.source_predicate": "UMLS:RB",
     "e.primary_knowledge_source": "infores:umls-metathesaurus",
@@ -958,4 +958,8 @@ UMLS Source Predicates:
     "count(e)": 2
   }
 ]
-```
\ No newline at end of file
+```
+
+```
+select con.CODE, con.SAB, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.ISPREF, '|', con.STR) SEPARATOR '\t'), GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', sat.ATV) SEPARATOR '\t') from MRCONSO con left join MRSAT sat on con.CODE=sat.CODE GROUP BY con.CODE, con.SAB;
+```

From 5b74948e2a764a0e715039d9c4062e66fc2993ed Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Tue, 8 Aug 2023 18:44:37 -0700
Subject: [PATCH 012/117] #316 start implementing queries

---
 umls_mysql_to_list_jsonl.py | 99 +++++++++++++++++++++++++++++++++++++
 understanding_umls.md       | 22 +++++++++
 2 files changed, 121 insertions(+)
 create mode 100755 umls_mysql_to_list_jsonl.py

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
new file mode 100755
index 00000000..d3c4707f
--- /dev/null
+++ b/umls_mysql_to_list_jsonl.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+'''umls_mysql_to_list_jsonl.py: extracts all of the information from UMLS and stores it in a JSON Lines output
+
+   Usage: umls_mysql_to_list_jsonl.py <mysqlConfigFile> <mysqlDBName> <outputFile.json>
+'''
+
+__author__ = 'Erica Wood'
+__copyright__ = 'Oregon State University'
+__credits__ = ['Stephen Ramsey', 'Erica Wood']
+__license__ = 'MIT'
+__version__ = '0.1.0'
+__maintainer__ = ''
+__email__ = ''
+__status__ = 'Prototype'
+
+
+import argparse
+import kg2_util
+import pymysql
+
+
+def make_arg_parser():
+    arg_parser = argparse.ArgumentParser(description='umls_mysql_to_list_jsonl.py: extracts all of the information from UMLS and stores it in a JSON Lines output')
+    arg_parser.add_argument('mysqlConfigFile', type=str)
+    arg_parser.add_argument('mysqlDBName', type=str)
+    arg_parser.add_argument('outputFile', type=str)
+    return arg_parser
+
+
+def code_sources(cursor, output):
+    code_source_info = dict()
+
+    names_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.ISPREF, '|', con.STR) SEPARATOR '\t') FROM MRCONSO con GROUP BY con.CODE, con.SAB"
+    extra_info_sql_statement = "SELECT sat.CODE, sat.SAB, GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', sat.ATV) SEPARATOR '\t') FROM MRSAT sat GROUP BY sat.CODE, sat.SAB"
+
+    cursor.execute(names_sql_statement)
+
+    cui_key = 'cuis'
+    name_key = 'names'
+    info_key = 'info'
+
+    for result in cursor.fetchall():
+        (node_id, node_source, cui, name) = result
+        key = (node_id, node_source)
+        code_source_info[key] = dict()
+        code_source_info[key][cui_key] = cui.split(',')
+        code_source_info[key][name_key] = name.split('\t')
+
+    print("Finished names_sql_statement at", kg2_util.date())
+
+    cursor.execute(extra_info_sql_statement)
+
+    for result in cursor.fetchall():
+        (node_id, node_source, info) = result
+        key = (node_id, node_source)
+        if key not in code_source_info:
+            code_source_info[key] = dict()
+            print(key, "not in original code_source_info dict")
+        code_source_info[key][info_key] = info.split('\t')
+
+    print("Finished extra_info_sql_statement at", kg2_util.date())
+
+    for key, val in code_source_info.items():
+        # It needs to print it all out for some reason to actually do the output write
+        print(str({str(key): val}))
+        output.write({str(key): val})
+
+
+if __name__ == '__main__':
+    print("Starting umls_mysql_to_list_jsonl.py at", kg2_util.date())
+    args = make_arg_parser().parse_args()
+    mysql_config_file = args.mysqlConfigFile
+    mysql_db_name = args.mysqlDBName
+    output_file_name = args.outputFile
+    connection = pymysql.connect(read_default_file=mysql_config_file, db=mysql_db_name)
+    preds_dict = dict()
+
+    output_info = kg2_util.create_single_jsonlines(False)
+    output = output_info[0]
+
+    # https://stackoverflow.com/questions/7208773/mysql-row-30153-was-cut-by-group-concat-error
+    max_len_sql_statement = "SET group_concat_max_len=1000000000"
+
+    sql_statement = ("SELECT SUBJECT_CUI, PREDICATE, OBJECT_CUI, GROUP_CONCAT(DISTINCT SUBJECT_SEMTYPE), GROUP_CONCAT(DISTINCT OBJECT_SEMTYPE), "
+                     "GROUP_CONCAT(DISTINCT DATE_FORMAT(CURR_TIMESTAMP, '%Y-%m-%d %H:%i:%S')), "
+                     "GROUP_CONCAT(CONCAT(PMID, '|', SENTENCE, '|', SUBJECT_SCORE, '|', OBJECT_SCORE, '|', DP) SEPARATOR '\t') "
+                     "FROM ((PREDICATION NATURAL JOIN CITATIONS) NATURAL JOIN SENTENCE) NATURAL JOIN PREDICATION_AUX "
+                     "GROUP BY SUBJECT_CUI, PREDICATE, OBJECT_CUI")
+
+    with connection.cursor() as cursor:
+        cursor.execute(max_len_sql_statement)
+        cursor.fetchall()
+
+        # Execute statement we care about after clearing any "results"
+        code_sources(cursor, output)
+    connection.close()
+
+    kg2_util.close_single_jsonlines(output_info, output_file_name)
+    print("Finishing umls_mysql_to_list_jsonl.py at", kg2_util.date())
diff --git a/understanding_umls.md b/understanding_umls.md
index e0e48b6a..65f4b8e1 100644
--- a/understanding_umls.md
+++ b/understanding_umls.md
@@ -963,3 +963,25 @@ UMLS Source Predicates:
 ```
 select con.CODE, con.SAB, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.ISPREF, '|', con.STR) SEPARATOR '\t'), GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', sat.ATV) SEPARATOR '\t') from MRCONSO con left join MRSAT sat on con.CODE=sat.CODE GROUP BY con.CODE, con.SAB;
 ```
+This ran for about 4.5 hours before hitting this error:
+```
+ERROR 1114 (HY000): The table '/tmp/#sql31e_8_6' is full
+```
+
+New plan - split it up:
+
+```
+select con.CODE, con.SAB, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.ISPREF, '|', con.STR) SEPARATOR '\t') from MRCONSO con GROUP BY con.CODE, con.SAB;
+```
+gives
+```
+7137936 rows in set, 3758 warnings (1 min 29.37 sec)
+```
+
+```
+select sat.CODE, sat.SAB, GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', sat.ATV) SEPARATOR '\t') from MRSAT sat GROUP BY sat.CODE, sat.SAB;
+```
+gives
+```
+5330040 rows in set, 65535 warnings (10 min 11.85 sec)
+```
\ No newline at end of file

From 7068316cef666ff77059071bad627b133ce40e0e Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 9 Aug 2023 12:57:36 -0700
Subject: [PATCH 013/117] #316 add CUI related code

---
 umls_mysql_to_list_jsonl.py | 81 +++++++++++++++++++++++++++++++------
 1 file changed, 68 insertions(+), 13 deletions(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index d3c4707f..01f61f8a 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -29,16 +29,14 @@ def make_arg_parser():
 
 def code_sources(cursor, output):
     code_source_info = dict()
+    cui_key = 'cuis'
+    name_key = 'names'
+    info_key = 'info'
 
     names_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.ISPREF, '|', con.STR) SEPARATOR '\t') FROM MRCONSO con GROUP BY con.CODE, con.SAB"
     extra_info_sql_statement = "SELECT sat.CODE, sat.SAB, GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', sat.ATV) SEPARATOR '\t') FROM MRSAT sat GROUP BY sat.CODE, sat.SAB"
 
     cursor.execute(names_sql_statement)
-
-    cui_key = 'cuis'
-    name_key = 'names'
-    info_key = 'info'
-
     for result in cursor.fetchall():
         (node_id, node_source, cui, name) = result
         key = (node_id, node_source)
@@ -49,7 +47,6 @@ def code_sources(cursor, output):
     print("Finished names_sql_statement at", kg2_util.date())
 
     cursor.execute(extra_info_sql_statement)
-
     for result in cursor.fetchall():
         (node_id, node_source, info) = result
         key = (node_id, node_source)
@@ -66,6 +63,70 @@ def code_sources(cursor, output):
         output.write({str(key): val})
 
 
+def cui_sources(cursor, output):
+    cui_source_info = dict()
+    tui_key = 'tuis'
+    name_key = 'names'
+    relation_key = 'relations'
+    definitions_key = 'definitions'
+
+    names_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT CONCAT(ISPREF, '|', STR) SEPARATOR '\t') FROM MRCONSO WHERE LAT=\"ENG\" GROUP BY CUI"
+    tuis_sql_statement = "SELECT CUI, GROUP_CONCAT(TUI) FROM MRSTY GROUP BY CUI"
+    relations_sql_statement = "SELECT CUI1, REL, RELA, DIR, CUI2, SAB FROM MRREL"
+    definitions_sql_statement = "SELECT CUI, DEF FROM MRDEF"
+
+    cursor.execute(names_sql_statement)
+    for result in cursor.fetchall():
+        (node_id, name) = result
+        key = node_id
+        cui_source_info[key] = dict()
+        cui_source_info[key][name_key] = name.split('\t')
+
+    print("Finished names_sql_statement at", kg2_util.date())
+
+    cursor.execute(tuis_sql_statement)
+    for result in cursor.fetchall():
+        (node_id, tuis) = result
+        key = node_id
+        if key not in cui_source_info:
+            # This happens if a node doesn't have an English name. Since UMLS:C5779458 (an example one)
+            # wasn't in KG2.8.3pre, I am having these skipped
+            continue
+        cui_source_info[key][tui_key] = tuis.split('\t')
+
+    print("Finished tuis_sql_statement at", kg2_util.date())
+
+    cursor.execute(relations_sql_statement)
+    for result in cursor.fetchall():
+        (cui1, rel, rela, direction, cui2, source) = result
+        key = cui1
+        if key not in cui_source_info:
+            # See above for explanation
+            continue
+        if relation_key not in cui_source_info[key]:
+            cui_source_info[key][relation_key] = list()
+        cui_source_info[key][relation_key].append((rel, rela, direction, cui2, source))
+
+    print("Finished relations_sql_statement at", kg2_util.date())
+
+    cursor.execute(definitions_sql_statement)
+    for result in cursor.fetchall():
+        (node_id, definition) = result
+        key = node_id
+        if key not in cui_source_info:
+            # See above for explanation
+            continue
+        cui_source_info[key][definitions_key] = definition
+
+    print("Finished definitions_sql_statement at", kg2_util.date())
+
+    for key, val in cui_source_info.items():
+        # It needs to print it all out for some reason to actually do the output write
+        print(str({str(key): val}))
+        output.write({str(key): val})
+
+
+
 if __name__ == '__main__':
     print("Starting umls_mysql_to_list_jsonl.py at", kg2_util.date())
     args = make_arg_parser().parse_args()
@@ -81,18 +142,12 @@ def code_sources(cursor, output):
     # https://stackoverflow.com/questions/7208773/mysql-row-30153-was-cut-by-group-concat-error
     max_len_sql_statement = "SET group_concat_max_len=1000000000"
 
-    sql_statement = ("SELECT SUBJECT_CUI, PREDICATE, OBJECT_CUI, GROUP_CONCAT(DISTINCT SUBJECT_SEMTYPE), GROUP_CONCAT(DISTINCT OBJECT_SEMTYPE), "
-                     "GROUP_CONCAT(DISTINCT DATE_FORMAT(CURR_TIMESTAMP, '%Y-%m-%d %H:%i:%S')), "
-                     "GROUP_CONCAT(CONCAT(PMID, '|', SENTENCE, '|', SUBJECT_SCORE, '|', OBJECT_SCORE, '|', DP) SEPARATOR '\t') "
-                     "FROM ((PREDICATION NATURAL JOIN CITATIONS) NATURAL JOIN SENTENCE) NATURAL JOIN PREDICATION_AUX "
-                     "GROUP BY SUBJECT_CUI, PREDICATE, OBJECT_CUI")
-
     with connection.cursor() as cursor:
         cursor.execute(max_len_sql_statement)
         cursor.fetchall()
 
         # Execute statement we care about after clearing any "results"
-        code_sources(cursor, output)
+        cui_sources(cursor, output)
     connection.close()
 
     kg2_util.close_single_jsonlines(output_info, output_file_name)

From e99d8f96ceafdc7b8123ffd99c0442aae900b15e Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 9 Aug 2023 14:39:06 -0700
Subject: [PATCH 014/117] #316 we currently only need english based names

---
 umls_mysql_to_list_jsonl.py | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index 01f61f8a..e2f2a04b 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -27,6 +27,21 @@ def make_arg_parser():
     return arg_parser
 
 
+def get_english_sources(cursor):
+    sources_sql_statement = "SELECT RSAB, LAT FROM MRSAB"
+    sources = []
+
+    cursor.execute(sources_sql_statement)
+    for result in cursor.fetchall():
+        (source, language) = result
+        if language == 'ENG':
+            sources.append(source)
+
+    print("Finished sources_sql_statement at", kg2_util.date())
+
+    return sources
+
+
 def code_sources(cursor, output):
     code_source_info = dict()
     cui_key = 'cuis'
@@ -63,17 +78,19 @@ def code_sources(cursor, output):
         output.write({str(key): val})
 
 
-def cui_sources(cursor, output):
+def cui_sources(cursor, output, sources):
     cui_source_info = dict()
     tui_key = 'tuis'
     name_key = 'names'
     relation_key = 'relations'
     definitions_key = 'definitions'
 
-    names_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT CONCAT(ISPREF, '|', STR) SEPARATOR '\t') FROM MRCONSO WHERE LAT=\"ENG\" GROUP BY CUI"
+    sources_where = str(sources).replace('[', '(').replace(']', ')')
+
+    names_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT CONCAT(SAB, '|', ISPREF, '|', STR) SEPARATOR '\t') FROM MRCONSO WHERE SAB IN " + sources_where + " GROUP BY CUI"
     tuis_sql_statement = "SELECT CUI, GROUP_CONCAT(TUI) FROM MRSTY GROUP BY CUI"
-    relations_sql_statement = "SELECT CUI1, REL, RELA, DIR, CUI2, SAB FROM MRREL"
-    definitions_sql_statement = "SELECT CUI, DEF FROM MRDEF"
+    relations_sql_statement = "SELECT CUI1, REL, RELA, DIR, CUI2, SAB FROM MRREL WHERE SAB IN " + sources_where
+    definitions_sql_statement = "SELECT CUI, DEF FROM MRDEF WHERE SAB IN " + sources_where
 
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():
@@ -89,8 +106,7 @@ def cui_sources(cursor, output):
         (node_id, tuis) = result
         key = node_id
         if key not in cui_source_info:
-            # This happens if a node doesn't have an English name. Since UMLS:C5779458 (an example one)
-            # wasn't in KG2.8.3pre, I am having these skipped
+            # This happens if a node doesn't have an English name. See https://github.com/RTXteam/RTX-KG2/issues/316#issuecomment-1672074392
             continue
         cui_source_info[key][tui_key] = tuis.split('\t')
 
@@ -147,7 +163,9 @@ def cui_sources(cursor, output):
         cursor.fetchall()
 
         # Execute statement we care about after clearing any "results"
-        cui_sources(cursor, output)
+        sources = get_english_sources(cursor)
+
+        cui_sources(cursor, output, sources)
     connection.close()
 
     kg2_util.close_single_jsonlines(output_info, output_file_name)

From 83d641e745201983d71509969bfbe523f21aa244 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 9 Aug 2023 16:58:00 -0700
Subject: [PATCH 015/117] #316 making some progress with structuring name data

---
 umls_mysql_to_list_jsonl.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index e2f2a04b..1862fce2 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -94,10 +94,18 @@ def cui_sources(cursor, output, sources):
 
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():
-        (node_id, name) = result
+        (node_id, names) = result
         key = node_id
         cui_source_info[key] = dict()
-        cui_source_info[key][name_key] = name.split('\t')
+        cui_source_info[key][name_key] = dict()
+        for name in names.split('\t'):
+            split_name = name.split('|')
+            assert len(split_name) == 3, split_name
+            if split_name[0] not in cui_source_info[key][name_key]:
+                cui_source_info[key][name_key][split_name[0]] = dict()
+            if split_name[1] not in cui_source_info[key][name_key][split_name[0]]:
+                cui_source_info[key][name_key][split_name[0]][split_name[1]] = list()
+            existing_val = cui_source_info[key][name_key][split_name[0]][split_name[1]].append(split_name[2])
 
     print("Finished names_sql_statement at", kg2_util.date())
 
@@ -166,6 +174,8 @@ def cui_sources(cursor, output, sources):
         sources = get_english_sources(cursor)
 
         cui_sources(cursor, output, sources)
+
+        # code_sources(cursor, output)
     connection.close()
 
     kg2_util.close_single_jsonlines(output_info, output_file_name)

From acac6310c6beb2f77356f57305ae582514eba612 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 10 Aug 2023 14:53:38 -0700
Subject: [PATCH 016/117] #316 making the extraction actually pull stuff out
 and store it in a helpful way

---
 umls_mysql_to_list_jsonl.py | 61 ++++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 17 deletions(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index 1862fce2..93b150e8 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -48,16 +48,25 @@ def code_sources(cursor, output):
     name_key = 'names'
     info_key = 'info'
 
-    names_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.ISPREF, '|', con.STR) SEPARATOR '\t') FROM MRCONSO con GROUP BY con.CODE, con.SAB"
-    extra_info_sql_statement = "SELECT sat.CODE, sat.SAB, GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', sat.ATV) SEPARATOR '\t') FROM MRSAT sat GROUP BY sat.CODE, sat.SAB"
+    names_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.TTY, '|', con.ISPREF, '|', con.STR) SEPARATOR '\t') FROM MRCONSO con GROUP BY con.CODE, con.SAB"
+    extra_info_sql_statement = "SELECT sat.CODE, sat.SAB, GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', REPLACE(sat.ATV, '\t', ' ')) SEPARATOR '\t') FROM MRSAT sat GROUP BY sat.CODE, sat.SAB"
 
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():
-        (node_id, node_source, cui, name) = result
+        (node_id, node_source, cui, names) = result
         key = (node_id, node_source)
         code_source_info[key] = dict()
         code_source_info[key][cui_key] = cui.split(',')
-        code_source_info[key][name_key] = name.split('\t')
+        if name_key not in code_source_info[key]:
+            code_source_info[key][name_key] = dict()
+        for name in names.split('\t'):
+            split_name = name.split('|')
+            assert len(split_name) == 3, split_name
+            if split_name[0] not in code_source_info[key][name_key]:
+                code_source_info[key][name_key][split_name[0]] = dict()
+            if split_name[1] not in code_source_info[key][name_key][split_name[0]]:
+                code_source_info[key][name_key][split_name[0]][split_name[1]] = list()
+            code_source_info[key][name_key][split_name[0]][split_name[1]].append(split_name[2])
 
     print("Finished names_sql_statement at", kg2_util.date())
 
@@ -66,17 +75,28 @@ def code_sources(cursor, output):
         (node_id, node_source, info) = result
         key = (node_id, node_source)
         if key not in code_source_info:
-            code_source_info[key] = dict()
-            print(key, "not in original code_source_info dict")
-        code_source_info[key][info_key] = info.split('\t')
+            # This occurs if a node doesn't have a name.
+            continue
+        if info_key not in code_source_info[key]:
+            code_source_info[key][info_key] = dict()
+        for info_piece in info.split('\t'):
+            split_info_piece = info_piece.split('|')
+            assert len(split_info_piece) == 2, split_info_piece
+            if split_info_piece[0] not in code_source_info[key][info_key]:
+                code_source_info[key][info_key][split_info_piece[0]] = set()
+            code_source_info[key][info_key][split_info_piece[0]].add(split_info_piece[1])
+        for info_type in code_source_info[key][info_key]:
+            code_source_info[key][info_key][info_type] = list(code_source_info[key][info_key][info_type])
 
     print("Finished extra_info_sql_statement at", kg2_util.date())
 
+    record_num = 0
     for key, val in code_source_info.items():
-        # It needs to print it all out for some reason to actually do the output write
-        print(str({str(key): val}))
+        record_num += 1
         output.write({str(key): val})
 
+    print("Finished adding", record_num, "records in code_sources() at", kg2_util.date())
+
 
 def cui_sources(cursor, output, sources):
     cui_source_info = dict()
@@ -105,7 +125,7 @@ def cui_sources(cursor, output, sources):
                 cui_source_info[key][name_key][split_name[0]] = dict()
             if split_name[1] not in cui_source_info[key][name_key][split_name[0]]:
                 cui_source_info[key][name_key][split_name[0]][split_name[1]] = list()
-            existing_val = cui_source_info[key][name_key][split_name[0]][split_name[1]].append(split_name[2])
+            cui_source_info[key][name_key][split_name[0]][split_name[1]].append(split_name[2])
 
     print("Finished names_sql_statement at", kg2_util.date())
 
@@ -116,7 +136,7 @@ def cui_sources(cursor, output, sources):
         if key not in cui_source_info:
             # This happens if a node doesn't have an English name. See https://github.com/RTXteam/RTX-KG2/issues/316#issuecomment-1672074392
             continue
-        cui_source_info[key][tui_key] = tuis.split('\t')
+        cui_source_info[key][tui_key] = tuis.split(',')
 
     print("Finished tuis_sql_statement at", kg2_util.date())
 
@@ -128,8 +148,14 @@ def cui_sources(cursor, output, sources):
             # See above for explanation
             continue
         if relation_key not in cui_source_info[key]:
-            cui_source_info[key][relation_key] = list()
-        cui_source_info[key][relation_key].append((rel, rela, direction, cui2, source))
+            cui_source_info[key][relation_key] = dict()
+
+        relation_type_key = ','.join([str(rel), str(rela), str(direction)])
+        if source not in cui_source_info[key][relation_key]:
+            cui_source_info[key][relation_key][source] = dict()
+        if relation_type_key not in cui_source_info[key][relation_key][source]:
+            cui_source_info[key][relation_key][source][relation_type_key] = list()
+        cui_source_info[key][relation_key][source][relation_type_key].append(cui2)
 
     print("Finished relations_sql_statement at", kg2_util.date())
 
@@ -144,11 +170,12 @@ def cui_sources(cursor, output, sources):
 
     print("Finished definitions_sql_statement at", kg2_util.date())
 
+    record_num = 0
     for key, val in cui_source_info.items():
-        # It needs to print it all out for some reason to actually do the output write
-        print(str({str(key): val}))
+        record_num += 1
         output.write({str(key): val})
 
+    print("Finished adding", record_num, "records in cui_sources() at", kg2_util.date())
 
 
 if __name__ == '__main__':
@@ -173,9 +200,9 @@ def cui_sources(cursor, output, sources):
         # Execute statement we care about after clearing any "results"
         sources = get_english_sources(cursor)
 
-        cui_sources(cursor, output, sources)
+        code_sources(cursor, output)
+        # cui_sources(cursor, output, sources)
 
-        # code_sources(cursor, output)
     connection.close()
 
     kg2_util.close_single_jsonlines(output_info, output_file_name)

From e361f8483a9effa04714e04b9b1254c6c870d5d9 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Tue, 15 Aug 2023 12:03:51 -0700
Subject: [PATCH 017/117] #316 more name information on CUI sources, TUIs on
 code sources

---
 umls_mysql_to_list_jsonl.py | 46 ++++++++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index 93b150e8..8bdb02c0 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -44,12 +44,17 @@ def get_english_sources(cursor):
 
 def code_sources(cursor, output):
     code_source_info = dict()
+    tui_key = 'tuis'
     cui_key = 'cuis'
     name_key = 'names'
-    info_key = 'info'
 
+    # See info about these here: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
+    info_key = 'attributes'
+
+    # See TTY meanings here: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/abbreviations.html
     names_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.TTY, '|', con.ISPREF, '|', con.STR) SEPARATOR '\t') FROM MRCONSO con GROUP BY con.CODE, con.SAB"
     extra_info_sql_statement = "SELECT sat.CODE, sat.SAB, GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', REPLACE(sat.ATV, '\t', ' ')) SEPARATOR '\t') FROM MRSAT sat GROUP BY sat.CODE, sat.SAB"
+    tuis_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT sty.TUI) FROM MRCONSO con LEFT JOIN MRSTY sty ON con.CUI = sty.CUI GROUP BY con.CODE, con.SAB"
 
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():
@@ -90,6 +95,17 @@ def code_sources(cursor, output):
 
     print("Finished extra_info_sql_statement at", kg2_util.date())
 
+    cursor.execute(tuis_sql_statement)
+    for result in cursor.fetchall():
+        (node_id, node_source, tuis) = result
+        key = (node_id, node_source)
+        if key not in code_source_info:
+            # This occurs if a node doesn't have a name.
+            continue
+        code_source_info[key][tui_key] = tuis.split(',')
+
+    print("Finished tuis_sql_statement at", kg2_util.date())
+
     record_num = 0
     for key, val in code_source_info.items():
         record_num += 1
@@ -105,11 +121,13 @@ def cui_sources(cursor, output, sources):
     relation_key = 'relations'
     definitions_key = 'definitions'
 
+    # Make the sources list a MySQL list
     sources_where = str(sources).replace('[', '(').replace(']', ')')
 
-    names_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT CONCAT(SAB, '|', ISPREF, '|', STR) SEPARATOR '\t') FROM MRCONSO WHERE SAB IN " + sources_where + " GROUP BY CUI"
+    # See TTY meanings here: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/abbreviations.html
+    names_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT CONCAT(TTY, '|', SAB, '|', ISPREF, '|', STR) SEPARATOR '\t') FROM MRCONSO WHERE SAB IN " + sources_where + " GROUP BY CUI"
     tuis_sql_statement = "SELECT CUI, GROUP_CONCAT(TUI) FROM MRSTY GROUP BY CUI"
-    relations_sql_statement = "SELECT CUI1, REL, RELA, DIR, CUI2, SAB FROM MRREL WHERE SAB IN " + sources_where
+    relations_sql_statement = "SELECT DISTINCT CUI1, REL, RELA, DIR, CUI2, SAB FROM MRREL WHERE SAB IN " + sources_where
     definitions_sql_statement = "SELECT CUI, DEF FROM MRDEF WHERE SAB IN " + sources_where
 
     cursor.execute(names_sql_statement)
@@ -120,12 +138,18 @@ def cui_sources(cursor, output, sources):
         cui_source_info[key][name_key] = dict()
         for name in names.split('\t'):
             split_name = name.split('|')
-            assert len(split_name) == 3, split_name
-            if split_name[0] not in cui_source_info[key][name_key]:
-                cui_source_info[key][name_key][split_name[0]] = dict()
-            if split_name[1] not in cui_source_info[key][name_key][split_name[0]]:
-                cui_source_info[key][name_key][split_name[0]][split_name[1]] = list()
-            cui_source_info[key][name_key][split_name[0]][split_name[1]].append(split_name[2])
+            assert len(split_name) == 4, split_name
+            name_tty = split_name[0]
+            name_source = split_name[1]
+            name_ispref = split_name[2]
+            name_str = split_name[3]
+            if name_source not in cui_source_info[key][name_key]:
+                cui_source_info[key][name_key][name_source] = dict()
+            if name_tty not in cui_source_info[key][name_key][name_source]:
+                cui_source_info[key][name_key][name_source][name_tty] = dict()
+            if name_ispref not in cui_source_info[key][name_key][name_source][name_tty]:
+                cui_source_info[key][name_key][name_source][name_tty][name_ispref] = list()
+            cui_source_info[key][name_key][name_source][name_tty][name_ispref].append(name_str)
 
     print("Finished names_sql_statement at", kg2_util.date())
 
@@ -197,11 +221,11 @@ def cui_sources(cursor, output, sources):
         cursor.execute(max_len_sql_statement)
         cursor.fetchall()
 
-        # Execute statement we care about after clearing any "results"
+        # This ensure we don't have UMLS sources that overwrite each other's names
         sources = get_english_sources(cursor)
 
         code_sources(cursor, output)
-        # cui_sources(cursor, output, sources)
+        cui_sources(cursor, output, sources)
 
     connection.close()
 

From 4c71b157a2e618a681ac1828c6f32588eb902ffc Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 16 Aug 2023 09:23:26 -0700
Subject: [PATCH 018/117] #316 making it easier to process

---
 umls_mysql_to_list_jsonl.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index 8bdb02c0..88ff11a1 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 '''umls_mysql_to_list_jsonl.py: extracts all of the information from UMLS and stores it in a JSON Lines output
 
-   Usage: umls_mysql_to_list_jsonl.py [--test] <mysqlConfigFile> <mysqlDBName> <outputFile.json>
+   Usage: umls_mysql_to_list_jsonl.py [--test] <mysqlConfigFile> <mysqlDBName> <outputFile.jsonl>
 '''
 
 __author__ = 'Erica Wood'
@@ -19,12 +19,12 @@
 import pymysql
 
 
-def make_arg_parser():
+def get_args():
     arg_parser = argparse.ArgumentParser(description='umls_mysql_to_list_jsonl.py: extracts all of the information from UMLS and stores it in a JSON Lines output')
     arg_parser.add_argument('mysqlConfigFile', type=str)
     arg_parser.add_argument('mysqlDBName', type=str)
     arg_parser.add_argument('outputFile', type=str)
-    return arg_parser
+    return arg_parser.parse_args()
 
 
 def get_english_sources(cursor):
@@ -120,6 +120,7 @@ def cui_sources(cursor, output, sources):
     name_key = 'names'
     relation_key = 'relations'
     definitions_key = 'definitions'
+    umls_source_name = 'UMLS'
 
     # Make the sources list a MySQL list
     sources_where = str(sources).replace('[', '(').replace(']', ')')
@@ -133,7 +134,7 @@ def cui_sources(cursor, output, sources):
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():
         (node_id, names) = result
-        key = node_id
+        key = (node_id, umls_source_name)
         cui_source_info[key] = dict()
         cui_source_info[key][name_key] = dict()
         for name in names.split('\t'):
@@ -156,7 +157,7 @@ def cui_sources(cursor, output, sources):
     cursor.execute(tuis_sql_statement)
     for result in cursor.fetchall():
         (node_id, tuis) = result
-        key = node_id
+        key = (node_id, umls_source_name)
         if key not in cui_source_info:
             # This happens if a node doesn't have an English name. See https://github.com/RTXteam/RTX-KG2/issues/316#issuecomment-1672074392
             continue
@@ -167,7 +168,7 @@ def cui_sources(cursor, output, sources):
     cursor.execute(relations_sql_statement)
     for result in cursor.fetchall():
         (cui1, rel, rela, direction, cui2, source) = result
-        key = cui1
+        key = (cui1, umls_source_name)
         if key not in cui_source_info:
             # See above for explanation
             continue
@@ -186,7 +187,7 @@ def cui_sources(cursor, output, sources):
     cursor.execute(definitions_sql_statement)
     for result in cursor.fetchall():
         (node_id, definition) = result
-        key = node_id
+        key = (node_id, umls_source_name)
         if key not in cui_source_info:
             # See above for explanation
             continue
@@ -204,7 +205,7 @@ def cui_sources(cursor, output, sources):
 
 if __name__ == '__main__':
     print("Starting umls_mysql_to_list_jsonl.py at", kg2_util.date())
-    args = make_arg_parser().parse_args()
+    args = get_args()
     mysql_config_file = args.mysqlConfigFile
     mysql_db_name = args.mysqlDBName
     output_file_name = args.outputFile

From 38c2473ecb8f716468429c3adde21c3c49416e7f Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 16 Aug 2023 09:24:17 -0700
Subject: [PATCH 019/117] #316 starting to process the JSON Lines list into
 nodes/edges

---
 umls_list_jsonl_to_kg_jsonl.py | 237 +++++++++++++++++++++++++++++++++
 1 file changed, 237 insertions(+)
 create mode 100644 umls_list_jsonl_to_kg_jsonl.py

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
new file mode 100644
index 00000000..5cc50f37
--- /dev/null
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -0,0 +1,237 @@
+#!/usr/bin/env python3
+'''umls_list_jsonl_to_kg_jsonl.py: converts UMLS MySQL JSON Lines dump into KG2 JSON format
+
+   Usage: umls_list_jsonl_to_kg_jsonl.py [--test] <inputFile.jsonl> <outputNodesFile.json> <outputEdgesFile.jsonl>
+'''
+
+__author__ = 'Erica Wood'
+__copyright__ = 'Oregon State University'
+__credits__ = ['Stephen Ramsey', 'Erica Wood']
+__license__ = 'MIT'
+__version__ = '0.1.0'
+__maintainer__ = ''
+__email__ = ''
+__status__ = 'Prototype'
+
+
+import argparse
+import kg2_util
+import json
+
+
+DESIRED_CODES = ['ATC', 'CHV', 'DRUGBANK', 'FMA', 'GO', 'HCPCS', 'HGNC', 'HL7V3.0',
+                 'HL7', 'HPO', 'ICD10PCS', 'ICD9CM', 'MED-RT', 'MEDLINEPLUS', 'MSH',
+                 'MTH', 'NCBI', 'NCBITAXON', 'NCI', 'NDDF', 'NDFRT', 'OMIM', 'PDQ',
+                 'PSY', 'RXNORM', 'VANDF']
+CUIS_KEY = 'cuis'
+INFO_KEY = 'info'
+NAMES_KEY = 'names'
+TUIS_KEY = 'tuis'
+
+TUI_MAPPINGS = {"T001": "individual organism",
+                "T002": "organism taxon",
+                "T004": "organism taxon",
+                "T005": "organism taxon",
+                "T007": "organism taxon",
+                "T008": "organism taxon",
+                "T010": "organism taxon",
+                "T011": "organism taxon",
+                "T012": "organism taxon",
+                "T013": "organism taxon",
+                "T014": "organism taxon",
+                "T015": "organism taxon",
+                "T016": "organism taxon",
+                "T017": "anatomical entity",
+                "T018": "gross anatomical structure",
+                "T019": "disease",
+                "T020": "disease",
+                "T021": "gross anatomical structure",
+                "T022": "anatomical entity",
+                "T023": "gross anatomical structure",
+                "T024": "gross anatomical structure",
+                "T025": "cell",
+                "T026": "cellular component",
+                "T028": "biological entity",
+                "T029": "anatomical entity",
+                "T030": "anatomical entity",
+                "T031": "anatomical entity",
+                "T032": "named thing",
+                "T033": "disease or phenotypic feature",
+                "T034": "phenomenon",
+                "T037": "pathological process",
+                "T038": "phenomenon",
+                "T039": "physiological process",
+                "T040": "physiological process",
+                "T041": "behavior",
+                "T042": "physiological process",
+                "T043": "physiological process",
+                "T044": "molecular activity",
+                "T045": "physiological process",
+                "T046": "pathological process",
+                "T047": "disease",
+                "T048": "disease",
+                "T049": "disease",
+                "T050": "biological entity",
+                "T051": "event",
+                "T052": "activity",
+                "T053": "behavior",
+                "T054": "behavior",
+                "T055": "behavior",
+                "T056": "activity",
+                "T057": "activity",
+                "T058": "activity",
+                "T059": "procedure",
+                "T060": "procedure",
+                "T061": "procedure",
+                "T062": "activity",
+                "T063": "procedure",
+                "T064": "activity",
+                "T065": "activity",
+                "T066": "activity",
+                "T067": "phenomenon",
+                "T068": "phenomenon",
+                "T069": "phenomenon",
+                "T070": "phenomenon",
+                "T071": "named thing",
+                "T072": "physical entity",
+                "T073": "physical entity",
+                "T074": "device",
+                "T075": "device",
+                "T077": "information content entity",
+                "T078": "information content entity",
+                "T079": "information content entity",
+                "T080": "information content entity",
+                "T081": "information content entity",
+                "T082": "information content entity",
+                "T083": "geographic location",
+                "T085": "biological entity",
+                "T086": "nucleic acid entity",
+                "T087": "polypeptide",
+                "T088": "biological entity",
+                "T089": "information content entity",
+                "T090": "individual organism",
+                "T091": "named thing",
+                "T092": "agent",
+                "T093": "agent",
+                "T094": "agent",
+                "T095": "agent",
+                "T096": "agent",
+                "T097": "cohort",
+                "T098": "population of individual organisms",
+                "T099": "cohort",
+                "T100": "cohort",
+                "T101": "cohort",
+                "T102": "information content entity",
+                "T103": "chemical entity",
+                "T104": "chemical entity",
+                "T109": "chemical entity",
+                "T114": "nucleic acid entity",
+                "T116": "polypeptide",
+                "T120": "chemical entity",
+                "T121": "drug",
+                "T122": "device",
+                "T123": "chemical entity",
+                "T125": "chemical entity",
+                "T126": "protein",
+                "T127": "small molecule",
+                "T129": "biological entity",
+                "T130": "chemical entity",
+                "T131": "chemical entity",
+                "T167": "chemical entity",
+                "T168": "food",
+                "T169": "information content entity",
+                "T170": "publication",
+                "T171": "information content entity",
+                "T184": "phenotypic feature",
+                "T185": "information content entity",
+                "T190": "disease",
+                "T191": "disease",
+                "T192": "protein",
+                "T194": "organism taxon",
+                "T195": "drug",
+                "T196": "small molecule",
+                "T197": "chemical entity",
+                "T200": "drug",
+                "T201": "named thing",
+                "T203": "device",
+                "T204": "organism taxon"}
+
+def get_args():
+    arg_parser = argparse.ArgumentParser(description='umls_list_jsonl_to_kg_jsonl.py: converts UMLS MySQL JSON Lines dump into KG2 JSON format')
+    arg_parser.add_argument('inputFile', type=str)
+    arg_parser.add_argument('outputNodesFile', type=str)
+    arg_parser.add_argument('outputEdgesFile', type=str)
+    return arg_parser.parse_args()
+
+
+def extract_node_id(node_id_str):
+    node_id_str = node_id_str.replace('(', '').replace(')', '').replace("'", '')
+    node_id = node_id_str.split(',')
+    return node_id[1].strip(), node_id[0].strip()
+
+
+def make_node_id(curie_prefix, node_id_val):
+    return curie_prefix + ':' + node_id_val
+
+
+def process_drugbank_item(node_id_val, info):
+    node_curie = make_node_id(kg2_util.CURIE_PREFIX_DRUGBANK, node_id_val)
+    cuis = info.get(CUIS_KEY, list())
+    tuis = info.get(TUIS_KEY, list())
+    fda_codes = info.get(INFO_KEY, dict()).get('FDA_UNII_CODE', list())
+    secondary_accession_keys = info.get(INFO_KEY, dict()).get('SID', list())
+    name = info.get(NAMES_KEY, dict()).get('IN', dict()).get('N', list())
+    if len(name) == 0:
+        name = info.get(NAMES_KEY, dict()).get('IN', dict()).get('Y', list())
+    assert len(name) == 1, str(name) + " " + node_curie
+    name = name[0]
+    synonyms = list()
+    for syn_cat in info.get('SY', dict()):
+        synonyms += info['SY'][syn_cat]
+    
+    print(json.dumps({'node_curie': node_curie, 'cuis': cuis, 'tuis': tuis, 'fda_codes': fda_codes, 'secondary_accession_keys': secondary_accession_keys, 'name': name, 'synonyms': synonyms}))
+    return str(tuis)
+
+
+if __name__ == '__main__':
+    args = get_args()
+    input_file_name = args.inputFile
+
+    input_read_jsonlines_info = kg2_util.start_read_jsonlines(input_file_name)
+    input_items = input_read_jsonlines_info[0]
+
+    tui_combos = dict()
+
+    for data in input_items:
+        # There should only be one item in the data dictionary
+        for entity in data:
+            if entity == "('NOCODE', 'MTH')":
+                continue
+            value = data[entity]
+            source, node_id_val = extract_node_id(entity)
+            if source not in DESIRED_CODES and source != 'UMLS':
+                continue
+
+            # Process the data specifically by source
+            tui_combo = tuple(sorted(value.get(TUIS_KEY, list())))
+            if tui_combo not in tui_combos:
+                tui_combos[tui_combo] = dict()
+                tui_combos[tui_combo]['tuis'] = list()
+                tui_combos[tui_combo]['tui_count'] = 0
+            tui_combos[tui_combo]['tuis'].append(entity)
+            tui_combos[tui_combo]['tui_count'] += 1
+            if source == 'DRUGBANK':
+                process_drugbank_item(node_id_val, value)
+
+    lines = str()
+    for tui_combo in tui_combos:
+        line = str(tui_combos[tui_combo]['tui_count']) + '\t'
+        for tui in tui_combo:
+            line += tui + "\t" + TUI_MAPPINGS[tui] + "\t"
+        line = line.strip()
+        line += '\n'
+        lines += line
+
+    print(lines)
+
+    kg2_util.end_read_jsonlines(input_read_jsonlines_info)
\ No newline at end of file

From f84d38cbba72a9fa899c8d02870ab3b9dfbecce8 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 16 Aug 2023 15:14:04 -0700
Subject: [PATCH 020/117] #316 UMLS DrugBank nodes seem to be getting brought
 in correctly

---
 umls_list_jsonl_to_kg_jsonl.py | 205 ++++++++-------------------------
 1 file changed, 47 insertions(+), 158 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 5cc50f37..0daf46e0 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -24,143 +24,19 @@
                  'MTH', 'NCBI', 'NCBITAXON', 'NCI', 'NDDF', 'NDFRT', 'OMIM', 'PDQ',
                  'PSY', 'RXNORM', 'VANDF']
 CUIS_KEY = 'cuis'
-INFO_KEY = 'info'
+INFO_KEY = 'attributes'
 NAMES_KEY = 'names'
 TUIS_KEY = 'tuis'
 
-TUI_MAPPINGS = {"T001": "individual organism",
-                "T002": "organism taxon",
-                "T004": "organism taxon",
-                "T005": "organism taxon",
-                "T007": "organism taxon",
-                "T008": "organism taxon",
-                "T010": "organism taxon",
-                "T011": "organism taxon",
-                "T012": "organism taxon",
-                "T013": "organism taxon",
-                "T014": "organism taxon",
-                "T015": "organism taxon",
-                "T016": "organism taxon",
-                "T017": "anatomical entity",
-                "T018": "gross anatomical structure",
-                "T019": "disease",
-                "T020": "disease",
-                "T021": "gross anatomical structure",
-                "T022": "anatomical entity",
-                "T023": "gross anatomical structure",
-                "T024": "gross anatomical structure",
-                "T025": "cell",
-                "T026": "cellular component",
-                "T028": "biological entity",
-                "T029": "anatomical entity",
-                "T030": "anatomical entity",
-                "T031": "anatomical entity",
-                "T032": "named thing",
-                "T033": "disease or phenotypic feature",
-                "T034": "phenomenon",
-                "T037": "pathological process",
-                "T038": "phenomenon",
-                "T039": "physiological process",
-                "T040": "physiological process",
-                "T041": "behavior",
-                "T042": "physiological process",
-                "T043": "physiological process",
-                "T044": "molecular activity",
-                "T045": "physiological process",
-                "T046": "pathological process",
-                "T047": "disease",
-                "T048": "disease",
-                "T049": "disease",
-                "T050": "biological entity",
-                "T051": "event",
-                "T052": "activity",
-                "T053": "behavior",
-                "T054": "behavior",
-                "T055": "behavior",
-                "T056": "activity",
-                "T057": "activity",
-                "T058": "activity",
-                "T059": "procedure",
-                "T060": "procedure",
-                "T061": "procedure",
-                "T062": "activity",
-                "T063": "procedure",
-                "T064": "activity",
-                "T065": "activity",
-                "T066": "activity",
-                "T067": "phenomenon",
-                "T068": "phenomenon",
-                "T069": "phenomenon",
-                "T070": "phenomenon",
-                "T071": "named thing",
-                "T072": "physical entity",
-                "T073": "physical entity",
-                "T074": "device",
-                "T075": "device",
-                "T077": "information content entity",
-                "T078": "information content entity",
-                "T079": "information content entity",
-                "T080": "information content entity",
-                "T081": "information content entity",
-                "T082": "information content entity",
-                "T083": "geographic location",
-                "T085": "biological entity",
-                "T086": "nucleic acid entity",
-                "T087": "polypeptide",
-                "T088": "biological entity",
-                "T089": "information content entity",
-                "T090": "individual organism",
-                "T091": "named thing",
-                "T092": "agent",
-                "T093": "agent",
-                "T094": "agent",
-                "T095": "agent",
-                "T096": "agent",
-                "T097": "cohort",
-                "T098": "population of individual organisms",
-                "T099": "cohort",
-                "T100": "cohort",
-                "T101": "cohort",
-                "T102": "information content entity",
-                "T103": "chemical entity",
-                "T104": "chemical entity",
-                "T109": "chemical entity",
-                "T114": "nucleic acid entity",
-                "T116": "polypeptide",
-                "T120": "chemical entity",
-                "T121": "drug",
-                "T122": "device",
-                "T123": "chemical entity",
-                "T125": "chemical entity",
-                "T126": "protein",
-                "T127": "small molecule",
-                "T129": "biological entity",
-                "T130": "chemical entity",
-                "T131": "chemical entity",
-                "T167": "chemical entity",
-                "T168": "food",
-                "T169": "information content entity",
-                "T170": "publication",
-                "T171": "information content entity",
-                "T184": "phenotypic feature",
-                "T185": "information content entity",
-                "T190": "disease",
-                "T191": "disease",
-                "T192": "protein",
-                "T194": "organism taxon",
-                "T195": "drug",
-                "T196": "small molecule",
-                "T197": "chemical entity",
-                "T200": "drug",
-                "T201": "named thing",
-                "T203": "device",
-                "T204": "organism taxon"}
+UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
+
 
 def get_args():
     arg_parser = argparse.ArgumentParser(description='umls_list_jsonl_to_kg_jsonl.py: converts UMLS MySQL JSON Lines dump into KG2 JSON format')
     arg_parser.add_argument('inputFile', type=str)
     arg_parser.add_argument('outputNodesFile', type=str)
     arg_parser.add_argument('outputEdgesFile', type=str)
+    arg_parser.add_argument('--test', dest='test', action="store_true", default=False)
     return arg_parser.parse_args()
 
 
@@ -170,12 +46,15 @@ def extract_node_id(node_id_str):
     return node_id[1].strip(), node_id[0].strip()
 
 
-def make_node_id(curie_prefix, node_id_val):
-    return curie_prefix + ':' + node_id_val
+def make_node_id(curie_prefix, node_id):
+    return curie_prefix + ':' + node_id
 
 
-def process_drugbank_item(node_id_val, info):
-    node_curie = make_node_id(kg2_util.CURIE_PREFIX_DRUGBANK, node_id_val)
+def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
+    curie_prefix = kg2_util.CURIE_PREFIX_DRUGBANK
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
+    iri = iri_mappings[curie_prefix] + node_id
+    node_curie = make_node_id(curie_prefix, node_id)
     cuis = info.get(CUIS_KEY, list())
     tuis = info.get(TUIS_KEY, list())
     fda_codes = info.get(INFO_KEY, dict()).get('FDA_UNII_CODE', list())
@@ -186,21 +65,48 @@ def process_drugbank_item(node_id_val, info):
     assert len(name) == 1, str(name) + " " + node_curie
     name = name[0]
     synonyms = list()
-    for syn_cat in info.get('SY', dict()):
-        synonyms += info['SY'][syn_cat]
+    for syn_cat in info.get(NAMES_KEY, dict()).get('SY', dict()):
+        synonyms += info.get(NAMES_KEY, dict()).get('SY', dict())[syn_cat]
+    for syn_cat in info.get(NAMES_KEY, dict()).get('FSY', dict()):
+        synonyms += info.get(NAMES_KEY, dict()).get('FSY', dict())[syn_cat]
+
+    # TODO: figure out update date
+    node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
+    node['synonym'] = synonyms
+    description = str()
+    for tui in tuis:
+        description += "; UMLS Semantic Type: STY:" + tui
+    description.strip("; ")
+    node['description'] = description
     
-    print(json.dumps({'node_curie': node_curie, 'cuis': cuis, 'tuis': tuis, 'fda_codes': fda_codes, 'secondary_accession_keys': secondary_accession_keys, 'name': name, 'synonyms': synonyms}))
-    return str(tuis)
+    nodes_output.write(node)
 
 
 if __name__ == '__main__':
     args = get_args()
     input_file_name = args.inputFile
+    test_mode = args.test
+    output_nodes_file_name = args.outputNodesFile
+    output_edges_file_name = args.outputEdgesFile
+
+    nodes_info, edges_info = kg2_util.create_kg2_jsonlines(test_mode)
+    nodes_output = nodes_info[0]
+    edges_output = edges_info[0]
 
     input_read_jsonlines_info = kg2_util.start_read_jsonlines(input_file_name)
     input_items = input_read_jsonlines_info[0]
 
-    tui_combos = dict()
+    tui_mappings = dict()
+
+    with open('tui_combo_mappings.json') as mappings:
+        tui_mappings = json.load(mappings)
+
+    iri_mappings = dict()
+    iri_mappings_raw = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string('curies-to-urls-map.yaml'))['use_for_bidirectional_mapping']
+    for item in iri_mappings_raw:
+        for prefix in item:
+            iri_mappings[prefix] = item[prefix]
+    print(json.dumps(iri_mappings, indent=4, sort_keys=True))
 
     for data in input_items:
         # There should only be one item in the data dictionary
@@ -208,30 +114,13 @@ def process_drugbank_item(node_id_val, info):
             if entity == "('NOCODE', 'MTH')":
                 continue
             value = data[entity]
-            source, node_id_val = extract_node_id(entity)
+            source, node_id = extract_node_id(entity)
             if source not in DESIRED_CODES and source != 'UMLS':
                 continue
 
             # Process the data specifically by source
-            tui_combo = tuple(sorted(value.get(TUIS_KEY, list())))
-            if tui_combo not in tui_combos:
-                tui_combos[tui_combo] = dict()
-                tui_combos[tui_combo]['tuis'] = list()
-                tui_combos[tui_combo]['tui_count'] = 0
-            tui_combos[tui_combo]['tuis'].append(entity)
-            tui_combos[tui_combo]['tui_count'] += 1
             if source == 'DRUGBANK':
-                process_drugbank_item(node_id_val, value)
-
-    lines = str()
-    for tui_combo in tui_combos:
-        line = str(tui_combos[tui_combo]['tui_count']) + '\t'
-        for tui in tui_combo:
-            line += tui + "\t" + TUI_MAPPINGS[tui] + "\t"
-        line = line.strip()
-        line += '\n'
-        lines += line
-
-    print(lines)
+                process_drugbank_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
 
-    kg2_util.end_read_jsonlines(input_read_jsonlines_info)
\ No newline at end of file
+    kg2_util.end_read_jsonlines(input_read_jsonlines_info)
+    kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)
\ No newline at end of file

From 007352b097513ed89595672623d5d7c4f5ed99e8 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 16 Aug 2023 15:28:57 -0700
Subject: [PATCH 021/117] Correcting issue per CI

---
 extract-mirbase.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/extract-mirbase.sh b/extract-mirbase.sh
index d4112368..8dee866e 100755
--- a/extract-mirbase.sh
+++ b/extract-mirbase.sh
@@ -23,8 +23,8 @@ output_file=${1:-"${BUILD_DIR}/miRNA.dat"}
 
 mkdir -p ${output_dir}
 
-${curl_get} https://www.mirbase.org/download_file/miRNA.dat/ > /tmp/miRNA.dat
-${curl_get} https://www.mirbase.org/download_readme/ > ${output_dir}/miRBase_README.txt
+${curl_get} https://mirbase.org/download/miRNA.dat/ > /tmp/miRNA.dat
+${curl_get} https://mirbase.org/download/README/ > ${output_dir}/miRBase_README.txt
 
 sed -i "s/<br>//" ${output_dir}/miRBase_README.txt
 version_number=`grep -m 1 "The miRBase Sequence Database -- Release" ${output_dir}/miRBase_README.txt | cut -f7 -d ' '`

From 50da25ad8487aa9910426b1a5d518d6e85680208 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 16 Aug 2023 17:01:08 -0700
Subject: [PATCH 022/117] #316 ATC and CHV

---
 umls_list_jsonl_to_kg_jsonl.py | 93 ++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 0daf46e0..d372084d 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -50,6 +50,90 @@ def make_node_id(curie_prefix, node_id):
     return curie_prefix + ':' + node_id
 
 
+def process_atc_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
+    curie_prefix = kg2_util.CURIE_PREFIX_ATC
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
+    iri = iri_mappings[curie_prefix] + node_id
+    node_curie = make_node_id(curie_prefix, node_id)
+    cuis = info.get(CUIS_KEY, list())
+    tuis = info.get(TUIS_KEY, list())
+
+    # Currently not used, but extracting them in case we want them in the future
+    atc_level = info.get(INFO_KEY, dict()).get('ATC_LEVEL', list())[0]
+    is_drug_class = info.get(INFO_KEY, dict()).get('IS_DRUG_CLASS', list()) == ["Y"]
+
+    name = str()
+    synonyms = list()
+    names = info.get(NAMES_KEY, dict())
+    if "RXN_PT" in names:
+        rxn_pt = names.get('RXN_PT', dict())
+        if 'Y' in rxn_pt:
+            name = rxn_pt.get('Y', '')
+            assert len(name) == 1
+            name = name[0]
+        else:
+            name = rxn_pt.get('N', '')
+            assert len(name) == 1
+            name = name[0]
+        synonyms = [syn for syn in names.get('PT', dict()).get('Y', list())]
+        synonyms += [syn for syn in names.get('PT', dict()).get('N', list())]
+        synonyms += [syn for syn in names.get('IN', dict()).get('Y', list())]
+        synonyms += [syn for syn in names.get('IN', dict()).get('N', list())]
+    elif "PT" in names:
+        pt = names.get('PT', dict())
+        if 'Y' in pt:
+            name = pt.get('Y', '')
+            assert len(name) == 1
+            name = name[0]
+        else:
+            name = pt.get('N', '')
+            assert len(name) == 1
+            name = name[0]
+        synonyms += [syn for syn in names.get('IN', dict()).get('Y', list())]
+        synonyms += [syn for syn in names.get('IN', dict()).get('N', list())]
+    else:
+        in_dict = names.get('IN', dict())
+        if 'Y' in in_dict:
+            name = in_dict.get('Y', '')
+            assert len(name) == 1
+            name = name[0]
+        else:
+            name = in_dict.get('N', '')
+            assert len(name) == 1
+            name = name[0]
+    node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
+    node['synonym'] = synonyms
+    description = str()
+    for tui in tuis:
+        description += "; UMLS Semantic Type: STY:" + tui
+    description.strip("; ")
+    node['description'] = description
+
+    nodes_output.write(node)
+
+
+def process_chv_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
+    curie_prefix = "CHV" # This should be replaced with a kg2_util prefix at some point
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
+    iri = iri_mappings[curie_prefix] + node_id
+    node_curie = make_node_id(curie_prefix, node_id)
+    cuis = info.get(CUIS_KEY, list())
+    tuis = info.get(TUIS_KEY, list())
+
+    # Currently not used, but extracting them in case we want them in the future
+    combo_score = info.get(INFO_KEY, dict()).get('COMBO_SCORE', list())
+    combo_score_no_top_words = info.get(INFO_KEY, dict()).get('COMBO_SCORE_NO_TOP_WORDS', list())
+    context_score = info.get(INFO_KEY, dict()).get('CONTEXT_SCORE', list())
+    cui_score = info.get(INFO_KEY, dict()).get('CUI_SCORE', list())
+    disparaged = info.get(INFO_KEY, dict()).get('DISPARAGED', list())
+    frequency = info.get(INFO_KEY, dict()).get('FREQUENCY', list())
+
+    name = str()
+    synonyms = list()
+    names = info.get(NAMES_KEY, dict())
+
+    print(curie_prefix + ":", names)
+
 def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
     curie_prefix = kg2_util.CURIE_PREFIX_DRUGBANK
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
@@ -57,8 +141,11 @@ def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_outpu
     node_curie = make_node_id(curie_prefix, node_id)
     cuis = info.get(CUIS_KEY, list())
     tuis = info.get(TUIS_KEY, list())
+
+    # Currently not used, but extracting them in case we want them in the future
     fda_codes = info.get(INFO_KEY, dict()).get('FDA_UNII_CODE', list())
     secondary_accession_keys = info.get(INFO_KEY, dict()).get('SID', list())
+
     name = info.get(NAMES_KEY, dict()).get('IN', dict()).get('N', list())
     if len(name) == 0:
         name = info.get(NAMES_KEY, dict()).get('IN', dict()).get('Y', list())
@@ -119,6 +206,12 @@ def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_outpu
                 continue
 
             # Process the data specifically by source
+            if source == 'ATC':
+                process_atc_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
+
+            if source == 'CHV':
+                process_chv_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
+
             if source == 'DRUGBANK':
                 process_drugbank_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
 

From 474348fd43ef7f9b05976ed0ab7293530a27235d Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 16 Aug 2023 17:07:47 -0700
Subject: [PATCH 023/117] #316 #344 #280 CHV curies to urls

---
 curies-to-urls-map.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/curies-to-urls-map.yaml b/curies-to-urls-map.yaml
index 8dcec53b..c5e8db0a 100644
--- a/curies-to-urls-map.yaml
+++ b/curies-to-urls-map.yaml
@@ -59,6 +59,8 @@ use_for_bidirectional_mapping:
     CHEMBL.TARGET: "https://identifiers.org/chembl.target:"
   -
     CHMO: http://purl.obolibrary.org/obo/CHMO_
+  -
+    CHV: http://purl.bioontology.org/ontology/CHV/
   -
     CID: 'http://pubchem.ncbi.nlm.nih.gov/compound/'
   -

From d1b17a4d551664657ba2d89f676b48911195d3ac Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 16 Aug 2023 17:16:54 -0700
Subject: [PATCH 024/117] miRBase URL issue

---
 extract-mirbase.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extract-mirbase.sh b/extract-mirbase.sh
index 8dee866e..c4ff3f00 100755
--- a/extract-mirbase.sh
+++ b/extract-mirbase.sh
@@ -23,7 +23,7 @@ output_file=${1:-"${BUILD_DIR}/miRNA.dat"}
 
 mkdir -p ${output_dir}
 
-${curl_get} https://mirbase.org/download/miRNA.dat/ > /tmp/miRNA.dat
+${curl_get} https://mirbase.org/download/miRNA.dat > /tmp/miRNA.dat
 ${curl_get} https://mirbase.org/download/README/ > ${output_dir}/miRBase_README.txt
 
 sed -i "s/<br>//" ${output_dir}/miRBase_README.txt

From e84d67323926c60d2f840542595896bd54aa54c1 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 16 Aug 2023 17:53:07 -0700
Subject: [PATCH 025/117] #316 TUI Category mappings

---
 tui_combo_mappings.json | 809 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 809 insertions(+)
 create mode 100644 tui_combo_mappings.json

diff --git a/tui_combo_mappings.json b/tui_combo_mappings.json
new file mode 100644
index 00000000..d30a088a
--- /dev/null
+++ b/tui_combo_mappings.json
@@ -0,0 +1,809 @@
+{
+    "('T001',)": "individual organism",
+    "('T002', 'T004')": "organism taxon",
+    "('T002', 'T025')": "cell",
+    "('T002', 'T033')": "disease or phenotypic feature",
+    "('T002', 'T037', 'T047')": "pathological process",
+    "('T002', 'T047')": "disease",
+    "('T002', 'T048', 'T167')": "disease",
+    "('T002', 'T059')": "procedure",
+    "('T002', 'T061')": "procedure",
+    "('T002', 'T061', 'T109', 'T121', 'T168')": "drug",
+    "('T002', 'T090')": "individual organism",
+    "('T002', 'T109')": "chemical entity",
+    "('T002', 'T109', 'T116', 'T121')": "drug",
+    "('T002', 'T109', 'T121')": "drug",
+    "('T002', 'T109', 'T121', 'T131')": "drug",
+    "('T002', 'T109', 'T121', 'T167')": "drug",
+    "('T002', 'T109', 'T121', 'T168')": "drug",
+    "('T002', 'T109', 'T130')": "chemical entity",
+    "('T002', 'T109', 'T168')": "food",
+    "('T002', 'T116', 'T121')": "drug",
+    "('T002', 'T116', 'T126')": "protein",
+    "('T002', 'T116', 'T129', 'T168')": "food",
+    "('T002', 'T121')": "drug",
+    "('T002', 'T121', 'T129')": "drug",
+    "('T002', 'T121', 'T129', 'T130')": "drug",
+    "('T002', 'T129', 'T168')": "food",
+    "('T002', 'T167')": "chemical entity",
+    "('T002', 'T168')": "food",
+    "('T002', 'T204')": "organism taxon",
+    "('T002',)": "organism taxon",
+    "('T004', 'T017')": "anatomical entity",
+    "('T004', 'T060')": "procedure",
+    "('T004', 'T109', 'T121')": "drug",
+    "('T004', 'T121', 'T129', 'T168')": "drug",
+    "('T004', 'T200')": "drug",
+    "('T004', 'T204')": "organism taxon",
+    "('T004',)": "organism taxon",
+    "('T005', 'T023', 'T026')": "cellular component",
+    "('T005', 'T028')": "organism taxon",
+    "('T005', 'T042')": "physiological process",
+    "('T005', 'T047')": "disease",
+    "('T005', 'T059', 'T067')": "procedure",
+    "('T005', 'T081')": "organism taxon",
+    "('T005', 'T114')": "nucleic acid entity",
+    "('T005', 'T116')": "polypeptide",
+    "('T005', 'T116', 'T121', 'T129')": "drug",
+    "('T005', 'T116', 'T123')": "polypeptide",
+    "('T005', 'T121')": "drug",
+    "('T005', 'T200')": "drug",
+    "('T005',)": "organism taxon",
+    "('T007', 'T032', 'T201')": "organism taxon",
+    "('T007', 'T037')": "pathological process",
+    "('T007', 'T047')": "disease",
+    "('T007', 'T058')": "activity",
+    "('T007', 'T059')": "procedure",
+    "('T007', 'T070')": "phenomenon",
+    "('T007', 'T074')": "device",
+    "('T007', 'T109', 'T121')": "drug",
+    "('T007', 'T109', 'T121', 'T129')": "drug",
+    "('T007', 'T109', 'T123')": "chemical entity",
+    "('T007', 'T116', 'T121', 'T129')": "drug",
+    "('T007', 'T121')": "drug",
+    "('T007', 'T121', 'T129')": "drug",
+    "('T007', 'T122')": "device",
+    "('T007', 'T168')": "food",
+    "('T007', 'T185')": "organism taxon",
+    "('T007', 'T200')": "drug",
+    "('T007', 'T203')": "device",
+    "('T007', 'T204')": "organism taxon",
+    "('T007',)": "organism taxon",
+    "('T008',)": "organism taxon",
+    "('T010',)": "organism taxon",
+    "('T011',)": "organism taxon",
+    "('T012',)": "organism taxon",
+    "('T013',)": "organism taxon",
+    "('T014',)": "organism taxon",
+    "('T015',)": "organism taxon",
+    "('T016',)": "organism taxon",
+    "('T017',)": "anatomical entity",
+    "('T018',)": "gross anatomical structure",
+    "('T019', 'T028')": "disease",
+    "('T019', 'T028', 'T047')": "disease",
+    "('T019', 'T033')": "disease",
+    "('T019', 'T047')": "disease",
+    "('T019',)": "disease",
+    "('T020',)": "disease",
+    "('T021',)": "gross anatomical structure",
+    "('T022',)": "anatomical entity",
+    "('T023', 'T024')": "gross anatomical structure",
+    "('T023', 'T025')": "cell",
+    "('T023', 'T026')": "cellular component",
+    "('T023', 'T029')": "anatomical entity",
+    "('T023', 'T030')": "anatomical entity",
+    "('T023', 'T033')": "disease or phenotypic feature",
+    "('T023', 'T033', 'T047')": "disease",
+    "('T023', 'T047')": "disease",
+    "('T023', 'T061')": "procedure",
+    "('T023', 'T191')": "disease",
+    "('T023',)": "gross anatomical structure",
+    "('T024', 'T026')": "cellular component",
+    "('T024', 'T031')": "gross anatomical structure",
+    "('T024', 'T033')": "disease or phenotypic feature",
+    "('T024', 'T040')": "physiological process",
+    "('T024', 'T109', 'T123')": "chemical entity",
+    "('T024', 'T116', 'T123')": "polypeptide",
+    "('T024', 'T116', 'T129')": "polypeptide",
+    "('T024', 'T121')": "drug",
+    "('T024', 'T200')": "drug",
+    "('T024', 'T201')": "gross anatomical structure",
+    "('T024',)": "gross anatomical structure",
+    "('T025', 'T026')": "cell",
+    "('T025', 'T029')": "anatomical entity",
+    "('T025', 'T031', 'T061')": "cell",
+    "('T025', 'T031', 'T185')": "anatomical entity",
+    "('T025', 'T032')": "cell",
+    "('T025', 'T033')": "disease or phenotypic feature",
+    "('T025', 'T033', 'T047')": "disease",
+    "('T025', 'T034')": "phenomenon",
+    "('T025', 'T037')": "pathological process",
+    "('T025', 'T038')": "phenomenon",
+    "('T025', 'T049')": "disease",
+    "('T025', 'T059')": "procedure",
+    "('T025', 'T063', 'T170')": "procedure",
+    "('T025', 'T081')": "cell",
+    "('T025', 'T109', 'T121')": "drug",
+    "('T025', 'T114', 'T121')": "drug",
+    "('T025', 'T116', 'T121', 'T129')": "drug",
+    "('T025', 'T121')": "drug",
+    "('T025', 'T121', 'T129')": "drug",
+    "('T025', 'T122')": "device",
+    "('T025', 'T129')": "cell",
+    "('T025', 'T170')": "publication",
+    "('T025', 'T191')": "disease",
+    "('T025', 'T200')": "drug",
+    "('T025',)": "cell",
+    "('T026',)": "cellular component",
+    "('T028', 'T033')": "disease or phenotypic feature",
+    "('T028', 'T033', 'T047')": "disease",
+    "('T028', 'T033', 'T047', 'T191')": "disease",
+    "('T028', 'T047')": "disease",
+    "('T028', 'T048')": "disease",
+    "('T028',)": "named thing",
+    "('T029',)": "anatomical entity",
+    "('T030',)": "anatomical entity",
+    "('T031', 'T121')": "drug",
+    "('T031', 'T121', 'T200')": "drug",
+    "('T031',)": "anatomical entity",
+    "('T032',)": "named thing",
+    "('T033', 'T034')": "phenomenon",
+    "('T033', 'T034', 'T047')": "disease",
+    "('T033', 'T034', 'T059')": "phenomenon",
+    "('T033', 'T037')": "pathological process",
+    "('T033', 'T037', 'T047')": "disease",
+    "('T033', 'T037', 'T055')": "pathological process",
+    "('T033', 'T037', 'T070', 'T167', 'T191')": "disease",
+    "('T033', 'T039')": "physiological process",
+    "('T033', 'T040')": "physiological process",
+    "('T033', 'T040', 'T046', 'T047')": "disease",
+    "('T033', 'T040', 'T047')": "disease",
+    "('T033', 'T041')": "behavior",
+    "('T033', 'T042')": "physiological process",
+    "('T033', 'T042', 'T047')": "disease",
+    "('T033', 'T046')": "pathological process",
+    "('T033', 'T046', 'T047')": "disease",
+    "('T033', 'T046', 'T047', 'T184')": "disease or phenotypic feature",
+    "('T033', 'T046', 'T061', 'T081', 'T093')": "pathological process",
+    "('T033', 'T046', 'T184')": "disease or phenotypic feature",
+    "('T033', 'T047')": "disease",
+    "('T033', 'T047', 'T048', 'T054', 'T102')": "disease",
+    "('T033', 'T047', 'T048', 'T184')": "disease or phenotypic feature",
+    "('T033', 'T047', 'T059', 'T074')": "disease",
+    "('T033', 'T047', 'T170')": "disease",
+    "('T033', 'T047', 'T184')": "disease or phenotypic feature",
+    "('T033', 'T047', 'T190')": "disease",
+    "('T033', 'T047', 'T191')": "disease",
+    "('T033', 'T048')": "disease",
+    "('T033', 'T048', 'T054')": "disease",
+    "('T033', 'T048', 'T169')": "disease",
+    "('T033', 'T049')": "disease",
+    "('T033', 'T051')": "event",
+    "('T033', 'T052', 'T061')": "procedure",
+    "('T033', 'T054')": "behavior",
+    "('T033', 'T054', 'T080')": "behavior",
+    "('T033', 'T055')": "behavior",
+    "('T033', 'T055', 'T061')": "procedure",
+    "('T033', 'T055', 'T185')": "behavior",
+    "('T033', 'T056', 'T078', 'T080', 'T169', 'T170')": "publication",
+    "('T033', 'T057', 'T080')": "activity",
+    "('T033', 'T058')": "activity",
+    "('T033', 'T059')": "procedure",
+    "('T033', 'T060')": "procedure",
+    "('T033', 'T060', 'T080')": "procedure",
+    "('T033', 'T061')": "procedure",
+    "('T033', 'T061', 'T168')": "procedure",
+    "('T033', 'T067')": "phenomenon",
+    "('T033', 'T069', 'T131')": "phenomenon",
+    "('T033', 'T074')": "device",
+    "('T033', 'T078')": "disease or phenotypic feature",
+    "('T033', 'T078', 'T079', 'T170')": "publication",
+    "('T033', 'T078', 'T089', 'T095', 'T170')": "publication",
+    "('T033', 'T078', 'T089', 'T170')": "publication",
+    "('T033', 'T078', 'T169', 'T170')": "publication",
+    "('T033', 'T078', 'T170')": "publication",
+    "('T033', 'T079')": "disease or phenotypic feature",
+    "('T033', 'T079', 'T080', 'T081', 'T169', 'T170')": "publication",
+    "('T033', 'T080')": "disease or phenotypic feature",
+    "('T033', 'T080', 'T082')": "disease or phenotypic feature",
+    "('T033', 'T080', 'T170')": "publication",
+    "('T033', 'T081')": "disease or phenotypic feature",
+    "('T033', 'T083', 'T093', 'T169', 'T170')": "publication",
+    "('T033', 'T089')": "disease or phenotypic feature",
+    "('T033', 'T098')": "population of individual organisms",
+    "('T033', 'T098', 'T116', 'T121', 'T129')": "drug",
+    "('T033', 'T098', 'T121', 'T129')": "drug",
+    "('T033', 'T099')": "cohort",
+    "('T033', 'T101')": "cohort",
+    "('T033', 'T102')": "disease or phenotypic feature",
+    "('T033', 'T109', 'T122')": "chemical entity",
+    "('T033', 'T109', 'T123')": "chemical entity",
+    "('T033', 'T116', 'T123')": "polypeptide",
+    "('T033', 'T116', 'T129')": "polypeptide",
+    "('T033', 'T121')": "drug",
+    "('T033', 'T122')": "device",
+    "('T033', 'T168')": "food",
+    "('T033', 'T168', 'T170')": "food",
+    "('T033', 'T169')": "disease or phenotypic feature",
+    "('T033', 'T170')": "publication",
+    "('T033', 'T184')": "phenotypic feature",
+    "('T033', 'T185')": "disease or phenotypic feature",
+    "('T033', 'T190')": "disease",
+    "('T033', 'T191')": "disease",
+    "('T033', 'T197')": "chemical entity",
+    "('T033', 'T201')": "disease or phenotypic feature",
+    "('T033',)": "disease or phenotypic feature",
+    "('T034', 'T046')": "pathological process",
+    "('T034', 'T047')": "phenomenon",
+    "('T034', 'T058', 'T060')": "procedure",
+    "('T034', 'T059')": "procedure",
+    "('T034', 'T060')": "procedure",
+    "('T034', 'T063')": "procedure",
+    "('T034', 'T073')": "phenomenon",
+    "('T034', 'T073', 'T093')": "phenomenon",
+    "('T034', 'T074')": "device",
+    "('T034', 'T078')": "phenomenon",
+    "('T034', 'T081', 'T116', 'T121', 'T123')": "drug",
+    "('T034', 'T116', 'T121', 'T123')": "drug",
+    "('T034', 'T116', 'T129')": "polypeptide",
+    "('T034', 'T121')": "drug",
+    "('T034', 'T123', 'T196')": "small molecule",
+    "('T034', 'T196')": "small molecule",
+    "('T034', 'T201')": "phenomenon",
+    "('T034',)": "phenomenon",
+    "('T037', 'T047')": "disease",
+    "('T037',)": "pathological process",
+    "('T038', 'T043')": "phenomenon",
+    "('T038',)": "phenomenon",
+    "('T039', 'T040')": "physiological process",
+    "('T039', 'T121')": "drug",
+    "('T039',)": "physiological process",
+    "('T040', 'T042')": "physiological process",
+    "('T040', 'T043')": "physiological process",
+    "('T040', 'T044')": "physiological process",
+    "('T040',)": "physiological process",
+    "('T041',)": "behavior",
+    "('T042',)": "physiological process",
+    "('T043', 'T044')": "physiological process",
+    "('T043', 'T045')": "physiological process",
+    "('T043',)": "physiological process",
+    "('T044', 'T045')": "physiological process",
+    "('T044',)": "molecular activity",
+    "('T045',)": "physiological process",
+    "('T046', 'T047')": "disease",
+    "('T046',)": "pathological process",
+    "('T047', 'T184')": "disease or phenotypic feature",
+    "('T047', 'T190')": "disease",
+    "('T047', 'T191')": "disease",
+    "('T047',)": "disease",
+    "('T048',)": "disease",
+    "('T049',)": "disease",
+    "('T050', 'T191')": "disease",
+    "('T050',)": "biological entity",
+    "('T051',)": "event",
+    "('T052',)": "activity",
+    "('T053',)": "behavior",
+    "('T054',)": "behavior",
+    "('T055',)": "behavior",
+    "('T056',)": "activity",
+    "('T057',)": "activity",
+    "('T058',)": "activity",
+    "('T059',)": "procedure",
+    "('T060',)": "procedure",
+    "('T061',)": "procedure",
+    "('T062',)": "activity",
+    "('T063',)": "procedure",
+    "('T064',)": "activity",
+    "('T065',)": "activity",
+    "('T066',)": "activity",
+    "('T067',)": "phenomenon",
+    "('T068',)": "phenomenon",
+    "('T069',)": "phenomenon",
+    "('T070',)": "phenomenon",
+    "('T071',)": "named thing",
+    "('T072',)": "physical entity",
+    "('T073', 'T092')": "agent",
+    "('T073', 'T093')": "agent",
+    "('T073', 'T170')": "publication",
+    "('T073',)": "physical entity",
+    "('T074', 'T109', 'T121')": "drug",
+    "('T074', 'T200')": "drug",
+    "('T074',)": "device",
+    "('T075',)": "device",
+    "('T077',)": "information content entity",
+    "('T078',)": "information content entity",
+    "('T079', 'T080')": "named thing",
+    "('T079', 'T080', 'T083')": "geographic location",
+    "('T079', 'T080', 'T170')": "publication",
+    "('T079', 'T081')": "named thing",
+    "('T079', 'T081', 'T169', 'T170')": "publication",
+    "('T079', 'T082')": "named thing",
+    "('T079', 'T083')": "geographic location",
+    "('T079', 'T083', 'T098')": "population of individual organisms",
+    "('T079', 'T090')": "individual organism",
+    "('T079', 'T090', 'T170')": "publication",
+    "('T079', 'T093')": "agent",
+    "('T079', 'T098')": "population of individual organisms",
+    "('T079', 'T098', 'T100')": "cohort",
+    "('T079', 'T100')": "cohort",
+    "('T079', 'T101')": "cohort",
+    "('T079', 'T102')": "named thing",
+    "('T079', 'T169')": "named thing",
+    "('T079', 'T170')": "publication",
+    "('T079',)": "named thing",
+    "('T080',)": "information content entity",
+    "('T081',)": "information content entity",
+    "('T082',)": "information content entity",
+    "('T083',)": "geographic location",
+    "('T085',)": "biological entity",
+    "('T086',)": "nucleic acid entity",
+    "('T087',)": "polypeptide",
+    "('T088',)": "biological entity",
+    "('T089',)": "information content entity",
+    "('T090',)": "individual organism",
+    "('T091',)": "named thing",
+    "('T092',)": "agent",
+    "('T093',)": "agent",
+    "('T094',)": "agent",
+    "('T095',)": "agent",
+    "('T096',)": "agent",
+    "('T097',)": "cohort",
+    "('T098',)": "population of individual organisms",
+    "('T099',)": "cohort",
+    "('T100',)": "cohort",
+    "('T101',)": "cohort",
+    "('T102',)": "information content entity",
+    "('T103',)": "chemical entity",
+    "('T104', 'T109')": "chemical entity",
+    "('T104', 'T109', 'T116', 'T121', 'T123', 'T130')": "drug",
+    "('T104', 'T109', 'T121')": "drug",
+    "('T104', 'T109', 'T121', 'T123', 'T130')": "drug",
+    "('T104', 'T109', 'T121', 'T130')": "drug",
+    "('T104', 'T109', 'T121', 'T130', 'T131')": "drug",
+    "('T104', 'T109', 'T123')": "chemical entity",
+    "('T104', 'T109', 'T123', 'T130')": "chemical entity",
+    "('T104', 'T109', 'T130')": "chemical entity",
+    "('T104', 'T109', 'T130', 'T131')": "chemical entity",
+    "('T104', 'T114', 'T121', 'T123', 'T130')": "drug",
+    "('T104', 'T116')": "polypeptide",
+    "('T104', 'T121')": "drug",
+    "('T104', 'T122')": "chemical entity",
+    "('T104', 'T122', 'T197')": "chemical entity",
+    "('T104', 'T123')": "chemical entity",
+    "('T104', 'T130')": "chemical entity",
+    "('T104', 'T130', 'T197')": "chemical entity",
+    "('T104', 'T169')": "chemical entity",
+    "('T104', 'T197')": "chemical entity",
+    "('T104',)": "chemical entity",
+    "('T109', 'T114')": "nucleic acid entity",
+    "('T109', 'T114', 'T116')": "polypeptide",
+    "('T109', 'T114', 'T116', 'T121', 'T129', 'T200')": "drug",
+    "('T109', 'T114', 'T121')": "drug",
+    "('T109', 'T114', 'T121', 'T123')": "drug",
+    "('T109', 'T114', 'T121', 'T123', 'T127')": "drug",
+    "('T109', 'T114', 'T121', 'T127')": "drug",
+    "('T109', 'T114', 'T121', 'T129')": "drug",
+    "('T109', 'T114', 'T121', 'T130')": "drug",
+    "('T109', 'T114', 'T121', 'T131')": "drug",
+    "('T109', 'T114', 'T121', 'T200')": "drug",
+    "('T109', 'T114', 'T123')": "nucleic acid entity",
+    "('T109', 'T114', 'T123', 'T130')": "nucleic acid entity",
+    "('T109', 'T114', 'T127')": "small molecule",
+    "('T109', 'T114', 'T129', 'T130')": "nucleic acid entity",
+    "('T109', 'T114', 'T130')": "nucleic acid entity",
+    "('T109', 'T114', 'T195')": "drug",
+    "('T109', 'T116')": "polypeptide",
+    "('T109', 'T116', 'T121')": "drug",
+    "('T109', 'T116', 'T121', 'T122')": "drug",
+    "('T109', 'T116', 'T121', 'T123')": "drug",
+    "('T109', 'T116', 'T121', 'T123', 'T125')": "drug",
+    "('T109', 'T116', 'T121', 'T123', 'T129')": "drug",
+    "('T109', 'T116', 'T121', 'T123', 'T130')": "drug",
+    "('T109', 'T116', 'T121', 'T123', 'T131')": "drug",
+    "('T109', 'T116', 'T121', 'T123', 'T192')": "drug",
+    "('T109', 'T116', 'T121', 'T123', 'T200')": "drug",
+    "('T109', 'T116', 'T121', 'T125')": "drug",
+    "('T109', 'T116', 'T121', 'T125', 'T130')": "drug",
+    "('T109', 'T116', 'T121', 'T126')": "drug",
+    "('T109', 'T116', 'T121', 'T126', 'T168')": "drug",
+    "('T109', 'T116', 'T121', 'T127')": "drug",
+    "('T109', 'T116', 'T121', 'T127', 'T197')": "drug",
+    "('T109', 'T116', 'T121', 'T129')": "drug",
+    "('T109', 'T116', 'T121', 'T129', 'T130')": "drug",
+    "('T109', 'T116', 'T121', 'T129', 'T130', 'T192')": "drug",
+    "('T109', 'T116', 'T121', 'T129', 'T131')": "drug",
+    "('T109', 'T116', 'T121', 'T129', 'T192')": "drug",
+    "('T109', 'T116', 'T121', 'T130')": "drug",
+    "('T109', 'T116', 'T121', 'T131')": "drug",
+    "('T109', 'T116', 'T121', 'T192')": "drug",
+    "('T109', 'T116', 'T121', 'T195')": "drug",
+    "('T109', 'T116', 'T122')": "device",
+    "('T109', 'T116', 'T123')": "polypeptide",
+    "('T109', 'T116', 'T123', 'T129')": "polypeptide",
+    "('T109', 'T116', 'T123', 'T130')": "polypeptide",
+    "('T109', 'T116', 'T123', 'T131')": "polypeptide",
+    "('T109', 'T116', 'T123', 'T192')": "protein",
+    "('T109', 'T116', 'T123', 'T195')": "drug",
+    "('T109', 'T116', 'T126')": "protein",
+    "('T109', 'T116', 'T129')": "polypeptide",
+    "('T109', 'T116', 'T129', 'T130')": "polypeptide",
+    "('T109', 'T116', 'T129', 'T185')": "polypeptide",
+    "('T109', 'T116', 'T130')": "polypeptide",
+    "('T109', 'T116', 'T131')": "polypeptide",
+    "('T109', 'T116', 'T195')": "drug",
+    "('T109', 'T120')": "chemical entity",
+    "('T109', 'T120', 'T121')": "drug",
+    "('T109', 'T120', 'T121', 'T130')": "drug",
+    "('T109', 'T120', 'T121', 'T168')": "drug",
+    "('T109', 'T120', 'T130')": "chemical entity",
+    "('T109', 'T120', 'T130', 'T131')": "chemical entity",
+    "('T109', 'T120', 'T200')": "drug",
+    "('T109', 'T121')": "drug",
+    "('T109', 'T121', 'T122')": "drug",
+    "('T109', 'T121', 'T122', 'T123')": "drug",
+    "('T109', 'T121', 'T122', 'T130')": "drug",
+    "('T109', 'T121', 'T122', 'T131')": "drug",
+    "('T109', 'T121', 'T122', 'T197', 'T200')": "drug",
+    "('T109', 'T121', 'T122', 'T200')": "drug",
+    "('T109', 'T121', 'T123')": "drug",
+    "('T109', 'T121', 'T123', 'T125')": "drug",
+    "('T109', 'T121', 'T123', 'T127')": "drug",
+    "('T109', 'T121', 'T123', 'T130')": "drug",
+    "('T109', 'T121', 'T123', 'T130', 'T131')": "drug",
+    "('T109', 'T121', 'T123', 'T131')": "drug",
+    "('T109', 'T121', 'T123', 'T168')": "drug",
+    "('T109', 'T121', 'T123', 'T195')": "drug",
+    "('T109', 'T121', 'T123', 'T196')": "drug",
+    "('T109', 'T121', 'T123', 'T197')": "drug",
+    "('T109', 'T121', 'T123', 'T200')": "drug",
+    "('T109', 'T121', 'T125')": "drug",
+    "('T109', 'T121', 'T125', 'T127')": "drug",
+    "('T109', 'T121', 'T125', 'T130')": "drug",
+    "('T109', 'T121', 'T125', 'T131')": "drug",
+    "('T109', 'T121', 'T125', 'T196')": "drug",
+    "('T109', 'T121', 'T125', 'T200')": "drug",
+    "('T109', 'T121', 'T126')": "drug",
+    "('T109', 'T121', 'T127')": "drug",
+    "('T109', 'T121', 'T127', 'T130')": "drug",
+    "('T109', 'T121', 'T127', 'T200')": "drug",
+    "('T109', 'T121', 'T129')": "drug",
+    "('T109', 'T121', 'T129', 'T130')": "drug",
+    "('T109', 'T121', 'T129', 'T130', 'T131')": "drug",
+    "('T109', 'T121', 'T129', 'T131')": "drug",
+    "('T109', 'T121', 'T129', 'T168')": "drug",
+    "('T109', 'T121', 'T129', 'T192')": "drug",
+    "('T109', 'T121', 'T129', 'T200')": "drug",
+    "('T109', 'T121', 'T130')": "drug",
+    "('T109', 'T121', 'T130', 'T131')": "drug",
+    "('T109', 'T121', 'T130', 'T195')": "drug",
+    "('T109', 'T121', 'T130', 'T196')": "drug",
+    "('T109', 'T121', 'T130', 'T196', 'T197')": "drug",
+    "('T109', 'T121', 'T130', 'T197')": "drug",
+    "('T109', 'T121', 'T130', 'T200')": "drug",
+    "('T109', 'T121', 'T131')": "drug",
+    "('T109', 'T121', 'T131', 'T167')": "drug",
+    "('T109', 'T121', 'T131', 'T197')": "drug",
+    "('T109', 'T121', 'T131', 'T200')": "drug",
+    "('T109', 'T121', 'T131', 'T204')": "drug",
+    "('T109', 'T121', 'T167')": "drug",
+    "('T109', 'T121', 'T168')": "drug",
+    "('T109', 'T121', 'T168', 'T197')": "drug",
+    "('T109', 'T121', 'T168', 'T200')": "drug",
+    "('T109', 'T121', 'T170')": "drug",
+    "('T109', 'T121', 'T195')": "drug",
+    "('T109', 'T121', 'T195', 'T200')": "drug",
+    "('T109', 'T121', 'T196')": "drug",
+    "('T109', 'T121', 'T196', 'T197')": "drug",
+    "('T109', 'T121', 'T197')": "drug",
+    "('T109', 'T121', 'T200')": "drug",
+    "('T109', 'T121', 'T201')": "drug",
+    "('T109', 'T121', 'T203')": "drug",
+    "('T109', 'T121', 'T204')": "drug",
+    "('T109', 'T122')": "device",
+    "('T109', 'T122', 'T123')": "device",
+    "('T109', 'T122', 'T130')": "device",
+    "('T109', 'T122', 'T131')": "device",
+    "('T109', 'T122', 'T167')": "device",
+    "('T109', 'T122', 'T200')": "drug",
+    "('T109', 'T123')": "chemical entity",
+    "('T109', 'T123', 'T125')": "chemical entity",
+    "('T109', 'T123', 'T129')": "chemical entity",
+    "('T109', 'T123', 'T130')": "chemical entity",
+    "('T109', 'T123', 'T130', 'T200')": "drug",
+    "('T109', 'T123', 'T131')": "chemical entity",
+    "('T109', 'T123', 'T168')": "food",
+    "('T109', 'T123', 'T192')": "protein",
+    "('T109', 'T123', 'T195')": "drug",
+    "('T109', 'T123', 'T200')": "drug",
+    "('T109', 'T123', 'T204')": "chemical entity",
+    "('T109', 'T125')": "chemical entity",
+    "('T109', 'T125', 'T130')": "chemical entity",
+    "('T109', 'T125', 'T200')": "drug",
+    "('T109', 'T127')": "small molecule",
+    "('T109', 'T127', 'T130')": "small molecule",
+    "('T109', 'T127', 'T195')": "drug",
+    "('T109', 'T129')": "chemical entity",
+    "('T109', 'T129', 'T130')": "chemical entity",
+    "('T109', 'T129', 'T131')": "chemical entity",
+    "('T109', 'T129', 'T185')": "chemical entity",
+    "('T109', 'T129', 'T192')": "protein",
+    "('T109', 'T129', 'T200')": "drug",
+    "('T109', 'T130')": "chemical entity",
+    "('T109', 'T130', 'T131')": "chemical entity",
+    "('T109', 'T130', 'T131', 'T196')": "small molecule",
+    "('T109', 'T130', 'T131', 'T197')": "chemical entity",
+    "('T109', 'T130', 'T131', 'T200')": "drug",
+    "('T109', 'T130', 'T167')": "chemical entity",
+    "('T109', 'T130', 'T195')": "drug",
+    "('T109', 'T130', 'T196')": "small molecule",
+    "('T109', 'T130', 'T197')": "chemical entity",
+    "('T109', 'T130', 'T200')": "drug",
+    "('T109', 'T131')": "chemical entity",
+    "('T109', 'T131', 'T195')": "drug",
+    "('T109', 'T131', 'T196')": "small molecule",
+    "('T109', 'T131', 'T197')": "chemical entity",
+    "('T109', 'T131', 'T200')": "drug",
+    "('T109', 'T167')": "chemical entity",
+    "('T109', 'T168')": "food",
+    "('T109', 'T168', 'T200')": "drug",
+    "('T109', 'T184')": "phenotypic feature",
+    "('T109', 'T195')": "drug",
+    "('T109', 'T195', 'T200')": "drug",
+    "('T109', 'T196')": "small molecule",
+    "('T109', 'T197')": "chemical entity",
+    "('T109', 'T200')": "drug",
+    "('T109', 'T203')": "device",
+    "('T109',)": "chemical entity",
+    "('T114', 'T116')": "polypeptide",
+    "('T114', 'T116', 'T121')": "drug",
+    "('T114', 'T116', 'T121', 'T129')": "drug",
+    "('T114', 'T116', 'T121', 'T200')": "drug",
+    "('T114', 'T116', 'T123')": "nucleic acid entity",
+    "('T114', 'T116', 'T123', 'T126')": "polypeptide",
+    "('T114', 'T116', 'T126')": "protein",
+    "('T114', 'T116', 'T129')": "polypeptide",
+    "('T114', 'T116', 'T195')": "drug",
+    "('T114', 'T121')": "drug",
+    "('T114', 'T121', 'T123')": "drug",
+    "('T114', 'T121', 'T123', 'T130')": "drug",
+    "('T114', 'T121', 'T123', 'T200')": "drug",
+    "('T114', 'T121', 'T127')": "drug",
+    "('T114', 'T121', 'T129')": "drug",
+    "('T114', 'T121', 'T129', 'T200')": "drug",
+    "('T114', 'T121', 'T130')": "drug",
+    "('T114', 'T121', 'T131')": "drug",
+    "('T114', 'T121', 'T195')": "drug",
+    "('T114', 'T121', 'T200')": "drug",
+    "('T114', 'T123')": "nucleic acid entity",
+    "('T114', 'T123', 'T130')": "nucleic acid entity",
+    "('T114', 'T123', 'T131')": "nucleic acid entity",
+    "('T114', 'T123', 'T195')": "drug",
+    "('T114', 'T123', 'T200')": "drug",
+    "('T114', 'T126')": "protein",
+    "('T114', 'T127')": "small molecule",
+    "('T114', 'T129')": "nucleic acid entity",
+    "('T114', 'T130')": "nucleic acid entity",
+    "('T114', 'T131')": "nucleic acid entity",
+    "('T114', 'T195')": "drug",
+    "('T114',)": "nucleic acid entity",
+    "('T116', 'T121')": "drug",
+    "('T116', 'T121', 'T122')": "drug",
+    "('T116', 'T121', 'T122', 'T123')": "drug",
+    "('T116', 'T121', 'T122', 'T126')": "drug",
+    "('T116', 'T121', 'T123')": "drug",
+    "('T116', 'T121', 'T123', 'T125')": "drug",
+    "('T116', 'T121', 'T123', 'T126')": "drug",
+    "('T116', 'T121', 'T123', 'T129')": "drug",
+    "('T116', 'T121', 'T123', 'T129', 'T131')": "drug",
+    "('T116', 'T121', 'T123', 'T130')": "drug",
+    "('T116', 'T121', 'T123', 'T131')": "drug",
+    "('T116', 'T121', 'T123', 'T168')": "drug",
+    "('T116', 'T121', 'T123', 'T192')": "drug",
+    "('T116', 'T121', 'T123', 'T195')": "drug",
+    "('T116', 'T121', 'T123', 'T196')": "drug",
+    "('T116', 'T121', 'T123', 'T200')": "drug",
+    "('T116', 'T121', 'T125')": "drug",
+    "('T116', 'T121', 'T125', 'T129')": "drug",
+    "('T116', 'T121', 'T125', 'T130')": "drug",
+    "('T116', 'T121', 'T125', 'T200')": "drug",
+    "('T116', 'T121', 'T126')": "drug",
+    "('T116', 'T121', 'T126', 'T129')": "drug",
+    "('T116', 'T121', 'T126', 'T200')": "drug",
+    "('T116', 'T121', 'T127')": "drug",
+    "('T116', 'T121', 'T129')": "drug",
+    "('T116', 'T121', 'T129', 'T130')": "drug",
+    "('T116', 'T121', 'T129', 'T131')": "drug",
+    "('T116', 'T121', 'T129', 'T167')": "drug",
+    "('T116', 'T121', 'T129', 'T168')": "drug",
+    "('T116', 'T121', 'T129', 'T192')": "drug",
+    "('T116', 'T121', 'T129', 'T197')": "drug",
+    "('T116', 'T121', 'T129', 'T200')": "drug",
+    "('T116', 'T121', 'T130')": "drug",
+    "('T116', 'T121', 'T131')": "drug",
+    "('T116', 'T121', 'T168')": "drug",
+    "('T116', 'T121', 'T192')": "drug",
+    "('T116', 'T121', 'T195')": "drug",
+    "('T116', 'T121', 'T195', 'T200')": "drug",
+    "('T116', 'T121', 'T200')": "drug",
+    "('T116', 'T121', 'T203')": "drug",
+    "('T116', 'T122')": "device",
+    "('T116', 'T122', 'T123')": "polypeptide",
+    "('T116', 'T123')": "polypeptide",
+    "('T116', 'T123', 'T125')": "polypeptide",
+    "('T116', 'T123', 'T126')": "protein",
+    "('T116', 'T123', 'T126', 'T129')": "protein",
+    "('T116', 'T123', 'T126', 'T131')": "protein",
+    "('T116', 'T123', 'T126', 'T192')": "protein",
+    "('T116', 'T123', 'T129')": "polypeptide",
+    "('T116', 'T123', 'T129', 'T130')": "polypeptide",
+    "('T116', 'T123', 'T129', 'T192')": "protein",
+    "('T116', 'T123', 'T130')": "polypeptide",
+    "('T116', 'T123', 'T131')": "polypeptide",
+    "('T116', 'T123', 'T184')": "polypeptide",
+    "('T116', 'T123', 'T192')": "protein",
+    "('T116', 'T123', 'T195')": "drug",
+    "('T116', 'T123', 'T200')": "drug",
+    "('T116', 'T125')": "polypeptide",
+    "('T116', 'T125', 'T130')": "polypeptide",
+    "('T116', 'T125', 'T200')": "drug",
+    "('T116', 'T126')": "protein",
+    "('T116', 'T126', 'T127')": "protein",
+    "('T116', 'T126', 'T129')": "protein",
+    "('T116', 'T126', 'T129', 'T131')": "protein",
+    "('T116', 'T126', 'T130')": "protein",
+    "('T116', 'T126', 'T131')": "protein",
+    "('T116', 'T126', 'T169')": "protein",
+    "('T116', 'T126', 'T184')": "protein",
+    "('T116', 'T126', 'T191')": "protein",
+    "('T116', 'T126', 'T192')": "protein",
+    "('T116', 'T126', 'T200')": "drug",
+    "('T116', 'T127')": "polypeptide",
+    "('T116', 'T129')": "polypeptide",
+    "('T116', 'T129', 'T130')": "polypeptide",
+    "('T116', 'T129', 'T131')": "polypeptide",
+    "('T116', 'T129', 'T192')": "protein",
+    "('T116', 'T129', 'T195')": "drug",
+    "('T116', 'T129', 'T196')": "polypeptide",
+    "('T116', 'T129', 'T200')": "drug",
+    "('T116', 'T130')": "polypeptide",
+    "('T116', 'T130', 'T131')": "polypeptide",
+    "('T116', 'T130', 'T192')": "protein",
+    "('T116', 'T130', 'T195')": "drug",
+    "('T116', 'T130', 'T200')": "drug",
+    "('T116', 'T131')": "polypeptide",
+    "('T116', 'T131', 'T200')": "drug",
+    "('T116', 'T168')": "food",
+    "('T116', 'T168', 'T195')": "drug",
+    "('T116', 'T192')": "protein",
+    "('T116', 'T195')": "drug",
+    "('T116', 'T195', 'T200')": "drug",
+    "('T116', 'T200')": "drug",
+    "('T116',)": "polypeptide",
+    "('T120',)": "chemical entity",
+    "('T121', 'T122')": "drug",
+    "('T121', 'T122', 'T127')": "drug",
+    "('T121', 'T122', 'T130', 'T196', 'T197')": "drug",
+    "('T121', 'T122', 'T197')": "drug",
+    "('T121', 'T123')": "drug",
+    "('T121', 'T123', 'T125')": "drug",
+    "('T121', 'T123', 'T129')": "drug",
+    "('T121', 'T123', 'T130', 'T197')": "drug",
+    "('T121', 'T123', 'T131')": "drug",
+    "('T121', 'T123', 'T168', 'T196')": "drug",
+    "('T121', 'T123', 'T196')": "drug",
+    "('T121', 'T123', 'T196', 'T197')": "drug",
+    "('T121', 'T123', 'T196', 'T200')": "drug",
+    "('T121', 'T123', 'T197')": "drug",
+    "('T121', 'T123', 'T200')": "drug",
+    "('T121', 'T125')": "drug",
+    "('T121', 'T125', 'T127')": "drug",
+    "('T121', 'T126')": "drug",
+    "('T121', 'T127')": "drug",
+    "('T121', 'T127', 'T130')": "drug",
+    "('T121', 'T127', 'T167')": "drug",
+    "('T121', 'T127', 'T200')": "drug",
+    "('T121', 'T129')": "drug",
+    "('T121', 'T129', 'T130')": "drug",
+    "('T121', 'T129', 'T130', 'T200')": "drug",
+    "('T121', 'T129', 'T131')": "drug",
+    "('T121', 'T129', 'T168')": "drug",
+    "('T121', 'T129', 'T200')": "drug",
+    "('T121', 'T130')": "drug",
+    "('T121', 'T130', 'T131', 'T196', 'T197')": "drug",
+    "('T121', 'T130', 'T131', 'T197')": "drug",
+    "('T121', 'T130', 'T196')": "drug",
+    "('T121', 'T130', 'T196', 'T197')": "drug",
+    "('T121', 'T130', 'T196', 'T200')": "drug",
+    "('T121', 'T130', 'T197')": "drug",
+    "('T121', 'T130', 'T197', 'T200')": "drug",
+    "('T121', 'T130', 'T200')": "drug",
+    "('T121', 'T131')": "drug",
+    "('T121', 'T131', 'T196')": "drug",
+    "('T121', 'T131', 'T197')": "drug",
+    "('T121', 'T167', 'T197')": "drug",
+    "('T121', 'T168')": "drug",
+    "('T121', 'T168', 'T196')": "drug",
+    "('T121', 'T168', 'T197')": "drug",
+    "('T121', 'T169')": "drug",
+    "('T121', 'T170')": "drug",
+    "('T121', 'T195')": "drug",
+    "('T121', 'T196')": "drug",
+    "('T121', 'T196', 'T197')": "drug",
+    "('T121', 'T196', 'T200')": "drug",
+    "('T121', 'T197')": "drug",
+    "('T121', 'T197', 'T200')": "drug",
+    "('T121', 'T197', 'T203')": "drug",
+    "('T121', 'T200')": "drug",
+    "('T121', 'T203')": "drug",
+    "('T121', 'T204')": "drug",
+    "('T121',)": "drug",
+    "('T122', 'T123')": "chemical entity",
+    "('T122', 'T130')": "chemical entity",
+    "('T122', 'T167')": "chemical entity",
+    "('T122', 'T169')": "device",
+    "('T122', 'T170')": "publication",
+    "('T122', 'T197')": "chemical entity",
+    "('T122', 'T200')": "drug",
+    "('T122',)": "device",
+    "('T123', 'T129')": "chemical entity",
+    "('T123', 'T129', 'T131')": "chemical entity",
+    "('T123', 'T130')": "chemical entity",
+    "('T123', 'T130', 'T196')": "small molecule",
+    "('T123', 'T130', 'T197')": "chemical entity",
+    "('T123', 'T131')": "chemical entity",
+    "('T123', 'T131', 'T200')": "drug",
+    "('T123', 'T168', 'T196')": "small molecule",
+    "('T123', 'T195')": "drug",
+    "('T123', 'T196')": "small molecule",
+    "('T123', 'T196', 'T197')": "small molecule",
+    "('T123', 'T196', 'T200')": "drug",
+    "('T123', 'T197')": "chemical entity",
+    "('T123', 'T197', 'T200')": "drug",
+    "('T123', 'T200')": "drug",
+    "('T123',)": "chemical entity",
+    "('T125',)": "chemical entity",
+    "('T126',)": "protein",
+    "('T127',)": "small molecule",
+    "('T129', 'T130')": "chemical entity",
+    "('T129', 'T131')": "chemical entity",
+    "('T129', 'T167')": "chemical entity",
+    "('T129', 'T168')": "food",
+    "('T129', 'T185')": "named thing",
+    "('T129', 'T192')": "protein",
+    "('T129', 'T200')": "drug",
+    "('T129',)": "biological entity",
+    "('T130', 'T131')": "chemical entity",
+    "('T130', 'T131', 'T196')": "small molecule",
+    "('T130', 'T131', 'T196', 'T197')": "small molecule",
+    "('T130', 'T131', 'T197')": "chemical entity",
+    "('T130', 'T167')": "chemical entity",
+    "('T130', 'T195')": "drug",
+    "('T130', 'T196')": "small molecule",
+    "('T130', 'T196', 'T197')": "small molecule",
+    "('T130', 'T197')": "chemical entity",
+    "('T130', 'T197', 'T200')": "drug",
+    "('T130', 'T200')": "drug",
+    "('T130',)": "chemical entity",
+    "('T131', 'T196')": "small molecule",
+    "('T131', 'T197')": "chemical entity",
+    "('T131',)": "chemical entity",
+    "('T167',)": "chemical entity",
+    "('T168',)": "food",
+    "('T169',)": "information content entity",
+    "('T170',)": "publication",
+    "('T171',)": "information content entity",
+    "('T184',)": "phenotypic feature",
+    "('T185',)": "information content entity",
+    "('T190',)": "disease",
+    "('T191',)": "disease",
+    "('T192',)": "protein",
+    "('T194',)": "organism taxon",
+    "('T195',)": "drug",
+    "('T196',)": "small molecule",
+    "('T197',)": "chemical entity",
+    "('T200',)": "drug",
+    "('T201',)": "named thing",
+    "('T203',)": "device",
+    "('T204',)": "organism taxon",
+    "()": "named thing"
+}

From 3702a77111c5bbf8dfc052d2b1912fc8bc18a845 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 16 Aug 2023 17:53:14 -0700
Subject: [PATCH 026/117] #316 more CHV code

---
 umls_list_jsonl_to_kg_jsonl.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index d372084d..711021cd 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -131,8 +131,27 @@ def process_chv_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
     name = str()
     synonyms = list()
     names = info.get(NAMES_KEY, dict())
+    pt = names.get('PT', dict())
+    if 'Y' in pt:
+        name = pt.get('Y', '')
+        assert len(name) == 1, str(name) + ' ' + node_curie
+        name = name[0]
+    else:
+        name = pt.get('N', '')
+        assert len(name) == 1, str(name) + ' ' + node_curie
+        name = name[0]
+    synonyms += [syn for syn in names.get('SY', dict()).get('Y', list())]
+    synonyms += [syn for syn in names.get('SY', dict()).get('N', list())]
+
+    node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
+    node['synonym'] = synonyms
+    description = str()
+    for tui in tuis:
+        description += "; UMLS Semantic Type: STY:" + tui
+    description.strip("; ")
+    node['description'] = description
 
-    print(curie_prefix + ":", names)
+    nodes_output.write(node)
 
 def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
     curie_prefix = kg2_util.CURIE_PREFIX_DRUGBANK

From 83f85916c327154bd338ab4f097e57641c3e315e Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 17 Aug 2023 11:59:20 -0700
Subject: [PATCH 027/117] #316 CHV is working now

---
 tui_combo_mappings.json        | 519 ++++++++++++++++++++++++++++++++-
 umls_list_jsonl_to_kg_jsonl.py |  10 +-
 2 files changed, 512 insertions(+), 17 deletions(-)

diff --git a/tui_combo_mappings.json b/tui_combo_mappings.json
index d30a088a..ea707d0a 100644
--- a/tui_combo_mappings.json
+++ b/tui_combo_mappings.json
@@ -1,4 +1,11 @@
 {
+    "('T001', 'T002', 'T008')": "organism taxon",
+    "('T001', 'T007')": "individual organism",
+    "('T001', 'T028', 'T032')": "individual organism",
+    "('T001', 'T058')": "activity",
+    "('T001', 'T077')": "individual organism",
+    "('T001', 'T102')": "individual organism",
+    "('T001', 'T204')": "individual organism",
     "('T001',)": "individual organism",
     "('T002', 'T004')": "organism taxon",
     "('T002', 'T025')": "cell",
@@ -69,34 +76,113 @@
     "('T007', 'T203')": "device",
     "('T007', 'T204')": "organism taxon",
     "('T007',)": "organism taxon",
+    "('T008', 'T015')": "organism taxon",
+    "('T008', 'T040')": "organism taxon",
+    "('T008', 'T061', 'T083', 'T170')": "publication",
+    "('T008', 'T083', 'T170')": "publication",
+    "('T008', 'T170')": "organism taxon",
+    "('T008', 'T204')": "organism taxon",
     "('T008',)": "organism taxon",
+    "('T010', 'T061')": "procedure",
     "('T010',)": "organism taxon",
+    "('T011', 'T204')": "organism taxon",
     "('T011',)": "organism taxon",
+    "('T012', 'T047')": "disease",
+    "('T012', 'T059')": "procedure",
     "('T012',)": "organism taxon",
+    "('T013', 'T047')": "disease",
+    "('T013', 'T116', 'T126')": "protein",
+    "('T013', 'T168')": "food",
     "('T013',)": "organism taxon",
     "('T014',)": "organism taxon",
+    "('T015', 'T109', 'T121', 'T131')": "drug",
     "('T015',)": "organism taxon",
+    "('T016', 'T023')": "gross anatomical structure",
+    "('T016', 'T054')": "behavior",
     "('T016',)": "organism taxon",
+    "('T017', 'T023')": "anatomical entity",
+    "('T017', 'T026')": "cellular component",
+    "('T017', 'T061')": "procedure",
+    "('T017', 'T091')": "anatomical entity",
     "('T017',)": "anatomical entity",
     "('T018',)": "gross anatomical structure",
+    "('T019', 'T020')": "disease",
+    "('T019', 'T023')": "disease",
     "('T019', 'T028')": "disease",
+    "('T019', 'T028', 'T033')": "disease",
+    "('T019', 'T028', 'T033', 'T047')": "disease",
+    "('T019', 'T028', 'T033', 'T047', 'T191')": "disease",
     "('T019', 'T028', 'T047')": "disease",
     "('T019', 'T033')": "disease",
+    "('T019', 'T033', 'T047')": "disease",
+    "('T019', 'T033', 'T190')": "disease",
+    "('T019', 'T046')": "disease",
+    "('T019', 'T046', 'T047')": "disease",
+    "('T019', 'T046', 'T080', 'T169')": "disease",
     "('T019', 'T047')": "disease",
+    "('T019', 'T047', 'T190')": "disease",
+    "('T019', 'T047', 'T191')": "disease",
+    "('T019', 'T073')": "disease",
+    "('T019', 'T116', 'T121', 'T123')": "named thing",
+    "('T019', 'T121', 'T123', 'T196')": "named thing",
+    "('T019', 'T190')": "disease",
+    "('T019', 'T191')": "disease",
     "('T019',)": "disease",
+    "('T020', 'T026')": "disease",
+    "('T020', 'T030', 'T047')": "disease",
+    "('T020', 'T031', 'T046')": "disease",
+    "('T020', 'T033', 'T046')": "disease",
+    "('T020', 'T033', 'T047')": "disease",
+    "('T020', 'T037', 'T072')": "disease",
+    "('T020', 'T046')": "disease",
+    "('T020', 'T046', 'T190')": "disease",
+    "('T020', 'T047')": "disease",
+    "('T020', 'T047', 'T190')": "disease",
+    "('T020', 'T059')": "disease",
+    "('T020', 'T081')": "disease",
+    "('T020', 'T121', 'T196')": "named thing",
+    "('T020', 'T190')": "disease",
     "('T020',)": "disease",
     "('T021',)": "gross anatomical structure",
+    "('T022', 'T023')": "gross anatomical structure",
+    "('T022', 'T033')": "anatomical entity",
+    "('T022', 'T116', 'T121')": "drug",
     "('T022',)": "anatomical entity",
     "('T023', 'T024')": "gross anatomical structure",
+    "('T023', 'T024', 'T025')": "cell",
+    "('T023', 'T024', 'T030')": "gross anatomical structure",
     "('T023', 'T025')": "cell",
     "('T023', 'T026')": "cellular component",
     "('T023', 'T029')": "anatomical entity",
+    "('T023', 'T029', 'T030')": "gross anatomical structure",
+    "('T023', 'T029', 'T070')": "gross anatomical structure",
     "('T023', 'T030')": "anatomical entity",
+    "('T023', 'T030', 'T047', 'T048')": "disease",
+    "('T023', 'T031')": "gross anatomical structure",
     "('T023', 'T033')": "disease or phenotypic feature",
     "('T023', 'T033', 'T047')": "disease",
+    "('T023', 'T034')": "phenomenon",
+    "('T023', 'T037')": "pathological process",
+    "('T023', 'T037', 'T047', 'T190')": "disease",
+    "('T023', 'T042')": "gross anatomical structure",
+    "('T023', 'T046', 'T047', 'T190')": "disease",
     "('T023', 'T047')": "disease",
+    "('T023', 'T047', 'T061')": "disease",
+    "('T023', 'T047', 'T184')": "disease or phenotypic feature",
+    "('T023', 'T047', 'T191')": "disease",
     "('T023', 'T061')": "procedure",
+    "('T023', 'T061', 'T080', 'T081')": "named thing",
+    "('T023', 'T073', 'T093')": "gross anatomical structure",
+    "('T023', 'T074')": "device",
+    "('T023', 'T078')": "gross anatomical structure",
+    "('T023', 'T078', 'T097')": "gross anatomical structure",
+    "('T023', 'T078', 'T170')": "named thing",
+    "('T023', 'T080')": "gross anatomical structure",
+    "('T023', 'T080', 'T081')": "gross anatomical structure",
+    "('T023', 'T081', 'T083')": "gross anatomical structure",
+    "('T023', 'T170')": "publication",
     "('T023', 'T191')": "disease",
+    "('T023', 'T200')": "drug",
     "('T023',)": "gross anatomical structure",
     "('T024', 'T026')": "cellular component",
     "('T024', 'T031')": "gross anatomical structure",
@@ -134,27 +220,90 @@
     "('T025', 'T191')": "disease",
     "('T025', 'T200')": "drug",
     "('T025',)": "cell",
+    "('T026', 'T028')": "cellular component",
+    "('T026', 'T030')": "cellular component",
+    "('T026', 'T031')": "cellular component",
+    "('T026', 'T033')": "cellular component",
+    "('T026', 'T038', 'T043')": "phenomenon",
+    "('T026', 'T043')": "physiological process",
+    "('T026', 'T043', 'T044')": "cellular component",
+    "('T026', 'T044')": "cellular component",
+    "('T026', 'T044', 'T045')": "physiological process",
+    "('T026', 'T045')": "physiological process",
+    "('T026', 'T047')": "disease",
+    "('T026', 'T073')": "cellular component",
+    "('T026', 'T114', 'T123')": "nucleic acid entity",
+    "('T026', 'T116', 'T123')": "polypeptide",
+    "('T026', 'T116', 'T123', 'T126')": "protein",
+    "('T026', 'T116', 'T126')": "protein",
+    "('T026', 'T116', 'T129')": "polypeptide",
+    "('T026', 'T167')": "cellular component",
+    "('T026', 'T191')": "disease",
     "('T026',)": "cellular component",
     "('T028', 'T033')": "disease or phenotypic feature",
     "('T028', 'T033', 'T047')": "disease",
     "('T028', 'T033', 'T047', 'T191')": "disease",
+    "('T028', 'T033', 'T191')": "disease",
+    "('T028', 'T045')": "physiological process",
+    "('T028', 'T046', 'T047')": "disease",
     "('T028', 'T047')": "disease",
+    "('T028', 'T047', 'T048')": "disease",
+    "('T028', 'T047', 'T191')": "disease",
     "('T028', 'T048')": "disease",
+    "('T028', 'T054')": "behavior",
+    "('T028', 'T114')": "nucleic acid entity",
+    "('T028', 'T114', 'T123')": "nucleic acid entity",
+    "('T028', 'T116', 'T123')": "polypeptide",
+    "('T028', 'T191')": "disease",
     "('T028',)": "named thing",
+    "('T029', 'T061')": "procedure",
+    "('T029', 'T078', 'T170')": "publication",
+    "('T029', 'T081', 'T167', 'T170')": "named thing",
+    "('T029', 'T082')": "anatomical entity",
+    "('T029', 'T116', 'T129')": "polypeptide",
     "('T029',)": "anatomical entity",
+    "('T030', 'T033')": "anatomical entity",
     "('T030',)": "anatomical entity",
+    "('T031', 'T033')": "anatomical entity",
+    "('T031', 'T033', 'T046')": "pathological process",
+    "('T031', 'T033', 'T047')": "disease",
+    "('T031', 'T033', 'T073', 'T078', 'T079', 'T080', 'T170')": "disease or phenotypic feature",
+    "('T031', 'T033', 'T184')": "phenotypic feature",
+    "('T031', 'T037')": "anatomical entity",
+    "('T031', 'T039')": "physiological process",
+    "('T031', 'T040')": "physiological process",
+    "('T031', 'T046')": "pathological process",
+    "('T031', 'T047')": "disease",
+    "('T031', 'T058', 'T059')": "procedure",
+    "('T031', 'T059')": "procedure",
+    "('T031', 'T060')": "procedure",
+    "('T031', 'T062', 'T078', 'T169', 'T170')": "named thing",
+    "('T031', 'T073', 'T078', 'T080', 'T081', 'T093', 'T170')": "named thing",
+    "('T031', 'T073', 'T093')": "agent",
+    "('T031', 'T078')": "anatomical entity",
+    "('T031', 'T079')": "anatomical entity",
+    "('T031', 'T082')": "anatomical entity",
+    "('T031', 'T099')": "anatomical entity",
+    "('T031', 'T101', 'T169')": "anatomical entity",
     "('T031', 'T121')": "drug",
     "('T031', 'T121', 'T200')": "drug",
+    "('T031', 'T168')": "food",
+    "('T031', 'T169')": "anatomical entity",
+    "('T031', 'T170')": "anatomical entity",
+    "('T031', 'T184')": "phenotypic feature",
     "('T031',)": "anatomical entity",
+    "('T032', 'T033')": "disease or phenotypic feature",
     "('T032',)": "named thing",
     "('T033', 'T034')": "phenomenon",
     "('T033', 'T034', 'T047')": "disease",
     "('T033', 'T034', 'T059')": "phenomenon",
+    "('T033', 'T034', 'T073', 'T079', 'T093')": "named thing",
     "('T033', 'T037')": "pathological process",
     "('T033', 'T037', 'T047')": "disease",
     "('T033', 'T037', 'T055')": "pathological process",
     "('T033', 'T037', 'T070', 'T167', 'T191')": "disease",
     "('T033', 'T039')": "physiological process",
+    "('T033', 'T039', 'T040', 'T169')": "disease or phenotypic feature",
     "('T033', 'T040')": "physiological process",
     "('T033', 'T040', 'T046', 'T047')": "disease",
     "('T033', 'T040', 'T047')": "disease",
@@ -185,6 +334,7 @@
     "('T033', 'T055')": "behavior",
     "('T033', 'T055', 'T061')": "procedure",
     "('T033', 'T055', 'T185')": "behavior",
+    "('T033', 'T056', 'T073', 'T078', 'T079', 'T081', 'T093', 'T169', 'T170')": "named thing",
     "('T033', 'T056', 'T078', 'T080', 'T169', 'T170')": "publication",
     "('T033', 'T057', 'T080')": "activity",
     "('T033', 'T058')": "activity",
@@ -192,12 +342,19 @@
     "('T033', 'T060')": "procedure",
     "('T033', 'T060', 'T080')": "procedure",
     "('T033', 'T061')": "procedure",
+    "('T033', 'T061', 'T078', 'T079', 'T081', 'T170')": "named thing",
     "('T033', 'T061', 'T168')": "procedure",
     "('T033', 'T067')": "phenomenon",
     "('T033', 'T069', 'T131')": "phenomenon",
+    "('T033', 'T073', 'T078', 'T079', 'T093', 'T169', 'T170')": "disease or phenotypic feature",
+    "('T033', 'T073', 'T078', 'T080', 'T093', 'T169')": "disease or phenotypic feature",
+    "('T033', 'T073', 'T079', 'T080', 'T169', 'T170')": "named thing",
+    "('T033', 'T073', 'T093')": "disease or phenotypic feature",
     "('T033', 'T074')": "device",
     "('T033', 'T078')": "disease or phenotypic feature",
     "('T033', 'T078', 'T079', 'T170')": "publication",
+    "('T033', 'T078', 'T080', 'T081', 'T169')": "disease or phenotypic feature",
+    "('T033', 'T078', 'T080', 'T170')": "named thing",
     "('T033', 'T078', 'T089', 'T095', 'T170')": "publication",
     "('T033', 'T078', 'T089', 'T170')": "publication",
     "('T033', 'T078', 'T169', 'T170')": "publication",
@@ -208,19 +365,29 @@
     "('T033', 'T080', 'T082')": "disease or phenotypic feature",
     "('T033', 'T080', 'T170')": "publication",
     "('T033', 'T081')": "disease or phenotypic feature",
+    "('T033', 'T082')": "disease or phenotypic feature",
+    "('T033', 'T082', 'T170')": "named thing",
     "('T033', 'T083', 'T093', 'T169', 'T170')": "publication",
     "('T033', 'T089')": "disease or phenotypic feature",
+    "('T033', 'T091', 'T169')": "disease or phenotypic feature",
+    "('T033', 'T092', 'T170')": "publication",
+    "('T033', 'T093')": "disease or phenotypic feature",
+    "('T033', 'T097')": "disease or phenotypic feature",
     "('T033', 'T098')": "population of individual organisms",
     "('T033', 'T098', 'T116', 'T121', 'T129')": "drug",
     "('T033', 'T098', 'T121', 'T129')": "drug",
     "('T033', 'T099')": "cohort",
+    "('T033', 'T099', 'T200')": "named thing",
     "('T033', 'T101')": "cohort",
     "('T033', 'T102')": "disease or phenotypic feature",
+    "('T033', 'T109', 'T121')": "named thing",
     "('T033', 'T109', 'T122')": "chemical entity",
     "('T033', 'T109', 'T123')": "chemical entity",
     "('T033', 'T116', 'T123')": "polypeptide",
+    "('T033', 'T116', 'T126')": "protein",
     "('T033', 'T116', 'T129')": "polypeptide",
     "('T033', 'T121')": "drug",
+    "('T033', 'T121', 'T125', 'T127')": "drug",
     "('T033', 'T122')": "device",
     "('T033', 'T168')": "food",
     "('T033', 'T168', 'T170')": "food",
@@ -251,67 +418,333 @@
     "('T034', 'T196')": "small molecule",
     "('T034', 'T201')": "phenomenon",
     "('T034',)": "phenomenon",
+    "('T037', 'T046')": "pathological process",
     "('T037', 'T047')": "disease",
+    "('T037', 'T058')": "pathological process",
+    "('T037', 'T059')": "procedure",
+    "('T037', 'T061')": "procedure",
+    "('T037', 'T067')": "pathological process",
+    "('T037', 'T073', 'T092')": "agent",
+    "('T037', 'T109', 'T195')": "drug",
+    "('T037', 'T116', 'T123', 'T131')": "polypeptide",
+    "('T037', 'T121', 'T123', 'T196')": "drug",
+    "('T037', 'T123', 'T197')": "chemical entity",
+    "('T037', 'T190')": "disease",
+    "('T037', 'T204')": "pathological process",
     "('T037',)": "pathological process",
+    "('T038', 'T039')": "phenomenon",
+    "('T038', 'T039', 'T043')": "phenomenon",
+    "('T038', 'T040')": "phenomenon",
+    "('T038', 'T040', 'T043')": "phenomenon",
+    "('T038', 'T040', 'T080', 'T169')": "phenomenon",
+    "('T038', 'T042')": "phenomenon",
     "('T038', 'T043')": "phenomenon",
+    "('T038', 'T044')": "phenomenon",
+    "('T038', 'T046')": "pathological process",
+    "('T038', 'T070')": "phenomenon",
+    "('T038', 'T169')": "phenomenon",
     "('T038',)": "phenomenon",
     "('T039', 'T040')": "physiological process",
+    "('T039', 'T042')": "physiological process",
+    "('T039', 'T043')": "physiological process",
+    "('T039', 'T043', 'T044')": "physiological process",
+    "('T039', 'T044')": "physiological process",
+    "('T039', 'T047')": "disease",
+    "('T039', 'T061')": "physiological process",
+    "('T039', 'T070')": "phenomenon",
+    "('T039', 'T109', 'T121')": "drug",
     "('T039', 'T121')": "drug",
+    "('T039', 'T121', 'T125')": "drug",
     "('T039',)": "physiological process",
     "('T040', 'T042')": "physiological process",
     "('T040', 'T043')": "physiological process",
+    "('T040', 'T043', 'T044')": "physiological process",
     "('T040', 'T044')": "physiological process",
+    "('T040', 'T045')": "physiological process",
+    "('T040', 'T046')": "pathological process",
+    "('T040', 'T047')": "disease",
+    "('T040', 'T055')": "behavior",
+    "('T040', 'T061')": "procedure",
+    "('T040', 'T070')": "phenomenon",
     "('T040',)": "physiological process",
+    "('T041', 'T042')": "physiological process",
+    "('T041', 'T046')": "pathological process",
+    "('T041', 'T047')": "disease",
+    "('T041', 'T048')": "disease",
+    "('T041', 'T048', 'T055')": "disease",
+    "('T041', 'T048', 'T184')": "disease",
+    "('T041', 'T053', 'T058')": "behavior",
+    "('T041', 'T054')": "behavior",
+    "('T041', 'T054', 'T055')": "behavior",
+    "('T041', 'T055')": "behavior",
+    "('T041', 'T055', 'T078')": "behavior",
+    "('T041', 'T058')": "behavior",
+    "('T041', 'T061')": "procedure",
+    "('T041', 'T062')": "behavior",
+    "('T041', 'T062', 'T091')": "behavior",
+    "('T041', 'T067')": "phenomenon",
+    "('T041', 'T067', 'T080', 'T091', 'T097')": "phenomenon",
+    "('T041', 'T078')": "behavior",
+    "('T041', 'T078', 'T080')": "behavior",
+    "('T041', 'T078', 'T102')": "behavior",
+    "('T041', 'T080')": "behavior",
+    "('T041', 'T081')": "behavior",
+    "('T041', 'T091')": "behavior",
+    "('T041', 'T170')": "publication",
+    "('T041', 'T184')": "phenotypic feature",
     "('T041',)": "behavior",
+    "('T042', 'T043')": "physiological process",
+    "('T042', 'T044')": "physiological process",
+    "('T042', 'T060')": "physiological process",
+    "('T042', 'T080')": "physiological process",
+    "('T042', 'T116', 'T126')": "protein",
+    "('T042', 'T121')": "drug",
+    "('T042', 'T201')": "physiological process",
     "('T042',)": "physiological process",
     "('T043', 'T044')": "physiological process",
     "('T043', 'T045')": "physiological process",
+    "('T043', 'T046')": "pathological process",
+    "('T043', 'T079')": "physiological process",
     "('T043',)": "physiological process",
     "('T044', 'T045')": "physiological process",
+    "('T044', 'T046')": "pathological process",
+    "('T044', 'T047')": "disease",
+    "('T044', 'T070')": "phenomenon",
     "('T044',)": "molecular activity",
+    "('T045', 'T049')": "disease",
     "('T045',)": "physiological process",
     "('T046', 'T047')": "disease",
+    "('T046', 'T056')": "pathological process",
+    "('T046', 'T061')": "pathological process",
+    "('T046', 'T082', 'T201')": "pathological process",
+    "('T046', 'T109', 'T121')": "drug",
+    "('T046', 'T109', 'T121', 'T130')": "drug",
+    "('T046', 'T116', 'T121')": "drug",
+    "('T046', 'T184')": "pathological process",
+    "('T046', 'T190')": "disease",
+    "('T046', 'T191')": "disease",
     "('T046',)": "pathological process",
+    "('T047', 'T048')": "disease",
+    "('T047', 'T048', 'T184')": "disease or phenotypic feature",
+    "('T047', 'T049')": "disease",
+    "('T047', 'T050')": "disease",
+    "('T047', 'T059')": "named thing",
+    "('T047', 'T060')": "disease",
+    "('T047', 'T061')": "disease",
+    "('T047', 'T067')": "disease",
+    "('T047', 'T080')": "disease",
+    "('T047', 'T081')": "disease",
+    "('T047', 'T109', 'T121')": "drug",
+    "('T047', 'T109', 'T121', 'T123')": "named thing",
+    "('T047', 'T109', 'T123')": "disease",
+    "('T047', 'T116', 'T121', 'T123')": "named thing",
+    "('T047', 'T116', 'T123')": "named thing",
+    "('T047', 'T116', 'T129')": "named thing",
+    "('T047', 'T169')": "disease",
     "('T047', 'T184')": "disease or phenotypic feature",
     "('T047', 'T190')": "disease",
     "('T047', 'T191')": "disease",
+    "('T047', 'T196')": "disease",
+    "('T047', 'T204')": "disease",
     "('T047',)": "disease",
+    "('T048', 'T054')": "disease",
+    "('T048', 'T055')": "disease",
+    "('T048', 'T184')": "disease or phenotypic feature",
     "('T048',)": "disease",
+    "('T049', 'T059')": "disease",
     "('T049',)": "disease",
     "('T050', 'T191')": "disease",
-    "('T050',)": "biological entity",
+    "('T050',)": "named thing",
     "('T051',)": "event",
+    "('T052', 'T079')": "activity",
     "('T052',)": "activity",
     "('T053',)": "behavior",
+    "('T054', 'T055')": "behavior",
+    "('T054', 'T068')": "behavior",
+    "('T054', 'T078')": "behavior",
+    "('T054', 'T080')": "behavior",
+    "('T054', 'T098')": "behavior",
     "('T054',)": "behavior",
+    "('T055', 'T078')": "behavior",
+    "('T055', 'T080')": "behavior",
+    "('T055', 'T170')": "behavior",
     "('T055',)": "behavior",
+    "('T056', 'T073')": "activity",
+    "('T056', 'T079')": "activity",
     "('T056',)": "activity",
+    "('T057', 'T058')": "activity",
+    "('T057', 'T062')": "activity",
+    "('T057', 'T073')": "activity",
+    "('T057', 'T073', 'T170')": "activity",
+    "('T057', 'T078')": "activity",
+    "('T057', 'T079')": "activity",
+    "('T057', 'T080')": "activity",
+    "('T057', 'T081')": "activity",
+    "('T057', 'T090')": "activity",
+    "('T057', 'T170')": "activity",
     "('T057',)": "activity",
+    "('T058', 'T060')": "procedure",
+    "('T058', 'T061')": "procedure",
+    "('T058', 'T065')": "activity",
+    "('T058', 'T073', 'T093')": "agent",
+    "('T058', 'T078')": "activity",
+    "('T058', 'T080')": "activity",
+    "('T058', 'T081')": "activity",
+    "('T058', 'T091')": "activity",
+    "('T058', 'T093')": "activity",
+    "('T058', 'T097')": "activity",
+    "('T058', 'T098', 'T116', 'T121', 'T129')": "drug",
+    "('T058', 'T098', 'T121', 'T129')": "drug",
+    "('T058', 'T101')": "activity",
+    "('T058', 'T169')": "activity",
+    "('T058', 'T170')": "publication",
+    "('T058', 'T184')": "phenotypic feature",
     "('T058',)": "activity",
+    "('T059', 'T060')": "procedure",
+    "('T059', 'T060', 'T170')": "procedure",
+    "('T059', 'T061')": "procedure",
+    "('T059', 'T063')": "procedure",
+    "('T059', 'T070')": "phenomenon",
+    "('T059', 'T073')": "procedure",
+    "('T059', 'T073', 'T074')": "device",
+    "('T059', 'T074')": "device",
+    "('T059', 'T075')": "procedure",
+    "('T059', 'T078')": "procedure",
+    "('T059', 'T080')": "procedure",
+    "('T059', 'T080', 'T169')": "procedure",
+    "('T059', 'T081')": "procedure",
+    "('T059', 'T082')": "procedure",
+    "('T059', 'T090')": "procedure",
+    "('T059', 'T091')": "procedure",
+    "('T059', 'T093')": "agent",
+    "('T059', 'T109')": "procedure",
+    "('T059', 'T109', 'T121')": "drug",
+    "('T059', 'T109', 'T121', 'T127')": "drug",
+    "('T059', 'T109', 'T121', 'T130')": "drug",
+    "('T059', 'T109', 'T122')": "chemical entity",
+    "('T059', 'T109', 'T123')": "procedure",
+    "('T059', 'T109', 'T127')": "small molecule",
+    "('T059', 'T109', 'T130')": "chemical entity",
+    "('T059', 'T109', 'T195')": "drug",
+    "('T059', 'T116')": "polypeptide",
+    "('T059', 'T116', 'T121', 'T125')": "drug",
+    "('T059', 'T116', 'T121', 'T129')": "drug",
+    "('T059', 'T116', 'T121', 'T129', 'T130')": "drug",
+    "('T059', 'T116', 'T123')": "polypeptide",
+    "('T059', 'T116', 'T129')": "polypeptide",
+    "('T059', 'T116', 'T195')": "drug",
+    "('T059', 'T121')": "drug",
+    "('T059', 'T130')": "chemical entity",
+    "('T059', 'T168')": "food",
+    "('T059', 'T169')": "procedure",
+    "('T059', 'T170')": "procedure",
+    "('T059', 'T184')": "phenotypic feature",
+    "('T059', 'T200')": "drug",
     "('T059',)": "procedure",
+    "('T060', 'T061')": "procedure",
+    "('T060', 'T074')": "procedure",
+    "('T060', 'T081')": "procedure",
+    "('T060', 'T091')": "procedure",
+    "('T060', 'T121')": "drug",
+    "('T060', 'T170')": "procedure",
+    "('T060', 'T184')": "phenotypic feature",
+    "('T060', 'T204')": "procedure",
     "('T060',)": "procedure",
+    "('T061', 'T062')": "procedure",
+    "('T061', 'T068')": "phenomenon",
+    "('T061', 'T073', 'T093')": "physical entity",
+    "('T061', 'T074')": "device",
+    "('T061', 'T078', 'T080')": "procedure",
+    "('T061', 'T079')": "procedure",
+    "('T061', 'T091')": "procedure",
+    "('T061', 'T098')": "procedure",
+    "('T061', 'T109', 'T121')": "drug",
+    "('T061', 'T116', 'T121', 'T129')": "drug",
+    "('T061', 'T121')": "drug",
+    "('T061', 'T169')": "procedure",
     "('T061',)": "procedure",
+    "('T062', 'T081')": "activity",
+    "('T062', 'T083')": "activity",
+    "('T062', 'T091')": "activity",
+    "('T062', 'T170')": "activity",
     "('T062',)": "activity",
     "('T063',)": "procedure",
+    "('T064', 'T078')": "activity",
+    "('T064', 'T081')": "activity",
+    "('T064', 'T089')": "activity",
     "('T064',)": "activity",
+    "('T065', 'T080', 'T185')": "activity",
+    "('T065', 'T109')": "chemical entity",
     "('T065',)": "activity",
+    "('T066', 'T073')": "activity",
+    "('T066', 'T170')": "activity",
     "('T066',)": "activity",
+    "('T067', 'T070')": "phenomenon",
+    "('T067', 'T116', 'T121', 'T123')": "drug",
     "('T067',)": "phenomenon",
+    "('T068', 'T073')": "phenomenon",
     "('T068',)": "phenomenon",
     "('T069',)": "phenomenon",
+    "('T070', 'T078')": "phenomenon",
+    "('T070', 'T083')": "phenomenon",
+    "('T070', 'T169', 'T170')": "named thing",
     "('T070',)": "phenomenon",
     "('T071',)": "named thing",
     "('T072',)": "physical entity",
+    "('T073', 'T074')": "device",
+    "('T073', 'T078', 'T079', 'T080', 'T169', 'T170')": "publication",
+    "('T073', 'T078', 'T093')": "agent",
+    "('T073', 'T079', 'T093', 'T170')": "publication",
+    "('T073', 'T080', 'T169')": "physical entity",
+    "('T073', 'T083', 'T093')": "agent",
+    "('T073', 'T090')": "physical entity",
     "('T073', 'T092')": "agent",
     "('T073', 'T093')": "agent",
+    "('T073', 'T093', 'T169')": "agent",
+    "('T073', 'T093', 'T170')": "agent",
+    "('T073', 'T109', 'T121')": "drug",
+    "('T073', 'T121')": "drug",
+    "('T073', 'T167')": "chemical entity",
+    "('T073', 'T167', 'T170')": "chemical entity",
     "('T073', 'T170')": "publication",
+    "('T073', 'T200')": "drug",
     "('T073',)": "physical entity",
+    "('T074', 'T109')": "device",
+    "('T074', 'T109', 'T120')": "device",
     "('T074', 'T109', 'T121')": "drug",
+    "('T074', 'T109', 'T121', 'T127')": "drug",
+    "('T074', 'T109', 'T122')": "device",
+    "('T074', 'T109', 'T130')": "device",
+    "('T074', 'T114', 'T121')": "drug",
+    "('T074', 'T116', 'T121')": "drug",
+    "('T074', 'T121')": "drug",
+    "('T074', 'T121', 'T123', 'T196')": "drug",
+    "('T074', 'T121', 'T127')": "drug",
+    "('T074', 'T121', 'T129')": "drug",
+    "('T074', 'T121', 'T197')": "drug",
+    "('T074', 'T122')": "device",
+    "('T074', 'T168')": "food",
     "('T074', 'T200')": "drug",
+    "('T074', 'T203')": "device",
     "('T074',)": "device",
     "('T075',)": "device",
-    "('T077',)": "information content entity",
-    "('T078',)": "information content entity",
+    "('T077', 'T078')": "named thing",
+    "('T077', 'T170')": "publication",
+    "('T077',)": "named thing",
+    "('T078', 'T079')": "named thing",
+    "('T078', 'T079', 'T170')": "publication",
+    "('T078', 'T080')": "named thing",
+    "('T078', 'T080', 'T082', 'T099')": "cohort",
+    "('T078', 'T080', 'T170')": "publication",
+    "('T078', 'T081')": "named thing",
+    "('T078', 'T089')": "named thing",
+    "('T078', 'T091')": "named thing",
+    "('T078', 'T092')": "agent",
+    "('T078', 'T098')": "population of individual organisms",
+    "('T078', 'T169')": "named thing",
+    "('T078', 'T169', 'T170')": "publication",
+    "('T078', 'T170')": "publication",
+    "('T078',)": "named thing",
     "('T079', 'T080')": "named thing",
     "('T079', 'T080', 'T083')": "geographic location",
     "('T079', 'T080', 'T170')": "publication",
@@ -331,28 +764,73 @@
     "('T079', 'T169')": "named thing",
     "('T079', 'T170')": "publication",
     "('T079',)": "named thing",
-    "('T080',)": "information content entity",
-    "('T081',)": "information content entity",
-    "('T082',)": "information content entity",
+    "('T080', 'T081')": "named thing",
+    "('T080', 'T081', 'T169')": "named thing",
+    "('T080', 'T082', 'T169')": "named thing",
+    "('T080', 'T089')": "named thing",
+    "('T080', 'T169')": "named thing",
+    "('T080', 'T170')": "publication",
+    "('T080',)": "named thing",
+    "('T081', 'T083')": "geographic location",
+    "('T081', 'T085')": "named thing",
+    "('T081', 'T086')": "nucleic acid entity",
+    "('T081', 'T087')": "polypeptide",
+    "('T081', 'T097')": "cohort",
+    "('T081', 'T098')": "population of individual organisms",
+    "('T081', 'T102')": "named thing",
+    "('T081', 'T109')": "chemical entity",
+    "('T081', 'T121')": "drug",
+    "('T081', 'T167')": "chemical entity",
+    "('T081', 'T167', 'T168', 'T169')": "food",
+    "('T081', 'T169')": "named thing",
+    "('T081', 'T170')": "publication",
+    "('T081', 'T196')": "small molecule",
+    "('T081', 'T201')": "named thing",
+    "('T081',)": "named thing",
+    "('T082', 'T083')": "geographic location",
+    "('T082', 'T098')": "population of individual organisms",
+    "('T082', 'T103')": "chemical entity",
+    "('T082', 'T109', 'T123')": "chemical entity",
+    "('T082', 'T116', 'T123')": "polypeptide",
+    "('T082', 'T170')": "publication",
+    "('T082', 'T190')": "disease",
+    "('T082', 'T191')": "disease",
+    "('T082',)": "named thing",
+    "('T083', 'T169')": "geographic location",
     "('T083',)": "geographic location",
-    "('T085',)": "biological entity",
+    "('T085',)": "named thing",
     "('T086',)": "nucleic acid entity",
     "('T087',)": "polypeptide",
-    "('T088',)": "biological entity",
-    "('T089',)": "information content entity",
+    "('T088',)": "named thing",
+    "('T089', 'T170')": "publication",
+    "('T089',)": "named thing",
+    "('T090', 'T091')": "individual organism",
+    "('T090', 'T170')": "individual organism",
     "('T090',)": "individual organism",
+    "('T091', 'T097')": "cohort",
     "('T091',)": "named thing",
+    "('T092', 'T097', 'T170')": "agent",
+    "('T092', 'T170')": "named thing",
     "('T092',)": "agent",
+    "('T093', 'T109', 'T123')": "agent",
+    "('T093', 'T116', 'T123')": "polypeptide",
+    "('T093', 'T121')": "drug",
     "('T093',)": "agent",
     "('T094',)": "agent",
     "('T095',)": "agent",
     "('T096',)": "agent",
+    "('T097', 'T170')": "cohort",
     "('T097',)": "cohort",
+    "('T098', 'T109', 'T121', 'T129')": "drug",
+    "('T098', 'T116', 'T121', 'T129')": "drug",
+    "('T098', 'T121', 'T129')": "named thing",
+    "('T098', 'T170')": "publication",
     "('T098',)": "population of individual organisms",
+    "('T099', 'T102')": "cohort",
     "('T099',)": "cohort",
     "('T100',)": "cohort",
     "('T101',)": "cohort",
-    "('T102',)": "information content entity",
+    "('T102',)": "named thing",
     "('T103',)": "chemical entity",
     "('T104', 'T109')": "chemical entity",
     "('T104', 'T109', 'T116', 'T121', 'T123', 'T130')": "drug",
@@ -679,6 +1157,7 @@
     "('T116', 'T195', 'T200')": "drug",
     "('T116', 'T200')": "drug",
     "('T116',)": "polypeptide",
+    "('T120', 'T121')": "drug",
     "('T120',)": "chemical entity",
     "('T121', 'T122')": "drug",
     "('T121', 'T122', 'T127')": "drug",
@@ -761,7 +1240,9 @@
     "('T123', 'T197', 'T200')": "drug",
     "('T123', 'T200')": "drug",
     "('T123',)": "chemical entity",
+    "('T125', 'T130')": "chemical entity",
     "('T125',)": "chemical entity",
+    "('T126', 'T129')": "protein",
     "('T126',)": "protein",
     "('T127',)": "small molecule",
     "('T129', 'T130')": "chemical entity",
@@ -771,7 +1252,7 @@
     "('T129', 'T185')": "named thing",
     "('T129', 'T192')": "protein",
     "('T129', 'T200')": "drug",
-    "('T129',)": "biological entity",
+    "('T129',)": "named thing",
     "('T130', 'T131')": "chemical entity",
     "('T130', 'T131', 'T196')": "small molecule",
     "('T130', 'T131', 'T196', 'T197')": "small molecule",
@@ -784,23 +1265,33 @@
     "('T130', 'T197', 'T200')": "drug",
     "('T130', 'T200')": "drug",
     "('T130',)": "chemical entity",
+    "('T131', 'T167')": "chemical entity",
     "('T131', 'T196')": "small molecule",
+    "('T131', 'T196', 'T197')": "small molecule",
     "('T131', 'T197')": "chemical entity",
+    "('T131', 'T197', 'T200')": "drug",
     "('T131',)": "chemical entity",
     "('T167',)": "chemical entity",
+    "('T168', 'T200')": "drug",
     "('T168',)": "food",
-    "('T169',)": "information content entity",
+    "('T169', 'T170')": "publication",
+    "('T169',)": "named thing",
+    "('T170', 'T185')": "publication",
     "('T170',)": "publication",
-    "('T171',)": "information content entity",
+    "('T171',)": "named thing",
+    "('T184', 'T190')": "disease or phenotypic feature",
     "('T184',)": "phenotypic feature",
-    "('T185',)": "information content entity",
+    "('T185',)": "named thing",
     "('T190',)": "disease",
     "('T191',)": "disease",
     "('T192',)": "protein",
     "('T194',)": "organism taxon",
     "('T195',)": "drug",
+    "('T196', 'T197')": "small molecule",
     "('T196',)": "small molecule",
+    "('T197', 'T200')": "drug",
     "('T197',)": "chemical entity",
+    "('T200', 'T203')": "drug",
     "('T200',)": "drug",
     "('T201',)": "named thing",
     "('T203',)": "device",
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 711021cd..2063e960 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -132,16 +132,20 @@ def process_chv_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
     synonyms = list()
     names = info.get(NAMES_KEY, dict())
     pt = names.get('PT', dict())
+    synonyms += [syn for syn in names.get('SY', dict()).get('Y', list())]
+    synonyms += [syn for syn in names.get('SY', dict()).get('N', list())]
     if 'Y' in pt:
         name = pt.get('Y', '')
         assert len(name) == 1, str(name) + ' ' + node_curie
         name = name[0]
-    else:
+    elif 'N' in pt:
         name = pt.get('N', '')
         assert len(name) == 1, str(name) + ' ' + node_curie
         name = name[0]
-    synonyms += [syn for syn in names.get('SY', dict()).get('Y', list())]
-    synonyms += [syn for syn in names.get('SY', dict()).get('N', list())]
+    else:
+        name = synonyms[0]
+        synonyms = synonyms[1:]
+        name = name[0]
 
     node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
     node['synonym'] = synonyms

From 770fb5a316f511b872725f8dbab6fdecf8dbf922 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 17 Aug 2023 12:28:03 -0700
Subject: [PATCH 028/117] #316 FMA is working now

---
 tui_combo_mappings.json        |  3 +++
 umls_list_jsonl_to_kg_jsonl.py | 47 ++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/tui_combo_mappings.json b/tui_combo_mappings.json
index ea707d0a..6aebaa1b 100644
--- a/tui_combo_mappings.json
+++ b/tui_combo_mappings.json
@@ -256,11 +256,14 @@
     "('T028', 'T116', 'T123')": "polypeptide",
     "('T028', 'T191')": "disease",
     "('T028',)": "named thing",
+    "('T029', 'T030')": "anatomical entity",
     "('T029', 'T061')": "procedure",
     "('T029', 'T078', 'T170')": "publication",
     "('T029', 'T081', 'T167', 'T170')": "named thing",
     "('T029', 'T082')": "anatomical entity",
     "('T029', 'T116', 'T129')": "polypeptide",
+    "('T029', 'T170')": "publication",
+    "('T029', 'T184')": "phenotypic feature",
     "('T029',)": "anatomical entity",
     "('T030', 'T033')": "anatomical entity",
     "('T030',)": "anatomical entity",
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 2063e960..c544fa97 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -192,6 +192,50 @@ def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_outpu
     nodes_output.write(node)
 
 
+def process_fma_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
+    """Build one node for an FMA entry and write it to nodes_output.
+
+    The node name is the first preferred term (names['PT'], 'Y' before 'N'), or
+    failing that the first synonym (names['SY']); other terms become synonyms.
+    The lookups iri_mappings['FMA'] and tui_mappings[str(tuple(tuis))] are
+    unguarded. edges_output is accepted but not written to by this function.
+    """
+    curie_prefix = "FMA" # This should be replaced with a kg2_util prefix at some point
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
+    iri = iri_mappings[curie_prefix] + node_id
+    node_curie = make_node_id(curie_prefix, node_id)
+    cuis = info.get(CUIS_KEY, list())
+    tuis = info.get(TUIS_KEY, list())
+
+    # Currently not used, but extracting them in case we want them in the future
+    authority = info.get(INFO_KEY, dict()).get('AUTHORITY', list())
+    date_last_modified = info.get(INFO_KEY, dict()).get('DATE_LAST_MODIFIED', list())
+
+    names = info.get(NAMES_KEY, dict())
+    pt = names.get('PT', dict())
+    sy = names.get('SY', dict())
+    synonyms = list(sy.get('Y', list())) + list(sy.get('N', list()))
+    if pt.get('Y'):
+        name = pt['Y'][0]
+        synonyms += pt['Y'][1:]
+    elif pt.get('N'):
+        name = pt['N'][0]
+        synonyms += pt['N'][1:]
+    elif synonyms:
+        # A synonym is already a whole term; indexing it again would keep only its first character
+        name, synonyms = synonyms[0], synonyms[1:]
+    else:
+        # No PT or SY terms at all: use an empty name instead of raising IndexError
+        name = str()
+
+    node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
+    node['synonym'] = synonyms
+    # str.strip() returns a new string, so build the "; "-separated text directly
+    node['description'] = "; ".join("UMLS Semantic Type: STY:" + tui for tui in tuis)
+
+    nodes_output.write(node)
+
+
 if __name__ == '__main__':
     args = get_args()
     input_file_name = args.inputFile
@@ -238,5 +282,8 @@ def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_outpu
             if source == 'DRUGBANK':
                 process_drugbank_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
 
+            if source == 'FMA':
+                process_fma_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
+
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
     kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)
\ No newline at end of file

From bffcb152607218dd98362afaca528acb2016c0dd Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 17 Aug 2023 16:56:29 -0700
Subject: [PATCH 029/117] #316 more tui combo mappings

---
 tui_combo_mappings.json | 298 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 298 insertions(+)

diff --git a/tui_combo_mappings.json b/tui_combo_mappings.json
index 6aebaa1b..543efc37 100644
--- a/tui_combo_mappings.json
+++ b/tui_combo_mappings.json
@@ -105,23 +105,47 @@
     "('T017', 'T061')": "procedure",
     "('T017', 'T091')": "anatomical entity",
     "('T017',)": "anatomical entity",
+    "('T018', 'T019')": "disease",
+    "('T018', 'T019', 'T024')": "gross anatomical structure",
+    "('T018', 'T019', 'T028', 'T033', 'T047')": "disease or phenotypic feature",
+    "('T018', 'T024')": "gross anatomical structure",
+    "('T018', 'T025')": "cell",
+    "('T018', 'T026')": "cellular component",
     "('T018',)": "gross anatomical structure",
     "('T019', 'T020')": "disease",
+    "('T019', 'T020', 'T033', 'T190')": "disease",
+    "('T019', 'T020', 'T037', 'T190')": "disease",
+    "('T019', 'T020', 'T047')": "disease",
+    "('T019', 'T020', 'T047', 'T190')": "disease",
+    "('T019', 'T020', 'T190')": "disease",
     "('T019', 'T023')": "disease",
     "('T019', 'T028')": "disease",
     "('T019', 'T028', 'T033')": "disease",
     "('T019', 'T028', 'T033', 'T047')": "disease",
     "('T019', 'T028', 'T033', 'T047', 'T191')": "disease",
+    "('T019', 'T028', 'T033', 'T190')": "disease",
+    "('T019', 'T028', 'T033', 'T191')": "disease",
     "('T019', 'T028', 'T047')": "disease",
+    "('T019', 'T028', 'T047', 'T190')": "disease",
+    "('T019', 'T030')": "disease",
+    "('T019', 'T031', 'T047')": "disease",
     "('T019', 'T033')": "disease",
+    "('T019', 'T033', 'T037', 'T047', 'T190')": "disease",
     "('T019', 'T033', 'T047')": "disease",
+    "('T019', 'T033', 'T047', 'T190')": "disease",
     "('T019', 'T033', 'T190')": "disease",
+    "('T019', 'T033', 'T191')": "disease",
+    "('T019', 'T037')": "disease",
     "('T019', 'T046')": "disease",
     "('T019', 'T046', 'T047')": "disease",
     "('T019', 'T046', 'T080', 'T169')": "disease",
     "('T019', 'T047')": "disease",
+    "('T019', 'T047', 'T049')": "disease",
+    "('T019', 'T047', 'T054')": "disease",
     "('T019', 'T047', 'T190')": "disease",
     "('T019', 'T047', 'T191')": "disease",
+    "('T019', 'T049')": "disease",
+    "('T019', 'T059')": "disease",
     "('T019', 'T073')": "disease",
     "('T019', 'T116', 'T121', 'T123')": "named thing",
     "('T019', 'T121', 'T123', 'T196')": "named thing",
@@ -145,8 +169,15 @@
     "('T020',)": "disease",
     "('T021',)": "gross anatomical structure",
     "('T022', 'T023')": "gross anatomical structure",
+    "('T022', 'T023', 'T030')": "gross anatomical structure",
+    "('T022', 'T023', 'T040')": "gross anatomical structure",
+    "('T022', 'T030')": "anatomical entity",
     "('T022', 'T033')": "anatomical entity",
+    "('T022', 'T033', 'T047')": "disease",
+    "('T022', 'T109', 'T129')": "chemical entity",
     "('T022', 'T116', 'T121')": "drug",
+    "('T022', 'T170')": "publication",
+    "('T022', 'T185')": "anatomical entity",
     "('T022',)": "anatomical entity",
     "('T023', 'T024')": "gross anatomical structure",
     "('T023', 'T024', 'T025')": "cell",
@@ -240,20 +271,56 @@
     "('T026', 'T167')": "cellular component",
     "('T026', 'T191')": "disease",
     "('T026',)": "cellular component",
+    "('T028', 'T031', 'T033')": "disease or phenotypic feature",
+    "('T028', 'T032')": "named thing",
+    "('T028', 'T032', 'T033', 'T047')": "disease",
+    "('T028', 'T032', 'T045')": "physiological process",
     "('T028', 'T033')": "disease or phenotypic feature",
+    "('T028', 'T033', 'T034')": "phenomenon",
+    "('T028', 'T033', 'T037')": "pathological process",
+    "('T028', 'T033', 'T046')": "pathological process",
+    "('T028', 'T033', 'T046', 'T047')": "disease",
     "('T028', 'T033', 'T047')": "disease",
+    "('T028', 'T033', 'T047', 'T048')": "disease",
+    "('T028', 'T033', 'T047', 'T109', 'T121')": "named thing",
+    "('T028', 'T033', 'T047', 'T116', 'T129')": "disease",
+    "('T028', 'T033', 'T047', 'T190')": "disease",
     "('T028', 'T033', 'T047', 'T191')": "disease",
+    "('T028', 'T033', 'T048')": "disease",
+    "('T028', 'T033', 'T116')": "polypeptide",
     "('T028', 'T033', 'T191')": "disease",
+    "('T028', 'T033', 'T201')": "disease or phenotypic feature",
+    "('T028', 'T034')": "phenomenon",
+    "('T028', 'T037', 'T047')": "disease",
+    "('T028', 'T038')": "phenomenon",
+    "('T028', 'T039')": "physiological process",
+    "('T028', 'T042')": "physiological process",
     "('T028', 'T045')": "physiological process",
+    "('T028', 'T046')": "pathological process",
     "('T028', 'T046', 'T047')": "disease",
+    "('T028', 'T046', 'T047', 'T191')": "disease",
     "('T028', 'T047')": "disease",
     "('T028', 'T047', 'T048')": "disease",
+    "('T028', 'T047', 'T048', 'T191')": "disease",
+    "('T028', 'T047', 'T116', 'T126')": "protein",
     "('T028', 'T047', 'T191')": "disease",
     "('T028', 'T048')": "disease",
     "('T028', 'T054')": "behavior",
+    "('T028', 'T062', 'T082', 'T114')": "nucleic acid entity",
+    "('T028', 'T082')": "named thing",
+    "('T028', 'T086')": "nucleic acid entity",
+    "('T028', 'T086', 'T114', 'T123')": "nucleic acid entity",
+    "('T028', 'T109', 'T121')": "drug",
+    "('T028', 'T109', 'T130')": "chemical entity",
     "('T028', 'T114')": "nucleic acid entity",
+    "('T028', 'T114', 'T116')": "polypeptide",
     "('T028', 'T114', 'T123')": "nucleic acid entity",
+    "('T028', 'T116')": "polypeptide",
+    "('T028', 'T116', 'T121')": "drug",
     "('T028', 'T116', 'T123')": "polypeptide",
+    "('T028', 'T116', 'T126')": "protein",
+    "('T028', 'T170')": "publication",
+    "('T028', 'T190')": "disease",
     "('T028', 'T191')": "disease",
     "('T028',)": "named thing",
     "('T029', 'T030')": "anatomical entity",
@@ -266,6 +333,7 @@
     "('T029', 'T184')": "phenotypic feature",
     "('T029',)": "anatomical entity",
     "('T030', 'T033')": "anatomical entity",
+    "('T030', 'T060')": "anatomical entity",
     "('T030',)": "anatomical entity",
     "('T031', 'T033')": "anatomical entity",
     "('T031', 'T033', 'T046')": "pathological process",
@@ -296,29 +364,71 @@
     "('T031', 'T184')": "phenotypic feature",
     "('T031',)": "anatomical entity",
     "('T032', 'T033')": "disease or phenotypic feature",
+    "('T032', 'T033', 'T054')": "disease or phenotypic feature",
+    "('T032', 'T033', 'T058', 'T079', 'T080', 'T081', 'T169', 'T170')": "disease or phenotypic feature",
+    "('T032', 'T033', 'T078', 'T080', 'T170')": "disease or phenotypic feature",
+    "('T032', 'T033', 'T184')": "phenotypic feature",
+    "('T032', 'T033', 'T201')": "disease or phenotypic feature",
+    "('T032', 'T037')": "pathological process",
+    "('T032', 'T038')": "phenomenon",
+    "('T032', 'T038', 'T040')": "physiological process",
+    "('T032', 'T039', 'T040', 'T046', 'T121')": "named thing",
+    "('T032', 'T039', 'T040', 'T201')": "physiological process",
+    "('T032', 'T040')": "physiological process",
+    "('T032', 'T040', 'T055', 'T062', 'T071')": "physiological process",
+    "('T032', 'T041')": "behavior",
+    "('T032', 'T042')": "physiological process",
+    "('T032', 'T045')": "physiological process",
+    "('T032', 'T047')": "disease",
+    "('T032', 'T053', 'T055')": "behavior",
+    "('T032', 'T079')": "named thing",
+    "('T032', 'T081')": "named thing",
+    "('T032', 'T093')": "agent",
     "('T032',)": "named thing",
     "('T033', 'T034')": "phenomenon",
+    "('T033', 'T034', 'T040')": "disease or phenotypic feature",
     "('T033', 'T034', 'T047')": "disease",
     "('T033', 'T034', 'T059')": "phenomenon",
+    "('T033', 'T034', 'T059', 'T081')": "disease or phenotypic feature",
+    "('T033', 'T034', 'T073', 'T078', 'T080', 'T170')": "disease or phenotypic feature",
     "('T033', 'T034', 'T073', 'T079', 'T093')": "named thing",
+    "('T033', 'T034', 'T201')": "disease or phenotypic feature",
     "('T033', 'T037')": "pathological process",
     "('T033', 'T037', 'T047')": "disease",
     "('T033', 'T037', 'T055')": "pathological process",
     "('T033', 'T037', 'T070', 'T167', 'T191')": "disease",
+    "('T033', 'T038', 'T040')": "disease or phenotypic feature",
     "('T033', 'T039')": "physiological process",
     "('T033', 'T039', 'T040', 'T169')": "disease or phenotypic feature",
+    "('T033', 'T039', 'T121')": "disease or phenotypic feature",
     "('T033', 'T040')": "physiological process",
     "('T033', 'T040', 'T046', 'T047')": "disease",
     "('T033', 'T040', 'T047')": "disease",
+    "('T033', 'T040', 'T081')": "disease or phenotypic feature",
+    "('T033', 'T040', 'T184')": "phenotypic feature",
+    "('T033', 'T040', 'T190')": "disease",
     "('T033', 'T041')": "behavior",
+    "('T033', 'T041', 'T048')": "disease",
+    "('T033', 'T041', 'T184')": "disease or phenotypic feature",
     "('T033', 'T042')": "physiological process",
+    "('T033', 'T042', 'T043')": "physiological process",
     "('T033', 'T042', 'T047')": "disease",
+    "('T033', 'T042', 'T060')": "disease or phenotypic feature",
+    "('T033', 'T043')": "disease or phenotypic feature",
+    "('T033', 'T045')": "disease or phenotypic feature",
+    "('T033', 'T045', 'T054')": "disease or phenotypic feature",
     "('T033', 'T046')": "pathological process",
     "('T033', 'T046', 'T047')": "disease",
     "('T033', 'T046', 'T047', 'T184')": "disease or phenotypic feature",
+    "('T033', 'T046', 'T047', 'T190')": "disease",
+    "('T033', 'T046', 'T048')": "disease",
     "('T033', 'T046', 'T061', 'T081', 'T093')": "pathological process",
+    "('T033', 'T046', 'T067', 'T169')": "pathological process",
+    "('T033', 'T046', 'T079')": "disease or phenotypic feature",
     "('T033', 'T046', 'T184')": "disease or phenotypic feature",
+    "('T033', 'T046', 'T190')": "disease",
     "('T033', 'T047')": "disease",
+    "('T033', 'T047', 'T048')": "disease",
     "('T033', 'T047', 'T048', 'T054', 'T102')": "disease",
     "('T033', 'T047', 'T048', 'T184')": "disease or phenotypic feature",
     "('T033', 'T047', 'T059', 'T074')": "disease",
@@ -328,62 +438,93 @@
     "('T033', 'T047', 'T191')": "disease",
     "('T033', 'T048')": "disease",
     "('T033', 'T048', 'T054')": "disease",
+    "('T033', 'T048', 'T055')": "disease",
     "('T033', 'T048', 'T169')": "disease",
+    "('T033', 'T048', 'T184')": "disease or phenotypic feature",
     "('T033', 'T049')": "disease",
     "('T033', 'T051')": "event",
     "('T033', 'T052', 'T061')": "procedure",
+    "('T033', 'T052', 'T066', 'T067')": "disease or phenotypic feature",
     "('T033', 'T054')": "behavior",
+    "('T033', 'T054', 'T055', 'T062', 'T072', 'T079', 'T080', 'T081')": "disease or phenotypic feature",
     "('T033', 'T054', 'T080')": "behavior",
+    "('T033', 'T054', 'T098')": "disease or phenotypic feature",
     "('T033', 'T055')": "behavior",
+    "('T033', 'T055', 'T056', 'T061')": "disease or phenotypic feature",
+    "('T033', 'T055', 'T058', 'T073')": "disease or phenotypic feature",
     "('T033', 'T055', 'T061')": "procedure",
+    "('T033', 'T055', 'T081')": "disease or phenotypic feature",
     "('T033', 'T055', 'T185')": "behavior",
     "('T033', 'T056', 'T073', 'T078', 'T079', 'T081', 'T093', 'T169', 'T170')": "named thing",
     "('T033', 'T056', 'T078', 'T080', 'T169', 'T170')": "publication",
+    "('T033', 'T056', 'T078', 'T081', 'T082', 'T083', 'T096')": "disease or phenotypic feature",
     "('T033', 'T057', 'T080')": "activity",
     "('T033', 'T058')": "activity",
+    "('T033', 'T058', 'T059', 'T060')": "disease or phenotypic feature",
+    "('T033', 'T058', 'T093')": "disease or phenotypic feature",
     "('T033', 'T059')": "procedure",
+    "('T033', 'T059', 'T060')": "procedure",
     "('T033', 'T060')": "procedure",
     "('T033', 'T060', 'T080')": "procedure",
+    "('T033', 'T060', 'T185')": "disease or phenotypic feature",
     "('T033', 'T061')": "procedure",
     "('T033', 'T061', 'T078', 'T079', 'T081', 'T170')": "named thing",
     "('T033', 'T061', 'T168')": "procedure",
+    "('T033', 'T063')": "disease or phenotypic feature",
     "('T033', 'T067')": "phenomenon",
+    "('T033', 'T069')": "disease or phenotypic feature",
     "('T033', 'T069', 'T131')": "phenomenon",
+    "('T033', 'T070')": "disease or phenotypic feature",
+    "('T033', 'T073')": "disease or phenotypic feature",
     "('T033', 'T073', 'T078', 'T079', 'T093', 'T169', 'T170')": "disease or phenotypic feature",
+    "('T033', 'T073', 'T078', 'T080')": "disease or phenotypic feature",
     "('T033', 'T073', 'T078', 'T080', 'T093', 'T169')": "disease or phenotypic feature",
     "('T033', 'T073', 'T079', 'T080', 'T169', 'T170')": "named thing",
     "('T033', 'T073', 'T093')": "disease or phenotypic feature",
     "('T033', 'T074')": "device",
+    "('T033', 'T077')": "disease or phenotypic feature",
+    "('T033', 'T077', 'T078', 'T080', 'T081', 'T170', 'T201')": "disease or phenotypic feature",
     "('T033', 'T078')": "disease or phenotypic feature",
+    "('T033', 'T078', 'T079', 'T080', 'T170')": "disease or phenotypic feature",
     "('T033', 'T078', 'T079', 'T170')": "publication",
     "('T033', 'T078', 'T080', 'T081', 'T169')": "disease or phenotypic feature",
+    "('T033', 'T078', 'T080', 'T169', 'T170')": "disease or phenotypic feature",
     "('T033', 'T078', 'T080', 'T170')": "named thing",
     "('T033', 'T078', 'T089', 'T095', 'T170')": "publication",
     "('T033', 'T078', 'T089', 'T170')": "publication",
     "('T033', 'T078', 'T169', 'T170')": "publication",
     "('T033', 'T078', 'T170')": "publication",
+    "('T033', 'T078', 'T190')": "disease",
     "('T033', 'T079')": "disease or phenotypic feature",
     "('T033', 'T079', 'T080', 'T081', 'T169', 'T170')": "publication",
+    "('T033', 'T079', 'T081')": "disease or phenotypic feature",
     "('T033', 'T080')": "disease or phenotypic feature",
     "('T033', 'T080', 'T082')": "disease or phenotypic feature",
+    "('T033', 'T080', 'T098')": "disease or phenotypic feature",
+    "('T033', 'T080', 'T098', 'T201')": "disease or phenotypic feature",
     "('T033', 'T080', 'T170')": "publication",
     "('T033', 'T081')": "disease or phenotypic feature",
     "('T033', 'T082')": "disease or phenotypic feature",
     "('T033', 'T082', 'T170')": "named thing",
     "('T033', 'T083', 'T093', 'T169', 'T170')": "publication",
     "('T033', 'T089')": "disease or phenotypic feature",
+    "('T033', 'T089', 'T099')": "disease or phenotypic feature",
     "('T033', 'T091', 'T169')": "disease or phenotypic feature",
     "('T033', 'T092', 'T170')": "publication",
     "('T033', 'T093')": "disease or phenotypic feature",
+    "('T033', 'T095', 'T098')": "disease or phenotypic feature",
     "('T033', 'T097')": "disease or phenotypic feature",
     "('T033', 'T098')": "population of individual organisms",
+    "('T033', 'T098', 'T101')": "disease or phenotypic feature",
     "('T033', 'T098', 'T116', 'T121', 'T129')": "drug",
     "('T033', 'T098', 'T121', 'T129')": "drug",
     "('T033', 'T099')": "cohort",
     "('T033', 'T099', 'T200')": "named thing",
+    "('T033', 'T100')": "disease or phenotypic feature",
     "('T033', 'T101')": "cohort",
     "('T033', 'T102')": "disease or phenotypic feature",
     "('T033', 'T109', 'T121')": "named thing",
+    "('T033', 'T109', 'T121', 'T125')": "drug",
     "('T033', 'T109', 'T122')": "chemical entity",
     "('T033', 'T109', 'T123')": "chemical entity",
     "('T033', 'T116', 'T123')": "polypeptide",
@@ -397,6 +538,7 @@
     "('T033', 'T169')": "disease or phenotypic feature",
     "('T033', 'T170')": "publication",
     "('T033', 'T184')": "phenotypic feature",
+    "('T033', 'T184', 'T201')": "phenotypic feature",
     "('T033', 'T185')": "disease or phenotypic feature",
     "('T033', 'T190')": "disease",
     "('T033', 'T191')": "disease",
@@ -421,54 +563,140 @@
     "('T034', 'T196')": "small molecule",
     "('T034', 'T201')": "phenomenon",
     "('T034',)": "phenomenon",
+    "('T037', 'T038', 'T040')": "pathological process",
     "('T037', 'T046')": "pathological process",
+    "('T037', 'T046', 'T047')": "disease",
     "('T037', 'T047')": "disease",
+    "('T037', 'T047', 'T048')": "disease",
+    "('T037', 'T047', 'T070')": "disease",
+    "('T037', 'T048')": "disease",
+    "('T037', 'T048', 'T051')": "disease",
+    "('T037', 'T048', 'T051', 'T053', 'T055')": "disease",
+    "('T037', 'T052', 'T068', 'T131')": "pathological process",
+    "('T037', 'T056', 'T068', 'T073', 'T078', 'T089')": "pathological process",
     "('T037', 'T058')": "pathological process",
     "('T037', 'T059')": "procedure",
+    "('T037', 'T060')": "pathological process",
     "('T037', 'T061')": "procedure",
     "('T037', 'T067')": "pathological process",
+    "('T037', 'T070')": "pathological process",
+    "('T037', 'T073')": "pathological process",
     "('T037', 'T073', 'T092')": "agent",
+    "('T037', 'T078', 'T091')": "pathological process",
+    "('T037', 'T081')": "pathological process",
     "('T037', 'T109', 'T195')": "drug",
+    "('T037', 'T116', 'T121', 'T131')": "named thing",
     "('T037', 'T116', 'T123', 'T131')": "polypeptide",
     "('T037', 'T121', 'T123', 'T196')": "drug",
     "('T037', 'T123', 'T197')": "chemical entity",
+    "('T037', 'T131')": "pathological process",
+    "('T037', 'T184')": "disease or phenotypic feature",
     "('T037', 'T190')": "disease",
+    "('T037', 'T201')": "pathological process",
     "('T037', 'T204')": "pathological process",
     "('T037',)": "pathological process",
     "('T038', 'T039')": "phenomenon",
+    "('T038', 'T039', 'T042')": "phenomenon",
     "('T038', 'T039', 'T043')": "phenomenon",
+    "('T038', 'T039', 'T044')": "phenomenon",
     "('T038', 'T040')": "phenomenon",
     "('T038', 'T040', 'T043')": "phenomenon",
+    "('T038', 'T040', 'T046')": "pathological process",
+    "('T038', 'T040', 'T054')": "phenomenon",
+    "('T038', 'T040', 'T070')": "phenomenon",
     "('T038', 'T040', 'T080', 'T169')": "phenomenon",
     "('T038', 'T042')": "phenomenon",
+    "('T038', 'T042', 'T043')": "phenomenon",
     "('T038', 'T043')": "phenomenon",
+    "('T038', 'T043', 'T044')": "phenomenon",
+    "('T038', 'T043', 'T045')": "phenomenon",
+    "('T038', 'T043', 'T046')": "pathological process",
+    "('T038', 'T043', 'T070')": "phenomenon",
     "('T038', 'T044')": "phenomenon",
+    "('T038', 'T044', 'T046')": "pathological process",
+    "('T038', 'T044', 'T061')": "phenomenon",
+    "('T038', 'T045')": "phenomenon",
     "('T038', 'T046')": "pathological process",
+    "('T038', 'T047')": "disease",
+    "('T038', 'T059')": "phenomenon",
+    "('T038', 'T067')": "phenomenon",
     "('T038', 'T070')": "phenomenon",
     "('T038', 'T169')": "phenomenon",
+    "('T038', 'T201')": "phenomenon",
     "('T038',)": "phenomenon",
     "('T039', 'T040')": "physiological process",
+    "('T039', 'T041')": "behavior",
     "('T039', 'T042')": "physiological process",
+    "('T039', 'T042', 'T043')": "physiological process",
+    "('T039', 'T042', 'T044')": "physiological process",
+    "('T039', 'T042', 'T046', 'T047', 'T060')": "disease",
+    "('T039', 'T042', 'T070')": "phenomenon",
+    "('T039', 'T042', 'T201')": "physiological process",
     "('T039', 'T043')": "physiological process",
     "('T039', 'T043', 'T044')": "physiological process",
     "('T039', 'T044')": "physiological process",
+    "('T039', 'T044', 'T046')": "pathological process",
+    "('T039', 'T044', 'T070')": "phenomenon",
+    "('T039', 'T044', 'T121', 'T123')": "drug",
+    "('T039', 'T045')": "physiological process",
     "('T039', 'T047')": "disease",
+    "('T039', 'T047', 'T184')": "disease or phenotypic feature",
+    "('T039', 'T059')": "procedure",
     "('T039', 'T061')": "physiological process",
+    "('T039', 'T062')": "activity",
+    "('T039', 'T066')": "activity",
+    "('T039', 'T067')": "physiological process",
     "('T039', 'T070')": "phenomenon",
+    "('T039', 'T081')": "physiological process",
+    "('T039', 'T102')": "physiological process",
     "('T039', 'T109', 'T121')": "drug",
+    "('T039', 'T109', 'T121', 'T125')": "drug",
+    "('T039', 'T109', 'T123', 'T125')": "chemical entity",
+    "('T039', 'T109', 'T125')": "chemical entity",
+    "('T039', 'T116', 'T121', 'T123')": "drug",
+    "('T039', 'T116', 'T121', 'T125')": "drug",
+    "('T039', 'T120')": "chemical entity",
     "('T039', 'T121')": "drug",
+    "('T039', 'T121', 'T123')": "drug",
     "('T039', 'T121', 'T125')": "drug",
+    "('T039', 'T121', 'T131')": "drug",
+    "('T039', 'T131')": "chemical entity",
+    "('T039', 'T201')": "physiological process",
     "('T039',)": "physiological process",
+    "('T040', 'T041')": "behavior",
+    "('T040', 'T041', 'T042')": "behavior",
+    "('T040', 'T041', 'T046', 'T060')": "pathological process",
     "('T040', 'T042')": "physiological process",
+    "('T040', 'T042', 'T043')": "physiological process",
     "('T040', 'T043')": "physiological process",
     "('T040', 'T043', 'T044')": "physiological process",
+    "('T040', 'T043', 'T046')": "pathological process",
     "('T040', 'T044')": "physiological process",
+    "('T040', 'T044', 'T045')": "physiological process",
+    "('T040', 'T044', 'T046')": "pathological process",
     "('T040', 'T045')": "physiological process",
     "('T040', 'T046')": "pathological process",
+    "('T040', 'T046', 'T061')": "pathological process",
     "('T040', 'T047')": "disease",
+    "('T040', 'T052', 'T055')": "behavior",
+    "('T040', 'T053')": "behavior",
+    "('T040', 'T054')": "behavior",
     "('T040', 'T055')": "behavior",
+    "('T040', 'T055', 'T058')": "behavior",
+    "('T040', 'T055', 'T081')": "behavior",
+    "('T040', 'T058')": "activity",
+    "('T040', 'T058', 'T078')": "activity",
     "('T040', 'T061')": "procedure",
+    "('T040', 'T062')": "activity",
     "('T040', 'T070')": "phenomenon",
+    "('T040', 'T079')": "physiological process",
+    "('T040', 'T079', 'T081')": "physiological process",
+    "('T040', 'T081')": "physiological process",
+    "('T040', 'T099')": "physiological process",
+    "('T040', 'T102')": "physiological process",
+    "('T040', 'T121', 'T197')": "drug",
+    "('T040', 'T184')": "phenotypic feature",
+    "('T040', 'T201')": "physiological process",
     "('T040',)": "physiological process",
     "('T041', 'T042')": "physiological process",
     "('T041', 'T046')": "pathological process",
@@ -528,26 +756,51 @@
     "('T046', 'T191')": "disease",
     "('T046',)": "pathological process",
     "('T047', 'T048')": "disease",
+    "('T047', 'T048', 'T055')": "disease",
     "('T047', 'T048', 'T184')": "disease or phenotypic feature",
     "('T047', 'T049')": "disease",
     "('T047', 'T050')": "disease",
+    "('T047', 'T054')": "disease",
     "('T047', 'T059')": "named thing",
     "('T047', 'T060')": "disease",
+    "('T047', 'T060', 'T061')": "procedure",
     "('T047', 'T061')": "disease",
+    "('T047', 'T061', 'T101')": "disease",
     "('T047', 'T067')": "disease",
+    "('T047', 'T068')": "disease",
+    "('T047', 'T068', 'T078', 'T102', 'T109', 'T131')": "named thing",
+    "('T047', 'T069', 'T073')": "disease",
+    "('T047', 'T074')": "disease",
+    "('T047', 'T078')": "disease",
     "('T047', 'T080')": "disease",
     "('T047', 'T081')": "disease",
+    "('T047', 'T090')": "disease",
+    "('T047', 'T091')": "disease",
+    "('T047', 'T091', 'T102')": "disease",
+    "('T047', 'T098')": "disease",
+    "('T047', 'T101')": "disease",
+    "('T047', 'T102')": "disease",
     "('T047', 'T109', 'T121')": "drug",
     "('T047', 'T109', 'T121', 'T123')": "named thing",
     "('T047', 'T109', 'T123')": "disease",
+    "('T047', 'T116')": "disease",
     "('T047', 'T116', 'T121', 'T123')": "named thing",
+    "('T047', 'T116', 'T121', 'T129')": "named thing",
     "('T047', 'T116', 'T123')": "named thing",
     "('T047', 'T116', 'T129')": "named thing",
+    "('T047', 'T121', 'T129')": "named thing",
+    "('T047', 'T121', 'T197')": "named thing",
+    "('T047', 'T122')": "disease",
+    "('T047', 'T123', 'T168', 'T196')": "small molecule",
+    "('T047', 'T131', 'T197')": "chemical entity",
+    "('T047', 'T167')": "disease",
     "('T047', 'T169')": "disease",
     "('T047', 'T184')": "disease or phenotypic feature",
     "('T047', 'T190')": "disease",
     "('T047', 'T191')": "disease",
     "('T047', 'T196')": "disease",
+    "('T047', 'T197')": "disease",
+    "('T047', 'T201')": "disease",
     "('T047', 'T204')": "disease",
     "('T047',)": "disease",
     "('T048', 'T054')": "disease",
@@ -645,12 +898,22 @@
     "('T059', 'T200')": "drug",
     "('T059',)": "procedure",
     "('T060', 'T061')": "procedure",
+    "('T060', 'T067', 'T070')": "phenomenon",
+    "('T060', 'T070')": "procedure",
+    "('T060', 'T073')": "procedure",
     "('T060', 'T074')": "procedure",
+    "('T060', 'T080', 'T170')": "named thing",
     "('T060', 'T081')": "procedure",
+    "('T060', 'T081', 'T170')": "named thing",
+    "('T060', 'T090')": "individual organism",
     "('T060', 'T091')": "procedure",
+    "('T060', 'T098')": "population of individual organisms",
     "('T060', 'T121')": "drug",
     "('T060', 'T170')": "procedure",
+    "('T060', 'T170', 'T201')": "named thing",
     "('T060', 'T184')": "phenotypic feature",
+    "('T060', 'T185')": "procedure",
+    "('T060', 'T201')": "procedure",
     "('T060', 'T204')": "procedure",
     "('T060',)": "procedure",
     "('T061', 'T062')": "procedure",
@@ -695,22 +958,55 @@
     "('T071',)": "named thing",
     "('T072',)": "physical entity",
     "('T073', 'T074')": "device",
+    "('T073', 'T075')": "device",
+    "('T073', 'T078')": "physical entity",
     "('T073', 'T078', 'T079', 'T080', 'T169', 'T170')": "publication",
     "('T073', 'T078', 'T093')": "agent",
+    "('T073', 'T078', 'T093', 'T169')": "agent",
+    "('T073', 'T078', 'T093', 'T170')": "publication",
+    "('T073', 'T078', 'T169', 'T170')": "publication",
+    "('T073', 'T078', 'T170')": "publication",
+    "('T073', 'T079', 'T093')": "agent",
     "('T073', 'T079', 'T093', 'T170')": "publication",
+    "('T073', 'T079', 'T170')": "publication",
+    "('T073', 'T080')": "physical entity",
     "('T073', 'T080', 'T169')": "physical entity",
+    "('T073', 'T081', 'T093')": "agent",
+    "('T073', 'T082')": "physical entity",
+    "('T073', 'T082', 'T093', 'T170')": "agent",
+    "('T073', 'T083')": "physical entity",
     "('T073', 'T083', 'T093')": "agent",
     "('T073', 'T090')": "physical entity",
+    "('T073', 'T090', 'T170')": "publication",
     "('T073', 'T092')": "agent",
+    "('T073', 'T092', 'T093')": "agent",
     "('T073', 'T093')": "agent",
+    "('T073', 'T093', 'T121')": "drug",
     "('T073', 'T093', 'T169')": "agent",
     "('T073', 'T093', 'T170')": "agent",
+    "('T073', 'T098', 'T102')": "population of individual organisms",
+    "('T073', 'T099', 'T170')": "publication",
+    "('T073', 'T104')": "chemical entity",
+    "('T073', 'T109')": "chemical entity",
     "('T073', 'T109', 'T121')": "drug",
+    "('T073', 'T109', 'T122')": "chemical entity",
+    "('T073', 'T109', 'T130')": "chemical entity",
+    "('T073', 'T109', 'T131')": "chemical entity",
+    "('T073', 'T109', 'T167')": "chemical entity",
+    "('T073', 'T109', 'T195')": "drug",
+    "('T073', 'T120')": "chemical entity",
+    "('T073', 'T120', 'T121')": "drug",
     "('T073', 'T121')": "drug",
+    "('T073', 'T122')": "physical entity",
+    "('T073', 'T131')": "chemical entity",
     "('T073', 'T167')": "chemical entity",
     "('T073', 'T167', 'T170')": "chemical entity",
+    "('T073', 'T169')": "physical entity",
+    "('T073', 'T169', 'T170')": "publication",
     "('T073', 'T170')": "publication",
+    "('T073', 'T185')": "physical entity",
     "('T073', 'T200')": "drug",
+    "('T073', 'T201')": "physical entity",
     "('T073',)": "physical entity",
     "('T074', 'T109')": "device",
     "('T074', 'T109', 'T120')": "device",
@@ -718,8 +1014,10 @@
     "('T074', 'T109', 'T121', 'T127')": "drug",
     "('T074', 'T109', 'T122')": "device",
     "('T074', 'T109', 'T130')": "device",
+    "('T074', 'T109', 'T195')": "drug",
     "('T074', 'T114', 'T121')": "drug",
     "('T074', 'T116', 'T121')": "drug",
+    "('T074', 'T116', 'T195')": "drug",
     "('T074', 'T121')": "drug",
     "('T074', 'T121', 'T123', 'T196')": "drug",
     "('T074', 'T121', 'T127')": "drug",

From 5b35a3efd5e406bb5cc1aa40548af0c3952102da Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 18 Aug 2023 10:08:15 -0700
Subject: [PATCH 030/117] #316 all of the combo mappings

---
 tui_combo_mappings.json | 447 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 447 insertions(+)

diff --git a/tui_combo_mappings.json b/tui_combo_mappings.json
index 543efc37..a8727cd1 100644
--- a/tui_combo_mappings.json
+++ b/tui_combo_mappings.json
@@ -725,32 +725,95 @@
     "('T041', 'T184')": "phenotypic feature",
     "('T041',)": "behavior",
     "('T042', 'T043')": "physiological process",
+    "('T042', 'T043', 'T044')": "physiological process",
+    "('T042', 'T043', 'T201')": "physiological process",
     "('T042', 'T044')": "physiological process",
+    "('T042', 'T045')": "physiological process",
+    "('T042', 'T046')": "pathological process",
     "('T042', 'T060')": "physiological process",
+    "('T042', 'T061')": "procedure",
+    "('T042', 'T068')": "phenomenon",
+    "('T042', 'T070')": "phenomenon",
+    "('T042', 'T079')": "physiological process",
     "('T042', 'T080')": "physiological process",
+    "('T042', 'T091')": "physiological process",
     "('T042', 'T116', 'T126')": "protein",
     "('T042', 'T121')": "drug",
     "('T042', 'T201')": "physiological process",
     "('T042',)": "physiological process",
     "('T043', 'T044')": "physiological process",
     "('T043', 'T045')": "physiological process",
+    "('T043', 'T045', 'T067')": "phenomenon",
     "('T043', 'T046')": "pathological process",
+    "('T043', 'T047', 'T059')": "disease",
+    "('T043', 'T055')": "behavior",
+    "('T043', 'T060')": "procedure",
+    "('T043', 'T062')": "activity",
+    "('T043', 'T067')": "phenomenon",
+    "('T043', 'T070')": "phenomenon",
+    "('T043', 'T077')": "physiological process",
     "('T043', 'T079')": "physiological process",
+    "('T043', 'T080')": "physiological process",
+    "('T043', 'T081')": "physiological process",
+    "('T043', 'T082')": "physiological process",
+    "('T043', 'T116', 'T123')": "polypeptide",
+    "('T043', 'T121', 'T129')": "drug",
+    "('T043', 'T191')": "disease",
     "('T043',)": "physiological process",
     "('T044', 'T045')": "physiological process",
+    "('T044', 'T045', 'T080')": "physiological process",
+    "('T044', 'T045', 'T116', 'T126')": "protein",
     "('T044', 'T046')": "pathological process",
     "('T044', 'T047')": "disease",
+    "('T044', 'T049')": "disease",
+    "('T044', 'T059')": "procedure",
+    "('T044', 'T066')": "activity",
+    "('T044', 'T067')": "phenomenon",
     "('T044', 'T070')": "phenomenon",
+    "('T044', 'T070', 'T080')": "phenomenon",
+    "('T044', 'T078')": "molecular activity",
+    "('T044', 'T081')": "molecular activity",
+    "('T044', 'T109', 'T116', 'T121', 'T123')": "drug",
+    "('T044', 'T116')": "polypeptide",
+    "('T044', 'T116', 'T126')": "protein",
+    "('T044', 'T123')": "chemical entity",
+    "('T044', 'T169')": "molecular activity",
     "('T044',)": "molecular activity",
     "('T045', 'T049')": "disease",
+    "('T045', 'T049', 'T063')": "disease",
+    "('T045', 'T049', 'T086')": "disease",
+    "('T045', 'T063')": "procedure",
+    "('T045', 'T070')": "phenomenon",
+    "('T045', 'T081')": "physiological process",
+    "('T045', 'T086')": "nucleic acid entity",
+    "('T045', 'T114')": "nucleic acid entity",
+    "('T045', 'T114', 'T123')": "nucleic acid entity",
+    "('T045', 'T169')": "physiological process",
     "('T045',)": "physiological process",
     "('T046', 'T047')": "disease",
+    "('T046', 'T047', 'T048')": "disease",
+    "('T046', 'T047', 'T048', 'T184')": "disease",
+    "('T046', 'T047', 'T074')": "disease",
+    "('T046', 'T047', 'T184')": "disease",
+    "('T046', 'T047', 'T190')": "disease",
+    "('T046', 'T047', 'T191')": "disease",
+    "('T046', 'T048')": "disease",
+    "('T046', 'T048', 'T184')": "disease",
+    "('T046', 'T049')": "disease",
+    "('T046', 'T055')": "pathological process",
     "('T046', 'T056')": "pathological process",
+    "('T046', 'T059')": "pathological process",
     "('T046', 'T061')": "pathological process",
+    "('T046', 'T067')": "pathological process",
+    "('T046', 'T070')": "pathological process",
+    "('T046', 'T080')": "pathological process",
     "('T046', 'T082', 'T201')": "pathological process",
+    "('T046', 'T109', 'T116', 'T121', 'T123')": "named thing",
     "('T046', 'T109', 'T121')": "drug",
     "('T046', 'T109', 'T121', 'T130')": "drug",
     "('T046', 'T116', 'T121')": "drug",
+    "('T046', 'T121')": "named thing",
+    "('T046', 'T169')": "pathological process",
     "('T046', 'T184')": "pathological process",
     "('T046', 'T190')": "disease",
     "('T046', 'T191')": "disease",
@@ -803,58 +866,215 @@
     "('T047', 'T201')": "disease",
     "('T047', 'T204')": "disease",
     "('T047',)": "disease",
+    "('T048', 'T051')": "disease",
     "('T048', 'T054')": "disease",
     "('T048', 'T055')": "disease",
+    "('T048', 'T055', 'T056')": "disease",
+    "('T048', 'T055', 'T167')": "disease",
+    "('T048', 'T055', 'T184')": "disease",
+    "('T048', 'T059')": "disease",
+    "('T048', 'T061')": "disease",
+    "('T048', 'T068')": "disease",
+    "('T048', 'T102')": "disease",
+    "('T048', 'T109', 'T121', 'T131')": "named thing",
+    "('T048', 'T121', 'T131')": "named thing",
     "('T048', 'T184')": "disease or phenotypic feature",
     "('T048',)": "disease",
     "('T049', 'T059')": "disease",
+    "('T049', 'T086')": "named thing",
     "('T049',)": "disease",
+    "('T050', 'T061')": "procedure",
     "('T050', 'T191')": "disease",
     "('T050',)": "named thing",
+    "('T051', 'T052')": "activity",
+    "('T051', 'T093')": "named thing",
+    "('T051', 'T098')": "named thing",
     "('T051',)": "event",
+    "('T052', 'T053')": "behavior",
+    "('T052', 'T054')": "behavior",
+    "('T052', 'T054', 'T078')": "behavior",
+    "('T052', 'T055')": "behavior",
+    "('T052', 'T056')": "activity",
+    "('T052', 'T057')": "activity",
+    "('T052', 'T057', 'T064', 'T081', 'T090', 'T170')": "activity",
+    "('T052', 'T058')": "activity",
+    "('T052', 'T058', 'T079', 'T170')": "activity",
+    "('T052', 'T059')": "procedure",
+    "('T052', 'T059', 'T062', 'T090')": "procedure",
+    "('T052', 'T062', 'T097')": "activity",
+    "('T052', 'T064')": "activity",
+    "('T052', 'T066', 'T170')": "activity",
+    "('T052', 'T068')": "phenomenon",
+    "('T052', 'T068', 'T072')": "phenomenon",
+    "('T052', 'T073')": "physical entity",
+    "('T052', 'T078', 'T081')": "activity",
     "('T052', 'T079')": "activity",
+    "('T052', 'T081')": "activity",
+    "('T052', 'T091')": "activity",
+    "('T052', 'T095')": "named thing",
+    "('T052', 'T167')": "chemical entity",
+    "('T052', 'T170')": "activity",
     "('T052',)": "activity",
+    "('T053', 'T054')": "behavior",
+    "('T053', 'T055')": "behavior",
+    "('T053', 'T082')": "behavior",
     "('T053',)": "behavior",
     "('T054', 'T055')": "behavior",
+    "('T054', 'T055', 'T068')": "behavior",
+    "('T054', 'T055', 'T080')": "behavior",
+    "('T054', 'T055', 'T097')": "behavior",
+    "('T054', 'T057')": "behavior",
+    "('T054', 'T058')": "behavior",
+    "('T054', 'T061')": "behavior",
+    "('T054', 'T062')": "behavior",
+    "('T054', 'T062', 'T079', 'T099')": "named thing",
+    "('T054', 'T064', 'T068', 'T078', 'T080', 'T089', 'T170')": "behavior",
+    "('T054', 'T064', 'T080')": "behavior",
     "('T054', 'T068')": "behavior",
+    "('T054', 'T071')": "behavior",
     "('T054', 'T078')": "behavior",
+    "('T054', 'T079')": "behavior",
     "('T054', 'T080')": "behavior",
+    "('T054', 'T080', 'T097', 'T170')": "named thing",
+    "('T054', 'T090')": "named thing",
+    "('T054', 'T093')": "named thing",
+    "('T054', 'T095')": "named thing",
+    "('T054', 'T097')": "named thing",
     "('T054', 'T098')": "behavior",
+    "('T054', 'T102')": "behavior",
+    "('T054', 'T170')": "named thing",
     "('T054',)": "behavior",
+    "('T055', 'T056', 'T109', 'T131')": "named thing",
+    "('T055', 'T057', 'T068', 'T078', 'T090')": "behavior",
+    "('T055', 'T058', 'T080')": "behavior",
+    "('T055', 'T061', 'T078')": "behavior",
+    "('T055', 'T061', 'T098')": "named thing",
+    "('T055', 'T061', 'T099')": "named thing",
+    "('T055', 'T064')": "behavior",
+    "('T055', 'T073', 'T074')": "named thing",
     "('T055', 'T078')": "behavior",
+    "('T055', 'T079')": "behavior",
+    "('T055', 'T079', 'T168')": "named thing",
     "('T055', 'T080')": "behavior",
+    "('T055', 'T090')": "behavior",
+    "('T055', 'T102')": "behavior",
+    "('T055', 'T109', 'T121')": "named thing",
+    "('T055', 'T131')": "named thing",
     "('T055', 'T170')": "behavior",
     "('T055',)": "behavior",
+    "('T056', 'T058')": "activity",
+    "('T056', 'T061')": "procedure",
+    "('T056', 'T068', 'T121')": "named thing",
     "('T056', 'T073')": "activity",
+    "('T056', 'T078')": "activity",
     "('T056', 'T079')": "activity",
+    "('T056', 'T080')": "activity",
+    "('T056', 'T089', 'T090')": "named thing",
+    "('T056', 'T102')": "activity",
+    "('T056', 'T169')": "activity",
+    "('T056', 'T170')": "named thing",
     "('T056',)": "activity",
     "('T057', 'T058')": "activity",
+    "('T057', 'T058', 'T080')": "activity",
+    "('T057', 'T058', 'T170')": "named thing",
+    "('T057', 'T059')": "named thing",
+    "('T057', 'T061')": "named thing",
+    "('T057', 'T061', 'T169')": "named thing",
     "('T057', 'T062')": "activity",
+    "('T057', 'T062', 'T081')": "activity",
+    "('T057', 'T062', 'T081', 'T098', 'T170')": "named thing",
+    "('T057', 'T062', 'T169', 'T170')": "named thing",
+    "('T057', 'T064')": "activity",
+    "('T057', 'T064', 'T073')": "activity",
+    "('T057', 'T065')": "activity",
+    "('T057', 'T066')": "activity",
+    "('T057', 'T067')": "activity",
+    "('T057', 'T068')": "activity",
+    "('T057', 'T070')": "phenomenon",
     "('T057', 'T073')": "activity",
+    "('T057', 'T073', 'T078', 'T170')": "named thing",
+    "('T057', 'T073', 'T097')": "named thing",
     "('T057', 'T073', 'T170')": "activity",
+    "('T057', 'T074')": "named thing",
+    "('T057', 'T077', 'T097')": "named thing",
     "('T057', 'T078')": "activity",
+    "('T057', 'T078', 'T080')": "activity",
+    "('T057', 'T078', 'T080', 'T097', 'T170')": "named thing",
+    "('T057', 'T078', 'T081')": "activity",
+    "('T057', 'T078', 'T081', 'T097')": "named thing",
     "('T057', 'T079')": "activity",
     "('T057', 'T080')": "activity",
     "('T057', 'T081')": "activity",
+    "('T057', 'T081', 'T170')": "named thing",
     "('T057', 'T090')": "activity",
+    "('T057', 'T091')": "activity",
+    "('T057', 'T092')": "named thing",
+    "('T057', 'T093')": "named thing",
+    "('T057', 'T095')": "named thing",
+    "('T057', 'T097')": "named thing",
+    "('T057', 'T098')": "named thing",
+    "('T057', 'T167')": "named thing",
+    "('T057', 'T168')": "named thing",
     "('T057', 'T170')": "activity",
     "('T057',)": "activity",
+    "('T058', 'T059')": "procedure",
     "('T058', 'T060')": "procedure",
+    "('T058', 'T060', 'T061')": "procedure",
     "('T058', 'T061')": "procedure",
+    "('T058', 'T061', 'T067')": "procedure",
+    "('T058', 'T061', 'T073', 'T074', 'T093')": "named thing",
+    "('T058', 'T061', 'T074')": "procedure",
+    "('T058', 'T061', 'T091')": "procedure",
+    "('T058', 'T061', 'T093')": "procedure",
+    "('T058', 'T062')": "activity",
+    "('T058', 'T062', 'T080')": "activity",
+    "('T058', 'T062', 'T170')": "named thing",
+    "('T058', 'T064')": "activity",
+    "('T058', 'T064', 'T089')": "activity",
     "('T058', 'T065')": "activity",
+    "('T058', 'T065', 'T097')": "named thing",
+    "('T058', 'T066', 'T170')": "named thing",
+    "('T058', 'T067', 'T170')": "named thing",
+    "('T058', 'T068')": "phenomenon",
+    "('T058', 'T068', 'T074')": "named thing",
+    "('T058', 'T069')": "phenomenon",
+    "('T058', 'T073')": "named thing",
+    "('T058', 'T073', 'T078', 'T093')": "named thing",
+    "('T058', 'T073', 'T080', 'T093', 'T170')": "named thing",
     "('T058', 'T073', 'T093')": "agent",
+    "('T058', 'T074')": "named thing",
     "('T058', 'T078')": "activity",
+    "('T058', 'T078', 'T080')": "activity",
+    "('T058', 'T078', 'T080', 'T090')": "named thing",
+    "('T058', 'T078', 'T082')": "activity",
+    "('T058', 'T078', 'T089')": "activity",
+    "('T058', 'T078', 'T093')": "named thing",
+    "('T058', 'T079')": "activity",
     "('T058', 'T080')": "activity",
+    "('T058', 'T080', 'T081')": "activity",
     "('T058', 'T081')": "activity",
+    "('T058', 'T081', 'T170')": "named thing",
+    "('T058', 'T089')": "activity",
+    "('T058', 'T090')": "named thing",
+    "('T058', 'T090', 'T093')": "named thing",
     "('T058', 'T091')": "activity",
+    "('T058', 'T092', 'T093')": "named thing",
     "('T058', 'T093')": "activity",
+    "('T058', 'T093', 'T170')": "named thing",
+    "('T058', 'T095', 'T096')": "named thing",
     "('T058', 'T097')": "activity",
+    "('T058', 'T097', 'T170')": "named thing",
     "('T058', 'T098', 'T116', 'T121', 'T129')": "drug",
+    "('T058', 'T098', 'T116', 'T129')": "named thing",
     "('T058', 'T098', 'T121', 'T129')": "drug",
+    "('T058', 'T099')": "named thing",
     "('T058', 'T101')": "activity",
+    "('T058', 'T102')": "activity",
+    "('T058', 'T121')": "named thing",
     "('T058', 'T169')": "activity",
     "('T058', 'T170')": "publication",
     "('T058', 'T184')": "phenotypic feature",
+    "('T058', 'T201')": "activity",
     "('T058',)": "activity",
     "('T059', 'T060')": "procedure",
     "('T059', 'T060', 'T170')": "procedure",
@@ -917,45 +1137,156 @@
     "('T060', 'T204')": "procedure",
     "('T060',)": "procedure",
     "('T061', 'T062')": "procedure",
+    "('T061', 'T063')": "procedure",
+    "('T061', 'T065')": "procedure",
+    "('T061', 'T067')": "procedure",
     "('T061', 'T068')": "phenomenon",
+    "('T061', 'T070')": "procedure",
     "('T061', 'T073', 'T093')": "physical entity",
     "('T061', 'T074')": "device",
+    "('T061', 'T074', 'T091')": "named thing",
+    "('T061', 'T074', 'T101')": "named thing",
+    "('T061', 'T074', 'T122')": "named thing",
+    "('T061', 'T078')": "procedure",
     "('T061', 'T078', 'T080')": "procedure",
     "('T061', 'T079')": "procedure",
+    "('T061', 'T080')": "procedure",
     "('T061', 'T091')": "procedure",
+    "('T061', 'T091', 'T170')": "named thing",
+    "('T061', 'T093')": "named thing",
     "('T061', 'T098')": "procedure",
     "('T061', 'T109', 'T121')": "drug",
+    "('T061', 'T109', 'T123')": "named thing",
     "('T061', 'T116', 'T121', 'T129')": "drug",
+    "('T061', 'T116', 'T126')": "named thing",
+    "('T061', 'T116', 'T129')": "named thing",
     "('T061', 'T121')": "drug",
+    "('T061', 'T121', 'T123')": "named thing",
+    "('T061', 'T121', 'T129')": "named thing",
+    "('T061', 'T122')": "named thing",
+    "('T061', 'T168')": "named thing",
     "('T061', 'T169')": "procedure",
+    "('T061', 'T170')": "named thing",
+    "('T061', 'T201')": "procedure",
     "('T061',)": "procedure",
+    "('T062', 'T063')": "procedure",
+    "('T062', 'T063', 'T073')": "named thing",
+    "('T062', 'T065')": "activity",
+    "('T062', 'T067')": "phenomenon",
+    "('T062', 'T070')": "phenomenon",
+    "('T062', 'T075')": "named thing",
+    "('T062', 'T075', 'T078')": "named thing",
+    "('T062', 'T078')": "activity",
+    "('T062', 'T078', 'T080', 'T081', 'T170')": "named thing",
+    "('T062', 'T078', 'T081', 'T082', 'T090', 'T097', 'T170')": "named thing",
+    "('T062', 'T079', 'T098', 'T099', 'T102')": "named thing",
+    "('T062', 'T080', 'T081')": "activity",
+    "('T062', 'T080', 'T081', 'T130')": "named thing",
     "('T062', 'T081')": "activity",
+    "('T062', 'T081', 'T096', 'T169', 'T170')": "named thing",
+    "('T062', 'T081', 'T170')": "named thing",
     "('T062', 'T083')": "activity",
+    "('T062', 'T087')": "named thing",
+    "('T062', 'T090')": "named thing",
     "('T062', 'T091')": "activity",
+    "('T062', 'T109', 'T121')": "named thing",
+    "('T062', 'T169')": "activity",
     "('T062', 'T170')": "activity",
+    "('T062', 'T185')": "activity",
     "('T062',)": "activity",
+    "('T063', 'T075')": "named thing",
+    "('T063', 'T170')": "named thing",
     "('T063',)": "procedure",
+    "('T064', 'T067', 'T078')": "phenomenon",
+    "('T064', 'T069', 'T078', 'T081')": "phenomenon",
     "('T064', 'T078')": "activity",
+    "('T064', 'T078', 'T081', 'T089')": "activity",
+    "('T064', 'T078', 'T089')": "activity",
     "('T064', 'T081')": "activity",
     "('T064', 'T089')": "activity",
+    "('T064', 'T092')": "named thing",
+    "('T064', 'T098')": "named thing",
+    "('T064', 'T098', 'T102')": "named thing",
+    "('T064', 'T170')": "named thing",
     "('T064',)": "activity",
+    "('T065', 'T073', 'T170')": "named thing",
+    "('T065', 'T078')": "activity",
+    "('T065', 'T080')": "activity",
     "('T065', 'T080', 'T185')": "activity",
+    "('T065', 'T081')": "activity",
+    "('T065', 'T089')": "activity",
+    "('T065', 'T090')": "named thing",
+    "('T065', 'T097')": "named thing",
+    "('T065', 'T098')": "named thing",
     "('T065', 'T109')": "chemical entity",
+    "('T065', 'T169')": "activity",
+    "('T065', 'T170')": "named thing",
     "('T065',)": "activity",
     "('T066', 'T073')": "activity",
+    "('T066', 'T073', 'T074')": "named thing",
+    "('T066', 'T073', 'T090')": "named thing",
+    "('T066', 'T073', 'T170')": "named thing",
+    "('T066', 'T081')": "activity",
+    "('T066', 'T091')": "activity",
     "('T066', 'T170')": "activity",
     "('T066',)": "activity",
+    "('T067', 'T068')": "phenomenon",
+    "('T067', 'T068', 'T070')": "phenomenon",
+    "('T067', 'T068', 'T078')": "phenomenon",
+    "('T067', 'T068', 'T078', 'T079', 'T081', 'T098')": "named thing",
+    "('T067', 'T069')": "phenomenon",
     "('T067', 'T070')": "phenomenon",
+    "('T067', 'T078')": "phenomenon",
+    "('T067', 'T079')": "phenomenon",
+    "('T067', 'T082')": "phenomenon",
     "('T067', 'T116', 'T121', 'T123')": "drug",
     "('T067',)": "phenomenon",
+    "('T068', 'T069')": "phenomenon",
+    "('T068', 'T070')": "phenomenon",
+    "('T068', 'T070', 'T073')": "named thing",
+    "('T068', 'T072', 'T170')": "named thing",
     "('T068', 'T073')": "phenomenon",
+    "('T068', 'T073', 'T093')": "named thing",
+    "('T068', 'T073', 'T098')": "named thing",
+    "('T068', 'T073', 'T170')": "named thing",
+    "('T068', 'T075')": "named thing",
+    "('T068', 'T078')": "phenomenon",
+    "('T068', 'T078', 'T079')": "phenomenon",
+    "('T068', 'T080')": "phenomenon",
+    "('T068', 'T081')": "phenomenon",
+    "('T068', 'T081', 'T102')": "phenomenon",
+    "('T068', 'T083')": "named thing",
+    "('T068', 'T090')": "named thing",
+    "('T068', 'T090', 'T096')": "named thing",
+    "('T068', 'T098')": "named thing",
+    "('T068', 'T167')": "named thing",
+    "('T068', 'T169')": "phenomenon",
+    "('T068', 'T170')": "named thing",
     "('T068',)": "phenomenon",
+    "('T069', 'T070')": "phenomenon",
+    "('T069', 'T080')": "phenomenon",
     "('T069',)": "phenomenon",
+    "('T070', 'T073')": "named thing",
+    "('T070', 'T073', 'T167')": "named thing",
     "('T070', 'T078')": "phenomenon",
+    "('T070', 'T080')": "phenomenon",
+    "('T070', 'T081', 'T082')": "phenomenon",
+    "('T070', 'T082')": "phenomenon",
+    "('T070', 'T082', 'T104')": "named thing",
     "('T070', 'T083')": "phenomenon",
+    "('T070', 'T091')": "phenomenon",
+    "('T070', 'T098')": "named thing",
+    "('T070', 'T104')": "named thing",
+    "('T070', 'T120')": "named thing",
+    "('T070', 'T129')": "phenomenon",
+    "('T070', 'T131')": "named thing",
     "('T070', 'T169', 'T170')": "named thing",
+    "('T070', 'T184')": "phenotypic feature",
     "('T070',)": "phenomenon",
+    "('T071', 'T073', 'T093', 'T122', 'T169')": "named thing",
     "('T071',)": "named thing",
+    "('T072', 'T131')": "chemical entity",
+    "('T072', 'T170')": "publication",
     "('T072',)": "physical entity",
     "('T073', 'T074')": "device",
     "('T073', 'T075')": "device",
@@ -1008,40 +1339,89 @@
     "('T073', 'T200')": "drug",
     "('T073', 'T201')": "physical entity",
     "('T073',)": "physical entity",
+    "('T074', 'T078')": "device",
+    "('T074', 'T081')": "device",
+    "('T074', 'T081', 'T201')": "device",
+    "('T074', 'T091')": "device",
+    "('T074', 'T098')": "named thing",
     "('T074', 'T109')": "device",
     "('T074', 'T109', 'T120')": "device",
     "('T074', 'T109', 'T121')": "drug",
+    "('T074', 'T109', 'T121', 'T125')": "drug",
     "('T074', 'T109', 'T121', 'T127')": "drug",
+    "('T074', 'T109', 'T121', 'T129')": "drug",
+    "('T074', 'T109', 'T121', 'T131')": "drug",
+    "('T074', 'T109', 'T121', 'T200')": "drug",
     "('T074', 'T109', 'T122')": "device",
     "('T074', 'T109', 'T130')": "device",
+    "('T074', 'T109', 'T131')": "chemical entity",
+    "('T074', 'T109', 'T168')": "food",
     "('T074', 'T109', 'T195')": "drug",
+    "('T074', 'T109', 'T200')": "drug",
     "('T074', 'T114', 'T121')": "drug",
+    "('T074', 'T116')": "polypeptide",
     "('T074', 'T116', 'T121')": "drug",
+    "('T074', 'T116', 'T121', 'T122')": "drug",
+    "('T074', 'T116', 'T121', 'T123')": "drug",
+    "('T074', 'T116', 'T121', 'T125')": "drug",
+    "('T074', 'T116', 'T121', 'T127')": "drug",
+    "('T074', 'T116', 'T121', 'T129')": "drug",
     "('T074', 'T116', 'T195')": "drug",
     "('T074', 'T121')": "drug",
     "('T074', 'T121', 'T123', 'T196')": "drug",
     "('T074', 'T121', 'T127')": "drug",
     "('T074', 'T121', 'T129')": "drug",
+    "('T074', 'T121', 'T130', 'T197')": "drug",
     "('T074', 'T121', 'T197')": "drug",
     "('T074', 'T122')": "device",
+    "('T074', 'T130')": "chemical entity",
     "('T074', 'T168')": "food",
+    "('T074', 'T170')": "named thing",
+    "('T074', 'T197')": "chemical entity",
     "('T074', 'T200')": "drug",
     "('T074', 'T203')": "device",
+    "('T074', 'T204')": "named thing",
     "('T074',)": "device",
     "('T075',)": "device",
     "('T077', 'T078')": "named thing",
+    "('T077', 'T090')": "individual organism",
     "('T077', 'T170')": "publication",
     "('T077',)": "named thing",
     "('T078', 'T079')": "named thing",
+    "('T078', 'T079', 'T081')": "named thing",
+    "('T078', 'T079', 'T169')": "named thing",
     "('T078', 'T079', 'T170')": "publication",
     "('T078', 'T080')": "named thing",
+    "('T078', 'T080', 'T081', 'T098', 'T102')": "population of individual organisms",
     "('T078', 'T080', 'T082', 'T099')": "cohort",
+    "('T078', 'T080', 'T089')": "named thing",
+    "('T078', 'T080', 'T090')": "individual organism",
+    "('T078', 'T080', 'T093', 'T169')": "agent",
     "('T078', 'T080', 'T170')": "publication",
     "('T078', 'T081')": "named thing",
+    "('T078', 'T081', 'T169')": "named thing",
+    "('T078', 'T081', 'T169', 'T170')": "publication",
+    "('T078', 'T081', 'T170')": "publication",
+    "('T078', 'T082')": "named thing",
+    "('T078', 'T082', 'T170')": "publication",
+    "('T078', 'T083')": "geographic location",
     "('T078', 'T089')": "named thing",
+    "('T078', 'T089', 'T170')": "publication",
+    "('T078', 'T090')": "individual organism",
+    "('T078', 'T090', 'T097')": "cohort",
     "('T078', 'T091')": "named thing",
     "('T078', 'T092')": "agent",
+    "('T078', 'T092', 'T098')": "named thing",
+    "('T078', 'T093')": "agent",
+    "('T078', 'T093', 'T169')": "agent",
+    "('T078', 'T095')": "agent",
+    "('T078', 'T096')": "agent",
+    "('T078', 'T097')": "cohort",
     "('T078', 'T098')": "population of individual organisms",
+    "('T078', 'T098', 'T121')": "named thing",
+    "('T078', 'T098', 'T121', 'T129')": "named thing",
+    "('T078', 'T102')": "named thing",
+    "('T078', 'T122')": "device",
     "('T078', 'T169')": "named thing",
     "('T078', 'T169', 'T170')": "publication",
     "('T078', 'T170')": "publication",
@@ -1066,11 +1446,19 @@
     "('T079', 'T170')": "publication",
     "('T079',)": "named thing",
     "('T080', 'T081')": "named thing",
+    "('T080', 'T081', 'T098')": "population of individual organisms",
     "('T080', 'T081', 'T169')": "named thing",
+    "('T080', 'T082')": "named thing",
     "('T080', 'T082', 'T169')": "named thing",
+    "('T080', 'T083', 'T093', 'T098')": "named thing",
     "('T080', 'T089')": "named thing",
+    "('T080', 'T098')": "population of individual organisms",
+    "('T080', 'T102')": "named thing",
+    "('T080', 'T121', 'T201')": "drug",
+    "('T080', 'T168')": "food",
     "('T080', 'T169')": "named thing",
     "('T080', 'T170')": "publication",
+    "('T080', 'T201')": "named thing",
     "('T080',)": "named thing",
     "('T081', 'T083')": "geographic location",
     "('T081', 'T085')": "named thing",
@@ -1097,41 +1485,86 @@
     "('T082', 'T190')": "disease",
     "('T082', 'T191')": "disease",
     "('T082',)": "named thing",
+    "('T083', 'T109', 'T130')": "named thing",
+    "('T083', 'T167')": "named thing",
+    "('T083', 'T167', 'T169')": "named thing",
     "('T083', 'T169')": "geographic location",
+    "('T083', 'T170')": "named thing",
+    "('T083', 'T204')": "named thing",
     "('T083',)": "geographic location",
+    "('T085', 'T123')": "chemical entity",
     "('T085',)": "named thing",
+    "('T086', 'T114')": "nucleic acid entity",
+    "('T086', 'T114', 'T123')": "nucleic acid entity",
     "('T086',)": "nucleic acid entity",
+    "('T087', 'T116', 'T123')": "polypeptide",
+    "('T087', 'T169')": "polypeptide",
     "('T087',)": "polypeptide",
     "('T088',)": "named thing",
+    "('T089', 'T092')": "agent",
     "('T089', 'T170')": "publication",
     "('T089',)": "named thing",
     "('T090', 'T091')": "individual organism",
+    "('T090', 'T097')": "cohort",
+    "('T090', 'T098')": "population of individual organisms",
     "('T090', 'T170')": "individual organism",
+    "('T090', 'T185')": "individual organism",
     "('T090',)": "individual organism",
     "('T091', 'T097')": "cohort",
+    "('T091', 'T109')": "chemical entity",
+    "('T091', 'T169')": "named thing",
+    "('T091', 'T170')": "publication",
+    "('T091', 'T191')": "disease",
     "('T091',)": "named thing",
+    "('T092', 'T093')": "agent",
+    "('T092', 'T094')": "agent",
     "('T092', 'T097', 'T170')": "agent",
+    "('T092', 'T099')": "named thing",
     "('T092', 'T170')": "named thing",
     "('T092',)": "agent",
+    "('T093', 'T097', 'T098', 'T170')": "named thing",
+    "('T093', 'T097', 'T170')": "named thing",
+    "('T093', 'T109', 'T121', 'T125')": "named thing",
     "('T093', 'T109', 'T123')": "agent",
     "('T093', 'T116', 'T123')": "polypeptide",
     "('T093', 'T121')": "drug",
+    "('T093', 'T169', 'T170')": "named thing",
+    "('T093', 'T170')": "named thing",
     "('T093',)": "agent",
     "('T094',)": "agent",
+    "('T095', 'T098')": "named thing",
     "('T095',)": "agent",
+    "('T096', 'T101')": "named thing",
+    "('T096', 'T170')": "named thing",
     "('T096',)": "agent",
+    "('T097', 'T098')": "cohort",
+    "('T097', 'T098', 'T099')": "cohort",
+    "('T097', 'T102')": "cohort",
     "('T097', 'T170')": "cohort",
     "('T097',)": "cohort",
+    "('T098', 'T099')": "cohort",
+    "('T098', 'T100')": "cohort",
+    "('T098', 'T101')": "cohort",
+    "('T098', 'T102')": "population of individual organisms",
     "('T098', 'T109', 'T121', 'T129')": "drug",
     "('T098', 'T116', 'T121', 'T129')": "drug",
     "('T098', 'T121', 'T129')": "named thing",
+    "('T098', 'T121', 'T129', 'T170')": "named thing",
     "('T098', 'T170')": "publication",
     "('T098',)": "population of individual organisms",
+    "('T099', 'T100')": "cohort",
     "('T099', 'T102')": "cohort",
+    "('T099', 'T170')": "named thing",
     "('T099',)": "cohort",
     "('T100',)": "cohort",
+    "('T101', 'T201')": "cohort",
     "('T101',)": "cohort",
+    "('T102', 'T122')": "device",
     "('T102',)": "named thing",
+    "('T103', 'T109')": "chemical entity",
+    "('T103', 'T109', 'T116', 'T121')": "drug",
+    "('T103', 'T120')": "chemical entity",
+    "('T103', 'T130')": "chemical entity",
     "('T103',)": "chemical entity",
     "('T104', 'T109')": "chemical entity",
     "('T104', 'T109', 'T116', 'T121', 'T123', 'T130')": "drug",
@@ -1459,6 +1892,11 @@
     "('T116', 'T200')": "drug",
     "('T116',)": "polypeptide",
     "('T120', 'T121')": "drug",
+    "('T120', 'T122')": "chemical entity",
+    "('T120', 'T130')": "chemical entity",
+    "('T120', 'T168')": "food",
+    "('T120', 'T197')": "chemical entity",
+    "('T120', 'T200')": "drug",
     "('T120',)": "chemical entity",
     "('T121', 'T122')": "drug",
     "('T121', 'T122', 'T127')": "drug",
@@ -1544,6 +1982,7 @@
     "('T125', 'T130')": "chemical entity",
     "('T125',)": "chemical entity",
     "('T126', 'T129')": "protein",
+    "('T126', 'T130')": "protein",
     "('T126',)": "protein",
     "('T127',)": "small molecule",
     "('T129', 'T130')": "chemical entity",
@@ -1567,28 +2006,36 @@
     "('T130', 'T200')": "drug",
     "('T130',)": "chemical entity",
     "('T131', 'T167')": "chemical entity",
+    "('T131', 'T167', 'T197')": "chemical entity",
     "('T131', 'T196')": "small molecule",
     "('T131', 'T196', 'T197')": "small molecule",
     "('T131', 'T197')": "chemical entity",
     "('T131', 'T197', 'T200')": "drug",
     "('T131',)": "chemical entity",
+    "('T167', 'T169')": "chemical entity",
     "('T167',)": "chemical entity",
+    "('T168', 'T196')": "food",
+    "('T168', 'T197')": "food",
     "('T168', 'T200')": "drug",
     "('T168',)": "food",
     "('T169', 'T170')": "publication",
     "('T169',)": "named thing",
+    "('T170', 'T171')": "publication",
     "('T170', 'T185')": "publication",
     "('T170',)": "publication",
     "('T171',)": "named thing",
     "('T184', 'T190')": "disease or phenotypic feature",
     "('T184',)": "phenotypic feature",
     "('T185',)": "named thing",
+    "('T190', 'T191')": "disease",
     "('T190',)": "disease",
     "('T191',)": "disease",
     "('T192',)": "protein",
     "('T194',)": "organism taxon",
+    "('T195', 'T200')": "drug",
     "('T195',)": "drug",
     "('T196', 'T197')": "small molecule",
+    "('T196', 'T200')": "small molecule",
     "('T196',)": "small molecule",
     "('T197', 'T200')": "drug",
     "('T197',)": "chemical entity",

From 93f08ff5fb238de2e3ea94f75d1c94b42b9d67aa Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 18 Aug 2023 10:33:11 -0700
Subject: [PATCH 031/117] #316 clean up print statements for start/end

---
 umls_list_jsonl_to_kg_jsonl.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index c544fa97..6803ac1d 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -157,6 +157,7 @@ def process_chv_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
 
     nodes_output.write(node)
 
+
 def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
     curie_prefix = kg2_util.CURIE_PREFIX_DRUGBANK
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
@@ -237,6 +238,7 @@ def process_fma_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
 
 
 if __name__ == '__main__':
+    print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
     args = get_args()
     input_file_name = args.inputFile
     test_mode = args.test
@@ -260,7 +262,6 @@ def process_fma_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
     for item in iri_mappings_raw:
         for prefix in item:
             iri_mappings[prefix] = item[prefix]
-    print(json.dumps(iri_mappings, indent=4, sort_keys=True))
 
     for data in input_items:
         # There should only be one item in the data dictionary
@@ -286,4 +287,5 @@ def process_fma_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
                 process_fma_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
 
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
-    kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)
\ No newline at end of file
+    kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)
+    print("Finishing umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())

From fba2af0a7405c1982dfb68960d9d3063a514e660 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 18 Aug 2023 10:40:53 -0700
Subject: [PATCH 032/117] #316 swap order of source and id to make looking
 through a particular source easier

---
 umls_list_jsonl_to_kg_jsonl.py |  2 +-
 umls_mysql_to_list_jsonl.py    | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 6803ac1d..a27e71b9 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -43,7 +43,7 @@ def get_args():
 def extract_node_id(node_id_str):
     node_id_str = node_id_str.replace('(', '').replace(')', '').replace("'", '')
     node_id = node_id_str.split(',')
-    return node_id[1].strip(), node_id[0].strip()
+    return node_id[0].strip(), node_id[1].strip()
 
 
 def make_node_id(curie_prefix, node_id):
diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index 88ff11a1..fc91b7ca 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -59,7 +59,7 @@ def code_sources(cursor, output):
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():
         (node_id, node_source, cui, names) = result
-        key = (node_id, node_source)
+        key = (node_source, node_id)
         code_source_info[key] = dict()
         code_source_info[key][cui_key] = cui.split(',')
         if name_key not in code_source_info[key]:
@@ -78,7 +78,7 @@ def code_sources(cursor, output):
     cursor.execute(extra_info_sql_statement)
     for result in cursor.fetchall():
         (node_id, node_source, info) = result
-        key = (node_id, node_source)
+        key = (node_source, node_id)
         if key not in code_source_info:
             # This occurs if a node doesn't have a name.
             continue
@@ -98,7 +98,7 @@ def code_sources(cursor, output):
     cursor.execute(tuis_sql_statement)
     for result in cursor.fetchall():
         (node_id, node_source, tuis) = result
-        key = (node_id, node_source)
+        key = (node_source, node_id)
         if key not in code_source_info:
             # This occurs if a node doesn't have a name.
             continue
@@ -134,7 +134,7 @@ def cui_sources(cursor, output, sources):
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():
         (node_id, names) = result
-        key = (node_id, umls_source_name)
+        key = (umls_source_name, node_id)
         cui_source_info[key] = dict()
         cui_source_info[key][name_key] = dict()
         for name in names.split('\t'):
@@ -157,7 +157,7 @@ def cui_sources(cursor, output, sources):
     cursor.execute(tuis_sql_statement)
     for result in cursor.fetchall():
         (node_id, tuis) = result
-        key = (node_id, umls_source_name)
+        key = (umls_source_name, node_id)
         if key not in cui_source_info:
             # This happens if a node doesn't have an English name. See https://github.com/RTXteam/RTX-KG2/issues/316#issuecomment-1672074392
             continue
@@ -168,7 +168,7 @@ def cui_sources(cursor, output, sources):
     cursor.execute(relations_sql_statement)
     for result in cursor.fetchall():
         (cui1, rel, rela, direction, cui2, source) = result
-        key = (cui1, umls_source_name)
+        key = (umls_source_name, cui1)
         if key not in cui_source_info:
             # See above for explanation
             continue
@@ -187,7 +187,7 @@ def cui_sources(cursor, output, sources):
     cursor.execute(definitions_sql_statement)
     for result in cursor.fetchall():
         (node_id, definition) = result
-        key = (node_id, umls_source_name)
+        key = (umls_source_name, node_id)
         if key not in cui_source_info:
             # See above for explanation
             continue

From 0d61e8369fb7c5d260e59a5aaa167938edff83a9 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 18 Aug 2023 11:36:43 -0700
Subject: [PATCH 033/117] #316 factor out description creation

---
 umls_list_jsonl_to_kg_jsonl.py | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index a27e71b9..bfeb84f7 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -50,6 +50,14 @@ def make_node_id(curie_prefix, node_id):
     return curie_prefix + ':' + node_id
 
 
+def create_description(tuis):
+    description = str()
+    for tui in tuis:
+        description += "; UMLS Semantic Type: STY:" + tui
+    description = description.strip("; ")
+    return description    
+
+
 def process_atc_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
     curie_prefix = kg2_util.CURIE_PREFIX_ATC
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
@@ -103,11 +111,7 @@ def process_atc_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
             name = name[0]
     node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
     node['synonym'] = synonyms
-    description = str()
-    for tui in tuis:
-        description += "; UMLS Semantic Type: STY:" + tui
-    description.strip("; ")
-    node['description'] = description
+    node['description'] = create_description(tuis)
 
     nodes_output.write(node)
 
@@ -149,11 +153,7 @@ def process_chv_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
 
     node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
     node['synonym'] = synonyms
-    description = str()
-    for tui in tuis:
-        description += "; UMLS Semantic Type: STY:" + tui
-    description.strip("; ")
-    node['description'] = description
+    node['description'] = create_description(tuis)
 
     nodes_output.write(node)
 
@@ -184,11 +184,7 @@ def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_outpu
     # TODO: figure out update date
     node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
     node['synonym'] = synonyms
-    description = str()
-    for tui in tuis:
-        description += "; UMLS Semantic Type: STY:" + tui
-    description.strip("; ")
-    node['description'] = description
+    node['description'] = create_description(tuis)
     
     nodes_output.write(node)
 
@@ -228,11 +224,7 @@ def process_fma_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
 
     node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
     node['synonym'] = synonyms
-    description = str()
-    for tui in tuis:
-        description += "; UMLS Semantic Type: STY:" + tui
-    description.strip("; ")
-    node['description'] = description
+    node['description'] = create_description(tuis)
 
     nodes_output.write(node)
 

From b2f04f25a4f5f8bdee5a91e3f410cca1330888b7 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 18 Aug 2023 13:07:38 -0700
Subject: [PATCH 034/117] #316 GO processing is working

---
 umls_list_jsonl_to_kg_jsonl.py | 157 +++++++++++++++------------------
 1 file changed, 73 insertions(+), 84 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index bfeb84f7..271e8855 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -50,14 +50,32 @@ def make_node_id(curie_prefix, node_id):
     return curie_prefix + ':' + node_id
 
 
-def create_description(tuis):
-    description = str()
+def create_description(comment, tuis):
+    description = comment
     for tui in tuis:
         description += "; UMLS Semantic Type: STY:" + tui
     description = description.strip("; ")
     return description    
 
 
+def get_name_synonyms(names_dict, accession_heirarchy):
+    names = list()
+    for key in accession_heirarchy:
+        names += [name for name in names_dict.get(key, dict()).get('Y', list())]
+        names += [name for name in names_dict.get(key, dict()).get('N', list())]
+    assert len(names) > 0
+    if len(names) == 1:
+        return names[0], list()
+    return names[0], names[1:]
+
+
+def get_name_keys(names_dict):
+    keys_list = []
+    for key in names_dict:
+        keys_list.append(key)
+    return str(sorted(keys_list))
+
+
 def process_atc_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
     curie_prefix = kg2_util.CURIE_PREFIX_ATC
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
@@ -73,45 +91,11 @@ def process_atc_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
     name = str()
     synonyms = list()
     names = info.get(NAMES_KEY, dict())
-    if "RXN_PT" in names:
-        rxn_pt = names.get('RXN_PT', dict())
-        if 'Y' in rxn_pt:
-            name = rxn_pt.get('Y', '')
-            assert len(name) == 1
-            name = name[0]
-        else:
-            name = rxn_pt.get('N', '')
-            assert len(name) == 1
-            name = name[0]
-        synonyms = [syn for syn in names.get('PT', dict()).get('Y', list())]
-        synonyms += [syn for syn in names.get('PT', dict()).get('N', list())]
-        synonyms += [syn for syn in names.get('IN', dict()).get('Y', list())]
-        synonyms += [syn for syn in names.get('IN', dict()).get('N', list())]
-    elif "PT" in names:
-        pt = names.get('PT', dict())
-        if 'Y' in pt:
-            name = pt.get('Y', '')
-            assert len(name) == 1
-            name = name[0]
-        else:
-            name = pt.get('N', '')
-            assert len(name) == 1
-            name = name[0]
-        synonyms += [syn for syn in names.get('IN', dict()).get('Y', list())]
-        synonyms += [syn for syn in names.get('IN', dict()).get('N', list())]
-    else:
-        in_dict = names.get('IN', dict())
-        if 'Y' in in_dict:
-            name = in_dict.get('Y', '')
-            assert len(name) == 1
-            name = name[0]
-        else:
-            name = in_dict.get('N', '')
-            assert len(name) == 1
-            name = name[0]
+    name, synonyms = get_name_synonyms(names, ['RXN_PT', 'PT', 'RXN_IN', 'IN'])
+
     node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
     node['synonym'] = synonyms
-    node['description'] = create_description(tuis)
+    node['description'] = create_description("", tuis)
 
     nodes_output.write(node)
 
@@ -135,25 +119,11 @@ def process_chv_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
     name = str()
     synonyms = list()
     names = info.get(NAMES_KEY, dict())
-    pt = names.get('PT', dict())
-    synonyms += [syn for syn in names.get('SY', dict()).get('Y', list())]
-    synonyms += [syn for syn in names.get('SY', dict()).get('N', list())]
-    if 'Y' in pt:
-        name = pt.get('Y', '')
-        assert len(name) == 1, str(name) + ' ' + node_curie
-        name = name[0]
-    elif 'N' in pt:
-        name = pt.get('N', '')
-        assert len(name) == 1, str(name) + ' ' + node_curie
-        name = name[0]
-    else:
-        name = synonyms[0]
-        synonyms = synonyms[1:]
-        name = name[0]
+    name, synonyms = get_name_synonyms(names, ['PT', 'SY'])
 
     node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
     node['synonym'] = synonyms
-    node['description'] = create_description(tuis)
+    node['description'] = create_description("", tuis)
 
     nodes_output.write(node)
 
@@ -170,21 +140,13 @@ def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_outpu
     fda_codes = info.get(INFO_KEY, dict()).get('FDA_UNII_CODE', list())
     secondary_accession_keys = info.get(INFO_KEY, dict()).get('SID', list())
 
-    name = info.get(NAMES_KEY, dict()).get('IN', dict()).get('N', list())
-    if len(name) == 0:
-        name = info.get(NAMES_KEY, dict()).get('IN', dict()).get('Y', list())
-    assert len(name) == 1, str(name) + " " + node_curie
-    name = name[0]
-    synonyms = list()
-    for syn_cat in info.get(NAMES_KEY, dict()).get('SY', dict()):
-        synonyms += info.get(NAMES_KEY, dict()).get('SY', dict())[syn_cat]
-    for syn_cat in info.get(NAMES_KEY, dict()).get('FSY', dict()):
-        synonyms += info.get(NAMES_KEY, dict()).get('FSY', dict())[syn_cat]
+    names = info.get(NAMES_KEY, dict())
+    name, synonyms = get_name_synonyms(names, ['IN', 'SY', 'FSY'])
 
     # TODO: figure out update date
     node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
     node['synonym'] = synonyms
-    node['description'] = create_description(tuis)
+    node['description'] = create_description("", tuis)
     
     nodes_output.write(node)
 
@@ -204,31 +166,53 @@ def process_fma_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
     name = str()
     synonyms = list()
     names = info.get(NAMES_KEY, dict())
-    pt = names.get('PT', dict())
-    synonyms += [syn for syn in names.get('SY', dict()).get('Y', list())]
-    synonyms += [syn for syn in names.get('SY', dict()).get('N', list())]
-    if 'Y' in pt:
-        name = pt.get('Y', '')
-        if len(name) > 1:
-            synonyms += name[1:]
-        name = name[0]
-    elif 'N' in pt:
-        name = pt.get('N', '')
-        if len(name) > 1:
-            synonyms += name[1:]
-        name = name[0]
-    else:
-        name = synonyms[0]
-        synonyms = synonyms[1:]
-        name = name[0]
+    name, synonyms = get_name_synonyms(names, ['PT', 'SY'])
 
     node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
     node['synonym'] = synonyms
-    node['description'] = create_description(tuis)
+    node['description'] = create_description("", tuis)
 
     nodes_output.write(node)
 
 
+def process_go_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
+    curie_prefix = kg2_util.CURIE_PREFIX_GO
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
+    node_id = node_id.replace('GO:', '')
+    iri = iri_mappings[curie_prefix] + node_id
+    node_curie = make_node_id(curie_prefix, node_id)
+    cuis = info.get(CUIS_KEY, list())
+    tuis = info.get(TUIS_KEY, list())
+    go_namespace = info.get(INFO_KEY, dict()).get('GO_NAMESPACE', list())
+    assert len(go_namespace) == 1
+    go_namespace = go_namespace[0]
+    namespace_category_map = {'molecular_function': kg2_util.BIOLINK_CATEGORY_MOLECULAR_ACTIVITY,
+                              'cellular_component': kg2_util.BIOLINK_CATEGORY_CELLULAR_COMPONENT,
+                              'biological_process': kg2_util.BIOLINK_CATEGORY_BIOLOGICAL_PROCESS}
+    category = namespace_category_map.get(go_namespace, tui_mappings[str(tuple(tuis))])
+    go_comment = info.get(INFO_KEY, dict()).get('GO_COMMENT', str())
+
+    # Currently not used, but extracting them in case we want them in the future
+    date_created = info.get(INFO_KEY, dict()).get('DATE_CREATED', list())
+    go_subset = info.get(INFO_KEY, dict()).get('GO_SUBSET', list())
+    gxr = info.get(INFO_KEY, dict()).get('GXR', list())
+    ref = info.get(INFO_KEY, dict()).get('REF', list())
+    sid = info.get(INFO_KEY, dict()).get('SID', list())
+
+    name = str()
+    synonyms = list()
+    names = info.get(NAMES_KEY, dict())
+    name, synonyms = get_name_synonyms(names, ['PT', 'MTH_PT', 'SY', 'MTH_SY', 'ET', 'MTH_ET'])
+
+    node = kg2_util.make_node(node_curie, iri, name, category, "2023", provided_by)
+    node['synonym'] = synonyms
+    if len(go_comment) > 0:
+        go_comment = go_comment[0]
+        go_comment = "// COMMENTS: " + go_comment
+    node['description'] = create_description(go_comment, tuis)
+
+    nodes_output.write(node)
+
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
     args = get_args()
@@ -245,6 +229,7 @@ def process_fma_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
     input_items = input_read_jsonlines_info[0]
 
     tui_mappings = dict()
+    name_keys = set()
 
     with open('tui_combo_mappings.json') as mappings:
         tui_mappings = json.load(mappings)
@@ -278,6 +263,10 @@ def process_fma_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
             if source == 'FMA':
                 process_fma_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
 
+            if source == 'GO':
+                process_go_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
+
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
     kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)
+    # print(json.dumps(name_keys, indent=4, sort_keys=True, default=list))
     print("Finishing umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())

From b3369f2e01975e6decb47e0463d1dc2b83257b42 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 18 Aug 2023 13:29:18 -0700
Subject: [PATCH 035/117] #316 lots of restructuring to streamline the code

---
 kg2_util.py                    |   2 +
 umls_list_jsonl_to_kg_jsonl.py | 111 ++++++++++++---------------------
 2 files changed, 42 insertions(+), 71 deletions(-)

diff --git a/kg2_util.py b/kg2_util.py
index 0c04d2aa..d03950b1 100644
--- a/kg2_util.py
+++ b/kg2_util.py
@@ -57,6 +57,7 @@
 CURIE_PREFIX_CHEMBL_COMPOUND = 'CHEMBL.COMPOUND'
 CURIE_PREFIX_CHEMBL_MECHANISM = 'CHEMBL.MECHANISM'
 CURIE_PREFIX_CHEMBL_TARGET = 'CHEMBL.TARGET'
+CURIE_PREFIX_CHV = 'CHV'
 CURIE_PREFIX_CLINICALTRIALS = 'clinicaltrials'
 CURIE_PREFIX_DCTERMS = 'dcterms'
 CURIE_PREFIX_DGIDB = 'DGIdb'
@@ -65,6 +66,7 @@
 CURIE_PREFIX_DRUGCENTRAL = 'DrugCentral'
 CURIE_PREFIX_ENSEMBL = 'ENSEMBL'
 CURIE_PREFIX_ENSEMBL_GENOMES = 'EnsemblGenomes'
+CURIE_PREFIX_FMA = 'FMA'
 CURIE_PREFIX_GO = 'GO'
 CURIE_PREFIX_GTPI = 'GTPI'
 CURIE_PREFIX_GTPI_SOURCE = 'GTPI_source'
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 271e8855..ace05061 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -76,37 +76,41 @@ def get_name_keys(names_dict):
     return str(sorted(keys_list))
 
 
-def process_atc_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
+def make_umls_node(node_curie, iri, name, category, update_date, provided_by, synonyms, description, nodes_output):
+    node = kg2_util.make_node(node_curie, iri, name, category, "2023", provided_by)
+    node['synonym'] = synonyms
+    node['description'] = description
+
+    nodes_output.write(node)
+
+
+def get_basic_info(curie_prefix, node_id, tui_mappings, iri_mappings, info):
     curie_prefix = kg2_util.CURIE_PREFIX_ATC
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
     iri = iri_mappings[curie_prefix] + node_id
     node_curie = make_node_id(curie_prefix, node_id)
     cuis = info.get(CUIS_KEY, list())
     tuis = info.get(TUIS_KEY, list())
+    category = tui_mappings[str(tuple(tuis))]
+
+    return node_curie, iri, provided_by, category, cuis, tuis
+
+
+def process_atc_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
+    node_curie, iri, provided_by, category, cuis, tuis = get_basic_info(kg2_util.CURIE_PREFIX_ATC, node_id, tui_mappings, iri_mappings, info)
 
     # Currently not used, but extracting them in case we want them in the future
     atc_level = info.get(INFO_KEY, dict()).get('ATC_LEVEL', list())[0]
     is_drug_class = info.get(INFO_KEY, dict()).get('IS_DRUG_CLASS', list()) == ["Y"]
 
-    name = str()
-    synonyms = list()
     names = info.get(NAMES_KEY, dict())
     name, synonyms = get_name_synonyms(names, ['RXN_PT', 'PT', 'RXN_IN', 'IN'])
 
-    node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
-    node['synonym'] = synonyms
-    node['description'] = create_description("", tuis)
-
-    nodes_output.write(node)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
 def process_chv_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
-    curie_prefix = "CHV" # This should be replaced with a kg2_util prefix at some point
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
-    iri = iri_mappings[curie_prefix] + node_id
-    node_curie = make_node_id(curie_prefix, node_id)
-    cuis = info.get(CUIS_KEY, list())
-    tuis = info.get(TUIS_KEY, list())
+    node_curie, iri, provided_by, category, cuis, tuis = get_basic_info(kg2_util.CURIE_PREFIX_CHV, node_id, tui_mappings, iri_mappings, info)
 
     # Currently not used, but extracting them in case we want them in the future
     combo_score = info.get(INFO_KEY, dict()).get('COMBO_SCORE', list())
@@ -116,25 +120,14 @@ def process_chv_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
     disparaged = info.get(INFO_KEY, dict()).get('DISPARAGED', list())
     frequency = info.get(INFO_KEY, dict()).get('FREQUENCY', list())
 
-    name = str()
-    synonyms = list()
     names = info.get(NAMES_KEY, dict())
     name, synonyms = get_name_synonyms(names, ['PT', 'SY'])
 
-    node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
-    node['synonym'] = synonyms
-    node['description'] = create_description("", tuis)
-
-    nodes_output.write(node)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
 def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
-    curie_prefix = kg2_util.CURIE_PREFIX_DRUGBANK
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
-    iri = iri_mappings[curie_prefix] + node_id
-    node_curie = make_node_id(curie_prefix, node_id)
-    cuis = info.get(CUIS_KEY, list())
-    tuis = info.get(TUIS_KEY, list())
+    node_curie, iri, provided_by, category, cuis, tuis = get_basic_info(kg2_util.CURIE_PREFIX_DRUGBANK, node_id, tui_mappings, iri_mappings, info)
 
     # Currently not used, but extracting them in case we want them in the future
     fda_codes = info.get(INFO_KEY, dict()).get('FDA_UNII_CODE', list())
@@ -144,74 +137,50 @@ def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_outpu
     name, synonyms = get_name_synonyms(names, ['IN', 'SY', 'FSY'])
 
     # TODO: figure out update date
-    node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
-    node['synonym'] = synonyms
-    node['description'] = create_description("", tuis)
-    
-    nodes_output.write(node)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
 def process_fma_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
-    curie_prefix = "FMA" # This should be replaced with a kg2_util prefix at some point
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
-    iri = iri_mappings[curie_prefix] + node_id
-    node_curie = make_node_id(curie_prefix, node_id)
-    cuis = info.get(CUIS_KEY, list())
-    tuis = info.get(TUIS_KEY, list())
+    node_curie, iri, provided_by, category, cuis, tuis = get_basic_info(kg2_util.CURIE_PREFIX_FMA, node_id, tui_mappings, iri_mappings, info)
 
     # Currently not used, but extracting them in case we want them in the future
     authority = info.get(INFO_KEY, dict()).get('AUTHORITY', list())
     date_last_modified = info.get(INFO_KEY, dict()).get('DATE_LAST_MODIFIED', list())
 
-    name = str()
-    synonyms = list()
     names = info.get(NAMES_KEY, dict())
     name, synonyms = get_name_synonyms(names, ['PT', 'SY'])
 
-    node = kg2_util.make_node(node_curie, iri, name, tui_mappings[str(tuple(tuis))], "2023", provided_by)
-    node['synonym'] = synonyms
-    node['description'] = create_description("", tuis)
-
-    nodes_output.write(node)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
 def process_go_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
-    curie_prefix = kg2_util.CURIE_PREFIX_GO
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
-    node_id = node_id.replace('GO:', '')
-    iri = iri_mappings[curie_prefix] + node_id
-    node_curie = make_node_id(curie_prefix, node_id)
-    cuis = info.get(CUIS_KEY, list())
-    tuis = info.get(TUIS_KEY, list())
-    go_namespace = info.get(INFO_KEY, dict()).get('GO_NAMESPACE', list())
+    node_curie, iri, provided_by, category, cuis, tuis = get_basic_info(kg2_util.CURIE_PREFIX_GO, node_id, tui_mappings, iri_mappings, info)
+
+    # GO-specific information
+    attributes = info.get(INFO_KEY, dict())
+    go_namespace = attributes.get('GO_NAMESPACE', list())
     assert len(go_namespace) == 1
     go_namespace = go_namespace[0]
     namespace_category_map = {'molecular_function': kg2_util.BIOLINK_CATEGORY_MOLECULAR_ACTIVITY,
                               'cellular_component': kg2_util.BIOLINK_CATEGORY_CELLULAR_COMPONENT,
                               'biological_process': kg2_util.BIOLINK_CATEGORY_BIOLOGICAL_PROCESS}
-    category = namespace_category_map.get(go_namespace, tui_mappings[str(tuple(tuis))])
-    go_comment = info.get(INFO_KEY, dict()).get('GO_COMMENT', str())
+    category = namespace_category_map.get(go_namespace, category)
+    go_comment = attributes.get('GO_COMMENT', str())
+    if len(go_comment) > 0:
+        go_comment = go_comment[0]
+        go_comment = "// COMMENTS: " + go_comment
 
     # Currently not used, but extracting them in case we want them in the future
-    date_created = info.get(INFO_KEY, dict()).get('DATE_CREATED', list())
-    go_subset = info.get(INFO_KEY, dict()).get('GO_SUBSET', list())
-    gxr = info.get(INFO_KEY, dict()).get('GXR', list())
-    ref = info.get(INFO_KEY, dict()).get('REF', list())
-    sid = info.get(INFO_KEY, dict()).get('SID', list())
-
-    name = str()
-    synonyms = list()
+    date_created = attributes.get('DATE_CREATED', list())
+    go_subset = attributes.get('GO_SUBSET', list())
+    gxr = attributes.get('GXR', list())
+    ref = attributes.get('REF', list())
+    sid = attributes.get('SID', list())
+
     names = info.get(NAMES_KEY, dict())
     name, synonyms = get_name_synonyms(names, ['PT', 'MTH_PT', 'SY', 'MTH_SY', 'ET', 'MTH_ET'])
 
-    node = kg2_util.make_node(node_curie, iri, name, category, "2023", provided_by)
-    node['synonym'] = synonyms
-    if len(go_comment) > 0:
-        go_comment = go_comment[0]
-        go_comment = "// COMMENTS: " + go_comment
-    node['description'] = create_description(go_comment, tuis)
-
-    nodes_output.write(node)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description(go_comment, tuis), nodes_output)
 
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())

From 2f8e61b51923211f5a743bbdfff5c286ae7cdc78 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 18 Aug 2023 14:06:16 -0700
Subject: [PATCH 036/117] #316 correcting a lot of typos, factoring more out,
 and adding HCPCS

---
 kg2_util.py                    |  1 +
 umls_list_jsonl_to_kg_jsonl.py | 97 ++++++++++++++++++++--------------
 2 files changed, 58 insertions(+), 40 deletions(-)

diff --git a/kg2_util.py b/kg2_util.py
index d03950b1..d7750974 100644
--- a/kg2_util.py
+++ b/kg2_util.py
@@ -70,6 +70,7 @@
 CURIE_PREFIX_GO = 'GO'
 CURIE_PREFIX_GTPI = 'GTPI'
 CURIE_PREFIX_GTPI_SOURCE = 'GTPI_source'
+CURIE_PREFIX_HCPCS = 'HCPCS'
 CURIE_PREFIX_HGNC = 'HGNC'
 CURIE_PREFIX_HMDB = 'HMDB'
 CURIE_PREFIX_IAO = 'IAO'
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index ace05061..22c2f676 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -28,6 +28,16 @@
 NAMES_KEY = 'names'
 TUIS_KEY = 'tuis'
 
+TUI_MAPPINGS = dict()
+IRI_MAPPINGS = dict()
+
+ATC_PREFIX = kg2_util.CURIE_PREFIX_ATC
+CHV_PREFIX = kg2_util.CURIE_PREFIX_CHV
+DRUGBANK_PREFIX = kg2_util.CURIE_PREFIX_DRUGBANK
+FMA_PREFIX = kg2_util.CURIE_PREFIX_FMA
+GO_PREFIX = kg2_util.CURIE_PREFIX_GO
+HCPCS_PREFIX = kg2_util.CURIE_PREFIX_HCPCS
+
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
 
@@ -76,6 +86,13 @@ def get_name_keys(names_dict):
     return str(sorted(keys_list))
 
 
+def get_attribute_keys(attributes_dict):
+    keys_list = []
+    for key in attributes_dict:
+        keys_list.append(key)
+    return str(sorted(keys_list))
+
+
 def make_umls_node(node_curie, iri, name, category, update_date, provided_by, synonyms, description, nodes_output):
     node = kg2_util.make_node(node_curie, iri, name, category, "2023", provided_by)
     node['synonym'] = synonyms
@@ -84,33 +101,32 @@ def make_umls_node(node_curie, iri, name, category, update_date, provided_by, sy
     nodes_output.write(node)
 
 
-def get_basic_info(curie_prefix, node_id, tui_mappings, iri_mappings, info):
-    curie_prefix = kg2_util.CURIE_PREFIX_ATC
+def get_basic_info(curie_prefix, node_id, info, accession_heirarchy):
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
-    iri = iri_mappings[curie_prefix] + node_id
+    iri = IRI_MAPPINGS[curie_prefix] + node_id
     node_curie = make_node_id(curie_prefix, node_id)
     cuis = info.get(CUIS_KEY, list())
     tuis = info.get(TUIS_KEY, list())
-    category = tui_mappings[str(tuple(tuis))]
+    category = TUI_MAPPINGS[str(tuple(tuis))]
 
-    return node_curie, iri, provided_by, category, cuis, tuis
+    names = info.get(NAMES_KEY, dict())
+    name, synonyms = get_name_synonyms(names, accession_heirarchy)
+
+    return node_curie, iri, name, provided_by, category, synonyms, cuis, tuis
 
 
-def process_atc_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
-    node_curie, iri, provided_by, category, cuis, tuis = get_basic_info(kg2_util.CURIE_PREFIX_ATC, node_id, tui_mappings, iri_mappings, info)
+def process_atc_item(node_id, info, nodes_output, edges_output):
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(ATC_PREFIX, node_id, info, ['RXN_PT', 'PT', 'RXN_IN', 'IN'])
 
     # Currently not used, but extracting them in case we want them in the future
     atc_level = info.get(INFO_KEY, dict()).get('ATC_LEVEL', list())[0]
     is_drug_class = info.get(INFO_KEY, dict()).get('IS_DRUG_CLASS', list()) == ["Y"]
 
-    names = info.get(NAMES_KEY, dict())
-    name, synonyms = get_name_synonyms(names, ['RXN_PT', 'PT', 'RXN_IN', 'IN'])
-
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_chv_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
-    node_curie, iri, provided_by, category, cuis, tuis = get_basic_info(kg2_util.CURIE_PREFIX_CHV, node_id, tui_mappings, iri_mappings, info)
+def process_chv_item(node_id, info, nodes_output, edges_output):
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(CHV_PREFIX, node_id, info, ['PT', 'SY'])
 
     # Currently not used, but extracting them in case we want them in the future
     combo_score = info.get(INFO_KEY, dict()).get('COMBO_SCORE', list())
@@ -120,41 +136,33 @@ def process_chv_item(node_id, info, tui_mappings, iri_mappings, nodes_output, ed
     disparaged = info.get(INFO_KEY, dict()).get('DISPARAGED', list())
     frequency = info.get(INFO_KEY, dict()).get('FREQUENCY', list())
 
-    names = info.get(NAMES_KEY, dict())
-    name, synonyms = get_name_synonyms(names, ['PT', 'SY'])
-
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_drugbank_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
-    node_curie, iri, provided_by, category, cuis, tuis = get_basic_info(kg2_util.CURIE_PREFIX_DRUGBANK, node_id, tui_mappings, iri_mappings, info)
+def process_drugbank_item(node_id, info, nodes_output, edges_output):
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(DRUGBANK_PREFIX, node_id, info, ['IN', 'SY', 'FSY'])
 
     # Currently not used, but extracting them in case we want them in the future
     fda_codes = info.get(INFO_KEY, dict()).get('FDA_UNII_CODE', list())
     secondary_accession_keys = info.get(INFO_KEY, dict()).get('SID', list())
 
-    names = info.get(NAMES_KEY, dict())
-    name, synonyms = get_name_synonyms(names, ['IN', 'SY', 'FSY'])
-
     # TODO: figure out update date
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_fma_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
-    node_curie, iri, provided_by, category, cuis, tuis = get_basic_info(kg2_util.CURIE_PREFIX_FMA, node_id, tui_mappings, iri_mappings, info)
+def process_fma_item(node_id, info, nodes_output, edges_output):
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(FMA_PREFIX, node_id, info, ['PT', 'SY'])
 
     # Currently not used, but extracting them in case we want them in the future
     authority = info.get(INFO_KEY, dict()).get('AUTHORITY', list())
     date_last_modified = info.get(INFO_KEY, dict()).get('DATE_LAST_MODIFIED', list())
 
-    names = info.get(NAMES_KEY, dict())
-    name, synonyms = get_name_synonyms(names, ['PT', 'SY'])
-
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_go_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
-    node_curie, iri, provided_by, category, cuis, tuis = get_basic_info(kg2_util.CURIE_PREFIX_GO, node_id, tui_mappings, iri_mappings, info)
+def process_go_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['PT', 'MTH_PT', 'SY', 'MTH_SY', 'ET', 'MTH_ET']
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(GO_PREFIX, node_id.replace('GO:', ''), info, accession_heirarchy)
 
     # GO-specific information
     attributes = info.get(INFO_KEY, dict())
@@ -177,11 +185,19 @@ def process_go_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edg
     ref = attributes.get('REF', list())
     sid = attributes.get('SID', list())
 
-    names = info.get(NAMES_KEY, dict())
-    name, synonyms = get_name_synonyms(names, ['PT', 'MTH_PT', 'SY', 'MTH_SY', 'ET', 'MTH_ET'])
-
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description(go_comment, tuis), nodes_output)
 
+
+def process_hcpcs_item(node_id, info, nodes_output, edges_output):
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HCPCS_PREFIX, node_id, info, ['PT', 'MTH_HT', 'MP'])
+
+    # Currently not used, but extracting them in case we want them in the future
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+    return get_attribute_keys(info.get(INFO_KEY, dict()))
+
+
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
     args = get_args()
@@ -197,17 +213,15 @@ def process_go_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edg
     input_read_jsonlines_info = kg2_util.start_read_jsonlines(input_file_name)
     input_items = input_read_jsonlines_info[0]
 
-    tui_mappings = dict()
     name_keys = set()
 
     with open('tui_combo_mappings.json') as mappings:
-        tui_mappings = json.load(mappings)
+        TUI_MAPPINGS = json.load(mappings)
 
-    iri_mappings = dict()
     iri_mappings_raw = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string('curies-to-urls-map.yaml'))['use_for_bidirectional_mapping']
     for item in iri_mappings_raw:
         for prefix in item:
-            iri_mappings[prefix] = item[prefix]
+            IRI_MAPPINGS[prefix] = item[prefix]
 
     for data in input_items:
         # There should only be one item in the data dictionary
@@ -221,21 +235,24 @@ def process_go_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edg
 
             # Process the data specifically by source
             if source == 'ATC':
-                process_atc_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
+                process_atc_item(node_id, value, nodes_output, edges_output)
 
             if source == 'CHV':
-                process_chv_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
+                process_chv_item(node_id, value, nodes_output, edges_output)
 
             if source == 'DRUGBANK':
-                process_drugbank_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
+                process_drugbank_item(node_id, value, nodes_output, edges_output)
 
             if source == 'FMA':
-                process_fma_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
+                process_fma_item(node_id, value, nodes_output, edges_output)
 
             if source == 'GO':
-                process_go_item(node_id, value, tui_mappings, iri_mappings, nodes_output, edges_output)
+                process_go_item(node_id, value, nodes_output, edges_output)
+
+            if source == 'HCPCS':
+                name_keys.add(process_hcpcs_item(node_id, value, nodes_output, edges_output))
 
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
     kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)
-    # print(json.dumps(name_keys, indent=4, sort_keys=True, default=list))
+    print(json.dumps(name_keys, indent=4, sort_keys=True, default=list))
     print("Finishing umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())

From c066784ee9bfb780e47cb221f672ef5996b0b815 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 18 Aug 2023 15:15:13 -0700
Subject: [PATCH 037/117] #316 HGNC

---
 umls_list_jsonl_to_kg_jsonl.py | 74 +++++++++++++++++++++++++++++++---
 1 file changed, 68 insertions(+), 6 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 22c2f676..bffdfd4e 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -90,13 +90,14 @@ def get_attribute_keys(attributes_dict):
     keys_list = []
     for key in attributes_dict:
         keys_list.append(key)
-    return str(sorted(keys_list))
+    return set(keys_list)
 
 
-def make_umls_node(node_curie, iri, name, category, update_date, provided_by, synonyms, description, nodes_output):
+def make_umls_node(node_curie, iri, name, category, update_date, provided_by, synonyms, description, nodes_output, full_name=None):
     node = kg2_util.make_node(node_curie, iri, name, category, "2023", provided_by)
     node['synonym'] = synonyms
     node['description'] = description
+    node['full_name'] = full_name
 
     nodes_output.write(node)
 
@@ -191,11 +192,68 @@ def process_go_item(node_id, info, nodes_output, edges_output):
 def process_hcpcs_item(node_id, info, nodes_output, edges_output):
     node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HCPCS_PREFIX, node_id, info, ['PT', 'MTH_HT', 'MP'])
 
-    # Currently not used, but extracting them in case we want them in the future
+    # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
+    attributes = info.get(INFO_KEY, dict())
+    had = attributes.get('HAD', list()) # HCPCS Action Effective Date - effective date of action to a procedure or modifier code.
+    hcc = attributes.get('HCC', list()) # HCPCS Coverage Code - code denoting Medicare coverage status. There are two subelements separated by "=".
+    hts = attributes.get('HTS', list()) # HCPCS Type of Service Code - carrier assigned HCFA Type of Service which describes the particular kind(s) of service represented by the procedure code.
+    hcd = attributes.get('HCD', list()) # HCPCS Code Added Date - year the HCPCS code was added to the HCFA Common Procedure Coding System.
+    hpn = attributes.get('HPN', list()) # HCPCS processing note number identifying the processing note contained in Appendix A of the HCPCS Manual.
+    haq = attributes.get('HAQ', list()) # HCPCS Anesthesia Base Unit Quantity - base unit represents the level of intensity for anesthesia procedure services that reflects all activities except time.
+    hlc = attributes.get('HLC', list()) # HCPCS Lab Certification Code - code used to classify laboratory procedures according to the specialty certification categories listed by CMS(formerly HCFA).
+    hsn = attributes.get('HSN', list()) # HCPCS Statute Number identifying statute reference for coverage or noncoverage of procedure or service.
+    hpd = attributes.get('HPD', list()) # HCPCS ASC payment group effective date - date the procedure is assigned to the ASC payment group.
+    hpg = attributes.get('HPG', list()) # HCPCS ASC payment group code which represents the dollar amount of the facility charge payable by Medicare for the procedure.
+    hmg = attributes.get('HMR', list()) # HCPCS Medicare Carriers Manual reference section number - number identifying a section of the Medicare Carriers Manual.
+    hir = attributes.get('HIR', list()) # HCPCS Coverage Issues Manual Reference Section Number - number identifying the Reference Section of the Coverage Issues Manual.
+    hxr = attributes.get('HXR', list()) # HCPCS Cross reference code - an explicit reference crosswalking a deleted code or a code that is not valid for Medicare to a valid current code (or range of codes).
+    hmp = attributes.get('HMP', list()) # HCPCS Multiple Pricing Indicator Code - code used to identify instances where a procedure could be priced.
+    hpi = attributes.get('HPI', list()) # HCPCS Pricing Indicator Code - used to identify the appropriate methodology for developing unique pricing amounts under Part B.
+    hac = attributes.get('HAC', list()) # HCPCS action code - code denoting the change made to a procedure or modifier code within the HCPCS system.
+    hbt = attributes.get('HBT', list()) # HCPCS Berenson-Eggers Type of Service Code - BETOS for the procedure code based on generally agreed upon clinically meaningful groupings of procedures and services.
 
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
-    return get_attribute_keys(info.get(INFO_KEY, dict()))
+
+def process_hgnc_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['ACR', 'PT', 'MTH_ACR', 'NA', 'NP', 'NS', 'SYN']
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HCPCS_PREFIX, node_id.replace('HGNC:', ''), info, accession_heirarchy)
+
+    # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
+    attributes = info.get(INFO_KEY, dict())
+    mgd_id = attributes.get('MGD_ID', list())
+    vega_id = attributes.get('VEGA_ID', list())
+    genecc = attributes.get('GENCC', list())
+    swp = attributes.get('SWP', list())
+    mane_select = attributes.get('MANE_SELECT', list())
+    local_specific_db_xr = attributes.get('LOCUS_SPECIFIC_DB_XR', list())
+    locus_type = attributes.get('LOCUS_TYPE', list())
+    agr = attributes.get('AGR', list())
+    cytogenetic_location = attributes.get('CYTOGENETIC_LOCATION', list())
+    date_created = attributes.get('DATE_CREATED', list())
+    ensemblgene_id = attributes.get('ENSEMBLGENE_ID', list())
+    db_xr_id = attributes.get('DB_XR_ID', list())
+    locus_group = attributes.get('LOCUS_GROUP', list())
+    entrezgene_id = attributes.get('ENTREZGENE_ID', list())
+    date_name_changed = attributes.get('DATE_NAME_CHANGED', list())
+    pmid = attributes.get('PMID', list())
+    date_last_modified = attributes.get('DATE_LAST_MODIFIED', list())
+    mapped_ucsc_id = attributes.get('MAPPED_UCSC_ID', list())
+    refseq_id = attributes.get('REFSEQ_ID', list())
+    ena = attributes.get('ENA', list())
+    rgd_id = attributes.get('RGD_ID', list())
+    date_symbol_changed = attributes.get('DATE_SYMBOL_CHANGED', list())
+    omim_id = attributes.get('OMIM_ID', list())
+    gene_fam_id = attributes.get('GENE_FAM_ID', list())
+    gene_symbol = attributes.get('GENESYMBOL', list())
+    ez = attributes.get('EZ', list())
+    ccds_id = attributes.get('CCDS_ID', list())
+    lncipedia = attributes.get('LNCIPEDIA', list())
+    gene_fam_desc = attributes.get('GENE_FAM_DESC', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output, full_name)
+
+    return get_attribute_keys(attributes)
 
 
 if __name__ == '__main__':
@@ -214,6 +272,7 @@ def process_hcpcs_item(node_id, info, nodes_output, edges_output):
     input_items = input_read_jsonlines_info[0]
 
     name_keys = set()
+    attribute_keys = set()
 
     with open('tui_combo_mappings.json') as mappings:
         TUI_MAPPINGS = json.load(mappings)
@@ -250,9 +309,12 @@ def process_hcpcs_item(node_id, info, nodes_output, edges_output):
                 process_go_item(node_id, value, nodes_output, edges_output)
 
             if source == 'HCPCS':
-                name_keys.add(process_hcpcs_item(node_id, value, nodes_output, edges_output))
+                process_hcpcs_item(node_id, value, nodes_output, edges_output)
+
+            if source == 'HGNC':
+                attribute_keys.update(process_hgnc_item(node_id, value, nodes_output, edges_output))
 
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
     kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)
-    print(json.dumps(name_keys, indent=4, sort_keys=True, default=list))
+    print(json.dumps(attribute_keys, indent=4, sort_keys=True, default=list))
     print("Finishing umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())

From 15984e9bf5dbef64eb15e8bc2fe68dfbd2c9ef07 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 18 Aug 2023 15:15:57 -0700
Subject: [PATCH 038/117] #316 don't actually need full name for HGNC, that's
 just because it merges with pr.owl

---
 umls_list_jsonl_to_kg_jsonl.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index bffdfd4e..a1eb1b88 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -93,11 +93,10 @@ def get_attribute_keys(attributes_dict):
     return set(keys_list)
 
 
-def make_umls_node(node_curie, iri, name, category, update_date, provided_by, synonyms, description, nodes_output, full_name=None):
+def make_umls_node(node_curie, iri, name, category, update_date, provided_by, synonyms, description, nodes_output):
     node = kg2_util.make_node(node_curie, iri, name, category, "2023", provided_by)
     node['synonym'] = synonyms
     node['description'] = description
-    node['full_name'] = full_name
 
     nodes_output.write(node)
 
@@ -251,7 +250,7 @@ def process_hgnc_item(node_id, info, nodes_output, edges_output):
     lncipedia = attributes.get('LNCIPEDIA', list())
     gene_fam_desc = attributes.get('GENE_FAM_DESC', list())
 
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output, full_name)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
     return get_attribute_keys(attributes)
 

From 5b5bbaea94b307ada37d1b354dd4a7ce5e503b6d Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 18 Aug 2023 15:19:40 -0700
Subject: [PATCH 039/117] #316 cleaning up (we don't actually want the return
 statement)

---
 umls_list_jsonl_to_kg_jsonl.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index a1eb1b88..c89c0aeb 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -252,8 +252,6 @@ def process_hgnc_item(node_id, info, nodes_output, edges_output):
 
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
-    return get_attribute_keys(attributes)
-
 
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
@@ -311,9 +309,9 @@ def process_hgnc_item(node_id, info, nodes_output, edges_output):
                 process_hcpcs_item(node_id, value, nodes_output, edges_output)
 
             if source == 'HGNC':
-                attribute_keys.update(process_hgnc_item(node_id, value, nodes_output, edges_output))
+                process_hgnc_item(node_id, value, nodes_output, edges_output)
 
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
     kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)
-    print(json.dumps(attribute_keys, indent=4, sort_keys=True, default=list))
+    # print(json.dumps(attribute_keys, indent=4, sort_keys=True, default=list))
     print("Finishing umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())

From 0d4d89b6e70c9ab97f12a3d42080232bd4aebd09 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 18 Aug 2023 15:22:32 -0700
Subject: [PATCH 040/117] #316 correct prefix for HGNC

---
 umls_list_jsonl_to_kg_jsonl.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index c89c0aeb..51c1cab9 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -37,6 +37,7 @@
 FMA_PREFIX = kg2_util.CURIE_PREFIX_FMA
 GO_PREFIX = kg2_util.CURIE_PREFIX_GO
 HCPCS_PREFIX = kg2_util.CURIE_PREFIX_HCPCS
+HGNC_PREFIX = kg2_util.CURIE_PREFIX_HGNC
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -216,7 +217,7 @@ def process_hcpcs_item(node_id, info, nodes_output, edges_output):
 
 def process_hgnc_item(node_id, info, nodes_output, edges_output):
     accession_heirarchy = ['ACR', 'PT', 'MTH_ACR', 'NA', 'NP', 'NS', 'SYN']
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HCPCS_PREFIX, node_id.replace('HGNC:', ''), info, accession_heirarchy)
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HGNC_PREFIX, node_id.replace('HGNC:', ''), info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
     attributes = info.get(INFO_KEY, dict())

From fd5495b05a0c7bf4d99a27e7d04641dc6b239240 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 18 Aug 2023 15:26:19 -0700
Subject: [PATCH 041/117] #316 correct the order of name priority for HGNC

---
 umls_list_jsonl_to_kg_jsonl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 51c1cab9..987f607b 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -216,7 +216,7 @@ def process_hcpcs_item(node_id, info, nodes_output, edges_output):
 
 
 def process_hgnc_item(node_id, info, nodes_output, edges_output):
-    accession_heirarchy = ['ACR', 'PT', 'MTH_ACR', 'NA', 'NP', 'NS', 'SYN']
+    accession_heirarchy = ['PT', 'ACR', 'MTH_ACR', 'NA', 'NP', 'NS', 'SYN']
     node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HGNC_PREFIX, node_id.replace('HGNC:', ''), info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html

From bc698ee5fcd62c3876283a372a5f2e18ed54ae24 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Sat, 19 Aug 2023 10:41:14 -0700
Subject: [PATCH 042/117] #316 HL7, updated some of the accession hierarchies
 based on webpage

---
 kg2_util.py                    |  1 +
 umls_list_jsonl_to_kg_jsonl.py | 60 +++++++++++++++++++++++++++++++---
 2 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/kg2_util.py b/kg2_util.py
index d7750974..ba99bc70 100644
--- a/kg2_util.py
+++ b/kg2_util.py
@@ -72,6 +72,7 @@
 CURIE_PREFIX_GTPI_SOURCE = 'GTPI_source'
 CURIE_PREFIX_HCPCS = 'HCPCS'
 CURIE_PREFIX_HGNC = 'HGNC'
+CURIE_PREFIX_HL7 = 'HL7'
 CURIE_PREFIX_HMDB = 'HMDB'
 CURIE_PREFIX_IAO = 'IAO'
 CURIE_PREFIX_IDENTIFIERS_ORG_REGISTRY = 'identifiers_org_registry'
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 987f607b..a6b51336 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -20,7 +20,7 @@
 
 
 DESIRED_CODES = ['ATC', 'CHV', 'DRUGBANK', 'FMA', 'GO', 'HCPCS', 'HGNC', 'HL7V3.0',
-                 'HL7', 'HPO', 'ICD10PCS', 'ICD9CM', 'MED-RT', 'MEDLINEPLUS', 'MSH',
+                 'HPO', 'ICD10PCS', 'ICD9CM', 'MED-RT', 'MEDLINEPLUS', 'MSH',
                  'MTH', 'NCBI', 'NCBITAXON', 'NCI', 'NDDF', 'NDFRT', 'OMIM', 'PDQ',
                  'PSY', 'RXNORM', 'VANDF']
 CUIS_KEY = 'cuis'
@@ -38,6 +38,7 @@
 GO_PREFIX = kg2_util.CURIE_PREFIX_GO
 HCPCS_PREFIX = kg2_util.CURIE_PREFIX_HCPCS
 HGNC_PREFIX = kg2_util.CURIE_PREFIX_HGNC
+HL7_PREFIX = kg2_util.CURIE_PREFIX_HL7
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -162,7 +163,7 @@ def process_fma_item(node_id, info, nodes_output, edges_output):
 
 
 def process_go_item(node_id, info, nodes_output, edges_output):
-    accession_heirarchy = ['PT', 'MTH_PT', 'SY', 'MTH_SY', 'ET', 'MTH_ET']
+    accession_heirarchy = ['PT', 'MTH_PT', 'ET', 'MTH_ET', 'SY', 'MTH_SY']
     node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(GO_PREFIX, node_id.replace('GO:', ''), info, accession_heirarchy)
 
     # GO-specific information
@@ -216,7 +217,7 @@ def process_hcpcs_item(node_id, info, nodes_output, edges_output):
 
 
 def process_hgnc_item(node_id, info, nodes_output, edges_output):
-    accession_heirarchy = ['PT', 'ACR', 'MTH_ACR', 'NA', 'NP', 'NS', 'SYN']
+    accession_heirarchy = ['PT', 'ACR', 'MTH_ACR', 'NA', 'SYN', 'NP', 'NS']
     node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HGNC_PREFIX, node_id.replace('HGNC:', ''), info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
@@ -254,6 +255,49 @@ def process_hgnc_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
+def process_hl7_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['CSY', 'PT', 'CDO', 'VS', 'BR', 'CPR', 'CR', 'NPT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HL7_PREFIX, node_id, info, accession_heirarchy)
+
+    hl7at = attributes.get('HL7AT', list())
+    hl7ii = attributes.get('HL7II', list())
+    hl7im = attributes.get('HL7IM', list())
+    hl7lt = attributes.get('HL7LT', list())
+    hl7un = attributes.get('HL7UN', list())
+    hl7oa = attributes.get('HL7OA', list())
+    hl7scs = attributes.get('HL7SCS', list())
+    hl7cc = attributes.get('HL7CC', list())
+    hl7na = attributes.get('HL7NA', list())
+    hl7in = attributes.get('HL7IN', list())
+    hl7ap = attributes.get('HL7AP', list())
+    hl7mi = attributes.get('HL7MI', list())
+    hl7hi = attributes.get('HL7HI', list())
+    hl7ir = attributes.get('HL7IR', list())
+    hl7ai = attributes.get('HL7AI', list())
+    hl7ha = attributes.get('HL7HA', list())
+    hl7rf = attributes.get('HL7RF', list())
+    hl7rd = attributes.get('HL7RD', list())
+    hl7vd = attributes.get('HL7VD', list())
+    hl7dc = attributes.get('HL7DC', list())
+    hl7rk = attributes.get('HL7RK', list())
+    hl7is = attributes.get('HL7IS', list())
+    hl7sy = attributes.get('HL7SY', list())
+    hl7cd = attributes.get('HL7CD', list())
+    hl7sl = attributes.get('HL7SL', list())
+    hl7pl = attributes.get('HL7PL', list())
+    hl7vc = attributes.get('HL7VC', list())
+    hl7ty = attributes.get('HL7TY', list())
+    hl7rg = attributes.get('HL7RG', list())
+    hl7csc = attributes.get('HL7CSC', list())
+    hl7od = attributes.get('HL7OD', list())
+    hl7id = attributes.get('HL7ID', list())
+    hl7tr = attributes.get('HL7TR', list())
+    hl7di = attributes.get('HL7DI', list())
+    hl7cs = attributes.get('HL7CS', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
     args = get_args()
@@ -312,7 +356,15 @@ def process_hgnc_item(node_id, info, nodes_output, edges_output):
             if source == 'HGNC':
                 process_hgnc_item(node_id, value, nodes_output, edges_output)
 
+            if source == 'HL7V3.0':
+                process_hl7_item(node_id, value, nodes_output, edges_output)
+
+            if source == 'HPO':
+                name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
+                attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
+
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
     kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)
-    # print(json.dumps(attribute_keys, indent=4, sort_keys=True, default=list))
+    print(json.dumps(name_keys, indent=4, sort_keys=True, default=list))
+    print(json.dumps(attribute_keys, indent=4, sort_keys=True, default=list))
     print("Finishing umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())

From 86a1e160e0b684d70b71ab6ac7314e079b087c05 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Sat, 19 Aug 2023 10:44:28 -0700
Subject: [PATCH 043/117] #316 update more accession hierarchies based on
 webpage

---
 umls_list_jsonl_to_kg_jsonl.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index a6b51336..2dc72e4e 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -153,7 +153,7 @@ def process_drugbank_item(node_id, info, nodes_output, edges_output):
 
 
 def process_fma_item(node_id, info, nodes_output, edges_output):
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(FMA_PREFIX, node_id, info, ['PT', 'SY'])
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(FMA_PREFIX, node_id, info, ['PT', 'SY', 'AB', 'OP', 'IS'])
 
     # Currently not used, but extracting them in case we want them in the future
     authority = info.get(INFO_KEY, dict()).get('AUTHORITY', list())
@@ -163,7 +163,7 @@ def process_fma_item(node_id, info, nodes_output, edges_output):
 
 
 def process_go_item(node_id, info, nodes_output, edges_output):
-    accession_heirarchy = ['PT', 'MTH_PT', 'ET', 'MTH_ET', 'SY', 'MTH_SY']
+    accession_heirarchy = ['PT', 'MTH_PT', 'ET', 'MTH_ET', 'SY', 'MTH_SY', 'OP', 'MTH_OP', 'OET', 'MTH_OET', 'IS', 'MTH_IS']
     node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(GO_PREFIX, node_id.replace('GO:', ''), info, accession_heirarchy)
 
     # GO-specific information
@@ -191,7 +191,7 @@ def process_go_item(node_id, info, nodes_output, edges_output):
 
 
 def process_hcpcs_item(node_id, info, nodes_output, edges_output):
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HCPCS_PREFIX, node_id, info, ['PT', 'MTH_HT', 'MP'])
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HCPCS_PREFIX, node_id, info, ['PT', 'MP', 'MTH_HT'])
 
     # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
     attributes = info.get(INFO_KEY, dict())

From 2cfa1485a09b731e873e2990dc6112ac10bf9aad Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Sat, 19 Aug 2023 11:33:34 -0700
Subject: [PATCH 044/117] #316 attempted workaround for HL7 (a CUI source)

---
 kg2_util.py                    |  1 -
 umls_list_jsonl_to_kg_jsonl.py | 14 +++++++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/kg2_util.py b/kg2_util.py
index ba99bc70..d7750974 100644
--- a/kg2_util.py
+++ b/kg2_util.py
@@ -72,7 +72,6 @@
 CURIE_PREFIX_GTPI_SOURCE = 'GTPI_source'
 CURIE_PREFIX_HCPCS = 'HCPCS'
 CURIE_PREFIX_HGNC = 'HGNC'
-CURIE_PREFIX_HL7 = 'HL7'
 CURIE_PREFIX_HMDB = 'HMDB'
 CURIE_PREFIX_IAO = 'IAO'
 CURIE_PREFIX_IDENTIFIERS_ORG_REGISTRY = 'identifiers_org_registry'
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 2dc72e4e..6a8a508d 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -38,7 +38,7 @@
 GO_PREFIX = kg2_util.CURIE_PREFIX_GO
 HCPCS_PREFIX = kg2_util.CURIE_PREFIX_HCPCS
 HGNC_PREFIX = kg2_util.CURIE_PREFIX_HGNC
-HL7_PREFIX = kg2_util.CURIE_PREFIX_HL7
+HL7_PREFIX = kg2_util.CURIE_PREFIX_UMLS
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -105,10 +105,14 @@ def make_umls_node(node_curie, iri, name, category, update_date, provided_by, sy
 
 def get_basic_info(curie_prefix, node_id, info, accession_heirarchy):
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
-    iri = IRI_MAPPINGS[curie_prefix] + node_id
-    node_curie = make_node_id(curie_prefix, node_id)
     cuis = info.get(CUIS_KEY, list())
     tuis = info.get(TUIS_KEY, list())
+    iri = IRI_MAPPINGS[curie_prefix] + node_id
+    if curie_prefix == kg2_util.UMLS_SOURCE_PREFIX:
+        if len(cuis) != 1:
+            return None, None, None, None, None, None, None, None
+        node_id = cuis[0]
+    node_curie = make_node_id(curie_prefix, node_id)
     category = TUI_MAPPINGS[str(tuple(tuis))]
 
     names = info.get(NAMES_KEY, dict())
@@ -258,7 +262,11 @@ def process_hgnc_item(node_id, info, nodes_output, edges_output):
 def process_hl7_item(node_id, info, nodes_output, edges_output):
     accession_heirarchy = ['CSY', 'PT', 'CDO', 'VS', 'BR', 'CPR', 'CR', 'NPT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
     node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HL7_PREFIX, node_id, info, accession_heirarchy)
+    if node_curie == None:
+        return
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'HL7')
 
+    # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
     hl7at = attributes.get('HL7AT', list())
     hl7ii = attributes.get('HL7II', list())
     hl7im = attributes.get('HL7IM', list())

From ae432ac481bef8dafd3cce7d4dc8ac9302408065 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Sat, 19 Aug 2023 12:06:21 -0700
Subject: [PATCH 045/117] #316 improving HL7 to actually work and adding in HPO

---
 kg2_util.py                    |  1 +
 umls_list_jsonl_to_kg_jsonl.py | 24 ++++++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/kg2_util.py b/kg2_util.py
index d7750974..e5f3d50d 100644
--- a/kg2_util.py
+++ b/kg2_util.py
@@ -73,6 +73,7 @@
 CURIE_PREFIX_HCPCS = 'HCPCS'
 CURIE_PREFIX_HGNC = 'HGNC'
 CURIE_PREFIX_HMDB = 'HMDB'
+CURIE_PREFIX_HP = 'HP'
 CURIE_PREFIX_IAO = 'IAO'
 CURIE_PREFIX_IDENTIFIERS_ORG_REGISTRY = 'identifiers_org_registry'
 CURIE_PREFIX_ISBN = 'ISBN'
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 6a8a508d..b05eb1b5 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -39,6 +39,7 @@
 HCPCS_PREFIX = kg2_util.CURIE_PREFIX_HCPCS
 HGNC_PREFIX = kg2_util.CURIE_PREFIX_HGNC
 HL7_PREFIX = kg2_util.CURIE_PREFIX_UMLS
+HPO_PREFIX = kg2_util.CURIE_PREFIX_HP
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -107,12 +108,12 @@ def get_basic_info(curie_prefix, node_id, info, accession_heirarchy):
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
     cuis = info.get(CUIS_KEY, list())
     tuis = info.get(TUIS_KEY, list())
-    iri = IRI_MAPPINGS[curie_prefix] + node_id
-    if curie_prefix == kg2_util.UMLS_SOURCE_PREFIX:
+    if curie_prefix == kg2_util.CURIE_PREFIX_UMLS:
         if len(cuis) != 1:
             return None, None, None, None, None, None, None, None
         node_id = cuis[0]
     node_curie = make_node_id(curie_prefix, node_id)
+    iri = IRI_MAPPINGS[curie_prefix] + node_id
     category = TUI_MAPPINGS[str(tuple(tuis))]
 
     names = info.get(NAMES_KEY, dict())
@@ -267,6 +268,7 @@ def process_hl7_item(node_id, info, nodes_output, edges_output):
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'HL7')
 
     # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
+    attributes = info.get(INFO_KEY, dict())
     hl7at = attributes.get('HL7AT', list())
     hl7ii = attributes.get('HL7II', list())
     hl7im = attributes.get('HL7IM', list())
@@ -306,6 +308,21 @@ def process_hl7_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
+def process_hpo_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['PT', 'SY', 'ET', 'OP', 'IS', 'OET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HPO_PREFIX, node_id, info, accession_heirarchy)
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    sid = attributes.get('SID', list())
+    hpo_comment = attributes.get('HPO_COMMENT', list())
+    date_created = attributes.get('DATE_CREATED', list())
+    syn_qualifier = attributes.get('SYN_QUALIFIER', list())
+    ref = attributes.get('REF', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
     args = get_args()
@@ -368,6 +385,9 @@ def process_hl7_item(node_id, info, nodes_output, edges_output):
                 process_hl7_item(node_id, value, nodes_output, edges_output)
 
             if source == 'HPO':
+                process_hpo_item(node_id, value, nodes_output, edges_output)
+
+            if source == 'ICD10PCS':
                 name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
                 attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 

From 18c7366c4fee3bc0d35ab746b54ecbcfc3f5787f Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Sat, 19 Aug 2023 12:35:22 -0700
Subject: [PATCH 046/117] #316 ICD10 and ICD9 work

---
 kg2_util.py                    |  2 ++
 umls_list_jsonl_to_kg_jsonl.py | 16 +++++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/kg2_util.py b/kg2_util.py
index e5f3d50d..eb1b864f 100644
--- a/kg2_util.py
+++ b/kg2_util.py
@@ -75,6 +75,8 @@
 CURIE_PREFIX_HMDB = 'HMDB'
 CURIE_PREFIX_HP = 'HP'
 CURIE_PREFIX_IAO = 'IAO'
+CURIE_PREFIX_ICD10PCS = 'ICD10PCS'
+CURIE_PREFIX_ICD9 = 'ICD9'
 CURIE_PREFIX_IDENTIFIERS_ORG_REGISTRY = 'identifiers_org_registry'
 CURIE_PREFIX_ISBN = 'ISBN'
 CURIE_PREFIX_KEGG = 'KEGG'
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index b05eb1b5..8df24faa 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -40,6 +40,8 @@
 HGNC_PREFIX = kg2_util.CURIE_PREFIX_HGNC
 HL7_PREFIX = kg2_util.CURIE_PREFIX_UMLS
 HPO_PREFIX = kg2_util.CURIE_PREFIX_HP
+ICD10PCS_PREFIX = kg2_util.CURIE_PREFIX_ICD10PCS
+ICD9CM = kg2_util.CURIE_PREFIX_ICD9
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -310,7 +312,7 @@ def process_hl7_item(node_id, info, nodes_output, edges_output):
 
 def process_hpo_item(node_id, info, nodes_output, edges_output):
     accession_heirarchy = ['PT', 'SY', 'ET', 'OP', 'IS', 'OET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HPO_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HPO_PREFIX, node_id.replace('HP:', ''), info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -323,6 +325,18 @@ def process_hpo_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
+def process_icd10_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['PT', 'PX', 'HX', 'MTH_HX', 'HT', 'HS', 'AB'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(ICD10PCS_PREFIX, node_id, info, accession_heirarchy)
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'ICD10PCS')
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    added_meaning = attributes.get('ADDED_MEANING', list())
+    order_no = attributes.get('ORDER_NO', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
     args = get_args()

From 84a4a3b2223a44a44b1b68e9e867624463f30235 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Sat, 19 Aug 2023 12:55:40 -0700
Subject: [PATCH 047/117] #316 ICD9 work, surveying MED-RT

---
 umls_list_jsonl_to_kg_jsonl.py | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 8df24faa..d8ad0d43 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -41,7 +41,7 @@
 HL7_PREFIX = kg2_util.CURIE_PREFIX_UMLS
 HPO_PREFIX = kg2_util.CURIE_PREFIX_HP
 ICD10PCS_PREFIX = kg2_util.CURIE_PREFIX_ICD10PCS
-ICD9CM = kg2_util.CURIE_PREFIX_ICD9
+ICD9CM_PREFIX = kg2_util.CURIE_PREFIX_ICD9
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -325,10 +325,9 @@ def process_hpo_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_icd10_item(node_id, info, nodes_output, edges_output):
+def process_icd10pcs_item(node_id, info, nodes_output, edges_output):
     accession_heirarchy = ['PT', 'PX', 'HX', 'MTH_HX', 'HT', 'HS', 'AB'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
     node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(ICD10PCS_PREFIX, node_id, info, accession_heirarchy)
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'ICD10PCS')
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -337,6 +336,24 @@ def process_icd10_item(node_id, info, nodes_output, edges_output):
 
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
+
+def process_icd9cm_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['PT', 'HT', 'AB'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(ICD9CM_PREFIX, node_id, info, accession_heirarchy)
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'ICD9CM')
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    icc = attributes.get('ICC', list())
+    ice = attributes.get('ICE', list())
+    icf = attributes.get('ICF', list())
+    sos = attributes.get('SOS', list())
+    icn = attributes.get('ICN', list())
+    ica = attributes.get('ICA', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
     args = get_args()
@@ -402,6 +419,12 @@ def process_icd10_item(node_id, info, nodes_output, edges_output):
                 process_hpo_item(node_id, value, nodes_output, edges_output)
 
             if source == 'ICD10PCS':
+                process_icd10pcs_item(node_id, value, nodes_output, edges_output)
+
+            if source == 'ICD9CM':
+                process_icd9cm_item(node_id, value, nodes_output, edges_output)
+
+            if source == 'MED-RT':
                 name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
                 attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 

From 24048572615851aebe79cefb7e7eeb2862dcc8e4 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Sat, 19 Aug 2023 13:05:17 -0700
Subject: [PATCH 048/117] #316 MED-RT

---
 umls_list_jsonl_to_kg_jsonl.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index d8ad0d43..4ca099ba 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -42,6 +42,7 @@
 HPO_PREFIX = kg2_util.CURIE_PREFIX_HP
 ICD10PCS_PREFIX = kg2_util.CURIE_PREFIX_ICD10PCS
 ICD9CM_PREFIX = kg2_util.CURIE_PREFIX_ICD9
+MEDRT_PREFIX = kg2_util.CURIE_PREFIX_UMLS
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -353,6 +354,19 @@ def process_icd9cm_item(node_id, info, nodes_output, edges_output):
 
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
+def process_medrt_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['PT', 'FN', 'SY'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(MEDRT_PREFIX, node_id, info, accession_heirarchy)
+    if node_curie == None:
+        return
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'MED-RT')
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    term_status = attributes.get('TERM_STATUS', list())
+    concept_type = attributes.get('CONCEPT_TYPE', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
@@ -425,6 +439,9 @@ def process_icd9cm_item(node_id, info, nodes_output, edges_output):
                 process_icd9cm_item(node_id, value, nodes_output, edges_output)
 
             if source == 'MED-RT':
+                process_medrt_item(node_id, value, nodes_output, edges_output)
+
+            if source == 'MEDLINEPLUS'
                 name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
                 attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 

From 512f93d29ed411e672d7c8176d6e0c42ac6a640b Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Sat, 19 Aug 2023 13:17:06 -0700
Subject: [PATCH 049/117] #316 addressing small typo

---
 umls_list_jsonl_to_kg_jsonl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 4ca099ba..a127bcf0 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -441,7 +441,7 @@ def process_medrt_item(node_id, info, nodes_output, edges_output):
             if source == 'MED-RT':
                 process_medrt_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'MEDLINEPLUS'
+            if source == 'MEDLINEPLUS':
                 name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
                 attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 

From 8d55c50a5c6b70483fae00531c69d584aa49a455 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 21 Aug 2023 09:53:10 -0700
Subject: [PATCH 050/117] #316 MEDLINEPLUS and MSH

---
 umls_list_jsonl_to_kg_jsonl.py | 67 ++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index a127bcf0..1958b85f 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -43,6 +43,8 @@
 ICD10PCS_PREFIX = kg2_util.CURIE_PREFIX_ICD10PCS
 ICD9CM_PREFIX = kg2_util.CURIE_PREFIX_ICD9
 MEDRT_PREFIX = kg2_util.CURIE_PREFIX_UMLS
+MEDLINEPLUS_PREFIX = kg2_util.CURIE_PREFIX_UMLS
+MSH_PREFIX = kg2_util.CURIE_PREFIX_UMLS
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -368,6 +370,65 @@ def process_medrt_item(node_id, info, nodes_output, edges_output):
 
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
+
+def process_medlineplus_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['PT', 'ET', 'SY', 'HT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(MEDLINEPLUS_PREFIX, node_id, info, accession_heirarchy)
+    if node_curie == None:
+        return
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'MEDLINEPLUS')
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    sos = attributes.get('SOS', list())
+    date_created = attributes.get('DATE_CREATED', list())
+    mp_group_url = attributes.get('MP_GROUP_URL', list())
+    mp_primary_institute_url = attributes.get('MP_PRIMARY_INSTITUTE_URL', list())
+    mp_other_language_url = attributes.get('MP_OTHER_LANGUAGE_URL', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
+def process_msh_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['MH', 'TQ', 'PEP', 'ET', 'XQ', 'PXQ', 'NM', 'N1', 'PCE', 'CE', 'HT', 'HS', 'DEV', 'DSV', 'QAB', 'QEV', 'QSV', 'PM'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(MSH_PREFIX, node_id, info, accession_heirarchy)
+    if node_curie == None:
+        return
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'MSH')
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    mmr = attributes.get('MMR', list())
+    fx = attributes.get('FX', list())
+    lt = attributes.get('LT', list())
+    dc = attributes.get('DC', list())
+    pa = attributes.get('PA', list())
+    rr = attributes.get('RR', list())
+    hm = attributes.get('HM', list())
+    pi = attributes.get('PI', list())
+    ec = attributes.get('EC', list())
+    hn = attributes.get('HN', list())
+    termui = attributes.get('TERMUI', list())
+    th = attributes.get('TH', list())
+    sos = attributes.get('SOS', list())
+    ii = attributes.get('II', list())
+    rn = attributes.get('RN', list())
+    an = attributes.get('AN', list())
+    cx = attributes.get('CX', list())
+    dq = attributes.get('DQ', list())
+    dx = attributes.get('DX', list())
+    pm = attributes.get('PM', list())
+    aql = attributes.get('AQL', list())
+    sc = attributes.get('SC', list())
+    fr = attributes.get('FR', list())
+    mda = attributes.get('MDA', list())
+    src = attributes.get('SRC', list())
+    ol = attributes.get('OL', list())
+    mn = attributes.get('MN', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
     args = get_args()
@@ -442,6 +503,12 @@ def process_medrt_item(node_id, info, nodes_output, edges_output):
                 process_medrt_item(node_id, value, nodes_output, edges_output)
 
             if source == 'MEDLINEPLUS':
+                process_medlineplus_item(node_id, value, nodes_output, edges_output)
+
+            if source == 'MSH':
+                process_msh_item(node_id, value, nodes_output, edges_output)
+
+            if source == 'MTH':
                 name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
                 attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 

From efcc7ef1ea1d43cd5a807cd0a83fb09723f87285 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 21 Aug 2023 10:14:21 -0700
Subject: [PATCH 051/117] #316 dictionary mapping for functions

---
 umls_list_jsonl_to_kg_jsonl.py | 85 ++++++++++++++++++++--------------
 1 file changed, 49 insertions(+), 36 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 1958b85f..b222d5bf 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -19,10 +19,7 @@
 import json
 
 
-DESIRED_CODES = ['ATC', 'CHV', 'DRUGBANK', 'FMA', 'GO', 'HCPCS', 'HGNC', 'HL7V3.0',
-                 'HPO', 'ICD10PCS', 'ICD9CM', 'MED-RT', 'MEDLINEPLUS', 'MSH',
-                 'MTH', 'NCBI', 'NCBITAXON', 'NCI', 'NDDF', 'NDFRT', 'OMIM', 'PDQ',
-                 'PSY', 'RXNORM', 'VANDF']
+
 CUIS_KEY = 'cuis'
 INFO_KEY = 'attributes'
 NAMES_KEY = 'names'
@@ -429,6 +426,21 @@ def process_msh_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
+DESIRED_CODES = {'ATC': process_atc_item,
+                 'CHV': process_chv_item,
+                 'DRUGBANK': process_drugbank_item,
+                 'FMA': process_fma_item,
+                 'GO': process_go_item,
+                 'HCPCS': process_hcpcs_item,
+                 'HGNC': process_hgnc_item,
+                 'HL7V3.0': process_hl7_item,
+                 'HPO': process_hpo_item,
+                 'ICD10PCS': process_icd10pcs_item,
+                 'ICD9CM': process_icd9cm_item}
+                 # , 'MED-RT', 'MEDLINEPLUS', 'MSH',
+                 # 'MTH', 'NCBI', 'NCBITAXON', 'NCI', 'NDDF', 'NDFRT', 'OMIM', 'PDQ',
+                 # 'PSY', 'RXNORM', 'VANDF'}
+
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
     args = get_args()
@@ -462,55 +474,56 @@ def process_msh_item(node_id, info, nodes_output, edges_output):
                 continue
             value = data[entity]
             source, node_id = extract_node_id(entity)
-            if source not in DESIRED_CODES and source != 'UMLS':
+            if source not in DESIRED_CODES:
                 continue
 
             # Process the data specifically by source
-            if source == 'ATC':
-                process_atc_item(node_id, value, nodes_output, edges_output)
+            DESIRED_CODES[source](node_id, value, nodes_output, edges_output)
+            # if source == 'ATC':
+            #     process_atc_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'CHV':
-                process_chv_item(node_id, value, nodes_output, edges_output)
+            # if source == 'CHV':
+            #     process_chv_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'DRUGBANK':
-                process_drugbank_item(node_id, value, nodes_output, edges_output)
+            # if source == 'DRUGBANK':
+            #     process_drugbank_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'FMA':
-                process_fma_item(node_id, value, nodes_output, edges_output)
+            # if source == 'FMA':
+            #     process_fma_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'GO':
-                process_go_item(node_id, value, nodes_output, edges_output)
+            # if source == 'GO':
+            #     process_go_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'HCPCS':
-                process_hcpcs_item(node_id, value, nodes_output, edges_output)
+            # if source == 'HCPCS':
+            #     process_hcpcs_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'HGNC':
-                process_hgnc_item(node_id, value, nodes_output, edges_output)
+            # if source == 'HGNC':
+            #     process_hgnc_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'HL7V3.0':
-                process_hl7_item(node_id, value, nodes_output, edges_output)
+            # if source == 'HL7V3.0':
+            #     process_hl7_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'HPO':
-                process_hpo_item(node_id, value, nodes_output, edges_output)
+            # if source == 'HPO':
+            #     process_hpo_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'ICD10PCS':
-                process_icd10pcs_item(node_id, value, nodes_output, edges_output)
+            # if source == 'ICD10PCS':
+            #     process_icd10pcs_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'ICD9CM':
-                process_icd9cm_item(node_id, value, nodes_output, edges_output)
+            # if source == 'ICD9CM':
+            #     process_icd9cm_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'MED-RT':
-                process_medrt_item(node_id, value, nodes_output, edges_output)
+            # if source == 'MED-RT':
+            #     process_medrt_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'MEDLINEPLUS':
-                process_medlineplus_item(node_id, value, nodes_output, edges_output)
+            # if source == 'MEDLINEPLUS':
+            #     process_medlineplus_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'MSH':
-                process_msh_item(node_id, value, nodes_output, edges_output)
+            # if source == 'MSH':
+            #     process_msh_item(node_id, value, nodes_output, edges_output)
 
-            if source == 'MTH':
-                name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
-                attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
+            # if source == 'MTH':
+            #     name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
+            #     attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
     kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)

From c5c2195d9ffbe0f75ab50875e1928fe009654bd3 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 21 Aug 2023 14:00:45 -0700
Subject: [PATCH 052/117] #316 MTH

---
 umls_list_jsonl_to_kg_jsonl.py | 115 ++++++++++++++++++---------------
 1 file changed, 64 insertions(+), 51 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index b222d5bf..08a6be34 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -41,7 +41,9 @@
 ICD9CM_PREFIX = kg2_util.CURIE_PREFIX_ICD9
 MEDRT_PREFIX = kg2_util.CURIE_PREFIX_UMLS
 MEDLINEPLUS_PREFIX = kg2_util.CURIE_PREFIX_UMLS
-MSH_PREFIX = kg2_util.CURIE_PREFIX_UMLS
+MSH_PREFIX = kg2_util.CURIE_PREFIX_MESH
+MTH_PREFIX = kg2_util.CURIE_PREFIX_UMLS
+NCBI_PREFIX = kg2_util.CURIE_PREFIX_NCBI_TAXON
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -389,8 +391,6 @@ def process_medlineplus_item(node_id, info, nodes_output, edges_output):
 def process_msh_item(node_id, info, nodes_output, edges_output):
     accession_heirarchy = ['MH', 'TQ', 'PEP', 'ET', 'XQ', 'PXQ', 'NM', 'N1', 'PCE', 'CE', 'HT', 'HS', 'DEV', 'DSV', 'QAB', 'QEV', 'QSV', 'PM'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
     node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(MSH_PREFIX, node_id, info, accession_heirarchy)
-    if node_curie == None:
-        return
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'MSH')
 
     # Currently not used, but extracting them in case we want them in the future
@@ -426,6 +426,48 @@ def process_msh_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
+def process_mth_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['PN', 'CV', 'XM', 'PT', 'SY', 'RT', 'DT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(MTH_PREFIX, node_id, info, accession_heirarchy)
+    if node_curie == None:
+        return
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'MTH')
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    mth_mapsetcomplexity = attributes.get('MTH_MAPSETCOMPLEXITY', list())
+    fromvsab = attributes.get('FROMVSAB', list())
+    mapsetrsab = attributes.get('MAPSETRSAB', list())
+    mapsetversion = attributes.get('MAPSETVERSION', list())
+    mapsetvsab = attributes.get('MAPSETVSAB', list())
+    tovsab = attributes.get('TOVSAB', list())
+    mth_mapfromexhaustive = attributes.get('MTH_MAPFROMEXHAUSTIVE', list())
+    torsab = attributes.get('TORSAB', list())
+    mapsetsid = attributes.get('MAPSETSID', list())
+    mapsetgrammar = attributes.get('MAPSETGRAMMAR', list())
+    mapsettype = attributes.get('MAPSETTYPE', list())
+    mth_maptoexhaustive = attributes.get('MTH_MAPTOEXHAUSTIVE', list())
+    fromrsab = attributes.get('FROMRSAB', list())
+    mth_mapfromcomplexity = attributes.get('MTH_MAPFROMCOMPLEXITY', list())
+    lt = attributes.get('LT', list())
+    mth_maptocomplexity = attributes.get('MTH_MAPTOCOMPLEXITY', list())
+    sos = attributes.get('SOS', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
+def process_ncbi_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['SCN', 'USN', 'USY', 'SY', 'UCN', 'CMN', 'UE', 'EQ'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(NCBI_PREFIX, node_id, info, accession_heirarchy)
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    div = attributes.get('DIV', list())
+    authority_name = attributes.get('AUTHORITY_NAME', list())
+    rank = attributes.get('RANK', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
 DESIRED_CODES = {'ATC': process_atc_item,
                  'CHV': process_chv_item,
                  'DRUGBANK': process_drugbank_item,
@@ -436,10 +478,20 @@ def process_msh_item(node_id, info, nodes_output, edges_output):
                  'HL7V3.0': process_hl7_item,
                  'HPO': process_hpo_item,
                  'ICD10PCS': process_icd10pcs_item,
-                 'ICD9CM': process_icd9cm_item}
-                 # , 'MED-RT', 'MEDLINEPLUS', 'MSH',
-                 # 'MTH', 'NCBI', 'NCBITAXON', 'NCI', 'NDDF', 'NDFRT', 'OMIM', 'PDQ',
-                 # 'PSY', 'RXNORM', 'VANDF'}
+                 'ICD9CM': process_icd9cm_item,
+                 'MED-RT': process_medrt_item,
+                 'MEDLINEPLUS': process_medlineplus_item,
+                 'MSH': process_msh_item,
+                 'MTH': process_mth_item,
+                 'NCBI': process_ncbi_item}
+                 # 'NCI': process_nci_item,
+                 # 'NDDF': process_nddf_item,
+                 # 'NDFRT': process_ndfrt_item,
+                 # 'OMIM': process_omim_item,
+                 # 'PDQ': process_pdq_item,
+                 # 'PSY': process_psy_item,
+                 # 'RXNORM': process_rxnorm_item,
+                 # 'VANDF': process_vandf_item}
 
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
@@ -474,56 +526,17 @@ def process_msh_item(node_id, info, nodes_output, edges_output):
                 continue
             value = data[entity]
             source, node_id = extract_node_id(entity)
+
+            if source == 'NCI':
+                name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
+                attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
+
             if source not in DESIRED_CODES:
                 continue
 
             # Process the data specifically by source
             DESIRED_CODES[source](node_id, value, nodes_output, edges_output)
-            # if source == 'ATC':
-            #     process_atc_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'CHV':
-            #     process_chv_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'DRUGBANK':
-            #     process_drugbank_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'FMA':
-            #     process_fma_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'GO':
-            #     process_go_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'HCPCS':
-            #     process_hcpcs_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'HGNC':
-            #     process_hgnc_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'HL7V3.0':
-            #     process_hl7_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'HPO':
-            #     process_hpo_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'ICD10PCS':
-            #     process_icd10pcs_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'ICD9CM':
-            #     process_icd9cm_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'MED-RT':
-            #     process_medrt_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'MEDLINEPLUS':
-            #     process_medlineplus_item(node_id, value, nodes_output, edges_output)
-
-            # if source == 'MSH':
-            #     process_msh_item(node_id, value, nodes_output, edges_output)
 
-            # if source == 'MTH':
-            #     name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
-            #     attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
     kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)

From e1e205afa374322e96732cf8a0c95e311c83b945 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 21 Aug 2023 14:45:19 -0700
Subject: [PATCH 053/117] #316 NCI

---
 umls_list_jsonl_to_kg_jsonl.py | 60 ++++++++++++++++++++++++++++++++--
 1 file changed, 57 insertions(+), 3 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 08a6be34..a20a2e63 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -468,6 +468,60 @@ def process_ncbi_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
+def process_nci_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['PT', 'SY', 'CSN', 'DN', 'FBD', 'HD', 'CCN', 'AD', 'CA2', 'CA3', 'BN', 'AB', 'CCS', 'OP'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(NCBI_PREFIX, node_id, info, accession_heirarchy)
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    clinvar_variation_id = attributes.get('CLINVAR_VARIATION_ID', list())
+    micronutrient = attributes.get('MICRONUTRIENT', list())
+    genbank_accession_number = attributes.get('GENBANK_ACCESSION_NUMBER', list())
+    fda_table = attributes.get('FDA_TABLE', list())
+    usda_id = attributes.get('USDA_ID', list())
+    icd_o_3_code = attributes.get('ICD-O-3_CODE', list())
+    tolerable_level = attributes.get('TOLERABLE_LEVEL', list())
+    ncbi_taxon_id = attributes.get('NCBI_TAXON_ID', list())
+    mgi_accession_id = attributes.get('MGI_ACCESSION_ID', list())
+    homologous_gene = attributes.get('HOMOLOGOUS_GENE', list())
+    pid_id = attributes.get('PID_ID', list())
+    swiss_prot = attributes.get('SWISS_PROT', list())
+    essential_amino_acid = attributes.get('ESSENTIAL_AMINO_ACID', list())
+    publish_value_set = attributes.get('PUBLISH_VALUE_SET', list())
+    cas_registry = attributes.get('CAS_REGISTRY', list())
+    value_set_pair = attributes.get('VALUE_SET_PAIR', list())
+    accepted_therapeutic_use_for = attributes.get('ACCEPTED_THERAPEUTIC_USE_FOR', list())
+    hgnc_id = attributes.get('HGNC_ID', list())
+    nci_drug_dictionary_id = attributes.get('NCI_DRUG_DICTIONARY_ID', list())
+    chebi_id = attributes.get('CHEBI_ID', list())
+    cnu = attributes.get('CNU', list())
+    mirbase_id = attributes.get('MIRBASE_ID', list())
+    macronutrient = attributes.get('MACRONUTRIENT', list())
+    essential_fatty_acid = attributes.get('ESSENTIAL_FATTY_ACID', list())
+    unit = attributes.get('UNIT', list())
+    pdq_open_trial_search_id = attributes.get('PDQ_OPEN_TRIAL_SEARCH_ID', list())
+    term_browser_value_set_description = attributes.get('TERM_BROWSER_VALUE_SET_DESCRIPTION', list())
+    entrezgene_id = attributes.get('ENTREZGENE_ID', list())
+    infoods = attributes.get('INFOODS', list())
+    pubmedid_primary_reference = attributes.get('PUBMEDID_PRIMARY_REFERENCE', list())
+    biocarta_id = attributes.get('BIOCARTA_ID', list())
+    extensible_list = attributes.get('EXTENSIBLE_LIST', list())
+    use_for = attributes.get('USE_FOR', list())
+    neoplastic_status = attributes.get('NEOPLASTIC_STATUS', list())
+    nsc_number = attributes.get('NSC_NUMBER', list())
+    omim_number = attributes.get('OMIM_NUMBER', list())
+    lt = attributes.get('LT', list())
+    kegg_id = attributes.get('KEGG_ID', list())
+    gene_encodes_product = attributes.get('GENE_ENCODES_PRODUCT', list())
+    pdq_closed_trial_search_id = attributes.get('PDQ_CLOSED_TRIAL_SEARCH_ID', list())
+    design_note = attributes.get('DESIGN_NOTE', list())
+    nutrient = attributes.get('NUTRIENT', list())
+    fda_unii_code = attributes.get('FDA_UNII_CODE', list())
+    us_recommended_intake = attributes.get('US_RECOMMENDED_INTAKE', list())
+    chemical_formula = attributes.get('CHEMICAL_FORMULA', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
 DESIRED_CODES = {'ATC': process_atc_item,
                  'CHV': process_chv_item,
                  'DRUGBANK': process_drugbank_item,
@@ -483,8 +537,8 @@ def process_ncbi_item(node_id, info, nodes_output, edges_output):
                  'MEDLINEPLUS': process_medlineplus_item,
                  'MSH': process_msh_item,
                  'MTH': process_mth_item,
-                 'NCBI': process_ncbi_item}
-                 # 'NCI': process_nci_item,
+                 'NCBI': process_ncbi_item,
+                 'NCI': process_nci_item}
                  # 'NDDF': process_nddf_item,
                  # 'NDFRT': process_ndfrt_item,
                  # 'OMIM': process_omim_item,
@@ -527,7 +581,7 @@ def process_ncbi_item(node_id, info, nodes_output, edges_output):
             value = data[entity]
             source, node_id = extract_node_id(entity)
 
-            if source == 'NCI':
+            if source == 'NDDF':
                 name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
                 attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 

From f0564510b0924cf6639d054b95756ae615a4b490 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 21 Aug 2023 14:54:52 -0700
Subject: [PATCH 054/117] #316 NDDF and NCI updates

---
 kg2_util.py                    |  1 +
 umls_list_jsonl_to_kg_jsonl.py | 21 ++++++++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/kg2_util.py b/kg2_util.py
index eb1b864f..63de5879 100644
--- a/kg2_util.py
+++ b/kg2_util.py
@@ -93,6 +93,7 @@
 CURIE_PREFIX_NCBI_GENE = 'NCBIGene'
 CURIE_PREFIX_NCBI_TAXON = 'NCBITaxon'
 CURIE_PREFIX_NCIT = 'NCIT'
+CURIE_PREFIX_NDDF = 'NDDF'
 CURIE_PREFIX_OBO = 'OBO'
 CURIE_PREFIX_OBO_FORMAT = 'oboFormat'
 CURIE_PREFIX_OIO = 'OIO'
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index a20a2e63..c1a23a1d 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -44,6 +44,8 @@
 MSH_PREFIX = kg2_util.CURIE_PREFIX_MESH
 MTH_PREFIX = kg2_util.CURIE_PREFIX_UMLS
 NCBI_PREFIX = kg2_util.CURIE_PREFIX_NCBI_TAXON
+NCI_PREFIX = kg2_util.CURIE_PREFIX_NCIT
+NDDF_PREFIX = kg2_util.CURIE_PREFIX_NDDF
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -459,6 +461,7 @@ def process_mth_item(node_id, info, nodes_output, edges_output):
 def process_ncbi_item(node_id, info, nodes_output, edges_output):
     accession_heirarchy = ['SCN', 'USN', 'USY', 'SY', 'UCN', 'CMN', 'UE', 'EQ'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
     node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(NCBI_PREFIX, node_id, info, accession_heirarchy)
+
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
     div = attributes.get('DIV', list())
@@ -470,7 +473,9 @@ def process_ncbi_item(node_id, info, nodes_output, edges_output):
 
 def process_nci_item(node_id, info, nodes_output, edges_output):
     accession_heirarchy = ['PT', 'SY', 'CSN', 'DN', 'FBD', 'HD', 'CCN', 'AD', 'CA2', 'CA3', 'BN', 'AB', 'CCS', 'OP'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(NCBI_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(NCI_PREFIX, node_id, info, accession_heirarchy)
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'NCI')
+
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
     clinvar_variation_id = attributes.get('CLINVAR_VARIATION_ID', list())
@@ -521,6 +526,16 @@ def process_nci_item(node_id, info, nodes_output, edges_output):
 
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
+def process_nddf_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['MTH_RXN_CDC', 'CDC', 'CDD', 'CDA', 'IN', 'DF'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(NDDF_PREFIX, node_id, info, accession_heirarchy)
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    ndc = attributes.get('NDC', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
 
 DESIRED_CODES = {'ATC': process_atc_item,
                  'CHV': process_chv_item,
@@ -538,8 +553,8 @@ def process_nci_item(node_id, info, nodes_output, edges_output):
                  'MSH': process_msh_item,
                  'MTH': process_mth_item,
                  'NCBI': process_ncbi_item,
-                 'NCI': process_nci_item}
-                 # 'NDDF': process_nddf_item,
+                 'NCI': process_nci_item,
+                 'NDDF': process_nddf_item}
                  # 'NDFRT': process_ndfrt_item,
                  # 'OMIM': process_omim_item,
                  # 'PDQ': process_pdq_item,

From c2322b2e0dc97ab39d07d14479fea6d6df4b55d3 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 21 Aug 2023 15:00:55 -0700
Subject: [PATCH 055/117] #316 NDFRT doesn't exist

---
 umls_list_jsonl_to_kg_jsonl.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index c1a23a1d..72d63111 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -555,7 +555,6 @@ def process_nddf_item(node_id, info, nodes_output, edges_output):
                  'NCBI': process_ncbi_item,
                  'NCI': process_nci_item,
                  'NDDF': process_nddf_item}
-                 # 'NDFRT': process_ndfrt_item,
                  # 'OMIM': process_omim_item,
                  # 'PDQ': process_pdq_item,
                  # 'PSY': process_psy_item,
@@ -596,7 +595,7 @@ def process_nddf_item(node_id, info, nodes_output, edges_output):
             value = data[entity]
             source, node_id = extract_node_id(entity)
 
-            if source == 'NDDF':
+            if source == 'OMIM':
                 name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
                 attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 

From c4e19b872b7b83fb974c968c96ff556e8cb4619d Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 21 Aug 2023 15:08:56 -0700
Subject: [PATCH 056/117] #316 OMIM

---
 umls_list_jsonl_to_kg_jsonl.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 72d63111..7d1b26b2 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -46,6 +46,7 @@
 NCBI_PREFIX = kg2_util.CURIE_PREFIX_NCBI_TAXON
 NCI_PREFIX = kg2_util.CURIE_PREFIX_NCIT
 NDDF_PREFIX = kg2_util.CURIE_PREFIX_NDDF
+OMIM_PREFIX = kg2_util.CURIE_PREFIX_OMIM
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -536,6 +537,22 @@ def process_nddf_item(node_id, info, nodes_output, edges_output):
 
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
+def process_omim_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['PT', 'PHENO', 'PHENO_ET', 'PTAV', 'PTCS', 'ETAL', 'ET', 'HT', 'ACR'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(OMIM_PREFIX, node_id, info, accession_heirarchy)
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    genesymbol = attributes.get('GENESYMBOL', list())
+    mimtypevalue = attributes.get('MIMTYPEVALUE', list())
+    moved_from = attributes.get('MOVED_FROM', list())
+    sos = attributes.get('SOS', list())
+    genelocus = attributes.get('GENELOCUS', list())
+    mimtypemeaning = attributes.get('MIMTYPEMEANING', list())
+    mimtype = attributes.get('MIMTYPE', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
 
 DESIRED_CODES = {'ATC': process_atc_item,
                  'CHV': process_chv_item,
@@ -554,8 +571,8 @@ def process_nddf_item(node_id, info, nodes_output, edges_output):
                  'MTH': process_mth_item,
                  'NCBI': process_ncbi_item,
                  'NCI': process_nci_item,
-                 'NDDF': process_nddf_item}
-                 # 'OMIM': process_omim_item,
+                 'NDDF': process_nddf_item,
+                 'OMIM': process_omim_item}
                  # 'PDQ': process_pdq_item,
                  # 'PSY': process_psy_item,
                  # 'RXNORM': process_rxnorm_item,
@@ -595,7 +612,7 @@ def process_nddf_item(node_id, info, nodes_output, edges_output):
             value = data[entity]
             source, node_id = extract_node_id(entity)
 
-            if source == 'OMIM':
+            if source == 'PDQ':
                 name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
                 attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 

From 65d1dc3e8d931edbf8934c7a8b5c5d7fd2edd67a Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 21 Aug 2023 15:16:57 -0700
Subject: [PATCH 057/117] #316 PDQ

---
 kg2_util.py                    |  1 +
 umls_list_jsonl_to_kg_jsonl.py | 29 ++++++++++++++++++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/kg2_util.py b/kg2_util.py
index 63de5879..1873f418 100644
--- a/kg2_util.py
+++ b/kg2_util.py
@@ -106,6 +106,7 @@
 CURIE_PREFIX_PATHWHIZ_REACTION = 'PathWhiz.Reaction'
 CURIE_PREFIX_PATHWHIZ_BOUND = 'PathWhiz.Bound'
 CURIE_PREFIX_PATHWHIZ_PROTEIN_COMPLEX = 'PathWhiz.ProteinComplex'
+CURIE_PREFIX_PDQ = 'PDQ'
 CURIE_PREFIX_PMID = 'PMID'
 CURIE_PREFIX_RDF = 'rdf'
 CURIE_PREFIX_RDFS = 'rdfs'
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 7d1b26b2..8a33c6cc 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -47,6 +47,7 @@
 NCI_PREFIX = kg2_util.CURIE_PREFIX_NCIT
 NDDF_PREFIX = kg2_util.CURIE_PREFIX_NDDF
 OMIM_PREFIX = kg2_util.CURIE_PREFIX_OMIM
+PDQ_PREFIX = kg2_util.CURIE_PREFIX_PDQ
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -554,6 +555,28 @@ def process_omim_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
+def process_pdq_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['PT', 'HT', 'PSC', 'SY', 'ET', 'CU', 'LV', 'ACR', 'AB', 'BN', 'FBD', 'CCN', 'CHN', 'OP', 'IS'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(PDQ_PREFIX, node_id, info, accession_heirarchy)
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    lt = attributes.get('LT', list())
+    cas_registry = attributes.get('CAS_REGISTRY', list())
+    date_first_published = attributes.get('DATE_FIRST_PUBLISHED', list())
+    date_last_modified = attributes.get('DATE_LAST_MODIFIED', list())
+    ind_code = attributes.get('IND_CODE', list())
+    pid = attributes.get('PID', list())
+    nsc_code = attributes.get('NSC_CODE', list())
+    pxc = attributes.get('PXC', list())
+    menu_parent = attributes.get('MENU_PARENT', list())
+    nci_id = attributes.get('NCI_ID', list())
+    orig_sty = attributes.get('ORIG_STY', list())
+    menu_type = attributes.get('MENU_TYPE', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
 DESIRED_CODES = {'ATC': process_atc_item,
                  'CHV': process_chv_item,
                  'DRUGBANK': process_drugbank_item,
@@ -572,8 +595,8 @@ def process_omim_item(node_id, info, nodes_output, edges_output):
                  'NCBI': process_ncbi_item,
                  'NCI': process_nci_item,
                  'NDDF': process_nddf_item,
-                 'OMIM': process_omim_item}
-                 # 'PDQ': process_pdq_item,
+                 'OMIM': process_omim_item,
+                 'PDQ': process_pdq_item}
                  # 'PSY': process_psy_item,
                  # 'RXNORM': process_rxnorm_item,
                  # 'VANDF': process_vandf_item}
@@ -612,7 +635,7 @@ def process_omim_item(node_id, info, nodes_output, edges_output):
             value = data[entity]
             source, node_id = extract_node_id(entity)
 
-            if source == 'PDQ':
+            if source == 'PSY':
                 name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
                 attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 

From 147a2338a2d86c590cf23a5a15927661dc27623b Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 21 Aug 2023 15:25:37 -0700
Subject: [PATCH 058/117] #316 PSY

---
 kg2_util.py                    |  1 +
 umls_list_jsonl_to_kg_jsonl.py | 19 ++++++++++++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/kg2_util.py b/kg2_util.py
index 1873f418..92990b0d 100644
--- a/kg2_util.py
+++ b/kg2_util.py
@@ -108,6 +108,7 @@
 CURIE_PREFIX_PATHWHIZ_PROTEIN_COMPLEX = 'PathWhiz.ProteinComplex'
 CURIE_PREFIX_PDQ = 'PDQ'
 CURIE_PREFIX_PMID = 'PMID'
+CURIE_PREFIX_PSY = 'PSY'
 CURIE_PREFIX_RDF = 'rdf'
 CURIE_PREFIX_RDFS = 'rdfs'
 CURIE_PREFIX_REACTOME='REACT'
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 8a33c6cc..43a83d95 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -48,6 +48,7 @@
 NDDF_PREFIX = kg2_util.CURIE_PREFIX_NDDF
 OMIM_PREFIX = kg2_util.CURIE_PREFIX_OMIM
 PDQ_PREFIX = kg2_util.CURIE_PREFIX_PDQ
+PSY_PREFIX = kg2_util.CURIE_PREFIX_PSY
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -577,6 +578,18 @@ def process_pdq_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
+def process_psy_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['PT', 'HT', 'ET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(PSY_PREFIX, node_id, info, accession_heirarchy)
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    hn = attributes.get('HN', list())
+    pyr = attributes.get('PYR', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
 DESIRED_CODES = {'ATC': process_atc_item,
                  'CHV': process_chv_item,
                  'DRUGBANK': process_drugbank_item,
@@ -596,8 +609,8 @@ def process_pdq_item(node_id, info, nodes_output, edges_output):
                  'NCI': process_nci_item,
                  'NDDF': process_nddf_item,
                  'OMIM': process_omim_item,
-                 'PDQ': process_pdq_item}
-                 # 'PSY': process_psy_item,
+                 'PDQ': process_pdq_item,
+                 'PSY': process_psy_item}
                  # 'RXNORM': process_rxnorm_item,
                  # 'VANDF': process_vandf_item}
 
@@ -635,7 +648,7 @@ def process_pdq_item(node_id, info, nodes_output, edges_output):
             value = data[entity]
             source, node_id = extract_node_id(entity)
 
-            if source == 'PSY':
+            if source == 'RXNORM':
                 name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
                 attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 

From 2267bd124ff4f09c08646db384c354c322ea3b65 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 21 Aug 2023 15:34:09 -0700
Subject: [PATCH 059/117] #316 RXNORM

---
 kg2_util.py                    |  1 +
 umls_list_jsonl_to_kg_jsonl.py | 41 +++++++++++++++++++++++++++++++---
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/kg2_util.py b/kg2_util.py
index 92990b0d..584792ee 100644
--- a/kg2_util.py
+++ b/kg2_util.py
@@ -117,6 +117,7 @@
 CURIE_PREFIX_RHEA_COMP = 'RHEA.COMP'
 CURIE_PREFIX_RO = 'RO'
 CURIE_PREFIX_RTX = 'RTX'
+CURIE_PREFIX_RXNORM = 'RXNORM'
 CURIE_PREFIX_SEMMEDDB = 'SEMMEDDB'
 CURIE_PREFIX_SKOS = 'skos'
 CURIE_PREFIX_SMPDB = 'SMPDB'
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 43a83d95..841b88b5 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -49,6 +49,7 @@
 OMIM_PREFIX = kg2_util.CURIE_PREFIX_OMIM
 PDQ_PREFIX = kg2_util.CURIE_PREFIX_PDQ
 PSY_PREFIX = kg2_util.CURIE_PREFIX_PSY
+RXNORM_PREFIX = kg2_util.CURIE_PREFIX_RXNORM
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -590,6 +591,40 @@ def process_psy_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
+def process_rxnorm_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['SCD', 'SBD', 'SCDG', 'SBDG', 'BPCK', 'GPCK', 'IN', 'PSN', 'MIN', 'SCDF', 'SBDF', 'SCDC', 'DFG', 'DF', 'SBDC', 'BN', 'PIN', 'TMSY', 'SY', 'ET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(RXNORM_PREFIX, node_id, info, accession_heirarchy)
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    ndc = attributes.get('NDC', list())
+    rxn_obsoleted = attributes.get('RXN_OBSOLETED', list())
+    rxn_available_strength = attributes.get('RXN_AVAILABLE_STRENGTH', list())
+    rxn_human_drug = attributes.get('RXN_HUMAN_DRUG', list())
+    rxn_quantity = attributes.get('RXN_QUANTITY', list())
+    rxterm_form = attributes.get('RXTERM_FORM', list())
+    rxn_in_expressed_flag = attributes.get('RXN_IN_EXPRESSED_FLAG', list())
+    rxaui = attributes.get('RXAUI', list())
+    rxn_bn_cardinality = attributes.get('RXN_BN_CARDINALITY', list())
+    rxn_activated = attributes.get('RXN_ACTIVATED', list())
+    rxn_boss_strength_denom_unit = attributes.get('RXN_BOSS_STRENGTH_DENOM_UNIT', list())
+    ambiguity_flag = attributes.get('AMBIGUITY_FLAG', list())
+    rxn_strength = attributes.get('RXN_STRENGTH', list())
+    rxcui = attributes.get('RXCUI', list())
+    rxn_ai = attributes.get('RXN_AI', list())
+    rxn_boss_from = attributes.get('RXN_BOSS_FROM', list())
+    rxn_boss_strength_num_unit = attributes.get('RXN_BOSS_STRENGTH_NUM_UNIT', list())
+    rxn_vet_drug = attributes.get('RXN_VET_DRUG', list())
+    orig_code = attributes.get('ORIG_CODE', list())
+    rxn_am = attributes.get('RXN_AM', list())
+    rxn_boss_strength_denom_value = attributes.get('RXN_BOSS_STRENGTH_DENOM_VALUE', list())
+    rxn_boss_strength_num_value = attributes.get('RXN_BOSS_STRENGTH_NUM_VALUE', list())
+    rxn_qualitative_distinction = attributes.get('RXN_QUALITATIVE_DISTINCTION', list())
+    orig_source = attributes.get('ORIG_SOURCE', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
 DESIRED_CODES = {'ATC': process_atc_item,
                  'CHV': process_chv_item,
                  'DRUGBANK': process_drugbank_item,
@@ -610,8 +645,8 @@ def process_psy_item(node_id, info, nodes_output, edges_output):
                  'NDDF': process_nddf_item,
                  'OMIM': process_omim_item,
                  'PDQ': process_pdq_item,
-                 'PSY': process_psy_item}
-                 # 'RXNORM': process_rxnorm_item,
+                 'PSY': process_psy_item,
+                 'RXNORM': process_rxnorm_item}
                  # 'VANDF': process_vandf_item}
 
 if __name__ == '__main__':
@@ -648,7 +683,7 @@ def process_psy_item(node_id, info, nodes_output, edges_output):
             value = data[entity]
             source, node_id = extract_node_id(entity)
 
-            if source == 'RXNORM':
+            if source == 'VANDF':
                 name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
                 attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 

From c2c791a96cb4bd6176b88e9cba189c6d2e0d8cc8 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 21 Aug 2023 15:44:36 -0700
Subject: [PATCH 060/117] #316 VANDF

---
 umls_list_jsonl_to_kg_jsonl.py | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 841b88b5..753ea42e 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -50,6 +50,7 @@
 PDQ_PREFIX = kg2_util.CURIE_PREFIX_PDQ
 PSY_PREFIX = kg2_util.CURIE_PREFIX_PSY
 RXNORM_PREFIX = kg2_util.CURIE_PREFIX_RXNORM
+VANDF_PREFIX = kg2_util.CURIE_PREFIX_VANDF
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
@@ -625,6 +626,31 @@ def process_rxnorm_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
+def process_vandf_item(node_id, info, nodes_output, edges_output):
+    accession_heirarchy = ['PT', 'CD', 'IN', 'AB', 'MTH_RXN_CD'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(VANDF_PREFIX, node_id, info, accession_heirarchy)
+
+    # Currently not used, but extracting them in case we want them in the future
+    attributes = info.get(INFO_KEY, dict())
+    ndf_transmit_to_cmop = attributes.get('NDF_TRANSMIT_TO_CMOP', list())
+    sngl_or_mult_src_prd = attributes.get('SNGL_OR_MULT_SRC_PRD', list())
+    dcsa = attributes.get('DCSA', list())
+    exclude_di_check = attributes.get('EXCLUDE_DI_CHECK', list())
+    nfi = attributes.get('NFI', list())
+    va_class_name = attributes.get('VA_CLASS_NAME', list())
+    vmo = attributes.get('VMO', list())
+    drug_class_type = attributes.get('DRUG_CLASS_TYPE', list())
+    nf_name = attributes.get('NF_NAME', list())
+    ndc = attributes.get('NDC', list())
+    vac = attributes.get('VAC', list())
+    va_generic_name = attributes.get('VA_GENERIC_NAME', list())
+    parent_class = attributes.get('PARENT_CLASS', list())
+    va_dispense_unit = attributes.get('VA_DISPENSE_UNIT', list())
+    ddf = attributes.get('DDF', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
 DESIRED_CODES = {'ATC': process_atc_item,
                  'CHV': process_chv_item,
                  'DRUGBANK': process_drugbank_item,
@@ -646,8 +672,8 @@ def process_rxnorm_item(node_id, info, nodes_output, edges_output):
                  'OMIM': process_omim_item,
                  'PDQ': process_pdq_item,
                  'PSY': process_psy_item,
-                 'RXNORM': process_rxnorm_item}
-                 # 'VANDF': process_vandf_item}
+                 'RXNORM': process_rxnorm_item,
+                 'VANDF': process_vandf_item}
 
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
@@ -683,7 +709,7 @@ def process_rxnorm_item(node_id, info, nodes_output, edges_output):
             value = data[entity]
             source, node_id = extract_node_id(entity)
 
-            if source == 'VANDF':
+            if source == 'UMLS':
                 name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
                 attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
 

From e0cec4395e0e0e6a17304564126838e11ded6984 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Mon, 21 Aug 2023 16:30:56 -0700
Subject: [PATCH 061/117] #316 Global Accession Heirarchy

---
 umls_list_jsonl_to_kg_jsonl.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 753ea42e..2a36c339 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -54,6 +54,31 @@
 
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
+# Mined from HTML Page Source of https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+ACCESSION_HEIRARCHY = [('MTH', 'PN'), ('RXNORM', 'SCD'), ('RXNORM', 'SBD'), ('RXNORM', 'SCDG'), ('RXNORM', 'SBDG'), ('RXNORM', 'BPCK'), ('RXNORM', 'GPCK'),
+                       ('RXNORM', 'IN'), ('RXNORM', 'PSN'), ('RXNORM', 'MIN'), ('RXNORM', 'SCDF'), ('RXNORM', 'SBDF'), ('RXNORM', 'SCDC'), ('RXNORM', 'DFG'),
+                       ('RXNORM', 'DF'), ('RXNORM', 'SBDC'), ('RXNORM', 'BN'), ('RXNORM', 'PIN'), ('RXNORM', 'TMSY'), ('RXNORM', 'SY'), ('MSH', 'MH'),
+                       ('MSH', 'TQ'), ('MSH', 'PEP'), ('MSH', 'ET'), ('MSH', 'XQ'), ('MSH', 'PXQ'), ('MSH', 'NM'), ('HPO', 'PT'), ('HPO', 'SY'), ('HPO', 'ET'),
+                       ('NCBI', 'SCN'), ('ATC', 'RXN_PT'), ('ATC', 'PT'), ('VANDF', 'PT'), ('VANDF', 'CD'), ('VANDF', 'IN'), ('DRUGBANK', 'IN'),
+                       ('DRUGBANK', 'SY'), ('DRUGBANK', 'FSY'), ('MSH', 'N1'), ('MSH', 'PCE'), ('MSH', 'CE'), ('FMA', 'PT'), ('FMA', 'SY'), ('FMA', 'AB'),
+                       ('ATC', 'RXN_IN'), ('ATC', 'IN'), ('VANDF', 'AB'), ('VANDF', 'MTH_RXN_CD'), ('NDDF', 'MTH_RXN_CDC'), ('NDDF', 'CDC'), ('NDDF', 'CDD'),
+                       ('NDDF', 'CDA'), ('NDDF', 'IN'), ('NDDF', 'DF'), ('MED-RT', 'PT'), ('MED-RT', 'FN'), ('MED-RT', 'SY'), ('HCPCS', 'PT'), ('HCPCS', 'MP'),
+                       ('OMIM', 'PT'), ('OMIM', 'PHENO'), ('OMIM', 'PHENO_ET'), ('OMIM', 'PTAV'), ('OMIM', 'PTCS'), ('OMIM', 'ETAL'), ('OMIM', 'ET'),
+                       ('OMIM', 'HT'), ('OMIM', 'ACR'), ('HGNC', 'PT'), ('HGNC', 'ACR'), ('HGNC', 'MTH_ACR'), ('HGNC', 'NA'), ('HGNC', 'SYN'), ('HGNC', 'NP'),
+                       ('HGNC', 'NS'), ('NCI', 'PT'), ('NCI', 'SY'), ('NCI', 'CSN'), ('NCI', 'DN'), ('NCI', 'FBD'), ('NCI', 'HD'), ('NCI', 'CCN'),
+                       ('NCI', 'AD'), ('NCI', 'CA2'), ('NCI', 'CA3'), ('NCI', 'BN'), ('NCI', 'AB'), ('NCI', 'CCS'), ('PDQ', 'PT'), ('PDQ', 'HT'),
+                       ('PDQ', 'PSC'), ('PDQ', 'SY'), ('CHV', 'PT'), ('MEDLINEPLUS', 'PT'), ('GO', 'PT'), ('GO', 'MTH_PT'), ('GO', 'ET'), ('GO', 'MTH_ET'),
+                       ('GO', 'SY'), ('GO', 'MTH_SY'), ('PDQ', 'ET'), ('PDQ', 'CU'), ('PDQ', 'LV'), ('PDQ', 'ACR'), ('PDQ', 'AB'), ('PDQ', 'BN'), ('PDQ', 'FBD'),
+                       ('PDQ', 'CCN'), ('PDQ', 'CHN'), ('NCBI', 'USN'), ('NCBI', 'USY'), ('NCBI', 'SY'), ('NCBI', 'UCN'), ('NCBI', 'CMN'), ('NCBI', 'UE'),
+                       ('NCBI', 'EQ'), ('ICD9CM', 'PT'), ('ICD9CM', 'HT'), ('ICD10PCS', 'PT'), ('ICD10PCS', 'PX'), ('ICD10PCS', 'HX'), ('ICD10PCS', 'MTH_HX'),
+                       ('ICD10PCS', 'HT'), ('ICD10PCS', 'HS'), ('ICD10PCS', 'AB'), ('HL7V3.0', 'CSY'), ('HL7V3.0', 'PT'), ('HL7V3.0', 'CDO'), ('HL7V3.0', 'VS'),
+                       ('HL7V3.0', 'BR'), ('HL7V3.0', 'CPR'), ('HL7V3.0', 'CR'), ('HL7V3.0', 'NPT'), ('HCPCS', 'MTH_HT'), ('MTH', 'CV'), ('MTH', 'XM'),
+                       ('MTH', 'PT'), ('MTH', 'SY'), ('MTH', 'RT'), ('ICD9CM', 'AB'), ('PSY', 'PT'), ('PSY', 'HT'), ('PSY', 'ET'), ('MEDLINEPLUS', 'ET'),
+                       ('MEDLINEPLUS', 'SY'), ('MEDLINEPLUS', 'HT'), ('MSH', 'HT'), ('MSH', 'HS'), ('MSH', 'DEV'), ('MSH', 'DSV'), ('MSH', 'QAB'),
+                       ('MSH', 'QEV'), ('MSH', 'QSV'), ('MSH', 'PM'), ('HCPCS', 'AB'), ('MTH', 'DT'), ('HCPCS', 'AM'), ('CHV', 'SY'), ('RXNORM', 'ET'),
+                       ('HPO', 'OP'), ('HPO', 'IS'), ('NCI', 'OP'), ('HPO', 'OET'), ('HCPCS', 'OP'), ('HCPCS', 'OM'), ('HCPCS', 'OAM'), ('GO', 'OP'),
+                       ('GO', 'MTH_OP'), ('GO', 'OET'), ('GO', 'MTH_OET'), ('GO', 'IS'), ('GO', 'MTH_IS'), ('PDQ', 'OP'), ('PDQ', 'IS'), ('HL7V3.0', 'OP'),
+                       ('HL7V3.0', 'ONP'), ('HCPCS', 'OA'), ('FMA', 'OP'), ('FMA', 'IS')]
 
 def get_args():
     arg_parser = argparse.ArgumentParser(description='umls_list_jsonl_to_kg_jsonl.py: converts UMLS MySQL JSON Lines dump into KG2 JSON format')

From e13e436f0ec506b54d54e9e8765bf847aca062c7 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Tue, 22 Aug 2023 13:03:14 -0700
Subject: [PATCH 062/117] #316 commit before I start deleting stuff (half two
 of the refactor)

---
 umls-name-heirarchy.yaml       | 2598 ++++++++++++++++++++++++++++++++
 umls_list_jsonl_to_kg_jsonl.py |  212 ++-
 umls_util.py                   |  573 +++++++
 3 files changed, 3275 insertions(+), 108 deletions(-)
 create mode 100644 umls-name-heirarchy.yaml
 create mode 100644 umls_util.py

diff --git a/umls-name-heirarchy.yaml b/umls-name-heirarchy.yaml
new file mode 100644
index 00000000..a5c2996a
--- /dev/null
+++ b/umls-name-heirarchy.yaml
@@ -0,0 +1,2598 @@
+-
+  - MTH
+  - PN
+-
+  - MTHCMSFRF
+  - PT
+-
+  - RXNORM
+  - SCD
+-
+  - RXNORM
+  - SBD
+-
+  - RXNORM
+  - SCDG
+-
+  - RXNORM
+  - SBDG
+-
+  - RXNORM
+  - BPCK
+-
+  - RXNORM
+  - GPCK
+-
+  - RXNORM
+  - IN
+-
+  - RXNORM
+  - PSN
+-
+  - RXNORM
+  - MIN
+-
+  - RXNORM
+  - SCDF
+-
+  - RXNORM
+  - SBDF
+-
+  - RXNORM
+  - SCDC
+-
+  - RXNORM
+  - DFG
+-
+  - RXNORM
+  - DF
+-
+  - RXNORM
+  - SBDC
+-
+  - RXNORM
+  - BN
+-
+  - RXNORM
+  - PIN
+-
+  - RXNORM
+  - TMSY
+-
+  - RXNORM
+  - SY
+-
+  - MSH
+  - MH
+-
+  - MSH
+  - TQ
+-
+  - MSH
+  - PEP
+-
+  - MSH
+  - ET
+-
+  - MSH
+  - XQ
+-
+  - MSH
+  - PXQ
+-
+  - MSH
+  - NM
+-
+  - SNOMEDCT_US
+  - PT
+-
+  - SNOMEDCT_US
+  - FN
+-
+  - SNOMEDCT_US
+  - SY
+-
+  - SNOMEDCT_US
+  - PTGB
+-
+  - SNOMEDCT_US
+  - SYGB
+-
+  - SNOMEDCT_US
+  - MTH_PT
+-
+  - SNOMEDCT_US
+  - MTH_FN
+-
+  - SNOMEDCT_US
+  - MTH_SY
+-
+  - SNOMEDCT_US
+  - MTH_PTGB
+-
+  - SNOMEDCT_US
+  - MTH_SYGB
+-
+  - SNOMEDCT_US
+  - SB
+-
+  - SNOMEDCT_US
+  - XM
+-
+  - SNOMEDCT_VET
+  - PT
+-
+  - SNOMEDCT_VET
+  - FN
+-
+  - SNOMEDCT_VET
+  - SY
+-
+  - SNOMEDCT_VET
+  - SB
+-
+  - HPO
+  - PT
+-
+  - HPO
+  - SY
+-
+  - HPO
+  - ET
+-
+  - NCBI
+  - SCN
+-
+  - MTHSPL
+  - MTH_RXN_DP
+-
+  - MTHSPL
+  - DP
+-
+  - MTHSPL
+  - SU
+-
+  - ATC
+  - RXN_PT
+-
+  - ATC
+  - PT
+-
+  - VANDF
+  - PT
+-
+  - VANDF
+  - CD
+-
+  - VANDF
+  - IN
+-
+  - USP
+  - CD
+-
+  - USP
+  - IN
+-
+  - USPMG
+  - HC
+-
+  - USPMG
+  - PT
+-
+  - MMX
+  - MTH_RXN_CD
+-
+  - MMX
+  - MTH_RXN_BD
+-
+  - MMX
+  - CD
+-
+  - MMX
+  - BD
+-
+  - DRUGBANK
+  - IN
+-
+  - DRUGBANK
+  - SY
+-
+  - DRUGBANK
+  - FSY
+-
+  - MSH
+  - N1
+-
+  - MSH
+  - PCE
+-
+  - MSH
+  - CE
+-
+  - CPM
+  - PT
+-
+  - NEU
+  - PT
+-
+  - NEU
+  - SY
+-
+  - FMA
+  - PT
+-
+  - FMA
+  - SY
+-
+  - FMA
+  - AB
+-
+  - UWDA
+  - PT
+-
+  - UWDA
+  - SY
+-
+  - UMD
+  - PT
+-
+  - UMD
+  - SY
+-
+  - UMD
+  - ET
+-
+  - UMD
+  - RT
+-
+  - GS
+  - CD
+-
+  - MMSL
+  - CD
+-
+  - GS
+  - MTH_RXN_BD
+-
+  - GS
+  - BD
+-
+  - GS
+  - IN
+-
+  - MMSL
+  - MTH_RXN_BD
+-
+  - MMSL
+  - BD
+-
+  - MMSL
+  - SC
+-
+  - MMSL
+  - MS
+-
+  - MMSL
+  - GN
+-
+  - MMSL
+  - BN
+-
+  - ATC
+  - RXN_IN
+-
+  - ATC
+  - IN
+-
+  - MMSL
+  - IN
+-
+  - VANDF
+  - AB
+-
+  - GS
+  - MTH_RXN_CD
+-
+  - VANDF
+  - MTH_RXN_CD
+-
+  - NDDF
+  - MTH_RXN_CDC
+-
+  - NDDF
+  - CDC
+-
+  - NDDF
+  - CDD
+-
+  - NDDF
+  - CDA
+-
+  - NDDF
+  - IN
+-
+  - NDDF
+  - DF
+-
+  - MED-RT
+  - PT
+-
+  - MED-RT
+  - FN
+-
+  - MED-RT
+  - SY
+-
+  - SPN
+  - PT
+-
+  - MDR
+  - PT
+-
+  - MDR
+  - MTH_PT
+-
+  - MDR
+  - HG
+-
+  - MDR
+  - MTH_HG
+-
+  - MDR
+  - HT
+-
+  - MDR
+  - MTH_HT
+-
+  - MDR
+  - LLT
+-
+  - MDR
+  - MTH_LLT
+-
+  - MDR
+  - SMQ
+-
+  - MDR
+  - MTH_SMQ
+-
+  - MDR
+  - OS
+-
+  - MDR
+  - AB
+-
+  - CPT
+  - PT
+-
+  - CPT
+  - SY
+-
+  - CPT
+  - ETCLIN
+-
+  - CPT
+  - POS
+-
+  - CPT
+  - GLP
+-
+  - CPT
+  - ETCF
+-
+  - CPT
+  - MP
+-
+  - HCPT
+  - PT
+-
+  - HCPCS
+  - PT
+-
+  - CDT
+  - PT
+-
+  - MVX
+  - PT
+-
+  - CVX
+  - PT
+-
+  - CVX
+  - RXN_PT
+-
+  - CVX
+  - AB
+-
+  - HCDT
+  - PT
+-
+  - HCPCS
+  - MP
+-
+  - HCPT
+  - MP
+-
+  - ICD10AE
+  - PT
+-
+  - ICD10
+  - PT
+-
+  - ICD10AE
+  - PX
+-
+  - ICD10
+  - PX
+-
+  - ICD10AE
+  - PS
+-
+  - ICD10
+  - PS
+-
+  - ICD10AMAE
+  - PT
+-
+  - ICD10AM
+  - PT
+-
+  - ICD10AMAE
+  - PX
+-
+  - ICD10AM
+  - PX
+-
+  - ICD10AMAE
+  - PS
+-
+  - ICD10AM
+  - PS
+-
+  - OMIM
+  - PT
+-
+  - OMIM
+  - PHENO
+-
+  - OMIM
+  - PHENO_ET
+-
+  - OMIM
+  - PTAV
+-
+  - OMIM
+  - PTCS
+-
+  - OMIM
+  - ETAL
+-
+  - OMIM
+  - ET
+-
+  - OMIM
+  - HT
+-
+  - OMIM
+  - ACR
+-
+  - MEDCIN
+  - PT
+-
+  - MEDCIN
+  - FN
+-
+  - MEDCIN
+  - XM
+-
+  - MEDCIN
+  - SY
+-
+  - HGNC
+  - PT
+-
+  - HGNC
+  - ACR
+-
+  - HGNC
+  - MTH_ACR
+-
+  - HGNC
+  - NA
+-
+  - HGNC
+  - SYN
+-
+  - HGNC
+  - NP
+-
+  - HGNC
+  - NS
+-
+  - ICNP
+  - PT
+-
+  - PNDS
+  - PT
+-
+  - PNDS
+  - HT
+-
+  - PNDS
+  - XM
+-
+  - NCI
+  - PT
+-
+  - NCI
+  - SY
+-
+  - NCI
+  - CSN
+-
+  - NCI
+  - DN
+-
+  - NCI
+  - FBD
+-
+  - NCI
+  - HD
+-
+  - NCI
+  - CCN
+-
+  - NCI
+  - AD
+-
+  - NCI
+  - CA2
+-
+  - NCI
+  - CA3
+-
+  - NCI
+  - BN
+-
+  - NCI
+  - AB
+-
+  - NCI
+  - CCS
+-
+  - PDQ
+  - PT
+-
+  - PDQ
+  - HT
+-
+  - PDQ
+  - PSC
+-
+  - PDQ
+  - SY
+-
+  - CHV
+  - PT
+-
+  - MEDLINEPLUS
+  - PT
+-
+  - MTHICPC2EAE
+  - PT
+-
+  - ICPC2EENG
+  - PT
+-
+  - MTHICPC2ICD10AE
+  - PT
+-
+  - SOP
+  - PT
+-
+  - ICF
+  - HT
+-
+  - ICF
+  - PT
+-
+  - ICF
+  - MTH_HT
+-
+  - ICF
+  - MTH_PT
+-
+  - ICF-CY
+  - HT
+-
+  - ICF-CY
+  - PT
+-
+  - ICF-CY
+  - MTH_HT
+-
+  - ICF-CY
+  - MTH_PT
+-
+  - ICPC2ICD10ENG
+  - PT
+-
+  - ICPC
+  - PX
+-
+  - ICPC
+  - PT
+-
+  - ICPC
+  - PS
+-
+  - ICPC
+  - PC
+-
+  - ICPC
+  - CX
+-
+  - ICPC
+  - CP
+-
+  - ICPC
+  - CS
+-
+  - ICPC
+  - CC
+-
+  - ICPC2EENG
+  - CO
+-
+  - ICPC
+  - CO
+-
+  - MTHICPC2EAE
+  - AB
+-
+  - ICPC2EENG
+  - AB
+-
+  - ICPC2P
+  - PTN
+-
+  - ICPC2P
+  - MTH_PTN
+-
+  - ICPC2P
+  - PT
+-
+  - ICPC2P
+  - MTH_PT
+-
+  - AOT
+  - PT
+-
+  - AOT
+  - ET
+-
+  - GO
+  - PT
+-
+  - GO
+  - MTH_PT
+-
+  - GO
+  - ET
+-
+  - GO
+  - MTH_ET
+-
+  - GO
+  - SY
+-
+  - GO
+  - MTH_SY
+-
+  - PDQ
+  - ET
+-
+  - PDQ
+  - CU
+-
+  - PDQ
+  - LV
+-
+  - PDQ
+  - ACR
+-
+  - PDQ
+  - AB
+-
+  - PDQ
+  - BN
+-
+  - PDQ
+  - FBD
+-
+  - PDQ
+  - CCN
+-
+  - PDQ
+  - CHN
+-
+  - NCBI
+  - USN
+-
+  - NCBI
+  - USY
+-
+  - NCBI
+  - SY
+-
+  - NCBI
+  - UCN
+-
+  - NCBI
+  - CMN
+-
+  - NCBI
+  - UE
+-
+  - NCBI
+  - EQ
+-
+  - LNC
+  - LN
+-
+  - LNC
+  - MTH_LN
+-
+  - LNC
+  - OSN
+-
+  - LNC
+  - DN
+-
+  - LNC
+  - CN
+-
+  - LNC
+  - MTH_CN
+-
+  - LNC
+  - LPDN
+-
+  - LNC
+  - LPN
+-
+  - LNC
+  - HC
+-
+  - LNC
+  - HS
+-
+  - LNC
+  - OLC
+-
+  - LNC
+  - LC
+-
+  - LNC
+  - LS
+-
+  - LNC
+  - LG
+-
+  - LNC
+  - LA
+-
+  - ICD10CM
+  - PT
+-
+  - ICD9CM
+  - PT
+-
+  - ICD10CM
+  - HT
+-
+  - ICD9CM
+  - HT
+-
+  - CCSR_ICD10PCS
+  - HT
+-
+  - CCSR_ICD10CM
+  - SD
+-
+  - CCSR_ICD10PCS
+  - SP
+-
+  - CCSR_ICD10CM
+  - XM
+-
+  - CCSR_ICD10PCS
+  - XM
+-
+  - CCS
+  - HT
+-
+  - CCS
+  - MD
+-
+  - CCS
+  - SD
+-
+  - CCS
+  - MV
+-
+  - CCS
+  - SP
+-
+  - CCS
+  - XM
+-
+  - ICPC2ICD10ENG
+  - XM
+-
+  - ICD10AE
+  - HT
+-
+  - ICD10PCS
+  - PT
+-
+  - ICD10PCS
+  - PX
+-
+  - ICD10PCS
+  - HX
+-
+  - ICD10PCS
+  - MTH_HX
+-
+  - ICD10PCS
+  - HT
+-
+  - ICD10PCS
+  - HS
+-
+  - ICD10PCS
+  - AB
+-
+  - ICD10
+  - HT
+-
+  - ICD10AE
+  - HX
+-
+  - ICD10
+  - HX
+-
+  - ICD10AE
+  - HS
+-
+  - ICD10
+  - HS
+-
+  - ICD10AMAE
+  - HT
+-
+  - ICD10AM
+  - HT
+-
+  - UMD
+  - HT
+-
+  - ICPC
+  - HT
+-
+  - ORPHANET
+  - PT
+-
+  - ORPHANET
+  - SY
+-
+  - NUCCHCPT
+  - PT
+-
+  - HL7V3.0
+  - CSY
+-
+  - CDCREC
+  - PT
+-
+  - HL7V3.0
+  - PT
+-
+  - HL7V2.5
+  - PT
+-
+  - HL7V3.0
+  - CDO
+-
+  - HL7V3.0
+  - VS
+-
+  - HL7V3.0
+  - BR
+-
+  - HL7V3.0
+  - CPR
+-
+  - HL7V3.0
+  - CR
+-
+  - HL7V3.0
+  - NPT
+-
+  - HL7V2.5
+  - HTN
+-
+  - CPT
+  - HT
+-
+  - CDT
+  - HT
+-
+  - HCPCS
+  - MTH_HT
+-
+  - CCC
+  - PT
+-
+  - CCC
+  - HT
+-
+  - NIC
+  - IV
+-
+  - NIC
+  - HC
+-
+  - NANDA-I
+  - PT
+-
+  - NANDA-I
+  - HT
+-
+  - NANDA-I
+  - HC
+-
+  - NANDA-I
+  - RT
+-
+  - OMS
+  - MTH_SI
+-
+  - OMS
+  - PR
+-
+  - OMS
+  - TG
+-
+  - OMS
+  - HT
+-
+  - OMS
+  - PQ
+-
+  - OMS
+  - IVC
+-
+  - OMS
+  - SI
+-
+  - OMS
+  - SCALE
+-
+  - NIC
+  - AC
+-
+  - NOC
+  - OC
+-
+  - NOC
+  - ID
+-
+  - NIC
+  - HT
+-
+  - NOC
+  - HT
+-
+  - NOC
+  - HC
+-
+  - CCC
+  - MTH_HT
+-
+  - CCC
+  - MP
+-
+  - ALT
+  - PT
+-
+  - ALT
+  - HT
+-
+  - MTH
+  - CV
+-
+  - MTH
+  - XM
+-
+  - MTH
+  - PT
+-
+  - MTH
+  - SY
+-
+  - MTH
+  - RT
+-
+  - ICD10CM
+  - ET
+-
+  - MTHICD9
+  - ET
+-
+  - ICD10CM
+  - AB
+-
+  - ICD9CM
+  - AB
+-
+  - PSY
+  - PT
+-
+  - PSY
+  - HT
+-
+  - PSY
+  - ET
+-
+  - MEDLINEPLUS
+  - ET
+-
+  - MEDLINEPLUS
+  - SY
+-
+  - MEDLINEPLUS
+  - HT
+-
+  - LCH_NW
+  - PT
+-
+  - LCH
+  - PT
+-
+  - MSH
+  - HT
+-
+  - MSH
+  - HS
+-
+  - MSH
+  - DEV
+-
+  - MSH
+  - DSV
+-
+  - MSH
+  - QAB
+-
+  - MSH
+  - QEV
+-
+  - MSH
+  - QSV
+-
+  - MSH
+  - PM
+-
+  - LCH_NW
+  - XM
+-
+  - CPT
+  - AB
+-
+  - HCPT
+  - AB
+-
+  - HCPCS
+  - AB
+-
+  - WHO
+  - PT
+-
+  - WHO
+  - HT
+-
+  - WHO
+  - IT
+-
+  - SNMI
+  - PT
+-
+  - SNMI
+  - PX
+-
+  - SNMI
+  - HT
+-
+  - SNMI
+  - HX
+-
+  - SNMI
+  - RT
+-
+  - SNMI
+  - SY
+-
+  - SNMI
+  - SX
+-
+  - SNMI
+  - AD
+-
+  - SNM
+  - PT
+-
+  - SNM
+  - RT
+-
+  - SNM
+  - HT
+-
+  - SNM
+  - SY
+-
+  - SNM
+  - RS
+-
+  - RCD
+  - PT
+-
+  - RCD
+  - SY
+-
+  - RCD
+  - AT
+-
+  - RCD
+  - AS
+-
+  - RCD
+  - AB
+-
+  - RCDSA
+  - PT
+-
+  - RCDSY
+  - PT
+-
+  - RCDAE
+  - PT
+-
+  - RCDSA
+  - SY
+-
+  - RCDSY
+  - SY
+-
+  - RCDAE
+  - SY
+-
+  - RCDAE
+  - AT
+-
+  - RCDSA
+  - AB
+-
+  - RCDSY
+  - AB
+-
+  - RCDAE
+  - AB
+-
+  - RCDAE
+  - AA
+-
+  - RCD
+  - AA
+-
+  - CSP
+  - PT
+-
+  - CSP
+  - SY
+-
+  - CSP
+  - ET
+-
+  - CSP
+  - AB
+-
+  - MTH
+  - DT
+-
+  - HCPT
+  - AM
+-
+  - HCPCS
+  - AM
+-
+  - HCDT
+  - AB
+-
+  - ALT
+  - AB
+-
+  - CHV
+  - SY
+-
+  - RXNORM
+  - ET
+-
+  - SNOMEDCT_VET
+  - OAP
+-
+  - SNOMEDCT_VET
+  - OP
+-
+  - SNOMEDCT_US
+  - OAP
+-
+  - SNOMEDCT_US
+  - OP
+-
+  - SNOMEDCT_VET
+  - OAF
+-
+  - SNOMEDCT_VET
+  - OF
+-
+  - SNOMEDCT_US
+  - OAF
+-
+  - SNOMEDCT_US
+  - OF
+-
+  - SNOMEDCT_VET
+  - OAS
+-
+  - SNOMEDCT_VET
+  - IS
+-
+  - SNOMEDCT_US
+  - OAS
+-
+  - SNOMEDCT_US
+  - IS
+-
+  - SNOMEDCT_US
+  - MTH_OAP
+-
+  - SNOMEDCT_US
+  - MTH_OP
+-
+  - SNOMEDCT_US
+  - MTH_OAF
+-
+  - SNOMEDCT_US
+  - MTH_OF
+-
+  - SNOMEDCT_US
+  - MTH_OAS
+-
+  - SNOMEDCT_US
+  - MTH_IS
+-
+  - HPO
+  - OP
+-
+  - HPO
+  - IS
+-
+  - NCI
+  - OP
+-
+  - LNC
+  - LO
+-
+  - LNC
+  - MTH_LO
+-
+  - LNC
+  - OOSN
+-
+  - LNC
+  - OLG
+-
+  - HPO
+  - OET
+-
+  - NEU
+  - OP
+-
+  - NEU
+  - IS
+-
+  - NEU
+  - ACR
+-
+  - MDR
+  - MTH_OS
+-
+  - CDT
+  - OP
+-
+  - ICPC2P
+  - OPN
+-
+  - ICPC2P
+  - MTH_OPN
+-
+  - ICPC2P
+  - OP
+-
+  - ICPC2P
+  - MTH_OP
+-
+  - HCPCS
+  - OP
+-
+  - HCDT
+  - OP
+-
+  - HCPT
+  - OP
+-
+  - HCPCS
+  - OM
+-
+  - HCPCS
+  - OAM
+-
+  - GO
+  - OP
+-
+  - GO
+  - MTH_OP
+-
+  - GO
+  - OET
+-
+  - GO
+  - MTH_OET
+-
+  - GO
+  - IS
+-
+  - GO
+  - MTH_IS
+-
+  - PDQ
+  - OP
+-
+  - PDQ
+  - IS
+-
+  - MDR
+  - OL
+-
+  - MDR
+  - MTH_OL
+-
+  - NUCCHCPT
+  - OP
+-
+  - HL7V3.0
+  - OP
+-
+  - HL7V3.0
+  - ONP
+-
+  - WHO
+  - OS
+-
+  - RCD
+  - OP
+-
+  - RCD
+  - IS
+-
+  - RCDSA
+  - OP
+-
+  - RCDSY
+  - OP
+-
+  - RCDAE
+  - OP
+-
+  - RCDSA
+  - IS
+-
+  - RCDSY
+  - IS
+-
+  - RCDAE
+  - IS
+-
+  - RCDSA
+  - OA
+-
+  - RCDSY
+  - OA
+-
+  - RCDAE
+  - OA
+-
+  - RCD
+  - OA
+-
+  - HCPT
+  - OA
+-
+  - HCPCS
+  - OA
+-
+  - HCDT
+  - OA
+-
+  - FMA
+  - OP
+-
+  - FMA
+  - IS
+-
+  - DSM-5
+  - DC10
+-
+  - DSM-5
+  - DC9
+-
+  - DXP
+  - DI
+-
+  - DXP
+  - FI
+-
+  - DXP
+  - SY
+-
+  - RAM
+  - PT
+-
+  - RAM
+  - RT
+-
+  - ULT
+  - PT
+-
+  - BI
+  - PT
+-
+  - BI
+  - AB
+-
+  - BI
+  - SY
+-
+  - BI
+  - RT
+-
+  - PCDS
+  - GO
+-
+  - PCDS
+  - OR
+-
+  - PCDS
+  - PR
+-
+  - PCDS
+  - CO
+-
+  - PCDS
+  - HX
+-
+  - PCDS
+  - HT
+-
+  - MTHMST
+  - PT
+-
+  - MTHMST
+  - SY
+-
+  - DDB
+  - PT
+-
+  - DDB
+  - SY
+-
+  - CST
+  - PT
+-
+  - COSTAR
+  - PT
+-
+  - CST
+  - SC
+-
+  - CST
+  - HT
+-
+  - CST
+  - GT
+-
+  - CCPSS
+  - TX
+-
+  - CCPSS
+  - TC
+-
+  - CCPSS
+  - PT
+-
+  - CCPSS
+  - MP
+-
+  - AOD
+  - DE
+-
+  - AOD
+  - DS
+-
+  - AOD
+  - XD
+-
+  - AOD
+  - FN
+-
+  - AOD
+  - ET
+-
+  - AOD
+  - ES
+-
+  - AOD
+  - EX
+-
+  - AOD
+  - NP
+-
+  - AOD
+  - NS
+-
+  - AOD
+  - NX
+-
+  - QMR
+  - PT
+-
+  - JABL
+  - PC
+-
+  - JABL
+  - PT
+-
+  - JABL
+  - SS
+-
+  - JABL
+  - SY
+-
+  - AIR
+  - FI
+-
+  - AIR
+  - DI
+-
+  - AIR
+  - SY
+-
+  - AIR
+  - HT
+-
+  - PPAC
+  - DO
+-
+  - PPAC
+  - CL
+-
+  - PPAC
+  - AC
+-
+  - PPAC
+  - ST
+-
+  - PPAC
+  - TA
+-
+  - MCM
+  - PT
+-
+  - MCM
+  - RT
+-
+  - SCTSPA
+  - PT
+-
+  - SCTSPA
+  - FN
+-
+  - SCTSPA
+  - SY
+-
+  - SCTSPA
+  - MTH_PT
+-
+  - SCTSPA
+  - MTH_FN
+-
+  - SCTSPA
+  - MTH_SY
+-
+  - SCTSPA
+  - SB
+-
+  - SCTSPA
+  - OP
+-
+  - SCTSPA
+  - OAF
+-
+  - SCTSPA
+  - OAP
+-
+  - SCTSPA
+  - OAS
+-
+  - SCTSPA
+  - OF
+-
+  - SCTSPA
+  - IS
+-
+  - SCTSPA
+  - MTH_OP
+-
+  - SCTSPA
+  - MTH_OAF
+-
+  - SCTSPA
+  - MTH_OAP
+-
+  - SCTSPA
+  - MTH_OAS
+-
+  - SCTSPA
+  - MTH_OF
+-
+  - SCTSPA
+  - MTH_IS
+-
+  - MSHPOR
+  - MH
+-
+  - MSHPOR
+  - PEP
+-
+  - MSHPOR
+  - ET
+-
+  - MSHSPA
+  - MH
+-
+  - MSHSPA
+  - PEP
+-
+  - MSHSPA
+  - ET
+-
+  - MSHCZE
+  - MH
+-
+  - MSHCZE
+  - PEP
+-
+  - MSHCZE
+  - ET
+-
+  - MSHCZE
+  - TQ
+-
+  - MSHCZE
+  - XQ
+-
+  - MSHCZE
+  - PXQ
+-
+  - MSHDUT
+  - MH
+-
+  - MSHSWE
+  - MH
+-
+  - MSHSWE
+  - ET
+-
+  - MSHSWE
+  - TQ
+-
+  - MSHNOR
+  - MH
+-
+  - MSHGER
+  - MH
+-
+  - MSHNOR
+  - PEP
+-
+  - MSHGER
+  - PEP
+-
+  - MSHNOR
+  - DSV
+-
+  - MSHGER
+  - DSV
+-
+  - MSHNOR
+  - ET
+-
+  - MSHGER
+  - ET
+-
+  - MSHFIN
+  - MH
+-
+  - MSHLAV
+  - MH
+-
+  - MSHSCR
+  - MH
+-
+  - MSHFRE
+  - MH
+-
+  - MSHLAV
+  - PEP
+-
+  - MSHSCR
+  - PEP
+-
+  - MSHFRE
+  - PEP
+-
+  - MSHLAV
+  - EP
+-
+  - MSHSCR
+  - ET
+-
+  - MSHFRE
+  - ET
+-
+  - MSHITA
+  - MH
+-
+  - MSHITA
+  - PEP
+-
+  - MSHITA
+  - ET
+-
+  - MSHJPN
+  - PT
+-
+  - MSHPOL
+  - MH
+-
+  - MSHRUS
+  - MH
+-
+  - MSHJPN
+  - SY
+-
+  - KCD5
+  - HT
+-
+  - TKMT
+  - PT
+-
+  - KCD5
+  - PT
+-
+  - MSHPOL
+  - SY
+-
+  - MSHRUS
+  - SY
+-
+  - MSHDUT
+  - SY
+-
+  - MDRSPA
+  - PT
+-
+  - MDRSPA
+  - HG
+-
+  - MDRSPA
+  - HT
+-
+  - MDRSPA
+  - LLT
+-
+  - MDRSPA
+  - OS
+-
+  - MDRSPA
+  - SMQ
+-
+  - MDRSPA
+  - OL
+-
+  - MDRSPA
+  - AB
+-
+  - MDRDUT
+  - PT
+-
+  - MDRDUT
+  - HG
+-
+  - MDRDUT
+  - HT
+-
+  - MDRDUT
+  - LLT
+-
+  - MDRDUT
+  - OS
+-
+  - MDRDUT
+  - SMQ
+-
+  - MDRDUT
+  - OL
+-
+  - MDRDUT
+  - AB
+-
+  - MDRFRE
+  - PT
+-
+  - MDRFRE
+  - HG
+-
+  - MDRFRE
+  - HT
+-
+  - MDRFRE
+  - LLT
+-
+  - MDRFRE
+  - SMQ
+-
+  - MDRFRE
+  - OS
+-
+  - MDRFRE
+  - OL
+-
+  - MDRFRE
+  - AB
+-
+  - MDRGER
+  - PT
+-
+  - MDRGER
+  - HG
+-
+  - MDRGER
+  - HT
+-
+  - MDRGER
+  - LLT
+-
+  - MDRGER
+  - SMQ
+-
+  - MDRGER
+  - OS
+-
+  - MDRGER
+  - OL
+-
+  - MDRGER
+  - AB
+-
+  - MDRITA
+  - PT
+-
+  - MDRITA
+  - HG
+-
+  - MDRITA
+  - HT
+-
+  - MDRITA
+  - LLT
+-
+  - MDRITA
+  - SMQ
+-
+  - MDRITA
+  - OS
+-
+  - MDRITA
+  - OL
+-
+  - MDRITA
+  - AB
+-
+  - MDRJPN
+  - PT
+-
+  - MDRJPN
+  - PTJKN
+-
+  - MDRJPN
+  - PTJKN1
+-
+  - MDRJPN
+  - HG
+-
+  - MDRJPN
+  - HGJKN
+-
+  - MDRJPN
+  - HGJKN1
+-
+  - MDRJPN
+  - HT
+-
+  - MDRJPN
+  - HTJKN
+-
+  - MDRJPN
+  - HTJKN1
+-
+  - MDRJPN
+  - LLT
+-
+  - MDRJPN
+  - LLTJKN
+-
+  - MDRJPN
+  - LLTJKN1
+-
+  - MDRJPN
+  - OS
+-
+  - MDRJPN
+  - SMQ
+-
+  - MDRJPN
+  - OL
+-
+  - MDRJPN
+  - OLJKN
+-
+  - MDRJPN
+  - OLJKN1
+-
+  - MDRCZE
+  - PT
+-
+  - MDRKOR
+  - PT
+-
+  - MDRHUN
+  - PT
+-
+  - MDRBPO
+  - PT
+-
+  - MDRPOR
+  - PT
+-
+  - MDRLAV
+  - PT
+-
+  - MDRSWE
+  - PT
+-
+  - MDRARA
+  - PT
+-
+  - MDRRUS
+  - PT
+-
+  - MDRPOL
+  - PT
+-
+  - MDRGRE
+  - PT
+-
+  - MDRCZE
+  - HG
+-
+  - MDRKOR
+  - HG
+-
+  - MDRHUN
+  - HG
+-
+  - MDRBPO
+  - HG
+-
+  - MDRPOR
+  - HG
+-
+  - MDRLAV
+  - HG
+-
+  - MDRSWE
+  - HG
+-
+  - MDRARA
+  - HG
+-
+  - MDRRUS
+  - HG
+-
+  - MDRPOL
+  - HG
+-
+  - MDRGRE
+  - HG
+-
+  - MDRCZE
+  - HT
+-
+  - MDRKOR
+  - HT
+-
+  - MDRHUN
+  - HT
+-
+  - MDRBPO
+  - HT
+-
+  - MDRPOR
+  - HT
+-
+  - MDRLAV
+  - HT
+-
+  - MDRSWE
+  - HT
+-
+  - MDRARA
+  - HT
+-
+  - MDRRUS
+  - HT
+-
+  - MDRPOL
+  - HT
+-
+  - MDRGRE
+  - HT
+-
+  - MDRCZE
+  - LLT
+-
+  - MDRKOR
+  - LLT
+-
+  - MDRHUN
+  - LLT
+-
+  - MDRBPO
+  - LLT
+-
+  - MDRPOR
+  - LLT
+-
+  - MDRLAV
+  - LLT
+-
+  - MDRSWE
+  - LLT
+-
+  - MDRARA
+  - LLT
+-
+  - MDRRUS
+  - LLT
+-
+  - MDRPOL
+  - LLT
+-
+  - MDRGRE
+  - LLT
+-
+  - MDRCZE
+  - OS
+-
+  - MDRKOR
+  - OS
+-
+  - MDRHUN
+  - OS
+-
+  - MDRBPO
+  - OS
+-
+  - MDRPOR
+  - OS
+-
+  - MDRLAV
+  - OS
+-
+  - MDRSWE
+  - OS
+-
+  - MDRARA
+  - OS
+-
+  - MDRRUS
+  - OS
+-
+  - MDRPOL
+  - OS
+-
+  - MDRGRE
+  - OS
+-
+  - MDRCZE
+  - SMQ
+-
+  - MDRKOR
+  - SMQ
+-
+  - MDRHUN
+  - SMQ
+-
+  - MDRLAV
+  - SMQ
+-
+  - MDRSWE
+  - SMQ
+-
+  - MDRARA
+  - SMQ
+-
+  - MDRRUS
+  - SMQ
+-
+  - MDRPOL
+  - SMQ
+-
+  - MDRGRE
+  - SMQ
+-
+  - MDRBPO
+  - SMQ
+-
+  - MDRPOR
+  - SMQ
+-
+  - MDRCZE
+  - OL
+-
+  - MDRKOR
+  - OL
+-
+  - MDRHUN
+  - OL
+-
+  - MDRLAV
+  - OL
+-
+  - MDRSWE
+  - OL
+-
+  - MDRARA
+  - OL
+-
+  - MDRRUS
+  - OL
+-
+  - MDRPOL
+  - OL
+-
+  - MDRGRE
+  - OL
+-
+  - MDRBPO
+  - OL
+-
+  - MDRPOR
+  - OL
+-
+  - MDRCZE
+  - AB
+-
+  - MDRKOR
+  - AB
+-
+  - MDRHUN
+  - AB
+-
+  - MDRLAV
+  - AB
+-
+  - MDRSWE
+  - AB
+-
+  - MDRARA
+  - AB
+-
+  - MDRRUS
+  - AB
+-
+  - MDRPOL
+  - AB
+-
+  - MDRGRE
+  - AB
+-
+  - MDRBPO
+  - AB
+-
+  - MDRPOR
+  - AB
+-
+  - MDRJPN
+  - OSJKN
+-
+  - MDRJPN
+  - OSJKN1
+-
+  - WHOFRE
+  - HT
+-
+  - WHOGER
+  - HT
+-
+  - WHOPOR
+  - HT
+-
+  - WHOSPA
+  - HT
+-
+  - LNC-DE-DE
+  - LN
+-
+  - LNC-DE-DE
+  - LC
+-
+  - LNC-DE-DE
+  - OLC
+-
+  - LNC-DE-DE
+  - LO
+-
+  - LNC-EL-GR
+  - LN
+-
+  - LNC-EL-GR
+  - LO
+-
+  - LNC-ES-AR
+  - LN
+-
+  - LNC-ES-AR
+  - OSN
+-
+  - LNC-ES-AR
+  - LO
+-
+  - LNC-ES-AR
+  - OOSN
+-
+  - LNC-ES-MX
+  - LN
+-
+  - LNC-ES-MX
+  - LO
+-
+  - LNC-ES-MX
+  - LC
+-
+  - LNC-ES-MX
+  - OLC
+-
+  - LNC-ES-ES
+  - LN
+-
+  - LNC-ES-ES
+  - LO
+-
+  - LNC-ET-EE
+  - LN
+-
+  - LNC-ET-EE
+  - LO
+-
+  - LNC-FR-BE
+  - LN
+-
+  - LNC-FR-BE
+  - LO
+-
+  - LNC-FR-CA
+  - LN
+-
+  - LNC-FR-CA
+  - LO
+-
+  - LNC-FR-FR
+  - LN
+-
+  - LNC-FR-FR
+  - LC
+-
+  - LNC-FR-FR
+  - OLC
+-
+  - LNC-FR-FR
+  - LO
+-
+  - LNC-IT-IT
+  - LN
+-
+  - LNC-IT-IT
+  - LO
+-
+  - LNC-KO-KR
+  - LN
+-
+  - LNC-KO-KR
+  - LO
+-
+  - LNC-PL-PL
+  - LN
+-
+  - LNC-PL-PL
+  - LO
+-
+  - LNC-NL-NL
+  - LN
+-
+  - LNC-NL-NL
+  - LO
+-
+  - LNC-PT-BR
+  - LN
+-
+  - LNC-PT-BR
+  - OSN
+-
+  - LNC-PT-BR
+  - LO
+-
+  - LNC-PT-BR
+  - OOSN
+-
+  - LNC-RU-RU
+  - LN
+-
+  - LNC-RU-RU
+  - LO
+-
+  - LNC-TR-TR
+  - LN
+-
+  - LNC-TR-TR
+  - LO
+-
+  - LNC-UK-UA
+  - LN
+-
+  - LNC-UK-UA
+  - LC
+-
+  - LNC-UK-UA
+  - OSN
+-
+  - LNC-UK-UA
+  - LVDN
+-
+  - LNC-ZH-CN
+  - LN
+-
+  - LNC-ZH-CN
+  - LO
+-
+  - LNC-DE-AT
+  - LN
+-
+  - LNC-DE-AT
+  - LO
+-
+  - LNC-DE-AT
+  - LVDN
+-
+  - MEDLINEPLUS_SPA
+  - PT
+-
+  - MEDLINEPLUS_SPA
+  - HT
+-
+  - WHOFRE
+  - PT
+-
+  - WHOGER
+  - PT
+-
+  - WHOPOR
+  - PT
+-
+  - WHOSPA
+  - PT
+-
+  - WHOFRE
+  - IT
+-
+  - WHOGER
+  - IT
+-
+  - WHOPOR
+  - IT
+-
+  - WHOSPA
+  - IT
+-
+  - WHOFRE
+  - OS
+-
+  - WHOGER
+  - OS
+-
+  - WHOPOR
+  - OS
+-
+  - WHOSPA
+  - OS
+-
+  - CPTSP
+  - PT
+-
+  - DMDUMD
+  - PT
+-
+  - DMDUMD
+  - ET
+-
+  - DMDUMD
+  - RT
+-
+  - DMDICD10
+  - PT
+-
+  - DMDICD10
+  - HT
+-
+  - ICPCBAQ
+  - PT
+-
+  - ICPCDAN
+  - PT
+-
+  - ICPC2EDUT
+  - PT
+-
+  - ICD10DUT
+  - PT
+-
+  - ICD10DUT
+  - HT
+-
+  - ICPC2ICD10DUT
+  - PT
+-
+  - ICPCDUT
+  - PT
+-
+  - ICPCFIN
+  - PT
+-
+  - ICPCFRE
+  - PT
+-
+  - ICPCGER
+  - PT
+-
+  - ICPCHEB
+  - PT
+-
+  - ICPCHUN
+  - PT
+-
+  - ICPCITA
+  - PT
+-
+  - ICPCNOR
+  - PT
+-
+  - ICPCPOR
+  - PT
+-
+  - ICPCSPA
+  - PT
+-
+  - ICPCSWE
+  - PT
+-
+  - ICPCBAQ
+  - CP
+-
+  - ICPCDAN
+  - CP
+-
+  - ICPCDUT
+  - CP
+-
+  - ICPCFIN
+  - CP
+-
+  - ICPCFRE
+  - CP
+-
+  - ICPCGER
+  - CP
+-
+  - ICPCHEB
+  - CP
+-
+  - ICPCHUN
+  - CP
+-
+  - ICPCITA
+  - CP
+-
+  - ICPCNOR
+  - CP
+-
+  - ICPCPOR
+  - CP
+-
+  - ICPCSPA
+  - CP
+-
+  - ICPCSWE
+  - CP
+-
+  - MTHMSTFRE
+  - PT
+-
+  - MTHMSTITA
+  - PT
+-
+  - SRC
+  - RPT
+-
+  - SRC
+  - RHT
+-
+  - SRC
+  - RAB
+-
+  - SRC
+  - RSY
+-
+  - SRC
+  - VPT
+-
+  - SRC
+  - VAB
+-
+  - SRC
+  - VSY
+-
+  - SRC
+  - SSN
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 2a36c339..7636bec4 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -18,8 +18,6 @@
 import kg2_util
 import json
 
-
-
 CUIS_KEY = 'cuis'
 INFO_KEY = 'attributes'
 NAMES_KEY = 'names'
@@ -55,30 +53,32 @@
 UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
 
 # Mined from HTML Page Source of https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-ACCESSION_HEIRARCHY = [('MTH', 'PN'), ('RXNORM', 'SCD'), ('RXNORM', 'SBD'), ('RXNORM', 'SCDG'), ('RXNORM', 'SBDG'), ('RXNORM', 'BPCK'), ('RXNORM', 'GPCK'),
-                       ('RXNORM', 'IN'), ('RXNORM', 'PSN'), ('RXNORM', 'MIN'), ('RXNORM', 'SCDF'), ('RXNORM', 'SBDF'), ('RXNORM', 'SCDC'), ('RXNORM', 'DFG'),
-                       ('RXNORM', 'DF'), ('RXNORM', 'SBDC'), ('RXNORM', 'BN'), ('RXNORM', 'PIN'), ('RXNORM', 'TMSY'), ('RXNORM', 'SY'), ('MSH', 'MH'),
-                       ('MSH', 'TQ'), ('MSH', 'PEP'), ('MSH', 'ET'), ('MSH', 'XQ'), ('MSH', 'PXQ'), ('MSH', 'NM'), ('HPO', 'PT'), ('HPO', 'SY'), ('HPO', 'ET'),
-                       ('NCBI', 'SCN'), ('ATC', 'RXN_PT'), ('ATC', 'PT'), ('VANDF', 'PT'), ('VANDF', 'CD'), ('VANDF', 'IN'), ('DRUGBANK', 'IN'),
-                       ('DRUGBANK', 'SY'), ('DRUGBANK', 'FSY'), ('MSH', 'N1'), ('MSH', 'PCE'), ('MSH', 'CE'), ('FMA', 'PT'), ('FMA', 'SY'), ('FMA', 'AB'),
-                       ('ATC', 'RXN_IN'), ('ATC', 'IN'), ('VANDF', 'AB'), ('VANDF', 'MTH_RXN_CD'), ('NDDF', 'MTH_RXN_CDC'), ('NDDF', 'CDC'), ('NDDF', 'CDD'),
-                       ('NDDF', 'CDA'), ('NDDF', 'IN'), ('NDDF', 'DF'), ('MED-RT', 'PT'), ('MED-RT', 'FN'), ('MED-RT', 'SY'), ('HCPCS', 'PT'), ('HCPCS', 'MP'),
-                       ('OMIM', 'PT'), ('OMIM', 'PHENO'), ('OMIM', 'PHENO_ET'), ('OMIM', 'PTAV'), ('OMIM', 'PTCS'), ('OMIM', 'ETAL'), ('OMIM', 'ET'),
-                       ('OMIM', 'HT'), ('OMIM', 'ACR'), ('HGNC', 'PT'), ('HGNC', 'ACR'), ('HGNC', 'MTH_ACR'), ('HGNC', 'NA'), ('HGNC', 'SYN'), ('HGNC', 'NP'),
-                       ('HGNC', 'NS'), ('NCI', 'PT'), ('NCI', 'SY'), ('NCI', 'CSN'), ('NCI', 'DN'), ('NCI', 'FBD'), ('NCI', 'HD'), ('NCI', 'CCN'),
-                       ('NCI', 'AD'), ('NCI', 'CA2'), ('NCI', 'CA3'), ('NCI', 'BN'), ('NCI', 'AB'), ('NCI', 'CCS'), ('PDQ', 'PT'), ('PDQ', 'HT'),
-                       ('PDQ', 'PSC'), ('PDQ', 'SY'), ('CHV', 'PT'), ('MEDLINEPLUS', 'PT'), ('GO', 'PT'), ('GO', 'MTH_PT'), ('GO', 'ET'), ('GO', 'MTH_ET'),
-                       ('GO', 'SY'), ('GO', 'MTH_SY'), ('PDQ', 'ET'), ('PDQ', 'CU'), ('PDQ', 'LV'), ('PDQ', 'ACR'), ('PDQ', 'AB'), ('PDQ', 'BN'), ('PDQ', 'FBD'),
-                       ('PDQ', 'CCN'), ('PDQ', 'CHN'), ('NCBI', 'USN'), ('NCBI', 'USY'), ('NCBI', 'SY'), ('NCBI', 'UCN'), ('NCBI', 'CMN'), ('NCBI', 'UE'),
-                       ('NCBI', 'EQ'), ('ICD9CM', 'PT'), ('ICD9CM', 'HT'), ('ICD10PCS', 'PT'), ('ICD10PCS', 'PX'), ('ICD10PCS', 'HX'), ('ICD10PCS', 'MTH_HX'),
-                       ('ICD10PCS', 'HT'), ('ICD10PCS', 'HS'), ('ICD10PCS', 'AB'), ('HL7V3.0', 'CSY'), ('HL7V3.0', 'PT'), ('HL7V3.0', 'CDO'), ('HL7V3.0', 'VS'),
-                       ('HL7V3.0', 'BR'), ('HL7V3.0', 'CPR'), ('HL7V3.0', 'CR'), ('HL7V3.0', 'NPT'), ('HCPCS', 'MTH_HT'), ('MTH', 'CV'), ('MTH', 'XM'),
-                       ('MTH', 'PT'), ('MTH', 'SY'), ('MTH', 'RT'), ('ICD9CM', 'AB'), ('PSY', 'PT'), ('PSY', 'HT'), ('PSY', 'ET'), ('MEDLINEPLUS', 'ET'),
-                       ('MEDLINEPLUS', 'SY'), ('MEDLINEPLUS', 'HT'), ('MSH', 'HT'), ('MSH', 'HS'), ('MSH', 'DEV'), ('MSH', 'DSV'), ('MSH', 'QAB'),
-                       ('MSH', 'QEV'), ('MSH', 'QSV'), ('MSH', 'PM'), ('HCPCS', 'AB'), ('MTH', 'DT'), ('HCPCS', 'AM'), ('CHV', 'SY'), ('RXNORM', 'ET'),
-                       ('HPO', 'OP'), ('HPO', 'IS'), ('NCI', 'OP'), ('HPO', 'OET'), ('HCPCS', 'OP'), ('HCPCS', 'OM'), ('HCPCS', 'OAM'), ('GO', 'OP'),
-                       ('GO', 'MTH_OP'), ('GO', 'OET'), ('GO', 'MTH_OET'), ('GO', 'IS'), ('GO', 'MTH_IS'), ('PDQ', 'OP'), ('PDQ', 'IS'), ('HL7V3.0', 'OP'),
-                       ('HL7V3.0', 'ONP'), ('HCPCS', 'OA'), ('FMA', 'OP'), ('FMA', 'IS')]
+ACCESSION_HEIRARCHY = list()
+ACCESSION_SOURCES_HEIRARCHY = dict()
+                       # [('MTH', 'PN'), ('RXNORM', 'SCD'), ('RXNORM', 'SBD'), ('RXNORM', 'SCDG'), ('RXNORM', 'SBDG'), ('RXNORM', 'BPCK'), ('RXNORM', 'GPCK'),
+                       # ('RXNORM', 'IN'), ('RXNORM', 'PSN'), ('RXNORM', 'MIN'), ('RXNORM', 'SCDF'), ('RXNORM', 'SBDF'), ('RXNORM', 'SCDC'), ('RXNORM', 'DFG'),
+                       # ('RXNORM', 'DF'), ('RXNORM', 'SBDC'), ('RXNORM', 'BN'), ('RXNORM', 'PIN'), ('RXNORM', 'TMSY'), ('RXNORM', 'SY'), ('MSH', 'MH'),
+                       # ('MSH', 'TQ'), ('MSH', 'PEP'), ('MSH', 'ET'), ('MSH', 'XQ'), ('MSH', 'PXQ'), ('MSH', 'NM'), ('HPO', 'PT'), ('HPO', 'SY'), ('HPO', 'ET'),
+                       # ('NCBI', 'SCN'), ('ATC', 'RXN_PT'), ('ATC', 'PT'), ('VANDF', 'PT'), ('VANDF', 'CD'), ('VANDF', 'IN'), ('DRUGBANK', 'IN'),
+                       # ('DRUGBANK', 'SY'), ('DRUGBANK', 'FSY'), ('MSH', 'N1'), ('MSH', 'PCE'), ('MSH', 'CE'), ('FMA', 'PT'), ('FMA', 'SY'), ('FMA', 'AB'),
+                       # ('ATC', 'RXN_IN'), ('ATC', 'IN'), ('VANDF', 'AB'), ('VANDF', 'MTH_RXN_CD'), ('NDDF', 'MTH_RXN_CDC'), ('NDDF', 'CDC'), ('NDDF', 'CDD'),
+                       # ('NDDF', 'CDA'), ('NDDF', 'IN'), ('NDDF', 'DF'), ('MED-RT', 'PT'), ('MED-RT', 'FN'), ('MED-RT', 'SY'), ('HCPCS', 'PT'), ('HCPCS', 'MP'),
+                       # ('OMIM', 'PT'), ('OMIM', 'PHENO'), ('OMIM', 'PHENO_ET'), ('OMIM', 'PTAV'), ('OMIM', 'PTCS'), ('OMIM', 'ETAL'), ('OMIM', 'ET'),
+                       # ('OMIM', 'HT'), ('OMIM', 'ACR'), ('HGNC', 'PT'), ('HGNC', 'ACR'), ('HGNC', 'MTH_ACR'), ('HGNC', 'NA'), ('HGNC', 'SYN'), ('HGNC', 'NP'),
+                       # ('HGNC', 'NS'), ('NCI', 'PT'), ('NCI', 'SY'), ('NCI', 'CSN'), ('NCI', 'DN'), ('NCI', 'FBD'), ('NCI', 'HD'), ('NCI', 'CCN'),
+                       # ('NCI', 'AD'), ('NCI', 'CA2'), ('NCI', 'CA3'), ('NCI', 'BN'), ('NCI', 'AB'), ('NCI', 'CCS'), ('PDQ', 'PT'), ('PDQ', 'HT'),
+                       # ('PDQ', 'PSC'), ('PDQ', 'SY'), ('CHV', 'PT'), ('MEDLINEPLUS', 'PT'), ('GO', 'PT'), ('GO', 'MTH_PT'), ('GO', 'ET'), ('GO', 'MTH_ET'),
+                       # ('GO', 'SY'), ('GO', 'MTH_SY'), ('PDQ', 'ET'), ('PDQ', 'CU'), ('PDQ', 'LV'), ('PDQ', 'ACR'), ('PDQ', 'AB'), ('PDQ', 'BN'), ('PDQ', 'FBD'),
+                       # ('PDQ', 'CCN'), ('PDQ', 'CHN'), ('NCBI', 'USN'), ('NCBI', 'USY'), ('NCBI', 'SY'), ('NCBI', 'UCN'), ('NCBI', 'CMN'), ('NCBI', 'UE'),
+                       # ('NCBI', 'EQ'), ('ICD9CM', 'PT'), ('ICD9CM', 'HT'), ('ICD10PCS', 'PT'), ('ICD10PCS', 'PX'), ('ICD10PCS', 'HX'), ('ICD10PCS', 'MTH_HX'),
+                       # ('ICD10PCS', 'HT'), ('ICD10PCS', 'HS'), ('ICD10PCS', 'AB'), ('HL7V3.0', 'CSY'), ('HL7V3.0', 'PT'), ('HL7V3.0', 'CDO'), ('HL7V3.0', 'VS'),
+                       # ('HL7V3.0', 'BR'), ('HL7V3.0', 'CPR'), ('HL7V3.0', 'CR'), ('HL7V3.0', 'NPT'), ('HCPCS', 'MTH_HT'), ('MTH', 'CV'), ('MTH', 'XM'),
+                       # ('MTH', 'PT'), ('MTH', 'SY'), ('MTH', 'RT'), ('ICD9CM', 'AB'), ('PSY', 'PT'), ('PSY', 'HT'), ('PSY', 'ET'), ('MEDLINEPLUS', 'ET'),
+                       # ('MEDLINEPLUS', 'SY'), ('MEDLINEPLUS', 'HT'), ('MSH', 'HT'), ('MSH', 'HS'), ('MSH', 'DEV'), ('MSH', 'DSV'), ('MSH', 'QAB'),
+                       # ('MSH', 'QEV'), ('MSH', 'QSV'), ('MSH', 'PM'), ('HCPCS', 'AB'), ('MTH', 'DT'), ('HCPCS', 'AM'), ('CHV', 'SY'), ('RXNORM', 'ET'),
+                       # ('HPO', 'OP'), ('HPO', 'IS'), ('NCI', 'OP'), ('HPO', 'OET'), ('HCPCS', 'OP'), ('HCPCS', 'OM'), ('HCPCS', 'OAM'), ('GO', 'OP'),
+                       # ('GO', 'MTH_OP'), ('GO', 'OET'), ('GO', 'MTH_OET'), ('GO', 'IS'), ('GO', 'MTH_IS'), ('PDQ', 'OP'), ('PDQ', 'IS'), ('HL7V3.0', 'OP'),
+                       # ('HL7V3.0', 'ONP'), ('HCPCS', 'OA'), ('FMA', 'OP'), ('FMA', 'IS')]
 
 def get_args():
     arg_parser = argparse.ArgumentParser(description='umls_list_jsonl_to_kg_jsonl.py: converts UMLS MySQL JSON Lines dump into KG2 JSON format')
@@ -140,8 +140,10 @@ def make_umls_node(node_curie, iri, name, category, update_date, provided_by, sy
     nodes_output.write(node)
 
 
-def get_basic_info(curie_prefix, node_id, info, accession_heirarchy):
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
+def get_basic_info(curie_prefix, node_id, info, umls_code):
+    # accession_heirarchy
+    # for (umls_code_compare, name_key) in ACCESSION_HEIRARCHY:
+
     cuis = info.get(CUIS_KEY, list())
     tuis = info.get(TUIS_KEY, list())
     if curie_prefix == kg2_util.CURIE_PREFIX_UMLS:
@@ -155,11 +157,11 @@ def get_basic_info(curie_prefix, node_id, info, accession_heirarchy):
     names = info.get(NAMES_KEY, dict())
     name, synonyms = get_name_synonyms(names, accession_heirarchy)
 
-    return node_curie, iri, name, provided_by, category, synonyms, cuis, tuis
+    return node_curie, iri, name, category, synonyms, cuis, tuis
 
 
-def process_atc_item(node_id, info, nodes_output, edges_output):
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(ATC_PREFIX, node_id, info, ['RXN_PT', 'PT', 'RXN_IN', 'IN'])
+def process_atc_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['RXN_PT', 'PT', 'RXN_IN', 'IN'])
 
     # Currently not used, but extracting them in case we want them in the future
     atc_level = info.get(INFO_KEY, dict()).get('ATC_LEVEL', list())[0]
@@ -168,8 +170,8 @@ def process_atc_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_chv_item(node_id, info, nodes_output, edges_output):
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(CHV_PREFIX, node_id, info, ['PT', 'SY'])
+def process_chv_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['PT', 'SY'])
 
     # Currently not used, but extracting them in case we want them in the future
     combo_score = info.get(INFO_KEY, dict()).get('COMBO_SCORE', list())
@@ -182,8 +184,8 @@ def process_chv_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_drugbank_item(node_id, info, nodes_output, edges_output):
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(DRUGBANK_PREFIX, node_id, info, ['IN', 'SY', 'FSY'])
+def process_drugbank_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['IN', 'SY', 'FSY'])
 
     # Currently not used, but extracting them in case we want them in the future
     fda_codes = info.get(INFO_KEY, dict()).get('FDA_UNII_CODE', list())
@@ -193,8 +195,8 @@ def process_drugbank_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_fma_item(node_id, info, nodes_output, edges_output):
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(FMA_PREFIX, node_id, info, ['PT', 'SY', 'AB', 'OP', 'IS'])
+def process_fma_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['PT', 'SY', 'AB', 'OP', 'IS'])
 
     # Currently not used, but extracting them in case we want them in the future
     authority = info.get(INFO_KEY, dict()).get('AUTHORITY', list())
@@ -203,9 +205,9 @@ def process_fma_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_go_item(node_id, info, nodes_output, edges_output):
+def process_go_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PT', 'MTH_PT', 'ET', 'MTH_ET', 'SY', 'MTH_SY', 'OP', 'MTH_OP', 'OET', 'MTH_OET', 'IS', 'MTH_IS']
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(GO_PREFIX, node_id.replace('GO:', ''), info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id.replace('GO:', ''), info, accession_heirarchy)
 
     # GO-specific information
     attributes = info.get(INFO_KEY, dict())
@@ -231,8 +233,8 @@ def process_go_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description(go_comment, tuis), nodes_output)
 
 
-def process_hcpcs_item(node_id, info, nodes_output, edges_output):
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HCPCS_PREFIX, node_id, info, ['PT', 'MP', 'MTH_HT'])
+def process_hcpcs_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['PT', 'MP', 'MTH_HT'])
 
     # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
     attributes = info.get(INFO_KEY, dict())
@@ -257,9 +259,9 @@ def process_hcpcs_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_hgnc_item(node_id, info, nodes_output, edges_output):
+def process_hgnc_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PT', 'ACR', 'MTH_ACR', 'NA', 'SYN', 'NP', 'NS']
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HGNC_PREFIX, node_id.replace('HGNC:', ''), info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id.replace('HGNC:', ''), info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
     attributes = info.get(INFO_KEY, dict())
@@ -296,12 +298,11 @@ def process_hgnc_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_hl7_item(node_id, info, nodes_output, edges_output):
+def process_hl7_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['CSY', 'PT', 'CDO', 'VS', 'BR', 'CPR', 'CR', 'NPT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HL7_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
     if node_curie == None:
         return
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'HL7')
 
     # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
     attributes = info.get(INFO_KEY, dict())
@@ -344,9 +345,9 @@ def process_hl7_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_hpo_item(node_id, info, nodes_output, edges_output):
+def process_hpo_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PT', 'SY', 'ET', 'OP', 'IS', 'OET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(HPO_PREFIX, node_id.replace('HP:', ''), info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id.replace('HP:', ''), info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -359,9 +360,9 @@ def process_hpo_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_icd10pcs_item(node_id, info, nodes_output, edges_output):
+def process_icd10pcs_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PT', 'PX', 'HX', 'MTH_HX', 'HT', 'HS', 'AB'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(ICD10PCS_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -371,9 +372,9 @@ def process_icd10pcs_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_icd9cm_item(node_id, info, nodes_output, edges_output):
+def process_icd9cm_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PT', 'HT', 'AB'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(ICD9CM_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'ICD9CM')
 
     # Currently not used, but extracting them in case we want them in the future
@@ -387,12 +388,11 @@ def process_icd9cm_item(node_id, info, nodes_output, edges_output):
 
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
-def process_medrt_item(node_id, info, nodes_output, edges_output):
+def process_medrt_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PT', 'FN', 'SY'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(MEDRT_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
     if node_curie == None:
         return
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'MED-RT')
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -402,12 +402,11 @@ def process_medrt_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_medlineplus_item(node_id, info, nodes_output, edges_output):
+def process_medlineplus_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PT', 'ET', 'SY', 'HT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(MEDLINEPLUS_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
     if node_curie == None:
         return
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'MEDLINEPLUS')
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -420,9 +419,9 @@ def process_medlineplus_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_msh_item(node_id, info, nodes_output, edges_output):
+def process_msh_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['MH', 'TQ', 'PEP', 'ET', 'XQ', 'PXQ', 'NM', 'N1', 'PCE', 'CE', 'HT', 'HS', 'DEV', 'DSV', 'QAB', 'QEV', 'QSV', 'PM'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(MSH_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'MSH')
 
     # Currently not used, but extracting them in case we want them in the future
@@ -458,12 +457,11 @@ def process_msh_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_mth_item(node_id, info, nodes_output, edges_output):
+def process_mth_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PN', 'CV', 'XM', 'PT', 'SY', 'RT', 'DT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(MTH_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
     if node_curie == None:
         return
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'MTH')
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -488,9 +486,9 @@ def process_mth_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_ncbi_item(node_id, info, nodes_output, edges_output):
+def process_ncbi_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['SCN', 'USN', 'USY', 'SY', 'UCN', 'CMN', 'UE', 'EQ'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(NCBI_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -501,9 +499,9 @@ def process_ncbi_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_nci_item(node_id, info, nodes_output, edges_output):
+def process_nci_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PT', 'SY', 'CSN', 'DN', 'FBD', 'HD', 'CCN', 'AD', 'CA2', 'CA3', 'BN', 'AB', 'CCS', 'OP'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(NCI_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
     provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'NCI')
 
     # Currently not used, but extracting them in case we want them in the future
@@ -556,9 +554,9 @@ def process_nci_item(node_id, info, nodes_output, edges_output):
 
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
-def process_nddf_item(node_id, info, nodes_output, edges_output):
+def process_nddf_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['MTH_RXN_CDC', 'CDC', 'CDD', 'CDA', 'IN', 'DF'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(NDDF_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -566,9 +564,9 @@ def process_nddf_item(node_id, info, nodes_output, edges_output):
 
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
-def process_omim_item(node_id, info, nodes_output, edges_output):
+def process_omim_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PT', 'PHENO', 'PHENO_ET', 'PTAV', 'PTCS', 'ETAL', 'ET', 'HT', 'ACR'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(OMIM_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -583,9 +581,9 @@ def process_omim_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_pdq_item(node_id, info, nodes_output, edges_output):
+def process_pdq_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PT', 'HT', 'PSC', 'SY', 'ET', 'CU', 'LV', 'ACR', 'AB', 'BN', 'FBD', 'CCN', 'CHN', 'OP', 'IS'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(PDQ_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -605,9 +603,9 @@ def process_pdq_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_psy_item(node_id, info, nodes_output, edges_output):
+def process_psy_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PT', 'HT', 'ET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(PSY_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -617,9 +615,9 @@ def process_psy_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_rxnorm_item(node_id, info, nodes_output, edges_output):
+def process_rxnorm_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['SCD', 'SBD', 'SCDG', 'SBDG', 'BPCK', 'GPCK', 'IN', 'PSN', 'MIN', 'SCDF', 'SBDF', 'SCDC', 'DFG', 'DF', 'SBDC', 'BN', 'PIN', 'TMSY', 'SY', 'ET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(RXNORM_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -651,9 +649,9 @@ def process_rxnorm_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-def process_vandf_item(node_id, info, nodes_output, edges_output):
+def process_vandf_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
     accession_heirarchy = ['PT', 'CD', 'IN', 'AB', 'MTH_RXN_CD'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = get_basic_info(VANDF_PREFIX, node_id, info, accession_heirarchy)
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
 
     # Currently not used, but extracting them in case we want them in the future
     attributes = info.get(INFO_KEY, dict())
@@ -676,29 +674,29 @@ def process_vandf_item(node_id, info, nodes_output, edges_output):
     make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
 
 
-DESIRED_CODES = {'ATC': process_atc_item,
-                 'CHV': process_chv_item,
-                 'DRUGBANK': process_drugbank_item,
-                 'FMA': process_fma_item,
-                 'GO': process_go_item,
-                 'HCPCS': process_hcpcs_item,
-                 'HGNC': process_hgnc_item,
-                 'HL7V3.0': process_hl7_item,
-                 'HPO': process_hpo_item,
-                 'ICD10PCS': process_icd10pcs_item,
-                 'ICD9CM': process_icd9cm_item,
-                 'MED-RT': process_medrt_item,
-                 'MEDLINEPLUS': process_medlineplus_item,
-                 'MSH': process_msh_item,
-                 'MTH': process_mth_item,
-                 'NCBI': process_ncbi_item,
-                 'NCI': process_nci_item,
-                 'NDDF': process_nddf_item,
-                 'OMIM': process_omim_item,
-                 'PDQ': process_pdq_item,
-                 'PSY': process_psy_item,
-                 'RXNORM': process_rxnorm_item,
-                 'VANDF': process_vandf_item}
+DESIRED_CODES = {'ATC': [process_atc_item, kg2_util.CURIE_PREFIX_ATC, make_node_id(UMLS_SOURCE_PREFIX, 'ATC')],
+                 'CHV': [process_chv_item, kg2_util.CURIE_PREFIX_CHV, make_node_id(UMLS_SOURCE_PREFIX, 'CHV')],
+                 'DRUGBANK': [process_drugbank_item, kg2_util.CURIE_PREFIX_DRUGBANK, make_node_id(UMLS_SOURCE_PREFIX, 'DRUGBANK')],
+                 'FMA': [process_fma_item, kg2_util.CURIE_PREFIX_FMA, make_node_id(UMLS_SOURCE_PREFIX, 'FMA')],
+                 'GO': [process_go_item, kg2_util.CURIE_PREFIX_GO, make_node_id(UMLS_SOURCE_PREFIX, 'GO')],
+                 'HCPCS': [process_hcpcs_item, kg2_util.CURIE_PREFIX_HCPCS, make_node_id(UMLS_SOURCE_PREFIX, 'HCPCS')],
+                 'HGNC': [process_hgnc_item, kg2_util.CURIE_PREFIX_HGNC, make_node_id(UMLS_SOURCE_PREFIX, 'HGNC')],
+                 'HL7V3.0': [process_hl7_item, kg2_util.CURIE_PREFIX_UMLS, make_node_id(UMLS_SOURCE_PREFIX, 'HL7')],
+                 'HPO': [process_hpo_item, kg2_util.CURIE_PREFIX_HP, make_node_id(UMLS_SOURCE_PREFIX, 'HPO')],
+                 'ICD10PCS': [process_icd10pcs_item, kg2_util.CURIE_PREFIX_ICD10PCS, make_node_id(UMLS_SOURCE_PREFIX, 'ICD10PCS')],
+                 'ICD9CM': [process_icd9cm_item, kg2_util.CURIE_PREFIX_ICD9, make_node_id(UMLS_SOURCE_PREFIX, 'ICD9CM')],
+                 'MED-RT': [process_medrt_item, kg2_util.CURIE_PREFIX_UMLS, make_node_id(UMLS_SOURCE_PREFIX, 'MED-RT')],
+                 'MEDLINEPLUS': [process_medlineplus_item, kg2_util.CURIE_PREFIX_UMLS, make_node_id(UMLS_SOURCE_PREFIX, 'MEDLINEPLUS')],
+                 'MSH': [process_msh_item, kg2_util.CURIE_PREFIX_MESH, make_node_id(UMLS_SOURCE_PREFIX, 'MSH')],
+                 'MTH': [process_mth_item, kg2_util.CURIE_PREFIX_UMLS, make_node_id(UMLS_SOURCE_PREFIX, 'MTH')],
+                 'NCBI': [process_ncbi_item, kg2_util.CURIE_PREFIX_NCBI_TAXON, make_node_id(UMLS_SOURCE_PREFIX, 'NCBITAXON')],
+                 'NCI': [process_nci_item, kg2_util.CURIE_PREFIX_NCIT, make_node_id(UMLS_SOURCE_PREFIX, 'NCI')],
+                 'NDDF': [process_nddf_item, kg2_util.CURIE_PREFIX_NDDF, make_node_id(UMLS_SOURCE_PREFIX, 'NDDF')],
+                 'OMIM': [process_omim_item, kg2_util.CURIE_PREFIX_OMIM, make_node_id(UMLS_SOURCE_PREFIX, 'OMIM')],
+                 'PDQ': [process_pdq_item, kg2_util.CURIE_PREFIX_PDQ, make_node_id(UMLS_SOURCE_PREFIX, 'PDQ')],
+                 'PSY': [process_psy_item, kg2_util.CURIE_PREFIX_PSY, make_node_id(UMLS_SOURCE_PREFIX, 'PSY')],
+                 'RXNORM': [process_rxnorm_item, kg2_util.CURIE_PREFIX_RXNORM, make_node_id(UMLS_SOURCE_PREFIX, 'RXNORM')],
+                 'VANDF': [process_vandf_item, kg2_util.CURIE_PREFIX_VANDF, make_node_id(UMLS_SOURCE_PREFIX, 'VANDF')]}  # UMLS source code -> [handler function, CURIE prefix, provided_by CURIE]
 
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
@@ -722,6 +720,8 @@ def process_vandf_item(node_id, info, nodes_output, edges_output):
         TUI_MAPPINGS = json.load(mappings)
 
     iri_mappings_raw = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string('curies-to-urls-map.yaml'))['use_for_bidirectional_mapping']
+    heirarchy = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string('umls-name-heirarchy.yaml'))
+    print(json.dumps(heirarchy, indent=4))
     for item in iri_mappings_raw:
         for prefix in item:
             IRI_MAPPINGS[prefix] = item[prefix]
@@ -734,16 +734,12 @@ def process_vandf_item(node_id, info, nodes_output, edges_output):
             value = data[entity]
             source, node_id = extract_node_id(entity)
 
-            if source == 'UMLS':
-                name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
-                attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))
-
             if source not in DESIRED_CODES:
                 continue
 
             # Process the data specifically by source
-            DESIRED_CODES[source](node_id, value, nodes_output, edges_output)
-
+            [source_function, curie_prefix, provided_by] = DESIRED_CODES[source]
+            source_function(node_id, value, nodes_output, edges_output, source, curie_prefix, provided_by)
 
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
     kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)
diff --git a/umls_util.py b/umls_util.py
new file mode 100644
index 00000000..84d76c55
--- /dev/null
+++ b/umls_util.py
@@ -0,0 +1,573 @@
+#!/usr/bin/env python3
+'''umls_list_jsonl_to_kg_jsonl.py: converts UMLS MySQL JSON Lines dump into KG2 JSON format
+
+   Usage: umls_list_jsonl_to_kg_jsonl.py [--test] <inputFile.jsonl> <outputNodesFile.json> <outputEdgesFile.jsonl>
+'''
+
+__author__ = 'Erica Wood'
+__copyright__ = 'Oregon State University'
+__credits__ = ['Stephen Ramsey', 'Erica Wood']
+__license__ = 'MIT'
+__version__ = '0.1.0'
+__maintainer__ = ''
+__email__ = ''
+__status__ = 'Prototype'
+
+
+import kg2_util
+
+
+def make_node_id(curie_prefix, node_id):
+    return ':'.join((curie_prefix, node_id))  # CURIE of the form "<prefix>:<id>"
+
+
+def get_name_synonyms(names_dict, accession_heirarchy):
+    # Flatten names in accession_heirarchy order ('Y' entries before 'N' for each key); the first is the name, the rest are synonyms
+    ordered_names = list()
+    for term_type in accession_heirarchy:
+        by_flag = names_dict.get(term_type, dict())
+        ordered_names.extend(by_flag.get('Y', list()))
+        ordered_names.extend(by_flag.get('N', list()))
+    assert len(ordered_names) > 0
+    return ordered_names[0], ordered_names[1:]
+
+
+def make_umls_node(node_curie, iri, name, category, update_date, provided_by, synonyms, description, nodes_output):
+    # Build the base node via kg2_util.make_node, attach synonyms and description, then write it to nodes_output
+    node = kg2_util.make_node(node_curie, iri, name, category, update_date, provided_by)  # use the caller's update_date instead of a hard-coded "2023"
+    node['synonym'] = synonyms
+    node['description'] = description
+    nodes_output.write(node)
+
+
+def get_basic_info(curie_prefix, node_id, info, umls_code):
+    # Returns (node_curie, iri, name, category, synonyms, cuis, tuis), or seven Nones when a UMLS-prefixed item does not have exactly one CUI.
+    # NOTE(review): despite its name, every visible caller passes the accession hierarchy (ordered list of name keys) as `umls_code`.
+    accession_heirarchy = umls_code
+    cuis = info.get(CUIS_KEY, list())
+    tuis = info.get(TUIS_KEY, list())
+    if curie_prefix == kg2_util.CURIE_PREFIX_UMLS:
+        if len(cuis) != 1:
+            return None, None, None, None, None, None, None  # must match the 7-value unpacking done by callers
+        node_id = cuis[0]
+    node_curie = make_node_id(curie_prefix, node_id)
+    iri = IRI_MAPPINGS[curie_prefix] + node_id
+    category = TUI_MAPPINGS[str(tuple(tuis))]
+
+    names = info.get(NAMES_KEY, dict())
+    name, synonyms = get_name_synonyms(names, accession_heirarchy)
+
+    return node_curie, iri, name, category, synonyms, cuis, tuis
+
+def process_atc_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    atc_name_keys = ['RXN_PT', 'PT', 'RXN_IN', 'IN']
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, atc_name_keys)
+    # Extracted for possible future use only; note the [0] means an item lacking ATC_LEVEL raises IndexError
+    attributes = info.get(INFO_KEY, dict())
+    atc_level = attributes.get('ATC_LEVEL', list())[0]
+    is_drug_class = attributes.get('IS_DRUG_CLASS', list()) == ["Y"]
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
+def process_chv_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['PT', 'SY'])
+
+    # CHV attributes are read here but not written to the node; kept in case they are wanted later
+    attributes = info.get(INFO_KEY, dict())
+    combo_score = attributes.get('COMBO_SCORE', list())
+    combo_score_no_top_words = attributes.get('COMBO_SCORE_NO_TOP_WORDS', list())
+    context_score = attributes.get('CONTEXT_SCORE', list())
+    cui_score = attributes.get('CUI_SCORE', list())
+    disparaged = attributes.get('DISPARAGED', list())
+    frequency = attributes.get('FREQUENCY', list())
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
+def process_drugbank_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['IN', 'SY', 'FSY'])
+
+    # Read but not yet emitted on the node; retained for possible future use
+    attributes = info.get(INFO_KEY, dict())
+    fda_codes = attributes.get('FDA_UNII_CODE', list())
+    secondary_accession_keys = attributes.get('SID', list())
+    # TODO: figure out update date
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
+def process_fma_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['PT', 'SY', 'AB', 'OP', 'IS'])
+
+    # Read but not yet emitted on the node; retained for possible future use
+    attributes = info.get(INFO_KEY, dict())
+    authority = attributes.get('AUTHORITY', list())
+    date_last_modified = attributes.get('DATE_LAST_MODIFIED', list())
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
+def process_go_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    go_name_keys = ['PT', 'MTH_PT', 'ET', 'MTH_ET', 'SY', 'MTH_SY', 'OP', 'MTH_OP', 'OET', 'MTH_OET', 'IS', 'MTH_IS']
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id.replace('GO:', ''), info, go_name_keys)
+
+    # GO-specific information: a recognized GO_NAMESPACE overrides the category returned by get_basic_info
+    attributes = info.get(INFO_KEY, dict())
+    namespaces = attributes.get('GO_NAMESPACE', list())
+    assert len(namespaces) == 1
+    namespace_category_map = {'molecular_function': kg2_util.BIOLINK_CATEGORY_MOLECULAR_ACTIVITY,
+                              'cellular_component': kg2_util.BIOLINK_CATEGORY_CELLULAR_COMPONENT,
+                              'biological_process': kg2_util.BIOLINK_CATEGORY_BIOLOGICAL_PROCESS}
+    category = namespace_category_map.get(namespaces[0], category)
+
+    # A non-empty GO_COMMENT is reduced to its first element and prefixed before going into the description
+    go_comment = attributes.get('GO_COMMENT', str())
+    if len(go_comment) > 0:
+        go_comment = "// COMMENTS: " + go_comment[0]
+
+    # Read but not yet emitted on the node; retained for possible future use
+    date_created = attributes.get('DATE_CREATED', list())
+    go_subset = attributes.get('GO_SUBSET', list())
+    gxr = attributes.get('GXR', list())
+    ref = attributes.get('REF', list())
+    sid = attributes.get('SID', list())
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description(go_comment, tuis), nodes_output)
+
+
+def process_hcpcs_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['PT', 'MP', 'MTH_HT'])
+
+    # HCPCS attributes below are read but not emitted on the node; descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
+    attributes = info.get(INFO_KEY, dict())
+    action_effective_date = attributes.get('HAD', list()) # HCPCS Action Effective Date - effective date of action to a procedure or modifier code.
+    coverage_code = attributes.get('HCC', list()) # HCPCS Coverage Code - code denoting Medicare coverage status. There are two subelements separated by "=".
+    type_of_service_code = attributes.get('HTS', list()) # HCPCS Type of Service Code - carrier assigned HCFA Type of Service which describes the particular kind(s) of service represented by the procedure code.
+    code_added_date = attributes.get('HCD', list()) # HCPCS Code Added Date - year the HCPCS code was added to the HCFA Common Procedure Coding System.
+    processing_note_number = attributes.get('HPN', list()) # HCPCS processing note number identifying the processing note contained in Appendix A of the HCPCS Manual.
+    anesthesia_base_unit_quantity = attributes.get('HAQ', list()) # HCPCS Anesthesia Base Unit Quantity - base unit represents the level of intensity for anesthesia procedure services that reflects all activities except time.
+    lab_certification_code = attributes.get('HLC', list()) # HCPCS Lab Certification Code - code used to classify laboratory procedures according to the specialty certification categories listed by CMS(formerly HCFA).
+    statute_number = attributes.get('HSN', list()) # HCPCS Statute Number identifying statute reference for coverage or noncoverage of procedure or service.
+    asc_payment_group_effective_date = attributes.get('HPD', list()) # HCPCS ASC payment group effective date - date the procedure is assigned to the ASC payment group.
+    asc_payment_group_code = attributes.get('HPG', list()) # HCPCS ASC payment group code which represents the dollar amount of the facility charge payable by Medicare for the procedure.
+    medicare_carriers_manual_reference = attributes.get('HMR', list()) # HCPCS Medicare Carriers Manual reference section number - number identifying a section of the Medicare Carriers Manual.
+    coverage_issues_manual_reference = attributes.get('HIR', list()) # HCPCS Coverage Issues Manual Reference Section Number - number identifying the Reference Section of the Coverage Issues Manual.
+    cross_reference_code = attributes.get('HXR', list()) # HCPCS Cross reference code - an explicit reference crosswalking a deleted code or a code that is not valid for Medicare to a valid current code (or range of codes).
+    multiple_pricing_indicator_code = attributes.get('HMP', list()) # HCPCS Multiple Pricing Indicator Code - code used to identify instances where a procedure could be priced.
+    pricing_indicator_code = attributes.get('HPI', list()) # HCPCS Pricing Indicator Code - used to identify the appropriate methodology for developing unique pricing amounts under Part B.
+    action_code = attributes.get('HAC', list()) # HCPCS action code - code denoting the change made to a procedure or modifier code within the HCPCS system.
+    betos_code = attributes.get('HBT', list()) # HCPCS Berenson-Eggers Type of Service Code - BETOS for the procedure code based on generally agreed upon clinically meaningful groupings of procedures and services.
+
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+
+
+def process_hgnc_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one HGNC item's basic fields via get_basic_info (after removing 'HGNC:' from node_id) and pass them to make_umls_node; edges_output and umls_code are accepted but unused here."""
+    precedence = ['PT', 'ACR', 'MTH_ACR', 'NA', 'SYN', 'NP', 'NS']
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id.replace('HGNC:', ''), info, precedence)
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    mgd_id = attrs.get('MGD_ID', [])
+    vega_id = attrs.get('VEGA_ID', [])
+    gencc = attrs.get('GENCC', [])
+    swp = attrs.get('SWP', [])
+    mane_select = attrs.get('MANE_SELECT', [])
+    locus_specific_db_xr = attrs.get('LOCUS_SPECIFIC_DB_XR', [])
+    locus_type = attrs.get('LOCUS_TYPE', [])
+    agr = attrs.get('AGR', [])
+    cytogenetic_location = attrs.get('CYTOGENETIC_LOCATION', [])
+    date_created = attrs.get('DATE_CREATED', [])
+    ensemblgene_id = attrs.get('ENSEMBLGENE_ID', [])
+    db_xr_id = attrs.get('DB_XR_ID', [])
+    locus_group = attrs.get('LOCUS_GROUP', [])
+    entrezgene_id = attrs.get('ENTREZGENE_ID', [])
+    date_name_changed = attrs.get('DATE_NAME_CHANGED', [])
+    pmid = attrs.get('PMID', [])
+    date_last_modified = attrs.get('DATE_LAST_MODIFIED', [])
+    mapped_ucsc_id = attrs.get('MAPPED_UCSC_ID', [])
+    refseq_id = attrs.get('REFSEQ_ID', [])
+    ena = attrs.get('ENA', [])
+    rgd_id = attrs.get('RGD_ID', [])
+    date_symbol_changed = attrs.get('DATE_SYMBOL_CHANGED', [])
+    omim_id = attrs.get('OMIM_ID', [])
+    gene_fam_id = attrs.get('GENE_FAM_ID', [])
+    gene_symbol = attrs.get('GENESYMBOL', [])
+    ez = attrs.get('EZ', [])
+    ccds_id = attrs.get('CCDS_ID', [])
+    lncipedia = attrs.get('LNCIPEDIA', [])
+    gene_fam_desc = attrs.get('GENE_FAM_DESC', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_hl7_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one HL7 item's basic fields via get_basic_info and pass them to make_umls_node; returns early without emitting when node_curie is None. edges_output and umls_code are accepted but unused here."""
+    precedence = ['CSY', 'PT', 'CDO', 'VS', 'BR', 'CPR', 'CR', 'NPT']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    if node_curie is None:  # identity test for the None sentinel; no CURIE means nothing to emit
+        return
+    # The attribute lookups below are not consumed yet; they are kept for possible future use - descriptions at https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
+    attrs = info.get(INFO_KEY, {})
+    hl7at = attrs.get('HL7AT', [])
+    hl7ii = attrs.get('HL7II', [])
+    hl7im = attrs.get('HL7IM', [])
+    hl7lt = attrs.get('HL7LT', [])
+    hl7un = attrs.get('HL7UN', [])
+    hl7oa = attrs.get('HL7OA', [])
+    hl7scs = attrs.get('HL7SCS', [])
+    hl7cc = attrs.get('HL7CC', [])
+    hl7na = attrs.get('HL7NA', [])
+    hl7in = attrs.get('HL7IN', [])
+    hl7ap = attrs.get('HL7AP', [])
+    hl7mi = attrs.get('HL7MI', [])
+    hl7hi = attrs.get('HL7HI', [])
+    hl7ir = attrs.get('HL7IR', [])
+    hl7ai = attrs.get('HL7AI', [])
+    hl7ha = attrs.get('HL7HA', [])
+    hl7rf = attrs.get('HL7RF', [])
+    hl7rd = attrs.get('HL7RD', [])
+    hl7vd = attrs.get('HL7VD', [])
+    hl7dc = attrs.get('HL7DC', [])
+    hl7rk = attrs.get('HL7RK', [])
+    hl7is = attrs.get('HL7IS', [])
+    hl7sy = attrs.get('HL7SY', [])
+    hl7cd = attrs.get('HL7CD', [])
+    hl7sl = attrs.get('HL7SL', [])
+    hl7pl = attrs.get('HL7PL', [])
+    hl7vc = attrs.get('HL7VC', [])
+    hl7ty = attrs.get('HL7TY', [])
+    hl7rg = attrs.get('HL7RG', [])
+    hl7csc = attrs.get('HL7CSC', [])
+    hl7od = attrs.get('HL7OD', [])
+    hl7id = attrs.get('HL7ID', [])
+    hl7tr = attrs.get('HL7TR', [])
+    hl7di = attrs.get('HL7DI', [])
+    hl7cs = attrs.get('HL7CS', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_hpo_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one HPO item's basic fields via get_basic_info (after removing 'HP:' from node_id) and pass them to make_umls_node; edges_output and umls_code are accepted but unused here."""
+    precedence = ['PT', 'SY', 'ET', 'OP', 'IS', 'OET']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id.replace('HP:', ''), info, precedence)
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    sid = attrs.get('SID', [])
+    hpo_comment = attrs.get('HPO_COMMENT', [])
+    date_created = attrs.get('DATE_CREATED', [])
+    syn_qualifier = attrs.get('SYN_QUALIFIER', [])
+    ref = attrs.get('REF', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_icd10pcs_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one ICD10PCS item's basic fields via get_basic_info and pass them to make_umls_node; edges_output and umls_code are accepted but unused here."""
+    precedence = ['PT', 'PX', 'HX', 'MTH_HX', 'HT', 'HS', 'AB']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    added_meaning = attrs.get('ADDED_MEANING', [])
+    order_no = attrs.get('ORDER_NO', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_icd9cm_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one ICD9CM item's basic fields via get_basic_info and pass them to make_umls_node, using make_node_id(UMLS_SOURCE_PREFIX, 'ICD9CM') in place of the provided_by argument; edges_output and umls_code are accepted but unused here."""
+    precedence = ['PT', 'HT', 'AB']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'ICD9CM')  # NOTE(review): overrides the provided_by argument - confirm intended
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    icc = attrs.get('ICC', [])
+    ice = attrs.get('ICE', [])
+    icf = attrs.get('ICF', [])
+    sos = attrs.get('SOS', [])
+    icn = attrs.get('ICN', [])
+    ica = attrs.get('ICA', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+def process_medrt_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one MED-RT item's basic fields via get_basic_info and pass them to make_umls_node; returns early without emitting when node_curie is None. edges_output and umls_code are accepted but unused here."""
+    precedence = ['PT', 'FN', 'SY']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    if node_curie is None:  # identity test for the None sentinel; no CURIE means nothing to emit
+        return
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    term_status = attrs.get('TERM_STATUS', [])
+    concept_type = attrs.get('CONCEPT_TYPE', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_medlineplus_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one MEDLINEPLUS item's basic fields via get_basic_info and pass them to make_umls_node; returns early without emitting when node_curie is None. edges_output and umls_code are accepted but unused here."""
+    precedence = ['PT', 'ET', 'SY', 'HT']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    if node_curie is None:  # identity test for the None sentinel; no CURIE means nothing to emit
+        return
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    sos = attrs.get('SOS', [])
+    date_created = attrs.get('DATE_CREATED', [])
+    mp_group_url = attrs.get('MP_GROUP_URL', [])
+    mp_primary_institute_url = attrs.get('MP_PRIMARY_INSTITUTE_URL', [])
+    mp_other_language_url = attrs.get('MP_OTHER_LANGUAGE_URL', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_msh_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one MSH item's basic fields via get_basic_info and pass them to make_umls_node, using make_node_id(UMLS_SOURCE_PREFIX, 'MSH') in place of the provided_by argument; edges_output and umls_code are accepted but unused here."""
+    precedence = ['MH', 'TQ', 'PEP', 'ET', 'XQ', 'PXQ', 'NM', 'N1', 'PCE', 'CE', 'HT', 'HS', 'DEV', 'DSV', 'QAB', 'QEV', 'QSV', 'PM']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'MSH')  # NOTE(review): overrides the provided_by argument - confirm intended
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    mmr = attrs.get('MMR', [])
+    fx = attrs.get('FX', [])
+    lt = attrs.get('LT', [])
+    dc = attrs.get('DC', [])
+    pa = attrs.get('PA', [])
+    rr = attrs.get('RR', [])
+    hm = attrs.get('HM', [])
+    pi = attrs.get('PI', [])
+    ec = attrs.get('EC', [])
+    hn = attrs.get('HN', [])
+    termui = attrs.get('TERMUI', [])
+    th = attrs.get('TH', [])
+    sos = attrs.get('SOS', [])
+    ii = attrs.get('II', [])
+    rn = attrs.get('RN', [])
+    an = attrs.get('AN', [])
+    cx = attrs.get('CX', [])
+    dq = attrs.get('DQ', [])
+    dx = attrs.get('DX', [])
+    pm = attrs.get('PM', [])
+    aql = attrs.get('AQL', [])
+    sc = attrs.get('SC', [])
+    fr = attrs.get('FR', [])
+    mda = attrs.get('MDA', [])
+    src = attrs.get('SRC', [])
+    ol = attrs.get('OL', [])
+    mn = attrs.get('MN', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_mth_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one MTH item's basic fields via get_basic_info and pass them to make_umls_node; returns early without emitting when node_curie is None. edges_output and umls_code are accepted but unused here."""
+    precedence = ['PN', 'CV', 'XM', 'PT', 'SY', 'RT', 'DT']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    if node_curie is None:  # identity test for the None sentinel; no CURIE means nothing to emit
+        return
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    mth_mapsetcomplexity = attrs.get('MTH_MAPSETCOMPLEXITY', [])
+    fromvsab = attrs.get('FROMVSAB', [])
+    mapsetrsab = attrs.get('MAPSETRSAB', [])
+    mapsetversion = attrs.get('MAPSETVERSION', [])
+    mapsetvsab = attrs.get('MAPSETVSAB', [])
+    tovsab = attrs.get('TOVSAB', [])
+    mth_mapfromexhaustive = attrs.get('MTH_MAPFROMEXHAUSTIVE', [])
+    torsab = attrs.get('TORSAB', [])
+    mapsetsid = attrs.get('MAPSETSID', [])
+    mapsetgrammar = attrs.get('MAPSETGRAMMAR', [])
+    mapsettype = attrs.get('MAPSETTYPE', [])
+    mth_maptoexhaustive = attrs.get('MTH_MAPTOEXHAUSTIVE', [])
+    fromrsab = attrs.get('FROMRSAB', [])
+    mth_mapfromcomplexity = attrs.get('MTH_MAPFROMCOMPLEXITY', [])
+    lt = attrs.get('LT', [])
+    mth_maptocomplexity = attrs.get('MTH_MAPTOCOMPLEXITY', [])
+    sos = attrs.get('SOS', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_ncbi_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one NCBI item's basic fields via get_basic_info and pass them to make_umls_node; edges_output and umls_code are accepted but unused here."""
+    precedence = ['SCN', 'USN', 'USY', 'SY', 'UCN', 'CMN', 'UE', 'EQ']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    div = attrs.get('DIV', [])
+    authority_name = attrs.get('AUTHORITY_NAME', [])
+    rank = attrs.get('RANK', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_nci_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one NCI item's basic fields via get_basic_info and pass them to make_umls_node, using make_node_id(UMLS_SOURCE_PREFIX, 'NCI') in place of the provided_by argument; edges_output and umls_code are accepted but unused here."""
+    precedence = ['PT', 'SY', 'CSN', 'DN', 'FBD', 'HD', 'CCN', 'AD', 'CA2', 'CA3', 'BN', 'AB', 'CCS', 'OP']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'NCI')  # NOTE(review): overrides the provided_by argument - confirm intended
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    clinvar_variation_id = attrs.get('CLINVAR_VARIATION_ID', [])
+    micronutrient = attrs.get('MICRONUTRIENT', [])
+    genbank_accession_number = attrs.get('GENBANK_ACCESSION_NUMBER', [])
+    fda_table = attrs.get('FDA_TABLE', [])
+    usda_id = attrs.get('USDA_ID', [])
+    icd_o_3_code = attrs.get('ICD-O-3_CODE', [])
+    tolerable_level = attrs.get('TOLERABLE_LEVEL', [])
+    ncbi_taxon_id = attrs.get('NCBI_TAXON_ID', [])
+    mgi_accession_id = attrs.get('MGI_ACCESSION_ID', [])
+    homologous_gene = attrs.get('HOMOLOGOUS_GENE', [])
+    pid_id = attrs.get('PID_ID', [])
+    swiss_prot = attrs.get('SWISS_PROT', [])
+    essential_amino_acid = attrs.get('ESSENTIAL_AMINO_ACID', [])
+    publish_value_set = attrs.get('PUBLISH_VALUE_SET', [])
+    cas_registry = attrs.get('CAS_REGISTRY', [])
+    value_set_pair = attrs.get('VALUE_SET_PAIR', [])
+    accepted_therapeutic_use_for = attrs.get('ACCEPTED_THERAPEUTIC_USE_FOR', [])
+    hgnc_id = attrs.get('HGNC_ID', [])
+    nci_drug_dictionary_id = attrs.get('NCI_DRUG_DICTIONARY_ID', [])
+    chebi_id = attrs.get('CHEBI_ID', [])
+    cnu = attrs.get('CNU', [])
+    mirbase_id = attrs.get('MIRBASE_ID', [])
+    macronutrient = attrs.get('MACRONUTRIENT', [])
+    essential_fatty_acid = attrs.get('ESSENTIAL_FATTY_ACID', [])
+    unit = attrs.get('UNIT', [])
+    pdq_open_trial_search_id = attrs.get('PDQ_OPEN_TRIAL_SEARCH_ID', [])
+    term_browser_value_set_description = attrs.get('TERM_BROWSER_VALUE_SET_DESCRIPTION', [])
+    entrezgene_id = attrs.get('ENTREZGENE_ID', [])
+    infoods = attrs.get('INFOODS', [])
+    pubmedid_primary_reference = attrs.get('PUBMEDID_PRIMARY_REFERENCE', [])
+    biocarta_id = attrs.get('BIOCARTA_ID', [])
+    extensible_list = attrs.get('EXTENSIBLE_LIST', [])
+    use_for = attrs.get('USE_FOR', [])
+    neoplastic_status = attrs.get('NEOPLASTIC_STATUS', [])
+    nsc_number = attrs.get('NSC_NUMBER', [])
+    omim_number = attrs.get('OMIM_NUMBER', [])
+    lt = attrs.get('LT', [])
+    kegg_id = attrs.get('KEGG_ID', [])
+    gene_encodes_product = attrs.get('GENE_ENCODES_PRODUCT', [])
+    pdq_closed_trial_search_id = attrs.get('PDQ_CLOSED_TRIAL_SEARCH_ID', [])
+    design_note = attrs.get('DESIGN_NOTE', [])
+    nutrient = attrs.get('NUTRIENT', [])
+    fda_unii_code = attrs.get('FDA_UNII_CODE', [])
+    us_recommended_intake = attrs.get('US_RECOMMENDED_INTAKE', [])
+    chemical_formula = attrs.get('CHEMICAL_FORMULA', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+def process_nddf_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one NDDF item's basic fields via get_basic_info and pass them to make_umls_node; edges_output and umls_code are accepted but unused here."""
+    precedence = ['MTH_RXN_CDC', 'CDC', 'CDD', 'CDA', 'IN', 'DF']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    # The attribute lookup below is not consumed yet; it is kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    ndc = attrs.get('NDC', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+def process_omim_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one OMIM item's basic fields via get_basic_info and pass them to make_umls_node; edges_output and umls_code are accepted but unused here."""
+    precedence = ['PT', 'PHENO', 'PHENO_ET', 'PTAV', 'PTCS', 'ETAL', 'ET', 'HT', 'ACR']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    genesymbol = attrs.get('GENESYMBOL', [])
+    mimtypevalue = attrs.get('MIMTYPEVALUE', [])
+    moved_from = attrs.get('MOVED_FROM', [])
+    sos = attrs.get('SOS', [])
+    genelocus = attrs.get('GENELOCUS', [])
+    mimtypemeaning = attrs.get('MIMTYPEMEANING', [])
+    mimtype = attrs.get('MIMTYPE', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_pdq_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one PDQ item's basic fields via get_basic_info and pass them to make_umls_node; edges_output and umls_code are accepted but unused here."""
+    precedence = ['PT', 'HT', 'PSC', 'SY', 'ET', 'CU', 'LV', 'ACR', 'AB', 'BN', 'FBD', 'CCN', 'CHN', 'OP', 'IS']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    lt = attrs.get('LT', [])
+    cas_registry = attrs.get('CAS_REGISTRY', [])
+    date_first_published = attrs.get('DATE_FIRST_PUBLISHED', [])
+    date_last_modified = attrs.get('DATE_LAST_MODIFIED', [])
+    ind_code = attrs.get('IND_CODE', [])
+    pid = attrs.get('PID', [])
+    nsc_code = attrs.get('NSC_CODE', [])
+    pxc = attrs.get('PXC', [])
+    menu_parent = attrs.get('MENU_PARENT', [])
+    nci_id = attrs.get('NCI_ID', [])
+    orig_sty = attrs.get('ORIG_STY', [])
+    menu_type = attrs.get('MENU_TYPE', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_psy_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one PSY item's basic fields via get_basic_info and pass them to make_umls_node; edges_output and umls_code are accepted but unused here."""
+    precedence = ['PT', 'HT', 'ET']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    hn = attrs.get('HN', [])
+    pyr = attrs.get('PYR', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_rxnorm_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one RXNORM item's basic fields via get_basic_info and pass them to make_umls_node; edges_output and umls_code are accepted but unused here."""
+    precedence = ['SCD', 'SBD', 'SCDG', 'SBDG', 'BPCK', 'GPCK', 'IN', 'PSN', 'MIN', 'SCDF', 'SBDF', 'SCDC', 'DFG', 'DF', 'SBDC', 'BN', 'PIN', 'TMSY', 'SY', 'ET']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    ndc = attrs.get('NDC', [])
+    rxn_obsoleted = attrs.get('RXN_OBSOLETED', [])
+    rxn_available_strength = attrs.get('RXN_AVAILABLE_STRENGTH', [])
+    rxn_human_drug = attrs.get('RXN_HUMAN_DRUG', [])
+    rxn_quantity = attrs.get('RXN_QUANTITY', [])
+    rxterm_form = attrs.get('RXTERM_FORM', [])
+    rxn_in_expressed_flag = attrs.get('RXN_IN_EXPRESSED_FLAG', [])
+    rxaui = attrs.get('RXAUI', [])
+    rxn_bn_cardinality = attrs.get('RXN_BN_CARDINALITY', [])
+    rxn_activated = attrs.get('RXN_ACTIVATED', [])
+    rxn_boss_strength_denom_unit = attrs.get('RXN_BOSS_STRENGTH_DENOM_UNIT', [])
+    ambiguity_flag = attrs.get('AMBIGUITY_FLAG', [])
+    rxn_strength = attrs.get('RXN_STRENGTH', [])
+    rxcui = attrs.get('RXCUI', [])
+    rxn_ai = attrs.get('RXN_AI', [])
+    rxn_boss_from = attrs.get('RXN_BOSS_FROM', [])
+    rxn_boss_strength_num_unit = attrs.get('RXN_BOSS_STRENGTH_NUM_UNIT', [])
+    rxn_vet_drug = attrs.get('RXN_VET_DRUG', [])
+    orig_code = attrs.get('ORIG_CODE', [])
+    rxn_am = attrs.get('RXN_AM', [])
+    rxn_boss_strength_denom_value = attrs.get('RXN_BOSS_STRENGTH_DENOM_VALUE', [])
+    rxn_boss_strength_num_value = attrs.get('RXN_BOSS_STRENGTH_NUM_VALUE', [])
+    rxn_qualitative_distinction = attrs.get('RXN_QUALITATIVE_DISTINCTION', [])
+    orig_source = attrs.get('ORIG_SOURCE', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm
+
+
+def process_vandf_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
+    """Resolve one VANDF item's basic fields via get_basic_info and pass them to make_umls_node; edges_output and umls_code are accepted but unused here."""
+    precedence = ['PT', 'CD', 'IN', 'AB', 'MTH_RXN_CD']  # source: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, precedence)
+    # The attribute lookups below are not consumed yet; they are kept for possible future use
+    attrs = info.get(INFO_KEY, {})
+    ndf_transmit_to_cmop = attrs.get('NDF_TRANSMIT_TO_CMOP', [])
+    sngl_or_mult_src_prd = attrs.get('SNGL_OR_MULT_SRC_PRD', [])
+    dcsa = attrs.get('DCSA', [])
+    exclude_di_check = attrs.get('EXCLUDE_DI_CHECK', [])
+    nfi = attrs.get('NFI', [])
+    va_class_name = attrs.get('VA_CLASS_NAME', [])
+    vmo = attrs.get('VMO', [])
+    drug_class_type = attrs.get('DRUG_CLASS_TYPE', [])
+    nf_name = attrs.get('NF_NAME', [])
+    ndc = attrs.get('NDC', [])
+    vac = attrs.get('VAC', [])
+    va_generic_name = attrs.get('VA_GENERIC_NAME', [])
+    parent_class = attrs.get('PARENT_CLASS', [])
+    va_dispense_unit = attrs.get('VA_DISPENSE_UNIT', [])
+    ddf = attrs.get('DDF', [])
+    description = create_description("", tuis)
+    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)  # NOTE(review): "2023" is a hard-coded literal - confirm

From b419e86eb9688e0689ac4442bce9b331118c16bc Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Tue, 22 Aug 2023 15:55:26 -0700
Subject: [PATCH 063/117] #316 successfully refactored UMLS code into a class
 that takes care of all the variables and source picking

---
 umls_list_jsonl_to_kg_jsonl.py |  735 ++------------------
 umls_util.py                   | 1155 +++++++++++++++++---------------
 2 files changed, 667 insertions(+), 1223 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 7636bec4..49569844 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -17,68 +17,64 @@
 import argparse
 import kg2_util
 import json
-
-CUIS_KEY = 'cuis'
-INFO_KEY = 'attributes'
-NAMES_KEY = 'names'
-TUIS_KEY = 'tuis'
+import umls_util
 
 TUI_MAPPINGS = dict()
 IRI_MAPPINGS = dict()
 
-ATC_PREFIX = kg2_util.CURIE_PREFIX_ATC
-CHV_PREFIX = kg2_util.CURIE_PREFIX_CHV
-DRUGBANK_PREFIX = kg2_util.CURIE_PREFIX_DRUGBANK
-FMA_PREFIX = kg2_util.CURIE_PREFIX_FMA
-GO_PREFIX = kg2_util.CURIE_PREFIX_GO
-HCPCS_PREFIX = kg2_util.CURIE_PREFIX_HCPCS
-HGNC_PREFIX = kg2_util.CURIE_PREFIX_HGNC
-HL7_PREFIX = kg2_util.CURIE_PREFIX_UMLS
-HPO_PREFIX = kg2_util.CURIE_PREFIX_HP
-ICD10PCS_PREFIX = kg2_util.CURIE_PREFIX_ICD10PCS
-ICD9CM_PREFIX = kg2_util.CURIE_PREFIX_ICD9
-MEDRT_PREFIX = kg2_util.CURIE_PREFIX_UMLS
-MEDLINEPLUS_PREFIX = kg2_util.CURIE_PREFIX_UMLS
-MSH_PREFIX = kg2_util.CURIE_PREFIX_MESH
-MTH_PREFIX = kg2_util.CURIE_PREFIX_UMLS
-NCBI_PREFIX = kg2_util.CURIE_PREFIX_NCBI_TAXON
-NCI_PREFIX = kg2_util.CURIE_PREFIX_NCIT
-NDDF_PREFIX = kg2_util.CURIE_PREFIX_NDDF
-OMIM_PREFIX = kg2_util.CURIE_PREFIX_OMIM
-PDQ_PREFIX = kg2_util.CURIE_PREFIX_PDQ
-PSY_PREFIX = kg2_util.CURIE_PREFIX_PSY
-RXNORM_PREFIX = kg2_util.CURIE_PREFIX_RXNORM
-VANDF_PREFIX = kg2_util.CURIE_PREFIX_VANDF
-
-UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
-
-# Mined from HTML Page Source of https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-ACCESSION_HEIRARCHY = list()
-ACCESSION_SOURCES_HEIRARCHY = dict()
-                       # [('MTH', 'PN'), ('RXNORM', 'SCD'), ('RXNORM', 'SBD'), ('RXNORM', 'SCDG'), ('RXNORM', 'SBDG'), ('RXNORM', 'BPCK'), ('RXNORM', 'GPCK'),
-                       # ('RXNORM', 'IN'), ('RXNORM', 'PSN'), ('RXNORM', 'MIN'), ('RXNORM', 'SCDF'), ('RXNORM', 'SBDF'), ('RXNORM', 'SCDC'), ('RXNORM', 'DFG'),
-                       # ('RXNORM', 'DF'), ('RXNORM', 'SBDC'), ('RXNORM', 'BN'), ('RXNORM', 'PIN'), ('RXNORM', 'TMSY'), ('RXNORM', 'SY'), ('MSH', 'MH'),
-                       # ('MSH', 'TQ'), ('MSH', 'PEP'), ('MSH', 'ET'), ('MSH', 'XQ'), ('MSH', 'PXQ'), ('MSH', 'NM'), ('HPO', 'PT'), ('HPO', 'SY'), ('HPO', 'ET'),
-                       # ('NCBI', 'SCN'), ('ATC', 'RXN_PT'), ('ATC', 'PT'), ('VANDF', 'PT'), ('VANDF', 'CD'), ('VANDF', 'IN'), ('DRUGBANK', 'IN'),
-                       # ('DRUGBANK', 'SY'), ('DRUGBANK', 'FSY'), ('MSH', 'N1'), ('MSH', 'PCE'), ('MSH', 'CE'), ('FMA', 'PT'), ('FMA', 'SY'), ('FMA', 'AB'),
-                       # ('ATC', 'RXN_IN'), ('ATC', 'IN'), ('VANDF', 'AB'), ('VANDF', 'MTH_RXN_CD'), ('NDDF', 'MTH_RXN_CDC'), ('NDDF', 'CDC'), ('NDDF', 'CDD'),
-                       # ('NDDF', 'CDA'), ('NDDF', 'IN'), ('NDDF', 'DF'), ('MED-RT', 'PT'), ('MED-RT', 'FN'), ('MED-RT', 'SY'), ('HCPCS', 'PT'), ('HCPCS', 'MP'),
-                       # ('OMIM', 'PT'), ('OMIM', 'PHENO'), ('OMIM', 'PHENO_ET'), ('OMIM', 'PTAV'), ('OMIM', 'PTCS'), ('OMIM', 'ETAL'), ('OMIM', 'ET'),
-                       # ('OMIM', 'HT'), ('OMIM', 'ACR'), ('HGNC', 'PT'), ('HGNC', 'ACR'), ('HGNC', 'MTH_ACR'), ('HGNC', 'NA'), ('HGNC', 'SYN'), ('HGNC', 'NP'),
-                       # ('HGNC', 'NS'), ('NCI', 'PT'), ('NCI', 'SY'), ('NCI', 'CSN'), ('NCI', 'DN'), ('NCI', 'FBD'), ('NCI', 'HD'), ('NCI', 'CCN'),
-                       # ('NCI', 'AD'), ('NCI', 'CA2'), ('NCI', 'CA3'), ('NCI', 'BN'), ('NCI', 'AB'), ('NCI', 'CCS'), ('PDQ', 'PT'), ('PDQ', 'HT'),
-                       # ('PDQ', 'PSC'), ('PDQ', 'SY'), ('CHV', 'PT'), ('MEDLINEPLUS', 'PT'), ('GO', 'PT'), ('GO', 'MTH_PT'), ('GO', 'ET'), ('GO', 'MTH_ET'),
-                       # ('GO', 'SY'), ('GO', 'MTH_SY'), ('PDQ', 'ET'), ('PDQ', 'CU'), ('PDQ', 'LV'), ('PDQ', 'ACR'), ('PDQ', 'AB'), ('PDQ', 'BN'), ('PDQ', 'FBD'),
-                       # ('PDQ', 'CCN'), ('PDQ', 'CHN'), ('NCBI', 'USN'), ('NCBI', 'USY'), ('NCBI', 'SY'), ('NCBI', 'UCN'), ('NCBI', 'CMN'), ('NCBI', 'UE'),
-                       # ('NCBI', 'EQ'), ('ICD9CM', 'PT'), ('ICD9CM', 'HT'), ('ICD10PCS', 'PT'), ('ICD10PCS', 'PX'), ('ICD10PCS', 'HX'), ('ICD10PCS', 'MTH_HX'),
-                       # ('ICD10PCS', 'HT'), ('ICD10PCS', 'HS'), ('ICD10PCS', 'AB'), ('HL7V3.0', 'CSY'), ('HL7V3.0', 'PT'), ('HL7V3.0', 'CDO'), ('HL7V3.0', 'VS'),
-                       # ('HL7V3.0', 'BR'), ('HL7V3.0', 'CPR'), ('HL7V3.0', 'CR'), ('HL7V3.0', 'NPT'), ('HCPCS', 'MTH_HT'), ('MTH', 'CV'), ('MTH', 'XM'),
-                       # ('MTH', 'PT'), ('MTH', 'SY'), ('MTH', 'RT'), ('ICD9CM', 'AB'), ('PSY', 'PT'), ('PSY', 'HT'), ('PSY', 'ET'), ('MEDLINEPLUS', 'ET'),
-                       # ('MEDLINEPLUS', 'SY'), ('MEDLINEPLUS', 'HT'), ('MSH', 'HT'), ('MSH', 'HS'), ('MSH', 'DEV'), ('MSH', 'DSV'), ('MSH', 'QAB'),
-                       # ('MSH', 'QEV'), ('MSH', 'QSV'), ('MSH', 'PM'), ('HCPCS', 'AB'), ('MTH', 'DT'), ('HCPCS', 'AM'), ('CHV', 'SY'), ('RXNORM', 'ET'),
-                       # ('HPO', 'OP'), ('HPO', 'IS'), ('NCI', 'OP'), ('HPO', 'OET'), ('HCPCS', 'OP'), ('HCPCS', 'OM'), ('HCPCS', 'OAM'), ('GO', 'OP'),
-                       # ('GO', 'MTH_OP'), ('GO', 'OET'), ('GO', 'MTH_OET'), ('GO', 'IS'), ('GO', 'MTH_IS'), ('PDQ', 'OP'), ('PDQ', 'IS'), ('HL7V3.0', 'OP'),
-                       # ('HL7V3.0', 'ONP'), ('HCPCS', 'OA'), ('FMA', 'OP'), ('FMA', 'IS')]
+# ATC_PREFIX = kg2_util.CURIE_PREFIX_ATC
+# CHV_PREFIX = kg2_util.CURIE_PREFIX_CHV
+# DRUGBANK_PREFIX = kg2_util.CURIE_PREFIX_DRUGBANK
+# FMA_PREFIX = kg2_util.CURIE_PREFIX_FMA
+# GO_PREFIX = kg2_util.CURIE_PREFIX_GO
+# HCPCS_PREFIX = kg2_util.CURIE_PREFIX_HCPCS
+# HGNC_PREFIX = kg2_util.CURIE_PREFIX_HGNC
+# HL7_PREFIX = kg2_util.CURIE_PREFIX_UMLS
+# HPO_PREFIX = kg2_util.CURIE_PREFIX_HP
+# ICD10PCS_PREFIX = kg2_util.CURIE_PREFIX_ICD10PCS
+# ICD9CM_PREFIX = kg2_util.CURIE_PREFIX_ICD9
+# MEDRT_PREFIX = kg2_util.CURIE_PREFIX_UMLS
+# MEDLINEPLUS_PREFIX = kg2_util.CURIE_PREFIX_UMLS
+# MSH_PREFIX = kg2_util.CURIE_PREFIX_MESH
+# MTH_PREFIX = kg2_util.CURIE_PREFIX_UMLS
+# NCBI_PREFIX = kg2_util.CURIE_PREFIX_NCBI_TAXON
+# NCI_PREFIX = kg2_util.CURIE_PREFIX_NCIT
+# NDDF_PREFIX = kg2_util.CURIE_PREFIX_NDDF
+# OMIM_PREFIX = kg2_util.CURIE_PREFIX_OMIM
+# PDQ_PREFIX = kg2_util.CURIE_PREFIX_PDQ
+# PSY_PREFIX = kg2_util.CURIE_PREFIX_PSY
+# RXNORM_PREFIX = kg2_util.CURIE_PREFIX_RXNORM
+# VANDF_PREFIX = kg2_util.CURIE_PREFIX_VANDF
+
+# UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
+
+# DESIRED_CODES = {'ATC': [umls_util.process_atc_item, kg2_util.CURIE_PREFIX_ATC, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'ATC')],
+#                  'CHV': [umls_util.process_chv_item, kg2_util.CURIE_PREFIX_CHV, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'CHV')],
+#                  'DRUGBANK': [umls_util.process_drugbank_item, kg2_util.CURIE_PREFIX_DRUGBANK, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'DRUGBANK')],
+#                  'FMA': [umls_util.process_fma_item, kg2_util.CURIE_PREFIX_FMA, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'FMA')],
+#                  'GO': [umls_util.process_go_item, kg2_util.CURIE_PREFIX_GO, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'GO')],
+#                  'HCPCS': [umls_util.process_hcpcs_item, kg2_util.CURIE_PREFIX_HCPCS, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'HCPCS')],
+#                  'HGNC': [umls_util.process_hgnc_item, kg2_util.CURIE_PREFIX_HGNC, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'HGNC')],
+#                  'HL7V3.0': [umls_util.process_hl7_item, kg2_util.CURIE_PREFIX_UMLS, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'HL7')],
+#                  'HPO': [umls_util.process_hpo_item, kg2_util.CURIE_PREFIX_HP, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'HPO')],
+#                  'ICD10PCS': [umls_util.process_icd10pcs_item, kg2_util.CURIE_PREFIX_ICD10PCS, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'ICD10PCS')],
+#                  'ICD9CM': [umls_util.process_icd9cm_item, kg2_util.CURIE_PREFIX_ICD9, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'ICD9CM')],
+#                  'MED-RT': [umls_util.process_medrt_item, kg2_util.CURIE_PREFIX_UMLS, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'MED-RT')],
+#                  'MEDLINEPLUS': [umls_util.process_medlineplus_item, kg2_util.CURIE_PREFIX_UMLS, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'MEDLINEPLUS')],
+#                  'MSH': [umls_util.process_msh_item, kg2_util.CURIE_PREFIX_MESH, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'MSH')],
+#                  'MTH': [umls_util.process_mth_item, kg2_util.CURIE_PREFIX_UMLS, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'MTH')],
+#                  'NCBI': [umls_util.process_ncbi_item, kg2_util.CURIE_PREFIX_NCBI_TAXON, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'NCBITAXON')],
+#                  'NCI': [umls_util.process_nci_item, kg2_util.CURIE_PREFIX_NCIT, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'NCI')],
+#                  'NDDF': [umls_util.process_nddf_item, kg2_util.CURIE_PREFIX_NDDF, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'NCI')],
+#                  'OMIM': [umls_util.process_omim_item, kg2_util.CURIE_PREFIX_OMIM, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'OMIM')],
+#                  'PDQ': [umls_util.process_pdq_item, kg2_util.CURIE_PREFIX_PDQ, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'PDQ')],
+#                  'PSY': [umls_util.process_psy_item, kg2_util.CURIE_PREFIX_PSY, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'PSY')],
+#                  'RXNORM': [umls_util.process_rxnorm_item, kg2_util.CURIE_PREFIX_RXNORM, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'RXNORM')],
+#                  'VANDF': [umls_util.process_vandf_item, kg2_util.CURIE_PREFIX_VANDF, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'VANDF')]}
+
+# # Mined from HTML Page Source of https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
+# ACCESSION_HEIRARCHY = list()
+# ACCESSION_SOURCES_HEIRARCHY = dict()
 
 def get_args():
     arg_parser = argparse.ArgumentParser(description='umls_list_jsonl_to_kg_jsonl.py: converts UMLS MySQL JSON Lines dump into KG2 JSON format')
@@ -95,608 +91,16 @@ def extract_node_id(node_id_str):
     return node_id[0].strip(), node_id[1].strip()
 
 
-def make_node_id(curie_prefix, node_id):
-    return curie_prefix + ':' + node_id
-
-
-def create_description(comment, tuis):
-    description = comment
-    for tui in tuis:
-        description += "; UMLS Semantic Type: STY:" + tui
-    description = description.strip("; ")
-    return description    
-
-
-def get_name_synonyms(names_dict, accession_heirarchy):
-    names = list()
-    for key in accession_heirarchy:
-        names += [name for name in names_dict.get(key, dict()).get('Y', list())]
-        names += [name for name in names_dict.get(key, dict()).get('N', list())]
-    assert len(names) > 0
-    if len(names) == 1:
-        return names[0], list()
-    return names[0], names[1:]
-
-
-def get_name_keys(names_dict):
-    keys_list = []
-    for key in names_dict:
-        keys_list.append(key)
-    return str(sorted(keys_list))
-
-
-def get_attribute_keys(attributes_dict):
-    keys_list = []
-    for key in attributes_dict:
-        keys_list.append(key)
-    return set(keys_list)
-
-
-def make_umls_node(node_curie, iri, name, category, update_date, provided_by, synonyms, description, nodes_output):
-    node = kg2_util.make_node(node_curie, iri, name, category, "2023", provided_by)
-    node['synonym'] = synonyms
-    node['description'] = description
-
-    nodes_output.write(node)
-
-
-def get_basic_info(curie_prefix, node_id, info, umls_code):
-    # accession_heirarchy
-    # for (umls_code_compare, name_key) in ACCESSION_HEIRARCHY:
-
-    cuis = info.get(CUIS_KEY, list())
-    tuis = info.get(TUIS_KEY, list())
-    if curie_prefix == kg2_util.CURIE_PREFIX_UMLS:
-        if len(cuis) != 1:
-            return None, None, None, None, None, None, None, None
-        node_id = cuis[0]
-    node_curie = make_node_id(curie_prefix, node_id)
-    iri = IRI_MAPPINGS[curie_prefix] + node_id
-    category = TUI_MAPPINGS[str(tuple(tuis))]
-
-    names = info.get(NAMES_KEY, dict())
-    name, synonyms = get_name_synonyms(names, accession_heirarchy)
-
-    return node_curie, iri, name, category, synonyms, cuis, tuis
-
-
-def process_atc_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['RXN_PT', 'PT', 'RXN_IN', 'IN'])
-
-    # Currently not used, but extracting them in case we want them in the future
-    atc_level = info.get(INFO_KEY, dict()).get('ATC_LEVEL', list())[0]
-    is_drug_class = info.get(INFO_KEY, dict()).get('IS_DRUG_CLASS', list()) == ["Y"]
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_chv_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['PT', 'SY'])
-
-    # Currently not used, but extracting them in case we want them in the future
-    combo_score = info.get(INFO_KEY, dict()).get('COMBO_SCORE', list())
-    combo_score_no_top_words = info.get(INFO_KEY, dict()).get('COMBO_SCORE_NO_TOP_WORDS', list())
-    context_score = info.get(INFO_KEY, dict()).get('CONTEXT_SCORE', list())
-    cui_score = info.get(INFO_KEY, dict()).get('CUI_SCORE', list())
-    disparaged = info.get(INFO_KEY, dict()).get('DISPARAGED', list())
-    frequency = info.get(INFO_KEY, dict()).get('FREQUENCY', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_drugbank_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['IN', 'SY', 'FSY'])
-
-    # Currently not used, but extracting them in case we want them in the future
-    fda_codes = info.get(INFO_KEY, dict()).get('FDA_UNII_CODE', list())
-    secondary_accession_keys = info.get(INFO_KEY, dict()).get('SID', list())
-
-    # TODO: figure out update date
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_fma_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['PT', 'SY', 'AB', 'OP', 'IS'])
-
-    # Currently not used, but extracting them in case we want them in the future
-    authority = info.get(INFO_KEY, dict()).get('AUTHORITY', list())
-    date_last_modified = info.get(INFO_KEY, dict()).get('DATE_LAST_MODIFIED', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_go_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'MTH_PT', 'ET', 'MTH_ET', 'SY', 'MTH_SY', 'OP', 'MTH_OP', 'OET', 'MTH_OET', 'IS', 'MTH_IS']
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id.replace('GO:', ''), info, accession_heirarchy)
-
-    # GO-specific information
-    attributes = info.get(INFO_KEY, dict())
-    go_namespace = attributes.get('GO_NAMESPACE', list())
-    assert len(go_namespace) == 1
-    go_namespace = go_namespace[0]
-    namespace_category_map = {'molecular_function': kg2_util.BIOLINK_CATEGORY_MOLECULAR_ACTIVITY,
-                              'cellular_component': kg2_util.BIOLINK_CATEGORY_CELLULAR_COMPONENT,
-                              'biological_process': kg2_util.BIOLINK_CATEGORY_BIOLOGICAL_PROCESS}
-    category = namespace_category_map.get(go_namespace, category)
-    go_comment = attributes.get('GO_COMMENT', str())
-    if len(go_comment) > 0:
-        go_comment = go_comment[0]
-        go_comment = "// COMMENTS: " + go_comment
-
-    # Currently not used, but extracting them in case we want them in the future
-    date_created = attributes.get('DATE_CREATED', list())
-    go_subset = attributes.get('GO_SUBSET', list())
-    gxr = attributes.get('GXR', list())
-    ref = attributes.get('REF', list())
-    sid = attributes.get('SID', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description(go_comment, tuis), nodes_output)
-
-
-def process_hcpcs_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['PT', 'MP', 'MTH_HT'])
-
-    # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
-    attributes = info.get(INFO_KEY, dict())
-    had = attributes.get('HAD', list()) # HCPCS Action Effective Date - effective date of action to a procedure or modifier code.
-    hcc = attributes.get('HCC', list()) # HCPCS Coverage Code - code denoting Medicare coverage status. There are two subelements separated by "=".
-    hts = attributes.get('HTS', list()) # HCPCS Type of Service Code - carrier assigned HCFA Type of Service which describes the particular kind(s) of service represented by the procedure code.
-    hcd = attributes.get('HCD', list()) # HCPCS Code Added Date - year the HCPCS code was added to the HCFA Common Procedure Coding System.
-    hpn = attributes.get('HPN', list()) # HCPCS processing note number identifying the processing note contained in Appendix A of the HCPCS Manual.
-    haq = attributes.get('HAQ', list()) # HCPCS Anesthesia Base Unit Quantity - base unit represents the level of intensity for anesthesia procedure services that reflects all activities except time.
-    hlc = attributes.get('HLC', list()) # HCPCS Lab Certification Code - code used to classify laboratory procedures according to the specialty certification categories listed by CMS(formerly HCFA).
-    hsn = attributes.get('HSN', list()) # HCPCS Statute Number identifying statute reference for coverage or noncoverage of procedure or service.
-    hpd = attributes.get('HPD', list()) # HCPCS ASC payment group effective date - date the procedure is assigned to the ASC payment group.
-    hpg = attributes.get('HPG', list()) # HCPCS ASC payment group code which represents the dollar amount of the facility charge payable by Medicare for the procedure.
-    hmg = attributes.get('HMR', list()) # HCPCS Medicare Carriers Manual reference section number - number identifying a section of the Medicare Carriers Manual.
-    hir = attributes.get('HIR', list()) # HCPCS Coverage Issues Manual Reference Section Number - number identifying the Reference Section of the Coverage Issues Manual.
-    hxr = attributes.get('HXR', list()) # HCPCS Cross reference code - an explicit reference crosswalking a deleted code or a code that is not valid for Medicare to a valid current code (or range of codes).
-    hmp = attributes.get('HMP', list()) # HCPCS Multiple Pricing Indicator Code - code used to identify instances where a procedure could be priced.
-    hpi = attributes.get('HPI', list()) # HCPCS Pricing Indicator Code - used to identify the appropriate methodology for developing unique pricing amounts under Part B.
-    hac = attributes.get('HAC', list()) # HCPCS action code - code denoting the change made to a procedure or modifier code within the HCPCS system.
-    hbt = attributes.get('HBT', list()) # HCPCS Berenson-Eggers Type of Service Code - BETOS for the procedure code based on generally agreed upon clinically meaningful groupings of procedures and services.
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_hgnc_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'ACR', 'MTH_ACR', 'NA', 'SYN', 'NP', 'NS']
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id.replace('HGNC:', ''), info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
-    attributes = info.get(INFO_KEY, dict())
-    mgd_id = attributes.get('MGD_ID', list())
-    vega_id = attributes.get('VEGA_ID', list())
-    genecc = attributes.get('GENCC', list())
-    swp = attributes.get('SWP', list())
-    mane_select = attributes.get('MANE_SELECT', list())
-    local_specific_db_xr = attributes.get('LOCUS_SPECIFIC_DB_XR', list())
-    locus_type = attributes.get('LOCUS_TYPE', list())
-    agr = attributes.get('AGR', list())
-    cytogenetic_location = attributes.get('CYTOGENETIC_LOCATION', list())
-    date_created = attributes.get('DATE_CREATED', list())
-    ensemblgene_id = attributes.get('ENSEMBLGENE_ID', list())
-    db_xr_id = attributes.get('DB_XR_ID', list())
-    locus_group = attributes.get('LOCUS_GROUP', list())
-    entrezgene_id = attributes.get('ENTREZGENE_ID', list())
-    date_name_changed = attributes.get('DATE_NAME_CHANGED', list())
-    pmid = attributes.get('PMID', list())
-    date_last_modified = attributes.get('DATE_LAST_MODIFIED', list())
-    mapped_ucsc_id = attributes.get('MAPPED_UCSC_ID', list())
-    refseq_id = attributes.get('REFSEQ_ID', list())
-    ena = attributes.get('ENA', list())
-    rgd_id = attributes.get('RGD_ID', list())
-    date_symbol_changed = attributes.get('DATE_SYMBOL_CHANGED', list())
-    omim_id = attributes.get('OMIM_ID', list())
-    gene_fam_id = attributes.get('GENE_FAM_ID', list())
-    gene_symbol = attributes.get('GENESYMBOL', list())
-    ez = attributes.get('EZ', list())
-    ccds_id = attributes.get('CCDS_ID', list())
-    lncipedia = attributes.get('LNCIPEDIA', list())
-    gene_fam_desc = attributes.get('GENE_FAM_DESC', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_hl7_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['CSY', 'PT', 'CDO', 'VS', 'BR', 'CPR', 'CR', 'NPT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    if node_curie == None:
-        return
-
-    # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
-    attributes = info.get(INFO_KEY, dict())
-    hl7at = attributes.get('HL7AT', list())
-    hl7ii = attributes.get('HL7II', list())
-    hl7im = attributes.get('HL7IM', list())
-    hl7lt = attributes.get('HL7LT', list())
-    hl7un = attributes.get('HL7UN', list())
-    hl7oa = attributes.get('HL7OA', list())
-    hl7scs = attributes.get('HL7SCS', list())
-    hl7cc = attributes.get('HL7CC', list())
-    hl7na = attributes.get('HL7NA', list())
-    hl7in = attributes.get('HL7IN', list())
-    hl7ap = attributes.get('HL7AP', list())
-    hl7mi = attributes.get('HL7MI', list())
-    hl7hi = attributes.get('HL7HI', list())
-    hl7ir = attributes.get('HL7IR', list())
-    hl7ai = attributes.get('HL7AI', list())
-    hl7ha = attributes.get('HL7HA', list())
-    hl7rf = attributes.get('HL7RF', list())
-    hl7rd = attributes.get('HL7RD', list())
-    hl7vd = attributes.get('HL7VD', list())
-    hl7dc = attributes.get('HL7DC', list())
-    hl7rk = attributes.get('HL7RK', list())
-    hl7is = attributes.get('HL7IS', list())
-    hl7sy = attributes.get('HL7SY', list())
-    hl7cd = attributes.get('HL7CD', list())
-    hl7sl = attributes.get('HL7SL', list())
-    hl7pl = attributes.get('HL7PL', list())
-    hl7vc = attributes.get('HL7VC', list())
-    hl7ty = attributes.get('HL7TY', list())
-    hl7rg = attributes.get('HL7RG', list())
-    hl7csc = attributes.get('HL7CSC', list())
-    hl7od = attributes.get('HL7OD', list())
-    hl7id = attributes.get('HL7ID', list())
-    hl7tr = attributes.get('HL7TR', list())
-    hl7di = attributes.get('HL7DI', list())
-    hl7cs = attributes.get('HL7CS', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_hpo_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'SY', 'ET', 'OP', 'IS', 'OET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id.replace('HP:', ''), info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    sid = attributes.get('SID', list())
-    hpo_comment = attributes.get('HPO_COMMENT', list())
-    date_created = attributes.get('DATE_CREATED', list())
-    syn_qualifier = attributes.get('SYN_QUALIFIER', list())
-    ref = attributes.get('REF', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
+def create_accession_heirarchy(full_heirarchy):
+    for [source, key] in full_heirarchy:
+        if source in DESIRED_CODES:
+            ACCESSION_HEIRARCHY.append((source, key))
 
-def process_icd10pcs_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'PX', 'HX', 'MTH_HX', 'HT', 'HS', 'AB'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    added_meaning = attributes.get('ADDED_MEANING', list())
-    order_no = attributes.get('ORDER_NO', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_icd9cm_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'HT', 'AB'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'ICD9CM')
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    icc = attributes.get('ICC', list())
-    ice = attributes.get('ICE', list())
-    icf = attributes.get('ICF', list())
-    sos = attributes.get('SOS', list())
-    icn = attributes.get('ICN', list())
-    ica = attributes.get('ICA', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-def process_medrt_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'FN', 'SY'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    if node_curie == None:
-        return
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    term_status = attributes.get('TERM_STATUS', list())
-    concept_type = attributes.get('CONCEPT_TYPE', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_medlineplus_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'ET', 'SY', 'HT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    if node_curie == None:
-        return
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    sos = attributes.get('SOS', list())
-    date_created = attributes.get('DATE_CREATED', list())
-    mp_group_url = attributes.get('MP_GROUP_URL', list())
-    mp_primary_institute_url = attributes.get('MP_PRIMARY_INSTITUTE_URL', list())
-    mp_other_language_url = attributes.get('MP_OTHER_LANGUAGE_URL', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_msh_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['MH', 'TQ', 'PEP', 'ET', 'XQ', 'PXQ', 'NM', 'N1', 'PCE', 'CE', 'HT', 'HS', 'DEV', 'DSV', 'QAB', 'QEV', 'QSV', 'PM'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'MSH')
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    mmr = attributes.get('MMR', list())
-    fx = attributes.get('FX', list())
-    lt = attributes.get('LT', list())
-    dc = attributes.get('DC', list())
-    pa = attributes.get('PA', list())
-    rr = attributes.get('RR', list())
-    hm = attributes.get('HM', list())
-    pi = attributes.get('PI', list())
-    ec = attributes.get('EC', list())
-    hn = attributes.get('HN', list())
-    termui = attributes.get('TERMUI', list())
-    th = attributes.get('TH', list())
-    sos = attributes.get('SOS', list())
-    ii = attributes.get('II', list())
-    rn = attributes.get('RN', list())
-    an = attributes.get('AN', list())
-    cx = attributes.get('CX', list())
-    dq = attributes.get('DQ', list())
-    dx = attributes.get('DX', list())
-    pm = attributes.get('PM', list())
-    aql = attributes.get('AQL', list())
-    sc = attributes.get('SC', list())
-    fr = attributes.get('FR', list())
-    mda = attributes.get('MDA', list())
-    src = attributes.get('SRC', list())
-    ol = attributes.get('OL', list())
-    mn = attributes.get('MN', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_mth_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PN', 'CV', 'XM', 'PT', 'SY', 'RT', 'DT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    if node_curie == None:
-        return
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    mth_mapsetcomplexity = attributes.get('MTH_MAPSETCOMPLEXITY', list())
-    fromvsab = attributes.get('FROMVSAB', list())
-    mapsetrsab = attributes.get('MAPSETRSAB', list())
-    mapsetversion = attributes.get('MAPSETVERSION', list())
-    mapsetvsab = attributes.get('MAPSETVSAB', list())
-    tovsab = attributes.get('TOVSAB', list())
-    mth_mapfromexhaustive = attributes.get('MTH_MAPFROMEXHAUSTIVE', list())
-    torsab = attributes.get('TORSAB', list())
-    mapsetsid = attributes.get('MAPSETSID', list())
-    mapsetgrammar = attributes.get('MAPSETGRAMMAR', list())
-    mapsettype = attributes.get('MAPSETTYPE', list())
-    mth_maptoexhaustive = attributes.get('MTH_MAPTOEXHAUSTIVE', list())
-    fromrsab = attributes.get('FROMRSAB', list())
-    mth_mapfromcomplexity = attributes.get('MTH_MAPFROMCOMPLEXITY', list())
-    lt = attributes.get('LT', list())
-    mth_maptocomplexity = attributes.get('MTH_MAPTOCOMPLEXITY', list())
-    sos = attributes.get('SOS', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_ncbi_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['SCN', 'USN', 'USY', 'SY', 'UCN', 'CMN', 'UE', 'EQ'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    div = attributes.get('DIV', list())
-    authority_name = attributes.get('AUTHORITY_NAME', list())
-    rank = attributes.get('RANK', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_nci_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'SY', 'CSN', 'DN', 'FBD', 'HD', 'CCN', 'AD', 'CA2', 'CA3', 'BN', 'AB', 'CCS', 'OP'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'NCI')
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    clinvar_variation_id = attributes.get('CLINVAR_VARIATION_ID', list())
-    micronutrient = attributes.get('MICRONUTRIENT', list())
-    genbank_accession_number = attributes.get('GENBANK_ACCESSION_NUMBER', list())
-    fda_table = attributes.get('FDA_TABLE', list())
-    usda_id = attributes.get('USDA_ID', list())
-    icd_o_3_code = attributes.get('ICD-O-3_CODE', list())
-    tolerable_level = attributes.get('TOLERABLE_LEVEL', list())
-    ncbi_taxon_id = attributes.get('NCBI_TAXON_ID', list())
-    mgi_accession_id = attributes.get('MGI_ACCESSION_ID', list())
-    homologous_gene = attributes.get('HOMOLOGOUS_GENE', list())
-    pid_id = attributes.get('PID_ID', list())
-    swiss_prot = attributes.get('SWISS_PROT', list())
-    essential_amino_acid = attributes.get('ESSENTIAL_AMINO_ACID', list())
-    publish_value_set = attributes.get('PUBLISH_VALUE_SET', list())
-    cas_registry = attributes.get('CAS_REGISTRY', list())
-    value_set_pair = attributes.get('VALUE_SET_PAIR', list())
-    accepted_therapeutic_use_for = attributes.get('ACCEPTED_THERAPEUTIC_USE_FOR', list())
-    hgnc_id = attributes.get('HGNC_ID', list())
-    nci_drug_dictionary_id = attributes.get('NCI_DRUG_DICTIONARY_ID', list())
-    chebi_id = attributes.get('CHEBI_ID', list())
-    cnu = attributes.get('CNU', list())
-    mirbase_id = attributes.get('MIRBASE_ID', list())
-    macronutrient = attributes.get('MACRONUTRIENT', list())
-    essential_fatty_acid = attributes.get('ESSENTIAL_FATTY_ACID', list())
-    unit = attributes.get('UNIT', list())
-    pdq_open_trial_search_id = attributes.get('PDQ_OPEN_TRIAL_SEARCH_ID', list())
-    term_browser_value_set_description = attributes.get('TERM_BROWSER_VALUE_SET_DESCRIPTION', list())
-    entrezgene_id = attributes.get('ENTREZGENE_ID', list())
-    infoods = attributes.get('INFOODS', list())
-    pubmedid_primary_reference = attributes.get('PUBMEDID_PRIMARY_REFERENCE', list())
-    biocarta_id = attributes.get('BIOCARTA_ID', list())
-    extensible_list = attributes.get('EXTENSIBLE_LIST', list())
-    use_for = attributes.get('USE_FOR', list())
-    neoplastic_status = attributes.get('NEOPLASTIC_STATUS', list())
-    nsc_number = attributes.get('NSC_NUMBER', list())
-    omim_number = attributes.get('OMIM_NUMBER', list())
-    lt = attributes.get('LT', list())
-    kegg_id = attributes.get('KEGG_ID', list())
-    gene_encodes_product = attributes.get('GENE_ENCODES_PRODUCT', list())
-    pdq_closed_trial_search_id = attributes.get('PDQ_CLOSED_TRIAL_SEARCH_ID', list())
-    design_note = attributes.get('DESIGN_NOTE', list())
-    nutrient = attributes.get('NUTRIENT', list())
-    fda_unii_code = attributes.get('FDA_UNII_CODE', list())
-    us_recommended_intake = attributes.get('US_RECOMMENDED_INTAKE', list())
-    chemical_formula = attributes.get('CHEMICAL_FORMULA', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-def process_nddf_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['MTH_RXN_CDC', 'CDC', 'CDD', 'CDA', 'IN', 'DF'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    ndc = attributes.get('NDC', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-def process_omim_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'PHENO', 'PHENO_ET', 'PTAV', 'PTCS', 'ETAL', 'ET', 'HT', 'ACR'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    genesymbol = attributes.get('GENESYMBOL', list())
-    mimtypevalue = attributes.get('MIMTYPEVALUE', list())
-    moved_from = attributes.get('MOVED_FROM', list())
-    sos = attributes.get('SOS', list())
-    genelocus = attributes.get('GENELOCUS', list())
-    mimtypemeaning = attributes.get('MIMTYPEMEANING', list())
-    mimtype = attributes.get('MIMTYPE', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_pdq_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'HT', 'PSC', 'SY', 'ET', 'CU', 'LV', 'ACR', 'AB', 'BN', 'FBD', 'CCN', 'CHN', 'OP', 'IS'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    lt = attributes.get('LT', list())
-    cas_registry = attributes.get('CAS_REGISTRY', list())
-    date_first_published = attributes.get('DATE_FIRST_PUBLISHED', list())
-    date_last_modified = attributes.get('DATE_LAST_MODIFIED', list())
-    ind_code = attributes.get('IND_CODE', list())
-    pid = attributes.get('PID', list())
-    nsc_code = attributes.get('NSC_CODE', list())
-    pxc = attributes.get('PXC', list())
-    menu_parent = attributes.get('MENU_PARENT', list())
-    nci_id = attributes.get('NCI_ID', list())
-    orig_sty = attributes.get('ORIG_STY', list())
-    menu_type = attributes.get('MENU_TYPE', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_psy_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'HT', 'ET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    hn = attributes.get('HN', list())
-    pyr = attributes.get('PYR', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_rxnorm_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['SCD', 'SBD', 'SCDG', 'SBDG', 'BPCK', 'GPCK', 'IN', 'PSN', 'MIN', 'SCDF', 'SBDF', 'SCDC', 'DFG', 'DF', 'SBDC', 'BN', 'PIN', 'TMSY', 'SY', 'ET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    ndc = attributes.get('NDC', list())
-    rxn_obsoleted = attributes.get('RXN_OBSOLETED', list())
-    rxn_available_strength = attributes.get('RXN_AVAILABLE_STRENGTH', list())
-    rxn_human_drug = attributes.get('RXN_HUMAN_DRUG', list())
-    rxn_quantity = attributes.get('RXN_QUANTITY', list())
-    rxterm_form = attributes.get('RXTERM_FORM', list())
-    rxn_in_expressed_flag = attributes.get('RXN_IN_EXPRESSED_FLAG', list())
-    rxaui = attributes.get('RXAUI', list())
-    rxn_bn_cardinality = attributes.get('RXN_BN_CARDINALITY', list())
-    rxn_activated = attributes.get('RXN_ACTIVATED', list())
-    rxn_boss_strength_denom_unit = attributes.get('RXN_BOSS_STRENGTH_DENOM_UNIT', list())
-    ambiguity_flag = attributes.get('AMBIGUITY_FLAG', list())
-    rxn_strength = attributes.get('RXN_STRENGTH', list())
-    rxcui = attributes.get('RXCUI', list())
-    rxn_ai = attributes.get('RXN_AI', list())
-    rxn_boss_from = attributes.get('RXN_BOSS_FROM', list())
-    rxn_boss_strength_num_unit = attributes.get('RXN_BOSS_STRENGTH_NUM_UNIT', list())
-    rxn_vet_drug = attributes.get('RXN_VET_DRUG', list())
-    orig_code = attributes.get('ORIG_CODE', list())
-    rxn_am = attributes.get('RXN_AM', list())
-    rxn_boss_strength_denom_value = attributes.get('RXN_BOSS_STRENGTH_DENOM_VALUE', list())
-    rxn_boss_strength_num_value = attributes.get('RXN_BOSS_STRENGTH_NUM_VALUE', list())
-    rxn_qualitative_distinction = attributes.get('RXN_QUALITATIVE_DISTINCTION', list())
-    orig_source = attributes.get('ORIG_SOURCE', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_vandf_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'CD', 'IN', 'AB', 'MTH_RXN_CD'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    ndf_transmit_to_cmop = attributes.get('NDF_TRANSMIT_TO_CMOP', list())
-    sngl_or_mult_src_prd = attributes.get('SNGL_OR_MULT_SRC_PRD', list())
-    dcsa = attributes.get('DCSA', list())
-    exclude_di_check = attributes.get('EXCLUDE_DI_CHECK', list())
-    nfi = attributes.get('NFI', list())
-    va_class_name = attributes.get('VA_CLASS_NAME', list())
-    vmo = attributes.get('VMO', list())
-    drug_class_type = attributes.get('DRUG_CLASS_TYPE', list())
-    nf_name = attributes.get('NF_NAME', list())
-    ndc = attributes.get('NDC', list())
-    vac = attributes.get('VAC', list())
-    va_generic_name = attributes.get('VA_GENERIC_NAME', list())
-    parent_class = attributes.get('PARENT_CLASS', list())
-    va_dispense_unit = attributes.get('VA_DISPENSE_UNIT', list())
-    ddf = attributes.get('DDF', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-DESIRED_CODES = {'ATC': [process_atc_item, kg2_util.CURIE_PREFIX_ATC, make_node_id(UMLS_SOURCE_PREFIX, 'ATC')],
-                 'CHV': [process_chv_item, kg2_util.CURIE_PREFIX_CHV, make_node_id(UMLS_SOURCE_PREFIX, 'CHV')],
-                 'DRUGBANK': [process_drugbank_item, kg2_util.CURIE_PREFIX_DRUGBANK, make_node_id(UMLS_SOURCE_PREFIX, 'DRUGBANK')],
-                 'FMA': [process_fma_item, kg2_util.CURIE_PREFIX_FMA, make_node_id(UMLS_SOURCE_PREFIX, 'FMA')],
-                 'GO': [process_go_item, kg2_util.CURIE_PREFIX_GO, make_node_id(UMLS_SOURCE_PREFIX, 'GO')],
-                 'HCPCS': [process_hcpcs_item, kg2_util.CURIE_PREFIX_HCPCS, make_node_id(UMLS_SOURCE_PREFIX, 'HCPCS')],
-                 'HGNC': [process_hgnc_item, kg2_util.CURIE_PREFIX_HGNC, make_node_id(UMLS_SOURCE_PREFIX, 'HGNC')],
-                 'HL7V3.0': [process_hl7_item, kg2_util.CURIE_PREFIX_UMLS, make_node_id(UMLS_SOURCE_PREFIX, 'HL7')],
-                 'HPO': [process_hpo_item, kg2_util.CURIE_PREFIX_HP, make_node_id(UMLS_SOURCE_PREFIX, 'HPO')],
-                 'ICD10PCS': [process_icd10pcs_item, kg2_util.CURIE_PREFIX_ICD10PCS, make_node_id(UMLS_SOURCE_PREFIX, 'ICD10PCS')],
-                 'ICD9CM': [process_icd9cm_item, kg2_util.CURIE_PREFIX_ICD9, make_node_id(UMLS_SOURCE_PREFIX, 'ICD9CM')],
-                 'MED-RT': [process_medrt_item, kg2_util.CURIE_PREFIX_UMLS, make_node_id(UMLS_SOURCE_PREFIX, 'MED-RT')],
-                 'MEDLINEPLUS': [process_medlineplus_item, kg2_util.CURIE_PREFIX_UMLS, make_node_id(UMLS_SOURCE_PREFIX, 'MEDLINEPLUS')],
-                 'MSH': [process_msh_item, kg2_util.CURIE_PREFIX_MESH, make_node_id(UMLS_SOURCE_PREFIX, 'MSH')],
-                 'MTH': [process_mth_item, kg2_util.CURIE_PREFIX_UMLS, make_node_id(UMLS_SOURCE_PREFIX, 'MTH')],
-                 'NCBI': [process_ncbi_item, kg2_util.CURIE_PREFIX_NCBI_TAXON, make_node_id(UMLS_SOURCE_PREFIX, 'NCBITAXON')],
-                 'NCI': [process_nci_item, kg2_util.CURIE_PREFIX_NCIT, make_node_id(UMLS_SOURCE_PREFIX, 'NCI')],
-                 'NDDF': [process_nddf_item, kg2_util.CURIE_PREFIX_NDDF, make_node_id(UMLS_SOURCE_PREFIX, 'NCI')],
-                 'OMIM': [process_omim_item, kg2_util.CURIE_PREFIX_OMIM, make_node_id(UMLS_SOURCE_PREFIX, 'OMIM')],
-                 'PDQ': [process_pdq_item, kg2_util.CURIE_PREFIX_PDQ, make_node_id(UMLS_SOURCE_PREFIX, 'PDQ')],
-                 'PSY': [process_psy_item, kg2_util.CURIE_PREFIX_PSY, make_node_id(UMLS_SOURCE_PREFIX, 'PSY')],
-                 'RXNORM': [process_rxnorm_item, kg2_util.CURIE_PREFIX_RXNORM, make_node_id(UMLS_SOURCE_PREFIX, 'RXNORM')],
-                 'VANDF': [process_vandf_item, kg2_util.CURIE_PREFIX_VANDF, make_node_id(UMLS_SOURCE_PREFIX, 'VANDF')]}
+def create_accession_sources_heirarchy():
+    for (source, key) in ACCESSION_HEIRARCHY:
+        if source not in ACCESSION_SOURCES_HEIRARCHY:
+            ACCESSION_SOURCES_HEIRARCHY[source] = list()
+        ACCESSION_SOURCES_HEIRARCHY[source].append(key)
 
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
@@ -720,12 +124,13 @@ def process_vandf_item(node_id, info, nodes_output, edges_output, umls_code, cur
         TUI_MAPPINGS = json.load(mappings)
 
     iri_mappings_raw = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string('curies-to-urls-map.yaml'))['use_for_bidirectional_mapping']
-    heirarchy = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string('umls-name-heirarchy.yaml'))
-    print(json.dumps(heirarchy, indent=4))
+    full_heirarchy = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string('umls-name-heirarchy.yaml'))
     for item in iri_mappings_raw:
         for prefix in item:
             IRI_MAPPINGS[prefix] = item[prefix]
 
+    umls_processor = umls_util.UMLS_Processor(nodes_output, edges_output, TUI_MAPPINGS, IRI_MAPPINGS, full_heirarchy)
+
     for data in input_items:
         # There should only be one item in the data dictionary
         for entity in data:
@@ -734,15 +139,9 @@ def process_vandf_item(node_id, info, nodes_output, edges_output, umls_code, cur
             value = data[entity]
             source, node_id = extract_node_id(entity)
 
-            if source not in DESIRED_CODES:
-                continue
-
             # Process the data specifically by source
-            [source_function, curie_prefix, provided_by] = DESIRED_CODES[source]
-            source_function(node_id, value, nodes_output, edges_output, source, curie_prefix, provided_by)
+            umls_processor.process_node(source, node_id, value)
 
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
     kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)
-    print(json.dumps(name_keys, indent=4, sort_keys=True, default=list))
-    print(json.dumps(attribute_keys, indent=4, sort_keys=True, default=list))
     print("Finishing umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
diff --git a/umls_util.py b/umls_util.py
index 84d76c55..59705e5c 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -16,558 +16,603 @@
 
 import kg2_util
 
-
-def make_node_id(curie_prefix, node_id):
-    return curie_prefix + ':' + node_id
-
-
-def get_name_synonyms(names_dict, accession_heirarchy):
-    names = list()
-    for key in accession_heirarchy:
-        names += [name for name in names_dict.get(key, dict()).get('Y', list())]
-        names += [name for name in names_dict.get(key, dict()).get('N', list())]
-    assert len(names) > 0
-    if len(names) == 1:
-        return names[0], list()
-    return names[0], names[1:]
-
-
-def make_umls_node(node_curie, iri, name, category, update_date, provided_by, synonyms, description, nodes_output):
-    node = kg2_util.make_node(node_curie, iri, name, category, "2023", provided_by)
-    node['synonym'] = synonyms
-    node['description'] = description
-
-    nodes_output.write(node)
-
-
-def get_basic_info(curie_prefix, node_id, info, umls_code):
-    # accession_heirarchy
-    # for (umls_code_compare, name_key) in ACCESSION_HEIRARCHY:
-
-    cuis = info.get(CUIS_KEY, list())
-    tuis = info.get(TUIS_KEY, list())
-    if curie_prefix == kg2_util.CURIE_PREFIX_UMLS:
-        if len(cuis) != 1:
-            return None, None, None, None, None, None, None, None
-        node_id = cuis[0]
-    node_curie = make_node_id(curie_prefix, node_id)
-    iri = IRI_MAPPINGS[curie_prefix] + node_id
-    category = TUI_MAPPINGS[str(tuple(tuis))]
-
-    names = info.get(NAMES_KEY, dict())
-    name, synonyms = get_name_synonyms(names, accession_heirarchy)
-
-    return node_curie, iri, name, category, synonyms, cuis, tuis
-
-def process_atc_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['RXN_PT', 'PT', 'RXN_IN', 'IN'])
-
-    # Currently not used, but extracting them in case we want them in the future
-    atc_level = info.get(INFO_KEY, dict()).get('ATC_LEVEL', list())[0]
-    is_drug_class = info.get(INFO_KEY, dict()).get('IS_DRUG_CLASS', list()) == ["Y"]
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_chv_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['PT', 'SY'])
-
-    # Currently not used, but extracting them in case we want them in the future
-    combo_score = info.get(INFO_KEY, dict()).get('COMBO_SCORE', list())
-    combo_score_no_top_words = info.get(INFO_KEY, dict()).get('COMBO_SCORE_NO_TOP_WORDS', list())
-    context_score = info.get(INFO_KEY, dict()).get('CONTEXT_SCORE', list())
-    cui_score = info.get(INFO_KEY, dict()).get('CUI_SCORE', list())
-    disparaged = info.get(INFO_KEY, dict()).get('DISPARAGED', list())
-    frequency = info.get(INFO_KEY, dict()).get('FREQUENCY', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_drugbank_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['IN', 'SY', 'FSY'])
-
-    # Currently not used, but extracting them in case we want them in the future
-    fda_codes = info.get(INFO_KEY, dict()).get('FDA_UNII_CODE', list())
-    secondary_accession_keys = info.get(INFO_KEY, dict()).get('SID', list())
-
-    # TODO: figure out update date
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_fma_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['PT', 'SY', 'AB', 'OP', 'IS'])
-
-    # Currently not used, but extracting them in case we want them in the future
-    authority = info.get(INFO_KEY, dict()).get('AUTHORITY', list())
-    date_last_modified = info.get(INFO_KEY, dict()).get('DATE_LAST_MODIFIED', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_go_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'MTH_PT', 'ET', 'MTH_ET', 'SY', 'MTH_SY', 'OP', 'MTH_OP', 'OET', 'MTH_OET', 'IS', 'MTH_IS']
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id.replace('GO:', ''), info, accession_heirarchy)
-
-    # GO-specific information
-    attributes = info.get(INFO_KEY, dict())
-    go_namespace = attributes.get('GO_NAMESPACE', list())
-    assert len(go_namespace) == 1
-    go_namespace = go_namespace[0]
-    namespace_category_map = {'molecular_function': kg2_util.BIOLINK_CATEGORY_MOLECULAR_ACTIVITY,
-                              'cellular_component': kg2_util.BIOLINK_CATEGORY_CELLULAR_COMPONENT,
-                              'biological_process': kg2_util.BIOLINK_CATEGORY_BIOLOGICAL_PROCESS}
-    category = namespace_category_map.get(go_namespace, category)
-    go_comment = attributes.get('GO_COMMENT', str())
-    if len(go_comment) > 0:
-        go_comment = go_comment[0]
-        go_comment = "// COMMENTS: " + go_comment
-
-    # Currently not used, but extracting them in case we want them in the future
-    date_created = attributes.get('DATE_CREATED', list())
-    go_subset = attributes.get('GO_SUBSET', list())
-    gxr = attributes.get('GXR', list())
-    ref = attributes.get('REF', list())
-    sid = attributes.get('SID', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description(go_comment, tuis), nodes_output)
-
-
-def process_hcpcs_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, ['PT', 'MP', 'MTH_HT'])
-
-    # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
-    attributes = info.get(INFO_KEY, dict())
-    had = attributes.get('HAD', list()) # HCPCS Action Effective Date - effective date of action to a procedure or modifier code.
-    hcc = attributes.get('HCC', list()) # HCPCS Coverage Code - code denoting Medicare coverage status. There are two subelements separated by "=".
-    hts = attributes.get('HTS', list()) # HCPCS Type of Service Code - carrier assigned HCFA Type of Service which describes the particular kind(s) of service represented by the procedure code.
-    hcd = attributes.get('HCD', list()) # HCPCS Code Added Date - year the HCPCS code was added to the HCFA Common Procedure Coding System.
-    hpn = attributes.get('HPN', list()) # HCPCS processing note number identifying the processing note contained in Appendix A of the HCPCS Manual.
-    haq = attributes.get('HAQ', list()) # HCPCS Anesthesia Base Unit Quantity - base unit represents the level of intensity for anesthesia procedure services that reflects all activities except time.
-    hlc = attributes.get('HLC', list()) # HCPCS Lab Certification Code - code used to classify laboratory procedures according to the specialty certification categories listed by CMS(formerly HCFA).
-    hsn = attributes.get('HSN', list()) # HCPCS Statute Number identifying statute reference for coverage or noncoverage of procedure or service.
-    hpd = attributes.get('HPD', list()) # HCPCS ASC payment group effective date - date the procedure is assigned to the ASC payment group.
-    hpg = attributes.get('HPG', list()) # HCPCS ASC payment group code which represents the dollar amount of the facility charge payable by Medicare for the procedure.
-    hmg = attributes.get('HMR', list()) # HCPCS Medicare Carriers Manual reference section number - number identifying a section of the Medicare Carriers Manual.
-    hir = attributes.get('HIR', list()) # HCPCS Coverage Issues Manual Reference Section Number - number identifying the Reference Section of the Coverage Issues Manual.
-    hxr = attributes.get('HXR', list()) # HCPCS Cross reference code - an explicit reference crosswalking a deleted code or a code that is not valid for Medicare to a valid current code (or range of codes).
-    hmp = attributes.get('HMP', list()) # HCPCS Multiple Pricing Indicator Code - code used to identify instances where a procedure could be priced.
-    hpi = attributes.get('HPI', list()) # HCPCS Pricing Indicator Code - used to identify the appropriate methodology for developing unique pricing amounts under Part B.
-    hac = attributes.get('HAC', list()) # HCPCS action code - code denoting the change made to a procedure or modifier code within the HCPCS system.
-    hbt = attributes.get('HBT', list()) # HCPCS Berenson-Eggers Type of Service Code - BETOS for the procedure code based on generally agreed upon clinically meaningful groupings of procedures and services.
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_hgnc_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'ACR', 'MTH_ACR', 'NA', 'SYN', 'NP', 'NS']
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id.replace('HGNC:', ''), info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    mgd_id = attributes.get('MGD_ID', list())
-    vega_id = attributes.get('VEGA_ID', list())
-    genecc = attributes.get('GENCC', list())
-    swp = attributes.get('SWP', list())
-    mane_select = attributes.get('MANE_SELECT', list())
-    local_specific_db_xr = attributes.get('LOCUS_SPECIFIC_DB_XR', list())
-    locus_type = attributes.get('LOCUS_TYPE', list())
-    agr = attributes.get('AGR', list())
-    cytogenetic_location = attributes.get('CYTOGENETIC_LOCATION', list())
-    date_created = attributes.get('DATE_CREATED', list())
-    ensemblgene_id = attributes.get('ENSEMBLGENE_ID', list())
-    db_xr_id = attributes.get('DB_XR_ID', list())
-    locus_group = attributes.get('LOCUS_GROUP', list())
-    entrezgene_id = attributes.get('ENTREZGENE_ID', list())
-    date_name_changed = attributes.get('DATE_NAME_CHANGED', list())
-    pmid = attributes.get('PMID', list())
-    date_last_modified = attributes.get('DATE_LAST_MODIFIED', list())
-    mapped_ucsc_id = attributes.get('MAPPED_UCSC_ID', list())
-    refseq_id = attributes.get('REFSEQ_ID', list())
-    ena = attributes.get('ENA', list())
-    rgd_id = attributes.get('RGD_ID', list())
-    date_symbol_changed = attributes.get('DATE_SYMBOL_CHANGED', list())
-    omim_id = attributes.get('OMIM_ID', list())
-    gene_fam_id = attributes.get('GENE_FAM_ID', list())
-    gene_symbol = attributes.get('GENESYMBOL', list())
-    ez = attributes.get('EZ', list())
-    ccds_id = attributes.get('CCDS_ID', list())
-    lncipedia = attributes.get('LNCIPEDIA', list())
-    gene_fam_desc = attributes.get('GENE_FAM_DESC', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_hl7_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['CSY', 'PT', 'CDO', 'VS', 'BR', 'CPR', 'CR', 'NPT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    if node_curie == None:
-        return
-
-    # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
-    attributes = info.get(INFO_KEY, dict())
-    hl7at = attributes.get('HL7AT', list())
-    hl7ii = attributes.get('HL7II', list())
-    hl7im = attributes.get('HL7IM', list())
-    hl7lt = attributes.get('HL7LT', list())
-    hl7un = attributes.get('HL7UN', list())
-    hl7oa = attributes.get('HL7OA', list())
-    hl7scs = attributes.get('HL7SCS', list())
-    hl7cc = attributes.get('HL7CC', list())
-    hl7na = attributes.get('HL7NA', list())
-    hl7in = attributes.get('HL7IN', list())
-    hl7ap = attributes.get('HL7AP', list())
-    hl7mi = attributes.get('HL7MI', list())
-    hl7hi = attributes.get('HL7HI', list())
-    hl7ir = attributes.get('HL7IR', list())
-    hl7ai = attributes.get('HL7AI', list())
-    hl7ha = attributes.get('HL7HA', list())
-    hl7rf = attributes.get('HL7RF', list())
-    hl7rd = attributes.get('HL7RD', list())
-    hl7vd = attributes.get('HL7VD', list())
-    hl7dc = attributes.get('HL7DC', list())
-    hl7rk = attributes.get('HL7RK', list())
-    hl7is = attributes.get('HL7IS', list())
-    hl7sy = attributes.get('HL7SY', list())
-    hl7cd = attributes.get('HL7CD', list())
-    hl7sl = attributes.get('HL7SL', list())
-    hl7pl = attributes.get('HL7PL', list())
-    hl7vc = attributes.get('HL7VC', list())
-    hl7ty = attributes.get('HL7TY', list())
-    hl7rg = attributes.get('HL7RG', list())
-    hl7csc = attributes.get('HL7CSC', list())
-    hl7od = attributes.get('HL7OD', list())
-    hl7id = attributes.get('HL7ID', list())
-    hl7tr = attributes.get('HL7TR', list())
-    hl7di = attributes.get('HL7DI', list())
-    hl7cs = attributes.get('HL7CS', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_hpo_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'SY', 'ET', 'OP', 'IS', 'OET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id.replace('HP:', ''), info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    sid = attributes.get('SID', list())
-    hpo_comment = attributes.get('HPO_COMMENT', list())
-    date_created = attributes.get('DATE_CREATED', list())
-    syn_qualifier = attributes.get('SYN_QUALIFIER', list())
-    ref = attributes.get('REF', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_icd10pcs_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'PX', 'HX', 'MTH_HX', 'HT', 'HS', 'AB'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    added_meaning = attributes.get('ADDED_MEANING', list())
-    order_no = attributes.get('ORDER_NO', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_icd9cm_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'HT', 'AB'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'ICD9CM')
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    icc = attributes.get('ICC', list())
-    ice = attributes.get('ICE', list())
-    icf = attributes.get('ICF', list())
-    sos = attributes.get('SOS', list())
-    icn = attributes.get('ICN', list())
-    ica = attributes.get('ICA', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-def process_medrt_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'FN', 'SY'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    if node_curie == None:
-        return
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    term_status = attributes.get('TERM_STATUS', list())
-    concept_type = attributes.get('CONCEPT_TYPE', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_medlineplus_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'ET', 'SY', 'HT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    if node_curie == None:
-        return
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    sos = attributes.get('SOS', list())
-    date_created = attributes.get('DATE_CREATED', list())
-    mp_group_url = attributes.get('MP_GROUP_URL', list())
-    mp_primary_institute_url = attributes.get('MP_PRIMARY_INSTITUTE_URL', list())
-    mp_other_language_url = attributes.get('MP_OTHER_LANGUAGE_URL', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_msh_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['MH', 'TQ', 'PEP', 'ET', 'XQ', 'PXQ', 'NM', 'N1', 'PCE', 'CE', 'HT', 'HS', 'DEV', 'DSV', 'QAB', 'QEV', 'QSV', 'PM'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'MSH')
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    mmr = attributes.get('MMR', list())
-    fx = attributes.get('FX', list())
-    lt = attributes.get('LT', list())
-    dc = attributes.get('DC', list())
-    pa = attributes.get('PA', list())
-    rr = attributes.get('RR', list())
-    hm = attributes.get('HM', list())
-    pi = attributes.get('PI', list())
-    ec = attributes.get('EC', list())
-    hn = attributes.get('HN', list())
-    termui = attributes.get('TERMUI', list())
-    th = attributes.get('TH', list())
-    sos = attributes.get('SOS', list())
-    ii = attributes.get('II', list())
-    rn = attributes.get('RN', list())
-    an = attributes.get('AN', list())
-    cx = attributes.get('CX', list())
-    dq = attributes.get('DQ', list())
-    dx = attributes.get('DX', list())
-    pm = attributes.get('PM', list())
-    aql = attributes.get('AQL', list())
-    sc = attributes.get('SC', list())
-    fr = attributes.get('FR', list())
-    mda = attributes.get('MDA', list())
-    src = attributes.get('SRC', list())
-    ol = attributes.get('OL', list())
-    mn = attributes.get('MN', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_mth_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PN', 'CV', 'XM', 'PT', 'SY', 'RT', 'DT'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    if node_curie == None:
-        return
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    mth_mapsetcomplexity = attributes.get('MTH_MAPSETCOMPLEXITY', list())
-    fromvsab = attributes.get('FROMVSAB', list())
-    mapsetrsab = attributes.get('MAPSETRSAB', list())
-    mapsetversion = attributes.get('MAPSETVERSION', list())
-    mapsetvsab = attributes.get('MAPSETVSAB', list())
-    tovsab = attributes.get('TOVSAB', list())
-    mth_mapfromexhaustive = attributes.get('MTH_MAPFROMEXHAUSTIVE', list())
-    torsab = attributes.get('TORSAB', list())
-    mapsetsid = attributes.get('MAPSETSID', list())
-    mapsetgrammar = attributes.get('MAPSETGRAMMAR', list())
-    mapsettype = attributes.get('MAPSETTYPE', list())
-    mth_maptoexhaustive = attributes.get('MTH_MAPTOEXHAUSTIVE', list())
-    fromrsab = attributes.get('FROMRSAB', list())
-    mth_mapfromcomplexity = attributes.get('MTH_MAPFROMCOMPLEXITY', list())
-    lt = attributes.get('LT', list())
-    mth_maptocomplexity = attributes.get('MTH_MAPTOCOMPLEXITY', list())
-    sos = attributes.get('SOS', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_ncbi_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['SCN', 'USN', 'USY', 'SY', 'UCN', 'CMN', 'UE', 'EQ'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    div = attributes.get('DIV', list())
-    authority_name = attributes.get('AUTHORITY_NAME', list())
-    rank = attributes.get('RANK', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_nci_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'SY', 'CSN', 'DN', 'FBD', 'HD', 'CCN', 'AD', 'CA2', 'CA3', 'BN', 'AB', 'CCS', 'OP'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-    provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'NCI')
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    clinvar_variation_id = attributes.get('CLINVAR_VARIATION_ID', list())
-    micronutrient = attributes.get('MICRONUTRIENT', list())
-    genbank_accession_number = attributes.get('GENBANK_ACCESSION_NUMBER', list())
-    fda_table = attributes.get('FDA_TABLE', list())
-    usda_id = attributes.get('USDA_ID', list())
-    icd_o_3_code = attributes.get('ICD-O-3_CODE', list())
-    tolerable_level = attributes.get('TOLERABLE_LEVEL', list())
-    ncbi_taxon_id = attributes.get('NCBI_TAXON_ID', list())
-    mgi_accession_id = attributes.get('MGI_ACCESSION_ID', list())
-    homologous_gene = attributes.get('HOMOLOGOUS_GENE', list())
-    pid_id = attributes.get('PID_ID', list())
-    swiss_prot = attributes.get('SWISS_PROT', list())
-    essential_amino_acid = attributes.get('ESSENTIAL_AMINO_ACID', list())
-    publish_value_set = attributes.get('PUBLISH_VALUE_SET', list())
-    cas_registry = attributes.get('CAS_REGISTRY', list())
-    value_set_pair = attributes.get('VALUE_SET_PAIR', list())
-    accepted_therapeutic_use_for = attributes.get('ACCEPTED_THERAPEUTIC_USE_FOR', list())
-    hgnc_id = attributes.get('HGNC_ID', list())
-    nci_drug_dictionary_id = attributes.get('NCI_DRUG_DICTIONARY_ID', list())
-    chebi_id = attributes.get('CHEBI_ID', list())
-    cnu = attributes.get('CNU', list())
-    mirbase_id = attributes.get('MIRBASE_ID', list())
-    macronutrient = attributes.get('MACRONUTRIENT', list())
-    essential_fatty_acid = attributes.get('ESSENTIAL_FATTY_ACID', list())
-    unit = attributes.get('UNIT', list())
-    pdq_open_trial_search_id = attributes.get('PDQ_OPEN_TRIAL_SEARCH_ID', list())
-    term_browser_value_set_description = attributes.get('TERM_BROWSER_VALUE_SET_DESCRIPTION', list())
-    entrezgene_id = attributes.get('ENTREZGENE_ID', list())
-    infoods = attributes.get('INFOODS', list())
-    pubmedid_primary_reference = attributes.get('PUBMEDID_PRIMARY_REFERENCE', list())
-    biocarta_id = attributes.get('BIOCARTA_ID', list())
-    extensible_list = attributes.get('EXTENSIBLE_LIST', list())
-    use_for = attributes.get('USE_FOR', list())
-    neoplastic_status = attributes.get('NEOPLASTIC_STATUS', list())
-    nsc_number = attributes.get('NSC_NUMBER', list())
-    omim_number = attributes.get('OMIM_NUMBER', list())
-    lt = attributes.get('LT', list())
-    kegg_id = attributes.get('KEGG_ID', list())
-    gene_encodes_product = attributes.get('GENE_ENCODES_PRODUCT', list())
-    pdq_closed_trial_search_id = attributes.get('PDQ_CLOSED_TRIAL_SEARCH_ID', list())
-    design_note = attributes.get('DESIGN_NOTE', list())
-    nutrient = attributes.get('NUTRIENT', list())
-    fda_unii_code = attributes.get('FDA_UNII_CODE', list())
-    us_recommended_intake = attributes.get('US_RECOMMENDED_INTAKE', list())
-    chemical_formula = attributes.get('CHEMICAL_FORMULA', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-def process_nddf_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['MTH_RXN_CDC', 'CDC', 'CDD', 'CDA', 'IN', 'DF'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    ndc = attributes.get('NDC', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-def process_omim_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'PHENO', 'PHENO_ET', 'PTAV', 'PTCS', 'ETAL', 'ET', 'HT', 'ACR'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    genesymbol = attributes.get('GENESYMBOL', list())
-    mimtypevalue = attributes.get('MIMTYPEVALUE', list())
-    moved_from = attributes.get('MOVED_FROM', list())
-    sos = attributes.get('SOS', list())
-    genelocus = attributes.get('GENELOCUS', list())
-    mimtypemeaning = attributes.get('MIMTYPEMEANING', list())
-    mimtype = attributes.get('MIMTYPE', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_pdq_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'HT', 'PSC', 'SY', 'ET', 'CU', 'LV', 'ACR', 'AB', 'BN', 'FBD', 'CCN', 'CHN', 'OP', 'IS'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    lt = attributes.get('LT', list())
-    cas_registry = attributes.get('CAS_REGISTRY', list())
-    date_first_published = attributes.get('DATE_FIRST_PUBLISHED', list())
-    date_last_modified = attributes.get('DATE_LAST_MODIFIED', list())
-    ind_code = attributes.get('IND_CODE', list())
-    pid = attributes.get('PID', list())
-    nsc_code = attributes.get('NSC_CODE', list())
-    pxc = attributes.get('PXC', list())
-    menu_parent = attributes.get('MENU_PARENT', list())
-    nci_id = attributes.get('NCI_ID', list())
-    orig_sty = attributes.get('ORIG_STY', list())
-    menu_type = attributes.get('MENU_TYPE', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_psy_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'HT', 'ET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    hn = attributes.get('HN', list())
-    pyr = attributes.get('PYR', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_rxnorm_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['SCD', 'SBD', 'SCDG', 'SBDG', 'BPCK', 'GPCK', 'IN', 'PSN', 'MIN', 'SCDF', 'SBDF', 'SCDC', 'DFG', 'DF', 'SBDC', 'BN', 'PIN', 'TMSY', 'SY', 'ET'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    ndc = attributes.get('NDC', list())
-    rxn_obsoleted = attributes.get('RXN_OBSOLETED', list())
-    rxn_available_strength = attributes.get('RXN_AVAILABLE_STRENGTH', list())
-    rxn_human_drug = attributes.get('RXN_HUMAN_DRUG', list())
-    rxn_quantity = attributes.get('RXN_QUANTITY', list())
-    rxterm_form = attributes.get('RXTERM_FORM', list())
-    rxn_in_expressed_flag = attributes.get('RXN_IN_EXPRESSED_FLAG', list())
-    rxaui = attributes.get('RXAUI', list())
-    rxn_bn_cardinality = attributes.get('RXN_BN_CARDINALITY', list())
-    rxn_activated = attributes.get('RXN_ACTIVATED', list())
-    rxn_boss_strength_denom_unit = attributes.get('RXN_BOSS_STRENGTH_DENOM_UNIT', list())
-    ambiguity_flag = attributes.get('AMBIGUITY_FLAG', list())
-    rxn_strength = attributes.get('RXN_STRENGTH', list())
-    rxcui = attributes.get('RXCUI', list())
-    rxn_ai = attributes.get('RXN_AI', list())
-    rxn_boss_from = attributes.get('RXN_BOSS_FROM', list())
-    rxn_boss_strength_num_unit = attributes.get('RXN_BOSS_STRENGTH_NUM_UNIT', list())
-    rxn_vet_drug = attributes.get('RXN_VET_DRUG', list())
-    orig_code = attributes.get('ORIG_CODE', list())
-    rxn_am = attributes.get('RXN_AM', list())
-    rxn_boss_strength_denom_value = attributes.get('RXN_BOSS_STRENGTH_DENOM_VALUE', list())
-    rxn_boss_strength_num_value = attributes.get('RXN_BOSS_STRENGTH_NUM_VALUE', list())
-    rxn_qualitative_distinction = attributes.get('RXN_QUALITATIVE_DISTINCTION', list())
-    orig_source = attributes.get('ORIG_SOURCE', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
-
-
-def process_vandf_item(node_id, info, nodes_output, edges_output, umls_code, curie_prefix, provided_by):
-    accession_heirarchy = ['PT', 'CD', 'IN', 'AB', 'MTH_RXN_CD'] # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-    node_curie, iri, name, category, synonyms, cuis, tuis = get_basic_info(curie_prefix, node_id, info, accession_heirarchy)
-
-    # Currently not used, but extracting them in case we want them in the future
-    attributes = info.get(INFO_KEY, dict())
-    ndf_transmit_to_cmop = attributes.get('NDF_TRANSMIT_TO_CMOP', list())
-    sngl_or_mult_src_prd = attributes.get('SNGL_OR_MULT_SRC_PRD', list())
-    dcsa = attributes.get('DCSA', list())
-    exclude_di_check = attributes.get('EXCLUDE_DI_CHECK', list())
-    nfi = attributes.get('NFI', list())
-    va_class_name = attributes.get('VA_CLASS_NAME', list())
-    vmo = attributes.get('VMO', list())
-    drug_class_type = attributes.get('DRUG_CLASS_TYPE', list())
-    nf_name = attributes.get('NF_NAME', list())
-    ndc = attributes.get('NDC', list())
-    vac = attributes.get('VAC', list())
-    va_generic_name = attributes.get('VA_GENERIC_NAME', list())
-    parent_class = attributes.get('PARENT_CLASS', list())
-    va_dispense_unit = attributes.get('VA_DISPENSE_UNIT', list())
-    ddf = attributes.get('DDF', list())
-
-    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)
+class UMLS_Processor(object):
+    def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_name_heirarchy):  # Store outputs/mappings and register one handler per supported source
+        self.nodes_output = nodes_output  # make_umls_node calls .write(node) on this
+        self.edges_output = edges_output
+        self.TUI_MAPPINGS = tui_mappings  # keyed by str(tuple(tuis)); value is used as the node category
+        self.IRI_MAPPINGS = iri_mappings  # keyed by CURIE prefix; value is the IRI base the node id is appended to
+        self.full_name_heirarchy = full_name_heirarchy  # iterable of [source, key] pairs
+        self.SOURCES = {'ATC': [self.process_atc_item, kg2_util.CURIE_PREFIX_ATC, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ATC')],
+                        'CHV': [self.process_chv_item, kg2_util.CURIE_PREFIX_CHV, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'CHV')],
+                        'DRUGBANK': [self.process_drugbank_item, kg2_util.CURIE_PREFIX_DRUGBANK, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'DRUGBANK')],
+                        'FMA': [self.process_fma_item, kg2_util.CURIE_PREFIX_FMA, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'FMA')],
+                        'GO': [self.process_go_item, kg2_util.CURIE_PREFIX_GO, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'GO')],
+                        'HCPCS': [self.process_hcpcs_item, kg2_util.CURIE_PREFIX_HCPCS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HCPCS')],
+                        'HGNC': [self.process_hgnc_item, kg2_util.CURIE_PREFIX_HGNC, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HGNC')],
+                        'HL7V3.0': [self.process_hl7_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HL7')],
+                        'HPO': [self.process_hpo_item, kg2_util.CURIE_PREFIX_HP, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HPO')],
+                        'ICD10PCS': [self.process_icd10pcs_item, kg2_util.CURIE_PREFIX_ICD10PCS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ICD10PCS')],
+                        'ICD9CM': [self.process_icd9cm_item, kg2_util.CURIE_PREFIX_ICD9, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ICD9CM')],
+                        'MED-RT': [self.process_medrt_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MED-RT')],
+                        'MEDLINEPLUS': [self.process_medlineplus_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MEDLINEPLUS')],
+                        'MSH': [self.process_msh_item, kg2_util.CURIE_PREFIX_MESH, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MSH')],
+                        'MTH': [self.process_mth_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MTH')],
+                        'NCBI': [self.process_ncbi_item, kg2_util.CURIE_PREFIX_NCBI_TAXON, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NCBITAXON')],
+                        'NCI': [self.process_nci_item, kg2_util.CURIE_PREFIX_NCIT, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NCI')],
+                        'NDDF': [self.process_nddf_item, kg2_util.CURIE_PREFIX_NDDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NDDF')],
+                        'OMIM': [self.process_omim_item, kg2_util.CURIE_PREFIX_OMIM, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'OMIM')],
+                        'PDQ': [self.process_pdq_item, kg2_util.CURIE_PREFIX_PDQ, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PDQ')],
+                        'PSY': [self.process_psy_item, kg2_util.CURIE_PREFIX_PSY, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PSY')],
+                        'RXNORM': [self.process_rxnorm_item, kg2_util.CURIE_PREFIX_RXNORM, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'RXNORM')],
+                        'VANDF': [self.process_vandf_item, kg2_util.CURIE_PREFIX_VANDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'VANDF')]}  # each value: [handler, node CURIE prefix, provided_by CURIE]
+        self.create_umls_accession_heirarchy()  # needs SOURCES; must run before the next call, which reads its result
+        self.create_accession_sources_heirarchy()
+        # Top-level keys read from each node's info dict
+        self.CUIS_KEY = 'cuis'
+        self.INFO_KEY = 'attributes'
+        self.NAMES_KEY = 'names'
+        self.TUIS_KEY = 'tuis'
+
+
+    def process_node(self, source, node_id, data):  # Route one node to the handler registered for its source
+        if source not in self.SOURCES:
+            return  # sources without a registered handler are skipped
+        self.SOURCES[source][0](node_id, data, source)
+
+    def create_umls_accession_heirarchy(self):
+        # Keep, in input order, only the (source, key) pairs whose source has a registered handler
+        self.UMLS_ACCESSION_HEIRARCHY = [(source, key)
+                                         for [source, key] in self.full_name_heirarchy
+                                         if source in self.SOURCES]
+
+    def create_accession_sources_heirarchy(self):
+        # Group the filtered pairs by source: source -> list of keys, in hierarchy order
+        keys_by_source = dict()
+        for (source, key) in self.UMLS_ACCESSION_HEIRARCHY:
+            keys_by_source.setdefault(source, list()).append(key)
+        self.ACCESSION_SOURCES_HEIRARCHY = keys_by_source
+
+    def make_umls_node(self, node_curie, iri, name, category, update_date, provided_by, synonyms, description):
+        # Build a node with kg2_util.make_node, attach synonyms and description, and write it to nodes_output
+        node = kg2_util.make_node(node_curie, iri, name, category, update_date, provided_by)  # use the caller's update_date rather than a fixed year
+        node['synonym'] = synonyms
+        node['description'] = description
+        self.nodes_output.write(node)
+
+    def make_node_id(self, curie_prefix, node_id):  # Join prefix and local id into "<prefix>:<id>"
+        return ':'.join((curie_prefix, node_id))
+
+    def get_name_synonyms(self, names_dict, source):
+        # Collect names in hierarchy order ('Y' entries before 'N' entries for each key) and split them into (name, synonyms)
+        ordered = list()
+        if source == 'UMLS':
+            for (key_source, key) in self.UMLS_ACCESSION_HEIRARCHY:
+                by_flag = names_dict.get(key_source, dict()).get(key, dict())
+                ordered += list(by_flag.get('Y', list())) + list(by_flag.get('N', list()))
+        else:
+            for key in self.ACCESSION_SOURCES_HEIRARCHY[source]:
+                by_flag = names_dict.get(key, dict())
+                ordered += list(by_flag.get('Y', list())) + list(by_flag.get('N', list()))
+
+        assert len(ordered) > 0
+        # The first collected name is the node name; any remaining ones become synonyms (empty list if none)
+        return ordered[0], ordered[1:]
+
+    def get_basic_info(self, source, node_id, info):
+        # Returns (node_curie, iri, name, category, provided_by, synonyms, cuis, tuis) for one node.
+        # Sources registered with the UMLS CURIE prefix use their single CUI as node id; otherwise all eight are None.
+        source_entry = self.SOURCES[source]
+        curie_prefix, provided_by = source_entry[1], source_entry[2]
+        cuis = info.get(self.CUIS_KEY, list())
+        tuis = info.get(self.TUIS_KEY, list())
+        if curie_prefix == kg2_util.CURIE_PREFIX_UMLS:
+            if len(cuis) != 1:
+                return (None,) * 8
+            node_id = cuis[0]
+        node_curie = self.make_node_id(curie_prefix, node_id)
+        iri = self.IRI_MAPPINGS[curie_prefix] + node_id
+        category = self.TUI_MAPPINGS[str(tuple(tuis))]
+        name, synonyms = self.get_name_synonyms(info.get(self.NAMES_KEY, dict()), source)
+
+        return node_curie, iri, name, category, provided_by, synonyms, cuis, tuis
+
+    def create_description(self, tuis, comment=""):
+        # Start from comment, append "; UMLS Semantic Type: STY:<tui>" per TUI, then trim ';' and ' ' from both ends
+        pieces = comment
+        for semantic_type in tuis:
+            pieces += "; UMLS Semantic Type: STY:" + semantic_type
+        return pieces.strip("; ")
+
+
+    def process_atc_item(self, node_id, info, umls_code):
+        # Emit one node for an ATC entry; umls_code is the SOURCES key passed through to get_basic_info
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        # Currently not used, but extracting them in case we want them in the future
+        atc_level = next(iter(info.get(self.INFO_KEY, dict()).get('ATC_LEVEL', list())), None)  # None instead of IndexError when absent
+        is_drug_class = info.get(self.INFO_KEY, dict()).get('IS_DRUG_CLASS', list()) == ["Y"]
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_chv_item(self, node_id, info, umls_code):  # Emit one node for a CHV entry
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        attributes = info.get(self.INFO_KEY, dict())
+        # Currently not used, but extracting them in case we want them in the future
+        combo_score = attributes.get('COMBO_SCORE', list())
+        combo_score_no_top_words = attributes.get('COMBO_SCORE_NO_TOP_WORDS', list())
+        context_score = attributes.get('CONTEXT_SCORE', list())
+        cui_score = attributes.get('CUI_SCORE', list())
+        disparaged = attributes.get('DISPARAGED', list())
+        frequency = attributes.get('FREQUENCY', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_drugbank_item(self, node_id, info, umls_code):  # Emit one node for a DRUGBANK entry
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        attributes = info.get(self.INFO_KEY, dict())
+        # Currently not used, but extracting them in case we want them in the future
+        fda_codes = attributes.get('FDA_UNII_CODE', list())
+        secondary_accession_keys = attributes.get('SID', list())
+
+        # TODO: figure out update date
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_fma_item(self, node_id, info, umls_code):  # Emit one node for an FMA entry
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        attributes = info.get(self.INFO_KEY, dict())
+        # Currently not used, but extracting them in case we want them in the future
+        authority = attributes.get('AUTHORITY', list())
+        date_last_modified = attributes.get('DATE_LAST_MODIFIED', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_go_item(self, node_id, info, umls_code):
+        # Emit one node for a GO entry; 'GO:' is removed from node_id before the CURIE is built
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id.replace('GO:', ''), info)
+
+        # GO-specific information
+        attributes = info.get(self.INFO_KEY, dict())
+        go_namespace = attributes.get('GO_NAMESPACE', list())
+        assert len(go_namespace) == 1
+        go_namespace = go_namespace[0]
+        namespace_category_map = {'molecular_function': kg2_util.BIOLINK_CATEGORY_MOLECULAR_ACTIVITY,
+                                  'cellular_component': kg2_util.BIOLINK_CATEGORY_CELLULAR_COMPONENT,
+                                  'biological_process': kg2_util.BIOLINK_CATEGORY_BIOLOGICAL_PROCESS}
+        category = namespace_category_map.get(go_namespace, category)  # unmapped namespaces keep the category from get_basic_info
+        # GO_COMMENT is read as a list (first entry used); always hand create_description a str, never an empty list
+        go_comments = attributes.get('GO_COMMENT', list())
+        go_comment = ("// COMMENTS: " + go_comments[0]) if len(go_comments) > 0 else ""
+
+        # Currently not used, but extracting them in case we want them in the future
+        date_created = attributes.get('DATE_CREATED', list())
+        go_subset = attributes.get('GO_SUBSET', list())
+        gxr = attributes.get('GXR', list())
+        ref = attributes.get('REF', list())
+        sid = attributes.get('SID', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, go_comment))
+
+
+    def process_hcpcs_item(self, node_id, info, umls_code):  # Emit one node for an HCPCS entry
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+
+        # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
+        attributes = info.get(self.INFO_KEY, dict())
+        had = attributes.get('HAD', list()) # HCPCS Action Effective Date - effective date of action to a procedure or modifier code.
+        hcc = attributes.get('HCC', list()) # HCPCS Coverage Code - code denoting Medicare coverage status. There are two subelements separated by "=".
+        hts = attributes.get('HTS', list()) # HCPCS Type of Service Code - carrier assigned HCFA Type of Service which describes the particular kind(s) of service represented by the procedure code.
+        hcd = attributes.get('HCD', list()) # HCPCS Code Added Date - year the HCPCS code was added to the HCFA Common Procedure Coding System.
+        hpn = attributes.get('HPN', list()) # HCPCS processing note number identifying the processing note contained in Appendix A of the HCPCS Manual.
+        haq = attributes.get('HAQ', list()) # HCPCS Anesthesia Base Unit Quantity - base unit represents the level of intensity for anesthesia procedure services that reflects all activities except time.
+        hlc = attributes.get('HLC', list()) # HCPCS Lab Certification Code - code used to classify laboratory procedures according to the specialty certification categories listed by CMS(formerly HCFA).
+        hsn = attributes.get('HSN', list()) # HCPCS Statute Number identifying statute reference for coverage or noncoverage of procedure or service.
+        hpd = attributes.get('HPD', list()) # HCPCS ASC payment group effective date - date the procedure is assigned to the ASC payment group.
+        hpg = attributes.get('HPG', list()) # HCPCS ASC payment group code which represents the dollar amount of the facility charge payable by Medicare for the procedure.
+        hmr = attributes.get('HMR', list()) # HCPCS Medicare Carriers Manual reference section number - number identifying a section of the Medicare Carriers Manual.
+        hir = attributes.get('HIR', list()) # HCPCS Coverage Issues Manual Reference Section Number - number identifying the Reference Section of the Coverage Issues Manual.
+        hxr = attributes.get('HXR', list()) # HCPCS Cross reference code - an explicit reference crosswalking a deleted code or a code that is not valid for Medicare to a valid current code (or range of codes).
+        hmp = attributes.get('HMP', list()) # HCPCS Multiple Pricing Indicator Code - code used to identify instances where a procedure could be priced.
+        hpi = attributes.get('HPI', list()) # HCPCS Pricing Indicator Code - used to identify the appropriate methodology for developing unique pricing amounts under Part B.
+        hac = attributes.get('HAC', list()) # HCPCS action code - code denoting the change made to a procedure or modifier code within the HCPCS system.
+        hbt = attributes.get('HBT', list()) # HCPCS Berenson-Eggers Type of Service Code - BETOS for the procedure code based on generally agreed upon clinically meaningful groupings of procedures and services.
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_hgnc_item(self, node_id, info, umls_code):  # Emit one node for an HGNC entry; 'HGNC:' is removed from node_id first
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id.replace('HGNC:', ''), info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        mgd_id = attributes.get('MGD_ID', list())
+        vega_id = attributes.get('VEGA_ID', list())
+        gencc = attributes.get('GENCC', list())
+        swp = attributes.get('SWP', list())
+        mane_select = attributes.get('MANE_SELECT', list())
+        locus_specific_db_xr = attributes.get('LOCUS_SPECIFIC_DB_XR', list())
+        locus_type = attributes.get('LOCUS_TYPE', list())
+        agr = attributes.get('AGR', list())
+        cytogenetic_location = attributes.get('CYTOGENETIC_LOCATION', list())
+        date_created = attributes.get('DATE_CREATED', list())
+        ensemblgene_id = attributes.get('ENSEMBLGENE_ID', list())
+        db_xr_id = attributes.get('DB_XR_ID', list())
+        locus_group = attributes.get('LOCUS_GROUP', list())
+        entrezgene_id = attributes.get('ENTREZGENE_ID', list())
+        date_name_changed = attributes.get('DATE_NAME_CHANGED', list())
+        pmid = attributes.get('PMID', list())
+        date_last_modified = attributes.get('DATE_LAST_MODIFIED', list())
+        mapped_ucsc_id = attributes.get('MAPPED_UCSC_ID', list())
+        refseq_id = attributes.get('REFSEQ_ID', list())
+        ena = attributes.get('ENA', list())
+        rgd_id = attributes.get('RGD_ID', list())
+        date_symbol_changed = attributes.get('DATE_SYMBOL_CHANGED', list())
+        omim_id = attributes.get('OMIM_ID', list())
+        gene_fam_id = attributes.get('GENE_FAM_ID', list())
+        gene_symbol = attributes.get('GENESYMBOL', list())
+        ez = attributes.get('EZ', list())
+        ccds_id = attributes.get('CCDS_ID', list())
+        lncipedia = attributes.get('LNCIPEDIA', list())
+        gene_fam_desc = attributes.get('GENE_FAM_DESC', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_hl7_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        if node_curie == None:
+            return
+
+        # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
+        attributes = info.get(self.INFO_KEY, dict())
+        hl7at = attributes.get('HL7AT', list())
+        hl7ii = attributes.get('HL7II', list())
+        hl7im = attributes.get('HL7IM', list())
+        hl7lt = attributes.get('HL7LT', list())
+        hl7un = attributes.get('HL7UN', list())
+        hl7oa = attributes.get('HL7OA', list())
+        hl7scs = attributes.get('HL7SCS', list())
+        hl7cc = attributes.get('HL7CC', list())
+        hl7na = attributes.get('HL7NA', list())
+        hl7in = attributes.get('HL7IN', list())
+        hl7ap = attributes.get('HL7AP', list())
+        hl7mi = attributes.get('HL7MI', list())
+        hl7hi = attributes.get('HL7HI', list())
+        hl7ir = attributes.get('HL7IR', list())
+        hl7ai = attributes.get('HL7AI', list())
+        hl7ha = attributes.get('HL7HA', list())
+        hl7rf = attributes.get('HL7RF', list())
+        hl7rd = attributes.get('HL7RD', list())
+        hl7vd = attributes.get('HL7VD', list())
+        hl7dc = attributes.get('HL7DC', list())
+        hl7rk = attributes.get('HL7RK', list())
+        hl7is = attributes.get('HL7IS', list())
+        hl7sy = attributes.get('HL7SY', list())
+        hl7cd = attributes.get('HL7CD', list())
+        hl7sl = attributes.get('HL7SL', list())
+        hl7pl = attributes.get('HL7PL', list())
+        hl7vc = attributes.get('HL7VC', list())
+        hl7ty = attributes.get('HL7TY', list())
+        hl7rg = attributes.get('HL7RG', list())
+        hl7csc = attributes.get('HL7CSC', list())
+        hl7od = attributes.get('HL7OD', list())
+        hl7id = attributes.get('HL7ID', list())
+        hl7tr = attributes.get('HL7TR', list())
+        hl7di = attributes.get('HL7DI', list())
+        hl7cs = attributes.get('HL7CS', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_hpo_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id.replace('HP:', ''), info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        sid = attributes.get('SID', list())
+        hpo_comment = attributes.get('HPO_COMMENT', list())
+        date_created = attributes.get('DATE_CREATED', list())
+        syn_qualifier = attributes.get('SYN_QUALIFIER', list())
+        ref = attributes.get('REF', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_icd10pcs_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        added_meaning = attributes.get('ADDED_MEANING', list())
+        order_no = attributes.get('ORDER_NO', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_icd9cm_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        icc = attributes.get('ICC', list())
+        ice = attributes.get('ICE', list())
+        icf = attributes.get('ICF', list())
+        sos = attributes.get('SOS', list())
+        icn = attributes.get('ICN', list())
+        ica = attributes.get('ICA', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+    def process_medrt_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        if node_curie == None:
+            return
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        term_status = attributes.get('TERM_STATUS', list())
+        concept_type = attributes.get('CONCEPT_TYPE', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_medlineplus_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        if node_curie == None:
+            return
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        sos = attributes.get('SOS', list())
+        date_created = attributes.get('DATE_CREATED', list())
+        mp_group_url = attributes.get('MP_GROUP_URL', list())
+        mp_primary_institute_url = attributes.get('MP_PRIMARY_INSTITUTE_URL', list())
+        mp_other_language_url = attributes.get('MP_OTHER_LANGUAGE_URL', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_msh_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        mmr = attributes.get('MMR', list())
+        fx = attributes.get('FX', list())
+        lt = attributes.get('LT', list())
+        dc = attributes.get('DC', list())
+        pa = attributes.get('PA', list())
+        rr = attributes.get('RR', list())
+        hm = attributes.get('HM', list())
+        pi = attributes.get('PI', list())
+        ec = attributes.get('EC', list())
+        hn = attributes.get('HN', list())
+        termui = attributes.get('TERMUI', list())
+        th = attributes.get('TH', list())
+        sos = attributes.get('SOS', list())
+        ii = attributes.get('II', list())
+        rn = attributes.get('RN', list())
+        an = attributes.get('AN', list())
+        cx = attributes.get('CX', list())
+        dq = attributes.get('DQ', list())
+        dx = attributes.get('DX', list())
+        pm = attributes.get('PM', list())
+        aql = attributes.get('AQL', list())
+        sc = attributes.get('SC', list())
+        fr = attributes.get('FR', list())
+        mda = attributes.get('MDA', list())
+        src = attributes.get('SRC', list())
+        ol = attributes.get('OL', list())
+        mn = attributes.get('MN', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_mth_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        if node_curie == None:
+            return
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        mth_mapsetcomplexity = attributes.get('MTH_MAPSETCOMPLEXITY', list())
+        fromvsab = attributes.get('FROMVSAB', list())
+        mapsetrsab = attributes.get('MAPSETRSAB', list())
+        mapsetversion = attributes.get('MAPSETVERSION', list())
+        mapsetvsab = attributes.get('MAPSETVSAB', list())
+        tovsab = attributes.get('TOVSAB', list())
+        mth_mapfromexhaustive = attributes.get('MTH_MAPFROMEXHAUSTIVE', list())
+        torsab = attributes.get('TORSAB', list())
+        mapsetsid = attributes.get('MAPSETSID', list())
+        mapsetgrammar = attributes.get('MAPSETGRAMMAR', list())
+        mapsettype = attributes.get('MAPSETTYPE', list())
+        mth_maptoexhaustive = attributes.get('MTH_MAPTOEXHAUSTIVE', list())
+        fromrsab = attributes.get('FROMRSAB', list())
+        mth_mapfromcomplexity = attributes.get('MTH_MAPFROMCOMPLEXITY', list())
+        lt = attributes.get('LT', list())
+        mth_maptocomplexity = attributes.get('MTH_MAPTOCOMPLEXITY', list())
+        sos = attributes.get('SOS', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_ncbi_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        div = attributes.get('DIV', list())
+        authority_name = attributes.get('AUTHORITY_NAME', list())
+        rank = attributes.get('RANK', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_nci_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        clinvar_variation_id = attributes.get('CLINVAR_VARIATION_ID', list())
+        micronutrient = attributes.get('MICRONUTRIENT', list())
+        genbank_accession_number = attributes.get('GENBANK_ACCESSION_NUMBER', list())
+        fda_table = attributes.get('FDA_TABLE', list())
+        usda_id = attributes.get('USDA_ID', list())
+        icd_o_3_code = attributes.get('ICD-O-3_CODE', list())
+        tolerable_level = attributes.get('TOLERABLE_LEVEL', list())
+        ncbi_taxon_id = attributes.get('NCBI_TAXON_ID', list())
+        mgi_accession_id = attributes.get('MGI_ACCESSION_ID', list())
+        homologous_gene = attributes.get('HOMOLOGOUS_GENE', list())
+        pid_id = attributes.get('PID_ID', list())
+        swiss_prot = attributes.get('SWISS_PROT', list())
+        essential_amino_acid = attributes.get('ESSENTIAL_AMINO_ACID', list())
+        publish_value_set = attributes.get('PUBLISH_VALUE_SET', list())
+        cas_registry = attributes.get('CAS_REGISTRY', list())
+        value_set_pair = attributes.get('VALUE_SET_PAIR', list())
+        accepted_therapeutic_use_for = attributes.get('ACCEPTED_THERAPEUTIC_USE_FOR', list())
+        hgnc_id = attributes.get('HGNC_ID', list())
+        nci_drug_dictionary_id = attributes.get('NCI_DRUG_DICTIONARY_ID', list())
+        chebi_id = attributes.get('CHEBI_ID', list())
+        cnu = attributes.get('CNU', list())
+        mirbase_id = attributes.get('MIRBASE_ID', list())
+        macronutrient = attributes.get('MACRONUTRIENT', list())
+        essential_fatty_acid = attributes.get('ESSENTIAL_FATTY_ACID', list())
+        unit = attributes.get('UNIT', list())
+        pdq_open_trial_search_id = attributes.get('PDQ_OPEN_TRIAL_SEARCH_ID', list())
+        term_browser_value_set_description = attributes.get('TERM_BROWSER_VALUE_SET_DESCRIPTION', list())
+        entrezgene_id = attributes.get('ENTREZGENE_ID', list())
+        infoods = attributes.get('INFOODS', list())
+        pubmedid_primary_reference = attributes.get('PUBMEDID_PRIMARY_REFERENCE', list())
+        biocarta_id = attributes.get('BIOCARTA_ID', list())
+        extensible_list = attributes.get('EXTENSIBLE_LIST', list())
+        use_for = attributes.get('USE_FOR', list())
+        neoplastic_status = attributes.get('NEOPLASTIC_STATUS', list())
+        nsc_number = attributes.get('NSC_NUMBER', list())
+        omim_number = attributes.get('OMIM_NUMBER', list())
+        lt = attributes.get('LT', list())
+        kegg_id = attributes.get('KEGG_ID', list())
+        gene_encodes_product = attributes.get('GENE_ENCODES_PRODUCT', list())
+        pdq_closed_trial_search_id = attributes.get('PDQ_CLOSED_TRIAL_SEARCH_ID', list())
+        design_note = attributes.get('DESIGN_NOTE', list())
+        nutrient = attributes.get('NUTRIENT', list())
+        fda_unii_code = attributes.get('FDA_UNII_CODE', list())
+        us_recommended_intake = attributes.get('US_RECOMMENDED_INTAKE', list())
+        chemical_formula = attributes.get('CHEMICAL_FORMULA', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+    def process_nddf_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        ndc = attributes.get('NDC', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+    def process_omim_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        genesymbol = attributes.get('GENESYMBOL', list())
+        mimtypevalue = attributes.get('MIMTYPEVALUE', list())
+        moved_from = attributes.get('MOVED_FROM', list())
+        sos = attributes.get('SOS', list())
+        genelocus = attributes.get('GENELOCUS', list())
+        mimtypemeaning = attributes.get('MIMTYPEMEANING', list())
+        mimtype = attributes.get('MIMTYPE', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_pdq_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        lt = attributes.get('LT', list())
+        cas_registry = attributes.get('CAS_REGISTRY', list())
+        date_first_published = attributes.get('DATE_FIRST_PUBLISHED', list())
+        date_last_modified = attributes.get('DATE_LAST_MODIFIED', list())
+        ind_code = attributes.get('IND_CODE', list())
+        pid = attributes.get('PID', list())
+        nsc_code = attributes.get('NSC_CODE', list())
+        pxc = attributes.get('PXC', list())
+        menu_parent = attributes.get('MENU_PARENT', list())
+        nci_id = attributes.get('NCI_ID', list())
+        orig_sty = attributes.get('ORIG_STY', list())
+        menu_type = attributes.get('MENU_TYPE', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_psy_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        hn = attributes.get('HN', list())
+        pyr = attributes.get('PYR', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_rxnorm_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        ndc = attributes.get('NDC', list())
+        rxn_obsoleted = attributes.get('RXN_OBSOLETED', list())
+        rxn_available_strength = attributes.get('RXN_AVAILABLE_STRENGTH', list())
+        rxn_human_drug = attributes.get('RXN_HUMAN_DRUG', list())
+        rxn_quantity = attributes.get('RXN_QUANTITY', list())
+        rxterm_form = attributes.get('RXTERM_FORM', list())
+        rxn_in_expressed_flag = attributes.get('RXN_IN_EXPRESSED_FLAG', list())
+        rxaui = attributes.get('RXAUI', list())
+        rxn_bn_cardinality = attributes.get('RXN_BN_CARDINALITY', list())
+        rxn_activated = attributes.get('RXN_ACTIVATED', list())
+        rxn_boss_strength_denom_unit = attributes.get('RXN_BOSS_STRENGTH_DENOM_UNIT', list())
+        ambiguity_flag = attributes.get('AMBIGUITY_FLAG', list())
+        rxn_strength = attributes.get('RXN_STRENGTH', list())
+        rxcui = attributes.get('RXCUI', list())
+        rxn_ai = attributes.get('RXN_AI', list())
+        rxn_boss_from = attributes.get('RXN_BOSS_FROM', list())
+        rxn_boss_strength_num_unit = attributes.get('RXN_BOSS_STRENGTH_NUM_UNIT', list())
+        rxn_vet_drug = attributes.get('RXN_VET_DRUG', list())
+        orig_code = attributes.get('ORIG_CODE', list())
+        rxn_am = attributes.get('RXN_AM', list())
+        rxn_boss_strength_denom_value = attributes.get('RXN_BOSS_STRENGTH_DENOM_VALUE', list())
+        rxn_boss_strength_num_value = attributes.get('RXN_BOSS_STRENGTH_NUM_VALUE', list())
+        rxn_qualitative_distinction = attributes.get('RXN_QUALITATIVE_DISTINCTION', list())
+        orig_source = attributes.get('ORIG_SOURCE', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+
+    def process_vandf_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+
+        # Currently not used, but extracting them in case we want them in the future
+        attributes = info.get(self.INFO_KEY, dict())
+        ndf_transmit_to_cmop = attributes.get('NDF_TRANSMIT_TO_CMOP', list())
+        sngl_or_mult_src_prd = attributes.get('SNGL_OR_MULT_SRC_PRD', list())
+        dcsa = attributes.get('DCSA', list())
+        exclude_di_check = attributes.get('EXCLUDE_DI_CHECK', list())
+        nfi = attributes.get('NFI', list())
+        va_class_name = attributes.get('VA_CLASS_NAME', list())
+        vmo = attributes.get('VMO', list())
+        drug_class_type = attributes.get('DRUG_CLASS_TYPE', list())
+        nf_name = attributes.get('NF_NAME', list())
+        ndc = attributes.get('NDC', list())
+        vac = attributes.get('VAC', list())
+        va_generic_name = attributes.get('VA_GENERIC_NAME', list())
+        parent_class = attributes.get('PARENT_CLASS', list())
+        va_dispense_unit = attributes.get('VA_DISPENSE_UNIT', list())
+        ddf = attributes.get('DDF', list())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))

From 247d38cea2c50d209c0c41ab1a27f9995f602596 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Tue, 22 Aug 2023 17:04:29 -0700
Subject: [PATCH 064/117] #316 UMLS integrated, ran to completion in a little
 over 23 minutes

---
 umls_util.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index 59705e5c..b02bcfe4 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -45,7 +45,8 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
                         'PDQ': [self.process_pdq_item, kg2_util.CURIE_PREFIX_PDQ, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PDQ')],
                         'PSY': [self.process_psy_item, kg2_util.CURIE_PREFIX_PSY, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PSY')],
                         'RXNORM': [self.process_rxnorm_item, kg2_util.CURIE_PREFIX_RXNORM, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'RXNORM')],
-                        'VANDF': [self.process_vandf_item, kg2_util.CURIE_PREFIX_VANDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'VANDF')]}
+                        'VANDF': [self.process_vandf_item, kg2_util.CURIE_PREFIX_VANDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'VANDF')],
+                        'UMLS': [self.process_umls_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_IDENTIFIERS_ORG_REGISTRY, 'umls')]}
         self.create_umls_accession_heirarchy()
         self.create_accession_sources_heirarchy()
 
@@ -53,6 +54,7 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
         self.INFO_KEY = 'attributes'
         self.NAMES_KEY = 'names'
         self.TUIS_KEY = 'tuis'
+        self.DEFINITIONS_KEY = 'definitions'
 
 
     def process_node(self, source, node_id, data):
@@ -94,7 +96,8 @@ def get_name_synonyms(self, names_dict, source):
                 names += [name for name in names_dict.get(key, dict()).get('Y', list())]
                 names += [name for name in names_dict.get(key, dict()).get('N', list())]
 
-        assert len(names) > 0
+        if len(names) == 0:
+            return None, None
         if len(names) == 1:
             return names[0], list()
         return names[0], names[1:]
@@ -103,8 +106,8 @@ def get_basic_info(self, source, node_id, info):
         curie_prefix = self.SOURCES[source][1]
         provided_by = self.SOURCES[source][2]
         cuis = info.get(self.CUIS_KEY, list())
-        tuis = info.get(self.TUIS_KEY, list())
-        if curie_prefix == kg2_util.CURIE_PREFIX_UMLS:
+        tuis = sorted(info.get(self.TUIS_KEY, list()))
+        if curie_prefix == kg2_util.CURIE_PREFIX_UMLS and source != 'UMLS':
             if len(cuis) != 1:
                 return None, None, None, None, None, None, None, None
             node_id = cuis[0]
@@ -114,6 +117,8 @@ def get_basic_info(self, source, node_id, info):
 
         names = info.get(self.NAMES_KEY, dict())
         name, synonyms = self.get_name_synonyms(names, source)
+        if name == None:
+            return None, None, None, None, None, None, None, None
 
         return node_curie, iri, name, category, provided_by, synonyms, cuis, tuis
 
@@ -616,3 +621,12 @@ def process_vandf_item(self, node_id, info, umls_code):
         ddf = attributes.get('DDF', list())
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+
+    def process_umls_item(self, node_id, info, umls_code):
+        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        if node_curie == None:
+            return
+
+        description = info.get(self.DEFINITIONS_KEY, str())
+
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
\ No newline at end of file

From c579e8573daabf20fda232315c9d6f70dedd83f8 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 23 Aug 2023 09:42:24 -0700
Subject: [PATCH 065/117] #316 switch it to group by source then code to make
 the output ordered by source

---
 umls_mysql_to_list_jsonl.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index fc91b7ca..90475c0d 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -52,9 +52,9 @@ def code_sources(cursor, output):
     info_key = 'attributes'
 
     # See TTY meanings here: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/abbreviations.html
-    names_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.TTY, '|', con.ISPREF, '|', con.STR) SEPARATOR '\t') FROM MRCONSO con GROUP BY con.CODE, con.SAB"
-    extra_info_sql_statement = "SELECT sat.CODE, sat.SAB, GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', REPLACE(sat.ATV, '\t', ' ')) SEPARATOR '\t') FROM MRSAT sat GROUP BY sat.CODE, sat.SAB"
-    tuis_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT sty.TUI) FROM MRCONSO con LEFT JOIN MRSTY sty ON con.CUI = sty.CUI GROUP BY con.CODE, con.SAB"
+    names_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.TTY, '|', con.ISPREF, '|', con.STR) SEPARATOR '\t') FROM MRCONSO con GROUP BY con.SAB, con.CODE"
+    extra_info_sql_statement = "SELECT sat.CODE, sat.SAB, GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', REPLACE(sat.ATV, '\t', ' ')) SEPARATOR '\t') FROM MRSAT sat GROUP BY sat.SAB, sat.CODE"
+    tuis_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT sty.TUI) FROM MRCONSO con LEFT JOIN MRSTY sty ON con.CUI = sty.CUI GROUP BY con.SAB, con.CODE"
 
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():

From 1cd2b17b66c75f17171c7686aea2bd27d208b8ea Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 23 Aug 2023 09:42:35 -0700
Subject: [PATCH 066/117] #316 remove some unneeded code

---
 umls_list_jsonl_to_kg_jsonl.py | 64 ----------------------------------
 1 file changed, 64 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 49569844..a431d9cd 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -22,59 +22,6 @@
 TUI_MAPPINGS = dict()
 IRI_MAPPINGS = dict()
 
-# ATC_PREFIX = kg2_util.CURIE_PREFIX_ATC
-# CHV_PREFIX = kg2_util.CURIE_PREFIX_CHV
-# DRUGBANK_PREFIX = kg2_util.CURIE_PREFIX_DRUGBANK
-# FMA_PREFIX = kg2_util.CURIE_PREFIX_FMA
-# GO_PREFIX = kg2_util.CURIE_PREFIX_GO
-# HCPCS_PREFIX = kg2_util.CURIE_PREFIX_HCPCS
-# HGNC_PREFIX = kg2_util.CURIE_PREFIX_HGNC
-# HL7_PREFIX = kg2_util.CURIE_PREFIX_UMLS
-# HPO_PREFIX = kg2_util.CURIE_PREFIX_HP
-# ICD10PCS_PREFIX = kg2_util.CURIE_PREFIX_ICD10PCS
-# ICD9CM_PREFIX = kg2_util.CURIE_PREFIX_ICD9
-# MEDRT_PREFIX = kg2_util.CURIE_PREFIX_UMLS
-# MEDLINEPLUS_PREFIX = kg2_util.CURIE_PREFIX_UMLS
-# MSH_PREFIX = kg2_util.CURIE_PREFIX_MESH
-# MTH_PREFIX = kg2_util.CURIE_PREFIX_UMLS
-# NCBI_PREFIX = kg2_util.CURIE_PREFIX_NCBI_TAXON
-# NCI_PREFIX = kg2_util.CURIE_PREFIX_NCIT
-# NDDF_PREFIX = kg2_util.CURIE_PREFIX_NDDF
-# OMIM_PREFIX = kg2_util.CURIE_PREFIX_OMIM
-# PDQ_PREFIX = kg2_util.CURIE_PREFIX_PDQ
-# PSY_PREFIX = kg2_util.CURIE_PREFIX_PSY
-# RXNORM_PREFIX = kg2_util.CURIE_PREFIX_RXNORM
-# VANDF_PREFIX = kg2_util.CURIE_PREFIX_VANDF
-
-# UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE
-
-# DESIRED_CODES = {'ATC': [umls_util.process_atc_item, kg2_util.CURIE_PREFIX_ATC, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'ATC')],
-#                  'CHV': [umls_util.process_chv_item, kg2_util.CURIE_PREFIX_CHV, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'CHV')],
-#                  'DRUGBANK': [umls_util.process_drugbank_item, kg2_util.CURIE_PREFIX_DRUGBANK, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'DRUGBANK')],
-#                  'FMA': [umls_util.process_fma_item, kg2_util.CURIE_PREFIX_FMA, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'FMA')],
-#                  'GO': [umls_util.process_go_item, kg2_util.CURIE_PREFIX_GO, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'GO')],
-#                  'HCPCS': [umls_util.process_hcpcs_item, kg2_util.CURIE_PREFIX_HCPCS, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'HCPCS')],
-#                  'HGNC': [umls_util.process_hgnc_item, kg2_util.CURIE_PREFIX_HGNC, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'HGNC')],
-#                  'HL7V3.0': [umls_util.process_hl7_item, kg2_util.CURIE_PREFIX_UMLS, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'HL7')],
-#                  'HPO': [umls_util.process_hpo_item, kg2_util.CURIE_PREFIX_HP, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'HPO')],
-#                  'ICD10PCS': [umls_util.process_icd10pcs_item, kg2_util.CURIE_PREFIX_ICD10PCS, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'ICD10PCS')],
-#                  'ICD9CM': [umls_util.process_icd9cm_item, kg2_util.CURIE_PREFIX_ICD9, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'ICD9CM')],
-#                  'MED-RT': [umls_util.process_medrt_item, kg2_util.CURIE_PREFIX_UMLS, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'MED-RT')],
-#                  'MEDLINEPLUS': [umls_util.process_medlineplus_item, kg2_util.CURIE_PREFIX_UMLS, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'MEDLINEPLUS')],
-#                  'MSH': [umls_util.process_msh_item, kg2_util.CURIE_PREFIX_MESH, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'MSH')],
-#                  'MTH': [umls_util.process_mth_item, kg2_util.CURIE_PREFIX_UMLS, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'MTH')],
-#                  'NCBI': [umls_util.process_ncbi_item, kg2_util.CURIE_PREFIX_NCBI_TAXON, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'NCBITAXON')],
-#                  'NCI': [umls_util.process_nci_item, kg2_util.CURIE_PREFIX_NCIT, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'NCI')],
-#                  'NDDF': [umls_util.process_nddf_item, kg2_util.CURIE_PREFIX_NDDF, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'NCI')],
-#                  'OMIM': [umls_util.process_omim_item, kg2_util.CURIE_PREFIX_OMIM, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'OMIM')],
-#                  'PDQ': [umls_util.process_pdq_item, kg2_util.CURIE_PREFIX_PDQ, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'PDQ')],
-#                  'PSY': [umls_util.process_psy_item, kg2_util.CURIE_PREFIX_PSY, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'PSY')],
-#                  'RXNORM': [umls_util.process_rxnorm_item, kg2_util.CURIE_PREFIX_RXNORM, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'RXNORM')],
-#                  'VANDF': [umls_util.process_vandf_item, kg2_util.CURIE_PREFIX_VANDF, umls_util.make_node_id(UMLS_SOURCE_PREFIX, 'VANDF')]}
-
-# # Mined from HTML Page Source of https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
-# ACCESSION_HEIRARCHY = list()
-# ACCESSION_SOURCES_HEIRARCHY = dict()
 
 def get_args():
     arg_parser = argparse.ArgumentParser(description='umls_list_jsonl_to_kg_jsonl.py: converts UMLS MySQL JSON Lines dump into KG2 JSON format')
@@ -91,17 +38,6 @@ def extract_node_id(node_id_str):
     return node_id[0].strip(), node_id[1].strip()
 
 
-def create_accession_heirarchy(full_heirarchy):
-    for [source, key] in full_heirarchy:
-        if source in DESIRED_CODES:
-            ACCESSION_HEIRARCHY.append((source, key))
-
-def create_accession_sources_heirarchy():
-    for (source, key) in ACCESSION_HEIRARCHY:
-        if source not in ACCESSION_SOURCES_HEIRARCHY:
-            ACCESSION_SOURCES_HEIRARCHY[source] = list()
-        ACCESSION_SOURCES_HEIRARCHY[source].append(key)
-
 if __name__ == '__main__':
     print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
     args = get_args()

From 6e36f24c9041909816804ce628a67b4f019e1113 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 23 Aug 2023 16:26:32 -0700
Subject: [PATCH 067/117] #316 descriptions are present for non-CUI nodes too

---
 umls_mysql_to_list_jsonl.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index 90475c0d..e7f055c4 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -47,6 +47,7 @@ def code_sources(cursor, output):
     tui_key = 'tuis'
     cui_key = 'cuis'
     name_key = 'names'
+    definitions_key = 'definitions'
 
     # See info about these here: https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
     info_key = 'attributes'
@@ -55,6 +56,7 @@ def code_sources(cursor, output):
     names_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.TTY, '|', con.ISPREF, '|', con.STR) SEPARATOR '\t') FROM MRCONSO con GROUP BY con.SAB, con.CODE"
     extra_info_sql_statement = "SELECT sat.CODE, sat.SAB, GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', REPLACE(sat.ATV, '\t', ' ')) SEPARATOR '\t') FROM MRSAT sat GROUP BY sat.SAB, sat.CODE"
     tuis_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT sty.TUI) FROM MRCONSO con LEFT JOIN MRSTY sty ON con.CUI = sty.CUI GROUP BY con.SAB, con.CODE"
+    definitions_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT def.DEF SEPARATOR ';') FROM MRCONSO con INNER JOIN MRDEF def on con.CUI=def.CUI GROUP BY con.SAB, con.CODE"
 
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():
@@ -106,6 +108,17 @@ def code_sources(cursor, output):
 
     print("Finished tuis_sql_statement at", kg2_util.date())
 
+    cursor.execute(definitions_sql_statement)
+    for result in cursor.fetchall():
+        (node_id, node_source, definition) = result
+        key = (node_source, node_id)
+        if key not in code_source_info:
+            # This occurs if a node doesn't have a name.
+            continue
+        code_source_info[key][definitions_key] = definition
+
+    print("Finished definitions_sql_statement at", kg2_util.date())
+
     record_num = 0
     for key, val in code_source_info.items():
         record_num += 1
@@ -129,7 +142,7 @@ def cui_sources(cursor, output, sources):
     names_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT CONCAT(TTY, '|', SAB, '|', ISPREF, '|', STR) SEPARATOR '\t') FROM MRCONSO WHERE SAB IN " + sources_where + " GROUP BY CUI"
     tuis_sql_statement = "SELECT CUI, GROUP_CONCAT(TUI) FROM MRSTY GROUP BY CUI"
     relations_sql_statement = "SELECT DISTINCT CUI1, REL, RELA, DIR, CUI2, SAB FROM MRREL WHERE SAB IN " + sources_where
-    definitions_sql_statement = "SELECT CUI, DEF FROM MRDEF WHERE SAB IN " + sources_where
+    definitions_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT DEF SEPARATOR ';') FROM MRDEF WHERE SAB IN " + sources_where + " GROUP BY CUI"
 
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():

From eb587858bdb4ce131d9f09669b477147f910dde6 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 23 Aug 2023 16:26:49 -0700
Subject: [PATCH 068/117] #316 non-CUI descriptions and xref edges

---
 umls_util.py | 145 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 92 insertions(+), 53 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index b02bcfe4..28546e9b 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -55,9 +55,13 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
         self.NAMES_KEY = 'names'
         self.TUIS_KEY = 'tuis'
         self.DEFINITIONS_KEY = 'definitions'
+        self.last_source = ''
 
 
     def process_node(self, source, node_id, data):
+        if source != self.last_source and self.last_source != '' and self.last_source in self.SOURCES:
+            print("Finished processing", self.last_source, "at", kg2_util.date())
+        self.last_source = source
         if source in self.SOURCES:
             self.SOURCES[source][0](node_id, data, source)
 
@@ -102,14 +106,25 @@ def get_name_synonyms(self, names_dict, source):
             return names[0], list()
         return names[0], names[1:]
 
+    def create_xref_edges(subject_id, cuis, provided_by):
+        relation_curie = 'UMLS:xref'
+        relation_label = 'xref'
+
+        for cui in cuis:
+            object_id = make_node_id(kg2_util.CURIE_PREFIX_UMLS, cui)
+            # TODO: resolve update_date
+            self.edges_output.write(make_edge(subject_id, object_id, relation_curie, relation_label, primary_knowledge_source, "2023"))
+
+
     def get_basic_info(self, source, node_id, info):
         curie_prefix = self.SOURCES[source][1]
         provided_by = self.SOURCES[source][2]
         cuis = info.get(self.CUIS_KEY, list())
         tuis = sorted(info.get(self.TUIS_KEY, list()))
+        description = info.get(self.DEFINITIONS_KEY, str())
         if curie_prefix == kg2_util.CURIE_PREFIX_UMLS and source != 'UMLS':
             if len(cuis) != 1:
-                return None, None, None, None, None, None, None, None
+                return None, None, None, None, None, None, None, None, None
             node_id = cuis[0]
         node_curie = self.make_node_id(curie_prefix, node_id)
         iri = self.IRI_MAPPINGS[curie_prefix] + node_id
@@ -118,9 +133,9 @@ def get_basic_info(self, source, node_id, info):
         names = info.get(self.NAMES_KEY, dict())
         name, synonyms = self.get_name_synonyms(names, source)
         if name == None:
-            return None, None, None, None, None, None, None, None
+            return None, None, None, None, None, None, None, None, None
 
-        return node_curie, iri, name, category, provided_by, synonyms, cuis, tuis
+        return node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis
 
     def create_description(self, tuis, comment=""):
         description = comment
@@ -131,17 +146,18 @@ def create_description(self, tuis, comment=""):
 
 
     def process_atc_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         atc_level = info.get(self.INFO_KEY, dict()).get('ATC_LEVEL', list())[0]
         is_drug_class = info.get(self.INFO_KEY, dict()).get('IS_DRUG_CLASS', list()) == ["Y"]
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_chv_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         combo_score = info.get(self.INFO_KEY, dict()).get('COMBO_SCORE', list())
@@ -151,32 +167,35 @@ def process_chv_item(self, node_id, info, umls_code):
         disparaged = info.get(self.INFO_KEY, dict()).get('DISPARAGED', list())
         frequency = info.get(self.INFO_KEY, dict()).get('FREQUENCY', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_drugbank_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         fda_codes = info.get(self.INFO_KEY, dict()).get('FDA_UNII_CODE', list())
         secondary_accession_keys = info.get(self.INFO_KEY, dict()).get('SID', list())
 
         # TODO: figure out update date
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_fma_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         authority = info.get(self.INFO_KEY, dict()).get('AUTHORITY', list())
         date_last_modified = info.get(self.INFO_KEY, dict()).get('DATE_LAST_MODIFIED', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_go_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id.replace('GO:', ''), info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id.replace('GO:', ''), info)
 
         # GO-specific information
         attributes = info.get(self.INFO_KEY, dict())
@@ -200,10 +219,11 @@ def process_go_item(self, node_id, info, umls_code):
         sid = attributes.get('SID', list())
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, go_comment))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_hcpcs_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
         attributes = info.get(self.INFO_KEY, dict())
@@ -225,11 +245,12 @@ def process_hcpcs_item(self, node_id, info, umls_code):
         hac = attributes.get('HAC', list()) # HCPCS action code - code denoting the change made to a procedure or modifier code within the HCPCS system.
         hbt = attributes.get('HBT', list()) # HCPCS Berenson-Eggers Type of Service Code - BETOS for the procedure code based on generally agreed upon clinically meaningful groupings of procedures and services.
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_hgnc_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id.replace('HGNC:', ''), info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id.replace('HGNC:', ''), info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
@@ -263,11 +284,12 @@ def process_hgnc_item(self, node_id, info, umls_code):
         lncipedia = attributes.get('LNCIPEDIA', list())
         gene_fam_desc = attributes.get('GENE_FAM_DESC', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_hl7_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
         if node_curie == None:
             return
 
@@ -309,36 +331,41 @@ def process_hl7_item(self, node_id, info, umls_code):
         hl7di = attributes.get('HL7DI', list())
         hl7cs = attributes.get('HL7CS', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_hpo_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id.replace('HP:', ''), info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id.replace('HP:', ''), info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
         sid = attributes.get('SID', list())
-        hpo_comment = attributes.get('HPO_COMMENT', list())
+        hpo_comment = attributes.get('HPO_COMMENT', str())
+        if len(hpo_comment) > 0:
+            hpo_comment = hpo_comment[0]
         date_created = attributes.get('DATE_CREATED', list())
         syn_qualifier = attributes.get('SYN_QUALIFIER', list())
         ref = attributes.get('REF', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_icd10pcs_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
         added_meaning = attributes.get('ADDED_MEANING', list())
         order_no = attributes.get('ORDER_NO', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_icd9cm_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
@@ -349,10 +376,11 @@ def process_icd9cm_item(self, node_id, info, umls_code):
         icn = attributes.get('ICN', list())
         ica = attributes.get('ICA', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
     def process_medrt_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
         if node_curie == None:
             return
 
@@ -361,11 +389,12 @@ def process_medrt_item(self, node_id, info, umls_code):
         term_status = attributes.get('TERM_STATUS', list())
         concept_type = attributes.get('CONCEPT_TYPE', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_medlineplus_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
         if node_curie == None:
             return
 
@@ -377,11 +406,12 @@ def process_medlineplus_item(self, node_id, info, umls_code):
         mp_primary_institute_url = attributes.get('MP_PRIMARY_INSTITUTE_URL', list())
         mp_other_language_url = attributes.get('MP_OTHER_LANGUAGE_URL', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_msh_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
@@ -413,11 +443,12 @@ def process_msh_item(self, node_id, info, umls_code):
         ol = attributes.get('OL', list())
         mn = attributes.get('MN', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_mth_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
         if node_curie == None:
             return
 
@@ -441,11 +472,12 @@ def process_mth_item(self, node_id, info, umls_code):
         mth_maptocomplexity = attributes.get('MTH_MAPTOCOMPLEXITY', list())
         sos = attributes.get('SOS', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_ncbi_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
@@ -453,11 +485,12 @@ def process_ncbi_item(self, node_id, info, umls_code):
         authority_name = attributes.get('AUTHORITY_NAME', list())
         rank = attributes.get('RANK', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_nci_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
@@ -507,19 +540,21 @@ def process_nci_item(self, node_id, info, umls_code):
         us_recommended_intake = attributes.get('US_RECOMMENDED_INTAKE', list())
         chemical_formula = attributes.get('CHEMICAL_FORMULA', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
     def process_nddf_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
         ndc = attributes.get('NDC', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
     def process_omim_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
@@ -531,11 +566,12 @@ def process_omim_item(self, node_id, info, umls_code):
         mimtypemeaning = attributes.get('MIMTYPEMEANING', list())
         mimtype = attributes.get('MIMTYPE', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_pdq_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
@@ -552,22 +588,24 @@ def process_pdq_item(self, node_id, info, umls_code):
         orig_sty = attributes.get('ORIG_STY', list())
         menu_type = attributes.get('MENU_TYPE', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_psy_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
         hn = attributes.get('HN', list())
         pyr = attributes.get('PYR', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_rxnorm_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
@@ -596,11 +634,12 @@ def process_rxnorm_item(self, node_id, info, umls_code):
         rxn_qualitative_distinction = attributes.get('RXN_QUALITATIVE_DISTINCTION', list())
         orig_source = attributes.get('ORIG_SOURCE', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_vandf_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
@@ -620,13 +659,13 @@ def process_vandf_item(self, node_id, info, umls_code):
         va_dispense_unit = attributes.get('VA_DISPENSE_UNIT', list())
         ddf = attributes.get('DDF', list())
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
 
     def process_umls_item(self, node_id, info, umls_code):
-        node_curie, iri, name, category, provided_by, synonyms, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
+        node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id, info)
         if node_curie == None:
             return
 
-        description = info.get(self.DEFINITIONS_KEY, str())
-
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
\ No newline at end of file
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_xref_edges(node_curie, cuis, provided_by)
\ No newline at end of file

From d0067194921b625ba38bfe28c97ae492b2e6bd83 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 23 Aug 2023 16:28:29 -0700
Subject: [PATCH 069/117] #316 don't need xrefs for CUI nodes

---
 umls_util.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index 28546e9b..b603e015 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -332,7 +332,6 @@ def process_hl7_item(self, node_id, info, umls_code):
         hl7cs = attributes.get('HL7CS', list())
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
-        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_hpo_item(self, node_id, info, umls_code):
@@ -390,7 +389,6 @@ def process_medrt_item(self, node_id, info, umls_code):
         concept_type = attributes.get('CONCEPT_TYPE', list())
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
-        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_medlineplus_item(self, node_id, info, umls_code):
@@ -407,7 +405,6 @@ def process_medlineplus_item(self, node_id, info, umls_code):
         mp_other_language_url = attributes.get('MP_OTHER_LANGUAGE_URL', list())
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
-        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_msh_item(self, node_id, info, umls_code):
@@ -473,7 +470,6 @@ def process_mth_item(self, node_id, info, umls_code):
         sos = attributes.get('SOS', list())
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
-        self.create_xref_edges(node_curie, cuis, provided_by)
 
 
     def process_ncbi_item(self, node_id, info, umls_code):
@@ -668,4 +664,3 @@ def process_umls_item(self, node_id, info, umls_code):
             return
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
-        self.create_xref_edges(node_curie, cuis, provided_by)
\ No newline at end of file

From ae147b3945b632c841f0ebb397a60a9c1068884c Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 23 Aug 2023 16:54:12 -0700
Subject: [PATCH 070/117] #316 only want definitions from that source

---
 umls_mysql_to_list_jsonl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index e7f055c4..df34602a 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -56,7 +56,7 @@ def code_sources(cursor, output):
     names_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT con.CUI), GROUP_CONCAT(DISTINCT CONCAT(con.TTY, '|', con.ISPREF, '|', con.STR) SEPARATOR '\t') FROM MRCONSO con GROUP BY con.SAB, con.CODE"
     extra_info_sql_statement = "SELECT sat.CODE, sat.SAB, GROUP_CONCAT(DISTINCT CONCAT(sat.ATN, '|', REPLACE(sat.ATV, '\t', ' ')) SEPARATOR '\t') FROM MRSAT sat GROUP BY sat.SAB, sat.CODE"
     tuis_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT sty.TUI) FROM MRCONSO con LEFT JOIN MRSTY sty ON con.CUI = sty.CUI GROUP BY con.SAB, con.CODE"
-    definitions_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT def.DEF SEPARATOR ';') FROM MRCONSO con INNER JOIN MRDEF def on con.CUI=def.CUI GROUP BY con.SAB, con.CODE"
+    definitions_sql_statement = "SELECT con.CODE, con.SAB, GROUP_CONCAT(DISTINCT def.DEF SEPARATOR ';') FROM MRCONSO con INNER JOIN MRDEF def on con.CUI=def.CUI WHERE con.SAB=def.SAB GROUP BY con.SAB, con.CODE"
 
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():

From b43dd505ce5aadb65404c0829abc815e8c742115 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 24 Aug 2023 12:12:03 -0700
Subject: [PATCH 071/117] #316 we only want UMLS CUI descriptions from sources
 that we are using

---
 umls_mysql_to_list_jsonl.py |  7 +++++--
 umls_util.py                | 11 ++++++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index df34602a..0776ab23 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -142,7 +142,7 @@ def cui_sources(cursor, output, sources):
     names_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT CONCAT(TTY, '|', SAB, '|', ISPREF, '|', STR) SEPARATOR '\t') FROM MRCONSO WHERE SAB IN " + sources_where + " GROUP BY CUI"
     tuis_sql_statement = "SELECT CUI, GROUP_CONCAT(TUI) FROM MRSTY GROUP BY CUI"
     relations_sql_statement = "SELECT DISTINCT CUI1, REL, RELA, DIR, CUI2, SAB FROM MRREL WHERE SAB IN " + sources_where
-    definitions_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT DEF SEPARATOR ';') FROM MRDEF WHERE SAB IN " + sources_where + " GROUP BY CUI"
+    definitions_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT CONCAT(SAB, '|', DEF) SEPARATOR '\t') FROM MRDEF WHERE SAB IN " + sources_where + " GROUP BY CUI"
 
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():
@@ -204,7 +204,10 @@ def cui_sources(cursor, output, sources):
         if key not in cui_source_info:
             # See above for explanation
             continue
-        cui_source_info[key][definitions_key] = definition
+        for def_piece in definition.split('\t'):
+            split_def_piece = def_piece.split('|')
+            assert len(split_def_piece) == 2, split_def_piece
+            cui_source_info[key][definitions_key][split_def_piece[0]] = split_def_piece[1]
 
     print("Finished definitions_sql_statement at", kg2_util.date())
 
diff --git a/umls_util.py b/umls_util.py
index b603e015..bde5931b 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -121,7 +121,16 @@ def get_basic_info(self, source, node_id, info):
         provided_by = self.SOURCES[source][2]
         cuis = info.get(self.CUIS_KEY, list())
         tuis = sorted(info.get(self.TUIS_KEY, list()))
-        description = info.get(self.DEFINITIONS_KEY, str())
+        description = str()
+        if source == 'UMLS':
+            description = list()
+            description_dict = info.get(self.DEFINITIONS_KEY, dict())
+            for description_key in description_dict:
+                if description_key in self.SOURCES:
+                    description.append(description_dict[description_key])
+            description = '; '.join(description)
+        else:
+            description = info.get(self.DEFINITIONS_KEY, str())
         if curie_prefix == kg2_util.CURIE_PREFIX_UMLS and source != 'UMLS':
             if len(cuis) != 1:
                 return None, None, None, None, None, None, None, None, None

From 743878af1771d6952d9e4ff5334803b01953f969 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 24 Aug 2023 12:30:51 -0700
Subject: [PATCH 072/117] #316 forgot to add definitions key to dictionary

---
 umls_mysql_to_list_jsonl.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index 0776ab23..b6532725 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -204,6 +204,8 @@ def cui_sources(cursor, output, sources):
         if key not in cui_source_info:
             # See above for explanation
             continue
+        if definitions_key not in cui_source_info[key]:
+            cui_source_info[key][definitions_key] = dict()
         for def_piece in definition.split('\t'):
             split_def_piece = def_piece.split('|')
             assert len(split_def_piece) == 2, split_def_piece

From 70d49317e2d76483fdba9464ed64fa352c56fb2d Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 24 Aug 2023 13:28:53 -0700
Subject: [PATCH 073/117] #316 can't have tabs in the definition

---
 umls_mysql_to_list_jsonl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index b6532725..24259de7 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -142,7 +142,7 @@ def cui_sources(cursor, output, sources):
     names_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT CONCAT(TTY, '|', SAB, '|', ISPREF, '|', STR) SEPARATOR '\t') FROM MRCONSO WHERE SAB IN " + sources_where + " GROUP BY CUI"
     tuis_sql_statement = "SELECT CUI, GROUP_CONCAT(TUI) FROM MRSTY GROUP BY CUI"
     relations_sql_statement = "SELECT DISTINCT CUI1, REL, RELA, DIR, CUI2, SAB FROM MRREL WHERE SAB IN " + sources_where
-    definitions_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT CONCAT(SAB, '|', DEF) SEPARATOR '\t') FROM MRDEF WHERE SAB IN " + sources_where + " GROUP BY CUI"
+    definitions_sql_statement = "SELECT CUI, GROUP_CONCAT(DISTINCT CONCAT(SAB, '|', REPLACE(DEF, '\t', ' ')) SEPARATOR '\t') FROM MRDEF WHERE SAB IN " + sources_where + " GROUP BY CUI"
 
     cursor.execute(names_sql_statement)
     for result in cursor.fetchall():

From 9c8d733e3a26625843e231ae45e9f527ab1364ad Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 24 Aug 2023 15:11:21 -0700
Subject: [PATCH 074/117] #316 resolve issues with xrefs and descriptions (for
 MEDLINEPLUS)

---
 umls_util.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index bde5931b..b2e76e49 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -106,14 +106,14 @@ def get_name_synonyms(self, names_dict, source):
             return names[0], list()
         return names[0], names[1:]
 
-    def create_xref_edges(subject_id, cuis, provided_by):
+    def create_xref_edges(self, subject_id, cuis, provided_by):
         relation_curie = 'UMLS:xref'
         relation_label = 'xref'
 
         for cui in cuis:
-            object_id = make_node_id(kg2_util.CURIE_PREFIX_UMLS, cui)
+            object_id = self.make_node_id(kg2_util.CURIE_PREFIX_UMLS, cui)
             # TODO: resolve update_date
-            self.edges_output.write(make_edge(subject_id, object_id, relation_curie, relation_label, primary_knowledge_source, "2023"))
+            self.edges_output.write(kg2_util.make_edge(subject_id, object_id, relation_curie, relation_label, provided_by, "2023"))
 
 
     def get_basic_info(self, source, node_id, info):
@@ -147,7 +147,7 @@ def get_basic_info(self, source, node_id, info):
         return node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis
 
     def create_description(self, tuis, comment=""):
-        description = comment
+        description = comment.replace('<p>', '').replace('</p>', '').replace('<li>', '').replace('</li>', '').replace('<ul>', '').replace('</ul>', '')
         for tui in tuis:
             description += "; UMLS Semantic Type: STY:" + tui
         description = description.strip("; ")

From 6019cf1140d1a43551201abea7f7d7ff5c28f18b Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 24 Aug 2023 16:07:30 -0700
Subject: [PATCH 075/117] #316 initial umls edges

---
 umls_util.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/umls_util.py b/umls_util.py
index b2e76e49..8be10253 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -55,6 +55,7 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
         self.NAMES_KEY = 'names'
         self.TUIS_KEY = 'tuis'
         self.DEFINITIONS_KEY = 'definitions'
+        self.RELATIONS_KEY = 'relations'
         self.last_source = ''
 
 
@@ -115,6 +116,21 @@ def create_xref_edges(self, subject_id, cuis, provided_by):
             # TODO: resolve update_date
             self.edges_output.write(kg2_util.make_edge(subject_id, object_id, relation_curie, relation_label, provided_by, "2023"))
 
+## TODO: make relation nodes
+## TODO: make TUI nodes
+
+    def create_umls_edges(self, subject_id, relations, provided_by):
+        for relation_source in relations:
+            if relation_source in self.SOURCES:
+                for relation in relations[relation_source]:
+                    relation_abbr, relation_label, relation_direction = relation.split(',')
+                    if relation_label == 'None':
+                        relation_label = relation_abbr
+                    relation_curie = self.make_node_id(kg2_util.CURIE_PREFIX_UMLS, relation_label)
+                    for cui in relations[relation_source][relation]:
+                        object_id = self.make_node_id(kg2_util.CURIE_PREFIX_UMLS, cui)
+                        # TODO: resolve update_date
+                        self.edges_output.write(kg2_util.make_edge(subject_id, object_id, relation_curie, relation_label, provided_by, "2023"))
 
     def get_basic_info(self, source, node_id, info):
         curie_prefix = self.SOURCES[source][1]
@@ -673,3 +689,4 @@ def process_umls_item(self, node_id, info, umls_code):
             return
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.create_umls_edges(node_curie, info.get(RELATIONS_KEY, dict()), provided_by)

From bdd5f9133246c321c919489b6a4a51aeaadfb214 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 24 Aug 2023 16:08:06 -0700
Subject: [PATCH 076/117] #316 cleaning up umls list to jsonl

---
 umls_list_jsonl_to_kg_jsonl.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index a431d9cd..35fd780e 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -53,9 +53,6 @@ def extract_node_id(node_id_str):
     input_read_jsonlines_info = kg2_util.start_read_jsonlines(input_file_name)
     input_items = input_read_jsonlines_info[0]
 
-    name_keys = set()
-    attribute_keys = set()
-
     with open('tui_combo_mappings.json') as mappings:
         TUI_MAPPINGS = json.load(mappings)
 

From 6e937c9b410f9a94d5132e1bbddffba14e5ea99d Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 25 Aug 2023 10:32:02 -0700
Subject: [PATCH 077/117] #316 handling relation direction, used example
 UMLS:C2063866 RO,may_treat,Y with objects [UMLS:C2825616, UMLS:C3818973] to
 realize need to flip

---
 umls_util.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index 8be10253..e68ef4b6 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -119,18 +119,22 @@ def create_xref_edges(self, subject_id, cuis, provided_by):
 ## TODO: make relation nodes
 ## TODO: make TUI nodes
 
-    def create_umls_edges(self, subject_id, relations, provided_by):
+    def create_umls_edges(self, subject_id, relations):
         for relation_source in relations:
             if relation_source in self.SOURCES:
+                provided_by = self.SOURCES[relation_source][2]
                 for relation in relations[relation_source]:
                     relation_abbr, relation_label, relation_direction = relation.split(',')
                     if relation_label == 'None':
                         relation_label = relation_abbr
-                    relation_curie = self.make_node_id(kg2_util.CURIE_PREFIX_UMLS, relation_label)
+                    relation_curie = self.make_node_id(relation_source, relation_label)
                     for cui in relations[relation_source][relation]:
                         object_id = self.make_node_id(kg2_util.CURIE_PREFIX_UMLS, cui)
                         # TODO: resolve update_date
-                        self.edges_output.write(kg2_util.make_edge(subject_id, object_id, relation_curie, relation_label, provided_by, "2023"))
+                        if relation_direction == 'Y':
+                            self.edges_output.write(kg2_util.make_edge(object_id, subject_id, relation_curie, relation_label, provided_by, "2023"))
+                        else:
+                            self.edges_output.write(kg2_util.make_edge(subject_id, object_id, relation_curie, relation_label, provided_by, "2023"))
 
     def get_basic_info(self, source, node_id, info):
         curie_prefix = self.SOURCES[source][1]
@@ -689,4 +693,4 @@ def process_umls_item(self, node_id, info, umls_code):
             return
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
-        self.create_umls_edges(node_curie, info.get(RELATIONS_KEY, dict()), provided_by)
+        self.create_umls_edges(node_curie, info.get(self.RELATIONS_KEY, dict()))

From 92558feeea1a24e17af71b2156fa4d6fc2890f64 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 25 Aug 2023 15:26:31 -0700
Subject: [PATCH 078/117] #316 trying to make OMIM play nice

---
 umls_util.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index e68ef4b6..c87ecec5 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -57,6 +57,7 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
         self.DEFINITIONS_KEY = 'definitions'
         self.RELATIONS_KEY = 'relations'
         self.last_source = ''
+        self.hgnc_to_omim = dict()
 
 
     def process_node(self, source, node_id, data):
@@ -305,7 +306,7 @@ def process_hgnc_item(self, node_id, info, umls_code):
         ena = attributes.get('ENA', list())
         rgd_id = attributes.get('RGD_ID', list())
         date_symbol_changed = attributes.get('DATE_SYMBOL_CHANGED', list())
-        omim_id = attributes.get('OMIM_ID', list())
+        omim_id_list = attributes.get('OMIM_ID', list())
         gene_fam_id = attributes.get('GENE_FAM_ID', list())
         gene_symbol = attributes.get('GENESYMBOL', list())
         ez = attributes.get('EZ', list())
@@ -313,6 +314,10 @@ def process_hgnc_item(self, node_id, info, umls_code):
         lncipedia = attributes.get('LNCIPEDIA', list())
         gene_fam_desc = attributes.get('GENE_FAM_DESC', list())
 
+        if len(gene_symbol) > 0:
+            for omim_id in omim_id_list:
+                self.hgnc_to_omim[self.make_node_id(kg2_util.CURIE_PREFIX_OMIM, omim_id)] = gene_symbol[0]
+
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
         self.create_xref_edges(node_curie, cuis, provided_by)
 
@@ -583,7 +588,7 @@ def process_omim_item(self, node_id, info, umls_code):
 
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
-        genesymbol = attributes.get('GENESYMBOL', list())
+        gene_symbol = attributes.get('GENESYMBOL', list())
         mimtypevalue = attributes.get('MIMTYPEVALUE', list())
         moved_from = attributes.get('MOVED_FROM', list())
         sos = attributes.get('SOS', list())
@@ -591,6 +596,18 @@ def process_omim_item(self, node_id, info, umls_code):
         mimtypemeaning = attributes.get('MIMTYPEMEANING', list())
         mimtype = attributes.get('MIMTYPE', list())
 
+        name = name.capitalize()
+        if len(mimtype) > 0:
+            mimtype = int(mimtype[0])
+            if mimtype in [0, 3, 5]:
+                category = kg2_util.BIOLINK_CATEGORY_PHENOTYPIC_FEATURE
+                name += " related phenotypic feature"
+            if mimtype in [1, 4]:
+                category = kg2_util.BIOLINK_CATEGORY_GENE
+                if len(gene_symbol) > 0:
+                    name = gene_symbol[0]
+                name = self.hgnc_to_omim.get(node_curie, name)
+
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
         self.create_xref_edges(node_curie, cuis, provided_by)
 

From c182c785e75c84f8d6b7d08cbe82db9274cb9c5e Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Tue, 29 Aug 2023 13:24:46 -0700
Subject: [PATCH 079/117] #316 HGNC nodes should be genes and names should be
 their gene name

---
 umls_util.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/umls_util.py b/umls_util.py
index c87ecec5..75cf6b15 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -317,6 +317,9 @@ def process_hgnc_item(self, node_id, info, umls_code):
         if len(gene_symbol) > 0:
             for omim_id in omim_id_list:
                 self.hgnc_to_omim[self.make_node_id(kg2_util.CURIE_PREFIX_OMIM, omim_id)] = gene_symbol[0]
+            name = gene_symbol[0] + " (human)"
+
+        category = kg2_util.BIOLINK_CATEGORY_GENE
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
         self.create_xref_edges(node_curie, cuis, provided_by)

From 26f51c08e54377fe7569ef72bca62e044658fceb Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Tue, 29 Aug 2023 15:19:42 -0700
Subject: [PATCH 080/117] #316 don't need (human) on HGNC nodes

---
 umls_util.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index 75cf6b15..0d84830e 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -81,11 +81,14 @@ def create_accession_sources_heirarchy(self):
                 self.ACCESSION_SOURCES_HEIRARCHY[source] = list()
             self.ACCESSION_SOURCES_HEIRARCHY[source].append(key)
 
-    def make_umls_node(self, node_curie, iri, name, category, update_date, provided_by, synonyms, description):
+    def make_umls_node(self, node_curie, iri, name, category, update_date, provided_by, synonyms, description, full_name=None):
         node = kg2_util.make_node(node_curie, iri, name, category, "2023", provided_by)
         node['synonym'] = synonyms
         node['description'] = description
 
+        if full_name is not None:
+            node['full_name'] = full_name
+
         self.nodes_output.write(node)
 
     def make_node_id(self, curie_prefix, node_id):
@@ -117,7 +120,6 @@ def create_xref_edges(self, subject_id, cuis, provided_by):
             # TODO: resolve update_date
             self.edges_output.write(kg2_util.make_edge(subject_id, object_id, relation_curie, relation_label, provided_by, "2023"))
 
-## TODO: make relation nodes
 ## TODO: make TUI nodes
 
     def create_umls_edges(self, subject_id, relations):
@@ -282,6 +284,8 @@ def process_hcpcs_item(self, node_id, info, umls_code):
     def process_hgnc_item(self, node_id, info, umls_code):
         node_curie, iri, name, category, provided_by, synonyms, description, cuis, tuis = self.get_basic_info(umls_code, node_id.replace('HGNC:', ''), info)
 
+        full_name = name
+
         # Currently not used, but extracting them in case we want them in the future
         attributes = info.get(self.INFO_KEY, dict())
         mgd_id = attributes.get('MGD_ID', list())
@@ -317,11 +321,11 @@ def process_hgnc_item(self, node_id, info, umls_code):
         if len(gene_symbol) > 0:
             for omim_id in omim_id_list:
                 self.hgnc_to_omim[self.make_node_id(kg2_util.CURIE_PREFIX_OMIM, omim_id)] = gene_symbol[0]
-            name = gene_symbol[0] + " (human)"
+            name = gene_symbol[0]
 
         category = kg2_util.BIOLINK_CATEGORY_GENE
 
-        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
+        self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description), full_name=full_name)
         self.create_xref_edges(node_curie, cuis, provided_by)
 
 

From c3805bbe016e8faec0860d22697bb08694890633 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 31 Aug 2023 16:18:45 -0700
Subject: [PATCH 081/117] #316 addressing biolink:Drug vs
 biolink:ChemicalEntity discussion

---
 umls_util.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/umls_util.py b/umls_util.py
index 0d84830e..751264fd 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -211,6 +211,9 @@ def process_drugbank_item(self, node_id, info, umls_code):
         secondary_accession_keys = info.get(self.INFO_KEY, dict()).get('SID', list())
 
         # TODO: figure out update date
+
+        category = kg2_util.BIOLINK_CATEGORY_DRUG
+
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
         self.create_xref_edges(node_curie, cuis, provided_by)
 
@@ -481,6 +484,9 @@ def process_msh_item(self, node_id, info, umls_code):
         ol = attributes.get('OL', list())
         mn = attributes.get('MN', list())
 
+        if tuis == ("T109", "T121"):
+            category = kg2_util.BIOLINK_CATEGORY_CHEMICAL_ENTITY
+
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
         self.create_xref_edges(node_curie, cuis, provided_by)
 
@@ -716,5 +722,8 @@ def process_umls_item(self, node_id, info, umls_code):
         if node_curie == None:
             return
 
+        if tuis == ("T109", "T121"):
+            category = kg2_util.BIOLINK_CATEGORY_CHEMICAL_ENTITY
+
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
         self.create_umls_edges(node_curie, info.get(self.RELATIONS_KEY, dict()))

From c0fb8fa92a0e137f5ced72e4de440d0d555cccfc Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 1 Sep 2023 00:00:30 -0700
Subject: [PATCH 082/117] #316 lists for the tuis not tuples

---
 umls_util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index 751264fd..6a824fd7 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -484,7 +484,7 @@ def process_msh_item(self, node_id, info, umls_code):
         ol = attributes.get('OL', list())
         mn = attributes.get('MN', list())
 
-        if tuis == ("T109", "T121"):
+        if tuis == ["T109", "T121"]:
             category = kg2_util.BIOLINK_CATEGORY_CHEMICAL_ENTITY
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
@@ -722,7 +722,7 @@ def process_umls_item(self, node_id, info, umls_code):
         if node_curie == None:
             return
 
-        if tuis == ("T109", "T121"):
+        if tuis == ["T109", "T121"]:
             category = kg2_util.BIOLINK_CATEGORY_CHEMICAL_ENTITY
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))

From 4ce14877f41a12dfe7af63c151c6e9b5733b5ebf Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 1 Sep 2023 00:35:57 -0700
Subject: [PATCH 083/117] #316 addressing NCIT gene to named thing issue

---
 umls_util.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index 6a824fd7..86cd5d13 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
-'''umls_list_jsonl_to_kg_jsonl.py: converts UMLS MySQL JSON Lines dump into KG2 JSON format
+'''umls_util.py: handles source-specific conversion of UMLS MySQL JSON Lines dump into KG2 JSON format
 
-   Usage: umls_list_jsonl_to_kg_jsonl.py [--test] <inputFile.jsonl> <outputNodesFile.json> <outputEdgesFile.jsonl>
+   Usage: import umls_util.py
 '''
 
 __author__ = 'Erica Wood'
@@ -583,6 +583,9 @@ def process_nci_item(self, node_id, info, umls_code):
         us_recommended_intake = attributes.get('US_RECOMMENDED_INTAKE', list())
         chemical_formula = attributes.get('CHEMICAL_FORMULA', list())
 
+        if tuis == ['T028'] and (len(entrezgene_id) > 0 or len(hgnc_id) > 0 or len(gene_encodes_product) > 0 or "gene" in name.lower() or "allele" in name.lower()):
+            category = kg2_util.BIOLINK_CATEGORY_GENE
+
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
         self.create_xref_edges(node_curie, cuis, provided_by)
 

From 91e4e5eb8833f5836d5c4fd6a1c4840cbd39cfce Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 1 Sep 2023 01:25:51 -0700
Subject: [PATCH 084/117] #316 there's a lot of T109-drug nodes, hopefully this
 will clear out the rest of the inconsistencies

---
 umls_util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index 86cd5d13..97dabd0b 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -484,7 +484,7 @@ def process_msh_item(self, node_id, info, umls_code):
         ol = attributes.get('OL', list())
         mn = attributes.get('MN', list())
 
-        if tuis == ["T109", "T121"]:
+        if "T109" in tuis:
             category = kg2_util.BIOLINK_CATEGORY_CHEMICAL_ENTITY
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
@@ -725,7 +725,7 @@ def process_umls_item(self, node_id, info, umls_code):
         if node_curie == None:
             return
 
-        if tuis == ["T109", "T121"]:
+        if "T109" in tuis:
             category = kg2_util.BIOLINK_CATEGORY_CHEMICAL_ENTITY
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))

From 29a4f7175d924d93a9621ca765563ef0e609558c Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 1 Sep 2023 01:57:19 -0700
Subject: [PATCH 085/117] #316 might have overcorrected

---
 umls_list_jsonl_to_kg_jsonl.py | 2 +-
 umls_util.py                   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 35fd780e..408f9765 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -67,7 +67,7 @@ def extract_node_id(node_id_str):
     for data in input_items:
         # There should only be one item in the data dictionary
         for entity in data:
-            if entity == "('NOCODE', 'MTH')":
+            if entity == "('MTH', 'NOCODE')":
                 continue
             value = data[entity]
             source, node_id = extract_node_id(entity)
diff --git a/umls_util.py b/umls_util.py
index 97dabd0b..4dd3856a 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -484,7 +484,7 @@ def process_msh_item(self, node_id, info, umls_code):
         ol = attributes.get('OL', list())
         mn = attributes.get('MN', list())
 
-        if "T109" in tuis:
+        if category == kg2_util.BIOLINK_CATEGORY_GENE and "T109" in tuis:
             category = kg2_util.BIOLINK_CATEGORY_CHEMICAL_ENTITY
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
@@ -725,7 +725,7 @@ def process_umls_item(self, node_id, info, umls_code):
         if node_curie == None:
             return
 
-        if "T109" in tuis:
+        if category == kg2_util.BIOLINK_CATEGORY_GENE and "T109" in tuis:
             category = kg2_util.BIOLINK_CATEGORY_CHEMICAL_ENTITY
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))

From 54e8cdc20d39e4ec4629b1a54f318a78016894d0 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 1 Sep 2023 02:46:52 -0700
Subject: [PATCH 086/117] #316 correcting a typo and addressing issue of former
 biolink:BiologicalEntity nodes that should be genes

---
 umls_util.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index 4dd3856a..6c8e9b18 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -484,7 +484,7 @@ def process_msh_item(self, node_id, info, umls_code):
         ol = attributes.get('OL', list())
         mn = attributes.get('MN', list())
 
-        if category == kg2_util.BIOLINK_CATEGORY_GENE and "T109" in tuis:
+        if category == kg2_util.BIOLINK_CATEGORY_DRUG and "T109" in tuis:
             category = kg2_util.BIOLINK_CATEGORY_CHEMICAL_ENTITY
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
@@ -725,8 +725,11 @@ def process_umls_item(self, node_id, info, umls_code):
         if node_curie == None:
             return
 
-        if category == kg2_util.BIOLINK_CATEGORY_GENE and "T109" in tuis:
+        if category == kg2_util.BIOLINK_CATEGORY_DRUG and "T109" in tuis:
             category = kg2_util.BIOLINK_CATEGORY_CHEMICAL_ENTITY
 
+        if category == kg2_util.BIOLINK_NAMED_THING and tuis == ["T028"] and ("gene" in name.lower() or "allele" in name.lower()):
+            category = kg2_util.BIOLINK_CATEGORY_GENE
+
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))
         self.create_umls_edges(node_curie, info.get(self.RELATIONS_KEY, dict()))

From 3a993ed3b9f3145f83d215b9921353309742d9ce Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 1 Sep 2023 02:54:17 -0700
Subject: [PATCH 087/117] #316 addressing category typo

---
 umls_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/umls_util.py b/umls_util.py
index 6c8e9b18..5fe81931 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -728,7 +728,7 @@ def process_umls_item(self, node_id, info, umls_code):
         if category == kg2_util.BIOLINK_CATEGORY_DRUG and "T109" in tuis:
             category = kg2_util.BIOLINK_CATEGORY_CHEMICAL_ENTITY
 
-        if category == kg2_util.BIOLINK_NAMED_THING and tuis == ["T028"] and ("gene" in name.lower() or "allele" in name.lower()):
+        if category == kg2_util.BIOLINK_CATEGORY_NAMED_THING and tuis == ["T028"] and ("gene" in name.lower() or "allele" in name.lower()):
             category = kg2_util.BIOLINK_CATEGORY_GENE
 
         self.make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, self.create_description(tuis, description))

From 2449bb9e7b854ef3881e973b7d5042ef896987f7 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 1 Sep 2023 14:42:25 -0700
Subject: [PATCH 088/117] #316 source nodes try 1

---
 umls_util.py | 54 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 23 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index 5fe81931..97a4e299 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -23,29 +23,29 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
         self.TUI_MAPPINGS = tui_mappings
         self.IRI_MAPPINGS = iri_mappings
         self.full_name_heirarchy = full_name_heirarchy
-        self.SOURCES = {'ATC': [self.process_atc_item, kg2_util.CURIE_PREFIX_ATC, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ATC')],
-                        'CHV': [self.process_chv_item, kg2_util.CURIE_PREFIX_CHV, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'CHV')],
-                        'DRUGBANK': [self.process_drugbank_item, kg2_util.CURIE_PREFIX_DRUGBANK, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'DRUGBANK')],
-                        'FMA': [self.process_fma_item, kg2_util.CURIE_PREFIX_FMA, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'FMA')],
-                        'GO': [self.process_go_item, kg2_util.CURIE_PREFIX_GO, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'GO')],
-                        'HCPCS': [self.process_hcpcs_item, kg2_util.CURIE_PREFIX_HCPCS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HCPCS')],
-                        'HGNC': [self.process_hgnc_item, kg2_util.CURIE_PREFIX_HGNC, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HGNC')],
-                        'HL7V3.0': [self.process_hl7_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HL7')],
-                        'HPO': [self.process_hpo_item, kg2_util.CURIE_PREFIX_HP, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HPO')],
-                        'ICD10PCS': [self.process_icd10pcs_item, kg2_util.CURIE_PREFIX_ICD10PCS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ICD10PCS')],
-                        'ICD9CM': [self.process_icd9cm_item, kg2_util.CURIE_PREFIX_ICD9, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ICD9CM')],
-                        'MED-RT': [self.process_medrt_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MED-RT')],
-                        'MEDLINEPLUS': [self.process_medlineplus_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MEDLINEPLUS')],
-                        'MSH': [self.process_msh_item, kg2_util.CURIE_PREFIX_MESH, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MSH')],
-                        'MTH': [self.process_mth_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MTH')],
-                        'NCBI': [self.process_ncbi_item, kg2_util.CURIE_PREFIX_NCBI_TAXON, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NCBITAXON')],
-                        'NCI': [self.process_nci_item, kg2_util.CURIE_PREFIX_NCIT, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NCI')],
-                        'NDDF': [self.process_nddf_item, kg2_util.CURIE_PREFIX_NDDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NCI')],
-                        'OMIM': [self.process_omim_item, kg2_util.CURIE_PREFIX_OMIM, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'OMIM')],
-                        'PDQ': [self.process_pdq_item, kg2_util.CURIE_PREFIX_PDQ, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PDQ')],
-                        'PSY': [self.process_psy_item, kg2_util.CURIE_PREFIX_PSY, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PSY')],
-                        'RXNORM': [self.process_rxnorm_item, kg2_util.CURIE_PREFIX_RXNORM, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'RXNORM')],
-                        'VANDF': [self.process_vandf_item, kg2_util.CURIE_PREFIX_VANDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'VANDF')],
+        self.SOURCES = {'ATC': [self.process_atc_item, kg2_util.CURIE_PREFIX_ATC, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ATC'), "Anatomical Therapeutic Chemical Classification System"],
+                        'CHV': [self.process_chv_item, kg2_util.CURIE_PREFIX_CHV, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'CHV'), "Consumer Health Vocabulary"],
+                        'DRUGBANK': [self.process_drugbank_item, kg2_util.CURIE_PREFIX_DRUGBANK, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'DRUGBANK'), "DrugBank"],
+                        'FMA': [self.process_fma_item, kg2_util.CURIE_PREFIX_FMA, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'FMA'), "Foundational Model of Anatomy"],
+                        'GO': [self.process_go_item, kg2_util.CURIE_PREFIX_GO, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'GO'), "Gene Ontology"],
+                        'HCPCS': [self.process_hcpcs_item, kg2_util.CURIE_PREFIX_HCPCS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HCPCS'), "Healthcare Common Procedure Coding System"],
+                        'HGNC': [self.process_hgnc_item, kg2_util.CURIE_PREFIX_HGNC, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HGNC'), "HUGO Gene Nomenclature Committee"],
+                        'HL7V3.0': [self.process_hl7_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HL7'), "HL7 Version 3.0"],
+                        'HPO': [self.process_hpo_item, kg2_util.CURIE_PREFIX_HP, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HPO'), "Human Phenotype Ontology"],
+                        'ICD10PCS': [self.process_icd10pcs_item, kg2_util.CURIE_PREFIX_ICD10PCS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ICD10PCS'), "ICD-10 Procedure Coding System"],
+                        'ICD9CM': [self.process_icd9cm_item, kg2_util.CURIE_PREFIX_ICD9, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ICD9CM'), "International Classification of Diseases, Ninth Revision, Clinical Modification"],
+                        'MED-RT': [self.process_medrt_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MED-RT'), "Medication Reference Terminology"],
+                        'MEDLINEPLUS': [self.process_medlineplus_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MEDLINEPLUS'), "MedlinePlus Health Topics"],
+                        'MSH': [self.process_msh_item, kg2_util.CURIE_PREFIX_MESH, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MSH'), "Medical Subject Headings"],
+                        'MTH': [self.process_mth_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MTH'), "Metathesaurus Names"],
+                        'NCBI': [self.process_ncbi_item, kg2_util.CURIE_PREFIX_NCBI_TAXON, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NCBITAXON'), "NCBI Taxonomy"],
+                        'NCI': [self.process_nci_item, kg2_util.CURIE_PREFIX_NCIT, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NCI'), "NCI Thesaurus"],
+                        'NDDF': [self.process_nddf_item, kg2_util.CURIE_PREFIX_NDDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NDDF'), "National Drug Data File"],
+                        'OMIM': [self.process_omim_item, kg2_util.CURIE_PREFIX_OMIM, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'OMIM'), "Online Mendelian Inheritance in Man"],
+                        'PDQ': [self.process_pdq_item, kg2_util.CURIE_PREFIX_PDQ, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PDQ'), "Physician Data Query"],
+                        'PSY': [self.process_psy_item, kg2_util.CURIE_PREFIX_PSY, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PSY'), "Psychological Index Terms"],
+                        'RXNORM': [self.process_rxnorm_item, kg2_util.CURIE_PREFIX_RXNORM, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'RXNORM'), "RXNORM"],
+                        'VANDF': [self.process_vandf_item, kg2_util.CURIE_PREFIX_VANDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'VANDF'), "National Drug File"],
                         'UMLS': [self.process_umls_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_IDENTIFIERS_ORG_REGISTRY, 'umls')]}
         self.create_umls_accession_heirarchy()
         self.create_accession_sources_heirarchy()
@@ -59,6 +59,14 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
         self.last_source = ''
         self.hgnc_to_omim = dict()
 
+        for source in self.SOURCES:
+            source_id = self.SOURCES[source][2]
+            curie_prefix = source_id.split(':')[0]
+            node_specific_id = source_id.split(':')[1]
+            iri = IRI_MAPPINGS[curie_prefix] + node_specific_id
+            name = self.SOURCES[source][3]
+            self.make_umls_node(source_id, iri, name, kg2_util.SOURCE_NODE_CATEGORY, "2023", source_id, list(), "")
+
 
     def process_node(self, source, node_id, data):
         if source != self.last_source and self.last_source != '' and self.last_source in self.SOURCES:

From d050137a0e2ac9a2f80fe88b81c80e42aae32585 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 1 Sep 2023 14:50:29 -0700
Subject: [PATCH 089/117] #316 iri map needs to be called with self

---
 umls_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/umls_util.py b/umls_util.py
index 97a4e299..3827f413 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -63,7 +63,7 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
             source_id = self.SOURCES[source][2]
             curie_prefix = source_id.split(':')[0]
             node_specific_id = source_id.split(':')[1]
-            iri = IRI_MAPPINGS[curie_prefix] + node_specific_id
+            iri = self.IRI_MAPPINGS[curie_prefix] + node_specific_id
             name = self.SOURCES[source][3]
             self.make_umls_node(source_id, iri, name, kg2_util.SOURCE_NODE_CATEGORY, "2023", source_id, list(), "")
 

From 8abca8c518dec9e3b1ec87270d6f0aa301ae29ae Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 1 Sep 2023 14:52:31 -0700
Subject: [PATCH 090/117] #316 name for UMLS

---
 umls_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/umls_util.py b/umls_util.py
index 3827f413..54b3a946 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -46,7 +46,7 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
                         'PSY': [self.process_psy_item, kg2_util.CURIE_PREFIX_PSY, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PSY'), "Psychological Index Terms"],
                         'RXNORM': [self.process_rxnorm_item, kg2_util.CURIE_PREFIX_RXNORM, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'RXNORM'), "RXNORM"],
                         'VANDF': [self.process_vandf_item, kg2_util.CURIE_PREFIX_VANDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'VANDF'), "National Drug File"],
-                        'UMLS': [self.process_umls_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_IDENTIFIERS_ORG_REGISTRY, 'umls')]}
+                        'UMLS': [self.process_umls_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_IDENTIFIERS_ORG_REGISTRY, 'umls'), "Unified Medical Language System"]}
         self.create_umls_accession_heirarchy()
         self.create_accession_sources_heirarchy()
 

From 16f99bc9b3b2a093dd0361231eb52afb8c007cf8 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 1 Sep 2023 16:06:35 -0700
Subject: [PATCH 091/117] #316 get source name/version information directly
 from UMLS

---
 umls_mysql_to_list_jsonl.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index 24259de7..e69d64c3 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -27,15 +27,16 @@ def get_args():
     return arg_parser.parse_args()
 
 
-def get_english_sources(cursor):
-    sources_sql_statement = "SELECT RSAB, LAT FROM MRSAB"
+def get_english_sources(cursor, output):
+    sources_sql_statement = "SELECT RSAB, LAT, SSN, IMETA, SVER FROM MRSAB"
     sources = []
 
     cursor.execute(sources_sql_statement)
     for result in cursor.fetchall():
-        (source, language) = result
+        (source, language, source_name, version, update_date) = result
         if language == 'ENG':
             sources.append(source)
+            output.write({("UMLS_SOURCE", source): {"update_date": update_date, "source_name": source_name, "version": version}})
 
     print("Finished sources_sql_statement at", kg2_util.date())
 
@@ -241,7 +242,7 @@ def cui_sources(cursor, output, sources):
         cursor.fetchall()
 
         # This ensure we don't have UMLS sources that overwrite each other's names
-        sources = get_english_sources(cursor)
+        sources = get_english_sources(cursor, output)
 
         code_sources(cursor, output)
         cui_sources(cursor, output, sources)

From 97801007cb868e835fc03d2885280be0877f6acd Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 1 Sep 2023 16:18:24 -0700
Subject: [PATCH 092/117] #316 adding umls source node processing into
 umls_util

---
 umls_list_jsonl_to_kg_jsonl.py |  1 +
 umls_util.py                   | 70 ++++++++++++++++++----------------
 2 files changed, 38 insertions(+), 33 deletions(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 408f9765..8d58b3ed 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -74,6 +74,7 @@ def extract_node_id(node_id_str):
 
             # Process the data specifically by source
             umls_processor.process_node(source, node_id, value)
+        print("Finished processing", umls_processor.last_source, "at", kg2_util.date())
 
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
     kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)
diff --git a/umls_util.py b/umls_util.py
index 54b3a946..0d42297f 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -23,30 +23,31 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
         self.TUI_MAPPINGS = tui_mappings
         self.IRI_MAPPINGS = iri_mappings
         self.full_name_heirarchy = full_name_heirarchy
-        self.SOURCES = {'ATC': [self.process_atc_item, kg2_util.CURIE_PREFIX_ATC, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ATC'), "Anatomical Therapeutic Chemical Classification System"],
-                        'CHV': [self.process_chv_item, kg2_util.CURIE_PREFIX_CHV, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'CHV'), "Consumer Health Vocabulary"],
-                        'DRUGBANK': [self.process_drugbank_item, kg2_util.CURIE_PREFIX_DRUGBANK, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'DRUGBANK'), "DrugBank"],
-                        'FMA': [self.process_fma_item, kg2_util.CURIE_PREFIX_FMA, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'FMA'), "Foundational Model of Anatomy"],
-                        'GO': [self.process_go_item, kg2_util.CURIE_PREFIX_GO, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'GO'), "Gene Ontology"],
-                        'HCPCS': [self.process_hcpcs_item, kg2_util.CURIE_PREFIX_HCPCS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HCPCS'), "Healthcare Common Procedure Coding System"],
-                        'HGNC': [self.process_hgnc_item, kg2_util.CURIE_PREFIX_HGNC, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HGNC'), "HUGO Gene Nomenclature Committee"],
-                        'HL7V3.0': [self.process_hl7_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HL7'), "HL7 Version 3.0"],
-                        'HPO': [self.process_hpo_item, kg2_util.CURIE_PREFIX_HP, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HPO'), "Human Phenotype Ontology"],
-                        'ICD10PCS': [self.process_icd10pcs_item, kg2_util.CURIE_PREFIX_ICD10PCS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ICD10PCS'), "ICD-10 Procedure Coding System"],
-                        'ICD9CM': [self.process_icd9cm_item, kg2_util.CURIE_PREFIX_ICD9, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ICD9CM'), "International Classification of Diseases, Ninth Revision, Clinical Modification"],
-                        'MED-RT': [self.process_medrt_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MED-RT'), "Medication Reference Terminology"],
-                        'MEDLINEPLUS': [self.process_medlineplus_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MEDLINEPLUS'), "MedlinePlus Health Topics"],
-                        'MSH': [self.process_msh_item, kg2_util.CURIE_PREFIX_MESH, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MSH'), "Medical Subject Headings"],
-                        'MTH': [self.process_mth_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MTH'), "Metathesaurus Names"],
-                        'NCBI': [self.process_ncbi_item, kg2_util.CURIE_PREFIX_NCBI_TAXON, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NCBITAXON'), "NCBI Taxonomy"],
-                        'NCI': [self.process_nci_item, kg2_util.CURIE_PREFIX_NCIT, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NCI'), "NCI Thesaurus"],
-                        'NDDF': [self.process_nddf_item, kg2_util.CURIE_PREFIX_NDDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NDDF'), "National Drug Data File"],
-                        'OMIM': [self.process_omim_item, kg2_util.CURIE_PREFIX_OMIM, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'OMIM'), "Online Mendelian Inheritance in Man"],
-                        'PDQ': [self.process_pdq_item, kg2_util.CURIE_PREFIX_PDQ, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PDQ'), "Physician Data Query"],
-                        'PSY': [self.process_psy_item, kg2_util.CURIE_PREFIX_PSY, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PSY'), "Psychological Index Terms"],
-                        'RXNORM': [self.process_rxnorm_item, kg2_util.CURIE_PREFIX_RXNORM, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'RXNORM'), "RXNORM"],
-                        'VANDF': [self.process_vandf_item, kg2_util.CURIE_PREFIX_VANDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'VANDF'), "National Drug File"],
-                        'UMLS': [self.process_umls_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_IDENTIFIERS_ORG_REGISTRY, 'umls'), "Unified Medical Language System"]}
+        self.SOURCES = {'UMLS_SOURCE': [self.process_umls_source_item, None, None],
+                        'ATC': [self.process_atc_item, kg2_util.CURIE_PREFIX_ATC, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ATC')],
+                        'CHV': [self.process_chv_item, kg2_util.CURIE_PREFIX_CHV, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'CHV')],
+                        'DRUGBANK': [self.process_drugbank_item, kg2_util.CURIE_PREFIX_DRUGBANK, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'DRUGBANK')],
+                        'FMA': [self.process_fma_item, kg2_util.CURIE_PREFIX_FMA, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'FMA')],
+                        'GO': [self.process_go_item, kg2_util.CURIE_PREFIX_GO, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'GO')],
+                        'HCPCS': [self.process_hcpcs_item, kg2_util.CURIE_PREFIX_HCPCS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HCPCS')],
+                        'HGNC': [self.process_hgnc_item, kg2_util.CURIE_PREFIX_HGNC, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HGNC')],
+                        'HL7V3.0': [self.process_hl7_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HL7')],
+                        'HPO': [self.process_hpo_item, kg2_util.CURIE_PREFIX_HP, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'HPO')],
+                        'ICD10PCS': [self.process_icd10pcs_item, kg2_util.CURIE_PREFIX_ICD10PCS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ICD10PCS')],
+                        'ICD9CM': [self.process_icd9cm_item, kg2_util.CURIE_PREFIX_ICD9, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'ICD9CM')],
+                        'MED-RT': [self.process_medrt_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MED-RT')],
+                        'MEDLINEPLUS': [self.process_medlineplus_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MEDLINEPLUS')],
+                        'MSH': [self.process_msh_item, kg2_util.CURIE_PREFIX_MESH, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MSH')],
+                        'MTH': [self.process_mth_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'MTH')],
+                        'NCBI': [self.process_ncbi_item, kg2_util.CURIE_PREFIX_NCBI_TAXON, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NCBITAXON')],
+                        'NCI': [self.process_nci_item, kg2_util.CURIE_PREFIX_NCIT, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NCI')],
+                        'NDDF': [self.process_nddf_item, kg2_util.CURIE_PREFIX_NDDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'NDDF')],
+                        'OMIM': [self.process_omim_item, kg2_util.CURIE_PREFIX_OMIM, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'OMIM')],
+                        'PDQ': [self.process_pdq_item, kg2_util.CURIE_PREFIX_PDQ, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PDQ')],
+                        'PSY': [self.process_psy_item, kg2_util.CURIE_PREFIX_PSY, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'PSY')],
+                        'RXNORM': [self.process_rxnorm_item, kg2_util.CURIE_PREFIX_RXNORM, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'RXNORM')],
+                        'VANDF': [self.process_vandf_item, kg2_util.CURIE_PREFIX_VANDF, self.make_node_id(kg2_util.CURIE_PREFIX_UMLS_SOURCE, 'VANDF')],
+                        'UMLS': [self.process_umls_item, kg2_util.CURIE_PREFIX_UMLS, self.make_node_id(kg2_util.CURIE_PREFIX_IDENTIFIERS_ORG_REGISTRY, 'umls')]}
         self.create_umls_accession_heirarchy()
         self.create_accession_sources_heirarchy()
 
@@ -59,18 +60,11 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
         self.last_source = ''
         self.hgnc_to_omim = dict()
 
-        for source in self.SOURCES:
-            source_id = self.SOURCES[source][2]
-            curie_prefix = source_id.split(':')[0]
-            node_specific_id = source_id.split(':')[1]
-            iri = self.IRI_MAPPINGS[curie_prefix] + node_specific_id
-            name = self.SOURCES[source][3]
-            self.make_umls_node(source_id, iri, name, kg2_util.SOURCE_NODE_CATEGORY, "2023", source_id, list(), "")
-
 
     def process_node(self, source, node_id, data):
         if source != self.last_source and self.last_source != '' and self.last_source in self.SOURCES:
             print("Finished processing", self.last_source, "at", kg2_util.date())
+            print("Started processing", source, "at", kg2_util.date())
         self.last_source = source
         if source in self.SOURCES:
             self.SOURCES[source][0](node_id, data, source)
@@ -182,7 +176,17 @@ def create_description(self, tuis, comment=""):
         for tui in tuis:
             description += "; UMLS Semantic Type: STY:" + tui
         description = description.strip("; ")
-        return description    
+        return description
+
+
+    def process_umls_source_item(self, node_id, info, umls_code):
+        source_id = self.SOURCES[node_id][2]
+        curie_prefix = source_id.split(':')[0]
+        node_specific_id = source_id.split(':')[1]
+        iri = self.IRI_MAPPINGS[curie_prefix] + node_specific_id
+        name = info.get('source_name', '') + ' v' + info.get('version', '')
+        update_date = info.get('update_date', '')
+        self.make_umls_node(source_id, iri, name, kg2_util.SOURCE_NODE_CATEGORY, update_date, source_id, list(), "")
 
 
     def process_atc_item(self, node_id, info, umls_code):

From 928af259299a18042bd945d90df8fcb613e1948d Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 6 Sep 2023 11:44:06 -0700
Subject: [PATCH 093/117] #316 source key can't be a tuple

---
 umls_mysql_to_list_jsonl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index e69d64c3..43118d51 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -36,7 +36,7 @@ def get_english_sources(cursor, output):
         (source, language, source_name, version, update_date) = result
         if language == 'ENG':
             sources.append(source)
-            output.write({("UMLS_SOURCE", source): {"update_date": update_date, "source_name": source_name, "version": version}})
+            output.write({str(("UMLS_SOURCE", source)): {"update_date": update_date, "source_name": source_name, "version": version}})
 
     print("Finished sources_sql_statement at", kg2_util.date())
 

From 3be034f61202a284043333ebc52fa62826d03410 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 6 Sep 2023 12:50:11 -0700
Subject: [PATCH 094/117] #316 only want some umls source nodes

---
 umls_util.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/umls_util.py b/umls_util.py
index 0d42297f..8e07275a 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -180,6 +180,8 @@ def create_description(self, tuis, comment=""):
 
 
     def process_umls_source_item(self, node_id, info, umls_code):
+        if node_id not in self.SOURCES:
+            return
         source_id = self.SOURCES[node_id][2]
         curie_prefix = source_id.split(':')[0]
         node_specific_id = source_id.split(':')[1]

From 7d2254c4a38fd616bae88cf2633967932ba4ebb6 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 6 Sep 2023 12:59:38 -0700
Subject: [PATCH 095/117] #316 trying to stop the starting finishing spam

---
 umls_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/umls_util.py b/umls_util.py
index 8e07275a..4a557c08 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -62,7 +62,7 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
 
 
     def process_node(self, source, node_id, data):
-        if source != self.last_source and self.last_source != '' and self.last_source in self.SOURCES:
+        if source != self.last_source and self.last_source != '' and self.last_source in self.SOURCES and source in self.SOURCES:
             print("Finished processing", self.last_source, "at", kg2_util.date())
             print("Started processing", source, "at", kg2_util.date())
         self.last_source = source

From 4a5cc59091fbc6614a30ec18730c0f8c342b8849 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 6 Sep 2023 13:03:31 -0700
Subject: [PATCH 096/117] #316 trying to stop the starting finishing spam trial
 2

---
 umls_list_jsonl_to_kg_jsonl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 8d58b3ed..3ff28081 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -74,7 +74,7 @@ def extract_node_id(node_id_str):
 
             # Process the data specifically by source
             umls_processor.process_node(source, node_id, value)
-        print("Finished processing", umls_processor.last_source, "at", kg2_util.date())
+    print("Finished processing", umls_processor.last_source, "at", kg2_util.date())
 
     kg2_util.end_read_jsonlines(input_read_jsonlines_info)
     kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)

From cdb09ae79df22d06e6e6c482061fcdba9ab8ef00 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 6 Sep 2023 13:06:58 -0700
Subject: [PATCH 097/117] #316 print correct starting/finishing sources

---
 umls_util.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/umls_util.py b/umls_util.py
index 4a557c08..f82dfe59 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -62,9 +62,11 @@ def __init__(self, nodes_output, edges_output, tui_mappings, iri_mappings, full_
 
 
     def process_node(self, source, node_id, data):
-        if source != self.last_source and self.last_source != '' and self.last_source in self.SOURCES and source in self.SOURCES:
-            print("Finished processing", self.last_source, "at", kg2_util.date())
-            print("Started processing", source, "at", kg2_util.date())
+        if source != self.last_source:
+            if self.last_source != '' and self.last_source in self.SOURCES:
+                print("Finished processing", self.last_source, "at", kg2_util.date())
+            if source in self.SOURCES:
+                print("Started processing", source, "at", kg2_util.date())
         self.last_source = source
         if source in self.SOURCES:
             self.SOURCES[source][0](node_id, data, source)

From 644892400888ca0f04026fa4545b41f69e56521b Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 6 Sep 2023 14:11:55 -0700
Subject: [PATCH 098/117] #316 handle duplicate source names

---
 umls_mysql_to_list_jsonl.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index 43118d51..7b148b1d 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -31,14 +31,32 @@ def get_english_sources(cursor, output):
     sources_sql_statement = "SELECT RSAB, LAT, SSN, IMETA, SVER FROM MRSAB"
     sources = []
 
+    source_data = dict()
+
     cursor.execute(sources_sql_statement)
     for result in cursor.fetchall():
         (source, language, source_name, version, update_date) = result
         if language == 'ENG':
             sources.append(source)
-            output.write({str(("UMLS_SOURCE", source)): {"update_date": update_date, "source_name": source_name, "version": version}})
+            key = ("UMLS_SOURCE", source)
+
+            if key in source_data:
+                old_date = source_data[key].get('update_date', '')
+
+                old_date_val = old_date.strip('B').strip('A')
+                new_date_val = update_date.strip('B').strip('A')
+
+                if new_date_val < old_date_val or (new_date_val == old_date_val and old_date_val.endswith('AB')):
+                    continue
+
+            source_data[key] = {"update_date": update_date, "source_name": source_name, "version": version}
+
+    record_num = 0
+    for key, val in source_data.items():
+        record_num += 1
+        output.write({str(key): val})
 
-    print("Finished sources_sql_statement at", kg2_util.date())
+    print("Finished adding", record_num, "records in get_english_sources() at", kg2_util.date())
 
     return sources
 

From b6d011c15e733a74a2816ed0d6e285b1f43ec688 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 6 Sep 2023 14:51:26 -0700
Subject: [PATCH 099/117] #316 use string date to handle date priority

---
 umls_mysql_to_list_jsonl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index 7b148b1d..48df6067 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -46,7 +46,7 @@ def get_english_sources(cursor, output):
                 old_date_val = old_date.strip('B').strip('A')
                 new_date_val = update_date.strip('B').strip('A')
 
-                if new_date_val < old_date_val or (new_date_val == old_date_val and old_date_val.endswith('AB')):
+                if new_date_val < old_date_val or (new_date_val == old_date_val and old_date.endswith('AB')):
                     continue
 
             source_data[key] = {"update_date": update_date, "source_name": source_name, "version": version}

From c17582f084d416e04caf337e9a0e963cca39ac3e Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Wed, 6 Sep 2023 21:23:54 -0700
Subject: [PATCH 100/117] #316 relation prefix

---
 umls_util.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/umls_util.py b/umls_util.py
index f82dfe59..3edb4ce7 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -130,11 +130,12 @@ def create_umls_edges(self, subject_id, relations):
         for relation_source in relations:
             if relation_source in self.SOURCES:
                 provided_by = self.SOURCES[relation_source][2]
+                relation_prefix = self.SOURCES[relation_source][1]
                 for relation in relations[relation_source]:
                     relation_abbr, relation_label, relation_direction = relation.split(',')
                     if relation_label == 'None':
                         relation_label = relation_abbr
-                    relation_curie = self.make_node_id(relation_source, relation_label)
+                    relation_curie = self.make_node_id(relation_prefix, relation_label)
                     for cui in relations[relation_source][relation]:
                         object_id = self.make_node_id(kg2_util.CURIE_PREFIX_UMLS, cui)
                         # TODO: resolve update_date

From 0f74d1d552fd7d5a5e8d049b45a79b5c309c643f Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 7 Sep 2023 13:22:53 -0700
Subject: [PATCH 101/117] #316 predicate remap additions for new UMLS ETL

---
 predicate-remap.yaml | 528 +++++++++++++++++++++++++++----------------
 1 file changed, 339 insertions(+), 189 deletions(-)

diff --git a/predicate-remap.yaml b/predicate-remap.yaml
index 1166fbdf..5aa422bd 100644
--- a/predicate-remap.yaml
+++ b/predicate-remap.yaml
@@ -1,3 +1,15 @@
+ATC:has_member:
+  operation: keep
+  core_predicate: biolink:related_to
+ATC:inverse_isa:
+  operation: invert
+  core_predicate: biolink:subclass_of
+ATC:isa:
+  operation: keep
+  core_predicate: biolink:subclass_of
+ATC:member_of:
+  operation: keep
+  core_predicate: biolink:related_to
 BFO:0000050: 
   operation: invert
   core_predicate: biolink:has_part
@@ -1320,6 +1332,12 @@ FMA:insertion_of:
 FMA:internal_to:
   operation: keep
   core_predicate: biolink:coexists_with
+FMA:inverse_isa:
+  operation: invert
+  core_predicate: biolink:subclass_of
+FMA:isa:
+  operation: keep
+  core_predicate: biolink:subclass_of 
 FMA:lateral_to:
   operation: keep
   core_predicate: biolink:coexists_with
@@ -1481,6 +1499,18 @@ GENEPIO:0001739:
 #   core_predicate: biolink:gene_associated_with_condition
 # GO:SIB:
 #   operation: delete
+GO:RB:
+  operation: invert
+  core_predicate: biolink:subclass_of
+GO:RN:
+  operation: keep
+  core_predicate: biolink:subclass_of
+GO:RO:
+  operation: keep
+  core_predicate: biolink:related_to 
+GO:SY:
+  operation: keep
+  core_predicate: biolink:close_match
 GO:acts_upstream_of:
   operation: keep
   core_predicate: biolink:affects
@@ -1554,6 +1584,12 @@ GO:isa:
 GO:located_in:
   operation: keep
   core_predicate: biolink:located_in
+GO:mth_expanded_form_of:
+  operation: keep
+  core_predicate: biolink:close_match
+GO:mth_has_expanded_form:
+  operation: keep
+  core_predicate: biolink:close_match
 GO:negatively_regulated_by:
   operation: invert
   core_predicate: biolink:regulates
@@ -1632,11 +1668,41 @@ HANCESTRO:0308:
 HANCESTRO:0330:
   operation: keep
   core_predicate: biolink:related_to
+HCPCS:CHD:
+  operation: invert
+  core_predicate: biolink:subclass_of
+HCPCS:PAR:
+  operation: keep
+  core_predicate: biolink:subclass_of
 HCPCS:mapped_from:
   operation: delete
 HCPCS:mapped_to:
   operation: keep
   core_predicate: biolink:related_to
+HGNC:alias_of:
+  operation: keep
+  core_predicate: biolink:close_match
+HGNC:expanded_form_of:
+  operation: keep
+  core_predicate: biolink:close_match
+HGNC:has_alias:
+  operation: keep
+  core_predicate: biolink:close_match
+HGNC:has_expanded_form:
+  operation: keep
+  core_predicate: biolink:close_match
+HGNC:has_prev_name:
+  operation: keep
+  core_predicate: biolink:close_match
+HGNC:has_prev_symbol:
+  operation: keep
+  core_predicate: biolink:close_match
+HGNC:prev_name_of:
+  operation: keep
+  core_predicate: biolink:close_match
+HGNC:prev_symbol_of:
+  operation: keep
+  core_predicate: biolink:close_match
 HMDB:at_cellular_location:
   operation: keep
   core_predicate: biolink:located_in
@@ -1655,8 +1721,24 @@ HMDB:in_biospecimen:
 HMDB:in_pathway:
   operation: invert
   core_predicate: biolink:has_participant
-# HP:SIB:
-#   operation: delete
+HP:RB:
+  operation: invert
+  core_predicate: biolink:subclass_of
+HP:RN:
+  operation: keep
+  core_predicate: biolink:subclass_of
+HP:RO:
+  operation: keep
+  core_predicate: biolink:related_to
+HP:SY:
+  operation: keep
+  core_predicate: biolink:close_match
+HP:inverse_isa:
+  operation: invert
+  core_predicate: biolink:subclass_of
+HP:isa:
+  operation: keep
+  core_predicate: biolink:close_match
 IAO:0000039:
   operation: keep
   core_predicate: biolink:related_to
@@ -1669,10 +1751,24 @@ IAO:0000142:
 IAO:0000219:
   operation: keep
   core_predicate: biolink:related_to
-# ICD10PCS:SIB:
-#   operation: delete
-# ICD9:SIB:
-#   operation: delete
+ICD10PCS:CHD:
+  operation: invert
+  core_predicate: biolink:subclass_of
+ICD10PCS:PAR:
+  operation: keep
+  core_predicate: biolink:subclass_of
+ICD10PCS:expanded_form_of:
+  operation: keep
+  core_predicate: biolink:close_match
+ICD10PCS:has_expanded_form:
+  operation: keep
+  core_predicate: biolink:close_match
+ICD9:CHD:
+  operation: invert
+  core_predicate: biolink:subclass_of
+ICD9:PAR:
+  operation: keep
+  core_predicate: biolink:subclass_of
 IDO:0000664:
   operation: invert
   core_predicate: biolink:contributes_to
@@ -1957,8 +2053,20 @@ LOINC:time_modifier_of:
 #   core_predicate: biolink:has_part
 MESH:AQ:
   operation: delete
+MESH:CHD:
+  operation: invert
+  core_predicate: biolink:subclass_of
+MESH:PAR:
+  operation: keep
+  core_predicate: biolink:subclass_of
 MESH:QB:
   operation: delete
+MESH:RB:
+  operation: invert
+  core_predicate: biolink:subclass_of
+MESH:RN:
+  operation: keep
+  core_predicate: biolink:subclass_of 
 MESH:RO:
   operation: keep
   core_predicate: biolink:related_to
@@ -1967,6 +2075,9 @@ MESH:RO:
 MESH:has_mapping_qualifier:
   operation: keep
   core_predicate: biolink:related_to
+MESH:has_permuted_term:
+  operation: keep
+  core_predicate: biolink:close_match
 MESH:inverse_isa:
   operation: keep
   core_predicate: biolink:superclass_of
@@ -1980,6 +2091,9 @@ MESH:mapped_to:
   core_predicate: biolink:related_to
 MESH:mapping_qualifier_of:
   operation: delete
+MESH:permuted_term_of:
+  operation: keep
+  core_predicate: biolink:close_match
 MI:0192:
   operation: keep
   core_predicate: biolink:directly_physically_interacts_with
@@ -2153,201 +2267,30 @@ MONDO:part_of_progression_of_disease:
 MONDO:predisposes_towards:
   operation: keep
   core_predicate: biolink:contributes_to
-# NBO-PROPERTY:by_means:
-#   operation: keep
-#   core_predicate: biolink:actively_involved_in
-# NBO-PROPERTY:has_participant:
-#   operation: keep
-#   core_predicate: biolink:has_participant
-# NBO-PROPERTY:in_response_to:
-#   operation: keep
-#   core_predicate: biolink:causes
-# NBO-PROPERTY:is_about:
-#   operation: keep
-#   core_predicate: biolink:related_to
-# NCIT:A11:
-#   operation: keep
-#   core_predicate: biolink:subclass_of
-# NCIT:A14:
-#   operation: keep
-#   core_predicate: biolink:subclass_of
-# NCIT:A16:
-#   operation: keep
-#   core_predicate: biolink:subclass_of
-# NCIT:A3:
-#   operation: keep
-#   core_predicate: biolink:subclass_of
-# NCIT:A7:
-#   operation: keep
-#   core_predicate: biolink:physically_interacts_with
-# NCIT:Anatomic_Structure_Has_Location_Role:
-#   operation: invert
-#   core_predicate: biolink:located_in
-# NCIT:C15220:
-#   operation: keep
-#   core_predicate: biolink:diagnoses
-# NCIT:C16798:
-#   operation: keep
-#   core_predicate: biolink:in_linkage_disequilibrium_with
+NCBITaxon:CHD:
+  operation: invert
+  core_predicate: biolink:subclass_of
+NCBITaxon:PAR:
+  operation: keep
+  core_predicate: biolink:subclass_of
+NCBITaxon:expanded_form_of:
+  operation: keep
+  core_predicate: biolink:close_match
+NCBITaxon:has_expanded_form:
+  operation: keep
+  core_predicate: biolink:close_match
 NCIT:C2861:
   operation: keep
   core_predicate: biolink:has_side_effect
-# NCIT:C37933:
-#   operation: keep
-#   core_predicate: biolink:contraindicated_for
-# NCIT:R100:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R101:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R102:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R108:
-#   operation: keep
-#   core_predicate: biolink:has_phenotype
-# NCIT:R113:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R115:
-#   operation: keep
-#   core_predicate: biolink:has_phenotype
-# NCIT:R124:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R130:
-#   operation: invert
-#   core_predicate: biolink:has_participant
-# NCIT:R131:
-#   operation: invert
-#   core_predicate: biolink:has_participant
-# NCIT:R133:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R145:
-#   operation: keep
-#   core_predicate: biolink:located_in
-# NCIT:R146:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R150:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R155:
-#   operation: keep
-#   core_predicate: biolink:located_in
-# NCIT:R156:
-#   operation: keep
-#   core_predicate: biolink:located_in
-# NCIT:R158:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R160:
-#   operation: keep
-#   core_predicate: biolink:affects
 NCIT:R163:
   operation: keep
   core_predicate: biolink:related_to
-# NCIT:R165:
-#   operation: keep
-#   core_predicate: biolink:located_in
-# NCIT:R166:
-#   operation: keep
-#   core_predicate: biolink:located_in
-# NCIT:R167:
-#   operation: keep
-#   core_predicate: biolink:located_in
-# NCIT:R168:
-#   operation: keep
-#   core_predicate: biolink:located_in
-# NCIT:R169:
-#   operation: keep
-#   core_predicate: biolink:located_in
-# NCIT:R170:
-#   operation: keep
-#   core_predicate: biolink:located_in
-# NCIT:R171:
-#   operation: keep
-#   core_predicate: biolink:located_in
-# NCIT:R173:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R175:
-#   operation: keep
-#   core_predicate: biolink:gene_associated_with_condition
-# NCIT:R176:
-#   operation: invert
-#   core_predicate: biolink:gene_associated_with_condition
-# NCIT:R178:
-#   operation: invert
-#   core_predicate: biolink:gene_product_of
-# NCIT:R23:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R25:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R27:
-#   operation: invert
-#   core_predicate: biolink:has_part
-# NCIT:R29:
-#   operation: keep
-#   core_predicate: biolink:produces
-# NCIT:R30:
-#   operation: keep
-#   core_predicate: biolink:affects
-# NCIT:R36:
-#   operation: keep
-#   core_predicate: biolink:subclass_of
-# NCIT:R37:
-#   operation: invert
-#   core_predicate: biolink:has_participant
-# NCIT:R38:
-#   operation: keep
-#   core_predicate: biolink:gene_associated_with_condition
-# NCIT:R39:
-#   operation: keep
-#   core_predicate: biolink:biomarker_for
-# NCIT:R40:
-#   operation: keep
-#   core_predicate: biolink:located_in
-# NCIT:R42:
-#   operation: keep
-#   core_predicate: biolink:subclass_of
-# NCIT:R47:
-#   operation: keep
-#   core_predicate: biolink:biomarker_for
-# NCIT:R48:
-#   operation: keep
-#   core_predicate: biolink:gene_associated_with_condition
-# NCIT:R50:
-#   operation: keep
-#   core_predicate: biolink:has_part
-# NCIT:R51:
-#   operation: invert
-#   core_predicate: biolink:has_participant
-# NCIT:R52:
-#   operation: keep
-#   core_predicate: biolink:capable_of
-# NCIT:R53:
-#   operation: invert
-#   core_predicate: biolink:has_participant
-# NCIT:R72:
-#   operation: keep
-#   core_predicate: biolink:affects
 NCIT:R81:
   operation: keep
   core_predicate: biolink:related_to
 NCIT:R82:
   operation: invert
   core_predicate: biolink:has_part
-# NCIT:R88:
-#   operation: keep
-#   core_predicate: biolink:related_to
-# NCIT:R89:
-#   operation: keep
-#   core_predicate: biolink:has_phenotype
 NCIT:abnormal_cell_affected_by_chemical_or_drug:
   operation: delete
 NCIT:abnormality_associated_with_allele:
@@ -2746,6 +2689,9 @@ NCIT:has_seronet_permissible_value:
 NCIT:has_target:
   operation: keep
   core_predicate: biolink:physically_interacts_with
+NCIT:has_tradename:
+  operation: keep
+  core_predicate: biolink:close_match
 NCIT:human_disease_maps_to_eo_disease:
   operation: keep
   core_predicate: biolink:related_to
@@ -2755,6 +2701,12 @@ NCIT:imaged_anatomy_has_procedure:
   operation: delete
 NCIT:inc_parent_of:
   operation: delete
+NCIT:inverse_isa:
+  operation: invert
+  core_predicate: biolink:close_match
+NCIT:isa:
+  operation: keep
+  core_predicate: biolink:close_match
 NCIT:is_abnormal_cell_of_disease:
   operation: keep
   core_predicate: biolink:related_to
@@ -2979,6 +2931,9 @@ NCIT:target_anatomy_has_procedure:
   operation: delete
 NCIT:tissue_is_expression_site_of_gene_product:
   operation: delete
+NCIT:tradename_of:
+  operation: keep
+  core_predicate: biolink:close_match
 NCIT:value_set_is_paired_with:
   operation: delete
 NDDF:dose_form_of:
@@ -3070,11 +3025,35 @@ OBO:nbo#is_about:
 # OIO:hasDbXref:
 #   operation: keep
 #   core_predicate: biolink:close_match
+OMIM:CHD:
+  operation: invert
+  core_predicate: biolink:subclass_of
+OMIM:PAR:
+  operation: keep
+  core_predicate: biolink:subclass_of
+OMIM:alias_of:
+  operation: keep
+  core_predicate: biolink:related_to
 OMIM:allelic_variant_of:
   operation: keep
   core_predicate: biolink:is_sequence_variant_of
+OMIM:entry_term_of:
+  operation: keep
+  core_predicate: biolink:related_to
+OMIM:expanded_form_of:
+  operation: keep
+  core_predicate: biolink:close_match 
+OMIM:has_alias:
+  operation: keep
+  core_predicate: biolink:related_to
 OMIM:has_allelic_variant:
   operation: delete
+OMIM:has_entry_term:
+  operation: keep
+  core_predicate: biolink:related_to 
+OMIM:has_expanded_form:
+  operation: keep
+  core_predicate: biolink:close_match
 OMIM:has_inheritance_type:
   operation: keep
   core_predicate: biolink:related_to
@@ -3176,6 +3155,9 @@ PATO:reciprocal_of:
 # PATO:towards:
 #   operation: invert
 #   core_predicate: biolink:actively_involved_in
+PDQ:SY:
+  operation: keep
+  core_predicate: biolink:close_match
 PDQ:associated_disease:
   operation: keep
   core_predicate: biolink:correlated_with
@@ -3184,9 +3166,33 @@ PDQ:associated_genetic_condition:
 PDQ:component_of:
   operation: invert
   core_predicate: biolink:has_part
+PDQ:expanded_form_of:
+  operation: keep
+  core_predicate: biolink:close_match
 PDQ:has_component:
   operation: keep
   core_predicate: biolink:has_part
+PDQ:has_expanded_form:
+  operation: keep
+  core_predicate: biolink:close_match
+PDQ:has_lab_number:
+  operation: keep
+  core_predicate: biolink:related_to
+PDQ:has_tradename:
+  operation: keep
+  core_predicate: biolink:related_to
+PDQ:inverse_isa:
+  operation: invert
+  core_predicate: biolink:subclass_of
+PDQ:isa:
+  operation: keep
+  core_predicate: biolink:subclass_of
+PDQ:lab_number_of:
+  operation: keep
+  core_predicate: biolink:related_to 
+PDQ:tradename_of:
+  operation: keep
+  core_predicate: biolink:related_to
 PHAROS:drug_targets:
   operation: keep
   core_predicate: biolink:directly_physically_interacts_with
@@ -3206,12 +3212,27 @@ PR:lacks_part:
 PR:non-covalently_bound_to:
   operation: keep
   core_predicate: biolink:physically_interacts_with
+PSY:CHD:
+  operation: invert
+  core_predicate: biolink:subclass_of
+PSY:PAR:
+  operation: keep
+  core_predicate: biolink:subclass_of
 PSY:RB:
   operation: invert
   core_predicate: biolink:subclass_of
+PSY:RN:
+  operation: keep
+  core_predicate: biolink:subclass_of
 PSY:RO:
   operation: keep
   core_predicate: biolink:related_to
+PSY:has_member:
+  operation: keep
+  core_predicate: biolink:subclass_of
+PSY:member_of:
+  operation: invert
+  core_predicate: biolink:subclass_of
 PSY:use:
   operation: keep
   core_predicate: biolink:subclass_of
@@ -4115,6 +4136,9 @@ RO:0040036:
 RO:participates_in:
   operation: invert
   core_predicate: biolink:has_participant
+RXNORM:SY:
+  operation: keep
+  core_predicate: biolink:close_match
 RXNORM:consists_of:
   operation: keep
   core_predicate: biolink:has_part
@@ -4158,6 +4182,12 @@ RXNORM:has_quantified_form:
 RXNORM:has_tradename:
   operation: keep
   core_predicate: biolink:related_to
+RXNORM:included_in:
+  operation: keep
+  core_predicate: biolink:related_to
+RXNORM:includes:
+  operation: keep
+  core_predicate: biolink:related_to
 RXNORM:ingredient_of:
   operation: invert
   core_predicate: biolink:has_part
@@ -4614,9 +4644,18 @@ UBERON_CORE:synapsed_by:
 # UBERON_NONAMESPACE:subdivision_of:
 #   operation: keep
 #   core_predicate: biolink:coexists_with
+UMLS:CHD:
+  operation: invert
+  core_predicate: biolink:subclass_of
+UMLS:PAR:
+  operation: keep
+  core_predicate: biolink:subclass_of
 UMLS:RB:
   operation: invert
   core_predicate: biolink:subclass_of
+UMLS:RN:
+  operation: keep
+  core_predicate: biolink:subclass_of
 UMLS:RO:
   operation: keep
   core_predicate: biolink:related_to
@@ -4628,6 +4667,9 @@ UMLS:RQ:
 UMLS:SY:
   operation: keep
   core_predicate: biolink:close_match
+UMLS:active_metabolites_of:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:class_code_classified_by:
   operation: keep
   core_predicate: biolink:related_to
@@ -4638,6 +4680,21 @@ UMLS:component_of:
   core_predicate: biolink:has_part
 UMLS:context_binding_of:
   operation: delete
+UMLS:contraindicated_class_of:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:contraindicated_mechanism_of_action_of:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:contraindicated_physiologic_effect_of:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:contraindicated_with_disease:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:effect_may_be_inhibited_by:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:exhibited_by:
   operation: keep
   core_predicate: biolink:related_to
@@ -4645,18 +4702,36 @@ UMLS:exhibits:
   operation: delete
 UMLS:form_of:
   operation: delete
+UMLS:has_active_metabolites:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:has_component:
   operation: keep
   core_predicate: biolink:has_part
 UMLS:has_context_binding:
   operation: keep
   core_predicate: biolink:related_to
+UMLS:has_contraindicated_class:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:has_contraindicated_drug:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:has_contraindicated_mechanism_of_action:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:has_contraindicated_physiologic_effect:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:has_form:
   operation: keep
   core_predicate: biolink:related_to
 UMLS:has_mapping_qualifier:
   operation: keep
   core_predicate: biolink:related_to
+UMLS:has_mechanism_of_action:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:has_owning_affiliate:
   operation: invert
   core_predicate: biolink:has_part
@@ -4664,6 +4739,12 @@ UMLS:has_owning_section:
   operation: delete
 UMLS:has_owning_subsection:
   operation: delete
+UMLS:has_parent:
+  operation: keep
+  core_predicate: biolink:subclass_of
+UMLS:has_pharmacokinetics:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:has_physiologic_effect:
   operation: keep
   core_predicate: biolink:causes
@@ -4676,24 +4757,63 @@ UMLS:has_supported_concept_property:
 UMLS:has_supported_concept_relationship:
   operation: keep
   core_predicate: biolink:related_to
+UMLS:has_therapeutic_class:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:induced_by:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:induces:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:larger_than:
   operation: keep
   core_predicate: biolink:related_to
 UMLS:mapped_from:
   operation: delete
+UMLS:mapping_qualifier_of:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:mapped_to:
   operation: keep
   core_predicate: biolink:related_to
+UMLS:may_be_diagnosed_by:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:may_be_prevented_by:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:may_be_qualified_by:
   operation: keep
   core_predicate: biolink:related_to
+UMLS:may_be_treated_by:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:may_diagnose:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:may_inhibit_effect_of:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:may_prevent:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:may_qualify:
   operation: delete
+UMLS:may_treat:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:measured_by:
   operation: delete
 UMLS:measures:
   operation: keep
   core_predicate: biolink:related_to
+UMLS:mechanism_of_action_of:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:metabolic_site_of:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:owning_affiliate_of:
   operation: delete
 UMLS:owning_section_of:
@@ -4702,13 +4822,37 @@ UMLS:owning_section_of:
 UMLS:owning_subsection_of:
   operation: invert
   core_predicate: biolink:has_part
+UMLS:parent_of:
+  operation: invert
+  core_predicate: biolink:subclass_of
+UMLS:pharmacokinetics_of:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:physiologic_effect_of:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:related_to:
   operation: keep
   core_predicate: biolink:related_to
+UMLS:site_of_metabolism:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:smaller_than:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:structural_class_of:
+  operation: keep
+  core_predicate: biolink:related_to
 UMLS:supported_concept_property_in:
   operation: delete
 UMLS:supported_concept_relationship_in:
   operation: delete
+UMLS:therapeutic_class_of:
+  operation: keep
+  core_predicate: biolink:related_to
+UMLS:xref:
+  operation: keep
+  core_predicate: biolink:close_match 
 # UO-PROPERTY:is_unit_of:
 #   operation: keep
 #   core_predicate: biolink:related_to
@@ -4718,6 +4862,9 @@ UMLS:supported_concept_relationship_in:
 VANDF:has_ingredient:
   operation: keep
   core_predicate: biolink:has_part
+VANDF:has_print_name:
+  operation: keep
+  core_predicate: biolink:close_match 
 VANDF:ingredient_of:
   operation: invert
   core_predicate: biolink:has_part
@@ -4727,6 +4874,9 @@ VANDF:inverse_isa:
 VANDF:isa:
   operation: keep
   core_predicate: biolink:subclass_of
+VANDF:print_name_of:
+  operation: keep
+  core_predicate: biolink:close_match 
 # WIKIDATA:P2888:
 #   operation: keep
 #   core_predicate: biolink:exact_match

From c9e8b98d1abc2aa765f307a063d4a820f35c8960 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 7 Sep 2023 13:58:15 -0700
Subject: [PATCH 102/117] #316 more robust date comparison

---
 umls_mysql_to_list_jsonl.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index 48df6067..a929a045 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -41,12 +41,7 @@ def get_english_sources(cursor, output):
             key = ("UMLS_SOURCE", source)
 
             if key in source_data:
-                old_date = source_data[key].get('update_date', '')
-
-                old_date_val = old_date.strip('B').strip('A')
-                new_date_val = update_date.strip('B').strip('A')
-
-                if new_date_val < old_date_val or (new_date_val == old_date_val and old_date.endswith('AB')):
+                if update_date < old_date:
                     continue
 
             source_data[key] = {"update_date": update_date, "source_name": source_name, "version": version}

From 5ad4bd7e7e106b1ecaa5680fa6a3c579157cc1ed Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 7 Sep 2023 14:11:47 -0700
Subject: [PATCH 103/117] #316 actually more robust date comparison by
 comparing versions

---
 umls_mysql_to_list_jsonl.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index a929a045..11dafac6 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -41,7 +41,9 @@ def get_english_sources(cursor, output):
             key = ("UMLS_SOURCE", source)
 
             if key in source_data:
-                if update_date < old_date:
+                old_ver = source_data[key].get('version', '')
+
+                if version < old_ver:
                     continue
 
             source_data[key] = {"update_date": update_date, "source_name": source_name, "version": version}

From a4c2d28aab5e9e37f458bb984d3711f3811c0392 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 7 Sep 2023 15:39:15 -0700
Subject: [PATCH 104/117] #316 subject is cui2 and object is cui1

---
 umls_mysql_to_list_jsonl.py | 6 +++---
 umls_util.py                | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/umls_mysql_to_list_jsonl.py b/umls_mysql_to_list_jsonl.py
index 11dafac6..c29925c1 100755
--- a/umls_mysql_to_list_jsonl.py
+++ b/umls_mysql_to_list_jsonl.py
@@ -196,8 +196,8 @@ def cui_sources(cursor, output, sources):
 
     cursor.execute(relations_sql_statement)
     for result in cursor.fetchall():
-        (cui1, rel, rela, direction, cui2, source) = result
-        key = (umls_source_name, cui1)
+        (cui_object, rel, rela, direction, cui_subject, source) = result
+        key = (umls_source_name, cui_object)
         if key not in cui_source_info:
             # See above for explanation
             continue
@@ -209,7 +209,7 @@ def cui_sources(cursor, output, sources):
             cui_source_info[key][relation_key][source] = dict()
         if relation_type_key not in cui_source_info[key][relation_key][source]:
             cui_source_info[key][relation_key][source][relation_type_key] = list()
-        cui_source_info[key][relation_key][source][relation_type_key].append(cui2)
+        cui_source_info[key][relation_key][source][relation_type_key].append(cui_subject)
 
     print("Finished relations_sql_statement at", kg2_util.date())
 
diff --git a/umls_util.py b/umls_util.py
index 3edb4ce7..b2109b93 100644
--- a/umls_util.py
+++ b/umls_util.py
@@ -139,7 +139,7 @@ def create_umls_edges(self, subject_id, relations):
                     for cui in relations[relation_source][relation]:
                         object_id = self.make_node_id(kg2_util.CURIE_PREFIX_UMLS, cui)
                         # TODO: resolve update_date
-                        if relation_direction == 'Y':
+                        if relation_direction == 'N':
                             self.edges_output.write(kg2_util.make_edge(object_id, subject_id, relation_curie, relation_label, provided_by, "2023"))
                         else:
                             self.edges_output.write(kg2_util.make_edge(subject_id, object_id, relation_curie, relation_label, provided_by, "2023"))

From 5732454dc0f2ef7dd0249768774a4c4fe27a7e3a Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 7 Sep 2023 16:33:25 -0700
Subject: [PATCH 105/117] #316 updating UMLS predicate mappings based on
 meeting with Steve

---
 predicate-remap.yaml | 71 +++++++++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 37 deletions(-)

diff --git a/predicate-remap.yaml b/predicate-remap.yaml
index 5aa422bd..f617a3bb 100644
--- a/predicate-remap.yaml
+++ b/predicate-remap.yaml
@@ -1,6 +1,6 @@
 ATC:has_member:
   operation: keep
-  core_predicate: biolink:related_to
+  core_predicate: biolink:has_member
 ATC:inverse_isa:
   operation: invert
   core_predicate: biolink:subclass_of
@@ -8,8 +8,8 @@ ATC:isa:
   operation: keep
   core_predicate: biolink:subclass_of
 ATC:member_of:
-  operation: keep
-  core_predicate: biolink:related_to
+  operation: invert
+  core_predicate: biolink:has_member
 BFO:0000050: 
   operation: invert
   core_predicate: biolink:has_part
@@ -1669,10 +1669,10 @@ HANCESTRO:0330:
   operation: keep
   core_predicate: biolink:related_to
 HCPCS:CHD:
-  operation: invert
+  operation: keep
   core_predicate: biolink:subclass_of
 HCPCS:PAR:
-  operation: keep
+  operation: invert
   core_predicate: biolink:subclass_of
 HCPCS:mapped_from:
   operation: delete
@@ -1738,7 +1738,7 @@ HP:inverse_isa:
   core_predicate: biolink:subclass_of
 HP:isa:
   operation: keep
-  core_predicate: biolink:close_match
+  core_predicate: biolink:subclass_of
 IAO:0000039:
   operation: keep
   core_predicate: biolink:related_to
@@ -1752,10 +1752,10 @@ IAO:0000219:
   operation: keep
   core_predicate: biolink:related_to
 ICD10PCS:CHD:
-  operation: invert
+  operation: keep
   core_predicate: biolink:subclass_of
 ICD10PCS:PAR:
-  operation: keep
+  operation: invert
   core_predicate: biolink:subclass_of
 ICD10PCS:expanded_form_of:
   operation: keep
@@ -1764,10 +1764,10 @@ ICD10PCS:has_expanded_form:
   operation: keep
   core_predicate: biolink:close_match
 ICD9:CHD:
-  operation: invert
+  operation: keep
   core_predicate: biolink:subclass_of
 ICD9:PAR:
-  operation: keep
+  operation: invert
   core_predicate: biolink:subclass_of
 IDO:0000664:
   operation: invert
@@ -2054,10 +2054,10 @@ LOINC:time_modifier_of:
 MESH:AQ:
   operation: delete
 MESH:CHD:
-  operation: invert
+  operation: keep
   core_predicate: biolink:subclass_of
 MESH:PAR:
-  operation: keep
+  operation: invert
   core_predicate: biolink:subclass_of
 MESH:QB:
   operation: delete
@@ -2268,10 +2268,10 @@ MONDO:predisposes_towards:
   operation: keep
   core_predicate: biolink:contributes_to
 NCBITaxon:CHD:
-  operation: invert
+  operation: keep
   core_predicate: biolink:subclass_of
 NCBITaxon:PAR:
-  operation: keep
+  operation: invert
   core_predicate: biolink:subclass_of
 NCBITaxon:expanded_form_of:
   operation: keep
@@ -2703,10 +2703,10 @@ NCIT:inc_parent_of:
   operation: delete
 NCIT:inverse_isa:
   operation: invert
-  core_predicate: biolink:close_match
+  core_predicate: biolink:subclass_of
 NCIT:isa:
   operation: keep
-  core_predicate: biolink:close_match
+  core_predicate: biolink:subclass_of
 NCIT:is_abnormal_cell_of_disease:
   operation: keep
   core_predicate: biolink:related_to
@@ -3026,10 +3026,10 @@ OBO:nbo#is_about:
 #   operation: keep
 #   core_predicate: biolink:close_match
 OMIM:CHD:
-  operation: invert
+  operation: keep
   core_predicate: biolink:subclass_of
 OMIM:PAR:
-  operation: keep
+  operation: invert
   core_predicate: biolink:subclass_of
 OMIM:alias_of:
   operation: keep
@@ -3176,11 +3176,10 @@ PDQ:has_expanded_form:
   operation: keep
   core_predicate: biolink:close_match
 PDQ:has_lab_number:
-  operation: keep
-  core_predicate: biolink:related_to
+  operation: delete
 PDQ:has_tradename:
   operation: keep
-  core_predicate: biolink:related_to
+  core_predicate: biolink:close_match
 PDQ:inverse_isa:
   operation: invert
   core_predicate: biolink:subclass_of
@@ -3188,11 +3187,10 @@ PDQ:isa:
   operation: keep
   core_predicate: biolink:subclass_of
 PDQ:lab_number_of:
-  operation: keep
-  core_predicate: biolink:related_to 
+  operation: delete
 PDQ:tradename_of:
   operation: keep
-  core_predicate: biolink:related_to
+  core_predicate: biolink:close_match
 PHAROS:drug_targets:
   operation: keep
   core_predicate: biolink:directly_physically_interacts_with
@@ -3213,10 +3211,10 @@ PR:non-covalently_bound_to:
   operation: keep
   core_predicate: biolink:physically_interacts_with
 PSY:CHD:
-  operation: invert
+  operation: keep
   core_predicate: biolink:subclass_of
 PSY:PAR:
-  operation: keep
+  operation: invert
   core_predicate: biolink:subclass_of
 PSY:RB:
   operation: invert
@@ -3229,10 +3227,10 @@ PSY:RO:
   core_predicate: biolink:related_to
 PSY:has_member:
   operation: keep
-  core_predicate: biolink:subclass_of
+  core_predicate: biolink:has_member
 PSY:member_of:
   operation: invert
-  core_predicate: biolink:subclass_of
+  core_predicate: biolink:has_member
 PSY:use:
   operation: keep
   core_predicate: biolink:subclass_of
@@ -4645,10 +4643,10 @@ UBERON_CORE:synapsed_by:
 #   operation: keep
 #   core_predicate: biolink:coexists_with
 UMLS:CHD:
-  operation: invert
+  operation: keep
   core_predicate: biolink:subclass_of
 UMLS:PAR:
-  operation: keep
+  operation: invert
   core_predicate: biolink:subclass_of
 UMLS:RB:
   operation: invert
@@ -4668,8 +4666,8 @@ UMLS:SY:
   operation: keep
   core_predicate: biolink:close_match
 UMLS:active_metabolites_of:
-  operation: keep
-  core_predicate: biolink:related_to
+  operation: invert
+  core_predicate: biolink:has_metabolite
 UMLS:class_code_classified_by:
   operation: keep
   core_predicate: biolink:related_to
@@ -4680,7 +4678,7 @@ UMLS:component_of:
   core_predicate: biolink:has_part
 UMLS:context_binding_of:
   operation: delete
-UMLS:contraindicated_class_of:
+UMLS:contraindicated_class_of: # Consider mapping this to drug interaction if and when that biolink predicate becomes available
   operation: keep
   core_predicate: biolink:related_to
 UMLS:contraindicated_mechanism_of_action_of:
@@ -4757,7 +4755,7 @@ UMLS:has_supported_concept_property:
 UMLS:has_supported_concept_relationship:
   operation: keep
   core_predicate: biolink:related_to
-UMLS:has_therapeutic_class:
+UMLS:has_therapeutic_class: # Come back to this one at another time, might have a chance to use has_member
   operation: keep
   core_predicate: biolink:related_to
 UMLS:induced_by:
@@ -4772,8 +4770,7 @@ UMLS:larger_than:
 UMLS:mapped_from:
   operation: delete
 UMLS:mapping_qualifier_of:
-  operation: keep
-  core_predicate: biolink:related_to
+  operation: delete
 UMLS:mapped_to:
   operation: keep
   core_predicate: biolink:related_to
@@ -4847,7 +4844,7 @@ UMLS:supported_concept_property_in:
   operation: delete
 UMLS:supported_concept_relationship_in:
   operation: delete
-UMLS:therapeutic_class_of:
+UMLS:therapeutic_class_of: # Look into this at a later time, there might be an opportunity to use has_member
   operation: keep
   core_predicate: biolink:related_to
 UMLS:xref:

From 2b07595b4512affa134bed2e94f2af40502be010 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 7 Sep 2023 18:52:29 -0700
Subject: [PATCH 106/117] #316 updating build process for UMLS ETL

---
 Snakefile-conversion      |  18 +++-
 Snakefile-extraction      |   8 +-
 build-multi-ont-kg.sh     |  23 ++---
 extract-umls.sh           |  44 +---------
 multi_ont_to_kg_jsonl.py  | 132 +---------------------------
 ont-load-inventory.yaml   | 179 --------------------------------------
 snakemake-config-var.yaml |  15 +++-
 7 files changed, 41 insertions(+), 378 deletions(-)

diff --git a/Snakefile-conversion b/Snakefile-conversion
index 7fae8d37..00167044 100644
--- a/Snakefile-conversion
+++ b/Snakefile-conversion
@@ -1,7 +1,19 @@
-rule Ontologies_and_TTL:
+rule UMLS_Conversion:
+    input:
+        code = config['UMLS_CONVERSION_SCRIPT'],
+        real = config['UMLS_EXTRACT_FILE'],
+        validation = config['VALIDATION_PLACEHOLDER']
+    output:
+        nodes = config['UMLS_OUTPUT_NODES_FILE'],
+        edges = config['UMLS_OUTPUT_EDGES_FILE']
+    log:
+        config['UMLS_CONVERSION_LOG']
+    shell:
+        "bash -x {input.code} {input.real} {output.nodes} {output.edges} " + config['TEST_FLAG'] + " > {log} 2>&1" 
+
+rule Ontologies_Conversion:
     input:
         code = config['ONT_CONVERSION_SCRIPT'],
-        real = config['UMLS_CUI_FILE'],
         validation = config['VALIDATION_PLACEHOLDER']
     output:
         nodes = config['ONT_OUTPUT_NODES_FILE'],
@@ -9,7 +21,7 @@ rule Ontologies_and_TTL:
     log:
         config['ONT_CONVERSION_LOG']
     shell:
-        "bash -x {input.code} {input.real} {output.nodes} {output.edges} " + config['TEST_FLAG'] + " > {log} 2>&1" 
+        "bash -x {input.code} {output.nodes} {output.edges} " + config['TEST_FLAG'] + " > {log} 2>&1" 
 
 rule SemMedDB_Conversion:
     input:
diff --git a/Snakefile-extraction b/Snakefile-extraction
index 89eb44c5..14cf0eb8 100644
--- a/Snakefile-extraction
+++ b/Snakefile-extraction
@@ -1,13 +1,13 @@
 rule UMLS:
     input:
-        code = config['ONT_EXTRACTION_SCRIPT'],
+        code = config['UMLS_EXTRACTION_SCRIPT'],
         validation = config['VALIDATION_PLACEHOLDER']
     output:
-        config['UMLS_CUI_FILE']
+        config['UMLS_EXTRACT_FILE']
     log:
-        config['ONT_EXTRACTION_LOG']
+        config['UMLS_EXTRACTION_LOG']
     shell:
-        "bash -x {input.code} " + config['BUILD_DIR'] + " {output} > {log} 2>&1" 
+        "bash -x {input.code} {output} > {log} 2>&1" 
 
 rule SemMedDB:
     input:
diff --git a/build-multi-ont-kg.sh b/build-multi-ont-kg.sh
index 1e95fa58..98e85998 100755
--- a/build-multi-ont-kg.sh
+++ b/build-multi-ont-kg.sh
@@ -5,7 +5,7 @@
 set -o nounset -o pipefail -o errexit
 
 if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
-    echo Usage: "$0 <input_file.tsv> <output_nodes_file.jsonl> <output_edges_file.jsonl> [test]"
+    echo Usage: "$0 <output_nodes_file.jsonl> <output_edges_file.jsonl> [test]"
     exit 2
 fi
 
@@ -20,7 +20,7 @@ config_dir=`dirname "$0"`
 source ${config_dir}/master-config.shinc
 
 ## supply a default value for the build_flag string
-build_flag=${4:-""}
+build_flag=${3:-""}
 biolink_base_url_no_version=https://raw.githubusercontent.com/biolink/biolink-model/
 
 # Issue #300: Need "v" before version number for URL to resolve
@@ -44,9 +44,8 @@ else
     test_arg=''
 fi
 
-umls_cuis_file=${1:-"${BUILD_DIR}/umls_cuis.tsv"}
-output_nodes_file=${2:-"${BUILD_DIR}/kg2-ont-nodes${test_suffix}.json"}
-output_edges_file=${3:-"${BUILD_DIR}/kg2-ont-edges${test_suffix}.json"}
+output_nodes_file=${1:-"${BUILD_DIR}/kg2-ont-nodes${test_suffix}.json"}
+output_edges_file=${2:-"${BUILD_DIR}/kg2-ont-edges${test_suffix}.json"}
 
 ## set the path to include ${BUILD_DIR}
 export PATH=$PATH:${BUILD_DIR}
@@ -56,16 +55,6 @@ mem_gb=`${CODE_DIR}/get-system-memory-gb.sh`
 export OWLTOOLS_MEMORY=${mem_gb}G
 export DEBUG=1  ## for owltools
 
-node_datatype_properties_file="${BUILD_DIR}/node_datatype_properties.json"
-
-## temporary work around for ontobio issue (see biolink issue #507)
-${BUILD_DIR}/robot convert --input ${BUILD_DIR}/umls-hgnc.ttl --output ${BUILD_DIR}/umls-hgnc.owl
-${BUILD_DIR}/robot convert --input ${BUILD_DIR}/umls-omim.ttl --output ${BUILD_DIR}/umls-omim.owl
-${python_command} ${CODE_DIR}/save_owl_datatypeproperties.py \
-           ${BUILD_DIR}/umls-hgnc.owl \
-           ${BUILD_DIR}/umls-omim.owl \
-           --outputFile ${node_datatype_properties_file}
-
 ${s3_cp_cmd} s3://${s3_bucket}/foodon.pickle ${BUILD_DIR}/
 
 ## run the multi_ont_to_json_kg.py script
@@ -75,9 +64,7 @@ cd ${BUILD_DIR} && ${python_command} ${CODE_DIR}/multi_ont_to_kg_jsonl.py \
            ${curies_to_urls_file} \
            ${ont_load_inventory_file} \
            ${output_nodes_file} \
-           ${output_edges_file} \
-           ${umls_cuis_file} \
-           ${node_datatype_properties_file} \
+           ${output_edges_file}
 
 date
 echo "================= finished build-multi-ont-kg.sh ================="
diff --git a/extract-umls.sh b/extract-umls.sh
index c4acb17d..1b91cb2c 100755
--- a/extract-umls.sh
+++ b/extract-umls.sh
@@ -5,11 +5,11 @@
 set -o nounset -o pipefail -o errexit
 
 if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
-    echo Usage: "$0 [output_dir] [umls_cui_file]"
+    echo Usage: "$0 [umls_cui_file]"
     exit 2
 fi
 
-# Usage: extract-umls.sh [OUTPUT_DIR] [UMLS_CUI_FILE]
+# Usage: extract-umls.sh [UMLS_CUI_FILE]
 
 echo "================= starting extract-umls.sh ================="
 date
@@ -17,14 +17,10 @@ date
 config_dir=`dirname "$0"`
 source ${config_dir}/master-config.shinc
 
-output_dir=${1:-${BUILD_DIR}}
 umls_cui_file=${2:-${BUILD_DIR}/umls_cuis.tsv}
 
 umls_ver=2023AA
 umls_file_base=umls-${umls_ver}-metathesaurus-full
-umls2rdf_release=rtx-2.2 # This is the version of umls2rdf NOT RTX-KG2; do not change to update RTX-KG2 version
-umls2rdf_pkgname=umls2rdf-${umls2rdf_release}
-umls2rdf_dir=${umls_dir}/${umls2rdf_pkgname}
 config_file=${umls_dir}/config.prop
 mysql_dbname=umls
 
@@ -79,41 +75,7 @@ sed -i "s/@LINE_TERMINATION@/'\n'/g" ${umls_dest_dir}/mysql_tables.sql
 cd ${umls_dest_dir}
 bash -x populate_mysql_db_configured.sh
 
-## download and unpack the umls2rdf software
-${curl_get} https://github.com/RTXteam/umls2rdf/archive/${umls2rdf_release}.tar.gz > ${umls2rdf_pkgname}.tar.gz
-tar xzf ${umls2rdf_pkgname}.tar.gz -C ${umls_dir}
-
-## make the umls2rdf config file
-cat ${umls2rdf_dir}/conf_sample.py | sed 's/your-host/localhost/g' | \
-    sed "s/umls2015ab/${mysql_dbname}/g" | \
-    sed "s/your db user/${mysql_user}/g" | \
-    sed "s/your db pass/${mysql_password}/g" | \
-    sed "s|output|${output_dir}|g" | \
-    sed "s/2015ab/${umls_ver}/g" > ${umls2rdf_dir}/conf.py
-
-cp ${umls2rdf_config_master} ${umls2rdf_dir}/umls.conf
-
-## change to the umls2rdf_dir directory
-cd ${umls2rdf_dir}
-
-## run umls2rdf
-${VENV_DIR}/bin/python3 umls2rdf.py
-
-## verify the output files
-./checkOutputSyntax.sh  ${output_dir} # uses "rapper" command from the "raptor" package
-
-umls_cuis_query="SELECT DISTINCT s.CUI, GROUP_CONCAT(DISTINCT s.TUI), GROUP_CONCAT(DISTINCT c.STR)
-FROM MRSTY s
-INNER JOIN MRCONSO c
-ON s.CUI=c.CUI
-WHERE c.LAT='ENG'
-AND c.TS='P'
-AND STT='PF'
-AND ISPREF='Y'
-GROUP BY s.CUI"
-
-mysql --defaults-extra-file=${mysql_conf} --database=${mysql_dbname} \
-      -e "${umls_cuis_query}" > ${umls_cui_file}
+${python_command} ${CODE_DIR}/umls_mysql_to_list_jsonl.py ${mysql_conf} ${mysql_dbname} ${1:-${BUILD_DIR}/umls.jsonl} # output path is $1 (see Usage); 'output_file' was never assigned and aborts under nounset
 
 date
 echo "================= finished extract-umls.sh ================="
diff --git a/multi_ont_to_kg_jsonl.py b/multi_ont_to_kg_jsonl.py
index 530a4842..a0ff7568 100755
--- a/multi_ont_to_kg_jsonl.py
+++ b/multi_ont_to_kg_jsonl.py
@@ -1,12 +1,8 @@
 #!/usr/bin/env python3
 '''Builds the RTX "KG2" second-generation knowledge graph, from various OWL input files.
 
-   Usage: multi_ont_to_json_kg.py <categoriesFile.yaml> <curiesToURILALFile>
+   Usage: multi_ont_to_kg_jsonl.py <categoriesFile.yaml> <curiesToURILALFile>
                                   <ontLoadInventoryFile.yaml> <outputNodesFile> <outputEdgesFile>
-                                  <umlsCUITSVFile> <nodeDatatypePropertiesFile>
-   (note: outputFile can end in .json or in .gz; if the latter, it will be written as a gzipped file;
-   but using the gzip options for input or output seems to significantly increase transient memory
-   usage)
 '''
 
 __author__ = 'Stephen Ramsey'
@@ -28,7 +24,6 @@
 import urllib.parse
 import urllib.request
 from typing import Dict
-import json # temporary addition for Ontobio Issue #507
 import datetime
 
 # -------------- define globals here ---------------
@@ -130,7 +125,6 @@ def make_kg2(curies_to_categories: dict,
              nodes_output,
              edges_output,
              umls_cui_tsv_file: str,
-             node_datatype_properties_file: str, # temporary addition for Ontobio Issue #507
              test_mode: bool = False,
              save_pickle: bool = False):
 
@@ -159,32 +153,10 @@ def make_kg2(curies_to_categories: dict,
 
     kg2_util.log_message('Calling make_nodes_dict_from_ontologies_list')
 
-    # Temporary addition for addressing Ontobio Issue #507
-    select_datatype_properties = dict()
-    with open(node_datatype_properties_file, 'r') as node_properties:
-        select_datatype_properties = json.load(node_properties)
-
-    cui_lookup = dict()
-    with open(umls_cui_tsv_file, 'r') as cuis:
-        count = 0
-        for line in cuis:
-            count += 1
-            if count == 1:
-                continue
-            line = line.split('\t')
-            cui = line[0]
-            tuis = line[1].split(',')
-            name = line[2].strip()
-            if cui in cui_lookup:
-                kg2_util.log_message('CUI', cui, 'in TSV file multiple times')
-            cui_lookup[cui] = {'TUIs': tuis, 'Name': name}
-
     nodes_dict = make_nodes_dict_from_ontologies_list(ont_file_information_dict_list,
                                                       curies_to_categories,
                                                       uri_to_curie_shortener,
-                                                      curie_to_uri_expander,
-                                                      cui_lookup,
-                                                      select_datatype_properties) # temporary addition for Ontobio Issue #507
+                                                      curie_to_uri_expander)
 
     kg2_util.log_message('Calling make_map_of_node_ontology_ids_to_curie_ids')
 
@@ -513,9 +485,7 @@ def get_category_for_multiple_tui(biolink_category_tree: dict,
 def make_nodes_dict_from_ontologies_list(ontology_info_list: list,
                                          curies_to_categories: dict,
                                          uri_to_curie_shortener: callable,
-                                         curie_to_uri_expander: callable,
-                                         cui_lookup: dict,
-                                         select_datatype_properties: dict) -> Dict[str, dict]: # temporary addition for Ontobio Issue #507
+                                         curie_to_uri_expander: callable) -> Dict[str, dict]:
     ret_dict = dict()
     omim_to_hgnc_symbol = dict()
     ontologies_iris_to_curies = dict()
@@ -536,13 +506,6 @@ def make_nodes_dict_from_ontologies_list(ontology_info_list: list,
 
     convert_bpv_pred_to_curie_func = make_convert_bpv_predicate_to_curie(uri_to_curie_shortener,
                                                                          curie_to_uri_expander)
-    for cui in cui_lookup:
-        tuis = cui_lookup[cui]['TUIs']
-        category = get_category_for_multiple_tui(biolink_category_tree,
-                                                 tuis,
-                                                 mappings_to_categories)
-        cui_lookup[cui]['Category'] = category
-
 
     def biolink_depth_getter(category: str):
         return biolink_categories_ontology_depths.get(category, None)
@@ -762,8 +725,6 @@ def biolink_depth_getter(category: str):
                         elif bpv_pred_curie == kg2_util.CURIE_ID_HGNC_GENE_SYMBOL:
                             node_gene_symbol = bpv_val
                             node_synonyms.add(node_gene_symbol)
-                        elif bpv_pred_curie == kg2_util.CURIE_ID_UMLS_HAS_CUI:
-                            node_has_cui = True
                     if len(node_tui_list) == 1:
                         node_tui = node_tui_list[0]
                         node_tui_curie = kg2_util.CURIE_PREFIX_UMLS_STY + ':' + node_tui
@@ -887,40 +848,6 @@ def biolink_depth_getter(category: str):
                 if node_gene_symbol is not None:
                     node_name = node_gene_symbol
 
-            # Temporary code to address Ontobio Issue #507
-            if ontology_info_dict['file'] in select_datatype_properties:
-                filename = ontology_info_dict['file']
-                if filename == 'umls-omim.ttl':
-                    mimtype = select_datatype_properties[filename].get(node_curie_id, {}).get('MIMTYPE', None)
-                    if mimtype is not None:
-                        # 0, 3, 5 are phenotypes
-                        # 1, 4 are genes
-                        # There isn't a 2 anymore
-                        if mimtype == "1" or mimtype == "4":
-                            node_category_label = kg2_util.BIOLINK_CATEGORY_GENE
-                            gene_symbol = omim_to_hgnc_symbol.get(node_curie_id, None)
-                            if gene_symbol is not None:
-                                old_name = node_name
-                                node_name = gene_symbol
-                        else:
-                            node_name += " related phenotypic feature"
-                    else:
-                        node_category_label = kg2_util.BIOLINK_CATEGORY_NAMED_THING
-                if filename == 'umls-hgnc.ttl':
-                    hgnc_properties = select_datatype_properties[filename].get(node_curie_id, {})
-                    omim_id = hgnc_properties.get('OMIM_ID', None)
-                    gene_symbol = hgnc_properties.get('GENESYMBOL', None)
-                    if omim_id is not None:
-                        if isinstance(omim_id, list):
-                            for id in omim_id:
-                                omim_to_hgnc_symbol[kg2_util.CURIE_PREFIX_OMIM + ':' + id] = gene_symbol
-                        else:
-                            omim_to_hgnc_symbol[kg2_util.CURIE_PREFIX_OMIM + ':' + omim_id] = gene_symbol
-                    locus_group = hgnc_properties.get('LOCUS_GROUP', None)
-                    if locus_group is not None:
-                        if locus_group == "phenotype":
-                            continue
-
             node_dict = kg2_util.make_node(node_curie_id,
                                            iri,
                                            node_name,
@@ -939,53 +866,6 @@ def biolink_depth_getter(category: str):
             node_dict['synonym'] = sorted(list(node_synonyms))   # slot name is not biolink standard
             node_dict['publications'] = sorted(list(node_publications))
 
-            # check if we need to make a CUI node
-            if node_meta is not None and basic_property_values is not None:
-                for basic_property_value_dict in basic_property_values:
-                    bpv_pred = basic_property_value_dict['pred']
-                    bpv_pred_curie = convert_bpv_pred_to_curie_func(bpv_pred)
-                    bpv_val = basic_property_value_dict['val']
-                    if bpv_pred_curie == kg2_util.CURIE_ID_UMLS_HAS_CUI:
-                        cui_node_dict = dict(node_dict)
-                        cui_uri = kg2_util.BASE_URL_UMLS + bpv_val
-                        cui_curie = uri_to_curie_shortener(cui_uri)
-                        assert cui_curie is not None
-                        # Skip this CUI if it's identical to the ontology node itself (happens with files created
-                        # using 'load_on_cuis' - part of fix for issue #565)
-                        if get_local_id_from_curie_id(cui_curie) == get_local_id_from_curie_id(node_curie_id):
-                            continue
-                        cui_node_dict['id'] = cui_curie
-                        cui = cui_curie.split(':')[1]
-                        cui_node_dict['iri'] = cui_uri
-                        cui_node_dict['synonym'] = []
-                        cui_node_dict['category'] = kg2_util.convert_biolink_category_to_curie(cui_lookup[cui]['Category'])
-                        cui_node_dict['category_label'] = cui_lookup[cui]['Category'].replace(' ', '_')
-                        cui_name = cui_lookup[cui]['Name']
-                        if cui_name.isupper():
-                            cui_name = kg2_util.allcaps_to_only_first_letter_capitalized(cui_name)
-                        cui_node_dict['name'] = cui_name
-                        cui_node_dict['ontology node ids'] = []
-                        cui_node_dict['provided_by'] = kg2_util.CURIE_ID_UMLS_SOURCE_CUI
-                        cui_node_dict['xrefs'] = []  # blanking the "xrefs" here is *vital* in order to avoid issue #395
-                        cui_node_dict_existing = ret_dict.get(cui_curie, None)
-                        if cui_node_dict_existing is not None:
-                            cui_node_dict = kg2_util.merge_two_dicts(cui_node_dict,
-                                                                     cui_node_dict_existing,
-                                                                     biolink_depth_getter)
-                        ret_dict[cui_curie] = cui_node_dict
-                        node_dict_xrefs = node_dict['xrefs']
-                        node_dict_xrefs.append(cui_curie)
-                        node_dict['xrefs'] = sorted(list(set(node_dict_xrefs)))
-                    elif bpv_pred_curie == kg2_util.CURIE_ID_HGNC_ENTREZ_GENE_ID:
-                        entrez_gene_id = bpv_val
-                        entrez_node_dict = dict(node_dict)
-                        entrez_curie = kg2_util.CURIE_PREFIX_NCBI_GENE + ':' + entrez_gene_id
-                        entrez_node_dict['id'] = entrez_curie
-                        entrez_node_dict['iri'] = curie_to_uri_expander(entrez_curie)
-                        ret_dict[entrez_curie] = entrez_node_dict
-                        node_dict_xrefs = node_dict['xrefs']
-                        node_dict_xrefs.append(entrez_curie)
-                        node_dict['xrefs'] = sorted(list(set(node_dict_xrefs)))
             if node_curie_id in ret_dict:
                 if node_curie_id != provided_by:
                     node_dict = kg2_util.merge_two_dicts(ret_dict[node_curie_id],
@@ -1358,8 +1238,6 @@ def make_arg_parser():
     arg_parser.add_argument('ontLoadInventoryFile', type=str)
     arg_parser.add_argument('outputNodesFile', type=str)
     arg_parser.add_argument('outputEdgesFile', type=str)
-    arg_parser.add_argument('umlsCUITSVFile', type=str)
-    arg_parser.add_argument('nodeDatatypePropertiesFile', type=str) # temporary addition for Ontobio Issue #507
     return arg_parser
 
 
@@ -1374,8 +1252,6 @@ def make_arg_parser():
     ont_load_inventory_file = args.ontLoadInventoryFile
     output_nodes_file_name = args.outputNodesFile
     output_edges_file_name = args.outputEdgesFile
-    umls_cui_tsv_file = args.umlsCUITSVFile
-    node_datatype_properties_file = args.nodeDatatypePropertiesFile # temporary addition for Ontobio Issue #507
     save_pickle = args.save_pickle
     test_mode = args.test
     curies_to_categories = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string(curies_to_categories_file_name))
@@ -1394,8 +1270,6 @@ def make_arg_parser():
              ont_urls_and_files,
              nodes_output,
              edges_output,
-             umls_cui_tsv_file,
-             node_datatype_properties_file, # temporary addition for Ontobio Issue #507
              test_mode,
              save_pickle)
 
diff --git a/ont-load-inventory.yaml b/ont-load-inventory.yaml
index 7910c922..aa98ff61 100644
--- a/ont-load-inventory.yaml
+++ b/ont-load-inventory.yaml
@@ -3,180 +3,6 @@
   file: biolink-model.owl.ttl
   download: true
   title: Biolink meta-model
-- # maps to CURIE prefix: UMLSSC
-  url: http://purl.bioontology.org/ontology/STY/
-  file: umls-semantictypes.ttl
-  download: false
-  title: UMLS Semantic Types
-- # maps to CURIE prefix: ATC
-  download: false
-  file: umls-atc.ttl
-  title: Anatomical Therapeutic Chemical Classification System
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/ATC
-- # maps to CURIE prefix CHV
-  download: false
-  file: umls-chv.ttl
-  title: Consumer Health Vocabulary
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/CHV
-# - # maps to CURIE prefix CPT
-#   download: false
-#   file: umls-cpt.ttl
-#   title: Current Procedural Terminology
-#   url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/CPT
-- # maps to CURIE prefix DRUGBANK
-  download: false
-  file: umls-drugbank.ttl
-  title: DrugBank
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/DRUGBANK
-- # maps to CURIE prefix FMA
-  download: false
-  file: umls-fma.ttl
-  title: Foundational Model of Anatomy
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/FMA
-- # maps to CURIE prefix GO
-  download: false
-  file: umls-go.ttl
-  title: Gene Ontology
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/GO
-- # maps to CURIE prefix HCPCS
-  download: false
-  file: umls-hcpcs.ttl
-  title: Healthcare Common Procedure Coding System
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/HCPCS
-# - # maps to CURIE prefix CPT
-#   download: false
-#   file: umls-hcpt.ttl
-#   title: CPT in HCPCS
-#   url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/HCPT
-- # maps to CURIE prefix HGNC
-  download: false
-  file: umls-hgnc.ttl
-  title: HUGO Gene Nomenclature Committee
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/HGNC
-- # maps to CURIE prefix umls
-  download: false
-  file: umls-hl7.ttl
-  title: HL7 Version 3.0
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/HL7
-- # maps to CURIE prefix HP
-  download: false
-  file: umls-hpo.ttl
-  title: Human Phenotype Ontology
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/HPO
-# - # maps to CURIE prefix ICD10
-#   download: false
-#   file: umls-icd10.ttl
-#   title: International Classification of Diseases and Related Health Problems,
-#   url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/ICD10
-# - # maps to CURIE prefix ICD10
-#   download: false
-#   file: umls-icd10ae.ttl
-#   title: ICD-10, American English Equivalents
-#   url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/ICD10AE
-# - # maps to CURIE prefix ICD10
-#   download: false
-#   file: umls-icd10cm.ttl
-#   title: International Classification of Diseases, Tenth Revision, Clinical Modification
-#   url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/ICD10CM
-- # maps to CURIE prefix ICD10PCS
-  download: false
-  file: umls-icd10pcs.ttl
-  title: ICD-10 Procedure Coding System
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/ICD10PCS
-- # maps to CURIE prefix ICD9
-  download: false
-  file: umls-icd9cm.ttl
-  title: International Classification of Diseases, Ninth Revision, Clinical Modification
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/ICD9CM
-# - # maps to CURIE prefix LOINC
-#   download: false
-#   file: umls-lnc.ttl
-#   title: Logical Observation Identifiers Names and Codes
-#   url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/LNC
-# - # maps to CURIE prefix MEDDRA
-#   download: false
-#   file: umls-mdr.ttl
-#   title: MedDRA
-#   url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/MEDDRA
-- # maps to CURIE prefix umls
-  download: false
-  file: umls-med-rt.ttl
-  title: Medication Reference Terminology
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/MED-RT
-- # maps to CURIE prefix umls
-  download: false
-  file: umls-medlineplus.ttl
-  title: MedlinePlus Health Topics
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/MEDLINEPLUS
-- # maps to CURIE prefix MESH
-  download: false
-  file: umls-msh.ttl
-  title: Medical Subject Headings
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/MSH
-- # maps to CURIE prefix NCBITaxon
-  download: false
-  file: umls-ncbi.ttl
-  title: NCBI
-  url: http://purl.obolibrary.org/obo/ncbitaxon/subsets/taxslim.owl
-- # maps to CURIE prefix NCIT
-  download: false
-  file: umls-nci.ttl
-  title: NCI Thesaurus
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NCI
-- # maps to CURIE prefix NDDF
-  download: false
-  file: umls-nddf.ttl
-  title: National Drug Data File
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NDDF
-- # maps to CURIE prefix NDFRT
-  download: false
-  file: umls-ndfrt.ttl
-  title: National Drug File - Reference Terminology
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NDFRT
-- # maps to CURIE prefix OMIM
-  download: false
-  file: umls-omim.ttl
-  title: Online Mendelian Inheritance in Man
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/OMIM
-- # maps to CURIE prefix PDQ
-  download: false
-  file: umls-pdq.ttl
-  title: Physician Data Query
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/PDQ
-- # maps to CURIE prefix PSY
-  download: false
-  file: umls-psy.ttl
-  title: Psychological Index Terms
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/PSY
-- # maps to CURIE prefix RXNORM
-  download: false
-  file: umls-rxnorm.ttl
-  title: RXNORM
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/RXNORM
-# - # maps to CURIE prefix SNOMED
-#   download: false
-#   file: umls-snomedct_us.ttl
-#   title: SNOMED Clinical Terms US Edition
-#   url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/SNOMEDCT
-# ==> unable to find an online set of pages for SNOMEDCT_VET concepts but I want to find one so that
-# I can include SNOMEDCT_VET in the kg2 build, thus am keeping this section commented out [SAR]:
-# -
-#   download: false
-#   file: umls-snomedct_vet.ttl
-#   title: Veterinary Extension to SNOMED CT
-#   url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/SNOMEDCT_VET
-# ==> this section (UMLS Source Terminology Names) seems like it could be useful in the future, but
-# I can't find purls to its concepts anywhere:
-# -
-#   download: false
-#   file: umls-src.ttl
-#   title: Source Terminology Names (UMLS)
-#   url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/SRC
-- # maps to CURIE prefix VANDF
-  download: false
-  file: umls-vandf.ttl
-  title: National Drug File
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/VANDF
 - # maps to CURIE prefix: BFO
   url:  http://purl.obolibrary.org/obo/bfo.owl
   file: bfo.owl
@@ -292,8 +118,3 @@
   file: mi.owl
   download: true
   title: Molecular Interactions Controlled Vocabulary
-- # maps to CURIE prefix umls
-  download: false
-  file: umls-mth.ttl
-  title: Metathesaurus Names
-  url: https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/MTH
diff --git a/snakemake-config-var.yaml b/snakemake-config-var.yaml
index 2e2c923c..10335509 100644
--- a/snakemake-config-var.yaml
+++ b/snakemake-config-var.yaml
@@ -3,12 +3,19 @@ edges_suffix: -edges
 
 validation_placeholder: ${BUILD_DIR}/validation-placeholder.empty
 
-ont_extraction_base: extract-umls
+umls_extraction_base: extract-umls
+umls_conversion_base: umls_list_jsonl_to_kg_jsonl
+umls_output_base: kg2-umls
+umls_extraction_script: ${CODE_DIR}/${umls_extraction_base}.sh
+umls_extraction_log: ${BUILD_DIR}/${umls_extraction_base}${test_suffix}.log
+umls_extract_file: ${BUILD_DIR}/umls.jsonl
+umls_conversion_script: ${CODE_DIR}/${umls_conversion_base}.py
+umls_conversion_log: ${BUILD_DIR}/${umls_conversion_base}${test_suffix}.log
+umls_output_nodes_file: ${BUILD_DIR}/${umls_output_base}${nodes_suffix}${test_suffix}.jsonl
+umls_output_edges_file: ${BUILD_DIR}/${umls_output_base}${edges_suffix}${test_suffix}.jsonl
+
 ont_conversion_base: build-multi-ont-kg
 ont_output_base: kg2-ont
-ont_extraction_script: ${CODE_DIR}/${ont_extraction_base}.sh
-ont_extraction_log: ${BUILD_DIR}/${ont_extraction_base}${test_suffix}.log
-umls_cui_file: ${BUILD_DIR}/umls_cuis.tsv
 ont_conversion_script: ${CODE_DIR}/${ont_conversion_base}.sh
 ont_conversion_log: ${BUILD_DIR}/${ont_conversion_base}${test_suffix}.log
 ont_output_nodes_file: ${BUILD_DIR}/${ont_output_base}${nodes_suffix}${test_suffix}.jsonl

From 9d6cb464a7dd967281372c8e2d254dfea2ea764d Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Thu, 7 Sep 2023 19:00:02 -0700
Subject: [PATCH 107/117] #316 it no longer makes sense to validate ont load
 inventory ttl files

---
 run-validation-tests.sh | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/run-validation-tests.sh b/run-validation-tests.sh
index 50d1bb02..f8d3a468 100755
--- a/run-validation-tests.sh
+++ b/run-validation-tests.sh
@@ -69,13 +69,6 @@ ${python_command} -u ${CODE_DIR}/validate_predicate_remap_yaml.py \
            ${biolink_model_yaml_url} \
            ${biolink_model_yaml_local_file}
 
-${python_command} -u ${CODE_DIR}/validate_ont_load_inventory.py \
-           ${ont_load_inventory_file} \
-           ${curies_to_urls_file} \
-           ${umls2rdf_config_master} \
-           ${biolink_model_owl_url} \
-           ${biolink_model_owl_local_file}
-
 ${python_command} -u ${CODE_DIR}/validate_provided_by_to_infores_map_yaml.py \
            ${infores_mapping_file} \
            ${infores_catalog_yaml}

From 1907352c48d15263fd131fa057aa03acacb1d1c0 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 8 Sep 2023 10:50:04 -0700
Subject: [PATCH 108/117] #316 temporarily disable log-file redirection for
 testing to figure out what's going on

---
 build-kg2-snakemake.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build-kg2-snakemake.sh b/build-kg2-snakemake.sh
index 28d06b28..b0a1a5d3 100755
--- a/build-kg2-snakemake.sh
+++ b/build-kg2-snakemake.sh
@@ -71,7 +71,7 @@ then
     trap "cat ${build_kg2_log_file}" EXIT
 fi
 
-{
+# {
 echo "================= starting build-kg2-snakemake.sh =================="
 date
 
@@ -118,7 +118,7 @@ cd ~ && ${VENV_DIR}/bin/snakemake --snakefile ${snakefile} ${run_flag} -R Finish
 
 date
 echo "================ script finished ============================"
-} > ${build_kg2_log_file} 2>&1
+# } > ${build_kg2_log_file} 2>&1
 
 if [[ "${ci_flag}" != "ci" && "${dryrun}" != "-n" ]]
 then

From 1e3c97d9c23ed29386fef52ba56644e0ef8dc4ca Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 8 Sep 2023 11:03:56 -0700
Subject: [PATCH 109/117] #316 SemMedDB takes UMLS extract file as input; run UMLS conversion with Python

---
 Snakefile-conversion | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Snakefile-conversion b/Snakefile-conversion
index 00167044..ba15511d 100644
--- a/Snakefile-conversion
+++ b/Snakefile-conversion
@@ -9,7 +9,7 @@ rule UMLS_Conversion:
     log:
         config['UMLS_CONVERSION_LOG']
     shell:
-        "bash -x {input.code} {input.real} {output.nodes} {output.edges} " + config['TEST_FLAG'] + " > {log} 2>&1" 
+        config['PYTHON_COMMAND'] + " {input.code} {input.real} {output.nodes} {output.edges} " + config['TEST_FLAG'] + " > {log} 2>&1" 
 
 rule Ontologies_Conversion:
     input:
@@ -27,7 +27,7 @@ rule SemMedDB_Conversion:
     input:
         code = config['SEMMEDDB_CONVERSION_SCRIPT'],
         real = config['SEMMEDDB_TUPLELIST_FILE'],
-        mrcui_req = config['UMLS_CUI_FILE'],
+        mrcui_req = config['UMLS_EXTRACT_FILE'],
         exclusion_list = config['SEMMEDDB_EXCLUSION_FILE'],
         version_file = config['SEMMEDDB_VERSION_FILE'],
         validation = config['VALIDATION_PLACEHOLDER']

From ffcb318eb623408afd85260f2d4e8bb31a4d96b8 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 8 Sep 2023 11:15:36 -0700
Subject: [PATCH 110/117] #316 add UMLS into the merge

---
 Snakefile-post-etl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Snakefile-post-etl b/Snakefile-post-etl
index e47913b3..05fd073b 100644
--- a/Snakefile-post-etl
+++ b/Snakefile-post-etl
@@ -1,6 +1,8 @@
 rule Merge:
     input:
         code = config['MERGE_SCRIPT'],
+        umls_nodes = config['UMLS_OUTPUT_NODES_FILE'],
+        umls_edges = config['UMLS_OUTPUT_EDGES_FILE'],
         ont_nodes = config['ONT_OUTPUT_NODES_FILE'],
         ont_edges = config['ONT_OUTPUT_EDGES_FILE'],
         uniprot_nodes = config['UNIPROTKB_OUTPUT_NODES_FILE'],
@@ -53,6 +55,7 @@ rule Merge:
             " --outputNodesFile {output.nodes} " + \
             " --outputEdgesFile {output.edges} " + \
             " --kgNodesFiles " + \
+            "{input.umls_nodes} " + \
             "{input.ont_nodes} " + \
             "{input.semmeddb_nodes} " + \
             "{input.uniprot_nodes} " + \
@@ -74,6 +77,7 @@ rule Merge:
             "{input.disgenet_nodes} " + \
             "{input.kegg_nodes} " + \
             " --kgEdgesFiles " + \
+            "{input.umls_edges} " + \
             "{input.ont_edges} " + \
             "{input.semmeddb_edges} " + \
             "{input.uniprot_edges} " + \

From e9eace1a06b563b7a74777d85c6769834b785f6d Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 8 Sep 2023 11:28:56 -0700
Subject: [PATCH 111/117] #316 parameterize UMLS conversion

---
 Snakefile-conversion           |  5 ++++-
 snakemake-config-var.yaml      |  2 ++
 umls_list_jsonl_to_kg_jsonl.py | 12 +++++++++---
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/Snakefile-conversion b/Snakefile-conversion
index ba15511d..0d33b54f 100644
--- a/Snakefile-conversion
+++ b/Snakefile-conversion
@@ -2,6 +2,9 @@ rule UMLS_Conversion:
     input:
         code = config['UMLS_CONVERSION_SCRIPT'],
         real = config['UMLS_EXTRACT_FILE'],
+        curies_to_urls_map = config['CURIES_TO_URLS_FILE'],
+        umls_name_heirarchy = config['UMLS_NAME_HEIRARCHY'],
+        tui_map = config['UMLS_TUI_MAP'],
         validation = config['VALIDATION_PLACEHOLDER']
     output:
         nodes = config['UMLS_OUTPUT_NODES_FILE'],
@@ -9,7 +12,7 @@ rule UMLS_Conversion:
     log:
         config['UMLS_CONVERSION_LOG']
     shell:
-        config['PYTHON_COMMAND'] + " {input.code} {input.real} {output.nodes} {output.edges} " + config['TEST_FLAG'] + " > {log} 2>&1" 
+        config['PYTHON_COMMAND'] + " {input.code} {input.real} {input.curies_to_urls_map} {input.umls_name_heirarchy} {input.tui_map} {output.nodes} {output.edges} " + config['TEST_FLAG'] + " > {log} 2>&1" 
 
 rule Ontologies_Conversion:
     input:
diff --git a/snakemake-config-var.yaml b/snakemake-config-var.yaml
index 10335509..40569c1b 100644
--- a/snakemake-config-var.yaml
+++ b/snakemake-config-var.yaml
@@ -11,6 +11,8 @@ umls_extraction_log: ${BUILD_DIR}/${umls_extraction_base}${test_suffix}.log
 umls_extract_file: ${BUILD_DIR}/umls.jsonl
 umls_conversion_script: ${CODE_DIR}/${umls_conversion_base}.py
 umls_conversion_log: ${BUILD_DIR}/${umls_conversion_base}${test_suffix}.log
+umls_name_heirarchy: ${CODE_DIR}/umls-name-heirarchy.yaml
+umls_tui_map: ${CODE_DIR}/tui_combo_mappings.json
 umls_output_nodes_file: ${BUILD_DIR}/${umls_output_base}${nodes_suffix}${test_suffix}.jsonl
 umls_output_edges_file: ${BUILD_DIR}/${umls_output_base}${edges_suffix}${test_suffix}.jsonl
 
diff --git a/umls_list_jsonl_to_kg_jsonl.py b/umls_list_jsonl_to_kg_jsonl.py
index 3ff28081..0d07b788 100644
--- a/umls_list_jsonl_to_kg_jsonl.py
+++ b/umls_list_jsonl_to_kg_jsonl.py
@@ -26,6 +26,9 @@
 def get_args():
     arg_parser = argparse.ArgumentParser(description='umls_list_jsonl_to_kg_jsonl.py: converts UMLS MySQL JSON Lines dump into KG2 JSON format')
     arg_parser.add_argument('inputFile', type=str)
+    arg_parser.add_argument('curiesToURIFile', type=str)
+    arg_parser.add_argument('umlsNameHeirarchy', type=str)
+    arg_parser.add_argument('TUIComboMappings', type=str)
     arg_parser.add_argument('outputNodesFile', type=str)
     arg_parser.add_argument('outputEdgesFile', type=str)
     arg_parser.add_argument('--test', dest='test', action="store_true", default=False)
@@ -43,6 +46,9 @@ def extract_node_id(node_id_str):
     args = get_args()
     input_file_name = args.inputFile
     test_mode = args.test
+    curies_to_urls_map_file_name = args.curiesToURIFile
+    umls_name_heirarchy_file_name = args.umlsNameHeirarchy
+    tui_combo_mappings_file_name = args.TUIComboMappings
     output_nodes_file_name = args.outputNodesFile
     output_edges_file_name = args.outputEdgesFile
 
@@ -53,11 +59,11 @@ def extract_node_id(node_id_str):
     input_read_jsonlines_info = kg2_util.start_read_jsonlines(input_file_name)
     input_items = input_read_jsonlines_info[0]
 
-    with open('tui_combo_mappings.json') as mappings:
+    with open(tui_combo_mappings_file_name) as mappings:
         TUI_MAPPINGS = json.load(mappings)
 
-    iri_mappings_raw = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string('curies-to-urls-map.yaml'))['use_for_bidirectional_mapping']
-    full_heirarchy = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string('umls-name-heirarchy.yaml'))
+    iri_mappings_raw = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string(curies_to_urls_map_file_name))['use_for_bidirectional_mapping']
+    full_heirarchy = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string(umls_name_heirarchy_file_name))
     for item in iri_mappings_raw:
         for prefix in item:
             IRI_MAPPINGS[prefix] = item[prefix]

From 75498b5a97dffcd6f097e0b14b55cadb1ee59ee7 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 8 Sep 2023 12:41:53 -0700
Subject: [PATCH 112/117] #316 uncomment the log redirection again

---
 build-kg2-snakemake.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build-kg2-snakemake.sh b/build-kg2-snakemake.sh
index b0a1a5d3..28d06b28 100755
--- a/build-kg2-snakemake.sh
+++ b/build-kg2-snakemake.sh
@@ -71,7 +71,7 @@ then
     trap "cat ${build_kg2_log_file}" EXIT
 fi
 
-# {
+{
 echo "================= starting build-kg2-snakemake.sh =================="
 date
 
@@ -118,7 +118,7 @@ cd ~ && ${VENV_DIR}/bin/snakemake --snakefile ${snakefile} ${run_flag} -R Finish
 
 date
 echo "================ script finished ============================"
-# } > ${build_kg2_log_file} 2>&1
+} > ${build_kg2_log_file} 2>&1
 
 if [[ "${ci_flag}" != "ci" && "${dryrun}" != "-n" ]]
 then

From ef9d55ca238d77586dd4678a2b5a61185e1fc7b9 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 8 Sep 2023 13:12:35 -0700
Subject: [PATCH 113/117] #349 Uberon link was wrong

---
 ont-load-inventory.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ont-load-inventory.yaml b/ont-load-inventory.yaml
index aa98ff61..b6728bcf 100644
--- a/ont-load-inventory.yaml
+++ b/ont-load-inventory.yaml
@@ -24,7 +24,7 @@
   download: true
   title: Relation Ontology
 -
-  url:  http://purl.obolibrary.org/obo/uberon/ext.owl
+  url:  http://purl.obolibrary.org/obo/uberon.owl
   file: uberon-ext.owl
   download: true
   title: Uber-anatomy Ontology

From 86cb186b4425b74a759dafa5d06b6e93d5482c26 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Fri, 8 Sep 2023 21:05:23 -0700
Subject: [PATCH 114/117] #316 #349 addressing unbound variable

---
 extract-umls.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extract-umls.sh b/extract-umls.sh
index 1b91cb2c..9fbd63d6 100755
--- a/extract-umls.sh
+++ b/extract-umls.sh
@@ -17,7 +17,7 @@ date
 config_dir=`dirname "$0"`
 source ${config_dir}/master-config.shinc
 
-umls_cui_file=${2:-${BUILD_DIR}/umls_cuis.tsv}
+output_file=${2:-${BUILD_DIR}/umls.jsonl}
 
 umls_ver=2023AA
 umls_file_base=umls-${umls_ver}-metathesaurus-full

From 10ec96180ade46f6b2c6261894394e3866d0ae92 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Sat, 9 Sep 2023 13:18:07 -0700
Subject: [PATCH 115/117] #316 editing uberon to match new one

---
 kg2-provided-by-curie-to-infores-curie.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kg2-provided-by-curie-to-infores-curie.yaml b/kg2-provided-by-curie-to-infores-curie.yaml
index eeda5413..4a907523 100644
--- a/kg2-provided-by-curie-to-infores-curie.yaml
+++ b/kg2-provided-by-curie-to-infores-curie.yaml
@@ -110,7 +110,7 @@ OBO:ro.owl:
   source_name: Relations Ontology
   infores_curie: infores:ro
   knowledge_type: knowledge_source
-OBO:uberon/ext.owl:
+OBO:uberon:
   source_name: Uber Anatomy Ontology
   infores_curie: infores:uberon
   knowledge_type: knowledge_source

From f792f9169057403c25138d6402444e7c14879b25 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Sat, 9 Sep 2023 13:31:40 -0700
Subject: [PATCH 116/117] #316 try number 2 to fix uberon issue

---
 kg2-provided-by-curie-to-infores-curie.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kg2-provided-by-curie-to-infores-curie.yaml b/kg2-provided-by-curie-to-infores-curie.yaml
index 4a907523..587ccd9f 100644
--- a/kg2-provided-by-curie-to-infores-curie.yaml
+++ b/kg2-provided-by-curie-to-infores-curie.yaml
@@ -110,7 +110,7 @@ OBO:ro.owl:
   source_name: Relations Ontology
   infores_curie: infores:ro
   knowledge_type: knowledge_source
-OBO:uberon:
+OBO:uberon.owl:
   source_name: Uber Anatomy Ontology
   infores_curie: infores:uberon
   knowledge_type: knowledge_source

From 569cb093280ea3c2952dcce461f807ad54313397 Mon Sep 17 00:00:00 2001
From: ecwood <wooderi@stanford.edu>
Date: Sat, 9 Sep 2023 15:34:25 -0700
Subject: [PATCH 117/117] #316 dealing with new qualifiers

---
 predicate-remap.yaml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/predicate-remap.yaml b/predicate-remap.yaml
index f617a3bb..0a6030e7 100644
--- a/predicate-remap.yaml
+++ b/predicate-remap.yaml
@@ -1055,6 +1055,11 @@ DrugCentral:positive_modulator:
   qualifiers:
     object_aspect: activity
     object_direction: increased
+DrugCentral:reduce_risk:
+  operation: keep
+  core_predicate: biolink:affects_risk_for
+  qualifiers:
+    object_direction: decreased
 DrugCentral:releasing_agent:
   operation: keep
   core_predicate: biolink:affects
@@ -3131,6 +3136,12 @@ ORPHANET:C016:
 ORPHANET:C017:
   operation: keep
   core_predicate: biolink:related_to
+ORPHANET:C056:
+  operation: keep
+  core_predicate: biolink:close_match
+ORPHANET:C057:
+  operation: keep
+  core_predicate: biolink:close_match
 # PATO:0000085:
 #   operation: keep
 #   core_predicate: biolink:associated_with_sensitivity_to
@@ -3782,6 +3793,9 @@ RO:0002411:
 RO:0002412:
   operation: keep
   core_predicate: biolink:precedes
+RO:0002428:
+  operation: keep
+  core_predicate: biolink:regulates
 # RO:0002432:
 #   operation: keep
 #   core_predicate: biolink:active_in