From cb627eca6fd3c06f38dcec1e45489164552a1129 Mon Sep 17 00:00:00 2001 From: Kevin Schaper Date: Wed, 11 Dec 2024 18:45:19 -0800 Subject: [PATCH] Use Xenbase GenePageToGene mapping to create orthology records for each species of xenopus rather than the dangling genepage orthology records we were making. Genepages with no entry in the genepage-2-gene map are logged and skipped by the existing row-level handler instead of aborting the ingest, and empty gene IDs are ignored. --- .../ingests/xenbase/orthologs.py | 37 +++++++++++-------- .../ingests/xenbase/orthologs.yaml | 3 ++ src/monarch_ingest/qc_expect.yaml | 2 + tests/unit/xenbase/test_xenbase_orthologs.py | 26 ++++++++++--- 4 files changed, 47 insertions(+), 21 deletions(-) diff --git a/src/monarch_ingest/ingests/xenbase/orthologs.py b/src/monarch_ingest/ingests/xenbase/orthologs.py index 609a27ef..5ca4b9da 100644 --- a/src/monarch_ingest/ingests/xenbase/orthologs.py +++ b/src/monarch_ingest/ingests/xenbase/orthologs.py @@ -12,31 +12,36 @@ koza_app = get_koza_app("xenbase_orthologs") +genepage_to_gene_map = koza_app.get_map("genepage-2-gene") + while (row := koza_app.get_row()) is not None: try: - gene_id = row['xb_genepage_id'] - assert gene_id + genepage_id = row['xb_genepage_id'] + assert genepage_id predicate = "biolink:orthologous_to" ortholog_id = row['entrez_id'] assert ortholog_id - # Instantiate the instance of Gene-to-Gene Homology Association - association = GeneToGeneHomologyAssociation( - id=f"uuid:{str(uuid.uuid1())}", - subject=f"Xenbase:{gene_id}", - predicate=predicate, - object=f"NCBIGene:{ortholog_id}", - aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:xenbase", - knowledge_level=KnowledgeLevelEnum.knowledge_assertion, - agent_type=AgentTypeEnum.manual_agent, - ) - - # Write the captured Association out - koza_app.write(association) + gene_ids_by_species = genepage_to_gene_map.get(genepage_id) + assert gene_ids_by_species, f"no genepage-2-gene mapping for '{genepage_id}'" + for gene_id in filter(None, gene_ids_by_species.values()): + # Instantiate the instance of Gene-to-Gene Homology Association + association = GeneToGeneHomologyAssociation( + id=f"uuid:{str(uuid.uuid1())}", + subject=f"Xenbase:{gene_id}", + predicate=predicate, 
+ object=f"NCBIGene:{ortholog_id}", + aggregator_knowledge_source=["infores:monarchinitiative"], + primary_knowledge_source="infores:xenbase", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent, + ) + + # Write the captured Association out + koza_app.write(association) except (RuntimeError, AssertionError) as rte: logger.debug(f"{str(rte)} in data row:\n\t'{str(row)}'") diff --git a/src/monarch_ingest/ingests/xenbase/orthologs.yaml b/src/monarch_ingest/ingests/xenbase/orthologs.yaml index ae34277f..6070b341 100644 --- a/src/monarch_ingest/ingests/xenbase/orthologs.yaml +++ b/src/monarch_ingest/ingests/xenbase/orthologs.yaml @@ -25,6 +25,9 @@ columns: - 'xb_gene_symbol' - 'xb_gene_name' +depends_on: + - './src/monarch_ingest/maps/genepage-2-gene.yaml' + edge_properties: - 'id' - 'category' diff --git a/src/monarch_ingest/qc_expect.yaml b/src/monarch_ingest/qc_expect.yaml index 209e58df..6f194e34 100644 --- a/src/monarch_ingest/qc_expect.yaml +++ b/src/monarch_ingest/qc_expect.yaml @@ -65,6 +65,8 @@ edges: min: 1420000 xenbase_gene_to_phenotype_edges: min: 2000 + xenbase_orthologs_edges: + min: 136746 alliance_phenotype_edges: min: 650000 alliance_disease_edges: diff --git a/tests/unit/xenbase/test_xenbase_orthologs.py b/tests/unit/xenbase/test_xenbase_orthologs.py index 71cecced..e9574576 100644 --- a/tests/unit/xenbase/test_xenbase_orthologs.py +++ b/tests/unit/xenbase/test_xenbase_orthologs.py @@ -24,7 +24,19 @@ def script(): @pytest.fixture -def orthology_record(mock_koza, source_name, script, global_table): +def map_cache(): + return { + 'genepage-2-gene': { + 'XB-GENEPAGE-478063': { + 'tropicalis_id': 'XB-GENE-478064', + 'laevis_l_id': 'XB-GENE-6461998', + 'laevis_s_id': 'XB-GENE-17342561', + } + } + } + +@pytest.fixture +def orthology_record(mock_koza, source_name, script, global_table, map_cache): row = { 'entrez_id': "8928", 'xb_genepage_id': "XB-GENEPAGE-478063", @@ -35,16 +47,20 @@ def 
orthology_record(mock_koza, source_name, script, global_table): name=source_name, data=row, transform_code=script, + map_cache=map_cache, global_table=global_table, ) - -def test_orthology_record(orthology_record): +@pytest.mark.parametrize("index, expected_subject", + [(0, "Xenbase:XB-GENE-478064"), + (1, "Xenbase:XB-GENE-6461998"), + (2, "Xenbase:XB-GENE-17342561")]) +def test_orthology_records(orthology_record, index: int, expected_subject: str): assert orthology_record association = [ association for association in orthology_record if isinstance(association, GeneToGeneHomologyAssociation) - ][0] - assert association.subject == "Xenbase:XB-GENEPAGE-478063" + ][index] + assert association.subject == expected_subject assert association.predicate == "biolink:orthologous_to" assert association.object == "NCBIGene:8928"