From 8ff57d0e6161310241c3e17651280adcf38b44a0 Mon Sep 17 00:00:00 2001 From: Manuel Lera Ramirez Date: Mon, 18 Mar 2024 10:51:57 +0000 Subject: [PATCH] fix for https://github.com/pombase/canto/issues/2807 --- protein_modification_transvar.py | 11 ++++++----- transvar_functions.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/protein_modification_transvar.py b/protein_modification_transvar.py index 28eda01..2d18a86 100644 --- a/protein_modification_transvar.py +++ b/protein_modification_transvar.py @@ -17,11 +17,12 @@ def expand_CTD_abbreviations(sequence_position: str) -> str: """Expand CTD abbreviations to all positions""" + abbreviations = { - "CTD_S2": "S1579,S1586,S1593,S1600,S1607,S1614,S1621,S1628,S1635,S1642,S1649,S1656,S1663,S1670,S1677,S1684,S1691,S1698,S1705,S1712,S1719,S1726,S1733,S1740,S1747", - "CTD_T4": "T1584,T1591,T1598,T1605,T1612,T1619,T1626,T1615,T1640,T1647,T1654,T1661,T1663,T1675,T1682,T1689,T1696,T1703,T1710,T1717,T1723,T1731,T1738,T1745,T1752", - "CTD_S5": "T1582,T1589,T1596,T1603,T1610,T1617,T1624,T1613,T1638,T1645,T1652,T1659,T1666,T1673,T1680,T1687,T1694,T1701,T1708,T1715,T1722,T1729,T1736,T1743,T1750", - "CTD_S7": "S1584,S1591,S1598,S1605,S1612,S1619,S1626,S1615,S1640,S1647,S1654,S1661,S1668,S1675,S1682 S1689,S1696,S1703,S1710,S1717,S1724,S1731,S1738,S1745,S1752" + "CTD_S2": "S1559,S1566,S1579,S1586,S1593,S1600,S1607,S1614,S1621,S1628,S1635,S1642,S1649,S1656,S1663,S1670,S1677,S1684,S1691,S1698,S1705,S1712,S1719,S1726,S1733,S1740,S1747", + "CTD_T4": "T1554,T1567,T1581,T1588,T1595,T1602,T1609,T1616,T1623,T1630,T1637,T1644,T1651,T1658,T1665,T1672,T1679,T1686,T1693,T1700,T1707,T1714,T1721,T1728,T1735,T1742,T1749", + "CTD_S5": "S1555,S1562,S1568,S1575,S1582,S1589,S1596,S1603,S1610,S1617,S1624,S1631,S1638,S1645,S1652,S1659,S1666,S1673,S1680,S1687,S1694,S1701,S1708,S1715,S1722,S1729,S1736,S1743,S1750", + "CTD_S7": "S1557,S1577,S1584,S1591,S1598,S1605,S1612,S1619,S1626,S1633,S1640,S1647,S1654,S1661,S1668,S1675,S1682,S1689,S1696,S1703,S1710,S1717,S1724,S1731,S1738,S1745,S1752" } for key in abbreviations: sequence_position = sequence_position.replace(key, abbreviations[key]) @@ -55,7 +56,7 @@ def get_transvar_coordinates(row, db, genome, exclude_transcripts): # print(row['systematic_id'], '<<<>>>', row['transvar_input']) qc_id = process_systematic_id(row['systematic_id'], genome, 'first') transcript_id = None if (qc_id == row['systematic_id']) else qc_id - print(row['transvar_input']) + try: transvar_annotation_list = parse_transvar_string(get_transvar_str_annotation('panno', row['transvar_input'], db)) return get_transvar_annotation_coordinates(transvar_annotation_list, row['systematic_id'], transcript_id) diff --git a/transvar_functions.py b/transvar_functions.py index ef08242..f95b9af 100644 --- a/transvar_functions.py +++ b/transvar_functions.py @@ -110,7 +110,7 @@ def get_transvar_str_annotation(variant_type: str, variant_description: str, db: transvar_fields_first_row = output_str.split('\n')[1].split('\t') if transvar_fields_first_row[-3] == '././.': if (transvar_fields_first_row[-1] == 'no_valid_transcript_found') and not variant_description.startswith('Q00'): - raise ValueError('no_valid_transcript_found') + raise ValueError('no_valid_transcript_found', variant_description) else: raise ValueError('Unknown error: ', transvar_fields_first_row[-1])