diff --git a/scripts/check_hgnc_id.py b/scripts/check_hgnc_id.py
new file mode 100644
index 0000000..489387b
--- /dev/null
+++ b/scripts/check_hgnc_id.py
@@ -0,0 +1,84 @@
+import logging
+import coloredlogs
+import requests
+
+LOG = logging.getLogger(__name__)
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+import click
+
+from stranger.resources import repeats_path
+from stranger.utils import parse_repeat_file, get_repeat_info
+
+# HGNC REST search endpoint; the catalog's HGNC id is appended to it.
+HGNC_SEARCH_URL = "https://rest.genenames.org/search/hgnc_id/"
+# Seconds to wait for HGNC before giving up on a single lookup.
+REQUEST_TIMEOUT = 30
+
+
+@click.command()
+@click.option('-f', '--repeats-file',
+    type=click.Path(exists=True),
+    help="Path to a file with repeat definitions. See README for explanation",
+    default=repeats_path,
+    show_default=True,
+)
+@click.option('--loglevel', default='INFO', type=click.Choice(LOG_LEVELS),
+    help="Set the level of log output.", show_default=True)
+@click.pass_context
+def cli(context, repeats_file, loglevel):
+    """Check that each catalog entry's HGNC id resolves to its locus symbol.
+
+    For every entry in the repeats file, HGNC is queried with the entry's
+    HGNCId and the symbol of the first hit is compared with the part of
+    the entry key before the first underscore. The outcome is logged per
+    entry; an entry whose lookup fails or returns no hit is logged and skipped.
+    """
+    coloredlogs.install(level=loglevel)
+    with open(repeats_file, 'r') as file_handle:
+        repeat_information = parse_repeat_file(file_handle, repeats_file_type='json')
+
+    if not repeat_information:
+        LOG.warning("Could not find any repeat info")
+        context.abort()
+
+    for entry in repeat_information:
+        hgnc_id = repeat_information[entry]["HGNCId"]
+        locus_symbol = entry.split('_')[0]
+
+        try:
+            response = requests.get(
+                HGNC_SEARCH_URL + str(hgnc_id),
+                headers={"Accept": "application/json"},
+                timeout=REQUEST_TIMEOUT,
+            )
+        except requests.RequestException as err:
+            LOG.warning("Entry %s: HGNC request failed: %s", entry, err)
+            continue
+
+        # A falsy Response is an HTTP error status: nothing useful to parse.
+        if not response:
+            LOG.warning("Entry {} not found".format(entry))
+            continue
+
+        # Skip entries without hits instead of failing on docs[0] below.
+        docs = response.json().get("response", {}).get("docs", [])
+        if not docs:
+            LOG.warning("Entry {} not found".format(entry))
+            continue
+
+        if len(docs) > 1:
+            LOG.warning("Entry {} got {} hgnc responses - using first".format(entry, len(docs)))
+
+        symbol_from_id = docs[0]['symbol']
+
+        if symbol_from_id == locus_symbol:
+            LOG.info("OK locus %s symbol %s", entry, locus_symbol)
+        elif symbol_from_id.lower() == locus_symbol.lower():
+            LOG.warning("OK locus %s symbol %s but differs in case", entry, locus_symbol)
+        else:
+            LOG.error("OOOPS locus_symbol %s and symbol %s from HGNC id %s do not match", locus_symbol, symbol_from_id, hgnc_id)
+
+
+if __name__=='__main__':
+    cli()
diff --git a/scripts/compare_locus_values_json.py b/scripts/compare_locus_values_json.py
new file mode 100644
index 0000000..05df8f3
--- /dev/null
+++ b/scripts/compare_locus_values_json.py
@@ -0,0 +1,65 @@
+import logging
+import coloredlogs
+import requests
+
+LOG = logging.getLogger(__name__)
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+
+import click
+
+from stranger.resources import repeats_path
+from stranger.utils import parse_repeat_file, get_repeat_info
+
+
+@click.command()
+@click.option('-f', '--repeats-file',
+    type=click.Path(exists=True),
+    help="Path to a file with repeat definitions. See README for explanation",
+    default=repeats_path,
+    show_default=True,
+)
+@click.option('-x', '--alt-repeats-file',
+    type=click.Path(exists=True),
+    help="Path to a second file with repeat definitions. See README for explanation",
+    default=repeats_path,
+    show_default=True,
+)
+@click.option('--loglevel', default='INFO', type=click.Choice(LOG_LEVELS),
+    help="Set the level of log output.", show_default=True)
+@click.pass_context
+def cli(context, repeats_file, alt_repeats_file, loglevel):
+    """Test if values differ between loci for variant catalog jsons.
+
+    Every entry of the repeats file is looked up in the alt file. A missing
+    entry is logged once, a field missing from the alt entry is logged as a
+    warning, and a field whose value differs is logged as an error.
+    """
+    coloredlogs.install(level=loglevel)
+    with open(repeats_file, 'r') as file_handle:
+        repeat_information = parse_repeat_file(file_handle, repeats_file_type='json')
+
+    with open(alt_repeats_file, 'r') as file_handle:
+        other_repeat_information = parse_repeat_file(file_handle, repeats_file_type='json')
+
+    if not repeat_information or not other_repeat_information:
+        LOG.warning("Could not find any repeat info")
+        context.abort()
+
+    for entry in repeat_information:
+        # Report a missing entry once rather than once per field.
+        if entry not in other_repeat_information:
+            LOG.info("Entry %s not found in alt file.", entry)
+            continue
+
+        fields = repeat_information[entry]
+        other_fields = other_repeat_information[entry]
+        for key in fields:
+            if key not in other_fields:
+                LOG.warning("Entry %s field %s missing in alt file entry.", entry, key)
+                continue
+            if other_fields[key] != fields[key]:
+                LOG.error("Entry %s field %s differs between file: %s and alt: %s", entry, key, fields[key], other_fields[key])
+
+
+if __name__=='__main__':
+    cli()
diff --git a/stranger/resources/variant_catalog_grch37.json b/stranger/resources/variant_catalog_grch37.json index 2b4defc..16b4e50 100755 --- a/stranger/resources/variant_catalog_grch37.json +++ b/stranger/resources/variant_catalog_grch37.json @@ -43,7 +43,7 @@ "Source": "GeneReviews", "SourceId": "NBK535148", "LocusStructure": "(NGC)*", - "ReferenceRegion": "X:25031767-25031814", + "ReferenceRegion": "X:25031766-25031814", "Disease": "EIEE", "NormalMax": 16, "PathologicMin": 17 @@ -58,7 +58,7 @@ "Source": "GeneReviews", "SourceId": "NBK535148", "LocusStructure": "(NGC)*", - "ReferenceRegion":
"X:25031647-25031682", + "ReferenceRegion": "X:25031646-25031682", "Disease": "PRTS", "NormalMax": 12, "PathologicMin": 20 @@ -288,14 +288,14 @@ { "VariantType": "Repeat", "LocusId": "BEAN1", - "HGNCId": 1541, + "HGNCId": 24160, "InheritanceMode":"AD" , "DisplayRU": "TGGAA", "SourceDisplay": "Sato et al AJHG 2009", "Source": "PubMed", "SourceId": "19878914", "LocusStructure": "(TGGAA)*TAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAA", - "ReferenceRegion": "16:66524302-66524356", + "ReferenceRegion": "16:66524301-66524356", "Disease": "SCA31", "NormalMax": 10, "PathologicMin": 40 @@ -351,17 +351,17 @@ { "VariantType": "Repeat", "LocusId": "DAB1", - "HGNCId": 2482, + "HGNCId": 2661, "InheritanceMode": "AD" , "DisplayRU": "ATTTC", "SourceDisplay": "GeneReviews Internet 2019-05-30", "Source": "GeneReviews", "SourceId": "NBK541729", - "LocusStructure": "(ATTTT)*(ATTTC)*(ATTTT)*", + "LocusStructure": "(AAAAT)*(GAAAT)*(AAAAT)*", "ReferenceRegion": [ - "1:57832716-57832725", - "1:57832726-57832780", - "1:57832781-57832790" + "1:57832715-57832725", + "1:57832725-57832780", + "1:57832780-57832790" ], "VariantType": [ "Repeat", @@ -373,7 +373,7 @@ "DAB1_ATTTC", "DAB1_ATTTT2" ], - "PathologicRegion": "1:57832726-57832780", + "PathologicRegion": "1:57832725-57832780", "Disease": "SCA37", "NormalMax": 16, "PathologicMin": 31 @@ -445,9 +445,9 @@ "HGNCId": 3775, "InheritanceMode": "XR", "DisplayRU": "CGG", - "SourceDisplay": "GeneReviews Internet 2019-11-07", + "SourceDisplay": "GeneReviews Internet 2019-11-21", "Source": "GeneReviews", - "SourceId": "NBK535148", + "SourceId": "NBK1384", "SweGenMean": 30.77, "SweGenStd": 8.821, "LocusStructure": "(CGG)*", @@ -605,9 +605,9 @@ "HTT_CCG" ], "Disease": "Huntington", - "NormalMax": 36, - "PathologicMin": 40, - "PathologicRegion": "4:3076666-3076693" + "NormalMax": 26, + "PathologicMin": 36, + "PathologicRegion": "4:3076603-3076660" }, { "VariantType": "Repeat", @@ -690,14 +690,14 @@ "HGNCId": 8565, "InheritanceMode": "AD", 
"DisplayRU": "GCN", - "SourceDisplay": "GeneReviews Internet 2014-02-20", + "SourceDisplay": "GeneReviews Internet 2020-10-22", "Source": "GeneReviews", "SourceId": "NBK1126", "LocusStructure": "(GCN)*", "ReferenceRegion": "14:23790681-23790711", "Disease": "OPMD", "NormalMax": 10, - "PathologicMin": 12 + "PathologicMin": 11 }, { "VariantType": "Repeat", @@ -712,7 +712,7 @@ "SweGenMean": 26.33, "SweGenStd": 11.88, "LocusStructure": "(GCN)*", - "ReferenceRegion": "4:41747989-41748049", + "ReferenceRegion": "4:41747988-41748049", "NormalMax": 20, "PathologicMin": 25 }, @@ -818,7 +818,7 @@ "Source": "GeneReviews", "SourceId": "NBK535148", "LocusStructure": "(NGC)*", - "ReferenceRegion": "X:139586482-139586526", + "ReferenceRegion": "X:139586481-139586526", "Disease": "MRGH", "NormalMax": 15, "PathologicMin": 22 @@ -832,8 +832,8 @@ "SourceDisplay": "GeneReviews Internet 2019-09-12", "Source": "GeneReviews", "SourceId": "NBK1438", - "LocusStructure": "(CAN)*", - "ReferenceRegion": "6:170870996-170871109", + "LocusStructure": "(GCA)*", + "ReferenceRegion": "6:170870994-170871109", "Disease": "SCA17", "SweGenMean": 36.24, "SweGenStd": 2.293, @@ -850,7 +850,7 @@ "Source": "GeneReviews", "SourceId": "NBK535148", "LocusStructure": "(GCN)*", - "ReferenceRegion": "22:19754286-19754330", + "ReferenceRegion": "22:19754285-19754330", "Disease": "TOF", "NormalMax": 15, "PathologicMin": 25 @@ -952,12 +952,12 @@ "LocusId": "NIPA1", "HGNCId": 17043, "InheritanceMode": "AD", - "DisplayRU": "GCN", + "DisplayRU": "GCG", "SourceDisplay": "Tazelaar et al (2019) Neurobiol Aging 74 234.e9-234.e15", "Source": "PubMed", "SourceId": "30342764", - "LocusStructure": "(NGC)*", - "ReferenceRegion": "15:23086364-23086402", + "LocusStructure": "(CGC)*", + "ReferenceRegion": "15:23086366-23086390", "Disease": "ALS - susceptibility to", "NormalMax": 8, "PathologicMin": 10000 diff --git a/stranger/resources/variant_catalog_grch38.json b/stranger/resources/variant_catalog_grch38.json index 
f4d120e..6da735e 100644 --- a/stranger/resources/variant_catalog_grch38.json +++ b/stranger/resources/variant_catalog_grch38.json @@ -39,7 +39,7 @@ "Source": "GeneReviews", "SourceId": "NBK535148", "LocusStructure": "(NGC)*", - "ReferenceRegion": "X:25013650-25013697", + "ReferenceRegion": "X:25013649-25013697", "Disease": "EIEE", "NormalMax": 16, "PathologicMin": 17 @@ -54,7 +54,7 @@ "Source": "GeneReviews", "SourceId": "NBK535148", "LocusStructure": "(NGC)*", - "ReferenceRegion": "X:25013530-25013565", + "ReferenceRegion": "X:25013529-25013565", "Disease": "PRTS", "NormalMax": 12, "PathologicMin": 20 @@ -71,8 +71,8 @@ "ReferenceRegion": "12:6936716-6936773", "VariantType": "Repeat", "Disease": "DRPLA", - "NormalMax": 34, - "PathologicMin": 49 + "NormalMax": 35, + "PathologicMin": 48 }, { "LocusId": "ATXN10", @@ -158,7 +158,7 @@ "Disease": "SCA7", "NormalMax": 19, "PathologicMin": 36, - "PathologicRegion": "3:63898360-63898390" + "PathologicRegion": "3:63912684-63912714" }, { "LocusId": "ATXN8OS", @@ -184,7 +184,7 @@ "Disease": "SCA8", "NormalMax": 50, "PathologicMin": 80, - "PathologicRegion": "13:70713515-70713560" + "PathologicRegion": "13:70139383-70139428" }, { "LocusId": "C9ORF72", @@ -311,14 +311,14 @@ { "VariantType": "Repeat", "LocusId": "BEAN1", - "HGNCId": 1541, + "HGNCId": 24160, "InheritanceMode":"AD" , "DisplayRU": "TGGAA", "SourceDisplay": "Sato et al AJHG 2009", "Source": "PubMed", "SourceId": "19878914", "LocusStructure": "(TGGAA)*TAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAA", - "ReferenceRegion": "16:66490399-66490453", + "ReferenceRegion": "16:66490398-66490453", "Disease": "SCA31", "NormalMax": 10, "PathologicMin": 40 @@ -347,7 +347,7 @@ "Repeat", "Repeat" ], - "PathologicRegion": "3:128891419-128891499", + "PathologicRegion": "3:129172576-129172656", "Disease": "DM2", "NormalMax": 30, "PathologicMin": 75 @@ -370,17 +370,17 @@ { "VariantType": "Repeat", "LocusId": "DAB1", - "HGNCId": 2482, + "HGNCId": 2661, "InheritanceMode": "AD" , 
"DisplayRU": "ATTTC", "SourceDisplay": "GeneReviews Internet 2019-05-30", "Source": "GeneReviews", "SourceId": "NBK541729", - "LocusStructure": "(ATTTT)*(ATTTC)*(ATTTT)*", + "LocusStructure": "(AAAAT)*(GAAAT)*(AAAAT)*", "ReferenceRegion": [ - "1:57367044-57367053", - "1:57367054-57367108", - "1:57367109-57367118" + "1:57367043-57367053", + "1:57367053-57367108", + "1:57367108-57367118" ], "VariantType": [ "Repeat", @@ -392,7 +392,7 @@ "DAB1_ATTTC", "DAB1_ATTTT2" ], - "PathologicRegion": "1:57367054-57367108", + "PathologicRegion": "1:57367053-57367108", "Disease": "SCA37", "NormalMax": 16, "PathologicMin": 31 @@ -426,7 +426,7 @@ "ReferenceRegion": "19:45770204-45770264", "VariantType": "Repeat", "Disease": "DM1", - "NormalMax": 37, + "NormalMax": 34, "PathologicMin": 50 }, { @@ -461,9 +461,9 @@ "HGNCId": 3775, "InheritanceMode": "XR", "DisplayRU": "CGG", - "SourceDisplay": "GeneReviews Internet 2019-11-07", + "SourceDisplay": "GeneReviews Internet 2019-11-21", "Source": "GeneReviews", - "SourceId": "NBK535148", + "SourceId": "NBK1384", "LocusStructure": "(CGG)*", "OfftargetRegions": [ "1:3068800-3069410", @@ -684,7 +684,7 @@ "ReferenceRegion": "X:147912050-147912110", "VariantType": "RareRepeat", "Disease": "FragileX", - "NormalMax": 65, + "NormalMax": 55, "PathologicMin": 200 }, { @@ -726,7 +726,7 @@ "Disease": "FRDA", "NormalMax": 35, "PathologicMin": 51, - "PathologicRegion": "9:71652202-71652220" + "PathologicRegion": "9:69037286-69037304" }, { "VariantType": "Repeat", @@ -812,7 +812,7 @@ "Disease": "Huntington", "NormalMax": 36, "PathologicMin": 40, - "PathologicRegion": "4:3076666-3076693" + "PathologicRegion": "4:3074876-3074933" }, { "LocusId": "JPH3", @@ -880,7 +880,7 @@ "Repeat", "Repeat" ], - "PathologicRegion": "20:2633379-2633403", + "PathologicRegion": "20:2652733-2652757", "Disease": "SCA36", "NormalMax": 14, "PathologicMin": 650 @@ -906,15 +906,14 @@ "HGNCId": 8565, "InheritanceMode": "AD", "DisplayRU": "GCN", - "SourceDisplay": "GeneReviews 
Internet 2014-02-20", "Source": "GeneReviews", + "SourceDisplay": "GeneReviews Internet 2020-10-22", "SourceId": "NBK1126", "LocusStructure": "(GCN)*", "ReferenceRegion": "14:23321472-23321502", "Disease": "OPMD", "NormalMax": 10, - "PathologicMin": 12, - "Source": "GeneReviews Internet 2014-02-20" + "PathologicMin": 11 }, { "LocusId": "PHOX2B", @@ -926,7 +925,7 @@ "SourceId": "NBK1427", "Disease": "CCHS", "LocusStructure": "(GCN)*", - "ReferenceRegion": "4:41745972-41746032", + "ReferenceRegion": "4:41745971-41746032", "VariantType": "Repeat", "NormalMax": 20, "PathologicMin": 25, @@ -1033,7 +1032,7 @@ "Source": "GeneReviews", "SourceId": "NBK535148", "LocusStructure": "(NGC)*", - "ReferenceRegion": "X:140504317-140504361", + "ReferenceRegion": "X:140504316-140504361", "Disease": "MRGH", "NormalMax": 15, "PathologicMin": 22 @@ -1050,7 +1049,7 @@ "ReferenceRegion": "6:170561906-170562017", "VariantType": "Repeat", "Disease": "SCA17", - "NormalMax": 44, + "NormalMax": 40, "PathologicMin": 49 }, { @@ -1164,11 +1163,11 @@ "LocusId": "NIPA1", "HGNCId": 17043, "InheritanceMode": "AD", - "DisplayRU": "GCN", + "DisplayRU": "GCG", "SourceDisplay": "Tazelaar et al (2019) Neurobiol Aging 74 234.e9-234.e15", "Source": "PubMed", "SourceId": "30342764", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGC)*", "ReferenceRegion": "15:22786677-22786701", "Disease": "ALS - susceptibility to", "NormalMax": 8, diff --git a/stranger/resources/variant_catalog_hg19.json b/stranger/resources/variant_catalog_hg19.json index 47316aa..4a20ebe 100755 --- a/stranger/resources/variant_catalog_hg19.json +++ b/stranger/resources/variant_catalog_hg19.json @@ -43,7 +43,7 @@ "Source": "GeneReviews", "SourceId": "NBK535148", "LocusStructure": "(NGC)*", - "ReferenceRegion": "chrX:25031767-25031814", + "ReferenceRegion": "chrX:25031766-25031814", "Disease": "EIEE", "NormalMax": 16, "PathologicMin": 17 @@ -58,7 +58,7 @@ "Source": "GeneReviews", "SourceId": "NBK535148", "LocusStructure": "(NGC)*", - 
"ReferenceRegion": "chrX:25031647-25031682", + "ReferenceRegion": "chrX:25031646-25031682", "Disease": "PRTS", "NormalMax": 12, "PathologicMin": 20 @@ -288,14 +288,14 @@ { "VariantType": "Repeat", "LocusId": "BEAN1", - "HGNCId": 1541, + "HGNCId": 24160, "InheritanceMode":"AD" , "DisplayRU": "TGGAA", "SourceDisplay": "Sato et al AJHG 2009", "Source": "PubMed", "SourceId": "19878914", "LocusStructure": "(TGGAA)*TAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAA", - "ReferenceRegion": "chr16:66524302-66524356", + "ReferenceRegion": "chr16:66524301-66524356", "Disease": "SCA31", "NormalMax": 10, "PathologicMin": 40 @@ -351,17 +351,17 @@ { "VariantType": "Repeat", "LocusId": "DAB1", - "HGNCId": 2482, + "HGNCId": 2661, "InheritanceMode": "AD" , "DisplayRU": "ATTTC", "SourceDisplay": "GeneReviews Internet 2019-05-30", "Source": "GeneReviews", "SourceId": "NBK541729", - "LocusStructure": "(ATTTT)*(ATTTC)*(ATTTT)*", + "LocusStructure": "(AAAAT)*(GAAAT)*(AAAAT)*", "ReferenceRegion": [ - "chr1:57832716-57832725", - "chr1:57832726-57832780", - "chr1:57832781-57832790" + "chr1:57832715-57832725", + "chr1:57832725-57832780", + "chr1:57832780-57832790" ], "VariantType": [ "Repeat", @@ -373,7 +373,7 @@ "DAB1_ATTTC", "DAB1_ATTTT2" ], - "PathologicRegion": "chr1:57832726-57832780", + "PathologicRegion": "chr1:57832725-57832780", "Disease": "SCA37", "NormalMax": 16, "PathologicMin": 31 @@ -445,9 +445,9 @@ "HGNCId": 3775, "InheritanceMode": "XR", "DisplayRU": "CGG", - "SourceDisplay": "GeneReviews Internet 2019-11-07", + "SourceDisplay": "GeneReviews Internet 2019-11-21", "Source": "GeneReviews", - "SourceId": "NBK535148", + "SourceId": "NBK1384", "SweGenMean": 30.77, "SweGenStd": 8.821, "LocusStructure": "(CGG)*", @@ -605,9 +605,9 @@ "HTT_CCG" ], "Disease": "Huntington", - "NormalMax": 36, - "PathologicMin": 40, - "PathologicRegion": "chr4:3076666-3076693" + "NormalMax": 26, + "PathologicMin": 36, + "PathologicRegion": "chr4:3076603-3076660" }, { "VariantType": "Repeat", @@ -690,14 
+690,14 @@ "HGNCId": 8565, "InheritanceMode": "AD", "DisplayRU": "GCN", - "SourceDisplay": "GeneReviews Internet 2014-02-20", + "SourceDisplay": "GeneReviews Internet 2020-10-22", "Source": "GeneReviews", "SourceId": "NBK1126", "LocusStructure": "(GCN)*", "ReferenceRegion": "chr14:23790681-23790711", "Disease": "OPMD", "NormalMax": 10, - "PathologicMin": 12 + "PathologicMin": 11 }, { "VariantType": "Repeat", @@ -712,7 +712,7 @@ "SweGenMean": 26.33, "SweGenStd": 11.88, "LocusStructure": "(GCN)*", - "ReferenceRegion": "chr4:41747989-41748049", + "ReferenceRegion": "chr4:41747988-41748049", "NormalMax": 20, "PathologicMin": 25 }, @@ -818,7 +818,7 @@ "Source": "GeneReviews", "SourceId": "NBK535148", "LocusStructure": "(NGC)*", - "ReferenceRegion": "chrX:139586482-139586526", + "ReferenceRegion": "chrX:139586481-139586526", "Disease": "MRGH", "NormalMax": 15, "PathologicMin": 22 @@ -832,8 +832,8 @@ "SourceDisplay": "GeneReviews Internet 2019-09-12", "Source": "GeneReviews", "SourceId": "NBK1438", - "LocusStructure": "(CAN)*", - "ReferenceRegion": "chr6:170870996-170871109", + "LocusStructure": "(GCA)*", + "ReferenceRegion": "chr6:170870994-170871109", "Disease": "SCA17", "SweGenMean": 36.24, "SweGenStd": 2.293, @@ -952,12 +952,12 @@ "LocusId": "NIPA1", "HGNCId": 17043, "InheritanceMode": "AD", - "DisplayRU": "GCN", + "DisplayRU": "GCG", "SourceDisplay": "Tazelaar et al (2019) Neurobiol Aging 74 234.e9-234.e15", "Source": "PubMed", "SourceId": "30342764", - "LocusStructure": "(NGC)*", - "ReferenceRegion": "chr15:23086364-23086402", + "LocusStructure": "(CGC)*", + "ReferenceRegion": "chr15:23086366-23086390", "Disease": "ALS - susceptibility to", "NormalMax": 8, "PathologicMin": 10000 diff --git a/stranger/resources/variant_catalog_hg38.json b/stranger/resources/variant_catalog_hg38.json index 26e7abf..4c05404 100644 --- a/stranger/resources/variant_catalog_hg38.json +++ b/stranger/resources/variant_catalog_hg38.json @@ -39,7 +39,7 @@ "Source": "GeneReviews", "SourceId": 
"NBK535148", "LocusStructure": "(NGC)*", - "ReferenceRegion": "chrX:25013650-25013697", + "ReferenceRegion": "chrX:25013649-25013697", "Disease": "EIEE", "NormalMax": 16, "PathologicMin": 17 @@ -54,7 +54,7 @@ "Source": "GeneReviews", "SourceId": "NBK535148", "LocusStructure": "(NGC)*", - "ReferenceRegion": "chrX:25013530-25013565", + "ReferenceRegion": "chrX:25013529-25013565", "Disease": "PRTS", "NormalMax": 12, "PathologicMin": 20 @@ -71,8 +71,8 @@ "ReferenceRegion": "chr12:6936716-6936773", "VariantType": "Repeat", "Disease": "DRPLA", - "NormalMax": 34, - "PathologicMin": 49 + "NormalMax": 35, + "PathologicMin": 48 }, { "LocusId": "ATXN10", @@ -158,7 +158,7 @@ "Disease": "SCA7", "NormalMax": 19, "PathologicMin": 36, - "PathologicRegion": "chr3:63898360-63898390" + "PathologicRegion": "chr3:63912684-63912714" }, { "LocusId": "ATXN8OS", @@ -184,7 +184,7 @@ "Disease": "SCA8", "NormalMax": 50, "PathologicMin": 80, - "PathologicRegion": "chr13:70713515-70713560" + "PathologicRegion": "chr13:70139383-70139428" }, { "LocusId": "C9ORF72", @@ -311,14 +311,14 @@ { "VariantType": "Repeat", "LocusId": "BEAN1", - "HGNCId": 1541, + "HGNCId": 24160, "InheritanceMode":"AD" , "DisplayRU": "TGGAA", "SourceDisplay": "Sato et al AJHG 2009", "Source": "PubMed", "SourceId": "19878914", "LocusStructure": "(TGGAA)*TAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAA", - "ReferenceRegion": "chr16:66490399-66490453", + "ReferenceRegion": "chr16:66490398-66490453", "Disease": "SCA31", "NormalMax": 10, "PathologicMin": 40 @@ -347,7 +347,7 @@ "Repeat", "Repeat" ], - "PathologicRegion": "chr3:128891419-128891499", + "PathologicRegion": "chr3:129172576-129172656", "Disease": "DM2", "NormalMax": 30, "PathologicMin": 75 @@ -370,17 +370,17 @@ { "VariantType": "Repeat", "LocusId": "DAB1", - "HGNCId": 2482, + "HGNCId": 2661, "InheritanceMode": "AD" , "DisplayRU": "ATTTC", "SourceDisplay": "GeneReviews Internet 2019-05-30", "Source": "GeneReviews", "SourceId": "NBK541729", - "LocusStructure": 
"(ATTTT)*(ATTTC)*(ATTTT)*", + "LocusStructure": "(AAAAT)*(GAAAT)*(AAAAT)*", "ReferenceRegion": [ - "chr1:57367044-57367053", - "chr1:57367054-57367108", - "chr1:57367109-57367118" + "chr1:57367043-57367053", + "chr1:57367053-57367108", + "chr1:57367108-57367118" ], "VariantType": [ "Repeat", @@ -392,7 +392,7 @@ "DAB1_ATTTC", "DAB1_ATTTT2" ], - "PathologicRegion": "chr1:57367054-57367108", + "PathologicRegion": "chr1:57367053-57367108", "Disease": "SCA37", "NormalMax": 16, "PathologicMin": 31 @@ -426,7 +426,7 @@ "ReferenceRegion": "chr19:45770204-45770264", "VariantType": "Repeat", "Disease": "DM1", - "NormalMax": 37, + "NormalMax": 34, "PathologicMin": 50 }, { @@ -461,9 +461,9 @@ "HGNCId": 3775, "InheritanceMode": "XR", "DisplayRU": "CGG", - "SourceDisplay": "GeneReviews Internet 2019-11-07", + "SourceDisplay": "GeneReviews Internet 2019-11-21", "Source": "GeneReviews", - "SourceId": "NBK535148", + "SourceId": "NBK1384", "LocusStructure": "(CGG)*", "OfftargetRegions": [ "chr1:3068800-3069410", @@ -684,7 +684,7 @@ "ReferenceRegion": "chrX:147912050-147912110", "VariantType": "RareRepeat", "Disease": "FragileX", - "NormalMax": 65, + "NormalMax": 55, "PathologicMin": 200 }, { @@ -726,7 +726,7 @@ "Disease": "FRDA", "NormalMax": 35, "PathologicMin": 51, - "PathologicRegion": "chr9:71652202-71652220" + "PathologicRegion": "chr9:69037286-69037304" }, { "VariantType": "Repeat", @@ -810,9 +810,9 @@ "Repeat" ], "Disease": "Huntington", - "NormalMax": 36, - "PathologicMin": 40, - "PathologicRegion": "chr4:3076666-3076693" + "NormalMax": 26, + "PathologicMin": 36, + "PathologicRegion": "chr4:3074876-3074933" }, { "LocusId": "JPH3", @@ -880,7 +880,7 @@ "Repeat", "Repeat" ], - "PathologicRegion": "chr20:2633379-2633403", + "PathologicRegion": "chr20:2652733-2652757", "Disease": "SCA36", "NormalMax": 14, "PathologicMin": 650 @@ -906,15 +906,14 @@ "HGNCId": 8565, "InheritanceMode": "AD", "DisplayRU": "GCN", - "SourceDisplay": "GeneReviews Internet 2014-02-20", + "SourceDisplay": 
"GeneReviews Internet 2020-10-22", "Source": "GeneReviews", "SourceId": "NBK1126", "LocusStructure": "(GCN)*", "ReferenceRegion": "chr14:23321472-23321502", "Disease": "OPMD", "NormalMax": 10, - "PathologicMin": 12, - "Source": "GeneReviews Internet 2014-02-20" + "PathologicMin": 11 }, { "LocusId": "PHOX2B", @@ -926,7 +925,7 @@ "SourceId": "NBK1427", "Disease": "CCHS", "LocusStructure": "(GCN)*", - "ReferenceRegion": "chr4:41745972-41746032", + "ReferenceRegion": "chr4:41745971-41746032", "VariantType": "Repeat", "NormalMax": 20, "PathologicMin": 25, @@ -1033,7 +1032,7 @@ "Source": "GeneReviews", "SourceId": "NBK535148", "LocusStructure": "(NGC)*", - "ReferenceRegion": "chrX:140504317-140504361", + "ReferenceRegion": "chrX:140504316-140504361", "Disease": "MRGH", "NormalMax": 15, "PathologicMin": 22 @@ -1050,7 +1049,7 @@ "ReferenceRegion": "chr6:170561906-170562017", "VariantType": "Repeat", "Disease": "SCA17", - "NormalMax": 44, + "NormalMax": 40, "PathologicMin": 49 }, { @@ -1164,11 +1163,11 @@ "LocusId": "NIPA1", "HGNCId": 17043, "InheritanceMode": "AD", - "DisplayRU": "GCN", + "DisplayRU": "GCG", "SourceDisplay": "Tazelaar et al (2019) Neurobiol Aging 74 234.e9-234.e15", "Source": "PubMed", "SourceId": "30342764", - "LocusStructure": "(GCG)*", + "LocusStructure": "(CGC)*", "ReferenceRegion": "chr15:22786677-22786701", "Disease": "ALS - susceptibility to", "NormalMax": 8, diff --git a/stranger/utils.py b/stranger/utils.py index 983b822..f519269 100644 --- a/stranger/utils.py +++ b/stranger/utils.py @@ -159,7 +159,7 @@ def get_repeat_info(variant_info, repeat_info): repeat_strings.append('normal') if rank_score < RANK_SCORE['normal']: rank_score = RANK_SCORE['normal'] - elif repeat_number <= rep_upper: + elif repeat_number < rep_upper: repeat_strings.append('pre_mutation') if rank_score < RANK_SCORE['pre_mutation']: rank_score = RANK_SCORE['pre_mutation']