Skip to content

Commit

Permalink
Merge pull request #38 from moonso/fix_obis
Browse files Browse the repository at this point in the history
Fix #37, Fix #36 and some additional potential off-by-one errors (OBIs).
  • Loading branch information
dnil authored Jun 4, 2021
2 parents 5b437be + 58271fa commit 2f0cc55
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 112 deletions.
68 changes: 68 additions & 0 deletions scripts/check_hgnc_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import logging
import coloredlogs
import requests

LOG = logging.getLogger(__name__)
LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']

import click

from stranger.resources import repeats_path
from stranger.utils import parse_repeat_file, get_repeat_info

@click.command()
@click.option('-f', '--repeats-file',
    type=click.Path(exists=True),
    help="Path to a file with repeat definitions. See README for explanation",
    default=repeats_path,
    show_default=True,
)
@click.option('--loglevel', default='INFO', type=click.Choice(LOG_LEVELS),
    help="Set the level of log output.", show_default=True)
@click.pass_context
def cli(context, repeats_file, loglevel):
    """Check that each locus symbol matches the HGNC symbol of its HGNCId.

    For every entry in the repeats file the HGNCId is looked up at
    rest.genenames.org and the returned gene symbol is compared with the
    part of the entry name before the first underscore.
    """
    coloredlogs.install(level=loglevel)
    with open(repeats_file, 'r') as file_handle:
        repeat_information = parse_repeat_file(file_handle, repeats_file_type='json')

    if not repeat_information:
        LOG.warning("Could not find any repeat info")
        context.abort()

    for entry in repeat_information:
        hgnc_id = repeat_information[entry]["HGNCId"]
        # Entry names may carry a suffix after an underscore (e.g. "DAB1_ATTTC");
        # only the leading part is compared with the HGNC symbol.
        locus_symbol = entry.split('_')[0]

        url = "https://rest.genenames.org/search/hgnc_id/" + str(hgnc_id)
        try:
            # A timeout keeps the script from hanging forever on a stalled connection.
            response = requests.get(
                url,
                headers={"Accept": "application/json"},
                timeout=30,
            )
        except requests.RequestException as err:
            LOG.warning("Entry %s: request to %s failed: %s", entry, url, err)
            continue

        # A requests.Response is falsy for 4xx/5xx status codes.
        if not response:
            LOG.warning("Entry %s not found", entry)
            continue

        docs = response.json().get("response", {}).get("docs", [])
        if not docs:
            # Skip: there is no symbol to compare against.
            LOG.warning("Entry %s not found", entry)
            continue

        if len(docs) > 1:
            LOG.warning("Entry %s got %s hgnc responses - using first", entry, len(docs))

        symbol_from_id = docs[0]['symbol']

        if symbol_from_id == locus_symbol:
            LOG.info("OK locus %s symbol %s", entry, locus_symbol)
        elif symbol_from_id.lower() == locus_symbol.lower():
            LOG.warning("OK locus %s symbol %s but differs in case", entry, locus_symbol)
        else:
            LOG.error(
                "OOOPS locus_symbol %s and symbol %s from HGNC id %s do not match",
                locus_symbol, symbol_from_id, hgnc_id,
            )


if __name__=='__main__':
cli()
55 changes: 55 additions & 0 deletions scripts/compare_locus_values_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import logging
import coloredlogs
import requests

LOG = logging.getLogger(__name__)
LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']

import click

from stranger.resources import repeats_path
from stranger.utils import parse_repeat_file, get_repeat_info

@click.command()
@click.option('-f', '--repeats-file',
    type=click.Path(exists=True),
    help="Path to a file with repeat definitions. See README for explanation",
    default=repeats_path,
    show_default=True,
)
@click.option('-x', '--alt-repeats-file',
    type=click.Path(exists=True),
    help="Path to a second file with repeat definitions. See README for explanation",
    default=repeats_path,
    show_default=True,
)
@click.option('--loglevel', default='INFO', type=click.Choice(LOG_LEVELS),
    help="Set the level of log output.", show_default=True)
@click.pass_context
def cli(context, repeats_file, alt_repeats_file, loglevel):
    """Test if values differ between loci for variant catalog jsons"""
    coloredlogs.install(level=loglevel)
    with open(repeats_file, 'r') as file_handle:
        repeat_information = parse_repeat_file(file_handle, repeats_file_type='json')

    with open(alt_repeats_file, 'r') as file_handle:
        other_repeat_information = parse_repeat_file(file_handle, repeats_file_type='json')

    if not repeat_information or not other_repeat_information:
        LOG.warning("Could not find any repeat info")
        context.abort()

    for entry in repeat_information:
        # Check entry presence once per entry, not once per field, so a
        # missing entry is reported with a single log line.
        if entry not in other_repeat_information:
            LOG.info("Entry %s not found in alt file.", entry)
            continue

        fields = repeat_information[entry]
        other_fields = other_repeat_information[entry]
        for key in fields:
            if key not in other_fields:
                LOG.warning("Entry %s field %s missing in alt file entry.", entry, key)
                continue
            if other_fields[key] != fields[key]:
                LOG.error(
                    "Entry %s field %s differs between file: %s and alt: %s",
                    entry, key, fields[key], other_fields[key],
                )


if __name__=='__main__':
cli()
50 changes: 25 additions & 25 deletions stranger/resources/variant_catalog_grch37.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"Source": "GeneReviews",
"SourceId": "NBK535148",
"LocusStructure": "(NGC)*",
"ReferenceRegion": "X:25031767-25031814",
"ReferenceRegion": "X:25031766-25031814",
"Disease": "EIEE",
"NormalMax": 16,
"PathologicMin": 17
Expand All @@ -58,7 +58,7 @@
"Source": "GeneReviews",
"SourceId": "NBK535148",
"LocusStructure": "(NGC)*",
"ReferenceRegion": "X:25031647-25031682",
"ReferenceRegion": "X:25031646-25031682",
"Disease": "PRTS",
"NormalMax": 12,
"PathologicMin": 20
Expand Down Expand Up @@ -288,14 +288,14 @@
{
"VariantType": "Repeat",
"LocusId": "BEAN1",
"HGNCId": 1541,
"HGNCId": 24160,
"InheritanceMode":"AD" ,
"DisplayRU": "TGGAA",
"SourceDisplay": "Sato et al AJHG 2009",
"Source": "PubMed",
"SourceId": "19878914",
"LocusStructure": "(TGGAA)*TAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAATAAAA",
"ReferenceRegion": "16:66524302-66524356",
"ReferenceRegion": "16:66524301-66524356",
"Disease": "SCA31",
"NormalMax": 10,
"PathologicMin": 40
Expand Down Expand Up @@ -351,17 +351,17 @@
{
"VariantType": "Repeat",
"LocusId": "DAB1",
"HGNCId": 2482,
"HGNCId": 2661,
"InheritanceMode": "AD" ,
"DisplayRU": "ATTTC",
"SourceDisplay": "GeneReviews Internet 2019-05-30",
"Source": "GeneReviews",
"SourceId": "NBK541729",
"LocusStructure": "(ATTTT)*(ATTTC)*(ATTTT)*",
"LocusStructure": "(AAAAT)*(GAAAT)*(AAAAT)*",
"ReferenceRegion": [
"1:57832716-57832725",
"1:57832726-57832780",
"1:57832781-57832790"
"1:57832715-57832725",
"1:57832725-57832780",
"1:57832780-57832790"
],
"VariantType": [
"Repeat",
Expand All @@ -373,7 +373,7 @@
"DAB1_ATTTC",
"DAB1_ATTTT2"
],
"PathologicRegion": "1:57832726-57832780",
"PathologicRegion": "1:57832725-57832780",
"Disease": "SCA37",
"NormalMax": 16,
"PathologicMin": 31
Expand Down Expand Up @@ -445,9 +445,9 @@
"HGNCId": 3775,
"InheritanceMode": "XR",
"DisplayRU": "CGG",
"SourceDisplay": "GeneReviews Internet 2019-11-07",
"SourceDisplay": "GeneReviews Internet 2019-11-21",
"Source": "GeneReviews",
"SourceId": "NBK535148",
"SourceId": "NBK1384",
"SweGenMean": 30.77,
"SweGenStd": 8.821,
"LocusStructure": "(CGG)*",
Expand Down Expand Up @@ -605,9 +605,9 @@
"HTT_CCG"
],
"Disease": "Huntington",
"NormalMax": 36,
"PathologicMin": 40,
"PathologicRegion": "4:3076666-3076693"
"NormalMax": 26,
"PathologicMin": 36,
"PathologicRegion": "4:3076603-3076660"
},
{
"VariantType": "Repeat",
Expand Down Expand Up @@ -690,14 +690,14 @@
"HGNCId": 8565,
"InheritanceMode": "AD",
"DisplayRU": "GCN",
"SourceDisplay": "GeneReviews Internet 2014-02-20",
"SourceDisplay": "GeneReviews Internet 2020-10-22",
"Source": "GeneReviews",
"SourceId": "NBK1126",
"LocusStructure": "(GCN)*",
"ReferenceRegion": "14:23790681-23790711",
"Disease": "OPMD",
"NormalMax": 10,
"PathologicMin": 12
"PathologicMin": 11
},
{
"VariantType": "Repeat",
Expand All @@ -712,7 +712,7 @@
"SweGenMean": 26.33,
"SweGenStd": 11.88,
"LocusStructure": "(GCN)*",
"ReferenceRegion": "4:41747989-41748049",
"ReferenceRegion": "4:41747988-41748049",
"NormalMax": 20,
"PathologicMin": 25
},
Expand Down Expand Up @@ -818,7 +818,7 @@
"Source": "GeneReviews",
"SourceId": "NBK535148",
"LocusStructure": "(NGC)*",
"ReferenceRegion": "X:139586482-139586526",
"ReferenceRegion": "X:139586481-139586526",
"Disease": "MRGH",
"NormalMax": 15,
"PathologicMin": 22
Expand All @@ -832,8 +832,8 @@
"SourceDisplay": "GeneReviews Internet 2019-09-12",
"Source": "GeneReviews",
"SourceId": "NBK1438",
"LocusStructure": "(CAN)*",
"ReferenceRegion": "6:170870996-170871109",
"LocusStructure": "(GCA)*",
"ReferenceRegion": "6:170870994-170871109",
"Disease": "SCA17",
"SweGenMean": 36.24,
"SweGenStd": 2.293,
Expand All @@ -850,7 +850,7 @@
"Source": "GeneReviews",
"SourceId": "NBK535148",
"LocusStructure": "(GCN)*",
"ReferenceRegion": "22:19754286-19754330",
"ReferenceRegion": "22:19754285-19754330",
"Disease": "TOF",
"NormalMax": 15,
"PathologicMin": 25
Expand Down Expand Up @@ -952,12 +952,12 @@
"LocusId": "NIPA1",
"HGNCId": 17043,
"InheritanceMode": "AD",
"DisplayRU": "GCN",
"DisplayRU": "GCG",
"SourceDisplay": "Tazelaar et al (2019) Neurobiol Aging 74 234.e9-234.e15",
"Source": "PubMed",
"SourceId": "30342764",
"LocusStructure": "(NGC)*",
"ReferenceRegion": "15:23086364-23086402",
"LocusStructure": "(CGC)*",
"ReferenceRegion": "15:23086366-23086390",
"Disease": "ALS - susceptibility to",
"NormalMax": 8,
"PathologicMin": 10000
Expand Down
Loading

0 comments on commit 2f0cc55

Please sign in to comment.