From b5beeb8baeb66f3eaafea9cfc55b35f52f619dd3 Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Tue, 5 Nov 2024 14:56:24 -0600 Subject: [PATCH 01/10] chore(app): pin python version to 3.9.17 Both of our CI steps that involve Python use 3.9, and the existing pyproject.toml file also uses this version. Pin it in .tool-versions so that it exists as plaintext and so that tools like asdf and mise can pick it up. --- .tool-versions | 1 + 1 file changed, 1 insertion(+) diff --git a/.tool-versions b/.tool-versions index f43194737..330d31ed6 100644 --- a/.tool-versions +++ b/.tool-versions @@ -1,2 +1,3 @@ nodejs 18.17.1 pnpm 8.14.3 +python 3.9.17 From 0a41f879253e19f8ab78868b24e1f55bcc88a8a9 Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Fri, 8 Nov 2024 14:58:32 -0600 Subject: [PATCH 02/10] chore(data-pipeline): bump Hail to enable usage of GRCh37 VEP helper As of several months ago, support for the multi-region buckets that supplied the VEP-specific data for Hail's built-in GRCh37 VEP helper became prohibitively expensive. 
The Hail team moved towards only supporting specific regions; further usage of this feature requires a bump to at least Hail 0.2.128 --- data-pipeline/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-pipeline/requirements.txt b/data-pipeline/requirements.txt index 208d80df2..a3f715e0d 100644 --- a/data-pipeline/requirements.txt +++ b/data-pipeline/requirements.txt @@ -1,5 +1,5 @@ elasticsearch~=7.17 -hail==0.2.127 +hail==0.2.128 tqdm loguru attrs From a3fce439c632a5e4363ae789e94ded17ee3fb8f2 Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Tue, 5 Nov 2024 11:01:22 -0600 Subject: [PATCH 03/10] refactor(data-pipeline): split ClinVar xml to tsv parsing into a standalone function --- .../src/data_pipeline/datasets/clinvar.py | 44 ++++++++++++------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/data-pipeline/src/data_pipeline/datasets/clinvar.py b/data-pipeline/src/data_pipeline/datasets/clinvar.py index d91278dc0..1ee18adfa 100644 --- a/data-pipeline/src/data_pipeline/datasets/clinvar.py +++ b/data-pipeline/src/data_pipeline/datasets/clinvar.py @@ -162,25 +162,22 @@ def _parse_variant(variant_element): return variant -def import_clinvar_xml(clinvar_xml_path): - release_date = None - - clinvar_xml_local_path = os.path.join("/tmp", os.path.basename(clinvar_xml_path)) - print("Copying ClinVar XML") - if not os.path.exists(clinvar_xml_local_path): - subprocess.check_call(["gsutil", "cp", clinvar_xml_path, clinvar_xml_local_path]) +def parse_clinvar_xml_to_tsv( + input_xml_path, + output_tsv_path, + parse_variant_function, +): + release_date = "" - print("Parsing XML file") - with open("/tmp/clinvar_variants.tsv", "w", newline="") as output_file: + with open(output_tsv_path, "w", newline="") as output_file: writer = csv.writer(output_file, delimiter="\t", quotechar="", quoting=csv.QUOTE_NONE) writer.writerow(["locus_GRCh37", "alleles_GRCh37", "locus_GRCh38", "alleles_GRCh38", "variant"]) - open_file = gzip.open if 
clinvar_xml_local_path.endswith(".gz") else open - with open_file(clinvar_xml_local_path, "r") as xml_file: + open_function = gzip.open if str(input_xml_path).endswith(".gz") else open + with open_function(input_xml_path, "r") as xml_file: # The exact number of variants in the XML file is unknown. # Approximate it to show a progress bar. progress = tqdm(total=1_100_000, mininterval=5) - xml = ElementTree.iterparse(xml_file, events=["end"]) for _, element in xml: if element.tag == "ClinVarVariationRelease": @@ -188,8 +185,7 @@ def import_clinvar_xml(clinvar_xml_path): if element.tag == "VariationArchive": try: - variant = _parse_variant(element) - + variant = parse_variant_function(element) locations = variant.pop("locations") writer.writerow( [ @@ -204,7 +200,6 @@ def import_clinvar_xml(clinvar_xml_path): json.dumps(variant), ] ) - progress.update(1) except SkipVariant: @@ -219,7 +214,24 @@ def import_clinvar_xml(clinvar_xml_path): # https://stackoverflow.com/questions/7697710/python-running-out-of-memory-parsing-xml-using-celementtree-iterparse element.clear() - progress.close() + progress.close() + + return release_date + + +def import_clinvar_xml(clinvar_xml_path): + release_date = None + + clinvar_xml_local_path = os.path.join("/tmp", os.path.basename(clinvar_xml_path)) + print("Copying ClinVar XML") + if not os.path.exists(clinvar_xml_local_path): + subprocess.check_call(["gsutil", "cp", clinvar_xml_path, clinvar_xml_local_path]) + + print("Parsing XML file") + output_file = "/tmp/clinvar_variants.tsv" + release_date = parse_clinvar_xml_to_tsv( + input_xml_path=clinvar_xml_local_path, output_tsv_path=output_file, parse_variant_function=_parse_variant + ) subprocess.check_call(["hdfs", "dfs", "-cp", "-f", "file:///tmp/clinvar_variants.tsv", "/tmp/clinvar_variants.tsv"]) From 12d339eeb6b3ab2d026a344a8b6f111981c1da81 Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Tue, 5 Nov 2024 13:35:34 -0600 Subject: [PATCH 04/10] feat(data-pipeline): update ClinVar gold star 
dict --- .../src/data_pipeline/datasets/clinvar.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/data-pipeline/src/data_pipeline/datasets/clinvar.py b/data-pipeline/src/data_pipeline/datasets/clinvar.py index 1ee18adfa..1ba84e8c6 100644 --- a/data-pipeline/src/data_pipeline/datasets/clinvar.py +++ b/data-pipeline/src/data_pipeline/datasets/clinvar.py @@ -21,14 +21,19 @@ CLINVAR_GOLD_STARS = { + None: 0, + "no classification for the single variant": 0, + "no interpretation for the single variant": 0, + "no classification provided": 0, + "no assertion provided": 0, + "no classifications from unflagged records": 0, + "no assertion criteria provided": 0, + "criteria provided, single submitter": 1, + "criteria provided, conflicting classifications": 1, "criteria provided, conflicting interpretations": 1, "criteria provided, multiple submitters, no conflicts": 2, - "criteria provided, single submitter": 1, - "no assertion criteria provided": 0, - "no assertion provided": 0, - "no interpretation for the single variant": 0, - "practice guideline": 4, "reviewed by expert panel": 3, + "practice guideline": 4, } From 9fb255664bdaacb65ea6b4b2a7e03bcac5b3177e Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Tue, 5 Nov 2024 13:48:47 -0600 Subject: [PATCH 05/10] feat(data-pipeline): update ClinVar xml import to new release format ClinVar updated its release format for variants in XML with the addition of somatic variant classifications --- .../src/data_pipeline/datasets/clinvar.py | 192 ++++++++++-------- 1 file changed, 106 insertions(+), 86 deletions(-) diff --git a/data-pipeline/src/data_pipeline/datasets/clinvar.py b/data-pipeline/src/data_pipeline/datasets/clinvar.py index 1ba84e8c6..e588bfced 100644 --- a/data-pipeline/src/data_pipeline/datasets/clinvar.py +++ b/data-pipeline/src/data_pipeline/datasets/clinvar.py @@ -1,11 +1,11 @@ import csv -import datetime import gzip import json import os import subprocess import sys from xml.etree 
import ElementTree +from xml.sax.saxutils import quoteattr import hail as hl from tqdm import tqdm @@ -17,8 +17,7 @@ from data_pipeline.data_types.variant import variant_id -CLINVAR_XML_URL = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/weekly_release/ClinVarVariationRelease_00-latest_weekly.xml.gz" - +CLINVAR_XML_URL = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarVCVRelease_00-latest.xml.gz" CLINVAR_GOLD_STARS = { None: 0, @@ -37,77 +36,110 @@ } -class SkipVariant(Exception): - pass +def find_mapping_elements_by_xref(trait_element, submission_element, trait_mapping_list_element): + if trait_mapping_list_element is None: + return None + + xref_elements = trait_element.findall("XRef") + for xref_element in xref_elements: + selector = f"./TraitMapping[@ClinicalAssertionID='{submission_element.attrib['ID']}'][@TraitType='{trait_element.attrib['Type']}'][@MappingType='XRef'][@MappingValue={quoteattr(xref_element.attrib['ID'])}]" # noqa + mapping_element = trait_mapping_list_element.find(selector) + if mapping_element is not None: + return mapping_element + return None + + +def find_mapping_elements_by_preferred_name(trait_element, submission_element, trait_mapping_list_element): + preferred_name_element = trait_element.find("./Name/ElementValue[@Type='Preferred']") + + if preferred_name_element is not None and trait_mapping_list_element is not None: + selector = f"./TraitMapping[@ClinicalAssertionID='{submission_element.attrib['ID']}'][@TraitType='{trait_element.attrib['Type']}'][@MappingType='Name'][@MappingValue={quoteattr(preferred_name_element.text)}]" # noqa + mapping_element = trait_mapping_list_element.find(selector) + return mapping_element, preferred_name_element + + return None, preferred_name_element + + +def find_mapping_elements_by_name(trait_element, submission_element, trait_mapping_list_element): + name_elements = trait_element.findall("./Name/ElementValue") + preferred_name_element = None + + for name_element in 
name_elements: + if preferred_name_element is None: + preferred_name_element = name_element + + if trait_mapping_list_element is not None: + selector = f"./TraitMapping[@ClinicalAssertionID='{submission_element.attrib['ID']}'][@TraitType='{trait_element.attrib['Type']}'][@MappingType='Name'][@MappingValue={quoteattr(name_element.text)}]" # noqa + mapping_element = trait_mapping_list_element.find(selector) + if mapping_element: + return mapping_element, preferred_name_element + + return None, preferred_name_element + + +def _determine_mapping_and_preferred_name_element(trait_element, submission_element, trait_mapping_list_element): + preferred_name_element = None + mapping_element = find_mapping_elements_by_xref(trait_element, submission_element, trait_mapping_list_element) + + if mapping_element is None: + mapping_element, preferred_name_element = find_mapping_elements_by_preferred_name( + trait_element, submission_element, trait_mapping_list_element + ) + + if mapping_element is None: + mapping_element, preferred_name_element = find_mapping_elements_by_name( + trait_element, submission_element, trait_mapping_list_element + ) + + return (mapping_element, preferred_name_element) + + +def _associate_condition_with_medgen_id(submission_element, trait_mapping_list_element, trait_element): + (mapping_element, preferred_name_element) = _determine_mapping_and_preferred_name_element( + trait_element, submission_element, trait_mapping_list_element + ) + + if mapping_element is not None: + medgen_element = mapping_element.find("./MedGen") + return {"name": medgen_element.attrib["Name"], "medgen_id": medgen_element.attrib["CUI"]} + + elif preferred_name_element is not None: + return {"name": preferred_name_element.text, "medgen_id": None} def _parse_submission(submission_element, trait_mapping_list_element): submission = {} submission["id"] = submission_element.find("./ClinVarAccession").attrib["Accession"] - submission["submitter_name"] = 
submission_element.find("./ClinVarAccession").attrib["SubmitterName"] - submission["clinical_significance"] = None - interpretation_element = submission_element.find("./Interpretation") - interpretation_description_element = interpretation_element.find("./Description") - if interpretation_description_element is not None: - submission["clinical_significance"] = interpretation_description_element.text + classification_element = submission_element.find("./Classification") + germline_classification_element = classification_element.find("./GermlineClassification") + if germline_classification_element is not None: + submission["clinical_significance"] = germline_classification_element.text - submission["last_evaluated"] = interpretation_element.attrib.get("DateLastEvaluated", None) - submission["review_status"] = submission_element.find("./ReviewStatus").text + submission["last_evaluated"] = classification_element.attrib.get("DateLastEvaluated", None) + submission["review_status"] = classification_element.find("./ReviewStatus").text submission["conditions"] = [] trait_elements = submission_element.findall("./TraitSet/Trait") for trait_element in trait_elements: - preferred_name_element = None - mapping_element = None - - if trait_mapping_list_element is not None: - xref_elements = trait_element.findall("XRef") - for xref_element in xref_elements: - selector = f"./TraitMapping[@ClinicalAssertionID='{submission_element.attrib['ID']}'][@TraitType='{trait_element.attrib['Type']}'][@MappingType='XRef'][@MappingValue='{xref_element.attrib['ID']}']" # noqa - mapping_element = trait_mapping_list_element.find(selector) - if mapping_element is not None: - break - - if mapping_element is None: - preferred_name_element = trait_element.find("./Name/ElementValue[@Type='Preferred']") - if preferred_name_element is not None and trait_mapping_list_element is not None: - selector = 
f"./TraitMapping[@ClinicalAssertionID='{submission_element.attrib['ID']}'][@TraitType='{trait_element.attrib['Type']}'][@MappingType='Name'][@MappingValue=\"{preferred_name_element.text}\"]" # noqa - mapping_element = trait_mapping_list_element.find(selector) - - if mapping_element is None: - name_elements = trait_element.findall("./Name/ElementValue") - for name_element in name_elements: - if preferred_name_element is None: - preferred_name_element = name_element - - if trait_mapping_list_element is not None: - selector = f"./TraitMapping[@ClinicalAssertionID='{submission_element.attrib['ID']}'][@TraitType='{trait_element.attrib['Type']}'][@MappingType='Name'][@MappingValue=\"{name_element.text}\"]" # noqa - mapping_element = trait_mapping_list_element.find(selector) - if mapping_element: - break - - if mapping_element is not None: - medgen_element = mapping_element.find("./MedGen") - submission["conditions"].append( - {"name": medgen_element.attrib["Name"], "medgen_id": medgen_element.attrib["CUI"]} - ) - elif preferred_name_element is not None: - submission["conditions"].append({"name": preferred_name_element.text, "medgen_id": None}) + condition_medgen_mapping = _associate_condition_with_medgen_id( + submission_element, trait_mapping_list_element, trait_element + ) + submission["conditions"].append(condition_medgen_mapping) return submission -def _parse_variant(variant_element): +def _parse_variant(variant_element, tqdm_pbar=None): variant = {} - if variant_element.find("./InterpretedRecord") is None: - raise SkipVariant + if variant_element.find("./ClassifiedRecord") is None: + return None variant["locations"] = {} - location_elements = variant_element.findall("./InterpretedRecord/SimpleAllele/Location/SequenceLocation") + location_elements = variant_element.findall("./ClassifiedRecord/SimpleAllele/Location/SequenceLocation") for element in location_elements: try: chromosome = element.attrib["Chr"] @@ -115,11 +147,11 @@ def _parse_variant(variant_element): # 
which caused failure of this pipeline when compared to the reference genome if chromosome == "Un": variant["locations"] = {} - allele_element = variant_element.findall("./InterpretedRecord/SimpleAllele") - print( - f' Skipping variant with Allele ID: {allele_element[0].attrib["AlleleID"]} due to anomalous Chromosome value of "Un"' # noqa - ) + allele_element = variant_element.findall("./ClassifiedRecord/SimpleAllele") + if tqdm_pbar is not None: + tqdm_pbar.set_postfix_str(f'Skipped AlleleID: {allele_element[0].attrib["AlleleID"]} (Chr: Un)') break + variant["locations"][element.attrib["Assembly"]] = { "locus": chromosome + ":" + element.attrib["positionVCF"], "alleles": [element.attrib["referenceAlleleVCF"], element.attrib["alternateAlleleVCF"]], @@ -128,40 +160,27 @@ def _parse_variant(variant_element): pass if not variant["locations"]: - raise SkipVariant + return None variant["clinvar_variation_id"] = variant_element.attrib["VariationID"] variant["rsid"] = None - rsid_element = variant_element.find("./InterpretedRecord/SimpleAllele/XRefList/XRef[@DB='dbSNP']") + rsid_element = variant_element.find("./ClassifiedRecord/SimpleAllele/XRefList/XRef[@DB='dbSNP']") if rsid_element is not None: variant["rsid"] = rsid_element.attrib["ID"] - review_status_element = variant_element.find("./InterpretedRecord/ReviewStatus") - variant["review_status"] = review_status_element.text + germline_classification_element = variant_element.find("./ClassifiedRecord/Classifications/GermlineClassification") + if germline_classification_element is None: + return None + variant["review_status"] = germline_classification_element.find("./ReviewStatus").text variant["gold_stars"] = CLINVAR_GOLD_STARS[variant["review_status"]] - variant["clinical_significance"] = None - clinical_significance_elements = variant_element.findall( - "./InterpretedRecord/Interpretations/Interpretation[@Type='Clinical significance']" - ) - if clinical_significance_elements: - variant["clinical_significance"] = ", 
".join( - el.find("Description").text for el in clinical_significance_elements - ) + variant["clinical_significance"] = germline_classification_element.find("./Description").text + + variant["last_evaluated"] = germline_classification_element.attrib.get("DateLastEvaluated") - variant["last_evaluated"] = None - evaluated_dates = [el.attrib.get("DateLastEvaluated", None) for el in clinical_significance_elements] - evaluated_dates = [date for date in evaluated_dates if date] - if evaluated_dates: - variant["last_evaluated"] = sorted( - evaluated_dates, - key=lambda date: datetime.datetime.strptime(date, "%Y-%m-%d"), - reverse=True, - )[0] - - submission_elements = variant_element.findall("./InterpretedRecord/ClinicalAssertionList/ClinicalAssertion") - trait_mapping_list_element = variant_element.find("./InterpretedRecord/TraitMappingList") + submission_elements = variant_element.findall("./ClassifiedRecord/ClinicalAssertionList/ClinicalAssertion") + trait_mapping_list_element = variant_element.find("./ClassifiedRecord/TraitMappingList") variant["submissions"] = [_parse_submission(el, trait_mapping_list_element) for el in submission_elements] return variant @@ -182,7 +201,7 @@ def parse_clinvar_xml_to_tsv( with open_function(input_xml_path, "r") as xml_file: # The exact number of variants in the XML file is unknown. # Approximate it to show a progress bar. 
- progress = tqdm(total=1_100_000, mininterval=5) + progress = tqdm(total=3_100_000, mininterval=5) xml = ElementTree.iterparse(xml_file, events=["end"]) for _, element in xml: if element.tag == "ClinVarVariationRelease": @@ -191,6 +210,9 @@ def parse_clinvar_xml_to_tsv( if element.tag == "VariationArchive": try: variant = parse_variant_function(element) + if variant is None: + element.clear() + continue locations = variant.pop("locations") writer.writerow( [ @@ -207,8 +229,6 @@ def parse_clinvar_xml_to_tsv( ) progress.update(1) - except SkipVariant: - pass except Exception: print( f"Failed to parse variant {element.attrib['VariationID']}", @@ -219,9 +239,9 @@ def parse_clinvar_xml_to_tsv( # https://stackoverflow.com/questions/7697710/python-running-out-of-memory-parsing-xml-using-celementtree-iterparse element.clear() - progress.close() + progress.close() - return release_date + return release_date def import_clinvar_xml(clinvar_xml_path): From 1aaf61803f018df2af0635b016e235d386a32887 Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Fri, 8 Nov 2024 09:13:04 -0600 Subject: [PATCH 06/10] fix(data-pipeline): consistently format time --- data-pipeline/src/data_pipeline/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-pipeline/src/data_pipeline/pipeline.py b/data-pipeline/src/data_pipeline/pipeline.py index dadd6aa34..4fea5a20f 100644 --- a/data-pipeline/src/data_pipeline/pipeline.py +++ b/data-pipeline/src/data_pipeline/pipeline.py @@ -100,7 +100,7 @@ def run(self, force=False): stop = time.perf_counter() elapsed = stop - start - logger.info("Finished %s in %.0fm%02.2fs", self._name, elapsed // 60, elapsed % 60) + logger.info(f"Finished {self._name} in {int(elapsed // 60)}m{elapsed % 60:.2f}s") else: logger.info(f"Skipping {self._name}") From b1675fbafb86c0dca8cd2b35cd4a6057b4351922 Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Tue, 5 Nov 2024 15:39:29 -0600 Subject: [PATCH 07/10] feat(browser): use new ClinVar terminology on 
frontend ClinVar has moved away from the term 'clinical significance' for a few reasons; here we move to using their new preferred term, 'germline classification'. --- browser/src/ClinvarVariantsTrack/ClinvarVariantDetails.tsx | 2 +- browser/src/ClinvarVariantsTrack/ClinvarVariantTooltip.tsx | 2 +- browser/src/ClinvarVariantsTrack/clinvarVariantCategories.ts | 2 ++ browser/src/SubmissionsList.tsx | 2 +- browser/src/VariantPage/VariantClinvarInfo.tsx | 2 +- browser/src/__snapshots__/SubmissionsList.spec.tsx.snap | 4 ++-- 6 files changed, 8 insertions(+), 6 deletions(-) diff --git a/browser/src/ClinvarVariantsTrack/ClinvarVariantDetails.tsx b/browser/src/ClinvarVariantsTrack/ClinvarVariantDetails.tsx index d12189d78..cecacbbc6 100644 --- a/browser/src/ClinvarVariantsTrack/ClinvarVariantDetails.tsx +++ b/browser/src/ClinvarVariantsTrack/ClinvarVariantDetails.tsx @@ -137,7 +137,7 @@ const ClinvarVariantDetails = ({ [] )} - + {clinvarVariant.clinical_significance} diff --git a/browser/src/ClinvarVariantsTrack/ClinvarVariantTooltip.tsx b/browser/src/ClinvarVariantsTrack/ClinvarVariantTooltip.tsx index 3814efb33..f0b05237a 100644 --- a/browser/src/ClinvarVariantsTrack/ClinvarVariantTooltip.tsx +++ b/browser/src/ClinvarVariantsTrack/ClinvarVariantTooltip.tsx @@ -72,7 +72,7 @@ const ClinvarVariantTooltip = ({ variant }: ClinvarVariantTooltipProps) => ( {variant.variant_id}
-
Clinical significance
+
Germline classification
{variant.clinical_significance}
diff --git a/browser/src/ClinvarVariantsTrack/clinvarVariantCategories.ts b/browser/src/ClinvarVariantsTrack/clinvarVariantCategories.ts index 7aab798c8..868d29278 100644 --- a/browser/src/ClinvarVariantsTrack/clinvarVariantCategories.ts +++ b/browser/src/ClinvarVariantsTrack/clinvarVariantCategories.ts @@ -13,6 +13,7 @@ const CLINICAL_SIGNIFICANCE_GROUPS = { uncertain: new Set([ 'Uncertain significance', 'Conflicting interpretations of pathogenicity', + 'Conflicting classifications of pathogenicity', 'conflicting data from submitters', ]), benign: new Set(['Benign', 'Likely benign', 'Benign/Likely benign']), @@ -22,6 +23,7 @@ const CLINICAL_SIGNIFICANCE_GROUPS = { 'Affects', 'protective', 'no interpretation for the single variant', + 'no classification for the single variant', 'not provided', 'association not found', ]), diff --git a/browser/src/SubmissionsList.tsx b/browser/src/SubmissionsList.tsx index c3eb15590..c601e57b9 100644 --- a/browser/src/SubmissionsList.tsx +++ b/browser/src/SubmissionsList.tsx @@ -45,7 +45,7 @@ const SubmissionsList = ({ submissions }: SubmissionsListProps) => ( // @ts-expect-error TS(2769) FIXME: No overload matches this call. .reduce((acc, el, i) => (i === 0 ? 
[...acc, el] : [...acc, ', ', el]), [])} - + {submission.clinical_significance || '–'} {submission.review_status} diff --git a/browser/src/VariantPage/VariantClinvarInfo.tsx b/browser/src/VariantPage/VariantClinvarInfo.tsx index 52552f572..a23ce5545 100644 --- a/browser/src/VariantPage/VariantClinvarInfo.tsx +++ b/browser/src/VariantPage/VariantClinvarInfo.tsx @@ -50,7 +50,7 @@ const VariantClinvarInfo = ({ clinvar }: VariantClinvarInfoProps) => { [] )} - + {clinvar.clinical_significance} diff --git a/browser/src/__snapshots__/SubmissionsList.spec.tsx.snap b/browser/src/__snapshots__/SubmissionsList.spec.tsx.snap index d316ea9bf..ed1f9e017 100644 --- a/browser/src/__snapshots__/SubmissionsList.spec.tsx.snap +++ b/browser/src/__snapshots__/SubmissionsList.spec.tsx.snap @@ -19,7 +19,7 @@ exports[`SubmissionsList has no unexpected changes 1`] = ` Benign @@ -52,7 +52,7 @@ exports[`SubmissionsList has no unexpected changes 1`] = ` Benign From 566ad5bfc0cbf59ece86791126a39a9d907af721 Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Thu, 7 Nov 2024 08:07:51 -0600 Subject: [PATCH 08/10] fix(data-pipelines): move Genes args to fix checking of dates --- data-pipeline/src/data_pipeline/pipelines/genes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/data-pipeline/src/data_pipeline/pipelines/genes.py b/data-pipeline/src/data_pipeline/pipelines/genes.py index cec30852c..431c31c4c 100644 --- a/data-pipeline/src/data_pipeline/pipelines/genes.py +++ b/data-pipeline/src/data_pipeline/pipelines/genes.py @@ -329,6 +329,8 @@ def annotate_with_preferred_transcript(table_path): f"/{genes_subdir}/gnomad.browser.GRCh37.GENCODEv19.ht", { "genes_path": pipeline.get_task("annotate_grch37_genes_step_5"), + }, + { "keep_mane_version_global_annotation": False, }, ) @@ -396,7 +398,6 @@ def annotate_with_constraint(genes_path, constraint_path): }, ) - # naming scheme follows methods naming scheme for consistency pipeline.add_task( 
"prepare_grch38_genes_table_for_public_release", @@ -404,6 +405,8 @@ def annotate_with_constraint(genes_path, constraint_path): f"/{genes_subdir}/gnomad.browser.GRCh38.GENCODEv39.ht", { "genes_path": pipeline.get_task("remove_grch38_genes_constraint_for_release"), + }, + { "keep_mane_version_global_annotation": True, }, ) From 4f52ab9bad883c444d57481b11ba8263a7e8a35b Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Fri, 8 Nov 2024 14:56:30 -0600 Subject: [PATCH 09/10] feat(graphql-api): improve warning message in ClinVar query --- graphql-api/src/queries/clinvar-variant-queries.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/graphql-api/src/queries/clinvar-variant-queries.ts b/graphql-api/src/queries/clinvar-variant-queries.ts index 846acf73b..bd1b69033 100644 --- a/graphql-api/src/queries/clinvar-variant-queries.ts +++ b/graphql-api/src/queries/clinvar-variant-queries.ts @@ -26,7 +26,9 @@ const _fetchClinvarReleaseDate = async (esClient: any) => { const releaseDates = metadata.map((m) => m.table_globals.clinvar_release_date) if (releaseDates[0] !== releaseDates[1]) { - logger.error({ message: 'ClinVar release dates do not match' }) + logger.error({ + message: `ClinVar release dates do not match. 
GRCh38: ${releaseDates[1]}, GRCh37: ${releaseDates[0]}`, + }) } return releaseDates[0] From 1b3feb7e58320a3d7aacf98e97d912ece88009f7 Mon Sep 17 00:00:00 2001 From: Riley Grant Date: Thu, 7 Nov 2024 13:28:27 -0600 Subject: [PATCH 10/10] feat(graphql-api): modify ClinVar ES indices to allow rollbacks --- graphql-api/src/queries/clinvar-variant-queries.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/graphql-api/src/queries/clinvar-variant-queries.ts b/graphql-api/src/queries/clinvar-variant-queries.ts index bd1b69033..a8175d7da 100644 --- a/graphql-api/src/queries/clinvar-variant-queries.ts +++ b/graphql-api/src/queries/clinvar-variant-queries.ts @@ -9,8 +9,12 @@ import { getConsequenceForContext } from './variant-datasets/shared/transcriptCo import largeGenes from './helpers/large-genes' const CLINVAR_VARIANT_INDICES = { - GRCh37: 'clinvar_grch37_variants', - GRCh38: 'clinvar_grch38_variants', + // GRCh37: 'clinvar_grch37_variants', + // GRCh38: 'clinvar_grch38_variants', + // TODO: revert back to using alias'ed indexes once we are confident this is + // stable in production + GRCh37: 'clinvar_grch37_variants-2024-11-08--19-22', + GRCh38: 'clinvar_grch38_variants-2024-11-08--13-08', } // ================================================================================================