diff --git a/CHANGELOG.md b/CHANGELOG.md index 3596297fe2..f0320d9988 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,9 +31,10 @@ About changelog [here](https://keepachangelog.com/en/1.0.0/) - Causative view sort out of memory error - Use hgnc_id for gene filter query - Typo in case controllers displaying an error every time a patient is matched against external MatchMaker nodes -- Do not crash while attemping an update for variant documents that are too big (> 16 MB) +- Do not crash while attempting an update for variant documents that are too big (> 16 MB) - Old STR causatives (and other variants) may not have HGNC symbols - fix sort lambda - Check if gene_obj has primary_transcript before trying to access it +- Warn if a gene manually searched is in a clinical panel with an outdated name when filtering variants ### Changed - Remove parsing of case `genome_version`, since it's not used anywhere downstream - Introduce deprecation warning for Loqus configs that are not dictionaries diff --git a/scout/adapter/mongo/panel.py b/scout/adapter/mongo/panel.py index cf9309a47f..78581a6c96 100644 --- a/scout/adapter/mongo/panel.py +++ b/scout/adapter/mongo/panel.py @@ -532,3 +532,15 @@ def clinical_symbols(self, case_obj): ] ) return set(item["_id"] for item in query) + + def clinical_hgnc_ids(self, case_obj): + """Return all the clinical gene hgnc IDs for a case.""" + panel_ids = [panel["panel_id"] for panel in case_obj["panels"]] + query = self.panel_collection.aggregate( + [ + {"$match": {"_id": {"$in": panel_ids}}}, + {"$unwind": "$genes"}, + {"$group": {"_id": "$genes.hgnc_id"}}, + ] + ) + return set(item["_id"] for item in query) diff --git a/scout/server/blueprints/variants/controllers.py b/scout/server/blueprints/variants/controllers.py index 6786ebc6df..6b989ebd3b 100644 --- a/scout/server/blueprints/variants/controllers.py +++ b/scout/server/blueprints/variants/controllers.py @@ -963,6 +963,67 @@ def populate_sv_filters_form(store, institute_obj, case_obj, 
category, request_o return form +def check_form_gene_symbols( + store, case_obj, is_clinical, genome_build, hgnc_symbols, not_found_ids +): + """Check that gene symbols provided by user exist and are up to date. + Flash a warning if gene is not found, gene symbol present in panel is outdated + or is not found in clinical list when search is performed on clinical variants + + Args: + store(adapter.MongoAdapter) + case_obj(dict) + is_clinical(bool): True when filtering clinical variants, False for research variants + genome_build(str): "37" or "38" + hgnc_symbols(list): list of gene symbols (strings) + not_found_ids(list): list of HGNC IDs not found if user provided numeric HGNC IDs in search form + + Returns: + updated_hgnc_symbols(list): List of gene symbols that are found in database and are up to date + """ + non_clinical_symbols = [] + not_found_symbols = [] + outdated_symbols = [] + updated_hgnc_symbols = [] + + clinical_hgnc_ids = store.clinical_hgnc_ids(case_obj) + clinical_symbols = store.clinical_symbols(case_obj) + + for hgnc_symbol in hgnc_symbols: + hgnc_gene = store.hgnc_genes_find_one(hgnc_symbol, genome_build) + + if hgnc_gene is None: + not_found_symbols.append(hgnc_symbol) + elif not is_clinical or hgnc_gene["hgnc_id"] in clinical_hgnc_ids: + updated_hgnc_symbols.append(hgnc_symbol) + if is_clinical and hgnc_symbol not in clinical_symbols: + # clinical symbols from gene panels might not be up to date with latest gene names + # but their HGNC id would still match + outdated_symbols.append(hgnc_symbol) + else: + non_clinical_symbols.append(hgnc_symbol) + + errors = { + "non_clinical_symbols": { + "alert": "Gene not included in clinical list", + "gene_list": non_clinical_symbols, + }, + "not_found_symbols": {"alert": "HGNC symbol not found", "gene_list": not_found_symbols}, + "not_found_ids": {"alert": "HGNC id not found", "gene_list": not_found_ids}, + "outdated_symbols": { + "alert": "Clinical list contains a panel with an outdated symbol for genes", + "gene_list": outdated_symbols, + }, + } + 
+ # warn user about genes that were not found, are not clinical or have an outdated symbol in a panel + for item in errors.values(): + if item["gene_list"]: + flash(f"{item['alert']}: {item['gene_list']}", "warning") + + return updated_hgnc_symbols + + def update_form_hgnc_symbols(store, case_obj, form): """Update variants filter form with HGNC symbols from HPO, and check if any non-clinical genes for the case were requested. If so, flash a warning to the user. @@ -977,17 +1038,15 @@ def update_form_hgnc_symbols(store, case_obj, form): """ hgnc_symbols = [] - non_clinical_symbols = [] - not_found_symbols = [] not_found_ids = [] - updated_hgnc_symbols = [] + genome_build = "38" if "38" in str(case_obj.get("genome_build")) else "37" # retrieve current symbols from form if form.hgnc_symbols.data: # if symbols are numeric HGNC id, translate to current symbols for hgnc_symbol in form.hgnc_symbols.data: if hgnc_symbol.isdigit(): - hgnc_gene = store.hgnc_gene(int(hgnc_symbol), case_obj["genome_build"]) + hgnc_gene = store.hgnc_gene(int(hgnc_symbol), genome_build) if hgnc_gene is None: not_found_ids.append(hgnc_symbol) else: @@ -1008,31 +1067,10 @@ def update_form_hgnc_symbols(store, case_obj, form): # check if supplied gene symbols exist and are clinical is_clinical = form.data.get("variant_type", "clinical") == "clinical" - clinical_symbols = store.clinical_symbols(case_obj) if is_clinical else None - for hgnc_symbol in hgnc_symbols: - if sum(1 for i in store.hgnc_genes(hgnc_symbol)) == 0: - not_found_symbols.append(hgnc_symbol) - elif is_clinical and (hgnc_symbol not in clinical_symbols): - non_clinical_symbols.append(hgnc_symbol) - else: - updated_hgnc_symbols.append(hgnc_symbol) - - errors = { - "non_clinical_symbols": { - "alert": "Gene not included in clinical list", - "gene_list": non_clinical_symbols, - }, - "not_found_symbols": {"alert": "HGNC symbol not found", "gene_list": not_found_symbols}, - "not_found_ids": {"alert": "HGNC id not found", "gene_list": not_found_ids}, - } - - # warn 
user - for error, item in errors.items(): - if item["gene_list"]: - flash(f"{item['alert']}: {item['gene_list']}", "warning") - + updated_hgnc_symbols = check_form_gene_symbols( + store, case_obj, is_clinical, genome_build, hgnc_symbols, not_found_ids + ) form.hgnc_symbols.data = sorted(updated_hgnc_symbols) - return form diff --git a/tests/adapter/mongo/test_panel_handler.py b/tests/adapter/mongo/test_panel_handler.py index a16e0783d0..05e7e4218d 100644 --- a/tests/adapter/mongo/test_panel_handler.py +++ b/tests/adapter/mongo/test_panel_handler.py @@ -337,3 +337,35 @@ def test_apply_pending_edit_gene(adapter, testpanel_obj): "model_1", "model_2", ] + + +def test_clinical_symbols(case_obj, real_panel_database): + """test function that returns a set of clinical genes symbols from test case panels""" + + # GIVEN an adapter with genes and a gene panel + adapter = real_panel_database + + test_panel = adapter.panel_collection.find_one() + + # GIVEN a case analysed with that panel + case_obj["panels"] = [{"panel_id": test_panel["_id"]}] + + # THEN the clinical_symbols function should return a valid set of clinical genes symbols for the case panel + clinical_symbols = adapter.clinical_symbols(case_obj) + assert len(clinical_symbols) > 0 + + +def test_clinical_hgnc_ids(case_obj, real_panel_database): + """test function that returns a set of clinical genes HGNC IDs from test case panels""" + + # GIVEN an adapter with genes and a gene panel + adapter = real_panel_database + + test_panel = adapter.panel_collection.find_one() + + # GIVEN a case analysed with that panel + case_obj["panels"] = [{"panel_id": test_panel["_id"]}] + + # THEN the clinical_hgnc_ids function should return a valid set of hgnc IDs for the case panel + clinical_hgnc_ids = adapter.clinical_hgnc_ids(case_obj) + assert len(clinical_hgnc_ids) > 0 diff --git a/tests/server/blueprints/variants/test_variants_controllers.py b/tests/server/blueprints/variants/test_variants_controllers.py index 
54f0f04369..0232074c90 100644 --- a/tests/server/blueprints/variants/test_variants_controllers.py +++ b/tests/server/blueprints/variants/test_variants_controllers.py @@ -3,7 +3,7 @@ from bson.objectid import ObjectId from flask_wtf import FlaskForm -from wtforms import SelectField +from wtforms import SelectField, StringField from scout.constants import CHROMOSOMES_38, EXPORT_HEADER from scout.server.blueprints.variants.controllers import ( @@ -12,10 +12,12 @@ match_gene_txs_variant_txs, populate_chrom_choices, sv_variants, + update_form_hgnc_symbols, variant_export_lines, variants, variants_export_header, ) +from scout.server.extensions import store LOG = logging.getLogger(__name__) @@ -363,3 +365,49 @@ def test_variant_csv_export(real_variant_database, case_obj): for export_line in export_lines: export_cols = export_line.split(",") assert len(export_cols) == len(export_header) + + +def test_update_form_hgnc_symbols_valid_gene_symbol(app, case_obj): + """Test controller that populates HGNC symbols form filter in variants page. Provide valid gene symbol""" + + # GIVEN a case analysed with a gene panel + test_panel = store.panel_collection.find_one() + case_obj["panels"] = [{"panel_id": test_panel["_id"]}] + + # GIVEN a variants filter form + class TestForm(FlaskForm): + hgnc_symbols = StringField() + data = StringField() + + form = TestForm() + + # GIVEN a user trying to filter clinical variants using a valid gene symbol + form.hgnc_symbols.data = ["POT1"] + form.data = {"gene_panels": []} + updated_form = update_form_hgnc_symbols(store, case_obj, form) + + # Form should be updated correctly + assert form.hgnc_symbols.data == ["POT1"] + + +def test_update_form_hgnc_symbols_valid_gene_id(app, case_obj): + """Test controller that populates HGNC symbols form filter in variants page. 
Provide HGNC ID""" + + # GIVEN a case analysed with a gene panel + test_panel = store.panel_collection.find_one() + case_obj["panels"] = [{"panel_id": test_panel["_id"]}] + + # GIVEN a variants filter form + class TestForm(FlaskForm): + hgnc_symbols = StringField() + data = StringField() + + form = TestForm() + + # GIVEN a user trying to filter clinical variants using a valid gene ID + form.hgnc_symbols.data = ["17284"] + form.data = {"gene_panels": []} + updated_form = update_form_hgnc_symbols(store, case_obj, form) + + # Form should be updated correctly + assert form.hgnc_symbols.data == ["POT1"] diff --git a/tests/server/blueprints/variants/test_variants_views.py b/tests/server/blueprints/variants/test_variants_views.py index ea91085003..52a9f2a84b 100644 --- a/tests/server/blueprints/variants/test_variants_views.py +++ b/tests/server/blueprints/variants/test_variants_views.py @@ -44,13 +44,11 @@ def test_variants_clinical_filter(app, institute_obj, case_obj, mocker, mock_red assert resp.status_code == 200 # WHEN submitting form data to the variants page (POST method) with clinical filter - data = urlencode( - { - "clinical_filter": "Clinical filter", - "variant_type": "clinical", - "gene_panels": "panel1", - } - ) # clinical filter + data = { + "clinical_filter": "Clinical filter", + "variant_type": "clinical", + "gene_panels": "panel1", + } resp = client.post( url_for( @@ -59,7 +57,6 @@ def test_variants_clinical_filter(app, institute_obj, case_obj, mocker, mock_red case_name=case_obj["display_name"], ), data=data, - content_type="application/x-www-form-urlencoded", ) # THEN it should return a page