Skip to content

Commit

Permalink
Correct gene search warning (#2631)
Browse files Browse the repository at this point in the history
* fix variants filter warning

* a couple of fixes

* better code

* added a couple of tests

* Update CHANGELOG.md

Co-authored-by: Daniel Nilsson <[email protected]>

* Update scout/server/blueprints/variants/controllers.py

Co-authored-by: Daniel Nilsson <[email protected]>

* Update scout/server/blueprints/variants/controllers.py

Co-authored-by: Daniel Nilsson <[email protected]>

* add 2 other genes

* fix a comment

* added comments

* fix how form data is passed to request in test

* try removing the last 2 tests

* let's see if I fool you

* no need to be in the app context

Co-authored-by: Daniel Nilsson <[email protected]>
  • Loading branch information
northwestwitch and dnil authored May 18, 2021
1 parent 688a45b commit 3e13d99
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 38 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ About changelog [here](https://keepachangelog.com/en/1.0.0/)
- Causative view sort out of memory error
- Use hgnc_id for gene filter query
- Typo in case controllers displaying an error every time a patient is matched against external MatchMaker nodes
- Do not crash while attemping an update for variant documents that are too big (> 16 MB)
- Do not crash while attempting an update for variant documents that are too big (> 16 MB)
- Old STR causatives (and other variants) may not have HGNC symbols - fix sort lambda
- Check if gene_obj has primary_transcript before trying to access it
- Warn if a gene manually searched is in a clinical panel with an outdated name when filtering variants
### Changed
- Remove parsing of case `genome_version`, since it's not used anywhere downstream
- Introduce deprecation warning for Loqus configs that are not dictionaries
Expand Down
12 changes: 12 additions & 0 deletions scout/adapter/mongo/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,3 +532,15 @@ def clinical_symbols(self, case_obj):
]
)
return set(item["_id"] for item in query)

def clinical_hgnc_ids(self, case_obj):
    """Return all the clinical gene hgnc IDs for a case.

    Args:
        case_obj(dict): a case; each item of its "panels" list holds a "panel_id"

    Returns:
        set: the distinct "hgnc_id" values of the genes of all the case panels,
            an empty set if the case has no panels
    """
    panel_ids = [panel["panel_id"] for panel in case_obj.get("panels") or []]
    if not panel_ids:
        # No panels means no clinical genes: spare the database round trip
        return set()

    query = self.panel_collection.aggregate(
        [
            {"$match": {"_id": {"$in": panel_ids}}},
            {"$unwind": "$genes"},
            # grouping on the HGNC id collapses genes shared by several panels
            {"$group": {"_id": "$genes.hgnc_id"}},
        ]
    )
    return {item["_id"] for item in query}
94 changes: 66 additions & 28 deletions scout/server/blueprints/variants/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,67 @@ def populate_sv_filters_form(store, institute_obj, case_obj, category, request_o
return form


def check_form_gene_symbols(
    store, case_obj, is_clinical, genome_build, hgnc_symbols, not_found_ids
):
    """Check that gene symbols provided by user exist and are up to date.

    Flash a warning if a gene is not found, if the gene symbol present in a panel is outdated
    or if the gene is not found in the clinical list when the search is performed on
    clinical variants. Research searches are not restricted to the clinical list: every
    symbol found in the database is kept.

    Args:
        store(adapter.MongoAdapter)
        case_obj(dict)
        is_clinical(bool): type of variants (clinical, research)
        genome_build(str): "37" or "38"
        hgnc_symbols(list): list of gene symbols (strings)
        not_found_ids(list): list of HGNC IDs not found if user provided numerical HGNC IDs in search form

    Returns:
        updated_hgnc_symbols(list): List of gene symbols that are found in database and,
            for clinical searches, belong to the clinical list of the case
    """
    non_clinical_symbols = []
    not_found_symbols = []
    outdated_symbols = []
    updated_hgnc_symbols = []

    # The clinical lists are only consulted for clinical searches, skip both queries otherwise
    clinical_hgnc_ids = store.clinical_hgnc_ids(case_obj) if is_clinical else set()
    clinical_symbols = store.clinical_symbols(case_obj) if is_clinical else set()

    for hgnc_symbol in hgnc_symbols:
        hgnc_gene = store.hgnc_genes_find_one(hgnc_symbol, genome_build)

        if hgnc_gene is None:
            not_found_symbols.append(hgnc_symbol)
        elif not is_clinical:
            # research search: any existing gene is allowed, no clinical list check
            updated_hgnc_symbols.append(hgnc_symbol)
        elif hgnc_gene["hgnc_id"] in clinical_hgnc_ids:
            updated_hgnc_symbols.append(hgnc_symbol)
            if hgnc_symbol not in clinical_symbols:
                # clinical symbols from gene panels might not be up to date with latest gene names
                # but their HGNC id would still match
                outdated_symbols.append(hgnc_symbol)
        else:
            non_clinical_symbols.append(hgnc_symbol)

    errors = {
        "non_clinical_symbols": {
            "alert": "Gene not included in clinical list",
            "gene_list": non_clinical_symbols,
        },
        "not_found_symbols": {"alert": "HGNC symbol not found", "gene_list": not_found_symbols},
        "not_found_ids": {"alert": "HGNC id not found", "gene_list": not_found_ids},
        "outdated_symbols": {
            "alert": "Clinical list contains a panel with an outdated symbol for genes",
            "gene_list": outdated_symbols,
        },
    }

    # warn user about every category of problematic gene that is not empty
    for item in errors.values():
        if item["gene_list"]:
            flash(f"{item['alert']}: {item['gene_list']}", "warning")

    return updated_hgnc_symbols


def update_form_hgnc_symbols(store, case_obj, form):
"""Update variants filter form with HGNC symbols from HPO, and check if any non-clinical genes for the case were
requested. If so, flash a warning to the user.
Expand All @@ -977,17 +1038,15 @@ def update_form_hgnc_symbols(store, case_obj, form):
"""

hgnc_symbols = []
non_clinical_symbols = []
not_found_symbols = []
not_found_ids = []
updated_hgnc_symbols = []
genome_build = "38" if "38" in str(case_obj.get("genome_build")) else "37"

# retrieve current symbols from form
if form.hgnc_symbols.data:
# if symbols are numeric HGNC id, translate to current symbols
for hgnc_symbol in form.hgnc_symbols.data:
if hgnc_symbol.isdigit():
hgnc_gene = store.hgnc_gene(int(hgnc_symbol), case_obj["genome_build"])
hgnc_gene = store.hgnc_gene(int(hgnc_symbol), genome_build)
if hgnc_gene is None:
not_found_ids.append(hgnc_symbol)
else:
Expand All @@ -1008,31 +1067,10 @@ def update_form_hgnc_symbols(store, case_obj, form):

# check if supplied gene symbols exist and are clinical
is_clinical = form.data.get("variant_type", "clinical") == "clinical"
clinical_symbols = store.clinical_symbols(case_obj) if is_clinical else None
for hgnc_symbol in hgnc_symbols:
if sum(1 for i in store.hgnc_genes(hgnc_symbol)) == 0:
not_found_symbols.append(hgnc_symbol)
elif is_clinical and (hgnc_symbol not in clinical_symbols):
non_clinical_symbols.append(hgnc_symbol)
else:
updated_hgnc_symbols.append(hgnc_symbol)

errors = {
"non_clinical_symbols": {
"alert": "Gene not included in clinical list",
"gene_list": non_clinical_symbols,
},
"not_found_symbols": {"alert": "HGNC symbol not found", "gene_list": not_found_symbols},
"not_found_ids": {"alert": "HGNC id not found", "gene_list": not_found_ids},
}

# warn user
for error, item in errors.items():
if item["gene_list"]:
flash(f"{item['alert']}: {item['gene_list']}", "warning")

updated_hgnc_symbols = check_form_gene_symbols(
store, case_obj, is_clinical, genome_build, hgnc_symbols, not_found_ids
)
form.hgnc_symbols.data = sorted(updated_hgnc_symbols)

return form


Expand Down
32 changes: 32 additions & 0 deletions tests/adapter/mongo/test_panel_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,35 @@ def test_apply_pending_edit_gene(adapter, testpanel_obj):
"model_1",
"model_2",
]


def test_clinical_symbols(case_obj, real_panel_database):
    """Check that clinical gene symbols are collected from the panels of a case"""

    # GIVEN a database populated with genes and a gene panel
    panel_obj = real_panel_database.panel_collection.find_one()

    # GIVEN a case whose only panel is that one
    case_obj["panels"] = [{"panel_id": panel_obj["_id"]}]

    # WHEN collecting the clinical gene symbols of the case
    symbols = real_panel_database.clinical_symbols(case_obj)

    # THEN at least one symbol should be returned
    assert len(symbols) > 0


def test_clinical_hgnc_ids(case_obj, real_panel_database):
    """test function that returns a set of clinical genes HGNC IDs from test case panels"""

    # GIVEN an adapter with genes and a gene panel
    adapter = real_panel_database

    test_panel = adapter.panel_collection.find_one()

    # GIVEN a case analysed with that panel
    case_obj["panels"] = [{"panel_id": test_panel["_id"]}]

    # WHEN collecting the clinical HGNC IDs of the case
    # (this must call clinical_hgnc_ids, not its sibling clinical_symbols)
    clinical_hgnc_ids = adapter.clinical_hgnc_ids(case_obj)

    # THEN the clinical_hgnc_ids function should return a valid set of hgnc IDs for the case panel
    assert len(clinical_hgnc_ids) > 0
50 changes: 49 additions & 1 deletion tests/server/blueprints/variants/test_variants_controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from bson.objectid import ObjectId
from flask_wtf import FlaskForm
from wtforms import SelectField
from wtforms import SelectField, StringField

from scout.constants import CHROMOSOMES_38, EXPORT_HEADER
from scout.server.blueprints.variants.controllers import (
Expand All @@ -12,10 +12,12 @@
match_gene_txs_variant_txs,
populate_chrom_choices,
sv_variants,
update_form_hgnc_symbols,
variant_export_lines,
variants,
variants_export_header,
)
from scout.server.extensions import store

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -363,3 +365,49 @@ def test_variant_csv_export(real_variant_database, case_obj):
for export_line in export_lines:
export_cols = export_line.split(",")
assert len(export_cols) == len(export_header)


def test_update_form_hgnc_symbols_valid_gene_symbol(app, case_obj):
    """Test controller that populates HGNC symbols form filter in variants page. Provide valid gene symbol"""

    # GIVEN a case analysed with a gene panel
    test_panel = store.panel_collection.find_one()
    case_obj["panels"] = [{"panel_id": test_panel["_id"]}]

    # GIVEN a variants filter form
    # NOTE(review): the `data` field presumably shadows the form's own `data` attribute
    # so that a plain dict can be assigned to it below - confirm against WTForms
    class TestForm(FlaskForm):
        hgnc_symbols = StringField()
        data = StringField()

    form = TestForm()

    # GIVEN a user trying to filter clinical variants using a valid gene symbol
    form.hgnc_symbols.data = ["POT1"]
    form.data = {"gene_panels": []}

    # WHEN the controller checks the gene symbols of the form
    updated_form = update_form_hgnc_symbols(store, case_obj, form)

    # THEN the returned form should be updated correctly
    assert updated_form.hgnc_symbols.data == ["POT1"]


def test_update_form_hgnc_symbols_valid_gene_id(app, case_obj):
    """Test controller that populates HGNC symbols form filter in variants page. Provide HGNC ID"""

    # GIVEN a case analysed with a gene panel
    test_panel = store.panel_collection.find_one()
    case_obj["panels"] = [{"panel_id": test_panel["_id"]}]

    # GIVEN a variants filter form
    # NOTE(review): the `data` field presumably shadows the form's own `data` attribute
    # so that a plain dict can be assigned to it below - confirm against WTForms
    class TestForm(FlaskForm):
        hgnc_symbols = StringField()
        data = StringField()

    form = TestForm()

    # GIVEN a user trying to filter clinical variants using a valid gene ID
    form.hgnc_symbols.data = ["17284"]
    form.data = {"gene_panels": []}

    # WHEN the controller checks the gene symbols of the form
    updated_form = update_form_hgnc_symbols(store, case_obj, form)

    # THEN the numeric HGNC ID should have been replaced by the gene symbol in the returned form
    assert updated_form.hgnc_symbols.data == ["POT1"]
13 changes: 5 additions & 8 deletions tests/server/blueprints/variants/test_variants_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,11 @@ def test_variants_clinical_filter(app, institute_obj, case_obj, mocker, mock_red
assert resp.status_code == 200

# WHEN submitting form data to the variants page (POST method) with clinical filter
data = urlencode(
{
"clinical_filter": "Clinical filter",
"variant_type": "clinical",
"gene_panels": "panel1",
}
) # clinical filter
data = {
"clinical_filter": "Clinical filter",
"variant_type": "clinical",
"gene_panels": "panel1",
}

resp = client.post(
url_for(
Expand All @@ -59,7 +57,6 @@ def test_variants_clinical_filter(app, institute_obj, case_obj, mocker, mock_red
case_name=case_obj["display_name"],
),
data=data,
content_type="application/x-www-form-urlencoded",
)

# THEN it should return a page
Expand Down

0 comments on commit 3e13d99

Please sign in to comment.