-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Parse Pac-Bio metrics from control report (#3409)(patch)
## Description Closes Clinical-Genomics/add-new-tech#45 ### Added - Pydantic model for the control metrics - Constants with the attribute names of the control report file - Unit test for percent function - Fixtures for control report file and metrics object ### Changed - Moved the percent field validator to a separate function to be more DRY - Updated test of metrics parser to work with control model --------- Co-authored-by: ChristianOertlin <[email protected]>
- Loading branch information
1 parent
168d2bf
commit 155a504
Showing
9 changed files
with
170 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,13 @@ | ||
"""Module to hold functions for calculations.""" | ||
|
||
|
||
def fraction_to_percent(value: float) -> float:
    """Convert a fraction to a percentage.

    A value inside the closed interval [0, 1] is treated as a fraction and multiplied by 100.
    Any other value is returned unchanged.
    """
    if 0.0 <= value <= 1.0:
        value *= 100
    return value
|
||
|
||
def multiply_by_million(number: float | int) -> int: | ||
"""Multiply a given number by a million.""" | ||
return int(number * 1_000_000) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
90 changes: 68 additions & 22 deletions
90
tests/services/pacbio_services/pacbio_metrics_service/test_pacbio_metrics_service.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,45 +1,91 @@ | ||
from pathlib import Path | ||
from typing import Any | ||
from typing import Any, Callable | ||
|
||
import pytest | ||
from _pytest.fixtures import FixtureRequest | ||
|
||
from cg.constants.pacbio import CCSAttributeIDs, ControlAttributeIDs | ||
from cg.services.pacbio.metrics.metrics_parser import MetricsParser | ||
from cg.services.pacbio.metrics.models import HiFiMetrics | ||
from cg.services.pacbio.metrics.models import ControlMetrics, HiFiMetrics | ||
|
||
|
||
def test_metrics_parser_initialisation(pac_bio_smrt_cell_dir: Path):
    """Test the initialisation of the metrics parser."""
    # GIVEN a PacBio SMRT cell path

    # WHEN initialising the metrics parser
    parser = MetricsParser(smrt_cell_path=pac_bio_smrt_cell_dir)

    # THEN assert that the parser is initialised with the expected attributes
    assert isinstance(parser.hifi_metrics, HiFiMetrics)
    assert isinstance(parser.control_metrics, ControlMetrics)
|
||
|
||
@pytest.mark.parametrize(
    "report_file_path",
    ["pac_bio_control_report", "pac_bio_css_report"],
)
def test_parse_attributes_from_json(
    pac_bio_metrics_parser: MetricsParser,
    report_file_path: str,
    request: FixtureRequest,
):
    """Test the parsing of attributes from any PacBio report file."""
    # GIVEN a PacBio report file and a PacBio metrics parser initialised from the SMRTcell path
    # The parametrised value is a fixture name, so it is resolved through the request object
    report_file: Path = request.getfixturevalue(report_file_path)

    # WHEN parsing the report file
    attributes: list[dict[str, Any]] = pac_bio_metrics_parser._parse_report(report_file=report_file)

    # THEN assert that the report attributes are parsed correctly
    assert isinstance(attributes, list)
    assert isinstance(attributes[0], dict)
    assert "id" in attributes[0]
    assert "value" in attributes[0]
|
||
|
||
@pytest.mark.parametrize(
    "report_file_path, model, metrics_fixture, percent_fields",
    [
        (
            "pac_bio_control_report",
            ControlMetrics,
            "pac_bio_control_metrics",
            [
                ControlAttributeIDs.PERCENT_MEAN_READ_CONCORDANCE,
                ControlAttributeIDs.PERCENT_MODE_READ_CONCORDANCE,
            ],
        ),
        ("pac_bio_css_report", HiFiMetrics, "pac_bio_hifi_metrics", [CCSAttributeIDs.PERCENT_Q30]),
    ],
)
def test_parse_attributes_to_model(
    pac_bio_metrics_parser: MetricsParser,
    report_file_path: str,
    model: Callable,
    metrics_fixture: str,
    percent_fields: list[str],
    request: FixtureRequest,
):
    """Test to parse the attributes to a metrics model."""
    # GIVEN a metrics parser

    # GIVEN a pac-bio report file
    report_file: Path = request.getfixturevalue(report_file_path)

    # GIVEN a metrics object with the expected parsed metrics
    expected_metrics: ControlMetrics | HiFiMetrics = request.getfixturevalue(metrics_fixture)

    # WHEN parsing the attributes to a given metrics model
    parsed_metrics: ControlMetrics | HiFiMetrics = pac_bio_metrics_parser.parse_attributes_to_model(
        report_file=report_file,
        data_model=model,
    )

    # THEN assert that the model attributes are the expected ones
    assert parsed_metrics == expected_metrics

    # THEN assert that the percentage fields of the model are not taken as a fraction
    metrics_dict: dict = parsed_metrics.dict(by_alias=True)
    for percent_field in percent_fields:
        # Index directly so a missing alias fails with a KeyError naming the field
        assert metrics_dict[percent_field] > 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters