Skip to content

Commit

Permalink
refactor(demux post processing run parameters) (#3263) (patch)
Browse files Browse the repository at this point in the history
# Descripion

Refactoring of the run parameters and extraction of flow cell model from run paramters.
  • Loading branch information
ChrOertlin authored May 23, 2024
1 parent 7066d3f commit e3739c0
Show file tree
Hide file tree
Showing 10 changed files with 93 additions and 94 deletions.
2 changes: 2 additions & 0 deletions cg/constants/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ class QualityMetricsColumnNames(StrEnum):
SAMPLE_INTERNAL_ID: str = "SampleID"
MEAN_QUALITY_SCORE_Q30: str = "Mean Quality Score (PF)"
Q30_BASES_PERCENT: str = "% Q30"
YIELD: str = "Yield"
YIELD_Q30: str = "YieldQ30"


class DemuxMetricsColumnNames(StrEnum):
Expand Down
39 changes: 30 additions & 9 deletions cg/models/demultiplex/run_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
from abc import abstractmethod
from pathlib import Path
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, ElementTree

from packaging.version import parse

Expand Down Expand Up @@ -44,9 +44,7 @@ def _validate_instrument(self, node_name: str, node_value: str):
Raises:
RunParametersError if the node does not have the expected value."""
try:
application: ElementTree.Element | None = get_tree_node(
tree=self.tree, node_name=node_name
)
application: Element | None = get_tree_node(tree=self.tree, node_name=node_name)
except XMLError:
raise RunParametersError(
f"Could not find node {node_name} in the run parameters file. "
Expand Down Expand Up @@ -129,6 +127,10 @@ def _get_index_settings(self) -> IndexSettings:
return NOVASEQ_6000_POST_1_5_KITS_INDEX_SETTINGS
return NO_REVERSE_COMPLEMENTS_INDEX_SETTINGS

@abstractmethod
def get_flow_cell_model(self):
pass

def __str__(self):
return f"RunParameters(path={self.path}, sequencer={self.sequencer})"

Expand Down Expand Up @@ -189,6 +191,10 @@ def get_read_2_cycles(self) -> int:
node_name: str = RunParametersXMLNodes.READ_2_HISEQ
return self._get_node_integer_value(node_name=node_name)

def get_flow_cell_model(self) -> None:
"""Return None for run parameters associated with HiSeq sequencing."""
return None


class RunParametersNovaSeq6000(RunParameters):
"""Specific class for parsing run parameters of NovaSeq6000 sequencing."""
Expand All @@ -210,7 +216,7 @@ def control_software_version(self) -> str:
def reagent_kit_version(self) -> str:
"""Return the reagent kit version if existent, return 'unknown' otherwise."""
node_name: str = RunParametersXMLNodes.REAGENT_KIT_VERSION
xml_node: ElementTree.Element | None = self.tree.find(node_name)
xml_node: Element | None = self.tree.find(node_name)
if xml_node is None:
LOG.warning("Could not determine reagent kit version")
LOG.info("Set reagent kit version to 'unknown'")
Expand Down Expand Up @@ -242,6 +248,11 @@ def get_read_2_cycles(self) -> int:
node_name: str = RunParametersXMLNodes.READ_2_NOVASEQ_6000
return self._get_node_integer_value(node_name=node_name)

def get_flow_cell_model(self) -> str:
"""Return the flow cell model referred to as 'FlowCellMode' in the run parameters file."""
node_name: str = RunParametersXMLNodes.FLOW_CELL_MODE
return self._get_node_string_value(node_name=node_name)


class RunParametersNovaSeqX(RunParameters):
"""Specific class for parsing run parameters of NovaSeqX sequencing."""
Expand Down Expand Up @@ -272,12 +283,10 @@ def sequencer(self) -> str:
def _read_parser(self) -> dict[str, int]:
"""Return read and index cycle values parsed as a dictionary."""
cycle_mapping: dict[str, int] = {}
planned_reads_tree: ElementTree.Element = get_tree_node(
planned_reads_tree: Element = get_tree_node(
tree=self.tree, node_name=RunParametersXMLNodes.PLANNED_READS_NOVASEQ_X
)
planned_reads: list[ElementTree.Element] = planned_reads_tree.findall(
RunParametersXMLNodes.INNER_READ
)
planned_reads: list[Element] = planned_reads_tree.findall(RunParametersXMLNodes.INNER_READ)
for read_elem in planned_reads:
read_name: str = read_elem.get(RunParametersXMLNodes.READ_NAME)
cycles: int = int(read_elem.get(RunParametersXMLNodes.CYCLES))
Expand All @@ -299,3 +308,15 @@ def get_read_1_cycles(self) -> int:
def get_read_2_cycles(self) -> int:
"""Return the number of cycles in the second read."""
return self._read_parser.get(RunParametersXMLNodes.READ_2_NOVASEQ_X)

def get_flow_cell_model(self) -> str:
"""Return the flow cell model referred to as 'Mode' or 'Name' in the run parameters file."""
consumable_infos: list[Element] = self.tree.findall(".//ConsumableInfo")
for consumable_info in consumable_infos:
type_element: Element | None = consumable_info.find("Type")
if type_element is not None and type_element.text == "FlowCell":
name_element: Element | None = consumable_info.find("Name") or consumable_info.find(
"Mode"
)
if name_element is not None:
return name_element.text
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,26 @@ def create_sample_run_metrics(
base_mean_quality_score=mean_quality_score,
created_at=datetime.now(),
)

def create_sample_sequencing_metrics_for_flow_cell(
self,
flow_cell_directory: Path,
run_metrics_id: int,
store: Store,
) -> list[IlluminaSampleSequencingMetrics]:
"""Parse the demultiplexing metrics data into the sequencing statistics model."""
metrics_parser = MetricsParser(flow_cell_directory)
sample_internal_ids: list[str] = metrics_parser.get_sample_internal_ids()
sample_lane_sequencing_metrics: list[IlluminaSampleSequencingMetrics] = []

for sample_internal_id in sample_internal_ids:
for lane in metrics_parser.get_lanes_for_sample(sample_internal_id=sample_internal_id):
metrics: IlluminaSampleSequencingMetrics = self.create_sample_run_metrics(
sample_internal_id=sample_internal_id,
lane=lane,
metrics_parser=metrics_parser,
run_metrics_id=run_metrics_id,
store=store,
)
sample_lane_sequencing_metrics.append(metrics)
return sample_lane_sequencing_metrics
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def store_illumina_flow_cell(
Create flow cell from the parsed and validated flow cell data.
And add the samples on the flow cell to the model.
"""
model: str | None = get_flow_cell_model_from_run_parameters(flow_cell.run_parameters_path)
model: str | None = flow_cell.run_parameters.get_flow_cell_model()
new_flow_cell = IlluminaFlowCell(
internal_id=flow_cell.id, type=DeviceType.ILLUMINA, model=model
)
Expand Down
13 changes: 0 additions & 13 deletions cg/services/illumina_post_processing_service/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,3 @@

def create_delivery_file_in_flow_cell_directory(flow_cell_directory: Path) -> None:
Path(flow_cell_directory, DemultiplexingDirsAndFiles.DELIVERY).touch()


def get_flow_cell_model_from_run_parameters(run_parameters_path: Path) -> str | None:
"""Return the model of the flow cell."""
xml_tree: ElementTree = read_xml(run_parameters_path)
node: Element | None = None
for node_name in [RunParametersXMLNodes.MODE, RunParametersXMLNodes.FLOW_CELL_MODE]:
try:
node: Element = get_tree_node(xml_tree, node_name)
return node.text
except XMLError:
continue
return node
7 changes: 7 additions & 0 deletions cg/store/crud/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
User,
order_case,
IlluminaFlowCell,
IlluminaSequencingRun,
)

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -425,3 +426,9 @@ def add_illumina_flow_cell(self, flow_cell: IlluminaFlowCell) -> IlluminaFlowCel
session.add(flow_cell)
LOG.debug(f"Flow cell added to status db: {flow_cell.id}.")
return flow_cell

def add_illumina_sequencing_metrics(
self, sequencing_metrics: IlluminaSequencingRun
) -> IlluminaSequencingRun:
"""Add a new Illumina flow cell to the status database as a pending transaction."""
pass
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def run_parameters_novaseq_6000_different_index(


@pytest.fixture(scope="function")
def run_parameters_novaseq_x_different_index(run_parameters_dir: Path) -> RunParametersNovaSeqX:
def run_parameters_novaseq_x_different_index(
run_parameters_dir: Path,
) -> RunParametersNovaSeqX:
"""Return a NovaSeqX RunParameters object with different index cycles."""
path = Path(run_parameters_dir, "RunParameters_novaseq_X_different_index_cycles.xml")
return RunParametersNovaSeqX(run_parameters_path=path)
Expand Down
28 changes: 27 additions & 1 deletion tests/models/demultiplexing/test_run_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@ def test_run_parameters_parent_class_fails(
"run_parameters_path, constructor, sequencer",
[
("hiseq_x_single_index_run_parameters_path", RunParametersHiSeq, Sequencers.HISEQX),
("hiseq_2500_dual_index_run_parameters_path", RunParametersHiSeq, Sequencers.HISEQGA),
(
"hiseq_2500_dual_index_run_parameters_path",
RunParametersHiSeq,
Sequencers.HISEQGA,
),
(
"novaseq_6000_run_parameters_pre_1_5_kits_path",
RunParametersNovaSeq6000,
Expand Down Expand Up @@ -301,3 +305,25 @@ def test_get_index_settings(
settings: IndexSettings = flow_cell.run_parameters.index_settings
# THEN the correct index settings are returned
assert settings == correct_settings


@pytest.mark.parametrize(
"run_parameters_fixture, expected_result",
[
("hiseq_x_single_index_run_parameters", None),
("hiseq_2500_dual_index_run_parameters", None),
("novaseq_6000_run_parameters_pre_1_5_kits", "S4"),
("novaseq_6000_run_parameters_post_1_5_kits", "S1"),
("novaseq_x_run_parameters", "10B"),
],
)
def test_get_flow_cell_mode(
run_parameters_fixture: str, expected_result: str | None, request: FixtureRequest
):
"""Test that the correct flow cell mode is returned for each RunParameters object."""
# GIVEN a RunParameters object
run_parameters: RunParameters = request.getfixturevalue(run_parameters_fixture)
# WHEN getting the flow cell mode
result: str | None = run_parameters.get_flow_cell_model()
# THEN the correct flow cell mode is returned
assert result == expected_result
26 changes: 0 additions & 26 deletions tests/services/illumina_post_processing_service/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,3 @@
from pathlib import Path

import pytest


@pytest.fixture
def run_paramters_with_flow_cell_mode_node_name() -> Path:
return Path(
"tests",
"fixtures",
"apps",
"demultiplexing",
"flow_cells",
"230912_A00187_1009_AHK33MDRX3",
"RunParameters.xml",
)


@pytest.fixture
def run_parameters_without_model() -> Path:
return Path(
"tests",
"fixtures",
"apps",
"demultiplexing",
"flow_cells",
"170517_ST-E00266_0210_BHJCFFALXX",
"runParameters.xml",
)

This file was deleted.

0 comments on commit e3739c0

Please sign in to comment.