From e3739c0c87e3ac07f14b2fdf930e70b835961d47 Mon Sep 17 00:00:00 2001
From: ChristianOertlin <christian.oertlin@scilifelab.se>
Date: Thu, 23 May 2024 11:19:47 +0200
Subject: [PATCH] refactor(demux post processing run parameters) (#3263)
 (patch)

# Descripion

Refactoring of the run parameters and extraction of flow cell model from run paramters.
---
 cg/constants/metrics.py                       |  2 +
 cg/models/demultiplex/run_parameters.py       | 39 +++++++++++++----
 .../bcl_convert_metrics_service.py            | 23 ++++++++++
 .../illumina_post_processing_service.py       |  2 +-
 .../illumina_post_processing_service/utils.py | 13 ------
 cg/store/crud/create.py                       |  7 +++
 .../run_parameters_fixtures.py                |  4 +-
 .../demultiplexing/test_run_parameters.py     | 28 +++++++++++-
 .../conftest.py                               | 26 -----------
 .../test_illumina_post_processing_utils.py    | 43 -------------------
 10 files changed, 93 insertions(+), 94 deletions(-)
 delete mode 100644 tests/services/illumina_post_processing_service/test_illumina_post_processing_utils.py

diff --git a/cg/constants/metrics.py b/cg/constants/metrics.py
index 7a4b320a20..a85e443b26 100644
--- a/cg/constants/metrics.py
+++ b/cg/constants/metrics.py
@@ -10,6 +10,8 @@ class QualityMetricsColumnNames(StrEnum):
     SAMPLE_INTERNAL_ID: str = "SampleID"
     MEAN_QUALITY_SCORE_Q30: str = "Mean Quality Score (PF)"
     Q30_BASES_PERCENT: str = "% Q30"
+    YIELD: str = "Yield"
+    YIELD_Q30: str = "YieldQ30"
 
 
 class DemuxMetricsColumnNames(StrEnum):
diff --git a/cg/models/demultiplex/run_parameters.py b/cg/models/demultiplex/run_parameters.py
index 4bb9143e67..87c84f1bf0 100644
--- a/cg/models/demultiplex/run_parameters.py
+++ b/cg/models/demultiplex/run_parameters.py
@@ -3,7 +3,7 @@
 import logging
 from abc import abstractmethod
 from pathlib import Path
-from xml.etree import ElementTree
+from xml.etree.ElementTree import Element, ElementTree
 
 from packaging.version import parse
 
@@ -44,9 +44,7 @@ def _validate_instrument(self, node_name: str, node_value: str):
         Raises:
             RunParametersError if the node does not have the expected value."""
         try:
-            application: ElementTree.Element | None = get_tree_node(
-                tree=self.tree, node_name=node_name
-            )
+            application: Element | None = get_tree_node(tree=self.tree, node_name=node_name)
         except XMLError:
             raise RunParametersError(
                 f"Could not find node {node_name} in the run parameters file. "
@@ -129,6 +127,10 @@ def _get_index_settings(self) -> IndexSettings:
             return NOVASEQ_6000_POST_1_5_KITS_INDEX_SETTINGS
         return NO_REVERSE_COMPLEMENTS_INDEX_SETTINGS
 
+    @abstractmethod
+    def get_flow_cell_model(self):
+        pass
+
     def __str__(self):
         return f"RunParameters(path={self.path}, sequencer={self.sequencer})"
 
@@ -189,6 +191,10 @@ def get_read_2_cycles(self) -> int:
         node_name: str = RunParametersXMLNodes.READ_2_HISEQ
         return self._get_node_integer_value(node_name=node_name)
 
+    def get_flow_cell_model(self) -> None:
+        """Return None for run parameters associated with HiSeq sequencing."""
+        return None
+
 
 class RunParametersNovaSeq6000(RunParameters):
     """Specific class for parsing run parameters of NovaSeq6000 sequencing."""
@@ -210,7 +216,7 @@ def control_software_version(self) -> str:
     def reagent_kit_version(self) -> str:
         """Return the reagent kit version if existent, return 'unknown' otherwise."""
         node_name: str = RunParametersXMLNodes.REAGENT_KIT_VERSION
-        xml_node: ElementTree.Element | None = self.tree.find(node_name)
+        xml_node: Element | None = self.tree.find(node_name)
         if xml_node is None:
             LOG.warning("Could not determine reagent kit version")
             LOG.info("Set reagent kit version to 'unknown'")
@@ -242,6 +248,11 @@ def get_read_2_cycles(self) -> int:
         node_name: str = RunParametersXMLNodes.READ_2_NOVASEQ_6000
         return self._get_node_integer_value(node_name=node_name)
 
+    def get_flow_cell_model(self) -> str:
+        """Return the flow cell model referred to as 'FlowCellMode' in the run parameters file."""
+        node_name: str = RunParametersXMLNodes.FLOW_CELL_MODE
+        return self._get_node_string_value(node_name=node_name)
+
 
 class RunParametersNovaSeqX(RunParameters):
     """Specific class for parsing run parameters of NovaSeqX sequencing."""
@@ -272,12 +283,10 @@ def sequencer(self) -> str:
     def _read_parser(self) -> dict[str, int]:
         """Return read and index cycle values parsed as a dictionary."""
         cycle_mapping: dict[str, int] = {}
-        planned_reads_tree: ElementTree.Element = get_tree_node(
+        planned_reads_tree: Element = get_tree_node(
             tree=self.tree, node_name=RunParametersXMLNodes.PLANNED_READS_NOVASEQ_X
         )
-        planned_reads: list[ElementTree.Element] = planned_reads_tree.findall(
-            RunParametersXMLNodes.INNER_READ
-        )
+        planned_reads: list[Element] = planned_reads_tree.findall(RunParametersXMLNodes.INNER_READ)
         for read_elem in planned_reads:
             read_name: str = read_elem.get(RunParametersXMLNodes.READ_NAME)
             cycles: int = int(read_elem.get(RunParametersXMLNodes.CYCLES))
@@ -299,3 +308,15 @@ def get_read_1_cycles(self) -> int:
     def get_read_2_cycles(self) -> int:
         """Return the number of cycles in the second read."""
         return self._read_parser.get(RunParametersXMLNodes.READ_2_NOVASEQ_X)
+
+    def get_flow_cell_model(self) -> str:
+        """Return the flow cell model referred to as 'Mode' or 'Name' in the run parameters file."""
+        consumable_infos: list[Element] = self.tree.findall(".//ConsumableInfo")
+        for consumable_info in consumable_infos:
+            type_element: Element | None = consumable_info.find("Type")
+            if type_element is not None and type_element.text == "FlowCell":
+                name_element: Element | None = consumable_info.find("Name") or consumable_info.find(
+                    "Mode"
+                )
+                if name_element is not None:
+                    return name_element.text
diff --git a/cg/services/bcl_convert_metrics_service/bcl_convert_metrics_service.py b/cg/services/bcl_convert_metrics_service/bcl_convert_metrics_service.py
index ab3f186df3..11724d01fb 100644
--- a/cg/services/bcl_convert_metrics_service/bcl_convert_metrics_service.py
+++ b/cg/services/bcl_convert_metrics_service/bcl_convert_metrics_service.py
@@ -111,3 +111,26 @@ def create_sample_run_metrics(
             base_mean_quality_score=mean_quality_score,
             created_at=datetime.now(),
         )
+
+    def create_sample_sequencing_metrics_for_flow_cell(
+        self,
+        flow_cell_directory: Path,
+        run_metrics_id: int,
+        store: Store,
+    ) -> list[IlluminaSampleSequencingMetrics]:
+        """Parse the demultiplexing metrics data into the sequencing statistics model."""
+        metrics_parser = MetricsParser(flow_cell_directory)
+        sample_internal_ids: list[str] = metrics_parser.get_sample_internal_ids()
+        sample_lane_sequencing_metrics: list[IlluminaSampleSequencingMetrics] = []
+
+        for sample_internal_id in sample_internal_ids:
+            for lane in metrics_parser.get_lanes_for_sample(sample_internal_id=sample_internal_id):
+                metrics: IlluminaSampleSequencingMetrics = self.create_sample_run_metrics(
+                    sample_internal_id=sample_internal_id,
+                    lane=lane,
+                    metrics_parser=metrics_parser,
+                    run_metrics_id=run_metrics_id,
+                    store=store,
+                )
+                sample_lane_sequencing_metrics.append(metrics)
+        return sample_lane_sequencing_metrics
diff --git a/cg/services/illumina_post_processing_service/illumina_post_processing_service.py b/cg/services/illumina_post_processing_service/illumina_post_processing_service.py
index 3b02987437..8f49724ac3 100644
--- a/cg/services/illumina_post_processing_service/illumina_post_processing_service.py
+++ b/cg/services/illumina_post_processing_service/illumina_post_processing_service.py
@@ -36,7 +36,7 @@ def store_illumina_flow_cell(
         Create flow cell from the parsed and validated flow cell data.
         And add the samples on the flow cell to the model.
         """
-        model: str | None = get_flow_cell_model_from_run_parameters(flow_cell.run_parameters_path)
+        model: str | None = flow_cell.run_parameters.get_flow_cell_model()
         new_flow_cell = IlluminaFlowCell(
             internal_id=flow_cell.id, type=DeviceType.ILLUMINA, model=model
         )
diff --git a/cg/services/illumina_post_processing_service/utils.py b/cg/services/illumina_post_processing_service/utils.py
index 2b2874dd7a..f070fdbca0 100644
--- a/cg/services/illumina_post_processing_service/utils.py
+++ b/cg/services/illumina_post_processing_service/utils.py
@@ -10,16 +10,3 @@
 
 def create_delivery_file_in_flow_cell_directory(flow_cell_directory: Path) -> None:
     Path(flow_cell_directory, DemultiplexingDirsAndFiles.DELIVERY).touch()
-
-
-def get_flow_cell_model_from_run_parameters(run_parameters_path: Path) -> str | None:
-    """Return the model of the flow cell."""
-    xml_tree: ElementTree = read_xml(run_parameters_path)
-    node: Element | None = None
-    for node_name in [RunParametersXMLNodes.MODE, RunParametersXMLNodes.FLOW_CELL_MODE]:
-        try:
-            node: Element = get_tree_node(xml_tree, node_name)
-            return node.text
-        except XMLError:
-            continue
-    return node
diff --git a/cg/store/crud/create.py b/cg/store/crud/create.py
index addc51a051..b8920f5fd9 100644
--- a/cg/store/crud/create.py
+++ b/cg/store/crud/create.py
@@ -31,6 +31,7 @@
     User,
     order_case,
     IlluminaFlowCell,
+    IlluminaSequencingRun,
 )
 
 LOG = logging.getLogger(__name__)
@@ -425,3 +426,9 @@ def add_illumina_flow_cell(self, flow_cell: IlluminaFlowCell) -> IlluminaFlowCel
         session.add(flow_cell)
         LOG.debug(f"Flow cell added to status db: {flow_cell.id}.")
         return flow_cell
+
+    def add_illumina_sequencing_metrics(
+        self, sequencing_metrics: IlluminaSequencingRun
+    ) -> IlluminaSequencingRun:
+        """Add a new Illumina flow cell to the status database as a pending transaction."""
+        pass
diff --git a/tests/fixture_plugins/demultiplex_fixtures/run_parameters_fixtures.py b/tests/fixture_plugins/demultiplex_fixtures/run_parameters_fixtures.py
index 32b92c8b00..f5694f15a4 100644
--- a/tests/fixture_plugins/demultiplex_fixtures/run_parameters_fixtures.py
+++ b/tests/fixture_plugins/demultiplex_fixtures/run_parameters_fixtures.py
@@ -26,7 +26,9 @@ def run_parameters_novaseq_6000_different_index(
 
 
 @pytest.fixture(scope="function")
-def run_parameters_novaseq_x_different_index(run_parameters_dir: Path) -> RunParametersNovaSeqX:
+def run_parameters_novaseq_x_different_index(
+    run_parameters_dir: Path,
+) -> RunParametersNovaSeqX:
     """Return a NovaSeqX RunParameters object with different index cycles."""
     path = Path(run_parameters_dir, "RunParameters_novaseq_X_different_index_cycles.xml")
     return RunParametersNovaSeqX(run_parameters_path=path)
diff --git a/tests/models/demultiplexing/test_run_parameters.py b/tests/models/demultiplexing/test_run_parameters.py
index dad052215b..d99bbf072d 100644
--- a/tests/models/demultiplexing/test_run_parameters.py
+++ b/tests/models/demultiplexing/test_run_parameters.py
@@ -51,7 +51,11 @@ def test_run_parameters_parent_class_fails(
     "run_parameters_path, constructor, sequencer",
     [
         ("hiseq_x_single_index_run_parameters_path", RunParametersHiSeq, Sequencers.HISEQX),
-        ("hiseq_2500_dual_index_run_parameters_path", RunParametersHiSeq, Sequencers.HISEQGA),
+        (
+            "hiseq_2500_dual_index_run_parameters_path",
+            RunParametersHiSeq,
+            Sequencers.HISEQGA,
+        ),
         (
             "novaseq_6000_run_parameters_pre_1_5_kits_path",
             RunParametersNovaSeq6000,
@@ -301,3 +305,25 @@ def test_get_index_settings(
     settings: IndexSettings = flow_cell.run_parameters.index_settings
     # THEN the correct index settings are returned
     assert settings == correct_settings
+
+
+@pytest.mark.parametrize(
+    "run_parameters_fixture, expected_result",
+    [
+        ("hiseq_x_single_index_run_parameters", None),
+        ("hiseq_2500_dual_index_run_parameters", None),
+        ("novaseq_6000_run_parameters_pre_1_5_kits", "S4"),
+        ("novaseq_6000_run_parameters_post_1_5_kits", "S1"),
+        ("novaseq_x_run_parameters", "10B"),
+    ],
+)
+def test_get_flow_cell_mode(
+    run_parameters_fixture: str, expected_result: str | None, request: FixtureRequest
+):
+    """Test that the correct flow cell mode is returned for each RunParameters object."""
+    # GIVEN a RunParameters object
+    run_parameters: RunParameters = request.getfixturevalue(run_parameters_fixture)
+    # WHEN getting the flow cell mode
+    result: str | None = run_parameters.get_flow_cell_model()
+    # THEN the correct flow cell mode is returned
+    assert result == expected_result
diff --git a/tests/services/illumina_post_processing_service/conftest.py b/tests/services/illumina_post_processing_service/conftest.py
index f530692a81..1c27c857de 100644
--- a/tests/services/illumina_post_processing_service/conftest.py
+++ b/tests/services/illumina_post_processing_service/conftest.py
@@ -3,29 +3,3 @@
 from pathlib import Path
 
 import pytest
-
-
-@pytest.fixture
-def run_paramters_with_flow_cell_mode_node_name() -> Path:
-    return Path(
-        "tests",
-        "fixtures",
-        "apps",
-        "demultiplexing",
-        "flow_cells",
-        "230912_A00187_1009_AHK33MDRX3",
-        "RunParameters.xml",
-    )
-
-
-@pytest.fixture
-def run_parameters_without_model() -> Path:
-    return Path(
-        "tests",
-        "fixtures",
-        "apps",
-        "demultiplexing",
-        "flow_cells",
-        "170517_ST-E00266_0210_BHJCFFALXX",
-        "runParameters.xml",
-    )
diff --git a/tests/services/illumina_post_processing_service/test_illumina_post_processing_utils.py b/tests/services/illumina_post_processing_service/test_illumina_post_processing_utils.py
deleted file mode 100644
index b4e6ed7cc7..0000000000
--- a/tests/services/illumina_post_processing_service/test_illumina_post_processing_utils.py
+++ /dev/null
@@ -1,43 +0,0 @@
-"""Test utils of the illumina post processing service."""
-
-from pathlib import Path
-
-from cg.services.illumina_post_processing_service.utils import (
-    get_flow_cell_model_from_run_parameters,
-)
-
-
-def test_get_flow_cell_model_from_run_parameters_with_mode_node_name(
-    novaseq_x_run_parameters_path: Path,
-):
-    # GIVEN a run parameters file from a NovaSeq X run
-
-    # WHEN getting the flow cell model from the run parameters file
-    flow_cell_model = get_flow_cell_model_from_run_parameters(novaseq_x_run_parameters_path)
-
-    # THEN the flow cell model should be returned
-    assert flow_cell_model == "10B"
-
-
-def test_get_flow_cel_model_from_run_parameters_with_flow_cell_mode_node_name(
-    run_paramters_with_flow_cell_mode_node_name: Path,
-):
-    # GIVEN a run parameters file with a FlowCellMode node
-
-    # WHEN getting the flow cell model from the run parameters file
-    flow_cell_model = get_flow_cell_model_from_run_parameters(
-        run_paramters_with_flow_cell_mode_node_name
-    )
-
-    # THEN the flow cell model should be returned
-    assert flow_cell_model == "S1"
-
-
-def test_get_flow_cell_model_from_run_paramters_without_model(run_parameters_without_model: Path):
-    # GIVEN a run parameters file without a model
-
-    # WHEN getting the flow cell model from the run parameters file
-    flow_cell_model = get_flow_cell_model_from_run_parameters(run_parameters_without_model)
-
-    # THEN the flow cell model should be None
-    assert flow_cell_model is None