diff --git a/cg/cli/post_process/post_process.py b/cg/cli/post_process/post_process.py index 8c4560fd96..93d43b540a 100644 --- a/cg/cli/post_process/post_process.py +++ b/cg/cli/post_process/post_process.py @@ -35,5 +35,4 @@ def post_process_sequencing_run(context: CGConfig, run_name: str, dry_run: bool) post_processing_service.post_process(run_name=run_name, dry_run=dry_run) -post_process_group: click.Group post_process_group.add_command(post_process_sequencing_run) diff --git a/cg/services/run_devices/exc.py b/cg/services/run_devices/exc.py index daa2a4e3e7..0b89724dff 100644 --- a/cg/services/run_devices/exc.py +++ b/cg/services/run_devices/exc.py @@ -2,28 +2,42 @@ class PostProcessingRunFileManagerError(CgError): + """Error raised if something goes wrong managing the sequencing run files.""" + pass class PostProcessingRunDataGeneratorError(CgError): + """Error raised if something goes wrong parsing the run directory data.""" + pass class PostProcessingParsingError(CgError): + """Error raised if something goes wrong parsing the sequencing run metrics.""" + pass class PostProcessingDataTransferError(CgError): + """Error raised if something goes wrong creating the DTOs for post-processing.""" + pass class PostProcessingStoreDataError(CgError): + """Error raised if something goes wrong storing the post-processing data in StatusDB.""" + pass class PostProcessingStoreFileError(CgError): + """Error raised if something goes wrong storing the post-processing files in Housekeeper.""" + pass class PostProcessingError(CgError): + """Error raised if something goes wrong during post-processing.""" + pass diff --git a/cg/services/run_devices/pacbio/run_data_generator/pacbio_run_data_generator.py b/cg/services/run_devices/pacbio/run_data_generator/pacbio_run_data_generator.py index 7bbafd44b3..adca59b4e0 100644 --- a/cg/services/run_devices/pacbio/run_data_generator/pacbio_run_data_generator.py +++ b/cg/services/run_devices/pacbio/run_data_generator/pacbio_run_data_generator.py @@ -1,16 +1,10 @@ from pathlib import Path from cg.services.run_devices.abstract_classes import RunDataGenerator -from cg.services.run_devices.error_handler import ( - handle_post_processing_errors, -) +from cg.services.run_devices.error_handler import handle_post_processing_errors from cg.services.run_devices.exc import PostProcessingRunDataGeneratorError from cg.services.run_devices.pacbio.run_data_generator.run_data import PacBioRunData -from cg.services.run_devices.validators import ( - validate_has_expected_parts, - validate_name_pre_fix, -) -from cg.utils.string import get_element_from_split +from cg.services.run_devices.validators import validate_has_expected_parts, validate_name_pre_fix class PacBioRunDataGenerator(RunDataGenerator): @@ -40,16 +34,16 @@ def get_run_data(self, run_name: str, sequencing_dir: str) -> PacBioRunData: @staticmethod def _get_sequencing_run_name(run_name: str) -> str: - return get_element_from_split(value=run_name, element_position=0, split="/") + return run_name.split("/")[0] @staticmethod def _get_plate_well(run_name: str) -> str: - return get_element_from_split(value=run_name, element_position=-1, split="/") + return run_name.split("/")[1] def _get_plate(self, run_name: str) -> str: plate_well: str = self._get_plate_well(run_name) - return get_element_from_split(value=plate_well, element_position=0, split="_") + return plate_well.split("_")[0] def _get_well(self, run_name: str) -> str: plate_well: str = self._get_plate_well(run_name) - return get_element_from_split(value=plate_well, element_position=-1, split="_") + return plate_well.split("_")[-1] diff --git a/cg/services/run_devices/pacbio/run_file_manager/run_file_manager.py b/cg/services/run_devices/pacbio/run_file_manager/run_file_manager.py index 5624dc7467..6e2d230d47 100644 --- a/cg/services/run_devices/pacbio/run_file_manager/run_file_manager.py +++ b/cg/services/run_devices/pacbio/run_file_manager/run_file_manager.py @@ -19,9 +19,7 @@ def get_files_to_parse(self, run_data: PacBioRunData) -> list[Path]: """Get the file paths required by the PacBioMetricsParser.""" run_path: Path = run_data.full_path validate_files_or_directories_exist([run_path]) - files_to_parse: list[Path] = self._get_report_files(run_path) - files_to_parse.append(self._get_ccs_report_file(run_path)) - return files_to_parse + return self._get_report_files(run_path) @handle_post_processing_errors( to_except=(FileNotFoundError,), to_raise=PostProcessingRunFileManagerError @@ -29,9 +27,7 @@ def get_files_to_parse(self, run_data: PacBioRunData) -> list[Path]: def get_files_to_store(self, run_data: PacBioRunData) -> list[Path]: """Get the files to store for the PostProcessingHKService.""" run_path: Path = run_data.full_path - files_to_store: list[Path] = self.get_files_to_parse(run_data) - files_to_store.append(self._get_hifi_read_file(run_path)) - return files_to_store + return self.get_files_to_parse(run_data) + self._get_hifi_read_files(run_path) @staticmethod def _get_ccs_report_file(run_path: Path) -> Path: @@ -44,8 +40,7 @@ def _get_ccs_report_file(run_path: Path) -> Path: raise FileNotFoundError(f"No CCS report file found in {statistics_dir}") return files[0] - @staticmethod - def _get_report_files(run_path: Path) -> list[Path]: + def _get_report_files(self, run_path: Path) -> list[Path]: """Return the paths to the unzipped report files.""" unzipped_dir: Path = Path( run_path, PacBioDirsAndFiles.STATISTICS_DIR, PacBioDirsAndFiles.UNZIPPED_REPORTS_DIR @@ -55,16 +50,17 @@ def _get_report_files(run_path: Path) -> list[Path]: Path(unzipped_dir, PacBioDirsAndFiles.LOADING_REPORT), Path(unzipped_dir, PacBioDirsAndFiles.RAW_DATA_REPORT), Path(unzipped_dir, PacBioDirsAndFiles.SMRTLINK_DATASETS_REPORT), + self._get_ccs_report_file(run_path), ] validate_files_or_directories_exist(report_files) return report_files @staticmethod - def _get_hifi_read_file(run_path: Path) -> Path: + def _get_hifi_read_files(run_path: Path) -> list[Path]: """Return the path to the HiFi read file.""" hifi_dir = Path(run_path, PacBioDirsAndFiles.HIFI_READS) - bam_file: Path = get_files_matching_pattern( + bam_files: list[Path] = get_files_matching_pattern( directory=hifi_dir, pattern=f"*{FileExtensions.BAM}" - )[0] - validate_files_or_directories_exist([bam_file]) - return bam_file + ) + validate_files_or_directories_exist(bam_files) + return bam_files diff --git a/cg/utils/string.py b/cg/utils/string.py deleted file mode 100644 index c8c556234e..0000000000 --- a/cg/utils/string.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Utils related to string manipulation.""" - -from cg.exc import CgError - - -def get_element_from_split(value: str, element_position: int, split: str) -> str: - elements: list[str] = value.split(split) - if len(elements) < element_position: - raise CgError(message="Provided element position out of bounds.") - return elements[element_position] diff --git a/tests/fixtures/devices/pacbio/SMRTcells/r84202_20240522_133539/1_B01/hifi_reads/m84202_240522_155607_s2.hifi_reads.bam.pbi b/tests/fixtures/devices/pacbio/SMRTcells/r84202_20240522_133539/1_B01/hifi_reads/m84202_240522_155607_s2.hifi_reads.bam.pbi deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/services/run_devices/pacbio/post_processing/test_post_processing.py b/tests/services/run_devices/pacbio/post_processing/test_post_processing.py index 519922247d..1dcdfd6d4b 100644 --- a/tests/services/run_devices/pacbio/post_processing/test_post_processing.py +++ b/tests/services/run_devices/pacbio/post_processing/test_post_processing.py @@ -37,7 +37,7 @@ def test_pac_bio_post_processing_run_name_error(pac_bio_context): def test_pac_bio_post_processing_store_data_error( pac_bio_context: CGConfig, pac_bio_sequencing_run_name: str ): - # GIVEN a PacBioPostProcessingService and a wrong run name + # GIVEN a PacBioPostProcessingService that raises an error when storing data in StatusDB post_processing_service: PacBioPostProcessingService = ( pac_bio_context.post_processing_services.pacbio @@ -55,7 +55,7 @@ def test_pac_bio_post_processing_store_data_error( def test_pac_bio_post_processing_store_files_error( pac_bio_context: CGConfig, pac_bio_sequencing_run_name: str ): - # GIVEN a PacBioPostProcessingService + # GIVEN a PacBioPostProcessingService that raises an error when storing files in Housekeeper post_processing_service: PacBioPostProcessingService = ( pac_bio_context.post_processing_services.pacbio ) diff --git a/tests/services/run_devices/pacbio/run_data_generator/test_pacbio_run_data_generator.py b/tests/services/run_devices/pacbio/run_data_generator/test_pacbio_run_data_generator.py index 42df23971a..7dc753eff1 100644 --- a/tests/services/run_devices/pacbio/run_data_generator/test_pacbio_run_data_generator.py +++ b/tests/services/run_devices/pacbio/run_data_generator/test_pacbio_run_data_generator.py @@ -12,6 +12,7 @@ def test_get_run_data( + pac_bio_run_data_generator: PacBioRunDataGenerator, pac_bio_runs_dir: Path, pac_bio_test_run_name: str, pac_bio_smrt_cell_name: str, @@ -21,8 +22,7 @@ def test_get_run_data( run_name: str = "/".join([pac_bio_test_run_name, pac_bio_smrt_cell_name]) # WHEN Generating run data - run_data_generator = PacBioRunDataGenerator() - run_data: PacBioRunData = run_data_generator.get_run_data( + run_data: PacBioRunData = pac_bio_run_data_generator.get_run_data( run_name=run_name, sequencing_dir=pac_bio_runs_dir.as_posix() ) @@ -30,18 +30,18 @@ def test_get_run_data( assert run_data == expected_pac_bio_run_data -@pytest.mark.parametrize("run_name", ["rimproper_name", "d_improper_name "]) +@pytest.mark.parametrize("wrong_run_name", ["rimproper_name", "d_improper_name "]) def test_get_run_data_improper_name( + pac_bio_run_data_generator: PacBioRunDataGenerator, pac_bio_runs_dir: Path, - run_name: str, + wrong_run_name: str, ): - # GIVEN a PacBioRunDataGenerator and an improper run name - run_data_generator = PacBioRunDataGenerator() + # GIVEN a PacBioRunDataGenerator and a wrong run name - # WHEN Generating run data + # WHEN Generating run data with the wrong run name # THEN an PostProcessingRunDataGeneratorError is raised with pytest.raises(PostProcessingRunDataGeneratorError): - run_data_generator.get_run_data( - run_name=run_name, sequencing_dir=pac_bio_runs_dir.as_posix() + pac_bio_run_data_generator.get_run_data( + run_name=wrong_run_name, sequencing_dir=pac_bio_runs_dir.as_posix() ) diff --git a/tests/services/run_devices/pacbio/run_file_manager/test_pacbio_run_file_manager.py b/tests/services/run_devices/pacbio/run_file_manager/test_pacbio_run_file_manager.py index 6d45f8d6fa..f6e87897d9 100644 --- a/tests/services/run_devices/pacbio/run_file_manager/test_pacbio_run_file_manager.py +++ b/tests/services/run_devices/pacbio/run_file_manager/test_pacbio_run_file_manager.py @@ -1,25 +1,22 @@ +from pathlib import Path from unittest import mock import pytest -from pathlib import Path from cg.services.run_devices.exc import PostProcessingRunFileManagerError from cg.services.run_devices.pacbio.run_data_generator.run_data import PacBioRunData -from cg.services.run_devices.pacbio.run_file_manager.run_file_manager import ( - PacBioRunFileManager, -) +from cg.services.run_devices.pacbio.run_file_manager.run_file_manager import PacBioRunFileManager def test_get_files_to_parse( - expected_pac_bio_run_data: PacBioRunData, pac_bio_report_files_to_parse: list[Path] + expected_pac_bio_run_data: PacBioRunData, + pac_bio_report_files_to_parse: list[Path], + pac_bio_run_file_manager: PacBioRunFileManager, ): - # GIVEN a run data object - - # GIVEN a PacBio run file manager - file_manager = PacBioRunFileManager() + # GIVEN a run data object and a PacBio run file manager # WHEN getting the files to parse - files: list[Path] = file_manager.get_files_to_parse(expected_pac_bio_run_data) + files: list[Path] = pac_bio_run_file_manager.get_files_to_parse(expected_pac_bio_run_data) # THEN the correct files are returned assert files == pac_bio_report_files_to_parse @@ -27,16 +24,14 @@ def test_get_files_to_parse( def test_get_files_to_store( expected_pac_bio_run_data: PacBioRunData, + pac_bio_run_file_manager: PacBioRunFileManager, pac_bio_report_files_to_parse: list[Path], pac_bio_hifi_read_file: Path, ): - # GIVEN a run data object - - # GIVEN a PacBio run file manager - file_manager = PacBioRunFileManager() + # GIVEN a run data object and a PacBio file manager # WHEN getting the files to store - files: list[Path] = file_manager.get_files_to_store(expected_pac_bio_run_data) + files: list[Path] = pac_bio_run_file_manager.get_files_to_store(expected_pac_bio_run_data) # THEN the correct files are returned full_list: list[Path] = pac_bio_report_files_to_parse + [pac_bio_hifi_read_file] @@ -45,32 +40,32 @@ def test_get_files_to_store( def test_get_files_to_store_error( expected_pac_bio_run_data: PacBioRunData, + pac_bio_run_file_manager: PacBioRunFileManager, ): # GIVEN a run data object - # GIVEN a PacBio run file manager - file_manager = PacBioRunFileManager() + # GIVEN a PacBio run file manager that can't find the HiFi read file with mock.patch.object( - file_manager, - attribute="_get_hifi_read_file", + pac_bio_run_file_manager, + attribute="_get_hifi_read_files", side_effect=FileNotFoundError, ): # WHEN getting the files to store # THEN an PostProcessingRunFileManagerError is raised with pytest.raises(PostProcessingRunFileManagerError): - file_manager.get_files_to_store(expected_pac_bio_run_data) + pac_bio_run_file_manager.get_files_to_store(expected_pac_bio_run_data) def test_get_files_to_parse_error( expected_pac_bio_run_data: PacBioRunData, + pac_bio_run_file_manager: PacBioRunFileManager, ): # GIVEN a run data object - # GIVEN a PacBio run file manager - file_manager = PacBioRunFileManager() + # GIVEN a PacBio run file manager that can't find the CCS report file with mock.patch.object( - file_manager, + pac_bio_run_file_manager, attribute="_get_ccs_report_file", side_effect=FileNotFoundError, ): @@ -78,4 +73,4 @@ def test_get_files_to_parse_error( # THEN an PostProcessingRunFileManagerError is raised with pytest.raises(PostProcessingRunFileManagerError): - file_manager.get_files_to_parse(expected_pac_bio_run_data) + pac_bio_run_file_manager.get_files_to_parse(expected_pac_bio_run_data) diff --git a/tests/services/run_devices/pacbio/store_service/test_store_service.py b/tests/services/run_devices/pacbio/store_service/test_store_service.py index 8a526176a0..5fe8df1028 100644 --- a/tests/services/run_devices/pacbio/store_service/test_store_service.py +++ b/tests/services/run_devices/pacbio/store_service/test_store_service.py @@ -5,8 +5,8 @@ import pytest from cg.services.run_devices.exc import ( - PostProcessingStoreDataError, PostProcessingDataTransferError, + PostProcessingStoreDataError, ) from cg.services.run_devices.pacbio.data_storage_service.pacbio_store_service import ( PacBioStoreService, @@ -17,7 +17,6 @@ from cg.services.run_devices.pacbio.data_transfer_service.dto import PacBioDTOs from cg.services.run_devices.pacbio.run_data_generator.run_data import PacBioRunData from cg.store.models import PacBioSampleSequencingMetrics, PacBioSequencingRun, PacBioSMRTCell - from cg.store.store import Store @@ -28,7 +27,7 @@ def test_store_post_processing_data( ): # GIVEN a PacBioStoreService - # GIVEN a successful data transfer service + # GIVEN a data transfer service that returns the correct DTOs # WHEN storing data for a PacBio instrument run with mock.patch( @@ -67,7 +66,7 @@ def test_store_post_processing_data_error_database( ): # GIVEN a PacBioStoreService - # GIVEN a successful data transfer service + # GIVEN a data transfer service that returns the correct DTOs # WHEN storing data for a PacBio instrument run with mock.patch( @@ -85,7 +84,7 @@ def test_store_post_processing_data_error_parser( ): # GIVEN a PacBioStoreService - # GIVEN a successful data transfer service + # GIVEN a data transfer service that returns the correct DTOs # WHEN storing data for a PacBio instrument run with mock.patch( diff --git a/tests/utils/test_string_utils.py b/tests/utils/test_string_utils.py deleted file mode 100644 index f70cdc4ec1..0000000000 --- a/tests/utils/test_string_utils.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Test for the string utilities.""" - -import pytest - -from cg.exc import CgError -from cg.utils.string import get_element_from_split - - -def test_get_element_from_split(): - - # GIVEN a string with a seperator - separated_string: str = "zero_one_two_three" - - # WHEN getting an element divided by a separator based on the position - element: str = get_element_from_split(value=separated_string, element_position=2, split="_") - - # THEN the expected element is returned - assert element == "two" - - -def test_get_element_from_split_error(): - - # GIVEN a string with a seperator - separated_string: str = "zero_one_two_three" - - # WHEN getting an element divided by a separator based on the position that is out of bounds - with pytest.raises(CgError): - get_element_from_split(value=separated_string, element_position=12, split="_") - - # THEN an error is raised