From 8342ad406f87d70ed23698b464dbf9dbaa0448a9 Mon Sep 17 00:00:00 2001 From: Henrik Stranneheim Date: Mon, 23 Oct 2023 08:41:04 +0200 Subject: [PATCH 1/5] Add backup flow cells (#2586) (minor) ### Added - New CLI command: cg backup flow-cells - Tests ### Changed - Add general PdcError, DsmcAlreadyRunningError - Initialize PdcAPI from context --- cg/cli/backup.py | 63 ++++++- cg/exc.py | 14 +- cg/meta/backup/backup.py | 20 +-- cg/meta/backup/pdc.py | 106 ++++++++++- cg/models/cg_config.py | 12 ++ cg/store/api/core.py | 10 +- cg/store/api/update.py | 18 ++ requirements.txt | 1 + tests/cli/backup/conftest.py | 3 +- tests/cli/backup/test_backup_command.py | 111 +++++++++++- tests/conftest.py | 23 +++ tests/meta/backup/test_meta_backup.py | 40 +---- tests/meta/backup/test_meta_pdc.py | 227 +++++++++++++++++++++++- tests/meta/encryption/conftest.py | 25 --- tests/store/api/test_update.py | 20 +++ 15 files changed, 590 insertions(+), 103 deletions(-) create mode 100644 cg/store/api/update.py create mode 100644 tests/store/api/test_update.py diff --git a/cg/cli/backup.py b/cg/cli/backup.py index 814a3c165f..0ec20c945d 100644 --- a/cg/cli/backup.py +++ b/cg/cli/backup.py @@ -10,7 +10,13 @@ from cg.apps.slurm.slurm_api import SlurmAPI from cg.constants.constants import DRY_RUN, FlowCellStatus from cg.constants.housekeeper_tags import SequencingFileTag -from cg.exc import FlowCellEncryptionError, FlowCellError +from cg.exc import ( + DsmcAlreadyRunningError, + FlowCellAlreadyBackedUpError, + FlowCellEncryptionError, + FlowCellError, + PdcError, +) from cg.meta.backup.backup import BackupAPI, SpringBackupAPI from cg.meta.backup.pdc import PdcAPI from cg.meta.encryption.encryption import ( @@ -20,7 +26,10 @@ ) from cg.meta.tar.tar import TarAPI from cg.models.cg_config import CGConfig -from cg.models.flow_cell.flow_cell import get_flow_cells_from_path +from cg.models.flow_cell.flow_cell import ( + FlowCellDirectoryData, + get_flow_cells_from_path, +) from cg.store import Store from cg.store.models import Flowcell, Sample @@ -34,13 +43,56 @@ def backup(context: CGConfig): pass +@backup.command("flow-cells") +@DRY_RUN +@click.pass_obj +def backup_flow_cells(context: CGConfig, dry_run: bool): + """Back-up flow cells.""" + pdc_api = context.pdc_api + pdc_api.dry_run = dry_run + status_db: Store = context.status_db + flow_cells: list[FlowCellDirectoryData] = get_flow_cells_from_path( + flow_cells_dir=Path(context.flow_cells_dir) + ) + for flow_cell in flow_cells: + db_flow_cell: Optional[Flowcell] = status_db.get_flow_cell_by_name( + flow_cell_name=flow_cell.id + ) + flow_cell_encryption_api = FlowCellEncryptionAPI( + binary_path=context.encryption.binary_path, + dry_run=dry_run, + encryption_dir=Path(context.backup.encryption_directories.current), + flow_cell=flow_cell, + pigz_binary_path=context.pigz.binary_path, + slurm_api=SlurmAPI(), + sbatch_parameter=context.backup.slurm_flow_cell_encryption.dict(), + tar_api=TarAPI(binary_path=context.tar.binary_path, dry_run=dry_run), + ) + try: + pdc_api.start_flow_cell_backup( + db_flow_cell=db_flow_cell, + flow_cell_encryption_api=flow_cell_encryption_api, + status_db=status_db, + ) + except ( + DsmcAlreadyRunningError, + FlowCellAlreadyBackedUpError, + FlowCellEncryptionError, + PdcError, + ) as error: + logging.error(f"{error}") + + @backup.command("encrypt-flow-cells") @DRY_RUN @click.pass_obj def encrypt_flow_cells(context: CGConfig, dry_run: bool): """Encrypt flow cells.""" status_db: Store = context.status_db - for flow_cell in 
get_flow_cells_from_path(flow_cells_dir=Path(context.flow_cells_dir)): + flow_cells: list[FlowCellDirectoryData] = get_flow_cells_from_path( + flow_cells_dir=Path(context.flow_cells_dir) + ) + for flow_cell in flow_cells: db_flow_cell: Optional[Flowcell] = status_db.get_flow_cell_by_name( flow_cell_name=flow_cell.id ) @@ -60,7 +112,7 @@ def encrypt_flow_cells(context: CGConfig, dry_run: bool): try: flow_cell_encryption_api.start_encryption() except (FlowCellError, FlowCellEncryptionError) as error: - logging.debug(f"{error}") + logging.error(f"{error}") @backup.command("fetch-flow-cell") @@ -70,7 +122,8 @@ def encrypt_flow_cells(context: CGConfig, dry_run: bool): def fetch_flow_cell(context: CGConfig, dry_run: bool, flow_cell_id: Optional[str] = None): """Fetch the first flow cell in the requested queue from backup""" - pdc_api = PdcAPI(binary_path=context.pdc.binary_path, dry_run=dry_run) + pdc_api = context.pdc_api + pdc_api.dry_run = dry_run encryption_api = EncryptionAPI(binary_path=context.encryption.binary_path, dry_run=dry_run) tar_api = TarAPI(binary_path=context.tar.binary_path, dry_run=dry_run) context.meta_apis["backup_api"] = BackupAPI( diff --git a/cg/exc.py b/cg/exc.py index 7d8b54523b..0a2c36b874 100644 --- a/cg/exc.py +++ b/cg/exc.py @@ -66,6 +66,10 @@ class CleanFlowCellFailedError(CgError): """ +class DsmcAlreadyRunningError(CgError): + """Raised when there is already a Dsmc process running on the system.""" + + class DecompressionNeededError(CgError): """Raised when decompression still needed to start analysis.""" @@ -92,6 +96,10 @@ class FlowCellEncryptionError(CgError): """Raised when there is a problem with encrypting a flow cell.""" +class FlowCellAlreadyBackedUpError(CgError): + """Raised when a flow cell is already backed-up.""" + + class HousekeeperFileMissingError(CgError): """ Exception raised when a file is missing in Housekeeper. 
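Both of the exceptions added above derive from CgError, as does the PdcError base class introduced in the next hunk, so callers can handle the whole PDC failure family in one place. A minimal illustrative sketch of that calling pattern, mirroring the backup_flow_cells command earlier in this patch (the wrapper name try_backup_one_flow_cell is hypothetical):

import logging

from cg.exc import (
    DsmcAlreadyRunningError,
    FlowCellAlreadyBackedUpError,
    FlowCellEncryptionError,
    PdcError,
)


def try_backup_one_flow_cell(pdc_api, db_flow_cell, flow_cell_encryption_api, status_db) -> None:
    # Hypothetical helper: start the backup and log, rather than propagate,
    # the known failure modes so the remaining flow cells are still processed.
    try:
        pdc_api.start_flow_cell_backup(
            db_flow_cell=db_flow_cell,
            flow_cell_encryption_api=flow_cell_encryption_api,
            status_db=status_db,
        )
    except (
        DsmcAlreadyRunningError,
        FlowCellAlreadyBackedUpError,
        FlowCellEncryptionError,
        PdcError,
    ) as error:
        logging.error(f"{error}")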
@@ -188,7 +196,11 @@ class LoqusdbDuplicateRecordError(LoqusdbError): """Exception related to duplicate records in Loqusdb.""" -class PdcNoFilesMatchingSearchError(CgError): +class PdcError(CgError): + """Exception raised when a PDC API interaction fails.""" + + +class PdcNoFilesMatchingSearchError(PdcError): """Exception raised when PDC API returns no files matching the search criteria.""" diff --git a/cg/meta/backup/backup.py b/cg/meta/backup/backup.py index d2b4f34098..bdf58fe2bf 100644 --- a/cg/meta/backup/backup.py +++ b/cg/meta/backup/backup.py @@ -86,14 +86,14 @@ def fetch_flow_cell(self, flow_cell: Optional[Flowcell] = None) -> Optional[floa LOG.info(f"{flow_cell.name}: retrieving from PDC") try: - dcms_output: list[str] = self.query_pdc_for_flow_cell(flow_cell.name) + dsmc_output: list[str] = self.query_pdc_for_flow_cell(flow_cell.name) except PdcNoFilesMatchingSearchError as error: LOG.error(f"PDC query failed: {error}") raise error - archived_key: Path = self.get_archived_encryption_key_path(dcms_output=dcms_output) - archived_flow_cell: Path = self.get_archived_flow_cell_path(dcms_output=dcms_output) + archived_key: Path = self.get_archived_encryption_key_path(dsmc_output=dsmc_output) + archived_flow_cell: Path = self.get_archived_flow_cell_path(dsmc_output=dsmc_output) if not self.dry_run: return self._process_flow_cell( @@ -279,11 +279,11 @@ def retrieve_archived_file(self, archived_file: Path, run_dir: Path) -> None: ) @classmethod - def get_archived_flow_cell_path(cls, dcms_output: list[str]) -> Optional[Path]: + def get_archived_flow_cell_path(cls, dsmc_output: list[str]) -> Optional[Path]: """Get the path of the archived flow cell from a PDC query.""" flow_cell_line: str = [ row - for row in dcms_output + for row in dsmc_output if FileExtensions.TAR in row and FileExtensions.GZIP in row and FileExtensions.GPG in row @@ -295,11 +295,11 @@ def get_archived_flow_cell_path(cls, dcms_output: list[str]) -> Optional[Path]: return archived_flow_cell @classmethod - def get_archived_encryption_key_path(cls, dcms_output: list[str]) -> Optional[Path]: + def get_archived_encryption_key_path(cls, dsmc_output: list[str]) -> Optional[Path]: """Get the encryption key for the archived flow cell from a PDC query.""" encryption_key_line: str = [ row - for row in dcms_output + for row in dsmc_output if FileExtensions.KEY in row and FileExtensions.GPG in row and FileExtensions.GZIP not in row @@ -347,12 +347,10 @@ def encrypt_and_archive_spring_file(self, spring_file_path: Path) -> None: self.encryption_api.key_asymmetric_encryption(spring_file_path) self.encryption_api.compare_spring_file_checksums(spring_file_path) self.pdc.archive_file_to_pdc( - file_path=str(self.encryption_api.encrypted_spring_file_path(spring_file_path)), - dry_run=self.dry_run, + file_path=str(self.encryption_api.encrypted_spring_file_path(spring_file_path)) ) self.pdc.archive_file_to_pdc( - file_path=str(self.encryption_api.encrypted_key_path(spring_file_path)), - dry_run=self.dry_run, + file_path=str(self.encryption_api.encrypted_key_path(spring_file_path)) ) self.mark_file_as_archived(spring_file_path) self.encryption_api.cleanup(spring_file_path) diff --git a/cg/meta/backup/pdc.py b/cg/meta/backup/pdc.py index 19a924e610..731b020083 100644 --- a/cg/meta/backup/pdc.py +++ b/cg/meta/backup/pdc.py @@ -1,8 +1,20 @@ """ Module to group PDC related commands """ import logging +from pathlib import Path + +import psutil from cg.constants.pdc import DSMCParameters +from cg.exc import ( + DsmcAlreadyRunningError, + 
FlowCellAlreadyBackedUpError, + FlowCellEncryptionError, + PdcError, +) +from cg.meta.encryption.encryption import FlowCellEncryptionAPI +from cg.store import Store +from cg.store.models import Flowcell from cg.utils import Process LOG = logging.getLogger(__name__) @@ -13,16 +25,31 @@ class PdcAPI: """Group PDC related commands""" - def __init__(self, binary_path: str = None, dry_run: bool = False): + def __init__(self, binary_path: str, dry_run: bool = False): self.process: Process = Process(binary=binary_path) self.dry_run: bool = dry_run - def archive_file_to_pdc(self, file_path: str, dry_run: bool = False) -> None: - """Archive a file by storing it on PDC""" - command: list = DSMCParameters.ARCHIVE_COMMAND.copy() - command.append(file_path) - if not dry_run: - self.run_dsmc_command(command=command) + @classmethod + def validate_is_dsmc_running(cls) -> bool: + """Check if a Dsmc process is already running on the system. + Note: + Exceptions raised while inspecting processes are logged and ignored. + """ + is_dsmc_running: bool = False + try: + for process in psutil.process_iter(): + if "dsmc" in process.name(): + is_dsmc_running = True + except Exception as error: + LOG.debug(f"{error}") + if is_dsmc_running: + LOG.debug("A Dsmc process is already running") + return is_dsmc_running + + def archive_file_to_pdc(self, file_path: str) -> None: + """Archive a file by storing it on PDC.""" + if not self.dry_run: + self.run_dsmc_command(command=DSMCParameters.ARCHIVE_COMMAND + [file_path]) def query_pdc(self, search_pattern: str) -> None: """Query PDC based on a given search pattern.""" @@ -41,7 +68,68 @@ def retrieve_file_from_pdc(self, file_path: str, target_path: str = None) -> Non self.run_dsmc_command(command=command) def run_dsmc_command(self, command: list) -> None: - """Runs a DSMC command""" + """Runs a DSMC command. + Raises: + PdcError when unable to process command. + """ LOG.debug("Starting DSMC command:") LOG.debug(f"{self.process.binary} {' '.join(command)}") - self.process.run_command(parameters=command, dry_run=self.dry_run) + try: + self.process.run_command(parameters=command, dry_run=self.dry_run) + except Exception as error: + raise PdcError(f"{error}") from error + + def validate_is_flow_cell_backup_possible( + self, db_flow_cell: Flowcell, flow_cell_encryption_api: FlowCellEncryptionAPI + ) -> None: + """Check if back-up of flow cell is possible. + Raises: + DsmcAlreadyRunningError if there is already a Dsmc process ongoing. + FlowCellAlreadyBackedUpError if flow cell is already backed up. + FlowCellEncryptionError if encryption is not complete. 
+ """ + if self.validate_is_dsmc_running(): + raise DsmcAlreadyRunningError("A Dsmc process is already running") + if db_flow_cell and db_flow_cell.has_backup: + raise FlowCellAlreadyBackedUpError( + f"Flow cell: {db_flow_cell.name} is already backed-up" + ) + if not flow_cell_encryption_api.complete_file_path.exists(): + raise FlowCellEncryptionError( + f"Flow cell: {flow_cell_encryption_api.flow_cell.id} encryption process is not complete" + ) + LOG.debug("Flow cell can be backed up") + + def backup_flow_cell( + self, files_to_archive: list[Path], store: Store, db_flow_cell: Flowcell + ) -> None: + """Back-up flow cell files.""" + archived_file_count: int = 0 + for encrypted_file in files_to_archive: + try: + self.archive_file_to_pdc(file_path=encrypted_file.as_posix()) + archived_file_count += 1 + except PdcError: + LOG.warning(f"{encrypted_file.as_posix()} cannot be archived") + if archived_file_count == len(files_to_archive) and not self.dry_run: + store.update_flow_cell_has_backup(flow_cell=db_flow_cell, has_backup=True) + LOG.info(f"Flow cell: {db_flow_cell.name} has been backed up") + + def start_flow_cell_backup( + self, + db_flow_cell: Flowcell, + flow_cell_encryption_api: FlowCellEncryptionAPI, + status_db: Store, + ) -> None: + """Check if back-up of flow cell is possible and if so starts it.""" + self.validate_is_flow_cell_backup_possible( + db_flow_cell=db_flow_cell, flow_cell_encryption_api=flow_cell_encryption_api + ) + self.backup_flow_cell( + files_to_archive=[ + flow_cell_encryption_api.final_passphrase_file_path, + flow_cell_encryption_api.encrypted_gpg_file_path, + ], + store=status_db, + db_flow_cell=db_flow_cell, + ) diff --git a/cg/models/cg_config.py b/cg/models/cg_config.py index f11e1b1489..f0a4ca6108 100644 --- a/cg/models/cg_config.py +++ b/cg/models/cg_config.py @@ -19,6 +19,7 @@ from cg.apps.tb import TrailblazerAPI from cg.constants.observations import LoqusdbInstance from cg.constants.priority import SlurmQos +from cg.meta.backup.pdc import PdcAPI from cg.store import Store from cg.store.database import initialize_database @@ -270,6 +271,7 @@ class CGConfig(BaseModel): mutacc_auto_api_: MutaccAutoAPI = None pigz: Optional[CommonAppConfig] = None pdc: Optional[CommonAppConfig] = None + pdc_api_: Optional[PdcAPI] scout: CommonAppConfig = None scout_api_: ScoutAPI = None tar: Optional[CommonAppConfig] = None @@ -306,6 +308,7 @@ class Config: "loqusdb_api_": "loqusdb_api", "madeline_api_": "madeline_api", "mutacc_auto_api_": "mutacc_auto_api", + "pdc_api_": "pdc_api", "scout_api_": "scout_api", "status_db_": "status_db", "trailblazer_api_": "trailblazer_api", @@ -415,6 +418,15 @@ def mutacc_auto_api(self) -> MutaccAutoAPI: self.mutacc_auto_api_ = api return api + @property + def pdc_api(self) -> PdcAPI: + api = self.__dict__.get("pdc_api_") + if api is None: + LOG.debug("Instantiating PDC api") + api = PdcAPI(binary_path=self.pdc.binary_path) + self.pdc_api_ = api + return api + @property def scout_api(self) -> ScoutAPI: api = self.__dict__.get("scout_api_") diff --git a/cg/store/api/core.py b/cg/store/api/core.py index 5d2c7af43a..ce5d5bf174 100644 --- a/cg/store/api/core.py +++ b/cg/store/api/core.py @@ -1,13 +1,13 @@ import logging +from cg.store.api.add import AddHandler from cg.store.api.delete import DeleteDataHandler +from cg.store.api.find_basic_data import FindBasicDataHandler from cg.store.api.find_business_data import FindBusinessDataHandler +from cg.store.api.status import StatusHandler +from cg.store.api.update import UpdateHandler from 
cg.store.database import get_session -from .add import AddHandler -from .find_basic_data import FindBasicDataHandler -from .status import StatusHandler - LOG = logging.getLogger(__name__) @@ -17,6 +17,7 @@ class CoreHandler( FindBasicDataHandler, FindBusinessDataHandler, StatusHandler, + UpdateHandler, ): """Aggregating class for the store api handlers.""" @@ -25,6 +26,7 @@ def __init__(self, session): FindBasicDataHandler(session) FindBusinessDataHandler(session) StatusHandler(session) + UpdateHandler(session) class Store(CoreHandler): diff --git a/cg/store/api/update.py b/cg/store/api/update.py new file mode 100644 index 0000000000..63e8662282 --- /dev/null +++ b/cg/store/api/update.py @@ -0,0 +1,18 @@ +"""Handler to update data objects""" + +from sqlalchemy.orm import Session + +from cg.store.api.base import BaseHandler +from cg.store.models import Flowcell + + +class UpdateHandler(BaseHandler): + """Contains methods to update database objects.""" + + def __init__(self, session: Session): + super().__init__(session=session) + self.session = session + + def update_flow_cell_has_backup(self, flow_cell: Flowcell, has_backup: bool) -> None: + flow_cell.has_backup = has_backup + self.session.commit() diff --git a/requirements.txt b/requirements.txt index 4dedc2f07c..339adb5248 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,6 +33,7 @@ packaging pandas paramiko petname +psutil pydantic>=2.4 python-dateutil pyyaml diff --git a/tests/cli/backup/conftest.py b/tests/cli/backup/conftest.py index 8322d15738..7f13501342 100644 --- a/tests/cli/backup/conftest.py +++ b/tests/cli/backup/conftest.py @@ -1,7 +1,6 @@ import pytest from cg.meta.backup.backup import BackupAPI -from cg.meta.backup.pdc import PdcAPI from cg.meta.encryption.encryption import EncryptionAPI from cg.meta.tar.tar import TarAPI from cg.models.cg_config import CGConfig @@ -14,7 +13,7 @@ def backup_context(cg_context: CGConfig) -> CGConfig: encryption_directories=cg_context.backup.encryption_directories, status=cg_context.status_db, tar_api=TarAPI(binary_path=cg_context.tar.binary_path), - pdc_api=PdcAPI(binary_path=cg_context.pdc.binary_path), + pdc_api=cg_context.pdc_api, flow_cells_dir=cg_context.flow_cells_dir, ) return cg_context diff --git a/tests/cli/backup/test_backup_command.py b/tests/cli/backup/test_backup_command.py index c215b38cc7..a1b8b17685 100644 --- a/tests/cli/backup/test_backup_command.py +++ b/tests/cli/backup/test_backup_command.py @@ -2,14 +2,121 @@ from pathlib import Path from click.testing import CliRunner +from psutil import Process -from cg.cli.backup import encrypt_flow_cells, fetch_flow_cell +from cg.cli.backup import backup_flow_cells, encrypt_flow_cells, fetch_flow_cell from cg.constants import EXIT_SUCCESS, FileExtensions, FlowCellStatus from cg.models.cg_config import CGConfig from cg.models.flow_cell.flow_cell import FlowCellDirectoryData from tests.store_helpers import StoreHelpers +def test_backup_flow_cells( + cli_runner: CliRunner, + cg_context: CGConfig, + caplog, + flow_cell_name: str, + flow_cell_full_name: str, + helpers: StoreHelpers, +): + """Test backing up flow cell in dry run mode.""" + caplog.set_level(logging.DEBUG) + + # GIVEN a flow cells directory + + # GIVEN a flow cell with no back-up + helpers.add_flow_cell( + store=cg_context.status_db, flow_cell_name=flow_cell_name, has_backup=False + ) + + # GIVEN an encrypted flow cell + flow_cells_dir = Path(cg_context.backup.encryption_directories.current, flow_cell_full_name) + flow_cells_dir.mkdir(parents=True, exist_ok=True) 
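+    # Touch the flag file marking the flow cell encryption as complete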
+ Path(flow_cells_dir, flow_cell_name).with_suffix(FileExtensions.COMPLETE).touch() + + # WHEN backing up flow cells in dry run mode + result = cli_runner.invoke(backup_flow_cells, ["--dry-run"], obj=cg_context) + + # THEN exits without any errors + assert result.exit_code == EXIT_SUCCESS + + +def test_backup_flow_cells_when_dsmc_is_running( + cli_runner: CliRunner, + cg_context: CGConfig, + caplog, + flow_cell_name: str, + flow_cell_full_name: str, + mocker, +): + """Test backing-up flow cell in dry run mode when a Dsmc process is already running.""" + caplog.set_level(logging.ERROR) + + # GIVEN a flow cells directory + + # GIVEN an ongoing Dsmc process + mocker.patch.object(Process, "name", return_value="dsmc") + + # WHEN backing up flow cells in dry run mode + result = cli_runner.invoke(backup_flow_cells, ["--dry-run"], obj=cg_context) + + # THEN exits without any errors + assert result.exit_code == EXIT_SUCCESS + + # THEN communicate Dsmc process is already running + assert "A Dsmc process is already running" in caplog.text + + +def test_backup_flow_cells_when_flow_cell_already_has_backup( + cli_runner: CliRunner, + cg_context: CGConfig, + caplog, + flow_cell_name: str, + flow_cell_full_name: str, + helpers: StoreHelpers, +): + """Test backing-up flow cell in dry run mode when already backed-up.""" + caplog.set_level(logging.DEBUG) + + # GIVEN a flow cells directory + + # GIVEN a flow cell with a back-up + helpers.add_flow_cell( + store=cg_context.status_db, flow_cell_name=flow_cell_name, has_backup=True + ) + + # WHEN backing up flow cells in dry run mode + result = cli_runner.invoke(backup_flow_cells, ["--dry-run"], obj=cg_context) + + # THEN exits without any errors + assert result.exit_code == EXIT_SUCCESS + + # THEN communicate flow cell has already been backed up + assert f"Flow cell: {flow_cell_name} is already backed-up" in caplog.text + + +def test_backup_flow_cells_when_encryption_is_not_completed( + cli_runner: CliRunner, + cg_context: CGConfig, + caplog, + flow_cell_name: str, + flow_cell_full_name: str, +): + """Test backing-up flow cell in dry run mode when encryption is not complete.""" + caplog.set_level(logging.DEBUG) + + # GIVEN a flow cells directory + + # WHEN backing up flow cells in dry run mode + result = cli_runner.invoke(backup_flow_cells, ["--dry-run"], obj=cg_context) + + # THEN exits without any errors + assert result.exit_code == EXIT_SUCCESS + + # THEN communicate flow cell encryption is not completed + assert f"Flow cell: {flow_cell_name} encryption process is not complete" in caplog.text + + def test_encrypt_flow_cells( cli_runner: CliRunner, cg_context: CGConfig, caplog, sbatch_job_number: str ): @@ -130,7 +237,7 @@ def test_encrypt_flow_cell_when_encryption_already_completed( mocker.patch.object(FlowCellDirectoryData, "is_flow_cell_ready") FlowCellDirectoryData.is_flow_cell_ready.return_value = True - # GIVEN a pending flag file + # GIVEN a complete flag file flow_cells_dir = Path(cg_context.backup.encryption_directories.current, flow_cell_full_name) flow_cells_dir.mkdir(parents=True, exist_ok=True) Path(flow_cells_dir, flow_cell_name).with_suffix(FileExtensions.COMPLETE).touch() diff --git a/tests/conftest.py b/tests/conftest.py index 754a1e19b0..f08ea65b24 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,6 +23,7 @@ from cg.apps.hermes.hermes_api import HermesApi from cg.apps.housekeeper.hk import HousekeeperAPI from cg.apps.lims.api import LimsAPI +from cg.apps.slurm.slurm_api import SlurmAPI from cg.constants import FileExtensions, 
Pipeline, SequencingFileTag from cg.constants.constants import CaseActions, FileFormat, Strandedness from cg.constants.demultiplexing import BclConverter, DemultiplexingDirsAndFiles @@ -32,7 +33,9 @@ from cg.io.controller import ReadFile, WriteFile from cg.io.json import read_json, write_json from cg.io.yaml import write_yaml +from cg.meta.encryption.encryption import FlowCellEncryptionAPI from cg.meta.rsync import RsyncAPI +from cg.meta.tar.tar import TarAPI from cg.meta.transfer.external_data import ExternalDataAPI from cg.meta.workflow.rnafusion import RnafusionAnalysisAPI from cg.meta.workflow.taxprofiler import TaxprofilerAnalysisAPI @@ -3233,3 +3236,23 @@ def novaseqx_flow_cell_analysis_incomplete( def demultiplex_not_complete_novaseqx_flow_cell(tmp_file: Path) -> Path: """Return the path to a NovaseqX flow cell for which demultiplexing is not complete.""" return tmp_file + + +@pytest.fixture +def flow_cell_encryption_api( + cg_context: CGConfig, flow_cell_full_name: str +) -> FlowCellEncryptionAPI: + flow_cell_encryption_api = FlowCellEncryptionAPI( + binary_path=cg_context.encryption.binary_path, + encryption_dir=Path(cg_context.backup.encryption_directories.current), + dry_run=True, + flow_cell=FlowCellDirectoryData( + flow_cell_path=Path(cg_context.flow_cells_dir, flow_cell_full_name) + ), + pigz_binary_path=cg_context.pigz.binary_path, + slurm_api=SlurmAPI(), + sbatch_parameter=cg_context.backup.slurm_flow_cell_encryption.dict(), + tar_api=TarAPI(binary_path=cg_context.tar.binary_path, dry_run=True), + ) + flow_cell_encryption_api.slurm_api.set_dry_run(dry_run=True) + return flow_cell_encryption_api diff --git a/tests/meta/backup/test_meta_backup.py b/tests/meta/backup/test_meta_backup.py index bd246337a1..049e9eb29b 100644 --- a/tests/meta/backup/test_meta_backup.py +++ b/tests/meta/backup/test_meta_backup.py @@ -79,7 +79,7 @@ def test_get_archived_encryption_key_path(dsmc_q_archive_output: list[str], flow ) # WHEN getting the encryption key path - key_path: Path = backup_api.get_archived_encryption_key_path(dcms_output=dsmc_q_archive_output) + key_path: Path = backup_api.get_archived_encryption_key_path(dsmc_output=dsmc_q_archive_output) # THEN this method should return a path object assert isinstance(key_path, Path) @@ -106,7 +106,7 @@ def test_get_archived_flow_cell_path(dsmc_q_archive_output: list[str], flow_cell ) # WHEN getting the flow cell path - flow_cell_path: Path = backup_api.get_archived_flow_cell_path(dcms_output=dsmc_q_archive_output) + flow_cell_path: Path = backup_api.get_archived_flow_cell_path(dsmc_output=dsmc_q_archive_output) # THEN this method should return a path object assert isinstance(flow_cell_path, Path) @@ -552,11 +552,9 @@ def test_encrypt_and_archive_spring_file( calls = [ call( file_path=str(mock_spring_encryption_api.encrypted_spring_file_path.return_value), - dry_run=False, ), call( file_path=str(mock_spring_encryption_api.encrypted_key_path.return_value), - dry_run=False, ), ] mock_pdc_api.archive_file_to_pdc.assert_has_calls(calls) @@ -568,40 +566,6 @@ def test_encrypt_and_archive_spring_file( mock_remove_archived_spring_files.assert_called_once_with(spring_file_path) -@mock.patch("cg.meta.backup.backup.SpringBackupAPI.is_spring_file_archived") -@mock.patch("cg.apps.housekeeper.hk") -@mock.patch("cg.meta.encryption.encryption") -@mock.patch("cg.meta.backup.pdc") -def test_encrypt_and_archive_spring_file_pdc_archiving_failed( - mock_pdc: PdcAPI, - mock_spring_encryption_api: SpringEncryptionAPI, - mock_housekeeper: HousekeeperAPI, - 
mock_is_archived, - spring_file_path, - caplog, -): - # GIVEN a spring file that needs to be encrypted and archived to PDC - spring_backup_api = SpringBackupAPI( - encryption_api=mock_spring_encryption_api, hk_api=mock_housekeeper, pdc_api=mock_pdc - ) - - # WHEN running the encryption and archiving process, and the encryption command fails - mock_is_archived.return_value = False - mock_spring_encryption_api.encrypted_spring_file_path.return_value = ( - spring_file_path.with_suffix(FileExtensions.SPRING + FileExtensions.GPG) - ) - mock_spring_encryption_api.encrypted_key_path.return_value = spring_file_path.with_suffix( - FileExtensions.KEY + FileExtensions.GPG - ) - mock_pdc.archive_file_to_pdc.side_effect = subprocess.CalledProcessError(1, "echo") - spring_backup_api.encrypt_and_archive_spring_file(spring_file_path=spring_file_path) - - # THEN the appropriate message should be logged and the spring file directory should be - # cleaned up - assert "Encryption failed" in caplog.text - mock_spring_encryption_api.cleanup.assert_called_with(spring_file_path) - - @mock.patch("cg.meta.backup.backup.SpringBackupAPI.is_spring_file_archived") @mock.patch("cg.apps.housekeeper.hk") @mock.patch("cg.meta.encryption.encryption") diff --git a/tests/meta/backup/test_meta_pdc.py b/tests/meta/backup/test_meta_pdc.py index a1e82f6150..06cd8176cd 100644 --- a/tests/meta/backup/test_meta_pdc.py +++ b/tests/meta/backup/test_meta_pdc.py @@ -1,14 +1,229 @@ """Tests for the meta PdcAPI""" +import logging from unittest import mock +import pytest + +from cg.exc import ( + DsmcAlreadyRunningError, + FlowCellAlreadyBackedUpError, + FlowCellEncryptionError, +) from cg.meta.backup.pdc import PdcAPI +from cg.meta.encryption.encryption import FlowCellEncryptionAPI +from cg.models.cg_config import CGConfig +from cg.store import Store +from cg.store.models import Flowcell +from tests.store_helpers import StoreHelpers + + +def test_validate_is_dsmc_process_running(cg_context: CGConfig, binary_path: str): + """Tests checking if a Dsmc process is running when no Dsmc process is running.""" + # GIVEN an instance of the PDC API + pdc_api = cg_context.pdc_api + + # GIVEN no Dsmc process is running + + # WHEN checking if Dsmc is running + is_dsmc_running: bool = pdc_api.validate_is_dsmc_running() + + # THEN return false + assert not is_dsmc_running + + +def test_validate_is_flow_cell_backup_possible( + base_store: Store, + caplog, + cg_context: CGConfig, + binary_path: str, + helpers: StoreHelpers, + flow_cell_encryption_api: FlowCellEncryptionAPI, +): + """Tests checking if a back-up of flow-cell is possible.""" + caplog.set_level(logging.DEBUG) + + # GIVEN an instance of the PDC API + pdc_api = cg_context.pdc_api + + # GIVEN no Dsmc process is running + + # GIVEN a database flow cell which is not backed up + db_flow_cell: Flowcell = helpers.add_flow_cell( + flow_cell_name=flow_cell_encryption_api.flow_cell.id, + store=base_store, + ) + + # GIVEN that encryption is completed + flow_cell_encryption_api.flow_cell_encryption_dir.mkdir(parents=True) + flow_cell_encryption_api.complete_file_path.touch() + + # WHEN checking if back-up is possible + pdc_api.validate_is_flow_cell_backup_possible( + db_flow_cell=db_flow_cell, flow_cell_encryption_api=flow_cell_encryption_api + ) + + # THEN communicate that it passed + assert "Flow cell can be backed up" in caplog.text + + +def test_validate_is_flow_cell_backup_when_dsmc_is_already_running( + base_store: Store, + cg_context: CGConfig, + binary_path: str, + helpers: StoreHelpers, + 
flow_cell_encryption_api: FlowCellEncryptionAPI, + mocker, +): + """Tests checking if a back-up of flow-cell is possible when Dsmc is already running.""" + # GIVEN an instance of the PDC API + pdc_api = cg_context.pdc_api + + # GIVEN a Dsmc process is already running + mocker.patch.object(PdcAPI, "validate_is_dsmc_running", return_value=True) + + # GIVEN a database flow cell which is not backed up + db_flow_cell: Flowcell = helpers.add_flow_cell( + flow_cell_name=flow_cell_encryption_api.flow_cell.id, + store=base_store, + ) + + # WHEN checking if back-up is possible + with pytest.raises(DsmcAlreadyRunningError): + pdc_api.validate_is_flow_cell_backup_possible( + db_flow_cell=db_flow_cell, flow_cell_encryption_api=flow_cell_encryption_api + ) + + # THEN error should be raised + + +def test_validate_is_flow_cell_backup_when_already_backed_up( + base_store: Store, + cg_context: CGConfig, + binary_path: str, + helpers: StoreHelpers, + flow_cell_encryption_api: FlowCellEncryptionAPI, +): + """Tests checking if a back-up of flow-cell is possible when flow cell is already backed up.""" + # GIVEN an instance of the PDC API + pdc_api = cg_context.pdc_api + + # GIVEN a database flow cell which is backed up + db_flow_cell: Flowcell = helpers.add_flow_cell( + flow_cell_name=flow_cell_encryption_api.flow_cell.id, store=base_store, has_backup=True + ) + + # WHEN checking if back-up is possible + with pytest.raises(FlowCellAlreadyBackedUpError): + pdc_api.validate_is_flow_cell_backup_possible( + db_flow_cell=db_flow_cell, flow_cell_encryption_api=flow_cell_encryption_api + ) + + # THEN error should be raised + + +def test_validate_is_flow_cell_backup_when_encryption_is_not_complete( + base_store: Store, + cg_context: CGConfig, + binary_path: str, + helpers: StoreHelpers, + flow_cell_encryption_api: FlowCellEncryptionAPI, +): + """Tests checking if a back-up of flow-cell is possible when encryption is not complete.""" + # GIVEN an instance of the PDC API + pdc_api = cg_context.pdc_api + + # GIVEN a database flow cell which is not backed up + db_flow_cell: Flowcell = helpers.add_flow_cell( + flow_cell_name=flow_cell_encryption_api.flow_cell.id, + store=base_store, + ) + + # WHEN checking if back-up is possible + with pytest.raises(FlowCellEncryptionError): + pdc_api.validate_is_flow_cell_backup_possible( + db_flow_cell=db_flow_cell, flow_cell_encryption_api=flow_cell_encryption_api + ) + + # THEN error should be raised + + +def test_backup_flow_cell( + base_store: Store, + cg_context: CGConfig, + binary_path: str, + helpers: StoreHelpers, + flow_cell_encryption_api: FlowCellEncryptionAPI, + mocker, +): + """Tests back-up flow cell.""" + # GIVEN an instance of the PDC API + pdc_api = cg_context.pdc_api + + # GIVEN a mocked archiving call + mocker.patch.object(PdcAPI, "archive_file_to_pdc", return_value=None) + + # GIVEN a database flow cell which is not backed up + db_flow_cell: Flowcell = helpers.add_flow_cell( + flow_cell_name=flow_cell_encryption_api.flow_cell.id, + store=base_store, + ) + + # WHEN backing up flow cell + pdc_api.backup_flow_cell( + files_to_archive=[ + flow_cell_encryption_api.final_passphrase_file_path, + flow_cell_encryption_api.encrypted_gpg_file_path, + ], + store=base_store, + db_flow_cell=db_flow_cell, + ) + + # THEN flow cell should have a back-up + assert db_flow_cell.has_backup + + +def test_backup_flow_cell_when_unable_to_archive( + base_store: Store, + binary_path: str, + cg_context: CGConfig, + helpers: StoreHelpers, + flow_cell_encryption_api: FlowCellEncryptionAPI, + caplog, 
+): + """Tests back-up flow cell when unable to archive.""" + caplog.set_level(logging.DEBUG) + + # GIVEN an instance of the PDC API + pdc_api = cg_context.pdc_api + + # GIVEN a database flow cell which is not backed up + db_flow_cell: Flowcell = helpers.add_flow_cell( + flow_cell_name=flow_cell_encryption_api.flow_cell.id, + store=base_store, + ) + + # WHEN backing up flow cell + pdc_api.backup_flow_cell( + files_to_archive=[ + flow_cell_encryption_api.final_passphrase_file_path, + flow_cell_encryption_api.encrypted_gpg_file_path, + ], + store=base_store, + db_flow_cell=db_flow_cell, + ) + + # THEN log unable to archive + assert ( + f"{flow_cell_encryption_api.encrypted_gpg_file_path.as_posix()} cannot be archived" + in caplog.text + ) @mock.patch("cg.meta.backup.pdc.Process") -def test_archive_file_to_pdc(mock_process, binary_path, backup_file_path): +def test_archive_file_to_pdc(mock_process, cg_context: CGConfig, binary_path, backup_file_path): """Tests execution command to archive file to PDC""" # GIVEN an instance of the PDC API - pdc_api = PdcAPI(binary_path=binary_path) + pdc_api = cg_context.pdc_api pdc_api.process = mock_process # WHEN archiving a file to PDC @@ -21,10 +236,10 @@ def test_archive_file_to_pdc(mock_process, binary_path, backup_file_path): @mock.patch("cg.meta.backup.pdc.Process") -def test_query_pdc(mock_process, binary_path, backup_file_path): +def test_query_pdc(mock_process, cg_context: CGConfig, binary_path, backup_file_path): """Tests execution command to query files to PDC""" # GIVEN an instance of the PDC API - pdc_api = PdcAPI(binary_path=binary_path) + pdc_api = cg_context.pdc_api pdc_api.process = mock_process # WHEN querying PDC @@ -35,10 +250,10 @@ def test_query_pdc(mock_process, binary_path, backup_file_path): @mock.patch("cg.meta.backup.pdc.Process") -def test_retrieve_file_from_pdc(mock_process, binary_path, backup_file_path): +def test_retrieve_file_from_pdc(mock_process, cg_context: CGConfig, binary_path, backup_file_path): """Tests execution command to retrieve files from PDC""" # GIVEN an instance of the PDC API - pdc_api = PdcAPI(binary_path=binary_path) + pdc_api = cg_context.pdc_api pdc_api.process = mock_process # WHEN retrieving a file from PDC diff --git a/tests/meta/encryption/conftest.py b/tests/meta/encryption/conftest.py index 9c676cd6ed..ab798a8b18 100644 --- a/tests/meta/encryption/conftest.py +++ b/tests/meta/encryption/conftest.py @@ -2,32 +2,7 @@ import pytest -from cg.apps.slurm.slurm_api import SlurmAPI from cg.constants.encryption import CipherAlgorithm, EncryptionUserID -from cg.meta.encryption.encryption import FlowCellEncryptionAPI -from cg.meta.tar.tar import TarAPI -from cg.models.cg_config import CGConfig -from cg.models.flow_cell.flow_cell import FlowCellDirectoryData - - -@pytest.fixture -def flow_cell_encryption_api( - cg_context: CGConfig, flow_cell_full_name: str -) -> FlowCellEncryptionAPI: - flow_cell_encryption_api = FlowCellEncryptionAPI( - binary_path=cg_context.encryption.binary_path, - encryption_dir=Path(cg_context.backup.encryption_directories.current), - dry_run=True, - flow_cell=FlowCellDirectoryData( - flow_cell_path=Path(cg_context.flow_cells_dir, flow_cell_full_name) - ), - pigz_binary_path=cg_context.pigz.binary_path, - slurm_api=SlurmAPI(), - sbatch_parameter=cg_context.backup.slurm_flow_cell_encryption.dict(), - tar_api=TarAPI(binary_path=cg_context.tar.binary_path, dry_run=True), - ) - flow_cell_encryption_api.slurm_api.set_dry_run(dry_run=True) - return flow_cell_encryption_api @pytest.fixture diff 
--git a/tests/store/api/test_update.py b/tests/store/api/test_update.py new file mode 100644 index 0000000000..0f825e080c --- /dev/null +++ b/tests/store/api/test_update.py @@ -0,0 +1,20 @@ +from cg.store import Store +from cg.store.models import Flowcell +from tests.store_helpers import StoreHelpers + + +def test_update_flow_cell_has_backup(base_store: Store, flow_cell_name: str, helpers: StoreHelpers): + """Test updating the backup status of a flow cell in the database.""" + + # GIVEN a database containing a flow cell without a back-up + flow_cell: Flowcell = helpers.add_flow_cell( + store=base_store, flow_cell_name=flow_cell_name, has_backup=False + ) + + assert not flow_cell.has_backup + + # WHEN updating flow cell attribute has back-up + base_store.update_flow_cell_has_backup(flow_cell=flow_cell, has_backup=True) + + # THEN flow cell has backup should be true + assert flow_cell.has_backup From 47cc6a1bca11a35e1c3eeab1ad2e6df1249b8e07 Mon Sep 17 00:00:00 2001 From: Clinical Genomics Bot Date: Mon, 23 Oct 2023 06:41:33 +0000 Subject: [PATCH 2/5] =?UTF-8?q?Bump=20version:=2051.6.14=20=E2=86=92=2051.?= =?UTF-8?q?7.0=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- cg/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index f70856002e..04f99e962b 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 51.6.14 +current_version = 51.7.0 commit = True tag = True tag_name = v{new_version} diff --git a/cg/__init__.py b/cg/__init__.py index d341d108e6..4d3fda45aa 100644 --- a/cg/__init__.py +++ b/cg/__init__.py @@ -1,4 +1,4 @@ import pkg_resources __title__ = "cg" -__version__ = "51.6.14" +__version__ = "51.7.0" diff --git a/setup.py b/setup.py index 0ab0b6970e..bcdc73fb3a 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ def parse_requirements(req_path="./requirements.txt"): setup( name=NAME, - version="51.6.14", + version="51.7.0", description=DESCRIPTION, long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", From db38b76dce932477cc1014f0c2c5789a95c92dda Mon Sep 17 00:00:00 2001 From: Clinical Genomics Bot Date: Mon, 23 Oct 2023 08:37:05 +0000 Subject: [PATCH 3/5] =?UTF-8?q?Bump=20version:=2051.7.0=20=E2=86=92=2051.7?= =?UTF-8?q?.1=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- cg/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 04f99e962b..149f3e57e1 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 51.7.0 +current_version = 51.7.1 commit = True tag = True tag_name = v{new_version} diff --git a/cg/__init__.py b/cg/__init__.py index 4d3fda45aa..6c368f39af 100644 --- a/cg/__init__.py +++ b/cg/__init__.py @@ -1,4 +1,4 @@ import pkg_resources __title__ = "cg" -__version__ = "51.7.0" +__version__ = "51.7.1" diff --git a/setup.py b/setup.py index bcdc73fb3a..00c8470c3f 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ def parse_requirements(req_path="./requirements.txt"): setup( name=NAME, - version="51.7.0", + version="51.7.1", description=DESCRIPTION, long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", From df06433c61912efc1010dce997bbd68b546bf218 Mon Sep 17 00:00:00 2001 From: Vadym Date: Mon, 
23 Oct 2023 13:22:36 +0200 Subject: [PATCH 4/5] Migrate delivery report models to Pydantic v2 (#2579) ### Changed: - Migrate Delivery report models to Pydantic v2 --- cg/meta/report/field_validators.py | 28 +-- cg/meta/report/report_api.py | 2 +- cg/models/report/metadata.py | 130 +++++--------- cg/models/report/report.py | 114 +++++------- cg/models/report/sample.py | 100 +++++------ cg/models/report/validators.py | 95 ++++------ tests/meta/report/test_balsamic_api.py | 2 +- tests/meta/report/test_mip_dna_api.py | 2 +- tests/meta/report/test_report_api.py | 84 +++++++-- tests/meta/report/test_rnafusion_api.py | 2 +- tests/models/report/test_validators.py | 222 ++++++++++++++---------- 11 files changed, 380 insertions(+), 401 deletions(-) diff --git a/cg/meta/report/field_validators.py b/cg/meta/report/field_validators.py index 778de66cab..366904c2e1 100644 --- a/cg/meta/report/field_validators.py +++ b/cg/meta/report/field_validators.py @@ -37,34 +37,34 @@ def get_empty_fields(report_data: dict) -> list: def get_empty_report_data(report_data: ReportModel) -> dict: """Retrieve empty fields from a report data model.""" empty_fields = { - "report": get_empty_fields(report_data=report_data.dict()), - "customer": get_empty_fields(report_data=report_data.customer.dict()), - "case": get_empty_fields(report_data=report_data.case.dict()), + "report": get_empty_fields(report_data=report_data.model_dump()), + "customer": get_empty_fields(report_data=report_data.customer.model_dump()), + "case": get_empty_fields(report_data=report_data.case.model_dump()), "applications": { - app.tag: get_empty_fields(report_data=app.dict()) + app.tag: get_empty_fields(report_data=app.model_dump()) for app in report_data.case.applications - if get_empty_fields(report_data=app.dict()) + if get_empty_fields(report_data=app.model_dump()) }, - "data_analysis": get_empty_fields(report_data=report_data.case.data_analysis.dict()), + "data_analysis": get_empty_fields(report_data=report_data.case.data_analysis.model_dump()), "samples": { - sample.id: get_empty_fields(report_data=sample.dict()) + sample.id: get_empty_fields(report_data=sample.model_dump()) for sample in report_data.case.samples - if get_empty_fields(report_data=sample.dict()) + if get_empty_fields(report_data=sample.model_dump()) }, "methods": { - sample.id: get_empty_fields(report_data=sample.methods.dict()) + sample.id: get_empty_fields(report_data=sample.methods.model_dump()) for sample in report_data.case.samples - if get_empty_fields(report_data=sample.methods.dict()) + if get_empty_fields(report_data=sample.methods.model_dump()) }, "timestamps": { - sample.id: get_empty_fields(report_data=sample.timestamps.dict()) + sample.id: get_empty_fields(report_data=sample.timestamps.model_dump()) for sample in report_data.case.samples - if get_empty_fields(report_data=sample.timestamps.dict()) + if get_empty_fields(report_data=sample.timestamps.model_dump()) }, "metadata": { - sample.id: get_empty_fields(report_data=sample.metadata.dict()) + sample.id: get_empty_fields(report_data=sample.metadata.model_dump()) for sample in report_data.case.samples - if get_empty_fields(report_data=sample.metadata.dict()) + if get_empty_fields(report_data=sample.metadata.model_dump()) }, } # Clear empty values diff --git a/cg/meta/report/report_api.py b/cg/meta/report/report_api.py index 58167d8424..effa3afcdd 100644 --- a/cg/meta/report/report_api.py +++ b/cg/meta/report/report_api.py @@ -58,7 +58,7 @@ def create_delivery_report( report_data: ReportModel = 
self.validate_report_fields( case_id=case_id, report_data=report_data, force_report=force_report ) - rendered_report: str = self.render_delivery_report(report_data=report_data.dict()) + rendered_report: str = self.render_delivery_report(report_data=report_data.model_dump()) return rendered_report def create_delivery_report_file( diff --git a/cg/models/report/metadata.py b/cg/models/report/metadata.py index 1879fb5668..181313ea57 100644 --- a/cg/models/report/metadata.py +++ b/cg/models/report/metadata.py @@ -1,12 +1,14 @@ -from typing import Optional, Union +from typing import Optional -from pydantic.v1 import BaseModel, validator +from pydantic import BaseModel, BeforeValidator +from typing_extensions import Annotated +from cg.constants import NA_FIELD from cg.models.report.validators import ( - validate_empty_field, - validate_float, - validate_gender, - validate_percentage, + get_float_as_percentage, + get_float_as_string, + get_gender_as_string, + get_report_string, ) @@ -17,15 +19,10 @@ class SampleMetadataModel(BaseModel): Attributes: million_read_pairs: number of million read pairs obtained; source: StatusDB/sample/reads (/2*10^6) duplicates: fraction of mapped sequence that is marked as duplicate; source: pipeline workflow - """ - million_read_pairs: Union[None, float, str] - duplicates: Union[None, float, str] - - _float_values = validator("million_read_pairs", "duplicates", always=True, allow_reuse=True)( - validate_float - ) + million_read_pairs: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + duplicates: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD class MipDNASampleMetadataModel(SampleMetadataModel): @@ -39,18 +36,11 @@ class MipDNASampleMetadataModel(SampleMetadataModel): pct_10x: percent of targeted bases that are covered to 10X coverage or more; source: pipeline workflow """ - bait_set: Optional[str] - gender: Optional[str] - mapped_reads: Union[None, float, str] - mean_target_coverage: Union[None, float, str] - pct_10x: Union[None, float, str] - - _bait_set = validator("bait_set", always=True, allow_reuse=True)(validate_empty_field) - _gender = validator("gender", always=True, allow_reuse=True)(validate_gender) - - _float_values_mip = validator( - "mapped_reads", "mean_target_coverage", "pct_10x", always=True, allow_reuse=True - )(validate_float) + bait_set: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + gender: Annotated[str, BeforeValidator(get_gender_as_string)] = NA_FIELD + mapped_reads: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + mean_target_coverage: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + pct_10x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD class BalsamicSampleMetadataModel(SampleMetadataModel): @@ -61,12 +51,8 @@ class BalsamicSampleMetadataModel(SampleMetadataModel): fold_80: fold 80 base penalty; source: pipeline workflow """ - mean_insert_size: Union[None, float, str] - fold_80: Union[None, float, str] - - _float_values_balsamic = validator( - "mean_insert_size", "fold_80", always=True, allow_reuse=True - )(validate_float) + mean_insert_size: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + fold_80: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD class BalsamicTargetedSampleMetadataModel(BalsamicSampleMetadataModel): @@ -80,19 +66,11 @@ class BalsamicTargetedSampleMetadataModel(BalsamicSampleMetadataModel): pct_500x: percent of targeted bases that are covered to 500X coverage or more; source: pipeline workflow 
""" - bait_set: Optional[str] - bait_set_version: Union[None, int, str] - median_target_coverage: Union[None, float, str] - pct_250x: Union[None, float, str] - pct_500x: Union[None, float, str] - - _str_values = validator("bait_set", "bait_set_version", always=True, allow_reuse=True)( - validate_empty_field - ) - - _float_values_balsamic_targeted = validator( - "median_target_coverage", "pct_250x", "pct_500x", always=True, allow_reuse=True - )(validate_float) + bait_set: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + bait_set_version: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + median_target_coverage: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + pct_250x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + pct_500x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD class BalsamicWGSSampleMetadataModel(BalsamicSampleMetadataModel): @@ -104,13 +82,9 @@ class BalsamicWGSSampleMetadataModel(BalsamicSampleMetadataModel): pct_60x: fraction of bases that attained at least 15X sequence coverage; source: pipeline workflow """ - median_coverage: Union[None, float, str] - pct_15x: Union[None, float, str] - pct_60x: Union[None, float, str] - - _float_values_balsamic_wgs = validator( - "median_coverage", "pct_15x", "pct_60x", always=True, allow_reuse=True - )(validate_float) + median_coverage: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + pct_15x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + pct_60x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD class RnafusionSampleMetadataModel(SampleMetadataModel): @@ -134,42 +108,18 @@ class RnafusionSampleMetadataModel(SampleMetadataModel): uniquely_mapped_reads: percentage of mapped reads; source: pipeline workflow """ - bias_5_3: Union[None, float, str] - gc_content: Union[None, float, str] - input_amount: Union[None, float, str] - insert_size: Union[None, float, str] - insert_size_peak: Union[None, float, str] - mapped_reads: Union[None, float, str] - mean_length_r1: Union[None, float, str] - mrna_bases: Union[None, float, str] - pct_adapter: Union[None, float, str] - pct_surviving: Union[None, float, str] - q20_rate: Union[None, float, str] - q30_rate: Union[None, float, str] - ribosomal_bases: Union[None, float, str] - rin: Union[None, float, str] - uniquely_mapped_reads: Union[None, float, str] - - _float_values = validator( - "bias_5_3", - "input_amount", - "insert_size", - "insert_size_peak", - "mean_length_r1", - "mrna_bases", - "pct_adapter", - "pct_surviving", - "rin", - "uniquely_mapped_reads", - always=True, - allow_reuse=True, - )(validate_float) - _pct_values = validator( - "gc_content", - "mapped_reads", - "q20_rate", - "q30_rate", - "ribosomal_bases", - always=True, - allow_reuse=True, - )(validate_percentage) + bias_5_3: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + gc_content: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD + input_amount: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + insert_size: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + insert_size_peak: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + mapped_reads: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD + mean_length_r1: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + mrna_bases: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + pct_adapter: Annotated[str, 
BeforeValidator(get_float_as_string)] = NA_FIELD + pct_surviving: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + q20_rate: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD + q30_rate: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD + ribosomal_bases: Annotated[str, BeforeValidator(get_float_as_percentage)] = NA_FIELD + rin: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD + uniquely_mapped_reads: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD diff --git a/cg/models/report/report.py b/cg/models/report/report.py index 5e2db5400e..4195190ecb 100644 --- a/cg/models/report/report.py +++ b/cg/models/report/report.py @@ -1,18 +1,21 @@ -from datetime import datetime -from typing import Optional, Union +import logging +from typing import Optional -from pydantic.v1 import BaseModel, root_validator, validator +from pydantic import BaseModel, BeforeValidator, model_validator +from typing_extensions import Annotated -from cg.constants import DataDelivery, Pipeline +from cg.constants import NA_FIELD, REPORT_SUPPORTED_PIPELINES from cg.models.report.sample import ApplicationModel, SampleModel from cg.models.report.validators import ( - validate_date, - validate_empty_field, - validate_list, - validate_path, - validate_supported_pipeline, + get_analysis_type_as_string, + get_date_as_string, + get_list_as_string, + get_path_as_string, + get_report_string, ) +LOG = logging.getLogger(__name__) + class CustomerModel(BaseModel): """ @@ -25,14 +28,10 @@ class CustomerModel(BaseModel): scout_access: whether the customer has access to scout or not; source: statusDB/family/customer/scout_access """ - name: Optional[str] - id: Optional[str] - invoice_address: Optional[str] - scout_access: Optional[bool] - - _values = validator("name", "id", "invoice_address", always=True, allow_reuse=True)( - validate_empty_field - ) + name: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + id: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + invoice_address: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + scout_access: Optional[bool] = None class ScoutReportFiles(BaseModel): @@ -48,23 +47,12 @@ class ScoutReportFiles(BaseModel): smn_tsv: SMN gene variants file (MIP-DNA specific); source: HK """ - snv_vcf: Optional[str] - snv_research_vcf: Optional[str] - sv_vcf: Optional[str] - sv_research_vcf: Optional[str] - vcf_str: Optional[str] - smn_tsv: Optional[str] - - _str_values = validator( - "snv_vcf", - "snv_research_vcf", - "sv_vcf", - "sv_research_vcf", - "vcf_str", - "smn_tsv", - always=True, - allow_reuse=True, - )(validate_path) + snv_vcf: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD + snv_research_vcf: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD + sv_vcf: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD + sv_research_vcf: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD + vcf_str: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD + smn_tsv: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD class DataAnalysisModel(BaseModel): @@ -83,30 +71,29 @@ class DataAnalysisModel(BaseModel): scout_files: list of file names uploaded to Scout """ - customer_pipeline: Optional[Pipeline] - data_delivery: Optional[DataDelivery] - pipeline: Optional[Pipeline] - pipeline_version: Optional[str] - type: Optional[str] - genome_build: Optional[str] - variant_callers: Union[None, list[str], str] - panels: Union[None, list[str], 
str] + customer_pipeline: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + data_delivery: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + pipeline: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + pipeline_version: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + type: Annotated[str, BeforeValidator(get_analysis_type_as_string)] = NA_FIELD + genome_build: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + variant_callers: Annotated[str, BeforeValidator(get_list_as_string)] = NA_FIELD + panels: Annotated[str, BeforeValidator(get_list_as_string)] = NA_FIELD scout_files: ScoutReportFiles - _values = root_validator(pre=True, allow_reuse=True)(validate_supported_pipeline) - _str_values = validator( - "customer_pipeline", - "data_delivery", - "pipeline", - "pipeline_version", - "type", - "genome_build", - always=True, - allow_reuse=True, - )(validate_empty_field) - _list_values = validator("variant_callers", "panels", always=True, allow_reuse=True)( - validate_list - ) + @model_validator(mode="after") + def check_supported_pipeline(self) -> "DataAnalysisModel": + """Check if the report generation supports a specific pipeline and analysis type.""" + if self.pipeline != self.customer_pipeline: + LOG.error( + f"The analysis requested by the customer ({self.customer_pipeline}) does not match the one " + f"executed ({self.pipeline})" + ) + raise ValueError + if self.pipeline not in REPORT_SUPPORTED_PIPELINES: + LOG.error(f"The pipeline {self.pipeline} does not support delivery report generation") + raise ValueError + return self class CaseModel(BaseModel): @@ -121,14 +108,12 @@ class CaseModel(BaseModel): applications: case associated unique applications """ - name: Optional[str] - id: Optional[str] + name: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + id: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD samples: list[SampleModel] data_analysis: DataAnalysisModel applications: list[ApplicationModel] - _name = validator("name", always=True, allow_reuse=True)(validate_empty_field) - class ReportModel(BaseModel): """ @@ -143,10 +128,7 @@ class ReportModel(BaseModel): """ customer: CustomerModel - version: Union[None, int, str] - date: Union[None, datetime, str] + version: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + date: Annotated[str, BeforeValidator(get_date_as_string)] = NA_FIELD case: CaseModel - accredited: Optional[bool] - - _version = validator("version", always=True, allow_reuse=True)(validate_empty_field) - _date = validator("date", always=True, allow_reuse=True)(validate_date) + accredited: Optional[bool] = None diff --git a/cg/models/report/sample.py b/cg/models/report/sample.py index 81cae190af..a8150dcc15 100644 --- a/cg/models/report/sample.py +++ b/cg/models/report/sample.py @@ -1,16 +1,21 @@ -from datetime import datetime from typing import Optional, Union -from pydantic.v1 import BaseModel, validator +from pydantic import BaseModel, BeforeValidator +from typing_extensions import Annotated -from cg.constants.subject import Gender -from cg.models.report.metadata import SampleMetadataModel +from cg.constants import NA_FIELD +from cg.models.report.metadata import ( + BalsamicTargetedSampleMetadataModel, + BalsamicWGSSampleMetadataModel, + MipDNASampleMetadataModel, + RnafusionSampleMetadataModel, +) from cg.models.report.validators import ( - validate_boolean, - validate_date, - validate_empty_field, - validate_gender, - validate_rml_sample, + get_boolean_as_string, + 
get_date_as_string, + get_gender_as_string, + get_prep_category_as_string, + get_report_string, ) @@ -28,24 +33,13 @@ class ApplicationModel(BaseModel): external: whether the app tag is external or not; source: StatusDB/application/is_external """ - tag: Optional[str] - version: Union[None, int, str] - prep_category: Optional[str] - description: Optional[str] - limitations: Optional[str] - accredited: Optional[bool] - external: Optional[bool] - - _prep_category = validator("prep_category", always=True, allow_reuse=True)(validate_rml_sample) - _values = validator( - "tag", - "version", - "prep_category", - "description", - "limitations", - always=True, - allow_reuse=True, - )(validate_empty_field) + tag: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + version: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + prep_category: Annotated[str, BeforeValidator(get_prep_category_as_string)] = NA_FIELD + description: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + limitations: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + accredited: Optional[bool] = None + external: Optional[bool] = None class MethodsModel(BaseModel): @@ -57,12 +51,8 @@ class MethodsModel(BaseModel): sequencing: sequencing procedure; source: LIMS/sample/sequencing_method """ - library_prep: Optional[str] - sequencing: Optional[str] - - _values = validator("library_prep", "sequencing", always=True, allow_reuse=True)( - validate_empty_field - ) + library_prep: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + sequencing: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD class TimestampModel(BaseModel): @@ -76,19 +66,10 @@ class TimestampModel(BaseModel): reads_updated_at: sequencing date; source: StatusDB/sample/reads_updated_at """ - ordered_at: Union[None, datetime, str] - received_at: Union[None, datetime, str] - prepared_at: Union[None, datetime, str] - reads_updated_at: Union[None, datetime, str] - - _values = validator( - "ordered_at", - "received_at", - "prepared_at", - "reads_updated_at", - always=True, - allow_reuse=True, - )(validate_date) + ordered_at: Annotated[str, BeforeValidator(get_date_as_string)] = NA_FIELD + received_at: Annotated[str, BeforeValidator(get_date_as_string)] = NA_FIELD + prepared_at: Annotated[str, BeforeValidator(get_date_as_string)] = NA_FIELD + reads_updated_at: Annotated[str, BeforeValidator(get_date_as_string)] = NA_FIELD class SampleModel(BaseModel): @@ -109,20 +90,19 @@ class SampleModel(BaseModel): timestamps: processing timestamp attributes """ - name: Optional[str] - id: Optional[str] - ticket: Union[None, int, str] - status: Optional[str] - gender: Optional[str] = Gender.UNKNOWN - source: Optional[str] - tumour: Union[None, bool, str] + name: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + id: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + ticket: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + status: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + gender: Annotated[str, BeforeValidator(get_gender_as_string)] = NA_FIELD + source: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD + tumour: Annotated[str, BeforeValidator(get_boolean_as_string)] = NA_FIELD application: ApplicationModel methods: MethodsModel - metadata: SampleMetadataModel + metadata: Union[ + MipDNASampleMetadataModel, + BalsamicTargetedSampleMetadataModel, + BalsamicWGSSampleMetadataModel, + RnafusionSampleMetadataModel, + ] timestamps: TimestampModel - - 
_tumour = validator("tumour", always=True, allow_reuse=True)(validate_boolean) - _gender = validator("gender", always=True, allow_reuse=True)(validate_gender) - _values = validator("name", "id", "ticket", "status", "source", always=True, allow_reuse=True)( - validate_empty_field - ) diff --git a/cg/models/report/validators.py b/cg/models/report/validators.py index 38dc742942..93f443e2f5 100644 --- a/cg/models/report/validators.py +++ b/cg/models/report/validators.py @@ -1,7 +1,9 @@ import logging from datetime import datetime from pathlib import Path -from typing import Union +from typing import Any, Optional + +from pydantic import ValidationInfo from cg.constants import ( BALSAMIC_ANALYSIS_TYPE, @@ -9,92 +11,67 @@ NO_FIELD, PRECISION, REPORT_GENDER, - REPORT_SUPPORTED_PIPELINES, YES_FIELD, - Pipeline, ) +from cg.constants.constants import Pipeline, PrepCategory +from cg.constants.subject import Gender from cg.models.orders.constants import OrderType LOG = logging.getLogger(__name__) -def validate_empty_field(value: Union[int, str]) -> str: - """Formats an empty value to be included in the report as N/A.""" +def get_report_string(value: Any) -> str: + """Return report adapted string.""" return str(value) if value else NA_FIELD -def validate_boolean(value: Union[bool, str]) -> str: - """Formats a boolean value for the delivery report.""" - if isinstance(value, bool) or value: - if str(value) == "True": - return YES_FIELD - if str(value) == "False": - return NO_FIELD +def get_boolean_as_string(value: Optional[bool]) -> str: + """Return delivery report adapted string representation of a boolean.""" + if isinstance(value, bool): + return YES_FIELD if value else NO_FIELD return NA_FIELD -def validate_float(value: Union[float, str]) -> str: - """Returns a processed float value.""" +def get_float_as_string(value: Optional[float]) -> str: + """Return string representation of a float value.""" return str(round(float(value), PRECISION)) if value or isinstance(value, float) else NA_FIELD -def validate_percentage(value: Union[float, str]) -> str: - """Returns a processed float value as a percentage.""" - return validate_float(float(value) * 100) if value else NA_FIELD +def get_float_as_percentage(value: Optional[float]) -> str: + """Return string percentage representation of a float value.""" + return get_float_as_string(value * 100) if value or isinstance(value, float) else NA_FIELD -def validate_date(date: datetime) -> str: - """Returns the date part (year, month, day) from a datetime object.""" +def get_date_as_string(date: Optional[datetime]) -> str: + """Return the date string representation (year, month, day) of a datetime object.""" return str(date.date()) if date else NA_FIELD -def validate_list(value: list) -> str: - """Formats a list elements as comma separated individual values.""" - return validate_empty_field( - ", ".join(validate_empty_field(v) for v in value) if value else NA_FIELD - ) +def get_list_as_string(value: Optional[list[str]]) -> str: + """Return list elements as comma separated individual string values.""" + return ", ".join(v for v in value) if value else NA_FIELD -def validate_path(file_path: str) -> str: - """Returns the name of a specific file.""" +def get_path_as_string(file_path: Optional[str]) -> str: + """Return a report validated file name.""" return Path(file_path).name if file_path and Path(file_path).is_file() else NA_FIELD -def validate_gender(value: str) -> str: - """Formats the provided gender.""" - return validate_empty_field(REPORT_GENDER.get(value)) +def 
get_gender_as_string(gender: Optional[Gender]) -> str: + """Return a report adapted gender.""" + return get_report_string(REPORT_GENDER.get(gender)) -def validate_rml_sample(prep_category: str) -> str: - """Checks if a specific sample is a RML one.""" +def get_prep_category_as_string(prep_category: Optional[PrepCategory]) -> str: + """Return a report validated prep category as string.""" if prep_category == OrderType.RML: LOG.error("The delivery report generation does not support RML samples") raise ValueError - return validate_empty_field(prep_category) - - -def validate_balsamic_analysis_type(value: str) -> str: - """Translates the BALSAMIC analysis type string to an accepted value for the delivery report.""" - return validate_empty_field(BALSAMIC_ANALYSIS_TYPE.get(value)) - - -def validate_supported_pipeline(cls, values: dict) -> dict: - """Validates if the report generation supports a specific pipeline and analysis type.""" - if values and values.get("pipeline") and values.get("customer_pipeline"): - # Checks that the requested analysis and the executed one match - if values.get("pipeline") != values.get("customer_pipeline"): - LOG.error( - f"The analysis requested by the customer ({values.get('customer_pipeline')}) does not match the one " - f"executed ({values.get('pipeline')})" - ) - raise ValueError - # Check that the generation of the report supports the data analysis executed on the case - if values.get("pipeline") not in REPORT_SUPPORTED_PIPELINES: - LOG.error( - f"The pipeline {values.get('pipeline')} does not support delivery report generation" - ) - raise ValueError - # Validates the analysis type - if Pipeline.BALSAMIC in values.get("pipeline"): - values["type"] = validate_balsamic_analysis_type(values["type"]) - return values + return get_report_string(prep_category) + + +def get_analysis_type_as_string(analysis_type: Optional[str], info: ValidationInfo) -> str: + """Return the analysis type as an accepted string value for the delivery report.""" + if analysis_type and Pipeline.BALSAMIC in info.data.get("pipeline"): + analysis_type: str = BALSAMIC_ANALYSIS_TYPE.get(analysis_type) + return get_report_string(analysis_type) diff --git a/tests/meta/report/test_balsamic_api.py b/tests/meta/report/test_balsamic_api.py index c73484995d..f783ff5df9 100644 --- a/tests/meta/report/test_balsamic_api.py +++ b/tests/meta/report/test_balsamic_api.py @@ -46,7 +46,7 @@ def test_get_sample_metadata( ) # THEN check that the sample metadata is correctly retrieved - assert sample_metadata.dict() == expected_metadata + assert sample_metadata.model_dump() == expected_metadata def test_get_variant_callers(report_api_balsamic, case_id): diff --git a/tests/meta/report/test_mip_dna_api.py b/tests/meta/report/test_mip_dna_api.py index 45531ad84d..c423c81434 100644 --- a/tests/meta/report/test_mip_dna_api.py +++ b/tests/meta/report/test_mip_dna_api.py @@ -27,7 +27,7 @@ def test_get_sample_metadata( sample_metadata = report_api_mip_dna.get_sample_metadata(case_mip_dna, sample, mip_metadata) # THEN check that the sample metadata is correctly retrieved - assert sample_metadata == expected_metadata + assert sample_metadata.model_dump() == expected_metadata def test_get_sample_coverage(report_api_mip_dna, sample_store, helpers: StoreHelpers, case_mip_dna): diff --git a/tests/meta/report/test_report_api.py b/tests/meta/report/test_report_api.py index 6eb69f31b7..19a4ce065d 100644 --- a/tests/meta/report/test_report_api.py +++ b/tests/meta/report/test_report_api.py @@ -3,25 +3,16 @@ from datetime import 
datetime, timedelta from pathlib import Path +import pytest from _pytest.logging import LogCaptureFixture -from cg.constants import REPORT_GENDER +from cg.constants import REPORT_GENDER, Pipeline from cg.exc import DeliveryReportError from cg.meta.report.mip_dna import MipDNAReportAPI from cg.meta.workflow.mip_dna import MipDNAAnalysisAPI from cg.models.mip.mip_analysis import MipAnalysis -from cg.models.report.report import ( - CaseModel, - CustomerModel, - DataAnalysisModel, - ReportModel, -) -from cg.models.report.sample import ( - ApplicationModel, - MethodsModel, - SampleModel, - TimestampModel, -) +from cg.models.report.report import CaseModel, CustomerModel, DataAnalysisModel, ReportModel +from cg.models.report.sample import ApplicationModel, MethodsModel, SampleModel, TimestampModel from cg.store import Store from cg.store.models import Analysis, Family, FamilySample from tests.meta.report.helper import recursive_assert @@ -73,7 +64,7 @@ def test_render_delivery_report(report_api_mip_dna: MipDNAReportAPI, case_mip_dn ) # WHEN rendering the report - rendered_report: str = report_api_mip_dna.render_delivery_report(report_data.dict()) + rendered_report: str = report_api_mip_dna.render_delivery_report(report_data.model_dump()) # THEN validate rendered report assert len(rendered_report) > 0 @@ -97,7 +88,7 @@ def test_get_validated_report_data(report_api_mip_dna: MipDNAReportAPI, case_mip report_data: ReportModel = report_api_mip_dna.validate_report_fields( case_mip_dna.internal_id, report_data, force_report=False ) - recursive_assert(report_data.dict()) + recursive_assert(report_data.model_dump()) def test_validate_report_empty_fields( @@ -190,7 +181,7 @@ def test_get_customer_data(report_api_mip_dna: MipDNAReportAPI, case_mip_dna: Fa customer_data: CustomerModel = report_api_mip_dna.get_customer_data(case_mip_dna) # THEN check if the retrieved customer data corresponds to the expected one - assert customer_data == expected_customer + assert customer_data.model_dump() == expected_customer def test_get_report_version_version( @@ -357,7 +348,7 @@ def test_get_sample_methods_data( sample_methods: MethodsModel = report_api_mip_dna.get_sample_methods_data(sample_id) # THEN check the agreement between expected and extracted values - assert sample_methods == expected_sample_methods + assert sample_methods.model_dump() == expected_sample_methods def test_get_case_analysis_data( @@ -381,6 +372,63 @@ def test_get_case_analysis_data( assert case_analysis_data.scout_files +def test_get_case_analysis_data_pipeline_match_error( + report_api_mip_dna: MipDNAReportAPI, + mip_analysis_api: MipDNAAnalysisAPI, + case_mip_dna: Family, + caplog: LogCaptureFixture, +): + """Test validation error if a customer requested pipeline does not match the data analysis.""" + + # GIVEN a pre-built case and a MIP-DNA analysis that has been started as Balsamic + mip_analysis: Analysis = case_mip_dna.analyses[0] + mip_analysis.pipeline = Pipeline.BALSAMIC + + # GIVEN a mip analysis mock metadata + mip_metadata: MipAnalysis = mip_analysis_api.get_latest_metadata(case_mip_dna.internal_id) + + # WHEN retrieving analysis information + + # THEN a validation error should be raised + with pytest.raises(ValueError): + report_api_mip_dna.get_case_analysis_data( + case=case_mip_dna, analysis=mip_analysis, analysis_metadata=mip_metadata + ) + assert ( + f"The analysis requested by the customer ({Pipeline.MIP_DNA}) does not match the one executed " + f"({mip_analysis.pipeline})" in caplog.text + ) + + +def 
test_get_case_analysis_data_pipeline_not_supported( + report_api_mip_dna: MipDNAReportAPI, + mip_analysis_api: MipDNAAnalysisAPI, + case_mip_dna: Family, + caplog: LogCaptureFixture, +): + """Test validation error if the analysis pipeline is not supported by the delivery report workflow.""" + + # GIVEN a pre-built case with Fluffy as data analysis + case_mip_dna.data_analysis = Pipeline.FLUFFY + mip_analysis: Analysis = case_mip_dna.analyses[0] + mip_analysis.pipeline = Pipeline.FLUFFY + + # GIVEN a mip analysis mock metadata + mip_metadata: MipAnalysis = mip_analysis_api.get_latest_metadata(case_mip_dna.internal_id) + + # WHEN retrieving data analysis information + + # THEN a validation error should be raised + with pytest.raises(ValueError): + report_api_mip_dna.get_case_analysis_data( + case=case_mip_dna, analysis=mip_analysis, analysis_metadata=mip_metadata + ) + assert ( + f"The pipeline {case_mip_dna.data_analysis} does not support delivery report generation" + in caplog.text + ) + + def test_get_sample_timestamp_data( report_api_mip_dna: MipDNAReportAPI, case_samples_data: list[FamilySample], @@ -404,4 +452,4 @@ def test_get_sample_timestamp_data( ) # THEN check if the dates are correctly retrieved - assert sample_timestamp_data == expected_case_samples_data + assert sample_timestamp_data.model_dump() == expected_case_samples_data diff --git a/tests/meta/report/test_rnafusion_api.py b/tests/meta/report/test_rnafusion_api.py index f9d08375aa..6c01436374 100644 --- a/tests/meta/report/test_rnafusion_api.py +++ b/tests/meta/report/test_rnafusion_api.py @@ -33,4 +33,4 @@ def test_get_sample_metadata( ) # THEN the sample metadata should be correctly retrieved and match the expected validated metrics - assert sample_metadata.dict() == rnafusion_validated_metrics + assert sample_metadata.model_dump() == rnafusion_validated_metrics diff --git a/tests/models/report/test_validators.py b/tests/models/report/test_validators.py index cd7de895f0..b1ab874cda 100644 --- a/tests/models/report/test_validators.py +++ b/tests/models/report/test_validators.py @@ -1,171 +1,213 @@ """Tests delivery report models validators.""" -from cg.constants import NA_FIELD, REPORT_GENDER, YES_FIELD, Pipeline +from datetime import datetime +from pathlib import Path +from typing import Any + +import pytest +from _pytest.logging import LogCaptureFixture +from pydantic import ValidationInfo + +from cg.constants import NA_FIELD, NO_FIELD, REPORT_GENDER, YES_FIELD, Pipeline +from cg.constants.constants import AnalysisType from cg.constants.subject import Gender from cg.models.orders.constants import OrderType from cg.models.report.validators import ( - validate_boolean, - validate_empty_field, - validate_float, - validate_gender, - validate_list, - validate_percentage, - validate_rml_sample, - validate_supported_pipeline, + get_analysis_type_as_string, + get_boolean_as_string, + get_date_as_string, + get_float_as_percentage, + get_float_as_string, + get_gender_as_string, + get_list_as_string, + get_path_as_string, + get_prep_category_as_string, + get_report_string, ) -def test_validate_empty_field(): - """Tests formatting an empty value.""" +def test_get_report_string(): + """Test formatting an empty value.""" # GIVEN a not valid empty field - none_field = None + none_field: Any = None # WHEN performing the validation - output = validate_empty_field(none_field) + output: str = get_report_string(none_field) # THEN check if the input value was formatted correctly assert output == NA_FIELD -def test_validate_boolean(): - 
"""Tests boolean formatting for the delivery report.""" +def test_get_boolean_as_string(): + """Test boolean formatting for the delivery report.""" # GIVEN a not formatted inputs - none_field = None - true_field = True - not_bool_field = "not a boolean" + none_field: Any = None + true_field: bool = True + false_field: bool = False + not_bool_field: str = "not a boolean" # WHEN performing the validation - validated_none_field = validate_boolean(none_field) - validated_true_field = validate_boolean(true_field) - validated_not_bool_field = validate_boolean(not_bool_field) + validated_none_field: str = get_boolean_as_string(none_field) + validated_true_field: str = get_boolean_as_string(true_field) + validated_false_field: str = get_boolean_as_string(false_field) + validated_not_bool_field = get_boolean_as_string(not_bool_field) # THEN check if the input values were formatted correctly assert validated_none_field == NA_FIELD assert validated_true_field == YES_FIELD + assert validated_false_field == NO_FIELD assert validated_not_bool_field == NA_FIELD -def test_validate_float(): - """Tests the validation of a float value.""" +def test_get_float_as_string(): + """Test the validation of a float value.""" - # GIVEN a valid float input (float and string format) - float_value = 12.3456789 - str_value = "12.3456789" + # GIVEN a valid float input + float_value: float = 12.3456789 # WHEN performing the validation - validated_float_value = validate_float(float_value) - validated_str_value = validate_float(str_value) + validated_float_value: str = get_float_as_string(float_value) - # THEN check if the input values were formatted correctly + # THEN check if the input value was formatted correctly assert validated_float_value == "12.35" - assert validated_str_value == "12.35" -def test_validate_float_zero_input(): - """Tests the validation of a float value.""" +def test_get_float_as_string_zero_input(): + """Tests the validation of a float value when input is zero.""" - # GIVEN a valid float input (float and string format) - float_value = 0.0 - str_value = "0.0" + # GIVEN a valid float input + float_value: float = 0.0 # WHEN performing the validation - validated_float_value = validate_float(float_value) - validated_str_value = validate_float(str_value) + validated_float_value: str = get_float_as_string(float_value) - # THEN check if the input values were formatted correctly + # THEN check if the input value was formatted correctly assert validated_float_value == "0.0" - assert validated_str_value == "0.0" -def test_validate_percentage(): - """Tests the validation of a percentage value.""" +def test_get_float_as_percentage(): + """Test the validation of a percentage value.""" # GIVEN a not formatted percentage - pct_value = 0.9876 + pct_value: float = 0.9876 # WHEN performing the validation - validated_pct_value = validate_percentage(pct_value) + validated_pct_value: str = get_float_as_percentage(pct_value) - # THEN check if the input values were formatted correctly + # THEN check if the input value was formatted correctly assert validated_pct_value == "98.76" -def test_validate_list(): - """Tests if a list is transformed into a string of comma separated values.""" +def test_get_float_as_percentage_zero_input(): + """Test the validation of a percentage value when input is zero.""" + + # GIVEN a zero input + pct_value: float = 0.0 + + # WHEN performing the validation + validated_pct_value: str = get_float_as_percentage(pct_value) + + # THEN check if the input value was formatted correctly + assert 
+
+
+def test_get_date_as_string(timestamp_now: datetime):
+    """Test the validation of a datetime object."""
+
+    # GIVEN a datetime object
+
+    # WHEN performing the validation
+    validated_date_value: str = get_date_as_string(timestamp_now)
+
+    # THEN check if the input value was formatted correctly
+    assert validated_date_value == str(timestamp_now.date())
+
+
+def test_get_list_as_string():
+    """Test if a list is transformed into a string of comma separated values."""

     # GIVEN a mock list
-    mock_list = ["I'm", "a", "list"]
+    mock_list: list[str] = ["I am", "a", "list"]

     # WHEN performing the validation
-    validated_list = validate_list(mock_list)
+    validated_list: str = get_list_as_string(mock_list)

     # THEN check if the input values were formatted correctly
-    assert validated_list == "I'm, a, list"
+    assert validated_list == "I am, a, list"


-def test_validate_rml_sample(caplog):
-    """Performs validation on a preparation category."""
+def test_get_path_as_string(filled_file: Path):
+    """Test file path name extraction."""

-    # GIVEN an invalid prep category
-    prep_category = OrderType.RML
+    # GIVEN a mock file path

     # WHEN performing the validation
-    try:
-        validate_rml_sample(prep_category)
-        assert False
-    # THEN check if an exception was raised
-    except ValueError:
-        assert "The delivery report generation does not support RML samples" in caplog.text
+    path_name: str = get_path_as_string(filled_file)
+
+    # THEN check if the file name was correctly extracted
+    assert path_name == "a_file.txt"


-def test_validate_gender(caplog):
-    """Tests report gender parsing."""
+def test_get_gender_as_string():
+    """Test report gender parsing."""

     # GIVEN an invalid gender category
-    gender = Gender.FEMALE
-    invalid_gender = "not_a_gender"
+    gender: Gender = Gender.FEMALE
+    invalid_gender: str = "not_a_gender"

     # WHEN performing the validation
-    validated_gender = validate_gender(gender)
-    validated_invalid_gender = validate_gender(invalid_gender)
+    validated_gender: str = get_gender_as_string(gender)
+    validated_invalid_gender: str = get_gender_as_string(invalid_gender)

     # THEN check if the gender has been correctly formatted
     assert validated_gender == REPORT_GENDER.get("female")
     assert validated_invalid_gender == NA_FIELD


-def test_validate_supported_pipeline_match_error(caplog):
-    """Tests if a customer requested pipeline matches the data analysis one."""
+def test_get_prep_category_as_string(caplog: LogCaptureFixture):
+    """Test validation on a preparation category."""

-    # GIVEN an input dictionary where the customers and executed pipeline are different
-    dict_different_pipelines = {"customer_pipeline": Pipeline.MIP_DNA, "pipeline": Pipeline.FLUFFY}
+    # GIVEN an invalid prep category
+    prep_category: OrderType = OrderType.RML

     # WHEN performing the validation
-    try:
-        validate_supported_pipeline(None, dict_different_pipelines)
-        assert False
+
     # THEN check if an exception was raised
-    except ValueError:
-        assert (
-            f"The analysis requested by the customer ({dict_different_pipelines.get('customer_pipeline')}) does not "
-            f"match the one executed ({dict_different_pipelines.get('pipeline')})" in caplog.text
-        )
+    with pytest.raises(ValueError):
+        get_prep_category_as_string(prep_category)
+    assert "The delivery report generation does not support RML samples" in caplog.text


-def test_validate_supported_pipeline(caplog):
-    """Tests that the analysis pipeline is supported by the delivery report workflow."""
+def test_get_analysis_type_as_string():
+    """Test analysis type formatting for the delivery report generation."""

-    # GIVEN a dictionary with a not supported pipeline
-    dict_invalid_pipeline = {"customer_pipeline": Pipeline.FLUFFY, "pipeline": Pipeline.FLUFFY}
+    # GIVEN a WGS analysis type and a model info dictionary
+    analysis_type: AnalysisType = AnalysisType.WHOLE_GENOME_SEQUENCING
+    model_info: ValidationInfo = ValidationInfo
+    model_info.data: dict[str, Any] = {"pipeline": Pipeline.MIP_DNA.value}

     # WHEN performing the validation
-    try:
-        validate_supported_pipeline(None, dict_invalid_pipeline)
-        assert False
-    # THEN check if an exception was raised
-    except ValueError:
-        assert (
-            f"The pipeline {dict_invalid_pipeline.get('pipeline')} does not support delivery report generation"
-            in caplog.text
-        )
+    validated_analysis_type: str = get_analysis_type_as_string(
+        analysis_type=analysis_type, info=model_info
+    )
+
+    # THEN check if the input value was formatted correctly
+    assert validated_analysis_type == analysis_type.value
+
+
+def test_get_analysis_type_as_string_balsamic():
+    """Test BALSAMIC analysis type formatting for the delivery report generation."""
+
+    # GIVEN a BALSAMIC tumor/normal WGS analysis type and a model info dictionary
+    analysis_type: str = "tumor_normal_wgs"
+    model_info: ValidationInfo = ValidationInfo
+    model_info.data: dict[str, Any] = {"pipeline": Pipeline.BALSAMIC.value}
+
+    # WHEN performing the validation
+    validated_analysis_type: str = get_analysis_type_as_string(
+        analysis_type=analysis_type, info=model_info
+    )
+
+    # THEN check if the input value was formatted correctly
+    assert validated_analysis_type == "Tumör/normal (helgenomsekvensering)"

From 0b8ae434b83e9b6c31fea0822802f760f9c1b225 Mon Sep 17 00:00:00 2001
From: Clinical Genomics Bot
Date: Mon, 23 Oct 2023 11:23:08 +0000
Subject: =?UTF-8?q?Bump=20version:=2051.7.1=20=E2=86=92=2051.7?=
 =?UTF-8?q?.2=20[skip=20ci]?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg | 2 +-
 cg/__init__.py   | 2 +-
 setup.py         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 149f3e57e1..8e124ea1c4 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 51.7.1
+current_version = 51.7.2
 commit = True
 tag = True
 tag_name = v{new_version}
diff --git a/cg/__init__.py b/cg/__init__.py
index 6c368f39af..4c08034b52 100644
--- a/cg/__init__.py
+++ b/cg/__init__.py
@@ -1,4 +1,4 @@
 import pkg_resources

 __title__ = "cg"
-__version__ = "51.7.1"
+__version__ = "51.7.2"
diff --git a/setup.py b/setup.py
index 00c8470c3f..18af15ebda 100644
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,7 @@ def parse_requirements(req_path="./requirements.txt"):

 setup(
     name=NAME,
-    version="51.7.1",
+    version="51.7.2",
     description=DESCRIPTION,
     long_description=LONG_DESCRIPTION,
     long_description_content_type="text/markdown",
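
A note on the validator pattern used throughout the report-model hunks above: the Pydantic v1 `validator`/`root_validator` pairs are replaced by field-level `Annotated[..., BeforeValidator(...)]` coercion plus a `model_validator(mode="after")` cross-field check. The sketch below reproduces that pattern standalone, assuming Pydantic v2 is installed; `NA_FIELD`, the `BALSAMIC_ANALYSIS_TYPE` mapping, and `MiniAnalysisModel` are simplified stand-ins for illustration, not the cg modules themselves.

    from typing import Annotated, Any, Optional

    from pydantic import BaseModel, BeforeValidator, ValidationInfo, model_validator

    NA_FIELD = "N/A"  # stand-in for cg.constants.NA_FIELD
    BALSAMIC_ANALYSIS_TYPE = {"tumor_normal_wgs": "Tumör/normal (helgenomsekvensering)"}  # stand-in


    def get_report_string(value: Any) -> str:
        """Coerce falsy raw input to the N/A placeholder before type validation."""
        return str(value) if value else NA_FIELD


    def get_analysis_type_as_string(value: Optional[str], info: ValidationInfo) -> str:
        """Translate BALSAMIC analysis types; info.data only holds fields declared earlier."""
        if value and "balsamic" in info.data.get("pipeline", ""):
            value = BALSAMIC_ANALYSIS_TYPE.get(value)
        return get_report_string(value)


    class MiniAnalysisModel(BaseModel):
        """Simplified stand-in for the DataAnalysisModel fields shown above."""

        customer_pipeline: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD
        pipeline: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD
        type: Annotated[str, BeforeValidator(get_analysis_type_as_string)] = NA_FIELD

        @model_validator(mode="after")
        def check_pipelines_match(self) -> "MiniAnalysisModel":
            # Cross-field check replacing the v1 root_validator(pre=True).
            if self.pipeline != self.customer_pipeline:
                raise ValueError("requested and executed pipelines differ")
            return self


    model = MiniAnalysisModel(
        customer_pipeline="balsamic", pipeline="balsamic", type="tumor_normal_wgs"
    )
    print(model.type)  # Tumör/normal (helgenomsekvensering)
    print(MiniAnalysisModel(pipeline=None, customer_pipeline=None).type)  # N/A

Because `BeforeValidator` functions run on the raw input, passing `None` yields the `N/A` placeholder rather than a type error, which is the behaviour the `test_get_*` cases above assert. A two-argument validator such as `get_analysis_type_as_string` also receives a `ValidationInfo`, whose `data` dict of previously validated fields is what the analysis-type tests emulate by assigning to `ValidationInfo.data`.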