Skip to content

Commit

Permalink
Add support to run raredisease with WES (#3232)
Browse files Browse the repository at this point in the history
### Added

- Support sending exomes cases in raredisease

### Changed

- Default target bed file for both MIP and raredisease updated to "twistexomerefseq_10.2_hg19_design.bed"

### Fixed
  • Loading branch information
rannick authored May 27, 2024
1 parent bf35bfd commit 5b90cde
Show file tree
Hide file tree
Showing 8 changed files with 137 additions and 98 deletions.
2 changes: 1 addition & 1 deletion cg/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class ControlOptions(StrEnum):
EMPTY: str = ""


# Previous default capture kit; this is the line the commit removes.
DEFAULT_CAPTURE_KIT = "twistexomerefseq_9.1_hg19_design.bed"
# New default capture kit; per the changelog above it is the default for both MIP and raredisease.
DEFAULT_CAPTURE_KIT = "twistexomerefseq_10.2_hg19_design.bed"


class CustomerId(StrEnum):
Expand Down
40 changes: 29 additions & 11 deletions cg/meta/workflow/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from datetime import datetime
from pathlib import Path
from subprocess import CalledProcessError
from typing import Any, Iterator
from typing import Iterator

import click
from housekeeper.store.models import Bundle, Version
Expand All @@ -29,7 +29,7 @@
from cg.models.cg_config import CGConfig
from cg.models.fastq import FastqFileMeta
from cg.services.quality_controller import QualityControllerService
from cg.store.models import Analysis, Application, BedVersion, Case, CaseSample, Sample
from cg.store.models import Analysis, BedVersion, Case, CaseSample, Sample

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -66,6 +66,11 @@ def process(self):
def fastq_handler(self):
    """Return the FastqHandler class itself (not an instance)."""
    return FastqHandler

@property
def is_multiple_samples_allowed(self) -> bool:
    """
    Tell whether a case may have more than one sample linked to it.
    This implementation always returns True.
    """
    return True

@staticmethod
def get_help(context):
"""
Expand Down Expand Up @@ -370,6 +375,14 @@ def gather_file_metadata_for_sample(self, sample: Sample) -> list[FastqFileMeta]
)
]

def get_validated_case(self, case_id: str) -> Case:
    """
    Return the case with the given internal id after checking its sample links.
    Raises:
        CgError: If no samples are linked to the case, or if more than one sample is
            linked while the analysis does not allow multiple samples.
    """
    case: Case = self.status_db.get_case_by_internal_id(internal_id=case_id)
    if not case.links:
        raise CgError(f"No samples linked to {case_id}")
    # The walrus target must be parenthesised: ":=" binds looser than ">" and "and", so
    # without the parentheses nlinks would hold the boolean result and the message below
    # would read "True found" instead of the number of linked samples.
    if (nlinks := len(case.links)) > 1 and not self.is_multiple_samples_allowed:
        raise CgError(f"Only one sample per case is allowed. {nlinks} found")
    return case

def link_fastq_files_for_sample(
self, case: Case, sample: Sample, concatenate: bool = False
) -> None:
Expand Down Expand Up @@ -692,12 +705,17 @@ def run_analysis(self, *args, **kwargs):
raise NotImplementedError

def get_data_analysis_type(self, case_id: str) -> str | None:
"""Return data analysis type carried out."""
case_sample: Sample = self.status_db.get_case_samples_by_case_id(case_internal_id=case_id)[
0
].sample
lims_sample: dict[str, Any] = self.lims_api.sample(case_sample.internal_id)
application: Application = self.status_db.get_application_by_tag(
tag=lims_sample.get("application")
)
return application.analysis_type if application else None
"""
Return data analysis type carried out.
Raises:
ValueError: If the samples in a case have not the same analysis type.
"""
case: Case = self.get_validated_case(case_id)
analysis_types: set[str] = {
link.sample.application_version.application.analysis_type for link in case.links
}
if len(analysis_types) > 1:
raise ValueError(
f"Case samples have different analysis types {', '.join(analysis_types)}"
)
return analysis_types.pop() if analysis_types else None
15 changes: 3 additions & 12 deletions cg/meta/workflow/nf_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,6 @@ def is_params_appended_to_nextflow_config(self) -> bool:
"""Return True if parameters should be added into the nextflow config file instead of the params file."""
return True

@property
def is_multiple_samples_allowed(self) -> bool:
    """
    Tell whether a case may have more than one sample linked to it.
    This implementation always returns True.
    """
    return True

@property
def is_multiqc_pattern_search_exact(self) -> bool:
"""Return True if only exact pattern search is allowed to collect metrics information from MultiQC file.
Expand Down Expand Up @@ -264,14 +259,10 @@ def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[l
raise NotImplementedError

def get_sample_sheet_content(self, case_id: str) -> list[list[Any]]:
"""Collect and format information required to build a sample sheet for a case.
"""Return formatted information required to build a sample sheet for a case.
This contains information for all samples linked to the case."""
case: Case = self.status_db.get_case_by_internal_id(internal_id=case_id)
if len(case.links) == 0:
raise CgError(f"No samples linked to {case_id}")
if nlinks := len(case.links) > 1 and not self.is_multiple_samples_allowed:
raise CgError(f"Only one sample per case is allowed. {nlinks} found")
sample_sheet_content = []
sample_sheet_content: list = []
case: Case = self.get_validated_case(case_id)
LOG.info(f"Samples linked to case {case_id}: {len(case.links)}")
LOG.debug("Getting sample sheet information")
for link in case.links:
Expand Down
25 changes: 20 additions & 5 deletions cg/meta/workflow/raredisease.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
from pathlib import Path
from typing import Any

from cg.constants import Workflow
from cg.constants.constants import GenomeVersion
from cg.constants import DEFAULT_CAPTURE_KIT, Workflow
from cg.constants.constants import AnalysisType, GenomeVersion
from cg.constants.gene_panel import GenePanelGenomeBuild
from cg.constants.nf_analysis import RAREDISEASE_METRIC_CONDITIONS
from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
from cg.models.cg_config import CGConfig
from cg.models.nf_analysis import WorkflowParameters
from cg.models.raredisease.raredisease import (
RarediseaseParameters,
RarediseaseSampleSheetEntry,
RarediseaseSampleSheetHeaders,
)
Expand Down Expand Up @@ -74,11 +74,26 @@ def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[l
)
return sample_sheet_entry.reformat_sample_content

def get_workflow_parameters(self, case_id: str) -> WorkflowParameters:
def get_target_bed(self, case_id: str, analysis_type: str) -> str:
    """
    Return the target bed file for the case as reported by LIMS.
    When LIMS has none, WGS analyses fall back to the default capture kit.
    Raises:
        ValueError: If LIMS has no target bed and the analysis type is not WGS.
    """
    lims_bed: str = self.get_target_bed_from_lims(case_id=case_id)
    if lims_bed:
        return lims_bed
    # Only WGS may run without a capture kit registered in LIMS.
    if analysis_type != AnalysisType.WHOLE_GENOME_SEQUENCING:
        raise ValueError("No capture kit was found in LIMS")
    return DEFAULT_CAPTURE_KIT

def get_workflow_parameters(self, case_id: str) -> RarediseaseParameters:
    """
    Return the raredisease parameters for a case.
    The parameters hold the sample sheet path, the case output directory, the analysis
    type of the case and the target bed to use.
    """
    # Annotated to match the declared return type of get_data_analysis_type (str | None).
    analysis_type: str | None = self.get_data_analysis_type(case_id=case_id)
    target_bed: str = self.get_target_bed(case_id=case_id, analysis_type=analysis_type)
    return RarediseaseParameters(
        input=self.get_sample_sheet_path(case_id=case_id),
        outdir=self.get_case_path(case_id=case_id),
        analysis_type=analysis_type,
        target_bed=target_bed,
    )

@staticmethod
Expand Down
9 changes: 8 additions & 1 deletion cg/models/raredisease/raredisease.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pydantic import BaseModel

from cg.models.nf_analysis import NextflowSampleSheetEntry
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters


class RarediseaseQCMetrics(BaseModel):
Expand Down Expand Up @@ -62,3 +62,10 @@ class RarediseaseSampleSheetHeaders(StrEnum):
@classmethod
def list(cls) -> list[str]:
    """Return the value of every member of the enum."""
    return [header.value for header in cls]


class RarediseaseParameters(WorkflowParameters):
    """Model for Raredisease parameters.

    Extends WorkflowParameters with the two fields below.
    """

    # Target bed (capture kit) to use for the case.
    target_bed: str
    # Analysis type of the case.
    analysis_type: str
1 change: 1 addition & 0 deletions tests/cli/workflow/nf_analysis/test_cli_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from cg.apps.lims import LimsAPI
from cg.cli.workflow.base import workflow as workflow_cli
from cg.constants import EXIT_SUCCESS, Workflow
from cg.meta.workflow.raredisease import RarediseaseAnalysisAPI
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
from cg.models.cg_config import CGConfig
from tests.cli.workflow.conftest import mock_analysis_flow_cell
Expand Down
138 changes: 71 additions & 67 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2445,6 +2445,77 @@ def raredisease_deliverables_file_path(raredisease_dir, raredisease_case_id) ->
).with_suffix(FileExtensions.YAML)


@pytest.fixture(scope="function")
def raredisease_context(
    cg_context: CGConfig,
    helpers: StoreHelpers,
    nf_analysis_housekeeper: HousekeeperAPI,
    trailblazer_api: MockTB,
    raredisease_case_id: str,
    sample_id: str,
    no_sample_case_id: str,
    total_sequenced_reads_pass: int,
    apptag_rna: str,
    case_id_not_enough_reads: str,
    sample_id_not_enough_reads: str,
    total_sequenced_reads_not_pass: int,
    mocker,
) -> CGConfig:
    """
    Return a CLI context whose analysis API is a RarediseaseAnalysisAPI.
    The store holds a case without samples, a case with enough reads and a case without
    enough reads. The target bed lookup in LIMS is replaced by a fixed value.
    """
    cg_context.housekeeper_api_ = nf_analysis_housekeeper
    cg_context.trailblazer_api_ = trailblazer_api
    cg_context.meta_apis["analysis_api"] = RarediseaseAnalysisAPI(config=cg_context)
    store: Store = cg_context.status_db

    # Error case: no samples are linked to it
    helpers.add_case(store, internal_id=no_sample_case_id, name=no_sample_case_id)

    # Case whose single sample has enough reads
    passing_case: Case = helpers.add_case(
        store=store,
        internal_id=raredisease_case_id,
        name=raredisease_case_id,
        data_analysis=Workflow.RAREDISEASE,
    )
    passing_sample: Sample = helpers.add_sample(
        store,
        internal_id=sample_id,
        last_sequenced_at=datetime.now(),
        reads=total_sequenced_reads_pass,
        application_tag=apptag_rna,
    )
    helpers.add_relationship(store, case=passing_case, sample=passing_sample)

    # Case whose single sample does not have enough reads
    failing_case: Case = helpers.add_case(
        store=store,
        internal_id=case_id_not_enough_reads,
        name=case_id_not_enough_reads,
        data_analysis=Workflow.RAREDISEASE,
    )
    failing_sample: Sample = helpers.add_sample(
        store,
        internal_id=sample_id_not_enough_reads,
        last_sequenced_at=datetime.now(),
        reads=total_sequenced_reads_not_pass,
        application_tag=apptag_rna,
    )
    helpers.add_relationship(store, case=failing_case, sample=failing_sample)

    # Replace the LIMS target bed lookup with a fixed value
    mocker.patch.object(
        RarediseaseAnalysisAPI, "get_target_bed_from_lims", return_value="some_target_bed_file"
    )

    return cg_context


@pytest.fixture(scope="function")
def deliverable_data(raredisease_dir: Path, raredisease_case_id: str, sample_id: str) -> dict:
return {
Expand Down Expand Up @@ -3882,73 +3953,6 @@ def downsample_api(
)


@pytest.fixture(scope="function")
def raredisease_context(
    cg_context: CGConfig,
    helpers: StoreHelpers,
    nf_analysis_housekeeper: HousekeeperAPI,
    trailblazer_api: MockTB,
    sample_id: str,
    no_sample_case_id: str,
    total_sequenced_reads_pass: int,
    apptag_rna: str,
    raredisease_case_id: str,
    case_id_not_enough_reads: str,
    sample_id_not_enough_reads: str,
    total_sequenced_reads_not_pass: int,
) -> CGConfig:
    """Raredisease context to use in CLI.

    Sets a RarediseaseAnalysisAPI as the analysis API and adds three cases to the store:
    one without samples, one with enough reads and one without enough reads.
    """
    cg_context.housekeeper_api_ = nf_analysis_housekeeper
    cg_context.trailblazer_api_ = trailblazer_api
    cg_context.meta_apis["analysis_api"] = RarediseaseAnalysisAPI(config=cg_context)
    status_db: Store = cg_context.status_db

    # Create ERROR case with NO SAMPLES
    helpers.add_case(status_db, internal_id=no_sample_case_id, name=no_sample_case_id)

    # Create a textbook case with enough reads
    case_enough_reads: Case = helpers.add_case(
        store=status_db,
        internal_id=raredisease_case_id,
        name=raredisease_case_id,
        data_analysis=Workflow.RAREDISEASE,
    )

    sample_raredisease_case_enough_reads: Sample = helpers.add_sample(
        status_db,
        application_tag=apptag_rna,
        internal_id=sample_id,
        reads=total_sequenced_reads_pass,
        last_sequenced_at=datetime.now(),
    )

    # Link the sample to the case so the case has exactly one sample
    helpers.add_relationship(
        status_db,
        case=case_enough_reads,
        sample=sample_raredisease_case_enough_reads,
    )

    # Create a case without enough reads
    case_not_enough_reads: Case = helpers.add_case(
        store=status_db,
        internal_id=case_id_not_enough_reads,
        name=case_id_not_enough_reads,
        data_analysis=Workflow.RAREDISEASE,
    )

    sample_not_enough_reads: Sample = helpers.add_sample(
        status_db,
        application_tag=apptag_rna,
        internal_id=sample_id_not_enough_reads,
        reads=total_sequenced_reads_not_pass,
        last_sequenced_at=datetime.now(),
    )

    helpers.add_relationship(status_db, case=case_not_enough_reads, sample=sample_not_enough_reads)

    return cg_context


@pytest.fixture
def fastq_file_meta_raw(flow_cell_name: str) -> dict:
return {
Expand Down
5 changes: 4 additions & 1 deletion tests/meta/workflow/test_raredisease.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_get_sample_sheet_content(
assert contains_pattern


def test_write_params_file(raredisease_context: CGConfig, raredisease_case_id: str):
def test_write_params_file(raredisease_context: CGConfig, raredisease_case_id: str, mocker):
# GIVEN Raredisease analysis API and input (nextflow sample sheet path)/output (case directory) parameters
analysis_api: RarediseaseAnalysisAPI = raredisease_context.meta_apis["analysis_api"]

Expand All @@ -42,6 +42,9 @@ def test_write_params_file(raredisease_context: CGConfig, raredisease_case_id: s
# THEN case directory is created
assert os.path.exists(analysis_api.get_case_path(case_id=raredisease_case_id))

mocker.patch.object(RarediseaseAnalysisAPI, "get_target_bed_from_lims")
RarediseaseAnalysisAPI.get_target_bed_from_lims.return_value = "some_target_bed_file"

# WHEN writing parameters file
analysis_api.write_params_file(case_id=raredisease_case_id)

Expand Down

0 comments on commit 5b90cde

Please sign in to comment.