Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support to run raredisease with WES #3232

Merged
merged 42 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
255076f
add analysis_type and target_bed to config file
rannick May 15, 2024
d602e2e
fix typo and linting
rannick May 15, 2024
e4012b3
add new params to workflow params
rannick May 15, 2024
bbb60ea
fix parameters
rannick May 15, 2024
db785d5
add default setting
rannick May 15, 2024
e6c51c9
add error raising and mock lims access in test
rannick May 15, 2024
d9e75bf
add mock lims access for all necessary tests
rannick May 15, 2024
970bc25
black
rannick May 15, 2024
3219aa5
add mock lims access for all necessary tests
rannick May 15, 2024
5467a0c
Merge branch 'master' into raredisease-exomes
rannick May 15, 2024
e211fd8
Update cg/meta/workflow/nf_analysis.py
rannick May 16, 2024
6b057d2
Update cg/meta/workflow/nf_analysis.py
rannick May 16, 2024
a604856
Update cg/meta/workflow/raredisease.py
rannick May 16, 2024
c54bcc0
Update cg/meta/workflow/raredisease.py
rannick May 17, 2024
28c974c
Update tests/cli/workflow/nf_analysis/test_cli_config_case.py
rannick May 21, 2024
438b896
remove duplicate definition of raredisease_context
rannick May 21, 2024
a07258d
test with mocker part of fixture
rannick May 21, 2024
4d9a135
Merge branch 'raredisease-exomes' of https://github.com/Clinical-Geno…
rannick May 21, 2024
b795724
remove mocker
rannick May 23, 2024
1e47855
Merge branch 'master' into raredisease-exomes
rannick May 23, 2024
d7cf034
Update cg/meta/workflow/nf_analysis.py
rannick May 23, 2024
f8175ac
Update tests/conftest.py
rannick May 23, 2024
89f13c4
typing and moving to analysisAPI
rannick May 23, 2024
9fcae2d
fix merge conflicts
rannick May 23, 2024
3b89d94
remove chanjo2
rannick May 23, 2024
c0fbdab
use statusDB to extract analysis type
rannick May 23, 2024
3ce4b31
remove redundant mocker
rannick May 23, 2024
c360e6c
use optional string
rannick May 23, 2024
30107d7
Merge branch 'master' into raredisease-exomes
rannick May 23, 2024
5042ea0
Update cg/meta/workflow/analysis.py
rannick May 23, 2024
5449146
Update cg/meta/workflow/analysis.py
rannick May 23, 2024
89a956a
Merge branch 'master' into raredisease-exomes
rannick May 23, 2024
b4e6f37
move get_target bed to RarediseaseAnalysisAPI
rannick May 23, 2024
c4154e2
move get-
rannick May 23, 2024
1b7326a
black
rannick May 23, 2024
f32c99a
Update cg/meta/workflow/analysis.py
rannick May 23, 2024
b941ee3
add error message
rannick May 23, 2024
3837a16
move is_multiple_samples_allowed into analysisAPI to have get_validat…
rannick May 23, 2024
fc7721f
Update cg/meta/workflow/analysis.py
rannick May 24, 2024
a2709f4
Merge branch 'master' into raredisease-exomes
rannick May 24, 2024
8be9d31
black
rannick May 24, 2024
2f6f8c8
Merge branch 'master' into raredisease-exomes
rannick May 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cg/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class ControlOptions(StrEnum):
EMPTY: str = ""


DEFAULT_CAPTURE_KIT = "twistexomerefseq_9.1_hg19_design.bed"
DEFAULT_CAPTURE_KIT = "twistexomerefseq_10.2_hg19_design.bed"
rannick marked this conversation as resolved.
Show resolved Hide resolved


class CustomerId(StrEnum):
Expand Down
21 changes: 10 additions & 11 deletions cg/meta/workflow/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from datetime import datetime
from pathlib import Path
from subprocess import CalledProcessError
from typing import Any, Iterator
from typing import Iterator

import click
from housekeeper.store.models import Bundle, Version
Expand All @@ -29,7 +29,7 @@
from cg.models.cg_config import CGConfig
from cg.models.fastq import FastqFileMeta
from cg.services.quality_controller import QualityControllerService
from cg.store.models import Analysis, Application, BedVersion, Case, CaseSample, Sample
from cg.store.models import Analysis, BedVersion, Case, CaseSample, Sample

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -692,12 +692,11 @@ def run_analysis(self, *args, **kwargs):
raise NotImplementedError

def get_data_analysis_type(self, case_id: str) -> str | None:
"""Return data analysis type carried out."""
case_sample: Sample = self.status_db.get_case_samples_by_case_id(case_internal_id=case_id)[
0
].sample
lims_sample: dict[str, Any] = self.lims_api.sample(case_sample.internal_id)
application: Application = self.status_db.get_application_by_tag(
tag=lims_sample.get("application")
)
return application.analysis_type if application else None
"""Return data analysis type carried out. Raise an error if the samples in a case do not all have the same analysis type."""
rannick marked this conversation as resolved.
Show resolved Hide resolved
case: Case = self.get_validated_case(case_id)
analysis_types: set[str] = {
link.sample.application_version.application.analysis_type for link in case.links
}
if len(analysis_types) > 1:
raise ValueError(...)
rannick marked this conversation as resolved.
Show resolved Hide resolved
return analysis_types.pop() if analysis_types else None
18 changes: 11 additions & 7 deletions cg/meta/workflow/nf_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,10 @@ def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[l
raise NotImplementedError

def get_sample_sheet_content(self, case_id: str) -> list[list[Any]]:
"""Collect and format information required to build a sample sheet for a case.
"""Return formatted information required to build a sample sheet for a case.
This contains information for all samples linked to the case."""
case: Case = self.status_db.get_case_by_internal_id(internal_id=case_id)
if len(case.links) == 0:
raise CgError(f"No samples linked to {case_id}")
if nlinks := len(case.links) > 1 and not self.is_multiple_samples_allowed:
raise CgError(f"Only one sample per case is allowed. {nlinks} found")
sample_sheet_content = []
sample_sheet_content: list = []
case: Case = self.get_validated_case(case_id)
LOG.info(f"Samples linked to case {case_id}: {len(case.links)}")
LOG.debug("Getting sample sheet information")
for link in case.links:
Expand Down Expand Up @@ -768,6 +764,14 @@ def metrics_deliver(self, case_id: str, dry_run: bool):
self.write_metrics_deliverables(case_id=case_id, dry_run=dry_run)
self.validate_qc_metrics(case_id=case_id, dry_run=dry_run)

def get_validated_case(self, case_id: str) -> Case:
    """Return the case with the given internal id after validating its sample links.

    Raises:
        CgError: if no samples are linked to the case, or if more than one sample is
            linked while the workflow does not allow multiple samples per case.
    """
    case: Case = self.status_db.get_case_by_internal_id(internal_id=case_id)
    if not case.links:
        raise CgError(f"No samples linked to {case_id}")
    # Count before testing: `nlinks := len(...) > 1 and not ...` binds the boolean result
    # of the whole condition to nlinks, which made the message below read "True found".
    nlinks: int = len(case.links)
    if nlinks > 1 and not self.is_multiple_samples_allowed:
        raise CgError(f"Only one sample per case is allowed. {nlinks} found")
    return case

def report_deliver(self, case_id: str, dry_run: bool) -> None:
"""Write deliverables file."""

Expand Down
25 changes: 20 additions & 5 deletions cg/meta/workflow/raredisease.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
from pathlib import Path
from typing import Any

from cg.constants import Workflow
from cg.constants.constants import GenomeVersion
from cg.constants import DEFAULT_CAPTURE_KIT, Workflow
from cg.constants.constants import AnalysisType, GenomeVersion
from cg.constants.gene_panel import GenePanelGenomeBuild
from cg.constants.nf_analysis import RAREDISEASE_METRIC_CONDITIONS
from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
from cg.models.cg_config import CGConfig
from cg.models.nf_analysis import WorkflowParameters
from cg.models.raredisease.raredisease import (
RarediseaseParameters,
RarediseaseSampleSheetEntry,
RarediseaseSampleSheetHeaders,
)
Expand Down Expand Up @@ -74,11 +74,26 @@ def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[l
)
return sample_sheet_entry.reformat_sample_content

def get_workflow_parameters(self, case_id: str) -> WorkflowParameters:
def get_target_bed(self, case_id: str, analysis_type: str) -> str:
    """Return the target bed (capture kit) to use for the case.

    The bed file found in LIMS is returned when there is one. When LIMS has none,
    whole genome sequencing falls back to the default capture kit and every other
    analysis type raises a ValueError.
    """
    bed_from_lims: str = self.get_target_bed_from_lims(case_id=case_id)
    if bed_from_lims:
        return bed_from_lims
    # Nothing in LIMS: only WGS is allowed to fall back to the default kit
    if analysis_type != AnalysisType.WHOLE_GENOME_SEQUENCING:
        raise ValueError("No capture kit was found in LIMS")
    return DEFAULT_CAPTURE_KIT

def get_workflow_parameters(self, case_id: str) -> RarediseaseParameters:
"""Return parameters."""
return WorkflowParameters(
analysis_type: AnalysisType = self.get_data_analysis_type(case_id=case_id)
target_bed: str = self.get_target_bed(case_id=case_id, analysis_type=analysis_type)
return RarediseaseParameters(
input=self.get_sample_sheet_path(case_id=case_id),
outdir=self.get_case_path(case_id=case_id),
analysis_type=analysis_type,
target_bed=target_bed,
)

@staticmethod
Expand Down
9 changes: 8 additions & 1 deletion cg/models/raredisease/raredisease.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pydantic import BaseModel

from cg.models.nf_analysis import NextflowSampleSheetEntry
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters


class RarediseaseQCMetrics(BaseModel):
Expand Down Expand Up @@ -62,3 +62,10 @@ class RarediseaseSampleSheetHeaders(StrEnum):
@classmethod
def list(cls) -> list[str]:
return list(map(lambda header: header.value, cls))


class RarediseaseParameters(WorkflowParameters):
    """Model for Raredisease parameters.

    Extends WorkflowParameters with the two Raredisease-specific fields below.
    """

    # Target bed (capture kit) file for the case
    target_bed: str
    # Data analysis type of the case
    analysis_type: str
1 change: 1 addition & 0 deletions tests/cli/workflow/nf_analysis/test_cli_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from cg.apps.lims import LimsAPI
from cg.cli.workflow.base import workflow as workflow_cli
from cg.constants import EXIT_SUCCESS, Workflow
from cg.meta.workflow.raredisease import RarediseaseAnalysisAPI
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
from cg.models.cg_config import CGConfig
from tests.cli.workflow.conftest import mock_analysis_flow_cell
Expand Down
138 changes: 71 additions & 67 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2445,6 +2445,77 @@ def raredisease_deliverables_file_path(raredisease_dir, raredisease_case_id) ->
).with_suffix(FileExtensions.YAML)


@pytest.fixture(scope="function")
def raredisease_context(
    cg_context: CGConfig,
    helpers: StoreHelpers,
    nf_analysis_housekeeper: HousekeeperAPI,
    trailblazer_api: MockTB,
    raredisease_case_id: str,
    sample_id: str,
    no_sample_case_id: str,
    total_sequenced_reads_pass: int,
    apptag_rna: str,
    case_id_not_enough_reads: str,
    sample_id_not_enough_reads: str,
    total_sequenced_reads_not_pass: int,
    mocker,
) -> CGConfig:
    """Return a CLI context set up for Raredisease.

    The status database is populated with a case without samples, a case whose sample
    has enough reads and a case whose sample does not have enough reads. The LIMS
    target bed lookup of RarediseaseAnalysisAPI is patched to return a fixed file name.
    """
    cg_context.housekeeper_api_ = nf_analysis_housekeeper
    cg_context.trailblazer_api_ = trailblazer_api
    cg_context.meta_apis["analysis_api"] = RarediseaseAnalysisAPI(config=cg_context)
    store: Store = cg_context.status_db

    # Error scenario: a case with no samples linked to it
    helpers.add_case(store, internal_id=no_sample_case_id, name=no_sample_case_id)

    # First the textbook case with enough reads, then the case without enough reads;
    # each gets one sample and the link between them, created in that order.
    cases_to_add = (
        (raredisease_case_id, sample_id, total_sequenced_reads_pass),
        (case_id_not_enough_reads, sample_id_not_enough_reads, total_sequenced_reads_not_pass),
    )
    for case_internal_id, sample_internal_id, read_count in cases_to_add:
        case: Case = helpers.add_case(
            store=store,
            internal_id=case_internal_id,
            name=case_internal_id,
            data_analysis=Workflow.RAREDISEASE,
        )
        sample: Sample = helpers.add_sample(
            store,
            internal_id=sample_internal_id,
            last_sequenced_at=datetime.now(),
            reads=read_count,
            application_tag=apptag_rna,
        )
        helpers.add_relationship(store, case=case, sample=sample)

    # Replace the LIMS lookup with a mock that returns a fixed target bed
    mocker.patch.object(
        RarediseaseAnalysisAPI,
        "get_target_bed_from_lims",
        return_value="some_target_bed_file",
    )

    return cg_context


@pytest.fixture(scope="function")
def deliverable_data(raredisease_dir: Path, raredisease_case_id: str, sample_id: str) -> dict:
return {
Expand Down Expand Up @@ -3882,73 +3953,6 @@ def downsample_api(
)


@pytest.fixture(scope="function")
def raredisease_context(
    cg_context: CGConfig,
    helpers: StoreHelpers,
    nf_analysis_housekeeper: HousekeeperAPI,
    trailblazer_api: MockTB,
    sample_id: str,
    no_sample_case_id: str,
    total_sequenced_reads_pass: int,
    apptag_rna: str,
    raredisease_case_id: str,
    case_id_not_enough_reads: str,
    sample_id_not_enough_reads: str,
    total_sequenced_reads_not_pass: int,
) -> CGConfig:
    """Return a Raredisease CLI context.

    The status database is populated with a case without samples, a case whose sample
    has enough reads and a case whose sample does not have enough reads.
    """
    cg_context.housekeeper_api_ = nf_analysis_housekeeper
    cg_context.trailblazer_api_ = trailblazer_api
    cg_context.meta_apis["analysis_api"] = RarediseaseAnalysisAPI(config=cg_context)
    store: Store = cg_context.status_db

    # Error scenario: a case with no samples linked to it
    helpers.add_case(store, internal_id=no_sample_case_id, name=no_sample_case_id)

    # Textbook case: one sample with enough reads
    passing_case: Case = helpers.add_case(
        store=store,
        internal_id=raredisease_case_id,
        name=raredisease_case_id,
        data_analysis=Workflow.RAREDISEASE,
    )
    passing_sample: Sample = helpers.add_sample(
        store,
        application_tag=apptag_rna,
        internal_id=sample_id,
        reads=total_sequenced_reads_pass,
        last_sequenced_at=datetime.now(),
    )
    helpers.add_relationship(store, case=passing_case, sample=passing_sample)

    # Case whose only sample does not have enough reads
    failing_case: Case = helpers.add_case(
        store=store,
        internal_id=case_id_not_enough_reads,
        name=case_id_not_enough_reads,
        data_analysis=Workflow.RAREDISEASE,
    )
    failing_sample: Sample = helpers.add_sample(
        store,
        application_tag=apptag_rna,
        internal_id=sample_id_not_enough_reads,
        reads=total_sequenced_reads_not_pass,
        last_sequenced_at=datetime.now(),
    )
    helpers.add_relationship(store, case=failing_case, sample=failing_sample)

    return cg_context


@pytest.fixture
def fastq_file_meta_raw(flow_cell_name: str) -> dict:
return {
Expand Down
5 changes: 4 additions & 1 deletion tests/meta/workflow/test_raredisease.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_get_sample_sheet_content(
assert contains_pattern


def test_write_params_file(raredisease_context: CGConfig, raredisease_case_id: str):
def test_write_params_file(raredisease_context: CGConfig, raredisease_case_id: str, mocker):
# GIVEN Raredisease analysis API and input (nextflow sample sheet path)/output (case directory) parameters
analysis_api: RarediseaseAnalysisAPI = raredisease_context.meta_apis["analysis_api"]

Expand All @@ -42,6 +42,9 @@ def test_write_params_file(raredisease_context: CGConfig, raredisease_case_id: s
# THEN case directory is created
assert os.path.exists(analysis_api.get_case_path(case_id=raredisease_case_id))

mocker.patch.object(RarediseaseAnalysisAPI, "get_target_bed_from_lims")
RarediseaseAnalysisAPI.get_target_bed_from_lims.return_value = "some_target_bed_file"

# WHEN writing parameters file
analysis_api.write_params_file(case_id=raredisease_case_id)

Expand Down
Loading