Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support to run raredisease with WES #3232

Merged
merged 42 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
255076f
add analysis_type and target_bed to config file
rannick May 15, 2024
d602e2e
fix typo and linting
rannick May 15, 2024
e4012b3
add new params to workflow params
rannick May 15, 2024
bbb60ea
fix parameters
rannick May 15, 2024
db785d5
add default setting
rannick May 15, 2024
e6c51c9
add error raising and mock lims access in test
rannick May 15, 2024
d9e75bf
add mock lims access for all necessary tests
rannick May 15, 2024
970bc25
black
rannick May 15, 2024
3219aa5
add mock lims access for all necessary tests
rannick May 15, 2024
5467a0c
Merge branch 'master' into raredisease-exomes
rannick May 15, 2024
e211fd8
Update cg/meta/workflow/nf_analysis.py
rannick May 16, 2024
6b057d2
Update cg/meta/workflow/nf_analysis.py
rannick May 16, 2024
a604856
Update cg/meta/workflow/raredisease.py
rannick May 16, 2024
c54bcc0
Update cg/meta/workflow/raredisease.py
rannick May 17, 2024
28c974c
Update tests/cli/workflow/nf_analysis/test_cli_config_case.py
rannick May 21, 2024
438b896
remove duplicate definition of raredisease_context
rannick May 21, 2024
a07258d
test with mocker part of fixture
rannick May 21, 2024
4d9a135
Merge branch 'raredisease-exomes' of https://github.com/Clinical-Geno…
rannick May 21, 2024
b795724
remove mocker
rannick May 23, 2024
1e47855
Merge branch 'master' into raredisease-exomes
rannick May 23, 2024
d7cf034
Update cg/meta/workflow/nf_analysis.py
rannick May 23, 2024
f8175ac
Update tests/conftest.py
rannick May 23, 2024
89f13c4
typing and moving to analysisAPI
rannick May 23, 2024
9fcae2d
fix merge conflicts
rannick May 23, 2024
3b89d94
remove chanjo2
rannick May 23, 2024
c0fbdab
use statusDB to extract analysis type
rannick May 23, 2024
3ce4b31
remove redundant mocker
rannick May 23, 2024
c360e6c
use optional string
rannick May 23, 2024
30107d7
Merge branch 'master' into raredisease-exomes
rannick May 23, 2024
5042ea0
Update cg/meta/workflow/analysis.py
rannick May 23, 2024
5449146
Update cg/meta/workflow/analysis.py
rannick May 23, 2024
89a956a
Merge branch 'master' into raredisease-exomes
rannick May 23, 2024
b4e6f37
move get_target bed to RarediseaseAnalysisAPI
rannick May 23, 2024
c4154e2
move get-
rannick May 23, 2024
1b7326a
black
rannick May 23, 2024
f32c99a
Update cg/meta/workflow/analysis.py
rannick May 23, 2024
b941ee3
add error message
rannick May 23, 2024
3837a16
move is_multiple_samples_allowed into analysisAPI to have get_validat…
rannick May 23, 2024
fc7721f
Update cg/meta/workflow/analysis.py
rannick May 24, 2024
a2709f4
Merge branch 'master' into raredisease-exomes
rannick May 24, 2024
8be9d31
black
rannick May 24, 2024
2f6f8c8
Merge branch 'master' into raredisease-exomes
rannick May 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cg/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class ControlOptions(StrEnum):
EMPTY: str = ""


DEFAULT_CAPTURE_KIT = "twistexomerefseq_9.1_hg19_design.bed"
DEFAULT_CAPTURE_KIT = "twistexomerefseq_10.2_hg19_design.bed"
rannick marked this conversation as resolved.
Show resolved Hide resolved


class CustomerId(StrEnum):
Expand Down
10 changes: 7 additions & 3 deletions cg/meta/workflow/nf_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,15 +263,19 @@ def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[l
"""Collect and format information required to build a sample sheet for a single sample."""
raise NotImplementedError

def get_sample_sheet_content(self, case_id: str) -> list[list[Any]]:
"""Collect and format information required to build a sample sheet for a case.
This contains information for all samples linked to the case."""
def get_validated_case(self, case_id: str) -> Case:
    """Return the case with the given internal id after validating its sample links.

    Raises:
        CgError: if no samples are linked to the case, or if more than one sample is
            linked while ``self.is_multiple_samples_allowed`` is falsy.
    """
    case: Case = self.status_db.get_case_by_internal_id(internal_id=case_id)
    sample_count: int = len(case.links)
    if not sample_count:
        raise CgError(f"No samples linked to {case_id}")
    # Count first, compare afterwards: an assignment expression placed directly in the
    # condition would capture the boolean result of the whole `... > 1 and not ...`
    # expression, so the message would report "True found" instead of the count.
    if sample_count > 1 and not self.is_multiple_samples_allowed:
        raise CgError(f"Only one sample per case is allowed. {sample_count} found")
    return case

def get_sample_sheet_content(self, case_id: str) -> list[list[Any]]:
"""Return formatted information required to build a sample sheet for a case.
This contains information for all samples linked to the case."""
sample_sheet_content = []
case = self.get_case_from_string(case_id)
rannick marked this conversation as resolved.
Show resolved Hide resolved
LOG.info(f"Samples linked to case {case_id}: {len(case.links)}")
LOG.debug("Getting sample sheet information")
for link in case.links:
Expand Down
38 changes: 33 additions & 5 deletions cg/meta/workflow/raredisease.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
from pathlib import Path
from typing import Any

from cg.constants import Workflow
from cg.constants.constants import GenomeVersion
from cg.constants import DEFAULT_CAPTURE_KIT, Workflow
from cg.constants.constants import AnalysisType, GenomeVersion
from cg.constants.gene_panel import GenePanelGenomeBuild
from cg.constants.nf_analysis import RAREDISEASE_METRIC_CONDITIONS
from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
from cg.models.cg_config import CGConfig
from cg.models.nf_analysis import WorkflowParameters
from cg.models.raredisease.raredisease import (
RarediseaseParameters,
RarediseaseSampleSheetEntry,
RarediseaseSampleSheetHeaders,
)
Expand Down Expand Up @@ -74,11 +74,39 @@ def get_sample_sheet_content_per_sample(self, case_sample: CaseSample) -> list[l
)
return sample_sheet_entry.reformat_sample_content

def get_workflow_parameters(self, case_id: str) -> WorkflowParameters:
def get_analysis_type(self, case_id: str) -> str:
    """Return the analysis type shared by all samples linked to the case.

    The analysis type of each sample is read from
    ``sample.application_version.application.analysis_type``.
    An empty string is returned when the case has no linked samples.

    Raises:
        ValueError: if the samples in the case do not all share one analysis type.
    """
    case = self.get_case_from_string(case_id)
    # Collect the distinct types up front so that every sample takes part in the
    # comparison, including samples whose analysis type is an empty string.
    analysis_types: set[str] = {
        link.sample.application_version.application.analysis_type for link in case.links
    }
    if len(analysis_types) > 1:
        found: str = ", ".join(sorted(map(str, analysis_types)))
        raise ValueError(
            f"Samples in case {case_id} do not have the same analysis type. Found: {found}"
        )
    return analysis_types.pop() if analysis_types else ""
rannick marked this conversation as resolved.
Show resolved Hide resolved

def set_target_bed(self, case_id: str, analysis_type: str) -> str:
    """Return the target bed file to use for the case.

    The value fetched through ``get_target_bed_from_lims`` is used whenever it is
    truthy. Without it, whole genome sequencing falls back to ``DEFAULT_CAPTURE_KIT``;
    every other analysis type has no fallback.

    Raises:
        ValueError: if no target bed is found and the analysis type is not whole
            genome sequencing.
    """
    is_whole_genome: bool = analysis_type == AnalysisType.WHOLE_GENOME_SEQUENCING
    lims_target_bed = self.get_target_bed_from_lims(case_id=case_id)
    if lims_target_bed:
        return lims_target_bed
    if is_whole_genome:
        return DEFAULT_CAPTURE_KIT
    raise ValueError("No capture kit was found in LIMS")
rannick marked this conversation as resolved.
Show resolved Hide resolved

def get_workflow_parameters(self, case_id: str) -> RarediseaseParameters:
"""Return parameters."""
return WorkflowParameters(
analysis_type: AnalysisType = self.get_analysis_type(case_id=case_id)
target_bed: str = self.set_target_bed(case_id=case_id, analysis_type=analysis_type)
return RarediseaseParameters(
input=self.get_sample_sheet_path(case_id=case_id),
outdir=self.get_case_path(case_id=case_id),
analysis_type=analysis_type,
target_bed=target_bed,
)

@staticmethod
Expand Down
9 changes: 8 additions & 1 deletion cg/models/raredisease/raredisease.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pydantic import BaseModel

from cg.models.nf_analysis import NextflowSampleSheetEntry
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters


class RarediseaseQCMetrics(BaseModel):
Expand Down Expand Up @@ -62,3 +62,10 @@ class RarediseaseSampleSheetHeaders(StrEnum):
@classmethod
def list(cls) -> list[str]:
return list(map(lambda header: header.value, cls))


class RarediseaseParameters(WorkflowParameters):
    """Model for Raredisease parameters.

    Extends the generic workflow parameters with two required string fields.
    """

    # Target bed file for the analysis.
    target_bed: str
    # Analysis type of the case's samples.
    analysis_type: str
11 changes: 11 additions & 0 deletions tests/cli/workflow/nf_analysis/test_cli_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from cg.apps.lims import LimsAPI
from cg.cli.workflow.base import workflow as workflow_cli
from cg.constants import EXIT_SUCCESS, Workflow
from cg.meta.workflow.raredisease import RarediseaseAnalysisAPI
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
from cg.models.cg_config import CGConfig
from tests.cli.workflow.conftest import mock_analysis_flow_cell
Expand Down Expand Up @@ -40,6 +41,11 @@ def test_start(
# GIVEN that the sample source in LIMS is set
mocker.patch.object(LimsAPI, "get_source", return_value="blood")

# GIVEN that, for the RAREDISEASE workflow, fetching the target bed file from LIMS is mocked
if workflow == Workflow.RAREDISEASE:
mocker.patch.object(RarediseaseAnalysisAPI, "get_target_bed_from_lims")
RarediseaseAnalysisAPI.get_target_bed_from_lims.return_value = "some_target_bed_file"

# WHEN invoking the command with dry-run specified
result = cli_runner.invoke(workflow_cli, [workflow, "start", case_id, "--dry-run"], obj=context)

Expand Down Expand Up @@ -82,6 +88,11 @@ def test_start_available(
# GIVEN that the sample source in LIMS is set
mocker.patch.object(LimsAPI, "get_source", return_value="blood")

# GIVEN that, for the RAREDISEASE workflow, fetching the target bed file from LIMS is mocked
if workflow == Workflow.RAREDISEASE:
mocker.patch.object(RarediseaseAnalysisAPI, "get_target_bed_from_lims")
RarediseaseAnalysisAPI.get_target_bed_from_lims.return_value = "some_target_bed_file"

# WHEN invoking the command with dry-run specified
result = cli_runner.invoke(
workflow_cli, [workflow, "start-available", "--dry-run"], obj=context
Expand Down
138 changes: 71 additions & 67 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2445,6 +2445,77 @@ def raredisease_deliverables_file_path(raredisease_dir, raredisease_case_id) ->
).with_suffix(FileExtensions.YAML)


@pytest.fixture(scope="function")
def raredisease_context(
    cg_context: CGConfig,
    helpers: StoreHelpers,
    nf_analysis_housekeeper: HousekeeperAPI,
    trailblazer_api: MockTB,
    raredisease_case_id: str,
    sample_id: str,
    no_sample_case_id: str,
    total_sequenced_reads_pass: int,
    apptag_rna: str,
    case_id_not_enough_reads: str,
    sample_id_not_enough_reads: str,
    total_sequenced_reads_not_pass: int,
    mocker,
) -> CGConfig:
    """Raredisease context to use in CLI.

    Populates the store with three cases (one without samples, one with a sample using
    ``total_sequenced_reads_pass`` and one with a sample using
    ``total_sequenced_reads_not_pass``) and patches the LIMS target bed lookup on
    ``RarediseaseAnalysisAPI`` so that no LIMS access is needed.
    """
    cg_context.housekeeper_api_ = nf_analysis_housekeeper
    cg_context.trailblazer_api_ = trailblazer_api
    cg_context.meta_apis["analysis_api"] = RarediseaseAnalysisAPI(config=cg_context)
    status_db: Store = cg_context.status_db

    # Create ERROR case with NO SAMPLES
    helpers.add_case(status_db, internal_id=no_sample_case_id, name=no_sample_case_id)

    # Create a textbook case with enough reads
    case_enough_reads: Case = helpers.add_case(
        store=status_db,
        internal_id=raredisease_case_id,
        name=raredisease_case_id,
        data_analysis=Workflow.RAREDISEASE,
    )

    sample_raredisease_case_enough_reads: Sample = helpers.add_sample(
        status_db,
        internal_id=sample_id,
        last_sequenced_at=datetime.now(),
        reads=total_sequenced_reads_pass,
        application_tag=apptag_rna,
    )

    helpers.add_relationship(
        status_db,
        case=case_enough_reads,
        sample=sample_raredisease_case_enough_reads,
    )

    # Create a case without enough reads
    case_not_enough_reads: Case = helpers.add_case(
        store=status_db,
        internal_id=case_id_not_enough_reads,
        name=case_id_not_enough_reads,
        data_analysis=Workflow.RAREDISEASE,
    )

    sample_not_enough_reads: Sample = helpers.add_sample(
        status_db,
        internal_id=sample_id_not_enough_reads,
        last_sequenced_at=datetime.now(),
        reads=total_sequenced_reads_not_pass,
        application_tag=apptag_rna,
    )

    helpers.add_relationship(status_db, case=case_not_enough_reads, sample=sample_not_enough_reads)

    # Patch on the class (not the instance) and set the return value in the same call,
    # rather than assigning `return_value` on the class attribute afterwards.
    mocker.patch.object(
        RarediseaseAnalysisAPI, "get_target_bed_from_lims", return_value="some_target_bed_file"
    )

    return cg_context


@pytest.fixture(scope="function")
def deliverable_data(raredisease_dir: Path, raredisease_case_id: str, sample_id: str) -> dict:
return {
Expand Down Expand Up @@ -3882,73 +3953,6 @@ def downsample_api(
)


@pytest.fixture(scope="function")
def raredisease_context(
cg_context: CGConfig,
helpers: StoreHelpers,
nf_analysis_housekeeper: HousekeeperAPI,
trailblazer_api: MockTB,
sample_id: str,
no_sample_case_id: str,
total_sequenced_reads_pass: int,
apptag_rna: str,
raredisease_case_id: str,
case_id_not_enough_reads: str,
sample_id_not_enough_reads: str,
total_sequenced_reads_not_pass: int,
) -> CGConfig:
"""Raredisease context to use in CLI."""
cg_context.housekeeper_api_ = nf_analysis_housekeeper
cg_context.trailblazer_api_ = trailblazer_api
cg_context.meta_apis["analysis_api"] = RarediseaseAnalysisAPI(config=cg_context)
status_db: Store = cg_context.status_db

# Create ERROR case with NO SAMPLES
helpers.add_case(status_db, internal_id=no_sample_case_id, name=no_sample_case_id)

# Create a textbook case with enough reads
case_enough_reads: Case = helpers.add_case(
store=status_db,
internal_id=raredisease_case_id,
name=raredisease_case_id,
data_analysis=Workflow.RAREDISEASE,
)

sample_raredisease_case_enough_reads: Sample = helpers.add_sample(
status_db,
application_tag=apptag_rna,
internal_id=sample_id,
reads=total_sequenced_reads_pass,
last_sequenced_at=datetime.now(),
)

helpers.add_relationship(
status_db,
case=case_enough_reads,
sample=sample_raredisease_case_enough_reads,
)

# Create a case without enough reads
case_not_enough_reads: Case = helpers.add_case(
store=status_db,
internal_id=case_id_not_enough_reads,
name=case_id_not_enough_reads,
data_analysis=Workflow.RAREDISEASE,
)

sample_not_enough_reads: Sample = helpers.add_sample(
status_db,
application_tag=apptag_rna,
internal_id=sample_id_not_enough_reads,
reads=total_sequenced_reads_not_pass,
last_sequenced_at=datetime.now(),
)

helpers.add_relationship(status_db, case=case_not_enough_reads, sample=sample_not_enough_reads)

return cg_context


@pytest.fixture
def fastq_file_meta_raw(flow_cell_name: str) -> dict:
return {
Expand Down
5 changes: 4 additions & 1 deletion tests/meta/workflow/test_raredisease.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_get_sample_sheet_content(
assert contains_pattern


def test_write_params_file(raredisease_context: CGConfig, raredisease_case_id: str):
def test_write_params_file(raredisease_context: CGConfig, raredisease_case_id: str, mocker):
# GIVEN Raredisease analysis API and input (nextflow sample sheet path)/output (case directory) parameters
analysis_api: RarediseaseAnalysisAPI = raredisease_context.meta_apis["analysis_api"]

Expand All @@ -42,6 +42,9 @@ def test_write_params_file(raredisease_context: CGConfig, raredisease_case_id: s
# THEN case directory is created
assert os.path.exists(analysis_api.get_case_path(case_id=raredisease_case_id))

mocker.patch.object(RarediseaseAnalysisAPI, "get_target_bed_from_lims")
RarediseaseAnalysisAPI.get_target_bed_from_lims.return_value = "some_target_bed_file"

# WHEN writing parameters file
analysis_api.write_params_file(case_id=raredisease_case_id)

Expand Down
Loading