Skip to content

Commit

Permalink
Update Rnafusion for release v3.0.1 (#2621) (major)
Browse files Browse the repository at this point in the history
### Added
- Fusion VCF file to deliverables
- Gene counts file to deliverables
- CRAM index files to deliverables
- Scout upload of a Rnafusion alignment CRAM file
- Swedac logo in delivery report

### Changed
- Rnafusion bundle filenames
- Replaced deprecated metrics:
  - 5_3_bias by median_5prime_to_3prime_bias
  - reads_aligned by read_pairs_examined
- Updated default parameters
- Skip path validation in model
  • Loading branch information
fevac authored Dec 7, 2023
1 parent e458ae1 commit 5469f9f
Show file tree
Hide file tree
Showing 22 changed files with 125 additions and 82 deletions.
11 changes: 6 additions & 5 deletions cg/constants/delivery.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,20 +153,21 @@
{"fusion", "arriba"},
{"fusion", "star-fusion"},
{"fusion", "fusioncatcher"},
{"cram"},
{"fusioncatcher-summary"},
{"fusioninspector"},
{"fusionreport", "research"},
{"fusioninspector-html", "research"},
{"arriba-visualisation", "research"},
{"multiqc-html", "rna"},
{"software-versions"},
{"qc-metrics"},
{"multiqc-json"},
{"delivery-report"},
{"vcf-fusion"},
{"gene-counts"},
]

RNAFUSION_ANALYSIS_SAMPLE_TAGS: list[set[str]] = []
RNAFUSION_ANALYSIS_SAMPLE_TAGS: list[set[str]] = [
{"cram"},
{"cram-index"},
]


PIPELINE_ANALYSIS_TAG_MAP: dict[Pipeline, dict] = {
Expand Down
7 changes: 6 additions & 1 deletion cg/constants/scout_upload.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from enum import StrEnum

from cg.constants.housekeeper_tags import AlignmentFileTag


class GenomeBuild(StrEnum):
hg19: str = "37"
Expand Down Expand Up @@ -57,6 +59,7 @@ class ScoutCustomCaseReportTags(StrEnum):
RNAfusion_inspector={"fusioninspector-html", "clinical"},
RNAfusion_inspector_research={"fusioninspector-html", "research"},
delivery_report={"delivery-report"},
vcf_fusion={"vcf-fusion"},
)

MIP_SAMPLE_TAGS = dict(
Expand Down Expand Up @@ -86,4 +89,6 @@ class ScoutCustomCaseReportTags(StrEnum):
)


RNAFUSION_SAMPLE_TAGS = {}
RNAFUSION_SAMPLE_TAGS = dict(
alignment_file={AlignmentFileTag.CRAM},
)
14 changes: 0 additions & 14 deletions cg/meta/report/balsamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,17 +226,3 @@ def get_template_name(self) -> str:
def get_upload_case_tags(self) -> dict:
"""Return Balsamic upload case tags."""
return BALSAMIC_CASE_TAGS

def get_scout_uploaded_file_from_hk(self, case_id: str, scout_tag: str) -> str | None:
"""Return file path of the uploaded to Scout file given its tag."""
version: Version = self.housekeeper_api.last_version(bundle=case_id)
tags: list = self.get_hk_scout_file_tags(scout_tag=scout_tag)
uploaded_file: File = self.housekeeper_api.get_latest_file(
bundle=case_id, tags=tags, version=version.id
)
if not tags or not uploaded_file:
LOG.warning(
f"No files were found for the following Scout Housekeeper tag: {scout_tag} (case: {case_id})"
)
return None
return uploaded_file.full_path
4 changes: 4 additions & 0 deletions cg/meta/report/field_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
from cg.models.report.report import ReportModel


def get_mapped_reads_fraction(mapped_reads: float, total_reads: float) -> float | None:
return mapped_reads / total_reads if mapped_reads and total_reads else None


def get_million_read_pairs(reads: int) -> float | None:
"""Return number of sequencing reads as millions of read pairs."""
return (
Expand Down
17 changes: 15 additions & 2 deletions cg/meta/report/report_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,18 @@ def get_delivery_report_from_hk(self, case_id: str, version: Version) -> str | N
return delivery_report.full_path

def get_scout_uploaded_file_from_hk(self, case_id: str, scout_tag: str) -> str | None:
"""Return the file path of the uploaded to Scout file given its tag."""
raise NotImplementedError
"""Return file path of the uploaded to Scout file given its tag."""
version: Version = self.housekeeper_api.last_version(bundle=case_id)
tags: list = self.get_hk_scout_file_tags(scout_tag=scout_tag)
uploaded_file: File = self.housekeeper_api.get_latest_file(
bundle=case_id, tags=tags, version=version.id
)
if not tags or not uploaded_file:
LOG.warning(
f"No files were found for the following Scout Housekeeper tag: {scout_tag} (case: {case_id})"
)
return None
return uploaded_file.full_path

def render_delivery_report(self, report_data: dict) -> str:
"""Renders the report on the Jinja template."""
Expand Down Expand Up @@ -354,6 +364,9 @@ def get_scout_uploaded_files(self, case: Case) -> ScoutReportFiles:
smn_tsv=self.get_scout_uploaded_file_from_hk(
case_id=case.internal_id, scout_tag="smn_tsv"
),
vcf_fusion=self.get_scout_uploaded_file_from_hk(
case_id=case.internal_id, scout_tag="vcf_fusion"
),
)

@staticmethod
Expand Down
24 changes: 13 additions & 11 deletions cg/meta/report/rnafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
Pipeline,
)
from cg.constants.constants import GenomeVersion
from cg.meta.report.field_validators import get_million_read_pairs
from cg.constants.scout_upload import RNAFUSION_CASE_TAGS
from cg.meta.report.field_validators import get_mapped_reads_fraction, get_million_read_pairs
from cg.meta.report.report_api import ReportAPI
from cg.meta.workflow.rnafusion import RnafusionAnalysisAPI
from cg.models.analysis import AnalysisModel
Expand Down Expand Up @@ -47,15 +48,16 @@ def get_sample_metadata(
rin = self.lims_api.get_sample_rin(sample_id=sample.internal_id)

return RnafusionSampleMetadataModel(
bias_5_3=sample_metrics.bias_5_3,
bias_5_3=sample_metrics.median_5prime_to_3prime_bias,
duplicates=sample_metrics.pct_duplication,
gc_content=sample_metrics.after_filtering_gc_content,
input_amount=input_amount,
insert_size=None,
insert_size_peak=None,
mapped_reads=sample_metrics.reads_aligned
* 2
/ sample_metrics.before_filtering_total_reads,
mapped_reads=get_mapped_reads_fraction(
mapped_reads=sample_metrics.read_pairs_examined * 2,
total_reads=sample_metrics.before_filtering_total_reads,
),
mean_length_r1=sample_metrics.after_filtering_read1_mean_length,
million_read_pairs=get_million_read_pairs(
reads=sample_metrics.before_filtering_total_reads
Expand All @@ -77,12 +79,8 @@ def get_genome_build(self, analysis_metadata: AnalysisModel) -> str:
def get_report_accreditation(
self, samples: list[SampleModel], analysis_metadata: AnalysisModel
) -> bool:
"""Checks if the report is accredited or not. Rnafusion is not an accredited workflow."""
return False

def get_scout_uploaded_file_from_hk(self, case_id: str, scout_tag: str) -> str | None:
"""Return file path of the uploaded to Scout file given its tag."""
return None
"""Checks if the report is accredited or not. Rnafusion is an accredited workflow."""
return True

def get_template_name(self) -> str:
"""Return template name to render the delivery report."""
Expand Down Expand Up @@ -111,3 +109,7 @@ def get_required_fields(self, case: CaseModel) -> dict:
case=case, required_fields=REQUIRED_SAMPLE_METADATA_RNAFUSION_FIELDS
),
}

def get_upload_case_tags(self) -> dict:
"""Return Rnafusion upload case tags."""
return RNAFUSION_CASE_TAGS
2 changes: 1 addition & 1 deletion cg/meta/report/templates/balsamic_report.html
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ <h4 class="card-title">Scout</h4>
<p>Varianter finns uppladdade i Scout: <a href="https://scout.scilifelab.se/{{ customer.id }}/{{ case.name }}">scout.scilifelab.se/{{ customer.id }}/{{ case.name }}</a></p>
<ul>
{% if case.data_analysis.scout_files.snv_vcf != 'N/A' %}
<li><strong>Kliniskt relevanta förvärvade SNVs och INDELs</strong> : <em>{{ case.data_analysis.scout_files.snv_vcf.replace(case.id, case.name) }}</em></li>
<li><strong>Kliniskt relevanta förvärvade SNVs och INDELs</strong>: <em>{{ case.data_analysis.scout_files.snv_vcf.replace(case.id, case.name) }}</em></li>
{% endif %}
{% if case.data_analysis.scout_files.snv_research_vcf != 'N/A' %}
<li><strong>Förvärvade SNVs och INDELs för forskning</strong>: <em>{{ case.data_analysis.scout_files.snv_research_vcf.replace(case.id, case.name) }}</em></li>
Expand Down
12 changes: 11 additions & 1 deletion cg/meta/report/templates/rnafusion_report.html
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,17 @@ <h4>Kundinformation</h4>
<div class="card-block">
<h4 class="card-title">Scout</h4>
<div class="card-text">
<p>Gene fusioner finns uppladdade i Scout: <a href="https://scout.scilifelab.se/{{ customer.id }}/{{ case.name }}">scout.scilifelab.se/{{ customer.id }}/{{ case.name }}</a></p>
<p>Analysfiler finns uppladdade i Scout: <a href="https://scout.scilifelab.se/{{ customer.id }}/{{ case.name }}">scout.scilifelab.se/{{ customer.id }}/{{ case.name }}</a></p>
<ul>
<li>
<strong>Kliniska Fusionsvarianter</strong>:
{% if case.data_analysis.scout_files.vcf_fusion != 'N/A' %}
<em>{{ case.data_analysis.scout_files.vcf_fusion.replace(case.id, case.name) }}</em>
{% else %}
<em>Inga fusionsvarianter upptäcktes</em>
{% endif %}
</li>
</ul>
</div>
</div>
{% endif %}
Expand Down
1 change: 1 addition & 0 deletions cg/meta/upload/scout/hk_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class CaseTags(BaseModel):
RNAfusion_inspector_research: set[str] | None = Field(
None, description="RNAfusion inspector report containing all fusions"
)
vcf_fusion: set[str] | None = Field(None, description="VCF with fusions, clinical")
multiqc_rna: set[str] | None = Field(None, description="MultiQC report for RNA samples")
vcf_mei: set[str] | None = Field(
None, description="VCF with mobile element insertions, clinical"
Expand Down
23 changes: 15 additions & 8 deletions cg/meta/upload/scout/rnafusion_config_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

from cg.apps.lims import LimsAPI
from cg.constants.constants import PrepCategory
from cg.constants.scout_upload import (
RNAFUSION_CASE_TAGS,
RNAFUSION_SAMPLE_TAGS,
GenomeBuild,
)
from cg.constants.scout_upload import RNAFUSION_CASE_TAGS, RNAFUSION_SAMPLE_TAGS, GenomeBuild
from cg.meta.upload.scout.hk_tags import CaseTags, SampleTags
from cg.meta.upload.scout.scout_config_builder import ScoutConfigBuilder
from cg.models.scout.scout_load_config import RnafusionLoadConfig, ScoutCancerIndividual
from cg.models.scout.scout_load_config import (
RnafusionLoadConfig,
ScoutCancerIndividual,
ScoutIndividual,
)
from cg.store.models import Analysis, CaseSample

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -56,12 +56,19 @@ def _include_file(self, scout_key) -> None:
self.get_file_from_hk(getattr(self.case_tags, scout_key)),
)

def include_sample_alignment_file(self, config_sample: ScoutIndividual) -> None:
    """Set the RNA alignment path on the given Scout sample.

    The file is looked up with the sample-level alignment tags and the
    sample id already stored on ``config_sample``.
    """
    alignment_tags = self.sample_tags.alignment_file
    sample_id = config_sample.sample_id
    rna_alignment_file = self.get_sample_file(hk_tags=alignment_tags, sample_id=sample_id)
    config_sample.rna_alignment_path = rna_alignment_file

def build_config_sample(self, case_sample: CaseSample) -> ScoutCancerIndividual:
"""Build a sample with rnafusion specific information."""
config_sample = ScoutCancerIndividual()

self.add_common_sample_info(config_sample=config_sample, case_sample=case_sample)

self.add_common_sample_files(config_sample=config_sample, case_sample=case_sample)
config_sample.analysis_type = PrepCategory.WHOLE_TRANSCRIPTOME_SEQUENCING.value

# Replace sample_id with internal case id, as rnafusion currently uses case ids instead of sample ids
config_sample.sample_id = case_sample.case.internal_id
return config_sample
11 changes: 5 additions & 6 deletions cg/meta/upload/scout/scout_config_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,9 @@ def add_common_sample_files(
case_sample: CaseSample,
) -> None:
"""Add common sample files for different analysis types."""
sample_id: str = case_sample.sample.internal_id
LOG.info(f"Adding common files for sample {sample_id}")
self.include_sample_alignment_file(config_sample=config_sample)
self.include_sample_files(config_sample=config_sample)
LOG.info(f"Adding common files for sample {case_sample.sample.internal_id}")
self.include_sample_alignment_file(config_sample)
self.include_sample_files(config_sample)

def build_config_sample(self, case_sample: CaseSample) -> ScoutIndividual:
"""Build a sample for the scout load config"""
Expand All @@ -79,9 +78,9 @@ def build_load_config(self) -> ScoutLoadConfig:
"""Build a load config for uploading a case to scout"""
raise NotImplementedError

def include_sample_files(self, config_sample: ScoutIndividual) -> None:
def include_sample_files(self, _config_sample: ScoutIndividual) -> None:
"""Include all files that are used on sample level in Scout"""
raise NotImplementedError
return None

def include_case_files(self) -> None:
"""Include all files that are used on case level in scout"""
Expand Down
4 changes: 4 additions & 0 deletions cg/meta/workflow/nf_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from pathlib import Path
from typing import Any

from cg.store.models import Sample

from cg.constants import Pipeline
from cg.constants.constants import FileExtensions, FileFormat, WorkflowManager
from cg.constants.nextflow import NFX_WORK_DIR
Expand Down Expand Up @@ -269,12 +271,14 @@ def get_deliverables_template_content() -> list[dict]:
def get_deliverables_for_case(self, case_id: str) -> PipelineDeliverables:
"""Return PipelineDeliverables for a given case."""
deliverable_template: list[dict] = self.get_deliverables_template_content()
sample_id: str = self.status_db.get_samples_by_case_id(case_id).pop().internal_id
files: list[FileDeliverable] = []
for file in deliverable_template:
for deliverable_field, deliverable_value in file.items():
if deliverable_value is None:
continue
file[deliverable_field] = file[deliverable_field].replace("CASEID", case_id)
file[deliverable_field] = file[deliverable_field].replace("SAMPLEID", sample_id)
file[deliverable_field] = file[deliverable_field].replace(
"PATHTOCASE", str(self.get_case_path(case_id=case_id))
)
Expand Down
1 change: 0 additions & 1 deletion cg/meta/workflow/rnafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,6 @@ def parse_analysis(self, qc_metrics_raw: list[MetricsBase], **kwargs) -> Rnafusi
"""Parse Rnafusion output analysis files and return analysis model."""
sample_metrics: dict[str, dict] = {}
for metric in qc_metrics_raw:
metric.name = metric.name.replace("5_3_bias", "bias_5_3")
try:
sample_metrics[metric.id].update({metric.name.lower(): metric.value})
except KeyError:
Expand Down
11 changes: 4 additions & 7 deletions cg/models/nf_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pydantic.v1 import BaseModel, Field, conlist, validator

from cg.exc import SampleSheetError, ValidationError
from cg.exc import SampleSheetError


class PipelineParameters(BaseModel):
Expand Down Expand Up @@ -52,12 +52,9 @@ class FileDeliverable(BaseModel):
tag: str

@validator("path", "path_index", pre=True)
def path_exist(cls, file_path: str | Path) -> str | None:
if file_path is not None:
path = Path(file_path)
if not path.exists():
raise ValidationError(f"Path {file_path} does not exist")
return str(path)
def set_path_as_string(cls, file_path: str | Path) -> str | None:
if file_path:
return str(Path(file_path))
return None


Expand Down
2 changes: 2 additions & 0 deletions cg/models/report/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class ScoutReportFiles(BaseModel):
sv_research_vcf: SV research VCF file uploaded to Scout; source: HK
vcf_str: Short Tandem Repeat variants file (MIP-DNA specific); source: HK
smn_tsv: SMN gene variants file (MIP-DNA specific); source: HK
vcf_fusion: Converted RNA fusion file to SV VCF (RNAfusion specific); source: HK
"""

snv_vcf: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD
Expand All @@ -52,6 +53,7 @@ class ScoutReportFiles(BaseModel):
sv_research_vcf: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD
vcf_str: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD
smn_tsv: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD
vcf_fusion: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD


class DataAnalysisModel(BaseModel):
Expand Down
9 changes: 2 additions & 7 deletions cg/models/rnafusion/rnafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ class RnafusionQCMetrics(BaseModel):
after_filtering_q30_rate: float | None
after_filtering_read1_mean_length: float | None
before_filtering_total_reads: float | None
bias_5_3: float | None
median_5prime_to_3prime_bias: float | None
pct_adapter: float | None
pct_mrna_bases: float | None
pct_ribosomal_bases: float | None
pct_surviving: float | None
pct_duplication: float | None
reads_aligned: float | None
read_pairs_examined: float | None
uniquely_mapped_percent: float | None


Expand All @@ -36,12 +36,7 @@ class RnafusionParameters(PipelineParameters):
cram: str = "arriba,starfusion"
fastp_trim: bool = True
fusioncatcher: bool = True
fusioninspector_filter: bool = False
fusionreport_filter: bool = False
pizzly: bool = False
squid: bool = False
starfusion: bool = True
trim: bool = False
trim_tail: int = 50


Expand Down
2 changes: 2 additions & 0 deletions cg/models/scout/scout_load_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class Reviewer(BaseModel):

class ScoutIndividual(BaseModel):
alignment_path: str | None = None
rna_alignment_path: str | None = None
analysis_type: Annotated[
Literal[
"external",
Expand Down Expand Up @@ -141,3 +142,4 @@ class RnafusionLoadConfig(ScoutLoadConfig):
RNAfusion_report: str | None = None
RNAfusion_report_research: str | None = None
samples: list[ScoutCancerIndividual] = []
vcf_fusion: str | None = None
Loading

0 comments on commit 5469f9f

Please sign in to comment.