Update Rnafusion for release v3.0.1 (#2621) (major)

### Added

- Fusion VCF file to deliverables
- Gene counts file to deliverables
- CRAM index files to deliverables
- Scout upload of a Rnafusion alignment CRAM file
- Swedac logo in delivery report

### Changed

- Rnafusion bundle filenames
- Replaced deprecated metrics:
  - 5_3_bias by median_5prime_to_3prime_bias
  - reads_aligned by read_pairs_examined
- Updated default parameters
- Skip path validation in model
Clinical-Genomics · Dec 7, 2023 · 5469f9f · 5469f9f
1 parent e458ae1
commit 5469f9f
Show file tree

Hide file tree

Showing 22 changed files with 125 additions and 82 deletions.
diff --git a/cg/constants/delivery.py b/cg/constants/delivery.py
@@ -153,20 +153,21 @@
     {"fusion", "arriba"},
     {"fusion", "star-fusion"},
     {"fusion", "fusioncatcher"},
-    {"cram"},
     {"fusioncatcher-summary"},
     {"fusioninspector"},
     {"fusionreport", "research"},
     {"fusioninspector-html", "research"},
     {"arriba-visualisation", "research"},
     {"multiqc-html", "rna"},
-    {"software-versions"},
-    {"qc-metrics"},
-    {"multiqc-json"},
     {"delivery-report"},
+    {"vcf-fusion"},
+    {"gene-counts"},
 ]
 
-RNAFUSION_ANALYSIS_SAMPLE_TAGS: list[set[str]] = []
+RNAFUSION_ANALYSIS_SAMPLE_TAGS: list[set[str]] = [
+    {"cram"},
+    {"cram-index"},
+]
 
 
 PIPELINE_ANALYSIS_TAG_MAP: dict[Pipeline, dict] = {

diff --git a/cg/constants/scout_upload.py b/cg/constants/scout_upload.py
@@ -1,5 +1,7 @@
 from enum import StrEnum
 
+from cg.constants.housekeeper_tags import AlignmentFileTag
+
 
 class GenomeBuild(StrEnum):
     hg19: str = "37"
@@ -57,6 +59,7 @@ class ScoutCustomCaseReportTags(StrEnum):
     RNAfusion_inspector={"fusioninspector-html", "clinical"},
     RNAfusion_inspector_research={"fusioninspector-html", "research"},
     delivery_report={"delivery-report"},
+    vcf_fusion={"vcf-fusion"},
 )
 
 MIP_SAMPLE_TAGS = dict(
@@ -86,4 +89,6 @@ class ScoutCustomCaseReportTags(StrEnum):
 )
 
 
-RNAFUSION_SAMPLE_TAGS = {}
+RNAFUSION_SAMPLE_TAGS = dict(
+    alignment_file={AlignmentFileTag.CRAM},
+)
diff --git a/cg/meta/report/balsamic.py b/cg/meta/report/balsamic.py
@@ -226,17 +226,3 @@ def get_template_name(self) -> str:
     def get_upload_case_tags(self) -> dict:
         """Return Balsamic upload case tags."""
         return BALSAMIC_CASE_TAGS
-
-    def get_scout_uploaded_file_from_hk(self, case_id: str, scout_tag: str) -> str | None:
-        """Return file path of the uploaded to Scout file given its tag."""
-        version: Version = self.housekeeper_api.last_version(bundle=case_id)
-        tags: list = self.get_hk_scout_file_tags(scout_tag=scout_tag)
-        uploaded_file: File = self.housekeeper_api.get_latest_file(
-            bundle=case_id, tags=tags, version=version.id
-        )
-        if not tags or not uploaded_file:
-            LOG.warning(
-                f"No files were found for the following Scout Housekeeper tag: {scout_tag} (case: {case_id})"
-            )
-            return None
-        return uploaded_file.full_path
diff --git a/cg/meta/report/field_validators.py b/cg/meta/report/field_validators.py
@@ -6,6 +6,10 @@
 from cg.models.report.report import ReportModel
 
 
+def get_mapped_reads_fraction(mapped_reads: float, total_reads: float) -> float | None:
+    return mapped_reads / total_reads if mapped_reads and total_reads else None
+
+
 def get_million_read_pairs(reads: int) -> float | None:
     """Return number of sequencing reads as millions of read pairs."""
     return (

diff --git a/cg/meta/report/report_api.py b/cg/meta/report/report_api.py
@@ -103,8 +103,18 @@ def get_delivery_report_from_hk(self, case_id: str, version: Version) -> str | N
         return delivery_report.full_path
 
     def get_scout_uploaded_file_from_hk(self, case_id: str, scout_tag: str) -> str | None:
-        """Return the file path of the uploaded to Scout file given its tag."""
-        raise NotImplementedError
+        """Return file path of the uploaded to Scout file given its tag."""
+        version: Version = self.housekeeper_api.last_version(bundle=case_id)
+        tags: list = self.get_hk_scout_file_tags(scout_tag=scout_tag)
+        uploaded_file: File = self.housekeeper_api.get_latest_file(
+            bundle=case_id, tags=tags, version=version.id
+        )
+        if not tags or not uploaded_file:
+            LOG.warning(
+                f"No files were found for the following Scout Housekeeper tag: {scout_tag} (case: {case_id})"
+            )
+            return None
+        return uploaded_file.full_path
 
     def render_delivery_report(self, report_data: dict) -> str:
         """Renders the report on the Jinja template."""
@@ -354,6 +364,9 @@ def get_scout_uploaded_files(self, case: Case) -> ScoutReportFiles:
             smn_tsv=self.get_scout_uploaded_file_from_hk(
                 case_id=case.internal_id, scout_tag="smn_tsv"
             ),
+            vcf_fusion=self.get_scout_uploaded_file_from_hk(
+                case_id=case.internal_id, scout_tag="vcf_fusion"
+            ),
         )
 
     @staticmethod

diff --git a/cg/meta/report/rnafusion.py b/cg/meta/report/rnafusion.py
@@ -14,7 +14,8 @@
     Pipeline,
 )
 from cg.constants.constants import GenomeVersion
-from cg.meta.report.field_validators import get_million_read_pairs
+from cg.constants.scout_upload import RNAFUSION_CASE_TAGS
+from cg.meta.report.field_validators import get_mapped_reads_fraction, get_million_read_pairs
 from cg.meta.report.report_api import ReportAPI
 from cg.meta.workflow.rnafusion import RnafusionAnalysisAPI
 from cg.models.analysis import AnalysisModel
@@ -47,15 +48,16 @@ def get_sample_metadata(
             rin = self.lims_api.get_sample_rin(sample_id=sample.internal_id)
 
         return RnafusionSampleMetadataModel(
-            bias_5_3=sample_metrics.bias_5_3,
+            bias_5_3=sample_metrics.median_5prime_to_3prime_bias,
             duplicates=sample_metrics.pct_duplication,
             gc_content=sample_metrics.after_filtering_gc_content,
             input_amount=input_amount,
             insert_size=None,
             insert_size_peak=None,
-            mapped_reads=sample_metrics.reads_aligned
-            * 2
-            / sample_metrics.before_filtering_total_reads,
+            mapped_reads=get_mapped_reads_fraction(
+                mapped_reads=sample_metrics.read_pairs_examined * 2,
+                total_reads=sample_metrics.before_filtering_total_reads,
+            ),
             mean_length_r1=sample_metrics.after_filtering_read1_mean_length,
             million_read_pairs=get_million_read_pairs(
                 reads=sample_metrics.before_filtering_total_reads
@@ -77,12 +79,8 @@ def get_genome_build(self, analysis_metadata: AnalysisModel) -> str:
     def get_report_accreditation(
         self, samples: list[SampleModel], analysis_metadata: AnalysisModel
     ) -> bool:
-        """Checks if the report is accredited or not. Rnafusion is not an accredited workflow."""
-        return False
-
-    def get_scout_uploaded_file_from_hk(self, case_id: str, scout_tag: str) -> str | None:
-        """Return file path of the uploaded to Scout file given its tag."""
-        return None
+        """Checks if the report is accredited or not. Rnafusion is an accredited workflow."""
+        return True
 
     def get_template_name(self) -> str:
         """Return template name to render the delivery report."""
@@ -111,3 +109,7 @@ def get_required_fields(self, case: CaseModel) -> dict:
                 case=case, required_fields=REQUIRED_SAMPLE_METADATA_RNAFUSION_FIELDS
             ),
         }
+
+    def get_upload_case_tags(self) -> dict:
+        """Return Rnafusion upload case tags."""
+        return RNAFUSION_CASE_TAGS
diff --git a/cg/meta/report/templates/balsamic_report.html b/cg/meta/report/templates/balsamic_report.html
@@ -241,7 +241,7 @@ <h4 class="card-title">Scout</h4>
               <p>Varianter finns uppladdade i Scout: <a href="https://scout.scilifelab.se/{{ customer.id }}/{{ case.name }}">scout.scilifelab.se/{{ customer.id }}/{{ case.name }}</a></p>
               <ul>
                 {% if case.data_analysis.scout_files.snv_vcf != 'N/A' %}
-                  <li><strong>Kliniskt relevanta förvärvade SNVs och INDELs</strong> : <em>{{ case.data_analysis.scout_files.snv_vcf.replace(case.id, case.name) }}</em></li>
+                  <li><strong>Kliniskt relevanta förvärvade SNVs och INDELs</strong>: <em>{{ case.data_analysis.scout_files.snv_vcf.replace(case.id, case.name) }}</em></li>
                 {% endif %}
                 {% if case.data_analysis.scout_files.snv_research_vcf != 'N/A' %}
                   <li><strong>Förvärvade SNVs och INDELs för forskning</strong>: <em>{{ case.data_analysis.scout_files.snv_research_vcf.replace(case.id, case.name) }}</em></li>

diff --git a/cg/meta/report/templates/rnafusion_report.html b/cg/meta/report/templates/rnafusion_report.html
@@ -222,7 +222,17 @@ <h4>Kundinformation</h4>
           <div class="card-block">
             <h4 class="card-title">Scout</h4>
             <div class="card-text">
-              <p>Gene fusioner finns uppladdade i Scout: <a href="https://scout.scilifelab.se/{{ customer.id }}/{{ case.name }}">scout.scilifelab.se/{{ customer.id }}/{{ case.name }}</a></p>
+              <p>Analysfiler finns uppladdade i Scout: <a href="https://scout.scilifelab.se/{{ customer.id }}/{{ case.name }}">scout.scilifelab.se/{{ customer.id }}/{{ case.name }}</a></p>
+              <ul>
+                <li>
+                  <strong>Kliniska Fusionsvarianter</strong>:
+                    {% if case.data_analysis.scout_files.vcf_fusion != 'N/A' %}
+                      <em>{{ case.data_analysis.scout_files.vcf_fusion.replace(case.id, case.name) }}</em>
+                    {% else %}
+                      <em>Inga fusionsvarianter upptäcktes</em>
+                    {% endif %}
+                </li>
+              </ul>
             </div>
           </div>
         {% endif %}

diff --git a/cg/meta/upload/scout/hk_tags.py b/cg/meta/upload/scout/hk_tags.py
@@ -46,6 +46,7 @@ class CaseTags(BaseModel):
     RNAfusion_inspector_research: set[str] | None = Field(
         None, description="RNAfusion inspector report containing all fusions"
     )
+    vcf_fusion: set[str] | None = Field(None, description="VCF with fusions, clinical")
     multiqc_rna: set[str] | None = Field(None, description="MultiQC report for RNA samples")
     vcf_mei: set[str] | None = Field(
         None, description="VCF with mobile element insertions, clinical"

diff --git a/cg/meta/upload/scout/rnafusion_config_builder.py b/cg/meta/upload/scout/rnafusion_config_builder.py
@@ -4,14 +4,14 @@
 
 from cg.apps.lims import LimsAPI
 from cg.constants.constants import PrepCategory
-from cg.constants.scout_upload import (
-    RNAFUSION_CASE_TAGS,
-    RNAFUSION_SAMPLE_TAGS,
-    GenomeBuild,
-)
+from cg.constants.scout_upload import RNAFUSION_CASE_TAGS, RNAFUSION_SAMPLE_TAGS, GenomeBuild
 from cg.meta.upload.scout.hk_tags import CaseTags, SampleTags
 from cg.meta.upload.scout.scout_config_builder import ScoutConfigBuilder
-from cg.models.scout.scout_load_config import RnafusionLoadConfig, ScoutCancerIndividual
+from cg.models.scout.scout_load_config import (
+    RnafusionLoadConfig,
+    ScoutCancerIndividual,
+    ScoutIndividual,
+)
 from cg.store.models import Analysis, CaseSample
 
 LOG = logging.getLogger(__name__)
@@ -56,12 +56,19 @@ def _include_file(self, scout_key) -> None:
             self.get_file_from_hk(getattr(self.case_tags, scout_key)),
         )
 
+    def include_sample_alignment_file(self, config_sample: ScoutIndividual) -> None:
+        """Include the RNA sample alignment file."""
+        config_sample.rna_alignment_path = self.get_sample_file(
+            hk_tags=self.sample_tags.alignment_file, sample_id=config_sample.sample_id
+        )
+
     def build_config_sample(self, case_sample: CaseSample) -> ScoutCancerIndividual:
         """Build a sample with rnafusion specific information."""
         config_sample = ScoutCancerIndividual()
-
         self.add_common_sample_info(config_sample=config_sample, case_sample=case_sample)
-
+        self.add_common_sample_files(config_sample=config_sample, case_sample=case_sample)
         config_sample.analysis_type = PrepCategory.WHOLE_TRANSCRIPTOME_SEQUENCING.value
 
+        # Replace sample_id with internal case id, as rnafusion currently uses case ids instead of sample ids
+        config_sample.sample_id = case_sample.case.internal_id
         return config_sample
diff --git a/cg/meta/upload/scout/scout_config_builder.py b/cg/meta/upload/scout/scout_config_builder.py
@@ -66,10 +66,9 @@ def add_common_sample_files(
         case_sample: CaseSample,
     ) -> None:
         """Add common sample files for different analysis types."""
-        sample_id: str = case_sample.sample.internal_id
-        LOG.info(f"Adding common files for sample {sample_id}")
-        self.include_sample_alignment_file(config_sample=config_sample)
-        self.include_sample_files(config_sample=config_sample)
+        LOG.info(f"Adding common files for sample {case_sample.sample.internal_id}")
+        self.include_sample_alignment_file(config_sample)
+        self.include_sample_files(config_sample)
 
     def build_config_sample(self, case_sample: CaseSample) -> ScoutIndividual:
         """Build a sample for the scout load config"""
@@ -79,9 +78,9 @@ def build_load_config(self) -> ScoutLoadConfig:
         """Build a load config for uploading a case to scout"""
         raise NotImplementedError
 
-    def include_sample_files(self, config_sample: ScoutIndividual) -> None:
+    def include_sample_files(self, _config_sample: ScoutIndividual) -> None:
         """Include all files that are used on sample level in Scout"""
-        raise NotImplementedError
+        return None
 
     def include_case_files(self) -> None:
         """Include all files that are used on case level in scout"""

diff --git a/cg/meta/workflow/nf_analysis.py b/cg/meta/workflow/nf_analysis.py
@@ -4,6 +4,8 @@
 from pathlib import Path
 from typing import Any
 
+from cg.store.models import Sample
+
 from cg.constants import Pipeline
 from cg.constants.constants import FileExtensions, FileFormat, WorkflowManager
 from cg.constants.nextflow import NFX_WORK_DIR
@@ -269,12 +271,14 @@ def get_deliverables_template_content() -> list[dict]:
     def get_deliverables_for_case(self, case_id: str) -> PipelineDeliverables:
         """Return PipelineDeliverables for a given case."""
         deliverable_template: list[dict] = self.get_deliverables_template_content()
+        sample_id: str = self.status_db.get_samples_by_case_id(case_id).pop().internal_id
         files: list[FileDeliverable] = []
         for file in deliverable_template:
             for deliverable_field, deliverable_value in file.items():
                 if deliverable_value is None:
                     continue
                 file[deliverable_field] = file[deliverable_field].replace("CASEID", case_id)
+                file[deliverable_field] = file[deliverable_field].replace("SAMPLEID", sample_id)
                 file[deliverable_field] = file[deliverable_field].replace(
                     "PATHTOCASE", str(self.get_case_path(case_id=case_id))
                 )

diff --git a/cg/meta/workflow/rnafusion.py b/cg/meta/workflow/rnafusion.py
@@ -242,7 +242,6 @@ def parse_analysis(self, qc_metrics_raw: list[MetricsBase], **kwargs) -> Rnafusi
         """Parse Rnafusion output analysis files and return analysis model."""
         sample_metrics: dict[str, dict] = {}
         for metric in qc_metrics_raw:
-            metric.name = metric.name.replace("5_3_bias", "bias_5_3")
             try:
                 sample_metrics[metric.id].update({metric.name.lower(): metric.value})
             except KeyError:

diff --git a/cg/models/nf_analysis.py b/cg/models/nf_analysis.py
@@ -2,7 +2,7 @@
 
 from pydantic.v1 import BaseModel, Field, conlist, validator
 
-from cg.exc import SampleSheetError, ValidationError
+from cg.exc import SampleSheetError
 
 
 class PipelineParameters(BaseModel):
@@ -52,12 +52,9 @@ class FileDeliverable(BaseModel):
     tag: str
 
     @validator("path", "path_index", pre=True)
-    def path_exist(cls, file_path: str | Path) -> str | None:
-        if file_path is not None:
-            path = Path(file_path)
-            if not path.exists():
-                raise ValidationError(f"Path {file_path} does not exist")
-            return str(path)
+    def set_path_as_string(cls, file_path: str | Path) -> str | None:
+        if file_path:
+            return str(Path(file_path))
         return None
 
 

diff --git a/cg/models/report/report.py b/cg/models/report/report.py
@@ -44,6 +44,7 @@ class ScoutReportFiles(BaseModel):
         sv_research_vcf: SV research VCF file uploaded to Scout; source: HK
         vcf_str: Short Tandem Repeat variants file (MIP-DNA specific); source: HK
         smn_tsv: SMN gene variants file (MIP-DNA specific); source: HK
+        vcf_fusion: Converted RNA fusion file to SV VCF (RNAfusion specific); source: HK
     """
 
     snv_vcf: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD
@@ -52,6 +53,7 @@ class ScoutReportFiles(BaseModel):
     sv_research_vcf: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD
     vcf_str: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD
     smn_tsv: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD
+    vcf_fusion: Annotated[str, BeforeValidator(get_path_as_string)] = NA_FIELD
 
 
 class DataAnalysisModel(BaseModel):

diff --git a/cg/models/rnafusion/rnafusion.py b/cg/models/rnafusion/rnafusion.py
@@ -15,13 +15,13 @@ class RnafusionQCMetrics(BaseModel):
     after_filtering_q30_rate: float | None
     after_filtering_read1_mean_length: float | None
     before_filtering_total_reads: float | None
-    bias_5_3: float | None
+    median_5prime_to_3prime_bias: float | None
     pct_adapter: float | None
     pct_mrna_bases: float | None
     pct_ribosomal_bases: float | None
     pct_surviving: float | None
     pct_duplication: float | None
-    reads_aligned: float | None
+    read_pairs_examined: float | None
     uniquely_mapped_percent: float | None
 
 
@@ -36,12 +36,7 @@ class RnafusionParameters(PipelineParameters):
     cram: str = "arriba,starfusion"
     fastp_trim: bool = True
     fusioncatcher: bool = True
-    fusioninspector_filter: bool = False
-    fusionreport_filter: bool = False
-    pizzly: bool = False
-    squid: bool = False
     starfusion: bool = True
-    trim: bool = False
     trim_tail: int = 50
 
 

diff --git a/cg/models/scout/scout_load_config.py b/cg/models/scout/scout_load_config.py
@@ -24,6 +24,7 @@ class Reviewer(BaseModel):
 
 class ScoutIndividual(BaseModel):
     alignment_path: str | None = None
+    rna_alignment_path: str | None = None
     analysis_type: Annotated[
         Literal[
             "external",
@@ -141,3 +142,4 @@ class RnafusionLoadConfig(ScoutLoadConfig):
     RNAfusion_report: str | None = None
     RNAfusion_report_research: str | None = None
     samples: list[ScoutCancerIndividual] = []
+    vcf_fusion: str | None = None