Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add(yield parsing to bclconvert metrics parser) #3271

Merged
merged 8 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
from cg.constants.devices import DeviceType
from cg.models.flow_cell.flow_cell import FlowCellDirectoryData
from cg.services.bcl_convert_metrics_service.parser import MetricsParser
from cg.store.models import SampleLaneSequencingMetrics, IlluminaSampleSequencingMetrics
from cg.store.models import (
SampleLaneSequencingMetrics,
IlluminaSampleSequencingMetrics,
IlluminaSequencingRun,
)
from cg.store.store import Store
from cg.utils.flow_cell import get_flow_cell_id

Expand Down Expand Up @@ -84,7 +88,6 @@ def _create_bcl_convert_sequencing_metrics(
def create_sample_run_metrics(
sample_internal_id: str,
lane: int,
run_metrics_id: int,
metrics_parser: MetricsParser,
store: Store,
) -> IlluminaSampleSequencingMetrics:
Expand All @@ -101,21 +104,29 @@ def create_sample_run_metrics(
)
sample_id: int = store.get_sample_by_internal_id(sample_internal_id).id

yield_: float = metrics_parser.get_yield_for_sample_in_lane(
sample_internal_id=sample_internal_id, lane=lane
)
yield_q30: float = metrics_parser.get_yield_q30_for_sample_in_lane(
sample_internal_id=sample_internal_id, lane=lane
)

return IlluminaSampleSequencingMetrics(
run_metrics_id=run_metrics_id,
sample_id=sample_id,
type=DeviceType.ILLUMINA,
flow_cell_lane=lane,
total_reads_in_lane=total_reads,
base_percentage_passing_q30=q30_bases_percent,
base_mean_quality_score=mean_quality_score,
_yield=yield_,
ChrOertlin marked this conversation as resolved.
Show resolved Hide resolved
yield_q30=yield_q30,
created_at=datetime.now(),
)

def create_sample_sequencing_metrics_for_flow_cell(
self,
flow_cell_directory: Path,
run_metrics_id: int,
sequencing_run: IlluminaSequencingRun,
store: Store,
) -> list[IlluminaSampleSequencingMetrics]:
"""Parse the demultiplexing metrics data into the sequencing statistics model."""
Expand All @@ -129,8 +140,8 @@ def create_sample_sequencing_metrics_for_flow_cell(
sample_internal_id=sample_internal_id,
lane=lane,
metrics_parser=metrics_parser,
run_metrics_id=run_metrics_id,
store=store,
)
sample_lane_sequencing_metrics.instrument_run = sequencing_run
ChrOertlin marked this conversation as resolved.
Show resolved Hide resolved
ChrOertlin marked this conversation as resolved.
Show resolved Hide resolved
sample_lane_sequencing_metrics.append(metrics)
return sample_lane_sequencing_metrics
2 changes: 2 additions & 0 deletions cg/services/bcl_convert_metrics_service/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ class SequencingQualityMetrics(BaseModel):
..., alias=QualityMetricsColumnNames.MEAN_QUALITY_SCORE_Q30
)
q30_bases_percent: float = Field(..., alias=QualityMetricsColumnNames.Q30_BASES_PERCENT)
yield_: int = Field(..., alias=QualityMetricsColumnNames.YIELD)
ChrOertlin marked this conversation as resolved.
Show resolved Hide resolved
yield_q30: float = Field(..., alias=QualityMetricsColumnNames.YIELD_Q30)


class DemuxMetrics(BaseModel):
Expand Down
30 changes: 30 additions & 0 deletions cg/services/bcl_convert_metrics_service/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,20 @@ def get_q30_bases_percent_for_sample_in_lane(self, sample_internal_id: str, lane
)
return self.calculate_mean_read_pair_q30_bases_percent(metrics=metrics)

def get_yield_for_sample_in_lane(self, sample_internal_id: str, lane: int) -> float:
    """Return the yield of the given sample in the given lane.

    The read pair metrics for the sample and lane are looked up first and
    then reduced to a single value by ``calculate_mean_yield``.
    """
    return self.calculate_mean_yield(
        metrics=self.get_read_pair_metrics_for_sample_and_lane(
            sample_internal_id=sample_internal_id, lane=lane
        )
    )

def get_yield_q30_for_sample_in_lane(self, sample_internal_id: str, lane: int) -> float:
    """Return the Q30 yield of the given sample in the given lane.

    The read pair metrics for the sample and lane are looked up first and
    then reduced to a single value by ``calculate_mean_yield_q30``.
    """
    return self.calculate_mean_yield_q30(
        metrics=self.get_read_pair_metrics_for_sample_and_lane(
            sample_internal_id=sample_internal_id, lane=lane
        )
    )
ChrOertlin marked this conversation as resolved.
Show resolved Hide resolved

@classmethod
def calculate_mean_read_pair_q30_bases_percent(
cls, metrics: list[SequencingQualityMetrics]
Expand All @@ -130,6 +144,22 @@ def calculate_mean_quality_score(cls, metrics: list[SequencingQualityMetrics]) -
total_q_score += metric.mean_quality_score_q30
return round(total_q_score / SCALE_TO_READ_PAIRS, 2)

@classmethod
def calculate_mean_yield(cls, metrics: list[SequencingQualityMetrics]) -> float:
    """Return the summed yield of the metrics divided by SCALE_TO_READ_PAIRS.

    The result is rounded to two decimals.
    """
    # NOTE(review): this scales the summed yield down by SCALE_TO_READ_PAIRS;
    # confirm a per-read mean (rather than the total yield) is what callers expect.
    summed_yield: float = sum(read_metrics.yield_ for read_metrics in metrics)
    return round(summed_yield / SCALE_TO_READ_PAIRS, 2)
ChrOertlin marked this conversation as resolved.
Show resolved Hide resolved

@classmethod
def calculate_mean_yield_q30(cls, metrics: list[SequencingQualityMetrics]) -> float:
    """Return the summed Q30 yield of the metrics divided by SCALE_TO_READ_PAIRS.

    The result is rounded to two decimals.
    """
    # Accumulate left to right so the float addition order is explicit.
    running_total: float = 0
    for read_metrics in metrics:
        running_total = running_total + read_metrics.yield_q30
    scaled_yield_q30 = running_total / SCALE_TO_READ_PAIRS
    return round(scaled_yield_q30, 2)
ChrOertlin marked this conversation as resolved.
Show resolved Hide resolved

def get_mean_quality_score_for_sample_in_lane(
self, sample_internal_id: str, lane: int
) -> float:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@
from cg.constants.devices import DeviceType
from cg.exc import MissingFilesError, FlowCellError
from cg.models.flow_cell.flow_cell import FlowCellDirectoryData
from cg.services.illumina_post_processing_service.database_utils import store_illumina_flow_cell
from cg.services.illumina_post_processing_service.utils import (
create_delivery_file_in_flow_cell_directory,
get_flow_cell_model_from_run_parameters,
)
from cg.services.illumina_post_processing_service.validation import (
is_flow_cell_ready_for_postprocessing,
Expand Down Expand Up @@ -45,7 +43,6 @@ def store_illumina_flow_cell(
@staticmethod
def store_illumina_sequencing_metrics(flow_cell: IlluminaFlowCell) -> None:
"""Store illumina run metrics in the status database."""
pass
ChrOertlin marked this conversation as resolved.
Show resolved Hide resolved

@staticmethod
def store_illumina_sample_sequencing_metrics():
Expand Down
2 changes: 2 additions & 0 deletions tests/services/bcl_convert_metrics_service/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ def bcl_convert_quality_metric_model_with_data(
QualityMetricsColumnNames.SAMPLE_INTERNAL_ID.value: test_sample_internal_id,
QualityMetricsColumnNames.MEAN_QUALITY_SCORE_Q30.value: 36.15,
QualityMetricsColumnNames.Q30_BASES_PERCENT.value: 0.95,
QualityMetricsColumnNames.YIELD.value: 415032696,
QualityMetricsColumnNames.YIELD_Q30.value: 393745856,
}
)

Expand Down
Loading