Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add(new store flow illumina devices) #3272

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
from cg.constants.devices import DeviceType
from cg.models.flow_cell.flow_cell import FlowCellDirectoryData
from cg.services.bcl_convert_metrics_service.parser import MetricsParser
from cg.store.models import SampleLaneSequencingMetrics, IlluminaSampleSequencingMetrics
from cg.store.models import (
SampleLaneSequencingMetrics,
IlluminaSampleSequencingMetrics,
IlluminaSequencingRun,
)
from cg.store.store import Store
from cg.utils.flow_cell import get_flow_cell_id

Expand Down Expand Up @@ -84,7 +88,6 @@ def _create_bcl_convert_sequencing_metrics(
def create_sample_run_metrics(
sample_internal_id: str,
lane: int,
run_metrics_id: int,
metrics_parser: MetricsParser,
store: Store,
) -> IlluminaSampleSequencingMetrics:
Expand All @@ -101,21 +104,28 @@ def create_sample_run_metrics(
)
sample_id: int = store.get_sample_by_internal_id(sample_internal_id).id

yield_: float = metrics_parser.get_yield_for_sample_in_lane(
sample_internal_id=sample_internal_id, lane=lane
)
yield_q30: float = metrics_parser.get_yield_q30_for_sample_in_lane(
sample_internal_id=sample_internal_id, lane=lane
)

return IlluminaSampleSequencingMetrics(
run_metrics_id=run_metrics_id,
sample_id=sample_id,
type=DeviceType.ILLUMINA,
flow_cell_lane=lane,
total_reads_in_lane=total_reads,
base_percentage_passing_q30=q30_bases_percent,
base_mean_quality_score=mean_quality_score,
_yield=yield_,
yield_q30=yield_q30,
created_at=datetime.now(),
)

def create_sample_sequencing_metrics_for_flow_cell(
self,
flow_cell_directory: Path,
run_metrics_id: int,
store: Store,
) -> list[IlluminaSampleSequencingMetrics]:
"""Parse the demultiplexing metrics data into the sequencing statistics model."""
Expand All @@ -129,7 +139,6 @@ def create_sample_sequencing_metrics_for_flow_cell(
sample_internal_id=sample_internal_id,
lane=lane,
metrics_parser=metrics_parser,
run_metrics_id=run_metrics_id,
store=store,
)
sample_lane_sequencing_metrics.append(metrics)
Expand Down
2 changes: 2 additions & 0 deletions cg/services/bcl_convert_metrics_service/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ class SequencingQualityMetrics(BaseModel):
..., alias=QualityMetricsColumnNames.MEAN_QUALITY_SCORE_Q30
)
q30_bases_percent: float = Field(..., alias=QualityMetricsColumnNames.Q30_BASES_PERCENT)
yield_: int = Field(..., alias=QualityMetricsColumnNames.YIELD)
yield_q30: float = Field(..., alias=QualityMetricsColumnNames.YIELD_Q30)


class DemuxMetrics(BaseModel):
Expand Down
30 changes: 30 additions & 0 deletions cg/services/bcl_convert_metrics_service/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,20 @@ def get_q30_bases_percent_for_sample_in_lane(self, sample_internal_id: str, lane
)
return self.calculate_mean_read_pair_q30_bases_percent(metrics=metrics)

def get_yield_for_sample_in_lane(self, sample_internal_id: str, lane: int) -> float:
    """
    Return the yield for a sample in a lane.

    Fetches the read pair metrics for the given sample and lane and hands them
    to calculate_mean_yield.
    """
    read_pair_metrics: list[SequencingQualityMetrics] = (
        self.get_read_pair_metrics_for_sample_and_lane(
            sample_internal_id=sample_internal_id, lane=lane
        )
    )
    return self.calculate_mean_yield(metrics=read_pair_metrics)

def get_yield_q30_for_sample_in_lane(self, sample_internal_id: str, lane: int) -> float:
    """
    Return the Q30 yield for a sample in a lane.

    Fetches the read pair metrics for the given sample and lane and hands them
    to calculate_mean_yield_q30.
    """
    read_pair_metrics: list[SequencingQualityMetrics] = (
        self.get_read_pair_metrics_for_sample_and_lane(
            sample_internal_id=sample_internal_id, lane=lane
        )
    )
    return self.calculate_mean_yield_q30(metrics=read_pair_metrics)

@classmethod
def calculate_mean_read_pair_q30_bases_percent(
cls, metrics: list[SequencingQualityMetrics]
Expand All @@ -130,6 +144,22 @@ def calculate_mean_quality_score(cls, metrics: list[SequencingQualityMetrics]) -
total_q_score += metric.mean_quality_score_q30
return round(total_q_score / SCALE_TO_READ_PAIRS, 2)

@classmethod
def calculate_mean_yield(cls, metrics: list[SequencingQualityMetrics]) -> float:
    """
    Calculate the mean yield for a list of metrics.

    The yield_ values are summed and divided by SCALE_TO_READ_PAIRS (not by the
    number of metrics); the result is rounded to two decimals.
    """
    accumulated_yield: float = 0
    for read_metrics in metrics:
        accumulated_yield = accumulated_yield + read_metrics.yield_
    mean_yield: float = accumulated_yield / SCALE_TO_READ_PAIRS
    return round(mean_yield, 2)

@classmethod
def calculate_mean_yield_q30(cls, metrics: list[SequencingQualityMetrics]) -> float:
    """
    Calculate the mean Q30 yield for a list of metrics.

    The yield_q30 values are summed and divided by SCALE_TO_READ_PAIRS (not by
    the number of metrics); the result is rounded to two decimals.
    """
    accumulated_yield_q30: float = 0
    for read_metrics in metrics:
        accumulated_yield_q30 = accumulated_yield_q30 + read_metrics.yield_q30
    mean_yield_q30: float = accumulated_yield_q30 / SCALE_TO_READ_PAIRS
    return round(mean_yield_q30, 2)

def get_mean_quality_score_for_sample_in_lane(
self, sample_internal_id: str, lane: int
) -> float:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@
from cg.constants.devices import DeviceType
from cg.exc import MissingFilesError, FlowCellError
from cg.models.flow_cell.flow_cell import FlowCellDirectoryData
from cg.services.illumina_post_processing_service.database_utils import store_illumina_flow_cell
from cg.services.bcl_convert_metrics_service.bcl_convert_metrics_service import (
BCLConvertMetricsService,
)
from cg.services.illumina_post_processing_service.utils import (
create_delivery_file_in_flow_cell_directory,
get_flow_cell_model_from_run_parameters,
)
from cg.services.illumina_post_processing_service.validation import (
is_flow_cell_ready_for_postprocessing,
)
from cg.store.models import IlluminaFlowCell
from cg.store.models import IlluminaFlowCell, IlluminaSampleSequencingMetrics, IlluminaSequencingRun
from cg.store.store import Store

LOG = logging.getLogger(__name__)
Expand All @@ -27,10 +28,9 @@ def __init__(self, status_db: Store, housekeeper_api: HousekeeperAPI, dry_run: b
self.hk_api: HousekeeperAPI = housekeeper_api
self.dry_run: bool = False

@staticmethod
def store_illumina_flow_cell(
def get_illumina_flow_cell(
self,
flow_cell: FlowCellDirectoryData,
store: Store,
) -> IlluminaFlowCell:
"""
Create flow cell from the parsed and validated flow cell data.
Expand All @@ -40,25 +40,71 @@ def store_illumina_flow_cell(
new_flow_cell = IlluminaFlowCell(
internal_id=flow_cell.id, type=DeviceType.ILLUMINA, model=model
)
return store.add_illumina_flow_cell(new_flow_cell)
return new_flow_cell

@staticmethod
def aggregate_illumina_sample_metrics(
    sequencing_metrics: list[IlluminaSampleSequencingMetrics],
) -> IlluminaSequencingRun:
    """
    Aggregate sample sequencing metrics into a sequencing run.

    Placeholder: the aggregation is not implemented, so this currently returns
    None regardless of the input.
    """
    # TODO: build the IlluminaSequencingRun from the sample metrics.
    return None

def get_illumina_sequencing_run(
    self,
    sequencing_metrics: list[IlluminaSampleSequencingMetrics],
) -> IlluminaSequencingRun:
    """
    Return the sequencing run aggregated from the sample sequencing metrics.

    Delegates to aggregate_illumina_sample_metrics. This method itself does not
    add or commit anything to the status database.
    """
    return self.aggregate_illumina_sample_metrics(sequencing_metrics)

def get_illumina_sample_sequencing_metrics(
    self,
    flow_cell_directory: Path,
) -> list[IlluminaSampleSequencingMetrics]:
    """
    Return the sample sequencing metrics created for a flow cell directory.

    The metrics are built by BCLConvertMetricsService, which is given the status
    database store; this method itself does not add or commit anything.
    """
    metrics_service = BCLConvertMetricsService()
    sequencing_metrics: list[IlluminaSampleSequencingMetrics] = (
        metrics_service.create_sample_sequencing_metrics_for_flow_cell(
            flow_cell_directory=flow_cell_directory, store=self.status_db
        )
    )
    return sequencing_metrics

@staticmethod
def relate_illumina_flow_cell_to_run(
    flow_cell: IlluminaFlowCell, sequencing_run: IlluminaSequencingRun
) -> tuple[IlluminaFlowCell, IlluminaSequencingRun]:
    """
    Link a flow cell and a sequencing run to each other.

    Appends the run to the flow cell's instrument_runs and the flow cell to the
    run's device, then returns both objects.
    """
    runs_on_flow_cell = flow_cell.instrument_runs
    runs_on_flow_cell.append(sequencing_run)
    # NOTE(review): `device` is appended to like a collection - confirm the
    # relationship on the run is list-valued and not a single device.
    devices_of_run = sequencing_run.device
    devices_of_run.append(flow_cell)
    return (flow_cell, sequencing_run)

@staticmethod
def relate_sequencing_run_to_sequence_metrics(
    sequencing_run: IlluminaSequencingRun,
    sequencing_metrics: list[IlluminaSampleSequencingMetrics],
) -> tuple[IlluminaSequencingRun, list[IlluminaSampleSequencingMetrics]]:
    """
    Link a sequencing run and its sample sequencing metrics to each other.

    Assigns the metrics list to the run's sample_metrics, points every metrics
    entry's instrument_run at the run, and returns (run, metrics) in that order.
    """
    # NOTE(review): the models code iterates `run.sample_run_metrics`; confirm
    # that `sample_metrics` is the attribute name actually defined on the run.
    sequencing_run.sample_metrics = sequencing_metrics
    for sample_metrics in sequencing_metrics:
        sample_metrics.instrument_run = sequencing_run
    return (sequencing_run, sequencing_metrics)

def store_illumina_flow_cell_data(self, flow_cell: FlowCellDirectoryData) -> None:
    """
    Store flow cell data in the status database.

    Builds the Illumina flow cell, its sample sequencing metrics and the
    sequencing run, relates them to each other, adds all three to the store
    and commits.
    """
    illumina_flow_cell: IlluminaFlowCell = self.get_illumina_flow_cell(flow_cell=flow_cell)
    sequencing_metrics: list[IlluminaSampleSequencingMetrics] = (
        self.get_illumina_sample_sequencing_metrics(flow_cell_directory=flow_cell.path)
    )
    # The run is derived from the sample metrics, so they have to be passed on;
    # get_illumina_sequencing_run has no default for this argument.
    sequencing_run: IlluminaSequencingRun = self.get_illumina_sequencing_run(
        sequencing_metrics=sequencing_metrics
    )
    illumina_flow_cell, sequencing_run = self.relate_illumina_flow_cell_to_run(
        flow_cell=illumina_flow_cell, sequencing_run=sequencing_run
    )
    # This helper returns (run, metrics): unpack into the run, never into the
    # flow cell, otherwise the flow cell added below would be the run object.
    sequencing_run, sequencing_metrics = self.relate_sequencing_run_to_sequence_metrics(
        sequencing_run=sequencing_run, sequencing_metrics=sequencing_metrics
    )
    self.status_db.add_illumina_flow_cell(illumina_flow_cell)
    # NOTE(review): confirm the Store defines add_illumina_sequencing_run.
    self.status_db.add_illumina_sequencing_run(sequencing_run)
    self.status_db.add_illumina_sample_sequencing_metrics(sequencing_metrics)
    self.status_db.commit_to_store()

def post_process_illumina_flow_cell(
Expand Down
11 changes: 7 additions & 4 deletions cg/store/crud/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
order_case,
IlluminaFlowCell,
IlluminaSequencingRun,
IlluminaSampleSequencingMetrics,
)

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -427,8 +428,10 @@ def add_illumina_flow_cell(self, flow_cell: IlluminaFlowCell) -> IlluminaFlowCel
LOG.debug(f"Flow cell added to status db: {flow_cell.id}.")
return flow_cell

def add_illumina_sample_sequencing_metrics(
    self, sequencing_metrics: list[IlluminaSampleSequencingMetrics]
) -> list[IlluminaSampleSequencingMetrics]:
    """
    Add new Illumina sample sequencing metrics to the session.

    All entries are added in one call; nothing is committed here. The same
    list is returned to the caller.
    """
    # NOTE(review): the session comes from get_session() rather than from an
    # attribute on self - confirm this matches the other add_* methods.
    session = get_session()
    session.add_all(sequencing_metrics)
    return sequencing_metrics
2 changes: 1 addition & 1 deletion cg/store/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ def _samples(self) -> list[Sample]:
return list(
{
sample_run_metric.sample
for run in self.instrument_run
for run in self.instrument_runs
for sample_run_metric in run.sample_run_metrics
}
)
Expand Down
2 changes: 2 additions & 0 deletions tests/services/bcl_convert_metrics_service/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ def bcl_convert_quality_metric_model_with_data(
QualityMetricsColumnNames.SAMPLE_INTERNAL_ID.value: test_sample_internal_id,
QualityMetricsColumnNames.MEAN_QUALITY_SCORE_Q30.value: 36.15,
QualityMetricsColumnNames.Q30_BASES_PERCENT.value: 0.95,
QualityMetricsColumnNames.YIELD.value: 415032696,
QualityMetricsColumnNames.YIELD_Q30.value: 393745856,
}
)

Expand Down
Loading