Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add(pacbio store service) #3478

Merged
merged 13 commits into from
Jul 29, 2024
2 changes: 1 addition & 1 deletion cg/services/post_processing/abstract_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def get_post_processing_dtos(self, run_data: RunData) -> PostProcessingDTOs:
class PostProcessingStoreService(ABC):
"""Abstract class that manages storing data transfer objects in the database."""

def store_post_processing_data(self, run_name):
def store_post_processing_data(self, run_data: RunData):
ChrOertlin marked this conversation as resolved.
Show resolved Hide resolved
pass


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from cg.services.post_processing.abstract_classes import (
PostProcessingStoreService,
)
from cg.services.post_processing.pacbio.data_transfer_service.data_transfer_service import (
PacBioDataTransferService,
)
from cg.services.post_processing.pacbio.data_transfer_service.dto import (
PacBioDTOs,
PacBioSMRTCellDTO,
PacBioSequencingRunDTO,
PacBioSampleSequencingMetricsDTO,
)
from cg.services.post_processing.pacbio.run_data_generator.run_data import PacBioRunData
from cg.store.models import PacBioSMRTCell, PacBioSequencingRun
from cg.store.store import Store


class PacBioStoreService(PostProcessingStoreService):
def __init__(self, store: Store, data_transfer_service: PacBioDataTransferService):
self.store = store
self.data_transfer_service = data_transfer_service

def _create_run_device(self, run_device_dto: PacBioSMRTCellDTO) -> PacBioSMRTCell:
return self.store.create_pac_bio_smrt_cell(run_device_dto)

def _create_instrument_run(
self, instrument_run_dto: PacBioSequencingRunDTO, smrt_cell: PacBioSMRTCell
) -> PacBioSequencingRun:
return self.store.create_pac_bio_sequencing_run(
sequencing_run_dto=instrument_run_dto, smrt_cell=smrt_cell
)

def _create_sample_run_metrics(
self,
sample_run_metrics_dtos: list[PacBioSampleSequencingMetricsDTO],
sequencing_run: PacBioSequencingRun,
):
for sample_run_metric in sample_run_metrics_dtos:
self.store.create_pac_bio_sample_sequencing_run(
sample_run_metrics_dto=sample_run_metric, sequencing_run=sequencing_run
)

def store_post_processing_data(self, run_data: PacBioRunData):
dtos: PacBioDTOs = self.data_transfer_service.get_post_processing_dtos()
smrt_cell: PacBioSMRTCell = self._create_run_device(dtos.run_device)
sequencing_run: PacBioSequencingRun = self._create_instrument_run(
instrument_run_dto=dtos.sequencing_run, smrt_cell=smrt_cell
)
self._create_sample_run_metrics(
sample_run_metrics_dtos=dtos.sample_sequencing_metrics, sequencing_run=sequencing_run
)
self.store.commit_to_store()
72 changes: 72 additions & 0 deletions cg/store/crud/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
IlluminaSampleSequencingMetricsDTO,
IlluminaSequencingRunDTO,
)
from cg.services.post_processing.pacbio.data_transfer_service.dto import (
PacBioSMRTCellDTO,
PacBioSequencingRunDTO,
PacBioSampleSequencingMetricsDTO,
)
from cg.store.base import BaseHandler
from cg.store.database import get_session
from cg.store.models import (
Expand All @@ -37,6 +42,9 @@
Sample,
User,
order_case,
PacBioSMRTCell,
PacBioSequencingRun,
PacBioSampleSequencingMetrics,
)

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -460,3 +468,67 @@ def add_illumina_sample_metrics_entry(
)
self.session.add(new_metric)
return new_metric

def create_pac_bio_smrt_cell(self, run_device_dto: PacBioSMRTCellDTO) -> PacBioSMRTCell:
if self.get_pac_bio_smrt_cell_by_internal_id(run_device_dto.internal_id):
raise ValueError(f"SMRT cell with {run_device_dto.internal_id} already exists.")
new_smrt_cell = PacBioSMRTCell(
type=run_device_dto.type, internal_id=run_device_dto.internal_id
)
self.session.add(new_smrt_cell)
return new_smrt_cell

def create_pac_bio_sequencing_run(
self, sequencing_run_dto: PacBioSequencingRunDTO, smrt_cell: PacBioSMRTCell
) -> PacBioSequencingRun:
new_sequencing_run = PacBioSequencingRun(
type=sequencing_run_dto.type,
well=sequencing_run_dto.well,
plate=sequencing_run_dto.plate,
movie_time_hours=sequencing_run_dto.movie_time_hours,
hifi_reads=sequencing_run_dto.hifi_reads,
hifi_yield=sequencing_run_dto.hifi_yield,
hifi_mean_read_length=sequencing_run_dto.hifi_mean_read_length,
hifi_median_read_quality=sequencing_run_dto.hifi_median_read_quality,
percent_reads_passing_q30=sequencing_run_dto.percent_reads_passing_q30,
productive_zmws=sequencing_run_dto.productive_zmws,
p0_percent=sequencing_run_dto.p0_percent,
p1_percent=sequencing_run_dto.p1_percent,
p2_percent=sequencing_run_dto.p2_percent,
polymerase_mean_read_length=sequencing_run_dto.polymerase_mean_read_length,
polymerase_read_length_n50=sequencing_run_dto.polymerase_read_length_n50,
polymerase_mean_longest_subread=sequencing_run_dto.polymerase_mean_longest_subread,
polymerase_longest_subread_n50=sequencing_run_dto.polymerase_longest_subread_n50,
control_reads=sequencing_run_dto.control_reads,
control_mean_read_length=sequencing_run_dto.control_mean_read_length,
control_mean_read_concordance=sequencing_run_dto.control_mean_read_concordance,
control_mode_read_concordance=sequencing_run_dto.control_mode_read_concordance,
failed_reads=sequencing_run_dto.failed_reads,
failed_yield=sequencing_run_dto.failed_yield,
failed_mean_read_length=sequencing_run_dto.failed_mean_read_length,
movie_name=sequencing_run_dto.movie_name,
device=smrt_cell,
)
self.session.add(new_sequencing_run)
return new_sequencing_run

def create_pac_bio_sample_sequencing_run(
self,
sample_run_metrics_dto: PacBioSampleSequencingMetricsDTO,
sequencing_run: PacBioSequencingRun,
) -> PacBioSampleSequencingMetrics:
sample: Sample = self.get_sample_by_internal_id(sample_run_metrics_dto.sample_internal_id)
new_sample_sequencing_run = PacBioSampleSequencingMetrics(
sample=sample,
hifi_reads=sample_run_metrics_dto.hifi_reads,
hifi_yield=sample_run_metrics_dto.hifi_yield,
hifi_mean_read_length=sample_run_metrics_dto.hifi_mean_read_length,
hifi_median_read_quality=sample_run_metrics_dto.hifi_median_read_quality,
percent_reads_passing_q30=sample_run_metrics_dto.percent_reads_passing_q30,
failed_reads=sample_run_metrics_dto.failed_reads,
failed_yield=sample_run_metrics_dto.failed_yield,
failed_mean_read_length=sample_run_metrics_dto.failed_mean_read_length,
instrument_run=sequencing_run,
)
self.session.add(new_sample_sequencing_run)
return new_sample_sequencing_run
12 changes: 12 additions & 0 deletions cg/store/crud/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@
from cg.store.filters.status_invoice_filters import InvoiceFilter, apply_invoice_filter
from cg.store.filters.status_order_filters import OrderFilter, apply_order_filters
from cg.store.filters.status_organism_filters import OrganismFilter, apply_organism_filter
from cg.store.filters.status_pacbio_smrt_cell_filters import (
apply_pac_bio_smrt_cell_filters,
PacBioSMRTCellFilter,
)
from cg.store.filters.status_panel_filters import PanelFilter, apply_panel_filter
from cg.store.filters.status_pool_filters import PoolFilter, apply_pool_filter
from cg.store.filters.status_sample_filters import SampleFilter, apply_sample_filter
Expand All @@ -72,6 +76,7 @@
Sample,
SampleRunMetrics,
User,
PacBioSMRTCell,
)

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -1463,3 +1468,10 @@ def get_cases_for_sequencing_qc(self) -> list[Case]:
CaseFilter.HAS_SEQUENCE,
],
).all()

def get_pac_bio_smrt_cell_by_internal_id(self, internal_id: str) -> PacBioSMRTCell:
return apply_pac_bio_smrt_cell_filters(
filter_functions=[PacBioSMRTCellFilter.BY_INTERNAL_ID],
smrt_cells=self._get_query(table=PacBioSMRTCell),
internal_id=internal_id,
).first()
28 changes: 28 additions & 0 deletions cg/store/filters/status_pacbio_smrt_cell_filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Filters for the PacBio SMRT cell model."""

from enum import Enum

from sqlalchemy.orm import Query


def filter_pac_bio_smrt_cell_by_internal_id(smrt_cells: Query, internal_id: str, **kwargs) -> Query:
"""Get a PacBio SMRT cell by its internal id."""
return smrt_cells.filter_by(internal_id=internal_id)


def apply_pac_bio_smrt_cell_filters(
smrt_cells: Query, filter_functions: list[callable], internal_id: str
) -> Query:
"""Apply filtering functions and return filtered results."""
for function in filter_functions:
smrt_cells: Query = function(
smrt_cells=smrt_cells,
internal_id=internal_id,
)
return smrt_cells


class PacBioSMRTCellFilter(Enum):
"""Define SMRT cell filter functions."""

BY_INTERNAL_ID: callable = filter_pac_bio_smrt_cell_by_internal_id
98 changes: 98 additions & 0 deletions tests/services/post_processing/pacbio/store_service/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
from unittest.mock import Mock

import pytest

from cg.constants.devices import DeviceType
from cg.services.post_processing.pacbio.data_storage_service.pacbio_store_service import (
PacBioStoreService,
)
from cg.services.post_processing.pacbio.data_transfer_service.data_transfer_service import (
PacBioDataTransferService,
)
from cg.services.post_processing.pacbio.data_transfer_service.dto import (
PacBioDTOs,
PacBioSMRTCellDTO,
PacBioSequencingRunDTO,
PacBioSampleSequencingMetricsDTO,
)
from cg.store.store import Store
from tests.store_helpers import StoreHelpers


@pytest.fixture
def pac_bio_smrt_cell_dto() -> PacBioSMRTCellDTO:
return PacBioSMRTCellDTO(type=DeviceType.PACBIO, internal_id="internal_id")


@pytest.fixture
def pac_bio_sequencing_run_dto() -> PacBioSequencingRunDTO:
sample_data = {
"type": DeviceType.PACBIO,
"well": "A1",
"plate": 1,
"movie_time_hours": 10,
"hifi_reads": 500000,
"hifi_yield": 3000000000,
"hifi_mean_read_length": 6000.5,
"hifi_median_read_length": 6000,
"hifi_mean_length_n50": 5000,
"hifi_median_read_quality": "Q20",
"percent_reads_passing_q30": 99.5,
"productive_zmws": 150000,
"p0_percent": 0.5,
"p1_percent": 1.5,
"p2_percent": 98.0,
"polymerase_mean_read_length": 7000.3,
"polymerase_read_length_n50": 6500.2,
"polymerase_mean_longest_subread": 12000.1,
"polymerase_longest_subread_n50": 11000.4,
"control_reads": 10000,
"control_mean_read_length": 5000.1,
"control_mean_read_concordance": 99.0,
"control_mode_read_concordance": 99.5,
"failed_reads": 1000,
"failed_yield": 500000000,
"failed_mean_read_length": 3000.2,
"movie_name": "movie123",
}
return PacBioSequencingRunDTO(**sample_data)


@pytest.fixture
def pac_bio_sample_sequencing_metrics_dto() -> list[PacBioSampleSequencingMetricsDTO]:
sample_metrics_data = {
"sample_internal_id": "sample_123",
"hifi_reads": 450000,
"hifi_yield": 2750000000,
"hifi_mean_read_length": 6100.7,
"hifi_median_read_length": 6000,
"hifi_median_read_quality": "Q30",
"percent_reads_passing_q30": 98.6,
"failed_reads": 1500,
"failed_yield": 500000000,
"failed_mean_read_length": 3200.3,
}
return [PacBioSampleSequencingMetricsDTO(**sample_metrics_data)]


@pytest.fixture
def pac_bio_dtos(
pac_bio_smrt_cell_dto: PacBioSMRTCellDTO,
pac_bio_sequencing_run_dto: PacBioSequencingRunDTO,
pac_bio_sample_sequencing_metrics_dto: list[PacBioSampleSequencingMetricsDTO],
) -> PacBioDTOs:
return PacBioDTOs(
run_device=pac_bio_smrt_cell_dto,
sequencing_run=pac_bio_sequencing_run_dto,
sample_sequencing_metrics=pac_bio_sample_sequencing_metrics_dto,
)


@pytest.fixture
def pac_bio_store_service(store: Store, helpers: StoreHelpers, pac_bio_dtos: PacBioDTOs):
helpers.add_sample(
store=store, internal_id=pac_bio_dtos.sample_sequencing_metrics[0].sample_internal_id
)
return PacBioStoreService(
store=store, data_transfer_service=PacBioDataTransferService(metrics_service=Mock())
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Module to test the PacBioStoreService."""

from cg.services.post_processing.pacbio.data_storage_service.pacbio_store_service import (
PacBioStoreService,
)
from cg.services.post_processing.pacbio.data_transfer_service.dto import PacBioDTOs
from cg.services.post_processing.pacbio.run_data_generator.run_data import PacBioRunData
from cg.store.models import PacBioSMRTCell, PacBioSequencingRun, PacBioSampleSequencingMetrics
from unittest import mock


def test_store_post_processing_data(
pac_bio_store_service: PacBioStoreService,
pac_bio_dtos: PacBioDTOs,
expected_pac_bio_run_data: PacBioRunData,
):
# GIVEN a PacBioStoreService

# GIVEN a successful data transfer service

# WHEN storing data for a PacBio instrument run
with mock.patch(
"cg.services.post_processing.pacbio.data_transfer_service.data_transfer_service.PacBioDataTransferService.get_post_processing_dtos",
return_value=pac_bio_dtos,
):
pac_bio_store_service.store_post_processing_data(expected_pac_bio_run_data)

# THEN the SMRT cell data is stored
smrt_cell: PacBioSMRTCell = pac_bio_store_service.store._get_query(PacBioSMRTCell).first()
assert smrt_cell
assert smrt_cell.internal_id == pac_bio_dtos.run_device.internal_id

# THEN the sequencing run is stored
sequencing_run: PacBioSequencingRun = pac_bio_store_service.store._get_query(
PacBioSequencingRun
).first()
assert sequencing_run
assert sequencing_run.well == pac_bio_dtos.sequencing_run.well

# THEN the sample sequencing metrics are stored
sample_sequencing_run_metrics: PacBioSampleSequencingMetrics = (
pac_bio_store_service.store._get_query(PacBioSampleSequencingMetrics).first()
)
assert sample_sequencing_run_metrics
assert (
sample_sequencing_run_metrics.sample.internal_id
== pac_bio_dtos.sample_sequencing_metrics[0].sample_internal_id
)
Loading