(Archiving) Add archive location tag to spring files (#2717) (minor)
### Changed

- Spring files are now tagged with the customer's archive location in Housekeeper.
islean authored Nov 30, 2023
1 parent 714f131 commit eca6ce5
Showing 7 changed files with 36 additions and 10 deletions.
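
The change threads the customer's archive location from the status database, through `CompressAPI.clean_fastq`, and into the tags Housekeeper stores for each Spring file. A condensed sketch of that flow, using simplified stand-ins (`Sample`, `SpringFile`, `CompressAPI` below) rather than the real cg `Store`, `CompressAPI`, and Housekeeper interfaces:

```python
# Illustrative stand-ins only; the real cg classes have richer interfaces.
from dataclasses import dataclass, field

PDC_ARCHIVE_LOCATION = "PDC"


@dataclass
class Sample:
    internal_id: str
    archive_location: str  # derived from the sample's customer in the real store


@dataclass
class SpringFile:
    path: str
    tags: list[str] = field(default_factory=list)


class CompressAPI:
    """Minimal sketch: cleaning FASTQs now also tags the Spring file with the archive location."""

    def __init__(self) -> None:
        self.spring_files: dict[str, SpringFile] = {}

    def clean_fastq(self, sample_id: str, archive_location: str) -> bool:
        spring_file = self.spring_files.setdefault(
            sample_id, SpringFile(path=f"{sample_id}.spring")
        )
        spring_file.tags.append(archive_location)
        return True


# The per-sample lookup mirrors the updated CLI loop in cg/cli/compress/fastq.py below.
samples = [Sample(internal_id="ACC0001A1", archive_location=PDC_ARCHIVE_LOCATION)]
compress_api = CompressAPI()
for sample in samples:
    compress_api.clean_fastq(
        sample_id=sample.internal_id, archive_location=sample.archive_location
    )
assert PDC_ARCHIVE_LOCATION in compress_api.spring_files["ACC0001A1"].tags
```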
3 changes: 2 additions & 1 deletion cg/cli/add.py
@@ -3,6 +3,7 @@
import click

from cg.constants import STATUS_OPTIONS, DataDelivery, Pipeline, Priority
+ from cg.constants.archiving import PDC_ARCHIVE_LOCATION
from cg.constants.subject import Gender
from cg.meta.transfer.external_data import ExternalDataAPI
from cg.models.cg_config import CGConfig
@@ -58,7 +59,7 @@ def add():
"--data-archive-location",
"data_archive_location",
help="Specifies where to store data for the customer.",
- default="PDC",
+ default=PDC_ARCHIVE_LOCATION,
show_default=True,
required=False,
)
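For the CLI, the only functional difference is that the `--data-archive-location` default now comes from the shared constant instead of a hard-coded literal. A minimal sketch of the option wiring; the command name `add_customer` and the `"customer"` label are hypothetical, only the option itself is taken from the diff:

```python
import click

PDC_ARCHIVE_LOCATION = "PDC"  # stand-in for cg.constants.archiving.PDC_ARCHIVE_LOCATION


@click.command("customer")  # hypothetical command name for illustration
@click.option(
    "--data-archive-location",
    "data_archive_location",
    help="Specifies where to store data for the customer.",
    default=PDC_ARCHIVE_LOCATION,  # previously the literal "PDC"
    show_default=True,
    required=False,
)
def add_customer(data_archive_location: str) -> None:
    click.echo(f"Data archive location: {data_archive_location}")


if __name__ == "__main__":
    add_customer()
```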
9 changes: 6 additions & 3 deletions cg/cli/compress/fastq.py
@@ -90,9 +90,12 @@ def clean_fastq(context: CGConfig, case_id: str | None, days_back: int, dry_run:

cleaned_inds = 0
for case in cases:
- samples: Iterable[str] = store.get_sample_ids_by_case_id(case_id=case.internal_id)
- for sample_id in samples:
-     was_cleaned: bool = compress_api.clean_fastq(sample_id=sample_id)
+ sample_ids: Iterable[str] = store.get_sample_ids_by_case_id(case_id=case.internal_id)
+ for sample_id in sample_ids:
+     archive_location: str = store.get_sample_by_internal_id(sample_id).archive_location
+     was_cleaned: bool = compress_api.clean_fastq(
+         sample_id=sample_id, archive_location=archive_location
+     )
if not was_cleaned:
LOG.info(f"Skipping individual {sample_id}")
continue
3 changes: 3 additions & 0 deletions cg/constants/archiving.py
@@ -5,3 +5,6 @@ class ArchiveLocations(StrEnum):
"""Archive locations for the different customers' Spring files."""

KAROLINSKA_BUCKET: str = "karolinska_bucket"


+ PDC_ARCHIVE_LOCATION: str = "PDC"
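
The new constant sits alongside the existing `ArchiveLocations` enum but is a plain module-level string rather than an enum member. A small sketch of the module together with a hypothetical helper, `is_bucket_location`, which is not part of this commit; `StrEnum` assumes Python 3.11+:

```python
from enum import StrEnum


class ArchiveLocations(StrEnum):
    """Archive locations for the different customers' Spring files."""

    KAROLINSKA_BUCKET = "karolinska_bucket"


PDC_ARCHIVE_LOCATION: str = "PDC"


def is_bucket_location(archive_location: str) -> bool:
    """Hypothetical helper: True for enum-backed bucket locations, False for the PDC default."""
    return archive_location in {location.value for location in ArchiveLocations}


assert is_bucket_location("karolinska_bucket")
assert not is_bucket_location(PDC_ARCHIVE_LOCATION)
```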
5 changes: 4 additions & 1 deletion cg/meta/compress/compress.py
@@ -137,7 +137,7 @@ def decompress_spring(self, sample_id: str) -> bool:
update_metadata_date(spring_metadata_path=compression.spring_metadata_path)
return True

- def clean_fastq(self, sample_id: str) -> bool:
+ def clean_fastq(self, sample_id: str, archive_location: str) -> bool:
"""Check that FASTQ compression is completed for a case and clean.
This means removing compressed FASTQ files and update housekeeper to point to the new SPRING
@@ -170,6 +170,7 @@ def clean_fastq(self, sample_id: str) -> bool:
compression_obj=compression,
hk_fastq_first=sample_fastq[run_name]["hk_first"],
hk_fastq_second=sample_fastq[run_name]["hk_second"],
+ archive_location=archive_location,
)

self.remove_fastq(
@@ -232,10 +233,12 @@ def update_fastq_hk(
compression_obj: CompressionData,
hk_fastq_first: File,
hk_fastq_second: File,
+ archive_location: str,
) -> None:
"""Update Housekeeper with compressed FASTQ files and SPRING metadata file."""
version: Version = self.hk_api.last_version(sample_id)
spring_tags: list[str] = self.get_spring_tags_from_fastq(hk_fastq_first)
+ spring_tags.append(archive_location)
spring_metadata_tags: list[str] = self.get_spring_metadata_tags_from_fastq(hk_fastq_first)
LOG.info(f"Updating FASTQ files in Housekeeper for {sample_id}")
LOG.info(
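The core of the change is in `update_fastq_hk`: the Spring file inherits its tags from the FASTQ file and, as of this commit, additionally gets the archive location appended. A stripped-down sketch of that tag composition; the helper name `build_spring_tags` is made up for this sketch, plain strings stand in for Housekeeper's tag objects, the literal `"fastq"` stands in for `SequencingFileTag.FASTQ`, and the SPRING tag itself is ignored for brevity:

```python
def build_spring_tags(fastq_tags: list[str], archive_location: str) -> list[str]:
    """Reuse the FASTQ-derived tags (minus the FASTQ tag) and append the archive location."""
    spring_tags = [tag for tag in fastq_tags if tag != "fastq"]
    spring_tags.append(archive_location)
    return spring_tags


assert build_spring_tags(["fastq", "ACC0001A1"], "PDC") == ["ACC0001A1", "PDC"]
```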
3 changes: 2 additions & 1 deletion cg/store/api/add.py
@@ -4,6 +4,7 @@
import petname

from cg.constants import DataDelivery, FlowCellStatus, Pipeline, Priority
+ from cg.constants.archiving import PDC_ARCHIVE_LOCATION
from cg.store.api.base import BaseHandler
from cg.store.models import (
Analysis,
@@ -45,7 +46,7 @@ def add_customer(
name: str,
invoice_address: str,
invoice_reference: str,
- data_archive_location: str = "PDC",
+ data_archive_location: str = PDC_ARCHIVE_LOCATION,
scout_access: bool = False,
is_clinical: bool = False,
*args,
3 changes: 2 additions & 1 deletion cg/store/models.py
@@ -15,6 +15,7 @@
Pipeline,
Priority,
)
+ from cg.constants.archiving import PDC_ARCHIVE_LOCATION
from cg.constants.constants import CONTROL_OPTIONS, CaseActions, PrepCategory

Model = declarative_base()
@@ -303,7 +304,7 @@ class Customer(Model):
return_samples = Column(types.Boolean, nullable=False, default=False)
scout_access = Column(types.Boolean, nullable=False, default=False)
uppmax_account = Column(types.String(32))
- data_archive_location = Column(types.String(32), nullable=False, default="PDC")
+ data_archive_location = Column(types.String(32), nullable=False, default=PDC_ARCHIVE_LOCATION)
is_clinical = Column(types.Boolean, nullable=False, default=False)

collaborations = orm.relationship(
20 changes: 17 additions & 3 deletions tests/meta/compress/test_clean_fastq.py
@@ -8,6 +8,7 @@

from cg.apps.housekeeper.hk import HousekeeperAPI
from cg.constants import SequencingFileTag
+ from cg.constants.archiving import PDC_ARCHIVE_LOCATION
from cg.meta.compress import files
from cg.models import CompressionData
from tests.cli.compress.conftest import MockCompressAPI
@@ -85,19 +86,26 @@ def test_update_hk_fastq(
compression_obj=compression,
hk_fastq_first=fastq[run]["hk_first"],
hk_fastq_second=fastq[run]["hk_second"],
+ archive_location=PDC_ARCHIVE_LOCATION,
)

# THEN assert that the SPRING files have been added to Housekeeper
hk_spring_files: list[File] = list(real_housekeeper_api.files(tags=[SequencingFileTag.SPRING]))
hk_spring_metadata_files: list[File] = list(
real_housekeeper_api.files(tags=[SequencingFileTag.SPRING_METADATA])
)

# THEN assert that the Spring files and Spring metadata files have had the fastq file tags transferred
for spring_file in [hk_spring_files, hk_spring_metadata_files]:
assert spring_file
for tag_name in [tag.name for tag in fastq[run]["hk_first"].tags]:
if tag_name != SequencingFileTag.FASTQ:
assert tag_name in spring_file.tags

+ # THEN assert that the spring files have been tagged with the archive location
+ for spring_file in hk_spring_files:
+     assert PDC_ARCHIVE_LOCATION in [tag.name for tag in spring_file.tags]

# THEN assert that the SPRING files have been added to bundles directory
for spring_file in [hk_spring_files[0].path, hk_spring_metadata_files[0].path]:
assert Path(root_path, spring_file).exists()
@@ -127,7 +135,9 @@ def test_cli_clean_fastqs_removed(
assert fastq_second.exists()

# WHEN running the clean command
- populated_compress_fastq_api.clean_fastq(sample)
+ populated_compress_fastq_api.clean_fastq(
+     sample_id=sample, archive_location=PDC_ARCHIVE_LOCATION
+ )

# THEN assert SPRING files exists
assert spring_file.exists()
@@ -156,7 +166,9 @@ def test_cli_clean_fastqs_no_spring_metadata(
assert not spring_metadata_file.exists()

# WHEN running the clean command
- populated_compress_fastq_api.clean_fastq(sample)
+ populated_compress_fastq_api.clean_fastq(
+     sample_id=sample, archive_location=PDC_ARCHIVE_LOCATION
+ )

# THEN assert SPRING file exists
assert spring_file.exists()
@@ -182,7 +194,9 @@ def test_cli_clean_fastqs_pending_compression_metadata(
assert crunchy_flag_file.exists()

# WHEN running the clean command
- populated_compress_fastq_api.clean_fastq(sample)
+ populated_compress_fastq_api.clean_fastq(
+     sample_id=sample, archive_location=PDC_ARCHIVE_LOCATION
+ )

# THEN assert SPRING file exists
assert spring_file.exists()
