From 5222040d95c4850a62836f8d63f995111493f469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= <60181709+Karl-Svard@users.noreply.github.com> Date: Tue, 21 Nov 2023 17:38:24 +0100 Subject: [PATCH] Add support for 25B and 1.5B flow cells (#453)(minor) ### Added - New constants for 25B and 1.5B flow cells - New EPP for checking that given process UDFs have been set, cg_lims/EPPs/udf/check/check_process_udfs.py - New constants file for the `set` EPPs, cg_lims/EPPs/udf/set/constants.py - New EPP for automatically filling in the default sequencing settings for a given flow cell type, cg_lims/EPPs/udf/set/set_sequencing_settings.py ### Changed - Reworked the way constants were used in cg_lims/EPPs/udf/calculate/novaseq_x_denaturation.py - updated cg_lims/EPPs/udf/calculate/novaseq_x_volumes.py to use the new constants --- cg_lims/EPPs/udf/calculate/constants.py | 24 ++++- .../udf/calculate/novaseq_x_denaturation.py | 69 ++++++++++---- .../EPPs/udf/calculate/novaseq_x_volumes.py | 20 +++- cg_lims/EPPs/udf/check/base.py | 2 + cg_lims/EPPs/udf/check/check_process_udfs.py | 40 ++++++++ cg_lims/EPPs/udf/set/base.py | 2 + cg_lims/EPPs/udf/set/constants.py | 17 ++++ .../EPPs/udf/set/set_sequencing_settings.py | 95 +++++++++++++++++++ 8 files changed, 247 insertions(+), 22 deletions(-) create mode 100644 cg_lims/EPPs/udf/check/check_process_udfs.py create mode 100644 cg_lims/EPPs/udf/set/constants.py create mode 100644 cg_lims/EPPs/udf/set/set_sequencing_settings.py diff --git a/cg_lims/EPPs/udf/calculate/constants.py b/cg_lims/EPPs/udf/calculate/constants.py index 04eb1958..6bb272e4 100644 --- a/cg_lims/EPPs/udf/calculate/constants.py +++ b/cg_lims/EPPs/udf/calculate/constants.py @@ -5,18 +5,40 @@ class FlowCellTypes(StrEnum): """Flow cell types available from Illumina""" FLOW_CELL_10B: str = "10B" + FLOW_CELL_25B: str = "25B" + FLOW_CELL_15B: str = "1.5B" class FlowCellSize(IntEnum): """The total number of lanes for a flow cell type.""" FLOW_CELL_10B: int = 8 + FLOW_CELL_25B: int = 8 + FLOW_CELL_15B: int = 2 class FlowCellLaneVolumes10B(FloatEnum): - """The recommended volume of reagents per flow cell lane. All values are in ul.""" + """The recommended volume of reagents per 10B flow cell lane. All values are in ul.""" POOL_VOLUME: float = 34 PHIX_VOLUME: float = 1 NAOH_VOLUME: float = 8.5 BUFFER_VOLUME: float = 127.5 + + +class FlowCellLaneVolumes15B(FloatEnum): + """The recommended volume of reagents per 1.5B flow cell lane. All values are in ul.""" + + POOL_VOLUME: float = 34 + PHIX_VOLUME: float = 1 + NAOH_VOLUME: float = 8.5 + BUFFER_VOLUME: float = 127.5 + + +class FlowCellLaneVolumes25B(FloatEnum): + """The recommended volume of reagents per 25B flow cell lane. All values are in ul.""" + + POOL_VOLUME: float = 56 + PHIX_VOLUME: float = 1.6 + NAOH_VOLUME: float = 14 + BUFFER_VOLUME: float = 210 diff --git a/cg_lims/EPPs/udf/calculate/novaseq_x_denaturation.py b/cg_lims/EPPs/udf/calculate/novaseq_x_denaturation.py index aeac431b..463a49a3 100644 --- a/cg_lims/EPPs/udf/calculate/novaseq_x_denaturation.py +++ b/cg_lims/EPPs/udf/calculate/novaseq_x_denaturation.py @@ -7,7 +7,13 @@ from cg_lims.exceptions import LimsError, MissingUDFsError from cg_lims.get.artifacts import get_artifacts -from cg_lims.EPPs.udf.calculate.constants import FlowCellTypes, FlowCellSize, FlowCellLaneVolumes10B +from cg_lims.EPPs.udf.calculate.constants import ( + FlowCellTypes, + FlowCellSize, + FlowCellLaneVolumes10B, + FlowCellLaneVolumes15B, + FlowCellLaneVolumes25B, +) LOG = logging.getLogger(__name__) @@ -19,32 +25,59 @@ def __init__(self, per_lane_udf: str, total_udf: str, volume: float): self.volume: float = volume -DENATURATION_VOLUMES = { - FlowCellTypes.FLOW_CELL_10B: [ - DenaturationReagent( +class NovaSeqXDenaturation: + def __init__(self, pool: float, phix: float, naoh: float, buffer: float): + self.pool: DenaturationReagent = DenaturationReagent( per_lane_udf="Volume of Pool to Denature (ul) per Lane", total_udf="Total Volume of Pool to Denature (ul)", - volume=FlowCellLaneVolumes10B.POOL_VOLUME, - ), - DenaturationReagent( + volume=pool, + ) + self.phix: DenaturationReagent = DenaturationReagent( per_lane_udf="PhiX Volume (ul) per Lane", total_udf="Total PhiX Volume (ul)", - volume=FlowCellLaneVolumes10B.PHIX_VOLUME, - ), - DenaturationReagent( + volume=phix, + ) + self.naoh: DenaturationReagent = DenaturationReagent( per_lane_udf="NaOH Volume (ul) per Lane", total_udf="Total NaOH Volume (ul)", - volume=FlowCellLaneVolumes10B.NAOH_VOLUME, - ), - DenaturationReagent( + volume=naoh, + ) + self.buffer: DenaturationReagent = DenaturationReagent( per_lane_udf="Pre-load Buffer Volume (ul) per Lane", total_udf="Total Pre-load Buffer Volume (ul)", - volume=FlowCellLaneVolumes10B.BUFFER_VOLUME, - ), - ] + volume=buffer, + ) + + def get_reagent_list(self): + return [self.pool, self.phix, self.naoh, self.buffer] + + +DENATURATION_VOLUMES = { + FlowCellTypes.FLOW_CELL_10B: NovaSeqXDenaturation( + pool=FlowCellLaneVolumes10B.POOL_VOLUME, + phix=FlowCellLaneVolumes10B.PHIX_VOLUME, + naoh=FlowCellLaneVolumes10B.NAOH_VOLUME, + buffer=FlowCellLaneVolumes10B.BUFFER_VOLUME, + ), + FlowCellTypes.FLOW_CELL_15B: NovaSeqXDenaturation( + pool=FlowCellLaneVolumes15B.POOL_VOLUME, + phix=FlowCellLaneVolumes15B.PHIX_VOLUME, + naoh=FlowCellLaneVolumes15B.NAOH_VOLUME, + buffer=FlowCellLaneVolumes15B.BUFFER_VOLUME, + ), + FlowCellTypes.FLOW_CELL_25B: NovaSeqXDenaturation( + pool=FlowCellLaneVolumes25B.POOL_VOLUME, + phix=FlowCellLaneVolumes25B.PHIX_VOLUME, + naoh=FlowCellLaneVolumes25B.NAOH_VOLUME, + buffer=FlowCellLaneVolumes25B.BUFFER_VOLUME, + ), } -FLOW_CELL_SIZE = {FlowCellTypes.FLOW_CELL_10B: FlowCellSize.FLOW_CELL_10B} +FLOW_CELL_SIZE = { + FlowCellTypes.FLOW_CELL_10B: FlowCellSize.FLOW_CELL_10B, + FlowCellTypes.FLOW_CELL_15B: FlowCellSize.FLOW_CELL_15B, + FlowCellTypes.FLOW_CELL_25B: FlowCellSize.FLOW_CELL_25B, +} def get_flow_cell_type(process: Process) -> str: @@ -75,7 +108,7 @@ def set_process_udfs(process: Process, parent_process: Process) -> None: number_of_lanes: int = get_number_of_lanes(process=parent_process) process.udf["Flow Cell Type"] = flow_cell_type process.udf["Lanes to Load"] = number_of_lanes - for reagent in DENATURATION_VOLUMES[flow_cell_type]: + for reagent in DENATURATION_VOLUMES[flow_cell_type].get_reagent_list(): process.udf[reagent.total_udf] = number_of_lanes * reagent.volume.value process.udf[reagent.per_lane_udf] = reagent.volume.value process.put() diff --git a/cg_lims/EPPs/udf/calculate/novaseq_x_volumes.py b/cg_lims/EPPs/udf/calculate/novaseq_x_volumes.py index 7f892168..5bf02b52 100644 --- a/cg_lims/EPPs/udf/calculate/novaseq_x_volumes.py +++ b/cg_lims/EPPs/udf/calculate/novaseq_x_volumes.py @@ -7,13 +7,27 @@ from cg_lims.exceptions import LimsError, MissingUDFsError, InvalidValueError from cg_lims.get.artifacts import get_artifacts -from cg_lims.EPPs.udf.calculate.constants import FlowCellTypes, FlowCellSize, FlowCellLaneVolumes10B +from cg_lims.EPPs.udf.calculate.constants import ( + FlowCellTypes, + FlowCellSize, + FlowCellLaneVolumes10B, + FlowCellLaneVolumes15B, + FlowCellLaneVolumes25B, +) LOG = logging.getLogger(__name__) -FLOW_CELL_LANE_VOLUMES = {FlowCellTypes.FLOW_CELL_10B: FlowCellLaneVolumes10B.POOL_VOLUME} -FLOW_CELL_SIZE = {FlowCellTypes.FLOW_CELL_10B: FlowCellSize.FLOW_CELL_10B} +FLOW_CELL_LANE_VOLUMES = { + FlowCellTypes.FLOW_CELL_10B: FlowCellLaneVolumes10B.POOL_VOLUME, + FlowCellTypes.FLOW_CELL_15B: FlowCellLaneVolumes15B.POOL_VOLUME, + FlowCellTypes.FLOW_CELL_25B: FlowCellLaneVolumes25B.POOL_VOLUME, +} +FLOW_CELL_SIZE = { + FlowCellTypes.FLOW_CELL_10B: FlowCellSize.FLOW_CELL_10B, + FlowCellTypes.FLOW_CELL_15B: FlowCellSize.FLOW_CELL_15B, + FlowCellTypes.FLOW_CELL_25B: FlowCellSize.FLOW_CELL_25B, +} def get_flow_cell_type(process: Process) -> str: diff --git a/cg_lims/EPPs/udf/check/base.py b/cg_lims/EPPs/udf/check/base.py index 1cc3c73a..26f7c087 100644 --- a/cg_lims/EPPs/udf/check/base.py +++ b/cg_lims/EPPs/udf/check/base.py @@ -4,6 +4,7 @@ # commands from cg_lims.EPPs.udf.check.check_artifact_udfs import check_artifact_udfs +from cg_lims.EPPs.udf.check.check_process_udfs import check_process_udfs @click.group(invoke_without_command=True) @@ -14,3 +15,4 @@ def check(ctx): check.add_command(check_artifact_udfs) +check.add_command(check_process_udfs) diff --git a/cg_lims/EPPs/udf/check/check_process_udfs.py b/cg_lims/EPPs/udf/check/check_process_udfs.py new file mode 100644 index 00000000..124afb86 --- /dev/null +++ b/cg_lims/EPPs/udf/check/check_process_udfs.py @@ -0,0 +1,40 @@ +import logging +import sys +from typing import List +import click +from genologics.entities import Process +from cg_lims import options +from cg_lims.exceptions import LimsError, MissingUDFsError + +LOG = logging.getLogger(__name__) + + +def check_udfs(process: Process, process_udfs: List[str]) -> None: + """Check that process UDFs are set.""" + + warning = [] + for udf in process_udfs: + if process.udf.get(udf) is None: + warning.append(f"UDF: '{udf}' is missing for the step.") + if warning: + LOG.warning(" ".join(warning)) + raise MissingUDFsError(message=" ".join(warning)) + LOG.info("Process UDFs were all set.") + + +@click.command() +@options.process_udfs() +@click.pass_context +def check_process_udfs( + ctx: click.Context, + process_udfs: List[str], +): + """Script to check that process UDFs are set.""" + + LOG.info(f"Running {ctx.command_path} with params: {ctx.params}") + process: Process = ctx.obj["process"] + try: + check_udfs(process=process, process_udfs=process_udfs) + click.echo("Process UDFs were checked.") + except LimsError as e: + sys.exit(e.message) diff --git a/cg_lims/EPPs/udf/set/base.py b/cg_lims/EPPs/udf/set/base.py index 2f18cb16..7149b9b6 100644 --- a/cg_lims/EPPs/udf/set/base.py +++ b/cg_lims/EPPs/udf/set/base.py @@ -4,6 +4,7 @@ from cg_lims.EPPs.udf.set.set_sample_date import set_sample_date from cg_lims.EPPs.udf.set.set_method import method_document from cg_lims.EPPs.udf.set.set_barcode import assign_barcode +from cg_lims.EPPs.udf.set.set_sequencing_settings import set_sequencing_settings @click.group(invoke_without_command=True) @@ -17,3 +18,4 @@ def set(context: click.Context): set.add_command(set_sample_date) set.add_command(method_document) set.add_command(assign_barcode) +set.add_command(set_sequencing_settings) diff --git a/cg_lims/EPPs/udf/set/constants.py b/cg_lims/EPPs/udf/set/constants.py new file mode 100644 index 00000000..54e91179 --- /dev/null +++ b/cg_lims/EPPs/udf/set/constants.py @@ -0,0 +1,17 @@ +from cg_lims.enums import IntEnum + + +class DefaultReadLength(IntEnum): + """Default read length most usually used by each flow cell type.""" + + FLOW_CELL_10B: int = 151 + FLOW_CELL_25B: int = 151 + FLOW_CELL_15B: int = 51 + + +class DefaultIndexLength(IntEnum): + """Default index length most usually used by each flow cell type.""" + + FLOW_CELL_10B: int = 10 + FLOW_CELL_25B: int = 10 + FLOW_CELL_15B: int = 8 diff --git a/cg_lims/EPPs/udf/set/set_sequencing_settings.py b/cg_lims/EPPs/udf/set/set_sequencing_settings.py new file mode 100644 index 00000000..315750a5 --- /dev/null +++ b/cg_lims/EPPs/udf/set/set_sequencing_settings.py @@ -0,0 +1,95 @@ +import logging +import sys +import click + +from typing import List, Optional +from genologics.lims import Artifact, Process +from cg_lims.exceptions import LimsError, MissingUDFsError +from cg_lims.get.artifacts import get_artifacts + +from cg_lims.EPPs.udf.calculate.constants import FlowCellTypes +from cg_lims.EPPs.udf.set.constants import DefaultReadLength, DefaultIndexLength + +LOG = logging.getLogger(__name__) + +DEFAULT_INDEX_LENGTHS = { + FlowCellTypes.FLOW_CELL_10B: DefaultIndexLength.FLOW_CELL_10B, + FlowCellTypes.FLOW_CELL_15B: DefaultIndexLength.FLOW_CELL_15B, + FlowCellTypes.FLOW_CELL_25B: DefaultIndexLength.FLOW_CELL_25B, +} + +DEFAULT_READ_LENGTHS = { + FlowCellTypes.FLOW_CELL_10B: DefaultReadLength.FLOW_CELL_10B, + FlowCellTypes.FLOW_CELL_15B: DefaultReadLength.FLOW_CELL_15B, + FlowCellTypes.FLOW_CELL_25B: DefaultReadLength.FLOW_CELL_25B, +} + + +def get_library_tube_strip(process: Process) -> str: + """Return the Library Tube Strip ID from a process.""" + library_tube_strip: str = process.udf.get("Library Tube Strip ID") + if not library_tube_strip: + LOG.error(f"Process {process.id} is missing UDF 'Library Tube Strip ID'") + raise MissingUDFsError(f"UDF 'Library Tube Strip ID' missing from previous step.") + return library_tube_strip + + +def get_flow_cell_type(process: Process) -> str: + """Return the Flow Cell Type from a process.""" + flow_cell_type: str = process.udf.get("Flow Cell Type") + if not flow_cell_type: + LOG.error(f"Process {process.id} is missing UDF 'Flow Cell Type'") + raise MissingUDFsError(f"UDF 'Flow Cell Type' missing from previous step.") + return flow_cell_type + + +def get_flow_cell_name(process: Process) -> str: + """Return the flow cell name of the step.""" + containers = process.output_containers() + return containers[0].name + + +def set_process_udfs(process: Process, parent_process: Process) -> None: + """Set Prepare for Sequencing (NovaSeq X) process UDFs.""" + library_tube_strip: str = get_library_tube_strip(process=parent_process) + flow_cell_type: str = get_flow_cell_type(process=parent_process) + flow_cell_name: str = get_flow_cell_name(process=process) + read_length: int = DEFAULT_READ_LENGTHS[flow_cell_type].value + index_length: int = DEFAULT_INDEX_LENGTHS[flow_cell_type].value + process.udf["Library Tube Strip ID"] = library_tube_strip + process.udf["Run Mode"] = flow_cell_type + process.udf["Read 1 Cycles"] = read_length + process.udf["Read 2 Cycles"] = read_length + process.udf["Index Read 1"] = index_length + process.udf["Index Read 2"] = index_length + process.udf["BaseSpace Run Name"] = flow_cell_name + process.put() + + +def get_parent_process(process: Process) -> Optional[Process]: + """Get the parent process of another process, assuming all input artifacts come from the same step.""" + input_artifacts: List[Artifact] = get_artifacts(process=process, input=True) + if not input_artifacts: + LOG.info(f"No input artifacts found for process {process.id}.") + return None + return input_artifacts[0].parent_process + + +@click.command() +@click.pass_context +def set_sequencing_settings(ctx): + """Sets the settings required for sequencing of NovaSeq X flow cells.""" + + LOG.info(f"Running {ctx.command_path} with params: {ctx.params}") + + process: Process = ctx.obj["process"] + + try: + parent_process: Process = get_parent_process(process=process) + set_process_udfs(process=process, parent_process=parent_process) + message: str = "Sequencing settings have been successfully set." + LOG.info(message) + click.echo(message) + except LimsError as e: + LOG.error(e.message) + sys.exit(e.message)