diff --git a/docs/api.rst b/docs/api.rst index 148ed1c..e2a4974 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -27,6 +27,9 @@ or from pybwa import BwaMem mem = BwaMem(prefix="/path/to/genome.fasta") +The :class:`~pybwa.BwaIndex` object is useful when re-using the same index, such that it only needs to be loaded into +memory once. Both constructors for the :class:`~pybwa.BwaAln` and :class:`~pybwa.BwaMem` objects accept an index. + The :meth:`pybwa.BwaAln.align` method accepts a list of reads (as either strings or :class:`pysam.FastxRecord` s) to align and return a *single* :class:`pysam.AlignedSegment` per input read: @@ -66,20 +69,22 @@ It is constructed directly and options set on the object: recs = aln.align(queries=["GATTACA"], opt=opt) -The :meth:`pybwa.BwaMem.align` method accepts custom options provided as a :class:`~pybwa.BwaMemOptions` object. -It is constructed via the :class:`~pybwa.BwaMemOptionsBuilder` class, to support scaling gap open and extend penalties -when a using custom match score, or the specification of presets (via `mode`). +Similarly, the :meth:`pybwa.BwaMem.align` method accepts custom options provided as a :class:`~pybwa.BwaMemOptions` object. +It is constructed directly and options set on the object: .. code-block:: python - builder = BwaMemOptionsBuilder() - builder.min_seed_len = 32 - opt: BwaMemOptions = builder.build() + opt = BwaMemOptions() + opt.min_seed_len = 32 recs = aln.align(queries=["GATTACA"], opt=opt) -The :class:`~pybwa.BwaIndex` object is useful when re-using the same index, such that it only needs to be loaded into memory -once. -Both constructors for the :class:`~pybwa.BwaAln` and :class:`~pybwa.BwaMem` objects accept an index. +Note: the :meth:`~pybwa.BwaMemOptions.finalize` method will both apply the presets as specified by the +:meth:`~pybwa.BwaMemOptions.mode` option, as well as scale various other options (:code:`-TdBOELU`) based on the +:attr:`~pybwa.BwaMemOptions.match_score`. The presets and scaling will only be applied to other options that have not +been modified from their defaults. After calling the :meth:`~pybwa.BwaMemOptions.finalize` method, the options are +immutable, unless :code:`copy=True` is passed to :meth:`~pybwa.BwaMemOptions.finalize` method, in which case a copy +of the options are returned by the method. Regardless, the :meth:`~pybwa.BwaMemOptions.finalize` method *does not* +need to be called before the :meth:`pybwa.BwaMem.align` is invoked, as the latter will do so (making a local copy). API versus Command-line Differences =================================== @@ -109,9 +114,6 @@ Bwa Aln Bwa Mem ======= -.. autoclass:: pybwa.BwaMemOptionsBuilder - :members: - .. autoclass:: pybwa.BwaMemOptions :members: diff --git a/docs/index.rst b/docs/index.rst index 34be8ae..7fae029 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,5 +12,6 @@ Documentation Contents .. toctree:: :maxdepth: 2 + install.rst api.rst diff --git a/docs/install.rst b/docs/install.rst new file mode 100644 index 0000000..3de91a5 --- /dev/null +++ b/docs/install.rst @@ -0,0 +1,43 @@ +============ +Installation +============ + +1. Install the environment manager :code:`mamba` +2. Install the Python build tool :code:`poetry` + +3. Create an environment with Python, Cython, and Pysam: + +.. code-block:: bash + + mamba env create -f pybwa.yml + +4. Activate the environment: + +.. code-block:: bash + + mamba activate pybwa + +5. Clone the :code:`bwa` repo + +.. code-block:: bash + + git clone https://github.com/lh3/bwa + +6. Configure poetry to install into pre-existing virtual environments: + +.. code-block:: bash + + poetry config virtualenvs.create false + +7. Install :code:`pybwa` into the virtual environment: + +.. code-block:: bash + + poetry install + +8. Check your build: + +.. code-block:: bash + + poetry run pytest + diff --git a/pybwa/libbwaaln.pyx b/pybwa/libbwaaln.pyx index ba495b8..19e164f 100644 --- a/pybwa/libbwaaln.pyx +++ b/pybwa/libbwaaln.pyx @@ -33,6 +33,7 @@ cdef class BwaAlnOptions: free(self._delegate) cdef gap_opt_t* gap_opt(self): + """Returns the options struct to use with the bwa C library methods""" return self._delegate property max_mismatches: @@ -138,6 +139,14 @@ cdef class BwaAln: cdef BwaIndex _index def __init__(self, prefix: str | Path | None = None, index: BwaIndex | None = None): + """Constructs the :code:`bwa aln` aligner. + + One of `prefix` or `index` must be specified. + + Args: + prefix: the path prefix for the BWA index (typically a FASTA) + index: the index to use + """ if prefix is not None: assert Path(prefix).exists() self._index = BwaIndex(prefix=prefix) @@ -148,7 +157,6 @@ cdef class BwaAln: bwase_initialize() - # TODO: a list of records... def align(self, queries: List[FastxRecord] | List[str], opt: BwaAlnOptions | None = None) -> List[AlignedSegment]: """Align one or more queries with `bwa aln`. @@ -157,7 +165,8 @@ cdef class BwaAln: opt: the alignment options, or None to use the default options Returns: - one alignment per query + one alignment (:class:`~pysam.AlignedSegment`) per query + :code:`List[List[AlignedSegment]]`. """ if len(queries) == 0: return [] diff --git a/pybwa/libbwaindex.pyx b/pybwa/libbwaindex.pyx index 86144ff..aefd3c2 100644 --- a/pybwa/libbwaindex.pyx +++ b/pybwa/libbwaindex.pyx @@ -41,7 +41,7 @@ cdef class BwaIndex: with :code:`samtools dict `). Args: - prefix (str | Path): the path prefix for teh BWA index + prefix (str | Path): the path prefix for the BWA index (typically a FASTA) bwt (bool): load the BWT (FM-index) bns (bool): load the BNS (reference sequence metadata) pac (bool): load the PAC (the actual 2-bit encoded reference sequences with 'N' converted to a diff --git a/pybwa/libbwamem.pyi b/pybwa/libbwamem.pyi index 3accdba..8f356da 100644 --- a/pybwa/libbwamem.pyi +++ b/pybwa/libbwamem.pyi @@ -18,42 +18,136 @@ class BwaMemMode(enum.Enum): class BwaMemOptions: def __init__(self, finalize: bool = False) -> None: ... + _finalized: bool _ignore_alt: bool - min_seed_len: int - mode: BwaMemMode - band_width: int - match_score: int - mismatch_penalty: int - minimum_score: int - unpaired_penalty: int - n_threads: int - skip_pairing: bool - output_all_for_fragments: bool - interleaved_paired_end: bool - short_split_as_secondary: bool - skip_mate_rescue: bool - soft_clip_supplementary: bool - with_xr_tag: bool - query_coord_as_primary: bool - keep_mapq_for_supplementary: bool - with_xb_tag: bool - max_occurrences: int - off_diagonal_x_dropoff: float - ignore_alternate_contigs: bool - internal_seed_split_factor: float - drop_chain_fraction: float - max_mate_rescue_rounds: int - min_seeded_bases_in_chain: int - seed_occurrence_in_3rd_round: int - xa_max_hits: int | tuple[int, int] - xa_drop_ratio: float - gap_open_penalty: int | tuple[int, int] - gap_extension_penalty: int | tuple[int, int] - clipping_penalty: int | tuple[int, int] - -class BwaMemOptionsBuilder(BwaMemOptions): - def __init__(self, options: BwaMemOptions | None = None) -> None: ... - def build(self) -> BwaMemOptions: ... + _mode: BwaMemMode | None + @property + def finalized(self) -> bool: ... + @property + def min_seed_len(self) -> int: ... + @min_seed_len.setter + def min_seed_len(self, value: int) -> None: ... + @property + def mode(self) -> BwaMemMode: ... + @mode.setter + def mode(self, value: BwaMemMode) -> None: ... + @property + def band_width(self) -> int: ... + @band_width.setter + def band_width(self, value: int) -> None: ... + @property + def match_score(self) -> int: ... + @match_score.setter + def match_score(self, value: int) -> None: ... + @property + def mismatch_penalty(self) -> int: ... + @mismatch_penalty.setter + def mismatch_penalty(self, value: int) -> None: ... + @property + def minimum_score(self) -> int: ... + @minimum_score.setter + def minimum_score(self, value: int) -> None: ... + @property + def unpaired_penalty(self) -> int: ... + @unpaired_penalty.setter + def unpaired_penalty(self, value: int) -> None: ... + @property + def n_threads(self) -> int: ... + @n_threads.setter + def n_threads(self, value: int) -> None: ... + @property + def skip_pairing(self) -> bool: ... + @skip_pairing.setter + def skip_pairing(self, value: bool) -> None: ... + @property + def output_all_for_fragments(self) -> bool: ... + @output_all_for_fragments.setter + def output_all_for_fragments(self, value: bool) -> None: ... + @property + def interleaved_paired_end(self) -> bool: ... + @interleaved_paired_end.setter + def interleaved_paired_end(self, value: bool) -> None: ... + @property + def short_split_as_secondary(self) -> bool: ... + @short_split_as_secondary.setter + def short_split_as_secondary(self, value: bool) -> None: ... + @property + def skip_mate_rescue(self) -> bool: ... + @skip_mate_rescue.setter + def skip_mate_rescue(self, value: bool) -> None: ... + @property + def soft_clip_supplementary(self) -> bool: ... + @soft_clip_supplementary.setter + def soft_clip_supplementary(self, value: bool) -> None: ... + @property + def with_xr_tag(self) -> bool: ... + @with_xr_tag.setter + def with_xr_tag(self, value: bool) -> None: ... + @property + def query_coord_as_primary(self) -> bool: ... + @query_coord_as_primary.setter + def query_coord_as_primary(self, value: bool) -> None: ... + @property + def keep_mapq_for_supplementary(self) -> bool: ... + @keep_mapq_for_supplementary.setter + def keep_mapq_for_supplementary(self, value: bool) -> None: ... + @property + def with_xb_tag(self) -> bool: ... + @with_xb_tag.setter + def with_xb_tag(self, value: bool) -> None: ... + @property + def max_occurrences(self) -> int: ... + @max_occurrences.setter + def max_occurrences(self, value: int) -> None: ... + @property + def off_diagonal_x_dropoff(self) -> float: ... + @off_diagonal_x_dropoff.setter + def off_diagonal_x_dropoff(self, value: float) -> None: ... + @property + def ignore_alternate_contigs(self) -> bool: ... + @ignore_alternate_contigs.setter + def ignore_alternate_contigs(self, value: bool) -> None: ... + @property + def internal_seed_split_factor(self) -> float: ... + @internal_seed_split_factor.setter + def internal_seed_split_factor(self, value: float) -> None: ... + @property + def drop_chain_fraction(self) -> float: ... + @drop_chain_fraction.setter + def drop_chain_fraction(self, value: float) -> None: ... + @property + def max_mate_rescue_rounds(self) -> int: ... + @max_mate_rescue_rounds.setter + def max_mate_rescue_rounds(self, value: int) -> None: ... + @property + def min_seeded_bases_in_chain(self) -> int: ... + @min_seeded_bases_in_chain.setter + def min_seeded_bases_in_chain(self, value: int) -> None: ... + @property + def seed_occurrence_in_3rd_round(self) -> int: ... + @seed_occurrence_in_3rd_round.setter + def seed_occurrence_in_3rd_round(self, value: int) -> None: ... + @property + def xa_max_hits(self) -> int | tuple[int, int]: ... + @xa_max_hits.setter + def xa_max_hits(self, value: int | tuple[int, int]) -> None: ... + @property + def xa_drop_ratio(self) -> float: ... + @xa_drop_ratio.setter + def xa_drop_ratio(self, value: float) -> None: ... + @property + def gap_open_penalty(self) -> int | tuple[int, int]: ... + @gap_open_penalty.setter + def gap_open_penalty(self, value: int | tuple[int, int]) -> None: ... + @property + def gap_extension_penalty(self) -> int | tuple[int, int]: ... + @gap_extension_penalty.setter + def gap_extension_penalty(self, value: int | tuple[int, int]) -> None: ... + @property + def clipping_penalty(self) -> int | tuple[int, int]: ... + @clipping_penalty.setter + def clipping_penalty(self, value: int | tuple[int, int]) -> None: ... + def finalize(self, copy: bool = False) -> BwaMemOptions: ... class BwaMem: _index: BwaIndex diff --git a/pybwa/libbwamem.pyx b/pybwa/libbwamem.pyx index 56d2b90..5b27140 100644 --- a/pybwa/libbwamem.pyx +++ b/pybwa/libbwamem.pyx @@ -11,7 +11,6 @@ from pysam import FastxRecord, AlignedSegment, qualitystring_to_array __all__ = [ "BwaMemMode", "BwaMemOptions", - "BwaMemOptionsBuilder", "BwaMem", ] @@ -30,337 +29,232 @@ class BwaMemMode(enum.Enum): cdef class BwaMemOptions: """The container for options for :class:`~pybwa.BwaMem`.""" + _finalized: bool _ignore_alt: bool _mode: BwaMemMode | None cdef mem_opt_t* _options + cdef mem_opt_t* _options0 + + def _assert_not_finalized(self, attr_name: str) -> None: + """Raises an AttributeError if the options have been finalized. + + This is used in each setter below to enforce that setters are not used after + :meth:`~pybwa.BwaMemOptions.finalize` is called. This could be a property decorator, + but cython does not support decorating properties. + """ + if self._finalized: + raise AttributeError(f"can't set attribute: {attr_name}") + + def __init__(self, options: BwaMemOptions | None = None): + """Builds a new `BwaMemOptions` instance. + + Args: + options: initialize these options from the given options + """ + if options is None: + self._finalized = False + self._ignore_alt = False + self._mode = None + else: + self._finalized = options._finalized + self._ignore_alt = options._ignore_alt + self._mode = options._mode - def __init__(self): - self._ignore_alt = False - self._mode = None + cdef _copy_options(self, dst: BwaMemOptions, src: BwaMemOptions): + memcpy(dst._options, src._options, sizeof(mem_opt_t)) + memcpy(dst._options0, src._options0, sizeof(mem_opt_t)) - def __cinit__(self): + def __cinit__(self, opt: BwaMemOptions | None = None): self._options = mem_opt_init() + self._options0 = mem_opt_init() + if opt is not None: + self._copy_options(self, opt) def __dealloc__(self): free(self._options) self._options = NULL + free(self._options0) + self._options0 = NULL cdef mem_opt_t* mem_opt(self): + """Returns the options struct to use with the bwa C library methods""" + if not self._finalized: + raise Exception("Cannot call `mem_opt` until `finalize()` is called") return self._options - property min_seed_len: - """:code:`bwa mem -k `""" - def __get__(self) -> int: - return self._options.min_seed_len - - property mode: - """:code:`bwa mem -x `""" - def __get__(self) -> BwaMemMode: - return self._mode - - property band_width: - """:code:`bwa mem -w `""" - def __get__(self) -> int: - return self._options.w - - property match_score: - """:code:`bwa mem -A `""" - def __get__(self) -> int: - return self._options.a - - property mismatch_penalty: - """:code:`bwa mem -A `""" - def __get__(self) -> int: - return self._options.b - - property minimum_score: - """:code:`bwa mem -T `""" - def __get__(self) -> int: - return self._options.T - - property unpaired_penalty: - """:code:`bwa mem -U `""" - def __get__(self) -> int: - return self._options.pen_unpaired - - property n_threads: - """:code:`bwa mem -t `""" - def __get__(self) -> int: - return self._options.n_threads - - property skip_pairing: - """:code:`bwa mem -P`""" - def __get__(self) -> bool: - return (self._options.flag & MEM_F_NOPAIRING) != 0 - - property output_all_for_fragments: - """:code:`bwa mem -a`""" - def __get__(self) -> bool: - return (self._options.flag & MEM_F_ALL) != 0 - - property interleaved_paired_end: - """:code:`bwa mem -p`""" - def __get__(self) -> bool: - return (self._options.flag & (MEM_F_PE | MEM_F_SMARTPE)) != 0 - - property short_split_as_secondary: - """:code:`bwa mem -M`""" - def __get__(self) -> bool: - return (self._options.flag & MEM_F_NO_MULTI) != 0 - - property skip_mate_rescue: - """:code:`bwa mem -S`""" - def __get__(self) -> bool: - return (self._options.flag & MEM_F_NO_RESCUE) != 0 - - property soft_clip_supplementary: - """:code:`bwa mem -Y`""" - def __get__(self) -> bool: - return (self._options.flag & MEM_F_SOFTCLIP) != 0 - - property with_xr_tag: - """:code:`bwa mem -V`""" - def __get__(self) -> bool: - return (self._options.flag & MEM_F_REF_HDR) != 0 - - property query_coord_as_primary: - """:code:`bwa mem -5`""" - def __get__(self) -> bool: - return (self._options.flag & (MEM_F_PRIMARY5 | MEM_F_KEEP_SUPP_MAPQ)) != 0 + @property + def finalized(self) -> bool: + """True if the options have been finalized with :meth:`~pybwa.BwaMemOptions.finalize`.""" + return self._finalized - property keep_mapq_for_supplementary: - """:code:`bwa mem -q`""" - def __get__(self) -> bool: - return (self._options.flag & MEM_F_KEEP_SUPP_MAPQ) != 0 + def finalize(self, copy: bool = False) -> BwaMemOptions: + """Performs final initialization of these options. The object returned may not be + modified further. - property with_xb_tag: - """:code:`bwa mem -u`""" - def __get__(self) -> bool: - return (self._options.flag & MEM_F_XB) != 0 + If the mode is given, then the presets are applied to options that have not been explicitly + set. Otherwise, if the match score has been set, the match score scales the (-TdBOELU) + options if they have not been explicitly set. - property max_occurrences: - """:code:`bwa mem -c `""" - def __get__(self) -> int: - return self._options.max_occ - - property off_diagonal_x_dropoff: - """:code:`bwa mem -d `""" - def __get__(self) -> float: - return self._options.XA_drop_ratio - - property ignore_alternate_contigs: - """:code:`bwa mem -j`""" - def __get__(self) -> bool: - return self._ignore_alt - - property internal_seed_split_factor: - """:code:`bwa mem -r `""" - def __get__(self) -> float: - return self._options.split_factor - - property drop_chain_fraction: - """:code:`bwa mem -D `""" - def __get__(self) -> float: - return self._options.drop_ratio - - property max_mate_rescue_rounds: - """:code:`bwa mem -m `""" - def __get__(self) -> int: - return self._options.max_matesw - - property min_seeded_bases_in_chain: - """:code:`bwa mem -W `""" - def __get__(self) -> int: - return self._options.min_chain_weight - - property seed_occurrence_in_3rd_round: - """:code:`bwa mem -y `""" - def __get__(self) -> int: - return self._options.max_mem_intv - - property xa_max_hits: - """:code:`bwa mem -h >`""" - def __get__(self) -> int | tuple[int, int]: - return self._options.max_XA_hits, self._options.max_XA_hits_alt - - property xa_drop_ratio: - """:code:`bwa mem -y `""" - def __get__(self) -> float: - return self._options.XA_drop_ratio - - property gap_open_penalty: - """:code:`bwa mem -O >`""" - def __get__(self) -> int | tuple[int, int]: - if self._options.o_del == self._options.o_ins: - return self._options.o_del - else: - return self._options.o_del, self._options.o_ins - - property gap_extension_penalty: - """:code:`bwa mem -E >`""" - def __get__(self) -> int | tuple[int, int]: - if self._options.e_del == self._options.e_ins: - return self._options.e_del - else: - return self._options.e_del, self._options.e_ins - - - property clipping_penalty: - """:code:`bwa mem -L >`""" - def __get__(self) -> int | tuple[int, int]: - if self._options.pen_clip5 == self._options.pen_clip3: - return self._options.pen_clip5 - else: - return self._options.pen_clip5, self._options.pen_clip3 - - - -cdef class BwaMemOptionsBuilder(BwaMemOptions): - """The container for options for :class:`~pybwa.BwaMem`.""" - cdef mem_opt_t* _options0 - - def __init__(self, options: BwaMemOptions | None = None): - super().__init__() - if options is not None: - self.ignore_alternate_contigs = options.ignore_alternate_contigs - self.mode = options.mode - self._copy_options(options) - - cdef _copy_options(self, options: BwaMemOptions): - memcpy(self._options, options.mem_opt(), sizeof(mem_opt_t)) - - def __cinit__(self, options: BwaMemOptions | None = None): - self._options0 = mem_opt_init() - memset(self._options0, 0, sizeof(mem_opt_t)) - - def __dealloc__(self): - free(self._options0) - self._options0 = NULL + Args: + copy: true to return a finalized copy of this object, false to finalize this object + """ + opt: BwaMemOptions + if copy: + opt = BwaMemOptions() + memcpy(opt._options, self._options, sizeof(mem_opt_t)) + else: + opt = self - def build(self) -> BwaMemOptions: - if self._mode is None: + if opt._mode is None: # matching score is changed so scale the rest of the penalties/scores - if self._options0.a == 1: - if self._options0.b != 1: - self._options.b *= self._options.a - if self._options0.T != 1: - self._options.T *= self._options.a - if self._options0.o_del != 1: - self._options.o_del *= self._options.a - if self._options0.e_del != 1: - self._options.e_del *= self._options.a - if self._options0.o_ins != 1: - self._options.o_ins *= self._options.a - if self._options0.e_ins != 1: - self._options.e_ins *= self._options.a - if self._options0.zdrop != 1: - self._options.zdrop *= self._options.a - if self._options0.pen_clip5 != 1: - self._options.pen_clip5 *= self._options.a - if self._options0.pen_clip3 != 1: - self._options.pen_clip3 *= self._options.a - if self._options0.pen_unpaired != 1: - self._options.pen_unpaired *= self._options.a - elif self._mode == BwaMemMode.INTRACTG: - if self._options0.o_del != 1: - self._options.o_del = 16 - if self._options0.o_ins != 1: - self._options.o_ins = 16 - if self._options0.b != 1: - self._options.b = 9 - if self._options0.pen_clip5 != 1: - self._options.pen_clip5 = 5 - if self._options0.pen_clip3 != 1: - self._options.pen_clip3 = 5 + if opt._options0.a == 1: + if opt._options0.b != 1: + opt._options.b *= opt._options.a + if opt._options0.T != 1: + opt._options.T *= opt._options.a + if opt._options0.o_del != 1: + opt._options.o_del *= opt._options.a + if opt._options0.e_del != 1: + opt._options.e_del *= opt._options.a + if opt._options0.o_ins != 1: + opt._options.o_ins *= opt._options.a + if opt._options0.e_ins != 1: + opt._options.e_ins *= opt._options.a + if opt._options0.zdrop != 1: + opt._options.zdrop *= opt._options.a + if opt._options0.pen_clip5 != 1: + opt._options.pen_clip5 *= opt._options.a + if opt._options0.pen_clip3 != 1: + opt._options.pen_clip3 *= opt._options.a + if opt._options0.pen_unpaired != 1: + opt._options.pen_unpaired *= opt._options.a + elif opt._mode == BwaMemMode.INTRACTG: + if opt._options0.o_del != 1: + opt._options.o_del = 16 + if opt._options0.o_ins != 1: + opt._options.o_ins = 16 + if opt._options0.b != 1: + opt._options.b = 9 + if opt._options0.pen_clip5 != 1: + opt._options.pen_clip5 = 5 + if opt._options0.pen_clip3 != 1: + opt._options.pen_clip3 = 5 else: - if self._options0.o_del != 1: - self._options.o_del = 1 - if self._options0.o_ins != 1: - self._options.o_ins = 1 - if self._options0.e_del != 1: - self._options.e_del = 1 - if self._options0.e_ins != 1: - self._options.e_ins = 1 - if self._options0.b != 1: - self._options.b = 1 - if self._options0.split_factor == 0.0: - self._options.split_factor = 10.0 - if self._options0.pen_clip5 != 1: - self._options.pen_clip5 = 0 - if self._options0.pen_clip3 != 1: - self._options.pen_clip3 = 0 + if opt._options0.o_del != 1: + opt._options.o_del = 1 + if opt._options0.o_ins != 1: + opt._options.o_ins = 1 + if opt._options0.e_del != 1: + opt._options.e_del = 1 + if opt._options0.e_ins != 1: + opt._options.e_ins = 1 + if opt._options0.b != 1: + opt._options.b = 1 + if opt._options0.split_factor == 0.0: + opt._options.split_factor = 10.0 + if opt._options0.pen_clip5 != 1: + opt._options.pen_clip5 = 0 + if opt._options0.pen_clip3 != 1: + opt._options.pen_clip3 = 0 # ONT2D vs PACBIO options - if self._options0.min_chain_weight != 1: - self._options.min_chain_weight = 20 if self._mode == BwaMemMode.ONT2D else 40 - if self._options0.min_seed_len != 1: - self._options.min_seed_len = 14 if self._mode == BwaMemMode.ONT2D else 17 + if opt._options0.min_chain_weight != 1: + opt._options.min_chain_weight = 20 if opt._mode == BwaMemMode.ONT2D else 40 + if opt._options0.min_seed_len != 1: + opt._options.min_seed_len = 14 if opt._mode == BwaMemMode.ONT2D else 17 bwa_fill_scmat( - self._options.a, self._options.b, self._options.mat + opt._options.a, opt._options.b, opt._options.mat ) - return self - property min_seed_len: + opt._finalized = True + return opt + + @property + def min_seed_len(self) -> int: """:code:`bwa mem -k `""" - def __get__(self): - return super().min_seed_len - def __set__(self, value: int): - self._options.min_seed_len = value - self._options0.min_seed_len = 1 + return self._options.min_seed_len + + @min_seed_len.setter + def min_seed_len(self, value: int): + self._assert_not_finalized(attr_name=BwaMemOptions.min_seed_len.__name__) + self._options.min_seed_len = value + self._options0.min_seed_len = 1 - property mode: + @property + def mode(self) -> BwaMemMode: """:code:`bwa mem -x `""" - def __get__(self) -> BwaMemMode: - return super().mode - def __set__(self, value: BwaMemMode): - self._mode = value + return self._mode - property band_width: + @mode.setter + def mode(self, value: BwaMemMode) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.mode.__name__) + self._mode = value + + @property + def band_width(self) -> int: """:code:`bwa mem -w `""" - def __get__(self): - return super().band_width - def __set__(self, value: int): - self._options.w = value - self._options0.w = 1 + return self._options.w + + @band_width.setter + def band_width(self, value: int) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.band_width.__name__) + self._options.w = value + self._options0.w = 1 - property match_score: + @property + def match_score(self) -> int: """:code:`bwa mem -A `""" - def __get__(self): - return super().match_score - def __set__(self, value: int): - self._options.a = value - self._options0.a = 1 + return self._options.a - property mismatch_penalty: + + @match_score.setter + def match_score(self, value: int) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.match_score.__name__) + self._options.a = value + self._options0.a = 1 + + @property + def mismatch_penalty(self) -> int: """:code:`bwa mem -A `""" - def __get__(self): - return super().mismatch_penalty - def __set__(self, value: int): - self._options.b = value - self._options0.b = 1 + return self._options.b - property minimum_score: + @mismatch_penalty.setter + def mismatch_penalty(self, value: int) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.mismatch_penalty.__name__) + self._options.b = value + self._options0.b = 1 + + @property + def minimum_score(self) -> int: """:code:`bwa mem -T `""" - def __get__(self): - return super().minimum_score - def __set__(self, value: int): - self._options.T = value - self._options0.T = 1 + return self._options.T + + @minimum_score.setter + def minimum_score(self, value: int) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.minimum_score.__name__) + self._options.T = value + self._options0.T = 1 - property unpaired_penalty: + @property + def unpaired_penalty(self) -> int: """:code:`bwa mem -U `""" - def __get__(self): - return super().unpaired_penalty - def __set__(self, value: int): - self._options.pen_unpaired = value - self._options0.pen_unpaired = 1 + return self._options.pen_unpaired - property n_threads: + @unpaired_penalty.setter + def unpaired_penalty(self, value: int) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.unpaired_penalty.__name__) + self._options.pen_unpaired = value + self._options0.pen_unpaired = 1 + + @property + def n_threads(self) -> int: """:code:`bwa mem -t `""" - def __get__(self): - return super().n_threads - def __set__(self, value: int): - self._options.n_threads = value if value > 1 else 1 + return self._options.n_threads + + @n_threads.setter + def n_threads(self, value: int) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.n_threads.__name__) + self._options.n_threads = value if value > 1 else 1 def _set_flag(self, value: bool, flag: int): if value: @@ -369,206 +263,284 @@ cdef class BwaMemOptionsBuilder(BwaMemOptions): self._options.flag |= ~flag return self - property skip_pairing: + @property + def skip_pairing(self) -> bool: """:code:`bwa mem -P`""" - def __get__(self): - return super().skip_pairing - def __set__(self, value: bool): - self._set_flag(value, MEM_F_NOPAIRING) + return (self._options.flag & MEM_F_NOPAIRING) != 0 - property output_all_for_fragments: + @skip_pairing.setter + def skip_pairing(self, value: bool) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.skip_pairing.__name__) + self._set_flag(value, MEM_F_NOPAIRING) + + @property + def output_all_for_fragments(self) -> bool: """:code:`bwa mem -a`""" - def __get__(self): - return super().output_all_for_fragments - def __set__(self, value: bool): - self._set_flag(value, MEM_F_ALL) + return (self._options.flag & MEM_F_ALL) != 0 + + @output_all_for_fragments.setter + def output_all_for_fragments(self, value: bool) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.output_all_for_fragments.__name__) + self._set_flag(value, MEM_F_ALL) - property interleaved_paired_end: + @property + def interleaved_paired_end(self) -> bool: """:code:`bwa mem -p`""" - def __get__(self): - return super().interleaved_paired_end - def __set__(self, value: bool): - self._set_flag(value, MEM_F_PE | MEM_F_SMARTPE) + return (self._options.flag & (MEM_F_PE | MEM_F_SMARTPE)) != 0 + + @interleaved_paired_end.setter + def interleaved_paired_end(self, value: bool) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.interleaved_paired_end.__name__) + self._set_flag(value, MEM_F_PE | MEM_F_SMARTPE) - property short_split_as_secondary: + @property + def short_split_as_secondary(self) -> bool: """:code:`bwa mem -M`""" - def __get__(self): - return (self._options.flag & MEM_F_NO_MULTI) != 0 - def __set__(self, value: bool): - self._set_flag(value, MEM_F_NO_MULTI) + return (self._options.flag & MEM_F_NO_MULTI) != 0 + + @short_split_as_secondary.setter + def short_split_as_secondary(self, value: bool) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.short_split_as_secondary.__name__) + self._set_flag(value, MEM_F_NO_MULTI) - property skip_mate_rescue: + @property + def skip_mate_rescue(self) -> bool: """:code:`bwa mem -S`""" - def __get__(self): - return super().skip_pairing - def __set__(self, value: bool): - self._set_flag(value, MEM_F_NO_RESCUE) + return (self._options.flag & MEM_F_NO_RESCUE) != 0 + + @skip_mate_rescue.setter + def skip_mate_rescue(self, value: bool) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.skip_mate_rescue.__name__) + self._set_flag(value, MEM_F_NO_RESCUE) - property soft_clip_supplementary: + @property + def soft_clip_supplementary(self) -> bool: """:code:`bwa mem -Y`""" - def __get__(self): - return super().soft_clip_supplementary - def __set__(self, value: bool): - self._set_flag(value, MEM_F_SOFTCLIP) + return (self._options.flag & MEM_F_SOFTCLIP) != 0 - property with_xr_tag: + @soft_clip_supplementary.setter + def soft_clip_supplementary(self, value: bool) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.soft_clip_supplementary.__name__) + self._set_flag(value, MEM_F_SOFTCLIP) + + @property + def with_xr_tag(self) -> bool: """:code:`bwa mem -V`""" - def __get__(self): - return super().with_xr_tag - def __set__(self, value: bool): - self._set_flag(value, MEM_F_REF_HDR) + return (self._options.flag & MEM_F_REF_HDR) != 0 + + @with_xr_tag.setter + def with_xr_tag(self, value: bool) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.with_xr_tag.__name__) + self._set_flag(value, MEM_F_REF_HDR) - property query_coord_as_primary: + @property + def query_coord_as_primary(self) -> bool: """:code:`bwa mem -5`""" - def __get__(self): - return super().query_coord_as_primary - def __set__(self, value: bool): - self._set_flag(value, MEM_F_PRIMARY5 | MEM_F_KEEP_SUPP_MAPQ) # always apply MEM_F_KEEP_SUPP_MAPQ with -5 + return (self._options.flag & (MEM_F_PRIMARY5 | MEM_F_KEEP_SUPP_MAPQ)) != 0 - property keep_mapq_for_supplementary: + @query_coord_as_primary.setter + def query_coord_as_primary(self, value: bool) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.query_coord_as_primary.__name__) + self._set_flag(value, MEM_F_PRIMARY5 | MEM_F_KEEP_SUPP_MAPQ) # always apply MEM_F_KEEP_SUPP_MAPQ with -5 + + @property + def keep_mapq_for_supplementary(self) -> bool: """:code:`bwa mem -q`""" - def __get__(self): - return super().keep_mapq_for_supplementary - def __set__(self, value: bool): - self._set_flag(value, MEM_F_KEEP_SUPP_MAPQ) + return (self._options.flag & MEM_F_KEEP_SUPP_MAPQ) != 0 + + @keep_mapq_for_supplementary.setter + def keep_mapq_for_supplementary(self, value: bool) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.keep_mapq_for_supplementary.__name__) + self._set_flag(value, MEM_F_KEEP_SUPP_MAPQ) - property with_xb_tag: + @property + def with_xb_tag(self) -> bool: """:code:`bwa mem -u`""" - def __get__(self): - return super().with_xb_tag - def __set__(self, value: bool): - self._set_flag(value, MEM_F_XB) + return (self._options.flag & MEM_F_XB) != 0 - property max_occurrences: + @with_xb_tag.setter + def with_xb_tag(self, value: bool) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.with_xb_tag.__name__) + self._set_flag(value, MEM_F_XB) + + @property + def max_occurrences(self) -> int: """:code:`bwa mem -c `""" - def __get__(self): - return super().max_occurrences - def __set__(self, value: int): - self._options.max_occ = value - self._options0.max_occ = 1 + return self._options.max_occ + + @max_occurrences.setter + def max_occurrences(self, value: int) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.max_occurrences.__name__) + self._options.max_occ = value + self._options0.max_occ = 1 - property off_diagonal_x_dropoff: + @property + def off_diagonal_x_dropoff(self) -> float: """:code:`bwa mem -d `""" - def __get__(self): - return super().off_diagonal_x_dropoff - def __set__(self, value: float): - self._options.XA_drop_ratio = value - self._options0.XA_drop_ratio = 1 + return self._options.XA_drop_ratio - property ignore_alternate_contigs: + @off_diagonal_x_dropoff.setter + def off_diagonal_x_dropoff(self, value: float) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.off_diagonal_x_dropoff.__name__) + self._options.XA_drop_ratio = value + self._options0.XA_drop_ratio = 1 + + @property + def ignore_alternate_contigs(self) -> bool: """:code:`bwa mem -j`""" - def __get__(self): - return super()._ignore_alt - def __set__(self, value: bool): - self._ignore_alt = value + return self._ignore_alt + + @ignore_alternate_contigs.setter + def ignore_alternate_contigs(self, value: bool) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.ignore_alternate_contigs.__name__) + self._ignore_alt = value - property internal_seed_split_factor: + @property + def internal_seed_split_factor(self) -> float: """:code:`bwa mem -r `""" - def __get__(self): - return super().internal_seed_split_factor - def __set__(self, value: float): - self._options.split_factor = value - self._options0.split_factor = 1 + return self._options.split_factor - property drop_chain_fraction: + @internal_seed_split_factor.setter + def internal_seed_split_factor(self, value: float) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.internal_seed_split_factor.__name__) + self._options.split_factor = value + self._options0.split_factor = 1 + + @property + def drop_chain_fraction(self) -> float: """:code:`bwa mem -D `""" - def __get__(self): - return super().drop_chain_fraction - def __set__(self, value: float): - self._options.drop_ratio = value - self._options0.drop_ratio = 1 + return self._options.drop_ratio + + @drop_chain_fraction.setter + def drop_chain_fraction(self, value: float) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.drop_chain_fraction.__name__) + self._options.drop_ratio = value + self._options0.drop_ratio = 1 - property max_mate_rescue_rounds: + @property + def max_mate_rescue_rounds(self) -> int: """:code:`bwa mem -m `""" - def __get__(self): - return super().max_mate_rescue_rounds - def __set__(self, value: int): - self._options.max_matesw = value - self._options0.max_matesw = 1 + return self._options.max_matesw - property min_seeded_bases_in_chain: + @max_mate_rescue_rounds.setter + def max_mate_rescue_rounds(self, value: int) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.max_mate_rescue_rounds.__name__) + self._options.max_matesw = value + self._options0.max_matesw = 1 + + @property + def min_seeded_bases_in_chain(self) -> int: """:code:`bwa mem -W `""" - def __get__(self): - return super().min_seeded_bases_in_chain - def __set__(self, value: int): - self._options.min_chain_weight = value - self._options0.min_chain_weight = 1 + return self._options.min_chain_weight + + @min_seeded_bases_in_chain.setter + def min_seeded_bases_in_chain(self, value: int) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.min_seeded_bases_in_chain.__name__) + self._options.min_chain_weight = value + self._options0.min_chain_weight = 1 - property seed_occurrence_in_3rd_round: + @property + def seed_occurrence_in_3rd_round(self) -> int: """:code:`bwa mem -y `""" - def __get__(self): - return super().seed_occurrence_in_3rd_round - def __set__(self, value: int): - self._options.max_mem_intv = value - self._options0.max_mem_intv = 1 + return self._options.max_mem_intv - property xa_max_hits: + @seed_occurrence_in_3rd_round.setter + def seed_occurrence_in_3rd_round(self, value: int) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.seed_occurrence_in_3rd_round.__name__) + self._options.max_mem_intv = value + self._options0.max_mem_intv = 1 + + @property + def xa_max_hits(self) -> int | tuple[int, int]: """:code:`bwa mem -h >`""" - def __get__(self): - return super().xa_max_hits - def __set__(self, value: int | tuple[int, int]): - self._options0.max_XA_hits = 1 - self._options0.max_XA_hits_alt = 1 - if isinstance(value, int): - self._options.max_XA_hits = value - self._options.max_XA_hits_alt = value - else: - left, right = value - self._options.max_XA_hits = left - self._options.max_XA_hits_alt = right + return self._options.max_XA_hits, self._options.max_XA_hits_alt + + @xa_max_hits.setter + def xa_max_hits(self, value: int | tuple[int, int]) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.xa_max_hits.__name__) + self._options0.max_XA_hits = 1 + self._options0.max_XA_hits_alt = 1 + if isinstance(value, int): + self._options.max_XA_hits = value + self._options.max_XA_hits_alt = value + else: + left, right = value + self._options.max_XA_hits = left + self._options.max_XA_hits_alt = right - property xa_drop_ratio: + @property + def xa_drop_ratio(self) -> float: """:code:`bwa mem -y `""" - def __get__(self): - return self._options.XA_drop_ratio - def __set__(self, value: float): - self._options.XA_drop_ratio = value + return self._options.XA_drop_ratio + + @xa_drop_ratio.setter + def xa_drop_ratio(self, value: float) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.xa_drop_ratio.__name__) + self._options.XA_drop_ratio = value - property gap_open_penalty: + @property + def gap_open_penalty(self) -> int | tuple[int, int]: """:code:`bwa mem -O >`""" - def __get__(self): - return super().gap_open_penalty - def __set__(self, value: int | tuple[int, int]): - self._options0.o_del = 1 - self._options0.o_ins = 1 - if isinstance(value, int): - self._options.o_del = value - self._options.o_ins = value - else: - deletions, insertions = value - self._options.o_del = deletions - self._options.o_ins = insertions + if self._options.o_del == self._options.o_ins: + return self._options.o_del + else: + return self._options.o_del, self._options.o_ins + + @gap_open_penalty.setter + def gap_open_penalty(self, value: int | tuple[int, int]) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.gap_open_penalty.__name__) + self._options0.o_del = 1 + self._options0.o_ins = 1 + if isinstance(value, int): + self._options.o_del = value + self._options.o_ins = value + else: + deletions, insertions = value + self._options.o_del = deletions + self._options.o_ins = insertions - property gap_extension_penalty: + @property + def gap_extension_penalty(self) -> int | tuple[int, int]: """:code:`bwa mem -E >`""" - def __get__(self): - return super().gap_extension_penalty - def __set__(self, value: int | tuple[int, int]): - self._options0.e_del = 1 - self._options0.e_ins = 1 - if isinstance(value, int): - self._options.e_del = value - self._options.e_ins = value - else: - deletions, insertions = value - self._options.e_del = deletions - self._options.e_ins = insertions + if self._options.e_del == self._options.e_ins: + return self._options.e_del + else: + return self._options.e_del, self._options.e_ins + + @gap_extension_penalty.setter + def gap_extension_penalty(self, value: int | tuple[int, int]) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.gap_extension_penalty.__name__) + self._options0.e_del = 1 + self._options0.e_ins = 1 + if isinstance(value, int): + self._options.e_del = value + self._options.e_ins = value + else: + deletions, insertions = value + self._options.e_del = deletions + self._options.e_ins = insertions - property clipping_penalty: + @property + def clipping_penalty(self) -> int | tuple[int, int]: """:code:`bwa mem -L >`""" - def __get__(self): - return super().clipping_penalty - def __set__(self, value: int | tuple[int, int]): - self._options0.pen_clip5 = 1 - self._options0.pen_clip3 = 1 - if isinstance(value, int): - self._options.pen_clip5 = value - self._options.pen_clip3 = value - else: - five_prime, three_prime = value - self._options.pen_clip5 = five_prime - self._options.pen_clip3 = three_prime + if self._options.pen_clip5 == self._options.pen_clip3: + return self._options.pen_clip5 + else: + return self._options.pen_clip5, self._options.pen_clip3 + + @clipping_penalty.setter + def clipping_penalty(self, value: int | tuple[int, int]) -> None: + self._assert_not_finalized(attr_name=BwaMemOptions.clipping_penalty.__name__) + self._options0.pen_clip5 = 1 + self._options0.pen_clip3 = 1 + if isinstance(value, int): + self._options.pen_clip5 = value + self._options.pen_clip3 = value + else: + five_prime, three_prime = value + self._options.pen_clip5 = five_prime + self._options.pen_clip3 = three_prime cdef class BwaMem: @@ -577,6 +549,14 @@ cdef class BwaMem: cdef BwaIndex _index def __init__(self, prefix: str | Path | None = None, index: BwaIndex | None = None): + """Constructs the :code:`bwa mem` aligner. + + One of `prefix` or `index` must be specified. + + Args: + prefix: the path prefix for the BWA index (typically a FASTA) + index: the index to use + """ if prefix is not None: assert Path(prefix).exists() self._index = BwaIndex(prefix=prefix) @@ -594,9 +574,14 @@ cdef class BwaMem: opt: the alignment options, or None to use the default options Returns: - one alignment per query + a list of alignments (:class:`~pysam.AlignedSegment`) per query + :code:`List[List[AlignedSegment]]`. """ - opt = BwaMemOptionsBuilder().build if opt is None else opt + if opt is None: + opt = BwaMemOptions().finalize() + elif not opt.finalized: + opt = opt.finalize(copy=True) + if len(queries) == 0: return [] elif isinstance(queries[0], str): @@ -688,7 +673,6 @@ cdef class BwaMem: mem_alnregs = mem_align1(mem_opt, self._index.bwt(), self._index.bns(), self._index.pac(), seq.l, seq.s) if opt.query_coord_as_primary: mem_reorder_primary5(opt.minimum_score, &mem_alnregs) - # mimic mem_reg2sam from bwamem.c XA = NULL keep_all = opt.output_all_for_fragments diff --git a/tests/test_bwapy.py b/tests/test_bwapy.py index 78b74f2..cd2b98d 100644 --- a/tests/test_bwapy.py +++ b/tests/test_bwapy.py @@ -9,7 +9,6 @@ from pybwa import BwaIndex from pybwa.libbwamem import BwaMem from pybwa.libbwamem import BwaMemOptions -from pybwa.libbwamem import BwaMemOptionsBuilder @pytest.fixture() @@ -66,16 +65,32 @@ def test_bwamem_options() -> None: # default options options = BwaMemOptions() assert options.min_seed_len == 19 - # build with default options - builder = BwaMemOptionsBuilder() - options = builder.build() - assert options.min_seed_len == 19 - # build with custom option - builder = BwaMemOptionsBuilder() - builder.min_seed_len = 20 - options = builder.build() + + # finalize, returning a copy + copy = options.finalize(copy=True) + assert copy.min_seed_len == 19 + assert copy.finalized + assert not options.finalized + + # update min seed len + options.min_seed_len = 20 assert options.min_seed_len == 20 + # finalize, returning a copy with a new value + copy = options.finalize(copy=True) + assert copy.min_seed_len == 20 + assert copy.finalized + assert not options.finalized + + # finalize, returning itself finalized + options.finalize() + assert options.finalized + assert options.min_seed_len == 20 # type:ignore[unreachable] + + # raise an exception if we try to set a new value + with pytest.raises(AttributeError): + options.min_seed_len = 19 + def test_bwamem(ref_fasta: Path, fastx_record: FastxRecord) -> None: opt = BwaMemOptions() @@ -95,7 +110,7 @@ def test_bwamem(ref_fasta: Path, fastx_record: FastxRecord) -> None: assert not rec.is_read2 assert rec.reference_start == 80 assert rec.is_forward - assert rec.cigarstring == "80M", print(str(rec)) + assert rec.cigarstring == "80M" assert len(recs[1]) == 1 rec = recs[1][0] @@ -105,5 +120,5 @@ def test_bwamem(ref_fasta: Path, fastx_record: FastxRecord) -> None: assert not rec.is_read2 assert rec.reference_start == 80 assert rec.is_reverse - assert rec.cigarstring == "80M", print(str(rec)) + assert rec.cigarstring == "80M" # TODO: test multi-mapping, reverse strand, etc