diff --git a/bwapy/libbwaaln.pyx b/bwapy/libbwaaln.pyx index 1488044..96f0022 100644 --- a/bwapy/libbwaaln.pyx +++ b/bwapy/libbwaaln.pyx @@ -28,9 +28,8 @@ cdef class BwaAlnOptions: def __init__(self): self._max_hits = 3 - self._cinit() - cdef _cinit(self): + def __cinit__(self): self._delegate = gap_init_opt() def __dealloc__(self): diff --git a/bwapy/libbwaindex.pxd b/bwapy/libbwaindex.pxd index 2e55e02..e9f4f42 100644 --- a/bwapy/libbwaindex.pxd +++ b/bwapy/libbwaindex.pxd @@ -45,7 +45,7 @@ cdef class BwaIndex: """Contains the index and nucleotide sequence for Bwa""" cdef bwaidx_t *_delegate cdef public object header - cdef _cinit(self, prefix, mode) cdef bwt_t *bwt(self) cdef bntseq_t *bns(self) - cdef uint8_t *pac(self) \ No newline at end of file + cdef uint8_t *pac(self) + cdef _load_index(self, prefix, mode) \ No newline at end of file diff --git a/bwapy/libbwaindex.pyx b/bwapy/libbwaindex.pyx index 7e969fa..a286f7f 100644 --- a/bwapy/libbwaindex.pyx +++ b/bwapy/libbwaindex.pyx @@ -48,6 +48,7 @@ cdef class BwaIndex: random base) """ cdef int mode + mode = 0 if bwt: mode |= BWA_IDX_BWT @@ -55,11 +56,9 @@ cdef class BwaIndex: mode |= BWA_IDX_BNS if pac: mode |= BWA_IDX_PAC - self._cinit(f"{prefix}", mode) - - cdef _cinit(self, prefix, mode): - cdef char *local_prefix + self._load_index(f"{prefix}", mode) + cdef _load_index(self, prefix, mode): prefix = bwa_idx_infer_prefix(force_bytes(prefix)) if not prefix: # FIXME: better error message diff --git a/bwapy/libbwamem.pyi b/bwapy/libbwamem.pyi index bbf356d..23a312c 100644 --- a/bwapy/libbwamem.pyi +++ b/bwapy/libbwamem.pyi @@ -1,7 +1,6 @@ import enum from pathlib import Path from typing import List -from typing import Self from pysam import AlignedSegment from pysam import FastxRecord @@ -18,9 +17,8 @@ class BwaMemMode(enum.Enum): INTRACTG = enum.auto() class BwaMemOptions: - _ignore_alt: bool def __init__(self, finalize: bool = False) -> None: ... - def finalize(self) -> Self: ... + _ignore_alt: bool min_seed_len: int mode: BwaMemMode band_width: int @@ -52,6 +50,10 @@ class BwaMemOptions: gap_extension_penalty: int | tuple[int, int] clipping_penalty: int | tuple[int, int] +class BwaMemOptionsBuilder(BwaMemOptions): + def __init__(self, options: BwaMemOptions | None = None) -> None: ... + def build(self) -> BwaMemOptions: ... + class BwaMem: _index: BwaIndex def __init__(self, prefix: str | Path | None = None, index: BwaIndex | None = None) -> None: ... diff --git a/bwapy/libbwamem.pyx b/bwapy/libbwamem.pyx index ca4b21c..2d6bca0 100644 --- a/bwapy/libbwamem.pyx +++ b/bwapy/libbwamem.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 from pathlib import Path from typing import List, Self -from libc.string cimport memset +from libc.string cimport memset, memcpy from libc.stdlib cimport calloc, free, malloc import enum from bwapy.libbwaindex cimport BwaIndex @@ -16,36 +16,206 @@ class BwaMemMode(enum.Enum): INTRACTG = enum.auto() -# TODO: must call finalize!!! -# how do we enforce this + cdef class BwaMemOptions: """The container for options for [`BwaMem`][bwapy.BwaMem].""" _ignore_alt: bool _mode: BwaMemMode | None cdef mem_opt_t* _options - cdef mem_opt_t* _options0 - def __init__(self, finalize: bool = False): + def __init__(self): self._ignore_alt = False - self._cinit() - if finalize: - self.finalize() + self._mode = None - cdef _cinit(self): + def __cinit__(self): self._options = mem_opt_init() - self._options0 = mem_opt_init() - memset(self._options0, 0, sizeof(mem_opt_t)) def __dealloc__(self): free(self._options) self._options = NULL - free(self._options0) - self._options0 = NULL cdef mem_opt_t* mem_opt(self): return self._options - def finalize(self) -> Self: + property min_seed_len: + """bwa mem -k """ + def __get__(self): + return self._options.min_seed_len + + property mode: + """bwa mem -x """ + def __get__(self) -> BwaMemMode: + return self._mode + + property band_width: + """bwa mem -w """ + def __get__(self): + return self._options.w + + property match_score: + """bwa mem -A """ + def __get__(self): + return self._options.a + + property mismatch_penalty: + """bwa mem -A """ + def __get__(self): + return self._options.b + + property minimum_score: + """bwa mem -T """ + def __get__(self): + return self._options.T + + property unpaired_penalty: + """bwa mem -U """ + def __get__(self): + return self._options.pen_unpaired + + property n_threads: + """bwa mem -t """ + def __get__(self): + return self._options.n_threads + + property skip_pairing: + def __get__(self): + return (self._options.flag & MEM_F_NOPAIRING) != 0 + + property output_all_for_fragments: + def __get__(self): + return (self._options.flag & MEM_F_ALL) != 0 + + property interleaved_paired_end: + def __get__(self): + return (self._options.flag & (MEM_F_PE | MEM_F_SMARTPE)) != 0 + + property skip_mate_rescue: + def __get__(self): + return (self._options.flag & MEM_F_NO_MULTI) != 0 + + property soft_clip_supplementary: + def __get__(self): + return (self._options.flag & MEM_F_SOFTCLIP) != 0 + + property with_xr_tag: + def __get__(self): + return (self._options.flag & MEM_F_REF_HDR) != 0 + + property query_coord_as_primary: + def __get__(self): + return (self._options.flag & (MEM_F_PRIMARY5 | MEM_F_KEEP_SUPP_MAPQ)) != 0 + + property keep_mapq_for_supplementary: + def __get__(self): + return (self._options.flag & MEM_F_KEEP_SUPP_MAPQ) != 0 + + property with_xb_tag: + def __get__(self): + return (self._options.flag & MEM_F_XB) != 0 + + property max_occurrences: + """bwa mem -c """ + def __get__(self): + return self._options.max_occ + + property off_diagonal_x_dropoff: + """bwa mem -d """ + def __get__(self): + return self._options.XA_drop_ratio + + property ignore_alternate_contigs: + """bwa mem -j""" + def __get__(self): + return self._ignore_alt + + property internal_seed_split_factor: + """bwa mem -r """ + def __get__(self): + return self._options.split_factor + + property drop_chain_fraction: + """bwa mem -D """ + def __get__(self): + return self._options.drop_ratio + def __set__(self, value: float): + self._options.drop_ratio = value + self._options0.drop_ratio = 1 + + property max_mate_rescue_rounds: + """bwa mem -m """ + def __get__(self): + return self._options.max_matesw + + property min_seeded_bases_in_chain: + """bwa mem -W """ + def __get__(self): + return self._options.min_chain_weight + + property seed_occurrence_in_3rd_round: + """bwa mem -y """ + def __get__(self): + return self._options.max_mem_intv + + property xa_max_hits: + """bwa mem -h >""" + def __get__(self): + return self._options.max_XA_hits, self._options.max_XA_hits_alt + + property xa_drop_ration: + """bwa mem -y """ + def __get__(self): + return self._options.XA_drop_ratio + + property gap_open_penalty: + """bwa mem -O >""" + def __get__(self): + if self._options.o_del == self._options.o_ins: + return self._options.o_del + else: + return self._options.o_del, self._options.o_ins + + property gap_extension_penalty: + """bwa mem -E >""" + def __get__(self): + if self._options.e_del == self._options.e_ins: + return self._options.e_del + else: + return self._options.e_del, self._options.e_ins + + + property clipping_penalty: + """bwa mem -L >""" + def __get__(self): + if self._options.pen_clip5 == self._options.pen_clip3: + return self._options.pen_clip5 + else: + return self._options.pen_clip5, self._options.pen_clip3 + + + +cdef class BwaMemOptionsBuilder(BwaMemOptions): + """The container for options for [`BwaMem`][bwapy.BwaMem].""" + cdef mem_opt_t* _options0 + + def __init__(self, options: BwaMemOptions | None = None): + super().__init__() + if options is not None: + self.ignore_alternate_contigs = options.ignore_alternate_contigs + self.mode = options.mode + self._copy_options(options) + + cdef _copy_options(self, options: BwaMemOptions): + memcpy(self._options, options.mem_opt(), sizeof(mem_opt_t)) + + def __cinit__(self, options: BwaMemOptions | None = None): + self._options0 = mem_opt_init() + memset(self._options0, 0, sizeof(mem_opt_t)) + + def __dealloc__(self): + free(self._options0) + self._options0 = NULL + + def build(self) -> BwaMemOptions: if self._mode is None: # matching score is changed so scale the rest of the penalties/scores if self._options0.a == 1: @@ -111,7 +281,7 @@ cdef class BwaMemOptions: property min_seed_len: """bwa mem -k """ def __get__(self): - return self._options.min_seed_len + return super().min_seed_len def __set__(self, value: int): self._options.min_seed_len = value self._options0.min_seed_len = 1 @@ -119,14 +289,14 @@ cdef class BwaMemOptions: property mode: """bwa mem -x """ def __get__(self) -> BwaMemMode: - return self._mode + return super().mode def __set__(self, value: BwaMemMode): self._mode = value property band_width: """bwa mem -w """ def __get__(self): - return self._options.w + return super().band_width def __set__(self, value: int): self._options.w = value self._options0.w = 1 @@ -134,7 +304,7 @@ cdef class BwaMemOptions: property match_score: """bwa mem -A """ def __get__(self): - return self._options.a + return super().match_score def __set__(self, value: int): self._options.a = value self._options0.a = 1 @@ -142,7 +312,7 @@ cdef class BwaMemOptions: property mismatch_penalty: """bwa mem -A """ def __get__(self): - return self._options.b + return super().mismatch_penalty def __set__(self, value: int): self._options.b = value self._options0.b = 1 @@ -150,7 +320,7 @@ cdef class BwaMemOptions: property minimum_score: """bwa mem -T """ def __get__(self): - return self._options.T + return super().minimum_score def __set__(self, value: int): self._options.T = value self._options0.T = 1 @@ -158,7 +328,7 @@ cdef class BwaMemOptions: property unpaired_penalty: """bwa mem -U """ def __get__(self): - return self._options.pen_unpaired + return super().unpaired_penalty def __set__(self, value: int): self._options.pen_unpaired = value self._options0.pen_unpaired = 1 @@ -166,7 +336,7 @@ cdef class BwaMemOptions: property n_threads: """bwa mem -t """ def __get__(self): - return self._options.n_threads + return super().n_threads def __set__(self, value: int): self._options.n_threads = value if value > 1 else 1 @@ -179,62 +349,62 @@ cdef class BwaMemOptions: property skip_pairing: def __get__(self): - return self._options.flag & MEM_F_NOPAIRING > 0 + return super().skip_pairing def __set__(self, value: bool): self._set_flag(value, MEM_F_NOPAIRING) property output_all_for_fragments: def __get__(self): - return self._options.flag & MEM_F_ALL > 0 + return super().output_all_for_fragments def __set__(self, value: bool): self._set_flag(value, MEM_F_ALL) property interleaved_paired_end: def __get__(self): - return (self._options.flag & (MEM_F_PE | MEM_F_SMARTPE)) > 0 + return super().interleaved_paired_end def __set__(self, value: bool): self._set_flag(value, MEM_F_PE | MEM_F_SMARTPE) property skip_mate_rescue: def __get__(self): - return self._options.flag & MEM_F_NO_MULTI > 0 + return super().skip_pairing def __set__(self, value: bool): self._set_flag(value, MEM_F_NO_MULTI) property soft_clip_supplementary: def __get__(self): - return self._options.flag & MEM_F_SOFTCLIP > 0 + return super().soft_clip_supplementary def __set__(self, value: bool): self._set_flag(value, MEM_F_SOFTCLIP) property with_xr_tag: def __get__(self): - return self._options.flag & MEM_F_REF_HDR > 0 + return super().with_xr_tag def __set__(self, value: bool): self._set_flag(value, MEM_F_REF_HDR) property query_coord_as_primary: def __get__(self): - return (self._options.flag & (MEM_F_PRIMARY5 | MEM_F_KEEP_SUPP_MAPQ)) > 0 + return super().query_coord_as_primary def __set__(self, value: bool): self._set_flag(value, MEM_F_PRIMARY5 | MEM_F_KEEP_SUPP_MAPQ) #/ always apply MEM_F_KEEP_SUPP_MAPQ with -5 property keep_mapq_for_supplementary: def __get__(self): - return self._options.flag & MEM_F_KEEP_SUPP_MAPQ > 0 + return super().keep_mapq_for_supplementary def __set__(self, value: bool): self._set_flag(value, MEM_F_KEEP_SUPP_MAPQ) property with_xb_tag: def __get__(self): - return self._options.flag & MEM_F_XB > 0 + return super().with_xb_tag def __set__(self, value: bool): self._set_flag(value, MEM_F_XB) property max_occurrences: """bwa mem -c """ def __get__(self): - return self._options.max_occ + return super().max_occurrences def __set__(self, value: int): self._options.max_occ = value self._options0.max_occ = 1 @@ -242,7 +412,7 @@ cdef class BwaMemOptions: property off_diagonal_x_dropoff: """bwa mem -d """ def __get__(self): - return self._options.XA_drop_ratio + return super().off_diagonal_x_dropoff def __set__(self, value: float): self._options.XA_drop_ratio = value self._options0.XA_drop_ratio = 1 @@ -250,14 +420,14 @@ cdef class BwaMemOptions: property ignore_alternate_contigs: """bwa mem -j""" def __get__(self): - return self._ignore_alt + return super()._ignore_alt def __set__(self, value: bool): self._ignore_alt = value property internal_seed_split_factor: """bwa mem -r """ def __get__(self): - return self._options.split_factor + return super().internal_seed_split_factor def __set__(self, value: float): self._options.split_factor = value self._options0.split_factor = 1 @@ -265,7 +435,7 @@ cdef class BwaMemOptions: property drop_chain_fraction: """bwa mem -D """ def __get__(self): - return self._options.drop_ratio + return super().drop_chain_fraction def __set__(self, value: float): self._options.drop_ratio = value self._options0.drop_ratio = 1 @@ -273,7 +443,7 @@ cdef class BwaMemOptions: property max_mate_rescue_rounds: """bwa mem -m """ def __get__(self): - return self._options.max_matesw + return super().max_mate_rescue_rounds def __set__(self, value: int): self._options.max_matesw = value self._options0.max_matesw = 1 @@ -281,7 +451,7 @@ cdef class BwaMemOptions: property min_seeded_bases_in_chain: """bwa mem -W """ def __get__(self): - return self._options.min_chain_weight + return super().min_seeded_bases_in_chain def __set__(self, value: int): self._options.min_chain_weight = value self._options0.min_chain_weight = 1 @@ -289,7 +459,7 @@ cdef class BwaMemOptions: property seed_occurrence_in_3rd_round: """bwa mem -y """ def __get__(self): - return self._options.max_mem_intv + return super().seed_occurrence_in_3rd_round def __set__(self, value: int): self._options.max_mem_intv = value self._options0.max_mem_intv = 1 @@ -297,7 +467,7 @@ cdef class BwaMemOptions: property xa_max_hits: """bwa mem -h >""" def __get__(self): - return self._options.max_XA_hits, self._options.max_XA_hits_alt + return super().xa_max_hits def __set__(self, value: int | tuple[int, int]): self._options0.max_XA_hits = 1 self._options0.max_XA_hits_alt = 1 @@ -319,10 +489,7 @@ cdef class BwaMemOptions: property gap_open_penalty: """bwa mem -O >""" def __get__(self): - if self._options.o_del == self._options.o_ins: - return self._options.o_del - else: - return self._options.o_del, self._options.o_ins + return super().gap_open_penalty def __set__(self, value: int | tuple[int, int]): self._options0.o_del = 1 self._options0.o_ins = 1 @@ -337,10 +504,7 @@ cdef class BwaMemOptions: property gap_extension_penalty: """bwa mem -E >""" def __get__(self): - if self._options.e_del == self._options.e_ins: - return self._options.e_del - else: - return self._options.e_del, self._options.e_ins + return super().gap_extension_penalty def __set__(self, value: int | tuple[int, int]): self._options0.e_del = 1 self._options0.e_ins = 1 @@ -356,10 +520,7 @@ cdef class BwaMemOptions: property clipping_penalty: """bwa mem -L >""" def __get__(self): - if self._options.pen_clip5 == self._options.pen_clip3: - return self._options.pen_clip5 - else: - return self._options.pen_clip5, self._options.pen_clip3 + return super().clipping_penalty def __set__(self, value: int | tuple[int, int]): self._options0.pen_clip5 = 1 self._options0.pen_clip3 = 1 @@ -488,7 +649,7 @@ cdef class BwaMem: # set the flags mem_aln.flag |= 0x4 if mem_aln.rid < 0 else 0 mem_aln.flag |= 0x10 if mem_aln.is_rev > 0 else 0 - rec.flag = (mem_aln.flag&0xffff) | (0x100 if mem_aln.flag&0x10000 > 0 else 0) + rec.flag = (mem_aln.flag & 0xffff) | (0x100 if (mem_aln.flag & 0x10000) != 0 else 0) # reference id, position, mapq, and cigar if rec.is_mapped: @@ -497,10 +658,11 @@ cdef class BwaMem: rec.mapping_quality = mem_aln.mapq cigar = "" for k in range(mem_aln.n_cigar): - cigar_opt = mem_aln.cigar[k] & 0xf - if opt.soft_clip_supplementary and mem_aln.is_alt == 0 and (cigar_opt == 3 or cigar_opt == 4): - cigar_opt = 4 if num_mem_aln > 0 else 3 # // use hard clipping for supplementary alignments - cigar = cigar + f"{mem_aln.cigar[k] >> 4}:d" + "MIDS"[cigar_opt] + cigar_op = mem_aln.cigar[k] & 0xf + if opt.soft_clip_supplementary and mem_aln.is_alt == 0 and (cigar_op == 3 or cigar_op == 4): + cigar_op = 4 if num_mem_aln > 0 else 3 # // use hard clipping for supplementary alignments + cigar_len = mem_aln.cigar[k] >> 4 + cigar = cigar + f"{cigar_len}" + "MIDS"[cigar_op] rec.cigarstring = cigar # sequence and qualities @@ -512,9 +674,9 @@ cdef class BwaMem: if rec.is_mapped and mem_aln.n_cigar > 0 and num_mem_aln > 0 and not opt.soft_clip_supplementary and not mem_aln.is_alt: qb = 0 qe = seq.l - if mem_aln.cigar[0] & 0xf == 4 or mem_aln.cigar[0] & 0xf == 3: + if (mem_aln.cigar[0] & 0xf) == 4 or (mem_aln.cigar[0] & 0xf) == 3: qb += mem_aln.cigar[0] >> 4 - if mem_aln.cigar[mem_aln.n_cigar-1] & 0xf == 4 or mem_aln.cigar[mem_aln.n_cigar-1] & 0xf == 3: + if (mem_aln.cigar[mem_aln.n_cigar-1] & 0xf) == 4 or (mem_aln.cigar[mem_aln.n_cigar-1] & 0xf) == 3: qe -= mem_aln.cigar[mem_aln.n_cigar-1] >> 4 rec.query_sequence = rec.query_sequence [qb:qe] if query.quality is not None: diff --git a/tests/test_bwapy.py b/tests/test_bwapy.py index 050c0a5..75e2b48 100644 --- a/tests/test_bwapy.py +++ b/tests/test_bwapy.py @@ -8,6 +8,7 @@ from bwapy import BwaIndex from bwapy.libbwamem import BwaMem from bwapy.libbwamem import BwaMemOptions +from bwapy.libbwamem import BwaMemOptionsBuilder @pytest.fixture() @@ -45,11 +46,22 @@ def test_bwaaln(ref_fasta: Path, fastx_record: FastxRecord) -> None: def test_bwamem_options() -> None: - BwaMemOptions() + # default options + options = BwaMemOptions() + assert options.min_seed_len == 19 + # build with default options + builder = BwaMemOptionsBuilder() + options = builder.build() + assert options.min_seed_len == 19 + # build with custom option + builder = BwaMemOptionsBuilder() + builder.min_seed_len = 20 + options = builder.build() + assert options.min_seed_len == 20 def test_bwamem(ref_fasta: Path, fastx_record: FastxRecord) -> None: - opt = BwaMemOptions(finalize=True) + opt = BwaMemOptions() bwa = BwaMem(prefix=ref_fasta) recs = bwa.align(opt=opt, queries=[fastx_record])