Skip to content

Commit

Permalink
feat: add threading support to bwa aln (#27)
Browse files Browse the repository at this point in the history
  • Loading branch information
nh13 authored Jan 18, 2025
1 parent fa2872b commit fa10c6e
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 3 deletions.
3 changes: 3 additions & 0 deletions pybwa/libbwaaln.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ from libc.stdio cimport FILE
cdef extern from "libbwaaln_utils.h":
void bwa_cal_pac_pos_with_bwt(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, int max_mm,
float fnr, bwt_t *bwt)
void bwa_cal_sa_reg_gap_threaded(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs,
const gap_opt_t *opt)


cdef extern from "bntseq.h":
unsigned char nst_nt4_table[256]
Expand Down
2 changes: 2 additions & 0 deletions pybwa/libbwaaln.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class BwaAlnOptions:
stop_at_max_best_hits: int | None = None,
max_hits: int | None = 3,
log_scaled_gap_penalty: bool | None = None,
threads: int | None = None,
) -> None: ...
max_mismatches: int # -n <int>
# fnr:float # -n <float>
Expand All @@ -38,6 +39,7 @@ class BwaAlnOptions:
max_hits: int # bwa samse -n <int>
log_scaled_gap_penalty: bool = True # -L
with_md: bool = True # bwa samse -d
threads: int # -t <int>

class BwaAln:
def __init__(self, prefix: str | Path | None = None, index: BwaIndex | None = None) -> None: ...
Expand Down
16 changes: 14 additions & 2 deletions pybwa/libbwaaln.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ cdef class BwaAlnOptions:
max_hits (int | None): :code:`bwa samse -n <int>`
log_scaled_gap_penalty (in | None): :code:`-L`
with_md (bool): output the MD to each alignment in the XA tag, otherwise use :code:`"."`
threads (int): the number of threads to use
"""
cdef gap_opt_t * _delegate

Expand All @@ -55,7 +56,8 @@ cdef class BwaAlnOptions:
stop_at_max_best_hits: int | None = None,
max_hits: int | None = 3,
log_scaled_gap_penalty: bool | None = None,
with_md: bool | None = False
with_md: bool | None = False,
threads: int | None = None
):
if max_mismatches is not None:
self.max_mismatches = max_mismatches
Expand Down Expand Up @@ -83,6 +85,8 @@ cdef class BwaAlnOptions:
self.log_scaled_gap_penalty = 1 if log_scaled_gap_penalty else 0
if with_md is not None:
self.with_md = with_md
if threads is not None:
self.threads = threads

def __cinit__(self):
self._delegate = gap_init_opt()
Expand Down Expand Up @@ -200,6 +204,14 @@ cdef class BwaAlnOptions:
def __set__(self, value: bool):
self._with_md = value

property threads:
""":code:`bwa aln -t"""
def __get__(self) -> int:
return self._delegate.n_threads
def __set__(self, value: int):
self._delegate.n_threads = value


cdef class BwaAln:
"""The class to align reads with :code:`bwa aln`."""

Expand Down Expand Up @@ -397,7 +409,7 @@ cdef class BwaAln:
seqs[i].tid = -1

# this is `bwa aln`, and the rest is `bwa samse`
bwa_cal_sa_reg_gap(0, self._index.bwt(), num_seqs, seqs, gap_opt)
bwa_cal_sa_reg_gap_threaded(0, self._index.bwt(), num_seqs, seqs, gap_opt)

# create the full alignment
for i in range(num_seqs):
Expand Down
27 changes: 27 additions & 0 deletions pybwa/libbwaaln_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,31 @@ void bwa_cal_pac_pos_with_bwt(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs,
}
p->n_multi = n_multi;
}
}

void bwa_cal_sa_reg_gap_threaded(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt)
{
#ifdef HAVE_PTHREAD
if (opt->n_threads <= 1) { // no multi-threading at all
bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
} else {
pthread_t *tid;
pthread_attr_t attr;
thread_aux_t *data;
int j;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t));
tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
for (j = 0; j < opt->n_threads; ++j) {
data[j].tid = j; data[j].bwt = bwt;
data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt;
pthread_create(&tid[j], &attr, worker, data + j);
}
for (j = 0; j < opt->n_threads; ++j) pthread_join(tid[j], 0);
free(data); free(tid);
}
#else
bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
#endif
}
5 changes: 4 additions & 1 deletion pybwa/libbwaaln_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,8 @@
#include "kstring.h"


// Version of "bwa_cal_pac_pos" bwase.c that can use an already loaded forward suffix array (BWT)
// Version of "bwa_cal_pac_pos" bwase.c that can use an already loaded forward suffix array (BWT).
void bwa_cal_pac_pos_with_bwt(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr, bwt_t *bwt);

// Port of running "bwa_cal_sa_reg_gap" in the "bwa_aln_core" method in bwtaln.c so we can support multi-threading.
void bwa_cal_sa_reg_gap_threaded(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt);
23 changes: 23 additions & 0 deletions tests/test_bwapy.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,29 @@ def test_bwaaln(ref_fasta: Path, fastx_record: FastxRecord) -> None:
assert rec.cigarstring == "80M"


def test_bwaaln_threading(ref_fasta: Path, fastx_record: FastxRecord) -> None:
opt = BwaAlnOptions(threads=2)
bwa = BwaAln(prefix=ref_fasta)
revcomp_seq = None if not fastx_record.sequence else reverse_complement(fastx_record.sequence)
revcomp_record = FastxRecord(name="revcomp", sequence=revcomp_seq)

queries = [fastx_record if i % 2 == 0 else revcomp_record for i in range(100)]
recs = bwa.align(opt=opt, queries=queries)
assert len(recs) == len(queries)
for i, rec in enumerate(recs):
if i % 2 == 0:
assert rec.query_name == "test"
assert rec.is_forward
else:
assert rec.query_name == "revcomp"
assert rec.is_reverse
assert not rec.is_paired
assert not rec.is_read1
assert not rec.is_read2
assert rec.reference_start == 80
assert rec.cigarstring == "80M"


def test_bwamem_options() -> None:
# default options
options = BwaMemOptions()
Expand Down

0 comments on commit fa10c6e

Please sign in to comment.