Skip to content

Commit

Permalink
feat: add threading support to bwa mem (#28)
Browse files Browse the repository at this point in the history
* feat: add threading support to bwa mem

fix: define macros to enable threading (especially in bwa aln)
  • Loading branch information
nh13 authored Jan 18, 2025
1 parent fa10c6e commit d641015
Show file tree
Hide file tree
Showing 12 changed files with 412 additions and 60 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ jobs:
- name: Run pytest
shell: bash
run: |
poetry run python -m pytest --cov=pybwa --cov-report=xml --cov-branch
poetry run python -m pytest --cov=pybwa --cov-report=xml --cov-branch
- name: Run docs
shell: bash
Expand Down
5 changes: 5 additions & 0 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def with_patches():
libraries.append("rt")
library_dirs=['pybwa', 'bwa']
extra_objects = []
define_macros = [("HAVE_PTHREAD", None), ("USE_MALLOC_WRAPPERS", None)]
h_files = []
c_files = []
for root_dir in library_dirs:
Expand All @@ -58,6 +59,7 @@ def with_patches():

if platform.system() != 'Windows':
compile_args = [
"-Wno-unused-result",
"-Wno-unreachable-code",
"-Wno-single-bit-bitfield-constant-conversion",
"-Wno-deprecated-declarations",
Expand All @@ -77,6 +79,7 @@ def with_patches():
language='c',
libraries=libraries,
library_dirs=library_dirs,
define_macros=define_macros
)

libbwaaln_module = Extension(
Expand All @@ -90,6 +93,7 @@ def with_patches():
language='c',
libraries=libraries,
library_dirs=library_dirs,
define_macros=define_macros
)

libbwamem_module = Extension(
Expand All @@ -103,6 +107,7 @@ def with_patches():
language='c',
libraries=libraries,
library_dirs=library_dirs,
define_macros=define_macros
)


Expand Down
4 changes: 2 additions & 2 deletions patches/0001-feat-add-HN-tag-to-bwa-aln.patch
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From 7d36ae830fc7391cfa69ffcdf239755f01317c1c Mon Sep 17 00:00:00 2001
From 0cae163e3364e463324c98bc2d8f88495033dc31 Mon Sep 17 00:00:00 2001
From: Nils Homer <[email protected]>
Date: Thu, 16 Jan 2025 22:35:13 -0800
Subject: [PATCH 1/2] feat: add HN tag to bwa aln
Subject: [PATCH 1/3] feat: add HN tag to bwa aln

---
bwase.c | 17 +++++++++--------
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From 028b2a7f2129a6eb8038079c539a54526bb08422 Mon Sep 17 00:00:00 2001
From 9a9e459d581fc9cae5c4a73fb65ad191425630ec Mon Sep 17 00:00:00 2001
From: Nils Homer <[email protected]>
Date: Fri, 17 Jan 2025 00:29:08 -0800
Subject: [PATCH 2/2] feat: add an option to output MD in the XA tag
Subject: [PATCH 2/3] feat: add an option to output MD in the XA tag

---
bwape.c | 12 +++++++-----
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
From d1aec9f3dc7e8a28f1154eb14d94f857215a299e Mon Sep 17 00:00:00 2001
From: Nils Homer <[email protected]>
Date: Fri, 17 Jan 2025 21:33:12 -0700
Subject: [PATCH 3/3] feat: expose various internal bwamem methods and structs

---
bwamem.c | 23 +++--------------------
bwamem.h | 27 +++++++++++++++++++++++++++
bwamem_extra.c | 2 +-
3 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/bwamem.c b/bwamem.c
index 03e2a05..5ca5a8d 100644
--- a/bwamem.c
+++ b/bwamem.c
@@ -116,11 +116,7 @@ mem_opt_t *mem_opt_init()
#define intv_lt(a, b) ((a).info < (b).info)
KSORT_INIT(mem_intv, bwtintv_t, intv_lt)

-typedef struct {
- bwtintv_v mem, mem1, *tmpv[2];
-} smem_aux_t;
-
-static smem_aux_t *smem_aux_init()
+smem_aux_t *smem_aux_init()
{
smem_aux_t *a;
a = calloc(1, sizeof(smem_aux_t));
@@ -129,7 +125,7 @@ static smem_aux_t *smem_aux_init()
return a;
}

-static void smem_aux_destroy(smem_aux_t *a)
+void smem_aux_destroy(smem_aux_t *a)
{
free(a->tmpv[0]->a); free(a->tmpv[0]);
free(a->tmpv[1]->a); free(a->tmpv[1]);
@@ -514,7 +510,6 @@ int mem_sort_dedup_patch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_
return m;
}

-typedef kvec_t(int) int_v;

static void mem_mark_primary_se_core(const mem_opt_t *opt, int n, mem_alnreg_t *a, int_v *z)
{ // similar to the loop in mem_chain_flt()
@@ -1188,19 +1183,7 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
return a;
}

-typedef struct {
- const mem_opt_t *opt;
- const bwt_t *bwt;
- const bntseq_t *bns;
- const uint8_t *pac;
- const mem_pestat_t *pes;
- smem_aux_t **aux;
- bseq1_t *seqs;
- mem_alnreg_v *regs;
- int64_t n_processed;
-} worker_t;
-
-static void worker1(void *data, int i, int tid)
+void worker1(void *data, int i, int tid)
{
worker_t *w = (worker_t*)data;
if (!(w->opt->flag&MEM_F_PE)) {
diff --git a/bwamem.h b/bwamem.h
index 0a0e3bb..52a9c53 100644
--- a/bwamem.h
+++ b/bwamem.h
@@ -30,6 +30,7 @@
#include "bwt.h"
#include "bntseq.h"
#include "bwa.h"
+#include "kvec.h"

#define MEM_MAPQ_COEF 30.0
#define MEM_MAPQ_MAX 60
@@ -123,6 +124,24 @@ typedef struct { // This struct is only used for the convenience of API.
int score, sub, alt_sc;
} mem_aln_t;

+typedef struct {
+ bwtintv_v mem, mem1, *tmpv[2];
+} smem_aux_t;
+
+typedef struct {
+ const mem_opt_t *opt;
+ const bwt_t *bwt;
+ const bntseq_t *bns;
+ const uint8_t *pac;
+ const mem_pestat_t *pes;
+ smem_aux_t **aux;
+ bseq1_t *seqs;
+ mem_alnreg_v *regs;
+ int64_t n_processed;
+} worker_t;
+
+typedef kvec_t(int) int_v;
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -136,6 +155,14 @@ extern "C" {
mem_opt_t *mem_opt_init(void);
void mem_fill_scmat(int a, int b, int8_t mat[25]);

+ int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
+ void mem_reorder_primary5(int T, mem_alnreg_v *a);
+
+ smem_aux_t *smem_aux_init();
+ void smem_aux_destroy(smem_aux_t *a);
+
+ void worker1(void *data, int i, int tid);
+
/**
* Align a batch of sequences and generate the alignments in the SAM format
*
diff --git a/bwamem_extra.c b/bwamem_extra.c
index c47b93f..f50cec7 100644
--- a/bwamem_extra.c
+++ b/bwamem_extra.c
@@ -102,7 +102,7 @@ const bwtintv_v *smem_next(smem_i *itr)
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq_)
{ // the difference from mem_align1_core() is that this routine: 1) calls mem_mark_primary_se(); 2) does not modify the input sequence
extern mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq, void *buf);
- extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
+ extern int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
mem_alnreg_v ar;
char *seq;
seq = malloc(l_seq);
--
2.39.5 (Apple Git-154)

29 changes: 28 additions & 1 deletion pybwa/libbwaaln_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
#include "bwase.h"
#include "libbwaaln_utils.h"

#ifdef HAVE_PTHREAD
#include <pthread.h>
#endif

#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif

void bwa_cal_pac_pos_with_bwt(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr, bwt_t *bwt)
{
Expand All @@ -24,6 +31,24 @@ void bwa_cal_pac_pos_with_bwt(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs,
}
}

// Copy as-is from bwtaln.c
#ifdef HAVE_PTHREAD
typedef struct {
int tid;
bwt_t *bwt;
int n_seqs;
bwa_seq_t *seqs;
const gap_opt_t *opt;
} thread_aux_t;

static void *worker(void *data)
{
thread_aux_t *d = (thread_aux_t*)data;
bwa_cal_sa_reg_gap(d->tid, d->bwt, d->n_seqs, d->seqs, d->opt);
return 0;
}
#endif

void bwa_cal_sa_reg_gap_threaded(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt)
{
#ifdef HAVE_PTHREAD
Expand All @@ -43,7 +68,9 @@ void bwa_cal_sa_reg_gap_threaded(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_
data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt;
pthread_create(&tid[j], &attr, worker, data + j);
}
for (j = 0; j < opt->n_threads; ++j) pthread_join(tid[j], 0);
for (j = 0; j < opt->n_threads; ++j) {
pthread_join(tid[j], 0);
}
free(data); free(tid);
}
#else
Expand Down
17 changes: 13 additions & 4 deletions pybwa/libbwamem.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,26 @@
from libc.stdint cimport uint8_t, int64_t, int32_t, uint64_t, int8_t, uint32_t
from libc.stdio cimport FILE

cdef extern from "bwa.h":
ctypedef struct bseq1_t:
int l_seq, id
char *name, *comment, *seq, *qual, *sam

cdef extern from "libbwamem_utils.h":
ctypedef struct mem_alns_t:
size_t n, m
mem_aln_t*a
mem_alns_t * mem_process_seqs_alt(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns,
const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs,
const mem_pestat_t *pes0)

cdef extern from "limits.h":
cdef int INT_MAX

cdef extern from "bwt.h":
ctypedef struct bwt_t:
int sa_intv


cdef extern from "bntseq.h":
ctypedef struct bntann1_t:
int64_t offset
Expand All @@ -29,7 +41,6 @@ cdef extern from "kstring.h":
size_t l, m
char *s


cdef extern from "bwamem.h":
int MEM_F_PE
int MEM_F_NOPAIRING
Expand Down Expand Up @@ -112,5 +123,3 @@ cdef extern void add_cigar(const mem_opt_t *opt, mem_aln_t *p, kstring_t *str, i

# from bwamem_extra.c
cdef extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query);


5 changes: 5 additions & 0 deletions pybwa/libbwamem.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class BwaMemOptions:
gap_open_penalty: int | tuple[int, int] | None = None,
gap_extension_penalty: int | tuple[int, int] | None = None,
clipping_penalty: int | tuple[int, int] | None = None,
threads: int | None = None,
) -> None: ...
_finalized: bool
_ignore_alt: bool
Expand Down Expand Up @@ -180,6 +181,10 @@ class BwaMemOptions:
def clipping_penalty(self) -> int | tuple[int, int]: ...
@clipping_penalty.setter
def clipping_penalty(self, value: int | tuple[int, int]) -> None: ...
@property
def threads(self) -> int: ...
@threads.setter
def threads(self, value: int) -> None: ...
def finalize(self, copy: bool = False) -> BwaMemOptions: ...

class BwaMem:
Expand Down
Loading

0 comments on commit d641015

Please sign in to comment.