Skip to content

Commit

Permalink
feat: add threading support to bwa mem
Browse files Browse the repository at this point in the history
  • Loading branch information
nh13 committed Jan 18, 2025
1 parent 7d02ddb commit 20be05c
Show file tree
Hide file tree
Showing 10 changed files with 376 additions and 56 deletions.
4 changes: 2 additions & 2 deletions patches/0001-feat-add-HN-tag-to-bwa-aln.patch
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From 7d36ae830fc7391cfa69ffcdf239755f01317c1c Mon Sep 17 00:00:00 2001
From 0cae163e3364e463324c98bc2d8f88495033dc31 Mon Sep 17 00:00:00 2001
From: Nils Homer <[email protected]>
Date: Thu, 16 Jan 2025 22:35:13 -0800
Subject: [PATCH 1/2] feat: add HN tag to bwa aln
Subject: [PATCH 1/3] feat: add HN tag to bwa aln

---
bwase.c | 17 +++++++++--------
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From 028b2a7f2129a6eb8038079c539a54526bb08422 Mon Sep 17 00:00:00 2001
From 9a9e459d581fc9cae5c4a73fb65ad191425630ec Mon Sep 17 00:00:00 2001
From: Nils Homer <[email protected]>
Date: Fri, 17 Jan 2025 00:29:08 -0800
Subject: [PATCH 2/2] feat: add an option to output MD in the XA tag
Subject: [PATCH 2/3] feat: add an option to output MD in the XA tag

---
bwape.c | 12 +++++++-----
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
From d1aec9f3dc7e8a28f1154eb14d94f857215a299e Mon Sep 17 00:00:00 2001
From: Nils Homer <[email protected]>
Date: Fri, 17 Jan 2025 21:33:12 -0700
Subject: [PATCH 3/3] feat: expose various internal bwamem methods and structs

---
bwamem.c | 23 +++--------------------
bwamem.h | 27 +++++++++++++++++++++++++++
bwamem_extra.c | 2 +-
3 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/bwamem.c b/bwamem.c
index 03e2a05..5ca5a8d 100644
--- a/bwamem.c
+++ b/bwamem.c
@@ -116,11 +116,7 @@ mem_opt_t *mem_opt_init()
#define intv_lt(a, b) ((a).info < (b).info)
KSORT_INIT(mem_intv, bwtintv_t, intv_lt)

-typedef struct {
- bwtintv_v mem, mem1, *tmpv[2];
-} smem_aux_t;
-
-static smem_aux_t *smem_aux_init()
+smem_aux_t *smem_aux_init()
{
smem_aux_t *a;
a = calloc(1, sizeof(smem_aux_t));
@@ -129,7 +125,7 @@ static smem_aux_t *smem_aux_init()
return a;
}

-static void smem_aux_destroy(smem_aux_t *a)
+void smem_aux_destroy(smem_aux_t *a)
{
free(a->tmpv[0]->a); free(a->tmpv[0]);
free(a->tmpv[1]->a); free(a->tmpv[1]);
@@ -514,7 +510,6 @@ int mem_sort_dedup_patch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_
return m;
}

-typedef kvec_t(int) int_v;

static void mem_mark_primary_se_core(const mem_opt_t *opt, int n, mem_alnreg_t *a, int_v *z)
{ // similar to the loop in mem_chain_flt()
@@ -1188,19 +1183,7 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
return a;
}

-typedef struct {
- const mem_opt_t *opt;
- const bwt_t *bwt;
- const bntseq_t *bns;
- const uint8_t *pac;
- const mem_pestat_t *pes;
- smem_aux_t **aux;
- bseq1_t *seqs;
- mem_alnreg_v *regs;
- int64_t n_processed;
-} worker_t;
-
-static void worker1(void *data, int i, int tid)
+void worker1(void *data, int i, int tid)
{
worker_t *w = (worker_t*)data;
if (!(w->opt->flag&MEM_F_PE)) {
diff --git a/bwamem.h b/bwamem.h
index 0a0e3bb..52a9c53 100644
--- a/bwamem.h
+++ b/bwamem.h
@@ -30,6 +30,7 @@
#include "bwt.h"
#include "bntseq.h"
#include "bwa.h"
+#include "kvec.h"

#define MEM_MAPQ_COEF 30.0
#define MEM_MAPQ_MAX 60
@@ -123,6 +124,24 @@ typedef struct { // This struct is only used for the convenience of API.
int score, sub, alt_sc;
} mem_aln_t;

+typedef struct {
+ bwtintv_v mem, mem1, *tmpv[2];
+} smem_aux_t;
+
+typedef struct {
+ const mem_opt_t *opt;
+ const bwt_t *bwt;
+ const bntseq_t *bns;
+ const uint8_t *pac;
+ const mem_pestat_t *pes;
+ smem_aux_t **aux;
+ bseq1_t *seqs;
+ mem_alnreg_v *regs;
+ int64_t n_processed;
+} worker_t;
+
+typedef kvec_t(int) int_v;
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -136,6 +155,14 @@ extern "C" {
mem_opt_t *mem_opt_init(void);
void mem_fill_scmat(int a, int b, int8_t mat[25]);

+ int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
+ void mem_reorder_primary5(int T, mem_alnreg_v *a);
+
+ smem_aux_t *smem_aux_init();
+ void smem_aux_destroy(smem_aux_t *a);
+
+ void worker1(void *data, int i, int tid);
+
/**
* Align a batch of sequences and generate the alignments in the SAM format
*
diff --git a/bwamem_extra.c b/bwamem_extra.c
index c47b93f..f50cec7 100644
--- a/bwamem_extra.c
+++ b/bwamem_extra.c
@@ -102,7 +102,7 @@ const bwtintv_v *smem_next(smem_i *itr)
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq_)
{ // the difference from mem_align1_core() is that this routine: 1) calls mem_mark_primary_se(); 2) does not modify the input sequence
extern mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq, void *buf);
- extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
+ extern int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
mem_alnreg_v ar;
char *seq;
seq = malloc(l_seq);
--
2.39.5 (Apple Git-154)

3 changes: 3 additions & 0 deletions pybwa/libbwaaln_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
#include "bwase.h"
#include "libbwaaln_utils.h"

#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif

void bwa_cal_pac_pos_with_bwt(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr, bwt_t *bwt)
{
Expand Down
17 changes: 13 additions & 4 deletions pybwa/libbwamem.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,26 @@
from libc.stdint cimport uint8_t, int64_t, int32_t, uint64_t, int8_t, uint32_t
from libc.stdio cimport FILE

cdef extern from "bwa.h":
ctypedef struct bseq1_t:
int l_seq, id
char *name, *comment, *seq, *qual, *sam

cdef extern from "libbwamem_utils.h":
ctypedef struct mem_alns_t:
size_t n, m
mem_aln_t*a
mem_alns_t * mem_process_seqs_alt(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns,
const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs,
const mem_pestat_t *pes0)

cdef extern from "limits.h":
cdef int INT_MAX

cdef extern from "bwt.h":
ctypedef struct bwt_t:
int sa_intv


cdef extern from "bntseq.h":
ctypedef struct bntann1_t:
int64_t offset
Expand All @@ -29,7 +41,6 @@ cdef extern from "kstring.h":
size_t l, m
char *s


cdef extern from "bwamem.h":
int MEM_F_PE
int MEM_F_NOPAIRING
Expand Down Expand Up @@ -112,5 +123,3 @@ cdef extern void add_cigar(const mem_opt_t *opt, mem_aln_t *p, kstring_t *str, i

# from bwamem_extra.c
cdef extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query);


5 changes: 5 additions & 0 deletions pybwa/libbwamem.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class BwaMemOptions:
gap_open_penalty: int | tuple[int, int] | None = None,
gap_extension_penalty: int | tuple[int, int] | None = None,
clipping_penalty: int | tuple[int, int] | None = None,
threads: int | None = None,
) -> None: ...
_finalized: bool
_ignore_alt: bool
Expand Down Expand Up @@ -180,6 +181,10 @@ class BwaMemOptions:
def clipping_penalty(self) -> int | tuple[int, int]: ...
@clipping_penalty.setter
def clipping_penalty(self, value: int | tuple[int, int]) -> None: ...
@property
def threads(self) -> int: ...
@threads.setter
def threads(self, value: int) -> None: ...
def finalize(self, copy: bool = False) -> BwaMemOptions: ...

class BwaMem:
Expand Down
Loading

0 comments on commit 20be05c

Please sign in to comment.