-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: initial implementation of python bindings for
bwa aln
- Loading branch information
Showing
28 changed files
with
60,774 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
* @nh13 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
name: CI | ||
|
||
on: push | ||
env: | ||
POETRY_VERSION: 1.8 | ||
|
||
jobs: | ||
testing: | ||
runs-on: ubuntu-24.04 | ||
strategy: | ||
matrix: | ||
PYTHON_VERSION: ["3.9", "3.10", "3.11", "3.12"] | ||
steps: | ||
- uses: actions/checkout@v4 | ||
- uses: actions/checkout@v4 | ||
with: | ||
repository: "lh3/bwa" | ||
path: "bwa" | ||
- name: Set up Python ${{matrix.PYTHON_VERSION}}
  # Use the maintained major version of the action; v1 targets a retired
  # Node.js runtime on GitHub-hosted runners.
  uses: actions/setup-python@v5
  with:
    python-version: ${{matrix.PYTHON_VERSION}}
|
||
- name: Get full Python version
  id: full-python-version
  shell: bash
  # Publish the dash-joined sys.version_info as the step output `version`.
  # The `::set-output` workflow command is deprecated; step outputs are
  # written to the file named by $GITHUB_OUTPUT instead.
  run: |
    echo "version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))")" >> "$GITHUB_OUTPUT"
|
||
- name: Install poetry | ||
shell: bash | ||
run: | | ||
python -m pip install --upgrade pip | ||
pip install poetry==${{env.POETRY_VERSION}} | ||
- name: Configure poetry | ||
shell: bash | ||
run: poetry config virtualenvs.in-project true | ||
|
||
- name: Set up cache
  # Use the maintained major version of the cache action; v2 is deprecated.
  uses: actions/cache@v4
  id: cache
  with:
    # The in-project virtualenv, keyed on OS, exact Python version and lock file.
    path: .venv
    key: venv-${{ runner.os }}-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }}
|
||
- name: Ensure cache is healthy | ||
if: steps.cache.outputs.cache-hit == 'true' | ||
shell: bash | ||
run: poetry run pip --version >/dev/null 2>&1 || rm -rf .venv | ||
|
||
- name: Check that the lock file is up to date | ||
shell: bash | ||
run: | | ||
poetry lock --check | ||
- name: Install deps | ||
shell: bash | ||
run: | | ||
poetry install | ||
- name: Style checking | ||
shell: bash | ||
run: | | ||
poetry run ruff format --check bwapy tests | ||
- name: Run lint | ||
shell: bash | ||
run: | | ||
poetry run ruff check bwapy tests | ||
- name: Run mypy | ||
shell: bash | ||
run: | | ||
poetry run mypy bwapy tests --config=pyproject.toml | ||
- name: Run pytest | ||
shell: bash | ||
run: | | ||
poetry run python -m pytest --cov=bwapy --cov-report=xml --cov-branch | ||
- name: Run docs | ||
shell: bash | ||
run: | | ||
set -euo pipefail | ||
poetry run mkdocs build --strict | ||
- name: Upload code coverage | ||
uses: codecov/[email protected] | ||
with: | ||
token: ${{ secrets.CODECOV_TOKEN }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
name: readthedocs/actions | ||
on: | ||
pull_request_target: | ||
types: | ||
- opened | ||
# Execute this action only on PRs that touch | ||
# documentation files. | ||
paths: | ||
- "docs/**" | ||
|
||
permissions: | ||
pull-requests: write | ||
|
||
jobs: | ||
documentation-links: | ||
runs-on: ubuntu-24.04 | ||
steps: | ||
- uses: readthedocs/actions/preview@v1 | ||
with: | ||
project-slug: "bwapy" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
version: 2 | ||
build: | ||
os: ubuntu-22.04 | ||
tools: | ||
python: "3.11" | ||
jobs: | ||
post_install: | ||
- pip install poetry==1.8.3 | ||
- poetry config virtualenvs.create false | ||
- VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install | ||
mkdocs: | ||
configuration: mkdocs.yml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
from setuptools import Extension, Distribution | ||
from typing import List | ||
|
||
from Cython.Build import cythonize | ||
from Cython.Distutils.build_ext import new_build_ext as cython_build_ext | ||
import multiprocessing | ||
from pathlib import Path | ||
|
||
# Package source directory and the scratch directory for Cython's generated C.
SOURCE_DIR = Path("bwapy")
BUILD_DIR = Path("cython_build")

# Compiler and linker settings handed to the extension below.
compile_args = []
link_args = []
include_dirs = ["bwa"]
libraries = ["m", "z", "pthread"]
library_dirs = ["bwa"]
extra_objects = []

# Gather every header and C source under the bwa checkout and the package
# itself. `example.c` and `main.c` are left out of the compiled sources
# (presumably because they are stand-alone programs -- confirm).
h_files = []
c_files = []
for root_dir in ("bwa", "bwapy"):
    h_files += [str(header) for header in Path(root_dir).rglob("*.h")]
    c_files += [
        str(source)
        for source in Path(root_dir).rglob("*.c")
        if source.name not in ("example.c", "main.c")
    ]

# The single extension: the Cython module plus all collected C sources.
extension_module = Extension(
    name="bwapy.libbwapy",
    language="c",
    sources=["bwapy/libbwapy.pyx", *c_files],
    depends=h_files,
    include_dirs=include_dirs,
    library_dirs=library_dirs,
    libraries=libraries,
    extra_objects=extra_objects,
    extra_compile_args=compile_args,
    extra_link_args=link_args,
)
|
||
|
||
def cythonize_helper(extension_modules: List[Extension]) -> List[Extension]:
    """Run Cython over the given extension modules.

    Args:
        extension_modules: the extensions whose sources should be cythonized.

    Returns:
        The result of ``cythonize`` for those modules.
    """
    options = {
        # Keep generated .c files out of the source tree
        "build_dir": BUILD_DIR,
        # No annotated .html report (it would contain the source)
        "annotate": False,
        # Parallel build with two workers per CPU
        "nthreads": 2 * multiprocessing.cpu_count(),
        # Python 3 semantics (the default from Cython 3 onwards)
        "compiler_directives": {"language_level": "3"},
        # Rebuild unconditionally, even when the inputs are untouched
        "force": True,
    }
    return cythonize(module_list=extension_modules, **options)
|
||
# Trove classifiers as one newline-separated string. The empty first and last
# entries reproduce the leading and trailing newline of the text block.
CLASSIFIERS = "\n".join(
    (
        "",
        "Development Status :: 4 - Beta",
        "Intended Audience :: Science/Research",
        "Intended Audience :: Developers",
        "License :: OSI Approved",
        "Programming Language :: Python",
        "Topic :: Software Development",
        "Topic :: Scientific/Engineering",
        "Operating System :: POSIX",
        "Operating System :: Unix",
        "Operating System :: MacOS",
        "",
    )
)
|
||
|
||
def build():
    """Cythonize and compile the ``bwapy.libbwapy`` extension in place.

    Cythonizes the module-level ``extension_module``, wraps it in a setuptools
    ``Distribution`` and runs that distribution's ``build_ext`` command with
    ``inplace`` enabled. Returns nothing.
    """
    # Collect and cythonize all files
    extension_modules = cythonize_helper([extension_module])

    # Use Setuptools to collect files
    distribution = Distribution(
        {
            "name": "bwapy",
            "version": "0.0.1",  # FIXME
            "description": "Todo",  # FIXME
            "long_description": "FIXME",
            "long_description_content_type": "text/x-rst",
            "author": "Nils Homer",
            "author_email": "[email protected]",
            "license": "MIT",
            "platforms": ["POSIX", "UNIX", "MacOS"],
            "classifiers": [line for line in CLASSIFIERS.split("\n") if line],
            "url": "https://github.com/fulcrumgenomics/bwapy",
            "packages": ["bwapy"],
            # The setuptools option is `package_dir` (singular); the previous
            # `package_dirs` key is not a recognized distribution option.
            "package_dir": {"bwapy": "bwapy"},
            "ext_modules": extension_modules,
            "cmdclass": {
                "build_ext": cython_build_ext,
            },
        }
    )

    build_ext_cmd = distribution.get_command_obj("build_ext")
    build_ext_cmd.ensure_finalized()
    # Set the value to 1 for "inplace", with the goal to build extensions
    # in build directory, and then copy all files back to the source dir
    # (under the hood, "copy_extensions_to_source" will be called after
    # building the extensions). This is done so Poetry grabs the files
    # during the next step in its build.
    build_ext_cmd.inplace = 1
    build_ext_cmd.run()


if __name__ == "__main__":
    build()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
name: bwapy | ||
channels: | ||
- defaults | ||
- conda-forge | ||
- bioconda | ||
dependencies: | ||
- python=3.11 | ||
- cython=3.0.11 | ||
- pysam=0.22.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
import bwapy.libbwapy as libbwapy | ||
from bwapy.libbwapy import * # noqa: F403 | ||
|
||
# `__all__` has to be a sequence of name strings; binding it to the module
# object itself breaks `from bwapy import *`. Re-export the public
# (non-underscore) names of the extension module instead.
__all__ = [name for name in dir(libbwapy) if not name.startswith("_")]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
# cython: language_level=3 | ||
|
||
from libc.stdint cimport uint8_t, uint64_t, uint16_t, uint32_t, int64_t, int32_t | ||
from libc.stdio cimport FILE | ||
|
||
cdef extern from "libbwapy_utils.h": | ||
void bwa_cal_pac_pos_with_bwt(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, int max_mm, | ||
float fnr, bwt_t *bwt) | ||
|
||
cdef extern from "utils.h": | ||
int err_fseek(FILE *stream, long offset, int whence) | ||
size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream) | ||
|
||
cdef extern from "bntseq.h": | ||
unsigned char nst_nt4_table[256] | ||
int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id) | ||
|
||
cdef extern from "bwa.h": | ||
char * bwa_idx_infer_prefix(const char * hint) | ||
|
||
cdef extern from "bwt.h": | ||
ctypedef struct bwt_t: | ||
int sa_intv | ||
|
||
bwt_t *bwt_restore_bwt(const char *fn) | ||
void bwt_restore_sa(const char *fn, bwt_t *bwt); | ||
void bwt_destroy(bwt_t *bwt) | ||
|
||
cdef extern from "bwtaln.h": | ||
int BWA_TYPE_NO_MATCH | ||
int BWA_MODE_LOGGAP | ||
int BWA_MODE_GAPE | ||
|
||
int __cigar_op(uint16_t __cigar) | ||
int __cigar_len(uint16_t __cigar) | ||
|
||
# Alignment options struct; each member is declared exactly once.
ctypedef struct gap_opt_t:
    int s_mm
    int s_gapo
    int s_gape
    int mode  # bit 24-31 are the barcode length
    int indel_end_skip
    int max_del_occ
    int max_entries
    float fnr
    int max_diff
    int max_gapo
    int max_gape
    int max_seed_diff
    int seed_len
    int n_threads
    int max_top2
    int trim_qual
    int sam
    char *rg_line
    int n_occ
    int interactive_mode
    int with_md
|
||
gap_opt_t *gap_init_opt() | ||
void gap_print_opt(const gap_opt_t *opt) | ||
|
||
void seq_reverse(int len, unsigned char *seq, int is_comp) | ||
|
||
ctypedef struct bwt_aln1_t: | ||
pass | ||
|
||
cdef extern from "bntseq.h": | ||
ctypedef struct bntann1_t: | ||
int64_t offset | ||
int32_t len | ||
char *name | ||
|
||
ctypedef struct bntseq_t: | ||
int64_t l_pac | ||
bntann1_t *anns | ||
FILE * fp_pac | ||
|
||
bntseq_t * bns_restore(const char * prefix) | ||
void bns_destroy(bntseq_t *bns) | ||
|
||
cdef extern from "kseq.h": | ||
ctypedef struct kstring_t: | ||
char *s | ||
|
||
cdef extern from "bwase.h": | ||
void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_main, int n_multi) | ||
int64_t pos_end(const bwa_seq_t *p) | ||
void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, unsigned char *_pacseq) | ||
char *bwa_cal_md1(int n_cigar, uint16_t *cigar, int len, uint64_t pos, unsigned char *seq, uint64_t l_pac, unsigned char *pacseq, kstring_t *str, int *_nm) | ||
void bwase_initialize() | ||
|
||
cdef extern from "bwtaln.h": | ||
# Per-read record shared by the alignment routines declared in this file.
ctypedef struct bwa_seq_t:
    char *name
    uint8_t *seq
    uint8_t *rseq
    uint8_t *qual
    uint32_t len
    uint32_t strand
    uint32_t type
    int mapQ
    int clip_len
    bwt_aln1_t *aln
    int n_aln
    # Declared 64-bit to agree with the `uint64_t pos` argument of
    # bwa_cal_md1 and the int64_t result of pos_end; a 16-bit declaration
    # would truncate the coordinate when it is read from Cython.
    # NOTE(review): presumably `bwtint_t` in the C header -- confirm.
    uint64_t pos
    uint16_t *cigar
    int n_cigar
    int tid
    uint32_t full_len
    uint32_t nm
    char *md
|
||
|
||
void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs) | ||
|
||
void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt) |
Oops, something went wrong.