diff --git a/Cargo.lock b/Cargo.lock index 673a7d5..1ce88ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -186,7 +186,7 @@ dependencies = [ [[package]] name = "maptide" -version = "0.1.0" +version = "0.2.0" dependencies = [ "noodles", "pyo3", diff --git a/Cargo.toml b/Cargo.toml index 3b31ee5..04cecfd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "maptide" -version = "0.1.0" +version = "0.2.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -10,4 +10,4 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.17.3", features = ["extension-module", "abi3-py37"] } -noodles = { version = "0.31.1", features = ["core", "bam", "sam", "bgzf"]} +noodles = { version = "0.31.1", features = ["core", "bam", "sam", "bgzf"] } diff --git a/README.md b/README.md index fb28c86..4aad681 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ ``` $ pip install maptide ``` -Depending on your operating system, the Rust compiler may have to be installed. +Depending on your operating system, the Rust compiler may need to be installed. Installation instructions for the Rust compiler can be found here: https://www.rust-lang.org/tools/install @@ -25,39 +25,41 @@ $ pip install . ## Usage ``` $ maptide -h -usage: maptide [-h] [--region REGION] [--index INDEX] [--mapq MAPQ] [--baseq BASEQ] [--noindex] [--stats] [--decimals DECIMALS] bam +usage: maptide [-h] [-v] [-r REGION] [-i INDEX] [-m MAPPING_QUALITY] [-b BASE_QUALITY] [-s] [-d DECIMALS] bam positional arguments: - bam Path to BAM file + bam Path to BAM file -optional arguments: - -h, --help show this help message and exit - --region REGION Region to view, specified in the form CHROM:START-END (default: everything) - --index INDEX Path to index (BAI) file (default: .bai) - --mapq MAPQ Minimum mapping quality (default: 0) - --baseq BASEQ Minimum base quality (default: 0) - --noindex Do not use an index file when querying the BAM file (default: False) - --stats Output additional per-position statistics (default: False) - --decimals DECIMALS Number of decimal places to display (default: 3) +options: + -h, --help show this help message and exit + -v, --version show program's version number and exit + -r REGION, --region REGION + Region to view, specified in the form CHROM:START-END (default: everything) + -i INDEX, --index INDEX + Path to index (BAI) file (default: .bai) + -m MAPPING_QUALITY, --mapping-quality MAPPING_QUALITY + Minimum mapping quality (default: 0) + -b BASE_QUALITY, --base-quality BASE_QUALITY + Minimum base quality (default: 0) + -s, --stats Output additional per-position statistics (default: False) + -d DECIMALS, --decimals DECIMALS + Number of decimal places to display (default: 3) ``` -#### Frequencies over all positions in the reference: +#### Frequencies over all positions: ``` $ maptide /path/to/file.bam ``` -#### Frequencies over a specific region (with an index file): -If the index file has the same path as the BAM file, but with `.bai` appended on the end: +#### Frequencies over a region: ``` $ maptide /path/to/file.bam --region chrom:start-end ``` +If a region is specified, `maptide` will check for an index file with the same path as the BAM file, but with `.bai` appended on the end (i.e. `/path/to/file.bam.bai`). -Otherwise, the path needs to be specified: -``` -$ maptide /path/to/file.bam --region chrom:start-end --index /path/to/index.bai -``` +If an index file does not exist in this location, `maptide` will still run anyway, just without an index file. -#### Frequencies over a specific region (without an index file): +Index files that do not follow the naming convention `/path/to/file.bam.bai` can still be used, but a path to the file needs to be provided: ``` -$ maptide /path/to/file.bam chrom:start-end --region chrom:start-end --noindex +$ maptide /path/to/file.bam --region chrom:start-end --index /path/to/index.bai ``` diff --git a/python/maptide/__init__.py b/python/maptide/__init__.py index e36c981..bffb5ec 100644 --- a/python/maptide/__init__.py +++ b/python/maptide/__init__.py @@ -5,4 +5,3 @@ # __all__ = maptide.__all__ from .api import query, parse_region -del api \ No newline at end of file diff --git a/python/maptide/api.py b/python/maptide/api.py index bf16f63..b1a2153 100644 --- a/python/maptide/api.py +++ b/python/maptide/api.py @@ -1,42 +1,55 @@ -from .maptide import all_, query_, parse_region_ -from typing import Optional +import os +from typing import Dict, Tuple, List +from . import maptide #  type: ignore def query( bam: str, - region: Optional[str] = None, - bai: Optional[str] = None, + region: str | None = None, + bai: str | None = None, mapping_quality: int = 0, base_quality: int = 0, - indexed=True, -): - """Obtain base frequencies from the provided BAM file. - - Required arguments: - `bam`: Path to the BAM file. - Optional arguments: - `region` - `bai` - `mapping_quality` - `base_quality` - `indexed` - Returns: - A `dict` mapping tuples of the form `(int, int)` to base frequencies. - """ +) -> Dict[str, Dict[Tuple[int, int], List[int]]]: + """Performs a pileup over a region, obtaining per-position base frequencies for the provided BAM file. + + Parameters + ---------- + bam : str + Path to the BAM file. + region : str, optional + Region to query, in the form `CHROM:START-END` (default: all positions) + bai : str, optional + Path to index file (default: same path as the BAM file, but with .bai appended) + mapping_quality : int, optional + Minimum mapping quality for a read to be included in the pileup (default: 0) + base_quality : int, optional + Minimum base quality for a base within a read to be included in the pileup (default: 0) - if not indexed and bai: - raise Exception("Cannot set indexed=False while also providing a BAI file") + Returns + ------- + dict + Mapping: reference -> (reference position, insert position) -> [base frequencies]. + """ if region: - if indexed: - if not bai: - bai = bam + ".bai" - return query_(bam, bai, region, mapping_quality, base_quality) - else: - return query_(bam, None, region, mapping_quality, base_quality) + if not bai and os.path.isfile(bam + ".bai"): + bai = bam + ".bai" + return maptide.query(bam, bai, region, mapping_quality, base_quality) else: - return all_(bam, mapping_quality, base_quality) + return maptide.all(bam, mapping_quality, base_quality) -def parse_region(region: str): - return parse_region_(region) +def parse_region(region: str) -> Tuple[str, int, int]: + """Parses a region of the form `CHROM:START-END`, returning the tuple `(CHROM, START, END)`. + + Parameters + ---------- + region : str + Region to parse. + + Returns + ------- + tuple + Parsed region tuple. + """ + return maptide.parse_region(region) diff --git a/python/maptide/cli.py b/python/maptide/cli.py index 3c1f9a9..f9b1edd 100644 --- a/python/maptide/cli.py +++ b/python/maptide/cli.py @@ -1,8 +1,9 @@ -from .api import query, parse_region import argparse import math import sys import csv +import pkg_resources +from . import api def entropy(probabilities, normalised=False): @@ -36,7 +37,7 @@ def get_stats(counts, decimals=3): def iterate(data, region=None, stats=False, decimals=3): if region: - chrom, start, end = parse_region(region) + chrom, start, end = api.parse_region(region) for (pos, ins_pos), row in sorted(data[chrom].items()): if (not start or pos >= start) and (not end or pos <= end): if stats: @@ -54,42 +55,46 @@ def iterate(data, region=None, stats=False, decimals=3): def run(): parser = argparse.ArgumentParser() - index_group = parser.add_mutually_exclusive_group() parser.add_argument("bam", help="Path to BAM file") parser.add_argument( + "-v", + "--version", + action="version", + version=pkg_resources.get_distribution("maptide").version, + ) + parser.add_argument( + "-r", "--region", help="Region to view, specified in the form CHROM:START-END (default: everything)", ) - index_group.add_argument( + parser.add_argument( + "-i", "--index", - default=None, help="Path to index (BAI) file (default: .bai)", ) parser.add_argument( - "--mapq", + "-m", + "--mapping-quality", type=int, default=0, help="Minimum mapping quality (default: %(default)s)", ) parser.add_argument( - "--baseq", + "-b", + "--base-quality", type=int, default=0, help="Minimum base quality (default: %(default)s)", ) - index_group.add_argument( - "--noindex", - action="store_true", - default=False, - help="Do not use an index file when querying the BAM file (default: %(default)s)", - ) parser.add_argument( + "-s", "--stats", action="store_true", default=False, help="Output additional per-position statistics (default: %(default)s)", ) parser.add_argument( + "-d", "--decimals", type=int, default=3, @@ -128,13 +133,12 @@ def run(): writer = csv.writer(sys.stdout, delimiter="\t") writer.writerow(columns) - data = query( + data = api.query( bam=args.bam, region=args.region, bai=args.index, - mapping_quality=args.mapq, - base_quality=args.baseq, - indexed=not args.noindex, + mapping_quality=args.mapping_quality, + base_quality=args.base_quality, ) for row in iterate( diff --git a/src/lib.rs b/src/lib.rs index d1ca41a..e824549 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -380,7 +380,7 @@ fn merge_into_base_map( } #[pyfunction] -fn all_(bam_path: String, mapping_quality: usize, base_quality: usize) -> PyResult { +fn all(bam_path: String, mapping_quality: usize, base_quality: usize) -> PyResult { // Create initial maps let (mut ref_arrs, mut ins_maps, mut ref_lengths) = init_maps(); @@ -451,7 +451,7 @@ fn all_(bam_path: String, mapping_quality: usize, base_quality: usize) -> PyResu } #[pyfunction] -fn query_( +fn query( bam_path: String, bai_path: Option, region: String, @@ -572,7 +572,7 @@ fn query_( } #[pyfunction] -fn parse_region_(region: String) -> PyResult<(String, Option, Option)> { +fn parse_region(region: String) -> PyResult<(String, Option, Option)> { let region: Region = region .parse() .map_err(|x: ParseError| PyException::new_err(x.to_string()))?; @@ -592,9 +592,9 @@ fn parse_region_(region: String) -> PyResult<(String, Option, Option PyResult<()> { - m.add_function(wrap_pyfunction!(all_, m)?)?; - m.add_function(wrap_pyfunction!(query_, m)?)?; - m.add_function(wrap_pyfunction!(parse_region_, m)?)?; + m.add_function(wrap_pyfunction!(all, m)?)?; + m.add_function(wrap_pyfunction!(query, m)?)?; + m.add_function(wrap_pyfunction!(parse_region, m)?)?; Ok(()) }