Skip to content

Commit

Permalink
Merge pull request #2 from CLIMB-COVID/development
Browse files Browse the repository at this point in the history
0.2.0
  • Loading branch information
tombch authored Aug 2, 2023
2 parents 859c6b8 + 14eea79 commit 551cfbe
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 78 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "maptide"
version = "0.1.0"
version = "0.2.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -10,4 +10,4 @@ crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.17.3", features = ["extension-module", "abi3-py37"] }
noodles = { version = "0.31.1", features = ["core", "bam", "sam", "bgzf"]}
noodles = { version = "0.31.1", features = ["core", "bam", "sam", "bgzf"] }
44 changes: 23 additions & 21 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
```
$ pip install maptide
```
Depending on your operating system, the Rust compiler may have to be installed.
Depending on your operating system, the Rust compiler may need to be installed.

Installation instructions for the Rust compiler can be found here: https://www.rust-lang.org/tools/install

Expand All @@ -25,39 +25,41 @@ $ pip install .
## Usage
```
$ maptide -h
usage: maptide [-h] [--region REGION] [--index INDEX] [--mapq MAPQ] [--baseq BASEQ] [--noindex] [--stats] [--decimals DECIMALS] bam
usage: maptide [-h] [-v] [-r REGION] [-i INDEX] [-m MAPPING_QUALITY] [-b BASE_QUALITY] [-s] [-d DECIMALS] bam
positional arguments:
bam Path to BAM file
bam Path to BAM file
optional arguments:
-h, --help show this help message and exit
--region REGION Region to view, specified in the form CHROM:START-END (default: everything)
--index INDEX Path to index (BAI) file (default: </path/to/bam>.bai)
--mapq MAPQ Minimum mapping quality (default: 0)
--baseq BASEQ Minimum base quality (default: 0)
--noindex Do not use an index file when querying the BAM file (default: False)
--stats Output additional per-position statistics (default: False)
--decimals DECIMALS Number of decimal places to display (default: 3)
options:
-h, --help show this help message and exit
-v, --version show program's version number and exit
-r REGION, --region REGION
Region to view, specified in the form CHROM:START-END (default: everything)
-i INDEX, --index INDEX
Path to index (BAI) file (default: </path/to/bam>.bai)
-m MAPPING_QUALITY, --mapping-quality MAPPING_QUALITY
Minimum mapping quality (default: 0)
-b BASE_QUALITY, --base-quality BASE_QUALITY
Minimum base quality (default: 0)
-s, --stats Output additional per-position statistics (default: False)
-d DECIMALS, --decimals DECIMALS
Number of decimal places to display (default: 3)
```

#### Frequencies over all positions in the reference:
#### Frequencies over all positions:
```
$ maptide /path/to/file.bam
```

#### Frequencies over a specific region (with an index file):
If the index file has the same path as the BAM file, but with `.bai` appended on the end:
#### Frequencies over a region:
```
$ maptide /path/to/file.bam --region chrom:start-end
```
If a region is specified, `maptide` will check for an index file with the same path as the BAM file, but with `.bai` appended on the end (i.e. `/path/to/file.bam.bai`).

Otherwise, the path needs to be specified:
```
$ maptide /path/to/file.bam --region chrom:start-end --index /path/to/index.bai
```
If an index file does not exist in this location, `maptide` will still run anyway, just without an index file.

#### Frequencies over a specific region (without an index file):
Index files that do not follow the naming convention `/path/to/file.bam.bai` can still be used, but a path to the file needs to be provided:
```
$ maptide /path/to/file.bam chrom:start-end --region chrom:start-end --noindex
$ maptide /path/to/file.bam --region chrom:start-end --index /path/to/index.bai
```
1 change: 0 additions & 1 deletion python/maptide/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,3 @@
# __all__ = maptide.__all__

from .api import query, parse_region
del api
73 changes: 43 additions & 30 deletions python/maptide/api.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,55 @@
from .maptide import all_, query_, parse_region_
from typing import Optional
import os
from typing import Dict, Tuple, List
from . import maptide #  type: ignore


def query(
bam: str,
region: Optional[str] = None,
bai: Optional[str] = None,
region: str | None = None,
bai: str | None = None,
mapping_quality: int = 0,
base_quality: int = 0,
indexed=True,
):
"""Obtain base frequencies from the provided BAM file.
Required arguments:
`bam`: Path to the BAM file.
Optional arguments:
`region`
`bai`
`mapping_quality`
`base_quality`
`indexed`
Returns:
A `dict` mapping tuples of the form `(int, int)` to base frequencies.
"""
) -> Dict[str, Dict[Tuple[int, int], List[int]]]:
"""Performs a pileup over a region, obtaining per-position base frequencies for the provided BAM file.
Parameters
----------
bam : str
Path to the BAM file.
region : str, optional
Region to query, in the form `CHROM:START-END` (default: all positions)
bai : str, optional
Path to index file (default: same path as the BAM file, but with .bai appended)
mapping_quality : int, optional
Minimum mapping quality for a read to be included in the pileup (default: 0)
base_quality : int, optional
Minimum base quality for a base within a read to be included in the pileup (default: 0)
if not indexed and bai:
raise Exception("Cannot set indexed=False while also providing a BAI file")
Returns
-------
dict
Mapping: reference -> (reference position, insert position) -> [base frequencies].
"""

if region:
if indexed:
if not bai:
bai = bam + ".bai"
return query_(bam, bai, region, mapping_quality, base_quality)
else:
return query_(bam, None, region, mapping_quality, base_quality)
if not bai and os.path.isfile(bam + ".bai"):
bai = bam + ".bai"
return maptide.query(bam, bai, region, mapping_quality, base_quality)
else:
return all_(bam, mapping_quality, base_quality)
return maptide.all(bam, mapping_quality, base_quality)


def parse_region(region: str):
return parse_region_(region)
def parse_region(region: str) -> Tuple[str, int, int]:
"""Parses a region of the form `CHROM:START-END`, returning the tuple `(CHROM, START, END)`.
Parameters
----------
region : str
Region to parse.
Returns
-------
tuple
Parsed region tuple.
"""
return maptide.parse_region(region)
38 changes: 21 additions & 17 deletions python/maptide/cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from .api import query, parse_region
import argparse
import math
import sys
import csv
import pkg_resources
from . import api


def entropy(probabilities, normalised=False):
Expand Down Expand Up @@ -36,7 +37,7 @@ def get_stats(counts, decimals=3):

def iterate(data, region=None, stats=False, decimals=3):
if region:
chrom, start, end = parse_region(region)
chrom, start, end = api.parse_region(region)
for (pos, ins_pos), row in sorted(data[chrom].items()):
if (not start or pos >= start) and (not end or pos <= end):
if stats:
Expand All @@ -54,42 +55,46 @@ def iterate(data, region=None, stats=False, decimals=3):

def run():
parser = argparse.ArgumentParser()
index_group = parser.add_mutually_exclusive_group()
parser.add_argument("bam", help="Path to BAM file")
parser.add_argument(
"-v",
"--version",
action="version",
version=pkg_resources.get_distribution("maptide").version,
)
parser.add_argument(
"-r",
"--region",
help="Region to view, specified in the form CHROM:START-END (default: everything)",
)
index_group.add_argument(
parser.add_argument(
"-i",
"--index",
default=None,
help="Path to index (BAI) file (default: </path/to/bam>.bai)",
)
parser.add_argument(
"--mapq",
"-m",
"--mapping-quality",
type=int,
default=0,
help="Minimum mapping quality (default: %(default)s)",
)
parser.add_argument(
"--baseq",
"-b",
"--base-quality",
type=int,
default=0,
help="Minimum base quality (default: %(default)s)",
)
index_group.add_argument(
"--noindex",
action="store_true",
default=False,
help="Do not use an index file when querying the BAM file (default: %(default)s)",
)
parser.add_argument(
"-s",
"--stats",
action="store_true",
default=False,
help="Output additional per-position statistics (default: %(default)s)",
)
parser.add_argument(
"-d",
"--decimals",
type=int,
default=3,
Expand Down Expand Up @@ -128,13 +133,12 @@ def run():
writer = csv.writer(sys.stdout, delimiter="\t")
writer.writerow(columns)

data = query(
data = api.query(
bam=args.bam,
region=args.region,
bai=args.index,
mapping_quality=args.mapq,
base_quality=args.baseq,
indexed=not args.noindex,
mapping_quality=args.mapping_quality,
base_quality=args.base_quality,
)

for row in iterate(
Expand Down
12 changes: 6 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ fn merge_into_base_map(
}

#[pyfunction]
fn all_(bam_path: String, mapping_quality: usize, base_quality: usize) -> PyResult<MapTide> {
fn all(bam_path: String, mapping_quality: usize, base_quality: usize) -> PyResult<MapTide> {
// Create initial maps
let (mut ref_arrs, mut ins_maps, mut ref_lengths) = init_maps();

Expand Down Expand Up @@ -451,7 +451,7 @@ fn all_(bam_path: String, mapping_quality: usize, base_quality: usize) -> PyResu
}

#[pyfunction]
fn query_(
fn query(
bam_path: String,
bai_path: Option<String>,
region: String,
Expand Down Expand Up @@ -572,7 +572,7 @@ fn query_(
}

#[pyfunction]
fn parse_region_(region: String) -> PyResult<(String, Option<usize>, Option<usize>)> {
fn parse_region(region: String) -> PyResult<(String, Option<usize>, Option<usize>)> {
let region: Region = region
.parse()
.map_err(|x: ParseError| PyException::new_err(x.to_string()))?;
Expand All @@ -592,9 +592,9 @@ fn parse_region_(region: String) -> PyResult<(String, Option<usize>, Option<usiz
/// A Python module implemented in Rust.
#[pymodule]
fn maptide(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(all_, m)?)?;
m.add_function(wrap_pyfunction!(query_, m)?)?;
m.add_function(wrap_pyfunction!(parse_region_, m)?)?;
m.add_function(wrap_pyfunction!(all, m)?)?;
m.add_function(wrap_pyfunction!(query, m)?)?;
m.add_function(wrap_pyfunction!(parse_region, m)?)?;

Ok(())
}

0 comments on commit 551cfbe

Please sign in to comment.