From 523b9d4f0c2876756e68dd2a12eec3aaf8162395 Mon Sep 17 00:00:00 2001 From: Clint Valentine Date: Fri, 8 Nov 2024 15:34:53 -0800 Subject: [PATCH] feat: use typeline for IO (#24) --- .github/CODEOWNERS | 1 + .../{publish.yml => publish_bedspec.yml} | 14 +- .github/workflows/tests.yml | 4 +- README.md | 102 +++--- bedspec/__init__.py | 31 +- bedspec/_bedspec.py | 107 +++--- bedspec/_io.py | 289 --------------- bedspec/_reader.py | 84 +++++ bedspec/_writer.py | 29 ++ bedspec/overlap/__init__.py | 6 +- bedspec/overlap/_overlap.py | 93 ++--- poetry.lock | 94 +++-- pyproject.toml | 87 +++-- tests/test_bedspec.py | 32 +- tests/test_io.py | 332 ------------------ tests/test_overlap.py | 23 +- tests/test_reader.py | 103 ++++++ tests/test_writer.py | 152 ++++++++ 18 files changed, 711 insertions(+), 872 deletions(-) create mode 100644 .github/CODEOWNERS rename .github/workflows/{publish.yml => publish_bedspec.yml} (93%) delete mode 100644 bedspec/_io.py create mode 100644 bedspec/_reader.py create mode 100644 bedspec/_writer.py delete mode 100644 tests/test_io.py create mode 100644 tests/test_reader.py create mode 100644 tests/test_writer.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..3f9d317 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @clintval diff --git a/.github/workflows/publish.yml b/.github/workflows/publish_bedspec.yml similarity index 93% rename from .github/workflows/publish.yml rename to .github/workflows/publish_bedspec.yml index c7033b0..7e0c6a2 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish_bedspec.yml @@ -2,7 +2,7 @@ name: publish on: push: - tags: '\d+.\d+.\d+' + tags: '[0-9]+.[0-9]+.[0-9]+' env: POETRY_VERSION: 1.6 @@ -16,6 +16,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + submodules: true - uses: rickstaa/action-contains-tag@v1 id: contains_tag @@ -23,20 +24,20 @@ jobs: reference: "main" tag: "${{ github.ref_name }}" - tests: - name: tests + unit-tests: + 
name: unit-tests needs: on-main-branch-check if: ${{ needs.on-main-branch-check.outputs.on_main == 'true' }} uses: "./.github/workflows/tests.yml" build-wheels: name: build wheels - needs: tests + needs: unit-tests uses: "./.github/workflows/wheels.yml" build-sdist: name: build source distribution - needs: tests + needs: unit-tests runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -101,6 +102,7 @@ jobs: with: fetch-depth: 0 ref: ${{ github.ref_name }} + submodules: true - name: Generate a Changelog uses: orhun/git-cliff-action@v3 @@ -127,6 +129,6 @@ jobs: with: name: ${{ github.ref_name }} body: | - ${{ needs.draft-changelog.outputs.release_body }} + ${{ needs.make-changelog.outputs.release_body }} draft: false prerelease: false diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b8ef4fc..1487692 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: Code checks +name: unit tests on: push: @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - PYTHON_VERSION: ["3.11", "3.12"] + PYTHON_VERSION: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 with: diff --git a/README.md b/README.md index abf3a99..c40bd27 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,9 @@ [![PyPi Release](https://badge.fury.io/py/bedspec.svg)](https://badge.fury.io/py/bedspec) [![CI](https://github.com/clintval/bedspec/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/clintval/bedspec/actions/workflows/tests.yml?query=branch%3Amain) -[![Python Versions](https://img.shields.io/badge/python-3.11_|_3.12-blue)](https://github.com/clintval/bedspec) -[![MyPy Checked](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/) +[![Python Versions](https://img.shields.io/badge/python-3.10_|_3.11_|_3.12-blue)](https://github.com/clintval/bedspec) +[![basedpyright](https://img.shields.io/badge/basedpyright-checked-42b983)](https://docs.basedpyright.com/latest/) 
+[![mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](https://mypy-lang.org/) [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/) [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://docs.astral.sh/ruff/) @@ -21,65 +22,71 @@ pip install bedspec ### Building a BED Feature -```python -from bedspec import Bed3 +```pycon +>>> from bedspec import Bed3 +>>> +>>> bed = Bed3("chr1", start=2, end=8) -bed = Bed3("chr1", start=2, end=8) ``` ### Writing -```python -from bedspec import BedWriter +```pycon +>>> from bedspec import BedWriter +>>> from tempfile import NamedTemporaryFile +>>> +>>> temp_file = NamedTemporaryFile(mode="w+t", suffix=".txt") +>>> +>>> with BedWriter.from_path(temp_file.name, Bed3) as writer: +... writer.write(bed) -with BedWriter.from_path("test.bed") as writer: - writer.write(bed) ``` ### Reading -```python -from bedspec import BedReader +```pycon +>>> from bedspec import BedReader +>>> +>>> with BedReader.from_path(temp_file.name, Bed3) as reader: +... for bed in reader: +... 
print(bed) +Bed3(refname='chr1', start=2, end=8) -with BedReader.from_path("test.bed", Bed3) as reader: - for bed in reader: - print(bed) -``` -```console -Bed3(refname="chr1", start=2, end=8) ``` ### BED Types This package provides builtin classes for the following BED formats: -```python -from bedspec import Bed2 -from bedspec import Bed3 -from bedspec import Bed4 -from bedspec import Bed5 -from bedspec import Bed6 -from bedspec import Bed12 -from bedspec import BedGraph -from bedspec import BedPE +```pycon +>>> from bedspec import Bed2 +>>> from bedspec import Bed3 +>>> from bedspec import Bed4 +>>> from bedspec import Bed5 +>>> from bedspec import Bed6 +>>> from bedspec import Bed12 +>>> from bedspec import BedGraph +>>> from bedspec import BedPE + ``` ### Overlap Detection Use a fast overlap detector for any collection of interval types, including third-party: -```python -from bedspec import Bed3, Bed4 -from bedspec.overlap import OverlapDetector - -bed1 = Bed3("chr1", start=1, end=4) -bed2 = Bed3("chr1", start=5, end=9) +```pycon +>>> from bedspec import Bed3, Bed4 +>>> from bedspec.overlap import OverlapDetector +>>> +>>> bed1 = Bed3("chr1", start=1, end=4) +>>> bed2 = Bed3("chr1", start=5, end=9) +>>> +>>> detector = OverlapDetector[Bed3]([bed1, bed2]) +>>> +>>> my_feature = Bed4("chr1", start=2, end=3, name="hi-mom") +>>> detector.overlaps(my_feature) +True -detector = OverlapDetector[Bed3]([bed1, bed2]) - -my_feature = Bed4("chr1", start=2, end=3, name="hi-mom") - -assert detector.overlaps(my_feature) is True ``` The overlap detector supports the following operations: @@ -95,17 +102,18 @@ To create a custom BED record, inherit from the relevant BED-type (`PointBed`, ` For example, to create a custom BED3+1 class: -```python -from dataclasses import dataclass - -from bedspec import SimpleBed +```pycon +>>> from dataclasses import dataclass +>>> +>>> from bedspec import SimpleBed +>>> +>>> @dataclass(eq=True) +... class Bed3Plus1(SimpleBed): +... 
refname: str +... start: int +... end: int +... my_custom_field: float | None -@dataclass(eq=True) -class Bed3Plus1(SimpleBed): - refname: str - start: int - end: int - my_custom_field: float | None ``` ## Development and Testing diff --git a/bedspec/__init__.py b/bedspec/__init__.py index 81d6160..f142687 100644 --- a/bedspec/__init__.py +++ b/bedspec/__init__.py @@ -1,4 +1,3 @@ -# ruff: noqa: F401 from ._bedspec import Bed2 from ._bedspec import Bed3 from ._bedspec import Bed4 @@ -10,11 +9,35 @@ from ._bedspec import BedLike from ._bedspec import BedPE from ._bedspec import BedStrand -from ._bedspec import GenomicSpan +from ._bedspec import BedType from ._bedspec import Named from ._bedspec import PairBed from ._bedspec import PointBed +from ._bedspec import ReferenceSpan from ._bedspec import SimpleBed from ._bedspec import Stranded -from ._io import BedReader -from ._io import BedWriter +from ._reader import BedReader +from ._writer import BedWriter + +__all__ = [ + "Bed2", + "Bed3", + "Bed4", + "Bed5", + "Bed6", + "Bed12", + "BedColor", + "BedGraph", + "BedLike", + "BedPE", + "BedStrand", + "BedType", + "Named", + "PairBed", + "PointBed", + "ReferenceSpan", + "SimpleBed", + "Stranded", + "BedReader", + "BedWriter", +] diff --git a/bedspec/_bedspec.py b/bedspec/_bedspec.py index fad466c..6fce3b8 100644 --- a/bedspec/_bedspec.py +++ b/bedspec/_bedspec.py @@ -5,17 +5,24 @@ from dataclasses import Field from dataclasses import dataclass from dataclasses import field -from dataclasses import fields -from enum import StrEnum +from enum import Enum from enum import unique from typing import Any from typing import ClassVar from typing import Protocol +from typing import TypeVar from typing import final from typing import runtime_checkable +from typing_extensions import Self from typing_extensions import override +COMMENT_PREFIXES: set[str] = {"#", "browser", "track"} +"""The set of BED comment prefixes that this library supports.""" + +MISSING_FIELD: str = "." 
+"""The string used to indicate a missing field in a BED record.""" + @runtime_checkable class DataclassInstance(Protocol): @@ -25,7 +32,7 @@ class DataclassInstance(Protocol): @unique -class BedStrand(StrEnum): +class BedStrand(str, Enum): """BED strands for forward and reverse orientations.""" Positive = "+" @@ -36,11 +43,19 @@ class BedStrand(StrEnum): def opposite(self) -> "BedStrand": """Return the opposite BED strand.""" - return BedStrand.Negative if self is BedStrand.Positive else BedStrand.Positive + if self is BedStrand.Positive: + return BedStrand.Negative + else: + return BedStrand.Positive + + @override + def __str__(self) -> str: + """Return this strand as a string.""" + return self.value @runtime_checkable -class GenomicSpan(Protocol): +class ReferenceSpan(Protocol): """A structural protocol for 0-based half-open objects located on a reference sequence.""" refname: str @@ -65,69 +80,68 @@ class Stranded(Protocol): class BedLike(ABC, DataclassInstance): """An abstract base class for all types of BED records.""" - def __new__(cls, *_: object, **__: object) -> "BedLike": - if not dataclasses.is_dataclass(cls): - raise TypeError("You must annotate custom BED class definitions with @dataclass!") - instance: BedLike = object.__new__(cls) - return instance - @abstractmethod - def territory(self) -> Iterator[GenomicSpan]: + def territory(self) -> Iterator[ReferenceSpan]: """Return intervals that describe the territory of this BED record.""" -def header(bed: BedLike | type[BedLike]) -> list[str]: - """Return the list of field names for this BED record.""" - return [field.name for field in fields(bed)] - - -def types(bed: BedLike | type[BedLike]) -> list[type | str | Any]: - """Return the list of field types for this BED record.""" - return [field.type for field in fields(bed)] +BedType = TypeVar("BedType", bound=BedLike) +"""A type variable for any kind of BED record type.""" +@dataclass class PointBed(BedLike, ABC): """An abstract class for a BED record that 
describes a 0-based 1-length point.""" refname: str start: int + def __init_subclass__(cls) -> None: + if not dataclasses.is_dataclass(cls): + raise TypeError("You must annotate custom BED class definitions with @dataclass!") + return super().__init_subclass__() + @final - @property - def length(self) -> int: + def __len__(self) -> int: """The length of this record.""" return 1 @override - def territory(self) -> Iterator[GenomicSpan]: + def territory(self) -> Iterator[ReferenceSpan]: """Return the territory of a single point BED record which is 1-length.""" yield Bed3(refname=self.refname, start=self.start, end=self.start + 1) -class SimpleBed(BedLike, GenomicSpan, ABC): +@dataclass +class SimpleBed(BedLike, ReferenceSpan, ABC): """An abstract class for a BED record that describes a contiguous linear interval.""" refname: str start: int end: int + def __init_subclass__(cls) -> None: + if not dataclasses.is_dataclass(cls): + raise TypeError("You must annotate custom BED class definitions with @dataclass!") + return super().__init_subclass__() + def __post_init__(self) -> None: """Validate this linear BED record.""" if self.start >= self.end or self.start < 0: raise ValueError("start must be greater than 0 and less than end!") @final - @property - def length(self) -> int: + def __len__(self) -> int: """The length of this record.""" return self.end - self.start @override - def territory(self) -> Iterator[GenomicSpan]: + def territory(self) -> Iterator[ReferenceSpan]: """Return the territory of a linear BED record which is just itself.""" yield self +@dataclass class PairBed(BedLike, ABC): """An abstract base class for a BED record that describes a pair of linear linear intervals.""" @@ -138,6 +152,11 @@ class PairBed(BedLike, ABC): start2: int end2: int + def __init_subclass__(cls) -> None: + if not dataclasses.is_dataclass(cls): + raise TypeError("You must annotate custom BED class definitions with @dataclass!") + return super().__init_subclass__() + def 
__post_init__(self) -> None: """Validate this pair of BED records.""" if self.start1 >= self.end1 or self.start1 < 0: @@ -155,13 +174,14 @@ def bed2(self) -> SimpleBed: """The second of the two intervals.""" return Bed3(refname=self.refname2, start=self.start2, end=self.end2) - def territory(self) -> Iterator[GenomicSpan]: + @override + def territory(self) -> Iterator[ReferenceSpan]: """Return the territory of this BED record which are two intervals.""" yield self.bed1 yield self.bed2 -@dataclass(eq=True, frozen=True) +@dataclass(eq=True, slots=True) class BedColor: """The color of a BED record in red, green, and blue color values.""" @@ -175,7 +195,7 @@ def __post_init__(self) -> None: raise ValueError(f"RGB color values must be in the range [0, 255] but found: {self}") @classmethod - def from_string(cls, string: str) -> "BedColor": + def from_string(cls, string: str) -> Self: """Build a BED color instance from a string.""" try: r, g, b = map(int, string.split(",")) @@ -183,12 +203,13 @@ def from_string(cls, string: str) -> "BedColor": raise ValueError(f"Invalid string '{string}'. 
Expected 'int,int,int'!") from error return cls(r, g, b) + @override def __str__(self) -> str: """Return a comma-delimited string representation of this BED color.""" return f"{self.r},{self.g},{self.b}" -@dataclass(eq=True, frozen=True) +@dataclass(eq=True, slots=True) class Bed2(PointBed): """A BED2 record that describes a single 0-based 1-length point.""" @@ -196,7 +217,7 @@ class Bed2(PointBed): start: int -@dataclass(eq=True, frozen=True) +@dataclass(eq=True, slots=True) class Bed3(SimpleBed): """A BED3 record that describes a contiguous linear interval.""" @@ -205,7 +226,7 @@ class Bed3(SimpleBed): end: int = field(kw_only=True) -@dataclass(eq=True, frozen=True) +@dataclass(eq=True, slots=True) class Bed4(SimpleBed): """A BED4 record that describes a contiguous linear interval.""" @@ -215,7 +236,7 @@ class Bed4(SimpleBed): name: str | None = field(kw_only=True) -@dataclass(eq=True, frozen=True) +@dataclass(eq=True, slots=True) class Bed5(SimpleBed, Named): """A BED5 record that describes a contiguous linear interval.""" @@ -226,7 +247,7 @@ class Bed5(SimpleBed, Named): score: int | None = field(kw_only=True) -@dataclass(eq=True, frozen=True) +@dataclass(eq=True, slots=True) class Bed6(SimpleBed, Named, Stranded): """A BED6 record that describes a contiguous linear interval.""" @@ -238,7 +259,7 @@ class Bed6(SimpleBed, Named, Stranded): strand: BedStrand | None = field(kw_only=True) -@dataclass(eq=True, frozen=True) +@dataclass(eq=True, slots=True) class Bed12(SimpleBed, Named, Stranded): """A BED12 record that describes a contiguous linear interval.""" @@ -252,12 +273,12 @@ class Bed12(SimpleBed, Named, Stranded): thick_end: int | None = field(kw_only=True) item_rgb: BedColor | None = field(kw_only=True) block_count: int | None = field(kw_only=True) - block_sizes: list[int] = field(kw_only=True) - block_starts: list[int] = field(kw_only=True) + block_sizes: list[int] | None = field(kw_only=True) + block_starts: list[int] | None = field(kw_only=True) def 
__post_init__(self) -> None: """Validate this BED12 record.""" - super().__post_init__() + super(Bed12, self).__post_init__() if (self.thick_start is None) != (self.thick_end is None): raise ValueError("thick_start and thick_end must both be None or both be set!") if self.block_count is None: @@ -280,7 +301,7 @@ def __post_init__(self) -> None: raise ValueError("The last defined block's end must be equal to the BED end!") -@dataclass(eq=True, frozen=True) +@dataclass(eq=True, slots=True) class BedGraph(SimpleBed): """A bedGraph feature for continuous-valued data.""" @@ -290,7 +311,7 @@ class BedGraph(SimpleBed): value: float = field(kw_only=True) -@dataclass(eq=True, frozen=True) +@dataclass(eq=True, slots=True) class BedPE(PairBed, Named): """A BED record that describes a pair of BED records as per the bedtools spec.""" @@ -306,6 +327,7 @@ class BedPE(PairBed, Named): strand2: BedStrand | None = field(kw_only=True) @property + @override def bed1(self) -> Bed6: """The first of the two intervals as a BED6 record.""" return Bed6( @@ -318,6 +340,7 @@ def bed1(self) -> Bed6: ) @property + @override def bed2(self) -> Bed6: """The second of the two intervals as a BED6 record.""" return Bed6( @@ -332,7 +355,7 @@ def bed2(self) -> Bed6: @classmethod def from_bed6( cls, bed1: Bed6, bed2: Bed6, name: str | None = None, score: int | None = None - ) -> "BedPE": + ) -> Self: return cls( refname1=bed1.refname, start1=bed1.start, diff --git a/bedspec/_io.py b/bedspec/_io.py deleted file mode 100644 index aa373f8..0000000 --- a/bedspec/_io.py +++ /dev/null @@ -1,289 +0,0 @@ -import io -import json -from csv import DictReader -from csv import DictWriter -from dataclasses import asdict as as_dict -from pathlib import Path -from types import NoneType -from types import TracebackType -from typing import Any -from typing import ContextManager -from typing import Generic -from typing import Iterable -from typing import Iterator -from typing import TypeAlias -from typing import TypeVar 
-from typing import cast -from typing import get_args -from typing import get_origin - -from msgspec import convert -from msgspec import to_builtins -from typing_extensions import override - -from bedspec._bedspec import BedColor -from bedspec._bedspec import BedLike -from bedspec._bedspec import BedStrand -from bedspec._bedspec import header -from bedspec._bedspec import types - -BedType = TypeVar("BedType", bound=BedLike) -"""A type variable for any kind of BED record type.""" - -JsonType: TypeAlias = dict[str, "JsonType"] | list["JsonType"] | str | int | float | bool | None -"""A JSON-like data type.""" - -#################################################################################################### - -BED_EXTENSION: str = ".bed" -"""The default file extension for BED files.""" - -BEDGRAPH_EXTENSION: str = ".bedgraph" -"""The default file extension for bedGraph files.""" - -BEDPE_EXTENSION: str = ".bedpe" -"""The default file extension for BedPE files.""" - -BGZ_EXTENSION: str = ".bgz" -"""The default file extension for block-compressed gzip files (`.bgz`).""" - -BGZIP_EXTENSION: str = ".bgzip" -"""The default file extension for block-compressed gzip files (`.bgzip`).""" - -GZ_EXTENSION: str = ".gz" -"""The default file extension for compressed gzip files (`.gz`).""" - -GZIP_EXTENSION: str = ".gzip" -"""The default file extension for compressed gzip files (`.gzip`).""" - -TRACK_EXTENSION: str = ".track" -"""The default file extension for UCSC track files.""" - -_BGZIP_EXTENSIONS: set[str] = {BGZ_EXTENSION, BGZIP_EXTENSION} -"""All supported block-compressed gzip file extensions.""" - -_GZIP_EXTENSIONS: set[str] = {GZ_EXTENSION, GZIP_EXTENSION} -"""All supported compressed gzip file extensions.""" - -_ALL_GZIP_COMPATIBLE_EXTENSIONS: set[str] = _BGZIP_EXTENSIONS.union(_GZIP_EXTENSIONS) -"""All supported compressed and block-compressed gzip file extensions.""" - 
-#################################################################################################### - -COMMENT_PREFIXES: set[str] = {"#", "browser", "track"} -"""The set of BED comment prefixes that this library supports.""" - -MISSING_FIELD: str = "." -"""The string used to indicate a missing field in a BED record.""" - -#################################################################################################### - - -class BedWriter(ContextManager, Generic[BedType]): - """ - A writer of BED records. - - Args: - handle: An open file-like object to write to. - - Attributes: - bed_type: The BED type that this writer will write. - - """ - - def __init__(self, handle: io.TextIOWrapper) -> None: - """Initialize a BED writer without knowing yet what BED types we will write.""" - self._bed_type: type[BedType] | None = None - self._handle: io.TextIOWrapper = handle - self._writer: DictWriter | None = None - - @property - def bed_type(self) -> type[BedType] | None: - return self._bed_type - - @bed_type.setter - def bed_type(self, value: type[BedType]) -> None: - self._bed_type: type[BedType] = value # type: ignore[no-redef] - self._header: list[str] = header(cast(BedLike, value)) - self._types: list[type | str | Any] = types(cast(BedLike, value)) - - @override - def __enter__(self) -> "BedWriter[BedType]": - """Enter this context.""" - return self - - @override - def __exit__( - self, - __exc_type: type[BaseException] | None, - __exc_value: BaseException | None, - __traceback: TracebackType | None, - ) -> bool | None: - """Close and exit this context.""" - self.close() - return None - - def _maybe_setup_with(self, bed: BedType) -> None: - """Perform post-initialization and record validation.""" - if self.bed_type is None: - self.bed_type = type(bed) - - if self._writer is None: - self._writer = DictWriter(self._handle, delimiter="\t", fieldnames=self._header) - - if not isinstance(bed, self.bed_type): - raise TypeError( - f"BedWriter can only continue to write 
features of the same type." - f" Will not write a {type(bed).__name__} after a" - f" {self.bed_type.__name__}." - ) - - def _bed_to_dict(self, bed: BedType) -> dict[str, object]: - """Convert a BED record into a shallow dictionary.""" - shallow = {name: self._encode(getattr(bed, name)) for name in self._header} - return cast(dict[str, object], to_builtins(shallow, order="deterministic")) - - @staticmethod - def _encode(obj: Any) -> Any: - """A callback for special encoding of custom types.""" - if obj is None: - return "." - if isinstance(obj, (list, set, tuple)): - return ",".join(map(str, obj)) - if isinstance(obj, BedColor): - return str(obj) - return obj - - def write(self, bed: BedType) -> None: - """Write a BED record to the BED output.""" - self._maybe_setup_with(bed) - encoded = self._bed_to_dict(bed) - self._writer.writerow(encoded) - - def write_comment(self, comment: str) -> None: - """Write a comment to the BED output.""" - for line in comment.splitlines(): - prefix = "" if any(line.startswith(prefix) for prefix in COMMENT_PREFIXES) else "# " - self._handle.write(f"{prefix}{line}\n") - - @classmethod - def from_path(cls, path: Path | str) -> "BedWriter": - """Open a BED writer from a plain text file path.""" - writer: BedWriter = cls(handle=Path(path).open("w")) - return writer - - def close(self) -> bool | None: - """Close the underlying IO handle.""" - self._handle.close() - return None - - -class BedReader(ContextManager, Iterable[BedType], Generic[BedType]): - """ - A reader of BED records. - - This reader is capable of reading BED records but must be typed at runtime: - - ```python - from bedspec import BedReader, Bed3 - - with BedReader.from_path(path, Bed3) as reader: - print(list(reader)) - ``` - - Args: - handle: An open file-like object to read from. - - Attributes: - bed_type: The type of BED record that this reader will read. 
- - """ - - def __init__(self, handle: io.TextIOWrapper, bed_type: type[BedType]) -> None: - """Initialize a BED reader without knowing yet what BED types we will write.""" - self.bed_type: type[BedType] = bed_type - self._handle: io.TextIOWrapper = handle - self._header: list[str] = header(cast(BedLike, bed_type)) - self._types: list[type | str | Any] = types(cast(BedLike, bed_type)) - - def without_comments() -> Iterable[str]: - for line in self._handle: - line = line.strip() - if any(line.startswith(prefix) for prefix in COMMENT_PREFIXES): - continue - else: - yield line - - self._reader: DictReader = DictReader( - without_comments(), - delimiter="\t", - fieldnames=self._header, - ) - - @override - def __enter__(self) -> "BedReader[BedType]": - """Enter this context.""" - return self - - @override - def __exit__( - self, - __exc_type: type[BaseException] | None, - __exc_value: BaseException | None, - __traceback: TracebackType | None, - ) -> bool | None: - """Close and exit this context.""" - self.close() - return None - - @override - def __iter__(self) -> Iterator[BedType]: - """Iterate through the BED records of this IO handle.""" - for bed in self._reader: - yield convert( - self._csv_dict_to_json(bed), - self.bed_type, - strict=False, - ) - - self.close() - - @staticmethod - def _pre_decode(kind: type, obj: Any) -> Any: - if obj == MISSING_FIELD and NoneType in get_args(kind): - return None - if kind is BedColor or BedColor in get_args(kind): - if obj == "0": - return None - return json.dumps(as_dict(BedColor.from_string(cast(str, obj)))) - if kind is BedStrand or BedStrand in get_args(kind): - return f'"{obj}"' - return obj - - def _csv_dict_to_json(self, record: dict[str, str]) -> JsonType: - """Convert a CSV dictionary record to JSON using the known field types.""" - key_values: list[str] = [] - for (name, value), field_type in zip(record.items(), self._types, strict=True): - pre_decoded: str = self._pre_decode(cast(type, field_type), value) - origin_type 
= get_origin(field_type) - if pre_decoded is None: - key_values.append(f'"{name}":null') - elif origin_type is list: - key_values.append(f'"{name}":[{pre_decoded.rstrip(",")}]') - elif field_type is str or str in get_args(field_type): - key_values.append(f'"{name}":"{pre_decoded}"') - else: - key_values.append(f'"{name}":{pre_decoded}') - json_string: JsonType = json.loads(f"{{{','.join(key_values)}}}") - return json_string - - @classmethod - def from_path(cls, path: Path | str, bed_type: type[BedType]) -> "BedReader": - """Open a BED reader from a plain text file path.""" - reader = cls(handle=Path(path).open("r"), bed_type=bed_type) - return reader - - def close(self) -> None: - """Close the underlying IO handle.""" - self._handle.close() - return None diff --git a/bedspec/_reader.py b/bedspec/_reader.py new file mode 100644 index 0000000..14d7b71 --- /dev/null +++ b/bedspec/_reader.py @@ -0,0 +1,84 @@ +import json +from dataclasses import asdict as as_dict +from io import TextIOWrapper +from pathlib import Path +from types import NoneType +from types import UnionType +from typing import Any +from typing import cast +from typing import get_args +from typing import get_origin + +from typeline import TsvStructReader +from typing_extensions import Self +from typing_extensions import override + +from bedspec._bedspec import MISSING_FIELD +from bedspec._bedspec import BedColor +from bedspec._bedspec import BedStrand +from bedspec._bedspec import BedType + + +class BedReader(TsvStructReader[BedType]): + """A reader of BED records.""" + + @override + def __init__( + self, + handle: TextIOWrapper, + record_type: type[BedType], + /, + has_header: bool = False, + ): + """Initialize the BED reader.""" + super().__init__(handle, record_type, has_header=has_header) + + @property + @override + def comment_prefixes(self) -> set[str]: + return {"#", "browser", "track"} + + @staticmethod + def _build_union(*types: type) -> type | UnionType: + if len(types) == 1: + return types[0] 
+ union: UnionType | type = types[0] + for t in types[1:]: + union |= t + return cast(UnionType, union) + + @override + def _decode(self, field_type: type[Any] | str | Any, item: Any) -> Any: + """A callback for overriding the decoding of builtin types and custom types.""" + type_args: tuple[type, ...] = get_args(field_type) + type_origin: type | None = get_origin(field_type) + is_union: bool = isinstance(field_type, UnionType) + + if item == MISSING_FIELD and NoneType in type_args: + return None + elif field_type is BedColor or BedColor in type_args: + if item == "0": + return None + return json.dumps(as_dict(BedColor.from_string(cast(str, item)))) # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType] + elif field_type is BedStrand or BedStrand in type_args: + return f'"{item}"' + elif type_origin in (frozenset, list, tuple, set): + stripped: str = item.rstrip(",") + return f"[{stripped}]" + elif is_union and len(type_args) >= 2 and NoneType in type_args: + other_types: set[type] = set(type_args) - {NoneType} + return self._decode(self._build_union(*other_types), item) + return super()._decode(field_type, item=item) + + @classmethod + @override + def from_path( + cls, + path: Path | str, + record_type: type[BedType], + /, + has_header: bool = False, + ) -> Self: + """Construct a BED reader from a file path.""" + reader = cls(Path(path).open("r"), record_type, has_header=has_header) + return reader diff --git a/bedspec/_writer.py b/bedspec/_writer.py new file mode 100644 index 0000000..848c312 --- /dev/null +++ b/bedspec/_writer.py @@ -0,0 +1,29 @@ +from typing import Any + +from typeline import TsvStructWriter +from typing_extensions import override + +from bedspec._bedspec import COMMENT_PREFIXES +from bedspec._bedspec import BedColor +from bedspec._bedspec import BedType + + +class BedWriter(TsvStructWriter[BedType]): + """A writer of BED records.""" + + @override + def _encode(self, item: Any) -> Any: + """A callback for overriding the 
encoding of builtin types and custom types.""" + if item is None: + return "." + if isinstance(item, (list, set, tuple)): + return ",".join(map(str, item)) # pyright: ignore[reportUnknownArgumentType] + if isinstance(item, BedColor): + return str(item) + return super()._encode(item=item) + + def write_comment(self, comment: str) -> None: + """Write a comment to the BED output.""" + for line in comment.splitlines(): + prefix = "" if any(line.startswith(prefix) for prefix in COMMENT_PREFIXES) else "# " + _ = self._handle.write(f"{prefix}{line}\n") diff --git a/bedspec/overlap/__init__.py b/bedspec/overlap/__init__.py index 2eebccb..a3d7700 100644 --- a/bedspec/overlap/__init__.py +++ b/bedspec/overlap/__init__.py @@ -1,3 +1,5 @@ -# ruff: noqa: F401 from ._overlap import OverlapDetector -from ._overlap import QueryReferenceSpanType + +__all__ = [ + "OverlapDetector", +] diff --git a/bedspec/overlap/_overlap.py b/bedspec/overlap/_overlap.py index d45857e..e43eaca 100644 --- a/bedspec/overlap/_overlap.py +++ b/bedspec/overlap/_overlap.py @@ -1,55 +1,25 @@ from collections import defaultdict +from collections.abc import Iterable +from collections.abc import Iterator from itertools import chain from typing import Generic -from typing import Hashable -from typing import Iterable -from typing import Iterator -from typing import Protocol from typing import TypeAlias from typing import TypeVar -from typing import runtime_checkable -import cgranges as cr - - -@runtime_checkable -class Span(Hashable, Protocol): - """A span with a start and an end. 
0-based open-ended.""" - - @property - def start(self) -> int: - """A 0-based start position.""" - raise NotImplementedError - - @property - def end(self) -> int: - """A 0-based open-ended position.""" - raise NotImplementedError - - -@runtime_checkable -class ReferenceSpan(Span, Protocol): - """A feature on a reference sequence.""" - - @property - def refname(self) -> str: - """A reference sequence name.""" - raise NotImplementedError +from typing_extensions import override +import cgranges as cr +from bedspec._bedspec import ReferenceSpan -QueryReferenceSpanType = TypeVar("QueryReferenceSpanType", bound=ReferenceSpan) -"""Type variable for features being queried against the overlap detector.""" - -GenericReferenceSpanType = TypeVar("GenericReferenceSpanType", bound=ReferenceSpan) +ReferenceSpanType = TypeVar("ReferenceSpanType", bound=ReferenceSpan) """Type variable for features stored within the overlap detector.""" Refname: TypeAlias = str """A type alias for a reference sequence name string.""" -class OverlapDetector(Iterable[GenericReferenceSpanType], Generic[GenericReferenceSpanType]): - """ - Detects and returns overlaps between a collection of reference features and query feature. +class OverlapDetector(Iterable[ReferenceSpanType], Generic[ReferenceSpanType]): + """Detects and returns overlaps between a collection of reference features and query feature. The overlap detector may be built with any feature-like Python object that has the following properties: @@ -61,55 +31,50 @@ class OverlapDetector(Iterable[GenericReferenceSpanType], Generic[GenericReferen This detector is most efficiently used when all features to be queried are added ahead of time. 
""" - def __init__(self, features: Iterable[GenericReferenceSpanType] | None = None) -> None: - self._refname_to_features: dict[Refname, list[GenericReferenceSpanType]] = defaultdict(list) - self._refname_to_tree: dict[Refname, cr.cgranges] = defaultdict(cr.cgranges) # type: ignore[attr-defined,name-defined] + def __init__(self, features: Iterable[ReferenceSpanType] | None = None) -> None: + self._refname_to_features: dict[Refname, list[ReferenceSpanType]] = defaultdict(list) + self._refname_to_tree: dict[Refname, cr.cgranges] = defaultdict(cr.cgranges) # type: ignore[attr-defined,name-defined] # pyright: ignore[reportUnknownArgumentType, reportUnknownMemberType] self._refname_to_is_indexed: dict[Refname, bool] = defaultdict(lambda: False) if features is not None: - self.add_all(features) + self.add(*features) - def __iter__(self) -> Iterator[GenericReferenceSpanType]: + @override + def __iter__(self) -> Iterator[ReferenceSpanType]: """Iterate over the features in the overlap detector.""" return chain(*self._refname_to_features.values()) - def add(self, feature: GenericReferenceSpanType) -> None: + def add(self, *features: ReferenceSpanType) -> None: """Add a feature to this overlap detector.""" - if not isinstance(feature, Hashable): - raise ValueError(f"Genomic feature is not hashable but should be: {feature}") - - refname: Refname = feature.refname - feature_idx: int = len(self._refname_to_features[refname]) - - self._refname_to_features[refname].append(feature) - self._refname_to_tree[refname].add(refname, feature.start, feature.end, feature_idx) - self._refname_to_is_indexed[refname] = False # mark that this tree needs re-indexing - - def add_all(self, features: Iterable[GenericReferenceSpanType]) -> None: - """Adds one or more features to this overlap detector.""" for feature in features: - self.add(feature) + refname: Refname = feature.refname + feature_idx: int = len(self._refname_to_features[refname]) + + self._refname_to_features[refname].append(feature) 
+ self._refname_to_tree[refname].add(refname, feature.start, feature.end, feature_idx) # pyright: ignore[reportUnknownMemberType] + self._refname_to_is_indexed[refname] = False # mark that this tree needs re-indexing - def overlapping(self, feature: QueryReferenceSpanType) -> Iterator[GenericReferenceSpanType]: + def overlapping(self, feature: ReferenceSpan) -> Iterator[ReferenceSpanType]: """Yields all the overlapping features for a given query feature.""" refname: Refname = feature.refname - if refname in self._refname_to_tree and not self._refname_to_is_indexed[refname]: - self._refname_to_tree[refname].index() # index the tree if we find it is not indexed + if refname in self._refname_to_tree.keys() and not self._refname_to_is_indexed[refname]: # pyright: ignore[reportUnknownMemberType] + self._refname_to_tree[refname].index() # pyright: ignore[reportUnknownMemberType] - for *_, idx in self._refname_to_tree[refname].overlap(refname, feature.start, feature.end): + idx: int + for *_, idx in self._refname_to_tree[refname].overlap(refname, feature.start, feature.end): # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] yield self._refname_to_features[refname][idx] - def overlaps(self, feature: QueryReferenceSpanType) -> bool: + def overlaps(self, feature: ReferenceSpan) -> bool: """Determine if a query feature overlaps any other features.""" return next(self.overlapping(feature), None) is not None - def enclosing(self, feature: QueryReferenceSpanType) -> Iterator[GenericReferenceSpanType]: + def enclosing(self, feature: ReferenceSpan) -> Iterator[ReferenceSpanType]: """Yields all the overlapping features that completely enclose the given query feature.""" for overlap in self.overlapping(feature): if feature.start >= overlap.start and feature.end <= overlap.end: yield overlap - def enclosed_by(self, feature: QueryReferenceSpanType) -> Iterator[GenericReferenceSpanType]: + def enclosed_by(self, feature: ReferenceSpan) -> Iterator[ReferenceSpanType]: 
"""Yields all the overlapping features that are enclosed by the given query feature.""" for overlap in self.overlapping(feature): if feature.start <= overlap.start and feature.end >= overlap.end: diff --git a/poetry.lock b/poetry.lock index 5572102..3bacd84 100644 --- a/poetry.lock +++ b/poetry.lock @@ -96,9 +96,26 @@ files = [ {file = "coverage-7.6.4.tar.gz", hash = "sha256:29fc0f17b1d3fea332f8001d4558f8214af7f1d87a345f3a133c901d60347c73"}, ] +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + [package.extras] toml = ["tomli"] +[[package]] +name = "exceptiongroup" +version = "1.2.2" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, +] + +[package.extras] +test = ["pytest (>=6)"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -205,6 +222,7 @@ files = [ [package.dependencies] mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} typing-extensions = ">=4.6.0" [package.extras] @@ -243,13 +261,13 @@ files = [ [[package]] name = "packaging" -version = "24.1" +version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, - {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = 
"sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -280,9 +298,11 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" pluggy = ">=1.5,<2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] @@ -326,29 +346,29 @@ test = ["numpy", "pytest-remotedata (>=0.3.2)", "sphinx"] [[package]] name = "ruff" -version = "0.7.1" +version = "0.7.3" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.7.1-py3-none-linux_armv6l.whl", hash = "sha256:cb1bc5ed9403daa7da05475d615739cc0212e861b7306f314379d958592aaa89"}, - {file = "ruff-0.7.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:27c1c52a8d199a257ff1e5582d078eab7145129aa02721815ca8fa4f9612dc35"}, - {file = "ruff-0.7.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:588a34e1ef2ea55b4ddfec26bbe76bc866e92523d8c6cdec5e8aceefeff02d99"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94fc32f9cdf72dc75c451e5f072758b118ab8100727168a3df58502b43a599ca"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:985818742b833bffa543a84d1cc11b5e6871de1b4e0ac3060a59a2bae3969250"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32f1e8a192e261366c702c5fb2ece9f68d26625f198a25c408861c16dc2dea9c"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:699085bf05819588551b11751eff33e9ca58b1b86a6843e1b082a7de40da1565"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:344cc2b0814047dc8c3a8ff2cd1f3d808bb23c6658db830d25147339d9bf9ea7"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4316bbf69d5a859cc937890c7ac7a6551252b6a01b1d2c97e8fc96e45a7c8b4a"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79d3af9dca4c56043e738a4d6dd1e9444b6d6c10598ac52d146e331eb155a8ad"}, - {file = "ruff-0.7.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c5c121b46abde94a505175524e51891f829414e093cd8326d6e741ecfc0a9112"}, - {file = "ruff-0.7.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8422104078324ea250886954e48f1373a8fe7de59283d747c3a7eca050b4e378"}, - {file = "ruff-0.7.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:56aad830af8a9db644e80098fe4984a948e2b6fc2e73891538f43bbe478461b8"}, - {file = "ruff-0.7.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:658304f02f68d3a83c998ad8bf91f9b4f53e93e5412b8f2388359d55869727fd"}, - {file = "ruff-0.7.1-py3-none-win32.whl", hash = "sha256:b517a2011333eb7ce2d402652ecaa0ac1a30c114fbbd55c6b8ee466a7f600ee9"}, - {file = "ruff-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f38c41fcde1728736b4eb2b18850f6d1e3eedd9678c914dede554a70d5241307"}, - {file = "ruff-0.7.1-py3-none-win_arm64.whl", hash = "sha256:19aa200ec824c0f36d0c9114c8ec0087082021732979a359d6f3c390a6ff2a37"}, - {file = "ruff-0.7.1.tar.gz", hash = "sha256:9d8a41d4aa2dad1575adb98a82870cf5db5f76b2938cf2206c22c940034a36f4"}, + {file = "ruff-0.7.3-py3-none-linux_armv6l.whl", hash = "sha256:34f2339dc22687ec7e7002792d1f50712bf84a13d5152e75712ac08be565d344"}, + {file = "ruff-0.7.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:fb397332a1879b9764a3455a0bb1087bda876c2db8aca3a3cbb67b3dbce8cda0"}, + {file = "ruff-0.7.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:37d0b619546103274e7f62643d14e1adcbccb242efda4e4bdb9544d7764782e9"}, + {file = "ruff-0.7.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5d59f0c3ee4d1a6787614e7135b72e21024875266101142a09a61439cb6e38a5"}, + {file = "ruff-0.7.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:44eb93c2499a169d49fafd07bc62ac89b1bc800b197e50ff4633aed212569299"}, + {file = "ruff-0.7.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d0242ce53f3a576c35ee32d907475a8d569944c0407f91d207c8af5be5dae4e"}, + {file = "ruff-0.7.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:6b6224af8b5e09772c2ecb8dc9f3f344c1aa48201c7f07e7315367f6dd90ac29"}, + {file = "ruff-0.7.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c50f95a82b94421c964fae4c27c0242890a20fe67d203d127e84fbb8013855f5"}, + {file = "ruff-0.7.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7f3eff9961b5d2644bcf1616c606e93baa2d6b349e8aa8b035f654df252c8c67"}, + {file = "ruff-0.7.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8963cab06d130c4df2fd52c84e9f10d297826d2e8169ae0c798b6221be1d1d2"}, + {file = "ruff-0.7.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:61b46049d6edc0e4317fb14b33bd693245281a3007288b68a3f5b74a22a0746d"}, + {file = "ruff-0.7.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:10ebce7696afe4644e8c1a23b3cf8c0f2193a310c18387c06e583ae9ef284de2"}, + {file = "ruff-0.7.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3f36d56326b3aef8eeee150b700e519880d1aab92f471eefdef656fd57492aa2"}, + {file = "ruff-0.7.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5d024301109a0007b78d57ab0ba190087b43dce852e552734ebf0b0b85e4fb16"}, + {file = "ruff-0.7.3-py3-none-win32.whl", hash = "sha256:4ba81a5f0c5478aa61674c5a2194de8b02652f17addf8dfc40c8937e6e7d79fc"}, + {file = "ruff-0.7.3-py3-none-win_amd64.whl", hash = "sha256:588a9ff2fecf01025ed065fe28809cd5a53b43505f48b69a1ac7707b1b7e4088"}, + {file = "ruff-0.7.3-py3-none-win_arm64.whl", hash = "sha256:1713e2c5545863cdbfe2cbce21f69ffaf37b813bfd1fb3b90dc9a6f1963f5a8c"}, + 
{file = "ruff-0.7.3.tar.gz", hash = "sha256:e1d1ba2e40b6e71a61b063354d04be669ab0d39c352461f3d789cac68b54a313"}, ] [[package]] @@ -371,6 +391,32 @@ enabler = ["pytest-enabler (>=2.2)"] test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] +[[package]] +name = "tomli" +version = "2.0.2" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, + {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, +] + +[[package]] +name = "typeline" +version = "0.3.0" +description = "Write dataclasses to delimited text formats and read them back again." 
+optional = false +python-versions = "<4.0.0,>=3.10.0" +files = [ + {file = "typeline-0.3.0-py3-none-any.whl", hash = "sha256:c08e44e12c104c17706947e38e724cfbe5034569de7439fcdad3ee958aa19fc4"}, + {file = "typeline-0.3.0.tar.gz", hash = "sha256:57a2b8a7e20020cdfec470ec3be123364c159b5a758230a0aa1b4eda5ff3fecd"}, +] + +[package.dependencies] +msgspec = ">=0.18,<0.19" +typing-extensions = ">=4.12,<5.0" + [[package]] name = "typing-extensions" version = "4.12.2" @@ -384,5 +430,5 @@ files = [ [metadata] lock-version = "2.0" -python-versions = "^3.11.0" -content-hash = "039430541076ef54ae5dc06634b09bc484ea8456b067e63cc0141da98a5338e3" +python-versions = "^3.10.0,<4.0.0" +content-hash = "ec9aceec39fef68d5e92df55ccf934b59e2164af5fcd5d8878f270548739a5ca" diff --git a/pyproject.toml b/pyproject.toml index 6314e2b..f71ef51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bedspec" -version = "0.2.0" +version = "0.3.0" description = "An HTS-specs compliant BED toolkit." authors = ["Clint Valentine "] license = "MIT" @@ -14,8 +14,10 @@ classifiers = [ "Intended Audience :: Developers", "Intended Audience :: Science/Research", "License :: OSI Approved :: MIT License", + "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Topic :: File Formats", @@ -25,25 +27,20 @@ classifiers = [ "Typing :: Typed", ] include = ["CONTRIBUTING.md", "LICENSE"] -packages = [ - { include = "bedspec" }, - { include = "cgranges" }, -] +packages = [{ include = "bedspec" }, { include = "cgranges" }] [tool.poetry.dependencies] -python = "^3.11.0" -msgspec = "^0.18" +python = "^3.10.0,<4.0.0" +typeline = "^0.3" typing-extensions = "^4.12" [tool.poetry.dev-dependencies] -# TODO: satisfy basedpyright warnings and errors and add to CI? 
basedpyright = "^1.21" mypy = "^1.13" pytest = "^8.3" pytest-cov = "^5.0" -# TODO: get doctests to work and also run on the README? -pytest-doctestplus = "^1.2.1" -ruff = "0.7.1" +pytest-doctestplus = "^1.2" +ruff = "^0.7" [tool.poetry.build] script = "build.py" @@ -69,8 +66,14 @@ fix-all.sequence = [ check-lock = "poetry check --lock" check-format = "ruff format --check --diff" check-lint = "ruff check" -check-tests = "pytest" -check-typing = "mypy" +check-tests = "pytest --doctest-glob='*.md'" + +_check-mypy = "mypy" +_check-pyright = "basedpyright" +check-typing.sequence = [ + "_check-mypy", + "_check-pyright" +] check-all.ignore_fail = true check-all.sequence = [ @@ -90,6 +93,35 @@ fix-and-check-all.sequence = [ [tool.coverage.run] omit = ["cgranges/*"] +[[tool.mypy.overrides]] +module = "Cython.Build" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "setuptools" +ignore_missing_imports = true + +[tool.pytest.ini_options] +minversion = "7.4" +addopts = [ + "--color=yes", + "--cov-fail-under=80", + "--cov-report=term-missing", + "--cov", + "--cov=typeline", + "--doctest-modules", + "--doctest-plus", + "--import-mode=importlib", + "--ignore=build.py", + "--ignore=cgranges/", +] +doctest_plus = "enabled" +doctest_optionflags = [ + "ELLIPSIS", + "IGNORE_EXCEPTION_DETAIL", + "NORMALIZE_WHITESPACE", +] + [tool.mypy] files = ["build.py", "bedspec/", "tests/"] strict_optional = false @@ -108,26 +140,15 @@ warn_unused_configs = true warn_unused_ignores = true enable_error_code = "ignore-without-code" -[[tool.mypy.overrides]] -module = "Cython.Build" -ignore_missing_imports = true - -[[tool.mypy.overrides]] -module = "setuptools" -ignore_missing_imports = true - -[tool.pytest.ini_options] -minversion = "7.4" -addopts = [ - "--color=yes", - "--import-mode=importlib", - "--cov" -] +[tool.pyright] +include = ["bedspec/"] +reportAny = false +pythonVersion = "3.10" [tool.ruff] include = ["build.py", "bedspec/**", "tests/**"] line-length = 100 
-target-version = "py311" +target-version = "py310" output-format = "full" preview = true @@ -138,7 +159,7 @@ select = [ "B", # bugbear "D", # pydocstyle (docstrings. We have the "google" convention enabled) "D204", # Blank line between class docstring and first (__init__) method - "D213", # Summary line should be located on the line after opening quotes + "D212", # summary line should be located on the same line as opening quotes "E", # pycodestyle errors "F", # pyflakes "I", # isort @@ -149,12 +170,15 @@ select = [ ignore = [ "E203", "E701", - "D212", # summary line should be located on the same line as opening quotes + "D213", # Summary line should be located on the line after opening quote "D100", # missing docstring in public module "D104", # missing docstring in public package ] unfixable = ["B"] +[tool.ruff.lint.mccabe] +max-complexity = 10 + [tool.ruff.lint.isort] force-single-line = true @@ -177,6 +201,7 @@ body = """ conventional_commits = true commit_parsers = [ { message = "^.+!:*", group = "Breaking"}, + { message = "^security*", group = "Security"}, { message = "^feat*", group = "Features"}, { message = "^fix*", group = "Bug Fixes"}, { message = "^docs*", group = "Documentation"}, diff --git a/tests/test_bedspec.py b/tests/test_bedspec.py index 566e317..29813eb 100644 --- a/tests/test_bedspec.py +++ b/tests/test_bedspec.py @@ -13,9 +13,9 @@ from bedspec import BedLike from bedspec import BedPE from bedspec import BedStrand -from bedspec import GenomicSpan from bedspec import PairBed from bedspec import PointBed +from bedspec import ReferenceSpan from bedspec import SimpleBed from bedspec import Stranded from bedspec._bedspec import DataclassInstance @@ -83,11 +83,11 @@ def test_all_bed_types_are_dataclasses(bed_type: type[BedLike]) -> None: def test_locatable_structural_type() -> None: - """Test that the GenomicSpan structural type is set correctly.""" - span: GenomicSpan = Bed6( + """Test that the ReferenceSpan structural type is set correctly.""" + 
span: ReferenceSpan = Bed6( refname="chr1", start=1, end=2, name="foo", score=3, strand=BedStrand.Positive ) - assert isinstance(span, GenomicSpan) + assert isinstance(span, ReferenceSpan) def test_stranded_structural_type() -> None: @@ -152,7 +152,7 @@ def test_point_bed_types_have_a_territory() -> None: def test_point_bed_types_are_length_1() -> None: """Test that a point BED has a length of 1.""" - assert Bed2(refname="chr1", start=1).length == 1 + assert len(Bed2(refname="chr1", start=1)) == 1 def test_simple_bed_types_have_a_territory() -> None: @@ -169,9 +169,9 @@ def test_simple_bed_types_have_a_territory() -> None: def test_simple_bed_types_have_length() -> None: """Test that a simple BED has the right length.""" - assert Bed3(refname="chr1", start=1, end=2).length == 1 - assert Bed3(refname="chr1", start=1, end=3).length == 2 - assert Bed3(refname="chr1", start=1, end=4).length == 3 + assert len(Bed3(refname="chr1", start=1, end=2)) == 1 + assert len(Bed3(refname="chr1", start=1, end=3)) == 2 + assert len(Bed3(refname="chr1", start=1, end=4)) == 3 def test_simple_bed_validates_start_and_end() -> None: @@ -248,6 +248,22 @@ def make_bed12( block_starts=block_starts, ) + with pytest.raises(ValueError, match="start must be greater than 0 and less than end!"): + Bed12( + refname="chr1", + start=2, + end=1, + name="bed12", + score=2, + strand=BedStrand.Positive, + thick_start=None, + thick_end=None, + item_rgb=BedColor(101, 2, 32), + block_count=None, + block_sizes=None, + block_starts=None, + ) + with pytest.raises( ValueError, match="thick_start and thick_end must both be None or both be set!" 
): diff --git a/tests/test_io.py b/tests/test_io.py deleted file mode 100644 index 09ceb9a..0000000 --- a/tests/test_io.py +++ /dev/null @@ -1,332 +0,0 @@ -from pathlib import Path - -import pytest - -from bedspec import Bed2 -from bedspec import Bed3 -from bedspec import Bed4 -from bedspec import Bed5 -from bedspec import Bed6 -from bedspec import Bed12 -from bedspec import BedColor -from bedspec import BedGraph -from bedspec import BedLike -from bedspec import BedPE -from bedspec import BedReader -from bedspec import BedStrand -from bedspec import BedWriter -from bedspec._io import MISSING_FIELD - - -# fmt: off -@pytest.mark.parametrize( - "bed,expected", - [ - [Bed2(refname="chr1", start=1), "chr1\t1\n"], - [Bed3(refname="chr1", start=1, end=2), "chr1\t1\t2\n"], - [Bed4(refname="chr1", start=1, end=2, name="foo"), "chr1\t1\t2\tfoo\n"], - [Bed5(refname="chr1", start=1, end=2, name="foo", score=3), "chr1\t1\t2\tfoo\t3\n"], - [Bed6(refname="chr1", start=1, end=2, name="foo", score=3, strand=BedStrand.Positive), "chr1\t1\t2\tfoo\t3\t+\n"], # noqa: E501 - [BedGraph(refname="chr1", start=1, end=2, value=0.2), "chr1\t1\t2\t0.2\n"], - [ - BedPE( - refname1="chr1", - start1=1, - end1=2, - refname2="chr2", - start2=3, - end2=4, - name="foo", - score=5, - strand1=BedStrand.Positive, - strand2=BedStrand.Negative, - ), - "chr1\t1\t2\tchr2\t3\t4\tfoo\t5\t+\t-\n", - ], - ], -) -# fmt: on -def test_bed_writer_can_write_all_bed_types(bed: BedLike, expected: str, tmp_path: Path) -> None: - """Test that the BED writer can write all BED types.""" - with open(tmp_path / "test.bed", "w") as handle: - writer: BedWriter = BedWriter(handle) - writer.write(bed) - - assert Path(tmp_path / "test.bed").read_text() == expected - - -def test_bed_writer_can_be_closed(tmp_path: Path) -> None: - """Test that we can close a BED writer.""" - path: Path = tmp_path / "test.bed" - writer = BedWriter[Bed3](open(path, "w")) - writer.write(Bed3(refname="chr1", start=1, end=2)) - writer.close() - - with 
pytest.raises(ValueError, match="I/O operation on closed file"): - writer.write(Bed3(refname="chr1", start=1, end=2)) - - -def test_bed_writer_can_write_bed_records_from_a_path(tmp_path: Path) -> None: - """Test that the BED write can write BED records from a path if it is typed.""" - bed: Bed3 = Bed3(refname="chr1", start=1, end=2) - - with BedWriter.from_path(tmp_path / "test1.bed") as writer: - writer.write(bed) - - assert (tmp_path / "test1.bed").read_text() == "chr1\t1\t2\n" - - with BedWriter.from_path(str(tmp_path / "test2.bed")) as writer: - writer.write(bed) - - assert (tmp_path / "test2.bed").read_text() == "chr1\t1\t2\n" - - -def test_bed_writer_remembers_the_type_it_will_write(tmp_path: Path) -> None: - """Test that the BED writer remembers the type it can only write.""" - with open(tmp_path / "test.bed", "w") as handle: - writer: BedWriter = BedWriter(handle) - writer.write(Bed2(refname="chr1", start=1)) - assert writer.bed_type is Bed2 - with pytest.raises( - TypeError, - match=( - "BedWriter can only continue to write features of the same type. Will not write a" - " Bed3 after a Bed2" - ), - ): - writer.write(Bed3(refname="chr1", start=1, end=2)) - - -def test_bed_writer_remembers_the_type_it_will_write_generic(tmp_path: Path) -> None: - """Test that the generically parameterized BED writer remembers the type it can only write.""" - with open(tmp_path / "test.bed", "w") as handle: - writer = BedWriter[Bed2](handle) - writer.write(Bed2("chr1", 1)) - with pytest.raises( - TypeError, - match=( - "BedWriter can only continue to write features of the same type. 
Will not write a" - " Bed3 after a Bed2" - ), - ): - writer.write(Bed3(refname="chr1", start=1, end=2)) # type: ignore[arg-type] - - -def test_bed_writer_write_comment_with_prefix_pound_symbol(tmp_path: Path) -> None: - """Test that we can write comments that have a leading pound symbol.""" - with open(tmp_path / "test.bed", "w") as handle: - writer = BedWriter[Bed2](handle) - writer.write_comment("# hello mom!") - writer.write(Bed2(refname="chr1", start=1)) - writer.write_comment("# hello\ndad!") - writer.write(Bed2(refname="chr2", start=2)) - - expected = "# hello mom!\nchr1\t1\n# hello\n# dad!\nchr2\t2\n" - assert Path(tmp_path / "test.bed").read_text() == expected - - -def test_bed_writer_write_comment_without_prefix_pound_symbol(tmp_path: Path) -> None: - """Test that we can write comments that do not have a leading pound symbol.""" - with open(tmp_path / "test.bed", "w") as handle: - writer = BedWriter[Bed2](handle) - writer.write_comment("track this-is-fine") - writer.write_comment("browser is mario's enemy?") - writer.write_comment("hello\nmom!") - writer.write(Bed2(refname="chr1", start=1)) - writer.write_comment("hello dad!") - writer.write(Bed2(refname="chr2", start=2)) - - expected = ( - "track this-is-fine\n" - "browser is mario's enemy?\n" - "# hello\n" - "# mom!\n" - "chr1\t1\n" - "# hello dad!\n" - "chr2\t2\n" - ) - - assert Path(tmp_path / "test.bed").read_text() == expected - - -def test_bed_writer_can_be_used_as_context_manager(tmp_path: Path) -> None: - """Test that the BED writer can be used as a context manager.""" - with BedWriter[Bed2](open(tmp_path / "test.bed", "w")) as handle: - handle.write(Bed2(refname="chr1", start=1)) - handle.write(Bed2(refname="chr2", start=2)) - - expected = "chr1\t1\nchr2\t2\n" - assert Path(tmp_path / "test.bed").read_text() == expected - - -def test_bed_reader_can_read_bed_records_if_typed(tmp_path: Path) -> None: - """Test that the BED reader can read BED records if the reader is typed.""" - bed: Bed3 = 
Bed3(refname="chr1", start=1, end=2) - - with open(tmp_path / "test.bed", "w") as handle: - writer: BedWriter = BedWriter(handle) - writer.write(bed) - - assert Path(tmp_path / "test.bed").read_text() == "chr1\t1\t2\n" - - with open(tmp_path / "test.bed", "r") as handle: - assert list(BedReader(handle, Bed3)) == [bed] - - -def test_bed_reader_can_be_closed(tmp_path: Path) -> None: - """Test that we can close a BED reader.""" - path: Path = tmp_path / "test.bed" - path.touch() - reader = BedReader(open(path), Bed3) - reader.close() - - with pytest.raises(ValueError, match="I/O operation on closed file"): - next(iter(reader)) - - -def test_bed_reader_can_read_bed_records_from_a_path(tmp_path: Path) -> None: - """Test that the BED reader can read BED records from a path if it is typed.""" - bed: Bed3 = Bed3(refname="chr1", start=1, end=2) - - with open(tmp_path / "test.bed", "w") as handle: - writer: BedWriter = BedWriter(handle) - writer.write(bed) - - assert Path(tmp_path / "test.bed").read_text() == "chr1\t1\t2\n" - - reader = BedReader.from_path(tmp_path / "test.bed", Bed3) - assert list(reader) == [bed] - - reader = BedReader.from_path(str(tmp_path / "test.bed"), Bed3) - assert list(reader) == [bed] - - -def test_bed_reader_can_read_bed_records_with_comments(tmp_path: Path) -> None: - """Test that the BED reader can read BED records with comments.""" - bed: Bed3 = Bed3(refname="chr1", start=1, end=2) - - with open(tmp_path / "test.bed", "w") as handle: - writer: BedWriter = BedWriter(handle) - writer.write_comment("track\nthis-is-fine") - writer.write_comment("browser is mario's enemy?") - writer.write_comment("hello mom!") - handle.write("\n") # empty line - handle.write(" \t\n") # empty line - writer.write(bed) - writer.write_comment("hello dad!") - - with open(tmp_path / "test.bed", "r") as handle: - assert list(BedReader(handle, Bed3)) == [bed] - - -def test_bed_reader_can_read_optional_string_types(tmp_path: Path) -> None: - """Test that the BED reader can 
read BED records with optional string types.""" - bed: Bed4 = Bed4(refname="chr1", start=1, end=2, name=None) - - (tmp_path / "test.bed").write_text(f"chr1\t1\t2\t{MISSING_FIELD}\n") - - with open(tmp_path / "test.bed", "r") as handle: - assert list(BedReader(handle, Bed4)) == [bed] - - -def test_bed_reader_can_read_optional_other_types(tmp_path: Path) -> None: - """Test that the BED reader can read BED records with optional other types.""" - bed: Bed5 = Bed5(refname="chr1", start=1, end=2, name="foo", score=None) - - (tmp_path / "test.bed").write_text(f"chr1\t1\t2\tfoo\t{MISSING_FIELD}\n") - - with open(tmp_path / "test.bed", "r") as handle: - assert list(BedReader(handle, Bed5)) == [bed] - - -def test_bed_reader_can_be_used_as_context_manager(tmp_path: Path) -> None: - """Test that the BED reader can be used as a context manager.""" - bed: Bed4 = Bed4(refname="chr1", start=1, end=2, name=None) - - (tmp_path / "test.bed").write_text(f"chr1\t1\t2\t{MISSING_FIELD}\n") - - with BedReader(open(tmp_path / "test.bed"), Bed4) as reader: - assert list(reader) == [bed] - - -def test_we_can_roundtrip_a_bed_record_with_complex_types(tmp_path: Path) -> None: - """Test that we can roundtrip a BED record with complex types (e.g. 
lists).""" - bed12: Bed12 = Bed12( - refname="chr1", - start=2, - end=10, - name="bed12", - score=2, - strand=BedStrand.Positive, - thick_start=3, - thick_end=4, - item_rgb=BedColor(101, 2, 32), - block_count=2, - block_sizes=[1, 2], - block_starts=[0, 6], - ) - - with BedWriter.from_path(tmp_path / "test.bed") as writer: - writer.write(bed12) - - expected: str = "chr1\t2\t10\tbed12\t2\t+\t3\t4\t101,2,32\t2\t1,2\t0,6\n" - assert Path(tmp_path / "test.bed").read_text() == expected - - with BedReader.from_path(tmp_path / "test.bed", Bed12) as reader: - assert list(reader) == [bed12] - -# @pytest.mark.parametrize("ext", _ALL_GZIP_COMPATIBLE_EXTENSIONS) -# def test_bed_reader_can_read_gzip_compressed(tmp_path: Path, ext: str) -> None: -# """Test that the BED reader can read gzip compressed paths.""" -# bed: Bed4 = Bed4(refname="chr1", start=1, end=2, name=None) - -# with gzip.open(tmp_path / ("test.bed" + ext), "wt") as handle: -# handle.write(f"chr1\t1\t2\t{MISSING_FIELD}\n") - -# with BedReader[Bed4](gzip.open(tmp_path / ("test.bed" + ext), "rt")) as reader: -# assert list(reader) == [bed] - - -# @pytest.mark.parametrize("ext", _ALL_GZIP_COMPATIBLE_EXTENSIONS) -# def test_bed_reader_can_read_gzip_compressed_generic(tmp_path: Path, ext: str) -> None: -# """Test that the BED reader can read gzip compressed paths.""" -# bed: Bed4 = Bed4(refname="chr1", start=1, end=2, name=None) - -# with gzip.open(tmp_path / ("test.bed" + ext), "wt") as handle: -# handle.write(f"chr1\t1\t2\t{MISSING_FIELD}\n") - -# with BedReader.from_path(tmp_path / ("test.bed" + ext)) as reader: -# assert list(reader) == [bed] - -# @pytest.mark.parametrize("ext", _GZIP_EXTENSIONS) -# def test_bed_writer_can_write_gzip_compressed(tmp_path: Path, ext: str) -> None: -# """Test that the BED writer can write gzip compressed paths.""" -# bed: Bed4 = Bed4(refname="chr1", start=1, end=2, name=None) - -# with BedWriter[Bed4](gzip.open(tmp_path / ("test.bed" + ext), "wt")) as writer: -# writer.write(bed) - -# 
with BedReader[Bed4](gzip.open(tmp_path / ("test.bed" + ext), "rt")) as reader: -# assert list(reader) == [bed] - -# @pytest.mark.parametrize("ext", _GZIP_EXTENSIONS) -# def test_bed_writer_can_write_gzip_compressed_generic(tmp_path: Path, ext: str) -> None: -# """Test that the BED writer can write gzip compressed paths.""" -# bed: Bed4 = Bed4(refname="chr1", start=1, end=2, name=None) - -# with BedWriter.from_path(tmp_path / ("test.bed" + ext)) as writer: -# writer.write(bed) - -# with BedReader[Bed4](gzip.open(tmp_path / ("test.bed" + ext), "rt")) as reader: -# assert list(reader) == [bed] - -# @pytest.mark.parametrize("ext", _BGZIP_EXTENSIONS) -# def test_bed_writer_can_write_block_gzip_compressed_generic(tmp_path: Path, ext: str) -> None: -# """Test that the BED writer can write gzip compressed paths.""" -# bed: Bed4 = Bed4(refname="chr1", start=1, end=2, name=None) - -# with BedWriter.from_path(tmp_path / ("test.bed" + ext)) as writer: -# writer.write(bed) - -# with BedReader[Bed4](gzip.open(tmp_path / ("test.bed" + ext), "rt")) as reader: -# assert list(reader) == [bed] diff --git a/tests/test_overlap.py b/tests/test_overlap.py index 321c7bf..e395428 100644 --- a/tests/test_overlap.py +++ b/tests/test_overlap.py @@ -1,7 +1,3 @@ -from dataclasses import dataclass - -import pytest - from bedspec import Bed3 from bedspec import Bed4 from bedspec.overlap import OverlapDetector @@ -33,28 +29,13 @@ def test_we_can_add_a_feature_to_the_overlap_detector() -> None: assert list(detector) == [bed1, bed2] -def test_that_we_require_hashable_features_in_the_overlap_detector() -> None: - """Test that we require hashable features in the overlap detector.""" - - @dataclass - class MissingHashFeature: - refname: str - start: int - end: int - - feature: MissingHashFeature = MissingHashFeature("chr1", 2, 3) - detector: OverlapDetector[MissingHashFeature] = OverlapDetector() - - with pytest.raises(ValueError, match="Genomic feature is not hashable but should be"): - 
detector.add(feature) - - def test_we_can_add_all_features_to_the_overlap_detector() -> None: """Test we can add all features to the overlap detector.""" bed1 = Bed3(refname="chr1", start=1, end=2) bed2 = Bed4(refname="chr2", start=4, end=5, name="Clint Valentine") detector: OverlapDetector[Bed3 | Bed4] = OverlapDetector() - detector.add_all([bed1, bed2]) + beds: list[Bed3 | Bed4] = [bed1, bed2] + detector.add(*beds) assert list(detector) == [bed1, bed2] diff --git a/tests/test_reader.py b/tests/test_reader.py new file mode 100644 index 0000000..4cd3cd0 --- /dev/null +++ b/tests/test_reader.py @@ -0,0 +1,103 @@ +from pathlib import Path + +from bedspec import Bed3 +from bedspec import Bed4 +from bedspec import Bed5 +from bedspec import Bed12 +from bedspec import BedColor +from bedspec import BedReader +from bedspec import BedStrand +from bedspec import BedWriter +from bedspec._bedspec import MISSING_FIELD + + +def test_bed_reader_can_read_bed_records_from_a_path(tmp_path: Path) -> None: + """Test that the BED reader can read BED records from a path if it is typed.""" + bed: Bed3 = Bed3(refname="chr1", start=1, end=2) + + with open(tmp_path / "test.bed", "w") as handle: + writer: BedWriter = BedWriter(handle, Bed3) + writer.write(bed) + + assert Path(tmp_path / "test.bed").read_text() == "chr1\t1\t2\n" + + reader = BedReader.from_path(tmp_path / "test.bed", Bed3) + assert list(reader) == [bed] + + reader = BedReader.from_path(str(tmp_path / "test.bed"), Bed3) + assert list(reader) == [bed] + + +def test_bed_reader_can_read_bed_records_with_comments(tmp_path: Path) -> None: + """Test that the BED reader can read BED records with comments.""" + bed: Bed3 = Bed3(refname="chr1", start=1, end=2) + + with open(tmp_path / "test.bed", "w") as handle: + writer: BedWriter = BedWriter(handle, Bed3) + writer.write_comment("track\nthis-is-fine") + writer.write_comment("browser is mario's enemy?") + writer.write_comment("hello mom!") + handle.write("\n") # empty line + 
handle.write(" \n") # empty line + writer.write(bed) + writer.write_comment("hello dad!") + + with open(tmp_path / "test.bed", "r") as handle: + assert list(BedReader(handle, Bed3)) == [bed] + + +def test_bed_reader_can_read_optional_string_types(tmp_path: Path) -> None: + """Test that the BED reader can read BED records with optional string types.""" + bed: Bed4 = Bed4(refname="chr1", start=1, end=2, name=None) + + (tmp_path / "test.bed").write_text(f"chr1\t1\t2\t{MISSING_FIELD}\n") + + with open(tmp_path / "test.bed", "r") as handle: + assert list(BedReader(handle, Bed4)) == [bed] + + +def test_bed_reader_can_read_optional_other_types(tmp_path: Path) -> None: + """Test that the BED reader can read BED records with optional other types.""" + bed: Bed5 = Bed5(refname="chr1", start=1, end=2, name="foo", score=None) + + (tmp_path / "test.bed").write_text(f"chr1\t1\t2\tfoo\t{MISSING_FIELD}\n") + + with open(tmp_path / "test.bed", "r") as handle: + assert list(BedReader(handle, Bed5)) == [bed] + + +def test_bed_reader_can_be_used_as_context_manager(tmp_path: Path) -> None: + """Test that the BED reader can be used as a context manager.""" + bed: Bed4 = Bed4(refname="chr1", start=1, end=2, name=None) + + (tmp_path / "test.bed").write_text(f"chr1\t1\t2\t{MISSING_FIELD}\n") + + with BedReader(open(tmp_path / "test.bed"), Bed4) as reader: + assert list(reader) == [bed] + + +def test_we_can_roundtrip_a_bed_record_with_complex_types(tmp_path: Path) -> None: + """Test that we can roundtrip a BED record with complex types (e.g. 
lists).""" + bed12: Bed12 = Bed12( + refname="chr1", + start=2, + end=10, + name="bed12", + score=2, + strand=BedStrand.Positive, + thick_start=3, + thick_end=4, + item_rgb=BedColor(101, 2, 32), + block_count=2, + block_sizes=[1, 2], + block_starts=[0, 6], + ) + + with BedWriter.from_path(tmp_path / "test.bed", Bed12) as writer: + writer.write(bed12) + + expected: str = "chr1\t2\t10\tbed12\t2\t+\t3\t4\t101,2,32\t2\t1,2\t0,6\n" + assert Path(tmp_path / "test.bed").read_text() == expected + + with BedReader.from_path(tmp_path / "test.bed", Bed12) as reader: + assert list(reader) == [bed12] diff --git a/tests/test_writer.py b/tests/test_writer.py new file mode 100644 index 0000000..b1c21ca --- /dev/null +++ b/tests/test_writer.py @@ -0,0 +1,152 @@ +from pathlib import Path + +import pytest + +from bedspec import Bed2 +from bedspec import Bed3 +from bedspec import Bed4 +from bedspec import Bed5 +from bedspec import Bed6 +from bedspec import BedGraph +from bedspec import BedLike +from bedspec import BedPE +from bedspec import BedStrand +from bedspec import BedWriter + + +# fmt: off +@pytest.mark.parametrize( + "bed,expected", + [ + [Bed2(refname="chr1", start=1), "chr1\t1\n"], + [Bed3(refname="chr1", start=1, end=2), "chr1\t1\t2\n"], + [Bed4(refname="chr1", start=1, end=2, name="foo"), "chr1\t1\t2\tfoo\n"], + [Bed5(refname="chr1", start=1, end=2, name="foo", score=3), "chr1\t1\t2\tfoo\t3\n"], + [Bed6(refname="chr1", start=1, end=2, name="foo", score=3, strand=BedStrand.Positive), "chr1\t1\t2\tfoo\t3\t+\n"], # noqa: E501 + [BedGraph(refname="chr1", start=1, end=2, value=0.2), "chr1\t1\t2\t0.2\n"], + [ + BedPE( + refname1="chr1", + start1=1, + end1=2, + refname2="chr2", + start2=3, + end2=4, + name="foo", + score=5, + strand1=BedStrand.Positive, + strand2=BedStrand.Negative, + ), + "chr1\t1\t2\tchr2\t3\t4\tfoo\t5\t+\t-\n", + ], + ], +) +# fmt: on +def test_bed_writer_can_write_all_bed_types(bed: BedLike, expected: str, tmp_path: Path) -> None: + """Test that the BED 
writer can write all BED types.""" + with open(tmp_path / "test.bed", "w") as handle: + writer: BedWriter = BedWriter(handle, type(bed)) + writer.write(bed) + + assert Path(tmp_path / "test.bed").read_text() == expected + + +def test_bed_writer_can_be_closed(tmp_path: Path) -> None: + """Test that we can close a BED writer.""" + path: Path = tmp_path / "test.bed" + handle = open(path, "w") + try: + writer = BedWriter(handle, Bed3) + writer.write(Bed3(refname="chr1", start=1, end=2)) + writer.close() + + with pytest.raises(ValueError, match="I/O operation on closed file"): + writer.write(Bed3(refname="chr1", start=1, end=2)) + finally: + handle.close() + + +def test_bed_writer_can_write_bed_records_from_a_path(tmp_path: Path) -> None: + """Test that the BED write can write BED records from a path if it is typed.""" + bed: Bed3 = Bed3(refname="chr1", start=1, end=2) + + with BedWriter.from_path(tmp_path / "test1.bed", Bed3) as writer: + writer.write(bed) + + assert (tmp_path / "test1.bed").read_text() == "chr1\t1\t2\n" + + with BedWriter.from_path(str(tmp_path / "test2.bed"), Bed3) as writer: + writer.write(bed) + + assert (tmp_path / "test2.bed").read_text() == "chr1\t1\t2\n" + + +def test_bed_writer_remembers_the_type_it_will_write(tmp_path: Path) -> None: + """Test that the BED writer remembers the type it can only write.""" + with open(tmp_path / "test.bed", "w") as handle: + writer: BedWriter = BedWriter(handle, Bed2) + writer.write(Bed2(refname="chr1", start=1)) + with pytest.raises( + ValueError, + match="Expected Bed2 but found Bed3!", + ): + writer.write(Bed3(refname="chr1", start=1, end=2)) + + +def test_bed_writer_remembers_the_type_it_will_write_generic(tmp_path: Path) -> None: + """Test that the generically parameterized BED writer remembers the type it can only write.""" + with open(tmp_path / "test.bed", "w") as handle: + writer = BedWriter(handle, Bed2) + writer.write(Bed2("chr1", 1)) + with pytest.raises( + ValueError, + match="Expected Bed2 but 
found Bed3!", + ): + writer.write(Bed3(refname="chr1", start=1, end=2)) # type: ignore[arg-type] + + +def test_bed_writer_write_comment_with_prefix_pound_symbol(tmp_path: Path) -> None: + """Test that we can write comments that have a leading pound symbol.""" + with open(tmp_path / "test.bed", "w") as handle: + writer = BedWriter(handle, Bed2) + writer.write_comment("# hello mom!") + writer.write(Bed2(refname="chr1", start=1)) + writer.write_comment("# hello\ndad!") + writer.write(Bed2(refname="chr2", start=2)) + + expected = "# hello mom!\nchr1\t1\n# hello\n# dad!\nchr2\t2\n" + assert Path(tmp_path / "test.bed").read_text() == expected + + +def test_bed_writer_write_comment_without_prefix_pound_symbol(tmp_path: Path) -> None: + """Test that we can write comments that do not have a leading pound symbol.""" + with open(tmp_path / "test.bed", "w") as handle: + writer = BedWriter(handle, Bed2) + writer.write_comment("track this-is-fine") + writer.write_comment("browser is mario's enemy?") + writer.write_comment("hello\nmom!") + writer.write(Bed2(refname="chr1", start=1)) + writer.write_comment("hello dad!") + writer.write(Bed2(refname="chr2", start=2)) + + expected = ( + "track this-is-fine\n" + "browser is mario's enemy?\n" + "# hello\n" + "# mom!\n" + "chr1\t1\n" + "# hello dad!\n" + "chr2\t2\n" + ) + + assert Path(tmp_path / "test.bed").read_text() == expected + + +def test_bed_writer_can_be_used_as_context_manager(tmp_path: Path) -> None: + """Test that the BED writer can be used as a context manager.""" + with BedWriter(open(tmp_path / "test.bed", "w"), Bed2) as handle: + handle.write(Bed2(refname="chr1", start=1)) + handle.write(Bed2(refname="chr2", start=2)) + + expected = "chr1\t1\nchr2\t2\n" + assert Path(tmp_path / "test.bed").read_text() == expected