Skip to content

Commit

Permalink
feat: possibly better?
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval committed Nov 6, 2024
1 parent c90a920 commit 3136331
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 82 deletions.
1 change: 1 addition & 0 deletions bedspec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from ._bedspec import Bed4
from ._bedspec import Bed5
from ._bedspec import Bed6
from ._bedspec import Bed12
from ._bedspec import BedColor
from ._bedspec import BedGraph
from ._bedspec import BedLike
Expand Down
16 changes: 13 additions & 3 deletions bedspec/_bedspec.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import dataclasses
from abc import ABC
from abc import abstractmethod
from collections.abc import Iterator
from dataclasses import Field
from dataclasses import dataclass
from dataclasses import field
Expand All @@ -9,9 +10,8 @@
from enum import unique
from typing import Any
from typing import ClassVar
from typing import Iterator
from typing import Protocol
from typing import Type
from typing import override
from typing import runtime_checkable


Expand Down Expand Up @@ -83,7 +83,7 @@ def header(bed: BedLike | type[BedLike]) -> list[str]:
return _header


def types(bed: BedLike | type[BedLike]) -> list[Type | str | Any]:
def types(bed: BedLike | type[BedLike]) -> list[type | str | Any]:
"""Return the list of field types for this BED record."""
_types = [field.type for field in fields(bed)]
return _types
Expand All @@ -100,6 +100,7 @@ def length(self) -> int:
"""The length of this record."""
return 1

@override
def territory(self) -> Iterator[GenomicSpan]:
"""Return the territory of a single point BED record which is 1-length."""
yield Bed3(refname=self.refname, start=self.start, end=self.start + 1)
Expand All @@ -122,6 +123,7 @@ def length(self) -> int:
"""The length of this record."""
return self.end - self.start

@override
def territory(self) -> Iterator[GenomicSpan]:
"""Return the territory of a linear BED record which is just itself."""
yield self
Expand Down Expand Up @@ -173,6 +175,12 @@ def __post_init__(self) -> None:
if any(value > 255 or value < 0 for value in (self.r, self.g, self.b)):
raise ValueError(f"RGB color values must be in the range [0, 255] but found: {self}")

@classmethod
def from_string(cls, string: str) -> "BedColor":
    """Build a BED color instance from a comma-delimited ``R,G,B`` string.

    Args:
        string: Three integers separated by commas, for example ``"255,0,0"``.

    Returns:
        A new instance built by calling ``cls(r, g, b)``.

    Raises:
        ValueError: If the string does not hold exactly three comma-delimited integers.
    """
    parts = string.split(",")
    if len(parts) != 3:
        # Fail with the offending input instead of an opaque tuple-unpacking error.
        raise ValueError(f"Expected three comma-delimited RGB values but found: {string!r}")
    try:
        r, g, b = (int(part) for part in parts)
    except ValueError as exception:
        raise ValueError(f"RGB values must be integers but found: {string!r}") from exception
    return cls(r, g, b)

def __str__(self) -> str:
"""Return a comma-delimited string representation of this BED color."""
return f"{self.r},{self.g},{self.b}"
Expand Down Expand Up @@ -245,6 +253,8 @@ class Bed12(SimpleBed, Named, Stranded):
block_sizes: list[int] = field(kw_only=True)
block_starts: list[int] = field(kw_only=True)

# TODO: add a post_init and BED12 validation


@dataclass(eq=True, frozen=True)
class BedGraph(SimpleBed):
Expand Down
79 changes: 54 additions & 25 deletions bedspec/_io.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,35 @@
import io
import json
from csv import DictReader
from csv import DictWriter
from dataclasses import asdict as as_dict
from pathlib import Path
from types import NoneType
from types import TracebackType
from typing import Any
from typing import ContextManager
from typing import Iterable
from typing import Iterator
from typing import Type
from typing import TypeAlias
from typing import TypeVar
from typing import cast
from typing import get_args
from typing import override

from msgspec import convert
from msgspec import to_builtins
from msgspec.inspect import UnionType
from msgspec.inspect import type_info

from bedspec._bedspec import BedColor
from bedspec._bedspec import BedLike
from bedspec._bedspec import BedStrand
from bedspec._bedspec import header
from bedspec._bedspec import types

BedType = TypeVar("BedType", bound=BedLike)
"""A type variable for any kind of BED record type."""

JsonType: TypeAlias = dict[str, "JsonType"] | list["JsonType"] | str | int | float | bool | None
"""A JSON-like data type."""

####################################################################################################

Expand Down Expand Up @@ -96,12 +103,14 @@ def bed_type(self) -> type[BedType] | None:
def bed_type(self, bed_type: type[BedType]) -> None:
self._bed_type: type[BedType] = bed_type # type: ignore[no-redef]
self._header: list[str] = header(cast(BedLike, bed_type))
self._types: list[Type | str | Any] = types(cast(BedLike, bed_type))
self._types: list[type | str | Any] = types(cast(BedLike, bed_type))

@override
def __enter__(self) -> "BedWriter[BedType]":
"""Enter this context."""
return self

@override
def __exit__(
self,
__exc_type: type[BaseException] | None,
Expand Down Expand Up @@ -129,12 +138,17 @@ def _maybe_setup_with(self, bed: BedType) -> None:

def _bed_to_dict(self, bed: BedType) -> dict[str, object]:
"""Convert a BED record into a shallow dictionary."""
encoded = {name: self._encode(getattr(bed, name)) for name in self._header}
return encoded

def _encode(self, value: object) -> object:
"""Encode a BED record value into a string given its type."""
return MISSING_FIELD if value is None else to_builtins(value)
shallow = {name: self._encode(getattr(bed, name)) for name in self._header}
return cast(dict[str, object], to_builtins(shallow, order="deterministic"))

@staticmethod
def _encode(obj: Any) -> Any:
    """Encode one BED field value before the record is converted to builtin types.

    Args:
        obj: The attribute value read off of a BED record.

    Returns:
        The missing-field marker for `None`, the string form of a `BedColor`, otherwise the
        value unchanged.
    """
    if obj is None:
        # Use the shared marker so the written value is the same one the reader compares against.
        return MISSING_FIELD
    if isinstance(obj, BedColor):
        return str(obj)
    return obj

def write(self, bed: BedType) -> None:
"""Write a BED record to the BED output."""
Expand Down Expand Up @@ -186,7 +200,7 @@ def __init__(self, handle: io.TextIOWrapper, bed_type: type[BedType]) -> None:
self.bed_type: type[BedType] = bed_type
self._handle: io.TextIOWrapper = handle
self._header: list[str] = header(cast(BedLike, bed_type))
self._types: list[Type | str | Any] = types(cast(BedLike, bed_type))
self._types: list[type | str | Any] = types(cast(BedLike, bed_type))

def without_comments() -> Iterable[str]:
for line in self._handle:
Expand All @@ -202,10 +216,12 @@ def without_comments() -> Iterable[str]:
fieldnames=self._header,
)

@override
def __enter__(self) -> "BedReader[BedType]":
"""Enter this context."""
return self

@override
def __exit__(
self,
__exc_type: type[BaseException] | None,
Expand All @@ -216,27 +232,40 @@ def __exit__(
self.close()
return None

@override
def __iter__(self) -> Iterator[BedType]:
"""Iterate through the BED records of this IO handle."""
if self.bed_type is None:
raise NotImplementedError("Untyped reading is not yet supported!")

for bed in self._reader:
decoded = {
name: self._decode(name, value, bed_type)
for (name, value), bed_type in zip(bed.items(), self._types, strict=True)
}
yield convert(decoded, self.bed_type, strict=False)
yield convert(
self._csv_dict_to_json(bed),
self.bed_type,
strict=False,
)

self.close()

def _decode[T](self, name: str, value: str, bed_type: T) -> str | T: # noqa: ARG002
"""Optionally decodes a BED record string into an object given its field types."""
type_meta = type_info(bed_type)
if isinstance(type_meta, UnionType) and type_meta.includes_none and value == MISSING_FIELD:
@staticmethod
def _pre_decode(kind: type, obj: Any) -> Any:
    """Pre-decode a raw BED field string into a JSON-ready fragment given its field type.

    Args:
        kind: The declared type of the field, possibly a union of types.
        obj: The raw string value read from the BED line.

    Returns:
        `None` when the value is the missing-field marker and the field type permits `None`,
        a JSON object string for BED colors, a JSON string literal for BED strands, otherwise
        the value unchanged.
    """
    members = get_args(kind)
    if obj == MISSING_FIELD and NoneType in members:
        return None
    if kind is BedColor or BedColor in members:
        return json.dumps(as_dict(BedColor.from_string(cast(str, obj))))
    if kind is BedStrand or BedStrand in members:
        # Serialize through `json.dumps` so the fragment is always a valid JSON string literal.
        return json.dumps(str(obj))
    return obj

def _csv_dict_to_json(self, record: dict[str, str]) -> JsonType:
"""Convert a CSV dictionary record to JSON using the known field types."""
key_values: list[str] = []
for (name, value), field_type in zip(record.items(), self._types, strict=True):
pre_decoded: str = self._pre_decode(cast(type, field_type), value)
if pre_decoded is None:
key_values.append(f'"{name}":null')
elif field_type is str or str in get_args(field_type):
key_values.append(f'"{name}":"{pre_decoded}"')
else:
key_values.append(f'"{name}":{pre_decoded}')
return cast(JsonType, json.loads(f"{{{','.join(key_values)}}}"))

@classmethod
def from_path(cls, path: Path | str, bed_type: type[BedType]) -> "BedReader":
Expand Down
5 changes: 5 additions & 0 deletions bedspec/overlap/_overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ class _Span(Hashable, Protocol):
@property
def start(self) -> int:
"""A 0-based start position."""
return NotImplemented

@property
def end(self) -> int:
"""A 0-based open-ended position."""
return NotImplemented


@runtime_checkable
Expand All @@ -32,6 +34,7 @@ class _GenomicSpanWithChrom(_Span, Protocol):
@property
def chrom(self) -> str:
"""A reference sequence name."""
return NotImplemented


@runtime_checkable
Expand All @@ -41,6 +44,7 @@ class _GenomicSpanWithContig(_Span, Protocol):
@property
def contig(self) -> str:
"""A reference sequence name."""
return NotImplemented


@runtime_checkable
Expand All @@ -50,6 +54,7 @@ class _GenomicSpanWithRefName(_Span, Protocol):
@property
def refname(self) -> str:
"""A reference sequence name."""
return NotImplemented


GenomicSpanLike = TypeVar(
Expand Down
73 changes: 26 additions & 47 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3136331

Please sign in to comment.