From 0007dc291b362e7181c54ecc0b09ca1e7e368826 Mon Sep 17 00:00:00 2001
From: Jim Pivarski
Date: Tue, 20 Feb 2024 17:55:42 -0600
Subject: [PATCH 1/2] feat: add ragged.io submodule with conversions for CF conventions

---
 src/ragged/_spec_array_object.py |  2 ++
 src/ragged/io/__init__.py        |  5 +++
 src/ragged/io/cf.py              | 57 ++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+)
 create mode 100644 src/ragged/io/__init__.py
 create mode 100644 src/ragged/io/cf.py

diff --git a/src/ragged/_spec_array_object.py b/src/ragged/_spec_array_object.py
index f099388..e83fe2e 100644
--- a/src/ragged/_spec_array_object.py
+++ b/src/ragged/_spec_array_object.py
@@ -1236,6 +1236,8 @@ def _unbox(*inputs: array) -> tuple[ak.Array | SupportsDLPack, ...]:
         msg = f"mixed array types: {types}"
         raise TypeError(msg)
 
+    # FIXME: either complain about mixed devices or cast to a common device
+
     return tuple(x._impl for x in inputs)  # pylint: disable=W0212
 
 
diff --git a/src/ragged/io/__init__.py b/src/ragged/io/__init__.py
new file mode 100644
index 0000000..ae8206c
--- /dev/null
+++ b/src/ragged/io/__init__.py
@@ -0,0 +1,5 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/ragged/blob/main/LICENSE
+
+from __future__ import annotations
+
+from .cf import *
diff --git a/src/ragged/io/cf.py b/src/ragged/io/cf.py
new file mode 100644
index 0000000..b36179f
--- /dev/null
+++ b/src/ragged/io/cf.py
@@ -0,0 +1,57 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/ragged/blob/main/LICENSE
+
+from __future__ import annotations
+
+from typing import Tuple
+
+import awkward as ak
+import numpy as np
+
+from .._spec_array_object import array, _unbox, _box
+from .._import import device_namespace
+
+
+def to_cf_contiguous(x: array) -> Tuple[array, array]:
+    if x.ndim != 2:
+        raise NotImplementedError
+
+    (y,) = _unbox(x)
+
+    return _box(type(x), ak.flatten(y)), _box(type(x), ak.num(y))
+
+
+def from_cf_contiguous(content: array, counts: array) -> array:
+    if content.ndim != 1 or counts.ndim != 1:
+        raise NotImplementedError
+
+    cont, cnts = _unbox(content, counts)
+
+    return _box(type(content), ak.unflatten(cont, cnts))
+
+
+def to_cf_indexed(x: array) -> Tuple[array, array]:
+    if x.ndim != 2:
+        raise NotImplementedError
+
+    _, ns = device_namespace(x.device)
+    (y,) = _unbox(x)
+
+    index, _ = ak.broadcast_arrays(ns.arange(len(x), dtype=ns.int64), y)
+
+    return _box(type(x), ak.flatten(y)), _box(type(x), ak.flatten(index))
+
+
+def from_cf_indexed(content: array, index: array) -> array:
+    if content.ndim != 1 or index.ndim != 1:
+        raise NotImplementedError
+
+    _, ns = device_namespace(content.device)
+    cont, ind = _unbox(content, index)
+
+    counts = ns.zeros(ak.max(ind) + 1, dtype=ns.int64)
+    ns.add.at(counts, ns.asarray(ind), 1)
+
+    return _box(type(content), ak.unflatten(cont[ns.argsort(ind)], counts))
+
+
+__all__ = ["to_cf_contiguous", "from_cf_contiguous", "to_cf_indexed", "from_cf_indexed"]

From 06182c07eeef94e62dea7fdb464d7867f86e2a67 Mon Sep 17 00:00:00 2001
From: Jim Pivarski
Date: Tue, 20 Feb 2024 18:01:07 -0600
Subject: [PATCH 2/2] fixed pre-commit errors

---
 src/ragged/io/__init__.py |  4 +++-
 src/ragged/io/cf.py       | 11 ++++-------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/ragged/io/__init__.py b/src/ragged/io/__init__.py
index ae8206c..05b1519 100644
--- a/src/ragged/io/__init__.py
+++ b/src/ragged/io/__init__.py
@@ -2,4 +2,6 @@
 
 from __future__ import annotations
 
-from .cf import *
+from .cf import from_cf_contiguous, from_cf_indexed, to_cf_contiguous, to_cf_indexed
+
+__all__ = ["to_cf_contiguous", "from_cf_contiguous", "to_cf_indexed", "from_cf_indexed"]
diff --git a/src/ragged/io/cf.py b/src/ragged/io/cf.py
index b36179f..5c5de3d 100644
--- a/src/ragged/io/cf.py
+++ b/src/ragged/io/cf.py
@@ -2,16 +2,13 @@
 
 from __future__ import annotations
 
-from typing import Tuple
-
 import awkward as ak
-import numpy as np
 
-from .._spec_array_object import array, _unbox, _box
 from .._import import device_namespace
+from .._spec_array_object import _box, _unbox, array
 
 
-def to_cf_contiguous(x: array) -> Tuple[array, array]:
+def to_cf_contiguous(x: array) -> tuple[array, array]:
     if x.ndim != 2:
         raise NotImplementedError
 
@@ -29,7 +26,7 @@ def from_cf_contiguous(content: array, counts: array) -> array:
     return _box(type(content), ak.unflatten(cont, cnts))
 
 
-def to_cf_indexed(x: array) -> Tuple[array, array]:
+def to_cf_indexed(x: array) -> tuple[array, array]:
     if x.ndim != 2:
         raise NotImplementedError
 
@@ -51,7 +48,7 @@ def from_cf_indexed(content: array, index: array) -> array:
     counts = ns.zeros(ak.max(ind) + 1, dtype=ns.int64)
     ns.add.at(counts, ns.asarray(ind), 1)
 
-    return _box(type(content), ak.unflatten(cont[ns.argsort(ind)], counts))
+    return _box(type(content), ak.unflatten(cont[ns.argsort(ind)], counts))  # type: ignore[index]
 
 
 __all__ = ["to_cf_contiguous", "from_cf_contiguous", "to_cf_indexed", "from_cf_indexed"]