From 464f49a989704e1f11ed3fbd2e147306c3a32188 Mon Sep 17 00:00:00 2001
From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com>
Date: Mon, 2 Sep 2024 11:15:29 +0200
Subject: [PATCH] Add `__binsparse_descriptor__` and `__binsparse_dlpack__`.

---
 .../numba_backend/_compressed/compressed.py   | 58 +++++++++++++++++++
 sparse/numba_backend/_coo/core.py             | 38 ++++++++++++
 sparse/numba_backend/_dok.py                  |  6 ++
 sparse/numba_backend/_sparse_array.py         | 25 ++++++++
 4 files changed, 127 insertions(+)

diff --git a/sparse/numba_backend/_compressed/compressed.py b/sparse/numba_backend/_compressed/compressed.py
index 85bce095..04b9f2ed 100644
--- a/sparse/numba_backend/_compressed/compressed.py
+++ b/sparse/numba_backend/_compressed/compressed.py
@@ -844,6 +844,15 @@ def isinf(self):
     def isnan(self):
         return self.tocoo().isnan().asformat("gcxs", compressed_axes=self.compressed_axes)
 
+    # `GCXS` is a reshaped/transposed `CSR`, but it can't (usually)
+    # be expressed in the `binsparse` 0.1 language.
+    # We are missing index maps.
+    def __binsparse_descriptor__(self) -> dict:
+        return super().__binsparse_descriptor__()
+
+    def __binsparse_dlpack__(self) -> dict[str, np.ndarray]:
+        return super().__binsparse_dlpack__()
+
 
 class _Compressed2d(GCXS):
     class_compressed_axes: tuple[int]
@@ -883,6 +892,34 @@ def from_numpy(cls, x, fill_value=0, idx_dtype=None):
         coo = COO.from_numpy(x, fill_value=fill_value, idx_dtype=idx_dtype)
         return cls.from_coo(coo, cls.class_compressed_axes, idx_dtype)
 
+    def __binsparse_descriptor__(self) -> dict:
+        from sparse._version import __version__
+
+        data_dt = str(self.data.dtype)
+        if np.issubdtype(data_dt, np.complexfloating):
+            data_dt = f"complex[float{self.data.dtype.itemsize * 4}]"  # itemsize * 8 bits / 2 components
+        return {
+            "binsparse": {
+                "version": "0.1",
+                "format": self.format.upper(),
+                "shape": list(self.shape),
+                "number_of_stored_values": self.nnz,
+                "data_types": {
+                    "pointers_to_1": str(self.indptr.dtype),  # pointers come from `indptr`
+                    "indices_1": str(self.indices.dtype),  # indices come from `indices`
+                    "values": data_dt,
+                },
+            },
+            "original_source": f"`sparse`, version {__version__}",
+        }
+
+    def __binsparse_dlpack__(self) -> dict[str, np.ndarray]:
+        return {
+            "pointers_to_1": self.indptr,  # must match the dtypes reported by the descriptor
+            "indices_1": self.indices,
+            "values": self.data,
+        }
+
 
 class CSR(_Compressed2d):
     """
@@ -915,6 +952,27 @@ def transpose(self, axes: None = None, copy: bool = False) -> Union["CSC", "CSR"
             return self
         return CSC((self.data, self.indices, self.indptr), self.shape[::-1])
 
+    def __binsparse_descriptor__(self) -> dict:
+        from sparse._version import __version__
+
+        data_dt = str(self.data.dtype)
+        if np.issubdtype(data_dt, np.complexfloating):
+            data_dt = f"complex[float{self.data.dtype.itemsize * 4}]"  # itemsize * 8 bits / 2 components
+        return {
+            "binsparse": {
+                "version": "0.1",
+                "format": "CSR",
+                "shape": list(self.shape),
+                "number_of_stored_values": self.nnz,
+                "data_types": {
+                    "pointers_to_1": str(self.indptr.dtype),  # pointers come from `indptr`
+                    "indices_1": str(self.indices.dtype),  # indices come from `indices`
+                    "values": data_dt,
+                },
+            },
+            "original_source": f"`sparse`, version {__version__}",
+        }
+
 
 class CSC(_Compressed2d):
     """
diff --git a/sparse/numba_backend/_coo/core.py b/sparse/numba_backend/_coo/core.py
index 2b4b5a82..8165d55a 100644
--- a/sparse/numba_backend/_coo/core.py
+++ b/sparse/numba_backend/_coo/core.py
@@ -1537,6 +1537,44 @@ def isnan(self):
             prune=True,
         )
 
+    def __binsparse_descriptor__(self) -> dict:
+        from sparse._version import __version__
+
+        data_dt = str(self.data.dtype)
+        if np.issubdtype(data_dt, np.complexfloating):
+            data_dt = f"complex[float{self.data.dtype.itemsize * 4}]"  # itemsize * 8 bits / 2 components
+        return {
+            "binsparse": {
+                "version": "0.1",
+                "format": {
+                    "custom": {
+                        "level": {
+                            "level_desc": "sparse",
+                            "rank": self.ndim,
+                            "level": {
+                                "level_desc": "element",
+                            },
+                        }
+                    }
+                },
+                "shape": list(self.shape),
+                "number_of_stored_values": self.nnz,
+                "data_types": {
+                    "pointers_to_1": "uint64",  # keep in sync with the array built in `__binsparse_dlpack__`
+                    "indices_1": str(self.coords.dtype),
+                    "values": data_dt,
+                },
+            },
+            "original_source": f"`sparse`, version {__version__}",
+        }
+
+    def __binsparse_dlpack__(self) -> dict[str, np.ndarray]:
+        return {
+            "pointers_to_1": np.array([0, self.nnz], dtype=np.uint64),  # uint8 overflows once nnz > 255
+            "indices_1": self.coords,
+            "values": self.data,
+        }
+
 
 def as_coo(x, shape=None, fill_value=None, idx_dtype=None):
     """
diff --git a/sparse/numba_backend/_dok.py b/sparse/numba_backend/_dok.py
index 9c4e601d..809e926e 100644
--- a/sparse/numba_backend/_dok.py
+++ b/sparse/numba_backend/_dok.py
@@ -548,6 +548,12 @@ def reshape(self, shape, order="C"):
 
         return DOK.from_coo(self.to_coo().reshape(shape))
 
+    def __binsparse_descriptor__(self) -> dict:
+        raise RuntimeError("`DOK` doesn't support the `__binsparse_descriptor__` protocol.")
+
+    def __binsparse_dlpack__(self) -> dict[str, np.ndarray]:
+        raise RuntimeError("`DOK` doesn't support the `__binsparse_dlpack__` protocol.")
+
 
 def to_slice(k):
     """Convert integer indices to one-element slices for consistency"""
diff --git a/sparse/numba_backend/_sparse_array.py b/sparse/numba_backend/_sparse_array.py
index 7f47c6eb..e776a347 100644
--- a/sparse/numba_backend/_sparse_array.py
+++ b/sparse/numba_backend/_sparse_array.py
@@ -218,6 +218,31 @@ def _str_impl(self, summary):
         except (ImportError, ValueError):
             return summary
 
+    @abstractmethod
+    def __binsparse_descriptor__(self) -> dict:
+        """Return a `dict` equivalent to a parsed JSON [`binsparse` descriptor](https://graphblas.org/binsparse-specification/#descriptor)
+        of this array.
+
+        Returns
+        -------
+        dict
+            Parsed `binsparse` descriptor.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def __binsparse_dlpack__(self) -> dict[str, np.ndarray]:
+        """A `dict` containing the constituent arrays of this sparse array. The keys are compatible with the
+        [`binsparse`](https://graphblas.org/binsparse-specification/) scheme, and the values are [`__dlpack__`](https://data-apis.org/array-api/latest/API_specification/generated/array_api.array.__dlpack__.html)
+        compatible objects.
+
+        Returns
+        -------
+        dict[str, np.ndarray]
+            The constituent arrays.
+        """
+        raise NotImplementedError
+
     @abstractmethod
     def asformat(self, format):
         """