diff --git a/src/ragged/__init__.py b/src/ragged/__init__.py index 8377e92..02af4d1 100644 --- a/src/ragged/__init__.py +++ b/src/ragged/__init__.py @@ -130,7 +130,7 @@ nonzero, where, ) -from ._spec_set_functions import ( +from ._spec_set_functions import ( # pylint: disable=R0401 unique_all, unique_counts, unique_inverse, diff --git a/src/ragged/_spec_array_object.py b/src/ragged/_spec_array_object.py index 3e27993..8c76c89 100644 --- a/src/ragged/_spec_array_object.py +++ b/src/ragged/_spec_array_object.py @@ -16,6 +16,7 @@ import numpy as np from awkward.contents import ( Content, + EmptyArray, ListArray, ListOffsetArray, NumpyArray, @@ -44,6 +45,8 @@ def _shape_dtype(layout: Content) -> tuple[Shape, Dtype]: else: shape = (*shape, None) node = node.content + if isinstance(node, EmptyArray): + node = node.to_NumpyArray(dtype=np.float64) if isinstance(node, NumpyArray): shape = shape + node.data.shape[1:] diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index c21f885..259ccd9 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -8,6 +8,11 @@ from collections import namedtuple +import awkward as ak +import numpy as np + +import ragged + from ._spec_array_object import array unique_all_result = namedtuple( # pylint: disable=C0103 @@ -47,8 +52,39 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_all.html """ - x # noqa: B018, pylint: disable=W0104 - raise NotImplementedError("TODO 128") # noqa: EM101 + if isinstance(x, ragged.array): + if x.ndim == 0: + return unique_all_result( + values=ragged.array(np.unique(x._impl, equal_nan=False)), # pylint: disable=W0212 + indices=ragged.array([0]), + inverse_indices=ragged.array([0]), + counts=ragged.array([1]), + ) + else: + x_flat = ak.ravel(x._impl) # pylint: disable=W0212 + if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 + return unique_all_result( + values=ragged.array(np.empty(0, x.dtype)), + indices=ragged.array(np.empty(0, np.int64)), + inverse_indices=ragged.array(np.empty(0, np.int64)), + counts=ragged.array(np.empty(0, np.int64)), + ) + values, indices, inverse_indices, counts = np.unique( + x_flat.layout.data, # pylint: disable=E1101 + return_index=True, + return_inverse=True, + return_counts=True, + equal_nan=False, + ) + return unique_all_result( + values=ragged.array(values), + indices=ragged.array(indices), + inverse_indices=ragged.array(inverse_indices), + counts=ragged.array(counts), + ) + else: + msg = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] + raise TypeError(msg) unique_counts_result = namedtuple( # pylint: disable=C0103 @@ -77,9 +113,30 @@ def unique_counts(x: array, /) -> tuple[array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_counts.html """ - - x # noqa: B018, pylint: disable=W0104 - raise NotImplementedError("TODO 129") # noqa: EM101 + if isinstance(x, ragged.array): + if x.ndim == 0: + return unique_counts_result( + values=ragged.array(np.unique(x._impl, equal_nan=False)), # pylint: disable=W0212 + counts=ragged.array([1]), # pylint: disable=W0212 + ) + else: + x_flat = ak.ravel(x._impl) # pylint: disable=W0212 + if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 + return unique_counts_result( + values=ragged.array(np.empty(0, x.dtype)), + counts=ragged.array(np.empty(0, np.int64)), + ) + values, counts = np.unique( + x_flat.layout.data, # pylint: disable=E1101 + return_counts=True, + equal_nan=False, + ) + return unique_counts_result( + values=ragged.array(values), counts=ragged.array(counts) + ) + else: + msg = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] + raise TypeError(msg) unique_inverse_result = namedtuple( # pylint: disable=C0103 @@ -108,9 +165,32 @@ def unique_inverse(x: array, /) -> tuple[array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_inverse.html """ - - x # noqa: B018, pylint: disable=W0104 - raise NotImplementedError("TODO 130") # noqa: EM101 + if isinstance(x, ragged.array): + if x.ndim == 0: + return unique_inverse_result( + values=ragged.array(np.unique(x._impl, equal_nan=False)), # pylint: disable=W0212 + inverse_indices=ragged.array([0]), + ) + else: + x_flat = ak.ravel(x._impl) # pylint: disable=W0212 + if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 + return unique_inverse_result( + values=ragged.array(np.empty(0, x.dtype)), + inverse_indices=ragged.array(np.empty(0, np.int64)), + ) + values, inverse_indices = np.unique( + x_flat.layout.data, # pylint: disable=E1101 + return_inverse=True, + equal_nan=False, + ) + + return unique_inverse_result( + values=ragged.array(values), + inverse_indices=ragged.array(inverse_indices), + ) + else: + msg = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] + raise TypeError(msg) def unique_values(x: array, /) -> array: @@ -128,6 +208,15 @@ def unique_values(x: array, /) -> array: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_values.html """ - - x # noqa: B018, pylint: disable=W0104 - raise NotImplementedError("TODO 131") # noqa: EM101 + if isinstance(x, ragged.array): + if x.ndim == 0: + return ragged.array(np.unique(x._impl, equal_nan=False)) # pylint: disable=W0212 + + else: + x_flat = ak.ravel(x._impl) # pylint: disable=W0212 + if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 + return ragged.array(np.empty(0, x.dtype)) + return ragged.array(np.unique(x_flat.layout.data, equal_nan=False)) # pylint: disable=E1101 + else: + err = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] + raise TypeError(err) diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index 3c06863..3a74cbc 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -6,6 +6,8 @@ from __future__ import annotations +import awkward as ak + import ragged @@ -14,3 +16,231 @@ def test_existence(): assert ragged.unique_counts is not None assert ragged.unique_inverse is not None assert ragged.unique_values is not None + + +# unique_values tests +def test_can_take_list(): + arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + unique_values = ragged.unique_values(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + + +def test_can_take_empty_arr(): + arr = ragged.array([]) + expected_unique_values = ragged.array([]) + unique_values = ragged.unique_values(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + + +def test_can_take_moredimensions(): + arr = ragged.array([[1, 2, 2, 3, 4], [5, 6]]) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6]) + unique_values = ragged.unique_values(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + + +def test_can_take_1d_array(): + arr = ragged.array([5, 6, 7, 8, 8, 9, 1, 2, 3, 4, 10, 0, 15, 2]) + expected_unique_values = ragged.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15]) + assert ak.to_list(ragged.unique_values(arr)) == ak.to_list(expected_unique_values) + + +def test_can_take_scalar_int(): + arr = ragged.array(5) + expected_unique_values = ragged.array(5) + unique_values = ragged.unique_values(arr) + assert unique_values == expected_unique_values + + +def test_can_take_scalar_float(): + arr = ragged.array(4.326) + expected_unique_values = ragged.array(4.326) + unique_values = ragged.unique_values(arr) + assert unique_values == expected_unique_values + + +# unique_counts tests +def test_can_count_list(): + arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + expected_unique_counts = ragged.array([1, 1, 1, 2, 1, 1, 1]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + + +def test_can_count_empty_arr(): + arr = ragged.array([]) + expected_unique_values = ragged.array([]) + expected_counts = ragged.array([]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + assert ak.to_list(expected_counts) == ak.to_list(unique_counts) + + +def test_can_count_simple_array(): + arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_counts = ragged.array([1, 2, 3, 4]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_counts) + + +def test_can_count_normal_array(): + arr = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_counts = ragged.array([1, 2, 3, 4]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_counts) + + +def test_can_count_scalar_int(): + arr = ragged.array(5) + expected_unique_values = ragged.array(5) + expected_counts = ragged.array([1]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert unique_values == expected_unique_values + assert unique_counts == expected_counts + + +def test_can_count_scalar_float(): + arr = ragged.array(4.326) + expected_unique_values = ragged.array(4.326) + expected_counts = ragged.array([1]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert unique_values == expected_unique_values + assert unique_counts == expected_counts + + +# unique_inverse tests +def test_can_inverse_list(): + arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + expected_inverse_indices = ragged.array([0, 1, 3, 2, 3, 4, 5, 6]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_inverse_empty_arr(): + arr = ragged.array([]) + expected_unique_values = ragged.array([]) + expected_inverse_indices = ragged.array([]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_inverse_simple_array(): + arr = ragged.array([[1, 2, 2], [3, 3, 3], [4, 4, 4, 4]]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_inverse_indices = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_inverse_normal_array(): + arr = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_inverse_indices = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_inverse_scalar_int(): + arr = ragged.array(5) + expected_unique_values = ragged.array(5) + expected_inverse_indices = ragged.array([0]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert unique_values == expected_unique_values + assert inverse_indices == expected_inverse_indices + + +def test_can_inverse_scalar_float(): + arr = ragged.array(4.326) + expected_unique_values = ragged.array(4.326) + expected_inverse_indices = ragged.array([0]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert unique_values == expected_unique_values + assert inverse_indices == expected_inverse_indices + + +# unique_all tests +def test_can_all_list(): + arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_unique_indices = ragged.array([0, 1, 3, 6]) + expected_unique_inverse = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) + expected_unique_counts = ragged.array([1, 2, 3, 4]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + + +def test_can_all_empty_arr(): + arr = ragged.array([]) + expected_unique_values = ragged.array([]) + expected_unique_indices = ragged.array([]) + expected_unique_inverse = ragged.array([]) + expected_unique_counts = ragged.array([]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + + +def test_can_all_normal_array(): + arr = ragged.array([[2, 2, 2], [3], [3, 5], [4, 4, 4], [4]]) + expected_unique_values = ragged.array([2, 3, 4, 5]) + expected_unique_indices = ragged.array([0, 3, 6, 5]) + expected_unique_inverse = ragged.array([0, 0, 0, 1, 1, 3, 2, 2, 2, 2]) + expected_unique_counts = ragged.array([3, 2, 4, 1]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + + +def test_can_all_scalar_int(): + arr = ragged.array(5) + expected_unique_values = ragged.array(5) + expected_unique_indices = ragged.array([0]) + expected_unique_inverse = ragged.array([0]) + expected_unique_counts = ragged.array([1]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert unique_values == expected_unique_values + assert unique_indices == expected_unique_indices + assert unique_inverse == expected_unique_inverse + assert unique_counts == expected_unique_counts + + +def test_can_all_scalar_float(): + arr = ragged.array(4.326) + expected_unique_values = ragged.array(4.326) + expected_unique_indices = ragged.array([0]) + expected_unique_inverse = ragged.array([0]) + expected_unique_counts = ragged.array([1]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert unique_values == expected_unique_values + assert unique_indices == expected_unique_indices + assert unique_inverse == expected_unique_inverse + assert unique_counts == expected_unique_counts