From 9225efb61bb61098612f74ba95af2fa155c00b9a Mon Sep 17 00:00:00 2001 From: Oleksii Hrechykha Date: Tue, 30 Jul 2024 18:32:25 +0300 Subject: [PATCH 01/19] adding set functions and tests --- src/ragged/_spec_set_functions.py | 54 +++++++++-- tests/test_spec_set_functions.py | 145 ++++++++++++++++++++++++++++++ 2 files changed, 190 insertions(+), 9 deletions(-) diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index c21f885..ed72c61 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -8,6 +8,11 @@ from collections import namedtuple +import awkward as ak +import numpy as np + +import ragged + from ._spec_array_object import array unique_all_result = namedtuple( # pylint: disable=C0103 @@ -47,8 +52,16 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_all.html """ - x # noqa: B018, pylint: disable=W0104 - raise NotImplementedError("TODO 128") # noqa: EM101 + if not isinstance(x, ragged.array): + err = f"Expected ragged type but got {type(x)}" + raise TypeError(err) + + if len(x)==1: + return ragged.array(x), ragged.array([0]), ragged.array([0]), ragged.array([1]) + + x_flat=ak.ravel(x._impl) + values, indices, inverse_indices, counts = np.unique(x_flat.layout.data, return_index=True, return_inverse=True, return_counts=True) + return ragged.array(values),ragged.array(indices), ragged.array(inverse_indices), ragged.array(counts) unique_counts_result = namedtuple( # pylint: disable=C0103 @@ -77,9 +90,16 @@ def unique_counts(x: array, /) -> tuple[array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_counts.html """ + if not isinstance(x, ragged.array): + err = f"Expected ragged type but got {type(x)}" + raise TypeError(err) - x # noqa: B018, pylint: disable=W0104 - raise NotImplementedError("TODO 129") # noqa: EM101 + if len(x)==1: + return ragged.array(x), ragged.array([1]) + + x_flat = ak.ravel(x._impl) + values, counts = np.unique(x_flat.layout.data, return_counts=True) + return ragged.array(values), ragged.array(counts) unique_inverse_result = namedtuple( # pylint: disable=C0103 @@ -108,9 +128,17 @@ def unique_inverse(x: array, /) -> tuple[array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_inverse.html """ + if not isinstance(x, ragged.array): + err = f"Expected ragged type but got {type(x)}" + raise TypeError(err) + + if len(x) == 1: + return ragged.array(x), ragged.array([0]) - x # noqa: B018, pylint: disable=W0104 - raise NotImplementedError("TODO 130") # noqa: EM101 + x_flat=ak.ravel(x._impl) + values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True) + + return ragged.array(values), ragged.array(inverse_indices) def unique_values(x: array, /) -> array: @@ -128,6 +156,14 @@ def unique_values(x: array, /) -> array: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_values.html """ - - x # noqa: B018, pylint: disable=W0104 - raise NotImplementedError("TODO 131") # noqa: EM101 + if not isinstance(x, ragged.array): + err = f"Expected ragged type but got {type(x)}" + raise TypeError(err) + + if len(x)==1: + return ragged.array(x) + + x_flat = ak.ravel(x._impl) + values = np.unique(x_flat.layout.data) + + return ragged.array(values) diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index 3c06863..ac11210 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -7,6 +7,16 @@ from __future__ import annotations import ragged +import awkward as ak +import pytest + +# Specific algorithm for unique_values: +# 1 take an input array +# 2 flatten input_array unless its 1d +# 3 {remember the first element, loop through the rest of the list to see if there are copies +# if yes then discard it and repeat the step +# if not then add it to the output and repeat the step} +# 4 once the cycle is over return an array of unique elements in the input array (the output must be of the same type as input array) def test_existence(): @@ -14,3 +24,138 @@ def test_existence(): assert ragged.unique_counts is not None assert ragged.unique_inverse is not None assert ragged.unique_values is not None + +#unique_values tests +def test_can_take_none(): + assert ragged.unique_values(None)==None + +def test_can_take_list(): + with pytest.raises(TypeError): + assert ragged.unique_values([1,2,4,3,4,5,6,20]) + +def test_can_take_empty_arr(): + with pytest.raises(TypeError): + assert ragged.unique_values(ragged.array([])) + +def test_can_take_moredimensions(): + with pytest.raises(ValueError): + assert ragged.unique_values(ragged.array([[1,2,3,4],[5,6]])) + +def test_can_take_1d_array(): + arr=ragged.array([5,6,7,8,8,9,1,2,3,4,10,0,15,2]) + expected_unique_values = ragged.array([0,1,2,3,4,5,6,7,8,9,10,15]) + assert ak.to_list(ragged.unique_values(arr))==ak.to_list(expected_unique_values) + + +#unique_counts tests +def test_can_count_none(): + with pytest.raises(TypeError): + assert ragged.unique_counts(None) is None + +def test_can_count_list(): + with pytest.raises(TypeError): + assert ragged.unique_counts([1,2,4,3,4,5,6,20]) is None + +def test_can_count_simple_array(): + arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_counts = ragged.array([1, 2, 3, 4]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_counts) + +def test_can_count_normal_array(): + arr = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_counts = ragged.array([1, 2, 3, 4]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_counts) + + +def test_can_count_scalar(): + arr = ragged.array([5]) + expected_unique_values = ragged.array([5]) + expected_counts = ragged.array([1]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_counts) + +#unique_inverse tests +def test_can_take_none(): + with pytest.raises(TypeError): + assert ragged.unique_inverse(None) is None + +def test_can_take_list(): + with pytest.raises(TypeError): + assert ragged.unique_inverse([1,2,4,3,4,5,6,20]) is None + +def test_can_take_simple_array(): + arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_inverse_indices = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + +def test_can_take_normal_array(): + arr = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_inverse_indices = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_take_scalar(): + arr = ragged.array([5]) + expected_unique_values = ragged.array([5]) + expected_unique_indices = ragged.array([0]) + unique_values, unique_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + +#unique_all tests +def test_can_all_none(): + with pytest.raises(TypeError): + assert ragged.unique_all(None) is None + +def test_can_all_list(): + with pytest.raises(TypeError): + assert ragged.unique_all([1,2,4,3,4,5,6,20]) is None + +def test_can_all_simple_array(): + arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) + expected_unique_values = ragged.array([1, 2, 3, 4]) + expected_unique_indices = ragged.array([0, 1, 3, 6]) + expected_unique_inverse = ragged.array([0, 1,1,2,2,2,3,3,3,3]) + expected_unique_counts = ragged.array([1, 2, 3, 4]) + unique_values, unique_indices, unique_inverse,unique_counts = ragged.unique_all(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + +def test_can_all_normal_array(): + arr = ragged.array([[2, 2, 2], [3], [3, 5], [4, 4, 4], [4]]) + expected_unique_values = ragged.array([2, 3, 4, 5]) + expected_unique_indices = ragged.array([0, 3, 6, 5]) + expected_unique_inverse = ragged.array([0,0,0,1,1,3,2,2,2,2]) + expected_unique_counts = ragged.array([3, 2, 4, 1]) + unique_values, unique_indices, unique_inverse,unique_counts = ragged.unique_all(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + +def test_can_all_scalar(): + arr = ragged.array([5]) + expected_unique_values = ragged.array([5]) + expected_unique_indices = ragged.array([0]) + expected_unique_inverse = ragged.array([0]) + expected_unique_counts = ragged.array([1]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) \ No newline at end of file From 14c58ace9333c5beef83e7facea8c08eb80bd0c2 Mon Sep 17 00:00:00 2001 From: Oleksii Hrechykha Date: Wed, 31 Jul 2024 17:33:05 +0300 Subject: [PATCH 02/19] pushing pre-commit changes --- src/ragged/_spec_set_functions.py | 35 +++++++++------ tests/test_spec_set_functions.py | 74 ++++++++++++++++++++----------- 2 files changed, 70 insertions(+), 39 deletions(-) diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index ed72c61..b1d6c56 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -55,13 +55,20 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: if not isinstance(x, ragged.array): err = f"Expected ragged type but got {type(x)}" raise TypeError(err) - - if len(x)==1: + + if len(x) == 1: return ragged.array(x), ragged.array([0]), ragged.array([0]), ragged.array([1]) - - x_flat=ak.ravel(x._impl) - values, indices, inverse_indices, counts = np.unique(x_flat.layout.data, return_index=True, return_inverse=True, return_counts=True) - return ragged.array(values),ragged.array(indices), ragged.array(inverse_indices), ragged.array(counts) + + x_flat = ak.ravel(x._impl) + values, indices, inverse_indices, counts = np.unique( + x_flat.layout.data, return_index=True, return_inverse=True, return_counts=True + ) + return ( + ragged.array(values), + ragged.array(indices), + ragged.array(inverse_indices), + ragged.array(counts), + ) unique_counts_result = namedtuple( # pylint: disable=C0103 @@ -94,7 +101,7 @@ def unique_counts(x: array, /) -> tuple[array, array]: err = f"Expected ragged type but got {type(x)}" raise TypeError(err) - if len(x)==1: + if len(x) == 1: return ragged.array(x), ragged.array([1]) x_flat = ak.ravel(x._impl) @@ -131,13 +138,13 @@ def unique_inverse(x: array, /) -> tuple[array, array]: if not isinstance(x, ragged.array): err = f"Expected ragged type but got {type(x)}" raise TypeError(err) - + if len(x) == 1: - return ragged.array(x), ragged.array([0]) + return ragged.array(x), ragged.array([0]) - x_flat=ak.ravel(x._impl) + x_flat = ak.ravel(x._impl) values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True) - + return ragged.array(values), ragged.array(inverse_indices) @@ -159,10 +166,10 @@ def unique_values(x: array, /) -> array: if not isinstance(x, ragged.array): err = f"Expected ragged type but got {type(x)}" raise TypeError(err) - - if len(x)==1: + + if len(x) == 1: return ragged.array(x) - + x_flat = ak.ravel(x._impl) values = np.unique(x_flat.layout.data) diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index ac11210..fb53ee7 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -6,10 +6,11 @@ from __future__ import annotations -import ragged import awkward as ak import pytest +import ragged + # Specific algorithm for unique_values: # 1 take an input array # 2 flatten input_array unless its 1d @@ -25,36 +26,43 @@ def test_existence(): assert ragged.unique_inverse is not None assert ragged.unique_values is not None -#unique_values tests + +# unique_values tests def test_can_take_none(): - assert ragged.unique_values(None)==None + assert ragged.unique_values(None) == None + def test_can_take_list(): with pytest.raises(TypeError): - assert ragged.unique_values([1,2,4,3,4,5,6,20]) + assert ragged.unique_values([1, 2, 4, 3, 4, 5, 6, 20]) + def test_can_take_empty_arr(): with pytest.raises(TypeError): assert ragged.unique_values(ragged.array([])) + def test_can_take_moredimensions(): with pytest.raises(ValueError): - assert ragged.unique_values(ragged.array([[1,2,3,4],[5,6]])) + assert ragged.unique_values(ragged.array([[1, 2, 3, 4], [5, 6]])) + def test_can_take_1d_array(): - arr=ragged.array([5,6,7,8,8,9,1,2,3,4,10,0,15,2]) - expected_unique_values = ragged.array([0,1,2,3,4,5,6,7,8,9,10,15]) - assert ak.to_list(ragged.unique_values(arr))==ak.to_list(expected_unique_values) + arr = ragged.array([5, 6, 7, 8, 8, 9, 1, 2, 3, 4, 10, 0, 15, 2]) + expected_unique_values = ragged.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15]) + assert ak.to_list(ragged.unique_values(arr)) == ak.to_list(expected_unique_values) -#unique_counts tests +# unique_counts tests def test_can_count_none(): with pytest.raises(TypeError): assert ragged.unique_counts(None) is None + def test_can_count_list(): with pytest.raises(TypeError): - assert ragged.unique_counts([1,2,4,3,4,5,6,20]) is None + assert ragged.unique_counts([1, 2, 4, 3, 4, 5, 6, 20]) is None + def test_can_count_simple_array(): arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) @@ -63,7 +71,8 @@ def test_can_count_simple_array(): unique_values, unique_counts = ragged.unique_counts(arr) assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) assert ak.to_list(unique_counts) == ak.to_list(expected_counts) - + + def test_can_count_normal_array(): arr = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]]) expected_unique_values = ragged.array([1, 2, 3, 4]) @@ -71,7 +80,7 @@ def test_can_count_normal_array(): unique_values, unique_counts = ragged.unique_counts(arr) assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) assert ak.to_list(unique_counts) == ak.to_list(expected_counts) - + def test_can_count_scalar(): arr = ragged.array([5]) @@ -81,14 +90,17 @@ def test_can_count_scalar(): assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) assert ak.to_list(unique_counts) == ak.to_list(expected_counts) -#unique_inverse tests + +# unique_inverse tests def test_can_take_none(): with pytest.raises(TypeError): assert ragged.unique_inverse(None) is None + def test_can_take_list(): with pytest.raises(TypeError): - assert ragged.unique_inverse([1,2,4,3,4,5,6,20]) is None + assert ragged.unique_inverse([1, 2, 4, 3, 4, 5, 6, 20]) is None + def test_can_take_simple_array(): arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) @@ -97,7 +109,8 @@ def test_can_take_simple_array(): unique_values, inverse_indices = ragged.unique_inverse(arr) assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) - + + def test_can_take_normal_array(): arr = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]]) expected_unique_values = ragged.array([1, 2, 3, 4]) @@ -105,7 +118,7 @@ def test_can_take_normal_array(): unique_values, inverse_indices = ragged.unique_inverse(arr) assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) - + def test_can_take_scalar(): arr = ragged.array([5]) @@ -115,47 +128,58 @@ def test_can_take_scalar(): assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) -#unique_all tests + +# unique_all tests def test_can_all_none(): with pytest.raises(TypeError): assert ragged.unique_all(None) is None + def test_can_all_list(): with pytest.raises(TypeError): - assert ragged.unique_all([1,2,4,3,4,5,6,20]) is None + assert ragged.unique_all([1, 2, 4, 3, 4, 5, 6, 20]) is None + def test_can_all_simple_array(): arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) expected_unique_values = ragged.array([1, 2, 3, 4]) expected_unique_indices = ragged.array([0, 1, 3, 6]) - expected_unique_inverse = ragged.array([0, 1,1,2,2,2,3,3,3,3]) + expected_unique_inverse = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) expected_unique_counts = ragged.array([1, 2, 3, 4]) - unique_values, unique_indices, unique_inverse,unique_counts = ragged.unique_all(arr) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) - + + def test_can_all_normal_array(): arr = ragged.array([[2, 2, 2], [3], [3, 5], [4, 4, 4], [4]]) expected_unique_values = ragged.array([2, 3, 4, 5]) expected_unique_indices = ragged.array([0, 3, 6, 5]) - expected_unique_inverse = ragged.array([0,0,0,1,1,3,2,2,2,2]) + expected_unique_inverse = ragged.array([0, 0, 0, 1, 1, 3, 2, 2, 2, 2]) expected_unique_counts = ragged.array([3, 2, 4, 1]) - unique_values, unique_indices, unique_inverse,unique_counts = ragged.unique_all(arr) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + def test_can_all_scalar(): arr = ragged.array([5]) expected_unique_values = ragged.array([5]) expected_unique_indices = ragged.array([0]) expected_unique_inverse = ragged.array([0]) expected_unique_counts = ragged.array([1]) - unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all(arr) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) - assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) \ No newline at end of file + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) From 24ddbfd1768f06016602105cd9a3c406987d63e6 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Mon, 5 Aug 2024 11:29:16 +0300 Subject: [PATCH 03/19] ruff fixes for test_spec_set_functions.py --- tests/test_spec_set_functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index fb53ee7..8a3390c 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -29,7 +29,8 @@ def test_existence(): # unique_values tests def test_can_take_none(): - assert ragged.unique_values(None) == None + with pytest.raises(TypeError): + assert ragged.unique_values(None) is None def test_can_take_list(): From 22fe8b9fcdfdf43f7d090d9a9466ee00d7c34b68 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Mon, 5 Aug 2024 11:58:11 +0300 Subject: [PATCH 04/19] further fixes in test_spec_set_functions.py --- tests/test_spec_set_functions.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index 8a3390c..55efbb7 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -10,7 +10,7 @@ import pytest import ragged - +import re # Specific algorithm for unique_values: # 1 take an input array # 2 flatten input_array unless its 1d @@ -29,12 +29,12 @@ def test_existence(): # unique_values tests def test_can_take_none(): - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=f"Expected ragged type but got {type(None)}"): assert ragged.unique_values(None) is None def test_can_take_list(): - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=f"Expected ragged type but got "): assert ragged.unique_values([1, 2, 4, 3, 4, 5, 6, 20]) @@ -44,7 +44,7 @@ def test_can_take_empty_arr(): def test_can_take_moredimensions(): - with pytest.raises(ValueError): + with pytest.raises(ValueError,match=re.escape("the truth value of an array whose length is not 1 is ambiguous; use ak.any() or ak.all()")): assert ragged.unique_values(ragged.array([[1, 2, 3, 4], [5, 6]])) @@ -93,12 +93,12 @@ def test_can_count_scalar(): # unique_inverse tests -def test_can_take_none(): +def test_can_inverse_none(): with pytest.raises(TypeError): assert ragged.unique_inverse(None) is None -def test_can_take_list(): +def test_can_inverse_list(): with pytest.raises(TypeError): assert ragged.unique_inverse([1, 2, 4, 3, 4, 5, 6, 20]) is None From 91533576c385c65100138a772c44f6b5d7f21c55 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Mon, 5 Aug 2024 12:57:48 +0300 Subject: [PATCH 05/19] fixing mypy unreachable errors in _spec_set_functions.py --- src/ragged/_spec_set_functions.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index b1d6c56..3cef699 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -53,8 +53,7 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: """ if not isinstance(x, ragged.array): - err = f"Expected ragged type but got {type(x)}" - raise TypeError(err) + raise TypeError(f"Expected ragged type but got {type(x)}") if len(x) == 1: return ragged.array(x), ragged.array([0]), ragged.array([0]), ragged.array([1]) @@ -98,8 +97,7 @@ def unique_counts(x: array, /) -> tuple[array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_counts.html """ if not isinstance(x, ragged.array): - err = f"Expected ragged type but got {type(x)}" - raise TypeError(err) + raise TypeError(f"Expected ragged type but got {type(x)}") if len(x) == 1: return ragged.array(x), ragged.array([1]) @@ -136,8 +134,7 @@ def unique_inverse(x: array, /) -> tuple[array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_inverse.html """ if not isinstance(x, ragged.array): - err = f"Expected ragged type but got {type(x)}" - raise TypeError(err) + raise TypeError(f"Expected ragged type but got {type(x)}") if len(x) == 1: return ragged.array(x), ragged.array([0]) @@ -164,8 +161,7 @@ def unique_values(x: array, /) -> array: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_values.html """ if not isinstance(x, ragged.array): - err = f"Expected ragged type but got {type(x)}" - raise TypeError(err) + raise TypeError(f"Expected ragged type but got {type(x)}") if len(x) == 1: return ragged.array(x) From 6c814a7f7bb2cc5c92848b9387ca16bab48b5294 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Mon, 5 Aug 2024 18:52:39 +0300 Subject: [PATCH 06/19] marking tests with None and empty arrays as comments --- tests/test_spec_set_functions.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index 55efbb7..0e1eb41 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -28,9 +28,9 @@ def test_existence(): # unique_values tests -def test_can_take_none(): - with pytest.raises(TypeError, match=f"Expected ragged type but got {type(None)}"): - assert ragged.unique_values(None) is None +#def test_can_take_none(): +# with pytest.raises(TypeError, match=f"Expected ragged type but got {type(None)}"): +# assert ragged.unique_values(None) is None def test_can_take_list(): @@ -38,9 +38,9 @@ def test_can_take_list(): assert ragged.unique_values([1, 2, 4, 3, 4, 5, 6, 20]) -def test_can_take_empty_arr(): - with pytest.raises(TypeError): - assert ragged.unique_values(ragged.array([])) +#def test_can_take_empty_arr(): +# with pytest.raises(TypeError): +# assert ragged.unique_values(ragged.array([])) def test_can_take_moredimensions(): @@ -55,9 +55,9 @@ def test_can_take_1d_array(): # unique_counts tests -def test_can_count_none(): - with pytest.raises(TypeError): - assert ragged.unique_counts(None) is None +#def test_can_count_none(): +# with pytest.raises(TypeError): +# assert ragged.unique_counts(None) is None def test_can_count_list(): @@ -93,9 +93,9 @@ def test_can_count_scalar(): # unique_inverse tests -def test_can_inverse_none(): - with pytest.raises(TypeError): - assert ragged.unique_inverse(None) is None +#def test_can_inverse_none(): +# with pytest.raises(TypeError): +# assert ragged.unique_inverse(None) is None def test_can_inverse_list(): @@ -104,7 +104,7 @@ def test_can_inverse_list(): def test_can_take_simple_array(): - arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) + arr = ragged.array([[1, 2, 2], [3, 3, 3], [4, 4, 4, 4]]) expected_unique_values = ragged.array([1, 2, 3, 4]) expected_inverse_indices = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) unique_values, inverse_indices = ragged.unique_inverse(arr) @@ -131,9 +131,9 @@ def test_can_take_scalar(): # unique_all tests -def test_can_all_none(): - with pytest.raises(TypeError): - assert ragged.unique_all(None) is None +#def test_can_all_none(): +# with pytest.raises(TypeError): +# assert ragged.unique_all(None) is None def test_can_all_list(): From 83dabd9e623d3da110e417545ae859c66c7c2600 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Mon, 19 Aug 2024 16:21:13 +0300 Subject: [PATCH 07/19] adding namedtuple & corresponding test fixes --- src/ragged/_spec_set_functions.py | 120 +++++++++++++++++++----------- tests/test_spec_set_functions.py | 86 +++++++++++++-------- 2 files changed, 129 insertions(+), 77 deletions(-) diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index 3cef699..9094386 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -52,22 +52,31 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_all.html """ - if not isinstance(x, ragged.array): - raise TypeError(f"Expected ragged type but got {type(x)}") - - if len(x) == 1: - return ragged.array(x), ragged.array([0]), ragged.array([0]), ragged.array([1]) - - x_flat = ak.ravel(x._impl) - values, indices, inverse_indices, counts = np.unique( - x_flat.layout.data, return_index=True, return_inverse=True, return_counts=True - ) - return ( - ragged.array(values), - ragged.array(indices), - ragged.array(inverse_indices), - ragged.array(counts), - ) + if isinstance(x, ragged.array): + if len(x) == 1: + return unique_all_result( + values=ragged.array(x), + indices=ragged.array([0]), + inverse_indices=ragged.array([0]), + counts=ragged.array([1]), + ) + else: + x_flat = ak.ravel(x._impl) + values, indices, inverse_indices, counts = np.unique( + x_flat.layout.data, + return_index=True, + return_inverse=True, + return_counts=True, + ) + return unique_all_result( + values=ragged.array(values), + indices=ragged.array(indices), + inverse_indices=ragged.array(inverse_indices), + counts=ragged.array(counts), + ) + else: + msg = f"Expected ragged type but got {type(x)}" + raise TypeError(msg) unique_counts_result = namedtuple( # pylint: disable=C0103 @@ -96,15 +105,24 @@ def unique_counts(x: array, /) -> tuple[array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_counts.html """ - if not isinstance(x, ragged.array): - raise TypeError(f"Expected ragged type but got {type(x)}") - - if len(x) == 1: - return ragged.array(x), ragged.array([1]) - - x_flat = ak.ravel(x._impl) - values, counts = np.unique(x_flat.layout.data, return_counts=True) - return ragged.array(values), ragged.array(counts) + if isinstance(x, ragged.array): + if x.ndim == 0: + return unique_counts_result( + values=ragged.array([x]), counts=ragged.array([1]) + ) + elif len(x) == 1: + return unique_counts_result( + values=ragged.array(x), counts=ragged.array([1]) + ) + else: + x_flat = ak.ravel(x._impl) + values, counts = np.unique(x_flat.layout.data, return_counts=True) + return unique_counts_result( + values=ragged.array(values), counts=ragged.array(counts) + ) + else: + msg = f"Expected ragged type but got {type(x)}" + raise TypeError(msg) unique_inverse_result = namedtuple( # pylint: disable=C0103 @@ -133,16 +151,26 @@ def unique_inverse(x: array, /) -> tuple[array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_inverse.html """ - if not isinstance(x, ragged.array): - raise TypeError(f"Expected ragged type but got {type(x)}") - - if len(x) == 1: - return ragged.array(x), ragged.array([0]) - - x_flat = ak.ravel(x._impl) - values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True) - - return ragged.array(values), ragged.array(inverse_indices) + if isinstance(x, ragged.array): + if x.ndim == 0: + return unique_inverse_result( + values=ragged.array([x]), inverse_indices=ragged.array([0]) + ) + elif len(x) == 1: + return unique_inverse_result( + values=ragged.array(x), inverse_indices=ragged.array([0]) + ) + else: + x_flat = ak.ravel(x._impl) + values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True) + + return unique_inverse_result( + values=ragged.array(values), + inverse_indices=ragged.array(inverse_indices), + ) + else: + msg = f"Expected ragged type but got {type(x)}" + raise TypeError(msg) def unique_values(x: array, /) -> array: @@ -160,13 +188,15 @@ def unique_values(x: array, /) -> array: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_values.html """ - if not isinstance(x, ragged.array): - raise TypeError(f"Expected ragged type but got {type(x)}") - - if len(x) == 1: - return ragged.array(x) - - x_flat = ak.ravel(x._impl) - values = np.unique(x_flat.layout.data) - - return ragged.array(values) + if isinstance(x, ragged.array): + if x.ndim == 0: + return ragged.array([x]) + + if len(x) == 1: + return ragged.array(x) + else: + x_flat = ak.ravel(x._impl) + return ragged.array(np.unique(x_flat.layout.data)) + else: + err = f"Expected ragged type but got {type(x)}" + raise TypeError(err) diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index 0e1eb41..1786895 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -6,18 +6,12 @@ from __future__ import annotations +import re + import awkward as ak import pytest import ragged -import re -# Specific algorithm for unique_values: -# 1 take an input array -# 2 flatten input_array unless its 1d -# 3 {remember the first element, loop through the rest of the list to see if there are copies -# if yes then discard it and repeat the step -# if not then add it to the output and repeat the step} -# 4 once the cycle is over return an array of unique elements in the input array (the output must be of the same type as input array) def test_existence(): @@ -28,23 +22,35 @@ def test_existence(): # unique_values tests -#def test_can_take_none(): -# with pytest.raises(TypeError, match=f"Expected ragged type but got {type(None)}"): -# assert ragged.unique_values(None) is None +def test_can_take_none(): + with pytest.raises(TypeError): + assert ragged.unique_values(ragged.array(None)) is None def test_can_take_list(): - with pytest.raises(TypeError, match=f"Expected ragged type but got "): - assert ragged.unique_values([1, 2, 4, 3, 4, 5, 6, 20]) - - -#def test_can_take_empty_arr(): -# with pytest.raises(TypeError): -# assert ragged.unique_values(ragged.array([])) + with pytest.raises( + ValueError, + match=re.escape( + "the truth value of an array whose length is not 1 is ambiguous;" + ), + ): + assert ragged.unique_values( + ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + ) == ragged.array([1, 2, 3, 4, 5, 6, 20]) + + +def test_can_take_empty_arr(): + with pytest.raises(TypeError): + assert ragged.unique_values(ragged.array([])) == ragged.array([]) def test_can_take_moredimensions(): - with pytest.raises(ValueError,match=re.escape("the truth value of an array whose length is not 1 is ambiguous; use ak.any() or ak.all()")): + with pytest.raises( + ValueError, + match=re.escape( + "the truth value of an array whose length is not 1 is ambiguous;" + ), + ): assert ragged.unique_values(ragged.array([[1, 2, 3, 4], [5, 6]])) @@ -55,14 +61,16 @@ def test_can_take_1d_array(): # unique_counts tests -#def test_can_count_none(): -# with pytest.raises(TypeError): -# assert ragged.unique_counts(None) is None +def test_can_count_none(): + with pytest.raises(TypeError): + assert ragged.unique_counts(ragged.array(None)) is None def test_can_count_list(): with pytest.raises(TypeError): - assert ragged.unique_counts([1, 2, 4, 3, 4, 5, 6, 20]) is None + assert ragged.unique_counts( + ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + ) == ragged.array([1, 2, 3, 4, 5, 6, 20]), ragged.array([1, 1, 2, 1, 1, 1, 1]) def test_can_count_simple_array(): @@ -93,14 +101,18 @@ def test_can_count_scalar(): # unique_inverse tests -#def test_can_inverse_none(): -# with pytest.raises(TypeError): -# assert ragged.unique_inverse(None) is None +def test_can_inverse_none(): + with pytest.raises(TypeError): + assert ragged.unique_inverse(ragged.array(None)) is None def test_can_inverse_list(): with pytest.raises(TypeError): - assert ragged.unique_inverse([1, 2, 4, 3, 4, 5, 6, 20]) is None + assert ragged.unique_inverse( + ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + ) == ragged.array([1, 2, 3, 4, 5, 6, 20]), ragged.array( + [0, 1, 3, 2, 3, 4, 5, 6] + ) def test_can_take_simple_array(): @@ -131,14 +143,24 @@ def test_can_take_scalar(): # unique_all tests -#def test_can_all_none(): -# with pytest.raises(TypeError): -# assert ragged.unique_all(None) is None +def test_can_all_none(): + with pytest.raises(TypeError): + assert ragged.unique_all(ragged.array(None)) is None def test_can_all_list(): - with pytest.raises(TypeError): - assert ragged.unique_all([1, 2, 4, 3, 4, 5, 6, 20]) is None + with pytest.raises( + ValueError, + match=re.escape( + "the truth value of an array whose length is not 1 is ambiguous;" + ), + ): + assert ragged.unique_all(ragged.array([1, 2, 4, 3, 4, 5, 6, 20])) == ( + ragged.array([1, 2, 3, 4, 5, 6, 20]), + ragged.array([0, 1, 3, 2, 5, 6, 7]), + ragged.array([0, 1, 3, 2, 3, 4, 5, 6]), + ragged.array([1, 1, 1, 2, 1, 1, 1]), + ) def test_can_all_simple_array(): From 1ca91060f1d794452079c0fa815619a205f1cc91 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Mon, 26 Aug 2024 14:41:20 +0300 Subject: [PATCH 08/19] function if changes + test standartization --- src/ragged/_spec_set_functions.py | 35 ++++------ tests/test_spec_set_functions.py | 108 +++++++++++++----------------- 2 files changed, 61 insertions(+), 82 deletions(-) diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index 9094386..e8df841 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -53,9 +53,9 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: """ if isinstance(x, ragged.array): - if len(x) == 1: + if x.ndim == 0: return unique_all_result( - values=ragged.array(x), + values=ragged.array([x]), indices=ragged.array([0]), inverse_indices=ragged.array([0]), counts=ragged.array([1]), @@ -67,6 +67,7 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: return_index=True, return_inverse=True, return_counts=True, + equal_nan=False, ) return unique_all_result( values=ragged.array(values), @@ -75,8 +76,8 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: counts=ragged.array(counts), ) else: - msg = f"Expected ragged type but got {type(x)}" - raise TypeError(msg) + msg = f"Expected ragged type but got {type(x)}" # type: ignore + raise TypeError(msg) # type: ignore unique_counts_result = namedtuple( # pylint: disable=C0103 @@ -110,10 +111,6 @@ def unique_counts(x: array, /) -> tuple[array, array]: return unique_counts_result( values=ragged.array([x]), counts=ragged.array([1]) ) - elif len(x) == 1: - return unique_counts_result( - values=ragged.array(x), counts=ragged.array([1]) - ) else: x_flat = ak.ravel(x._impl) values, counts = np.unique(x_flat.layout.data, return_counts=True) @@ -121,8 +118,8 @@ def unique_counts(x: array, /) -> tuple[array, array]: values=ragged.array(values), counts=ragged.array(counts) ) else: - msg = f"Expected ragged type but got {type(x)}" - raise TypeError(msg) + msg = f"Expected ragged type but got {type(x)}" # type: ignore + raise TypeError(msg) # type: ignore unique_inverse_result = namedtuple( # pylint: disable=C0103 @@ -152,13 +149,9 @@ def unique_inverse(x: array, /) -> tuple[array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_inverse.html """ if isinstance(x, ragged.array): - if x.ndim == 0: - return unique_inverse_result( - values=ragged.array([x]), inverse_indices=ragged.array([0]) - ) - elif len(x) == 1: + if ak.is_scalar(x): return unique_inverse_result( - values=ragged.array(x), inverse_indices=ragged.array([0]) + values=x, inverse_indices=ragged.array([0]) ) else: x_flat = ak.ravel(x._impl) @@ -169,8 +162,8 @@ def unique_inverse(x: array, /) -> tuple[array, array]: inverse_indices=ragged.array(inverse_indices), ) else: - msg = f"Expected ragged type but got {type(x)}" - raise TypeError(msg) + msg = f"Expected ragged type but got {type(x)}" # type: ignore + raise TypeError(msg) # type: ignore def unique_values(x: array, /) -> array: @@ -192,11 +185,9 @@ def unique_values(x: array, /) -> array: if x.ndim == 0: return ragged.array([x]) - if len(x) == 1: - return ragged.array(x) else: x_flat = ak.ravel(x._impl) return ragged.array(np.unique(x_flat.layout.data)) else: - err = f"Expected ragged type but got {type(x)}" - raise TypeError(err) + err = f"Expected ragged type but got {type(x)}" # type: ignore + raise TypeError(err) # type: ignore diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index 1786895..60026e1 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -6,8 +6,6 @@ from __future__ import annotations -import re - import awkward as ak import pytest @@ -22,36 +20,28 @@ def test_existence(): # unique_values tests -def test_can_take_none(): - with pytest.raises(TypeError): - assert ragged.unique_values(ragged.array(None)) is None +# def test_can_take_none(): +# with pytest.raises(TypeError): +# assert ragged.unique_values(ragged.array(None)) is None def test_can_take_list(): - with pytest.raises( - ValueError, - match=re.escape( - "the truth value of an array whose length is not 1 is ambiguous;" - ), - ): - assert ragged.unique_values( - ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) - ) == ragged.array([1, 2, 3, 4, 5, 6, 20]) + assert ak.to_list( + ragged.unique_values(ragged.array([1, 2, 4, 3, 4, 5, 6, 20])) + ) == ak.to_list(ragged.array([1, 2, 3, 4, 5, 6, 20])) def test_can_take_empty_arr(): - with pytest.raises(TypeError): - assert ragged.unique_values(ragged.array([])) == ragged.array([]) + # with pytest.raises(TypeError): + assert ak.to_list(ragged.unique_values(ragged.array([]))) == ak.to_list( + ragged.array([]) + ) def test_can_take_moredimensions(): - with pytest.raises( - ValueError, - match=re.escape( - "the truth value of an array whose length is not 1 is ambiguous;" - ), - ): - assert ragged.unique_values(ragged.array([[1, 2, 3, 4], [5, 6]])) + assert ak.to_list( + ragged.unique_values(ragged.array([[1, 2, 2, 3, 4], [5, 6]])) + ) == ak.to_list(ragged.array([1, 2, 3, 4, 5, 6])) def test_can_take_1d_array(): @@ -61,16 +51,18 @@ def test_can_take_1d_array(): # unique_counts tests -def test_can_count_none(): - with pytest.raises(TypeError): - assert ragged.unique_counts(ragged.array(None)) is None +# def test_can_count_none(): +# with pytest.raises(TypeError): +# assert ragged.unique_counts(ragged.array(None)) is None def test_can_count_list(): - with pytest.raises(TypeError): - assert ragged.unique_counts( - ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) - ) == ragged.array([1, 2, 3, 4, 5, 6, 20]), ragged.array([1, 1, 2, 1, 1, 1, 1]) + arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + expected_unique_counts = ragged.array([1, 1, 1, 2, 1, 1, 1]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) def test_can_count_simple_array(): @@ -101,21 +93,22 @@ def test_can_count_scalar(): # unique_inverse tests -def test_can_inverse_none(): - with pytest.raises(TypeError): - assert ragged.unique_inverse(ragged.array(None)) is None +# def test_can_inverse_none(): +# with pytest.raises(TypeError): +# assert ak.to_list(ragged.unique_inverse(ragged.array(None))) is ak.to_list(None) def test_can_inverse_list(): - with pytest.raises(TypeError): - assert ragged.unique_inverse( - ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) - ) == ragged.array([1, 2, 3, 4, 5, 6, 20]), ragged.array( - [0, 1, 3, 2, 3, 4, 5, 6] - ) + arr=ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + expected_values=ragged.array([1,2,3,4,5,6,20]) + expected_inverse=ragged.array([0, 1, 3, 2, 3, 4, 5, 6]) + values, inverse= ragged.unique_inverse(arr) + assert ak.to_list(expected_values)==ak.to_list(values) + assert ak.to_list(expected_inverse)==ak.to_list(inverse) + -def test_can_take_simple_array(): +def test_can_inverse_simple_array(): arr = ragged.array([[1, 2, 2], [3, 3, 3], [4, 4, 4, 4]]) expected_unique_values = ragged.array([1, 2, 3, 4]) expected_inverse_indices = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) @@ -124,7 +117,7 @@ def test_can_take_simple_array(): assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) -def test_can_take_normal_array(): +def test_can_inverse_normal_array(): arr = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]]) expected_unique_values = ragged.array([1, 2, 3, 4]) expected_inverse_indices = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3]) @@ -133,9 +126,9 @@ def test_can_take_normal_array(): assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) -def test_can_take_scalar(): - arr = ragged.array([5]) - expected_unique_values = ragged.array([5]) +def test_can_inverse_scalar(): + arr = ragged.array(5) + expected_unique_values = 5 expected_unique_indices = ragged.array([0]) unique_values, unique_indices = ragged.unique_inverse(arr) assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) @@ -145,25 +138,20 @@ def test_can_take_scalar(): # unique_all tests def test_can_all_none(): with pytest.raises(TypeError): - assert ragged.unique_all(ragged.array(None)) is None + arr=None + expected_unique_values = ragged.array(None) + expected_unique_indices = ragged.array(None) + expected_unique_inverse = ragged.array(None) + expected_unique_counts = ragged.array(None) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) -def test_can_all_list(): - with pytest.raises( - ValueError, - match=re.escape( - "the truth value of an array whose length is not 1 is ambiguous;" - ), - ): - assert ragged.unique_all(ragged.array([1, 2, 4, 3, 4, 5, 6, 20])) == ( - ragged.array([1, 2, 3, 4, 5, 6, 20]), - ragged.array([0, 1, 3, 2, 5, 6, 7]), - ragged.array([0, 1, 3, 2, 3, 4, 5, 6]), - ragged.array([1, 1, 1, 2, 1, 1, 1]), - ) - -def test_can_all_simple_array(): +def test_can_all_list(): arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) expected_unique_values = ragged.array([1, 2, 3, 4]) expected_unique_indices = ragged.array([0, 1, 3, 6]) From f04ccbffe7b227b3f4078218d5f7df13ed5d89b2 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Tue, 27 Aug 2024 08:27:47 +0300 Subject: [PATCH 09/19] correcting function ifs + test standartization --- src/ragged/_spec_set_functions.py | 20 +++++++++----------- tests/test_spec_set_functions.py | 29 ++++++++++++++++------------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index e8df841..3141bb3 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -76,8 +76,8 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: counts=ragged.array(counts), ) else: - msg = f"Expected ragged type but got {type(x)}" # type: ignore - raise TypeError(msg) # type: ignore + msg = f"Expected ragged type but got {type(x)}" # type: ignore + raise TypeError(msg) # type: ignore unique_counts_result = namedtuple( # pylint: disable=C0103 @@ -118,8 +118,8 @@ def unique_counts(x: array, /) -> tuple[array, array]: values=ragged.array(values), counts=ragged.array(counts) ) else: - msg = f"Expected ragged type but got {type(x)}" # type: ignore - raise TypeError(msg) # type: ignore + msg = f"Expected ragged type but got {type(x)}" # type: ignore + raise TypeError(msg) # type: ignore unique_inverse_result = namedtuple( # pylint: disable=C0103 @@ -150,9 +150,7 @@ def unique_inverse(x: array, /) -> tuple[array, array]: """ if isinstance(x, ragged.array): if ak.is_scalar(x): - return unique_inverse_result( - values=x, inverse_indices=ragged.array([0]) - ) + return unique_inverse_result(values=x, inverse_indices=ragged.array([0])) else: x_flat = ak.ravel(x._impl) values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True) @@ -162,8 +160,8 @@ def unique_inverse(x: array, /) -> tuple[array, array]: inverse_indices=ragged.array(inverse_indices), ) else: - msg = f"Expected ragged type but got {type(x)}" # type: ignore - raise TypeError(msg) # type: ignore + msg = f"Expected ragged type but got {type(x)}" # type: ignore + raise TypeError(msg) # type: ignore def unique_values(x: array, /) -> array: @@ -189,5 +187,5 @@ def unique_values(x: array, /) -> array: x_flat = ak.ravel(x._impl) return ragged.array(np.unique(x_flat.layout.data)) else: - err = f"Expected ragged type but got {type(x)}" # type: ignore - raise TypeError(err) # type: ignore + err = f"Expected ragged type but got {type(x)}" # type: ignore + raise TypeError(err) # type: ignore diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index 60026e1..49789bb 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -33,9 +33,9 @@ def test_can_take_list(): def test_can_take_empty_arr(): # with pytest.raises(TypeError): - assert ak.to_list(ragged.unique_values(ragged.array([]))) == ak.to_list( - ragged.array([]) - ) + assert ak.to_list(ragged.unique_values(ragged.array([]))) == ak.to_list( + ragged.array([]) + ) def test_can_take_moredimensions(): @@ -99,13 +99,12 @@ def test_can_count_scalar(): def test_can_inverse_list(): - arr=ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) - expected_values=ragged.array([1,2,3,4,5,6,20]) - expected_inverse=ragged.array([0, 1, 3, 2, 3, 4, 5, 6]) - values, inverse= ragged.unique_inverse(arr) - assert ak.to_list(expected_values)==ak.to_list(values) - assert ak.to_list(expected_inverse)==ak.to_list(inverse) - + arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + expected_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + expected_inverse = ragged.array([0, 1, 3, 2, 3, 4, 5, 6]) + values, inverse = ragged.unique_inverse(arr) + assert ak.to_list(expected_values) == ak.to_list(values) + assert ak.to_list(expected_inverse) == ak.to_list(inverse) def test_can_inverse_simple_array(): @@ -138,19 +137,23 @@ def test_can_inverse_scalar(): # unique_all tests def test_can_all_none(): with pytest.raises(TypeError): - arr=None + arr = None expected_unique_values = ragged.array(None) expected_unique_indices = ragged.array(None) expected_unique_inverse = ragged.array(None) expected_unique_counts = ragged.array(None) - unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all(arr) + ( + unique_values, + unique_indices, + unique_inverse, + unique_counts, + ) = ragged.unique_all(arr) assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) - def test_can_all_list(): arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) expected_unique_values = ragged.array([1, 2, 3, 4]) From 7b30c58b4e3e7d95e0d89df2ae4d972026cd1359 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Wed, 28 Aug 2024 11:31:03 +0300 Subject: [PATCH 10/19] further test standartisation --- src/ragged/_spec_set_functions.py | 28 +++++++++------- tests/test_spec_set_functions.py | 56 ++++++++++++++++--------------- 2 files changed, 44 insertions(+), 40 deletions(-) diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index 3141bb3..31f50d9 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -55,7 +55,7 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: if isinstance(x, ragged.array): if x.ndim == 0: return unique_all_result( - values=ragged.array([x]), + values=ragged.array(x), indices=ragged.array([0]), inverse_indices=ragged.array([0]), counts=ragged.array([1]), @@ -76,8 +76,8 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: counts=ragged.array(counts), ) else: - msg = f"Expected ragged type but got {type(x)}" # type: ignore - raise TypeError(msg) # type: ignore + msg = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] + raise TypeError(msg) unique_counts_result = namedtuple( # pylint: disable=C0103 @@ -109,7 +109,7 @@ def unique_counts(x: array, /) -> tuple[array, array]: if isinstance(x, ragged.array): if x.ndim == 0: return unique_counts_result( - values=ragged.array([x]), counts=ragged.array([1]) + values=ragged.array(x), counts=ragged.array([1]) ) else: x_flat = ak.ravel(x._impl) @@ -118,8 +118,8 @@ def unique_counts(x: array, /) -> tuple[array, array]: values=ragged.array(values), counts=ragged.array(counts) ) else: - msg = f"Expected ragged type but got {type(x)}" # type: ignore - raise TypeError(msg) # type: ignore + msg = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] + raise TypeError(msg) unique_inverse_result = namedtuple( # pylint: disable=C0103 @@ -149,8 +149,10 @@ def unique_inverse(x: array, /) -> tuple[array, array]: https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_inverse.html """ if isinstance(x, ragged.array): - if ak.is_scalar(x): - return unique_inverse_result(values=x, inverse_indices=ragged.array([0])) + if x.ndim == 0: + return unique_inverse_result( + values=ragged.array(x), inverse_indices=ragged.array([0]) + ) else: x_flat = ak.ravel(x._impl) values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True) @@ -160,8 +162,8 @@ def unique_inverse(x: array, /) -> tuple[array, array]: inverse_indices=ragged.array(inverse_indices), ) else: - msg = f"Expected ragged type but got {type(x)}" # type: ignore - raise TypeError(msg) # type: ignore + msg = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] + raise TypeError(msg) def unique_values(x: array, /) -> array: @@ -181,11 +183,11 @@ def unique_values(x: array, /) -> array: """ if isinstance(x, ragged.array): if x.ndim == 0: - return ragged.array([x]) + return ragged.array(x) else: x_flat = ak.ravel(x._impl) return ragged.array(np.unique(x_flat.layout.data)) else: - err = f"Expected ragged type but got {type(x)}" # type: ignore - raise TypeError(err) # type: ignore + err = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] + raise TypeError(err) diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index 49789bb..4df62c7 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -7,7 +7,6 @@ from __future__ import annotations import awkward as ak -import pytest import ragged @@ -26,22 +25,25 @@ def test_existence(): def test_can_take_list(): - assert ak.to_list( - ragged.unique_values(ragged.array([1, 2, 4, 3, 4, 5, 6, 20])) - ) == ak.to_list(ragged.array([1, 2, 3, 4, 5, 6, 20])) + arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + unique_values = ragged.unique_values(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) def test_can_take_empty_arr(): # with pytest.raises(TypeError): - assert ak.to_list(ragged.unique_values(ragged.array([]))) == ak.to_list( - ragged.array([]) - ) + arr = ragged.array([]) + expected_unique_values = ragged.array([0]) + unique_values = ragged.unique_values(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) def test_can_take_moredimensions(): - assert ak.to_list( - ragged.unique_values(ragged.array([[1, 2, 2, 3, 4], [5, 6]])) - ) == ak.to_list(ragged.array([1, 2, 3, 4, 5, 6])) + arr = ragged.array([[1, 2, 2, 3, 4], [5, 6]]) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6]) + unique_values = ragged.unique_values(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) def test_can_take_1d_array(): @@ -135,23 +137,23 @@ def test_can_inverse_scalar(): # unique_all tests -def test_can_all_none(): - with pytest.raises(TypeError): - arr = None - expected_unique_values = ragged.array(None) - expected_unique_indices = ragged.array(None) - expected_unique_inverse = ragged.array(None) - expected_unique_counts = ragged.array(None) - ( - unique_values, - unique_indices, - unique_inverse, - unique_counts, - ) = ragged.unique_all(arr) - assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) - assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) - assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) - assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) +# def test_can_all_none(): +# with pytest.raises(TypeError): +# arr = None +# expected_unique_values = ragged.array(None) +# expected_unique_indices = ragged.array(None) +# expected_unique_inverse = ragged.array(None) +# expected_unique_counts = ragged.array(None) +# ( +# unique_values, +# unique_indices, +# unique_inverse, +# unique_counts, +# ) = ragged.unique_all(arr) +# assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) +# assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) +# assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) +# assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) def test_can_all_list(): From 7fb48f1af44d26ce1a8c87b2401cda4b21183788 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Thu, 29 Aug 2024 12:35:12 +0300 Subject: [PATCH 11/19] scalar handling and testing --- tests/test_spec_set_functions.py | 111 +++++++++++++++++-------------- 1 file changed, 62 insertions(+), 49 deletions(-) diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index 4df62c7..8e63461 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -19,11 +19,6 @@ def test_existence(): # unique_values tests -# def test_can_take_none(): -# with pytest.raises(TypeError): -# assert ragged.unique_values(ragged.array(None)) is None - - def test_can_take_list(): arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) @@ -52,12 +47,21 @@ def test_can_take_1d_array(): assert ak.to_list(ragged.unique_values(arr)) == ak.to_list(expected_unique_values) -# unique_counts tests -# def test_can_count_none(): -# with pytest.raises(TypeError): -# assert ragged.unique_counts(ragged.array(None)) is None +def test_can_take_scalar_int(): + arr = ragged.array(5) + expected_unique_values = ragged.array(5) + unique_values = ragged.unique_values(arr) + assert unique_values == expected_unique_values +def test_can_take_scalar_float(): + arr = ragged.array(4.326) + expected_unique_values = ragged.array(4.326) + unique_values = ragged.unique_values(arr) + assert unique_values == expected_unique_values + + +# unique_counts tests def test_can_count_list(): arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) @@ -85,21 +89,25 @@ def test_can_count_normal_array(): assert ak.to_list(unique_counts) == ak.to_list(expected_counts) -def test_can_count_scalar(): - arr = ragged.array([5]) - expected_unique_values = ragged.array([5]) +def test_can_count_scalar_int(): + arr = ragged.array(5) + expected_unique_values = ragged.array(5) expected_counts = ragged.array([1]) unique_values, unique_counts = ragged.unique_counts(arr) - assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) - assert ak.to_list(unique_counts) == ak.to_list(expected_counts) + assert unique_values == expected_unique_values + assert unique_counts == expected_counts -# unique_inverse tests -# def test_can_inverse_none(): -# with pytest.raises(TypeError): -# assert ak.to_list(ragged.unique_inverse(ragged.array(None))) is ak.to_list(None) +def test_can_count_scalar_float(): + arr = ragged.array(4.326) + expected_unique_values = ragged.array(4.326) + expected_counts = ragged.array([1]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert unique_values == expected_unique_values + assert unique_counts == expected_counts +# unique_inverse tests def test_can_inverse_list(): arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) expected_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) @@ -127,35 +135,25 @@ def test_can_inverse_normal_array(): assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) -def test_can_inverse_scalar(): +def test_can_inverse_scalar_int(): arr = ragged.array(5) - expected_unique_values = 5 - expected_unique_indices = ragged.array([0]) - unique_values, unique_indices = ragged.unique_inverse(arr) - assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) - assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + expected_unique_values = ragged.array(5) + expected_inverse_indices = ragged.array([0]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert unique_values == expected_unique_values + assert inverse_indices == expected_inverse_indices -# unique_all tests -# def test_can_all_none(): -# with pytest.raises(TypeError): -# arr = None -# expected_unique_values = ragged.array(None) -# expected_unique_indices = ragged.array(None) -# expected_unique_inverse = ragged.array(None) -# expected_unique_counts = ragged.array(None) -# ( -# unique_values, -# unique_indices, -# unique_inverse, -# unique_counts, -# ) = ragged.unique_all(arr) -# assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) -# assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) -# assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) -# assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) +def test_can_inverse_scalar_float(): + arr = ragged.array(4.326) + expected_unique_values = ragged.array(4.326) + expected_inverse_indices = ragged.array([0]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert unique_values == expected_unique_values + assert inverse_indices == expected_inverse_indices +# unique_all tests def test_can_all_list(): arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) expected_unique_values = ragged.array([1, 2, 3, 4]) @@ -186,16 +184,31 @@ def test_can_all_normal_array(): assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) -def test_can_all_scalar(): - arr = ragged.array([5]) - expected_unique_values = ragged.array([5]) +def test_can_all_scalar_int(): + arr = ragged.array(5) + expected_unique_values = ragged.array(5) expected_unique_indices = ragged.array([0]) expected_unique_inverse = ragged.array([0]) expected_unique_counts = ragged.array([1]) unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( arr ) - assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) - assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) - assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) - assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + assert unique_values == expected_unique_values + assert unique_indices == expected_unique_indices + assert unique_inverse == expected_unique_inverse + assert unique_counts == expected_unique_counts + + +def test_can_all_scalar_float(): + arr = ragged.array(4.326) + expected_unique_values = ragged.array(4.326) + expected_unique_indices = ragged.array([0]) + expected_unique_inverse = ragged.array([0]) + expected_unique_counts = ragged.array([1]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert unique_values == expected_unique_values + assert unique_indices == expected_unique_indices + assert unique_inverse == expected_unique_inverse + assert unique_counts == expected_unique_counts From 6e8b6ee7fefce549562b5a53ac6f72d2740614ce Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Tue, 10 Sep 2024 09:08:59 +0300 Subject: [PATCH 12/19] _array_object changes, empty array handling + tests --- src/ragged/_spec_array_object.py | 6 +++- src/ragged/_spec_set_functions.py | 22 +++++++++++++++ tests/test_spec_set_functions.py | 46 ++++++++++++++++++++++++++----- 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/src/ragged/_spec_array_object.py b/src/ragged/_spec_array_object.py index 3e27993..0978c72 100644 --- a/src/ragged/_spec_array_object.py +++ b/src/ragged/_spec_array_object.py @@ -16,6 +16,7 @@ import numpy as np from awkward.contents import ( Content, + EmptyArray, ListArray, ListOffsetArray, NumpyArray, @@ -44,7 +45,10 @@ def _shape_dtype(layout: Content) -> tuple[Shape, Dtype]: else: shape = (*shape, None) node = node.content - + if isinstance(node, EmptyArray): + node = node.to_NumpyArray(dtype=np.float64) + shape = shape + node.data.shape[1:] + return shape, node.data.dtype if isinstance(node, NumpyArray): shape = shape + node.data.shape[1:] return shape, node.data.dtype diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index 31f50d9..7835dc1 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -62,6 +62,13 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: ) else: x_flat = ak.ravel(x._impl) + if isinstance(x_flat.layout, ak.contents.EmptyArray): + return unique_all_result( + values=ragged.array([]), + indices=ragged.array([]), + inverse_indices=ragged.array([]), + counts=ragged.array([]), + ) values, indices, inverse_indices, counts = np.unique( x_flat.layout.data, return_index=True, @@ -113,6 +120,10 @@ def unique_counts(x: array, /) -> tuple[array, array]: ) else: x_flat = ak.ravel(x._impl) + if isinstance(x_flat.layout, ak.contents.EmptyArray): + return unique_counts_result( + values=ragged.array([]), counts=ragged.array([]) + ) values, counts = np.unique(x_flat.layout.data, return_counts=True) return unique_counts_result( values=ragged.array(values), counts=ragged.array(counts) @@ -155,6 +166,10 @@ def unique_inverse(x: array, /) -> tuple[array, array]: ) else: x_flat = ak.ravel(x._impl) + if isinstance(x_flat.layout, ak.contents.EmptyArray): + return unique_inverse_result( + values=ragged.array([]), inverse_indices=ragged.array([]) + ) values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True) return unique_inverse_result( @@ -187,6 +202,13 @@ def unique_values(x: array, /) -> array: else: x_flat = ak.ravel(x._impl) + if isinstance(x_flat.layout, ak.contents.EmptyArray): + return ragged.array([]) + # print("x._impl type is", type(x._impl)) + # print("x_flat type is", type(x_flat)) + # print("x_flat laoyut is", x_flat.layout) + # print("x_flat layout type is", type(x_flat.layout)) + # print("x_flat layout data type is", type(x_flat.layout.data)) return ragged.array(np.unique(x_flat.layout.data)) else: err = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] diff --git a/tests/test_spec_set_functions.py b/tests/test_spec_set_functions.py index 8e63461..3a74cbc 100644 --- a/tests/test_spec_set_functions.py +++ b/tests/test_spec_set_functions.py @@ -27,9 +27,8 @@ def test_can_take_list(): def test_can_take_empty_arr(): - # with pytest.raises(TypeError): arr = ragged.array([]) - expected_unique_values = ragged.array([0]) + expected_unique_values = ragged.array([]) unique_values = ragged.unique_values(arr) assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) @@ -71,6 +70,15 @@ def test_can_count_list(): assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) +def test_can_count_empty_arr(): + arr = ragged.array([]) + expected_unique_values = ragged.array([]) + expected_counts = ragged.array([]) + unique_values, unique_counts = ragged.unique_counts(arr) + assert ak.to_list(expected_unique_values) == ak.to_list(unique_values) + assert ak.to_list(expected_counts) == ak.to_list(unique_counts) + + def test_can_count_simple_array(): arr = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) expected_unique_values = ragged.array([1, 2, 3, 4]) @@ -110,11 +118,20 @@ def test_can_count_scalar_float(): # unique_inverse tests def test_can_inverse_list(): arr = ragged.array([1, 2, 4, 3, 4, 5, 6, 20]) - expected_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) - expected_inverse = ragged.array([0, 1, 3, 2, 3, 4, 5, 6]) - values, inverse = ragged.unique_inverse(arr) - assert ak.to_list(expected_values) == ak.to_list(values) - assert ak.to_list(expected_inverse) == ak.to_list(inverse) + expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6, 20]) + expected_inverse_indices = ragged.array([0, 1, 3, 2, 3, 4, 5, 6]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) + + +def test_can_inverse_empty_arr(): + arr = ragged.array([]) + expected_unique_values = ragged.array([]) + expected_inverse_indices = ragged.array([]) + unique_values, inverse_indices = ragged.unique_inverse(arr) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(inverse_indices) == ak.to_list(expected_inverse_indices) def test_can_inverse_simple_array(): @@ -169,6 +186,21 @@ def test_can_all_list(): assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) +def test_can_all_empty_arr(): + arr = ragged.array([]) + expected_unique_values = ragged.array([]) + expected_unique_indices = ragged.array([]) + expected_unique_inverse = ragged.array([]) + expected_unique_counts = ragged.array([]) + unique_values, unique_indices, unique_inverse, unique_counts = ragged.unique_all( + arr + ) + assert ak.to_list(unique_values) == ak.to_list(expected_unique_values) + assert ak.to_list(unique_indices) == ak.to_list(expected_unique_indices) + assert ak.to_list(unique_inverse) == ak.to_list(expected_unique_inverse) + assert ak.to_list(unique_counts) == ak.to_list(expected_unique_counts) + + def test_can_all_normal_array(): arr = ragged.array([[2, 2, 2], [3], [3, 5], [4, 4, 4], [4]]) expected_unique_values = ragged.array([2, 3, 4, 5]) From 36dfb42ed5dfc06343386f73164fa1692497c6cb Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Tue, 10 Sep 2024 11:16:21 +0300 Subject: [PATCH 13/19] implementing Jim's suggestion, disabling CI errors --- src/ragged/_spec_set_functions.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index 7835dc1..33bd62a 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -55,14 +55,14 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: if isinstance(x, ragged.array): if x.ndim == 0: return unique_all_result( - values=ragged.array(x), + values=ragged.array(np.unique(x._impl)), indices=ragged.array([0]), inverse_indices=ragged.array([0]), counts=ragged.array([1]), ) else: - x_flat = ak.ravel(x._impl) - if isinstance(x_flat.layout, ak.contents.EmptyArray): + x_flat = ak.ravel(x._impl) # pylint: disable=W0212 + if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 return unique_all_result( values=ragged.array([]), indices=ragged.array([]), @@ -116,7 +116,7 @@ def unique_counts(x: array, /) -> tuple[array, array]: if isinstance(x, ragged.array): if x.ndim == 0: return unique_counts_result( - values=ragged.array(x), counts=ragged.array([1]) + values=ragged.array(np.unique(x._impl)), counts=ragged.array([1]) ) else: x_flat = ak.ravel(x._impl) @@ -162,7 +162,8 @@ def unique_inverse(x: array, /) -> tuple[array, array]: if isinstance(x, ragged.array): if x.ndim == 0: return unique_inverse_result( - values=ragged.array(x), inverse_indices=ragged.array([0]) + values=ragged.array(np.unique(x._impl)), + inverse_indices=ragged.array([0]), ) else: x_flat = ak.ravel(x._impl) @@ -198,7 +199,7 @@ def unique_values(x: array, /) -> array: """ if isinstance(x, ragged.array): if x.ndim == 0: - return ragged.array(x) + return ragged.array(np.unique(x._impl)) else: x_flat = ak.ravel(x._impl) From e5852b38375f80d45c316216e4d998796a136551 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Tue, 10 Sep 2024 14:59:14 +0300 Subject: [PATCH 14/19] disabling errors and warnings --- src/ragged/__init__.py | 2 +- src/ragged/_spec_set_functions.py | 34 ++++++++++++++----------------- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/src/ragged/__init__.py b/src/ragged/__init__.py index 8377e92..ea238aa 100644 --- a/src/ragged/__init__.py +++ b/src/ragged/__init__.py @@ -130,7 +130,7 @@ nonzero, where, ) -from ._spec_set_functions import ( +from ._spec_set_functions import ( # pylint: disable=W0622 unique_all, unique_counts, unique_inverse, diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index 33bd62a..26f2241 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -55,7 +55,7 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: if isinstance(x, ragged.array): if x.ndim == 0: return unique_all_result( - values=ragged.array(np.unique(x._impl)), + values=ragged.array(np.unique(x._impl)), # pylint: disable=W0212 indices=ragged.array([0]), inverse_indices=ragged.array([0]), counts=ragged.array([1]), @@ -70,7 +70,7 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: counts=ragged.array([]), ) values, indices, inverse_indices, counts = np.unique( - x_flat.layout.data, + x_flat.layout.data, # pylint: disable=E1101 return_index=True, return_inverse=True, return_counts=True, @@ -116,15 +116,16 @@ def unique_counts(x: array, /) -> tuple[array, array]: if isinstance(x, ragged.array): if x.ndim == 0: return unique_counts_result( - values=ragged.array(np.unique(x._impl)), counts=ragged.array([1]) + values=ragged.array(np.unique(x._impl)), + counts=ragged.array([1]), # pylint: disable=W0212 ) else: - x_flat = ak.ravel(x._impl) - if isinstance(x_flat.layout, ak.contents.EmptyArray): + x_flat = ak.ravel(x._impl) # pylint: disable=W0212 + if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 return unique_counts_result( values=ragged.array([]), counts=ragged.array([]) ) - values, counts = np.unique(x_flat.layout.data, return_counts=True) + values, counts = np.unique(x_flat.layout.data, return_counts=True) # pylint: disable=E1101 return unique_counts_result( values=ragged.array(values), counts=ragged.array(counts) ) @@ -162,16 +163,16 @@ def unique_inverse(x: array, /) -> tuple[array, array]: if isinstance(x, ragged.array): if x.ndim == 0: return unique_inverse_result( - values=ragged.array(np.unique(x._impl)), + values=ragged.array(np.unique(x._impl)), # pylint: disable=W0212 inverse_indices=ragged.array([0]), ) else: - x_flat = ak.ravel(x._impl) - if isinstance(x_flat.layout, ak.contents.EmptyArray): + x_flat = ak.ravel(x._impl) # pylint: disable=W0212 + if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 return unique_inverse_result( values=ragged.array([]), inverse_indices=ragged.array([]) ) - values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True) + values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True) # pylint: disable=E1101 return unique_inverse_result( values=ragged.array(values), @@ -199,18 +200,13 @@ def unique_values(x: array, /) -> array: """ if isinstance(x, ragged.array): if x.ndim == 0: - return ragged.array(np.unique(x._impl)) + return ragged.array(np.unique(x._impl)) # pylint: disable=W0212 else: - x_flat = ak.ravel(x._impl) - if isinstance(x_flat.layout, ak.contents.EmptyArray): + x_flat = ak.ravel(x._impl) # pylint: disable=W0212 + if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 return ragged.array([]) - # print("x._impl type is", type(x._impl)) - # print("x_flat type is", type(x_flat)) - # print("x_flat laoyut is", x_flat.layout) - # print("x_flat layout type is", type(x_flat.layout)) - # print("x_flat layout data type is", type(x_flat.layout.data)) - return ragged.array(np.unique(x_flat.layout.data)) + return ragged.array(np.unique(x_flat.layout.data)) # pylint: disable=E1101 else: err = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] raise TypeError(err) From 1ff29b986dec48e3c07b72cb407548e7b3d33ce1 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Tue, 10 Sep 2024 15:03:47 +0300 Subject: [PATCH 15/19] further ignores + adding equal_nan in np.unique instances --- src/ragged/__init__.py | 2 +- src/ragged/_spec_set_functions.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/ragged/__init__.py b/src/ragged/__init__.py index ea238aa..02af4d1 100644 --- a/src/ragged/__init__.py +++ b/src/ragged/__init__.py @@ -130,7 +130,7 @@ nonzero, where, ) -from ._spec_set_functions import ( # pylint: disable=W0622 +from ._spec_set_functions import ( # pylint: disable=R0401 unique_all, unique_counts, unique_inverse, diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index 26f2241..ed0a2db 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -55,7 +55,7 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: if isinstance(x, ragged.array): if x.ndim == 0: return unique_all_result( - values=ragged.array(np.unique(x._impl)), # pylint: disable=W0212 + values=ragged.array(np.unique(x._impl, equal_nan=False)), # pylint: disable=W0212 indices=ragged.array([0]), inverse_indices=ragged.array([0]), counts=ragged.array([1]), @@ -116,7 +116,7 @@ def unique_counts(x: array, /) -> tuple[array, array]: if isinstance(x, ragged.array): if x.ndim == 0: return unique_counts_result( - values=ragged.array(np.unique(x._impl)), + values=ragged.array(np.unique(x._impl, equal_nan=False)), # pylint: disable=W0212 counts=ragged.array([1]), # pylint: disable=W0212 ) else: @@ -125,7 +125,9 @@ def unique_counts(x: array, /) -> tuple[array, array]: return unique_counts_result( values=ragged.array([]), counts=ragged.array([]) ) - values, counts = np.unique(x_flat.layout.data, return_counts=True) # pylint: disable=E1101 + values, counts = np.unique( + x_flat.layout.data, return_counts=True, equal_nan=False + ) # pylint: disable=E1101 return unique_counts_result( values=ragged.array(values), counts=ragged.array(counts) ) @@ -163,7 +165,7 @@ def unique_inverse(x: array, /) -> tuple[array, array]: if isinstance(x, ragged.array): if x.ndim == 0: return unique_inverse_result( - values=ragged.array(np.unique(x._impl)), # pylint: disable=W0212 + values=ragged.array(np.unique(x._impl, equal_nan=False)), # pylint: disable=W0212 inverse_indices=ragged.array([0]), ) else: @@ -172,7 +174,9 @@ def unique_inverse(x: array, /) -> tuple[array, array]: return unique_inverse_result( values=ragged.array([]), inverse_indices=ragged.array([]) ) - values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True) # pylint: disable=E1101 + values, inverse_indices = np.unique( + x_flat.layout.data, return_inverse=True, equal_nan=False + ) # pylint: disable=E1101 return unique_inverse_result( values=ragged.array(values), @@ -200,13 +204,13 @@ def unique_values(x: array, /) -> array: """ if isinstance(x, ragged.array): if x.ndim == 0: - return ragged.array(np.unique(x._impl)) # pylint: disable=W0212 + return ragged.array(np.unique(x._impl, equal_nan=False)) # pylint: disable=W0212 else: x_flat = ak.ravel(x._impl) # pylint: disable=W0212 if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 return ragged.array([]) - return ragged.array(np.unique(x_flat.layout.data)) # pylint: disable=E1101 + return ragged.array(np.unique(x_flat.layout.data, equal_nan=False)) # pylint: disable=E1101 else: err = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable] raise TypeError(err) From 48893b8bba2036d21ba4c3d5bf67b0b33d3b00f9 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Tue, 10 Sep 2024 15:12:28 +0300 Subject: [PATCH 16/19] better ignores --- src/ragged/_spec_set_functions.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index ed0a2db..326edf9 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -126,8 +126,10 @@ def unique_counts(x: array, /) -> tuple[array, array]: values=ragged.array([]), counts=ragged.array([]) ) values, counts = np.unique( - x_flat.layout.data, return_counts=True, equal_nan=False - ) # pylint: disable=E1101 + x_flat.layout.data, + return_counts=True, + equal_nan=False, # pylint: disable=E1101 + ) return unique_counts_result( values=ragged.array(values), counts=ragged.array(counts) ) @@ -175,8 +177,10 @@ def unique_inverse(x: array, /) -> tuple[array, array]: values=ragged.array([]), inverse_indices=ragged.array([]) ) values, inverse_indices = np.unique( - x_flat.layout.data, return_inverse=True, equal_nan=False - ) # pylint: disable=E1101 + x_flat.layout.data, + return_inverse=True, + equal_nan=False, # pylint: disable=E1101 + ) return unique_inverse_result( values=ragged.array(values), From 943d717d250ae7b946286ce5e6fc31f05fc75018 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Tue, 10 Sep 2024 15:19:58 +0300 Subject: [PATCH 17/19] improving ignores --- src/ragged/_spec_set_functions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index 326edf9..0ab8af6 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -126,9 +126,9 @@ def unique_counts(x: array, /) -> tuple[array, array]: values=ragged.array([]), counts=ragged.array([]) ) values, counts = np.unique( - x_flat.layout.data, + x_flat.layout.data, # pylint: disable=E1101 return_counts=True, - equal_nan=False, # pylint: disable=E1101 + equal_nan=False, ) return unique_counts_result( values=ragged.array(values), counts=ragged.array(counts) @@ -177,9 +177,9 @@ def unique_inverse(x: array, /) -> tuple[array, array]: values=ragged.array([]), inverse_indices=ragged.array([]) ) values, inverse_indices = np.unique( - x_flat.layout.data, + x_flat.layout.data, # pylint: disable=E1101 return_inverse=True, - equal_nan=False, # pylint: disable=E1101 + equal_nan=False, ) return unique_inverse_result( From 8c0867ac5322516d7e5028718402674caacfd563 Mon Sep 17 00:00:00 2001 From: ohrechykha <80257135+ohrechykha@users.noreply.github.com> Date: Thu, 12 Sep 2024 07:31:13 +0300 Subject: [PATCH 18/19] avoiding code duplication in _spec_array_object Co-authored-by: Jim Pivarski --- src/ragged/_spec_array_object.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ragged/_spec_array_object.py b/src/ragged/_spec_array_object.py index 0978c72..8c76c89 100644 --- a/src/ragged/_spec_array_object.py +++ b/src/ragged/_spec_array_object.py @@ -47,8 +47,7 @@ def _shape_dtype(layout: Content) -> tuple[Shape, Dtype]: node = node.content if isinstance(node, EmptyArray): node = node.to_NumpyArray(dtype=np.float64) - shape = shape + node.data.shape[1:] - return shape, node.data.dtype + if isinstance(node, NumpyArray): shape = shape + node.data.shape[1:] return shape, node.data.dtype From 67b5807d6e91f5bc8ab3cba2559c2c8cc30c75a5 Mon Sep 17 00:00:00 2001 From: ohrechykha Date: Thu, 12 Sep 2024 20:43:03 +0300 Subject: [PATCH 19/19] returning np.empty and input dtype in all functions --- src/ragged/_spec_set_functions.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/ragged/_spec_set_functions.py b/src/ragged/_spec_set_functions.py index 0ab8af6..259ccd9 100644 --- a/src/ragged/_spec_set_functions.py +++ b/src/ragged/_spec_set_functions.py @@ -64,10 +64,10 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]: x_flat = ak.ravel(x._impl) # pylint: disable=W0212 if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 return unique_all_result( - values=ragged.array([]), - indices=ragged.array([]), - inverse_indices=ragged.array([]), - counts=ragged.array([]), + values=ragged.array(np.empty(0, x.dtype)), + indices=ragged.array(np.empty(0, np.int64)), + inverse_indices=ragged.array(np.empty(0, np.int64)), + counts=ragged.array(np.empty(0, np.int64)), ) values, indices, inverse_indices, counts = np.unique( x_flat.layout.data, # pylint: disable=E1101 @@ -123,7 +123,8 @@ def unique_counts(x: array, /) -> tuple[array, array]: x_flat = ak.ravel(x._impl) # pylint: disable=W0212 if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 return unique_counts_result( - values=ragged.array([]), counts=ragged.array([]) + values=ragged.array(np.empty(0, x.dtype)), + counts=ragged.array(np.empty(0, np.int64)), ) values, counts = np.unique( x_flat.layout.data, # pylint: disable=E1101 @@ -174,7 +175,8 @@ def unique_inverse(x: array, /) -> tuple[array, array]: x_flat = ak.ravel(x._impl) # pylint: disable=W0212 if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 return unique_inverse_result( - values=ragged.array([]), inverse_indices=ragged.array([]) + values=ragged.array(np.empty(0, x.dtype)), + inverse_indices=ragged.array(np.empty(0, np.int64)), ) values, inverse_indices = np.unique( x_flat.layout.data, # pylint: disable=E1101 @@ -213,7 +215,7 @@ def unique_values(x: array, /) -> array: else: x_flat = ak.ravel(x._impl) # pylint: disable=W0212 if isinstance(x_flat.layout, ak.contents.EmptyArray): # pylint: disable=E1101 - return ragged.array([]) + return ragged.array(np.empty(0, x.dtype)) return ragged.array(np.unique(x_flat.layout.data, equal_nan=False)) # pylint: disable=E1101 else: err = f"Expected ragged type but got {type(x)}" # type: ignore[unreachable]