Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: unique_values function algorithm #55

Closed
wants to merge 11 commits into from
55 changes: 46 additions & 9 deletions src/ragged/_spec_set_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@

from collections import namedtuple

import awkward as ak
import numpy as np

import ragged

from ._spec_array_object import array

unique_all_result = namedtuple( # pylint: disable=C0103
Expand Down Expand Up @@ -47,8 +52,17 @@ def unique_all(x: array, /) -> tuple[array, array, array, array]:
https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_all.html
"""

x # noqa: B018, pylint: disable=W0104
raise NotImplementedError("TODO 128") # noqa: EM101
if not isinstance(x, ragged.array):
err = f"Expected ragged type but got {type(x)}"
raise TypeError(err)

if len(x)==1:
return ragged.array(x), ragged.array([0]), ragged.array([0]), ragged.array([1])

x_flat=ak.ravel(x._impl)
values, indices, inverse_indices, counts = np.unique(x_flat.layout.data, return_index=True, return_inverse=True, return_counts=True)

return ragged.array(values),ragged.array(indices), ragged.array(inverse_indices), ragged.array(counts)


unique_counts_result = namedtuple( # pylint: disable=C0103
Expand Down Expand Up @@ -77,9 +91,16 @@ def unique_counts(x: array, /) -> tuple[array, array]:

https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_counts.html
"""
if not isinstance(x, ragged.array):
err = f"Expected ragged type but got {type(x)}"
raise TypeError(err)

x # noqa: B018, pylint: disable=W0104
raise NotImplementedError("TODO 129") # noqa: EM101
if len(x)==1:
return ragged.array(x), ragged.array([1])

x_flat = ak.ravel(x._impl)
values, counts = np.unique(x_flat.layout.data, return_counts=True)
return ragged.array(values), ragged.array(counts)


unique_inverse_result = namedtuple( # pylint: disable=C0103
Expand Down Expand Up @@ -108,9 +129,17 @@ def unique_inverse(x: array, /) -> tuple[array, array]:

https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_inverse.html
"""
if not isinstance(x, ragged.array):
err = f"Expected ragged type but got {type(x)}"
raise TypeError(err)

if len(x) == 1:
return ragged.array(x), ragged.array([0])

x # noqa: B018, pylint: disable=W0104
raise NotImplementedError("TODO 130") # noqa: EM101
x_flat=ak.ravel(x._impl)
values, inverse_indices = np.unique(x_flat.layout.data, return_inverse=True)

return ragged.array(values), ragged.array(inverse_indices)


def unique_values(x: array, /) -> array:
Expand All @@ -128,6 +157,14 @@ def unique_values(x: array, /) -> array:

https://data-apis.org/array-api/latest/API_specification/generated/array_api.unique_values.html
"""

x # noqa: B018, pylint: disable=W0104
raise NotImplementedError("TODO 131") # noqa: EM101
if not isinstance(x, ragged.array):
err = f"Expected ragged type but got {type(x)}"
raise TypeError(err)

if len(x)==1:
return ragged.array(x)

x_flat = ak.ravel(x._impl)
values = np.unique(x_flat.layout.data)

return ragged.array(values)
161 changes: 161 additions & 0 deletions tests/test_55_spec_set_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/ragged/blob/main/LICENSE

"""
https://data-apis.org/array-api/latest/API_specification/set_functions.html
"""

from __future__ import annotations

import ragged
import awkward as ak
import pytest

# Specific algorithm for unique_values:
# 1. Take an input array.
# 2. Flatten the input array unless it is already 1-D.
# 3. Remember the first element and loop through the rest of the list to see whether there are copies of it:
#    - if yes, discard the copy and repeat the step;
#    - if not, add the element to the output and repeat the step.
# 4. Once the loop is over, return an array of the unique elements of the input array (the output must be of the same type as the input array).


def test_existence():
    """Check that every set function is reachable as an attribute of ``ragged``."""
    # Same order as the individual assertions this loop replaces.
    for name in ("unique_all", "unique_counts", "unique_inverse", "unique_values"):
        assert getattr(ragged, name) is not None

#unique_values tests
def test_can_take_none():
    """``unique_values`` must reject ``None`` with ``TypeError``.

    ``unique_values`` raises ``TypeError`` for any argument that is not a
    ``ragged.array``, so comparing its result with ``None`` could never
    succeed. The rejection is asserted directly instead, matching the
    ``None`` tests for the other set functions.
    """
    with pytest.raises(TypeError):
        ragged.unique_values(None)

def test_can_take_list():
    """A plain Python list is not a ``ragged.array`` and must raise ``TypeError``."""
    plain_list = [1, 2, 4, 3, 4, 5, 6, 20]
    with pytest.raises(TypeError):
        assert ragged.unique_values(plain_list)

def test_can_take_empty_arr():
    """An empty ``ragged.array`` is expected to end in ``TypeError``."""
    # NOTE(review): it is not visible from here which call raises (array
    # construction or unique_values) or whether TypeError is really the
    # exception produced for empty input -- confirm against the implementation.
    with pytest.raises(TypeError):
        empty = ragged.array([])
        assert ragged.unique_values(empty)

def test_can_take_moredimensions():
    """Nested (ragged) input is flattened before the unique values are taken.

    ``unique_values`` ravels its input and never checks dimensionality, and
    the Array API specification requires multi-dimensional input to be
    flattened, so no ``ValueError`` is raised. The test therefore checks the
    flattened, sorted result instead of expecting an exception.
    """
    arr = ragged.array([[1, 2, 3, 4], [5, 6]])
    expected_unique_values = ragged.array([1, 2, 3, 4, 5, 6])
    assert ak.to_list(ragged.unique_values(arr)) == ak.to_list(expected_unique_values)

def test_can_take_1d_array():
    """A flat array with duplicates yields each value once, in ascending order."""
    data = ragged.array([5, 6, 7, 8, 8, 9, 1, 2, 3, 4, 10, 0, 15, 2])
    want = ragged.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15])
    got = ragged.unique_values(data)
    assert ak.to_list(got) == ak.to_list(want)


#unique_counts tests
def test_can_count_none():
    """``unique_counts`` must reject ``None`` with ``TypeError``."""
    with pytest.raises(TypeError):
        outcome = ragged.unique_counts(None)
        assert outcome is None

def test_can_count_list():
    """``unique_counts`` must reject a plain Python list with ``TypeError``."""
    plain_list = [1, 2, 4, 3, 4, 5, 6, 20]
    with pytest.raises(TypeError):
        outcome = ragged.unique_counts(plain_list)
        assert outcome is None

def test_can_count_simple_array():
    """Flat input: each distinct value is reported together with its multiplicity."""
    data = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
    want_values = ragged.array([1, 2, 3, 4])
    want_counts = ragged.array([1, 2, 3, 4])

    got_values, got_counts = ragged.unique_counts(data)

    for got, want in ((got_values, want_values), (got_counts, want_counts)):
        assert ak.to_list(got) == ak.to_list(want)

def test_can_count_normal_array():
    """Ragged input: values are counted across all sublists."""
    data = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]])
    want_values = ragged.array([1, 2, 3, 4])
    want_counts = ragged.array([1, 2, 3, 4])

    got_values, got_counts = ragged.unique_counts(data)

    for got, want in ((got_values, want_values), (got_counts, want_counts)):
        assert ak.to_list(got) == ak.to_list(want)


def test_can_count_scalar():
    """A single-element array yields that element with a count of one."""
    data = ragged.array([5])
    want_values = ragged.array([5])
    want_counts = ragged.array([1])

    got_values, got_counts = ragged.unique_counts(data)

    for got, want in ((got_values, want_values), (got_counts, want_counts)):
        assert ak.to_list(got) == ak.to_list(want)

#unique_inverse tests
def test_can_inverse_none():
    """``unique_inverse`` must reject ``None`` with ``TypeError``.

    Named ``test_can_inverse_none`` because a ``test_can_take_none`` already
    exists earlier in this module for ``unique_values``. Reusing that name
    would rebind it, so only one of the two tests would ever be collected.
    """
    with pytest.raises(TypeError):
        ragged.unique_inverse(None)

def test_can_inverse_list():
    """``unique_inverse`` must reject a plain Python list with ``TypeError``.

    Named ``test_can_inverse_list`` because a ``test_can_take_list`` already
    exists earlier in this module for ``unique_values``. Reusing that name
    would rebind it, so only one of the two tests would ever be collected.
    """
    with pytest.raises(TypeError):
        ragged.unique_inverse([1, 2, 4, 3, 4, 5, 6, 20])

def test_can_take_simple_array():
    """Flat input: ``unique_inverse`` returns the unique values and, for every
    input element, the position of its value among them."""
    data = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
    want_values = ragged.array([1, 2, 3, 4])
    want_inverse = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3])

    got_values, got_inverse = ragged.unique_inverse(data)

    for got, want in ((got_values, want_values), (got_inverse, want_inverse)):
        assert ak.to_list(got) == ak.to_list(want)

def test_can_take_normal_array():
    """Ragged input: inverse indices refer to the elements in flattened order."""
    data = ragged.array([[1, 2, 2], [3], [3, 3], [4, 4, 4], [4]])
    want_values = ragged.array([1, 2, 3, 4])
    want_inverse = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3])

    got_values, got_inverse = ragged.unique_inverse(data)

    for got, want in ((got_values, want_values), (got_inverse, want_inverse)):
        assert ak.to_list(got) == ak.to_list(want)


def test_can_take_scalar():
    """A single-element array yields that element and the inverse index ``[0]``."""
    data = ragged.array([5])
    want_values = ragged.array([5])
    want_inverse = ragged.array([0])

    got_values, got_inverse = ragged.unique_inverse(data)

    for got, want in ((got_values, want_values), (got_inverse, want_inverse)):
        assert ak.to_list(got) == ak.to_list(want)

#unique_all tests
def test_can_all_none():
    """``unique_all`` must reject ``None`` with ``TypeError``."""
    with pytest.raises(TypeError):
        outcome = ragged.unique_all(None)
        assert outcome is None

def test_can_all_list():
    """``unique_all`` must reject a plain Python list with ``TypeError``."""
    plain_list = [1, 2, 4, 3, 4, 5, 6, 20]
    with pytest.raises(TypeError):
        outcome = ragged.unique_all(plain_list)
        assert outcome is None

def test_can_all_simple_array():
    """Flat input: check values, first-occurrence indices, inverse indices and counts."""
    data = ragged.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
    want_values = ragged.array([1, 2, 3, 4])
    want_indices = ragged.array([0, 1, 3, 6])
    want_inverse = ragged.array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3])
    want_counts = ragged.array([1, 2, 3, 4])

    got_values, got_indices, got_inverse, got_counts = ragged.unique_all(data)

    comparisons = (
        (got_values, want_values),
        (got_indices, want_indices),
        (got_inverse, want_inverse),
        (got_counts, want_counts),
    )
    for got, want in comparisons:
        assert ak.to_list(got) == ak.to_list(want)

def test_can_all_normal_array():
    """Ragged input: all four outputs are computed over the flattened elements.

    The flattened data is ``[2, 2, 2, 3, 3, 5, 4, 4, 4, 4]``, so the value 5
    first appears at index 5, before the first 4 at index 6.
    """
    data = ragged.array([[2, 2, 2], [3], [3, 5], [4, 4, 4], [4]])
    want_values = ragged.array([2, 3, 4, 5])
    want_indices = ragged.array([0, 3, 6, 5])
    want_inverse = ragged.array([0, 0, 0, 1, 1, 3, 2, 2, 2, 2])
    want_counts = ragged.array([3, 2, 4, 1])

    got_values, got_indices, got_inverse, got_counts = ragged.unique_all(data)

    comparisons = (
        (got_values, want_values),
        (got_indices, want_indices),
        (got_inverse, want_inverse),
        (got_counts, want_counts),
    )
    for got, want in comparisons:
        assert ak.to_list(got) == ak.to_list(want)

def test_can_all_scalar():
    """A single-element array: the value itself, index 0, inverse index 0 and count 1."""
    data = ragged.array([5])
    want_values = ragged.array([5])
    want_indices = ragged.array([0])
    want_inverse = ragged.array([0])
    want_counts = ragged.array([1])

    got_values, got_indices, got_inverse, got_counts = ragged.unique_all(data)

    comparisons = (
        (got_values, want_values),
        (got_indices, want_indices),
        (got_inverse, want_inverse),
        (got_counts, want_counts),
    )
    for got, want in comparisons:
        assert ak.to_list(got) == ak.to_list(want)
16 changes: 0 additions & 16 deletions tests/test_spec_set_functions.py

This file was deleted.

Loading