Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: extend to_layout options #2763

Merged
merged 17 commits into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/awkward/_connect/numba/arrayview.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,7 @@ class ArrayView:
def fromarray(cls, array):
behavior = behavior_of(array)
layout = ak.operations.to_layout(
array,
allow_record=False,
allow_other=False,
array, allow_record=False, allow_unknown=False, primitive_policy="error"
)

return ArrayView(
Expand Down Expand Up @@ -579,7 +577,9 @@ class RecordView:
@classmethod
def fromrecord(cls, record):
behavior = behavior_of(record)
layout = ak.operations.to_layout(record, allow_record=True, allow_other=False)
layout = ak.operations.to_layout(
record, allow_record=True, allow_unknown=False, primitive_policy="error"
)
assert isinstance(layout, ak.record.Record)
arraylayout = layout.array
return RecordView(
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/_connect/numexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def evaluate(
arguments = getArguments(names, local_dict, global_dict)

arrays = [
ak.operations.to_layout(x, allow_record=True, allow_other=True)
ak.operations.to_layout(x, allow_record=True, allow_unknown=True)
for x in arguments
]

Expand Down Expand Up @@ -129,7 +129,7 @@ def re_evaluate(local_dict=None):
arguments = getArguments(names, local_dict)

arrays = [
ak.operations.to_layout(x, allow_record=True, allow_other=True)
ak.operations.to_layout(x, allow_record=True, allow_unknown=True)
for x in arguments
]

Expand Down
120 changes: 73 additions & 47 deletions src/awkward/_connect/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,13 @@ def _to_rectilinear(arg, backend: Backend):
)
# Otherwise, cast to layout and convert
else:
layout = ak.to_layout(arg, allow_record=False, allow_other=False)
layout = ak.to_layout(
arg,
allow_record=False,
allow_unknown=False,
primitive_policy="error",
string_policy="error",
)
return layout.to_backend(backend).to_backend_array(allow_missing=True)
elif isinstance(arg, tuple):
return tuple(_to_rectilinear(x, backend) for x in arg)
Expand Down Expand Up @@ -96,7 +102,14 @@ def array_function(func, types, args, kwargs: dict[str, Any], behavior: Mapping
result = func(*rectilinear_args, **rectilinear_kwargs)
# We want the result to be a layout (this will fail for functions whose return values cannot be converted to arrays)
out = ak.operations.ak_to_layout._impl(
result, allow_record=True, allow_other=True, regulararray=True
result,
allow_record=True,
allow_unknown=True,
allow_none=True,
regulararray=True,
use_from_iter=True,
primitive_policy="pass-through",
string_policy="pass-through",
)
return wrap_layout(out, behavior=behavior, allow_other=True)

Expand Down Expand Up @@ -136,12 +149,13 @@ def _array_ufunc_custom_cast(inputs, behavior: Mapping | None, backend):
nextinputs = []
for x in args:
cast_fcn = find_custom_cast(x, behavior)
if cast_fcn is not None:
x = cast_fcn(x)
# String conversion
elif isinstance(x, (str, bytes)):
x = ak.to_layout([x])
maybe_layout = ak.operations.to_layout(x, allow_record=True, allow_other=True)
maybe_layout = ak.operations.to_layout(
x if cast_fcn is None else cast_fcn(x),
allow_record=True,
allow_unknown=True,
primitive_policy="pass-through",
string_policy="pass-through",
)
if isinstance(maybe_layout, (ak.contents.Content, ak.record.Record)):
maybe_layout = maybe_layout.to_backend(backend)

Expand Down Expand Up @@ -257,62 +271,74 @@ def _array_ufunc_categorical(
out = getattr(ufunc, method)(*nextinputs, **kwargs)
if not isinstance(out, tuple):
out = (out,)
return tuple(ak.to_layout(x, allow_other=True) for x in out)
return tuple(ak.to_layout(x, allow_unknown=True) for x in out)


def _array_ufunc_string_likes(
ufunc, method: str, inputs, kwargs: dict[str, Any], behavior: Mapping | None
):
assert method == "__call__"

if (
ufunc in (numpy.equal, numpy.not_equal)
and len(inputs) == 2
and isinstance(inputs[0], ak.contents.Content)
and isinstance(inputs[1], ak.contents.Content)
and inputs[0].parameter("__array__") in ("string", "bytestring")
and inputs[1].parameter("__array__") == inputs[0].parameter("__array__")
):
left, right = inputs
nplike = left.backend.nplike
if ufunc not in (numpy.equal, numpy.not_equal) or len(inputs) != 2:
return

left, right = inputs

if isinstance(left, ak.contents.Content) and left.parameter("__array__") in (
"string",
"bytestring",
):
left = ak.without_parameters(left, highlevel=False)
elif isinstance(left, (str, bytes)):
left = ak.without_parameters([left], highlevel=False)
else:
return

if isinstance(right, ak.contents.Content) and right.parameter("__array__") in (
"string",
"bytestring",
):
right = ak.without_parameters(right, highlevel=False)
elif isinstance(right, (str, bytes)):
right = ak.without_parameters([right], highlevel=False)
else:
return

# first condition: string lengths must be the same
left_counts_layout = ak._do.reduce(
left, ak._reducers.Count(), axis=-1, mask=False
)
assert left_counts_layout.is_numpy
right_counts_layout = ak._do.reduce(
right, ak._reducers.Count(), axis=-1, mask=False
)
assert right_counts_layout.is_numpy
left, right = ak.broadcast_arrays(left, right, highlevel=False, depth_limit=1)
nplike = left.backend.nplike

counts1 = nplike.asarray(left_counts_layout.data)
counts2 = nplike.asarray(right_counts_layout.data)
# first condition: string lengths must be the same
left_counts_layout = ak._do.reduce(left, ak._reducers.Count(), axis=-1, mask=False)
assert left_counts_layout.is_numpy
right_counts_layout = ak._do.reduce(
right, ak._reducers.Count(), axis=-1, mask=False
)
assert right_counts_layout.is_numpy

out = counts1 == counts2
counts1 = nplike.asarray(left_counts_layout.data)
counts2 = nplike.asarray(right_counts_layout.data)

# only compare characters in strings that are possibly equal (same length)
possible = nplike.logical_and(out, counts1)
possible_counts = counts1[possible]
out = counts1 == counts2

if len(possible_counts) > 0:
onepossible = left[possible]
twopossible = right[possible]
reduced = ak.operations.all(
wrap_layout(onepossible) == wrap_layout(twopossible),
axis=-1,
highlevel=False,
)
# update same-length strings with a verdict about their characters
out[possible] = reduced.data
# only compare characters in strings that are possibly equal (same length)
possible = nplike.logical_and(out, counts1)
possible_counts = counts1[possible]

if ufunc is numpy.not_equal:
out = nplike.logical_not(out)
if len(possible_counts) > 0:
onepossible = left[possible]
twopossible = right[possible]
reduced = ak.operations.all(
wrap_layout(onepossible) == wrap_layout(twopossible),
axis=-1,
highlevel=False,
)
# update same-length strings with a verdict about their characters
out[possible] = reduced.data

return (ak.contents.NumpyArray(out),)
if ufunc is numpy.not_equal:
out = nplike.logical_not(out)

return (ak.contents.NumpyArray(out),)


def array_ufunc(ufunc, method: str, inputs, kwargs: dict[str, Any]):
Expand Down
6 changes: 5 additions & 1 deletion src/awkward/_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
from awkward._nplikes.jax import Jax
from awkward._nplikes.numpy import Numpy
from awkward._nplikes.numpylike import NumpyMetadata
from awkward._typing import TYPE_CHECKING
from awkward.errors import AxisError

if TYPE_CHECKING:
from awkward.contents import Content

np = NumpyMetadata.instance()
numpy = Numpy.instance()
numpy_backend = NumpyBackend.instance()
Expand Down Expand Up @@ -177,7 +181,7 @@ def attach(x):
return layout


def maybe_posaxis(layout, axis, depth):
def maybe_posaxis(layout: Content, axis: int, depth: int) -> int | None:
from awkward.record import Record

if isinstance(layout, Record):
Expand Down
2 changes: 1 addition & 1 deletion src/awkward/_nplikes/typetracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,7 @@ def arange(
else:
length = unknown_length

default_int_type = np.int64 if (ak._util.win or ak._util.bits32) else np.int32
default_int_type = np.int32 if (ak._util.win or ak._util.bits32) else np.int64
jpivarski marked this conversation as resolved.
Show resolved Hide resolved
return TypeTracerArray._new(dtype or default_int_type, (length,))

def meshgrid(
Expand Down
30 changes: 19 additions & 11 deletions src/awkward/_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,16 +279,17 @@ def normalise_item(item, backend: Backend) -> SliceItem:
nplike = nplike_of_obj(item, default=None)
# We can end up with non-array objects associated with an nplike
if nplike is not None and nplike.is_own_array(item):
# Is it a scalar, not array?
if len(item.shape) == 0:
raise AssertionError(
"scalar arrays should be handled by integer-like indexing"
)
else:
layout = ak.operations.ak_to_layout._impl(
item, allow_record=False, allow_other=True, regulararray=False
)
return normalise_item(layout, backend)
layout = ak.operations.ak_to_layout._impl(
item,
allow_record=False,
allow_unknown=False,
allow_none=False,
regulararray=False,
use_from_iter=False,
primitive_policy="error",
string_policy="as-characters",
)
return normalise_item(layout, backend)

# Empty index array
elif len(item) == 0:
Expand All @@ -301,7 +302,14 @@ def normalise_item(item, backend: Backend) -> SliceItem:
# Other iterable
else:
layout = ak.operations.ak_to_layout._impl(
item, allow_record=False, allow_other=True, regulararray=False
item,
allow_record=False,
allow_unknown=False,
allow_none=False,
regulararray=False,
use_from_iter=True,
primitive_policy="error",
string_policy="as-characters",
)
return normalise_item(layout, backend)

Expand Down
30 changes: 19 additions & 11 deletions src/awkward/contents/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,16 +649,17 @@ def _getitem(self, where):
nplike = nplike_of_obj(where, default=None)
# We can end up with non-array objects associated with an nplike
if nplike is not None and nplike.is_own_array(where):
# Is it a scalar, not array?
if len(where.shape) == 0:
raise AssertionError(
"scalar arrays should be handled by integer-like indexing"
)
else:
layout = ak.operations.ak_to_layout._impl(
where, allow_record=False, allow_other=False, regulararray=False
)
return self._getitem(layout)
layout = ak.operations.ak_to_layout._impl(
where,
allow_record=False,
allow_unknown=False,
allow_none=False,
regulararray=False,
use_from_iter=False,
primitive_policy="error",
string_policy="as-characters",
)
return self._getitem(layout)

elif len(where) == 0:
return self._carry(
Expand All @@ -674,7 +675,14 @@ def _getitem(self, where):

else:
layout = ak.operations.ak_to_layout._impl(
where, allow_record=False, allow_other=False, regulararray=False
where,
allow_record=False,
allow_unknown=False,
allow_none=False,
regulararray=False,
use_from_iter=True,
primitive_policy="error",
string_policy="as-characters",
)
return self._getitem(layout)

Expand Down
2 changes: 1 addition & 1 deletion src/awkward/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ def __init__(

else:
layout = ak.operations.to_layout(
data, allow_record=False, regulararray=False
data, allow_record=False, regulararray=False, primitive_policy="error"
)

if not isinstance(layout, ak.contents.Content):
Expand Down
4 changes: 3 additions & 1 deletion src/awkward/operations/ak_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ def all(

def _impl(array, axis, keepdims, mask_identity, highlevel, behavior):
axis = regularize_axis(axis)
layout = ak.operations.to_layout(array, allow_record=False, allow_other=False)
layout = ak.operations.to_layout(
array, allow_record=False, allow_unknown=False, primitive_policy="error"
)
behavior = behavior_of(array, behavior=behavior)
reducer = ak._reducers.All()

Expand Down
4 changes: 3 additions & 1 deletion src/awkward/operations/ak_any.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ def any(

def _impl(array, axis, keepdims, mask_identity, highlevel, behavior):
axis = regularize_axis(axis)
layout = ak.operations.to_layout(array, allow_record=False, allow_other=False)
layout = ak.operations.to_layout(
array, allow_record=False, allow_unknown=False, primitive_policy="error"
)
behavior = behavior_of(array, behavior=behavior)
reducer = ak._reducers.Any()

Expand Down
8 changes: 6 additions & 2 deletions src/awkward/operations/ak_argcartesian.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,9 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior):
backend = backend_of(*arrays.values(), default=cpu, coerce_to_common=True)
layouts = {
n: ak._do.local_index(
ak.operations.to_layout(x, allow_record=False, allow_other=False),
ak.operations.to_layout(
x, allow_record=False, allow_unknown=False, primitive_policy="error"
),
axis,
).to_backend(backend)
for n, x in arrays.items()
Expand All @@ -121,7 +123,9 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior):
backend = backend_of(*arrays, default=cpu, coerce_to_common=True)
layouts = [
ak._do.local_index(
ak.operations.to_layout(x, allow_record=False, allow_other=False),
ak.operations.to_layout(
x, allow_record=False, allow_unknown=False, primitive_policy="error"
),
axis,
).to_backend(backend)
for x in arrays
Expand Down
4 changes: 3 additions & 1 deletion src/awkward/operations/ak_argcombinations.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,9 @@ def _impl(
raise ValueError("the 'axis' for argcombinations must be non-negative")
else:
layout = ak._do.local_index(
ak.operations.to_layout(array, allow_record=False, allow_other=False),
ak.operations.to_layout(
array, allow_record=False, allow_unknown=False, primitive_policy="error"
),
axis,
)
out = ak._do.combinations(
Expand Down
4 changes: 3 additions & 1 deletion src/awkward/operations/ak_argmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@ def nanargmax(

def _impl(array, axis, keepdims, mask_identity, highlevel, behavior):
axis = regularize_axis(axis)
layout = ak.operations.to_layout(array, allow_record=False, allow_other=False)
layout = ak.operations.to_layout(
array, allow_record=False, allow_unknown=False, primitive_policy="error"
)
behavior = behavior_of(array, behavior=behavior)
reducer = ak._reducers.ArgMax()

Expand Down
Loading