Skip to content

Commit

Permalink
fix: always check '.length' for 'unknown_length' (#3332)
Browse files (browse the repository at this point in the history)
Co-authored-by: Ianna Osborne <[email protected]>
  • Loading branch information
jpivarski and ianna authored Jan 10, 2025
1 parent c7e5e66 commit 32c1171
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 23 deletions.
9 changes: 6 additions & 3 deletions src/awkward/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,14 +614,14 @@ def _numbers_to_type(self, name, including_unknown):
return self.to_ByteMaskedArray()._numbers_to_type(name, including_unknown)

def _is_unique(self, negaxis, starts, parents, outlength):
if self._mask.length == 0:
if self._mask.length is not unknown_length and self._mask.length == 0:
return True
return self.to_IndexedOptionArray64()._is_unique(
negaxis, starts, parents, outlength
)

def _unique(self, negaxis, starts, parents, outlength):
if self._mask.length == 0:
if self._mask.length is not unknown_length and self._mask.length == 0:
return self
out = self.to_IndexedOptionArray64()._unique(
negaxis, starts, parents, outlength
Expand Down Expand Up @@ -810,7 +810,10 @@ def to_packed(self, recursive: bool = True) -> Self:

else:
excess_length = int(math.ceil(self._length / 8.0))
if self._mask.length == excess_length:
if (
self._mask.length is not unknown_length
and self._mask.length == excess_length
):
mask = self._mask
else:
mask = self._mask[:excess_length]
Expand Down
6 changes: 3 additions & 3 deletions src/awkward/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ def _offsets_and_flattened(self, axis: int, depth: int) -> tuple[Index, Content]

offsets, flattened = next._offsets_and_flattened(axis, depth)

if offsets.length == 0:
if offsets.length is not unknown_length and offsets.length == 0:
return (
offsets,
ak.contents.IndexedOptionArray(
Expand Down Expand Up @@ -790,14 +790,14 @@ def _numbers_to_type(self, name, including_unknown):
)

def _is_unique(self, negaxis, starts, parents, outlength):
if self._mask.length == 0:
if self._mask.length is not unknown_length and self._mask.length == 0:
return True
return self.to_IndexedOptionArray64()._is_unique(
negaxis, starts, parents, outlength
)

def _unique(self, negaxis, starts, parents, outlength):
if self._mask.length == 0:
if self._mask.length is not unknown_length and self._mask.length == 0:
return self
return self.to_IndexedOptionArray64()._unique(
negaxis, starts, parents, outlength
Expand Down
10 changes: 5 additions & 5 deletions src/awkward/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from awkward._nplikes.numpy import Numpy
from awkward._nplikes.numpy_like import IndexType, NumpyMetadata
from awkward._nplikes.placeholder import PlaceholderArray
from awkward._nplikes.shape import ShapeItem
from awkward._nplikes.shape import ShapeItem, unknown_length
from awkward._nplikes.typetracer import TypeTracer
from awkward._parameters import (
parameters_intersect,
Expand Down Expand Up @@ -781,7 +781,7 @@ def _numbers_to_type(self, name, including_unknown):
)

def _is_unique(self, negaxis, starts, parents, outlength):
if self._index.length == 0:
if self._index.length is not unknown_length and self._index.length == 0:
return True

nextindex = self._unique_index(self._index)
Expand All @@ -793,7 +793,7 @@ def _is_unique(self, negaxis, starts, parents, outlength):
return next._is_unique(negaxis, starts, parents, outlength)

def _unique(self, negaxis, starts, parents, outlength):
if self._index.length == 0:
if self._index.length is not unknown_length and self._index.length == 0:
return self

branch, depth = self.branch_depth
Expand Down Expand Up @@ -1044,7 +1044,7 @@ def _to_arrow(
return out

else:
if self._content.length == 0:
if self._content.length is not unknown_length and self._content.length == 0:
# IndexedOptionArray._to_arrow replaces -1 in the index with 0. So behind
# every masked value is self._content[0], unless self._content.length == 0.
# In that case, don't call self._content[index]; it's empty anyway.
Expand All @@ -1058,7 +1058,7 @@ def _to_arrow(
return next2._to_arrow(pyarrow, mask_node, validbytes, length, options)

def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
if self._content.length == 0:
if self._content.length is not unknown_length and self._content.length == 0:
# IndexedOptionArray._to_arrow replaces -1 in the index with 0. So behind
# every masked value is self._content[0], unless self._content.length == 0.
# In that case, don't call self._content[index]; it's empty anyway.
Expand Down
9 changes: 6 additions & 3 deletions src/awkward/contents/numpyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ def _numbers_to_type(self, name, including_unknown):
)

def _is_unique(self, negaxis, starts, parents, outlength):
if self.length == 0:
if self.length is not unknown_length and self.length == 0:
return True
elif len(self.shape) != 1:
return self.to_RegularArray()._is_unique(
Expand All @@ -711,9 +711,12 @@ def _is_unique(self, negaxis, starts, parents, outlength):
else:
out = self._unique(negaxis, starts, parents, outlength)
if isinstance(out, ak.contents.ListOffsetArray):
return out.content.length == self.length
return (
out.content.length is not unknown_length
and out.content.length == self.length
)
else:
return out.length == self.length
return out.length is not unknown_length and out.length == self.length

def _unique(self, negaxis, starts, parents, outlength):
if self.shape[0] == 0:
Expand Down
6 changes: 3 additions & 3 deletions src/awkward/contents/unionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1359,7 +1359,7 @@ def _argsort_next(
parameters=self._parameters,
mergebool=True,
)
if simplified.length == 0:
if simplified.length is not unknown_length and simplified.length == 0:
return ak.contents.NumpyArray(
self._backend.nplike.empty(0, dtype=np.int64),
parameters=None,
Expand All @@ -1374,7 +1374,7 @@ def _argsort_next(
)

def _sort_next(self, negaxis, starts, parents, outlength, ascending, stable):
if self.length == 0:
if self.length is not unknown_length and self.length == 0:
return self

simplified = type(self).simplified(
Expand All @@ -1384,7 +1384,7 @@ def _sort_next(self, negaxis, starts, parents, outlength, ascending, stable):
parameters=self._parameters,
mergebool=True,
)
if simplified.length == 0:
if simplified.length is not unknown_length and simplified.length == 0:
return simplified

if isinstance(simplified, ak.contents.UnionArray):
Expand Down
8 changes: 4 additions & 4 deletions src/awkward/contents/unmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from awkward._nplikes.array_like import ArrayLike
from awkward._nplikes.numpy import Numpy
from awkward._nplikes.numpy_like import IndexType, NumpyMetadata
from awkward._nplikes.shape import ShapeItem
from awkward._nplikes.shape import ShapeItem, unknown_length
from awkward._nplikes.typetracer import MaybeNone
from awkward._parameters import (
parameters_intersect,
Expand Down Expand Up @@ -340,7 +340,7 @@ def _offsets_and_flattened(self, axis: int, depth: int) -> tuple[Index, Content]
raise AxisError("axis=0 not allowed for flatten")
else:
offsets, flattened = self._content._offsets_and_flattened(axis, depth)
if offsets.length == 0:
if offsets.length is not unknown_length and offsets.length == 0:
return (
offsets,
UnmaskedArray(flattened, parameters=self._parameters),
Expand Down Expand Up @@ -399,12 +399,12 @@ def _numbers_to_type(self, name, including_unknown):
)

def _is_unique(self, negaxis, starts, parents, outlength):
if self._content.length == 0:
if self._content.length is not unknown_length and self._content.length == 0:
return True
return self._content._is_unique(negaxis, starts, parents, outlength)

def _unique(self, negaxis, starts, parents, outlength):
if self._content.length == 0:
if self._content.length is not unknown_length and self._content.length == 0:
return self
return self._content._unique(negaxis, starts, parents, outlength)

Expand Down
3 changes: 2 additions & 1 deletion src/awkward/operations/ak_firsts.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
_remove_named_axis,
)
from awkward._nplikes.numpy_like import NumpyMetadata
from awkward._nplikes.shape import unknown_length
from awkward._regularize import regularize_axis
from awkward.errors import AxisError

Expand Down Expand Up @@ -85,7 +86,7 @@ def _impl(array, axis, highlevel, behavior, attrs):
# and length > 0 cases.
backend = ak.backend(array)
slicer = ak.to_backend(ak.from_iter([None, 0]), backend)
if layout.length == 0:
if layout.length is not unknown_length and layout.length == 0:
out = layout[slicer[[0]]][0]
else:
out = layout[slicer[[1]]][0]
Expand Down
2 changes: 1 addition & 1 deletion src/awkward/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def _getitem_fields(self, where, only_fields: tuple[str, ...] = ()):
return self._array._getitem_fields(where)._getitem_at(self._at)

def to_packed(self, recursive: bool = True) -> Self:
if self._array.length == 1:
if self._array.length is not unknown_length and self._array.length == 1:
return Record(self._array.to_packed(recursive), self._at)
else:
return Record(self._array[self._at : self._at + 1].to_packed(recursive), 0)
Expand Down

0 comments on commit 32c1171

Please sign in to comment.