Skip to content

Commit

Permalink
feat!: prepare for 2.5.0 (#2798)
Browse files Browse the repository at this point in the history
* fix: drop lengths recursively (#2775)

* fix: drop lengths recursively

* test: ensure slicing from-typetracer works

* chore: bump pyproject.toml

* chore: remove to_categorical (#2779)
  • Loading branch information
agoose77 authored Nov 7, 2023
1 parent 3012363 commit bd4cb8d
Show file tree
Hide file tree
Showing 26 changed files with 64 additions and 508 deletions.
2 changes: 1 addition & 1 deletion docs/redirects.json
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@
"_auto/ak.to_arrow.any-ext": "../reference/generated/ak.to_arrow.html",
"_auto/ak.to_arrow_table.any-ext": "../reference/generated/ak.to_arrow_table.html",
"_auto/ak.to_buffers.any-ext": "../reference/generated/ak.to_buffers.html",
"_auto/ak.to_categorical.any-ext": "../reference/generated/ak.to_categorical.html",
"_auto/ak.to_categorical.any-ext": "../reference/generated/ak.str.to_categorical.html",
"_auto/ak.to_cupy.any-ext": "../reference/generated/ak.to_cupy.html",
"_auto/ak.to_jax.any-ext": "../reference/generated/ak.to_jax.html",
"_auto/ak.to_json.any-ext": "../reference/generated/ak.to_json.html",
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/toctree.txt
Original file line number Diff line number Diff line change
Expand Up @@ -232,10 +232,10 @@
.. toctree::
:caption: Arrays of categorical data

generated/ak.to_categorical
generated/ak.from_categorical
generated/ak.is_categorical
generated/ak.categories
generated/ak.str.to_categorical

.. toctree::
:caption: Indexing and grouping
Expand Down
4 changes: 2 additions & 2 deletions docs/user-guide/how-to-create-strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,15 @@ Categorical strings

A large set of strings with few unique values is more efficiently manipulated as integers than as strings. In Pandas, this is [categorical data](https://pandas.pydata.org/pandas-docs/stable/user_guide/categorical.html); in R, it's called a [factor](https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/factor); and in Arrow and Parquet, it's [dictionary encoding](https://arrow.apache.org/blog/2019/09/05/faster-strings-cpp-parquet/).

The {func}`ak.to_categorical` function makes Awkward Arrays categorical in this sense. {func}`ak.to_arrow` and {func}`ak.to_parquet` recognize categorical data and convert it to the corresponding Arrow and Parquet types.
The {func}`ak.str.to_categorical` function (which requires PyArrow) makes Awkward Arrays categorical in this sense. {func}`ak.to_arrow` and {func}`ak.to_parquet` recognize categorical data and convert it to the corresponding Arrow and Parquet types.

```{code-cell} ipython3
uncategorized = ak.Array(["three", "one", "two", "two", "three", "one", "one", "one"])
uncategorized
```

```{code-cell} ipython3
categorized = ak.to_categorical(uncategorized)
categorized = ak.str.to_categorical(uncategorized)
categorized
```

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ build-backend = "hatchling.build"

[project]
name = "awkward"
version = "2.4.9"
version = "2.5.0rc1"
description = "Manipulate JSON-like data with NumPy-like idioms."
license = { text = "BSD-3-Clause" }
requires-python = ">=3.8"
Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def _to_typetracer(self, forget_length: bool) -> Self:
tt = TypeTracer.instance()
return BitMaskedArray(
self._mask.to_nplike(tt),
self._content._to_typetracer(False),
self._content._to_typetracer(forget_length),
self._valid_when,
unknown_length if forget_length else self.length,
self._lsb_order,
Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def _to_typetracer(self, forget_length: bool) -> Self:
mask = self._mask.to_nplike(tt)
return ByteMaskedArray(
mask.forget_length() if forget_length else mask,
self._content._to_typetracer(False),
self._content._to_typetracer(forget_length),
self._valid_when,
parameters=self._parameters,
)
Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def _to_typetracer(self, forget_length: bool) -> Self:
index = self._index.to_nplike(TypeTracer.instance())
return IndexedArray(
index.forget_length() if forget_length else index,
self._content._to_typetracer(False),
self._content._to_typetracer(forget_length),
parameters=self._parameters,
)

Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def _to_typetracer(self, forget_length: bool) -> Self:
index = self._index.to_nplike(TypeTracer.instance())
return IndexedOptionArray(
index.forget_length() if forget_length else index,
self._content._to_typetracer(False),
self._content._to_typetracer(forget_length),
parameters=self._parameters,
)

Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def _to_typetracer(self, forget_length: bool) -> Self:
return ListArray(
starts.forget_length() if forget_length else starts,
self._stops.to_nplike(tt),
self._content._to_typetracer(False),
self._content._to_typetracer(forget_length),
parameters=self._parameters,
)

Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def _to_typetracer(self, forget_length: bool) -> Self:
offsets = self._offsets.to_nplike(TypeTracer.instance())
return ListOffsetArray(
offsets.forget_length() if forget_length else offsets,
self._content._to_typetracer(False),
self._content._to_typetracer(forget_length),
parameters=self._parameters,
)

Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/unionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def _to_typetracer(self, forget_length: bool) -> Self:
return UnionArray(
tags.forget_length() if forget_length else tags,
self._index.to_nplike(tt),
[x._to_typetracer(False) for x in self._contents],
[x._to_typetracer(forget_length) for x in self._contents],
parameters=self._parameters,
)

Expand Down
1 change: 0 additions & 1 deletion src/awkward/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@
from awkward.operations.ak_to_arrow_table import *
from awkward.operations.ak_to_backend import *
from awkward.operations.ak_to_buffers import *
from awkward.operations.ak_to_categorical import *
from awkward.operations.ak_to_cupy import *
from awkward.operations.ak_to_dataframe import *
from awkward.operations.ak_to_feather import *
Expand Down
2 changes: 1 addition & 1 deletion src/awkward/operations/ak_categories.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def categories(array, highlevel=True):
#ak.contents.IndexedOptionArray labeled with parameter
`"__array__" = "categorical"`), then this function returns its categories.
See also #ak.is_categorical, #ak.to_categorical, #ak.from_categorical.
See also #ak.is_categorical, #ak.str.to_categorical, #ak.from_categorical.
"""
# Dispatch
yield (array,)
Expand Down
3 changes: 1 addition & 2 deletions src/awkward/operations/ak_from_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ def from_categorical(array, *, highlevel=True, behavior=None):
size of the dataset. (Conversion to categorical is expensive; conversion
from categorical is cheap.)
See also #ak.is_categorical, #ak.categories, #ak.to_categorical,
#ak.str.to_categorical, #ak.from_categorical.
See also #ak.is_categorical, #ak.categories, #ak.str.to_categorical.
"""
# Dispatch
yield (array,)
Expand Down
2 changes: 1 addition & 1 deletion src/awkward/operations/ak_is_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def is_categorical(array):
`"__array__" = "categorical"`), then this function returns True;
otherwise, it returns False.
See also #ak.categories, #ak.to_categorical, #ak.from_categorical.
See also #ak.categories, #ak.str.to_categorical, #ak.from_categorical.
"""
# Dispatch
yield (array,)
Expand Down
160 changes: 0 additions & 160 deletions src/awkward/operations/ak_to_categorical.py

This file was deleted.

Loading

0 comments on commit bd4cb8d

Please sign in to comment.