Skip to content

Commit

Permalink
Merge branch 'main' into dependabot/github_actions/actions-e9810afe0a
Browse files Browse the repository at this point in the history
  • Loading branch information
ianna authored Jan 16, 2025
2 parents b7b2d66 + c075948 commit c089750
Show file tree
Hide file tree
Showing 11 changed files with 79 additions and 46 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ repos:
additional_dependencies: [pyyaml]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.6
rev: v0.9.1
hooks:
- id: ruff
args: ["--fix", "--show-fixes"]
Expand Down Expand Up @@ -62,7 +62,7 @@ repos:
files: ^tests/

- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.30.0
rev: 0.31.0
hooks:
- id: check-github-workflows
args: ["--verbose"]
Expand Down
4 changes: 2 additions & 2 deletions dev/generate-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1254,7 +1254,7 @@ def gencudaunittests(specdict):
if test["error"]:
f.write(
f"""
error_message = re.escape("{test['message']} in compiled CUDA code ({spec.templatized_kernel_name})")
error_message = re.escape("{test["message"]} in compiled CUDA code ({spec.templatized_kernel_name})")
"""
)
f.write(
Expand Down Expand Up @@ -1326,7 +1326,7 @@ def genunittests():
os.path.join(CURRENT_DIR, "..", "awkward-cpp", "tests-spec-explicit", func),
"w",
) as file:
file.write("import pytest\n" "import numpy\n" "import kernels\n\n")
file.write("import pytest\nimport numpy\nimport kernels\n\n")
for test in function["tests"]:
num += 1
funcName = "def test_" + function["name"] + "_" + str(num) + "():\n"
Expand Down
42 changes: 21 additions & 21 deletions src/awkward/_connect/avro.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def dum_dat(self, dtype, count):
elif dtype["type"] == "boolean":
return f"0 node{count}-data <- stack "
elif dtype["type"] == "bytes":
return f"1 node{count}-offsets +<- stack 97 node{count+1}-data <- stack "
return f"1 node{count}-offsets +<- stack 97 node{count + 1}-data <- stack "
elif dtype["type"] == "string":
return f"0 node{count}-offsets +<- stack "
elif dtype["type"] == "enum":
Expand All @@ -224,18 +224,18 @@ def rec_exp_json_code(
if file["type"] == "null":
aform = ak.forms.IndexedOptionForm(
"i64",
ak.forms.EmptyForm(form_key=f"node{form_next_id+1}"),
ak.forms.EmptyForm(form_key=f"node{form_next_id + 1}"),
form_key=f"node{form_next_id}",
)
declarations.append(f"output node{form_next_id+1}-data uint8 \n")
declarations.append(f"output node{form_next_id + 1}-data uint8 \n")
declarations.append(f"output node{form_next_id}-index int64 \n")
form_keys.append(f"node{form_next_id+1}-data")
form_keys.append(f"node{form_next_id + 1}-data")
form_keys.append(f"node{form_next_id}-index")
exec_code.append(
"\n" + " " * ind + f"-1 node{form_next_id}-index <- stack"
)
exec_code.append(
"\n" + " " * ind + f"0 node{form_next_id+1}-data <- stack"
"\n" + " " * ind + f"0 node{form_next_id + 1}-data <- stack"
)

return (
Expand Down Expand Up @@ -292,14 +292,14 @@ def rec_exp_json_code(
ak.forms.NumpyForm(
"uint8",
parameters={"__array__": "char"},
form_key=f"node{form_next_id+1}",
form_key=f"node{form_next_id + 1}",
),
parameters={"__array__": "string"},
form_key=f"node{form_next_id}",
)
declarations.append(f"output node{form_next_id+1}-data uint8 \n")
declarations.append(f"output node{form_next_id + 1}-data uint8 \n")
declarations.append(f"output node{form_next_id}-offsets int64 \n")
form_keys.append(f"node{form_next_id+1}-data")
form_keys.append(f"node{form_next_id + 1}-data")
form_keys.append(f"node{form_next_id}-offsets")
init_code.append(f"0 node{form_next_id}-offsets <- stack\n")

Expand All @@ -310,7 +310,7 @@ def rec_exp_json_code(
"\n" + " " * ind + f"dup node{form_next_id}-offsets +<- stack\n"
)
exec_code.append(
"\n" + " " * (ind + 1) + f"stream #B-> node{form_next_id+1}-data"
"\n" + " " * (ind + 1) + f"stream #B-> node{form_next_id + 1}-data"
)

if self.is_primitive:
Expand Down Expand Up @@ -449,15 +449,15 @@ def rec_exp_json_code(
)

elif file["type"] == "bytes":
declarations.append(f"output node{form_next_id+1}-data uint8\n")
declarations.append(f"output node{form_next_id + 1}-data uint8\n")
declarations.append(f"output node{form_next_id}-offsets int64\n")
form_keys.append(f"node{form_next_id+1}-data")
form_keys.append(f"node{form_next_id + 1}-data")
form_keys.append(f"node{form_next_id}-offsets")
aform = ak.forms.ListOffsetForm(
"i64",
ak.forms.NumpyForm(
"uint8",
form_key=f"node{form_next_id+1}",
form_key=f"node{form_next_id + 1}",
parameters={"__array__": "byte"},
),
parameters={"__array__": "bytestring"},
Expand All @@ -470,7 +470,7 @@ def rec_exp_json_code(
"\n" + " " * ind + f"dup node{form_next_id}-offsets +<- stack\n"
)
exec_code.append(
"\n" + " " * (ind + 1) + f"stream #B-> node{form_next_id+1}-data"
"\n" + " " * (ind + 1) + f"stream #B-> node{form_next_id + 1}-data"
)

return (
Expand Down Expand Up @@ -758,12 +758,12 @@ def rec_exp_json_code(
)

elif file["type"] == "fixed":
form_keys.append(f"node{form_next_id+1}-data")
declarations.append(f"output node{form_next_id+1}-data uint8 \n")
form_keys.append(f"node{form_next_id + 1}-data")
declarations.append(f"output node{form_next_id + 1}-data uint8 \n")
aform = ak.forms.RegularForm(
ak.forms.NumpyForm(
"uint8",
form_key=f"node{form_next_id+1}",
form_key=f"node{form_next_id + 1}",
parameters={"__array__": "byte"},
),
parameters={"__array__": "bytestring"},
Expand All @@ -773,7 +773,7 @@ def rec_exp_json_code(

temp = file["size"]
exec_code.append(
"\n" + " " * ind + f"{temp} stream #B-> node{form_next_id+1}-data"
"\n" + " " * ind + f"{temp} stream #B-> node{form_next_id + 1}-data"
)

return (
Expand All @@ -794,10 +794,10 @@ def rec_exp_json_code(
ak.forms.NumpyForm(
"uint8",
parameters={"__array__": "char"},
form_key=f"node{form_next_id+2}",
form_key=f"node{form_next_id + 2}",
),
parameters={"__array__": "string"},
form_key=f"node{form_next_id+1}",
form_key=f"node{form_next_id + 1}",
),
parameters={"__array__": "categorical"},
form_key=f"node{form_next_id}",
Expand All @@ -815,10 +815,10 @@ def rec_exp_json_code(
for elem in x:
dat.append(np.uint8(ord(elem)))

container[f"node{form_next_id+1}-offsets"] = np.array(
container[f"node{form_next_id + 1}-offsets"] = np.array(
offset, dtype=np.int64
)
container[f"node{form_next_id+2}-data"] = np.array(dat, dtype=np.uint8)
container[f"node{form_next_id + 2}-data"] = np.array(dat, dtype=np.uint8)
exec_code.append(
"\n" + " " * ind + f"stream zigzag-> node{form_next_id}-index"
)
Expand Down
2 changes: 1 addition & 1 deletion src/awkward/_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def note(self) -> str:
arguments.append(f"\n {valuestr}")

extra_line = "" if len(arguments) == 0 else "\n "
calling_note = f'{self.name}({"".join(arguments)}{extra_line})'
calling_note = f"{self.name}({''.join(arguments)}{extra_line})"
return f"""
This error occurred while calling
Expand Down
3 changes: 1 addition & 2 deletions src/awkward/_nplikes/jax.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ def ma(self):
@property
def char(self):
raise ValueError(
"JAX arrays cannot do string manipulations until JAX implements "
"numpy.char"
"JAX arrays cannot do string manipulations until JAX implements numpy.char"
)

@property
Expand Down
12 changes: 6 additions & 6 deletions src/awkward/_reducers.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,9 +647,9 @@ def initial(self) -> float | None:
def _identity_for(self, dtype: DTypeLike | None) -> float:
dtype = np.dtype(dtype)

assert (
dtype.kind.upper() != "M"
), "datetime64/timedelta64 should be converted to int64 before reduction"
assert dtype.kind.upper() != "M", (
"datetime64/timedelta64 should be converted to int64 before reduction"
)
if self._initial is None:
if dtype in (
np.int8,
Expand Down Expand Up @@ -755,9 +755,9 @@ def initial(self):
def _identity_for(self, dtype: DTypeLike | None):
dtype = np.dtype(dtype)

assert (
dtype.kind.upper() != "M"
), "datetime64/timedelta64 should be converted to int64 before reduction"
assert dtype.kind.upper() != "M", (
"datetime64/timedelta64 should be converted to int64 before reduction"
)
if self._initial is None:
if dtype in (
np.int8,
Expand Down
6 changes: 3 additions & 3 deletions src/awkward/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ def __init__(
raise TypeError("Index data must be one-dimensional")

if np.issubdtype(self._data.dtype, np.longlong):
assert (
np.dtype(np.longlong).itemsize == 8
), "longlong is always 64-bit, right?"
assert np.dtype(np.longlong).itemsize == 8, (
"longlong is always 64-bit, right?"
)

self._data = self._data.view(np.int64)

Expand Down
34 changes: 34 additions & 0 deletions src/awkward/operations/ak_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,40 @@ def transform(
`"none"`
The output arrays will not be given parameters.
Performance Tip
================
#ak.transform traverses the layout of (potentially multiple) arrays only once.
This is useful when you want to apply a batch of transformations in a single
layout traversal, because traversing the layout multiple times can be inefficient.
Consider the following example:
>>> def batch_of_operations(array):
... return np.sqrt(np.sin(array) + 1) - 1
...
>>> def apply_batch_of_operations(layout, **kwargs):
... if layout.is_numpy:
... return ak.contents.NumpyArray(
... batch_of_operations(layout.data)
... )
...
>>> array = ak.Array(
... [[[[[1.1, 2.2, 3.3], []], None], []],
... [[[[4.4, 5.5]]]]]
... )
>>> %timeit ak.transform(apply_batch_of_operations, array)
... 68.5 μs ± 663 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
>>> %timeit batch_of_operations(array)
... 1.07 ms ± 39.1 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
The first `%timeit` cell shows the time it takes to apply the batch of operations using #ak.transform,
which applies all of the operations in a single traversal of the layout. The second `%timeit` cell shows
the runtime of applying the operations directly to the array, which traverses the layout multiple times:
one layout traversal for each operation.
See also: #ak.is_valid and #ak.valid_when to check the validity of transformed
outputs.
"""
Expand Down
6 changes: 3 additions & 3 deletions src/awkward/prettyprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,11 +443,11 @@ def valuestr(

def bytes_repr(nbytes: int) -> str:
count, unit = (
(f"{nbytes / 1e9 :,.1f}", "GB")
(f"{nbytes / 1e9:,.1f}", "GB")
if nbytes > 1e9
else (f"{nbytes / 1e6 :,.1f}", "MB")
else (f"{nbytes / 1e6:,.1f}", "MB")
if nbytes > 1e6
else (f"{nbytes / 1e3 :,.1f}", "kB")
else (f"{nbytes / 1e3:,.1f}", "kB")
if nbytes > 1e3
else (f"{nbytes:,}", "B")
)
Expand Down
6 changes: 3 additions & 3 deletions tests-cuda/test_2922b_new_cuda_kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,9 +1197,9 @@ def test_0093_simplify_uniontypes_and_optiontypes_numpyarray_merge():
cuda_two = ak.to_backend(two, "cuda", highlevel=False)

cuda_three = cuda_one._mergemany([cuda_two])
assert ak.to_numpy(cuda_three).dtype == np.dtype(
z
), f"{x} {y} {z} {ak.to_numpy(cuda_three).dtype.type}"
assert ak.to_numpy(cuda_three).dtype == np.dtype(z), (
f"{x} {y} {z} {ak.to_numpy(cuda_three).dtype.type}"
)
assert to_list(cuda_three) == to_list(
np.concatenate([ak.to_numpy(cuda_one), ak.to_numpy(two)])
)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_0093_simplify_uniontypes_and_optiontypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ def test_numpyarray_merge():
one = ak.contents.NumpyArray(np.array([1, 2, 3], dtype=x))
two = ak.contents.NumpyArray(np.array([4, 5], dtype=y))
three = one._mergemany([two])
assert ak.to_numpy(three).dtype == np.dtype(
z
), f"{x} {y} {z} {ak.to_numpy(three).dtype.type}"
assert ak.to_numpy(three).dtype == np.dtype(z), (
f"{x} {y} {z} {ak.to_numpy(three).dtype.type}"
)
assert to_list(three) == to_list(
np.concatenate([ak.to_numpy(one), ak.to_numpy(two)])
)
Expand Down

0 comments on commit c089750

Please sign in to comment.