Skip to content

Commit

Permalink
simplify
Browse files (browse the repository at this point in the history)
  • Loading branch information
lhoestq committed Jan 6, 2025
1 parent a13ccf2 commit 1222bd8
Showing 1 changed file with 2 additions and 5 deletions.
7 changes: 2 additions & 5 deletions src/datasets/arrow_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3352,15 +3352,12 @@ def apply_function_on_filtered_inputs(pa_inputs, indices, check_same_num_example
  validate_function_output(processed_inputs, indices)
  if not update_data:
      return None # Nothing to update, let's move on
- if shard._format_type or input_columns:
-     # TODO(QL, MS): ideally the behavior should be the same even if the dataset is formatted (may require major release)
-     inputs_to_merge = dict(zip(pa_inputs.column_names, pa_inputs.itercolumns()))
- elif isinstance(inputs, LazyDict):
+ if isinstance(inputs, LazyDict):
      inputs_to_merge = {
          k: (v if k not in inputs.keys_to_format else pa_inputs[k]) for k, v in inputs.data.items()
      }
  else:
-     inputs_to_merge = inputs
+     inputs_to_merge = dict(zip(pa_inputs.column_names, pa_inputs.itercolumns()))
  if remove_columns is not None:
      for column in remove_columns:
          # `function` can modify input in-place causing column to be already removed.
Expand Down

0 comments on commit 1222bd8

Please sign in to comment.