diff --git a/modin/core/dataframe/pandas/partitioning/partition_manager.py b/modin/core/dataframe/pandas/partitioning/partition_manager.py
index fea0c686a96..798f14ea75d 100644
--- a/modin/core/dataframe/pandas/partitioning/partition_manager.py
+++ b/modin/core/dataframe/pandas/partitioning/partition_manager.py
@@ -805,7 +805,9 @@ def split_pandas_df_into_partitions(
         parts = [
             [
                 update_bar(
-                    put_func(col_part.iloc[i : i + row_chunksize]),
+                    put_func(
+                        col_part.iloc[i : i + row_chunksize].copy()
+                    ),  # `copy()` to fix zero-copy pickling
                 )
                 for col_part in col_parts
             ]
diff --git a/modin/test/storage_formats/pandas/test_internals.py b/modin/test/storage_formats/pandas/test_internals.py
index 71310f3ee5d..4d11dda7fdc 100644
--- a/modin/test/storage_formats/pandas/test_internals.py
+++ b/modin/test/storage_formats/pandas/test_internals.py
@@ -1511,3 +1511,28 @@ def assert_materialized(obj):
 
     assert call_queue == reconstructed_queue
     assert_everything_materialized(reconstructed_queue)
+
+
+@pytest.mark.skipif(Engine.get() != "Ray", reason="Ray specific")
+def test_zero_copy_pickling():
+    """
+    Check that partition data fetched with ``ray.get`` is read-only.
+
+    A write into the fetched values must fail with the
+    "assignment destination is read-only" ``ValueError``; the test treats
+    that as evidence of zero-copy pickling.
+    """
+    import ray
+
+    df = pd.DataFrame(np.zeros((100, 100)))
+    part = ray.get(df._query_compiler._modin_frame._partitions[0][0]._data)
+
+    try:
+        part.values[0, 0] = 10
+    except ValueError as err:
+        if "assignment destination is read-only" not in str(err):
+            # Unexpected exception
+            raise
+    else:
+        # The exception must be thrown
+        raise RuntimeError("not zero copy pickling")