From a305eb1755297af8dbf8e8e9d9c58a8ee50798e3 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev
Date: Tue, 31 Oct 2023 21:19:40 +0100
Subject: [PATCH] FIX-#0000: make sure pickling is zero-copy for Ray

Signed-off-by: Anatoly Myachev
---
Notes (this region is not part of the commit message):
  * partition_manager.py: every row chunk sliced with `.iloc` is now
    `.copy()`-ed before it is handed to `put_func`.
  * test_internals.py: adds a Ray-only test that fetches the first
    partition with `ray.get` and expects a write into `part.values` to
    fail with "assignment destination is read-only".

 .../pandas/partitioning/partition_manager.py  |  4 +++-
 .../storage_formats/pandas/test_internals.py  | 20 +++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/modin/core/dataframe/pandas/partitioning/partition_manager.py b/modin/core/dataframe/pandas/partitioning/partition_manager.py
index fea0c686a96..798f14ea75d 100644
--- a/modin/core/dataframe/pandas/partitioning/partition_manager.py
+++ b/modin/core/dataframe/pandas/partitioning/partition_manager.py
@@ -805,7 +805,9 @@ def split_pandas_df_into_partitions(
         parts = [
             [
                 update_bar(
-                    put_func(col_part.iloc[i : i + row_chunksize]),
+                    put_func(
+                        col_part.iloc[i : i + row_chunksize].copy()
+                    ),  # `copy()` to fix zero-copy pickling
                 )
                 for col_part in col_parts
             ]
diff --git a/modin/test/storage_formats/pandas/test_internals.py b/modin/test/storage_formats/pandas/test_internals.py
index 71310f3ee5d..dee63975cc6 100644
--- a/modin/test/storage_formats/pandas/test_internals.py
+++ b/modin/test/storage_formats/pandas/test_internals.py
@@ -1511,3 +1511,23 @@ def assert_materialized(obj):
 
     assert call_queue == reconstructed_queue
     assert_everything_materialized(reconstructed_queue)
+
+
+@pytest.mark.skipif(Engine.get() != "Ray", reason="Ray specific")
+def test_zero_copy_pickling():
+    import ray
+
+    df = pd.DataFrame(np.zeros((100, 100)))
+    part = ray.get(df._query_compiler._modin_frame._partitions[0][0]._data)
+
+    try:
+        part.values[0, 0] = 10
+    except ValueError as err:
+        if "assignment destination is read-only" in str(err):
+            pass
+        else:
+            # Unexpected exception
+            raise err
+    else:
+        # The exception must be thrown
+        raise RuntimeError("not zero copy pickling")