From 20d7ad521e3d8646f2f9000498c03b3e56408d12 Mon Sep 17 00:00:00 2001 From: "Brendan C. Ward" Date: Thu, 4 Jul 2024 10:28:13 -0700 Subject: [PATCH 1/3] Raise NotImplementedError if user passes an open file handle to write --- CHANGES.md | 3 ++- pyogrio/geopandas.py | 3 ++- pyogrio/raw.py | 14 +++++++++-- pyogrio/tests/test_arrow.py | 39 ++++++++++++++++++++++++++++++ pyogrio/tests/test_geopandas_io.py | 22 +++++++++++++++++ pyogrio/tests/test_raw_io.py | 22 +++++++++++++++++ 6 files changed, 99 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 786762c9..f6bfc86b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,7 +5,8 @@ ### Bug fixes - Silence warning from `write_dataframe` with `GeoSeries.notna()` (#435). -- BUG: Enable mask & bbox filter when geometry column not read (#431). +- Enable mask & bbox filter when geometry column not read (#431). +- Raise NotImplmentedError when user attempts to write to an open file handle. ## 0.9.0 (2024-06-17) diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index 71fddcad..b700d99c 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -360,7 +360,8 @@ def write_dataframe( in the output file. path : str or io.BytesIO path to output file on writeable file system or an io.BytesIO object to - allow writing to memory + allow writing to memory. Will raise NotImplementedError if an open file + handle is passed; use BytesIO instead. NOTE: support for writing to memory is limited to specific drivers. layer : str, optional (default: None) layer name to create. If writing to memory and layer name is not diff --git a/pyogrio/raw.py b/pyogrio/raw.py index 08ec0476..f8ff0fdb 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -1,4 +1,5 @@ from io import BytesIO +from pathlib import Path import warnings from pyogrio._env import GDALEnv @@ -522,7 +523,8 @@ def _get_write_path_driver(path, driver, append=False): ---------- path : str or io.BytesIO path to output file on writeable file system or an io.BytesIO object to - allow writing to memory + allow writing to memory. Will raise NotImplementedError if an open file + handle is passed. driver : str, optional (default: None) The OGR format driver used to write the vector file. By default attempts to infer driver from path. Must be provided to write to a file-like @@ -535,6 +537,8 @@ def _get_write_path_driver(path, driver, append=False): (path, driver) """ + print("type", type(path), hasattr(path, "write"), isinstance(path, Path)) + if isinstance(path, BytesIO): if driver is None: raise ValueError("driver must be provided to write to in-memory file") @@ -554,6 +558,11 @@ def _get_write_path_driver(path, driver, append=False): if append: raise NotImplementedError("append is not supported for in-memory files") + elif hasattr(path, "write") and not isinstance(path, Path): + raise NotImplementedError( + "writing to an open file handle is not yet supported; instead, write to a BytesIO instance and then read bytes from that to write to the file handle" + ) + else: path = vsi_path(str(path)) @@ -605,7 +614,8 @@ def write( ---------- path : str or io.BytesIO path to output file on writeable file system or an io.BytesIO object to - allow writing to memory + allow writing to memory. Will raise NotImplementedError if an open file + handle is passed; use BytesIO instead. NOTE: support for writing to memory is limited to specific drivers. geometry : ndarray of WKB encoded geometries or None If None, geometries will not be written to output file diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index 7edddcdb..eb2320e2 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -5,6 +5,7 @@ import os from packaging.version import Version import sys +from zipfile import ZipFile import pytest import numpy as np @@ -945,6 +946,44 @@ def test_write_memory_existing_unsupported(naturalearth_lowres): ) +def test_write_open_file_handle(tmp_path, naturalearth_lowres): + """Verify that writing to an open file handle is not currently supported""" + + meta, table = read_arrow(naturalearth_lowres, max_features=1) + meta["geometry_type"] = "MultiPolygon" + + # verify it fails for regular file handle + with pytest.raises( + NotImplementedError, match="writing to an open file handle is not yet supported" + ): + with open(tmp_path / "test.geojson", "wb") as f: + write_arrow( + table, + f, + driver="GeoJSON", + layer="test", + crs=meta["crs"], + geometry_type=meta["geometry_type"], + geometry_name=meta["geometry_name"] or "wkb_geometry", + ) + + # verify it fails for ZipFile + with pytest.raises( + NotImplementedError, match="writing to an open file handle is not yet supported" + ): + with ZipFile(tmp_path / "test.geojson.zip", "w") as z: + with z.open("test.geojson", "w") as f: + write_arrow( + table, + f, + driver="GeoJSON", + layer="test", + crs=meta["crs"], + geometry_type=meta["geometry_type"], + geometry_name=meta["geometry_name"] or "wkb_geometry", + ) + + @requires_arrow_write_api def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text): encoding, text = encoded_text diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index be05a05a..12146c3d 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -3,6 +3,7 @@ from io import BytesIO import locale import warnings +from zipfile import ZipFile import numpy as np import pytest @@ -1971,6 +1972,27 @@ def test_write_memory_existing_unsupported(naturalearth_lowres): write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test") +def test_write_open_file_handle(tmp_path, naturalearth_lowres): + """Verify that writing to an open file handle is not currently supported""" + + df = read_dataframe(naturalearth_lowres) + + # verify it fails for regular file handle + with pytest.raises( + NotImplementedError, match="writing to an open file handle is not yet supported" + ): + with open(tmp_path / "test.geojson", "wb") as f: + write_dataframe(df.head(1), f) + + # verify it fails for ZipFile + with pytest.raises( + NotImplementedError, match="writing to an open file handle is not yet supported" + ): + with ZipFile(tmp_path / "test.geojson.zip", "w") as z: + with z.open("test.geojson", "w") as f: + write_dataframe(df.head(1), f) + + @pytest.mark.parametrize("ext", ["gpkg", "geojson"]) def test_non_utf8_encoding_io(tmp_path, ext, encoded_text): """Verify that we write non-UTF data to the data source diff --git a/pyogrio/tests/test_raw_io.py b/pyogrio/tests/test_raw_io.py index ebabe073..1b68df77 100644 --- a/pyogrio/tests/test_raw_io.py +++ b/pyogrio/tests/test_raw_io.py @@ -3,6 +3,7 @@ from io import BytesIO import json import sys +from zipfile import ZipFile import numpy as np from numpy import array_equal @@ -1171,6 +1172,27 @@ def test_write_memory_existing_unsupported(naturalearth_lowres): write(buffer, geometry, field_data, driver="GeoJSON", layer="test", **meta) +def test_write_open_file_handle(tmp_path, naturalearth_lowres): + """Verify that writing to an open file handle is not currently supported""" + + meta, _, geometry, field_data = read(naturalearth_lowres) + + # verify it fails for regular file handle + with pytest.raises( + NotImplementedError, match="writing to an open file handle is not yet supported" + ): + with open(tmp_path / "test.geojson", "wb") as f: + write(f, geometry, field_data, driver="GeoJSON", layer="test", **meta) + + # verify it fails for ZipFile + with pytest.raises( + NotImplementedError, match="writing to an open file handle is not yet supported" + ): + with ZipFile(tmp_path / "test.geojson.zip", "w") as z: + with z.open("test.geojson", "w") as f: + write(f, geometry, field_data, driver="GeoJSON", layer="test", **meta) + + @pytest.mark.parametrize("ext", ["fgb", "gpkg", "geojson"]) @pytest.mark.parametrize( "read_encoding,write_encoding", From 66b185d937587f11f2545b22c1fb10f8f38f464c Mon Sep 17 00:00:00 2001 From: "Brendan C. Ward" Date: Thu, 4 Jul 2024 10:37:40 -0700 Subject: [PATCH 2/3] Update changelog --- CHANGES.md | 2 +- pyogrio/raw.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index f6bfc86b..290e0d4e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,7 +6,7 @@ - Silence warning from `write_dataframe` with `GeoSeries.notna()` (#435). - Enable mask & bbox filter when geometry column not read (#431). -- Raise NotImplmentedError when user attempts to write to an open file handle. +- Raise NotImplmentedError when user attempts to write to an open file handle (#442). ## 0.9.0 (2024-06-17) diff --git a/pyogrio/raw.py b/pyogrio/raw.py index f8ff0fdb..a0cb4ee5 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -537,8 +537,6 @@ def _get_write_path_driver(path, driver, append=False): (path, driver) """ - print("type", type(path), hasattr(path, "write"), isinstance(path, Path)) - if isinstance(path, BytesIO): if driver is None: raise ValueError("driver must be provided to write to in-memory file") From ce9cd80b0a0ebfa6be4a2113a506403a8fcfb93b Mon Sep 17 00:00:00 2001 From: "Brendan C. Ward" Date: Thu, 4 Jul 2024 10:50:21 -0700 Subject: [PATCH 3/3] Add missing test annotation --- pyogrio/tests/test_arrow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index eb2320e2..247dd769 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -946,6 +946,7 @@ def test_write_memory_existing_unsupported(naturalearth_lowres): ) +@requires_arrow_write_api def test_write_open_file_handle(tmp_path, naturalearth_lowres): """Verify that writing to an open file handle is not currently supported"""