Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX: Raise NotImplementedError if user passes an open file handle to write #442

Merged
merged 4 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
### Bug fixes

- Silence warning from `write_dataframe` with `GeoSeries.notna()` (#435).
- BUG: Enable mask & bbox filter when geometry column not read (#431).
- Enable mask & bbox filter when geometry column not read (#431).
- Raise NotImplementedError when user attempts to write to an open file handle (#442).
- Prevent seek on read from compressed inputs (#443).

## 0.9.0 (2024-06-17)
Expand Down
3 changes: 2 additions & 1 deletion pyogrio/geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,8 @@ def write_dataframe(
in the output file.
path : str or io.BytesIO
path to output file on writeable file system or an io.BytesIO object to
allow writing to memory
allow writing to memory. Will raise NotImplementedError if an open file
handle is passed; use BytesIO instead.
NOTE: support for writing to memory is limited to specific drivers.
layer : str, optional (default: None)
layer name to create. If writing to memory and layer name is not
Expand Down
12 changes: 10 additions & 2 deletions pyogrio/raw.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from io import BytesIO
from pathlib import Path
import warnings

from pyogrio._env import GDALEnv
Expand Down Expand Up @@ -522,7 +523,8 @@ def _get_write_path_driver(path, driver, append=False):
----------
path : str or io.BytesIO
path to output file on writeable file system or an io.BytesIO object to
allow writing to memory
allow writing to memory. Will raise NotImplementedError if an open file
handle is passed.
driver : str, optional (default: None)
The OGR format driver used to write the vector file. By default attempts
to infer driver from path. Must be provided to write to a file-like
Expand Down Expand Up @@ -554,6 +556,11 @@ def _get_write_path_driver(path, driver, append=False):
if append:
raise NotImplementedError("append is not supported for in-memory files")

elif hasattr(path, "write") and not isinstance(path, Path):
raise NotImplementedError(
"writing to an open file handle is not yet supported; instead, write to a BytesIO instance and then read bytes from that to write to the file handle"
)

else:
path = vsi_path(str(path))

Expand Down Expand Up @@ -605,7 +612,8 @@ def write(
----------
path : str or io.BytesIO
path to output file on writeable file system or an io.BytesIO object to
allow writing to memory
allow writing to memory. Will raise NotImplementedError if an open file
handle is passed; use BytesIO instead.
NOTE: support for writing to memory is limited to specific drivers.
geometry : ndarray of WKB encoded geometries or None
If None, geometries will not be written to output file
Expand Down
40 changes: 40 additions & 0 deletions pyogrio/tests/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
from packaging.version import Version
import sys
from zipfile import ZipFile

import pytest
import numpy as np
Expand Down Expand Up @@ -951,6 +952,45 @@ def test_write_memory_existing_unsupported(naturalearth_lowres):
)


@requires_arrow_write_api
def test_write_open_file_handle(tmp_path, naturalearth_lowres):
    """Check that write_arrow rejects open file handles with NotImplementedError."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)
    meta["geometry_type"] = "MultiPolygon"

    expected_msg = "writing to an open file handle is not yet supported"
    # Keyword arguments shared by both write attempts below.
    write_kwargs = {
        "driver": "GeoJSON",
        "layer": "test",
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }

    # Case 1: a regular binary file handle.
    with pytest.raises(NotImplementedError, match=expected_msg), open(
        tmp_path / "test.geojson", "wb"
    ) as handle:
        write_arrow(table, handle, **write_kwargs)

    # Case 2: a writable member handle opened inside a ZipFile.
    with pytest.raises(NotImplementedError, match=expected_msg), ZipFile(
        tmp_path / "test.geojson.zip", "w"
    ) as archive, archive.open("test.geojson", "w") as handle:
        write_arrow(table, handle, **write_kwargs)


@requires_arrow_write_api
def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text):
encoding, text = encoded_text
Expand Down
22 changes: 22 additions & 0 deletions pyogrio/tests/test_geopandas_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from io import BytesIO
import locale
import warnings
from zipfile import ZipFile

import numpy as np
import pytest
Expand Down Expand Up @@ -1971,6 +1972,27 @@ def test_write_memory_existing_unsupported(naturalearth_lowres):
write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test")


def test_write_open_file_handle(tmp_path, naturalearth_lowres):
    """Check that write_dataframe rejects open file handles with NotImplementedError."""
    expected_msg = "writing to an open file handle is not yet supported"

    df = read_dataframe(naturalearth_lowres)

    # Case 1: a regular binary file handle.
    with pytest.raises(NotImplementedError, match=expected_msg), open(
        tmp_path / "test.geojson", "wb"
    ) as handle:
        write_dataframe(df.head(1), handle)

    # Case 2: a writable member handle opened inside a ZipFile.
    with pytest.raises(NotImplementedError, match=expected_msg), ZipFile(
        tmp_path / "test.geojson.zip", "w"
    ) as archive, archive.open("test.geojson", "w") as handle:
        write_dataframe(df.head(1), handle)


@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
"""Verify that we write non-UTF data to the data source
Expand Down
22 changes: 22 additions & 0 deletions pyogrio/tests/test_raw_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from io import BytesIO
import json
import sys
from zipfile import ZipFile

import numpy as np
from numpy import array_equal
Expand Down Expand Up @@ -1177,6 +1178,27 @@ def test_write_memory_existing_unsupported(naturalearth_lowres):
write(buffer, geometry, field_data, driver="GeoJSON", layer="test", **meta)


def test_write_open_file_handle(tmp_path, naturalearth_lowres):
    """Check that raw write rejects open file handles with NotImplementedError."""
    expected_msg = "writing to an open file handle is not yet supported"

    meta, _, geometry, field_data = read(naturalearth_lowres)

    # Case 1: a regular binary file handle.
    with pytest.raises(NotImplementedError, match=expected_msg), open(
        tmp_path / "test.geojson", "wb"
    ) as handle:
        write(handle, geometry, field_data, driver="GeoJSON", layer="test", **meta)

    # Case 2: a writable member handle opened inside a ZipFile.
    with pytest.raises(NotImplementedError, match=expected_msg), ZipFile(
        tmp_path / "test.geojson.zip", "w"
    ) as archive, archive.open("test.geojson", "w") as handle:
        write(handle, geometry, field_data, driver="GeoJSON", layer="test", **meta)


@pytest.mark.parametrize("ext", ["fgb", "gpkg", "geojson"])
@pytest.mark.parametrize(
"read_encoding,write_encoding",
Expand Down