diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index 73b62cd1..262bc1a3 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -2,6 +2,8 @@ from pathlib import Path from zipfile import ZIP_DEFLATED, ZipFile +import numpy as np + from pyogrio import ( __gdal_version_string__, __version__, @@ -126,28 +128,165 @@ def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres): @pytest.fixture(scope="session") -def test_fgdb_vsi(): - return f"/vsizip/{_data_dir}/test_fgdb.gdb.zip" +def line_zm_file(): + return _data_dir / "line_zm.gpkg" @pytest.fixture(scope="session") -def test_gpkg_nulls(): - return _data_dir / "test_gpkg_nulls.gpkg" +def curve_file(): + return _data_dir / "curve.gpkg" @pytest.fixture(scope="session") -def test_ogr_types_list(): - return _data_dir / "test_ogr_types_list.geojson" +def curve_polygon_file(): + return _data_dir / "curvepolygon.gpkg" @pytest.fixture(scope="session") -def test_datetime(): - return _data_dir / "test_datetime.geojson" +def multisurface_file(): + return _data_dir / "multisurface.gpkg" @pytest.fixture(scope="session") -def test_datetime_tz(): - return _data_dir / "test_datetime_tz.geojson" +def test_gpkg_nulls(): + return _data_dir / "test_gpkg_nulls.gpkg" + + +@pytest.fixture(scope="function") +def no_geometry_file(tmp_path): + # create a GPKG layer that does not include geometry + filename = tmp_path / "test_no_geometry.gpkg" + write( + filename, + layer="no_geometry", + geometry=None, + field_data=[np.array(["a", "b", "c"])], + fields=["col"], + ) + + return filename + + +@pytest.fixture(scope="function") +def list_field_values_file(tmp_path): + # Create a GeoJSON file with list values in a property + list_geojson = """{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": { "int64": 1, "list_int64": [0, 1] }, + "geometry": { "type": "Point", "coordinates": [0, 2] } + }, + { + "type": "Feature", + "properties": { "int64": 2, "list_int64": [2, 3] }, + "geometry": { "type": "Point", "coordinates": [1, 2] } + }, + { + "type": "Feature", + "properties": { "int64": 3, "list_int64": [4, 5] }, + "geometry": { "type": "Point", "coordinates": [2, 2] } + }, + { + "type": "Feature", + "properties": { "int64": 4, "list_int64": [6, 7] }, + "geometry": { "type": "Point", "coordinates": [3, 2] } + }, + { + "type": "Feature", + "properties": { "int64": 5, "list_int64": [8, 9] }, + "geometry": { "type": "Point", "coordinates": [4, 2] } + } + ] + }""" + + filename = tmp_path / "test_ogr_types_list.geojson" + with open(filename, "w") as f: + _ = f.write(list_geojson) + + return filename + + +@pytest.fixture(scope="function") +def nested_geojson_file(tmp_path): + # create GeoJSON file with nested properties + nested_geojson = """{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [0, 0] + }, + "properties": { + "top_level": "A", + "intermediate_level": { + "bottom_level": "B" + } + } + } + ] + }""" + + filename = tmp_path / "test_nested.geojson" + with open(filename, "w") as f: + _ = f.write(nested_geojson) + + return filename + + +@pytest.fixture(scope="function") +def datetime_file(tmp_path): + # create GeoJSON file with millisecond precision + datetime_geojson = """{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": { "col": "2020-01-01T09:00:00.123" }, + "geometry": { "type": "Point", "coordinates": [1, 1] } + }, + { + "type": "Feature", + "properties": { "col": "2020-01-01T10:00:00" }, 
+            "geometry": { "type": "Point", "coordinates": [2, 2] }
+        }
+    ]
+    }"""
+
+    filename = tmp_path / "test_datetime.geojson"
+    with open(filename, "w") as f:
+        _ = f.write(datetime_geojson)
+
+    return filename
+
+
+@pytest.fixture(scope="function")
+def datetime_tz_file(tmp_path):
+    # create GeoJSON file with timezone-aware datetimes
+    datetime_tz_geojson = """{
+    "type": "FeatureCollection",
+    "features": [
+        {
+            "type": "Feature",
+            "properties": { "datetime_col": "2020-01-01T09:00:00.123-05:00" },
+            "geometry": { "type": "Point", "coordinates": [1, 1] }
+        },
+        {
+            "type": "Feature",
+            "properties": { "datetime_col": "2020-01-01T10:00:00-05:00" },
+            "geometry": { "type": "Point", "coordinates": [2, 2] }
+        }
+    ]
+    }"""
+
+    filename = tmp_path / "test_datetime_tz.geojson"
+    with open(filename, "w") as f:
+        _ = f.write(datetime_tz_geojson)
+
+    return filename
 
 
 @pytest.fixture(scope="function")
diff --git a/pyogrio/tests/fixtures/README.md b/pyogrio/tests/fixtures/README.md
index 89ef7937..4cab72d0 100644
--- a/pyogrio/tests/fixtures/README.md
+++ b/pyogrio/tests/fixtures/README.md
@@ -1,13 +1,28 @@
 # Test datasets
 
-## Natural Earth lowres
+## Obtaining / creating test datasets
 
-`naturalearth_lowres.shp` was copied from GeoPandas.
+If a test dataset can be created in code, do that rather than committing a
+data file. If it is used in a single test, create the test dataset as part of
+that test. If it is used by more than one test, add it to
+`pyogrio/tests/conftest.py` as a function-scoped test fixture.
+
+If you need to obtain third-party test files:
+
+- add a section below that describes the source location and the processing
+  steps used to derive that dataset
+- make sure the license is compatible with inclusion in Pyogrio (public domain
+  or open source) and record that license below
+
+Please keep test files no larger than necessary for the tests that use them.
 
-## FGDB test dataset
+## Included test datasets
+
+### Natural Earth lowres
+
+`naturalearth_lowres.shp` was copied from GeoPandas.
 
-`test_fgdb.gdb.zip`
-Downloaded from http://trac.osgeo.org/gdal/raw-attachment/wiki/FileGDB/test_fgdb.gdb.zip
+License: public domain
 
 ### GPKG test dataset with null values
 
@@ -75,15 +90,28 @@ NOTE: Reading boolean values into GeoPandas using Fiona backend treats those
 values as `None` and column dtype as `object`; Pyogrio treats those values as
 `np.nan` and column dtype as `float64`.
 
-### GPKG test with MultiSurface
-
-This was extracted from https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_0308_HU4_GDB.zip
-`NHDWaterbody` layer using ogr2ogr:
-
-```bash
-ogr2ogr test_mixed_surface.gpkg NHDPLUS_H_0308_HU4_GDB.gdb NHDWaterbody -where '"NHDPlusID" = 15000300070477' -select "NHDPlusID"
-```
+License: same as Pyogrio
 
 ### OSM PBF test
 
 This was downloaded from https://github.com/openstreetmap/OSM-binary/blob/master/resources/sample.pbf
+
+License: [Open Data Commons Open Database License (ODbL)](https://opendatacommons.org/licenses/odbl/)
+
+### Test files for geometry types that are downgraded on read
+
+- `line_zm.gpkg` was created using QGIS to digitize a LineString GPKG layer with Z and M enabled. It is downgraded to LineString Z on read.
+- `curve.gpkg` was created using QGIS to digitize a Curve GPKG layer. It is downgraded to LineString on read.
+- `curvepolygon.gpkg` was created using QGIS to digitize a CurvePolygon GPKG layer. It is downgraded to Polygon on read.
+- `multisurface.gpkg` was created using QGIS to digitize a MultiSurface GPKG layer. It is downgraded to MultiPolygon on read.
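+
+For example, `pyogrio.list_layers` reports the downgraded type for these
+files; the values below match `test_list_layers` in `pyogrio/tests/test_core.py`:
+
+```python
+from pyogrio import list_layers
+
+list_layers("curve.gpkg")  # -> [["curve", "LineString"]]
+```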
+ +License: same as Pyogrio diff --git a/pyogrio/tests/fixtures/curve.gpkg b/pyogrio/tests/fixtures/curve.gpkg new file mode 100644 index 00000000..974f5be9 Binary files /dev/null and b/pyogrio/tests/fixtures/curve.gpkg differ diff --git a/pyogrio/tests/fixtures/test_multisurface.gpkg b/pyogrio/tests/fixtures/curvepolygon.gpkg similarity index 90% rename from pyogrio/tests/fixtures/test_multisurface.gpkg rename to pyogrio/tests/fixtures/curvepolygon.gpkg index 5da53695..34f86c79 100644 Binary files a/pyogrio/tests/fixtures/test_multisurface.gpkg and b/pyogrio/tests/fixtures/curvepolygon.gpkg differ diff --git a/pyogrio/tests/fixtures/line_zm.gpkg b/pyogrio/tests/fixtures/line_zm.gpkg new file mode 100644 index 00000000..a369df53 Binary files /dev/null and b/pyogrio/tests/fixtures/line_zm.gpkg differ diff --git a/pyogrio/tests/fixtures/multisurface.gpkg b/pyogrio/tests/fixtures/multisurface.gpkg new file mode 100644 index 00000000..595a2fea Binary files /dev/null and b/pyogrio/tests/fixtures/multisurface.gpkg differ diff --git a/pyogrio/tests/fixtures/poly_not_enough_points.shp.zip b/pyogrio/tests/fixtures/poly_not_enough_points.shp.zip deleted file mode 100644 index 90af7b3c..00000000 Binary files a/pyogrio/tests/fixtures/poly_not_enough_points.shp.zip and /dev/null differ diff --git a/pyogrio/tests/fixtures/test_datetime.geojson b/pyogrio/tests/fixtures/test_datetime.geojson deleted file mode 100644 index eb949330..00000000 --- a/pyogrio/tests/fixtures/test_datetime.geojson +++ /dev/null @@ -1,7 +0,0 @@ -{ -"type": "FeatureCollection", -"features": [ -{ "type": "Feature", "properties": { "col": "2020-01-01T09:00:00.123" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } }, -{ "type": "Feature", "properties": { "col": "2020-01-01T10:00:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } } -] -} diff --git a/pyogrio/tests/fixtures/test_datetime_tz.geojson b/pyogrio/tests/fixtures/test_datetime_tz.geojson deleted file mode 100644 index e6b39206..00000000 --- a/pyogrio/tests/fixtures/test_datetime_tz.geojson +++ /dev/null @@ -1,8 +0,0 @@ -{ -"type": "FeatureCollection", -"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, -"features": [ -{ "type": "Feature", "properties": { "datetime_col": "2020-01-01T09:00:00.123-05:00" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } }, -{ "type": "Feature", "properties": { "datetime_col": "2020-01-01T10:00:00-05:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } } -] -} diff --git a/pyogrio/tests/fixtures/test_fgdb.gdb.zip b/pyogrio/tests/fixtures/test_fgdb.gdb.zip deleted file mode 100644 index a4279f2e..00000000 Binary files a/pyogrio/tests/fixtures/test_fgdb.gdb.zip and /dev/null differ diff --git a/pyogrio/tests/fixtures/test_nested.geojson b/pyogrio/tests/fixtures/test_nested.geojson deleted file mode 100644 index fdf5de80..00000000 --- a/pyogrio/tests/fixtures/test_nested.geojson +++ /dev/null @@ -1,18 +0,0 @@ -{ - "type": "FeatureCollection", - "features": [ - { - "type": "Feature", - "geometry": { - "type": "Point", - "coordinates": [0, 0] - }, - "properties": { - "top_level": "A", - "intermediate_level": { - "bottom_level": "B" - } - } - } - ] -} diff --git a/pyogrio/tests/fixtures/test_ogr_types_list.geojson b/pyogrio/tests/fixtures/test_ogr_types_list.geojson deleted file mode 100644 index 85719696..00000000 --- a/pyogrio/tests/fixtures/test_ogr_types_list.geojson +++ /dev/null @@ -1,12 +0,0 @@ -{ -"type": "FeatureCollection", -"name": "test", -"crs": 
{ "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, -"features": [ -{ "type": "Feature", "properties": { "int64": 1, "list_int64": [ 0, 1 ] }, "geometry": { "type": "Point", "coordinates": [ 0.0, 2.0 ] } }, -{ "type": "Feature", "properties": { "int64": 2, "list_int64": [ 2, 3 ] }, "geometry": { "type": "Point", "coordinates": [ 1.0, 2.0 ] } }, -{ "type": "Feature", "properties": { "int64": 3, "list_int64": [ 4, 5 ] }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }, -{ "type": "Feature", "properties": { "int64": 4, "list_int64": [ 6, 7 ] }, "geometry": { "type": "Point", "coordinates": [ 3.0, 2.0 ] } }, -{ "type": "Feature", "properties": { "int64": 5, "list_int64": [ 8, 9 ] }, "geometry": { "type": "Point", "coordinates": [ 4.0, 2.0 ] } } -] -} diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index 0230a771..7b2d6673 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -132,24 +132,24 @@ def test_read_arrow_ignore_geometry(naturalearth_lowres): assert_frame_equal(result, expected) -def test_read_arrow_nested_types(test_ogr_types_list): +def test_read_arrow_nested_types(list_field_values_file): # with arrow, list types are supported - result = read_dataframe(test_ogr_types_list, use_arrow=True) + result = read_dataframe(list_field_values_file, use_arrow=True) assert "list_int64" in result.columns assert result["list_int64"][0].tolist() == [0, 1] -def test_read_arrow_to_pandas_kwargs(test_fgdb_vsi): +def test_read_arrow_to_pandas_kwargs(no_geometry_file): # with arrow, list types are supported arrow_to_pandas_kwargs = {"strings_to_categorical": True} - result = read_dataframe( - test_fgdb_vsi, - layer="basetable_2", + df = read_dataframe( + no_geometry_file, + read_geometry=False, use_arrow=True, arrow_to_pandas_kwargs=arrow_to_pandas_kwargs, ) - assert "SEGMENT_NAME" in result.columns - assert result["SEGMENT_NAME"].dtype.name == "category" + assert df.col.dtype.name == "category" + assert np.array_equal(df.col.values.categories, ["a", "b", "c"]) def test_read_arrow_raw(naturalearth_lowres): @@ -297,14 +297,15 @@ def use_arrow_context(): del os.environ["PYOGRIO_USE_ARROW"] -def test_enable_with_environment_variable(test_ogr_types_list): +def test_enable_with_environment_variable(list_field_values_file): # list types are only supported with arrow, so don't work by default and work # when arrow is enabled through env variable - result = read_dataframe(test_ogr_types_list) + result = read_dataframe(list_field_values_file) assert "list_int64" not in result.columns with use_arrow_context(): - result = read_dataframe(test_ogr_types_list) + result = read_dataframe(list_field_values_file) + assert "list_int64" in result.columns @@ -491,6 +492,88 @@ def test_write_geojson(tmp_path, naturalearth_lowres): ) +@requires_arrow_write_api +@pytest.mark.skipif( + __gdal_version__ < (3, 6, 0), + reason="OpenFileGDB write support only available for GDAL >= 3.6.0", +) +@pytest.mark.parametrize( + "write_int64", + [ + False, + pytest.param( + True, + marks=pytest.mark.skipif( + __gdal_version__ < (3, 9, 0), + reason="OpenFileGDB write support for int64 values for GDAL >= 3.9.0", + ), + ), + ], +) +def test_write_openfilegdb(tmp_path, write_int64): + expected_field_data = [ + np.array([True, False, True], dtype="bool"), + np.array([1, 2, 3], dtype="int16"), + np.array([1, 2, 3], dtype="int32"), + np.array([1, 2, 3], dtype="int64"), + np.array([1, 2, 3], dtype="float32"), + np.array([1, 2, 3], dtype="float64"), + ] + + 
table = pa.table( + { + "geometry": points, + **{field.dtype.name: field for field in expected_field_data}, + } + ) + + filename = tmp_path / "test.gdb" + + expected_meta = {"crs": "EPSG:4326"} + + # int64 is not supported without additional config: https://gdal.org/en/latest/drivers/vector/openfilegdb.html#bit-integer-field-support + # it is converted to float64 by default and raises a warning + # (for GDAL >= 3.9.0 only) + write_params = ( + {"TARGET_ARCGIS_VERSION": "ARCGIS_PRO_3_2_OR_LATER"} if write_int64 else {} + ) + + if write_int64 or __gdal_version__ < (3, 9, 0): + ctx = contextlib.nullcontext() + else: + ctx = pytest.warns( + RuntimeWarning, match="Integer64 will be written as a Float64" + ) + + with ctx: + write_arrow( + table, + filename, + driver="OpenFileGDB", + geometry_type="Point", + geometry_name="geometry", + **expected_meta, + **write_params, + ) + + meta, table = read_arrow(filename) + + if not write_int64: + expected_field_data[3] = expected_field_data[3].astype("float64") + + # bool types are converted to int32 + expected_field_data[0] = expected_field_data[0].astype("int32") + + assert meta["crs"] == expected_meta["crs"] + + # NOTE: geometry name is set to "SHAPE" by GDAL + assert np.array_equal(table[meta["geometry_name"]], points) + for i in range(len(expected_field_data)): + values = table[table.schema.names[i]].to_numpy() + assert values.dtype == expected_field_data[i].dtype + assert np.array_equal(values, expected_field_data[i]) + + @pytest.mark.parametrize( "driver", { diff --git a/pyogrio/tests/test_core.py b/pyogrio/tests/test_core.py index 5936c6a5..1d593466 100644 --- a/pyogrio/tests/test_core.py +++ b/pyogrio/tests/test_core.py @@ -151,7 +151,15 @@ def test_list_drivers(): assert len(drivers) == len(expected) -def test_list_layers(naturalearth_lowres, naturalearth_lowres_vsi, test_fgdb_vsi): +def test_list_layers( + naturalearth_lowres, + naturalearth_lowres_vsi, + line_zm_file, + curve_file, + curve_polygon_file, + multisurface_file, + no_geometry_file, +): assert array_equal( list_layers(naturalearth_lowres), [["naturalearth_lowres", "Polygon"]] ) @@ -165,16 +173,17 @@ def test_list_layers(naturalearth_lowres, naturalearth_lowres_vsi, test_fgdb_vsi with pytest.warns( UserWarning, match=r"Measured \(M\) geometry types are not supported" ): - fgdb_layers = list_layers(test_fgdb_vsi) - # GDAL >= 3.4.0 includes 'another_relationship' layer - assert len(fgdb_layers) >= 7 + assert array_equal(list_layers(line_zm_file), [["line_zm", "LineString Z"]]) - # Make sure that nonspatial layer has None for geometry - assert array_equal(fgdb_layers[0], ["basetable_2", None]) + # Curve / surface types are downgraded to plain types + assert array_equal(list_layers(curve_file), [["curve", "LineString"]]) + assert array_equal(list_layers(curve_polygon_file), [["curvepolygon", "Polygon"]]) + assert array_equal( + list_layers(multisurface_file), [["multisurface", "MultiPolygon"]] + ) - # Confirm that measured 3D is downgraded to plain 3D during read - assert array_equal(fgdb_layers[3], ["test_lines", "MultiLineString Z"]) - assert array_equal(fgdb_layers[6], ["test_areas", "MultiPolygon Z"]) + # Make sure that nonspatial layer has None for geometry + assert array_equal(list_layers(no_geometry_file), [["no_geometry", None]]) def test_list_layers_bytes(geojson_bytes): @@ -499,8 +508,8 @@ def test_read_info_filelike(geojson_filelike): ), ], ) -def test_read_info_dataset_kwargs(data_dir, dataset_kwargs, fields): - meta = read_info(data_dir / "test_nested.geojson", 
**dataset_kwargs) +def test_read_info_dataset_kwargs(nested_geojson_file, dataset_kwargs, fields): + meta = read_info(nested_geojson_file, **dataset_kwargs) assert meta["fields"].tolist() == fields @@ -559,8 +568,8 @@ def test_read_info_unspecified_layer_warning(data_dir): read_info(data_dir / "sample.osm.pbf") -def test_read_info_without_geometry(test_fgdb_vsi): - assert read_info(test_fgdb_vsi, layer="basetable_2")["total_bounds"] is None +def test_read_info_without_geometry(no_geometry_file): + assert read_info(no_geometry_file)["total_bounds"] is None @pytest.mark.parametrize( diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index ea36fa2a..74efa6f7 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -36,7 +36,6 @@ from geopandas.testing import assert_geodataframe_equal from pandas.testing import ( - assert_frame_equal, assert_index_equal, assert_series_equal, ) @@ -147,14 +146,13 @@ def test_read_dataframe_vsi(naturalearth_lowres_vsi, use_arrow): @pytest.mark.parametrize( - "columns, fid_as_index, exp_len", [(None, False, 2), ([], True, 2), ([], False, 0)] + "columns, fid_as_index, exp_len", [(None, False, 3), ([], True, 3), ([], False, 0)] ) def test_read_layer_without_geometry( - test_fgdb_vsi, columns, fid_as_index, use_arrow, exp_len + no_geometry_file, columns, fid_as_index, use_arrow, exp_len ): result = read_dataframe( - test_fgdb_vsi, - layer="basetable", + no_geometry_file, columns=columns, fid_as_index=fid_as_index, use_arrow=use_arrow, @@ -200,38 +198,62 @@ def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres, use_arrow): ) -def test_read_force_2d(test_fgdb_vsi, use_arrow): - with pytest.warns( - UserWarning, match=r"Measured \(M\) geometry types are not supported" - ): - df = read_dataframe(test_fgdb_vsi, layer="test_lines", max_features=1) - assert df.iloc[0].geometry.has_z +def test_read_force_2d(tmp_path, use_arrow): + filename = tmp_path / "test.gpkg" - df = read_dataframe( - test_fgdb_vsi, - layer="test_lines", - force_2d=True, - max_features=1, - use_arrow=use_arrow, - ) - assert not df.iloc[0].geometry.has_z + # create a GPKG with 3D point values + expected = gp.GeoDataFrame( + geometry=[Point(0, 0, 0), Point(1, 1, 0)], crs="EPSG:4326" + ) + write_dataframe(expected, filename) + df = read_dataframe(filename) + assert df.iloc[0].geometry.has_z + + df = read_dataframe( + filename, + force_2d=True, + max_features=1, + use_arrow=use_arrow, + ) + assert not df.iloc[0].geometry.has_z + + +def test_read_layer(tmp_path, use_arrow): + filename = tmp_path / "test.gpkg" + + # create a multilayer GPKG + expected1 = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326") + write_dataframe( + expected1, + filename, + layer="layer1", + ) + + expected2 = gp.GeoDataFrame(geometry=[Point(1, 1)], crs="EPSG:4326") + write_dataframe(expected2, filename, layer="layer2", append=True) + + assert np.array_equal( + list_layers(filename), [["layer1", "Point"], ["layer2", "Point"]] + ) -@pytest.mark.filterwarnings("ignore: Measured") -@pytest.mark.filterwarnings("ignore: More than one layer found in") -def test_read_layer(test_fgdb_vsi, use_arrow): - layers = list_layers(test_fgdb_vsi) - kwargs = {"use_arrow": use_arrow, "read_geometry": False, "max_features": 1} + kwargs = {"use_arrow": use_arrow, "max_features": 1} - # The first layer is read by default (NOTE: first layer has no features) - df = read_dataframe(test_fgdb_vsi, **kwargs) - df2 = read_dataframe(test_fgdb_vsi, layer=layers[0][0], **kwargs) - 
assert_frame_equal(df, df2)
+    # The first layer is read by default, which will warn when there are multiple
+    # layers
+    with pytest.warns(UserWarning, match="More than one layer found"):
+        df = read_dataframe(filename, **kwargs)
 
-    # Reading a specific layer should return that layer.
-    # Detected here by a known column.
-    df = read_dataframe(test_fgdb_vsi, layer="test_lines", **kwargs)
-    assert "RIVER_MILE" in df.columns
+    assert_geodataframe_equal(df, expected1)
+
+    # Reading a specific layer by name should return that layer,
+    # verified here by comparing against the expected GeoDataFrame.
+    df = read_dataframe(filename, layer="layer2", **kwargs)
+    assert_geodataframe_equal(df, expected2)
+
+    # Reading a specific layer by index should return that layer.
+    df = read_dataframe(filename, layer=1, **kwargs)
+    assert_geodataframe_equal(df, expected2)
 
 
 def test_read_layer_invalid(naturalearth_lowres_all_ext, use_arrow):
@@ -239,22 +261,19 @@ def test_read_layer_invalid(naturalearth_lowres_all_ext, use_arrow):
         read_dataframe(naturalearth_lowres_all_ext, layer="wrong", use_arrow=use_arrow)
 
 
-@pytest.mark.filterwarnings("ignore: Measured")
-def test_read_datetime(test_fgdb_vsi, use_arrow):
-    df = read_dataframe(
-        test_fgdb_vsi, layer="test_lines", use_arrow=use_arrow, max_features=1
-    )
+def test_read_datetime(datetime_file, use_arrow):
+    df = read_dataframe(datetime_file, use_arrow=use_arrow)
     if PANDAS_GE_20:
         # starting with pandas 2.0, it preserves the passed datetime resolution
-        assert df.SURVEY_DAT.dtype.name == "datetime64[ms]"
+        assert df.col.dtype.name == "datetime64[ms]"
     else:
-        assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
+        assert df.col.dtype.name == "datetime64[ns]"
 
 
 @pytest.mark.filterwarnings("ignore: Non-conformant content for record 1 in column ")
 @pytest.mark.requires_arrow_write_api
-def test_read_datetime_tz(test_datetime_tz, tmp_path, use_arrow):
-    df = read_dataframe(test_datetime_tz)
+def test_read_datetime_tz(datetime_tz_file, tmp_path, use_arrow):
+    df = read_dataframe(datetime_tz_file)
     # Make the index non-consecutive to test this case as well.
Added for issue # https://github.com/geopandas/pyogrio/issues/324 df = df.set_index(np.array([0, 2])) @@ -324,14 +343,17 @@ def test_read_write_datetime_tz_with_nulls(tmp_path, use_arrow): assert_geodataframe_equal(df, result) -def test_read_null_values(test_fgdb_vsi, use_arrow): - df = read_dataframe( - test_fgdb_vsi, layer="basetable_2", use_arrow=use_arrow, read_geometry=False - ) +def test_read_null_values(tmp_path, use_arrow): + filename = tmp_path / "test_null_values_no_geometry.gpkg" + + # create a GPKG with no geometries and only null values + expected = pd.DataFrame({"col": [None, None]}) + write_dataframe(expected, filename) + + df = read_dataframe(filename, use_arrow=use_arrow, read_geometry=False) # make sure that Null values are preserved - assert df.SEGMENT_NAME.isnull().max() - assert df.loc[df.SEGMENT_NAME.isnull()].SEGMENT_NAME.iloc[0] is None + assert np.array_equal(df.col.values, expected.col.values) def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow): @@ -610,17 +632,22 @@ def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext): assert len(df) == 1 -def test_read_fids_force_2d(test_fgdb_vsi): - with pytest.warns( - UserWarning, match=r"Measured \(M\) geometry types are not supported" - ): - df = read_dataframe(test_fgdb_vsi, layer="test_lines", fids=[22]) - assert len(df) == 1 - assert df.iloc[0].geometry.has_z +def test_read_fids_force_2d(tmp_path): + filename = tmp_path / "test.gpkg" - df = read_dataframe(test_fgdb_vsi, layer="test_lines", force_2d=True, fids=[22]) - assert len(df) == 1 - assert not df.iloc[0].geometry.has_z + # create a GPKG with 3D point values + expected = gp.GeoDataFrame( + geometry=[Point(0, 0, 0), Point(1, 1, 0)], crs="EPSG:4326" + ) + write_dataframe(expected, filename) + + df = read_dataframe(filename, fids=[1]) + assert_geodataframe_equal(df, expected.iloc[:1]) + + df = read_dataframe(filename, force_2d=True, fids=[1]) + assert np.array_equal( + df.geometry.values, shapely.force_2d(expected.iloc[:1].geometry.values) + ) @pytest.mark.parametrize("skip_features", [10, 200]) @@ -1677,7 +1704,7 @@ def test_write_geometry_z_types_auto( ("ignore", None), ], ) -def test_read_invalid_shp(data_dir, use_arrow, on_invalid, message): +def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message): if on_invalid == "raise": handler = pytest.raises(shapely.errors.GEOSException, match=message) elif on_invalid == "warn": @@ -1687,33 +1714,50 @@ def test_read_invalid_shp(data_dir, use_arrow, on_invalid, message): else: raise ValueError(f"unknown value for on_invalid: {on_invalid}") + # create a GeoJSON file with an invalid exterior ring + invalid_geojson = """{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": {}, + "geometry": { + "type": "Polygon", + "coordinates": [ [ [0, 0], [0, 0] ] ] + } + } + ] + }""" + + filename = tmp_path / "test.geojson" + with open(filename, "w") as f: + _ = f.write(invalid_geojson) + with handler: df = read_dataframe( - data_dir / "poly_not_enough_points.shp.zip", + filename, use_arrow=use_arrow, on_invalid=on_invalid, ) df.geometry.isnull().all() -def test_read_multisurface(data_dir, use_arrow): +def test_read_multisurface(multisurface_file, use_arrow): if use_arrow: with pytest.raises(shapely.errors.GEOSException): # TODO(Arrow) # shapely fails parsing the WKB - read_dataframe(data_dir / "test_multisurface.gpkg", use_arrow=True) + read_dataframe(multisurface_file, use_arrow=True) else: - df = read_dataframe(data_dir / "test_multisurface.gpkg") + df = 
read_dataframe(multisurface_file) # MultiSurface should be converted to MultiPolygon assert df.geometry.type.tolist() == ["MultiPolygon"] -def test_read_dataset_kwargs(data_dir, use_arrow): - filename = data_dir / "test_nested.geojson" - +def test_read_dataset_kwargs(nested_geojson_file, use_arrow): # by default, nested data are not flattened - df = read_dataframe(filename, use_arrow=use_arrow) + df = read_dataframe(nested_geojson_file, use_arrow=use_arrow) expected = gp.GeoDataFrame( { @@ -1726,7 +1770,9 @@ def test_read_dataset_kwargs(data_dir, use_arrow): assert_geodataframe_equal(df, expected) - df = read_dataframe(filename, use_arrow=use_arrow, FLATTEN_NESTED_ATTRIBUTES="YES") + df = read_dataframe( + nested_geojson_file, use_arrow=use_arrow, FLATTEN_NESTED_ATTRIBUTES="YES" + ) expected = gp.GeoDataFrame( { diff --git a/pyogrio/tests/test_raw_io.py b/pyogrio/tests/test_raw_io.py index 83f5ce44..54127d0b 100644 --- a/pyogrio/tests/test_raw_io.py +++ b/pyogrio/tests/test_raw_io.py @@ -619,13 +619,80 @@ def test_write_no_geom_no_fields(): __gdal_version__ < (3, 6, 0), reason="OpenFileGDB write support only available for GDAL >= 3.6.0", ) -def test_write_openfilegdb(tmp_path, naturalearth_lowres): - meta, _, geometry, field_data = read(naturalearth_lowres) +@pytest.mark.parametrize( + "write_int64", + [ + False, + pytest.param( + True, + marks=pytest.mark.skipif( + __gdal_version__ < (3, 9, 0), + reason="OpenFileGDB write support for int64 values for GDAL >= 3.9.0", + ), + ), + ], +) +def test_write_openfilegdb(tmp_path, write_int64): + # Point(0, 0) + expected_geometry = np.array( + [bytes.fromhex("010100000000000000000000000000000000000000")] * 3, dtype=object + ) + expected_field_data = [ + np.array([True, False, True], dtype="bool"), + np.array([1, 2, 3], dtype="int16"), + np.array([1, 2, 3], dtype="int32"), + np.array([1, 2, 3], dtype="int64"), + np.array([1, 2, 3], dtype="float32"), + np.array([1, 2, 3], dtype="float64"), + ] + expected_fields = ["bool", "int16", "int32", "int64", "float32", "float64"] + expected_meta = { + "geometry_type": "Point", + "crs": "EPSG:4326", + "fields": expected_fields, + } filename = tmp_path / "test.gdb" - write(filename, geometry, field_data, driver="OpenFileGDB", **meta) - assert filename.exists() + # int64 is not supported without additional config: https://gdal.org/en/latest/drivers/vector/openfilegdb.html#bit-integer-field-support + # it is converted to float64 by default and raises a warning + # (for GDAL >= 3.9.0 only) + write_params = ( + {"TARGET_ARCGIS_VERSION": "ARCGIS_PRO_3_2_OR_LATER"} if write_int64 else {} + ) + + if write_int64 or __gdal_version__ < (3, 9, 0): + ctx = contextlib.nullcontext() + else: + ctx = pytest.warns( + RuntimeWarning, match="Integer64 will be written as a Float64" + ) + + with ctx: + write( + filename, + expected_geometry, + expected_field_data, + driver="OpenFileGDB", + **expected_meta, + **write_params, + ) + + meta, _, geometry, field_data = read(filename) + + if not write_int64: + expected_field_data[3] = expected_field_data[3].astype("float64") + + # bool types are converted to int32 + expected_field_data[0] = expected_field_data[0].astype("int32") + + assert meta["crs"] == expected_meta["crs"] + assert np.array_equal(meta["fields"], expected_meta["fields"]) + + assert np.array_equal(geometry, expected_geometry) + for i in range(len(expected_field_data)): + assert field_data[i].dtype == expected_field_data[i].dtype + assert np.array_equal(field_data[i], expected_field_data[i]) @pytest.mark.parametrize("ext", 
DRIVERS) @@ -934,17 +1001,17 @@ def test_read_data_types_numeric_with_null(test_gpkg_nulls): assert field.dtype == "float64" -def test_read_unsupported_types(test_ogr_types_list): - fields = read(test_ogr_types_list)[3] +def test_read_unsupported_types(list_field_values_file): + fields = read(list_field_values_file)[3] # list field gets skipped, only integer field is read assert len(fields) == 1 - fields = read(test_ogr_types_list, columns=["int64"])[3] + fields = read(list_field_values_file, columns=["int64"])[3] assert len(fields) == 1 -def test_read_datetime_millisecond(test_datetime): - field = read(test_datetime)[3][0] +def test_read_datetime_millisecond(datetime_file): + field = read(datetime_file)[3][0] assert field.dtype == "datetime64[ms]" assert field[0] == np.datetime64("2020-01-01 09:00:00.123") assert field[1] == np.datetime64("2020-01-01 10:00:00.000") @@ -973,13 +1040,14 @@ def test_read_unsupported_ext_with_prefix(tmp_path): assert field_data[0] == "data1" -def test_read_datetime_as_string(test_datetime_tz): - field = read(test_datetime_tz)[3][0] +def test_read_datetime_as_string(datetime_tz_file): + field = read(datetime_tz_file)[3][0] assert field.dtype == "datetime64[ms]" # timezone is ignored in numpy layer assert field[0] == np.datetime64("2020-01-01 09:00:00.123") assert field[1] == np.datetime64("2020-01-01 10:00:00.000") - field = read(test_datetime_tz, datetime_as_string=True)[3][0] + + field = read(datetime_tz_file, datetime_as_string=True)[3][0] assert field.dtype == "object" # GDAL doesn't return strings in ISO format (yet) assert field[0] == "2020/01/01 09:00:00.123-05"
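Most of the rewritten tests follow the pattern the fixtures README now recommends: build a small dataset in code under `tmp_path`, write it with Pyogrio, and assert against the round-tripped result. A minimal sketch of that pattern, for reference only (the file name, layer name, and column values below are illustrative and not part of this changeset):

```python
import geopandas as gp
import numpy as np
from shapely.geometry import Point

from pyogrio import list_layers, read_dataframe, read_info, write_dataframe


def test_roundtrip_generated_gpkg(tmp_path):
    # build the test dataset in code instead of committing a binary fixture
    expected = gp.GeoDataFrame(
        {"col": ["a", "b", "c"]},
        geometry=[Point(0, 0), Point(1, 1), Point(2, 2)],
        crs="EPSG:4326",
    )
    filename = tmp_path / "generated.gpkg"
    write_dataframe(expected, filename, layer="generated")

    # layer listing and metadata reflect what was just written
    assert np.array_equal(list_layers(filename), [["generated", "Point"]])
    assert read_info(filename)["features"] == 3

    # and the attributes survive the round trip unchanged
    df = read_dataframe(filename)
    assert df["col"].tolist() == ["a", "b", "c"]
```

Shared fixtures such as `no_geometry_file` in `conftest.py` apply the same idea when more than one test needs the generated file.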