Skip to content

Commit

Permalink
ENH: warn when reading a multilayer file without specifying layer (#362)
Browse files Browse the repository at this point in the history
* ENH: warn when reading multilayer file without specifying layer

* Update CHANGES.md

* Update CHANGES.md

* Avoid new warnings being shown in tests

* Version in changelog to 0.8.0

* Consolidate determining default layer in function

* Rename some more tests

* Remove 0.7.3 title, improve PR366 description
  • Loading branch information
theroggy authored Mar 11, 2024
1 parent 6e5db3d commit dff672c
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 23 deletions.
6 changes: 5 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@

### Improvements

- `read_arrow` and `open_arrow` now provide [GeoArrow-compliant extension metadata](https://geoarrow.org/extension-types.html), including the CRS, when using GDAL 3.8 or higher.
- `read_arrow` and `open_arrow` now provide
[GeoArrow-compliant extension metadata](https://geoarrow.org/extension-types.html),
including the CRS, when using GDAL 3.8 or higher (#366).
- Warn when reading from a multilayer file without specifying a layer (#362).


### Bug fixes

Expand Down
89 changes: 69 additions & 20 deletions pyogrio/_io.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,7 @@ cdef apply_bbox_filter(OGRLayerH ogr_layer, bbox):
Parameters
----------
ogr_layer : pointer to open OGR layer
bbox: list or tuple of xmin, ymin, xmax, ymax
bbox : list or tuple of xmin, ymin, xmax, ymax
Raises
------
Expand All @@ -629,7 +629,7 @@ cdef apply_geometry_filter(OGRLayerH ogr_layer, wkb):
Parameters
----------
ogr_layer : pointer to open OGR layer
wkb: WKB encoding of geometry
wkb : WKB encoding of geometry
"""

cdef OGRGeometryH ogr_geometry = NULL
Expand Down Expand Up @@ -1124,9 +1124,8 @@ def ogr_read(
ogr_dataset = ogr_open(path_c, 0, dataset_options)

if sql is None:
# layer defaults to index 0
if layer is None:
layer = 0
layer = get_default_layer(ogr_dataset)
ogr_layer = get_ogr_layer(ogr_dataset, layer)
else:
ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect)
Expand Down Expand Up @@ -1315,9 +1314,8 @@ def ogr_open_arrow(
ogr_dataset = ogr_open(path_c, 0, dataset_options)

if sql is None:
# layer defaults to index 0
if layer is None:
layer = 0
layer = get_default_layer(ogr_dataset)
ogr_layer = get_ogr_layer(ogr_dataset, layer)
else:
ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect)
Expand Down Expand Up @@ -1471,11 +1469,10 @@ def ogr_read_bounds(
path_b = path.encode('utf-8')
path_c = path_b

# layer defaults to index 0
if layer is None:
layer = 0

ogr_dataset = ogr_open(path_c, 0, NULL)

if layer is None:
layer = get_default_layer(ogr_dataset)
ogr_layer = get_ogr_layer(ogr_dataset, layer)

# Apply the attribute filter
Expand Down Expand Up @@ -1511,13 +1508,13 @@ def ogr_read_info(
path_b = path.encode('utf-8')
path_c = path_b

# layer defaults to index 0
if layer is None:
layer = 0


try:
dataset_options = dict_to_options(dataset_kwargs)
ogr_dataset = ogr_open(path_c, 0, dataset_options)

if layer is None:
layer = get_default_layer(ogr_dataset)
ogr_layer = get_ogr_layer(ogr_dataset, layer)

# Encoding is derived from the user, from the dataset capabilities / type,
Expand Down Expand Up @@ -1564,15 +1561,71 @@ def ogr_read_info(

def ogr_list_layers(str path):
cdef const char *path_c = NULL
cdef const char *ogr_name = NULL
cdef OGRDataSourceH ogr_dataset = NULL
cdef OGRLayerH ogr_layer = NULL

path_b = path.encode('utf-8')
path_c = path_b

ogr_dataset = ogr_open(path_c, 0, NULL)

layers = get_layer_names(ogr_dataset)

if ogr_dataset != NULL:
GDALClose(ogr_dataset)
ogr_dataset = NULL

return layers


cdef str get_default_layer(OGRDataSourceH ogr_dataset):
""" Get the layer in the dataset that is read by default.
The caller is responsible for closing the dataset.
Parameters
----------
ogr_dataset : pointer to open OGR dataset
Returns
-------
str
the name of the default layer to be read.
"""
layers = get_layer_names(ogr_dataset)
first_layer_name = layers[0][0]

if len(layers) > 1:
dataset_name = os.path.basename(get_string(OGR_DS_GetName(ogr_dataset)))

other_layer_names = ', '.join([f"'{l}'" for l in layers[1:, 0]])
warnings.warn(
f"More than one layer found in '{dataset_name}': '{first_layer_name}' "
f"(default), {other_layer_names}. Specify layer parameter to avoid this "
"warning.",
stacklevel=2,
)

return first_layer_name


cdef get_layer_names(OGRDataSourceH ogr_dataset):
""" Get the layers in the dataset.
The caller is responsible for closing the dataset.
Parameters
----------
ogr_dataset : pointer to open OGR dataset
Returns
-------
ndarray(n)
array of layer names
"""
cdef OGRLayerH ogr_layer = NULL

layer_count = GDALDatasetGetLayerCount(ogr_dataset)

data = np.empty(shape=(layer_count, 2), dtype=object)
Expand All @@ -1583,10 +1636,6 @@ def ogr_list_layers(str path):
data_view[i, 0] = get_string(OGR_L_GetName(ogr_layer))
data_view[i, 1] = get_geometry_type(ogr_layer)

if ogr_dataset != NULL:
GDALClose(ogr_dataset)
ogr_dataset = NULL

return data


Expand Down
2 changes: 2 additions & 0 deletions pyogrio/_ogr.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ cdef extern from "ogr_api.h":
OGRDataSourceH OGR_Dr_Open(OGRSFDriverH driver, const char *path, int bupdate)
const char* OGR_Dr_GetName(OGRSFDriverH driver)

const char* OGR_DS_GetName(OGRDataSourceH)

OGRFeatureH OGR_F_Create(OGRFeatureDefnH featuredefn)
void OGR_F_Destroy(OGRFeatureH feature)

Expand Down
7 changes: 7 additions & 0 deletions pyogrio/tests/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ def test_read_arrow(naturalearth_lowres_all_ext):
assert_geodataframe_equal(result, expected, check_less_precise=check_less_precise)


def test_read_arrow_unspecified_layer_warning(data_dir):
"""Reading a multi-layer file without specifying a layer gives a warning."""
with pytest.warns(UserWarning, match="More than one layer found "):
read_arrow(data_dir / "sample.osm.pbf")


@pytest.mark.parametrize("skip_features, expected", [(10, 167), (200, 0)])
def test_read_arrow_skip_features(naturalearth_lowres, skip_features, expected):
table = read_arrow(naturalearth_lowres, skip_features=skip_features)[1]
Expand Down Expand Up @@ -117,6 +123,7 @@ def test_read_arrow_to_pandas_kwargs(test_fgdb_vsi):
arrow_to_pandas_kwargs = {"strings_to_categorical": True}
result = read_dataframe(
test_fgdb_vsi,
layer="basetable_2",
use_arrow=True,
arrow_to_pandas_kwargs=arrow_to_pandas_kwargs,
)
Expand Down
14 changes: 13 additions & 1 deletion pyogrio/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,12 @@ def test_read_bounds_max_features(naturalearth_lowres):
assert bounds.shape == (4, 2)


def test_read_bounds_unspecified_layer_warning(data_dir):
"""Reading a multi-layer file without specifying a layer gives a warning."""
with pytest.warns(UserWarning, match="More than one layer found "):
read_bounds(data_dir / "sample.osm.pbf")


def test_read_bounds_negative_max_features(naturalearth_lowres):
with pytest.raises(ValueError, match="'max_features' must be >= 0"):
read_bounds(naturalearth_lowres, max_features=-1)
Expand Down Expand Up @@ -451,8 +457,14 @@ def test_read_info_force_total_bounds(
assert info["total_bounds"] is None


def test_read_info_unspecified_layer_warning(data_dir):
"""Reading a multi-layer file without specifying a layer gives a warning."""
with pytest.warns(UserWarning, match="More than one layer found "):
read_info(data_dir / "sample.osm.pbf")


def test_read_info_without_geometry(test_fgdb_vsi):
assert read_info(test_fgdb_vsi)["total_bounds"] is None
assert read_info(test_fgdb_vsi, layer="basetable_2")["total_bounds"] is None


@pytest.mark.parametrize(
Expand Down
5 changes: 4 additions & 1 deletion pyogrio/tests/test_geopandas_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def test_read_force_2d(test_fgdb_vsi, use_arrow):


@pytest.mark.filterwarnings("ignore: Measured")
@pytest.mark.filterwarnings("ignore: More than one layer found in")
def test_read_layer(test_fgdb_vsi, use_arrow):
layers = list_layers(test_fgdb_vsi)
kwargs = {"use_arrow": use_arrow, "read_geometry": False, "max_features": 1}
Expand Down Expand Up @@ -251,7 +252,9 @@ def test_read_write_datetime_tz_with_nulls(tmp_path):


def test_read_null_values(test_fgdb_vsi, use_arrow):
df = read_dataframe(test_fgdb_vsi, use_arrow=use_arrow, read_geometry=False)
df = read_dataframe(
test_fgdb_vsi, layer="basetable_2", use_arrow=use_arrow, read_geometry=False
)

# make sure that Null values are preserved
assert df.SEGMENT_NAME.isnull().max()
Expand Down
6 changes: 6 additions & 0 deletions pyogrio/tests/test_raw_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ def test_read_autodetect_driver(tmp_path, naturalearth_lowres, ext):
assert len(geometry) == len(fields[0])


def test_read_arrow_unspecified_layer_warning(data_dir):
"""Reading a multi-layer file without specifying a layer gives a warning."""
with pytest.warns(UserWarning, match="More than one layer found "):
read(data_dir / "sample.osm.pbf")


def test_read_invalid_layer(naturalearth_lowres):
with pytest.raises(DataLayerError, match="Layer 'invalid' could not be opened"):
read(naturalearth_lowres, layer="invalid")
Expand Down

0 comments on commit dff672c

Please sign in to comment.