From 1cad71e6d70d4da0b5a9b147e4cc6b8d6f96d05d Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 26 Sep 2023 15:24:51 +0200 Subject: [PATCH 1/3] Adding `--use-driver` option to fix the GDAL driver to be used --- README.md | 9 +++++ src/stactools/datacube/commands.py | 41 ++++++++++++++++----- src/stactools/datacube/stac.py | 57 ++++++++++++++++++++++++------ 3 files changed, 87 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index c8b2167..93773a6 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,15 @@ stac datacube extend-item source --asset asset-name Use `stac datacube --help` to see all subcommands and options. +### Fixing GDAL driver + +Sometimes it is necessary to pin the GDAL driver explicitly, for example for +ZARR datasets served over HTTP. For this, the `--use-driver` option can be used: + +```shell +stac datacube create-item --use-driver ZARR http://example.com/some.zarr/ out.json +``` + ## Contributing We use [pre-commit](https://pre-commit.com/) to check any changes. 
diff --git a/src/stactools/datacube/commands.py b/src/stactools/datacube/commands.py index f0d97d7..8d7d508 100644 --- a/src/stactools/datacube/commands.py +++ b/src/stactools/datacube/commands.py @@ -20,25 +20,48 @@ def create_datacube_command(cli: Group) -> Command: def datacube() -> None: pass + rtol_option = click.option( + "--rtol", + type=float, + default=1.0e-5, + help="relative tolerance of floating point values to be considered equal", + ) + use_driver_option = click.option( + "--use-driver", + type=str, + default=None, + help="specify the driver prefix (like NETCDF or ZARR)", + ) + @datacube.command("extend-item") - @click.argument("source") - @click.option("--asset", type=str) - @click.option("--rtol", type=float, default=1.0e-5) + @click.argument("item_filename") + @click.option("--asset", type=str, help="name of the asset to extend") + @rtol_option + @use_driver_option def extend_item_command( - source: str, asset: Optional[str] = None, rtol: float = 1.0e-5 + item_filename: str, + asset: Optional[str] = None, + rtol: float = 1.0e-5, + use_driver: Optional[str] = None, ) -> None: - item = pystac.Item.from_file(source) - stac.extend_item(item, asset_name=asset, rtol=rtol) + item = pystac.Item.from_file(item_filename) + stac.extend_item( + item, asset_name=asset, rtol=rtol, use_driver=use_driver + ) item.save_object() @datacube.command("create-item") @click.argument("source") @click.argument("destination") - @click.option("--rtol", type=float, default=1.0e-5) + @rtol_option + @use_driver_option def create_item_command( - source: str, destination: str, rtol: float = 1.0e-5 + source: str, + destination: str, + rtol: float = 1.0e-5, + use_driver: Optional[str] = None, ) -> None: - item = stac.create_item(source, rtol=rtol) + item = stac.create_item(source, rtol=rtol, use_driver=use_driver) item.save_object(dest_href=destination) return datacube diff --git a/src/stactools/datacube/stac.py b/src/stactools/datacube/stac.py index 6271271..be6297a 100644 --- 
a/src/stactools/datacube/stac.py +++ b/src/stactools/datacube/stac.py @@ -1,7 +1,17 @@ import os.path import re from datetime import datetime, timedelta -from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, cast +from typing import ( + Any, + Dict, + Iterable, + Iterator, + List, + Optional, + Tuple, + Union, + cast, +) from urllib.parse import urlparse import numpy as np @@ -125,7 +135,7 @@ def iso_duration(td: timedelta) -> str: def read_dimensions_and_variables( - href: str, rtol: float = 1.0e-5 + href: str, rtol: float = 1.0e-5, use_driver: Optional[str] = None ) -> Tuple[Dict[str, Dimension], Dict[str, Variable], Dict[str, Any]]: url = urlparse(href) if not url.scheme: @@ -138,6 +148,9 @@ def read_dimensions_and_variables( else: raise ValueError(f"Unsupported HREF {href}") + if use_driver is not None: + path = f'{use_driver}:"{path}"' + ds = gdal.OpenEx(path, gdal.OF_MULTIDIM_RASTER | gdal.GA_ReadOnly) info = gdal.MultiDimInfo(ds) @@ -167,7 +180,9 @@ def read_dimensions_and_variables( step = float((data[-1] - data[0]) / len(data)) values = ( - [float(v) for v in data] if not evenly_spaced else cast(List[float], []) + [float(v) for v in data] + if not evenly_spaced + else cast(List[float], []) ) else: evenly_spaced = False @@ -257,7 +272,9 @@ def read_dimensions_and_variables( { "type": VariableType.DATA, "unit": array_info.get("unit"), - "dimensions": [dim_name[1:] for dim_name in array_info["dimensions"]], + "dimensions": [ + dim_name[1:] for dim_name in array_info["dimensions"] + ], # TODO: description } ) @@ -310,7 +327,12 @@ def get_geometry( ), ) attributes = info.get("attributes", {}) - if x_dim and y_dim and None not in x_dim.extent and None not in y_dim.extent: + if ( + x_dim + and y_dim + and None not in x_dim.extent + and None not in y_dim.extent + ): x_low, x_high = x_dim.extent y_low, y_high = y_dim.extent @@ -351,9 +373,14 @@ def get_geometry( def extend_asset( - item: Item, asset: Asset, rtol: float = 1.0e-5 + item: 
Item, + asset: Asset, + rtol: float = 1.0e-5, + use_driver: Optional[str] = None, ) -> DatacubeExtension[Asset]: - dimensions, variables, info = read_dimensions_and_variables(asset.href, rtol) + dimensions, variables, info = read_dimensions_and_variables( + asset.href, rtol, use_driver + ) datacube = DatacubeExtension.ext(asset, add_if_missing=True) datacube.apply(dimensions, variables) @@ -386,7 +413,10 @@ def extend_asset( def extend_item( - item: Item, asset_name: Optional[str] = None, rtol: float = 1.0e-5 + item: Item, + asset_name: Optional[str] = None, + rtol: float = 1.0e-5, + use_driver: Optional[str] = None, ) -> Item: if not asset_name: for name, asset in item.assets.items(): @@ -398,12 +428,16 @@ def extend_item( raise ValueError("Unable to find data asset to extend") asset = item.assets[asset_name] - datacube = extend_asset(item, asset, rtol) + datacube = extend_asset(item, asset, rtol, use_driver) dimensions = datacube.dimensions.values() # add geometry, we assume lon/lat here common = CommonMetadata(item) - if not common.start_datetime and not common.end_datetime and not item.datetime: + if ( + not common.start_datetime + and not common.end_datetime + and not item.datetime + ): time_dimension = cast( Optional[TemporalDimension], _get_dimension(dimensions, DimensionType.TEMPORAL), @@ -423,6 +457,7 @@ def create_item( href: str, read_href_modifier: Optional[ReadHrefModifier] = None, rtol: float = 1.0e-5, + use_driver: Optional[str] = None, ) -> Item: id = os.path.splitext(os.path.basename(href))[0] if read_href_modifier: @@ -438,5 +473,5 @@ def create_item( item.add_asset("data", Asset(href=href, roles=["data"])) item.datetime = None - extend_item(item, "data", rtol) + extend_item(item, "data", rtol, use_driver) return item From cb071f10f62f25bd76543edaae3b3f95b5b65dea Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 3 Oct 2023 11:04:59 +0200 Subject: [PATCH 2/3] Adding test for `--use-driver` option --- tests/data/test.zarr/.zarray | 22 
++++++++++++++++++++++ tests/test_stac.py | 19 +++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 tests/data/test.zarr/.zarray diff --git a/tests/data/test.zarr/.zarray b/tests/data/test.zarr/.zarray new file mode 100644 index 0000000..d46a9bc --- /dev/null +++ b/tests/data/test.zarr/.zarray @@ -0,0 +1,22 @@ +{ + "chunks": [ + 10, + 10 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": " None: @@ -253,3 +255,20 @@ def test_get_geometry_from_metadata() -> None: ), ), } + + +def test_use_driver(tmp_path): + runner = CliRunner() + result = runner.invoke( + cli, + [ + "datacube", + "create-item", + "--use-driver", + "ZARR", + "tests/data-files/test.zarr", + tmp_path / "out.json", + ], + ) + print(result.stdout) + assert result.exit_code == 0 From 7ea4a0d5add3645219d20dae53c290105cd7b5fe Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 3 Oct 2023 10:55:22 -0600 Subject: [PATCH 3/3] Update tests/test_stac.py --- tests/test_stac.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_stac.py b/tests/test_stac.py index 1ad28e6..d0e47b9 100644 --- a/tests/test_stac.py +++ b/tests/test_stac.py @@ -270,5 +270,4 @@ def test_use_driver(tmp_path): tmp_path / "out.json", ], ) - print(result.stdout) assert result.exit_code == 0