Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding --use-driver option to fix the GDAL driver to be used #14

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ stac datacube extend-item source --asset asset-name

Use `stac datacube --help` to see all subcommands and options.

### Fixing the GDAL driver

Sometimes it is necessary to explicitly specify the GDAL driver to be used, for
example for Zarr stores on HTTP storage. The `--use-driver` option does this:

```shell
stac datacube create-item --use-driver ZARR http://example.com/some.zarr/ out.json
```

## Contributing

We use [pre-commit](https://pre-commit.com/) to check any changes.
Expand Down
41 changes: 32 additions & 9 deletions src/stactools/datacube/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,48 @@ def create_datacube_command(cli: Group) -> Command:
def datacube() -> None:
pass

rtol_option = click.option(
"--rtol",
type=float,
default=1.0e-5,
help="relative tolerance of floating point values to be considered equal",
)
use_driver_option = click.option(
"--use-driver",
type=str,
default=None,
help="specify the driver prefix (like NETCDF or ZARR)",
)

@datacube.command("extend-item")
@click.argument("source")
@click.option("--asset", type=str)
@click.option("--rtol", type=float, default=1.0e-5)
@click.argument("item_filename")
@click.option("--asset", type=str, help="name of the asset to extend")
@rtol_option
@use_driver_option
def extend_item_command(
source: str, asset: Optional[str] = None, rtol: float = 1.0e-5
item_filename: str,
asset: Optional[str] = None,
rtol: float = 1.0e-5,
use_driver: Optional[str] = None,
) -> None:
item = pystac.Item.from_file(source)
stac.extend_item(item, asset_name=asset, rtol=rtol)
item = pystac.Item.from_file(item_filename)
stac.extend_item(
item, asset_name=asset, rtol=rtol, use_driver=use_driver
)
item.save_object()

@datacube.command("create-item")
@click.argument("source")
@click.argument("destination")
@click.option("--rtol", type=float, default=1.0e-5)
@rtol_option
@use_driver_option
def create_item_command(
source: str, destination: str, rtol: float = 1.0e-5
source: str,
destination: str,
rtol: float = 1.0e-5,
use_driver: Optional[str] = None,
) -> None:
item = stac.create_item(source, rtol=rtol)
item = stac.create_item(source, rtol=rtol, use_driver=use_driver)
item.save_object(dest_href=destination)

return datacube
57 changes: 46 additions & 11 deletions src/stactools/datacube/stac.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
import os.path
import re
from datetime import datetime, timedelta
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, cast
from typing import (
Any,
Dict,
Iterable,
Iterator,
List,
Optional,
Tuple,
Union,
cast,
)
from urllib.parse import urlparse

import numpy as np
Expand Down Expand Up @@ -125,7 +135,7 @@ def iso_duration(td: timedelta) -> str:


def read_dimensions_and_variables(
href: str, rtol: float = 1.0e-5
href: str, rtol: float = 1.0e-5, use_driver: Optional[str] = None
) -> Tuple[Dict[str, Dimension], Dict[str, Variable], Dict[str, Any]]:
url = urlparse(href)
if not url.scheme:
Expand All @@ -138,6 +148,9 @@ def read_dimensions_and_variables(
else:
raise ValueError(f"Unsupported HREF {href}")

if use_driver is not None:
path = f'{use_driver}:"{path}"'

ds = gdal.OpenEx(path, gdal.OF_MULTIDIM_RASTER | gdal.GA_ReadOnly)
info = gdal.MultiDimInfo(ds)

Expand Down Expand Up @@ -167,7 +180,9 @@ def read_dimensions_and_variables(
step = float((data[-1] - data[0]) / len(data))

values = (
[float(v) for v in data] if not evenly_spaced else cast(List[float], [])
[float(v) for v in data]
if not evenly_spaced
else cast(List[float], [])
)
else:
evenly_spaced = False
Expand Down Expand Up @@ -257,7 +272,9 @@ def read_dimensions_and_variables(
{
"type": VariableType.DATA,
"unit": array_info.get("unit"),
"dimensions": [dim_name[1:] for dim_name in array_info["dimensions"]],
"dimensions": [
dim_name[1:] for dim_name in array_info["dimensions"]
],
# TODO: description
}
)
Expand Down Expand Up @@ -310,7 +327,12 @@ def get_geometry(
),
)
attributes = info.get("attributes", {})
if x_dim and y_dim and None not in x_dim.extent and None not in y_dim.extent:
if (
x_dim
and y_dim
and None not in x_dim.extent
and None not in y_dim.extent
):
x_low, x_high = x_dim.extent
y_low, y_high = y_dim.extent

Expand Down Expand Up @@ -351,9 +373,14 @@ def get_geometry(


def extend_asset(
item: Item, asset: Asset, rtol: float = 1.0e-5
item: Item,
asset: Asset,
rtol: float = 1.0e-5,
use_driver: Optional[str] = None,
) -> DatacubeExtension[Asset]:
dimensions, variables, info = read_dimensions_and_variables(asset.href, rtol)
dimensions, variables, info = read_dimensions_and_variables(
asset.href, rtol, use_driver
)
datacube = DatacubeExtension.ext(asset, add_if_missing=True)
datacube.apply(dimensions, variables)

Expand Down Expand Up @@ -386,7 +413,10 @@ def extend_asset(


def extend_item(
item: Item, asset_name: Optional[str] = None, rtol: float = 1.0e-5
item: Item,
asset_name: Optional[str] = None,
rtol: float = 1.0e-5,
use_driver: Optional[str] = None,
) -> Item:
if not asset_name:
for name, asset in item.assets.items():
Expand All @@ -398,12 +428,16 @@ def extend_item(
raise ValueError("Unable to find data asset to extend")

asset = item.assets[asset_name]
datacube = extend_asset(item, asset, rtol)
datacube = extend_asset(item, asset, rtol, use_driver)

dimensions = datacube.dimensions.values()
# add geometry, we assume lon/lat here
common = CommonMetadata(item)
if not common.start_datetime and not common.end_datetime and not item.datetime:
if (
not common.start_datetime
and not common.end_datetime
and not item.datetime
):
time_dimension = cast(
Optional[TemporalDimension],
_get_dimension(dimensions, DimensionType.TEMPORAL),
Expand All @@ -423,6 +457,7 @@ def create_item(
href: str,
read_href_modifier: Optional[ReadHrefModifier] = None,
rtol: float = 1.0e-5,
use_driver: Optional[str] = None,
) -> Item:
id = os.path.splitext(os.path.basename(href))[0]
if read_href_modifier:
Expand All @@ -438,5 +473,5 @@ def create_item(

item.add_asset("data", Asset(href=href, roles=["data"]))
item.datetime = None
extend_item(item, "data", rtol)
extend_item(item, "data", rtol, use_driver)
return item
22 changes: 22 additions & 0 deletions tests/data/test.zarr/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"chunks": [
10,
10
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": 0.0,
"filters": null,
"order": "C",
"shape": [
10,
10
],
"zarr_format": 2
}
18 changes: 18 additions & 0 deletions tests/test_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
Variable,
VerticalSpatialDimension,
)
from click.testing import CliRunner

from stactools.datacube import stac
from stactools.cli.cli import cli


def test_create_item() -> None:
Expand Down Expand Up @@ -253,3 +255,19 @@ def test_get_geometry_from_metadata() -> None:
),
),
}


def test_use_driver(tmp_path) -> None:
    """Run `datacube create-item --use-driver ZARR` through the CLI.

    The command is invoked in-process via click's ``CliRunner`` against the
    Zarr fixture and must exit with status 0. The resulting item is written
    to ``out.json`` inside pytest's per-test ``tmp_path`` directory.
    """
    runner = CliRunner()
    result = runner.invoke(
        cli,
        [
            "datacube",
            "create-item",
            "--use-driver",
            "ZARR",
            # NOTE(review): the fixture in this change set lives under
            # tests/data/test.zarr -- confirm against the repository layout.
            "tests/data/test.zarr",
            # click parses command-line arguments as strings, so the
            # pathlib.Path provided by pytest is converted explicitly.
            str(tmp_path / "out.json"),
        ],
    )
    # Include the captured CLI output so a failure is diagnosable.
    assert result.exit_code == 0, result.output