From 4ecb6b9bfd927640e9e912abc7cf2c2bcbb8385b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 22:35:23 -0800 Subject: [PATCH 01/18] Bump actions/download-artifact from 3 to 4 (#81) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/pypi-release.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index 99e6907..23f4a99 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -52,7 +52,7 @@ jobs: name: Install Python with: python-version: 3.9 - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: releases path: dist @@ -73,7 +73,7 @@ jobs: if: github.event_name == 'release' runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: releases path: dist From d0aae5aab6063ac778ea2b1eea89e46cf81ecb83 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 22:35:33 -0800 Subject: [PATCH 02/18] Bump actions/upload-artifact from 3 to 4 (#80) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/pypi-release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index 23f4a99..9c41391 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -39,7 +39,7 @@ jobs: else echo "✅ Looks good" fi - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: releases path: dist From 6ed6d793d40c6e8534b7f111312e9afcab9bd865 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 22:35:42 -0800 Subject: [PATCH 03/18] Bump actions/setup-python from 4 to 5 (#82) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/pypi-release.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index 9c41391..bd329f0 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -13,7 +13,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 name: Install Python with: python-version: 3.9 @@ -48,7 +48,7 @@ jobs: needs: build-artifacts runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 name: Install Python with: python-version: 3.9 From 40846df7933d4627d0d5b2850168002205d47a1d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 22:35:50 -0800 Subject: [PATCH 04/18] [pre-commit.ci] pre-commit autoupdate (#83) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/dependabot.yml | 6 +++--- .github/workflows/main.yaml | 4 ++-- .pre-commit-config.yaml | 10 +++++----- .readthedocs.yaml | 2 +- codecov.yml | 4 ++-- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index bc63aca..8ac6b8c 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,6 +1,6 @@ version: 2 updates: - - package-ecosystem: 'github-actions' - directory: '/' + - package-ecosystem: "github-actions" + directory: "/" schedule: - interval: 'monthly' + interval: "monthly" diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index e0146dd..f30b2bf 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -9,7 +9,7 @@ on: - main workflow_dispatch: schedule: - - cron: '0 0 * * *' # Daily “At 00:00” + - cron: "0 0 * * *" # Daily “At 00:00” concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -23,7 +23,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.9', '3.10'] #TODO: add 3.11 once sparse/numba support it + python-version: ["3.9", "3.10"] #TODO: add 3.11 once sparse/numba support it timeout-minutes: 20 defaults: run: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e60e45f..b05c750 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ ci: repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -16,17 +16,17 @@ repos: - id: mixed-line-ending - repo: https://github.com/astral-sh/ruff-pre-commit - rev: 'v0.0.292' + rev: "v0.1.9" hooks: - id: ruff - args: ['--fix'] + args: ["--fix"] - repo: https://github.com/keewis/blackdoc - rev: v0.3.8 + rev: v0.3.9 hooks: - id: blackdoc - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.3 + rev: v4.0.0-alpha.8 hooks: - id: prettier diff --git a/.readthedocs.yaml b/.readthedocs.yaml index fb9bd82..08a0fa9 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,7 +8,7 @@ version: 2 build: os: ubuntu-20.04 tools: - python: 'mambaforge-4.10' + python: "mambaforge-4.10" # Build documentation in the doc/ directory with Sphinx sphinx: diff --git a/codecov.yml b/codecov.yml index 0bbe239..2c4cc5d 100644 --- a/codecov.yml +++ b/codecov.yml @@ -5,8 +5,8 @@ codecov: comment: false ignore: - - 'tests/*.py' - - 'setup.py' + - "tests/*.py" + - "setup.py" coverage: precision: 2 From bd8c186c1ada1733cee112bec072f931b6294b97 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 10 Jan 2024 18:22:47 -0500 Subject: [PATCH 05/18] Update badges, monogram, and grammar --- README.md | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index a5f76d1..e27c29a 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,10 @@ -

- - - - - - +

+ + + + CarbonPlan monogram. + +

# ndpyramid @@ -18,7 +12,9 @@ A small utility for generating ND array pyramids using Xarray and Zarr. [![CI](https://github.com/carbonplan/ndpyramid/actions/workflows/main.yaml/badge.svg)](https://github.com/carbonplan/ndpyramid/actions/workflows/main.yaml) -![MIT License](https://badgen.net/badge/license/MIT/blue) +![PyPI](https://img.shields.io/pypi/v/ndpyramid) +[![Conda Version](https://img.shields.io/conda/vn/conda-forge/ndpyramid.svg)](https://anaconda.org/conda-forge/ndpyramid) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) # installation @@ -74,8 +70,8 @@ See the docstrings for more details about input parameters and options. ## license -All the code in this repository is [MIT](https://choosealicense.com/licenses/mit/) licensed, but we request that you please provide attribution if reusing any of our digital content (graphics, logo, articles, etc.). +All the code in this repository is [MIT](https://choosealicense.com/licenses/mit/)-licensed, but we request that you please provide attribution if reusing any of our digital content (graphics, logo, articles, etc.). ## about us -CarbonPlan is a non-profit organization that uses data and science for climate action. We aim to improve the transparency and scientific integrity of climate solutions with open data and tools. Find out more at [carbonplan.org](https://carbonplan.org/) or get in touch by [opening an issue](https://github.com/carbonplan/ndpyramid/issues/new) or [sending us an email](mailto:hello@carbonplan.org). +CarbonPlan is a nonprofit organization that uses data and science for climate action. We aim to improve the transparency and scientific integrity of climate solutions with open data and tools. Find out more at [carbonplan.org](https://carbonplan.org/) or get in touch by [opening an issue](https://github.com/carbonplan/ndpyramid/issues/new) or [sending us an email](mailto:hello@carbonplan.org). From 97c90ba17e50a39e23bbd54a03fa234e9fe139f4 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 12 Jan 2024 13:48:39 -0500 Subject: [PATCH 06/18] Setup continuous benchmarking workflow with pytest-codspeed (#85) --- .github/workflows/codspeed.yml | 39 ++++++++++++++++++++++++++++++++++ .github/workflows/main.yaml | 12 +++++++---- ci/environment.yml | 3 +++ ndpyramid/common.py | 2 +- tests/test_pyramids.py | 28 ++++++++++++------------ 5 files changed, 65 insertions(+), 19 deletions(-) create mode 100644 .github/workflows/codspeed.yml diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml new file mode 100644 index 0000000..0c19679 --- /dev/null +++ b/.github/workflows/codspeed.yml @@ -0,0 +1,39 @@ +name: codspeed-benchmarks + +on: + # Run on pushes to the main branch + push: + branches: + - "main" + # Run on pull requests + pull_request: + # `workflow_dispatch` allows CodSpeed to trigger backtest + # performance analysis in order to generate initial data. + workflow_dispatch: + +jobs: + benchmarks: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install Conda environment from environment.yml + uses: mamba-org/setup-micromamba@v1 + with: + # environment-file is not assumed anymore + environment-file: ci/environment.yml + create-args: >- + python=3.10 + # now called cache-environment + cache-environment: true + + - name: Install package + run: | + python -m pip install . --no-deps + - name: Conda list information + run: | + conda env list + conda list + - name: Run benchmarks + uses: CodSpeedHQ/action@v2 diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index f30b2bf..e6b4aad 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -29,14 +29,18 @@ jobs: run: shell: bash -l {0} steps: - - uses: actions/checkout@v4 + - name: Checkout + uses: actions/checkout@v4 + - name: Install Conda environment from environment.yml - uses: mamba-org/provision-with-micromamba@main + uses: mamba-org/setup-micromamba@v1 with: + # environment-file is not assumed anymore environment-file: ci/environment.yml - cache-downloads: true - extra-specs: | + create-args: >- python=${{ matrix.python-version }} + # now called cache-environment + cache-environment: true - name: Install package run: | diff --git a/ci/environment.yml b/ci/environment.yml index 7f3f7a5..2e78937 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -10,6 +10,7 @@ dependencies: - numpy - pip - pooch + - pre-commit - pydantic>=1.10 - pyproj - pytest @@ -24,3 +25,5 @@ dependencies: - xesmf - zarr - cf_xarray>=0.8.0 + - pip: + - pytest-codspeed diff --git a/ndpyramid/common.py b/ndpyramid/common.py index 4b7976f..23162dd 100644 --- a/ndpyramid/common.py +++ b/ndpyramid/common.py @@ -17,7 +17,7 @@ def __init__(self, **data) -> None: self._crs = epsg_codes[self.name] self._proj = pyproj.Proj(self._crs) - @pydantic.validate_arguments + @pydantic.validate_call def transform(self, *, dim:int) -> rasterio.transform.Affine: diff --git a/tests/test_pyramids.py b/tests/test_pyramids.py index d268245..08080ed 100644 --- a/tests/test_pyramids.py +++ b/tests/test_pyramids.py @@ -14,19 +14,19 @@ def temperature(): return ds -def test_xarray_coarsened_pyramid(temperature): +def test_xarray_coarsened_pyramid(temperature, benchmark): factors = [4, 2, 1] - pyramid = pyramid_coarsen(temperature, dims=('lat', 'lon'), factors=factors, boundary='trim') + pyramid = benchmark(lambda: pyramid_coarsen(temperature, dims=('lat', 'lon'), factors=factors, boundary='trim')) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == len(factors) pyramid.to_zarr(MemoryStore()) -def test_reprojected_pyramid(temperature): +def test_reprojected_pyramid(temperature, benchmark): pytest.importorskip('rioxarray') levels = 2 temperature = temperature.rio.write_crs('EPSG:4326') - pyramid = pyramid_reproject(temperature, levels=2) + pyramid = benchmark(lambda: pyramid_reproject(temperature, levels=2)) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == levels assert pyramid.ds.attrs['multiscales'][0]['datasets'][0]['crs'] == 'EPSG:3857' @@ -34,11 +34,11 @@ def test_reprojected_pyramid(temperature): @pytest.mark.parametrize('regridder_apply_kws', [None, {'keep_attrs': False}]) -def test_regridded_pyramid(temperature, regridder_apply_kws): +def test_regridded_pyramid(temperature, regridder_apply_kws, benchmark): pytest.importorskip('xesmf') - pyramid = pyramid_regrid( + pyramid = benchmark(lambda: pyramid_regrid( temperature, levels=2, regridder_apply_kws=regridder_apply_kws, other_chunks={'time': 2} - ) + )) assert pyramid.ds.attrs['multiscales'] expected_attrs = ( temperature['air'].attrs @@ -50,30 +50,30 @@ def test_regridded_pyramid(temperature, regridder_apply_kws): pyramid.to_zarr(MemoryStore()) -def test_regridded_pyramid_with_weights(temperature): +def test_regridded_pyramid_with_weights(temperature, benchmark): pytest.importorskip('xesmf') levels = 2 weights_pyramid = generate_weights_pyramid(temperature.isel(time=0), levels) - pyramid = pyramid_regrid( + pyramid = benchmark(lambda: pyramid_regrid( temperature, levels=levels, weights_pyramid=weights_pyramid, other_chunks={'time': 2} - ) + )) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == levels pyramid.to_zarr(MemoryStore()) @pytest.mark.parametrize('projection', ['web-mercator', 'equidistant-cylindrical']) -def test_make_grid_ds(projection): +def test_make_grid_ds(projection, benchmark): - grid = make_grid_ds(0, pixels_per_tile=8, projection=projection) + grid = benchmark(lambda: make_grid_ds(0, pixels_per_tile=8, projection=projection)) lon_vals = grid.lon_b.values assert np.all((lon_vals[-1, :] - lon_vals[0, :]) < 0.001) assert grid.attrs['title'] == 'Web Mercator Grid' if projection == 'web-mercator' else 'Equidistant Cylindrical Grid' @pytest.mark.parametrize('levels', [1, 2]) @pytest.mark.parametrize('method', ['bilinear', 'conservative']) -def test_generate_weights_pyramid(temperature, levels, method): - weights_pyramid = generate_weights_pyramid(temperature.isel(time=0), levels, method=method) +def test_generate_weights_pyramid(temperature, levels, method, benchmark): + weights_pyramid = benchmark(lambda: generate_weights_pyramid(temperature.isel(time=0), levels, method=method)) assert weights_pyramid.ds.attrs['levels'] == levels assert weights_pyramid.ds.attrs['regrid_method'] == method assert set(weights_pyramid['0'].ds.data_vars) == {'S', 'col', 'row'} From 1d5caa7ee84f3aafb7eabdb87f0859ccb7c2f00c Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 16 Jan 2024 12:42:35 -0500 Subject: [PATCH 07/18] Run benchmarks in CI (#86) --- .github/workflows/codspeed.yml | 41 +++++++++++++++++++++------------- tests/test_pyramids.py | 1 + 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml index 0c19679..c378a27 100644 --- a/.github/workflows/codspeed.yml +++ b/.github/workflows/codspeed.yml @@ -14,26 +14,37 @@ on: jobs: benchmarks: runs-on: ubuntu-latest + defaults: + run: + shell: bash -el {0} steps: - name: Checkout uses: actions/checkout@v4 - - name: Install Conda environment from environment.yml - uses: mamba-org/setup-micromamba@v1 + - name: Setup Miniconda + uses: conda-incubator/setup-miniconda@v3.0.1 with: - # environment-file is not assumed anymore - environment-file: ci/environment.yml - create-args: >- - python=3.10 - # now called cache-environment - cache-environment: true + auto-activate-base: true + activate-environment: "" # base environment + channels: conda-forge,nodefaults + channel-priority: strict - - name: Install package + - name: Install dependencies run: | - python -m pip install . --no-deps - - name: Conda list information - run: | - conda env list - conda list + # $CONDA is an environment variable pointing to the root of the miniconda directory + # Preprend $CONDA/bin to $PATH so that conda's python is used over system python + echo $CONDA/bin >> $GITHUB_PATH + conda install --solver=libmamba dask python=3.10 \ + esmpy>=8.2.0 mpich netcdf4 numpy pip pooch pydantic>=1.10 pyproj \ + pytest pytest-cov pytest-mypy rasterio rioxarray scipy sparse>=0.13.0 \ + xarray xarray-datatree>=0.0.11 xesmf zarr cf_xarray>=0.8.0 + python -m pip install -U pytest-codspeed setuptools + python -m pip install -e . --no-deps + - name: Run benchmarks - uses: CodSpeedHQ/action@v2 + uses: CodSpeedHQ/action@v2.0.2 + with: + run: | + conda list + python -c "import ndpyramid; print(ndpyramid.__version__)" + python -m pytest --codspeed diff --git a/tests/test_pyramids.py b/tests/test_pyramids.py index 08080ed..64928f4 100644 --- a/tests/test_pyramids.py +++ b/tests/test_pyramids.py @@ -73,6 +73,7 @@ def test_make_grid_ds(projection, benchmark): @pytest.mark.parametrize('levels', [1, 2]) @pytest.mark.parametrize('method', ['bilinear', 'conservative']) def test_generate_weights_pyramid(temperature, levels, method, benchmark): + pytest.importorskip('xesmf') weights_pyramid = benchmark(lambda: generate_weights_pyramid(temperature.isel(time=0), levels, method=method)) assert weights_pyramid.ds.attrs['levels'] == levels assert weights_pyramid.ds.attrs['regrid_method'] == method From af6f2fb0c89677bb4c296951d3755b5d6a26408c Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 19 Jan 2024 12:00:55 -0500 Subject: [PATCH 08/18] Add information about pyramid structure and metadata schema (#89) Co-authored-by: Kata Martin --- .gitignore | 1 + docs/index.rst | 5 +-- docs/schema.md | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 docs/schema.md diff --git a/.gitignore b/.gitignore index 5e0244c..94d60b5 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,7 @@ instance/ # Sphinx documentation docs/_build/ +docs/generated/ # PyBuilder target/ diff --git a/docs/index.rst b/docs/index.rst index 115284d..f2040cc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -5,10 +5,6 @@ ndpyramid A small utility for generating ND array pyramids using Xarray and Zarr. -.. toctree:: - :hidden: - self - .. toctree:: :maxdepth: 1 :hidden: @@ -30,3 +26,4 @@ A small utility for generating ND array pyramids using Xarray and Zarr. :caption: Reference API + schema diff --git a/docs/schema.md b/docs/schema.md new file mode 100644 index 0000000..51ad024 --- /dev/null +++ b/docs/schema.md @@ -0,0 +1,91 @@ +# Schema + +## Background + +`ndpyramid` was created to generate pyramids for use with the [`@carbonplan/maps`](https://github.com/carbonplan/maps) toolkit. Check out blog posts about the [initial release](https://carbonplan.org/blog/maps-library-release) and [updates](https://carbonplan.org/blog/zarr-visualization-update) for more information about the toolkit's history. While our mapping toolkit remains the primary motivation for the library, the pyramids also have the potential to speed up other mapping approaches such as [dynamic tiling](https://nasa-impact.github.io/zarr-visualization-report/approaches/tiling/05-cmip6-pyramids.html). + +In order to provide highly performant rendering, the `pyramid_reproject` and `pyramid_regrid` methods generate pyramids according to the [map zoom level quadtree](https://docs.mapbox.com/help/glossary/zoom-level/#zoom-level-quadtrees) pattern. This structure, in which the number of tiles at a given zoom level corresponds to 2zoom and each zoom level covers the entire globe, is commonly referred to as ['web-optimized'](https://cogeotiff.github.io/rio-cogeo/Advanced/#web-optimized-cog) when levels are in the Web Mercator projection because it minimizes the number of GET requests required for rendering a tile map. + +In fact, earlier releases of the toolkit only generated and supported Web Mercator (EPSG:3857) pyramids, both to minimize GET requests and avoid reprojection on the client. Release `v3.0.0` of `@carbonplan/maps` and `v0.1.0` of `ndpyramid` added support for the Equidistant Cylindrical (EPSG:4326) projection for cases in which users want pyramids in the same projection as the original data, at the [expense of slower rendering times](https://nasa-impact.github.io/zarr-visualization-report/approaches/dynamic-client/e2e-results-projection.html). + +## Pyramid schema + +While the [map zoom level quadtree structure](https://docs.mapbox.com/help/glossary/zoom-level/#zoom-level-quadtrees) has been used for many years, there was no convention for storing the quadtree pyramids in Xarray and Zarr when we started work on this toolkit (although parallel development occurred in the microscopy and other communities). Therefore, we created a pyramid and metadata schema for `ndpyramid`. The resulting Zarr store for a dataset with one `tavg` data variable would look like: + +```{code} +/ + ├── .zmetadata + ├── 0 + │ ├── tavg + │ └── 0.0 + ├── 1 + │ ├── tavg + │ └── 0.0 + │ └── 0.1 + │ └── 1.0 + │ └── 1.1 + ├── 2 +... +``` + +Note the quadrupling of the number of chunks as zoom level increases. This, combined with the global extent of individual levels and specific projection, allows inference of the placement of chunks on a web map based on the chunk index. + +Metadata about the pyramids is stored in the `multiscales` attribute of the Xarray DataTree or Zarr store: + +```{code} +{ + "multiscales": [ + { + "datasets": [ + { + "path": "0", + "pixels_per_tile": 128, + "crs": "EPSG:3857" + }, + { + "path": "1", + "pixels_per_tile": 128, + "crs": "EPSG:3857" + } + ... + ], + "metadata": { + "args": [], + "method": "pyramid_reproject", + "version": "0.0.post64" + }, + "type": "reduce" + } + ] +} +``` + +Currently, `@carbonplan/maps` does not rely on the `"crs"` attribute, but future releases may determine the projection based on that attribute (assuming Web Mercator projection if it is not provided). + +In addition, the mapping toolkit relies on the `_ARRAY_DIMENSIONS` attribute introduced by Xarray, which stores the dimension names. + +## Pyramids for @carbonplan/maps + +In addition to following the quadtree pyramid structure and metadata schema, the pyramids currently must also meet the following requirements for use with `@carbonplan/maps`: + +- Consistent chunk size across pyramid levels (128, 256, or 512 are recommended) +- [zlib](https://numcodecs.readthedocs.io/en/stable/zlib.html) or [gzip](https://numcodecs.readthedocs.io/en/stable/gzip.html) compression +- Web Mercator (EPSG:3857) or Equidistant Cylindrical (EPSG:4326) projection +- Data types supported by [zarr-js](https://github.com/freeman-lab/zarr-js). The following are supported as of `v3.3.0` for Zarr v2: + + ```{code} + ' Date: Thu, 1 Feb 2024 11:18:07 -0500 Subject: [PATCH 09/18] Bump CodSpeedHQ/action from 2.0.2 to 2.0.3 (#96) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codspeed.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml index c378a27..a8a027f 100644 --- a/.github/workflows/codspeed.yml +++ b/.github/workflows/codspeed.yml @@ -42,7 +42,7 @@ jobs: python -m pip install -e . --no-deps - name: Run benchmarks - uses: CodSpeedHQ/action@v2.0.2 + uses: CodSpeedHQ/action@v2.0.3 with: run: | conda list From ad2e6918ed5b206ee15a97e3e0790aaefa619631 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 11:18:30 -0500 Subject: [PATCH 10/18] Bump codecov/codecov-action from 3 to 4 (#97) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index e6b4aad..27c2e0b 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -53,7 +53,7 @@ jobs: run: | python -m pytest - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 with: file: ./coverage.xml fail_ci_if_error: false From 509ff7db585e18744dc396a7308423156482921d Mon Sep 17 00:00:00 2001 From: Kata Martin Date: Mon, 5 Feb 2024 10:54:20 -0500 Subject: [PATCH 11/18] Document coordinate array chunking requirement (#98) --- docs/schema.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/schema.md b/docs/schema.md index 51ad024..78b3f26 100644 --- a/docs/schema.md +++ b/docs/schema.md @@ -69,6 +69,7 @@ In addition, the mapping toolkit relies on the `_ARRAY_DIMENSIONS` attribute int In addition to following the quadtree pyramid structure and metadata schema, the pyramids currently must also meet the following requirements for use with `@carbonplan/maps`: - Consistent chunk size across pyramid levels (128, 256, or 512 are recommended) +- Storage of non-spatial coordinate arrays in single chunk - [zlib](https://numcodecs.readthedocs.io/en/stable/zlib.html) or [gzip](https://numcodecs.readthedocs.io/en/stable/gzip.html) compression - Web Mercator (EPSG:3857) or Equidistant Cylindrical (EPSG:4326) projection - Data types supported by [zarr-js](https://github.com/freeman-lab/zarr-js). The following are supported as of `v3.3.0` for Zarr v2: From d5c17e5d956ce956b1e4c7ffb55140f25dc1cfe5 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Mon, 5 Feb 2024 16:57:29 -0500 Subject: [PATCH 12/18] Fix typing and ignore missing imports (#100) --- .pre-commit-config.yaml | 5 +++++ ndpyramid/core.py | 10 +++++++--- ndpyramid/utils.py | 2 ++ pyproject.toml | 4 ++++ 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b05c750..e7cb992 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,3 +30,8 @@ repos: rev: v4.0.0-alpha.8 hooks: - id: prettier + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.8.0 + hooks: + - id: mypy diff --git a/ndpyramid/core.py b/ndpyramid/core.py index 95bf442..c191096 100644 --- a/ndpyramid/core.py +++ b/ndpyramid/core.py @@ -7,7 +7,7 @@ import xarray as xr from .common import Projection -from .utils import add_metadata_and_zarr_encoding, get_version, multiscales_template +from .utils import add_metadata_and_zarr_encoding, get_levels, get_version, multiscales_template def pyramid_coarsen( @@ -48,7 +48,7 @@ def pyramid_coarsen( for key, factor in enumerate(factors): # merge dictionary via union operator kwargs |= {d: factor for d in dims} - plevels[str(key)] = ds.coarsen(**kwargs).mean() + plevels[str(key)] = ds.coarsen(**kwargs).mean() # type: ignore plevels['/'] = xr.Dataset(attrs=attrs) return dt.DataTree.from_dict(plevels) @@ -96,6 +96,9 @@ def pyramid_reproject( import rioxarray # noqa: F401 from rasterio.warp import Resampling + if not levels: + levels = get_levels(ds) + # multiscales spec save_kwargs = {'levels': levels, 'pixels_per_tile': pixels_per_tile} attrs = { @@ -108,9 +111,10 @@ def pyramid_reproject( ) } + # Convert resampling from string to dictionary if necessary if isinstance(resampling, str): - resampling_dict = defaultdict(lambda: resampling) + resampling_dict:dict = defaultdict(lambda: resampling) else: resampling_dict = resampling diff --git a/ndpyramid/utils.py b/ndpyramid/utils.py index 54cc1b1..8272721 100644 --- a/ndpyramid/utils.py +++ b/ndpyramid/utils.py @@ -30,6 +30,8 @@ def get_version() -> str: return __version__ +def get_levels(ds: xr.Dataset) -> int: + raise NotImplementedError('Automatic determination of number of levels is not yet implemented') def multiscales_template( *, diff --git a/pyproject.toml b/pyproject.toml index b8579b2..2af8a66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,3 +109,7 @@ known-first-party = ["ndpyramid"] [tool.pytest.ini_options] console_output_style = "count" addopts = "--cov=./ --cov-report=xml --verbose" + +[tool.mypy] +ignore_missing_imports = true +no_implicit_optional = false From d76392bafd993c351464c30146784ac3d450de28 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Thu, 8 Feb 2024 13:41:14 -0500 Subject: [PATCH 13/18] Update demo notebook (#102) --- .pre-commit-config.yaml | 53 +++++++++++++- ci/environment.yml | 1 + docs/conf.py | 8 +- ndpyramid/common.py | 13 +++- ndpyramid/core.py | 14 ++-- ndpyramid/regrid.py | 33 ++++++--- ndpyramid/utils.py | 10 ++- notebooks/demo.ipynb | 158 ++++++++++++++-------------------------- pyproject.toml | 6 +- tests/test_pyramids.py | 35 ++++++--- 10 files changed, 186 insertions(+), 145 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e7cb992..e90a7c3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,22 +15,67 @@ repos: - id: debug-statements - id: mixed-line-ending - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.1.9" + - repo: https://github.com/asottile/pyupgrade + rev: v3.15.0 hooks: - - id: ruff - args: ["--fix"] + - id: pyupgrade + args: + - "--py39-plus" + + - repo: https://github.com/psf/black + rev: 24.1.1 + hooks: + - id: black - repo: https://github.com/keewis/blackdoc rev: v0.3.9 hooks: - id: blackdoc + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: "v0.2.1" + hooks: + - id: ruff + args: ["--fix"] + - repo: https://github.com/pre-commit/mirrors-prettier rev: v4.0.0-alpha.8 hooks: - id: prettier + - repo: https://github.com/kynan/nbstripout + rev: 0.7.1 + hooks: + - id: nbstripout + args: + [ + "--extra-keys", + "metadata.celltoolbar metadata.kernelspec", + "metadata.language_info.codemirror_mode.version", + "metadata.language_info.pygments_lexer", + "metadata.language_info.version", + "metadata.toc", + "metadata.notify_time", + "metadata.varInspector", + "cell.metadata.heading_collapsed", + "cell.metadata.hidden", + "cell.metadata.code_folding", + "cell.metadata.tags", + "cell.metadata.init_cell", + ] + + - repo: https://github.com/nbQA-dev/nbQA + rev: 1.7.1 + hooks: + - id: nbqa-ruff + args: ["--fix"] + - id: nbqa-isort + args: ["--profile=black"] + additional_dependencies: [isort==5.6.4] + - id: nbqa-black + - id: nbqa-pyupgrade + args: ["--py39-plus"] + - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.8.0 hooks: diff --git a/ci/environment.yml b/ci/environment.yml index 2e78937..b9a4937 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -18,6 +18,7 @@ dependencies: - pytest-mypy - rasterio - rioxarray + - s3fs - scipy - sparse>=0.13.0 - xarray diff --git a/docs/conf.py b/docs/conf.py index 78a7393..8381d3b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -27,9 +27,11 @@ # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = ['myst_parser', - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary',] +extensions = [ + 'myst_parser', + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', +] autosummary_generate = True diff --git a/ndpyramid/common.py b/ndpyramid/common.py index 23162dd..1fdbf3f 100644 --- a/ndpyramid/common.py +++ b/ndpyramid/common.py @@ -18,14 +18,17 @@ def __init__(self, **data) -> None: self._proj = pyproj.Proj(self._crs) @pydantic.validate_call - def transform(self, *, dim:int) -> rasterio.transform.Affine: - + def transform(self, *, dim: int) -> rasterio.transform.Affine: if self.name == 'web-mercator': # set up the transformation matrix for the web-mercator projection such that the data conform # to the slippy-map tiles assumed boundaries. See https://github.com/carbonplan/ndpyramid/pull/70 # for detailed on calculating the parameters. - return rasterio.transform.Affine.translation(-20037508.342789244, 20037508.342789248) * rasterio.transform.Affine.scale((20037508.342789244 * 2) / dim, -(20037508.342789248 * 2) / dim) + return rasterio.transform.Affine.translation( + -20037508.342789244, 20037508.342789248 + ) * rasterio.transform.Affine.scale( + (20037508.342789244 * 2) / dim, -(20037508.342789248 * 2) / dim + ) elif self.name == 'equidistant-cylindrical': # set up the transformation matrix that maps between the Equidistant Cylindrical projection # and the latitude-longitude projection. The Affine.translation function moves the origin @@ -33,4 +36,6 @@ def transform(self, *, dim:int) -> rasterio.transform.Affine: # and the Affine.scale function scales the grid coordinates to match the size of the grid # in latitude-longitude coordinates. The resulting transformation matrix maps grid coordinates to # latitude-longitude coordinates. - return rasterio.transform.Affine.translation(-180, 90) * rasterio.transform.Affine.scale(360 / dim, -180 / dim) + return rasterio.transform.Affine.translation( + -180, 90 + ) * rasterio.transform.Affine.scale(360 / dim, -180 / dim) diff --git a/ndpyramid/core.py b/ndpyramid/core.py index c191096..928b7a7 100644 --- a/ndpyramid/core.py +++ b/ndpyramid/core.py @@ -48,7 +48,7 @@ def pyramid_coarsen( for key, factor in enumerate(factors): # merge dictionary via union operator kwargs |= {d: factor for d in dims} - plevels[str(key)] = ds.coarsen(**kwargs).mean() # type: ignore + plevels[str(key)] = ds.coarsen(**kwargs).mean() # type: ignore plevels['/'] = xr.Dataset(attrs=attrs) return dt.DataTree.from_dict(plevels) @@ -57,14 +57,13 @@ def pyramid_coarsen( def pyramid_reproject( ds: xr.Dataset, *, - projection:typing.Literal['web-mercator', 'equidistant-cylindrical'] = 'web-mercator', + projection: typing.Literal['web-mercator', 'equidistant-cylindrical'] = 'web-mercator', levels: int = None, pixels_per_tile: int = 128, other_chunks: dict = None, resampling: str | dict = 'average', extra_dim: str = None, ) -> dt.DataTree: - """Create a multiscale pyramid of a dataset via reprojection. Parameters @@ -111,10 +110,9 @@ def pyramid_reproject( ) } - # Convert resampling from string to dictionary if necessary if isinstance(resampling, str): - resampling_dict:dict = defaultdict(lambda: resampling) + resampling_dict: dict = defaultdict(lambda: resampling) else: resampling_dict = resampling @@ -159,6 +157,10 @@ def reproject(da, var): pyramid = dt.DataTree.from_dict(plevels) pyramid = add_metadata_and_zarr_encoding( - pyramid, levels=levels, pixels_per_tile=pixels_per_tile, other_chunks=other_chunks, projection=projection_model + pyramid, + levels=levels, + pixels_per_tile=pixels_per_tile, + other_chunks=other_chunks, + projection=projection_model, ) return pyramid diff --git a/ndpyramid/regrid.py b/ndpyramid/regrid.py index b43e44a..b3d991e 100644 --- a/ndpyramid/regrid.py +++ b/ndpyramid/regrid.py @@ -40,7 +40,11 @@ def _reconstruct_xesmf_weights(ds_w): ) -def make_grid_ds(level: int, pixels_per_tile: int = 128, projection:typing.Literal['web-mercator', 'equidistant-cylindrical'] = 'web-mercator') -> xr.Dataset: +def make_grid_ds( + level: int, + pixels_per_tile: int = 128, + projection: typing.Literal['web-mercator', 'equidistant-cylindrical'] = 'web-mercator', +) -> xr.Dataset: """Make a dataset representing a target grid Parameters @@ -71,14 +75,13 @@ def make_grid_ds(level: int, pixels_per_tile: int = 128, projection:typing.Liter transform = projection_model.transform(dim=dim) if projection_model.name == 'equidistant-cylindrical': - title='Equidistant Cylindrical Grid' + title = 'Equidistant Cylindrical Grid' elif projection_model.name == 'web-mercator': - title='Web Mercator Grid' + title = 'Web Mercator Grid' p = projection_model._proj - grid_shape = (dim, dim) bounds_shape = (dim + 1, dim + 1) @@ -116,7 +119,10 @@ def make_grid_ds(level: int, pixels_per_tile: int = 128, projection:typing.Liter ) -def make_grid_pyramid(levels: int = 6, projection:typing.Literal['web-mercator', 'equidistant-cylindrical'] = 'web-mercator') -> dt.DataTree: +def make_grid_pyramid( + levels: int = 6, + projection: typing.Literal['web-mercator', 'equidistant-cylindrical'] = 'web-mercator', +) -> dt.DataTree: """helper function to create a grid pyramid for use with xesmf Parameters @@ -136,9 +142,11 @@ def make_grid_pyramid(levels: int = 6, projection:typing.Literal['web-mercator', def generate_weights_pyramid( - ds_in: xr.Dataset, levels: int, method: str = 'bilinear', regridder_kws: dict = None, - projection:typing.Literal['web-mercator', 'equidistant-cylindrical'] = 'web-mercator' - + ds_in: xr.Dataset, + levels: int, + method: str = 'bilinear', + regridder_kws: dict = None, + projection: typing.Literal['web-mercator', 'equidistant-cylindrical'] = 'web-mercator', ) -> dt.DataTree: """helper function to generate weights for a multiscale regridder @@ -180,7 +188,7 @@ def generate_weights_pyramid( def pyramid_regrid( ds: xr.Dataset, - projection:typing.Literal['web-mercator', 'equidistant-cylindrical'] = 'web-mercator', + projection: typing.Literal['web-mercator', 'equidistant-cylindrical'] = 'web-mercator', target_pyramid: dt.DataTree = None, levels: int = None, weights_pyramid: dt.DataTree = None, @@ -189,7 +197,6 @@ def pyramid_regrid( regridder_apply_kws: dict = None, other_chunks: dict = None, pixels_per_tile: int = 128, - ) -> dt.DataTree: """Make a pyramid using xesmf's regridders @@ -283,7 +290,11 @@ def pyramid_regrid( pyramid = dt.DataTree.from_dict(plevels) pyramid = add_metadata_and_zarr_encoding( - pyramid, levels=levels, other_chunks=other_chunks, pixels_per_tile=pixels_per_tile, projection=Projection(name=projection) + pyramid, + levels=levels, + other_chunks=other_chunks, + pixels_per_tile=pixels_per_tile, + projection=Projection(name=projection), ) return pyramid diff --git a/ndpyramid/utils.py b/ndpyramid/utils.py index 8272721..d3a16f1 100644 --- a/ndpyramid/utils.py +++ b/ndpyramid/utils.py @@ -30,9 +30,11 @@ def get_version() -> str: return __version__ + def get_levels(ds: xr.Dataset) -> int: raise NotImplementedError('Automatic determination of number of levels is not yet implemented') + def multiscales_template( *, datasets: list = None, @@ -117,9 +119,13 @@ def set_zarr_encoding( def add_metadata_and_zarr_encoding( - pyramid: dt.DataTree, *, levels: int, other_chunks: dict = None, pixels_per_tile: int = 128, projection: Projection = None + pyramid: dt.DataTree, + *, + levels: int, + other_chunks: dict = None, + pixels_per_tile: int = 128, + projection: Projection = None, ) -> dt.DataTree: - '''Postprocess data pyramid. Adds multiscales metadata and sets Zarr encoding Parameters diff --git a/notebooks/demo.ipynb b/notebooks/demo.ipynb index c1b9d44..63d44e0 100644 --- a/notebooks/demo.ipynb +++ b/notebooks/demo.ipynb @@ -2,14 +2,21 @@ "cells": [ { "cell_type": "markdown", - "id": "270cab20", + "id": "0", "metadata": {}, "source": [ - "\n", + "

\n", + "\n", + "\n", + " \n", + " \"CarbonPlan\n", + "\n", + "\n", + "

\n", "\n", "# Demo map data preparation\n", "\n", - "_by Joe Hamman & Jeremy Freeman (CarbonPlan), September 27, 2021_\n", + "_by Joe Hamman & Jeremy Freeman (CarbonPlan), September 27, 2021, Updated by Max Jones (CarbonPlan), February 8, 2024_\n", "\n", "This notebook demonstrates the production of Zarr data pyramids for use with\n", "[`@carbonplan/maps`](https://github.com/carbonplan/maps), an api for interactive\n", @@ -17,42 +24,54 @@ "\n", "Some of the libraries used here are in pre-release condition. Specifically\n", "`ndpyramid` and `datatree` are currently udergoing rapid development. Use the\n", - "pattern below but expect changes to the specific apis.\n" + "pattern below but expect changes to the specific apis.\n", + "\n", + "All of the libraries used in this demonstration are included in [this conda environment file](https://github.com/carbonplan/ndpyramid/blob/main/ci/environment.yml).\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "076c0441", + "id": "1", "metadata": {}, "outputs": [], "source": [ - "import xarray as xr\n", "import pandas as pd\n", - "import rioxarray\n", - "from ndpyramid import pyramid_reproject\n", - "from carbonplan_data.utils import set_zarr_encoding\n", - "from carbonplan_data.metadata import get_cf_global_attrs" + "import xarray as xr\n", + "\n", + "from ndpyramid import pyramid_reproject" ] }, { "cell_type": "code", "execution_count": null, - "id": "f8de434a", + "id": "2", "metadata": {}, "outputs": [], "source": [ "VERSION = 2\n", "LEVELS = 6\n", "PIXELS_PER_TILE = 128\n", + "S3 = False\n", + "input_path = f\"s3://carbonplan-maps/v{VERSION}/demo/raw\"\n", + "if S3:\n", + " base = f\"s3://carbonplan-maps/v{VERSION}/demo/\"\n", + " store_2d = base + \"2d/tavg\"\n", + " store_3d = base + \"3d/tavg-prec\"\n", + " store_3d_1var = base + \"3d/tavg-month\"\n", + " store_4d = base + \"4d/tavg-prec-month\"\n", + "else:\n", + " import zarr\n", "\n", - "input_path = f\"gs://carbonplan-maps/v{VERSION}/demo/raw\"\n", - "save_path = f\"gs://carbonplan-maps/v{VERSION}/demo/\"" + " store_2d = zarr.storage.MemoryStore()\n", + " store_3d = zarr.storage.MemoryStore()\n", + " store_3d_1var = zarr.storage.MemoryStore()\n", + " store_4d = zarr.storage.MemoryStore()" ] }, { "cell_type": "markdown", - "id": "9a2af554", + "id": "3", "metadata": {}, "source": [ "## 2d (tavg)\n", @@ -63,14 +82,13 @@ { "cell_type": "code", "execution_count": null, - "id": "e8629d34", + "id": "4", "metadata": {}, "outputs": [], "source": [ "%%time\n", "# input dataset\n", "path = f\"{input_path}/wc2.1_2.5m_tavg_10.tif\"\n", - "\n", "# open and extract the input dataset\n", "ds = (\n", " xr.open_dataarray(path, engine=\"rasterio\")\n", @@ -82,27 +100,13 @@ "# create the pyramid\n", "dt = pyramid_reproject(ds, levels=LEVELS)\n", "\n", - "# modify the data in the pyramid\n", - "for child in dt.children.values():\n", - " child.ds = set_zarr_encoding(\n", - " child.ds, codec_config={\"id\": \"zlib\", \"level\": 1}, float_dtype=\"float32\"\n", - " )\n", - " child.ds = child.ds.chunk({\"x\": PIXELS_PER_TILE, \"y\": PIXELS_PER_TILE})\n", - " child.ds[\"tavg\"].attrs.clear()\n", - "dt.attrs = get_cf_global_attrs(version=VERSION)\n", - "\n", - "for level in range(LEVELS):\n", - " slevel = str(level)\n", - " dt.ds.attrs['multiscales'][0]['datasets'][level]['pixels_per_tile'] = PIXELS_PER_TILE\n", - "dt.ds.attrs['multiscales'][0]['metadata']['version'] = VERSION\n", - "\n", "# write the pyramid to zarr\n", - "dt.to_zarr(save_path + \"2d/tavg\", consolidated=True)" + "dt.to_zarr(store_2d, consolidated=True)" ] }, { "cell_type": "markdown", - "id": "f845a5db", + "id": "5", "metadata": {}, "source": [ "## 3d, two variables (tavg and prec)\n", @@ -115,7 +119,7 @@ { "cell_type": "code", "execution_count": null, - "id": "28e45eb9", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -145,30 +149,16 @@ "ds[\"band\"] = ds[\"band\"].astype(\"str\")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS)\n", - "\n", - "# modify the data in the pyramid\n", - "for child in dt.children.values():\n", - " child.ds = set_zarr_encoding(\n", - " child.ds, codec_config={\"id\": \"zlib\", \"level\": 1}, float_dtype=\"float32\"\n", - " )\n", - " child.ds = child.ds.chunk({\"x\": PIXELS_PER_TILE, \"y\": PIXELS_PER_TILE, \"band\": 2})\n", - " child.ds[\"climate\"].attrs.clear()\n", - "dt.attrs = get_cf_global_attrs(version=VERSION)\n", - "\n", - "for level in range(LEVELS):\n", - " slevel = str(level)\n", - " dt.ds.attrs['multiscales'][0]['datasets'][level]['pixels_per_tile'] = PIXELS_PER_TILE\n", - "dt.ds.attrs['multiscales'][0]['metadata']['version'] = VERSION\n", + "dt = pyramid_reproject(ds, levels=LEVELS, other_chunks={'band': 2})\n", + "dt.ds.attrs\n", "\n", "# write the pyramid to zarr\n", - "dt.to_zarr(save_path + \"3d/tavg-prec\", consolidated=True)\n", - "dt.ds.attrs" + "dt.to_zarr(store_3d, consolidated=True)" ] }, { "cell_type": "markdown", - "id": "cccd8513", + "id": "7", "metadata": {}, "source": [ "## 3d, one variable, multiple time points\n", @@ -181,7 +171,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c6408ef2", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -202,31 +192,16 @@ "ds[\"month\"] = ds[\"month\"].astype(\"int32\")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS)\n", - "\n", - "# modify the data in the pyramid\n", - "for child in dt.children.values():\n", - " child.ds = set_zarr_encoding(\n", - " child.ds, codec_config={\"id\": \"zlib\", \"level\": 1}, float_dtype=\"float32\"\n", - " )\n", - " child.ds = child.ds.chunk({\"x\": PIXELS_PER_TILE, \"y\": PIXELS_PER_TILE, \"month\": 12})\n", - " child.ds[\"tavg\"].attrs.clear()\n", - "dt.attrs = get_cf_global_attrs(version=VERSION)\n", - "\n", - "for level in range(LEVELS):\n", - " slevel = str(level)\n", - " dt.ds.attrs['multiscales'][0]['datasets'][level]['pixels_per_tile'] = PIXELS_PER_TILE\n", - "dt.ds.attrs['multiscales'][0]['metadata']['version'] = VERSION\n", - "\n", + "dt = pyramid_reproject(ds, levels=LEVELS, other_chunks={'month': 12})\n", + "dt.ds.attrs\n", "\n", "# write the pyramid to zarr\n", - "dt.to_zarr(save_path + \"3d/tavg-month\", consolidated=True)\n", - "dt.ds.attrs" + "dt.to_zarr(store_3d_1var, consolidated=True)" ] }, { "cell_type": "markdown", - "id": "a9c778b7", + "id": "9", "metadata": {}, "source": [ "## 4d, multiple variables, multiple time points\n", @@ -239,7 +214,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d456314d", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -278,41 +253,15 @@ "ds[\"band\"] = ds[\"band\"].astype(\"str\")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS, extra_dim=\"band\")\n", - "for child in dt.children.values():\n", - " child.ds = set_zarr_encoding(\n", - " child.ds, codec_config={\"id\": \"zlib\", \"level\": 1}, float_dtype=\"float32\"\n", - " )\n", - " child.ds = child.ds.chunk({\"x\": PIXELS_PER_TILE, \"y\": PIXELS_PER_TILE, \"band\": 2, \"month\": 12})\n", - " child.ds[\"climate\"].attrs.clear()\n", - "dt.attrs = get_cf_global_attrs(version=VERSION)\n", - "\n", - "for level in range(LEVELS):\n", - " slevel = str(level)\n", - " dt.ds.attrs['multiscales'][0]['datasets'][level]['pixels_per_tile'] = PIXELS_PER_TILE\n", - "dt.ds.attrs['multiscales'][0]['metadata']['version'] = VERSION\n", - "\n", + "dt = pyramid_reproject(ds, levels=LEVELS, extra_dim=\"band\", other_chunks={'band': 2, 'month': 12})\n", + "dt.ds.attrs\n", "\n", "# write the pyramid to zarr\n", - "dt.to_zarr(save_path + \"4d/tavg-prec-month\", consolidated=True)\n", - "dt.ds.attrs" + "dt.to_zarr(store_4d, consolidated=True)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a934685a", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, "language_info": { "codemirror_mode": { "name": "ipython", @@ -323,7 +272,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.12.1" + }, + "vscode": { + "interpreter": { + "hash": "4580a72b3e3feb25716476fc9450edd092f3d65a32d836c2e2da7f2a9c3a9be4" + } } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 2af8a66..b797a41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,6 +82,8 @@ exclude = [ "node_modules", "venv", ] + +[tool.ruff.lint] per-file-ignores = {} # E402: module level import not at top of file # E501: line too long - let black worry about that @@ -100,10 +102,10 @@ select = [ ] -[tool.ruff.mccabe] +[tool.ruff.lint.mccabe] max-complexity = 18 -[tool.ruff.isort] +[tool.ruff.lint.isort] known-first-party = ["ndpyramid"] [tool.pytest.ini_options] diff --git a/tests/test_pyramids.py b/tests/test_pyramids.py index 64928f4..3be0007 100644 --- a/tests/test_pyramids.py +++ b/tests/test_pyramids.py @@ -16,7 +16,9 @@ def temperature(): def test_xarray_coarsened_pyramid(temperature, benchmark): factors = [4, 2, 1] - pyramid = benchmark(lambda: pyramid_coarsen(temperature, dims=('lat', 'lon'), factors=factors, boundary='trim')) + pyramid = benchmark( + lambda: pyramid_coarsen(temperature, dims=('lat', 'lon'), factors=factors, boundary='trim') + ) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == len(factors) pyramid.to_zarr(MemoryStore()) @@ -26,7 +28,7 @@ def test_reprojected_pyramid(temperature, benchmark): pytest.importorskip('rioxarray') levels = 2 temperature = temperature.rio.write_crs('EPSG:4326') - pyramid = benchmark(lambda: pyramid_reproject(temperature, levels=2)) + pyramid = benchmark(lambda: pyramid_reproject(temperature, levels=2)) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == levels assert pyramid.ds.attrs['multiscales'][0]['datasets'][0]['crs'] == 'EPSG:3857' @@ -36,9 +38,11 @@ def test_reprojected_pyramid(temperature, benchmark): @pytest.mark.parametrize('regridder_apply_kws', [None, {'keep_attrs': False}]) def test_regridded_pyramid(temperature, regridder_apply_kws, benchmark): pytest.importorskip('xesmf') - pyramid = benchmark(lambda: pyramid_regrid( - temperature, levels=2, regridder_apply_kws=regridder_apply_kws, other_chunks={'time': 2} - )) + pyramid = benchmark( + lambda: pyramid_regrid( + temperature, levels=2, regridder_apply_kws=regridder_apply_kws, other_chunks={'time': 2} + ) + ) assert pyramid.ds.attrs['multiscales'] expected_attrs = ( temperature['air'].attrs @@ -54,9 +58,11 @@ def test_regridded_pyramid_with_weights(temperature, benchmark): pytest.importorskip('xesmf') levels = 2 weights_pyramid = generate_weights_pyramid(temperature.isel(time=0), levels) - pyramid = benchmark(lambda: pyramid_regrid( - temperature, levels=levels, weights_pyramid=weights_pyramid, other_chunks={'time': 2} - )) + pyramid = benchmark( + lambda: pyramid_regrid( + temperature, levels=levels, weights_pyramid=weights_pyramid, other_chunks={'time': 2} + ) + ) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == levels pyramid.to_zarr(MemoryStore()) @@ -65,16 +71,23 @@ def test_regridded_pyramid_with_weights(temperature, benchmark): @pytest.mark.parametrize('projection', ['web-mercator', 'equidistant-cylindrical']) def test_make_grid_ds(projection, benchmark): - grid = benchmark(lambda: make_grid_ds(0, pixels_per_tile=8, projection=projection)) + grid = benchmark(lambda: make_grid_ds(0, pixels_per_tile=8, projection=projection)) lon_vals = grid.lon_b.values assert np.all((lon_vals[-1, :] - lon_vals[0, :]) < 0.001) - assert grid.attrs['title'] == 'Web Mercator Grid' if projection == 'web-mercator' else 'Equidistant Cylindrical Grid' + assert ( + grid.attrs['title'] == 'Web Mercator Grid' + if projection == 'web-mercator' + else 'Equidistant Cylindrical Grid' + ) + @pytest.mark.parametrize('levels', [1, 2]) @pytest.mark.parametrize('method', ['bilinear', 'conservative']) def test_generate_weights_pyramid(temperature, levels, method, benchmark): pytest.importorskip('xesmf') - weights_pyramid = benchmark(lambda: generate_weights_pyramid(temperature.isel(time=0), levels, method=method)) + weights_pyramid = benchmark( + lambda: generate_weights_pyramid(temperature.isel(time=0), levels, method=method) + ) assert weights_pyramid.ds.attrs['levels'] == levels assert weights_pyramid.ds.attrs['regrid_method'] == method assert set(weights_pyramid['0'].ds.data_vars) == {'S', 'col', 'row'} From f4dd94850c340f53d25c03800e25be8d3548b4a3 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Thu, 8 Feb 2024 13:51:50 -0500 Subject: [PATCH 14/18] Update demo.ipynb --- notebooks/demo.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/demo.ipynb b/notebooks/demo.ipynb index 63d44e0..4d6bbe5 100644 --- a/notebooks/demo.ipynb +++ b/notebooks/demo.ipynb @@ -9,7 +9,7 @@ "\n", "\n", " \n", - " \"CarbonPlan\n", + " \"CarbonPlan\n", "\n", "\n", "

\n", From b8410a5632705657f74d4932a124bba0bb22207f Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 13 Feb 2024 19:03:46 -0500 Subject: [PATCH 15/18] Add `clear_attrs` parameter to `pyramid_reproject` (#104) --- docs/schema.md | 1 + ndpyramid/core.py | 5 +++++ notebooks/demo.ipynb | 17 +++++++---------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/schema.md b/docs/schema.md index 78b3f26..33bb2fe 100644 --- a/docs/schema.md +++ b/docs/schema.md @@ -72,6 +72,7 @@ In addition to following the quadtree pyramid structure and metadata schema, the - Storage of non-spatial coordinate arrays in single chunk - [zlib](https://numcodecs.readthedocs.io/en/stable/zlib.html) or [gzip](https://numcodecs.readthedocs.io/en/stable/gzip.html) compression - Web Mercator (EPSG:3857) or Equidistant Cylindrical (EPSG:4326) projection +- The `.zattrs` must conform to the [IETF JSON Standard](https://datatracker.ietf.org/doc/html/rfc8259). - Data types supported by [zarr-js](https://github.com/freeman-lab/zarr-js). The following are supported as of `v3.3.0` for Zarr v2: ```{code} diff --git a/ndpyramid/core.py b/ndpyramid/core.py index 928b7a7..1b3c7f6 100644 --- a/ndpyramid/core.py +++ b/ndpyramid/core.py @@ -63,6 +63,7 @@ def pyramid_reproject( other_chunks: dict = None, resampling: str | dict = 'average', extra_dim: str = None, + clear_attrs: bool = False, ) -> dt.DataTree: """Create a multiscale pyramid of a dataset via reprojection. @@ -84,6 +85,8 @@ def pyramid_reproject( If a dict, keys are variable names and values are warp resampling methods. extra_dim : str, optional The name of the extra dimension to iterate over. Default is None. + clear_attrs : bool, False + Clear the attributes of the DataArrays within the multiscale pyramid. Default is False. Returns ------- @@ -138,6 +141,8 @@ def reproject(da, var): # create the data array for each level plevels[lkey] = xr.Dataset(attrs=ds.attrs) for k, da in ds.items(): + if clear_attrs: + da.attrs.clear() if len(da.shape) == 4: # if extra_dim is not specified, raise an error if extra_dim is None: diff --git a/notebooks/demo.ipynb b/notebooks/demo.ipynb index 4d6bbe5..bfb31cc 100644 --- a/notebooks/demo.ipynb +++ b/notebooks/demo.ipynb @@ -98,7 +98,7 @@ ")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS)\n", + "dt = pyramid_reproject(ds, levels=LEVELS, clear_attrs=True)\n", "\n", "# write the pyramid to zarr\n", "dt.to_zarr(store_2d, consolidated=True)" @@ -149,7 +149,7 @@ "ds[\"band\"] = ds[\"band\"].astype(\"str\")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS, other_chunks={'band': 2})\n", + "dt = pyramid_reproject(ds, levels=LEVELS, other_chunks={'band': 2}, clear_attrs=True)\n", "dt.ds.attrs\n", "\n", "# write the pyramid to zarr\n", @@ -192,7 +192,7 @@ "ds[\"month\"] = ds[\"month\"].astype(\"int32\")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS, other_chunks={'month': 12})\n", + "dt = pyramid_reproject(ds, levels=LEVELS, other_chunks={'month': 12}, clear_attrs=True)\n", "dt.ds.attrs\n", "\n", "# write the pyramid to zarr\n", @@ -253,7 +253,9 @@ "ds[\"band\"] = ds[\"band\"].astype(\"str\")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS, extra_dim=\"band\", other_chunks={'band': 2, 'month': 12})\n", + "dt = pyramid_reproject(\n", + " ds, levels=LEVELS, extra_dim=\"band\", other_chunks={'band': 2, 'month': 12}, clear_attrs=True\n", + ")\n", "dt.ds.attrs\n", "\n", "# write the pyramid to zarr\n", @@ -272,12 +274,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" - }, - "vscode": { - "interpreter": { - "hash": "4580a72b3e3feb25716476fc9450edd092f3d65a32d836c2e2da7f2a9c3a9be4" - } + "version": "3.11.7" } }, "nbformat": 4, From 900e313b7d96023f0a0fa71e799cac8c8ca12c87 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Fri, 1 Mar 2024 10:17:31 -0800 Subject: [PATCH 16/18] Update Dependabot configuration (#108) --- .github/dependabot.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 8ac6b8c..d57929b 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,3 +4,7 @@ updates: directory: "/" schedule: interval: "monthly" + groups: + actions: + patterns: + - "*" From 948534a5cd7ed19ae1f3f5fa91cfe4564db9437b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 Mar 2024 10:22:38 -0800 Subject: [PATCH 17/18] Bump the actions group with 3 updates (#109) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codspeed.yml | 4 ++-- .github/workflows/pypi-release.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml index a8a027f..1a74574 100644 --- a/.github/workflows/codspeed.yml +++ b/.github/workflows/codspeed.yml @@ -22,7 +22,7 @@ jobs: uses: actions/checkout@v4 - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v3.0.1 + uses: conda-incubator/setup-miniconda@v3.0.3 with: auto-activate-base: true activate-environment: "" # base environment @@ -42,7 +42,7 @@ jobs: python -m pip install -e . --no-deps - name: Run benchmarks - uses: CodSpeedHQ/action@v2.0.3 + uses: CodSpeedHQ/action@v2.2.1 with: run: | conda list diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index bd329f0..c3de4f8 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -78,7 +78,7 @@ jobs: name: releases path: dist - name: Publish package to PyPI - uses: pypa/gh-action-pypi-publish@v1.8.11 + uses: pypa/gh-action-pypi-publish@v1.8.12 with: user: __token__ password: ${{ secrets.PYPI_TOKEN }} From 9d1d0cd7aa3cbd6fc469290af63d6f8f9c263b77 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Mar 2024 13:40:42 -0400 Subject: [PATCH 18/18] Change nodata from rioxarray default to nan in pyramid_reproject (#110) --- ndpyramid/core.py | 5 ++++- tests/test_pyramids.py | 12 +++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/ndpyramid/core.py b/ndpyramid/core.py index 1b3c7f6..34e2463 100644 --- a/ndpyramid/core.py +++ b/ndpyramid/core.py @@ -4,6 +4,7 @@ from collections import defaultdict import datatree as dt +import numpy as np import xarray as xr from .common import Projection @@ -131,12 +132,14 @@ def pyramid_reproject( dst_transform = projection_model.transform(dim=dim) def reproject(da, var): - return da.rio.reproject( + da.encoding['_FillValue'] = np.nan + da = da.rio.reproject( projection_model._crs, resampling=Resampling[resampling_dict[var]], shape=(dim, dim), transform=dst_transform, ) + return da # create the data array for each level plevels[lkey] = xr.Dataset(attrs=ds.attrs) diff --git a/tests/test_pyramids.py b/tests/test_pyramids.py index 3be0007..e5f60be 100644 --- a/tests/test_pyramids.py +++ b/tests/test_pyramids.py @@ -28,13 +28,23 @@ def test_reprojected_pyramid(temperature, benchmark): pytest.importorskip('rioxarray') levels = 2 temperature = temperature.rio.write_crs('EPSG:4326') - pyramid = benchmark(lambda: pyramid_reproject(temperature, levels=2)) + pyramid = benchmark(lambda: pyramid_reproject(temperature, levels=levels)) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == levels assert pyramid.ds.attrs['multiscales'][0]['datasets'][0]['crs'] == 'EPSG:3857' pyramid.to_zarr(MemoryStore()) +def test_reprojected_pyramid_fill(temperature, benchmark): + """ + Test for https://github.com/carbonplan/ndpyramid/issues/93. + """ + pytest.importorskip('rioxarray') + temperature = temperature.rio.write_crs('EPSG:4326') + pyramid = benchmark(lambda: pyramid_reproject(temperature, levels=1)) + assert np.isnan(pyramid['0'].air.isel(time=0, x=0, y=0).values) + + @pytest.mark.parametrize('regridder_apply_kws', [None, {'keep_attrs': False}]) def test_regridded_pyramid(temperature, regridder_apply_kws, benchmark): pytest.importorskip('xesmf')