From 4ecb6b9bfd927640e9e912abc7cf2c2bcbb8385b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 22:35:23 -0800 Subject: [PATCH 01/18] Bump actions/download-artifact from 3 to 4 (#81) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/pypi-release.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index 99e6907..23f4a99 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -52,7 +52,7 @@ jobs: name: Install Python with: python-version: 3.9 - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: releases path: dist @@ -73,7 +73,7 @@ jobs: if: github.event_name == 'release' runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: releases path: dist From d0aae5aab6063ac778ea2b1eea89e46cf81ecb83 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 22:35:33 -0800 Subject: [PATCH 02/18] Bump actions/upload-artifact from 3 to 4 (#80) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/pypi-release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index 23f4a99..9c41391 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -39,7 +39,7 @@ jobs: else echo "✅ Looks good" fi - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: releases path: dist From 6ed6d793d40c6e8534b7f111312e9afcab9bd865 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 22:35:42 -0800 Subject: [PATCH 03/18] Bump actions/setup-python from 4 to 5 (#82) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/pypi-release.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index 9c41391..bd329f0 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -13,7 +13,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 name: Install Python with: python-version: 3.9 @@ -48,7 +48,7 @@ jobs: needs: build-artifacts runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 name: Install Python with: python-version: 3.9 From 40846df7933d4627d0d5b2850168002205d47a1d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 22:35:50 -0800 Subject: [PATCH 04/18] [pre-commit.ci] pre-commit autoupdate (#83) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/dependabot.yml | 6 +++--- .github/workflows/main.yaml | 4 ++-- .pre-commit-config.yaml | 10 +++++----- .readthedocs.yaml | 2 +- codecov.yml | 4 ++-- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index bc63aca..8ac6b8c 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,6 +1,6 @@ version: 2 updates: - - package-ecosystem: 'github-actions' - directory: '/' + - package-ecosystem: "github-actions" + directory: "/" schedule: - interval: 'monthly' + interval: "monthly" diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index e0146dd..f30b2bf 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -9,7 +9,7 @@ on: - main workflow_dispatch: schedule: - - cron: '0 0 * * *' # Daily “At 00:00” + - cron: "0 0 * * *" # Daily “At 00:00” concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -23,7 +23,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.9', '3.10'] #TODO: add 3.11 once sparse/numba support it + python-version: ["3.9", "3.10"] #TODO: add 3.11 once sparse/numba support it timeout-minutes: 20 defaults: run: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e60e45f..b05c750 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ ci: repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -16,17 +16,17 @@ repos: - id: mixed-line-ending - repo: https://github.com/astral-sh/ruff-pre-commit - rev: 'v0.0.292' + rev: "v0.1.9" hooks: - id: ruff - args: ['--fix'] + args: ["--fix"] - repo: https://github.com/keewis/blackdoc - rev: v0.3.8 + rev: v0.3.9 hooks: - id: blackdoc - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.3 + rev: v4.0.0-alpha.8 hooks: - id: prettier diff --git a/.readthedocs.yaml b/.readthedocs.yaml index fb9bd82..08a0fa9 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,7 +8,7 @@ version: 2 build: os: ubuntu-20.04 tools: - python: 'mambaforge-4.10' + python: "mambaforge-4.10" # Build documentation in the doc/ directory with Sphinx sphinx: diff --git a/codecov.yml b/codecov.yml index 0bbe239..2c4cc5d 100644 --- a/codecov.yml +++ b/codecov.yml @@ -5,8 +5,8 @@ codecov: comment: false ignore: - - 'tests/*.py' - - 'setup.py' + - "tests/*.py" + - "setup.py" coverage: precision: 2 From bd8c186c1ada1733cee112bec072f931b6294b97 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 10 Jan 2024 18:22:47 -0500 Subject: [PATCH 05/18] Update badges, monogram, and grammar --- README.md | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index a5f76d1..e27c29a 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,10 @@ -
+ + +
# ndpyramid @@ -18,7 +12,9 @@ A small utility for generating ND array pyramids using Xarray and Zarr. [![CI](https://github.com/carbonplan/ndpyramid/actions/workflows/main.yaml/badge.svg)](https://github.com/carbonplan/ndpyramid/actions/workflows/main.yaml) -![MIT License](https://badgen.net/badge/license/MIT/blue) +![PyPI](https://img.shields.io/pypi/v/ndpyramid) +[![Conda Version](https://img.shields.io/conda/vn/conda-forge/ndpyramid.svg)](https://anaconda.org/conda-forge/ndpyramid) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) # installation @@ -74,8 +70,8 @@ See the docstrings for more details about input parameters and options. ## license -All the code in this repository is [MIT](https://choosealicense.com/licenses/mit/) licensed, but we request that you please provide attribution if reusing any of our digital content (graphics, logo, articles, etc.). +All the code in this repository is [MIT](https://choosealicense.com/licenses/mit/)-licensed, but we request that you please provide attribution if reusing any of our digital content (graphics, logo, articles, etc.). ## about us -CarbonPlan is a non-profit organization that uses data and science for climate action. We aim to improve the transparency and scientific integrity of climate solutions with open data and tools. Find out more at [carbonplan.org](https://carbonplan.org/) or get in touch by [opening an issue](https://github.com/carbonplan/ndpyramid/issues/new) or [sending us an email](mailto:hello@carbonplan.org). +CarbonPlan is a nonprofit organization that uses data and science for climate action. We aim to improve the transparency and scientific integrity of climate solutions with open data and tools. Find out more at [carbonplan.org](https://carbonplan.org/) or get in touch by [opening an issue](https://github.com/carbonplan/ndpyramid/issues/new) or [sending us an email](mailto:hello@carbonplan.org). From 97c90ba17e50a39e23bbd54a03fa234e9fe139f4 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 12 Jan 2024 13:48:39 -0500 Subject: [PATCH 06/18] Setup continuous benchmarking workflow with pytest-codspeed (#85) --- .github/workflows/codspeed.yml | 39 ++++++++++++++++++++++++++++++++++ .github/workflows/main.yaml | 12 +++++++---- ci/environment.yml | 3 +++ ndpyramid/common.py | 2 +- tests/test_pyramids.py | 28 ++++++++++++------------ 5 files changed, 65 insertions(+), 19 deletions(-) create mode 100644 .github/workflows/codspeed.yml diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml new file mode 100644 index 0000000..0c19679 --- /dev/null +++ b/.github/workflows/codspeed.yml @@ -0,0 +1,39 @@ +name: codspeed-benchmarks + +on: + # Run on pushes to the main branch + push: + branches: + - "main" + # Run on pull requests + pull_request: + # `workflow_dispatch` allows CodSpeed to trigger backtest + # performance analysis in order to generate initial data. + workflow_dispatch: + +jobs: + benchmarks: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install Conda environment from environment.yml + uses: mamba-org/setup-micromamba@v1 + with: + # environment-file is not assumed anymore + environment-file: ci/environment.yml + create-args: >- + python=3.10 + # now called cache-environment + cache-environment: true + + - name: Install package + run: | + python -m pip install . --no-deps + - name: Conda list information + run: | + conda env list + conda list + - name: Run benchmarks + uses: CodSpeedHQ/action@v2 diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index f30b2bf..e6b4aad 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -29,14 +29,18 @@ jobs: run: shell: bash -l {0} steps: - - uses: actions/checkout@v4 + - name: Checkout + uses: actions/checkout@v4 + - name: Install Conda environment from environment.yml - uses: mamba-org/provision-with-micromamba@main + uses: mamba-org/setup-micromamba@v1 with: + # environment-file is not assumed anymore environment-file: ci/environment.yml - cache-downloads: true - extra-specs: | + create-args: >- python=${{ matrix.python-version }} + # now called cache-environment + cache-environment: true - name: Install package run: | diff --git a/ci/environment.yml b/ci/environment.yml index 7f3f7a5..2e78937 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -10,6 +10,7 @@ dependencies: - numpy - pip - pooch + - pre-commit - pydantic>=1.10 - pyproj - pytest @@ -24,3 +25,5 @@ dependencies: - xesmf - zarr - cf_xarray>=0.8.0 + - pip: + - pytest-codspeed diff --git a/ndpyramid/common.py b/ndpyramid/common.py index 4b7976f..23162dd 100644 --- a/ndpyramid/common.py +++ b/ndpyramid/common.py @@ -17,7 +17,7 @@ def __init__(self, **data) -> None: self._crs = epsg_codes[self.name] self._proj = pyproj.Proj(self._crs) - @pydantic.validate_arguments + @pydantic.validate_call def transform(self, *, dim:int) -> rasterio.transform.Affine: diff --git a/tests/test_pyramids.py b/tests/test_pyramids.py index d268245..08080ed 100644 --- a/tests/test_pyramids.py +++ b/tests/test_pyramids.py @@ -14,19 +14,19 @@ def temperature(): return ds -def test_xarray_coarsened_pyramid(temperature): +def test_xarray_coarsened_pyramid(temperature, benchmark): factors = [4, 2, 1] - pyramid = pyramid_coarsen(temperature, dims=('lat', 'lon'), factors=factors, boundary='trim') + pyramid = benchmark(lambda: pyramid_coarsen(temperature, dims=('lat', 'lon'), factors=factors, boundary='trim')) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == len(factors) pyramid.to_zarr(MemoryStore()) -def test_reprojected_pyramid(temperature): +def test_reprojected_pyramid(temperature, benchmark): pytest.importorskip('rioxarray') levels = 2 temperature = temperature.rio.write_crs('EPSG:4326') - pyramid = pyramid_reproject(temperature, levels=2) + pyramid = benchmark(lambda: pyramid_reproject(temperature, levels=2)) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == levels assert pyramid.ds.attrs['multiscales'][0]['datasets'][0]['crs'] == 'EPSG:3857' @@ -34,11 +34,11 @@ def test_reprojected_pyramid(temperature): @pytest.mark.parametrize('regridder_apply_kws', [None, {'keep_attrs': False}]) -def test_regridded_pyramid(temperature, regridder_apply_kws): +def test_regridded_pyramid(temperature, regridder_apply_kws, benchmark): pytest.importorskip('xesmf') - pyramid = pyramid_regrid( + pyramid = benchmark(lambda: pyramid_regrid( temperature, levels=2, regridder_apply_kws=regridder_apply_kws, other_chunks={'time': 2} - ) + )) assert pyramid.ds.attrs['multiscales'] expected_attrs = ( temperature['air'].attrs @@ -50,30 +50,30 @@ def test_regridded_pyramid(temperature, regridder_apply_kws): pyramid.to_zarr(MemoryStore()) -def test_regridded_pyramid_with_weights(temperature): +def test_regridded_pyramid_with_weights(temperature, benchmark): pytest.importorskip('xesmf') levels = 2 weights_pyramid = generate_weights_pyramid(temperature.isel(time=0), levels) - pyramid = pyramid_regrid( + pyramid = benchmark(lambda: pyramid_regrid( temperature, levels=levels, weights_pyramid=weights_pyramid, other_chunks={'time': 2} - ) + )) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == levels pyramid.to_zarr(MemoryStore()) @pytest.mark.parametrize('projection', ['web-mercator', 'equidistant-cylindrical']) -def test_make_grid_ds(projection): +def test_make_grid_ds(projection, benchmark): - grid = make_grid_ds(0, pixels_per_tile=8, projection=projection) + grid = benchmark(lambda: make_grid_ds(0, pixels_per_tile=8, projection=projection)) lon_vals = grid.lon_b.values assert np.all((lon_vals[-1, :] - lon_vals[0, :]) < 0.001) assert grid.attrs['title'] == 'Web Mercator Grid' if projection == 'web-mercator' else 'Equidistant Cylindrical Grid' @pytest.mark.parametrize('levels', [1, 2]) @pytest.mark.parametrize('method', ['bilinear', 'conservative']) -def test_generate_weights_pyramid(temperature, levels, method): - weights_pyramid = generate_weights_pyramid(temperature.isel(time=0), levels, method=method) +def test_generate_weights_pyramid(temperature, levels, method, benchmark): + weights_pyramid = benchmark(lambda: generate_weights_pyramid(temperature.isel(time=0), levels, method=method)) assert weights_pyramid.ds.attrs['levels'] == levels assert weights_pyramid.ds.attrs['regrid_method'] == method assert set(weights_pyramid['0'].ds.data_vars) == {'S', 'col', 'row'} From 1d5caa7ee84f3aafb7eabdb87f0859ccb7c2f00c Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 16 Jan 2024 12:42:35 -0500 Subject: [PATCH 07/18] Run benchmarks in CI (#86) --- .github/workflows/codspeed.yml | 41 +++++++++++++++++++++------------- tests/test_pyramids.py | 1 + 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml index 0c19679..c378a27 100644 --- a/.github/workflows/codspeed.yml +++ b/.github/workflows/codspeed.yml @@ -14,26 +14,37 @@ on: jobs: benchmarks: runs-on: ubuntu-latest + defaults: + run: + shell: bash -el {0} steps: - name: Checkout uses: actions/checkout@v4 - - name: Install Conda environment from environment.yml - uses: mamba-org/setup-micromamba@v1 + - name: Setup Miniconda + uses: conda-incubator/setup-miniconda@v3.0.1 with: - # environment-file is not assumed anymore - environment-file: ci/environment.yml - create-args: >- - python=3.10 - # now called cache-environment - cache-environment: true + auto-activate-base: true + activate-environment: "" # base environment + channels: conda-forge,nodefaults + channel-priority: strict - - name: Install package + - name: Install dependencies run: | - python -m pip install . --no-deps - - name: Conda list information - run: | - conda env list - conda list + # $CONDA is an environment variable pointing to the root of the miniconda directory + # Preprend $CONDA/bin to $PATH so that conda's python is used over system python + echo $CONDA/bin >> $GITHUB_PATH + conda install --solver=libmamba dask python=3.10 \ + esmpy>=8.2.0 mpich netcdf4 numpy pip pooch pydantic>=1.10 pyproj \ + pytest pytest-cov pytest-mypy rasterio rioxarray scipy sparse>=0.13.0 \ + xarray xarray-datatree>=0.0.11 xesmf zarr cf_xarray>=0.8.0 + python -m pip install -U pytest-codspeed setuptools + python -m pip install -e . --no-deps + - name: Run benchmarks - uses: CodSpeedHQ/action@v2 + uses: CodSpeedHQ/action@v2.0.2 + with: + run: | + conda list + python -c "import ndpyramid; print(ndpyramid.__version__)" + python -m pytest --codspeed diff --git a/tests/test_pyramids.py b/tests/test_pyramids.py index 08080ed..64928f4 100644 --- a/tests/test_pyramids.py +++ b/tests/test_pyramids.py @@ -73,6 +73,7 @@ def test_make_grid_ds(projection, benchmark): @pytest.mark.parametrize('levels', [1, 2]) @pytest.mark.parametrize('method', ['bilinear', 'conservative']) def test_generate_weights_pyramid(temperature, levels, method, benchmark): + pytest.importorskip('xesmf') weights_pyramid = benchmark(lambda: generate_weights_pyramid(temperature.isel(time=0), levels, method=method)) assert weights_pyramid.ds.attrs['levels'] == levels assert weights_pyramid.ds.attrs['regrid_method'] == method From af6f2fb0c89677bb4c296951d3755b5d6a26408c Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 19 Jan 2024 12:00:55 -0500 Subject: [PATCH 08/18] Add information about pyramid structure and metadata schema (#89) Co-authored-by: Kata Martin\n", + "\n", + "\n", + "\n", + "
\n", "\n", "# Demo map data preparation\n", "\n", - "_by Joe Hamman & Jeremy Freeman (CarbonPlan), September 27, 2021_\n", + "_by Joe Hamman & Jeremy Freeman (CarbonPlan), September 27, 2021, Updated by Max Jones (CarbonPlan), February 8, 2024_\n", "\n", "This notebook demonstrates the production of Zarr data pyramids for use with\n", "[`@carbonplan/maps`](https://github.com/carbonplan/maps), an api for interactive\n", @@ -17,42 +24,54 @@ "\n", "Some of the libraries used here are in pre-release condition. Specifically\n", "`ndpyramid` and `datatree` are currently udergoing rapid development. Use the\n", - "pattern below but expect changes to the specific apis.\n" + "pattern below but expect changes to the specific apis.\n", + "\n", + "All of the libraries used in this demonstration are included in [this conda environment file](https://github.com/carbonplan/ndpyramid/blob/main/ci/environment.yml).\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "076c0441", + "id": "1", "metadata": {}, "outputs": [], "source": [ - "import xarray as xr\n", "import pandas as pd\n", - "import rioxarray\n", - "from ndpyramid import pyramid_reproject\n", - "from carbonplan_data.utils import set_zarr_encoding\n", - "from carbonplan_data.metadata import get_cf_global_attrs" + "import xarray as xr\n", + "\n", + "from ndpyramid import pyramid_reproject" ] }, { "cell_type": "code", "execution_count": null, - "id": "f8de434a", + "id": "2", "metadata": {}, "outputs": [], "source": [ "VERSION = 2\n", "LEVELS = 6\n", "PIXELS_PER_TILE = 128\n", + "S3 = False\n", + "input_path = f\"s3://carbonplan-maps/v{VERSION}/demo/raw\"\n", + "if S3:\n", + " base = f\"s3://carbonplan-maps/v{VERSION}/demo/\"\n", + " store_2d = base + \"2d/tavg\"\n", + " store_3d = base + \"3d/tavg-prec\"\n", + " store_3d_1var = base + \"3d/tavg-month\"\n", + " store_4d = base + \"4d/tavg-prec-month\"\n", + "else:\n", + " import zarr\n", "\n", - "input_path = f\"gs://carbonplan-maps/v{VERSION}/demo/raw\"\n", - "save_path = f\"gs://carbonplan-maps/v{VERSION}/demo/\"" + " store_2d = zarr.storage.MemoryStore()\n", + " store_3d = zarr.storage.MemoryStore()\n", + " store_3d_1var = zarr.storage.MemoryStore()\n", + " store_4d = zarr.storage.MemoryStore()" ] }, { "cell_type": "markdown", - "id": "9a2af554", + "id": "3", "metadata": {}, "source": [ "## 2d (tavg)\n", @@ -63,14 +82,13 @@ { "cell_type": "code", "execution_count": null, - "id": "e8629d34", + "id": "4", "metadata": {}, "outputs": [], "source": [ "%%time\n", "# input dataset\n", "path = f\"{input_path}/wc2.1_2.5m_tavg_10.tif\"\n", - "\n", "# open and extract the input dataset\n", "ds = (\n", " xr.open_dataarray(path, engine=\"rasterio\")\n", @@ -82,27 +100,13 @@ "# create the pyramid\n", "dt = pyramid_reproject(ds, levels=LEVELS)\n", "\n", - "# modify the data in the pyramid\n", - "for child in dt.children.values():\n", - " child.ds = set_zarr_encoding(\n", - " child.ds, codec_config={\"id\": \"zlib\", \"level\": 1}, float_dtype=\"float32\"\n", - " )\n", - " child.ds = child.ds.chunk({\"x\": PIXELS_PER_TILE, \"y\": PIXELS_PER_TILE})\n", - " child.ds[\"tavg\"].attrs.clear()\n", - "dt.attrs = get_cf_global_attrs(version=VERSION)\n", - "\n", - "for level in range(LEVELS):\n", - " slevel = str(level)\n", - " dt.ds.attrs['multiscales'][0]['datasets'][level]['pixels_per_tile'] = PIXELS_PER_TILE\n", - "dt.ds.attrs['multiscales'][0]['metadata']['version'] = VERSION\n", - "\n", "# write the pyramid to zarr\n", - "dt.to_zarr(save_path + \"2d/tavg\", consolidated=True)" + "dt.to_zarr(store_2d, consolidated=True)" ] }, { "cell_type": "markdown", - "id": "f845a5db", + "id": "5", "metadata": {}, "source": [ "## 3d, two variables (tavg and prec)\n", @@ -115,7 +119,7 @@ { "cell_type": "code", "execution_count": null, - "id": "28e45eb9", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -145,30 +149,16 @@ "ds[\"band\"] = ds[\"band\"].astype(\"str\")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS)\n", - "\n", - "# modify the data in the pyramid\n", - "for child in dt.children.values():\n", - " child.ds = set_zarr_encoding(\n", - " child.ds, codec_config={\"id\": \"zlib\", \"level\": 1}, float_dtype=\"float32\"\n", - " )\n", - " child.ds = child.ds.chunk({\"x\": PIXELS_PER_TILE, \"y\": PIXELS_PER_TILE, \"band\": 2})\n", - " child.ds[\"climate\"].attrs.clear()\n", - "dt.attrs = get_cf_global_attrs(version=VERSION)\n", - "\n", - "for level in range(LEVELS):\n", - " slevel = str(level)\n", - " dt.ds.attrs['multiscales'][0]['datasets'][level]['pixels_per_tile'] = PIXELS_PER_TILE\n", - "dt.ds.attrs['multiscales'][0]['metadata']['version'] = VERSION\n", + "dt = pyramid_reproject(ds, levels=LEVELS, other_chunks={'band': 2})\n", + "dt.ds.attrs\n", "\n", "# write the pyramid to zarr\n", - "dt.to_zarr(save_path + \"3d/tavg-prec\", consolidated=True)\n", - "dt.ds.attrs" + "dt.to_zarr(store_3d, consolidated=True)" ] }, { "cell_type": "markdown", - "id": "cccd8513", + "id": "7", "metadata": {}, "source": [ "## 3d, one variable, multiple time points\n", @@ -181,7 +171,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c6408ef2", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -202,31 +192,16 @@ "ds[\"month\"] = ds[\"month\"].astype(\"int32\")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS)\n", - "\n", - "# modify the data in the pyramid\n", - "for child in dt.children.values():\n", - " child.ds = set_zarr_encoding(\n", - " child.ds, codec_config={\"id\": \"zlib\", \"level\": 1}, float_dtype=\"float32\"\n", - " )\n", - " child.ds = child.ds.chunk({\"x\": PIXELS_PER_TILE, \"y\": PIXELS_PER_TILE, \"month\": 12})\n", - " child.ds[\"tavg\"].attrs.clear()\n", - "dt.attrs = get_cf_global_attrs(version=VERSION)\n", - "\n", - "for level in range(LEVELS):\n", - " slevel = str(level)\n", - " dt.ds.attrs['multiscales'][0]['datasets'][level]['pixels_per_tile'] = PIXELS_PER_TILE\n", - "dt.ds.attrs['multiscales'][0]['metadata']['version'] = VERSION\n", - "\n", + "dt = pyramid_reproject(ds, levels=LEVELS, other_chunks={'month': 12})\n", + "dt.ds.attrs\n", "\n", "# write the pyramid to zarr\n", - "dt.to_zarr(save_path + \"3d/tavg-month\", consolidated=True)\n", - "dt.ds.attrs" + "dt.to_zarr(store_3d_1var, consolidated=True)" ] }, { "cell_type": "markdown", - "id": "a9c778b7", + "id": "9", "metadata": {}, "source": [ "## 4d, multiple variables, multiple time points\n", @@ -239,7 +214,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d456314d", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -278,41 +253,15 @@ "ds[\"band\"] = ds[\"band\"].astype(\"str\")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS, extra_dim=\"band\")\n", - "for child in dt.children.values():\n", - " child.ds = set_zarr_encoding(\n", - " child.ds, codec_config={\"id\": \"zlib\", \"level\": 1}, float_dtype=\"float32\"\n", - " )\n", - " child.ds = child.ds.chunk({\"x\": PIXELS_PER_TILE, \"y\": PIXELS_PER_TILE, \"band\": 2, \"month\": 12})\n", - " child.ds[\"climate\"].attrs.clear()\n", - "dt.attrs = get_cf_global_attrs(version=VERSION)\n", - "\n", - "for level in range(LEVELS):\n", - " slevel = str(level)\n", - " dt.ds.attrs['multiscales'][0]['datasets'][level]['pixels_per_tile'] = PIXELS_PER_TILE\n", - "dt.ds.attrs['multiscales'][0]['metadata']['version'] = VERSION\n", - "\n", + "dt = pyramid_reproject(ds, levels=LEVELS, extra_dim=\"band\", other_chunks={'band': 2, 'month': 12})\n", + "dt.ds.attrs\n", "\n", "# write the pyramid to zarr\n", - "dt.to_zarr(save_path + \"4d/tavg-prec-month\", consolidated=True)\n", - "dt.ds.attrs" + "dt.to_zarr(store_4d, consolidated=True)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a934685a", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, "language_info": { "codemirror_mode": { "name": "ipython", @@ -323,7 +272,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.12.1" + }, + "vscode": { + "interpreter": { + "hash": "4580a72b3e3feb25716476fc9450edd092f3d65a32d836c2e2da7f2a9c3a9be4" + } } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 2af8a66..b797a41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,6 +82,8 @@ exclude = [ "node_modules", "venv", ] + +[tool.ruff.lint] per-file-ignores = {} # E402: module level import not at top of file # E501: line too long - let black worry about that @@ -100,10 +102,10 @@ select = [ ] -[tool.ruff.mccabe] +[tool.ruff.lint.mccabe] max-complexity = 18 -[tool.ruff.isort] +[tool.ruff.lint.isort] known-first-party = ["ndpyramid"] [tool.pytest.ini_options] diff --git a/tests/test_pyramids.py b/tests/test_pyramids.py index 64928f4..3be0007 100644 --- a/tests/test_pyramids.py +++ b/tests/test_pyramids.py @@ -16,7 +16,9 @@ def temperature(): def test_xarray_coarsened_pyramid(temperature, benchmark): factors = [4, 2, 1] - pyramid = benchmark(lambda: pyramid_coarsen(temperature, dims=('lat', 'lon'), factors=factors, boundary='trim')) + pyramid = benchmark( + lambda: pyramid_coarsen(temperature, dims=('lat', 'lon'), factors=factors, boundary='trim') + ) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == len(factors) pyramid.to_zarr(MemoryStore()) @@ -26,7 +28,7 @@ def test_reprojected_pyramid(temperature, benchmark): pytest.importorskip('rioxarray') levels = 2 temperature = temperature.rio.write_crs('EPSG:4326') - pyramid = benchmark(lambda: pyramid_reproject(temperature, levels=2)) + pyramid = benchmark(lambda: pyramid_reproject(temperature, levels=2)) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == levels assert pyramid.ds.attrs['multiscales'][0]['datasets'][0]['crs'] == 'EPSG:3857' @@ -36,9 +38,11 @@ def test_reprojected_pyramid(temperature, benchmark): @pytest.mark.parametrize('regridder_apply_kws', [None, {'keep_attrs': False}]) def test_regridded_pyramid(temperature, regridder_apply_kws, benchmark): pytest.importorskip('xesmf') - pyramid = benchmark(lambda: pyramid_regrid( - temperature, levels=2, regridder_apply_kws=regridder_apply_kws, other_chunks={'time': 2} - )) + pyramid = benchmark( + lambda: pyramid_regrid( + temperature, levels=2, regridder_apply_kws=regridder_apply_kws, other_chunks={'time': 2} + ) + ) assert pyramid.ds.attrs['multiscales'] expected_attrs = ( temperature['air'].attrs @@ -54,9 +58,11 @@ def test_regridded_pyramid_with_weights(temperature, benchmark): pytest.importorskip('xesmf') levels = 2 weights_pyramid = generate_weights_pyramid(temperature.isel(time=0), levels) - pyramid = benchmark(lambda: pyramid_regrid( - temperature, levels=levels, weights_pyramid=weights_pyramid, other_chunks={'time': 2} - )) + pyramid = benchmark( + lambda: pyramid_regrid( + temperature, levels=levels, weights_pyramid=weights_pyramid, other_chunks={'time': 2} + ) + ) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == levels pyramid.to_zarr(MemoryStore()) @@ -65,16 +71,23 @@ def test_regridded_pyramid_with_weights(temperature, benchmark): @pytest.mark.parametrize('projection', ['web-mercator', 'equidistant-cylindrical']) def test_make_grid_ds(projection, benchmark): - grid = benchmark(lambda: make_grid_ds(0, pixels_per_tile=8, projection=projection)) + grid = benchmark(lambda: make_grid_ds(0, pixels_per_tile=8, projection=projection)) lon_vals = grid.lon_b.values assert np.all((lon_vals[-1, :] - lon_vals[0, :]) < 0.001) - assert grid.attrs['title'] == 'Web Mercator Grid' if projection == 'web-mercator' else 'Equidistant Cylindrical Grid' + assert ( + grid.attrs['title'] == 'Web Mercator Grid' + if projection == 'web-mercator' + else 'Equidistant Cylindrical Grid' + ) + @pytest.mark.parametrize('levels', [1, 2]) @pytest.mark.parametrize('method', ['bilinear', 'conservative']) def test_generate_weights_pyramid(temperature, levels, method, benchmark): pytest.importorskip('xesmf') - weights_pyramid = benchmark(lambda: generate_weights_pyramid(temperature.isel(time=0), levels, method=method)) + weights_pyramid = benchmark( + lambda: generate_weights_pyramid(temperature.isel(time=0), levels, method=method) + ) assert weights_pyramid.ds.attrs['levels'] == levels assert weights_pyramid.ds.attrs['regrid_method'] == method assert set(weights_pyramid['0'].ds.data_vars) == {'S', 'col', 'row'} From f4dd94850c340f53d25c03800e25be8d3548b4a3 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Thu, 8 Feb 2024 13:51:50 -0500 Subject: [PATCH 14/18] Update demo.ipynb --- notebooks/demo.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/demo.ipynb b/notebooks/demo.ipynb index 63d44e0..4d6bbe5 100644 --- a/notebooks/demo.ipynb +++ b/notebooks/demo.ipynb @@ -9,7 +9,7 @@ "\n", "\n", "\n", "\n", From b8410a5632705657f74d4932a124bba0bb22207f Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 13 Feb 2024 19:03:46 -0500 Subject: [PATCH 15/18] Add `clear_attrs` parameter to `pyramid_reproject` (#104) --- docs/schema.md | 1 + ndpyramid/core.py | 5 +++++ notebooks/demo.ipynb | 17 +++++++---------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/schema.md b/docs/schema.md index 78b3f26..33bb2fe 100644 --- a/docs/schema.md +++ b/docs/schema.md @@ -72,6 +72,7 @@ In addition to following the quadtree pyramid structure and metadata schema, the - Storage of non-spatial coordinate arrays in single chunk - [zlib](https://numcodecs.readthedocs.io/en/stable/zlib.html) or [gzip](https://numcodecs.readthedocs.io/en/stable/gzip.html) compression - Web Mercator (EPSG:3857) or Equidistant Cylindrical (EPSG:4326) projection +- The `.zattrs` must conform to the [IETF JSON Standard](https://datatracker.ietf.org/doc/html/rfc8259). - Data types supported by [zarr-js](https://github.com/freeman-lab/zarr-js). The following are supported as of `v3.3.0` for Zarr v2: ```{code} diff --git a/ndpyramid/core.py b/ndpyramid/core.py index 928b7a7..1b3c7f6 100644 --- a/ndpyramid/core.py +++ b/ndpyramid/core.py @@ -63,6 +63,7 @@ def pyramid_reproject( other_chunks: dict = None, resampling: str | dict = 'average', extra_dim: str = None, + clear_attrs: bool = False, ) -> dt.DataTree: """Create a multiscale pyramid of a dataset via reprojection. @@ -84,6 +85,8 @@ def pyramid_reproject( If a dict, keys are variable names and values are warp resampling methods. extra_dim : str, optional The name of the extra dimension to iterate over. Default is None. + clear_attrs : bool, False + Clear the attributes of the DataArrays within the multiscale pyramid. Default is False. Returns ------- @@ -138,6 +141,8 @@ def reproject(da, var): # create the data array for each level plevels[lkey] = xr.Dataset(attrs=ds.attrs) for k, da in ds.items(): + if clear_attrs: + da.attrs.clear() if len(da.shape) == 4: # if extra_dim is not specified, raise an error if extra_dim is None: diff --git a/notebooks/demo.ipynb b/notebooks/demo.ipynb index 4d6bbe5..bfb31cc 100644 --- a/notebooks/demo.ipynb +++ b/notebooks/demo.ipynb @@ -98,7 +98,7 @@ ")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS)\n", + "dt = pyramid_reproject(ds, levels=LEVELS, clear_attrs=True)\n", "\n", "# write the pyramid to zarr\n", "dt.to_zarr(store_2d, consolidated=True)" @@ -149,7 +149,7 @@ "ds[\"band\"] = ds[\"band\"].astype(\"str\")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS, other_chunks={'band': 2})\n", + "dt = pyramid_reproject(ds, levels=LEVELS, other_chunks={'band': 2}, clear_attrs=True)\n", "dt.ds.attrs\n", "\n", "# write the pyramid to zarr\n", @@ -192,7 +192,7 @@ "ds[\"month\"] = ds[\"month\"].astype(\"int32\")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS, other_chunks={'month': 12})\n", + "dt = pyramid_reproject(ds, levels=LEVELS, other_chunks={'month': 12}, clear_attrs=True)\n", "dt.ds.attrs\n", "\n", "# write the pyramid to zarr\n", @@ -253,7 +253,9 @@ "ds[\"band\"] = ds[\"band\"].astype(\"str\")\n", "\n", "# create the pyramid\n", - "dt = pyramid_reproject(ds, levels=LEVELS, extra_dim=\"band\", other_chunks={'band': 2, 'month': 12})\n", + "dt = pyramid_reproject(\n", + " ds, levels=LEVELS, extra_dim=\"band\", other_chunks={'band': 2, 'month': 12}, clear_attrs=True\n", + ")\n", "dt.ds.attrs\n", "\n", "# write the pyramid to zarr\n", @@ -272,12 +274,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" - }, - "vscode": { - "interpreter": { - "hash": "4580a72b3e3feb25716476fc9450edd092f3d65a32d836c2e2da7f2a9c3a9be4" - } + "version": "3.11.7" } }, "nbformat": 4, From 900e313b7d96023f0a0fa71e799cac8c8ca12c87 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Fri, 1 Mar 2024 10:17:31 -0800 Subject: [PATCH 16/18] Update Dependabot configuration (#108) --- .github/dependabot.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 8ac6b8c..d57929b 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,3 +4,7 @@ updates: directory: "/" schedule: interval: "monthly" + groups: + actions: + patterns: + - "*" From 948534a5cd7ed19ae1f3f5fa91cfe4564db9437b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 Mar 2024 10:22:38 -0800 Subject: [PATCH 17/18] Bump the actions group with 3 updates (#109) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codspeed.yml | 4 ++-- .github/workflows/pypi-release.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml index a8a027f..1a74574 100644 --- a/.github/workflows/codspeed.yml +++ b/.github/workflows/codspeed.yml @@ -22,7 +22,7 @@ jobs: uses: actions/checkout@v4 - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v3.0.1 + uses: conda-incubator/setup-miniconda@v3.0.3 with: auto-activate-base: true activate-environment: "" # base environment @@ -42,7 +42,7 @@ jobs: python -m pip install -e . --no-deps - name: Run benchmarks - uses: CodSpeedHQ/action@v2.0.3 + uses: CodSpeedHQ/action@v2.2.1 with: run: | conda list diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index bd329f0..c3de4f8 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -78,7 +78,7 @@ jobs: name: releases path: dist - name: Publish package to PyPI - uses: pypa/gh-action-pypi-publish@v1.8.11 + uses: pypa/gh-action-pypi-publish@v1.8.12 with: user: __token__ password: ${{ secrets.PYPI_TOKEN }} From 9d1d0cd7aa3cbd6fc469290af63d6f8f9c263b77 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 12 Mar 2024 13:40:42 -0400 Subject: [PATCH 18/18] Change nodata from rioxarray default to nan in pyramid_reproject (#110) --- ndpyramid/core.py | 5 ++++- tests/test_pyramids.py | 12 +++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/ndpyramid/core.py b/ndpyramid/core.py index 1b3c7f6..34e2463 100644 --- a/ndpyramid/core.py +++ b/ndpyramid/core.py @@ -4,6 +4,7 @@ from collections import defaultdict import datatree as dt +import numpy as np import xarray as xr from .common import Projection @@ -131,12 +132,14 @@ def pyramid_reproject( dst_transform = projection_model.transform(dim=dim) def reproject(da, var): - return da.rio.reproject( + da.encoding['_FillValue'] = np.nan + da = da.rio.reproject( projection_model._crs, resampling=Resampling[resampling_dict[var]], shape=(dim, dim), transform=dst_transform, ) + return da # create the data array for each level plevels[lkey] = xr.Dataset(attrs=ds.attrs) diff --git a/tests/test_pyramids.py b/tests/test_pyramids.py index 3be0007..e5f60be 100644 --- a/tests/test_pyramids.py +++ b/tests/test_pyramids.py @@ -28,13 +28,23 @@ def test_reprojected_pyramid(temperature, benchmark): pytest.importorskip('rioxarray') levels = 2 temperature = temperature.rio.write_crs('EPSG:4326') - pyramid = benchmark(lambda: pyramid_reproject(temperature, levels=2)) + pyramid = benchmark(lambda: pyramid_reproject(temperature, levels=levels)) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == levels assert pyramid.ds.attrs['multiscales'][0]['datasets'][0]['crs'] == 'EPSG:3857' pyramid.to_zarr(MemoryStore()) +def test_reprojected_pyramid_fill(temperature, benchmark): + """ + Test for https://github.com/carbonplan/ndpyramid/issues/93. + """ + pytest.importorskip('rioxarray') + temperature = temperature.rio.write_crs('EPSG:4326') + pyramid = benchmark(lambda: pyramid_reproject(temperature, levels=1)) + assert np.isnan(pyramid['0'].air.isel(time=0, x=0, y=0).values) + + @pytest.mark.parametrize('regridder_apply_kws', [None, {'keep_attrs': False}]) def test_regridded_pyramid(temperature, regridder_apply_kws, benchmark): pytest.importorskip('xesmf')