From d600fdaa4cd48c6fe335d37b109efe4c1c4e5278 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Thu, 16 May 2019 22:42:28 +0300 Subject: [PATCH 01/31] Add averaging bucket resampler --- satpy/resample.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/satpy/resample.py b/satpy/resample.py index 3b575cd146..7c3086f852 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -6,6 +6,7 @@ # Author(s): # # Martin Raspaud +# Panu Lahtinen # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -142,6 +143,7 @@ from pyresample.geometry import SwathDefinition, AreaDefinition from pyresample.kd_tree import XArrayResamplerNN from pyresample.bilinear.xarr import XArrayResamplerBilinear +from pyresample import bucket from satpy import CHUNK_SIZE from satpy.config import config_search_paths, get_config_path @@ -823,11 +825,75 @@ def compute(self, data, expand=True, **kwargs): coords=coords or None) +class BucketResampler(BaseResampler): + """Base class for bucket resampling which implements averaging. + + This resampler implements on-disk caching when the `cache_dir` argument + is provided to the `resample` method. This should provide significant + performance improvements on consecutive resampling of geostationary data. + + Args: + cache_dir (str): Long term storage directory for intermediate + results. By default only 10 different source/target + combinations are cached to save space. + + """ + + def __init__(self, source_geo_def, target_geo_def): + super(BucketResampler, self).__init__(source_geo_def, target_geo_def) + self._cache = {} + + def precompute(self, **kwargs): + """Create a X and Y indices and store them for later use. 
+ """ + LOG.debug("Computing resampling indices") + lons, lats = self.source_geo_def.get_lonlats() + x_idxs, y_idxs = bucket.get_bucket_indices(self.target_geo_def, + lons, lats) + self._cache = {'x_idxs': x_idxs, 'y_idxs': y_idxs} + + def compute(self, data, fill_value=np.nan, **kwargs): + fill_value = kwargs.get('fill_value', np.nan) + LOG.debug("Resampling %s", str(data.name)) + x_idxs = self._cache.get('x_idxs', None) + y_idxs = self._cache.get('y_idxs', None) + res = bucket.resample_bucket_average( + data, fill_value=fill_value, x_idxs=x_idxs, y_idxs=y_idxs, + target_shape=self.target_geo_def.shape) + + return res + + def resample(self, data, **kwargs): + """Resample `data` by calling `precompute` and `compute` methods. + + Args: + data (xarray.DataArray): Data to be resampled + + Returns (xarray.DataArray): Data resampled to the target area + + """ + cache_id = self.precompute(**kwargs) + result = self.compute(data, **kwargs) + if data.ndim == 3 and data.dims[0] == 'bands': + dims = ('bands', 'y', 'x') + elif data.ndim == 2: + dims = ('y', 'x') + else: + dims = data.dims + result = xr.DataArray(result, dims=dims, + attrs=data.attrs.copy()) + return result + + RESAMPLERS = {"kd_tree": KDTreeResampler, "nearest": KDTreeResampler, "ewa": EWAResampler, "bilinear": BilinearResampler, "native": NativeResampler, + "bucket_avg": BucketResampler, + #"bucket_sum": BucketSum, + #"bucket_count": BucketCount, + #"bucket_fraction": BucketFraction, } From 85a18094ec0dc52a3842c5cd57a941d73e4fbf30 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Thu, 16 May 2019 23:34:02 +0300 Subject: [PATCH 02/31] Make bucket resampling work also with 3D datasets --- satpy/resample.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index 7c3086f852..14437e6267 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -857,11 +857,22 @@ def compute(self, data, fill_value=np.nan, **kwargs): 
LOG.debug("Resampling %s", str(data.name)) x_idxs = self._cache.get('x_idxs', None) y_idxs = self._cache.get('y_idxs', None) - res = bucket.resample_bucket_average( - data, fill_value=fill_value, x_idxs=x_idxs, y_idxs=y_idxs, - target_shape=self.target_geo_def.shape) + results = [] + if data.ndim == 3: + for i in range(data.shape[0]): + res = bucket.resample_bucket_average( + data[i, :, :], fill_value=fill_value, + x_idxs=x_idxs, y_idxs=y_idxs, + target_shape=self.target_geo_def.shape) + results.append(res) + else: + res = bucket.resample_bucket_average( + data, fill_value=fill_value, + x_idxs=x_idxs, y_idxs=y_idxs, + target_shape=self.target_geo_def.shape) + results.append(res) - return res + return da.stack(results) def resample(self, data, **kwargs): """Resample `data` by calling `precompute` and `compute` methods. @@ -873,15 +884,18 @@ def resample(self, data, **kwargs): """ cache_id = self.precompute(**kwargs) - result = self.compute(data, **kwargs) + attrs = data.attrs.copy() + data_arr = data.data if data.ndim == 3 and data.dims[0] == 'bands': dims = ('bands', 'y', 'x') elif data.ndim == 2: dims = ('y', 'x') else: dims = data.dims - result = xr.DataArray(result, dims=dims, - attrs=data.attrs.copy()) + + result = da.squeeze(self.compute(data_arr, **kwargs)) + result = xr.DataArray(result, dims=dims, coords=data.coords, + attrs=attrs) return result From 880a11fd8a3a0ff73b37bd406b89ff23d72e3eb2 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 17 May 2019 00:10:52 +0300 Subject: [PATCH 03/31] Add accumulative bucket resampling --- satpy/resample.py | 49 +++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index 14437e6267..0986f48cc6 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -826,26 +826,14 @@ def compute(self, data, expand=True, **kwargs): class BucketResampler(BaseResampler): - """Base class for bucket resampling which implements averaging. 
- - This resampler implements on-disk caching when the `cache_dir` argument - is provided to the `resample` method. This should provide significant - performance improvements on consecutive resampling of geostationary data. - - Args: - cache_dir (str): Long term storage directory for intermediate - results. By default only 10 different source/target - combinations are cached to save space. - - """ + """Base class for bucket resampling which implements averaging.""" def __init__(self, source_geo_def, target_geo_def): super(BucketResampler, self).__init__(source_geo_def, target_geo_def) self._cache = {} def precompute(self, **kwargs): - """Create a X and Y indices and store them for later use. - """ + """Create X and Y indices and store them for later use.""" LOG.debug("Computing resampling indices") lons, lats = self.source_geo_def.get_lonlats() x_idxs, y_idxs = bucket.get_bucket_indices(self.target_geo_def, @@ -853,6 +841,7 @@ def precompute(self, **kwargs): self._cache = {'x_idxs': x_idxs, 'y_idxs': y_idxs} def compute(self, data, fill_value=np.nan, **kwargs): + """Call the resampling.""" fill_value = kwargs.get('fill_value', np.nan) LOG.debug("Resampling %s", str(data.name)) x_idxs = self._cache.get('x_idxs', None) @@ -883,7 +872,7 @@ def resample(self, data, **kwargs): Returns (xarray.DataArray): Data resampled to the target area """ - cache_id = self.precompute(**kwargs) + self.precompute(**kwargs) attrs = data.attrs.copy() data_arr = data.data if data.ndim == 3 and data.dims[0] == 'bands': @@ -899,13 +888,41 @@ def resample(self, data, **kwargs): return result +class BucketSum(BucketResampler): + """Base class for bucket resampling which implements averaging.""" + + def __init__(self, source_geo_def, target_geo_def): + super(BucketSum, self).__init__(source_geo_def, target_geo_def) + self._cache = {} + + def compute(self, data, fill_value=np.nan, **kwargs): + """Call the resampling.""" + fill_value = kwargs.get('fill_value', np.nan) + LOG.debug("Resampling 
%s", str(data.name)) + x_idxs = self._cache.get('x_idxs', None) + y_idxs = self._cache.get('y_idxs', None) + results = [] + if data.ndim == 3: + for i in range(data.shape[0]): + res = bucket.get_sum_from_bucket_indices( + data[i, :, :], x_idxs, y_idxs, + self.target_geo_def.shape) + results.append(res) + else: + res = bucket.get_sum_from_bucket_indices( + data, x_idxs, y_idxs, self.target_geo_def.shape) + results.append(res) + + return da.stack(results) + + RESAMPLERS = {"kd_tree": KDTreeResampler, "nearest": KDTreeResampler, "ewa": EWAResampler, "bilinear": BilinearResampler, "native": NativeResampler, "bucket_avg": BucketResampler, - #"bucket_sum": BucketSum, + "bucket_sum": BucketSum, #"bucket_count": BucketCount, #"bucket_fraction": BucketFraction, } From 7aaed7821b1c0a365cf5e797f13487d3df93b34b Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 17 May 2019 00:13:23 +0300 Subject: [PATCH 04/31] Fix doc string for BucketSum class --- satpy/resample.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index 0986f48cc6..5aa1186908 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -889,7 +889,7 @@ def resample(self, data, **kwargs): class BucketSum(BucketResampler): - """Base class for bucket resampling which implements averaging.""" + """Class for bucket resampling which implements accumulation (sum).""" def __init__(self, source_geo_def, target_geo_def): super(BucketSum, self).__init__(source_geo_def, target_geo_def) @@ -923,7 +923,7 @@ def compute(self, data, fill_value=np.nan, **kwargs): "native": NativeResampler, "bucket_avg": BucketResampler, "bucket_sum": BucketSum, - #"bucket_count": BucketCount, + "bucket_count": BucketCount, #"bucket_fraction": BucketFraction, } From fb090fbf877b6bbe8dc9a7deed513468bbd1d664 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 17 May 2019 00:16:17 +0300 Subject: [PATCH 05/31] Remove unused fill_value from BucketSum.compute() --- satpy/resample.py | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/resample.py b/satpy/resample.py index 5aa1186908..b977e3779c 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -895,7 +895,7 @@ def __init__(self, source_geo_def, target_geo_def): super(BucketSum, self).__init__(source_geo_def, target_geo_def) self._cache = {} - def compute(self, data, fill_value=np.nan, **kwargs): + def compute(self, data, **kwargs): """Call the resampling.""" fill_value = kwargs.get('fill_value', np.nan) LOG.debug("Resampling %s", str(data.name)) From 7784e001e741b1474ead9e2091060893390cbf32 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 17 May 2019 00:22:06 +0300 Subject: [PATCH 06/31] Add bucket resampler that returns the number of values in each bin --- satpy/resample.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/satpy/resample.py b/satpy/resample.py index b977e3779c..0c1c81aaa7 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -897,7 +897,6 @@ def __init__(self, source_geo_def, target_geo_def): def compute(self, data, **kwargs): """Call the resampling.""" - fill_value = kwargs.get('fill_value', np.nan) LOG.debug("Resampling %s", str(data.name)) x_idxs = self._cache.get('x_idxs', None) y_idxs = self._cache.get('y_idxs', None) @@ -916,6 +915,34 @@ def compute(self, data, **kwargs): return da.stack(results) +class BucketCount(BucketResampler): + """Class for bucket resampling which implements hit-counting.""" + + def __init__(self, source_geo_def, target_geo_def): + super(BucketCount, self).__init__(source_geo_def, target_geo_def) + self._cache = {} + + def compute(self, data, **kwargs): + """Call the resampling.""" + LOG.debug("Resampling %s", str(data.name)) + x_idxs = self._cache.get('x_idxs', None) + y_idxs = self._cache.get('y_idxs', None) + results = [] + if data.ndim == 3: + for i in range(data.shape[0]): + res = bucket.get_count_from_bucket_indices( + x_idxs, y_idxs, + 
self.target_geo_def.shape) + results.append(res) + else: + res = bucket.get_count_from_bucket_indices( + x_idxs, y_idxs, self.target_geo_def.shape) + results.append(res) + + return da.stack(results) + + + RESAMPLERS = {"kd_tree": KDTreeResampler, "nearest": KDTreeResampler, "ewa": EWAResampler, From 1bc7aca1f7731d7c6fa231632dd1b2fd1d3b6a04 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 17 May 2019 00:51:58 +0300 Subject: [PATCH 07/31] Remove placeholder for bucket_fraction resampler --- satpy/resample.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index 0c1c81aaa7..691f0769fa 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -942,7 +942,6 @@ def compute(self, data, **kwargs): return da.stack(results) - RESAMPLERS = {"kd_tree": KDTreeResampler, "nearest": KDTreeResampler, "ewa": EWAResampler, @@ -951,7 +950,6 @@ def compute(self, data, **kwargs): "bucket_avg": BucketResampler, "bucket_sum": BucketSum, "bucket_count": BucketCount, - #"bucket_fraction": BucketFraction, } From 3c825fab09cf37321b59bd598c7f49ee244d68c6 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 17 May 2019 22:40:54 +0300 Subject: [PATCH 08/31] Adjust to use pyresample.bucket.BucketResampler class --- satpy/resample.py | 53 ++++++++++++----------------------------------- 1 file changed, 13 insertions(+), 40 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index 691f0769fa..a437a7ba14 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -830,35 +830,27 @@ class BucketResampler(BaseResampler): def __init__(self, source_geo_def, target_geo_def): super(BucketResampler, self).__init__(source_geo_def, target_geo_def) - self._cache = {} + self.resampler = None def precompute(self, **kwargs): """Create X and Y indices and store them for later use.""" - LOG.debug("Computing resampling indices") - lons, lats = self.source_geo_def.get_lonlats() - x_idxs, y_idxs = bucket.get_bucket_indices(self.target_geo_def, - lons, lats) 
- self._cache = {'x_idxs': x_idxs, 'y_idxs': y_idxs} + LOG.debug("Initializing bucket resampler.") + source_lons, source_lats = self.source_geo_def.get_lonlats() + self.resampler = bucket.BucketResampler(self.target_geo_def, + source_lons, + source_lats) def compute(self, data, fill_value=np.nan, **kwargs): """Call the resampling.""" fill_value = kwargs.get('fill_value', np.nan) - LOG.debug("Resampling %s", str(data.name)) - x_idxs = self._cache.get('x_idxs', None) - y_idxs = self._cache.get('y_idxs', None) results = [] if data.ndim == 3: for i in range(data.shape[0]): - res = bucket.resample_bucket_average( - data[i, :, :], fill_value=fill_value, - x_idxs=x_idxs, y_idxs=y_idxs, - target_shape=self.target_geo_def.shape) + res = self.resampler.get_average(data[i, :, :], + fill_value=fill_value) results.append(res) else: - res = bucket.resample_bucket_average( - data, fill_value=fill_value, - x_idxs=x_idxs, y_idxs=y_idxs, - target_shape=self.target_geo_def.shape) + res = self.resampler.get_average(data, fill_value=fill_value) results.append(res) return da.stack(results) @@ -870,7 +862,6 @@ def resample(self, data, **kwargs): data (xarray.DataArray): Data to be resampled Returns (xarray.DataArray): Data resampled to the target area - """ self.precompute(**kwargs) attrs = data.attrs.copy() @@ -891,25 +882,16 @@ def resample(self, data, **kwargs): class BucketSum(BucketResampler): """Class for bucket resampling which implements accumulation (sum).""" - def __init__(self, source_geo_def, target_geo_def): - super(BucketSum, self).__init__(source_geo_def, target_geo_def) - self._cache = {} - def compute(self, data, **kwargs): """Call the resampling.""" LOG.debug("Resampling %s", str(data.name)) - x_idxs = self._cache.get('x_idxs', None) - y_idxs = self._cache.get('y_idxs', None) results = [] if data.ndim == 3: for i in range(data.shape[0]): - res = bucket.get_sum_from_bucket_indices( - data[i, :, :], x_idxs, y_idxs, - self.target_geo_def.shape) + res = 
self.resampler.get_sum(data[i, :, :]) results.append(res) else: - res = bucket.get_sum_from_bucket_indices( - data, x_idxs, y_idxs, self.target_geo_def.shape) + res = self.resampler.get_sum(data) results.append(res) return da.stack(results) @@ -918,25 +900,16 @@ def compute(self, data, **kwargs): class BucketCount(BucketResampler): """Class for bucket resampling which implements hit-counting.""" - def __init__(self, source_geo_def, target_geo_def): - super(BucketCount, self).__init__(source_geo_def, target_geo_def) - self._cache = {} - def compute(self, data, **kwargs): """Call the resampling.""" LOG.debug("Resampling %s", str(data.name)) - x_idxs = self._cache.get('x_idxs', None) - y_idxs = self._cache.get('y_idxs', None) results = [] if data.ndim == 3: for i in range(data.shape[0]): - res = bucket.get_count_from_bucket_indices( - x_idxs, y_idxs, - self.target_geo_def.shape) + res = self.resampler.get_count() results.append(res) else: - res = bucket.get_count_from_bucket_indices( - x_idxs, y_idxs, self.target_geo_def.shape) + res = self.resampler.get_count() results.append(res) return da.stack(results) From 4e2939ce6883acb5dbc1adf503243156fdbd1c74 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 17 May 2019 23:09:29 +0300 Subject: [PATCH 09/31] Fix mock import --- satpy/tests/reader_tests/test_avhrr_l1b_gaclac.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/satpy/tests/reader_tests/test_avhrr_l1b_gaclac.py b/satpy/tests/reader_tests/test_avhrr_l1b_gaclac.py index b8ec45884f..ad6194dbc7 100644 --- a/satpy/tests/reader_tests/test_avhrr_l1b_gaclac.py +++ b/satpy/tests/reader_tests/test_avhrr_l1b_gaclac.py @@ -18,7 +18,11 @@ from unittest import TestCase, main, TestLoader, TestSuite import numpy as np -import mock +try: + import unittest.mock as mock +except ImportError: + # separate mock package py<3.3 + import mock GAC_PATTERN = 
'NSS.GHRR.{platform_id:2s}.D{start_time:%y%j.S%H%M}.E{end_time:%H%M}.B{orbit_number:05d}{end_orbit_last_digits:02d}.{station:2s}' # noqa From 3f028139d191075fe875cbfe0e6199a48ee0e92a Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Sat, 18 May 2019 21:17:56 +0300 Subject: [PATCH 10/31] Use explicit fill_value instead of **kwargs --- satpy/resample.py | 1 - 1 file changed, 1 deletion(-) diff --git a/satpy/resample.py b/satpy/resample.py index a437a7ba14..26cff02e52 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -842,7 +842,6 @@ def precompute(self, **kwargs): def compute(self, data, fill_value=np.nan, **kwargs): """Call the resampling.""" - fill_value = kwargs.get('fill_value', np.nan) results = [] if data.ndim == 3: for i in range(data.shape[0]): From 5ae01453286c1fb6864bc556a2825966756de58d Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 24 May 2019 10:57:09 +0300 Subject: [PATCH 11/31] Adjust handling of dim names and array dimension adjustment --- satpy/resample.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index 26cff02e52..7f14f48218 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -867,12 +867,15 @@ def resample(self, data, **kwargs): data_arr = data.data if data.ndim == 3 and data.dims[0] == 'bands': dims = ('bands', 'y', 'x') - elif data.ndim == 2: + # Both one and two dimensional input data results in 2D output + elif data.ndim in (1, 2): dims = ('y', 'x') else: dims = data.dims - result = da.squeeze(self.compute(data_arr, **kwargs)) + result = self.compute(data_arr, **kwargs) + if result.ndim > len(dims): + result = da.squeeze(result) result = xr.DataArray(result, dims=dims, coords=data.coords, attrs=attrs) return result From 66e45ef2a4f2c560f3c5653c9685f10bbfe62b5b Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 24 May 2019 10:58:14 +0300 Subject: [PATCH 12/31] Add tests to satpy.resample.BucketResampler --- satpy/tests/test_resample.py | 93
++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/satpy/tests/test_resample.py b/satpy/tests/test_resample.py index bac2d107aa..8604807249 100644 --- a/satpy/tests/test_resample.py +++ b/satpy/tests/test_resample.py @@ -479,6 +479,99 @@ def close(self): shutil.rmtree(the_dir) +class TestBucketResampler(unittest.TestCase): + """Test the bucket resamplers.""" + + def setUp(self): + from satpy.resample import BucketResampler + get_lonlats = mock.MagicMock() + get_lonlats.return_value = (1, 2) + self.source_geo_def = mock.MagicMock(get_lonlats=get_lonlats) + self.target_geo_def = mock.MagicMock(get_lonlats=get_lonlats) + self.bucket = BucketResampler(self.source_geo_def, self.target_geo_def) + + def test_init(self): + """Test bucket resampler initialization""" + self.assertIsNone(self.bucket.resampler) + self.assertTrue(self.bucket.source_geo_def == self.source_geo_def) + self.assertTrue(self.bucket.target_geo_def == self.target_geo_def) + + @mock.patch('pyresample.bucket.BucketResampler') + def test_precompute(self, bucket): + """Test bucket resampler precomputation""" + bucket.return_value = True + self.bucket.precompute() + self.assertTrue(self.bucket.resampler) + bucket.assert_called_once_with(self.target_geo_def, 1, 2) + + def test_compute(self): + """Test bucket resampler computation.""" + import dask.array as da + # 1D data + self.bucket.resampler = mock.MagicMock() + data = da.ones((5,)) + self.bucket.resampler.get_average.return_value = data + res = self.bucket.compute(data, fill_value=2) + self.bucket.resampler.get_average.assert_called_once_with(data, + fill_value=2) + self.assertEqual(res.shape, (1, 5)) + # 2D data + self.bucket.resampler = mock.MagicMock() + data = da.ones((5, 5)) + self.bucket.resampler.get_average.return_value = data + res = self.bucket.compute(data, fill_value=2) + self.bucket.resampler.get_average.assert_called_once_with(data, + fill_value=2) + self.assertEqual(res.shape, (1, 5, 5)) + # 3D data + 
self.bucket.resampler = mock.MagicMock() + data = da.ones((3, 5, 5)) + self.bucket.resampler.get_average.return_value = data[0, :, :] + res = self.bucket.compute(data, fill_value=2) + self.assertEqual(res.shape, (3, 5, 5)) + + @mock.patch('pyresample.bucket.BucketResampler') + def test_resample(self, pyresample_bucket): + """Test bucket resamplers resample method.""" + import xarray as xr + import dask.array as da + self.bucket.resampler = mock.MagicMock() + self.bucket.precompute = mock.MagicMock() + self.bucket.compute = mock.MagicMock() + + # 1D input data + data = xr.DataArray(da.ones((5,)), dims=('foo'), attrs={'bar': 'baz'}) + self.bucket.compute.return_value = da.ones((5, 5)) + res = self.bucket.resample(data) + self.bucket.precompute.assert_called_once() + self.bucket.compute.assert_called_once() + self.assertEqual(res.shape, (5, 5)) + self.assertEqual(res.dims, ('y', 'x')) + self.assertTrue('bar' in res.attrs) + self.assertEqual(res.attrs['bar'], 'baz') + + # 2D input data + data = xr.DataArray(da.ones((5, 5)), dims=('foo', 'bar')) + self.bucket.compute.return_value = da.ones((5, 5)) + res = self.bucket.resample(data) + self.assertEqual(res.shape, (5, 5)) + self.assertEqual(res.dims, ('y', 'x')) + + # 3D input data with 'bands' dim + data = xr.DataArray(da.ones((1, 5, 5)), dims=('bands', 'foo', 'bar')) + self.bucket.compute.return_value = da.ones((1, 5, 5)) + res = self.bucket.resample(data) + self.assertEqual(res.shape, (1, 5, 5)) + self.assertEqual(res.dims, ('bands', 'y', 'x')) + + # 3D input data with misc dim names + data = xr.DataArray(da.ones((3, 5, 5)), dims=('foo', 'bar', 'baz')) + self.bucket.compute.return_value = da.ones((3, 5, 5)) + res = self.bucket.resample(data) + self.assertEqual(res.shape, (3, 5, 5)) + self.assertEqual(res.dims, ('foo', 'bar', 'baz')) + + def suite(): """The test suite for test_scene. 
""" From dd8036e940e987a275d12b16b7154023dd5e25e2 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 24 May 2019 11:10:28 +0300 Subject: [PATCH 13/31] Add unittests for `BucketSum` and `BucketCount` resamplers --- satpy/tests/test_resample.py | 74 +++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/satpy/tests/test_resample.py b/satpy/tests/test_resample.py index 8604807249..5ca3cf2ca5 100644 --- a/satpy/tests/test_resample.py +++ b/satpy/tests/test_resample.py @@ -480,7 +480,7 @@ def close(self): class TestBucketResampler(unittest.TestCase): - """Test the bucket resamplers.""" + """Test the bucket resampler.""" def setUp(self): from satpy.resample import BucketResampler @@ -572,6 +572,78 @@ def test_resample(self, pyresample_bucket): self.assertEqual(res.dims, ('foo', 'bar', 'baz')) +class TestBucketSum(unittest.TestCase): + """Test the sum bucket resampler.""" + + def setUp(self): + from satpy.resample import BucketSum + get_lonlats = mock.MagicMock() + get_lonlats.return_value = (1, 2) + self.source_geo_def = mock.MagicMock(get_lonlats=get_lonlats) + self.target_geo_def = mock.MagicMock(get_lonlats=get_lonlats) + self.bucket = BucketSum(self.source_geo_def, self.target_geo_def) + + def test_compute(self): + """Test sum bucket resampler computation.""" + import dask.array as da + # 1D data + self.bucket.resampler = mock.MagicMock() + data = da.ones((5,)) + self.bucket.resampler.get_sum.return_value = data + res = self.bucket.compute(data) + self.bucket.resampler.get_sum.assert_called_once_with(data) + self.assertEqual(res.shape, (1, 5)) + # 2D data + self.bucket.resampler = mock.MagicMock() + data = da.ones((5, 5)) + self.bucket.resampler.get_sum.return_value = data + res = self.bucket.compute(data) + self.bucket.resampler.get_sum.assert_called_once_with(data) + self.assertEqual(res.shape, (1, 5, 5)) + # 3D data + self.bucket.resampler = mock.MagicMock() + data = da.ones((3, 5, 5)) + 
self.bucket.resampler.get_sum.return_value = data[0, :, :] + res = self.bucket.compute(data) + self.assertEqual(res.shape, (3, 5, 5)) + + +class TestBucketCount(unittest.TestCase): + """Test the sum bucket resampler.""" + + def setUp(self): + from satpy.resample import BucketCount + get_lonlats = mock.MagicMock() + get_lonlats.return_value = (1, 2) + self.source_geo_def = mock.MagicMock(get_lonlats=get_lonlats) + self.target_geo_def = mock.MagicMock(get_lonlats=get_lonlats) + self.bucket = BucketCount(self.source_geo_def, self.target_geo_def) + + def test_compute(self): + """Test sum bucket resampler computation.""" + import dask.array as da + # 1D data + self.bucket.resampler = mock.MagicMock() + data = da.ones((5,)) + self.bucket.resampler.get_count.return_value = data + res = self.bucket.compute(data) + self.bucket.resampler.get_count.assert_called_once_with() + self.assertEqual(res.shape, (1, 5)) + # 2D data + self.bucket.resampler = mock.MagicMock() + data = da.ones((5, 5)) + self.bucket.resampler.get_count.return_value = data + res = self.bucket.compute(data) + self.bucket.resampler.get_count.assert_called_once_with() + self.assertEqual(res.shape, (1, 5, 5)) + # 3D data + self.bucket.resampler = mock.MagicMock() + data = da.ones((3, 5, 5)) + self.bucket.resampler.get_count.return_value = data[0, :, :] + res = self.bucket.compute(data) + self.assertEqual(res.shape, (3, 5, 5)) + + def suite(): """The test suite for test_scene. 
""" From 7ff2a35a2d535f9727076a13a30b749b13f02a06 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 24 May 2019 11:12:26 +0300 Subject: [PATCH 14/31] Add bucket resampler tests to suite --- satpy/tests/test_resample.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/satpy/tests/test_resample.py b/satpy/tests/test_resample.py index 5ca3cf2ca5..73fe6bf5d9 100644 --- a/satpy/tests/test_resample.py +++ b/satpy/tests/test_resample.py @@ -654,6 +654,9 @@ def suite(): mysuite.addTest(loader.loadTestsFromTestCase(TestEWAResampler)) mysuite.addTest(loader.loadTestsFromTestCase(TestHLResample)) mysuite.addTest(loader.loadTestsFromTestCase(TestBilinearResampler)) + mysuite.addTest(loader.loadTestsFromTestCase(TestBucketResampler)) + mysuite.addTest(loader.loadTestsFromTestCase(TestBucketSum)) + mysuite.addTest(loader.loadTestsFromTestCase(TestBucketCount)) return mysuite From 3e321b02dabdca506fb5e23e428af4d693ff69e0 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 24 May 2019 11:29:22 +0300 Subject: [PATCH 15/31] Add some documentation on bucket resamplers --- satpy/resample.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index 7f14f48218..f3eeb6579b 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -40,6 +40,9 @@ "ewa", "Elliptical Weighted Averaging", :class:`~satpy.resample.EWAResampler` "native", "Native", :class:`~satpy.resample.NativeResampler` "bilinear", "Bilinear", :class:`~satpy.resample.BilinearResampler` + "bucket_avg", "Average Bucket Resampling", :class:`~satpy.resample.BucketResampler` + "bucket_sum", "Sum Bucket Resampling", :class:`~satpy.resample.BucketSum` + "bucket_count", "Count Bucket Resampling", :class:`~satpy.resample.BucketCount` The resampling algorithm used can be specified with the ``resampler`` keyword argument and defaults to ``nearest``: @@ -826,7 +829,11 @@ def compute(self, data, expand=True, **kwargs): class 
BucketResampler(BaseResampler): - """Base class for bucket resampling which implements averaging.""" + """Base class for bucket resampling which implements averaging. + + Bucket resampling calculates the average of all the values that + are closest to each bin and inside the target area. + """ def __init__(self, source_geo_def, target_geo_def): super(BucketResampler, self).__init__(source_geo_def, target_geo_def) @@ -882,7 +889,11 @@ def resample(self, data, **kwargs): class BucketSum(BucketResampler): - """Class for bucket resampling which implements accumulation (sum).""" + """Class for bucket resampling which implements accumulation (sum). + + This resampler calculates the cumulative sum of all the values + that are closest to each bin and inside the target area. + """ def compute(self, data, **kwargs): """Call the resampling.""" @@ -900,7 +911,11 @@ def compute(self, data, **kwargs): class BucketCount(BucketResampler): - """Class for bucket resampling which implements hit-counting.""" + """Class for bucket resampling which implements hit-counting. + + This resampler calculates the number of occurences of the input + data closest to each bin and inside the target area. + """ def compute(self, data, **kwargs): """Call the resampling.""" From 603eb8918e2b352c05ea4b26541034af538882cb Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 24 May 2019 11:37:39 +0300 Subject: [PATCH 16/31] Expose fill_value and mask_all_nan kwargs for bucket resamplers --- satpy/resample.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index f3eeb6579b..c67c50adf3 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -833,6 +833,13 @@ class BucketResampler(BaseResampler): Bucket resampling calculates the average of all the values that are closest to each bin and inside the target area. 
+ + Parameters + ---------- + fill_value : float (default: np.nan) + Fill value for missing data + mask_all_nans : boolean (default: False) + Mask all locations with all-NaN values """ def __init__(self, source_geo_def, target_geo_def): @@ -847,16 +854,18 @@ def precompute(self, **kwargs): source_lons, source_lats) - def compute(self, data, fill_value=np.nan, **kwargs): + def compute(self, data, fill_value=np.nan, mask_all_nan=False, **kwargs): """Call the resampling.""" results = [] if data.ndim == 3: for i in range(data.shape[0]): res = self.resampler.get_average(data[i, :, :], - fill_value=fill_value) + fill_value=fill_value, + mask_all_nan=mask_all_nan) results.append(res) else: - res = self.resampler.get_average(data, fill_value=fill_value) + res = self.resampler.get_average(data, fill_value=fill_value, + mask_all_nan=mask_all_nan) results.append(res) return da.stack(results) @@ -893,18 +902,26 @@ class BucketSum(BucketResampler): This resampler calculates the cumulative sum of all the values that are closest to each bin and inside the target area. 
+ + Parameters + ---------- + fill_value : float (default: np.nan) + Fill value for missing data + mask_all_nans : boolean (default: False) + Mask all locations with all-NaN values """ - def compute(self, data, **kwargs): + def compute(self, data, mask_all_nan=False, **kwargs): """Call the resampling.""" LOG.debug("Resampling %s", str(data.name)) results = [] if data.ndim == 3: for i in range(data.shape[0]): - res = self.resampler.get_sum(data[i, :, :]) + res = self.resampler.get_sum(data[i, :, :], + mask_all_nan=mask_all_nan) results.append(res) else: - res = self.resampler.get_sum(data) + res = self.resampler.get_sum(data, mask_all_nan=mask_all_nan) results.append(res) return da.stack(results) From 5b39349b19ef26b149a8b5ac79fb0f1618f6bbaa Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 24 May 2019 12:41:00 +0300 Subject: [PATCH 17/31] Rename BucketResampler to BucketResamplerBase, sub-class Avg, adjust tests --- satpy/resample.py | 69 ++++++++++++++++++++---------------- satpy/tests/test_resample.py | 28 +++++++++------ 2 files changed, 57 insertions(+), 40 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index c67c50adf3..a5063e86e3 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -40,7 +40,7 @@ "ewa", "Elliptical Weighted Averaging", :class:`~satpy.resample.EWAResampler` "native", "Native", :class:`~satpy.resample.NativeResampler` "bilinear", "Bilinear", :class:`~satpy.resample.BilinearResampler` - "bucket_avg", "Average Bucket Resampling", :class:`~satpy.resample.BucketResampler` + "bucket_avg", "Average Bucket Resampling", :class:`~satpy.resample.BucketAvg` "bucket_sum", "Sum Bucket Resampling", :class:`~satpy.resample.BucketSum` "bucket_count", "Count Bucket Resampling", :class:`~satpy.resample.BucketCount` @@ -828,22 +828,12 @@ def compute(self, data, expand=True, **kwargs): coords=coords or None) -class BucketResampler(BaseResampler): +class BucketResamplerBase(BaseResampler): """Base class for bucket resampling which 
implements averaging. - - Bucket resampling calculates the average of all the values that - are closest to each bin and inside the target area. - - Parameters - ---------- - fill_value : float (default: np.nan) - Fill value for missing data - mask_all_nans : boolean (default: False) - Mask all locations with all-NaN values """ def __init__(self, source_geo_def, target_geo_def): - super(BucketResampler, self).__init__(source_geo_def, target_geo_def) + super(BucketResamplerBase, self).__init__(source_geo_def, target_geo_def) self.resampler = None def precompute(self, **kwargs): @@ -854,21 +844,9 @@ def precompute(self, **kwargs): source_lons, source_lats) - def compute(self, data, fill_value=np.nan, mask_all_nan=False, **kwargs): + def compute(self, data, **kwargs): """Call the resampling.""" - results = [] - if data.ndim == 3: - for i in range(data.shape[0]): - res = self.resampler.get_average(data[i, :, :], - fill_value=fill_value, - mask_all_nan=mask_all_nan) - results.append(res) - else: - res = self.resampler.get_average(data, fill_value=fill_value, - mask_all_nan=mask_all_nan) - results.append(res) - - return da.stack(results) + raise NotImplementedError("Use the sub-classes") def resample(self, data, **kwargs): """Resample `data` by calling `precompute` and `compute` methods. @@ -897,7 +875,38 @@ def resample(self, data, **kwargs): return result -class BucketSum(BucketResampler): +class BucketAvg(BucketResamplerBase): + """Class for averaging bucket resampling. + + Bucket resampling calculates the average of all the values that + are closest to each bin and inside the target area. 
+ + Parameters + ---------- + fill_value : float (default: np.nan) + Fill value for missing data + mask_all_nans : boolean (default: False) + Mask all locations with all-NaN values + """ + + def compute(self, data, fill_value=np.nan, mask_all_nan=False, **kwargs): + """Call the resampling.""" + results = [] + if data.ndim == 3: + for i in range(data.shape[0]): + res = self.resampler.get_average(data[i, :, :], + fill_value=fill_value, + mask_all_nan=mask_all_nan) + results.append(res) + else: + res = self.resampler.get_average(data, fill_value=fill_value, + mask_all_nan=mask_all_nan) + results.append(res) + + return da.stack(results) + + +class BucketSum(BucketResamplerBase): """Class for bucket resampling which implements accumulation (sum). This resampler calculates the cumulative sum of all the values @@ -927,7 +936,7 @@ def compute(self, data, mask_all_nan=False, **kwargs): return da.stack(results) -class BucketCount(BucketResampler): +class BucketCount(BucketResamplerBase): """Class for bucket resampling which implements hit-counting. 
This resampler calculates the number of occurences of the input @@ -954,7 +963,7 @@ def compute(self, data, **kwargs): "ewa": EWAResampler, "bilinear": BilinearResampler, "native": NativeResampler, - "bucket_avg": BucketResampler, + "bucket_avg": BucketAvg, "bucket_sum": BucketSum, "bucket_count": BucketCount, } diff --git a/satpy/tests/test_resample.py b/satpy/tests/test_resample.py index 73fe6bf5d9..6c55ef6d09 100644 --- a/satpy/tests/test_resample.py +++ b/satpy/tests/test_resample.py @@ -479,16 +479,16 @@ def close(self): shutil.rmtree(the_dir) -class TestBucketResampler(unittest.TestCase): +class TestBucketAvg(unittest.TestCase): """Test the bucket resampler.""" def setUp(self): - from satpy.resample import BucketResampler + from satpy.resample import BucketAvg get_lonlats = mock.MagicMock() get_lonlats.return_value = (1, 2) self.source_geo_def = mock.MagicMock(get_lonlats=get_lonlats) self.target_geo_def = mock.MagicMock(get_lonlats=get_lonlats) - self.bucket = BucketResampler(self.source_geo_def, self.target_geo_def) + self.bucket = BucketAvg(self.source_geo_def, self.target_geo_def) def test_init(self): """Test bucket resampler initialization""" @@ -512,16 +512,20 @@ def test_compute(self): data = da.ones((5,)) self.bucket.resampler.get_average.return_value = data res = self.bucket.compute(data, fill_value=2) - self.bucket.resampler.get_average.assert_called_once_with(data, - fill_value=2) + self.bucket.resampler.get_average.assert_called_once_with( + data, + fill_value=2, + mask_all_nan=False) self.assertEqual(res.shape, (1, 5)) # 2D data self.bucket.resampler = mock.MagicMock() data = da.ones((5, 5)) self.bucket.resampler.get_average.return_value = data res = self.bucket.compute(data, fill_value=2) - self.bucket.resampler.get_average.assert_called_once_with(data, - fill_value=2) + self.bucket.resampler.get_average.assert_called_once_with( + data, + fill_value=2, + mask_all_nan=False) self.assertEqual(res.shape, (1, 5, 5)) # 3D data self.bucket.resampler = 
mock.MagicMock() @@ -591,14 +595,18 @@ def test_compute(self): data = da.ones((5,)) self.bucket.resampler.get_sum.return_value = data res = self.bucket.compute(data) - self.bucket.resampler.get_sum.assert_called_once_with(data) + self.bucket.resampler.get_sum.assert_called_once_with( + data, + mask_all_nan=False) self.assertEqual(res.shape, (1, 5)) # 2D data self.bucket.resampler = mock.MagicMock() data = da.ones((5, 5)) self.bucket.resampler.get_sum.return_value = data res = self.bucket.compute(data) - self.bucket.resampler.get_sum.assert_called_once_with(data) + self.bucket.resampler.get_sum.assert_called_once_with( + data, + mask_all_nan=False) self.assertEqual(res.shape, (1, 5, 5)) # 3D data self.bucket.resampler = mock.MagicMock() @@ -654,7 +662,7 @@ def suite(): mysuite.addTest(loader.loadTestsFromTestCase(TestEWAResampler)) mysuite.addTest(loader.loadTestsFromTestCase(TestHLResample)) mysuite.addTest(loader.loadTestsFromTestCase(TestBilinearResampler)) - mysuite.addTest(loader.loadTestsFromTestCase(TestBucketResampler)) + mysuite.addTest(loader.loadTestsFromTestCase(TestBucketAvg)) mysuite.addTest(loader.loadTestsFromTestCase(TestBucketSum)) mysuite.addTest(loader.loadTestsFromTestCase(TestBucketCount)) From 3b20e621947a47f7811a61ecd76f4f92f72c74a1 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Fri, 24 May 2019 12:41:42 +0300 Subject: [PATCH 18/31] Remove authors --- satpy/resample.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index a5063e86e3..6ffc2ff4e1 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -3,11 +3,6 @@ # # Copyright (c) 2015-2018 # -# Author(s): -# -# Martin Raspaud -# Panu Lahtinen -# # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or From ef01ffde1be4d54fc9d76b1af22588007fd2e316 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen 
Date: Mon, 27 May 2019 10:44:31 +0300 Subject: [PATCH 19/31] Ensure coordinates are as dask arrays --- satpy/resample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/satpy/resample.py b/satpy/resample.py index e26024d685..e1bc64e576 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -834,7 +834,8 @@ def __init__(self, source_geo_def, target_geo_def): def precompute(self, **kwargs): """Create X and Y indices and store them for later use.""" LOG.debug("Initializing bucket resampler.") - source_lons, source_lats = self.source_geo_def.get_lonlats() + source_lons, source_lats = self.source_geo_def.get_lonlats( + chunks=CHUNK_SIZE) self.resampler = bucket.BucketResampler(self.target_geo_def, source_lons, source_lats) From ec0b70fedf420bbb243407cc50ae2eb81d779522 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 May 2019 08:00:56 +0300 Subject: [PATCH 20/31] Remove coordinates from the returned xr.DataArray --- satpy/resample.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index e1bc64e576..9657158494 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -862,12 +862,10 @@ def resample(self, data, **kwargs): dims = ('y', 'x') else: dims = data.dims - result = self.compute(data_arr, **kwargs) if result.ndim > len(dims): result = da.squeeze(result) - result = xr.DataArray(result, dims=dims, coords=data.coords, - attrs=attrs) + result = xr.DataArray(result, dims=dims, attrs=attrs) return result From 85daf7deb4d887bf394d5b9a46bf5f561060cdd2 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 May 2019 08:25:05 +0300 Subject: [PATCH 21/31] Add 'bands' coordinate to output xr.DataArray if it's present in input data --- satpy/resample.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/satpy/resample.py b/satpy/resample.py index 9657158494..8e4c764c50 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -862,10 +862,14 @@ def resample(self, 
data, **kwargs): dims = ('y', 'x') else: dims = data.dims + coords = {} + if 'bands' in data.coords: + coords['bands'] = data.coords['bands'] result = self.compute(data_arr, **kwargs) if result.ndim > len(dims): result = da.squeeze(result) - result = xr.DataArray(result, dims=dims, attrs=attrs) + result = xr.DataArray(result, dims=dims, coords=coords, + attrs=attrs) return result From dec29da601285c8382c1a931d96ff99eadce3328 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 May 2019 08:25:54 +0300 Subject: [PATCH 22/31] Fix intendation --- satpy/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/resample.py b/satpy/resample.py index 8e4c764c50..962fe2b08c 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -864,7 +864,7 @@ def resample(self, data, **kwargs): dims = data.dims coords = {} if 'bands' in data.coords: - coords['bands'] = data.coords['bands'] + coords['bands'] = data.coords['bands'] result = self.compute(data_arr, **kwargs) if result.ndim > len(dims): result = da.squeeze(result) From ff1b40c3530978c7867ea7a622b2a1c7322a4554 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 May 2019 10:54:49 +0300 Subject: [PATCH 23/31] Add BucketFraction resampler and unit tests --- satpy/resample.py | 30 +++++++++++++++- satpy/tests/test_resample.py | 67 ++++++++++++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index 962fe2b08c..9f489d05af 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -862,10 +862,18 @@ def resample(self, data, **kwargs): dims = ('y', 'x') else: dims = data.dims + result = self.compute(data_arr, **kwargs) coords = {} if 'bands' in data.coords: coords['bands'] = data.coords['bands'] - result = self.compute(data_arr, **kwargs) + # Fractions are returned in a dict + elif isinstance(result, dict): + coords['categories'] = sorted(result.keys()) + dims = ('categories', 'y', 'x') + new_result = [] + for cat in 
coords['categories']: + new_result.append(result[cat]) + result = da.stack(new_result) if result.ndim > len(dims): result = da.squeeze(result) result = xr.DataArray(result, dims=dims, coords=coords, @@ -956,6 +964,25 @@ def compute(self, data, **kwargs): return da.stack(results) +class BucketFraction(BucketResamplerBase): + """Class for bucket resampling to compute category fractions + + This resampler calculates the fraction of occurences of the input + data per category. + """ + + def compute(self, data, fill_value=np.nan, categories=None, **kwargs): + """Call the resampling.""" + LOG.debug("Resampling %s", str(data.name)) + if data.ndim > 2: + raise ValueError("BucketFraction not implemented for 3D datasets") + + result = self.resampler.get_fractions(data, categories=categories, + fill_value=fill_value) + + return result + + RESAMPLERS = {"kd_tree": KDTreeResampler, "nearest": KDTreeResampler, "ewa": EWAResampler, @@ -964,6 +991,7 @@ def compute(self, data, **kwargs): "bucket_avg": BucketAvg, "bucket_sum": BucketSum, "bucket_count": BucketCount, + "bucket_fraction": BucketFraction, } diff --git a/satpy/tests/test_resample.py b/satpy/tests/test_resample.py index 6c55ef6d09..7709c8d558 100644 --- a/satpy/tests/test_resample.py +++ b/satpy/tests/test_resample.py @@ -562,11 +562,13 @@ def test_resample(self, pyresample_bucket): self.assertEqual(res.dims, ('y', 'x')) # 3D input data with 'bands' dim - data = xr.DataArray(da.ones((1, 5, 5)), dims=('bands', 'foo', 'bar')) + data = xr.DataArray(da.ones((1, 5, 5)), dims=('bands', 'foo', 'bar'), + coords={'bands': ['L']}) self.bucket.compute.return_value = da.ones((1, 5, 5)) res = self.bucket.resample(data) self.assertEqual(res.shape, (1, 5, 5)) self.assertEqual(res.dims, ('bands', 'y', 'x')) + self.assertEqual(res.coords['bands'], ['L']) # 3D input data with misc dim names data = xr.DataArray(da.ones((3, 5, 5)), dims=('foo', 'bar', 'baz')) @@ -617,7 +619,7 @@ def test_compute(self): class 
TestBucketCount(unittest.TestCase): - """Test the sum bucket resampler.""" + """Test the count bucket resampler.""" def setUp(self): from satpy.resample import BucketCount @@ -628,7 +630,7 @@ def setUp(self): self.bucket = BucketCount(self.source_geo_def, self.target_geo_def) def test_compute(self): - """Test sum bucket resampler computation.""" + """Test count bucket resampler computation.""" import dask.array as da # 1D data self.bucket.resampler = mock.MagicMock() @@ -652,6 +654,65 @@ def test_compute(self): self.assertEqual(res.shape, (3, 5, 5)) +class TestBucketFraction(unittest.TestCase): + """Test the fraction bucket resampler.""" + + def setUp(self): + from satpy.resample import BucketFraction + get_lonlats = mock.MagicMock() + get_lonlats.return_value = (1, 2) + self.source_geo_def = mock.MagicMock(get_lonlats=get_lonlats) + self.target_geo_def = mock.MagicMock(get_lonlats=get_lonlats) + self.bucket = BucketFraction(self.source_geo_def, self.target_geo_def) + + def test_compute(self): + """Test fraction bucket resampler computation.""" + import dask.array as da + import numpy as np + + self.bucket.resampler = mock.MagicMock() + data = da.ones((3, 3)) + + # No kwargs given + res = self.bucket.compute(data) + self.bucket.resampler.get_fractions.assert_called_with( + data, + categories=None, + fill_value=np.nan) + # Custo kwargs + res = self.bucket.compute(data, categories=[1, 2], fill_value=0) + self.bucket.resampler.get_fractions.assert_called_with( + data, + categories=[1, 2], + fill_value=0) + + # Too many dimensions + data = da.ones((3, 5, 5)) + with self.assertRaises(ValueError): + res = self.bucket.compute(data) + + + @mock.patch('pyresample.bucket.BucketResampler') + def test_resample(self, pyresample_bucket): + """Test fraction bucket resamplers resample method.""" + import xarray as xr + import dask.array as da + import numpy as np + + self.bucket.resampler = mock.MagicMock() + self.bucket.precompute = mock.MagicMock() + self.bucket.compute = 
mock.MagicMock() + + # Fractions return a dict + data = xr.DataArray(da.ones((1, 5, 5)), dims=('bands', 'y', 'x')) + arr = da.ones((5, 5)) + self.bucket.compute.return_value = {0: arr, 1: arr, 2: arr} + res = self.bucket.resample(data) + self.assertTrue('categories' in res.coords) + self.assertTrue('categories' in res.dims) + self.assertTrue(np.all(res.coords['categories'] == np.array([0, 1, 2]))) + + def suite(): """The test suite for test_scene. """ From 5c4f33a3290631b1ce10a814e7101b68ac1fbcb0 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 May 2019 10:58:16 +0300 Subject: [PATCH 24/31] Fix stickler complaints --- satpy/tests/test_resample.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/satpy/tests/test_resample.py b/satpy/tests/test_resample.py index 7709c8d558..b7a3f8011a 100644 --- a/satpy/tests/test_resample.py +++ b/satpy/tests/test_resample.py @@ -689,8 +689,7 @@ def test_compute(self): # Too many dimensions data = da.ones((3, 5, 5)) with self.assertRaises(ValueError): - res = self.bucket.compute(data) - + _ = self.bucket.compute(data) @mock.patch('pyresample.bucket.BucketResampler') def test_resample(self, pyresample_bucket): From e6e10641a34f268513b4933bdcec39859ddc4ef1 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 May 2019 12:30:45 +0300 Subject: [PATCH 25/31] Fix stickler complaints --- satpy/tests/test_resample.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/satpy/tests/test_resample.py b/satpy/tests/test_resample.py index b7a3f8011a..71ce76beb7 100644 --- a/satpy/tests/test_resample.py +++ b/satpy/tests/test_resample.py @@ -674,13 +674,13 @@ def test_compute(self): data = da.ones((3, 3)) # No kwargs given - res = self.bucket.compute(data) + _ = self.bucket.compute(data) self.bucket.resampler.get_fractions.assert_called_with( data, categories=None, fill_value=np.nan) - # Custo kwargs - res = self.bucket.compute(data, categories=[1, 2], fill_value=0) + # Custom kwargs + _ = 
self.bucket.compute(data, categories=[1, 2], fill_value=0) self.bucket.resampler.get_fractions.assert_called_with( data, categories=[1, 2], From b1c9c5e21f0a9f6d7e6817e6b32a277748704146 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 May 2019 15:44:31 +0300 Subject: [PATCH 26/31] Adjust some attributes after bucket resampling --- satpy/resample.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/satpy/resample.py b/satpy/resample.py index 9f489d05af..e1dd6af1bc 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -876,8 +876,14 @@ def resample(self, data, **kwargs): result = da.stack(new_result) if result.ndim > len(dims): result = da.squeeze(result) + attrs['units'] = None + attrs['standard_name'] = 'area_fraction' + attrs['calibration'] = None + attrs['projection'] = self.target_geo_def + attrs['navigation'] = None result = xr.DataArray(result, dims=dims, coords=coords, attrs=attrs) + return result From e07b7a17ef443c19343e970adffa0c911e5f5450 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Tue, 28 May 2019 15:53:50 +0300 Subject: [PATCH 27/31] Use proper attribute values depending on which bucket resampling is done --- satpy/resample.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index e1dd6af1bc..0916323f64 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -876,11 +876,18 @@ def resample(self, data, **kwargs): result = da.stack(new_result) if result.ndim > len(dims): result = da.squeeze(result) - attrs['units'] = None - attrs['standard_name'] = 'area_fraction' - attrs['calibration'] = None + + # Adjust some attributes + if "BucketFraction" in str(self): + attrs['units'] = None + attrs['calibration'] = None + attrs['standard_name'] = 'area_fraction' + elif "BucketCount" in str(self): + attrs['units'] = None + attrs['calibration'] = None + attrs['standard_name'] = 'number_of_observations' attrs['projection'] = self.target_geo_def - attrs['navigation'] = 
None + result = xr.DataArray(result, dims=dims, coords=coords, attrs=attrs) From 97e3e51d99c2e1da1d187e6ae64c11970958afe9 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Wed, 29 May 2019 09:43:05 +0300 Subject: [PATCH 28/31] Do not overwrite `scn[chan].projection` --- satpy/resample.py | 1 - 1 file changed, 1 deletion(-) diff --git a/satpy/resample.py b/satpy/resample.py index 0916323f64..2bdcf65805 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -886,7 +886,6 @@ def resample(self, data, **kwargs): attrs['units'] = None attrs['calibration'] = None attrs['standard_name'] = 'number_of_observations' - attrs['projection'] = self.target_geo_def result = xr.DataArray(result, dims=dims, coords=coords, attrs=attrs) From 7d5af8d3e8d3c0c6504c04750ee54ed847e03634 Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 3 Jun 2019 08:42:58 +0300 Subject: [PATCH 29/31] Add bucket_fraction to list of resamplers --- satpy/resample.py | 1 + 1 file changed, 1 insertion(+) diff --git a/satpy/resample.py b/satpy/resample.py index 2bdcf65805..d9c13660f3 100644 --- a/satpy/resample.py +++ b/satpy/resample.py @@ -38,6 +38,7 @@ "bucket_avg", "Average Bucket Resampling", :class:`~satpy.resample.BucketAvg` "bucket_sum", "Sum Bucket Resampling", :class:`~satpy.resample.BucketSum` "bucket_count", "Count Bucket Resampling", :class:`~satpy.resample.BucketCount` + "bucket_fraction", "Fraction Bucket Resampling", :class:`~satpy.resample.BucketFraction` The resampling algorithm used can be specified with the ``resampler`` keyword argument and defaults to ``nearest``: From f1d1b1725c881ac90e1d2f17bea32e3ffdc9d0fd Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 3 Jun 2019 10:42:49 +0300 Subject: [PATCH 30/31] Use empty string for "no value" for 'units' and 'calibration' attributes --- satpy/resample.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/satpy/resample.py b/satpy/resample.py index d9c13660f3..89c51df8ed 100644 --- a/satpy/resample.py +++ 
b/satpy/resample.py @@ -880,12 +880,12 @@ def resample(self, data, **kwargs): # Adjust some attributes if "BucketFraction" in str(self): - attrs['units'] = None - attrs['calibration'] = None + attrs['units'] = '' + attrs['calibration'] = '' attrs['standard_name'] = 'area_fraction' elif "BucketCount" in str(self): - attrs['units'] = None - attrs['calibration'] = None + attrs['units'] = '' + attrs['calibration'] = '' attrs['standard_name'] = 'number_of_observations' result = xr.DataArray(result, dims=dims, coords=coords, From 040b67e0393a2d38fc273f7533991aedc1ea321c Mon Sep 17 00:00:00 2001 From: Panu Lahtinen Date: Mon, 3 Jun 2019 20:04:01 +0300 Subject: [PATCH 31/31] Add TestBucketFraction to the test suite --- satpy/tests/test_resample.py | 1 + 1 file changed, 1 insertion(+) diff --git a/satpy/tests/test_resample.py b/satpy/tests/test_resample.py index 71ce76beb7..f9d761cdb3 100644 --- a/satpy/tests/test_resample.py +++ b/satpy/tests/test_resample.py @@ -725,6 +725,7 @@ def suite(): mysuite.addTest(loader.loadTestsFromTestCase(TestBucketAvg)) mysuite.addTest(loader.loadTestsFromTestCase(TestBucketSum)) mysuite.addTest(loader.loadTestsFromTestCase(TestBucketCount)) + mysuite.addTest(loader.loadTestsFromTestCase(TestBucketFraction)) return mysuite