From 7232c4343aeb9d44276cad7c3c0cb04dbf7a61b5 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 10 Aug 2023 16:17:02 -0800 Subject: [PATCH 01/30] Add _custom_model class attribute, exception in fit and p0 and bounds as parameters --- skgstat/Variogram.py | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index 1858c70..eb6c88b 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -2,6 +2,7 @@ Variogram class """ import copy +import inspect import warnings from typing import Iterable, Callable, Union, Tuple @@ -335,6 +336,7 @@ def __init__(self, # model can be a function or a string self._model = None + self._custom_model = False self.set_model(model_name=model) # specify if the lag should be given absolute or relative to the maxlag @@ -973,6 +975,7 @@ def set_model(self, model_name): ) % model_name ) else: # pragma: no cover + self._custom_model = True self._model = model_name def _build_harmonized_model(self): @@ -1454,7 +1457,7 @@ def preprocessing(self, force=False): self._calc_diff(force=force) self._calc_groups(force=force) - def fit(self, force=False, method=None, sigma=None, **kwargs): + def fit(self, force=False, method=None, sigma=None, bounds=None, p0=None, **kwargs): """Fit the variogram The fit function will fit the theoretical variogram function to the @@ -1563,18 +1566,34 @@ def fit(self, force=False, method=None, sigma=None, **kwargs): self.cof = [r, s, n] return - # Switch the method - # wrap the model to include or exclude the nugget - if self.use_nugget: - def wrapped(*args): - return self._model(*args) + # For a supported model, wrap the function depending on nugget and get logical bounds + if not self._custom_model: + # Switch the method + # wrap the model to include or exclude the nugget + if self.use_nugget: + def wrapped(*args): + return self._model(*args) + else: + def wrapped(*args): + return self._model(*args, 0) + + # get p0 + if bounds is None: + bounds = (0, self.__get_fit_bounds(x, y)) + if p0 is None: + p0 = np.asarray(bounds[1]) + # Else, inspect the function for the number of arguments else: - def wrapped(*args): - return self._model(*args, 0) + # The number of arguments of argspec minus one is what we initialized + argspec = inspect.getfullargspec(self._model) + nb_args = len(argspec) - 1 + if bounds is None: + bounds = (0, [np.maximum(np.nanmax(x), np.nanmax(y))] * nb_args) + if p0 is None: + p0 = np.asarray(bounds[1]) - # get p0 - bounds = (0, self.__get_fit_bounds(x, y)) - p0 = np.asarray(bounds[1]) + def wrapped(*args): + return self._model(*args) # Trust Region Reflective if self.fit_method == 'trf': From 5247cd45aaf1d3a676c3b22e0597ce05e5451348 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 10 Aug 2023 16:17:38 -0800 Subject: [PATCH 02/30] Add tests on custom model definition and fitting --- skgstat/tests/test_models.py | 15 +++++++++++++++ skgstat/tests/test_variogram.py | 15 ++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/skgstat/tests/test_models.py b/skgstat/tests/test_models.py index 6cac237..4110514 100644 --- a/skgstat/tests/test_models.py +++ b/skgstat/tests/test_models.py @@ -186,6 +186,21 @@ def adder(l, a): for r, c in zip(res, adder([1, 4, 8], 4)): self.assertEqual(r, c) + def test_sum_spherical(self): + @variogram + def sum_spherical(h, r1, c1, r2, c2, b1=0, b2=0): + return spherical(h, r1, c1, b1) + spherical(h, r2, c2, b2) + + # Parameters for the two spherical models + params = [1, 0.3, 10, 0.7] + + # Values at which we'll evaluate the function and its expected result + vals = [0, 1, 100] + res = [0, 0.3 + spherical(1, 10, 0.7), 1] + + for r, c in zip(res, sum_spherical(vals, *params)): + self.assertEqual(r, c) + if __name__=='__main__': unittest.main() diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index 52ce572..a5fdaeb 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -19,6 +19,7 @@ from skgstat import OrdinaryKriging from skgstat import estimators from skgstat import plotting +from skgstat.models import variogram, spherical, matern class TestSpatiallyCorrelatedData(unittest.TestCase): @@ -61,7 +62,7 @@ def test_sparse_maxlag_30(self): self.assertAlmostEqual(x, y, places=3) -class TestVariogramInstatiation(unittest.TestCase): +class TestVariogramInstantiation(unittest.TestCase): def setUp(self): # set up default values, whenever c and v are not important np.random.seed(42) @@ -949,6 +950,18 @@ def test_implicit_nugget(self): self.assertTrue(abs(V.parameters[-1] - 2.) < 1e-10) + def test_fit_custom_model(self): + + # Define a custom variogram and run the fit + @variogram + def sum_spherical(h, r1, c1, r2, c2, b1, b2): + return spherical(h, r1, c1, b1) + spherical(h, r2, c2, b2) + + V = Variogram(self.c, self.v, use_nugget=True, model=sum_spherical) + + # Check that 6 parameters were found + assert len(V.cof) == 6 + class TestVariogramQualityMeasures(unittest.TestCase): def setUp(self): From 7b3cd222b2ac1d075f96620b1310f93ce428ba65 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 10 Aug 2023 17:03:30 -0800 Subject: [PATCH 03/30] Add to DirectionalVariogram and rename _custom_model to _is_model_custom for clarity --- skgstat/DirectionalVariogram.py | 1 + skgstat/Variogram.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/skgstat/DirectionalVariogram.py b/skgstat/DirectionalVariogram.py index b57686f..a6982f9 100644 --- a/skgstat/DirectionalVariogram.py +++ b/skgstat/DirectionalVariogram.py @@ -294,6 +294,7 @@ def __init__(self, # set the directional model self._directional_model = None + self._is_model_custom = False self.set_directional_model(model_name=directional_model) # the binning settings diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index eb6c88b..ca8655d 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -336,7 +336,7 @@ def __init__(self, # model can be a function or a string self._model = None - self._custom_model = False + self._is_model_custom = False self.set_model(model_name=model) # specify if the lag should be given absolute or relative to the maxlag @@ -975,7 +975,7 @@ def set_model(self, model_name): ) % model_name ) else: # pragma: no cover - self._custom_model = True + self._is_model_custom = True self._model = model_name def _build_harmonized_model(self): @@ -1567,7 +1567,7 @@ def fit(self, force=False, method=None, sigma=None, bounds=None, p0=None, **kwar return # For a supported model, wrap the function depending on nugget and get logical bounds - if not self._custom_model: + if not self._is_model_custom: # Switch the method # wrap the model to include or exclude the nugget if self.use_nugget: From 20749fc9bb6d8d0ac8b2cefc7029fb0d4731579d Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Sun, 20 Aug 2023 16:24:28 -0800 Subject: [PATCH 04/30] Build sum of models from string name --- skgstat/Variogram.py | 49 +++++++++++++++++++++++++++++++++ skgstat/tests/test_variogram.py | 44 ++++++++++++++++++++++++++++- 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index ca8655d..6c4d515 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -965,6 +965,8 @@ def set_model(self, model_name): if model_name.lower() == 'harmonize': self._harmonize = True self._model = self._build_harmonized_model() + elif "+" in model_name: + self._model = self._build_sum_models(model_name) elif hasattr(models, model_name.lower()): self._model = getattr(models, model_name.lower()) else: @@ -978,6 +980,53 @@ def set_model(self, model_name): self._is_model_custom = True self._model = model_name + def _build_sum_models(self, sum_models_name: str): + """ + Build sum of theoretical models. + """ + + # Remove all whitespaces in the string, in case the user wrote someting like "spherical + gaussian" + sum_models_name = ''.join(sum_models_name.split()) + + # Get individual model names + list_model_names = sum_models_name.split("+") + + # Check that all models exist in the "models" module + if not all(hasattr(models, model_name.lower()) for model_name in list_model_names): + raise ValueError( + ( + 'One of the theoretical models in the list "%s" is not' + ' understood, please provide existing model names separated by "+".' + ) % ", ".join(list_model_names) + ) + + # Build the variogram function for the sum of models + # First, build the models from their py_func (ignoring variogram decorator) and get args per model + list_models = [getattr(models, model_name.lower()).py_func for model_name in list_model_names] + nb_args_per_model = [len(inspect.getfullargspec(model).args) for model in list_models] + + # If nugget is used + if self.use_nugget: + + # Compute cumulative number of args removing the lags + cum_args_minus_lag = np.cumsum(np.array(nb_args_per_model) - 1) + + # If nugget is not used + else: + # Same removing also the nugget + cum_args_minus_lag = np.cumsum(np.array(nb_args_per_model) - 2) + + # Prepare argument slices to distribute to submodels + args_indices = np.insert(cum_args_minus_lag, 0, 0) # We add the first indice of 0 + args_slices = [(slice(args_indices[i], args_indices[i + 1])) for i in range(len(args_indices) - 1)] + + # Distribute first argument (lag) and use all others in order (nugget ignored when last argument not passed) + @models.variogram + def sum_models(h, *args): + return np.sum(list_models[i](h, *args[args_slices[i]]) for i in range(len(list_models))) + + return sum_models + def _build_harmonized_model(self): x = self.bins y = self.experimental diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index a5fdaeb..743bc5a 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -19,7 +19,7 @@ from skgstat import OrdinaryKriging from skgstat import estimators from skgstat import plotting -from skgstat.models import variogram, spherical, matern +from skgstat.models import variogram, spherical, gaussian, exponential, cubic, stable, matern class TestSpatiallyCorrelatedData(unittest.TestCase): @@ -962,6 +962,48 @@ def sum_spherical(h, r1, c1, r2, c2, b1, b2): # Check that 6 parameters were found assert len(V.cof) == 6 + def test_build_sum_models(self): + + # Initiate variogram without fitting + V = Variogram(self.c, self.v, fit_method=None) + + # 1/ Check that errors are raised if argument is not good + # Should accept upper case and spaces + V._build_sum_models("Spherical + spherical") + + # Raises an error if model does not exist + with self.assertRaises(ValueError) as e: + V._build_sum_models("Notamodel + spherical") + self.assertTrue('One of the theoretical models in the list' in str(e.exception)) + + # 2/ Build a sum of two spherical models + sum_spherical = V._build_sum_models("spherical+spherical") + + # Testing the same way as in test_models/test_sum_spherical + # Parameters for the two spherical models + params = [1, 0.3, 10, 0.7] + + # Values at which we'll evaluate the function and its expected result + vals = [0, 1, 100] + res = [0, 0.3 + spherical(1, 10, 0.7), 1] + + for r, c in zip(res, sum_spherical(vals, *params)): + self.assertEqual(r, c) + + # 3/ Build a sum of all models + sum_spherical = V._build_sum_models("spherical+gaussian+exponential+cubic+stable+matern") + min_nb_args = 2 + 2 + 2 + 2 + 3 + 3 + + # Check that the function fails for a number of args of 13 (lower than 14) + with self.assertRaises(TypeError) as e: + sum_spherical(0, *[1,]*(min_nb_args - 1)) + self.assertTrue('positional arguments' in str(e.exception)) + + # Check that it runs for 14 and that the result is correct + sum_res = sum(model(0, 1, 1) for model in [spherical, gaussian, exponential, cubic]) + \ + sum((model(0, 1, 1, 1)) for model in [stable, matern]) + assert sum_spherical(0, *[1, ] * min_nb_args) == sum_res + class TestVariogramQualityMeasures(unittest.TestCase): def setUp(self): From c93ed9a6f7224f3f2fb0641ab07497652be05ffc Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Sun, 20 Aug 2023 21:57:56 -0800 Subject: [PATCH 05/30] Add self._model_name attribute, and logic for sum of models in describe(), parameters() and get_fit_bounds() --- skgstat/Variogram.py | 258 ++++++++++++++++++++++++++++--------------- 1 file changed, 172 insertions(+), 86 deletions(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index 6c4d515..1ca92a8 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -336,6 +336,7 @@ def __init__(self, # model can be a function or a string self._model = None + self._model_name = None self._is_model_custom = False self.set_model(model_name=model) @@ -963,12 +964,15 @@ def set_model(self, model_name): # at first reset harmonize self._harmonize = False if model_name.lower() == 'harmonize': + mname = 'harmonize' self._harmonize = True self._model = self._build_harmonized_model() elif "+" in model_name: - self._model = self._build_sum_models(model_name) + mname = ''.join(model_name.split()).lower() # Remove all whitespaces + self._model = self._build_sum_models(mname) elif hasattr(models, model_name.lower()): - self._model = getattr(models, model_name.lower()) + mname = model_name.lower() + self._model = getattr(models, mname) else: raise ValueError( ( @@ -976,33 +980,23 @@ def set_model(self, model_name): ' understood, please provide the function' ) % model_name ) + # Set model name attribute + self._model_name = mname + else: # pragma: no cover self._is_model_custom = True self._model = model_name + self._model_name = model_name.__name__ - def _build_sum_models(self, sum_models_name: str): + def _get_argpos_sum_models(self, list_model_names: list[str]) -> list[slice]: """ - Build sum of theoretical models. + Get argument slice position for each model of a list of models (to build from total args, or all fit parameters) """ - # Remove all whitespaces in the string, in case the user wrote someting like "spherical + gaussian" - sum_models_name = ''.join(sum_models_name.split()) - - # Get individual model names - list_model_names = sum_models_name.split("+") - - # Check that all models exist in the "models" module - if not all(hasattr(models, model_name.lower()) for model_name in list_model_names): - raise ValueError( - ( - 'One of the theoretical models in the list "%s" is not' - ' understood, please provide existing model names separated by "+".' - ) % ", ".join(list_model_names) - ) - - # Build the variogram function for the sum of models - # First, build the models from their py_func (ignoring variogram decorator) and get args per model + # Doing this here for other functions (fit, describe, etc), even though already done in _build_sum_models list_models = [getattr(models, model_name.lower()).py_func for model_name in list_model_names] + + # Get the number of arguments per model nb_args_per_model = [len(inspect.getfullargspec(model).args) for model in list_models] # If nugget is used @@ -1020,6 +1014,33 @@ def _build_sum_models(self, sum_models_name: str): args_indices = np.insert(cum_args_minus_lag, 0, 0) # We add the first indice of 0 args_slices = [(slice(args_indices[i], args_indices[i + 1])) for i in range(len(args_indices) - 1)] + return args_slices + + def _build_sum_models(self, sum_models_name: str): + """ + Build sum of theoretical models, variogram-decorated function. + """ + + # Remove all whitespaces in the string, in case the user wrote someting like "spherical + gaussian" + sum_models_name = ''.join(sum_models_name.split()).lower() + + # Get individual model names + list_model_names = sum_models_name.split("+") + + # Check that all models exist in the "models" module + if not all(hasattr(models, model_name.lower()) for model_name in list_model_names): + raise ValueError( + ( + 'One of the theoretical models in the list "%s" is not' + ' understood, please provide existing model names separated by "+".' + ) % ", ".join(list_model_names) + ) + + # First, build the models from their py_func (ignoring variogram decorator) and get args per model + list_models = [getattr(models, model_name.lower()).py_func for model_name in list_model_names] + # Get the argument positions for the model sum (function uses model names to be called by other methods) + args_slices = self._get_argpos_sum_models(list_model_names=list_model_names) + # Distribute first argument (lag) and use all others in order (nugget ignored when last argument not passed) @models.variogram def sum_models(h, *args): @@ -1712,10 +1733,10 @@ def ml(params): n = kwargs.get('nugget', self._kwargs.get('fit_nugget', old_params.get('nugget', 0.0))) # check if a s parameter is needed - if self._model.__name__ in ('stable', 'matern'): - if self._model.__name__ == 'stable': + if self._model_name in ('stable', 'matern'): + if self._model_name == 'stable': s2 = kwargs.get('shape', self._kwargs.get('fit_shape', old_params.get('shape',2.0))) - if self._model.__name__ == 'matern': + if self._model_name == 'matern': s2 = kwargs.get('shape', self._kwargs.get('fit_shape', old_params.get('smoothness', 2.0))) # set @@ -2036,33 +2057,45 @@ def __get_fit_bounds(self, x, y): list """ - mname = self._model.__name__ + all_mname = self._model_name - # use range, sill and smoothness parameter - if mname == 'matern': - # a is max(x), C0 is max(y) s is limited to 20? - bounds = [np.nanmax(x), np.nanmax(y), 20.] + # for a sum of models, create a list + if "+" in all_mname: + list_mname = all_mname.split("+") + else: + list_mname = [all_mname] - # use range, sill and shape parameter - elif mname == 'stable': - # a is max(x), C0 is max(y) s is limited to 2? - bounds = [np.nanmax(x), np.nanmax(y), 2.] + # we append all bounds (for one or several models) + all_bounds = [] + for mname in list_mname: - # use only sill - elif mname == 'nugget': - # a is max(x): - bounds = [np.nanmax(x)] + # use range, sill and smoothness parameter + if mname == 'matern': + # a is max(x), C0 is max(y) s is limited to 20? + bounds = [np.nanmax(x), np.nanmax(y), 20.] - # use range and sill - else: - # a is max(x), C0 is max(y) - bounds = [np.nanmax(x), np.nanmax(y)] + # use range, sill and shape parameter + elif mname == 'stable': + # a is max(x), C0 is max(y) s is limited to 2? + bounds = [np.nanmax(x), np.nanmax(y), 2.] - # if use_nugget is True add the nugget - if self.use_nugget: - bounds.append(0.99*np.nanmax(y)) + # use only sill + elif mname == 'nugget': + # a is max(x): + bounds = [np.nanmax(x)] - return bounds + # use range and sill + else: + # a is max(x), C0 is max(y) + bounds = [np.nanmax(x), np.nanmax(y)] + + # if use_nugget is True add the nugget + if self.use_nugget: + bounds.append(0.99*np.nanmax(y)) + + all_bounds += bounds + + return all_bounds def data(self, n=100, force=False): """Theoretical variogram function @@ -2532,29 +2565,58 @@ def fnname(fn): rdict = dict( model=fnname(self._model) if not self._harmonize else "harmonize", estimator=fnname(self._estimator), - dist_func=fnname(self.dist_function), - - normalized_effective_range=cof[0] * maxlag, - normalized_sill=cof[1] * maxvar, - normalized_nugget=cof[-1] * maxvar if self.use_nugget else 0, - - effective_range=cof[0], - sill=cof[1], - nugget=cof[-1] if self.use_nugget else 0, - ) + dist_func=fnname(self.dist_function)) - # handle s parameters for matern and stable model - if self._model.__name__ == 'matern': - rdict['smoothness'] = cof[2] - elif self._model.__name__ == 'stable': - rdict['shape'] = cof[2] + def create_dict_for_model(model_name, cof, maxlag, maxvar, use_nugget, id = ''): + """ + Create dictionary of parameters for an individual model. + Optionally, append a model ID to the key (for the case of summed models). + """ + # id appended to the param key name to differentiate models for a sum + if id != '': + id = '_' + id + + d = { + 'normalized_effective_range' + id: cof[0] * maxlag, + 'normalized_sill' + id: cof[1] * maxvar, + 'normalized_nugget' + id: cof[-1] * maxvar if use_nugget else 0, + 'effective_range' + id: cof[0], + 'sill' + id: cof[1], + 'nugget' + id: cof[-1] if use_nugget else 0, + } + + # handle s parameters for matern and stable model + if model_name == 'matern': + d['smoothness' + id] = cof[2] + elif model_name == 'stable': + d['shape' + id] = cof[2] + + return d + + # for a sum of models + if "+" in self._model_name: + list_model_names = self._model_name.split("+") + list_argslices = self._get_argpos_sum_models(list_model_names=list_model_names) + all_params = {} + # add the parameters for each model, with parameter suffix from 1 to the total number + for i in range(list_model_names): + model_params = create_dict_for_model(model_name=list_model_names[i], cof=cof[list_argslices[i]], + maxlag=maxlag, maxvar=maxvar, use_nugget=self.use_nugget, id=str(i+1)) + all_params.update(model_params) + + # for a single model + else: + all_params = create_dict_for_model(model_name=self._model_name, cof=cof, maxlag=maxlag, maxvar=maxvar, + use_nugget=self.use_nugget) + # update dictionary + rdict.update(all_params) # add other stuff if not short version requested if not short: kwargs = self._kwargs params = dict( estimator=self._estimator.__name__, - model=self._model.__name__, + model=self._model_name, dist_func=str(self.dist_function), bin_func=self._bin_func_name, normalize=self.normalized, @@ -2589,40 +2651,64 @@ def parameters(self): params : list [range, sill, nugget] for most models and [range, sill, shape, nugget] for matern and stable model. + [range1, sill1, nugget1, range2, still2, nugget2] for a sum of 2 models. """ d = self.describe() if 'error' in d: return [None, None, None] - elif self._model.__name__ == 'matern': - return list([ - d['effective_range'], - d['sill'], - d['smoothness'], - d['nugget'] - ]) - elif self._model.__name__ == 'stable': - return list([ - d['effective_range'], - d['sill'], - d['shape'], - d['nugget'] - ]) - elif self._model.__name__ == 'nugget': - return list([d['nugget']]) + + def get_params_list_from_dict(d, model_name, id=''): + + # ID used to differentiate params for a sum of models + if id != '': + id = '_'+id + + # Get specific smoothness and shape parameters for matern and stable + if model_name == 'matern': + return list([ + d['effective_range'+id], + d['sill'+id], + d['smoothness'+id], + d['nugget'+id] + ]) + elif model_name == 'stable': + return list([ + d['effective_range'+id], + d['sill'+id], + d['shape'+id], + d['nugget'+id] + ]) + # Get nugget for only-nugget + elif model_name == 'nugget': + return list([d['nugget'+id]]) + # Or get classic parameters + else: + return list([ + d['effective_range'+id], + d['sill'+id], + d['nugget'+id] + ]) + + # Get parameters for a sum of models + if '+' in self._model_name: + list_model_names = self._model_name.split('+') + list_params = [] + for i in range(len(list_model_names)): + params = get_params_list_from_dict(d, model_name=list_model_names[i], id=str(i+1)) + list_params += params + # Or for a single model else: - return list([ - d['effective_range'], - d['sill'], - d['nugget'] - ]) + list_params = get_params_list_from_dict(d, model_name=self._model_name) + + return list_params def to_DataFrame(self, n=100, force=False): """Variogram DataFrame Returns the fitted theoretical variogram as a pandas.DataFrame - instance. The n and force parameter control the calaculation, - refer to the data funciton for more info. + instance. The n and force parameter control the calculation, + refer to the data function for more info. .. deprecated:: 1.0.10 The return value of this function will change with a future release @@ -2651,7 +2737,7 @@ def to_DataFrame(self, n=100, force=False): return DataFrame({ 'lags': lags, - self._model.__name__: data} + self._model_name: data} ).copy() def to_gstools(self, **kwargs): @@ -2977,7 +3063,7 @@ def __repr__(self): # pragma: no cover :return: """ try: - _name = self._model.__name__ + _name = self._model_name _b = int(len(self.bins)) except Exception: return "< abstract Variogram >" From daf6edad77373d18e13034efd6bc32492619cd8b Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Mon, 21 Aug 2023 14:15:02 -0800 Subject: [PATCH 06/30] Add more tests and correct bugs --- skgstat/Variogram.py | 18 ++++-- skgstat/tests/test_variogram.py | 107 +++++++++++++++++++++++++++++--- 2 files changed, 113 insertions(+), 12 deletions(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index 1ca92a8..0b308b9 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -1655,8 +1655,8 @@ def wrapped(*args): # Else, inspect the function for the number of arguments else: # The number of arguments of argspec minus one is what we initialized - argspec = inspect.getfullargspec(self._model) - nb_args = len(argspec) - 1 + argspec = inspect.getfullargspec(self._model.__wrapped__) + nb_args = len(argspec.args) - 1 if bounds is None: bounds = (0, [np.maximum(np.nanmax(x), np.nanmax(y))] * nb_args) if p0 is None: @@ -2593,13 +2593,18 @@ def create_dict_for_model(model_name, cof, maxlag, maxvar, use_nugget, id = ''): return d + # for a custom model: we list the optimized params for the function wrapped by the variogram decorator + if self._is_model_custom: + custom_arg_names = inspect.getfullargspec(self._model.__wrapped__).args + all_params = {"param"+str(i+1)+"_"+custom_arg_names[i+1]: cof[i] for i in range(len(custom_arg_names) - 1)} + # for a sum of models - if "+" in self._model_name: + elif "+" in self._model_name: list_model_names = self._model_name.split("+") list_argslices = self._get_argpos_sum_models(list_model_names=list_model_names) all_params = {} # add the parameters for each model, with parameter suffix from 1 to the total number - for i in range(list_model_names): + for i in range(len(list_model_names)): model_params = create_dict_for_model(model_name=list_model_names[i], cof=cof[list_argslices[i]], maxlag=maxlag, maxvar=maxvar, use_nugget=self.use_nugget, id=str(i+1)) all_params.update(model_params) @@ -2652,6 +2657,7 @@ def parameters(self): [range, sill, nugget] for most models and [range, sill, shape, nugget] for matern and stable model. [range1, sill1, nugget1, range2, still2, nugget2] for a sum of 2 models. + [param1, param2, param3, ...] in order for a custom model. """ d = self.describe() @@ -2691,7 +2697,9 @@ def get_params_list_from_dict(d, model_name, id=''): ]) # Get parameters for a sum of models - if '+' in self._model_name: + if self._is_model_custom: + list_params = self.cof + elif '+' in self._model_name: list_model_names = self._model_name.split('+') list_params = [] for i in range(len(list_model_names)): diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index 743bc5a..1702353 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -1027,8 +1027,8 @@ def test_rmse(self): V = Variogram(self.c, self.v) for model, rmse in zip( - ['spherical', 'gaussian', 'stable'], - [3.3705, 3.3707, 3.193] + ['spherical', 'gaussian', 'stable', 'spherical+gaussian'], + [3.3705, 3.3707, 3.193, 3.0653] ): V.set_model(model) self.assertAlmostEqual(V.rmse, rmse, places=4) @@ -1037,8 +1037,8 @@ def test_mean_residual(self): V = Variogram(self.c, self.v) for model, mr in zip( - ['spherical', 'cubic', 'stable'], - [2.6803, 2.6803, 2.6966] + ['spherical', 'cubic', 'stable', 'spherical+gaussian'], + [2.6803, 2.6803, 2.6966, 2.4723] ): V.set_model(model) self.assertAlmostEqual(V.mean_residual, mr, places=4) @@ -1047,8 +1047,8 @@ def test_nrmse(self): V = Variogram(self.c, self.v, n_lags=15) for model, nrmse in zip( - ['spherical', 'gaussian', 'stable', 'exponential'], - [0.3536, 0.3535, 0.3361, 0.3499] + ['spherical', 'gaussian', 'stable', 'exponential', 'spherical+gaussian'], + [0.3536, 0.3535, 0.3361, 0.3499, 0.3264] ): V.set_model(model) self.assertAlmostEqual(V.nrmse, nrmse, places=4) @@ -1215,7 +1215,75 @@ def test_data_normalized(self): [0., 13.97, 13.97, 13.97, 13.97], decimal=2 ) - + + def test_set_model(self): + """Test setting model: individual, sum or custom""" + V = self.V.clone() + + # Individual model + for model_name in ['spherical', 'gaussian', 'exponential', 'cubic', 'matern', 'stable']: + V.set_model(model_name) + assert V._model_name == model_name + assert V._model == globals()[model_name] + assert V._is_model_custom is False + + # Sum of models + for model_name in ['spherical+gaussian', 'cubic+matern+stable']: + V.set_model(model_name) + assert V._model_name == model_name + assert V._is_model_custom is False + + # Custom model + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1, b1) + x + + V.set_model(custom_model) + assert V._model_name == "custom_model" + assert V._model == custom_model + assert V._is_model_custom is True + + def test_describe(self): + """Test the describe functions for different models""" + V = self.V.clone() + + keys_model = ['normalized_effective_range', 'normalized_sill', 'normalized_nugget', + 'effective_range', 'sill', 'nugget'] + + # Individual model: normal keys should be in dict + for model_name in ['spherical', 'gaussian', 'exponential', 'cubic', 'matern', 'stable']: + V.set_model(model_name) + dict = V.describe() + for key in keys_model: + assert key in dict + if model_name == 'matern': + assert 'smoothness' in dict + if model_name == 'stable': + assert 'shape' in dict + + # Sum of models: keys with a numbered ID should be in dict + V.set_model('spherical+gaussian') + dict = V.describe() + for key in [k+'_1' for k in keys_model] + [k+'_2' for k in keys_model]: + assert key in dict + + V.set_model('cubic+matern+stable') + dict = V.describe() + for key in [k + '_1' for k in keys_model] + [k + '_2' for k in keys_model] + [k + '_3' for k in keys_model]: + assert key in dict + assert 'smoothness_2' in dict + assert 'shape_3' in dict + + # Custom model + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1) + x + V.set_model(custom_model) + dict = V.describe() + custom_keys = ["param1_r1", "param2_c1", "param3_x"] + for key in custom_keys: + assert key in dict + def test_parameter_property_matern(self): V = self.V.clone() @@ -1232,6 +1300,31 @@ def test_parameter_property_stable(self): V.set_model('stable') assert_array_almost_equal(V.parameters, param, decimal=2) + def test_parameter_property_sum_models(self): + V = self.V.clone() + + # Using models with 3 parameters (range, sill, nugget) + param = [3.67, 10.61, 0, 42.3, 5.02, 0] + V.set_model('spherical+gaussian') + assert_array_almost_equal(V.parameters, param, decimal=2) + + # Using a stable model with 4 parameters + param2 = [35.77, 10.97, 0, 0, 42.3, 4.74, 0] + V.set_model('stable+cubic') + assert_array_almost_equal(V.parameters, param2, decimal=2) + + def test_parameter_property_custom_model(self): + V = self.V.clone() + + # Custom model will give parameters back in the order of the custom function + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1) + x + + param = [42.3, 5.76, 9.79] + V.set_model(custom_model) + assert_array_almost_equal(V.parameters, param, decimal=2) + class TestVariogramPlotlyPlots(unittest.TestCase): def setUp(self): From 2df8592deb73363b3391077693086b4af9c732b8 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Mon, 21 Aug 2023 14:21:45 -0800 Subject: [PATCH 07/30] Remove static typing, move to description --- skgstat/Variogram.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index 0b308b9..fd2ae8a 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -988,9 +988,9 @@ def set_model(self, model_name): self._model = model_name self._model_name = model_name.__name__ - def _get_argpos_sum_models(self, list_model_names: list[str]) -> list[slice]: + def _get_argpos_sum_models(self, list_model_names): """ - Get argument slice position for each model of a list of models (to build from total args, or all fit parameters) + Get arg slice position (list of slices) for the sum of model of a list of model names (list of strings). """ # Doing this here for other functions (fit, describe, etc), even though already done in _build_sum_models @@ -2696,9 +2696,10 @@ def get_params_list_from_dict(d, model_name, id=''): d['nugget'+id] ]) - # Get parameters for a sum of models + # For a custom model, just pass cof in order if self._is_model_custom: list_params = self.cof + # Get parameters for a sum of models elif '+' in self._model_name: list_model_names = self._model_name.split('+') list_params = [] From 5d37015ae20a7573969c1cf8c2c014f26affebea Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Mon, 21 Aug 2023 15:11:45 -0800 Subject: [PATCH 08/30] Fix instantiation and fix tests --- skgstat/Variogram.py | 8 ++++---- skgstat/tests/test_variogram.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index fd2ae8a..afa9020 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -334,6 +334,10 @@ def __init__(self, if fit_method is not None: self.preprocessing(force=True) + # set if nugget effect shall be used + self._use_nugget = None + self.use_nugget = use_nugget + # model can be a function or a string self._model = None self._model_name = None @@ -343,10 +347,6 @@ def __init__(self, # specify if the lag should be given absolute or relative to the maxlag self._normalized = normalize - # set if nugget effect shall be used - self._use_nugget = None - self.use_nugget = use_nugget - # set the fitting method and sigma array self._fit_method = None if fit_method is not None: diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index 1702353..24e1e0c 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -634,6 +634,35 @@ def test_nofit(self): assert V.cov is None assert V.cof is None + def test_model(self): + """ + Test that all types of models instantiate (test_set_model() only checks overwriting already instantiated vario) + """ + + # Individual model + for model_name in ['spherical', 'gaussian', 'exponential', 'cubic', 'matern', 'stable']: + V = Variogram(self.c, self.v, model=model_name) + assert V._model_name == model_name + assert V._model == globals()[model_name] + assert V._is_model_custom is False + + # Sum of models + for model_name in ['spherical+gaussian', 'cubic+matern+stable']: + V = Variogram(self.c, self.v, model=model_name) + assert V._model_name == model_name + assert V._is_model_custom is False + + # Custom model + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1, b1) + x + + V = Variogram(self.c, self.v, model=custom_model) + assert V._model_name == "custom_model" + assert V._model == custom_model + assert V._is_model_custom is True + + def test_get_bin_count(self): V = Variogram(self.c, self.v) From 66aff871d70d35f4d8b3f5d3a5bc53c4633b82fc Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Mon, 21 Aug 2023 15:19:26 -0800 Subject: [PATCH 09/30] Add tests for plotting --- skgstat/tests/test_variogram.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index 24e1e0c..dec3724 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -636,7 +636,8 @@ def test_nofit(self): def test_model(self): """ - Test that all types of models instantiate (test_set_model() only checks overwriting already instantiated vario) + Test that all types of models instantiate properly + (to complement test_set_model() that only checks already instantiated vario) """ # Individual model @@ -1362,7 +1363,15 @@ def setUp(self): self.c = np.random.gamma(10, 4, (150, 2)) np.random.seed(42) self.v = np.random.normal(10, 4, 150) + # V = individual model self.V = Variogram(self.c, self.v) + # V2 = sum of models + self.V2 = Variogram(self.c, self.v, model='spherical+gaussian') + # V3 = custom model + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1) + x + self.V3 = Variogram(self.c, self.v, model=custom_model) def test_plotly_main_plot(self): if PLOTLY_FOUND: @@ -1373,6 +1382,14 @@ def test_plotly_main_plot(self): isinstance(self.V.plot(show=False), go.Figure) ) + self.assertTrue( + isinstance(self.V2.plot(show=False), go.Figure) + ) + + self.assertTrue( + isinstance(self.V3.plot(show=False), go.Figure) + ) + plotting.backend('matplotlib') def test_plotly_scattergram(self): From 82fa55916d4a214858e11c5b36c08961f9bcb6be Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Mon, 21 Aug 2023 16:36:21 -0800 Subject: [PATCH 10/30] Document fit_bounds and fit_p0, and add tests --- skgstat/Variogram.py | 48 +++++++++++++++++++++++-- skgstat/tests/test_variogram.py | 62 +++++++++++++++++++++++++++++++-- 2 files changed, 104 insertions(+), 6 deletions(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index afa9020..c8c4e4b 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -246,6 +246,24 @@ def __init__(self, If present, the plot will indicate the confidence interval as error bars around the experimental variogram. + fit_bounds: 2-tuple of array_like or Bounds, optional + .. versionadded:: 1.0.12 + + Lower and upper bounds on parameters passed to scipy.optimize.curve_fit. + + Order is typically (range, sill, nugget) or (range, sill, smoothness, nugget) for individual models, or + (range1, sill1, nugget1, range2, sill2, nugget2) for a sum of 2 models. + Recommended for custom models, where bounds cannot be determined logically. + For internal models, defaults to known min/max values for the sill (0, max variance), range (0, max lag) + and smoothness (0, 2) or (0, 20) for stable and matern, respectively. + + fit_p0: array_like, optional + .. versionadded:: 1.0.12 + + Initial guess for the parameters passed to scipy.optimize.curve_fit. + + Same order as for fit_bounds. + Defaults to upper bounds values. For custom models, if no bounds are defined, defaults to 1. """ # Before we do anything else, make kwargs available self._kwargs = self._validate_kwargs(**kwargs) @@ -363,7 +381,9 @@ def __init__(self, # do the preprocessing and fitting upon initialization # Note that fit() calls preprocessing - self.fit(force=True) + fit_bounds = self._kwargs.get('fit_bounds') # returns None if empty + fit_p0 = self._kwargs.get('fit_p0') + self.fit(force=True, bounds=fit_bounds, p0=fit_p0) # finally check if any of the uncertainty propagation kwargs are set self._experimental_conf_interval = None @@ -1576,6 +1596,21 @@ def fit(self, force=False, method=None, sigma=None, bounds=None, p0=None, **kwar Uncertainty array for the bins. Has to have the same dimension as self.bins. Refer to Variogram.fit_sigma for more information. + bounds: 2-tuple of array_like or Bounds, optional + Lower and upper bounds on parameters passed to scipy.optimize.curve_fit. + + Order is typically (range, sill, nugget) or (range, sill, smoothness, nugget) for individual models, or + (range1, sill1, nugget1, range2, sill2, nugget2) for a sum of 2 models. + Recommended for custom models, where bounds cannot be determined logically. + For internal models, defaults to known min/max values for the sill (0, max variance), range (0, max lag) + and smoothness (0, 2) or (0, 20) for stable and matern, respectively. + + p0: array_like, optional + Initial guess for the parameters passed to scipy.optimize.curve_fit. + + Same order as for fit_bounds. + Defaults to upper bounds values. For custom models, if no bounds are defined, defaults to 1. + Returns ------- void @@ -1658,9 +1693,16 @@ def wrapped(*args): argspec = inspect.getfullargspec(self._model.__wrapped__) nb_args = len(argspec.args) - 1 if bounds is None: - bounds = (0, [np.maximum(np.nanmax(x), np.nanmax(y))] * nb_args) + warnings.warn("Parameter bounds cannot be logically derived for a custom model during the fit. " + "User bounds can be passed using fit(..., bounds=) or Variogram(..., fit_bounds=).", UserWarning) + bounds = ([-np.inf] * nb_args, [np.inf] * nb_args) if p0 is None: - p0 = np.asarray(bounds[1]) + # If all bounds are infinite (not defined, pass 1) + if all(~np.isfinite(bounds).flatten()): + p0 = np.ones(nb_args) + # Else pass the upper bounds + else: + p0 = np.asarray(bounds[1]) def wrapped(*args): return self._model(*args) diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index dec3724..6d7e0ea 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -656,7 +656,7 @@ def test_model(self): # Custom model @variogram def custom_model(h, r1, c1, x): - return spherical(h, r1, c1, b1) + x + return spherical(h, r1, c1) + x V = Variogram(self.c, self.v, model=custom_model) assert V._model_name == "custom_model" @@ -1034,6 +1034,62 @@ def test_build_sum_models(self): sum((model(0, 1, 1, 1)) for model in [stable, matern]) assert sum_spherical(0, *[1, ] * min_nb_args) == sum_res + def test_fit_bounds(self): + """ + Test the fit_bounds kwarg of Variogram and bounds args of fit() + """ + Vnofit = Variogram(self.c, self.v, fit_method=None) + + bounds=(0, [np.max(Vnofit.bins), np.max(Vnofit.experimental)]) + + # Initiate variogram with bounds for fit + V = Variogram(self.c, self.v, fit_bounds=bounds) + # Same but calling fit + V.fit(bounds=bounds) + + # For a sum of models + V = Variogram(self.c, self.v, model='spherical+gaussian', fit_bounds=(0, bounds[1]*2)) + # Same but calling fit + V.fit(bounds=(0, bounds[1]*2)) + + # Check an error is raised for a custom model + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1) + x + + with self.assertWarns(UserWarning): + Variogram(self.c, self.v, model=custom_model) + + bounds_custom = [(0, 0, 0), (np.max(self.c), np.max(self.v), np.max(self.v))] + + # And that no error is raised otherwise + Variogram(self.c, self.v, model=custom_model, fit_bounds=bounds_custom) + + def test_fit_p0(self): + """ + Test the fit_p0 kwarg of Variogram and bounds args of fit() + """ + Vnofit = Variogram(self.c, self.v, fit_method=None) + p0 = (np.max(Vnofit.bins), np.max(Vnofit.experimental)) + + # Initiate variogram with bounds for fit + V = Variogram(self.c, self.v, fit_p0=p0) + # Same but calling fit + V.fit(p0=p0) + + # For a sum of models + V = Variogram(self.c, self.v, model='spherical+gaussian', fit_p0=p0 * 2) + # Same but calling fit + V.fit(p0=p0 * 2) + + # For a custom model + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1) + x + + p0_custom = (np.max(Vnofit.bins), np.max(Vnofit.experimental), np.max(Vnofit.experimental)) + Variogram(self.c, self.v, model=custom_model, fit_p0=p0_custom) + class TestVariogramQualityMeasures(unittest.TestCase): def setUp(self): @@ -1266,7 +1322,7 @@ def test_set_model(self): # Custom model @variogram def custom_model(h, r1, c1, x): - return spherical(h, r1, c1, b1) + x + return spherical(h, r1, c1) + x V.set_model(custom_model) assert V._model_name == "custom_model" @@ -1351,7 +1407,7 @@ def test_parameter_property_custom_model(self): def custom_model(h, r1, c1, x): return spherical(h, r1, c1) + x - param = [42.3, 5.76, 9.79] + param = [1., -0.33, 14.] V.set_model(custom_model) assert_array_almost_equal(V.parameters, param, decimal=2) From 4185c755e523b339f02393f0f322fc7331a58ff0 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Mon, 21 Aug 2023 16:43:23 -0800 Subject: [PATCH 11/30] Update model argument description --- skgstat/Variogram.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index c8c4e4b..67159d7 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -92,8 +92,12 @@ def __init__(self, differences, aligned to the 1D distance matrix (flattened upper triangle) and return a scalar, that converges towards small values for similarity (high covariance). - model : str - String identifying the theoretical variogram function to be used + model : str | Callable + .. versionchanged:: 1.0.12 + Added support for sum of models (e.g., "spherical+gaussian"), or custom model (Callable). Using + `fit_bounds` to optimized the fit is recommended for custom models, and can be useful for sum of models. + + String or callable identifying the theoretical variogram function to be used to describe the experimental variogram. Can be one of: * spherical [Spherical, default] @@ -104,6 +108,8 @@ def __init__(self, * matern [Matérn model] * nugget [nugget effect variogram] + Any number of these theoretical models can be summed using "+" iteratively, e.g. "spherical+cubic+matern". + dist_func : str String identifying the distance function. Defaults to 'euclidean'. Can be any metric accepted by From bc4e1708876aa65e5e7b6d9b5153c3c26dc7c146 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Mon, 21 Aug 2023 16:56:20 -0800 Subject: [PATCH 12/30] Fix random test and inf bound check --- skgstat/Variogram.py | 2 +- skgstat/tests/test_variogram.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index 67159d7..d6aa1f3 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -1704,7 +1704,7 @@ def wrapped(*args): bounds = ([-np.inf] * nb_args, [np.inf] * nb_args) if p0 is None: # If all bounds are infinite (not defined, pass 1) - if all(~np.isfinite(bounds).flatten()): + if bounds == ([-np.inf] * nb_args, [np.inf] * nb_args): p0 = np.ones(nb_args) # Else pass the upper bounds else: diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index 6d7e0ea..55ada86 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -1407,8 +1407,11 @@ def test_parameter_property_custom_model(self): def custom_model(h, r1, c1, x): return spherical(h, r1, c1) + x - param = [1., -0.33, 14.] + param = [42.3, 5.76, 9.79] V.set_model(custom_model) + + # Provide bounds to avoid a random fit + V.fit(bounds=(0, [np.max(V.bins), np.max(V.experimental), np.max(V.experimental)])) assert_array_almost_equal(V.parameters, param, decimal=2) From aa2d7edf777484f6b59e8bb78dd54d6d826ca083 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Mon, 21 Aug 2023 17:15:23 -0800 Subject: [PATCH 13/30] Fix warnings in test_variogram --- skgstat/Variogram.py | 2 +- skgstat/tests/test_variogram.py | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index d6aa1f3..1fe4809 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -1070,7 +1070,7 @@ def _build_sum_models(self, sum_models_name: str): # Distribute first argument (lag) and use all others in order (nugget ignored when last argument not passed) @models.variogram def sum_models(h, *args): - return np.sum(list_models[i](h, *args[args_slices[i]]) for i in range(len(list_models))) + return sum(list_models[i](h, *args[args_slices[i]]) for i in range(len(list_models))) return sum_models diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index 55ada86..eae4b77 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -658,12 +658,15 @@ def test_model(self): def custom_model(h, r1, c1, x): return spherical(h, r1, c1) + x - V = Variogram(self.c, self.v, model=custom_model) + with self.assertWarns(UserWarning): + V = Variogram(self.c, self.v, model=custom_model) + assert V._model_name == "custom_model" assert V._model == custom_model assert V._is_model_custom is True + def test_get_bin_count(self): V = Variogram(self.c, self.v) @@ -987,7 +990,8 @@ def test_fit_custom_model(self): def sum_spherical(h, r1, c1, r2, c2, b1, b2): return spherical(h, r1, c1, b1) + spherical(h, r2, c2, b2) - V = Variogram(self.c, self.v, use_nugget=True, model=sum_spherical) + with self.assertWarns(UserWarning) as w: + V = Variogram(self.c, self.v, use_nugget=True, model=sum_spherical) # Check that 6 parameters were found assert len(V.cof) == 6 @@ -1088,7 +1092,8 @@ def custom_model(h, r1, c1, x): return spherical(h, r1, c1) + x p0_custom = (np.max(Vnofit.bins), np.max(Vnofit.experimental), np.max(Vnofit.experimental)) - Variogram(self.c, self.v, model=custom_model, fit_p0=p0_custom) + with self.assertWarns(UserWarning) as w: + Variogram(self.c, self.v, model=custom_model, fit_p0=p0_custom) class TestVariogramQualityMeasures(unittest.TestCase): @@ -1365,7 +1370,8 @@ def test_describe(self): def custom_model(h, r1, c1, x): return spherical(h, r1, c1) + x V.set_model(custom_model) - dict = V.describe() + with self.assertWarns(UserWarning) as w: + dict = V.describe() custom_keys = ["param1_r1", "param2_c1", "param3_x"] for key in custom_keys: assert key in dict @@ -1430,7 +1436,8 @@ def setUp(self): @variogram def custom_model(h, r1, c1, x): return spherical(h, r1, c1) + x - self.V3 = Variogram(self.c, self.v, model=custom_model) + self.V3 = Variogram(self.c, self.v, model=custom_model, + fit_bounds=(0, [np.max(self.V.bins), np.max(self.V.experimental), np.max(self.V.experimental)])) def test_plotly_main_plot(self): if PLOTLY_FOUND: From ce9aa857e46253e25aa58fba106cd1512e7134db Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Tue, 22 Aug 2023 15:02:16 -0800 Subject: [PATCH 14/30] Fix typo in description --- skgstat/Variogram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index 1fe4809..f8b40dc 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -95,7 +95,7 @@ def __init__(self, model : str | Callable .. versionchanged:: 1.0.12 Added support for sum of models (e.g., "spherical+gaussian"), or custom model (Callable). Using - `fit_bounds` to optimized the fit is recommended for custom models, and can be useful for sum of models. + `fit_bounds` to optimize the fit is recommended for custom models, and can be useful for sum of models. String or callable identifying the theoretical variogram function to be used to describe the experimental variogram. Can be one of: From de3a948e911f503b0acece86f1f960a6df198f10 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 21 Sep 2023 11:24:02 -0800 Subject: [PATCH 15/30] Fix floating precision error --- skgstat/tests/test_variogram.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index eae4b77..116c688 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -1122,7 +1122,7 @@ def test_rmse(self): [3.3705, 3.3707, 3.193, 3.0653] ): V.set_model(model) - self.assertAlmostEqual(V.rmse, rmse, places=4) + self.assertAlmostEqual(V.rmse, rmse, places=3) def test_mean_residual(self): V = Variogram(self.c, self.v) @@ -1132,7 +1132,7 @@ def test_mean_residual(self): [2.6803, 2.6803, 2.6966, 2.4723] ): V.set_model(model) - self.assertAlmostEqual(V.mean_residual, mr, places=4) + self.assertAlmostEqual(V.mean_residual, mr, places=3) def test_nrmse(self): V = Variogram(self.c, self.v, n_lags=15) @@ -1142,12 +1142,12 @@ def test_nrmse(self): [0.3536, 0.3535, 0.3361, 0.3499, 0.3264] ): V.set_model(model) - self.assertAlmostEqual(V.nrmse, nrmse, places=4) + self.assertAlmostEqual(V.nrmse, nrmse, places=3) def test_nrmse_r(self): V = Variogram(self.c, self.v, estimator='cressie') - self.assertAlmostEqual(V.nrmse_r, 0.63543, places=5) + self.assertAlmostEqual(V.nrmse_r, 0.63543, places=3) def test_r(self): V = Variogram(self.c, self.v, n_lags=12, normalize=False) From 2e317416147248267f57dd595842c7be7213e0b8 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 21 Sep 2023 12:14:00 -0800 Subject: [PATCH 16/30] Add example in user guide --- docs/userguide/variogram.rst | 47 ++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/docs/userguide/variogram.rst b/docs/userguide/variogram.rst index 1d35de3..bf11cbb 100644 --- a/docs/userguide/variogram.rst +++ b/docs/userguide/variogram.rst @@ -766,6 +766,53 @@ variogram is rather showing a Gaussian or exponential behavior. If you would like to export a Variogram instance to gstools, the smoothness parameter may not be smaller than ``0.2``. +Sum of models +~~~~~~~~~~~~~ + +All the above models can be combined into a sum of models, in order to fit more complex empirical variograms that might +capture signals with multiple correlations ranges. +To fit with a sum of models, simply add a "+" between any number of models: + +.. ipython:: python + :okwarning: + + # Fit a sum of two spherical models + V.model = "spherical+spherical" + + @savefig sum_of_models.png width=8in + V.plot(); + +Custom model +~~~~~~~~~~~~ + +Additionally, any custom model can be passed to the model argument to fit a specific function to the empirical +variogram. This model needs to be built with the ``@variogram`` decorator, which requires the lag argument +(typically, ``h``) to be listed first in the custom function. + +.. caution:: + + When using a custom model, it is highly recommended to define the parameter bounds as these cannot be derived + logically by SciKit-GStat as for other models. This is done by passing either ``fit_bounds`` to ``Variogram()`` + at instantiation or ``bounds`` to ``Variogram.fit()``. + + +.. ipython:: python + :okwarning: + + # Build a custom model by applying the @variogram decorator (here adding a linear term to a spherical model) + from skgstat.models import variogram, spherical + @variogram + def custom_model(h, r1, c1, a): + return spherical(h, r1, c1) + h * a + V.model = custom_model + + # We define the bounds for r1, c1 and a + bounds_custom = [(0, 0, 0), (np.max(V.bins), np.max(V.experimental), 2)] + V.fit(bounds=bounds_custom) + + @savefig custom_model.png width=8in + V.plot(); + When direction matters ====================== From 69853396071ef3cd25a9a3c36d3e79d13944eeec Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 21 Sep 2023 13:15:31 -0800 Subject: [PATCH 17/30] Make nugget consistent for a sum of models --- skgstat/Variogram.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index f8b40dc..524977f 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -109,6 +109,7 @@ def __init__(self, * nugget [nugget effect variogram] Any number of these theoretical models can be summed using "+" iteratively, e.g. "spherical+cubic+matern". + The nugget parameters of the models are removed except for the last model (sum of nuggets = single nugget). dist_func : str String identifying the distance function. Defaults to @@ -192,7 +193,8 @@ def __init__(self, Defaults to False. If True, a nugget effet will be added to all Variogram.models as a third (or fourth) fitting parameter. A nugget is essentially the y-axis interception of the theoretical - variogram function. + variogram function. For a sum of variogram, the nugget is defined + in its last model. maxlag : float, str Can specify the maximum lag distance directly by giving a value larger than 1. The binning function will not find any lag class @@ -1016,25 +1018,20 @@ def set_model(self, model_name): def _get_argpos_sum_models(self, list_model_names): """ - Get arg slice position (list of slices) for the sum of model of a list of model names (list of strings). + Get argument slice position (list of slices) for the sum of models from a list of model names (list of strings). """ # Doing this here for other functions (fit, describe, etc), even though already done in _build_sum_models list_models = [getattr(models, model_name.lower()).py_func for model_name in list_model_names] - # Get the number of arguments per model - nb_args_per_model = [len(inspect.getfullargspec(model).args) for model in list_models] + # Get the number of arguments per model (e.g., [3, 4, 4]) + nb_args_per_model = np.array([len(inspect.getfullargspec(model).args) for model in list_models]) - # If nugget is used - if self.use_nugget: - - # Compute cumulative number of args removing the lags - cum_args_minus_lag = np.cumsum(np.array(nb_args_per_model) - 1) - - # If nugget is not used - else: - # Same removing also the nugget - cum_args_minus_lag = np.cumsum(np.array(nb_args_per_model) - 2) + # We remove the nugget and lags parameters, except nugget for the last model (sum of nuggets = single nugget) + nb_args_per_model -= 2 + nb_args_per_model[-1] += 1 + # Compute cumulative number of args removing 2 args everywhere (all lags and nuggets, last one will compensate) + cum_args_minus_lag = np.cumsum(nb_args_per_model) # Prepare argument slices to distribute to submodels args_indices = np.insert(cum_args_minus_lag, 0, 0) # We add the first indice of 0 From fbff1102360d1a222c2d45bb2c82841143eec8ba Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 21 Sep 2023 13:52:00 -0800 Subject: [PATCH 18/30] Finalize nugget for sum and add tests --- skgstat/Variogram.py | 6 +++--- skgstat/tests/test_variogram.py | 16 ++++++++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index 524977f..3fffc89 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -2112,7 +2112,7 @@ def __get_fit_bounds(self, x, y): # we append all bounds (for one or several models) all_bounds = [] - for mname in list_mname: + for i, mname in enumerate(list_mname): # use range, sill and smoothness parameter if mname == 'matern': @@ -2134,8 +2134,8 @@ def __get_fit_bounds(self, x, y): # a is max(x), C0 is max(y) bounds = [np.nanmax(x), np.nanmax(y)] - # if use_nugget is True add the nugget - if self.use_nugget: + # if use_nugget is True add the nugget (for the last model only in case it is a sum) + if self.use_nugget and i == (len(list_mname) - 1): bounds.append(0.99*np.nanmax(y)) all_bounds += bounds diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index 116c688..8cb131b 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -990,11 +990,23 @@ def test_fit_custom_model(self): def sum_spherical(h, r1, c1, r2, c2, b1, b2): return spherical(h, r1, c1, b1) + spherical(h, r2, c2, b2) - with self.assertWarns(UserWarning) as w: + with self.assertWarns(UserWarning): + # Custom models should ignore the nugget setting, so let's try both and check V = Variogram(self.c, self.v, use_nugget=True, model=sum_spherical) + V2 = Variogram(self.c, self.v, use_nugget=False, model=sum_spherical) # Check that 6 parameters were found - assert len(V.cof) == 6 + assert len(V.cof) == len(V2.cof) == 6 + + def test_fit_sum_models_nugget(self): + + # Define a sum of variogram and run the fit + V = Variogram(self.c, self.v, model="spherical+spherical", use_nugget=False) + V2 = Variogram(self.c, self.v, model="spherical+spherical", use_nugget=True) + + # Check that 4 parameters were found without nugget, 5 otherwise + assert len(V.cof) == 4 + assert len(V2.cof) == 5 def test_build_sum_models(self): From f4d33f180d25ef083bf2ec2351d246deaacd52d6 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Mon, 2 Oct 2023 10:37:59 -0800 Subject: [PATCH 19/30] Rerun tests with scipy 1.11.3 --- skgstat/tests/test_variogram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index 8cb131b..62ea1de 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -1026,7 +1026,7 @@ def test_build_sum_models(self): sum_spherical = V._build_sum_models("spherical+spherical") # Testing the same way as in test_models/test_sum_spherical - # Parameters for the two spherical models + # Parameters for the two spherical models params = [1, 0.3, 10, 0.7] # Values at which we'll evaluate the function and its expected result From 34388c64e57869fc8fc82ab35c7957a153451c2a Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 5 Oct 2023 11:10:51 -0700 Subject: [PATCH 20/30] Force SciPy versions to before 1.11.1 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d418082..bc1fd37 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -scipy +scipy<=1.11.1 numpy pandas matplotlib From d357c1a079643f08db818d6e5e5ff131ca00f7bf Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 5 Oct 2023 11:23:16 -0700 Subject: [PATCH 21/30] Try with NumPy version before 1.25 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index bc1fd37..513fe78 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ scipy<=1.11.1 -numpy +numpy<=1.25 pandas matplotlib numba From 56127e2948a1ade89d251ea535c62b73882e0721 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 5 Oct 2023 11:27:44 -0700 Subject: [PATCH 22/30] Try with 1.24 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 513fe78..86ae7e9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ scipy<=1.11.1 -numpy<=1.25 +numpy<=1.24 pandas matplotlib numba From 7d5d28757778bc532a6b90c4039f575ec828e60e Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 5 Oct 2023 12:20:19 -0700 Subject: [PATCH 23/30] Try 1.24.1 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 86ae7e9..915f37c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ scipy<=1.11.1 -numpy<=1.24 +numpy<=1.25.1 pandas matplotlib numba From 591ae370cff7cb2cc5de145f2ad6cae5d74f0089 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 5 Oct 2023 12:27:37 -0700 Subject: [PATCH 24/30] Try 1.24.1 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 915f37c..597e5ec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ scipy<=1.11.1 -numpy<=1.25.1 +numpy<=1.24.1 pandas matplotlib numba From d0bb82df5946d477b20ee48349d58641dca451c5 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 5 Oct 2023 12:31:33 -0700 Subject: [PATCH 25/30] Try 1.24.2 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 597e5ec..20d7386 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ scipy<=1.11.1 -numpy<=1.24.1 +numpy<=1.24.2 pandas matplotlib numba From 9a76ae7e88180b5d39b3a38c988c3a80d27f87ca Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 5 Oct 2023 12:36:17 -0700 Subject: [PATCH 26/30] Try 1.24.3 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 20d7386..82700d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ scipy<=1.11.1 -numpy<=1.24.2 +numpy<=1.24.3 pandas matplotlib numba From 35903f892b8a5bcffd8234e789c30904de525ece Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Thu, 5 Oct 2023 12:40:55 -0700 Subject: [PATCH 27/30] Try 1.25.0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 82700d0..19b014b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ scipy<=1.11.1 -numpy<=1.24.3 +numpy<=1.25.0 pandas matplotlib numba From 9ada1f4569f524c280579433417ece0ddb567220 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Mon, 9 Oct 2023 13:34:05 -0500 Subject: [PATCH 28/30] Remove NumPy version fixing --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 19b014b..bc1fd37 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ scipy<=1.11.1 -numpy<=1.25.0 +numpy pandas matplotlib numba From 59781f1cb87cbaee8815b57f4508dfce154aa8e3 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Mon, 9 Oct 2023 13:41:19 -0500 Subject: [PATCH 29/30] Change stable entropy bin test precision to 0 decimals --- skgstat/tests/test_variogram.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index 62ea1de..1c9622a 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -255,7 +255,7 @@ def test_binning_method_stable(self): assert_array_almost_equal( V.bins, np.array([4.3, 8.4, 12.8, 17.1, 21.4, 25.2, 29.9, 33.2, 38.5, 42.8]), - decimal=1 + decimal=0 ) def test_binning_method_stable_maxiter(self): @@ -265,7 +265,7 @@ def test_binning_method_stable_maxiter(self): assert_array_almost_equal( V.bins, np.array([4.3, 8.4, 12.8, 17.1, 21.4, 25.2, 29.9, 33.2, 38.5, 42.8]), - decimal=1 + decimal=0 ) def test_binning_method_stable_fix_bins(self): @@ -280,7 +280,7 @@ def test_binning_method_stable_fix_bins(self): assert_array_almost_equal( V.bins, np.array([4.2, 8.6, 12.8, 17.1, 21.2, 25.5, 29.3, 33.2, 37.4, 43.]), - decimal=1 + decimal=0 ) def test_binning_change_nlags(self): From 9690d393db5a2dfefaa09879b698b8891092ff8f Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Fri, 13 Oct 2023 12:55:09 -0500 Subject: [PATCH 30/30] Linting --- skgstat/Variogram.py | 2 +- skgstat/tests/test_variogram.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index db607ad..d2c1176 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -1044,7 +1044,7 @@ def _build_sum_models(self, sum_models_name: str): Build sum of theoretical models, variogram-decorated function. """ - # Remove all whitespaces in the string, in case the user wrote someting like "spherical + gaussian" + # Remove all whitespaces in the string, in case the user wrote something like "spherical + gaussian" sum_models_name = ''.join(sum_models_name.split()).lower() # Get individual model names diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index 8c1dfd9..c4271ee 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -1026,7 +1026,7 @@ def test_build_sum_models(self): sum_spherical = V._build_sum_models("spherical+spherical") # Testing the same way as in test_models/test_sum_spherical - # Parameters for the two spherical models + # Parameters for the two spherical models params = [1, 0.3, 10, 0.7] # Values at which we'll evaluate the function and its expected result