diff --git a/.coveragerc b/.coveragerc index 24efba7..1904344 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,12 +1,12 @@ [run] -omit = +omit = skgstat/tests/* skgstat/plotting/* docs/* setup.py [report] -exclude_lines = +exclude_lines = pragma: no cover def __repr__ def __str__ diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f906763..32ca7dd 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,15 +1,21 @@ name: Test and build docs -on: push +on: + push: + branches: + - '*' + pull_request: + branches: + - '*' jobs: test: name: Run Unittest runs-on: ubuntu-20.04 strategy: - matrix: + matrix: python: ['3.6', '3.7', '3.8', '3.9', '3.10'] - + steps: - name: Checkout uses: actions/checkout@master @@ -18,7 +24,7 @@ jobs: with: python-version: ${{ matrix.python }} - name: Install SciKit-GStat - run: | + run: | pip3 install -r requirements.txt python3 setup.py install - name: Install PyTest requirements @@ -45,7 +51,7 @@ jobs: with: python-version: '3.8' - name: Install SciKit-GStat - run: | + run: | pip3 install -r requirements.txt python3 setup.py install - name: Install Sphinx requirements @@ -55,13 +61,13 @@ jobs: - name: Install pdflatex run: sudo apt install texlive-latex-extra texlive-latex-recommended texlive-fonts-recommended pandoc - name: make HTML & LaTeX docs - run: | + run: | cd docs - make html + make html make latex continue-on-error: true - name: compile LaTeX - run: | + run: | cd docs/_build/latex pdflatex -interaction=nonstopmode -halt-on-error SciKitGStat.tex cd ../.. @@ -73,7 +79,7 @@ jobs: target_branch: gh-pages build_dir: docs/_build/html env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} release: name: Create Github release @@ -95,7 +101,7 @@ jobs: name: Publish to PyPi runs-on: ubuntu-20.04 needs: test - if: startsWith(github.event.ref, 'refs/tags/v') + if: startsWith(github.event.ref, 'refs/tags/v') steps: - uses: actions/checkout@v3 @@ -113,13 +119,13 @@ jobs: uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 with: user: __token__ - password: ${{ secrets.PYPI_TOKEN }} - - ci_develop: + password: ${{ secrets.PYPI_TOKEN }} + + ci_develop: name: Print Github Context for Development runs-on: ubuntu-20.04 if: true - + steps: - name: Dump GitHub context env: diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..c57fa18 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,15 @@ +name: Linting and formatting (pre-commit) + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + - uses: pre-commit/action@v3.0.0 diff --git a/.gitignore b/.gitignore index 80ecee0..bac6da2 100644 --- a/.gitignore +++ b/.gitignore @@ -113,4 +113,4 @@ container Playground.ipynb in_progress docs/auto_examples -docs/gen_modules \ No newline at end of file +docs/gen_modules diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..e7a06fe --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,23 @@ +repos: + # Fix common spelling mistakes + - repo: https://github.com/codespell-project/codespell + rev: v2.2.1 + hooks: + - id: codespell + args: [ + # Verly is a Name, coo references the SciPy coo sparse matrix + '--ignore-words-list', 'verly,coo', + '--write-changes', + # 'nd,alos,inout', + # '--ignore-regex', '\bhist\b', + '--' + ] + 
types_or: [python, rst, markdown] + files: ^(skgstat|docs|tutorials)/ + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace diff --git a/Dockerfile b/Dockerfile index b4f5e80..ea611c0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,4 +29,4 @@ RUN pip install jupyter # open port 8888 EXPOSE 8888 -CMD jupyter notebook --ip "0.0.0.0" \ No newline at end of file +CMD jupyter notebook --ip "0.0.0.0" diff --git a/Dockerfile.legacy b/Dockerfile.legacy index 13f30b1..5d74ce2 100644 --- a/Dockerfile.legacy +++ b/Dockerfile.legacy @@ -2,7 +2,7 @@ # use the minimal jupyter notebook FROM jupyter/minimal-notebook:ad3574d3c5c7 -# build tutorials folder +# build tutorials folder USER root RUN mkdir tutorials @@ -12,7 +12,7 @@ USER $NB_USER # copy the tutorials content COPY ./docs/tutorials ./tutorials -# install the latest version +# install the latest version COPY ./ ./scikit-gstat # use the latest @@ -20,7 +20,7 @@ RUN cd scikit-gstat && \ pip install . && \ cd .. -# the interfaces has two additional +# the interface has two additional # optional dependencies: pykrige and gstools RUN pip install pykrige gstools diff --git a/MANIFEST.in b/MANIFEST.in index 8b9a556..8ee1ba5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -8,4 +8,4 @@ include .coveragerc include Dockerfile include Dockerfile.legacy graft skgstat/data/rf -graft skgstat/data/samples \ No newline at end of file +graft skgstat/data/samples diff --git a/README.rst b/README.rst index 0982b79..6cd6355 100644 --- a/README.rst +++ b/README.rst @@ -79,8 +79,8 @@ PyPI pip install scikit-gstat -**Note:** It can happen that the installation of numba or numpy is failing using pip. Especially on Windows systems. -Usually, a missing Dll (see eg. `#31 `_) or visual c++ redistributable is the reason. +**Note:** It can happen that the installation of numba or numpy fails when using pip, especially on Windows systems. +Usually, a missing DLL (see e.g. `#31 `_) or Visual C++ redistributable is the reason. GIT: ^^^^ diff --git a/RELEASE.md b/RELEASE.md index bbf3fb3..a020ea6 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,6 +1,6 @@ # Scikit-GStat -SciKit-Gstat is a scipy-styled variogram estimation and analysis module for geostatistics. +SciKit-Gstat is a scipy-styled variogram estimation and analysis module for geostatistics. It includes classes for variogram estimation and ordinary kriging. More advanced use-cases like directional variograms and space-time variograms are included as well. @@ -26,4 +26,4 @@ or bibtex: URL = {https://gmd.copernicus.org/articles/15/2505/2022/}, DOI = {10.5194/gmd-15-2505-2022} } -``` \ No newline at end of file +``` diff --git a/docs/Makefile b/docs/Makefile index 1ab91c4..081fcab 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -17,4 +17,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/changelog.rst b/docs/changelog.rst index 4137408..0068704 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -19,13 +19,13 @@ Version 1.0.9 - [data] Added a data generator for creating :func:`random multivariate data ` - [tests] Added tests for :func:`random multivariate data ` -Verison 1.0.8 +Version 1.0.8 ------------- -- [util] added :func:`cross_variograms ` for calculating cross-variograms for +- [util] added :func:`cross_variograms ` for calculating cross-variograms for all combinations of ``N`` input variables. Variograms are returned in a 2D List (matrix) with all primary variograms on the diagonal. - [util] added support for :class:`DirectionalVariogram ` in case azimuth, - tolerance or bandwith are passed as keyword arguments to :func:`cross_variograms ` + tolerance or bandwidth are passed as keyword arguments to :func:`cross_variograms ` - [util] added tests for new cross_variograms function to be implemented. Version 1.0.7 @@ -36,13 +36,13 @@ Version 1.0.7 Version 1.0.6 ------------- -This is technically the same as the 1.0.5 as I screwed up and uplaoded to PyPI without merging the changes. +This is technically the same as the 1.0.5 as I screwed up and uploaded to PyPI without merging the changes. Version 1.0.5 ------------- -- [Variogram] The variogram now accepts 2D values in ``(n_samples, 2)`` shape. The second column will be - interpreted as a co-variable co-located to the observations and the Variogram then calculates the +- [Variogram] The variogram now accepts 2D values in ``(n_samples, 2)`` shape. The second column will be + interpreted as a co-variable co-located to the observations and the Variogram then calculates the **cross-variogram** for ``values[:, 0] ~ values[:, 1]`` Note that this only implements the basic calculation of cross-variograms. Utility functions and plotting still need to be updated. A tutorial will also be added in future. @@ -61,7 +61,7 @@ Version 1.0.3 ------------- - [Variogram] :func:`Variogram.pairwise_diffs ` wraps around old ``_diff`` and should be used instead of directly accessing ``_diff`` -- [Variogram] :func:`Variogram.model_residuals ` will replace +- [Variogram] :func:`Variogram.model_residuals ` will replace ``Variogram.residuals`` which has been deprecated Version 1.0.1 @@ -74,9 +74,9 @@ Version 1.0.1 Version 1.0.0 ============= - [plotting] the 3D surface plot is now handling the opacity settings correctly. -- [utils] the utils now include the likelihood submodule, which includes a +- [utils] the utils now include the likelihood submodule, which includes a :func:`get_likelihood ` function factory. - The returned function can be minimized using SciPy to perform maximum likelihood fits. + The returned function can be minimized using SciPy to perform maximum likelihood fits. Version 0.6.14 -------------- @@ -86,7 +86,7 @@ Version 0.6.14 Version 0.6.13 -------------- -- [docs] the sphinx recipe knitting a TeX file from docs is now ignored on fail +- [docs] the sphinx recipe knitting a TeX file from docs is now ignored on fail Reason is that the current build is too lage and any kind of buffer is overflowing - [docs] The jupyter notebook tutorials for the Docker image are now at root level. 
- [docs] The documentation tutorials are now sphinx-gallery builds of the notebook @@ -97,7 +97,7 @@ Version 0.6.13 Version 0.6.12 -------------- - [data] the dataset loader can now return pandas.DataFrame objects -- [Dockerfile] some cleanups for making future tutorials work. +- [Dockerfile] some cleanups for making future tutorials work. Version 0.6.11 -------------- @@ -108,7 +108,7 @@ Version 0.6.10 - [Variogram] The KMeans based binning function is now raising a value error if a ConvergenceWarning is found. The reason is, that the original settings for binning were not valid if KMeans did not converge and thus, the bins array might not be - in a well defined state. + in a well defined state. Version 0.6.9 ------------- @@ -122,19 +122,19 @@ Version 0.6.8 Version 0.6.7 ------------- - [RasterMetricSpace] a new class is introduced: :class:`RasterEquidistantMetricSpace `. - An instance can be passed as `coordinates`. It samples a given Raster image at concentric rings, to derive a + An instance can be passed as `coordinates`. It samples a given Raster image at concentric rings, to derive a more uniformly distributed distance matrix. Version 0.6.6 ------------- -- [Variogram] The automatic fitting of a theoretical variogram model is now optional. You can pass `None` as +- [Variogram] The automatic fitting of a theoretical variogram model is now optional. You can pass `None` as `fit_method` parameter, which will suppress the fitting. Version 0.6.5 ------------- -- [Variogram] now supports custom bin edges for the experimental variogram. :func:`Variogram.bins ` +- [Variogram] now supports custom bin edges for the experimental variogram. :func:`Variogram.bins ` now accepts a list or array of upper bin edges. -- [Variogram] has a new property called :func:`bin_count ` which returns the number of +- [Variogram] has a new property called :func:`bin_count ` which returns the number of point pairs within each lag class Version 0.6.4 @@ -143,11 +143,11 @@ Version 0.6.4 - [Kriging] `OrdinaryKriging._estimator ` handles the error variance matrix index now correctly. On error during kriging, the index was not incremented, which lead to malformed error variance field output. -Version 0.6.3 +Version 0.6.3 ------------- - [interfaces] If any of the gstools interfaces are used, the Variogram will call :func:`fit ` - without forcing a full preprocessing cycle. This fixes edge cases, where a parameter was mutated, but the fitting - not performed before the instance was exported. This should only have happended in very rare occasions. + without forcing a full preprocessing cycle. This fixes edge cases, where a parameter was mutated, but the fitting + not performed before the instance was exported. This should only have happened in very rare occasions. - [data] added the meuse dataset from the R-package ``'sp'`` Version 0.6.2 @@ -155,12 +155,12 @@ Version 0.6.2 - [Variogram] the fitting method is now implemented as :func:`Variogram.fit_method ` property. It will drop fitting parameters if the fit method is changed to something else than ``'manual'``. - [Variogram] If an invalid :func:`Variogram.fit_method ` is set, an - :class:`AttributeError` will instantly be raised. Beforehand it was only raised on the next call of + :class:`AttributeError` will instantly be raised. Beforehand it was only raised on the next call of :func:`fit ` Version 0.6.1 ------------- -- The Dockerfile was completely rewritten. A user can now specify the used Python version +- The Dockerfile was completely rewritten. 
A user can now specify the used Python version at build time of the docker image. - The Dockerfile is now part of the python package @@ -172,13 +172,13 @@ Version 0.6.0 it gets stable with Version 1.0 or 1.1 .. note:: - The current implementation of uncertainty propagation is not stable. It will be changed until + The current implementation of uncertainty propagation is not stable. It will be changed until version 0.7. The entry-point `obs_sigma` will stay stable and persist, but currently the uncertainty - propagation will not be updated and invalidated as the Variogram instance changes. + propagation will not be updated and invalidated as the Variogram instance changes. Version 0.5.6 ------------- -- [Variogram] the interal :class:`MetricSpace ` instance used to calculate the distance matrix +- [Variogram] the internal :class:`MetricSpace ` instance used to calculate the distance matrix is now available as the :any:`Variogram.metric_space ` property. - [Variogram] :any:`Variogram.metric_space ` is now read-only. - [unittest] two unittests are changed (linting, not functionality) @@ -190,13 +190,13 @@ Version 0.5.5 Version 0.5.4 ------------- -- [util] added a new `cross_validation` utility module to cross-validate variograms with leave-one-out Kriging +- [util] added a new `cross_validation` utility module to cross-validate variograms with leave-one-out Kriging cross validations. Version 0.5.3 ------------- - [MetricSpace] new class :class:`ProbabilisticMetricSpace ` that - extends the metric space by a stochastic element to draw samples from the input data, instead of using + extends the metric space by a stochastic element to draw samples from the input data, instead of using the full dataset. Version 0.5.2 @@ -208,14 +208,14 @@ Version 0.5.2 Version 0.5.1 ------------- -- [plotting] the spatio-temporal 2D and 3D plots now label the axis correctly. +- [plotting] the spatio-temporal 2D and 3D plots now label the axis correctly. - [plotting] fixed swapped plotting axes for spatio-temporal plots. Version 0.5.0 ------------- - [MetricSpace] A new class :class:`MetricSpace ` was introduced. This class can be passed to any class that accepted coordinates so far. This wrapper can be used to pre-calculate large distance - matrices and pass it to a lot of Variograms. + matrices and pass it to a lot of Variograms. - [MetricSpacePair] A new class :class:`MetricSpacePair ` was introduced. This is a pair of two :class:`MetricSpaces ` and pre-calculates all distances between the two spaces. This is i.e. used in Kriging to pre-calcualte all distance between the input coordinates and @@ -223,12 +223,12 @@ Version 0.5.0 Version 0.4.4 ------------- -- [models] the changes to :func:`matern ` introduced in `0.3.2` are reversed. +- [models] the changes to :func:`matern ` introduced in `0.3.2` are reversed. The Matérn model does not adapt the smoothness scaling to effective range anymore, as the behavior was too inconsistent. - [interface] minor bugfix of circular import in `variogram_estimator` interface - [models] :func:`matern(0, ...) ` now returns the nugget instead of `numpy.NaN` -- [models] :func:`stable(0, ...) ` now returns the nugget instead of `numpy.NaN` or a +- [models] :func:`stable(0, ...) ` now returns the nugget instead of `numpy.NaN` or a `ZeroDivisionError`. 
Version 0.4.3 @@ -238,10 +238,10 @@ Version 0.4.2 ------------- -- [Variogram] :func:`bins ` now cases manual setted bin edges automatically +- [Variogram] :func:`bins ` now casts manually set bin edges automatically to a :func:`numpy.array`. - [Variogram] :func:`get_empirical ` returns the empirical variogram. - That is a tuple of the current :func:`bins ` and + That is a tuple of the current :func:`bins ` and :func:`experimental ` arrays, with the option to move the bin to the lag classes centers. @@ -264,15 +264,15 @@ Version 0.3.11 Version 0.3.10 -------------- -- [binning] added a median aggregation option to :func:`ward `. This can be - enabled by setting `binning_agg_func` to `'median'`. The cluster centroids will be derived from +- [binning] added a median aggregation option to :func:`ward `. This can be + enabled by setting `binning_agg_func` to `'median'`. The cluster centroids will be derived from the members median value, instead of mean value. -- [Variogram] added :func:`fit_method-'ml' ` - a maximum likelihood fitting +- [Variogram] added :func:`fit_method-'ml' ` - a maximum likelihood fitting procedure to fit the theoretical variogram to the experimental -- [Variogram] added :func:`fit_method-'manual' `. This is a manual fitting - method that takes the variogram parameters either at instantiation prefixed by `fit_`, or as - keyword arguments by :func:`fit `. -- [Variogram] the manual fitting method will preseve the previous parameters, if the Variogram was +- [Variogram] added :func:`fit_method-'manual' `. This is a manual fitting + method that takes the variogram parameters either at instantiation prefixed by `fit_`, or as + keyword arguments by :func:`fit `. +- [Variogram] the manual fitting method will preserve the previous parameters, if the Variogram was fitted before and the fitting parameters are not manually overwritten. @@ -288,25 +288,25 @@ Version 0.3.8 - [plotting] minor bugfixes in plotting routines (wrong arguments, pltting issues) - [docs] added a tutorial about plotting - [binning] added :func:`auto_derived_lags ` for a variety - of different methods that find a good estimate for either the number of lag classes or the - lag class width. These can be used by passing the method name as :func:`bin_func ` - parameter: Freedman-Diaconis (`'fd'`), Sturge's rule (`'sturges'`), Scott's rule (`'scott'`) and - Doane's extension to Sturge's rule (`'doane'`). + of different methods that find a good estimate for either the number of lag classes or the + lag class width. These can be used by passing the method name as :func:`bin_func ` + parameter: Freedman-Diaconis (`'fd'`), Sturge's rule (`'sturges'`), Scott's rule (`'scott'`) and + Doane's extension to Sturge's rule (`'doane'`). Uses `histogram_bin_edges ` internally. Version 0.3.7 ------------- -- [Variogram] now accepts arbitary kwargs. These can be used to further specify functional behavior - of the class. As of Version `0.3.7` this is used to pass arguments down to the - :func:`entropy ` and :func:`percentile ` +- [Variogram] now accepts arbitrary kwargs. These can be used to further specify functional behavior + of the class. As of Version `0.3.7` this is used to pass arguments down to the + :func:`entropy ` and :func:`percentile ` estimators. -- [Variogram] the :func:`describe ` now adds the - :func:`init ` arguments by default to the output. The method can output +- [Variogram] the :func:`describe ` now adds the + :func:`init ` arguments by default to the output.
The method can output the init params as a nested dict inside the output or flatten the output dict. Version 0.3.6 ------------- -.. warning:: +.. warning:: There is some potential breaking behaviour - [Variogram] some internal code cleanup. Removed some unnecessary loops @@ -314,30 +314,30 @@ a recalculation of the lag groupings. So far they were kept untouches, which might result in old experimental variogram values for the changed instance. **This is a potential breaking change**. -- [Variogram] The :func:`lag_classes ` generator now yields empty - arrays for unoccupied lag classes. This will result in :class:`NaN ` values for the +- [Variogram] The :func:`lag_classes ` generator now yields empty + arrays for unoccupied lag classes. This will result in :class:`NaN ` values for the semi-variance. This is actually a bug-fix. **This is a potential breaking change** Version 0.3.5 ------------- -- [plotting] The :func:`location_trend ` can now add - trend model lines to the scatter plot for the `'plotly'` backend and calculate the +- [plotting] The :func:`location_trend ` can now add + trend model lines to the scatter plot for the `'plotly'` backend and calculate the R² for the trend model. - [Variogram] the *internal* attribute holding the name of the current distance function was renamed from `_dict_func` to `_dist_func_name` Version 0.3.4 ------------- -- [plotting] The :func:`scattergram ` +- [plotting] The :func:`scattergram ` functions color the plotted points with respect to the lag bin they - are originating from. For `matplotlib`, this coloring is suppressed, but can activated by + are originating from. For `matplotlib`, this coloring is suppressed, but can be activated by passing the argument ``scattergram(single_color-False)``. Version 0.3.3 ------------- -- [plotting] a new submodule is introduced: :py:mod:`skgstat.plotting`. This contains all plotting functions. +- [plotting] a new submodule is introduced: :py:mod:`skgstat.plotting`. This contains all plotting functions. The plotting behavior is not changed, but using :func:`skgstat.plotting.backend`, the used plotting library can be switched from `matplotlib` to `plotly` - [stmodels] some code cleanup @@ -374,38 +374,38 @@ Version 0.2.8 - [Variogram] `harmonize` parameter is removed - [Variogram] Monotonization (old harmonize par) is available as a new theoretical model function. Can be used by setting `model-'harmonize'` -- [interfaces] gstools interface implemented. +- [interfaces] gstools interface implemented. :func:`gstools_cov_model ` - takes a :class:`skgstat.Variogram` instance and returns a **fitted** - `gstools.CovModel`. + takes a :class:`skgstat.Variogram` instance and returns a **fitted** + `gstools.CovModel`. Version 0.2.7 ------------- - [Kriging] Little performance gains due to code cleanup. -- [Variogram] The `normalize-True` default in `__init__` will change to +- [Variogram] The `normalize-True` default in `__init__` will change to `normalize-False` in a future version. A DeprecationWarning was included. -- [tests] The Variogram class fitting unit tests are now explicitly setting +- [tests] The Variogram class fitting unit tests are now explicitly setting the normalize parameter to handle the future deprecation. - [tests] More unittests were added to increase coverage -- [interfaces] The new submodule `skgstat.interfaces` is introduced. This - submodule collects interfacing classes to use skgstat classes with other +- [interfaces] The new submodule `skgstat.interfaces` is introduced.
This + submodule collects interfacing classes to use skgstat classes with other Python modules. -- [interfaces] The first interfacing class is the - :class:`VariogramEstimator `. This - is a scikit-learn compatible `Estimator` class that can wrap a `Variogram`. +- [interfaces] The first interfacing class is the + :class:`VariogramEstimator `. This + is a scikit-learn compatible `Estimator` class that can wrap a `Variogram`. The intended usage is to find variogram hyper-parameters using `GridSearchCV`. This is also the only usecase covered in the unit tests. -- [interfaces] Implemented - :func:`pykrige_as_kwargs `. - Pass a :class:`Variogram ` object and a dict of parameters - is returned that can be passed to pykrige Kriging classes using the double +- [interfaces] Implemented + :func:`pykrige_as_kwargs `. + Pass a :class:`Variogram ` object and a dict of parameters + is returned that can be passed to pykrige Kriging classes using the double star operator. -- Added Dockerfile. You can now build a docker container with scikit-gstat +- Added Dockerfile. You can now build a docker container with scikit-gstat installed in a miniconda environment. On run, a jupyter server is exposed on Port 8888. In a future release, this server will serve tutorial notebooks. - [stmodels] small bugfix in product model -- [stmodels] removed variogram wrapper and added stvariogram wrapper to +- [stmodels] removed variogram wrapper and added stvariogram wrapper to correctly detect space and time lags Version 0.2.6 @@ -420,16 +420,16 @@ the kriging matrix. - [OrdinaryKriging]: calculates the kriging variance along with the estimation itself. - The Kriging variance can be accessed after a call to - :func:`OrdinaryKriging.transform ` and can be - accessed through the `OrdinaryKriging.sigma` attribute. + The Kriging variance can be accessed after a call to + :func:`OrdinaryKriging.transform ` and can be + accessed through the `OrdinaryKriging.sigma` attribute. - [Variogram] deprecated :func:`Variogram.compiled_model `. Use :func:`Variogram.fitted_model ` instead. - [Variogram] added a new and much faster version of the parameterized model: :func:`Variogram.fitted_model ` -- [Variogram] minor change in the cubic model. This made the adaption of the - associated unit test necessary. +- [Variogram] minor change in the cubic model. This made the adaption of the + associated unit test necessary. Version 0.2.5 ------------- @@ -472,4 +472,4 @@ Version 0.2.1 Version 0.2.0 ------------- -- completely rewritten Variogram class compared to v0.1.8 \ No newline at end of file +- completely rewritten Variogram class compared to v0.1.8 diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 4d6f417..44ea949 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -58,7 +58,7 @@ step classes to `'uniform'` distribution in the lag classes. Mutating -------- -One of the main strenghs of :class:`Variogram ` is its +One of the main strengths of :class:`Variogram ` is its ability to change arguments in place. Any dependent result or parameter will be invalidated and re-caluculated. You can i.e. increase the number of lag classes: @@ -76,4 +76,3 @@ You can i.e. increase the number of lag classes: Note, how the experimental variogram was updated and the model was fitted to the new data automatically.
- diff --git a/docs/index.rst index 47290c2..87bc612 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,7 +8,7 @@ Welcome to SciKit GStat `Download the docs as PDF `_ -SciKit-Gstat is a scipy-styled analysis module for variogram analysis. +SciKit-Gstat is a scipy-styled analysis module for variogram analysis. The base class is called :class:`Variogram `, which is probably the only import needed. However, several other classes exist: @@ -17,8 +17,8 @@ only import needed. However, several other classes exist: * :class:`OrdinaryKriging ` for interpolation * :class:`MetricSpace ` for pre-computed spatial samples -The variogram classes have a similar interface and can compute experimental variograms -and fit theoretical variogram model functions. +The variogram classes have a similar interface and can compute experimental variograms +and fit theoretical variogram model functions. The module makes use of a rich selection of semi-variance estimators, variogram model functions and sptial binning functions, while being extensible at the same time. @@ -47,4 +47,3 @@ The code itself is published and has a DOI. It can be cited as: technical/technical reference/reference changelog - diff --git a/docs/install.rst b/docs/install.rst index e1870c4..0f096e5 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -43,12 +43,12 @@ Note ---- On Windows, you might run into problems installing all requirements -in a clean Python environment, especially if C++ redistributables are missing. +in a clean Python environment, especially if C++ redistributables are missing. This can happen i.e. on *bare* VMs and the compilation of libraries required by scipy, numpy or numba package are the ones failing. In these cases, install the libraries first, and then SciKit-GStat or move to -the conda-forge package +the conda-forge package .. code-block:: bash - conda install numpy, scipy, numba \ No newline at end of file + conda install numpy scipy numba diff --git a/docs/reference/binning.rst b/docs/reference/binning.rst index f46c255..3077fc9 100644 --- a/docs/reference/binning.rst +++ b/docs/reference/binning.rst @@ -3,7 +3,7 @@ Binning functions ================= SciKit-GStat implements a large amount of binning functions, -which can be used to spatially aggregate the distance matrix +which can be used to spatially aggregate the distance matrix into lag classes, or bins. There are a number of functions available, which usually accept more than one method identifier: @@ -20,4 +20,3 @@ more than one method identifier: .. autofunction:: skgstat.binning.ward .. autofunction:: skgstat.binning.stable_entropy_lags - diff --git a/docs/reference/data.rst b/docs/reference/data.rst index f39707a..278806d 100644 --- a/docs/reference/data.rst +++ b/docs/reference/data.rst @@ -12,4 +12,4 @@ Utility Functions ----------------- ..automodule:: skgstat.data._loader - :members: field, get_sample \ No newline at end of file + :members: field, get_sample diff --git a/docs/reference/estimator.rst b/docs/reference/estimator.rst index 51a049d..ab65295 100644 --- a/docs/reference/estimator.rst +++ b/docs/reference/estimator.rst @@ -2,7 +2,7 @@ Estimator Functions =================== -Scikit-GStat implements various semi-variance estimators. These fucntions can +Scikit-GStat implements various semi-variance estimators. These functions can be found in the skgstat.estimators submodule. Each of these functions can be used independently from Variogram class.
In this case the estimator is expecting an array of pairwise differences to calculate the semi-variance. @@ -55,4 +55,4 @@ Percentile percentile of the given pairwise differences and does not bear any information about their variance. -.. autofunction:: skgstat.estimators.percentile \ No newline at end of file +.. autofunction:: skgstat.estimators.percentile diff --git a/docs/reference/kriging.rst b/docs/reference/kriging.rst index 88e80ce..6882a5c 100644 --- a/docs/reference/kriging.rst +++ b/docs/reference/kriging.rst @@ -5,4 +5,4 @@ Kriging Class .. autoclass:: skgstat.OrdinaryKriging :members: - .. automethod:: __init__ \ No newline at end of file + .. automethod:: __init__ diff --git a/docs/reference/metric_space.rst b/docs/reference/metric_space.rst index 3734ad1..7dc9440 100644 --- a/docs/reference/metric_space.rst +++ b/docs/reference/metric_space.rst @@ -18,4 +18,4 @@ MetricSpacePair :members: .. automethod:: __init__ - .. automethod:: find_closest \ No newline at end of file + .. automethod:: find_closest diff --git a/docs/reference/models.rst b/docs/reference/models.rst index 8d2e65a..5ae25ee 100644 --- a/docs/reference/models.rst +++ b/docs/reference/models.rst @@ -5,7 +5,7 @@ Variogram models Scikit-GStat implements different theoretical variogram functions. These model functions expect a single lag value or an array of lag values as input data. Each function has at least a parameter `a` for the effective range and -a parameter `c0` for the sill. The nugget parameter `b` is optinal and will +a parameter `c0` for the sill. The nugget parameter `b` is optional and will be set to :math:`b:=0` if not given. Spherical model @@ -40,4 +40,4 @@ Stable model Matérn model ~~~~~~~~~~~~ -.. autofunction:: skgstat.models.matern \ No newline at end of file +.. autofunction:: skgstat.models.matern diff --git a/docs/reference/reference.rst b/docs/reference/reference.rst index 79af8c1..59c9b58 100644 --- a/docs/reference/reference.rst +++ b/docs/reference/reference.rst @@ -16,4 +16,4 @@ Code Reference interfaces data metric_space - util \ No newline at end of file + util diff --git a/docs/technical/direction.rst b/docs/technical/direction.rst index 5a1cf46..5b13147 100644 --- a/docs/technical/direction.rst +++ b/docs/technical/direction.rst @@ -89,7 +89,7 @@ random coordinates, the visualization is shown below. @savefig dv1.png width=6in DV.pair_field(plt.gca()) - + The model can easily be changed, using the :func:`set_directional_model ` function: @@ -130,4 +130,3 @@ All other methods and attributes can be used in the same way. - :func:`DirectionalVariogram.bins ` - :func:`DirectionalVariogram._calc_groups ` - diff --git a/docs/technical/estimate_kriging.rst b/docs/technical/estimate_kriging.rst index 92cc255..116b5b8 100644 --- a/docs/technical/estimate_kriging.rst +++ b/docs/technical/estimate_kriging.rst @@ -108,4 +108,4 @@ quite obvious. plt.plot(x, y_c, '-b') @savefig krig_coarse.png width=7in - plt.plot(x, y_e2, '-g') \ No newline at end of file + plt.plot(x, y_e2, '-g') diff --git a/docs/technical/fitting.rst b/docs/technical/fitting.rst index 894c537..ee88a9f 100644 --- a/docs/technical/fitting.rst +++ b/docs/technical/fitting.rst @@ -203,7 +203,7 @@ variograms used so far. cm = plt.get_cmap('gist_earth') - # increase the distance by one, to aviod zeros + # increase the distance by one, to avoid zeros X = np.asarray([(_ + 1) for _ in x]) s1 = X / np.max(X) @@ -226,6 +226,3 @@ variograms used so far. That's it. 
- - - diff --git a/docs/technical/technical.rst b/docs/technical/technical.rst index 31c345f..cccb595 100644 --- a/docs/technical/technical.rst +++ b/docs/technical/technical.rst @@ -3,8 +3,8 @@ Technical Notes =============== This chapter collects a number of technical advises for using scikit-gstat. -These examples either give details on the implementation or -guide a correct package usage. This are technical notes, no tutorials. +These examples either give details on the implementation or +guide a correct package usage. These are technical notes, not tutorials. The application of the shown examples might not make sense in every situation .. toctree:: diff --git a/docs/tutorials/data/tereno_fendt/meta_data_CosmicSense_JFC1_DE-Fen_SNdata.json b/docs/tutorials/data/tereno_fendt/meta_data_CosmicSense_JFC1_DE-Fen_SNdata.json index 5d6346a..6b9a194 100644 --- a/docs/tutorials/data/tereno_fendt/meta_data_CosmicSense_JFC1_DE-Fen_SNdata.json +++ b/docs/tutorials/data/tereno_fendt/meta_data_CosmicSense_JFC1_DE-Fen_SNdata.json @@ -5,14 +5,14 @@ "Info": "These files are part of the dataset published with the data-paper mentioned below.", "cite-as": "Fersch, B., Francke, T., Heistermann, M., Schrön, M., Döpper, V., Jakobi, J. et. al (2020): A dense network of cosmic-ray neutron sensors for soilmoisture observation in a highly instrumented pre-alpine headwater catchment in Germany. Earth System Science Data. https://doi.org/10.5194/essd-2020-48" }, - + "Provider": { "Name": "Benjamin Fersch", "Institution": "KIT Campus Alpin", "Email": "fersch@kit.edu", - "Comment": "" + "Comment": "" }, - + "SpaceTimeCoverage": { "StartDate": "2019-05-01", "EndDate": "2019-07-31", @@ -23,14 +23,14 @@ "Elevation": "595 m ASL", "Weblink": "https://geoportal.bayern.de/bayernatlas/?zoom=15&lang=de&topic=ba&bgLayer=atkis&E=654197&N=5299736&catalogNodes=122,11&layers=luftbild&crosshair=marker" }, - + "Source": { "Name": "Benjamin Fersch", "Institution": "KIT Campus Alpin", "LinkToOriginalSource": "https://www.imk-ifu.kit.edu/tereno.php" }, - - + + "Variables": { "time": "time of measurement", "lat": "profile latitude coordinate", @@ -45,7 +45,7 @@ "T_a": "soil temperature (sensor group a)", "T_b": "soil temperature (sensor group b)" }, - + "Units": { "time": "minutes since 2019-05-01 00:00:00", "lat": "degree north", @@ -60,20 +60,19 @@ "T_a": "degree Celsius", "T_b": "degree Celsius" }, - + "SpatialReferenceSystem": { "Name": "WGS 84", - "EPSG": "4326" + "EPSG": "4326" }, - - + + "TemporalReferenceSystem": { "TimeZone": "UTC", "IntervalLength": "15 minutes", "IntervalAggregation": "instantaneous", "TimestampAtEndOfInterval": "FALSE" }, - + "Remarks": "Each profile is equipped with 2 redundant sensors (a, and b)\n Metadata also contained in NetCDF header." } - \ No newline at end of file
Published under CC BY 4.0.\n It is From the WSN product, the T_a in 20cm depth is extracted" -} \ No newline at end of file +} diff --git a/docs/tutorials/tutorial_01_getting_started.py b/docs/tutorials/tutorial_01_getting_started.py index 183e4a4..3b4357a 100644 --- a/docs/tutorials/tutorial_01_getting_started.py +++ b/docs/tutorials/tutorial_01_getting_started.py @@ -30,7 +30,7 @@ # 1.1 Load data # ------------- # SciKit-GStat includes a data submodule, that contains some sample datasets. It also offers some basic random sampling on data sources. -# Here we use the well-known Meuse dataset from the R package ``sp`` (https://cran.r-project.org/web/packages/st/index.html). +# Here we use the well-known Meuse dataset from the R package ``sp`` (https://cran.r-project.org/web/packages/sp/index.html). # If not specified different, the loading function will only export the lead measurements from the data source. # # **Note:** The data is distributed along with the package sp under a GPL-3 license. @@ -61,12 +61,12 @@ # As a quick reminder, the variogram relates pair-wise separating distances of `coordinates` and relates them to the *semi-variance* of the corresponding `values` pairs. The default estimator used is the Matheron estimator: # # .. math:: -# +# # \gamma (h) = \frac{1}{2N(h)} * \sum_{i=1}^{N(h)}(Z(x_i) - Z(x_{i + h}))^2 -# +# # For more details, please refer to the `User Guide `_. # -# The :class:`Variogram ` class takes at least two arguments. +# The :class:`Variogram ` class takes at least two arguments. # The :func:`coordinates ` and the :func:`values ` observed at these locations. # If you use older versions, `_. # -# Consequently, the :class:`OrdinaryKriging ` class needs a :class:`Variogram ` +# Consequently, the :class:`OrdinaryKriging ` class needs a :class:`Variogram ` # object as a mandatory attribute. Two very important optional attributes are ``min_points`` and ``max_points```. # They will limit the size of the Kriging equation system. As we have 200 observations, # we can require at least 5 neighbors within the range. More than 15 will only unnecessarily slow down the computation. # The ``mode='exact'`` attribute will advise the class to build and solve the system above for each location. # # **Note:** The recommended way for kriging applications is to use the interface to :any:`gstools`. # There is an easy-to-use interface via :func:`Variogram.to_gstools ` # and :func:`Variogram.to_gs_krige `. # The getting started tutorial will use the builtin kriging class, anyway. ok = skg.OrdinaryKriging(V, min_points=5, max_points=15, mode='exact') @@ -146,7 +146,7 @@ # %% # The :func:`OrdinaryKriging.transform ` method will apply the interpolation for passed arrays of coordinates. # It requires each dimension as a single 1D array. We can easily build a meshgrid of 100x100 coordinates and pass them to the interpolator. -# To recieve a 2D result, we can simply reshape the result. The Kriging error will be available as the ``sigma`` attribute of the interpolator. +# To receive a 2D result, we can simply reshape the result. The Kriging error will be available as the ``sigma`` attribute of the interpolator.
# build the target grid x = coords[:, 0] diff --git a/docs/tutorials/tutorial_02_estimators.py b/docs/tutorials/tutorial_02_estimators.py index ddae0ec..cc1c96b 100644 --- a/docs/tutorials/tutorial_02_estimators.py +++ b/docs/tutorials/tutorial_02_estimators.py @@ -33,7 +33,7 @@ # %% -# +# def plot_scatter(data, ax): art = ax.scatter(data.x, data.y, 50, c=data.v, cmap='plasma') plt.colorbar(art, ax=ax) @@ -57,16 +57,16 @@ def plot_scatter(data, ax): vario = V2 # %% -# The default estimator configured in :class:`Variogram ` +# The default estimator configured in :class:`Variogram ` # is the :func:`Mathéron estimator ` # (Mathéron, 1963). It is defined like: -# +# # .. math:: -# +# # \gamma (h) = \frac{1}{2N(h)} * \sum_{i=1}^{N(h)}(Z(x_i) - Z(x_{i+h}))^2 -# +# # where: -# +# # * :math:`h` is the distance lag # * :math:`h` is the number of observation pairs in :math:`h`-lag class # * :math:`Z(x_i)` is the observation at the :math:`i`-th location :math:`x` @@ -89,35 +89,35 @@ def plot_scatter(data, ax): # Setting :func:`estimator='cressie' ` # will set the Cressie-Hawkins estimator. # It is implemented as follows (Cressie and Hawkins, 1980): -# +# # .. math:: -# +# # 2\gamma (h) = \frac{\left(\frac{1}{N(h)} \sum_{i=1}^{N(h)} |Z(x_i) - Z(x_{i+h})|^{0.5}\right)^4}{0.457 + \frac{0.494}{N(h)} + \frac{0.045}{N^2(h)}} -# +# # By setting :func:`estimator='dowd' `, # the Dowd estimator (Dowd, 1984) will be used: -# +# # .. matho:: -# -# 2\gamma (h) = 2.198 * {median(Z(x_i) - Z(x_{i+h}))}^2 -# +# +# 2\gamma (h) = 2.198 * {median(Z(x_i) - Z(x_{i+h}))}^2 +# # Finally, :func:`estimator='genton' ` # will set the Genton estimator (Genton, 1998): -# +# # .. math:: -# +# # \gamma (h) = 2.2191\{|V_i(h) - V_j(h)|; i < j\}_{(k)} -# -# with: -# +# +# with: +# # .. math:: # k = \binom{[N_h / 2] + 1}{2} -# +# # and: -# +# # .. math:: # q = \binom{N_h}{2} -# +# fig, _a = plt.subplots(1, 3, figsize=(12, 3), sharey=True) axes = _a.flatten() for ax, estimator_name in zip(axes, ('matheron', 'cressie', 'dowd')): @@ -126,8 +126,8 @@ def plot_scatter(data, ax): ax.set_title(estimator_name.capitalize()) # %% -# The important part is here that the effective range as well as the sill is -# changeing for the estimator. This will likely change the Kriging result. +# The important part is here that the effective range as well as the sill is +# changing for the estimator. This will likely change the Kriging result. # For Kriging, the difference on the first few lag classes is important, # as no points will be used for estimation, that lies outside the range. # We will zoom in, to actually use a higher resolution. Thus the results @@ -187,14 +187,14 @@ def plot_scatter(data, ax): # the upper left corner not as quite well as the other estimators. One can also # see, that a substantial amount of the deviations are caused by the noisy # character of the original image. Note that we loaded the field without -# applying any kind of filter to it. +# applying any kind of filter to it. # # 2.4 References # -------------- # Cressie, N., and D. Hawkins (1980): Robust estimation of the variogram. Math. Geol., 12, 115-125. -# +# # Dowd, P. A., (1984): The variogram and kriging: Robust and resistant estimators, in Geostatistics for Natural Resources Characterization. Edited by G. Verly et al., pp. 91 - 106, D. Reidel, Dordrecht. -# +# # Genton, M. G., (1998): Highly robust variogram estimation, Math. Geol., 30, 213 - 221. -# +# # Matheron, G. (1963). Principles of geostatistics. Economic Geology, 58(8), 1246–1266. 
https://doi.org/10.2113/gsecongeo.58.8.1246 diff --git a/docs/tutorials/tutorial_03_variogram_models.py b/docs/tutorials/tutorial_03_variogram_models.py index b589770..c32cc29 100644 --- a/docs/tutorials/tutorial_03_variogram_models.py +++ b/docs/tutorials/tutorial_03_variogram_models.py @@ -2,11 +2,11 @@ 3 - Variogram Models ==================== -This tutorial will guide you through the theoretical variogram models available for the :class:`Variogram ` class. +This tutorial will guide you through the theoretical variogram models available for the :class:`Variogram ` class. **In this tutorial you will learn:** - * how to choose an appropiate model function + * how to choose an appropriate model function * how to judge fitting quality * about sample size influence @@ -23,8 +23,8 @@ # ------------- # For this example we will use the pancake dataset. You can use the # :mod:``skgstat.data`` submodule to directly sample the dataset. This is the -# red-channel of an image of an actual pancake. The intersting thing about this pancake is, -# that it shows some clear spatial structures in its browning, but of different +# red-channel of an image of an actual pancake. The interesting thing about this pancake is, +# that it shows some clear spatial structures in its browning, but of different # shapes at different scales. This should be reflectable with different samples. s = [30, 80, 300] data1 = skg.data.pancake(N=s[0], seed=42, as_dataframe=True).get('sample') @@ -48,11 +48,11 @@ def plot_scatter(data, ax): # -------------------------------- # One of the features of :mod:`skgstat` is the fact that it is programmed object oriented. # That means, we can just instantiate a :class:`Variogram ` object -# and start changing arguments unitl it models spatial dependency in our observations well. +# and start changing arguments until it models spatial dependency in our observations well. V1 = skg.Variogram(data1[['x', 'y']].values, data1.v.values, maxlag='median', normalize=False) V1.plot(show=False); -# %% +# %% # Plot the others as well V2 = skg.Variogram(data2[['x', 'y']].values, data2.v.values, maxlag='median', normalize=False) V3 = skg.Variogram(data3[['x', 'y']].values, data3.v.values, maxlag='median', normalize=False) @@ -100,12 +100,12 @@ def plot_scatter(data, ax): if i == 2: v.bin_func = 'scott' axes[i][j].set_xlabel('Lag (-)') - + # plot axes[i][j].plot(v.bins, v.experimental, '.b') axes[i][j].plot(x, v.fitted_model(x), '-g') axes[i][j].grid(which='major', axis='x') - + # label first col if j == 0: axes[i][j].set_ylabel(col_lab[i]) @@ -115,7 +115,7 @@ def plot_scatter(data, ax): plt.tight_layout() # %% -# That actually demonstrates how the selection of the experimental variogram can +# That actually demonstrates how the selection of the experimental variogram can # have huge influence on the base data for fitting. Now consider the center column. # In each of the plots, the selection of model is not deterministic. # You can argue for at least two different models here, that might actually be supported by the empirical data. @@ -140,13 +140,13 @@ def plot_scatter(data, ax): # %% # This is quite important. We find all 6 models to describe the experimental # variogram more or less equally well in terms of RMSE. Think of the -# implications: We basically can use any model we like. +# implications: We basically can use any model we like. # This is a problem as i.e. the gaussian and the spherical model describe # fundamentally different spatial properties. 
Thus, our model selection # should be driven by interpretation of the variogram, and not the difference # in RMSE of only 0.4%, which might very likely not be significant at all. -# -# +# +# # But what does this difference look like, when it comes to interpolation? def interpolate(V, ax): @@ -176,13 +176,13 @@ # %% # This should illustrate, how important the selection of model is, even if no observation uncertainties are propagated into the analysis. -# +# # 1. Gaussian model is far off, producing estimations far outside the observed value ranges # 2. All other models seem produce quite comparable mean values # 3. BUT: the standard deviation is quite different # 4. The median of the field can vary by more than 3 units, even if we took the Gaussian model out -# -# You have to remind that we had quite some observations. The selection of model becomes even more arbitrary with smaller samples and more importantly: We have to consider more than one equally probable parameterization of each model when the experimental is more spreaded. +# +# You have to keep in mind that we had quite some observations. The selection of model becomes even more arbitrary with smaller samples and more importantly: We have to consider more than one equally probable parameterization of each model when the experimental variogram is more spread out. # Finally, we can calculate the difference between the kriging fields to inspect the spread of estimations spatially: # @@ -195,7 +195,7 @@ # %% # The colorbar is spanning the entire value range. Thus, given the minor differences in the fitting of the models, we would have to reject just any estimation based on an automatic fit, which is considering some uncertainties in the selection of parameters, because the RMSE values were too close. -# +# # To use the result from above, we need to justfy the selection of model first and manually fit the model based on expert knowledge. # %% @@ -205,11 +205,11 @@ V1.plot(show=False); # %% -# This is a nugget-effect variogram. Thus we have to reject any geostatistical -# analysis based on this sample. It just does not expose any spatial pattern that +# This is a nugget-effect variogram. Thus we have to reject any geostatistical +# analysis based on this sample. It just does not expose any spatial pattern that # can be exploited. -# -# What about the denser sample. Increasing the sample size should reject some +# +# What about the denser sample? Increasing the sample size should reject some # of the models. Remind, that we are sampling at more short distances and thus, # the variogram will be governed by the short ranged patterns of the field, while # the other samples are more dependent on the medium and large range patterns, as @@ -226,9 +226,9 @@ axes[i].set_ylim(0, 2000) # %% -# We can now clearly reject the cubic, gaussian and exponential model. +# We can now clearly reject the cubic, gaussian and exponential model. # I personally would also reject the spherical model we used in the fist place, -# as it is systematically underestimating the semi-variance on short distances. +# as it is systematically underestimating the semi-variance on short distances.
d_fields = [] fig, _a = plt.subplots(2,3, figsize=(18, 12), sharex=True, sharey=True) axes = _a.flatten() diff --git a/docs/tutorials/tutorial_04_plotting.py b/docs/tutorials/tutorial_04_plotting.py index fcef1a9..ed65184 100644 --- a/docs/tutorials/tutorial_04_plotting.py +++ b/docs/tutorials/tutorial_04_plotting.py @@ -4,10 +4,10 @@ At the core of SciKit-GStat is a set of classes, that can be used interactively to perform variogram analysis. One important aspect of this analysis is a rich collection of plotting functions. These are directly available as class methods -of the :class:`Variogram `, -:class:`DirectionalVariogram ` and +of the :class:`Variogram `, +:class:`DirectionalVariogram ` and :class:`SpaceTimeVariogram ` method. -With version ``0.3.3``, SciKit-GStat implements two different plotting backend: +With version ``0.3.3``, SciKit-GStat implements two different plotting backends: `matplotlib `_ and `plotly `_. Generally speaking, matplotlib is great for creating publication ready figures in a variety of formats, including vector-graphic PDF files. Plotly, on the other @@ -22,18 +22,18 @@ meaning you need to take care of the installation yourself, to keep SciKit-GStat's dependency list shorter. -The data used to create the :class:`Variogram ` and -:class:`DirectionalVariogram ` is from +The data used to create the :class:`Variogram ` and +:class:`DirectionalVariogram ` is from Mälicke (2021). Here, pancake dataset is used. The spatio-temporal data is derived from Fersch et al. (2020). From that data publication, the wireless sensor network data is used. The originaly published 15 minutes intervals soil temperature data at 20 cm depth was taken for all 55 stations and aggregated to mean hourly values. To further decrease the data size, only every 6th data point is used here. Estimating the full data set will take approx. 120GB RAM and processing took about 30 minutes. The results for the thinned data sample are very comparable. -Both data samples can either be obtained by the orignial publications, +Both data samples can either be obtained by the original publications, or from the SciKit-GStat documentation. Both samples are published under Creative Commons BY 4.0 license. Please cite the original publications if you use the data, and **not** SciKit-GStat. @@ -89,7 +89,7 @@ # %% # Estimate the spatio-temporal variogram with a product-sum model. # Only every 6th hour is taken into account to decrease the memory footprint. -# If you use the full dataset, you need ^120 GiB RAM. +# If you use the full dataset, you need ~120 GiB RAM. # The marginal variograms are kept as they are. STV = skg.SpaceTimeVariogram(coords, vals[:,::6], x_lags=20, t_lags=20, model='product-sum') print(STV) @@ -98,15 +98,15 @@ # 4.2 Backend # ----------- # -# You can switch to `plotly` as a plotting backend by calling the +# You can switch to `plotly` as a plotting backend by calling the # :mod:`plotting.backend` function and passing the name of the backend. # Note that plotly is only a soft dependency and will not automatically be # installed along with SciKit-GStat. You can install it like: -# +# # .. code-block:: bash -# +# # pip install plotly -# +# # Note that in a Jupyter environment you might want to use the plotly.offline # environment to embed the needed Javascript into the notebook.
In these cases # you have to catch the Figure object and use the iplot function from the # @@ -114,7 +114,7 @@ # # 4.3 Variogram # ------------- -# +# # 4.3.1 :func:`Variogram.plot ` # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # The :func:`Variogram.plot ` is the main plotting @@ -123,7 +123,7 @@ # or further analysis, make sure, that a suitable model was found and fitted # to the experimental data. Further, you have to make sure that the statistical # foundation of this estimation is sound, the lag classes are well designed and -# backed by a suiatable amount of data. +# backed by a suitable amount of data. # Otherwise, any other geostatistical analysis or method will have to fail, # no matter how nice the results might look like. @@ -137,8 +137,8 @@ fig # %% -# A useful argument for ``plot`` is the ``ax``, this takes a -# ``matplotlib.AxesSubplot`` for the ``'matplotlib'`` backend and a +# A useful argument for ``plot`` is ``ax``, which takes a +# ``matplotlib.AxesSubplot`` for the ``'matplotlib'`` backend and a # ``plotly.Figure`` for the ``'plotly'`` backend. # You need to supply the correct amount of subplots (two). For convenience, # the histogram in the upper subplot can be disabled. @@ -147,7 +147,7 @@ width=800, height=200, template='seaborn', - showlegend=False, + showlegend=False, margin=dict(l=0, r=0, b=0, t=0) ) @@ -203,7 +203,7 @@ # ``'plotly'`` backend, as you can click on the legend entries to hide a # specific class, or double-click to show only the selected lag class. # This makes it much easier to inspect the classes. -# +# # Plotly # """""" backend('plotly') @@ -213,7 +213,7 @@ # %% # It is, however possible to re-create the plot that was used up to SciKit-GStat # version ``0.3.0`` with only one color. This is still the default for the -# ``'matplotlib'`` backend. +# ``'matplotlib'`` backend. fig = V.scattergram(single_color=True, show=False) fig @@ -232,12 +232,12 @@ # dimension separatedly. With the ``'plotly'`` backend, each dimension will appear # as a coloured group in a single plot. By double-clicking the legend, you can # inspect each group separately. -# +# # The ``'plotly'`` backend will automatically switch the used plot type from a # ordinary scatter-plot to a WebGL backed scatter-plot, if there are more than # 5000 observations. This will add some startup-overhead for the plot to appear, # but the interactivity actions (like pan, zoom) are speed up by magnitudes. -# +# # Plotly # ^^^^^^ backend('plotly') @@ -256,11 +256,11 @@ # Matplotlib # """""""""" -# +# # There is a difference between the ``'matplotlib'`` and ``'plotly'`` backend in # this plotting function. As Plotly utilizes the legend by default to show and # hide traces on the plot, the user can conveniently switch between the -# coordinate dimensions. +# coordinate dimensions. # In Matplotlib, the figures are not interactive by default and therefore # SciKit-GStat will create one subplot for each coordinate dimension. backend('matplotlib') @@ -269,7 +269,7 @@ # %% # 4.3.4 :func:`distance_difference plot ` # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # The final utility plot presented here is a scatter-plot that relates all # pairwise-differences in value to the spatial distance of the respective point # pairs. This can already be considered to be a variogram. For convenience, the @@ -278,10 +278,10 @@ # adjust. To estimate valid, expressive variograms, this is maybe the most important # preparation step.
If your lag classes do not represent your data well, you will # never find a useful variogram. -# +# # Plotly # """""" -# +# backend('plotly') fig = V.distance_difference_plot(show=False) fig @@ -305,10 +305,10 @@ # %% # 4.4 Directional Variogram # ------------------------- -# +# # 4.4.1 :func:`pair_field ` # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # The :class:`DirectionalVariogram ` class # inherits from :class:`Variogram `. Therefore all plotting # methods shown above are available for directional variograms, as well. @@ -328,7 +328,7 @@ # %% # Obviously, one can see the ``azimuth`` (40°) and narrow ``tolerance`` (15°) # settings in the cone-like shapes of the connection lines, but the whole plot -# is not really instructive or helpful like this. +# is not really instructive or helpful like this. # Using the ``points`` keyword, you can show the lines only for a given set of # coordinate locations. You have to pass a list of coordinate indices. With # ``add_points=True``, the selected points will be highlighted in red. @@ -337,7 +337,7 @@ # %% # Plotly # """""" -# +# # **Note:** It is not recommended to plot the full # :func:`pair_field ` with all points # using plotly. Due to the implementation, that makes the plot really, @@ -361,10 +361,10 @@ # 4.5.1 `plot(kind='scatter') ` # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # The scatter plot can be used to inspect the experimental variogram data on a # spatial and temporal axis, with the spatio-temporal model fitted to the data. -# +# # Plotly # """""" backend('plotly') @@ -394,7 +394,7 @@ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # 3D plots are great for data exploration, especially if they are interactive. # For publications, 3D plots are not that helpful. Additionally, it can be quite -# tricky sometimes to find a good angle to focus on the main message of a 3D plot. +# tricky sometimes to find a good angle to focus on the main message of a 3D plot. # Hence, there are more plotting modes. They can either be used by # setting ``kind='contour'`` or ``kind='contourf'``. Alternatively, these two # plotting types also have their own method. @@ -403,7 +403,7 @@ # dimension on the y-axis. The semi-variance itself is shown as a contour plot, # that can either only plot the lines (``'contour'``) or filled areas for each # contour (``'contourf'``). -# +# # Plotly # """""" backend('plotly') @@ -418,7 +418,7 @@ # %% # Matplotlib # """""""""" -# +# # The matplotlib versions of the contour plots are not that sophisticated, # but the returned figure can be adjusted to your needs. backend('matplotlib') @@ -426,7 +426,7 @@ # %% -# +# fig = STV.plot(kind='contourf') # %% @@ -437,14 +437,14 @@ # implemented as :class:`Variogram ` instances and can be # changed and plotted like any other :class:`Variogram ` instance, # it can come in very handy to plot the marginal models side-by-side. -# +# # This can be done with the :func:`marginals ` method. backend('plotly') fig = STV.marginals(show=False) fig # %% -# +# backend('matplotlib') fig = STV.marginals() @@ -457,4 +457,3 @@ backend('plotly') fig = STV.marginals(include_model=True, show=False) fig - diff --git a/docs/tutorials/tutorial_05_binning.py b/docs/tutorials/tutorial_05_binning.py index 7a24ad9..0c81e93 100644 --- a/docs/tutorials/tutorial_05_binning.py +++ b/docs/tutorials/tutorial_05_binning.py @@ -7,7 +7,7 @@ geostatistical literature. The main reason is that, usually, the same method is used.
A user-set amount of equidistant lag classes is formed with ``0`` as lower bound and ``maxlag`` as upper bound. Maxlag is often set to the median or 60th -percentile of all pairwise separating distances. +percentile of all pairwise separating distances. In SciKit-GStat this is also the default behavior, but only one of a dozen different implemented methods. Thus, we want to shed some light onto the other @@ -35,7 +35,7 @@ # --------------- # Loads a data sample and draws `n_samples` from the field. # For sampling the field, random samples from a gamma distribution with a fairly -# high scale are drawn, to ensure there are some outliers in the samle. The +# high scale are drawn, to ensure there are some outliers in the sample. The # values are then re-scaled to the shape of the random field and the values # are extracted from it. # You can use either of the next two cells to work either on the pancake or @@ -53,7 +53,7 @@ fig # %% -# .. note:: +# .. note:: # You need to comment the next cell to use the pancake dataset. This cell # will overwrite the ``coords`` and ``vals`` arrays created in the last cell. coords, vals = skg.data.meuse().get('sample') @@ -100,8 +100,8 @@ # The histogram of the :func:`'uniform' ` # method will adjust the lag class widths to have the same sample size for each # lag class. This can be used when there must not be any empty lag classes on -# small data samples, or comparable sample sizes are desireable for the -# semi-variance estimator. +# small data samples, or comparable sample sizes are desirable for the +# semi-variance estimator. # apply binning bins, _ = skg.binning.uniform_count_lags(V.distance, N, None) @@ -121,9 +121,9 @@ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # The distance matrix is clustered by a K-Means algorithm. # The centroids are used as lag class centers. Each lag class is then formed -# by taking half the distance to each sorted neighboring centroid as a bound. +# by taking half the distance to each sorted neighboring centroid as a bound. # This will most likely result in non-equidistant lag classes. -# +# # One important note about K-Means clustering is that it is not a # deterministic method, as the starting points for clustering are taken randomly. # Thus, the decision was made to seed the random start values. Therefore, the @@ -148,17 +148,17 @@ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # The other clustering algorithm is a hierarchical clustering algorithm. # This algorithm groups values together based on their similarity, which is -# expressed by Ward's criterion. +# expressed by Ward's criterion. # Agglomerative algorithms work iteratively and deterministically, as at the first # iteration each value forms a cluster on its own. Each cluster is then merged # with the most similar other cluster, one at a time, until all clusters are -# merged, or the clustering is interrupted. +# merged, or the clustering is interrupted. # Here, the clustering is interrupted as soon as the specified number of lag # classes is reached. The lag classes are then formed similarly to the K-Means # method, either by taking the cluster mean or median as center. -# +# # Ward's criterion defines that cluster as the closest which results -# in the smallest intra-cluster variance for the merged clusters. +# in the smallest intra-cluster variance for the merged clusters. # The main downside is the processing speed. You will see a significant # difference for ``'ward'`` and should not use it on medium and large datasets.
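To make the runtime gap between the two clustering-based methods tangible, a minimal sketch (assuming ``V`` is the ``Variogram`` instance created earlier in this tutorial; the timing harness itself is illustrative) can time a re-binning with each method:

.. code-block:: python

    from time import time

    # re-assigning bin_func triggers a re-binning of the distance matrix
    for method in ('kmeans', 'ward'):
        t0 = time()
        V.bin_func = method
        print(f"'{method}': {V.n_lags} lag classes in {time() - t0:.2f} s")

On medium-sized samples, ``'ward'`` is expected to be noticeably slower than ``'kmeans'``, which is exactly why the text above advises against it for larger datasets.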
@@ -178,16 +178,16 @@ # %% # 5.3 Lag class binning - adjustable ``N`` # ----------------------------------------- -# +# # 5.3.1 :func:`'sturges' ` lag classes # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# Sturge's rule is well known and pretty straightforward. It's the defaul +# Sturge's rule is well known and pretty straightforward. It's the default # method for histograms in R. The number of equidistant lag classes is defined like: -# +# # .. math:: -# +# # n = \log_2 (x + 1) -# +# # Sturge's rule works well for small, normally distributed datasets. # apply binning @@ -208,13 +208,13 @@ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # Scott's rule is another quite popular approach to estimate histograms. # The rule is defined like: -# +# # .. math:: -# +# # h = \sigma \left( \frac{24 \sqrt{\pi}}{x} \right)^{\frac{1}{3}} -# +# # Unlike Sturge's rule, it will estimate the lag class width from the -# sample size standard deviation. Thus, it is also quite sensitive to outliers. +# sample size and standard deviation. Thus, it is also quite sensitive to outliers. # apply binning bins, n = skg.binning.auto_derived_lags(V.distance, 'scott', None) @@ -232,13 +232,13 @@ # %% # 5.3.3 :func:`'sqrt' ` lag classes # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# The only advantage of this method is its speed. The number of lag classes +# The only advantage of this method is its speed. The number of lag classes # is simply defined like: -# +# # .. math:: -# +# # n = \sqrt{x} -# +# # Thus, it's usually not really a good choice, unless you have a lot of samples. # apply binning @@ -257,14 +257,14 @@ # %% # 5.3.4 :func:`'fd' ` lag classes # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # The Freedman-Diaconis estimator can be used to derive the number of lag -# classes again from an optimal lag class width like: +# classes again from an optimal lag class width like: +# +# .. math:: # -# .. math:: -# # h = 2\frac{IQR}{x^{1/3}} -# +# # As it is based on the interquartile range (IQR), it is very robust to outliers. # That makes it a suitable method to estimate lag classes on non-normal distance # matrices. On the other hand, it usually over-estimates :math:`n` for small @@ -286,13 +286,13 @@ # %% # 5.3.5 :func:`'doane' ` lag classes # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # Doane's rule is an extension to Sturge's rule that takes the skewness of the # distance matrix into account. It was found to be a very reasonable choice on # most datasets where the other estimators didn't yield good results. -# +# # It is defined like: -# +# # .. math:: # \begin{split} # n = 1 + \log_{2}(s) + \log_2\left(1 + \frac{|g|}{k}\right) \\ @@ -321,7 +321,7 @@ # all variograms, so any change is due to the lag class binning. The variogram # will use a maximum lag of ``200`` to get rid of the very thin last bins at # large distances. -# +# # The ``maxlag`` is very close to the effective range of the variogram, thus you # can only see differences in sill. But the variogram fitting is not the # focus of this tutorial.
You can also change the parameter and fit a more @@ -362,7 +362,7 @@ # %% # 5.4.3 :func:`'kmeans' ` lag classes # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # set the new binning method V.bin_func = 'kmeans' @@ -447,7 +447,7 @@ # %% # 5.4.9 :func:`'doane' ` lag classes # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # In[23]: @@ -459,4 +459,3 @@ print(f'"{V._bin_func_name}" adjusted {V.n_lags} lag classes - range: {np.round(V.cof[0], 1)} sill: {np.round(V.cof[1], 1)}') fig.update_layout(template='plotly_white') fig - diff --git a/docs/tutorials/tutorial_06_gstools.py b/docs/tutorials/tutorial_06_gstools.py index 46a9736..0101593 100644 --- a/docs/tutorials/tutorial_06_gstools.py +++ b/docs/tutorials/tutorial_06_gstools.py @@ -2,12 +2,12 @@ 6 - GSTools =========== With version ``0.5`` ``scikit-gstat`` offers an interface to the awesome `gstools `_ -library. This way, you can use a :class:`Variogram ` estimated with ``scikit-gstat`` +library. This way, you can use a :class:`Variogram ` estimated with ``scikit-gstat`` in `gstools `_ to perform random field generation, kriging and much, much more. -For a :class:`Variogram ` instance, there are three possibilities to export into `gstools `_ : +For a :class:`Variogram ` instance, there are three possibilities to export into `gstools `_ : - 1. :func:`Variogram.get_empirical(bin_center=True) ` returns a pair of distance lag bins and experimental semi-variance values, like `gstools.variogram.vario_estimate `_. + 1. :func:`Variogram.get_empirical(bin_center=True) ` returns a pair of distance lag bins and experimental semi-variance values, like `gstools.variogram.vario_estimate `_. 2. :func:`Variogram.to_gstools ` returns a parameterized :any:`CovModel ` derived from the Variogram. 3. :func:`Variogram.to_gs_krige ` returns a :any:`GSTools Krige ` instance based on the variogram @@ -18,8 +18,8 @@ 6.1.1 Reproducing the gstools example ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -You can reproduce the `Getting Started example for variogram estimation from GSTools docs `_ -with ``scikit-gstat``, and replace the calculation of the empirical variogram with :class:`skg.Variogram `. +You can reproduce the `Getting Started example for variogram estimation from GSTools docs `_ +with ``scikit-gstat``, and replace the calculation of the empirical variogram with :class:`skg.Variogram `. Note: This only makes sense if you want to use a distance metric, binning procedure or semi-variance estimator that is not included in `gstools`, or if you are bound to `scikit-gstat` for any other reason. :class:`Variogram ` will *always* perform a full model fitting cycle on instantiation, which could lead to some substantial overhead here. This behavior might change in a future version of `scikit-gstat`. @@ -60,7 +60,7 @@ # bin_center, gamma = gs.vario_estimate((x, y), field) # # -# Here, we can use :class:`skg.Variogram `. +# Here, we can use :class:`skg.Variogram `. # From the shown arguments, :func:`estimator ` and # :func:`bin_func ` are using the default values: @@ -127,7 +127,7 @@ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # Now, with the example `from the GSTools docs `_ working, -# we can start chaning the arguments to create quite different empirical variograms. +# we can start changing the arguments to create quite different empirical variograms. # # **Note**: This should just illustrate the available possibilities, the result by no means produces a better # estimate of the initially created Gaussian random field.
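As a minimal sketch of the first export option listed above (assuming ``V`` is the fitted ``skgstat.Variogram`` from this tutorial), the empirical variogram can replace the ``gs.vario_estimate`` call and be fitted with any GSTools covariance model:

.. code-block:: python

    import gstools as gs

    # export the experimental variogram estimated by SciKit-GStat
    bin_center, gamma = V.get_empirical(bin_center=True)

    # fit a GSTools covariance model to the empirical values,
    # just like in the GSTools getting-started example
    fit_model = gs.Stable(dim=2)
    fit_model.fit_variogram(bin_center, gamma, nugget=False)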
@@ -172,7 +172,7 @@ # 6.2.1 exporting :class:`Variogram ` # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # -# In this example, the same Variogram from above is estimated, but we use the :func:`exponential ` model. +# In this example, the same Variogram from above is estimated, but we use the :func:`exponential ` model. # An exponential covariance function was used in the first place to create the field that was sampled. skg.plotting.backend('plotly') @@ -197,7 +197,7 @@ # and you want to use the :class:`Variogram.coordinates `, you **must** transpose them. # # .. code-block:: python -# +# # # variogram is a skgstat.Variogram instance # model = variogram.to_gstools() # cond_pos = variogram.coordinates.T @@ -224,16 +224,16 @@ malformed.plot() # %% -# Notice how the spatial properties as well as the value range has changed. -# That's why it is important to estimate :class:`Variogram ` or :any:`CovModel ` +# Notice how the spatial properties as well as the value range have changed. +# That's why it is important to estimate :class:`Variogram ` or :any:`CovModel ` # carefully and not let the GIS do that for you somewhere hidden in the dark. # %% # 6.3 ``to_gs_krige`` # ~~~~~~~~~~~~~~~~~~~ # -# Finally, after carefully esitmating and fitting a variogram using SciKit-GStat, -# you can also export it directly into a :any:`GSTools Krige ` instance. +# Finally, after carefully estimating and fitting a variogram using SciKit-GStat, +# you can also export it directly into a :any:`GSTools Krige ` instance. # We use the variogram as in the other sections: # export diff --git a/docs/tutorials/tutorial_07_maximum_likelihood_fit.py b/docs/tutorials/tutorial_07_maximum_likelihood_fit.py index 74ef0c2..715b890 100644 --- a/docs/tutorials/tutorial_07_maximum_likelihood_fit.py +++ b/docs/tutorials/tutorial_07_maximum_likelihood_fit.py @@ -85,25 +85,25 @@ # load the likelihood function for this variogram likelihood = get_likelihood(V) -# minimize the likelihood function +# minimize the likelihood function t3 = time() res = minimize(likelihood, p0, bounds=bounds, method='SLSQP') t4 = time() -# some priting +# some printing print(f"Processing time {np.round(t4 - t3, 2)} seconds") print('initial guess: ', p0.round(1)) print('optimal parameters:', res.x.round(1)) # %% # Here, you can see one of the main limitations of ML approaches: runtime. -# A sample size of 300 is rather small and the ML is running +# A sample size of 300 is rather small and the ML is running # considerably slower than MoM. -# +# # Apply the optimized parameters. For comparison, the three method-of-moment methods # from SciKit-GStat are applied as well. Note that the used sample is quite dense. # Thus we do not expect a difference between the MoM-based procedures. -# They should all find the same paramters. +# They should all find the same parameters. # use 100 steps x = np.linspace(0, V.bins[-1], 100) @@ -154,7 +154,7 @@ def f(h, a): return 0. return (3*h) / (2*a) - 0.5 * (h / a)**3 -# create the autocovariance matrix +# create the autocovariance matrix def get_A(r, s, b, dists): a = np.array([f(d, r) for d in dists]) A = squareform((s / (s + b)) * (1 - a)) @@ -182,7 +182,7 @@ def like(r, s, b, z, dists): # %% # You can adjust the autocorrelation function above to any other model and # implement other approaches by adjusting the ``like`` function. -# Finally, minimizing this function is the same as before.
You also have to # take care of the SciPy interface, as you need to wrap the custom likelihood # function to provide the parameters in the right format. from scipy.optimize import minimize diff --git a/docs/userguide/introduction.rst b/docs/userguide/introduction.rst index 761f925..1a164f7 100644 --- a/docs/userguide/introduction.rst +++ b/docs/userguide/introduction.rst @@ -22,7 +22,7 @@ What is geostatistics? ====================== The basic idea of geostatistics is to describe and estimate spatial -covariance, or correlation, in a set of point data. +covariance, or correlation, in a set of point data. While the main tool, the semi-variogram, is quite easy to implement and use, a lot of important assumptions underlie it. The typical application of geostatistics is interpolation. Therefore, @@ -79,7 +79,7 @@ own set of tools, and apparently definitions, and progress is still being made today. It is not the objective of ``scikit-gstat`` to be a comprehensive collection of all available tools. The objective is more to offer some common and also more sophisticated tools for variogram analysis. -Thus, when useing ``scikit-gstat``, you typically need another library for +Thus, when using ``scikit-gstat``, you typically need another library for the actual application, like interpolation. In most cases that will be `gstools `_. However, one can split geostatistics into three main fields, each of them with its -own tools: +own tools: .. note:: - I am not planning to implement tools from all three fields. + I am not planning to implement tools from all three fields. You can rather use one of the interfaces, like :func:`Variogram.to_gstools ` - to export a variogram to another library, that covers kriging and + to export a variogram to another library that covers kriging and spatial random field generation in great detail. @@ -121,4 +121,4 @@ Each function will return a dictionary of the actual sample and a brief descript import skgstat as skg skg.data.aniso(N=20) -These samples contain a coordinate and a value array. \ No newline at end of file +These samples contain a coordinate and a value array. diff --git a/docs/userguide/kriging.rst b/docs/userguide/kriging.rst index da05e4a..6b00db0 100644 --- a/docs/userguide/kriging.rst +++ b/docs/userguide/kriging.rst @@ -5,80 +5,80 @@ Interpolation Spatial interpolation ===================== -In geostatistics the procedure of spatial interpolation is -known as *Kriging*. That goes back to the inventor of -Kriging, a South-African mining engineer called Dave Krige. +In geostatistics the procedure of spatial interpolation is +known as *Kriging*. That goes back to the inventor of +Kriging, a South-African mining engineer called Danie Krige. He published the method in 1951. -In many text books you will also find the term *prediction*, but -be aware that Kriging is still based on the assumption -that the variable is a random field. THerefore I prefer the +In many text books you will also find the term *prediction*, but +be aware that Kriging is still based on the assumption +that the variable is a random field. Therefore I prefer the term *estimation* and would label the Kriging method a *BLUE*, **B** est **L** inear **U** nbiased **E** stimator. -In general terms, the objective is to estimate a variable at -a location that was not observed using observations from -close locations.
Kriging is considered to be the **best** -estimator, because we utilize the spatial structure -described by a variogram to find suitable weights for +In general terms, the objective is to estimate a variable at +a location that was not observed using observations from +close locations. Kriging is considered to be the **best** +estimator, because we utilize the spatial structure +described by a variogram to find suitable weights for averaging the observations at close locations. -Given a set of observation points `s` and observation +Given a set of observation points `s` and observation values at these locations :math:`Z(s)`, it can already be stated -that the estimation at an unobserved location :math:`Z^{*}(s_0)` +that the estimation at an unobserved location :math:`Z^{*}(s_0)` is a weighted mean: .. math:: Z^{*}(s_0) = \sum_{i=1}^{N} {\lambda}_i Z(s_i) - + where :math:`N` is the size of :math:`s` and :math:`\lambda` -is the array of weights. This is what we want to calculate +is the array of weights. This is what we want to calculate from a fitted variogram model. -Assumed that :math:`\lambda` had already been calculated, +Assuming that :math:`\lambda` has already been calculated, estimating the prediction is pretty straightforward: .. ipython:: python :suppress: - + import numpy as np from scipy.spatial.distance import pdist, squareform from pprint import pprint np.set_printoptions(precision=3) - + .. ipython:: python - + Z_s = np.array([4.2, 6.1, 0.2, 0.7, 5.2]) lam = np.array([0.1, 0.3, 0.1, 0.1, 0.4]) - + # calculate the weighted mean np.sum(Z_s * lam) - + or shorter: .. ipython:: python - + Z_s.dot(lam) -In the example above the weights were just made up. -Now we need to understand how this array of weights +In the example above the weights were just made up. +Now we need to understand how this array of weights can be calculated. Using a spatial model ===================== -Instead of just making up weights, we will now learn +Instead of just making up weights, we will now learn how we can utilize a variogram model to calculate the weights. -At its core a variogram describes how point observations become -more dissimilar with distance. Point distances can easily be calculated, +At its core a variogram describes how point observations become +more dissimilar with distance. Point distances can easily be calculated, not only for observed locations, but also for unobserved locations. -As the variogram is only a function of *distance*, we can easily +As the variogram is only a function of *distance*, we can easily calculate a semi-variance value for any possible combination of point -pairs. +pairs. -Assume we have five close observations for an unobserved location, -like in the example above. Instead of making up weights, we can use -the semi-variance value as a weight, as a first shot. -What we still need are locations and a variogram model. For both, +Assume we have five close observations for an unobserved location, +like in the example above. Instead of making up weights, we can use +the semi-variance value as a weight, as a first shot. +What we still need are locations and a variogram model. For both, we can just make something up. .. ipython:: python @@ -86,55 +86,55 @@ we can just make something up. x = np.array([4.0, 2.0, 4.1, 0.3, 2.0]) y = np.array([5.5, 1.2, 3.7, 2.0, 2.5]) z = np.array([4.2, 6.1, 0.2, 0.7, 5.2]) - + s0 = [2., 2.]
- + distance_matrix = pdist([s0] + list(zip(x,y))) - + squareform(distance_matrix) - -Next, we build up a variogram model of spherical shape, that uses a -effective range larger than the distances in the matrix. Otherwise, -we would just calcualte the arithmetic mean. + +Next, we build up a variogram model of spherical shape that uses an +effective range larger than the distances in the matrix. Otherwise, +we would just calculate the arithmetic mean. .. ipython:: python from skgstat.models import spherical - + # range= 7. sill = 2. nugget = 0. model = lambda h: spherical(h, 7.0, 2.0, 0.0) - -The distances to the first point `s0` are the first 5 elements in -the distance matrix. Therefore the semi-variances are calculated + +The distances to the first point `s0` are the first 5 elements in +the distance matrix. Therefore the semi-variances are calculated straightforwardly. .. ipython:: python variances = model(distance_matrix[:5]) assert len(variances) == 5 - -Of course we could now use the inverse of these semi-variances + +Of course we could now use the inverse of these semi-variances to weigh the observations, **but that would not be correct.** -Remeber, that this array `variances` is what we want the -target weights to incorporte. Whatever the weights are, these -variances should be respected. At the same time, the five +Remember that this array `variances` is what we want the +target weights to incorporate. Whatever the weights are, these +variances should be respected. At the same time, the five points among each other also have distances and therefore variances -that should be respected. Or to put it differently. -Take the first observation point :math:`s_1`. The associated variances -:math:`\gamma` to the other four points need to match the one +that should be respected. Or, to put it differently: +Take the first observation point :math:`s_1`. The associated variances +:math:`\gamma` to the other four points need to match the one just calculated. .. math:: a_1 * \gamma(s_1, s_1) + a_2 * \gamma(s_1, s_2) + a_3 * \gamma(s_1, s_3) + a_4 * \gamma(s_1, s_4) + a_5 * \gamma(s_1, s_5) = \gamma(s_1, s_0) Ok. First: :math:`\gamma(s_1, s_1)` is zero because the distance is obviously zero and the model does not have a nugget. All other distances have already been calculated. :math:`a_1 ... a_5` are factors. These are the weights used to satisfy all given semi-variances. This is what we need. Obviously, we cannot calculate 5 unknown variables from just one equation. Luckily we have four more observations. We can write the above equation for :math:`s_2, s_3, s_4, s_5` as well. -Additionally, we will write the linear equation system in matrix form as a +Additionally, we will write the linear equation system in matrix form as a dot product of the :math:`\gamma_i` and the :math:`a_i` part. .. math:: @@ -145,14 +145,14 @@ dot product of the :math:`\gamma_i` and the :math:`a_i` part.
\gamma(s_3, s_1) & \gamma(s_3, s_2) & \gamma(s_3, s_3) & \gamma(s_3, s_4) & \gamma(s_3, s_5) \\ \gamma(s_4, s_1) & \gamma(s_4, s_2) & \gamma(s_4, s_3) & \gamma(s_4, s_4) & \gamma(s_4, s_5) \\ \gamma(s_5, s_1) & \gamma(s_5, s_2) & \gamma(s_5, s_3) & \gamma(s_5, s_4) & \gamma(s_5, s_5) \\ - \end{pmatrix} * + \end{pmatrix} * \begin{bmatrix} a_1 \\ a_2 \\ a_3 \\ a_4 \\ a_5\\ - \end{bmatrix} = + \end{bmatrix} = \begin{pmatrix} \gamma(s_0, s_1) \\ \gamma(s_0, s_2) \\ @@ -161,13 +161,13 @@ dot product of the :math:`\gamma_i` and the :math:`a_i` part. \gamma(s_0, s_5) \\ \end{pmatrix} -That might look a bit complicated at first, but we have calculated almost everything. +That might look a bit complicated at first, but we have calculated almost everything. The last matrix holds the `variances` that we calculated in the last step. -The first matrix is of same shape as the sqaureform distance matrix calculated in -the very begining. All we need to do is to map the variogram model on it and +The first matrix is of the same shape as the squareform distance matrix calculated in +the very beginning. All we need to do is to map the variogram model on it and solve the system for the matrix of factors :math:`a_1 \ldots a_5`. In Python, there are several strategies for solving this problem. -Let's first build the matrix. We need a distance matrix without +Let's first build the matrix. We need a distance matrix without :math:`s_0` for that. .. ipython:: python @@ -192,12 +192,12 @@ And solve it: # calculate estimation Z_s.dot(a) -That's it. Well, not really. We might have used the -variogram and the spatial structure infered from the -data for getting better results, but in fact our -result is not **unbiased**. That means the solver +That's it. Well, not really. We might have used the +variogram and the spatial structure inferred from the +data for getting better results, but in fact our +result is not **unbiased**. That means the solver can choose any combination that satisfies the equation, -even setting everything to zero except one weight. +even setting everything to zero except one weight. That means :math:`a` could be biased. That would not be helpful. @@ -208,14 +208,14 @@ That would not be helpful. Kriging equation system ======================= -In the last section we came pretty close to the -Kriging algorithm. The only thing missing is to +In the last section we came pretty close to the +Kriging algorithm. The only thing missing is to assure unbiasedness. The weights sum up to almost one, but they are not one. -We want to ensure, that they are always one. This -is done by adding one more equation to the linear +We want to ensure that they are always one. This +is done by adding one more equation to the linear equation system. Also, we will rename the :math:`a` -array to :math:`\lambda`, which is more frequently +array to :math:`\lambda`, which is more frequently used for Kriging weights. The missing equation is: ..
math:: @@ -233,7 +233,7 @@ In matrix form this changes :math:`M` to: \gamma(s_4, s_1) & \gamma(s_4, s_2) & \gamma(s_4, s_3) & \gamma(s_4, s_4) & \gamma(s_4, s_5) & 1\\ \gamma(s_5, s_1) & \gamma(s_5, s_2) & \gamma(s_5, s_3) & \gamma(s_5, s_4) & \gamma(s_5, s_5) & 1\\ 1 & 1 & 1 & 1 & 1 & 0 \\ - \end{pmatrix} * + \end{pmatrix} * \begin{bmatrix} \lambda_1 \\ \lambda_2 \\ @@ -241,7 +241,7 @@ In matrix form this changes :math:`M` to: \lambda_4 \\ \lambda_5 \\ \mu \\ - \end{bmatrix} = + \end{bmatrix} = \begin{pmatrix} \gamma(s_0, s_1) \\ \gamma(s_0, s_2) \\ @@ -251,15 +251,15 @@ 1 \\ \end{pmatrix} -This is the Kriging equation for Ordinary Kriging that can be found -in text books. We added the ones to the result array and into the -matrix of semivariances. :math:`\mu` is a Lagrangian multiplier -that will be used to estimate the Kriging variance, which will +This is the Kriging equation for Ordinary Kriging that can be found +in text books. We added the ones to the result array and into the +matrix of semivariances. :math:`\mu` is a Lagrangian multiplier +that will be used to estimate the Kriging variance, which will be covered later. -Ordinary Kriging still assumes the observation and their residuals +Ordinary Kriging still assumes the observations and their residuals to be normally distributed and second-order stationary. -.. todo:: +.. todo:: Include the references to Kitanidis and Bardossy. Applied in Python, this can be done like: @@ -293,34 +293,34 @@ perfectly sum up to one now. Kriging error ============= -In the last step, we introduced a factor :math:`\mu`. -It was needed to solve the linear equation system -while assuring that the weights sum up to one. +In the last step, we introduced a factor :math:`\mu`. +It was needed to solve the linear equation system +while assuring that the weights sum up to one. This factor can in turn be added to the weighted -target semi-variances used to build the equation system, +target semi-variances used to build the equation system, to obtain the Kriging error. .. ipython:: python sum(B[:-1] * weights[:-1]) + weights[-1] -This is really usefull when a whole map is interpolated. +This is really useful when a whole map is interpolated. Using Kriging, you can also produce a map showing in which regions the interpolation is more certain. Example ======= -We can use the data shown in the variography section, -to finally interpolate the field and check the -Kriging error. You could either build a loop around the -code shown in the previous section, or just use +We can use the data shown in the variography section, +to finally interpolate the field and check the +Kriging error. You could either build a loop around the +code shown in the previous section, or just use skgstat. .. ipython:: python :suppress: - import pandas as pd + import pandas as pd from skgstat import Variogram import matplotlib.pyplot as plt @@ -328,9 +328,9 @@ :okwarning: data = pd.read_csv('data/sample_lr.csv') - V = Variogram(data[['x', 'y']].values, data.z.values, + V = Variogram(data[['x', 'y']].values, data.z.values, maxlag=90, n_lags=25, model='gaussian', normalize=False) - + @savefig kriging_used_variogram.png width=8in V.plot() @@ -339,28 +339,28 @@ ok = OrdinaryKriging(V, min_points=5, max_points=20, mode='exact') The :class:`OrdinaryKriging ` class -needs at least a fitted :class:`Variogram ` -instance.
Using `min_points` we can demand the Kriging equation system to be built upon at least 5 points to yield robust results. If not enough close observations are found within the effective range -of the variogram, the estimation will not be calculated and a +of the variogram, the estimation will not be calculated and a `np.NaN` value is returned. -The `max_points` parameter will set the upper bound of the +The `max_points` parameter will set the upper bound of the equation system by using, in this case, at most the 20 nearest points. Adding more will most likely not change the estimation, as more points -will recieve small, if not negligible, weights. -But it will increase the processing time, as each added point will +will receive small, if not negligible, weights. +But it will increase the processing time, as each added point will increase the Kriging equation system dimensionality by one. -The `mode` parameter sets the method that will +The `mode` parameter sets the method that will build up the equation system. There are two implemented: -`mode='exact'` and `mode='estimate'`. Estimate is much faster, but -if not used carefully, it can lead to numerical instability quite -quickly. In the technical notes section of this userguide, you +`mode='exact'` and `mode='estimate'`. Estimate is much faster, but +if not used carefully, it can lead to numerical instability quite +quickly. In the technical notes section of this userguide, you will find a whole section on the two modes. -Finally, we need the unobsered locations. The observations in +Finally, we need the unobserved locations. The observations in the file were drawn from a `100x100` random field. .. ipython:: python @@ -383,4 +383,3 @@ the file were drawn from a `100x100` random field. @savefig kriging_result_and_error.png width=8in fig.show() - diff --git a/docs/userguide/userguide.rst b/docs/userguide/userguide.rst index 364de39..469ba20 100644 --- a/docs/userguide/userguide.rst +++ b/docs/userguide/userguide.rst @@ -11,4 +11,4 @@ along with a more general introduction to variogram analysis. introduction variogram - kriging \ No newline at end of file + kriging diff --git a/docs/userguide/variogram.rst b/docs/userguide/variogram.rst index 1d35de3..b195c48 100644 --- a/docs/userguide/variogram.rst +++ b/docs/userguide/variogram.rst @@ -73,7 +73,7 @@ From my personal point of view, there are three main issues with this approach: Therefore one will have to understand how the :class:`Variogram Class ` works along with some basic -knowledge about variography in oder to be able to properly use ``scikit-gstat``. +knowledge about variography in order to be able to properly use ``scikit-gstat``. However, what we can discuss from the figure is what a variogram actually is. At its core it relates a dependent variable to an independent variable and, @@ -138,15 +138,15 @@ suitable compromise. Before diving into binning, we have to understand how the :class:`Variogram Class ` handles distance data. The -distance calculation can be controlled by the +distance calculation can be controlled by the :func:`dist_func ` argument, which takes either a string or a function. The default value is `'euclidean'`. This value is directly passed down to the :func:`pdist ` as the `metric` argument. -Consequently, the distance data is stored as a distance matrix for all -input locations passed to :class:`Variogram ` on -creation.
To be more precise, only the upper triangle is stored -in a :class:`array ` with the distance values sorted +creation. To be more precise, only the upper triangle is stored +in an :class:`array ` with the distance values sorted row-wise. Consider this very straightforward set of locations: .. ipython:: python @@ -167,15 +167,15 @@ Binning ------- As already mentioned, in real world observation data, there won't -be two observation location pairs at **exactly** the same distance. +be two observation location pairs at **exactly** the same distance. Thus, we need to group information about point pairs at **similar** distance -together, to learn how similar their observed values are. +together, to learn how similar their observed values are. With a :class:`Variogram `, we will basically try -to find and describe some systematic statistical behavior from these -similarities. The process of grouping distance data together is +to find and describe some systematic statistical behavior from these +similarities. The process of grouping distance data together is called *binning*. -``scikit-gstat`` has many different methods for binning distance data. +``scikit-gstat`` has many different methods for binning distance data. They can be set using the :func:`bin_func ` attribute. You have to pass the name of the method. The available methods are: @@ -188,17 +188,17 @@ The available methods are: * :func:`doane ` - derive number of bins by Doane's rule * :func:`fd ` - derive number of bins by Freedman-Diaconis estimator * :func:`kmeans ` - derive bins by K-Means clustering -* :func:`ward ` - derive bins by hierachical clustering and Ward's criterion +* :func:`ward ` - derive bins by hierarchical clustering and Ward's criterion * :func:`stable_entropy ` - derive bins from stable entropy setting -``['even', 'uniform', 'kmeans', 'ward', 'stable_entropy']`` methods will use two parameters +``['even', 'uniform', 'kmeans', 'ward', 'stable_entropy']`` methods will use two parameters to calculate the bins from the distance matrix: :any:`n_lags `, the number of bins, and :any:`maxlag `, the maximum distance lag to be considered. ``['sturges', 'scott', 'sqrt', 'fd', 'doane']`` will only use :any:`maxlag ` to derive :any:`n_lags ` from statistical properties of the distance matrix. -The :func:`even ` method will +The :func:`even ` method will then form :any:`n_lags ` bins from ``0`` to :any:`maxlag ` -of same width. +of the same width. The :func:`uniform ` method will form the same number of classes within the same range, using the same point pair count in each bin. The following example illustrates this: @@ -213,11 +213,11 @@ The following example illustrates this: distances = pdist(loc) -Now, look at the different bin edges for the calculated dummy +Now, look at the different bin edges for the calculated dummy distance matrix: .. ipython:: python - :okwarning: + :okwarning: even_width_lags(distances, 10, 250) uniform_count_lags(distances, 10, 250) @@ -252,16 +252,16 @@ and :any:`n_lags `. Observation differences ----------------------- -By the term *observation differences*, the distance between the -observed values are meant. As already layed out, the main idea of -a variogram is to systematially relate similarity of observations -to their spatial proximity.
The spatial part was covered in the -sections above, finalized with the calculation of a suitable +By the term *observation differences*, the distance between the +observed values is meant. As already laid out, the main idea of +a variogram is to systematically relate similarity of observations +to their spatial proximity. The spatial part was covered in the +sections above, finalized with the calculation of a suitable binning of all distances. We want to relate exactly these bins -to a measure of similarity of all observation point pairs that +to a measure of similarity of all observation point pairs that fall into this bin. -That's basically it. We need to do three more steps to come up +That's basically it. We need to do three more steps to come up with *one* value per bin, statistically describing the similarity at that distance. @@ -270,16 +270,16 @@ at that distance. 3. Describe all differences by one number -Finding all pairs within a bin is straightforward. We already have -the bin edges and all distances between all possible observation -point combinations (stored in the distance matrix). Using the -:func:`squareform ` function +Finding all pairs within a bin is straightforward. We already have +the bin edges and all distances between all possible observation +point combinations (stored in the distance matrix). Using the +:func:`squareform ` function of scipy, we *could* turn the distance matrix into a 2D version. Then the row and column indices align with the values indices. -However, :class:`Variogram ` implements +However, :class:`Variogram ` implements a method for doing this mapping a bit more efficiently. -A :class:`array ` of bin groups for each point pair that +An :class:`array ` of bin groups for each point pair that is indexed exactly like the :func:`distance ` array can be obtained by :func:`lag_groups `. @@ -292,14 +292,14 @@ submodule. coords, vals = skg.data.pancake(N=200).get('sample') V = skg.Variogram( - coords, + coords, vals, n_lags=25 ) V.maxlag = 500 Then, you can compare the first 10 point pairs from the distance matrix -to the first 10 elements returned by the +to the first 10 elements returned by the :func:`lag_groups function `. .. ipython:: python @@ -311,10 +311,10 @@ to the first 10 elements returned by the # first 10 groups V.lag_groups()[:10] -Now, we need the actual :func:`Variogram.bins ` +Now, we need the actual :func:`Variogram.bins ` to verify the grouping. -.. ipython:: python +.. ipython:: python :okwarning: V.bins @@ -323,22 +323,22 @@ The elements ``[2, 3, 6, 8]`` are grouped into group ``7``. Their distance values are ``[151.2, 156.1, 142.4, 156.5]``. The grouping starts with ``0``, therefore the corresponding upper bound of the bin is at index ``7`` and the lower at ``6``. -The bin edges are therefore ``140. < x < 160.``. +The bin edges are therefore ``140. < x < 160.``. Consequently, the binning and grouping worked fine. -If you want to access all value pairs at a given group, it would of -course be possible to use the machanism above to find the correct points.
+However, :class:`Variogram ` offers an iterator +that already does that for you: +:func:`lag_classes `. This iterator +will yield all pair-wise observation value differences for the bin +of the actual iteration. The first iteration (index = 0, if you wish) +will yield all differences of group id ``0``. .. note:: - :func:`lag_classes ` will yield - the difference in value of observation point pairs, not the pairs + :func:`lag_classes ` will yield + the difference in value of observation point pairs, not the pairs themselves. .. ipython:: python @@ -346,18 +346,18 @@ will yield all differences of group id ``0``. for i, group in enumerate(V.lag_classes()): print('[Group %d]: %.2f' % (i, np.mean(group))) -The only thing that is missing for a variogram is that we will not +The only thing that is missing for a variogram is that we will not use the arithmetic mean to describe the relationship. Experimental variograms ----------------------- -The last stage before a variogram function can be modeled is to define +The last stage before a variogram function can be modeled is to define an experimental variogram, also known as *empirical variogram*, which will be used to parameterize a variogram model. -However, the expermental variogram already contains a lot of information -about spatial relationships in the data. Therefore, it's worth looking -at more closely. Last but not least a poor expermental variogram will +However, the experimental variogram already contains a lot of information +about spatial relationships in the data. Therefore, it's worth looking +at it more closely. Last but not least, a poor experimental variogram will also affect the variogram model, which is ultimately used to interpolate the input data. @@ -373,16 +373,16 @@ the input data. :func:`experimental `, thus it is a tuple of two 1D arrays. -The previous sections summarized how distance is calculated and handeled -by the :class:`Variogram class `. -The :func:`lag_groups ` function makes it -possible to find corresponding observation value pairs for all distance -lags. Finally the last step will be to use a more suitable estimator -for the similarity of observation values at a specific lag. +The previous sections summarized how distance is calculated and handled +by the :class:`Variogram class `. +The :func:`lag_groups ` function makes it +possible to find corresponding observation value pairs for all distance +lags. Finally the last step will be to use a more suitable estimator +for the similarity of observation values at a specific lag. In geostatistics this estimator is called semi-variance and the -the most popular estimator is called *Matheron estimator*. +most popular one is the *Matheron estimator*. By default, the :func:`Matheron ` estimator will be used. -It is defined as +It is defined as .. math:: \gamma (h) = \frac{1}{2N(h)} \sum_{i=1}^{N(h)} x^2 with: .. math:: x = Z(x_i) - Z(x_{i+h}) where :math:`Z(x_i)` is the observation value at the i-th location -:math:`x_i`. :math:`h` is the distance lag and :math:`N(h)` is the +:math:`x_i`. :math:`h` is the distance lag and :math:`N(h)` is the number of point pairs at that lag. -You will find more estimators in :mod:`skgstat.estimators`.
+There is the :func:`Cressie-Hawkins `, +which is more robust to extreme values. Other so-called robust +estimators are :func:`Dowd ` or +:func:`Genton `. -The remaining are experimental estimators and should only be used -with caution. +The remaining ones are experimental estimators and should only be used +with caution. Let's compare them directly. You could use the code from the last section to group the pair-wise value differences into lag groups and apply the formula for each estimator. In the example below, we will iteratively change @@ -437,24 +437,24 @@ achieve this: Variogram models ---------------- -The last step to describe the spatial pattern in a data set +The last step to describe the spatial pattern in a data set using variograms is to model the empirically observed and calculated -experimental variogram with a proper mathematical function. -Technically, this setp is straightforward. We need to define a -function that takes a distance value and returns -a semi-variance value. One big advantage of these models is, that we +experimental variogram with a proper mathematical function. +Technically, this step is straightforward. We need to define a +function that takes a distance value and returns +a semi-variance value. One big advantage of these models is that we can assure different things, like positive definiteness. Most models are also monotonically increasing and approach an upper bound. Usually these models need three parameters to fit to the experimental -variogram. All three parameters have a meaning and are usefull +variogram. All three parameters have a meaning and are useful to learn something about the data. This upper bound a model approaches -is called *sill*. The distance at which 95% of the sill are approached -is called the *effective range*. +is called *sill*. The distance at which 95% of the sill is approached +is called the *effective range*. +That means the range is the distance at which +observation values do **not** become more dissimilar with increasing +distance. They are statistically independent. That also means it doesn't +make any sense to further describe spatial relationships of observations +further apart by means of geostatistics. The last parameter is the *nugget*. It is used to add semi-variance to all values. Graphically that means to *move the variogram up on the y-axis*. The nugget is the semi-variance modeled @@ -469,15 +469,15 @@ cannot be described spatially. it was decided to fit models on the *effective range*. You can translate one into the other quite easily. Transformation factors are reported in literature, but not commonly the same ones are used. - Finally, the transformation is always coded into SciKit-GStat's + Finally, the transformation is always coded into SciKit-GStat's :any:`models `, even if it's a 1:1 *transformation*. The spherical model ~~~~~~~~~~~~~~~~~~~ -The sperical model is the most commonly used variogram model. +The spherical model is the most commonly used variogram model. It is characterized by a very steep, exponential increase in semi-variance. -That means it approaches the sill quite quickly. It can be used when +That means it approaches the sill quite quickly.
It can be used when observations show strong dependency on short distances. It is defined like: @@ -488,30 +488,30 @@ if h < r, and .. math:: \gamma = b + C_0 - + else. ``b`` is the nugget, :math:`C_0` is the sill, ``h`` is the input -distance lag and ``r`` is the effective range. That is the range parameter -described above, that describes the correlation length. -Many other variogram model implementations might define the range parameter, -which is a variogram parameter. This is a bit confusing, as the range parameter -is specific to the used model. Therefore I decided to directly use the -*effective range* as a parameter, as that makes more sense in my opinion. - -As we already calculated an experimental variogram and find the spherical -model in the :any:`models ` sub-module, we can utilize e.g. -:func:`curve_fit ` from scipy to fit the model +distance lag and ``r`` is the effective range. That is the range parameter +described above, which describes the correlation length. +Many other variogram model implementations might define the range parameter, +which is a variogram parameter. This is a bit confusing, as the range parameter +is specific to the used model. Therefore I decided to directly use the +*effective range* as a parameter, as that makes more sense in my opinion. + +As we already calculated an experimental variogram and can find the spherical +model in the :any:`models ` sub-module, we can utilize e.g. +:func:`curve_fit ` from scipy to fit the model using a least squares approach. .. note:: - With the given example, the default usage of :func:`curve_fit ` + With the given example, the default usage of :func:`curve_fit ` will use the Levenberg-Marquardt algorithm, without an initial guess for the parameters. - This will fail to find a suitable range parameter. + This will fail to find a suitable range parameter. Thus, for this example, you need to pass an initial guess to the method. - + .. ipython:: python :okwarning: - + from skgstat import models # set estimator back V.estimator = 'matheron' V.model = 'spherical' xdata = V.bins ydata = V.experimental - + from scipy.optimize import curve_fit - + # initial guess - otherwise lm will not find a range - p0 = [np.mean(xdata), np.mean(ydata), 0] + p0 = [np.mean(xdata), np.mean(ydata), 0] cof, cov = curve_fit(models.spherical, xdata, ydata, p0=p0) - + Here, *cof* are now the coefficients found to fit the model to the data. .. ipython:: python :okwarning: print("range: %.2f sill: %.f nugget: %.2f" % (cof[0], cof[1], cof[2])) - + .. ipython:: python :okwarning: - + xi = np.linspace(xdata[0], xdata[-1], 100) yi = [models.spherical(h, *cof) for h in xi] - + plt.plot(xdata, ydata, 'og') @savefig manual_fitted_variogram.png width=8in plt.plot(xi, yi, '-b'); -The :class:`Variogram Class ` does in principle the -same thing. The only difference is that it tries to find a good -initial guess for the parameters and limits the search space for -parameters. That should make the fitting more robust. +The :class:`Variogram Class ` does in principle the +same thing. The only difference is that it tries to find a good +initial guess for the parameters and limits the search space for +parameters. That should make the fitting more robust. Technically, we used the Levenberg-Marquardt algorithm above. That's a commonly used, very fast least squares implementation.
However, sometimes it fails to find good parameters, as it is @@ -556,9 +556,9 @@ The default for :class:`Variogram ` is Trust-Region Reflective (TRF), which is also the default for :class:`Variogram `. It uses a valid parameter space as bounds and therefore won't fail in finding parameters. -You can, hoever, switch to Levenberg-Marquardt -by setting the :class:`Variogram.fit_method ` -to 'lm'. +You can, however, switch to Levenberg-Marquardt +by setting the :class:`Variogram.fit_method ` +to 'lm'. .. ipython:: python @@ -568,7 +568,7 @@ to 'lm'. @savefig trf_automatic_fit.png width=8in V.plot(); pprint(V.parameters) - + V.fit_method = 'lm' @savefig lm_automatic_fit.png width=8in V.plot(); @@ -576,30 +576,30 @@ .. note:: - In this example, the fitting method does not make a difference + In this example, the fitting method does not make a difference at all. Generally, you can say that Levenberg-Marquardt is faster and TRF is more robust. Exponential model ~~~~~~~~~~~~~~~~~ -The exponential model is quite similar to the spherical one. -It models semi-variance values to increase exponentially with -distance, like the spherical. The main difference is that this -increase is not as steep as for the spherical. That means, the -effective range is larger for an exponential model, that was +The exponential model is quite similar to the spherical one. +It models semi-variance values to increase exponentially with +distance, like the spherical. The main difference is that this +increase is not as steep as for the spherical. That means the +effective range is larger for an exponential model that was parameterized with the same range parameter. .. note:: - Remember that SciKit-GStat uses the *effective range* + Remember that SciKit-GStat uses the *effective range* to overcome this confusing behaviour. Consequently, the exponential can be used for data that shows a way -too large spatial correlation extent for a spherical model to -capture. +too large spatial correlation extent for a spherical model to +capture. -Applied to the data used so far, you can see the difference between +Applied to the data used so far, you can see the difference between the two models quite nicely: .. ipython:: python @@ -615,7 +615,7 @@ the two models quite nicely: # switch the model V.model = 'exponential' - + @savefig compare_spherical_exponential.png width=8in V.plot(axes=axes[1], hist=False); @@ -635,7 +635,7 @@ Also, the goodness of fit is quite comparable: r_sph = V.describe().get('effective_range') # exponential - V.model = 'exponential' + V.model = 'exponential' rmse_exp = V.rmse r_exp = V.describe().get('effective_range') @@ -645,7 +645,7 @@ But the difference in effective range is more pronounced: .. ipython:: python - + print('Spherical effective range: %.1f' % r_sph) print('Exponential effective range: %.1f' % r_exp) @@ -676,12 +676,12 @@ Finally, we can use both models to perform a Kriging interpolation. axes[0].set_title('Spherical') axes[1].set_title('Exponential') axes[0].imshow(field1, origin='lower', cmap='terrain_r', vmin=vmin, vmax=vmax) - + @savefig model_compare_kriging.png width=8in axes[1].imshow(field2, origin='lower', cmap='terrain_r', vmin=vmin, vmax=vmax) While the two final maps look alike, in the difference plot, you can -spot some differences.
While performing an analysis, with the model functions in mind,
you should take these differences and add them as uncertainty caused by model choice to
your final result.

@@ -701,19 +701,19 @@ your final result.

 Gaussian model
 ~~~~~~~~~~~~~~

-The last fundamental variogram model is the Gaussian.
-Unlike the spherical and exponential it models a very different
+The last fundamental variogram model is the Gaussian.
+Unlike the spherical and exponential, it models a very different
 spatial relationship between semi-variance and distance.
-Following the Gaussian model, observations are assumed to
-be similar up to intermediate distances, showing just a
-gentle increase in semi-variance. Then, the semi-variance
-increases dramatically wihtin just a few distance units up
+Following the Gaussian model, observations are assumed to
+be similar up to intermediate distances, showing just a
+gentle increase in semi-variance. Then, the semi-variance
+increases dramatically within just a few distance units up
 to the sill, which is again approached asymptotically.
-The model can be used to simulate very sudden and sharp
-changes in the variable at a specific distance,
+The model can be used to simulate very sudden and sharp
+changes in the variable at a specific distance,
 while being very similar at smaller distances.

-To show a typical Gaussian model, we will load another
+To show a typical Gaussian model, we will load another
 sample dataset, that actually shows a Gaussian experimental variogram.

 .. ipython:: python
@@ -731,15 +731,15 @@ sample dataset, that actually shows a Gaussian experimental variogram.

 Matérn model
 ~~~~~~~~~~~~

-Another, quite powerful model is the Matérn model.
-Especially in cases where you cannot chose the appropiate model a priori so easily.
-The Matérn model takes an additional smoothness paramter, that can
-change the shape of the function in between an exponential
-model shape and a Gaussian one.
+Another, quite powerful model is the Matérn model,
+especially in cases where you cannot choose the appropriate model a priori so easily.
+The Matérn model takes an additional smoothness parameter that can
+change the shape of the function in between an exponential
+model shape and a Gaussian one.

 .. ipython:: python
     :okwarning:
-
+
     xi = np.linspace(0, 100, 100)

     # plot an exponential and a gaussian
@@ -766,45 +766,92 @@ variogram is rather showing a Gaussian or exponential behavior.
 If you would like to export a Variogram instance to gstools,
 the smoothness parameter may not be smaller than ``0.2``.

+Sum of models
+~~~~~~~~~~~~~
+
+All the above models can be combined into a sum of models, in order to fit more complex empirical variograms that might
+capture signals with multiple correlation ranges.
+To fit with a sum of models, simply add a "+" between any number of models:
+
+.. ipython:: python
+    :okwarning:
+
+    # Fit a sum of two spherical models
+    V.model = "spherical+spherical"
+
+    @savefig sum_of_models.png width=8in
+    V.plot();
+
+Custom model
+~~~~~~~~~~~~
+
+Additionally, any custom model can be passed to the model argument to fit a specific function to the empirical
+variogram. This model needs to be built with the ``@variogram`` decorator, which requires the lag argument
+(typically, ``h``) to be listed first in the custom function.
+
+.. caution::
+
+    When using a custom model, it is highly recommended to define the parameter bounds as these cannot be derived
+    logically by SciKit-GStat as for other models.
This is done by passing either ``fit_bounds`` to ``Variogram()``
+    at instantiation or ``bounds`` to ``Variogram.fit()``.
+
+
+.. ipython:: python
+    :okwarning:
+
+    # Build a custom model by applying the @variogram decorator (here adding a linear term to a spherical model)
+    from skgstat.models import variogram, spherical
+    @variogram
+    def custom_model(h, r1, c1, a):
+        return spherical(h, r1, c1) + h * a
+    V.model = custom_model
+
+    # We define the bounds for r1, c1 and a
+    bounds_custom = [(0, 0, 0), (np.max(V.bins), np.max(V.experimental), 2)]
+    V.fit(bounds=bounds_custom)
+
+    @savefig custom_model.png width=8in
+    V.plot();
+
 When direction matters
 ======================

 What is 'direction'?
 --------------------

-The classic approach to calculate a variogram is based on the
-assumption that covariance between observations can be related to
-their separating distance. For this, point pairs of all observation
+The classic approach to calculate a variogram is based on the
+assumption that covariance between observations can be related to
+their separating distance. For this, point pairs of all observation
 points are formed and it is assumed that they can be formed without
 any restriction.
-The only paramter to be influenced is a limiting distance, beyond which
-a point pair does not make sense anymore.
+The only parameter to be influenced is a limiting distance, beyond which
+a point pair does not make sense anymore.

-This assumption might not always hold. Especially in landscapes, processes do
-not occur randomly, but in an organized manner. This organization is often
+This assumption might not always hold. Especially in landscapes, processes do
+not occur randomly, but in an organized manner. This organization is often
 directed, which can lead to stronger covariance in one direction than another.

 Therefore, another step has to be introduced before lag classes are formed.
-The *direction* of a variogram is then a orientation, which two points need.
-If they are not oriented in the specified way, they will be ignored while calculating
-a semi-variance value for a given lag class. Usually, you will specify a
-orientation, which is called :func:`azimuth `,
-and a :func:`tolerance `, which is an
+The *direction* of a variogram is then an orientation, which two points need to match.
+If they are not oriented in the specified way, they will be ignored while calculating
+a semi-variance value for a given lag class. Usually, you will specify an
+orientation, which is called :func:`azimuth `,
+and a :func:`tolerance `, which is an
 offset from the given azimuth, at which a point pair will still be accepted.

-Defining orientiation
+Defining orientation
 ---------------------

 One has to decide how orientation of two points is determined. In scikit-gstat,
 orientation between two observation points is only defined in :math:`\mathbb{R}^2`.
-We define the orientation as the **angle between the vector connecting two observation points
+We define the orientation as the **angle between the vector connecting two observation points
 and the x-axis**.

-Thus, also the :func:`azimuth ` is defined as an
-angle of the azimutal vector to the x-axis, with an
-:func:`tolerance ` in degrees added to the
+Thus, also the :func:`azimuth ` is defined as an
+angle of the azimuthal vector to the x-axis, with a
+:func:`tolerance ` in degrees added to the
 exact azimuthal orientation clockwise and counter clockwise.
-The angle :math:`\Phi` between two vetors ``u,v`` is given like:
+The angle :math:`\Phi` between two vectors ``u,v`` is given like:

 .. math::

@@ -828,29 +875,29 @@ The angle :math:`\Phi` between two vetors ``u,v`` is given like:

     @savefig sample_orientation_of_2_1.png width=6in
     ax.annotate('26.5°', (1.5, 0.25), fontsize=14, color='r')

-The described definition of orientation is illustrated in the figure above.
+The described definition of orientation is illustrated in the figure above.
 There are two observation points, :math:`A (0,0)` and :math:`B (2, 1)`. To decide
-wether to account for them when calculating the semi-variance at their separating
+whether to account for them when calculating the semi-variance at their separating
 distance lag, their orientation is used. Only if the direction of the variogram includes
-this orientation, the points are used. Imagine the azimuth and tolerance would be
+this orientation, the points are used. Imagine the azimuth and tolerance would be
 ``45°``, then anything between ``0°`` (East) and ``90°`` orientation would be included.
-The given example shows the orientation angle :math:`\Phi = 26.5°`, which means the
+The given example shows the orientation angle :math:`\Phi = 26.5°`, which means the
 vector :math:`\overrightarrow{AB}` is included.

 Calculating orientations
 ------------------------

-SciKit-GStat implements a slightly adaped version of the formula given in the
-last section. It makes use of symmetric search areas (tolerance is applied clockwise
-and counter clockwise) und therefore any calculated angle might be the result
-of calculating the orientation of :math:`\overrightarrow{AB}` or
-:math:`\overrightarrow{BA}`. Mathematically, these two vectors have two different
-angles, but they are always both taken into account or omitted for a variagram
-at the same time. Thus, it does not make a difference for variography.
-However, it does make a difference when you try to use the orientation angles
+SciKit-GStat implements a slightly adapted version of the formula given in the
+last section. It makes use of symmetric search areas (tolerance is applied clockwise
+and counter clockwise) and therefore any calculated angle might be the result
+of calculating the orientation of :math:`\overrightarrow{AB}` or
+:math:`\overrightarrow{BA}`. Mathematically, these two vectors have two different
+angles, but they are always both taken into account or omitted for a variogram
+at the same time. Thus, it does not make a difference for variography.
+However, it does make a difference when you try to use the orientation angles
 directly as the containing matrix can contain the inverse angles.

-This can be demonstrated by an easy example. Let ``c`` be a set of points mirrored
+This can be demonstrated by an easy example. Let ``c`` be a set of points mirrored
 along the x-axis.

 .. ipython:: python

@@ -868,7 +915,7 @@ We can plug these two arrays into the the formula above:

     angles = np.degrees(np.arccos(u.dot(east) / np.sqrt(np.sum(u**2, axis=1))))
     angles.round(1)

-You can see, that the both points and their mirrored counterpart have the same
+You can see that both points and their mirrored counterparts have the same
 angle to the x-axis, just like expected. This can be visualized by the plot below:

 .. ipython:: python

@@ -884,12 +931,12 @@ angle to the x-axis, just like expected.
This can be visualized by the plot belo
     @savefig sample_orientation_of_multiple_points.png width=6in
     ax.scatter(c[:,0], c[:,1], 50, c='r')

-The main difference to the internal structure storing the orientation angles for a
+The main difference is the internal structure used to store the orientation angles: a
 :class:`DirectionalVariogram ` instance will store different angles.
-To use the class on only five points, we need to prevent the class from fitting, as
+To use the class on only five points, we need to prevent the class from fitting, as
 fitting on only 5 points will not work. But this does not affect the orientation calculations.
-Therefore, the :func:`fit ` mehtod is overwritten.
+Therefore, the :func:`fit ` method is overwritten.

 .. ipython:: python
     :okwarning:

@@ -902,7 +949,7 @@ Therefore, the :func:`fit ` mehtod is overwrit
     DV._calc_direction_mask_data()
     np.degrees(DV._angles + np.pi)[:len(c) - 1]

-The first two points (with positive y-coordinate) show the same result. The other two,
+The first two points (with positive y-coordinate) show the same result. The other two,
 with negative y-coordinates, are also calculated counter clockwise:

 .. ipython:: python

@@ -910,15 +957,15 @@ with negative y-coordinates, are also calculated counter clockwise:

     360 - np.degrees(DV._angles + np.pi)[[2,3]]

-The :class:`DirectionalVariogram ` class has a plotting
-function to show a network graph of all point pairs that are oriented in the
-variogram direction. But first we need to increase the tolerance as half tolerance
+The :class:`DirectionalVariogram ` class has a plotting
+function to show a network graph of all point pairs that are oriented in the
+variogram direction. But first we need to increase the tolerance, as the half tolerance
 (``45° / 2 = 22.5°`` clockwise and counter clockwise) is smaller than both orientations.

 .. ipython:: python
     :okwarning:

-    DV.tolerance = 90
+    DV.tolerance = 90

     @savefig sample_pair_field_plot.png width=8in
     DV.pair_field()

@@ -934,8 +981,8 @@ Directional variogram
     coords = np.random.randint(100, size=(300,2))
     vals = [field[_[0], _[1]] for _ in coords]

-The next step is to create two different variogram instances, which share the same
-parameters, but use a different azimuth angle. One oriented to North and the
+The next step is to create two different variogram instances, which share the same
+parameters, but use a different azimuth angle. One is oriented to North and the
 second one oriented to East.

 .. ipython:: python

@@ -945,7 +992,7 @@ second one oriented to East.
     Veast = skg.DirectionalVariogram(coords, vals, azimuth=0, tolerance=90, maxlag=80, n_lags=20)
     pd.DataFrame({'north':Vnorth.describe(), 'east': Veast.describe()})

-You can see, how the two are differing in effective range and also sill, only
+You can see how the two differ in effective range and also sill, only
 caused by the orientation. Let's look at the experimental variogram:

 .. ipython:: python

@@ -958,16 +1005,16 @@ caused by the orientation. Let's look at the experimental variogram:

     @savefig expermiental_direcional_varigram_comparison.png width=8in
     plt.legend(loc='upper left')

-The shape of both experimental variograms is very similar on the first 40 meters
-of distance. Within this range, the apparent anisotropy is not pronounced.
+The shape of both experimental variograms is very similar over the first 40 meters
+of distance. Within this range, the apparent anisotropy is not pronounced.
The East-West oriented variograms also have an effective range of only about 40 meters,
-which means that in this direction the observations become statistically independent
+which means that in this direction the observations become statistically independent
 at larger distances.
-For the North-South variogram the effective range is way bigger and the variogram
-plot reveals much larger correlation lengths in that direction. The spatial
+For the North-South variogram the effective range is much larger and the variogram
+plot reveals much larger correlation lengths in that direction. The spatial
 dependency is thus directed in North-South direction.

-To perform Kriging, you would now transform the data, especially in North-West
-direction, unitl both variograms look the same within the effective range.
+To perform Kriging, you would now transform the data, especially in North-West
+direction, until both variograms look the same within the effective range.
 Finally, the Kriging result is back-transformed into the original coordinate system.
diff --git a/requirements.txt b/requirements.txt
index 699e548..df005bc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-scipy
+scipy<=1.11.1
 numpy
 pandas
 matplotlib
@@ -6,4 +6,4 @@ numba
 scikit-learn
 imageio
 tqdm
-typing_extensions
\ No newline at end of file
+typing_extensions
diff --git a/requirements.unittest.3.6.txt b/requirements.unittest.3.6.txt
index 270dd01..29546ab 100644
--- a/requirements.unittest.3.6.txt
+++ b/requirements.unittest.3.6.txt
@@ -3,4 +3,4 @@ pytest-cov
 pytest-depends
 pykrige
 gstools>=1.3
-plotly
\ No newline at end of file
+plotly
diff --git a/requirements.unittest.3.7.txt b/requirements.unittest.3.7.txt
index 270dd01..29546ab 100644
--- a/requirements.unittest.3.7.txt
+++ b/requirements.unittest.3.7.txt
@@ -3,4 +3,4 @@ pytest-cov
 pytest-depends
 pykrige
 gstools>=1.3
-plotly
\ No newline at end of file
+plotly
diff --git a/requirements.unittest.3.8.txt b/requirements.unittest.3.8.txt
index 89886b8..93111bc 100644
--- a/requirements.unittest.3.8.txt
+++ b/requirements.unittest.3.8.txt
@@ -4,4 +4,4 @@ pytest-depends
 pykrige
 gstools>=1.3
 plotly
-gstatsim
\ No newline at end of file
+gstatsim
diff --git a/skgstat/DirectionalVariogram.py b/skgstat/DirectionalVariogram.py
index b57686f..9d5c742 100644
--- a/skgstat/DirectionalVariogram.py
+++ b/skgstat/DirectionalVariogram.py
@@ -16,7 +16,7 @@ class DirectionalVariogram(Variogram):
     coordinates and relates them to one of the semi-variance
     measures of the given dependent values.

-    The direcitonal version of a Variogram will only form paris of points
+    The directional version of a Variogram will only form pairs of points
     that share a specified spatial relationship.

@@ -69,7 +69,7 @@ def __init__(self,
             * minmax [MinMax Scaler]
             * entropy [Shannon Entropy]

-        If a callable is passed, it has to accept an array of absoulte
+        If a callable is passed, it has to accept an array of absolute
         differences, aligned to the 1D distance matrix (flattened upper
         triangle) and return a scalar, that converges towards small
         values for similarity (high covariance).
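The docstring above fully specifies the estimator interface, so a custom
estimator is easy to sketch. A minimal, hypothetical example (the name
``mean_abs_diff`` is ours, not part of the library):

.. ipython:: python
    :okwarning:

    import numpy as np

    # hypothetical custom estimator: mean of the pairwise differences
    # (the input is already an array of absolute differences, aligned to
    # the flattened upper triangle of the distance matrix)
    def mean_abs_diff(diffs):
        return np.nanmean(diffs)

Such a callable could then be passed via the ``estimator`` argument, e.g.
``DirectionalVariogram(coords, vals, estimator=mean_abs_diff)``.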
@@ -294,6 +294,7 @@ def __init__(self,
         # set the directional model
         self._directional_model = None
+        self._is_model_custom = False
         self.set_directional_model(model_name=directional_model)

         # the binning settings
@@ -406,7 +407,7 @@ def _calc_direction_mask_data(self, force=False):
     def azimuth(self):
         """Direction azimuth

-        Main direction for te selection of points in the formation of point
+        Main direction for the selection of points in the formation of point
         pairs. East of the coordinate plane is defined to be 0° and then the
         azimuth is set clockwise up to 180° and counter-clockwise to -180°.

@@ -628,7 +629,7 @@ def pair_field(self, ax=None, cmap="gist_rainbow", points='all', add_points=True
         cmap : string
             Any color-map name that is supported by matplotlib
         points : 'all', int, list
-            If not ``'all'``, only the given coordinate (int) or
+            If not ``'all'``, only the given coordinate (int) or
             list of coordinates (list) will be plotted. Recommended, if the
             input data is quite large.
         add_points : bool
@@ -636,7 +637,7 @@ def pair_field(self, ax=None, cmap="gist_rainbow", points='all', add_points=True
         alpha : float
             Alpha value for the colors to make overlapping vertices
             visualize better. Defaults to ``0.3``.
-
+
         """
         # get the backend
         used_backend = plotting.backend()
@@ -644,7 +645,7 @@ def pair_field(self, ax=None, cmap="gist_rainbow", points='all', add_points=True
         if used_backend == 'matplotlib':
             return plotting.matplotlib_pair_field(self, ax=ax, cmap=cmap, points=points, add_points=add_points, alpha=alpha, **kwargs)
         elif used_backend == 'plotly':
-            return plotting.plotly_pair_field(self, fig=ax, points=points, add_points=add_points, alpha=alpha, **kwargs)
+            return plotting.plotly_pair_field(self, fig=ax, points=points, add_points=add_points, alpha=alpha, **kwargs)

     def _triangle(self, angles, dists):
         r"""Triangular Search Area
diff --git a/skgstat/Kriging.py b/skgstat/Kriging.py
index c2a1e38..b4f4932 100644
--- a/skgstat/Kriging.py
+++ b/skgstat/Kriging.py
@@ -340,7 +340,7 @@ def _estimator(self, idx):
         sigma_index is now always incremented

         """
-        # indicate if this esimation raised an error
+        # indicate if this estimation raised an error
         did_error = False

         # run estimation
@@ -397,7 +397,7 @@ def _krige(self, idx):
         .. math::
             \hat{Z} = \sum_i(w_i * z_i)

-        where :math:`w_i` is the calulated kriging weight for the i-th point
+        where :math:`w_i` is the calculated kriging weight for the i-th point
         and :math:`z_i` is the observed value at that point.
         The kriging variance :math:`\sigma^2` (sigma) is calculated as follows:
diff --git a/skgstat/MetricSpace.py b/skgstat/MetricSpace.py
index 2b46abb..a2e37f5 100644
--- a/skgstat/MetricSpace.py
+++ b/skgstat/MetricSpace.py
@@ -307,7 +307,7 @@ def __init__(
         self._ltree = None
         self._rtree = None
         self._dists = None
-        # Do a very quick check to see throw exceptions
+        # Do a very quick check to throw exceptions early
         # if self.dist_metric is invalid...
pdist(self.coords[:1, :], metric=self.dist_metric) @@ -385,7 +385,7 @@ def dists(self): return self._dists -# Subfunctions used in RasterEquidistantMetricSpace +# Subfunctions used in RasterEquidistantMetricSpace # (outside class so that they can be pickled by multiprocessing) def _get_disk_sample( coords: np.ndarray, @@ -407,7 +407,7 @@ def _get_disk_sample( count = np.count_nonzero(idx1) indices1 = np.argwhere(idx1) - # Second index: randomly select half of the valid pixels, + # Second index: randomly select half of the valid pixels, # so that the other half can be used by the equidist # sample for low distances indices2 = rnd_func.choice(count, size=min(count, sample_count), replace=False) diff --git a/skgstat/SpaceTimeVariogram.py b/skgstat/SpaceTimeVariogram.py index fbf64cf..87deffd 100644 --- a/skgstat/SpaceTimeVariogram.py +++ b/skgstat/SpaceTimeVariogram.py @@ -337,7 +337,7 @@ def t_lags(self): raise ValueError("Only 'max' supported as string argument.") elif self._t_lags is None: self._t_lags = len(self.tbins) - + return self._t_lags @t_lags.setter @@ -394,7 +394,7 @@ def set_bin_func(self, bin_func, axis): Parameters ---------- bin_func : str - Sepcifies the function to be loaded. Can be either 'even' or + Specifies the function to be loaded. Can be either 'even' or 'uniform'. axis : str Specifies the axis to be used for binning. Can be either 'space' or @@ -886,7 +886,7 @@ def _calc_diff(self, force=False): Notes ----- This is a Python only implementation that can get quite slow as any - added obervation on the space or time axis will increase the matrix + added observation on the space or time axis will increase the matrix dimension by one. It is also slow as 4 loops are needed to loop the matrix. I am evaluating at the moment if the function performs better using numpys vectorizations or by implementing a Cython, Fortran, @@ -1327,7 +1327,7 @@ def contour(self, ax=None, zoom_factor=100., levels=10, colors='k', Plot a 2D contour plot of the experimental variogram. The experimental semi-variance values are spanned over a space - time lag meshgrid. This grid is (linear) interpolated onto the given - resolution for visual reasons. Then, contour lines are caluclated + resolution for visual reasons. Then, contour lines are calculated from the denser grid. Their number can be specified by *levels*. Parameters @@ -1354,7 +1354,7 @@ def contour(self, ax=None, zoom_factor=100., levels=10, colors='k', The method used for densifying the meshgrid. Can be one of 'fast' or 'precise'. Fast will use the scipy.ndimage.zoom method to incresae the node density. This is fast, but cannot - interpolate *behind* any NaN occurance. 'Precise' performs an + interpolate *behind* any NaN occurrence. 'Precise' performs an actual linear interpolation between the nodes using scipy.interpolate.griddata. This takes more time, but the result is less smoothed out. @@ -1384,7 +1384,7 @@ def contourf(self, ax=None, zoom_factor=100., levels=10, Plot a 2D filled contour plot of the experimental variogram. The experimental semi-variance values are spanned over a space - time lag meshgrid. This grid is (linear) interpolated onto the given - resolution for visual reasons. Then, contour lines are caluclated + resolution for visual reasons. Then, contour lines are calculated from the denser grid. Their number can be specified by *levels*. Finally, each contour region is filled with a color supplied by the specified *cmap*. 
@@ -1411,7 +1411,7 @@ def contourf(self, ax=None, zoom_factor=100., levels=10, The method used for densifying the meshgrid. Can be one of 'fast' or 'precise'. Fast will use the scipy.ndimage.zoom method to incresae the node density. This is fast, but cannot - interpolate *behind* any NaN occurance. 'Precise' performs an + interpolate *behind* any NaN occurrence. 'Precise' performs an actual linear interpolation between the nodes using scipy.interpolate.griddata. This takes more time, but the result is less smoothed out. diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index 0b3dca8..1e439d8 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -2,6 +2,7 @@ Variogram class """ import copy +import inspect from typing_extensions import Literal import warnings from typing import Iterable, Callable, List, Optional, Union, Tuple @@ -89,12 +90,16 @@ def __init__(self, * minmax [MinMax Scaler] * entropy [Shannon Entropy] - If a callable is passed, it has to accept an array of absoulte + If a callable is passed, it has to accept an array of absolute differences, aligned to the 1D distance matrix (flattened upper triangle) and return a scalar, that converges towards small values for similarity (high covariance). - model : str - String identifying the theoretical variogram function to be used + model : str | Callable + .. versionchanged:: 1.0.12 + Added support for sum of models (e.g., "spherical+gaussian"), or custom model (Callable). Using + `fit_bounds` to optimize the fit is recommended for custom models, and can be useful for sum of models. + + String or callable identifying the theoretical variogram function to be used to describe the experimental variogram. Can be one of: * spherical [Spherical, default] @@ -105,6 +110,9 @@ def __init__(self, * matern [Matérn model] * nugget [nugget effect variogram] + Any number of these theoretical models can be summed using "+" iteratively, e.g. "spherical+cubic+matern". + The nugget parameters of the models are removed except for the last model (sum of nuggets = single nugget). + dist_func : str String identifying the distance function. Defaults to 'euclidean'. Can be any metric accepted by @@ -132,7 +140,7 @@ def __init__(self, :func:`distance ` as `n_lags`. * `'kmeans'` uses KMeans clustering to well supported bins - * `'ward'` uses hierachical clustering to find + * `'ward'` uses hierarchical clustering to find minimum-variance clusters. More details are given in the documentation for @@ -187,7 +195,8 @@ def __init__(self, Defaults to False. If True, a nugget effet will be added to all Variogram.models as a third (or fourth) fitting parameter. A nugget is essentially the y-axis interception of the theoretical - variogram function. + variogram function. For a sum of variogram, the nugget is defined + in its last model. maxlag : float, str Can specify the maximum lag distance directly by giving a value larger than 1. The binning function will not find any lag class @@ -247,6 +256,24 @@ def __init__(self, If present, the plot will indicate the confidence interval as error bars around the experimental variogram. + fit_bounds: 2-tuple of array_like or Bounds, optional + .. versionadded:: 1.0.12 + + Lower and upper bounds on parameters passed to scipy.optimize.curve_fit. + + Order is typically (range, sill, nugget) or (range, sill, smoothness, nugget) for individual models, or + (range1, sill1, nugget1, range2, sill2, nugget2) for a sum of 2 models. + Recommended for custom models, where bounds cannot be determined logically. 
+            For internal models, defaults to known min/max values for the sill (0, max variance), range (0, max lag)
+            and smoothness (0, 2) or (0, 20) for stable and matern, respectively.
+
+        fit_p0: array_like, optional
+            .. versionadded:: 1.0.12
+
+            Initial guess for the parameters passed to scipy.optimize.curve_fit.
+
+            Same order as for fit_bounds.
+            Defaults to the upper bound values. For custom models, if no bounds are defined, defaults to 1.
        """
        # Before we do anything else, make kwargs available
        self._kwargs = self._validate_kwargs(**kwargs)
@@ -335,17 +362,19 @@ def __init__(self,
         if fit_method is not None:
             self.preprocessing(force=True)

+        # set if nugget effect shall be used
+        self._use_nugget = None
+        self.use_nugget = use_nugget
+
         # model can be a function or a string
         self._model = None
+        self._model_name = None
+        self._is_model_custom = False
         self.set_model(model_name=model)

         # specify if the lag should be given absolute or relative to the maxlag
         self._normalized = normalize

-        # set if nugget effect shall be used
-        self._use_nugget = None
-        self.use_nugget = use_nugget
-
         # set the fitting method and sigma array
         self._fit_method = None
         if fit_method is not None:
@@ -362,7 +391,9 @@ def __init__(self,
         # do the preprocessing and fitting upon initialization
         # Note that fit() calls preprocessing
-        self.fit(force=True)
+        fit_bounds = self._kwargs.get('fit_bounds')  # returns None if empty
+        fit_p0 = self._kwargs.get('fit_p0')
+        self.fit(force=True, bounds=fit_bounds, p0=fit_p0)

         # finally check if any of the uncertainty propagation kwargs are set
         self._experimental_conf_interval = None
@@ -688,7 +719,7 @@ def set_bin_func(self, bin_func: Union[str, Iterable, Callable[[np.ndarray, floa
         between two neighboring clusters. Note: this does not necessarily
         result in even width bins.

-        **`'ward'`** uses a hierachical culstering algorithm to iteratively
+        **`'ward'`** uses a hierarchical clustering algorithm to iteratively
         merge pairs of clusters until there are only `n` remaining clusters.
         The merging is done by minimizing the variance for the merged cluster.

@@ -776,7 +807,7 @@ def set_bin_func(self, bin_func: Union[str, Iterable, Callable[[np.ndarray, floa
     def _bin_func_wrapper(self, distances, n, maxlag):
         """
-        Wrapper arounf the call of the actual binning method.
+        Wrapper around the call of the actual binning method.
         This is needed to pass keyword arguments to kmeans or
         stable_entropy binning methods, and respect the slightly
         different function signature of auto_derived_lags.
@@ -963,10 +994,15 @@ def set_model(self, model_name):
             # at first reset harmonize
             self._harmonize = False
             if model_name.lower() == 'harmonize':
+                mname = 'harmonize'
                 self._harmonize = True
                 self._model = self._build_harmonized_model()
+            elif "+" in model_name:
+                mname = ''.join(model_name.split()).lower()  # Remove all whitespaces
+                self._model = self._build_sum_models(mname)
             elif hasattr(models, model_name.lower()):
-                self._model = getattr(models, model_name.lower())
+                mname = model_name.lower()
+                self._model = getattr(models, mname)
             else:
                 raise ValueError(
                     (
@@ -974,8 +1010,68 @@ def set_model(self, model_name):
                         ' understood, please provide the function'
                     ) % model_name
                 )
+            # Set model name attribute
+            self._model_name = mname
         else:  # pragma: no cover
+            self._is_model_custom = True
             self._model = model_name
+            self._model_name = model_name.__name__
+
+    def _get_argpos_sum_models(self, list_model_names):
+        """
+        Get argument slice position (list of slices) for the sum of models from a list of model names (list of strings).
+        """
+
+        # Doing this here for other functions (fit, describe, etc), even though already done in _build_sum_models
+        list_models = [getattr(models, model_name.lower()).py_func for model_name in list_model_names]
+
+        # Get the number of arguments per model (e.g., [3, 4, 4])
+        nb_args_per_model = np.array([len(inspect.getfullargspec(model).args) for model in list_models])
+
+        # We remove the nugget and lags parameters, except nugget for the last model (sum of nuggets = single nugget)
+        nb_args_per_model -= 2
+        nb_args_per_model[-1] += 1
+        # Compute cumulative number of args removing 2 args everywhere (all lags and nuggets, last one will compensate)
+        cum_args_minus_lag = np.cumsum(nb_args_per_model)
+
+        # Prepare argument slices to distribute to submodels
+        args_indices = np.insert(cum_args_minus_lag, 0, 0)  # We add the first index of 0
+        args_slices = [(slice(args_indices[i], args_indices[i + 1])) for i in range(len(args_indices) - 1)]
+
+        return args_slices
+
+    def _build_sum_models(self, sum_models_name: str):
+        """
+        Build sum of theoretical models, variogram-decorated function.
+        """
+
+        # Remove all whitespaces in the string, in case the user wrote something like "spherical + gaussian"
+        sum_models_name = ''.join(sum_models_name.split()).lower()
+
+        # Get individual model names
+        list_model_names = sum_models_name.split("+")
+
+        # Check that all models exist in the "models" module
+        if not all(hasattr(models, model_name.lower()) for model_name in list_model_names):
+            raise ValueError(
+                (
+                    'One of the theoretical models in the list "%s" is not'
+                    ' understood, please provide existing model names separated by "+".'
+                ) % ", ".join(list_model_names)
+            )
+
+        # First, build the models from their py_func (ignoring variogram decorator) and get args per model
+        list_models = [getattr(models, model_name.lower()).py_func for model_name in list_model_names]
+        # Get the argument positions for the model sum (function uses model names to be called by other methods)
+        args_slices = self._get_argpos_sum_models(list_model_names=list_model_names)
+
+        # Distribute first argument (lag) and use all others in order (nugget ignored when last argument not passed)
+        @models.variogram
+        def sum_models(h, *args):
+            return sum(list_models[i](h, *args[args_slices[i]]) for i in range(len(list_models)))
+
+        return sum_models

     def _build_harmonized_model(self):
         x = self.bins
@@ -1131,8 +1227,8 @@ def distance_matrix(self):
     def maxlag(self):
         """
         Maximum lag distance to be considered in this Variogram instance.
-        You can limit the distance at which point pairs are calcualted.
-        There are three possible ways how to do that, in absoulte lag units,
+        You can limit the distance at which point pairs are calculated.
+        There are three possible ways how to do that, in absolute lag units,
         which is a number larger than one. Secondly, a number ``0 < maxlag < 1``
         can be set, which will use this share of the maximum distance as
         maxlag. Lastly, a string can be set: ``'mean'`` and ``'median'``
@@ -1145,7 +1241,7 @@ def maxlag(self):
         calculated. Hence, it does **not** speed up the calculation
         of large distance matrices, just the estimation of the variogram.
         Thus, if you pre-calculate the distance matrix using
-        :class:`MetricSpace `, only absoulte
+        :class:`MetricSpace `, only absolute
         limits can be used.
""" @@ -1389,7 +1485,7 @@ def _validate_kwargs(self, **kwargs): def lag_groups(self): """Lag class groups - Retuns a mask array with as many elements as self._diff has, + Returns a mask array with as many elements as self._diff has, identifying the lag class group for each pairwise difference. Can be used to extract all pairwise values within the same lag bin. @@ -1456,7 +1552,7 @@ def preprocessing(self, force=False): self._calc_diff(force=force) self._calc_groups(force=force) - def fit(self, force=False, method=None, sigma=None, **kwargs): + def fit(self, force=False, method=None, sigma=None, bounds=None, p0=None, **kwargs): """Fit the variogram The fit function will fit the theoretical variogram function to the @@ -1505,6 +1601,21 @@ def fit(self, force=False, method=None, sigma=None, **kwargs): Uncertainty array for the bins. Has to have the same dimension as self.bins. Refer to Variogram.fit_sigma for more information. + bounds: 2-tuple of array_like or Bounds, optional + Lower and upper bounds on parameters passed to scipy.optimize.curve_fit. + + Order is typically (range, sill, nugget) or (range, sill, smoothness, nugget) for individual models, or + (range1, sill1, nugget1, range2, sill2, nugget2) for a sum of 2 models. + Recommended for custom models, where bounds cannot be determined logically. + For internal models, defaults to known min/max values for the sill (0, max variance), range (0, max lag) + and smoothness (0, 2) or (0, 20) for stable and matern, respectively. + + p0: array_like, optional + Initial guess for the parameters passed to scipy.optimize.curve_fit. + + Same order as for fit_bounds. + Defaults to upper bounds values. For custom models, if no bounds are defined, defaults to 1. + Returns ------- void @@ -1565,18 +1676,41 @@ def fit(self, force=False, method=None, sigma=None, **kwargs): self.cof = [r, s, n] return - # Switch the method - # wrap the model to include or exclude the nugget - if self.use_nugget: - def wrapped(*args): - return self._model(*args) + # For a supported model, wrap the function depending on nugget and get logical bounds + if not self._is_model_custom: + # Switch the method + # wrap the model to include or exclude the nugget + if self.use_nugget: + def wrapped(*args): + return self._model(*args) + else: + def wrapped(*args): + return self._model(*args, 0) + + # get p0 + if bounds is None: + bounds = (0, self.__get_fit_bounds(x, y)) + if p0 is None: + p0 = np.asarray(bounds[1]) + # Else, inspect the function for the number of arguments else: - def wrapped(*args): - return self._model(*args, 0) + # The number of arguments of argspec minus one is what we initialized + argspec = inspect.getfullargspec(self._model.__wrapped__) + nb_args = len(argspec.args) - 1 + if bounds is None: + warnings.warn("Parameter bounds cannot be logically derived for a custom model during the fit. 
" + "User bounds can be passed using fit(..., bounds=) or Variogram(..., fit_bounds=).", UserWarning) + bounds = ([-np.inf] * nb_args, [np.inf] * nb_args) + if p0 is None: + # If all bounds are infinite (not defined, pass 1) + if bounds == ([-np.inf] * nb_args, [np.inf] * nb_args): + p0 = np.ones(nb_args) + # Else pass the upper bounds + else: + p0 = np.asarray(bounds[1]) - # get p0 - bounds = (0, self.__get_fit_bounds(x, y)) - p0 = np.asarray(bounds[1]) + def wrapped(*args): + return self._model(*args) # Trust Region Reflective if self.fit_method == 'trf': @@ -1646,10 +1780,10 @@ def ml(params): n = kwargs.get('nugget', self._kwargs.get('fit_nugget', old_params.get('nugget', 0.0))) # check if a s parameter is needed - if self._model.__name__ in ('stable', 'matern'): - if self._model.__name__ == 'stable': + if self._model_name in ('stable', 'matern'): + if self._model_name == 'stable': s2 = kwargs.get('shape', self._kwargs.get('fit_shape', old_params.get('shape',2.0))) - if self._model.__name__ == 'matern': + if self._model_name == 'matern': s2 = kwargs.get('shape', self._kwargs.get('fit_shape', old_params.get('smoothness', 2.0))) # set @@ -1739,7 +1873,7 @@ def _format_values_stack(self, values: np.ndarray) -> np.ndarray: """ Create a numpy column stack to calculate differences between two value arrays. The format function will handle sparse matrices, as these do not include - pairwise differences that are separated beyond maxlag. + pairwise differences that are separated beyond maxlag. The dense numpy.array matrices contain all point pairs. """ @@ -1970,33 +2104,45 @@ def __get_fit_bounds(self, x, y): list """ - mname = self._model.__name__ + all_mname = self._model_name - # use range, sill and smoothness parameter - if mname == 'matern': - # a is max(x), C0 is max(y) s is limited to 20? - bounds = [np.nanmax(x), np.nanmax(y), 20.] + # for a sum of models, create a list + if "+" in all_mname: + list_mname = all_mname.split("+") + else: + list_mname = [all_mname] - # use range, sill and shape parameter - elif mname == 'stable': - # a is max(x), C0 is max(y) s is limited to 2? - bounds = [np.nanmax(x), np.nanmax(y), 2.] + # we append all bounds (for one or several models) + all_bounds = [] + for i, mname in enumerate(list_mname): - # use only sill - elif mname == 'nugget': - # a is max(x): - bounds = [np.nanmax(x)] + # use range, sill and smoothness parameter + if mname == 'matern': + # a is max(x), C0 is max(y) s is limited to 20? + bounds = [np.nanmax(x), np.nanmax(y), 20.] - # use range and sill - else: - # a is max(x), C0 is max(y) - bounds = [np.nanmax(x), np.nanmax(y)] + # use range, sill and shape parameter + elif mname == 'stable': + # a is max(x), C0 is max(y) s is limited to 2? + bounds = [np.nanmax(x), np.nanmax(y), 2.] + + # use only sill + elif mname == 'nugget': + # a is max(x): + bounds = [np.nanmax(x)] + + # use range and sill + else: + # a is max(x), C0 is max(y) + bounds = [np.nanmax(x), np.nanmax(y)] + + # if use_nugget is True add the nugget (for the last model only in case it is a sum) + if self.use_nugget and i == (len(list_mname) - 1): + bounds.append(0.99*np.nanmax(y)) - # if use_nugget is True add the nugget - if self.use_nugget: - bounds.append(0.99*np.nanmax(y)) + all_bounds += bounds - return bounds + return all_bounds def data(self, n=100, force=False): """Theoretical variogram function @@ -2063,7 +2209,7 @@ def residuals(self): corresponding lag values .. 
deprecated:: 1.0.4 - residuals can be ambigious, thus the property is renamed to model_residuals + residuals can be ambiguous, thus the property is renamed to model_residuals Returns ------- @@ -2100,7 +2246,7 @@ def model_residuals(self) -> np.ndarray: def mean_residual(self): """Mean Model residuals - Calculates the mean, absoulte deviations between the experimental + Calculates the mean, absolute deviations between the experimental variogram and theretical model values. Returns @@ -2259,7 +2405,7 @@ def nrmse_r(self): Notes ----- Unlike Variogram.nrmse, nrmse_r is not normalized to the mean of y, - but the differece of the maximum y to its mean: + but the difference of the maximum y to its mean: .. math:: NRMSE_r = \frac{RMSE}{max(y) - mean(y)} @@ -2275,7 +2421,7 @@ def r(self): :return: """ - # get the experimental and theoretical variogram and cacluate means + # get the experimental and theoretical variogram and calculate means experimental, model = self.model_deviations() mx = np.nanmean(experimental) my = np.nanmean(model) @@ -2449,7 +2595,7 @@ def describe(self, short=False, flat=False): maxlag = np.nanmax(self.bins) maxvar = np.nanmax(self.experimental) - # get the fitting coefficents + # get the fitting coefficients cof = self.cof # build the dict @@ -2466,29 +2612,63 @@ def fnname(fn): rdict = dict( model=fnname(self._model) if not self._harmonize else "harmonize", estimator=fnname(self._estimator), - dist_func=fnname(self.dist_function), + dist_func=fnname(self.dist_function)) - normalized_effective_range=cof[0] * maxlag, - normalized_sill=cof[1] * maxvar, - normalized_nugget=cof[-1] * maxvar if self.use_nugget else 0, - - effective_range=cof[0], - sill=cof[1], - nugget=cof[-1] if self.use_nugget else 0, - ) - - # handle s parameters for matern and stable model - if self._model.__name__ == 'matern': - rdict['smoothness'] = cof[2] - elif self._model.__name__ == 'stable': - rdict['shape'] = cof[2] + def create_dict_for_model(model_name, cof, maxlag, maxvar, use_nugget, id = ''): + """ + Create dictionary of parameters for an individual model. + Optionally, append a model ID to the key (for the case of summed models). 
+            """
+            # id appended to the param key name to differentiate models for a sum
+            if id != '':
+                id = '_' + id
+
+            d = {
+                'normalized_effective_range' + id: cof[0] * maxlag,
+                'normalized_sill' + id: cof[1] * maxvar,
+                'normalized_nugget' + id: cof[-1] * maxvar if use_nugget else 0,
+                'effective_range' + id: cof[0],
+                'sill' + id: cof[1],
+                'nugget' + id: cof[-1] if use_nugget else 0,
+            }
+
+            # handle s parameters for matern and stable model
+            if model_name == 'matern':
+                d['smoothness' + id] = cof[2]
+            elif model_name == 'stable':
+                d['shape' + id] = cof[2]
+
+            return d
+
+        # for a custom model: we list the optimized params for the function wrapped by the variogram decorator
+        if self._is_model_custom:
+            custom_arg_names = inspect.getfullargspec(self._model.__wrapped__).args
+            all_params = {"param"+str(i+1)+"_"+custom_arg_names[i+1]: cof[i] for i in range(len(custom_arg_names) - 1)}
+
+        # for a sum of models
+        elif "+" in self._model_name:
+            list_model_names = self._model_name.split("+")
+            list_argslices = self._get_argpos_sum_models(list_model_names=list_model_names)
+            all_params = {}
+            # add the parameters for each model, with parameter suffix from 1 to the total number
+            for i in range(len(list_model_names)):
+                model_params = create_dict_for_model(model_name=list_model_names[i], cof=cof[list_argslices[i]],
+                                                     maxlag=maxlag, maxvar=maxvar, use_nugget=self.use_nugget, id=str(i+1))
+                all_params.update(model_params)
+
+        # for a single model
+        else:
+            all_params = create_dict_for_model(model_name=self._model_name, cof=cof, maxlag=maxlag, maxvar=maxvar,
+                                               use_nugget=self.use_nugget)
+        # update dictionary
+        rdict.update(all_params)

         # add other stuff if not short version requested
         if not short:
             kwargs = self._kwargs
             params = dict(
                 estimator=self._estimator.__name__,
-                model=self._model.__name__,
+                model=self._model_name,
                 dist_func=str(self.dist_function),
                 bin_func=self._bin_func_name,
                 normalize=self.normalized,
@@ -2523,40 +2703,68 @@ def parameters(self):

         params : list
             [range, sill, nugget] for most models and
             [range, sill, shape, nugget] for matern and stable model.
+            [range1, sill1, nugget1, range2, sill2, nugget2] for a sum of 2 models.
+            [param1, param2, param3, ...] in order for a custom model.
""" d = self.describe() if 'error' in d: return [None, None, None] - elif self._model.__name__ == 'matern': - return list([ - d['effective_range'], - d['sill'], - d['smoothness'], - d['nugget'] - ]) - elif self._model.__name__ == 'stable': - return list([ - d['effective_range'], - d['sill'], - d['shape'], - d['nugget'] - ]) - elif self._model.__name__ == 'nugget': - return list([d['nugget']]) + + def get_params_list_from_dict(d, model_name, id=''): + + # ID used to differentiate params for a sum of models + if id != '': + id = '_'+id + + # Get specific smoothness and shape parameters for matern and stable + if model_name == 'matern': + return list([ + d['effective_range'+id], + d['sill'+id], + d['smoothness'+id], + d['nugget'+id] + ]) + elif model_name == 'stable': + return list([ + d['effective_range'+id], + d['sill'+id], + d['shape'+id], + d['nugget'+id] + ]) + # Get nugget for only-nugget + elif model_name == 'nugget': + return list([d['nugget'+id]]) + # Or get classic parameters + else: + return list([ + d['effective_range'+id], + d['sill'+id], + d['nugget'+id] + ]) + + # For a custom model, just pass cof in order + if self._is_model_custom: + list_params = self.cof + # Get parameters for a sum of models + elif '+' in self._model_name: + list_model_names = self._model_name.split('+') + list_params = [] + for i in range(len(list_model_names)): + params = get_params_list_from_dict(d, model_name=list_model_names[i], id=str(i+1)) + list_params += params + # Or for a single model else: - return list([ - d['effective_range'], - d['sill'], - d['nugget'] - ]) + list_params = get_params_list_from_dict(d, model_name=self._model_name) + + return list_params def to_DataFrame(self, n=100, force=False): """Variogram DataFrame Returns the fitted theoretical variogram as a pandas.DataFrame - instance. The n and force parameter control the calaculation, - refer to the data funciton for more info. + instance. The n and force parameter control the calculation, + refer to the data function for more info. .. deprecated:: 1.0.10 The return value of this function will change with a future release @@ -2585,7 +2793,7 @@ def to_DataFrame(self, n=100, force=False): return DataFrame({ 'lags': lags, - self._model.__name__: data} + self._model_name: data} ).copy() def gstatsim_prediction_grid(self, resolution: Optional[int] = None, rows: Optional[int] = None, cols: Optional[int] = None, as_numpy: bool = False) -> Union[gstatsim_mod.Grid, np.ndarray]: @@ -2691,7 +2899,7 @@ def to_gstools(self, **kwargs): def to_gs_krige(self, **kwargs): """ - Instatiate a GSTools Krige class. + Instantiate a GSTools Krige class. This can only export isotropic models. Note: the `fit_variogram` is always set to `False` @@ -2913,7 +3121,7 @@ def location_trend(self, axes=None, show=True, **kwargs): def distance_difference_plot(self, ax=None, plot_bins=True, show=True): """Raw distance plot - Plots all absoulte value differences of all point pair combinations + Plots all absolute value differences of all point pair combinations over their separating distance, without sorting them into a lag. .. 
versionchanged:: 0.4.0 @@ -2964,7 +3172,7 @@ def __repr__(self): # pragma: no cover :return: """ try: - _name = self._model.__name__ + _name = self._model_name _b = int(len(self.bins)) except Exception: return "< abstract Variogram >" @@ -2973,7 +3181,7 @@ def __repr__(self): # pragma: no cover def __str__(self): # pragma: no cover """String Representation - Descriptive respresentation of this Variogram instance that shall give + Descriptive representation of this Variogram instance that shall give the main variogram parameters in a print statement. Returns diff --git a/skgstat/__version__.py b/skgstat/__version__.py index 9048498..cc08f08 100644 --- a/skgstat/__version__.py +++ b/skgstat/__version__.py @@ -1 +1 @@ -__version__ = '1.0.12' \ No newline at end of file +__version__ = '1.0.14' diff --git a/skgstat/binning.py b/skgstat/binning.py index b230921..cc68ca9 100644 --- a/skgstat/binning.py +++ b/skgstat/binning.py @@ -170,7 +170,7 @@ def kmeans(distances, n, maxlag, binning_random_state=42, **kwargs): # filter for distances < maxlag d = distances[np.where(distances <= maxlag)] - # filter the sklearn convervence warning, because working with + # filter the sklearn convervence warning, because working with # undefined state in binning does not make any sense with warnings.catch_warnings(): warnings.filterwarnings('error') @@ -200,7 +200,7 @@ def ward(distances, n, maxlag, **kwargs): center. Note: this does not necessarily result in equidistance lag classes. The clustering is done by merging pairs of clusters that minimize the - variance for the merged clusters, unitl `n` clusters are found. + variance for the merged clusters, until `n` clusters are found. Parameters ---------- @@ -308,7 +308,7 @@ def loss(edges): return np.sum(np.abs(np.diff(h))) # minimize the loss function - opt = dict(maxiter=kwargs.get('binning_maxiter', 5000)) + opt = dict(maxiter=kwargs.get('binning_maxiter', 5000)) res = minimize(loss, initial_guess, method='Nelder-Mead', options=opt) if res.success: diff --git a/skgstat/data/__init__.py b/skgstat/data/__init__.py index 1790eea..1d5bb06 100644 --- a/skgstat/data/__init__.py +++ b/skgstat/data/__init__.py @@ -10,20 +10,20 @@ origins = dict( pancake="""Image of a pancake with apparent spatial structure. - Copyright Mirko Mälicke, 2020. If you use this data cite SciKit-GStat: + Copyright Mirko Mälicke, 2020. If you use this data cite SciKit-GStat: - Mälicke, M.: SciKit-GStat 1.0: a SciPy-flavored geostatistical variogram estimation - toolbox written in Python, Geosci. Model Dev., 15, 2505–2532, + Mälicke, M.: SciKit-GStat 1.0: a SciPy-flavored geostatistical variogram estimation + toolbox written in Python, Geosci. Model Dev., 15, 2505–2532, https://doi.org/10.5194/gmd-15-2505-2022, 2022. """, aniso="""Random field greyscale image with geometric anisotropy. The anisotropy in North-East direction has a factor of 3. The random field was generated using gstools. - Copyright Mirko Mälicke, 2020. If you use this data, cite SciKit-GStat: + Copyright Mirko Mälicke, 2020. If you use this data, cite SciKit-GStat: - Mälicke, M.: SciKit-GStat 1.0: a SciPy-flavored geostatistical variogram estimation - toolbox written in Python, Geosci. Model Dev., 15, 2505–2532, + Mälicke, M.: SciKit-GStat 1.0: a SciPy-flavored geostatistical variogram estimation + toolbox written in Python, Geosci. Model Dev., 15, 2505–2532, https://doi.org/10.5194/gmd-15-2505-2022, 2022. """, @@ -41,14 +41,14 @@ """, corr_var="""Random sample at random locations created using numpy. 
- The sample can be created for multiple variables, which will be + The sample can be created for multiple variables, which will be cross-correlated. The statistical moment of each variable can be specified as well as the co-variance matrix can be given. IMPORTANT: This data generator is part of SciKit-GStat and is built on Numpy. If you use it, please cite: - Mälicke, M.: SciKit-GStat 1.0: a SciPy-flavored geostatistical variogram estimation - toolbox written in Python, Geosci. Model Dev., 15, 2505–2532, + Mälicke, M.: SciKit-GStat 1.0: a SciPy-flavored geostatistical variogram estimation + toolbox written in Python, Geosci. Model Dev., 15, 2505–2532, https://doi.org/10.5194/gmd-15-2505-2022, 2022. """ @@ -229,7 +229,7 @@ def aniso_field(): ------- result : dict Dictionary of the sample and a citation information. - The sample a numpy array repesenting the image. + The sample a numpy array representing the image. See Also -------- @@ -340,7 +340,7 @@ def corr_variable( Returns random cross-correlated variables assigned to random coordinate locations. These can be used for testing cross-variograms, or as a random benchmark for cross-variograms in method development, aka. does - actual correlated data exhibit different cross-variograms of random + actual correlated data exhibit different cross-variograms of random variables of the same correlation coefficient matrix. Parameters @@ -350,20 +350,20 @@ def corr_variable( length has to match size. means : List[float] Mean values of the variables, defaults to two variables with - mean of 1. The number of means determines the number of + mean of 1. The number of means determines the number of variables, which will be returned. vars : List[float] - Univariate variances for each of the random variables. + Univariate variances for each of the random variables. If None, and cov is given, the diagonal of the correlation - coefficient matrix will be used. If cov is None, the + coefficient matrix will be used. If cov is None, the correlation will be random, but the variance will match. If vars is None, random variances will be used. cov : list, float - Co-variance matrix. The co-variances and variances for all + Co-variance matrix. The co-variances and variances for all created random variables can be given directly, as matrix of shape ``(len(means), len(means))``. If cov is a float, the same matrix will be created using the same - co-variance for all combinations. + co-variance for all combinations. coordinates : np.ndarray Coordinates to be used for the sample. If None, random locations are created. @@ -375,13 +375,13 @@ def corr_variable( ------- result : dict Dictionary of the sample and a citation information. 
- + """ # Handle coordinates if coordinates is None: np.random.seed(seed) coordinates = np.random.normal(10, 5, size=(size, 2)) - + # get the number of variables N = len(means) @@ -390,7 +390,7 @@ def corr_variable( np.random.seed(seed) # use 0...1 ratio of m for variance vars = [np.random.random() * m for m in means] - + # check the cov matrix if cov is None: np.random.seed(seed) @@ -402,16 +402,16 @@ def corr_variable( elif isinstance(cov, (int, float)): cov = np.ones((N, N)) * cov np.fill_diagonal(cov, vars) - + # matrix already elif isinstance(cov, (np.ndarray, list, tuple)) and np.asarray(cov).ndim == 2: # overwrite variances cov = np.asarray(cov) vars = np.diag(cov) - + else: raise ValueError("if cov is given it has to be either one uniform co-variance, or a co-variance matrix.") - + # create the values np.random.seed(seed) values = np.random.multivariate_normal(means, cov, size=size) diff --git a/skgstat/data/_loader.py b/skgstat/data/_loader.py index 1415bec..73f07f0 100644 --- a/skgstat/data/_loader.py +++ b/skgstat/data/_loader.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -# for python 3.6 compability +# for python 3.6 compatibility try: from imageio.v3 import imread except ImportError: @@ -31,7 +31,7 @@ def field(fname: str, band: Union[int, str] = 0) -> np.ndarray: ---------- fname : str The filename (not path) of the field. The file - extenstion can be omitted. + extension can be omitted. band : int, str The band to use, can either be an integer or the literal ``'mean'``, which will average all bands @@ -77,7 +77,7 @@ def get_sample( ---------- fname : str The filename (not path) of the field. The file - extenstion can be omitted. + extension can be omitted. N : int Sample size seed : int diff --git a/skgstat/data/samples/README.md b/skgstat/data/samples/README.md index abc72f5..ee8e6c4 100644 --- a/skgstat/data/samples/README.md +++ b/skgstat/data/samples/README.md @@ -8,4 +8,4 @@ Pebesma EJ, Bivand RS (2005). “Classes and methods for spatial data in R.” R News, 5(2), 9–13. https://CRAN.R-project.org/doc/Rnews/. Bivand RS, Pebesma E, Gomez-Rubio V (2013). Applied spatial data - analysis with R, Second edition. Springer, NY. https://asdar-book.org/. \ No newline at end of file + analysis with R, Second edition. Springer, NY. https://asdar-book.org/. 
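As a usage note for the ``corr_variable`` generator documented above: a
minimal, hypothetical call could look like the sketch below. The argument
names follow the docstring, but the exact signature, defaults and the layout
of the returned dictionary are assumptions and should be checked against the
module.

.. ipython:: python
    :okwarning:

    from skgstat import data

    # two cross-correlated variables with a uniform co-variance of 0.5
    # (size, means and cov as documented; return value is a dict)
    sample = data.corr_variable(size=150, means=[1.0, 2.0], cov=0.5)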
diff --git a/skgstat/data/samples/meuse.txt b/skgstat/data/samples/meuse.txt index 59621a1..ee16c08 100644 --- a/skgstat/data/samples/meuse.txt +++ b/skgstat/data/samples/meuse.txt @@ -153,4 +153,4 @@ 179085,330292,3.1,39,173,496,8.577,0.423837,9.1,"3","1","0","Ah",520 178875,330311,2.1,31,119,342,8.429,0.27709,6.5,"3","1","0","Ah",350 179466,330381,0.8,21,51,162,9.406,0.358606,5.7,"3","1","0","W",460 -180627,330190,2.7,27,124,375,8.261,0.0122243,5.5,"3","3","0","W",40 \ No newline at end of file +180627,330190,2.7,27,124,375,8.261,0.0122243,5.5,"3","3","0","W",40 diff --git a/skgstat/estimators.py b/skgstat/estimators.py index 74cf62c..adb4dd1 100644 --- a/skgstat/estimators.py +++ b/skgstat/estimators.py @@ -175,7 +175,7 @@ def dowd(x): """ # convert - return 2.198 * np.nanmedian(x)**2 + return 2.198 * np.nanmedian(x)**2 / 2 @jit(forceobj=True) diff --git a/skgstat/interfaces/gstools.py b/skgstat/interfaces/gstools.py index ca11fa9..4d9f97b 100644 --- a/skgstat/interfaces/gstools.py +++ b/skgstat/interfaces/gstools.py @@ -76,11 +76,11 @@ def skgstat_to_gstools(variogram, **kwargs): # at least gstools>=1.3.0 is needed if list(map(int, gs.__version__.split(".")[:2])) < [1, 3]: # pragma: no cover - raise ValueError("to_gstools: GSTools v1.3 or greater requiered.") + raise ValueError("to_gstools: GSTools v1.3 or greater required.") # if Variogram is a cross-variogram warn the user if variogram.is_cross_variogram: - warnings.warn("This instance is a cross-variogram!!" + + warnings.warn("This instance is a cross-variogram!!" + " GSTools.CovModel will most likely not handle this Variogram correctly.") @@ -124,7 +124,7 @@ def skgstat_to_gstools(variogram, **kwargs): def skgstat_to_krige(variogram, **kwargs): """ - Instatiate a GSTools Krige class. + Instantiate a GSTools Krige class. This can only export isotropic models. Note: the `fit_variogram` is always set to `False` @@ -172,7 +172,7 @@ def skgstat_to_krige(variogram, **kwargs): # at least gstools>=1.3.0 is needed if list(map(int, gs.__version__.split(".")[:2])) < [1, 3]: # pragma: no cover - raise ValueError("to_gstools: GSTools v1.3 or greater requiered.") + raise ValueError("to_gstools: GSTools v1.3 or greater required.") # convert variogram to a CovModel model = skgstat_to_gstools(variogram=variogram) diff --git a/skgstat/interfaces/pykrige.py b/skgstat/interfaces/pykrige.py index 85963b0..bf704b3 100644 --- a/skgstat/interfaces/pykrige.py +++ b/skgstat/interfaces/pykrige.py @@ -61,7 +61,7 @@ def pykrige_params(variogram): if not __check_pykrige_available(): return - # get the parameters into the correct order. + # get the parameters into the correct order. pars = variogram.parameters return [pars[1], pars[0], pars[2]] @@ -74,7 +74,7 @@ def pykrige_as_kwargs(variogram, adjust_maxlag=False, adjust_nlags=False): if not __check_pykrige_available(): return - # as far as I get it, there is no maximum lag in pykrige. + # as far as I get it, there is no maximum lag in pykrige. if adjust_maxlag: variogram.maxlag = None else: diff --git a/skgstat/interfaces/variogram_estimator.py b/skgstat/interfaces/variogram_estimator.py index f92559a..203bc7d 100644 --- a/skgstat/interfaces/variogram_estimator.py +++ b/skgstat/interfaces/variogram_estimator.py @@ -22,7 +22,7 @@ def __init__(self, ): r"""VariogramEstimator class - Interface class for usage with scikit-learn. This class is intentended + Interface class for usage with scikit-learn. This class is intended for usage with the GridSearchCV or Pipeline classes of scikit-learn. 
The input parameters are the same as for the @@ -31,7 +31,7 @@ def __init__(self, The only parameter specific to the Estimator class is the `use_score` attribute. This can be the root mean squared error (rmse), mean squared - error (mse) or mean absoulte error (mae). The Estimater can either calculate + error (mse) or mean absolute error (mae). The Estimator can either calculate the score based on the model fit (model ~ experimental) or using a leave-one-out cross-validation of a OrdinaryKriging using the model diff --git a/skgstat/models.py b/skgstat/models.py index f76a998..5270c37 100644 --- a/skgstat/models.py +++ b/skgstat/models.py @@ -20,7 +20,7 @@ def wrapper(*args, **kwargs): @variogram @jit(nopython=True) -def spherical(h, r, c0, b=0): +def spherical(h, r, c0, b=0.0): r"""Spherical Variogram function Implementation of the spherical variogram function. Calculates the @@ -54,7 +54,7 @@ def spherical(h, r, c0, b=0): The implementation follows [6]_: .. math:: - \gamma = b + C_0 * \left({1.5*\frac{h}{a} - 0.5*\frac{h}{a}^3}\right) + \gamma = b + C_0 * \left({1.5*\frac{h}{a} - 0.5*(\frac{h}{a})^3}\right) if :math:`h < r`, and @@ -84,7 +84,7 @@ def spherical(h, r, c0, b=0): @variogram @jit(nopython=True) -def exponential(h, r, c0, b=0): +def exponential(h, r, c0, b=0.0): r"""Exponential Variogram function Implementation of the exponential variogram function. Calculates the @@ -134,7 +134,7 @@ def exponential(h, r, c0, b=0): .. [8] Chiles, J.P., Delfiner, P. (1999). Geostatistics. Modeling Spatial Uncertainty. Wiley Interscience. - .. [9] Journel, A G, and Huijbregts, C J. Mining geostatistics. + .. [9] Journel, A G, and Huijbregts, C J. Mining geostatistics. United Kingdom: N. p., 1976. """ @@ -146,7 +146,7 @@ def exponential(h, r, c0, b=0): @variogram @jit(nopython=True) -def gaussian(h, r, c0, b=0): +def gaussian(h, r, c0, b=0.0): r""" Gaussian Variogram function Implementation of the Gaussian variogram function. Calculates the @@ -199,7 +199,7 @@ def gaussian(h, r, c0, b=0): .. [10] Chiles, J.P., Delfiner, P. (1999). Geostatistics. Modeling Spatial Uncertainty. Wiley Interscience. - .. [11] Journel, A G, and Huijbregts, C J. Mining geostatistics. + .. [11] Journel, A G, and Huijbregts, C J. Mining geostatistics. United Kingdom: N. p., 1976. """ @@ -211,7 +211,7 @@ def gaussian(h, r, c0, b=0): @variogram @jit(nopython=True) -def cubic(h, r, c0, b=0): +def cubic(h, r, c0, b=0.0): r"""Cubic Variogram function Implementation of the Cubic variogram function. Calculates the @@ -257,7 +257,7 @@ def cubic(h, r, c0, b=0): References ---------- - .. [12] Montero, J.-M., Mateu, J., & others. (2015). Spatial and spatio-temporal + .. [12] Montero, J.-M., Mateu, J., & others. (2015). Spatial and spatio-temporal geostatistical modeling and kriging (Vol. 998). John Wiley & Sons. """ @@ -275,7 +275,7 @@ def cubic(h, r, c0, b=0): @variogram @jit(nopython=True) -def stable(h, r, c0, s, b=0): +def stable(h, r, c0, s, b=0.0): r"""Stable Variogram function Implementation of the stable variogram function. Calculates the @@ -347,7 +347,7 @@ def stable(h, r, c0, s, b=0): @variogram @jit(forceobj=True) -def matern(h, r, c0, s, b=0): +def matern(h, r, c0, s, b=0.0): r"""Matérn Variogram function Implementation of the Matérn variogram function.
Calculates the diff --git a/skgstat/plotting/__init__.py b/skgstat/plotting/__init__.py index a07faca..7139a4e 100644 --- a/skgstat/plotting/__init__.py +++ b/skgstat/plotting/__init__.py @@ -24,7 +24,7 @@ def backend(name=None): elif name not in ALLOWED_BACKENDS: raise ValueError( - "'%s' is not an allowed plotting backend.\nOptions are: [%s]" % + "'%s' is not an allowed plotting backend.\nOptions are: [%s]" % (name, ','.join(["'%s'" % _ for _ in ALLOWED_BACKENDS])) ) @@ -32,7 +32,7 @@ def backend(name=None): try: import plotly except ImportError: - print('You need to install plotly >=4.12.0 separatly:\npip install plotly') + print('You need to install plotly >=4.12.0 separately:\npip install plotly') return # we are good to set the new backend diff --git a/skgstat/plotting/stvariogram_marginal.py b/skgstat/plotting/stvariogram_marginal.py index 8b375b4..ed8f410 100644 --- a/skgstat/plotting/stvariogram_marginal.py +++ b/skgstat/plotting/stvariogram_marginal.py @@ -81,7 +81,7 @@ def plotly_marginal(stvariogram, fig=None, include_model=False, **kwargs): try: fig.set_subplots(rows=1, cols=2, shared_yaxes=shared_yaxes) except ValueError: - # figure has alredy subplots + # figure already has subplots pass # get some settings diff --git a/skgstat/plotting/stvariogram_plot3d.py b/skgstat/plotting/stvariogram_plot3d.py index 6cc6a9e..6959947 100644 --- a/skgstat/plotting/stvariogram_plot3d.py +++ b/skgstat/plotting/stvariogram_plot3d.py @@ -131,7 +131,7 @@ def plotly_plot_3d(stvariogram, kind='scatter', fig=None, **kwargs): xaxis_title='space', yaxis_title='time', zaxis_title='semivariance [%s]' % stvariogram.estimator.__name__ - )) + )) # return return fig diff --git a/skgstat/plotting/variogram_plot.py b/skgstat/plotting/variogram_plot.py index 5b5f4b2..ef2bba0 100644 --- a/skgstat/plotting/variogram_plot.py +++ b/skgstat/plotting/variogram_plot.py @@ -115,7 +115,7 @@ def matplotlib_variogram_plot( linestyles='dashed' ) - # anotate + # annotate ax2.axes.set_ylabel('N') # show the figure @@ -151,7 +151,7 @@ def plotly_variogram_plot( else: raise ValueError('axes has to be None or a plotly.Figure.') - # handle error bars on exerimental + # handle error bars on experimental conf = variogram._experimental_conf_interval if conf is not None: error_y = dict( diff --git a/skgstat/stmodels.py b/skgstat/stmodels.py index b3bba73..1e87c60 100644 --- a/skgstat/stmodels.py +++ b/skgstat/stmodels.py @@ -52,8 +52,8 @@ def sum(lags, Vx, Vt): .. math:: \gamma (h,t) = \gamma_x (h) + \gamma_t (t) - - Where :math:`\gamma_x(h)` is the spatial marginal variogram and + + Where :math:`\gamma_x(h)` is the spatial marginal variogram and :math:`\gamma_t(t)` is the temporal marginal variogram. It is not a good idea to use this model in almost any case, as it assumes @@ -113,9 +113,9 @@ def product(lags, Vx, Vt, Cx, Ct): The product sum model is implemented following [14]_: .. math:: - \gamma (h,t) = C_x * \gamma_t(t) + C_t * \gamma_x(h) - \gamma_x(h) * \gamma_t(t) - - Where :math:`\gamma_x(h)` is the spatial marginal variogram and + \gamma (h,t) = C_x * \gamma_t(t) + C_t * \gamma_x(h) - \gamma_x(h) * \gamma_t(t) + + Where :math:`\gamma_x(h)` is the spatial marginal variogram and :math:`\gamma_t(t)` is the temporal marginal variogram. References @@ -155,7 +155,7 @@ def product_sum(lags, Vx, Vt, k1, k2, k3, Cx, Ct): Fitting parameter. k1 has to be positive or zero and may not be larger than all marginal sill values. k2 : float - Fitting paramter. k2 has to be positive or zero and may not be larger + Fitting parameter.
k2 has to be positive or zero and may not be larger than all marginal sill values. k3 : float Fitting parameter. k3 has to be positive and may not be larger than diff --git a/skgstat/tests/__init__.py b/skgstat/tests/__init__.py index c0ea76b..f3c0cbd 100644 --- a/skgstat/tests/__init__.py +++ b/skgstat/tests/__init__.py @@ -36,4 +36,4 @@ import os os.environ['SKG_SUPRESS'] = 'TRUE' -""" \ No newline at end of file +""" diff --git a/skgstat/tests/test_binning.py b/skgstat/tests/test_binning.py index 66adaec..60507e8 100644 --- a/skgstat/tests/test_binning.py +++ b/skgstat/tests/test_binning.py @@ -134,7 +134,7 @@ def test_kmeans(self): def test_kmeans_convergence(self): with self.assertRaises(ValueError) as err: kmeans(np.array([1, 1, 1, 1, 1]), 3, None) - + self.assertTrue('KMeans failed to converge' in str(err.exception)) def test_ward(self): diff --git a/skgstat/tests/test_cross_utility.py b/skgstat/tests/test_cross_utility.py index 69bf0a2..2f35efd 100644 --- a/skgstat/tests/test_cross_utility.py +++ b/skgstat/tests/test_cross_utility.py @@ -19,11 +19,11 @@ def setUp(self) -> None: # set up default values, whenever c and v are not important np.random.seed(42) self.c = np.random.gamma(10, 4, (100, 2)) - + # build the multivariate sample means = [1, 10, 100, 1000] cov = [[1, 0.8, 0.7, 0.6], [0.8, 1, 0.2, 0.2], [0.7, 0.2, 1.0, 0.2], [0.6, 0.2, 0.2, 1.0]] - + np.random.seed(42) self.v = np.random.multivariate_normal(means, cov, size=100) @@ -34,7 +34,7 @@ def test_cross_matrix_shape(self): # check shape mat = np.asarray(mat, dtype='object') self.assertTrue(mat.shape, (4, 4)) - + def test_cross_matrix_diagonal(self): """Test that the primary variograms are correct""" # get the cross variogram matrix @@ -69,7 +69,7 @@ def test_check_cross_variogram(self): assert_array_almost_equal(mat[0][2].bins, second.bins, 1) def test_for_directional_variograms(self): - """Check that DirectionalVariograms are also calcualted correctly""" + """Check that DirectionalVariograms are also calculated correctly""" mat = cross_variograms(self.c, self.v, azimuth=90) mat = np.asarray(mat, dtype='object').flatten() diff --git a/skgstat/tests/test_data_loader.py b/skgstat/tests/test_data_loader.py index 66435be..edc9bdd 100644 --- a/skgstat/tests/test_data_loader.py +++ b/skgstat/tests/test_data_loader.py @@ -54,7 +54,7 @@ def test_meuse_loads(): zinc, df[['zinc']].values, decimal=6 ) - # check exeption + # check exception with pytest.raises(AttributeError) as e: data.meuse(variable='unknown') @@ -89,7 +89,7 @@ def test_corr_var_derirved(): # test uniform covariance cov = np.ones((2, 2)) * 0.8 np.fill_diagonal(cov, vars) - + # generate test sample np.random.seed(42) d = np.random.multivariate_normal([1.0, 10.0], cov, size=50) @@ -102,5 +102,5 @@ def test_corr_var_derirved(): def test_corr_var_matrix_error(): with pytest.raises(ValueError) as e: data.corr_variable(50, [1.0, 2.0], cov='NotAllowed') - + assert 'uniform co-variance, or a co-variance matrix' in str(e.value) diff --git a/skgstat/tests/test_directionalvariogram.py b/skgstat/tests/test_directionalvariogram.py index 6aef731..985ed8b 100644 --- a/skgstat/tests/test_directionalvariogram.py +++ b/skgstat/tests/test_directionalvariogram.py @@ -16,7 +16,7 @@ def setUp(self): def test_standard_settings(self): DV = DirectionalVariogram(self.c, self.v, normalize=True) - + assert_array_almost_equal(DV.describe()["normalized_effective_range"], 436., decimal=0) assert_array_almost_equal(DV.describe()["normalized_sill"], 2706., decimal=0) 
assert_array_almost_equal(DV.describe()["normalized_nugget"], 0., decimal=0) @@ -77,13 +77,13 @@ def test_invalid_model_type(self): 'model name, or it has to be the search area ' 'itself' ) - + def test_binning_change_nlags(self): DV = DirectionalVariogram(self.c, self.v, n_lags=5) self.assertEqual(DV.n_lags, 5) - # go through the n_lags chaning procedure + # go through the n_lags changing procedure DV.bin_func = 'scott' # with scott, there are 6 classes now diff --git a/skgstat/tests/test_estimator.py b/skgstat/tests/test_estimator.py index 548948a..320191f 100644 --- a/skgstat/tests/test_estimator.py +++ b/skgstat/tests/test_estimator.py @@ -55,8 +55,8 @@ def test_dowd(self): x2 = np.random.gamma(10, 4, 100) # test - self.assertAlmostEqual(dowd(x1), 2.0873, places=4) - self.assertAlmostEqual(dowd(x2), 3170.97, places=2) + self.assertAlmostEqual(dowd(x1), 1.0437, places=4) + self.assertAlmostEqual(dowd(x2), 1585.48, places=2) def test_genton(self): # extract actual estimator diff --git a/skgstat/tests/test_interfaces.py b/skgstat/tests/test_interfaces.py index 2fe8f70..3fc74cb 100644 --- a/skgstat/tests/test_interfaces.py +++ b/skgstat/tests/test_interfaces.py @@ -93,7 +93,7 @@ def test_find_best_model(self): gs = gs.fit(self.c, self.v) - # Python 3.6 yields 'exponential', + # Python 3.6 yields 'exponential', # while 3.7, 3.8 yield 'gaussian' - this is so stupid self.assertTrue(gs.best_params_['model'] in ['gaussian', 'exponential']) @@ -287,7 +287,7 @@ def test_infer_dims(self): assert_array_almost_equal( model.variogram(self.xi), self.yi, decimal=2 ) - + class TestGstoolsAllModels(unittest.TestCase): def setUp(self): diff --git a/skgstat/tests/test_kriging.py b/skgstat/tests/test_kriging.py index 3b039e5..7419e9f 100644 --- a/skgstat/tests/test_kriging.py +++ b/skgstat/tests/test_kriging.py @@ -33,7 +33,7 @@ def test_coordinates_with_duplicates(self): def test_min_points_type_check(self): with self.assertRaises(ValueError) as e: OrdinaryKriging(self.V, min_points=4.0) - + self.assertEqual( str(e.exception), 'min_points has to be an integer.' ) @@ -41,7 +41,7 @@ def test_min_points_type_check(self): def test_min_points_negative(self): with self.assertRaises(ValueError) as e: OrdinaryKriging(self.V, min_points=-2) - + self.assertEqual( str(e.exception), 'min_points can\'t be negative.' ) @@ -49,7 +49,7 @@ def test_min_points_negative(self): def test_min_points_larger_max_points(self): with self.assertRaises(ValueError) as e: OrdinaryKriging(self.V, min_points=10, max_points=5) - + self.assertEqual( str(e.exception), 'min_points can\'t be larger than max_points.' ) @@ -57,7 +57,7 @@ def test_min_points_larger_max_points(self): def test_max_points_type_check(self): with self.assertRaises(ValueError) as e: OrdinaryKriging(self.V, max_points=16.0) - + self.assertEqual( str(e.exception), 'max_points has to be an integer.' ) @@ -66,7 +66,7 @@ def test_max_points_negative(self): with self.assertRaises(ValueError) as e: ok = OrdinaryKriging(self.V, max_points=10) ok.max_points = - 2 - + self.assertEqual( str(e.exception), 'max_points can\'t be negative.' ) @@ -75,7 +75,7 @@ def test_max_points_smaller_min_points(self): with self.assertRaises(ValueError) as e: ok = OrdinaryKriging(self.V, min_points=3, max_points=5) ok.max_points = 2 - + self.assertEqual( str(e.exception), 'max_points can\'t be smaller than min_points.' 
) @@ -94,7 +94,7 @@ def test_mode_settings(self): def test_mode_unknown(self): with self.assertRaises(ValueError) as e: OrdinaryKriging(self.V, mode='foo') - + self.assertEqual( str(e.exception), "mode has to be one of 'exact', 'estimate'." ) @@ -102,7 +102,7 @@ def test_mode_unknown(self): def test_precision_TypeError(self): with self.assertRaises(TypeError) as e: OrdinaryKriging(self.V, precision='5.5') - + self.assertEqual( str(e.exception), 'precision has to be of type int' ) @@ -110,7 +110,7 @@ def test_precision_TypeError(self): def test_precision_ValueError(self): with self.assertRaises(ValueError) as e: OrdinaryKriging(self.V, precision=0) - + self.assertEqual( str(e.exception), 'The precision has be be > 1' ) @@ -118,7 +118,7 @@ def test_precision_ValueError(self): def test_solver_AttributeError(self): with self.assertRaises(AttributeError) as e: OrdinaryKriging(self.V, solver='peter') - + self.assertEqual( str(e.exception), "solver has to be ['inv', 'numpy', 'scipy']" ) diff --git a/skgstat/tests/test_likelihood.py b/skgstat/tests/test_likelihood.py index 32481c0..add487e 100644 --- a/skgstat/tests/test_likelihood.py +++ b/skgstat/tests/test_likelihood.py @@ -61,7 +61,7 @@ def test_likelihood(): # get the likelihood function like = li.get_likelihood(vario) - # cretae the optimization attributes + # create the optimization attributes sep_mean = vario.distance.mean() sam_var = vario.values.var() diff --git a/skgstat/tests/test_metric_space.py b/skgstat/tests/test_metric_space.py index c30a631..62dbc1b 100644 --- a/skgstat/tests/test_metric_space.py +++ b/skgstat/tests/test_metric_space.py @@ -108,7 +108,7 @@ def test_raster_metric(): # Check the interface with a Variogram object works with warnings.catch_warnings(): - # this will throw a optimze warning on random data + # this will throw an optimize warning on random data warnings.simplefilter('ignore') V = skg.Variogram(rems, vals) @@ -122,7 +122,7 @@ def test_raster_metric(): rems_sub = skg.RasterEquidistantMetricSpace(coords_sub, shape=shape, extent=(x[0],x[-1],y[0],y[-1]), samples=100, runs=10, rnd=42) with warnings.catch_warnings(): - # this will throw a optimze warning on random data + # this will throw an optimize warning on random data warnings.simplefilter('ignore') V = skg.Variogram(rems_sub, vals_sub) @@ -132,6 +132,6 @@ rems_sub = skg.RasterEquidistantMetricSpace(coords_sub, shape=shape, extent=(x[0],x[-1],y[0],y[-1]), samples=100, runs=11, rnd=42) with warnings.catch_warnings(): - # this will throw a optimze warning on random data + # this will throw an optimize warning on random data warnings.simplefilter('ignore') V = skg.Variogram(rems_sub, vals_sub) diff --git a/skgstat/tests/test_models.py b/skgstat/tests/test_models.py index 6cac237..8ba23a5 100644 --- a/skgstat/tests/test_models.py +++ b/skgstat/tests/test_models.py @@ -153,7 +153,7 @@ def test_matern_nugget(self): for r, m in zip(result, model): self.assertAlmostEqual(r, m, places=2) - + def test_matern_r_switch(self): # run the default with an extreme s value @@ -186,6 +186,21 @@ def adder(l, a): for r, c in zip(res, adder([1, 4, 8], 4)): self.assertEqual(r, c) + def test_sum_spherical(self): + @variogram + def sum_spherical(h, r1, c1, r2, c2, b1=0, b2=0): + return spherical(h, r1, c1, b1) + spherical(h, r2, c2, b2) + + # Parameters for the two spherical models + params = [1, 0.3, 10, 0.7] + + # Values at which we'll evaluate the function and its expected result + vals = [0, 1, 100] + res = [0, 0.3 + spherical(1, 10, 0.7), 1] + + for r,
c in zip(res, sum_spherical(vals, *params)): + self.assertEqual(r, c) + if __name__=='__main__': unittest.main() diff --git a/skgstat/tests/test_plotting_backend.py b/skgstat/tests/test_plotting_backend.py index 0bc3b45..3966cc1 100644 --- a/skgstat/tests/test_plotting_backend.py +++ b/skgstat/tests/test_plotting_backend.py @@ -2,7 +2,7 @@ from skgstat.plotting import backend import matplotlib.pyplot as plt -import plotly.graph_objects as go +import plotly.graph_objects as go def test_backend_no_args(): diff --git a/skgstat/tests/test_spacetimevariogram.py b/skgstat/tests/test_spacetimevariogram.py index e93c713..b2d130f 100644 --- a/skgstat/tests/test_spacetimevariogram.py +++ b/skgstat/tests/test_spacetimevariogram.py @@ -108,7 +108,7 @@ def test_xdist_func_raises_ValueError(self): def test_tdist_func(self): V = SpaceTimeVariogram(self.c, self.v, tdist_func='jaccard') - # with jaccard, all shoud disagree + # with jaccard, all should disagree self.assertTrue(all([_ == 1. for _ in V.tdistance])) def test_tdist_func_raises_ValueError(self): diff --git a/skgstat/tests/test_stmodels.py b/skgstat/tests/test_stmodels.py index 578ca48..202c84c 100644 --- a/skgstat/tests/test_stmodels.py +++ b/skgstat/tests/test_stmodels.py @@ -86,7 +86,7 @@ def setUp(self): def test_default(self): assert_array_almost_equal( - [stmodels.product_sum(h, self.Vx, self.Vt, + [stmodels.product_sum(h, self.Vx, self.Vt, k1=2.2, k2=2.3, k3=4.3, Cx=5, Ct=7) for h in self.lags], [35.55, 101.99, 118.6, 118.6, 113.92, 116.91], decimal=2 @@ -94,7 +94,7 @@ def test_default(self): def test_default_as_array(self): assert_array_almost_equal( - stmodels.product_sum(self.lags, self.Vx, self.Vt, + stmodels.product_sum(self.lags, self.Vx, self.Vt, k1=2.2, k2=2.3, k3=4.3, Cx=5, Ct=7), [35.55, 101.99, 118.6, 118.6, 113.92, 116.91], decimal=2 @@ -102,7 +102,7 @@ def test_default_as_array(self): def test_with_zero_ks(self): assert_array_almost_equal( - stmodels.product_sum(self.lags, self.Vx, self.Vt, + stmodels.product_sum(self.lags, self.Vx, self.Vt, k1=0, k2=0, k3=0, Cx=5, Ct=7), [0., 0., 0., 0., 0., 0.], decimal=2 @@ -110,7 +110,7 @@ def test_with_zero_ks(self): def test_with_all_one(self): assert_array_almost_equal( - stmodels.product_sum(self.lags, self.Vx, self.Vt, + stmodels.product_sum(self.lags, self.Vx, self.Vt, k1=1, k2=1, k3=1, Cx=5, Ct=7), [14.71, 41.13, 47. ,47. ,44.96, 46.61], decimal=2 @@ -118,7 +118,7 @@ def test_with_all_one(self): def test_as_product_model(self): assert_array_almost_equal( - stmodels.product_sum(self.lags, self.Vx, self.Vt, + stmodels.product_sum(self.lags, self.Vx, self.Vt, k1=1, k2=0, k3=0, Cx=5, Ct=7), stmodels.product(self.lags, self.Vx, self.Vt, 5, 7), decimal=2 diff --git a/skgstat/tests/test_util.py b/skgstat/tests/test_util.py index 71c613c..c6bb454 100644 --- a/skgstat/tests/test_util.py +++ b/skgstat/tests/test_util.py @@ -110,11 +110,11 @@ def test_propagate_many_targets(): V = Variogram(c, v, n_lags=12) - # progate many + # propagate many conf_list = propagate(V, 'values', sigma=10, evalf=['experimental', 'parameter'], num_iter=50) assert len(conf_list) == 2 # unstack the list conf_exp, conf_par = conf_list assert conf_exp.shape == (12, 3) - assert conf_par.shape == (3, 3) \ No newline at end of file + assert conf_par.shape == (3, 3) diff --git a/skgstat/tests/test_variogram.py b/skgstat/tests/test_variogram.py index 52ce572..ef10141 100644 --- a/skgstat/tests/test_variogram.py +++ b/skgstat/tests/test_variogram.py @@ -15,22 +15,30 @@ print('No plotly installed. 
Skip plot tests') PLOTLY_FOUND = False +try: + import gstools + print(f'Found GSTools: {gstools.__version__}') + GSTOOLS_AVAILABLE = True +except ImportError: + GSTOOLS_AVAILABLE = False # pragma: no cover + from skgstat import Variogram, DirectionalVariogram from skgstat import OrdinaryKriging from skgstat import estimators from skgstat import plotting +from skgstat.models import variogram, spherical, gaussian, exponential, cubic, stable, matern class TestSpatiallyCorrelatedData(unittest.TestCase): def setUp(self): # Generate some random but spatially correlated data # with a range of ~20 - + np.random.seed(42) c = np.random.sample((50, 2)) * 60 np.random.seed(42) v = np.random.normal(10, 4, 50) - + V = Variogram(c, v).describe() V["effective_range"] = 20 OK = OrdinaryKriging(V, coordinates=c, values=v) @@ -47,13 +55,13 @@ def test_dense_maxlag_inf(self): for x, y in zip(Vdense.parameters, Vsparse.parameters): self.assertAlmostEqual(x, y, places=3) - + def test_sparse_maxlag_50(self): V = Variogram(self.c, self.v, maxlag=50) for x, y in zip(V.parameters, [20.264, 6.478, 0]): self.assertAlmostEqual(x, y, places=3) - + def test_sparse_maxlag_30(self): V = Variogram(self.c, self.v, maxlag=30) @@ -61,7 +69,7 @@ def test_sparse_maxlag_30(self): self.assertAlmostEqual(x, y, places=3) -class TestVariogramInstatiation(unittest.TestCase): +class TestVariogramInstantiation(unittest.TestCase): def setUp(self): # set up default values, whenever c and v are not important np.random.seed(42) @@ -168,7 +176,7 @@ def test_value_error_on_set_trf(self): v.fit_method = 'trf' self.assertTrue("'trf' is bounded and therefore" in str(e.exception)) - + def test_value_error_trf(self): """Test the Attribute error on TRF instantiation on single value input""" # catch the same input value warning @@ -192,7 +200,7 @@ def test_pairwise_diffs(self): diff = pdist(np.column_stack((self.v, np.zeros(len(self.v)))), metric='euclidean') assert_array_almost_equal(V.pairwise_diffs, diff, decimal=2) - + def test_pairwise_diffs_preprocessing(self): """ Remove the diffs and then request the diffs again to check preprocessing @@ -254,7 +262,7 @@ def test_binning_method_stable(self): assert_array_almost_equal( V.bins, np.array([4.3, 8.4, 12.8, 17.1, 21.4, 25.2, 29.9, 33.2, 38.5, 42.8]), - decimal=1 + decimal=0 ) def test_binning_method_stable_maxiter(self): @@ -264,7 +272,7 @@ assert_array_almost_equal( V.bins, np.array([4.3, 8.4, 12.8, 17.1, 21.4, 25.2, 29.9, 33.2, 38.5, 42.8]), - decimal=1 + decimal=0 ) def test_binning_method_stable_fix_bins(self): @@ -279,7 +287,7 @@ assert_array_almost_equal( V.bins, np.array([4.2, 8.6, 12.8, 17.1, 21.2, 25.5, 29.3, 33.2, 37.4, 43.]), - decimal=1 + decimal=0 ) def test_binning_change_nlags(self): @@ -424,7 +432,7 @@ def test_unknown_dist_func(self): with self.assertRaises(ValueError) as e: V.set_dist_function('notadistance') - + self.assertEqual( str(e.exception), 'Unknown Distance Metric: notadistance' @@ -435,7 +443,7 @@ def test_wrong_dist_func_input(self): with self.assertRaises(ValueError) as e: V.set_dist_function(55) - + self.assertEqual( str(e.exception), 'Input not supported. Pass a string or callable.'
@@ -463,7 +471,7 @@ def disabled_test_direct_dist_setting(): # Distance can no longer be explicitly set # it would require setting the whole MetricSpace, with a # non-sparse diagonal matrix - + V = Variogram([(0, 0), (4, 1), (1, 1)], [1, 2, 3], n_lags=2) V.distance = np.array([0, 0, 100]) @@ -633,6 +641,39 @@ def test_nofit(self): assert V.cov is None assert V.cof is None + def test_model(self): + """ + Test that all types of models instantiate properly + (to complement test_set_model() that only checks already instantiated vario) + """ + + # Individual model + for model_name in ['spherical', 'gaussian', 'exponential', 'cubic', 'matern', 'stable']: + V = Variogram(self.c, self.v, model=model_name) + assert V._model_name == model_name + assert V._model == globals()[model_name] + assert V._is_model_custom is False + + # Sum of models + for model_name in ['spherical+gaussian', 'cubic+matern+stable']: + V = Variogram(self.c, self.v, model=model_name) + assert V._model_name == model_name + assert V._is_model_custom is False + + # Custom model + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1) + x + + with self.assertWarns(UserWarning): + V = Variogram(self.c, self.v, model=custom_model) + + assert V._model_name == "custom_model" + assert V._model == custom_model + assert V._is_model_custom is True + + + def test_get_bin_count(self): V = Variogram(self.c, self.v) @@ -689,7 +730,7 @@ def test_fit_sigma_raises_AttributeError(self): with self.assertRaises(AttributeError) as e: self.V.fit_sigma - + self.assertTrue( 'len(fit_sigma)' in str(e.exception) ) @@ -901,7 +942,7 @@ def test_manual_fit(self): ) self.assertEqual(V.parameters, [10., 5., 0.0]) - + def test_manual_fit_change(self): V = Variogram( self.c, @@ -937,7 +978,7 @@ def test_manual_preserve_params(self): params, decimal=1 ) - + def test_implicit_nugget(self): V = Variogram(self.c, self.v, use_nugget=False) @@ -949,6 +990,130 @@ def test_implicit_nugget(self): self.assertTrue(abs(V.parameters[-1] - 2.) 
< 1e-10) + def test_fit_custom_model(self): + + # Define a custom variogram and run the fit + @variogram + def sum_spherical(h, r1, c1, r2, c2, b1, b2): + return spherical(h, r1, c1, b1) + spherical(h, r2, c2, b2) + + with self.assertWarns(UserWarning): + # Custom models should ignore the nugget setting, so let's try both and check + V = Variogram(self.c, self.v, use_nugget=True, model=sum_spherical) + V2 = Variogram(self.c, self.v, use_nugget=False, model=sum_spherical) + + # Check that 6 parameters were found + assert len(V.cof) == len(V2.cof) == 6 + + def test_fit_sum_models_nugget(self): + + # Define a sum of variogram and run the fit + V = Variogram(self.c, self.v, model="spherical+spherical", use_nugget=False) + V2 = Variogram(self.c, self.v, model="spherical+spherical", use_nugget=True) + + # Check that 4 parameters were found without nugget, 5 otherwise + assert len(V.cof) == 4 + assert len(V2.cof) == 5 + + def test_build_sum_models(self): + + # Initiate variogram without fitting + V = Variogram(self.c, self.v, fit_method=None) + + # 1/ Check that errors are raised if the argument is invalid + # Should accept upper case and spaces + V._build_sum_models("Spherical + spherical") + + # Raises an error if model does not exist + with self.assertRaises(ValueError) as e: + V._build_sum_models("Notamodel + spherical") + self.assertTrue('One of the theoretical models in the list' in str(e.exception)) + + # 2/ Build a sum of two spherical models + sum_spherical = V._build_sum_models("spherical+spherical") + + # Testing the same way as in test_models/test_sum_spherical + # Parameters for the two spherical models + params = [1, 0.3, 10, 0.7] + + # Values at which we'll evaluate the function and its expected result + vals = [0, 1, 100] + res = [0, 0.3 + spherical(1, 10, 0.7), 1] + + for r, c in zip(res, sum_spherical(vals, *params)): + self.assertEqual(r, c) + + # 3/ Build a sum of all models + sum_spherical = V._build_sum_models("spherical+gaussian+exponential+cubic+stable+matern") + min_nb_args = 2 + 2 + 2 + 2 + 3 + 3 + + # Check that the function fails for a number of args of 13 (lower than 14) + with self.assertRaises(TypeError) as e: + sum_spherical(0, *[1,]*(min_nb_args - 1)) + self.assertTrue('positional arguments' in str(e.exception)) + + # Check that it runs for 14 and that the result is correct + sum_res = sum(model(0, 1, 1) for model in [spherical, gaussian, exponential, cubic]) + \ + sum((model(0, 1, 1, 1)) for model in [stable, matern]) + assert sum_spherical(0, *[1, ] * min_nb_args) == sum_res + + def test_fit_bounds(self): + """ + Test the fit_bounds kwarg of Variogram and bounds args of fit() + """ + Vnofit = Variogram(self.c, self.v, fit_method=None) + + bounds=(0, [np.max(Vnofit.bins), np.max(Vnofit.experimental)]) + + # Initiate variogram with bounds for fit + V = Variogram(self.c, self.v, fit_bounds=bounds) + # Same but calling fit + V.fit(bounds=bounds) + + # For a sum of models + V = Variogram(self.c, self.v, model='spherical+gaussian', fit_bounds=(0, bounds[1]*2)) + # Same but calling fit + V.fit(bounds=(0, bounds[1]*2)) + + # Check a warning is raised for a custom model + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1) + x + + with self.assertWarns(UserWarning): + Variogram(self.c, self.v, model=custom_model) + + bounds_custom = [(0, 0, 0), (np.max(self.c), np.max(self.v), np.max(self.v))] + + # And that no warning is raised otherwise + Variogram(self.c, self.v, model=custom_model, fit_bounds=bounds_custom) + + def test_fit_p0(self): + """ + Test the
fit_p0 kwarg of Variogram and p0 arg of fit() + """ + Vnofit = Variogram(self.c, self.v, fit_method=None) + p0 = (np.max(Vnofit.bins), np.max(Vnofit.experimental)) + + # Initiate variogram with p0 for fit + V = Variogram(self.c, self.v, fit_p0=p0) + # Same but calling fit + V.fit(p0=p0) + + # For a sum of models + V = Variogram(self.c, self.v, model='spherical+gaussian', fit_p0=p0 * 2) + # Same but calling fit + V.fit(p0=p0 * 2) + + # For a custom model + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1) + x + + p0_custom = (np.max(Vnofit.bins), np.max(Vnofit.experimental), np.max(Vnofit.experimental)) + with self.assertWarns(UserWarning) as w: + Variogram(self.c, self.v, model=custom_model, fit_p0=p0_custom) + class TestVariogramQualityMeasures(unittest.TestCase): def setUp(self): @@ -972,42 +1137,42 @@ def test_rmse(self): V = Variogram(self.c, self.v) for model, rmse in zip( - ['spherical', 'gaussian', 'stable'], - [3.3705, 3.3707, 3.193] + ['spherical', 'gaussian', 'stable', 'spherical+gaussian'], + [3.3705, 3.3707, 3.193, 3.0653] ): V.set_model(model) - self.assertAlmostEqual(V.rmse, rmse, places=4) + self.assertAlmostEqual(V.rmse, rmse, places=3) def test_mean_residual(self): V = Variogram(self.c, self.v) for model, mr in zip( - ['spherical', 'cubic', 'stable'], - [2.6803, 2.6803, 2.6966] + ['spherical', 'cubic', 'stable', 'spherical+gaussian'], + [2.6803, 2.6803, 2.6966, 2.4723] ): V.set_model(model) - self.assertAlmostEqual(V.mean_residual, mr, places=4) + self.assertAlmostEqual(V.mean_residual, mr, places=3) def test_nrmse(self): V = Variogram(self.c, self.v, n_lags=15) for model, nrmse in zip( - ['spherical', 'gaussian', 'stable', 'exponential'], - [0.3536, 0.3535, 0.3361, 0.3499] + ['spherical', 'gaussian', 'stable', 'exponential', 'spherical+gaussian'], + [0.3536, 0.3535, 0.3361, 0.3499, 0.3264] ): V.set_model(model) - self.assertAlmostEqual(V.nrmse, nrmse, places=4) + self.assertAlmostEqual(V.nrmse, nrmse, places=3) def test_nrmse_r(self): V = Variogram(self.c, self.v, estimator='cressie') - self.assertAlmostEqual(V.nrmse_r, 0.63543, places=5) + self.assertAlmostEqual(V.nrmse_r, 0.63543, places=3) def test_r(self): V = Variogram(self.c, self.v, n_lags=12, normalize=False) for model, r in zip( - ('gaussian', 'exponential', 'stable'), + ('gaussian', 'exponential', 'stable'), [0.39, 0.55, 0.60] ): V.set_model(model) @@ -1021,7 +1186,7 @@ def test_NS(self): [0.0206, 0.0206, 0.0206] ): self.assertAlmostEqual(V.NS, NS, places=4) - + def test_mae(self): V = Variogram(self.c, self.v, n_lags=15) @@ -1087,7 +1252,7 @@ def test_get_empirical(self): # test assert_array_almost_equal(bins, emp_x) assert_array_almost_equal(exp, emp_y) - + def test_get_empirical_center(self): V = Variogram(self.c, self.v) @@ -1110,7 +1275,7 @@ def test_data_no_force(self): assert_array_almost_equal( lags, - [0., 4.7, 9.4, 14.1, 18.8, 23.5, 28.2, 32.9, 37.6, 42.3], + [0., 4.7, 9.4, 14.1, 18.8, 23.5, 28.2, 32.9, 37.6, 42.3], decimal=2 ) @@ -1124,7 +1289,7 @@ def disabled_test_data_with_force(self): # Distance can no longer be explicitly set # it would require setting the whole MetricSpace, with a # non-sparse diagonal matrix - + # should work if _dist is corrupted self.V._dist = self.V._dist * 5.
self.V.cof = None @@ -1160,15 +1325,84 @@ def test_data_normalized(self): [0., 13.97, 13.97, 13.97, 13.97], decimal=2 ) - + + def test_set_model(self): + """Test setting model: individual, sum or custom""" + V = self.V.clone() + + # Individual model + for model_name in ['spherical', 'gaussian', 'exponential', 'cubic', 'matern', 'stable']: + V.set_model(model_name) + assert V._model_name == model_name + assert V._model == globals()[model_name] + assert V._is_model_custom is False + + # Sum of models + for model_name in ['spherical+gaussian', 'cubic+matern+stable']: + V.set_model(model_name) + assert V._model_name == model_name + assert V._is_model_custom is False + + # Custom model + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1) + x + + V.set_model(custom_model) + assert V._model_name == "custom_model" + assert V._model == custom_model + assert V._is_model_custom is True + + def test_describe(self): + """Test the describe function for different models""" + V = self.V.clone() + + keys_model = ['normalized_effective_range', 'normalized_sill', 'normalized_nugget', + 'effective_range', 'sill', 'nugget'] + + # Individual model: normal keys should be in dict + for model_name in ['spherical', 'gaussian', 'exponential', 'cubic', 'matern', 'stable']: + V.set_model(model_name) + desc = V.describe() + for key in keys_model: + assert key in desc + if model_name == 'matern': + assert 'smoothness' in desc + if model_name == 'stable': + assert 'shape' in desc + + # Sum of models: keys with a numbered ID should be in dict + V.set_model('spherical+gaussian') + desc = V.describe() + for key in [k+'_1' for k in keys_model] + [k+'_2' for k in keys_model]: + assert key in desc + + V.set_model('cubic+matern+stable') + desc = V.describe() + for key in [k + '_1' for k in keys_model] + [k + '_2' for k in keys_model] + [k + '_3' for k in keys_model]: + assert key in desc + assert 'smoothness_2' in desc + assert 'shape_3' in desc + + # Custom model + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1) + x + V.set_model(custom_model) + with self.assertWarns(UserWarning) as w: + desc = V.describe() + custom_keys = ["param1_r1", "param2_c1", "param3_x"] + for key in custom_keys: + assert key in desc + def test_parameter_property_matern(self): V = self.V.clone() - + # test matern param = [42.3, 16.2, 0.1, 0.]
V.set_model('matern') assert_array_almost_equal(V.parameters, param, decimal=2) - + def test_parameter_property_stable(self): V = self.V.clone() @@ -1177,6 +1411,34 @@ def test_parameter_property_stable(self): V.set_model('stable') assert_array_almost_equal(V.parameters, param, decimal=2) + def test_parameter_property_sum_models(self): + V = self.V.clone() + + # Using models with 3 parameters (range, sill, nugget) + param = [3.67, 10.61, 0, 42.3, 5.02, 0] + V.set_model('spherical+gaussian') + assert_array_almost_equal(V.parameters, param, decimal=2) + + # Using a stable model with 4 parameters + param2 = [35.77, 10.97, 0, 0, 42.3, 4.74, 0] + V.set_model('stable+cubic') + assert_array_almost_equal(V.parameters, param2, decimal=2) + + def test_parameter_property_custom_model(self): + V = self.V.clone() + + # Custom model will give parameters back in the order of the custom function + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1) + x + + param = [42.3, 5.76, 9.79] + V.set_model(custom_model) + + # Provide bounds to avoid a random fit + V.fit(bounds=(0, [np.max(V.bins), np.max(V.experimental), np.max(V.experimental)])) + assert_array_almost_equal(V.parameters, param, decimal=2) + class TestVariogramPlotlyPlots(unittest.TestCase): def setUp(self): @@ -1185,7 +1447,16 @@ def setUp(self): self.c = np.random.gamma(10, 4, (150, 2)) np.random.seed(42) self.v = np.random.normal(10, 4, 150) + # V = individual model self.V = Variogram(self.c, self.v) + # V2 = sum of models + self.V2 = Variogram(self.c, self.v, model='spherical+gaussian') + # V3 = custom model + @variogram + def custom_model(h, r1, c1, x): + return spherical(h, r1, c1) + x + self.V3 = Variogram(self.c, self.v, model=custom_model, + fit_bounds=(0, [np.max(self.V.bins), np.max(self.V.experimental), np.max(self.V.experimental)])) def test_plotly_main_plot(self): if PLOTLY_FOUND: @@ -1196,6 +1467,14 @@ def test_plotly_main_plot(self): isinstance(self.V.plot(show=False), go.Figure) ) + self.assertTrue( + isinstance(self.V2.plot(show=False), go.Figure) + ) + + self.assertTrue( + isinstance(self.V3.plot(show=False), go.Figure) + ) + plotting.backend('matplotlib') def test_plotly_scattergram(self): @@ -1230,7 +1509,7 @@ def test_plotly_dd_plot(self): ) plotting.backend('matplotlib') - + def test_undefined_backend(self): # force the backend into an undefined state import skgstat @@ -1244,7 +1523,7 @@ def test_undefined_backend(self): str(e.exception), 'The plotting backend has an undefined state.' ) - + # make the backend valid again skgstat.__backend__ = 'matplotlib' @@ -1279,7 +1558,7 @@ def test_samples(self): self.data[['x', 'y']].values, self.data.z.values, samples=sample_size, binning_random_state=44).describe() - + self.assertAlmostEqual(Vf["normalized_effective_range"], Vs["normalized_effective_range"], delta = Vf["normalized_effective_range"] / 5) self.assertAlmostEqual(Vf["effective_range"], Vs["effective_range"], delta = Vf["effective_range"] / 5) self.assertAlmostEqual(Vf["sill"], Vs["sill"], delta = Vf["sill"] / 5) @@ -1371,13 +1650,14 @@ def test_directional_covariable(self): assert_array_almost_equal(self.v[:,1], vario._co_variable) def test_cross_variogram_warns(self): - """Test warning when cross-variogram is exported to gstools""" + """Test warning when cross-variogram is exported to gstools (only if GSTools is installed)""" vario = Variogram(self.c, self.v) - with self.assertWarns(Warning) as w: - vario.to_gstools() - - self.assertTrue("This instance is a cross-variogram!!" 
in str(w.warning)) if __name__ == '__main__': # pragma: no cover diff --git a/skgstat/util/__init__.py b/skgstat/util/__init__.py index 962c3e7..0e1cbde 100644 --- a/skgstat/util/__init__.py +++ b/skgstat/util/__init__.py @@ -1 +1 @@ -from .shannon import shannon_entropy \ No newline at end of file +from .shannon import shannon_entropy diff --git a/skgstat/util/cross_variogram.py b/skgstat/util/cross_variogram.py index 91e56fa..d235287 100644 --- a/skgstat/util/cross_variogram.py +++ b/skgstat/util/cross_variogram.py @@ -1,5 +1,5 @@ """ -Cross-variogram utility function. This module can be used to calcualte +Cross-variogram utility function. This module can be used to calculate cross-variograms for more than two variables, by creating a variogram for each combination of variables. @@ -20,7 +20,7 @@ def cross_variograms(coordinates: np.ndarray, values: np.ndarray, **kwargs) -> L The diagonal of the *'matrix'* holds primary variograms (without cross option) for the respective column. The function accepts all keyword arguments that are also accepted by - :class:`Variogram ` and + :class:`Variogram ` and :class:`DirectionalVariogram ` and passes them down to the respective function. The directional variogram will be used as base class if any of the specific arguments are present: azimuth, bandwidth diff --git a/skgstat/util/likelihood.py b/skgstat/util/likelihood.py index 677ed78..ed71c24 100644 --- a/skgstat/util/likelihood.py +++ b/skgstat/util/likelihood.py @@ -4,8 +4,8 @@ References ---------- -[601] Lark, R. M. "Estimating variograms of soil properties by the - method‐of‐moments and maximum likelihood." European Journal +[601] Lark, R. M. "Estimating variograms of soil properties by the + method‐of‐moments and maximum likelihood." European Journal of Soil Science 51.4 (2000): 717-728. """ from typing import Callable, List @@ -19,7 +19,7 @@ DOC_TEMPLATE = """Autocorrelation function. -This function calcualtes the sptial autocorrelation for any +This function calculates the spatial autocorrelation for any model function only, by setting nugget to 0 and sill to 1. This can be used to create an autocorrelation matrix as used to derive a maximum likelihood function for the model. @@ -35,8 +35,8 @@ References ---------- -[601] Lark, R. M. "Estimating variograms of soil properties by the - method‐of‐moments and maximum likelihood." European Journal +[601] Lark, R. M. "Estimating variograms of soil properties by the + method‐of‐moments and maximum likelihood." European Journal of Soil Science 51.4 (2000): 717-728. """ @@ -73,7 +73,7 @@ def _build_A(transformed_func: Callable, params: List[float], dists: np.ndarray) a = np.fromiter(map(transformed_func, dists, cycle([r]), cycle([s])), dtype=float) else: r, c0, b = params - # calcualte the upper triangle of A: + # calculate the upper triangle of A: a = np.fromiter(map(transformed_func, dists, cycle([r])), dtype=float) # build the full matrix @@ -92,7 +92,7 @@ def get_likelihood(variogram: Variogram) -> Callable: negative log-likelihood for this set of parameters. The signature of the returned function has the interface as needed by :func:`scipy.optimize.minimize`. - The paramters for the models are given as ``[r, c0, b]``, which + The parameters for the models are given as ``[r, c0, b]``, which are the effective range, sill and nugget.
For the :func:`Matérn ` and :func:`Stable ` models, the parameters include diff --git a/skgstat/util/shannon.py b/skgstat/util/shannon.py index bdc1d2c..432b11a 100644 --- a/skgstat/util/shannon.py +++ b/skgstat/util/shannon.py @@ -18,7 +18,7 @@ def shannon_entropy(x, bins): bins : list, int upper edges of the bins used to calculate the histogram of x. - + Returns ------- h : float diff --git a/skgstat/util/uncertainty.py b/skgstat/util/uncertainty.py index 6c241c3..efbe491 100644 --- a/skgstat/util/uncertainty.py +++ b/skgstat/util/uncertainty.py @@ -94,9 +94,9 @@ def propagate( Notes ----- For each member of the evaluated property, the lower and upper bound - along with the median value is retuned as ``[low, median, up]``. + along with the median value is returned as ``[low, median, up]``. Thus the returned array has the shape ``(N, 3)``. - N is the lengh of evaluated property, which is + N is the length of evaluated property, which is :func:`n_lags ` diff --git a/tutorials/tereno_fendt/meta_data_CosmicSense_JFC1_DE-Fen_SNdata.json b/tutorials/tereno_fendt/meta_data_CosmicSense_JFC1_DE-Fen_SNdata.json index 5d6346a..6b9a194 100644 --- a/tutorials/tereno_fendt/meta_data_CosmicSense_JFC1_DE-Fen_SNdata.json +++ b/tutorials/tereno_fendt/meta_data_CosmicSense_JFC1_DE-Fen_SNdata.json @@ -5,14 +5,14 @@ "Info": "These files are part of the dataset published with the data-paper mentioned below.", "cite-as": "Fersch, B., Francke, T., Heistermann, M., Schrön, M., Döpper, V., Jakobi, J. et. al (2020): A dense network of cosmic-ray neutron sensors for soilmoisture observation in a highly instrumented pre-alpine headwater catchment in Germany. Earth System Science Data. https://doi.org/10.5194/essd-2020-48" }, - + "Provider": { "Name": "Benjamin Fersch", "Institution": "KIT Campus Alpin", "Email": "fersch@kit.edu", - "Comment": "" + "Comment": "" }, - + "SpaceTimeCoverage": { "StartDate": "2019-05-01", "EndDate": "2019-07-31", @@ -23,14 +23,14 @@ "Elevation": "595 m ASL", "Weblink": "https://geoportal.bayern.de/bayernatlas/?zoom=15&lang=de&topic=ba&bgLayer=atkis&E=654197&N=5299736&catalogNodes=122,11&layers=luftbild&crosshair=marker" }, - + "Source": { "Name": "Benjamin Fersch", "Institution": "KIT Campus Alpin", "LinkToOriginalSource": "https://www.imk-ifu.kit.edu/tereno.php" }, - - + + "Variables": { "time": "time of measurement", "lat": "profile latitude coordinate", @@ -45,7 +45,7 @@ "T_a": "soil temperature (sensor group a)", "T_b": "soil temperature (sensor group b)" }, - + "Units": { "time": "minutes since 2019-05-01 00:00:00", "lat": "degree north", @@ -60,20 +60,19 @@ "T_a": "degree Celsius", "T_b": "degree Celsius" }, - + "SpatialReferenceSystem": { "Name": "WGS 84", - "EPSG": "4326" + "EPSG": "4326" }, - - + + "TemporalReferenceSystem": { "TimeZone": "UTC", "IntervalLength": "15 minutes", "IntervalAggregation": "instantaneous", "TimestampAtEndOfInterval": "FALSE" }, - + "Remarks": "Each profile is equipped with 2 redundant sensors (a, and b)\n Metadata also contained in NetCDF header." } - \ No newline at end of file diff --git a/tutorials/tereno_fendt/tereno.json b/tutorials/tereno_fendt/tereno.json index cc816e7..c366180 100644 --- a/tutorials/tereno_fendt/tereno.json +++ b/tutorials/tereno_fendt/tereno.json @@ -17754,4 +17754,4 @@ ] ], "description": "Data derived from Fersch et al. (2020) https://doi.org/10.5194/essd-2020-48. Published under CC BY 4.0.\n It is From the WSN product, the T_a in 20cm depth is extracted" -} \ No newline at end of file +}
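Taken together, the estimator change in the skgstat/estimators.py hunk and the halved expectations in test_estimator.py amount to the following corrected Dowd estimator. Dowd's robust estimator 2.198 * median(|diffs|)^2 approximates the variogram 2*gamma(h), so dividing by two yields the semivariance gamma(h) that the other estimators return, which is consistent with the updated test values. A minimal standalone sketch, where x is the array of pairwise differences within one lag class:

    import numpy as np

    def dowd(x):
        # 2.198 * median(x)^2 estimates the variogram 2*gamma(h);
        # the division by 2 converts it to the semivariance gamma(h),
        # matching the halved expected test values
        # (2.0873 -> 1.0437 and 3170.97 -> 1585.48).
        return 2.198 * np.nanmedian(x) ** 2 / 2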