Isotropic kernels (#27)

* feat: add isotropic Matern and squared exponential kernels. * test: add tests for new isotropic kernels. * fix: add default bounds for isotropic kernels. * test: add more tests for isotropic kernels. * refactor: raise error if `compute_grad` and `X_star` is not None. * test: add more tests for isotropic kernels. * fix: set default lower/upper bounds from min/max radius.
acerbilab · Apr 12, 2023 · 20730b5 · 20730b5
1 parent 26e4f18
commit 20730b5
Show file tree

Hide file tree

Showing 6 changed files with 1,648 additions and 0 deletions.
diff --git a/gpyreg/__init__.py b/gpyreg/__init__.py
@@ -1,6 +1,7 @@
 """Lightweight Gaussian process regression package."""
 
 import gpyreg.covariance_functions
+import gpyreg.isotropic_covariance_functions
 import gpyreg.f_min_fill
 import gpyreg.mean_functions
 import gpyreg.noise_functions

diff --git a/gpyreg/covariance_functions.py b/gpyreg/covariance_functions.py
@@ -169,6 +169,8 @@ def compute(
         K = sf2 * np.exp(-tmp / 2)
 
         if compute_grad:
+            if X_star is not None:
+                raise ValueError("X_star should be None when compute_grad is True.")
             dK = np.zeros((cov_N, N, N))
             for i in range(0, D):
                 # Gradient of cov length scales
@@ -255,6 +257,8 @@ def compute(
         K = sf2 * self.f(tmp) * np.exp(-tmp)
 
         if compute_grad:
+            if X_star is not None:
+                raise ValueError("X_star should be None when compute_grad is True.")
             dK = np.zeros((cov_N, N, N))
             for i in range(0, D):
                 Ki = squareform(
@@ -331,6 +335,8 @@ def compute(
         K = sf2 * M ** (-alpha)
 
         if compute_grad:
+            if X_star is not None:
+                raise ValueError("X_star should be None when compute_grad is True.")
             dK = np.zeros((cov_N, N, N))
 
             # Gradient respect of lenght scale.

diff --git a/gpyreg/isotropic_covariance_functions.py b/gpyreg/isotropic_covariance_functions.py
@@ -0,0 +1,259 @@
+from abc import ABC, abstractmethod
+
+import numpy as np
+from scipy.spatial.distance import cdist, pdist, squareform
+
+from .covariance_functions import AbstractKernel, Matern, SquaredExponential
+
+class AbstractIsotropicKernel(AbstractKernel):
+    """Abstract base class for isotropic kernel functions.
+
+    The two default hyperparameters are the log-lengthscale and
+    log-outputscale.
+    """
+
+    def hyperparameter_count(self, D: int):
+        """
+        Return the number of hyperparameters this covariance function has.
+
+        Parameters
+        ----------
+        D : int
+            The dimensionality of the kernel.
+
+        Returns
+        -------
+        count : int
+            The number of hyperparameters.
+        """
+        return 2
+
+    def hyperparameter_info(self, D: int):
+        """
+        Return information on the names of hyperparameters for setting
+        them in other parts of the program.
+
+        Parameters
+        ----------
+        D : int
+            The dimensionality of the kernel.
+
+        Returns
+        -------
+        hyper_info : array_like
+            A list of tuples of hyperparameter names and their number,
+            in the order they are in the hyperparameter array.
+        """
+        return [
+            ("covariance_log_lengthscale", 1),
+            ("covariance_log_outputscale", 1),
+        ]
+
+    def get_bounds_info(self, X: np.ndarray, y: np.ndarray):
+        """
+        Return information on the lower, upper, plausible lower
+        and plausible upper bounds of the hyperparameters of this
+        covariance function.
+
+        Parameters
+        ----------
+        X : ndarray, shape (N, D)
+            A 2D array where each row is a test point.
+        y : ndarray, shape (N, 1)
+            A 2D array where each row is a test target.
+
+        Returns
+        -------
+        cov_bound_info: dict
+            A dictionary containing the bound info with the following elements:
+
+            **LB** : np.ndarray, shape (cov_N, 1)
+                    The lower bounds of the hyperparameters.
+            **UB** : np.ndarray, shape (cov_N, 1)
+                    The upper bounds of the hyperparameters.
+            **PLB** : np.ndarray, shape (cov_N, 1)
+                    The plausible lower bounds of the hyperparameters.
+            **PUB** : np.ndarray, shape (cov_N, 1)
+                    The plausible upper bounds of the hyperparameters.
+            **x0** : np.ndarray, shape (cov_N, 1)
+                    The plausible starting point.
+
+            where ``cov_N`` is the number of hyperparameters.
+        """
+        cov_N = self.hyperparameter_count(X.shape[1])
+        return _isotropic_bounds_info_helper(cov_N, X, y)
+
+class MaternIsotropic(AbstractIsotropicKernel, Matern):
+    """
+    Isotropic Matern kernel.
+
+    Overrides `compute`. Inherits `hyperparameter_count` and
+    `hyperparameter_info` from :class:`AbstractIsotropicKernel`. Inherits other
+    methods from :class:`Matern`.
+
+    Parameters
+    ----------
+    degree : {1, 3, 5}
+        The degree of the isotropic Matern kernel.
+
+        Currently the only supported degrees are 1, 3, 5, and if
+        some other degree is provided a ``ValueError`` exception is raised.
+    """
+
+    # Overriding abstract method
+    def compute(
+        self,
+        hyp: np.ndarray,
+        X: np.ndarray,
+        X_star: np.ndarray = None,
+        compute_diag: bool = False,
+        compute_grad: bool = False,
+    ):
+
+        N, D = X.shape
+        cov_N = self.hyperparameter_count(D)
+
+        if hyp.size != cov_N:
+            raise ValueError(
+                f"Expected {cov_N} covariance function hyperparameters, "
+                f"{hyp.size} passed instead."
+            )
+        if hyp.ndim != 1:
+            raise ValueError(
+                "Covariance function output is available only for "
+                "one-sample hyperparameter inputs."
+            )
+
+        ell = np.exp(hyp[0])
+        sf2 = np.exp(2 * hyp[1])
+
+        if X_star is None:
+            if compute_diag:
+                tmp = np.zeros((N, 1))
+            else:
+                tmp = squareform(
+                    pdist(X * np.sqrt(self.degree) / ell)
+                )
+        else:
+            a = X * np.sqrt(self.degree) / ell
+            b = X_star * np.sqrt(self.degree) / ell
+            tmp = cdist(a, b)
+
+        K = sf2 * self.f(tmp) * np.exp(-tmp)
+
+        if compute_grad:
+            if X_star is not None:
+                raise ValueError("X_star should be None when compute_grad is True.")
+            dK = np.zeros((cov_N, N, N))
+            # Gradient of cov length scale
+            K_ls = squareform(pdist(np.sqrt(self.degree) / ell * X, "sqeuclidean"))
+            # With d=1 kernel there will be issues caused by zero
+            # divisions. This is OK, the kernel is just not
+            # differentiable there.
+            with np.errstate(all="ignore"):
+                dK[0, :, :] = sf2 * (self.df(tmp) * np.exp(-tmp)) * K_ls
+            # Gradient of cov output scale
+            dK[1, :, :] = 2 * K
+            return K, dK.transpose(1, 2, 0)
+
+        return K
+
+class SquaredExponentialIsotropic(AbstractIsotropicKernel, SquaredExponential):
+    """Isotropic squared exponential kernel.
+
+    Overrides `compute`. Inherits `hyperparameter_count` and
+    `hyperparameter_info` from :class:`AbstractIsotropicKernel`. Inherits other
+    methods from :class:`SquaredExponential`.
+    """
+
+    # Overriding abstract method
+    def compute(
+        self,
+        hyp: np.ndarray,
+        X: np.ndarray,
+        X_star: np.ndarray = None,
+        compute_diag: bool = False,
+        compute_grad: bool = False,
+    ):
+
+        N, D = X.shape
+        cov_N = self.hyperparameter_count(D)
+
+        if hyp.size != cov_N:
+            raise ValueError(
+                f"Expected {cov_N} covariance function hyperparameters, "
+                f"{hyp.size} passed instead."
+            )
+        if hyp.ndim != 1:
+            raise ValueError(
+                "Covariance function output is available only for "
+                "one-sample hyperparameter inputs."
+            )
+
+        ell = np.exp(hyp[0])
+        sf2 = np.exp(2 * hyp[1])
+
+        if X_star is None:
+            if compute_diag:
+                tmp = np.zeros((N, 1))
+            else:
+                tmp = squareform(pdist(X / ell, "sqeuclidean"))
+        else:
+            tmp = cdist(X / ell, X_star / ell, "sqeuclidean")
+
+        K = sf2 * np.exp(-tmp / 2)
+
+        if compute_grad:
+            if X_star is not None:
+                raise ValueError("X_star should be None when compute_grad is True.")
+            dK = np.zeros((cov_N, N, N))
+            # Gradient of cov length scale
+            dK[0, :, :] = K * squareform(pdist(X / ell, "sqeuclidean"))
+            # Gradient of cov output scale.
+            dK[1, :, :] = 2 * K
+            return K, dK.transpose(1, 2, 0)
+
+        return K
+
+def _isotropic_bounds_info_helper(cov_N, X, y):
+    _, D = X.shape
+    tol = 1e-6
+    lower_bounds = np.full((cov_N,), -np.inf)
+    upper_bounds = np.full((cov_N,), np.inf)
+    plausible_lower_bounds = np.full((cov_N,), -np.inf)
+    plausible_upper_bounds = np.full((cov_N,), np.inf)
+    plausible_x0 = np.full((cov_N,), np.nan)
+
+    width = np.mean(np.max(X, axis=0) - np.min(X, axis=0))
+    min_width = np.min(width)
+    max_width = np.max(width)
+    if np.size(y) <= 1:
+        y = np.array([0, 1])
+    height = np.max(y) - np.min(y)
+
+    lower_bounds[0:cov_N - 1] = np.log(min_width) + np.log(tol)
+    upper_bounds[0:cov_N - 1] = np.log(max_width * 10)
+    plausible_lower_bounds[0:cov_N - 1] = np.log(min_width) + 0.5 * np.log(tol)
+    plausible_upper_bounds[0:cov_N - 1] = np.log(max_width)
+    plausible_x0[0:cov_N - 1] = np.log(np.std(X, ddof=1))
+
+    lower_bounds[cov_N - 1] = np.log(height) + np.log(tol)
+    upper_bounds[cov_N - 1] = np.log(height * 10)
+    plausible_lower_bounds[cov_N - 1] = np.log(height) + 0.5 * np.log(tol)
+    plausible_upper_bounds[cov_N - 1] = np.log(height)
+    plausible_x0[cov_N - 1] = np.log(np.std(y, ddof=1))
+
+    # Plausible starting point
+    i_nan = np.isnan(plausible_x0)
+    plausible_x0[i_nan] = 0.5 * (
+        plausible_lower_bounds[i_nan] + plausible_upper_bounds[i_nan]
+    )
+
+    bounds_info = {
+        "LB": lower_bounds,
+        "UB": upper_bounds,
+        "PLB": plausible_lower_bounds,
+        "PUB": plausible_upper_bounds,
+        "x0": plausible_x0,
+    }
+    return bounds_info
diff --git a/gpyreg/testing/test_covariance_functions.py b/gpyreg/testing/test_covariance_functions.py
@@ -14,6 +14,7 @@ def test_squared_exponential_compute_sanity_checks():
     D = 3
     N = 20
     X = np.ones((N, D))
+    X_star = np.zeros((N, D))
 
     with pytest.raises(ValueError) as execinfo:
         hyp = np.ones(D + 2)
@@ -29,6 +30,13 @@ def test_squared_exponential_compute_sanity_checks():
         "Covariance function output is available only for"
         in execinfo.value.args[0]
     )
+    with pytest.raises(ValueError) as execinfo:
+        hyp = np.ones(D + 1)
+        squared_expontential.compute(hyp, X, X_star, compute_grad=True)
+    assert (
+        "X_star should be None when compute_grad is True."
+        in execinfo.value.args[0]
+    )
 
 
 def test_sqr_exp_kernel_gradient():
@@ -48,6 +56,7 @@ def test_matern_compute_sanity_checks():
     D = 3
     N = 20
     X = np.ones((N, D))
+    X_star = np.zeros((N, D))
 
     with pytest.raises(ValueError) as execinfo:
         hyp = np.ones(D + 2)
@@ -63,6 +72,13 @@ def test_matern_compute_sanity_checks():
         "Covariance function output is available only for"
         in execinfo.value.args[0]
     )
+    with pytest.raises(ValueError) as execinfo:
+        hyp = np.ones(D + 1)
+        matern.compute(hyp, X, X_star, compute_grad=True)
+    assert (
+        "X_star should be None when compute_grad is True."
+        in execinfo.value.args[0]
+    )
 
 
 def test_matern_invalid_degree():