From 20730b50eef39dd6f9d73c4923f6919d973f0634 Mon Sep 17 00:00:00 2001 From: Bobby-Huggins <70731267+Bobby-Huggins@users.noreply.github.com> Date: Wed, 12 Apr 2023 22:59:05 +0300 Subject: [PATCH] Isotropic kernels (#27) * feat: add isotropic Matern and squared exponential kernels. * test: add tests for new isotropic kernels. * fix: add default bounds for isotropic kernels. * test: add more tests for isotropic kernels. * refactor: raise error if `compute_grad` and `X_star` is not None. * test: add more tests for isotropic kernels. * fix: set default lower/upper bounds from min/max radius. --- gpyreg/__init__.py | 1 + gpyreg/covariance_functions.py | 6 + gpyreg/isotropic_covariance_functions.py | 259 ++++ gpyreg/testing/test_covariance_functions.py | 16 + .../test_gaussian_process_isotropic.py | 1136 +++++++++++++++++ .../test_isotropic_covariance_functions.py | 230 ++++ 6 files changed, 1648 insertions(+) create mode 100644 gpyreg/isotropic_covariance_functions.py create mode 100644 gpyreg/testing/test_gaussian_process_isotropic.py create mode 100644 gpyreg/testing/test_isotropic_covariance_functions.py diff --git a/gpyreg/__init__.py b/gpyreg/__init__.py index fb5c1ee..e9ff15e 100644 --- a/gpyreg/__init__.py +++ b/gpyreg/__init__.py @@ -1,6 +1,7 @@ """Lightweight Gaussian process regression package.""" import gpyreg.covariance_functions +import gpyreg.isotropic_covariance_functions import gpyreg.f_min_fill import gpyreg.mean_functions import gpyreg.noise_functions diff --git a/gpyreg/covariance_functions.py b/gpyreg/covariance_functions.py index 01f489a..3e58f11 100644 --- a/gpyreg/covariance_functions.py +++ b/gpyreg/covariance_functions.py @@ -169,6 +169,8 @@ def compute( K = sf2 * np.exp(-tmp / 2) if compute_grad: + if X_star is not None: + raise ValueError("X_star should be None when compute_grad is True.") dK = np.zeros((cov_N, N, N)) for i in range(0, D): # Gradient of cov length scales @@ -255,6 +257,8 @@ def compute( K = sf2 * self.f(tmp) * np.exp(-tmp) if compute_grad: + if X_star is not None: + raise ValueError("X_star should be None when compute_grad is True.") dK = np.zeros((cov_N, N, N)) for i in range(0, D): Ki = squareform( @@ -331,6 +335,8 @@ def compute( K = sf2 * M ** (-alpha) if compute_grad: + if X_star is not None: + raise ValueError("X_star should be None when compute_grad is True.") dK = np.zeros((cov_N, N, N)) # Gradient respect of lenght scale. diff --git a/gpyreg/isotropic_covariance_functions.py b/gpyreg/isotropic_covariance_functions.py new file mode 100644 index 0000000..82fbcb1 --- /dev/null +++ b/gpyreg/isotropic_covariance_functions.py @@ -0,0 +1,259 @@ +from abc import ABC, abstractmethod + +import numpy as np +from scipy.spatial.distance import cdist, pdist, squareform + +from .covariance_functions import AbstractKernel, Matern, SquaredExponential + +class AbstractIsotropicKernel(AbstractKernel): + """Abstract base class for isotropic kernel functions. + + The two default hyperparameters are the log-lengthscale and + log-outputscale. + """ + + def hyperparameter_count(self, D: int): + """ + Return the number of hyperparameters this covariance function has. + + Parameters + ---------- + D : int + The dimensionality of the kernel. + + Returns + ------- + count : int + The number of hyperparameters. + """ + return 2 + + def hyperparameter_info(self, D: int): + """ + Return information on the names of hyperparameters for setting + them in other parts of the program. + + Parameters + ---------- + D : int + The dimensionality of the kernel. 
+ + Returns + ------- + hyper_info : array_like + A list of tuples of hyperparameter names and their number, + in the order they are in the hyperparameter array. + """ + return [ + ("covariance_log_lengthscale", 1), + ("covariance_log_outputscale", 1), + ] + + def get_bounds_info(self, X: np.ndarray, y: np.ndarray): + """ + Return information on the lower, upper, plausible lower + and plausible upper bounds of the hyperparameters of this + covariance function. + + Parameters + ---------- + X : ndarray, shape (N, D) + A 2D array where each row is a test point. + y : ndarray, shape (N, 1) + A 2D array where each row is a test target. + + Returns + ------- + cov_bound_info: dict + A dictionary containing the bound info with the following elements: + + **LB** : np.ndarray, shape (cov_N, 1) + The lower bounds of the hyperparameters. + **UB** : np.ndarray, shape (cov_N, 1) + The upper bounds of the hyperparameters. + **PLB** : np.ndarray, shape (cov_N, 1) + The plausible lower bounds of the hyperparameters. + **PUB** : np.ndarray, shape (cov_N, 1) + The plausible upper bounds of the hyperparameters. + **x0** : np.ndarray, shape (cov_N, 1) + The plausible starting point. + + where ``cov_N`` is the number of hyperparameters. + """ + cov_N = self.hyperparameter_count(X.shape[1]) + return _isotropic_bounds_info_helper(cov_N, X, y) + +class MaternIsotropic(AbstractIsotropicKernel, Matern): + """ + Isotropic Matern kernel. + + Overrides `compute`. Inherits `hyperparameter_count` and + `hyperparameter_info` from :class:`AbstractIsotropicKernel`. Inherits other + methods from :class:`Matern`. + + Parameters + ---------- + degree : {1, 3, 5} + The degree of the isotropic Matern kernel. + + Currently the only supported degrees are 1, 3, 5, and if + some other degree is provided a ``ValueError`` exception is raised. + """ + + # Overriding abstract method + def compute( + self, + hyp: np.ndarray, + X: np.ndarray, + X_star: np.ndarray = None, + compute_diag: bool = False, + compute_grad: bool = False, + ): + + N, D = X.shape + cov_N = self.hyperparameter_count(D) + + if hyp.size != cov_N: + raise ValueError( + f"Expected {cov_N} covariance function hyperparameters, " + f"{hyp.size} passed instead." + ) + if hyp.ndim != 1: + raise ValueError( + "Covariance function output is available only for " + "one-sample hyperparameter inputs." + ) + + ell = np.exp(hyp[0]) + sf2 = np.exp(2 * hyp[1]) + + if X_star is None: + if compute_diag: + tmp = np.zeros((N, 1)) + else: + tmp = squareform( + pdist(X * np.sqrt(self.degree) / ell) + ) + else: + a = X * np.sqrt(self.degree) / ell + b = X_star * np.sqrt(self.degree) / ell + tmp = cdist(a, b) + + K = sf2 * self.f(tmp) * np.exp(-tmp) + + if compute_grad: + if X_star is not None: + raise ValueError("X_star should be None when compute_grad is True.") + dK = np.zeros((cov_N, N, N)) + # Gradient of cov length scale + K_ls = squareform(pdist(np.sqrt(self.degree) / ell * X, "sqeuclidean")) + # With d=1 kernel there will be issues caused by zero + # divisions. This is OK, the kernel is just not + # differentiable there. + with np.errstate(all="ignore"): + dK[0, :, :] = sf2 * (self.df(tmp) * np.exp(-tmp)) * K_ls + # Gradient of cov output scale + dK[1, :, :] = 2 * K + return K, dK.transpose(1, 2, 0) + + return K + +class SquaredExponentialIsotropic(AbstractIsotropicKernel, SquaredExponential): + """Isotropic squared exponential kernel. + + Overrides `compute`. Inherits `hyperparameter_count` and + `hyperparameter_info` from :class:`AbstractIsotropicKernel`. 
Inherits other + methods from :class:`SquaredExponential`. + """ + + # Overriding abstract method + def compute( + self, + hyp: np.ndarray, + X: np.ndarray, + X_star: np.ndarray = None, + compute_diag: bool = False, + compute_grad: bool = False, + ): + + N, D = X.shape + cov_N = self.hyperparameter_count(D) + + if hyp.size != cov_N: + raise ValueError( + f"Expected {cov_N} covariance function hyperparameters, " + f"{hyp.size} passed instead." + ) + if hyp.ndim != 1: + raise ValueError( + "Covariance function output is available only for " + "one-sample hyperparameter inputs." + ) + + ell = np.exp(hyp[0]) + sf2 = np.exp(2 * hyp[1]) + + if X_star is None: + if compute_diag: + tmp = np.zeros((N, 1)) + else: + tmp = squareform(pdist(X / ell, "sqeuclidean")) + else: + tmp = cdist(X / ell, X_star / ell, "sqeuclidean") + + K = sf2 * np.exp(-tmp / 2) + + if compute_grad: + if X_star is not None: + raise ValueError("X_star should be None when compute_grad is True.") + dK = np.zeros((cov_N, N, N)) + # Gradient of cov length scale + dK[0, :, :] = K * squareform(pdist(X / ell, "sqeuclidean")) + # Gradient of cov output scale. + dK[1, :, :] = 2 * K + return K, dK.transpose(1, 2, 0) + + return K + +def _isotropic_bounds_info_helper(cov_N, X, y): + _, D = X.shape + tol = 1e-6 + lower_bounds = np.full((cov_N,), -np.inf) + upper_bounds = np.full((cov_N,), np.inf) + plausible_lower_bounds = np.full((cov_N,), -np.inf) + plausible_upper_bounds = np.full((cov_N,), np.inf) + plausible_x0 = np.full((cov_N,), np.nan) + + width = np.mean(np.max(X, axis=0) - np.min(X, axis=0)) + min_width = np.min(width) + max_width = np.max(width) + if np.size(y) <= 1: + y = np.array([0, 1]) + height = np.max(y) - np.min(y) + + lower_bounds[0:cov_N - 1] = np.log(min_width) + np.log(tol) + upper_bounds[0:cov_N - 1] = np.log(max_width * 10) + plausible_lower_bounds[0:cov_N - 1] = np.log(min_width) + 0.5 * np.log(tol) + plausible_upper_bounds[0:cov_N - 1] = np.log(max_width) + plausible_x0[0:cov_N - 1] = np.log(np.std(X, ddof=1)) + + lower_bounds[cov_N - 1] = np.log(height) + np.log(tol) + upper_bounds[cov_N - 1] = np.log(height * 10) + plausible_lower_bounds[cov_N - 1] = np.log(height) + 0.5 * np.log(tol) + plausible_upper_bounds[cov_N - 1] = np.log(height) + plausible_x0[cov_N - 1] = np.log(np.std(y, ddof=1)) + + # Plausible starting point + i_nan = np.isnan(plausible_x0) + plausible_x0[i_nan] = 0.5 * ( + plausible_lower_bounds[i_nan] + plausible_upper_bounds[i_nan] + ) + + bounds_info = { + "LB": lower_bounds, + "UB": upper_bounds, + "PLB": plausible_lower_bounds, + "PUB": plausible_upper_bounds, + "x0": plausible_x0, + } + return bounds_info diff --git a/gpyreg/testing/test_covariance_functions.py b/gpyreg/testing/test_covariance_functions.py index c6ca526..848716f 100644 --- a/gpyreg/testing/test_covariance_functions.py +++ b/gpyreg/testing/test_covariance_functions.py @@ -14,6 +14,7 @@ def test_squared_exponential_compute_sanity_checks(): D = 3 N = 20 X = np.ones((N, D)) + X_star = np.zeros((N, D)) with pytest.raises(ValueError) as execinfo: hyp = np.ones(D + 2) @@ -29,6 +30,13 @@ def test_squared_exponential_compute_sanity_checks(): "Covariance function output is available only for" in execinfo.value.args[0] ) + with pytest.raises(ValueError) as execinfo: + hyp = np.ones(D + 1) + squared_expontential.compute(hyp, X, X_star, compute_grad=True) + assert ( + "X_star should be None when compute_grad is True." 
+ in execinfo.value.args[0] + ) def test_sqr_exp_kernel_gradient(): @@ -48,6 +56,7 @@ def test_matern_compute_sanity_checks(): D = 3 N = 20 X = np.ones((N, D)) + X_star = np.zeros((N, D)) with pytest.raises(ValueError) as execinfo: hyp = np.ones(D + 2) @@ -63,6 +72,13 @@ def test_matern_compute_sanity_checks(): "Covariance function output is available only for" in execinfo.value.args[0] ) + with pytest.raises(ValueError) as execinfo: + hyp = np.ones(D + 1) + matern.compute(hyp, X, X_star, compute_grad=True) + assert ( + "X_star should be None when compute_grad is True." + in execinfo.value.args[0] + ) def test_matern_invalid_degree(): diff --git a/gpyreg/testing/test_gaussian_process_isotropic.py b/gpyreg/testing/test_gaussian_process_isotropic.py new file mode 100644 index 0000000..af0319f --- /dev/null +++ b/gpyreg/testing/test_gaussian_process_isotropic.py @@ -0,0 +1,1136 @@ +import copy + +import matplotlib.pyplot as plt +import numpy as np +import pytest +import scipy.stats +from scipy.integrate import quad +from scipy.misc import derivative + +import gpyreg as gpr +from gpyreg.isotropic_covariance_functions import MaternIsotropic, SquaredExponentialIsotropic + + +@pytest.mark.filterwarnings( + """ignore:Matplotlib is currently using agg:UserWarning""" +) +def test_empty_gp(): + N = 20 + D = 2 + + gp = gpr.GP( + D=D, + covariance=SquaredExponentialIsotropic(), + mean=gpr.mean_functions.NegativeQuadratic(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + # Test that temporary_data dict exists (required for e.g. pyvbmc) + assert isinstance(gp.temporary_data, dict) + + # Test that lower and upper bounds are set appropriately. + bounds = gp.get_bounds() + assert np.all(np.isnan(bounds["covariance_log_lengthscale"])) + assert np.all(np.isnan(bounds["covariance_log_lengthscale"])) + assert np.all(np.isnan(bounds["covariance_log_outputscale"])) + assert np.all(np.isnan(bounds["covariance_log_outputscale"])) + assert np.all(np.isnan(bounds["noise_log_scale"])) + assert np.all(np.isnan(bounds["noise_log_scale"])) + assert np.all(np.isnan(bounds["mean_const"])) + assert np.all(np.isnan(bounds["mean_const"])) + assert np.all(np.isnan(bounds["mean_location"])) + assert np.all(np.isnan(bounds["mean_location"])) + assert np.all(np.isnan(bounds["mean_log_scale"])) + assert np.all(np.isnan(bounds["mean_log_scale"])) + + # Test that hyperparameter priors are set appropriately. + prior = gp.get_priors() + assert prior["covariance_log_lengthscale"] is None + assert prior["covariance_log_outputscale"] is None + assert prior["noise_log_scale"] is None + assert prior["mean_const"] is None + assert prior["mean_location"] is None + assert prior["mean_log_scale"] is None + + # Come up with some hyperparameters. + cov_N = gp.covariance.hyperparameter_count(D) + mean_N = gp.mean.hyperparameter_count(D) + noise_N = gp.noise.hyperparameter_count() + hyp = np.random.standard_normal(size=(3, cov_N + noise_N + mean_N)) + hyp[:, D] *= 0.2 + hyp[:, D + 1 : D + 1 + noise_N] *= 0.3 + + # Set GP to have them. + gp.update(hyp=hyp) + + # Test that we can call prediction functions etc. + # with a GP that only has hyperparameters. 
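+    # With no training data, the predictions below should come from the
+    # GP prior implied by the sampled hyperparameters.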
+ xx, yy = np.meshgrid(np.linspace(-5, 5, 20), np.linspace(-5, 5, 20)) + x_star = np.array((xx.ravel(), yy.ravel())).T + gp.predict_full(x_star, add_noise=True) + gp.predict_full(x_star, add_noise=False) + + gp.predict(x_star, add_noise=True) + gp.predict(x_star, add_noise=False) + + y_star = np.zeros((400, 1)) + gp.predict(x_star, y_star, return_lpd=True, add_noise=False) + gp.predict(x_star, y_star, return_lpd=True, add_noise=True) + + # gp.quad(0, 1, compute_var=True) + + # gp.plot() + + +@pytest.mark.filterwarnings( + """ignore:Matplotlib is currently using agg:UserWarning""" +) +def test_random_function(): + N = 20 + D = 2 + X = np.random.standard_normal(size=(N, D)) + + gp = gpr.GP( + D=D, + covariance=MaternIsotropic(1), + mean=gpr.mean_functions.ConstantMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + cov_N = gp.covariance.hyperparameter_count(D) + mean_N = gp.mean.hyperparameter_count(D) + noise_N = gp.noise.hyperparameter_count() + + N_s = np.random.randint(1, 3) + hyp = np.random.standard_normal(size=(N_s, cov_N + noise_N + mean_N)) + hyp[:, D] *= 0.2 + hyp[:, D + 1 : D + 1 + noise_N] *= 0.3 + + gp.update(hyp=hyp) + y = gp.random_function(X) + gp.update(X_new=X, y_new=y) + + # gp.plot() + + X_new = np.random.standard_normal(size=(10, D)) + y_new = gp.random_function(X_new) + + # Test return_lpd: + __, __, lpd = gp.predict( + X_new, y_new, return_lpd=True, add_noise=False, separate_samples=False + ) + assert lpd.shape == (10, 1) + __, __, lpd = gp.predict( + X_new, y_new, return_lpd=True, add_noise=False, separate_samples=True + ) + assert lpd.shape == (10, N_s) + __, __, lpd = gp.predict( + X_new, y_new, return_lpd=True, add_noise=True, separate_samples=False + ) + assert lpd.shape == (10, 1) + __, __, lpd = gp.predict( + X_new, y_new, return_lpd=True, add_noise=True, separate_samples=True + ) + assert lpd.shape == (10, N_s) + + gp.update(X_new=X_new, y_new=y_new) + + # gp.plot(delta_y=5, max_min_flag=False) + + +def test_getters_setters(): + N = 20 + D = 2 + X = np.random.uniform(low=-3, high=3, size=(N, D)) + y = np.sin(np.sum(X, 1)) + np.random.normal(scale=0.1, size=N) + + gp = gpr.GP( + D=D, + covariance=SquaredExponentialIsotropic(), + mean=gpr.mean_functions.ConstantMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + hyp_dict_list = gp.get_hyperparameters() + assert len(hyp_dict_list) == 1 + + hyp_dict = hyp_dict_list[0] + assert np.all(np.isnan(hyp_dict["covariance_log_lengthscale"])) + assert np.all(np.isnan(hyp_dict["covariance_log_outputscale"])) + assert np.all(np.isnan(hyp_dict["noise_log_scale"])) + assert np.all(np.isnan(hyp_dict["mean_const"])) + + assert np.all(np.isnan(gp.get_hyperparameters(as_array=True))) + + gp_priors_mistaken = { + "covariance_log_outputscal": ( + "smoothbox_student_t", + (-0.5, 0.5, np.log(10), 3), + ), + "covariance_log_lengthscale": ( + "student_t", + (np.log(np.std(X, ddof=1)), np.log(10), 3), + ), + "noise_log_scale": ("gaussian", (np.log(1e-3), 1.0)), + "mean_const": ("smoothbox", (np.min(y), np.max(y), 1.0)), + } + + with pytest.raises(ValueError): + gp.set_priors(gp_priors_mistaken) + + gp_priors = { + "covariance_log_outputscale": ( + "smoothbox_student_t", + (-0.5, 0.5, np.log(10), 3), + ), + "covariance_log_lengthscale": ( + "student_t", + (np.log(np.std(X, ddof=1)), np.log(10), 3), + ), + "noise_log_scale": ("gaussian", (np.log(1e-3), 1.0)), + "mean_const": ("smoothbox", (np.min(y), np.max(y), 1.0)), + } + + gp.set_priors(gp_priors) + + prior1 = gp.get_priors() + + 
gp_bounds_mistaken = { + "covariance_log_outputscal": (-np.inf, np.inf), + "covariance_log_lengthscale": (-np.inf, np.inf), + "noise_log_scale": (-np.inf, np.inf), + "mean_const": (-np.inf, np.inf), + } + + with pytest.raises(ValueError): + gp.set_bounds(gp_bounds_mistaken) + + hyp_arr = np.array( + [[-0.4630094, -0.2209450, -7.2947503, 0.03713608]] + ) + hyp = gp.hyperparameters_to_dict(hyp_arr) + gp.set_hyperparameters(hyp) + assert np.all(gp.get_hyperparameters(as_array=True) == hyp_arr) + + gp.set_hyperparameters(hyp_arr) + assert np.all(gp.get_hyperparameters(as_array=True) == hyp_arr) + + gp_train = {"n_samples": 10} + hyp, _, _ = gp.fit(X=X, y=y, options=gp_train) + + assert np.all(gp.get_hyperparameters(as_array=True) == hyp) + + hyp_dict_list = gp.get_hyperparameters() + for i, hyp_dict in enumerate(hyp_dict_list): + assert np.all(hyp_dict["covariance_log_lengthscale"] == hyp[i, 0]) + assert np.all(hyp_dict["covariance_log_outputscale"] == hyp[i, 1]) + assert np.all(hyp_dict["noise_log_scale"] == hyp[i, 2]) + assert np.all(hyp_dict["mean_const"] == hyp[i, 3]) + + prior2 = gp.get_priors() + + bounds = gp.get_bounds() + assert np.all( + bounds["covariance_log_lengthscale"][0] == gp.lower_bounds[0] + ) + assert np.all( + bounds["covariance_log_outputscale"][0] == gp.lower_bounds[1] + ) + assert np.all(bounds["noise_log_scale"][0] == gp.lower_bounds[2]) + assert np.all(bounds["mean_const"][0] == gp.lower_bounds[3]) + + assert np.all( + bounds["covariance_log_lengthscale"][1] == gp.upper_bounds[0] + ) + assert np.all( + bounds["covariance_log_outputscale"][1] == gp.upper_bounds[1] + ) + assert np.all(bounds["noise_log_scale"][1] == gp.upper_bounds[2]) + assert np.all(bounds["mean_const"][1] == gp.upper_bounds[3]) + + +@pytest.mark.filterwarnings( + """ignore:Matplotlib is currently using agg:UserWarning""" +) +def test_cleaning(): + N = 20 + D = 2 + X = np.random.uniform(low=-3, high=3, size=(N, D)) + y = np.reshape( + np.sin(np.sum(X, 1)) + np.random.normal(scale=0.1, size=N), (-1, 1) + ) + + gp = gpr.GP( + D=D, + covariance=SquaredExponentialIsotropic(), + mean=gpr.mean_functions.ConstantMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + gp.temporary_data["foo"] = "bar" + assert len(gp.temporary_data) == 1 + + gp_train = {"n_samples": 10} + hyps, _, _ = gp.fit(X=X, y=y, options=gp_train) + + posteriors = copy.deepcopy(gp.posteriors) + + gp.clean() + + assert len(gp.temporary_data) == 0 + + for i in range(0, 10): + assert np.all(posteriors[i].hyp == gp.posteriors[i].hyp) + assert gp.posteriors[i].alpha is None + assert gp.posteriors[i].sW is None + assert gp.posteriors[i].L is None + assert gp.posteriors[i].L_chol is None + assert gp.posteriors[i].sn2_mult is None + + gp.update(compute_posterior=True) + + for i in range(0, 10): + assert np.all(posteriors[i].hyp == gp.posteriors[i].hyp) + assert np.all(posteriors[i].alpha == gp.posteriors[i].alpha) + assert np.all(posteriors[i].sW == gp.posteriors[i].sW) + assert np.all(posteriors[i].L == gp.posteriors[i].L) + assert posteriors[i].L_chol == gp.posteriors[i].L_chol + assert posteriors[i].sn2_mult == posteriors[i].sn2_mult + + # gp.plot() + + +def partial(f, x0_orig, x0_i, i): + x0 = x0_orig.copy() + x0[i] = x0_i + return f(x0) + + +def compute_gradient(f, x0): + num_grad = np.zeros(x0.shape) + + for i in range(0, np.size(x0)): + f_i = lambda x0_i: partial(f, x0, x0_i, i) + tmp = scipy.misc.derivative( + f_i, x0[i], dx=np.finfo(float).eps ** (1 / 5.0), order=5 + ) + num_grad[i] = tmp + + return num_grad + + 
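+# check_grad compares an analytical gradient (grad) against the numerical
+# finite-difference estimate from compute_gradient and returns the
+# element-wise absolute difference; values close to zero indicate agreement.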
+def check_grad(f, grad, x0): + analytical_grad = grad(x0) + numerical_grad = compute_gradient(f, x0) + return np.abs(analytical_grad - numerical_grad) + + +@pytest.mark.filterwarnings( + """ignore:Matplotlib is currently using agg:UserWarning""" +) +def test_gp_gradient_computations(): + N = 20 + D = 2 + X = np.random.standard_normal(size=(N, D)) + + gp = gpr.GP( + D=D, + covariance=SquaredExponentialIsotropic(), + mean=gpr.mean_functions.ConstantMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + cov_N = gp.covariance.hyperparameter_count(D) + mean_N = gp.mean.hyperparameter_count(D) + noise_N = gp.noise.hyperparameter_count() + + N_s = np.random.randint(1, 3) + hyp = np.random.standard_normal(size=(N_s, cov_N + noise_N + mean_N)) + hyp[:, D] *= 0.2 + hyp[:, D + 1 : D + 1 + noise_N] *= 0.3 + + gp.update(hyp=hyp, compute_posterior=False) + y = gp.random_function(X) + + gp.update(X_new=X, y_new=y) + + hyp0 = hyp[0, :] + + # Check GP marginal likelihood gradient computation. + f = lambda hyp_: gp._GP__compute_nlZ(hyp_, False, False) + f_grad = lambda hyp_: gp._GP__compute_nlZ(hyp_, True, False)[1] + assert np.all( + np.isclose( + check_grad( + f, + f_grad, + hyp0 * np.exp(0.1 * np.random.uniform(size=hyp0.size)), + ), + 0.0, + atol=1e-6, + ) + ) + + # Check GP hyperparameters log prior gradient computation. + hyp1 = hyp0 * np.exp(0.1 * np.random.uniform(size=hyp0.size)) + prior_types = np.random.permutation(range(0, 5)) + for i in range(0, cov_N + mean_N + noise_N): + prior_type = prior_types[i] + if prior_type == 1: # 'gaussian' + gp.hyper_priors["mu"][i] = np.random.standard_normal() + gp.hyper_priors["sigma"][i] = np.exp(np.random.standard_normal()) + gp.hyper_priors["df"][i] = 0 + elif prior_type == 2: #'student_t' + gp.hyper_priors["mu"][i] = np.random.standard_normal() + gp.hyper_priors["sigma"][i] = np.random.standard_normal() + gp.hyper_priors["df"][i] = np.exp(np.random.standard_normal()) + elif prior_type == 3: # 'smoothbox' + gp.hyper_priors["a"][i] = -3 + gp.hyper_priors["b"][i] = 3 + gp.hyper_priors["sigma"][i] = np.random.standard_normal() + gp.hyper_priors["df"][i] = 0 + elif prior_type == 4: # 'smoothbox_student_t' + gp.hyper_priors["a"][i] = -3 + gp.hyper_priors["b"][i] = 3 + gp.hyper_priors["sigma"][i] = np.random.standard_normal() + gp.hyper_priors["df"][i] = np.exp(np.random.standard_normal()) + else: # None + pass + + # Manual changes to hyper priors requires us to call this + gp._GP__recompute_normalization_constants() + + f = lambda hyp_: gp._GP__compute_log_priors(hyp_, False) + f_grad = lambda hyp_: gp._GP__compute_log_priors(hyp_, True)[1] + assert np.all( + np.isclose( + check_grad(f, f_grad, hyp1), + 0.0, + atol=1e-6, + ) + ) + + # Test rank-1 update. + idx = int(np.ceil(X.shape[0] / 2)) + gp1 = gpr.GP( + D=D, + covariance=SquaredExponentialIsotropic(), + mean=gpr.mean_functions.ConstantMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + gp1.update(X_new=X[0:idx, :], y_new=y[0:idx], hyp=hyp) + + for i in range(idx, N): + gp1.update(X_new=X[i : i + 1, :], y_new=y[i : i + 1]) + + # These should be exactly the same. + assert np.all(gp.X == gp1.X) + assert np.all(gp.y == gp1.y) + assert np.all(gp.posteriors[0].hyp == gp1.posteriors[0].hyp) + + # These only approximately the same I think. 
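+    # (floating-point error accumulates across the incremental rank-1
+    # updates, so these are compared with np.isclose rather than ==)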
+ assert np.all(np.isclose(gp.posteriors[0].alpha, gp1.posteriors[0].alpha)) + assert np.all(np.isclose(gp.posteriors[0].sW, gp1.posteriors[0].sW)) + assert np.all(np.isclose(gp.posteriors[0].L, gp1.posteriors[0].L)) + assert np.isclose(gp.posteriors[0].sn2_mult, gp1.posteriors[0].sn2_mult) + assert gp.posteriors[0].L_chol and gp1.posteriors[0].L_chol + + # Test getting and setting hyperparameters. + hyp_dict = gp.get_hyperparameters() + gp1.set_hyperparameters(hyp_dict) + + assert np.all( + np.isclose( + gp.get_hyperparameters(as_array=True), + gp1.get_hyperparameters(as_array=True), + ) + ) + + # Test printing + print(gp) + + # Test plotting + # gp.plot() + + +def test_split_update(): + N = 20 + D = 2 + X = np.random.standard_normal(size=(N, D)) + s2 = np.full((N, 1), 0.05) + + gp = gpr.GP( + D=D, + covariance=SquaredExponentialIsotropic(), + mean=gpr.mean_functions.ConstantMean(), + noise=gpr.noise_functions.GaussianNoise(user_provided_add=True), + ) + + cov_N = gp.covariance.hyperparameter_count(D) + mean_N = gp.mean.hyperparameter_count(D) + noise_N = gp.noise.hyperparameter_count() + + N_s = np.random.randint(1, 3) + hyp = np.random.standard_normal(size=(N_s, cov_N + noise_N + mean_N)) + hyp[:, D] *= 0.2 + hyp[:, D + 1 : D + 1 + noise_N] *= 0.3 + + gp.update(hyp=hyp, compute_posterior=False) + y = gp.random_function(X) + + gp.update(X_new=X, y_new=y, s2_new=s2, compute_posterior=True) + + gp1 = gpr.GP( + D=D, + covariance=SquaredExponentialIsotropic(), + mean=gpr.mean_functions.ConstantMean(), + noise=gpr.noise_functions.GaussianNoise(user_provided_add=True), + ) + + gp1.update( + X_new=X[0:10, :], + y_new=y[0:10], + s2_new=s2[0:10, :], + hyp=hyp, + compute_posterior=True, + ) + gp1.update( + X_new=X[10:, :], + y_new=y[10:], + s2_new=s2[10:, :], + hyp=hyp, + compute_posterior=True, + ) + + # These should be exactly the same. + assert np.all(gp.X == gp1.X) + assert np.all(gp.y == gp1.y) + assert np.all(gp.posteriors[0].hyp == gp1.posteriors[0].hyp) + + # These only approximately the same I think. 
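+    # (the posterior recomputed after two partial updates should match the
+    # single-batch posterior only up to numerical precision, hence np.isclose)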
+ assert np.all(np.isclose(gp.posteriors[0].alpha, gp1.posteriors[0].alpha)) + assert np.all(np.isclose(gp.posteriors[0].sW, gp1.posteriors[0].sW)) + assert np.all(np.isclose(gp.posteriors[0].L, gp1.posteriors[0].L)) + assert np.isclose(gp.posteriors[0].sn2_mult, gp1.posteriors[0].sn2_mult) + assert gp.posteriors[0].L_chol and gp1.posteriors[0].L_chol + + +@pytest.mark.filterwarnings( + """ignore:Matplotlib is currently using agg:UserWarning""" +) +def test_quadrature_without_noise(): + f = lambda x: np.exp(-((x - 0.35) ** 2 / (2 * 0.01))) + np.sin(10 * x) / 3 + f_p = lambda x: f(x) * scipy.stats.norm.pdf(x, scale=0.1) + N = 50 + D = 1 + X = np.linspace(-2.5, 2.5, N) + y = f(X) + + gp = gpr.GP( + D=D, + covariance=SquaredExponentialIsotropic(), + mean=gpr.mean_functions.ZeroMean(), + noise=gpr.noise_functions.GaussianNoise( + constant_add=True, + ), + ) + + gp_train = {"n_samples": 0} + gp.fit( + X=np.reshape(X, (-1, 1)), y=np.reshape(y, (-1, 1)), options=gp_train + ) + + F_true = scipy.integrate.quad(f_p, -np.inf, np.inf)[0] + + mu_N = 1000 + x_star = np.reshape(np.linspace(-10, 10, mu_N), (-1, 1)) + f_mu, f_cov = gp.predict_full(x_star, add_noise=False) + + F_predict = 0 + for i in range(0, mu_N): + F_predict += f_mu[i, 0] * scipy.stats.norm.pdf(x_star[i], scale=0.1) + F_predict *= 20 / mu_N + + pdf_tmp = np.reshape(scipy.stats.norm.pdf(x_star, scale=0.1), (-1, 1)) + tmp = np.dot(pdf_tmp, pdf_tmp.T) + F_var_predict = np.sum(np.sum(f_cov[:, :, 0] * tmp)) * (20 / mu_N) ** 2 + + F_bayes, F_var_bayes = gp.quad(0, 0.1, compute_var=True) + + assert np.abs(F_var_bayes - F_var_predict) < 0.00001 + assert np.abs(F_bayes - F_predict) < 0.0001 + assert np.abs(F_true - F_bayes) < 0.0001 + assert np.abs(F_true - F_predict) < 0.0001 + + F_bayes_2, F_var_bayes_2 = gp.quad(0.5, 0.4, compute_var=True) + + # Test that we can compute multiple quadratures easily. 
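+    # Passing column arrays of means and scales computes one quadrature per
+    # row, as the comparisons below check.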
+ F_bayes_total, F_var_bayes_total = gp.quad( + np.array([[0], [0.5]]), np.array([[0.1], [0.4]]), compute_var=True + ) + assert np.isclose(F_bayes[0, 0], F_bayes_total[0, 0]) + assert np.isclose(F_bayes_2[0, 0], F_bayes_total[1, 0]) + assert np.isclose(F_var_bayes[0, 0], F_var_bayes_total[0, 0]) + assert np.isclose(F_var_bayes_2[0, 0], F_var_bayes_total[1, 0]) + + # gp.plot() + + +@pytest.mark.filterwarnings( + """ignore:Matplotlib is currently using agg:UserWarning""" +) +def test_quadrature_with_noise(): + N = 500 + D = 1 + s2_constant = 0.01 + X = np.reshape(np.linspace(-15, 15, N), (-1, 1)) + s2 = np.full(X.shape, s2_constant) + + mu_N = 1000 + x_star = np.reshape(np.linspace(-15, 15, mu_N), (-1, 1)) + + y = np.sin(X) + np.sqrt(s2) * scipy.stats.norm.ppf( + np.random.random_sample(X.shape) + ) + y[y < 0] = -np.abs(3 * y[y < 0]) ** 2 + + gp = gpr.GP( + D=D, + covariance=SquaredExponentialIsotropic(), + mean=gpr.mean_functions.NegativeQuadratic(), + noise=gpr.noise_functions.GaussianNoise( + constant_add=True, + user_provided_add=True, + scale_user_provided=True, + rectified_linear_output_dependent_add=True, + ), + ) + + gp_train = {"n_samples": 10} + gp.fit(X=X, y=y, s2=s2, options=gp_train) + + f_mu, f_cov = gp.predict_full(x_star, s2_star=s2_constant, add_noise=True) + F_predict = 0 + for i in range(0, mu_N): + F_predict += f_mu[i, 0] * scipy.stats.norm.pdf(x_star[i], scale=0.11) + F_predict *= 30 / mu_N + + pdf_tmp = np.reshape(scipy.stats.norm.pdf(x_star, scale=0.1), (-1, 1)) + tmp = np.dot(pdf_tmp, pdf_tmp.T) + F_predict_var = np.sum(np.sum(f_cov[:, :, 0] * tmp)) * (30 / mu_N) ** 2 + + F_bayes, F_bayes_var = gp.quad(0, 0.1, compute_var=True) + + assert np.abs(F_bayes_var - F_predict_var) < 0.01 + assert np.abs(F_bayes - F_predict) < 0.01 + + def f(x): + y = np.sin(x) + if y < 0: + return -np.abs(3 * y) ** 2 + return y + + f_p = lambda x: f(x) * scipy.stats.norm.pdf(x, scale=0.1) + + F_true = scipy.integrate.quad(f_p, -np.inf, np.inf)[0] + + assert np.abs(F_true - F_bayes) < 0.1 + + # gp.plot() + + +@pytest.mark.filterwarnings( + """ignore:Matplotlib is currently using agg:UserWarning""" +) +def test_fitting_with_fixed_bounds(): + N = 20 + D = 1 + X = np.reshape(np.linspace(-10, 10, N), (-1, 1)) + y = 1 + np.sin(X) + + gp = gpr.GP( + D=D, + covariance=MaternIsotropic(3), + mean=gpr.mean_functions.ConstantMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + gp_bounds = { + "covariance_log_outputscale": (-np.inf, np.inf), + "covariance_log_lengthscale": (-np.inf, np.inf), + "noise_log_scale": (-np.inf, np.inf), + "mean_const": (0.5, 0.5), + } + + gp_priors = { + "covariance_log_outputscale": None, + "covariance_log_lengthscale": None, + "noise_log_scale": ("gaussian", (np.log(1e-3), 1.0)), + "mean_const": None, + } + + gp.set_priors(gp_priors) + gp.set_bounds(gp_bounds) + + assert gp.get_bounds() == gp_bounds + + hyp, _, _ = gp.fit(X=X, y=y) + + assert np.all(hyp[:, 3] == 0.5) + + # gp.plot() + + +def test_setting_bounds(): + N = 20 + D = 2 + X = np.reshape(np.linspace(-10, 10, N), (-1, 2)) + y = 1 + np.sin(X) + + gp = gpr.GP( + D=D, + covariance=MaternIsotropic(3), + mean=gpr.mean_functions.ConstantMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + gp_bounds = { + "covariance_log_outputscale": (-np.inf, 1.0), + "covariance_log_lengthscale": (-2.0, np.inf), + "noise_log_scale": (-np.inf, np.inf), + "mean_const": (-4.0, 4.0), + } + + gp_priors = { + "covariance_log_outputscale": None, + "covariance_log_lengthscale": None, + 
"noise_log_scale": ("gaussian", (np.log(1e-3), 1.0)), + "mean_const": None, + } + + # Test setting all bounds manually: + lower_bounds = np.array([-2.0, -np.inf, -np.inf, -4.0]) + upper_bounds = np.array([np.inf, 1.0, np.inf, 4.0]) + gp.set_priors(gp_priors) + gp.set_bounds(gp_bounds) + hyp, _, _ = gp.fit(X=X, y=y) + assert np.all(gp.lower_bounds == lower_bounds) + # Make sure fitting doesn't undo the set bounds: + hyp, _, _ = gp.fit(X=X, y=y) + assert np.all(gp.lower_bounds == lower_bounds) + assert np.all(gp.upper_bounds == upper_bounds) + + # Test setting all bounds automatically (by default) + gp.set_bounds(None) + assert np.all(np.isnan(gp.lower_bounds)) + assert np.all(np.isnan(gp.upper_bounds)) + hyp, _, _ = gp.fit(X=X, y=y) + default_lower_bounds = np.concatenate( + [ + gp.covariance.get_bounds_info(gp.X, gp.y)["LB"], + gp.noise.get_bounds_info(gp.X, gp.y)["LB"], + gp.mean.get_bounds_info(gp.X, gp.y)["LB"], + ] + ) + default_upper_bounds = np.concatenate( + [ + gp.covariance.get_bounds_info(gp.X, gp.y)["UB"], + gp.noise.get_bounds_info(gp.X, gp.y)["UB"], + gp.mean.get_bounds_info(gp.X, gp.y)["UB"], + ] + ) + assert np.all(gp.lower_bounds == default_lower_bounds) + assert np.all(gp.upper_bounds == default_upper_bounds) + + # Test setting some bounds to defaults, via set_bounds. + # Bounds with value ``None`` should map to default values. Other bounds + # should stay the same. + gp_bounds = { + "covariance_log_outputscale": None, + "covariance_log_lengthscale": (-2.0, np.inf), + "noise_log_scale": None, + "mean_const": (-4.0, 4.0), + } + gp.set_bounds(gp_bounds) + mask = np.array([False, True, True, False]) + assert np.all(np.isnan(gp.lower_bounds[mask])) + assert np.all(np.isnan(gp.upper_bounds[mask])) + assert np.all(gp.lower_bounds[~mask] == lower_bounds[~mask]) + assert np.all(gp.upper_bounds[~mask] == upper_bounds[~mask]) + gp.fit(X, y) + assert np.all(gp.lower_bounds[mask] == default_lower_bounds[mask]) + assert np.all(gp.upper_bounds[mask] == default_upper_bounds[mask]) + assert np.all(gp.lower_bounds[~mask] == lower_bounds[~mask]) + assert np.all(gp.upper_bounds[~mask] == upper_bounds[~mask]) + + gp_bounds = { + "covariance_log_outputscale": (-np.inf, 1.0), + "covariance_log_lengthscale": None, + "noise_log_scale": (-np.inf, np.inf), + "mean_const": None, + } + gp.set_bounds(gp_bounds) + mask = np.array([False, True, True, False]) + assert np.all(np.isnan(gp.lower_bounds[~mask])) + assert np.all(np.isnan(gp.upper_bounds[~mask])) + assert np.all(gp.lower_bounds[mask] == lower_bounds[mask]) + assert np.all(gp.upper_bounds[mask] == upper_bounds[mask]) + gp.fit(X, y) + assert np.all(gp.lower_bounds[~mask] == default_lower_bounds[~mask]) + assert np.all(gp.upper_bounds[~mask] == default_upper_bounds[~mask]) + assert np.all(gp.lower_bounds[mask] == lower_bounds[mask]) + assert np.all(gp.upper_bounds[mask] == upper_bounds[mask]) + + # Test setting some bounds to defaults, via gp.fit(): + lower_bounds = np.array([-2.0, -np.inf, np.nan, -4.0]) + upper_bounds = np.array([np.nan, np.nan, np.inf, np.nan]) + fit_options = { + "lower_bounds": lower_bounds, + "upper_bounds": upper_bounds, + } + hyp, _, _ = gp.fit(X=X, y=y, options=fit_options) + # Bounds should follow user-provided options, where not nan: + mask = np.isnan(lower_bounds) + assert np.all(gp.lower_bounds[~mask] == lower_bounds[~mask]) + assert np.all(gp.upper_bounds[mask] == upper_bounds[mask]) + # Bounds should follow defaults, where nan: + assert np.all(gp.lower_bounds[mask] == default_lower_bounds[mask]) + assert 
np.all(gp.upper_bounds[~mask] == default_upper_bounds[~mask]) + + +def test_fitting_options(): + N = 20 + D = 1 + X = np.reshape(np.linspace(-10, 10, N), (-1, 1)) + y = 1 + np.sin(X) + + gp = gpr.GP( + D=D, + covariance=SquaredExponentialIsotropic(), + mean=gpr.mean_functions.ConstantMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + gp_train_1 = {"opts_N": 0} + gp_train_2 = {"n_samples": 0} + gp_train_3 = {"init_N": 0} + gp_train_4 = {"opts_N": 0, "n_samples": 0} + gp_train_5 = {"n_samples": 0, "init_N": 0} + gp_train_6 = {"opts_N": 0, "init_N": 0} + gp_train_7 = {"opts_N": 0, "n_samples": 0, "init_N": 0} + gp_train_8 = {"init_N": 1} + + # Test that all these at least can be run in a row. + gp.fit(X=X, y=y, options=gp_train_1) + gp.fit(X=X, y=y, options=gp_train_2) + gp.fit(X=X, y=y, options=gp_train_3) + gp.fit(X=X, y=y, options=gp_train_4) + gp.fit(X=X, y=y, options=gp_train_5) + gp.fit(X=X, y=y, options=gp_train_6) + gp.fit(X=X, y=y, options=gp_train_7) + gp.fit(X=X, y=y, options=gp_train_8) + + +def test_fitting(): + N = 1000 + D = 1 + X = np.reshape(np.linspace(-10, 10, N), (-1, 1)) + + gp = gpr.GP( + D=D, + covariance=MaternIsotropic(5), + mean=gpr.mean_functions.ZeroMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + cov_N = gp.covariance.hyperparameter_count(D) + mean_N = gp.mean.hyperparameter_count(D) + noise_N = gp.noise.hyperparameter_count() + + N_s = 1 + hyp = np.random.standard_normal(size=(N_s, cov_N + noise_N + mean_N)) + hyp[:, D] *= 0.3 + hyp[:, D + 1 : D + 1 + noise_N] *= 0.3 + + gp.update(hyp=hyp, compute_posterior=False) + y = gp.random_function(X, add_noise=True) + gp.update(X_new=X, y_new=y, hyp=hyp, compute_posterior=True) + + gp1 = gpr.GP( + D=D, + covariance=MaternIsotropic(5), + mean=gpr.mean_functions.ZeroMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + gp_train = {"n_samples": 0} + hyp2, _, _ = gp1.fit(X=X, y=y, options=gp_train) + + assert np.all(np.abs(hyp - hyp2)[0] < 0.5) + + assert ( + np.abs(gp.log_likelihood(hyp[0, :]) - gp.log_likelihood(hyp2[0, :])) + < 20 + ) + + +def test_get_recommended_bounds_no_bounds_set(): + D = 3 + d = 1 + 2 * np.random.randint(0, 3) + gp = gpr.GP( + D=D, + covariance=MaternIsotropic(d), + mean=gpr.mean_functions.ZeroMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + with pytest.raises(ValueError) as execinfo: + gp.get_recommended_bounds() + assert "GP does not have X or y set!" in execinfo.value.args[0] + gp.X = 1 + with pytest.raises(ValueError) as execinfo: + gp.get_recommended_bounds() + assert "GP does not have X or y set!" in execinfo.value.args[0] + gp.X = None + gp.y = 1 + with pytest.raises(ValueError) as execinfo: + gp.get_recommended_bounds() + assert "GP does not have X or y set!" in execinfo.value.args[0] + + +def test_set_hyperparameters_wrong_shape(): + D = 3 + d = 1 + 2 * np.random.randint(0, 3) + gp = gpr.GP( + D=D, + covariance=MaternIsotropic(d), + mean=gpr.mean_functions.ZeroMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + with pytest.raises(ValueError) as execinfo: + gp.set_hyperparameters(np.ones((1, 20))) + assert ( + "Input hyperparameter array is the wrong shape!" 
+ in execinfo.value.args[0] + ) + + +def test_hyperparameters_to_dict_wrong_shape(): + D = 3 + d = 1 + 2 * np.random.randint(0, 3) + gp = gpr.GP( + D=D, + covariance=MaternIsotropic(d), + mean=gpr.mean_functions.ZeroMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + with pytest.raises(ValueError) as execinfo: + gp.hyperparameters_to_dict(np.ones((1, 20))) + assert ( + "Input hyperparameter array is the wrong shape!" + in execinfo.value.args[0] + ) + + +def test_hyperparameters_from_dict_single_dict(): + D = 3 + d = 1 + 2 * np.random.randint(0, 3) + gp = gpr.GP( + D=D, + covariance=MaternIsotropic(d), + mean=gpr.mean_functions.ZeroMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + hyper_dict = gp.get_hyperparameters()[0] + gp.hyperparameters_from_dict(hyper_dict) + for key in hyper_dict.keys(): + assert np.all( + np.array_equal( + gp.get_hyperparameters()[0][key], + hyper_dict[key], + equal_nan=True, + ) + ) + + +def test_quad_not_squared_exponential(): + D = 3 + d = 1 + 2 * np.random.randint(0, 3) + gp = gpr.GP( + D=D, + covariance=MaternIsotropic(d), + mean=gpr.mean_functions.NegativeQuadratic(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + with pytest.raises(ValueError) as execinfo: + gp.quad(0, 0.1, compute_var=True) + assert ( + "Bayesian quadrature only supports the squared exponential" + in execinfo.value.args[0] + ) + + +def test_predict_lpd(): + D = 3 + gp = gpr.GP( + D=D, + covariance=SquaredExponentialIsotropic(), + mean=gpr.mean_functions.NegativeQuadratic(), + noise=gpr.noise_functions.GaussianNoise(user_provided_add=True), + ) + hyp = np.array( + [ + [ + # Covariance + 0.0, + 0.0, + 0.0, # log ell + 1.0, # log sf2 + # Noise + np.log(np.pi), # log std. dev. of noise + # Mean + -(D / 2) * np.log(2 * np.pi), # MVN mode + 0.0, + 0.0, + 0.0, # Mode location + 0.0, + 0.0, + 0.0, # log scale + ], + [ + # Covariance + 0.0, + 0.0, + 0.0, # log ell + 1.0, # log sf2 + # Noise + np.log(np.pi), # log std. dev. 
of noise + # Mean + -(D / 2) * np.log(2 * np.pi), # MVN mode + 0.0, + 0.0, + 0.0, # Mode location + 0.0, + 0.0, + 0.0, # log scale + ], + ] + ) + gp.update(hyp=hyp) + + X_star = np.arange(-9, 9).reshape((-1, 3)) + offset = np.random.normal(size=(6, 1)) + y_star = ( + scipy.stats.multivariate_normal.logpdf( + X_star, mean=np.zeros((D,)) + ).reshape(-1, 1) + + offset + ) + s2_star = np.arange(-3, 3).reshape((-1, 1)) + s2_star = np.zeros((6, 1)) + f_mu, f_s2, lpd = gp.predict( + X_star, y_star, s2_star=s2_star, return_lpd=True + ) + assert np.allclose( + lpd, + scipy.stats.norm.logpdf( + y_star, loc=f_mu, scale=np.sqrt(np.pi * s2_star + f_s2) + ), + ) + __, __, lpd2 = gp.predict( + X_star, y_star, s2_star=s2_star, return_lpd=True, add_noise=True + ) + assert np.all(lpd2 == lpd) + __, __, lpd3 = gp.predict( + X_star, + y_star, + s2_star=s2_star, + return_lpd=True, + add_noise=True, + separate_samples=True, + ) + assert np.all(lpd3[:, 0:1] == lpd) + assert np.all(lpd3[:, 1:2] == lpd) + + +def test__str__and__repr__(): + # 1-D: + N = 20 + D = 1 + X = np.reshape(np.linspace(-10, 10, N), (-1, 1)) + y = 1 + np.sin(X) + + gp = gpr.GP( + D=D, + covariance=MaternIsotropic(3), + mean=gpr.mean_functions.ConstantMean(), + noise=gpr.noise_functions.GaussianNoise(constant_add=True), + ) + + gp_bounds = { + "covariance_log_outputscale": (np.nan, np.nan), + "covariance_log_lengthscale": (np.nan, np.nan), + "noise_log_scale": (np.nan, np.nan), + "mean_const": (0.5, 0.5), + } + + gp_priors = { + "covariance_log_outputscale": None, + "covariance_log_lengthscale": None, + "noise_log_scale": ("gaussian", (np.log(1e-3), 1.0)), + "mean_const": None, + } + + gp.set_priors(gp_priors) + gp.set_bounds(gp_bounds) + hyp, _, _ = gp.fit(X=X, y=y) + + str_ = gp.__str__() + assert "Covariance function: MaternIsotropic" in str_ + repr_ = gp.__repr__() + assert ( + "self.covariance =