From 8ae80a954b70ce9967e3331de60f523d58b5e584 Mon Sep 17 00:00:00 2001
From: Thomas Marks <marksta@umich.edu>
Date: Mon, 11 Jul 2022 12:28:12 -0400
Subject: [PATCH 1/8] fix 1D kriging predictions and add new hyperparam
 initialization

---
 src/Kriging.jl  | 83 ++++++++++++++++++++++++++++++-------------------
 test/Kriging.jl | 27 ++++++++++++++--
 2 files changed, 75 insertions(+), 35 deletions(-)

diff --git a/src/Kriging.jl b/src/Kriging.jl
index eef379a29..cbc4ee5ee 100644
--- a/src/Kriging.jl
+++ b/src/Kriging.jl
@@ -35,20 +35,23 @@ function std_error_at_point(k::Kriging, val)
     n = length(k.x)
     d = length(k.x[1])
     r = zeros(eltype(k.x[1]), n, 1)
-    @inbounds for i in 1:n
-        sum = zero(eltype(k.x[1]))
-        for l in 1:d
-            sum = sum + k.theta[l] * norm(val[l] - k.x[i][l])^(k.p[l])
+    r = [
+        let
+            sum = zero(eltype(k.x[1]))
+            for l in 1:d
+                sum = sum + k.theta[l] * norm(val[l] - k.x[i][l])^(k.p[l])
+            end
+            exp(-sum)
         end
-        r[i] = exp(-sum)
-    end
+    for i in 1:n]
+
     one = ones(eltype(k.x[1]), n, 1)
     one_t = one'
     a = r' * k.inverse_of_R * r
-    a = a[1]
     b = one_t * k.inverse_of_R * one
-    b = b[1]
-    mean_squared_error = k.sigma * (1 - a + (1 - a)^2 / (b))
+    c = one_t * k.inverse_of_R * r
+
+    mean_squared_error = k.sigma * (1 - a[1] + (1 - c[1])^2 / b[1])
     return sqrt(abs(mean_squared_error))
 end
 
@@ -56,38 +59,30 @@ end
 Gives the current estimate for 'val' with respect to the Kriging object k.
 """
 function (k::Kriging)(val::Number)
-    phi = z -> exp(-(abs(z))^k.p)
     n = length(k.x)
-    prediction = zero(eltype(k.x[1]))
-    for i in 1:n
-        prediction = prediction + k.b[i] * phi(val - k.x[i])
-    end
-    prediction = k.mu + prediction
-    return prediction
+    return k.mu + sum(k.b[i] * exp(-sum(k.theta * abs(val - k.x[i])^k.p)) for i in 1:n)
 end
 
 """
     Returns sqrt of expected mean_squared_error error at the point.
 """
 function std_error_at_point(k::Kriging, val::Number)
-    phi(z) = exp(-(abs(z))^k.p)
     n = length(k.x)
-    r = zeros(eltype(k.x[1]), n, 1)
-    @inbounds for i in 1:n
-        r[i] = phi(val - k.x[i])
-    end
-    one = ones(eltype(k.x[1]), n, 1)
+    r = [
+        exp(-k.theta * abs(val - k.x[i])^k.p) for i in 1:n
+    ]
+    one = ones(eltype(k.x), n)
     one_t = one'
     a = r' * k.inverse_of_R * r
-    a = a[1]
     b = one_t * k.inverse_of_R * one
-    b = b[1]
-    mean_squared_error = k.sigma * (1 - a + (1 - a)^2 / (b))
+    c = one_t * k.inverse_of_R * r
+
+    mean_squared_error = k.sigma * (1 - a[1] + (1 - c[1])^2 / b[1])
     return sqrt(abs(mean_squared_error))
 end
 
 """
-    Kriging(x,y,lb::Number,ub::Number;p::Number=1.0)
+    Kriging(x, y, lb::Number, ub::Number; p::Number=2.0, theta::Number = 0.5/var(x))
 
 Constructor for type Kriging.
 
@@ -95,26 +90,37 @@ Constructor for type Kriging.
 -(x,y): sampled points
 -p: value between 0 and 2 modelling the
    smoothness of the function being approximated, 0-> rough  2-> C^infinity
+- theta: value > 0 modeling how much the function is changing in the i-th variable.
 """
-function Kriging(x, y, lb::Number, ub::Number; p = 1.0, theta = 1.0)
+function Kriging(x, y, lb::Number, ub::Number; p = 2.0, theta = 0.5/var(x))
     if length(x) != length(unique(x))
         println("There exists a repetion in the samples, cannot build Kriging.")
         return
     end
+
+    if p > 2.0 || p < 0.0
+        throw(ArgumentError("Hyperparameter p must be between 0 and 2! Got: $p."))
+    end
+
+    if theta < 0
+        throw(ArgumentError("Hyperparameter theta must be positive! Got: $theta"))
+    end
+
     mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta)
     Kriging(x, y, lb, ub, p, theta, mu, b, sigma, inverse_of_R)
 end
 
 function _calc_kriging_coeffs(x, y, p::Number, theta::Number)
     n = length(x)
-    R = zeros(eltype(x[1]), n, n)
 
-    R = [exp(-theta * abs(x[i] - x[j])^p)
-         for j in 1:n, i in 1:n]
+    R = [
+        exp(-theta * abs(x[i] - x[j])^p) for i in 1:n, j in 1:n
+    ]
 
     one = ones(eltype(x[1]), n, 1)
     one_t = one'
     inverse_of_R = inv(R)
+
     mu = (one_t * inverse_of_R * y) / (one_t * inverse_of_R * one)
     b = inverse_of_R * (y - one * mu)
     sigma = ((y - one * mu)' * inverse_of_R * (y - one * mu)) / n
@@ -127,17 +133,30 @@ end
 Constructor for Kriging surrogate.
 
 - (x,y): sampled points
-- p: array of values 0<=p<2 modeling the
+- p: array of values 0<=p<=2 modeling the
      smoothness of the function being approximated in the i-th variable.
      low p -> rough, high p -> smooth
 - theta: array of values > 0 modeling how much the function is
           changing in the i-th variable.
 """
-function Kriging(x, y, lb, ub; p = collect(one.(x[1])), theta = collect(one.(x[1])))
+function Kriging(x, y, lb, ub; p = 2 .* collect(one.(x[1])),
+                theta = [0.5 / var(x_i[i] for x_i in x) for i in 1:length(x[1])])
+
     if length(x) != length(unique(x))
         println("There exists a repetition in the samples, cannot build Kriging.")
         return
     end
+
+    for i in 1:length(x[1])
+        if p[i] > 2.0 || p[i] < 0.0
+            throw(ArgumentError("All p must be between 0 and 2! Got: $p."))
+        end
+
+        if theta[i] < 0.0
+            throw(ArgumentError("All theta must be positive! Got: $theta."))
+        end
+    end
+
     mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta)
     Kriging(x, y, lb, ub, p, theta, mu, b, sigma, inverse_of_R)
 end
diff --git a/test/Kriging.jl b/test/Kriging.jl
index 7d5527cd1..20e2c34e2 100644
--- a/test/Kriging.jl
+++ b/test/Kriging.jl
@@ -1,6 +1,8 @@
 using LinearAlgebra
 using Surrogates
 using Test
+using Statistics
+
 #1D
 lb = 0.0
 ub = 10.0
@@ -8,17 +10,31 @@ f = x -> log(x) * exp(x)
 x = sample(5, lb, ub, SobolSample())
 y = f.(x)
 my_p = 1.9
-my_k = Kriging(x, y, lb, ub, p = 2.3)
+
+# Check input validation for 1D Kriging
+@test_throws ArgumentError my_k = Kriging(x, y, lb, ub, p = -1.0)
+@test_throws ArgumentError my_k = Kriging(x, y, lb, ub, p = 3.0)
+@test_throws ArgumentError my_k = Kriging(x, y, lb, ub, theta = -2.0)
+
+my_k = Kriging(x, y, lb, ub, p = my_p)
+@test my_k.theta ≈ 0.5 / var(x)
+
 add_point!(my_k, 4.0, 75.68)
 add_point!(my_k, [5.0, 6.0], [238.86, 722.84])
 pred = my_k(5.5)
 
+
+@test 238.86 ≤ pred ≤ 722.84
+@test my_k(5.0) ≈ 238.86
+@test std_error_at_point(my_k, 5.0) < 1e-5 * my_k(5.0)
+
 #WITHOUT ADD POINT
 x = [1.0, 2.0, 3.0]
 y = [4.0, 5.0, 6.0]
 my_p = 1.3
 my_k = Kriging(x, y, lb, ub, p = my_p)
 est = my_k(1.0)
+@test est == 4.0
 std_err = std_error_at_point(my_k, 1.0)
 @test std_err < 10^(-6)
 
@@ -42,7 +58,7 @@ est = my_k(4.0)
 std_err = std_error_at_point(my_k, 4.0)
 @test std_err < 10^(-6)
 
-#Testin kwargs 1D
+#Testing kwargs 1D
 kwar_krig = Kriging(x, y, lb, ub);
 
 #ND
@@ -90,5 +106,10 @@ est = my_k((10.0, 11.0, 12.0))
 std_err = std_error_at_point(my_k, (10.0, 11.0, 12.0))
 @test std_err < 10^(-6)
 
-#test kwargs ND
+#test kwargs ND (hyperparameter initialization)
 kwarg_krig_ND = Kriging(x, y, lb, ub)
+
+d = length(x[3])
+
+@test all(==(2.0), kwarg_krig_ND.p)
+@test all(kwarg_krig_ND.theta .≈ [0.5/var(x_i[ℓ] for x_i in x) for ℓ in 1:3])

From 3428c804811e556097bd92e366de126bbd17aa4a Mon Sep 17 00:00:00 2001
From: Thomas Marks <marksta@umich.edu>
Date: Mon, 11 Jul 2022 17:03:32 -0400
Subject: [PATCH 2/8] Update some docs and add formatting

---
 docs/src/BraninFunction.md | 32 ++++++++++++++---------------
 docs/src/kriging.md        |  4 ++--
 src/Kriging.jl             | 41 ++++++++++++++++----------------------
 test/Kriging.jl            |  9 ++++-----
 4 files changed, 39 insertions(+), 47 deletions(-)

diff --git a/docs/src/BraninFunction.md b/docs/src/BraninFunction.md
index ae48ad3c9..0f1b28cfd 100644
--- a/docs/src/BraninFunction.md
+++ b/docs/src/BraninFunction.md
@@ -40,29 +40,29 @@ lower_bound = [-5, 0]
 upper_bound = [10,15]
 xys = sample(n_samples, lower_bound, upper_bound, SobolSample())
 zs = branin.(xys);
-x, y = -5:10, 0:15 # hide
-p1 = surface(x, y, (x1,x2) -> branin((x1,x2))) # hide
-xs = [xy[1] for xy in xys] # hide
-ys = [xy[2] for xy in xys] # hide
-scatter!(xs, ys, zs) # hide
-p2 = contour(x, y, (x1,x2) -> branin((x1,x2))) # hide
-scatter!(xs, ys) # hide
-plot(p1, p2, title="True function") # hide
+x, y = -5:10, 0:15
+p1 = surface(x, y, (x1,x2) -> branin((x1,x2)))
+xs = [xy[1] for xy in xys]
+ys = [xy[2] for xy in xys]
+scatter!(xs, ys, zs)
+p2 = contour(x, y, (x1,x2) -> branin((x1,x2)))
+scatter!(xs, ys)
+plot(p1, p2, title="True function")
 ```
 
-Now it's time to fitting different surrogates and then we will plot them.
-We will have a look on `Kriging Surrogate`:
+Now it's time to try fitting different surrogates and then we will plot them.
+We will have a look at the kriging surrogate `Kriging Surrogate`. :
 
 ```@example BraninFunction
-kriging_surrogate = Kriging(xys, zs, lower_bound, upper_bound, p=[1.9, 1.9])
+kriging_surrogate = Kriging(xys, zs, lower_bound, upper_bound)
 ```
 
 ```@example BraninFunction
-p1 = surface(x, y, (x, y) -> kriging_surrogate([x y])) # hide
-scatter!(xs, ys, zs, marker_z=zs) # hide
-p2 = contour(x, y, (x, y) -> kriging_surrogate([x y])) # hide
-scatter!(xs, ys, marker_z=zs) # hide
-plot(p1, p2, title="Kriging Surrogate") # hide
+p1 = surface(x, y, (x, y) -> kriging_surrogate([x y]))
+scatter!(xs, ys, zs, marker_z=zs)
+p2 = contour(x, y, (x, y) -> kriging_surrogate([x y]))
+scatter!(xs, ys, marker_z=zs)
+plot(p1, p2, title="Kriging Surrogate")
 ```
 
 Now, we will have a look on `Inverse Distance Surrogate`:
diff --git a/docs/src/kriging.md b/docs/src/kriging.md
index 06a3832b7..2f5c74f58 100644
--- a/docs/src/kriging.md
+++ b/docs/src/kriging.md
@@ -38,7 +38,7 @@ With our sampled points we can build the Kriging surrogate using the `Kriging` f
 `kriging_surrogate` behaves like an ordinary function which we can simply plot. A nice statistical property of this surrogate is being able to calculate the error of the function at each point, we plot this as a confidence interval using the `ribbon` argument.
 
 ```@example kriging_tutorial1d
-kriging_surrogate = Kriging(x, y, lower_bound, upper_bound, p=1.9);
+kriging_surrogate = Kriging(x, y, lower_bound, upper_bound);
 
 plot(x, y, seriestype=:scatter, label="Sampled points", xlims=(lower_bound, upper_bound), ylims=(-7, 17), legend=:top)
 plot!(xs, f.(xs), label="True function", legend=:top)
@@ -107,7 +107,7 @@ plot(p1, p2, title="True function") # hide
 Using the sampled points we build the surrogate, the steps are analogous to the 1-dimensional case.
 
 ```@example kriging_tutorialnd
-kriging_surrogate = Kriging(xys, zs, lower_bound, upper_bound, p=[1.9, 1.9])
+kriging_surrogate = Kriging(xys, zs, lower_bound, upper_bound, p=[2.0, 2.0], theta=[0.03, 0.003])
 ```
 
 ```@example kriging_tutorialnd
diff --git a/src/Kriging.jl b/src/Kriging.jl
index cbc4ee5ee..3f778bb3a 100644
--- a/src/Kriging.jl
+++ b/src/Kriging.jl
@@ -1,7 +1,8 @@
 #=
-One-dimensional Kriging method, following this paper:
+One-dimensional Kriging method, following these papers:
+"Efficient Global Optimization of Expensive Black Box Functions" and
 "A Taxonomy of Global Optimization Methods Based on Response Surfaces"
-by DONALD R. JONES
+both by DONALD R. JONES
 =#
 mutable struct Kriging{X, Y, L, U, P, T, M, B, S, R} <: AbstractSurrogate
     x::X
@@ -35,23 +36,21 @@ function std_error_at_point(k::Kriging, val)
     n = length(k.x)
     d = length(k.x[1])
     r = zeros(eltype(k.x[1]), n, 1)
-    r = [
-        let
-            sum = zero(eltype(k.x[1]))
-            for l in 1:d
-                sum = sum + k.theta[l] * norm(val[l] - k.x[i][l])^(k.p[l])
-            end
-            exp(-sum)
-        end
-    for i in 1:n]
+    r = [let
+             sum = zero(eltype(k.x[1]))
+             for l in 1:d
+                 sum = sum + k.theta[l] * norm(val[l] - k.x[i][l])^(k.p[l])
+             end
+             exp(-sum)
+         end
+         for i in 1:n]
 
     one = ones(eltype(k.x[1]), n, 1)
     one_t = one'
     a = r' * k.inverse_of_R * r
     b = one_t * k.inverse_of_R * one
-    c = one_t * k.inverse_of_R * r
 
-    mean_squared_error = k.sigma * (1 - a[1] + (1 - c[1])^2 / b[1])
+    mean_squared_error = k.sigma * (1 - a[1] + (1 - a[1])^2 / b[1])
     return sqrt(abs(mean_squared_error))
 end
 
@@ -68,16 +67,13 @@ end
 """
 function std_error_at_point(k::Kriging, val::Number)
     n = length(k.x)
-    r = [
-        exp(-k.theta * abs(val - k.x[i])^k.p) for i in 1:n
-    ]
+    r = [exp(-k.theta * abs(val - k.x[i])^k.p) for i in 1:n]
     one = ones(eltype(k.x), n)
     one_t = one'
     a = r' * k.inverse_of_R * r
     b = one_t * k.inverse_of_R * one
-    c = one_t * k.inverse_of_R * r
 
-    mean_squared_error = k.sigma * (1 - a[1] + (1 - c[1])^2 / b[1])
+    mean_squared_error = k.sigma * (1 - a[1] + (1 - a[1])^2 / b[1])
     return sqrt(abs(mean_squared_error))
 end
 
@@ -92,7 +88,7 @@ Constructor for type Kriging.
    smoothness of the function being approximated, 0-> rough  2-> C^infinity
 - theta: value > 0 modeling how much the function is changing in the i-th variable.
 """
-function Kriging(x, y, lb::Number, ub::Number; p = 2.0, theta = 0.5/var(x))
+function Kriging(x, y, lb::Number, ub::Number; p = 2.0, theta = 0.5 / var(x))
     if length(x) != length(unique(x))
         println("There exists a repetion in the samples, cannot build Kriging.")
         return
@@ -113,9 +109,7 @@ end
 function _calc_kriging_coeffs(x, y, p::Number, theta::Number)
     n = length(x)
 
-    R = [
-        exp(-theta * abs(x[i] - x[j])^p) for i in 1:n, j in 1:n
-    ]
+    R = [exp(-theta * abs(x[i] - x[j])^p) for i in 1:n, j in 1:n]
 
     one = ones(eltype(x[1]), n, 1)
     one_t = one'
@@ -140,8 +134,7 @@ Constructor for Kriging surrogate.
           changing in the i-th variable.
 """
 function Kriging(x, y, lb, ub; p = 2 .* collect(one.(x[1])),
-                theta = [0.5 / var(x_i[i] for x_i in x) for i in 1:length(x[1])])
-
+                 theta = [0.5 / var(x_i[i] for x_i in x) for i in 1:length(x[1])])
     if length(x) != length(unique(x))
         println("There exists a repetition in the samples, cannot build Kriging.")
         return
diff --git a/test/Kriging.jl b/test/Kriging.jl
index 20e2c34e2..a16092a97 100644
--- a/test/Kriging.jl
+++ b/test/Kriging.jl
@@ -12,9 +12,9 @@ y = f.(x)
 my_p = 1.9
 
 # Check input validation for 1D Kriging
-@test_throws ArgumentError my_k = Kriging(x, y, lb, ub, p = -1.0)
-@test_throws ArgumentError my_k = Kriging(x, y, lb, ub, p = 3.0)
-@test_throws ArgumentError my_k = Kriging(x, y, lb, ub, theta = -2.0)
+@test_throws ArgumentError my_k=Kriging(x, y, lb, ub, p = -1.0)
+@test_throws ArgumentError my_k=Kriging(x, y, lb, ub, p = 3.0)
+@test_throws ArgumentError my_k=Kriging(x, y, lb, ub, theta = -2.0)
 
 my_k = Kriging(x, y, lb, ub, p = my_p)
 @test my_k.theta ≈ 0.5 / var(x)
@@ -23,7 +23,6 @@ add_point!(my_k, 4.0, 75.68)
 add_point!(my_k, [5.0, 6.0], [238.86, 722.84])
 pred = my_k(5.5)
 
-
 @test 238.86 ≤ pred ≤ 722.84
 @test my_k(5.0) ≈ 238.86
 @test std_error_at_point(my_k, 5.0) < 1e-5 * my_k(5.0)
@@ -112,4 +111,4 @@ kwarg_krig_ND = Kriging(x, y, lb, ub)
 d = length(x[3])
 
 @test all(==(2.0), kwarg_krig_ND.p)
-@test all(kwarg_krig_ND.theta .≈ [0.5/var(x_i[ℓ] for x_i in x) for ℓ in 1:3])
+@test all(kwarg_krig_ND.theta .≈ [0.5 / var(x_i[ℓ] for x_i in x) for ℓ in 1:3])

From aa8c23ea23ef70e0397a848cd049c28141a6c523 Mon Sep 17 00:00:00 2001
From: Thomas Marks <marksta@umich.edu>
Date: Mon, 11 Jul 2022 17:20:52 -0400
Subject: [PATCH 3/8] Fix test

---
 test/Kriging.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/Kriging.jl b/test/Kriging.jl
index a16092a97..0157f462d 100644
--- a/test/Kriging.jl
+++ b/test/Kriging.jl
@@ -25,7 +25,7 @@ pred = my_k(5.5)
 
 @test 238.86 ≤ pred ≤ 722.84
 @test my_k(5.0) ≈ 238.86
-@test std_error_at_point(my_k, 5.0) < 1e-5 * my_k(5.0)
+@test std_error_at_point(my_k, 5.0) < 1e-3 * my_k(5.0)
 
 #WITHOUT ADD POINT
 x = [1.0, 2.0, 3.0]

From 24cebb208292191445a5387797daf8f36f1ca75a Mon Sep 17 00:00:00 2001
From: Thomas Marks <marksta@umich.edu>
Date: Mon, 11 Jul 2022 17:39:14 -0400
Subject: [PATCH 4/8] minor tweak to new defaults, add extra tests

---
 src/Kriging.jl  | 4 ++--
 test/Kriging.jl | 6 +++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/Kriging.jl b/src/Kriging.jl
index 3f778bb3a..eceef444b 100644
--- a/src/Kriging.jl
+++ b/src/Kriging.jl
@@ -88,7 +88,7 @@ Constructor for type Kriging.
    smoothness of the function being approximated, 0-> rough  2-> C^infinity
 - theta: value > 0 modeling how much the function is changing in the i-th variable.
 """
-function Kriging(x, y, lb::Number, ub::Number; p = 2.0, theta = 0.5 / var(x))
+function Kriging(x, y, lb::Number, ub::Number; p = 2.0, theta = 0.5 / std(x)^p)
     if length(x) != length(unique(x))
         println("There exists a repetion in the samples, cannot build Kriging.")
         return
@@ -134,7 +134,7 @@ Constructor for Kriging surrogate.
           changing in the i-th variable.
 """
 function Kriging(x, y, lb, ub; p = 2 .* collect(one.(x[1])),
-                 theta = [0.5 / var(x_i[i] for x_i in x) for i in 1:length(x[1])])
+                 theta = [0.5 / std(x_i[i] for x_i in x)^p[i] for i in 1:length(x[1])])
     if length(x) != length(unique(x))
         println("There exists a repetition in the samples, cannot build Kriging.")
         return
diff --git a/test/Kriging.jl b/test/Kriging.jl
index 0157f462d..e2b44934f 100644
--- a/test/Kriging.jl
+++ b/test/Kriging.jl
@@ -17,7 +17,7 @@ my_p = 1.9
 @test_throws ArgumentError my_k=Kriging(x, y, lb, ub, theta = -2.0)
 
 my_k = Kriging(x, y, lb, ub, p = my_p)
-@test my_k.theta ≈ 0.5 / var(x)
+@test my_k.theta ≈ 0.5 * std(x)^(-my_p)
 
 add_point!(my_k, 4.0, 75.68)
 add_point!(my_k, [5.0, 6.0], [238.86, 722.84])
@@ -108,6 +108,10 @@ std_err = std_error_at_point(my_k, (10.0, 11.0, 12.0))
 #test kwargs ND (hyperparameter initialization)
 kwarg_krig_ND = Kriging(x, y, lb, ub)
 
+@test_throws ArgumentError Kriging(x, y, lb, ub, p = 3 * my_p)
+@test_throws ArgumentError Kriging(x, y, lb, ub, p = -my_p)
+@test_throws ArgumentError Kriging(x, y, lb, ub, theta = -my_theta)
+
 d = length(x[3])
 
 @test all(==(2.0), kwarg_krig_ND.p)

From 617eb34d42a8ce0a4c92a6bd7ea7afb715d7d6d7 Mon Sep 17 00:00:00 2001
From: Thomas Marks <marksta@umich.edu>
Date: Mon, 11 Jul 2022 18:07:43 -0400
Subject: [PATCH 5/8] add nugget regularization to fix singular exception when
 p = 2.0

---
 src/Kriging.jl       | 40 +++++++++++++++++++++++++++++++++++++--
 test/optimization.jl | 45 +++++++++++++++++++++++++++++---------------
 2 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/src/Kriging.jl b/src/Kriging.jl
index eceef444b..b8f2e06f2 100644
--- a/src/Kriging.jl
+++ b/src/Kriging.jl
@@ -111,8 +111,26 @@ function _calc_kriging_coeffs(x, y, p::Number, theta::Number)
 
     R = [exp(-theta * abs(x[i] - x[j])^p) for i in 1:n, j in 1:n]
 
-    one = ones(eltype(x[1]), n, 1)
+    # Estimate nugget based on maximum allowed condition number
+    # This regularizes R to allow for points being close to eachother without R becoming
+    # singular, at the cost of slightly relaxing the interpolation condition
+    λ = eigen(R).values
+    λmax = λ[end]
+    λmin = λ[1]
+
+    κmax = 1e8
+    λdiff = λmax - κmax * λmin
+    if λdiff ≥ 0
+        nugget = λdiff / (κmax - 1)
+    else
+        nugget = 0.0
+    end
+
+    one = ones(eltype(x[1]), n)
     one_t = one'
+
+    R = R + Diagonal(nugget * one)
+
     inverse_of_R = inv(R)
 
     mu = (one_t * inverse_of_R * y) / (one_t * inverse_of_R * one)
@@ -167,8 +185,26 @@ function _calc_kriging_coeffs(x, y, p, theta)
          end
          for j in 1:n, i in 1:n]
 
-    one = ones(n, 1)
+    # Estimate nugget based on maximum allowed condition number
+    # This regularizes R to allow for points being close to eachother without R becoming
+    # singular, at the cost of slightly relaxing the interpolation condition
+
+    λ = eigen(R).values
+    λmax = λ[end]
+    λmin = λ[1]
+
+    κmax = 1e8
+    λdiff = λmax - κmax * λmin
+    if λdiff ≥ 0
+        nugget = λdiff / (κmax - 1)
+    else
+        nugget = 0.0
+    end
+
+    one = ones(eltype(x[1]), n)
     one_t = one'
+
+    R = R + Diagonal(nugget * one[:, 1])
     inverse_of_R = inv(R)
 
     mu = (one_t * inverse_of_R * y) / (one_t * inverse_of_R * one)
diff --git a/test/optimization.jl b/test/optimization.jl
index 383b24727..687d4e82a 100644
--- a/test/optimization.jl
+++ b/test/optimization.jl
@@ -1,8 +1,8 @@
 using Surrogates
 using LinearAlgebra
-using Flux
-using Flux: @epochs
-using PolyChaos
+#using Flux
+#using Flux: @epochs
+#using PolyChaos
 
 #######SRBF############
 ##### 1D #####
@@ -21,18 +21,34 @@ a = 2
 b = 6
 
 #Using Kriging
-my_k_SRBF1 = Kriging(x, y, lb, ub)
-surrogate_optimize(objective_function, SRBF(), a, b, my_k_SRBF1, UniformSample())
+begin
+    x = [2.5, 4.0, 6.0]
+    y = [6.0, 9.0, 13.0]
+    my_k_SRBF1 = Kriging(x, y, lb, ub; p)
+    xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_k_SRBF1, UniformSample())
+end
 
 #Using RadialBasis
-my_rad_SRBF1 = RadialBasis(x, y, a, b, rad = linearRadial())
-surrogate_optimize(objective_function, SRBF(), a, b, my_rad_SRBF1, UniformSample())
-
-my_wend_1d = Wendland(x, y, lb, ub)
-surrogate_optimize(objective_function, SRBF(), a, b, my_wend_1d, UniformSample())
-
-my_earth1d = EarthSurrogate(x, y, lb, ub)
-surrogate_optimize(objective_function, SRBF(), a, b, my_earth1d, LowDiscrepancySample(2))
+begin
+    x = [2.5, 4.0, 6.0]
+    y = [6.0, 9.0, 13.0]
+    my_rad_SRBF1 = RadialBasis(x, y, a, b, rad = linearRadial())
+    (xstar, fstar) = surrogate_optimize(objective_function, SRBF(), a, b, my_rad_SRBF1, UniformSample())
+end
+
+begin
+    x = [2.5, 4.0, 6.0]
+    y = [6.0, 9.0, 13.0]
+    my_wend_1d = Wendland(x, y, lb, ub)
+    xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_wend_1d, UniformSample())
+end
+
+begin
+    x = [2.5, 4.0, 6.0]
+    y = [6.0, 9.0, 13.0]
+    my_earth1d = EarthSurrogate(x, y, lb, ub)
+    xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_earth1d, LowDiscrepancySample(2))
+end
 
 ##### ND #####
 objective_function_ND = z -> 3 * norm(z) + 1
@@ -40,8 +56,6 @@ lb = [1.0, 1.0]
 ub = [6.0, 6.0]
 x = sample(5, lb, ub, SobolSample())
 y = objective_function_ND.(x)
-p = [1.5, 1.5]
-theta = [1.0, 1.0]
 
 #Kriging
 
@@ -66,6 +80,7 @@ alpha = [2.0, 2.0]
 n = 4
 my_loba_ND = LobachevskySurrogate(x, y, lb, ub)
 surrogate_optimize(objective_function_ND, SRBF(), lb, ub, my_loba_ND, UniformSample())
+
 #Linear
 lb = [1.0, 1.0]
 ub = [6.0, 6.0]

From f4e6c7d29f69c2a6b060ac4b66a4c02fecf09be7 Mon Sep 17 00:00:00 2001
From: Thomas Marks <marksta@umich.edu>
Date: Mon, 11 Jul 2022 18:11:43 -0400
Subject: [PATCH 6/8] formatting

---
 test/optimization.jl | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/test/optimization.jl b/test/optimization.jl
index 687d4e82a..f4054afc2 100644
--- a/test/optimization.jl
+++ b/test/optimization.jl
@@ -25,7 +25,8 @@ begin
     x = [2.5, 4.0, 6.0]
     y = [6.0, 9.0, 13.0]
     my_k_SRBF1 = Kriging(x, y, lb, ub; p)
-    xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_k_SRBF1, UniformSample())
+    xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_k_SRBF1,
+                                      UniformSample())
 end
 
 #Using RadialBasis
@@ -33,21 +34,24 @@ begin
     x = [2.5, 4.0, 6.0]
     y = [6.0, 9.0, 13.0]
     my_rad_SRBF1 = RadialBasis(x, y, a, b, rad = linearRadial())
-    (xstar, fstar) = surrogate_optimize(objective_function, SRBF(), a, b, my_rad_SRBF1, UniformSample())
+    (xstar, fstar) = surrogate_optimize(objective_function, SRBF(), a, b, my_rad_SRBF1,
+                                        UniformSample())
 end
 
 begin
     x = [2.5, 4.0, 6.0]
     y = [6.0, 9.0, 13.0]
     my_wend_1d = Wendland(x, y, lb, ub)
-    xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_wend_1d, UniformSample())
+    xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_wend_1d,
+                                      UniformSample())
 end
 
 begin
     x = [2.5, 4.0, 6.0]
     y = [6.0, 9.0, 13.0]
     my_earth1d = EarthSurrogate(x, y, lb, ub)
-    xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_earth1d, LowDiscrepancySample(2))
+    xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_earth1d,
+                                      LowDiscrepancySample(2))
 end
 
 ##### ND #####

From 2c62b128c566ab6aa810f88a7e3ee02a55f19fdb Mon Sep 17 00:00:00 2001
From: Thomas Marks <marksta@umich.edu>
Date: Mon, 11 Jul 2022 18:29:18 -0400
Subject: [PATCH 7/8] fix for default theta = Inf when using section samplers

---
 src/Kriging.jl       | 12 +++++++-----
 test/optimization.jl |  7 +------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/Kriging.jl b/src/Kriging.jl
index b8f2e06f2..8c685a7b5 100644
--- a/src/Kriging.jl
+++ b/src/Kriging.jl
@@ -88,7 +88,8 @@ Constructor for type Kriging.
    smoothness of the function being approximated, 0-> rough  2-> C^infinity
 - theta: value > 0 modeling how much the function is changing in the i-th variable.
 """
-function Kriging(x, y, lb::Number, ub::Number; p = 2.0, theta = 0.5 / std(x)^p)
+function Kriging(x, y, lb::Number, ub::Number; p = 2.0,
+                 theta = 0.5 / max(1e-6 * abs(ub - lb), std(x))^p)
     if length(x) != length(unique(x))
         println("There exists a repetion in the samples, cannot build Kriging.")
         return
@@ -98,7 +99,7 @@ function Kriging(x, y, lb::Number, ub::Number; p = 2.0, theta = 0.5 / std(x)^p)
         throw(ArgumentError("Hyperparameter p must be between 0 and 2! Got: $p."))
     end
 
-    if theta < 0
+    if theta ≤ 0
         throw(ArgumentError("Hyperparameter theta must be positive! Got: $theta"))
     end
 
@@ -152,7 +153,8 @@ Constructor for Kriging surrogate.
           changing in the i-th variable.
 """
 function Kriging(x, y, lb, ub; p = 2 .* collect(one.(x[1])),
-                 theta = [0.5 / std(x_i[i] for x_i in x)^p[i] for i in 1:length(x[1])])
+                 theta = [0.5 / max(1e-6 * norm(ub .- lb), std(x_i[i] for x_i in x))^p[i]
+                          for i in 1:length(x[1])])
     if length(x) != length(unique(x))
         println("There exists a repetition in the samples, cannot build Kriging.")
         return
@@ -163,7 +165,7 @@ function Kriging(x, y, lb, ub; p = 2 .* collect(one.(x[1])),
             throw(ArgumentError("All p must be between 0 and 2! Got: $p."))
         end
 
-        if theta[i] < 0.0
+        if theta[i] ≤ 0.0
             throw(ArgumentError("All theta must be positive! Got: $theta."))
         end
     end
@@ -188,8 +190,8 @@ function _calc_kriging_coeffs(x, y, p, theta)
     # Estimate nugget based on maximum allowed condition number
     # This regularizes R to allow for points being close to eachother without R becoming
     # singular, at the cost of slightly relaxing the interpolation condition
-
     λ = eigen(R).values
+
     λmax = λ[end]
     λmin = λ[1]
 
diff --git a/test/optimization.jl b/test/optimization.jl
index f4054afc2..43cbadae1 100644
--- a/test/optimization.jl
+++ b/test/optimization.jl
@@ -1,8 +1,5 @@
 using Surrogates
 using LinearAlgebra
-#using Flux
-#using Flux: @epochs
-#using PolyChaos
 
 #######SRBF############
 ##### 1D #####
@@ -210,11 +207,9 @@ lb = [-1.0, x2_constraint, -1.0]
 ub = [6.0, x2_constraint, 6.0]
 x = sample(5, lb, ub, sampler)
 y = objective_function_section.(x)
-p = [1.2, 1.2, 1.2]
-theta = [2.0, 2.0, 2.0]
 
 #Kriging
-my_k_EIN_section = Kriging(x, y, lb, ub; p = p)
+my_k_EIN_section = Kriging(x, y, lb, ub)
 # Constrain our sampling to the plane where x[2] = 1
 surrogate_optimize(objective_function_section, EI(), lb, ub, my_k_EIN_section, sampler)
 

From a862299269ac1b7dfcf44fc3cf9e4e040a5e45ce Mon Sep 17 00:00:00 2001
From: Thomas Marks <marksta@umich.edu>
Date: Mon, 11 Jul 2022 23:25:44 -0400
Subject: [PATCH 8/8] formatting and add source for nugget regularization

---
 src/Kriging.jl       |  8 +++++--
 test/optimization.jl | 54 ++++++++++++++++++++------------------------
 2 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/src/Kriging.jl b/src/Kriging.jl
index 8c685a7b5..d5285200a 100644
--- a/src/Kriging.jl
+++ b/src/Kriging.jl
@@ -113,8 +113,10 @@ function _calc_kriging_coeffs(x, y, p::Number, theta::Number)
     R = [exp(-theta * abs(x[i] - x[j])^p) for i in 1:n, j in 1:n]
 
     # Estimate nugget based on maximum allowed condition number
-    # This regularizes R to allow for points being close to eachother without R becoming
+    # This regularizes R to allow for points being close to each other without R becoming
     # singular, at the cost of slightly relaxing the interpolation condition
+    # Derived from "An analytic comparison of regularization methods for Gaussian Processes"
+    # by Mohammadi et al (https://arxiv.org/pdf/1602.00853.pdf)
     λ = eigen(R).values
     λmax = λ[end]
     λmin = λ[1]
@@ -188,8 +190,10 @@ function _calc_kriging_coeffs(x, y, p, theta)
          for j in 1:n, i in 1:n]
 
     # Estimate nugget based on maximum allowed condition number
-    # This regularizes R to allow for points being close to eachother without R becoming
+    # This regularizes R to allow for points being close to each other without R becoming
     # singular, at the cost of slightly relaxing the interpolation condition
+    # Derived from "An analytic comparison of regularization methods for Gaussian Processes"
+    # by Mohammadi et al (https://arxiv.org/pdf/1602.00853.pdf)
     λ = eigen(R).values
 
     λmax = λ[end]
diff --git a/test/optimization.jl b/test/optimization.jl
index 43cbadae1..149164722 100644
--- a/test/optimization.jl
+++ b/test/optimization.jl
@@ -18,38 +18,32 @@ a = 2
 b = 6
 
 #Using Kriging
-begin
-    x = [2.5, 4.0, 6.0]
-    y = [6.0, 9.0, 13.0]
-    my_k_SRBF1 = Kriging(x, y, lb, ub; p)
-    xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_k_SRBF1,
-                                      UniformSample())
-end
+
+x = [2.5, 4.0, 6.0]
+y = [6.0, 9.0, 13.0]
+my_k_SRBF1 = Kriging(x, y, lb, ub; p)
+xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_k_SRBF1,
+                                  UniformSample())
 
 #Using RadialBasis
-begin
-    x = [2.5, 4.0, 6.0]
-    y = [6.0, 9.0, 13.0]
-    my_rad_SRBF1 = RadialBasis(x, y, a, b, rad = linearRadial())
-    (xstar, fstar) = surrogate_optimize(objective_function, SRBF(), a, b, my_rad_SRBF1,
-                                        UniformSample())
-end
-
-begin
-    x = [2.5, 4.0, 6.0]
-    y = [6.0, 9.0, 13.0]
-    my_wend_1d = Wendland(x, y, lb, ub)
-    xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_wend_1d,
-                                      UniformSample())
-end
-
-begin
-    x = [2.5, 4.0, 6.0]
-    y = [6.0, 9.0, 13.0]
-    my_earth1d = EarthSurrogate(x, y, lb, ub)
-    xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_earth1d,
-                                      LowDiscrepancySample(2))
-end
+
+x = [2.5, 4.0, 6.0]
+y = [6.0, 9.0, 13.0]
+my_rad_SRBF1 = RadialBasis(x, y, a, b, rad = linearRadial())
+(xstar, fstar) = surrogate_optimize(objective_function, SRBF(), a, b, my_rad_SRBF1,
+                                    UniformSample())
+
+x = [2.5, 4.0, 6.0]
+y = [6.0, 9.0, 13.0]
+my_wend_1d = Wendland(x, y, lb, ub)
+xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_wend_1d,
+                                  UniformSample())
+
+x = [2.5, 4.0, 6.0]
+y = [6.0, 9.0, 13.0]
+my_earth1d = EarthSurrogate(x, y, lb, ub)
+xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_earth1d,
+                                  LowDiscrepancySample(2))
 
 ##### ND #####
 objective_function_ND = z -> 3 * norm(z) + 1