diff --git a/docs/src/BraninFunction.md b/docs/src/BraninFunction.md
index ae48ad3c9..0f1b28cfd 100644
--- a/docs/src/BraninFunction.md
+++ b/docs/src/BraninFunction.md
@@ -40,29 +40,29 @@ lower_bound = [-5, 0]
 upper_bound = [10,15]
 xys = sample(n_samples, lower_bound, upper_bound, SobolSample())
 zs = branin.(xys);
-x, y = -5:10, 0:15 # hide
-p1 = surface(x, y, (x1,x2) -> branin((x1,x2))) # hide
-xs = [xy[1] for xy in xys] # hide
-ys = [xy[2] for xy in xys] # hide
-scatter!(xs, ys, zs) # hide
-p2 = contour(x, y, (x1,x2) -> branin((x1,x2))) # hide
-scatter!(xs, ys) # hide
-plot(p1, p2, title="True function") # hide
+x, y = -5:10, 0:15
+p1 = surface(x, y, (x1,x2) -> branin((x1,x2)))
+xs = [xy[1] for xy in xys]
+ys = [xy[2] for xy in xys]
+scatter!(xs, ys, zs)
+p2 = contour(x, y, (x1,x2) -> branin((x1,x2)))
+scatter!(xs, ys)
+plot(p1, p2, title="True function")
 ```
 
-Now it's time to fitting different surrogates and then we will plot them.
-We will have a look on `Kriging Surrogate`:
+Now it's time to try fitting different surrogates, which we will then plot.
+We will have a look at the `Kriging` surrogate:
 
 ```@example BraninFunction
-kriging_surrogate = Kriging(xys, zs, lower_bound, upper_bound, p=[1.9, 1.9])
+kriging_surrogate = Kriging(xys, zs, lower_bound, upper_bound)
 ```
 
 ```@example BraninFunction
-p1 = surface(x, y, (x, y) -> kriging_surrogate([x y])) # hide
-scatter!(xs, ys, zs, marker_z=zs) # hide
-p2 = contour(x, y, (x, y) -> kriging_surrogate([x y])) # hide
-scatter!(xs, ys, marker_z=zs) # hide
-plot(p1, p2, title="Kriging Surrogate") # hide
+p1 = surface(x, y, (x, y) -> kriging_surrogate([x y]))
+scatter!(xs, ys, zs, marker_z=zs)
+p2 = contour(x, y, (x, y) -> kriging_surrogate([x y]))
+scatter!(xs, ys, marker_z=zs)
+plot(p1, p2, title="Kriging Surrogate")
 ```
 
 Now, we will have a look on `Inverse Distance Surrogate`:
diff --git a/docs/src/kriging.md b/docs/src/kriging.md
index 06a3832b7..2f5c74f58 100644
--- a/docs/src/kriging.md
+++ b/docs/src/kriging.md
@@ -38,7 +38,7 @@ With our sampled points we can build the Kriging surrogate using the `Kriging` f
 `kriging_surrogate` behaves like an ordinary function which we can simply plot. A nice statistical property of this surrogate is being able to calculate the error of the function at each point, we plot this as a confidence interval using the `ribbon` argument.
 
 ```@example kriging_tutorial1d
-kriging_surrogate = Kriging(x, y, lower_bound, upper_bound, p=1.9);
+kriging_surrogate = Kriging(x, y, lower_bound, upper_bound);
 
 plot(x, y, seriestype=:scatter, label="Sampled points", xlims=(lower_bound, upper_bound), ylims=(-7, 17), legend=:top)
 plot!(xs, f.(xs), label="True function", legend=:top)
@@ -107,7 +107,7 @@ plot(p1, p2, title="True function") # hide
 Using the sampled points we build the surrogate, the steps are analogous to the 1-dimensional case.
 
 ```@example kriging_tutorialnd
-kriging_surrogate = Kriging(xys, zs, lower_bound, upper_bound, p=[1.9, 1.9])
+kriging_surrogate = Kriging(xys, zs, lower_bound, upper_bound, p=[2.0, 2.0], theta=[0.03, 0.003])
 ```
 
 ```@example kriging_tutorialnd
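With the constructor defaults introduced in `src/Kriging.jl` below, the tutorials above no longer hand-tune `p`. As a reference, a minimal end-to-end sketch of the updated Branin workflow, assuming Surrogates.jl with this patch applied; the `branin` definition (which the doc page defines earlier) is reproduced here in its standard form so the snippet is self-contained, and `n_samples = 50` is an arbitrary choice:

```julia
using Surrogates

# Standard Branin test function; the doc page defines this earlier.
function branin(x)
    x1, x2 = x[1], x[2]
    b, c = 5.1 / (4 * pi^2), 5 / pi
    r, s, t = 6.0, 10.0, 1 / (8 * pi)
    return (x2 - b * x1^2 + c * x1 - r)^2 + s * (1 - t) * cos(x1) + s
end

lower_bound = [-5.0, 0.0]
upper_bound = [10.0, 15.0]
xys = sample(50, lower_bound, upper_bound, SobolSample())
zs = branin.(xys)

# p and theta now default to sensible values, so no keyword tuning is needed:
kriging_surrogate = Kriging(xys, zs, lower_bound, upper_bound)
kriging_surrogate((-3.0, 12.0))  # point estimate at an unsampled location
```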
diff --git a/src/Kriging.jl b/src/Kriging.jl
index eef379a29..d5285200a 100644
--- a/src/Kriging.jl
+++ b/src/Kriging.jl
@@ -1,7 +1,8 @@
 #=
-One-dimensional Kriging method, following this paper:
+One-dimensional Kriging method, following these papers:
+"Efficient Global Optimization of Expensive Black Box Functions" and
 "A Taxonomy of Global Optimization Methods Based on Response Surfaces"
-by DONALD R. JONES
+both by DONALD R. JONES
 =#
 mutable struct Kriging{X, Y, L, U, P, T, M, B, S, R} <: AbstractSurrogate
     x::X
@@ -35,20 +36,21 @@ function std_error_at_point(k::Kriging, val)
     n = length(k.x)
     d = length(k.x[1])
     r = zeros(eltype(k.x[1]), n, 1)
-    @inbounds for i in 1:n
-        sum = zero(eltype(k.x[1]))
-        for l in 1:d
-            sum = sum + k.theta[l] * norm(val[l] - k.x[i][l])^(k.p[l])
-        end
-        r[i] = exp(-sum)
-    end
+    r = [let
+             sum = zero(eltype(k.x[1]))
+             for l in 1:d
+                 sum = sum + k.theta[l] * norm(val[l] - k.x[i][l])^(k.p[l])
+             end
+             exp(-sum)
+         end
+         for i in 1:n]
+
     one = ones(eltype(k.x[1]), n, 1)
     one_t = one'
     a = r' * k.inverse_of_R * r
-    a = a[1]
     b = one_t * k.inverse_of_R * one
-    b = b[1]
-    mean_squared_error = k.sigma * (1 - a + (1 - a)^2 / (b))
+
+    mean_squared_error = k.sigma * (1 - a[1] + (1 - a[1])^2 / b[1])
     return sqrt(abs(mean_squared_error))
 end
@@ -56,38 +58,27 @@ end
 Gives the current estimate for 'val' with respect to the Kriging object k.
 """
 function (k::Kriging)(val::Number)
-    phi = z -> exp(-(abs(z))^k.p)
     n = length(k.x)
-    prediction = zero(eltype(k.x[1]))
-    for i in 1:n
-        prediction = prediction + k.b[i] * phi(val - k.x[i])
-    end
-    prediction = k.mu + prediction
-    return prediction
+    return k.mu + sum(k.b[i] * exp(-sum(k.theta * abs(val - k.x[i])^k.p)) for i in 1:n)
 end
 
 """
 Returns sqrt of expected mean_squared_error error at the point.
 """
 function std_error_at_point(k::Kriging, val::Number)
-    phi(z) = exp(-(abs(z))^k.p)
     n = length(k.x)
-    r = zeros(eltype(k.x[1]), n, 1)
-    @inbounds for i in 1:n
-        r[i] = phi(val - k.x[i])
-    end
-    one = ones(eltype(k.x[1]), n, 1)
+    r = [exp(-k.theta * abs(val - k.x[i])^k.p) for i in 1:n]
+    one = ones(eltype(k.x), n)
     one_t = one'
     a = r' * k.inverse_of_R * r
-    a = a[1]
     b = one_t * k.inverse_of_R * one
-    b = b[1]
-    mean_squared_error = k.sigma * (1 - a + (1 - a)^2 / (b))
+
+    mean_squared_error = k.sigma * (1 - a[1] + (1 - a[1])^2 / b[1])
     return sqrt(abs(mean_squared_error))
 end
 
 """
-    Kriging(x,y,lb::Number,ub::Number;p::Number=1.0)
+    Kriging(x, y, lb::Number, ub::Number; p::Number=2.0, theta::Number = 0.5/std(x)^p)
 
 Constructor for type Kriging.
 
@@ -95,26 +86,56 @@
 -(x,y): sampled points
 -p: value between 0 and 2 modelling the
    smoothness of the function being approximated, 0-> rough 2-> C^infinity
+- theta: value > 0 modeling how rapidly the function changes over the domain.
 """
-function Kriging(x, y, lb::Number, ub::Number; p = 1.0, theta = 1.0)
+function Kriging(x, y, lb::Number, ub::Number; p = 2.0,
+                 theta = 0.5 / max(1e-6 * abs(ub - lb), std(x))^p)
     if length(x) != length(unique(x))
         println("There exists a repetion in the samples, cannot build Kriging.")
         return
     end
+
+    if p > 2.0 || p < 0.0
+        throw(ArgumentError("Hyperparameter p must be between 0 and 2! Got: $p."))
+    end
+
+    if theta ≤ 0
+        throw(ArgumentError("Hyperparameter theta must be positive! Got: $theta."))
+    end
+
     mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta)
     Kriging(x, y, lb, ub, p, theta, mu, b, sigma, inverse_of_R)
 end
 
 function _calc_kriging_coeffs(x, y, p::Number, theta::Number)
     n = length(x)
-    R = zeros(eltype(x[1]), n, n)
-    R = [exp(-theta * abs(x[i] - x[j])^p)
-         for j in 1:n, i in 1:n]
+    R = [exp(-theta * abs(x[i] - x[j])^p) for i in 1:n, j in 1:n]
+
+    # Estimate nugget based on maximum allowed condition number
+    # This regularizes R to allow for points being close to each other without R becoming
+    # singular, at the cost of slightly relaxing the interpolation condition
+    # Derived from "An analytic comparison of regularization methods for Gaussian Processes"
+    # by Mohammadi et al. (https://arxiv.org/pdf/1602.00853.pdf)
+    λ = eigen(R).values
+    λmax = λ[end]
+    λmin = λ[1]
+
+    κmax = 1e8
+    λdiff = λmax - κmax * λmin
+    if λdiff ≥ 0
+        nugget = λdiff / (κmax - 1)
+    else
+        nugget = 0.0
+    end
 
-    one = ones(eltype(x[1]), n, 1)
+    one = ones(eltype(x[1]), n)
     one_t = one'
+
+    R = R + Diagonal(nugget * one)
+
     inverse_of_R = inv(R)
 
     mu = (one_t * inverse_of_R * y) / (one_t * inverse_of_R * one)
     b = inverse_of_R * (y - one * mu)
     sigma = ((y - one * mu)' * inverse_of_R * (y - one * mu)) / n
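The 1-D constructor above now validates its hyperparameters and derives `theta` from the spread of the samples. A small sketch of how those defaults behave, assuming Surrogates.jl with this patch applied (the sample data is illustrative):

```julia
using Surrogates, Statistics

x = [1.0, 2.0, 4.0, 7.0]
y = sin.(x)
k = Kriging(x, y, 0.0, 10.0)  # p defaults to 2.0

# std(x) is far above the 1e-6 * (ub - lb) floor here, so the default reduces to:
k.theta ≈ 0.5 / std(x)^2  # true

# Out-of-range hyperparameters now throw instead of silently producing a bad fit:
# Kriging(x, y, 0.0, 10.0, p = 3.0)       # ArgumentError
# Kriging(x, y, 0.0, 10.0, theta = -1.0)  # ArgumentError
```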
Got: $theta")) + end + mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta) Kriging(x, y, lb, ub, p, theta, mu, b, sigma, inverse_of_R) end function _calc_kriging_coeffs(x, y, p::Number, theta::Number) n = length(x) - R = zeros(eltype(x[1]), n, n) - R = [exp(-theta * abs(x[i] - x[j])^p) - for j in 1:n, i in 1:n] + R = [exp(-theta * abs(x[i] - x[j])^p) for i in 1:n, j in 1:n] + + # Estimate nugget based on maximum allowed condition number + # This regularizes R to allow for points being close to each other without R becoming + # singular, at the cost of slightly relaxing the interpolation condition + # Derived from "An analytic comparison of regularization methods for Gaussian Processes" + # by Mohammadi et al (https://arxiv.org/pdf/1602.00853.pdf) + λ = eigen(R).values + λmax = λ[end] + λmin = λ[1] + + κmax = 1e8 + λdiff = λmax - κmax * λmin + if λdiff ≥ 0 + nugget = λdiff / (κmax - 1) + else + nugget = 0.0 + end - one = ones(eltype(x[1]), n, 1) + one = ones(eltype(x[1]), n) one_t = one' + + R = R + Diagonal(nugget * one) + inverse_of_R = inv(R) + mu = (one_t * inverse_of_R * y) / (one_t * inverse_of_R * one) b = inverse_of_R * (y - one * mu) sigma = ((y - one * mu)' * inverse_of_R * (y - one * mu)) / n @@ -127,17 +148,30 @@ end Constructor for Kriging surrogate. - (x,y): sampled points -- p: array of values 0<=p<2 modeling the +- p: array of values 0<=p<=2 modeling the smoothness of the function being approximated in the i-th variable. low p -> rough, high p -> smooth - theta: array of values > 0 modeling how much the function is changing in the i-th variable. """ -function Kriging(x, y, lb, ub; p = collect(one.(x[1])), theta = collect(one.(x[1]))) +function Kriging(x, y, lb, ub; p = 2 .* collect(one.(x[1])), + theta = [0.5 / max(1e-6 * norm(ub .- lb), std(x_i[i] for x_i in x))^p[i] + for i in 1:length(x[1])]) if length(x) != length(unique(x)) println("There exists a repetition in the samples, cannot build Kriging.") return end + + for i in 1:length(x[1]) + if p[i] > 2.0 || p[i] < 0.0 + throw(ArgumentError("All p must be between 0 and 2! Got: $p.")) + end + + if theta[i] ≤ 0.0 + throw(ArgumentError("All theta must be positive! 
@@ -155,8 +189,28 @@ function _calc_kriging_coeffs(x, y, p, theta)
          end
          for j in 1:n, i in 1:n]
 
-    one = ones(n, 1)
+    # Estimate nugget based on maximum allowed condition number
+    # This regularizes R to allow for points being close to each other without R becoming
+    # singular, at the cost of slightly relaxing the interpolation condition
+    # Derived from "An analytic comparison of regularization methods for Gaussian Processes"
+    # by Mohammadi et al. (https://arxiv.org/pdf/1602.00853.pdf)
+    λ = eigen(R).values
+
+    λmax = λ[end]
+    λmin = λ[1]
+
+    κmax = 1e8
+    λdiff = λmax - κmax * λmin
+    if λdiff ≥ 0
+        nugget = λdiff / (κmax - 1)
+    else
+        nugget = 0.0
+    end
+
+    one = ones(eltype(x[1]), n)
     one_t = one'
+
+    R = R + Diagonal(nugget * one[:, 1])
     inverse_of_R = inv(R)
 
     mu = (one_t * inverse_of_R * y) / (one_t * inverse_of_R * one)
diff --git a/test/Kriging.jl b/test/Kriging.jl
index 7d5527cd1..e2b44934f 100644
--- a/test/Kriging.jl
+++ b/test/Kriging.jl
@@ -1,6 +1,8 @@
 using LinearAlgebra
 using Surrogates
 using Test
+using Statistics
+
 #1D
 lb = 0.0
 ub = 10.0
@@ -8,17 +10,30 @@ f = x -> log(x) * exp(x)
 x = sample(5, lb, ub, SobolSample())
 y = f.(x)
 my_p = 1.9
-my_k = Kriging(x, y, lb, ub, p = 2.3)
+
+# Check input validation for 1D Kriging
+@test_throws ArgumentError my_k=Kriging(x, y, lb, ub, p = -1.0)
+@test_throws ArgumentError my_k=Kriging(x, y, lb, ub, p = 3.0)
+@test_throws ArgumentError my_k=Kriging(x, y, lb, ub, theta = -2.0)
+
+my_k = Kriging(x, y, lb, ub, p = my_p)
+@test my_k.theta ≈ 0.5 * std(x)^(-my_p)
+
 add_point!(my_k, 4.0, 75.68)
 add_point!(my_k, [5.0, 6.0], [238.86, 722.84])
 pred = my_k(5.5)
+@test 238.86 ≤ pred ≤ 722.84
+@test my_k(5.0) ≈ 238.86
+@test std_error_at_point(my_k, 5.0) < 1e-3 * my_k(5.0)
+
 #WITHOUT ADD POINT
 x = [1.0, 2.0, 3.0]
 y = [4.0, 5.0, 6.0]
 my_p = 1.3
 my_k = Kriging(x, y, lb, ub, p = my_p)
 est = my_k(1.0)
+@test est ≈ 4.0
 std_err = std_error_at_point(my_k, 1.0)
 @test std_err < 10^(-6)
 
@@ -42,7 +57,7 @@ est = my_k(4.0)
 std_err = std_error_at_point(my_k, 4.0)
 @test std_err < 10^(-6)
 
-#Testin kwargs 1D
+#Testing kwargs 1D
 kwar_krig = Kriging(x, y, lb, ub);
 
 #ND
@@ -90,5 +105,14 @@ est = my_k((10.0, 11.0, 12.0))
 std_err = std_error_at_point(my_k, (10.0, 11.0, 12.0))
 @test std_err < 10^(-6)
 
-#test kwargs ND
+#test kwargs ND (hyperparameter initialization)
 kwarg_krig_ND = Kriging(x, y, lb, ub)
+
+@test_throws ArgumentError Kriging(x, y, lb, ub, p = 3 * my_p)
+@test_throws ArgumentError Kriging(x, y, lb, ub, p = -my_p)
+@test_throws ArgumentError Kriging(x, y, lb, ub, theta = -my_theta)
+
+d = length(x[3])
+
+@test all(==(2.0), kwarg_krig_ND.p)
+@test all(kwarg_krig_ND.theta .≈ [0.5 / var(x_i[ℓ] for x_i in x) for ℓ in 1:d])
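The assertions added above lean on the defining property of Kriging: it interpolates, so sampled points are reproduced (up to floating-point error) with essentially zero predicted error, while uncertainty grows away from the samples. A sketch with the same toy data as the test file, assuming Surrogates.jl with this patch applied:

```julia
using Surrogates

x = [1.0, 2.0, 3.0]
y = [4.0, 5.0, 6.0]
my_k = Kriging(x, y, 0.0, 10.0, p = 1.3)

my_k(1.0)                      # ≈ 4.0: a sampled point is reproduced
std_error_at_point(my_k, 1.0)  # ≈ 0: no predictive uncertainty at a sample
std_error_at_point(my_k, 2.5)  # > 0: uncertainty between samples
```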
diff --git a/test/optimization.jl b/test/optimization.jl
index 383b24727..149164722 100644
--- a/test/optimization.jl
+++ b/test/optimization.jl
@@ -1,8 +1,5 @@
 using Surrogates
 using LinearAlgebra
-using Flux
-using Flux: @epochs
-using PolyChaos
 
 #######SRBF############
 ##### 1D #####
@@ -21,18 +18,32 @@ a = 2
 b = 6
 
 #Using Kriging
-my_k_SRBF1 = Kriging(x, y, lb, ub)
-surrogate_optimize(objective_function, SRBF(), a, b, my_k_SRBF1, UniformSample())
+
+x = [2.5, 4.0, 6.0]
+y = [6.0, 9.0, 13.0]
+my_k_SRBF1 = Kriging(x, y, lb, ub; p)
+xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_k_SRBF1,
+                                  UniformSample())
 
 #Using RadialBasis
+
+x = [2.5, 4.0, 6.0]
+y = [6.0, 9.0, 13.0]
 my_rad_SRBF1 = RadialBasis(x, y, a, b, rad = linearRadial())
-surrogate_optimize(objective_function, SRBF(), a, b, my_rad_SRBF1, UniformSample())
+(xstar, fstar) = surrogate_optimize(objective_function, SRBF(), a, b, my_rad_SRBF1,
+                                    UniformSample())
 
+x = [2.5, 4.0, 6.0]
+y = [6.0, 9.0, 13.0]
 my_wend_1d = Wendland(x, y, lb, ub)
-surrogate_optimize(objective_function, SRBF(), a, b, my_wend_1d, UniformSample())
+xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_wend_1d,
+                                  UniformSample())
 
+x = [2.5, 4.0, 6.0]
+y = [6.0, 9.0, 13.0]
 my_earth1d = EarthSurrogate(x, y, lb, ub)
-surrogate_optimize(objective_function, SRBF(), a, b, my_earth1d, LowDiscrepancySample(2))
+xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_earth1d,
+                                  LowDiscrepancySample(2))
 
 ##### ND #####
 objective_function_ND = z -> 3 * norm(z) + 1
 lb = [1.0, 1.0]
 ub = [6.0, 6.0]
 x = sample(5, lb, ub, SobolSample())
 y = objective_function_ND.(x)
-p = [1.5, 1.5]
-theta = [1.0, 1.0]
 
 #Kriging
 
@@ -66,6 +75,7 @@ alpha = [2.0, 2.0]
 n = 4
 my_loba_ND = LobachevskySurrogate(x, y, lb, ub)
 surrogate_optimize(objective_function_ND, SRBF(), lb, ub, my_loba_ND, UniformSample())
+
 #Linear
 lb = [1.0, 1.0]
 ub = [6.0, 6.0]
@@ -191,11 +201,9 @@ lb = [-1.0, x2_constraint, -1.0]
 ub = [6.0, x2_constraint, 6.0]
 x = sample(5, lb, ub, sampler)
 y = objective_function_section.(x)
-p = [1.2, 1.2, 1.2]
-theta = [2.0, 2.0, 2.0]
 
 #Kriging
-my_k_EIN_section = Kriging(x, y, lb, ub; p = p)
+my_k_EIN_section = Kriging(x, y, lb, ub)
 
 # Constrain our sampling to the plane where x[2] = 1
 surrogate_optimize(objective_function_section, EI(), lb, ub, my_k_EIN_section, sampler)
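The optimization tests above now capture the `(xstar, fstar)` pair that `surrogate_optimize` returns. A sketch of that pattern, assuming Surrogates.jl with this patch applied; the linear objective is inferred from the sampled values (`y = 2x + 1` matches `[6.0, 9.0, 13.0]`) and the bounds are illustrative:

```julia
using Surrogates

objective_function = x -> 2 * x + 1
a, b = 2.0, 6.0  # search interval

x = [2.5, 4.0, 6.0]
y = objective_function.(x)  # [6.0, 9.0, 13.0], as in the test file

my_k = Kriging(x, y, a, b)  # default p = 2.0 and std-based theta
xstar, fstar = surrogate_optimize(objective_function, SRBF(), a, b, my_k,
                                  UniformSample())
```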