From a0686e80494cead609fa592f24e071bb833f8d63 Mon Sep 17 00:00:00 2001
From: Vikram
Date: Tue, 19 Jul 2022 16:42:21 +0530
Subject: [PATCH 1/7] make GEKPLS differentiable, optimization friendly and
 similar in behavior to other surrogates

---
 src/GEKPLS.jl       | 326 +++++++++++++++++++++++++++++---------------
 src/Optimization.jl |  10 +-
 test/GEKPLS.jl      | 156 +++++++++++----------
 3 files changed, 309 insertions(+), 183 deletions(-)

diff --git a/src/GEKPLS.jl b/src/GEKPLS.jl
index 7880e7bcb..62de3c3f7 100644
--- a/src/GEKPLS.jl
+++ b/src/GEKPLS.jl
@@ -1,9 +1,16 @@
 using LinearAlgebra
 using Statistics

-mutable struct GEKPLS{T <: AbstractFloat} <: AbstractSurrogate
-    x::Matrix{T} #1
-    y::Matrix{T} #2
+"""
+GEKPLS(x, y, x_matrix, y_matrix, grads, xlimits, delta_x, extra_points, n_comp,
+       beta, gamma, theta, reduced_likelihood_function_value,
+       X_offset, X_scale, X_after_std, pls_mean_reshaped, y_mean, y_std)
+"""
+mutable struct GEKPLS{T, X, Y} <: AbstractSurrogate
+    x::X
+    y::Y
+    x_matrix::Matrix{T} #1
+    y_matrix::Matrix{T} #2
     grads::Matrix{T} #3
     xl::Matrix{T} #xlimits #4
     delta::T #5
@@ -21,6 +28,10 @@ mutable struct GEKPLS{T <: AbstractFloat} <: AbstractSurrogate
     y_std::T #17
 end

+"""
+    bounds_error(x, xl)
+
+Checks whether the input x values are within the range of the upper and lower bounds.
+"""
 function bounds_error(x, xl)
     num_x_rows = size(x, 1)
     num_dim = size(xl, 1)
@@ -34,8 +45,23 @@ function bounds_error(x, xl)
     return false
 end

-#constructor for GEKPLS Struct
-function GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, theta)
+"""
+    GEKPLS(x_vec, y_vec, grads_vec, n_comp, delta_x, xlimits, extra_points, theta)
+
+Constructor for the GEKPLS struct.
+- x_vec: vector of tuples with x values
+- y_vec: vector of floats with outputs
+- grads_vec: gradients associated with each of the X points
+- n_comp: number of components
+- delta_x: step size used in the first-order Taylor approximation
+- xlimits: concatenated array of lower and upper bounds
+- extra_points: number of extra points to use per training point
+- theta: initial expected variance of the PLS regression components
+"""
+function GEKPLS(x_vec, y_vec, grads_vec, n_comp, delta_x, xlimits, extra_points, theta)
+    X = vector_of_tuples_to_matrix(x_vec)
+    y = reshape(y_vec, (size(X, 1), 1))
+    grads = vector_of_tuples_to_matrix2(grads_vec)

     #ensure that X values are within the upper and lower bounds
     if bounds_error(X, xlimits)
@@ -55,14 +81,20 @@
                                                  "squar_exp", d, nt, ij, y_after_std)
-    return GEKPLS(X, y, grads, xlimits, delta_x, extra_points, n_comp, beta, gamma, theta,
+    return GEKPLS(x_vec, y_vec, X, y, grads, xlimits, delta_x, extra_points, n_comp, beta,
+                  gamma, theta,
                   reduced_likelihood_function_value,
                   X_offset, X_scale, X_after_std, pls_mean_reshaped, y_mean, y_std)
-    println("struct created")
 end

-# predictor
-function (g::GEKPLS)(X_test)
+"""
+    (g::GEKPLS)(x_vec)
+
+Takes in a set of one or more points and provides the predicted approximate outputs (predictor).
+"""
+function (g::GEKPLS)(x_vec)
+    _check_dimension(g, x_vec)
+    X_test = prep_data_for_pred(x_vec)
     n_eval, n_features_x = size(X_test)
     X_cont = (X_test .- g.X_offset) ./ g.X_scale
     dx = differences(X_cont, g.X_after_std)
@@ -72,11 +104,23 @@
     f = ones(n_eval, 1)
     y_ = (f * g.beta) + (r * g.gamma)
     y = g.y_mean .+ g.y_std * y_
-    return y
+    if size(y, 1) > 1
+        return vec(y)
+    else
+        return y[1] #this is necessary for differentiation; Zygote expects a scalar output
+    end
 end

-function add_point!(g::GEKPLS, new_x, new_y, new_grads)
-    if new_x in g.x
+"""
+    add_point!(g::GEKPLS, x_tup, y_val, grad_tup)
+
+Adds a new point to the dataset.
+"""
+
+function add_point!(g::GEKPLS, x_tup, y_val, grad_tup)
+    new_x = prep_data_for_pred(x_tup)
+    new_grads = prep_data_for_pred(grad_tup)
+    if (findfirst(==(vec(new_x)), eachrow(g.x_matrix)) !== nothing)
         println("Adding a sample that already exists. Cannot build GEKPLS")
         return
     end
@@ -85,17 +129,21 @@
         println("x values outside bounds")
         return
     end
-
-    g.x = vcat(g.x, new_x)
-    g.y = vcat(g.y, new_y)
+    temp_y = copy(g.y) #without the copy here, we get error ("cannot resize array with shared data")
+    push!(g.x, x_tup)
+    push!(temp_y, y_val)
+    g.y = temp_y
+    g.x_matrix = vcat(g.x_matrix, new_x)
+    g.y_matrix = vcat(g.y_matrix, y_val)
     g.grads = vcat(g.grads, new_grads)
-    pls_mean, X_after_PLS, y_after_PLS = _ge_compute_pls(g.x, g.y, g.num_components,
+    pls_mean, X_after_PLS, y_after_PLS = _ge_compute_pls(g.x_matrix, g.y_matrix,
+                                                         g.num_components,
                                                          g.grads, g.delta, g.xl,
                                                          g.extra_points)
     g.X_after_std, y_after_std, g.X_offset, g.y_mean, g.X_scale, g.y_std = standardization(X_after_PLS,
                                                                                            y_after_PLS)
     D, ij = cross_distances(g.X_after_std)
-    g.pls_mean = reshape(pls_mean, (size(g.x, 2), g.num_components))
+    g.pls_mean = reshape(pls_mean, (size(g.x_matrix, 2), g.num_components))
     d = componentwise_distance_PLS(D, "squar_exp", g.num_components, g.pls_mean)
     nt, nd = size(X_after_PLS)
     g.beta, g.gamma, g.reduced_likelihood_function_value = _reduced_likelihood_function(g.theta,
@@ -106,24 +154,26 @@
                                                                                         y_after_std)
 end

+"""
+    _ge_compute_pls(X, y, n_comp, grads, delta_x, xlimits, extra_points)
+
+Gradient-enhanced PLS-coefficients.
+
+Parameters
+----------
+- X: [n_obs,dim] - The input variables.
+- y: [n_obs,ny] - The output variable.
+- n_comp: int - Number of principal components used.
+- gradients: The gradient values. Matrix size (n_obs,dim).
+- delta_x: real - The step used in the first-order Taylor approximation.
+- xlimits: [dim, 2] - The upper and lower variable bounds.
+- extra_points: int - The number of extra points per training point.
+
+Returns
+-------
+- Coeff_pls: [dim, n_comp] - The PLS-coefficients.
+- X: Concatenation of XX: [extra_points*nt, dim] - Extra points added (when extra_points > 0) and X
+- y: Concatenation of yy: [extra_points*nt, 1] - Extra points added (when extra_points > 0) and y
+"""
 function _ge_compute_pls(X, y, n_comp, grads, delta_x, xlimits, extra_points)
-    """
-    Gradient-enhanced PLS-coefficients.
-    Parameters
-    ----------
-    X: [n_obs,dim] - The input variables.
-    y: [n_obs,ny] - The output variable
-    n_comp: int - Number of principal components used.
-    gradients: - The gradient values. Matrix size (n_obs,dim)
-    delta_x: real - The step used in the First Order Taylor Approximation
-    xlimits: [dim, 2]- The upper and lower var bounds.
-    extra_points: int - The number of extra points per each training point.
-    Returns
-    -------
-    Coeff_pls: [dim, n_comp] - The PLS-coefficients.
-    X: Concatenation of XX: [extra_points*nt, dim] - Extra points added (when extra_points > 0) and X
-    y: Concatenation of yy[extra_points*nt, 1]- Extra points added (when extra_points > 0) and y
-    """
+
     # this function is equivalent to a combination of
     # https://github.com/SMTorg/smt/blob/f124c01ffa78c04b80221dded278a20123dac742/smt/utils/kriging_utils.py#L1036
     # and https://github.com/SMTorg/smt/blob/f124c01ffa78c04b80221dded278a20123dac742/smt/surrogate_models/gekpls.py#L48
@@ -256,35 +306,35 @@ end

 ######end of bb design######

-function standardization(X, y)
-    """
-    We substract the mean from each variable. Then, we divide the values of each
-    variable by its standard deviation.
+"""
+We subtract the mean from each variable. Then, we divide the values of each
+variable by its standard deviation.

-    Parameters
-    ----------
+Parameters
+----------

-    X - The input variables.
-    y - The output variable.
+X - The input variables.
+y - The output variable.

-    Returns
-    -------
+Returns
+-------

-    X: [n_obs, dim]
-          The standardized input matrix.
+X: [n_obs, dim]
+      The standardized input matrix.

-    y: [n_obs, 1]
-          The standardized output vector.
+y: [n_obs, 1]
+      The standardized output vector.

-    X_offset: The mean (or the min if scale_X_to_unit=True) of each input variable.
+X_offset: The mean (or the min if scale_X_to_unit=True) of each input variable.

-    y_mean: The mean of the output variable.
+y_mean: The mean of the output variable.

-    X_scale: The standard deviation of each input variable.
+X_scale: The standard deviation of each input variable.

-    y_std: The standard deviation of the output variable.
+y_std: The standard deviation of the output variable.

-    """
+"""
+function standardization(X, y)
     #Equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L21
     X_offset = mean(X, dims = 1)
     X_scale = std(X, dims = 1)
@@ -297,25 +347,25 @@
     return X, y, X_offset, y_mean, X_scale, y_std
 end

-function cross_distances(X)
-    """
-    Computes the nonzero componentwise cross-distances between the vectors
-    in X
+"""
+Computes the nonzero componentwise cross-distances between the vectors
+in X.

-    Parameters
-    ----------
+Parameters
+----------

-    X: [n_obs, dim]
+X: [n_obs, dim]

-    Returns
-    -------
-    D: [n_obs * (n_obs - 1) / 2, dim]
-        - The cross-distances between the vectors in X.
+Returns
+-------
+D: [n_obs * (n_obs - 1) / 2, dim]
+      - The cross-distances between the vectors in X.

-    ij: [n_obs * (n_obs - 1) / 2, 2]
-        - The indices i and j of the vectors in X associated to the cross-
-          distances in D.
-    """
+ij: [n_obs * (n_obs - 1) / 2, 2]
+      - The indices i and j of the vectors in X associated to the cross-
+      distances in D.
+"""
+function cross_distances(X)
     # equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L86
     n_samples, n_features = size(X)
     n_nonzero_cross_dist = (n_samples * (n_samples - 1)) ÷ 2
@@ -333,8 +383,7 @@
     D = X[ij[:, 1], :] - X[ij[:, 2], :]
     return D, Int.(ij)
 end

-function componentwise_distance_PLS(D, corr, n_comp, coeff_pls)
-    """
+"""
     Computes the nonzero componentwise cross-spatial-correlation-distance
     between the vectors in X.

     Parameters
     ----------

     D: [n_obs * (n_obs - 1) / 2, dim]
         - The cross-distances between the vectors in X.

     corr: str
         - Name of the correlation function used. squar_exp or abs_exp.

     n_comp: int
         - Number of principal components used.

     coeff_pls: [dim, n_comp]
         - The PLS-coefficients.

     Returns
     -------

     D_corr: [n_obs * (n_obs - 1) / 2, n_comp]
         - The componentwise cross-spatial-correlation-distance between the
           vectors in X.
-    """
+"""
+function componentwise_distance_PLS(D, corr, n_comp, coeff_pls)
+
     #equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L1257
     #todo
     #figure out how to handle this computation in the case of very large matrices
     D_corr = zeros((size(D)[1], n_comp))
@@ -381,25 +432,45 @@
     return D_corr
 end

+"""
+Squared exponential correlation model.
+Equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L604
+
+Parameters:
+-----------
+theta: Hyperparameters of the correlation model
+d: componentwise distances from componentwise_distance_PLS
+
+Returns:
+--------
+r: array containing the values of the autocorrelation model
+"""
 function squar_exp(theta, d)
-    """
-    Squared exponential correlation model.
-    Equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L604
-    Parameters:
-    -----------
-    theta : Hyperparameters of the correlation model
-    d: componentwise distances from componentwise_distance_PLS
-
-    Returns:
-    --------
-    r: array containing the values of the autocorrelation model
-
-    """
     n_components = size(d)[2]
     theta = reshape(theta, (1, n_components))
     return exp.(-sum(theta .* d, dims = 2))
 end

+"""
+    differences(X, Y)
+
+Returns the differences between two arrays.
+
+Given an input like this:
+
+X = [1.0 1.0 1.0; 2.0 2.0 2.0]
+Y = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
+diff = differences(X, Y)

+We get an output (diff) that looks like this:
+
+[ 0. -1. -2.
+ -3. -4. -5.
+ -6. -7. -8.
+  1.  0. -1.
+ -2. -3. -4.
+ -5. -6. -7.]
+"""
 function differences(X, Y)
     #equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L392
     #code credit: Elias Carvalho - https://stackoverflow.com/questions/72392010/row-wise-operations-between-matrices-in-julia
     Rx = repeat(X, inner = (size(Y, 1), 1))
     Ry = repeat(Y, outer = (size(X, 1), 1))
     return Rx - Ry
 end

+"""
+    _reduced_likelihood_function(theta, kernel_type, d, nt, ij, y_norma, noise = 0.0)
+
+Computes the reduced likelihood function value and other coefficients necessary for prediction.
+This function is a loose translation of SMT code from
+https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/surrogate_models/krg_based.py#L247
+It determines the BLUP parameters and evaluates the reduced likelihood function for the given theta.
+
+Parameters
+----------
+theta: array containing the parameters at which the Gaussian Process model parameters should be determined.
+kernel_type: name of the correlation function.
+d: The componentwise cross-spatial-correlation-distance between the vectors in X.
+nt: number of training points
+ij: The indices i and j of the vectors in X associated to the cross-distances in D.
+y_norma: Standardized y values
+noise: noise hyperparameter - increasing noise reduces reduced_likelihood_function_value
+
+Returns
+-------
+reduced_likelihood_function_value: real
+    - The value of the reduced likelihood function associated with the given autocorrelation parameters theta.
+beta: Generalized least-squares regression weights
+gamma: Gaussian Process weights.
+"""
 function _reduced_likelihood_function(theta, kernel_type, d, nt, ij, y_norma, noise = 0.0)
-    """
-    This function is a loose translation of SMT code from
-    https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/surrogate_models/krg_based.py#L247
-    It determines the BLUP parameters and evaluates the reduced likelihood function for the given theta.
-
-    Parameters
-    ----------
-    theta: array containing the parameters at which the Gaussian Process model parameters should be determined.
-    kernel_type: name of the correlation function.
-    d: The componentwise cross-spatial-correlation-distance between the vectors in X.
-    nt: number of training points
-    ij: The indices i and j of the vectors in X associated to the cross-distances in D.
-    y_norma: Standardized y values
-    noise: noise hyperparameter - increasing noise reduces reduced_likelihood_function_value
-
-    Returns
-    -------
-    reduced_likelihood_function_value: real
-        - The value of the reduced likelihood function associated with the given autocorrelation parameters theta.
-    beta: Generalized least-squares regression weights
-    gamma: Gaussian Process weights.
-    """
     #equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/surrogate_models/krg_based.py#L247
     reduced_likelihood_function_value = -Inf
     nugget = 1000000.0 * eps() #a jitter for numerical stability; reducing the multiple from 1000000.0 results in a positive-definiteness error in the Cholesky decomposition
@@ -467,6 +541,7 @@ end
 # The code below is a simplified version of
 # SKLearn's PLS
 # https://github.com/scikit-learn/scikit-learn/blob/80598905e/sklearn/cross_decomposition/_pls.py
+# It is completely self-contained (no dependencies)

 function _center_scale(X, Y)
     x_mean = mean(X, dims = 1)
@@ -534,3 +609,38 @@ function _modified_pls(X, Y, n_components)
 end

 ### MODIFIED PLS ABOVE ###
+
+### BELOW ARE HELPER FUNCTIONS TO CONVERT VECTORS OF TUPLES INTO MATRICES
+
+function vector_of_tuples_to_matrix(v)
+    num_rows = length(v)
+    num_cols = length(first(v))
+    K = zeros(num_rows, num_cols)
+    for row in 1:num_rows
+        for col in 1:num_cols
+            K[row, col] = v[row][col]
+        end
+    end
+    return K
+end
+
+function vector_of_tuples_to_matrix2(v)
+    #convert gradients into matrix form
+    num_rows = length(v)
+    num_cols = length(first(first(v)))
+    K = zeros(num_rows, num_cols)
+    for row in 1:num_rows
+        for col in 1:num_cols
+            K[row, col] = v[row][1][col]
+        end
+    end
+    return K
+end
+
+function prep_data_for_pred(v)
+    l = length(first(v))
+    if (l == 1)
+        return [tup[k] for k in 1:1, tup in v]
+    end
+    return [tup[k] for tup in v, k in 1:l]
+end
diff --git a/src/Optimization.jl b/src/Optimization.jl
index 4347b6a75..a2fb005d2 100644
--- a/src/Optimization.jl
+++ b/src/Optimization.jl
@@ -1,5 +1,6 @@
 using LinearAlgebra
 using Distributions
+using Zygote

 abstract type SurrogateOptimizationAlgorithm end

@@ -76,7 +77,7 @@ When w is close to zero, we do pure exploration, while w close to 1 corresponds
 """
 function surrogate_optimize(obj::Function, ::SRBF, lb, ub, surr::AbstractSurrogate,
                             sample_type::SamplingAlgorithm; maxiters = 100,
-                            num_new_samples = 100)
+                            num_new_samples = 100, needs_gradient = false)
     scale = 0.2
     success = 0
     failure = 0
@@ -177,7 +178,12 @@
         adaptive_point_y = obj(adaptive_point_x)

         #5) Update surrogate with (adaptive_point,objective(adaptive_point)
-        add_point!(surr, adaptive_point_x, adaptive_point_y)
+        if (needs_gradient)
+            adaptive_grad = Zygote.gradient(obj, adaptive_point_x)
+            add_point!(surr, adaptive_point_x, adaptive_point_y, adaptive_grad)
+        else
+            add_point!(surr, adaptive_point_x, adaptive_point_y)
+        end

         #6) How to go on?
         if surr(adaptive_point_x) < incumbent_value
diff --git a/test/GEKPLS.jl b/test/GEKPLS.jl
index 7be936c8d..bae787e57 100644
--- a/test/GEKPLS.jl
+++ b/test/GEKPLS.jl
@@ -1,33 +1,7 @@
 using Surrogates
 using Zygote

-function vector_of_tuples_to_matrix(v)
-    #convert training data generated by surrogate sampling into a matrix suitable for GEKPLS
-    num_rows = length(v)
-    num_cols = length(first(v))
-    K = zeros(num_rows, num_cols)
-    for row in 1:num_rows
-        for col in 1:num_cols
-            K[row, col] = v[row][col]
-        end
-    end
-    return K
-end
-
-function vector_of_tuples_to_matrix2(v)
-    #convert gradients into matrix form
-    num_rows = length(v)
-    num_cols = length(first(first(v)))
-    K = zeros(num_rows, num_cols)
-    for row in 1:num_rows
-        for col in 1:num_cols
-            K[row, col] = v[row][1][col]
-        end
-    end
-    return K
-end
-
-# # water flow function tests
+# #water flow function tests
 function water_flow(x)
     r_w = x[1]
     r = x[2]
     T_u = x[3]
     H_u = x[4]
     T_l = x[5]
     H_l = x[6]
     L = x[7]
     K_w = x[8]
     log_val = log(r / r_w)
     return (2 * pi * T_u * (H_u - H_l)) /
            (log_val * (1 + (2 * L * T_u / (log_val * r_w^2 * K_w)) + T_u / T_l))
 end

 n = 1000
-d = 8
 lb = [0.05, 100, 63070, 990, 63.1, 700, 1120, 9855]
 ub = [0.15, 50000, 115600, 1110, 116, 820, 1680, 12045]
 x = sample(n, lb, ub, SobolSample())
-X = vector_of_tuples_to_matrix(x)
-grads = vector_of_tuples_to_matrix2(gradient.(water_flow, x))
-y = reshape(water_flow.(x), (size(x, 1), 1))
+grads = gradient.(water_flow, x)
+y = water_flow.(x)
 xlimits = hcat(lb, ub)
 n_test = 100
 x_test = sample(n_test, lb, ub, GoldenSample())
-X_test = vector_of_tuples_to_matrix(x_test)
 y_true = water_flow.(x_test)

 @testset "Test 1: Water Flow Function Test (dimensions = 8; n_comp = 2; extra_points = 2)" begin
     n_comp = 2
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
-    y_pred = g(X_test)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 0.03, atol = 0.02) #rmse: 0.039
 end

 @testset "Test 2: Water Flow Function Test (dimensions = 8; n_comp = 3; extra_points = 2)" begin
     n_comp = 3
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta) #change hard-coded 2 param to variable
-    y_pred = g(X_test)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 0.02, atol = 0.01) #rmse: 0.027
 end

 @testset "Test 3: Water Flow Function Test (dimensions = 8; n_comp = 3; extra_points = 3)" begin
     n_comp = 3
     delta_x = 0.0001
     extra_points = 3
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
-    y_pred = g(X_test)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 0.02, atol = 0.01) #rmse: 0.027
 end

-## welded beam tests
+# ## welded beam tests
 function welded_beam(x)
     h = x[1]
     l = x[2]
     t = x[3]
     a = 6000 / (sqrt(2) * h * l)
     b = (6000 * (14 + 0.5 * l) * sqrt(0.25 * (l^2 + (h + t)^2))) /
         (2 * (0.707 * h * l * (l^2 / 12 + 0.25 * (h + t)^2)))
     return (sqrt(a^2 + b^2 + l * a * b)) / (sqrt(0.25 * (l^2 + (h + t)^2)))
 end

 n = 1000
-d = 3
 lb = [0.125, 5.0, 5.0]
 ub = [1.0, 10.0, 10.0]
 x = sample(n, lb, ub, SobolSample())
-X = vector_of_tuples_to_matrix(x)
-grads = vector_of_tuples_to_matrix2(gradient.(welded_beam, x))
-y = reshape(welded_beam.(x), (size(x, 1), 1))
+grads = gradient.(welded_beam, x)
+y = welded_beam.(x)
 xlimits = hcat(lb, ub)
 n_test = 100
 x_test = sample(n_test, lb, ub, GoldenSample())
-X_test = vector_of_tuples_to_matrix(x_test)
 y_true = welded_beam.(x_test)

 @testset "Test 4: Welded Beam Function Test (dimensions = 3; n_comp = 3; extra_points = 2)" begin
     n_comp = 3
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
-    y_pred = g(X_test)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 39.0, atol = 0.5) #rmse: 38.988
 end

 @testset "Test 5: Welded Beam Function Test (dimensions = 3; n_comp = 2; extra_points = 2)" begin
     n_comp = 2
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
-    y_pred = g(X_test)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 39.5, atol = 0.5) #rmse: 39.481
 end

 @testset "Test 6: Welded Beam Function Test (dimensions = 3; n_comp = 2; extra_points = 4)" begin
     n_comp = 2
     delta_x = 0.0001
     extra_points = 4
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
-    y_pred = g(X_test)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 37.5, atol = 0.5) #rmse: 37.87
 end

 ## sphere function tests
 function sphere_function(x)
     return sum(x .^ 2)
 end

 ## 3D
 n = 100
 lb = [-5.0, -5.0, -5.0]
 ub = [5.0, 5.0, 5.0]
 x = sample(n, lb, ub, SobolSample())
-X = vector_of_tuples_to_matrix(x)
-grads = vector_of_tuples_to_matrix2(gradient.(sphere_function, x))
-y = reshape(sphere_function.(x), (size(x, 1), 1))
+grads = gradient.(sphere_function, x)
+y = sphere_function.(x)
 xlimits = hcat(lb, ub)
 n_test = 100
 x_test = sample(n_test, lb, ub, GoldenSample())
-X_test = vector_of_tuples_to_matrix(x_test)
 y_true = sphere_function.(x_test)

 @testset "Test 7: Sphere Function Test (dimensions = 3; n_comp = 2; extra_points = 2)" begin
     n_comp = 2
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
-    y_pred = g(X_test)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 0.001, atol = 0.05) #rmse: 0.00083
 end

 ## 2D
 n = 50
 d = 2
 lb = [-10.0, -10.0]
 ub = [10.0, 10.0]
 x = sample(n, lb, ub, SobolSample())
-X = vector_of_tuples_to_matrix(x)
-grads = vector_of_tuples_to_matrix2(gradient.(sphere_function, x))
-y = reshape(sphere_function.(x), (size(x, 1), 1))
+grads = gradient.(sphere_function, x)
+y = sphere_function.(x)
 xlimits = hcat(lb, ub)
 n_test = 10
 x_test = sample(n_test, lb, ub, GoldenSample())
-X_test = vector_of_tuples_to_matrix(x_test)
 y_true = sphere_function.(x_test)

 @testset "Test 8: Sphere Function Test (dimensions = 2; n_comp = 2; extra_points = 2)" begin
     n_comp = 2
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
-    y_pred = g(X_test)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 0.1, atol = 0.5) #rmse: 0.0022
 end
 @testset "Test 9: Add Point Test (dimensions = 3; n_comp = 2; extra_points = 2)" begin
     #first we create a surrogate model with just 3 input points
     initial_x_vec = [(1.0, 2.0, 3.0), (4.0, 5.0, 6.0), (7.0, 8.0, 9.0)]
-    initial_y = reshape(sphere_function.(initial_x_vec), (size(initial_x_vec, 1), 1))
-    initial_X = vector_of_tuples_to_matrix(initial_x_vec)
-    initial_grads = vector_of_tuples_to_matrix2(gradient.(sphere_function, initial_x_vec))
+    initial_y = sphere_function.(initial_x_vec)
+    initial_grads = gradient.(sphere_function, initial_x_vec)
     lb = [-5.0, -5.0, -5.0]
     ub = [10.0, 10.0, 10.0]
     xlimits = hcat(lb, ub)
     n_comp = 2
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(initial_X, initial_y, initial_grads, n_comp, delta_x, xlimits, extra_points,
+    g = GEKPLS(initial_x_vec, initial_y, initial_grads, n_comp, delta_x, xlimits,
+               extra_points,
                initial_theta)
     n_test = 100
     x_test = sample(n_test, lb, ub, GoldenSample())
-    X_test = vector_of_tuples_to_matrix(x_test)
     y_true = sphere_function.(x_test)
-    y_pred1 = g(X_test)
+    y_pred1 = g.(x_test)
     rmse1 = sqrt(sum(((y_pred1 - y_true) .^ 2) / n_test)) #rmse1 = 31.91

     #then we update the model with more points to see if performance improves
     n = 100
     x = sample(n, lb, ub, SobolSample())
-    X = vector_of_tuples_to_matrix(x)
-    grads = vector_of_tuples_to_matrix2(gradient.(sphere_function, x))
-    y = reshape(sphere_function.(x), (size(x, 1), 1))
-    add_point!(g, X, y, grads)
-    y_pred2 = g(X_test)
+    grads = gradient.(sphere_function, x)
+    y = sphere_function.(x)
+    for i in 1:size(x, 1)
+        add_point!(g, x[i], y[i], grads[i][1])
+    end
+    y_pred2 = g.(x_test)
     rmse2 = sqrt(sum(((y_pred2 - y_true) .^ 2) / n_test)) #rmse2 = 0.0015
     @test (rmse2 < rmse1)
 end
+
+@testset "Test 10: Check optimization (dimensions = 3; n_comp = 2; extra_points = 2)" begin
+    lb = [-5.0, -5.0, -5.0]
+    ub = [10.0, 10.0, 10.0]
+    xlimits = hcat(lb, ub)
+    n_comp = 2
+    delta_x = 0.0001
+    extra_points = 2
+    initial_theta = [0.01 for i in 1:n_comp]
+    n = 100
+    x = sample(n, lb, ub, SobolSample())
+    grads = gradient.(sphere_function, x)
+    y = sphere_function.(x)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    x_point, minima = surrogate_optimize(sphere_function, SRBF(), lb, ub, g,
+                                         UniformSample(); maxiters = 10,
+                                         num_new_samples = 10, needs_gradient = true)
+    @test isapprox(minima, 0.0, atol = 0.00001)
+end
+
+@testset "Test 11: Check gradient (dimensions = 3; n_comp = 2; extra_points = 2)" begin
+    lb = [-5.0, -5.0, -5.0]
+    ub = [10.0, 10.0, 10.0]
+    xlimits = hcat(lb, ub)
+    n_comp = 2
+    delta_x = 0.0001
+    extra_points = 2
+    initial_theta = [0.01 for i in 1:n_comp]
+    n = 100
+    x = sample(n, lb, ub, SobolSample())
+    grads = gradient.(sphere_function, x)
+    y = sphere_function.(x)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    #test at a single point
+    grad_surr = gradient(g, (1.0, 1.0, 1.0))
+    grad_true = gradient(sphere_function, (1.0, 1.0, 1.0))
+    bool_tup = isapprox.((grad_surr[1] .- grad_true[1]), (0.0, 0.0, 0.0), atol = 0.001)
+    @test (true, true, true) == bool_tup
+    #test for a bunch of points
+    grads_surr_vec = gradient.(g, x)
+    sum_of_rmse = 0.0
+    for i in eachindex(grads_surr_vec)
+        sum_of_rmse += sqrt((sum((grads_surr_vec[i][1] .- grads[i][1]) .^ 2) / 3.0))
+    end
+    @test isapprox(sum_of_rmse, 0.05, atol = 0.01)
+end

From b50887dc63432c23c9d08cb1c39fba9947f7daf4 Mon Sep 17 00:00:00 2001
From: Vikram
Date: Tue, 19 Jul 2022 17:39:51 +0530
Subject: [PATCH 2/7] fix error in add_point redundancy check

---
 src/GEKPLS.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/GEKPLS.jl b/src/GEKPLS.jl
index 62de3c3f7..7da47c663 100644
--- a/src/GEKPLS.jl
+++ b/src/GEKPLS.jl
@@ -120,7 +120,7 @@
 function add_point!(g::GEKPLS, x_tup, y_val, grad_tup)
     new_x = prep_data_for_pred(x_tup)
     new_grads = prep_data_for_pred(grad_tup)
-    if (findfirst(==(vec(new_x)), eachrow(g.x_matrix)) !== nothing)
+    if vec(new_x) in eachrow(g.x_matrix)
         println("Adding a sample that already exists. Cannot build GEKPLS")
         return
     end

From 79042186d11cfeccfaee0ed8ec37da2d0873228b Mon Sep 17 00:00:00 2001
From: Vikram
Date: Tue, 19 Jul 2022 18:05:26 +0530
Subject: [PATCH 3/7] remove line before function src/GEKPLS.jl

Co-authored-by: Christopher Rackauckas
---
 src/GEKPLS.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/GEKPLS.jl b/src/GEKPLS.jl
index 7da47c663..33e381703 100644
--- a/src/GEKPLS.jl
+++ b/src/GEKPLS.jl
@@ -116,7 +116,6 @@

 Adds a new point to the dataset.
 """
-
 function add_point!(g::GEKPLS, x_tup, y_val, grad_tup)

From 85c4d097893878b577c84d75ba70971a00b1ee19 Mon Sep 17 00:00:00 2001
From: Vikram
Date: Tue, 19 Jul 2022 19:49:28 +0530
Subject: [PATCH 4/7] remove unnecessary if condition in predictor

---
 src/GEKPLS.jl | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/GEKPLS.jl b/src/GEKPLS.jl
index 33e381703..1d0355a90 100644
--- a/src/GEKPLS.jl
+++ b/src/GEKPLS.jl
@@ -104,11 +104,7 @@
     f = ones(n_eval, 1)
     y_ = (f * g.beta) + (r * g.gamma)
     y = g.y_mean .+ g.y_std * y_
-    if size(y, 1) > 1
-        return vec(y)
-    else
-        return y[1] #this is necessary for differentiation; Zygote expects a scalar output
-    end
+    return y[1]
 end

From c50f10a808736c6f37d67b00afcc8a97757e92da Mon Sep 17 00:00:00 2001
From: Vikram
Date: Wed, 20 Jul 2022 08:23:21 +0530
Subject: [PATCH 5/7] Increase tolerance, maxiters and num_new_samples

The optimization check fails because maxiters and num_new_samples, which are
by default set to 100 each, have been reduced. So I have increased the
tolerance, maxiters and num_new_samples.
---
 test/GEKPLS.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/GEKPLS.jl b/test/GEKPLS.jl
index bae787e57..d1008eedf 100644
--- a/test/GEKPLS.jl
+++ b/test/GEKPLS.jl
@@ -216,9 +216,9 @@
     y = sphere_function.(x)
     g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
     x_point, minima = surrogate_optimize(sphere_function, SRBF(), lb, ub, g,
-                                         UniformSample(); maxiters = 10,
-                                         num_new_samples = 10, needs_gradient = true)
-    @test isapprox(minima, 0.0, atol = 0.00001)
+                                         UniformSample(); maxiters = 20,
+                                         num_new_samples = 20, needs_gradient = true)
+    @test isapprox(minima, 0.0, atol = 0.0001)
 end

From 6ba08d70fc254869a02344eea7b44ce092c6dfab Mon Sep 17 00:00:00 2001
From: Vikram
Date: Wed, 20 Jul 2022 13:02:45 +0530
Subject: [PATCH 6/7] Fix input param to accept lower and upper bounds

Makes it more convenient for users to supply lower and upper bounds without
having to concatenate them before passing parameters to the GEKPLS
constructor.
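For illustration, a minimal sketch of the new calling convention (this mirrors
the sphere_function test setup used elsewhere in this series; the sampled
values are illustrative only):

    lb = [-5.0, -5.0, -5.0]
    ub = [10.0, 10.0, 10.0]
    x = sample(100, lb, ub, SobolSample())
    y = sphere_function.(x)
    grads = gradient.(sphere_function, x)
    # lb and ub are now passed directly; no hcat(lb, ub) needed
    g = GEKPLS(x, y, grads, 2, 0.0001, lb, ub, 2, [0.01, 0.01])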
---
 src/GEKPLS.jl | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/GEKPLS.jl b/src/GEKPLS.jl
index 1d0355a90..33dc2242f 100644
--- a/src/GEKPLS.jl
+++ b/src/GEKPLS.jl
@@ -46,19 +46,21 @@ function bounds_error(x, xl)
 end

 """
-    GEKPLS(x_vec, y_vec, grads_vec, n_comp, delta_x, xlimits, extra_points, theta)
+    GEKPLS(x_vec, y_vec, grads_vec, n_comp, delta_x, lb, ub, extra_points, theta)

 Constructor for the GEKPLS struct.
 - x_vec: vector of tuples with x values
 - y_vec: vector of floats with outputs
 - grads_vec: gradients associated with each of the X points
 - n_comp: number of components
 - delta_x: step size used in the first-order Taylor approximation
-- xlimits: concatenated array of lower and upper bounds
+- lb: lower bounds
+- ub: upper bounds
 - extra_points: number of extra points to use per training point
 - theta: initial expected variance of the PLS regression components
 """
-function GEKPLS(x_vec, y_vec, grads_vec, n_comp, delta_x, xlimits, extra_points, theta)
+function GEKPLS(x_vec, y_vec, grads_vec, n_comp, delta_x, lb, ub, extra_points, theta)
+    xlimits = hcat(lb, ub)
     X = vector_of_tuples_to_matrix(x_vec)
     y = reshape(y_vec, (size(X, 1), 1))
     grads = vector_of_tuples_to_matrix2(grads_vec)

From ab58c0719ae23548c9111fc4a225a86a08fc696e Mon Sep 17 00:00:00 2001
From: Vikram
Date: Wed, 20 Jul 2022 13:05:08 +0530
Subject: [PATCH 7/7] Update tests for lower and upper bound inputs

---
 test/GEKPLS.jl | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/test/GEKPLS.jl b/test/GEKPLS.jl
index d1008eedf..612792d81 100644
--- a/test/GEKPLS.jl
+++ b/test/GEKPLS.jl
@@ -22,7 +22,6 @@ ub = [0.15, 50000, 115600, 1110, 116, 820, 1680, 12045]
 x = sample(n, lb, ub, SobolSample())
 grads = gradient.(water_flow, x)
 y = water_flow.(x)
-xlimits = hcat(lb, ub)
 n_test = 100
 x_test = sample(n_test, lb, ub, GoldenSample())
 y_true = water_flow.(x_test)

 @testset "Test 1: Water Flow Function Test (dimensions = 8; n_comp = 2; extra_points = 2)" begin
     n_comp = 2
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
     y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 0.03, atol = 0.02) #rmse: 0.039
 end

 @testset "Test 2: Water Flow Function Test (dimensions = 8; n_comp = 3; extra_points = 2)" begin
     n_comp = 3
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
     y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 0.02, atol = 0.01) #rmse: 0.027
 end

 @testset "Test 3: Water Flow Function Test (dimensions = 8; n_comp = 3; extra_points = 3)" begin
     n_comp = 3
     delta_x = 0.0001
     extra_points = 3
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
     y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 0.02, atol = 0.01) #rmse: 0.027
 end

 # ## welded beam tests
 function welded_beam(x)
     h = x[1]
     l = x[2]
     t = x[3]
     a = 6000 / (sqrt(2) * h * l)
     b = (6000 * (14 + 0.5 * l) * sqrt(0.25 * (l^2 + (h + t)^2))) /
         (2 * (0.707 * h * l * (l^2 / 12 + 0.25 * (h + t)^2)))
     return (sqrt(a^2 + b^2 + l * a * b)) / (sqrt(0.25 * (l^2 + (h + t)^2)))
 end

 n = 1000
 lb = [0.125, 5.0, 5.0]
 ub = [1.0, 10.0, 10.0]
 x = sample(n, lb, ub, SobolSample())
 grads = gradient.(welded_beam, x)
 y = welded_beam.(x)
-xlimits = hcat(lb, ub)
 n_test = 100
 x_test = sample(n_test, lb, ub, GoldenSample())
 y_true = welded_beam.(x_test)

 @testset "Test 4: Welded Beam Function Test (dimensions = 3; n_comp = 3; extra_points = 2)" begin
     n_comp = 3
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
     y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 39.0, atol = 0.5) #rmse: 38.988
 end

 @testset "Test 5: Welded Beam Function Test (dimensions = 3; n_comp = 2; extra_points = 2)" begin
     n_comp = 2
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
     y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 39.5, atol = 0.5) #rmse: 39.481
 end

 @testset "Test 6: Welded Beam Function Test (dimensions = 3; n_comp = 2; extra_points = 4)" begin
     n_comp = 2
     delta_x = 0.0001
     extra_points = 4
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
     y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 37.5, atol = 0.5) #rmse: 37.87
 end

 ## sphere function tests
 function sphere_function(x)
     return sum(x .^ 2)
 end

 ## 3D
 n = 100
 lb = [-5.0, -5.0, -5.0]
 ub = [5.0, 5.0, 5.0]
 x = sample(n, lb, ub, SobolSample())
 grads = gradient.(sphere_function, x)
 y = sphere_function.(x)
-xlimits = hcat(lb, ub)
 n_test = 100
 x_test = sample(n_test, lb, ub, GoldenSample())
 y_true = sphere_function.(x_test)

 @testset "Test 7: Sphere Function Test (dimensions = 3; n_comp = 2; extra_points = 2)" begin
     n_comp = 2
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
     y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 0.001, atol = 0.05) #rmse: 0.00083
 end

 ## 2D
 n = 50
-d = 2
 lb = [-10.0, -10.0]
 ub = [10.0, 10.0]
 x = sample(n, lb, ub, SobolSample())
 grads = gradient.(sphere_function, x)
 y = sphere_function.(x)
-xlimits = hcat(lb, ub)
 n_test = 10
 x_test = sample(n_test, lb, ub, GoldenSample())
 y_true = sphere_function.(x_test)

 @testset "Test 8: Sphere Function Test (dimensions = 2; n_comp = 2; extra_points = 2)" begin
     n_comp = 2
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
     y_pred = g.(x_test)
     rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
     @test isapprox(rmse, 0.1, atol = 0.5) #rmse: 0.0022
 end

 @testset "Test 9: Add Point Test (dimensions = 3; n_comp = 2; extra_points = 2)" begin
     #first we create a surrogate model with just 3 input points
     initial_x_vec = [(1.0, 2.0, 3.0), (4.0, 5.0, 6.0), (7.0, 8.0, 9.0)]
     initial_y = sphere_function.(initial_x_vec)
     initial_grads = gradient.(sphere_function, initial_x_vec)
     lb = [-5.0, -5.0, -5.0]
     ub = [10.0, 10.0, 10.0]
-    xlimits = hcat(lb, ub)
     n_comp = 2
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
-    g = GEKPLS(initial_x_vec, initial_y, initial_grads, n_comp, delta_x, xlimits,
+    g = GEKPLS(initial_x_vec, initial_y, initial_grads, n_comp, delta_x, lb, ub,
                extra_points,
                initial_theta)
     n_test = 100
     x_test = sample(n_test, lb, ub, GoldenSample())
     y_true = sphere_function.(x_test)
     y_pred1 = g.(x_test)
     rmse1 = sqrt(sum(((y_pred1 - y_true) .^ 2) / n_test)) #rmse1 = 31.91

     #then we update the model with more points to see if performance improves
     n = 100
     x = sample(n, lb, ub, SobolSample())
     grads = gradient.(sphere_function, x)
     y = sphere_function.(x)
     for i in 1:size(x, 1)
         add_point!(g, x[i], y[i], grads[i][1])
     end
     y_pred2 = g.(x_test)
     rmse2 = sqrt(sum(((y_pred2 - y_true) .^ 2) / n_test)) #rmse2 = 0.0015
     @test (rmse2 < rmse1)
 end

 @testset "Test 10: Check optimization (dimensions = 3; n_comp = 2; extra_points = 2)" begin
     lb = [-5.0, -5.0, -5.0]
     ub = [10.0, 10.0, 10.0]
-    xlimits = hcat(lb, ub)
     n_comp = 2
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
     n = 100
     x = sample(n, lb, ub, SobolSample())
     grads = gradient.(sphere_function, x)
     y = sphere_function.(x)
-    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
     x_point, minima = surrogate_optimize(sphere_function, SRBF(), lb, ub, g,
                                          UniformSample(); maxiters = 20,
                                          num_new_samples = 20, needs_gradient = true)
     @test isapprox(minima, 0.0, atol = 0.0001)
 end

 @testset "Test 11: Check gradient (dimensions = 3; n_comp = 2; extra_points = 2)" begin
     lb = [-5.0, -5.0, -5.0]
     ub = [10.0, 10.0, 10.0]
-    xlimits = hcat(lb, ub)
     n_comp = 2
     delta_x = 0.0001
     extra_points = 2
     initial_theta = [0.01 for i in 1:n_comp]
     n = 100
     x = sample(n, lb, ub, SobolSample())
     grads = gradient.(sphere_function, x)
     y = sphere_function.(x)
-    g = GEKPLS(x, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    g = GEKPLS(x, y, grads, n_comp, delta_x, lb, ub, extra_points, initial_theta)
     #test at a single point
     grad_surr = gradient(g, (1.0, 1.0, 1.0))
     grad_true = gradient(sphere_function, (1.0, 1.0, 1.0))
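A closing usage sketch of what the series enables as a whole (this assumes the
same sphere_function test setup as above and mirrors Test 11 and the
predictor's scalar return; the snippet is illustrative, not part of the
patches themselves):

    using Surrogates, Zygote

    lb = [-5.0, -5.0, -5.0]
    ub = [10.0, 10.0, 10.0]
    x = sample(100, lb, ub, SobolSample())
    g = GEKPLS(x, sphere_function.(x), gradient.(sphere_function, x),
               2, 0.0001, lb, ub, 2, [0.01, 0.01])

    g((1.0, 1.0, 1.0))            # scalar prediction at a single point
    gradient(g, (1.0, 1.0, 1.0))  # gradient of the surrogate via Zygote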