From f94088647408d080e0a392fe8f0e42c0dd0b77be Mon Sep 17 00:00:00 2001 From: "Lars H. B. Olsen" <92097196+LHBO@users.noreply.github.com> Date: Mon, 15 Jan 2024 11:47:14 +0000 Subject: [PATCH] Improving the efficiency of the Gaussian and (Gaussian) copula methods (#366) --- .gitignore | 14 +- DESCRIPTION | 2 - NAMESPACE | 3 + R/RcppExports.R | 82 + R/approach_copula.R | 190 +- R/approach_gaussian.R | 111 +- R/shapr-package.R | 2 + inst/scripts/compare_copula_in_R_and_C++.R | 1551 ++++++++++ inst/scripts/compare_gaussian_in_R_and_C++.R | 2735 +++++++++++++++++ man/gaussian_transform_separate.Rd | 2 +- man/inv_gaussian_transform.Rd | 24 - man/inv_gaussian_transform_cpp.Rd | 23 + man/prepare_data.Rd | 7 +- man/prepare_data_copula_cpp.Rd | 52 + man/prepare_data_gaussian_cpp.Rd | 36 + man/quantile_type7_cpp.Rd | 26 + man/sample_copula.Rd | 40 - man/sample_gaussian.Rd | 27 - man/setup_approach.Rd | 3 + python/install_r_packages.R | 5 +- src/Copula.cpp | 169 + src/Gaussian.cpp | 88 + src/RcppExports.cpp | 60 + tests/testthat/_snaps/output.md | 38 +- .../_snaps/output/output_lm_numeric_comb1.rds | Bin 5117 -> 5110 bytes .../_snaps/output/output_lm_numeric_comb2.rds | Bin 5103 -> 5100 bytes .../_snaps/output/output_lm_numeric_comb3.rds | Bin 5043 -> 5039 bytes .../output/output_lm_numeric_copula.rds | Bin 4968 -> 4966 bytes .../output/output_lm_numeric_gaussian.rds | Bin 4868 -> 4865 bytes tests/testthat/_snaps/plot/msev-bar-50-ci.svg | 16 +- .../plot/msev-bar-with-ci-different-width.svg | 4 +- .../_snaps/plot/msev-bar-without-ci.svg | 4 +- tests/testthat/_snaps/plot/msev-bar.svg | 16 +- .../msev-combination-bar-specified-width.svg | 80 +- .../_snaps/plot/msev-combination-bar.svg | 320 +- .../plot/msev-combination-line-point.svg | 84 +- .../msev-explicand-bar-specified-width.svg | 30 +- .../_snaps/plot/msev-explicand-bar.svg | 30 +- ...v-explicand-for-specified-observations.svg | 22 +- .../_snaps/plot/msev-explicand-line-point.svg | 44 +- 40 files changed, 5275 insertions(+), 665 deletions(-) create mode 100644 inst/scripts/compare_copula_in_R_and_C++.R create mode 100644 inst/scripts/compare_gaussian_in_R_and_C++.R delete mode 100644 man/inv_gaussian_transform.Rd create mode 100644 man/inv_gaussian_transform_cpp.Rd create mode 100644 man/prepare_data_copula_cpp.Rd create mode 100644 man/prepare_data_gaussian_cpp.Rd create mode 100644 man/quantile_type7_cpp.Rd delete mode 100644 man/sample_copula.Rd delete mode 100644 man/sample_gaussian.Rd create mode 100644 src/Copula.cpp create mode 100644 src/Gaussian.cpp diff --git a/.gitignore b/.gitignore index e6b4b3974..619cbb459 100644 --- a/.gitignore +++ b/.gitignore @@ -5,32 +5,22 @@ Rprof.out src/*.o src/*.so - *.o - *.so - tmp/ - \.RDataTmp - *.out - *.Rout - src-i386/ - src-x64/ - paper_experiments/res/single_res/ - paper_experiments/res/single_res_old/ inst/doc - docs/* doc Meta docs /doc/ /Meta/ -.idea \ No newline at end of file +.idea +.DS_Store diff --git a/DESCRIPTION b/DESCRIPTION index 42e96754a..f7994608d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,8 +32,6 @@ Imports: stats, data.table, Rcpp (>= 0.12.15), - condMVNorm, - mvnfast, Matrix, future.apply Suggests: diff --git a/NAMESPACE b/NAMESPACE index 956c374c8..6537b3bcb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -63,6 +63,8 @@ export(observation_impute_cpp) export(plot_MSEv_eval_crit) export(predict_model) export(prepare_data) +export(prepare_data_copula_cpp) +export(prepare_data_gaussian_cpp) export(rss_cpp) export(setup) export(setup_approach) @@ -96,6 +98,7 @@ importFrom(stats,model.matrix) importFrom(stats,predict) importFrom(stats,pt) importFrom(stats,qt) +importFrom(stats,rnorm) importFrom(stats,sd) importFrom(stats,setNames) importFrom(utils,head) diff --git a/R/RcppExports.R b/R/RcppExports.R index bbe62a76d..1f27325fe 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -80,6 +80,88 @@ aicc_full_cpp <- function(h, X_list, mcov_list, S_scale_dist, y_list, negative) .Call(`_shapr_aicc_full_cpp`, h, X_list, mcov_list, S_scale_dist, y_list, negative) } +#' Compute the quantiles using quantile type seven +#' +#' @details Using quantile type number seven from stats::quantile in R. +#' +#' @param x arma::vec. Numeric vector whose sample quantiles are wanted. +#' @param probs arma::vec. Numeric vector of probabilities with values between zero and one. +#' +#' @return A vector of length `length(probs)` with the quantiles is returned. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +quantile_type7_cpp <- function(x, probs) { + .Call(`_shapr_quantile_type7_cpp`, x, probs) +} + +#' Transforms new data to a standardized normal distribution +#' +#' @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform. +#' @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`. +#' +#' @return arma::mat of the same dimension as `z` +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +inv_gaussian_transform_cpp <- function(z, x) { + .Call(`_shapr_inv_gaussian_transform_cpp`, z, x) +} + +#' Generate (Gaussian) Copula MC samples +#' +#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +#' univariate standard normal. +#' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +#' to explain on the original scale. +#' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the +#' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +#' transformed to a standardized normal distribution. +#' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. +#' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +#' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +#' This is not a problem internally in shapr as the empty and grand coalitions treated differently. +#' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed +#' using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. +#' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +#' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +#' transformed to a standardized normal distribution. +#' +#' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +#' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +#' copula MC samples for each explicand and coalition on the original scale. +#' +#' @export +#' @keywords internal +#' @author Lars Henry Berge Olsen +prepare_data_copula_cpp <- function(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_copula_cpp`, MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) +} + +#' Generate Gaussian MC samples +#' +#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +#' univariate standard normal. +#' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +#' to explain. +#' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +#' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +#' This is not a problem internally in shapr as the empty and grand coalitions treated differently. +#' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature. +#' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +#' between all pairs of features. +#' +#' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +#' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +#' MC samples for each explicand and coalition. +#' +#' @export +#' @keywords internal +#' @author Lars Henry Berge Olsen +prepare_data_gaussian_cpp <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_gaussian_cpp`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) +} + #' (Generalized) Mahalanobis distance #' #' Used to get the Euclidean distance as well by setting \code{mcov} = \code{diag(m)}. diff --git a/R/approach_copula.R b/R/approach_copula.R index 403d88809..4e7f5e914 100644 --- a/R/approach_copula.R +++ b/R/approach_copula.R @@ -1,12 +1,11 @@ #' @rdname setup_approach -#' #' @inheritParams default_doc_explain -#' #' @export +#' @author Martin Jullum setup_approach.copula <- function(internal, ...) { parameters <- internal$parameters - x_train <- internal$data$x_train - x_explain <- internal$data$x_explain + x_train_mat <- as.matrix(internal$data$x_train) + x_explain_mat <- as.matrix(internal$data$x_explain) # Checking if factor features are present feature_specs <- internal$objects$feature_specs @@ -23,28 +22,21 @@ setup_approach.copula <- function(internal, ...) { } # Prepare transformed data - parameters$copula.mu <- rep(0, ncol(x_train)) - x_train0 <- apply( - X = x_train, - MARGIN = 2, - FUN = gaussian_transform - ) - parameters$copula.cov_mat <- get_cov_mat(x_train0) - + parameters$copula.mu <- rep(0, ncol(x_train_mat)) + x_train_mat0 <- apply(X = x_train_mat, MARGIN = 2, FUN = gaussian_transform) + parameters$copula.cov_mat <- get_cov_mat(x_train_mat0) x_explain_gaussian <- apply( - X = rbind(x_explain, x_train), + X = rbind(x_explain_mat, x_train_mat), MARGIN = 2, FUN = gaussian_transform_separate, - n_y = nrow(x_explain) + n_y = nrow(x_explain_mat) ) + if (is.null(dim(x_explain_gaussian))) x_explain_gaussian <- t(as.matrix(x_explain_gaussian)) - if (is.null(dim(x_explain_gaussian))) { - x_explain_gaussian <- t(as.matrix(x_explain_gaussian)) - } - # TODO: Change to this a data.table for consistency (not speed/memory) - internal$data$copula.x_explain_gaussian <- x_explain_gaussian + # Add objects to internal list internal$parameters <- parameters + internal$data$copula.x_explain_gaussian <- as.data.table(x_explain_gaussian) return(internal) } @@ -52,130 +44,75 @@ setup_approach.copula <- function(internal, ...) { #' @inheritParams default_doc #' @rdname prepare_data #' @export -prepare_data.copula <- function(internal, index_features = NULL, ...) { - x_train <- internal$data$x_train - x_explain <- internal$data$x_explain +#' @author Lars Henry Berge Olsen +prepare_data.copula <- function(internal, index_features, ...) { + # Extract used variables + S <- internal$objects$S[index_features, , drop = FALSE] + feature_names <- internal$parameters$feature_names n_explain <- internal$parameters$n_explain - copula.cov_mat <- internal$parameters$copula.cov_mat n_samples <- internal$parameters$n_samples - copula.mu <- internal$parameters$copula.mu n_features <- internal$parameters$n_features + n_combinations_now <- length(index_features) + x_train_mat <- as.matrix(internal$data$x_train) + x_explain_mat <- as.matrix(internal$data$x_explain) + copula.mu <- internal$parameters$copula.mu + copula.cov_mat <- internal$parameters$copula.cov_mat + copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Use C++ to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}), for all coalitions and explicands, + # and then transforming them back to the original scale using the inverse Gaussian transform in C++. + # The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). + dt <- prepare_data_copula_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat + ) - copula.x_explain_gaussian <- internal$data$copula.x_explain_gaussian - X <- internal$objects$X - - - x_explain0 <- as.matrix(x_explain) - dt_l <- list() - if (is.null(index_features)) { - features <- X$features - } else { - features <- X$features[index_features] - } - + # Reshape `dt` to a 2D array of dimension (n_samples * n_explain * n_coalitions, n_features). + dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) - for (i in seq_len(n_explain)) { - l <- lapply( - X = features, - FUN = sample_copula, - n_samples = n_samples, - mu = copula.mu, - cov_mat = copula.cov_mat, - m = n_features, - x_explain = x_explain0[i, , drop = FALSE], - x_train = as.matrix(x_train), - x_explain_gaussian = copula.x_explain_gaussian[i, , drop = FALSE] - ) + # Convert to a data.table and add extra identification columns + dt <- data.table::as.data.table(dt) + data.table::setnames(dt, feature_names) + dt[, id_combination := rep(seq_len(nrow(S)), each = n_samples * n_explain)] + dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, w := 1 / n_samples] + dt[, id_combination := index_features[id_combination]] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) - dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination") - dt_l[[i]][, w := 1 / n_samples] - dt_l[[i]][, id := i] - if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]] - } - dt <- data.table::rbindlist(dt_l, use.names = TRUE, fill = TRUE) return(dt) } -#' Sample conditional variables using the Gaussian copula approach -#' -#' @param index_given Integer vector. The indices of the features to condition upon. Note that -#' `min(index_given) >= 1` and `max(index_given) <= m`. -#' @param m Positive integer. The total number of features. -#' @param x_explain_gaussian Numeric matrix. Contains the observation whose predictions ought -#' to be explained (test data), -#' after quantile-transforming them to standard Gaussian variables. -#' @param x_explain Numeric matrix. Contains the features of the observation whose -#' predictions ought to be explained (test data). -#' -#' @return data.table -#' -#' @keywords internal -#' -#' @author Martin Jullum -sample_copula <- function(index_given, n_samples, mu, cov_mat, m, x_explain_gaussian, x_train, x_explain) { - # Handles the unconditional and full conditional separtely when predicting - if (length(index_given) %in% c(0, m)) { - ret <- matrix(x_explain, ncol = m, nrow = 1) - } else { - dependent_ind <- (seq_len(length(mu)))[-index_given] - - tmp <- condMVNorm::condMVN( - mean = mu, - sigma = cov_mat, - dependent.ind = dependent_ind, - given.ind = index_given, - X.given = x_explain_gaussian[index_given] - ) - - ret0_z <- mvnfast::rmvn(n = n_samples, mu = tmp$condMean, sigma = tmp$condVar) - - ret0_x <- apply( - X = rbind(ret0_z, x_train[, dependent_ind, drop = FALSE]), - MARGIN = 2, - FUN = inv_gaussian_transform, - n_z = n_samples - ) - - ret <- matrix(NA, ncol = m, nrow = n_samples) - ret[, index_given] <- rep(x_explain[index_given], each = n_samples) - ret[, dependent_ind] <- ret0_x - } - colnames(ret) <- colnames(x_explain) - return(as.data.table(ret)) -} - - -#' Transforms new data to a standardized normal distribution +#' Transforms a sample to standardized normal distribution #' -#' @param zx Numeric vector. The first `n_z` items are the Gaussian data, and the last part is -#' the data with the original transformation. -#' @param n_z Positive integer. Number of elements of `zx` that belongs to new data. +#' @param x Numeric vector.The data which should be transformed to a standard normal distribution. #' -#' @return Numeric vector of length `n_z` +#' @return Numeric vector of length `length(x)` #' #' @keywords internal -#' #' @author Martin Jullum -inv_gaussian_transform <- function(zx, n_z) { - if (n_z >= length(zx)) stop("n_z should be less than length(zx)") - ind <- 1:n_z - z <- zx[ind] - x <- zx[-ind] - u <- stats::pnorm(z) - x_new <- stats::quantile(x, probs = u) - return(as.double(x_new)) +gaussian_transform <- function(x) { + u <- rank(x) / (length(x) + 1) + z <- stats::qnorm(u) + return(z) } #' Transforms new data to standardized normal (dimension 1) based on other data transformations #' #' @param yx Numeric vector. The first `n_y` items is the data that is transformed, and last #' part is the data with the original transformation. -#' @param n_y Positive integer. Number of elements of `yx` that belongs to the gaussian data. +#' @param n_y Positive integer. Number of elements of `yx` that belongs to the Gaussian data. #' #' @return Vector of back-transformed Gaussian data #' #' @keywords internal -#' #' @author Martin Jullum gaussian_transform_separate <- function(yx, n_y) { if (n_y >= length(yx)) stop("n_y should be less than length(yx)") @@ -187,18 +124,3 @@ gaussian_transform_separate <- function(yx, n_y) { z_y <- stats::qnorm(u_y) return(z_y) } - -#' Transforms a sample to standardized normal distribution -#' -#' @param x Numeric vector.The data which should be transformed to a standard normal distribution. -#' -#' @return Numeric vector of length `length(x)` -#' -#' @keywords internal -#' -#' @author Martin Jullum -gaussian_transform <- function(x) { - u <- rank(x) / (length(x) + 1) - z <- stats::qnorm(u) - return(z) -} diff --git a/R/approach_gaussian.R b/R/approach_gaussian.R index 8191757a2..23dd34d98 100644 --- a/R/approach_gaussian.R +++ b/R/approach_gaussian.R @@ -45,46 +45,47 @@ setup_approach.gaussian <- function(internal, return(internal) } +#' @inheritParams default_doc #' @rdname prepare_data #' @export -prepare_data.gaussian <- function(internal, index_features = NULL, ...) { - x_train <- internal$data$x_train - x_explain <- internal$data$x_explain +#' @author Lars Henry Berge Olsen +prepare_data.gaussian <- function(internal, index_features, ...) { + # Extract used variables + S <- internal$objects$S[index_features, , drop = FALSE] + feature_names <- internal$parameters$feature_names n_explain <- internal$parameters$n_explain - gaussian.cov_mat <- internal$parameters$gaussian.cov_mat - n_samples <- internal$parameters$n_samples - gaussian.mu <- internal$parameters$gaussian.mu n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + n_combinations_now <- length(index_features) + x_explain_mat <- as.matrix(internal$data$x_explain) + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Use Cpp to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}) for all coalitions and explicands. + # The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). + dt <- prepare_data_gaussian_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat + ) - X <- internal$objects$X - - x_explain0 <- as.matrix(x_explain) - dt_l <- list() - - if (is.null(index_features)) { - features <- X$features - } else { - features <- X$features[index_features] - } - - for (i in seq_len(n_explain)) { - l <- lapply( - X = features, - FUN = sample_gaussian, - n_samples = n_samples, - mu = gaussian.mu, - cov_mat = gaussian.cov_mat, - m = n_features, - x_explain = x_explain0[i, , drop = FALSE] - ) + # Reshape `dt` to a 2D array of dimension (n_samples * n_explain * n_coalitions, n_features). + dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) - dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination") - dt_l[[i]][, w := 1 / n_samples] - dt_l[[i]][, id := i] - if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]] - } + # Convert to a data.table and add extra identification columns + dt <- data.table::as.data.table(dt) + data.table::setnames(dt, feature_names) + dt[, id_combination := rep(seq_len(nrow(S)), each = n_samples * n_explain)] + dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, w := 1 / n_samples] + dt[, id_combination := index_features[id_combination]] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) - dt <- data.table::rbindlist(dt_l, use.names = TRUE, fill = TRUE) return(dt) } @@ -111,47 +112,3 @@ get_cov_mat <- function(x_train, min_eigen_value = 1e-06) { get_mu_vec <- function(x_train) { unname(colMeans(x_train)) } - -#' Sample conditional Gaussian variables -#' -#' @inheritParams sample_copula -#' -#' @return data.table -#' -#' @keywords internal -#' -#' @author Martin Jullum -sample_gaussian <- function(index_given, n_samples, mu, cov_mat, m, x_explain) { - # Check input - stopifnot(is.matrix(x_explain)) - - # Handles the unconditional and full conditional separtely when predicting - cnms <- colnames(x_explain) - if (length(index_given) %in% c(0, m)) { - return(data.table::as.data.table(x_explain)) - } - - dependent_ind <- seq_along(mu)[-index_given] - x_explain_gaussian <- x_explain[index_given] - tmp <- condMVNorm::condMVN( - mean = mu, - sigma = cov_mat, - dependent.ind = dependent_ind, - given.ind = index_given, - X.given = x_explain_gaussian - ) - - # Makes the conditional covariance matrix symmetric in the rare case where numerical instability made it unsymmetric - if (!isSymmetric(tmp[["condVar"]])) { - tmp[["condVar"]] <- Matrix::symmpart(tmp$condVar) - } - - ret0 <- mvnfast::rmvn(n = n_samples, mu = tmp$condMean, sigma = tmp$condVar) - - ret <- matrix(NA, ncol = m, nrow = n_samples) - ret[, index_given] <- rep(x_explain_gaussian, each = n_samples) - ret[, dependent_ind] <- ret0 - - colnames(ret) <- cnms - return(as.data.table(ret)) -} diff --git a/R/shapr-package.R b/R/shapr-package.R index 320619c33..75bb26ba7 100644 --- a/R/shapr-package.R +++ b/R/shapr-package.R @@ -23,6 +23,8 @@ #' #' @importFrom stats sd qt pt #' +#' @importFrom stats rnorm +#' #' @importFrom Rcpp sourceCpp #' #' @keywords internal diff --git a/inst/scripts/compare_copula_in_R_and_C++.R b/inst/scripts/compare_copula_in_R_and_C++.R new file mode 100644 index 000000000..fd6b1cfb4 --- /dev/null +++ b/inst/scripts/compare_copula_in_R_and_C++.R @@ -0,0 +1,1551 @@ +# Libraries ------------------------------------------------------------------------------------------------------- +# library(shapr) +# library(rbenchmark) +library(data.table) +devtools::load_all(".") + +# Old R code ------------------------------------------------------------------------------------------------------ +## R Old version --------------------------------------------------------------------------------------------------- +#' @inheritParams default_doc +#' @rdname prepare_data +#' @export +prepare_data.copula_old <- function(internal, index_features = NULL, ...) { + X <- internal$objects$X + x_train <- internal$data$x_train + x_explain <- internal$data$x_explain + n_explain <- internal$parameters$n_explain + n_samples <- internal$parameters$n_samples + n_features <- internal$parameters$n_features + copula.mu <- internal$parameters$copula.mu + copula.cov_mat <- internal$parameters$copula.cov_mat + copula.x_explain_gaussian <- internal$data$copula.x_explain_gaussian + + x_explain0 <- as.matrix(x_explain) + dt_l <- list() + if (is.null(index_features)) { + features <- X$features + } else { + features <- X$features[index_features] + } + + for (i in seq_len(n_explain)) { + cat(sprintf("%d,", i)) + l <- lapply( + X = features, + FUN = sample_copula_old, + n_samples = n_samples, + mu = copula.mu, + cov_mat = copula.cov_mat, + m = n_features, + x_explain = x_explain0[i, , drop = FALSE], + x_train = as.matrix(x_train), + x_explain_gaussian = as.matrix(copula.x_explain_gaussian)[i, , drop = FALSE] + ) + dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination") + dt_l[[i]][, w := 1 / n_samples] + dt_l[[i]][, id := i] + if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]] + } + + dt <- data.table::rbindlist(dt_l, use.names = TRUE, fill = TRUE) + + return(dt) +} + +#' Sample conditional variables using the Gaussian copula approach +#' +#' @param index_given Integer vector. The indices of the features to condition upon. Note that +#' `min(index_given) >= 1` and `max(index_given) <= m`. +#' @param m Positive integer. The total number of features. +#' @param x_explain_gaussian Numeric matrix. Contains the observation whose predictions ought +#' to be explained (test data), +#' after quantile-transforming them to standard Gaussian variables. +#' @param x_explain Numeric matrix. Contains the features of the observation whose +#' predictions ought to be explained (test data). +#' +#' @return data.table +#' +#' @keywords internal +#' +#' @author Martin Jullum +sample_copula_old <- function(index_given, n_samples, mu, cov_mat, m, x_explain_gaussian, x_train, x_explain) { + # Handles the unconditional and full conditional separtely when predicting + if (length(index_given) %in% c(0, m)) { + ret <- matrix(x_explain, ncol = m, nrow = 1) + } else { + dependent_ind <- (seq_len(length(mu)))[-index_given] + + tmp <- condMVNorm::condMVN( + mean = mu, + sigma = cov_mat, + dependent.ind = dependent_ind, + given.ind = index_given, + X.given = x_explain_gaussian[index_given] + ) + + ret0_z <- mvnfast::rmvn(n = n_samples, mu = tmp$condMean, sigma = tmp$condVar) + + ret0_x <- apply( + X = rbind(ret0_z, x_train[, dependent_ind, drop = FALSE]), + MARGIN = 2, + FUN = inv_gaussian_transform_old, + n_z = n_samples, + type = 7 + ) + + ret <- matrix(NA, ncol = m, nrow = n_samples) + ret[, index_given] <- rep(x_explain[index_given], each = n_samples) + ret[, dependent_ind] <- ret0_x + } + colnames(ret) <- colnames(x_explain) + return(as.data.table(ret)) +} + + +#' Transforms new data to a standardized normal distribution +#' +#' @param zx Numeric vector. The first `n_z` items are the Gaussian data, and the last part is +#' the data with the original transformation. +#' @param n_z Positive integer. Number of elements of `zx` that belongs to new data. +#' +#' @return Numeric vector of length `n_z` +#' +#' @keywords internal +#' +#' @author Martin Jullum +inv_gaussian_transform_old <- function(zx, n_z, type) { + if (n_z >= length(zx)) stop("n_z should be less than length(zx)") + ind <- 1:n_z + z <- zx[ind] + x <- zx[-ind] + u <- stats::pnorm(z) + x_new <- stats::quantile(x, probs = u, type = type) + return(as.double(x_new)) +} + +#' Transforms a sample to standardized normal distribution +#' +#' @param x Numeric vector.The data which should be transformed to a standard normal distribution. +#' +#' @return Numeric vector of length `length(x)` +#' +#' @keywords internal +#' @author Martin Jullum +gaussian_transform_old <- function(x) { + u <- rank(x) / (length(x) + 1) + z <- stats::qnorm(u) + return(z) +} + +#' Transforms new data to standardized normal (dimension 1) based on other data transformations +#' +#' @param yx Numeric vector. The first `n_y` items is the data that is transformed, and last +#' part is the data with the original transformation. +#' @param n_y Positive integer. Number of elements of `yx` that belongs to the Gaussian data. +#' +#' @return Vector of back-transformed Gaussian data +#' +#' @keywords internal +#' @author Martin Jullum +gaussian_transform_separate_old <- function(yx, n_y) { + if (n_y >= length(yx)) stop("n_y should be less than length(yx)") + ind <- 1:n_y + x <- yx[-ind] + tmp <- rank(yx)[ind] + tmp <- tmp - rank(tmp) + 0.5 + u_y <- tmp / (length(x) + 1) + z_y <- stats::qnorm(u_y) + return(z_y) +} + + +## C++ arma version ------------------------------------------------------------------------------------------------- +#' @inheritParams default_doc +#' @rdname prepare_data +#' @export +#' @author Lars Henry Berge Olsen +prepare_data.copula_cpp_arma <- function(internal, index_features, ...) { + # Extract used variables + S <- internal$objects$S[index_features, , drop = FALSE] + feature_names <- internal$parameters$feature_names + n_explain <- internal$parameters$n_explain + n_samples <- internal$parameters$n_samples + n_features <- internal$parameters$n_features + n_combinations_now <- length(index_features) + x_train_mat <- as.matrix(internal$data$x_train) + x_explain_mat <- as.matrix(internal$data$x_explain) + copula.mu <- internal$parameters$copula.mu + copula.cov_mat <- internal$parameters$copula.cov_mat + copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) + + # TODO: Note that `as.matrix` is not needed for `copula.x_explain_gaussian_mat` as it is already defined as a matrix + # in `setup_approach.copula`, however, it seems that Martin plans to make it into a data.table, thus, I include + # `as.matrix` as future safety. DISCUSS WITH MARTIN WHAT HIS PLANS ARE! + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Use C++ to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}), for all coalitions and explicands, + # and then transforming them back to the original scale using the inverse Gaussian transform in C++. + # The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). + dt <- prepare_data_copula_cpp_arma( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat + ) + + # Reshape `dt` to a 2D array of dimension (n_samples * n_explain * n_coalitions, n_features). + dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) + + # Convert to a data.table and add extra identification columns + dt <- data.table::as.data.table(dt) + data.table::setnames(dt, feature_names) + dt[, id_combination := rep(seq_len(nrow(S)), each = n_samples * n_explain)] + dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, w := 1 / n_samples] + dt[, id_combination := index_features[id_combination]] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + return(dt) +} + + + + + +## C++ and R version ---------------------------------------------------------------------------------------------- +#' @inheritParams default_doc +#' @rdname prepare_data +#' @export +#' @author Lars Henry Berge Olsen +prepare_data.copula_cpp_and_R <- function(internal, index_features, ...) { + # Extract used variables + S <- internal$objects$S[index_features, , drop = FALSE] + feature_names <- internal$parameters$feature_names + n_explain <- internal$parameters$n_explain + n_samples <- internal$parameters$n_samples + n_features <- internal$parameters$n_features + n_combinations_now <- length(index_features) + x_train_mat <- as.matrix(internal$data$x_train) + x_explain_mat <- as.matrix(internal$data$x_explain) + copula.mu <- internal$parameters$copula.mu + copula.cov_mat <- internal$parameters$copula.cov_mat + copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) + + # TODO: Note that `as.matrix` is not needed for `copula.x_explain_gaussian_mat` as it is already defined as a matrix + # in `setup_approach.copula`, however, it seems that Martin plans to make it into a data.table, thus, I include + # `as.matrix` as future safety. DISCUSS WITH MARTIN WHAT HIS PLANS ARE! + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Use C++ to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}), for all coalitions and explicands, + # and then transforming them back to the original scale using the inverse Gaussian transform in C++. + # The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). + dt <- prepare_data_copula_cpp_and_R( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat + ) + + # Reshape `dt` to a 2D array of dimension (n_samples * n_explain * n_coalitions, n_features). + dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) + + # Convert to a data.table and add extra identification columns + dt <- data.table::as.data.table(dt) + data.table::setnames(dt, feature_names) + dt[, id_combination := rep(seq_len(nrow(S)), each = n_samples * n_explain)] + dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, w := 1 / n_samples] + dt[, id_combination := index_features[id_combination]] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + return(dt) +} + +#' Transform data using the inverse Gaussian transformation. +#' +#' @details This function is called from `prepare_data_copula_cpp()` as the this was faster +#' +#' @param z Matrix. The data are the Gaussian Monte Carlos samples to transform. +#' @param x Matrix. The data with the original transformation. Used to conduct the transformation of `z`. +#' +#' @return Matrix of the same dimension as `z`. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +inv_gaussian_transform_R <- function(z, x) { + u <- stats::pnorm(z) + x_new <- sapply(seq_len(ncol(u)), function(idx) quantile.type7(x[, idx], probs = u[, idx])) + return(x_new) +} + +#' Compute the quantiles using quantile type seven +#' +#' @details Using quantile type number 7 from stats::quantile. +#' +#' @param x numeric vector whose sample quantiles are wanted. +#' @param probs numeric vector of probabilities with values between zero and one. +#' +#' @return A vector of length length(`probs`) is returned. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +quantile.type7 <- function(x, probs) { + n <- length(x) + probs <- pmax(0, pmin(1, probs)) # allow for slight overshoot + index <- 1 + (n - 1) * probs + lo <- floor(index) + hi <- ceiling(index) + x <- sort(x, partial = unique(c(lo, hi))) + qs <- x[lo] + i <- which(index > lo) + h <- (index - lo)[i] + qs[i] <- (1 - h) * qs[i] + h * x[hi[i]] + return(qs) +} + + + +# C++ with sourceRcpp --------------------------------------------------------------------------------------------- +#' @inheritParams default_doc +#' @rdname prepare_data +#' @export +#' @author Lars Henry Berge Olsen +prepare_data.copula_sourceCpp <- function(internal, index_features, ...) { + # Extract used variables + S <- internal$objects$S[index_features, , drop = FALSE] + feature_names <- internal$parameters$feature_names + n_explain <- internal$parameters$n_explain + n_samples <- internal$parameters$n_samples + n_features <- internal$parameters$n_features + n_combinations_now <- length(index_features) + x_train_mat <- as.matrix(internal$data$x_train) + x_explain_mat <- as.matrix(internal$data$x_explain) + copula.mu <- internal$parameters$copula.mu + copula.cov_mat <- internal$parameters$copula.cov_mat + copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Use C++ to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}), for all coalitions and explicands, + # and then transforming them back to the original scale using the inverse Gaussian transform in C++. + # The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). + dt <- prepare_data_copula_cpp_sourceCpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat + ) + + # Reshape `dt` to a 2D array of dimension (n_samples * n_explain * n_coalitions, n_features). + dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) + + # Convert to a data.table and add extra identification columns + dt <- data.table::as.data.table(dt) + data.table::setnames(dt, feature_names) + dt[, id_combination := rep(seq_len(nrow(S)), each = n_samples * n_explain)] + dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, w := 1 / n_samples] + dt[, id_combination := index_features[id_combination]] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + return(dt) +} + +Rcpp::sourceCpp( + code = ' + #include +using namespace Rcpp; + +// [[Rcpp::depends(RcppArmadillo)]] + +// Compute the quantiles using quantile type seven +// +// @details Using quantile type number seven from stats::quantile in R. +// +// @param x arma::vec. Numeric vector whose sample quantiles are wanted. +// @param probs arma::vec. Numeric vector of probabilities with values between zero and one. +// +// @return A vector of length `length(probs)` with the quantiles is returned. +// +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] + arma::vec quantile_type7_cpp_sourceCpp(const arma::vec& x, const arma::vec& probs) { + int n = x.n_elem; + int m = probs.n_elem; + + // Initialize output quantile vector + arma::vec qs(m); + + // Calculate indices + arma::vec index = 1 + (n - 1) * probs; + arma::vec lo = arma::floor(index); + arma::vec hi = arma::ceil(index); + + // Sort the data + arma::vec sorted_x = arma::sort(x); + + // Calculate quantiles using quantile type seven + for (int i = 0; i < m; ++i) { + qs(i) = sorted_x(lo(i) - 1); + if (index(i) > lo(i)) { + double h = index(i) - lo(i); + qs(i) = (1 - h) * qs(i) + h * sorted_x(hi(i) - 1); + } + } + + return qs; + } + +// Transforms new data to a standardized normal distribution +// +// @details The function uses `arma::quantile(...)` which corresponds to Rs `stats::quantile(..., type = 5)`. +// +// @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform. +// @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`. +// +// @return arma::mat of the same dimension as `z` +// +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] + arma::mat inv_gaussian_transform_cpp_sourceCpp(const arma::mat& z, const arma::mat& x) { + int n_features = z.n_cols; + int n_samples = z.n_rows; + arma::mat z_new(n_samples, n_features); + arma::mat u = arma::normcdf(z); + for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { + z_new.col(feature_idx) = quantile_type7_cpp_sourceCpp(x.col(feature_idx), u.col(feature_idx)); + } + return z_new; + } + +// Generate (Gaussian) Copula MC samples +// +// @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +// univariate standard normal. +// @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +// to explain on the original scale. +// @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the +// observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +// transformed to a standardized normal distribution. +// @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. +// @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +// the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +// This is not a problem internally in shapr as the empty and grand coalitions treated differently. +// @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed +// using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. +// @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +// between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +// transformed to a standardized normal distribution. +// +// @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +// the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +// copula MC samples for each explicand and coalition on the original scale. +// +// @export +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] + arma::cube prepare_data_copula_cpp_sourceCpp(const arma::mat& MC_samples_mat, + const arma::mat& x_explain_mat, + const arma::mat& x_explain_gaussian_mat, + const arma::mat& x_train_mat, + const arma::mat& S, + const arma::vec& mu, + const arma::mat& cov_mat) { + + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Initialize auxiliary matrix and result cube + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on, both on the original scale and the Gaussian transformed values. + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); + + // Extract the mean values of the Gaussian transformed features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the Gaussian transformed covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star_gaussian, i.e., of the Gaussian transformed features + arma::mat x_Sbar_gaussian_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); + x_Sbar_gaussian_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_{Sbar|S}) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different explicands and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + + // Transform the MC samples to be from N(mu_{Sbar|S}, Sigma_{Sbar|S}) for one coalition and one explicand + arma::mat MC_samples_mat_now_now = + MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); + + // Transform the MC to the original scale using the inverse Gaussian transform + arma::mat MC_samples_mat_now_now_trans = + inv_gaussian_transform_cpp_sourceCpp(MC_samples_mat_now_now, x_train_mat.cols(Sbar_now_idx)); + + // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + + // Insert the auxiliary matrix into the result cube + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; + } + +// Transforms a sample to standardized normal distribution +// +// @param x Numeric matrix. The data which should be transformed to a standard normal distribution. +// +// @return Numeric matrix of dimension `dim(x)` +// +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] +Rcpp::NumericMatrix gaussian_transform_cpp_sourceCpp(const arma::mat& x) { + int n_obs = x.n_rows; + int n_features = x.n_cols; + + // Pre allocate the return matrix + Rcpp::NumericMatrix x_trans(n_obs, n_features); + + // Iterate over the columns, i.e., the features + for (int idx_feature = 0; idx_feature < n_features; ++idx_feature) { + // Compute the rank and transform to standardized normal distribution + arma::vec rank_now = arma::conv_to::from(arma::sort_index(arma::sort_index(x.col(idx_feature)))); + Rcpp::NumericVector u = Rcpp::wrap((rank_now + 1) / (n_obs + 1)); + x_trans(Rcpp::_, idx_feature) = Rcpp::qnorm(u); + } + + return x_trans; +} + +// Transforms new data to standardized normal (column-wise) based on other data transformations +// +// @param y arma::mat. A numeric matrix containing the data that is to be transformed. +// @param x arma::mat. A numeric matrix containing the data of the original transformation. +// +// @return An arma::mat matrix of the same dimension as `y` containing the back-transformed Gaussian data. +// +// @keywords internal +// @author Lars Henry Berge Olsen, Martin Jullum +// [[Rcpp::export]] +Rcpp::NumericMatrix gaussian_transform_separate_cpp_sourceCpp(const arma::mat& y, const arma::mat& x) { + int n_features = x.n_cols; + int n_y_rows = y.n_rows; + int n_x_rows = x.n_rows; + + // Pre allocate the return matrix + Rcpp::NumericMatrix z_y(n_y_rows, n_features); + + // Compute the transformation for each feature at the time + for (int idx_feature = 0; idx_feature < n_features; ++idx_feature) { + arma::vec yx_now = arma::join_cols(y.col(idx_feature), x.col(idx_feature)); + arma::vec rank_now_1 = arma::conv_to::from(arma::sort_index(arma::sort_index(yx_now))).head(n_y_rows); + arma::vec rank_now_2 = arma::conv_to::from(arma::sort_index(arma::sort_index(rank_now_1))); + arma::vec tmp = rank_now_1 - rank_now_2 + 0.5; + Rcpp::NumericVector u_y = Rcpp::wrap(tmp / (n_x_rows + 1)); + z_y(Rcpp::_, idx_feature) = Rcpp::qnorm(u_y); + } + + return z_y; +} + + ' +) + + + +# Old C++ code ---------------------------------------------------------------------------------------------------- +Rcpp::sourceCpp( + code = ' +// [[Rcpp::depends("RcppArmadillo")]] +#include +using namespace Rcpp; + +// Transforms new data to a standardized normal distribution +// +// @details The function uses `arma::quantile(...)` which corresponds to Rs `stats::quantile(..., type = 5)`. +// +// @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform. +// @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`. +// +// @return arma::mat of the same dimension as `z` +// +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::mat inv_gaussian_transform_cpp_arma(arma::mat z, arma::mat x) { + int n_features = z.n_cols; + int n_samples = z.n_rows; + arma::mat z_new(n_samples, n_features); + arma::mat u = arma::normcdf(z); + for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { + z_new.col(feature_idx) = arma::quantile(x.col(feature_idx), u.col(feature_idx)); + } + return z_new; +} + +// Generate (Gaussian) Copula MC samples +// +// @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +// univariate standard normal. +// @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +// to explain on the original scale. +// @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the +// observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +// transformed to a standardized normal distribution. +// @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. +// @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +// the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +// This is not a problem internally in shapr as the empty and grand coalitions treated differently. +// @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed +// using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. +// @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +// between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +// transformed to a standardized normal distribution. +// +// @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +// the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +// copula MC samples for each explicand and coalition on the original scale. +// +// @export +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::cube prepare_data_copula_cpp_arma(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat x_explain_gaussian_mat, + arma::mat x_train_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Initialize auxiliary matrix and result cube + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on, both on the original scale and the Gaussian transformed values. + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); + + // Extract the mean values of the Gaussian transformed features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the Gaussian transformed covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star_gaussian, i.e., of the Gaussian transformed features + arma::mat x_Sbar_gaussian_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); + x_Sbar_gaussian_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_{Sbar|S}) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different explicands and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + + // Transform the MC samples to be from N(mu_{Sbar|S}, Sigma_{Sbar|S}) for one coalition and one explicand + arma::mat MC_samples_mat_now_now = + MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); + + // Transform the MC to the original scale using the inverse Gaussian transform + arma::mat MC_samples_mat_now_now_trans = + inv_gaussian_transform_cpp_arma(MC_samples_mat_now_now, x_train_mat.cols(Sbar_now_idx)); + + // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + + // Insert the auxiliary matrix into the result cube + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; +} + +// Generate (Gaussian) Copula MC samples +// +// @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +// univariate standard normal. +// @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +// to explain on the original scale. +// @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the +// observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +// transformed to a standardized normal distribution. +// @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. +// @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +// the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +// This is not a problem internally in shapr as the empty and grand coalitions treated differently. +// @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed +// using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. +// @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +// between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +// transformed to a standardized normal distribution. +// +// @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +// the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +// copula MC samples for each explicand and coalition on the original scale. +// +// @export +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::cube prepare_data_copula_cpp_and_R(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat x_explain_gaussian_mat, + arma::mat x_train_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Get the R functions for computing the inverse gaussian transform + Rcpp::Function inv_gaussian_transform_R("inv_gaussian_transform_R"); + + // Initialize auxiliary matrix and result cube + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on, both on the original scale and the Gaussian transformed values. + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); + + // Extract the mean values of the Gaussian transformed features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the Gaussian transformed covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star_gaussian, i.e., of the Gaussian transformed features + arma::mat x_Sbar_gaussian_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); + x_Sbar_gaussian_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_{Sbar|S}) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different explicands and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + + // Transform the MC samples to be from N(mu_{Sbar|S}, Sigma_{Sbar|S}) for one coalition and one explicand + arma::mat MC_samples_mat_now_now = + MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); + + arma::mat x_train_mat_now = x_train_mat.cols(Sbar_now_idx); + //arma::mat x_train_mat_now = arma::normcdf(x_train_mat.cols(Sbar_now_idx)); + + // Transform the MC to the original scale using the inverse Gaussian transform + arma::mat MC_samples_mat_now_now_trans = + Rcpp::as(inv_gaussian_transform_R(Rcpp::wrap(MC_samples_mat_now_now), + Rcpp::wrap(x_train_mat_now))); + + // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + + // Insert the auxiliary matrix into the result cube + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; +} +') + + + + + + + + + + +# Setup ----------------------------------------------------------------------------------------------------------- +{ + n_samples <- 1000 + n_train <- 1000 + n_test <- 20 + M <- 8 + rho <- 0.5 + betas <- c(0, rep(1, M)) + + # We use the Gaussian copula approach + approach <- "copula" + + # Mean of the multivariate Gaussian distribution + mu <- rep(0, times = M) + mu <- seq(M) + + # Create the covariance matrix + sigma <- matrix(rho, ncol = M, nrow = M) # Old + for (i in seq(1, M - 1)) { + for (j in seq(i + 1, M)) { + sigma[i, j] <- sigma[j, i] <- rho^abs(i - j) + } + } + diag(sigma) <- 1 + + # Set seed for reproducibility + seed_setup <- 1996 + set.seed(seed_setup) + + # Make Gaussian data + data_train <- data.table(mvtnorm::rmvnorm(n = n_train, mean = mu, sigma = sigma)) + data_test <- data.table(mvtnorm::rmvnorm(n = n_test, mean = mu, sigma = sigma)) + colnames(data_train) <- paste("X", seq(M), sep = "") + colnames(data_test) <- paste("X", seq(M), sep = "") + + # Make the response + response_train <- as.vector(cbind(1, as.matrix(data_train)) %*% betas) + response_test <- as.vector(cbind(1, as.matrix(data_test)) %*% betas) + + # Put together the data + data_train_with_response <- copy(data_train)[, y := response_train] + data_test_with_response <- copy(data_test)[, y := response_test] + + # Fit a LM model + predictive_model <- lm(y ~ ., data = data_train_with_response) + + # Get the prediction zero, i.e., the phi0 Shapley value. + prediction_zero <- mean(response_train) + + model <- predictive_model + x_explain <- data_test + x_train <- data_train + keep_samp_for_vS <- FALSE + predict_model <- NULL + get_model_specs <- NULL + timing <- TRUE + n_combinations <- NULL + group <- NULL + feature_specs <- shapr:::get_feature_specs(get_model_specs, model) + n_batches <- 1 + seed <- 1 + + internal <- setup( + x_train = x_train, + x_explain = x_explain, + approach = approach, + prediction_zero = prediction_zero, + n_combinations = n_combinations, + group = group, + n_samples = n_samples, + n_batches = n_batches, + seed = seed, + feature_specs = feature_specs, + keep_samp_for_vS = keep_samp_for_vS, + predict_model = predict_model, + get_model_specs = get_model_specs, + timing = timing + ) + + # Gets predict_model (if not passed to explain) + predict_model <- shapr:::get_predict_model( + predict_model = predict_model, + model = model + ) + + # Sets up the Shapley (sampling) framework and prepares the + # conditional expectation computation for the chosen approach + # Note: model and predict_model are ONLY used by the AICc-methods of approach empirical to find optimal parameters + internal <- shapr:::setup_computation(internal, model, predict_model) +} + + +# Compare shapr compile and rcpp compile -------------------------------------------------------------------------- +look_at_coalitions <- seq(1, 2^M - 2) +index_features = internal$objects$S_batch$`1`[look_at_coalitions] +S <- internal$objects$S[index_features, , drop = FALSE] +feature_names <- internal$parameters$feature_names +n_explain <- internal$parameters$n_explain +n_samples <- internal$parameters$n_samples +n_features <- internal$parameters$n_features +n_combinations_now <- length(index_features) +x_train_mat <- as.matrix(internal$data$x_train) +x_explain_mat <- as.matrix(internal$data$x_explain) +copula.mu <- internal$parameters$copula.mu +copula.cov_mat <- internal$parameters$copula.cov_mat +copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) + +# Generate the MC samples from N(0, 1) +MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + +# Use C++ to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}), for all coalitions and explicands, +# and then transforming them back to the original scale using the inverse Gaussian transform in C++. +# The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). +time_1 = system.time({dt_1 <- prepare_data_copula_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat +)}) +time_1 + +time_2 = system.time({dt_2 <- shapr:::prepare_data_copula_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat +)}) +time_2 + +time_3 = system.time({dt_3 <- + .Call(`_shapr_prepare_data_copula_cpp`, + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat + )}) +time_3 + +Rcpp::sourceCpp("src/Copula.cpp") +time_4 = system.time({dt_4 <- prepare_data_copula_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat +)}) +time_4 + +time_5 = system.time({dt_5 <- shapr::prepare_data_copula_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat +)}) + +rbind(time_1, time_2, time_3, time_4, time_5) +all.equal(dt_1, dt_2) +all.equal(dt_2, dt_3) +all.equal(dt_3, dt_4) +all.equal(dt_4, dt_5) + +# Compare prepare_data.copula ---------------------------------------------------------------------------------------- +set.seed(123) + +# Recall that old version iterate over the observations and then the coalitions. +# While the new version iterate over the coalitions and then the observations. +# The latter lets us reuse the computed conditional distributions for all observations. +look_at_coalitions <- seq(1, 2^M - 2) +# look_at_coalitions <- seq(1, 2^M - 2, 10) +# look_at_coalitions <- seq(1, 2^M - 2, 25) + +# The old R code +time_only_R <- system.time({ + res_only_R <- prepare_data.copula_old( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +time_only_R + +# The C++ code with my own quantile function +time_only_cpp <- system.time({ + res_only_cpp <- prepare_data.copula( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +data.table::setorderv(res_only_cpp, c("id", "id_combination")) +time_only_cpp + +# The C++ code with my own quantile function +time_only_cpp_sourceCpp <- system.time({ + res_only_cpp_sourceCpp <- prepare_data.copula_sourceCpp( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +data.table::setorderv(res_only_cpp_sourceCpp, c("id", "id_combination")) +time_only_cpp_sourceCpp + +# The C++ code with quantile functions from arma +time_only_cpp_arma <- system.time({ + res_only_cpp_arma <- prepare_data.copula_cpp_arma( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +data.table::setorderv(res_only_cpp_arma, c("id", "id_combination")) +time_only_cpp_arma + +# The new C++ code with quantile from R +time_cpp_and_R <- system.time({ + res_cpp_and_R <- prepare_data.copula_cpp_and_R( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +data.table::setorderv(res_cpp_and_R, c("id", "id_combination")) +time_cpp_and_R + +# Create a table of the times. Less is better +times <- rbind( + time_only_R, + time_only_cpp, + time_only_cpp_sourceCpp, + time_only_cpp_arma, + time_cpp_and_R +) +times + +# TIMES for all coalitions (254), n_samples <- 1000, n_train <- 1000, n_test <- 20, M <- 8 + +# user.self sys.self elapsed user.child sys.child +# time_only_R 65.266 2.142 70.896 0 0 +# time_only_cpp 4.622 0.393 5.212 0 0 +# time_only_cpp_sourceCpp 1.823 0.423 2.279 0 0 +# time_only_cpp_arma 23.874 0.604 27.801 0 0 +# time_cpp_and_R 6.826 1.493 8.683 0 0 + +# Relative speedup of new method +times_relative <- t(sapply(seq_len(nrow(times)), function(idx) times[1, ] / times[idx, ])) +rownames(times_relative) <- paste0(rownames(times), "_rel") +times_relative + +# RELATIVE TIMES for all coalitions, n_samples <- 1000, n_train <- 1000, n_test <- 20, M <- 8 +# user.self sys.self elapsed user.child sys.child +# time_only_R_rel 1.0000 1.0000 1.0000 NaN NaN +# time_only_cpp_rel 14.1207 5.4504 13.6025 NaN NaN +# time_only_cpp_sourceCpp_rel 35.8014 5.0638 31.1084 NaN NaN +# time_only_cpp_arma_rel 2.7338 3.5464 2.5501 NaN NaN +# time_cpp_and_R_rel 9.5614 1.4347 8.1649 NaN NaN + +# Aggregate the MC sample values for each explicand and combination +res_only_R <- res_only_R[, w := NULL] +res_only_cpp <- res_only_cpp[, w := NULL] +res_only_cpp_sourceCpp <- res_only_cpp_sourceCpp[, w := NULL] +res_only_cpp_arma <- res_only_cpp_arma[, w := NULL] +res_cpp_and_R <- res_cpp_and_R[, w := NULL] +res_only_R_agr <- res_only_R[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_cpp_agr <- res_only_cpp[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_cpp_sourceCpp_agr <- res_only_cpp_sourceCpp[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_cpp_arma_agr <- res_only_cpp_arma[, lapply(.SD, mean), by = c("id", "id_combination")] +res_cpp_and_R_agr <- res_cpp_and_R[, lapply(.SD, mean), by = c("id", "id_combination")] + +# Difference +res_only_R_agr - res_only_cpp_agr +res_only_R_agr - res_only_cpp_sourceCpp_agr +res_only_R_agr - res_only_cpp_arma_agr +res_only_R_agr - res_cpp_and_R_agr + +# Max absolute difference +max(abs(res_only_R_agr - res_only_cpp_agr)) +max(abs(res_only_R_agr - res_only_cpp_sourceCpp_agr)) +max(abs(res_only_R_agr - res_only_cpp_arma_agr)) +max(abs(res_only_R_agr - res_cpp_and_R_agr)) + +# Max absolute relative difference +max(abs(res_only_R_agr - res_only_cpp_agr) / res_only_cpp_agr) +max(abs(res_only_R_agr - res_only_cpp_sourceCpp_agr) / res_only_cpp_sourceCpp_agr) +max(abs(res_only_R_agr - res_only_cpp_arma_agr) / res_only_cpp_arma_agr) +max(abs(res_only_R_agr - res_cpp_and_R_agr) / res_cpp_and_R_agr) + +# Compare gaussian_transform -------------------------------------------------------------------------------------- +set.seed(123) +x_temp_rows = 10000 +x_temp_cols = 20 +x_temp = matrix(rnorm(x_temp_rows*x_temp_cols), x_temp_rows, x_temp_cols) + +# Compare for equal values +system.time({gaussian_transform_R_res = apply(X = x_temp, MARGIN = 2, FUN = gaussian_transform_old)}) +system.time({gaussian_transform_cpp_res = gaussian_transform_cpp(x_temp)}) +system.time({gaussian_transform_cpp_sourceCpp_res = gaussian_transform_cpp_sourceCpp(x_temp)}) +all.equal(gaussian_transform_R_res, gaussian_transform_cpp_res) # TRUE +all.equal(gaussian_transform_R_res, gaussian_transform_cpp_sourceCpp_res) # TRUE + +# Compare time (generate new data each time such that the result is not stored in the cache) +rbenchmark::benchmark(R = apply(X = matrix(rnorm(x_temp_rows*x_temp_cols), x_temp_rows, x_temp_cols), + MARGIN = 2, + FUN = gaussian_transform_old), + cpp = gaussian_transform_cpp(matrix(rnorm(x_temp_rows*x_temp_cols), x_temp_rows, x_temp_cols)), + cpp_sourceCpp = gaussian_transform_cpp_sourceCpp(matrix(rnorm(x_temp_rows*x_temp_cols), + x_temp_rows, + x_temp_cols)), + replications = 100) +# test replications elapsed relative user.self sys.self user.child sys.child +# 2 cpp 100 7.604 1.987 7.059 0.294 0 0 +# 3 cpp_sourceCpp 100 3.827 1.000 3.529 0.254 0 0 +# 1 R 100 6.183 1.616 4.899 0.738 0 0 + + +# Compare gaussian_transform_separate ------------------------------------------------------------------------- +set.seed(123) +x_cols = 10 +x_train_rows = 50000 +x_explain_rows = 50000 +x_train_temp = matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols) +x_explain_temp = matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols) +x_explain_train_temp = rbind(x_explain_temp, x_train_temp) + +system.time({r = apply(X = rbind(x_explain_temp, x_train_temp), + MARGIN = 2, + FUN = gaussian_transform_separate_old, + n_y = nrow(x_explain_temp))}) +system.time({cpp = gaussian_transform_separate_cpp(x_explain_temp, x_train_temp)}) +system.time({cpp_sourceCpp = gaussian_transform_separate_cpp_sourceCpp(x_explain_temp, x_train_temp)}) +all.equal(r, cpp) +all.equal(r, cpp_sourceCpp) + +# Rcpp::sourceCpp("src/Copula.cpp") # C++ code is faster when I recompile it? I don't understand. +rbenchmark::benchmark(r = apply(X = rbind(x_explain_temp, x_train_temp), + MARGIN = 2, + FUN = gaussian_transform_separate_old, + n_y = nrow(x_explain_temp)), + cpp = gaussian_transform_separate_cpp(x_explain_temp, x_train_temp), + cpp_sourceCpp = gaussian_transform_separate_cpp_sourceCpp(x_explain_temp, x_train_temp), + replications = 20) +# test replications elapsed relative user.self sys.self user.child sys.child +# 2 cpp 20 10.933 2.352 10.082 0.179 0 0 +# 3 cpp_sourceCpp 20 4.648 1.000 4.389 0.100 0 0 +# 1 r 20 9.787 2.106 8.409 0.797 0 0 + +rbenchmark::benchmark(r = apply(X = rbind(matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols), + matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), + MARGIN = 2, + FUN = gaussian_transform_separate_old, + n_y = nrow(matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols))), + cpp = gaussian_transform_separate_cpp(matrix(rnorm(x_explain_rows*x_cols), + x_explain_rows, + x_cols), + matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), + cpp2 = .Call(`_shapr_gaussian_transform_separate_cpp`, + matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols), + matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), + cpp_cpp_sourceCpp = gaussian_transform_separate_cpp_sourceCpp(matrix(rnorm(x_explain_rows*x_cols), + x_explain_rows, + x_cols), + matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), + replications = 20) + +# test replications elapsed relative user.self sys.self user.child sys.child +# 2 cpp 20 12.634 2.202 11.275 0.352 0.00 0.00 +# 4 cpp_cpp_sourceCpp 20 5.737 1.000 5.287 0.182 0.00 0.00 +# 3 cpp2 20 11.566 2.016 10.890 0.246 0.01 0.01 +# 1 r 20 11.937 2.081 10.232 1.027 0.00 0.00 + + + + + +# Simple C examples compile issues time -------------------------------------------------------------------------------- +sourceCpp( + code = ' +#include +// [[Rcpp::depends(RcppArmadillo)]] +// [[Rcpp::export]] +Rcpp::NumericVector addVectors(const Rcpp::NumericVector& vec1, const Rcpp::NumericVector& vec2) { + // Check if the input vectors are of the same length + if (vec1.size() != vec2.size()) { + Rcpp::stop("Vectors must be of the same length."); + } + + // Create a result vector of the same length as the input vectors + Rcpp::NumericVector result(vec1.size()); + + // Perform element-wise addition + for (int i = 0; i < vec1.size(); ++i) { + result[i] = vec1[i] + vec2[i]; + } + + return result; +} + +// [[Rcpp::export]] +Rcpp::NumericMatrix addMatrices(const Rcpp::NumericMatrix& mat1, const Rcpp::NumericMatrix& mat2) { + // Check if the input matrices have the same dimensions + if (mat1.nrow() != mat2.nrow() || mat1.ncol() != mat2.ncol()) { + Rcpp::stop("Matrices must have the same dimensions."); + } + + // Create a result matrix of the same dimensions as the input matrices + Rcpp::NumericMatrix result(mat1.nrow(), mat1.ncol()); + + // Perform element-wise addition + for (int i = 0; i < mat1.nrow(); ++i) { + for (int j = 0; j < mat1.ncol(); ++j) { + result(i, j) = mat1(i, j) + mat2(i, j); + } + } + + return result; +} + +// [[Rcpp::export]] +arma::mat addMatricesArmadillo(const arma::mat& mat1, const arma::mat& mat2) { + // Check if the input matrices have the same dimensions + if (mat1.n_rows != mat2.n_rows || mat1.n_cols != mat2.n_cols) { + Rcpp::stop("Matrices must have the same dimensions."); + } + + // Perform element-wise addition using Armadillo + arma::mat result = mat1 + mat2; + + return result; +}') + + +# !!!!!READ!!!!! +# Copy the code above into `src/Copula.cpp` and then build the package with +devtools::load_all(".") + +# Dimension of matrix +n = 1000000 +m = 100 + +# Make matrices +mat1 = matrix(rnorm(n*m), n, m) +mat2 = matrix(rnorm(n*m), n, m) + +# Time when using the compiled code using `devtools::load_all()` +shapr_vec_time = system.time({shapr_vec_res = addVectors(mat1[,1], mat2[,1])}) +shapr_mat_rcpp_time = system.time({shapr_mat_rcpp_res <- addMatrices(mat1, mat2)}) +shapr_mat_arma_time = system.time({shapr_mat_arma_res <- addMatricesArmadillo(mat1, mat2)}) + +# Then we compile with `Rcpp::compileAttributes()` +Rcpp::compileAttributes(pkgdir = ".", verbose = TRUE) +compileAttributes_vec_time = system.time({compileAttributes_vec_res = addVectors(mat1[,1], mat2[,1])}) +compileAttributes_mat_rcpp_time = system.time({compileAttributes_mat_rcpp_res <- addMatrices(mat1, mat2)}) +compileAttributes_mat_arma_time = system.time({compileAttributes_mat_arma_res <- addMatricesArmadillo(mat1, mat2)}) + +# Then we compile with `Rcpp::sourceCpp()` +# Here a shared library is built +Rcpp::sourceCpp("src/Copula.cpp", verbose = TRUE) +sourceCpp_vec_time = system.time({sourceCpp_vec_res = addVectors(mat1[,1], mat2[,1])}) +sourceCpp_mat_rcpp_time = system.time({sourceCpp_mat_rcpp_res <- addMatrices(mat1, mat2)}) +sourceCpp_mat_arma_time = system.time({sourceCpp_mat_arma_res <- addMatricesArmadillo(mat1, mat2)}) + +# Look at the times. See a drastic decrease when using sourceCpp. Half on my mac +rbind(shapr_vec_time, + compileAttributes_vec_time, + sourceCpp_vec_time) +rbind(shapr_mat_rcpp_time, + compileAttributes_mat_rcpp_time, + sourceCpp_mat_rcpp_time) +rbind(shapr_mat_arma_time, + compileAttributes_mat_arma_time, + sourceCpp_mat_arma_time) + +# All equal +all.equal(shapr_vec_res, compileAttributes_vec_res) +all.equal(shapr_vec_res, sourceCpp_vec_res) + +all.equal(shapr_mat_rcpp_res, compileAttributes_mat_rcpp_res) +all.equal(shapr_mat_rcpp_res, sourceCpp_mat_rcpp_res) + +all.equal(shapr_mat_arma_res, compileAttributes_mat_arma_res) +all.equal(shapr_mat_arma_res, sourceCpp_mat_arma_res) + + + + +# Large n_samples equal results ---------------------------------------------------------------------------------------- +{ + n_samples <- 1000000 + n_train <- 1000 + n_test <- 5 + M <- 4 + rho <- 0.5 + betas <- c(0, rep(1, M)) + + # We use the Gaussian copula approach + approach <- "copula" + + # Mean of the multivariate Gaussian distribution + mu <- rep(0, times = M) + mu <- seq(M) + + # Create the covariance matrix + sigma <- matrix(rho, ncol = M, nrow = M) # Old + for (i in seq(1, M - 1)) { + for (j in seq(i + 1, M)) { + sigma[i, j] <- sigma[j, i] <- rho^abs(i - j) + } + } + diag(sigma) <- 1 + + # Set seed for reproducibility + seed_setup <- 1996 + set.seed(seed_setup) + + # Make Gaussian data + data_train <- data.table(mvtnorm::rmvnorm(n = n_train, mean = mu, sigma = sigma)) + data_test <- data.table(mvtnorm::rmvnorm(n = n_test, mean = mu, sigma = sigma)) + colnames(data_train) <- paste("X", seq(M), sep = "") + colnames(data_test) <- paste("X", seq(M), sep = "") + + # Make the response + response_train <- as.vector(cbind(1, as.matrix(data_train)) %*% betas) + response_test <- as.vector(cbind(1, as.matrix(data_test)) %*% betas) + + # Put together the data + data_train_with_response <- copy(data_train)[, y := response_train] + data_test_with_response <- copy(data_test)[, y := response_test] + + # Fit a LM model + predictive_model <- lm(y ~ ., data = data_train_with_response) + + # Get the prediction zero, i.e., the phi0 Shapley value. + prediction_zero <- mean(response_train) + + model <- predictive_model + x_explain <- data_test + x_train <- data_train + keep_samp_for_vS <- FALSE + predict_model <- NULL + get_model_specs <- NULL + timing <- TRUE + n_combinations <- NULL + group <- NULL + feature_specs <- shapr:::get_feature_specs(get_model_specs, model) + n_batches <- 1 + seed <- 1 + + internal <- setup( + x_train = x_train, + x_explain = x_explain, + approach = approach, + prediction_zero = prediction_zero, + n_combinations = n_combinations, + group = group, + n_samples = n_samples, + n_batches = n_batches, + seed = seed, + feature_specs = feature_specs, + keep_samp_for_vS = keep_samp_for_vS, + predict_model = predict_model, + get_model_specs = get_model_specs, + timing = timing + ) + + # Gets predict_model (if not passed to explain) + predict_model <- shapr:::get_predict_model( + predict_model = predict_model, + model = model + ) + + # Sets up the Shapley (sampling) framework and prepares the + # conditional expectation computation for the chosen approach + # Note: model and predict_model are ONLY used by the AICc-methods of approach empirical to find optimal parameters + internal <- shapr:::setup_computation(internal, model, predict_model) +} + +look_at_coalitions <- seq(1, 2^M - 2) +# look_at_coalitions <- seq(1, 2^M - 2, 10) +# look_at_coalitions <- seq(1, 2^M - 2, 25) + +# The old R code +time_only_R <- system.time({ + res_only_R <- prepare_data.copula_old( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +time_only_R + +# The C++ code with my own quantile function +time_only_cpp <- system.time({ + res_only_cpp <- prepare_data.copula( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +data.table::setorderv(res_only_cpp, c("id", "id_combination")) +time_only_cpp + +# The C++ code with my own quantile function +time_only_cpp_sourceCpp <- system.time({ + res_only_cpp_sourceCpp <- prepare_data.copula_sourceCpp( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +data.table::setorderv(res_only_cpp_sourceCpp, c("id", "id_combination")) +time_only_cpp_sourceCpp + +# Look at the differences +# Aggregate the MC sample values for each explicand and combination +# res_only_R <- res_only_R[, w := NULL] +# res_only_cpp <- res_only_cpp[, w := NULL] +# res_only_cpp_sourceCpp <- res_only_cpp_sourceCpp[, w := NULL] +res_only_R_agr <- res_only_R[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_cpp_agr <- res_only_cpp[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_cpp_sourceCpp_agr <- res_only_cpp_sourceCpp[, lapply(.SD, mean), by = c("id", "id_combination")] + +# Difference +res_only_R_agr - res_only_cpp_agr +res_only_R_agr - res_only_cpp_sourceCpp_agr + +# Max absolute difference +max(abs(res_only_R_agr - res_only_cpp_agr)) +max(abs(res_only_R_agr - res_only_cpp_sourceCpp_agr)) + +# Look at the difference in Shapley values +temp_shapley_value_func = function(dt, internal, model, predict_model) { + compute_preds( + dt, # Updating dt by reference + feature_names = internal$parameters$feature_names, + predict_model = predict_model, + model = model, + pred_cols = paste0("p_hat", seq_len(internal$parameters$output_size)), + type = internal$parameters$type, + horizon = internal$parameters$horizon, + n_endo = internal$data$n_endo, + explain_idx = internal$parameters$explain_idx, + explain_lags = internal$parameters$explain_lags, + y = internal$data$y, + xreg = internal$data$xreg + ) + dt_vS2 <- compute_MCint(dt, paste0("p_hat", seq_len(internal$parameters$output_size))) + dt_vS <- rbind(t(as.matrix(c(1, rep(prediction_zero, n_test)))), dt_vS2, t(as.matrix(c(2^M, response_test))), + use.names = FALSE) + colnames(dt_vS) = colnames(dt_vS2) + compute_shapley_new(internal, dt_vS) +} + +# Compute the Shapley values +res_shapley_R = temp_shapley_value_func(data.table::copy(res_only_R), internal, model, predict_model) +res_shapley_cpp = temp_shapley_value_func(data.table::copy(res_only_cpp), internal, model, predict_model) +res_shapley_cpp_sourceCpp = temp_shapley_value_func(data.table::copy(res_only_cpp_sourceCpp), + internal, + model, + predict_model) +# Look at the difference +abs(res_shapley_R - res_shapley_cpp) +abs(res_shapley_R - res_shapley_cpp_sourceCpp) +max(abs(res_shapley_R - res_shapley_cpp)) +max(abs(res_shapley_R - res_shapley_cpp_sourceCpp)) + +# When n_samples <- 1000000, n_train <- 1000, n_test <- 5, M <- 4 +# > abs(res_shapley_R - res_shapley_cpp) +# none X1 X2 X3 X4 +# 1: 7.2140e-11 0.00056643 0.00109848 9.5478e-05 0.00043657 +# 2: 4.3903e-10 0.00179695 0.00163158 1.8549e-03 0.00202031 +# 3: 9.3072e-11 0.00142949 0.00087037 1.2457e-03 0.00180482 +# 4: 5.1367e-11 0.00079767 0.00099899 7.2505e-04 0.00052373 +# 5: 3.8260e-10 0.00032232 0.00046644 1.1651e-03 0.00102102 +# > abs(res_shapley_R - res_shapley_cpp_sourceCpp) +# none X1 X2 X3 X4 +# 1: 3.1773e-10 0.00061369 0.00096567 0.00139486 0.00174684 +# 2: 2.1354e-10 0.00164283 0.00139693 0.00051290 0.00075879 +# 3: 1.2370e-10 0.00143125 0.00066145 0.00021455 0.00055524 +# 4: 2.0396e-10 0.00090834 0.00091129 0.00077478 0.00077773 +# 5: 1.3627e-10 0.00038308 0.00033615 0.00031426 0.00026733 +# > max(abs(res_shapley_R - res_shapley_cpp)) +# [1] 0.0020203 +# > max(abs(res_shapley_R - res_shapley_cpp_sourceCpp)) +# [1] 0.0017468 diff --git a/inst/scripts/compare_gaussian_in_R_and_C++.R b/inst/scripts/compare_gaussian_in_R_and_C++.R new file mode 100644 index 000000000..b9ca398aa --- /dev/null +++ b/inst/scripts/compare_gaussian_in_R_and_C++.R @@ -0,0 +1,2735 @@ +# Libraries ------------------------------------------------------------------------------------------------------- +# library(shapr) +# library(rbenchmark) +library(data.table) + + + +# Other functions ------------------------------------------------------------------------------------------------- +#' Sample conditional Gaussian variables +#' +#' @inheritParams sample_copula +#' +#' @return data.table +#' +#' @keywords internal +#' +#' @author Martin Jullum +sample_gaussian <- function(index_given, n_samples, mu, cov_mat, m, x_explain) { + # Check input + stopifnot(is.matrix(x_explain)) + + # Handles the unconditional and full conditional separtely when predicting + cnms <- colnames(x_explain) + if (length(index_given) %in% c(0, m)) { + return(data.table::as.data.table(x_explain)) + } + + dependent_ind <- seq_along(mu)[-index_given] + x_explain_gaussian <- x_explain[index_given] + tmp <- condMVNorm::condMVN( + mean = mu, + sigma = cov_mat, + dependent.ind = dependent_ind, + given.ind = index_given, + X.given = x_explain_gaussian + ) + + # Makes the conditional covariance matrix symmetric in the rare case where numerical instability made it unsymmetric + if (!isSymmetric(tmp[["condVar"]])) { + tmp[["condVar"]] <- Matrix::symmpart(tmp$condVar) + } + + ret0 <- mvnfast::rmvn(n = n_samples, mu = tmp$condMean, sigma = tmp$condVar) + + ret <- matrix(NA, ncol = m, nrow = n_samples) + ret[, index_given] <- rep(x_explain_gaussian, each = n_samples) + ret[, dependent_ind] <- ret0 + + colnames(ret) <- cnms + return(as.data.table(ret)) +} + + +# Cpp functions --------------------------------------------------------------------------------------------------- +# #include +# #include +# using namespace Rcpp; +# +# +# //' Generate Gaussian MC samples +# //' +# //' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the +# //' univariate standard normal. +# //' @param x_explain_mat matrix. Matrix of dimension `n_explain` times `n_features` containing the observations +# //' to explain. +# //' @param S matrix. Matrix of dimension `n_combinations` times `n_features` containing binary representations of +# //' the used coalitions. +# //' @param mu vector. Vector of length `n_features` containing the mean of each feature. +# //' @param cov_mat mat. Matrix of dimension `n_features` times `n_features` containing the pariwise covariance between +# //' all features. +# //' +# //' @export +# //' @keywords internal +# //' +# //' @return List of length `n_combinations`*`n_samples`, where each entry is a matrix of dimension `n_samples` times +# //' `n_features` containing the conditional MC samples for each coalition and explicand. +# //' @author Lars Henry Berge Olsen +# // [[Rcpp::export]] +# Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# +# // Pre-allocate result matrix +# arma::mat ret(n_samples, n_features); +# +# // Create a list containing the MC samples for all coalitions and test observations +# Rcpp::List result_list; +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? +# ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# result_list.push_back(ret); +# } +# } +# +# return result_list; +# } +# +# // [[Rcpp::export]] +# Rcpp::List prepare_data_gaussian_cpp_with_wrap(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# +# // Pre-allocate result matrix +# arma::mat ret(n_samples, n_features); +# +# // Create a list containing the MC samples for all coalitions and test observations +# Rcpp::List result_list; +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? +# ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# result_list.push_back(Rcpp::wrap(ret)); +# } +# } +# +# return result_list; +# } +# +# // [[Rcpp::export]] +# Rcpp::List prepare_data_gaussian_cpp_v2(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# +# // Create a list containing the MC samples for all coalitions and test observations +# Rcpp::List result_list; +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); +# +# // Loop over the different test observations and Combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# arma::mat ret(n_samples, n_features); +# ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); +# ret.cols(Sbar_now_idx) = trans(MC_samples_mat_now + repmat(x_Sbar_mean.col(idx_now), 1, n_samples)); +# result_list.push_back(ret); +# } +# } +# +# return result_list; +# } +# +# // [[Rcpp::export]] +# arma::mat prepare_data_gaussian_cpp_fix_large_mat(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# int n_coalitions = S.n_rows; +# +# // Pre-allocate result matrix +# arma::mat return_mat(n_coalitions*n_explain*n_samples, n_features); +# +# // Create a list containing the MC samples for all coalitions and test observations +# std::list result_list; +# // Rcpp::List result_list; +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# // Maybe faster to create vector 0:(n_samples - 1) and then just add n_samples in each loop. +# arma::uvec row_indices_now = arma::linspace(S_ind*n_explain*n_samples + idx_now*n_samples, +# S_ind*n_explain*n_samples + idx_now*n_samples + n_samples - 1, +# n_samples); +# +# return_mat.submat(row_indices_now, S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); +# return_mat.submat(row_indices_now, Sbar_now_idx) = +# MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# } +# } +# +# return return_mat; +# } +# +# // Diff in v2 is where we do the transpose +# // [[Rcpp::export]] +# arma::mat prepare_data_gaussian_cpp_fix_large_mat_v2(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# int n_coalitions = S.n_rows; +# +# // Pre-allocate result matrix +# arma::mat return_mat(n_coalitions*n_explain*n_samples, n_features); +# +# // Create a list containing the MC samples for all coalitions and test observations +# std::list result_list; +# // Rcpp::List result_list; +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# // Maybe faster to create vector 0:(n_samples - 1) and then just add n_samples in each loop. +# arma::uvec row_indices_now = arma::linspace(S_ind*n_explain*n_samples + idx_now*n_samples, +# S_ind*n_explain*n_samples + idx_now*n_samples + n_samples - 1, +# n_samples); +# +# return_mat.submat(row_indices_now, S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); +# return_mat.submat(row_indices_now, Sbar_now_idx) = +# trans(MC_samples_mat_now + repmat(x_Sbar_mean.col(idx_now), 1, n_samples)); +# } +# } +# +# return return_mat; +# } +# +# // [[Rcpp::export]] +# arma::cube prepare_data_gaussian_cpp_fix_cube(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# int n_coalitions = S.n_rows; +# +# // Pre-allocate result matrix +# arma::mat aux_mat(n_samples, n_features); +# arma::cube result_cube(n_samples, n_features, n_explain*n_coalitions); +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? +# aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# result_cube.slice(S_ind*n_explain + idx_now) = aux_mat; +# } +# } +# +# return result_cube; +# } +# +# // [[Rcpp::export]] +# arma::cube prepare_data_gaussian_cpp_fix_cube_v2(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# int n_coalitions = S.n_rows; +# +# // Pre-allocate result matrix +# arma::mat aux_mat(n_samples, n_features); +# arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? +# aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# result_cube.col(S_ind*n_explain + idx_now) = aux_mat; +# } +# } +# +# return result_cube; +# } +# +# // [[Rcpp::export]] +# Rcpp::List prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# +# // Pre-allocate result matrix +# arma::mat aux_mat(n_samples, n_features); +# +# // Create a list containing lists that contian the MC samples for all coalitions and test observations in each matrix +# Rcpp::List result_list(S.n_rows); +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { +# +# Rcpp::List result_list_now(n_explain); +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? +# aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# result_list_now[idx_now] = aux_mat; +# } +# result_list[S_ind] = result_list_now; +# } +# +# return result_list; +# } +# +# // [[Rcpp::export]] +# std::list prepare_data_gaussian_cpp_fix_std_list(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# +# // Pre-allocate result matrix +# arma::mat aux_mat(n_samples, n_features); +# +# // Create a list containing the MC samples for all coalitions and test observations +# std::list result_list; +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? +# aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# result_list.push_back(aux_mat); +# } +# } +# +# return result_list; +# } + + + +# Old and new version --------------------------------------------------------------------------------------------- +prepare_data_gaussian_old <- function(internal, index_features = NULL, ...) { + x_train <- internal$data$x_train + x_explain <- internal$data$x_explain + n_explain <- internal$parameters$n_explain + gaussian.cov_mat <- internal$parameters$gaussian.cov_mat + n_samples <- internal$parameters$n_samples + gaussian.mu <- internal$parameters$gaussian.mu + n_features <- internal$parameters$n_features + + X <- internal$objects$X + + x_explain0 <- as.matrix(x_explain) + dt_l <- list() + + if (is.null(index_features)) { + features <- X$features + } else { + features <- X$features[index_features] + } + + for (i in seq_len(n_explain)) { + cat(sprintf("%d,", i)) + l <- lapply( + X = features, + FUN = sample_gaussian, #shapr:::sample_gaussian, + n_samples = n_samples, + mu = gaussian.mu, + cov_mat = gaussian.cov_mat, + m = n_features, + x_explain = x_explain0[i, , drop = FALSE] + ) + + dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination") + dt_l[[i]][, w := 1 / n_samples] + dt_l[[i]][, id := i] + if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]] + } + + dt <- data.table::rbindlist(dt_l, use.names = TRUE, fill = TRUE) + return(dt) +} + + +# In this version we improve the method by only computing the conditional covariance matrices once. +prepare_data_gaussian_new_v1 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% (t(x_S_star) - mu_S) + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + B <- matrix(nrow = n_samples, ncol = sum(Sbar_now)) + class(B) <- "numeric" + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Sample the MC samples from the conditional Gaussian distribution for one test observation. + .Call("rmvnCpp", + n_ = n_samples, + mu_ = x_Sbar_mean[, idx_now], + sigma_ = cond_cov_mat_Sbar_given_S, + ncores_ = 1, + isChol_ = FALSE, + A_ = B, + PACKAGE = "mvnfast" + ) + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- B + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +# This is similar to v1, but we compute the Cholensky decomposition only once for each coalitions. +# In v1, it is computed n_explain times. +prepare_data_gaussian_new_v2 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% (t(x_S_star) - mu_S) + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + B <- matrix(nrow = n_samples, ncol = sum(Sbar_now)) + class(B) <- "numeric" + + # Compute the Cholensky decomposition + cond_cov_mat_Sbar_given_S_chol <- chol(cond_cov_mat_Sbar_given_S) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Sample the MC samples from the conditional Gaussian distribution for one test observation. + .Call("rmvnCpp", + n_ = n_samples, + mu_ = x_Sbar_mean[, idx_now], + sigma_ = cond_cov_mat_Sbar_given_S_chol, + ncores_ = 1, + isChol_ = TRUE, + A_ = B, + PACKAGE = "mvnfast" + ) + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- B + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +# Here we improve the method speed by only sampling once per coalition +# and only add the test-observation-dependent mean in a secondary call. +prepare_data_gaussian_new_v3 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% (t(x_S_star) - mu_S) + + # rbenchmark::benchmark( + # t(sweep(x_S_star, 2, mu_S, FUN = "-")), + # t(x_S_star) - mu_S) + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + B <- matrix(nrow = n_samples, ncol = sum(Sbar_now)) + class(B) <- "numeric" + + .Call("rmvnCpp", + n_ = n_samples, + mu_ = rep(0, length(mu_Sbar)), + sigma_ = cond_cov_mat_Sbar_given_S, + ncores_ = 1, + isChol_ = FALSE, + A_ = B, + PACKAGE = "mvnfast" + ) + + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B <- t(B) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- t(B + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +# Same as v3, but we now use R to compute Cholensky +prepare_data_gaussian_new_v4 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% (t(x_S_star) - mu_S) + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + B <- matrix(nrow = n_samples, ncol = sum(Sbar_now)) + class(B) <- "numeric" + + .Call("rmvnCpp", + n_ = n_samples, + mu_ = rep(0, length(mu_Sbar)), + sigma_ = chol(cond_cov_mat_Sbar_given_S), + ncores_ = 1, + isChol_ = TRUE, + A_ = B, + PACKAGE = "mvnfast" + ) + + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B <- t(B) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- t(B + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +# Here we only want to generate the data once. So we generate n_samples from N(0, I), +# and then use Cholensky to transform to N(O, Sigma_{Sbar|S}), and then add the means. +prepare_data_gaussian_new_v5 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + B <- matrix(nrow = n_samples, ncol = n_features) + class(B) <- "numeric" + + .Call("rmvnCpp", + n_ = n_samples, + mu_ = rep(0, n_features), + sigma_ = diag(n_features), + ncores_ = 1, + isChol_ = TRUE, + A_ = B, + PACKAGE = "mvnfast" + ) + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% t(sweep(x_S_star, 2, mu_S, FUN = "-")) + + # Transform the samples to be from N(O, Sigma_Sbar|S) + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B_now <- t(B[, Sbar_now] %*% chol(cond_cov_mat_Sbar_given_S)) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- t(B_now + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +prepare_data_gaussian_new_v5_rnorm <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + # B <- matrix(nrow = n_samples, ncol = n_features) + # class(B) <- "numeric" + + # .Call("rmvnCpp", + # n_ = n_samples, + # mu_ = rep(0, n_features), + # sigma_ = diag(n_features), + # ncores_ = 1, + # isChol_ = TRUE, + # A_ = B, + # PACKAGE = "mvnfast" + # ) + + B <- matrix(rnorm(n_samples*n_features),nrow = n_samples, ncol = n_features) + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% t(sweep(x_S_star, 2, mu_S, FUN = "-")) + + + + + + # Transform the samples to be from N(O, Sigma_Sbar|S) + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B_now <- t(B[, Sbar_now] %*% chol(cond_cov_mat_Sbar_given_S)) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- t(B_now + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_v2 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + # B <- matrix(nrow = n_samples, ncol = n_features) + # class(B) <- "numeric" + + # .Call("rmvnCpp", + # n_ = n_samples, + # mu_ = rep(0, n_features), + # sigma_ = diag(n_features), + # ncores_ = 1, + # isChol_ = TRUE, + # A_ = B, + # PACKAGE = "mvnfast" + # ) + + B <- matrix(rnorm(n_samples*n_features),nrow = n_samples, ncol = n_features) + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% (t(x_S_star) - mu_S) + + + # Transform the samples to be from N(O, Sigma_Sbar|S) + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B_now <- t(B[, Sbar_now] %*% chol(cond_cov_mat_Sbar_given_S)) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_S_star[idx_now,]), each = n_samples) + ret[, Sbar_now] <- t(B_now + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + + + +prepare_data_gaussian_new_v5_rnorm_cpp <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_list <- prepare_data_gaussian_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + dt = as.data.table(do.call(rbind, result_list)) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_with_wrap <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_list <- prepare_data_gaussian_cpp_with_wrap( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + dt = as.data.table(do.call(rbind, result_list)) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + + +prepare_data_gaussian_new_v5_rnorm_cpp_v2 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_list <- prepare_data_gaussian_cpp_v2( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + dt = as.data.table(do.call(rbind, result_list)) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_fix_large_mat <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp to create the data table with the MC samples for all explicands and coalitions + dt <- as.data.table( + prepare_data_gaussian_cpp_fix_large_mat( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + ) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_fix_large_mat_v2 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp to create the data table with the MC samples for all explicands and coalitions + dt <- as.data.table( + prepare_data_gaussian_cpp_fix_large_mat_v2( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + ) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_list <- prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + # Here we first put the inner list together and then the whole thing. Maybe exist another faster way! + dt = as.data.table(do.call(rbind, lapply(result_list, function(inner_list) do.call(rbind, inner_list)))) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_fix_cube <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_cube <- prepare_data_gaussian_cpp_fix_cube( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + # Reshape the 3D array to 2D + # This is slower + # dt = as.data.table(matrix(aperm(result_cube, c(1, 3, 2)), + # nrow = prod(dim(result_cube)[-2]), + # ncol = dim(result_cube)[2])) + dims = dim(result_cube) + result_cube = aperm(result_cube, c(1, 3, 2)) + dim(result_cube) <- c(prod(dims[-2]), dims[2]) + dt = as.data.table(result_cube) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_fix_cube_v2 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + n_combinations_now <- length(index_features) + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + dt <- prepare_data_gaussian_cpp_fix_cube_v2( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + # Reshape and convert to data.table + dim(dt) = c(n_combinations_now*n_explain*n_samples, n_features) + print(system.time({dt = as.data.table(dt)}, gcFirst = FALSE)) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_fix_std_list <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_list <- prepare_data_gaussian_cpp_fix_std_list( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + # FIND A BETTER WAY TO DO THIS + for (i in seq(length(result_list))) { + dim(result_list[[i]]) = c(n_samples, n_features) + } + + # Here we first put the inner list together and then the whole thing. Maybe exist another faster way! + dt = as.data.table(do.call(rbind, result_list)) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +# Here we only want to generate the data once. So we generate n_samples*n_batches from N(0, I), +# and then use Cholensky to transform to N(O, Sigma_{Sbar|S}), and then add the means. +prepare_data_gaussian_new_v6 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + n_combinations_in_this_batch <- nrow(S) + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + B <- matrix(nrow = n_samples * n_combinations_in_this_batch, ncol = n_features) + class(B) <- "numeric" + + .Call("rmvnCpp", + n_ = n_samples * n_combinations_in_this_batch, + mu_ = rep(0, n_features), + sigma_ = diag(n_features), + ncores_ = 1, + isChol_ = TRUE, + A_ = B, + PACKAGE = "mvnfast" + ) + + # Indices of the start for the combinations + B_indices <- n_samples * (seq(0, n_combinations_in_this_batch)) + 1 + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% t(sweep(x_S_star, 2, mu_S, FUN = "-")) + + # Transform the samples to be from N(O, Sigma_Sbar|S) + # Extract the relevant samples for this combination + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B_now <- t(B[B_indices[S_ind]:(B_indices[S_ind + 1] - 1), Sbar_now] %*% chol(cond_cov_mat_Sbar_given_S)) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- t(B_now + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +# Compare the methods --------------------------------------------------------------------------------------------- + +## Setup ----------------------------------------------------------------------------------------------------------- + +{ + n_samples <- 1000 + # n_samples <- 25000 + n_train <- 1000 + n_test <- 100 + M <- 8 + rho <- 0.5 + betas <- c(0, rep(1, M)) + + # We use the Gaussian approach + approach <- "gaussian" + + # Mean of the multivariate Gaussian distribution + mu <- rep(0, times = M) + mu <- seq(M) + + # Create the covariance matrix + sigma <- matrix(rho, ncol = M, nrow = M) # Old + for (i in seq(1, M - 1)) { + for (j in seq(i + 1, M)) { + sigma[i, j] <- sigma[j, i] <- rho^abs(i - j) + } + } + diag(sigma) <- 1 + + # Set seed for reproducibility + seed_setup <- 1996 + set.seed(seed_setup) + + # Make Gaussian data + data_train <- data.table(mvtnorm::rmvnorm(n = n_train, mean = mu, sigma = sigma)) + data_test <- data.table(mvtnorm::rmvnorm(n = n_test, mean = mu, sigma = sigma)) + colnames(data_train) <- paste("X", seq(M), sep = "") + colnames(data_test) <- paste("X", seq(M), sep = "") + + # Make the response + response_train <- as.vector(cbind(1, as.matrix(data_train)) %*% betas) + response_test <- as.vector(cbind(1, as.matrix(data_test)) %*% betas) + + # Put together the data + data_train_with_response <- copy(data_train)[, y := response_train] + data_test_with_response <- copy(data_test)[, y := response_test] + + # Fit a LM model + predictive_model <- lm(y ~ ., data = data_train_with_response) + + # Get the prediction zero, i.e., the phi0 Shapley value. + prediction_zero <- mean(response_train) + + model <- predictive_model + x_explain <- data_test + x_train <- data_train + keep_samp_for_vS <- FALSE + predict_model <- NULL + get_model_specs <- NULL + timing <- TRUE + n_combinations <- NULL + group <- NULL + feature_specs <- get_feature_specs(get_model_specs, model) + n_batches <- 1 + seed <- 1 + + internal <- setup( + x_train = x_train, + x_explain = x_explain, + approach = approach, + prediction_zero = prediction_zero, + n_combinations = n_combinations, + group = group, + n_samples = n_samples, + n_batches = n_batches, + seed = seed, + feature_specs = feature_specs, + keep_samp_for_vS = keep_samp_for_vS, + predict_model = predict_model, + get_model_specs = get_model_specs, + timing = timing, + gaussian.mu = mu, + gaussian.cov_mat = sigma + ) + + # Gets predict_model (if not passed to explain) + predict_model <- get_predict_model( + predict_model = predict_model, + model = model + ) + + # Sets up the Shapley (sampling) framework and prepares the + # conditional expectation computation for the chosen approach + # Note: model and predict_model are ONLY used by the AICc-methods of approach empirical to find optimal parameters + internal <- setup_computation(internal, model, predict_model) +} + + + +## Compare time ---------------------------------------------------------------------------------------------------- + +# Recall that old version iterate over the observations and then the coalitions. +# While the new version iterate over the coalitions and then the observations. +# The latter lets us reuse the computed conditional distributions for all observations. +look_at_coalitions <- seq(1, 2^M - 2) +#look_at_coalitions <- seq(1, 2^M - 2, 10) +#look_at_coalitions <- seq(1, 2^M - 2, 25) +time_old <- system.time({ + res_old <- prepare_data_gaussian_old(internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_old <- NULL +# Set to NULL as it is many GB when we look at many combinations in one batch and the methods slow down due to +# little available memory. The same below. + +time_new_v1 <- system.time({ + res_new_v1 <- prepare_data_gaussian_new_v1( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v1 <- NULL + +time_new_v2 <- system.time({ + res_new_v2 <- prepare_data_gaussian_new_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v2 <- NULL + +time_new_v3 <- system.time({ + res_new_v3 <- prepare_data_gaussian_new_v3( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v3 <- NULL + +time_new_v4 <- system.time({ + res_new_v4 <- prepare_data_gaussian_new_v4( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v4 <- NULL + +time_new_v5 <- system.time({ + res_new_v5 <- prepare_data_gaussian_new_v5( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5 <- NULL + +time_new_v5_rnorm <- system.time({ + res_new_v5_rnorm <- prepare_data_gaussian_new_v5_rnorm( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm <- NULL + +time_new_v5_rnorm_v2 <- system.time({ + res_new_v5_rnorm_v2 <- prepare_data_gaussian_new_v5_rnorm_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_v2 <- NULL + +time_new_v5_rnorm_cpp <- system.time({ + res_new_v5_rnorm_cpp <- prepare_data_gaussian_new_v5_rnorm_cpp( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp <- NULL + +time_new_v5_rnorm_cpp_with_wrap <- system.time({ + res_new_v5_rnorm_cpp_with_wrap <- prepare_data_gaussian_new_v5_rnorm_cpp_with_wrap( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_with_wrap <- NULL + +time_new_v5_rnorm_cpp_v2 <- system.time({ + res_new_v5_rnorm_cpp_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_v2 <- NULL + +time_new_v5_rnorm_cpp_fix_large_mat <- system.time({ + res_new_v5_rnorm_cpp_fix_large_mat <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_large_mat( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix_large_mat <- NULL + +time_new_v5_rnorm_cpp_fix_large_mat_v2 <- system.time({ + res_new_v5_rnorm_cpp_fix_large_mat_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_large_mat_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix_large_mat_v2 <- NULL + +time_new_v5_rnorm_cpp_fix_cube <- system.time({ + res_new_v5_rnorm_cpp_fix_cube <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_cube( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix_cube <- NULL + +time_new_v5_rnorm_cpp_fix_cube_v2 <- system.time({ + res_new_v5_rnorm_cpp_fix_cube_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_cube_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix_cube_v2 <- NULL + +time_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- system.time({ + res_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- NULL + +time_new_v5_rnorm_cpp_fix_std_list <- system.time({ + res_new_v5_rnorm_cpp_fix_std_list <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_std_list( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix_std_list <- NULL + +time_new_v6 <- system.time({ + res_new_v6 <- prepare_data_gaussian_new_v6( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v6 <- NULL + +# Create a table of the times. Less is better +times <- rbind(time_old, + time_new_v1, + time_new_v2, + time_new_v3, + time_new_v4, + time_new_v5, + time_new_v5_rnorm, + time_new_v5_rnorm_v2, + time_new_v5_rnorm_cpp, + time_new_v5_rnorm_cpp_with_wrap, + time_new_v5_rnorm_cpp_v2, + time_new_v5_rnorm_cpp_fix_large_mat, + time_new_v5_rnorm_cpp_fix_large_mat_v2, + time_new_v5_rnorm_cpp_fix_cube, + time_new_v5_rnorm_cpp_fix_cube_v2, + time_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices, + time_new_v5_rnorm_cpp_fix_std_list, + time_new_v6) +times + +# Look at the relative time compared to the old method. Larger value is better. +# Tells us how many times faster the new version is. +times_relative <- t(sapply(seq_len(nrow(times)), function(idx) times[1, ] / times[idx, ])) +rownames(times_relative) <- paste0(rownames(times), "_rel") +times_relative + +# ALL COALITIONS (look_at_coalitions = seq(1, 2^M-2)) +# user.self sys.self elapsed user.child sys.child +# time_old 38.663 3.654 43.044 0.000 0.000 +# time_new_v1 14.693 3.539 18.709 0.000 0.000 +# time_new_v2 15.545 3.897 19.966 0.012 0.032 +# time_new_v3 13.476 3.838 17.812 0.000 0.000 +# time_new_v4 14.085 4.858 19.718 0.015 0.033 +# time_new_v5 13.508 4.104 18.148 0.000 0.000 +# time_new_v5_rnorm 13.107 4.178 17.705 0.000 0.000 +# time_new_v5_rnorm_v2 13.309 4.458 18.233 0.010 0.023 +# time_new_v5_rnorm_cpp 44.782 5.589 51.849 0.000 0.000 +# time_new_v5_rnorm_cpp_with_wrap 45.816 4.799 51.979 0.021 0.070 +# time_new_v5_rnorm_cpp_v2 44.997 6.513 52.931 0.000 0.000 +# time_new_v5_rnorm_cpp_fix_large_mat 5.594 2.142 7.831 0.000 0.000 +# time_new_v5_rnorm_cpp_fix_large_mat_v2 6.160 2.112 8.499 0.000 0.000 +# time_new_v5_rnorm_cpp_fix_cube 5.607 2.745 8.558 0.000 0.000 +# time_new_v5_rnorm_cpp_fix_cube_v2 4.621 2.121 6.862 0.000 0.000 +# time_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices 6.016 3.687 10.469 0.000 0.000 +# time_new_v5_rnorm_cpp_fix_std_list 5.407 3.272 8.841 0.000 0.000 +# time_new_v6 13.540 4.267 18.361 0.000 0.000 +# user.self sys.self elapsed user.child sys.child +# time_old_rel 1.00000 1.00000 1.00000 NaN NaN +# time_new_v1_rel 2.63139 1.03250 2.30071 NaN NaN +# time_new_v2_rel 2.48717 0.93764 2.15586 0 0 +# time_new_v3_rel 2.86903 0.95206 2.41657 NaN NaN +# time_new_v4_rel 2.74498 0.75216 2.18298 0 0 +# time_new_v5_rel 2.86223 0.89035 2.37183 NaN NaN +# time_new_v5_rnorm_rel 2.94980 0.87458 2.43118 NaN NaN +# time_new_v5_rnorm_v2_rel 2.90503 0.81965 2.36077 0 0 +# time_new_v5_rnorm_cpp_rel 0.86336 0.65378 0.83018 NaN NaN +# time_new_v5_rnorm_cpp_with_wrap_rel 0.84388 0.76141 0.82810 0 0 +# time_new_v5_rnorm_cpp_v2_rel 0.85924 0.56103 0.81321 NaN NaN +# time_new_v5_rnorm_cpp_fix_large_mat_rel 6.91151 1.70588 5.49662 NaN NaN +# time_new_v5_rnorm_cpp_fix_large_mat_v2_rel 6.27646 1.73011 5.06460 NaN NaN +# time_new_v5_rnorm_cpp_fix_cube_rel 6.89549 1.33115 5.02968 NaN NaN +# time_new_v5_rnorm_cpp_fix_cube_v2_rel 8.36680 1.72277 6.27281 NaN NaN +# time_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices_rel 6.42670 0.99105 4.11157 NaN NaN +# time_new_v5_rnorm_cpp_fix_std_list_rel 7.15055 1.11675 4.86868 NaN NaN +# time_new_v6_rel 2.85547 0.85634 2.34432 NaN NaN + + +# 26 coalitions (look_at_coalitions = seq(1, 2^M-2, 10)) +# user.self sys.self elapsed user.child sys.child +# time_old 25.913 2.797 30.399 0 0 +# time_new_v1 7.071 1.624 8.997 0 0 +# time_new_v2 6.653 1.461 8.244 0 0 +# time_new_v3 5.700 1.690 7.521 0 0 +# time_new_v4 5.877 1.826 7.852 0 0 +# time_new_v5 5.522 1.594 7.286 0 0 +# time_new_v6 5.559 1.668 7.335 0 0 +# user.self sys.self elapsed user.child sys.child +# time_old_rel 1.0000 1.0000 1.0000 NaN NaN +# time_new_v1_rel 3.6647 1.7223 3.3788 NaN NaN +# time_new_v2_rel 3.8949 1.9144 3.6874 NaN NaN +# time_new_v3_rel 4.5461 1.6550 4.0419 NaN NaN +# time_new_v4_rel 4.4092 1.5318 3.8715 NaN NaN +# time_new_v5_rel 4.6927 1.7547 4.1722 NaN NaN +# time_new_v6_rel 4.6614 1.6769 4.1444 NaN NaN + + +# 11 coalitions (look_at_coalitions = seq(1, 2^M-2, 25)) +# user.self sys.self elapsed user.child sys.child +# time_old 11.251 1.187 12.961 0.000 0.000 +# time_new_v1 3.273 0.873 4.306 0.000 0.000 +# time_new_v2 3.043 0.690 4.011 0.000 0.000 +# time_new_v3 2.677 0.794 3.587 0.000 0.000 +# time_new_v4 2.598 0.759 3.460 0.000 0.000 +# time_new_v5 2.574 0.752 3.613 0.000 0.000 +# time_new_v6 2.303 0.669 3.009 0.000 0.000 +# user.self sys.self elapsed user.child sys.child +# time_old_rel 1.0000 1.0000 1.0000 NaN NaN +# time_new_v1_rel 3.4375 1.3597 3.0100 NaN NaN +# time_new_v2_rel 3.6973 1.7203 3.2314 NaN NaN +# time_new_v3_rel 4.2028 1.4950 3.6133 NaN NaN +# time_new_v4_rel 4.3306 1.5639 3.7460 NaN NaN +# time_new_v5_rel 4.3710 1.5785 3.5873 NaN NaN +# time_new_v6_rel 4.8854 1.7743 4.3074 NaN NaN + + +## Compare mean ---------------------------------------------------------------------------------------------------- +look_at_coalition <- 25 +one_coalition_time_old <- system.time({ + one_coalition_res_old <- prepare_data_gaussian_old( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) +one_coalition_time_old2 <- system.time({ + one_coalition_res_old2 <- prepare_data_gaussian_old( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v1 <- system.time({ + one_coalition_res_new_v1 <- prepare_data_gaussian_new_v1( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v2 <- system.time({ + one_coalition_res_new_v2 <- prepare_data_gaussian_new_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v3 <- system.time({ + one_coalition_res_new_v3 <- prepare_data_gaussian_new_v3( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v4 <- system.time({ + one_coalition_res_new_v4 <- prepare_data_gaussian_new_v4( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v5 <- system.time({ + one_coalition_res_new_v5 <- prepare_data_gaussian_new_v5( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm <- system.time({ + one_coalition_res_new_v5_rnorm <- prepare_data_gaussian_new_v5_rnorm( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_v2 <- system.time({ + one_coalition_res_new_v5_rnorm_v2 <- prepare_data_gaussian_new_v5_rnorm_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp <- system.time({ + one_coalition_res_new_v5_rnorm_cpp <- prepare_data_gaussian_new_v5_rnorm_cpp( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_with_wrap <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_with_wrap <- prepare_data_gaussian_new_v5_rnorm_cpp_with_wrap( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_v2 <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_fix_large_mat <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_fix_large_mat <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_large_mat( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_fix_large_mat_v2 <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_fix_large_mat_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_large_mat_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_fix_cube <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_fix_cube <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_cube( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_fix_cube_v2 <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_fix_cube_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_cube_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_fix_std_list <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_fix_std_list <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_std_list( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v6 <- system.time({ + one_coalition_res_new_v6 <- prepare_data_gaussian_new_v6( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +rbind(one_coalition_time_old, + one_coalition_time_old2, + one_coalition_time_new_v1, + one_coalition_time_new_v2, + one_coalition_time_new_v3, + one_coalition_time_new_v4, + one_coalition_time_new_v5, + one_coalition_time_new_v5_rnorm, + one_coalition_time_new_v5_rnorm_v2, + one_coalition_time_new_v5_rnorm_cpp, + one_coalition_time_new_v5_rnorm_cpp_with_wrap, + one_coalition_time_new_v5_rnorm_cpp_v2, + one_coalition_time_new_v5_rnorm_cpp_fix_large_mat, + one_coalition_time_new_v5_rnorm_cpp_fix_large_mat_v2, + one_coalition_time_new_v5_rnorm_cpp_fix_cube, + one_coalition_time_new_v5_rnorm_cpp_fix_cube_v2, + one_coalition_time_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices, + one_coalition_time_new_v5_rnorm_cpp_fix_std_list, + one_coalition_time_new_v6) + +internal$objects$S[internal$objects$S_batch$`1`[look_at_coalition], , drop = FALSE] +means_old <- one_coalition_res_old[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_old2 <- one_coalition_res_old2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v1 <- one_coalition_res_new_v1[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v2 <- one_coalition_res_new_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v3 <- one_coalition_res_new_v3[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v4 <- one_coalition_res_new_v4[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5 <- one_coalition_res_new_v5[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm <- one_coalition_res_new_v5_rnorm[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_v2 <- one_coalition_res_new_v5_rnorm_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp <- one_coalition_res_new_v5_rnorm_cpp[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_with_wrap <- one_coalition_res_new_v5_rnorm_cpp_with_wrap[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_v2 <- one_coalition_res_new_v5_rnorm_cpp_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_fix_large_mat <- one_coalition_res_new_v5_rnorm_cpp_fix_large_mat[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_fix_large_mat_v2 <- one_coalition_res_new_v5_rnorm_cpp_fix_large_mat_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_fix_cube <- one_coalition_res_new_v5_rnorm_cpp_fix_cube[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_fix_cube_v2 <- one_coalition_res_new_v5_rnorm_cpp_fix_cube_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- one_coalition_res_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_fix_std_list <- one_coalition_res_new_v5_rnorm_cpp_fix_std_list[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v6 <- one_coalition_res_new_v6[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] + +# They are all in the same ballpark, so the differences are due to sampling. +# This is supported by the fact that mean_old and mean_old2 use the same old code, and the difference there is the +# same as for the new methods. +# A larger n_samples makes these closer to 0 (I have done that and for other means too) +max(abs(means_old - means_old2)) +max(abs(means_old - means_v1)) +max(abs(means_old - means_v2)) +max(abs(means_old - means_v3)) +max(abs(means_old - means_v4)) +max(abs(means_old - means_v5)) +max(abs(means_old - means_v5_rnorm)) +max(abs(means_old - means_v5_rnorm_v2)) +max(abs(means_old - means_v5_rnorm_cpp)) +max(abs(means_old - means_v5_rnorm_cpp_with_wrap)) +max(abs(means_old - means_v5_rnorm_cpp_v2)) +max(abs(means_old - means_v5_rnorm_cpp_fix_large_mat)) +max(abs(means_old - means_v5_rnorm_cpp_fix_large_mat_v2)) +max(abs(means_old - means_v5_rnorm_cpp_fix_cube)) +max(abs(means_old - means_v5_rnorm_cpp_fix_cube_v2)) +max(abs(means_old - means_v5_rnorm_cpp_fix_list_of_lists_of_matrices)) +max(abs(means_old - means_v5_rnorm_cpp_fix_std_list)) +max(abs(means_old - means_v6)) + + + diff --git a/man/gaussian_transform_separate.Rd b/man/gaussian_transform_separate.Rd index eef1e0c6a..89afb6494 100644 --- a/man/gaussian_transform_separate.Rd +++ b/man/gaussian_transform_separate.Rd @@ -10,7 +10,7 @@ gaussian_transform_separate(yx, n_y) \item{yx}{Numeric vector. The first \code{n_y} items is the data that is transformed, and last part is the data with the original transformation.} -\item{n_y}{Positive integer. Number of elements of \code{yx} that belongs to the gaussian data.} +\item{n_y}{Positive integer. Number of elements of \code{yx} that belongs to the Gaussian data.} } \value{ Vector of back-transformed Gaussian data diff --git a/man/inv_gaussian_transform.Rd b/man/inv_gaussian_transform.Rd deleted file mode 100644 index 76f058772..000000000 --- a/man/inv_gaussian_transform.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/approach_copula.R -\name{inv_gaussian_transform} -\alias{inv_gaussian_transform} -\title{Transforms new data to a standardized normal distribution} -\usage{ -inv_gaussian_transform(zx, n_z) -} -\arguments{ -\item{zx}{Numeric vector. The first \code{n_z} items are the Gaussian data, and the last part is -the data with the original transformation.} - -\item{n_z}{Positive integer. Number of elements of \code{zx} that belongs to new data.} -} -\value{ -Numeric vector of length \code{n_z} -} -\description{ -Transforms new data to a standardized normal distribution -} -\author{ -Martin Jullum -} -\keyword{internal} diff --git a/man/inv_gaussian_transform_cpp.Rd b/man/inv_gaussian_transform_cpp.Rd new file mode 100644 index 000000000..7cf04833c --- /dev/null +++ b/man/inv_gaussian_transform_cpp.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{inv_gaussian_transform_cpp} +\alias{inv_gaussian_transform_cpp} +\title{Transforms new data to a standardized normal distribution} +\usage{ +inv_gaussian_transform_cpp(z, x) +} +\arguments{ +\item{z}{arma::mat. The data are the Gaussian Monte Carlos samples to transform.} + +\item{x}{arma::mat. The data with the original transformation. Used to conduct the transformation of \code{z}.} +} +\value{ +arma::mat of the same dimension as \code{z} +} +\description{ +Transforms new data to a standardized normal distribution +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd index 097fef9b8..23e57b18d 100644 --- a/man/prepare_data.Rd +++ b/man/prepare_data.Rd @@ -17,13 +17,13 @@ prepare_data(internal, index_features = NULL, ...) \method{prepare_data}{categorical}(internal, index_features = NULL, ...) -\method{prepare_data}{copula}(internal, index_features = NULL, ...) +\method{prepare_data}{copula}(internal, index_features, ...) \method{prepare_data}{ctree}(internal, index_features = NULL, ...) \method{prepare_data}{empirical}(internal, index_features = NULL, ...) -\method{prepare_data}{gaussian}(internal, index_features = NULL, ...) +\method{prepare_data}{gaussian}(internal, index_features, ...) \method{prepare_data}{independence}(internal, index_features = NULL, ...) @@ -44,4 +44,7 @@ the contribution function by Monte Carlo integration. \description{ Generate data used for predictions and Monte Carlo integration } +\author{ +Lars Henry Berge Olsen +} \keyword{internal} diff --git a/man/prepare_data_copula_cpp.Rd b/man/prepare_data_copula_cpp.Rd new file mode 100644 index 000000000..ca901031d --- /dev/null +++ b/man/prepare_data_copula_cpp.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{prepare_data_copula_cpp} +\alias{prepare_data_copula_cpp} +\title{Generate (Gaussian) Copula MC samples} +\usage{ +prepare_data_copula_cpp( + MC_samples_mat, + x_explain_mat, + x_explain_gaussian_mat, + x_train_mat, + S, + mu, + cov_mat +) +} +\arguments{ +\item{MC_samples_mat}{arma::mat. Matrix of dimension (\code{n_samples}, \code{n_features}) containing samples from the +univariate standard normal.} + +\item{x_explain_mat}{arma::mat. Matrix of dimension (\code{n_explain}, \code{n_features}) containing the observations +to explain on the original scale.} + +\item{x_explain_gaussian_mat}{arma::mat. Matrix of dimension (\code{n_explain}, \code{n_features}) containing the +observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +transformed to a standardized normal distribution.} + +\item{x_train_mat}{arma::mat. Matrix of dimension (\code{n_train}, \code{n_features}) containing the training observations.} + +\item{S}{arma::mat. Matrix of dimension (\code{n_combinations}, \code{n_features}) containing binary representations of +the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +This is not a problem internally in shapr as the empty and grand coalitions treated differently.} + +\item{mu}{arma::vec. Vector of length \code{n_features} containing the mean of each feature after being transformed +using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution.} + +\item{cov_mat}{arma::mat. Matrix of dimension (\code{n_features}, \code{n_features}) containing the pairwise covariance +between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +transformed to a standardized normal distribution.} +} +\value{ +An arma::cube/3D array of dimension (\code{n_samples}, \code{n_explain} * \code{n_coalitions}, \code{n_features}), where +the columns (\emph{,j,}) are matrices of dimension (\code{n_samples}, \code{n_features}) containing the conditional Gaussian +copula MC samples for each explicand and coalition on the original scale. +} +\description{ +Generate (Gaussian) Copula MC samples +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/prepare_data_gaussian_cpp.Rd b/man/prepare_data_gaussian_cpp.Rd new file mode 100644 index 000000000..b24b431e6 --- /dev/null +++ b/man/prepare_data_gaussian_cpp.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{prepare_data_gaussian_cpp} +\alias{prepare_data_gaussian_cpp} +\title{Generate Gaussian MC samples} +\usage{ +prepare_data_gaussian_cpp(MC_samples_mat, x_explain_mat, S, mu, cov_mat) +} +\arguments{ +\item{MC_samples_mat}{arma::mat. Matrix of dimension (\code{n_samples}, \code{n_features}) containing samples from the +univariate standard normal.} + +\item{x_explain_mat}{arma::mat. Matrix of dimension (\code{n_explain}, \code{n_features}) containing the observations +to explain.} + +\item{S}{arma::mat. Matrix of dimension (\code{n_combinations}, \code{n_features}) containing binary representations of +the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +This is not a problem internally in shapr as the empty and grand coalitions treated differently.} + +\item{mu}{arma::vec. Vector of length \code{n_features} containing the mean of each feature.} + +\item{cov_mat}{arma::mat. Matrix of dimension (\code{n_features}, \code{n_features}) containing the pairwise covariance +between all pairs of features.} +} +\value{ +An arma::cube/3D array of dimension (\code{n_samples}, \code{n_explain} * \code{n_coalitions}, \code{n_features}), where +the columns (\emph{,j,}) are matrices of dimension (\code{n_samples}, \code{n_features}) containing the conditional Gaussian +MC samples for each explicand and coalition. +} +\description{ +Generate Gaussian MC samples +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/quantile_type7_cpp.Rd b/man/quantile_type7_cpp.Rd new file mode 100644 index 000000000..69e3100a9 --- /dev/null +++ b/man/quantile_type7_cpp.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{quantile_type7_cpp} +\alias{quantile_type7_cpp} +\title{Compute the quantiles using quantile type seven} +\usage{ +quantile_type7_cpp(x, probs) +} +\arguments{ +\item{x}{arma::vec. Numeric vector whose sample quantiles are wanted.} + +\item{probs}{arma::vec. Numeric vector of probabilities with values between zero and one.} +} +\value{ +A vector of length \code{length(probs)} with the quantiles is returned. +} +\description{ +Compute the quantiles using quantile type seven +} +\details{ +Using quantile type number seven from stats::quantile in R. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/sample_copula.Rd b/man/sample_copula.Rd deleted file mode 100644 index c180f25ca..000000000 --- a/man/sample_copula.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/approach_copula.R -\name{sample_copula} -\alias{sample_copula} -\title{Sample conditional variables using the Gaussian copula approach} -\usage{ -sample_copula( - index_given, - n_samples, - mu, - cov_mat, - m, - x_explain_gaussian, - x_train, - x_explain -) -} -\arguments{ -\item{index_given}{Integer vector. The indices of the features to condition upon. Note that -\code{min(index_given) >= 1} and \code{max(index_given) <= m}.} - -\item{m}{Positive integer. The total number of features.} - -\item{x_explain_gaussian}{Numeric matrix. Contains the observation whose predictions ought -to be explained (test data), -after quantile-transforming them to standard Gaussian variables.} - -\item{x_explain}{Numeric matrix. Contains the features of the observation whose -predictions ought to be explained (test data).} -} -\value{ -data.table -} -\description{ -Sample conditional variables using the Gaussian copula approach -} -\author{ -Martin Jullum -} -\keyword{internal} diff --git a/man/sample_gaussian.Rd b/man/sample_gaussian.Rd deleted file mode 100644 index f91312e85..000000000 --- a/man/sample_gaussian.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/approach_gaussian.R -\name{sample_gaussian} -\alias{sample_gaussian} -\title{Sample conditional Gaussian variables} -\usage{ -sample_gaussian(index_given, n_samples, mu, cov_mat, m, x_explain) -} -\arguments{ -\item{index_given}{Integer vector. The indices of the features to condition upon. Note that -\code{min(index_given) >= 1} and \code{max(index_given) <= m}.} - -\item{m}{Positive integer. The total number of features.} - -\item{x_explain}{Numeric matrix. Contains the features of the observation whose -predictions ought to be explained (test data).} -} -\value{ -data.table -} -\description{ -Sample conditional Gaussian variables -} -\author{ -Martin Jullum -} -\keyword{internal} diff --git a/man/setup_approach.Rd b/man/setup_approach.Rd index 86cb164bb..a87fbd106 100644 --- a/man/setup_approach.Rd +++ b/man/setup_approach.Rd @@ -153,3 +153,6 @@ This is useful if the underlying time series are scaled between 0 and 1, for exa The different choices of \code{approach} takes different (optional) parameters, which are forwarded from \code{\link[=explain]{explain()}}. } +\author{ +Martin Jullum +} diff --git a/python/install_r_packages.R b/python/install_r_packages.R index 0c8768d91..598886243 100644 --- a/python/install_r_packages.R +++ b/python/install_r_packages.R @@ -1,3 +1,4 @@ # Installs the required R-packages -install.packages("remotes",repos="https://cloud.r-project.org") -remotes::install_github("NorskRegnesentral/shapr") # Installs the development version of shapr from the master branch on CRAN +install.packages("remotes", repos = "https://cloud.r-project.org") +remotes::install_github("NorskRegnesentral/shapr") +# Installs the development version of shapr from the master branch on CRAN diff --git a/src/Copula.cpp b/src/Copula.cpp new file mode 100644 index 000000000..732ed3a4f --- /dev/null +++ b/src/Copula.cpp @@ -0,0 +1,169 @@ +#include +using namespace Rcpp; + +// [[Rcpp::depends(RcppArmadillo)]] + +//' Compute the quantiles using quantile type seven +//' +//' @details Using quantile type number seven from stats::quantile in R. +//' +//' @param x arma::vec. Numeric vector whose sample quantiles are wanted. +//' @param probs arma::vec. Numeric vector of probabilities with values between zero and one. +//' +//' @return A vector of length `length(probs)` with the quantiles is returned. +//' +//' @keywords internal +//' @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::vec quantile_type7_cpp(const arma::vec& x, const arma::vec& probs) { + int n = x.n_elem; + int m = probs.n_elem; + + // Initialize output quantile vector + arma::vec qs(m); + + // Calculate indices + arma::vec index = 1 + (n - 1) * probs; + arma::vec lo = arma::floor(index); + arma::vec hi = arma::ceil(index); + + // Sort the data + arma::vec sorted_x = arma::sort(x); + + // Calculate quantiles using quantile type seven + for (int i = 0; i < m; ++i) { + qs(i) = sorted_x(lo(i) - 1); + if (index(i) > lo(i)) { + double h = index(i) - lo(i); + qs(i) = (1 - h) * qs(i) + h * sorted_x(hi(i) - 1); + } + } + + return qs; +} + +//' Transforms new data to a standardized normal distribution +//' +//' @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform. +//' @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`. +//' +//' @return arma::mat of the same dimension as `z` +//' +//' @keywords internal +//' @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::mat inv_gaussian_transform_cpp(const arma::mat& z, const arma::mat& x) { + int n_features = z.n_cols; + int n_samples = z.n_rows; + arma::mat z_new(n_samples, n_features); + arma::mat u = arma::normcdf(z); + for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { + z_new.col(feature_idx) = quantile_type7_cpp(x.col(feature_idx), u.col(feature_idx)); + } + return z_new; +} + +//' Generate (Gaussian) Copula MC samples +//' +//' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +//' univariate standard normal. +//' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +//' to explain on the original scale. +//' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the +//' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +//' transformed to a standardized normal distribution. +//' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. +//' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +//' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +//' This is not a problem internally in shapr as the empty and grand coalitions treated differently. +//' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed +//' using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. +//' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +//' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +//' transformed to a standardized normal distribution. +//' +//' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +//' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +//' copula MC samples for each explicand and coalition on the original scale. +//' +//' @export +//' @keywords internal +//' @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::cube prepare_data_copula_cpp(const arma::mat& MC_samples_mat, + const arma::mat& x_explain_mat, + const arma::mat& x_explain_gaussian_mat, + const arma::mat& x_train_mat, + const arma::mat& S, + const arma::vec& mu, + const arma::mat& cov_mat) { + + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Initialize auxiliary matrix and result cube + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on, both on the original scale and the Gaussian transformed values. + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); + + // Extract the mean values of the Gaussian transformed features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the Gaussian transformed covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star_gaussian, i.e., of the Gaussian transformed features + arma::mat x_Sbar_gaussian_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); + x_Sbar_gaussian_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_{Sbar|S}) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different explicands and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + + // Transform the MC samples to be from N(mu_{Sbar|S}, Sigma_{Sbar|S}) for one coalition and one explicand + arma::mat MC_samples_mat_now_now = + MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); + + // Transform the MC to the original scale using the inverse Gaussian transform + arma::mat MC_samples_mat_now_now_trans = + inv_gaussian_transform_cpp(MC_samples_mat_now_now, x_train_mat.cols(Sbar_now_idx)); + + // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + + // Insert the auxiliary matrix into the result cube + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; +} diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp new file mode 100644 index 000000000..c375ed510 --- /dev/null +++ b/src/Gaussian.cpp @@ -0,0 +1,88 @@ +#include +using namespace Rcpp; + +// [[Rcpp::depends(RcppArmadillo)]] + +//' Generate Gaussian MC samples +//' +//' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +//' univariate standard normal. +//' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +//' to explain. +//' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +//' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +//' This is not a problem internally in shapr as the empty and grand coalitions treated differently. +//' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature. +//' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +//' between all pairs of features. +//' +//' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +//' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +//' MC samples for each explicand and coalition. +//' +//' @export +//' @keywords internal +//' @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::cube prepare_data_gaussian_cpp(const arma::mat& MC_samples_mat, + const arma::mat& x_explain_mat, + const arma::mat& S, + const arma::vec& mu, + const arma::mat& cov_mat) { + + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Initialize auxiliary matrix and result cube + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + + // Extract the mean values of the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); + x_Sbar_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_{Sbar|S}) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different explicands and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 8face37f8..c95d55541 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -80,6 +80,62 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// quantile_type7_cpp +arma::vec quantile_type7_cpp(const arma::vec& x, const arma::vec& probs); +RcppExport SEXP _shapr_quantile_type7_cpp(SEXP xSEXP, SEXP probsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::vec& >::type x(xSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type probs(probsSEXP); + rcpp_result_gen = Rcpp::wrap(quantile_type7_cpp(x, probs)); + return rcpp_result_gen; +END_RCPP +} +// inv_gaussian_transform_cpp +arma::mat inv_gaussian_transform_cpp(const arma::mat& z, const arma::mat& x); +RcppExport SEXP _shapr_inv_gaussian_transform_cpp(SEXP zSEXP, SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type z(zSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(inv_gaussian_transform_cpp(z, x)); + return rcpp_result_gen; +END_RCPP +} +// prepare_data_copula_cpp +arma::cube prepare_data_copula_cpp(const arma::mat& MC_samples_mat, const arma::mat& x_explain_mat, const arma::mat& x_explain_gaussian_mat, const arma::mat& x_train_mat, const arma::mat& S, const arma::vec& mu, const arma::mat& cov_mat); +RcppExport SEXP _shapr_prepare_data_copula_cpp(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP x_explain_gaussian_matSEXP, SEXP x_train_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type x_explain_gaussian_mat(x_explain_gaussian_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type x_train_mat(x_train_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type S(SSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type mu(muSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type cov_mat(cov_matSEXP); + rcpp_result_gen = Rcpp::wrap(prepare_data_copula_cpp(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat)); + return rcpp_result_gen; +END_RCPP +} +// prepare_data_gaussian_cpp +arma::cube prepare_data_gaussian_cpp(const arma::mat& MC_samples_mat, const arma::mat& x_explain_mat, const arma::mat& S, const arma::vec& mu, const arma::mat& cov_mat); +RcppExport SEXP _shapr_prepare_data_gaussian_cpp(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type S(SSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type mu(muSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type cov_mat(cov_matSEXP); + rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + return rcpp_result_gen; +END_RCPP +} // mahalanobis_distance_cpp arma::cube mahalanobis_distance_cpp(Rcpp::List featureList, arma::mat Xtrain_mat, arma::mat Xtest_mat, arma::mat mcov, bool S_scale_dist); RcppExport SEXP _shapr_mahalanobis_distance_cpp(SEXP featureListSEXP, SEXP Xtrain_matSEXP, SEXP Xtest_matSEXP, SEXP mcovSEXP, SEXP S_scale_distSEXP) { @@ -155,6 +211,10 @@ static const R_CallMethodDef CallEntries[] = { {"_shapr_correction_matrix_cpp", (DL_FUNC) &_shapr_correction_matrix_cpp, 2}, {"_shapr_aicc_full_single_cpp", (DL_FUNC) &_shapr_aicc_full_single_cpp, 5}, {"_shapr_aicc_full_cpp", (DL_FUNC) &_shapr_aicc_full_cpp, 6}, + {"_shapr_quantile_type7_cpp", (DL_FUNC) &_shapr_quantile_type7_cpp, 2}, + {"_shapr_inv_gaussian_transform_cpp", (DL_FUNC) &_shapr_inv_gaussian_transform_cpp, 2}, + {"_shapr_prepare_data_copula_cpp", (DL_FUNC) &_shapr_prepare_data_copula_cpp, 7}, + {"_shapr_prepare_data_gaussian_cpp", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp, 5}, {"_shapr_mahalanobis_distance_cpp", (DL_FUNC) &_shapr_mahalanobis_distance_cpp, 5}, {"_shapr_sample_features_cpp", (DL_FUNC) &_shapr_sample_features_cpp, 2}, {"_shapr_observation_impute_cpp", (DL_FUNC) &_shapr_observation_impute_cpp, 5}, diff --git a/tests/testthat/_snaps/output.md b/tests/testthat/_snaps/output.md index 9a84b0089..bd2dd91fe 100644 --- a/tests/testthat/_snaps/output.md +++ b/tests/testthat/_snaps/output.md @@ -81,20 +81,20 @@ Code (out <- code) Output - none Solar.R Wind Temp Month Day - 1: 42.44 -8.545 7.779 14.586 0.4475 -1.6653 - 2: 42.44 4.826 -4.295 -11.655 -1.1250 -1.6309 - 3: 42.44 7.163 -25.491 0.368 -0.5455 0.9377 + none Solar.R Wind Temp Month Day + 1: 42.44 -8.117 7.438 14.0026 0.8602 -1.5813 + 2: 42.44 5.278 -5.219 -12.1079 -0.8073 -1.0235 + 3: 42.44 7.867 -25.995 -0.1377 -0.2368 0.9342 # output_lm_numeric_copula Code (out <- code) Output - none Solar.R Wind Temp Month Day - 1: 42.44 -6.371 7.355 14.470 -0.6108 -2.241 - 2: 42.44 4.115 -4.159 -9.980 -1.9378 -1.917 - 3: 42.44 5.932 -25.086 1.857 -1.3624 1.090 + none Solar.R Wind Temp Month Day + 1: 42.44 -5.960 7.046 13.863 -0.274 -2.074 + 2: 42.44 4.482 -4.892 -10.491 -1.659 -1.319 + 3: 42.44 6.587 -25.533 1.279 -1.043 1.142 # output_lm_numeric_ctree @@ -150,20 +150,20 @@ Code (out <- code) Output - none Solar.R Wind Temp Month Day - 1: 42.44 -8.809 9.149 15.503 -2.8888 -0.3522 - 2: 42.44 3.146 -4.566 -7.727 -4.3771 -0.3559 - 3: 42.44 6.655 -22.559 -1.645 0.5634 -0.5832 + none Solar.R Wind Temp Month Day + 1: 42.44 -8.746 9.03 15.366 -2.619 -0.4293 + 2: 42.44 3.126 -4.50 -7.789 -4.401 -0.3161 + 3: 42.44 7.037 -22.86 -1.837 0.607 -0.5181 # output_lm_numeric_comb2 Code (out <- code) Output - none Solar.R Wind Temp Month Day - 1: 42.44 -9.302 9.454 17.2106 -1.767 -2.9936 - 2: 42.44 5.189 -5.352 -8.5382 -2.854 -2.3245 - 3: 42.44 6.388 -22.748 0.0177 -1.441 0.2159 + none Solar.R Wind Temp Month Day + 1: 42.44 -9.294 9.327 17.31641 -1.754 -2.9935 + 2: 42.44 5.194 -5.506 -8.45049 -2.935 -2.1810 + 3: 42.44 6.452 -22.967 -0.09553 -1.310 0.3519 # output_lm_numeric_comb3 @@ -171,9 +171,9 @@ (out <- code) Output none Solar.R Wind Temp Month Day - 1: 42.44 -6.940 10.773 12.187 -3.692 0.27495 - 2: 42.44 2.628 -2.656 -8.569 -5.313 0.03032 - 3: 42.44 5.827 -22.183 3.440 -2.954 -1.69839 + 1: 42.44 -6.952 10.777 12.160 -3.641 0.25767 + 2: 42.44 2.538 -2.586 -8.503 -5.376 0.04789 + 3: 42.44 5.803 -22.122 3.362 -2.926 -1.68514 # output_lm_mixed_independence diff --git a/tests/testthat/_snaps/output/output_lm_numeric_comb1.rds b/tests/testthat/_snaps/output/output_lm_numeric_comb1.rds index d0dbda0e2eb675aa8449c1a361506a9df5f0d348..87eb7463002939b1ea7743de5f30dfb55d05392b 100644 GIT binary patch literal 5110 zcmVJYok_tR_r9Q>UF zt0P{DjjRu9+W*k3lk@8PpD3C>q47sYeZtm$8y`Kezdov~^^2n&eC)S%%Lnw^ea0bQ zc}>-)lj>M~P~DK&oWq&?IV1XL+cs}jZ$B|JN4f0s2K&)L>CflgT4_I1J0~;6b2K(Z zj_YOEB0}q7(w6D1IH9J-3580N*`$ZGD&K6>S`zYD)i{Hxh@~IVmz6U)*=Cci6f%lo z+L@Aeye;M#39K!D4D#9dzow?9+fs#eMy-`8B!@zuBCSoEVAD=D>Rn2TEm)#NKv06g zWYb$rT4RD$Z!EUxi*aE)fI9%KZ!Hlkhmk^YEHtA8GOrSCh1F`%nwUqhID^HY(;6L? z+bnvfh#ziPU#`dhnRI&SQyCd!OYlE%p$tPb7$np`i#YfRc{S&f=dU>OKEN+cOX<`i z&w857mNKla(~XQ%)`CQtNK+3{S|Lgsp%sq@?*g$3OuSN(798X76U}>|5;b)!>T-a1 zxDn->OGka#<_7BU*Vv2RkENmB>X%6hcP*h0NA>zI4_%$k*Jl=A-EqG6O*CEB{ln_a zhWeP`j0=6AudW|$e4=K<%_WZd?m79pd&$4APxMpzOxyUF{coQ6>x)%U{MC-1r_b9M z@8E}eG%jkOb0hb=@a6oDlTAxrShh2uFC)J?uBC~TAnSSM=& zbz<32*Or%C%vv35DzMz5FEZ$C2D3?1t+$w=IKZUQnaidcOj=T7WlEM<%oR*yZeh2Bk7g%wX{x1He2mfVE03g%o#k@S#7mxEjDpokh9KJ_cnCnkbt{NXEETd zkHOLB0gf7LxzWJr10AJPD|FNJtk_Q|7L?|k8yY!mec1v|7VI3-zZky8a1zfQyr9cM zaT@RDII@L}A+EqM2}31@Z5S?Lnn6I@2IntgsKRMq3?VpOjxm{?A7MC(VK;{BxQw** zBF3FD{tDy17=Ht@dt%7OkPq|wNoapNh7U1q4>FH2tiZ4y!y>?I$neKt=Y)+KnfqG> z9l2we5A(>58S#0FUxjz&dU8pib)lU>Fc3{PQ56#T=Dz~uum zPQ^G#@U1Ku{E!DS$&wlZa>w90wctY;0-sz0AolAp)M9ABa1aCe?AQqQ?1fDb(Ypqx zBf&RD)JKSoC_-4hkR0+cknaNdNXRqXYajXTM?E1Q3;BmQ`a{ux+W>#}zVz3Oj^)Fl zzAKcodzwU_yY@w0lCXZ1|qPJp{e=$Rj4 zBz}yL*wItco*MLG_OW&tKci=MvG&zK&-$seL_RBL?Y4tF)29dEq;OaH3`^;vCI0N% zLOx#74@@s+kF-Bnf3o`ClJ=OrO#dDdx)=%e1$yR3ss2*T_&g>4WcAE{((wfHIcIz% zb3F?K-ciw5IT8c@^rmqN&uuP~qcMSd1k#TZaZpd_hGCcjd?d~l0zIL{afy2v%5#8^ zw3P;Y1W&*)6hjIp#FsI^&(gy|Hyn?+!5DHyx{b#$5%@;|4JjMP39+^i^dk8Juz~Q6 z1vwsBkLMjGrR#2}O4_4;Mx!TTj@fNkkY+(IH*3q^Y zB7u+W?V~Vs#DMn~^z0`9_{i6b66i_Z59$bQu*i-;3}me)i)k3}k$N@fJizD(G+sax z0(CNIQw90Np}ESRM3_`l=%T&80S1xk%NwrK+_R)V?GN8J~ikV zDbi65J_rO^QBY6lBA|VuM>s%xkSPbdLO_1F$bNqjT^q1l4YYosV~9{kzPIF#uguvT zJqfH8$`$N8>^=_vluexbGrkA^v~>A2Y@IJvm)$h+k_N-K!p;~Df7uEs*+GeTNc1Jq zIa^f5dfrP^#%6Qd7QBA@|BKgML^3N?RW9GC-c^ooSm*bT2Vh03sUvq0W2z{_-)y=E z%KjD(W5zqWRs_Fw|0viAgi$+HZ)D%|Dtb5I`11EJ*6M6#i^iz0Bs9*s#a4!BPKcIr z=shLpx_Yn`hD27fO85aEnS+6xGjft>M&yPF^N5aj60J!zWzmsDO%mZq6eW>33fd0_ zOyqK$^DJ6?G%n}dAB~I8u35a0OL*iGKDmTf?uK73p_fbO@q+TJZS8}fLOeSeV zT&8qWCgoMQp;Ne_leR0>Ra_>elkzGgWlFC3*l}0xj^3Rdck-p<(jTz9G0Vm?$2rH3 z807Z|ihBg58xZTk#*`F%S{dB6LE0vBG&(h5?rh=^ey^}z-W9(WM1TLb_#W+Gc5_0E zBK_fw*YYNIaxJ-Pe9(NetIB`d0+&0~e@Vvm;9h3(xi;#wJd zwK7)U8+Kk$yX+cN?AY5-B)$${qgrxd(fkTSmcs;COSR=j{Y zncrYv{V%l7`3S&jw%GJVoa?2kR3l*nNycbBf4Qk?!s^fN*m3MCnfXCJ#VbADp!kX+ znd1E`vk5Ns`)OHlSAGVUzvsWy=~Kt+`3@y7>zzp6jQgc&BzOr8gIh z2D+aqU*AKAX#Svv`ASNkwBkpQw+QIoqI|vLCeVB`noe=$i`!i2H*3dJ zyz2ZcLGFutDBs|p&(nO-fjdlpnqT7kcR@~VG{vdNK=5(pfhNkAUU@EMku}{e3BJ7@KQf6%-T}xO*J1R|Pr8Cheek&p!{K^zW#$X!+??Cn)`e6ua`#B@?B4~rFd)I0b2g$+&3woa@$i(KFxDqKTGNA z7apW^zeK%E^QRt~O7nLPEo1(n`K#uSY5jp;>uCOmbyR*>l=o~}ez?sKF7hK5O{3WN z+wQb}uc^J@=bx@o{);{;7kdBj*J$}|+Z3T6zB(tAr(LA^b6?DIp%3zF6#Q872IV{T z+j7^-{agUx#_<1LOC#+!*3##@wK7;mGPd^?P19>y~d{T2zBlp0 ze5dmR&-ZD(FyC+6o%3DoyW>{{5AlQ-;>lyY5Klg_Lp7oqMgnSG`-C{(kH4*3SLb->v+9`{7>oe^9@)^lL3Yv{t^g?7rXm z+S-12Q2Gb8+x0ufB}PVbj>A{>teDoZ8!YL0W;E23Gn0Eg+hp|jddA7lr`()%x_pPR zC(p7(mgvJ0eOV%jYxyAA@l?!OuGcv}UJ6-a980j5ajJX`e5y2iw8WnLMmPHemVR=E z01g8T4+R~U|D6HH0FDF99s|S!?g=;naBskgfcpUM3m6_KqJDt;15O4!0PsM-g8&Z( zJOuDiz$t)vz|3Iq1A9Y`_?4@zxgV9F(PgEv%hz>Bxv7VC{@1kLD06lDr>0%kp^Rt0 zdm@9&MZ@3n)+DSqp%Jp+dfB-O)UVLec2(p4R5WTwQ1^L#P3Ym=d2Iq0$D*u=NyB>h*-_Rv zGkbik-it=vSUcTwPXx-@m^bOY%uXo%*21OjQi{;S^G82jIHCi}4e7aPY4UQEwfB$u z^5fMgyJAM)yum-wib>>*&hdWU%6=! z8a3}~SflBeW`4;0xcfO1`jr*1l)4AJT2_Z0zWRhSZg% zbpEUt8XAAT#u}_ zqSTAGu3aB-5RIt0^Y<&gH=`j>SH~_pIUVu3j2?R%UPmeKMBgcBID-c4DtpV{a0;b7 zd285_f@e@_K$v>#CodrMweCtm=@!I)wg2(?7s}9}5haN-pJ8aobE^Y(@2zX*hpY{& zR@J>ZA9cC8q~^1#3usuDsL8Ny1Hgta<&0w+`i@Y zr(HNzxDEaBk3}rzG z8mxV_*7(LHZ1=v>(x1ao#^zOZwZVC)U#BDM!_1y&z?tm!KcaRmgsv8jSjsh}T3ezf zQIhW?$@l#o4~1G)W5XVbfb2Po{~7*aBdl`Ukzv`EFTw7|l7qFcfV1v*F)D_bcw-ZH^^J0ZWdY zV{gria%8=3l!Z9*>F)-_ui&a%@Qji?$7Tl6r`gt`7RFK2Z!nJjxf+hURd(+vz2_6W zteexOFrAYrhi5}nw4F0cJ4|!Dm)T+_%BnpqbXb39(!qM z6w;4QoLCS!7wJZ4rgm9Shzf7Lnv?#{HPrj~TJ7KF-9+dW-!>`oER=luhq;^)L1*dj?l7l?sR7nGm z#Gnx$9x;s@qDH~*2G%4k=z;pbJaC`?cdw&Kp(T}%uQi+GlgFa?vAb@*J}cB= z_rxs~Tb^iZM!D&;+n4P1M%mL|Jv3)s7@D#(HZ#%pO&s-qdSP?O=ZL>^GSzo|D9Y@= zDd)}Tv1s<6&JXmd%|d1Nm@!wspNtBVkAL{p(P}jQjmB~Hy?dfb3+A5PRFH)7Yuk47 zI~9j={kxrfNt=gys1Drzb8;3+j5T~%o4RMD&(V#n_)-{b|`3pyKE>Bi8A~f|q-3jB{NVam! zo)<=~L(#b_*RS2T0TtGmr}G`>qVYTZUo*e2Ktbi_7MEVzb$^=`$?ea5tS@JO|9#3m zDcj;ZAu)LXB`5E=3_7jj1(KSUHoK#8?@x9;R;e{sI8^dte^<gzs7!b|M3{BE3%W`ScHd8ZyJ#(vc&@{D7NlnZ(mwIY$Pg~Sf{`cPR+?j7ahNu3d z-uu7z=I8O;d+s^so^$T{-FqqU;y6yhDHU=~;f9l(Ti(MDcJG5}fjFZC?2f^YFru_R zrxq@a-$n3?8?ufOJahM^$5?r7`!}wu`~EEmdB!YQF(4xdwSKF8`nK7(?eZNml9vWn zuB~m6`qj({`*Ul1zYrC1T$5}s?;*?Z@vU21+wP1fdLej`?MC*V=sz~p+V!j2c6&80 z=!x2ZX|1DuR?Xp0svi5Wsz(hgw;iZCAJp*mYTJP?S0rkR#@bFU%L!kwWeRphj>~1( zBTwR_Nn5Hf#|brcP9Q2xW|JP$s$8>CYl+QaMI#KRLY975Us}fWWSULZ638e9X(vhQ z@wAvH#+q4n2k9PEd^!*gT5073Jjyt%*eht20;(I<3*}xz(a) zhWOx$^<{eepGl{OHkFY+HV6NM5Xvw_fI~w03y6cCkXLg~dH%9J?*;PGw3JURimbcI zY$?U&I>N{#Wld<5i7a&!r4^#I5o&R}|0WQdz|<=xX(2H7IMKWtDpu3LqK>;sgioVf zb4mK47QdsmpLDw5`FIM7Q_m+U+_Z#06xHev)m@pu*A6MVvgK^tH8eri_4CO?>T4qd z)6VsJX>x6b@yVLi*XG#EyJqEX>n=Z28}Fm^8o%yw+dtfMR~4zk`77-Xq)uNKZI_2~ zG$MS~#&yS5p;l+NM71vQs14iD5Y{>QA*P?nvvMRG{(JV{Ltp2&9BG*I^5Uheex{$v zvvQQHdb-V=i(@21M^R`fg>cEC#oQz;9+-s<<3QkRrq*jFHTKcKUo4_ONiX&;GF0gc zHRXolQmvhJ!$R^~#5FaL5}f*ri2pyaKd2g9cLI-w|5uIfvgJ@)dtNk25$|LTU`{92 z)wN}17PD5zs`4+h=nD-xtHEs2Ox9b>K=w0fbmr1=29uVQlrtlXE#?Ylv8PE>t}QJy z>dPI3aayac#7-Ef6^?1F$j0$Lq%oa^$E)z18Ysw~BOU!Hq@itdj%POq6D^F(2 zhSEBz%)XYDm@S6Mc5W|fk;bSkW|f9!=B8C@Doh4kO(}IkGf{6SF0nEOcdMb)U@B%V z>8rFlE7NAu6zR3r3X9k$eN~xJYhV?sOd6}jk@W?BjWB(%Wv*O9?V4yPw=aDr4U4>; zFVIBnoKT^6QB+ZmhqKE@@McSHY-t4xuAQVaS87VN%pYI-Y8s~EvOO~2RVw%ty+s!To&LcbGE$&-8jVJrqWpqcbMHscs(Qg2xP%%&at79!`7EA;AFwhBJGRe1coDc?BE4m7L3z) zH^-gF5J^ITVKjzH3>z_A#5{e0w*}5$z)*$L-WY;#x(s77IyYh1k6{~zUoef-^$Nxv zFg}iPFN}W#-Q6%`V#tN@{S?%{8N=t8H-?O33~yjqg<%%prKI~~uyMjhjg0-z1RJ^U zupE|=rDBM};0rR>FvMZFiQ#L(7UF}fBS&XNSqjFr7?WMb4;Y@o5HG}s8;t2m7~}1| zEI^2@ED++513Jl)8U%WW<1)1nLm7gYTm&Hg>oCm2P>*2`hBg@1fj>K86GZIBp98Wm zh>a2T6y`?MMp(U&9P*Kn?+E!Y$TQr17x{gIxa3q(DPM#no}HV> zM@!m)*~R>kwg+oZRvss*kNL~&kCE_2O0YNZvp7oamtrR8E{P{AXYrHvC(zG1=0`Ht zhhe}wDmqsV#gM@X^AsN2+z^h=3Eabwew3(#a>6$V!&s0b^IQS&6J9(oaSs7K3*<;$ zDIiDiC=3HI4CI9QWjM&Q^n+jl|JPb51EWFLik za-d#j!^5I>BWoO4*T|aT3BYKcF6b<+643~cJJ1;2T~x;OxQRe4>n2Kjh~!l+WIaT5 zu?&W{an&F6IMkH?oel#1IB@ zWN#mip&bUizo2J7ejrD_UX;L3@;*>TcmqX#_+ubzHCaqUK#r8FImZD;JK*sEo**cb zL7ghdw+4GuP_G~O5zINhLrA(U_@aWke1YZzd?B3UP(=*P^ zJb|uY-(mM~_@`{(T%YmX|EHzXr(yGasXFbZNt84geiC-ZaQMqsK*-LBcqq{hiF#WHx29BbhbH3`b^BG82bG{eggq zUXF8|MT?Kd<(%uIaq-zTn=j-N8M#DGE|HbHke5sNkS4hg0ob!Aplhh%m zDP7Q{vI-Y`3Kx9RdZo6CX;MC^tU^Lla*g|rE4nLwS9)COm-b6vz^?i%>(3nL=s#kR z-z6yS5|l1LYzOO8Qt)bKa8(DXn~c$jggJ_| zhbLak8`#OU8GAG{ zR^J_VTu?jh8dU7q+fXRJ4q&}na$(W<3PYB~6qJ`}%Z&O-a38`HNG^oj=p9ZI`|2P5 zgnjkD&_Bl`fO50NsxRc6FIA-$2^&b#M{D^@4Gp8-{r0vE&t1in*5*>Y*zG-vFDnu$ z{$_C|!6iPwE*xms7Lbxwc#6z*&!s)K-nMMv!um&Ru2H=5*_jkq`=}`XLYqhV&G{L? z_ZyY#RaZyzulg^e*f=kW;uU8rDSyHn=Rw~r;QN5eb&ncF^NDBz#g(sYbmHHj9ZB($ zvr`1UuWYAs{eC-3^M$)_Gy7?Nj`tseo_P@zCnE#F2bH@Us9b90sewbc1%zMh7{z1y zOBcWCMR9#zwrxp3K#2dgYJ!7*39G01cKK^5|E^d1P`>>gN7C|N+bpB`)jPV;{K*k% zl;7vYj+DP<(j&CIE>2C$ulrr1<+3Y+p6^C)p?UXr`cVEAs$rCVY{_BDf9{RW6knZc zrhHLH+R^f_PzK?T@c%f7mfyTMl+w38*^=VV7VW0=L)Gt7IpxM@n0}h)PP{<*YMIr}?8j#?k!kx`ix0G=IhXB`x25a}mw|yprk<3HO{z>3duJ?4-ZVtnn0k|J0S1 z?=-a*;{5AXDu2OC<;3qB`WB_%vW^wn;rJ_L(?eV z@(H07pZ&6UU~;T?|GC$bdEn=Pp9g*$@Y}@vSx5Ce*u#T8Hn4{Wdu+@ev3~3?5B}P~ zUmN(#Gk-<;Njz;3PafjQLp*IPo??5r6LW1sd+p_+y=+2zX?fOOguiTjE)V^|CiI73 z8}tVr`U5ZYhiazZ2K|i}`dbt)^fwbP^tW)E(BIOTejD^>9{Mv6{n-ZnnHTzV66Ih0 zY!4fZ7aqooO&Bk+yf9uK0}Qck2V;OJd8&k#v|_-k81BRIZ3=Q-WzyfywmZ4 z$NP9*81J`ij`6Pc-f~mH!+gRE^U349FrVzO!F<9C^T{nI{dm6OVZO4#e8t0j#k2Ve z^Lt(Yz{Csl;SHNGAASpR%Y}O8b>fBj&`R~U={?#8^DW3-gZgIjFyHbp-)^eerrz5o z!IR8i#f9~=D4rGdH;NBOrciwT;%5YxbQ|R1;!O$p{*Dl>5sTSsE_!(OPk(pX=iKY> zZud{kZC_LR?o`j+mfx-2&CP$W<#(#*Ud!)9zt?uSTl?=fE=~QKD-O+-Z!W*@b-Xrr z9PXF@e*Jd-j&YhJBRKovD|=Q<%h(N;^gJ^H%E_6@-JWeS{<}TnWam>Z&N`jG!`PE& zSv*VhWQkrZk-#;5kZgY{R$ivp**{(iSYiZAu$OVFTn&7xG$!t#wk|?LviB^lAMS&O zsOqbtMnS9`kQ2wz9pdR^j;?PQ16=4kk51$ij*%Yd$n*Uv)f z)31b_Hr;3xhauIMuZJC%qvT`jQoag#041-`FYf$iJ{qF4{V_S=07~obS@lU>97@e< zTU5Ay8cOlvHvfkFQ2*LB2@7@{Mni{=JLi*m1`T*NW6F=;FG7juwoTdf0**tfb;Y`N zUa0?~(t#bm?T!XSpDkW<={dxA*0h*%d^k#;9(e(Ma0;a)?6-Mszl2gYZeP377>Wi4 zz5DippkS1I;ri8I2JbIc@{$KVx#Ig-0eJUY8cLv;J)~a7D!J{Q4hIpRJ`I z_!^F)flpl@v@d@)O7;s;f41Xggih!#=a+nn_~YL^@$9)$)OT=kyv%D5>i^=qe%p2~ zY7~c|uhoq2*7j4>N#(!#=KP&#(1_~IWA>z?2WPB_eR9!DC@tXWaKEfssPEAD)ryt< zQR=5>FTPagjj~Taugq#6jRySkd>8YyuTk&Qdji(Z`51+scE9YVd z29$Q|y2oqZ<)QS34|g35j6?~VPCxw^(i$Ze#(rAYn}VMYF3Nw2Rd%#*s%&*2=`V_g=&qa(RmNM-)icTkh$tsXmvbk z_QM(RL+8d~%sa0#Es2FyZ?Qj_CJ|x_fO*_wdY$9lvhXmPb9`Ya*zjlFzlr8o+?%9isrODU2U6Nktv#R8VLZ?dqx)%yfT448+?YG)- zuJ>Di#r<(0r2ltk{J%TnrtXaC%Qf`$+-aW-elis0)uoS4Iel%_~ z8vAL>?_M1cj`SJvkK~6{BVERj27P{H*#vr<=FMR5n0YyUC*8bS-aTMU#BLy5ijRq{Lop|&0X()5&c5U<*U6HQ6_pz#)G&I%Y$>no$RH%Pv%@ZBI`yC<0%%flI z$U^Q>s)VF{k!bMe`%EMHt5M*G{xu0RV^Ht;NxOXi{x%vNTwM7C-YjZAsaY={lZT@7 zwqARCO0eDU++&`z$8WlVvSyx;51(a1Ss5AMe^T#_9^E|lnW39cp}}oeb`3KfM*Qs~ z$=<7i(U9Kjv)+%$LsMTno8&cb7%H_z4!?3{3@S)G`1$eulhMfcPLHUK>xM?ptUkFu zKLO>=YuUx;XcWr!?Q-N*Z4Qc2?Y{L+;xH5+elV}h_&(}+xpVZb#oj2%?c}~kW?e;@ zN^8vX!A3M{#`GRttS=(t*8IA;PrZwJUizp$s?Htty&YV>^mHD7ZeP}=F^ba&jr&-4 z*tiVI77yS4a{5XXk-d1;@?EP@L5+C=->w>s{KEGw^BM&TC_D9h${VsLy10)wmiPlW4rJXw*bwyfrE&cpHwb>V7r!G(*H+lKuDl zcRru5>s|HUtE;Q38D5WJ7+c29R>s)c<44ARNNRlBZkXnU3wD4VFt`&z6xZ$QqJGW4 zBlyGYiSH9U>+6*EBxrYxINBnqvwYh%B(j8HDdqazD48| zv++O3o(+aTh(+MfK@9wavJ+!bW`8x8>w&)XSt{o#N~(iaXUJ7(L>Z)-HZ@qZ5qWAa zdbSlkYoHeUSI@rK1!~?-Qmv2}bDkjJk}Upb=6AB>qo&moX9GNYrq(|hk@scq{T31~ zEBE0(mtQSC7dDPP{CUvjtPPvFy^Z%fv_YG=jV;?Nw^VwV`S*^r$?l%6;(oe)f8DH} zr@6grEB+Qb?f|>`Leh-WD9@}9eD>b?Nnb1)lFpv2NZf7A&fzx5#}t%q*WQuAIYVr1_ZE%?_mXQ~PFp;IsEKk6gNQ%TF@mY%|rlkZKvE#a<%fghlA+_X4>( zYWX@cQz3Y4Qwb*hspks4-k?(`skn#UpvqJ$O=_K1K0#&B0oh$ESL$*z)LI4M7->b> z23Iro2urrOL6*Uzww#WLJSKG$D;vn54sV^UW3of>Ku?&sCV1s%60zULfdPNansIk{=A0F_{f~WdRWIM216(YH_&;CAp%1ghQop{BnF#cVP>$2#<&<`GI{@u;XMrPh5Rsm zFde_ZFi{xS7jkRk1$jsYn`DOZ2D^hX&r`^u4T7AM0Fd~V80KR*i{Us1Jh3r5AfAJ; zo)EvMz3LnZ!RTN9|F0T5piVPAkAPPFj}6OY=hYDW8@ppZePz zaIi$4<{?CqM?Xm%VUqeh!7mygt%vGUc^Vh3-xK6%I|WJXQ#!4;5tOMvZ2))jwX{#M zR4!1G&z3cmUz4;0^^3+MZ4cU>lpi6fkH$;=ZzGWlkzf~)r+Jk6FU3^PL6T3(r}>li zC$P^{&5vZPCt!G;5$4K%7zQxHJcY+L)0g4r1SS=p(}_9Y6S-InZ-5?|=Z1qkk;U^8 zGZ5%WphxP820en)F!aLEn-S)h!Jto{enq?+-GBHjEMG+Tq|A zDG!Ahh~5yeL!K4jW0J_1SdbqKF_1XpKraz|?IH4UC_E1ZyF{J^`DBQHF!+)L_6I|~ zJw>`{pqmc$BtyN_hg4C!ku{F2Yh=xE2B0)&D|GtoD54P=2cS{9gNR4%*o#1{YcG0s z66rfy(RC8h#Xe9SDc@1zn;69Pi+z^XBaYdcFJchqOiXuTs^^2!92m>ksV~l=x@Ziv z-N-uH5JNN2BO9IoM%c>Y_X~ap?GAe64c-pqN!b;6MAl0bhX)3-R+GiFG3XJ#CsVZ- z^amLyknskd4bA#e8BmNWE|?kg|idRy~aMh$0Q}lS;}Wi=IJ&Wd@4ABAIj}-_oyx76 zX#UnZv;E)lLqcL(M4IdQd1}~`_Go^chyEYb=+#a#Y^lj{7CUk0agv?7FYH~-ba}QS z-)K}Tv`p1D*1l>xTSxxNEX-*0bMX#c`AWq9_sk@j%Lt8*otVa4}u|C@s4UERO8WbLN# zKauvT*`Q4fU4P=Qr1c$F(%Z`yVZ_uicB*4s%X+JBYb{@##ePCZqyk}s!Yc6o?PmY@a{d51-6+?J=-OvFb_n6n~c;W-Btmn=fY`tDye#<=HO3 z3wGuQ@;C~q2|j0ctdiG@DY)9Z-@f_*CBdOArhia!v?Gtt4jII)s9(Ra$G$>>eI7PD z%a{F!Zs+BXEa=9|oemzs^B?)G=gV6Ugz@Do!{T^(*SW#G{Pgi7dH#tAPoDqG{XWmP zDHZIT8TA!kc39Pom*41^z|((P@gpz)%hHxSelSVL%Y|O>=lO@w03sjgvDusFmzDJ6 z>EFE5kjJ|gALHpi6t3miV-1C5s`0~Ye8GO0oL=nvoUoO?{ z=lRFV7W3shpYrw_2RKjS>8Bdpv9RwqGl$16H(K-jgW4uSo*zBn_3zblw2*i6UB=VP zO>YS8aOtW*kG{v3ubzL)LcYH1H6f4LYk0kjKfKG!ZOHTG@$LQDy`#ciBIiAeVnLn- zc^2e3kmtnmNf%Wt_``xf9Qeb6KOFT(Y#+zVLcAQr%R#&>jaOu!kbJaYB13SlV7hUZ0b~LVw_d{@}wwe_)|MutI+*r1m-JZ>-SYLRq1| zX<4Da1#m)t>rd@-(4Sf8&n)z34*D}I^ylup{MM=MI2bQ1j2BKAFX5~(UgB9{yo6Zz zgU2HWSD(qW7Ga{nhsUsjmI2$=-|VsV%>@e%H19 z%kp1T&&%>(M1R?KsIC96nwOe>b(M#@D%VxqFFRiAIuEZZ|Eh6Y?qe+G$Uw$??rJ`G zCOmovDLu*z1U@;ktnIO;wc|=R(5dB-r_}>noA&gf1AXYo)O=)Y{x;62S1HYVmErVZ z7=56h0y?J1VP|5_j;!i)bzx^*ZO2P@n4W;W0XGH=pSzmB)(8R|0yq>f{d(gyz+r&H z0Y?CC54Z#1j)37aThtkF7r;%cXgmB`BuRyzZx$jzWEV4r%kj?+sDn|9p8QuGdeJ-(Ks{=YSx84}=t`iDnUr03j4i4O}4t_6RL25Ltv z`fyVp6u;}(63zF=(ZJ(N)c0f=C^q`}?!KSeqPW5fZ~u0=73zCsZ+4F36O<6Ls^x;i{6p05;lVT_+T?uJyA`112MMbKZzUyFN$J+x~H&>A7+gJ?-hb z9pgh#WS8C9?jLnQ?5PJaTTY%p?D(0oTMhhCuZ44$eci?f^)Bpn`R1ofQPkM9)IP22 zp_qF|Z9Z<8j(V-n_J2O}DC#!s@Lp9wZxlIqwfnwu|K|KZ>xKje-<5jV1vFR{O% zm{}2H1~Bd@{^FFAk0LWrTClaGwuf}&@I3>cTY5yc+*uIrS*gDCFz3BLr% zmZBbxMSBW&-9_E*efz;=S05C4u-WC?BTG@7>#}ys{gYAr^Mqlk@hpnZRYWCgQ&Gon zkDZU0Vu!k}YSC)Z=}V}~^yIPyEpWU)WM13w+F{gn+R(zHHHC+eNQ0+5KRo`&C4P*AXjdu%YAb> z>d_tjFULF7kQqDj{W%SY{}$btSwlan{1f`oAG=|`MWr{4(i=S?%NcjPubzr(fYR0( z-c7V0hcugqH|Zd+sV~UXnHH7)bzc|M=z%#-db8)xSbx3xD{hMEUr+t_cKE-y!!_Lw z^Pd``SsM3e5wZV7(brzTvZDWSq}<|h;8I#kq~BQ@ZmJANxet0gU*G-#N*-?vThXcz zW$tNr+u>kKG|AZfv%dNQ^v?Od^;fMOjs`g_?=ds36&mqqQTv{Ol_+&tuT~ouA~fJh z`abuBW$5)ovK>bwgOEvNW~!h5;on3jXonJJ-;NR%7ICdwqvG3@Yi9vbV$>w^DT?&+9=HYV&K{ zKHcQasNd-aCz_=NprJELCiRJ!jQTI_rwh713UwKBXw&BVAE1%Pp9H_2G8-wkeKR6x zaw$q*+4b1u&TpbVHxl2NTd)(QdNfhx`M9A$i;DM_Hm~wWp-&J~9&lW9y$uS!o8~oN zlY@G4hPg-Yrl8>;jo9&b*+uq=_r~L8ui3xc%ZlW(=OxycD}ViaO0Ar&@rxj_xH~US zKHX9)73Ob^Jb7xhD=F*wq_<%O3QfM*B`5mlwDdZc`~jTg(=pLHRxNq<#HxV`YSc#l O1?69kIzEf~dh`9ALoF zVP>2e5ED0Qf>EQa$Z7T;_4^MZPb{wbFFh%VQUb$n7-{EE-qC!VnRBQHf^dG=?P_Fc+uHyWP*&tlofML(rzLihvA-)hn;J{7wn!}Si> zBRArtR+*a;3&c1hD|l!mZWRy9nm&1TO-RC#))Ct0U8jfH|dPB}@^ zj+;R@F^siUAA^jzQdLz|AvGvVqck#uWI(K!tu!gaOv+4+%7&6-z#1hA8ic8}CY3>} z)PxyTnjC{F2NN3uR07n!w?wTBMheA&(2S$SF_S^X+Hua(>GL&8D0$#kRC*QuPn#v> z5W3TW46nKPA4JaqLlF2P)Xzg4{DiVAXH({Xv6Sn9y!2Vh=Pe4VvsP!wQ))yJWReaw zXmk)+>LhxWi=H*miqp$SUu*(X?Do`27ELZCF2AwjCv3uzas%&+ZNv+c=URN1(K=#xsvUGWw zYORvg7+FKP23epHBa5PQQTQb^Mok9J&g}WZLNTE^YvPS)r)8Y#h`C2t@ zCXYIyn5a_cjx{j`XOlWlt<7aFsS1@@CZ`Om1Jc1=_pncW^*1&h3e&qqt_oRF`wQ|uo1~pz8)Rtg) zT561XjhZ>-Wua!~XN^}evWGwxx{!G*B++7SI7@A-hiJ<_)HyG|=o}R$l5SL3a6yj%h48HhTk1lDscs zcpF265Ff5DruV=&8si2+Y#n?c4ymA%WEfx2I~dn_3o&#+5R-cV#Q!V|3o)F;a0CNR zY}|J6XCJI5#O_)6xjDo}gE|Qd1`1eAeoK%H${|p03FYQcX1K#a@;ijuL%9`{J96}g zf&e!J`8J*DuNogK`$K(eptG_s$TNE67J=SeVjtsU`WQcJH&~+QRlv+oR+jq9;=<%v zJu9<%X74M2TT1v@96}^<43PNIUeca7*v0%~?J#-9&-`NTdjmh~r(lVGMrZ9dhBC9K z9pFfRTm1}6`GO?zY+XaSt)w5AUCbY8f3W^!_2H8Cn7_>ab`ri233dm57DuW5Qq1I> zCGlkSEPm4Q1o}B^ek60fKZenqkSh~04B&)3h37WckE1z(8v@Uxh&rezd~q1YfE>wl z!-1dh;=IHS1bPa{k+xz$j^GRoy)pFRg#0oX#2fIjlDEL6+(m@Y-R)UQwB3t5se=zt!{EP>=WU#fT$i|`YJQVa2ejfN!!T-Tv zOA6>84DI$3$!37;C}<}Y+GREj5%n8cX;rU z5r}1-M9;1wc^5mft|GeF1|}n|cahj8266jho2BiDeYR(d7{oCX(_Ojh@nAG(&enHk zi<_t|<^$_DvW_;w&>ZB*hNlH5Y-REL1>He=f*g5+cLaV?_JBIV>m%~R3j3_(GinwCMunreKc?w97VOew=l4MxL92FD}qleV};&UlY!{Kb3)v z&4Gt)+gZ$fK+YR%Y%a3V8)D!Ex>`Uz;R}HFi5>m`O+lv&{PG3;{v!YDi})IX-`>FM z0XF&yb!5LHtKw|WQx*zzIorp)z!8dL6=(mXm@D3J`wz9c&*i)gIpT}3oDn;X>-^@{&5QKttPUgjG;w~imW<_Lf6YTv7;CeGlY zY`!`nZlqI(rTodkMRWU~YRf+^dme-3EhSrCFedn)h^k(1COH=gUHzzRMXk z&w@_{^k4VNn?Va*k*mx%8r4cIXWbV%S@(x^WUtIdjW$0IZ+Wv`s`!TtBsP63tHU4A zoZSv|)+jSo8g@?LLQkG8Ckpj*lvyU7L7`C<5FTsJVJl`3C#)iD6gqOYt14T0$%;c( zPY;lzGl;XUC=FrMlC0hc@&H-I$ST8D4YHb$%uiMqvO=|h_I&^oy&Pvf?i8QC$~gO{ zuj1oTmPch08JR>*CXtodk(WvMWfFdwgkLVHmrLp$Iomw%AZbHPbF@Q~%F6BV$?fn- z+m+fXrb+pvvT_N{k*hv-?9uJ<+tXuDzjR#I2W)T5vhmDu*6||-SuH_cOK`LUVmsKF zl0v;Y277I=t1|Qv>?@UyEz$2lJZ~aun$gH~fxM#WJk;BI|!r zu)X~H_m=Ekz5QRLy=*jC7sJq>@(byGmlf7}v60>-DG23xf+g-dAUf zZy12;D!Buxek3M6+-PV`!+hONfMKVYlzZ}iW4#cb3WOsZ_o z_TpJ;k#!SN%>P_fm9gqrr5UHK+)3NgDE`dpbBceFccb{wXUPPQ^?0zjkGiBmOnmkg zlIGrxANLRQ+Qo}cj+*tD;xA{+q`1h#h2rm&>6BkLbO7)@q;j2)9;M|Ez1C8!Sr|(3 z`dbB*KXU0^&^H_S)=;?)p&7K?4Nai9V9_@={9h|yqj>qP$%5WRKTx?|4{ygNwRTz7s89qxBC0*3$Bpz3plF^00Wy@9}<1%0Fw;2wHzM+?&>y zd;U!89m)hfKaTvCmYr92r~K<(`cwK(%g<8&+e=$he1EEr@`auYr1ksJ0Ky;SwaJ&( zKf9Mm>EFNEh~gbf4paJ>qA#eN<2P?J{j|(onn(GH-#tS4p0xOgmd|(0q~*$^i&=bV zxlFf*)*pVhgqH7oN%c2r;Wm}hk2kzyqd#Ewc#7Sxx1sg>v`vLLKe$ijOY6DV@YnZW zM(Gu%F+x9FxFXPFN@@AZ>B%@`C^;e~usVWS`CD<1Nd8S)hm`HE-x3iH=1U!&!PeE7sHZe)S z+d=Ty_HnLu4uiPtlxhNJL76QUbe&Z2J8#C31Iks)bja5FyIitp@7-v5N!ds2OI`C z9B>5SPJlZDhVNEU7rYteduTZqHRH{!Z5m^EIEK*v9jFoLD*%^;>jwcb}=HD0$wh0YM`gqu6KH zWCzyHKq%sk4;r_PLxWDN9r|`u1saf0xI48#j*@0vR5tkZdz4)IeXmU^IiAV?3HNXkx$g6vP={g^JHhgFXbq{=-gj^z4R*TcX@a2c$Y`0f5^%FEi-p~O$0g-=|59VL7J^__%qDwN#T z!%(s;3-w*%*1+HWI_h7v^_8M+ktn%jU-C_lcTm#wvL;uxPpZYC|Gs%s_O6LWC}v;O z?tUXsbomz>PJNYz61-1#+~Ru~#Xjj*9&+<8igPM%w4#q6iYfmg&b?6z>Yq6}X;5M; zivPAJcly1HC_3VJN%@jY6#eyq3ggdC2yMt2J$}hgs8|0L?_G2bMri-pUX6F0KnXuD zS(4Os8H#cGcKx^WJW%v$&)K6oMxxl&ZOS(cn1;}e`LFHYu@S{={oCGVrz%j)%*U&@ zPYOX%U3cbse$*B5$M45(K6VuGlV;0qHVj0)KYV}LckTR8pXur;Zkj15dO~02Aa*_r!6rt;@|Q>it!oCHt0Ci$l_ZmIu$|jYHwrb5k$yw^8hz z@Nok;Pn2+e`mqO5nJB)^;@)0mgHgn`!E0r|c0&nMyDV$C=`l*2=lA-yHI*na!bev& z?k&{yyQiO@3LJxir^(wt9rQhFkUht7ykv)V=hF1=BqIP}IKWmu`(HL-8KVI(`$ib6s z+bl$J7d)=LZ@hyd&j#if9t}m^FD5s=iyBK(kmK*4sOS}Am8J+qgrwX_QttdGo&nXV z#g_9MCvsLXdD=eiZGA3*t|utBW!3>gkmv7ABR! z^3gSk5L*Crnd4Mh)-RuhPpdiWhCqBPqdVy z6Fv#37}CCm$T*S@x|u`t<8EVi4dbZl4;V+kZ-(U-mEAB(Z}fyHXP*3G!SqeVC?nQG zQRy3o#y=dEI(Ox~8a$S%vuzsv^S;rj(E^K~>}D^Jv;TVaXWSICKS%n{?eKqYhikeW zrr++NIU3LM@VI}Xn5&~NFHbsxvNn6|y^zrw>Ax-uGgXD5y!$<$tc|#jQiC7mFG^O9LF^kLgqsceVuguT*1Py%qZ(nZMJs%B!V{c0PT|p?#_r1q+j=zIa z()!%?U%wNL4t=_C_nf;ZZ~5)cXS184_(SXCXFl>o=|}%E&z*aQhTd4@KEJm=8a2cr zXXPRT%JKKTx5955npk{1^W2P1sMnGynP2UC1;w1&-g(xCU!j!km0P~>7=qG1z3290 zc{K73?{#M`8i^to&o0a8c@d?ZpLBf2_`jp!%{-c&OWlY%MFj-he!JA-cb4kt1fToS zsNFB|CAw*wP~wUEN1JD~KttcXH??p0G?cV7Q5Sq|BBT>STsOUHP&O)j4R(|yA(XD7u-x~k=-rScM9tAP2Zta6_se3)AH?TsQSZwHzw>tV{Yp+29!KN zs`uiyUU_F53LkOVKOk@&;=hg!?>a0UB}FZNcyY^hGjCNTz$ZBco#~YB}uk zsO3d%5Z}t>+S4hG(XeT6_Bj4qTfXdHJ`c_pUU{*X70G4K3#>1f|NQrqS{Ym87eQin zPs&a{-BM>ME#De>Q);ySN8X*>^hhHMVaK&G0{3!EqG2utAR0U R)JFP(^51?0bCbV3007>BCZ7NR diff --git a/tests/testthat/_snaps/output/output_lm_numeric_comb3.rds b/tests/testthat/_snaps/output/output_lm_numeric_comb3.rds index 630236b010d0600923eb381156fa0545fd9cc80d..7ab8d808dc5380cda9513ec7150fb592edf0501e 100644 GIT binary patch literal 5039 zcmV;g6Hx3QiwFP!000001MNErTvS!}Zx$GKL2yB3Q4x1sa6uH`0R%CYSQOB-afAUT zVFrf*5v?pSG0Uvfk}p5)D|6{9KQp(=2JJI5chfBQtv<~q(-tk2|Gn>>JM-o-r2eG- z{r>-Nejd-g_bm6EbMHO(y?YG<%T%m0M`SWmj=`AE$|Ln9rA$w%$!ICUifWi{imV@Z zvuSb!>&u&fayt31zP|pp+@QQ-T^Tb-iH#olI*TsCqMJ~xcW5av6OA$jeh~(vMQ=9h ziX+PO#RX=40cmWBa0j93y`^fEI8y}N9=lN^n-@`~FV&O(jCrg(ccRi@HstAwvB9lS zS6)_T&>0&th~uiD^K35phhbC@2t}tv`U}9}PprFf4t4&jt?q&H^0J)IR}x)Uqsd$% zSi?feBo&QlR7fm!k(5=EaxwPe^5DZDHG!#D%eoZ zh7+ry<%O-`ZHnCLf;QC$b&h?Q>1Xnc4vLIl(*G0q8o%{e{oI$AE@%BS{Y;+GAxraA z+qqXBm90EgzM%vot;E50k+HZD7M2D4k*|pczKPVZh2#Q-zQRI-j(g~eI|o^&qU0mZ ze}Q)Ice?C8H4b|FzCug?HwRTy?|dZP>i^0kM=ey;qssPd&gn`^%_dzQV{cY!*5@1Y zEC!QNTctOfu-V6`%`=ruFc@`|QpP$eG@Hslxa(+Sc*(6$$Og0Ev4lal4Gja#-O%LDYflMMJBVM%Es*> zSfnl16|znPQ?n8(wB<$v>83<*LOWS+C@iut23L!r#9%CBF6k?Ec^0P4s4dXzEahgY zPx{KzVx56?s4;3SW_#5e`L*IYW}91?RWz@3B+xYyA)XquedMBjvGLmPV z4uZFB7mUzbbZorrjgnxsu(wJRw@7SlC)!3*t25-~A-$EPp9)>EwnSGcB?s8aWfq;; zBBlA+X?dm!BFO^m$#(RJl5#dNY)y>S{`8>4=S;R!(w#Al4g!JG1dfpjPPQ;bD^ezp zPTVB|VbnDOV+m9c*i7IG;fY4x=A?d^KqV=A5(psWQi9tN{4s%p1hx_Qk+jji77*Nl z;FAPL68sJ7?m-}xKo+LRC$ax61U@3X-RWW@u$aJV0&@^Ar*o8ml|Ux~bb)>-+Q=Ou zazsWEPavFtH_F@~(3`+50y{-ps1J&cV){|U5nM+w-P%tPm_eYA7$0spX&*#zEWv(a zY!&_(hfLH-H=qF2n?Y#4Vhk04F}Z?3{m&yXpTJ=PdkM59umSzqflnLM?%AXqgs~}x zzT)Eqv=#S$P+~m{>m9Kkgms2{?xMfl&;#q8u-=Okeh`YdIm&m96n+i(SUnKwow1$O z15lo|_ggEr2g&SXd`utXXZ?1N>FI))`N`^Xe_33ZJfpKZqceNkBkm~UXK@IV#j&l- zj~=rAe9)#jo**JBO>1XY%-rKLh(6ERh|9vg5FyOzfB0 zFj6vZbdRI^8r?J85m=kMlXh0tNZKfmE4HzAR|$>jagl&j)OO$yTfr8dj^k^1b+7AZ($CT(})8peaQxpI!aGh5sxeK8-{xY2#I1%V)xqp#c{ z1nAildA<-1T6|EBz9y)VpVqyQMtS`uel#OM_iDPCwnjNh_vP#-1;NPUhCBgCQ(&JO zthYgXG}x~X`q7HB9}m!SJM=|^eR*S>7xJ~{?57t>v@r;Ig3&e>Gk=uxMH_=8Hu_== znxU=`q*K1O*gv%+5TOm~RH9!2s6SBRzqf?1Ir{C3yk2NyfJmb+MM~T2+;mPjcAGD@ ztJurZeVnwZ>pABKY7hPn=kWg4G_QmXPt!C?S_0pSPsVuC%63566O=@TW?!0}QzdO| zSzeL?ZCG%2K>F^@yUTJ&Lw7>tjAXYV*IZ6jL#_|$9c};tu>K_p> z=DclaCGba&kBS`=6zeAFi`h%JMmUJKy`Fm)=<+Nkv$j}YL3!+Ji|q`doR}?@IC^T% zara<549%=`m+(S4x(1tZ_RLAM8Pyvgt|MW-(`-$%Da(#DYtjryvnb8PA=tk^Vyc(p z?1#S6GiW8}{0v%pNX+tuQYNF6$th*BN+i8#euubU-W9L$6MlbLe2;!Gzd12S(eZF6 zdwIR>=(FLyUQ@Rxf zve@ClLc@;g4f)bzeKs_*i-(3+5sEaXpsYw&TCAUf_Zm!r>|)17xQl7z(fiaN@aX-& z=%4*UzRYB{=<_+pTWh&R;xi_lqjmiE_4Q-k+<(VP(p%w_^;rU5>atA0S5*TAynAUX z#YJ8}EgEXr<`UVXkEEz(cy8kZwUCESv3N#_RzV7GOy4kjBid+2{bXcedXRjCdcP)q#_zrd)Ezo~zyHcpH{j7&jKa-On z@O#bcDDcmo@`ym+*V|X1-}L!jpewG4dJc`4uiPdl#X(HxIYlVing*1TU_aL|$AkVGj0?^=QTQ$m4qCaXs?(_2}#QXyzbZT<`U~ zxZZ{JLDu_3UR>|Dt@icq>$&xoipPAyi}_?KFXofate8)DF`wLa&`a#lA0pp?|Kq`*W{vfk78%Rz-ZHflQBQzgqY2T1ZOYyZms57bIy8 zSB8)72Jz1Y{atfn5Ip?r{MXOzj)ml{eqCoq8X+xeokGpUKvKr(sXe^3kT^5fKe*3L zNM2(d_UMYSko=@MDWiTD#C6I%4$EGFhim@x)6XqCLi(7*m0h?Kkg{QMKQ3f1r0n!Q zwD-NWkevNYtncY4NX^MKZLQx4Bf|0$(|txle9B9n*`vlm>cB4(2Npa6$uqCDK4<*3 zK^zhj4JoZU=0kkdfKFemCH~iJ{4ti_X^X?KZlQ5KZ}SkJ)~+CbUoHnl``#{uWMo8ciX1zdV$0t-tzW_U=J8lTQao6{+-cGdW-J2WThw;_lE@#Zit%mn6hy~rR~%jI2QmIxIThc20x^SgwkDnT zhS*J?rr)Z{har_u-i+D23I=!IQQtlA3=Em~rq8w=wGHAhT%mb6_@4j+|FvXk&!wkf zn9*}UMbUbQKiwhJGH(|o?Ae{vW#cA@9?@s5>YX7F|G|YTFYWV$^mEUv)7p1~m>-|- zVtRTf3^=#fZ~fdg5O~h@s*8F7^gg5izCSe2z?(CUUc(WcGDHc4Tvmxxm6+PaK4C zL&jvDSA7X{_itK0AfX#fOD#N8y;KRgUo<=L?u0Ec{(}~W7RH2tKBdni*+JEimy#6Q zu_hOCZ~i?k{_X3~`>R#DAD(f6*wj%j3wN6#cV6I(xg96Ms7r_6eI_*+a{ZV2PZ_)u z^amDos4|U*nUhEO7$c7Wyr3)^GkQA=TeqMpJHrFAx%CqQ4&8+G%`G{jcM)*yuBUyd zH$&HjkvrD}9)|IQ!kY!Ce}IXPfADVHqw^uw6n(0{X}`_yl#AoEZe_0k6o2DhQN0r~ z);t~>eX0v&RVQpZ)N3-NZA!>}Hem+vcaFt+u5JZM12(3;6*>y0|LwvckNFQli8Ukaojh0tqnDk_sq5VX#@1Ay*_hoAvgWtw;&nV6(!IMJTd2!~?wUQf-x!z= zk?ZPL^~%hL%z-uQipLFxxcKqwryO|`vZp-Vyw@Aapxb?5?h*e;D2zJs{e}-ZL7&=B zJf8n;0l<<8!`GK|f~49*^McJwAgx9b-D|#uzwI^a)|Sv_(9R?Jz2={pAg3;+=F-i} zFm`8`tAiAKV91EVJj1D0_xG?OyX?7-{pHM`|D1A9#y0xpkd)m=V5eVp8S-?tZ<2h4 z)&^Hn&g02$!zy&ePx{;JHw(X2w`-C=(t~{s*DxU!L|o F000oUCm;X- literal 5043 zcmV;k6HM$MiwFP!000001MNEtTvSE(cOS64AA%1=9tk48@PUG&cn1(d1>~WCrmZV1 zunD`gyAYC<8Rj#!@{#&C^~=(OChE+FMPpW5+auM_PUbV(*FF%Yne}O3BXY0lGWzLl^;Z3!Gfx#Q zzF1M)8hHh8e%ZfPwJLP+)&nW5oj>@}=g)JkdhjdUzpmKv)Gf;&-52k>y|)i`MULBL z*dtfsq+U~|HQ|B}bxt5E^#;8b%Bmbgsm2(U&6@Ib`a)J7sVysKdeRMga|u)wLo|~l z{kR$pk43S*JTb_p;QtyL8g5GsDk#;Mm_c$Nx)o~7nkchoLaEk{Qe?y$B?^3_bb7Pa zsMnN6nY5)vMr{!$wgI>U(DL3AwQ`s#6h8pnD1prbtJ0Qh@qhXP)}1R>sWa*dG^Idr zDb`e&OgfFeF@rF!GBVHR;(riE8HP}BN@$;lIQR*57tXHE|75MZfxNUVYw~1RGw_J73*-)ek;jaZBNt(8$t5qJ_QC`UG z9a~OrKy5D8MYJz*sSVoJ5Y#zg1k=ytSv!(t{g(OffdAp^PBhGYapfA;Khw|TSv$&6 zJsmjr%EOYCrzq5wL8Rp{*iI4_7tF%4fFJO+(7?Bl8n%#}P_YkPXeewSS#jr3j-h1u z@m9Z~4)1pQ!S(S()XQfvDgECZR8@oHk#uSJCyyMlP}Tq{A7FD%Q(kT~XbKp+ceznp zs4Fn*40?67)@T5-mtI|9D4U?uYe||U71c_%v{n|X$s6tn_gX{)tD=c zVxP2C<)s=O>rkavn~k=rC-AFIE-NzhMzHw^tBM< zju@Z9I2z;cKzDZx=@@b#MLq@n@5Hbj^L8VP4a0H_8!*fUyoStC3>FNXFpvfMfnX!| z6_&#?vLp-<7(7Ad28Lc3ZeiFX*g|}eg$e0LmWXjJ#$;=+$M6h>-a>r12QhsB#t9hv z3bB>>K^(F{C)t4fL2njr^ATbwLlBcI0L1?S3=1$E#jqbkAcigA&u)0yAa>8hOoLvxW^adw-C7(eT`qeM?6VCE;QOZ{bWVe+h<)mb~U_W{6R5`Gqka7i2k zC4O|5^ydS1G5=UUOrG&GzgYi1z|Y31qeMTWvwqt^o!Qe3aNhuX{R~U_LM8F++(f;r zWE_}X%pd7^u<>N=y(IlHf0_N=Bz)l#><;`aj#B%jn8`Uy;>p@s{G{^<^mDfSNY?r= z4EWK7=E|WM(l{Yc;kC`Bax^D!BcVKwsDpOGHw43YkRy36ANUC`&P&_~pl5&_=_?WB z2p)qW9>ZWx$S+wS&&nxa8_q}EKn$58+eTw}2;_$Y52598LaxmRyGT73d?0e8Ko2Qv zz{U)bEkl4m3w$7cCWBl$*cvCYF&E0YpqKFTz@H8NXMrslpg#-x9Vn6=1G3|wpKR!t z*)UQxZe))m`x@CZTmcx()d8KARU#VUaRwTrJB!+w9w!lqWt~K27m>Wmfvk&&F1CTm zNZVBs+r%L5Uu?6qAF;S8+{-K z-k>W4+6iAE^iS*v0B8?7<=~e;=noM2?Wk-@VNrod?lw{%%=-&@8(aUkR zLtpV3w48H%1}#1$X8A%ck&#Q}!}$=T;KnWPUfP3eFp zl~p+4Q#jz0_A9kjOq23SWfc;dl50G79MK)|JJRDwzjR)D0(LZK*?i_W+x!uOyqTbA zCMX?%*bX+Qq~O-V;HVGMH(8^h2~jmuNI>}A!hU&IywR8b{;{~3elWi|AxDw%aK(Fh zgZ1dM@x5M4(;D3q{$mN!3(S97y@j1-QvsI9Q$LYeqWTxRJiQx6Tnl5D7RJr&*LGQK z_h6x7NA99# zzbqN7+vS^>TzHnG!&%9XY_+_;WXaKSGjCA*=JXj9*LbKX{zx;5@*8r~fbUl-7k%IW ztuOL^o#N635frb#SV{T&F24-=W&__^D%T@o46VnaM=7pcxWkTrn`Sh{t1eC!^e+6A z$_@PWBCQwhyTk0K^||hU2znNTQk;Nv1b?mE*FfcxD$focy304@N>~Js=`UQ_8%^=i zQJI!izP|0echwNw_UE9Zv>u$hneuX>)h#il&^Nye#-ZI$YNUm zw&w&|zjI&-iw~_|GkikZ_uX1b>zCf7`rC!LPNDQetuEQ=51c)bV)yf1X!~w`dm+xh zT&MCsxT);;Jp)!y`fc-gVH{4K73hgS(E8b9lkNC@JBeS3T=@N@gTEJfm_{%eY zMfyoREf7y0;>kliEi9g5d$`ljTZHjC#KU-5gz?hwY`h46`NSL^=7U9;4{a?lA9$D# zyf7bXn0^b)H(r=;5xg+p^t>?NLM+038_x7wU_SFOpLv+i7MRbxFrNod{!P<+T425K zuwE>}dWqtN^^(F1>m}UI9=skcupW6>k36hL-nJfn+#k*!zzgfWffv?0T_1S8PvnL5 ze%oSO?>_Ezw-h|&6JE$CkMlx4`OE_OgctJ3Z9DxqU-6KyERe5w$X7heSD4@J=2|^3 zwf%1W+-v*Y==VAf&9(o2cxrQ*<1^_KGcUe{|&$KihY@7Hhp z_ZYhz8OmADXRQa+q>Wv2Nsl~3p`9FUHuw0`(Xpl{$Tah4)ZxLdtT!w4VTEX}>7!-q z7ha}vZGrV&C7%`YSb=@erpi&nn@OY3n3UjQXrnh?_J-*L*dK5^z#RaG0`3Sn9B>3+ z_IW^8z}*2y0qzC3H{d>iqXEOGx~L!E7{IZB`vV>TI1cbYz=HtC10D>R2h1FXIIz#R zQR3KaN~etzLJ|28OPt*bg}gC0F7 z^Q)Detw88SQW_h4g)zPYm(Opr}oS>Ig zCkLPrzb{yIZf^n_Ugz6oTC^Tz^gL7$e>EJX`cHJ3uAPP+bY9jcY0eEaeCwFH)HwlY z`1Jj2Yxo?LaPaW1{#lpMh-K{uj(j!(W%l=aca5bIrOo~(clCl8l&*+W-3+cq!*ica z@HyQdrRQZE>KgW-q2UE7nO-ANQrZjdxueFR^w=XQu|*G|;nS|QJE#A>Q5;fIbZKqF z3Q{kOy%nA0pQMU%AJvn&JJ3S02 zuB=b?G~*u#MNOKwW#tZ(cqnC&c76>SkXjr4@#u*t-Y+Mw^880Aeqdf*>IF}fu1H0{R=oWAW4Vu5&YuE0jjpFd2O!Z>$8wka|y<%mLm8a1Vy?ab$ z$!3&vx?`w${ud~D-`>>7t=mxC(B7LAZw^99?_a#~!U1=bdG0x7#sghZ{Ljxt8m8_+ zG3WOCZl1dl1)OvK$w|2o^*W>d?L=85N_riA>QU4iB^@tt-x(T?`09qmw|7UQxHGGw zjIt+Czei5SoXI?k`gR^xKkpGsdfF;DE^pgMaX-HQ3sbLe?0Rq1xiV; ztg>_*iK5o*?tjUcj{4W9x4(?qNK%mU&rej!)g>BpZ*^};y^o|G{WqQgwW!6`^BX5} z2wa6Ngqtk8Mjy4h(eVzu@oMVYkh$tqXtsTc?1d}h?~@ygG4I^UH6#{htw} z01Oiz(H7V~Hx|BU=4>w%3by>&^lxJL755%C@rJBmAEpa`g`tgG<2fVM(^^k@ z>9?YZ$SBEI)XX9JMYXB0iFwrU7tEtSH^X|{%5Ea1H-AEu3&WzbPNvO8W0vm;3g2%+ z`uG#6kzw^scr4cy+I9N(eY4P{1y(=V4PY?m`0?uRxKZ|l@_%oU|9gwv)GadomJOMY zz0kg6=({NSc^9u&Z#tuKG0|l&woO60JDbPP-#-ZH4@T6YMW3V0s-b;no;-}k4H}bu zL2(4lJ-BU6OmbH=IlcHy%}P1S|I+)=TN8Go@$a|(YEgU$(x&x(C^x7E6{Mvmgw4oD z`8QwANLqg#_4<09=IXOfC^2MfbZo_EXteM4GrDCTqwKP`<|lXf8Rg5nOnY~*9%)bS z+V3^KBYO6z$GXC)nJ6LleCv!t9ZFo+e$UkU29)>9N6(o{vQXCEKHiHPT*p{3>EBP73-#L-szM(BjjoF&Y zwF?iU{MfIzpE_KPM!$M4ueMirG;opAYS;e10rSdnx03-TqHe#ygQ~XTR;v-}ab(YiFo8>fjdlPOHxhD6ckc z#^swops{-*e;Od$hXxHTF3{Dty|;%I$z{(y>@R2j{^yit8QbKSLt=I>%1*xQ(iLc| z-z51^YNIPD$MIyhVU?QF3ad#j?9Y0k?m_;HOY)~mqJ6AU@SKQNBV#Ppndk$`e*kGH JVd?fg007rzofQJN!>8EnixuW%?Y;%IY|WnTBP~nT6F;0m-zIr}FQ!&)S!B4^Z++ z_5Sa5KbPyQwbxpEt+l7K_dbkk$1se7Q7Yt&!VW(&c4I~-^c{?OZkSL4w#U$s7ew)c zt2QWK@5|%WKM#C~#|yUyv$TA_=b5ikEuFKFYt6v$IMis0sVQ){HSM+234qDEgTHJG{ z%kzm`{o*pSu|AHg+ts_;^`}Z}{mk>sTRUG^z}@MVveU8hHhU}1boBH$yKpFSTrR^A zIq^a2HN{#pesCed2~4Hlpx45)LzbaPV+zfrMPqgPT>3m(TWq9uQVn`b0T7Bf%?wFB zPA0>&P+C_94DxySzlMf}`%;H;iZo{GkQ|upay1rBs6{iSNNdB%GhvGo1FoSuy+v!% zYl=e6+M+y@HV-qq06YL_`)GNya_A`pcY|uEU-mf$V`-5F+DFFsmu2`Lq(g?mADk1) zmm>y#gl6lQmgsh%FMXEE*?6Njn2I$;qLffgSqm0rB2Vo^&kE6V5!7P$=;;)@(4iR@ zWY$Dk^F(Mn=AnObUfzqX`5`b=e&3VZp{}}hE@V1;d`FM3^S12d4s<aQ=`A|x>(<|7Gm(rG;MkQ8#C}(TV&wJ;EF_YL+ z4^rPZ=NEEY)KklD?b>U#PxYak{hsmm9rVg_Zps8t%jRYNw0>&est@ICP3|XO-T0Tc zWEd!Nb;Xb_Ii$r-BI1Ze=#X>+xi)IOHZt=-@L1LWCc9D3HAbV!pvj@)os1@Jt}e%- zGw9VbwI&0wJL=UrhT*;fdL6E&m;|Aorq$&aSg3%#MOUoT=hKk1(=|C3YE7@s(`qcGCUH#K z>Bb_Bj#lWPS6fVtWCxH}3%x;LI^&p#U(bJ9AO*3zmbJO#FmXvCnDJnHCh8CC7wgN_up-f$@u`rDjssff?I_dC}Od1~qhOaSP!W{(5$UN~gUQL*441pNB zVwi}b48sQ)YOzcd$acnb4TkCXxjhDV{A|RyJH{VkIE~>5hTE9O*R>ks9vFXxaRkQS zg6$9tsTi^tVfL8|^&i0SK9&vUJ1d5D7`9_r3V1W$#uzvZJu&bde4pSW^99z!I{?)wS@H_RF2xC^0eL_5<9&C(>N(DjhE(y>eF&c({k$XV}JuB@-z>Dl014#;s}w{ z=K_Aw_-H*;pUTs?X#FlAPur=7#6IQIdb1Au5VyT?JB%i%np!-PLf%-+` zk+uhIPg>qzQXh?%`X4Nj3zXpYAW!os^Oo!V?7DOWJZ`P zM`B1}gn0^&ZDs^R<^*OmJWt}yK{+oMhv7-kr#Opk0)Rl zieVTd%r6f&wv>CIO9Pt6?}~n`IrsQ*9d1~$II9QkMiwBWz>$H2*kQ}qGv~uzC$y*jv~I;2dX12 zcaZod266pjpQZJPV{XnDF^F>}<~uS?^Fev`jBV`H7bj6&GzQvk{5tA^!3XsC-HRUv zUkv#Dg6uszf*${xtps_Vc7`%u)=d;gCk*^r%`c{1L60wYVH!7Gz98cWGVV|&gE}36 z?gsuiK)rOQ?+=a=(M<0;!1>b1S+(6F-eDo3d=mI(D z1h)L3oR{;4`guRR0J?!qImG1-_Ps>$cM!>ShPYio)){^^LwS>yTk$#Xq%Dul-fXr1Lup`O zT+bM5Jy#Zn{2Yc>wR!0Oj~cznaf5=IoNlvScA75PPJ6-L)yh}rYf8;#okq_zZe#5l zx3g{JuiVCrzO-0t(&ao7`Fr$x8oaf!MDROQ*K7mY7ip$wi|E;d136;0o>Fwk)8tqT zCUudvjF)K~taO3*XM{yh4y~tTY+nNCivqtu@{7GQ=O!KF*N~Y<_tz4obm*~kQy7Ff9_1|VKO`s~5|qt=*bmyLq|mO7p}9Kf6@q_kX!$%oA?z{X zwdXhSPFM2#kHrre2aTH%<|w{Bobc-0Kxa+3vHFYpPo!vDK_8x5$2X(HEA+nazYR-P zeg}H3pvx6sKLFQJazWDc%0!k%4VViw#v<(uxPPGrBo|P2QZ zve{s=Xmc6cn`Nm-!b>3ENB6Tg8yY5TJoSLX^H2Vax3dUdZ}%F(b&5d*pID#D;{xY9 zYli8LxT@lFtN8h9N&M7zxHs0UIXh|bF9dH}xQO6#X9t20X~qzFLv{+t{Y3O4Dk};7 zT&Fh(E?N~t@SE4lhW_$C z^iS6aoqO^D^`Fqo+W%Xyv&x^~Sfu0eIpxU)qBp#(YS_plu70%vK`iF~Z|#W)g3pdg z=Qg;ycI|YeoX4KGea;fvH~VcO|H+5qaP4b2asZyAn~@{{-068gqAV!x}O(>%ietn&>U``$|n32y)0K_G=ErZ zX#1FzcVk(QXF;9?c@E?`v3%N>S{D3a!5hVLd0z zho?Yqi%`$1o~$q*T8Mq`ffG5HZ$a-DsBaMq^DPVW?T3qxxP0av>BP@p`MGD861+5M z7r_?;RRmYp?&EPmNSvdvemGdyM@HB~`-|3y^>j5CeYf%?;Ve3Pb$}v z{%_fDZT)|gd~3^ptDe@D{}#Ws?eMVvKWbiD`n6Ra+KO(gxLZ43+d2=AD*vc)+wNm* z=170WdVop}Q|XzXAM>Z4N!skU=-a<6jA{8~*ZQ5A*{IF2?nK7Xhq3g5epcs@rG|Zn zHFkO9_t?U|m*x#Y_K+iB7r^d-y8`YG*dK5Yz=42+0Mm~w`T!0A916HU;4r}9fFl4$ z0v-T(AmBlO2Lm1gI0|qy;26L|0S^Pr0;T~&9_aVPDE2i){ivXgDCLQ{AGx6STy$P=a76})rq#mgww{rRMS?Rp6%-gl@Gp8IAxO6*-8J=HS_p-ZG zeJXaK*!&d3!Q9a(`X4nD`i=KM=-SuD*&{EZME|PUn#e??%CEY1cHjS?_@436OFr~Q z!;_NJ*A1yg?CNKdo;{X}RHNFD*u6CeB}J|rHE#A66xBY;<>))*zw4*wN3Ic^3|(PU-s^T0_J8`hn=WJae?fKX*&<11P|X=x2ET#_;U_7&UEgJ z!pgJoEEX)T7^v*sEK{wWl>_2U<>9f(7NucUUX zMqMORkn-2hON{CQjU`MSCZWS6bi^Nc!qcW6H~x8rPM@otjy;5nB%4APwYtgS4ZUz` z>DG>hYL{zi{2sa^Cd6M!H&vt2wKHn?Tv)Uw>-W}thUfxdm@-wH)A-S|@KrU_xLr`t z;ZM7N3(c=+ecGCQe=KL{UmWBl7N&ofl5eF)9#}YFZP?)!GNa_b<)#6V@4C&oE%c*? z-=QD`nT$gK8=^ytI zK#Lw&3B`*0Q5Y6f0HTt@?0~*(D{@<(Ix}ztZ%c{oK z2O?w4EM4iGy{P!@qzShxpF)`_7Y8p5Kcy?A*5c4^yEO)xcR}sxP5&Zyz5zd-yt(nqGP| ze{2$BD^5LID04v*<3GLJg&U9ZwnZMj_*yzrkB>Q_UVH>4SY~csWFCw%PejJA{X7zl zNuHl}O>Www*H*uap%T*Pvds&fO z<+Ngb`R0#*PkAV3Tl{cGEZ&ib^Iur$ax~UYf?No<$%Rw%`J{JUWtyT=t4ogb??r)j k=Kq*X@`aaZ9cz+2JKm~^3Kr?ieC?L_Hfs2ozL6G+*NY zBa99+I5QxoCjX!L%+%5hZ<$&?&}$i%m1YO^;uT+s*+nZ&g_~l&&3q-?efC*<=FDM8 z{iM3zcbm`UI&1B<)?RDvz1KeboWrOV48tfG2ZfwbDDfqu9FZR1H4^i@G2sB%5raD~ zhz3R0yj$|wG9JHP-fJh1=S;nDjkcHjPye~JN;w#L7L+BA4{nXx>{uCid|N+@e&A%& z>eFXumV3VU^mZrn)bgm-HRrFc`@+&5c5RY-QNpD1;9Iv|SoeG``{2{b{a9thO#v8rCn5Wi>QbILlO<0tPJXMOW6{2e$^rC$7bc$VQ zX~zYbB~g|<5n9PS@=wmo`?Hs?hlI-?wBfd@YHpkeD`F3B?Ra+LrjNMYe)}A|pbxpt zog=c!8ayoRmk)Kxi5jQjPS;)C@M`pNZuk27IiW@S+4sLsS#TUpvFJlPd--JXXK#%d z$5z!Re__g*%xzLlD!ftlsl`6khj#Wn6VR^D+QnSv=r-mJuLaTmseOw+w6mA95B>1d zg#gJiP-JWKAYF1ui&7%uj78{@^aiog`SBU5e8%C#^YOd1XK z*K@K)lTQ>>6AeaHVH#D^npFAI%((_VFkB4<=KKP)%A|dfmKF!|w0ujM$u$_YFIuEq z5RX(kbq?*+KRInsp{hWy#ogqQAXHN|+MHZ76>v0b^R)UL8j_|+on@xh^s0#(wYk73 zj!9FLuTyJjhfaEx*=QwOg1kzY4Fb~*$3*;^sx?u+-SjG&d5fI4p7(P~fyPc#fk~6C zvO5SaLNyG}E1+Wqj4VT;Do<@@tOZp8TQ4m-e341vgTQbW!}oZCU>R8(e2sS#<`RYw z41O5KVkpG03&RyG(-&l0Vfr$LB7E(N!3SUGWBe4xUt&0pp#sCtn8)|E9OI4{|A28% zjQ4>2qRyn!EM3>=0|82AakQ}B`b2J2xR z*+2}T7+QkP9Sq$u{EFeA;0qsvELd2+vRI7EF~%DZa|Xl97$SuHFbS9+g>ekVUP5kV z-jIh>u*sJJAFw+dw|NRVlp)B;6#zc|EDXyqRAcxG1Fk;Irw~si6mH({`S>~za-%~% zCXyctDB*uAksRm{po4)91e)Tmhxp%N6b5uBpt~{Thk^jN0{v%tl3yb^O8Y~5XW&!X z2lOew*EWG4DDjWVQF~OL_S;cn=V`z+PD)GTrFo(Hw4Ktlo%-7WaIi$4<{?Cq#{fwj zVUqql!7myg?T6}9c^Vh(-xK8NICYfRr+nJ4FVNJVE`WRa+uEmCDio6 z{i5+m$AgY1ZSOAWkH$;=?;?>4kziMlr+Jk6FU3^PQIb#EPV*<7Phg+1)<=G=Ct(=R z2(>a1!w^QOQ+RDN&oiVZFzIkTjyDJGyj&cH382Ti4+a^YHG4OE?0=;DLwXev>47kn!ySzLL@~II2aPTDs><@>2qeZ%-L3bSVlM4M( zAJRqR#_w_bzQ*qvE&!D0Vuw$!okTod#u0dw?ncUp&LVv$JG#yyzSsw< zBW-t*_$CH%|6-q|{fJ|>=ZhG`ITQ1pna25`JV(YhcIu0Zs4p4=9XEa-ZH*xi^!U?D zdkpO`;QI@5_H+k5{x;hI!%O=KRwYOa%(UEr+ z@#t!9-9*+O{I6u4JZmB5*s9uu+7nafvOsgSQ{u)dyS~F#51;*7(urr; zJC~W1Bo)hES!(#u*ruKn_@UgsZnwHJ&i?F;5ar&9;}%=+iGZX{9cFCct`~%a#C7Ut zv3F)}*zE`ebKA+1ee3A(eCGzn`Us%;ve@YSuAnB1Y|fu9MUwNWKO6=vd{vIRz+}>@ z^^EmgsI;CBo5^3fjTwDGp2n!ndLr_7nMic`);EUVp*y<+=%`a?YIO84??j&2T8{lK zC#ti|2BS)+Ddc6WHHYq)L5#4A&{^of*zT%y=jC@Ce)n_(J+gur>yFY2W-VXUTSFb- zcQJmKp}PjZoA8yN-(C0}sy+1Y4VbsfF!rA@${G8QIUShBk3!`V9l1nLF42|S(U(i) z3n7w>--Ue{1HL%h~Quc#D35@C50Bv4EFk<&*Xz*!e9yJ7=lQ^Omb`Ij{0>5vLJgR5)%iNjGN#A0a;9xhBY3rP9l_TWeF#3hI+@40ZnsJX zXe+#82W8jr^=iSONuP10B_-A4=HDTB%iMVc&vtVnc&~Z{kvC)v0l8m@UeBXP3H_!= zDZ#pBp#*QPD@NQ&(To12 zj?mdh>Z$*Pe$Dm1U}sqn!7)h7;}Z@?8i?M&!kPhz6<+PH1c$Pi|N51~JqfNJF^pT| z<>lv5F`LJ2eh#cAbi0i0ME=n7NFsMU_yy8_E1;Cn+xCYM`rODtMBZ&lFp-}>Z47BY z+TD}1-*vxA+GRHcJKv1mLukkMB8mKFrzFBZz2-Y2fAO8p1m7$+5V_Fr+mZGIXb3MK zzn#6F=sE0qnc62bb9OP2D_`&xk-OJ^1))!M%OrID z(Gr>;Lfp&xz$zPH9;1hXsE)@P`F|IO>nsK8}}#csYodgLqjQugE^1PY&|Q zLOxl@Cr9%s_J=vUloQ757z^XY3FD<^>3H$-`IFOFm=ByVAKGv*A6S?VtS}#DQ~Mmu zH&&Q$p{y|9^sF%7+H=Bu8%*tUFrQhN&n(Pm4(2l}%;zW~zinxC26OE@d6 zmv~lKFCjMm;PuGCdSqcevalXm>w5Hb9iJM-3hTXr71ldhA9%e_W`*_sfU~Z5PuD%a zDp;r|tWZy;vqC-D&p|z5g?jS9#y+lBEYvFw>JZFxIA=Rgb@(%MkZ6xsO?PwAhd{sid-#39ZO<__H8O~?JU}H6 zQ|U9m_ROF9Ow#7OMeqJ)5lquhb}b)nnesJRmXpXRdNGn-(C^fo(o}Hnu*5F6e&8&e zduiSfWDhw5_5|z$*bneifP(;c1RMf56fpg2;aR|8fWrZI2OI&o2jHH7djWn9aBskU z07n9j0^ApHG~j-K`vV>Tm<3D&hCI*@bWzLz&MmQQCmN!8wT*ksOf)RD_rdRWoI~-o z4W1($ccHj%=N!viAA+9Ou3);YyN8l1D~CoEY0<#E8Mo1bK?ns@^@vj5LBq~OPdxCw z5)FCw{+tQ@WGLyr>j>4kn<&Ni<6GY6FQTDI(_fu`{cRM#WX9K-7kiLv`Ijt0$-!4f7rWg-2^;-NdZdm)NsAndR)=m! z$zS}qZ)9mS8vM!)zgqphMtMl+KW}jA%X?AGsiNJJQd*&yx1JtSwqquWe{lKk_>cfJ z@Te+SeRwDucweQ->x6OZ+SG)b*(mYIlw9>E*(g4`@>^$j144CS<^Hp$pqNV^&M4Yf zitYFwIJ4OWMW=sp=#y!XqnKBoJuvpv1{Bx1rqZ`(Eb6y)|Lo8A$Wf#!_dt2cNz`x2 z`|cH$D;wn@vGXU&Bj#Kb(ruk{XqOczE_Utl)wdGRpsioOIji%>XwaUI|2C)HODJOE zxF7Q-UqJEWZWtHtia<&25_S~lo0E$sOO%brBPm^QDpSt+Vx?js8i(Wi@i&j%c}m=J$<+*pl9*CJod=fbQpT0UUrGekE4L*^t+mh~%Q;WK5%dR$P@ z#qt=q;5G47Cq`U0*U^XJ_>dBgQzFLD2RiQCjmT=Iz>nsIyi{pY7|MHyed?$IqwjV5G&yz00t z3F)T~y%Or|jwUrM=(gvxV<>fdK(5E;uaKt0_?)`ZUZ~jPSO4R)XVRc)m<>4v3@^aJ-Ku!O33nk_oaZPD80gIRgiop zN}l%%>vXk0il0_^^oP#hpmgQYj=mkMQAWDrX7V>3P>%~=j#+iH9>wRTcanu|Mcq$z z__W03B1-F0rh4l8JTyUf)@*KBgVfvif8cz+0Chb$=eFmW95mo^g*L3g8>L*`(<$*1 zgOYZYUCd12ki34u%D2MXqmVsod}gohg9Z$$A70fy9gWOcwB=^Uee6%V-HSRN?f-Z; zE0R|^kFmd;`{VCZ9?97zKimT329k$(jBE mCeUvDUuQ|a@DlA~jgqJ2ts1GIPHQ4JDE|Su8h^i_I{*O7+z(0s diff --git a/tests/testthat/_snaps/output/output_lm_numeric_gaussian.rds b/tests/testthat/_snaps/output/output_lm_numeric_gaussian.rds index 5e95e0b146488c65a059c7179291775917612e3d..d941cac1595573b26a0cbd6ff8750a625470a12b 100644 GIT binary patch literal 4865 zcmV+c6aMTUiwFP!000001MNEtSX9;4X9ke>L%|22LWn40Buc(8&H@1m0Z|lu9ly(UJ*E?&ApympMdtA->*7x&>sTI3KqOHYL8XFOU39# zv8Ogx_?&bNzC6NIF{p0!2j%fq*7n|tG~cAU2^BpD#NC>HF2J(u)v9r8-sD(2fAQ(r zH6IlAwwyl_WxBcJElbr1r+^=W9%Jp6+bM2tl{c1Ku4V;IoG{}Z{_>f``2L|^lTehT zT|pvp7J}5Pi!>&3;YEWJnf7{vUW3<;=?0zJ7(R+MWoq@g?0U4OsF>MFG3d<&NZ7@x zr^@!%wqBn6QUpx zi0+B)OMt^4rTME|bF>TU%dh2fQh(a$4aOq1PLdR+sc6QcLgJ}|6`myf0K} zzN)U7%vU7l)$Bgs=N3#>gnl?J@mNK$cl?EZOQ%&N>t-z2a%-`*Jv23a?}N&76%lUs zE`{5sTduW9-<0PVz}Iv=I&|)~URHf}+RE0bgk!7f)@gy~Y^h&jkaoQMcp zl-?-UN~70GX6i(b74>M+k9n>xE;bs}IZWKM*r>_X=9skxy=t1qXh61yUX^1g%GT=D zLW_xYly5YYuwI?@DwDdXSf?>HFtXL=oB}Ju-lWkmf4vGdnqsP;$}<>MrRhvbYf=?Y zH5VB4$Z$23n2SryDwB2^D=zltsm0bpQ(!P^r&*<4s7ETDI-hmwo01-1sw&ZINjF6_ z2vwOzn_pmN0&UFNBCS53g`}CH&M`A?1kr|A#?FfCD zz!Y-rO2CI)7Zco>;GG2iLtrm~TEY|hT262f!QT1%ECjj}5GMH!(MRqG(IYyFp#(w*xTDT30(}VFCGe@}ix7jN zo4AA(u>@BTOtvBJB!SrkBE zh*_MBmdDHT!t_}?qggxi_W{J+Wb!Nz!LmI1%i`!Q>(2}QV)3zlm_Czdak2irP@aub zkjy^gvwk}u&HM>N9O-LopJBONpe&y~nxT8i#)0|8;*pOB8&B5WN7f&Um-!zilM9w% zSCnUYl>0BoOs|bBpRAqbPd=Z}KG#qmg|(hUfZQ3-TKOn}WKOJ8WNmYa9IXl5XuK8< z4}k50TpWQc)D!C5Sde!`H>id5NABUsBQjNbEOCqB|aSAH#k|VZY3W(UNf!_Bdf*6ZQ;e1jci2g3qoU zB|Jf<4e}Vjjiim)agcyi*FkdaB+++lLf1*cm-@hTE)pNTFbAG!D*)RCIe+Y5@WU6u4{a(j zE+4e-D~aD-BG(S%_Ci@V^wCFb6Al*2dd~KF#4?d@#||7Vcm!cz&o%v|;r`D-HfNyL zIab-+O$%945jZQ}8M8-JP2XurbcE_FROb{4kBz*OgvVBM`)0C!@4qGMHYQn=j#F$7 zW6o2ohqZ?Dj{{=6R&z&bV@6+6q%mr9?u+~*62^kJ4y^?K=<%^*M{S)tTccwKS4aAU z(t5;o&m#wKgHff^lnOEpYm4m+ft*+^l{k9#obB$xb{L_u3cG|G>IrMmlWVA)LNycY z`iSd@&Uc|&3)PfWN1zP8dE1W&GO*Myv16LRu?<-SUJayhxKos4JC zHJ&?7@tcxw%1%@E<@3@VaZ_`a&1a5lm_Jfb{!Xy_onYStNc~`QN)9fq3{CaHo<9f1 zgwL5NB!sUN_sieJ@3y7C|Figa#=+v|#2O`xhcnsB>l?lB_?!O!MYD0n`TyiqVG20D z#$Gi2w_({`gCpk4Lz^po?#;3&dothnS-m2a888*7i*=f*_~4%zkiGeEpf4+$xvxI? z2i#Zxh4D4Kyfhh%W=$?<`$|*pk$3|s%+U(|r~3NwYY*31NbSm>x;35R)edh^eAR9M z#RpfX2)MxQ`pRhS-nOyvxu=C%w=n+k_buyJt~~bGf?E`CoHw81Ic|;=f28=@Dsr+Zl2U5BJbQ?$8uluj3^p*p?DgAwB zJe7BQwi}gSFm(cLukPbT+i!dPMB5cLqMaiXcT>8}>jSC$+m1<;|LvL+RQ|%tJt+Qp zrh&?ZeBG6{e*(#Ze4yvMKD7Prl}9Om&y4mI?^snu`CrXhNA>JKn$7G}nme_G%2g~p zMCI-Ttf2Ha4`oxju6iZQ52b4i`)GUB-BpynxRKiL6yQ9Q@;`5P(Z;@ic_GEFXG3Xw zrQT1>^Yx#p{$&?O8+mu%S1JFVIZGUe?@o*S*vphYef$X<`L=F9hczw>}cv-~pQuA!Q1o`5^bROq}MVt>E zEjS-|oDaM>ALcOo7MyRqINw5ealYw!alQpu#Q8Ry*|*?)=5aprIG-&zpLuaU52Es0 z<~?M=^}^$Nv54y>oEO(i0xzzYU>kqPdbHqrme!i}hqWFV>R-7OW?{SWoWR*eCUh$9iSKdc|YC;#s{S z@-DaE((__HykimT;bGLe}EaZcj?;b`3-Ir01*DA^-cv)x?s!OXAze*3!&mwsmOU>F;QrF*nKtJwcK5G{;F^CbZ2iE=RIJIdow#!yI$ss8w#!ak6=)^B!UZ4SgQ`Ef=(mj=U@e*gHBqn?G7o^h3RyRupe(va{fp5czl#h41Z`cos z+m_5-cs26v5acZ|hb!a;zf4wom?;p-E z?49Qm|9SlcBx!QS?@s6msa$~9w(GyZ@VPaee$d}(l!wH96(_d37eZ{x1Lk9$4G{CC z|M8CdcQxc;(U(z%&(=cRo$+s%oL9lnr)%S$^$vsN6;Bx*-Y^cJdfb|_ z2hKw550BrwI(`DgI^_2`d2$g9DZ4f~bnJ3~y3njM-YXz+bGLqPdh~_ZSwDIAT)G4j zlK(Zc`+G+rZi?oEX{%BIGCn!G=~_9&zF6_yx$HtnPIhm*?3rp9oE1}(xn(^W&*vkn zkL`!pmyF%leRCQHPtryO@3{-nYo4xsZNUYIS)*3pTKojWe=;Mo_OtgOdfwjdUk>wy zffM&vxrY4+L!MphvA1$nqdX*SS+i!QRwQtXQ&bSQm z^EMc-My!HJpVW|J)hbAMOL-%G6B&o4>MtrQ-5}}Cz^YkwGoioh@PVIsPKEBTxxC|< zm;%AOhCGv6^(w?#`X|NqvOwI)Ma5v5bRYD+yk}L^iZL*t@6Dif3131;w$qVg z`yPRzlRkRI&-)$>nBsGBrdkO@!dx!@y!k5hKMeCc_P9X&w&UM~#aw~N&@Zk|&p8VN zPp0@?f)270WdG~WONvzmYIB4tLPqzM(f$6!Cp@j{al@DWT79l&3h@wMLfJHWsZ@;~ z-msTd&Astsp*rN68$QJKAcXYu+{S7wI+tR#kPEZMX#LPv$Pn8A4B3xsavHw26+h?Y z8jcHgZ27b4-^}pawLERsEyoVG93{s-b=ga0c6R}d8 zYt!kU_qjo{9$4dKFYmf?f8EWKH(vk!0{HJ2z|Fk?rk|m~to=#8=|66Ru|cuB(mvk^ zSqtr>U$*xESe*TA$(c5^YjN1Yu6-!;+dmRIx(l7R%J$eQrYceRB5*LDt9J6C+&7&FI|g z&u{lR3d71i?NK&$225HV$W~J%?E~;dmbp= zI0i^zq2i~(J{p_{HHM!`d6|2{qV+szOi{zj+Cs%wz+?jirs zf(jVnmhAYcX+LnEC>CEz$b}xiD7~J%GzijOUvhlTsg*Es%v{6!=Zb*q?RPxGcU8-7 zR%EYoTCl%-|L5PQ{4Qsk{pd$3?m@+ckFvBmYU}4jUX{x literal 4868 zcmV+f6Z`BRiwFP!000001MM3LSX5Q_&BDHjf(sIhfV+|lsG#|d2m-Py?y2Jl1B?zc zI5QxcEvEfN_M7IGR+gIPQfa@Yvbz zn|7_3@wlh9H2TpGZ#7>((%#;+bNTJj&wW|iK0T#ib=G|Tk5(|N#F;{IYwU*k<(jK4v{>|7BaVHB4 zPT@-vbFS{G>~<5TC_3((o_Mk}+&BLFo-a%&zx(dmA-bru>O|qgH6K zu#R%grb5=Mn^9xY<`)=rmKw%Htu-sp&QM!)I_9s>WSy>nDrj;{W=&BhQ_@>B1=Fl~ zCL=ODOoi5hLaWB2pU#Sl+B&VkUTE@6X8m-#v^(`kW64owdaW;rd zPZAULYpUMD{Pr|zSmy0=zDB{%DTTT^nhGttY)zem;KAx)M1CPluAP--D$?X@tz1o6 zRpJKB3Xf1|(u6<=loL2ZCJ8A-ik9S>>?zzO0^tN&5g?l$S43bJfhr;sgR;#CeVITp zx%MF7N3IJ9et_WJ1dbCZBXEuIguWIL97^yxf_oDD4chKPAeBHSR-nhR{|^Z4B(j}_ z2}|G=0vid;K`d<6{se3U+7b{Z`3})X?gY^zI*Ne=+7a+Votp%@5x7I(W6>8O21S^- zgcY#_ml902A?_;zvj}t-^TQ<&el)=Y2yQCoR^f|z7>YK9Qs9SnhmbZOF^3AkoKztQ z@n;cONZ=%aPY47M*oN^Oz~U|VT|%w{F*gS2kwbqFuvGXh10~YoNQWUEh&01p4+_6S z&;{wXNI%HYAA}%ohWZ_Q(qAn(M*CxXd*n0P5A_*8a42EU z$!K}JEH6x-wKJNvGk@D44wK2VJcP^g7$A$Ii>yB%^ozyE`eFJ^p2fxb_d$6!PN6dU zjL-UQfi&}{GvY{pNBa!Rt&N8`h8TLSV zmPfh&a?JFcW%*?7EPwL(g!Z|b`Y5dR!34;i0j-ru1X4J$PLZ|EC33VTaKrF=oL~;y z1-Uo^6Hrg6bE8mRkR^4AOGkbh>Ir?tqMm@q5O|0{e@?6~Lr|Yx4?^EaJ>vQj$dLFp zn!v-TpNujBPdFf@qAy8EkH8oNz2Rs_xYnYNX%b)JP<{x;AjBDuda3AZjKs$gcs&B` z3i3S455@S0pf71?e+c&5SE4%xb;n^pL$P1x!!XIX345HduL*mG8v^6G)xl@iE)t#~ zN-iTT_yT1b?CZE_);I3j=bGP=9?6x{Y!n8_alwDE?=Y|&6$+% z%GJ&X<2iGVu`^%XBz>_M*tiM%XmbLAs3)9Yf(Zl>Aomya1nP}?!mYL%iM9Lff65mFb7T0Rxq{;ask-C;DMK;|R0)rbysL!AR&(=uvVQmfCF>3*S(Glt4u>(f zV*6pO=KSM?Sk-9mC>_ig3-fhmeb#-Ee?-Dq@b;mVz#lz6Dt6R1Xea6n?BMD`pHSM5 zxL!Hr;B7K%47wsgren)xFIv4kH6{vEt-up&i}`M5T=03O7^1Z z{|(FT8eA}6-ui6mb8nVK*^~L&&*~Lv%z!0NTVT*l!w3J&fb7kO6Mb1x&wce*f53h9 zUl?D_%S(&NY}I9Rj;}Q39*H-Q!W=E-uT)o$dHv`;8>wBn)3#<({F>8hihoe`rufin zsRGXPyuP%*zN~3%e0GIU>z<9D^qy_q(xoTImE5HGt*4)%c($ht#UE;iQ+d;f6qNg! z>h(Nwgwl(ftfSblupPx4DvPLmdFooixMY|m2-`9?&ctzz5 z(e9#sRIl&Pm6Xmte2@80>A4=ih;|l+P&@$i0{&cmxSHw>EUM_ARMs@ODy$t(_%Bo) z>Phj*;Tg6SO`EoAQZ`$_Ew2Thq;$}TtyKQtqCQmac-Uy#em!6vr8n>ILh17(_|f(|RY{cp@5h=`ykps6%0D%G4b@Zcn#Jr>nk#>v%9TF*36;AY zyoAzcADl?(dq-?dH<$KhOs$dA2D>54CAILJ5k{94Ro?rN%c_SBP9Zv7O0 ziYxc#_8$=8(Qp2(0X)j{D9@w34drc8`Lwe-9{u6b9~=6^qdzw0kJLVim&bT*7_SZE zWe_i}mv9Gv$a=KldgO6E^0*%Pn)T@8F@9(?FRu4$UR>{VeUSA& znHSglU0cn1_wm?sN5x}3;l+CL2rt%?{Wh#8yjV}}I@l-mipP3o!+OPIz2aHDBJ%FH z)*5-S9^STz_3$X_trz=Q*p?UTp_SSX=rz`c^%nJRVt>!@SZ{f(x4TQqe2xV~x(W3w zH~Zuqis!WZ7saQ;V=2B=wL`#pUE*BD{lmq+KXT&vJ4CWayvBBO$pHn7fhW7tw$v3q9xB6*l`)~0ZIu5_r|NG5LeZR)aLu1j66?a3|Yh&l( ze&z2sZpU+sLyZjK>H5c?r+h4=x)A&5f}ha+x>n7!HWgt!ah2*lkG zcSqam!V-WX6+z;_Xi2Ebv5wn0X59~u$;9KdI#)rNODgOxm z!g>EokTLYQ;@&wP60UzRboi)!OYQ|0Lh6G_#h!P@LTt~^^N+cl zfQLrLt@Ci61sUaDFa0aOC8YZJXLyV{1IgRNURbf}IHXw=r@miu6VeA=fjeDJK;qD( z;_v6qhqNJ|pP%vY(~wx4wY4y5I}A#@<*~MGF(lg()*sDvhU72CcUotdK>E43a%;>u zNS`-o+N3rgK*o@hCkI1+NI4SlQ>*onkn*j6^N%wUA+_o0Ynx93q+A%_m!aqY2`Mim zy?uED42JXEf}u)Cb9U_IzU5 z_)SFd5QzQ!jX{gHeFL#6uBng241s>$EhbJn;|1{Pmw*pTAUSbnuk~M~L(HqsuX^#9;Q*m8 zhd=dX1;k$XFx79>IEd-%<<_ImYtVnQYI{^bGz{47?!K${K8WA4^@|6&B6w)*JJCxf zEr32tx26S_ctF2-uX~prSXL_!gS$L@Ebq!J2;X|(ZrPp%5I4pYylPDp3|bsNXXYMTqLX$N%+! zqY$}4o#0dQ48%R}`va`p4GGPZzNtx_A;oji#frn@pl4L#vP~rw(EGhj2|szRf%ZF6 zoqKn>00YAdM|D^}1$w{ua>dR)e$X$=*Zs!4u@JTMP_%B&lMt`lSbQ|JBSa=lZ5g!v z73lL-YU@kTLRNy*zy7?WK$EAncGq;5(LH2z&p+`APosKV^JTx@n5`=&9^y+VhejWb zrq;t7_OhzJH-0Qsr)+D@hq&H^kba(9Ta88MUZ53nVbz)KANmRzVjF;I;v`*G&DXZ# z=e%6aaY4nFKb!vb48N-3X|r!RD%^6E9Q)K=H}}i!x#tV)bi%%QD{Y;Q*OM8w@DVW! zh<;FP$*yM}RsR9==-1t_zbIud5#=xK#4L~AscLuVa&wsYRsKiOJ+)9UwME{?yEoSp zu~MJy(CMG|xk0@i*yCg`?}E6$?&irCum64l{Pzps`d$Fj&(L7zwH2*b|1=6lre8Q1 z()}DvD2tnW*>f*U`KZG*@5y69zi>@*bY_1TamK8!n41APxd%L4{ho!{Cth4Q{P1j; zS^1{(Lib&ezVmcQtB##u^!Sn;t-t#jM$GqWof5jcn8Wt~ z&aUbaw=ot5UjNa1x2hb5?_cq(r%MOOXqvdmGBXI8tKKMov^*Yqy;MGSxw!+34&xJ- zUuq5yk9u@_(fG*_xxDzH6SGIz;~stS-MF-i??c*KsV7@|zX@r{>AH%sHW(N0*K6qq zO&}xY>*3k&`NN<$9^bcRgAp_>Vx}j!llYt6ol^4Y9vIhh-+oQTd`O5&{Ac=XCrEvI z@pZ+;@sMVyG)=s69ulf1^{Sjz1;dX1vN&Z#4onCu?PA$l0+FL;?hJ_ug(2DvA>Hqd zgV@8b-bjm`0K@Z#wfsbT4z%<9uAg0A0XeHPrz{#<1>N_}PYWJg36LCkWAQ%#5~i(d z84+{}Qp%5B2@RYI%BQxJoo_i6I!^8pGiT@oNFQ*z{X11-U`)}&|ByY%6|ZGsxUOlIsgEP1KWN8 diff --git a/tests/testthat/_snaps/plot/msev-bar-50-ci.svg b/tests/testthat/_snaps/plot/msev-bar-50-ci.svg index ff95215fc..7d5630803 100644 --- a/tests/testthat/_snaps/plot/msev-bar-50-ci.svg +++ b/tests/testthat/_snaps/plot/msev-bar-50-ci.svg @@ -43,21 +43,21 @@ - + - + - - - + + + - - - + + + 0 diff --git a/tests/testthat/_snaps/plot/msev-bar-with-ci-different-width.svg b/tests/testthat/_snaps/plot/msev-bar-with-ci-different-width.svg index 17d6e9ec2..c6d232890 100644 --- a/tests/testthat/_snaps/plot/msev-bar-with-ci-different-width.svg +++ b/tests/testthat/_snaps/plot/msev-bar-with-ci-different-width.svg @@ -28,9 +28,9 @@ - + - + diff --git a/tests/testthat/_snaps/plot/msev-bar-without-ci.svg b/tests/testthat/_snaps/plot/msev-bar-without-ci.svg index 5b31384c1..1be15b58a 100644 --- a/tests/testthat/_snaps/plot/msev-bar-without-ci.svg +++ b/tests/testthat/_snaps/plot/msev-bar-without-ci.svg @@ -41,9 +41,9 @@ - + - + 0 diff --git a/tests/testthat/_snaps/plot/msev-bar.svg b/tests/testthat/_snaps/plot/msev-bar.svg index f7b6b2e15..57d503c70 100644 --- a/tests/testthat/_snaps/plot/msev-bar.svg +++ b/tests/testthat/_snaps/plot/msev-bar.svg @@ -28,21 +28,21 @@ - + - + - - - + + + - - - + + + diff --git a/tests/testthat/_snaps/plot/msev-combination-bar-specified-width.svg b/tests/testthat/_snaps/plot/msev-combination-bar-specified-width.svg index 7d95ab35d..9a7ee4506 100644 --- a/tests/testthat/_snaps/plot/msev-combination-bar-specified-width.svg +++ b/tests/testthat/_snaps/plot/msev-combination-bar-specified-width.svg @@ -57,36 +57,36 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -132,16 +132,16 @@ - - - - - - - - - - + + + + + + + + + + diff --git a/tests/testthat/_snaps/plot/msev-combination-bar.svg b/tests/testthat/_snaps/plot/msev-combination-bar.svg index 9d2de04ab..42a758234 100644 --- a/tests/testthat/_snaps/plot/msev-combination-bar.svg +++ b/tests/testthat/_snaps/plot/msev-combination-bar.svg @@ -57,36 +57,36 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -132,16 +132,16 @@ - - - - - - - - - - + + + + + + + + + + @@ -237,96 +237,96 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -462,36 +462,36 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/plot/msev-combination-line-point.svg b/tests/testthat/_snaps/plot/msev-combination-line-point.svg index 1b229513f..65b02981c 100644 --- a/tests/testthat/_snaps/plot/msev-combination-line-point.svg +++ b/tests/testthat/_snaps/plot/msev-combination-line-point.svg @@ -57,36 +57,36 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -132,25 +132,25 @@ - - - - - - - - - - + + + + + + + + + + - + - + diff --git a/tests/testthat/_snaps/plot/msev-explicand-bar-specified-width.svg b/tests/testthat/_snaps/plot/msev-explicand-bar-specified-width.svg index 4bb61aa68..c3be8d1e8 100644 --- a/tests/testthat/_snaps/plot/msev-explicand-bar-specified-width.svg +++ b/tests/testthat/_snaps/plot/msev-explicand-bar-specified-width.svg @@ -27,27 +27,27 @@ - - - - - + + + + + - - - - - - + + + + + + 0 -100 -200 +100 +200 - - + + diff --git a/tests/testthat/_snaps/plot/msev-explicand-bar.svg b/tests/testthat/_snaps/plot/msev-explicand-bar.svg index 3c6d7b21f..02cdd7d64 100644 --- a/tests/testthat/_snaps/plot/msev-explicand-bar.svg +++ b/tests/testthat/_snaps/plot/msev-explicand-bar.svg @@ -27,27 +27,27 @@ - - - - - + + + + + - - - - - - + + + + + + 0 -100 -200 +100 +200 - - + + diff --git a/tests/testthat/_snaps/plot/msev-explicand-for-specified-observations.svg b/tests/testthat/_snaps/plot/msev-explicand-for-specified-observations.svg index 77659a782..87b0706fd 100644 --- a/tests/testthat/_snaps/plot/msev-explicand-for-specified-observations.svg +++ b/tests/testthat/_snaps/plot/msev-explicand-for-specified-observations.svg @@ -27,23 +27,23 @@ - - - + + + - - - - + + + + 0 -100 -200 +100 +200 - - + + 1 diff --git a/tests/testthat/_snaps/plot/msev-explicand-line-point.svg b/tests/testthat/_snaps/plot/msev-explicand-line-point.svg index e3afb1ef1..13aa02cfb 100644 --- a/tests/testthat/_snaps/plot/msev-explicand-line-point.svg +++ b/tests/testthat/_snaps/plot/msev-explicand-line-point.svg @@ -27,33 +27,33 @@ - + - - - + + + - - - - - - - - - - + + + + + + + + + + -100 -150 -200 -250 - - - - +100 +150 +200 +250 + + + +