From d637952f28662e009519091a6e67ae3bbfd64eca Mon Sep 17 00:00:00 2001 From: LHBO Date: Mon, 4 Dec 2023 15:25:19 +0000 Subject: [PATCH 01/62] Comparing different versions of Gaussian method --- R/Compare_Gaussian.R | 1082 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1082 insertions(+) create mode 100644 R/Compare_Gaussian.R diff --git a/R/Compare_Gaussian.R b/R/Compare_Gaussian.R new file mode 100644 index 000000000..9fac8f939 --- /dev/null +++ b/R/Compare_Gaussian.R @@ -0,0 +1,1082 @@ +# Libraries ------------------------------------------------------------------------------------------------------- +library(data.table) +library(shapr) +library(rbenchmark) + + +# Old and new version --------------------------------------------------------------------------------------------- +prepare_data_gaussian_old <- function(internal, index_features = NULL, ...) { + x_train <- internal$data$x_train + x_explain <- internal$data$x_explain + n_explain <- internal$parameters$n_explain + gaussian.cov_mat <- internal$parameters$gaussian.cov_mat + n_samples <- internal$parameters$n_samples + gaussian.mu <- internal$parameters$gaussian.mu + n_features <- internal$parameters$n_features + + X <- internal$objects$X + + x_explain0 <- as.matrix(x_explain) + dt_l <- list() + + if (is.null(index_features)) { + features <- X$features + } else { + features <- X$features[index_features] + } + + for (i in seq_len(n_explain)) { + cat(sprintf("%d,", i)) + l <- lapply( + X = features, + FUN = shapr:::sample_gaussian, + n_samples = n_samples, + mu = gaussian.mu, + cov_mat = gaussian.cov_mat, + m = n_features, + x_explain = x_explain0[i, , drop = FALSE] + ) + + dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination") + dt_l[[i]][, w := 1 / n_samples] + dt_l[[i]][, id := i] + if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]] + } + + dt <- data.table::rbindlist(dt_l, use.names = TRUE, fill = TRUE) + return(dt) +} + + +# In this version we improve the method by only computing the conditional covariance matrices once. +prepare_data_gaussian_new_v1 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% (t(x_S_star) - mu_S) + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + B <- matrix(nrow = n_samples, ncol = sum(Sbar_now)) + class(B) <- "numeric" + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Sample the MC samples from the conditional Gaussian distribution for one test observation. + .Call("rmvnCpp", + n_ = n_samples, + mu_ = x_Sbar_mean[, idx_now], + sigma_ = cond_cov_mat_Sbar_given_S, + ncores_ = 1, + isChol_ = FALSE, + A_ = B, + PACKAGE = "mvnfast" + ) + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- B + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +# This is similar to v1, but we compute the Cholensky decomposition only once for each coalitions. +# In v1, it is computed n_explain times. +prepare_data_gaussian_new_v2 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% (t(x_S_star) - mu_S) + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + B <- matrix(nrow = n_samples, ncol = sum(Sbar_now)) + class(B) <- "numeric" + + # Compute the Cholensky decomposition + cond_cov_mat_Sbar_given_S_chol <- chol(cond_cov_mat_Sbar_given_S) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Sample the MC samples from the conditional Gaussian distribution for one test observation. + .Call("rmvnCpp", + n_ = n_samples, + mu_ = x_Sbar_mean[, idx_now], + sigma_ = cond_cov_mat_Sbar_given_S_chol, + ncores_ = 1, + isChol_ = TRUE, + A_ = B, + PACKAGE = "mvnfast" + ) + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- B + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +# Here we improve the method speed by only sampling once per coalition +# and only add the test-observation-dependent mean in a secondary call. +prepare_data_gaussian_new_v3 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% (t(x_S_star) - mu_S) + + # rbenchmark::benchmark( + # t(sweep(x_S_star, 2, mu_S, FUN = "-")), + # t(x_S_star) - mu_S) + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + B <- matrix(nrow = n_samples, ncol = sum(Sbar_now)) + class(B) <- "numeric" + + .Call("rmvnCpp", + n_ = n_samples, + mu_ = rep(0, length(mu_Sbar)), + sigma_ = cond_cov_mat_Sbar_given_S, + ncores_ = 1, + isChol_ = FALSE, + A_ = B, + PACKAGE = "mvnfast" + ) + + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B <- t(B) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- t(B + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +# Same as v3, but we now use R to compute Cholensky +prepare_data_gaussian_new_v4 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% (t(x_S_star) - mu_S) + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + B <- matrix(nrow = n_samples, ncol = sum(Sbar_now)) + class(B) <- "numeric" + + .Call("rmvnCpp", + n_ = n_samples, + mu_ = rep(0, length(mu_Sbar)), + sigma_ = chol(cond_cov_mat_Sbar_given_S), + ncores_ = 1, + isChol_ = TRUE, + A_ = B, + PACKAGE = "mvnfast" + ) + + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B <- t(B) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- t(B + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +# Here we only want to generate the data once. So we generate n_samples from N(0, I), +# and then use Cholensky to transform to N(O, Sigma_{Sbar|S}), and then add the means. +prepare_data_gaussian_new_v5 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + B <- matrix(nrow = n_samples, ncol = n_features) + class(B) <- "numeric" + + .Call("rmvnCpp", + n_ = n_samples, + mu_ = rep(0, n_features), + sigma_ = diag(n_features), + ncores_ = 1, + isChol_ = TRUE, + A_ = B, + PACKAGE = "mvnfast" + ) + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% t(sweep(x_S_star, 2, mu_S, FUN = "-")) + + # Transform the samples to be from N(O, Sigma_Sbar|S) + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B_now <- t(B[, Sbar_now] %*% chol(cond_cov_mat_Sbar_given_S)) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- t(B_now + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +# Here we only want to generate the data once. So we generate n_samples*n_batches from N(0, I), +# and then use Cholensky to transform to N(O, Sigma_{Sbar|S}), and then add the means. +prepare_data_gaussian_new_v6 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + n_combinations_in_this_batch <- nrow(S) + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + B <- matrix(nrow = n_samples * n_combinations_in_this_batch, ncol = n_features) + class(B) <- "numeric" + + .Call("rmvnCpp", + n_ = n_samples * n_combinations_in_this_batch, + mu_ = rep(0, n_features), + sigma_ = diag(n_features), + ncores_ = 1, + isChol_ = TRUE, + A_ = B, + PACKAGE = "mvnfast" + ) + + # Indices of the start for the combinations + B_indices <- n_samples * (seq(0, n_combinations_in_this_batch)) + 1 + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% t(sweep(x_S_star, 2, mu_S, FUN = "-")) + + # Transform the samples to be from N(O, Sigma_Sbar|S) + # Extract the relevant samples for this combination + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B_now <- t(B[B_indices[S_ind]:(B_indices[S_ind + 1] - 1), Sbar_now] %*% chol(cond_cov_mat_Sbar_given_S)) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- t(B_now + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +# Compare the methods --------------------------------------------------------------------------------------------- + +## Setup ----------------------------------------------------------------------------------------------------------- + +n_samples <- 1000 +n_samples <- 25000 +n_train <- 1000 +n_test <- 500 +M <- 8 +rho <- 0.5 +betas <- c(0, rep(1, M)) + +# We use the Gaussian approach +approach <- "gaussian" + +# Mean of the multivariate Gaussian distribution +mu <- rep(0, times = M) +mu <- seq(M) + +# Create the covariance matrix +sigma <- matrix(rho, ncol = M, nrow = M) # Old +for (i in seq(1, M - 1)) { + for (j in seq(i + 1, M)) { + sigma[i, j] <- sigma[j, i] <- rho^abs(i - j) + } +} +diag(sigma) <- 1 + +# Set seed for reproducibility +seed_setup <- 1996 +set.seed(seed_setup) + +# Make Gaussian data +data_train <- data.table(mvtnorm::rmvnorm(n = n_train, mean = mu, sigma = sigma)) +data_test <- data.table(mvtnorm::rmvnorm(n = n_test, mean = mu, sigma = sigma)) +colnames(data_train) <- paste("X", seq(M), sep = "") +colnames(data_test) <- paste("X", seq(M), sep = "") + +# Make the response +response_train <- as.vector(cbind(1, as.matrix(data_train)) %*% betas) +response_test <- as.vector(cbind(1, as.matrix(data_test)) %*% betas) + +# Put together the data +data_train_with_response <- copy(data_train)[, y := response_train] +data_test_with_response <- copy(data_test)[, y := response_test] + +# Fit a LM model +predictive_model <- lm(y ~ ., data = data_train_with_response) + +# Get the prediction zero, i.e., the phi0 Shapley value. +prediction_zero <- mean(response_train) + +model <- predictive_model +x_explain <- data_test +x_train <- data_train +keep_samp_for_vS <- FALSE +predict_model <- NULL +get_model_specs <- NULL +timing <- TRUE +n_combinations <- NULL +group <- NULL +feature_specs <- get_feature_specs(get_model_specs, model) +n_batches <- 1 +seed <- 1 + +internal <- setup( + x_train = x_train, + x_explain = x_explain, + approach = approach, + prediction_zero = prediction_zero, + n_combinations = n_combinations, + group = group, + n_samples = n_samples, + n_batches = n_batches, + seed = seed, + feature_specs = feature_specs, + keep_samp_for_vS = keep_samp_for_vS, + predict_model = predict_model, + get_model_specs = get_model_specs, + timing = timing, + gaussian.mu = mu, + gaussian.cov_mat = sigma +) + +# Gets predict_model (if not passed to explain) +predict_model <- get_predict_model( + predict_model = predict_model, + model = model +) + +# Sets up the Shapley (sampling) framework and prepares the +# conditional expectation computation for the chosen approach +# Note: model and predict_model are ONLY used by the AICc-methods of approach empirical to find optimal parameters +internal <- setup_computation(internal, model, predict_model) + + + +## Compare time ---------------------------------------------------------------------------------------------------- + +# Recall that old version iterate over the observations and then the coalitions. +# While the new version iterate over the coalitions and then the observations. +# The latter lets us reuse the computed conditional distributions for all observations. +look_at_coalitions <- seq(1, 2^M - 2) +look_at_coalitions <- seq(1, 2^M - 2, 10) +look_at_coalitions <- seq(1, 2^M - 2, 25) +time_old <- system.time({ + res_old <- prepare_data_gaussian_old(internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_old <- NULL +# Set to NULL as it is many GB when we look at many combinations in one batch and the methods slow down due to +# little available memory. The same below. + +time_new_v1 <- system.time({ + res_new_v1 <- prepare_data_gaussian_new_v1( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v1 <- NULL + +time_new_v2 <- system.time({ + res_new_v2 <- prepare_data_gaussian_new_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v2 <- NULL + +time_new_v3 <- system.time({ + res_new_v3 <- prepare_data_gaussian_new_v3( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v3 <- NULL + +time_new_v4 <- system.time({ + res_new_v4 <- prepare_data_gaussian_new_v4( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v4 <- NULL + +time_new_v5 <- system.time({ + res_new_v5 <- prepare_data_gaussian_new_v5( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5 <- NULL + +time_new_v6 <- system.time({ + res_new_v6 <- prepare_data_gaussian_new_v6( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v6 <- NULL + +# Create a table of the times. Less is better +times <- rbind(time_old, time_new_v1, time_new_v2, time_new_v3, time_new_v4, time_new_v5, time_new_v6) +times + +# Look at the relative time compared to the old method. Larger value is better. +# Tells us how many times faster the new version is. +times_relative <- t(sapply(seq_len(nrow(times)), function(idx) times[1, ] / times[idx, ])) +rownames(times_relative) <- paste0(rownames(times), "_rel") +times_relative + +# ALL COALITIONS (look_at_coalitions = seq(1, 2^M-2)) +# user.self sys.self elapsed user.child sys.child +# time_old 195.319 34.193 244.74 0.000 0.000 +# time_new_v1 69.243 31.176 114.28 0.000 0.000 +# time_new_v2 67.820 31.743 116.54 0.000 0.000 +# time_new_v3 61.005 33.955 110.45 0.000 0.000 +# time_new_v4 61.426 31.499 105.11 0.000 0.000 +# time_new_v5 59.670 33.061 106.08 0.000 0.000 +# time_new_v6 61.810 33.342 109.48 0.000 0.000 +# user.self sys.self elapsed user.child sys.child +# time_old_rel 1.0000 1.0000 1.0000 NaN NaN +# time_new_v1_rel 2.8208 1.0968 2.1416 NaN NaN +# time_new_v2_rel 2.8800 1.0772 2.1000 NaN NaN +# time_new_v3_rel 3.2017 1.0070 2.2159 NaN NaN +# time_new_v4_rel 3.1797 1.0855 2.3284 NaN NaN +# time_new_v5_rel 3.2733 1.0342 2.3071 NaN NaN +# time_new_v6_rel 3.1600 1.0255 2.2354 NaN NaN + + +# 26 coalitions (look_at_coalitions = seq(1, 2^M-2, 10)) +# user.self sys.self elapsed user.child sys.child +# time_old 25.913 2.797 30.399 0 0 +# time_new_v1 7.071 1.624 8.997 0 0 +# time_new_v2 6.653 1.461 8.244 0 0 +# time_new_v3 5.700 1.690 7.521 0 0 +# time_new_v4 5.877 1.826 7.852 0 0 +# time_new_v5 5.522 1.594 7.286 0 0 +# time_new_v6 5.559 1.668 7.335 0 0 +# user.self sys.self elapsed user.child sys.child +# time_old_rel 1.0000 1.0000 1.0000 NaN NaN +# time_new_v1_rel 3.6647 1.7223 3.3788 NaN NaN +# time_new_v2_rel 3.8949 1.9144 3.6874 NaN NaN +# time_new_v3_rel 4.5461 1.6550 4.0419 NaN NaN +# time_new_v4_rel 4.4092 1.5318 3.8715 NaN NaN +# time_new_v5_rel 4.6927 1.7547 4.1722 NaN NaN +# time_new_v6_rel 4.6614 1.6769 4.1444 NaN NaN + + +# 11 coalitions (look_at_coalitions = seq(1, 2^M-2, 25)) +# user.self sys.self elapsed user.child sys.child +# time_old 11.251 1.187 12.961 0.000 0.000 +# time_new_v1 3.273 0.873 4.306 0.000 0.000 +# time_new_v2 3.043 0.690 4.011 0.000 0.000 +# time_new_v3 2.677 0.794 3.587 0.000 0.000 +# time_new_v4 2.598 0.759 3.460 0.000 0.000 +# time_new_v5 2.574 0.752 3.613 0.000 0.000 +# time_new_v6 2.303 0.669 3.009 0.000 0.000 +# user.self sys.self elapsed user.child sys.child +# time_old_rel 1.0000 1.0000 1.0000 NaN NaN +# time_new_v1_rel 3.4375 1.3597 3.0100 NaN NaN +# time_new_v2_rel 3.6973 1.7203 3.2314 NaN NaN +# time_new_v3_rel 4.2028 1.4950 3.6133 NaN NaN +# time_new_v4_rel 4.3306 1.5639 3.7460 NaN NaN +# time_new_v5_rel 4.3710 1.5785 3.5873 NaN NaN +# time_new_v6_rel 4.8854 1.7743 4.3074 NaN NaN + + +## Compare mean ---------------------------------------------------------------------------------------------------- +look_at_coalition <- 25 +one_coalition_time_old <- system.time({ + one_coalition_res_old <- prepare_data_gaussian_old( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) +one_coalition_time_old2 <- system.time({ + one_coalition_res_old2 <- prepare_data_gaussian_old( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v1 <- system.time({ + one_coalition_res_new_v1 <- prepare_data_gaussian_new_v1( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v2 <- system.time({ + one_coalition_res_new_v2 <- prepare_data_gaussian_new_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v3 <- system.time({ + one_coalition_res_new_v3 <- prepare_data_gaussian_new_v3( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v4 <- system.time({ + one_coalition_res_new_v4 <- prepare_data_gaussian_new_v4( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v5 <- system.time({ + one_coalition_res_new_v5 <- prepare_data_gaussian_new_v5( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v6 <- system.time({ + one_coalition_res_new_v6 <- prepare_data_gaussian_new_v6( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +rbind(one_coalition_time_old, + one_coalition_time_old2, + one_coalition_time_new_v1, + one_coalition_time_new_v2, + one_coalition_time_new_v3, + one_coalition_time_new_v4, + one_coalition_time_new_v5, + one_coalition_time_new_v6) + +internal$objects$S[internal$objects$S_batch$`1`[look_at_coalition], , drop = FALSE] +means_old <- one_coalition_res_old[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_old2 <- one_coalition_res_old2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v1 <- one_coalition_res_new_v1[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v2 <- one_coalition_res_new_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v3 <- one_coalition_res_new_v3[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v4 <- one_coalition_res_new_v4[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5 <- one_coalition_res_new_v5[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v6 <- one_coalition_res_new_v6[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] + +# They are all in the same ballpark, so the differences are due to sampling. +# This is supported by the fact that mean_old and mean_old2 use the same old code, and the difference there is the +# same as for the new methods. +# A larger n_samples makes these closer to 0 (I have done that and for other means too) +max(abs(means_old - means_old2)) +max(abs(means_old - means_v1)) +max(abs(means_old - means_v2)) +max(abs(means_old - means_v3)) +max(abs(means_old - means_v4)) +max(abs(means_old - means_v5)) +max(abs(means_old - means_v6)) From 4c6bead7a5817d293af0d9b7f7a97207122a648d Mon Sep 17 00:00:00 2001 From: LHBO Date: Mon, 4 Dec 2023 15:50:29 +0000 Subject: [PATCH 02/62] Hashed out large `n_samples` --- R/Compare_Gaussian.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Compare_Gaussian.R b/R/Compare_Gaussian.R index 9fac8f939..bb57833d2 100644 --- a/R/Compare_Gaussian.R +++ b/R/Compare_Gaussian.R @@ -799,7 +799,7 @@ prepare_data_gaussian_new_v6 <- function(internal, index_features, ...) { ## Setup ----------------------------------------------------------------------------------------------------------- n_samples <- 1000 -n_samples <- 25000 +# n_samples <- 25000 n_train <- 1000 n_test <- 500 M <- 8 From cbf53a283eff14342f88199c5ed319110ae8b033 Mon Sep 17 00:00:00 2001 From: LHBO Date: Mon, 4 Dec 2023 16:01:58 +0000 Subject: [PATCH 03/62] Commented out shapr --- R/Compare_Gaussian.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Compare_Gaussian.R b/R/Compare_Gaussian.R index bb57833d2..695c8b822 100644 --- a/R/Compare_Gaussian.R +++ b/R/Compare_Gaussian.R @@ -1,6 +1,6 @@ # Libraries ------------------------------------------------------------------------------------------------------- +# library(shapr) library(data.table) -library(shapr) library(rbenchmark) From 87fdc5433d39c122e812d41426f9de7a7af8f985 Mon Sep 17 00:00:00 2001 From: LHBO Date: Mon, 4 Dec 2023 16:17:01 +0000 Subject: [PATCH 04/62] Moved file to `inst/scripts/` as tests fails --- {R => inst/scripts}/Compare_Gaussian.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename {R => inst/scripts}/Compare_Gaussian.R (99%) diff --git a/R/Compare_Gaussian.R b/inst/scripts/Compare_Gaussian.R similarity index 99% rename from R/Compare_Gaussian.R rename to inst/scripts/Compare_Gaussian.R index 695c8b822..e980cb1af 100644 --- a/R/Compare_Gaussian.R +++ b/inst/scripts/Compare_Gaussian.R @@ -1,7 +1,7 @@ # Libraries ------------------------------------------------------------------------------------------------------- # library(shapr) +# library(rbenchmark) library(data.table) -library(rbenchmark) # Old and new version --------------------------------------------------------------------------------------------- From a91e3ad93c9b340e1009d4d6851624e25b11faa8 Mon Sep 17 00:00:00 2001 From: Martin Date: Fri, 8 Dec 2023 15:58:45 +0100 Subject: [PATCH 05/62] adding rnorm alternative for version 5 --- inst/scripts/Compare_Gaussian.R | 143 +++++++++++++++++++++++++++++++- 1 file changed, 139 insertions(+), 4 deletions(-) diff --git a/inst/scripts/Compare_Gaussian.R b/inst/scripts/Compare_Gaussian.R index e980cb1af..06c4c9720 100644 --- a/inst/scripts/Compare_Gaussian.R +++ b/inst/scripts/Compare_Gaussian.R @@ -664,6 +664,132 @@ prepare_data_gaussian_new_v5 <- function(internal, index_features, ...) { return(dt) } +prepare_data_gaussian_new_v5_rnorm <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. +# B <- matrix(nrow = n_samples, ncol = n_features) +# class(B) <- "numeric" + +# .Call("rmvnCpp", +# n_ = n_samples, +# mu_ = rep(0, n_features), +# sigma_ = diag(n_features), +# ncores_ = 1, +# isChol_ = TRUE, +# A_ = B, +# PACKAGE = "mvnfast" +# ) + + B <- matrix(rnorm(n_samples*n_features),nrow = n_samples, ncol = n_features) + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% t(sweep(x_S_star, 2, mu_S, FUN = "-")) + + # Transform the samples to be from N(O, Sigma_Sbar|S) + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B_now <- t(B[, Sbar_now] %*% chol(cond_cov_mat_Sbar_given_S)) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- t(B_now + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + + # Here we only want to generate the data once. So we generate n_samples*n_batches from N(0, I), # and then use Cholensky to transform to N(O, Sigma_{Sbar|S}), and then add the means. prepare_data_gaussian_new_v6 <- function(internal, index_features, ...) { @@ -801,7 +927,7 @@ prepare_data_gaussian_new_v6 <- function(internal, index_features, ...) { n_samples <- 1000 # n_samples <- 25000 n_train <- 1000 -n_test <- 500 +n_test <- 100 M <- 8 rho <- 0.5 betas <- c(0, rep(1, M)) @@ -897,8 +1023,8 @@ internal <- setup_computation(internal, model, predict_model) # While the new version iterate over the coalitions and then the observations. # The latter lets us reuse the computed conditional distributions for all observations. look_at_coalitions <- seq(1, 2^M - 2) -look_at_coalitions <- seq(1, 2^M - 2, 10) -look_at_coalitions <- seq(1, 2^M - 2, 25) +#look_at_coalitions <- seq(1, 2^M - 2, 10) +#look_at_coalitions <- seq(1, 2^M - 2, 25) time_old <- system.time({ res_old <- prepare_data_gaussian_old(internal = internal, index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) @@ -936,6 +1062,12 @@ time_new_v5 <- system.time({ index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) res_new_v5 <- NULL +time_new_v5_rnorm <- system.time({ + res_new_v5_rnorm <- prepare_data_gaussian_new_v5_rnorm( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm <- NULL + time_new_v6 <- system.time({ res_new_v6 <- prepare_data_gaussian_new_v6( internal = internal, @@ -943,7 +1075,7 @@ time_new_v6 <- system.time({ res_new_v6 <- NULL # Create a table of the times. Less is better -times <- rbind(time_old, time_new_v1, time_new_v2, time_new_v3, time_new_v4, time_new_v5, time_new_v6) +times <- rbind(time_old, time_new_v1, time_new_v2, time_new_v3, time_new_v4, time_new_v5, time_new_v5_rnorm, time_new_v6) times # Look at the relative time compared to the old method. Larger value is better. @@ -1080,3 +1212,6 @@ max(abs(means_old - means_v3)) max(abs(means_old - means_v4)) max(abs(means_old - means_v5)) max(abs(means_old - means_v6)) + + + From 0d67b1775394aafdc781fa30fd7e6da0efe57f1b Mon Sep 17 00:00:00 2001 From: LHBO Date: Tue, 12 Dec 2023 13:42:59 +0000 Subject: [PATCH 06/62] Added new version of v5_rnorm without sweep. 20% faster --- inst/scripts/Compare_Gaussian.R | 142 +++++++++++++++++++++++++++++++- 1 file changed, 141 insertions(+), 1 deletion(-) diff --git a/inst/scripts/Compare_Gaussian.R b/inst/scripts/Compare_Gaussian.R index 06c4c9720..fc6527478 100644 --- a/inst/scripts/Compare_Gaussian.R +++ b/inst/scripts/Compare_Gaussian.R @@ -748,6 +748,10 @@ prepare_data_gaussian_new_v5_rnorm <- function(internal, index_features, ...) { # Compute the conditional mean of Xsbar given Xs = Xs_star x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% t(sweep(x_S_star, 2, mu_S, FUN = "-")) + + + + # Transform the samples to be from N(O, Sigma_Sbar|S) # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the @@ -789,6 +793,135 @@ prepare_data_gaussian_new_v5_rnorm <- function(internal, index_features, ...) { return(dt) } +prepare_data_gaussian_new_v5_rnorm_v2 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. + # B <- matrix(nrow = n_samples, ncol = n_features) + # class(B) <- "numeric" + + # .Call("rmvnCpp", + # n_ = n_samples, + # mu_ = rep(0, n_features), + # sigma_ = diag(n_features), + # ncores_ = 1, + # isChol_ = TRUE, + # A_ = B, + # PACKAGE = "mvnfast" + # ) + + B <- matrix(rnorm(n_samples*n_features),nrow = n_samples, ncol = n_features) + + + #function(x_explain_mat, S, mu, cov_mat) + + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) + } + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% (t(x_S_star) - mu_S) + + + # Transform the samples to be from N(O, Sigma_Sbar|S) + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B_now <- t(B[, Sbar_now] %*% chol(cond_cov_mat_Sbar_given_S)) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, Sbar_now] <- t(B_now + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + # Here we only want to generate the data once. So we generate n_samples*n_batches from N(0, I), # and then use Cholensky to transform to N(O, Sigma_{Sbar|S}), and then add the means. @@ -1068,6 +1201,12 @@ time_new_v5_rnorm <- system.time({ index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) res_new_v5_rnorm <- NULL +time_new_v5_rnorm_v2 <- system.time({ + res_new_v5_rnorm_v2 <- prepare_data_gaussian_new_v5_rnorm_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_v2 <- NULL + time_new_v6 <- system.time({ res_new_v6 <- prepare_data_gaussian_new_v6( internal = internal, @@ -1075,7 +1214,8 @@ time_new_v6 <- system.time({ res_new_v6 <- NULL # Create a table of the times. Less is better -times <- rbind(time_old, time_new_v1, time_new_v2, time_new_v3, time_new_v4, time_new_v5, time_new_v5_rnorm, time_new_v6) +times <- rbind(time_old, time_new_v1, time_new_v2, time_new_v3, time_new_v4, time_new_v5, + time_new_v5_rnorm, time_new_v5_rnorm_v2, time_new_v6) times # Look at the relative time compared to the old method. Larger value is better. From 9d0ef5b47e1f4a5773a72d3e61aa5d611c7b4b62 Mon Sep 17 00:00:00 2001 From: LHBO Date: Mon, 18 Dec 2023 15:54:38 +0100 Subject: [PATCH 07/62] Added gaussian cpp code --- src/Gaussian.cpp | 87 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 src/Gaussian.cpp diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp new file mode 100644 index 000000000..8c9eca3f8 --- /dev/null +++ b/src/Gaussian.cpp @@ -0,0 +1,87 @@ +#include +#include +using namespace Rcpp; + + +//' Generate Gaussian MC samples +//' +//' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the +//' univariate standard normal. +//' @param x_explain_mat matrix. Matrix of dimension `n_explain` times `n_features` containing the observations +//' to explain. +//' @param S matrix. Matrix of dimension `n_combinations` times `n_features` containing binary representations of +//' the used coalitions. +//' @param mu vector. Vector of length `n_features` containing the mean of each feature. +//' @param cov_mat mat. Matrix of dimension `n_features` times `n_features` containing the pariwise covariance between +//' all features. +//' +//' @export +//' @keywords internal +//' +//' @return List of length `n_combinations`*`n_samples`, where each entry is a matrix of dimension `n_samples` times +//' `n_features` containing the conditional MC samples for each coalition and explicand. +//' @author Lars Henry Berge Olsen +// [[Rcpp::export]] +Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + int n_explain = x_explain_mat.ncols; + int n_features = MC_samples_mat.n_cols; + int n_samples = MC_samples_mat.n_rows; + + // Create a list containing the MC samples for all coalitions and test observations + Rcpp::List resultList; + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + + // TODO: REMOVE IN FINAL VERSION Small printout + Rcpp::Rcout << S_ind + 1 << ","; + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + + // Extract the mean values for the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric and positive definite(?) + if (!cond_cov_mat_Sbar_given_S.is_sympd()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = (cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t()); // Can we speed it up by reducing the number of transposes? + x_Sbar_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_Sbar|S) + arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); + + // Loop over the different test observations and Combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + arma::mat ret(n_samples, n_features, arma::fill::zeros); + ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + ret.cols(Sbar_now_idx) = trans(MC_samples_mat_now + repmat(x_Sbar_mean.col(idx_now), 1, n_samples)); + resultList.push_back(ret); + } + } + + return resultList; +} From 7737587992db9db6e952a57d43cfef14a5ca3c8c Mon Sep 17 00:00:00 2001 From: LHBO Date: Mon, 18 Dec 2023 15:55:08 +0100 Subject: [PATCH 08/62] added gaussian cpp in comparison file --- inst/scripts/Compare_Gaussian.R | 97 +++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 5 deletions(-) diff --git a/inst/scripts/Compare_Gaussian.R b/inst/scripts/Compare_Gaussian.R index fc6527478..95a7a931a 100644 --- a/inst/scripts/Compare_Gaussian.R +++ b/inst/scripts/Compare_Gaussian.R @@ -831,9 +831,6 @@ prepare_data_gaussian_new_v5_rnorm_v2 <- function(internal, index_features, ...) B <- matrix(rnorm(n_samples*n_features),nrow = n_samples, ncol = n_features) - - #function(x_explain_mat, S, mu, cov_mat) - # Generate a data table containing all Monte Carlo samples for all test observations and coalitions dt <- data.table::rbindlist( # Iterate over the coalitions @@ -894,7 +891,7 @@ prepare_data_gaussian_new_v5_rnorm_v2 <- function(internal, index_features, ...) lapply(seq(n_explain), function(idx_now) { # Combine the generated values with the values we conditioned on ret <- matrix(NA, ncol = n_features, nrow = n_samples) - ret[, S_now] <- rep(c(x_explain_mat[idx_now, S_now]), each = n_samples) + ret[, S_now] <- rep(c(x_S_star[idx_now,]), each = n_samples) ret[, Sbar_now] <- t(B_now + x_Sbar_mean[, idx_now]) # Set names of the columns and convert to a data.table @@ -923,6 +920,66 @@ prepare_data_gaussian_new_v5_rnorm_v2 <- function(internal, index_features, ...) } + +prepare_data_gaussian_new_v5_rnorm_cpp <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + system.time({ + result_list <- prepare_data_gaussian_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat, + n_explain = n_explain, + n_features = n_features, + n_samples = n_samples) + }, gcFirst = FALSE) + + dt = as.data.table(do.call(rbind, result_list)) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + + # Here we only want to generate the data once. So we generate n_samples*n_batches from N(0, I), # and then use Cholensky to transform to N(O, Sigma_{Sbar|S}), and then add the means. prepare_data_gaussian_new_v6 <- function(internal, index_features, ...) { @@ -1207,6 +1264,12 @@ time_new_v5_rnorm_v2 <- system.time({ index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) res_new_v5_rnorm_v2 <- NULL +time_new_v5_rnorm_cpp <- system.time({ + res_new_v5_rnorm_cpp <- prepare_data_gaussian_new_v5_rnorm_cpp( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp <- NULL + time_new_v6 <- system.time({ res_new_v6 <- prepare_data_gaussian_new_v6( internal = internal, @@ -1215,7 +1278,7 @@ res_new_v6 <- NULL # Create a table of the times. Less is better times <- rbind(time_old, time_new_v1, time_new_v2, time_new_v3, time_new_v4, time_new_v5, - time_new_v5_rnorm, time_new_v5_rnorm_v2, time_new_v6) + time_new_v5_rnorm, time_new_v5_rnorm_v2, time_new_v5_rnorm_cpp, time_new_v6) times # Look at the relative time compared to the old method. Larger value is better. @@ -1317,6 +1380,21 @@ one_coalition_time_new_v5 <- system.time({ internal = internal, index_features = internal$objects$S_batch$`1`[look_at_coalition])}) +one_coalition_time_new_v5_rnorm <- system.time({ + one_coalition_res_new_v5_rnorm <- prepare_data_gaussian_new_v5_rnorm( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v5_rnorm_v2 <- system.time({ + one_coalition_res_new_v5_rnorm_v2 <- prepare_data_gaussian_new_v5_rnorm_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +one_coalition_time_new_v5_rnorm_cpp <- system.time({ + one_coalition_res_new_v5_rnorm_cpp <- prepare_data_gaussian_new_v5_rnorm_cpp( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + one_coalition_time_new_v6 <- system.time({ one_coalition_res_new_v6 <- prepare_data_gaussian_new_v6( internal = internal, @@ -1329,6 +1407,9 @@ rbind(one_coalition_time_old, one_coalition_time_new_v3, one_coalition_time_new_v4, one_coalition_time_new_v5, + one_coalition_time_new_v5_rnorm, + one_coalition_time_new_v5_rnorm_v2, + one_coalition_time_new_v5_rnorm_cpp, one_coalition_time_new_v6) internal$objects$S[internal$objects$S_batch$`1`[look_at_coalition], , drop = FALSE] @@ -1339,6 +1420,9 @@ means_v2 <- one_coalition_res_new_v2[, lapply(.SD, mean), .SDcols = paste0("X", means_v3 <- one_coalition_res_new_v3[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] means_v4 <- one_coalition_res_new_v4[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] means_v5 <- one_coalition_res_new_v5[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm <- one_coalition_res_new_v5_rnorm[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_v2 <- one_coalition_res_new_v5_rnorm_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp <- one_coalition_res_new_v5_rnorm_cpp[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] means_v6 <- one_coalition_res_new_v6[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] # They are all in the same ballpark, so the differences are due to sampling. @@ -1351,6 +1435,9 @@ max(abs(means_old - means_v2)) max(abs(means_old - means_v3)) max(abs(means_old - means_v4)) max(abs(means_old - means_v5)) +max(abs(means_old - means_v5_rnorm)) +max(abs(means_old - means_v5_rnorm_v2)) +max(abs(means_old - means_v5_rnorm_cpp)) max(abs(means_old - means_v6)) From 9d493a5cef1f10649388e5fd48a5ff0ab9bc5bd4 Mon Sep 17 00:00:00 2001 From: LHBO Date: Mon, 18 Dec 2023 15:58:33 +0100 Subject: [PATCH 09/62] update cpp supporting files --- R/RcppExports.R | 22 ++++++++++++++++++++++ src/Gaussian.cpp | 2 +- src/RcppExports.cpp | 16 ++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index bbe62a76d..6662602fe 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -80,6 +80,28 @@ aicc_full_cpp <- function(h, X_list, mcov_list, S_scale_dist, y_list, negative) .Call(`_shapr_aicc_full_cpp`, h, X_list, mcov_list, S_scale_dist, y_list, negative) } +#' Generate Gaussian MC samples +#' +#' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the +#' univariate standard normal. +#' @param x_explain_mat matrix. Matrix of dimension `n_explain` times `n_features` containing the observations +#' to explain. +#' @param S matrix. Matrix of dimension `n_combinations` times `n_features` containing binary representations of +#' the used coalitions. +#' @param mu vector. Vector of length `n_features` containing the mean of each feature. +#' @param cov_mat mat. Matrix of dimension `n_features` times `n_features` containing the pariwise covariance between +#' all features. +#' +#' @export +#' @keywords internal +#' +#' @return List of length `n_combinations`*`n_samples`, where each entry is a matrix of dimension `n_samples` times +#' `n_features` containing the conditional MC samples for each coalition and explicand. +#' @author Lars Henry Berge Olsen +prepare_data_gaussian_cpp <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_gaussian_cpp`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) +} + #' (Generalized) Mahalanobis distance #' #' Used to get the Euclidean distance as well by setting \code{mcov} = \code{diag(m)}. diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index 8c9eca3f8..656bf5884 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -27,7 +27,7 @@ Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, arma::mat S, arma::vec mu, arma::mat cov_mat) { - int n_explain = x_explain_mat.ncols; + int n_explain = x_explain_mat.n_cols; int n_features = MC_samples_mat.n_cols; int n_samples = MC_samples_mat.n_rows; diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 8face37f8..c0d457c7c 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -80,6 +80,21 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// prepare_data_gaussian_cpp +Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +RcppExport SEXP _shapr_prepare_data_gaussian_cpp(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); + Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); + Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); + rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + return rcpp_result_gen; +END_RCPP +} // mahalanobis_distance_cpp arma::cube mahalanobis_distance_cpp(Rcpp::List featureList, arma::mat Xtrain_mat, arma::mat Xtest_mat, arma::mat mcov, bool S_scale_dist); RcppExport SEXP _shapr_mahalanobis_distance_cpp(SEXP featureListSEXP, SEXP Xtrain_matSEXP, SEXP Xtest_matSEXP, SEXP mcovSEXP, SEXP S_scale_distSEXP) { @@ -155,6 +170,7 @@ static const R_CallMethodDef CallEntries[] = { {"_shapr_correction_matrix_cpp", (DL_FUNC) &_shapr_correction_matrix_cpp, 2}, {"_shapr_aicc_full_single_cpp", (DL_FUNC) &_shapr_aicc_full_single_cpp, 5}, {"_shapr_aicc_full_cpp", (DL_FUNC) &_shapr_aicc_full_cpp, 6}, + {"_shapr_prepare_data_gaussian_cpp", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp, 5}, {"_shapr_mahalanobis_distance_cpp", (DL_FUNC) &_shapr_mahalanobis_distance_cpp, 5}, {"_shapr_sample_features_cpp", (DL_FUNC) &_shapr_sample_features_cpp, 2}, {"_shapr_observation_impute_cpp", (DL_FUNC) &_shapr_observation_impute_cpp, 5}, From 6d2eb568b7b8562bea7447e6c72fe8857e1a4f8e Mon Sep 17 00:00:00 2001 From: LHBO Date: Mon, 18 Dec 2023 16:02:12 +0100 Subject: [PATCH 10/62] Removed redudant parameters --- inst/scripts/Compare_Gaussian.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/inst/scripts/Compare_Gaussian.R b/inst/scripts/Compare_Gaussian.R index 95a7a931a..db1a7cdd2 100644 --- a/inst/scripts/Compare_Gaussian.R +++ b/inst/scripts/Compare_Gaussian.R @@ -953,10 +953,7 @@ prepare_data_gaussian_new_v5_rnorm_cpp <- function(internal, index_features, ... x_explain_mat = x_explain_mat, S = S, mu = mu, - cov_mat = cov_mat, - n_explain = n_explain, - n_features = n_features, - n_samples = n_samples) + cov_mat = cov_mat) }, gcFirst = FALSE) dt = as.data.table(do.call(rbind, result_list)) From ed5b87a498b3b1de5e5bba57a5b1d3fec920f4ee Mon Sep 17 00:00:00 2001 From: LHBO Date: Mon, 18 Dec 2023 16:08:53 +0100 Subject: [PATCH 11/62] logical error used `n_rows` instead of `n_cols` --- inst/scripts/Compare_Gaussian.R | 2 ++ src/Gaussian.cpp | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/inst/scripts/Compare_Gaussian.R b/inst/scripts/Compare_Gaussian.R index db1a7cdd2..82e817a29 100644 --- a/inst/scripts/Compare_Gaussian.R +++ b/inst/scripts/Compare_Gaussian.R @@ -956,6 +956,8 @@ prepare_data_gaussian_new_v5_rnorm_cpp <- function(internal, index_features, ... cov_mat = cov_mat) }, gcFirst = FALSE) + t(sapply(result_list, function(j) dim(j))) + dt = as.data.table(do.call(rbind, result_list)) setnames(dt, feature_names) dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index 656bf5884..652233f9d 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -27,9 +27,9 @@ Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, arma::mat S, arma::vec mu, arma::mat cov_mat) { - int n_explain = x_explain_mat.n_cols; - int n_features = MC_samples_mat.n_cols; + int n_explain = x_explain_mat.n_rows; int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; // Create a list containing the MC samples for all coalitions and test observations Rcpp::List resultList; From 530c816d8437f310fda9661ac256de8d76760c12 Mon Sep 17 00:00:00 2001 From: LHBO Date: Mon, 18 Dec 2023 18:01:03 +0100 Subject: [PATCH 12/62] =?UTF-8?q?Pr=C3=B8vde=20=C3=A5=20gj=C3=B8re=20koden?= =?UTF-8?q?=20raskere,=20men=20blitt=20tregere(=3F)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- inst/scripts/Compare_Gaussian.R | 182 +++++++++++++++++--------------- src/Gaussian.cpp | 18 ++-- 2 files changed, 104 insertions(+), 96 deletions(-) diff --git a/inst/scripts/Compare_Gaussian.R b/inst/scripts/Compare_Gaussian.R index 82e817a29..93cd08f7c 100644 --- a/inst/scripts/Compare_Gaussian.R +++ b/inst/scripts/Compare_Gaussian.R @@ -1113,96 +1113,99 @@ prepare_data_gaussian_new_v6 <- function(internal, index_features, ...) { ## Setup ----------------------------------------------------------------------------------------------------------- -n_samples <- 1000 -# n_samples <- 25000 -n_train <- 1000 -n_test <- 100 -M <- 8 -rho <- 0.5 -betas <- c(0, rep(1, M)) - -# We use the Gaussian approach -approach <- "gaussian" - -# Mean of the multivariate Gaussian distribution -mu <- rep(0, times = M) -mu <- seq(M) - -# Create the covariance matrix -sigma <- matrix(rho, ncol = M, nrow = M) # Old -for (i in seq(1, M - 1)) { - for (j in seq(i + 1, M)) { - sigma[i, j] <- sigma[j, i] <- rho^abs(i - j) +{ + n_samples <- 1000 + # n_samples <- 25000 + n_train <- 1000 + n_test <- 100 + M <- 8 + rho <- 0.5 + betas <- c(0, rep(1, M)) + + # We use the Gaussian approach + approach <- "gaussian" + + # Mean of the multivariate Gaussian distribution + mu <- rep(0, times = M) + mu <- seq(M) + + # Create the covariance matrix + sigma <- matrix(rho, ncol = M, nrow = M) # Old + for (i in seq(1, M - 1)) { + for (j in seq(i + 1, M)) { + sigma[i, j] <- sigma[j, i] <- rho^abs(i - j) + } } + diag(sigma) <- 1 + + # Set seed for reproducibility + seed_setup <- 1996 + set.seed(seed_setup) + + # Make Gaussian data + data_train <- data.table(mvtnorm::rmvnorm(n = n_train, mean = mu, sigma = sigma)) + data_test <- data.table(mvtnorm::rmvnorm(n = n_test, mean = mu, sigma = sigma)) + colnames(data_train) <- paste("X", seq(M), sep = "") + colnames(data_test) <- paste("X", seq(M), sep = "") + + # Make the response + response_train <- as.vector(cbind(1, as.matrix(data_train)) %*% betas) + response_test <- as.vector(cbind(1, as.matrix(data_test)) %*% betas) + + # Put together the data + data_train_with_response <- copy(data_train)[, y := response_train] + data_test_with_response <- copy(data_test)[, y := response_test] + + # Fit a LM model + predictive_model <- lm(y ~ ., data = data_train_with_response) + + # Get the prediction zero, i.e., the phi0 Shapley value. + prediction_zero <- mean(response_train) + + model <- predictive_model + x_explain <- data_test + x_train <- data_train + keep_samp_for_vS <- FALSE + predict_model <- NULL + get_model_specs <- NULL + timing <- TRUE + n_combinations <- NULL + group <- NULL + feature_specs <- get_feature_specs(get_model_specs, model) + n_batches <- 1 + seed <- 1 + + internal <- setup( + x_train = x_train, + x_explain = x_explain, + approach = approach, + prediction_zero = prediction_zero, + n_combinations = n_combinations, + group = group, + n_samples = n_samples, + n_batches = n_batches, + seed = seed, + feature_specs = feature_specs, + keep_samp_for_vS = keep_samp_for_vS, + predict_model = predict_model, + get_model_specs = get_model_specs, + timing = timing, + gaussian.mu = mu, + gaussian.cov_mat = sigma + ) + + # Gets predict_model (if not passed to explain) + predict_model <- get_predict_model( + predict_model = predict_model, + model = model + ) + + # Sets up the Shapley (sampling) framework and prepares the + # conditional expectation computation for the chosen approach + # Note: model and predict_model are ONLY used by the AICc-methods of approach empirical to find optimal parameters + internal <- setup_computation(internal, model, predict_model) + } -diag(sigma) <- 1 - -# Set seed for reproducibility -seed_setup <- 1996 -set.seed(seed_setup) - -# Make Gaussian data -data_train <- data.table(mvtnorm::rmvnorm(n = n_train, mean = mu, sigma = sigma)) -data_test <- data.table(mvtnorm::rmvnorm(n = n_test, mean = mu, sigma = sigma)) -colnames(data_train) <- paste("X", seq(M), sep = "") -colnames(data_test) <- paste("X", seq(M), sep = "") - -# Make the response -response_train <- as.vector(cbind(1, as.matrix(data_train)) %*% betas) -response_test <- as.vector(cbind(1, as.matrix(data_test)) %*% betas) - -# Put together the data -data_train_with_response <- copy(data_train)[, y := response_train] -data_test_with_response <- copy(data_test)[, y := response_test] - -# Fit a LM model -predictive_model <- lm(y ~ ., data = data_train_with_response) - -# Get the prediction zero, i.e., the phi0 Shapley value. -prediction_zero <- mean(response_train) - -model <- predictive_model -x_explain <- data_test -x_train <- data_train -keep_samp_for_vS <- FALSE -predict_model <- NULL -get_model_specs <- NULL -timing <- TRUE -n_combinations <- NULL -group <- NULL -feature_specs <- get_feature_specs(get_model_specs, model) -n_batches <- 1 -seed <- 1 - -internal <- setup( - x_train = x_train, - x_explain = x_explain, - approach = approach, - prediction_zero = prediction_zero, - n_combinations = n_combinations, - group = group, - n_samples = n_samples, - n_batches = n_batches, - seed = seed, - feature_specs = feature_specs, - keep_samp_for_vS = keep_samp_for_vS, - predict_model = predict_model, - get_model_specs = get_model_specs, - timing = timing, - gaussian.mu = mu, - gaussian.cov_mat = sigma -) - -# Gets predict_model (if not passed to explain) -predict_model <- get_predict_model( - predict_model = predict_model, - model = model -) - -# Sets up the Shapley (sampling) framework and prepares the -# conditional expectation computation for the chosen approach -# Note: model and predict_model are ONLY used by the AICc-methods of approach empirical to find optimal parameters -internal <- setup_computation(internal, model, predict_model) @@ -1379,16 +1382,19 @@ one_coalition_time_new_v5 <- system.time({ internal = internal, index_features = internal$objects$S_batch$`1`[look_at_coalition])}) +set.seed(123) one_coalition_time_new_v5_rnorm <- system.time({ one_coalition_res_new_v5_rnorm <- prepare_data_gaussian_new_v5_rnorm( internal = internal, index_features = internal$objects$S_batch$`1`[look_at_coalition])}) +set.seed(123) one_coalition_time_new_v5_rnorm_v2 <- system.time({ one_coalition_res_new_v5_rnorm_v2 <- prepare_data_gaussian_new_v5_rnorm_v2( internal = internal, index_features = internal$objects$S_batch$`1`[look_at_coalition])}) +set.seed(123) one_coalition_time_new_v5_rnorm_cpp <- system.time({ one_coalition_res_new_v5_rnorm_cpp <- prepare_data_gaussian_new_v5_rnorm_cpp( internal = internal, diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index 652233f9d..3109e42d4 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -31,6 +31,9 @@ Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, int n_samples = MC_samples_mat.n_rows; int n_features = MC_samples_mat.n_cols; + // Pre-allocate result matrix + arma::mat ret(n_samples, n_features); + // Create a list containing the MC samples for all coalitions and test observations Rcpp::List resultList; @@ -42,7 +45,7 @@ Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, // Get current coalition S and the indices of the features in coalition S and mask Sbar arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); // Extract the features we condition on @@ -62,23 +65,22 @@ Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - // Ensure that the conditional covariance matrix is symmetric and positive definite(?) - if (!cond_cov_mat_Sbar_given_S.is_sympd()) { + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); } // Compute the conditional mean of Xsbar given Xs = Xs_star - arma::mat x_Sbar_mean = (cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t()); // Can we speed it up by reducing the number of transposes? + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? x_Sbar_mean.each_col() += mu_Sbar; // Transform the samples to be from N(O, Sigma_Sbar|S) arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); - // Loop over the different test observations and Combine the generated values with the values we conditioned on + // Loop over the different test observations and combine the generated values with the values we conditioned on for (int idx_now = 0; idx_now < n_explain; idx_now++) { - arma::mat ret(n_samples, n_features, arma::fill::zeros); - ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); - ret.cols(Sbar_now_idx) = trans(MC_samples_mat_now + repmat(x_Sbar_mean.col(idx_now), 1, n_samples)); + ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); resultList.push_back(ret); } } From 1e8c5931ec17abbcc76a6394b8cf39e4ff230a88 Mon Sep 17 00:00:00 2001 From: LHBO Date: Mon, 18 Dec 2023 18:01:56 +0100 Subject: [PATCH 13/62] typo --- src/Gaussian.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index 3109e42d4..4461107f9 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -75,7 +75,7 @@ Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, x_Sbar_mean.each_col() += mu_Sbar; // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); // Loop over the different test observations and combine the generated values with the values we conditioned on for (int idx_now = 0; idx_now < n_explain; idx_now++) { From e4eb606a6126c2c4e55b4aefd095c64f94bbd4b0 Mon Sep 17 00:00:00 2001 From: LHBO Date: Wed, 20 Dec 2023 14:00:06 +0100 Subject: [PATCH 14/62] Tried to see if I could make it speed it up. --- R/RcppExports.R | 22 +-- inst/scripts/Compare_Gaussian.R | 85 +++++++++-- src/Gaussian.cpp | 254 +++++++++++++++++++++----------- src/RcppExports.cpp | 16 ++ 4 files changed, 265 insertions(+), 112 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index 6662602fe..445985bde 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -80,28 +80,14 @@ aicc_full_cpp <- function(h, X_list, mcov_list, S_scale_dist, y_list, negative) .Call(`_shapr_aicc_full_cpp`, h, X_list, mcov_list, S_scale_dist, y_list, negative) } -#' Generate Gaussian MC samples -#' -#' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the -#' univariate standard normal. -#' @param x_explain_mat matrix. Matrix of dimension `n_explain` times `n_features` containing the observations -#' to explain. -#' @param S matrix. Matrix of dimension `n_combinations` times `n_features` containing binary representations of -#' the used coalitions. -#' @param mu vector. Vector of length `n_features` containing the mean of each feature. -#' @param cov_mat mat. Matrix of dimension `n_features` times `n_features` containing the pariwise covariance between -#' all features. -#' -#' @export -#' @keywords internal -#' -#' @return List of length `n_combinations`*`n_samples`, where each entry is a matrix of dimension `n_samples` times -#' `n_features` containing the conditional MC samples for each coalition and explicand. -#' @author Lars Henry Berge Olsen prepare_data_gaussian_cpp <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { .Call(`_shapr_prepare_data_gaussian_cpp`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) } +prepare_data_gaussian_cpp_v2 <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_gaussian_cpp_v2`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) +} + #' (Generalized) Mahalanobis distance #' #' Used to get the Euclidean distance as well by setting \code{mcov} = \code{diag(m)}. diff --git a/inst/scripts/Compare_Gaussian.R b/inst/scripts/Compare_Gaussian.R index 93cd08f7c..16bbd6f75 100644 --- a/inst/scripts/Compare_Gaussian.R +++ b/inst/scripts/Compare_Gaussian.R @@ -947,16 +947,65 @@ prepare_data_gaussian_new_v5_rnorm_cpp <- function(internal, index_features, ... MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) # Call cpp - system.time({ - result_list <- prepare_data_gaussian_cpp( - MC_samples_mat = MC_samples_mat, - x_explain_mat = x_explain_mat, - S = S, - mu = mu, - cov_mat = cov_mat) - }, gcFirst = FALSE) + result_list <- prepare_data_gaussian_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) - t(sapply(result_list, function(j) dim(j))) + dt = as.data.table(do.call(rbind, result_list)) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_v2 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_list <- prepare_data_gaussian_cpp_v2( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) dt = as.data.table(do.call(rbind, result_list)) setnames(dt, feature_names) @@ -1272,6 +1321,12 @@ time_new_v5_rnorm_cpp <- system.time({ index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) res_new_v5_rnorm_cpp <- NULL +time_new_v5_rnorm_cpp_v2 <- system.time({ + res_new_v5_rnorm_cpp_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_v2 <- NULL + time_new_v6 <- system.time({ res_new_v6 <- prepare_data_gaussian_new_v6( internal = internal, @@ -1280,7 +1335,8 @@ res_new_v6 <- NULL # Create a table of the times. Less is better times <- rbind(time_old, time_new_v1, time_new_v2, time_new_v3, time_new_v4, time_new_v5, - time_new_v5_rnorm, time_new_v5_rnorm_v2, time_new_v5_rnorm_cpp, time_new_v6) + time_new_v5_rnorm, time_new_v5_rnorm_v2, time_new_v5_rnorm_cpp, + time_new_v5_rnorm_cpp_v2, time_new_v6) times # Look at the relative time compared to the old method. Larger value is better. @@ -1400,6 +1456,12 @@ one_coalition_time_new_v5_rnorm_cpp <- system.time({ internal = internal, index_features = internal$objects$S_batch$`1`[look_at_coalition])}) +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_v2 <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + one_coalition_time_new_v6 <- system.time({ one_coalition_res_new_v6 <- prepare_data_gaussian_new_v6( internal = internal, @@ -1415,6 +1477,7 @@ rbind(one_coalition_time_old, one_coalition_time_new_v5_rnorm, one_coalition_time_new_v5_rnorm_v2, one_coalition_time_new_v5_rnorm_cpp, + one_coalition_time_new_v5_rnorm_cpp_v2, one_coalition_time_new_v6) internal$objects$S[internal$objects$S_batch$`1`[look_at_coalition], , drop = FALSE] @@ -1428,6 +1491,7 @@ means_v5 <- one_coalition_res_new_v5[, lapply(.SD, mean), .SDcols = paste0("X", means_v5_rnorm <- one_coalition_res_new_v5_rnorm[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] means_v5_rnorm_v2 <- one_coalition_res_new_v5_rnorm_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] means_v5_rnorm_cpp <- one_coalition_res_new_v5_rnorm_cpp[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_v2 <- one_coalition_res_new_v5_rnorm_cpp_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] means_v6 <- one_coalition_res_new_v6[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] # They are all in the same ballpark, so the differences are due to sampling. @@ -1443,6 +1507,7 @@ max(abs(means_old - means_v5)) max(abs(means_old - means_v5_rnorm)) max(abs(means_old - means_v5_rnorm_v2)) max(abs(means_old - means_v5_rnorm_cpp)) +max(abs(means_old - means_v5_rnorm_cpp_v2)) max(abs(means_old - means_v6)) diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index 4461107f9..a60ca99a8 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -3,87 +3,173 @@ using namespace Rcpp; -//' Generate Gaussian MC samples -//' -//' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the -//' univariate standard normal. -//' @param x_explain_mat matrix. Matrix of dimension `n_explain` times `n_features` containing the observations -//' to explain. -//' @param S matrix. Matrix of dimension `n_combinations` times `n_features` containing binary representations of -//' the used coalitions. -//' @param mu vector. Vector of length `n_features` containing the mean of each feature. -//' @param cov_mat mat. Matrix of dimension `n_features` times `n_features` containing the pariwise covariance between -//' all features. -//' -//' @export -//' @keywords internal -//' -//' @return List of length `n_combinations`*`n_samples`, where each entry is a matrix of dimension `n_samples` times -//' `n_features` containing the conditional MC samples for each coalition and explicand. -//' @author Lars Henry Berge Olsen -// [[Rcpp::export]] -Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { - int n_explain = x_explain_mat.n_rows; - int n_samples = MC_samples_mat.n_rows; - int n_features = MC_samples_mat.n_cols; - - // Pre-allocate result matrix - arma::mat ret(n_samples, n_features); - - // Create a list containing the MC samples for all coalitions and test observations - Rcpp::List resultList; - - // Iterate over the coalitions - for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - - // TODO: REMOVE IN FINAL VERSION Small printout - Rcpp::Rcout << S_ind + 1 << ","; - - // Get current coalition S and the indices of the features in coalition S and mask Sbar - arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - - // Extract the features we condition on - arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - - // Extract the mean values for the features in the two sets - arma::vec mu_S = mu.elem(S_now_idx); - arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - - // Extract the relevant parts of the covariance matrix - arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - - // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - - // Ensure that the conditional covariance matrix is symmetric - if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - } - - // Compute the conditional mean of Xsbar given Xs = Xs_star - arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - x_Sbar_mean.each_col() += mu_Sbar; - - // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - - // Loop over the different test observations and combine the generated values with the values we conditioned on - for (int idx_now = 0; idx_now < n_explain; idx_now++) { - ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - resultList.push_back(ret); - } - } - - return resultList; -} + //' Generate Gaussian MC samples + //' + //' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the + //' univariate standard normal. + //' @param x_explain_mat matrix. Matrix of dimension `n_explain` times `n_features` containing the observations + //' to explain. + //' @param S matrix. Matrix of dimension `n_combinations` times `n_features` containing binary representations of + //' the used coalitions. + //' @param mu vector. Vector of length `n_features` containing the mean of each feature. + //' @param cov_mat mat. Matrix of dimension `n_features` times `n_features` containing the pariwise covariance between + //' all features. + //' + //' @export + //' @keywords internal + //' + //' @return List of length `n_combinations`*`n_samples`, where each entry is a matrix of dimension `n_samples` times + //' `n_features` containing the conditional MC samples for each coalition and explicand. + //' @author Lars Henry Berge Olsen + // [[Rcpp::export]] + Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + + // Pre-allocate result matrix + arma::mat ret(n_samples, n_features); + + // Create a list containing the MC samples for all coalitions and test observations + Rcpp::List resultList; + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + + // TODO: REMOVE IN FINAL VERSION Small printout + Rcpp::Rcout << S_ind + 1 << ","; + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + + // Extract the mean values for the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + x_Sbar_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_Sbar|S) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different test observations and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + resultList.push_back(ret); + } + } + + return resultList; + } + + + + + //' Generate Gaussian MC samples + //' + //' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the + //' univariate standard normal. + //' @param x_explain_mat matrix. Matrix of dimension `n_explain` times `n_features` containing the observations + //' to explain. + //' @param S matrix. Matrix of dimension `n_combinations` times `n_features` containing binary representations of + //' the used coalitions. + //' @param mu vector. Vector of length `n_features` containing the mean of each feature. + //' @param cov_mat mat. Matrix of dimension `n_features` times `n_features` containing the pariwise covariance between + //' all features. + //' + //' @export + //' @keywords internal + //' + //' @return List of length `n_combinations`*`n_samples`, where each entry is a matrix of dimension `n_samples` times + //' `n_features` containing the conditional MC samples for each coalition and explicand. + //' @author Lars Henry Berge Olsen + // [[Rcpp::export]] + Rcpp::List prepare_data_gaussian_cpp_v2(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + + // Create a list containing the MC samples for all coalitions and test observations + Rcpp::List resultList; + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + + // TODO: REMOVE IN FINAL VERSION Small printout + Rcpp::Rcout << S_ind + 1 << ","; + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + + // Extract the mean values for the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + x_Sbar_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_Sbar|S) + arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); + + // Loop over the different test observations and Combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + arma::mat ret(n_samples, n_features); + ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + ret.cols(Sbar_now_idx) = trans(MC_samples_mat_now + repmat(x_Sbar_mean.col(idx_now), 1, n_samples)); + resultList.push_back(ret); + } + } + + return resultList; + } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index c0d457c7c..b522836fd 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -95,6 +95,21 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// prepare_data_gaussian_cpp_v2 +Rcpp::List prepare_data_gaussian_cpp_v2(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +RcppExport SEXP _shapr_prepare_data_gaussian_cpp_v2(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); + Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); + Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); + rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_v2(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + return rcpp_result_gen; +END_RCPP +} // mahalanobis_distance_cpp arma::cube mahalanobis_distance_cpp(Rcpp::List featureList, arma::mat Xtrain_mat, arma::mat Xtest_mat, arma::mat mcov, bool S_scale_dist); RcppExport SEXP _shapr_mahalanobis_distance_cpp(SEXP featureListSEXP, SEXP Xtrain_matSEXP, SEXP Xtest_matSEXP, SEXP mcovSEXP, SEXP S_scale_distSEXP) { @@ -171,6 +186,7 @@ static const R_CallMethodDef CallEntries[] = { {"_shapr_aicc_full_single_cpp", (DL_FUNC) &_shapr_aicc_full_single_cpp, 5}, {"_shapr_aicc_full_cpp", (DL_FUNC) &_shapr_aicc_full_cpp, 6}, {"_shapr_prepare_data_gaussian_cpp", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp, 5}, + {"_shapr_prepare_data_gaussian_cpp_v2", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_v2, 5}, {"_shapr_mahalanobis_distance_cpp", (DL_FUNC) &_shapr_mahalanobis_distance_cpp, 5}, {"_shapr_sample_features_cpp", (DL_FUNC) &_shapr_sample_features_cpp, 2}, {"_shapr_observation_impute_cpp", (DL_FUNC) &_shapr_observation_impute_cpp, 5}, From 3f038eb8a63610fc064fe0a110e423e97e7f57d1 Mon Sep 17 00:00:00 2001 From: LHBO Date: Thu, 4 Jan 2024 17:48:15 +0100 Subject: [PATCH 15/62] Here we add different cpp versions to find the fastest one --- inst/scripts/Compare_Gaussian.R | 459 ++++++++++++++++++++++- src/Gaussian.cpp | 633 ++++++++++++++++++++++++++++++++ 2 files changed, 1076 insertions(+), 16 deletions(-) diff --git a/inst/scripts/Compare_Gaussian.R b/inst/scripts/Compare_Gaussian.R index 16bbd6f75..0dab8a1b1 100644 --- a/inst/scripts/Compare_Gaussian.R +++ b/inst/scripts/Compare_Gaussian.R @@ -974,6 +974,377 @@ prepare_data_gaussian_new_v5_rnorm_cpp <- function(internal, index_features, ... return(dt) } +prepare_data_gaussian_new_v5_rnorm_cpp_just_extracting <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_list <- prepare_data_gaussian_cpp_just_extracting( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + # dt = as.data.table(do.call(rbind, result_list)) + # setnames(dt, feature_names) + # dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + # dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + # data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + # + # # Update the id_combination. This will always be called as `index_features` is never NULL. + # if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + # + # # Add uniform weights + # dt[, w := 1 / n_samples] + # + # # Remove: + # # This is not needed when we assume that the empty and grand coalitions will never be present + # # dt[id_combination %in% c(1, n_combinations), w := 1] + # + # # Return the MC samples + # return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_with_cond_mean_var <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_list <- prepare_data_gaussian_cpp_with_cond_mean_var( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + # dt = as.data.table(do.call(rbind, result_list)) + # setnames(dt, feature_names) + # dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + # dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + # data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + # + # # Update the id_combination. This will always be called as `index_features` is never NULL. + # if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + # + # # Add uniform weights + # dt[, w := 1 / n_samples] + # + # # Remove: + # # This is not needed when we assume that the empty and grand coalitions will never be present + # # dt[id_combination %in% c(1, n_combinations), w := 1] + # + # # Return the MC samples + # return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_with_chol <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_list <- prepare_data_gaussian_cpp_with_chol( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) +# +# dt = as.data.table(do.call(rbind, result_list)) +# setnames(dt, feature_names) +# dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] +# dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] +# data.table::setcolorder(dt, c("id_combination", "id", feature_names)) +# +# # Update the id_combination. This will always be called as `index_features` is never NULL. +# if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] +# +# # Add uniform weights +# dt[, w := 1 / n_samples] +# +# # Remove: +# # This is not needed when we assume that the empty and grand coalitions will never be present +# # dt[id_combination %in% c(1, n_combinations), w := 1] +# +# # Return the MC samples +# return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_without_adding_to_list <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_list <- prepare_data_gaussian_cpp_without_adding_to_list( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + # + # dt = as.data.table(do.call(rbind, result_list)) + # setnames(dt, feature_names) + # dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + # dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + # data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + # + # # Update the id_combination. This will always be called as `index_features` is never NULL. + # if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + # + # # Add uniform weights + # dt[, w := 1 / n_samples] + # + # # Remove: + # # This is not needed when we assume that the empty and grand coalitions will never be present + # # dt[id_combination %in% c(1, n_combinations), w := 1] + # + # # Return the MC samples + # return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_fake_list <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_list <- prepare_data_gaussian_cpp_fake_list( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + # + # dt = as.data.table(do.call(rbind, result_list)) + # setnames(dt, feature_names) + # dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + # dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + # data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + # + # # Update the id_combination. This will always be called as `index_features` is never NULL. + # if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + # + # # Add uniform weights + # dt[, w := 1 / n_samples] + # + # # Remove: + # # This is not needed when we assume that the empty and grand coalitions will never be present + # # dt[id_combination %in% c(1, n_combinations), w := 1] + # + # # Return the MC samples + # return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_fix <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp to create the data table with the MC samples for all explicands and coalitions + dt <- as.data.table( + prepare_data_gaussian_cpp_fix_large_mat( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + ) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + +prepare_data_gaussian_new_v5_rnorm_cpp_fix2 <- function(internal, index_features, ...) { + # This function assumes that index_features will never include the empty and + # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the + # grand coalition before calling the `prepare_data()` function and the empty + # coalition is never included in the `internal$objects$S_batch` list. + + # Extract objects that we are going to use + x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) + n_explain <- internal$parameters$n_explain + n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + + # Extract the relevant coalitions specified in `index_features` from `S`. + # This will always be called as `index_features` is never NULL. + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Call cpp + result_list <- prepare_data_gaussian_cpp_fix( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + dt = as.data.table(do.call(rbind, result_list)) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) +} + prepare_data_gaussian_new_v5_rnorm_cpp_v2 <- function(internal, index_features, ...) { # This function assumes that index_features will never include the empty and # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the @@ -1168,9 +1539,12 @@ prepare_data_gaussian_new_v6 <- function(internal, index_features, ...) { n_train <- 1000 n_test <- 100 M <- 8 + rho <- 0.5 betas <- c(0, rep(1, M)) + + # We use the Gaussian approach approach <- "gaussian" @@ -1321,6 +1695,42 @@ time_new_v5_rnorm_cpp <- system.time({ index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) res_new_v5_rnorm_cpp <- NULL +# time_new_v5_rnorm_cpp_just_extracting <- system.time({ +# res_new_v5_rnorm_cpp_just_extracting <- prepare_data_gaussian_new_v5_rnorm_cpp_just_extracting( +# internal = internal, +# index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +# res_new_v5_rnorm_cpp_just_extracting <- NULL +# +# time_new_v5_rnorm_cpp_with_cond_mean_var <- system.time({ +# res_new_v5_rnorm_cpp_with_cond_mean_var <- prepare_data_gaussian_new_v5_rnorm_cpp_with_cond_mean_var( +# internal = internal, +# index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +# res_new_v5_rnorm_cpp_with_cond_mean_var <- NULL +# +# time_new_v5_rnorm_cpp_with_chol <- system.time({ +# res_new_v5_rnorm_cpp_with_chol <- prepare_data_gaussian_new_v5_rnorm_cpp_with_chol( +# internal = internal, +# index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +# res_new_v5_rnorm_cpp_with_chol <- NULL +# +# time_new_v5_rnorm_cpp_without_adding_to_list <- system.time({ +# res_new_v5_rnorm_cpp_without_adding_to_list <- prepare_data_gaussian_new_v5_rnorm_cpp_without_adding_to_list( +# internal = internal, +# index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +# res_new_v5_rnorm_cpp_without_adding_to_list <- NULL +# +# time_new_v5_rnorm_cpp_fake_list <- system.time({ +# res_new_v5_rnorm_cpp_fake_list <- prepare_data_gaussian_new_v5_rnorm_cpp_fake_list( +# internal = internal, +# index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +# res_new_v5_rnorm_cpp_fake_list <- NULL + +time_new_v5_rnorm_cpp_fix <- system.time({ + res_new_v5_rnorm_cpp_fix <- prepare_data_gaussian_new_v5_rnorm_cpp_fix( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix <- NULL + time_new_v5_rnorm_cpp_v2 <- system.time({ res_new_v5_rnorm_cpp_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_v2( internal = internal, @@ -1336,6 +1746,10 @@ res_new_v6 <- NULL # Create a table of the times. Less is better times <- rbind(time_old, time_new_v1, time_new_v2, time_new_v3, time_new_v4, time_new_v5, time_new_v5_rnorm, time_new_v5_rnorm_v2, time_new_v5_rnorm_cpp, + # time_new_v5_rnorm_cpp_just_extracting, time_new_v5_rnorm_cpp_with_cond_mean_var, + # time_new_v5_rnorm_cpp_with_chol, time_new_v5_rnorm_cpp_without_adding_to_list, + # time_new_v5_rnorm_cpp_fake_list, + time_new_v5_rnorm_cpp_fix, time_new_v5_rnorm_cpp_v2, time_new_v6) times @@ -1346,22 +1760,35 @@ rownames(times_relative) <- paste0(rownames(times), "_rel") times_relative # ALL COALITIONS (look_at_coalitions = seq(1, 2^M-2)) -# user.self sys.self elapsed user.child sys.child -# time_old 195.319 34.193 244.74 0.000 0.000 -# time_new_v1 69.243 31.176 114.28 0.000 0.000 -# time_new_v2 67.820 31.743 116.54 0.000 0.000 -# time_new_v3 61.005 33.955 110.45 0.000 0.000 -# time_new_v4 61.426 31.499 105.11 0.000 0.000 -# time_new_v5 59.670 33.061 106.08 0.000 0.000 -# time_new_v6 61.810 33.342 109.48 0.000 0.000 -# user.self sys.self elapsed user.child sys.child -# time_old_rel 1.0000 1.0000 1.0000 NaN NaN -# time_new_v1_rel 2.8208 1.0968 2.1416 NaN NaN -# time_new_v2_rel 2.8800 1.0772 2.1000 NaN NaN -# time_new_v3_rel 3.2017 1.0070 2.2159 NaN NaN -# time_new_v4_rel 3.1797 1.0855 2.3284 NaN NaN -# time_new_v5_rel 3.2733 1.0342 2.3071 NaN NaN -# time_new_v6_rel 3.1600 1.0255 2.2354 NaN NaN +# user.self sys.self elapsed user.child sys.child +# time_old 38.773 5.301 45.087 0 0 +# time_new_v1 13.056 3.227 16.489 0 0 +# time_new_v2 12.760 3.188 16.159 0 0 +# time_new_v3 11.536 3.891 15.677 0 0 +# time_new_v4 11.293 3.681 15.150 0 0 +# time_new_v5 11.249 3.580 15.029 0 0 +# time_new_v5_rnorm 11.719 3.871 15.960 0 0 +# time_new_v5_rnorm_v2 11.893 3.780 16.055 0 0 +# time_new_v5_rnorm_cpp 40.220 7.029 51.638 0 0 +# time_new_v5_rnorm_cpp_fix 5.464 1.682 7.247 0 0 +# time_new_v5_rnorm_cpp_v2 38.499 4.262 43.501 0 0 +# time_new_v6 11.546 3.413 15.165 0 0 +# user.self sys.self elapsed user.child sys.child +# time_old_rel 1.00000 1.00000 1.00000 NaN NaN +# time_new_v1_rel 2.96975 1.64270 2.73437 NaN NaN +# time_new_v2_rel 3.03864 1.66280 2.79021 NaN NaN +# time_new_v3_rel 3.36104 1.36237 2.87600 NaN NaN +# time_new_v4_rel 3.43337 1.44010 2.97604 NaN NaN +# time_new_v5_rel 3.44680 1.48073 3.00000 NaN NaN +# time_new_v5_rnorm_rel 3.30856 1.36941 2.82500 NaN NaN +# time_new_v5_rnorm_v2_rel 3.26015 1.40238 2.80828 NaN NaN +# time_new_v5_rnorm_cpp_rel 0.96402 0.75416 0.87314 NaN NaN +# time_new_v5_rnorm_cpp_fix_rel 7.09608 3.15161 6.22147 NaN NaN +# time_new_v5_rnorm_cpp_v2_rel 1.00712 1.24378 1.03646 NaN NaN +# time_new_v6_rel 3.35813 1.55318 2.97310 NaN NaN + +# ----------------------------------------------------------------------------------------------------------------- + # 26 coalitions (look_at_coalitions = seq(1, 2^M-2, 10)) diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index a60ca99a8..e08ec9fa6 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -90,6 +90,639 @@ using namespace Rcpp; + // // [[Rcpp::export]] + // Rcpp::List prepare_data_gaussian_cpp_just_extracting(arma::mat MC_samples_mat, + // arma::mat x_explain_mat, + // arma::mat S, + // arma::vec mu, + // arma::mat cov_mat) { + // int n_explain = x_explain_mat.n_rows; + // int n_samples = MC_samples_mat.n_rows; + // int n_features = MC_samples_mat.n_cols; + // + // // Pre-allocate result matrix + // arma::mat ret(n_samples, n_features); + // + // // Create a list containing the MC samples for all coalitions and test observations + // Rcpp::List resultList; + // + // // Iterate over the coalitions + // for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + // + // // TODO: REMOVE IN FINAL VERSION Small printout + // Rcpp::Rcout << S_ind + 1 << ","; + // + // // Get current coalition S and the indices of the features in coalition S and mask Sbar + // arma::mat S_now = S.row(S_ind); + // arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + // arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + // + // // Extract the features we condition on + // arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + // + // // Extract the mean values for the features in the two sets + // arma::vec mu_S = mu.elem(S_now_idx); + // arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + // + // // Extract the relevant parts of the covariance matrix + // arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + // arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + // arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + // arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + // + // // // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + // // arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + // // arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + // // + // // // Ensure that the conditional covariance matrix is symmetric + // // if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + // // cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + // // } + // // + // // // Compute the conditional mean of Xsbar given Xs = Xs_star + // // arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + // // x_Sbar_mean.each_col() += mu_Sbar; + // // + // // // Transform the samples to be from N(O, Sigma_Sbar|S) + // // arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + // // + // // // Loop over the different test observations and combine the generated values with the values we conditioned on + // // for (int idx_now = 0; idx_now < n_explain; idx_now++) { + // // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + // // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + // // resultList.push_back(ret); + // // } + // } + // + // return resultList; + // } + // + // + // // [[Rcpp::export]] + // Rcpp::List prepare_data_gaussian_cpp_with_cond_mean_var(arma::mat MC_samples_mat, + // arma::mat x_explain_mat, + // arma::mat S, + // arma::vec mu, + // arma::mat cov_mat) { + // int n_explain = x_explain_mat.n_rows; + // int n_samples = MC_samples_mat.n_rows; + // int n_features = MC_samples_mat.n_cols; + // + // // Pre-allocate result matrix + // arma::mat ret(n_samples, n_features); + // + // // Create a list containing the MC samples for all coalitions and test observations + // Rcpp::List resultList; + // + // // Iterate over the coalitions + // for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + // + // // TODO: REMOVE IN FINAL VERSION Small printout + // Rcpp::Rcout << S_ind + 1 << ","; + // + // // Get current coalition S and the indices of the features in coalition S and mask Sbar + // arma::mat S_now = S.row(S_ind); + // arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + // arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + // + // // Extract the features we condition on + // arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + // + // // Extract the mean values for the features in the two sets + // arma::vec mu_S = mu.elem(S_now_idx); + // arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + // + // // Extract the relevant parts of the covariance matrix + // arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + // arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + // arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + // arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + // + // // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + // arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + // arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + // + // // Ensure that the conditional covariance matrix is symmetric + // if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + // cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + // } + // + // // Compute the conditional mean of Xsbar given Xs = Xs_star + // arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + // x_Sbar_mean.each_col() += mu_Sbar; + // + // // // Transform the samples to be from N(O, Sigma_Sbar|S) + // // arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + // + // // // Loop over the different test observations and combine the generated values with the values we conditioned on + // // for (int idx_now = 0; idx_now < n_explain; idx_now++) { + // // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + // // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + // // resultList.push_back(ret); + // // } + // } + // + // return resultList; + // } + // + // + // // [[Rcpp::export]] + // Rcpp::List prepare_data_gaussian_cpp_with_chol(arma::mat MC_samples_mat, + // arma::mat x_explain_mat, + // arma::mat S, + // arma::vec mu, + // arma::mat cov_mat) { + // int n_explain = x_explain_mat.n_rows; + // int n_samples = MC_samples_mat.n_rows; + // int n_features = MC_samples_mat.n_cols; + // + // // Pre-allocate result matrix + // arma::mat ret(n_samples, n_features); + // + // // Create a list containing the MC samples for all coalitions and test observations + // Rcpp::List resultList; + // + // // Iterate over the coalitions + // for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + // + // // TODO: REMOVE IN FINAL VERSION Small printout + // Rcpp::Rcout << S_ind + 1 << ","; + // + // // Get current coalition S and the indices of the features in coalition S and mask Sbar + // arma::mat S_now = S.row(S_ind); + // arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + // arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + // + // // Extract the features we condition on + // arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + // + // // Extract the mean values for the features in the two sets + // arma::vec mu_S = mu.elem(S_now_idx); + // arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + // + // // Extract the relevant parts of the covariance matrix + // arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + // arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + // arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + // arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + // + // // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + // arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + // arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + // + // // Ensure that the conditional covariance matrix is symmetric + // if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + // cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + // } + // + // // Compute the conditional mean of Xsbar given Xs = Xs_star + // arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + // x_Sbar_mean.each_col() += mu_Sbar; + // + // // Transform the samples to be from N(O, Sigma_Sbar|S) + // arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + // + // // // Loop over the different test observations and combine the generated values with the values we conditioned on + // // for (int idx_now = 0; idx_now < n_explain; idx_now++) { + // // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + // // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + // // resultList.push_back(ret); + // // } + // } + // + // return resultList; + // } + // + // + // + // // [[Rcpp::export]] + // Rcpp::List prepare_data_gaussian_cpp_without_adding_to_list(arma::mat MC_samples_mat, + // arma::mat x_explain_mat, + // arma::mat S, + // arma::vec mu, + // arma::mat cov_mat) { + // int n_explain = x_explain_mat.n_rows; + // int n_samples = MC_samples_mat.n_rows; + // int n_features = MC_samples_mat.n_cols; + // + // // Pre-allocate result matrix + // arma::mat ret(n_samples, n_features); + // + // // Create a list containing the MC samples for all coalitions and test observations + // Rcpp::List resultList; + // + // // Iterate over the coalitions + // for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + // + // // TODO: REMOVE IN FINAL VERSION Small printout + // Rcpp::Rcout << S_ind + 1 << ","; + // + // // Get current coalition S and the indices of the features in coalition S and mask Sbar + // arma::mat S_now = S.row(S_ind); + // arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + // arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + // + // // Extract the features we condition on + // arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + // + // // Extract the mean values for the features in the two sets + // arma::vec mu_S = mu.elem(S_now_idx); + // arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + // + // // Extract the relevant parts of the covariance matrix + // arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + // arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + // arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + // arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + // + // // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + // arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + // arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + // + // // Ensure that the conditional covariance matrix is symmetric + // if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + // cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + // } + // + // // Compute the conditional mean of Xsbar given Xs = Xs_star + // arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + // x_Sbar_mean.each_col() += mu_Sbar; + // + // // Transform the samples to be from N(O, Sigma_Sbar|S) + // arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + // + // // Loop over the different test observations and combine the generated values with the values we conditioned on + // for (int idx_now = 0; idx_now < n_explain; idx_now++) { + // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + // // resultList.push_back(ret); + // } + // } + // + // return resultList; + // } + // + // // [[Rcpp::export]] + // Rcpp::List prepare_data_gaussian_cpp_fake_list(arma::mat MC_samples_mat, + // arma::mat x_explain_mat, + // arma::mat S, + // arma::vec mu, + // arma::mat cov_mat) { + // int n_explain = x_explain_mat.n_rows; + // int n_samples = MC_samples_mat.n_rows; + // int n_features = MC_samples_mat.n_cols; + // + // // Pre-allocate result matrix + // arma::mat ret(n_samples, n_features); + // + // // Create a list containing the MC samples for all coalitions and test observations + // Rcpp::List resultList; + // Rcpp::List resultList2; + // + // // Iterate over the coalitions + // for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + // + // // TODO: REMOVE IN FINAL VERSION Small printout + // Rcpp::Rcout << S_ind + 1 << ","; + // + // // Get current coalition S and the indices of the features in coalition S and mask Sbar + // arma::mat S_now = S.row(S_ind); + // arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + // arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + // + // // Extract the features we condition on + // arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + // + // // Extract the mean values for the features in the two sets + // arma::vec mu_S = mu.elem(S_now_idx); + // arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + // + // // Extract the relevant parts of the covariance matrix + // arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + // arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + // arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + // arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + // + // // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + // arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + // arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + // + // // Ensure that the conditional covariance matrix is symmetric + // if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + // cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + // } + // + // // Compute the conditional mean of Xsbar given Xs = Xs_star + // arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + // x_Sbar_mean.each_col() += mu_Sbar; + // + // // Transform the samples to be from N(O, Sigma_Sbar|S) + // arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + // + // // Loop over the different test observations and combine the generated values with the values we conditioned on + // for (int idx_now = 0; idx_now < n_explain; idx_now++) { + // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + // resultList.push_back(ret); + // } + // } + // + // return resultList2; + // } + // + // // Fungerer men treg + // // // [[Rcpp::export]] + // // arma::cube prepare_data_gaussian_cpp_fix(arma::mat MC_samples_mat, + // // arma::mat x_explain_mat, + // // arma::mat S, + // // arma::vec mu, + // // arma::mat cov_mat) { + // // int n_explain = x_explain_mat.n_rows; + // // int n_samples = MC_samples_mat.n_rows; + // // int n_features = MC_samples_mat.n_cols; + // // + // // // Pre-allocate result matrix + // // arma::mat ret(n_samples, n_features); + // // arma::cube result(n_samples, n_features, n_explain*n_samples); + // // + // // + // // // Create a list containing the MC samples for all coalitions and test observations + // // Rcpp::List resultList(n_explain * n_samples); + // // + // // // Iterate over the coalitions + // // for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + // // + // // // TODO: REMOVE IN FINAL VERSION Small printout + // // Rcpp::Rcout << S_ind + 1 << ","; + // // + // // // Get current coalition S and the indices of the features in coalition S and mask Sbar + // // arma::mat S_now = S.row(S_ind); + // // arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + // // arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + // // + // // // Extract the features we condition on + // // arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + // // + // // // Extract the mean values for the features in the two sets + // // arma::vec mu_S = mu.elem(S_now_idx); + // // arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + // // + // // // Extract the relevant parts of the covariance matrix + // // arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + // // arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + // // arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + // // arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + // // + // // // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + // // arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + // // arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + // // + // // // Ensure that the conditional covariance matrix is symmetric + // // if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + // // cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + // // } + // // + // // // Compute the conditional mean of Xsbar given Xs = Xs_star + // // arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + // // x_Sbar_mean.each_col() += mu_Sbar; + // // + // // // Transform the samples to be from N(O, Sigma_Sbar|S) + // // arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + // // + // // // Loop over the different test observations and combine the generated values with the values we conditioned on + // // for (int idx_now = 0; idx_now < n_explain; idx_now++) { + // // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + // // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + // // result.slice(S_ind*n_explain + idx_now) = ret; + // // } + // // } + // // + // // return result; + // // } + + // [[Rcpp::export]] + Rcpp::List prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + + // Pre-allocate result matrix + arma::mat ret(n_samples, n_features); + + // Create a list containing the MC samples for all coalitions and test observations + std::list resultList; + // Rcpp::List resultList; + + Rcpp::List listt(S.n_rows); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + + Rcpp::List listt2(n_explain); + + // TODO: REMOVE IN FINAL VERSION Small printout + Rcpp::Rcout << S_ind + 1 << ","; + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + + // Extract the mean values for the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + x_Sbar_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_Sbar|S) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different test observations and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + //Rcpp::NumericMatrix matRcpp(ret.begin(), ret.n_rows, ret.n_cols); + //resultList.push_back(ret); + listt2[idx_now] = ret; + } + listt[S_ind] = listt2; + } + + + + return listt; + } + + // [[Rcpp::export]] +std::list prepare_data_gaussian_cpp_fix(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + + // Pre-allocate result matrix + arma::mat ret(n_samples, n_features); + + // Create a list containing the MC samples for all coalitions and test observations + std::list resultList; + // Rcpp::List resultList; + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + + // TODO: REMOVE IN FINAL VERSION Small printout + Rcpp::Rcout << S_ind + 1 << ","; + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + + // Extract the mean values for the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + x_Sbar_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_Sbar|S) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different test observations and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + //Rcpp::NumericMatrix matRcpp(ret.begin(), ret.n_rows, ret.n_cols); + resultList.push_back(ret); + } + } + + return resultList; + } + + // [[Rcpp::export]] +arma::mat prepare_data_gaussian_cpp_fix_large_mat(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Pre-allocate result matrix + arma::mat ret(n_coalitions*n_explain*n_samples, n_features); + + // Create a list containing the MC samples for all coalitions and test observations + std::list resultList; + // Rcpp::List resultList; + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // TODO: REMOVE IN FINAL VERSION Small printout + Rcpp::Rcout << S_ind + 1 << ","; + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + + // Extract the mean values for the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + x_Sbar_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_Sbar|S) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different test observations and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + + // Maybe faster to create vector 0:(n_samples - 1) and then just add n_samples in each loop. + arma::uvec row_indices_now = arma::linspace(S_ind*n_explain*n_samples + idx_now*n_samples, + S_ind*n_explain*n_samples + idx_now*n_samples + n_samples - 1, + n_samples); + + ret.submat(row_indices_now, S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + ret.submat(row_indices_now, Sbar_now_idx) = + MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + + } + } + + return ret; + } //' Generate Gaussian MC samples //' From b0c2180d5392e28463e8c960cfb3ac1857f14663 Mon Sep 17 00:00:00 2001 From: LHBO Date: Thu, 4 Jan 2024 21:39:59 +0100 Subject: [PATCH 16/62] Arma::cube with efficient indicing is the fastest --- R/RcppExports.R | 31 ++ inst/scripts/Compare_Gaussian.R | 721 ++++++++++++++++------------ src/Gaussian.cpp | 824 +++++++++++++------------------- src/RcppExports.cpp | 112 +++++ 4 files changed, 883 insertions(+), 805 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index 445985bde..866709e08 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -80,14 +80,45 @@ aicc_full_cpp <- function(h, X_list, mcov_list, S_scale_dist, y_list, negative) .Call(`_shapr_aicc_full_cpp`, h, X_list, mcov_list, S_scale_dist, y_list, negative) } +#' Generate Gaussian MC samples +NULL + prepare_data_gaussian_cpp <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { .Call(`_shapr_prepare_data_gaussian_cpp`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) } +prepare_data_gaussian_cpp_with_wrap <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_gaussian_cpp_with_wrap`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) +} + prepare_data_gaussian_cpp_v2 <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { .Call(`_shapr_prepare_data_gaussian_cpp_v2`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) } +prepare_data_gaussian_cpp_fix_large_mat <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_gaussian_cpp_fix_large_mat`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) +} + +prepare_data_gaussian_cpp_fix_large_mat_v2 <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_gaussian_cpp_fix_large_mat_v2`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) +} + +prepare_data_gaussian_cpp_fix_cube <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_gaussian_cpp_fix_cube`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) +} + +prepare_data_gaussian_cpp_fix_cube_v2 <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_gaussian_cpp_fix_cube_v2`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) +} + +prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) +} + +prepare_data_gaussian_cpp_fix_std_list <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_gaussian_cpp_fix_std_list`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) +} + #' (Generalized) Mahalanobis distance #' #' Used to get the Euclidean distance as well by setting \code{mcov} = \code{diag(m)}. diff --git a/inst/scripts/Compare_Gaussian.R b/inst/scripts/Compare_Gaussian.R index 0dab8a1b1..8dde791af 100644 --- a/inst/scripts/Compare_Gaussian.R +++ b/inst/scripts/Compare_Gaussian.R @@ -687,18 +687,18 @@ prepare_data_gaussian_new_v5_rnorm <- function(internal, index_features, ...) { S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] # Allocate an empty matrix used in mvnfast:::rmvnCpp to store the generated MC samples. -# B <- matrix(nrow = n_samples, ncol = n_features) -# class(B) <- "numeric" - -# .Call("rmvnCpp", -# n_ = n_samples, -# mu_ = rep(0, n_features), -# sigma_ = diag(n_features), -# ncores_ = 1, -# isChol_ = TRUE, -# A_ = B, -# PACKAGE = "mvnfast" -# ) + # B <- matrix(nrow = n_samples, ncol = n_features) + # class(B) <- "numeric" + + # .Call("rmvnCpp", + # n_ = n_samples, + # mu_ = rep(0, n_features), + # sigma_ = diag(n_features), + # ncores_ = 1, + # isChol_ = TRUE, + # A_ = B, + # PACKAGE = "mvnfast" + # ) B <- matrix(rnorm(n_samples*n_features),nrow = n_samples, ncol = n_features) @@ -831,79 +831,79 @@ prepare_data_gaussian_new_v5_rnorm_v2 <- function(internal, index_features, ...) B <- matrix(rnorm(n_samples*n_features),nrow = n_samples, ncol = n_features) - # Generate a data table containing all Monte Carlo samples for all test observations and coalitions - dt <- data.table::rbindlist( - # Iterate over the coalitions - lapply( - seq_len(nrow(S)), - function(S_ind) { - # This function generates the conditional samples Xsbar | Xs = Xs_star - # and combine those values with the unconditional values. - cat(sprintf("%d,", S_ind)) - - # Get boolean representations if the features are in the S and the Sbar sets - S_now <- as.logical(S[S_ind, ]) - Sbar_now <- !as.logical(S[S_ind, ]) - - # Remove: - # Do not need to treat the empty and grand coalitions different as they will never be present - # if (sum(S_now) %in% c(0, n_features)) { - # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) - # } - - # Extract the features we condition on - x_S_star <- x_explain_mat[, S_now, drop = FALSE] - - # Extract the mean values for the features in the two sets - mu_S <- mu[S_now] - mu_Sbar <- mu[Sbar_now] - - # Extract the relevant parts of the covariance matrix - cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] - cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] - cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] - cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] - - # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) - cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar - - # Ensure that the conditional covariance matrix symmetric in the - # rare case where numerical instability made it unsymmetrical. - if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { - cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) - } - - # Compute the conditional mean of Xsbar given Xs = Xs_star - x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% (t(x_S_star) - mu_S) - - - # Transform the samples to be from N(O, Sigma_Sbar|S) - # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` - # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the - # original B (i.e., not transposed B). - B_now <- t(B[, Sbar_now] %*% chol(cond_cov_mat_Sbar_given_S)) - - # Create a data.table containing the MC samples for all test observations for one coalition - data.table::rbindlist( - - # Loop over the different test observations - lapply(seq(n_explain), function(idx_now) { - # Combine the generated values with the values we conditioned on - ret <- matrix(NA, ncol = n_features, nrow = n_samples) - ret[, S_now] <- rep(c(x_S_star[idx_now,]), each = n_samples) - ret[, Sbar_now] <- t(B_now + x_Sbar_mean[, idx_now]) - - # Set names of the columns and convert to a data.table - colnames(ret) <- feature_names - as.data.table(ret) - }), - use.names = TRUE, idcol = "id", fill = TRUE - ) + # Generate a data table containing all Monte Carlo samples for all test observations and coalitions + dt <- data.table::rbindlist( + # Iterate over the coalitions + lapply( + seq_len(nrow(S)), + function(S_ind) { + # This function generates the conditional samples Xsbar | Xs = Xs_star + # and combine those values with the unconditional values. + cat(sprintf("%d,", S_ind)) + + # Get boolean representations if the features are in the S and the Sbar sets + S_now <- as.logical(S[S_ind, ]) + Sbar_now <- !as.logical(S[S_ind, ]) + + # Remove: + # Do not need to treat the empty and grand coalitions different as they will never be present + # if (sum(S_now) %in% c(0, n_features)) { + # return(data.table::as.data.table(cbind("id" = seq(n_explain), x_explain))) + # } + + # Extract the features we condition on + x_S_star <- x_explain_mat[, S_now, drop = FALSE] + + # Extract the mean values for the features in the two sets + mu_S <- mu[S_now] + mu_Sbar <- mu[Sbar_now] + + # Extract the relevant parts of the covariance matrix + cov_mat_SS <- cov_mat[S_now, S_now, drop = FALSE] + cov_mat_SSbar <- cov_mat[S_now, Sbar_now, drop = FALSE] + cov_mat_SbarS <- cov_mat[Sbar_now, S_now, drop = FALSE] + cov_mat_SbarSbar <- cov_mat[Sbar_now, Sbar_now, drop = FALSE] + + # Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + cov_mat_SbarS_cov_mat_SS_inv <- cov_mat_SbarS %*% solve(cov_mat_SS) + cond_cov_mat_Sbar_given_S <- cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv %*% cov_mat_SSbar + + # Ensure that the conditional covariance matrix symmetric in the + # rare case where numerical instability made it unsymmetrical. + if (!isSymmetric(cond_cov_mat_Sbar_given_S)) { + cond_cov_mat_Sbar_given_S <- Matrix::symmpart(cond_cov_mat_Sbar_given_S) } - ), - idcol = "id_combination" - ) + + # Compute the conditional mean of Xsbar given Xs = Xs_star + x_Sbar_mean <- mu_Sbar + cov_mat_SbarS_cov_mat_SS_inv %*% (t(x_S_star) - mu_S) + + + # Transform the samples to be from N(O, Sigma_Sbar|S) + # Transpose her and untranspose later for faster matrix addition in `t(B + x_Sbar_mean[, idx_now])` + # as it seems to be faster than using `sweep(B, 2, x_Sbar_mean[, idx_now], FUN = "+")` on the + # original B (i.e., not transposed B). + B_now <- t(B[, Sbar_now] %*% chol(cond_cov_mat_Sbar_given_S)) + + # Create a data.table containing the MC samples for all test observations for one coalition + data.table::rbindlist( + + # Loop over the different test observations + lapply(seq(n_explain), function(idx_now) { + # Combine the generated values with the values we conditioned on + ret <- matrix(NA, ncol = n_features, nrow = n_samples) + ret[, S_now] <- rep(c(x_S_star[idx_now,]), each = n_samples) + ret[, Sbar_now] <- t(B_now + x_Sbar_mean[, idx_now]) + + # Set names of the columns and convert to a data.table + colnames(ret) <- feature_names + as.data.table(ret) + }), + use.names = TRUE, idcol = "id", fill = TRUE + ) + } + ), + idcol = "id_combination" + ) # Update the id_combination. This will always be called as `index_features` is never NULL. if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] @@ -974,7 +974,7 @@ prepare_data_gaussian_new_v5_rnorm_cpp <- function(internal, index_features, ... return(dt) } -prepare_data_gaussian_new_v5_rnorm_cpp_just_extracting <- function(internal, index_features, ...) { +prepare_data_gaussian_new_v5_rnorm_cpp_with_wrap <- function(internal, index_features, ...) { # This function assumes that index_features will never include the empty and # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the # grand coalition before calling the `prepare_data()` function and the empty @@ -1000,34 +1000,35 @@ prepare_data_gaussian_new_v5_rnorm_cpp_just_extracting <- function(internal, ind MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) # Call cpp - result_list <- prepare_data_gaussian_cpp_just_extracting( - MC_samples_mat = MC_samples_mat, - x_explain_mat = x_explain_mat, - S = S, - mu = mu, - cov_mat = cov_mat) + result_list <- prepare_data_gaussian_cpp_with_wrap( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + dt = as.data.table(do.call(rbind, result_list)) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] - # dt = as.data.table(do.call(rbind, result_list)) - # setnames(dt, feature_names) - # dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] - # dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] - # data.table::setcolorder(dt, c("id_combination", "id", feature_names)) - # - # # Update the id_combination. This will always be called as `index_features` is never NULL. - # if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] - # - # # Add uniform weights - # dt[, w := 1 / n_samples] - # - # # Remove: - # # This is not needed when we assume that the empty and grand coalitions will never be present - # # dt[id_combination %in% c(1, n_combinations), w := 1] - # - # # Return the MC samples - # return(dt) + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) } -prepare_data_gaussian_new_v5_rnorm_cpp_with_cond_mean_var <- function(internal, index_features, ...) { + +prepare_data_gaussian_new_v5_rnorm_cpp_v2 <- function(internal, index_features, ...) { # This function assumes that index_features will never include the empty and # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the # grand coalition before calling the `prepare_data()` function and the empty @@ -1053,34 +1054,34 @@ prepare_data_gaussian_new_v5_rnorm_cpp_with_cond_mean_var <- function(internal, MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) # Call cpp - result_list <- prepare_data_gaussian_cpp_with_cond_mean_var( - MC_samples_mat = MC_samples_mat, - x_explain_mat = x_explain_mat, - S = S, - mu = mu, - cov_mat = cov_mat) + result_list <- prepare_data_gaussian_cpp_v2( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + dt = as.data.table(do.call(rbind, result_list)) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] - # dt = as.data.table(do.call(rbind, result_list)) - # setnames(dt, feature_names) - # dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] - # dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] - # data.table::setcolorder(dt, c("id_combination", "id", feature_names)) - # - # # Update the id_combination. This will always be called as `index_features` is never NULL. - # if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] - # - # # Add uniform weights - # dt[, w := 1 / n_samples] - # - # # Remove: - # # This is not needed when we assume that the empty and grand coalitions will never be present - # # dt[id_combination %in% c(1, n_combinations), w := 1] - # - # # Return the MC samples - # return(dt) + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) } -prepare_data_gaussian_new_v5_rnorm_cpp_with_chol <- function(internal, index_features, ...) { +prepare_data_gaussian_new_v5_rnorm_cpp_fix_large_mat <- function(internal, index_features, ...) { # This function assumes that index_features will never include the empty and # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the # grand coalition before calling the `prepare_data()` function and the empty @@ -1102,38 +1103,38 @@ prepare_data_gaussian_new_v5_rnorm_cpp_with_chol <- function(internal, index_fea # This will always be called as `index_features` is never NULL. S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] - # Generate the MC samples + # Generate the MC samples from N(0, 1) MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) - # Call cpp - result_list <- prepare_data_gaussian_cpp_with_chol( + # Call cpp to create the data table with the MC samples for all explicands and coalitions + dt <- as.data.table( + prepare_data_gaussian_cpp_fix_large_mat( MC_samples_mat = MC_samples_mat, x_explain_mat = x_explain_mat, S = S, mu = mu, cov_mat = cov_mat) -# -# dt = as.data.table(do.call(rbind, result_list)) -# setnames(dt, feature_names) -# dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] -# dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] -# data.table::setcolorder(dt, c("id_combination", "id", feature_names)) -# -# # Update the id_combination. This will always be called as `index_features` is never NULL. -# if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] -# -# # Add uniform weights -# dt[, w := 1 / n_samples] -# -# # Remove: -# # This is not needed when we assume that the empty and grand coalitions will never be present -# # dt[id_combination %in% c(1, n_combinations), w := 1] -# -# # Return the MC samples -# return(dt) + ) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) } -prepare_data_gaussian_new_v5_rnorm_cpp_without_adding_to_list <- function(internal, index_features, ...) { +prepare_data_gaussian_new_v5_rnorm_cpp_fix_large_mat_v2 <- function(internal, index_features, ...) { # This function assumes that index_features will never include the empty and # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the # grand coalition before calling the `prepare_data()` function and the empty @@ -1155,38 +1156,38 @@ prepare_data_gaussian_new_v5_rnorm_cpp_without_adding_to_list <- function(intern # This will always be called as `index_features` is never NULL. S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] - # Generate the MC samples + # Generate the MC samples from N(0, 1) MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) - # Call cpp - result_list <- prepare_data_gaussian_cpp_without_adding_to_list( - MC_samples_mat = MC_samples_mat, - x_explain_mat = x_explain_mat, - S = S, - mu = mu, - cov_mat = cov_mat) - # - # dt = as.data.table(do.call(rbind, result_list)) - # setnames(dt, feature_names) - # dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] - # dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] - # data.table::setcolorder(dt, c("id_combination", "id", feature_names)) - # - # # Update the id_combination. This will always be called as `index_features` is never NULL. - # if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] - # - # # Add uniform weights - # dt[, w := 1 / n_samples] - # - # # Remove: - # # This is not needed when we assume that the empty and grand coalitions will never be present - # # dt[id_combination %in% c(1, n_combinations), w := 1] - # - # # Return the MC samples - # return(dt) + # Call cpp to create the data table with the MC samples for all explicands and coalitions + dt <- as.data.table( + prepare_data_gaussian_cpp_fix_large_mat_v2( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + ) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) } -prepare_data_gaussian_new_v5_rnorm_cpp_fake_list <- function(internal, index_features, ...) { +prepare_data_gaussian_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- function(internal, index_features, ...) { # This function assumes that index_features will never include the empty and # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the # grand coalition before calling the `prepare_data()` function and the empty @@ -1212,34 +1213,35 @@ prepare_data_gaussian_new_v5_rnorm_cpp_fake_list <- function(internal, index_fea MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) # Call cpp - result_list <- prepare_data_gaussian_cpp_fake_list( + result_list <- prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices( MC_samples_mat = MC_samples_mat, x_explain_mat = x_explain_mat, S = S, mu = mu, cov_mat = cov_mat) - # - # dt = as.data.table(do.call(rbind, result_list)) - # setnames(dt, feature_names) - # dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] - # dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] - # data.table::setcolorder(dt, c("id_combination", "id", feature_names)) - # - # # Update the id_combination. This will always be called as `index_features` is never NULL. - # if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] - # - # # Add uniform weights - # dt[, w := 1 / n_samples] - # - # # Remove: - # # This is not needed when we assume that the empty and grand coalitions will never be present - # # dt[id_combination %in% c(1, n_combinations), w := 1] - # - # # Return the MC samples - # return(dt) + + # Here we first put the inner list together and then the whole thing. Maybe exist another faster way! + dt = as.data.table(do.call(rbind, lapply(result_list, function(inner_list) do.call(rbind, inner_list)))) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + + # Add uniform weights + dt[, w := 1 / n_samples] + + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] + + # Return the MC samples + return(dt) } -prepare_data_gaussian_new_v5_rnorm_cpp_fix <- function(internal, index_features, ...) { +prepare_data_gaussian_new_v5_rnorm_cpp_fix_cube <- function(internal, index_features, ...) { # This function assumes that index_features will never include the empty and # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the # grand coalition before calling the `prepare_data()` function and the empty @@ -1261,18 +1263,26 @@ prepare_data_gaussian_new_v5_rnorm_cpp_fix <- function(internal, index_features, # This will always be called as `index_features` is never NULL. S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] - # Generate the MC samples from N(0, 1) + # Generate the MC samples MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) - # Call cpp to create the data table with the MC samples for all explicands and coalitions - dt <- as.data.table( - prepare_data_gaussian_cpp_fix_large_mat( - MC_samples_mat = MC_samples_mat, - x_explain_mat = x_explain_mat, - S = S, - mu = mu, - cov_mat = cov_mat) - ) + # Call cpp + result_cube <- prepare_data_gaussian_cpp_fix_cube( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + # Reshape the 3D array to 2D + # This is slower + # dt = as.data.table(matrix(aperm(result_cube, c(1, 3, 2)), + # nrow = prod(dim(result_cube)[-2]), + # ncol = dim(result_cube)[2])) + dims = dim(result_cube) + result_cube = aperm(result_cube, c(1, 3, 2)) + dim(result_cube) <- c(prod(dims[-2]), dims[2]) + dt = as.data.table(result_cube) setnames(dt, feature_names) dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] @@ -1292,7 +1302,7 @@ prepare_data_gaussian_new_v5_rnorm_cpp_fix <- function(internal, index_features, return(dt) } -prepare_data_gaussian_new_v5_rnorm_cpp_fix2 <- function(internal, index_features, ...) { +prepare_data_gaussian_new_v5_rnorm_cpp_fix_cube_v2 <- function(internal, index_features, ...) { # This function assumes that index_features will never include the empty and # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the # grand coalition before calling the `prepare_data()` function and the empty @@ -1309,6 +1319,7 @@ prepare_data_gaussian_new_v5_rnorm_cpp_fix2 <- function(internal, index_features n_samples <- internal$parameters$n_samples feature_names <- internal$parameters$feature_names n_combinations <- internal$parameters$n_combinations + n_combinations_now <- length(index_features) # Extract the relevant coalitions specified in `index_features` from `S`. # This will always be called as `index_features` is never NULL. @@ -1318,34 +1329,36 @@ prepare_data_gaussian_new_v5_rnorm_cpp_fix2 <- function(internal, index_features MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) # Call cpp - result_list <- prepare_data_gaussian_cpp_fix( + dt <- prepare_data_gaussian_cpp_fix_cube_v2( MC_samples_mat = MC_samples_mat, x_explain_mat = x_explain_mat, S = S, mu = mu, cov_mat = cov_mat) - dt = as.data.table(do.call(rbind, result_list)) - setnames(dt, feature_names) - dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] - dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] - data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + # Reshape and convert to data.table + dim(dt) = c(n_combinations_now*n_explain*n_samples, n_features) + print(system.time({dt = as.data.table(dt)}, gcFirst = FALSE)) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) - # Update the id_combination. This will always be called as `index_features` is never NULL. - if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + # Update the id_combination. This will always be called as `index_features` is never NULL. + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] - # Add uniform weights - dt[, w := 1 / n_samples] + # Add uniform weights + dt[, w := 1 / n_samples] - # Remove: - # This is not needed when we assume that the empty and grand coalitions will never be present - # dt[id_combination %in% c(1, n_combinations), w := 1] + # Remove: + # This is not needed when we assume that the empty and grand coalitions will never be present + # dt[id_combination %in% c(1, n_combinations), w := 1] - # Return the MC samples - return(dt) + # Return the MC samples + return(dt) } -prepare_data_gaussian_new_v5_rnorm_cpp_v2 <- function(internal, index_features, ...) { +prepare_data_gaussian_new_v5_rnorm_cpp_fix_std_list <- function(internal, index_features, ...) { # This function assumes that index_features will never include the empty and # grand coalitions. This is valid 21/11/23 as `batch_prepare_vS()` removes the # grand coalition before calling the `prepare_data()` function and the empty @@ -1371,13 +1384,19 @@ prepare_data_gaussian_new_v5_rnorm_cpp_v2 <- function(internal, index_features, MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) # Call cpp - result_list <- prepare_data_gaussian_cpp_v2( + result_list <- prepare_data_gaussian_cpp_fix_std_list( MC_samples_mat = MC_samples_mat, x_explain_mat = x_explain_mat, S = S, mu = mu, cov_mat = cov_mat) + # FIND A BETTER WAY TO DO THIS + for (i in seq(length(result_list))) { + dim(result_list[[i]]) = c(n_samples, n_features) + } + + # Here we first put the inner list together and then the whole thing. Maybe exist another faster way! dt = as.data.table(do.call(rbind, result_list)) setnames(dt, feature_names) dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] @@ -1398,7 +1417,6 @@ prepare_data_gaussian_new_v5_rnorm_cpp_v2 <- function(internal, index_features, return(dt) } - # Here we only want to generate the data once. So we generate n_samples*n_batches from N(0, I), # and then use Cholensky to transform to N(O, Sigma_{Sbar|S}), and then add the means. prepare_data_gaussian_new_v6 <- function(internal, index_features, ...) { @@ -1539,12 +1557,9 @@ prepare_data_gaussian_new_v6 <- function(internal, index_features, ...) { n_train <- 1000 n_test <- 100 M <- 8 - rho <- 0.5 betas <- c(0, rep(1, M)) - - # We use the Gaussian approach approach <- "gaussian" @@ -1627,7 +1642,6 @@ prepare_data_gaussian_new_v6 <- function(internal, index_features, ...) { # conditional expectation computation for the chosen approach # Note: model and predict_model are ONLY used by the AICc-methods of approach empirical to find optimal parameters internal <- setup_computation(internal, model, predict_model) - } @@ -1695,41 +1709,11 @@ time_new_v5_rnorm_cpp <- system.time({ index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) res_new_v5_rnorm_cpp <- NULL -# time_new_v5_rnorm_cpp_just_extracting <- system.time({ -# res_new_v5_rnorm_cpp_just_extracting <- prepare_data_gaussian_new_v5_rnorm_cpp_just_extracting( -# internal = internal, -# index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) -# res_new_v5_rnorm_cpp_just_extracting <- NULL -# -# time_new_v5_rnorm_cpp_with_cond_mean_var <- system.time({ -# res_new_v5_rnorm_cpp_with_cond_mean_var <- prepare_data_gaussian_new_v5_rnorm_cpp_with_cond_mean_var( -# internal = internal, -# index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) -# res_new_v5_rnorm_cpp_with_cond_mean_var <- NULL -# -# time_new_v5_rnorm_cpp_with_chol <- system.time({ -# res_new_v5_rnorm_cpp_with_chol <- prepare_data_gaussian_new_v5_rnorm_cpp_with_chol( -# internal = internal, -# index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) -# res_new_v5_rnorm_cpp_with_chol <- NULL -# -# time_new_v5_rnorm_cpp_without_adding_to_list <- system.time({ -# res_new_v5_rnorm_cpp_without_adding_to_list <- prepare_data_gaussian_new_v5_rnorm_cpp_without_adding_to_list( -# internal = internal, -# index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) -# res_new_v5_rnorm_cpp_without_adding_to_list <- NULL -# -# time_new_v5_rnorm_cpp_fake_list <- system.time({ -# res_new_v5_rnorm_cpp_fake_list <- prepare_data_gaussian_new_v5_rnorm_cpp_fake_list( -# internal = internal, -# index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) -# res_new_v5_rnorm_cpp_fake_list <- NULL - -time_new_v5_rnorm_cpp_fix <- system.time({ - res_new_v5_rnorm_cpp_fix <- prepare_data_gaussian_new_v5_rnorm_cpp_fix( +time_new_v5_rnorm_cpp_with_wrap <- system.time({ + res_new_v5_rnorm_cpp_with_wrap <- prepare_data_gaussian_new_v5_rnorm_cpp_with_wrap( internal = internal, index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) -res_new_v5_rnorm_cpp_fix <- NULL +res_new_v5_rnorm_cpp_with_wrap <- NULL time_new_v5_rnorm_cpp_v2 <- system.time({ res_new_v5_rnorm_cpp_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_v2( @@ -1737,6 +1721,42 @@ time_new_v5_rnorm_cpp_v2 <- system.time({ index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) res_new_v5_rnorm_cpp_v2 <- NULL +time_new_v5_rnorm_cpp_fix_large_mat <- system.time({ + res_new_v5_rnorm_cpp_fix_large_mat <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_large_mat( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix_large_mat <- NULL + +time_new_v5_rnorm_cpp_fix_large_mat_v2 <- system.time({ + res_new_v5_rnorm_cpp_fix_large_mat_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_large_mat_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix_large_mat_v2 <- NULL + +time_new_v5_rnorm_cpp_fix_cube <- system.time({ + res_new_v5_rnorm_cpp_fix_cube <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_cube( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix_cube <- NULL + +time_new_v5_rnorm_cpp_fix_cube_v2 <- system.time({ + res_new_v5_rnorm_cpp_fix_cube_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_cube_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix_cube_v2 <- NULL + +time_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- system.time({ + res_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- NULL + +time_new_v5_rnorm_cpp_fix_std_list <- system.time({ + res_new_v5_rnorm_cpp_fix_std_list <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_std_list( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +res_new_v5_rnorm_cpp_fix_std_list <- NULL + time_new_v6 <- system.time({ res_new_v6 <- prepare_data_gaussian_new_v6( internal = internal, @@ -1744,13 +1764,24 @@ time_new_v6 <- system.time({ res_new_v6 <- NULL # Create a table of the times. Less is better -times <- rbind(time_old, time_new_v1, time_new_v2, time_new_v3, time_new_v4, time_new_v5, - time_new_v5_rnorm, time_new_v5_rnorm_v2, time_new_v5_rnorm_cpp, - # time_new_v5_rnorm_cpp_just_extracting, time_new_v5_rnorm_cpp_with_cond_mean_var, - # time_new_v5_rnorm_cpp_with_chol, time_new_v5_rnorm_cpp_without_adding_to_list, - # time_new_v5_rnorm_cpp_fake_list, - time_new_v5_rnorm_cpp_fix, - time_new_v5_rnorm_cpp_v2, time_new_v6) +times <- rbind(time_old, + time_new_v1, + time_new_v2, + time_new_v3, + time_new_v4, + time_new_v5, + time_new_v5_rnorm, + time_new_v5_rnorm_v2, + time_new_v5_rnorm_cpp, + time_new_v5_rnorm_cpp_with_wrap, + time_new_v5_rnorm_cpp_v2, + time_new_v5_rnorm_cpp_fix_large_mat, + time_new_v5_rnorm_cpp_fix_large_mat_v2, + time_new_v5_rnorm_cpp_fix_cube, + time_new_v5_rnorm_cpp_fix_cube_v2, + time_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices, + time_new_v5_rnorm_cpp_fix_std_list, + time_new_v6) times # Look at the relative time compared to the old method. Larger value is better. @@ -1760,35 +1791,44 @@ rownames(times_relative) <- paste0(rownames(times), "_rel") times_relative # ALL COALITIONS (look_at_coalitions = seq(1, 2^M-2)) -# user.self sys.self elapsed user.child sys.child -# time_old 38.773 5.301 45.087 0 0 -# time_new_v1 13.056 3.227 16.489 0 0 -# time_new_v2 12.760 3.188 16.159 0 0 -# time_new_v3 11.536 3.891 15.677 0 0 -# time_new_v4 11.293 3.681 15.150 0 0 -# time_new_v5 11.249 3.580 15.029 0 0 -# time_new_v5_rnorm 11.719 3.871 15.960 0 0 -# time_new_v5_rnorm_v2 11.893 3.780 16.055 0 0 -# time_new_v5_rnorm_cpp 40.220 7.029 51.638 0 0 -# time_new_v5_rnorm_cpp_fix 5.464 1.682 7.247 0 0 -# time_new_v5_rnorm_cpp_v2 38.499 4.262 43.501 0 0 -# time_new_v6 11.546 3.413 15.165 0 0 -# user.self sys.self elapsed user.child sys.child -# time_old_rel 1.00000 1.00000 1.00000 NaN NaN -# time_new_v1_rel 2.96975 1.64270 2.73437 NaN NaN -# time_new_v2_rel 3.03864 1.66280 2.79021 NaN NaN -# time_new_v3_rel 3.36104 1.36237 2.87600 NaN NaN -# time_new_v4_rel 3.43337 1.44010 2.97604 NaN NaN -# time_new_v5_rel 3.44680 1.48073 3.00000 NaN NaN -# time_new_v5_rnorm_rel 3.30856 1.36941 2.82500 NaN NaN -# time_new_v5_rnorm_v2_rel 3.26015 1.40238 2.80828 NaN NaN -# time_new_v5_rnorm_cpp_rel 0.96402 0.75416 0.87314 NaN NaN -# time_new_v5_rnorm_cpp_fix_rel 7.09608 3.15161 6.22147 NaN NaN -# time_new_v5_rnorm_cpp_v2_rel 1.00712 1.24378 1.03646 NaN NaN -# time_new_v6_rel 3.35813 1.55318 2.97310 NaN NaN - -# ----------------------------------------------------------------------------------------------------------------- - +# user.self sys.self elapsed user.child sys.child +# time_old 38.663 3.654 43.044 0.000 0.000 +# time_new_v1 14.693 3.539 18.709 0.000 0.000 +# time_new_v2 15.545 3.897 19.966 0.012 0.032 +# time_new_v3 13.476 3.838 17.812 0.000 0.000 +# time_new_v4 14.085 4.858 19.718 0.015 0.033 +# time_new_v5 13.508 4.104 18.148 0.000 0.000 +# time_new_v5_rnorm 13.107 4.178 17.705 0.000 0.000 +# time_new_v5_rnorm_v2 13.309 4.458 18.233 0.010 0.023 +# time_new_v5_rnorm_cpp 44.782 5.589 51.849 0.000 0.000 +# time_new_v5_rnorm_cpp_with_wrap 45.816 4.799 51.979 0.021 0.070 +# time_new_v5_rnorm_cpp_v2 44.997 6.513 52.931 0.000 0.000 +# time_new_v5_rnorm_cpp_fix_large_mat 5.594 2.142 7.831 0.000 0.000 +# time_new_v5_rnorm_cpp_fix_large_mat_v2 6.160 2.112 8.499 0.000 0.000 +# time_new_v5_rnorm_cpp_fix_cube 5.607 2.745 8.558 0.000 0.000 +# time_new_v5_rnorm_cpp_fix_cube_v2 4.621 2.121 6.862 0.000 0.000 +# time_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices 6.016 3.687 10.469 0.000 0.000 +# time_new_v5_rnorm_cpp_fix_std_list 5.407 3.272 8.841 0.000 0.000 +# time_new_v6 13.540 4.267 18.361 0.000 0.000 +# user.self sys.self elapsed user.child sys.child +# time_old_rel 1.00000 1.00000 1.00000 NaN NaN +# time_new_v1_rel 2.63139 1.03250 2.30071 NaN NaN +# time_new_v2_rel 2.48717 0.93764 2.15586 0 0 +# time_new_v3_rel 2.86903 0.95206 2.41657 NaN NaN +# time_new_v4_rel 2.74498 0.75216 2.18298 0 0 +# time_new_v5_rel 2.86223 0.89035 2.37183 NaN NaN +# time_new_v5_rnorm_rel 2.94980 0.87458 2.43118 NaN NaN +# time_new_v5_rnorm_v2_rel 2.90503 0.81965 2.36077 0 0 +# time_new_v5_rnorm_cpp_rel 0.86336 0.65378 0.83018 NaN NaN +# time_new_v5_rnorm_cpp_with_wrap_rel 0.84388 0.76141 0.82810 0 0 +# time_new_v5_rnorm_cpp_v2_rel 0.85924 0.56103 0.81321 NaN NaN +# time_new_v5_rnorm_cpp_fix_large_mat_rel 6.91151 1.70588 5.49662 NaN NaN +# time_new_v5_rnorm_cpp_fix_large_mat_v2_rel 6.27646 1.73011 5.06460 NaN NaN +# time_new_v5_rnorm_cpp_fix_cube_rel 6.89549 1.33115 5.02968 NaN NaN +# time_new_v5_rnorm_cpp_fix_cube_v2_rel 8.36680 1.72277 6.27281 NaN NaN +# time_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices_rel 6.42670 0.99105 4.11157 NaN NaN +# time_new_v5_rnorm_cpp_fix_std_list_rel 7.15055 1.11675 4.86868 NaN NaN +# time_new_v6_rel 2.85547 0.85634 2.34432 NaN NaN # 26 coalitions (look_at_coalitions = seq(1, 2^M-2, 10)) @@ -1883,12 +1923,54 @@ one_coalition_time_new_v5_rnorm_cpp <- system.time({ internal = internal, index_features = internal$objects$S_batch$`1`[look_at_coalition])}) +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_with_wrap <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_with_wrap <- prepare_data_gaussian_new_v5_rnorm_cpp_with_wrap( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + set.seed(123) one_coalition_time_new_v5_rnorm_cpp_v2 <- system.time({ one_coalition_res_new_v5_rnorm_cpp_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_v2( internal = internal, index_features = internal$objects$S_batch$`1`[look_at_coalition])}) +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_fix_large_mat <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_fix_large_mat <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_large_mat( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_fix_large_mat_v2 <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_fix_large_mat_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_large_mat_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_fix_cube <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_fix_cube <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_cube( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_fix_cube_v2 <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_fix_cube_v2 <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_cube_v2( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + +set.seed(123) +one_coalition_time_new_v5_rnorm_cpp_fix_std_list <- system.time({ + one_coalition_res_new_v5_rnorm_cpp_fix_std_list <- prepare_data_gaussian_new_v5_rnorm_cpp_fix_std_list( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalition])}) + one_coalition_time_new_v6 <- system.time({ one_coalition_res_new_v6 <- prepare_data_gaussian_new_v6( internal = internal, @@ -1904,7 +1986,14 @@ rbind(one_coalition_time_old, one_coalition_time_new_v5_rnorm, one_coalition_time_new_v5_rnorm_v2, one_coalition_time_new_v5_rnorm_cpp, + one_coalition_time_new_v5_rnorm_cpp_with_wrap, one_coalition_time_new_v5_rnorm_cpp_v2, + one_coalition_time_new_v5_rnorm_cpp_fix_large_mat, + one_coalition_time_new_v5_rnorm_cpp_fix_large_mat_v2, + one_coalition_time_new_v5_rnorm_cpp_fix_cube, + one_coalition_time_new_v5_rnorm_cpp_fix_cube_v2, + one_coalition_time_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices, + one_coalition_time_new_v5_rnorm_cpp_fix_std_list, one_coalition_time_new_v6) internal$objects$S[internal$objects$S_batch$`1`[look_at_coalition], , drop = FALSE] @@ -1918,7 +2007,14 @@ means_v5 <- one_coalition_res_new_v5[, lapply(.SD, mean), .SDcols = paste0("X", means_v5_rnorm <- one_coalition_res_new_v5_rnorm[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] means_v5_rnorm_v2 <- one_coalition_res_new_v5_rnorm_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] means_v5_rnorm_cpp <- one_coalition_res_new_v5_rnorm_cpp[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_with_wrap <- one_coalition_res_new_v5_rnorm_cpp_with_wrap[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] means_v5_rnorm_cpp_v2 <- one_coalition_res_new_v5_rnorm_cpp_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_fix_large_mat <- one_coalition_res_new_v5_rnorm_cpp_fix_large_mat[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_fix_large_mat_v2 <- one_coalition_res_new_v5_rnorm_cpp_fix_large_mat_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_fix_cube <- one_coalition_res_new_v5_rnorm_cpp_fix_cube[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_fix_cube_v2 <- one_coalition_res_new_v5_rnorm_cpp_fix_cube_v2[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_fix_list_of_lists_of_matrices <- one_coalition_res_new_v5_rnorm_cpp_fix_list_of_lists_of_matrices[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] +means_v5_rnorm_cpp_fix_std_list <- one_coalition_res_new_v5_rnorm_cpp_fix_std_list[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] means_v6 <- one_coalition_res_new_v6[, lapply(.SD, mean), .SDcols = paste0("X", seq(M)), by = list(id_combination, id)] # They are all in the same ballpark, so the differences are due to sampling. @@ -1934,7 +2030,14 @@ max(abs(means_old - means_v5)) max(abs(means_old - means_v5_rnorm)) max(abs(means_old - means_v5_rnorm_v2)) max(abs(means_old - means_v5_rnorm_cpp)) +max(abs(means_old - means_v5_rnorm_cpp_with_wrap)) max(abs(means_old - means_v5_rnorm_cpp_v2)) +max(abs(means_old - means_v5_rnorm_cpp_fix_large_mat)) +max(abs(means_old - means_v5_rnorm_cpp_fix_large_mat_v2)) +max(abs(means_old - means_v5_rnorm_cpp_fix_cube)) +max(abs(means_old - means_v5_rnorm_cpp_fix_cube_v2)) +max(abs(means_old - means_v5_rnorm_cpp_fix_list_of_lists_of_matrices)) +max(abs(means_old - means_v5_rnorm_cpp_fix_std_list)) max(abs(means_old - means_v6)) diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index e08ec9fa6..63aabfd32 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -3,7 +3,7 @@ using namespace Rcpp; - //' Generate Gaussian MC samples +//' Generate Gaussian MC samples //' //' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the //' univariate standard normal. @@ -35,7 +35,7 @@ using namespace Rcpp; arma::mat ret(n_samples, n_features); // Create a list containing the MC samples for all coalitions and test observations - Rcpp::List resultList; + Rcpp::List result_list; // Iterate over the coalitions for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { @@ -81,431 +81,19 @@ using namespace Rcpp; for (int idx_now = 0; idx_now < n_explain; idx_now++) { ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - resultList.push_back(ret); + result_list.push_back(ret); } } - return resultList; + return result_list; } - - - // // [[Rcpp::export]] - // Rcpp::List prepare_data_gaussian_cpp_just_extracting(arma::mat MC_samples_mat, - // arma::mat x_explain_mat, - // arma::mat S, - // arma::vec mu, - // arma::mat cov_mat) { - // int n_explain = x_explain_mat.n_rows; - // int n_samples = MC_samples_mat.n_rows; - // int n_features = MC_samples_mat.n_cols; - // - // // Pre-allocate result matrix - // arma::mat ret(n_samples, n_features); - // - // // Create a list containing the MC samples for all coalitions and test observations - // Rcpp::List resultList; - // - // // Iterate over the coalitions - // for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - // - // // TODO: REMOVE IN FINAL VERSION Small printout - // Rcpp::Rcout << S_ind + 1 << ","; - // - // // Get current coalition S and the indices of the features in coalition S and mask Sbar - // arma::mat S_now = S.row(S_ind); - // arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - // arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - // - // // Extract the features we condition on - // arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - // - // // Extract the mean values for the features in the two sets - // arma::vec mu_S = mu.elem(S_now_idx); - // arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - // - // // Extract the relevant parts of the covariance matrix - // arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - // arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - // arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - // arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - // - // // // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - // // arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - // // arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - // // - // // // Ensure that the conditional covariance matrix is symmetric - // // if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - // // cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - // // } - // // - // // // Compute the conditional mean of Xsbar given Xs = Xs_star - // // arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - // // x_Sbar_mean.each_col() += mu_Sbar; - // // - // // // Transform the samples to be from N(O, Sigma_Sbar|S) - // // arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - // // - // // // Loop over the different test observations and combine the generated values with the values we conditioned on - // // for (int idx_now = 0; idx_now < n_explain; idx_now++) { - // // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - // // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - // // resultList.push_back(ret); - // // } - // } - // - // return resultList; - // } - // - // - // // [[Rcpp::export]] - // Rcpp::List prepare_data_gaussian_cpp_with_cond_mean_var(arma::mat MC_samples_mat, - // arma::mat x_explain_mat, - // arma::mat S, - // arma::vec mu, - // arma::mat cov_mat) { - // int n_explain = x_explain_mat.n_rows; - // int n_samples = MC_samples_mat.n_rows; - // int n_features = MC_samples_mat.n_cols; - // - // // Pre-allocate result matrix - // arma::mat ret(n_samples, n_features); - // - // // Create a list containing the MC samples for all coalitions and test observations - // Rcpp::List resultList; - // - // // Iterate over the coalitions - // for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - // - // // TODO: REMOVE IN FINAL VERSION Small printout - // Rcpp::Rcout << S_ind + 1 << ","; - // - // // Get current coalition S and the indices of the features in coalition S and mask Sbar - // arma::mat S_now = S.row(S_ind); - // arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - // arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - // - // // Extract the features we condition on - // arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - // - // // Extract the mean values for the features in the two sets - // arma::vec mu_S = mu.elem(S_now_idx); - // arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - // - // // Extract the relevant parts of the covariance matrix - // arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - // arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - // arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - // arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - // - // // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - // arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - // arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - // - // // Ensure that the conditional covariance matrix is symmetric - // if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - // cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - // } - // - // // Compute the conditional mean of Xsbar given Xs = Xs_star - // arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - // x_Sbar_mean.each_col() += mu_Sbar; - // - // // // Transform the samples to be from N(O, Sigma_Sbar|S) - // // arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - // - // // // Loop over the different test observations and combine the generated values with the values we conditioned on - // // for (int idx_now = 0; idx_now < n_explain; idx_now++) { - // // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - // // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - // // resultList.push_back(ret); - // // } - // } - // - // return resultList; - // } - // - // - // // [[Rcpp::export]] - // Rcpp::List prepare_data_gaussian_cpp_with_chol(arma::mat MC_samples_mat, - // arma::mat x_explain_mat, - // arma::mat S, - // arma::vec mu, - // arma::mat cov_mat) { - // int n_explain = x_explain_mat.n_rows; - // int n_samples = MC_samples_mat.n_rows; - // int n_features = MC_samples_mat.n_cols; - // - // // Pre-allocate result matrix - // arma::mat ret(n_samples, n_features); - // - // // Create a list containing the MC samples for all coalitions and test observations - // Rcpp::List resultList; - // - // // Iterate over the coalitions - // for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - // - // // TODO: REMOVE IN FINAL VERSION Small printout - // Rcpp::Rcout << S_ind + 1 << ","; - // - // // Get current coalition S and the indices of the features in coalition S and mask Sbar - // arma::mat S_now = S.row(S_ind); - // arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - // arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - // - // // Extract the features we condition on - // arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - // - // // Extract the mean values for the features in the two sets - // arma::vec mu_S = mu.elem(S_now_idx); - // arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - // - // // Extract the relevant parts of the covariance matrix - // arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - // arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - // arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - // arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - // - // // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - // arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - // arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - // - // // Ensure that the conditional covariance matrix is symmetric - // if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - // cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - // } - // - // // Compute the conditional mean of Xsbar given Xs = Xs_star - // arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - // x_Sbar_mean.each_col() += mu_Sbar; - // - // // Transform the samples to be from N(O, Sigma_Sbar|S) - // arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - // - // // // Loop over the different test observations and combine the generated values with the values we conditioned on - // // for (int idx_now = 0; idx_now < n_explain; idx_now++) { - // // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - // // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - // // resultList.push_back(ret); - // // } - // } - // - // return resultList; - // } - // - // - // - // // [[Rcpp::export]] - // Rcpp::List prepare_data_gaussian_cpp_without_adding_to_list(arma::mat MC_samples_mat, - // arma::mat x_explain_mat, - // arma::mat S, - // arma::vec mu, - // arma::mat cov_mat) { - // int n_explain = x_explain_mat.n_rows; - // int n_samples = MC_samples_mat.n_rows; - // int n_features = MC_samples_mat.n_cols; - // - // // Pre-allocate result matrix - // arma::mat ret(n_samples, n_features); - // - // // Create a list containing the MC samples for all coalitions and test observations - // Rcpp::List resultList; - // - // // Iterate over the coalitions - // for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - // - // // TODO: REMOVE IN FINAL VERSION Small printout - // Rcpp::Rcout << S_ind + 1 << ","; - // - // // Get current coalition S and the indices of the features in coalition S and mask Sbar - // arma::mat S_now = S.row(S_ind); - // arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - // arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - // - // // Extract the features we condition on - // arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - // - // // Extract the mean values for the features in the two sets - // arma::vec mu_S = mu.elem(S_now_idx); - // arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - // - // // Extract the relevant parts of the covariance matrix - // arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - // arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - // arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - // arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - // - // // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - // arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - // arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - // - // // Ensure that the conditional covariance matrix is symmetric - // if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - // cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - // } - // - // // Compute the conditional mean of Xsbar given Xs = Xs_star - // arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - // x_Sbar_mean.each_col() += mu_Sbar; - // - // // Transform the samples to be from N(O, Sigma_Sbar|S) - // arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - // - // // Loop over the different test observations and combine the generated values with the values we conditioned on - // for (int idx_now = 0; idx_now < n_explain; idx_now++) { - // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - // // resultList.push_back(ret); - // } - // } - // - // return resultList; - // } - // - // // [[Rcpp::export]] - // Rcpp::List prepare_data_gaussian_cpp_fake_list(arma::mat MC_samples_mat, - // arma::mat x_explain_mat, - // arma::mat S, - // arma::vec mu, - // arma::mat cov_mat) { - // int n_explain = x_explain_mat.n_rows; - // int n_samples = MC_samples_mat.n_rows; - // int n_features = MC_samples_mat.n_cols; - // - // // Pre-allocate result matrix - // arma::mat ret(n_samples, n_features); - // - // // Create a list containing the MC samples for all coalitions and test observations - // Rcpp::List resultList; - // Rcpp::List resultList2; - // - // // Iterate over the coalitions - // for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - // - // // TODO: REMOVE IN FINAL VERSION Small printout - // Rcpp::Rcout << S_ind + 1 << ","; - // - // // Get current coalition S and the indices of the features in coalition S and mask Sbar - // arma::mat S_now = S.row(S_ind); - // arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - // arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - // - // // Extract the features we condition on - // arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - // - // // Extract the mean values for the features in the two sets - // arma::vec mu_S = mu.elem(S_now_idx); - // arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - // - // // Extract the relevant parts of the covariance matrix - // arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - // arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - // arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - // arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - // - // // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - // arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - // arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - // - // // Ensure that the conditional covariance matrix is symmetric - // if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - // cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - // } - // - // // Compute the conditional mean of Xsbar given Xs = Xs_star - // arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - // x_Sbar_mean.each_col() += mu_Sbar; - // - // // Transform the samples to be from N(O, Sigma_Sbar|S) - // arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - // - // // Loop over the different test observations and combine the generated values with the values we conditioned on - // for (int idx_now = 0; idx_now < n_explain; idx_now++) { - // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - // resultList.push_back(ret); - // } - // } - // - // return resultList2; - // } - // - // // Fungerer men treg - // // // [[Rcpp::export]] - // // arma::cube prepare_data_gaussian_cpp_fix(arma::mat MC_samples_mat, - // // arma::mat x_explain_mat, - // // arma::mat S, - // // arma::vec mu, - // // arma::mat cov_mat) { - // // int n_explain = x_explain_mat.n_rows; - // // int n_samples = MC_samples_mat.n_rows; - // // int n_features = MC_samples_mat.n_cols; - // // - // // // Pre-allocate result matrix - // // arma::mat ret(n_samples, n_features); - // // arma::cube result(n_samples, n_features, n_explain*n_samples); - // // - // // - // // // Create a list containing the MC samples for all coalitions and test observations - // // Rcpp::List resultList(n_explain * n_samples); - // // - // // // Iterate over the coalitions - // // for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - // // - // // // TODO: REMOVE IN FINAL VERSION Small printout - // // Rcpp::Rcout << S_ind + 1 << ","; - // // - // // // Get current coalition S and the indices of the features in coalition S and mask Sbar - // // arma::mat S_now = S.row(S_ind); - // // arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - // // arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - // // - // // // Extract the features we condition on - // // arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - // // - // // // Extract the mean values for the features in the two sets - // // arma::vec mu_S = mu.elem(S_now_idx); - // // arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - // // - // // // Extract the relevant parts of the covariance matrix - // // arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - // // arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - // // arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - // // arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - // // - // // // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - // // arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - // // arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - // // - // // // Ensure that the conditional covariance matrix is symmetric - // // if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - // // cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - // // } - // // - // // // Compute the conditional mean of Xsbar given Xs = Xs_star - // // arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - // // x_Sbar_mean.each_col() += mu_Sbar; - // // - // // // Transform the samples to be from N(O, Sigma_Sbar|S) - // // arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - // // - // // // Loop over the different test observations and combine the generated values with the values we conditioned on - // // for (int idx_now = 0; idx_now < n_explain; idx_now++) { - // // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - // // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - // // result.slice(S_ind*n_explain + idx_now) = ret; - // // } - // // } - // // - // // return result; - // // } - // [[Rcpp::export]] - Rcpp::List prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { + Rcpp::List prepare_data_gaussian_cpp_with_wrap(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { int n_explain = x_explain_mat.n_rows; int n_samples = MC_samples_mat.n_rows; int n_features = MC_samples_mat.n_cols; @@ -514,16 +102,11 @@ using namespace Rcpp; arma::mat ret(n_samples, n_features); // Create a list containing the MC samples for all coalitions and test observations - std::list resultList; - // Rcpp::List resultList; - - Rcpp::List listt(S.n_rows); + Rcpp::List result_list; // Iterate over the coalitions for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - Rcpp::List listt2(n_explain); - // TODO: REMOVE IN FINAL VERSION Small printout Rcpp::Rcout << S_ind + 1 << ","; @@ -565,37 +148,99 @@ using namespace Rcpp; for (int idx_now = 0; idx_now < n_explain; idx_now++) { ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - //Rcpp::NumericMatrix matRcpp(ret.begin(), ret.n_rows, ret.n_cols); - //resultList.push_back(ret); - listt2[idx_now] = ret; + result_list.push_back(Rcpp::wrap(ret)); } - listt[S_ind] = listt2; } + return result_list; + } + + // [[Rcpp::export]] + Rcpp::List prepare_data_gaussian_cpp_v2(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + + // Create a list containing the MC samples for all coalitions and test observations + Rcpp::List result_list; + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + + // TODO: REMOVE IN FINAL VERSION Small printout + Rcpp::Rcout << S_ind + 1 << ","; + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + + // Extract the mean values for the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + x_Sbar_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_Sbar|S) + arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); + // Loop over the different test observations and Combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + arma::mat ret(n_samples, n_features); + ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + ret.cols(Sbar_now_idx) = trans(MC_samples_mat_now + repmat(x_Sbar_mean.col(idx_now), 1, n_samples)); + result_list.push_back(ret); + } + } - return listt; + return result_list; } // [[Rcpp::export]] -std::list prepare_data_gaussian_cpp_fix(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { + arma::mat prepare_data_gaussian_cpp_fix_large_mat(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { int n_explain = x_explain_mat.n_rows; int n_samples = MC_samples_mat.n_rows; int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; // Pre-allocate result matrix - arma::mat ret(n_samples, n_features); + arma::mat return_mat(n_coalitions*n_explain*n_samples, n_features); // Create a list containing the MC samples for all coalitions and test observations - std::list resultList; - // Rcpp::List resultList; + std::list result_list; + // Rcpp::List result_list; // Iterate over the coalitions - for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { // TODO: REMOVE IN FINAL VERSION Small printout Rcpp::Rcout << S_ind + 1 << ","; @@ -636,33 +281,38 @@ std::list prepare_data_gaussian_cpp_fix(arma::mat MC_samples_mat, // Loop over the different test observations and combine the generated values with the values we conditioned on for (int idx_now = 0; idx_now < n_explain; idx_now++) { - ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - //Rcpp::NumericMatrix matRcpp(ret.begin(), ret.n_rows, ret.n_cols); - resultList.push_back(ret); + // Maybe faster to create vector 0:(n_samples - 1) and then just add n_samples in each loop. + arma::uvec row_indices_now = arma::linspace(S_ind*n_explain*n_samples + idx_now*n_samples, + S_ind*n_explain*n_samples + idx_now*n_samples + n_samples - 1, + n_samples); + + return_mat.submat(row_indices_now, S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + return_mat.submat(row_indices_now, Sbar_now_idx) = + MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); } } - return resultList; + return return_mat; } + // Diff in v2 is where we do the transpose // [[Rcpp::export]] -arma::mat prepare_data_gaussian_cpp_fix_large_mat(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { + arma::mat prepare_data_gaussian_cpp_fix_large_mat_v2(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { int n_explain = x_explain_mat.n_rows; int n_samples = MC_samples_mat.n_rows; int n_features = MC_samples_mat.n_cols; int n_coalitions = S.n_rows; // Pre-allocate result matrix - arma::mat ret(n_coalitions*n_explain*n_samples, n_features); + arma::mat return_mat(n_coalitions*n_explain*n_samples, n_features); // Create a list containing the MC samples for all coalitions and test observations - std::list resultList; - // Rcpp::List resultList; + std::list result_list; + // Rcpp::List result_list; // Iterate over the coalitions for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { @@ -702,58 +352,241 @@ arma::mat prepare_data_gaussian_cpp_fix_large_mat(arma::mat MC_samples_mat, x_Sbar_mean.each_col() += mu_Sbar; // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); // Loop over the different test observations and combine the generated values with the values we conditioned on for (int idx_now = 0; idx_now < n_explain; idx_now++) { - // ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); - // ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - // Maybe faster to create vector 0:(n_samples - 1) and then just add n_samples in each loop. arma::uvec row_indices_now = arma::linspace(S_ind*n_explain*n_samples + idx_now*n_samples, - S_ind*n_explain*n_samples + idx_now*n_samples + n_samples - 1, - n_samples); + S_ind*n_explain*n_samples + idx_now*n_samples + n_samples - 1, + n_samples); - ret.submat(row_indices_now, S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); - ret.submat(row_indices_now, Sbar_now_idx) = - MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + return_mat.submat(row_indices_now, S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + return_mat.submat(row_indices_now, Sbar_now_idx) = + trans(MC_samples_mat_now + repmat(x_Sbar_mean.col(idx_now), 1, n_samples)); + } + } + + return return_mat; + } + + // [[Rcpp::export]] + arma::cube prepare_data_gaussian_cpp_fix_cube(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Pre-allocate result matrix + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_features, n_explain*n_coalitions); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // TODO: REMOVE IN FINAL VERSION Small printout + Rcpp::Rcout << S_ind + 1 << ","; + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + + // Extract the mean values for the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + x_Sbar_mean.each_col() += mu_Sbar; + // Transform the samples to be from N(O, Sigma_Sbar|S) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different test observations and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + result_cube.slice(S_ind*n_explain + idx_now) = aux_mat; } } - return ret; + return result_cube; } - //' Generate Gaussian MC samples - //' - //' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the - //' univariate standard normal. - //' @param x_explain_mat matrix. Matrix of dimension `n_explain` times `n_features` containing the observations - //' to explain. - //' @param S matrix. Matrix of dimension `n_combinations` times `n_features` containing binary representations of - //' the used coalitions. - //' @param mu vector. Vector of length `n_features` containing the mean of each feature. - //' @param cov_mat mat. Matrix of dimension `n_features` times `n_features` containing the pariwise covariance between - //' all features. - //' - //' @export - //' @keywords internal - //' - //' @return List of length `n_combinations`*`n_samples`, where each entry is a matrix of dimension `n_samples` times - //' `n_features` containing the conditional MC samples for each coalition and explicand. - //' @author Lars Henry Berge Olsen // [[Rcpp::export]] - Rcpp::List prepare_data_gaussian_cpp_v2(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { + arma::cube prepare_data_gaussian_cpp_fix_cube_v2(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Pre-allocate result matrix + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // TODO: REMOVE IN FINAL VERSION Small printout + Rcpp::Rcout << S_ind + 1 << ","; + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + + // Extract the mean values for the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + x_Sbar_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_Sbar|S) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different test observations and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; + } + + // [[Rcpp::export]] + Rcpp::List prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + + // Pre-allocate result matrix + arma::mat aux_mat(n_samples, n_features); + + // Create a list containing lists that contian the MC samples for all coalitions and test observations in each matrix + Rcpp::List result_list(S.n_rows); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { + + Rcpp::List result_list_now(n_explain); + + // TODO: REMOVE IN FINAL VERSION Small printout + Rcpp::Rcout << S_ind + 1 << ","; + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + + // Extract the mean values for the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? + x_Sbar_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_Sbar|S) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different test observations and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + result_list_now[idx_now] = aux_mat; + } + result_list[S_ind] = result_list_now; + } + + return result_list; + } + + // [[Rcpp::export]] + std::list prepare_data_gaussian_cpp_fix_std_list(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { int n_explain = x_explain_mat.n_rows; int n_samples = MC_samples_mat.n_rows; int n_features = MC_samples_mat.n_cols; + // Pre-allocate result matrix + arma::mat aux_mat(n_samples, n_features); + // Create a list containing the MC samples for all coalitions and test observations - Rcpp::List resultList; + std::list result_list; // Iterate over the coalitions for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { @@ -763,7 +596,7 @@ arma::mat prepare_data_gaussian_cpp_fix_large_mat(arma::mat MC_samples_mat, // Get current coalition S and the indices of the features in coalition S and mask Sbar arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); // Extract the features we condition on @@ -793,16 +626,15 @@ arma::mat prepare_data_gaussian_cpp_fix_large_mat(arma::mat MC_samples_mat, x_Sbar_mean.each_col() += mu_Sbar; // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - // Loop over the different test observations and Combine the generated values with the values we conditioned on + // Loop over the different test observations and combine the generated values with the values we conditioned on for (int idx_now = 0; idx_now < n_explain; idx_now++) { - arma::mat ret(n_samples, n_features); - ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); - ret.cols(Sbar_now_idx) = trans(MC_samples_mat_now + repmat(x_Sbar_mean.col(idx_now), 1, n_samples)); - resultList.push_back(ret); + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + result_list.push_back(aux_mat); } } - return resultList; + return result_list; } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index b522836fd..1c847bb08 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -95,6 +95,21 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// prepare_data_gaussian_cpp_with_wrap +Rcpp::List prepare_data_gaussian_cpp_with_wrap(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +RcppExport SEXP _shapr_prepare_data_gaussian_cpp_with_wrap(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); + Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); + Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); + rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_with_wrap(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + return rcpp_result_gen; +END_RCPP +} // prepare_data_gaussian_cpp_v2 Rcpp::List prepare_data_gaussian_cpp_v2(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); RcppExport SEXP _shapr_prepare_data_gaussian_cpp_v2(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { @@ -110,6 +125,96 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// prepare_data_gaussian_cpp_fix_large_mat +arma::mat prepare_data_gaussian_cpp_fix_large_mat(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +RcppExport SEXP _shapr_prepare_data_gaussian_cpp_fix_large_mat(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); + Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); + Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); + rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_fix_large_mat(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + return rcpp_result_gen; +END_RCPP +} +// prepare_data_gaussian_cpp_fix_large_mat_v2 +arma::mat prepare_data_gaussian_cpp_fix_large_mat_v2(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +RcppExport SEXP _shapr_prepare_data_gaussian_cpp_fix_large_mat_v2(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); + Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); + Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); + rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_fix_large_mat_v2(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + return rcpp_result_gen; +END_RCPP +} +// prepare_data_gaussian_cpp_fix_cube +arma::cube prepare_data_gaussian_cpp_fix_cube(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +RcppExport SEXP _shapr_prepare_data_gaussian_cpp_fix_cube(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); + Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); + Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); + rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_fix_cube(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + return rcpp_result_gen; +END_RCPP +} +// prepare_data_gaussian_cpp_fix_cube_v2 +arma::cube prepare_data_gaussian_cpp_fix_cube_v2(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +RcppExport SEXP _shapr_prepare_data_gaussian_cpp_fix_cube_v2(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); + Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); + Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); + rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_fix_cube_v2(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + return rcpp_result_gen; +END_RCPP +} +// prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices +Rcpp::List prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +RcppExport SEXP _shapr_prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); + Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); + Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); + rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + return rcpp_result_gen; +END_RCPP +} +// prepare_data_gaussian_cpp_fix_std_list +std::list prepare_data_gaussian_cpp_fix_std_list(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +RcppExport SEXP _shapr_prepare_data_gaussian_cpp_fix_std_list(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); + Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); + Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); + rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_fix_std_list(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + return rcpp_result_gen; +END_RCPP +} // mahalanobis_distance_cpp arma::cube mahalanobis_distance_cpp(Rcpp::List featureList, arma::mat Xtrain_mat, arma::mat Xtest_mat, arma::mat mcov, bool S_scale_dist); RcppExport SEXP _shapr_mahalanobis_distance_cpp(SEXP featureListSEXP, SEXP Xtrain_matSEXP, SEXP Xtest_matSEXP, SEXP mcovSEXP, SEXP S_scale_distSEXP) { @@ -186,7 +291,14 @@ static const R_CallMethodDef CallEntries[] = { {"_shapr_aicc_full_single_cpp", (DL_FUNC) &_shapr_aicc_full_single_cpp, 5}, {"_shapr_aicc_full_cpp", (DL_FUNC) &_shapr_aicc_full_cpp, 6}, {"_shapr_prepare_data_gaussian_cpp", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp, 5}, + {"_shapr_prepare_data_gaussian_cpp_with_wrap", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_with_wrap, 5}, {"_shapr_prepare_data_gaussian_cpp_v2", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_v2, 5}, + {"_shapr_prepare_data_gaussian_cpp_fix_large_mat", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_fix_large_mat, 5}, + {"_shapr_prepare_data_gaussian_cpp_fix_large_mat_v2", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_fix_large_mat_v2, 5}, + {"_shapr_prepare_data_gaussian_cpp_fix_cube", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_fix_cube, 5}, + {"_shapr_prepare_data_gaussian_cpp_fix_cube_v2", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_fix_cube_v2, 5}, + {"_shapr_prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices, 5}, + {"_shapr_prepare_data_gaussian_cpp_fix_std_list", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_fix_std_list, 5}, {"_shapr_mahalanobis_distance_cpp", (DL_FUNC) &_shapr_mahalanobis_distance_cpp, 5}, {"_shapr_sample_features_cpp", (DL_FUNC) &_shapr_sample_features_cpp, 2}, {"_shapr_observation_impute_cpp", (DL_FUNC) &_shapr_observation_impute_cpp, 5}, From 75aea94df356c4f96cd6d7b3d294af5350acb956 Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 5 Jan 2024 11:17:15 +0100 Subject: [PATCH 17/62] Added cpp functions here so we have them in the future (if needed) --- inst/scripts/Compare_Gaussian.R | 644 ++++++++++++++++++++++++++++++++ 1 file changed, 644 insertions(+) diff --git a/inst/scripts/Compare_Gaussian.R b/inst/scripts/Compare_Gaussian.R index 8dde791af..3a8d65db2 100644 --- a/inst/scripts/Compare_Gaussian.R +++ b/inst/scripts/Compare_Gaussian.R @@ -4,6 +4,650 @@ library(data.table) +# Cpp functions --------------------------------------------------------------------------------------------------- +# #include +# #include +# using namespace Rcpp; +# +# +# //' Generate Gaussian MC samples +# //' +# //' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the +# //' univariate standard normal. +# //' @param x_explain_mat matrix. Matrix of dimension `n_explain` times `n_features` containing the observations +# //' to explain. +# //' @param S matrix. Matrix of dimension `n_combinations` times `n_features` containing binary representations of +# //' the used coalitions. +# //' @param mu vector. Vector of length `n_features` containing the mean of each feature. +# //' @param cov_mat mat. Matrix of dimension `n_features` times `n_features` containing the pariwise covariance between +# //' all features. +# //' +# //' @export +# //' @keywords internal +# //' +# //' @return List of length `n_combinations`*`n_samples`, where each entry is a matrix of dimension `n_samples` times +# //' `n_features` containing the conditional MC samples for each coalition and explicand. +# //' @author Lars Henry Berge Olsen +# // [[Rcpp::export]] +# Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# +# // Pre-allocate result matrix +# arma::mat ret(n_samples, n_features); +# +# // Create a list containing the MC samples for all coalitions and test observations +# Rcpp::List result_list; +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? +# ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# result_list.push_back(ret); +# } +# } +# +# return result_list; +# } +# +# // [[Rcpp::export]] +# Rcpp::List prepare_data_gaussian_cpp_with_wrap(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# +# // Pre-allocate result matrix +# arma::mat ret(n_samples, n_features); +# +# // Create a list containing the MC samples for all coalitions and test observations +# Rcpp::List result_list; +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? +# ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# result_list.push_back(Rcpp::wrap(ret)); +# } +# } +# +# return result_list; +# } +# +# // [[Rcpp::export]] +# Rcpp::List prepare_data_gaussian_cpp_v2(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# +# // Create a list containing the MC samples for all coalitions and test observations +# Rcpp::List result_list; +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); +# +# // Loop over the different test observations and Combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# arma::mat ret(n_samples, n_features); +# ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); +# ret.cols(Sbar_now_idx) = trans(MC_samples_mat_now + repmat(x_Sbar_mean.col(idx_now), 1, n_samples)); +# result_list.push_back(ret); +# } +# } +# +# return result_list; +# } +# +# // [[Rcpp::export]] +# arma::mat prepare_data_gaussian_cpp_fix_large_mat(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# int n_coalitions = S.n_rows; +# +# // Pre-allocate result matrix +# arma::mat return_mat(n_coalitions*n_explain*n_samples, n_features); +# +# // Create a list containing the MC samples for all coalitions and test observations +# std::list result_list; +# // Rcpp::List result_list; +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# // Maybe faster to create vector 0:(n_samples - 1) and then just add n_samples in each loop. +# arma::uvec row_indices_now = arma::linspace(S_ind*n_explain*n_samples + idx_now*n_samples, +# S_ind*n_explain*n_samples + idx_now*n_samples + n_samples - 1, +# n_samples); +# +# return_mat.submat(row_indices_now, S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); +# return_mat.submat(row_indices_now, Sbar_now_idx) = +# MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# } +# } +# +# return return_mat; +# } +# +# // Diff in v2 is where we do the transpose +# // [[Rcpp::export]] +# arma::mat prepare_data_gaussian_cpp_fix_large_mat_v2(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# int n_coalitions = S.n_rows; +# +# // Pre-allocate result matrix +# arma::mat return_mat(n_coalitions*n_explain*n_samples, n_features); +# +# // Create a list containing the MC samples for all coalitions and test observations +# std::list result_list; +# // Rcpp::List result_list; +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# // Maybe faster to create vector 0:(n_samples - 1) and then just add n_samples in each loop. +# arma::uvec row_indices_now = arma::linspace(S_ind*n_explain*n_samples + idx_now*n_samples, +# S_ind*n_explain*n_samples + idx_now*n_samples + n_samples - 1, +# n_samples); +# +# return_mat.submat(row_indices_now, S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); +# return_mat.submat(row_indices_now, Sbar_now_idx) = +# trans(MC_samples_mat_now + repmat(x_Sbar_mean.col(idx_now), 1, n_samples)); +# } +# } +# +# return return_mat; +# } +# +# // [[Rcpp::export]] +# arma::cube prepare_data_gaussian_cpp_fix_cube(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# int n_coalitions = S.n_rows; +# +# // Pre-allocate result matrix +# arma::mat aux_mat(n_samples, n_features); +# arma::cube result_cube(n_samples, n_features, n_explain*n_coalitions); +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? +# aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# result_cube.slice(S_ind*n_explain + idx_now) = aux_mat; +# } +# } +# +# return result_cube; +# } +# +# // [[Rcpp::export]] +# arma::cube prepare_data_gaussian_cpp_fix_cube_v2(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# int n_coalitions = S.n_rows; +# +# // Pre-allocate result matrix +# arma::mat aux_mat(n_samples, n_features); +# arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? +# aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# result_cube.col(S_ind*n_explain + idx_now) = aux_mat; +# } +# } +# +# return result_cube; +# } +# +# // [[Rcpp::export]] +# Rcpp::List prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# +# // Pre-allocate result matrix +# arma::mat aux_mat(n_samples, n_features); +# +# // Create a list containing lists that contian the MC samples for all coalitions and test observations in each matrix +# Rcpp::List result_list(S.n_rows); +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { +# +# Rcpp::List result_list_now(n_explain); +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? +# aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# result_list_now[idx_now] = aux_mat; +# } +# result_list[S_ind] = result_list_now; +# } +# +# return result_list; +# } +# +# // [[Rcpp::export]] +# std::list prepare_data_gaussian_cpp_fix_std_list(arma::mat MC_samples_mat, +# arma::mat x_explain_mat, +# arma::mat S, +# arma::vec mu, +# arma::mat cov_mat) { +# int n_explain = x_explain_mat.n_rows; +# int n_samples = MC_samples_mat.n_rows; +# int n_features = MC_samples_mat.n_cols; +# +# // Pre-allocate result matrix +# arma::mat aux_mat(n_samples, n_features); +# +# // Create a list containing the MC samples for all coalitions and test observations +# std::list result_list; +# +# // Iterate over the coalitions +# for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { +# +# // TODO: REMOVE IN FINAL VERSION Small printout +# Rcpp::Rcout << S_ind + 1 << ","; +# +# // Get current coalition S and the indices of the features in coalition S and mask Sbar +# arma::mat S_now = S.row(S_ind); +# arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her +# arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); +# +# // Extract the features we condition on +# arma::mat x_S_star = x_explain_mat.cols(S_now_idx); +# +# // Extract the mean values for the features in the two sets +# arma::vec mu_S = mu.elem(S_now_idx); +# arma::vec mu_Sbar = mu.elem(Sbar_now_idx); +# +# // Extract the relevant parts of the covariance matrix +# arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); +# arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); +# arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); +# arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); +# +# // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix +# arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); +# arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; +# +# // Ensure that the conditional covariance matrix is symmetric +# if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { +# cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); +# } +# +# // Compute the conditional mean of Xsbar given Xs = Xs_star +# arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? +# x_Sbar_mean.each_col() += mu_Sbar; +# +# // Transform the samples to be from N(O, Sigma_Sbar|S) +# arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); +# +# // Loop over the different test observations and combine the generated values with the values we conditioned on +# for (int idx_now = 0; idx_now < n_explain; idx_now++) { +# aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? +# aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); +# result_list.push_back(aux_mat); +# } +# } +# +# return result_list; +# } + + + # Old and new version --------------------------------------------------------------------------------------------- prepare_data_gaussian_old <- function(internal, index_features = NULL, ...) { x_train <- internal$data$x_train From b1baed155029c4976848a583a8b79c24e6fa78de Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 5 Jan 2024 11:42:03 +0100 Subject: [PATCH 18/62] Made cube_v2 the main cpp version and removed others --- src/Gaussian.cpp | 716 ++++++----------------------------------------- 1 file changed, 80 insertions(+), 636 deletions(-) diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index 63aabfd32..68ce8c782 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -1,640 +1,84 @@ #include -#include using namespace Rcpp; - //' Generate Gaussian MC samples - //' - //' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the - //' univariate standard normal. - //' @param x_explain_mat matrix. Matrix of dimension `n_explain` times `n_features` containing the observations - //' to explain. - //' @param S matrix. Matrix of dimension `n_combinations` times `n_features` containing binary representations of - //' the used coalitions. - //' @param mu vector. Vector of length `n_features` containing the mean of each feature. - //' @param cov_mat mat. Matrix of dimension `n_features` times `n_features` containing the pariwise covariance between - //' all features. - //' - //' @export - //' @keywords internal - //' - //' @return List of length `n_combinations`*`n_samples`, where each entry is a matrix of dimension `n_samples` times - //' `n_features` containing the conditional MC samples for each coalition and explicand. - //' @author Lars Henry Berge Olsen - // [[Rcpp::export]] - Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { - int n_explain = x_explain_mat.n_rows; - int n_samples = MC_samples_mat.n_rows; - int n_features = MC_samples_mat.n_cols; - - // Pre-allocate result matrix - arma::mat ret(n_samples, n_features); - - // Create a list containing the MC samples for all coalitions and test observations - Rcpp::List result_list; - - // Iterate over the coalitions - for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - - // TODO: REMOVE IN FINAL VERSION Small printout - Rcpp::Rcout << S_ind + 1 << ","; - - // Get current coalition S and the indices of the features in coalition S and mask Sbar - arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - - // Extract the features we condition on - arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - - // Extract the mean values for the features in the two sets - arma::vec mu_S = mu.elem(S_now_idx); - arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - - // Extract the relevant parts of the covariance matrix - arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - - // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - - // Ensure that the conditional covariance matrix is symmetric - if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - } - - // Compute the conditional mean of Xsbar given Xs = Xs_star - arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - x_Sbar_mean.each_col() += mu_Sbar; - - // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - - // Loop over the different test observations and combine the generated values with the values we conditioned on - for (int idx_now = 0; idx_now < n_explain; idx_now++) { - ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - result_list.push_back(ret); - } - } - - return result_list; - } - - // [[Rcpp::export]] - Rcpp::List prepare_data_gaussian_cpp_with_wrap(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { - int n_explain = x_explain_mat.n_rows; - int n_samples = MC_samples_mat.n_rows; - int n_features = MC_samples_mat.n_cols; - - // Pre-allocate result matrix - arma::mat ret(n_samples, n_features); - - // Create a list containing the MC samples for all coalitions and test observations - Rcpp::List result_list; - - // Iterate over the coalitions - for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - - // TODO: REMOVE IN FINAL VERSION Small printout - Rcpp::Rcout << S_ind + 1 << ","; - - // Get current coalition S and the indices of the features in coalition S and mask Sbar - arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - - // Extract the features we condition on - arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - - // Extract the mean values for the features in the two sets - arma::vec mu_S = mu.elem(S_now_idx); - arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - - // Extract the relevant parts of the covariance matrix - arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - - // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - - // Ensure that the conditional covariance matrix is symmetric - if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - } - - // Compute the conditional mean of Xsbar given Xs = Xs_star - arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - x_Sbar_mean.each_col() += mu_Sbar; - - // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - - // Loop over the different test observations and combine the generated values with the values we conditioned on - for (int idx_now = 0; idx_now < n_explain; idx_now++) { - ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - ret.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - result_list.push_back(Rcpp::wrap(ret)); - } - } - - return result_list; - } - - // [[Rcpp::export]] - Rcpp::List prepare_data_gaussian_cpp_v2(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { - int n_explain = x_explain_mat.n_rows; - int n_samples = MC_samples_mat.n_rows; - int n_features = MC_samples_mat.n_cols; - - // Create a list containing the MC samples for all coalitions and test observations - Rcpp::List result_list; - - // Iterate over the coalitions - for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - - // TODO: REMOVE IN FINAL VERSION Small printout - Rcpp::Rcout << S_ind + 1 << ","; - - // Get current coalition S and the indices of the features in coalition S and mask Sbar - arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); - arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - - // Extract the features we condition on - arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - - // Extract the mean values for the features in the two sets - arma::vec mu_S = mu.elem(S_now_idx); - arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - - // Extract the relevant parts of the covariance matrix - arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - - // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - - - // Ensure that the conditional covariance matrix is symmetric - if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - } - - // Compute the conditional mean of Xsbar given Xs = Xs_star - arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - x_Sbar_mean.each_col() += mu_Sbar; - - // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); - - // Loop over the different test observations and Combine the generated values with the values we conditioned on - for (int idx_now = 0; idx_now < n_explain; idx_now++) { - arma::mat ret(n_samples, n_features); - ret.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); - ret.cols(Sbar_now_idx) = trans(MC_samples_mat_now + repmat(x_Sbar_mean.col(idx_now), 1, n_samples)); - result_list.push_back(ret); - } - } - - return result_list; - } - - // [[Rcpp::export]] - arma::mat prepare_data_gaussian_cpp_fix_large_mat(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { - int n_explain = x_explain_mat.n_rows; - int n_samples = MC_samples_mat.n_rows; - int n_features = MC_samples_mat.n_cols; - int n_coalitions = S.n_rows; - - // Pre-allocate result matrix - arma::mat return_mat(n_coalitions*n_explain*n_samples, n_features); - - // Create a list containing the MC samples for all coalitions and test observations - std::list result_list; - // Rcpp::List result_list; - - // Iterate over the coalitions - for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { - - // TODO: REMOVE IN FINAL VERSION Small printout - Rcpp::Rcout << S_ind + 1 << ","; - - // Get current coalition S and the indices of the features in coalition S and mask Sbar - arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - - // Extract the features we condition on - arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - - // Extract the mean values for the features in the two sets - arma::vec mu_S = mu.elem(S_now_idx); - arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - - // Extract the relevant parts of the covariance matrix - arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - - // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - - // Ensure that the conditional covariance matrix is symmetric - if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - } - - // Compute the conditional mean of Xsbar given Xs = Xs_star - arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - x_Sbar_mean.each_col() += mu_Sbar; - - // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - - // Loop over the different test observations and combine the generated values with the values we conditioned on - for (int idx_now = 0; idx_now < n_explain; idx_now++) { - // Maybe faster to create vector 0:(n_samples - 1) and then just add n_samples in each loop. - arma::uvec row_indices_now = arma::linspace(S_ind*n_explain*n_samples + idx_now*n_samples, - S_ind*n_explain*n_samples + idx_now*n_samples + n_samples - 1, - n_samples); - - return_mat.submat(row_indices_now, S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); - return_mat.submat(row_indices_now, Sbar_now_idx) = - MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - } - } - - return return_mat; - } - - // Diff in v2 is where we do the transpose - // [[Rcpp::export]] - arma::mat prepare_data_gaussian_cpp_fix_large_mat_v2(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { - int n_explain = x_explain_mat.n_rows; - int n_samples = MC_samples_mat.n_rows; - int n_features = MC_samples_mat.n_cols; - int n_coalitions = S.n_rows; - - // Pre-allocate result matrix - arma::mat return_mat(n_coalitions*n_explain*n_samples, n_features); - - // Create a list containing the MC samples for all coalitions and test observations - std::list result_list; - // Rcpp::List result_list; - - // Iterate over the coalitions - for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { - - // TODO: REMOVE IN FINAL VERSION Small printout - Rcpp::Rcout << S_ind + 1 << ","; - - // Get current coalition S and the indices of the features in coalition S and mask Sbar - arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - - // Extract the features we condition on - arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - - // Extract the mean values for the features in the two sets - arma::vec mu_S = mu.elem(S_now_idx); - arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - - // Extract the relevant parts of the covariance matrix - arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - - // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - - // Ensure that the conditional covariance matrix is symmetric - if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - } - - // Compute the conditional mean of Xsbar given Xs = Xs_star - arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - x_Sbar_mean.each_col() += mu_Sbar; - - // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = trans(MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S)); - - // Loop over the different test observations and combine the generated values with the values we conditioned on - for (int idx_now = 0; idx_now < n_explain; idx_now++) { - // Maybe faster to create vector 0:(n_samples - 1) and then just add n_samples in each loop. - arma::uvec row_indices_now = arma::linspace(S_ind*n_explain*n_samples + idx_now*n_samples, - S_ind*n_explain*n_samples + idx_now*n_samples + n_samples - 1, - n_samples); - - return_mat.submat(row_indices_now, S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); - return_mat.submat(row_indices_now, Sbar_now_idx) = - trans(MC_samples_mat_now + repmat(x_Sbar_mean.col(idx_now), 1, n_samples)); - } - } - - return return_mat; - } - - // [[Rcpp::export]] - arma::cube prepare_data_gaussian_cpp_fix_cube(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { - int n_explain = x_explain_mat.n_rows; - int n_samples = MC_samples_mat.n_rows; - int n_features = MC_samples_mat.n_cols; - int n_coalitions = S.n_rows; - - // Pre-allocate result matrix - arma::mat aux_mat(n_samples, n_features); - arma::cube result_cube(n_samples, n_features, n_explain*n_coalitions); - - // Iterate over the coalitions - for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { - - // TODO: REMOVE IN FINAL VERSION Small printout - Rcpp::Rcout << S_ind + 1 << ","; - - // Get current coalition S and the indices of the features in coalition S and mask Sbar - arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - - // Extract the features we condition on - arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - - // Extract the mean values for the features in the two sets - arma::vec mu_S = mu.elem(S_now_idx); - arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - - // Extract the relevant parts of the covariance matrix - arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - - // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - - // Ensure that the conditional covariance matrix is symmetric - if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - } - - // Compute the conditional mean of Xsbar given Xs = Xs_star - arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - x_Sbar_mean.each_col() += mu_Sbar; - - // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - - // Loop over the different test observations and combine the generated values with the values we conditioned on - for (int idx_now = 0; idx_now < n_explain; idx_now++) { - aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - result_cube.slice(S_ind*n_explain + idx_now) = aux_mat; - } - } - - return result_cube; - } - - // [[Rcpp::export]] - arma::cube prepare_data_gaussian_cpp_fix_cube_v2(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { - int n_explain = x_explain_mat.n_rows; - int n_samples = MC_samples_mat.n_rows; - int n_features = MC_samples_mat.n_cols; - int n_coalitions = S.n_rows; - - // Pre-allocate result matrix - arma::mat aux_mat(n_samples, n_features); - arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); - - // Iterate over the coalitions - for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { - - // TODO: REMOVE IN FINAL VERSION Small printout - Rcpp::Rcout << S_ind + 1 << ","; - - // Get current coalition S and the indices of the features in coalition S and mask Sbar - arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - - // Extract the features we condition on - arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - - // Extract the mean values for the features in the two sets - arma::vec mu_S = mu.elem(S_now_idx); - arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - - // Extract the relevant parts of the covariance matrix - arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - - // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - - // Ensure that the conditional covariance matrix is symmetric - if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - } - - // Compute the conditional mean of Xsbar given Xs = Xs_star - arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - x_Sbar_mean.each_col() += mu_Sbar; - - // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - - // Loop over the different test observations and combine the generated values with the values we conditioned on - for (int idx_now = 0; idx_now < n_explain; idx_now++) { - aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - result_cube.col(S_ind*n_explain + idx_now) = aux_mat; - } - } - - return result_cube; - } - - // [[Rcpp::export]] - Rcpp::List prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { - int n_explain = x_explain_mat.n_rows; - int n_samples = MC_samples_mat.n_rows; - int n_features = MC_samples_mat.n_cols; - - // Pre-allocate result matrix - arma::mat aux_mat(n_samples, n_features); - - // Create a list containing lists that contian the MC samples for all coalitions and test observations in each matrix - Rcpp::List result_list(S.n_rows); - - // Iterate over the coalitions - for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - - Rcpp::List result_list_now(n_explain); - - // TODO: REMOVE IN FINAL VERSION Small printout - Rcpp::Rcout << S_ind + 1 << ","; - - // Get current coalition S and the indices of the features in coalition S and mask Sbar - arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - - // Extract the features we condition on - arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - - // Extract the mean values for the features in the two sets - arma::vec mu_S = mu.elem(S_now_idx); - arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - - // Extract the relevant parts of the covariance matrix - arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - - // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - - // Ensure that the conditional covariance matrix is symmetric - if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - } - - // Compute the conditional mean of Xsbar given Xs = Xs_star - arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - x_Sbar_mean.each_col() += mu_Sbar; - - // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - - // Loop over the different test observations and combine the generated values with the values we conditioned on - for (int idx_now = 0; idx_now < n_explain; idx_now++) { - aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - result_list_now[idx_now] = aux_mat; - } - result_list[S_ind] = result_list_now; - } - - return result_list; - } - - // [[Rcpp::export]] - std::list prepare_data_gaussian_cpp_fix_std_list(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { - int n_explain = x_explain_mat.n_rows; - int n_samples = MC_samples_mat.n_rows; - int n_features = MC_samples_mat.n_cols; - - // Pre-allocate result matrix - arma::mat aux_mat(n_samples, n_features); - - // Create a list containing the MC samples for all coalitions and test observations - std::list result_list; - - // Iterate over the coalitions - for (int S_ind = 0; S_ind < S.n_rows; S_ind++) { - - // TODO: REMOVE IN FINAL VERSION Small printout - Rcpp::Rcout << S_ind + 1 << ","; - - // Get current coalition S and the indices of the features in coalition S and mask Sbar - arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); // må finnes en bedre løsning her - arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - - // Extract the features we condition on - arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - - // Extract the mean values for the features in the two sets - arma::vec mu_S = mu.elem(S_now_idx); - arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - - // Extract the relevant parts of the covariance matrix - arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - - // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - - // Ensure that the conditional covariance matrix is symmetric - if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - } - - // Compute the conditional mean of Xsbar given Xs = Xs_star - arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); // Can we speed it up by reducing the number of transposes? - x_Sbar_mean.each_col() += mu_Sbar; - - // Transform the samples to be from N(O, Sigma_Sbar|S) - arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - - // Loop over the different test observations and combine the generated values with the values we conditioned on - for (int idx_now = 0; idx_now < n_explain; idx_now++) { - aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); // can using .fill() speed this up? - aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); - result_list.push_back(aux_mat); - } - } - - return result_list; - } +//' +//' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the +//' univariate standard normal. +//' @param x_explain_mat matrix. Matrix of dimension `n_explain` times `n_features` containing the observations +//' to explain. +//' @param S matrix. Matrix of dimension `n_combinations` times `n_features` containing binary representations of +//' the used coalitions. +//' @param mu vector. Vector of length `n_features` containing the mean of each feature. +//' @param cov_mat matrix. Matrix of dimension `n_features` times `n_features` containing the pairwise covariance +//' between all pairs of features. +//' +//' @export +//' @keywords internal +//' +//' @return List of length `n_combinations`*`n_samples`, where each entry is a matrix of dimension `n_samples` times +//' `n_features` containing the conditional MC samples for each coalition and explicand. +//' @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::cube prepare_data_gaussian_cpp(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Initialize auxiliary matrix and result cube + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + + // Extract the mean values for the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); + x_Sbar_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_Sbar|S) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different test observations and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; +} From 4162364883bb8f115ea6c149a9a618023c9045b9 Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 5 Jan 2024 12:16:19 +0100 Subject: [PATCH 19/62] Updated `approach_gaussian()` to use Cpp code. Verified that we get the same as the old. --- R/approach_gaussian.R | 116 ++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 78 deletions(-) diff --git a/R/approach_gaussian.R b/R/approach_gaussian.R index 8191757a2..31d822af4 100644 --- a/R/approach_gaussian.R +++ b/R/approach_gaussian.R @@ -47,44 +47,48 @@ setup_approach.gaussian <- function(internal, #' @rdname prepare_data #' @export -prepare_data.gaussian <- function(internal, index_features = NULL, ...) { - x_train <- internal$data$x_train +#' @author Lars Henry Berge Olsen +prepare_data.gaussian <- function(internal, index_features, ...) { + + # Extract objects that we are going to use x_explain <- internal$data$x_explain + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + x_explain_mat <- as.matrix(internal$data$x_explain) n_explain <- internal$parameters$n_explain - gaussian.cov_mat <- internal$parameters$gaussian.cov_mat - n_samples <- internal$parameters$n_samples - gaussian.mu <- internal$parameters$gaussian.mu n_features <- internal$parameters$n_features + n_samples <- internal$parameters$n_samples + feature_names <- internal$parameters$feature_names + n_combinations <- internal$parameters$n_combinations + n_combinations_now <- length(index_features) + + # Update `S` with the relevant coalitions specified in `index_features` + S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Use Cpp to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}) for all coalitions and explicands. + # The object `dt` is here a 3D array of dimension (n_samples, n_explain*n_coalitions, n_features). + dt <- prepare_data_gaussian_cpp(MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat) + + # Reshape `dt` to a 2D array of dimension (n_samples*n_explain*n_coalitions, n_features). + dim(dt) = c(n_combinations_now*n_explain*n_samples, n_features) + + # Convert to a data.table + dt = as.data.table(dt) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, "w" := 1 / n_samples] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] - X <- internal$objects$X - - x_explain0 <- as.matrix(x_explain) - dt_l <- list() - - if (is.null(index_features)) { - features <- X$features - } else { - features <- X$features[index_features] - } - - for (i in seq_len(n_explain)) { - l <- lapply( - X = features, - FUN = sample_gaussian, - n_samples = n_samples, - mu = gaussian.mu, - cov_mat = gaussian.cov_mat, - m = n_features, - x_explain = x_explain0[i, , drop = FALSE] - ) - - dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination") - dt_l[[i]][, w := 1 / n_samples] - dt_l[[i]][, id := i] - if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]] - } - - dt <- data.table::rbindlist(dt_l, use.names = TRUE, fill = TRUE) return(dt) } @@ -111,47 +115,3 @@ get_cov_mat <- function(x_train, min_eigen_value = 1e-06) { get_mu_vec <- function(x_train) { unname(colMeans(x_train)) } - -#' Sample conditional Gaussian variables -#' -#' @inheritParams sample_copula -#' -#' @return data.table -#' -#' @keywords internal -#' -#' @author Martin Jullum -sample_gaussian <- function(index_given, n_samples, mu, cov_mat, m, x_explain) { - # Check input - stopifnot(is.matrix(x_explain)) - - # Handles the unconditional and full conditional separtely when predicting - cnms <- colnames(x_explain) - if (length(index_given) %in% c(0, m)) { - return(data.table::as.data.table(x_explain)) - } - - dependent_ind <- seq_along(mu)[-index_given] - x_explain_gaussian <- x_explain[index_given] - tmp <- condMVNorm::condMVN( - mean = mu, - sigma = cov_mat, - dependent.ind = dependent_ind, - given.ind = index_given, - X.given = x_explain_gaussian - ) - - # Makes the conditional covariance matrix symmetric in the rare case where numerical instability made it unsymmetric - if (!isSymmetric(tmp[["condVar"]])) { - tmp[["condVar"]] <- Matrix::symmpart(tmp$condVar) - } - - ret0 <- mvnfast::rmvn(n = n_samples, mu = tmp$condMean, sigma = tmp$condVar) - - ret <- matrix(NA, ncol = m, nrow = n_samples) - ret[, index_given] <- rep(x_explain_gaussian, each = n_samples) - ret[, dependent_ind] <- ret0 - - colnames(ret) <- cnms - return(as.data.table(ret)) -} From adf57e5099de827b78cadbd651bf6ca0626a6d97 Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 5 Jan 2024 12:17:05 +0100 Subject: [PATCH 20/62] Moved function to inst/compare_gaussian.R so it is still runable. --- inst/scripts/Compare_Gaussian.R | 49 ++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/inst/scripts/Compare_Gaussian.R b/inst/scripts/Compare_Gaussian.R index 3a8d65db2..b9ca398aa 100644 --- a/inst/scripts/Compare_Gaussian.R +++ b/inst/scripts/Compare_Gaussian.R @@ -4,6 +4,53 @@ library(data.table) + +# Other functions ------------------------------------------------------------------------------------------------- +#' Sample conditional Gaussian variables +#' +#' @inheritParams sample_copula +#' +#' @return data.table +#' +#' @keywords internal +#' +#' @author Martin Jullum +sample_gaussian <- function(index_given, n_samples, mu, cov_mat, m, x_explain) { + # Check input + stopifnot(is.matrix(x_explain)) + + # Handles the unconditional and full conditional separtely when predicting + cnms <- colnames(x_explain) + if (length(index_given) %in% c(0, m)) { + return(data.table::as.data.table(x_explain)) + } + + dependent_ind <- seq_along(mu)[-index_given] + x_explain_gaussian <- x_explain[index_given] + tmp <- condMVNorm::condMVN( + mean = mu, + sigma = cov_mat, + dependent.ind = dependent_ind, + given.ind = index_given, + X.given = x_explain_gaussian + ) + + # Makes the conditional covariance matrix symmetric in the rare case where numerical instability made it unsymmetric + if (!isSymmetric(tmp[["condVar"]])) { + tmp[["condVar"]] <- Matrix::symmpart(tmp$condVar) + } + + ret0 <- mvnfast::rmvn(n = n_samples, mu = tmp$condMean, sigma = tmp$condVar) + + ret <- matrix(NA, ncol = m, nrow = n_samples) + ret[, index_given] <- rep(x_explain_gaussian, each = n_samples) + ret[, dependent_ind] <- ret0 + + colnames(ret) <- cnms + return(as.data.table(ret)) +} + + # Cpp functions --------------------------------------------------------------------------------------------------- # #include # #include @@ -673,7 +720,7 @@ prepare_data_gaussian_old <- function(internal, index_features = NULL, ...) { cat(sprintf("%d,", i)) l <- lapply( X = features, - FUN = shapr:::sample_gaussian, + FUN = sample_gaussian, #shapr:::sample_gaussian, n_samples = n_samples, mu = gaussian.mu, cov_mat = gaussian.cov_mat, From 74b1ae2f3f71c3dea8fb61519a857e8130707cde Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 5 Jan 2024 15:10:55 +0100 Subject: [PATCH 21/62] Added `#' @inheritParams default_doc` --- R/approach_gaussian.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/approach_gaussian.R b/R/approach_gaussian.R index 31d822af4..467cc418a 100644 --- a/R/approach_gaussian.R +++ b/R/approach_gaussian.R @@ -45,6 +45,7 @@ setup_approach.gaussian <- function(internal, return(internal) } +#' @inheritParams default_doc #' @rdname prepare_data #' @export #' @author Lars Henry Berge Olsen From 9590605af2f36ca814c65f4df447951163ca40c0 Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 5 Jan 2024 15:12:45 +0100 Subject: [PATCH 22/62] Changed order of parameters --- R/approach_gaussian.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/approach_gaussian.R b/R/approach_gaussian.R index 467cc418a..9f7b7078a 100644 --- a/R/approach_gaussian.R +++ b/R/approach_gaussian.R @@ -52,17 +52,17 @@ setup_approach.gaussian <- function(internal, prepare_data.gaussian <- function(internal, index_features, ...) { # Extract objects that we are going to use - x_explain <- internal$data$x_explain - S <- internal$objects$S - mu <- internal$parameters$gaussian.mu - cov_mat <- internal$parameters$gaussian.cov_mat - x_explain_mat <- as.matrix(internal$data$x_explain) n_explain <- internal$parameters$n_explain n_features <- internal$parameters$n_features n_samples <- internal$parameters$n_samples - feature_names <- internal$parameters$feature_names n_combinations <- internal$parameters$n_combinations n_combinations_now <- length(index_features) + S <- internal$objects$S + mu <- internal$parameters$gaussian.mu + cov_mat <- internal$parameters$gaussian.cov_mat + feature_names <- internal$parameters$feature_names + x_explain <- internal$data$x_explain + x_explain_mat <- as.matrix(internal$data$x_explain) # Update `S` with the relevant coalitions specified in `index_features` S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] From df9777d5696a6d766c4d19f46ec473688042811d Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 5 Jan 2024 15:15:01 +0100 Subject: [PATCH 23/62] Removed `if (!is.null(index_features))` in `approach_gaussian()`. --- R/approach_gaussian.R | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/R/approach_gaussian.R b/R/approach_gaussian.R index 9f7b7078a..0c0e91bfd 100644 --- a/R/approach_gaussian.R +++ b/R/approach_gaussian.R @@ -51,21 +51,18 @@ setup_approach.gaussian <- function(internal, #' @author Lars Henry Berge Olsen prepare_data.gaussian <- function(internal, index_features, ...) { - # Extract objects that we are going to use + # Extract used variables n_explain <- internal$parameters$n_explain n_features <- internal$parameters$n_features n_samples <- internal$parameters$n_samples n_combinations <- internal$parameters$n_combinations n_combinations_now <- length(index_features) - S <- internal$objects$S mu <- internal$parameters$gaussian.mu cov_mat <- internal$parameters$gaussian.cov_mat feature_names <- internal$parameters$feature_names x_explain <- internal$data$x_explain x_explain_mat <- as.matrix(internal$data$x_explain) - - # Update `S` with the relevant coalitions specified in `index_features` - S <- if (!is.null(index_features)) S[index_features, , drop = FALSE] + S <- internal$objects$S[index_features, , drop = FALSE] # Generate the MC samples from N(0, 1) MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) @@ -88,7 +85,7 @@ prepare_data.gaussian <- function(internal, index_features, ...) { dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] dt[, "w" := 1 / n_samples] data.table::setcolorder(dt, c("id_combination", "id", feature_names)) - if (!is.null(index_features)) dt[, id_combination := index_features[id_combination]] + dt[, id_combination := index_features[id_combination]] return(dt) } From 11819e8d88fc890eb65b44d3125d9e3393eb08e7 Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 5 Jan 2024 17:16:26 +0100 Subject: [PATCH 24/62] Need to clean code. Push in case something happens with my computer. --- R/approach_copula.R | 110 ++++++++++++++++++++++-- src/Copula.cpp | 198 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 301 insertions(+), 7 deletions(-) create mode 100644 src/Copula.cpp diff --git a/R/approach_copula.R b/R/approach_copula.R index 403d88809..32087a010 100644 --- a/R/approach_copula.R +++ b/R/approach_copula.R @@ -49,20 +49,93 @@ setup_approach.copula <- function(internal, ...) { return(internal) } + + #' @inheritParams default_doc #' @rdname prepare_data #' @export -prepare_data.copula <- function(internal, index_features = NULL, ...) { +#' @author Lars Henry Berge Olsen +prepare_data.copula2 <- function(internal, index_features, ...) { + + # Extract used variables + X <- internal$objects$X x_train <- internal$data$x_train + x_train_mat <- as.matrix(internal$data$x_train) x_explain <- internal$data$x_explain + x_explain_mat <- as.matrix(internal$data$x_explain) n_explain <- internal$parameters$n_explain - copula.cov_mat <- internal$parameters$copula.cov_mat n_samples <- internal$parameters$n_samples - copula.mu <- internal$parameters$copula.mu + n_samples = 1000000 n_features <- internal$parameters$n_features + n_combinations <- internal$parameters$n_combinations + n_combinations_now <- length(index_features) + copula.mu <- internal$parameters$copula.mu + copula.cov_mat <- internal$parameters$copula.cov_mat + copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) # CAN SKIP as.matrix as it is a matrix allready + feature_names <- internal$parameters$feature_names - copula.x_explain_gaussian <- internal$data$copula.x_explain_gaussian + + + + + + + + + S <- internal$objects$S[index_features, , drop = FALSE] + + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Use Cpp to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}) for all coalitions and explicands. + # The object `dt` is here a 3D array of dimension (n_samples, n_explain*n_coalitions, n_features). + # INCLUDE THE TRANSFORMATION + + + dt <- prepare_data_copula_cpp(MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat) + + # Reshape `dt` to a 2D array of dimension (n_samples*n_explain*n_coalitions, n_features). + dim(dt) = c(n_combinations_now*n_explain*n_samples, n_features) + + # Convert to a data.table + dt = as.data.table(dt) + setnames(dt, feature_names) + dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, "w" := 1 / n_samples] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + dt[, id_combination := index_features[id_combination]] + dt + + dt[id_combination == 9 & id == 1,] + + dt_agr = dt[, lapply(.SD, mean), by = list(id, id_combination)] + data.table::setorderv(dt_agr, c("id", "id_combination")) + dt_agr + + return(dt) +} + +#' @inheritParams default_doc +#' @rdname prepare_data +#' @export +prepare_data.copula <- function(internal, index_features = NULL, ...) { X <- internal$objects$X + x_train <- internal$data$x_train + x_explain <- internal$data$x_explain + n_explain <- internal$parameters$n_explain + n_samples <- internal$parameters$n_samples + n_features <- internal$parameters$n_features + copula.mu <- internal$parameters$copula.mu + copula.cov_mat <- internal$parameters$copula.cov_mat + copula.x_explain_gaussian <- internal$data$copula.x_explain_gaussian x_explain0 <- as.matrix(x_explain) @@ -74,7 +147,9 @@ prepare_data.copula <- function(internal, index_features = NULL, ...) { } + n_samples = 1000000 for (i in seq_len(n_explain)) { + print(i) l <- lapply( X = features, FUN = sample_copula, @@ -86,6 +161,9 @@ prepare_data.copula <- function(internal, index_features = NULL, ...) { x_train = as.matrix(x_train), x_explain_gaussian = copula.x_explain_gaussian[i, , drop = FALSE] ) + # x_explain_gaussian2 = x_explain_gaussian + # x_train2 = x_train + # x_explain2 = x_explain dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination") dt_l[[i]][, w := 1 / n_samples] @@ -93,7 +171,15 @@ prepare_data.copula <- function(internal, index_features = NULL, ...) { if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]] } dt <- data.table::rbindlist(dt_l, use.names = TRUE, fill = TRUE) + dt + dt9 = dt + dt9_agr = dt9[, lapply(.SD, mean), by = list(id, id_combination)] + dt9_agr - dt_agr return(dt) + + + + } #' Sample conditional variables using the Gaussian copula approach @@ -119,6 +205,7 @@ sample_copula <- function(index_given, n_samples, mu, cov_mat, m, x_explain_gaus } else { dependent_ind <- (seq_len(length(mu)))[-index_given] + # Dette har jeg kode til tmp <- condMVNorm::condMVN( mean = mu, sigma = cov_mat, @@ -127,15 +214,24 @@ sample_copula <- function(index_given, n_samples, mu, cov_mat, m, x_explain_gaus X.given = x_explain_gaussian[index_given] ) + # Dette har jeg kode til. Bruker cholensky + simulert data fra før ret0_z <- mvnfast::rmvn(n = n_samples, mu = tmp$condMean, sigma = tmp$condVar) + # Dette må jeg skrive selv ret0_x <- apply( X = rbind(ret0_z, x_train[, dependent_ind, drop = FALSE]), MARGIN = 2, FUN = inv_gaussian_transform, - n_z = n_samples + n_z = n_samples, + type = 5 ) + ret0_x_cpp = inv_gaussian_transform_cpp(z = ret0_z, x = x_train[, dependent_ind, drop = FALSE]) + + ret0_x_cpp = inv_gaussian_transform_cpp_armamat(rbind(ret0_z, x_train[, dependent_ind, drop = FALSE]), n_samples = n_samples) + colnames(ret0_x_cpp) = feature_names[dependent_ind] + all.equal(ret0_x, ret0_x_cpp) + # Dette har jeg kode til ret <- matrix(NA, ncol = m, nrow = n_samples) ret[, index_given] <- rep(x_explain[index_given], each = n_samples) ret[, dependent_ind] <- ret0_x @@ -156,13 +252,13 @@ sample_copula <- function(index_given, n_samples, mu, cov_mat, m, x_explain_gaus #' @keywords internal #' #' @author Martin Jullum -inv_gaussian_transform <- function(zx, n_z) { +inv_gaussian_transform <- function(zx, n_z, type) { if (n_z >= length(zx)) stop("n_z should be less than length(zx)") ind <- 1:n_z z <- zx[ind] x <- zx[-ind] u <- stats::pnorm(z) - x_new <- stats::quantile(x, probs = u) + x_new <- stats::quantile(x, probs = u, type = type) return(as.double(x_new)) } diff --git a/src/Copula.cpp b/src/Copula.cpp new file mode 100644 index 000000000..5e7997922 --- /dev/null +++ b/src/Copula.cpp @@ -0,0 +1,198 @@ +#include +#include + +// [[Rcpp::depends(RcppArmadillo)]] + +//' Transforms new data to a standardized normal distribution + //' + //' @param zx Numeric vector. The first `n_samples` items are the Gaussian data, and the last part is + //' the data with the original transformation. + //' @param n_samples Positive integer. Number of elements of `zx` that belongs to new data. + //' + //' @return Numeric matrix of length `n_samples` + //' + //' @keywords internal + //' + //' @author Lars Henry Berge Olsen + // [[Rcpp::export]] + Rcpp::NumericVector inv_gaussian_transform_cpp_Rcpp(const Rcpp::NumericVector& zx, const int n_samples) { + + // Extract z and x + Rcpp::NumericVector z = zx[Rcpp::Range(0, n_samples - 1)]; + Rcpp::NumericVector x = zx[Rcpp::Range(n_samples, zx.size() - 1)]; + + // Calculate u + Rcpp::NumericVector u = Rcpp::pnorm(z); + + // Calculate x_new using Armadillo's quantile function + arma::vec x_arma = Rcpp::as(x); + arma::vec u_arma = Rcpp::as(u); + + arma::vec x_new_arma = arma::quantile(x_arma, u_arma); + + // Convert back to Rcpp::NumericMatrix + Rcpp::NumericVector x_new = Rcpp::wrap(x_new_arma); + + return x_new; + } + + +// [[Rcpp::export]] +arma::mat inv_gaussian_transform_cpp_mat(Rcpp::NumericMatrix zx, const int n_samples) { + + int n_features = zx.ncol(); + + // Extract z and x + Rcpp::NumericMatrix z = zx(Rcpp::Range(0, n_samples - 1), Rcpp::_ ); + Rcpp::NumericMatrix x = zx(Rcpp::Range(n_samples, zx.nrow() - 1), Rcpp::_ ); + + // Rcpp::NumericMatrix u = Rcpp::pnorm(z); + + // Convert Rcpp::NumericMatrix to arma::mat + arma::mat z_arma = Rcpp::as(z); + arma::mat x_arma = Rcpp::as(x); + + // Calculate u + arma::mat u_arma = arma::normcdf(z_arma); + + // Calculate x_new using Armadillo's quantile function + arma::mat x_new_arma(n_samples, n_features); + for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { + x_new_arma.col(feature_idx) = arma::quantile(x_arma.col(feature_idx), u_arma.col(feature_idx)); + } + + // // Convert back to Rcpp::NumericVector + // Rcpp::NumericMatrix x_new = Rcpp::wrap(x_new_arma); + + return x_new_arma; +} + +// [[Rcpp::export]] +arma::mat inv_gaussian_transform_cpp_armamat(arma::mat zx, const int n_samples) { + + int n_features = zx.n_cols; + +// WHAT IS THE POINT TO FIRST ADD THEM TOGETHER AND THEN SPLIT THEM? + // Extract z and x + arma::mat z = zx.rows(0, n_samples - 1); + arma::mat x = zx.rows(n_samples, zx.n_rows - 1); + + // Calculate u + arma::mat u = arma::normcdf(z); + + // Calculate x_new using Armadillo's quantile function + arma::mat x_new(n_samples, n_features); + for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { + x_new.col(feature_idx) = arma::quantile(x.col(feature_idx), u.col(feature_idx)); + } + + // // Convert back to Rcpp::NumericVector + // Rcpp::NumericMatrix x_new = Rcpp::wrap(x_new_arma); + + return x_new; +} + + + +//' Transforms new data to a standardized normal distribution +//' +//' @details The function uses `arma::quantile(...)` which corresponds to R's `stats::quantile(..., type = 5)`. +//' +//' @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform. +//' @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`. +//' +//' @return arma::mat of same dimension as `z` +//' +//' @keywords internal +//' @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::mat inv_gaussian_transform_cpp(arma::mat z, arma::mat x) { + int n_features = z.n_cols; + int n_samples = z.n_rows; + arma::mat u = arma::normcdf(z); + arma::mat z_new(n_samples, n_features); + for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { + z_new.col(feature_idx) = arma::quantile(x.col(feature_idx), u.col(feature_idx)); + } + return z_new; +} + + + +// [[Rcpp::export]] +arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat x_explain_gaussian_mat, + arma::mat x_train_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Initialize auxiliary matrix and result cube + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); + + // // Does that we do not conditioning on + // arma::mat x_Sbar_star = x_train_mat.cols(Sbar_now_idx); + + // Extract the mean values for the features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + std::cout << mu_S << std::endl; + + // Extract the relevant parts of the covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star + arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); + x_Sbar_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_Sbar|S) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different test observations and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + + arma::mat MC_samples_mat_now_now = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + arma::mat MC_samples_mat_now_now_trans = + inv_gaussian_transform_cpp(MC_samples_mat_now_now, x_train_mat.cols(Sbar_now_idx)); + + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; + + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; +} From 92ccff3d670d6ac4e8e75b606a7fd8dc93cf5a19 Mon Sep 17 00:00:00 2001 From: LHBO Date: Sat, 6 Jan 2024 16:22:39 +0100 Subject: [PATCH 25/62] approach_gaussian: removed not used variables + stylr + added `data.table::` --- R/approach_gaussian.R | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/R/approach_gaussian.R b/R/approach_gaussian.R index 0c0e91bfd..b425d8f5a 100644 --- a/R/approach_gaussian.R +++ b/R/approach_gaussian.R @@ -50,42 +50,41 @@ setup_approach.gaussian <- function(internal, #' @export #' @author Lars Henry Berge Olsen prepare_data.gaussian <- function(internal, index_features, ...) { - # Extract used variables + S <- internal$objects$S[index_features, , drop = FALSE] + feature_names <- internal$parameters$feature_names n_explain <- internal$parameters$n_explain n_features <- internal$parameters$n_features n_samples <- internal$parameters$n_samples - n_combinations <- internal$parameters$n_combinations n_combinations_now <- length(index_features) + x_explain_mat <- as.matrix(internal$data$x_explain) mu <- internal$parameters$gaussian.mu cov_mat <- internal$parameters$gaussian.cov_mat - feature_names <- internal$parameters$feature_names - x_explain <- internal$data$x_explain - x_explain_mat <- as.matrix(internal$data$x_explain) - S <- internal$objects$S[index_features, , drop = FALSE] # Generate the MC samples from N(0, 1) MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) # Use Cpp to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}) for all coalitions and explicands. - # The object `dt` is here a 3D array of dimension (n_samples, n_explain*n_coalitions, n_features). - dt <- prepare_data_gaussian_cpp(MC_samples_mat = MC_samples_mat, - x_explain_mat = x_explain_mat, - S = S, - mu = mu, - cov_mat = cov_mat) + # The object `dt` is a 3D array of dimension (n_samples, n_explain*n_coalitions, n_features). + dt <- prepare_data_gaussian_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + S = S, + mu = mu, + cov_mat = cov_mat + ) # Reshape `dt` to a 2D array of dimension (n_samples*n_explain*n_coalitions, n_features). - dim(dt) = c(n_combinations_now*n_explain*n_samples, n_features) + dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) - # Convert to a data.table - dt = as.data.table(dt) - setnames(dt, feature_names) + # Convert to a data.table and add extra identification columns + dt <- data.table::as.data.table(dt) + data.table::setnames(dt, feature_names) dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] dt[, "w" := 1 / n_samples] - data.table::setcolorder(dt, c("id_combination", "id", feature_names)) dt[, id_combination := index_features[id_combination]] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) return(dt) } From ed41f9180eca8b4ab0793075986d37a087d59f90 Mon Sep 17 00:00:00 2001 From: LHBO Date: Sat, 6 Jan 2024 16:24:56 +0100 Subject: [PATCH 26/62] Removed quotation marks in dt --- R/approach_gaussian.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/approach_gaussian.R b/R/approach_gaussian.R index b425d8f5a..dcd9454e2 100644 --- a/R/approach_gaussian.R +++ b/R/approach_gaussian.R @@ -80,9 +80,9 @@ prepare_data.gaussian <- function(internal, index_features, ...) { # Convert to a data.table and add extra identification columns dt <- data.table::as.data.table(dt) data.table::setnames(dt, feature_names) - dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] - dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] - dt[, "w" := 1 / n_samples] + dt[, id_combination := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, w := 1 / n_samples] dt[, id_combination := index_features[id_combination]] data.table::setcolorder(dt, c("id_combination", "id", feature_names)) From 8833bb7b8f3d228c9272ae62c1da26e856805584 Mon Sep 17 00:00:00 2001 From: LHBO Date: Sat, 6 Jan 2024 16:33:03 +0100 Subject: [PATCH 27/62] Start to clean up copula.cpp --- src/Copula.cpp | 185 +++++++++++++++++++++++++------------------------ 1 file changed, 94 insertions(+), 91 deletions(-) diff --git a/src/Copula.cpp b/src/Copula.cpp index 5e7997922..1cedc135f 100644 --- a/src/Copula.cpp +++ b/src/Copula.cpp @@ -3,97 +3,6 @@ // [[Rcpp::depends(RcppArmadillo)]] -//' Transforms new data to a standardized normal distribution - //' - //' @param zx Numeric vector. The first `n_samples` items are the Gaussian data, and the last part is - //' the data with the original transformation. - //' @param n_samples Positive integer. Number of elements of `zx` that belongs to new data. - //' - //' @return Numeric matrix of length `n_samples` - //' - //' @keywords internal - //' - //' @author Lars Henry Berge Olsen - // [[Rcpp::export]] - Rcpp::NumericVector inv_gaussian_transform_cpp_Rcpp(const Rcpp::NumericVector& zx, const int n_samples) { - - // Extract z and x - Rcpp::NumericVector z = zx[Rcpp::Range(0, n_samples - 1)]; - Rcpp::NumericVector x = zx[Rcpp::Range(n_samples, zx.size() - 1)]; - - // Calculate u - Rcpp::NumericVector u = Rcpp::pnorm(z); - - // Calculate x_new using Armadillo's quantile function - arma::vec x_arma = Rcpp::as(x); - arma::vec u_arma = Rcpp::as(u); - - arma::vec x_new_arma = arma::quantile(x_arma, u_arma); - - // Convert back to Rcpp::NumericMatrix - Rcpp::NumericVector x_new = Rcpp::wrap(x_new_arma); - - return x_new; - } - - -// [[Rcpp::export]] -arma::mat inv_gaussian_transform_cpp_mat(Rcpp::NumericMatrix zx, const int n_samples) { - - int n_features = zx.ncol(); - - // Extract z and x - Rcpp::NumericMatrix z = zx(Rcpp::Range(0, n_samples - 1), Rcpp::_ ); - Rcpp::NumericMatrix x = zx(Rcpp::Range(n_samples, zx.nrow() - 1), Rcpp::_ ); - - // Rcpp::NumericMatrix u = Rcpp::pnorm(z); - - // Convert Rcpp::NumericMatrix to arma::mat - arma::mat z_arma = Rcpp::as(z); - arma::mat x_arma = Rcpp::as(x); - - // Calculate u - arma::mat u_arma = arma::normcdf(z_arma); - - // Calculate x_new using Armadillo's quantile function - arma::mat x_new_arma(n_samples, n_features); - for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { - x_new_arma.col(feature_idx) = arma::quantile(x_arma.col(feature_idx), u_arma.col(feature_idx)); - } - - // // Convert back to Rcpp::NumericVector - // Rcpp::NumericMatrix x_new = Rcpp::wrap(x_new_arma); - - return x_new_arma; -} - -// [[Rcpp::export]] -arma::mat inv_gaussian_transform_cpp_armamat(arma::mat zx, const int n_samples) { - - int n_features = zx.n_cols; - -// WHAT IS THE POINT TO FIRST ADD THEM TOGETHER AND THEN SPLIT THEM? - // Extract z and x - arma::mat z = zx.rows(0, n_samples - 1); - arma::mat x = zx.rows(n_samples, zx.n_rows - 1); - - // Calculate u - arma::mat u = arma::normcdf(z); - - // Calculate x_new using Armadillo's quantile function - arma::mat x_new(n_samples, n_features); - for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { - x_new.col(feature_idx) = arma::quantile(x.col(feature_idx), u.col(feature_idx)); - } - - // // Convert back to Rcpp::NumericVector - // Rcpp::NumericMatrix x_new = Rcpp::wrap(x_new_arma); - - return x_new; -} - - - //' Transforms new data to a standardized normal distribution //' //' @details The function uses `arma::quantile(...)` which corresponds to R's `stats::quantile(..., type = 5)`. @@ -196,3 +105,97 @@ arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, return result_cube; } + + + + +//' Transforms new data to a standardized normal distribution + //' + //' @param zx Numeric vector. The first `n_samples` items are the Gaussian data, and the last part is + //' the data with the original transformation. + //' @param n_samples Positive integer. Number of elements of `zx` that belongs to new data. + //' + //' @return Numeric matrix of length `n_samples` + //' + //' @keywords internal + //' + //' @author Lars Henry Berge Olsen + // [[Rcpp::export]] + Rcpp::NumericVector inv_gaussian_transform_cpp_Rcpp(const Rcpp::NumericVector& zx, const int n_samples) { + + // Extract z and x + Rcpp::NumericVector z = zx[Rcpp::Range(0, n_samples - 1)]; + Rcpp::NumericVector x = zx[Rcpp::Range(n_samples, zx.size() - 1)]; + + // Calculate u + Rcpp::NumericVector u = Rcpp::pnorm(z); + + // Calculate x_new using Armadillo's quantile function + arma::vec x_arma = Rcpp::as(x); + arma::vec u_arma = Rcpp::as(u); + + arma::vec x_new_arma = arma::quantile(x_arma, u_arma); + + // Convert back to Rcpp::NumericMatrix + Rcpp::NumericVector x_new = Rcpp::wrap(x_new_arma); + + return x_new; + } + + +// [[Rcpp::export]] +arma::mat inv_gaussian_transform_cpp_mat(Rcpp::NumericMatrix zx, const int n_samples) { + + int n_features = zx.ncol(); + + // Extract z and x + Rcpp::NumericMatrix z = zx(Rcpp::Range(0, n_samples - 1), Rcpp::_ ); + Rcpp::NumericMatrix x = zx(Rcpp::Range(n_samples, zx.nrow() - 1), Rcpp::_ ); + + // Rcpp::NumericMatrix u = Rcpp::pnorm(z); + + // Convert Rcpp::NumericMatrix to arma::mat + arma::mat z_arma = Rcpp::as(z); + arma::mat x_arma = Rcpp::as(x); + + // Calculate u + arma::mat u_arma = arma::normcdf(z_arma); + + // Calculate x_new using Armadillo's quantile function + arma::mat x_new_arma(n_samples, n_features); + for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { + x_new_arma.col(feature_idx) = arma::quantile(x_arma.col(feature_idx), u_arma.col(feature_idx)); + } + + // // Convert back to Rcpp::NumericVector + // Rcpp::NumericMatrix x_new = Rcpp::wrap(x_new_arma); + + return x_new_arma; +} + +// [[Rcpp::export]] +arma::mat inv_gaussian_transform_cpp_armamat(arma::mat zx, const int n_samples) { + + int n_features = zx.n_cols; + + // WHAT IS THE POINT TO FIRST ADD THEM TOGETHER AND THEN SPLIT THEM? + // Extract z and x + arma::mat z = zx.rows(0, n_samples - 1); + arma::mat x = zx.rows(n_samples, zx.n_rows - 1); + + // Calculate u + arma::mat u = arma::normcdf(z); + + // Calculate x_new using Armadillo's quantile function + arma::mat x_new(n_samples, n_features); + for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { + x_new.col(feature_idx) = arma::quantile(x.col(feature_idx), u.col(feature_idx)); + } + + // // Convert back to Rcpp::NumericVector + // Rcpp::NumericMatrix x_new = Rcpp::wrap(x_new_arma); + + return x_new; +} + + From 5a20f69313392e44a1efcebf3f43f882398eabb0 Mon Sep 17 00:00:00 2001 From: LHBO Date: Sat, 6 Jan 2024 16:33:39 +0100 Subject: [PATCH 28/62] deleted not used functions in copula.cpp --- src/Copula.cpp | 94 -------------------------------------------------- 1 file changed, 94 deletions(-) diff --git a/src/Copula.cpp b/src/Copula.cpp index 1cedc135f..27ce4fe9c 100644 --- a/src/Copula.cpp +++ b/src/Copula.cpp @@ -105,97 +105,3 @@ arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, return result_cube; } - - - - -//' Transforms new data to a standardized normal distribution - //' - //' @param zx Numeric vector. The first `n_samples` items are the Gaussian data, and the last part is - //' the data with the original transformation. - //' @param n_samples Positive integer. Number of elements of `zx` that belongs to new data. - //' - //' @return Numeric matrix of length `n_samples` - //' - //' @keywords internal - //' - //' @author Lars Henry Berge Olsen - // [[Rcpp::export]] - Rcpp::NumericVector inv_gaussian_transform_cpp_Rcpp(const Rcpp::NumericVector& zx, const int n_samples) { - - // Extract z and x - Rcpp::NumericVector z = zx[Rcpp::Range(0, n_samples - 1)]; - Rcpp::NumericVector x = zx[Rcpp::Range(n_samples, zx.size() - 1)]; - - // Calculate u - Rcpp::NumericVector u = Rcpp::pnorm(z); - - // Calculate x_new using Armadillo's quantile function - arma::vec x_arma = Rcpp::as(x); - arma::vec u_arma = Rcpp::as(u); - - arma::vec x_new_arma = arma::quantile(x_arma, u_arma); - - // Convert back to Rcpp::NumericMatrix - Rcpp::NumericVector x_new = Rcpp::wrap(x_new_arma); - - return x_new; - } - - -// [[Rcpp::export]] -arma::mat inv_gaussian_transform_cpp_mat(Rcpp::NumericMatrix zx, const int n_samples) { - - int n_features = zx.ncol(); - - // Extract z and x - Rcpp::NumericMatrix z = zx(Rcpp::Range(0, n_samples - 1), Rcpp::_ ); - Rcpp::NumericMatrix x = zx(Rcpp::Range(n_samples, zx.nrow() - 1), Rcpp::_ ); - - // Rcpp::NumericMatrix u = Rcpp::pnorm(z); - - // Convert Rcpp::NumericMatrix to arma::mat - arma::mat z_arma = Rcpp::as(z); - arma::mat x_arma = Rcpp::as(x); - - // Calculate u - arma::mat u_arma = arma::normcdf(z_arma); - - // Calculate x_new using Armadillo's quantile function - arma::mat x_new_arma(n_samples, n_features); - for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { - x_new_arma.col(feature_idx) = arma::quantile(x_arma.col(feature_idx), u_arma.col(feature_idx)); - } - - // // Convert back to Rcpp::NumericVector - // Rcpp::NumericMatrix x_new = Rcpp::wrap(x_new_arma); - - return x_new_arma; -} - -// [[Rcpp::export]] -arma::mat inv_gaussian_transform_cpp_armamat(arma::mat zx, const int n_samples) { - - int n_features = zx.n_cols; - - // WHAT IS THE POINT TO FIRST ADD THEM TOGETHER AND THEN SPLIT THEM? - // Extract z and x - arma::mat z = zx.rows(0, n_samples - 1); - arma::mat x = zx.rows(n_samples, zx.n_rows - 1); - - // Calculate u - arma::mat u = arma::normcdf(z); - - // Calculate x_new using Armadillo's quantile function - arma::mat x_new(n_samples, n_features); - for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { - x_new.col(feature_idx) = arma::quantile(x.col(feature_idx), u.col(feature_idx)); - } - - // // Convert back to Rcpp::NumericVector - // Rcpp::NumericMatrix x_new = Rcpp::wrap(x_new_arma); - - return x_new; -} - - From 63a911f559c515e144fde3c6f40e33eb088acc5e Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 13:01:59 +0100 Subject: [PATCH 29/62] Roxygen gaussian --- src/Gaussian.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index 68ce8c782..c92d03b20 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -3,21 +3,22 @@ using namespace Rcpp; //' Generate Gaussian MC samples //' -//' @param MC_samples_mat matrix. Matrix of dimension `n_samples` times `n_features` containing samples from the +//' @param MC_samples_mat arma::mat. Matrix of dimension `n_samples` times `n_features` containing samples from the //' univariate standard normal. -//' @param x_explain_mat matrix. Matrix of dimension `n_explain` times `n_features` containing the observations +//' @param x_explain_mat arma::mat. Matrix of dimension `n_explain` times `n_features` containing the observations //' to explain. -//' @param S matrix. Matrix of dimension `n_combinations` times `n_features` containing binary representations of +//' @param S arma::mat. Matrix of dimension `n_combinations` times `n_features` containing binary representations of //' the used coalitions. -//' @param mu vector. Vector of length `n_features` containing the mean of each feature. -//' @param cov_mat matrix. Matrix of dimension `n_features` times `n_features` containing the pairwise covariance +//' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature. +//' @param cov_mat arma::mat. Matrix of dimension `n_features` times `n_features` containing the pairwise covariance //' between all pairs of features. //' +//' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +//' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +//' MC samples for each explicand and coalition. +//' //' @export //' @keywords internal -//' -//' @return List of length `n_combinations`*`n_samples`, where each entry is a matrix of dimension `n_samples` times -//' `n_features` containing the conditional MC samples for each coalition and explicand. //' @author Lars Henry Berge Olsen // [[Rcpp::export]] arma::cube prepare_data_gaussian_cpp(arma::mat MC_samples_mat, From 24ec723ef82dda9c6d0624454e7deb22d0808cd8 Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 13:23:45 +0100 Subject: [PATCH 30/62] Roxygen --- R/approach_gaussian.R | 4 ++-- src/Gaussian.cpp | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/approach_gaussian.R b/R/approach_gaussian.R index dcd9454e2..429fbbc01 100644 --- a/R/approach_gaussian.R +++ b/R/approach_gaussian.R @@ -65,7 +65,7 @@ prepare_data.gaussian <- function(internal, index_features, ...) { MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) # Use Cpp to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}) for all coalitions and explicands. - # The object `dt` is a 3D array of dimension (n_samples, n_explain*n_coalitions, n_features). + # The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). dt <- prepare_data_gaussian_cpp( MC_samples_mat = MC_samples_mat, x_explain_mat = x_explain_mat, @@ -74,7 +74,7 @@ prepare_data.gaussian <- function(internal, index_features, ...) { cov_mat = cov_mat ) - # Reshape `dt` to a 2D array of dimension (n_samples*n_explain*n_coalitions, n_features). + # Reshape `dt` to a 2D array of dimension (n_samples * n_explain * n_coalitions, n_features). dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) # Convert to a data.table and add extra identification columns diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index c92d03b20..a054aa2ab 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -3,14 +3,14 @@ using namespace Rcpp; //' Generate Gaussian MC samples //' -//' @param MC_samples_mat arma::mat. Matrix of dimension `n_samples` times `n_features` containing samples from the +//' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the //' univariate standard normal. -//' @param x_explain_mat arma::mat. Matrix of dimension `n_explain` times `n_features` containing the observations +//' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations //' to explain. -//' @param S arma::mat. Matrix of dimension `n_combinations` times `n_features` containing binary representations of +//' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of //' the used coalitions. //' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature. -//' @param cov_mat arma::mat. Matrix of dimension `n_features` times `n_features` containing the pairwise covariance +//' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance //' between all pairs of features. //' //' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where @@ -47,7 +47,7 @@ arma::cube prepare_data_gaussian_cpp(arma::mat MC_samples_mat, // Extract the features we condition on arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - // Extract the mean values for the features in the two sets + // Extract the mean values of the features in the two sets arma::vec mu_S = mu.elem(S_now_idx); arma::vec mu_Sbar = mu.elem(Sbar_now_idx); @@ -70,10 +70,10 @@ arma::cube prepare_data_gaussian_cpp(arma::mat MC_samples_mat, arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star.each_row() - mu_S.t()).t(); x_Sbar_mean.each_col() += mu_Sbar; - // Transform the samples to be from N(O, Sigma_Sbar|S) + // Transform the samples to be from N(O, Sigma_{Sbar|S}) arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - // Loop over the different test observations and combine the generated values with the values we conditioned on + // Loop over the different explicands and combine the generated values with the values we conditioned on for (int idx_now = 0; idx_now < n_explain; idx_now++) { aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); From b7ece67492682e799113034bf617b94464fa15b4 Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 13:45:52 +0100 Subject: [PATCH 31/62] Cleaned up Copula.cpp --- src/Copula.cpp | 62 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/Copula.cpp b/src/Copula.cpp index 27ce4fe9c..b1e1aba39 100644 --- a/src/Copula.cpp +++ b/src/Copula.cpp @@ -1,6 +1,5 @@ #include #include - // [[Rcpp::depends(RcppArmadillo)]] //' Transforms new data to a standardized normal distribution @@ -18,16 +17,40 @@ arma::mat inv_gaussian_transform_cpp(arma::mat z, arma::mat x) { int n_features = z.n_cols; int n_samples = z.n_rows; - arma::mat u = arma::normcdf(z); arma::mat z_new(n_samples, n_features); + arma::mat u = arma::normcdf(z); for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { z_new.col(feature_idx) = arma::quantile(x.col(feature_idx), u.col(feature_idx)); } return z_new; } - - +//' Generate (Gaussian) Copula MC samples +//' +//' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +//' univariate standard normal. +//' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +//' to explain on the original scale. +//' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the +//' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +//' transformed to a standardized normal distribution. +//' +//' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. +//' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +//' the used coalitions. +//' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed +//' using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. +//' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +//' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +//' transformed to a standardized normal distribution. +//' +//' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +//' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +//' copula MC samples for each explicand and coalition on the original scale. +//' +//' @export +//' @keywords internal +//' @author Lars Henry Berge Olsen // [[Rcpp::export]] arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, arma::mat x_explain_mat, @@ -54,20 +77,15 @@ arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, arma::uvec S_now_idx = arma::find(S_now > 0.5); arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - // Extract the features we condition on + // Extract the features we condition on, both on the original scale and the Gaussian transformed values. arma::mat x_S_star = x_explain_mat.cols(S_now_idx); arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); - // // Does that we do not conditioning on - // arma::mat x_Sbar_star = x_train_mat.cols(Sbar_now_idx); - - // Extract the mean values for the features in the two sets + // Extract the mean values of the Gaussian transformed features in the two sets arma::vec mu_S = mu.elem(S_now_idx); arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - std::cout << mu_S << std::endl; - - // Extract the relevant parts of the covariance matrix + // Extract the relevant parts of the Gaussian transformed covariance matrix arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); @@ -82,23 +100,29 @@ arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); } - // Compute the conditional mean of Xsbar given Xs = Xs_star - arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); - x_Sbar_mean.each_col() += mu_Sbar; + // Compute the conditional mean of Xsbar given Xs = Xs_star_gaussian, i.e., of the Gaussian transformed features + arma::mat x_Sbar_gaussian_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); + x_Sbar_gaussian_mean.each_col() += mu_Sbar; - // Transform the samples to be from N(O, Sigma_Sbar|S) + // Transform the samples to be from N(O, Sigma_{Sbar|S}) arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - // Loop over the different test observations and combine the generated values with the values we conditioned on + // Loop over the different explicands and combine the generated values with the values we conditioned on for (int idx_now = 0; idx_now < n_explain; idx_now++) { - arma::mat MC_samples_mat_now_now = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1); + // Transform the MC samples to be from N(mu_{Sbar|S}, Sigma_{Sbar|S}) for one coalition and one explicand + arma::mat MC_samples_mat_now_now = + MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); + + // Transform the MC to the original scale using the inverse Gaussian transform arma::mat MC_samples_mat_now_now_trans = inv_gaussian_transform_cpp(MC_samples_mat_now_now, x_train_mat.cols(Sbar_now_idx)); - aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + // Insert the auxiliary matrix into the result cube result_cube.col(S_ind*n_explain + idx_now) = aux_mat; } } From bc3df32874b2fe08ee2b480a0c5319c18a9caff2 Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 13:48:05 +0100 Subject: [PATCH 32/62] Updated approach_copula.R --- R/approach_copula.R | 223 ++++++++++++++++++++------------------------ 1 file changed, 102 insertions(+), 121 deletions(-) diff --git a/R/approach_copula.R b/R/approach_copula.R index 32087a010..8b8ff8a5a 100644 --- a/R/approach_copula.R +++ b/R/approach_copula.R @@ -1,8 +1,7 @@ #' @rdname setup_approach -#' #' @inheritParams default_doc_explain -#' #' @export +#' @author Martin Jullum setup_approach.copula <- function(internal, ...) { parameters <- internal$parameters x_train <- internal$data$x_train @@ -31,7 +30,6 @@ setup_approach.copula <- function(internal, ...) { ) parameters$copula.cov_mat <- get_cov_mat(x_train0) - x_explain_gaussian <- apply( X = rbind(x_explain, x_train), MARGIN = 2, @@ -49,84 +47,104 @@ setup_approach.copula <- function(internal, ...) { return(internal) } - - #' @inheritParams default_doc #' @rdname prepare_data #' @export #' @author Lars Henry Berge Olsen -prepare_data.copula2 <- function(internal, index_features, ...) { - +prepare_data.copula <- function(internal, index_features, ...) { # Extract used variables - X <- internal$objects$X - x_train <- internal$data$x_train - x_train_mat <- as.matrix(internal$data$x_train) - x_explain <- internal$data$x_explain - x_explain_mat <- as.matrix(internal$data$x_explain) + S <- internal$objects$S[index_features, , drop = FALSE] + feature_names <- internal$parameters$feature_names n_explain <- internal$parameters$n_explain n_samples <- internal$parameters$n_samples - n_samples = 1000000 n_features <- internal$parameters$n_features - n_combinations <- internal$parameters$n_combinations n_combinations_now <- length(index_features) + x_train_mat <- as.matrix(internal$data$x_train) + x_explain_mat <- as.matrix(internal$data$x_explain) copula.mu <- internal$parameters$copula.mu copula.cov_mat <- internal$parameters$copula.cov_mat - copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) # CAN SKIP as.matrix as it is a matrix allready - feature_names <- internal$parameters$feature_names - - - - - - + copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) + # TODO: Note that `as.matrix` is not needed for `copula.x_explain_gaussian_mat` as it is already defined as a matrix + # in `setup_approach.copula`, however, it seems that Martin plans to make it into a data.table, thus, I include + # `as.matrix` as future safety. DISCUSS WITH MARTIN WHAT HIS PLANS ARE! + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) - S <- internal$objects$S[index_features, , drop = FALSE] + # Use C++ to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}), for all coalitions and explicands, + # and then transforming them back to the original scale using the inverse Gaussian transform in C++. + # The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). + dt <- prepare_data_copula_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat + ) + # Reshape `dt` to a 2D array of dimension (n_samples * n_explain * n_coalitions, n_features). + dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) - # Generate the MC samples from N(0, 1) - MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + # Convert to a data.table and add extra identification columns + dt <- data.table::as.data.table(dt) + data.table::setnames(dt, feature_names) + dt[, id_combination := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, w := 1 / n_samples] + dt[, id_combination := index_features[id_combination]] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) - # Use Cpp to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}) for all coalitions and explicands. - # The object `dt` is here a 3D array of dimension (n_samples, n_explain*n_coalitions, n_features). - # INCLUDE THE TRANSFORMATION + return(dt) +} +#' Transforms new data to standardized normal (dimension 1) based on other data transformations +#' +#' @param yx Numeric vector. The first `n_y` items is the data that is transformed, and last +#' part is the data with the original transformation. +#' @param n_y Positive integer. Number of elements of `yx` that belongs to the Gaussian data. +#' +#' @return Vector of back-transformed Gaussian data +#' +#' @keywords internal +#' @author Martin Jullum +gaussian_transform_separate <- function(yx, n_y) { + if (n_y >= length(yx)) stop("n_y should be less than length(yx)") + ind <- 1:n_y + x <- yx[-ind] + tmp <- rank(yx)[ind] + tmp <- tmp - rank(tmp) + 0.5 + u_y <- tmp / (length(x) + 1) + z_y <- stats::qnorm(u_y) + return(z_y) +} - dt <- prepare_data_copula_cpp(MC_samples_mat = MC_samples_mat, - x_explain_mat = x_explain_mat, - x_explain_gaussian_mat = copula.x_explain_gaussian_mat, - x_train_mat = x_train_mat, - S = S, - mu = copula.mu, - cov_mat = copula.cov_mat) +#' Transforms a sample to standardized normal distribution +#' +#' @param x Numeric vector.The data which should be transformed to a standard normal distribution. +#' +#' @return Numeric vector of length `length(x)` +#' +#' @keywords internal +#' @author Martin Jullum +gaussian_transform <- function(x) { + u <- rank(x) / (length(x) + 1) + z <- stats::qnorm(u) + return(z) +} - # Reshape `dt` to a 2D array of dimension (n_samples*n_explain*n_coalitions, n_features). - dim(dt) = c(n_combinations_now*n_explain*n_samples, n_features) - # Convert to a data.table - dt = as.data.table(dt) - setnames(dt, feature_names) - dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)] - dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))] - dt[, "w" := 1 / n_samples] - data.table::setcolorder(dt, c("id_combination", "id", feature_names)) - dt[, id_combination := index_features[id_combination]] - dt - dt[id_combination == 9 & id == 1,] +# TRASH BELOW ----------------------------------------------------------------------------------------------------- - dt_agr = dt[, lapply(.SD, mean), by = list(id, id_combination)] - data.table::setorderv(dt_agr, c("id", "id_combination")) - dt_agr - return(dt) -} #' @inheritParams default_doc #' @rdname prepare_data #' @export -prepare_data.copula <- function(internal, index_features = NULL, ...) { +prepare_data.copula_old <- function(internal, index_features = NULL, ...) { X <- internal$objects$X x_train <- internal$data$x_train x_explain <- internal$data$x_explain @@ -147,39 +165,37 @@ prepare_data.copula <- function(internal, index_features = NULL, ...) { } - n_samples = 1000000 - for (i in seq_len(n_explain)) { - print(i) - l <- lapply( - X = features, - FUN = sample_copula, - n_samples = n_samples, - mu = copula.mu, - cov_mat = copula.cov_mat, - m = n_features, - x_explain = x_explain0[i, , drop = FALSE], - x_train = as.matrix(x_train), - x_explain_gaussian = copula.x_explain_gaussian[i, , drop = FALSE] - ) - # x_explain_gaussian2 = x_explain_gaussian - # x_train2 = x_train - # x_explain2 = x_explain - - dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination") - dt_l[[i]][, w := 1 / n_samples] - dt_l[[i]][, id := i] - if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]] - } + n_samples <- 5000000 + system.time({ + for (i in seq_len(n_explain)) { + print(i) + l <- lapply( + X = features, + FUN = sample_copula, + n_samples = n_samples, + mu = copula.mu, + cov_mat = copula.cov_mat, + m = n_features, + x_explain = x_explain0[i, , drop = FALSE], + x_train = as.matrix(x_train), + x_explain_gaussian = copula.x_explain_gaussian[i, , drop = FALSE] + ) + # x_explain_gaussian2 = x_explain_gaussian + # x_train2 = x_train + # x_explain2 = x_explain + + dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination") + dt_l[[i]][, w := 1 / n_samples] + dt_l[[i]][, id := i] + if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]] + } + }) dt <- data.table::rbindlist(dt_l, use.names = TRUE, fill = TRUE) dt - dt9 = dt - dt9_agr = dt9[, lapply(.SD, mean), by = list(id, id_combination)] - dt9_agr - dt_agr + dt9 <- dt + dt9_agr <- dt9[, lapply(.SD, mean), by = list(id, id_combination)] + (dt9_agr - dt_agr) / dt9_agr return(dt) - - - - } #' Sample conditional variables using the Gaussian copula approach @@ -225,10 +241,10 @@ sample_copula <- function(index_given, n_samples, mu, cov_mat, m, x_explain_gaus n_z = n_samples, type = 5 ) - ret0_x_cpp = inv_gaussian_transform_cpp(z = ret0_z, x = x_train[, dependent_ind, drop = FALSE]) + ret0_x_cpp <- inv_gaussian_transform_cpp(z = ret0_z, x = x_train[, dependent_ind, drop = FALSE]) - ret0_x_cpp = inv_gaussian_transform_cpp_armamat(rbind(ret0_z, x_train[, dependent_ind, drop = FALSE]), n_samples = n_samples) - colnames(ret0_x_cpp) = feature_names[dependent_ind] + ret0_x_cpp <- inv_gaussian_transform_cpp_armamat(rbind(ret0_z, x_train[, dependent_ind, drop = FALSE]), n_samples = n_samples) + colnames(ret0_x_cpp) <- feature_names[dependent_ind] all.equal(ret0_x, ret0_x_cpp) # Dette har jeg kode til @@ -262,39 +278,4 @@ inv_gaussian_transform <- function(zx, n_z, type) { return(as.double(x_new)) } -#' Transforms new data to standardized normal (dimension 1) based on other data transformations -#' -#' @param yx Numeric vector. The first `n_y` items is the data that is transformed, and last -#' part is the data with the original transformation. -#' @param n_y Positive integer. Number of elements of `yx` that belongs to the gaussian data. -#' -#' @return Vector of back-transformed Gaussian data -#' -#' @keywords internal -#' -#' @author Martin Jullum -gaussian_transform_separate <- function(yx, n_y) { - if (n_y >= length(yx)) stop("n_y should be less than length(yx)") - ind <- 1:n_y - x <- yx[-ind] - tmp <- rank(yx)[ind] - tmp <- tmp - rank(tmp) + 0.5 - u_y <- tmp / (length(x) + 1) - z_y <- stats::qnorm(u_y) - return(z_y) -} -#' Transforms a sample to standardized normal distribution -#' -#' @param x Numeric vector.The data which should be transformed to a standard normal distribution. -#' -#' @return Numeric vector of length `length(x)` -#' -#' @keywords internal -#' -#' @author Martin Jullum -gaussian_transform <- function(x) { - u <- rank(x) / (length(x) + 1) - z <- stats::qnorm(u) - return(z) -} From 0ad95d29cb8ebab0176c682fdbe9d1fd5a9dfe5a Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 14:06:58 +0100 Subject: [PATCH 33/62] Roxygen --- src/Copula.cpp | 4 ++-- src/Gaussian.cpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Copula.cpp b/src/Copula.cpp index b1e1aba39..81e142778 100644 --- a/src/Copula.cpp +++ b/src/Copula.cpp @@ -34,10 +34,10 @@ arma::mat inv_gaussian_transform_cpp(arma::mat z, arma::mat x) { //' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the //' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been //' transformed to a standardized normal distribution. -//' //' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. //' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of -//' the used coalitions. +//' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +//' This is not a problem internally in shapr as the empty and grand coalitions treated differently. //' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed //' using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. //' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index a054aa2ab..844f327d1 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -8,7 +8,8 @@ using namespace Rcpp; //' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations //' to explain. //' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of -//' the used coalitions. +//' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +//' This is not a problem internally in shapr as the empty and grand coalitions treated differently. //' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature. //' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance //' between all pairs of features. From ffba299b1eaf9e634eb881e11074792db1661ab1 Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 14:08:45 +0100 Subject: [PATCH 34/62] Deleted unnecessary/old R code from approach_copula.R --- R/approach_copula.R | 145 -------------------------------------------- 1 file changed, 145 deletions(-) diff --git a/R/approach_copula.R b/R/approach_copula.R index 8b8ff8a5a..56f354baa 100644 --- a/R/approach_copula.R +++ b/R/approach_copula.R @@ -134,148 +134,3 @@ gaussian_transform <- function(x) { z <- stats::qnorm(u) return(z) } - - - -# TRASH BELOW ----------------------------------------------------------------------------------------------------- - - - -#' @inheritParams default_doc -#' @rdname prepare_data -#' @export -prepare_data.copula_old <- function(internal, index_features = NULL, ...) { - X <- internal$objects$X - x_train <- internal$data$x_train - x_explain <- internal$data$x_explain - n_explain <- internal$parameters$n_explain - n_samples <- internal$parameters$n_samples - n_features <- internal$parameters$n_features - copula.mu <- internal$parameters$copula.mu - copula.cov_mat <- internal$parameters$copula.cov_mat - copula.x_explain_gaussian <- internal$data$copula.x_explain_gaussian - - - x_explain0 <- as.matrix(x_explain) - dt_l <- list() - if (is.null(index_features)) { - features <- X$features - } else { - features <- X$features[index_features] - } - - - n_samples <- 5000000 - system.time({ - for (i in seq_len(n_explain)) { - print(i) - l <- lapply( - X = features, - FUN = sample_copula, - n_samples = n_samples, - mu = copula.mu, - cov_mat = copula.cov_mat, - m = n_features, - x_explain = x_explain0[i, , drop = FALSE], - x_train = as.matrix(x_train), - x_explain_gaussian = copula.x_explain_gaussian[i, , drop = FALSE] - ) - # x_explain_gaussian2 = x_explain_gaussian - # x_train2 = x_train - # x_explain2 = x_explain - - dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination") - dt_l[[i]][, w := 1 / n_samples] - dt_l[[i]][, id := i] - if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]] - } - }) - dt <- data.table::rbindlist(dt_l, use.names = TRUE, fill = TRUE) - dt - dt9 <- dt - dt9_agr <- dt9[, lapply(.SD, mean), by = list(id, id_combination)] - (dt9_agr - dt_agr) / dt9_agr - return(dt) -} - -#' Sample conditional variables using the Gaussian copula approach -#' -#' @param index_given Integer vector. The indices of the features to condition upon. Note that -#' `min(index_given) >= 1` and `max(index_given) <= m`. -#' @param m Positive integer. The total number of features. -#' @param x_explain_gaussian Numeric matrix. Contains the observation whose predictions ought -#' to be explained (test data), -#' after quantile-transforming them to standard Gaussian variables. -#' @param x_explain Numeric matrix. Contains the features of the observation whose -#' predictions ought to be explained (test data). -#' -#' @return data.table -#' -#' @keywords internal -#' -#' @author Martin Jullum -sample_copula <- function(index_given, n_samples, mu, cov_mat, m, x_explain_gaussian, x_train, x_explain) { - # Handles the unconditional and full conditional separtely when predicting - if (length(index_given) %in% c(0, m)) { - ret <- matrix(x_explain, ncol = m, nrow = 1) - } else { - dependent_ind <- (seq_len(length(mu)))[-index_given] - - # Dette har jeg kode til - tmp <- condMVNorm::condMVN( - mean = mu, - sigma = cov_mat, - dependent.ind = dependent_ind, - given.ind = index_given, - X.given = x_explain_gaussian[index_given] - ) - - # Dette har jeg kode til. Bruker cholensky + simulert data fra før - ret0_z <- mvnfast::rmvn(n = n_samples, mu = tmp$condMean, sigma = tmp$condVar) - - # Dette må jeg skrive selv - ret0_x <- apply( - X = rbind(ret0_z, x_train[, dependent_ind, drop = FALSE]), - MARGIN = 2, - FUN = inv_gaussian_transform, - n_z = n_samples, - type = 5 - ) - ret0_x_cpp <- inv_gaussian_transform_cpp(z = ret0_z, x = x_train[, dependent_ind, drop = FALSE]) - - ret0_x_cpp <- inv_gaussian_transform_cpp_armamat(rbind(ret0_z, x_train[, dependent_ind, drop = FALSE]), n_samples = n_samples) - colnames(ret0_x_cpp) <- feature_names[dependent_ind] - all.equal(ret0_x, ret0_x_cpp) - - # Dette har jeg kode til - ret <- matrix(NA, ncol = m, nrow = n_samples) - ret[, index_given] <- rep(x_explain[index_given], each = n_samples) - ret[, dependent_ind] <- ret0_x - } - colnames(ret) <- colnames(x_explain) - return(as.data.table(ret)) -} - - -#' Transforms new data to a standardized normal distribution -#' -#' @param zx Numeric vector. The first `n_z` items are the Gaussian data, and the last part is -#' the data with the original transformation. -#' @param n_z Positive integer. Number of elements of `zx` that belongs to new data. -#' -#' @return Numeric vector of length `n_z` -#' -#' @keywords internal -#' -#' @author Martin Jullum -inv_gaussian_transform <- function(zx, n_z, type) { - if (n_z >= length(zx)) stop("n_z should be less than length(zx)") - ind <- 1:n_z - z <- zx[ind] - x <- zx[-ind] - u <- stats::pnorm(z) - x_new <- stats::quantile(x, probs = u, type = type) - return(as.double(x_new)) -} - - From cf9a5b479794e9c5d78af1bed832fa501d370cca Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 14:11:46 +0100 Subject: [PATCH 35/62] Updated RcppExports --- R/RcppExports.R | 98 +++++++++++++++++++++----------- src/RcppExports.cpp | 133 ++++++-------------------------------------- 2 files changed, 82 insertions(+), 149 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index 866709e08..20b6763d2 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -80,45 +80,75 @@ aicc_full_cpp <- function(h, X_list, mcov_list, S_scale_dist, y_list, negative) .Call(`_shapr_aicc_full_cpp`, h, X_list, mcov_list, S_scale_dist, y_list, negative) } -#' Generate Gaussian MC samples -NULL +#' Transforms new data to a standardized normal distribution +#' +#' @details The function uses `arma::quantile(...)` which corresponds to R's `stats::quantile(..., type = 5)`. +#' +#' @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform. +#' @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`. +#' +#' @return arma::mat of same dimension as `z` +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +inv_gaussian_transform_cpp <- function(z, x) { + .Call(`_shapr_inv_gaussian_transform_cpp`, z, x) +} + +#' Generate (Gaussian) Copula MC samples +#' +#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +#' univariate standard normal. +#' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +#' to explain on the original scale. +#' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the +#' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +#' transformed to a standardized normal distribution. +#' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. +#' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +#' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +#' This is not a problem internally in shapr as the empty and grand coalitions treated differently. +#' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed +#' using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. +#' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +#' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +#' transformed to a standardized normal distribution. +#' +#' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +#' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +#' copula MC samples for each explicand and coalition on the original scale. +#' +#' @export +#' @keywords internal +#' @author Lars Henry Berge Olsen +prepare_data_copula_cpp <- function(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_copula_cpp`, MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) +} +#' Generate Gaussian MC samples +#' +#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +#' univariate standard normal. +#' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +#' to explain. +#' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +#' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +#' This is not a problem internally in shapr as the empty and grand coalitions treated differently. +#' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature. +#' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +#' between all pairs of features. +#' +#' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +#' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +#' MC samples for each explicand and coalition. +#' +#' @export +#' @keywords internal +#' @author Lars Henry Berge Olsen prepare_data_gaussian_cpp <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { .Call(`_shapr_prepare_data_gaussian_cpp`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) } -prepare_data_gaussian_cpp_with_wrap <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { - .Call(`_shapr_prepare_data_gaussian_cpp_with_wrap`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) -} - -prepare_data_gaussian_cpp_v2 <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { - .Call(`_shapr_prepare_data_gaussian_cpp_v2`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) -} - -prepare_data_gaussian_cpp_fix_large_mat <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { - .Call(`_shapr_prepare_data_gaussian_cpp_fix_large_mat`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) -} - -prepare_data_gaussian_cpp_fix_large_mat_v2 <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { - .Call(`_shapr_prepare_data_gaussian_cpp_fix_large_mat_v2`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) -} - -prepare_data_gaussian_cpp_fix_cube <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { - .Call(`_shapr_prepare_data_gaussian_cpp_fix_cube`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) -} - -prepare_data_gaussian_cpp_fix_cube_v2 <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { - .Call(`_shapr_prepare_data_gaussian_cpp_fix_cube_v2`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) -} - -prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { - .Call(`_shapr_prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) -} - -prepare_data_gaussian_cpp_fix_std_list <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) { - .Call(`_shapr_prepare_data_gaussian_cpp_fix_std_list`, MC_samples_mat, x_explain_mat, S, mu, cov_mat) -} - #' (Generalized) Mahalanobis distance #' #' Used to get the Euclidean distance as well by setting \code{mcov} = \code{diag(m)}. diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 1c847bb08..8ab20c076 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -80,129 +80,38 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// prepare_data_gaussian_cpp -Rcpp::List prepare_data_gaussian_cpp(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); -RcppExport SEXP _shapr_prepare_data_gaussian_cpp(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); - Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); - Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); - rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); - return rcpp_result_gen; -END_RCPP -} -// prepare_data_gaussian_cpp_with_wrap -Rcpp::List prepare_data_gaussian_cpp_with_wrap(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); -RcppExport SEXP _shapr_prepare_data_gaussian_cpp_with_wrap(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +// inv_gaussian_transform_cpp +arma::mat inv_gaussian_transform_cpp(arma::mat z, arma::mat x); +RcppExport SEXP _shapr_inv_gaussian_transform_cpp(SEXP zSEXP, SEXP xSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); - Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); - Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); - rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_with_wrap(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + Rcpp::traits::input_parameter< arma::mat >::type z(zSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(inv_gaussian_transform_cpp(z, x)); return rcpp_result_gen; END_RCPP } -// prepare_data_gaussian_cpp_v2 -Rcpp::List prepare_data_gaussian_cpp_v2(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); -RcppExport SEXP _shapr_prepare_data_gaussian_cpp_v2(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +// prepare_data_copula_cpp +arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat x_explain_gaussian_mat, arma::mat x_train_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +RcppExport SEXP _shapr_prepare_data_copula_cpp(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP x_explain_gaussian_matSEXP, SEXP x_train_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_explain_gaussian_mat(x_explain_gaussian_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_train_mat(x_train_matSEXP); Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); - rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_v2(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + rcpp_result_gen = Rcpp::wrap(prepare_data_copula_cpp(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat)); return rcpp_result_gen; END_RCPP } -// prepare_data_gaussian_cpp_fix_large_mat -arma::mat prepare_data_gaussian_cpp_fix_large_mat(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); -RcppExport SEXP _shapr_prepare_data_gaussian_cpp_fix_large_mat(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); - Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); - Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); - rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_fix_large_mat(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); - return rcpp_result_gen; -END_RCPP -} -// prepare_data_gaussian_cpp_fix_large_mat_v2 -arma::mat prepare_data_gaussian_cpp_fix_large_mat_v2(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); -RcppExport SEXP _shapr_prepare_data_gaussian_cpp_fix_large_mat_v2(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); - Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); - Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); - rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_fix_large_mat_v2(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); - return rcpp_result_gen; -END_RCPP -} -// prepare_data_gaussian_cpp_fix_cube -arma::cube prepare_data_gaussian_cpp_fix_cube(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); -RcppExport SEXP _shapr_prepare_data_gaussian_cpp_fix_cube(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); - Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); - Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); - rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_fix_cube(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); - return rcpp_result_gen; -END_RCPP -} -// prepare_data_gaussian_cpp_fix_cube_v2 -arma::cube prepare_data_gaussian_cpp_fix_cube_v2(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); -RcppExport SEXP _shapr_prepare_data_gaussian_cpp_fix_cube_v2(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); - Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); - Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); - rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_fix_cube_v2(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); - return rcpp_result_gen; -END_RCPP -} -// prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices -Rcpp::List prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); -RcppExport SEXP _shapr_prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); - Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); - Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); - rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); - return rcpp_result_gen; -END_RCPP -} -// prepare_data_gaussian_cpp_fix_std_list -std::list prepare_data_gaussian_cpp_fix_std_list(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); -RcppExport SEXP _shapr_prepare_data_gaussian_cpp_fix_std_list(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +// prepare_data_gaussian_cpp +arma::cube prepare_data_gaussian_cpp(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +RcppExport SEXP _shapr_prepare_data_gaussian_cpp(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -211,7 +120,7 @@ BEGIN_RCPP Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); - rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp_fix_std_list(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); + rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); return rcpp_result_gen; END_RCPP } @@ -290,15 +199,9 @@ static const R_CallMethodDef CallEntries[] = { {"_shapr_correction_matrix_cpp", (DL_FUNC) &_shapr_correction_matrix_cpp, 2}, {"_shapr_aicc_full_single_cpp", (DL_FUNC) &_shapr_aicc_full_single_cpp, 5}, {"_shapr_aicc_full_cpp", (DL_FUNC) &_shapr_aicc_full_cpp, 6}, + {"_shapr_inv_gaussian_transform_cpp", (DL_FUNC) &_shapr_inv_gaussian_transform_cpp, 2}, + {"_shapr_prepare_data_copula_cpp", (DL_FUNC) &_shapr_prepare_data_copula_cpp, 7}, {"_shapr_prepare_data_gaussian_cpp", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp, 5}, - {"_shapr_prepare_data_gaussian_cpp_with_wrap", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_with_wrap, 5}, - {"_shapr_prepare_data_gaussian_cpp_v2", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_v2, 5}, - {"_shapr_prepare_data_gaussian_cpp_fix_large_mat", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_fix_large_mat, 5}, - {"_shapr_prepare_data_gaussian_cpp_fix_large_mat_v2", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_fix_large_mat_v2, 5}, - {"_shapr_prepare_data_gaussian_cpp_fix_cube", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_fix_cube, 5}, - {"_shapr_prepare_data_gaussian_cpp_fix_cube_v2", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_fix_cube_v2, 5}, - {"_shapr_prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_fix_list_of_lists_of_matrices, 5}, - {"_shapr_prepare_data_gaussian_cpp_fix_std_list", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp_fix_std_list, 5}, {"_shapr_mahalanobis_distance_cpp", (DL_FUNC) &_shapr_mahalanobis_distance_cpp, 5}, {"_shapr_sample_features_cpp", (DL_FUNC) &_shapr_sample_features_cpp, 2}, {"_shapr_observation_impute_cpp", (DL_FUNC) &_shapr_observation_impute_cpp, 5}, From 3a1498558558e4c159badf0412d22e8f2267642b Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 14:16:50 +0100 Subject: [PATCH 36/62] Renamed illustration script --- .../{Compare_Gaussian.R => compare_gaussian_in_R_and_C++.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename inst/scripts/{Compare_Gaussian.R => compare_gaussian_in_R_and_C++.R} (100%) diff --git a/inst/scripts/Compare_Gaussian.R b/inst/scripts/compare_gaussian_in_R_and_C++.R similarity index 100% rename from inst/scripts/Compare_Gaussian.R rename to inst/scripts/compare_gaussian_in_R_and_C++.R From da8f03c23260ca53cd9207772573892c29316ce8 Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 16:39:46 +0100 Subject: [PATCH 37/62] Adding script to compare the R and C++ versions. --- inst/scripts/compare_copula_in_R_and_C++.R | 261 +++++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100644 inst/scripts/compare_copula_in_R_and_C++.R diff --git a/inst/scripts/compare_copula_in_R_and_C++.R b/inst/scripts/compare_copula_in_R_and_C++.R new file mode 100644 index 000000000..bfeb7ba2f --- /dev/null +++ b/inst/scripts/compare_copula_in_R_and_C++.R @@ -0,0 +1,261 @@ +# Libraries ------------------------------------------------------------------------------------------------------- +# library(shapr) +# library(rbenchmark) +library(data.table) +devtools::load_all(".") + +# Old R code ------------------------------------------------------------------------------------------------------ +#' @inheritParams default_doc +#' @rdname prepare_data +#' @export +prepare_data.copula_old <- function(internal, index_features = NULL, ...) { + X <- internal$objects$X + x_train <- internal$data$x_train + x_explain <- internal$data$x_explain + n_explain <- internal$parameters$n_explain + n_samples <- internal$parameters$n_samples + n_features <- internal$parameters$n_features + copula.mu <- internal$parameters$copula.mu + copula.cov_mat <- internal$parameters$copula.cov_mat + copula.x_explain_gaussian <- internal$data$copula.x_explain_gaussian + + x_explain0 <- as.matrix(x_explain) + dt_l <- list() + if (is.null(index_features)) { + features <- X$features + } else { + features <- X$features[index_features] + } + + for (i in seq_len(n_explain)) { + cat(sprintf("%d,", i)) + l <- lapply( + X = features, + FUN = sample_copula_old, + n_samples = n_samples, + mu = copula.mu, + cov_mat = copula.cov_mat, + m = n_features, + x_explain = x_explain0[i, , drop = FALSE], + x_train = as.matrix(x_train), + x_explain_gaussian = copula.x_explain_gaussian[i, , drop = FALSE] + ) + dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination") + dt_l[[i]][, w := 1 / n_samples] + dt_l[[i]][, id := i] + if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]] + } + + dt <- data.table::rbindlist(dt_l, use.names = TRUE, fill = TRUE) + + return(dt) +} + +#' Sample conditional variables using the Gaussian copula approach +#' +#' @param index_given Integer vector. The indices of the features to condition upon. Note that +#' `min(index_given) >= 1` and `max(index_given) <= m`. +#' @param m Positive integer. The total number of features. +#' @param x_explain_gaussian Numeric matrix. Contains the observation whose predictions ought +#' to be explained (test data), +#' after quantile-transforming them to standard Gaussian variables. +#' @param x_explain Numeric matrix. Contains the features of the observation whose +#' predictions ought to be explained (test data). +#' +#' @return data.table +#' +#' @keywords internal +#' +#' @author Martin Jullum +sample_copula_old <- function(index_given, n_samples, mu, cov_mat, m, x_explain_gaussian, x_train, x_explain) { + # Handles the unconditional and full conditional separtely when predicting + if (length(index_given) %in% c(0, m)) { + ret <- matrix(x_explain, ncol = m, nrow = 1) + } else { + dependent_ind <- (seq_len(length(mu)))[-index_given] + + tmp <- condMVNorm::condMVN( + mean = mu, + sigma = cov_mat, + dependent.ind = dependent_ind, + given.ind = index_given, + X.given = x_explain_gaussian[index_given] + ) + + ret0_z <- mvnfast::rmvn(n = n_samples, mu = tmp$condMean, sigma = tmp$condVar) + + ret0_x <- apply( + X = rbind(ret0_z, x_train[, dependent_ind, drop = FALSE]), + MARGIN = 2, + FUN = inv_gaussian_transform_old, + n_z = n_samples, + type = 5 + ) + + ret <- matrix(NA, ncol = m, nrow = n_samples) + ret[, index_given] <- rep(x_explain[index_given], each = n_samples) + ret[, dependent_ind] <- ret0_x + } + colnames(ret) <- colnames(x_explain) + return(as.data.table(ret)) +} + + +#' Transforms new data to a standardized normal distribution +#' +#' @param zx Numeric vector. The first `n_z` items are the Gaussian data, and the last part is +#' the data with the original transformation. +#' @param n_z Positive integer. Number of elements of `zx` that belongs to new data. +#' +#' @return Numeric vector of length `n_z` +#' +#' @keywords internal +#' +#' @author Martin Jullum +inv_gaussian_transform_old <- function(zx, n_z, type) { + if (n_z >= length(zx)) stop("n_z should be less than length(zx)") + ind <- 1:n_z + z <- zx[ind] + x <- zx[-ind] + u <- stats::pnorm(z) + x_new <- stats::quantile(x, probs = u, type = type) + return(as.double(x_new)) +} + + + + +# Setup ----------------------------------------------------------------------------------------------------------- +{ + n_samples <- 1000 + n_train <- 1000 + n_test <- 6 + M <- 8 + rho <- 0.5 + betas <- c(0, rep(1, M)) + + # We use the Gaussian copula approach + approach <- "copula" + + # Mean of the multivariate Gaussian distribution + mu <- rep(0, times = M) + mu <- seq(M) + + # Create the covariance matrix + sigma <- matrix(rho, ncol = M, nrow = M) # Old + for (i in seq(1, M - 1)) { + for (j in seq(i + 1, M)) { + sigma[i, j] <- sigma[j, i] <- rho^abs(i - j) + } + } + diag(sigma) <- 1 + + # Set seed for reproducibility + seed_setup <- 1996 + set.seed(seed_setup) + + # Make Gaussian data + data_train <- data.table(mvtnorm::rmvnorm(n = n_train, mean = mu, sigma = sigma)) + data_test <- data.table(mvtnorm::rmvnorm(n = n_test, mean = mu, sigma = sigma)) + colnames(data_train) <- paste("X", seq(M), sep = "") + colnames(data_test) <- paste("X", seq(M), sep = "") + + # Make the response + response_train <- as.vector(cbind(1, as.matrix(data_train)) %*% betas) + response_test <- as.vector(cbind(1, as.matrix(data_test)) %*% betas) + + # Put together the data + data_train_with_response <- copy(data_train)[, y := response_train] + data_test_with_response <- copy(data_test)[, y := response_test] + + # Fit a LM model + predictive_model <- lm(y ~ ., data = data_train_with_response) + + # Get the prediction zero, i.e., the phi0 Shapley value. + prediction_zero <- mean(response_train) + + model <- predictive_model + x_explain <- data_test + x_train <- data_train + keep_samp_for_vS <- FALSE + predict_model <- NULL + get_model_specs <- NULL + timing <- TRUE + n_combinations <- NULL + group <- NULL + feature_specs <- get_feature_specs(get_model_specs, model) + n_batches <- 1 + seed <- 1 + + internal <- setup( + x_train = x_train, + x_explain = x_explain, + approach = approach, + prediction_zero = prediction_zero, + n_combinations = n_combinations, + group = group, + n_samples = n_samples, + n_batches = n_batches, + seed = seed, + feature_specs = feature_specs, + keep_samp_for_vS = keep_samp_for_vS, + predict_model = predict_model, + get_model_specs = get_model_specs, + timing = timing + ) + + # Gets predict_model (if not passed to explain) + predict_model <- get_predict_model( + predict_model = predict_model, + model = model + ) + + # Sets up the Shapley (sampling) framework and prepares the + # conditional expectation computation for the chosen approach + # Note: model and predict_model are ONLY used by the AICc-methods of approach empirical to find optimal parameters + internal <- setup_computation(internal, model, predict_model) +} + +# Compare --------------------------------------------------------------------------------------------------------- + +# Recall that old version iterate over the observations and then the coalitions. +# While the new version iterate over the coalitions and then the observations. +# The latter lets us reuse the computed conditional distributions for all observations. +look_at_coalitions <- seq(1, 2^M - 2) +look_at_coalitions <- seq(1, 2^M - 2, 10) +#look_at_coalitions <- seq(1, 2^M - 2, 25) + +# The old R code +time_old <- system.time({ + res_old <- prepare_data.copula_old( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) + +# The new C++ code +time_new <- system.time({ + res_new <- prepare_data.copula( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +setorderv(res_new, c("id", "id_combination")) + +# Time +time_old +time_new + +# Relative speedup of new method +time_old/time_new + +# Aggregate the MC sample values for each explicand and combination +res_old = res_old[,w:=NULL] +res_new = res_new[,w:=NULL] +res_old_agr = res_old[,w:=NULL][, lapply(.SD, mean), by = c("id", "id_combination")] +res_new_agr = res_new[, lapply(.SD, mean), by = c("id", "id_combination")] + +# Difference +res_old_agr - res_new_agr + +# Max absolute difference +max(abs(res_old_agr - res_new_agr)) + +# Max absolute relative difference +max(abs((res_old_agr - res_new_agr)/res_new_agr)) From ff0b2b4abf8db158f8e467edc5aac696b75dfb92 Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 16:42:19 +0100 Subject: [PATCH 38/62] lintr + stylr --- R/approach_copula.R | 2 +- R/approach_gaussian.R | 2 +- python/install_r_packages.R | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/approach_copula.R b/R/approach_copula.R index 56f354baa..52cada21b 100644 --- a/R/approach_copula.R +++ b/R/approach_copula.R @@ -91,7 +91,7 @@ prepare_data.copula <- function(internal, index_features, ...) { # Convert to a data.table and add extra identification columns dt <- data.table::as.data.table(dt) data.table::setnames(dt, feature_names) - dt[, id_combination := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, id_combination := rep(seq_len(nrow(S)), each = n_samples * n_explain)] dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] dt[, w := 1 / n_samples] dt[, id_combination := index_features[id_combination]] diff --git a/R/approach_gaussian.R b/R/approach_gaussian.R index 429fbbc01..23dd34d98 100644 --- a/R/approach_gaussian.R +++ b/R/approach_gaussian.R @@ -80,7 +80,7 @@ prepare_data.gaussian <- function(internal, index_features, ...) { # Convert to a data.table and add extra identification columns dt <- data.table::as.data.table(dt) data.table::setnames(dt, feature_names) - dt[, id_combination := rep(seq(nrow(S)), each = n_samples * n_explain)] + dt[, id_combination := rep(seq_len(nrow(S)), each = n_samples * n_explain)] dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] dt[, w := 1 / n_samples] dt[, id_combination := index_features[id_combination]] diff --git a/python/install_r_packages.R b/python/install_r_packages.R index 0c8768d91..598886243 100644 --- a/python/install_r_packages.R +++ b/python/install_r_packages.R @@ -1,3 +1,4 @@ # Installs the required R-packages -install.packages("remotes",repos="https://cloud.r-project.org") -remotes::install_github("NorskRegnesentral/shapr") # Installs the development version of shapr from the master branch on CRAN +install.packages("remotes", repos = "https://cloud.r-project.org") +remotes::install_github("NorskRegnesentral/shapr") +# Installs the development version of shapr from the master branch on CRAN From 1de33a11b4e82363b7ad0dbadc0ccf20d47e8a6f Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 16:46:50 +0100 Subject: [PATCH 39/62] typo --- inst/scripts/compare_copula_in_R_and_C++.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/scripts/compare_copula_in_R_and_C++.R b/inst/scripts/compare_copula_in_R_and_C++.R index bfeb7ba2f..97adfd748 100644 --- a/inst/scripts/compare_copula_in_R_and_C++.R +++ b/inst/scripts/compare_copula_in_R_and_C++.R @@ -248,7 +248,7 @@ time_old/time_new # Aggregate the MC sample values for each explicand and combination res_old = res_old[,w:=NULL] res_new = res_new[,w:=NULL] -res_old_agr = res_old[,w:=NULL][, lapply(.SD, mean), by = c("id", "id_combination")] +res_old_agr = res_old[, lapply(.SD, mean), by = c("id", "id_combination")] res_new_agr = res_new[, lapply(.SD, mean), by = c("id", "id_combination")] # Difference From 6d6d7bcb74480562bdd07ec8a6b1448ae8086e0f Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 22:14:45 +0100 Subject: [PATCH 40/62] Added a copula version that uses cpp, but r for quantile --- R/RcppExports.R | 13 ++++- R/approach_copula.R | 95 ++++++++++++++++++++++++++++++++ src/Copula.cpp | 130 +++++++++++++++++++++++++++++++++++++++++--- src/RcppExports.cpp | 18 ++++++ 4 files changed, 244 insertions(+), 12 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index 20b6763d2..3d5a9e29e 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -80,6 +80,9 @@ aicc_full_cpp <- function(h, X_list, mcov_list, S_scale_dist, y_list, negative) .Call(`_shapr_aicc_full_cpp`, h, X_list, mcov_list, S_scale_dist, y_list, negative) } +#' Generate (Gaussian) Copula MC samples +NULL + #' Transforms new data to a standardized normal distribution #' #' @details The function uses `arma::quantile(...)` which corresponds to R's `stats::quantile(..., type = 5)`. @@ -87,7 +90,7 @@ aicc_full_cpp <- function(h, X_list, mcov_list, S_scale_dist, y_list, negative) #' @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform. #' @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`. #' -#' @return arma::mat of same dimension as `z` +#' @return arma::mat of the same dimension as `z` #' #' @keywords internal #' @author Lars Henry Berge Olsen @@ -95,6 +98,10 @@ inv_gaussian_transform_cpp <- function(z, x) { .Call(`_shapr_inv_gaussian_transform_cpp`, z, x) } +prepare_data_copula_cpp <- function(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_copula_cpp`, MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) +} + #' Generate (Gaussian) Copula MC samples #' #' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the @@ -121,8 +128,8 @@ inv_gaussian_transform_cpp <- function(z, x) { #' @export #' @keywords internal #' @author Lars Henry Berge Olsen -prepare_data_copula_cpp <- function(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) { - .Call(`_shapr_prepare_data_copula_cpp`, MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) +prepare_data_copula_cpp_and_R <- function(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) { + .Call(`_shapr_prepare_data_copula_cpp_and_R`, MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) } #' Generate Gaussian MC samples diff --git a/R/approach_copula.R b/R/approach_copula.R index 52cada21b..6533cfc97 100644 --- a/R/approach_copula.R +++ b/R/approach_copula.R @@ -100,6 +100,101 @@ prepare_data.copula <- function(internal, index_features, ...) { return(dt) } +#' @inheritParams default_doc +#' @rdname prepare_data +#' @export +#' @author Lars Henry Berge Olsen +prepare_data.copula_cpp_and_R <- function(internal, index_features, ...) { + # Extract used variables + S <- internal$objects$S[index_features, , drop = FALSE] + feature_names <- internal$parameters$feature_names + n_explain <- internal$parameters$n_explain + n_samples <- internal$parameters$n_samples + n_features <- internal$parameters$n_features + n_combinations_now <- length(index_features) + x_train_mat <- as.matrix(internal$data$x_train) + x_explain_mat <- as.matrix(internal$data$x_explain) + copula.mu <- internal$parameters$copula.mu + copula.cov_mat <- internal$parameters$copula.cov_mat + copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) + + # TODO: Note that `as.matrix` is not needed for `copula.x_explain_gaussian_mat` as it is already defined as a matrix + # in `setup_approach.copula`, however, it seems that Martin plans to make it into a data.table, thus, I include + # `as.matrix` as future safety. DISCUSS WITH MARTIN WHAT HIS PLANS ARE! + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Use C++ to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}), for all coalitions and explicands, + # and then transforming them back to the original scale using the inverse Gaussian transform in C++. + # The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). + dt <- prepare_data_copula_cpp_and_R( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat + ) + + # Reshape `dt` to a 2D array of dimension (n_samples * n_explain * n_coalitions, n_features). + dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) + + # Convert to a data.table and add extra identification columns + dt <- data.table::as.data.table(dt) + data.table::setnames(dt, feature_names) + dt[, id_combination := rep(seq_len(nrow(S)), each = n_samples * n_explain)] + dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, w := 1 / n_samples] + dt[, id_combination := index_features[id_combination]] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + return(dt) +} + +#' Transform data using the inverse Gaussian transformation. +#' +#' @details This function is called from `prepare_data_copula_cpp()` as the this was faster +#' +#' @param z Matrix. The data are the Gaussian Monte Carlos samples to transform. +#' @param x Matrix. The data with the original transformation. Used to conduct the transformation of `z`. +#' +#' @return Matrix of the same dimension as `z`. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +inv_gaussian_transform_R <- function(z, x) { + u <- stats::pnorm(z) + x_new = sapply(seq_len(ncol(u)), function (idx) quantile.type7(x[,idx], probs = u[,idx])) + return(x_new) +} + +#' Compute the quantiles using quantile type seven +#' +#' @details Using quantile type number 7 from stats::quantile. +#' +#' @param x numeric vector whose sample quantiles are wanted. +#' @param probs numeric vector of probabilities with values between zero and one. +#' +#' @return A vector of length length(`probs`) is returned. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +quantile.type7 <- function(x, probs) { + n <- length(x) + probs <- pmax(0, pmin(1, probs)) # allow for slight overshoot + index <- 1 + (n - 1) * probs + lo <- floor(index) + hi <- ceiling(index) + x <- sort(x, partial = unique(c(lo, hi))) + qs <- x[lo] + i <- which(index > lo) + h <- (index - lo)[i] + qs[i] <- (1 - h) * qs[i] + h * x[hi[i]] + return(qs) +} + #' Transforms new data to standardized normal (dimension 1) based on other data transformations #' #' @param yx Numeric vector. The first `n_y` items is the data that is transformed, and last diff --git a/src/Copula.cpp b/src/Copula.cpp index 81e142778..248add593 100644 --- a/src/Copula.cpp +++ b/src/Copula.cpp @@ -9,7 +9,7 @@ //' @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform. //' @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`. //' -//' @return arma::mat of same dimension as `z` +//' @return arma::mat of the same dimension as `z` //' //' @keywords internal //' @author Lars Henry Berge Olsen @@ -25,6 +25,111 @@ arma::mat inv_gaussian_transform_cpp(arma::mat z, arma::mat x) { return z_new; } +//' Generate (Gaussian) Copula MC samples + //' + //' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the + //' univariate standard normal. + //' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations + //' to explain on the original scale. + //' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the + //' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been + //' transformed to a standardized normal distribution. + //' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. + //' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of + //' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. + //' This is not a problem internally in shapr as the empty and grand coalitions treated differently. + //' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed + //' using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. + //' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance + //' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been + //' transformed to a standardized normal distribution. + //' + //' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where + //' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian + //' copula MC samples for each explicand and coalition on the original scale. + //' + //' @export + //' @keywords internal + //' @author Lars Henry Berge Olsen + // [[Rcpp::export]] + arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat x_explain_gaussian_mat, + arma::mat x_train_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Initialize auxiliary matrix and result cube + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on, both on the original scale and the Gaussian transformed values. + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); + + // Extract the mean values of the Gaussian transformed features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the Gaussian transformed covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star_gaussian, i.e., of the Gaussian transformed features + arma::mat x_Sbar_gaussian_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); + x_Sbar_gaussian_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_{Sbar|S}) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different explicands and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + + // Transform the MC samples to be from N(mu_{Sbar|S}, Sigma_{Sbar|S}) for one coalition and one explicand + arma::mat MC_samples_mat_now_now = + MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); + + // Transform the MC to the original scale using the inverse Gaussian transform + arma::mat MC_samples_mat_now_now_trans = + inv_gaussian_transform_cpp(MC_samples_mat_now_now, x_train_mat.cols(Sbar_now_idx)); + + // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + + // Insert the auxiliary matrix into the result cube + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; + } + //' Generate (Gaussian) Copula MC samples //' //' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the @@ -52,19 +157,22 @@ arma::mat inv_gaussian_transform_cpp(arma::mat z, arma::mat x) { //' @keywords internal //' @author Lars Henry Berge Olsen // [[Rcpp::export]] -arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat x_explain_gaussian_mat, - arma::mat x_train_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { +arma::cube prepare_data_copula_cpp_and_R(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat x_explain_gaussian_mat, + arma::mat x_train_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { int n_explain = x_explain_mat.n_rows; int n_samples = MC_samples_mat.n_rows; int n_features = MC_samples_mat.n_cols; int n_coalitions = S.n_rows; + // Get the R functions for computing the inverse gaussian transform + Rcpp::Function inv_gaussian_transform_R("inv_gaussian_transform_R"); + // Initialize auxiliary matrix and result cube arma::mat aux_mat(n_samples, n_features); arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); @@ -114,9 +222,13 @@ arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, arma::mat MC_samples_mat_now_now = MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); + arma::mat x_train_mat_now = x_train_mat.cols(Sbar_now_idx); + //arma::mat x_train_mat_now = arma::normcdf(x_train_mat.cols(Sbar_now_idx)); + // Transform the MC to the original scale using the inverse Gaussian transform arma::mat MC_samples_mat_now_now_trans = - inv_gaussian_transform_cpp(MC_samples_mat_now_now, x_train_mat.cols(Sbar_now_idx)); + Rcpp::as(inv_gaussian_transform_R(Rcpp::wrap(MC_samples_mat_now_now), + Rcpp::wrap(x_train_mat_now))); // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 8ab20c076..681f3dbe9 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -109,6 +109,23 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// prepare_data_copula_cpp_and_R +arma::cube prepare_data_copula_cpp_and_R(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat x_explain_gaussian_mat, arma::mat x_train_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +RcppExport SEXP _shapr_prepare_data_copula_cpp_and_R(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP x_explain_gaussian_matSEXP, SEXP x_train_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_explain_gaussian_mat(x_explain_gaussian_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type x_train_mat(x_train_matSEXP); + Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); + Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); + Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); + rcpp_result_gen = Rcpp::wrap(prepare_data_copula_cpp_and_R(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat)); + return rcpp_result_gen; +END_RCPP +} // prepare_data_gaussian_cpp arma::cube prepare_data_gaussian_cpp(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); RcppExport SEXP _shapr_prepare_data_gaussian_cpp(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { @@ -201,6 +218,7 @@ static const R_CallMethodDef CallEntries[] = { {"_shapr_aicc_full_cpp", (DL_FUNC) &_shapr_aicc_full_cpp, 6}, {"_shapr_inv_gaussian_transform_cpp", (DL_FUNC) &_shapr_inv_gaussian_transform_cpp, 2}, {"_shapr_prepare_data_copula_cpp", (DL_FUNC) &_shapr_prepare_data_copula_cpp, 7}, + {"_shapr_prepare_data_copula_cpp_and_R", (DL_FUNC) &_shapr_prepare_data_copula_cpp_and_R, 7}, {"_shapr_prepare_data_gaussian_cpp", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp, 5}, {"_shapr_mahalanobis_distance_cpp", (DL_FUNC) &_shapr_mahalanobis_distance_cpp, 5}, {"_shapr_sample_features_cpp", (DL_FUNC) &_shapr_sample_features_cpp, 2}, From 8eec71267763ccf931ab866096a9625093c7882d Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 22:16:21 +0100 Subject: [PATCH 41/62] Updated comparison file --- inst/scripts/compare_copula_in_R_and_C++.R | 74 ++++++++++++++++------ 1 file changed, 53 insertions(+), 21 deletions(-) diff --git a/inst/scripts/compare_copula_in_R_and_C++.R b/inst/scripts/compare_copula_in_R_and_C++.R index 97adfd748..1fc000cdd 100644 --- a/inst/scripts/compare_copula_in_R_and_C++.R +++ b/inst/scripts/compare_copula_in_R_and_C++.R @@ -89,7 +89,7 @@ sample_copula_old <- function(index_given, n_samples, mu, cov_mat, m, x_explain_ MARGIN = 2, FUN = inv_gaussian_transform_old, n_z = n_samples, - type = 5 + type = 7 ) ret <- matrix(NA, ncol = m, nrow = n_samples) @@ -129,7 +129,7 @@ inv_gaussian_transform_old <- function(zx, n_z, type) { { n_samples <- 1000 n_train <- 1000 - n_test <- 6 + n_test <- 20 M <- 8 rho <- 0.5 betas <- c(0, rep(1, M)) @@ -217,45 +217,77 @@ inv_gaussian_transform_old <- function(zx, n_z, type) { } # Compare --------------------------------------------------------------------------------------------------------- +set.seed(123) # Recall that old version iterate over the observations and then the coalitions. # While the new version iterate over the coalitions and then the observations. # The latter lets us reuse the computed conditional distributions for all observations. look_at_coalitions <- seq(1, 2^M - 2) -look_at_coalitions <- seq(1, 2^M - 2, 10) -#look_at_coalitions <- seq(1, 2^M - 2, 25) +# look_at_coalitions <- seq(1, 2^M - 2, 10) +# look_at_coalitions <- seq(1, 2^M - 2, 25) # The old R code -time_old <- system.time({ - res_old <- prepare_data.copula_old( +time_only_R <- system.time({ + res_only_R <- prepare_data.copula_old( internal = internal, index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +time_only_R -# The new C++ code -time_new <- system.time({ - res_new <- prepare_data.copula( +# The new C++ code with quantile from arma +time_only_cpp <- system.time({ + res_only_cpp <- prepare_data.copula( internal = internal, index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) -setorderv(res_new, c("id", "id_combination")) +data.table::setorderv(res_only_cpp, c("id", "id_combination")) +time_only_cpp -# Time -time_old -time_new +# The new C++ code with quantile from R +time_cpp_and_R <- system.time({ + res_cpp_and_R <- prepare_data.copula_cpp_and_R( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) +data.table::setorderv(res_cpp_and_R, c("id", "id_combination")) +time_cpp_and_R + +# Create a table of the times. Less is better +times <- rbind(time_only_R, + time_only_cpp, + time_cpp_and_R) +times + +# TIMES for all coalitions, n_samples <- 1000, n_train <- 1000, n_test <- 20, M <- 8 +# user.self sys.self elapsed user.child sys.child +# time_only_R 14.081 1.341 15.659 0.000 0.000 +# time_only_cpp 108.905 0.850 112.089 0.021 0.068 +# time_cpp_and_R 6.829 1.022 8.165 0.000 0.000 # Relative speedup of new method -time_old/time_new +times_relative <- t(sapply(seq_len(nrow(times)), function(idx) times[1, ] / times[idx, ])) +rownames(times_relative) <- paste0(rownames(times), "_rel") +times_relative + +# RELATIVE TIMES for all coalitions, n_samples <- 1000, n_train <- 1000, n_test <- 20, M <- 8 +# user.self sys.self elapsed user.child sys.child +# time_only_R_rel 1.0000 1.0000 1.0000 NaN NaN +# time_only_cpp_rel 0.1293 1.5776 0.1397 0 0 +# time_cpp_and_R_rel 2.0619 1.3121 1.9178 NaN NaN # Aggregate the MC sample values for each explicand and combination -res_old = res_old[,w:=NULL] -res_new = res_new[,w:=NULL] -res_old_agr = res_old[, lapply(.SD, mean), by = c("id", "id_combination")] -res_new_agr = res_new[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_R = res_only_R[, w:= NULL] +res_only_cpp = res_only_cpp[, w:= NULL] +res_cpp_and_R = res_cpp_and_R[, w:= NULL] +res_only_R_agr = res_only_R[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_cpp_agr = res_only_cpp[, lapply(.SD, mean), by = c("id", "id_combination")] +res_cpp_and_R_agr = res_cpp_and_R[, lapply(.SD, mean), by = c("id", "id_combination")] # Difference -res_old_agr - res_new_agr +res_only_R_agr - res_only_cpp_agr +res_only_R_agr - res_cpp_and_R_agr # Max absolute difference -max(abs(res_old_agr - res_new_agr)) +max(abs(res_only_R_agr - res_only_cpp_agr)) +max(abs(res_only_R_agr - res_cpp_and_R_agr)) # Max absolute relative difference -max(abs((res_old_agr - res_new_agr)/res_new_agr)) +max(abs(res_only_R_agr - res_only_cpp_agr)/res_only_cpp_agr) +max(abs(res_only_R_agr - res_cpp_and_R_agr)/res_cpp_and_R_agr) From a0f6132f78d7b81ec8147194094795a15690e40f Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 23:36:53 +0100 Subject: [PATCH 42/62] Ran code several time to get more CPU times --- inst/scripts/compare_copula_in_R_and_C++.R | 80 ++++++++++++++++------ 1 file changed, 60 insertions(+), 20 deletions(-) diff --git a/inst/scripts/compare_copula_in_R_and_C++.R b/inst/scripts/compare_copula_in_R_and_C++.R index 1fc000cdd..5d9fa0384 100644 --- a/inst/scripts/compare_copula_in_R_and_C++.R +++ b/inst/scripts/compare_copula_in_R_and_C++.R @@ -217,6 +217,8 @@ inv_gaussian_transform_old <- function(zx, n_z, type) { } # Compare --------------------------------------------------------------------------------------------------------- +set.seed(321) +set.seed(2024) set.seed(123) # Recall that old version iterate over the observations and then the coalitions. @@ -230,14 +232,18 @@ look_at_coalitions <- seq(1, 2^M - 2) time_only_R <- system.time({ res_only_R <- prepare_data.copula_old( internal = internal, - index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) time_only_R # The new C++ code with quantile from arma time_only_cpp <- system.time({ res_only_cpp <- prepare_data.copula( internal = internal, - index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) data.table::setorderv(res_only_cpp, c("id", "id_combination")) time_only_cpp @@ -245,21 +251,40 @@ time_only_cpp time_cpp_and_R <- system.time({ res_cpp_and_R <- prepare_data.copula_cpp_and_R( internal = internal, - index_features = internal$objects$S_batch$`1`[look_at_coalitions])}) + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) data.table::setorderv(res_cpp_and_R, c("id", "id_combination")) time_cpp_and_R # Create a table of the times. Less is better -times <- rbind(time_only_R, - time_only_cpp, - time_cpp_and_R) +times <- rbind( + time_only_R, + time_only_cpp, + time_cpp_and_R +) times # TIMES for all coalitions, n_samples <- 1000, n_train <- 1000, n_test <- 20, M <- 8 # user.self sys.self elapsed user.child sys.child -# time_only_R 14.081 1.341 15.659 0.000 0.000 -# time_only_cpp 108.905 0.850 112.089 0.021 0.068 -# time_cpp_and_R 6.829 1.022 8.165 0.000 0.000 +# time_only_R 64.263 2.130 68.793 0 0 +# time_only_cpp 112.403 1.018 117.429 0 0 +# time_cpp_and_R 7.020 1.370 8.854 0 0 + +# user.self sys.self elapsed user.child sys.child +# time_only_R 67.230 1.822 70.997 9.807 0.431 +# time_only_cpp 113.848 1.056 118.330 0.000 0.000 +# time_cpp_and_R 7.444 1.512 9.065 0.000 0.000 + +# user.self sys.self elapsed user.child sys.child +# time_only_R 81.699 3.180 90.926 0.000 0.000 +# time_only_cpp 115.815 1.547 122.401 0.021 0.068 +# time_cpp_and_R 7.976 1.750 10.539 1.491 0.403 + +# user.self sys.self elapsed user.child sys.child +# time_only_R 63.600 2.009 67.670 0 0 +# time_only_cpp 113.768 1.032 118.364 0 0 +# time_cpp_and_R 6.824 1.260 8.188 0 0 # Relative speedup of new method times_relative <- t(sapply(seq_len(nrow(times)), function(idx) times[1, ] / times[idx, ])) @@ -268,17 +293,32 @@ times_relative # RELATIVE TIMES for all coalitions, n_samples <- 1000, n_train <- 1000, n_test <- 20, M <- 8 # user.self sys.self elapsed user.child sys.child -# time_only_R_rel 1.0000 1.0000 1.0000 NaN NaN -# time_only_cpp_rel 0.1293 1.5776 0.1397 0 0 -# time_cpp_and_R_rel 2.0619 1.3121 1.9178 NaN NaN +# time_only_R_rel 1.00000 1.0000 1.00000 1 1 +# time_only_cpp_rel 0.59052 1.7254 0.59999 Inf Inf +# time_cpp_and_R_rel 9.03143 1.2050 7.83199 Inf Inf + +# user.self sys.self elapsed user.child sys.child +# time_only_R_rel 1.00000 1.0000 1.00000 NaN NaN +# time_only_cpp_rel 0.70543 2.0556 0.74285 0 0 +# time_cpp_and_R_rel 10.24310 1.8171 8.62757 0 0 + +# user.self sys.self elapsed user.child sys.child +# time_only_R_rel 1.00000 1.0000 1.00000 NaN NaN +# time_only_cpp_rel 0.55903 1.9467 0.57171 NaN NaN +# time_cpp_and_R_rel 9.32005 1.5944 8.26453 NaN NaN + +# user.self sys.self elapsed user.child sys.child +# time_only_R_rel 1.00000 1.0000 1.00000 NaN NaN +# time_only_cpp_rel 0.57172 2.0923 0.58583 NaN NaN +# time_cpp_and_R_rel 9.15427 1.5547 7.76971 NaN NaN # Aggregate the MC sample values for each explicand and combination -res_only_R = res_only_R[, w:= NULL] -res_only_cpp = res_only_cpp[, w:= NULL] -res_cpp_and_R = res_cpp_and_R[, w:= NULL] -res_only_R_agr = res_only_R[, lapply(.SD, mean), by = c("id", "id_combination")] -res_only_cpp_agr = res_only_cpp[, lapply(.SD, mean), by = c("id", "id_combination")] -res_cpp_and_R_agr = res_cpp_and_R[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_R <- res_only_R[, w := NULL] +res_only_cpp <- res_only_cpp[, w := NULL] +res_cpp_and_R <- res_cpp_and_R[, w := NULL] +res_only_R_agr <- res_only_R[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_cpp_agr <- res_only_cpp[, lapply(.SD, mean), by = c("id", "id_combination")] +res_cpp_and_R_agr <- res_cpp_and_R[, lapply(.SD, mean), by = c("id", "id_combination")] # Difference res_only_R_agr - res_only_cpp_agr @@ -289,5 +329,5 @@ max(abs(res_only_R_agr - res_only_cpp_agr)) max(abs(res_only_R_agr - res_cpp_and_R_agr)) # Max absolute relative difference -max(abs(res_only_R_agr - res_only_cpp_agr)/res_only_cpp_agr) -max(abs(res_only_R_agr - res_cpp_and_R_agr)/res_cpp_and_R_agr) +max(abs(res_only_R_agr - res_only_cpp_agr) / res_only_cpp_agr) +max(abs(res_only_R_agr - res_cpp_and_R_agr) / res_cpp_and_R_agr) From 5bd7be6d128476f7d22492ac3a01078aa7633f1b Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 23:38:48 +0100 Subject: [PATCH 43/62] stylr --- R/approach_copula.R | 2 +- R/setup.R | 4 +-- R/setup_computation.R | 12 ++++--- tests/testthat/test-setup.R | 62 +++++++++++++++++++++++++------------ 4 files changed, 53 insertions(+), 27 deletions(-) diff --git a/R/approach_copula.R b/R/approach_copula.R index 6533cfc97..d81777434 100644 --- a/R/approach_copula.R +++ b/R/approach_copula.R @@ -166,7 +166,7 @@ prepare_data.copula_cpp_and_R <- function(internal, index_features, ...) { #' @author Lars Henry Berge Olsen inv_gaussian_transform_R <- function(z, x) { u <- stats::pnorm(z) - x_new = sapply(seq_len(ncol(u)), function (idx) quantile.type7(x[,idx], probs = u[,idx])) + x_new <- sapply(seq_len(ncol(u)), function(idx) quantile.type7(x[, idx], probs = u[, idx])) return(x_new) } diff --git a/R/setup.R b/R/setup.R index 9257439e8..9346b9b4e 100644 --- a/R/setup.R +++ b/R/setup.R @@ -377,8 +377,8 @@ get_extra_parameters <- function(internal) { } # Get the number of unique approaches - internal$parameters$n_approaches <- length(internal$parameters$approach) - internal$parameters$n_unique_approaches <- length(unique(internal$parameters$approach)) + internal$parameters$n_approaches <- length(internal$parameters$approach) + internal$parameters$n_unique_approaches <- length(unique(internal$parameters$approach)) return(internal) } diff --git a/R/setup_computation.R b/R/setup_computation.R index a3a7ff9db..195e1931e 100644 --- a/R/setup_computation.R +++ b/R/setup_computation.R @@ -639,15 +639,19 @@ create_S_batch_new <- function(internal, seed = NULL) { # Ensure that the number of batches is not larger than `n_batches`. # Remove one batch from the approach with the most batches. while (sum(batch_count_dt$n_batches_per_approach) > n_batches) { - batch_count_dt[which.max(n_batches_per_approach), - n_batches_per_approach := n_batches_per_approach - 1] + batch_count_dt[ + which.max(n_batches_per_approach), + n_batches_per_approach := n_batches_per_approach - 1 + ] } # Ensure that the number of batches is not lower than `n_batches`. # Add one batch to the approach with most coalitions per batch while (sum(batch_count_dt$n_batches_per_approach) < n_batches) { - batch_count_dt[which.max(n_S_per_approach / n_batches_per_approach), - n_batches_per_approach := n_batches_per_approach + 1] + batch_count_dt[ + which.max(n_S_per_approach / n_batches_per_approach), + n_batches_per_approach := n_batches_per_approach + 1 + ] } } diff --git a/tests/testthat/test-setup.R b/tests/testthat/test-setup.R index 6b7a2d4b7..86626a87d 100644 --- a/tests/testthat/test-setup.R +++ b/tests/testthat/test-setup.R @@ -1716,7 +1716,9 @@ test_that("Error with to low `n_batches` compared to the number of unique approa prediction_zero = p0, n_batches = 3, timing = FALSE, - seed = 1)) + seed = 1 + ) + ) # Except that shapr sets a valid `n_batches` and get no errors expect_no_error( @@ -1728,7 +1730,9 @@ test_that("Error with to low `n_batches` compared to the number of unique approa prediction_zero = p0, n_batches = NULL, timing = FALSE, - seed = 1)) + seed = 1 + ) + ) }) test_that("the used number of batches mathces the provided `n_batches` for combined approaches", { @@ -1740,11 +1744,14 @@ test_that("the used number of batches mathces the provided `n_batches` for combi prediction_zero = p0, n_batches = 2, timing = FALSE, - seed = 1) + seed = 1 + ) # Check that the used number of batches corresponds with the provided `n_batches` - expect_equal(explanation_1$internal$parameters$n_batches, - length(explanation_1$internal$objects$S_batch)) + expect_equal( + explanation_1$internal$parameters$n_batches, + length(explanation_1$internal$objects$S_batch) + ) explanation_2 <- explain( model = model_lm_numeric, @@ -1754,11 +1761,14 @@ test_that("the used number of batches mathces the provided `n_batches` for combi prediction_zero = p0, n_batches = 15, timing = FALSE, - seed = 1) + seed = 1 + ) # Check that the used number of batches corresponds with the provided `n_batches` - expect_equal(explanation_2$internal$parameters$n_batches, - length(explanation_2$internal$objects$S_batch)) + expect_equal( + explanation_2$internal$parameters$n_batches, + length(explanation_2$internal$objects$S_batch) + ) # Check for the default value for `n_batch` explanation_3 <- explain( @@ -1769,11 +1779,14 @@ test_that("the used number of batches mathces the provided `n_batches` for combi prediction_zero = p0, n_batches = NULL, timing = FALSE, - seed = 1) + seed = 1 + ) # Check that the used number of batches corresponds with the `n_batches` - expect_equal(explanation_3$internal$parameters$n_batches, - length(explanation_3$internal$objects$S_batch)) + expect_equal( + explanation_3$internal$parameters$n_batches, + length(explanation_3$internal$objects$S_batch) + ) }) test_that("setting the seed for combined approaches works", { @@ -1787,7 +1800,8 @@ test_that("setting the seed for combined approaches works", { approach = c("independence", "empirical", "gaussian", "copula"), prediction_zero = p0, timing = FALSE, - seed = 1) + seed = 1 + ) explanation_combined_2 <- explain( model = model_lm_numeric, @@ -1796,7 +1810,8 @@ test_that("setting the seed for combined approaches works", { approach = c("independence", "empirical", "gaussian", "copula"), prediction_zero = p0, timing = FALSE, - seed = 1) + seed = 1 + ) # Check that they are equal expect_equal(explanation_combined_1, explanation_combined_2) @@ -1810,7 +1825,8 @@ test_that("setting the seed for combined approaches works", { approach = c("independence", "empirical", "gaussian", "copula"), prediction_zero = p0, timing = FALSE, - seed = 1) + seed = 1 + ) explanation_combined_4 <- explain( model = model_lm_numeric, @@ -1819,7 +1835,8 @@ test_that("setting the seed for combined approaches works", { approach = c("independence", "empirical", "gaussian", "copula"), prediction_zero = p0, timing = FALSE, - seed = 1) + seed = 1 + ) # Check that they are equal expect_equal(explanation_combined_3, explanation_combined_4) @@ -1837,7 +1854,8 @@ test_that("counting the number of unique approaches", { approach = c("independence", "empirical", "gaussian", "copula"), prediction_zero = p0, timing = FALSE, - seed = 1) + seed = 1 + ) expect_equal(explanation_combined_1$internal$parameters$n_approaches, 4) expect_equal(explanation_combined_1$internal$parameters$n_unique_approaches, 4) @@ -1848,7 +1866,8 @@ test_that("counting the number of unique approaches", { approach = c("empirical"), prediction_zero = p0, timing = FALSE, - seed = 1) + seed = 1 + ) expect_equal(explanation_combined_2$internal$parameters$n_approaches, 1) expect_equal(explanation_combined_2$internal$parameters$n_unique_approaches, 1) @@ -1859,7 +1878,8 @@ test_that("counting the number of unique approaches", { approach = c("gaussian", "gaussian", "gaussian", "gaussian"), prediction_zero = p0, timing = FALSE, - seed = 1) + seed = 1 + ) expect_equal(explanation_combined_3$internal$parameters$n_approaches, 4) expect_equal(explanation_combined_3$internal$parameters$n_unique_approaches, 1) @@ -1870,7 +1890,8 @@ test_that("counting the number of unique approaches", { approach = c("independence", "empirical", "independence", "empirical"), prediction_zero = p0, timing = FALSE, - seed = 1) + seed = 1 + ) expect_equal(explanation_combined_4$internal$parameters$n_approaches, 4) expect_equal(explanation_combined_4$internal$parameters$n_unique_approaches, 2) @@ -1882,7 +1903,8 @@ test_that("counting the number of unique approaches", { approach = c("independence", "empirical", "independence", "empirical"), prediction_zero = p0, timing = FALSE, - seed = 1) + seed = 1 + ) expect_equal(explanation_combined_5$internal$parameters$n_approaches, 4) expect_equal(explanation_combined_5$internal$parameters$n_unique_approaches, 2) }) From 19a92bebd75bd76af202652e1824fb0234a141a6 Mon Sep 17 00:00:00 2001 From: LHBO Date: Sun, 7 Jan 2024 23:50:03 +0100 Subject: [PATCH 44/62] Updated manuals/documentation --- NAMESPACE | 4 + R/RcppExports.R | 29 +++- man/gaussian_transform_separate.Rd | 2 +- man/inv_gaussian_transform.Rd | 24 ---- man/inv_gaussian_transform_R.Rd | 26 ++++ man/inv_gaussian_transform_cpp.Rd | 26 ++++ man/prepare_data.Rd | 10 +- man/prepare_data_copula_cpp.Rd | 52 +++++++ man/prepare_data_copula_cpp_and_R.Rd | 52 +++++++ man/prepare_data_gaussian_cpp.Rd | 36 +++++ man/quantile.type7.Rd | 26 ++++ man/sample_copula.Rd | 40 ------ man/sample_gaussian.Rd | 27 ---- man/setup_approach.Rd | 3 + src/Copula.cpp | 207 +++++++++++++-------------- src/Gaussian.cpp | 2 + 16 files changed, 365 insertions(+), 201 deletions(-) delete mode 100644 man/inv_gaussian_transform.Rd create mode 100644 man/inv_gaussian_transform_R.Rd create mode 100644 man/inv_gaussian_transform_cpp.Rd create mode 100644 man/prepare_data_copula_cpp.Rd create mode 100644 man/prepare_data_copula_cpp_and_R.Rd create mode 100644 man/prepare_data_gaussian_cpp.Rd create mode 100644 man/quantile.type7.Rd delete mode 100644 man/sample_copula.Rd delete mode 100644 man/sample_gaussian.Rd diff --git a/NAMESPACE b/NAMESPACE index ecc8bdd1b..7249210f5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ S3method(predict_model,ranger) S3method(predict_model,xgb.Booster) S3method(prepare_data,categorical) S3method(prepare_data,copula) +S3method(prepare_data,copula_cpp_and_R) S3method(prepare_data,ctree) S3method(prepare_data,empirical) S3method(prepare_data,gaussian) @@ -62,6 +63,9 @@ export(mahalanobis_distance_cpp) export(observation_impute_cpp) export(predict_model) export(prepare_data) +export(prepare_data_copula_cpp) +export(prepare_data_copula_cpp_and_R) +export(prepare_data_gaussian_cpp) export(rss_cpp) export(setup) export(setup_approach) diff --git a/R/RcppExports.R b/R/RcppExports.R index 3d5a9e29e..419346d6d 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -80,9 +80,6 @@ aicc_full_cpp <- function(h, X_list, mcov_list, S_scale_dist, y_list, negative) .Call(`_shapr_aicc_full_cpp`, h, X_list, mcov_list, S_scale_dist, y_list, negative) } -#' Generate (Gaussian) Copula MC samples -NULL - #' Transforms new data to a standardized normal distribution #' #' @details The function uses `arma::quantile(...)` which corresponds to R's `stats::quantile(..., type = 5)`. @@ -98,6 +95,32 @@ inv_gaussian_transform_cpp <- function(z, x) { .Call(`_shapr_inv_gaussian_transform_cpp`, z, x) } +#' Generate (Gaussian) Copula MC samples +#' +#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +#' univariate standard normal. +#' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +#' to explain on the original scale. +#' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the +#' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +#' transformed to a standardized normal distribution. +#' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. +#' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +#' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +#' This is not a problem internally in shapr as the empty and grand coalitions treated differently. +#' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed +#' using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. +#' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +#' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +#' transformed to a standardized normal distribution. +#' +#' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +#' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +#' copula MC samples for each explicand and coalition on the original scale. +#' +#' @export +#' @keywords internal +#' @author Lars Henry Berge Olsen prepare_data_copula_cpp <- function(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) { .Call(`_shapr_prepare_data_copula_cpp`, MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) } diff --git a/man/gaussian_transform_separate.Rd b/man/gaussian_transform_separate.Rd index eef1e0c6a..89afb6494 100644 --- a/man/gaussian_transform_separate.Rd +++ b/man/gaussian_transform_separate.Rd @@ -10,7 +10,7 @@ gaussian_transform_separate(yx, n_y) \item{yx}{Numeric vector. The first \code{n_y} items is the data that is transformed, and last part is the data with the original transformation.} -\item{n_y}{Positive integer. Number of elements of \code{yx} that belongs to the gaussian data.} +\item{n_y}{Positive integer. Number of elements of \code{yx} that belongs to the Gaussian data.} } \value{ Vector of back-transformed Gaussian data diff --git a/man/inv_gaussian_transform.Rd b/man/inv_gaussian_transform.Rd deleted file mode 100644 index 76f058772..000000000 --- a/man/inv_gaussian_transform.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/approach_copula.R -\name{inv_gaussian_transform} -\alias{inv_gaussian_transform} -\title{Transforms new data to a standardized normal distribution} -\usage{ -inv_gaussian_transform(zx, n_z) -} -\arguments{ -\item{zx}{Numeric vector. The first \code{n_z} items are the Gaussian data, and the last part is -the data with the original transformation.} - -\item{n_z}{Positive integer. Number of elements of \code{zx} that belongs to new data.} -} -\value{ -Numeric vector of length \code{n_z} -} -\description{ -Transforms new data to a standardized normal distribution -} -\author{ -Martin Jullum -} -\keyword{internal} diff --git a/man/inv_gaussian_transform_R.Rd b/man/inv_gaussian_transform_R.Rd new file mode 100644 index 000000000..5b79ffe32 --- /dev/null +++ b/man/inv_gaussian_transform_R.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_copula.R +\name{inv_gaussian_transform_R} +\alias{inv_gaussian_transform_R} +\title{Transform data using the inverse Gaussian transformation.} +\usage{ +inv_gaussian_transform_R(z, x) +} +\arguments{ +\item{z}{Matrix. The data are the Gaussian Monte Carlos samples to transform.} + +\item{x}{Matrix. The data with the original transformation. Used to conduct the transformation of \code{z}.} +} +\value{ +Matrix of the same dimension as \code{z}. +} +\description{ +Transform data using the inverse Gaussian transformation. +} +\details{ +This function is called from \code{prepare_data_copula_cpp()} as the this was faster +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/inv_gaussian_transform_cpp.Rd b/man/inv_gaussian_transform_cpp.Rd new file mode 100644 index 000000000..e3a2bd87d --- /dev/null +++ b/man/inv_gaussian_transform_cpp.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{inv_gaussian_transform_cpp} +\alias{inv_gaussian_transform_cpp} +\title{Transforms new data to a standardized normal distribution} +\usage{ +inv_gaussian_transform_cpp(z, x) +} +\arguments{ +\item{z}{arma::mat. The data are the Gaussian Monte Carlos samples to transform.} + +\item{x}{arma::mat. The data with the original transformation. Used to conduct the transformation of \code{z}.} +} +\value{ +arma::mat of the same dimension as \code{z} +} +\description{ +Transforms new data to a standardized normal distribution +} +\details{ +The function uses \code{arma::quantile(...)} which corresponds to R's \code{stats::quantile(..., type = 5)}. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd index 097fef9b8..6f7353a93 100644 --- a/man/prepare_data.Rd +++ b/man/prepare_data.Rd @@ -6,6 +6,7 @@ \alias{prepare_data} \alias{prepare_data.categorical} \alias{prepare_data.copula} +\alias{prepare_data.copula_cpp_and_R} \alias{prepare_data.ctree} \alias{prepare_data.empirical} \alias{prepare_data.gaussian} @@ -17,13 +18,15 @@ prepare_data(internal, index_features = NULL, ...) \method{prepare_data}{categorical}(internal, index_features = NULL, ...) -\method{prepare_data}{copula}(internal, index_features = NULL, ...) +\method{prepare_data}{copula}(internal, index_features, ...) + +\method{prepare_data}{copula_cpp_and_R}(internal, index_features, ...) \method{prepare_data}{ctree}(internal, index_features = NULL, ...) \method{prepare_data}{empirical}(internal, index_features = NULL, ...) -\method{prepare_data}{gaussian}(internal, index_features = NULL, ...) +\method{prepare_data}{gaussian}(internal, index_features, ...) \method{prepare_data}{independence}(internal, index_features = NULL, ...) @@ -44,4 +47,7 @@ the contribution function by Monte Carlo integration. \description{ Generate data used for predictions and Monte Carlo integration } +\author{ +Lars Henry Berge Olsen +} \keyword{internal} diff --git a/man/prepare_data_copula_cpp.Rd b/man/prepare_data_copula_cpp.Rd new file mode 100644 index 000000000..ca901031d --- /dev/null +++ b/man/prepare_data_copula_cpp.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{prepare_data_copula_cpp} +\alias{prepare_data_copula_cpp} +\title{Generate (Gaussian) Copula MC samples} +\usage{ +prepare_data_copula_cpp( + MC_samples_mat, + x_explain_mat, + x_explain_gaussian_mat, + x_train_mat, + S, + mu, + cov_mat +) +} +\arguments{ +\item{MC_samples_mat}{arma::mat. Matrix of dimension (\code{n_samples}, \code{n_features}) containing samples from the +univariate standard normal.} + +\item{x_explain_mat}{arma::mat. Matrix of dimension (\code{n_explain}, \code{n_features}) containing the observations +to explain on the original scale.} + +\item{x_explain_gaussian_mat}{arma::mat. Matrix of dimension (\code{n_explain}, \code{n_features}) containing the +observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +transformed to a standardized normal distribution.} + +\item{x_train_mat}{arma::mat. Matrix of dimension (\code{n_train}, \code{n_features}) containing the training observations.} + +\item{S}{arma::mat. Matrix of dimension (\code{n_combinations}, \code{n_features}) containing binary representations of +the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +This is not a problem internally in shapr as the empty and grand coalitions treated differently.} + +\item{mu}{arma::vec. Vector of length \code{n_features} containing the mean of each feature after being transformed +using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution.} + +\item{cov_mat}{arma::mat. Matrix of dimension (\code{n_features}, \code{n_features}) containing the pairwise covariance +between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +transformed to a standardized normal distribution.} +} +\value{ +An arma::cube/3D array of dimension (\code{n_samples}, \code{n_explain} * \code{n_coalitions}, \code{n_features}), where +the columns (\emph{,j,}) are matrices of dimension (\code{n_samples}, \code{n_features}) containing the conditional Gaussian +copula MC samples for each explicand and coalition on the original scale. +} +\description{ +Generate (Gaussian) Copula MC samples +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/prepare_data_copula_cpp_and_R.Rd b/man/prepare_data_copula_cpp_and_R.Rd new file mode 100644 index 000000000..65fe941df --- /dev/null +++ b/man/prepare_data_copula_cpp_and_R.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{prepare_data_copula_cpp_and_R} +\alias{prepare_data_copula_cpp_and_R} +\title{Generate (Gaussian) Copula MC samples} +\usage{ +prepare_data_copula_cpp_and_R( + MC_samples_mat, + x_explain_mat, + x_explain_gaussian_mat, + x_train_mat, + S, + mu, + cov_mat +) +} +\arguments{ +\item{MC_samples_mat}{arma::mat. Matrix of dimension (\code{n_samples}, \code{n_features}) containing samples from the +univariate standard normal.} + +\item{x_explain_mat}{arma::mat. Matrix of dimension (\code{n_explain}, \code{n_features}) containing the observations +to explain on the original scale.} + +\item{x_explain_gaussian_mat}{arma::mat. Matrix of dimension (\code{n_explain}, \code{n_features}) containing the +observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +transformed to a standardized normal distribution.} + +\item{x_train_mat}{arma::mat. Matrix of dimension (\code{n_train}, \code{n_features}) containing the training observations.} + +\item{S}{arma::mat. Matrix of dimension (\code{n_combinations}, \code{n_features}) containing binary representations of +the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +This is not a problem internally in shapr as the empty and grand coalitions treated differently.} + +\item{mu}{arma::vec. Vector of length \code{n_features} containing the mean of each feature after being transformed +using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution.} + +\item{cov_mat}{arma::mat. Matrix of dimension (\code{n_features}, \code{n_features}) containing the pairwise covariance +between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +transformed to a standardized normal distribution.} +} +\value{ +An arma::cube/3D array of dimension (\code{n_samples}, \code{n_explain} * \code{n_coalitions}, \code{n_features}), where +the columns (\emph{,j,}) are matrices of dimension (\code{n_samples}, \code{n_features}) containing the conditional Gaussian +copula MC samples for each explicand and coalition on the original scale. +} +\description{ +Generate (Gaussian) Copula MC samples +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/prepare_data_gaussian_cpp.Rd b/man/prepare_data_gaussian_cpp.Rd new file mode 100644 index 000000000..b24b431e6 --- /dev/null +++ b/man/prepare_data_gaussian_cpp.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{prepare_data_gaussian_cpp} +\alias{prepare_data_gaussian_cpp} +\title{Generate Gaussian MC samples} +\usage{ +prepare_data_gaussian_cpp(MC_samples_mat, x_explain_mat, S, mu, cov_mat) +} +\arguments{ +\item{MC_samples_mat}{arma::mat. Matrix of dimension (\code{n_samples}, \code{n_features}) containing samples from the +univariate standard normal.} + +\item{x_explain_mat}{arma::mat. Matrix of dimension (\code{n_explain}, \code{n_features}) containing the observations +to explain.} + +\item{S}{arma::mat. Matrix of dimension (\code{n_combinations}, \code{n_features}) containing binary representations of +the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +This is not a problem internally in shapr as the empty and grand coalitions treated differently.} + +\item{mu}{arma::vec. Vector of length \code{n_features} containing the mean of each feature.} + +\item{cov_mat}{arma::mat. Matrix of dimension (\code{n_features}, \code{n_features}) containing the pairwise covariance +between all pairs of features.} +} +\value{ +An arma::cube/3D array of dimension (\code{n_samples}, \code{n_explain} * \code{n_coalitions}, \code{n_features}), where +the columns (\emph{,j,}) are matrices of dimension (\code{n_samples}, \code{n_features}) containing the conditional Gaussian +MC samples for each explicand and coalition. +} +\description{ +Generate Gaussian MC samples +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/quantile.type7.Rd b/man/quantile.type7.Rd new file mode 100644 index 000000000..bbf2cbbf0 --- /dev/null +++ b/man/quantile.type7.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_copula.R +\name{quantile.type7} +\alias{quantile.type7} +\title{Compute the quantiles using quantile type seven} +\usage{ +\method{quantile}{type7}(x, probs) +} +\arguments{ +\item{x}{numeric vector whose sample quantiles are wanted.} + +\item{probs}{numeric vector of probabilities with values between zero and one.} +} +\value{ +A vector of length length(\code{probs}) is returned. +} +\description{ +Compute the quantiles using quantile type seven +} +\details{ +Using quantile type number 7 from stats::quantile. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/sample_copula.Rd b/man/sample_copula.Rd deleted file mode 100644 index c180f25ca..000000000 --- a/man/sample_copula.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/approach_copula.R -\name{sample_copula} -\alias{sample_copula} -\title{Sample conditional variables using the Gaussian copula approach} -\usage{ -sample_copula( - index_given, - n_samples, - mu, - cov_mat, - m, - x_explain_gaussian, - x_train, - x_explain -) -} -\arguments{ -\item{index_given}{Integer vector. The indices of the features to condition upon. Note that -\code{min(index_given) >= 1} and \code{max(index_given) <= m}.} - -\item{m}{Positive integer. The total number of features.} - -\item{x_explain_gaussian}{Numeric matrix. Contains the observation whose predictions ought -to be explained (test data), -after quantile-transforming them to standard Gaussian variables.} - -\item{x_explain}{Numeric matrix. Contains the features of the observation whose -predictions ought to be explained (test data).} -} -\value{ -data.table -} -\description{ -Sample conditional variables using the Gaussian copula approach -} -\author{ -Martin Jullum -} -\keyword{internal} diff --git a/man/sample_gaussian.Rd b/man/sample_gaussian.Rd deleted file mode 100644 index f91312e85..000000000 --- a/man/sample_gaussian.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/approach_gaussian.R -\name{sample_gaussian} -\alias{sample_gaussian} -\title{Sample conditional Gaussian variables} -\usage{ -sample_gaussian(index_given, n_samples, mu, cov_mat, m, x_explain) -} -\arguments{ -\item{index_given}{Integer vector. The indices of the features to condition upon. Note that -\code{min(index_given) >= 1} and \code{max(index_given) <= m}.} - -\item{m}{Positive integer. The total number of features.} - -\item{x_explain}{Numeric matrix. Contains the features of the observation whose -predictions ought to be explained (test data).} -} -\value{ -data.table -} -\description{ -Sample conditional Gaussian variables -} -\author{ -Martin Jullum -} -\keyword{internal} diff --git a/man/setup_approach.Rd b/man/setup_approach.Rd index 86cb164bb..a87fbd106 100644 --- a/man/setup_approach.Rd +++ b/man/setup_approach.Rd @@ -153,3 +153,6 @@ This is useful if the underlying time series are scaled between 0 and 1, for exa The different choices of \code{approach} takes different (optional) parameters, which are forwarded from \code{\link[=explain]{explain()}}. } +\author{ +Martin Jullum +} diff --git a/src/Copula.cpp b/src/Copula.cpp index 248add593..ac6cc7861 100644 --- a/src/Copula.cpp +++ b/src/Copula.cpp @@ -1,5 +1,4 @@ #include -#include // [[Rcpp::depends(RcppArmadillo)]] //' Transforms new data to a standardized normal distribution @@ -26,109 +25,109 @@ arma::mat inv_gaussian_transform_cpp(arma::mat z, arma::mat x) { } //' Generate (Gaussian) Copula MC samples - //' - //' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the - //' univariate standard normal. - //' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations - //' to explain on the original scale. - //' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the - //' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been - //' transformed to a standardized normal distribution. - //' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. - //' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of - //' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. - //' This is not a problem internally in shapr as the empty and grand coalitions treated differently. - //' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed - //' using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. - //' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance - //' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been - //' transformed to a standardized normal distribution. - //' - //' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where - //' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian - //' copula MC samples for each explicand and coalition on the original scale. - //' - //' @export - //' @keywords internal - //' @author Lars Henry Berge Olsen - // [[Rcpp::export]] - arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat x_explain_gaussian_mat, - arma::mat x_train_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { - - int n_explain = x_explain_mat.n_rows; - int n_samples = MC_samples_mat.n_rows; - int n_features = MC_samples_mat.n_cols; - int n_coalitions = S.n_rows; - - // Initialize auxiliary matrix and result cube - arma::mat aux_mat(n_samples, n_features); - arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); - - // Iterate over the coalitions - for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { - - // Get current coalition S and the indices of the features in coalition S and mask Sbar - arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); - arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - - // Extract the features we condition on, both on the original scale and the Gaussian transformed values. - arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); - - // Extract the mean values of the Gaussian transformed features in the two sets - arma::vec mu_S = mu.elem(S_now_idx); - arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - - // Extract the relevant parts of the Gaussian transformed covariance matrix - arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - - // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - - // Ensure that the conditional covariance matrix is symmetric - if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - } - - // Compute the conditional mean of Xsbar given Xs = Xs_star_gaussian, i.e., of the Gaussian transformed features - arma::mat x_Sbar_gaussian_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); - x_Sbar_gaussian_mean.each_col() += mu_Sbar; - - // Transform the samples to be from N(O, Sigma_{Sbar|S}) - arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - - // Loop over the different explicands and combine the generated values with the values we conditioned on - for (int idx_now = 0; idx_now < n_explain; idx_now++) { - - // Transform the MC samples to be from N(mu_{Sbar|S}, Sigma_{Sbar|S}) for one coalition and one explicand - arma::mat MC_samples_mat_now_now = - MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); - - // Transform the MC to the original scale using the inverse Gaussian transform - arma::mat MC_samples_mat_now_now_trans = - inv_gaussian_transform_cpp(MC_samples_mat_now_now, x_train_mat.cols(Sbar_now_idx)); - - // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix - aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; - aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); - - // Insert the auxiliary matrix into the result cube - result_cube.col(S_ind*n_explain + idx_now) = aux_mat; - } - } - - return result_cube; - } +//' +//' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +//' univariate standard normal. +//' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +//' to explain on the original scale. +//' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the +//' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +//' transformed to a standardized normal distribution. +//' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. +//' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +//' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +//' This is not a problem internally in shapr as the empty and grand coalitions treated differently. +//' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed +//' using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. +//' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +//' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +//' transformed to a standardized normal distribution. +//' +//' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +//' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +//' copula MC samples for each explicand and coalition on the original scale. +//' +//' @export +//' @keywords internal +//' @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat x_explain_gaussian_mat, + arma::mat x_train_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Initialize auxiliary matrix and result cube + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on, both on the original scale and the Gaussian transformed values. + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); + + // Extract the mean values of the Gaussian transformed features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the Gaussian transformed covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star_gaussian, i.e., of the Gaussian transformed features + arma::mat x_Sbar_gaussian_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); + x_Sbar_gaussian_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_{Sbar|S}) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different explicands and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + + // Transform the MC samples to be from N(mu_{Sbar|S}, Sigma_{Sbar|S}) for one coalition and one explicand + arma::mat MC_samples_mat_now_now = + MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); + + // Transform the MC to the original scale using the inverse Gaussian transform + arma::mat MC_samples_mat_now_now_trans = + inv_gaussian_transform_cpp(MC_samples_mat_now_now, x_train_mat.cols(Sbar_now_idx)); + + // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + + // Insert the auxiliary matrix into the result cube + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; +} //' Generate (Gaussian) Copula MC samples //' diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index 844f327d1..1cbb9505b 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -1,6 +1,8 @@ #include using namespace Rcpp; +// [[Rcpp::depends(RcppArmadillo)]] + //' Generate Gaussian MC samples //' //' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the From cbb06edb390692f6213a39f77153af63d0f89e63 Mon Sep 17 00:00:00 2001 From: LHBO Date: Tue, 9 Jan 2024 23:03:20 +0000 Subject: [PATCH 45/62] Updated versions with working and efficient C++ code --- R/approach_copula.R | 154 +----- inst/scripts/compare_copula_in_R_and_C++.R | 594 +++++++++++++++++++-- src/Copula.cpp | 205 ++++--- src/Gaussian.cpp | 10 +- 4 files changed, 664 insertions(+), 299 deletions(-) diff --git a/R/approach_copula.R b/R/approach_copula.R index d81777434..9f00c3fbd 100644 --- a/R/approach_copula.R +++ b/R/approach_copula.R @@ -4,8 +4,9 @@ #' @author Martin Jullum setup_approach.copula <- function(internal, ...) { parameters <- internal$parameters - x_train <- internal$data$x_train - x_explain <- internal$data$x_explain + x_train_mat <- as.matrix(internal$data$x_train) + x_explain_mat <- as.matrix(internal$data$x_explain) + feature_names <- internal$parameters$feature_names # Checking if factor features are present feature_specs <- internal$objects$feature_specs @@ -22,20 +23,13 @@ setup_approach.copula <- function(internal, ...) { } # Prepare transformed data - parameters$copula.mu <- rep(0, ncol(x_train)) - x_train0 <- apply( - X = x_train, - MARGIN = 2, - FUN = gaussian_transform - ) - parameters$copula.cov_mat <- get_cov_mat(x_train0) + parameters$copula.mu <- rep(0, ncol(x_train_mat)) + x_train_mat0 = gaussian_transform_cpp(as.matrix(x_train_mat)) + colnames(x_train_mat0) = feature_names + parameters$copula.cov_mat <- get_cov_mat(x_train_mat0) - x_explain_gaussian <- apply( - X = rbind(x_explain, x_train), - MARGIN = 2, - FUN = gaussian_transform_separate, - n_y = nrow(x_explain) - ) + x_explain_gaussian = gaussian_transform_separate_cpp(x_explain_mat, x_train_mat) + colnames(x_explain_gaussian) = feature_names if (is.null(dim(x_explain_gaussian))) { x_explain_gaussian <- t(as.matrix(x_explain_gaussian)) @@ -99,133 +93,3 @@ prepare_data.copula <- function(internal, index_features, ...) { return(dt) } - -#' @inheritParams default_doc -#' @rdname prepare_data -#' @export -#' @author Lars Henry Berge Olsen -prepare_data.copula_cpp_and_R <- function(internal, index_features, ...) { - # Extract used variables - S <- internal$objects$S[index_features, , drop = FALSE] - feature_names <- internal$parameters$feature_names - n_explain <- internal$parameters$n_explain - n_samples <- internal$parameters$n_samples - n_features <- internal$parameters$n_features - n_combinations_now <- length(index_features) - x_train_mat <- as.matrix(internal$data$x_train) - x_explain_mat <- as.matrix(internal$data$x_explain) - copula.mu <- internal$parameters$copula.mu - copula.cov_mat <- internal$parameters$copula.cov_mat - copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) - - # TODO: Note that `as.matrix` is not needed for `copula.x_explain_gaussian_mat` as it is already defined as a matrix - # in `setup_approach.copula`, however, it seems that Martin plans to make it into a data.table, thus, I include - # `as.matrix` as future safety. DISCUSS WITH MARTIN WHAT HIS PLANS ARE! - - # Generate the MC samples from N(0, 1) - MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) - - # Use C++ to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}), for all coalitions and explicands, - # and then transforming them back to the original scale using the inverse Gaussian transform in C++. - # The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). - dt <- prepare_data_copula_cpp_and_R( - MC_samples_mat = MC_samples_mat, - x_explain_mat = x_explain_mat, - x_explain_gaussian_mat = copula.x_explain_gaussian_mat, - x_train_mat = x_train_mat, - S = S, - mu = copula.mu, - cov_mat = copula.cov_mat - ) - - # Reshape `dt` to a 2D array of dimension (n_samples * n_explain * n_coalitions, n_features). - dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) - - # Convert to a data.table and add extra identification columns - dt <- data.table::as.data.table(dt) - data.table::setnames(dt, feature_names) - dt[, id_combination := rep(seq_len(nrow(S)), each = n_samples * n_explain)] - dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] - dt[, w := 1 / n_samples] - dt[, id_combination := index_features[id_combination]] - data.table::setcolorder(dt, c("id_combination", "id", feature_names)) - - return(dt) -} - -#' Transform data using the inverse Gaussian transformation. -#' -#' @details This function is called from `prepare_data_copula_cpp()` as the this was faster -#' -#' @param z Matrix. The data are the Gaussian Monte Carlos samples to transform. -#' @param x Matrix. The data with the original transformation. Used to conduct the transformation of `z`. -#' -#' @return Matrix of the same dimension as `z`. -#' -#' @keywords internal -#' @author Lars Henry Berge Olsen -inv_gaussian_transform_R <- function(z, x) { - u <- stats::pnorm(z) - x_new <- sapply(seq_len(ncol(u)), function(idx) quantile.type7(x[, idx], probs = u[, idx])) - return(x_new) -} - -#' Compute the quantiles using quantile type seven -#' -#' @details Using quantile type number 7 from stats::quantile. -#' -#' @param x numeric vector whose sample quantiles are wanted. -#' @param probs numeric vector of probabilities with values between zero and one. -#' -#' @return A vector of length length(`probs`) is returned. -#' -#' @keywords internal -#' @author Lars Henry Berge Olsen -quantile.type7 <- function(x, probs) { - n <- length(x) - probs <- pmax(0, pmin(1, probs)) # allow for slight overshoot - index <- 1 + (n - 1) * probs - lo <- floor(index) - hi <- ceiling(index) - x <- sort(x, partial = unique(c(lo, hi))) - qs <- x[lo] - i <- which(index > lo) - h <- (index - lo)[i] - qs[i] <- (1 - h) * qs[i] + h * x[hi[i]] - return(qs) -} - -#' Transforms new data to standardized normal (dimension 1) based on other data transformations -#' -#' @param yx Numeric vector. The first `n_y` items is the data that is transformed, and last -#' part is the data with the original transformation. -#' @param n_y Positive integer. Number of elements of `yx` that belongs to the Gaussian data. -#' -#' @return Vector of back-transformed Gaussian data -#' -#' @keywords internal -#' @author Martin Jullum -gaussian_transform_separate <- function(yx, n_y) { - if (n_y >= length(yx)) stop("n_y should be less than length(yx)") - ind <- 1:n_y - x <- yx[-ind] - tmp <- rank(yx)[ind] - tmp <- tmp - rank(tmp) + 0.5 - u_y <- tmp / (length(x) + 1) - z_y <- stats::qnorm(u_y) - return(z_y) -} - -#' Transforms a sample to standardized normal distribution -#' -#' @param x Numeric vector.The data which should be transformed to a standard normal distribution. -#' -#' @return Numeric vector of length `length(x)` -#' -#' @keywords internal -#' @author Martin Jullum -gaussian_transform <- function(x) { - u <- rank(x) / (length(x) + 1) - z <- stats::qnorm(u) - return(z) -} diff --git a/inst/scripts/compare_copula_in_R_and_C++.R b/inst/scripts/compare_copula_in_R_and_C++.R index 5d9fa0384..d9751e071 100644 --- a/inst/scripts/compare_copula_in_R_and_C++.R +++ b/inst/scripts/compare_copula_in_R_and_C++.R @@ -5,6 +5,7 @@ library(data.table) devtools::load_all(".") # Old R code ------------------------------------------------------------------------------------------------------ +## R Old version --------------------------------------------------------------------------------------------------- #' @inheritParams default_doc #' @rdname prepare_data #' @export @@ -122,6 +123,451 @@ inv_gaussian_transform_old <- function(zx, n_z, type) { return(as.double(x_new)) } +#' Transforms a sample to standardized normal distribution +#' +#' @param x Numeric vector.The data which should be transformed to a standard normal distribution. +#' +#' @return Numeric vector of length `length(x)` +#' +#' @keywords internal +#' @author Martin Jullum +gaussian_transform_old <- function(x) { + u <- rank(x) / (length(x) + 1) + z <- stats::qnorm(u) + return(z) +} + +#' Transforms new data to standardized normal (dimension 1) based on other data transformations +#' +#' @param yx Numeric vector. The first `n_y` items is the data that is transformed, and last +#' part is the data with the original transformation. +#' @param n_y Positive integer. Number of elements of `yx` that belongs to the Gaussian data. +#' +#' @return Vector of back-transformed Gaussian data +#' +#' @keywords internal +#' @author Martin Jullum +gaussian_transform_separate_old <- function(yx, n_y) { + if (n_y >= length(yx)) stop("n_y should be less than length(yx)") + ind <- 1:n_y + x <- yx[-ind] + tmp <- rank(yx)[ind] + tmp <- tmp - rank(tmp) + 0.5 + u_y <- tmp / (length(x) + 1) + z_y <- stats::qnorm(u_y) + return(z_y) +} + + +## C++ arma version ------------------------------------------------------------------------------------------------- +#' @inheritParams default_doc +#' @rdname prepare_data +#' @export +#' @author Lars Henry Berge Olsen +prepare_data.copula_cpp_arma <- function(internal, index_features, ...) { + # Extract used variables + S <- internal$objects$S[index_features, , drop = FALSE] + feature_names <- internal$parameters$feature_names + n_explain <- internal$parameters$n_explain + n_samples <- internal$parameters$n_samples + n_features <- internal$parameters$n_features + n_combinations_now <- length(index_features) + x_train_mat <- as.matrix(internal$data$x_train) + x_explain_mat <- as.matrix(internal$data$x_explain) + copula.mu <- internal$parameters$copula.mu + copula.cov_mat <- internal$parameters$copula.cov_mat + copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) + + # TODO: Note that `as.matrix` is not needed for `copula.x_explain_gaussian_mat` as it is already defined as a matrix + # in `setup_approach.copula`, however, it seems that Martin plans to make it into a data.table, thus, I include + # `as.matrix` as future safety. DISCUSS WITH MARTIN WHAT HIS PLANS ARE! + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Use C++ to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}), for all coalitions and explicands, + # and then transforming them back to the original scale using the inverse Gaussian transform in C++. + # The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). + dt <- prepare_data_copula_cpp_arma( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat + ) + + # Reshape `dt` to a 2D array of dimension (n_samples * n_explain * n_coalitions, n_features). + dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) + + # Convert to a data.table and add extra identification columns + dt <- data.table::as.data.table(dt) + data.table::setnames(dt, feature_names) + dt[, id_combination := rep(seq_len(nrow(S)), each = n_samples * n_explain)] + dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, w := 1 / n_samples] + dt[, id_combination := index_features[id_combination]] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + return(dt) +} + + + + + +## C++ and R version ---------------------------------------------------------------------------------------------- +#' @inheritParams default_doc +#' @rdname prepare_data +#' @export +#' @author Lars Henry Berge Olsen +prepare_data.copula_cpp_and_R <- function(internal, index_features, ...) { + # Extract used variables + S <- internal$objects$S[index_features, , drop = FALSE] + feature_names <- internal$parameters$feature_names + n_explain <- internal$parameters$n_explain + n_samples <- internal$parameters$n_samples + n_features <- internal$parameters$n_features + n_combinations_now <- length(index_features) + x_train_mat <- as.matrix(internal$data$x_train) + x_explain_mat <- as.matrix(internal$data$x_explain) + copula.mu <- internal$parameters$copula.mu + copula.cov_mat <- internal$parameters$copula.cov_mat + copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) + + # TODO: Note that `as.matrix` is not needed for `copula.x_explain_gaussian_mat` as it is already defined as a matrix + # in `setup_approach.copula`, however, it seems that Martin plans to make it into a data.table, thus, I include + # `as.matrix` as future safety. DISCUSS WITH MARTIN WHAT HIS PLANS ARE! + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Use C++ to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}), for all coalitions and explicands, + # and then transforming them back to the original scale using the inverse Gaussian transform in C++. + # The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). + dt <- prepare_data_copula_cpp_and_R( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat + ) + + # Reshape `dt` to a 2D array of dimension (n_samples * n_explain * n_coalitions, n_features). + dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) + + # Convert to a data.table and add extra identification columns + dt <- data.table::as.data.table(dt) + data.table::setnames(dt, feature_names) + dt[, id_combination := rep(seq_len(nrow(S)), each = n_samples * n_explain)] + dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, w := 1 / n_samples] + dt[, id_combination := index_features[id_combination]] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + return(dt) +} + +#' Transform data using the inverse Gaussian transformation. +#' +#' @details This function is called from `prepare_data_copula_cpp()` as the this was faster +#' +#' @param z Matrix. The data are the Gaussian Monte Carlos samples to transform. +#' @param x Matrix. The data with the original transformation. Used to conduct the transformation of `z`. +#' +#' @return Matrix of the same dimension as `z`. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +inv_gaussian_transform_R <- function(z, x) { + u <- stats::pnorm(z) + x_new <- sapply(seq_len(ncol(u)), function(idx) quantile.type7(x[, idx], probs = u[, idx])) + return(x_new) +} + +#' Compute the quantiles using quantile type seven +#' +#' @details Using quantile type number 7 from stats::quantile. +#' +#' @param x numeric vector whose sample quantiles are wanted. +#' @param probs numeric vector of probabilities with values between zero and one. +#' +#' @return A vector of length length(`probs`) is returned. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +quantile.type7 <- function(x, probs) { + n <- length(x) + probs <- pmax(0, pmin(1, probs)) # allow for slight overshoot + index <- 1 + (n - 1) * probs + lo <- floor(index) + hi <- ceiling(index) + x <- sort(x, partial = unique(c(lo, hi))) + qs <- x[lo] + i <- which(index > lo) + h <- (index - lo)[i] + qs[i] <- (1 - h) * qs[i] + h * x[hi[i]] + return(qs) +} + + +# Old C++ code ---------------------------------------------------------------------------------------------------- +sourceCpp( + code = ' +// [[Rcpp::depends("RcppArmadillo")]] +#include +using namespace Rcpp; + +// Transforms new data to a standardized normal distribution +// +// @details The function uses `arma::quantile(...)` which corresponds to Rs `stats::quantile(..., type = 5)`. +// +// @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform. +// @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`. +// +// @return arma::mat of the same dimension as `z` +// +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::mat inv_gaussian_transform_cpp_arma(arma::mat z, arma::mat x) { + int n_features = z.n_cols; + int n_samples = z.n_rows; + arma::mat z_new(n_samples, n_features); + arma::mat u = arma::normcdf(z); + for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { + z_new.col(feature_idx) = arma::quantile(x.col(feature_idx), u.col(feature_idx)); + } + return z_new; +} + +// Generate (Gaussian) Copula MC samples +// +// @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +// univariate standard normal. +// @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +// to explain on the original scale. +// @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the +// observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +// transformed to a standardized normal distribution. +// @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. +// @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +// the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +// This is not a problem internally in shapr as the empty and grand coalitions treated differently. +// @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed +// using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. +// @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +// between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +// transformed to a standardized normal distribution. +// +// @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +// the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +// copula MC samples for each explicand and coalition on the original scale. +// +// @export +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::cube prepare_data_copula_cpp_arma(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat x_explain_gaussian_mat, + arma::mat x_train_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Initialize auxiliary matrix and result cube + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on, both on the original scale and the Gaussian transformed values. + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); + + // Extract the mean values of the Gaussian transformed features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the Gaussian transformed covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star_gaussian, i.e., of the Gaussian transformed features + arma::mat x_Sbar_gaussian_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); + x_Sbar_gaussian_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_{Sbar|S}) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different explicands and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + + // Transform the MC samples to be from N(mu_{Sbar|S}, Sigma_{Sbar|S}) for one coalition and one explicand + arma::mat MC_samples_mat_now_now = + MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); + + // Transform the MC to the original scale using the inverse Gaussian transform + arma::mat MC_samples_mat_now_now_trans = + inv_gaussian_transform_cpp_arma(MC_samples_mat_now_now, x_train_mat.cols(Sbar_now_idx)); + + // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + + // Insert the auxiliary matrix into the result cube + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; +} + +// Generate (Gaussian) Copula MC samples +// +// @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +// univariate standard normal. +// @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +// to explain on the original scale. +// @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the +// observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +// transformed to a standardized normal distribution. +// @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. +// @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +// the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +// This is not a problem internally in shapr as the empty and grand coalitions treated differently. +// @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed +// using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. +// @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +// between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +// transformed to a standardized normal distribution. +// +// @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +// the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +// copula MC samples for each explicand and coalition on the original scale. +// +// @export +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::cube prepare_data_copula_cpp_and_R(arma::mat MC_samples_mat, + arma::mat x_explain_mat, + arma::mat x_explain_gaussian_mat, + arma::mat x_train_mat, + arma::mat S, + arma::vec mu, + arma::mat cov_mat) { + + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Get the R functions for computing the inverse gaussian transform + Rcpp::Function inv_gaussian_transform_R("inv_gaussian_transform_R"); + + // Initialize auxiliary matrix and result cube + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on, both on the original scale and the Gaussian transformed values. + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); + + // Extract the mean values of the Gaussian transformed features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the Gaussian transformed covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star_gaussian, i.e., of the Gaussian transformed features + arma::mat x_Sbar_gaussian_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); + x_Sbar_gaussian_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_{Sbar|S}) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different explicands and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + + // Transform the MC samples to be from N(mu_{Sbar|S}, Sigma_{Sbar|S}) for one coalition and one explicand + arma::mat MC_samples_mat_now_now = + MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); + + arma::mat x_train_mat_now = x_train_mat.cols(Sbar_now_idx); + //arma::mat x_train_mat_now = arma::normcdf(x_train_mat.cols(Sbar_now_idx)); + + // Transform the MC to the original scale using the inverse Gaussian transform + arma::mat MC_samples_mat_now_now_trans = + Rcpp::as(inv_gaussian_transform_R(Rcpp::wrap(MC_samples_mat_now_now), + Rcpp::wrap(x_train_mat_now))); + + // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + + // Insert the auxiliary matrix into the result cube + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; +} +') + + + + + + + @@ -216,9 +662,7 @@ inv_gaussian_transform_old <- function(zx, n_z, type) { internal <- setup_computation(internal, model, predict_model) } -# Compare --------------------------------------------------------------------------------------------------------- -set.seed(321) -set.seed(2024) +# Compare prepare_data.copula ---------------------------------------------------------------------------------------- set.seed(123) # Recall that old version iterate over the observations and then the coalitions. @@ -237,7 +681,7 @@ time_only_R <- system.time({ }) time_only_R -# The new C++ code with quantile from arma +# The C++ code with my own quantile function time_only_cpp <- system.time({ res_only_cpp <- prepare_data.copula( internal = internal, @@ -247,6 +691,16 @@ time_only_cpp <- system.time({ data.table::setorderv(res_only_cpp, c("id", "id_combination")) time_only_cpp +# The C++ code with quantile functions from arma +time_only_cpp_arma <- system.time({ + res_only_cpp_arma <- prepare_data.copula_cpp_arma( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +data.table::setorderv(res_only_cpp_arma, c("id", "id_combination")) +time_only_cpp_arma + # The new C++ code with quantile from R time_cpp_and_R <- system.time({ res_cpp_and_R <- prepare_data.copula_cpp_and_R( @@ -261,30 +715,19 @@ time_cpp_and_R times <- rbind( time_only_R, time_only_cpp, + time_only_cpp_arma, time_cpp_and_R ) times -# TIMES for all coalitions, n_samples <- 1000, n_train <- 1000, n_test <- 20, M <- 8 -# user.self sys.self elapsed user.child sys.child -# time_only_R 64.263 2.130 68.793 0 0 -# time_only_cpp 112.403 1.018 117.429 0 0 -# time_cpp_and_R 7.020 1.370 8.854 0 0 - -# user.self sys.self elapsed user.child sys.child -# time_only_R 67.230 1.822 70.997 9.807 0.431 -# time_only_cpp 113.848 1.056 118.330 0.000 0.000 -# time_cpp_and_R 7.444 1.512 9.065 0.000 0.000 +# TIMES for all coalitions (254), n_samples <- 1000, n_train <- 1000, n_test <- 20, M <- 8 -# user.self sys.self elapsed user.child sys.child -# time_only_R 81.699 3.180 90.926 0.000 0.000 -# time_only_cpp 115.815 1.547 122.401 0.021 0.068 -# time_cpp_and_R 7.976 1.750 10.539 1.491 0.403 +# user.self sys.self elapsed user.child sys.child +# time_only_R 67.050 2.587 72.357 0.011 0.018 +# time_only_cpp 4.588 0.406 5.218 0.000 0.000 +# time_only_cpp_arma 23.853 0.663 25.391 0.000 0.000 +# time_cpp_and_R 7.430 1.346 9.086 0.000 0.000 -# user.self sys.self elapsed user.child sys.child -# time_only_R 63.600 2.009 67.670 0 0 -# time_only_cpp 113.768 1.032 118.364 0 0 -# time_cpp_and_R 6.824 1.260 8.188 0 0 # Relative speedup of new method times_relative <- t(sapply(seq_len(nrow(times)), function(idx) times[1, ] / times[idx, ])) @@ -292,42 +735,113 @@ rownames(times_relative) <- paste0(rownames(times), "_rel") times_relative # RELATIVE TIMES for all coalitions, n_samples <- 1000, n_train <- 1000, n_test <- 20, M <- 8 -# user.self sys.self elapsed user.child sys.child -# time_only_R_rel 1.00000 1.0000 1.00000 1 1 -# time_only_cpp_rel 0.59052 1.7254 0.59999 Inf Inf -# time_cpp_and_R_rel 9.03143 1.2050 7.83199 Inf Inf - -# user.self sys.self elapsed user.child sys.child -# time_only_R_rel 1.00000 1.0000 1.00000 NaN NaN -# time_only_cpp_rel 0.70543 2.0556 0.74285 0 0 -# time_cpp_and_R_rel 10.24310 1.8171 8.62757 0 0 - -# user.self sys.self elapsed user.child sys.child -# time_only_R_rel 1.00000 1.0000 1.00000 NaN NaN -# time_only_cpp_rel 0.55903 1.9467 0.57171 NaN NaN -# time_cpp_and_R_rel 9.32005 1.5944 8.26453 NaN NaN -# user.self sys.self elapsed user.child sys.child -# time_only_R_rel 1.00000 1.0000 1.00000 NaN NaN -# time_only_cpp_rel 0.57172 2.0923 0.58583 NaN NaN -# time_cpp_and_R_rel 9.15427 1.5547 7.76971 NaN NaN +# user.self sys.self elapsed user.child sys.child +# time_only_R_rel 1.0000 1.0000 1.0000 1 1 +# time_only_cpp_rel 14.6142 6.3719 13.8668 Inf Inf +# time_only_cpp_arma_rel 2.8110 3.9020 2.8497 Inf Inf +# time_cpp_and_R_rel 9.0242 1.9220 7.9636 Inf Inf # Aggregate the MC sample values for each explicand and combination res_only_R <- res_only_R[, w := NULL] res_only_cpp <- res_only_cpp[, w := NULL] +res_only_cpp_arma <- res_only_cpp_arma[, w := NULL] res_cpp_and_R <- res_cpp_and_R[, w := NULL] res_only_R_agr <- res_only_R[, lapply(.SD, mean), by = c("id", "id_combination")] res_only_cpp_agr <- res_only_cpp[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_cpp_arma_agr <- res_only_cpp_arma[, lapply(.SD, mean), by = c("id", "id_combination")] res_cpp_and_R_agr <- res_cpp_and_R[, lapply(.SD, mean), by = c("id", "id_combination")] # Difference res_only_R_agr - res_only_cpp_agr +res_only_R_agr - res_only_cpp_arma_agr res_only_R_agr - res_cpp_and_R_agr # Max absolute difference max(abs(res_only_R_agr - res_only_cpp_agr)) +max(abs(res_only_R_agr - res_only_cpp_arma_agr)) max(abs(res_only_R_agr - res_cpp_and_R_agr)) # Max absolute relative difference max(abs(res_only_R_agr - res_only_cpp_agr) / res_only_cpp_agr) +max(abs(res_only_R_agr - res_only_cpp_arma_agr) / res_only_cpp_arma_agr) max(abs(res_only_R_agr - res_cpp_and_R_agr) / res_cpp_and_R_agr) + +# Compare gaussian_transform -------------------------------------------------------------------------------------- +set.seed(123) +x_temp_rows = 10000 +x_temp_cols = 10 +x_temp = matrix(rnorm(x_temp_rows*x_temp_cols), x_temp_rows, x_temp_cols) + +# Compare for equal values +gaussian_transform_R = apply(X = x_temp, MARGIN = 2, FUN = gaussian_transform_old) +gaussian_transform_cpp = gaussian_transform_cpp(x_temp) +all.equal(gaussian_transform_R, gaussian_transform_cpp) # TRUE + +# Compare time (generate new data each time such that the result is not stored in the cache) +set.seed(1234) +gc() +#Rcpp::sourceCpp("src/Copula.cpp") # C++ code is faster when I recompile it? I don't understand. +rbenchmark::benchmark(R = apply(X = matrix(rnorm(x_temp_rows*x_temp_cols), x_temp_rows, x_temp_cols), + MARGIN = 2, + FUN = gaussian_transform_old), + cpp = gaussian_transform_cpp(matrix(rnorm(x_temp_rows*x_temp_cols), x_temp_rows, x_temp_cols)), + replications = 100) +# test replications elapsed relative user.self sys.self user.child sys.child +# 2 cpp 100 1.933 1.000 1.764 0.149 0 0 +# 1 R 100 3.152 1.631 2.498 0.511 0 0 + + + +# Compare gaussian_transform_separate ------------------------------------------------------------------------- +set.seed(123) +x_cols = 8 +x_train_rows = 1000 +x_explain_rows = 1000 +x_train_temp = matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols) +x_explain_temp = matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols) +x_explain_train_temp = rbind(x_explain_temp, x_train_temp) + +system.time({r = apply(X = rbind(x_explain_temp, x_train_temp), + MARGIN = 2, + FUN = gaussian_transform_separate_old, + n_y = nrow(x_explain_temp))}) +system.time({cpp = gaussian_transform_separate_cpp(x_explain_temp, x_train_temp)}) +all.equal(r, cpp) + +# gc() +# Rcpp::sourceCpp("src/Copula.cpp") # C++ code is faster when I recompile it? I don't understand. + +rbenchmark::benchmark(r = apply(X = rbind(x_explain_temp, x_train_temp), + MARGIN = 2, + FUN = gaussian_transform_separate_old, + n_y = nrow(x_explain_temp)), + cpp = gaussian_transform_separate_cpp(x_explain_temp, x_train_temp)) +# test replications elapsed relative user.self sys.self user.child sys.child +# 2 cpp 100 0.238 1.000 0.228 0.006 0 0 +# 1 r 100 0.502 2.109 0.432 0.058 0 0 + +rbenchmark::benchmark(r = apply(X = rbind(matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols), + matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), + MARGIN = 2, + FUN = gaussian_transform_separate_old, + n_y = nrow(matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols))), + cpp = gaussian_transform_separate_cpp(matrix(rnorm(x_explain_rows*x_cols), + x_explain_rows, + x_cols), + matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), + cpp_shapr = shapr:::gaussian_transform_separate_cpp(matrix(rnorm(x_explain_rows*x_cols), + x_explain_rows, + x_cols), + matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), + cpp2 = .Call(`_shapr_gaussian_transform_separate_cpp`, + matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols), + matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols))) + +# test replications elapsed relative user.self sys.self user.child sys.child +# 2 cpp 100 0.361 1.000 0.322 0.025 0 0 +# 1 r 100 0.553 1.532 0.496 0.047 0 0 + +# Call `Rcpp::sourceCpp("src/Copula.cpp")` and then run rbenchmark again and then cpp is much faster. +# C++ code is faster when I recompile it? I don't understand. +# Rcpp::sourceCpp("src/Copula.cpp") diff --git a/src/Copula.cpp b/src/Copula.cpp index ac6cc7861..9188cf4b7 100644 --- a/src/Copula.cpp +++ b/src/Copula.cpp @@ -1,6 +1,47 @@ #include +using namespace Rcpp; + // [[Rcpp::depends(RcppArmadillo)]] +//' Compute the quantiles using quantile type seven +//' +//' @details Using quantile type number seven from stats::quantile in R. +//' +//' @param x arma::vec. Numeric vector whose sample quantiles are wanted. +//' @param probs arma::vec. Numeric vector of probabilities with values between zero and one. +//' +//' @return A vector of length `length(probs)` with the quantiles is returned. +//' +//' @keywords internal +//' @author Lars Henry Berge Olsen +// [[Rcpp::export]] +arma::vec quantile_type7_cpp(const arma::vec& x, const arma::vec& probs) { + int n = x.n_elem; + int m = probs.n_elem; + + // Initialize output quantile vector + arma::vec qs(m); + + // Calculate indices + arma::vec index = 1 + (n - 1) * probs; + arma::vec lo = arma::floor(index); + arma::vec hi = arma::ceil(index); + + // Sort the data + arma::vec sorted_x = arma::sort(x); + + // Calculate quantiles using quantile type seven + for (int i = 0; i < m; ++i) { + qs(i) = sorted_x(lo(i) - 1); + if (index(i) > lo(i)) { + double h = index(i) - lo(i); + qs(i) = (1 - h) * qs(i) + h * sorted_x(hi(i) - 1); + } + } + + return qs; +} + //' Transforms new data to a standardized normal distribution //' //' @details The function uses `arma::quantile(...)` which corresponds to R's `stats::quantile(..., type = 5)`. @@ -13,13 +54,13 @@ //' @keywords internal //' @author Lars Henry Berge Olsen // [[Rcpp::export]] -arma::mat inv_gaussian_transform_cpp(arma::mat z, arma::mat x) { +arma::mat inv_gaussian_transform_cpp(const arma::mat& z, const arma::mat& x) { int n_features = z.n_cols; int n_samples = z.n_rows; arma::mat z_new(n_samples, n_features); arma::mat u = arma::normcdf(z); for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { - z_new.col(feature_idx) = arma::quantile(x.col(feature_idx), u.col(feature_idx)); + z_new.col(feature_idx) = quantile_type7_cpp(x.col(feature_idx), u.col(feature_idx)); } return z_new; } @@ -51,13 +92,13 @@ arma::mat inv_gaussian_transform_cpp(arma::mat z, arma::mat x) { //' @keywords internal //' @author Lars Henry Berge Olsen // [[Rcpp::export]] -arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat x_explain_gaussian_mat, - arma::mat x_train_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { +arma::cube prepare_data_copula_cpp(const arma::mat& MC_samples_mat, + const arma::mat& x_explain_mat, + const arma::mat& x_explain_gaussian_mat, + const arma::mat& x_train_mat, + const arma::mat& S, + const arma::vec& mu, + const arma::mat& cov_mat) { int n_explain = x_explain_mat.n_rows; int n_samples = MC_samples_mat.n_rows; @@ -129,114 +170,60 @@ arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, return result_cube; } -//' Generate (Gaussian) Copula MC samples +//' Transforms a sample to standardized normal distribution //' -//' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the -//' univariate standard normal. -//' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations -//' to explain on the original scale. -//' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the -//' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been -//' transformed to a standardized normal distribution. -//' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. -//' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of -//' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. -//' This is not a problem internally in shapr as the empty and grand coalitions treated differently. -//' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed -//' using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. -//' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance -//' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been -//' transformed to a standardized normal distribution. +//' @param x Numeric matrix. The data which should be transformed to a standard normal distribution. //' -//' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where -//' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian -//' copula MC samples for each explicand and coalition on the original scale. +//' @return Numeric matrix of dimension `dim(x)` //' -//' @export //' @keywords internal //' @author Lars Henry Berge Olsen // [[Rcpp::export]] -arma::cube prepare_data_copula_cpp_and_R(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat x_explain_gaussian_mat, - arma::mat x_train_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { - - int n_explain = x_explain_mat.n_rows; - int n_samples = MC_samples_mat.n_rows; - int n_features = MC_samples_mat.n_cols; - int n_coalitions = S.n_rows; - - // Get the R functions for computing the inverse gaussian transform - Rcpp::Function inv_gaussian_transform_R("inv_gaussian_transform_R"); - - // Initialize auxiliary matrix and result cube - arma::mat aux_mat(n_samples, n_features); - arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); - - // Iterate over the coalitions - for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { - - // Get current coalition S and the indices of the features in coalition S and mask Sbar - arma::mat S_now = S.row(S_ind); - arma::uvec S_now_idx = arma::find(S_now > 0.5); - arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); - - // Extract the features we condition on, both on the original scale and the Gaussian transformed values. - arma::mat x_S_star = x_explain_mat.cols(S_now_idx); - arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); - - // Extract the mean values of the Gaussian transformed features in the two sets - arma::vec mu_S = mu.elem(S_now_idx); - arma::vec mu_Sbar = mu.elem(Sbar_now_idx); - - // Extract the relevant parts of the Gaussian transformed covariance matrix - arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); - arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); - arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); - arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); - - // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix - arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); - arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; - - // Ensure that the conditional covariance matrix is symmetric - if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { - cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); - } - - // Compute the conditional mean of Xsbar given Xs = Xs_star_gaussian, i.e., of the Gaussian transformed features - arma::mat x_Sbar_gaussian_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); - x_Sbar_gaussian_mean.each_col() += mu_Sbar; - - // Transform the samples to be from N(O, Sigma_{Sbar|S}) - arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); - - // Loop over the different explicands and combine the generated values with the values we conditioned on - for (int idx_now = 0; idx_now < n_explain; idx_now++) { - - // Transform the MC samples to be from N(mu_{Sbar|S}, Sigma_{Sbar|S}) for one coalition and one explicand - arma::mat MC_samples_mat_now_now = - MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); - - arma::mat x_train_mat_now = x_train_mat.cols(Sbar_now_idx); - //arma::mat x_train_mat_now = arma::normcdf(x_train_mat.cols(Sbar_now_idx)); - - // Transform the MC to the original scale using the inverse Gaussian transform - arma::mat MC_samples_mat_now_now_trans = - Rcpp::as(inv_gaussian_transform_R(Rcpp::wrap(MC_samples_mat_now_now), - Rcpp::wrap(x_train_mat_now))); +Rcpp::NumericMatrix gaussian_transform_cpp(const arma::mat& x) { + int n_obs = x.n_rows; + int n_features = x.n_cols; + + // Pre allocate the return matrix + Rcpp::NumericMatrix x_trans(n_obs, n_features); + + // Iterate over the columns, i.e., the features + for (int idx_feature = 0; idx_feature < n_features; ++idx_feature) { + // Compute the rank and transform to standardized normal distribution + arma::vec rank_now = arma::conv_to::from(arma::sort_index(arma::sort_index(x.col(idx_feature)))); + Rcpp::NumericVector u = Rcpp::wrap((rank_now + 1) / (n_obs + 1)); + x_trans(Rcpp::_, idx_feature) = Rcpp::qnorm(u); + } - // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix - aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; - aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + return x_trans; +} - // Insert the auxiliary matrix into the result cube - result_cube.col(S_ind*n_explain + idx_now) = aux_mat; - } +//' Transforms new data to standardized normal (column-wise) based on other data transformations +//' +//' @param y arma::mat. A numeric matrix containing the data that is to be transformed. +//' @param x arma::mat. A numeric matrix containing the data of the original transformation. +//' +//' @return An arma::mat matrix of the same dimension as `y` containing the back-transformed Gaussian data. +//' +//' @keywords internal +//' @author Lars Henry Berge Olsen, Martin Jullum +// [[Rcpp::export]] +Rcpp::NumericMatrix gaussian_transform_separate_cpp(const arma::mat& y, const arma::mat& x) { + int n_features = x.n_cols; + int n_y_rows = y.n_rows; + int n_x_rows = x.n_rows; + + // Pre allocate the return matrix + Rcpp::NumericMatrix z_y(n_y_rows, n_features); + + // Compute the transformation for each feature at the time + for (int idx_feature = 0; idx_feature < n_features; ++idx_feature) { + arma::vec yx_now = arma::join_cols(y.col(idx_feature), x.col(idx_feature)); + arma::vec rank_now_1 = arma::conv_to::from(arma::sort_index(arma::sort_index(yx_now))).head(n_y_rows); + arma::vec rank_now_2 = arma::conv_to::from(arma::sort_index(arma::sort_index(rank_now_1))); + arma::vec tmp = rank_now_1 - rank_now_2 + 0.5; + Rcpp::NumericVector u_y = Rcpp::wrap(tmp / (n_x_rows + 1)); + z_y(Rcpp::_, idx_feature) = Rcpp::qnorm(u_y); } - return result_cube; + return z_y; } diff --git a/src/Gaussian.cpp b/src/Gaussian.cpp index 1cbb9505b..c375ed510 100644 --- a/src/Gaussian.cpp +++ b/src/Gaussian.cpp @@ -24,11 +24,11 @@ using namespace Rcpp; //' @keywords internal //' @author Lars Henry Berge Olsen // [[Rcpp::export]] -arma::cube prepare_data_gaussian_cpp(arma::mat MC_samples_mat, - arma::mat x_explain_mat, - arma::mat S, - arma::vec mu, - arma::mat cov_mat) { +arma::cube prepare_data_gaussian_cpp(const arma::mat& MC_samples_mat, + const arma::mat& x_explain_mat, + const arma::mat& S, + const arma::vec& mu, + const arma::mat& cov_mat) { int n_explain = x_explain_mat.n_rows; int n_samples = MC_samples_mat.n_rows; From a1e1767efddf928ac7064156866d6e4fa8a8754b Mon Sep 17 00:00:00 2001 From: LHBO Date: Tue, 9 Jan 2024 23:05:55 +0000 Subject: [PATCH 46/62] Updated manuals and NAMESPACE --- NAMESPACE | 2 - R/RcppExports.R | 72 +++++++++----- man/gaussian_transform.Rd | 21 ----- man/gaussian_transform_cpp.Rd | 21 +++++ man/gaussian_transform_separate.Rd | 24 ----- man/gaussian_transform_separate_cpp.Rd | 23 +++++ man/inv_gaussian_transform_R.Rd | 26 ----- man/prepare_data.Rd | 3 - man/prepare_data_copula_cpp_and_R.Rd | 52 ---------- man/quantile.type7.Rd | 26 ----- man/quantile_type7_cpp.Rd | 26 +++++ src/RcppExports.cpp | 126 +++++++++++++++++++------ 12 files changed, 216 insertions(+), 206 deletions(-) delete mode 100644 man/gaussian_transform.Rd create mode 100644 man/gaussian_transform_cpp.Rd delete mode 100644 man/gaussian_transform_separate.Rd create mode 100644 man/gaussian_transform_separate_cpp.Rd delete mode 100644 man/inv_gaussian_transform_R.Rd delete mode 100644 man/prepare_data_copula_cpp_and_R.Rd delete mode 100644 man/quantile.type7.Rd create mode 100644 man/quantile_type7_cpp.Rd diff --git a/NAMESPACE b/NAMESPACE index 7249210f5..b71c5d437 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,7 +29,6 @@ S3method(predict_model,ranger) S3method(predict_model,xgb.Booster) S3method(prepare_data,categorical) S3method(prepare_data,copula) -S3method(prepare_data,copula_cpp_and_R) S3method(prepare_data,ctree) S3method(prepare_data,empirical) S3method(prepare_data,gaussian) @@ -64,7 +63,6 @@ export(observation_impute_cpp) export(predict_model) export(prepare_data) export(prepare_data_copula_cpp) -export(prepare_data_copula_cpp_and_R) export(prepare_data_gaussian_cpp) export(rss_cpp) export(setup) diff --git a/R/RcppExports.R b/R/RcppExports.R index 419346d6d..e8e64c143 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -80,6 +80,21 @@ aicc_full_cpp <- function(h, X_list, mcov_list, S_scale_dist, y_list, negative) .Call(`_shapr_aicc_full_cpp`, h, X_list, mcov_list, S_scale_dist, y_list, negative) } +#' Compute the quantiles using quantile type seven +#' +#' @details Using quantile type number seven from stats::quantile in R. +#' +#' @param x arma::vec. Numeric vector whose sample quantiles are wanted. +#' @param probs arma::vec. Numeric vector of probabilities with values between zero and one. +#' +#' @return A vector of length `length(probs)` with the quantiles is returned. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +quantile_type7_cpp <- function(x, probs) { + .Call(`_shapr_quantile_type7_cpp`, x, probs) +} + #' Transforms new data to a standardized normal distribution #' #' @details The function uses `arma::quantile(...)` which corresponds to R's `stats::quantile(..., type = 5)`. @@ -125,34 +140,29 @@ prepare_data_copula_cpp <- function(MC_samples_mat, x_explain_mat, x_explain_gau .Call(`_shapr_prepare_data_copula_cpp`, MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) } -#' Generate (Gaussian) Copula MC samples +#' Transforms a sample to standardized normal distribution #' -#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the -#' univariate standard normal. -#' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations -#' to explain on the original scale. -#' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the -#' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been -#' transformed to a standardized normal distribution. -#' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. -#' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of -#' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. -#' This is not a problem internally in shapr as the empty and grand coalitions treated differently. -#' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed -#' using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. -#' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance -#' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been -#' transformed to a standardized normal distribution. +#' @param x Numeric matrix. The data which should be transformed to a standard normal distribution. #' -#' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where -#' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian -#' copula MC samples for each explicand and coalition on the original scale. +#' @return Numeric matrix of dimension `dim(x)` #' -#' @export #' @keywords internal #' @author Lars Henry Berge Olsen -prepare_data_copula_cpp_and_R <- function(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) { - .Call(`_shapr_prepare_data_copula_cpp_and_R`, MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) +gaussian_transform_cpp <- function(x) { + .Call(`_shapr_gaussian_transform_cpp`, x) +} + +#' Transforms new data to standardized normal (column-wise) based on other data transformations +#' +#' @param y arma::mat. A numeric matrix containing the data that is to be transformed. +#' @param x arma::mat. A numeric matrix containing the data of the original transformation. +#' +#' @return An arma::mat matrix of the same dimension as `y` containing the back-transformed Gaussian data. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen, Martin Jullum +gaussian_transform_separate_cpp <- function(y, x) { + .Call(`_shapr_gaussian_transform_separate_cpp`, y, x) } #' Generate Gaussian MC samples @@ -243,6 +253,22 @@ observation_impute_cpp <- function(index_xtrain, index_s, xtrain, xtest, S) { .Call(`_shapr_observation_impute_cpp`, index_xtrain, index_s, xtrain, xtest, S) } +gaussian_transform_cpp_d <- function(x) { + .Call(`_shapr_gaussian_transform_cpp_d`, x) +} + +gaussian_transform_cpp_arma <- function(x) { + .Call(`_shapr_gaussian_transform_cpp_arma`, x) +} + +gaussian_transform_mat_cpp_arma <- function(x) { + .Call(`_shapr_gaussian_transform_mat_cpp_arma`, x) +} + +gaussian_transform_mat_cpp_arma2 <- function(x) { + .Call(`_shapr_gaussian_transform_mat_cpp_arma2`, x) +} + #' Calculate weight matrix #' #' @param subsets List. Each of the elements equals an integer diff --git a/man/gaussian_transform.Rd b/man/gaussian_transform.Rd deleted file mode 100644 index c0af625c4..000000000 --- a/man/gaussian_transform.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/approach_copula.R -\name{gaussian_transform} -\alias{gaussian_transform} -\title{Transforms a sample to standardized normal distribution} -\usage{ -gaussian_transform(x) -} -\arguments{ -\item{x}{Numeric vector.The data which should be transformed to a standard normal distribution.} -} -\value{ -Numeric vector of length \code{length(x)} -} -\description{ -Transforms a sample to standardized normal distribution -} -\author{ -Martin Jullum -} -\keyword{internal} diff --git a/man/gaussian_transform_cpp.Rd b/man/gaussian_transform_cpp.Rd new file mode 100644 index 000000000..97d8039af --- /dev/null +++ b/man/gaussian_transform_cpp.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{gaussian_transform_cpp} +\alias{gaussian_transform_cpp} +\title{Transforms a sample to standardized normal distribution} +\usage{ +gaussian_transform_cpp(x) +} +\arguments{ +\item{x}{Numeric matrix. The data which should be transformed to a standard normal distribution.} +} +\value{ +Numeric matrix of dimension \code{dim(x)} +} +\description{ +Transforms a sample to standardized normal distribution +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/gaussian_transform_separate.Rd b/man/gaussian_transform_separate.Rd deleted file mode 100644 index 89afb6494..000000000 --- a/man/gaussian_transform_separate.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/approach_copula.R -\name{gaussian_transform_separate} -\alias{gaussian_transform_separate} -\title{Transforms new data to standardized normal (dimension 1) based on other data transformations} -\usage{ -gaussian_transform_separate(yx, n_y) -} -\arguments{ -\item{yx}{Numeric vector. The first \code{n_y} items is the data that is transformed, and last -part is the data with the original transformation.} - -\item{n_y}{Positive integer. Number of elements of \code{yx} that belongs to the Gaussian data.} -} -\value{ -Vector of back-transformed Gaussian data -} -\description{ -Transforms new data to standardized normal (dimension 1) based on other data transformations -} -\author{ -Martin Jullum -} -\keyword{internal} diff --git a/man/gaussian_transform_separate_cpp.Rd b/man/gaussian_transform_separate_cpp.Rd new file mode 100644 index 000000000..04be9d37c --- /dev/null +++ b/man/gaussian_transform_separate_cpp.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{gaussian_transform_separate_cpp} +\alias{gaussian_transform_separate_cpp} +\title{Transforms new data to standardized normal (column-wise) based on other data transformations} +\usage{ +gaussian_transform_separate_cpp(y, x) +} +\arguments{ +\item{y}{arma::mat. A numeric matrix containing the data that is to be transformed.} + +\item{x}{arma::mat. A numeric matrix containing the data of the original transformation.} +} +\value{ +An arma::mat matrix of the same dimension as \code{y} containing the back-transformed Gaussian data. +} +\description{ +Transforms new data to standardized normal (column-wise) based on other data transformations +} +\author{ +Lars Henry Berge Olsen, Martin Jullum +} +\keyword{internal} diff --git a/man/inv_gaussian_transform_R.Rd b/man/inv_gaussian_transform_R.Rd deleted file mode 100644 index 5b79ffe32..000000000 --- a/man/inv_gaussian_transform_R.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/approach_copula.R -\name{inv_gaussian_transform_R} -\alias{inv_gaussian_transform_R} -\title{Transform data using the inverse Gaussian transformation.} -\usage{ -inv_gaussian_transform_R(z, x) -} -\arguments{ -\item{z}{Matrix. The data are the Gaussian Monte Carlos samples to transform.} - -\item{x}{Matrix. The data with the original transformation. Used to conduct the transformation of \code{z}.} -} -\value{ -Matrix of the same dimension as \code{z}. -} -\description{ -Transform data using the inverse Gaussian transformation. -} -\details{ -This function is called from \code{prepare_data_copula_cpp()} as the this was faster -} -\author{ -Lars Henry Berge Olsen -} -\keyword{internal} diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd index 6f7353a93..23e57b18d 100644 --- a/man/prepare_data.Rd +++ b/man/prepare_data.Rd @@ -6,7 +6,6 @@ \alias{prepare_data} \alias{prepare_data.categorical} \alias{prepare_data.copula} -\alias{prepare_data.copula_cpp_and_R} \alias{prepare_data.ctree} \alias{prepare_data.empirical} \alias{prepare_data.gaussian} @@ -20,8 +19,6 @@ prepare_data(internal, index_features = NULL, ...) \method{prepare_data}{copula}(internal, index_features, ...) -\method{prepare_data}{copula_cpp_and_R}(internal, index_features, ...) - \method{prepare_data}{ctree}(internal, index_features = NULL, ...) \method{prepare_data}{empirical}(internal, index_features = NULL, ...) diff --git a/man/prepare_data_copula_cpp_and_R.Rd b/man/prepare_data_copula_cpp_and_R.Rd deleted file mode 100644 index 65fe941df..000000000 --- a/man/prepare_data_copula_cpp_and_R.Rd +++ /dev/null @@ -1,52 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RcppExports.R -\name{prepare_data_copula_cpp_and_R} -\alias{prepare_data_copula_cpp_and_R} -\title{Generate (Gaussian) Copula MC samples} -\usage{ -prepare_data_copula_cpp_and_R( - MC_samples_mat, - x_explain_mat, - x_explain_gaussian_mat, - x_train_mat, - S, - mu, - cov_mat -) -} -\arguments{ -\item{MC_samples_mat}{arma::mat. Matrix of dimension (\code{n_samples}, \code{n_features}) containing samples from the -univariate standard normal.} - -\item{x_explain_mat}{arma::mat. Matrix of dimension (\code{n_explain}, \code{n_features}) containing the observations -to explain on the original scale.} - -\item{x_explain_gaussian_mat}{arma::mat. Matrix of dimension (\code{n_explain}, \code{n_features}) containing the -observations to explain after being transformed using the Gaussian transform, i.e., the samples have been -transformed to a standardized normal distribution.} - -\item{x_train_mat}{arma::mat. Matrix of dimension (\code{n_train}, \code{n_features}) containing the training observations.} - -\item{S}{arma::mat. Matrix of dimension (\code{n_combinations}, \code{n_features}) containing binary representations of -the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. -This is not a problem internally in shapr as the empty and grand coalitions treated differently.} - -\item{mu}{arma::vec. Vector of length \code{n_features} containing the mean of each feature after being transformed -using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution.} - -\item{cov_mat}{arma::mat. Matrix of dimension (\code{n_features}, \code{n_features}) containing the pairwise covariance -between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been -transformed to a standardized normal distribution.} -} -\value{ -An arma::cube/3D array of dimension (\code{n_samples}, \code{n_explain} * \code{n_coalitions}, \code{n_features}), where -the columns (\emph{,j,}) are matrices of dimension (\code{n_samples}, \code{n_features}) containing the conditional Gaussian -copula MC samples for each explicand and coalition on the original scale. -} -\description{ -Generate (Gaussian) Copula MC samples -} -\author{ -Lars Henry Berge Olsen -} -\keyword{internal} diff --git a/man/quantile.type7.Rd b/man/quantile.type7.Rd deleted file mode 100644 index bbf2cbbf0..000000000 --- a/man/quantile.type7.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/approach_copula.R -\name{quantile.type7} -\alias{quantile.type7} -\title{Compute the quantiles using quantile type seven} -\usage{ -\method{quantile}{type7}(x, probs) -} -\arguments{ -\item{x}{numeric vector whose sample quantiles are wanted.} - -\item{probs}{numeric vector of probabilities with values between zero and one.} -} -\value{ -A vector of length length(\code{probs}) is returned. -} -\description{ -Compute the quantiles using quantile type seven -} -\details{ -Using quantile type number 7 from stats::quantile. -} -\author{ -Lars Henry Berge Olsen -} -\keyword{internal} diff --git a/man/quantile_type7_cpp.Rd b/man/quantile_type7_cpp.Rd new file mode 100644 index 000000000..69e3100a9 --- /dev/null +++ b/man/quantile_type7_cpp.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{quantile_type7_cpp} +\alias{quantile_type7_cpp} +\title{Compute the quantiles using quantile type seven} +\usage{ +quantile_type7_cpp(x, probs) +} +\arguments{ +\item{x}{arma::vec. Numeric vector whose sample quantiles are wanted.} + +\item{probs}{arma::vec. Numeric vector of probabilities with values between zero and one.} +} +\value{ +A vector of length \code{length(probs)} with the quantiles is returned. +} +\description{ +Compute the quantiles using quantile type seven +} +\details{ +Using quantile type number seven from stats::quantile in R. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 681f3dbe9..a5301bbd0 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -80,63 +80,81 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// quantile_type7_cpp +arma::vec quantile_type7_cpp(const arma::vec& x, const arma::vec& probs); +RcppExport SEXP _shapr_quantile_type7_cpp(SEXP xSEXP, SEXP probsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::vec& >::type x(xSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type probs(probsSEXP); + rcpp_result_gen = Rcpp::wrap(quantile_type7_cpp(x, probs)); + return rcpp_result_gen; +END_RCPP +} // inv_gaussian_transform_cpp -arma::mat inv_gaussian_transform_cpp(arma::mat z, arma::mat x); +arma::mat inv_gaussian_transform_cpp(const arma::mat& z, const arma::mat& x); RcppExport SEXP _shapr_inv_gaussian_transform_cpp(SEXP zSEXP, SEXP xSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< arma::mat >::type z(zSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x(xSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type z(zSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type x(xSEXP); rcpp_result_gen = Rcpp::wrap(inv_gaussian_transform_cpp(z, x)); return rcpp_result_gen; END_RCPP } // prepare_data_copula_cpp -arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat x_explain_gaussian_mat, arma::mat x_train_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +arma::cube prepare_data_copula_cpp(const arma::mat& MC_samples_mat, const arma::mat& x_explain_mat, const arma::mat& x_explain_gaussian_mat, const arma::mat& x_train_mat, const arma::mat& S, const arma::vec& mu, const arma::mat& cov_mat); RcppExport SEXP _shapr_prepare_data_copula_cpp(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP x_explain_gaussian_matSEXP, SEXP x_train_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_explain_gaussian_mat(x_explain_gaussian_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_train_mat(x_train_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); - Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); - Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type x_explain_gaussian_mat(x_explain_gaussian_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type x_train_mat(x_train_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type S(SSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type mu(muSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type cov_mat(cov_matSEXP); rcpp_result_gen = Rcpp::wrap(prepare_data_copula_cpp(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat)); return rcpp_result_gen; END_RCPP } -// prepare_data_copula_cpp_and_R -arma::cube prepare_data_copula_cpp_and_R(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat x_explain_gaussian_mat, arma::mat x_train_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); -RcppExport SEXP _shapr_prepare_data_copula_cpp_and_R(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP x_explain_gaussian_matSEXP, SEXP x_train_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { +// gaussian_transform_cpp +Rcpp::NumericMatrix gaussian_transform_cpp(const arma::mat& x); +RcppExport SEXP _shapr_gaussian_transform_cpp(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(gaussian_transform_cpp(x)); + return rcpp_result_gen; +END_RCPP +} +// gaussian_transform_separate_cpp +Rcpp::NumericMatrix gaussian_transform_separate_cpp(const arma::mat& y, const arma::mat& x); +RcppExport SEXP _shapr_gaussian_transform_separate_cpp(SEXP ySEXP, SEXP xSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_explain_gaussian_mat(x_explain_gaussian_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_train_mat(x_train_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); - Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); - Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); - rcpp_result_gen = Rcpp::wrap(prepare_data_copula_cpp_and_R(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat)); + Rcpp::traits::input_parameter< const arma::mat& >::type y(ySEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(gaussian_transform_separate_cpp(y, x)); return rcpp_result_gen; END_RCPP } // prepare_data_gaussian_cpp -arma::cube prepare_data_gaussian_cpp(arma::mat MC_samples_mat, arma::mat x_explain_mat, arma::mat S, arma::vec mu, arma::mat cov_mat); +arma::cube prepare_data_gaussian_cpp(const arma::mat& MC_samples_mat, const arma::mat& x_explain_mat, const arma::mat& S, const arma::vec& mu, const arma::mat& cov_mat); RcppExport SEXP _shapr_prepare_data_gaussian_cpp(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< arma::mat >::type MC_samples_mat(MC_samples_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type x_explain_mat(x_explain_matSEXP); - Rcpp::traits::input_parameter< arma::mat >::type S(SSEXP); - Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); - Rcpp::traits::input_parameter< arma::mat >::type cov_mat(cov_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type MC_samples_mat(MC_samples_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type x_explain_mat(x_explain_matSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type S(SSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type mu(muSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type cov_mat(cov_matSEXP); rcpp_result_gen = Rcpp::wrap(prepare_data_gaussian_cpp(MC_samples_mat, x_explain_mat, S, mu, cov_mat)); return rcpp_result_gen; END_RCPP @@ -183,6 +201,50 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// gaussian_transform_cpp_d +Rcpp::NumericVector gaussian_transform_cpp_d(const Rcpp::NumericVector& x); +RcppExport SEXP _shapr_gaussian_transform_cpp_d(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(gaussian_transform_cpp_d(x)); + return rcpp_result_gen; +END_RCPP +} +// gaussian_transform_cpp_arma +Rcpp::NumericVector gaussian_transform_cpp_arma(const arma::vec& x); +RcppExport SEXP _shapr_gaussian_transform_cpp_arma(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::vec& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(gaussian_transform_cpp_arma(x)); + return rcpp_result_gen; +END_RCPP +} +// gaussian_transform_mat_cpp_arma +Rcpp::NumericMatrix gaussian_transform_mat_cpp_arma(const arma::mat& x); +RcppExport SEXP _shapr_gaussian_transform_mat_cpp_arma(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(gaussian_transform_mat_cpp_arma(x)); + return rcpp_result_gen; +END_RCPP +} +// gaussian_transform_mat_cpp_arma2 +Rcpp::NumericMatrix gaussian_transform_mat_cpp_arma2(const Rcpp::NumericMatrix& x); +RcppExport SEXP _shapr_gaussian_transform_mat_cpp_arma2(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const Rcpp::NumericMatrix& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(gaussian_transform_mat_cpp_arma2(x)); + return rcpp_result_gen; +END_RCPP +} // weight_matrix_cpp arma::mat weight_matrix_cpp(List subsets, int m, int n, NumericVector w); RcppExport SEXP _shapr_weight_matrix_cpp(SEXP subsetsSEXP, SEXP mSEXP, SEXP nSEXP, SEXP wSEXP) { @@ -216,13 +278,19 @@ static const R_CallMethodDef CallEntries[] = { {"_shapr_correction_matrix_cpp", (DL_FUNC) &_shapr_correction_matrix_cpp, 2}, {"_shapr_aicc_full_single_cpp", (DL_FUNC) &_shapr_aicc_full_single_cpp, 5}, {"_shapr_aicc_full_cpp", (DL_FUNC) &_shapr_aicc_full_cpp, 6}, + {"_shapr_quantile_type7_cpp", (DL_FUNC) &_shapr_quantile_type7_cpp, 2}, {"_shapr_inv_gaussian_transform_cpp", (DL_FUNC) &_shapr_inv_gaussian_transform_cpp, 2}, {"_shapr_prepare_data_copula_cpp", (DL_FUNC) &_shapr_prepare_data_copula_cpp, 7}, - {"_shapr_prepare_data_copula_cpp_and_R", (DL_FUNC) &_shapr_prepare_data_copula_cpp_and_R, 7}, + {"_shapr_gaussian_transform_cpp", (DL_FUNC) &_shapr_gaussian_transform_cpp, 1}, + {"_shapr_gaussian_transform_separate_cpp", (DL_FUNC) &_shapr_gaussian_transform_separate_cpp, 2}, {"_shapr_prepare_data_gaussian_cpp", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp, 5}, {"_shapr_mahalanobis_distance_cpp", (DL_FUNC) &_shapr_mahalanobis_distance_cpp, 5}, {"_shapr_sample_features_cpp", (DL_FUNC) &_shapr_sample_features_cpp, 2}, {"_shapr_observation_impute_cpp", (DL_FUNC) &_shapr_observation_impute_cpp, 5}, + {"_shapr_gaussian_transform_cpp_d", (DL_FUNC) &_shapr_gaussian_transform_cpp_d, 1}, + {"_shapr_gaussian_transform_cpp_arma", (DL_FUNC) &_shapr_gaussian_transform_cpp_arma, 1}, + {"_shapr_gaussian_transform_mat_cpp_arma", (DL_FUNC) &_shapr_gaussian_transform_mat_cpp_arma, 1}, + {"_shapr_gaussian_transform_mat_cpp_arma2", (DL_FUNC) &_shapr_gaussian_transform_mat_cpp_arma2, 1}, {"_shapr_weight_matrix_cpp", (DL_FUNC) &_shapr_weight_matrix_cpp, 4}, {"_shapr_feature_matrix_cpp", (DL_FUNC) &_shapr_feature_matrix_cpp, 2}, {NULL, NULL, 0} From f2aa827fa30b2e8ce02e672ae94cb17a34a05926 Mon Sep 17 00:00:00 2001 From: LHBO Date: Thu, 11 Jan 2024 10:44:01 +0000 Subject: [PATCH 47/62] Added comparison of shapr and rcpp compile --- R/RcppExports.R | 16 --- inst/scripts/compare_copula_in_R_and_C++.R | 126 +++++++++++++++++++-- src/Copula.cpp | 23 ++++ src/RcppExports.cpp | 48 -------- 4 files changed, 139 insertions(+), 74 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index e8e64c143..d7a8a5e15 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -253,22 +253,6 @@ observation_impute_cpp <- function(index_xtrain, index_s, xtrain, xtest, S) { .Call(`_shapr_observation_impute_cpp`, index_xtrain, index_s, xtrain, xtest, S) } -gaussian_transform_cpp_d <- function(x) { - .Call(`_shapr_gaussian_transform_cpp_d`, x) -} - -gaussian_transform_cpp_arma <- function(x) { - .Call(`_shapr_gaussian_transform_cpp_arma`, x) -} - -gaussian_transform_mat_cpp_arma <- function(x) { - .Call(`_shapr_gaussian_transform_mat_cpp_arma`, x) -} - -gaussian_transform_mat_cpp_arma2 <- function(x) { - .Call(`_shapr_gaussian_transform_mat_cpp_arma2`, x) -} - #' Calculate weight matrix #' #' @param subsets List. Each of the elements equals an integer diff --git a/inst/scripts/compare_copula_in_R_and_C++.R b/inst/scripts/compare_copula_in_R_and_C++.R index d9751e071..a0c26bdde 100644 --- a/inst/scripts/compare_copula_in_R_and_C++.R +++ b/inst/scripts/compare_copula_in_R_and_C++.R @@ -629,7 +629,7 @@ arma::cube prepare_data_copula_cpp_and_R(arma::mat MC_samples_mat, timing <- TRUE n_combinations <- NULL group <- NULL - feature_specs <- get_feature_specs(get_model_specs, model) + feature_specs <- shapr:::get_feature_specs(get_model_specs, model) n_batches <- 1 seed <- 1 @@ -651,7 +651,7 @@ arma::cube prepare_data_copula_cpp_and_R(arma::mat MC_samples_mat, ) # Gets predict_model (if not passed to explain) - predict_model <- get_predict_model( + predict_model <- shapr:::get_predict_model( predict_model = predict_model, model = model ) @@ -659,9 +659,93 @@ arma::cube prepare_data_copula_cpp_and_R(arma::mat MC_samples_mat, # Sets up the Shapley (sampling) framework and prepares the # conditional expectation computation for the chosen approach # Note: model and predict_model are ONLY used by the AICc-methods of approach empirical to find optimal parameters - internal <- setup_computation(internal, model, predict_model) + internal <- shapr:::setup_computation(internal, model, predict_model) } + +# Compare shapr compile and rcpp compile -------------------------------------------------------------------------- +look_at_coalitions <- seq(1, 2^M - 2) +index_features = internal$objects$S_batch$`1`[look_at_coalitions] +S <- internal$objects$S[index_features, , drop = FALSE] +feature_names <- internal$parameters$feature_names +n_explain <- internal$parameters$n_explain +n_samples <- internal$parameters$n_samples +n_features <- internal$parameters$n_features +n_combinations_now <- length(index_features) +x_train_mat <- as.matrix(internal$data$x_train) +x_explain_mat <- as.matrix(internal$data$x_explain) +copula.mu <- internal$parameters$copula.mu +copula.cov_mat <- internal$parameters$copula.cov_mat +copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) + +# Generate the MC samples from N(0, 1) +MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + +# Use C++ to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}), for all coalitions and explicands, +# and then transforming them back to the original scale using the inverse Gaussian transform in C++. +# The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). +time_1 = system.time({dt_1 <- prepare_data_copula_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat +)}) +time_1 + +time_2 = system.time({dt_2 <- shapr:::prepare_data_copula_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat +)}) +time_2 + +time_2andhalf = system.time({dt_2andhalf <- + .Call(`_shapr_prepare_data_copula_cpp`, + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat + )}) +time_2andhalf + +# Rcpp::compileAttributes(pkgdir = ".", verbose = TRUE) +Rcpp::sourceCpp("src/Copula.cpp") +time_3 = system.time({dt_3 <- prepare_data_copula_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat +)}) +time_3 + +time_4 = system.time({dt_4 <- shapr::prepare_data_copula_cpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat +)}) + +rbind(time_1, time_2, time_3, time_4) +all.equal(dt_1, dt_2) +all.equal(dt_2, dt_3) +all.equal(dt_3, dt_4) + # Compare prepare_data.copula ---------------------------------------------------------------------------------------- set.seed(123) @@ -769,13 +853,14 @@ max(abs(res_only_R_agr - res_cpp_and_R_agr) / res_cpp_and_R_agr) # Compare gaussian_transform -------------------------------------------------------------------------------------- set.seed(123) -x_temp_rows = 10000 -x_temp_cols = 10 +x_temp_rows = 100000 +x_temp_cols = 20 x_temp = matrix(rnorm(x_temp_rows*x_temp_cols), x_temp_rows, x_temp_cols) # Compare for equal values -gaussian_transform_R = apply(X = x_temp, MARGIN = 2, FUN = gaussian_transform_old) -gaussian_transform_cpp = gaussian_transform_cpp(x_temp) +system.time({gaussian_transform_R = apply(X = x_temp, MARGIN = 2, FUN = gaussian_transform_old)}) +system.time({gaussian_transform_cpp = gaussian_transform_cpp(x_temp)}) +system.time({gaussian_transform_cpp = shapr:::gaussian_transform_cpp(x_temp)}) all.equal(gaussian_transform_R, gaussian_transform_cpp) # TRUE # Compare time (generate new data each time such that the result is not stored in the cache) @@ -795,9 +880,9 @@ rbenchmark::benchmark(R = apply(X = matrix(rnorm(x_temp_rows*x_temp_cols), x_tem # Compare gaussian_transform_separate ------------------------------------------------------------------------- set.seed(123) -x_cols = 8 -x_train_rows = 1000 -x_explain_rows = 1000 +x_cols = 10 +x_train_rows = 50000 +x_explain_rows = 50000 x_train_temp = matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols) x_explain_temp = matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols) x_explain_train_temp = rbind(x_explain_temp, x_train_temp) @@ -807,6 +892,7 @@ system.time({r = apply(X = rbind(x_explain_temp, x_train_temp), FUN = gaussian_transform_separate_old, n_y = nrow(x_explain_temp))}) system.time({cpp = gaussian_transform_separate_cpp(x_explain_temp, x_train_temp)}) +system.time({cpp_shapr = shapr:::gaussian_transform_separate_cpp(x_explain_temp, x_train_temp)}) all.equal(r, cpp) # gc() @@ -821,6 +907,8 @@ rbenchmark::benchmark(r = apply(X = rbind(x_explain_temp, x_train_temp), # 2 cpp 100 0.238 1.000 0.228 0.006 0 0 # 1 r 100 0.502 2.109 0.432 0.058 0 0 +Sys.setenv("PKG_CXXFLAGS"="-O0") +Rcpp::sourceCpp("src/Copula.cpp") rbenchmark::benchmark(r = apply(X = rbind(matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols), matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), MARGIN = 2, @@ -830,10 +918,18 @@ rbenchmark::benchmark(r = apply(X = rbind(matrix(rnorm(x_explain_rows*x_cols), x x_explain_rows, x_cols), matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), + cpp_dir = gaussian_transform_separate_cpp2(matrix(rnorm(x_explain_rows*x_cols), + x_explain_rows, + x_cols), + matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), cpp_shapr = shapr:::gaussian_transform_separate_cpp(matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols), matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), + cpp_shapr_dir = shapr:::gaussian_transform_separate_cpp2(matrix(rnorm(x_explain_rows*x_cols), + x_explain_rows, + x_cols), + matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), cpp2 = .Call(`_shapr_gaussian_transform_separate_cpp`, matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols), matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols))) @@ -842,6 +938,16 @@ rbenchmark::benchmark(r = apply(X = rbind(matrix(rnorm(x_explain_rows*x_cols), x # 2 cpp 100 0.361 1.000 0.322 0.025 0 0 # 1 r 100 0.553 1.532 0.496 0.047 0 0 + +# x_cols = 5, x_train_rows = 10000, x_explain_rows = 10000 +# test replications elapsed relative user.self sys.self user.child sys.child +# 2 cpp 100 2.526 1.000 2.342 0.145 0 0 +# 3 cpp_dir 100 2.574 1.019 2.359 0.136 0 0 +# 4 cpp_shapr 100 5.314 2.104 4.955 0.173 0 0 +# 5 cpp_shapr_dir 100 5.274 2.088 5.020 0.169 0 0 +# 6 cpp2 100 5.448 2.157 5.112 0.169 0 0 +# 1 r 100 4.791 1.897 3.926 0.756 0 0 + # Call `Rcpp::sourceCpp("src/Copula.cpp")` and then run rbenchmark again and then cpp is much faster. # C++ code is faster when I recompile it? I don't understand. # Rcpp::sourceCpp("src/Copula.cpp") diff --git a/src/Copula.cpp b/src/Copula.cpp index 9188cf4b7..a3f37f186 100644 --- a/src/Copula.cpp +++ b/src/Copula.cpp @@ -227,3 +227,26 @@ Rcpp::NumericMatrix gaussian_transform_separate_cpp(const arma::mat& y, const ar return z_y; } + +// // [[Rcpp::export]] +// Rcpp::NumericMatrix gaussian_transform_separate_cpp6(arma::mat y, arma::mat x) { +// int n_features = x.n_cols; +// int n_y_rows = y.n_rows; +// int n_x_rows = x.n_rows; +// +// // Pre allocate the return matrix +// Rcpp::NumericMatrix z_y(n_y_rows, n_features); +// +// // Compute the transformation for each feature at the time +// for (int idx_feature = 0; idx_feature < n_features; ++idx_feature) { +// arma::vec yx_now = arma::join_cols(y.col(idx_feature), x.col(idx_feature)); +// arma::vec rank_now_1 = arma::conv_to::from(arma::sort_index(arma::sort_index(yx_now))).head(n_y_rows); +// arma::vec rank_now_2 = arma::conv_to::from(arma::sort_index(arma::sort_index(rank_now_1))); +// arma::vec tmp = rank_now_1 - rank_now_2 + 0.5; +// Rcpp::NumericVector u_y = Rcpp::wrap(tmp / (n_x_rows + 1)); +// z_y(Rcpp::_, idx_feature) = Rcpp::qnorm(u_y); +// } +// +// return z_y; +// } + diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index a5301bbd0..e0393f34f 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -201,50 +201,6 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// gaussian_transform_cpp_d -Rcpp::NumericVector gaussian_transform_cpp_d(const Rcpp::NumericVector& x); -RcppExport SEXP _shapr_gaussian_transform_cpp_d(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(gaussian_transform_cpp_d(x)); - return rcpp_result_gen; -END_RCPP -} -// gaussian_transform_cpp_arma -Rcpp::NumericVector gaussian_transform_cpp_arma(const arma::vec& x); -RcppExport SEXP _shapr_gaussian_transform_cpp_arma(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::vec& >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(gaussian_transform_cpp_arma(x)); - return rcpp_result_gen; -END_RCPP -} -// gaussian_transform_mat_cpp_arma -Rcpp::NumericMatrix gaussian_transform_mat_cpp_arma(const arma::mat& x); -RcppExport SEXP _shapr_gaussian_transform_mat_cpp_arma(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::mat& >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(gaussian_transform_mat_cpp_arma(x)); - return rcpp_result_gen; -END_RCPP -} -// gaussian_transform_mat_cpp_arma2 -Rcpp::NumericMatrix gaussian_transform_mat_cpp_arma2(const Rcpp::NumericMatrix& x); -RcppExport SEXP _shapr_gaussian_transform_mat_cpp_arma2(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const Rcpp::NumericMatrix& >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(gaussian_transform_mat_cpp_arma2(x)); - return rcpp_result_gen; -END_RCPP -} // weight_matrix_cpp arma::mat weight_matrix_cpp(List subsets, int m, int n, NumericVector w); RcppExport SEXP _shapr_weight_matrix_cpp(SEXP subsetsSEXP, SEXP mSEXP, SEXP nSEXP, SEXP wSEXP) { @@ -287,10 +243,6 @@ static const R_CallMethodDef CallEntries[] = { {"_shapr_mahalanobis_distance_cpp", (DL_FUNC) &_shapr_mahalanobis_distance_cpp, 5}, {"_shapr_sample_features_cpp", (DL_FUNC) &_shapr_sample_features_cpp, 2}, {"_shapr_observation_impute_cpp", (DL_FUNC) &_shapr_observation_impute_cpp, 5}, - {"_shapr_gaussian_transform_cpp_d", (DL_FUNC) &_shapr_gaussian_transform_cpp_d, 1}, - {"_shapr_gaussian_transform_cpp_arma", (DL_FUNC) &_shapr_gaussian_transform_cpp_arma, 1}, - {"_shapr_gaussian_transform_mat_cpp_arma", (DL_FUNC) &_shapr_gaussian_transform_mat_cpp_arma, 1}, - {"_shapr_gaussian_transform_mat_cpp_arma2", (DL_FUNC) &_shapr_gaussian_transform_mat_cpp_arma2, 1}, {"_shapr_weight_matrix_cpp", (DL_FUNC) &_shapr_weight_matrix_cpp, 4}, {"_shapr_feature_matrix_cpp", (DL_FUNC) &_shapr_feature_matrix_cpp, 2}, {NULL, NULL, 0} From e0a784beb8dee2d3b125bbc45dd28510e60d4d87 Mon Sep 17 00:00:00 2001 From: LHBO Date: Thu, 11 Jan 2024 10:53:11 +0000 Subject: [PATCH 48/62] Added to ignore .DS_Store (mac file created when looking at folder elements as tiles) --- .gitignore | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index e6b4b3974..619cbb459 100644 --- a/.gitignore +++ b/.gitignore @@ -5,32 +5,22 @@ Rprof.out src/*.o src/*.so - *.o - *.so - tmp/ - \.RDataTmp - *.out - *.Rout - src-i386/ - src-x64/ - paper_experiments/res/single_res/ - paper_experiments/res/single_res_old/ inst/doc - docs/* doc Meta docs /doc/ /Meta/ -.idea \ No newline at end of file +.idea +.DS_Store From 2613e6e54dd20dea66d7a5344fb2da5be0409d64 Mon Sep 17 00:00:00 2001 From: LHBO Date: Thu, 11 Jan 2024 13:55:50 +0000 Subject: [PATCH 49/62] Fixed TODO tasks in approach_copula.R --- R/approach_copula.R | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/R/approach_copula.R b/R/approach_copula.R index 9f00c3fbd..76c337a37 100644 --- a/R/approach_copula.R +++ b/R/approach_copula.R @@ -24,19 +24,17 @@ setup_approach.copula <- function(internal, ...) { # Prepare transformed data parameters$copula.mu <- rep(0, ncol(x_train_mat)) - x_train_mat0 = gaussian_transform_cpp(as.matrix(x_train_mat)) + x_train_mat0 = gaussian_transform_cpp(x_train_mat) colnames(x_train_mat0) = feature_names parameters$copula.cov_mat <- get_cov_mat(x_train_mat0) x_explain_gaussian = gaussian_transform_separate_cpp(x_explain_mat, x_train_mat) colnames(x_explain_gaussian) = feature_names + if (is.null(dim(x_explain_gaussian))) x_explain_gaussian <- t(as.matrix(x_explain_gaussian)) - if (is.null(dim(x_explain_gaussian))) { - x_explain_gaussian <- t(as.matrix(x_explain_gaussian)) - } - # TODO: Change to this a data.table for consistency (not speed/memory) - internal$data$copula.x_explain_gaussian <- x_explain_gaussian + # Add objects to internal list internal$parameters <- parameters + internal$data$copula.x_explain_gaussian <- as.data.table(x_explain_gaussian) return(internal) } @@ -59,10 +57,6 @@ prepare_data.copula <- function(internal, index_features, ...) { copula.cov_mat <- internal$parameters$copula.cov_mat copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) - # TODO: Note that `as.matrix` is not needed for `copula.x_explain_gaussian_mat` as it is already defined as a matrix - # in `setup_approach.copula`, however, it seems that Martin plans to make it into a data.table, thus, I include - # `as.matrix` as future safety. DISCUSS WITH MARTIN WHAT HIS PLANS ARE! - # Generate the MC samples from N(0, 1) MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) From cfd0b2458343a14f8ce8076b44770f9c71f557cb Mon Sep 17 00:00:00 2001 From: LHBO Date: Thu, 11 Jan 2024 13:56:22 +0000 Subject: [PATCH 50/62] Stylr --- R/approach_copula.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/approach_copula.R b/R/approach_copula.R index 76c337a37..70037900c 100644 --- a/R/approach_copula.R +++ b/R/approach_copula.R @@ -24,12 +24,12 @@ setup_approach.copula <- function(internal, ...) { # Prepare transformed data parameters$copula.mu <- rep(0, ncol(x_train_mat)) - x_train_mat0 = gaussian_transform_cpp(x_train_mat) - colnames(x_train_mat0) = feature_names + x_train_mat0 <- gaussian_transform_cpp(x_train_mat) + colnames(x_train_mat0) <- feature_names parameters$copula.cov_mat <- get_cov_mat(x_train_mat0) - x_explain_gaussian = gaussian_transform_separate_cpp(x_explain_mat, x_train_mat) - colnames(x_explain_gaussian) = feature_names + x_explain_gaussian <- gaussian_transform_separate_cpp(x_explain_mat, x_train_mat) + colnames(x_explain_gaussian) <- feature_names if (is.null(dim(x_explain_gaussian))) x_explain_gaussian <- t(as.matrix(x_explain_gaussian)) # Add objects to internal list From ccfbd59504928626b7667d7b6ce4a0ad2bfb777e Mon Sep 17 00:00:00 2001 From: LHBO Date: Thu, 11 Jan 2024 16:32:06 +0000 Subject: [PATCH 51/62] Removed debug funciton --- src/Copula.cpp | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/Copula.cpp b/src/Copula.cpp index a3f37f186..9188cf4b7 100644 --- a/src/Copula.cpp +++ b/src/Copula.cpp @@ -227,26 +227,3 @@ Rcpp::NumericMatrix gaussian_transform_separate_cpp(const arma::mat& y, const ar return z_y; } - -// // [[Rcpp::export]] -// Rcpp::NumericMatrix gaussian_transform_separate_cpp6(arma::mat y, arma::mat x) { -// int n_features = x.n_cols; -// int n_y_rows = y.n_rows; -// int n_x_rows = x.n_rows; -// -// // Pre allocate the return matrix -// Rcpp::NumericMatrix z_y(n_y_rows, n_features); -// -// // Compute the transformation for each feature at the time -// for (int idx_feature = 0; idx_feature < n_features; ++idx_feature) { -// arma::vec yx_now = arma::join_cols(y.col(idx_feature), x.col(idx_feature)); -// arma::vec rank_now_1 = arma::conv_to::from(arma::sort_index(arma::sort_index(yx_now))).head(n_y_rows); -// arma::vec rank_now_2 = arma::conv_to::from(arma::sort_index(arma::sort_index(rank_now_1))); -// arma::vec tmp = rank_now_1 - rank_now_2 + 0.5; -// Rcpp::NumericVector u_y = Rcpp::wrap(tmp / (n_x_rows + 1)); -// z_y(Rcpp::_, idx_feature) = Rcpp::qnorm(u_y); -// } -// -// return z_y; -// } - From 71b8f409f5608b803f496ac788814f2047467181 Mon Sep 17 00:00:00 2001 From: LHBO Date: Thu, 11 Jan 2024 17:04:19 +0000 Subject: [PATCH 52/62] Added comparisons to when we compile with Rcpp::sourceCpp in all examples --- inst/scripts/compare_copula_in_R_and_C++.R | 536 ++++++++++++++++++--- 1 file changed, 467 insertions(+), 69 deletions(-) diff --git a/inst/scripts/compare_copula_in_R_and_C++.R b/inst/scripts/compare_copula_in_R_and_C++.R index a0c26bdde..a304c7ef1 100644 --- a/inst/scripts/compare_copula_in_R_and_C++.R +++ b/inst/scripts/compare_copula_in_R_and_C++.R @@ -39,7 +39,7 @@ prepare_data.copula_old <- function(internal, index_features = NULL, ...) { m = n_features, x_explain = x_explain0[i, , drop = FALSE], x_train = as.matrix(x_train), - x_explain_gaussian = copula.x_explain_gaussian[i, , drop = FALSE] + x_explain_gaussian = as.matrix(copula.x_explain_gaussian)[i, , drop = FALSE] ) dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination") dt_l[[i]][, w := 1 / n_samples] @@ -314,8 +314,296 @@ quantile.type7 <- function(x, probs) { } + +# C++ with sourceRcpp --------------------------------------------------------------------------------------------- +#' @inheritParams default_doc +#' @rdname prepare_data +#' @export +#' @author Lars Henry Berge Olsen +prepare_data.copula_sourceCpp <- function(internal, index_features, ...) { + # Extract used variables + S <- internal$objects$S[index_features, , drop = FALSE] + feature_names <- internal$parameters$feature_names + n_explain <- internal$parameters$n_explain + n_samples <- internal$parameters$n_samples + n_features <- internal$parameters$n_features + n_combinations_now <- length(index_features) + x_train_mat <- as.matrix(internal$data$x_train) + x_explain_mat <- as.matrix(internal$data$x_explain) + copula.mu <- internal$parameters$copula.mu + copula.cov_mat <- internal$parameters$copula.cov_mat + copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) + + # Generate the MC samples from N(0, 1) + MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features) + + # Use C++ to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}), for all coalitions and explicands, + # and then transforming them back to the original scale using the inverse Gaussian transform in C++. + # The object `dt` is a 3D array of dimension (n_samples, n_explain * n_coalitions, n_features). + dt <- prepare_data_copula_cpp_sourceCpp( + MC_samples_mat = MC_samples_mat, + x_explain_mat = x_explain_mat, + x_explain_gaussian_mat = copula.x_explain_gaussian_mat, + x_train_mat = x_train_mat, + S = S, + mu = copula.mu, + cov_mat = copula.cov_mat + ) + + # Reshape `dt` to a 2D array of dimension (n_samples * n_explain * n_coalitions, n_features). + dim(dt) <- c(n_combinations_now * n_explain * n_samples, n_features) + + # Convert to a data.table and add extra identification columns + dt <- data.table::as.data.table(dt) + data.table::setnames(dt, feature_names) + dt[, id_combination := rep(seq_len(nrow(S)), each = n_samples * n_explain)] + dt[, id := rep(seq(n_explain), each = n_samples, times = nrow(S))] + dt[, w := 1 / n_samples] + dt[, id_combination := index_features[id_combination]] + data.table::setcolorder(dt, c("id_combination", "id", feature_names)) + + return(dt) +} + +Rcpp::sourceCpp( + code = ' + #include +using namespace Rcpp; + +// [[Rcpp::depends(RcppArmadillo)]] + +// Compute the quantiles using quantile type seven +// +// @details Using quantile type number seven from stats::quantile in R. +// +// @param x arma::vec. Numeric vector whose sample quantiles are wanted. +// @param probs arma::vec. Numeric vector of probabilities with values between zero and one. +// +// @return A vector of length `length(probs)` with the quantiles is returned. +// +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] + arma::vec quantile_type7_cpp_sourceCpp(const arma::vec& x, const arma::vec& probs) { + int n = x.n_elem; + int m = probs.n_elem; + + // Initialize output quantile vector + arma::vec qs(m); + + // Calculate indices + arma::vec index = 1 + (n - 1) * probs; + arma::vec lo = arma::floor(index); + arma::vec hi = arma::ceil(index); + + // Sort the data + arma::vec sorted_x = arma::sort(x); + + // Calculate quantiles using quantile type seven + for (int i = 0; i < m; ++i) { + qs(i) = sorted_x(lo(i) - 1); + if (index(i) > lo(i)) { + double h = index(i) - lo(i); + qs(i) = (1 - h) * qs(i) + h * sorted_x(hi(i) - 1); + } + } + + return qs; + } + +// Transforms new data to a standardized normal distribution +// +// @details The function uses `arma::quantile(...)` which corresponds to Rs `stats::quantile(..., type = 5)`. +// +// @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform. +// @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`. +// +// @return arma::mat of the same dimension as `z` +// +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] + arma::mat inv_gaussian_transform_cpp_sourceCpp(const arma::mat& z, const arma::mat& x) { + int n_features = z.n_cols; + int n_samples = z.n_rows; + arma::mat z_new(n_samples, n_features); + arma::mat u = arma::normcdf(z); + for (int feature_idx = 0; feature_idx < n_features; feature_idx++) { + z_new.col(feature_idx) = quantile_type7_cpp_sourceCpp(x.col(feature_idx), u.col(feature_idx)); + } + return z_new; + } + +// Generate (Gaussian) Copula MC samples +// +// @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the +// univariate standard normal. +// @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations +// to explain on the original scale. +// @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the +// observations to explain after being transformed using the Gaussian transform, i.e., the samples have been +// transformed to a standardized normal distribution. +// @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations. +// @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of +// the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones. +// This is not a problem internally in shapr as the empty and grand coalitions treated differently. +// @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed +// using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution. +// @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance +// between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been +// transformed to a standardized normal distribution. +// +// @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where +// the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian +// copula MC samples for each explicand and coalition on the original scale. +// +// @export +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] + arma::cube prepare_data_copula_cpp_sourceCpp(const arma::mat& MC_samples_mat, + const arma::mat& x_explain_mat, + const arma::mat& x_explain_gaussian_mat, + const arma::mat& x_train_mat, + const arma::mat& S, + const arma::vec& mu, + const arma::mat& cov_mat) { + + int n_explain = x_explain_mat.n_rows; + int n_samples = MC_samples_mat.n_rows; + int n_features = MC_samples_mat.n_cols; + int n_coalitions = S.n_rows; + + // Initialize auxiliary matrix and result cube + arma::mat aux_mat(n_samples, n_features); + arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features); + + // Iterate over the coalitions + for (int S_ind = 0; S_ind < n_coalitions; S_ind++) { + + // Get current coalition S and the indices of the features in coalition S and mask Sbar + arma::mat S_now = S.row(S_ind); + arma::uvec S_now_idx = arma::find(S_now > 0.5); + arma::uvec Sbar_now_idx = arma::find(S_now < 0.5); + + // Extract the features we condition on, both on the original scale and the Gaussian transformed values. + arma::mat x_S_star = x_explain_mat.cols(S_now_idx); + arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx); + + // Extract the mean values of the Gaussian transformed features in the two sets + arma::vec mu_S = mu.elem(S_now_idx); + arma::vec mu_Sbar = mu.elem(Sbar_now_idx); + + // Extract the relevant parts of the Gaussian transformed covariance matrix + arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx); + arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx); + arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx); + arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx); + + // Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix + arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS); + arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar; + + // Ensure that the conditional covariance matrix is symmetric + if (!cond_cov_mat_Sbar_given_S.is_symmetric()) { + cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S); + } + + // Compute the conditional mean of Xsbar given Xs = Xs_star_gaussian, i.e., of the Gaussian transformed features + arma::mat x_Sbar_gaussian_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t(); + x_Sbar_gaussian_mean.each_col() += mu_Sbar; + + // Transform the samples to be from N(O, Sigma_{Sbar|S}) + arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S); + + // Loop over the different explicands and combine the generated values with the values we conditioned on + for (int idx_now = 0; idx_now < n_explain; idx_now++) { + + // Transform the MC samples to be from N(mu_{Sbar|S}, Sigma_{Sbar|S}) for one coalition and one explicand + arma::mat MC_samples_mat_now_now = + MC_samples_mat_now + repmat(trans(x_Sbar_gaussian_mean.col(idx_now)), n_samples, 1); + + // Transform the MC to the original scale using the inverse Gaussian transform + arma::mat MC_samples_mat_now_now_trans = + inv_gaussian_transform_cpp_sourceCpp(MC_samples_mat_now_now, x_train_mat.cols(Sbar_now_idx)); + + // Insert the generate Gaussian copula MC samples and the feature values we condition on into an auxiliary matrix + aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans; + aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1); + + // Insert the auxiliary matrix into the result cube + result_cube.col(S_ind*n_explain + idx_now) = aux_mat; + } + } + + return result_cube; + } + +// Transforms a sample to standardized normal distribution +// +// @param x Numeric matrix. The data which should be transformed to a standard normal distribution. +// +// @return Numeric matrix of dimension `dim(x)` +// +// @keywords internal +// @author Lars Henry Berge Olsen +// [[Rcpp::export]] +Rcpp::NumericMatrix gaussian_transform_cpp_sourceCpp(const arma::mat& x) { + int n_obs = x.n_rows; + int n_features = x.n_cols; + + // Pre allocate the return matrix + Rcpp::NumericMatrix x_trans(n_obs, n_features); + + // Iterate over the columns, i.e., the features + for (int idx_feature = 0; idx_feature < n_features; ++idx_feature) { + // Compute the rank and transform to standardized normal distribution + arma::vec rank_now = arma::conv_to::from(arma::sort_index(arma::sort_index(x.col(idx_feature)))); + Rcpp::NumericVector u = Rcpp::wrap((rank_now + 1) / (n_obs + 1)); + x_trans(Rcpp::_, idx_feature) = Rcpp::qnorm(u); + } + + return x_trans; +} + +// Transforms new data to standardized normal (column-wise) based on other data transformations +// +// @param y arma::mat. A numeric matrix containing the data that is to be transformed. +// @param x arma::mat. A numeric matrix containing the data of the original transformation. +// +// @return An arma::mat matrix of the same dimension as `y` containing the back-transformed Gaussian data. +// +// @keywords internal +// @author Lars Henry Berge Olsen, Martin Jullum +// [[Rcpp::export]] +Rcpp::NumericMatrix gaussian_transform_separate_cpp_sourceCpp(const arma::mat& y, const arma::mat& x) { + int n_features = x.n_cols; + int n_y_rows = y.n_rows; + int n_x_rows = x.n_rows; + + // Pre allocate the return matrix + Rcpp::NumericMatrix z_y(n_y_rows, n_features); + + // Compute the transformation for each feature at the time + for (int idx_feature = 0; idx_feature < n_features; ++idx_feature) { + arma::vec yx_now = arma::join_cols(y.col(idx_feature), x.col(idx_feature)); + arma::vec rank_now_1 = arma::conv_to::from(arma::sort_index(arma::sort_index(yx_now))).head(n_y_rows); + arma::vec rank_now_2 = arma::conv_to::from(arma::sort_index(arma::sort_index(rank_now_1))); + arma::vec tmp = rank_now_1 - rank_now_2 + 0.5; + Rcpp::NumericVector u_y = Rcpp::wrap(tmp / (n_x_rows + 1)); + z_y(Rcpp::_, idx_feature) = Rcpp::qnorm(u_y); + } + + return z_y; +} + + ' +) + + + # Old C++ code ---------------------------------------------------------------------------------------------------- -sourceCpp( +Rcpp::sourceCpp( code = ' // [[Rcpp::depends("RcppArmadillo")]] #include @@ -706,7 +994,7 @@ time_2 = system.time({dt_2 <- shapr:::prepare_data_copula_cpp( )}) time_2 -time_2andhalf = system.time({dt_2andhalf <- +time_3 = system.time({dt_3 <- .Call(`_shapr_prepare_data_copula_cpp`, MC_samples_mat = MC_samples_mat, x_explain_mat = x_explain_mat, @@ -716,11 +1004,10 @@ time_2andhalf = system.time({dt_2andhalf <- mu = copula.mu, cov_mat = copula.cov_mat )}) -time_2andhalf +time_3 -# Rcpp::compileAttributes(pkgdir = ".", verbose = TRUE) Rcpp::sourceCpp("src/Copula.cpp") -time_3 = system.time({dt_3 <- prepare_data_copula_cpp( +time_4 = system.time({dt_4 <- prepare_data_copula_cpp( MC_samples_mat = MC_samples_mat, x_explain_mat = x_explain_mat, x_explain_gaussian_mat = copula.x_explain_gaussian_mat, @@ -729,9 +1016,9 @@ time_3 = system.time({dt_3 <- prepare_data_copula_cpp( mu = copula.mu, cov_mat = copula.cov_mat )}) -time_3 +time_4 -time_4 = system.time({dt_4 <- shapr::prepare_data_copula_cpp( +time_5 = system.time({dt_5 <- shapr::prepare_data_copula_cpp( MC_samples_mat = MC_samples_mat, x_explain_mat = x_explain_mat, x_explain_gaussian_mat = copula.x_explain_gaussian_mat, @@ -741,10 +1028,11 @@ time_4 = system.time({dt_4 <- shapr::prepare_data_copula_cpp( cov_mat = copula.cov_mat )}) -rbind(time_1, time_2, time_3, time_4) +rbind(time_1, time_2, time_3, time_4, time_5) all.equal(dt_1, dt_2) all.equal(dt_2, dt_3) all.equal(dt_3, dt_4) +all.equal(dt_4, dt_5) # Compare prepare_data.copula ---------------------------------------------------------------------------------------- set.seed(123) @@ -775,6 +1063,16 @@ time_only_cpp <- system.time({ data.table::setorderv(res_only_cpp, c("id", "id_combination")) time_only_cpp +# The C++ code with my own quantile function +time_only_cpp_sourceCpp <- system.time({ + res_only_cpp_sourceCpp <- prepare_data.copula_sourceCpp( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +data.table::setorderv(res_only_cpp_sourceCpp, c("id", "id_combination")) +time_only_cpp_sourceCpp + # The C++ code with quantile functions from arma time_only_cpp_arma <- system.time({ res_only_cpp_arma <- prepare_data.copula_cpp_arma( @@ -799,6 +1097,7 @@ time_cpp_and_R times <- rbind( time_only_R, time_only_cpp, + time_only_cpp_sourceCpp, time_only_cpp_arma, time_cpp_and_R ) @@ -806,12 +1105,12 @@ times # TIMES for all coalitions (254), n_samples <- 1000, n_train <- 1000, n_test <- 20, M <- 8 -# user.self sys.self elapsed user.child sys.child -# time_only_R 67.050 2.587 72.357 0.011 0.018 -# time_only_cpp 4.588 0.406 5.218 0.000 0.000 -# time_only_cpp_arma 23.853 0.663 25.391 0.000 0.000 -# time_cpp_and_R 7.430 1.346 9.086 0.000 0.000 - +# user.self sys.self elapsed user.child sys.child +# time_only_R 65.266 2.142 70.896 0 0 +# time_only_cpp 4.622 0.393 5.212 0 0 +# time_only_cpp_sourceCpp 1.823 0.423 2.279 0 0 +# time_only_cpp_arma 23.874 0.604 27.801 0 0 +# time_cpp_and_R 6.826 1.493 8.683 0 0 # Relative speedup of new method times_relative <- t(sapply(seq_len(nrow(times)), function(idx) times[1, ] / times[idx, ])) @@ -819,63 +1118,69 @@ rownames(times_relative) <- paste0(rownames(times), "_rel") times_relative # RELATIVE TIMES for all coalitions, n_samples <- 1000, n_train <- 1000, n_test <- 20, M <- 8 - -# user.self sys.self elapsed user.child sys.child -# time_only_R_rel 1.0000 1.0000 1.0000 1 1 -# time_only_cpp_rel 14.6142 6.3719 13.8668 Inf Inf -# time_only_cpp_arma_rel 2.8110 3.9020 2.8497 Inf Inf -# time_cpp_and_R_rel 9.0242 1.9220 7.9636 Inf Inf +# user.self sys.self elapsed user.child sys.child +# time_only_R_rel 1.0000 1.0000 1.0000 NaN NaN +# time_only_cpp_rel 14.1207 5.4504 13.6025 NaN NaN +# time_only_cpp_sourceCpp_rel 35.8014 5.0638 31.1084 NaN NaN +# time_only_cpp_arma_rel 2.7338 3.5464 2.5501 NaN NaN +# time_cpp_and_R_rel 9.5614 1.4347 8.1649 NaN NaN # Aggregate the MC sample values for each explicand and combination res_only_R <- res_only_R[, w := NULL] res_only_cpp <- res_only_cpp[, w := NULL] +res_only_cpp_sourceCpp <- res_only_cpp_sourceCpp[, w := NULL] res_only_cpp_arma <- res_only_cpp_arma[, w := NULL] res_cpp_and_R <- res_cpp_and_R[, w := NULL] res_only_R_agr <- res_only_R[, lapply(.SD, mean), by = c("id", "id_combination")] res_only_cpp_agr <- res_only_cpp[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_cpp_sourceCpp_agr <- res_only_cpp_sourceCpp[, lapply(.SD, mean), by = c("id", "id_combination")] res_only_cpp_arma_agr <- res_only_cpp_arma[, lapply(.SD, mean), by = c("id", "id_combination")] res_cpp_and_R_agr <- res_cpp_and_R[, lapply(.SD, mean), by = c("id", "id_combination")] # Difference res_only_R_agr - res_only_cpp_agr +res_only_R_agr - res_only_cpp_sourceCpp_agr res_only_R_agr - res_only_cpp_arma_agr res_only_R_agr - res_cpp_and_R_agr # Max absolute difference max(abs(res_only_R_agr - res_only_cpp_agr)) +max(abs(res_only_R_agr - res_only_cpp_sourceCpp_agr)) max(abs(res_only_R_agr - res_only_cpp_arma_agr)) max(abs(res_only_R_agr - res_cpp_and_R_agr)) # Max absolute relative difference max(abs(res_only_R_agr - res_only_cpp_agr) / res_only_cpp_agr) +max(abs(res_only_R_agr - res_only_cpp_sourceCpp_agr) / res_only_cpp_sourceCpp_agr) max(abs(res_only_R_agr - res_only_cpp_arma_agr) / res_only_cpp_arma_agr) max(abs(res_only_R_agr - res_cpp_and_R_agr) / res_cpp_and_R_agr) # Compare gaussian_transform -------------------------------------------------------------------------------------- set.seed(123) -x_temp_rows = 100000 +x_temp_rows = 10000 x_temp_cols = 20 x_temp = matrix(rnorm(x_temp_rows*x_temp_cols), x_temp_rows, x_temp_cols) # Compare for equal values -system.time({gaussian_transform_R = apply(X = x_temp, MARGIN = 2, FUN = gaussian_transform_old)}) -system.time({gaussian_transform_cpp = gaussian_transform_cpp(x_temp)}) -system.time({gaussian_transform_cpp = shapr:::gaussian_transform_cpp(x_temp)}) -all.equal(gaussian_transform_R, gaussian_transform_cpp) # TRUE +system.time({gaussian_transform_R_res = apply(X = x_temp, MARGIN = 2, FUN = gaussian_transform_old)}) +system.time({gaussian_transform_cpp_res = gaussian_transform_cpp(x_temp)}) +system.time({gaussian_transform_cpp_sourceCpp_res = gaussian_transform_cpp_sourceCpp(x_temp)}) +all.equal(gaussian_transform_R_res, gaussian_transform_cpp_res) # TRUE +all.equal(gaussian_transform_R_res, gaussian_transform_cpp_sourceCpp_res) # TRUE # Compare time (generate new data each time such that the result is not stored in the cache) -set.seed(1234) -gc() -#Rcpp::sourceCpp("src/Copula.cpp") # C++ code is faster when I recompile it? I don't understand. rbenchmark::benchmark(R = apply(X = matrix(rnorm(x_temp_rows*x_temp_cols), x_temp_rows, x_temp_cols), MARGIN = 2, FUN = gaussian_transform_old), cpp = gaussian_transform_cpp(matrix(rnorm(x_temp_rows*x_temp_cols), x_temp_rows, x_temp_cols)), + cpp_sourceCpp = gaussian_transform_cpp_sourceCpp(matrix(rnorm(x_temp_rows*x_temp_cols), + x_temp_rows, + x_temp_cols)), replications = 100) -# test replications elapsed relative user.self sys.self user.child sys.child -# 2 cpp 100 1.933 1.000 1.764 0.149 0 0 -# 1 R 100 3.152 1.631 2.498 0.511 0 0 - +# test replications elapsed relative user.self sys.self user.child sys.child +# 2 cpp 100 7.604 1.987 7.059 0.294 0 0 +# 3 cpp_sourceCpp 100 3.827 1.000 3.529 0.254 0 0 +# 1 R 100 6.183 1.616 4.899 0.738 0 0 # Compare gaussian_transform_separate ------------------------------------------------------------------------- @@ -892,23 +1197,23 @@ system.time({r = apply(X = rbind(x_explain_temp, x_train_temp), FUN = gaussian_transform_separate_old, n_y = nrow(x_explain_temp))}) system.time({cpp = gaussian_transform_separate_cpp(x_explain_temp, x_train_temp)}) -system.time({cpp_shapr = shapr:::gaussian_transform_separate_cpp(x_explain_temp, x_train_temp)}) +system.time({cpp_sourceCpp = gaussian_transform_separate_cpp_sourceCpp(x_explain_temp, x_train_temp)}) all.equal(r, cpp) +all.equal(r, cpp_sourceCpp) -# gc() # Rcpp::sourceCpp("src/Copula.cpp") # C++ code is faster when I recompile it? I don't understand. - rbenchmark::benchmark(r = apply(X = rbind(x_explain_temp, x_train_temp), MARGIN = 2, FUN = gaussian_transform_separate_old, n_y = nrow(x_explain_temp)), - cpp = gaussian_transform_separate_cpp(x_explain_temp, x_train_temp)) -# test replications elapsed relative user.self sys.self user.child sys.child -# 2 cpp 100 0.238 1.000 0.228 0.006 0 0 -# 1 r 100 0.502 2.109 0.432 0.058 0 0 + cpp = gaussian_transform_separate_cpp(x_explain_temp, x_train_temp), + cpp_sourceCpp = gaussian_transform_separate_cpp_sourceCpp(x_explain_temp, x_train_temp), + replications = 20) +# test replications elapsed relative user.self sys.self user.child sys.child +# 2 cpp 20 10.933 2.352 10.082 0.179 0 0 +# 3 cpp_sourceCpp 20 4.648 1.000 4.389 0.100 0 0 +# 1 r 20 9.787 2.106 8.409 0.797 0 0 -Sys.setenv("PKG_CXXFLAGS"="-O0") -Rcpp::sourceCpp("src/Copula.cpp") rbenchmark::benchmark(r = apply(X = rbind(matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols), matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), MARGIN = 2, @@ -918,36 +1223,129 @@ rbenchmark::benchmark(r = apply(X = rbind(matrix(rnorm(x_explain_rows*x_cols), x x_explain_rows, x_cols), matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), - cpp_dir = gaussian_transform_separate_cpp2(matrix(rnorm(x_explain_rows*x_cols), - x_explain_rows, - x_cols), - matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), - cpp_shapr = shapr:::gaussian_transform_separate_cpp(matrix(rnorm(x_explain_rows*x_cols), - x_explain_rows, - x_cols), - matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), - cpp_shapr_dir = shapr:::gaussian_transform_separate_cpp2(matrix(rnorm(x_explain_rows*x_cols), + cpp2 = .Call(`_shapr_gaussian_transform_separate_cpp`, + matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols), + matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), + cpp_cpp_sourceCpp = gaussian_transform_separate_cpp_sourceCpp(matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols), matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols)), - cpp2 = .Call(`_shapr_gaussian_transform_separate_cpp`, - matrix(rnorm(x_explain_rows*x_cols), x_explain_rows, x_cols), - matrix(rnorm(x_train_rows*x_cols), x_train_rows, x_cols))) + replications = 20) -# test replications elapsed relative user.self sys.self user.child sys.child -# 2 cpp 100 0.361 1.000 0.322 0.025 0 0 -# 1 r 100 0.553 1.532 0.496 0.047 0 0 +# test replications elapsed relative user.self sys.self user.child sys.child +# 2 cpp 20 12.634 2.202 11.275 0.352 0.00 0.00 +# 4 cpp_cpp_sourceCpp 20 5.737 1.000 5.287 0.182 0.00 0.00 +# 3 cpp2 20 11.566 2.016 10.890 0.246 0.01 0.01 +# 1 r 20 11.937 2.081 10.232 1.027 0.00 0.00 -# x_cols = 5, x_train_rows = 10000, x_explain_rows = 10000 -# test replications elapsed relative user.self sys.self user.child sys.child -# 2 cpp 100 2.526 1.000 2.342 0.145 0 0 -# 3 cpp_dir 100 2.574 1.019 2.359 0.136 0 0 -# 4 cpp_shapr 100 5.314 2.104 4.955 0.173 0 0 -# 5 cpp_shapr_dir 100 5.274 2.088 5.020 0.169 0 0 -# 6 cpp2 100 5.448 2.157 5.112 0.169 0 0 -# 1 r 100 4.791 1.897 3.926 0.756 0 0 - -# Call `Rcpp::sourceCpp("src/Copula.cpp")` and then run rbenchmark again and then cpp is much faster. -# C++ code is faster when I recompile it? I don't understand. -# Rcpp::sourceCpp("src/Copula.cpp") + + + +# Simple C examples compile issues time -------------------------------------------------------------------------------- +sourceCpp( + code = ' +#include +// [[Rcpp::depends(RcppArmadillo)]] +// [[Rcpp::export]] +Rcpp::NumericVector addVectors(const Rcpp::NumericVector& vec1, const Rcpp::NumericVector& vec2) { + // Check if the input vectors are of the same length + if (vec1.size() != vec2.size()) { + Rcpp::stop("Vectors must be of the same length."); + } + + // Create a result vector of the same length as the input vectors + Rcpp::NumericVector result(vec1.size()); + + // Perform element-wise addition + for (int i = 0; i < vec1.size(); ++i) { + result[i] = vec1[i] + vec2[i]; + } + + return result; +} + +// [[Rcpp::export]] +Rcpp::NumericMatrix addMatrices(const Rcpp::NumericMatrix& mat1, const Rcpp::NumericMatrix& mat2) { + // Check if the input matrices have the same dimensions + if (mat1.nrow() != mat2.nrow() || mat1.ncol() != mat2.ncol()) { + Rcpp::stop("Matrices must have the same dimensions."); + } + + // Create a result matrix of the same dimensions as the input matrices + Rcpp::NumericMatrix result(mat1.nrow(), mat1.ncol()); + + // Perform element-wise addition + for (int i = 0; i < mat1.nrow(); ++i) { + for (int j = 0; j < mat1.ncol(); ++j) { + result(i, j) = mat1(i, j) + mat2(i, j); + } + } + + return result; +} + +// [[Rcpp::export]] +arma::mat addMatricesArmadillo(const arma::mat& mat1, const arma::mat& mat2) { + // Check if the input matrices have the same dimensions + if (mat1.n_rows != mat2.n_rows || mat1.n_cols != mat2.n_cols) { + Rcpp::stop("Matrices must have the same dimensions."); + } + + // Perform element-wise addition using Armadillo + arma::mat result = mat1 + mat2; + + return result; +}') + + +# !!!!!READ!!!!! +# Copy the code above into `src/Copula.cpp` and then build the package with +devtools::load_all(".") + +# Dimension of matrix +n = 1000000 +m = 100 + +# Make matrices +mat1 = matrix(rnorm(n*m), n, m) +mat2 = matrix(rnorm(n*m), n, m) + +# Time when using the compiled code using `devtools::load_all()` +shapr_vec_time = system.time({shapr_vec_res = addVectors(mat1[,1], mat2[,1])}) +shapr_mat_rcpp_time = system.time({shapr_mat_rcpp_res <- addMatrices(mat1, mat2)}) +shapr_mat_arma_time = system.time({shapr_mat_arma_res <- addMatricesArmadillo(mat1, mat2)}) + +# Then we compile with `Rcpp::compileAttributes()` +Rcpp::compileAttributes(pkgdir = ".", verbose = TRUE) +compileAttributes_vec_time = system.time({compileAttributes_vec_res = addVectors(mat1[,1], mat2[,1])}) +compileAttributes_mat_rcpp_time = system.time({compileAttributes_mat_rcpp_res <- addMatrices(mat1, mat2)}) +compileAttributes_mat_arma_time = system.time({compileAttributes_mat_arma_res <- addMatricesArmadillo(mat1, mat2)}) + +# Then we compile with `Rcpp::sourceCpp()` +# Here a shared library is built +Rcpp::sourceCpp("src/Copula.cpp", verbose = TRUE) +sourceCpp_vec_time = system.time({sourceCpp_vec_res = addVectors(mat1[,1], mat2[,1])}) +sourceCpp_mat_rcpp_time = system.time({sourceCpp_mat_rcpp_res <- addMatrices(mat1, mat2)}) +sourceCpp_mat_arma_time = system.time({sourceCpp_mat_arma_res <- addMatricesArmadillo(mat1, mat2)}) + +# Look at the times. See a drastic decrease when using sourceCpp. Half on my mac +rbind(shapr_vec_time, + compileAttributes_vec_time, + sourceCpp_vec_time) +rbind(shapr_mat_rcpp_time, + compileAttributes_mat_rcpp_time, + sourceCpp_mat_rcpp_time) +rbind(shapr_mat_arma_time, + compileAttributes_mat_arma_time, + sourceCpp_mat_arma_time) + +# All equal +all.equal(shapr_vec_res, compileAttributes_vec_res) +all.equal(shapr_vec_res, sourceCpp_vec_res) + +all.equal(shapr_mat_rcpp_res, compileAttributes_mat_rcpp_res) +all.equal(shapr_mat_rcpp_res, sourceCpp_mat_rcpp_res) + +all.equal(shapr_mat_arma_res, compileAttributes_mat_arma_res) +all.equal(shapr_mat_arma_res, sourceCpp_mat_arma_res) From 33d739187393e09e452a4c6665ac11def2f922f0 Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 12 Jan 2024 12:02:14 +0000 Subject: [PATCH 53/62] Added test that shows that cpp and R gives same values when n_samples is large --- inst/scripts/compare_copula_in_R_and_C++.R | 144 +++++++++++++++++++++ 1 file changed, 144 insertions(+) diff --git a/inst/scripts/compare_copula_in_R_and_C++.R b/inst/scripts/compare_copula_in_R_and_C++.R index a304c7ef1..2077ea27b 100644 --- a/inst/scripts/compare_copula_in_R_and_C++.R +++ b/inst/scripts/compare_copula_in_R_and_C++.R @@ -1349,3 +1349,147 @@ all.equal(shapr_mat_rcpp_res, sourceCpp_mat_rcpp_res) all.equal(shapr_mat_arma_res, compileAttributes_mat_arma_res) all.equal(shapr_mat_arma_res, sourceCpp_mat_arma_res) + + + + +# Large n_samples equal results ---------------------------------------------------------------------------------------- +{ + n_samples <- 100000 + n_train <- 1000 + n_test <- 5 + M <- 4 + rho <- 0.5 + betas <- c(0, rep(1, M)) + + # We use the Gaussian copula approach + approach <- "copula" + + # Mean of the multivariate Gaussian distribution + mu <- rep(0, times = M) + mu <- seq(M) + + # Create the covariance matrix + sigma <- matrix(rho, ncol = M, nrow = M) # Old + for (i in seq(1, M - 1)) { + for (j in seq(i + 1, M)) { + sigma[i, j] <- sigma[j, i] <- rho^abs(i - j) + } + } + diag(sigma) <- 1 + + # Set seed for reproducibility + seed_setup <- 1996 + set.seed(seed_setup) + + # Make Gaussian data + data_train <- data.table(mvtnorm::rmvnorm(n = n_train, mean = mu, sigma = sigma)) + data_test <- data.table(mvtnorm::rmvnorm(n = n_test, mean = mu, sigma = sigma)) + colnames(data_train) <- paste("X", seq(M), sep = "") + colnames(data_test) <- paste("X", seq(M), sep = "") + + # Make the response + response_train <- as.vector(cbind(1, as.matrix(data_train)) %*% betas) + response_test <- as.vector(cbind(1, as.matrix(data_test)) %*% betas) + + # Put together the data + data_train_with_response <- copy(data_train)[, y := response_train] + data_test_with_response <- copy(data_test)[, y := response_test] + + # Fit a LM model + predictive_model <- lm(y ~ ., data = data_train_with_response) + + # Get the prediction zero, i.e., the phi0 Shapley value. + prediction_zero <- mean(response_train) + + model <- predictive_model + x_explain <- data_test + x_train <- data_train + keep_samp_for_vS <- FALSE + predict_model <- NULL + get_model_specs <- NULL + timing <- TRUE + n_combinations <- NULL + group <- NULL + feature_specs <- shapr:::get_feature_specs(get_model_specs, model) + n_batches <- 1 + seed <- 1 + + internal <- setup( + x_train = x_train, + x_explain = x_explain, + approach = approach, + prediction_zero = prediction_zero, + n_combinations = n_combinations, + group = group, + n_samples = n_samples, + n_batches = n_batches, + seed = seed, + feature_specs = feature_specs, + keep_samp_for_vS = keep_samp_for_vS, + predict_model = predict_model, + get_model_specs = get_model_specs, + timing = timing + ) + + # Gets predict_model (if not passed to explain) + predict_model <- shapr:::get_predict_model( + predict_model = predict_model, + model = model + ) + + # Sets up the Shapley (sampling) framework and prepares the + # conditional expectation computation for the chosen approach + # Note: model and predict_model are ONLY used by the AICc-methods of approach empirical to find optimal parameters + internal <- shapr:::setup_computation(internal, model, predict_model) +} + +look_at_coalitions <- seq(1, 2^M - 2) +# look_at_coalitions <- seq(1, 2^M - 2, 10) +# look_at_coalitions <- seq(1, 2^M - 2, 25) + +# The old R code +time_only_R <- system.time({ + res_only_R <- prepare_data.copula_old( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +time_only_R + +# The C++ code with my own quantile function +time_only_cpp <- system.time({ + res_only_cpp <- prepare_data.copula( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +data.table::setorderv(res_only_cpp, c("id", "id_combination")) +time_only_cpp + +# The C++ code with my own quantile function +time_only_cpp_sourceCpp <- system.time({ + res_only_cpp_sourceCpp <- prepare_data.copula_sourceCpp( + internal = internal, + index_features = internal$objects$S_batch$`1`[look_at_coalitions] + ) +}) +data.table::setorderv(res_only_cpp_sourceCpp, c("id", "id_combination")) +time_only_cpp_sourceCpp + +# Look at the differences +# Aggregate the MC sample values for each explicand and combination +res_only_R <- res_only_R[, w := NULL] +res_only_cpp <- res_only_cpp[, w := NULL] +res_only_cpp_sourceCpp <- res_only_cpp_sourceCpp[, w := NULL] +res_only_R_agr <- res_only_R[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_cpp_agr <- res_only_cpp[, lapply(.SD, mean), by = c("id", "id_combination")] +res_only_cpp_sourceCpp_agr <- res_only_cpp_sourceCpp[, lapply(.SD, mean), by = c("id", "id_combination")] + +# Difference +res_only_R_agr - res_only_cpp_agr +res_only_R_agr - res_only_cpp_sourceCpp_agr + +# Max absolute difference +max(abs(res_only_R_agr - res_only_cpp_agr)) +max(abs(res_only_R_agr - res_only_cpp_sourceCpp_agr)) From 7634b38f96731e1dfcf44c83b4a1fda7d2930892 Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 12 Jan 2024 12:37:53 +0000 Subject: [PATCH 54/62] Go back to using R for gaussian_transform and gaussian_transform_separate --- R/approach_copula.R | 46 +++++++++++++++++++++++++++++++---- src/Copula.cpp | 58 --------------------------------------------- 2 files changed, 42 insertions(+), 62 deletions(-) diff --git a/R/approach_copula.R b/R/approach_copula.R index 70037900c..5dde7db6a 100644 --- a/R/approach_copula.R +++ b/R/approach_copula.R @@ -24,12 +24,15 @@ setup_approach.copula <- function(internal, ...) { # Prepare transformed data parameters$copula.mu <- rep(0, ncol(x_train_mat)) - x_train_mat0 <- gaussian_transform_cpp(x_train_mat) - colnames(x_train_mat0) <- feature_names + x_train_mat0 <- apply(X = x_train_mat, MARGIN = 2, FUN = gaussian_transform) parameters$copula.cov_mat <- get_cov_mat(x_train_mat0) - x_explain_gaussian <- gaussian_transform_separate_cpp(x_explain_mat, x_train_mat) - colnames(x_explain_gaussian) <- feature_names + x_explain_gaussian <- apply( + X = rbind(x_explain_mat, x_train_mat), + MARGIN = 2, + FUN = gaussian_transform_separate, + n_y = nrow(x_explain_mat) + ) if (is.null(dim(x_explain_gaussian))) x_explain_gaussian <- t(as.matrix(x_explain_gaussian)) # Add objects to internal list @@ -87,3 +90,38 @@ prepare_data.copula <- function(internal, index_features, ...) { return(dt) } + +#' Transforms a sample to standardized normal distribution +#' +#' @param x Numeric vector.The data which should be transformed to a standard normal distribution. +#' +#' @return Numeric vector of length `length(x)` +#' +#' @keywords internal +#' @author Martin Jullum +gaussian_transform <- function(x) { + u <- rank(x) / (length(x) + 1) + z <- stats::qnorm(u) + return(z) +} + +#' Transforms new data to standardized normal (dimension 1) based on other data transformations +#' +#' @param yx Numeric vector. The first `n_y` items is the data that is transformed, and last +#' part is the data with the original transformation. +#' @param n_y Positive integer. Number of elements of `yx` that belongs to the Gaussian data. +#' +#' @return Vector of back-transformed Gaussian data +#' +#' @keywords internal +#' @author Martin Jullum +gaussian_transform_separate <- function(yx, n_y) { + if (n_y >= length(yx)) stop("n_y should be less than length(yx)") + ind <- 1:n_y + x <- yx[-ind] + tmp <- rank(yx)[ind] + tmp <- tmp - rank(tmp) + 0.5 + u_y <- tmp / (length(x) + 1) + z_y <- stats::qnorm(u_y) + return(z_y) +} diff --git a/src/Copula.cpp b/src/Copula.cpp index 9188cf4b7..9f58955cc 100644 --- a/src/Copula.cpp +++ b/src/Copula.cpp @@ -169,61 +169,3 @@ arma::cube prepare_data_copula_cpp(const arma::mat& MC_samples_mat, return result_cube; } - -//' Transforms a sample to standardized normal distribution -//' -//' @param x Numeric matrix. The data which should be transformed to a standard normal distribution. -//' -//' @return Numeric matrix of dimension `dim(x)` -//' -//' @keywords internal -//' @author Lars Henry Berge Olsen -// [[Rcpp::export]] -Rcpp::NumericMatrix gaussian_transform_cpp(const arma::mat& x) { - int n_obs = x.n_rows; - int n_features = x.n_cols; - - // Pre allocate the return matrix - Rcpp::NumericMatrix x_trans(n_obs, n_features); - - // Iterate over the columns, i.e., the features - for (int idx_feature = 0; idx_feature < n_features; ++idx_feature) { - // Compute the rank and transform to standardized normal distribution - arma::vec rank_now = arma::conv_to::from(arma::sort_index(arma::sort_index(x.col(idx_feature)))); - Rcpp::NumericVector u = Rcpp::wrap((rank_now + 1) / (n_obs + 1)); - x_trans(Rcpp::_, idx_feature) = Rcpp::qnorm(u); - } - - return x_trans; -} - -//' Transforms new data to standardized normal (column-wise) based on other data transformations -//' -//' @param y arma::mat. A numeric matrix containing the data that is to be transformed. -//' @param x arma::mat. A numeric matrix containing the data of the original transformation. -//' -//' @return An arma::mat matrix of the same dimension as `y` containing the back-transformed Gaussian data. -//' -//' @keywords internal -//' @author Lars Henry Berge Olsen, Martin Jullum -// [[Rcpp::export]] -Rcpp::NumericMatrix gaussian_transform_separate_cpp(const arma::mat& y, const arma::mat& x) { - int n_features = x.n_cols; - int n_y_rows = y.n_rows; - int n_x_rows = x.n_rows; - - // Pre allocate the return matrix - Rcpp::NumericMatrix z_y(n_y_rows, n_features); - - // Compute the transformation for each feature at the time - for (int idx_feature = 0; idx_feature < n_features; ++idx_feature) { - arma::vec yx_now = arma::join_cols(y.col(idx_feature), x.col(idx_feature)); - arma::vec rank_now_1 = arma::conv_to::from(arma::sort_index(arma::sort_index(yx_now))).head(n_y_rows); - arma::vec rank_now_2 = arma::conv_to::from(arma::sort_index(arma::sort_index(rank_now_1))); - arma::vec tmp = rank_now_1 - rank_now_2 + 0.5; - Rcpp::NumericVector u_y = Rcpp::wrap(tmp / (n_x_rows + 1)); - z_y(Rcpp::_, idx_feature) = Rcpp::qnorm(u_y); - } - - return z_y; -} From 4db7aac867896e647ba0830c2099fd0c41484df1 Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 12 Jan 2024 14:04:16 +0000 Subject: [PATCH 55/62] Typo in Copula.cpp --- src/Copula.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Copula.cpp b/src/Copula.cpp index 9f58955cc..732ed3a4f 100644 --- a/src/Copula.cpp +++ b/src/Copula.cpp @@ -44,8 +44,6 @@ arma::vec quantile_type7_cpp(const arma::vec& x, const arma::vec& probs) { //' Transforms new data to a standardized normal distribution //' -//' @details The function uses `arma::quantile(...)` which corresponds to R's `stats::quantile(..., type = 5)`. -//' //' @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform. //' @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`. //' From e969bd95145353c7b6094c0b3eb80fbee57e5471 Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 12 Jan 2024 14:18:36 +0000 Subject: [PATCH 56/62] Added code that Shows that C++ code produce equivalent Shapley values as R code when `n_samples` tends to infinity --- inst/scripts/compare_copula_in_R_and_C++.R | 64 ++++++++++++++++++++-- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/inst/scripts/compare_copula_in_R_and_C++.R b/inst/scripts/compare_copula_in_R_and_C++.R index 2077ea27b..fd6b1cfb4 100644 --- a/inst/scripts/compare_copula_in_R_and_C++.R +++ b/inst/scripts/compare_copula_in_R_and_C++.R @@ -1355,7 +1355,7 @@ all.equal(shapr_mat_arma_res, sourceCpp_mat_arma_res) # Large n_samples equal results ---------------------------------------------------------------------------------------- { - n_samples <- 100000 + n_samples <- 1000000 n_train <- 1000 n_test <- 5 M <- 4 @@ -1479,9 +1479,9 @@ time_only_cpp_sourceCpp # Look at the differences # Aggregate the MC sample values for each explicand and combination -res_only_R <- res_only_R[, w := NULL] -res_only_cpp <- res_only_cpp[, w := NULL] -res_only_cpp_sourceCpp <- res_only_cpp_sourceCpp[, w := NULL] +# res_only_R <- res_only_R[, w := NULL] +# res_only_cpp <- res_only_cpp[, w := NULL] +# res_only_cpp_sourceCpp <- res_only_cpp_sourceCpp[, w := NULL] res_only_R_agr <- res_only_R[, lapply(.SD, mean), by = c("id", "id_combination")] res_only_cpp_agr <- res_only_cpp[, lapply(.SD, mean), by = c("id", "id_combination")] res_only_cpp_sourceCpp_agr <- res_only_cpp_sourceCpp[, lapply(.SD, mean), by = c("id", "id_combination")] @@ -1493,3 +1493,59 @@ res_only_R_agr - res_only_cpp_sourceCpp_agr # Max absolute difference max(abs(res_only_R_agr - res_only_cpp_agr)) max(abs(res_only_R_agr - res_only_cpp_sourceCpp_agr)) + +# Look at the difference in Shapley values +temp_shapley_value_func = function(dt, internal, model, predict_model) { + compute_preds( + dt, # Updating dt by reference + feature_names = internal$parameters$feature_names, + predict_model = predict_model, + model = model, + pred_cols = paste0("p_hat", seq_len(internal$parameters$output_size)), + type = internal$parameters$type, + horizon = internal$parameters$horizon, + n_endo = internal$data$n_endo, + explain_idx = internal$parameters$explain_idx, + explain_lags = internal$parameters$explain_lags, + y = internal$data$y, + xreg = internal$data$xreg + ) + dt_vS2 <- compute_MCint(dt, paste0("p_hat", seq_len(internal$parameters$output_size))) + dt_vS <- rbind(t(as.matrix(c(1, rep(prediction_zero, n_test)))), dt_vS2, t(as.matrix(c(2^M, response_test))), + use.names = FALSE) + colnames(dt_vS) = colnames(dt_vS2) + compute_shapley_new(internal, dt_vS) +} + +# Compute the Shapley values +res_shapley_R = temp_shapley_value_func(data.table::copy(res_only_R), internal, model, predict_model) +res_shapley_cpp = temp_shapley_value_func(data.table::copy(res_only_cpp), internal, model, predict_model) +res_shapley_cpp_sourceCpp = temp_shapley_value_func(data.table::copy(res_only_cpp_sourceCpp), + internal, + model, + predict_model) +# Look at the difference +abs(res_shapley_R - res_shapley_cpp) +abs(res_shapley_R - res_shapley_cpp_sourceCpp) +max(abs(res_shapley_R - res_shapley_cpp)) +max(abs(res_shapley_R - res_shapley_cpp_sourceCpp)) + +# When n_samples <- 1000000, n_train <- 1000, n_test <- 5, M <- 4 +# > abs(res_shapley_R - res_shapley_cpp) +# none X1 X2 X3 X4 +# 1: 7.2140e-11 0.00056643 0.00109848 9.5478e-05 0.00043657 +# 2: 4.3903e-10 0.00179695 0.00163158 1.8549e-03 0.00202031 +# 3: 9.3072e-11 0.00142949 0.00087037 1.2457e-03 0.00180482 +# 4: 5.1367e-11 0.00079767 0.00099899 7.2505e-04 0.00052373 +# 5: 3.8260e-10 0.00032232 0.00046644 1.1651e-03 0.00102102 +# > abs(res_shapley_R - res_shapley_cpp_sourceCpp) +# none X1 X2 X3 X4 +# 1: 3.1773e-10 0.00061369 0.00096567 0.00139486 0.00174684 +# 2: 2.1354e-10 0.00164283 0.00139693 0.00051290 0.00075879 +# 3: 1.2370e-10 0.00143125 0.00066145 0.00021455 0.00055524 +# 4: 2.0396e-10 0.00090834 0.00091129 0.00077478 0.00077773 +# 5: 1.3627e-10 0.00038308 0.00033615 0.00031426 0.00026733 +# > max(abs(res_shapley_R - res_shapley_cpp)) +# [1] 0.0020203 +# > max(abs(res_shapley_R - res_shapley_cpp_sourceCpp)) +# [1] 0.0017468 From 0bd1f84b7bd278d2a2911bda62829cc1def16d70 Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 12 Jan 2024 14:28:31 +0000 Subject: [PATCH 57/62] Updated RcppExports --- R/RcppExports.R | 27 --------------------------- src/RcppExports.cpp | 25 ------------------------- 2 files changed, 52 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index d7a8a5e15..1f27325fe 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -97,8 +97,6 @@ quantile_type7_cpp <- function(x, probs) { #' Transforms new data to a standardized normal distribution #' -#' @details The function uses `arma::quantile(...)` which corresponds to R's `stats::quantile(..., type = 5)`. -#' #' @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform. #' @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`. #' @@ -140,31 +138,6 @@ prepare_data_copula_cpp <- function(MC_samples_mat, x_explain_mat, x_explain_gau .Call(`_shapr_prepare_data_copula_cpp`, MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) } -#' Transforms a sample to standardized normal distribution -#' -#' @param x Numeric matrix. The data which should be transformed to a standard normal distribution. -#' -#' @return Numeric matrix of dimension `dim(x)` -#' -#' @keywords internal -#' @author Lars Henry Berge Olsen -gaussian_transform_cpp <- function(x) { - .Call(`_shapr_gaussian_transform_cpp`, x) -} - -#' Transforms new data to standardized normal (column-wise) based on other data transformations -#' -#' @param y arma::mat. A numeric matrix containing the data that is to be transformed. -#' @param x arma::mat. A numeric matrix containing the data of the original transformation. -#' -#' @return An arma::mat matrix of the same dimension as `y` containing the back-transformed Gaussian data. -#' -#' @keywords internal -#' @author Lars Henry Berge Olsen, Martin Jullum -gaussian_transform_separate_cpp <- function(y, x) { - .Call(`_shapr_gaussian_transform_separate_cpp`, y, x) -} - #' Generate Gaussian MC samples #' #' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index e0393f34f..c95d55541 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -121,29 +121,6 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// gaussian_transform_cpp -Rcpp::NumericMatrix gaussian_transform_cpp(const arma::mat& x); -RcppExport SEXP _shapr_gaussian_transform_cpp(SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::mat& >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(gaussian_transform_cpp(x)); - return rcpp_result_gen; -END_RCPP -} -// gaussian_transform_separate_cpp -Rcpp::NumericMatrix gaussian_transform_separate_cpp(const arma::mat& y, const arma::mat& x); -RcppExport SEXP _shapr_gaussian_transform_separate_cpp(SEXP ySEXP, SEXP xSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::mat& >::type y(ySEXP); - Rcpp::traits::input_parameter< const arma::mat& >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(gaussian_transform_separate_cpp(y, x)); - return rcpp_result_gen; -END_RCPP -} // prepare_data_gaussian_cpp arma::cube prepare_data_gaussian_cpp(const arma::mat& MC_samples_mat, const arma::mat& x_explain_mat, const arma::mat& S, const arma::vec& mu, const arma::mat& cov_mat); RcppExport SEXP _shapr_prepare_data_gaussian_cpp(SEXP MC_samples_matSEXP, SEXP x_explain_matSEXP, SEXP SSEXP, SEXP muSEXP, SEXP cov_matSEXP) { @@ -237,8 +214,6 @@ static const R_CallMethodDef CallEntries[] = { {"_shapr_quantile_type7_cpp", (DL_FUNC) &_shapr_quantile_type7_cpp, 2}, {"_shapr_inv_gaussian_transform_cpp", (DL_FUNC) &_shapr_inv_gaussian_transform_cpp, 2}, {"_shapr_prepare_data_copula_cpp", (DL_FUNC) &_shapr_prepare_data_copula_cpp, 7}, - {"_shapr_gaussian_transform_cpp", (DL_FUNC) &_shapr_gaussian_transform_cpp, 1}, - {"_shapr_gaussian_transform_separate_cpp", (DL_FUNC) &_shapr_gaussian_transform_separate_cpp, 2}, {"_shapr_prepare_data_gaussian_cpp", (DL_FUNC) &_shapr_prepare_data_gaussian_cpp, 5}, {"_shapr_mahalanobis_distance_cpp", (DL_FUNC) &_shapr_mahalanobis_distance_cpp, 5}, {"_shapr_sample_features_cpp", (DL_FUNC) &_shapr_sample_features_cpp, 2}, From 8f111a6861ab215b9954058eec135b1e47701817 Mon Sep 17 00:00:00 2001 From: LHBO Date: Fri, 12 Jan 2024 14:41:39 +0000 Subject: [PATCH 58/62] Removed unneeded variable --- R/approach_copula.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/approach_copula.R b/R/approach_copula.R index 5dde7db6a..4e7f5e914 100644 --- a/R/approach_copula.R +++ b/R/approach_copula.R @@ -6,7 +6,6 @@ setup_approach.copula <- function(internal, ...) { parameters <- internal$parameters x_train_mat <- as.matrix(internal$data$x_train) x_explain_mat <- as.matrix(internal$data$x_explain) - feature_names <- internal$parameters$feature_names # Checking if factor features are present feature_specs <- internal$objects$feature_specs From df1c276d4321e9a7e730b04c0b241926d9e74f8e Mon Sep 17 00:00:00 2001 From: Martin Date: Mon, 15 Jan 2024 11:40:41 +0100 Subject: [PATCH 59/62] man --- man/gaussian_transform.Rd | 21 +++++++++++++++++++++ man/gaussian_transform_cpp.Rd | 21 --------------------- man/gaussian_transform_separate.Rd | 24 ++++++++++++++++++++++++ man/gaussian_transform_separate_cpp.Rd | 23 ----------------------- man/inv_gaussian_transform_cpp.Rd | 3 --- 5 files changed, 45 insertions(+), 47 deletions(-) create mode 100644 man/gaussian_transform.Rd delete mode 100644 man/gaussian_transform_cpp.Rd create mode 100644 man/gaussian_transform_separate.Rd delete mode 100644 man/gaussian_transform_separate_cpp.Rd diff --git a/man/gaussian_transform.Rd b/man/gaussian_transform.Rd new file mode 100644 index 000000000..c0af625c4 --- /dev/null +++ b/man/gaussian_transform.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_copula.R +\name{gaussian_transform} +\alias{gaussian_transform} +\title{Transforms a sample to standardized normal distribution} +\usage{ +gaussian_transform(x) +} +\arguments{ +\item{x}{Numeric vector.The data which should be transformed to a standard normal distribution.} +} +\value{ +Numeric vector of length \code{length(x)} +} +\description{ +Transforms a sample to standardized normal distribution +} +\author{ +Martin Jullum +} +\keyword{internal} diff --git a/man/gaussian_transform_cpp.Rd b/man/gaussian_transform_cpp.Rd deleted file mode 100644 index 97d8039af..000000000 --- a/man/gaussian_transform_cpp.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RcppExports.R -\name{gaussian_transform_cpp} -\alias{gaussian_transform_cpp} -\title{Transforms a sample to standardized normal distribution} -\usage{ -gaussian_transform_cpp(x) -} -\arguments{ -\item{x}{Numeric matrix. The data which should be transformed to a standard normal distribution.} -} -\value{ -Numeric matrix of dimension \code{dim(x)} -} -\description{ -Transforms a sample to standardized normal distribution -} -\author{ -Lars Henry Berge Olsen -} -\keyword{internal} diff --git a/man/gaussian_transform_separate.Rd b/man/gaussian_transform_separate.Rd new file mode 100644 index 000000000..89afb6494 --- /dev/null +++ b/man/gaussian_transform_separate.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_copula.R +\name{gaussian_transform_separate} +\alias{gaussian_transform_separate} +\title{Transforms new data to standardized normal (dimension 1) based on other data transformations} +\usage{ +gaussian_transform_separate(yx, n_y) +} +\arguments{ +\item{yx}{Numeric vector. The first \code{n_y} items is the data that is transformed, and last +part is the data with the original transformation.} + +\item{n_y}{Positive integer. Number of elements of \code{yx} that belongs to the Gaussian data.} +} +\value{ +Vector of back-transformed Gaussian data +} +\description{ +Transforms new data to standardized normal (dimension 1) based on other data transformations +} +\author{ +Martin Jullum +} +\keyword{internal} diff --git a/man/gaussian_transform_separate_cpp.Rd b/man/gaussian_transform_separate_cpp.Rd deleted file mode 100644 index 04be9d37c..000000000 --- a/man/gaussian_transform_separate_cpp.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RcppExports.R -\name{gaussian_transform_separate_cpp} -\alias{gaussian_transform_separate_cpp} -\title{Transforms new data to standardized normal (column-wise) based on other data transformations} -\usage{ -gaussian_transform_separate_cpp(y, x) -} -\arguments{ -\item{y}{arma::mat. A numeric matrix containing the data that is to be transformed.} - -\item{x}{arma::mat. A numeric matrix containing the data of the original transformation.} -} -\value{ -An arma::mat matrix of the same dimension as \code{y} containing the back-transformed Gaussian data. -} -\description{ -Transforms new data to standardized normal (column-wise) based on other data transformations -} -\author{ -Lars Henry Berge Olsen, Martin Jullum -} -\keyword{internal} diff --git a/man/inv_gaussian_transform_cpp.Rd b/man/inv_gaussian_transform_cpp.Rd index e3a2bd87d..7cf04833c 100644 --- a/man/inv_gaussian_transform_cpp.Rd +++ b/man/inv_gaussian_transform_cpp.Rd @@ -17,9 +17,6 @@ arma::mat of the same dimension as \code{z} \description{ Transforms new data to a standardized normal distribution } -\details{ -The function uses \code{arma::quantile(...)} which corresponds to R's \code{stats::quantile(..., type = 5)}. -} \author{ Lars Henry Berge Olsen } From e9c5d4f5945c22a2918c3896e5d80af5cb8c2049 Mon Sep 17 00:00:00 2001 From: Martin Date: Mon, 15 Jan 2024 11:43:44 +0100 Subject: [PATCH 60/62] test files --- tests/testthat/_snaps/output.md | 38 +++++++++--------- .../_snaps/output/output_lm_numeric_comb1.rds | Bin 4094 -> 4431 bytes .../_snaps/output/output_lm_numeric_comb2.rds | Bin 4423 -> 4428 bytes .../_snaps/output/output_lm_numeric_comb3.rds | Bin 4361 -> 4365 bytes .../output/output_lm_numeric_copula.rds | Bin 4289 -> 4295 bytes .../output/output_lm_numeric_gaussian.rds | Bin 4198 -> 4194 bytes 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/testthat/_snaps/output.md b/tests/testthat/_snaps/output.md index 303f9a777..411d334a8 100644 --- a/tests/testthat/_snaps/output.md +++ b/tests/testthat/_snaps/output.md @@ -71,20 +71,20 @@ Code (out <- code) Output - none Solar.R Wind Temp Month Day - 1: 42.44 -8.545 7.779 14.586 0.4475 -1.6653 - 2: 42.44 4.826 -4.295 -11.655 -1.1250 -1.6309 - 3: 42.44 7.163 -25.491 0.368 -0.5455 0.9377 + none Solar.R Wind Temp Month Day + 1: 42.44 -8.117 7.438 14.0026 0.8602 -1.5813 + 2: 42.44 5.278 -5.219 -12.1079 -0.8073 -1.0235 + 3: 42.44 7.867 -25.995 -0.1377 -0.2368 0.9342 # output_lm_numeric_copula Code (out <- code) Output - none Solar.R Wind Temp Month Day - 1: 42.44 -6.371 7.355 14.470 -0.6108 -2.241 - 2: 42.44 4.115 -4.159 -9.980 -1.9378 -1.917 - 3: 42.44 5.932 -25.086 1.857 -1.3624 1.090 + none Solar.R Wind Temp Month Day + 1: 42.44 -5.960 7.046 13.863 -0.274 -2.074 + 2: 42.44 4.482 -4.892 -10.491 -1.659 -1.319 + 3: 42.44 6.587 -25.533 1.279 -1.043 1.142 # output_lm_numeric_ctree @@ -140,20 +140,20 @@ Code (out <- code) Output - none Solar.R Wind Temp Month Day - 1: 42.44 -8.809 9.149 15.503 -2.8888 -0.3522 - 2: 42.44 3.146 -4.566 -7.727 -4.3771 -0.3559 - 3: 42.44 6.655 -22.559 -1.645 0.5634 -0.5832 + none Solar.R Wind Temp Month Day + 1: 42.44 -8.746 9.03 15.366 -2.619 -0.4293 + 2: 42.44 3.126 -4.50 -7.789 -4.401 -0.3161 + 3: 42.44 7.037 -22.86 -1.837 0.607 -0.5181 # output_lm_numeric_comb2 Code (out <- code) Output - none Solar.R Wind Temp Month Day - 1: 42.44 -9.302 9.454 17.2106 -1.767 -2.9936 - 2: 42.44 5.189 -5.352 -8.5382 -2.854 -2.3245 - 3: 42.44 6.388 -22.748 0.0177 -1.441 0.2159 + none Solar.R Wind Temp Month Day + 1: 42.44 -9.294 9.327 17.31641 -1.754 -2.9935 + 2: 42.44 5.194 -5.506 -8.45049 -2.935 -2.1810 + 3: 42.44 6.452 -22.967 -0.09553 -1.310 0.3519 # output_lm_numeric_comb3 @@ -161,9 +161,9 @@ (out <- code) Output none Solar.R Wind Temp Month Day - 1: 42.44 -6.940 10.773 12.187 -3.692 0.27495 - 2: 42.44 2.628 -2.656 -8.569 -5.313 0.03032 - 3: 42.44 5.827 -22.183 3.440 -2.954 -1.69839 + 1: 42.44 -6.952 10.777 12.160 -3.641 0.25767 + 2: 42.44 2.538 -2.586 -8.503 -5.376 0.04789 + 3: 42.44 5.803 -22.122 3.362 -2.926 -1.68514 # output_lm_mixed_independence diff --git a/tests/testthat/_snaps/output/output_lm_numeric_comb1.rds b/tests/testthat/_snaps/output/output_lm_numeric_comb1.rds index acf46dd4b721a01fe7b0f246269401596e0e671b..fd3014b5715c84f333cdc071ed5c8d2193f7cef6 100644 GIT binary patch literal 4431 zcmV-V5wPwbiwFP!000001LZplSX9TkXCJV4f0U|^+)?{;B z4zP9EU3V8mViTKateTjbViUP0)m+n<uG_{n!-qAU z^4Qm2(+%mZ`_vuPI69^1Z~=eDjCStW5zP3mLYH?OU zMKe~IuV^ROZk?CL+6u>@UWos+wzl3@I%G5p4(5;=h#}>IQ%G|Pv(2IprNWLaDhx!W znJiAxZV}9B4$)j;7b`Ha8^9fa&hITTt0pr<3pg2?(Sle|rBLH=m;?)p2sUT3n~Z|l z({ra?WR8U4hDDo*|5%J7jH!yuu@(3qgiwVc2|6V4UqBrEq`IE-sq4tZW9$0rwwvxeLs^R#BFM;fW2c2cSwl4JPV)fJF5y zDz#QkI@aZT)bqbnE(Jf4i_-NkkrEzGA|Hp`;;}4h3 zd^A0A`Hrn$Y(m}6?@sMe735CX)|${SX9}~=^cf$irhZ@im$)_j?h~ymezo=u);_b( z^cf$O>V6)-qUABga?zBVsv%rz7_k6_NDvlbOE((iI%)Je$&6{;wKzzaxiwdg`(yMS7=d1$UC! zR2OVEyHzl2vD{>InyeN>y=b=rIl^KvTB~Q9ECS&;n3I)uYYp=_*kW)9)i$%} z@DgSVPGgmapmm5M>#Kf_DB7rkp~7l6)Rrv7TZPs+ z-{#rBs;qWXy+=BPdSoyQm8{V~r>WXxsbn6Db%N2!ge`^&QE=AS`)o+YL*)Ez^UBhDM zAsMZ;hH8QJC)~5ario60jhvS#3tLw&Rh}s%W~`U#8B>E`G8%#3UDi&mV8$NQ$=NYp zw!pPTtvnf!)XjB@Y2BhwW>QV;|DbEA}2l1#A29*p%%jq3@un@B*=Eb^-CD) za5)r13@+O+CW~(;h9el7FkHtp($*@B`(XSf#+ewO1KWcz6k;fa<@*%0zYD`3uQRQJs#SjiUH!!4Q_yNPmk}sqWs=gde zda7KE-58UHh*KDz!H^-vhZ~RSBQVavI7*7GDjMQY0yfEx83T5wVxC@#p$b7vS^!A@ zjTjm*G-Ei3ft)0^LVxzcGY;{4DJ~~KY|Lngl=@J-v^gO))RUp!7wQR6XL#^F^4pIF zK|KZP4|4Q}k^pxB{r;Ks*G7)jRB>p`B4Qy+zW zMrZAIgF5qPAmE{KzV;bb$|Wh{*{_}Y0L3^kzgT~i2gxRK%0j z1jA22e-g+L+H_9JwKKpkQZIu(5WQ(&hm-~Iu}J32SdgCzeIWhJ2fae@b)?M4GANgU zT_Vqed?a_NPL-qhz`>LHAK;rv%z%K0GWNH?qf(eU0oH!2paF?1#?EIvI_~ z1OkoG17$pBCqM>r-2hoRNT#pzqZ=fn%Y9%vO1@6vn;hir%Y9b1Bk!|6U*sT+G2fR<3{$;t{4(Pk3703V(5hdKVQ)6oCwe(hZ8NxlX@8Nh-|d1 zACVZyUQIUBSkNPUJ?Fij=mj!CAQJ;T6||{?dJphN2kk~cKe}_?V+JYrgudvYt#F`) zfm|%-y)#jRj|m{t3w&cSiv~SC_?RH`Q4cYQ1Y3!~Cvx%7KJg zoMFA^p8&v`PIE`?W5!Zbjn8bx`>OsJ0b`x_46O`)?D5gC8wj&7TQsxtypCS?drto0 z6@t-ewHwT0Es^oAEw(cxaZ=&2RDYCrmF zg}hoJuU5!w6nu?>ujPF6nM%=yoTl|dQ|fB`$Z7n@Dce>0DyJ#sl)4%PP0O{NJO1eY zdQ%VTwWboGpX`8Ikq@1)x3rRru zz0!VpSNu*C{r%hG4)%lfo0D=B8INGRm$$NuX~o;a`>nTm`}l85@OdrzFRA#xi|uS0 z8)umRPhBBXK=&GZ2YWY+xK74Fos9Jz!`^q!K2HrgcI|B{m%k2Rvs&@OqU|dTRS`4b zs1j^ualT=mXsWDY7v#$KJpuF;O*_xkr+$Rz>VIMXyzliLR=ZOy=X~FpDm{`OAjuqc z^Osv&XRiO`jtl3m%K2MLDP9}!I>lEsSrqSITS#zK*te^*O-)g``Q>Lwu3MJ>)Aw9& zu3p{z=+YY$Z(Q;ViWi0HDE^}`jmlfgCWG8}R4?<;AzJ@+-FaFsKX8ZnPwOi}|0daK zNTN6gnFv0rJuHy7Znv7*|dn@?$;BVX}wq3 z7An7Q)o?0zr0*k?|84x6w7&VHLA3to>G@PX?3aD1{L=Y9q5MPXddj~UahdW}S0y{2 z&e~1uf&V$2%D=6fNa2YYVMZF7-q&%KslUnaC$az7s?FKeSAs^belw zO7Zr_1C)Mj(Qm1qcE>Z!KCN@7U!ZdCWe2I;t;CmT{p5qQY5mTj)hs@=e%1OR@boV3u#xi{iKh$V$wNGOh^LFiQ|=FU zdZkMmufsfymrEKifoJ1IOg351R@}LXW3lHnXC9Rhu9*ZUk^TJN`A-u12z-Ti}xhkU|I`Q!;+ z$|oPWAfND3KDq5`vnjAmWO=1b7_UU?$-z)!a?c6K> zZuEN{hmQJxzj0~r*I9Asta@kl`(D>;XUE}wckF$I0%e{M>c=oWs~x%&H7l7{UsftT2>o|03D*shGni8a;=X8LTj! z71)Pwx>5riRoZ;C#J=cFYI6iu9ywzG#{!1$06j1N`v6V`oC=tI3@`xjAi!yW(*b7y z9s)QMFnoWAh5>#Ea2DXbTxjCC`2$bu_o7MPzcn{-PdqBxS~BaMg5D_a=CYOD zvdhtwC6k|>F}^1%jv2IkW!4%raqrJX+tGSdSTirPd@p>%R3Uuc&85`Fl^J+z@WpcPIjlcE3CH<&Tb|2@_^t2rK*=jaf2z z!MV>HQPzc~1^ZsWamaJNy|q^e8r@i(-RG0RXv~1~mG53&hWLJlE(^Y#igFewUqTzs zpxmKHTp@cdqud>Pwrn)Vq3oFTue}`89pzlQdF}f6gJ}HHJAb>9z736jwmxO`iMfb3 z<`3N4{2I!BJLyhY^C>j^gX#_8rjsc9sheZ}RQ4RoiHOy2|L9i;oi<)6t9l>tU+(|e zk_**n(({?b_wcxW5v=>>Mo$M6LqzHub)8U{`$pN zkxyPi`AgoiU&&}hBPV2R)@&S&^4>q+^5UUTRDAY%ZBg$5Xw3EJAFw|CF?#6i!Kf`O z-a&C^1Fr;VSE2Mji{GE9egNgYiS~q5WT3ob#?W0!$%wCSed+ezbTop$@M_nAacJbk zxksatccM{m%p21DR4mHVFWr$UXYE5M$Nlw+KDVAjSaYs+HLDB;xx^Xo0R-}|bnzKcWo+txKUM3 VWM}<0n;f)je+P;6Oj(3B006z^`quye literal 4094 zcmV|r{DjvY)jWCdVA;n>2}wn#oqDjh9$SJI;!Y@qM02T`PyRd;E%(}B3(kuw{s7TTGID< z#lK-t`Y%pvR(WG@9xzJxbeQP_!}U*?tDLBBIokBufE~+7%h5k>AHOO7Y|96al_uSC z-F`vPYpGr%=!I~Wq+P6)+_Yenlk*Anc86VpvZ2gj6I~gls9IpPS7Ld-RO>`P#SXit z1}eJg;tH;vaF?S#1KWzCq+Lb-baZrF-~+PQL^lSc1z|*`=n*qK;u4z_AgOXukDP

g{aCpRoMofLoUDD)ghpJJy-U1hSE23J%m&n<<2Bm{F(LY z2~y^$ne|rhugRd_4?P|J(_E5i+{8+BI2pb_@k(zUKDW%|Ev!1X?~}}L$TH2y=Nk*# zy{U2er^aq*^v<^3*!0XdYZd*-lCsv(+K;_ik@|?G%|C7VBD8Erl_AM=Zt&5()y<<6 zd(e{wNq4@`{O%6Y_mh3Y2GoRk6Zdo^4$ql`{?Q)wL^JQ}xqt2dfN9^`9czF7&^Bxz z{i8kVNtxlMfosn!<}MdqrL`8iOA8|w!a>3)1TWn0ym3Qvs#ca2_-I*+r?L#J{hZWVgZ z3a7Gv)i_+%Mg<)qM`X5%)!1mL$69N(S7XFdgJ|&}*lw-HaWp;NtA&I=gjQofJe>=cZR{&_3>kyJ$vj9rm3f zvN)ETYemc_O4(pDB#($A=NGEFt&d--UMYOa1iw-lQ?qEbSU}%T)y{I!Mk8oYtK2*r2l)j=e*4Dk0r%+Dsh&5v=KMB6mDHT&HgUfd%w7-`Uz5CUq zvCEXwR!Tc4-3fdfrT&y!C=I2=F74-hF$!-`J8GlJqcn_C6xe)2DU;H7ln(e}VL527 z5adNqlS{dma&{B(9;I6-W%>F?D4_C5l;=<$>+4%h9P~pe_+&d~JoueQbw*zwY6$en z86cLwh0;b!?UW8t8c3-b@@a#692R#IEhj?X*vJ^)(nkjRHYcKmdMebffO;a-k&iye zey@;ps1JquPXzf#l7aUI`;lYiUnd;a`-6Ts$gv&|_9!3sj8C4(#fLcbhj?r^h4V84 zIOd6UK40t?v`0PGQIGKs0e%IC$9_oV`f(taM>^M@5#qx9upP8VJm!V%8v&2wl*0K( zIkwvu>KIQN@NxYE{Uhgb$y|R9@1j168wZRF^Wnz>#}oCLTzi-=#-GOFQaRoW@Ys)h z{5(gyP_92wkNwBbC-5)$*GG1(&!j|enB=vxh|+Aqw@%S(TPPIdH9?pUmd1GBpd{IIeJF_x%enGX1QkORvzAMA=D*2$_E%b{Ejei_~b z_)^G!9>h`t{^vovQ&hGK!S-5crxe=780M?SjqP!4Ut@blI1tLhgUGRLP{|l36l5q5 zRq4=Ah>Fy4NxBd@fe`pXvm|V;D5|u3c?JobxG2qFsXC6cY?xtBUnJInb zyEBIKT-*V;q$yfKJHhZ{G+Nwi^t(I z+oa_Tm6h!hkzmKJ!5G26a!P;imXmU=*t*cmvnA&{%n780tqu6+a)4jX{52m@dhnHLL^!D*TRuJ%7^hc=#zKiW?8ICi||CMLi z6fpb>-@#st6QPHBSPye!_qhL^bHH7L0gt_{mFm|4IIFoA7M)*VXiCt5yGC@{q!s3R z$y!~52ju+wo)GyJO&9mo@BIMx)jwi>{`dNBhsz^X3W4uT`G|ZskZg{6O`mskEPUk1 zcP(`7s=n~$(=y+)c>(YWjxWfzw#Me>SANLWx;uu(hPOPnWlOtfh5>jO@KBT^9_zB* z@cLxAUf*yC_+jKS-cep0;HSRfHLx3kb>OdZ{Tynt{m`s-XF$+cG*R( zu}L$oU1_574bS)eOy(^`%`Fed#`Zh+Gl}C>GCuXtdcdDXJ=pC=TmZiMQ#+66>~;m< zn^r8sctBps$ANY-e!F3&EdRpRD9c}**arN7EdO$C8Swe&AMo{n+mCv*M>`JR!0G2B z4&^fb;Jp(u9?tIp-k$f*m6CB^b;SO;1 z{T#lL$D?1A%XsI~GW5^uF}{F!{P=oomyaLqWPHgx62`;jcYx0y{kP!!hxum0`HAxx z=NrSXKc55m(;QAh9NKk-2i$JN1>jZj0Ir#DN5L1h8Y6bywZR0}y9w75xZX{;{`jwV zW3PeZuLZo8!yQ2!)@A>0=Ptm9qJO{*;oIZ<8#sT5FdpQHZ-Mm|?SO{`v`ghLHnkd$ zJT>eOCf|DgmI>Bv;1wKS5ag2)_WYf!+9Mvq-CXr8*}wc<>^|pGf8D)*>go8p@^`Uz zx~uPQ+&zuIRQ<);xm5kd$19&FzEZ}2+j|C3jACfD9j|V;h_(b57fKLWK1^86p(|~6KHvz|lp&#(G zaAHc3w&bTgPiC)4dNXwYRx-Erbh$?P7Wlc5qBKh`O^KQaPFaP?ek(tp`pED*wyJI@2

    0Aq50F`3KfWyV zcdczB{ZE9R4bdMVnIA}Bzg;_#-5 z^3i09_~1s{Z}l|qgEcjO&m{SK9=>m5+|}gDA+PO7a10_7-zy&Q8M{h@E?r$5u{g~& zq9@Co#ns1f^|AlPL!lmxSUJ!MVaJXQG(vc96`(Yl&7IDB@cmX-F9|W#kV=pLi{)rq zQGdtW*$lIbaEh!iJd#WKBAWFOZUBxY*Gm?U^4+rUyJx}wgrURBAE$p8!>_yaZtL>h wrE<{9 diff --git a/tests/testthat/_snaps/output/output_lm_numeric_comb2.rds b/tests/testthat/_snaps/output/output_lm_numeric_comb2.rds index 01025be22a7bc6dfe43bc8811abde8a54f7b0fc3..3d7e091cdb4012a742767cbcf4f295d3c0e83d82 100644 GIT binary patch literal 4428 zcmV-S5wq?eiwFP!000001LZpjSQJOP&CI|6a;c~w3J8cXB4WHy(JoFE1r_gWX9pNy zWSALd21Jv6nhib=T#bn)!A}!oJfd!%iM|~BG-)*PzFl24(HNugh+-0=Mh(JNRsYrW zG~mFecafz?(vw`nleP}fx(Zo zQEL39H->fkE5Wbk&;Ap^i!OdJlkuw}Ud!`&b;SRmz&#ZQL$3Ou;3L=C_VtnYC}P8k z57({Du4?P?x+C*kaaG@67Szwp>Lc-cj^+~U!&g*=Us*q3_SliaCys6V1QW;jLTxK! zo96Tk~g($tDjmu(>FvxR6%f61^$vZkL!WIjI8pL!*rLKf%UGk? zVX&F?rdYedlxH*KVPac=I)LVnmY7wM8KME)5xUVxfjMjj){jSy#ad$0L(2#EVz3(U zA9IdULL5#vGQAe#e~>*l4B-%q$X|ju_=#;#&Z*7+D7C#oUwN&R3y>w%!)&n?=}oc> zy0YRX^Uhs%t(IMzz_R<(XI$+3&kSTyjYuycLge@2(>K-=}N~iC8J$u zo#`mBn1Rs3QsS_dICOU7ELJZxj+s`eiWOLF##xfIH}yzo(&w{IJsid&qdA{>Y$(&` zIGC_mmuJvBN^EkM4P{o7-pK4}%{qszq3H|qI&rXBNwe!{w=;}(=9Z6H#}X^awKnTY z%*Nspg9}!P-H@wund4Tpmd4^3a%@Ii?2S^aeiFxSH5pk@{t`92B&X28$UY)j93d91 zSa)NKN+gRSA;(gxE7Ci-hDy)w30SEkr7e@>2g5}S=kV0RGH&hgHQwg9n;4=n1Ywwp zp%lZX7;a&i0U+B7x2rLf;p-L{0`avKV^Wbm!*CMAZVb0Ejr6q~;|Po|VB8Di@4$8p zh71g2II(WchW>YA*nwrck!g-$6^1PsDgbXFH4B4)p)&@uG`5RAa;LB!)^SV55RJhX zbnan@!%&Oipy&&U!7Wl;_-;uUS7A(Q^EVh?!q8L94>ttU@jVHbh;d6Xw{EQ=4_RQ7 z6p%o$I~wx>#2mUI$jL1L5`PYcWf)FlIEn$+E$(B8XFqHg#P0?8x+CPqgq{=k0~At8 zek+g)+ELJsgmy=0Gu-_U`5i_v(C!TF9vuClaKNoVziTi0Yb3|o!NBhVbk+_8eMaxS zO{8~J_{Zd!JtoikjZoNm1~7}0wUzO*yfA&nXKlu3{&oT!sgP%Rh*IP+L=i`fqQ3y} zi^a$KVfsv-#l`v$0C_e}5eoZ^&iZW&ZRSrmz`cW=?K7;D3s>Z`%Ol#)D#n5N#o|$p z2OCeuk5lx=;${AKQ^-Xra0`%Uc~ttZ#7xgakx#~F`BTm(u+KHrN3zz_G5n4bYvnKu zBRH{6;kC^T!PZ?eA1JhCRwF=+l zAn#x9v$7v~%&vTqgFI()x+m8-AB^U~Imga?@sjn$VqoJ&_R%(+_%urN$n#7XhENRn z{(?Rf`+*+0LDzshY5M?=$hMZn;g5mr)nqdb0zJYH;2IwFLP5q8WCDTb27PLw-5&hW zLci=8s~y+y>_x6SKrCA5%NJ-qAQ!|nJbtRc$BrNq3cj(NwFbQa@Uf%J#{kHIKiCQb zK9LK7{)r#K0PVr13gQX``@ypKePwd3AnpK=^#LCPMILz^QPp$KM`1q{DDUyfcPVF}4FL?x18kr23NToFSvJk@u9**lKR` zh^#;Sx68VCu6T98hlS(cJ<1=yJmm738S{8(OZ>~lDO26Mui{USE?=B}=2`w;HJ6#L z;~Ag7wbZi3)~-$v`O(6j9(THPp1~5oCd&QuylG1$d?qA)Q>Pa;NcMj!iHb_;oFMh{ z&AgajdZJ~`9{Rt~pf@_GP&1QdPEX=qWr`>DV0dD;iQR70o4JO^Soem<*=F)r z{~hj@y*lcI4m}u$xupU z8WteW(HS%u$K+tiLvE#NJ9Ai;%(1k@dqvG z@Bb}sVjL`PPOMR6JiPGkT+b@3{8ifjrs4eh?VsCn_44muk@mFNU_%U3fA)`L^tEf) z3(3b}#5FVaY-ZfVej8rEI^8g6+1m(XuKa}vP(2l|Y#P7Bam!=|>;-zO$uLtl!(hxW zU~fW{Z@k>;OR7iQB!BZ8+$8@K<7;@yZMWDQhFs41)wj|k@!pZl(JKCj`ufRhkJkyf zY~|1VXbi=x-Pck4qq-l(hgWA1T;Ox3ajX*fH9E&3_}sre)C-Z@yki<$JHX3HB;L?p>2 z_d~@LmoER*N&YYTNff_*{RPqP^1W1V;N9!AoqME?`A^%6Tl^~8Sr$%lA~F(uMsuW| z>Lr(68a!-w%dlIK(LAQVa_ev}icgOlCA{6TWsv{wa)R64?s%HEL&twaLkd3sxZC?y+e>)+S%KN+=N#z&JoI?4>;sPlDzTXd&?^Yw)IW=_`ZF{^EPvtjj z(<%Myx4)wD->>RI@y~NCR4)2lDCHkOBZz#s{|AATUwdm9rGN1sZ7ANp;s~XGS-zg? zX+C|4*{5yp;u0!XweTpFdl2?IZJ+OvP1|+HDp`JLyT-DI@{iQ6pzRy)QTst*UUMk@ zM5`N4_CqQPDQUeZfh- zrO#zCkNNAU-uW;8NaZ#a2UC1~PyXP<*cJ&(?Ua{0{j1|Iz3!5;zq z;lUq)`6IWFz%F-yxt3WalJnj8rFM2i(R#99_k4%)|2OXv7YP`pq}tzJ$dM4AJ;1$ z>XiWXiidi|vwDT)z3;zk=EZvWKoINUanRc&_Oq-rFV;f`wI9-VssQyC^zK1_^LePZ zJk;CI7VHi<5z^a>)UW*9(-jm~L~o_|tEeQ3Z{FHYa6wFpr?`J;rTvi;AHBn6d&FwC zo6A0|`iH;A-RC^%uc`M>&F$Z#>^-iZrt+KWcXP`>DgSZ(JSqQi^d}vMruzT1d3n^Y zx$@9l?dFR6N!M$0=izDPpEhpi=NP9N8O}-PuF}CX;jwp+%A?G1;FBZErXFj$I&Y&jV;8-+M;{qW-^SUkh8*cxWg@$nz%JOAe%diQcruZ)qk=n_q&|d4P_R0Irv@8aWvZU;*P}w)}quK zeJ?CEtwhOfm-auoYAPB!a9p=nere_*z z0iWy8@SoaLM1^^v;jwXb(+Y2*jN9d= zC?)B^j-l_VQEK_QKm2s@88r0Uo%w~@Ur>6~J6!`4I-o&us=F5s&q5=I-6f9R zy;1qXk*+AEzR%ooX}8g^H*4c&ynO{_XeuLSubGW9B2WF{?y5CtNM-1f-O)Eu`pSNp z?H5^5#<4RU3MQM;@VPZXm(33vqPkBPb=5qS zxL3%WGPVdMK4Xi|2^Y}7b^0F-9oiyP+x6>H$rDl9ck6%Hb80?Ha^JOi*AgF;c-F6C zT94i+c|+IxTSmNy(AA}r4s3rPC4KnkeI3p`L`n1SZTNU*6iVp3Bj4}!zKB2ZbMn^X z#}GfWLUpZGC>r$I%Wr+&tsNR%KIr1r_g10A8I#8j>FSM=s}H-q*=8CVv@t*QLB(Md zKjGj`L)c)H@bX%}-TPNG%ER!3k%zu4nvUYG(qWJ2)ue|8f z4khgGc=7s_8kFkuR*%)8St#v6`h>A*JW4ClCuW()qF#HCoQ<2SLH*w8{LC9CFQC2) zvTB!i#_@icdwJ8d2T{NIn2`-B^u9=WW%KTsRCi=@W30A+`ZnvZotz9MXr{ z;)mW`!vlqyEq^xsA2IyuCr{fW-%?6vYNdKpiFD-V$^M@WI}|2g$xAPwT~7~pV#&th SHW}?Sw7&x(%cT~3H2?rQ^99`i literal 4423 zcmV-N5xDLjiwFP!000001LZplcud#2dtRBms zjnevGdCT!kEx~Vg+;oB9*LD`G7{9VZ#eVDH1-DSh-q)@s*4{v2ult|xT0hdM-{Jb3 zYRj#$m92{s-hS`M*2)B7nQul|wv!*%_u7e`_sc4uZngOS)KkCk%OY#P*fy7AeBnV_ zBLC}+X_a1Is>k(xv7PX1psuVoe~q($ZO!+sCzkwIrSh%vud_|j{GqikwjGp~!*OaQ z<|{Zg=R<-t>x&I`d=W&06NzfG#cY6U?HG$mZ;Q=gtO-VQKD!=kD7G>?Sr)US2pXzX z{VZ8OJ~qqDSk_ky3`!D8LqkKI+@U;^-p(9S0?{{L@6g9O^tmR38>PU8Ey@f8#~RHJ zgUzfr#o7&~0-K=#6WajP12liM#H^Bx5H;Wk=tk`f%waRIe!TK5))JE*TK>2fgVlil znDgWk;_XzB;k6k5gX}3VL_sVfe+lB?C$_ygw>JNSv+WD|@@u(VkR+*IW{a&@Z<1ut zlNE);JB{R8CAl_%WzCakTWqF=Yq%_2;u+J%wgC^?}bnx5iE7Pt<4au@_Dkw&pZ|^vkL>r(5+Qd|sAJ>eDAqr-@6-oAU!8yW;?3*fvRS6eHx@&xm5>&VOvD?D zuo>S9#bIsMW4sS1FbegzA?{Xw3v0X4K@pq1I)TSOL4Bz ztS1~h>!{FXDPg_(n00o2vDIX-y9l{@M_!SWptc(f%-^8t27{F<=n5=0UFjI6WVGw7 zvm8YhGZ6eNB@SzeLuWV6X7xhtm}PZVu_BAjINK@hOFhz=^o6WbFNd+%Xf9+P8_M)~ z4km2Y6&Uo65}VXzLz&g2H!^!#v(902HCuqZPQ2Nyq}g?}+nGi?bIaeXV~KUjwKD5U z%*Gid1`n(fyCGlaF~==wEse!7q#H&4K>+V%URayOCjzVHJjL7#0EENNN@a0YfJYWNPddedNBxdRRx1f*}S& z3(&cTp%;b+7!HZPkQfvl#fh&-#<&t=QkySfcnL$Cm>+H^rVqe43FBZfw~AJfha9j; z3P=dp9gTTGVh$Aua&jAh#Gi*@Ifm01j$y!ci~9)TIS9)I@p~b@j)2^lP;YTPK;bLN zuL3Ed9S!Y{(2jsM!#xg@-x1Uu+MS@?lcPTr1-K>Xcj-fajpSH64EUXa&e|cM&*@6&)g%H&xdqGfpum&MUt)?X0# z#o}ZAFnuP^;$r;=fjqlU9c1EN)`O4P5?@k5 zel)~D;!Fd*Ebw)p#K-Y)Js#{5c^>3*ApX(dOE%aa4gC(1=uQINsnAaj^vir0E4goE zjU($CSu=b97|q86on31sG$P{#G)DK5@R%Kq1f;qe$+fpcU+Y2FTSAxmz;xt%t;{zm zNc)%iEbm7evnOAqAkCSS?#(sM2cvm$?y)mpd?bCb7}$Ly>u76E+>H`FvY%;>Arb?A zzMz}p0MH{3=xUHBZGYeq*;bM`0x^)ank=SmL67i*IM-G$5@ftVCIom2=u->rr@$XA z^vm{Gp`2^)MXuXHEL!NR1c-&oFCfnE^!7$Na72yzezw%P-q z$c01y#E&q5r@*EX;tB!#VUqY;NaR{V+(97g4?c#7JhB~8HgN7oB+Es*ifvOK<5Wc5 zz(ZHe+s2(lIKE1>KNN}@xmFR9L15*oYn-Vz#{&8;7j z^}7ErS*Pa|fx>qKKb$^((=q%np!u@cXy>nDCd=HmwmxODt#24?2Ays@@vv09nPzD#KO{ zvYL>}PgWPQLbZqfTLC6^InH&ODLq_Oa-I)YrKg{)jw)q3N|~NgrmOUzuawCvW%5dy zyh_Gb$@pr{z0NCSeMo6)4>Y;1%7dKBgPgoyxvx^1Tu!d5lF`&$2p z^!L9OH!%(tHz(F8azA|VGfo4mu+n!^|4PICeb&FX<>_tO|3%u9W`o^g82WR5Aa`H; z4tvA*D2%vf#@@}0o7k`Gji%cJgO(ld8}p@a7=Y?6dk50^t%xF<8L$`WttP`P-AseA zu!y}{kiSCG(6>Gh*=k(+9kv?(#`s)sjqMhj!;sIpze|>TaOrq_6 zXa>clD?WCU|A+orir3zFLA1N#bE-G!mm9R5f3%+YPuq+AeiQ91kD@pU83{h4KH5O_ zQcAB3$*2l$f4gH0kLk;9AL&E!>2afkwZXw{1FOmj4*faeG;K$Y-$msQujo(ZPIY{i z^6!RkrtO^vy3_VI6Vj->{~H~t{K8q2DgSt{Aj+=|xI_7h8qv;|Q})re*Sh{xeycW< z(!XB&6_x*fRcDHSnroqQG3O&G{|hvN$VUZ!5JLG6Zf8*X{y(;+cz4B7Nb`#;n6g`T;zU4MKf%MWeWSU#itqYo-*`{sMpe%tmwb1D5~ z%bRZY!xv4b*zZ~w%0Fm+O3d@!pQwJduhvbzMc8UeuX8*v-iM1HZC^S2f}4D> z|5Y)Mg&U~eg>x@bxh*roD8BJo;gF}rNj%8&AkTxm0P=!VKKp`!2Y-0*M*x3# z@JC?&NbTczd5Bklcm;@;XYoqxlY9!0Pag8gLp}wTPpLoL<)wmnzfSUSzXb7q>3Md) zh`e?B7#_xhAdZJn0mcIl389e6JH+^F7FK-vbp7^@JDe$sc&Jo*WRMp73HlsdKZB>lF|6 zN`QLBL%rfzy~6UowVTYmSP$YnQD;V_77y`v;+#2U7mOTJY4 zr@u!%=REGOsn1W%?cc-fJ*uCk@|)^+bIU(2|55!sF8@*V$Gs0t_5Vrp@~~fX<)OLS z%@z0K&e!J7!;{KCY25Dn7`GZ3#W~Miod?f^$6o5mk20fxPmU~`daUW`xUvQ4H1WvO zCAKq-!H5H9IwR!wYiFIg1 zdfDEbQWYA$;F3Q0ulrF}_5MLO-VZ<{7tFt3HRmcyKM}I!{YC50$YXCCtChJZHTnLY zVehF>TKW0^`0?^HXxKM<3#V&;MVZm-x`ZUQLxX!Mf4O*MHX506cgCoq9%xkB&6O`4 z?SfJp`pzGh{xiz>>w{i1*Iq+eL4SEUeVPGfb@jJZt7F}7zCkJX zht)=3zlBmYm95_y5{i;*KTq{*osBYcpBp|ZBL$`H8_1n~{Sr!wJ6Tm*k&BZ4aj4FI zM}yF|g6F1Je2oTWzVrGeuMP-(@ztOS&AzSsmiVKjvjK~y_KZg< z8@tqQ8!-=|@0LFM#qRAW`NO{*Xm_R#CBJfS<43ciQDVZL!hk;~ApYb}DLYOaNBpcs z%Ihs7(cnM5vHH_)p=ii_;~XE;9F#O;sQ%uD_9(T_m>oyUtY~n_=A7qEhf)8GX(^p+ z1(f*4`hco~6^-&R{7}ck=ZdGHUe^k9F7n@_l-GJq8^Hyj^b7M(+)d0yXzHIX?e{eI?zvFSWYFV-jJn8%_%pC3KjYrY!wTi5BCw@zI|2@7)`tmx#6 zQqJXH-O}|C>i0@;#IE7F4o8(9fb|ML?StFB1zh||T%+P!7%KL3MfK+Wp0^ZZ6b&MM2Whwz5VjTxlVH9Fp5 zZ?YctI*^5`$#=NEc@Dr0>6g%r)mU`CRz1mu!(elMQ%y3&7641`G((=l`Q@|t~=miEjv000Dk@@D`5 diff --git a/tests/testthat/_snaps/output/output_lm_numeric_comb3.rds b/tests/testthat/_snaps/output/output_lm_numeric_comb3.rds index 633c26fe5814f876ed6c34a26fef330312608bc1..f8473748fe79aea108aea94321ed7cd7252b97ea 100644 GIT binary patch literal 4365 zcmV+o5%TUIiwFP!000001LZpjR8&`%zZNJ~Ed&=tmI&es5ph8j|6>ucS!7W`qp5tM zfQm&GRuzcoL=tt>n3$z?(sJ5~b|+Dj$+6S4Q73JxByEiCZYQzh5@R&!xRH!;k6QEI z`}bA-`V=i5$xO~n9WVF4yS;nwd+)yg-~W~ca2%)N)G8&X^1+v!&$Ox82`QKsN*N;d z#n6qk(ZrN06le+(yh`Ha0w8Q&3cIK5$JujNRcKe{;N;}X<+)M39@HWj+` z2fTQxp!82?9O|v-cJMZhV{~-Fk5hXzyZD2O1NObVaEaqF#Yg5DiIphe{K>zrc)HAG zKRD*G6TxxcIC?hi$V1PJaA>Nw?!6ii#rXV@f7w!=n47?FUV3cn`W5%>cWzAE+mhPI zacU*zD>yahPl7ZFHF_()(9+;UqS|CO>ET*aVm1nv#A3!OHJB>c^;CULEwhtvHrcA7 zp~?~#$olcOnCB<5zJf6*oA4i}(|J$sP`OdCGKZ8v45$!nLZVHWW7K<4DlOQe%s@z@ z!DQ20OoB1dsy9|z^p%*{3*bIL*GEgtD#;8{1CD`i)F2jwP3dd(_>ZZab?1*w8Z3r# z!3YGuDxuD5H3+8m0^+zT$UIw(|3MlR(9?Yv|0Tr1Pi*^f9&P>yS33ao<=1jKtt7j? zCbOl6dc#ub&5Af94O zZlZpF8gRvbVFntmeT`iG-yBqp)B8yJIe*C`M?6$G!DI}Zb3$#c#VnLF@$R)2eTAXi zW-yy{je3h2$lXl3a&yfbgGnG9E9)1Y3Evi=ejZ^~_)G zT)n=QD(EWB7F~S_Q!-d}wF_+3W>(k(&2_fgIvcJrjchYe+ZNQi_MvLC#n9-I4xk?C zj6xOb)YoRHF_@~D$NC1L+{T1Wx=OuZtFuU5);H7|1p~9EG3jg;cQY8|b>cE*`;}Ek zyPa>aGPiq><$r2pMHtEEgIyUYu5)*ab z5Gdi+k!^%z<0~M8fZ;5LQ+WR39ZJy~U*o$W?lOir5*mhC80sk6|l@Re(2<>W9IOVE_iQ{yq?W zx+pR*c3@0)?yoQ`!H^{8hns}yV=+$0I7G~?A{6pa3^vLB5(ajQFi$JyP=O#PZ2%V zpgjQEk8|{gVgUC5{lUrf*G`VL!+}2#=&T(E`iwrZMWjc{{9|&=9+PMN_Ltci1enFi z+VXf=UYI`Pvo_;1e;)%JE0bq=h?C_pLKep`S$|sai^a$KVfsv-#l`y9f;=0i{xbWF z&id^IZRXEVz$3#w?K3Qwi;?AXU~rq=NY?tVFyK2AS}P}G$m7I1h1WKh!_k_+O@-^(#2oO6 zTo#6>K#$b9GLR>-xGr&1fL;K4q^}InBX}l;Gz=3svAz_6KD*8a-*7$R#$hOw_%;K> z6QG|9GK5yjiM6&2{37k?5ChSh26o7`06rE-e8~d&B8Y*+IT7^o!PiuYkJI6LI@l%h zJjfSA{6*kP0oX5se#c34XM*l*=%*O^Wj;)mj2qeG$i7DQ41WMd^Y=n$*BS|p$oK+{ z(S0R6X2(YYQe7X(wVy;^<3-m`LYMl$bmV-E%r_}W`v?qp0&?8UO(HO{a628BnXD8i2k30paL7udOfJbCQC2@4e zK=x{~nf3ua!q;-{gMcWI@dKGK;3=R_4Yd1$KN{$_8^qC@b3gr)>wXZ62KovHS`f(f z;oOH4O7JlfWTL<~ma|aM(}ItY5+AjYgYIA}8u&ym0{SO@gah;in@Wf)4D5$X;t!U{ z^?dTP#d_h35=sjOs|2m(E_2D@|a%1Npju^T3?BRbzn zwI>6T2MeKGT&RF)KOmN6gYQU{)`bG99H%Pbt$?deK+P z*_=(YpM-jSqd*(!s6wS3kq8_y1yi?sx1~v&E*b;5=VS%RLhBG07Zt@ZUR~ zGv7FR-;T>$)q?j+DBj@nCdEIf#!!54Lq5UPLD$z!FzgA*m{@U+l*8vIKE1=fW!<{t zvsd1t_}%3zC|(+*q4*x-B-I$Mjd*4klB4d|IJ>V@OD!?t7LJ-1}POaoUcW{yvpI zur`IteHuH1@~=m1q3xDWhSB!dr4y-q(95w@e&vEEDF4WCE#=?t_C4h*u8MX(o7GC& zzQ0eQ^6zMVMd@E|JW1s*t{+J8k4^8bqRh8bZB`W85{xFrh6a5-(pMHD}ZQnn#j^&58ubMxm{6lw}Y5UT< z)PA36|0YU5*5i_g{fJd_DGvO0Fy-$v^%e7c{YR>QB|zgL9~}NVrQfqXC62?Hb0R(C z3T>bJVv&b@NYHsPk5z9{z0)V2qjGQ03#a(P$5j*369dP;ayy*|c^>3>khg=pT`FI2 zTF-+&JosY=e|Yf6&is+u$MNzIuN~sGL%ckTS7M*!(+>IMA)h?t)6ViK^@ltAid`J9 zV?2zPT^uigXX8cWYv-2mFdyvVeCTb5`M|?`;Kli{l-ak#eB;IW7SD_G&BTlIE!r;5 zw_Il54)d9Z`OL$7w!?hp#rZsz%C{_k+z#u7hxK9?*GnQVu9s|HTrY7R{^0d!hxN$A zdgNg}^6vGh4SceAEHAEiCoisdx<2rFpUaEu{hr;u-nD_PcU3&p6JD$*3wg1gd}4=s z!i)9fo`-#0uXw0ecBof8)GMCVD=Z&yd$Wlb>){={SPzea-rHh7YX zq27YtE$D9r5A~LZdi&wZJ=$XtBmGJJs;W4?isDuAf28pGw&Job`He&iVgd~&qe z+2c=d$C|;Q)5)VzuLrw|Bz7@^T_kfIA1%AS@UqtG%U#bZW$dDqU9j)fG$lHCGHLf2 zlNut7Y4^m-o-nn5!vOaI+z)UJ;QoN)0LKGnp9c&9JPdFm;NgIi0FMBi3>dz|MWX`fn%CTt%7dzg@d5tQX4O?5|7QYC@9~p$^5lIy9=xGW3OwyV2N7%Ni5E(V*gh z=l{O#FU=^Y@bISNML{S>bG#vb#t@YGLgc?|z8s6D+*z~n{K0gT+Zr->S+WTgq-;~D zxipkhbavseARWqHRu&qSbQ|TqXU}?a^DLCR*pgG^Jb*F=6rVC-KL_Qn&Go#x7Zh5Z|+T@aI_nSYw*D=d&m^ zUAOsUO9)B}Eh(-4_9K)wuCz7hyI_>Q^W(z1jTLBo!{Xa%yWU3QhVF9?4gVUAfBB7W zd-gTA%flpv=GRgGfzX&gyuKk}!&#JN3LITu{XWV(+dszk@&PpQ(7~KRJ9eVf$w@7$ zcgLg5|Gd!l>XATHc>YCo!DB;E+O-!4nV;K_MxQ?%^8V`gQ22S@AAHnn(eSVJH&4|J zLYZ68-k{1PlzE~&aCb}`;v1c>-P@OpQg{Esk#+xTH0n1&+lOE7k47#&eQ4oZ{wV9i z`A3_-xQ!-7UtE6k_oXOrgeiAO^Xn)%qctwFY9bo5^`|RknU~PO)WOM%-q?mR3sXM5 zzRC}cZt8XB?GznK`$F@2^tMk?KfiT9E%}`aW#`v7*!xdKiJSJNT(ab&l&|vpUPirS zB}o0xPgH7k)q*Wamn3VCkhPQl2hV`I)MMBAjSo2lZonSGn=B8eR;O!syu;pjb@bYh zh3ZpbbAM>u4L78pAGcRy(FN2BBo{Wl#q}XG$q?HB%yXXBm)l&Q8;hSSbM6NU6LwlVbKE#rR#cecLX=wiq?5+{$ H12zBvsY~S6 literal 4361 zcmV+k5%%sMiwFP!000001LZpjcoap}Jy#|-6B3RPa!5ir69~Z&j?k3^LNEsj;TCqb znRJqo$xN7;#1Q#4Du^5l=z<9TD(ue+%KGuUtQ@QCj9H_i-0O;f%Bd@dsH`H$k*%tJ zHPh3XEa4;W`t8og%jRd;nWEtumt4X0J9IE@Zpa=OWr@=`J}Es`=M9DpI5 zw9(kiGb`Tjc9h^{&$j%L;6=q#LK)v3)3kZlYq~O|(_KD$--u?^ZNiwCmxPrb{oYG= zt-k(dfm_EnzECx|(w&iddh0VYHhTDdjZe(yi+i~5Nfeg6JJgOs7WA9`=&q9<`(cYu zRIE8y>+XWWyL<<@UVShMqB9^>hHY*uF#G&Ey` zd5V65?6wC}Szloo)Q$L0OH0d5r9&o*;9w4^ff!saIE7TFFxw*fP%7-$qQXE#s@duk z?N-5(>JTjzcCi8zy8_$-==f-fSv8p`e#DA0N{tCpwPihBpK5hORPdga&mDfr+gDkrNR-3(= zdc#ub&x%UsolbVGkzHHBvhMCvFL!~N*DAV|lHth>1bL}~{uDumRG90aanVU`b&3Os5W@cj)z>|29#~B(8u$a<37q=xTC*EANXi${LSZ+2s%{HsC zLA2X|9Bwt5Y}K>PR)KIFtfNZ1t(Nr~WHma3>Kcpa@DgSVPE(bKpmm5M^Vcv(6l#V7D;u_PyHUq75UX5oTsXFeRRI*M3 zoaSn?wUT)()(a*l6Sf*FM8R2Wm%A+1*H{EIv!}Njopx_C4CIZ{GG+Uf!$`Y*(ClDt zg<6d)u^zcdtFhKVfX~kU%W%9y5VblH^g1UkVHbmFcU)^hTRx0VVMyi+ZngN!cdQ|Loh_) z>l%!EV*DozA7W_6@ExX+zE)%03*!?Qr(^s%*dBjdmMxjxg-S+0eO~3rTM4A7kg{Y->@nGcg><3{#4vagXnBM5-eg8b0gwO&ReG66ti^Z*%; z+0n^BuB($>2g>yIeslw6bh!^qN6FVKe3OH`f4R@fe&jLx^F5d?A2s5 z?GAc`Z{WNK0dXJ`2r^N?Q$e44X!ihr^w4iO#L_^Mu50lAthPVwN z8wx%~Nj&n*qxL+lHFESMZIwjVuxFt=I6Tv~aQ@H8?*0+Y=kcs#p7?z3rb(8J7`~A1 zjN$NJV*xBdmc2s&p$fA znhtYE?PJDTTP@?obs-Rr z@)`wSqu^^f-+HD}^dYBd{m_)U8b5LxKXS@`mA=YpN;##jMnThZt>=zEx<7e;cKq2_ z&dV^s{^l&3&m8BSKXOpF6Ey7vtsjv4!RC|_f;$-e^+Eb3YcwG{bwMLZ2){?#FK>%q zi=e-sEpBHVEN+gS8sj>K_wp9c(P!&dbsbG>_3H0uOHjU({JFI|IA}H%V2M2P4VfkS z->~w-NxSjoa-~amDSm@bNy}4X|tj~r<@#3NNs|ZywGvKHaYAoVBzQ!M9vUrH-IlI}6d9Chs8Xeuk98C1W4h>3VtX+9R_TU#0lf zMNd+^AXHEBpM}X(-ZrHW{#d)e zh3e(jof$p8IU@d2Vlt2EPhZ-XPVte+C9Vw-5#2jAFCe(vcd zMSmZqf41Q`l|TP{ABz9pXrpq;pTtrA`>2q}CverWr$?H$VBe`6hOUwD<;?;ao2 zNa=?=U+}RXvvdx{Az$>P{5{qlQl7v6JJtUxSnner7X2cn-*o;`8ix~SBzn$Qw0-92 zd>{FU(6dq=D}PJ%K0Wprm3w7wG{xuMsT`f18ZzpsYuP-=^B~WIybI)Aa{1ycSA_i4GjJ`uG6_yXaw%N){_3%fRR1Xh<-YZf+t9tWNJ#tIkLU@q zJz_oE&1K(p{nOv=?sM++*WUZ5j`puDd$;SSz5Mq2-O=)Q%D-JdcgnvV{Z7ZBz5d^A zUfTL~R3192-BEGh>3Z$xJlw7P-Nx0-Ow(eIC#s@Ib(+fCmFk13Uz9I$-z`7u^du1Mo1w znSh4_9szhH;8B3H0FMUD17-n39@rPzDCg9cz4Z;f(fIrmAI&;86BT{=(xZ*{PCz*` zwSPH&;xfv8{)^QMqq?HJ%|XW0tyVNn73o%;sYUlrvkzFd;Vm@$!or5s&-G|x@RE;T ze|sIuFFCO3NNFg_*B_}*p57njE{lCZ|JiUf;m1`Q&hE=b1x*qC7N%QK@u0(|tZ$Q0 ze$<@6MdCsv}xnCdp?0d)2`0=yPhZcQ_vKAH2|NQ7WH0*ry{Jkr19&(*q zcEkmvQR}Kl_d1k>!Skn z2!-^`KYGQ6a?0W&uQqK$g%6Kj_WKkY8c{uD;zIj%gi`0N*s*>$$~m0(jJR?E8lLY? ze|!2IloeSvweE{QqpXoroASR5L)p9DDY?;5jz-l#axH83D`@0^JuL&GPoq&QH-wIR?$Xl-Lr}@tUulc)>5sC$`&D1tV;`W5vj-x!FMkb1pAGm%r(KN(pB8`kq`EK4 zeHpzQT9JlwkC{T=N=QO{L(7_*d(zQ}(;HIls{cXvKJZz_>5?;OXrKE|&iP#p8uRAK z5rsAt8rSz^&k*-RsIa7QqqycHl%BEWRNBU?XxQI2Ii9QHQ18l47vst|q1=k|rf&;N zP)1|d6R%_%QPxrYi}A02hTwZuRg@s@ zKR;2aF;)rAG-H~gJw(w?{}-MCb*RUl^BWyG1g^&(!ka7~rom`zb-csgc(wJ~kcFx% zcX~fG4#y4o=f|zpSaiWP0?CC_w0k~eCK+NIfNk~zqRHv`+*tZtne#qSXxQ>+)4z@3 z*W7v9+I*YoIa8|>EVZ5^w?Ov)eb{L``PSR}9@_8p&=E^^7PrOhprQQ(47;t?12zBv DA8^Jy diff --git a/tests/testthat/_snaps/output/output_lm_numeric_copula.rds b/tests/testthat/_snaps/output/output_lm_numeric_copula.rds index 41dedb8cf725b9a57c96173a95d81ed4edd1425e..29143a83223f27ba4e3df9443ce2e75c9d94d9b1 100644 GIT binary patch literal 4295 zcmV;&5IFB2iwFP!000001MNErSX4*SZw_u$5EPMcL=lZ3f*21lz6LoWmx>B%)X6vu zFgnZ(GXtV)qA_H>{y)+G(`1ctHzr1nF~4Uvhe@7MaXq5Zd>&{#gGtsXgb+=ltMYgE ztDbrD7*_oxdwl!9&$3=sS5;S4cUQmHZnB9-lms^;fJhRDO z281eIJ4@D&kJUUgp7qrMgK{4Jucf8sj@%)gQEOukDS_!*thH<7?b>NZy$h$riY>|v z1jZXocD>c4HOAZY#uBT(1T(t=+y!X=Xo*=RnIUSxq0kNU%Ts5zR2sD~J_<5lUc~<( z9SRIl;GEe065`;ZG~dLvMf-xj{9Z2S;*H5{E!P?)DRE~-A@R;ba<7uy8^N;2!>3T{ zLI*c4C>+Ug{MS+{+jv1qgK zPVYqBww6wg_J+@6ONUO;3+J!=u>QYNjtifyyZif?>izuMv-yjUqY8&UwDS$8=Y8;p z@l*I?ck@2BmCg`0YNl5;Z+qWipXoz8{~Z?=G5C#_glUsP?CW2QV*NAw4t;3n8;U=_ zaP_ZYvZ=2sHk3oUl#mt=nTR(QVaqTGoh(*; zu|a1ym`$45daD`O9ZedYxqOmtFb|!4nl<2kgN~_dmeYM4?H86W#CXL5pBe-Oe=Fm|OlP4NI&;F36;*G#M%?^=_IfZTe!3+Z_9d1unk4k_`ngbml5exz^4( zYb4uESxq4|B%cI<;Sz?kc>J-9A_U*#4T8IdAsRzB43jZbVfX|?BbG@9+0K}5z)+3v z{V)XMdke;oVEieD;}~i&T*o}p*9we#V0;1NB#gfS+i@83FpTBI;xZTd{|v*&ST>f- zMGUJkY{swz@OnXEbYP{yqx0r%ayZAzGHlFj*XNvi<_VFBTu`hv_qU78mP30OZ*?^^n6YZvTC^@iPBoWpdFn><98J zk8=OznCW@S^2yp+{^aus>~qfgNY;8bhN+xbD@S6;<-|IL*EaVgM{5E%4(_KAbI?xY z(lI;@dZf-x0C^&d>k>B#`1znm`bq;mf+t~k0>f}ltS@6gpWSDIZ@3I$ynE`rv;A^tP$3nO-1iM6@ z2l)bse+>AN5BA4EzbO*kNuWCg`YC{ZnGfS6<3{#4vagXn!v}!zeBAKay_bYXWITb# z_@0tBX2(MUQe6+py|+Z)%Z;wLgfI1h>B!r?WWGs3+P~Cic|X#a-T5K~Y0jj4Z?1Jd z7|)Y)jh*@8Bk7C9z{ZX2qg^nBgC2P~iNp|r0Y6{R*U65cN1me9AWvw2Xd|*gk~lhH zAbU00OuK;|X%FC>?^F>W;|(&w(58Ssy@2ix{&+#Z>_sSqbG}HC`$r%aFX*cS@ccop z8|QqcJCD9?(msICG36G7uw}i)5bC))*9&rcggR~DBSP`f9!JB z<+_;*c%U8amn+gId-Pk)pBhv1Vs>3${?`UBKU?ECG2|PUnKxTQ?h0aijIgi&jeeYW zsH43(+G9`2l$RX1E-ZWFqjT0f?EhF99i855n4_PI3*vrBM9bSf^nas4Z*`iWVkWCy zUYC8UWv|nr@b2p1Yf811Hk(0f;+(Is9?sXCH9=zzOvE!7YLy1;rH(NDEeHD>$E>^a|M{(k&r-ac{bFL2o>_LHS zkYuy>2R*XTI&sbokZelCF4=^-zzimvF4+{>CPy|uvQd%EkZh=t(0>qMVwdCGpLi-c z_lK@(uJysJQl_Jn=_zHpN;mpSnY>aauawEFWbGtz@#9IUD^)HL>GY%FvC)Ox39zJ+?ZedjuK0^IJ z^{+^=v4S}~w}}j+*Bk5;-hUdF?fhQsvx1>m`uPF4-m(uQtv{J4@|gi!nbu;|&(h4) z8%oRAHx~K#B@g;ZsEw!Hi@(9s?msa;=VxJ?*=pApbFLqY;mh(pi5VOY!R-Z&BQ&8cgxw*YgN2^S`ljxS=*MEu*-e)T_lA(?1a2Ub*trltsT% z{O*E<6xaBBQM^Yxp30jGb3yKBs+V;1D5YQS^ftxD>@5Mg4OFjR z%p^(=MimrSt@y-6{zL6Viq~A3Bidc@7pj-?^A$=LAGyo?r}T?{e-rI2kD@pg83?XZ zA8Da_BdY3$kE{)hZ0s4sWB&g%9!{e8)cDcDn!vzrooZ_c4!ItFiqa8@y=I|Q2v>k zbyQFN$@9!UrMXKlQMp}n{+Go(mcB*x&YhW0G0HezhTR6rt%=qgFFxN0>}$e`TTQw9{l0K z9|8Q~!5@M7Bejp?c@{RLPrJggT%TrcswxLz`OalJ&l_=DG@0PB&5^~l3|H5IyeFiVC_d9}fy$AU1zOCY+p73Hl`5iCTll=nJ6JD$*cUw z#OrJ~m;4mz?_T$QE_kr(!Jj)G&YoPZE&Y4hf6)H_QSuMkey@HWwEbTE2OWp|_5We> z($>{pd1x=Xz2bh*_1fNfcv$&|jobA-#-&C^agGC2dYH=2{35x3>Y1d=`xblpS0r+6 zpX@q*XJ)hLb&fZY3GAkb-LPNHd5zV;dxs--C7jKwA(kkXHw4*3-hcxD2LtW~_z}QS zfO`Or1{?#J{m7y(;5fkXfD-^G0v-T33GhI`j{_bAcrf50fQJH32Al$T7~m%W4+qQx zW&uMU*iVE}>RYN?qhi*g+^6Oq4D>mTMyJ*6{b+!IvX)MI@1wYEl+ndLJ<{6~J-I3( z?TuqAP~PT~-#@#jGfJ;GwQ|+V`w-ewVBe9x1C2iRu}XQl2<3L3dA;Fu6UrH-@OeMA z2IYMpQ!{?=W;E)_#i8lpZ==i~4ThtGwJ3k-jD*z(bZBI%XGP`@M^ILF?EIZ0HlbX9 zkLGO~d{J&*NpwutWHe%)P;??{HX0S|U-aBn6&n3m(^p?zxrlN+4}Bp7J%e&ze|JvT zJs~L1qwIsaRp(J&@GIGW-S!&FYX0!?Z`Yhh+3QY5{YJrIprSIkoe@QAV$fl*ONhp%K|RqgM~ThWHgPWdHs^F-jZd_vH3X zI+Q(d*{BI~Hlk#|-A_omUbKWPi-&!=hW~w56&~}ts zadK$FIvq;SZz-F)em;6)zxt=w@^7Lcy(1dN`p-whmagqsd*Ibpc^LWT<>$|bbwxer z7FzzotseR=$Dgj7N()>?WXeU z&laHUfUPH#gNLC(HTJBNzbrvL`mZXykU0zW>Roqr$HcuTZOP+}V^^qA`nEpa^UhVG z%u&k$-`_pm8I1`4WY8e)CK|l(FUdMZ1{xOiWpGhW zBTD*4x7d<024y_@Y)HB388mRy!Btm2OGiU4=5@b@y2?tB`tP5YSTto?d!i;$Mh}qD zN&mnTo_6)v`4sR&&z(DBLi!`<)@m#| zUyGLH!mhVEeojp?#5Mr)wCQ@C-SMMm@ki5~^L0VRmOq>RZ4AHa!PC~}$3(}8S(VmU p={QjHW`9Tmv_JX9x#K&r`|069ELmCHMuUxp_J37FVrE=4003vludDz7 literal 4289 zcmV;y5I*l8iwFP!000001MNErR8+^ZcQ$r)aD@<+2x6j$qC5pL-Ubl@3M#H~8wVI* zbeI`t21H{b|A)9ovzP^6;?umipTsROc^Y$xKK)|Uh%sMHqS3%J(I{>qaS8Cd`&Q4~ zxeQT{o5_m0K9cC+eM_!xJx+G;rMOa{rK7}v!YmEtuQD{@qdktjrDSevQ2tBb4UqHzZ|_oALYA7v)&YCH<)s5hFr|-2+#n~^3f8rN-{&#fP*L!lBC3w6@|n*jpSJ+c{YJ%&BLeg_nh}g zBjiSFxcmMniF`1B<$8FOvc8kBRabrERAdSN`POddb2n`hJ_|bF)eCJAHus3m-rm^8 z*?#44ue`Xa2I1_*tLvA>pAbG<*Dyb#=tS2pY)={3#Qp9@oHS6s90;|bjcQLZ`j_iCVLv1%0n7`VY27{F< z=yEMKUGW&EWVGw7vmN;sGcf!uMGk9`LuWV6VP!?_m~C~In0$-PIL9gNM?KP+^m(jP zFNd+fXwG9E8%p%q4km2YFYI&yJ7qt#{Dt=2iT6pkd9#tCl;4^(En!`@GhBh=Ig84D`~$*Fh2=$HVh@uuJ55kk5qp zM}sdJV1G398!yqF1iDk9pG@eN`7l;8Ze))m`x@CZd;u8G*8`tDdrNpk#tV3i?S`pTejFbH3%XD=+e_ayJhjwk!_dD#lEr#Z{!Gsn5+j}(;m399=9wFi*;!RC}4 z{8|`1^+Eb3Ycw<=s;rbGgs&98N8A%{3#6ZaTYR5!u(&y~Mv?LG#ouunWp8)>pZa&C z*jT|Fo_CE5qxU=Pb<%$tmhJrB?4I73BfZZDu8-`EL(_K46}8!492{C z_R>QB!bU^iu{3jLIQIvf8UBUwx!$+hEjEWChjV`mEBDBCnyBEfHa1Rrx2i$F^(Sxk z&M_3P)~u!Ynra}$M^>j3oF7nEKE${`FmYH;HK|vNht2p{*jQeEa_XX66u-A%A;o0@ z-W2cCkE8OI@gqR)7pm9)*fC1K-ex1krWFwsZ@ySe{=TZ!DId_wMY6>d~)0F(r5Z)QM%z+Im-{FZ&(h{_M^Y8r1VekQTst%eM>3-c-x=c z?1wCwNwNQhCun=6`4KVCbvLR06+dq``BuSyq5OKsba5QMs}}i*S14V5daj#%U_gzS z$Go*v@66Z#P31Nf22*_TK;DpqDF4AP-%j8`o(FjzmpoygZ9nVxQzwfPC_hPag6quzX7W;m*G-h~ssfhw&1`@zV2byokJY z<`^F4gCNd_P6Esa9_9lt&WAE)Ux4|>i}Njl7w4Oq7w217L7Z>HnSBA~GY|8bhxshP zeCEaZ97pAMEa)S^df{Qc2;zE);>Gon%!}(K+|3`n9tBvBJgi3^)+6s)k6Qm}nQ^?h z-Wz#wz0>u9*ZWLfT<`UQYrSjz_x`5hp`P$!J$Z%~>&Za@>IpB_lX^G%xL)y4uLP)9 zJk%?m)hjITcYA}G7wh32L9B;WptnivXGM2jtcMP2KV-lZ0qQO2-GcrW@=$MisJEXk z+OItx65~tiS6oI zGLbz@U=QqPXx?LVaPDx%u7tZ;HN+Ci@`fOL$Oo_%aC^W(fFA`M3b-5KaKI6O*{>FQ z0*(Y61-LKZXu$md_XivU_)mZb03HZ97H}NkL4e}{4+i`s;30r{z${?M1N-eQN*E#p zq;B7hMi`cMYMt;b8kITV^KW*2kCJN|wd1_@pro(oAJ1ABj-E2U!Sz{t2c=h5j*KfY zqM-$Ie?^OjArw;8FHUm{jXD>fd+1vY8qxFa{ON-gDDAHQINkR*QAWoP>e~Hq35`s9 zX6d5qZ=vLu=6;!VDF$V1dF$stpPYkI`}U~cQ1B^AKE3(s=I5%=@ahG9(sxy%5oY~6 zwtfzj*uW2%RBu5eQ&y#K4-7`5hP>6QqT>!UJfrc=vIILCu{C<)PQx~ou2lsEm%oV8 z!)hm$2HZj^8-mLFWllzEFM5@%j@XIPKe=#V!p3+s{Dm7qHRd}_@{sc6!r__E??VY^ zN?ihEd$xOMKgHn$c=IcMq zLCNuzU;DJSAapUZBDky&C0yPzx8%S^Y^UR)bDMop{Mb(pf4E~VLi zR9ANFpNIx;Jy`a)y-F0T%Rf|6ei{va>D|`*D_1tjLu!u?HAfx!D7?>FpNL*>prpii zPOPp=LBqCw`TD#bAE04-KltGQhjyH=e>$qNsrR+J7z zgU*zGxhN|LMgQ-TUrv{7Mg7*C4@()f4kdMXG5Ywg(J1-jK?@Jhtww1%3ntGo7Nh=q zLtl;yoP=WIkJPM-+=#lzp1m}n+=vn{1g|dKP>Tk>*Dzq=Q+v?h0n=Vp&Hp><_x|n) z*Q(-C(kZj0t$G`ZseG*9l~<3T*mLQRTt*#bB}o1I_a#YfMOgRoqmKMdxSLlUz6q zHs|k?Nru=4V9AEF!os~$XU&Av=@-k25ZO-0TNH6QjD j8bAk-pKv?h6MLQ>8e+-D;x-xWG_?N%E7hG+wKD(!=ZJ$% diff --git a/tests/testthat/_snaps/output/output_lm_numeric_gaussian.rds b/tests/testthat/_snaps/output/output_lm_numeric_gaussian.rds index 260276785d0dc03b81bfcc88b4a14688ea190526..f1c32878a7340eb3d7303bd821a551aebf7925a0 100644 GIT binary patch literal 4194 zcmV-o5S{NIiwFP!000001MNEtR8&W@cRn!u8HA{a2$4@DC=nrwV!RCkq5`7$*EkL^ zz=UB2X9h&gGsc)0WusZ6>uP?I=V~<1V>Z6kO{LTen4X|epX<+ z`&Q4~xeRMKlHGUSzBv|ktGcSXs=B-G?Y`IPUL40MIHf|)DctZS=Qd?>#-Jq33#1JK zcE{jHXq29GVbjXVEj_NY<|g#Hzpmj z>xb4(ekJw%j@qDe-jPicEVaYiHhol`UT<$7D9a5V*EXwmz_7GCi@p!D9eBHbM$J}^ zweweBdg10rr2}o3KTo#Y-uteteu78Xcm1Dc?Y6twK0b9nt+U;j+kf_~#qaY?-)5!{ zjXA?{N;z(qaZ1jU1ZmWk=`8p{O@kAdN~6iBgKJfu$)GjIO=3;c^u|JVJy};)&g^8H zjMfq$6lvN8l72kRrulKKFJBDuCHPN!dwZ+Yp#p=}!W@zV)2mQx)y7%1`39X6r^t*g zN(=5W#M*{C(dS#*XXv#tm;djhlpbbqwOtengcCE#G_hWX`DtgWzE^jaf~kc`Zj zRrntyM1~<8+!NYYBMyF)=5KJF(O#f0y_U*}{c$&%%w<}GC@HS2$VA?`iLMo*YXewz zd-x=ZUGR0`g3O*Adu}N0hKkiRnJA*3Wc4D-GnI`0s>f~A_rIf>JQt;+!Rqzo3J(sE z4@R}RuNrR7<7+dEZXUcm_zs#Ui}};S%*NWt!1OB#YZlgK85XbDb!U~mJtimb@F4m3 zwedbmuhKn>Y(Kc??JQD-@i+T@G5WGJzv@z*uAC}picDrrWgb(~ zTQubhtR*HRFuY9_*76Fg#-d-y3X0OYpxjt@<*(v6y+R ztI`%&nXpk)q|;g}%wm^yRpkb)p4n3wHCD5O^aXj1FxUdq2fIwWov*hrw|tBmmRP%7 zpixs{)X%HXxoE1e=n6G1bL`DZTwGZN%blH7V5-!VX|0^2Qp(|;%IXTKBRM1x4CgW6 zdzB2=8CfrUjc*g&H4Kp$dSjT4p%TMB49!?(1jzQlbQ6XueC>@P2w#_D+!y2h82*Cc zFoqV)BYmyIxIf0e{tsaI6PAr7^AW?F7B zZ6Km%tB|_{Zd!Jtoik?Ju!205FS_(b9NXUYI^>XEbYP{yqvg zLL$%d5Gl!Hh$N1IlK#}-7mJVe!}OUvi;MNI26;A4{U!DppY_`lXy#8W;KX2O`wUCv z!X^29tP}ci$v7~-SUl44VB^W!2TS^6@iPBoC32Ax><#iPk5d1onCZDo^2yp+{-pB> z>~oI#NY?r|4EV-RYvouBS)5R(@Y?1wIa(9A$#6~f0|?rQTpEVCphxQ5bdV>qxGr%M zfS&_;q_0%aBX}l;r!b7-g!(cO^x1U=_=f8dmy97-8o7mdWiU9ADE7`T_y2N4C4O9K1=%%$Lz`%F^F>}=6i4*^TBxToOA5V7f(@NECx1i z9!4HU)UkAd8)$<4Gk=#h3c=h#>E z0~rsH34%5m^r-^65BQ^ke*GYhUYuhyMXvioEGp>B7kEA(*PC-}wB+DpD9H2!-&oE9 zK~D`nhKhVtLk|4GRv5Gsxe(}|_z?`y2W-kAt{|`%~tL&RK$Nt9+u%g@Ckvo|&R+Qzf?s-k6l|+)(B|t5Y~!k;hmQ6wu5S_Ec;lVEL03s+9BYf+8NxZCTFPPc zl$`V3gWX|BWhHkBAJ8Lf(4TWuPEyT?-5_Bd(fLlQHL0eoI+ChMDjcbzq!Ncg|ABys zU5;~oTqfsSpV%q6j;DHZiH=;NCzt5TUFgdt@^Xp1Tq3WKv@0a-O3t~S$s~PN zsjk9>oWg~iv|p*OVxCk^s;iLjlw8NT zKNFNLKbJyJ5*wg9`HHr!N$rd$TM`PUbtF)ys020ZWOt z+@M>anXl6qm$1Wq>5C0F`YxiA=jwC6z;pG#Fh0loNQ=pA)fIBiZz-i72@jBDj@I(m z+uLVuKG|l&wX1l+?mUV&x&4mf8;W5RAK#QsaEZ^Y4Wsmj15(oqFOXWdGX0r9+O}=j z(D?L0xLd#g*&!Imv&hok4NUyA>r9s|g6`?SFU~!M$2S8!6py>TW83 zY+VwS`%AM%BGLX`Lot94JeO`^A@+%h1qU{ZX)wKPt-*wt9yD8ZDeD*;~ zyZ>hrm48n)j`F{*IZfrSy!jZ#KQ1v*x#% z^_2hBvMp3kx$gyLpVHj<)l{x_9&RqEI*XKX*xpN>+f%*^wk~I ze(x~PC6xbJkE>4hL#j(D_P!KD+v|*dggoE+k?J>jshs3}gWsn7R_k109KN|A@Kc*8 zec{Vzo#X?2E(&=p{vFjj`_)n^w|!nP#g~s1k4lO29{KCLDLlyYAkTxm4diWN`JA&l z9{l0K9~=0?gFiOrkJvtrmxp+55U&m5$R`i^J<<5if8o-%X{5@*T@U?@SaVmhbKXAyU@?tC|;g^kq|S4V$9@#)A^imx^ACAef@nul=zP}%Q~ zoUs267u_Q^vAenG8<>Cid+>8X*M41p?szzRQn}9bA7sC){r|J%yW0Mse!ANJAbwZJ z;ph7QuzBh1*Ijw&F1owo?&^B&?mRrK{KLlWypM6Jk>Q;E@Rc6Kvh%_)?q7PI>9pTs zPye!buJco2`v*>za$SLaCo-K~Ok)@9b1hY#2KF5tzCcn!Ea5C~2(pJf0ILB90qzaB zFW_*%{Q*Y;jt0!$>^}~8AmBK_g8|0_9s)Q4a3bI*0S^T{3~&AF+HA?)UyQQKN}}G_5rcA58a{r%bTG=?+~F?La;noqb~XCo#=v)XJ97oi(W__eAdOi#=D09#1m$xV^C@1;#y{}u{P-gX@x4foJK^eQBGp}7{b<4*eY{&-EXqxGt6cQ_29)*uy1u{pJ|1O# zx#q2wcwYTZ_J#9Qr(INpF8&o8ae-mc`?)1 zA=DN#_uIhrDD#7egspx;*Jpjy^f^(0w$j=B1W* zR$M_THCpYRRnMaIPZuY)9Qy+rwfu0@sWHJQY4*{2@7V9q$X7S}9j@EhArIqr)zoa< zvK&R+S+(M1)fJRBPE{Gvat3Am@bxAC#Z4%E`F8V-_>CwrC?~qHL4z{hmH(8t6OY3h z?Vs!Fe9*XiN%c$HmY|{DkCiahog_6Ax(!iCa<4@h7Gyhe@n(G6rJz!dE=3%(C9h)-s%(BiiTAMU0tG;qmi** zO+S8c0}VZimiry@Lg{}wJzt_E-2XYXVbrv;a7A$ZJoY|w4az& sY7G_k12qr!e;9!FA)h)sULm`j9&W^vnZ<3;TWDy12Pz3C`qwf50B>G=)Bpeg literal 4198 zcmV-s5Si~EiwFP!000001MNErcvMBwZ;oVgGa&&ILJmR*M-IRwkO1N(ud#OP%}pc!yz_dK^=3fAiHH4_e5-72+h+L; zj#J8UyNpwE?!-x>wnA5h3u@||P^mPUj5;W*icAKrIku2BP1PGqSb37JqLS&!F&Qo8 zKqykQ^CkVbn@#g#Szq25DrS%zDm_ZH7HzCWJIkPRs3|pLjS>aE zv3jFLXEtgLu~j-lsaaQwD|-Xn0qA~jiCQ_CAxgl3&<(T8txQ{8Ri)P&VT5F4#w^2s z5D*!LFfdPOUyeBVQJTNXbw+!DytFLk6WilzG?^>322oI)X_1Mna}kvlqOt)ryWD#a zf6jQPG(rxv3o297prQdsNOZqMMW*ts(>;Gg{oWaP+5M4ZG+e!wl<*)C{y@~IJAJfm zF5j3@+P3e)@at%2f8O@CmeyQKYty<8SZFsb9$Mt2l_93}NO)>JnHIDF7dd%7R zi1l07qV1)sP`)kX|0g`TbEr)o+R@a|rF(XMx*heturIQ2xm#oKAKHTljn8BHnLKMp zvVtG;&j&uw?>p1JZ1slMSpQ5vlV|OyNVO!wlcCN*~ii)HPgGLTuWt*kVgw8f0QN2OU;qA#}SO-9WEo!JD{K1NNksbZGis3k2` ztfMlsshaibZq!t1D=H1TDtpB&t);l!R-vrY>6pFhIXYb>70{HL%$k}aCZw;@RL-}Q zn~YH5X{xqVR$DYx`UNbpC@u3VZAqrwWY#aRaeGjUGzM)M>(tevuh1LIn8mtUZLx(h z8#Sdmt)<#5Hd$9&Y0&DKK9x~pG22OR;MWL)EmV48lWDi}^i|9(FQbM9*2d>&)KnYw zbE|bunyRaGB^swO_GGCpwxXKF&Q?`ys?k(vEu1}L%3*J1`Gw?@T;d3Za~RIxIe_zx ztPd{Z?Si|6Ap%1H2E3nfH5m3_XvI9Efww27FJq|1Wls$LxLk>GKaBTcIDw%F!}qw3 z^i_{>IL4o09Eb5&pgS5v4u&E~Fpok1A7I#xd85dD#P9-!?HHBa!w4ZhTsp2# z!gxH!zCvteeh`O3&`DOhKjxJ1ujz|2oZOZ{bWVe+h<(X5@>+aK@%2|tTN zgd~nZ5$f-1%$_K~@qv!|8J6;eN#Z%E z6MCp*9GG3qAL)3o@nr48CH*mfnf*}`z6c5S1b!Arsr^#S;Z+gt`ma{@OB$}@>NXeWFr7#;>WlINxYKjFoBiOYlfT#zGu zC4(Hn(=ptSVH_vqmjaMy)RD7K4%m_j^c3)c$V~=4 zq^t!Sb49kK0Dl4aK>SPvxg4-{w8+LOP@V#M2|o|~h2Vby*pdtS3!vXIBH8I6I}`dT zgnpR~lSJc2_BgVykv+p5fYrG>sb^)CsE+WsLLIAj6}2%vE+P=ix`@hdB6*b)SvOI= z*ajveZC6Qb6N9*avCYzc#6CN-MGWGYiR<0Cj`3i1uAHOq%ocZ1U(5$KZe$+gQx}Ku!%d28(P| zLkxO=u25(vd_mAZu_F+mFX)tmU;dy!P~^Y2h_5I3tp;8%u+d*=Bj*NrJLh=Sutum? zu=7M0ZWom8obyYBdq3tl9C5nm)a39qO`@d1@U`${47W>c2b4TPiDXFjCD}PgRL4f% zO;pELbFWUKe)~Tq>JBQIm#SKa!XCYmbkD9kmVGSvmtdxT@&oqwR?6U5XFh_@r;p)es*0+G0CH7$;;)IgFl?bKE`H z4ns04*(JO{j;z5RoIP`rY)16@3+sr^cap71Hf7n7WKELcNERiTI28K#15EUCoc&f# zd~+w~oZs9jxsJDca*2#wA}5!~%ALr|CH!&;zg)tvkhCi#?MluupUEVBi0hP2>ZGy? zCwvMgeA0fUwuvRtO1s$5%X z(9PG()9K5~+2y|U#fA%g7tzUc_2<9BbM-&bKl^*hDwElwE8!g9R!S`r9w5maZRD@C zw@-id*c~g*U1jqdiYVUTvWeoWicu6F-jG9Zx!1LIHHYrg({$isJp+$&}wTB^&sDpmK3Xk5c;C9$P6ktcj%f^$RtWKmLVFpl>Pg zZJ}~8k<%$X3eBasrhbnD{~xpuQ2g?Rg@WGtKT)|cKU|=6$&owEeo8O%{7KNWCXC|o zNKf#`$|LPmZbD7VxXdQs(AEKwJg#5WdN_{a=E?cimwkN$dNeH|xX<^&&6EzA(m?q? zs!ybRCk8w~+ph&}rSy)2(UksTYAWUTS}}m~*Uf*BwjUj?rtLR;uF!T_o1o|9jD3`L z-Iz%EUsp|}^`E}{3FZIhg+Ua5_qd7jMV<+v?SDqugg>mu+y1ouR%<4$|Kp>*DBiXH z2(3T8WHXgh?pe(AQ<^)sobok3^B2l@GjuJb&kmbK={rZ)vG`EB&2)gaAGx)j(ih*L z`U67UAE))ldtP+VAGCB1#hzaeq3wr^eT6t*`;N+A_E0(Sdk4Nm>u*~g7RKST7NI`* zGNoHiEp*`b_4-nXW7#GuclPuXlyBSIK#DIMC>u9E)^qI28{>K4=YgLGek<@>#r(Nv zbv)R^gFRNThX;GC%pS3R>@N@gTESl{_{%eYMfyoRtq@Nh;>klitt_5md$@Bet-^R6 z=V838!gy(UHeQ6ka!wHs^T8_2hdx%A4?N5VUYHL{n0_nFH(r=;k-RY9jJz=4LaoAl z%VPSiFrRst&pga$E6itJn9oU+f5%h9tgv2qST9y#y~OgudP(Dj^%CJ=4_=Q}SdToc zM;_KAZ(omU&xZ<=cwxP_^TK+k>jSU%IlQpmZ(Hr_UG2H=mV$?T!VCH25njkA2d$7# zcp;zMcF>RW6%YB!3i*nMe8sbTh50>hY%%geKD=oa^5HR%+a~n0W*{%*Lkra(G;)R& z@-4_+hyI@CA>Zk-cKUB8; zkrU3}VWK@^1KZ6-pO5_P@9xh9UHf(Yx#QmSN%=aHznlK9_WzIK?`r$q`sr%>-RiqK z4!_j?dyPwHzwU}dchTMDcURYIcgNvg`R~X{j{xKe< z&k3))6I_jQhGo`z-I{@t<36r9t~!bCpPI7O({(Y*Kj;0s-&gcOIqJZC&uM2+*82li zy}a=R%B_-}{$}lUl$Uk|-HJYmG72+mzge~tVFPrtF>b;}w*7?8W16g{Gj9Xhn!I?`;&ZC2HxShdx3B-CfG6alDe>b3J+O=}%Bf z+l=k=8ro6%#MsH-E>oh>UkypSeCQ%d7_~3()u3Z2{&i)#y6$O|vfS?~dSx$4?jy0!sWmr|%`yTatp5KYw3RsVUc5 zMrcMz==&se+&}P!r&}$yf3&YRmgs7+h42!}p;4{Tbhx}>FRMCx{Oi*gAa? wX}dA2(Hg347iwPx# From 21d0599cb628e1f45857aafad89f94c10a8b1d51 Mon Sep 17 00:00:00 2001 From: Martin Date: Mon, 15 Jan 2024 12:08:55 +0100 Subject: [PATCH 61/62] clear unused packages --- DESCRIPTION | 2 -- NAMESPACE | 1 + R/shapr-package.R | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 42e96754a..f7994608d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,8 +32,6 @@ Imports: stats, data.table, Rcpp (>= 0.12.15), - condMVNorm, - mvnfast, Matrix, future.apply Suggests: diff --git a/NAMESPACE b/NAMESPACE index b71c5d437..7f0c8931b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -95,6 +95,7 @@ importFrom(stats,formula) importFrom(stats,model.frame) importFrom(stats,model.matrix) importFrom(stats,predict) +importFrom(stats,rnorm) importFrom(stats,setNames) importFrom(utils,head) importFrom(utils,methods) diff --git a/R/shapr-package.R b/R/shapr-package.R index 4e4761d31..23b99fa5b 100644 --- a/R/shapr-package.R +++ b/R/shapr-package.R @@ -21,6 +21,8 @@ #' #' @importFrom stats embed #' +#' @importFrom stats rnorm +#' #' @importFrom Rcpp sourceCpp #' #' @keywords internal From 3ea3d9d022ac8ed0c4e30853d3eb78641dbb4ce3 Mon Sep 17 00:00:00 2001 From: Martin Date: Mon, 15 Jan 2024 12:22:17 +0100 Subject: [PATCH 62/62] rerun tests after master merge --- NAMESPACE | 2 +- .../_snaps/output/output_lm_numeric_comb1.rds | Bin 4431 -> 5110 bytes .../_snaps/output/output_lm_numeric_comb2.rds | Bin 4428 -> 5100 bytes .../_snaps/output/output_lm_numeric_comb3.rds | Bin 4365 -> 5039 bytes .../output/output_lm_numeric_copula.rds | Bin 4295 -> 4966 bytes .../output/output_lm_numeric_gaussian.rds | Bin 4194 -> 4865 bytes tests/testthat/_snaps/plot/msev-bar-50-ci.svg | 16 +- .../plot/msev-bar-with-ci-different-width.svg | 4 +- .../_snaps/plot/msev-bar-without-ci.svg | 4 +- tests/testthat/_snaps/plot/msev-bar.svg | 16 +- .../msev-combination-bar-specified-width.svg | 80 ++--- .../_snaps/plot/msev-combination-bar.svg | 320 +++++++++--------- .../plot/msev-combination-line-point.svg | 84 ++--- .../msev-explicand-bar-specified-width.svg | 30 +- .../_snaps/plot/msev-explicand-bar.svg | 30 +- ...v-explicand-for-specified-observations.svg | 22 +- .../_snaps/plot/msev-explicand-line-point.svg | 44 +-- 17 files changed, 326 insertions(+), 326 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d648ba83c..6537b3bcb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -96,9 +96,9 @@ importFrom(stats,formula) importFrom(stats,model.frame) importFrom(stats,model.matrix) importFrom(stats,predict) -importFrom(stats,rnorm) importFrom(stats,pt) importFrom(stats,qt) +importFrom(stats,rnorm) importFrom(stats,sd) importFrom(stats,setNames) importFrom(utils,head) diff --git a/tests/testthat/_snaps/output/output_lm_numeric_comb1.rds b/tests/testthat/_snaps/output/output_lm_numeric_comb1.rds index fd3014b5715c84f333cdc071ed5c8d2193f7cef6..87eb7463002939b1ea7743de5f30dfb55d05392b 100644 GIT binary patch literal 5110 zcmVJYok_tR_r9Q>UF zt0P{DjjRu9+W*k3lk@8PpD3C>q47sYeZtm$8y`Kezdov~^^2n&eC)S%%Lnw^ea0bQ zc}>-)lj>M~P~DK&oWq&?IV1XL+cs}jZ$B|JN4f0s2K&)L>CflgT4_I1J0~;6b2K(Z zj_YOEB0}q7(w6D1IH9J-3580N*`$ZGD&K6>S`zYD)i{Hxh@~IVmz6U)*=Cci6f%lo z+L@Aeye;M#39K!D4D#9dzow?9+fs#eMy-`8B!@zuBCSoEVAD=D>Rn2TEm)#NKv06g zWYb$rT4RD$Z!EUxi*aE)fI9%KZ!Hlkhmk^YEHtA8GOrSCh1F`%nwUqhID^HY(;6L? z+bnvfh#ziPU#`dhnRI&SQyCd!OYlE%p$tPb7$np`i#YfRc{S&f=dU>OKEN+cOX<`i z&w857mNKla(~XQ%)`CQtNK+3{S|Lgsp%sq@?*g$3OuSN(798X76U}>|5;b)!>T-a1 zxDn->OGka#<_7BU*Vv2RkENmB>X%6hcP*h0NA>zI4_%$k*Jl=A-EqG6O*CEB{ln_a zhWeP`j0=6AudW|$e4=K<%_WZd?m79pd&$4APxMpzOxyUF{coQ6>x)%U{MC-1r_b9M z@8E}eG%jkOb0hb=@a6oDlTAxrShh2uFC)J?uBC~TAnSSM=& zbz<32*Or%C%vv35DzMz5FEZ$C2D3?1t+$w=IKZUQnaidcOj=T7WlEM<%oR*yZeh2Bk7g%wX{x1He2mfVE03g%o#k@S#7mxEjDpokh9KJ_cnCnkbt{NXEETd zkHOLB0gf7LxzWJr10AJPD|FNJtk_Q|7L?|k8yY!mec1v|7VI3-zZky8a1zfQyr9cM zaT@RDII@L}A+EqM2}31@Z5S?Lnn6I@2IntgsKRMq3?VpOjxm{?A7MC(VK;{BxQw** zBF3FD{tDy17=Ht@dt%7OkPq|wNoapNh7U1q4>FH2tiZ4y!y>?I$neKt=Y)+KnfqG> z9l2we5A(>58S#0FUxjz&dU8pib)lU>Fc3{PQ56#T=Dz~uum zPQ^G#@U1Ku{E!DS$&wlZa>w90wctY;0-sz0AolAp)M9ABa1aCe?AQqQ?1fDb(Ypqx zBf&RD)JKSoC_-4hkR0+cknaNdNXRqXYajXTM?E1Q3;BmQ`a{ux+W>#}zVz3Oj^)Fl zzAKcodzwU_yY@w0lCXZ1|qPJp{e=$Rj4 zBz}yL*wItco*MLG_OW&tKci=MvG&zK&-$seL_RBL?Y4tF)29dEq;OaH3`^;vCI0N% zLOx#74@@s+kF-Bnf3o`ClJ=OrO#dDdx)=%e1$yR3ss2*T_&g>4WcAE{((wfHIcIz% zb3F?K-ciw5IT8c@^rmqN&uuP~qcMSd1k#TZaZpd_hGCcjd?d~l0zIL{afy2v%5#8^ zw3P;Y1W&*)6hjIp#FsI^&(gy|Hyn?+!5DHyx{b#$5%@;|4JjMP39+^i^dk8Juz~Q6 z1vwsBkLMjGrR#2}O4_4;Mx!TTj@fNkkY+(IH*3q^Y zB7u+W?V~Vs#DMn~^z0`9_{i6b66i_Z59$bQu*i-;3}me)i)k3}k$N@fJizD(G+sax z0(CNIQw90Np}ESRM3_`l=%T&80S1xk%NwrK+_R)V?GN8J~ikV zDbi65J_rO^QBY6lBA|VuM>s%xkSPbdLO_1F$bNqjT^q1l4YYosV~9{kzPIF#uguvT zJqfH8$`$N8>^=_vluexbGrkA^v~>A2Y@IJvm)$h+k_N-K!p;~Df7uEs*+GeTNc1Jq zIa^f5dfrP^#%6Qd7QBA@|BKgML^3N?RW9GC-c^ooSm*bT2Vh03sUvq0W2z{_-)y=E z%KjD(W5zqWRs_Fw|0viAgi$+HZ)D%|Dtb5I`11EJ*6M6#i^iz0Bs9*s#a4!BPKcIr z=shLpx_Yn`hD27fO85aEnS+6xGjft>M&yPF^N5aj60J!zWzmsDO%mZq6eW>33fd0_ zOyqK$^DJ6?G%n}dAB~I8u35a0OL*iGKDmTf?uK73p_fbO@q+TJZS8}fLOeSeV zT&8qWCgoMQp;Ne_leR0>Ra_>elkzGgWlFC3*l}0xj^3Rdck-p<(jTz9G0Vm?$2rH3 z807Z|ihBg58xZTk#*`F%S{dB6LE0vBG&(h5?rh=^ey^}z-W9(WM1TLb_#W+Gc5_0E zBK_fw*YYNIaxJ-Pe9(NetIB`d0+&0~e@Vvm;9h3(xi;#wJd zwK7)U8+Kk$yX+cN?AY5-B)$${qgrxd(fkTSmcs;COSR=j{Y zncrYv{V%l7`3S&jw%GJVoa?2kR3l*nNycbBf4Qk?!s^fN*m3MCnfXCJ#VbADp!kX+ znd1E`vk5Ns`)OHlSAGVUzvsWy=~Kt+`3@y7>zzp6jQgc&BzOr8gIh z2D+aqU*AKAX#Svv`ASNkwBkpQw+QIoqI|vLCeVB`noe=$i`!i2H*3dJ zyz2ZcLGFutDBs|p&(nO-fjdlpnqT7kcR@~VG{vdNK=5(pfhNkAUU@EMku}{e3BJ7@KQf6%-T}xO*J1R|Pr8Cheek&p!{K^zW#$X!+??Cn)`e6ua`#B@?B4~rFd)I0b2g$+&3woa@$i(KFxDqKTGNA z7apW^zeK%E^QRt~O7nLPEo1(n`K#uSY5jp;>uCOmbyR*>l=o~}ez?sKF7hK5O{3WN z+wQb}uc^J@=bx@o{);{;7kdBj*J$}|+Z3T6zB(tAr(LA^b6?DIp%3zF6#Q872IV{T z+j7^-{agUx#_<1LOC#+!*3##@wK7;mGPd^?P19>y~d{T2zBlp0 ze5dmR&-ZD(FyC+6o%3DoyW>{{5AlQ-;>lyY5Klg_Lp7oqMgnSG`-C{(kH4*3SLb->v+9`{7>oe^9@)^lL3Yv{t^g?7rXm z+S-12Q2Gb8+x0ufB}PVbj>A{>teDoZ8!YL0W;E23Gn0Eg+hp|jddA7lr`()%x_pPR zC(p7(mgvJ0eOV%jYxyAA@l?!OuGcv}UJ6-a980j5ajJX`e5y2iw8WnLMmPHemVR=E z01g8T4+R~U|D6HH0FDF99s|S!?g=;naBskgfcpUM3m6_KqJDt;15O4!0PsM-g8&Z( zJOuDiz$t)vz|3Iq1A9Y`_?4@zxgV9F(PgEv%hz>Bxv7VC{@1kLD06lDr>0%kp^Rt0 zdm@9&MZ@3n)+DSqp%Jp+dfB-O)UVLec2(p4R5WTwQ1^L#P3Ym=d2Iq0$D*u=NyB>h*-_Rv zGkbik-it=vSUcTwPXx-@m^bOY%uXo%*21OjQi{;S^G82jIHCi}4e7aPY4UQEwfB$u z^5fMgyJAM)yum-wib>>*&hdWU%6=! z8a3}~SflBeW`4;0xcfO1`jr*1l)4AJT2_Z0zWRhSZg% zbpEUt8XAAT#u}_ zqSTAGu3aB-5RIt0^Y<&gH=`j>SH~_pIUVu3j2?R%UPmeKMBgcBID-c4DtpV{a0;b7 zd285_f@e@_K$v>#CodrMweCtm=@!I)wg2(?7s}9}5haN-pJ8aobE^Y(@2zX*hpY{& zR@J>ZA9cC8q~^1#3usuDsL8Ny1Hgta<&0w+`i@Y zr(HNzxDEaBk3}rzG z8mxV_*7(LHZ1=v>(x1ao#^zOZwZVC)U#BDM!_1y&z?tm!KcaRmgsv8jSjsh}T3ezf zQIhW?$@l#o4~1G)W5XVbfb2Po{~7*aBdl`Ukzv`EFTw7|l7qFcfV1v*F)D_bcw-ZH^^J0ZWdY zV{gria%8=3l!Z9*>F)-_ui&a%@Qji?$7Tl6r`gt`7RFK2Z!nJjxf+hURd(+vz2_6W zteexOFrAYrhi5}nw4F0cJ4|!Dm)T+_%BnpqbXb39(!qM z6w;4QoLCS!7wJZ4rgm9Shzf7Lnv?#{HPrj~TJ7KF-9+dW-!>`oER=luhq;^)L1*dj?l7l?sR7nGm z#Gnx$9x;s@qDH~*2G%4k=z;pbJaC`?cdw&Kp(T}%uQi+GlgFa?vAb@*J}cB= z_rxs~Tb^iZM!D&;+n4P1M%mL|Jv3)s7@D#(HZ#%pO&s-qdSP?O=ZL>^GSzo|D9Y@= zDd)}Tv1s<6&JXmd%|d1Nm@!wspNtBVkAL{p(P}jQjmB~Hy?dfb3+A5PRFH)7Yuk47 zI~9j={kxrfNt=gys1Drzb8;3+j5T~%o4RMD&(V#n_)-{b|`3pyKE>Bi8A~f|q-3jB{NVam! zo)<=~L(#b_*RS2T0TtGmr}G`>qVYTZUo*e2Ktbi_7MEVzb$^=`$?ea5tS@JO|9#3m zDcj;ZAu)LXB`5E=3_7jj1(KSUHoK#8?@x9;R;e{sI8^dte^<4f0U|^+)?{;B z4zP9EU3V8mViTKateTjbViUP0)m+n<uG_{n!-qAU z^4Qm2(+%mZ`_vuPI69^1Z~=eDjCStW5zP3mLYH?OU zMKe~IuV^ROZk?CL+6u>@UWos+wzl3@I%G5p4(5;=h#}>IQ%G|Pv(2IprNWLaDhx!W znJiAxZV}9B4$)j;7b`Ha8^9fa&hITTt0pr<3pg2?(Sle|rBLH=m;?)p2sUT3n~Z|l z({ra?WR8U4hDDo*|5%J7jH!yuu@(3qgiwVc2|6V4UqBrEq`IE-sq4tZW9$0rwwvxeLs^R#BFM;fW2c2cSwl4JPV)fJF5y zDz#QkI@aZT)bqbnE(Jf4i_-NkkrEzGA|Hp`;;}4h3 zd^A0A`Hrn$Y(m}6?@sMe735CX)|${SX9}~=^cf$irhZ@im$)_j?h~ymezo=u);_b( z^cf$O>V6)-qUABga?zBVsv%rz7_k6_NDvlbOE((iI%)Je$&6{;wKzzaxiwdg`(yMS7=d1$UC! zR2OVEyHzl2vD{>InyeN>y=b=rIl^KvTB~Q9ECS&;n3I)uYYp=_*kW)9)i$%} z@DgSVPGgmapmm5M>#Kf_DB7rkp~7l6)Rrv7TZPs+ z-{#rBs;qWXy+=BPdSoyQm8{V~r>WXxsbn6Db%N2!ge`^&QE=AS`)o+YL*)Ez^UBhDM zAsMZ;hH8QJC)~5ario60jhvS#3tLw&Rh}s%W~`U#8B>E`G8%#3UDi&mV8$NQ$=NYp zw!pPTtvnf!)XjB@Y2BhwW>QV;|DbEA}2l1#A29*p%%jq3@un@B*=Eb^-CD) za5)r13@+O+CW~(;h9el7FkHtp($*@B`(XSf#+ewO1KWcz6k;fa<@*%0zYD`3uQRQJs#SjiUH!!4Q_yNPmk}sqWs=gde zda7KE-58UHh*KDz!H^-vhZ~RSBQVavI7*7GDjMQY0yfEx83T5wVxC@#p$b7vS^!A@ zjTjm*G-Ei3ft)0^LVxzcGY;{4DJ~~KY|Lngl=@J-v^gO))RUp!7wQR6XL#^F^4pIF zK|KZP4|4Q}k^pxB{r;Ks*G7)jRB>p`B4Qy+zW zMrZAIgF5qPAmE{KzV;bb$|Wh{*{_}Y0L3^kzgT~i2gxRK%0j z1jA22e-g+L+H_9JwKKpkQZIu(5WQ(&hm-~Iu}J32SdgCzeIWhJ2fae@b)?M4GANgU zT_Vqed?a_NPL-qhz`>LHAK;rv%z%K0GWNH?qf(eU0oH!2paF?1#?EIvI_~ z1OkoG17$pBCqM>r-2hoRNT#pzqZ=fn%Y9%vO1@6vn;hir%Y9b1Bk!|6U*sT+G2fR<3{$;t{4(Pk3703V(5hdKVQ)6oCwe(hZ8NxlX@8Nh-|d1 zACVZyUQIUBSkNPUJ?Fij=mj!CAQJ;T6||{?dJphN2kk~cKe}_?V+JYrgudvYt#F`) zfm|%-y)#jRj|m{t3w&cSiv~SC_?RH`Q4cYQ1Y3!~Cvx%7KJg zoMFA^p8&v`PIE`?W5!Zbjn8bx`>OsJ0b`x_46O`)?D5gC8wj&7TQsxtypCS?drto0 z6@t-ewHwT0Es^oAEw(cxaZ=&2RDYCrmF zg}hoJuU5!w6nu?>ujPF6nM%=yoTl|dQ|fB`$Z7n@Dce>0DyJ#sl)4%PP0O{NJO1eY zdQ%VTwWboGpX`8Ikq@1)x3rRru zz0!VpSNu*C{r%hG4)%lfo0D=B8INGRm$$NuX~o;a`>nTm`}l85@OdrzFRA#xi|uS0 z8)umRPhBBXK=&GZ2YWY+xK74Fos9Jz!`^q!K2HrgcI|B{m%k2Rvs&@OqU|dTRS`4b zs1j^ualT=mXsWDY7v#$KJpuF;O*_xkr+$Rz>VIMXyzliLR=ZOy=X~FpDm{`OAjuqc z^Osv&XRiO`jtl3m%K2MLDP9}!I>lEsSrqSITS#zK*te^*O-)g``Q>Lwu3MJ>)Aw9& zu3p{z=+YY$Z(Q;ViWi0HDE^}`jmlfgCWG8}R4?<;AzJ@+-FaFsKX8ZnPwOi}|0daK zNTN6gnFv0rJuHy7Znv7*|dn@?$;BVX}wq3 z7An7Q)o?0zr0*k?|84x6w7&VHLA3to>G@PX?3aD1{L=Y9q5MPXddj~UahdW}S0y{2 z&e~1uf&V$2%D=6fNa2YYVMZF7-q&%KslUnaC$az7s?FKeSAs^belw zO7Zr_1C)Mj(Qm1qcE>Z!KCN@7U!ZdCWe2I;t;CmT{p5qQY5mTj)hs@=e%1OR@boV3u#xi{iKh$V$wNGOh^LFiQ|=FU zdZkMmufsfymrEKifoJ1IOg351R@}LXW3lHnXC9Rhu9*ZUk^TJN`A-u12z-Ti}xhkU|I`Q!;+ z$|oPWAfND3KDq5`vnjAmWO=1b7_UU?$-z)!a?c6K> zZuEN{hmQJxzj0~r*I9Asta@kl`(D>;XUE}wckF$I0%e{M>c=oWs~x%&H7l7{UsftT2>o|03D*shGni8a;=X8LTj! z71)Pwx>5riRoZ;C#J=cFYI6iu9ywzG#{!1$06j1N`v6V`oC=tI3@`xjAi!yW(*b7y z9s)QMFnoWAh5>#Ea2DXbTxjCC`2$bu_o7MPzcn{-PdqBxS~BaMg5D_a=CYOD zvdhtwC6k|>F}^1%jv2IkW!4%raqrJX+tGSdSTirPd@p>%R3Uuc&85`Fl^J+z@WpcPIjlcE3CH<&Tb|2@_^t2rK*=jaf2z z!MV>HQPzc~1^ZsWamaJNy|q^e8r@i(-RG0RXv~1~mG53&hWLJlE(^Y#igFewUqTzs zpxmKHTp@cdqud>Pwrn)Vq3oFTue}`89pzlQdF}f6gJ}HHJAb>9z736jwmxO`iMfb3 z<`3N4{2I!BJLyhY^C>j^gX#_8rjsc9sheZ}RQ4RoiHOy2|L9i;oi<)6t9l>tU+(|e zk_**n(({?b_wcxW5v=>>Mo$M6LqzHub)8U{`$pN zkxyPi`AgoiU&&}hBPV2R)@&S&^4>q+^5UUTRDAY%ZBg$5Xw3EJAFw|CF?#6i!Kf`O z-a&C^1Fr;VSE2Mji{GE9egNgYiS~q5WT3ob#?W0!$%wCSed+ezbTop$@M_nAacJbk zxksatccM{m%p21DR4mHVFWr$UXYE5M$Nlw+KDVAjSaYs+HLDB;xx^Xo0R-}|bnzKcWo+txKUM3 VWM}<0n;f)je+P;6Oj(3B006z^`quye diff --git a/tests/testthat/_snaps/output/output_lm_numeric_comb2.rds b/tests/testthat/_snaps/output/output_lm_numeric_comb2.rds index 3d7e091cdb4012a742767cbcf4f295d3c0e83d82..fc125c8591b3686a12a0a5ad732683bf47d62a72 100644 GIT binary patch literal 5100 zcmVLy10)wmiPlW4rJXw*bwyfrE&cpHwb>V7r!G(*H+lKuDl zcRru5>s|HUtE;Q38D5WJ7+c29R>s)c<44ARNNRlBZkXnU3wD4VFt`&z6xZ$QqJGW4 zBlyGYiSH9U>+6*EBxrYxINBnqvwYh%B(j8HDdqazD48| zv++O3o(+aTh(+MfK@9wavJ+!bW`8x8>w&)XSt{o#N~(iaXUJ7(L>Z)-HZ@qZ5qWAa zdbSlkYoHeUSI@rK1!~?-Qmv2}bDkjJk}Upb=6AB>qo&moX9GNYrq(|hk@scq{T31~ zEBE0(mtQSC7dDPP{CUvjtPPvFy^Z%fv_YG=jV;?Nw^VwV`S*^r$?l%6;(oe)f8DH} zr@6grEB+Qb?f|>`Leh-WD9@}9eD>b?Nnb1)lFpv2NZf7A&fzx5#}t%q*WQuAIYVr1_ZE%?_mXQ~PFp;IsEKk6gNQ%TF@mY%|rlkZKvE#a<%fghlA+_X4>( zYWX@cQz3Y4Qwb*hspks4-k?(`skn#UpvqJ$O=_K1K0#&B0oh$ESL$*z)LI4M7->b> z23Iro2urrOL6*Uzww#WLJSKG$D;vn54sV^UW3of>Ku?&sCV1s%60zULfdPNansIk{=A0F_{f~WdRWIM216(YH_&;CAp%1ghQop{BnF#cVP>$2#<&<`GI{@u;XMrPh5Rsm zFde_ZFi{xS7jkRk1$jsYn`DOZ2D^hX&r`^u4T7AM0Fd~V80KR*i{Us1Jh3r5AfAJ; zo)EvMz3LnZ!RTN9|F0T5piVPAkAPPFj}6OY=hYDW8@ppZePz zaIi$4<{?CqM?Xm%VUqeh!7mygt%vGUc^Vh3-xK6%I|WJXQ#!4;5tOMvZ2))jwX{#M zR4!1G&z3cmUz4;0^^3+MZ4cU>lpi6fkH$;=ZzGWlkzf~)r+Jk6FU3^PL6T3(r}>li zC$P^{&5vZPCt!G;5$4K%7zQxHJcY+L)0g4r1SS=p(}_9Y6S-InZ-5?|=Z1qkk;U^8 zGZ5%WphxP820en)F!aLEn-S)h!Jto{enq?+-GBHjEMG+Tq|A zDG!Ahh~5yeL!K4jW0J_1SdbqKF_1XpKraz|?IH4UC_E1ZyF{J^`DBQHF!+)L_6I|~ zJw>`{pqmc$BtyN_hg4C!ku{F2Yh=xE2B0)&D|GtoD54P=2cS{9gNR4%*o#1{YcG0s z66rfy(RC8h#Xe9SDc@1zn;69Pi+z^XBaYdcFJchqOiXuTs^^2!92m>ksV~l=x@Ziv z-N-uH5JNN2BO9IoM%c>Y_X~ap?GAe64c-pqN!b;6MAl0bhX)3-R+GiFG3XJ#CsVZ- z^amLyknskd4bA#e8BmNWE|?kg|idRy~aMh$0Q}lS;}Wi=IJ&Wd@4ABAIj}-_oyx76 zX#UnZv;E)lLqcL(M4IdQd1}~`_Go^chyEYb=+#a#Y^lj{7CUk0agv?7FYH~-ba}QS z-)K}Tv`p1D*1l>xTSxxNEX-*0bMX#c`AWq9_sk@j%Lt8*otVa4}u|C@s4UERO8WbLN# zKauvT*`Q4fU4P=Qr1c$F(%Z`yVZ_uicB*4s%X+JBYb{@##ePCZqyk}s!Yc6o?PmY@a{d51-6+?J=-OvFb_n6n~c;W-Btmn=fY`tDye#<=HO3 z3wGuQ@;C~q2|j0ctdiG@DY)9Z-@f_*CBdOArhia!v?Gtt4jII)s9(Ra$G$>>eI7PD z%a{F!Zs+BXEa=9|oemzs^B?)G=gV6Ugz@Do!{T^(*SW#G{Pgi7dH#tAPoDqG{XWmP zDHZIT8TA!kc39Pom*41^z|((P@gpz)%hHxSelSVL%Y|O>=lO@w03sjgvDusFmzDJ6 z>EFE5kjJ|gALHpi6t3miV-1C5s`0~Ye8GO0oL=nvoUoO?{ z=lRFV7W3shpYrw_2RKjS>8Bdpv9RwqGl$16H(K-jgW4uSo*zBn_3zblw2*i6UB=VP zO>YS8aOtW*kG{v3ubzL)LcYH1H6f4LYk0kjKfKG!ZOHTG@$LQDy`#ciBIiAeVnLn- zc^2e3kmtnmNf%Wt_``xf9Qeb6KOFT(Y#+zVLcAQr%R#&>jaOu!kbJaYB13SlV7hUZ0b~LVw_d{@}wwe_)|MutI+*r1m-JZ>-SYLRq1| zX<4Da1#m)t>rd@-(4Sf8&n)z34*D}I^ylup{MM=MI2bQ1j2BKAFX5~(UgB9{yo6Zz zgU2HWSD(qW7Ga{nhsUsjmI2$=-|VsV%>@e%H19 z%kp1T&&%>(M1R?KsIC96nwOe>b(M#@D%VxqFFRiAIuEZZ|Eh6Y?qe+G$Uw$??rJ`G zCOmovDLu*z1U@;ktnIO;wc|=R(5dB-r_}>noA&gf1AXYo)O=)Y{x;62S1HYVmErVZ z7=56h0y?J1VP|5_j;!i)bzx^*ZO2P@n4W;W0XGH=pSzmB)(8R|0yq>f{d(gyz+r&H z0Y?CC54Z#1j)37aThtkF7r;%cXgmB`BuRyzZx$jzWEV4r%kj?+sDn|9p8QuGdeJ-(Ks{=YSx84}=t`iDnUr03j4i4O}4t_6RL25Ltv z`fyVp6u;}(63zF=(ZJ(N)c0f=C^q`}?!KSeqPW5fZ~u0=73zCsZ+4F36O<6Ls^x;i{6p05;lVT_+T?uJyA`112MMbKZzUyFN$J+x~H&>A7+gJ?-hb z9pgh#WS8C9?jLnQ?5PJaTTY%p?D(0oTMhhCuZ44$eci?f^)Bpn`R1ofQPkM9)IP22 zp_qF|Z9Z<8j(V-n_J2O}DC#!s@Lp9wZxlIqwfnwu|K|KZ>xKje-<5jV1vFR{O% zm{}2H1~Bd@{^FFAk0LWrTClaGwuf}&@I3>cTY5yc+*uIrS*gDCFz3BLr% zmZBbxMSBW&-9_E*efz;=S05C4u-WC?BTG@7>#}ys{gYAr^Mqlk@hpnZRYWCgQ&Gon zkDZU0Vu!k}YSC)Z=}V}~^yIPyEpWU)WM13w+F{gn+R(zHHHC+eNQ0+5KRo`&C4P*AXjdu%YAb> z>d_tjFULF7kQqDj{W%SY{}$btSwlan{1f`oAG=|`MWr{4(i=S?%NcjPubzr(fYR0( z-c7V0hcugqH|Zd+sV~UXnHH7)bzc|M=z%#-db8)xSbx3xD{hMEUr+t_cKE-y!!_Lw z^Pd``SsM3e5wZV7(brzTvZDWSq}<|h;8I#kq~BQ@ZmJANxet0gU*G-#N*-?vThXcz zW$tNr+u>kKG|AZfv%dNQ^v?Od^;fMOjs`g_?=ds36&mqqQTv{Ol_+&tuT~ouA~fJh z`abuBW$5)ovK>bwgOEvNW~!h5;on3jXonJJ-;NR%7ICdwqvG3@Yi9vbV$>w^DT?&+9=HYV&K{ zKHcQasNd-aCz_=NprJELCiRJ!jQTI_rwh713UwKBXw&BVAE1%Pp9H_2G8-wkeKR6x zaw$q*+4b1u&TpbVHxl2NTd)(QdNfhx`M9A$i;DM_Hm~wWp-&J~9&lW9y$uS!o8~oN zlY@G4hPg-Yrl8>;jo9&b*+uq=_r~L8ui3xc%ZlW(=OxycD}ViaO0Ar&@rxj_xH~US zKHX9)73Ob^Jb7xhD=F*wq_<%O3QfM*B`5mlwDdZc`~jTg(=pLHRxNq<#HxV`YSc#l O1?69kIzEmFecafz?(vw`nleP}fx(Zo zQEL39H->fkE5Wbk&;Ap^i!OdJlkuw}Ud!`&b;SRmz&#ZQL$3Ou;3L=C_VtnYC}P8k z57({Du4?P?x+C*kaaG@67Szwp>Lc-cj^+~U!&g*=Us*q3_SliaCys6V1QW;jLTxK! zo96Tk~g($tDjmu(>FvxR6%f61^$vZkL!WIjI8pL!*rLKf%UGk? zVX&F?rdYedlxH*KVPac=I)LVnmY7wM8KME)5xUVxfjMjj){jSy#ad$0L(2#EVz3(U zA9IdULL5#vGQAe#e~>*l4B-%q$X|ju_=#;#&Z*7+D7C#oUwN&R3y>w%!)&n?=}oc> zy0YRX^Uhs%t(IMzz_R<(XI$+3&kSTyjYuycLge@2(>K-=}N~iC8J$u zo#`mBn1Rs3QsS_dICOU7ELJZxj+s`eiWOLF##xfIH}yzo(&w{IJsid&qdA{>Y$(&` zIGC_mmuJvBN^EkM4P{o7-pK4}%{qszq3H|qI&rXBNwe!{w=;}(=9Z6H#}X^awKnTY z%*Nspg9}!P-H@wund4Tpmd4^3a%@Ii?2S^aeiFxSH5pk@{t`92B&X28$UY)j93d91 zSa)NKN+gRSA;(gxE7Ci-hDy)w30SEkr7e@>2g5}S=kV0RGH&hgHQwg9n;4=n1Ywwp zp%lZX7;a&i0U+B7x2rLf;p-L{0`avKV^Wbm!*CMAZVb0Ejr6q~;|Po|VB8Di@4$8p zh71g2II(WchW>YA*nwrck!g-$6^1PsDgbXFH4B4)p)&@uG`5RAa;LB!)^SV55RJhX zbnan@!%&Oipy&&U!7Wl;_-;uUS7A(Q^EVh?!q8L94>ttU@jVHbh;d6Xw{EQ=4_RQ7 z6p%o$I~wx>#2mUI$jL1L5`PYcWf)FlIEn$+E$(B8XFqHg#P0?8x+CPqgq{=k0~At8 zek+g)+ELJsgmy=0Gu-_U`5i_v(C!TF9vuClaKNoVziTi0Yb3|o!NBhVbk+_8eMaxS zO{8~J_{Zd!JtoikjZoNm1~7}0wUzO*yfA&nXKlu3{&oT!sgP%Rh*IP+L=i`fqQ3y} zi^a$KVfsv-#l`v$0C_e}5eoZ^&iZW&ZRSrmz`cW=?K7;D3s>Z`%Ol#)D#n5N#o|$p z2OCeuk5lx=;${AKQ^-Xra0`%Uc~ttZ#7xgakx#~F`BTm(u+KHrN3zz_G5n4bYvnKu zBRH{6;kC^T!PZ?eA1JhCRwF=+l zAn#x9v$7v~%&vTqgFI()x+m8-AB^U~Imga?@sjn$VqoJ&_R%(+_%urN$n#7XhENRn z{(?Rf`+*+0LDzshY5M?=$hMZn;g5mr)nqdb0zJYH;2IwFLP5q8WCDTb27PLw-5&hW zLci=8s~y+y>_x6SKrCA5%NJ-qAQ!|nJbtRc$BrNq3cj(NwFbQa@Uf%J#{kHIKiCQb zK9LK7{)r#K0PVr13gQX``@ypKePwd3AnpK=^#LCPMILz^QPp$KM`1q{DDUyfcPVF}4FL?x18kr23NToFSvJk@u9**lKR` zh^#;Sx68VCu6T98hlS(cJ<1=yJmm738S{8(OZ>~lDO26Mui{USE?=B}=2`w;HJ6#L z;~Ag7wbZi3)~-$v`O(6j9(THPp1~5oCd&QuylG1$d?qA)Q>Pa;NcMj!iHb_;oFMh{ z&AgajdZJ~`9{Rt~pf@_GP&1QdPEX=qWr`>DV0dD;iQR70o4JO^Soem<*=F)r z{~hj@y*lcI4m}u$xupU z8WteW(HS%u$K+tiLvE#NJ9Ai;%(1k@dqvG z@Bb}sVjL`PPOMR6JiPGkT+b@3{8ifjrs4eh?VsCn_44muk@mFNU_%U3fA)`L^tEf) z3(3b}#5FVaY-ZfVej8rEI^8g6+1m(XuKa}vP(2l|Y#P7Bam!=|>;-zO$uLtl!(hxW zU~fW{Z@k>;OR7iQB!BZ8+$8@K<7;@yZMWDQhFs41)wj|k@!pZl(JKCj`ufRhkJkyf zY~|1VXbi=x-Pck4qq-l(hgWA1T;Ox3ajX*fH9E&3_}sre)C-Z@yki<$JHX3HB;L?p>2 z_d~@LmoER*N&YYTNff_*{RPqP^1W1V;N9!AoqME?`A^%6Tl^~8Sr$%lA~F(uMsuW| z>Lr(68a!-w%dlIK(LAQVa_ev}icgOlCA{6TWsv{wa)R64?s%HEL&twaLkd3sxZC?y+e>)+S%KN+=N#z&JoI?4>;sPlDzTXd&?^Yw)IW=_`ZF{^EPvtjj z(<%Myx4)wD->>RI@y~NCR4)2lDCHkOBZz#s{|AATUwdm9rGN1sZ7ANp;s~XGS-zg? zX+C|4*{5yp;u0!XweTpFdl2?IZJ+OvP1|+HDp`JLyT-DI@{iQ6pzRy)QTst*UUMk@ zM5`N4_CqQPDQUeZfh- zrO#zCkNNAU-uW;8NaZ#a2UC1~PyXP<*cJ&(?Ua{0{j1|Iz3!5;zq z;lUq)`6IWFz%F-yxt3WalJnj8rFM2i(R#99_k4%)|2OXv7YP`pq}tzJ$dM4AJ;1$ z>XiWXiidi|vwDT)z3;zk=EZvWKoINUanRc&_Oq-rFV;f`wI9-VssQyC^zK1_^LePZ zJk;CI7VHi<5z^a>)UW*9(-jm~L~o_|tEeQ3Z{FHYa6wFpr?`J;rTvi;AHBn6d&FwC zo6A0|`iH;A-RC^%uc`M>&F$Z#>^-iZrt+KWcXP`>DgSZ(JSqQi^d}vMruzT1d3n^Y zx$@9l?dFR6N!M$0=izDPpEhpi=NP9N8O}-PuF}CX;jwp+%A?G1;FBZErXFj$I&Y&jV;8-+M;{qW-^SUkh8*cxWg@$nz%JOAe%diQcruZ)qk=n_q&|d4P_R0Irv@8aWvZU;*P}w)}quK zeJ?CEtwhOfm-auoYAPB!a9p=nere_*z z0iWy8@SoaLM1^^v;jwXb(+Y2*jN9d= zC?)B^j-l_VQEK_QKm2s@88r0Uo%w~@Ur>6~J6!`4I-o&us=F5s&q5=I-6f9R zy;1qXk*+AEzR%ooX}8g^H*4c&ynO{_XeuLSubGW9B2WF{?y5CtNM-1f-O)Eu`pSNp z?H5^5#<4RU3MQM;@VPZXm(33vqPkBPb=5qS zxL3%WGPVdMK4Xi|2^Y}7b^0F-9oiyP+x6>H$rDl9ck6%Hb80?Ha^JOi*AgF;c-F6C zT94i+c|+IxTSmNy(AA}r4s3rPC4KnkeI3p`L`n1SZTNU*6iVp3Bj4}!zKB2ZbMn^X z#}GfWLUpZGC>r$I%Wr+&tsNR%KIr1r_g10A8I#8j>FSM=s}H-q*=8CVv@t*QLB(Md zKjGj`L)c)H@bX%}-TPNG%ER!3k%zu4nvUYG(qWJ2)ue|8f z4khgGc=7s_8kFkuR*%)8St#v6`h>A*JW4ClCuW()qF#HCoQ<2SLH*w8{LC9CFQC2) zvTB!i#_@icdwJ8d2T{NIn2`-B^u9=WW%KTsRCi=@W30A+`ZnvZotz9MXr{ z;)mW`!vlqyEq^xsA2IyuCr{fW-%?6vYNdKpiFD-V$^M@WI}|2g$xAPwT~7~pV#&th SHW}?Sw7&x(%cT~3H2?rQ^99`i diff --git a/tests/testthat/_snaps/output/output_lm_numeric_comb3.rds b/tests/testthat/_snaps/output/output_lm_numeric_comb3.rds index f8473748fe79aea108aea94321ed7cd7252b97ea..7ab8d808dc5380cda9513ec7150fb592edf0501e 100644 GIT binary patch literal 5039 zcmV;g6Hx3QiwFP!000001MNErTvS!}Zx$GKL2yB3Q4x1sa6uH`0R%CYSQOB-afAUT zVFrf*5v?pSG0Uvfk}p5)D|6{9KQp(=2JJI5chfBQtv<~q(-tk2|Gn>>JM-o-r2eG- z{r>-Nejd-g_bm6EbMHO(y?YG<%T%m0M`SWmj=`AE$|Ln9rA$w%$!ICUifWi{imV@Z zvuSb!>&u&fayt31zP|pp+@QQ-T^Tb-iH#olI*TsCqMJ~xcW5av6OA$jeh~(vMQ=9h ziX+PO#RX=40cmWBa0j93y`^fEI8y}N9=lN^n-@`~FV&O(jCrg(ccRi@HstAwvB9lS zS6)_T&>0&th~uiD^K35phhbC@2t}tv`U}9}PprFf4t4&jt?q&H^0J)IR}x)Uqsd$% zSi?feBo&QlR7fm!k(5=EaxwPe^5DZDHG!#D%eoZ zh7+ry<%O-`ZHnCLf;QC$b&h?Q>1Xnc4vLIl(*G0q8o%{e{oI$AE@%BS{Y;+GAxraA z+qqXBm90EgzM%vot;E50k+HZD7M2D4k*|pczKPVZh2#Q-zQRI-j(g~eI|o^&qU0mZ ze}Q)Ice?C8H4b|FzCug?HwRTy?|dZP>i^0kM=ey;qssPd&gn`^%_dzQV{cY!*5@1Y zEC!QNTctOfu-V6`%`=ruFc@`|QpP$eG@Hslxa(+Sc*(6$$Og0Ev4lal4Gja#-O%LDYflMMJBVM%Es*> zSfnl16|znPQ?n8(wB<$v>83<*LOWS+C@iut23L!r#9%CBF6k?Ec^0P4s4dXzEahgY zPx{KzVx56?s4;3SW_#5e`L*IYW}91?RWz@3B+xYyA)XquedMBjvGLmPV z4uZFB7mUzbbZorrjgnxsu(wJRw@7SlC)!3*t25-~A-$EPp9)>EwnSGcB?s8aWfq;; zBBlA+X?dm!BFO^m$#(RJl5#dNY)y>S{`8>4=S;R!(w#Al4g!JG1dfpjPPQ;bD^ezp zPTVB|VbnDOV+m9c*i7IG;fY4x=A?d^KqV=A5(psWQi9tN{4s%p1hx_Qk+jji77*Nl z;FAPL68sJ7?m-}xKo+LRC$ax61U@3X-RWW@u$aJV0&@^Ar*o8ml|Ux~bb)>-+Q=Ou zazsWEPavFtH_F@~(3`+50y{-ps1J&cV){|U5nM+w-P%tPm_eYA7$0spX&*#zEWv(a zY!&_(hfLH-H=qF2n?Y#4Vhk04F}Z?3{m&yXpTJ=PdkM59umSzqflnLM?%AXqgs~}x zzT)Eqv=#S$P+~m{>m9Kkgms2{?xMfl&;#q8u-=Okeh`YdIm&m96n+i(SUnKwow1$O z15lo|_ggEr2g&SXd`utXXZ?1N>FI))`N`^Xe_33ZJfpKZqceNkBkm~UXK@IV#j&l- zj~=rAe9)#jo**JBO>1XY%-rKLh(6ERh|9vg5FyOzfB0 zFj6vZbdRI^8r?J85m=kMlXh0tNZKfmE4HzAR|$>jagl&j)OO$yTfr8dj^k^1b+7AZ($CT(})8peaQxpI!aGh5sxeK8-{xY2#I1%V)xqp#c{ z1nAildA<-1T6|EBz9y)VpVqyQMtS`uel#OM_iDPCwnjNh_vP#-1;NPUhCBgCQ(&JO zthYgXG}x~X`q7HB9}m!SJM=|^eR*S>7xJ~{?57t>v@r;Ig3&e>Gk=uxMH_=8Hu_== znxU=`q*K1O*gv%+5TOm~RH9!2s6SBRzqf?1Ir{C3yk2NyfJmb+MM~T2+;mPjcAGD@ ztJurZeVnwZ>pABKY7hPn=kWg4G_QmXPt!C?S_0pSPsVuC%63566O=@TW?!0}QzdO| zSzeL?ZCG%2K>F^@yUTJ&Lw7>tjAXYV*IZ6jL#_|$9c};tu>K_p> z=DclaCGba&kBS`=6zeAFi`h%JMmUJKy`Fm)=<+Nkv$j}YL3!+Ji|q`doR}?@IC^T% zara<549%=`m+(S4x(1tZ_RLAM8Pyvgt|MW-(`-$%Da(#DYtjryvnb8PA=tk^Vyc(p z?1#S6GiW8}{0v%pNX+tuQYNF6$th*BN+i8#euubU-W9L$6MlbLe2;!Gzd12S(eZF6 zdwIR>=(FLyUQ@Rxf zve@ClLc@;g4f)bzeKs_*i-(3+5sEaXpsYw&TCAUf_Zm!r>|)17xQl7z(fiaN@aX-& z=%4*UzRYB{=<_+pTWh&R;xi_lqjmiE_4Q-k+<(VP(p%w_^;rU5>atA0S5*TAynAUX z#YJ8}EgEXr<`UVXkEEz(cy8kZwUCESv3N#_RzV7GOy4kjBid+2{bXcedXRjCdcP)q#_zrd)Ezo~zyHcpH{j7&jKa-On z@O#bcDDcmo@`ym+*V|X1-}L!jpewG4dJc`4uiPdl#X(HxIYlVing*1TU_aL|$AkVGj0?^=QTQ$m4qCaXs?(_2}#QXyzbZT<`U~ zxZZ{JLDu_3UR>|Dt@icq>$&xoipPAyi}_?KFXofate8)DF`wLa&`a#lA0pp?|Kq`*W{vfk78%Rz-ZHflQBQzgqY2T1ZOYyZms57bIy8 zSB8)72Jz1Y{atfn5Ip?r{MXOzj)ml{eqCoq8X+xeokGpUKvKr(sXe^3kT^5fKe*3L zNM2(d_UMYSko=@MDWiTD#C6I%4$EGFhim@x)6XqCLi(7*m0h?Kkg{QMKQ3f1r0n!Q zwD-NWkevNYtncY4NX^MKZLQx4Bf|0$(|txle9B9n*`vlm>cB4(2Npa6$uqCDK4<*3 zK^zhj4JoZU=0kkdfKFemCH~iJ{4ti_X^X?KZlQ5KZ}SkJ)~+CbUoHnl``#{uWMo8ciX1zdV$0t-tzW_U=J8lTQao6{+-cGdW-J2WThw;_lE@#Zit%mn6hy~rR~%jI2QmIxIThc20x^SgwkDnT zhS*J?rr)Z{har_u-i+D23I=!IQQtlA3=Em~rq8w=wGHAhT%mb6_@4j+|FvXk&!wkf zn9*}UMbUbQKiwhJGH(|o?Ae{vW#cA@9?@s5>YX7F|G|YTFYWV$^mEUv)7p1~m>-|- zVtRTf3^=#fZ~fdg5O~h@s*8F7^gg5izCSe2z?(CUUc(WcGDHc4Tvmxxm6+PaK4C zL&jvDSA7X{_itK0AfX#fOD#N8y;KRgUo<=L?u0Ec{(}~W7RH2tKBdni*+JEimy#6Q zu_hOCZ~i?k{_X3~`>R#DAD(f6*wj%j3wN6#cV6I(xg96Ms7r_6eI_*+a{ZV2PZ_)u z^amDos4|U*nUhEO7$c7Wyr3)^GkQA=TeqMpJHrFAx%CqQ4&8+G%`G{jcM)*yuBUyd zH$&HjkvrD}9)|IQ!kY!Ce}IXPfADVHqw^uw6n(0{X}`_yl#AoEZe_0k6o2DhQN0r~ z);t~>eX0v&RVQpZ)N3-NZA!>}Hem+vcaFt+u5JZM12(3;6*>y0|LwvckNFQli8Ukaojh0tqnDk_sq5VX#@1Ay*_hoAvgWtw;&nV6(!IMJTd2!~?wUQf-x!z= zk?ZPL^~%hL%z-uQipLFxxcKqwryO|`vZp-Vyw@Aapxb?5?h*e;D2zJs{e}-ZL7&=B zJf8n;0l<<8!`GK|f~49*^McJwAgx9b-D|#uzwI^a)|Sv_(9R?Jz2={pAg3;+=F-i} zFm`8`tAiAKV91EVJj1D0_xG?OyX?7-{pHM`|D1A9#y0xpkd)m=V5eVp8S-?tZ<2h4 z)&^Hn&g02$!zy&ePx{;JHw(X2w`-C=(t~{s*DxU!L|o F000oUCm;X- literal 4365 zcmV+o5%TUIiwFP!000001LZpjR8&`%zZNJ~Ed&=tmI&es5ph8j|6>ucS!7W`qp5tM zfQm&GRuzcoL=tt>n3$z?(sJ5~b|+Dj$+6S4Q73JxByEiCZYQzh5@R&!xRH!;k6QEI z`}bA-`V=i5$xO~n9WVF4yS;nwd+)yg-~W~ca2%)N)G8&X^1+v!&$Ox82`QKsN*N;d z#n6qk(ZrN06le+(yh`Ha0w8Q&3cIK5$JujNRcKe{;N;}X<+)M39@HWj+` z2fTQxp!82?9O|v-cJMZhV{~-Fk5hXzyZD2O1NObVaEaqF#Yg5DiIphe{K>zrc)HAG zKRD*G6TxxcIC?hi$V1PJaA>Nw?!6ii#rXV@f7w!=n47?FUV3cn`W5%>cWzAE+mhPI zacU*zD>yahPl7ZFHF_()(9+;UqS|CO>ET*aVm1nv#A3!OHJB>c^;CULEwhtvHrcA7 zp~?~#$olcOnCB<5zJf6*oA4i}(|J$sP`OdCGKZ8v45$!nLZVHWW7K<4DlOQe%s@z@ z!DQ20OoB1dsy9|z^p%*{3*bIL*GEgtD#;8{1CD`i)F2jwP3dd(_>ZZab?1*w8Z3r# z!3YGuDxuD5H3+8m0^+zT$UIw(|3MlR(9?Yv|0Tr1Pi*^f9&P>yS33ao<=1jKtt7j? zCbOl6dc#ub&5Af94O zZlZpF8gRvbVFntmeT`iG-yBqp)B8yJIe*C`M?6$G!DI}Zb3$#c#VnLF@$R)2eTAXi zW-yy{je3h2$lXl3a&yfbgGnG9E9)1Y3Evi=ejZ^~_)G zT)n=QD(EWB7F~S_Q!-d}wF_+3W>(k(&2_fgIvcJrjchYe+ZNQi_MvLC#n9-I4xk?C zj6xOb)YoRHF_@~D$NC1L+{T1Wx=OuZtFuU5);H7|1p~9EG3jg;cQY8|b>cE*`;}Ek zyPa>aGPiq><$r2pMHtEEgIyUYu5)*ab z5Gdi+k!^%z<0~M8fZ;5LQ+WR39ZJy~U*o$W?lOir5*mhC80sk6|l@Re(2<>W9IOVE_iQ{yq?W zx+pR*c3@0)?yoQ`!H^{8hns}yV=+$0I7G~?A{6pa3^vLB5(ajQFi$JyP=O#PZ2%V zpgjQEk8|{gVgUC5{lUrf*G`VL!+}2#=&T(E`iwrZMWjc{{9|&=9+PMN_Ltci1enFi z+VXf=UYI`Pvo_;1e;)%JE0bq=h?C_pLKep`S$|sai^a$KVfsv-#l`y9f;=0i{xbWF z&id^IZRXEVz$3#w?K3Qwi;?AXU~rq=NY?tVFyK2AS}P}G$m7I1h1WKh!_k_+O@-^(#2oO6 zTo#6>K#$b9GLR>-xGr&1fL;K4q^}InBX}l;Gz=3svAz_6KD*8a-*7$R#$hOw_%;K> z6QG|9GK5yjiM6&2{37k?5ChSh26o7`06rE-e8~d&B8Y*+IT7^o!PiuYkJI6LI@l%h zJjfSA{6*kP0oX5se#c34XM*l*=%*O^Wj;)mj2qeG$i7DQ41WMd^Y=n$*BS|p$oK+{ z(S0R6X2(YYQe7X(wVy;^<3-m`LYMl$bmV-E%r_}W`v?qp0&?8UO(HO{a628BnXD8i2k30paL7udOfJbCQC2@4e zK=x{~nf3ua!q;-{gMcWI@dKGK;3=R_4Yd1$KN{$_8^qC@b3gr)>wXZ62KovHS`f(f z;oOH4O7JlfWTL<~ma|aM(}ItY5+AjYgYIA}8u&ym0{SO@gah;in@Wf)4D5$X;t!U{ z^?dTP#d_h35=sjOs|2m(E_2D@|a%1Npju^T3?BRbzn zwI>6T2MeKGT&RF)KOmN6gYQU{)`bG99H%Pbt$?deK+P z*_=(YpM-jSqd*(!s6wS3kq8_y1yi?sx1~v&E*b;5=VS%RLhBG07Zt@ZUR~ zGv7FR-;T>$)q?j+DBj@nCdEIf#!!54Lq5UPLD$z!FzgA*m{@U+l*8vIKE1=fW!<{t zvsd1t_}%3zC|(+*q4*x-B-I$Mjd*4klB4d|IJ>V@OD!?t7LJ-1}POaoUcW{yvpI zur`IteHuH1@~=m1q3xDWhSB!dr4y-q(95w@e&vEEDF4WCE#=?t_C4h*u8MX(o7GC& zzQ0eQ^6zMVMd@E|JW1s*t{+J8k4^8bqRh8bZB`W85{xFrh6a5-(pMHD}ZQnn#j^&58ubMxm{6lw}Y5UT< z)PA36|0YU5*5i_g{fJd_DGvO0Fy-$v^%e7c{YR>QB|zgL9~}NVrQfqXC62?Hb0R(C z3T>bJVv&b@NYHsPk5z9{z0)V2qjGQ03#a(P$5j*369dP;ayy*|c^>3>khg=pT`FI2 zTF-+&JosY=e|Yf6&is+u$MNzIuN~sGL%ckTS7M*!(+>IMA)h?t)6ViK^@ltAid`J9 zV?2zPT^uigXX8cWYv-2mFdyvVeCTb5`M|?`;Kli{l-ak#eB;IW7SD_G&BTlIE!r;5 zw_Il54)d9Z`OL$7w!?hp#rZsz%C{_k+z#u7hxK9?*GnQVu9s|HTrY7R{^0d!hxN$A zdgNg}^6vGh4SceAEHAEiCoisdx<2rFpUaEu{hr;u-nD_PcU3&p6JD$*3wg1gd}4=s z!i)9fo`-#0uXw0ecBof8)GMCVD=Z&yd$Wlb>){={SPzea-rHh7YX zq27YtE$D9r5A~LZdi&wZJ=$XtBmGJJs;W4?isDuAf28pGw&Job`He&iVgd~&qe z+2c=d$C|;Q)5)VzuLrw|Bz7@^T_kfIA1%AS@UqtG%U#bZW$dDqU9j)fG$lHCGHLf2 zlNut7Y4^m-o-nn5!vOaI+z)UJ;QoN)0LKGnp9c&9JPdFm;NgIi0FMBi3>dz|MWX`fn%CTt%7dzg@d5tQX4O?5|7QYC@9~p$^5lIy9=xGW3OwyV2N7%Ni5E(V*gh z=l{O#FU=^Y@bISNML{S>bG#vb#t@YGLgc?|z8s6D+*z~n{K0gT+Zr->S+WTgq-;~D zxipkhbavseARWqHRu&qSbQ|TqXU}?a^DLCR*pgG^Jb*F=6rVC-KL_Qn&Go#x7Zh5Z|+T@aI_nSYw*D=d&m^ zUAOsUO9)B}Eh(-4_9K)wuCz7hyI_>Q^W(z1jTLBo!{Xa%yWU3QhVF9?4gVUAfBB7W zd-gTA%flpv=GRgGfzX&gyuKk}!&#JN3LITu{XWV(+dszk@&PpQ(7~KRJ9eVf$w@7$ zcgLg5|Gd!l>XATHc>YCo!DB;E+O-!4nV;K_MxQ?%^8V`gQ22S@AAHnn(eSVJH&4|J zLYZ68-k{1PlzE~&aCb}`;v1c>-P@OpQg{Esk#+xTH0n1&+lOE7k47#&eQ4oZ{wV9i z`A3_-xQ!-7UtE6k_oXOrgeiAO^Xn)%qctwFY9bo5^`|RknU~PO)WOM%-q?mR3sXM5 zzRC}cZt8XB?GznK`$F@2^tMk?KfiT9E%}`aW#`v7*!xdKiJSJNT(ab&l&|vpUPirS zB}o0xPgH7k)q*Wamn3VCkhPQl2hV`I)MMBAjSo2lZonSGn=B8eR;O!syu;pjb@bYh zh3ZpbbAM>u4L78pAGcRy(FN2BBo{Wl#q}XG$q?HB%yXXBm)l&Q8;hSSbM6NU6LwlVbKE#rR#cecLX=wiq?5+{$ H12zBvsY~S6 diff --git a/tests/testthat/_snaps/output/output_lm_numeric_copula.rds b/tests/testthat/_snaps/output/output_lm_numeric_copula.rds index 29143a83223f27ba4e3df9443ce2e75c9d94d9b1..b74a9cf5e4306e26fed86e586410b20c311ef352 100644 GIT binary patch literal 4966 zcmV-s6PfHEiwFP!000001MNErSXI^5=MGGY3IZY`LxO06D1#HAXLDsxCK(im)Y}y< za0TwY+rzofQJN!>8EnixuW%?Y;%IY|WnTBP~nT6F;0m-zIr}FQ!&)S!B4^Z++ z_5Sa5KbPyQwbxpEt+l7K_dbkk$1se7Q7Yt&!VW(&c4I~-^c{?OZkSL4w#U$s7ew)c zt2QWK@5|%WKM#C~#|yUyv$TA_=b5ikEuFKFYt6v$IMis0sVQ){HSM+234qDEgTHJG{ z%kzm`{o*pSu|AHg+ts_;^`}Z}{mk>sTRUG^z}@MVveU8hHhU}1boBH$yKpFSTrR^A zIq^a2HN{#pesCed2~4Hlpx45)LzbaPV+zfrMPqgPT>3m(TWq9uQVn`b0T7Bf%?wFB zPA0>&P+C_94DxySzlMf}`%;H;iZo{GkQ|upay1rBs6{iSNNdB%GhvGo1FoSuy+v!% zYl=e6+M+y@HV-qq06YL_`)GNya_A`pcY|uEU-mf$V`-5F+DFFsmu2`Lq(g?mADk1) zmm>y#gl6lQmgsh%FMXEE*?6Njn2I$;qLffgSqm0rB2Vo^&kE6V5!7P$=;;)@(4iR@ zWY$Dk^F(Mn=AnObUfzqX`5`b=e&3VZp{}}hE@V1;d`FM3^S12d4s<aQ=`A|x>(<|7Gm(rG;MkQ8#C}(TV&wJ;EF_YL+ z4^rPZ=NEEY)KklD?b>U#PxYak{hsmm9rVg_Zps8t%jRYNw0>&est@ICP3|XO-T0Tc zWEd!Nb;Xb_Ii$r-BI1Ze=#X>+xi)IOHZt=-@L1LWCc9D3HAbV!pvj@)os1@Jt}e%- zGw9VbwI&0wJL=UrhT*;fdL6E&m;|Aorq$&aSg3%#MOUoT=hKk1(=|C3YE7@s(`qcGCUH#K z>Bb_Bj#lWPS6fVtWCxH}3%x;LI^&p#U(bJ9AO*3zmbJO#FmXvCnDJnHCh8CC7wgN_up-f$@u`rDjssff?I_dC}Od1~qhOaSP!W{(5$UN~gUQL*441pNB zVwi}b48sQ)YOzcd$acnb4TkCXxjhDV{A|RyJH{VkIE~>5hTE9O*R>ks9vFXxaRkQS zg6$9tsTi^tVfL8|^&i0SK9&vUJ1d5D7`9_r3V1W$#uzvZJu&bde4pSW^99z!I{?)wS@H_RF2xC^0eL_5<9&C(>N(DjhE(y>eF&c({k$XV}JuB@-z>Dl014#;s}w{ z=K_Aw_-H*;pUTs?X#FlAPur=7#6IQIdb1Au5VyT?JB%i%np!-PLf%-+` zk+uhIPg>qzQXh?%`X4Nj3zXpYAW!os^Oo!V?7DOWJZ`P zM`B1}gn0^&ZDs^R<^*OmJWt}yK{+oMhv7-kr#Opk0)Rl zieVTd%r6f&wv>CIO9Pt6?}~n`IrsQ*9d1~$II9QkMiwBWz>$H2*kQ}qGv~uzC$y*jv~I;2dX12 zcaZod266pjpQZJPV{XnDF^F>}<~uS?^Fev`jBV`H7bj6&GzQvk{5tA^!3XsC-HRUv zUkv#Dg6uszf*${xtps_Vc7`%u)=d;gCk*^r%`c{1L60wYVH!7Gz98cWGVV|&gE}36 z?gsuiK)rOQ?+=a=(M<0;!1>b1S+(6F-eDo3d=mI(D z1h)L3oR{;4`guRR0J?!qImG1-_Ps>$cM!>ShPYio)){^^LwS>yTk$#Xq%Dul-fXr1Lup`O zT+bM5Jy#Zn{2Yc>wR!0Oj~cznaf5=IoNlvScA75PPJ6-L)yh}rYf8;#okq_zZe#5l zx3g{JuiVCrzO-0t(&ao7`Fr$x8oaf!MDROQ*K7mY7ip$wi|E;d136;0o>Fwk)8tqT zCUudvjF)K~taO3*XM{yh4y~tTY+nNCivqtu@{7GQ=O!KF*N~Y<_tz4obm*~kQy7Ff9_1|VKO`s~5|qt=*bmyLq|mO7p}9Kf6@q_kX!$%oA?z{X zwdXhSPFM2#kHrre2aTH%<|w{Bobc-0Kxa+3vHFYpPo!vDK_8x5$2X(HEA+nazYR-P zeg}H3pvx6sKLFQJazWDc%0!k%4VViw#v<(uxPPGrBo|P2QZ zve{s=Xmc6cn`Nm-!b>3ENB6Tg8yY5TJoSLX^H2Vax3dUdZ}%F(b&5d*pID#D;{xY9 zYli8LxT@lFtN8h9N&M7zxHs0UIXh|bF9dH}xQO6#X9t20X~qzFLv{+t{Y3O4Dk};7 zT&Fh(E?N~t@SE4lhW_$C z^iS6aoqO^D^`Fqo+W%Xyv&x^~Sfu0eIpxU)qBp#(YS_plu70%vK`iF~Z|#W)g3pdg z=Qg;ycI|YeoX4KGea;fvH~VcO|H+5qaP4b2asZyAn~@{{-068gqAV!x}O(>%ietn&>U``$|n32y)0K_G=ErZ zX#1FzcVk(QXF;9?c@E?`v3%N>S{D3a!5hVLd0z zho?Yqi%`$1o~$q*T8Mq`ffG5HZ$a-DsBaMq^DPVW?T3qxxP0av>BP@p`MGD861+5M z7r_?;RRmYp?&EPmNSvdvemGdyM@HB~`-|3y^>j5CeYf%?;Ve3Pb$}v z{%_fDZT)|gd~3^ptDe@D{}#Ws?eMVvKWbiD`n6Ra+KO(gxLZ43+d2=AD*vc)+wNm* z=170WdVop}Q|XzXAM>Z4N!skU=-a<6jA{8~*ZQ5A*{IF2?nK7Xhq3g5epcs@rG|Zn zHFkO9_t?U|m*x#Y_K+iB7r^d-y8`YG*dK5Yz=42+0Mm~w`T!0A916HU;4r}9fFl4$ z0v-T(AmBlO2Lm1gI0|qy;26L|0S^Pr0;T~&9_aVPDE2i){ivXgDCLQ{AGx6STy$P=a76})rq#mgww{rRMS?Rp6%-gl@Gp8IAxO6*-8J=HS_p-ZG zeJXaK*!&d3!Q9a(`X4nD`i=KM=-SuD*&{EZME|PUn#e??%CEY1cHjS?_@436OFr~Q z!;_NJ*A1yg?CNKdo;{X}RHNFD*u6CeB}J|rHE#A66xBY;<>))*zw4*wN3Ic^3|(PU-s^T0_J8`hn=WJae?fKX*&<11P|X=x2ET#_;U_7&UEgJ z!pgJoEEX)T7^v*sEK{wWl>_2U<>9f(7NucUUX zMqMORkn-2hON{CQjU`MSCZWS6bi^Nc!qcW6H~x8rPM@otjy;5nB%4APwYtgS4ZUz` z>DG>hYL{zi{2sa^Cd6M!H&vt2wKHn?Tv)Uw>-W}thUfxdm@-wH)A-S|@KrU_xLr`t z;ZM7N3(c=+ecGCQe=KL{UmWBl7N&ofl5eF)9#}YFZP?)!GNa_b<)#6V@4C&oE%c*? z-=QD`nT$gK8=^ytI zK#Lw&3B`*0Q5Y6f0HTt@?0~*(D{@<(Ix}ztZ%c{oK z2O?w4EM4iGy{P!@qzShxpF)`_7Y8p5Kcy?A*5c4^yEO)xcR}sxP5&Zyz5zd-yt(nqGP| ze{2$BD^5LID04v*<3GLJg&U9ZwnZMj_*yzrkB>Q_UVH>4SY~csWFCw%PejJA{X7zl zNuHl}O>Www*H*uap%T*Pvds&fO z<+Ngb`R0#*PkAV3Tl{cGEZ&ib^Iur$ax~UYf?No<$%Rw%`J{JUWtyT=t4ogb??r)j k=Kq*X@`aaZ9cz+2JKm~^3Kr?iB%)X6vu zFgnZ(GXtV)qA_H>{y)+G(`1ctHzr1nF~4Uvhe@7MaXq5Zd>&{#gGtsXgb+=ltMYgE ztDbrD7*_oxdwl!9&$3=sS5;S4cUQmHZnB9-lms^;fJhRDO z281eIJ4@D&kJUUgp7qrMgK{4Jucf8sj@%)gQEOukDS_!*thH<7?b>NZy$h$riY>|v z1jZXocD>c4HOAZY#uBT(1T(t=+y!X=Xo*=RnIUSxq0kNU%Ts5zR2sD~J_<5lUc~<( z9SRIl;GEe065`;ZG~dLvMf-xj{9Z2S;*H5{E!P?)DRE~-A@R;ba<7uy8^N;2!>3T{ zLI*c4C>+Ug{MS+{+jv1qgK zPVYqBww6wg_J+@6ONUO;3+J!=u>QYNjtifyyZif?>izuMv-yjUqY8&UwDS$8=Y8;p z@l*I?ck@2BmCg`0YNl5;Z+qWipXoz8{~Z?=G5C#_glUsP?CW2QV*NAw4t;3n8;U=_ zaP_ZYvZ=2sHk3oUl#mt=nTR(QVaqTGoh(*; zu|a1ym`$45daD`O9ZedYxqOmtFb|!4nl<2kgN~_dmeYM4?H86W#CXL5pBe-Oe=Fm|OlP4NI&;F36;*G#M%?^=_IfZTe!3+Z_9d1unk4k_`ngbml5exz^4( zYb4uESxq4|B%cI<;Sz?kc>J-9A_U*#4T8IdAsRzB43jZbVfX|?BbG@9+0K}5z)+3v z{V)XMdke;oVEieD;}~i&T*o}p*9we#V0;1NB#gfS+i@83FpTBI;xZTd{|v*&ST>f- zMGUJkY{swz@OnXEbYP{yqx0r%ayZAzGHlFj*XNvi<_VFBTu`hv_qU78mP30OZ*?^^n6YZvTC^@iPBoWpdFn><98J zk8=OznCW@S^2yp+{^aus>~qfgNY;8bhN+xbD@S6;<-|IL*EaVgM{5E%4(_KAbI?xY z(lI;@dZf-x0C^&d>k>B#`1znm`bq;mf+t~k0>f}ltS@6gpWSDIZ@3I$ynE`rv;A^tP$3nO-1iM6@ z2l)bse+>AN5BA4EzbO*kNuWCg`YC{ZnGfS6<3{#4vagXn!v}!zeBAKay_bYXWITb# z_@0tBX2(MUQe6+py|+Z)%Z;wLgfI1h>B!r?WWGs3+P~Cic|X#a-T5K~Y0jj4Z?1Jd z7|)Y)jh*@8Bk7C9z{ZX2qg^nBgC2P~iNp|r0Y6{R*U65cN1me9AWvw2Xd|*gk~lhH zAbU00OuK;|X%FC>?^F>W;|(&w(58Ssy@2ix{&+#Z>_sSqbG}HC`$r%aFX*cS@ccop z8|QqcJCD9?(msICG36G7uw}i)5bC))*9&rcggR~DBSP`f9!JB z<+_;*c%U8amn+gId-Pk)pBhv1Vs>3${?`UBKU?ECG2|PUnKxTQ?h0aijIgi&jeeYW zsH43(+G9`2l$RX1E-ZWFqjT0f?EhF99i855n4_PI3*vrBM9bSf^nas4Z*`iWVkWCy zUYC8UWv|nr@b2p1Yf811Hk(0f;+(Is9?sXCH9=zzOvE!7YLy1;rH(NDEeHD>$E>^a|M{(k&r-ac{bFL2o>_LHS zkYuy>2R*XTI&sbokZelCF4=^-zzimvF4+{>CPy|uvQd%EkZh=t(0>qMVwdCGpLi-c z_lK@(uJysJQl_Jn=_zHpN;mpSnY>aauawEFWbGtz@#9IUD^)HL>GY%FvC)Ox39zJ+?ZedjuK0^IJ z^{+^=v4S}~w}}j+*Bk5;-hUdF?fhQsvx1>m`uPF4-m(uQtv{J4@|gi!nbu;|&(h4) z8%oRAHx~K#B@g;ZsEw!Hi@(9s?msa;=VxJ?*=pApbFLqY;mh(pi5VOY!R-Z&BQ&8cgxw*YgN2^S`ljxS=*MEu*-e)T_lA(?1a2Ub*trltsT% z{O*E<6xaBBQM^Yxp30jGb3yKBs+V;1D5YQS^ftxD>@5Mg4OFjR z%p^(=MimrSt@y-6{zL6Viq~A3Bidc@7pj-?^A$=LAGyo?r}T?{e-rI2kD@pg83?XZ zA8Da_BdY3$kE{)hZ0s4sWB&g%9!{e8)cDcDn!vzrooZ_c4!ItFiqa8@y=I|Q2v>k zbyQFN$@9!UrMXKlQMp}n{+Go(mcB*x&YhW0G0HezhTR6rt%=qgFFxN0>}$e`TTQw9{l0K z9|8Q~!5@M7Bejp?c@{RLPrJggT%TrcswxLz`OalJ&l_=DG@0PB&5^~l3|H5IyeFiVC_d9}fy$AU1zOCY+p73Hl`5iCTll=nJ6JD$*cUw z#OrJ~m;4mz?_T$QE_kr(!Jj)G&YoPZE&Y4hf6)H_QSuMkey@HWwEbTE2OWp|_5We> z($>{pd1x=Xz2bh*_1fNfcv$&|jobA-#-&C^agGC2dYH=2{35x3>Y1d=`xblpS0r+6 zpX@q*XJ)hLb&fZY3GAkb-LPNHd5zV;dxs--C7jKwA(kkXHw4*3-hcxD2LtW~_z}QS zfO`Or1{?#J{m7y(;5fkXfD-^G0v-T33GhI`j{_bAcrf50fQJH32Al$T7~m%W4+qQx zW&uMU*iVE}>RYN?qhi*g+^6Oq4D>mTMyJ*6{b+!IvX)MI@1wYEl+ndLJ<{6~J-I3( z?TuqAP~PT~-#@#jGfJ;GwQ|+V`w-ewVBe9x1C2iRu}XQl2<3L3dA;Fu6UrH-@OeMA z2IYMpQ!{?=W;E)_#i8lpZ==i~4ThtGwJ3k-jD*z(bZBI%XGP`@M^ILF?EIZ0HlbX9 zkLGO~d{J&*NpwutWHe%)P;??{HX0S|U-aBn6&n3m(^p?zxrlN+4}Bp7J%e&ze|JvT zJs~L1qwIsaRp(J&@GIGW-S!&FYX0!?Z`Yhh+3QY5{YJrIprSIkoe@QAV$fl*ONhp%K|RqgM~ThWHgPWdHs^F-jZd_vH3X zI+Q(d*{BI~Hlk#|-A_omUbKWPi-&!=hW~w56&~}ts zadK$FIvq;SZz-F)em;6)zxt=w@^7Lcy(1dN`p-whmagqsd*Ibpc^LWT<>$|bbwxer z7FzzotseR=$Dgj7N()>?WXeU z&laHUfUPH#gNLC(HTJBNzbrvL`mZXykU0zW>Roqr$HcuTZOP+}V^^qA`nEpa^UhVG z%u&k$-`_pm8I1`4WY8e)CK|l(FUdMZ1{xOiWpGhW zBTD*4x7d<024y_@Y)HB388mRy!Btm2OGiU4=5@b@y2?tB`tP5YSTto?d!i;$Mh}qD zN&mnTo_6)v`4sR&&z(DBLi!`<)@m#| zUyGLH!mhVEeojp?#5Mr)wCQ@C-SMMm@ki5~^L0VRmOq>RZ4AHa!PC~}$3(}8S(VmU p={QjHW`9Tmv_JX9x#K&r`|069ELmCHMuUxp_J37FVrE=4003vludDz7 diff --git a/tests/testthat/_snaps/output/output_lm_numeric_gaussian.rds b/tests/testthat/_snaps/output/output_lm_numeric_gaussian.rds index f1c32878a7340eb3d7303bd821a551aebf7925a0..d941cac1595573b26a0cbd6ff8750a625470a12b 100644 GIT binary patch literal 4865 zcmV+c6aMTUiwFP!000001MNEtSX9;4X9ke>L%|22LWn40Buc(8&H@1m0Z|lu9ly(UJ*E?&ApympMdtA->*7x&>sTI3KqOHYL8XFOU39# zv8Ogx_?&bNzC6NIF{p0!2j%fq*7n|tG~cAU2^BpD#NC>HF2J(u)v9r8-sD(2fAQ(r zH6IlAwwyl_WxBcJElbr1r+^=W9%Jp6+bM2tl{c1Ku4V;IoG{}Z{_>f``2L|^lTehT zT|pvp7J}5Pi!>&3;YEWJnf7{vUW3<;=?0zJ7(R+MWoq@g?0U4OsF>MFG3d<&NZ7@x zr^@!%wqBn6QUpx zi0+B)OMt^4rTME|bF>TU%dh2fQh(a$4aOq1PLdR+sc6QcLgJ}|6`myf0K} zzN)U7%vU7l)$Bgs=N3#>gnl?J@mNK$cl?EZOQ%&N>t-z2a%-`*Jv23a?}N&76%lUs zE`{5sTduW9-<0PVz}Iv=I&|)~URHf}+RE0bgk!7f)@gy~Y^h&jkaoQMcp zl-?-UN~70GX6i(b74>M+k9n>xE;bs}IZWKM*r>_X=9skxy=t1qXh61yUX^1g%GT=D zLW_xYly5YYuwI?@DwDdXSf?>HFtXL=oB}Ju-lWkmf4vGdnqsP;$}<>MrRhvbYf=?Y zH5VB4$Z$23n2SryDwB2^D=zltsm0bpQ(!P^r&*<4s7ETDI-hmwo01-1sw&ZINjF6_ z2vwOzn_pmN0&UFNBCS53g`}CH&M`A?1kr|A#?FfCD zz!Y-rO2CI)7Zco>;GG2iLtrm~TEY|hT262f!QT1%ECjj}5GMH!(MRqG(IYyFp#(w*xTDT30(}VFCGe@}ix7jN zo4AA(u>@BTOtvBJB!SrkBE zh*_MBmdDHT!t_}?qggxi_W{J+Wb!Nz!LmI1%i`!Q>(2}QV)3zlm_Czdak2irP@aub zkjy^gvwk}u&HM>N9O-LopJBONpe&y~nxT8i#)0|8;*pOB8&B5WN7f&Um-!zilM9w% zSCnUYl>0BoOs|bBpRAqbPd=Z}KG#qmg|(hUfZQ3-TKOn}WKOJ8WNmYa9IXl5XuK8< z4}k50TpWQc)D!C5Sde!`H>id5NABUsBQjNbEOCqB|aSAH#k|VZY3W(UNf!_Bdf*6ZQ;e1jci2g3qoU zB|Jf<4e}Vjjiim)agcyi*FkdaB+++lLf1*cm-@hTE)pNTFbAG!D*)RCIe+Y5@WU6u4{a(j zE+4e-D~aD-BG(S%_Ci@V^wCFb6Al*2dd~KF#4?d@#||7Vcm!cz&o%v|;r`D-HfNyL zIab-+O$%945jZQ}8M8-JP2XurbcE_FROb{4kBz*OgvVBM`)0C!@4qGMHYQn=j#F$7 zW6o2ohqZ?Dj{{=6R&z&bV@6+6q%mr9?u+~*62^kJ4y^?K=<%^*M{S)tTccwKS4aAU z(t5;o&m#wKgHff^lnOEpYm4m+ft*+^l{k9#obB$xb{L_u3cG|G>IrMmlWVA)LNycY z`iSd@&Uc|&3)PfWN1zP8dE1W&GO*Myv16LRu?<-SUJayhxKos4JC zHJ&?7@tcxw%1%@E<@3@VaZ_`a&1a5lm_Jfb{!Xy_onYStNc~`QN)9fq3{CaHo<9f1 zgwL5NB!sUN_sieJ@3y7C|Figa#=+v|#2O`xhcnsB>l?lB_?!O!MYD0n`TyiqVG20D z#$Gi2w_({`gCpk4Lz^po?#;3&dothnS-m2a888*7i*=f*_~4%zkiGeEpf4+$xvxI? z2i#Zxh4D4Kyfhh%W=$?<`$|*pk$3|s%+U(|r~3NwYY*31NbSm>x;35R)edh^eAR9M z#RpfX2)MxQ`pRhS-nOyvxu=C%w=n+k_buyJt~~bGf?E`CoHw81Ic|;=f28=@Dsr+Zl2U5BJbQ?$8uluj3^p*p?DgAwB zJe7BQwi}gSFm(cLukPbT+i!dPMB5cLqMaiXcT>8}>jSC$+m1<;|LvL+RQ|%tJt+Qp zrh&?ZeBG6{e*(#Ze4yvMKD7Prl}9Om&y4mI?^snu`CrXhNA>JKn$7G}nme_G%2g~p zMCI-Ttf2Ha4`oxju6iZQ52b4i`)GUB-BpynxRKiL6yQ9Q@;`5P(Z;@ic_GEFXG3Xw zrQT1>^Yx#p{$&?O8+mu%S1JFVIZGUe?@o*S*vphYef$X<`L=F9hczw>}cv-~pQuA!Q1o`5^bROq}MVt>E zEjS-|oDaM>ALcOo7MyRqINw5ealYw!alQpu#Q8Ry*|*?)=5aprIG-&zpLuaU52Es0 z<~?M=^}^$Nv54y>oEO(i0xzzYU>kqPdbHqrme!i}hqWFV>R-7OW?{SWoWR*eCUh$9iSKdc|YC;#s{S z@-DaE((__HykimT;bGLe}EaZcj?;b`3-Ir01*DA^-cv)x?s!OXAze*3!&mwsmOU>F;QrF*nKtJwcK5G{;F^CbZ2iE=RIJIdow#!yI$ss8w#!ak6=)^B!UZ4SgQ`Ef=(mj=U@e*gHBqn?G7o^h3RyRupe(va{fp5czl#h41Z`cos z+m_5-cs26v5acZ|hb!a;zf4wom?;p-E z?49Qm|9SlcBx!QS?@s6msa$~9w(GyZ@VPaee$d}(l!wH96(_d37eZ{x1Lk9$4G{CC z|M8CdcQxc;(U(z%&(=cRo$+s%oL9lnr)%S$^$vsN6;Bx*-Y^cJdfb|_ z2hKw550BrwI(`DgI^_2`d2$g9DZ4f~bnJ3~y3njM-YXz+bGLqPdh~_ZSwDIAT)G4j zlK(Zc`+G+rZi?oEX{%BIGCn!G=~_9&zF6_yx$HtnPIhm*?3rp9oE1}(xn(^W&*vkn zkL`!pmyF%leRCQHPtryO@3{-nYo4xsZNUYIS)*3pTKojWe=;Mo_OtgOdfwjdUk>wy zffM&vxrY4+L!MphvA1$nqdX*SS+i!QRwQtXQ&bSQm z^EMc-My!HJpVW|J)hbAMOL-%G6B&o4>MtrQ-5}}Cz^YkwGoioh@PVIsPKEBTxxC|< zm;%AOhCGv6^(w?#`X|NqvOwI)Ma5v5bRYD+yk}L^iZL*t@6Dif3131;w$qVg z`yPRzlRkRI&-)$>nBsGBrdkO@!dx!@y!k5hKMeCc_P9X&w&UM~#aw~N&@Zk|&p8VN zPp0@?f)270WdG~WONvzmYIB4tLPqzM(f$6!Cp@j{al@DWT79l&3h@wMLfJHWsZ@;~ z-msTd&Astsp*rN68$QJKAcXYu+{S7wI+tR#kPEZMX#LPv$Pn8A4B3xsavHw26+h?Y z8jcHgZ27b4-^}pawLERsEyoVG93{s-b=ga0c6R}d8 zYt!kU_qjo{9$4dKFYmf?f8EWKH(vk!0{HJ2z|Fk?rk|m~to=#8=|66Ru|cuB(mvk^ zSqtr>U$*xESe*TA$(c5^YjN1Yu6-!;+dmRIx(l7R%J$eQrYceRB5*LDt9J6C+&7&FI|g z&u{lR3d71i?NK&$225HV$W~J%?E~;dmbp= zI0i^zq2i~(J{p_{HHM!`d6|2{qV+szOi{zj+Cs%wz+?jirs zf(jVnmhAYcX+LnEC>CEz$b}xiD7~J%GzijOUvhlTsg*Es%v{6!=Zb*q?RPxGcU8-7 zR%EYoTCl%-|L5PQ{4Qsk{pd$3?m@+ckFvBmYU}4jUX{x literal 4194 zcmV-o5S{NIiwFP!000001MNEtR8&W@cRn!u8HA{a2$4@DC=nrwV!RCkq5`7$*EkL^ zz=UB2X9h&gGsc)0WusZ6>uP?I=V~<1V>Z6kO{LTen4X|epX<+ z`&Q4~xeRMKlHGUSzBv|ktGcSXs=B-G?Y`IPUL40MIHf|)DctZS=Qd?>#-Jq33#1JK zcE{jHXq29GVbjXVEj_NY<|g#Hzpmj z>xb4(ekJw%j@qDe-jPicEVaYiHhol`UT<$7D9a5V*EXwmz_7GCi@p!D9eBHbM$J}^ zweweBdg10rr2}o3KTo#Y-uteteu78Xcm1Dc?Y6twK0b9nt+U;j+kf_~#qaY?-)5!{ zjXA?{N;z(qaZ1jU1ZmWk=`8p{O@kAdN~6iBgKJfu$)GjIO=3;c^u|JVJy};)&g^8H zjMfq$6lvN8l72kRrulKKFJBDuCHPN!dwZ+Yp#p=}!W@zV)2mQx)y7%1`39X6r^t*g zN(=5W#M*{C(dS#*XXv#tm;djhlpbbqwOtengcCE#G_hWX`DtgWzE^jaf~kc`Zj zRrntyM1~<8+!NYYBMyF)=5KJF(O#f0y_U*}{c$&%%w<}GC@HS2$VA?`iLMo*YXewz zd-x=ZUGR0`g3O*Adu}N0hKkiRnJA*3Wc4D-GnI`0s>f~A_rIf>JQt;+!Rqzo3J(sE z4@R}RuNrR7<7+dEZXUcm_zs#Ui}};S%*NWt!1OB#YZlgK85XbDb!U~mJtimb@F4m3 zwedbmuhKn>Y(Kc??JQD-@i+T@G5WGJzv@z*uAC}picDrrWgb(~ zTQubhtR*HRFuY9_*76Fg#-d-y3X0OYpxjt@<*(v6y+R ztI`%&nXpk)q|;g}%wm^yRpkb)p4n3wHCD5O^aXj1FxUdq2fIwWov*hrw|tBmmRP%7 zpixs{)X%HXxoE1e=n6G1bL`DZTwGZN%blH7V5-!VX|0^2Qp(|;%IXTKBRM1x4CgW6 zdzB2=8CfrUjc*g&H4Kp$dSjT4p%TMB49!?(1jzQlbQ6XueC>@P2w#_D+!y2h82*Cc zFoqV)BYmyIxIf0e{tsaI6PAr7^AW?F7B zZ6Km%tB|_{Zd!Jtoik?Ju!205FS_(b9NXUYI^>XEbYP{yqvg zLL$%d5Gl!Hh$N1IlK#}-7mJVe!}OUvi;MNI26;A4{U!DppY_`lXy#8W;KX2O`wUCv z!X^29tP}ci$v7~-SUl44VB^W!2TS^6@iPBoC32Ax><#iPk5d1onCZDo^2yp+{-pB> z>~oI#NY?r|4EV-RYvouBS)5R(@Y?1wIa(9A$#6~f0|?rQTpEVCphxQ5bdV>qxGr%M zfS&_;q_0%aBX}l;r!b7-g!(cO^x1U=_=f8dmy97-8o7mdWiU9ADE7`T_y2N4C4O9K1=%%$Lz`%F^F>}=6i4*^TBxToOA5V7f(@NECx1i z9!4HU)UkAd8)$<4Gk=#h3c=h#>E z0~rsH34%5m^r-^65BQ^ke*GYhUYuhyMXvioEGp>B7kEA(*PC-}wB+DpD9H2!-&oE9 zK~D`nhKhVtLk|4GRv5Gsxe(}|_z?`y2W-kAt{|`%~tL&RK$Nt9+u%g@Ckvo|&R+Qzf?s-k6l|+)(B|t5Y~!k;hmQ6wu5S_Ec;lVEL03s+9BYf+8NxZCTFPPc zl$`V3gWX|BWhHkBAJ8Lf(4TWuPEyT?-5_Bd(fLlQHL0eoI+ChMDjcbzq!Ncg|ABys zU5;~oTqfsSpV%q6j;DHZiH=;NCzt5TUFgdt@^Xp1Tq3WKv@0a-O3t~S$s~PN zsjk9>oWg~iv|p*OVxCk^s;iLjlw8NT zKNFNLKbJyJ5*wg9`HHr!N$rd$TM`PUbtF)ys020ZWOt z+@M>anXl6qm$1Wq>5C0F`YxiA=jwC6z;pG#Fh0loNQ=pA)fIBiZz-i72@jBDj@I(m z+uLVuKG|l&wX1l+?mUV&x&4mf8;W5RAK#QsaEZ^Y4Wsmj15(oqFOXWdGX0r9+O}=j z(D?L0xLd#g*&!Imv&hok4NUyA>r9s|g6`?SFU~!M$2S8!6py>TW83 zY+VwS`%AM%BGLX`Lot94JeO`^A@+%h1qU{ZX)wKPt-*wt9yD8ZDeD*;~ zyZ>hrm48n)j`F{*IZfrSy!jZ#KQ1v*x#% z^_2hBvMp3kx$gyLpVHj<)l{x_9&RqEI*XKX*xpN>+f%*^wk~I ze(x~PC6xbJkE>4hL#j(D_P!KD+v|*dggoE+k?J>jshs3}gWsn7R_k109KN|A@Kc*8 zec{Vzo#X?2E(&=p{vFjj`_)n^w|!nP#g~s1k4lO29{KCLDLlyYAkTxm4diWN`JA&l z9{l0K9~=0?gFiOrkJvtrmxp+55U&m5$R`i^J<<5if8o-%X{5@*T@U?@SaVmhbKXAyU@?tC|;g^kq|S4V$9@#)A^imx^ACAef@nul=zP}%Q~ zoUs267u_Q^vAenG8<>Cid+>8X*M41p?szzRQn}9bA7sC){r|J%yW0Mse!ANJAbwZJ z;ph7QuzBh1*Ijw&F1owo?&^B&?mRrK{KLlWypM6Jk>Q;E@Rc6Kvh%_)?q7PI>9pTs zPye!buJco2`v*>za$SLaCo-K~Ok)@9b1hY#2KF5tzCcn!Ea5C~2(pJf0ILB90qzaB zFW_*%{Q*Y;jt0!$>^}~8AmBK_g8|0_9s)Q4a3bI*0S^T{3~&AF+HA?)UyQQKN}}G_5rcA58a{r%bTG=?+~F?La;noqb~XCo#=v)XJ97oi(W__eAdOi#=D09#1m$xV^C@1;#y{}u{P-gX@x4foJK^eQBGp}7{b<4*eY{&-EXqxGt6cQ_29)*uy1u{pJ|1O# zx#q2wcwYTZ_J#9Qr(INpF8&o8ae-mc`?)1 zA=DN#_uIhrDD#7egspx;*Jpjy^f^(0w$j=B1W* zR$M_THCpYRRnMaIPZuY)9Qy+rwfu0@sWHJQY4*{2@7V9q$X7S}9j@EhArIqr)zoa< zvK&R+S+(M1)fJRBPE{Gvat3Am@bxAC#Z4%E`F8V-_>CwrC?~qHL4z{hmH(8t6OY3h z?Vs!Fe9*XiN%c$HmY|{DkCiahog_6Ax(!iCa<4@h7Gyhe@n(G6rJz!dE=3%(C9h)-s%(BiiTAMU0tG;qmi** zO+S8c0}VZimiry@Lg{}wJzt_E-2XYXVbrv;a7A$ZJoY|w4az& sY7G_k12qr!e;9!FA)h)sULm`j9&W^vnZ<3;TWDy12Pz3C`qwf50B>G=)Bpeg diff --git a/tests/testthat/_snaps/plot/msev-bar-50-ci.svg b/tests/testthat/_snaps/plot/msev-bar-50-ci.svg index ff95215fc..7d5630803 100644 --- a/tests/testthat/_snaps/plot/msev-bar-50-ci.svg +++ b/tests/testthat/_snaps/plot/msev-bar-50-ci.svg @@ -43,21 +43,21 @@ - + - + - - - + + + - - - + + + 0 diff --git a/tests/testthat/_snaps/plot/msev-bar-with-ci-different-width.svg b/tests/testthat/_snaps/plot/msev-bar-with-ci-different-width.svg index 17d6e9ec2..c6d232890 100644 --- a/tests/testthat/_snaps/plot/msev-bar-with-ci-different-width.svg +++ b/tests/testthat/_snaps/plot/msev-bar-with-ci-different-width.svg @@ -28,9 +28,9 @@ - + - + diff --git a/tests/testthat/_snaps/plot/msev-bar-without-ci.svg b/tests/testthat/_snaps/plot/msev-bar-without-ci.svg index 5b31384c1..1be15b58a 100644 --- a/tests/testthat/_snaps/plot/msev-bar-without-ci.svg +++ b/tests/testthat/_snaps/plot/msev-bar-without-ci.svg @@ -41,9 +41,9 @@ - + - + 0 diff --git a/tests/testthat/_snaps/plot/msev-bar.svg b/tests/testthat/_snaps/plot/msev-bar.svg index f7b6b2e15..57d503c70 100644 --- a/tests/testthat/_snaps/plot/msev-bar.svg +++ b/tests/testthat/_snaps/plot/msev-bar.svg @@ -28,21 +28,21 @@ - + - + - - - + + + - - - + + + diff --git a/tests/testthat/_snaps/plot/msev-combination-bar-specified-width.svg b/tests/testthat/_snaps/plot/msev-combination-bar-specified-width.svg index 7d95ab35d..9a7ee4506 100644 --- a/tests/testthat/_snaps/plot/msev-combination-bar-specified-width.svg +++ b/tests/testthat/_snaps/plot/msev-combination-bar-specified-width.svg @@ -57,36 +57,36 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -132,16 +132,16 @@ - - - - - - - - - - + + + + + + + + + + diff --git a/tests/testthat/_snaps/plot/msev-combination-bar.svg b/tests/testthat/_snaps/plot/msev-combination-bar.svg index 9d2de04ab..42a758234 100644 --- a/tests/testthat/_snaps/plot/msev-combination-bar.svg +++ b/tests/testthat/_snaps/plot/msev-combination-bar.svg @@ -57,36 +57,36 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -132,16 +132,16 @@ - - - - - - - - - - + + + + + + + + + + @@ -237,96 +237,96 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -462,36 +462,36 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/_snaps/plot/msev-combination-line-point.svg b/tests/testthat/_snaps/plot/msev-combination-line-point.svg index 1b229513f..65b02981c 100644 --- a/tests/testthat/_snaps/plot/msev-combination-line-point.svg +++ b/tests/testthat/_snaps/plot/msev-combination-line-point.svg @@ -57,36 +57,36 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -132,25 +132,25 @@ - - - - - - - - - - + + + + + + + + + + - + - + diff --git a/tests/testthat/_snaps/plot/msev-explicand-bar-specified-width.svg b/tests/testthat/_snaps/plot/msev-explicand-bar-specified-width.svg index 4bb61aa68..c3be8d1e8 100644 --- a/tests/testthat/_snaps/plot/msev-explicand-bar-specified-width.svg +++ b/tests/testthat/_snaps/plot/msev-explicand-bar-specified-width.svg @@ -27,27 +27,27 @@ - - - - - + + + + + - - - - - - + + + + + + 0 -100 -200 +100 +200 - - + + diff --git a/tests/testthat/_snaps/plot/msev-explicand-bar.svg b/tests/testthat/_snaps/plot/msev-explicand-bar.svg index 3c6d7b21f..02cdd7d64 100644 --- a/tests/testthat/_snaps/plot/msev-explicand-bar.svg +++ b/tests/testthat/_snaps/plot/msev-explicand-bar.svg @@ -27,27 +27,27 @@ - - - - - + + + + + - - - - - - + + + + + + 0 -100 -200 +100 +200 - - + + diff --git a/tests/testthat/_snaps/plot/msev-explicand-for-specified-observations.svg b/tests/testthat/_snaps/plot/msev-explicand-for-specified-observations.svg index 77659a782..87b0706fd 100644 --- a/tests/testthat/_snaps/plot/msev-explicand-for-specified-observations.svg +++ b/tests/testthat/_snaps/plot/msev-explicand-for-specified-observations.svg @@ -27,23 +27,23 @@ - - - + + + - - - - + + + + 0 -100 -200 +100 +200 - - + + 1 diff --git a/tests/testthat/_snaps/plot/msev-explicand-line-point.svg b/tests/testthat/_snaps/plot/msev-explicand-line-point.svg index e3afb1ef1..13aa02cfb 100644 --- a/tests/testthat/_snaps/plot/msev-explicand-line-point.svg +++ b/tests/testthat/_snaps/plot/msev-explicand-line-point.svg @@ -27,33 +27,33 @@ - + - - - + + + - - - - - - - - - - + + + + + + + + + + -100 -150 -200 -250 - - - - +100 +150 +200 +250 + + + +