diff --git a/.Rbuildignore b/.Rbuildignore index 376e7b235..4aff343ea 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -22,3 +22,10 @@ inst/compare_lundberg\.xgb\.obj ^CRAN-SUBMISSION$ ^.Rprofile ^python$ +^rebuild-long-running-vignette\.R$ +^vignettes/understanding_shapr_vaeac\.Rmd\.orig$ +^vignettes/understanding_shapr\.Rmd\.orig$ +^vignettes/figure_main/*$ +^vignettes/cache_main/*$ +^vignettes/figure_vaeac/*$ +^vignettes/cache_vaeac/*$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 39a2d8a60..e7cb7ae18 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -35,9 +35,10 @@ jobs: fail-fast: false matrix: config: - - {os: macOS-latest, r: 'release'} - - {os: windows-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'devel', http-user-agent: 'release'} +# Temporary disable all but ubuntu release to reduce compute while debugging +# - {os: macOS-latest, r: 'release'} +# - {os: windows-latest, r: 'release'} +# - {os: ubuntu-20.04, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-20.04, r: 'release'} # Temporary disable the below check plattforms as they fail due to a change in how R reports error from R<4.3 to R>=4.3, # which gives a different output in the snapshots produced by testthat>=3.2.0 @@ -49,6 +50,9 @@ jobs: R_KEEP_PKG_SOURCE: yes steps: + - name: Set the value + run: echo "TORCH_INSTALL=1" >> "$GITHUB_ENV" + - uses: actions/checkout@v2 - uses: r-lib/actions/setup-pandoc@v2 @@ -64,6 +68,11 @@ jobs: extra-packages: any::rcmdcheck needs: check +# Try to do this with the env variable TORCH_INSTALL instead to hopefully get setup-r-dependencies to cache the installation +# - name: Install torch dependencies +# run: torch::install_torch() +# shell: Rscript {0} + - uses: r-lib/actions/check-r-package@v2 with: args: 'c("--no-manual")' # I.e. exclude the as-cran comment here diff --git a/DESCRIPTION b/DESCRIPTION index f7994608d..dda067b2a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,6 +12,7 @@ Description: Complex machine learning models are often hard to interpret. 
Howeve Authors@R: c( person("Nikolai", "Sellereite", email = "nikolaisellereite@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-4671-0337")), person("Martin", "Jullum", email = "Martin.Jullum@nr.no", role = c("cre", "aut"), comment = c(ORCID = "0000-0003-3908-5155")), + person("Lars Henry Berge", "Olsen", email = "lholsen@math.uio.no", role = "aut", comment = c(ORCID = "0009-0006-9360-6993")), person("Annabelle", "Redelmeier", email = "Annabelle.Redelmeier@nr.no", role = "aut"), person("Jon", "Lachmann", email = "Jon@lachmann.nu", role = "aut"), person("Anders", "Løland", email = "Anders.Loland@nr.no", role = "ctb"), @@ -26,14 +27,15 @@ Encoding: UTF-8 LazyData: true ByteCompile: true Language: en-US -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Depends: R (>= 3.5.0) Imports: stats, data.table, Rcpp (>= 0.12.15), Matrix, - future.apply + future.apply, + methods Suggests: ranger, xgboost, @@ -51,7 +53,11 @@ Suggests: future, ggbeeswarm, vdiffr, - forecast + forecast, + torch, + GGally, + progress, + coro LinkingTo: RcppArmadillo, Rcpp diff --git a/NAMESPACE b/NAMESPACE index 6537b3bcb..d993c4525 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,6 +34,7 @@ S3method(prepare_data,empirical) S3method(prepare_data,gaussian) S3method(prepare_data,independence) S3method(prepare_data,timeseries) +S3method(prepare_data,vaeac) S3method(print,shapr) S3method(setup_approach,categorical) S3method(setup_approach,combined) @@ -43,6 +44,7 @@ S3method(setup_approach,empirical) S3method(setup_approach,gaussian) S3method(setup_approach,independence) S3method(setup_approach,timeseries) +S3method(setup_approach,vaeac) export(aicc_full_single_cpp) export(compute_shapley_new) export(compute_vS) @@ -61,6 +63,7 @@ export(hat_matrix_cpp) export(mahalanobis_distance_cpp) export(observation_impute_cpp) export(plot_MSEv_eval_crit) +export(plot_SV_several_approaches) export(predict_model) export(prepare_data) export(prepare_data_copula_cpp) @@ -69,32 +72,27 @@ export(rss_cpp) export(setup) export(setup_approach) export(setup_computation) +export(vaeac_continue_train_model) +export(vaeac_get_evaluation_criteria) +export(vaeac_get_extra_para_default) +export(vaeac_plot_evaluation_criteria) +export(vaeac_plot_imputed_ggpairs) +export(vaeac_train_model) export(weight_matrix_cpp) importFrom(Rcpp,sourceCpp) importFrom(data.table,":=") importFrom(data.table,as.data.table) -importFrom(data.table,between) importFrom(data.table,copy) importFrom(data.table,data.table) -importFrom(data.table,fread) -importFrom(data.table,fwrite) importFrom(data.table,is.data.table) -importFrom(data.table,month) importFrom(data.table,rbindlist) importFrom(data.table,setcolorder) importFrom(data.table,setkey) importFrom(data.table,setnames) -importFrom(data.table,uniqueN) -importFrom(data.table,year) importFrom(graphics,hist) -importFrom(graphics,plot) -importFrom(graphics,rect) importFrom(stats,as.formula) -importFrom(stats,contrasts) importFrom(stats,embed) importFrom(stats,formula) -importFrom(stats,model.frame) -importFrom(stats,model.matrix) importFrom(stats,predict) importFrom(stats,pt) importFrom(stats,qt) @@ -103,5 +101,6 @@ importFrom(stats,sd) importFrom(stats,setNames) importFrom(utils,head) importFrom(utils,methods) +importFrom(utils,modifyList) importFrom(utils,tail) useDynLib(shapr, .registration = TRUE) diff --git a/R/approach.R b/R/approach.R index 79ca95e1b..e0325ea3d 100644 --- a/R/approach.R +++ b/R/approach.R @@ -82,5 +82,5 @@ insert_defaults <- function(internal, defaults) { #' @keywords internal get_factor_approaches <- 
function() { - c("'independence' (not recommended)", "'ctree'", "'categorical'") + c("'independence' (not recommended)", "'ctree'", "'vaeac'", "'categorical'") } diff --git a/R/approach_vaeac.R b/R/approach_vaeac.R new file mode 100644 index 000000000..80ae6fbb9 --- /dev/null +++ b/R/approach_vaeac.R @@ -0,0 +1,2845 @@ +# SHAPR functions ------------------------------------------------------------------------------------------------- +#' @rdname setup_approach +#' +#' @param vaeac.depth Positive integer (default is `3`). The number of hidden layers +#' in the neural networks of the masked encoder, full encoder, and decoder. +#' @param vaeac.width Positive integer (default is `32`). The number of neurons in each +#' hidden layer in the neural networks of the masked encoder, full encoder, and decoder. +#' @param vaeac.latent_dim Positive integer (default is `8`). The number of dimensions in the latent space. +#' @param vaeac.lr Positive numeric (default is `0.001`). The learning rate used in the [torch::optim_adam()] optimizer. +#' @param vaeac.activation_function An [torch::nn_module()] representing an activation function such as, e.g., +#' [torch::nn_relu()] (default), [torch::nn_leaky_relu()], [torch::nn_selu()], or [torch::nn_sigmoid()]. +#' @param vaeac.n_vaeacs_initialize Positive integer (default is `4`). The number of different vaeac models to initiate +#' in the start. Pick the best performing one after `vaeac.extra_parameters$epochs_initiation_phase` +#' epochs (default is `2`) and continue training that one. +#' @param vaeac.epochs Positive integer (default is `100`). The number of epochs to train the final vaeac model. +#' This includes `vaeac.extra_parameters$epochs_initiation_phase`, where the default is `2`. +#' @param vaeac.extra_parameters Named list with extra parameters to the `vaeac` approach. See +#' [shapr::vaeac_get_extra_para_default()] for description of possible additional parameters and their default values. +#' +#' @section The vaeac approach: +#' The `vaeac` model consists of three neural networks (a full encoder, a masked encoder, and a decoder) based +#' on the provided `vaeac.depth` and `vaeac.width`. The encoders map the full and masked input +#' representations to latent representations, respectively, where the dimension is given by `vaeac.latent_dim`. +#' The latent representations are sent to the decoder to go back to the real feature space and +#' provide a samplable probabilistic representation, from which the Monte Carlo samples are generated. +#' We use the `vaeac` method at the epoch with the lowest validation error (IWAE) by default, but +#' other possibilities are available by setting the `vaeac.which_vaeac_model` parameter. See +#' \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)} for more details. +#' +#' @inheritParams default_doc_explain +#' +#' @export +#' @author Lars Henry Berge Olsen +setup_approach.vaeac <- function(internal, # add default values for vaeac here. + vaeac.depth = 3, + vaeac.width = 32, + vaeac.latent_dim = 8, + vaeac.activation_function = torch::nn_relu, + vaeac.lr = 0.001, + vaeac.n_vaeacs_initialize = 4, + vaeac.epochs = 100, + vaeac.extra_parameters = list(), + ...) { + # Check that torch is installed + if (!requireNamespace("torch", quietly = TRUE)) stop("`torch` is not installed. 
Please run install.packages('torch')") + if (!torch::torch_is_installed()) torch::install_torch() + + # Extract the objects we will use later + S <- internal$objects$S + X <- internal$objects$X + parameters <- internal$parameters + + # Small printout to user + if (parameters$verbose == 2) message("Starting 'setup_approach.vaeac'.") + + # Check if we are doing a combination of approaches + combined_approaches <- length(internal$parameters$approach) > 1 + + # Ensure that `parameters$vaeac.extra_parameters` is a named list + if (is.null(parameters$vaeac.extra_parameters)) parameters$vaeac.extra_parameters <- list() + if (!is.list(parameters$vaeac.extra_parameters)) stop("`vaeac.extra_parameters` must be a list.") + if (length(parameters$vaeac.extra_parameters) > 0) vaeac_check_extra_named_list(parameters$vaeac.extra_parameters) + + # Ensure that all vaeac parameters are in their right location + parameters <- vaeac_update_para_locations(parameters = parameters) + + # Extract the default values defined for the vaeac parameters in this function + vaeac_main_para_names <- methods::formalArgs(setup_approach.vaeac) + vaeac_main_para_names <- vaeac_main_para_names[!vaeac_main_para_names %in% c("internal", "...")] + vaeac_main_para <- mget(vaeac_main_para_names) + + # Add the default extra parameter values for the non-user specified extra parameters + parameters$vaeac.extra_parameters <- utils::modifyList(vaeac_get_extra_para_default(), + parameters$vaeac.extra_parameters, + keep.null = TRUE + ) + + # Add the default main parameter values for the non-user specified main parameters + parameters <- utils::modifyList(vaeac_main_para, parameters, keep.null = TRUE) + + # Reorder them such that the vaeac parameters are at the end of the parameters list + parameters <- c(parameters[(length(vaeac_main_para) + 1):length(parameters)], parameters[seq_along(vaeac_main_para)]) + + # Check if vaeac is to be applied on a subset of coalitions. + if (!parameters$exact || parameters$is_groupwise || combined_approaches) { + # We have either: + # 1) sampled `n_combinations` different subsets of coalitions (i.e., not exact), + # 2) using the coalitions which respects the groups in group Shapley values, and/or + # 3) using a combination of approaches where vaeac is only used on a subset of the coalitions. + # Here, objects$S contains the coalitions while objects$X contains the information about the approach. + + # Extract the the coalitions / masks which are estimated using vaeac as a matrix + parameters$vaeac.extra_parameters$vaeac.mask_gen_coalitions <- + S[X[approach == "vaeac"]$id_combination, , drop = FALSE] + + # Extract the weights for the corresponding coalitions / masks. + parameters$vaeac.extra_parameters$vaeac.mask_gen_coalitions_prob <- + X$shapley_weight[X[approach == "vaeac"]$id_combination] + + # Normalize the weights/probabilities such that they sum to one. + parameters$vaeac.extra_parameters$vaeac.mask_gen_coalitions_prob <- + parameters$vaeac.extra_parameters$vaeac.mask_gen_coalitions_prob / + sum(parameters$vaeac.extra_parameters$vaeac.mask_gen_coalitions_prob) + } else { + # We are going to use the MCAR(`masking_ratio`) masking scheme. Set the variables to `NULL` as we do not need them. + parameters$vaeac.mask_gen_coalitions <- parameters$vaeac.mask_gen_coalitions_prob <- NULL + } + + # Check if user provided a pre-trained vaeac model, otherwise, we train one from scratch. 
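+  # (Illustrative note, grounded in the branches below: a pre-trained model can be supplied to explain() through + # `vaeac.extra_parameters = list(vaeac.pretrained_vaeac_model = ...)`, either as the `vaeac` list stored in + # `explanation$internal$parameters$vaeac` from an earlier explain() call or as a file path to a stored vaeac model.)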
+  if (is.null(parameters$vaeac.extra_parameters$vaeac.pretrained_vaeac_model)) { + # We train a vaeac model with the parameters in `parameters`, as the user did not provide a pre-trained vaeac model + if (parameters$verbose == 2) message("Training a `vaeac` model with the provided parameters from scratch.") + + # Specify that a vaeac model was NOT provided + parameters$vaeac.extra_parameters$vaeac.pretrained_vaeac_model_provided <- FALSE + + # Extract all vaeac parameters and remove the "vaeac." prefix as the names need to match the parameters in "do.call" + vaeac_all_parameters <- c( + parameters$vaeac.extra_parameters, + parameters[vaeac_main_para_names[vaeac_main_para_names != "vaeac.extra_parameters"]] + ) + names(vaeac_all_parameters) <- sub("vaeac\\.", "", names(vaeac_all_parameters)) + vaeac_all_parameters <- c(vaeac_all_parameters, parameters[c("seed", "verbose")]) # Add seed and verbose + + # Fit/train the vaeac model with the provided model parameters + vaeac_model <- do.call(vaeac_train_model, c(vaeac_all_parameters, list(x_train = internal$data$x_train))) + + # Add this to the explainer object + parameters$vaeac <- list( + models = vaeac_model[1:(grep("train_vlb", names(vaeac_model)) - 1)], # Models are all entries before `train_vlb` + results = vaeac_model[c("train_vlb", "val_iwae", "val_iwae_running")], # The train & val results + parameters = vaeac_model$parameters # List of all the parameters used to train the vaeac model + ) + + # Add `vaeac` as a class to the object. We use this to validate the input when + # `vaeac.pretrained_vaeac_model` is given to the `shapr::explain()` function. + class(parameters$vaeac) <- c(class(parameters$vaeac), "vaeac") + } else { + # User provided a pre-trained vaeac model. (Minimal checking for valid vaeac model is conducted.) + # The pre-trained vaeac model is either: + # 1. The explanation$internal$parameters$vaeac list of type "vaeac" from an earlier call to explain(). + # 2. A string containing the path to where the "vaeac" model is stored on disk. + if (parameters$verbose == 2) message("Loading the provided `vaeac` model.") + + # Boolean representing that a pre-trained vaeac model was provided + parameters$vaeac.extra_parameters$vaeac.pretrained_vaeac_model_provided <- TRUE + + # Check some aspects of the pre-trained vaeac model and add it to the parameters list if it passes the checks + parameters <- vaeac_update_pretrained_model(parameters = parameters) + } + + # Get which vaeac model we are to use, load it and then store the checkpoint + checkpoint <- torch::torch_load(parameters$vaeac$models[[parameters$vaeac.extra_parameters$vaeac.which_vaeac_model]]) + parameters$vaeac.checkpoint <- checkpoint + + # Set up and store the vaeac model such that it is loaded before calling the `prepare_data.vaeac()` function. + parameters$vaeac.model <- + vaeac_get_model_from_checkp(checkpoint = checkpoint, cuda = checkpoint$cuda, mode_train = FALSE) + + # Extract and save sampling method. That is, if we are to sample randomly from the inferred generative distributions + # or if we are to sample the most likely values (mean for cont and class with highest prob for cat features). + parameters$vaeac.sampler <- if (parameters$vaeac.extra_parameters$vaeac.sample_random) { + parameters$vaeac.model$sampler_random + } else { + parameters$vaeac.model$sampler_most_likely + } + + # Update/overwrite the parameters list in the internal list. 
+  internal$parameters <- parameters + + # Small printout to user + if (parameters$verbose == 2) message("Done with 'setup_approach.vaeac'.\n") + + # Return the updated internal list. + return(internal) +} + +#' @inheritParams default_doc +#' +#' @rdname prepare_data +#' @export +#' @author Lars Henry Berge Olsen +prepare_data.vaeac <- function(internal, index_features = NULL, ...) { + # If not provided, then set `index_features` to all non-trivial coalitions + if (is.null(index_features)) index_features <- seq(2, internal$parameters$n_combinations - 1) + + # Extract objects we are going to need later + S <- internal$objects$S + seed <- internal$parameters$seed + verbose <- internal$parameters$verbose + x_explain <- internal$data$x_explain + n_explain <- internal$parameters$n_explain + n_samples <- internal$parameters$n_samples + vaeac.model <- internal$parameters$vaeac.model + vaeac.sampler <- internal$parameters$vaeac.sampler + vaeac.checkpoint <- internal$parameters$vaeac.checkpoint + vaeac.batch_size_sampling <- internal$parameters$vaeac.extra_parameters$vaeac.batch_size_sampling + + # Small printout to the user + if (verbose == 2) { + message(paste0( + "Working on batch ", internal$objects$X[id_combination == index_features[1]]$batch, " of ", + internal$parameters$n_batches, " in `prepare_data.vaeac()`." + )) + } + + # Apply all coalitions to all explicands to get a data table where `vaeac` will impute the `NaN` values + x_explain_extended <- + vaeac_get_x_explain_extended(x_explain = x_explain, S = S, index_features = index_features) + + # Set the number of observations to generate the MC samples for at a time. + n_explain_extended <- nrow(x_explain_extended) + batch_size <- if (is.null(vaeac.batch_size_sampling)) n_explain_extended else vaeac.batch_size_sampling + if (batch_size > n_explain_extended) batch_size <- n_explain_extended + + # Impute the missing entries using the vaeac approach. + x_explain_with_MC_samples_dt <- vaeac_impute_missing_entries( + x_explain_with_NaNs = x_explain_extended, + n_explain = n_explain, + n_samples = n_samples, + vaeac_model = vaeac.model, + checkpoint = vaeac.checkpoint, + sampler = vaeac.sampler, + batch_size = batch_size, + verbose = verbose, + seed = seed, + index_features = index_features + ) + + # Return the generated conditional Monte Carlo samples + return(x_explain_with_MC_samples_dt) +} + + +# Train vaeac model ==================================================================================================== +#' Train the Vaeac Model +#' +#' @description Function that fits a vaeac model to the given dataset based on the provided parameters, +#' as described in \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}. Note that +#' all default parameters specified below originate from [shapr::setup_approach.vaeac()] and +#' [shapr::vaeac_get_extra_para_default()]. +#' +#' @details +#' The vaeac model consists of three neural networks, i.e., a masked encoder, a full encoder, and a decoder. +#' The networks have shared `depth`, `width`, and `activation_function`. The encoders map the `x_train` +#' to a latent representation of dimension `latent_dim`, while the decoder maps the latent representations +#' back to the feature space. See \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)} +#' for more details. The function first initiates `n_vaeacs_initialize` vaeac models with different randomly +#' initiated network parameter values to remedy poorly initiated values. 
After `epochs_initiation_phase` epochs, the +#' `n_vaeacs_initialize` vaeac models are compared and the function continues to only train the best performing +#' one for a total of `epochs` epochs. The networks are trained using the ADAM optimizer with learning rate `lr`. +#' +#' @param depth Positive integer (default is `3`). The number of hidden layers +#' in the neural networks of the masked encoder, full encoder, and decoder. +#' @param width Positive integer (default is `32`). The number of neurons in each +#' hidden layer in the neural networks of the masked encoder, full encoder, and decoder. +#' @param latent_dim Positive integer (default is `8`). The number of dimensions in the latent space. +#' @param lr Positive numeric (default is `0.001`). The learning rate used in the [torch::optim_adam()] optimizer. +#' @param activation_function An [torch::nn_module()] representing an activation function such as, e.g., +#' [torch::nn_relu()] (default), [torch::nn_leaky_relu()], [torch::nn_selu()], or [torch::nn_sigmoid()]. +#' @param n_vaeacs_initialize Positive integer (default is `4`). The number of different vaeac models to initiate +#' in the start. Pick the best performing one after `epochs_initiation_phase` +#' epochs (default is `2`) and continue training that one. +#' @param epochs Positive integer (default is `100`). The number of epochs to train the final vaeac model. +#' This includes `epochs_initiation_phase`, where the default is `2`. +#' @param x_train A data.table containing the training data. Categorical data must have class names \eqn{1,2,\dots,K}. +#' @param model_description String (default is `make.names(Sys.time())`). String containing, e.g., the name of the +#' data distribution or additional parameter information. Used in the save name of the fitted model. If not provided, +#' then a name will be generated based on [base::Sys.time()] to ensure a unique name. We use [base::make.names()] to +#' ensure a valid file name for all operating systems. +#' @param folder_to_save_model String (default is [base::tempdir()]). String specifying a path to a folder where +#' the function is to save the fitted vaeac model. Note that the path will be removed from the returned +#' [shapr::explain()] object if `vaeac.save_model = FALSE`. +#' @param cuda Logical (default is `FALSE`). If `TRUE`, then the `vaeac` model will be trained using cuda/GPU. +#' If [torch::cuda_is_available()] is `FALSE`, then we fall back to using the CPU. If `FALSE`, we use the CPU. Often this is +#' faster for tabular data sets. Note that cuda is not supported in the current version of the `shapr` package. +#' TODO: Update this when this is done. +#' @param epochs_initiation_phase Positive integer (default is `2`). The number of epochs to run each of the +#' `n_vaeacs_initialize` `vaeac` models before continuing to train only the best performing model. +#' @param epochs_early_stopping Positive integer (default is `NULL`). The training stops if there has been no +#' improvement in the validation IWAE for `epochs_early_stopping` epochs. If the user wants the training process +#' to be solely based on this training criterion, then `epochs` in [shapr::explain()] should be set to a large +#' number. If `NULL`, then `shapr` will internally set `epochs_early_stopping = vaeac.epochs` such that early +#' stopping does not occur. +#' @param save_every_nth_epoch Positive integer (default is `NULL`). If provided, then the vaeac model after +#' every `save_every_nth_epoch`th epoch will be saved. 
+#' @param val_ratio Numeric (default is `0.25`). Scalar between `0` and `1` indicating the ratio of +#' instances from the input data which will be used as validation data. That is, `val_ratio = 0.25` means +#' that `75%` of the provided data is used as training data, while the remaining `25%` is used as validation data. +#' @param val_iwae_n_samples Positive integer (default is `25`). The number of generated samples used +#' to compute the IWAE criterion when validating the vaeac model on the validation data. +#' @param batch_size Positive integer (default is `64`). The number of samples to include in each batch +#' during the training of the vaeac model. Used in [torch::dataloader()]. +#' @param skip_conn_layer Logical (default is `TRUE`). If `TRUE`, we apply identity skip connections in each +#' layer, see [shapr::SkipConnection()]. That is, we add the input \eqn{X} to the outcome of each hidden layer, +#' so the output becomes \eqn{X + activation(WX + b)}. +#' @param skip_conn_masked_enc_dec Logical (default is `TRUE`). If `TRUE`, we apply concatenate skip +#' connections between the layers in the masked encoder and decoder. The first layer of the masked encoder will be +#' linked to the last layer of the decoder. The second layer of the masked encoder will be +#' linked to the second to last layer of the decoder, and so on. +#' @param batch_normalization Logical (default is `FALSE`). If `TRUE`, we apply batch normalization after the +#' activation function. Note that if `skip_conn_layer = TRUE`, then the normalization is applied after the +#' inclusion of the skip connection. That is, we batch normalize the whole quantity \eqn{X + activation(WX + b)}. +#' @param paired_sampling Logical (default is `TRUE`). If `TRUE`, we apply paired sampling to the training +#' batches. That is, the training observations in each batch will be duplicated, where the first instance will be masked +#' by \eqn{S} while the second instance will be masked by \eqn{\bar{S}}. This ensures that the training of the +#' `vaeac` model becomes more stable as the model has access to the full version of each training observation. However, +#' this will increase the training time due to more complex implementation and doubling the size of each batch. See +#' [shapr::paired_sampler()] for more information. +#' @param running_avg_n_values Positive integer (default is `5`). +#' The number of previous IWAE values to include +#' when we compute the running means of the IWAE criterion. +#' @param masking_ratio Numeric (default is `0.5`). Probability of masking a feature in the +#' [shapr::MCAR_mask_generator()] (MCAR = Missing Completely At Random). The MCAR masking scheme ensures that the `vaeac` +#' model can do arbitrary conditioning as all coalitions will be trained. `masking_ratio` will be overruled if +#' `mask_gen_coalitions` is specified. +#' @param mask_gen_coalitions Matrix (default is `NULL`). Matrix containing the coalitions that the +#' `vaeac` model will be trained on, see [shapr::Specified_masks_mask_generator()]. This parameter is used internally +#' in `shapr` when we only consider a subset of coalitions/combinations, i.e., when +#' `n_combinations` \eqn{< 2^{n_{\text{features}}}}, and for group Shapley, i.e., +#' when `group` is specified in [shapr::explain()]. +#' @param mask_gen_coalitions_prob Numeric array (default is `NULL`). 
Array of length equal to the height +#' of `mask_gen_coalitions` containing the probabilities of sampling the corresponding coalitions in +#' `mask_gen_coalitions`. +#' @param sigma_mu Numeric (default is `1e4`). One of two hyperparameter values in the normal-gamma prior +#' used in the masked encoder, see Section 3.3.1 in +#' \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}. +#' @param sigma_sigma Numeric (default is `1e-4`). One of two hyperparameter values in the normal-gamma prior +#' used in the masked encoder, see Section 3.3.1 in +#' \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}. +#' @param save_data Logical (default is `FALSE`). If `TRUE`, then the data is stored together with +#' the model. Useful if one is to continue training the model later using [shapr::vaeac_continue_train_model()]. +#' @param log_exp_cont_feat Logical (default is `FALSE`). If we are to \eqn{\log} transform all +#' continuous features before sending the data to [shapr::vaeac()]. The `vaeac` model creates unbounded Monte Carlo +#' sample values. Thus, if the continuous features are strictly positive (as for, e.g., the Burr distribution and +#' Abalone data set), it can be advantageous to \eqn{\log} transform the data to unbounded form before using `vaeac`. +#' If `TRUE`, then [shapr::vaeac_postprocess_data()] will take the \eqn{\exp} of the results to get back to strictly +#' positive values when using the `vaeac` model to impute missing values/generate the Monte Carlo samples. +#' @param verbose An integer specifying the level of verbosity. Use `0` (default) for no verbosity, +#' `1` for low verbosity, and `2` for high verbosity. +#' @param seed Positive integer (default is `1`). Seed for reproducibility. Specifies the seed before any randomness-based +#' code is run. +#' @param which_vaeac_model String (default is `best`). The name of the `vaeac` model (snapshots from different +#' epochs) to use when generating the Monte Carlo samples. The standard choices are: `"best"` (epoch with lowest IWAE), +#' `"best_running"` (epoch with lowest running IWAE, see `vaeac.running_avg_n_values`), and `"last"` (the last epoch). +#' Note that additional choices are available if `vaeac.save_every_nth_epoch` is provided. For example, if +#' `vaeac.save_every_nth_epoch = 5`, then `vaeac.which_vaeac_model` can also take the values `"epoch_5"`, `"epoch_10"`, +#' `"epoch_15"`, and so on. +#' @param ... List of extra parameters, currently not used. +#' +#' @return A list containing the training/validation errors and paths to where the vaeac models are saved on the disk. +#' @export +#' @author Lars Henry Berge Olsen +vaeac_train_model <- function(x_train, + model_description, + folder_to_save_model, + cuda, + n_vaeacs_initialize, + epochs_initiation_phase, + epochs, + epochs_early_stopping, + save_every_nth_epoch, + val_ratio, + val_iwae_n_samples, + depth, + width, + latent_dim, + lr, + batch_size, + running_avg_n_values, + activation_function, + skip_conn_layer, + skip_conn_masked_enc_dec, + batch_normalization, + paired_sampling, + masking_ratio, + mask_gen_coalitions, + mask_gen_coalitions_prob, + sigma_mu, + sigma_sigma, + save_data, + log_exp_cont_feat, + which_vaeac_model, + verbose, + seed, + ...) 
{ + # Set seed for reproducibility for both R and torch + set.seed(seed) + torch::torch_manual_seed(seed) + + # Set epochs_early_stopping to epochs to ensure that early stopping never occurs + if (is.null(epochs_early_stopping)) epochs_early_stopping <- epochs + + # Check all the vaeac parameters + do.call(vaeac_check_parameters, mget(methods::formalArgs(vaeac_train_model))) + + # Check if we can use cuda + if (cuda) cuda <- vaeac_check_cuda(cuda) + + # Determine which mask generator to use + mask_generator_name <- vaeac_get_mask_generator_name( + mask_gen_coalitions = mask_gen_coalitions, + mask_gen_coalitions_prob = mask_gen_coalitions_prob, + masking_ratio = masking_ratio, + verbose = verbose + ) + + # Set up the data loaders and get the save file names and load them into the local environment + list2env( + vaeac_get_data_objects( + x_train = x_train, + log_exp_cont_feat = log_exp_cont_feat, + val_ratio = val_ratio, + batch_size = batch_size, + paired_sampling = paired_sampling, + model_description = model_description, + depth = depth, + width = width, + latent_dim = latent_dim, + lr = lr, + epochs = epochs, + save_every_nth_epoch = save_every_nth_epoch, + folder_to_save_model = folder_to_save_model, + train_indices = NULL, + val_indices = NULL + ), + envir = environment() + ) + + # Get information saved together with the vaeac model to make it possible to load the model from disk later. + # Note that some of the parameters could be derived from others, but for simplicity we store all needed objects. + state_list <- vaeac_get_full_state_list(environment()) + + # Check if we are to add the training data to the state list + if (save_data) state_list <- c(state_list, list(x_train = x_train, x_train_torch = x_train_torch)) + + ## Initializing vaeac models + # Initialize several vaeac models and keep the one with the best training variational lower bound + # after a given number of epochs. Keep the version with highest vlb, denoted by "best_vlb". + best_vlb <- -Inf + + # Create a `progressr::progressor()` to keep track of the overall training time of the vaeac approach + progressr_bar <- progressr::progressor(steps = epochs_initiation_phase * (n_vaeacs_initialize - 1) + epochs) + + # Iterate over the initializations. + initialization_idx <- 1 + for (initialization_idx in seq(n_vaeacs_initialize)) { + # Initialize a new vaeac model + vaeac_model <- vaeac( + one_hot_max_sizes = one_hot_max_sizes, + width = width, + depth = depth, + latent_dim = latent_dim, + activation_function = activation_function, + skip_conn_layer = skip_conn_layer, + skip_conn_masked_enc_dec = skip_conn_masked_enc_dec, + batch_normalization = batch_normalization, + paired_sampling = paired_sampling, + mask_generator_name = mask_generator_name, + masking_ratio = masking_ratio, + mask_gen_coalitions = mask_gen_coalitions, + mask_gen_coalitions_prob = mask_gen_coalitions_prob, + sigma_mu = sigma_mu, + sigma_sigma = sigma_sigma + ) + + # TODO: we need to check this + we need to send the data too + # Send the model to the GPU, if we have access to it. 
+ if (cuda) vaeac_model <- vaeac_model$cuda() + + # Add the number of trainable parameters in the vaeac model to the state list + if (initialization_idx == 1) { + state_list$n_trainable_parameters <- vaeac_model$n_train_param + if (verbose == 2) { + message(paste0("The vaeac model contains ", vaeac_model$n_train_param[1, 1], " trainable parameters.")) + } + } + + # Print which initialization vaeac the function is working on + if (verbose == 2) { + message(paste0("Initializing vaeac number ", initialization_idx, " of ", n_vaeacs_initialize, ".")) + } + + # Create the ADAM optimizer + optimizer <- vaeac_get_optimizer(vaeac_model = vaeac_model, lr = lr, optimizer_name = "adam") + + # Train the current initialized vaeac model + vaeac_model_now_list <- vaeac_train_model_auxiliary( + vaeac_model = vaeac_model, + optimizer = optimizer, + epochs = epochs_initiation_phase, + epochs_start = 1, # All the vaeacs should start from scratch + train_dataloader = train_dataloader, + val_dataloader = val_dataloader, + val_iwae_n_samples = val_iwae_n_samples, + running_avg_n_values = running_avg_n_values, + epochs_early_stopping = FALSE, # Do not want to do early stopping during initialization + verbose = verbose, + cuda = cuda, + progressr_bar = progressr_bar, + save_every_nth_epoch = save_every_nth_epoch, + initialization_idx = initialization_idx, + n_vaeacs_initialize = n_vaeacs_initialize, + train_vlb = NULL, # We start from scratch + val_iwae = NULL, # We start from scratch + val_iwae_running = NULL # We start from scratch + ) + + # If the new initialization has a higher training VLB than the previous initializations, then we keep it. + if ((best_vlb <= vaeac_model_now_list$avg_vlb)$item()) { + vaeac_model_best_list <- vaeac_model_now_list + best_vlb <- vaeac_model_now_list$avg_vlb + } + } # Done with initial training of all vaeac models + + # Send the model to the GPU, if we have access to it. + # TODO: Check this when we get access to GPU + if (cuda) vaeac_model_best_list$vaeac_model <- vaeac_model_best_list$vaeac_model$cuda() + + # Check if we are printing detailed debug information + # Small printout to the user stating which initiated vaeac model was the best. + if (verbose == 2) { + message(paste0( + "Best vaeac initialization was number ", vaeac_model_best_list$initialization_idx, " (of ", n_vaeacs_initialize, + ") with a training VLB = ", round(as.numeric(vaeac_model_best_list$train_vlb[-1]), 3), " after ", + epochs_initiation_phase, " epochs. Continue to train this initialization." 
+ )) + } + + return_list <- vaeac_train_model_auxiliary( + vaeac_model = vaeac_model_best_list$vaeac_model, + optimizer = vaeac_model_best_list$optimizer, + train_dataloader = train_dataloader, + val_dataloader = val_dataloader, + val_iwae_n_samples = val_iwae_n_samples, + running_avg_n_values = running_avg_n_values, + verbose = verbose, + cuda = cuda, + progressr_bar = progressr_bar, + epochs = epochs, + epochs_start = epochs_initiation_phase + 1, + epochs_early_stopping = epochs_early_stopping, + save_every_nth_epoch = save_every_nth_epoch, + vaeac_save_file_names = vaeac_save_file_names, # Provide the save names for the models + state_list = state_list, # Need to provide the state list as it will be saved together with the models + initialization_idx = NULL, # Do not need to specify it as we are not doing the initialization now + n_vaeacs_initialize = NULL, # Do not need to specify it as we are not doing the initialization now + train_vlb = vaeac_model_best_list$train_vlb, # Send in the array from the best initiated vaeac model + val_iwae = vaeac_model_best_list$val_iwae, + val_iwae_running = vaeac_model_best_list$val_iwae_running + ) + + # Return the paths where the models are saved and the training/validation errors. + return(return_list) +} + +#' Continue to Train the vaeac Model +#' +#' @description Function that loads a previously trained vaeac model and continue the training, either +#' on new data or on the same dataset as it was trained on before. If we are given a new dataset, then +#' we assume that new dataset has the same distribution and one_hot_max_sizes as the original dataset. +#' +#' @inheritParams vaeac_train_model +#' @param explanation A [shapr::explain()] object and `vaeac` must be the used approach. +#' @param epochs_new Positive integer. The number of extra epochs to conduct. +#' @param lr_new Positive numeric. If we are to overwrite the old learning rate in the adam optimizer. +#' +#' @return A list containing the training/validation errors and paths to where the vaeac models are saved on the disk. 
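+#' @examples +#' \dontrun{ +#' # Hedged usage sketch (assumes `expl` is an existing explain() object computed with approach = "vaeac" +#' # and with the training data stored via `vaeac.save_data = TRUE`, so the stored data can be reused): +#' vaeac_extra_epochs <- vaeac_continue_train_model(explanation = expl, epochs_new = 50) +#' }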
+#' @export +#' @author Lars Henry Berge Olsen +vaeac_continue_train_model <- function(explanation, + epochs_new, + lr_new = NULL, + x_train = NULL, + save_data = FALSE, + verbose = 0, + seed = 1) { + # Check the input + if (!"shapr" %in% class(explanation)) stop("`explanation` must be a list of class `shapr`.") + if (!"vaeac" %in% explanation$internal$parameters$approach) stop("`vaeac` is not an approach in `explanation`.") + if (!is.null(lr_new)) vaeac_check_positive_numerics(list(lr_new = lr_new)) + if (!is.null(x_train) && !data.table::is.data.table(x_train)) stop("`x_train` must be a `data.table` object.") + vaeac_check_verbose(verbose) + vaeac_check_positive_integers(list(epochs_new = epochs_new, seed = seed)) + vaeac_check_logicals(list(save_data = save_data)) + + # Set seed for reproducibility + set.seed(seed) + + # Extract the vaeac list and load the model at the last epoch + vaeac_model <- explanation$internal$parameters$vaeac + checkpoint <- torch::torch_load(vaeac_model$models$last) + + # If we applied early stopping before and are calling this function, then we turn early stopping off + if (isTRUE(checkpoint$early_stopping_applied)) checkpoint$epochs_early_stopping <- epochs_new + + # Check for access to a single training data set and use the data from the checkpoint if `x_train` is not provided + if (is.null(checkpoint$normalized_data) && is.null(x_train)) { + stop("The `vaeac` model did not include data (set `vaeac.save_data = TRUE in `explain()`) and `x_train = NULL`.") + } + if (!is.null(checkpoint$x_train) && !is.null(x_train)) { + message("The `vaeac` model includes data and `x_train` was provided to this function. We only use `x_train`.") + } + if (is.null(x_train)) x_train <- checkpoint$x_train + + # Check that the provided vaeac model is trained on a dataset with the same feature names + vaeac_check_x_train_names(feature_names_vaeac = checkpoint$feature_list$labels, feature_names_new = names(x_train)) + + # Check if we can reuse the original validation and training indices + if (!is.null(checkpoint$x_train) || nrow(x_train) == checkpoint$n_train) { + val_indices <- checkpoint$val_indices + train_indices <- checkpoint$train_indices + } else { + val_indices <- train_indices <- NULL + } + + # Set up the data loaders and get the save file names and load them into the local environment + list2env( + vaeac_get_data_objects( + x_train = x_train, + log_exp_cont_feat = checkpoint$log_exp_cont_feat, + val_ratio = checkpoint$val_ratio, + batch_size = checkpoint$batch_size, + paired_sampling = checkpoint$paired_sampling, + model_description = checkpoint$ model_description, + depth = checkpoint$depth, + width = checkpoint$width, + latent_dim = checkpoint$latent_dim, + lr = checkpoint$lr, # Use the old one as this parameter is used in the filenames + epochs = checkpoint$epochs + epochs_new, + save_every_nth_epoch = checkpoint$save_every_nth_epoch, + folder_to_save_model = checkpoint$folder_to_save_model, + train_indices = train_indices, + val_indices = val_indices + ), + envir = environment() + ) + + # List to values saved to disk together with the vaeac models below. + state_list_new <- list( + norm_mean = as.array(x_train_preprocessed$norm_mean), + norm_std = as.array(x_train_preprocessed$norm_std), + n_train = n_train, + epochs_new = epochs_new, + train_indices = train_indices, + val_indices = val_indices, + lr_new = lr_new + ) + + # If we are also to save the data to state_list. 
+ if (save_data) { + state_list_new <- c(state_list_new, list(x_train = x_train, x_train_torch = x_train_torch)) + + # Give a message regarding disk usage + vaeac_check_save_parameters( + save_data = save_data, + epochs = epochs_new, + save_every_nth_epoch = checkpoint$save_every_nth_epoch, + x_train_size = format(utils::object.size(x_train), units = "auto") + ) + } + + # Add the new state list as a list to the checkpoint + n_times_continued_trained <- sum(grepl("state_list_new", names(checkpoint))) + state_list_new_name <- paste("state_list_new", n_times_continued_trained + 1, sep = "_") + state_list <- checkpoint + state_list[[state_list_new_name]] <- state_list_new + + # Set up the vaeac model in training mode and based on the parameters stored in the checkpoint + vaeac_model <- vaeac_get_model_from_checkp(checkpoint = checkpoint, cuda = checkpoint$cuda, mode_train = TRUE) + + # Specify the learning rate we will use, create the an adam optimizer, and insert the stored optimizer state. + lr_now <- if (!is.null(lr_new)) lr_new else checkpoint$lr + optimizer <- vaeac_get_optimizer(vaeac_model = vaeac_model, lr = lr_now, optimizer_name = "adam") + optimizer$load_state_dict(checkpoint$optimizer_state_dict) + + # Compute the new number of epochs + epochs_old <- checkpoint$epochs + epochs <- epochs_old + epochs_new + state_list$epochs <- epochs + + # Create a `progressr::progressor()` to keep track of the new training + progressr_bar <- progressr::progressor(steps = epochs_new) + + # Train the vaeac model for `epochs_new` number of epochs + vaeac_tmp <- vaeac_train_model_auxiliary( + vaeac_model = vaeac_model, + optimizer = optimizer, + train_dataloader = train_dataloader, + val_dataloader = val_dataloader, + val_iwae_n_samples = checkpoint$val_iwae_n_samples, + running_avg_n_values = checkpoint$running_avg_n_values, + verbose = verbose, + cuda = checkpoint$cuda, + progressr_bar = progressr_bar, + epochs = epochs, + epochs_start = epochs_old + 1, + epochs_early_stopping = checkpoint$epochs_early_stopping, + save_every_nth_epoch = checkpoint$save_every_nth_epoch, + vaeac_save_file_names = vaeac_save_file_names, # Provide the save names for the models + state_list = state_list, # Need to provide the state list as it will be saved together with the models + initialization_idx = NULL, # Do not need to specify it as we are not doing the initialization now + n_vaeacs_initialize = NULL, # Do not need to specify it as we are not doing the initialization now + train_vlb = checkpoint$train_vlb, + val_iwae = checkpoint$val_iwae, + val_iwae_running = checkpoint$val_iwae_running + ) + + # Create the return list + return_list <- list( + models = vaeac_tmp[1:(grep("train_vlb", names(vaeac_tmp)) - 1)], # Models are all entries before `train_vlb` + results = vaeac_tmp[c("train_vlb", "val_iwae", "val_iwae_running")], # The train & val results + parameters = vaeac_tmp$parameters # List of all the parameters used to train the vaeac model + ) + + # Add `vaeac` as a class to the object. We use this to validate the input when + # `vaeac.pretrained_vaeac_model` is given to the `shapr::explain()` function. + class(return_list) <- c(class(return_list), "vaeac") + + # Return the paths where the models are saved and the training/validation errors. 
+  return(return_list) +} + + +# Compute Imputations ================================================================================================== +#' Impute Missing Values Using Vaeac +#' +#' @details Function that imputes the missing values in a 2D matrix where each row constitutes an individual. +#' The values are sampled from the conditional distribution estimated by a vaeac model. +#' +#' @inheritParams vaeac_train_model +#' @param x_explain_with_NaNs A 2D matrix, where the missing entries to impute are represented by `NaN`. +#' @param n_samples Integer. The number of imputed versions we create for each row in `x_explain_with_NaNs`. +#' @param index_features Optional integer vector. Used internally in the shapr package to index the coalitions. +#' @param n_explain Positive integer. The number of explicands. +#' @param vaeac_model An initialized `vaeac` model that we are going to use to generate the MC samples. +#' @param checkpoint List containing the parameters of the `vaeac` model. +#' @param sampler A sampler object used to sample the MC samples. +#' +#' @return A data.table where the missing values (`NaN`) in `x_explain_with_NaNs` have been imputed `n_samples` times. +#' The data table will contain extra id columns if `index_features` and `n_explain` are provided. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_impute_missing_entries <- function(x_explain_with_NaNs, + n_samples, + vaeac_model, + checkpoint, + sampler, + batch_size, + verbose = 0, + seed = NULL, + n_explain = NULL, + index_features = NULL) { + # We only need `n_explain` when `index_features` is provided + if (xor(is.null(index_features), is.null(n_explain))) { + stop("Either none or both of `index_features` and `n_explain` must be given.") + } + + # Set seed for reproducibility if provided by the user. Both in R and torch. + if (!is.null(seed)) { + set.seed(seed) + torch::torch_manual_seed(seed) + } + + if (verbose == 2) message("Preprocessing the explicands.") + + # Preprocess `x_explain_with_NaNs`. Turn factor names into numerics 1,2,...,K (vaeac only accepts numerics) and keep + # track of the mapping of names. Optionally log-transform the continuous features. Then, finally, normalize the data + # using the training means and standard deviations. I.e., we assume that the new data follow the same distribution as + # the training data. If this is NOT the case, then vaeac will generate unreasonable imputations. + x_explain_with_NaNs_processed <- vaeac_preprocess_data( + data = x_explain_with_NaNs, + log_exp_cont_feat = checkpoint$log_exp_cont_feat, + normalize = TRUE, + norm_mean = checkpoint$norm_mean, # Normalize using training data means + norm_std = checkpoint$norm_std # Normalize using training data standard deviations + )$data_normalized_torch + + # Create the data set object + dataset <- vaeac_dataset(X = x_explain_with_NaNs_processed, one_hot_max_sizes = checkpoint$one_hot_max_sizes) + + # Create a data loader that loads/iterates over the data set in chronological order. + dataloader <- torch::dataloader(dataset = dataset, batch_size = batch_size, shuffle = FALSE) + + if (verbose == 2) message("Generating the MC samples.") + + # Create an auxiliary list of lists to store the imputed values combined with the original values. The structure is + # [[i'th MC sample]][[b'th batch]], where the entries are tensors of dimension batch_size x n_features. 
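+  # For example, results[[2]][[3]] will hold the tensor for the 3rd batch of the 2nd Monte Carlo sample.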
+ results <- lapply(seq(n_samples), function(k) list()) + + # Generate the conditional Monte Carlo samples for the observation `x_explain_with_NaNs`, one batch at the time. + coro::loop(for (batch in dataloader) { + # Make a deep copy of the batch and detach it from graph. + batch_extended <- batch$clone()$detach() + + # If batch size is less than batch_size, extend it with objects from the beginning of the dataset. + if (batch_extended$shape[1] < batch_size) { + batch_extended <- vaeac_extend_batch(batch = batch_extended, dataloader = dataloader, batch_size = batch_size) + } + + # Send the original and extended batch to GPU if applicable. + if (checkpoint$cuda) { + batch <- batch$cuda() + batch_extended <- batch_extended$cuda() + } + + # Compute the imputation mask, i.e., which entries we are to impute. + mask_extended <- torch::torch_isnan(batch_extended)$to(dtype = torch::torch_float()) + + # Do not need to keep track of the gradients, as we are not fitting the model. + torch::with_no_grad({ + # Compute the distribution parameters for the generative models inferred by the masked encoder and decoder. + # This is a tensor of shape [batch_size, n_samples, n_generative_parameters]. Note that, for only continuous + # features we have that n_generative_parameters = 2*n_features, but for categorical data the number depends + # on the number of categories. + samples_params <- vaeac_model$generate_samples_params(batch = batch_extended, mask = mask_extended, K = n_samples) + + # Remove the parameters belonging to added instances in batch_extended. + samples_params <- samples_params[1:batch$shape[1], , ] + }) + + # Make a deep copy of the batch with missing values set to zero. + mask <- torch::torch_isnan(batch) + batch_zeroed_nans <- batch$clone()$detach() + batch_zeroed_nans[mask] <- 0 + + # Iterate over the number of imputations and generate the imputed samples + for (i in seq(n_samples)) { + # Extract the i'th inferred generative parameters for the whole batch. + # sample_params is a tensor of shape [batch_size, n_generative_parameters]. + sample_params <- samples_params[, i, ] + + # Generate the imputations using the generative distributions inferred by the decoder. + sample <- sampler(sample_params) + + # Set the imputations for features in S (known features) to zero, as we do not need to generate them. + sample[torch::torch_logical_not(mask)] <- 0 + + # Combine the imputations with the original data to fill in the missing values. Shape is [batch_size, n_features]. + sample <- sample + batch_zeroed_nans + + # Make a deep copy and add it to correct location in the results list. + results[[i]] <- append(results[[i]], sample$clone()$detach()$cpu()) + } # End of iterating over the n_samples + }) # End of iterating over the batches. Done imputing. + + if (verbose == 2) message("Postprocessing the Monte Carlo samples.") + + # Order the MC samples into a tensor of shape [nrow(x_explain_with_NaNs), n_samples, n_features]. The lapply function + # creates a list of tensors of shape [nrow(x_explain_with_NaNs), 1, n_features] by concatenating the batches for the + # i'th MC sample to a tensor of shape [nrow(x_explain_with_NaNs), n_features] and then add unsqueeze to add a new + # singleton dimension as the second dimension to get the shape [nrow(x_explain_with_NaNs), 1, n_features]. Then + # outside of the lapply function, we concatenate the n_samples torch elements to form a final torch result of shape + # [nrow(x_explain_with_NaNs), n_samples, n_features]. 
+ result <- torch::torch_cat(lapply(seq(n_samples), function(i) torch::torch_cat(results[[i]])$unsqueeze(2)), dim = 2) + + # Get back to the original distribution by undoing the normalization by multiplying with the std and adding the mean + result <- result * checkpoint$norm_std + checkpoint$norm_mean + + # Convert from a tensor of shape [nrow(x_explain_with_NaNs), n_samples, n_features] + # to a matrix of shape [(nrow(x_explain_with_NaNs) * n_samples), n_features]. + result <- data.table::as.data.table(as.matrix(result$view(c( + result$shape[1] * result$shape[2], + result$shape[3] + ))$detach()$cpu())) + + # Post-process the data such that categorical features have original level names and convert to a data table. + result <- vaeac_postprocess_data(data = result, vaeac_model_state_list = checkpoint) + + # If user provide `index_features`, then we add columns needed for shapr computations + if (!is.null(index_features)) { + # Add id, id_combination and weights (uniform for the `vaeac` approach) to the result. + result[, c("id", "id_combination", "w") := list( + rep(x = seq(n_explain), each = length(index_features) * n_samples), + rep(x = index_features, each = n_samples, times = n_explain), + 1 / n_samples + )] + + # Set the key in the data table + data.table::setkeyv(result, c("id", "id_combination")) + } + + return(result) +} + +# Check functions ====================================================================================================== +#' Check vaeac.extra_parameters list +#' +#' @param vaeac.extra_parameters List containing the extra parameters to the `vaeac` approach +#' +#' @author Lars Henry Berge Olsen +#' @keywords internal +vaeac_check_extra_named_list <- function(vaeac.extra_parameters) { + names <- names(vaeac.extra_parameters) + if (is.null(names)) stop("The parameter `vaeac.extra_parameters` is not a named list.") + if (any(names == "")) stop("Not all parameters in the list `vaeac.extra_parameters` are named.") +} + +#' Function that checks positive integers +#' +#' @param named_list_positive_integers List containing named entries. I.e., `list(a = 1, b = 2)`. +#' +#' @return The function does not return anything. +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_check_positive_integers <- function(named_list_positive_integers) { + param_names <- names(named_list_positive_integers) + for (idx in seq_len(length(named_list_positive_integers))) { + param_name <- param_names[idx] + value <- named_list_positive_integers[[param_name]] + if (!is.numeric(value) || length(value) != 1 || value <= 0 || !is.finite(value) || value %% 1 != 0) { + stop(paste0("'vaeac.", param_name, "' must be a positive integer.")) + } + } +} + +#' Function that checks positive numerics +#' +#' @param named_list_positive_numerics List containing named entries. I.e., `list(a = 0.2, b = 10^3)`. +#' +#' @return The function does not return anything. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_check_positive_numerics <- function(named_list_positive_numerics) { + param_names <- names(named_list_positive_numerics) + for (idx in seq_len(length(named_list_positive_numerics))) { + param_name <- param_names[idx] + value <- named_list_positive_numerics[[param_name]] + if (!is.numeric(value) || length(value) != 1 || !is.finite(value) || value <= 0) { + stop(paste0("'vaeac.", param_name, "' must be a positive numeric.")) + } + } +} + +#' Function that checks probabilities +#' +#' @param named_list_probabilities List containing named entries. 
I.e., `list(a = 0.2, b = 0.9)`. +#' +#' @return The function does not return anything. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_check_probabilities <- function(named_list_probabilities) { + # Trick needed for entries that can be vectors (i.e., `vaeac.masking_ratio`) + named_list_probabilities_tmp <- as.list(unlist(named_list_probabilities)) + param_names <- names(named_list_probabilities_tmp) + for (idx in seq_len(length(named_list_probabilities_tmp))) { + param_name <- param_names[idx] + value <- named_list_probabilities_tmp[[param_name]] + if (!is.numeric(value) || length(value) != 1 || !is.finite(value) || value < 0 || value > 1) { + stop(paste0("'vaeac.", param_name, "' must be a valid probability (a number between 0 and 1).")) + } + } +} + +#' Function that checks logicals +#' +#' @param named_list_logicals List containing named entries. I.e., `list(a = TRUE, b = FALSE)`. +#' +#' @return The function does not return anything. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_check_logicals <- function(named_list_logicals) { + param_names <- names(named_list_logicals) + for (idx in seq_len(length(named_list_logicals))) { + param_name <- param_names[idx] + value <- named_list_logicals[[param_name]] + if (!is.logical(value) || length(value) != 1) { + stop(paste0("'vaeac.", param_name, "' must be a boolean (i.e., `TRUE` or `FALSE`).")) + } + } +} + +#' Function that checks for valid `vaeac` model name +#' +#' @inheritParams vaeac_train_model +#' +#' @return The function does not return anything. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_check_which_vaeac_model <- function(which_vaeac_model, epochs, save_every_nth_epoch = NULL) { + valid_names <- c("best", "best_running", "last") + if (!is.null(save_every_nth_epoch)) { + valid_names <- c( + valid_names, + paste0("epoch_", seq( + from = save_every_nth_epoch, + by = save_every_nth_epoch, + length.out = floor(epochs / save_every_nth_epoch) + )) + ) + } + + if (!is.null(which_vaeac_model) && !is.character(which_vaeac_model)) { + stop("`vaeac.which_vaeac_model` must be a string.") + } + + if (!which_vaeac_model %in% valid_names) { + stop(paste0( + "The provided `vaeac.which_vaeac_model` ('", which_vaeac_model, "') does not match any of the valid values: '", + paste(valid_names, collapse = "', '"), "'." + )) + } +} + +#' Function that checks provided epoch arguments +#' +#' @inheritParams vaeac_train_model +#' +#' @return The function does not return anything. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_check_epoch_values <- function(epochs, epochs_initiation_phase, epochs_early_stopping, save_every_nth_epoch) { + if (epochs_initiation_phase >= epochs) { + stop(paste0( + "'vaeac.epochs_initiation_phase' (", epochs_initiation_phase, ") must be strictly less than ", + "'vaeac.epochs' (", epochs, ")." + )) + } + + if (epochs_early_stopping > epochs) { + message(paste0( + "No early stopping as `vaeac.epochs_early_stopping` (", epochs_early_stopping, ") is larger than ", + "`vaeac.epochs` (", epochs, ")." + )) + } + + # Ensure a valid value for save_every_nth_epoch. + if (!is.null(save_every_nth_epoch) && save_every_nth_epoch > epochs) { + stop(paste0("Number of 'epochs' (", epochs, ") is less than 'save_every_nth_epoch' (", save_every_nth_epoch, ").")) + } + # Ensure a valid value for save_every_nth_epoch. 
+ if (!is.null(save_every_nth_epoch) && save_every_nth_epoch <= epochs_initiation_phase) { + stop(paste0( + "Number of 'epochs_initiation_phase' (", epochs_initiation_phase, ") is less than ", + "'save_every_nth_epoch' (", save_every_nth_epoch, ")." + )) + } +} + +#' Function that checks the provided activation function +#' +#' @inheritParams vaeac_train_model +#' +#' @return The function does not return anything. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_check_activation_func <- function(activation_function) { + # In future, check that it is one of the activation functions and not just a nn_module + # Check that activation function is an nn_module + if (!any("nn_module" %in% class(activation_function))) stop("`vaeac.activation_function` is not an `nn_module`.") +} + +#' Function that checks the specified masking scheme +#' +#' @inheritParams vaeac_train_model +#' +#' @return The function does not return anything. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_check_mask_gen <- function(mask_gen_coalitions, mask_gen_coalitions_prob, x_train) { + masks <- mask_gen_coalitions + probs <- mask_gen_coalitions_prob + + if (!is.null(masks) || !is.null(probs)) { + if (xor(is.null(masks), is.null(probs))) { + stop( + "Either both `vaeac.mask_gen_coalitions` and `vaeac.mask_gen_coalitions_prob` need to `NULL` ", + "or both have to be specified." + ) + } + + if (!is.matrix(masks)) stop("`vaeac.mask_gen_coalitions` must be a matrix.") + if (!is.numeric(probs)) stop("`vaeac.mask_gen_coalitions_prob` must be an array.") + + if (nrow(masks) != length(probs)) { + stop( + "The number of rows in `vaeac.mask_gen_coalitions` must be equal to the length of ", + "`vaeac.mask_gen_coalitions_prob`." + ) + } + + if (ncol(masks) != ncol(x_train)) { + stop( + "The number of columns in `vaeac.mask_gen_coalitions` must be equal to the number of ", + "columns in the `x_train`. That is, the number of features." + ) + } + } +} + +#' Function the checks the verbose parameter +#' +#' @inheritParams vaeac_train_model +#' +#' @return The function does not return anything. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_check_verbose <- function(verbose) { + if (!is.numeric(verbose) || !(verbose %in% c(0, 1, 2))) { + stop("`vaeac.verbose` must be either `0` (no verbosity), `1` (low verbosity), or `2` (high verbosity).") + } +} + +#' Function that checks that the save folder exists and for a valid file name +#' +#' @inheritParams vaeac_train_model +#' +#' @return The function does not return anything. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_check_save_names <- function(folder_to_save_model, model_description) { + if (!is.character(folder_to_save_model)) stop("`vaeac.folder_to_save_model` must be a string.") + if (!is.character(model_description)) stop("`vaeac.model_description` must be a string.") + if (!dir.exists(folder_to_save_model)) { + stop(paste0("the folder `vaeac.folder_to_save_model` ('", folder_to_save_model, "') does not exist.")) + } + if (!grepl("^[A-Za-z0-9._-]+$", model_description)) { + stop(paste0( + "`vaeac.model_description` can only contain uppercase and lowercase letters, ", + "digits, dots, underscores, and hyphens." + )) + } +} + +#' Function that checks for access to CUDA +#' +#' @inheritParams vaeac_train_model +#' +#' @return The function does not return anything. 
+#'
+#' @keywords internal
+#' @author Lars Henry Berge Olsen
+vaeac_check_cuda <- function(cuda) {
+  # Check if cuda/GPU is available on the current system
+  cuda_available <- torch::cuda_is_available()
+
+  # Give a message to the user if they asked to run on cuda, but cuda is not available.
+  if (isFALSE(cuda_available) && isTRUE(cuda)) {
+    cuda <- FALSE
+    message("Cuda/GPU is not available (`shapr` uses the CPU instead).")
+  }
+
+  return(cuda)
+}
+
+#' Function that checks that the masking ratio argument is valid
+#'
+#' @inheritParams vaeac_train_model
+#' @param n_features The number of features, i.e., the number of columns in the training data.
+#'
+#' @return The function does not return anything.
+#'
+#' @keywords internal
+#' @author Lars Henry Berge Olsen
+vaeac_check_masking_ratio <- function(masking_ratio, n_features) {
+  if (length(masking_ratio) > 1 && length(masking_ratio) != n_features) {
+    stop(paste0(
+      "'vaeac.masking_ratio' contains masking ratios for '", length(masking_ratio), "' features, ",
+      "but there are '", n_features, "' features in 'x_train'."
+    ))
+  }
+}
+
+#' Function that gives a warning about disk usage
+#'
+#' @param x_train_size The object size of the `x_train` object.
+#' @inheritParams vaeac_train_model
+#'
+#' @return The function does not return anything.
+#'
+#' @keywords internal
+#' @author Lars Henry Berge Olsen
+vaeac_check_save_parameters <- function(save_data, epochs, save_every_nth_epoch, x_train_size) {
+  if (save_data && !is.null(save_every_nth_epoch) && epochs / save_every_nth_epoch > 5) {
+    message(paste0(
+      "Having `save_data = TRUE` and `save_every_nth_epoch = ", save_every_nth_epoch, "` might require ",
+      "a lot of disk storage if `x_train` (", x_train_size, ") is large."
+    ))
+  }
+}
+
+#' Function that checks the feature names of data and `vaeac` model
+#'
+#' @param feature_names_vaeac Array of strings containing the feature names of the `vaeac` model.
+#' @param feature_names_new Array of strings containing the feature names to compare with.
+#'
+#' @return The function does not return anything.
+#'
+#' @keywords internal
+#' @author Lars Henry Berge Olsen
+vaeac_check_x_train_names <- function(feature_names_vaeac, feature_names_new) {
+  n_features_vaeac <- length(feature_names_vaeac)
+  n_features_new <- length(feature_names_new)
+
+  # Check for an equal number of features
+  if (n_features_new != n_features_vaeac) {
+    stop(paste0(
+      "The provided `vaeac` model is trained on a ", n_features_vaeac, "-dimensional dataset, but the current ",
+      "dataset is ", n_features_new, "-dimensional."
+    ))
+  }
+
+  # Check that the feature names of x_train match the names of the training data used to train the vaeac model
+  if (!isTRUE(all.equal(feature_names_vaeac, feature_names_new))) {
+    stop(paste0(
+      "The training data's feature names (`", paste(feature_names_new, collapse = "`, `"), "`) do not match the ",
+      "names of the `vaeac` model's original training data (`", paste(feature_names_vaeac, collapse = "`, `"), "`)."
+    ))
+  }
+}
+
+#' Function that calls all vaeac parameters check functions
+#'
+#' @inheritParams vaeac_train_model
+#'
+#' @return The function does not return anything.
+#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_check_parameters <- function(x_train, + model_description, + folder_to_save_model, + cuda, + n_vaeacs_initialize, + epochs_initiation_phase, + epochs, + epochs_early_stopping, + save_every_nth_epoch, + val_ratio, + val_iwae_n_samples, + depth, + width, + latent_dim, + lr, + batch_size, + running_avg_n_values, + activation_function, + skip_conn_layer, + skip_conn_masked_enc_dec, + batch_normalization, + paired_sampling, + masking_ratio, + mask_gen_coalitions, + mask_gen_coalitions_prob, + sigma_mu, + sigma_sigma, + save_data, + log_exp_cont_feat, + which_vaeac_model, + verbose, + seed, + ...) { + # Check verbose parameter + vaeac_check_verbose(verbose = verbose) + + # Check that the activation function is valid torch::nn_module object + vaeac_check_activation_func(activation_function = activation_function) + + # Check that the save folder exists and for a valid file name + vaeac_check_save_names(folder_to_save_model = folder_to_save_model, model_description = model_description) + + # Check the probability parameters + vaeac_check_probabilities(list(val_ratio = val_ratio, masking_ratio = masking_ratio)) + + # Check the masking ratio + vaeac_check_masking_ratio(masking_ratio = masking_ratio, n_features = ncol(x_train)) + + # Check the positive numeric parameters + vaeac_check_positive_numerics(list(lr = lr, sigma_mu = sigma_mu, sigma_sigma = sigma_sigma)) + + # Check the mask_gen_coalitions and mask_gen_coalitions_prob parameters + vaeac_check_mask_gen( + mask_gen_coalitions = mask_gen_coalitions, + mask_gen_coalitions_prob = mask_gen_coalitions_prob, + x_train = x_train + ) + + # Check the logical parameters + vaeac_check_logicals(list( + cuda = cuda, + skip_conn_layer = skip_conn_layer, + skip_conn_masked_enc_dec = skip_conn_masked_enc_dec, + batch_normalization = batch_normalization, + paired_sampling = paired_sampling, + save_data = save_data, + log_exp_cont_feat = log_exp_cont_feat + )) + + # Check the positive integer parameters + unchecked_positive_integers <- list( + n_vaeacs_initialize = n_vaeacs_initialize, + epochs = epochs, + epochs_early_stopping = epochs_early_stopping, + epochs_initiation_phase = epochs_initiation_phase, + val_iwae_n_samples = val_iwae_n_samples, + depth = depth, + width = width, + latent_dim = latent_dim, + batch_size = batch_size, + running_avg_n_values = running_avg_n_values, + seed = seed + ) + if (!is.null(save_every_nth_epoch)) unchecked_positive_integers$save_every_nth_epoch <- save_every_nth_epoch + vaeac_check_positive_integers(unchecked_positive_integers) + + # Check the epoch values + vaeac_check_epoch_values( + epochs = epochs, + epochs_initiation_phase = epochs_initiation_phase, + epochs_early_stopping = epochs_early_stopping, + save_every_nth_epoch = save_every_nth_epoch + ) + + # Check the save parameters + vaeac_check_save_parameters( + save_data = save_data, + epochs = epochs, + save_every_nth_epoch = save_every_nth_epoch, + x_train_size = format(utils::object.size(x_train), units = "auto") + ) + + # Check that user want to use the vaeac model at a valid checkpoint + vaeac_check_which_vaeac_model( + which_vaeac_model = which_vaeac_model, + epochs = epochs, + save_every_nth_epoch = save_every_nth_epoch + ) +} + +# Get functions ======================================================================================================== +#' Function to specify the extra parameters in the `vaeac` model +#' +#' @description In this function, we specify the default values for the extra 
parameters used in [shapr::explain()]
+#' for `approach = "vaeac"`.
+#'
+#' @param vaeac.model_description String (default is `make.names(Sys.time())`). String containing, e.g., the name of the
+#' data distribution or additional parameter information. Used in the save name of the fitted model. If not provided,
+#' then a name will be generated based on [base::Sys.time()] to ensure a unique name. We use [base::make.names()] to
+#' ensure a valid file name for all operating systems.
+#' @param vaeac.folder_to_save_model String (default is [base::tempdir()]). String specifying a path to a folder where
+#' the function is to save the fitted vaeac model. Note that the path will be removed from the returned
+#' [shapr::explain()] object if `vaeac.save_model = FALSE`.
+#' @param vaeac.pretrained_vaeac_model List or String (default is `NULL`). 1) Either a list of class
+#' `vaeac`, i.e., the list stored in `explanation$internal$parameters$vaeac` where `explanation` is the returned list
+#' from an earlier call to the [shapr::explain()] function. 2) A string containing the path to where the `vaeac`
+#' model is stored on disk, for example, `explanation$internal$parameters$vaeac$models$best`.
+#' @param vaeac.cuda Logical (default is `FALSE`). If `TRUE`, then the `vaeac` model will be trained using cuda/GPU.
+#' If [torch::cuda_is_available()] is `FALSE`, then we fall back to using the CPU. If `FALSE`, we use the CPU, which is
+#' often faster for tabular data sets. Note that cuda is not supported in the current version of the `shapr` package.
+#' TODO: Update this when this is done.
+#' @param vaeac.epochs_initiation_phase Positive integer (default is `2`). The number of epochs to run each of the
+#' `vaeac.n_vaeacs_initialize` `vaeac` models before continuing to train only the best performing model.
+#' @param vaeac.epochs_early_stopping Positive integer (default is `NULL`). The training stops if there has been no
+#' improvement in the validation IWAE for `vaeac.epochs_early_stopping` epochs. If the user wants the training process
+#' to be solely based on this training criterion, then `vaeac.epochs` in [shapr::explain()] should be set to a large
+#' number. If `NULL`, then `shapr` will internally set `vaeac.epochs_early_stopping = vaeac.epochs` such that early
+#' stopping does not occur.
+#' @param vaeac.save_every_nth_epoch Positive integer (default is `NULL`). If provided, then the vaeac model after
+#' every `vaeac.save_every_nth_epoch`th epoch will be saved.
+#' @param vaeac.val_ratio Numeric (default is `0.25`). Scalar between `0` and `1` indicating the ratio of
+#' instances from the input data which will be used as validation data. That is, `vaeac.val_ratio = 0.25` means
+#' that `75%` of the provided data is used as training data, while the remaining `25%` is used as validation data.
+#' @param vaeac.val_iwae_n_samples Positive integer (default is `25`). The number of generated samples used
+#' to compute the IWAE criterion when validating the vaeac model on the validation data.
+#' @param vaeac.batch_size Positive integer (default is `64`). The number of samples to include in each batch
+#' during the training of the vaeac model. Used in [torch::dataloader()].
+#' @param vaeac.batch_size_sampling Positive integer (default is `NULL`). The number of samples to include in
+#' each batch when generating the Monte Carlo samples. If `NULL`, then the function generates the Monte Carlo samples
+#' for the provided coalitions/combinations and all explicands sent to [shapr::explain()] at the same time.
+#' The number of coalitions is determined by `n_batches` in [shapr::explain()]. We recommend tweaking `n_batches`
+#' rather than `vaeac.batch_size_sampling`. Larger batch sizes are often much faster provided sufficient memory.
+#' @param vaeac.running_avg_n_values Positive integer (default is `5`). The number of previous IWAE values to include
+#' when we compute the running means of the IWAE criterion.
+#' @param vaeac.skip_conn_layer Logical (default is `TRUE`). If `TRUE`, we apply identity skip connections in each
+#' layer, see [shapr::SkipConnection()]. That is, we add the input \eqn{X} to the outcome of each hidden layer,
+#' so the output becomes \eqn{X + activation(WX + b)}.
+#' @param vaeac.skip_conn_masked_enc_dec Logical (default is `TRUE`). If `TRUE`, we apply concatenated skip
+#' connections between the layers in the masked encoder and decoder. The first layer of the masked encoder will be
+#' linked to the last layer of the decoder. The second layer of the masked encoder will be
+#' linked to the second to last layer of the decoder, and so on.
+#' @param vaeac.batch_normalization Logical (default is `FALSE`). If `TRUE`, we apply batch normalization after the
+#' activation function. Note that if `vaeac.skip_conn_layer = TRUE`, then the normalization is applied after the
+#' inclusion of the skip connection. That is, we batch normalize the whole quantity \eqn{X + activation(WX + b)}.
+#' @param vaeac.paired_sampling Logical (default is `TRUE`). If `TRUE`, we apply paired sampling to the training
+#' batches. That is, the training observations in each batch will be duplicated, where the first instance will be masked
+#' by \eqn{S} while the second instance will be masked by \eqn{\bar{S}}. This ensures that the training of the
+#' `vaeac` model becomes more stable as the model has access to the full version of each training observation. However,
+#' this will increase the training time due to the more complex implementation and the doubled batch size. See
+#' [shapr::paired_sampler()] for more information.
+#' @param vaeac.masking_ratio Numeric (default is `0.5`). Probability of masking a feature in the
+#' [shapr::MCAR_mask_generator()] (MCAR = Missing Completely At Random). The MCAR masking scheme ensures that the
+#' `vaeac` model can do arbitrary conditioning, as the model is trained on all coalitions. `vaeac.masking_ratio` will
+#' be overruled if `vaeac.mask_gen_coalitions` is specified.
+#' @param vaeac.mask_gen_coalitions Matrix (default is `NULL`). Matrix containing the coalitions that the
+#' `vaeac` model will be trained on, see [shapr::Specified_masks_mask_generator()]. This parameter is used internally
+#' in `shapr` when we only consider a subset of coalitions/combinations, i.e., when
+#' `n_combinations` \eqn{< 2^{n_{\text{features}}}}, and for group Shapley, i.e.,
+#' when `group` is specified in [shapr::explain()].
+#' @param vaeac.mask_gen_coalitions_prob Numeric array (default is `NULL`). Array of length equal to the number of
+#' rows of `vaeac.mask_gen_coalitions` containing the probabilities of sampling the corresponding coalitions in
+#' `vaeac.mask_gen_coalitions`.
+#' @param vaeac.sigma_mu Numeric (default is `1e4`). One of two hyperparameter values in the normal-gamma prior
+#' used in the masked encoder, see Section 3.3.1 in
+#' \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.
+#' @param vaeac.sigma_sigma Numeric (default is `1e-4`).
One of two hyperparameter values in the normal-gamma prior
+#' used in the masked encoder, see Section 3.3.1 in
+#' \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.
+#' @param vaeac.save_data Logical (default is `FALSE`). If `TRUE`, then the data is stored together with
+#' the model. Useful if one is to continue training the model later using [shapr::vaeac_continue_train_model()].
+#' @param vaeac.log_exp_cont_feat Logical (default is `FALSE`). Whether we are to \eqn{\log} transform all
+#' continuous features before sending the data to [shapr::vaeac()]. The `vaeac` model creates unbounded Monte Carlo
+#' sample values. Thus, if the continuous features are strictly positive (as for, e.g., the Burr distribution and
+#' Abalone data set), it can be advantageous to \eqn{\log} transform the data to unbounded form before using `vaeac`.
+#' If `TRUE`, then [shapr::vaeac_postprocess_data()] will take the \eqn{\exp} of the results to get back to strictly
+#' positive values when using the `vaeac` model to impute missing values/generate the Monte Carlo samples.
+#' @param vaeac.sample_random Logical (default is `TRUE`). If `TRUE`, the function generates random Monte Carlo samples
+#' from the inferred generative distributions. If `FALSE`, the function uses the most likely values, i.e., the mean and
+#' the class with the highest probability for continuous and categorical features, respectively.
+#' @param vaeac.which_vaeac_model String (default is `"best"`). The name of the `vaeac` model (snapshots from different
+#' epochs) to use when generating the Monte Carlo samples. The standard choices are: `"best"` (the epoch with the best
+#' validation IWAE), `"best_running"` (the epoch with the best running validation IWAE, see
+#' `vaeac.running_avg_n_values`), and `"last"` (the last epoch).
+#' Note that additional choices are available if `vaeac.save_every_nth_epoch` is provided. For example, if
+#' `vaeac.save_every_nth_epoch = 5`, then `vaeac.which_vaeac_model` can also take the values `"epoch_5"`, `"epoch_10"`,
+#' `"epoch_15"`, and so on.
+#' @param vaeac.save_model Logical. If `TRUE` (default), the `vaeac` model will be saved either in a
+#' [base::tempdir()] folder or in a user specified location in `vaeac.folder_to_save_model`. If `FALSE`, then
+#' both the paths to the model and the model itself will be deleted from the returned object from [shapr::explain()].
+#'
+#' @return Named list of the default values for the `vaeac` extra parameter arguments specified in this function call.
+#' Note that both `vaeac.model_description` and `vaeac.folder_to_save_model` will change with time and R session.
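+#'
+#' @examples
+#' \dontrun{
+#' # A minimal usage sketch: overwrite a few of the extra parameters and pass the full list
+#' # on to explain() through `vaeac.extra_parameters`. Assumes a fitted `model`, `x_explain`,
+#' # `x_train`, and `p0` defined as in the package's other examples.
+#' explanation <- explain(
+#'   model = model,
+#'   x_explain = x_explain,
+#'   x_train = x_train,
+#'   approach = "vaeac",
+#'   prediction_zero = p0,
+#'   vaeac.epochs = 10,
+#'   vaeac.extra_parameters = vaeac_get_extra_para_default(
+#'     vaeac.paired_sampling = FALSE,
+#'     vaeac.save_data = TRUE
+#'   )
+#' )
+#' }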
+#' +#' @export +#' @author Lars Henry Berge Olsen +vaeac_get_extra_para_default <- function(vaeac.model_description = make.names(Sys.time()), + vaeac.folder_to_save_model = tempdir(), + vaeac.pretrained_vaeac_model = NULL, + vaeac.cuda = FALSE, + vaeac.epochs_initiation_phase = 2, + vaeac.epochs_early_stopping = NULL, + vaeac.save_every_nth_epoch = NULL, + vaeac.val_ratio = 0.25, + vaeac.val_iwae_n_samples = 25, + vaeac.batch_size = 64, + vaeac.batch_size_sampling = NULL, + vaeac.running_avg_n_values = 5, + vaeac.skip_conn_layer = TRUE, + vaeac.skip_conn_masked_enc_dec = TRUE, + vaeac.batch_normalization = FALSE, + vaeac.paired_sampling = TRUE, + vaeac.masking_ratio = 0.5, + vaeac.mask_gen_coalitions = NULL, + vaeac.mask_gen_coalitions_prob = NULL, + vaeac.sigma_mu = 1e4, + vaeac.sigma_sigma = 1e-4, + vaeac.sample_random = TRUE, + vaeac.save_data = FALSE, + vaeac.log_exp_cont_feat = FALSE, + vaeac.which_vaeac_model = "best", + vaeac.save_model = TRUE) { + # Return a named list with the extra parameters to the vaeac model + return(mget(methods::formalArgs(vaeac_get_extra_para_default))) +} + +#' Function to load a `vaeac` model and set it in the right state and mode +#' +#' @inheritParams vaeac_train_model +#' @param checkpoint List. This must be a loaded `vaeac` save object. That is, `torch::torch_load('vaeac_save_path')`. +#' @param mode_train Logical. If `TRUE`, the returned `vaeac` model is set to be in training mode. +#' If `FALSE`, the returned `vaeac` model is set to be in evaluation mode. +#' +#' @return A `vaeac` model with the correct state (based on `checkpoint`), sent to the desired hardware (based on +#' `cuda`), and in the right mode (based on `mode_train`). +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_get_model_from_checkp <- function(checkpoint, cuda, mode_train) { + # Check parameters + vaeac_check_logicals(list(cuda = cuda, mode_train = mode_train)) + + # Set up the model such that it is loaded before calling the `prepare_data.vaeac()` function. + vaeac_model <- vaeac( + one_hot_max_sizes = checkpoint$one_hot_max_sizes, + width = checkpoint$width, + depth = checkpoint$depth, + latent_dim = checkpoint$latent_dim, + activation_function = checkpoint$activation_function, + skip_conn_layer = checkpoint$skip_conn_layer, + skip_conn_masked_enc_dec = checkpoint$skip_conn_masked_enc_dec, + batch_normalization = checkpoint$batch_normalization, + paired_sampling = checkpoint$paired_sampling, + mask_generator_name = checkpoint$mask_generator_name, + masking_ratio = checkpoint$masking_ratio, + mask_gen_coalitions = checkpoint$mask_gen_coalitions, + mask_gen_coalitions_prob = checkpoint$mask_gen_coalitions_prob, + sigma_mu = checkpoint$sigma_mu, + sigma_sigma = checkpoint$sigma_sigma + ) + + # Set the state of the vaeac model (setting the weights and biases in the networks) + vaeac_model$load_state_dict(checkpoint$model_state_dict) + + # Apply the mode. Evaluation mode effects certain modules by, e.g., deactivating dropout layers, + # how batch norm is conducted, and so on... + if (mode_train) vaeac_model$train() else vaeac_model$eval() + + # Send the model to the GPU, if we are supposed to. Otherwise use CPU + if (cuda) vaeac_model <- vaeac_model$cuda() else vaeac_model <- vaeac_model$cpu() + + # Return the model + return(vaeac_model) +} + +#' Function that determines which mask generator to use +#' +#' @inheritParams vaeac_train_model +#' +#' @return The function does not return anything. 
+#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_get_mask_generator_name <- function(mask_gen_coalitions, + mask_gen_coalitions_prob, + masking_ratio, + verbose) { + if (!is.null(mask_gen_coalitions) && !is.null(mask_gen_coalitions_prob)) { + # User have provided mask_gen_coalitions (and mask_gen_coalitions_prob), + # and we want to use Specified_masks_mask_generator + mask_generator_name <- "Specified_masks_mask_generator" + + # Small printout + if (verbose == 2) { + message(paste0("Using 'Specified_masks_mask_generator' with '", nrow(mask_gen_coalitions), "' coalitions.")) + } + } else if (length(masking_ratio) == 1) { + # We are going to use 'MCAR_mask_generator' as masking_ratio is a singleton. + # I.e., all feature values are equally likely to be masked based on masking_ratio. + mask_generator_name <- "MCAR_mask_generator" + + # Small printout + if (verbose == 2) message(paste0("Using 'MCAR_mask_generator' with 'masking_ratio = ", masking_ratio, "'.")) + } else if (length(masking_ratio) > 1) { + # We are going to use 'Specified_prob_mask_generator' as masking_ratio is a vector (of same length as ncol(x_train). + # I.e., masking_ratio[5] specifies the probability of masking 5 features + mask_generator_name <- "Specified_prob_mask_generator" + + # We have an array of masking ratios. Then we are using the Specified_prob_mask_generator. + if (verbose == 2) { + message(paste0( + "Using 'Specified_prob_mask_generator' mask generator with 'masking_ratio = [", + paste(masking_ratio, collapse = ", "), "]'." + )) + } + } else { + stop("`vaeac` could not determine which masking scheme to use based on the givene parameter arguments.") + } + + return(mask_generator_name) +} + +#' Function that creates the save file names for the `vaeac` model +#' +#' @inheritParams vaeac_train_model +#' +#' @return Array of string containing the save files to use when training the `vaeac` model. The first three names +#' corresponds to the best, best_running, and last epochs, in that order. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_get_save_file_names <- function(model_description, + n_features, + n_train, + depth, + width, + latent_dim, + lr, + epochs, + save_every_nth_epoch, + folder_to_save_model = NULL) { + file_names <- c("best", "best_running", "last") # The standard epochs we save the vaeac model + + # Add the optional epochs to save the model + if (!is.null(save_every_nth_epoch)) { + file_names <- c(file_names, seq( + from = save_every_nth_epoch, + by = save_every_nth_epoch, + length.out = floor(epochs / save_every_nth_epoch) + )) + } + + # Create the file names + file_names <- paste0( + make.names(model_description), "_n_features_", n_features, "_n_train_", n_train, "_depth_", depth, + "_width_", width, "_latent_", latent_dim, "_lr_", lr, "_epoch_", file_names, ".pt" + ) + + # Add the (optional) path to the folder to the name + if (!is.null(folder_to_save_model)) file_names <- file.path(folder_to_save_model, file_names) + + return(file_names) +} + +#' Function to create the optimizer used to train `vaeac` +#' +#' @description +#' Only [torch::optim_adam()] is currently supported. But it is easy to add an additional option later. +#' +#' @inheritParams vaeac_train_model +#' @param vaeac_model A `vaeac` model created using [vaeac()]. +#' @param optimizer_name String containing the name of the [torch::optimizer()] to use. +#' +#' @return Array of string containing the save files to use when training the `vaeac` model. 
The first three names +#' corresponds to the best, best_running, and last epochs, in that order. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_get_optimizer <- function(vaeac_model, lr, optimizer_name = "adam") { + if (optimizer_name == "adam") { + # Create the adam optimizer + optimizer <- torch::optim_adam( + params = vaeac_model$parameters, + lr = lr, + betas = c(0.9, 0.999), + eps = 1e-08, + weight_decay = 0, + amsgrad = FALSE + ) + } else { + stop("Only the `adam` optimizer has been implemented for the `vaeac` approach.") + } + + return(optimizer) +} + + + +#' Function that extracts additional objects from the environment to the state list +#' +#' @description +#' The function extract the objects that we are going to save together with the `vaeac` model to make it possible to +#' train the model further and to evaluate it. +#' The environment should be the local environment inside the [shapr::vaeac_train_model_auxiliary()] function. +#' +#' @inheritParams vaeac_get_full_state_list +#' +#' @return List containing the values of `epoch`, `train_vlb`, `val_iwae`, `val_iwae_running`, +#' and the `state_dict()` of the vaeac model and optimizer. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_get_current_save_state <- function(environment) { + object_names <- c("epoch", "train_vlb", "val_iwae", "val_iwae_running") + objects <- lapply(object_names, function(name) environment[[name]]) + names(objects) <- object_names + objects$model_state_dict <- environment[["vaeac_model"]]$state_dict() + objects$optimizer_state_dict <- environment[["optimizer"]]$state_dict() + return(objects) +} + +#' Function that extracts the state list objects from the environment +#' +#' #' @description +#' The function extract the objects that we are going to save together with the `vaeac` model to make it possible to +#' train the model further and to evaluate it. +#' The environment should be the local environment inside the [shapr::vaeac_train_model_auxiliary()] function. +#' +#' @param environment The [base::environment()] where the objects are stored. +#' +#' @return List containing the values of `norm_mean`, `norm_std`, `model_description`, `folder_to_save_model`, +#' `n_train`, `n_features`, `one_hot_max_sizes`, `epochs`, `epochs_specified`, `epochs_early_stopping`, +#' `early_stopping_applied`, `running_avg_n_values`, `paired_sampling`, `mask_generator_name`, `masking_ratio`, +#' `mask_gen_coalitions`, `mask_gen_coalitions_prob`, `val_ratio`, `val_iwae_n_samples`, +#' `n_vaeacs_initialize`, `epochs_initiation_phase`, `width`, `depth`, `latent_dim`, `activation_function`, +#' `lr`, `batch_size`, `skip_conn_layer`, `skip_conn_masked_enc_dec`, `batch_normalization`, `cuda`, +#' `train_indices`, `val_indices`, `save_every_nth_epoch`, `sigma_mu`, +#' `sigma_sigma`, `feature_list`, `col_cat_names`, `col_cont_names`, `col_cat`, `col_cont`, `cat_in_dataset`, +#' `map_new_to_original_names`, `map_original_to_new_names`, `log_exp_cont_feat`, `save_data`, `verbose`, +#' `seed`, and `vaeac_save_file_names`. 
+#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_get_full_state_list <- function(environment) { + object_names <- c( + "norm_mean", "norm_std", "model_description", "folder_to_save_model", "n_train", "n_features", "one_hot_max_sizes", + "epochs", "epochs_specified", "epochs_early_stopping", "early_stopping_applied", "running_avg_n_values", + "paired_sampling", "mask_generator_name", "masking_ratio", "mask_gen_coalitions", + "mask_gen_coalitions_prob", "val_ratio", "val_iwae_n_samples", "n_vaeacs_initialize", + "epochs_initiation_phase", "width", "depth", "latent_dim", "activation_function", + "lr", "batch_size", "skip_conn_layer", "skip_conn_masked_enc_dec", "batch_normalization", "cuda", + "train_indices", "val_indices", "save_every_nth_epoch", "sigma_mu", "sigma_sigma", "feature_list", "col_cat_names", + "col_cont_names", "col_cat", "col_cont", "cat_in_dataset", "map_new_to_original_names", "map_original_to_new_names", + "log_exp_cont_feat", "save_data", "verbose", "seed", "vaeac_save_file_names" + ) + objects <- lapply(object_names, function(name) environment[[name]]) + names(objects) <- object_names + objects <- utils::modifyList(objects, environment[["x_train_preprocessed"]], keep.null = TRUE) # Flatten this list + return(objects) +} + + + +#' Function to extend the explicands and apply all relevant masks/coalitions +#' +#' @inheritParams explain +#' @inheritParams default_doc_explain +#' @param S The `internal$objects$S` matrix containing the possible coalitions. +#' +#' @return The extended version of `x_explain` where the masks from `S` with indices `index_features` have been applied. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_get_x_explain_extended <- function(x_explain, S, index_features) { + n_coaltions <- length(index_features) # Get the number of active coalitions + n_explain <- nrow(x_explain) # Get the number of explicands + mask <- S[index_features, , drop = FALSE] # Get the masks/coalitions we are to generate MC samples for + mask[mask == 0] <- NaN # Set zeros to `NaN` to indicate that they are missing and to be imputed by `vaeac` + x_explain_extended <- + x_explain[rep(seq_len(nrow(x_explain)), each = n_coaltions), ] # Extend the explicands `n_coalitions` times + mask_extended <- mask[rep(seq(n_coaltions), times = n_explain), ] # Extend the masks `n_expliand` times + x_explain_extended[is.na(mask_extended)] <- NaN # Apply the mask. The NaNs are features outside coalition S. + return(x_explain_extended) +} + +#' Extract the Training VLB and Validation IWAE from a list of explanations objects using the vaeac approach +#' +#' @param explanation_list A list of [explain()] objects applied to the same data, model, and +#' `vaeac` must be the used approach. If the entries in the list is named, then the function use +#' these names. Otherwise, it defaults to the approach names (with integer suffix for duplicates) +#' for the explanation objects in `explanation_list`. +#' +#' @return A data.table containing the training VLB, validation IWAE, and running validation IWAE at each epoch for +#' each vaeac model. 
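+#'
+#' @examples
+#' \dontrun{
+#' # A small sketch: `explanation_paired` and `explanation_regular` are assumed to be objects
+#' # returned by explain() with `approach = "vaeac"` applied to the same model and data.
+#' criteria_dt <- vaeac_get_evaluation_criteria(
+#'   explanation_list = list(Paired = explanation_paired, Regular = explanation_regular)
+#' )
+#'
+#' # The returned data.table has the columns Method, Epoch, VLB, IWAE, and IWAE_running,
+#' # so we can, e.g., extract the epoch with the highest validation IWAE for each method.
+#' criteria_dt[, .SD[which.max(IWAE)], by = Method]
+#' }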
+#' @export
+#' @author Lars Henry Berge Olsen
+vaeac_get_evaluation_criteria <- function(explanation_list) {
+  # Check if the user only provided a single explanation and did not put it in a list
+  if ("shapr" %in% class(explanation_list)) explanation_list <- list(explanation_list)
+
+  # Check that all explanation objects use the `vaeac` approach
+  explanation_approaches <- sapply(explanation_list, function(explanation) explanation$internal$parameters$approach)
+  if (any(explanation_approaches != "vaeac")) {
+    stop(sprintf(
+      "Explanation object number `%d` in the `explanation_list` does not use the `vaeac` approach.",
+      seq_along(explanation_approaches)[explanation_approaches != "vaeac"][1]
+    ))
+  }
+
+  # Name the elements in the explanation_list if no names have been provided
+  if (is.null(names(explanation_list))) explanation_list <- MSEv_name_explanation_list(explanation_list)
+
+  # Extract the evaluation criteria and put them into a data.table
+  vaeac_VLB_IWAE_dt <- data.table::rbindlist(
+    lapply(explanation_list, function(explanation) {
+      data.table::data.table(do.call(cbind, explanation$internal$parameters$vaeac$results))[, Epoch := .I]
+    }),
+    use.names = TRUE,
+    idcol = "Method"
+  )
+  names(vaeac_VLB_IWAE_dt)[2:4] <- c("VLB", "IWAE", "IWAE_running")
+  vaeac_VLB_IWAE_dt$Method <- factor(vaeac_VLB_IWAE_dt$Method, levels = names(explanation_list))
+  data.table::setkeyv(vaeac_VLB_IWAE_dt, c("Method", "Epoch"))
+  data.table::setcolorder(vaeac_VLB_IWAE_dt, c("Method", "Epoch"))
+
+  return(vaeac_VLB_IWAE_dt)
+}
+
+#' Function to set up data loaders and save file names
+#'
+#' @inheritParams vaeac_train_model
+#' @param train_indices Numeric array (optional) containing the indices of the training observations.
+#' No checks are conducted to validate the indices.
+#' @param val_indices Numeric array (optional) containing the indices of the validation observations.
+#' No checks are conducted to validate the indices.
+#'
+#' @return List of objects needed to train the `vaeac` model.
+#'
+#' @keywords internal
+vaeac_get_data_objects <- function(x_train,
+                                   log_exp_cont_feat,
+                                   val_ratio,
+                                   batch_size,
+                                   paired_sampling,
+                                   model_description,
+                                   depth,
+                                   width,
+                                   latent_dim,
+                                   lr,
+                                   epochs,
+                                   save_every_nth_epoch,
+                                   folder_to_save_model,
+                                   train_indices = NULL,
+                                   val_indices = NULL) {
+  if (xor(is.null(train_indices), is.null(val_indices))) {
+    stop("Either none or both of `train_indices` and `val_indices` must be given.")
+  }
+
+  # Get the dimensions of x_train
+  n_train <- nrow(x_train)
+  n_features <- ncol(x_train)
+
+  # Preprocess x_train. Turn factor levels into numerics 1,2,...,K (vaeac only accepts numerics) and keep track
+  # of the mapping of names. Optionally log-transform the continuous features. Then, finally, normalize the data.
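+  # The normalization constants computed here (`norm_mean` and `norm_std`) are kept in the saved vaeac
+  # checkpoint, and prepare_data.vaeac() later uses them to map the generated Monte Carlo samples back to
+  # the original scale of x_train.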
+ x_train_preprocessed <- vaeac_preprocess_data(data = x_train, log_exp_cont_feat = log_exp_cont_feat, normalize = TRUE) + + # Extract the preprocessed and normalized x_train as a torch tensor and the one-hot feature sizes (cont have size 1) + x_train_torch <- x_train_preprocessed$data_normalized_torch + one_hot_max_sizes <- x_train_preprocessed$one_hot_max_sizes + + # Splitting the input into a training and validation data sets + if (is.null(train_indices)) { # The val_indices will also be NULL due to the xor check above + val_size <- ceiling(n_train * val_ratio) # Number of observations in the validation set + val_indices <- sample(n_train, val_size, replace = FALSE) # Sample indices for the validation set + train_indices <- seq(n_train)[-val_indices] # The remaining indices constitutes the training set + } else { + val_size <- NULL + } + val_dataset <- vaeac_dataset(x_train_torch[val_indices], one_hot_max_sizes) # Create a torch::dataset() for vaeac + train_dataset <- vaeac_dataset(x_train_torch[train_indices], one_hot_max_sizes) # Create a torch::dataset() for vaeac + + # Ensure a valid batch size + if (batch_size > length(train_indices)) { + message(paste0( + "Decrease `batch_size` (", batch_size, ") to largest allowed value (", length(train_indices), "), ", + "i.e., the number of training observations." + )) + batch_size <- length(train_indices) + } + + # Create the Data Loader objects which iterate over the data in the Data Set objects + train_dataloader <- torch::dataloader( + dataset = train_dataset, + batch_size = batch_size, + shuffle = if (paired_sampling) FALSE else TRUE, # Must be `FALSE` when `sampler` is specified + sampler = if (paired_sampling) paired_sampler(train_dataset, shuffle = TRUE) else NULL + ) + + val_dataloader <- torch::dataloader( + dataset = val_dataset, + batch_size = batch_size, + shuffle = FALSE, + sampler = if (paired_sampling) paired_sampler(val_dataset, shuffle = FALSE) else NULL + ) + + # Get all the file names for the vaeac objects we are going to save + vaeac_save_file_names <- vaeac_get_save_file_names( + model_description = model_description, + n_features = n_features, + n_train = n_train, + depth = depth, + width = width, + latent_dim = latent_dim, + lr = lr, + epochs = epochs, + save_every_nth_epoch = save_every_nth_epoch, + folder_to_save_model = folder_to_save_model + ) + + return(list( + n_train = n_train, + n_features = n_features, + x_train_preprocessed = x_train_preprocessed, + x_train_torch = x_train_torch, + one_hot_max_sizes = one_hot_max_sizes, + val_size = val_size, + val_indices = val_indices, + train_indices = train_indices, + batch_size = batch_size, + train_dataloader = train_dataloader, + val_dataloader = val_dataloader, + vaeac_save_file_names = vaeac_save_file_names + )) +} + + +# Train functions ====================================================================================================== +#' Function used to train a `vaeac` model +#' +#' @description +#' This function can be applied both in the initialization phase when, we train several initiated `vaeac` models, and +#' to keep training the best performing `vaeac` model for the remaining number of epochs. We are in the former setting +#' when `initialization_idx` is provided and the latter when it is `NULL`. When it is `NULL`, we save the `vaeac` models +#' with lowest VLB, IWAE, running IWAE, and the epochs according to `save_every_nth_epoch` to disk. +#' +#' @inheritParams vaeac_train_model +#' @param vaeac_model A [shapr::vaeac()] object. 
The `vaeac` model this function is to train. +#' @param optimizer A [torch::optimizer()] object. See [shapr::vaeac_get_optimizer()]. +#' @param train_dataloader A [torch::dataloader()] containing the training data for the `vaeac` model. +#' @param val_dataloader A [torch::dataloader()] containing the validation data for the `vaeac` model. +#' @param train_vlb A [torch::torch_tensor()] (default is `NULL`) +#' of one dimension containing previous values for the training VLB. +#' @param val_iwae A [torch::torch_tensor()] (default is `NULL`) +#' of one dimension containing previous values for the validation IWAE. +#' @param val_iwae_running A [torch::torch_tensor()] (default is `NULL`) +#' of one dimension containing previous values for the running validation IWAE. +#' @param progressr_bar A [progressr::progressor()] object (default is `NULL`) to keep track of progress. +#' @param epochs_start Positive integer (default is `1`). At which epoch the training is starting at. +#' @param vaeac_save_file_names Array of strings containing the save file names for the `vaeac` model. +#' @param state_list Named list containing the objects returned from [shapr::vaeac_get_full_state_list()]. +#' @param initialization_idx Positive integer (default is `NULL`). The index +#' of the current `vaeac` model in the initialization phase. +#' +#' @return Depending on if we are in the initialization phase or not. Then either the trained `vaeac` model, or +#' a list of where the `vaeac` models are stored on disk and the parameters of the model. +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_train_model_auxiliary <- function(vaeac_model, + optimizer, + train_dataloader, + val_dataloader, + val_iwae_n_samples, + running_avg_n_values, + verbose, + cuda, + epochs, + save_every_nth_epoch, + epochs_early_stopping, + epochs_start = 1, + progressr_bar = NULL, + vaeac_save_file_names = NULL, + state_list = NULL, + initialization_idx = NULL, + n_vaeacs_initialize = NULL, + train_vlb = NULL, + val_iwae = NULL, + val_iwae_running = NULL) { + # Check for valid input + if (xor(is.null(initialization_idx), is.null(n_vaeacs_initialize))) { + stop("Either none or both of `initialization_idx` and `n_vaeacs_initialize` must be given.") + } + + if (is.null(state_list) && is.null(initialization_idx)) { + stop("`state_list` must be provide when `initialization_idx = NULL` to properly save the `vaeac` model.") + } + + if (is.null(vaeac_save_file_names) && is.null(initialization_idx)) { + stop(paste0( + "`vaeac_save_file_names` must be provide when `initialization_idx = NULL` ", + "to know where to save the vaeac model." + )) + } + + if (!((is.null(train_vlb) && is.null(val_iwae) && is.null(val_iwae_running)) || + (!is.null(train_vlb) && !is.null(val_iwae) && !is.null(val_iwae_running)))) { + stop("Either none or all of `train_vlb`, `val_iwae`, and `val_iwae_running` must be given.") + } + + # Variable that we change to `TRUE` if early stopping is applied + if (!is.null(state_list)) state_list$early_stopping_applied <- FALSE + + # Variables to stores the epochs of the `vaeac` at the best epoch according to IWAE and IWAE_running + if (is.null(initialization_idx)) best_epoch <- best_epoch_running <- NULL + + # Get the batch size + batch_size <- train_dataloader$batch_size + + # Extract the mask generator and the variational lower bound scale factor from the vaeac model object. 
+ mask_generator <- vaeac_model$mask_generator + vlb_scale_factor <- vaeac_model$vlb_scale_factor + + # Start the training loop + epoch <- 1 + for (epoch in seq(from = epochs_start, to = epochs)) { + # Set average variational lower bound to 0 for this epoch + avg_vlb <- 0 + + # Index to keep track of which batch we are working on. + batch_index <- 1 + + # batch <- train_dataloader$.iter()$.next() + + # Iterate over the training data + coro::loop(for (batch in train_dataloader) { + # If batch size is less than batch_size, extend it with objects from the beginning of the dataset + if (batch$shape[1] < batch_size) { + batch <- vaeac_extend_batch(batch = batch, dataloader = train_dataloader, batch_size = batch_size) + } + + # Generate mask and do an optimizer step over the mask and the batch + mask <- mask_generator(batch) + + # TODO: Send the batch and mask to Nvida GPU if we have. IS it here it should be? + if (cuda) { + batch <- batch$cuda() + mask <- mask$cuda() + } + + # Set all previous gradients to zero. + optimizer$zero_grad() + + # Compute the variational lower bound for the batch given the mask + vlb <- vaeac_model$batch_vlb(batch, mask)$mean() + + # Backpropagation: minimize the negative vlb. + vlb_loss <- (-vlb / vlb_scale_factor) + vlb_loss$backward() + + # Update the vaeac_model parameters by using the optimizer + optimizer$step() + + # Update running variational lower bound average using the recursive average formula/update. + # a + (new - a)/(i+1) = {(i+1)a + new - a}/(i+1) = { a(i) + new}/(i+1) = a *i/(i+1) + new/(i+1) + avg_vlb <- avg_vlb + (vlb$to(dtype = torch::torch_float())$clone()$detach() - avg_vlb) / batch_index + + # Update the batch index. + batch_index <- batch_index + 1 + }) # Done with one new epoch + + ## Time to evaluate the vaeac_model on the validation data, potentially save it, and check for early stopping. 
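+    ## Note that the IWAE is a lower bound on the log-likelihood of the validation data, so higher values are
+    ## better. The running IWAE below is the mean of the last `running_avg_n_values` IWAE values, which smooths
+    ## out the epoch-to-epoch fluctuations.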
+ + # Store the VLB + train_vlb <- torch::torch_cat(c(train_vlb, avg_vlb), -1) + + # Compute the validation IWAE + val_iwae_now <- vaeac_get_val_iwae( + val_dataloader = val_dataloader, + mask_generator = mask_generator, + batch_size = batch_size, + vaeac_model = vaeac_model, + val_iwae_n_samples = val_iwae_n_samples + ) + val_iwae <- torch::torch_cat(c(val_iwae, val_iwae_now), -1) + + # Compute the running validation IWAE + val_iwae_running_now <- + val_iwae[ + (-min(length(val_iwae), running_avg_n_values) + + length(val_iwae) + 1):(-1 + length(val_iwae) + 1), + drop = FALSE + ]$mean()$view(1) + val_iwae_running <- torch::torch_cat(c(val_iwae_running, val_iwae_running_now), -1) + + # Check if we are to save the models + if (is.null(initialization_idx)) { + # Save if current vaeac model has the lowest validation IWAE error + if ((max(val_iwae) <= val_iwae_now)$item() || is.null(best_epoch)) { + best_epoch <- epoch + if (verbose == 2) message("Saving `best` vaeac model at epoch ", epoch, ".") + vaeac_save_state(state_list = state_list, file_name = vaeac_save_file_names[1]) + } + + # Save if current vaeac model has the lowest running validation IWAE error + if ((max(val_iwae_running) <= val_iwae_running_now)$item() || is.null(best_epoch_running)) { + best_epoch_running <- epoch + if (verbose == 2) message("Saving `best_running` vaeac model at epoch ", epoch, ".") + vaeac_save_state(state_list = state_list, file_name = vaeac_save_file_names[2]) + } + + # Save if we are in an n'th epoch and are to save every n'th epoch + if (is.numeric(save_every_nth_epoch) && epoch %% save_every_nth_epoch == 0) { + if (verbose == 2) message("Saving `nth_epoch` vaeac model at epoch ", epoch, ".") + vaeac_save_state(state_list = state_list, file_name = vaeac_save_file_names[3 + epoch %/% save_every_nth_epoch]) + } + } + + # Handle the message to the progress bar based on if we are doing initialization or final training + if (!is.null(progressr_bar)) { + update_message <- if (!is.null(initialization_idx)) { + paste0( + "Training vaeac (init. ", initialization_idx, " of ", n_vaeacs_initialize, "): Epoch: ", epoch, + " | VLB: ", vaeac_get_n_decimals(avg_vlb$item()), " | IWAE: ", vaeac_get_n_decimals(val_iwae_now$item()), " |" + ) + } else { + paste0( + "Training vaeac (final model): Epoch: ", epoch, " | best epoch: ", best_epoch, + " | VLB: ", vaeac_get_n_decimals(avg_vlb$item()), " | IWAE: ", vaeac_get_n_decimals(val_iwae_now$item()), " |" + ) + } + progressr_bar(message = update_message) + } + + # Check if we are to apply early stopping, i.e., no improvement in the IWAE for `epochs_early_stopping` epochs. + if (is.numeric(epochs_early_stopping)) { + if (epoch - best_epoch >= epochs_early_stopping) { + if (verbose == 2) { + message(paste0( + "No IWAE improvment in ", epochs_early_stopping, " epochs. Apply early stopping at epoch ", + epoch, "." + )) + } + if (!is.null(progressr_bar)) progressr_bar("Training vaeac (early stopping)", amount = epochs - epoch) + state_list$early_stopping_applied <- TRUE # Add that we did early stopping to the state list + state_list$epochs <- epoch # Update the number of used epochs. 
+ break # Stop the training loop + } + } + } # Done with all epochs in training phase + + # Find out what to return + if (!is.null(initialization_idx)) { + # Here we return the models and the optimizer which we will train further if this was the best initialization + return_list <- list( + vaeac_model = vaeac_model, + optimizer = optimizer, + train_vlb = train_vlb, + val_iwae = val_iwae, + val_iwae_running = val_iwae_running, + avg_vlb = avg_vlb, + initialization_idx = initialization_idx, + state_list = state_list + ) + } else { + # Save the vaeac model at the last epoch + if (verbose == 2) message("Saving `last` vaeac model at epoch ", epoch, ".") + last_state <- vaeac_save_state(state_list = state_list, file_name = vaeac_save_file_names[3], return_state = TRUE) + + # Summary printout + if (verbose == 2) vaeac_print_train_summary(best_epoch, best_epoch_running, last_state) + + # Create a return list + return_list <- list( + best = vaeac_save_file_names[1], + best_running = vaeac_save_file_names[2], + last = vaeac_save_file_names[3], + train_vlb = as.array(train_vlb), + val_iwae = as.array(val_iwae), + val_iwae_running = as.array(val_iwae_running), + parameters = last_state + ) + + # Add the potentially additional save names + if (!is.null(vaeac_save_file_names) && length(vaeac_save_file_names) > 3) { + return_list <- append( + return_list, + setNames( + as.list(vaeac_save_file_names[-(1:3)]), + paste0("epoch_", save_every_nth_epoch * seq(length(vaeac_save_file_names) - 3)) + ), + 3 + ) + } + + # Update the class of the returned object + attr(return_list, "class") <- c("vaeac", class(return_list)) + } + return(return_list) +} + +#' Function to get string of values with specific number of decimals +#' +#' @param value The number to get `n_decimals` for. +#' @param n_decimals Positive integer. The number of decimals. Default is three. +#' +#' @return String of `value` with `n_decimals` decimals. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_get_n_decimals <- function(value, n_decimals = 3) { + trimws(format(round(value, n_decimals), nsmall = n_decimals)) +} + +# Save functions ======================================================================================================= +#' Function that saves the state list and the current save state of the `vaeac` model +#' +#' @param state_list List containing all the parameters in the state. +#' @param file_name String containing the file path. +#' @param return_state Logical if we are to return the state list or not. +#' +#' @return This function does not return anything +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_save_state <- function(state_list, file_name, return_state = FALSE) { + state <- modifyList(state_list, vaeac_get_current_save_state(parent.frame()), keep.null = TRUE) + class(state) <- c(class(state), "vaeac") + torch::torch_save(state, file_name) + if (return_state) { + return(state) + } +} + +# Print functions ====================================================================================================== +#' Function to printout a training summary for the `vaeac` model +#' +#' @param best_epoch Positive integer. The epoch with the lowest validation error. +#' @param best_epoch_running Positive integer. The epoch with the lowest running validation error. +#' @param last_epoch The state list (i.e., the saved `vaeac` object) +#' of `vaeac` model at the epoch with the lowest IWAE. +#' +#' @return This function only prints out a message. 
+#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_print_train_summary <- function(best_epoch, best_epoch_running, last_state) { + message(sprintf( + "\nResults of the `vaeac` training process: +Best epoch: %d. \tVLB = %.3f \tIWAE = %.3f \tIWAE_running = %.3f +Best running avg epoch: %d. \tVLB = %.3f \tIWAE = %.3f \tIWAE_running = %.3f +Last epoch: %d. \tVLB = %.3f \tIWAE = %.3f \tIWAE_running = %.3f\n", + best_epoch, + last_state$train_vlb[best_epoch], + last_state$val_iwae[best_epoch], + last_state$val_iwae_running[best_epoch], + best_epoch_running, + last_state$train_vlb[best_epoch_running], + last_state$val_iwae[best_epoch_running], + last_state$val_iwae_running[best_epoch_running], + last_state$epoch, + last_state$train_vlb[-1], + last_state$val_iwae[-1], + last_state$val_iwae_running[-1] + )) +} + +# Update functions ===================================================================================================== +#' Move `vaeac` parameters to correct location +#' +#' @description +#' This function ensures that the main and extra parameters for the `vaeac` +#' approach is located at their right locations. +#' +#' @param parameters List. The `internal$parameters` list created inside the [shapr::explain()] function. +#' +#' @return Updated version of `parameters` where all `vaeac` parameters are located at the correct location. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_update_para_locations <- function(parameters) { + # Get the name of the main parameters for the `vaeac` approach + vaeac.main_para_default_names <- methods::formalArgs(setup_approach.vaeac) + vaeac.main_para_default_names <- + vaeac.main_para_default_names[!vaeac.main_para_default_names %in% c("internal", "vaeac.extra_parameters", "...")] + + # Get the default values for vaeac's main parameters defined above into a named list + vaeac.main_para_default <- as.list(formals(sys.function(sys.parent()))) + vaeac.main_para_default <- vaeac.main_para_default[vaeac.main_para_default %in% vaeac.main_para_default_names] + + # Get the names of the vaeac's main parameters provided by the user + vaeac.main_para_user_names <- names(parameters) + vaeac.main_para_user_names <- vaeac.main_para_user_names[grepl("vaeac.", vaeac.main_para_user_names)] + vaeac.main_para_user_names <- vaeac.main_para_user_names[!vaeac.main_para_user_names %in% "vaeac.extra_parameters"] + + # Get the default values for vaeac's extra parameters into a named list + vaeac.extra_para_default <- vaeac_get_extra_para_default() + vaeac.extra_para_default_names <- names(vaeac.extra_para_default) + + # Get the names of the extra parameters provided by the user + vaeac.extra_para_user_names <- names(parameters$vaeac.extra_parameters) + + # Get the names of all parameters and the user specified parameters + vaeav.all_para_default_names <- c(vaeac.main_para_default_names, vaeac.extra_para_default_names) + + # Check if any of the main parameters with the "vaeac." 
prefix is unknown (i.e., not main or extra parameter) + not_extra_para_in_main_para <- + vaeac.main_para_user_names[!vaeac.main_para_user_names %in% vaeav.all_para_default_names] + if (length(not_extra_para_in_main_para) > 0) { + # Give a message to the user about the unknown extra parameters + warning(paste0( + "The following vaeac main parameters are not recognized (`shapr` removes them): ", + paste(strsplit(paste(paste0("`", not_extra_para_in_main_para, "`"), collapse = ", "), + ",(?=[^,]+$)", + perl = TRUE + )[[1]], collapse = " and"), ".\n" + )) + + # Delete the unknown extra parameters + parameters[not_extra_para_in_main_para] <- NULL + } + + # Check if any of the extra parameters with the "vaeac." prefix is unknown (i.e., not main or extra parameter) + not_main_para_in_extra_para <- + vaeac.extra_para_user_names[!vaeac.extra_para_user_names %in% vaeav.all_para_default_names] + if (length(not_main_para_in_extra_para) > 0) { + # Give a message to the user about the unknown extra parameters + warning(paste0( + "The following vaeac extra parameters are not recognized (`shapr` removes them): ", + paste(strsplit(paste(paste0("`", not_main_para_in_extra_para, "`"), collapse = ", "), + ",(?=[^,]+$)", + perl = TRUE + )[[1]], collapse = " and"), ".\n" + )) + + # Delete the unknown extra parameters + parameters$vaeac.extra_parameters[not_main_para_in_extra_para] <- NULL + } + + # Check for parameters that have been provided as both main and extra parameter + both_main_and_extra_para <- vaeac.extra_para_user_names[vaeac.extra_para_user_names %in% vaeac.main_para_user_names] + if (length(both_main_and_extra_para > 0)) { + # Print a message to the user and tell them that we use those in `vaeac.extra_parameters`. + warning(paste0( + "The following vaeac parameters were given as both main and extra parameters (`shapr` uses the ", + "values at the correct location ): ", + paste(strsplit(paste(paste0("`", both_main_and_extra_para, "`"), collapse = ", "), + ",(?=[^,]+$)", + perl = TRUE + )[[1]], collapse = " and"), ".\n" + )) + + # Note that we do not move it here as the moving will be fixed in the next two if-clauses + } + + # Check if any any extra parameters have been given as main parameters + extra_para_in_main_para <- vaeac.main_para_user_names[vaeac.main_para_user_names %in% vaeac.extra_para_default_names] + if (length(extra_para_in_main_para) > 0) { + warning(paste0( + "The following vaeac parameters were given as main parameters but should have been extra ", + "parameters (`shapr` fixes this): ", + paste(strsplit(paste(paste0("`", extra_para_in_main_para, "`"), collapse = ", "), + ",(?=[^,]+$)", + perl = TRUE + )[[1]], collapse = " and"), ".\n" + )) + + # Move extra parameter from the main parameters to extra_parameters list if they have NOT been specified already + parameters$vaeac.extra_parameters[extra_para_in_main_para[!extra_para_in_main_para %in% + vaeac.extra_para_user_names]] <- + parameters[extra_para_in_main_para[!extra_para_in_main_para %in% vaeac.extra_para_user_names]] + + # Remove the extra parameter from the main parameters + parameters[extra_para_in_main_para] <- NULL + } + + # Check if any any main parameters have been given as extra parameters + main_para_in_extra_para <- vaeac.extra_para_user_names[vaeac.extra_para_user_names %in% vaeac.main_para_default_names] + if (length(main_para_in_extra_para) > 0) { + # Give a message to the user about the misplaced main parameters in the extra list + warning(paste0( + "The following vaeac parameters were given as extra parameters but 
should have been main ", + "parameters (`shapr` fixes this): ", + paste(strsplit(paste(paste0("`", main_para_in_extra_para, "`"), collapse = ", "), + ",(?=[^,]+$)", + perl = TRUE + )[[1]], collapse = " and"), ".\n" + )) + + # Move main parameters from the extra_parameters list to main parameters if they have NOT been specified already + parameters[main_para_in_extra_para[!main_para_in_extra_para %in% vaeac.main_para_user_names]] <- + parameters$vaeac.extra_parameters[main_para_in_extra_para[!main_para_in_extra_para + %in% vaeac.main_para_user_names]] + + # Remove the main parameter from the extra list + parameters$vaeac.extra_parameters[main_para_in_extra_para] <- NULL + } + + # Return the fixed parameters list + return(parameters) +} + +#' Function that checks and adds a pre-trained `vaeac` model +#' +#' @param parameters List containing the parameters used within [shapr::explain()]. +#' +#' @return This function adds a valid pre-trained vaeac model to the `parameter`. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_update_pretrained_model <- function(parameters) { + # Extract the provided pre-trained vaeac model + vaeac_object <- parameters$vaeac.extra_parameters$vaeac.pretrained_vaeac_model + + # Check that it is either a list or string + if (!(is.list(vaeac_object) || is.character(vaeac_object))) { + stop("The `vaeac.pretrained_vaeac_model` parameter must be either a list or a string. Read the documentation.") + } + + # Check if we are given a list + if (is.list(vaeac_object)) { + # Check for list of type vaeac + if (!("vaeac" %in% class(vaeac_object))) stop("The `vaeac.pretrained_vaeac_model` list is not of type `vaeac`.") + vaeac_check_x_train_names( + feature_names_vaeac = vaeac_object$parameters$feature_list$labels, + feature_names_new = parameters$feature_names + ) + + # Add the pre-trained valid vaeac model to the parameters list + parameters$vaeac <- parameters$vaeac.extra_parameters$vaeac.pretrained_vaeac_model + + # Remove the pre-trained vaeac model as it has been approved as a vaeac model + parameters$vaeac.extra_parameters$vaeac.pretrained_vaeac_model <- NULL + } + + + # Check if we are given a string + if (is.character(vaeac_object)) { + # Check that the file exists + if (!file.exists(vaeac_object)) { + stop(paste0("The `vaeac.pretrained_vaeac_model` file ('", vaeac_object, "') does not exist.")) + } + + # Read in the vaeac model from the disk + vaeac_model <- torch::torch_load(vaeac_object) + + # Some very small check that we have read in a vaeac model + if (is.null(vaeac_model$model_state_dict)) { + stop("The provided file is not a vaeac model as it is missing, e.g., the `model_state_dict` entry.") + } + if (is.null(vaeac_model$optimizer_state_dict)) { + stop("The provided file is not a vaeac model as it is missing, e.g., the `optimizer_state_dict` entry.") + } + + # Check that the provided vaeac model is trained on a dataset with the same feature names + vaeac_check_x_train_names( + feature_names_vaeac = vaeac_model$feature_list$labels, + feature_names_new = parameters$feature_names + ) + + # Extract the training/validation results + evaluation_criterions <- c("train_vlb", "val_iwae", "val_iwae_running") + vaeac_model_results <- lapply(vaeac_model[evaluation_criterions], as.array) + + # Save path to the vaeac approach to use to generate the MC samples. 
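+    # The list below stores the checkpoint file path under `models$best`, the training/validation
+    # evaluation criteria under `results`, and the remaining checkpoint entries under `parameters`.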
+ parameters$vaeac <- list( + models = list(best = vaeac_object), + results = vaeac_model_results, + parameters = vaeac_model[!names(vaeac_model) %in% evaluation_criterions] + ) + + # Add `vaeac` as a class to the object. We use this to validate the input when + # `vaeac.pretrained_vaeac_model` is given to the `shapr::explain()` function. + class(parameters$vaeac) <- c(class(parameters$vaeac), "vaeac") + } + + # Return the updated parameters list + return(parameters) +} + + +# Plot functions ======================================================================================================= +#' Plot the training VLB and validation IWAE for `vaeac` models +#' +#' @description +#' This function makes ([ggplot2::ggplot()]) figures of the training VLB and the validation IWAE for a list +#' of [shapr::explain()] objects with `approach = "vaeac"`. See [setup_approach()] for more information about the +#' `vaeac` approach. Two figures are returned by the function. In the figure, each object in `explanation_list` gets +#' its own facet, while in the second figure, we plot the criteria in each facet for all objects. +#' +#' @details +#' See \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)} or the +#' \href{https://borea17.github.io/paper_summaries/iwae/}{blog post} for a summary of the VLB and IWAE. +#' +#' @param explanation_list A list of [explain()] objects applied to the same data, model, and +#' `vaeac` must be the used approach. If the entries in the list is named, then the function use +#' these names. Otherwise, it defaults to the approach names (with integer suffix for duplicates) +#' for the explanation objects in `explanation_list`. +#' @param plot_from_nth_epoch Integer. If we are only plot the results form the nth epoch and so forth. +#' The first epochs can be large in absolute value and make the rest of the plot difficult to interpret. +#' @param plot_every_nth_epoch Integer. If we are only to plot every nth epoch. Usefully to illustrate +#' the overall trend, as there can be a lot of fluctuation and oscillation in the values between each epoch. +#' @param facet_wrap_scales String. Should the scales be fixed ("`fixed`", the default), +#' free ("`free`"), or free in one dimension ("`free_x`", "`free_y`"). +#' @param facet_wrap_ncol Integer. Number of columns in the facet wrap. +#' @param criteria Character vector. The possible options are "VLB", "IWAE", "IWAE_running". Default is the first two. +#' @param plot_type Character vector. The possible options are "method" and "criterion". Default is to plot both. +#' +#' @return Either a single [ggplot2::ggplot()] object or a list of [ggplot2::ggplot()] objects based on the +#' `plot_type` parameter. +#' +#' @examples +#' \dontrun{ +#' library(xgboost) +#' library(data.table) +#' library(shapr) +#' +#' data("airquality") +#' data <- data.table::as.data.table(airquality) +#' data <- data[complete.cases(data), ] +#' +#' x_var <- c("Solar.R", "Wind", "Temp", "Month") +#' y_var <- "Ozone" +#' +#' ind_x_explain <- 1:6 +#' x_train <- data[-ind_x_explain, ..x_var] +#' y_train <- data[-ind_x_explain, get(y_var)] +#' x_explain <- data[ind_x_explain, ..x_var] +#' +#' # Fitting a basic xgboost model to the training data +#' model <- xgboost(data = as.matrix(x_train), label = y_train, nround = 100, verbose = FALSE) +#' +#' # Specifying the phi_0, i.e. 
the expected prediction without any features
+#' p0 <- mean(y_train)
+#'
+#' # Train vaeac with and without paired sampling
+#' explanation_paired <- explain(
+#'   model = model,
+#'   x_explain = x_explain,
+#'   x_train = x_train,
+#'   approach = "vaeac",
+#'   prediction_zero = p0,
+#'   n_samples = 1, # As we are only interested in the training of the vaeac
+#'   vaeac.epochs = 10, # Should be higher in applications.
+#'   vaeac.n_vaeacs_initialize = 1,
+#'   vaeac.width = 16,
+#'   vaeac.depth = 2,
+#'   vaeac.extra_parameters = list(vaeac.paired_sampling = TRUE)
+#' )
+#'
+#' explanation_regular <- explain(
+#'   model = model,
+#'   x_explain = x_explain,
+#'   x_train = x_train,
+#'   approach = "vaeac",
+#'   prediction_zero = p0,
+#'   n_samples = 1, # As we are only interested in the training of the vaeac
+#'   vaeac.epochs = 10, # Should be higher in applications.
+#'   vaeac.width = 16,
+#'   vaeac.depth = 2,
+#'   vaeac.n_vaeacs_initialize = 1,
+#'   vaeac.extra_parameters = list(vaeac.paired_sampling = FALSE)
+#' )
+#'
+#' # Collect the explanation objects in a named list
+#' explanation_list <- list(
+#'   "Regular sampling" = explanation_regular,
+#'   "Paired sampling" = explanation_paired
+#' )
+#'
+#' # Call the function with the named list, which will use the provided names
+#' vaeac_plot_evaluation_criteria(explanation_list = explanation_list)
+#'
+#' # The function also works if we have only one method,
+#' # but then one should only look at the method plot.
+#' vaeac_plot_evaluation_criteria(
+#'   explanation_list = explanation_list[2],
+#'   plot_type = "method"
+#' )
+#'
+#' # Can alter the plot
+#' vaeac_plot_evaluation_criteria(
+#'   explanation_list = explanation_list,
+#'   plot_from_nth_epoch = 2,
+#'   plot_every_nth_epoch = 2,
+#'   facet_wrap_scales = "free"
+#' )
+#'
+#' # If we only want the VLB
+#' vaeac_plot_evaluation_criteria(
+#'   explanation_list = explanation_list,
+#'   criteria = "VLB",
+#'   plot_type = "criterion"
+#' )
+#'
+#' # If we only want the criterion version
+#' tmp_fig_criterion <-
+#'   vaeac_plot_evaluation_criteria(explanation_list = explanation_list, plot_type = "criterion")
+#'
+#' # Since tmp_fig_criterion is a ggplot2 object, we can alter it
+#' # by, e.g., adding points or smooths with se bands
+#' tmp_fig_criterion + ggplot2::geom_point(shape = "circle", size = 1, ggplot2::aes(col = Method))
+#' tmp_fig_criterion$layers[[1]] <- NULL
+#' tmp_fig_criterion + ggplot2::geom_smooth(method = "loess", formula = y ~ x, se = TRUE) +
+#'   ggplot2::scale_color_brewer(palette = "Set1") +
+#'   ggplot2::theme_minimal()
+#' }
+#'
+#' @author Lars Henry Berge Olsen
+#' @export
+vaeac_plot_evaluation_criteria <- function(explanation_list,
+                                           plot_from_nth_epoch = 1,
+                                           plot_every_nth_epoch = 1,
+                                           criteria = c("VLB", "IWAE"),
+                                           plot_type = c("method", "criterion"),
+                                           facet_wrap_scales = "fixed",
+                                           facet_wrap_ncol = NULL) {
+  ## Checks
+  # Check that ggplot2 is installed
+  if (!requireNamespace("ggplot2", quietly = TRUE)) {
+    stop("ggplot2 is not installed. Please run install.packages('ggplot2')")
+  }
+
+  # Check for valid criteria argument
+  unknown_criteria <- criteria[!(criteria %in% c("VLB", "IWAE", "IWAE_running"))]
+  if (length(unknown_criteria) > 0) {
+    stop(paste0(
+      "The `criteria` must be one (or several) of 'VLB', 'IWAE', and 'IWAE_running'. ",
+      "Do not recognise: '", paste(unknown_criteria, collapse = "', '"), "'."
+ )) + } + + # Check for valid plot type argument + unknown_plot_type <- plot_type[!(plot_type %in% c("method", "criterion"))] + if (length(unknown_plot_type) > 0) { + stop(paste0( + "The `plot_type` must be one (or several) of 'method' and 'criterion'. ", + "Do not recognise: '", paste(unknown_plot_type, collapse = "', '"), "'." + )) + } + + # Ensure that even a single explanation object is in a list + if ("shapr" %in% class(explanation_list)) explanation_list <- list(explanation_list) + + ## Create data.tables + # Extract the VLB and IWAE + vaeac_VLB_IWAE_dt <- vaeac_get_evaluation_criteria(explanation_list) + + # Get the relevant criteria + keep_these_columns <- c("Method", "Epoch", criteria) + vaeac_VLB_IWAE_dt <- vaeac_VLB_IWAE_dt[, keep_these_columns, with = FALSE] + + # Check for valid `plot_from_nth_epoch` + max_epoch <- max(vaeac_VLB_IWAE_dt$Epoch) + if (plot_from_nth_epoch > max_epoch) { + stop(sprintf( + "`plot_from_nth_epoch` (%d) is larger than the number of epochs (%d)", + plot_from_nth_epoch, max_epoch + )) + } + + # Remove entries with too low epoch + vaeac_VLB_IWAE_dt <- vaeac_VLB_IWAE_dt[Epoch >= plot_from_nth_epoch, ] + + # If we are only to plot every nth epoch + vaeac_VLB_IWAE_dt <- vaeac_VLB_IWAE_dt[Epoch %% plot_every_nth_epoch == 0] + + # Convert it from wide to long + vaeac_VLB_IWAE_dt_long <- data.table::melt( + data = vaeac_VLB_IWAE_dt, + id.vars = c("Method", "Epoch"), + variable.name = "Criterion", + variable.factor = TRUE, + value.name = "Value" + ) + + ## Plot + return_object <- list() + + # Make the figure where each explanation object has its own facet + if ("method" %in% plot_type) { + return_object$figure_each_method <- + ggplot2::ggplot(vaeac_VLB_IWAE_dt_long, ggplot2::aes(x = Epoch, y = Value, col = Criterion)) + + ggplot2::labs(title = "The evaluation criterions for different vaeac models") + + ggplot2::geom_line(ggplot2::aes(group = Criterion, col = Criterion)) + + ggplot2::facet_wrap(ggplot2::vars(Method), ncol = facet_wrap_ncol, scales = facet_wrap_scales) + } + + # Make the figure where each criterion has its own facet + if ("criterion" %in% plot_type) { + return_object$figure_each_criterion <- + ggplot2::ggplot(vaeac_VLB_IWAE_dt_long, ggplot2::aes(x = Epoch, y = Value, col = Method)) + + ggplot2::labs(title = "The evaluation criterions for different vaeac models") + + ggplot2::geom_line(ggplot2::aes(group = Method, col = Method)) + + ggplot2::facet_wrap(ggplot2::vars(Criterion), ncol = facet_wrap_ncol, scales = facet_wrap_scales) + } + + # If only made one figure, then we directly return that object and not a list + if (length(return_object) == 1) return_object <- return_object[[1]] + + return(return_object) +} + +#' Plot Pairwise Plots for Imputed and True Data +#' +#' @description A function that creates a matrix of plots ([GGally::ggpairs()]) from +#' generated imputations from the unconditioned distribution \eqn{p(\boldsymbol{x})} estimated by +#' a `vaeac` model, and then compares the imputed values with data from the true distribution (if provided). +#' See \href{https://www.blopig.com/blog/2019/06/a-brief-introduction-to-ggpairs/}{ggpairs} for an +#' introduction to [GGally::ggpairs()], and the corresponding +#' \href{https://ggobi.github.io/ggally/articles/ggally_plots.html}{vignette}. +#' +#' @param explanation Shapr list. The output list from the [shapr::explain()] function. +#' @param which_vaeac_model String. Indicating which `vaeac` model to use when generating the samples. 
+#' Possible options are always `'best'`, `'best_running'`, and `'last'`. All possible options can be obtained +#' by calling `names(explanation$internal$parameters$vaeac$models)`. +#' @param x_true Data.table containing the data from the distribution that the `vaeac` model is fitted to. +#' @param upper_cont String. Type of plot to use in upper triangle for continuous features, see [GGally::ggpairs()]. +#' Possible options are: `'cor'` (default), `'points'`, `'smooth'`, `'smooth_loess'`, `'density'`, and `'blank'`. +#' @param upper_cat String. Type of plot to use in upper triangle for categorical features, see [GGally::ggpairs()]. +#' Possible options are: `'count'` (default), `'cross'`, `'ratio'`, `'facetbar'`, and `'blank'`. +#' @param upper_mix String. Type of plot to use in upper triangle for mixed features, see [GGally::ggpairs()]. +#' Possible options are: `'box'` (default), `'box_no_facet'`, `'dot'`, `'dot_no_facet'`, `'facethist'`, +#' `'facetdensity'`, `'denstrip'`, and `'blank'` +#' @param lower_cont String. Type of plot to use in lower triangle for continuous features, see [GGally::ggpairs()]. +#' Possible options are: `'points'` (default), `'smooth'`, `'smooth_loess'`, `'density'`, `'cor'`, and `'blank'`. +#' @param lower_cat String. Type of plot to use in lower triangle for categorical features, see [GGally::ggpairs()]. +#' Possible options are: `'facetbar'` (default), `'ratio'`, `'count'`, `'cross'`, and `'blank'`. +#' @param lower_mix String. Type of plot to use in lower triangle for mixed features, see [GGally::ggpairs()]. +#' Possible options are: `'facetdensity'` (default), `'box'`, `'box_no_facet'`, `'dot'`, `'dot_no_facet'`, +#' `'facethist'`, `'denstrip'`, and `'blank'`. +#' @param diag_cont String. Type of plot to use on the diagonal for continuous features, see [GGally::ggpairs()]. +#' Possible options are: `'densityDiag'` (default), `'barDiag'`, and `'blankDiag'`. +#' @param diag_cat String. Type of plot to use on the diagonal for categorical features, see [GGally::ggpairs()]. +#' Possible options are: `'barDiag'` (default) and `'blankDiag'`. +#' @param cor_method String. Type of correlation measure, see [GGally::ggpairs()]. +#' Possible options are: `'pearson'` (default), `'kendall'`, and `'spearman'`. +#' @param add_title Logical. If `TRUE`, then a title is added to the plot based on the internal description +#' of the `vaeac` model specified in `which_vaeac_model`. +#' @param alpha Numeric between `0` and `1` (default is `0.5`). The degree of color transparency. +#' +#' @return A [GGally::ggpairs()] figure. 
+#' @export
+#' @author Lars Henry Berge Olsen
+#'
+#' @examples
+#' \dontrun{
+#' library(xgboost)
+#' library(data.table)
+#' library(shapr)
+#'
+#' data("airquality")
+#' data <- data.table::as.data.table(airquality)
+#' data <- data[complete.cases(data), ]
+#'
+#' x_var <- c("Solar.R", "Wind", "Temp", "Month")
+#' y_var <- "Ozone"
+#'
+#' ind_x_explain <- 1:6
+#' x_train <- data[-ind_x_explain, ..x_var]
+#' y_train <- data[-ind_x_explain, get(y_var)]
+#' x_explain <- data[ind_x_explain, ..x_var]
+#'
+#' # Fitting a basic xgboost model to the training data
+#' model <- xgboost(
+#'   data = as.matrix(x_train),
+#'   label = y_train,
+#'   nround = 100,
+#'   verbose = FALSE
+#' )
+#'
+#' explanation <- explain(
+#'   model = model,
+#'   x_explain = x_explain,
+#'   x_train = x_train,
+#'   approach = "vaeac",
+#'   prediction_zero = mean(y_train),
+#'   n_samples = 1,
+#'   vaeac.epochs = 10,
+#'   vaeac.n_vaeacs_initialize = 1
+#' )
+#'
+#' # Plot the results
+#' figure <- vaeac_plot_imputed_ggpairs(
+#'   explanation = explanation,
+#'   which_vaeac_model = "best",
+#'   x_true = x_train,
+#'   add_title = TRUE
+#' )
+#' figure
+#'
+#' # Note that this is a ggplot2 object which we can alter, e.g., we can change the colors.
+#' figure +
+#'   ggplot2::scale_color_manual(values = c("#E69F00", "#999999")) +
+#'   ggplot2::scale_fill_manual(values = c("#E69F00", "#999999"))
+#' }
+vaeac_plot_imputed_ggpairs <- function(
+    explanation,
+    which_vaeac_model = "best",
+    x_true = NULL,
+    add_title = TRUE,
+    alpha = 0.5,
+    upper_cont = c("cor", "points", "smooth", "smooth_loess", "density", "blank"),
+    upper_cat = c("count", "cross", "ratio", "facetbar", "blank"),
+    upper_mix = c("box", "box_no_facet", "dot", "dot_no_facet", "facethist", "facetdensity", "denstrip", "blank"),
+    lower_cont = c("points", "smooth", "smooth_loess", "density", "cor", "blank"),
+    lower_cat = c("facetbar", "ratio", "count", "cross", "blank"),
+    lower_mix = c("facetdensity", "box", "box_no_facet", "dot", "dot_no_facet", "facethist", "denstrip", "blank"),
+    diag_cont = c("densityDiag", "barDiag", "blankDiag"),
+    diag_cat = c("barDiag", "blankDiag"),
+    cor_method = c("pearson", "kendall", "spearman")) {
+  # Check that ggplot2 and GGally are installed
+  if (!requireNamespace("ggplot2", quietly = TRUE)) {
+    stop("ggplot2 is not installed. Please run install.packages('ggplot2')")
+  }
+  if (!requireNamespace("GGally", quietly = TRUE)) {
+    stop("GGally is not installed. Please run install.packages('GGally')")
+  }
+
+  # Check all input parameters except `which_vaeac_model`
+  if (!"shapr" %in% class(explanation)) stop("`explanation` must be an object of type `shapr`.")
+  if (!is.null(x_true) && !is.data.table(x_true)) stop("`x_true` must be an object of type `data.table`.")
+  vaeac_check_logicals(list(add_title = add_title))
+  vaeac_check_probabilities(list(alpha = alpha))
+  upper_cont <- match.arg(upper_cont)
+  upper_cat <- match.arg(upper_cat)
+  upper_mix <- match.arg(upper_mix)
+  lower_cont <- match.arg(lower_cont)
+  lower_cat <- match.arg(lower_cat)
+  lower_mix <- match.arg(lower_mix)
+  diag_cont <- match.arg(diag_cont)
+  diag_cat <- match.arg(diag_cat)
+  cor_method <- match.arg(cor_method)
+
+  # Check if the vaeac model is expected to give a reasonable figure.
+  if (!explanation$internal$parameters$exact || explanation$internal$parameters$is_groupwise) {
+    message(
+      "The vaeac model has not been trained on the empty coalition, hence, the figure can be misleading. ",
+      "The figure is only reasonable if 'n_combinations = NULL' and 'group = NULL' in the explanation call."
+    )
+  }
+
+  # Extract the vaeac list from the explanation list
+  vaeac_list <- explanation$internal$parameters$vaeac
+
+  # Check that `which_vaeac_model` is a valid vaeac model name and then load the vaeac checkpoint
+  if (!is.character(which_vaeac_model) || !which_vaeac_model %in% names(vaeac_list$models)) {
+    stop(paste0(
+      "The parameter `which_vaeac_model` ('", which_vaeac_model, "') must be one of the following: '",
+      paste(names(vaeac_list$models), collapse = "', '"), "'."
+    ))
+  }
+  vaeac_model_path <- vaeac_list$models[[which_vaeac_model]]
+  checkpoint <- torch::torch_load(vaeac_model_path)
+
+  # Get the number of observations in the x_true and features
+  n_samples <- if (is.null(x_true)) 500 else nrow(x_true)
+  n_features <- checkpoint$n_features
+
+  # Checking for valid dimension
+  if (!is.null(x_true) && ncol(x_true) != n_features) {
+    stop(paste0(
+      "Different number of columns in the vaeac model (", n_features, ") and `x_true` (", ncol(x_true), ")."
+    ))
+  }
+
+  # Set up the vaeac model
+  vaeac_model <- vaeac_get_model_from_checkp(checkpoint = checkpoint, cuda = FALSE, mode_train = FALSE)
+
+  # Impute the missing entries using the vaeac approach. Here we generate x from p(x), so no conditioning.
+  imputed_values <- vaeac_impute_missing_entries(
+    x_explain_with_NaNs = matrix(NaN, n_samples, checkpoint$n_features),
+    n_samples = 1,
+    vaeac_model = vaeac_model,
+    checkpoint = checkpoint,
+    sampler = explanation$internal$parameters$vaeac.sampler,
+    batch_size = n_samples,
+    verbose = explanation$internal$parameters$verbose,
+    seed = explanation$internal$parameters$seed
+  )
+
+  # Combine the true (if there are any) and imputed data and ensure that the categorical features are marked as factors.
+ combined_data <- data.table(rbind(x_true, imputed_values)) + col_cat_names <- checkpoint$col_cat_names + if (length(col_cat_names) > 0) combined_data[, (col_cat_names) := lapply(.SD, as.factor), .SDcols = col_cat_names] + + # Add type variable representing if they are imputed samples or from `x_true` + combined_data$type <- + factor(rep(c("True", "Imputed"), times = c(ifelse(is.null(nrow(x_true)), 0, nrow(x_true)), n_samples))) + + # Create the ggpairs figure and potentially add title based on the description of the used vaeac model + figure <- GGally::ggpairs( + combined_data, + columns = seq(n_features), + mapping = ggplot2::aes(color = type), + diag = list(continuous = GGally::wrap(diag_cont, alpha = alpha), discrete = diag_cat), + upper = list(combo = upper_mix, discrete = upper_cat, continuous = GGally::wrap(upper_cont, method = cor_method)), + lower = list(combo = lower_mix, discrete = lower_cat, continuous = GGally::wrap(lower_cont, alpha = alpha)) + ) + if (add_title) figure <- figure + ggplot2::ggtitle(tools::file_path_sans_ext(basename(vaeac_model_path))) + + return(figure) +} diff --git a/R/approach_vaeac_torch_modules.R b/R/approach_vaeac_torch_modules.R new file mode 100644 index 000000000..3a5960d37 --- /dev/null +++ b/R/approach_vaeac_torch_modules.R @@ -0,0 +1,2410 @@ +# VAEAC Model ========================================================================================================= +## vaeac -------------------------------------------------------------------------------------------------------------- +#' Initializing a vaeac model +#' +#' @description Class that represents a vaeac model, i.e., the class creates the neural networks in the vaeac +#' model and necessary training utilities. +#' For more details, see \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}. +#' +#' @details This function builds neural networks (masked encoder, full encoder, decoder) given +#' the list of one-hot max sizes of the features in the dataset we use to train the vaeac model, +#' and the provided parameters for the networks. It also creates, e.g., reconstruction log probability function, +#' methods for sampling from the decoder output, and then use these to create the vaeac model. +#' +#' @param one_hot_max_sizes A torch tensor of dimension p containing the one hot sizes of the `n_features` features. +#' The sizes for the continuous features can either be `0` or `1`. +#' @param width Integer. The number of neurons in each hidden layer in the neural networks +#' of the masked encoder, full encoder, and decoder. +#' @param depth Integer. The number of hidden layers in the neural networks of the +#' masked encoder, full encoder, and decoder. +#' @param latent_dim Integer. The number of dimensions in the latent space. +#' @param activation_function A [torch::nn_module()] representing an activation function such as, e.g., +#' [torch::nn_relu()], [torch::nn_leaky_relu()], [torch::nn_selu()], +#' [torch::nn_sigmoid()]. +#' @param skip_conn_layer Boolean. If we are to use skip connections in each layer, see [shapr::SkipConnection()]. +#' If `TRUE`, then we add the input to the outcome of each hidden layer, so the output becomes +#' \eqn{X + \operatorname{activation}(WX + b)}. I.e., the identity skip connection. +#' @param skip_conn_masked_enc_dec Boolean. If we are to apply concatenating skip +#' connections between the layers in the masked encoder and decoder. The first layer of the masked encoder will be +#' linked to the last layer of the decoder. 
The second layer of the masked encoder will be
+#' linked to the second to last layer of the decoder, and so on.
+#' @param batch_normalization Boolean. If we are to use batch normalization after the activation function.
+#' Note that if `skip_conn_layer` is TRUE, then the normalization is
+#' done after the adding from the skip connection. I.e., we batch normalize the whole quantity X + activation(WX + b).
+#' @param paired_sampling Boolean. If we are doing paired sampling. I.e., if we are to include both coalition S
+#' and \eqn{\bar{S}} when we sample coalitions during training for each batch.
+#' @param mask_generator_name String specifying the type of mask generator to use. Needs to be one of
+#' 'MCAR_mask_generator', 'Specified_prob_mask_generator', and 'Specified_masks_mask_generator'.
+#' @param masking_ratio Scalar. The probability for an entry in the generated mask to be 1 (masked).
+#' Not used if `mask_gen_coalitions` is given.
+#' @param mask_gen_coalitions Matrix containing the different coalitions to learn.
+#' Must be given if `mask_generator_name = 'Specified_masks_mask_generator'`.
+#' @param mask_gen_coalitions_prob Numerics containing the probabilities
+#' for sampling each coalition (mask) in `mask_gen_coalitions`.
+#' @param sigma_mu Numeric representing a hyperparameter in the normal-gamma prior used on the masked encoder,
+#' see Section 3.3.1 in \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.
+#' @param sigma_sigma Numeric representing a hyperparameter in the normal-gamma prior used on the masked encoder,
+#' see Section 3.3.1 in \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.
+#'
+#' @return Returns a list with the neural networks of the masked encoder, full encoder, and decoder together
+#' with the reconstruction log probability function, optimizer constructor, sampler from the decoder output,
+#' mask generator, batch size, and scale factor for the stability of the variational lower bound optimization.
+#'
+#' @section make_observed:
+#' Apply Mask to Batch to Create Observed Batch
+#'
+#' Clones the batch and applies the mask to set the masked entries to 0, which creates the observed batch.
+#'
+#' @section make_latent_distributions:
+#' Compute the Latent Distributions Inferred by the Encoders
+#'
+#' Compute the parameters for the latent normal distributions inferred by the encoders.
+#' If `only_masked_encoder = TRUE`, then we only compute the latent normal distributions inferred by the
+#' masked encoder. This is used in the deployment phase when we do not have access to the full observation.
+#'
+#' @section masked_encoder_regularization:
+#' Compute the Regularizer for the Latent Distribution Inferred by the Masked Encoder.
+#'
+#' The masked encoder (prior) distribution regularization in the latent space.
+#' This is used to compute the extended variational lower bound used to train vaeac, see
+#' Section 3.3.1 in \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.
+#' Though regularizing prevents the masked encoder distribution parameters from going to infinity,
+#' the model usually doesn't diverge even without this regularization. 
It almost doesn't affect +#' learning process near zero with default regularization parameters which are recommended to be used. +#' +#' @section batch_vlb: +#' Compute the Variational Lower Bound for the Observations in the Batch +#' +#' Compute differentiable lower bound for the given batch of objects and mask. +#' Used as the (negative) loss function for training the vaeac model. +#' +#' @section batch_iwae: +#' Compute IWAE log likelihood estimate with K samples per object. +#' +#' Technically, it is differentiable, but it is recommended to use it for +#' evaluation purposes inside torch.no_grad in order to save memory. With [torch::with_no_grad()] +#' the method almost doesn't require extra memory for very large K. The method makes K independent +#' passes through decoder network, so the batch size is the same as for training with batch_vlb. +#' IWAE is an abbreviation for Importance Sampling Estimator: +#' \deqn{ +#' \log p_{\theta, \psi}(x|y) \approx +#' \log {\frac{1}{K} \sum_{i=1}^K [p_\theta(x|z_i, y) * p_\psi(z_i|y) / q_\phi(z_i|x,y)]} \newline +#' = +#' \log {\sum_{i=1}^K \exp(\log[p_\theta(x|z_i, y) * p_\psi(z_i|y) / q_\phi(z_i|x,y)])} - \log(K) \newline +#' = +#' \log {\sum_{i=1}^K \exp(\log[p_\theta(x|z_i, y)] + \log[p_\psi(z_i|y)] - \log[q_\phi(z_i|x,y)])} - \log(K) \newline +#' = +#' \operatorname{logsumexp}(\log[p_\theta(x|z_i, y)] + \log[p_\psi(z_i|y)] - \log[q_\phi(z_i|x,y)]) - \log(K) \newline +#' = +#' \operatorname{logsumexp}(\text{rec}\_\text{loss} + \text{prior}\_\text{log}\_\text{prob} - +#' \text{proposal}\_\text{log}\_\text{prob}) - \log(K),} +#' where \eqn{z_i \sim q_\phi(z|x,y)}. +#' +#' @section generate_samples_params: +#' Generate the parameters of the generative distributions for samples from the batch. +#' +#' The function makes K latent representation for each object from the batch, send these +#' latent representations through the decoder to obtain the parameters for the generative distributions. +#' I.e., means and variances for the normal distributions (continuous features) and probabilities +#' for the categorical distribution (categorical features). +#' The second axis is used to index samples for an object, i.e. if the batch shape is \[n x D1 x D2\], then +#' the result shape is \[n x K x D1 x D2\]. It is better to use it inside [torch::with_no_grad()] in order to save +#' memory. With [torch::with_no_grad()] the method doesn't require extra memory except the memory for the result. +#' +#' @author Lars Henry Berge Olsen +#' @keywords internal +vaeac <- torch::nn_module( + + # Name of the torch::nn_module object + classname = "vaeac", + + # Initializing a vaeac model + initialize = function(one_hot_max_sizes, + width = 32, + depth = 3, + latent_dim = 8, + activation_function = torch::nn_relu, + skip_conn_layer = FALSE, + skip_conn_masked_enc_dec = FALSE, + batch_normalization = FALSE, + paired_sampling = FALSE, + mask_generator_name = c( + "MCAR_mask_generator", + "Specified_prob_mask_generator", + "Specified_masks_mask_generator" + ), + masking_ratio = 0.5, + mask_gen_coalitions = NULL, + mask_gen_coalitions_prob = NULL, + sigma_mu = 1e4, + sigma_sigma = 1e-4) { + # Check that a valid mask_generator was provided. + mask_generator_name <- match.arg(mask_generator_name) + + # Get the number of features + n_features <- length(one_hot_max_sizes) + + # Extra strings to add to names of layers depending on if we use memory layers and/or batch normalization. + # If FALSE, they are just an empty string and do not effect the names. 
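+    # Illustrative example: with `skip_conn_layer = TRUE`, `skip_conn_masked_enc_dec = TRUE`, and
+    # `batch_normalization = FALSE`, the first hidden layer of the masked encoder below is named
+    # 'hidden_layer_1_skip_conn_with_linear_and_activation_and_memory'.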
+ name_extra_memory_layer <- ifelse(skip_conn_masked_enc_dec, "_and_memory", "") + name_extra_batch_normalize <- ifelse(batch_normalization, "_and_batch_norm", "") + + # Save some of the initializing hyperparameters to the vaeac object. Others are saved later. + self$one_hot_max_sizes <- one_hot_max_sizes + self$depth <- depth + self$width <- width + self$latent_dim <- latent_dim + self$activation_function <- activation_function + self$skip_conn_layer <- skip_conn_layer + self$skip_conn_masked_enc_dec <- skip_conn_masked_enc_dec + self$batch_normalization <- batch_normalization + self$sigma_mu <- sigma_mu + self$sigma_sigma <- sigma_sigma + self$paired_sampling <- paired_sampling + + # Save the how to compute the loss and how to sample from the vaeac model. + self$reconstruction_log_prob <- GaussCatLoss(one_hot_max_sizes) + self$sampler_most_likely <- GaussCatSamplerMostLikely(one_hot_max_sizes) + self$sampler_random <- GaussCatSamplerRandom(one_hot_max_sizes) + self$generative_parameters <- GaussCatParameters(one_hot_max_sizes) + self$n_features <- n_features + self$vlb_scale_factor <- 1 / n_features + + ##### Generate the mask generator + if (mask_generator_name == "MCAR_mask_generator") { + # Create a MCAR_mask_generator and attach it to the vaeac object. Note that masking_ratio is a singleton here. + self$mask_generator <- MCAR_mask_generator( + masking_ratio = masking_ratio, + paired_sampling = paired_sampling + ) + + # Attach the masking ratio to the vaeac object. + self$masking_ratio <- masking_ratio + } else if (mask_generator_name == "Specified_prob_mask_generator") { + # Create a Specified_prob_mask_generator and attach it to the vaeac object. + # Note that masking_ratio is an array here. + self$mask_generator <- Specified_prob_mask_generator( + masking_probs = masking_ratio, + paired_sampling = paired_sampling + ) + + # Attach the masking probabilities to the vaeac object. + self$masking_probs <- masking_ratio + } else if (mask_generator_name == "Specified_masks_mask_generator") { + # Small check that they have been provided. + if (is.null(mask_gen_coalitions) | is.null(mask_gen_coalitions_prob)) { + stop(paste0( + "Both 'mask_gen_coalitions' and 'mask_gen_coalitions_prob' ", + "must be provided when using 'Specified_masks_mask_generator'." + )) + } + + # Create a Specified_masks_mask_generator and attach it to the vaeac object. + self$mask_generator <- Specified_masks_mask_generator( + masks = mask_gen_coalitions, + masks_probs = mask_gen_coalitions_prob, + paired_sampling = paired_sampling + ) + + # Save the possible masks and corresponding probabilities to the vaeac object. + self$masks <- mask_gen_coalitions + self$masks_probs <- mask_gen_coalitions_prob + } else { + # Print error to user. + stop(paste0( + "`mask_generator_name` must be one of 'MCAR_mask_generator', 'Specified_prob_mask_generator', or ", + "'Specified_masks_mask_generator', and not '", mask_generator_name, "'." 
+ )) + } + + ##### Full Encoder + full_encoder_network <- torch::nn_sequential() + + # Full Encoder: Input layer + full_encoder_network$add_module( + module = CategoricalToOneHotLayer(c(one_hot_max_sizes, rep(0, n_features)), seq(n_features)), + name = "input_layer_cat_to_one_hot" + ) + full_encoder_network$add_module( + module = torch::nn_linear( + in_features = sum(apply(rbind(one_hot_max_sizes, rep(1, n_features)), 2, max)) + n_features * 2, + out_features = width + ), + name = "input_layer_linear" + ) + full_encoder_network$add_module( + module = activation_function(), + name = "input_layer_layer_activation" + ) + if (batch_normalization) { + full_encoder_network$add_module( + module = torch::nn_batch_norm1d(width), + name = "input_layer_layer_batch_norm" + ) + } + + # Full Encoder: Hidden layers + for (i in seq(depth)) { + if (skip_conn_layer) { + # Add identity skip connection. Such that the input is added to the output of the linear layer + # and activation function: output = X + activation(WX + b). + full_encoder_network$add_module( + module = SkipConnection( + torch::nn_linear(width, width), + activation_function(), + if (batch_normalization) torch::nn_batch_norm1d(width) + ), + name = paste0("hidden_layer_", i, "_skip_conn_with_linear_and_activation", name_extra_batch_normalize) + ) + } else { + # Do not use skip connections and do not add the input to the output. + full_encoder_network$add_module( + module = torch::nn_linear(width, width), + name = paste0("hidden_layer_", i, "_linear") + ) + full_encoder_network$add_module( + module = activation_function(), + name = paste0("hidden_layer_", i, "_activation") + ) + if (batch_normalization) { + full_encoder_network$add_module( + module = torch::nn_batch_norm1d(width), + name = paste0("hidden_layer_", i, "_batch_norm") + ) + } + } + } + + # Full Encoder: Go to latent space + full_encoder_network$add_module( + module = torch::nn_linear(width, latent_dim * 2), + name = "latent_space_layer_linear" + ) + + ##### Masked Encoder + masked_encoder_network <- torch::nn_sequential() + + # Masked Encoder: Input layer + masked_encoder_network$add_module( + module = CategoricalToOneHotLayer(c(one_hot_max_sizes, rep(0, n_features))), + name = "input_layer_cat_to_one_hot" + ) + if (skip_conn_masked_enc_dec) { + masked_encoder_network$add_module( + module = MemoryLayer("#input"), + name = "input_layer_memory" + ) + } + masked_encoder_network$add_module( + module = torch::nn_linear( + in_features = sum(apply(rbind(one_hot_max_sizes, rep(1, n_features)), 2, max)) + n_features, + out_features = width + ), + name = "input_layer_linear" + ) + masked_encoder_network$add_module( + module = activation_function(), + name = "input_layer_activation" + ) + if (batch_normalization) { + masked_encoder_network$add_module( + module = torch::nn_batch_norm1d(width), + name = "input_layer_batch_norm" + ) + } + + # Masked Encoder: Hidden layers + for (i in seq(depth)) { + if (skip_conn_layer) { + # Add identity skip connection. Such that the input is added to the output of the linear layer + # and activation function: output = X + activation(WX + b). + # Also check inside SkipConnection if we are to use MemoryLayer. I.e., skip connection with + # concatenation from masked encoder to decoder. 
+ masked_encoder_network$add_module( + module = SkipConnection( + if (skip_conn_masked_enc_dec) MemoryLayer(paste0("#", i)), + torch::nn_linear(width, width), + activation_function() + ), + name = paste0("hidden_layer_", i, "_skip_conn_with_linear_and_activation", name_extra_memory_layer) + ) + if (batch_normalization) { + masked_encoder_network$add_module( + module = torch::nn_batch_norm1d(width), + name = paste0("hidden_layer_", i, "_batch_norm") + ) + } + } else { + # Do not use skip connections and do not add the input to the output. + if (skip_conn_masked_enc_dec) { + masked_encoder_network$add_module( + module = MemoryLayer(paste0("#", i)), + name = paste0("hidden_layer_", i, "_memory") + ) + } + masked_encoder_network$add_module( + module = torch::nn_linear(width, width), + name = paste0("hidden_layer_", i, "_linear") + ) + masked_encoder_network$add_module( + module = activation_function(), + name = paste0("hidden_layer_", i, "_activation") + ) + if (batch_normalization) { + masked_encoder_network$add_module( + module = torch::nn_batch_norm1d(width), + name = paste0("hidden_layer_", i, "_batch_norm") + ) + } + } + } + + # Masked Encoder: Go to latent space + if (skip_conn_masked_enc_dec) { + masked_encoder_network$add_module( + module = MemoryLayer(paste0("#", depth + 1)), + name = "latent_space_layer_memory" + ) + } + masked_encoder_network$add_module( + module = torch::nn_linear(width, 2 * latent_dim), + name = "latent_space_layer_linear" + ) + + ##### Decoder + decoder_network <- torch::nn_sequential() + + # Decoder: Go from latent space + decoder_network$add_module( + module = torch::nn_linear(latent_dim, width), + name = "latent_space_layer_linear" + ) + decoder_network$add_module( + module = activation_function(), + name = "latent_space_layer_activation" + ) + if (batch_normalization) { + decoder_network$add_module( + module = torch::nn_batch_norm1d(width), + name = "latent_space_layer_batch_norm" + ) + } + + # Get the width of the hidden layers in the decoder. Needs to be multiplied with two if + # we use skip connections between masked encoder and decoder as we concatenate the tensors. + width_decoder <- ifelse(skip_conn_masked_enc_dec, 2 * width, width) + + # Same for the input dimension to the last layer in decoder that yields the distribution params. + extra_params_skip_con_mask_enc <- + ifelse(test = skip_conn_masked_enc_dec, + yes = sum(apply(rbind(one_hot_max_sizes, rep(1, n_features)), 2, max)) + n_features, + no = 0 + ) + + # Will need an extra hidden layer if we use skip connection from masked encoder to decoder + # as we send the full input layer of the masked encoder to the last layer in the decoder. + depth_decoder <- ifelse(skip_conn_masked_enc_dec, depth + 1, depth) + + # Decoder: Hidden layers + for (i in seq(depth_decoder)) { + if (skip_conn_layer) { + # Add identity skip connection. Such that the input is added to the output of the linear layer + # and activation function: output = X + activation(WX + b). + # Also check inside SkipConnection if we are to use MemoryLayer. I.e., skip connection with + # concatenation from masked encoder to decoder. + # If TRUE, then the memory layers extracts the corresponding input used in the masked encoder + # and concatenate them with the current input. + # Note that we add the memory layers in the opposite direction from how they were created. + # So, we get a classical U-net with latent + # space at the bottom and a connection between the layers on the same height of the U-shape. 
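+        # Illustrative pairing for depth = 3: the decoder's hidden layers read the memory layers
+        # '#4', '#3', '#2', and '#1' (in that order), and the output layer reads '#input', i.e.,
+        # the reverse of the order in which the masked encoder stored them.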
+ decoder_network$add_module( + module = torch::nn_sequential( + SkipConnection( + if (skip_conn_masked_enc_dec) { + MemoryLayer(paste0("#", depth - i + 2), TRUE) + }, + torch::nn_linear(width_decoder, width), + activation_function() + ) + ), + name = paste0("hidden_layer_", i, "_skip_conn_with_linear_and_activation", name_extra_memory_layer) + ) + if (batch_normalization) { + decoder_network$add_module( + module = torch::nn_batch_norm1d(n_features = width), + name = paste0("hidden_layer_", i, "_batch_norm") + ) + } + } else { + # Do not use skip connections and do not add the input to the output. + if (skip_conn_masked_enc_dec) { + decoder_network$add_module( + module = MemoryLayer(paste0("#", depth - i + 2), TRUE), + name = paste0("hidden_layer_", i, "_memory") + ) + } + decoder_network$add_module( + module = torch::nn_linear(width_decoder, width), + name = paste0("hidden_layer_", i, "_linear") + ) + decoder_network$add_module( + module = activation_function(), + name = paste0("hidden_layer_", i, "_activation") + ) + if (batch_normalization) { + decoder_network$add_module( + module = torch::nn_batch_norm1d(width), + name = paste0("hidden_layer_", i, "_batch_norm") + ) + } + } + } + + # Decoder: Go the parameter space of the generative distributions + # Concatenate the input to the first layer of the masked encoder to the last layer of the decoder network. + if (skip_conn_masked_enc_dec) { + decoder_network$add_module( + module = MemoryLayer("#input", TRUE), + name = "output_layer_memory" + ) + } + # Linear layer to the parameters of the generative distributions Gaussian and Categorical. + # Note that sum(apply(rbind(one_hot_max_sizes, rep(1, n_features)), 2, max)) is the number of + # one hot variables to the masked encoder and n_features represents the binary variables if + # the features was masked/missing or not when they entered the masked encoder. + # The output dimension is 2 for the continuous features and K_i for categorical feature X_i, + # where K_i is the number of classes the i'th categorical feature can take on. + decoder_network$add_module( + module = torch::nn_linear( + in_features = width + extra_params_skip_con_mask_enc, + out_features = sum(apply(rbind(one_hot_max_sizes, rep(2, n_features)), 2, max)) + ), + name = "output_layer_linear" + ) + + # Save the networks to the vaeac object + self$full_encoder_network <- full_encoder_network + self$masked_encoder_network <- masked_encoder_network + self$decoder_network <- decoder_network + + # Compute the number of trainable parameters in the different networks and save them + n_para_full_encoder <- sum(sapply(full_encoder_network$parameters, function(p) prod(p$size()))) + n_para_masked_encoder <- sum(sapply(masked_encoder_network$parameters, function(p) prod(p$size()))) + n_para_decoder <- sum(sapply(decoder_network$parameters, function(p) prod(p$size()))) + n_para_total <- n_para_full_encoder + n_para_masked_encoder + n_para_decoder + self$n_train_param <- rbind(n_para_total, n_para_full_encoder, n_para_masked_encoder, n_para_decoder) + }, + + # Forward functions are required in torch::nn_modules, + # but is it not needed in the way we have implemented vaeac. + forward = function(...) { + warning("NO FORWARD FUNCTION IMPLEMENTED FOR VAEAC.") + return("NO FORWARD FUNCTION IMPLEMENTED FOR VAEAC.") + }, + + # Apply Mask to Batch to Create Observed Batch + # + # description Clones the batch and applies the mask to set masked entries to 0 to create the observed batch. 
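+  # Illustrative example (hypothetical values): for batch = [[1.5, 2.0], [0.3, 4.1]] and
+  # mask = [[0, 1], [1, 0]], make_observed() returns [[1.5, 0.0], [0.0, 4.1]], i.e., the entries
+  # to be imputed (mask == 1) are zeroed out while the conditioned-on entries are kept as-is.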
+ # + # param batch Tensor of dimension batch_size x n_features containing a batch of observations. + # param mask Tensor of zeros and ones indicating which entries in batch to mask. Same dimension as `batch`. + make_observed = function(batch, mask) { + # Clone and detach the batch from the graph (removes the gradient element for the tensor). + observed <- batch$clone()$detach() + + # Apply the mask by masking every entry in batch where 'mask' is 1. + observed[mask == 1] <- 0 + + # Return the observed batch where masked entries are set to 0. + return(observed) + }, + + # Compute the Latent Distributions Inferred by the Encoders + # + # description Compute the parameters for the latent normal distributions inferred by the encoders. + # If `only_masked_encoder = TRUE`, then we only compute the latent normal distributions inferred by the + # masked encoder. This is used in the deployment phase when we do not have access to the full observation. + # + # param batch Tensor of dimension batch_size x n_features containing a batch of observations. + # param mask Tensor of zeros and ones indicating which entries in batch to mask. Same dimension as `batch`. + # param only_masked_encoder Boolean. If we are only to compute the latent distributions for the masked encoder. + # Used in deployment phase when we do not have access to the full data. Always FALSE in the training phase. + make_latent_distributions = function(batch, mask, only_masked_encoder = FALSE) { + # Artificially mask the observations where mask == 1 to create the observed batch values. + observed <- self$make_observed(batch = batch, mask = mask) + + # Check if we are in training or deployment phase + if (only_masked_encoder) { + # In deployment phase and only use the masked encoder. + full_encoder <- NULL + } else { + # In the training phase where we need to use both masked and full encoder. + + # Column bind the batch and the mask to create the full information sent to the full encoder. + full_info <- torch::torch_cat(c(batch, mask), dim = 2) + + # Send the full_information through the full encoder. It needs the full information to know if a + # value is missing or just masked. The output tensor is of shape batch_size x (2 x latent_dim) + # In each row, i.e., each observation in the batch, the first latent_dim entries are the means mu + # while the last latent_dim entries are the softplus of the sigmas, so they can take on any + # negative or positive value. Recall that softplus(x) = ln(1+e^{x}). + full_encoder_params <- self$full_encoder_network(full_info) + + # Takes the full_encoder_parameters and returns a normal distribution, which is component-wise + # independent. If sigma (after softmax transform) is less than 1e-3, then we set sigma to 0.001. + full_encoder <- vaeac_normal_parse_params(params = full_encoder_params, min_sigma = 1e-3) + } + + # Column bind the batch and the mask to create the observed information sent to the masked encoder. + observed_info <- torch::torch_cat(c(observed, mask), dim = -1) + + # Compute the latent normal dist parameters (mu, sigma) for the masked + # encoder by sending the observed values and the mask to the masked encoder. 
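+    # As for the full encoder, the output is a tensor of shape batch_size x (2 * latent_dim), where the
+    # first latent_dim entries per row are the means and the last latent_dim entries parameterize the
+    # standard deviations (cf. vaeac_normal_parse_params below).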
+ masked_encoder_params <- self$masked_encoder_network(observed_info) + + # Create the latent normal distributions based on the parameters (mu, sigma) from the masked encoder + masked_encoder <- vaeac_normal_parse_params(params = masked_encoder_params, min_sigma = 1e-3) + + # Return the full and masked encoders + return(list( + full_encoder = full_encoder, + masked_encoder = masked_encoder + )) + }, + + # Compute the Regularizes for the Latent Distribution Inferred by the Masked Encoder. + # + # description The masked encoder (prior) distribution regularization in the latent space. + # This is used to compute the extended variational lower bound used to train vaeac, see + # Section 3.3.1 in Olsen et al. (2022). + # Though regularizing prevents the masked encoder distribution parameters from going to infinity, + # the model usually doesn't diverge even without this regularization. It almost doesn't affect + # learning process near zero with default regularization parameters which are recommended to be used. + # + # param masked_encoder The torch_Normal object returned when calling the masked encoder. + masked_encoder_regularization = function(masked_encoder) { + # Extract the number of observations. Same as batch_size. + n_observations <- masked_encoder$mean$shape[1] + + # Extract the number of dimension in the latent space. + n_latent_dimensions <- masked_encoder$mean$shape[2] + + # Extract means and ensure correct shape (batch_size x latent_dim). + mu <- masked_encoder$mean$view(c(n_observations, n_latent_dimensions)) + + # Extract the sigmas and ensure correct shape (batch_size x latent_dim). + sigma <- masked_encoder$scale$view(c(n_observations, n_latent_dimensions)) + + # Note that sum(-1) indicates that we sum together the columns. + # mu_regularizer is then a tensor of length n_observations + mu_regularizer <- -(mu^2)$sum(-1) / (2 * self$sigma_mu^2) + + # sigma_regularizer is then also a tensor of length n_observations. + sigma_regularizer <- (sigma$log() - sigma)$sum(-1) * self$sigma_sigma + + # Add the regularization terms together and return them. + return(mu_regularizer + sigma_regularizer) + }, + + # Compute the Variational Lower Bound for the Observations in the Batch + # + # description Compute differentiable lower bound for the given batch of objects and mask. + # Used as the (negative) loss function for training the vaeac model. + # + # param batch Tensor of dimension batch_size x n_features containing a batch of observations. + # param mask Tensor of zeros and ones indicating which entries in batch to mask. Same dimension as `batch`. + batch_vlb = function(batch, mask) { + # Compute the latent normal distributions obtained from the full and masked encoder + encoders_list <- self$make_latent_distributions(batch = batch, mask = mask) + + # Extract the masked and full encoders. These are torch_Normal objects. + masked_encoder <- encoders_list$masked_encoder + full_encoder <- encoders_list$full_encoder + + # Apply the regularization on the mus and sigmas of the normal dist obtained from the masked encoder + # such that they don't blow up. Regularized according to their normal gamma prior, see Olsen et al. (2022). + masked_encoder_regularization <- self$masked_encoder_regularization(masked_encoder) + + # To use the reparameterization trick to train vaeac, we need to use 'rsample' + # and not 'sample', which allows backpropagation through the mean and standard deviation layers, + # see https://pytorch.org/docs/stable/distributions.html#pathwise-derivative. 
+ # For each training instance in the batch we sample values for each of the latent variables, + # i.e., we get a tensor of dimension batch_size x latent_dim. + latent <- full_encoder$rsample() + + # Send the latent samples through the decoder and get the batch_size x 2*n_features (in cont case) + # where we for each row have a normal dist on each feature The form will be (mu_1, sigma_1, ..., mu_p, sigma_p) + reconstruction_params <- self$decoder_network(latent) + + # Compute the reconstruction loss, i.e., the log likelihood of only the masked values in + # the batch (true values) given the current reconstruction parameters from the decoder. + # We do not consider the log likelihood of observed or missing/nan values. + reconstruction_loss <- self$reconstruction_log_prob(batch, reconstruction_params, mask) + + # Compute the KL divergence between the two latent normal distributions obtained from the full encoder + # and masked encoder. Since the networks create MVN with diagonal covariance matrices, that is, the same as + # a product of individual Gaussian distributions, we can compute KL analytically very easily: + # KL(p, q) = \int p(x) log(p(x)/q(x)) dx + # = 0.5 * { (sigma_p/sigma_q)^2 + (mu_q - mu_p)^2/sigma_q^2 - 1 + 2 ln (sigma_q/sigma_p)} + # when both p and q are torch_Normal objects. + kl <- vaeac_kl_normal_normal(full_encoder, masked_encoder)$view(c(batch$shape[1], -1))$sum(-1) + + # Return the variational lower bound with the prior regularization. See Section 3.3.1 in Olsen et al. (2022) + return(reconstruction_loss - kl + masked_encoder_regularization) + }, + + # Compute the Importance Sampling Estimator for the Observations in the Batch + # + # description Compute IWAE log likelihood estimate with K samples per object. + # + # details Technically, it is differentiable, but it is recommended to use it for + # evaluation purposes inside torch.no_grad in order to save memory. With torch::with_no_grad + # the method almost doesn't require extra memory for very large K. The method makes K independent + # passes through decoder network, so the batch size is the same as for training with batch_vlb. + # IWAE is an abbreviation for Importance Sampling Estimator + # log p_{theta, psi}(x|y) approx + # log {1/K * sum_{i=1}^K [p_theta(x|z_i, y) * p_psi(z_i|y) / q_phi(z_i|x,y)]} = + # log {sum_{i=1}^K exp(log[p_theta(x|z_i, y) * p_psi(z_i|y) / q_phi(z_i|x,y)])} - log(K) = + # log {sum_{i=1}^K exp(log[p_theta(x|z_i, y)] + log[p_psi(z_i|y)] - log[q_phi(z_i|x,y)])} - log(K) = + # logsumexp(log[p_theta(x|z_i, y)] + log[p_psi(z_i|y)] - log[q_phi(z_i|x,y)]) - log(K) = + # logsumexp(rec_loss + prior_log_prob - proposal_log_prob) - log(K), + # where z_i ~ q_phi(z|x,y). + # + # param batch Tensor of dimension batch_size x n_features containing a batch of observations. + # param mask Tensor of zeros and ones indicating which entries in batch to mask. Same dimension as `batch`. + # param K Integer. The number of samples generated to compute the IWAE for each observation in `batch`. + batch_iwae = function(batch, mask, K) { + # Compute the latent normal distributions obtained from the full and masked encoder + encoders_list <- self$make_latent_distributions(batch = batch, mask = mask) + + # Extract the masked and full encoders. These are torch_Normal objects. + masked_encoder <- encoders_list$masked_encoder + full_encoder <- encoders_list$full_encoder + + # List to store the estimates. 
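+    # Each element will be a batch_size x 1 tensor holding the importance-sampling term for one of the
+    # K passes; the elements are column-bound and combined with logsumexp further below.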
+ estimates <- list() + + # Iterate over the number of samples/passes through the decoder for each validation observation. + for (i in seq(K)) { + # See equation 18 on page 18 in Ivanov et al. (2019). Create samples from the + # full encoder; z_i ~ q_phi(z|x,y). We get a tensor of dimension batch_size x latent_dim. + latent <- full_encoder$rsample() + + # Send the latent samples through the decoder and get the batch_size x 2*n_features (in cont case) + # where we for each row have a normal dist on each feature The form will be (mu_1, sigma_1, ..., mu_p, sigma_p) + reconstruction_params <- self$decoder_network(latent) + + # Compute the reconstruction loss, i.e., the log likelihood of only the masked values in + # the batch (true values) given the current reconstruction parameters from the decoder. + # We do not consider the log likelihood of observed or missing/nan values. + reconstruction_loss <- self$reconstruction_log_prob(batch, reconstruction_params, mask) + + # Compute the log likelihood of observing the sampled latent representations from + # the full_encoder when using the normal distribution estimated by the masked_encoder. + masked_encoder_log_prob <- masked_encoder$log_prob(latent) + + # Ensure dimensions batch$shape[1] x something. + masked_encoder_log_prob <- masked_encoder_log_prob$view(c(batch$shape[1], -1)) + + # Sum over the rows (last dimension), i.e., add the log-likelihood for each instance. + masked_encoder_log_prob <- masked_encoder_log_prob$sum(-1) + + # Same explanations here as above, but now for the full_encoder. + full_encoder_log_prob <- full_encoder$log_prob(latent) + full_encoder_log_prob <- full_encoder_log_prob$view(c(batch$shape[1], -1)) + full_encoder_log_prob <- full_encoder_log_prob$sum(-1) + + # Combine the estimated loss based on the formula from equation 18 on page 18 in Ivanov et al. (2019). + # Consists of batch.shape[0] number of values + estimate <- reconstruction_loss + masked_encoder_log_prob - full_encoder_log_prob + + # Make sure that the results are a column vector of height batch_size. + estimate <- estimate$unsqueeze(-1) + + # Add the results to the estimates list + estimates <- append(estimates, estimate) + } + + # Convert from list of tensors to a single tensor using colum bind + estimates <- torch::torch_cat(estimates, -1) + + # Use the stabilizing trick logsumexp. + # We have worked on log-scale above, hence plus and minus and not multiplication and division, + # while Eq. 18 in Ivanov et al. (2019) work on regular scale with multiplication and division. + # We take the exp of the values to get back to original scale, then sum it and convert back to + # log scale. Note that we add -log(K) instead of dividing each term by K. + # Take the log sum exp along the rows (validation samples) then subtract log(K). + return(torch::torch_logsumexp(estimates, -1) - log(K)) + }, + + # Generate the Parameters of the Generative Distributions + # + # description Generate the parameters of the generative distributions for samples from the batch. + # + # details The function makes K latent representation for each object from the batch, send these + # latent representations through the decoder to obtain the parameters for the generative distributions. + # I.e., means and variances for the normal distributions (continuous features) and probabilities + # for the categorical distribution (categorical features). + # The second axis is used to index samples for an object, i.e. if the batch shape is [n x D1 x D2], then + # the result shape is [n x K x D1 x D2]. 
It is better to use it inside torch::with_no_grad in order to save + # memory. With torch::with_no_grad the method doesn't require extra memory except the memory for the result. + # + # param batch Tensor of dimension batch_size x n_features containing a batch of observations. + # param mask Tensor of zeros and ones indicating which entries in batch to mask. Same dimension as `batch`. + # param K Integer. The number of imputations to be done for each observation in batch. + generate_samples_params = function(batch, mask, K = 1) { + # Compute the latent normal distributions obtained from only the masked encoder. + encoders_list <- self$make_latent_distributions(batch = batch, mask = mask, only_masked_encoder = TRUE) + + # Only extract the masked encoder (torch_Normal object) as we are in the deployment phase. + masked_encoder <- encoders_list$masked_encoder + + # Create a list to keep the sampled parameters. + samples_params <- list() + + # Iterate over the number of imputations for each observation in the batch. + for (i in seq(K)) { + # Generate latent representations by using the masked encoder. + latent <- masked_encoder$rsample() + + # Send the latent representations through the decoder. + sample_params <- self$decoder_network(latent) + + # Collect the parameters of the induced Gaussian distributions. + samples_params <- append(samples_params, sample_params$unsqueeze(2)) + } + + # Concatenate the list to a 3d-tensor. 2nd dimensions is the imputations. + return(torch::torch_cat(samples_params, 2)) + } +) + +# Dataset Utility Functions =========================================================================================== +#' Compute Featurewise Means and Standard Deviations +#' +#' @description Returns the means and standard deviations for all continuous features in the data set. +#' Categorical features get mean=0 and sd=1 by default. +#' +#' @param data A torch_tensor of dimension N x p containing the data. +#' @param one_hot_max_sizes A torch tensor of dimension p containing the one hot sizes of the p features. +#' The sizes for the continuous features can either be '0' or '1'. +#' +#' @return List containing the means and the standard deviations of the different features. +#' @author Lars Henry Berge Olsen +#' @keywords internal +vaeac_compute_normalization <- function(data, + one_hot_max_sizes) { + # Create vectors of zeros that will store the means and sd for each feature. 
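+  # For example, with one_hot_max_sizes = c(1, 3, 1) (continuous, three-level categorical, continuous), the result is
+  # norm_vector_mean = c(mean(x1), 0, mean(x3)) and norm_vector_std = c(sd(x1), 1, sd(x3)), where the means and sds
+  # are computed over the non-missing values only.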
+ norm_vector_mean <- torch::torch_zeros(length(one_hot_max_sizes)) + norm_vector_std <- torch::torch_ones(length(one_hot_max_sizes)) + + # Iterate over the features + for (variable_j in seq_along(one_hot_max_sizes)) { + # Number of one hot encoded dummy features for the j'th variable + size_j <- one_hot_max_sizes[variable_j] + + # Check if categorical or continuous feature + if (size_j >= 2) { + # Categorical feature + # Do not do anything when the feature is categorical + next + } else { + # Continuous feature + + # Get the values of the i'th features + variable_j_values <- data[, variable_j] + + # Only keep the non-missing values + variable_j_values <- variable_j_values[variable_j_values$isnan()$logical_not()] + + # Compute the mean of the values + variable_j_values_mean <- variable_j_values$mean() + + # Compute the sd of the values + variable_j_values_sd <- variable_j_values$std() + + # Save the mean and sd in the right place of the vectors + norm_vector_mean[variable_j] <- variable_j_values_mean + norm_vector_std[variable_j] <- variable_j_values_sd + } + } + + # return the vectors of means and standards deviations + return(list( + norm_vector_mean = norm_vector_mean, + norm_vector_std = norm_vector_std + )) +} + + + +#' Preprocess Data for the vaeac approach +#' +#' @description vaeac only supports numerical values. This function converts categorical features +#' to numerics with class labels 1,2,...,K, and keeps track of the map between the original and +#' new class labels. It also computes the one_hot_max_sizes. +#' +#' @param data matrix/data.frame/data.table containing the training data. Only the features and +#' not the response. +#' @param log_exp_cont_feat Boolean. If we are to log transform all continuous +#' features before sending the data to vaeac. vaeac creates unbounded values, so if the continuous +#' features are strictly positive, as for Burr and Abalone data, it can be advantageous to log-transform +#' the data to unbounded form before using vaeac. If TRUE, then `vaeac_postprocess_data` will +#' take the exp of the results to get back to strictly positive values. +#' @param x_torch Torch tensor. A 2D matrix containing the data to normalize. +#' @param norm_mean Torch tensor (optional). A 1D array containing the means of the columns of `x_torch`. +#' @param norm_std Torch tensor (optional). A 1D array containing the stds of the columns of `x_torch`. +#' +#' @return list containing data which can be used in vaeac, maps between original and new class +#' names for categorical features, one_hot_max_sizes, and list of information about the data. 
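+#'
+#' @examples
+#' \dontrun{
+#' # A minimal usage sketch on hypothetical data: one continuous feature and one two-level factor
+#' x <- data.table::data.table(num = rnorm(10), cat = factor(sample(c("a", "b"), 10, replace = TRUE)))
+#' pre <- vaeac_preprocess_data(x)
+#' pre$one_hot_max_sizes # c(1, 2): continuous features get 1, a K-level factor gets K
+#' pre$map_new_to_original_names$cat # list("1" = "a", "2" = "b")
+#' }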
+#'
+#' @keywords internal
+#' @author Lars Henry Berge Olsen
+vaeac_preprocess_data <- function(data, log_exp_cont_feat = FALSE,
+                                  normalize = TRUE, norm_mean = NULL, norm_std = NULL) {
+  # Ensure that data is a data.table object
+  data <- data.table::copy(data.table::as.data.table(data))
+
+  # Create a feature list which contains information about the features
+  feature_list <- list()
+  feature_list$labels <- colnames(data)
+  feature_list$classes <- sapply(data, class)
+  feature_list$factor_levels <- sapply(data, levels)
+
+  # Create a return_list object to store information about the data
+  return_list <- list()
+  return_list$feature_list <- feature_list
+  return_list$n_features <- ncol(data)
+
+  # Compute the one_hot_max_sizes for the features
+  one_hot_max_sizes <- unname(sapply(return_list$feature_list$factor_levels, length))
+  one_hot_max_sizes[one_hot_max_sizes == 0] <- 1
+  return_list$one_hot_max_sizes <- as.integer(one_hot_max_sizes)
+
+  # Get the categorical and continuous features
+  col_cat <- sapply(data, is.factor)
+  col_cont <- sapply(data, is.numeric)
+  cat_in_dataset <- sum(col_cat) > 0
+
+  # Extract the names of the categorical and continuous features
+  col_cat_names <- names(col_cat[col_cat])
+  col_cont_names <- names(col_cont[col_cont])
+
+  if (cat_in_dataset) {
+    # We have one or several categorical features and need to ensure that these have levels 1,2,...,K for vaeac to work
+
+    # Lists that will store maps between the original and new class names for the categorical features
+    map_original_to_new_names <- list()
+    map_new_to_original_names <- list()
+
+    # Iterate over the categorical features
+    for (col_cat_name in col_cat_names) {
+      # Create a map from the original class names to the new class names
+      map_original_to_new_names[[col_cat_name]] <- as.list(seq_along(levels(data[[col_cat_name]])))
+      names(map_original_to_new_names[[col_cat_name]]) <- levels(data[[col_cat_name]])
+
+      # Create a map from the new class names to the original class names
+      map_new_to_original_names[[col_cat_name]] <- as.list(levels(data[[col_cat_name]]))
+      names(map_new_to_original_names[[col_cat_name]]) <- seq_along(levels(data[[col_cat_name]]))
+    }
+
+    # Convert the categorical features to numeric. Automatically gets class levels 1,2,...,K.
+    data[, (col_cat_names) := lapply(.SD, as.numeric), .SDcols = col_cat_names]
+
+    # Add the maps to the return_list object
+    return_list$map_new_to_original_names <- map_new_to_original_names
+    return_list$map_original_to_new_names <- map_original_to_new_names
+  }
+
+  # Check if we are to log transform all continuous features.
+  if (log_exp_cont_feat) {
+    if (any(data[, ..col_cont_names] <= 0, na.rm = TRUE)) { # Add na.rm as data can contain NaN values to be imputed
+      stop("The continuous features cannot be log-transformed as they are not strictly positive.")
+    }
+    data[, (col_cont_names) := lapply(.SD, log), .SDcols = col_cont_names]
+  }
+
+  # Add the numerical data table to the return_list object, and some other variables.
+  return_list$log_exp_cont_feat <- log_exp_cont_feat
+  return_list$data_preprocessed <- as.matrix(data)
+  return_list$col_cat <- col_cat
+  return_list$col_cat_names <- col_cat_names
+  return_list$col_cont <- col_cont
+  return_list$col_cont_names <- col_cont_names
+  return_list$cat_in_dataset <- cat_in_dataset
+
+  # Check if we are to normalize the data. Then normalize it and add it to the return list.
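+  # The normalization below is (x - mean) / std column-wise; the categorical columns are effectively left unchanged
+  # since vaeac_compute_normalization assigns them mean 0 and std 1.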
+ if (normalize) { + data_norm_list <- vaeac_normalize_data( + data_torch = torch::torch_tensor(return_list$data_preprocessed), + norm_mean = norm_mean, + norm_std = norm_std, + one_hot_max_sizes = one_hot_max_sizes + ) + return_list <- c(return_list, data_norm_list) + } + + # Return the return_list object + return(return_list) +} + +#' Normalize mixed data for `vaeac` +#' +#' @description +#' Compute the mean and std for each continuous feature, while the categorical features will have mean 0 and std 1. +#' +#' @inheritParams vaeac_preprocess_data +#' @inheritParams vaeac +#' +#' @return A list containing the normalized version of `x_torch`, `norm_mean` and `norm_std`. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +vaeac_normalize_data <- function(data_torch, one_hot_max_sizes, norm_mean = NULL, norm_std = NULL) { + if (xor(!is.null(norm_mean), !is.null(norm_std))) stop("Both `norm_mean` and `norm_std` must be provided.") + + if (is.null(norm_mean) && is.null(norm_std)) { + # Compute the mean and std for each continuous feature, while the categorical features will have mean 0 and std 1 + mean_and_sd <- vaeac_compute_normalization(data_torch, one_hot_max_sizes) + norm_mean <- mean_and_sd$norm_vector_mean + norm_std <- mean_and_sd$norm_vector_std + + # Make sure that the standard deviation is not too low, in that case clip it. + norm_std <- norm_std$max(other = torch::torch_tensor(1e-9)) + } + + # Normalize the data to have mean 0 and std 1. + data_normalized_torch <- (data_torch - norm_mean) / norm_std + + # Return the values + list( + data_normalized_torch = data_normalized_torch, + norm_mean = norm_mean, + norm_std = norm_std + ) +} + +#' Postprocess Data Generated by a vaeac Model +#' +#' @description vaeac generates numerical values. This function converts categorical features +#' to from numerics with class labels 1,2,...,K, to factors with the original and class labels. +#' +#' @param data data.table containing the data generated by a vaeac model +#' @param vaeac_model_state_list List. The returned list from the `vaeac_preprocess_data` function or +#' a loaded checkpoint list of a saved vaeac object. +#' +#' @return data.table with the generated data from a vaeac model where the categorical features +#' now have the original class names. 
+#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +#' +#' @examples +#' \dontrun{ +#' data <- data.table(matrix(rgamma(500 * 3, 2), ncol = 3)) +#' preprocessed <- vaeac_preprocess_data(data) +#' preprocessed$data_preprocessed +#' postprocessed <- vaeac_postprocess_data(preprocessed$data_preprocessed, preprocessed) +#' postprocessed +#' all.equal(data, postprocessed) +#' } +vaeac_postprocess_data <- function(data, vaeac_model_state_list) { + # Go from vaeac type data back to data.table used in shapr + if (!"data.table" %in% class(data)) data <- as.data.table(data) + colnames(data) <- vaeac_model_state_list$feature_list$labels + + # Extract the column names for the categorical and continuous features, and the map from new to original name + col_cat_names <- vaeac_model_state_list$col_cat_names + col_cont_names <- vaeac_model_state_list$col_cont_names + map_new_to_original_names <- vaeac_model_state_list$map_new_to_original_names + + # Convert all categorical features (if there are any) from numeric back to factors with the original class names + if (length(col_cat_names) > 0) { + lapply(col_cat_names, function(col_cat_name) { + data[, (col_cat_name) := lapply(.SD, factor, labels = map_new_to_original_names[[col_cat_name]]), + .SDcols = col_cat_name + ] + }) + } + + # Apply the exp transformation if we applied the log transformation in the pre-processing to the positive features + if (vaeac_model_state_list$log_exp_cont_feat) data[, (col_cont_names) := lapply(.SD, exp), .SDcols = col_cont_names] + + # Return the postprocessed data table + return(data) +} + + +## vaeac_dataset ------------------------------------------------------------------------------------------------------ +#' Dataset used by the `vaeac` model +#' +#' @description +#' Convert a the data into a [torch::dataset()] which the vaeac model creates batches from. +#' +#' @details +#' This function creates a [torch::dataset()] object that represent a map from keys to data samples. +#' It is used by the [torch::dataloader()] to load data which should be used to extract the +#' batches for all epochs in the training phase of the neural network. Note that a dataset object +#' is an R6 instanc, see \url{https://r6.r-lib.org/articles/Introduction.html}, which is classical +#' object-oriented programming, with self reference. I.e, [shapr::vaeac_dataset()] is a subclass +#' of type [torch::dataset()]. +#' +#' @param X A torch_tensor contain the data of shape N x p, where N and p are the number +#' of observations and features, respectively. +#' @param one_hot_max_sizes A torch tensor of dimension p containing the one hot sizes of +#' the p features. The sizes for the continuous features can either be 0 or 1. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +#' +#' @examples +#' \dontrun{ +#' p <- 5 +#' N <- 14 +#' batch_size <- 10 +#' one_hot_max_sizes <- rep(1, p) +#' vaeac_ds <- vaeac_dataset( +#' torch_tensor(matrix(rnorm(p * N), ncol = p), +#' dtype = torch_float() +#' ), +#' one_hot_max_sizes +#' ) +#' vaeac_ds +#' +#' vaeac_dl <- torch::dataloader( +#' vaeac_ds, +#' batch_size = batch_size, +#' shuffle = TRUE, +#' drop_last = FALSE +#' ) +#' vaeac_dl$.length() +#' vaeac_dl$.iter() +#' +#' vaeac_iterator <- vaeac_dl$.iter() +#' vaeac_iterator$.next() # batch1 +#' vaeac_iterator$.next() # batch2 +#' vaeac_iterator$.next() # Empty +#' } +vaeac_dataset <- torch::dataset( + + # field name The name of the `torch::dataset`. + name = "vaeac_dataset", + + # description Create a new vaeac_dataset object. 
+ # param X A torch_tensor contain the data + # param one_hot_max_sizes A torch tensor of dimension p containing the one hot sizes of the p features. + # The sizes for the continuous features can either be '0' or '1'. + initialize = function(X, one_hot_max_sizes) { + # Save the number of observations in X + self$N <- nrow(X) + + # Save the number of features in X + self$p <- ncol(X) + + # Save the number of one hot dummy features for each features + self$one_hot_max_sizes <- one_hot_max_sizes + + # Save the dataset + self$X <- X + }, + # description How to fetch a data sample for a given key/index. + .getitem = function(index) { + X <- self$X[index, ] + }, + # description Return the size of the dataset + .length = function() { + nrow(self$X) + } +) + + +## Paired Sampler ---------------------------------------------------------------------------------------------------- +#' Sampling Paired Observations +#' +#' @description +#' A sampler used to samples the batches where each instances is sampled twice +#' +#' @details +#' A sampler object that allows for paired sampling by always including each observation from the +#' [shapr::vaeac_dataset()] twice. +#' A [torch::sampler()] object can be used with [torch::dataloader()] +#' when creating batches from a torch dataset [torch::dataset()]. See more on +#' \url{https://rdrr.io/cran/torch/src/R/utils-data-sampler.R}. +#' This function does not use batch iterators, which might increase the speed. +#' +#' @param vaeac_dataset_object A [shapr::vaeac_dataset()] object containing the data. +#' @param shuffle Boolean. If `TRUE`, then the data is shuffled. If `FALSE`, +#' then the data is returned in chronological order. +#' +#' @examples +#' \dontrun{ +#' # Example how to use it combined with mask generators with paired sampling activated +#' batch_size <- 4 +#' if (batch_size %% 2 == 1) batch_size <- batch_size - 1 # Make sure that batch size is even +#' n_features <- 3 +#' n_observations <- 5 +#' shuffle <- TRUE +#' data <- torch_tensor(matrix(rep(seq(n_observations), each = n_features), +#' ncol = n_features, byrow = TRUE +#' )) +#' data +#' dataset <- vaeac_dataset(data, rep(1, n_features)) +#' dataload <- torch::dataloader(dataset, +#' batch_size = batch_size, +#' sampler = paired_sampler(dataset, +#' shuffle = shuffle +#' ) +#' ) +#' dataload$.length() # Number of batches, same as ceiling((2 * n_observations) / batch_size) +#' mask_generator <- MCAR_mask_generator(paired = TRUE) +#' coro::loop(for (batch in dataload) { +#' mask <- mask_generator(batch) +#' obs <- mask * batch +#' print(torch::torch_cat(c(batch, mask, obs), -1)) +#' }) +#' } +#' @author Lars Henry Berge Olsen +#' @keywords internal +paired_sampler <- torch::sampler( + # field Name of the object + classname = "paired_sampler", + # description Initialize the paired_sampler object + initialize = function(vaeac_dataset_object, shuffle = FALSE) { + self$vaeac_dataset_object <- vaeac_dataset_object + self$shuffle <- shuffle + }, + # description Get the number of observations in the datasaet + .length = function() { + length(self$vaeac_dataset_object) * 2 # Multiply by two do to get the actual number + }, + # description Function to iterate over the data + .iter = function() { + # Get the number of observations in the data + n <- length(self$vaeac_dataset_object) + + # If shuffle, then randomly sample the indices, otherwise we take the them in increasing order + indices <- if (self$shuffle) sample.int(n) else seq_len(n) + + # Duplicate each index and return an iterator + 
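+      # For example, with n = 3 and shuffle = FALSE, rep(indices, each = 2) yields c(1, 1, 2, 2, 3, 3), so each
+      # observation is drawn twice in adjacent positions and a batch can hold both a mask and its complement.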
return(coro::as_iterator(rep(indices, each = 2))) + } +) + + +# Neural Network Utility Functions ==================================================================================== +## MemoryLayer ------------------------------------------------------------------------------------------------------- +#' A [torch::nn_module()] Representing a Memory Layer +#' +#' @description +#' The layer is used to make skip-connections inside a [torch::nn_sequential] network +#' or between several [torch::nn_sequential] networks without unnecessary code complication. +#' +#' @details +#' If `output = FALSE`, this layer stores its input in a static list +#' `storage` with the key `id`` and then passes the input to the next layer. +#' I.e., when memory layer is used in the masked encoder. +#' If `output = TRUE`, this layer takes stored tensor from the storage. +#' I.e., when memory layer is used in the decoder. +#' If `add = TRUE`, it returns sum of the stored vector and an `input`, +#' otherwise it returns their concatenation. If the tensor with specified `id` +#' is not in storage when the layer with `output = TRUE` is called, it would cause an exception. +#' +#' @param id A unique id to use as a key in the storage list. +#' @param output Boolean variable indicating if the memory layer is to store input in storage or extract from storage. +#' @param add Boolean variable indicating if the extracted value are to be added or concatenated to the input. +#' Only applicable when `output = TRUE`. +#' @param verbose Boolean variable indicating if we want to give printouts to the user. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +#' +#' @examples +#' \dontrun{ +#' net1 <- torch::nn_sequential( +#' MemoryLayer("#1"), +#' MemoryLayer("#0.1"), +#' torch::nn_linear(512, 256), +#' torch::nn_leaky_relu(), +#' # here add cannot be TRUE because the dimensions mismatch +#' MemoryLayer("#0.1", output = TRUE, add = FALSE), +#' torch::nn_linear(768, 256), +#' # the dimension after the concatenation with skip-connection is 512 + 256 = 768 +#' ) +#' net2 <- torch::nn_equential( +#' torch::nn_linear(512, 512), +#' MemoryLayer("#1", output = TRUE, add = TRUE), +#' ... +#' ) +#' b <- net1(a) +#' d <- net2(c) # net2 must be called after net1, otherwise tensor '#1' will not be in storage. +#' } +MemoryLayer <- torch::nn_module( + # field classname Name of the of torch::nn_module object. + classname = "MemoryLayer", + + # field shared_env A shared environment for all instances of MemoryLayers. + shared_env = new.env(), + + # description Create a new MemoryLayer object. + # param id A unique id to use as a key in the storage list. + # param output Boolean variable indicating if the memory layer is to store input in storage or extract from storage. + # param add Boolean variable indicating if the extracted value are to be added or concatenated to the input. + # Only applicable when `output = TRUE`. + # param verbose Boolean variable indicating if we want to give printouts to the user. + initialize = function(id, output = FALSE, add = FALSE, verbose = FALSE) { + self$id <- id + self$output <- output + self$add <- add + self$verbose <- verbose + }, + forward = function(input) { + # Check if we are going to insert input into the storage or extract data from the storage. + if (!self$output) { + if (self$verbose) message(paste0("Inserting data to memory layer `self$id = ", self$id, "`.")) + + # Insert the input into the storage list which is in the shared environment of the MemoryLayer class. 
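+      # For example, MemoryLayer("#1") in the masked encoder stores its input here, while a later
+      # MemoryLayer("#1", output = TRUE) in the decoder retrieves it to form a skip-connection.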
+ # Note that we do not check if self$id is unique. + self$shared_env$storage[[self$id]] <- input + return(input) # Return/send the input to the next layer in the network. + } else { + # We are to extract data from the storage list. + if (self$verbose) { + message(paste0( + "Extracting data to memory layer `self$id = ", self$id, "`. Using ", + " concatination = ", !self$add, "." + )) + } + + # Check that the memory layer has data is stored in it. If not, then thorw error. + if (!self$id %in% names(self$shared_env$storage)) { + stop(paste0( + "ValueError: Looking for memory layer `self$id = ", self$id, "`, but the only available memory layers are: ", + paste(names(self$shared_env$storage), collapse = "`, `"), "`." + )) + } + + # Extract the stored data for the given memory layer and check if we are to concatenate or add the input + stored <- self$shared_env$storage[[self$id]] + data <- if (self$add) input + stored else torch::torch_cat(c(input, stored), -1) + + # Return the data + return(data) + } + } +) + +## SkipConnection ----------------------------------------------------------------------------------------------------- +#' A [torch::nn_module()] Representing a skip connection +#' +#' @description +#' Skip connection over the sequence of layers in the constructor. The module passes +#' input data sequentially through these layers and then adds original data to the result. +#' +#' @param ... network modules such as, e.g., [torch::nn_linear()], [torch::nn_relu()], +#' and [shapr::MemoryLayer()] objects. See [shapr::vaeac()] for more information. +#' +#' @author Lars Henry Berge Olsen +#' @keywords internal +SkipConnection <- torch::nn_module( + # field classname Name of the of torch::nn_module object. + classname = "SkipConnection", + + # description Initialize a new SkipConnection module + initialize = function(...) { + self$inner_net <- torch::nn_sequential(...) + }, + # description What to do when a SkipConnection module is called + forward = function(input) { + return(input + self$inner_net(input)) + } +) + + + + +# Training Utility Functions ========================================================================================== +#' Extends Incomplete Batches by Sampling Extra Data from Dataloader +#' +#' @description If the height of the `batch` is less than `batch_size`, this function extends the `batch` with +#' data from the [torch::dataloader()] until the `batch` reaches the required size. +#' Note that `batch` is a tensor. +#' +#' @param batch The batch we want to check if has the right size, and if not extend it until it has the right size. +#' @param dataloader A [torch::dataloader()] object from which we can create an iterator object +#' and load data to extend the batch. +#' @param batch_size Integer. The number of samples to include in each batch. +#' +#' @return Returns the extended batch with the correct batch_size. 
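+#'
+#' @examples
+#' \dontrun{
+#' # A minimal, hypothetical sketch: pad a 3-row batch up to batch_size = 5 with rows drawn from the dataloader
+#' dl <- torch::dataloader(vaeac_dataset(torch::torch_randn(c(10, 4)), rep(1, 4)), batch_size = 5)
+#' small_batch <- torch::torch_randn(c(3, 4))
+#' vaeac_extend_batch(small_batch, dl, batch_size = 5)$shape # c(5, 4)
+#' }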
+#' +#' @author Lars Henry Berge Olsen +#' @keywords internal +vaeac_extend_batch <- function(batch, dataloader, batch_size) { + # Check if the batch contains too few observations and in that case add the missing number of obs from a new batch + while (batch$shape[1] < batch_size) { # Use while in case a single extra batch is not enough to get to `batch_size` + batch_extra <- dataloader$.iter()$.next() + batch <- torch::torch_cat(c(batch, batch_extra[seq(min(nrow(batch_extra), batch_size - batch$shape[1])), ]), 1) + } + + # The returned batch is gauranteed to contain `batch_size` observations + return(batch) +} + + +#' Compute the Importance Sampling Estimator (Validation Error) +#' +#' @description Compute the Importance Sampling Estimator which the vaeac model +#' uses to evaluate its performance on the validation data. +#' +#' @details Compute mean IWAE log likelihood estimation of the validation set. +#' Takes validation data loader, mask generator, batch size, vaeac_model (vaeac) +#' and number of IWAE latent samples per object.Returns one the estimation (float). +#' IWAE is an abbreviation for Importance Sampling Estimator +#' \deqn{\log p_{\theta, \psi}(x|y) \approx +#' \log {\frac{1}{S}\sum_{i=1}^S p_\theta(x|z_i, y) p_\psi(z_i|y) \big/ q_\phi(z_i|x,y),}} +#' where \eqn{z_i \sim q_\phi(z|x,y)}. +#' For more details, see \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}. +#' +#' @param val_dataloader A torch dataloader which loads the validation data. +#' @param mask_generator A mask generator object that generates the masks. +#' @param batch_size Integer. The number of samples to include in each batch. +#' @param vaeac_model The vaeac model. +#' @param val_iwae_n_samples Number of samples to generate for computing the IWAE for each validation sample. +#' +#' @return The average iwae over all instances in the validation dataset. +#' +#' @author Lars Henry Berge Olsen +#' @keywords internal +vaeac_get_val_iwae <- function(val_dataloader, + mask_generator, + batch_size, + vaeac_model, + val_iwae_n_samples) { + # Set variables to store the number of instances evaluated and avg_iwae + cum_size <- 0 + avg_iwae <- 0 + + # Iterate over all the batches in the validation set + coro::loop(for (batch in val_dataloader) { + # Get the number of instances in the current batch + init_size <- batch$shape[1] + + # Extend the with observations from `val_dataloader` to ensure that batch contains `batch_size` observations + batch <- vaeac_extend_batch(batch = batch, dataloader = val_dataloader, batch_size = batch_size) + + # Create the mask for the current batch. Mask consists of zeros (observed) and ones (missing or masked) + mask <- mask_generator(batch = batch) + + # If the vaeac_model$parameters are located on a GPU, then we send batch and mask to the GPU too + if (vaeac_model$parameters[[1]]$is_cuda) { + batch <- batch$cuda() + mask <- mask$cuda() + } + + # Use torch::with_no_grad() since we are evaluation, and do not need the gradients to do backpropagation + torch::with_no_grad({ + # Get the iwae for the first `init_size` observations in the batch. The other obs are just "padding". + iwae <- vaeac_model$batch_iwae(batch, mask, val_iwae_n_samples)[1:init_size, drop = FALSE] + + # Update the average iwae over all batches (over all instances). This is called recursive/online updating of + # the mean. Takes the old average * cum_size to get old sum of iwae and adds the sum of newly computed iwae. 
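+        # (For example, an old average of -3.0 over cum_size = 128 instances combined with a new batch whose
+        # iwae sum is -180 over 64 instances gives an updated average of (-3.0 * 128 - 180) / 192 = -2.9375.)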
+ # Then divide the total iwae by the number of instances: cum_size + iwae.shape[0] + avg_iwae <- (avg_iwae * (cum_size / (cum_size + iwae$shape[1])) + iwae$sum() / (cum_size + iwae$shape[1])) + + # Update the number of instances evaluated + cum_size <- cum_size + iwae$shape[1] + }) # End with_no_grad + }) # End iterating over the validation samples + + # return the average iwae over all instances in the validation set. + return(avg_iwae$to(dtype = torch::torch_float())) +} + + +# Probability Utility Functions ======================================================================================= +#' Creates Normal Distributions +#' +#' @description Function that takes in the a tensor where the first half of the columns contains the means of the +#' normal distributions, while the latter half of the columns contains the standard deviations. The standard deviations +#' are clamped with `min_sigma` to ensure stable results. If `params` is of dimensions batch_size x 8, the function +#' will create 4 independent normal distributions for each of the observation (`batch_size` observations in total). +#' +#' @details Take a Tensor (e.g. neural network output) and return a [torch::distr_normal()] distribution. +#' This normal distribution is component-wise independent, and its dimensionality depends on the input shape. +#' First half of channels is mean (\eqn{\mu}) of the distribution, the softplus of the second half is +#' std (\eqn{\sigma}), so there is no restrictions on the input tensor. `min_sigma` is the minimal value of +#' \eqn{\sigma}. I.e., if the above softplus is less than `min_sigma`, then \eqn{\sigma} is clipped +#' from below with value `min_sigma`. This regularization is required for the numerical stability and may +#' be considered as a neural network architecture choice without any change to the probabilistic model. +#' +#' @param params Tensor containing the parameters for the normal distributions. +#' @param min_sigma The minimal variance allowed. +#' +#' @return [torch::distr_normal()] distributions with the provided means and standard deviations. +#' +#' @author Lars Henry Berge Olsen +#' @keywords internal +vaeac_normal_parse_params <- function(params, min_sigma = 0.001) { + # Get the number of instances + n <- params$shape[1] + + # Then get the dimension of the parameters + d <- params$shape[2] + + # Use double dash to get integer. Do not need it as we by construction always have 2*n_dim_latent_space + mu <- params[, 1:(d %/% 2)] # Get the first halves which are the means + + # Get the second half which are transformed sigmas + sigma_params <- params[, (d %/% 2 + 1):d] + sigma <- torch::nnf_softplus(sigma_params) # ln(1 + exp(sigma_params)) + sigma <- sigma$clamp(min = min_sigma) # Make sure that sigma >= min_sigma + + # Create the normal dist. Multivariate, but with independent dimensions. Correlation = 0. So just Normal + distr <- torch::distr_normal(loc = mu, scale = sigma) + + # Return the distribution + return(distr) +} + +#' Creates Categorical Distributions +#' +#' @description Function that takes in a tensor containing the logits for each of the K classes. Each row corresponds to +#' an observations. Send each row through the softmax function to convert from logits to probabilities that sum 1 one. +#' The function also clamps the probabilities between a minimum and maximum probability. Note that we still normalize +#' them afterward, so the final probabilities can be marginally below or above the thresholds. +#' +#' @details Take a Tensor (e. g. 
a part of neural network output) and return [torch::distr_categorical()]
+#' distribution. The input tensor after applying softmax over the last axis contains a batch of the categorical
+#' probabilities. So there are no restrictions on the input tensor. Technically, this function treats the last axis as
+#' the categorical probabilities, but Categorical takes only 2D input where the first axis is the batch axis and the
+#' second one corresponds to the probabilities, so practically the function requires 2D input with the batch of
+#' probabilities for one categorical feature. `min_prob` is the minimal probability for each class.
+#' After clipping the probabilities from below and above they are renormalized in order to be a valid distribution.
+#' This regularization is required for the numerical stability and may be considered as a neural network architecture
+#' choice without any change to the probabilistic model. Note that the softmax function is given by
+#' \eqn{\operatorname{Softmax}(x_i) = (\exp(x_i))/(\sum_{j} \exp(x_j))}, where \eqn{x_i} are the logits and can
+#' take on any value, negative and positive. The output \eqn{\operatorname{Softmax}(x_i) \in [0,1]}
+#' and \eqn{\sum_{i} \operatorname{Softmax}(x_i) = 1}.
+#'
+#' @param params Tensor of dimension `batch_size` x `K` containing the logits for each
+#' of the `K` classes and `batch_size` observations.
+#' @param min_prob For stability it might be desirable that the minimal probability is not exactly zero.
+#' @param max_prob For stability it might be desirable that the maximal probability is not exactly one.
+#'
+#' @return [torch::distr_categorical()] distributions with the provided probabilities for each class.
+#' @author Lars Henry Berge Olsen
+#' @keywords internal
+vaeac_categorical_parse_params <- function(params, min_prob = 0, max_prob = 1) {
+  # One option here is to directly use that 'distr_categorical' supports logits. I.e., we could have used
+  # `distr = torch::distr_categorical(logits = params)` and then been done. However, we would then not be able
+  # to clamp the probabilities. In tests, the logits version is about 30% faster, min_prob is seldom reached,
+  # and the two versions give the same values.
+
+  # Send the parameters through the softmax to get normalized probabilities
+  params <- torch::nnf_softmax(params, dim = -1)
+
+  # Ensure that the probabilities are between the minimum and maximum allowed probabilities and that they sum to one
+  params <- torch::torch_clamp(params, min = min_prob, max = max_prob)
+  params <- params / torch::torch_sum(params, dim = -1, keepdim = TRUE)
+
+  # Create a categorical distribution with ncol(params) levels, where the probability of each level is given by params.
+  distr <- torch::distr_categorical(probs = params)
+
+  return(distr)
+}
+
+#' Compute the KL Divergence Between Two Gaussian Distributions
+#'
+#' @description Computes the KL divergence between univariate normal distributions using the analytical formula,
+#' see \url{https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence#Multivariate_normal_distributions}.
+#'
+#' @param p A [torch::distr_normal()] object.
+#' @param q A [torch::distr_normal()] object.
+#'
+#' @return The KL divergence between the two Gaussian distributions.
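+#'
+#' @details For univariate normals \eqn{p = N(\mu_p, \sigma_p^2)} and \eqn{q = N(\mu_q, \sigma_q^2)}, the function
+#' evaluates the closed-form expression
+#' \deqn{D_{KL}(p \| q) = \log\frac{\sigma_q}{\sigma_p} + \frac{\sigma_p^2 + (\mu_p - \mu_q)^2}{2\sigma_q^2} - \frac{1}{2},}
+#' element-wise over the tensors of means and standard deviations in `p` and `q`.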
+#' +#' @author Lars Henry Berge Olsen +#' @keywords internal +vaeac_kl_normal_normal <- function(p, q) { + var_ratio <- (p$scale / q$scale)$pow(2) + t1 <- ((p$loc - q$loc) / q$scale)$pow(2) + return(0.5 * (var_ratio + t1 - 1 - var_ratio$log())) +} + +# Neural Network Modules =============================================================================================== +## GaussCatSamplerMostLikely ------------------------------------------------------------------------------- +#' A [torch::nn_module()] Representing a GaussCatSamplerMostLikely +#' +#' @description +#' The GaussCatSamplerMostLikely generates the most likely samples from +#' the generative distribution defined by the output of the vaeac. +#' I.e., the layer will return the mean and most probable class for the Gaussian (continuous features) +#' and categorical (categorical features) distributions, respectively. +#' +#' @param one_hot_max_sizes A vector of integers where the i-th entry is the number of one-hot encoding +#' for the i-th feature. I.e., a categorical feature with 5 levels will have a one_hot_max_size of 5. +#' A feature with a one_hot_max_size of either 0 or 1 will be treated as a continuous feature. +#' @param min_sigma For stability it might be desirable that the minimal sigma is not too close to zero. +#' @param min_prob For stability it might be desirable that the minimal probability is not too close to zero. +#' +#' @return A `GaussCatSamplerMostLikely` object. +#' +#' @keywords internal +#' @author Lars Henry Berge Olsen +GaussCatSamplerMostLikely <- torch::nn_module( + + # field classname Type of torch::nn_module + classname = "GaussCatSamplerMostLikely", + + # description Initialize a GaussCatSamplerMostLikely which generates the most likely + # sample from the generative distribution defined by the output of the neural network. + initialize = function(one_hot_max_sizes, min_sigma = 1e-4, min_prob = 1e-4) { + self$one_hot_max_sizes <- one_hot_max_sizes + self$min_sigma <- min_sigma + self$min_prob <- min_prob + }, + + # param dist_params A matrix of form batch_size x (mu_1, sigma_1, ..., mu_p, sigma_p), + # when only considering continuous features. + # For categorical features, we do NOT have mu and sigma for the decoder at the end of the vaeac, + # but rather logits for the categorical distribution. + # return A tensor containing the generated data. + forward = function(distr_params) { + # A counter to keep track of which + cur_distr_col <- 1 + + # List to store all the samples sampled from the + # normal distribution with parameters from distr_params. + sample <- list() + + # Iterate over the features + for (i in seq_along(self$one_hot_max_sizes)) { + size <- self$one_hot_max_sizes[i] + + if (size <= 1) { + # Continuous + # Gaussian distribution + # Get the mu and sigma for the current feature, for each instance + params <- distr_params[, cur_distr_col:(cur_distr_col + 1)] + cur_distr_col <- cur_distr_col + 2 + + # generative model distribution for the feature + # so create batch_size number of normal distributions. 
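+        # (vaeac_normal_parse_params treats the first column as the mean and the softplus of the second column,
+        # clamped from below at min_sigma, as the standard deviation.)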
+ distr <- vaeac_normal_parse_params(params, self$min_sigma) + + # We sample the mean (most likely value) + col_sample <- distr$mean + } else { + # Categorical distribution + + # Extract the logits of the different classes for the ith categorical variable + params <- distr_params[, cur_distr_col:(cur_distr_col + size - 1)] + cur_distr_col <- cur_distr_col + size + + # Generate the categorical distribution based on the logits, which are + # transformed and clamped in the 'vaeac_categorical_parse_params' function. + # distr is a "torch::distr_categorical" distribution. + distr <- vaeac_categorical_parse_params(params, self$min_prob) + + # Return the class with highest probability + # By doing [, NULL], we add an extra dimension such that the tensor is a column vector. + col_sample <- torch::torch_max(distr$probs, -1)[[2]][, NULL]$to(dtype = torch::torch_float()) + } + + # Add the vector of sampled values for the i´th + # feature to the sample list. + sample <- append(sample, col_sample) + } + + # Create a matrix by column binding the vectors in the list + return(torch::torch_cat(sample, -1)) + } +) + +## GaussCatSamplerRandom ----------------------------------------------------------------------------------- +#' A [torch::nn_module()] Representing a GaussCatSamplerRandom +#' +#' @description +#' The GaussCatSamplerRandom generates random samples from the generative +#' distribution defined by the output of the vaeac. The random sample is generated by +#' sampling from the inferred Gaussian and categorical distributions for the +#' continuous and categorical features, respectively. +#' +#' @param one_hot_max_sizes A vector of integers where the i-th entry is the number of one-hot encoding +#' for the i-th feature. I.e., a categorical feature with 5 levels will have a one_hot_max_size of 5. +#' A feature with a one_hot_max_size of either 0 or 1 will be treated as a continuous feature. +#' @param min_sigma For stability it might be desirable that the minimal sigma is not too close to zero. +#' @param min_prob For stability it might be desirable that the minimal probability is not too close to zero. +#' +#' @author Lars Henry Berge Olsen +#' @keywords internal +GaussCatSamplerRandom <- torch::nn_module( + + # field classname Type of torch::nn_module + classname = "GaussCatSamplerRandom", + + # description + # Initialize a GaussCatSamplerRandom which generates a sample from the + # generative distribution defined by the output of the neural network by random sampling. + # return A new `GaussCatSamplerRandom` object. + initialize = function(one_hot_max_sizes, + min_sigma = 1e-4, + min_prob = 1e-4) { + self$one_hot_max_sizes <- one_hot_max_sizes + self$min_sigma <- min_sigma + self$min_prob <- min_prob + }, + + # param dist_params A matrix of form batch_size x (mu_1, sigma_1, ..., mu_p, sigma_p), + # when only considering continuous features. + # For categorical features, we do NOT have mu and sigma for the decoder at the end of the vaeac, + # but rather logits for the categorical distribution. + # return A tensor containing the generated data. + forward = function(distr_params) { + # A counter to keep track of which + cur_distr_col <- 1 + + # List to store all the samples sampled from the + # normal distribution with parameters from distr_params. + sample <- list() + + # Iterate over the features + for (i in seq_along(self$one_hot_max_sizes)) { + size <- self$one_hot_max_sizes[i] + + if (size <= 1) { + # Continuous. Gaussian distribution. 
Get the mu and sigma for the current feature, for each instance + params <- distr_params[, cur_distr_col:(cur_distr_col + 1)] + cur_distr_col <- cur_distr_col + 2 + + # generative model distribution for the feature, i.e., create batch_size number of normal distributions. + distr <- vaeac_normal_parse_params(params, self$min_sigma) + + # Sample from the inferred Gaussian distributions + col_sample <- distr$sample() + } else { + # Categorical distribution + + # Extract the logits of the different classes for the ith categorical variable + params <- distr_params[, cur_distr_col:(cur_distr_col + size - 1)] + cur_distr_col <- cur_distr_col + size + + # Generate the categorical distribution based on the logits, which are + # transformed and clamped in the 'vaeac_categorical_parse_params' function. + # distr is a "torch::distr_categorical" distribution. + distr <- vaeac_categorical_parse_params(params, self$min_prob) + + # Sample a class from the distribution based on each class' probabilities. + # By doing [, NULL], we add an extra dimension such that the tensor is a column vector. + # Here we can use $sample() as it respects manual set seeds. + col_sample <- distr$sample()[, NULL]$to(dtype = torch::torch_float()) + } + + # Add the vector of sampled values for the i´th + # feature to the sample list. + sample <- append(sample, col_sample) + } + + # Create a matrix by column binding the vectors in the list + return(torch::torch_cat(sample, -1)) + } +) + + +## GaussCatParameters -------------------------------------------------------------------------------------- +#' A [torch::nn_module()] Representing a GaussCatParameters +#' +#' @description +#' The GaussCatParameters module extracts the parameters +#' from the inferred generative Gaussian and categorical distributions for the +#' continuous and categorical features, respectively. +#' +#' If `one_hot_max_sizes` is \eqn{[4, 1, 1, 2]}, then the inferred distribution parameters for one observation is the +#' vector \eqn{[p_{00}, p_{01}, p_{02}, p_{03}, \mu_1, \sigma_1, \mu_2, \sigma_2, p_{30}, p_{31}]}, where +#' \eqn{\operatorname{Softmax}([p_{00}, p_{01}, p_{02}, p_{03}])} and \eqn{\operatorname{Softmax}([p_{30}, p_{31}])} +#' are probabilities of the first and the fourth feature categories respectively in the model generative distribution, +#' and Gaussian(\eqn{\mu_1, \sigma_1^2}) and Gaussian(\eqn{\mu_2, \sigma_2^2}) are the model generative distributions +#' on the second and the third features. +#' +#' @param one_hot_max_sizes A vector of integers where the i-th entry is the number of +#' one-hot encoding for the i-th feature. +#' I.e., a categorical feature with 5 levels will have a one_hot_max_size of 5. +#' A feature with a one_hot_max_size of either 0 or 1 will be treated as a continuous feature. +#' @param min_sigma For stability it might be desirable that the minimal sigma is not too close to zero. +#' @param min_prob For stability it might be desirable that the minimal probability is not too close to zero. +#' +#' @author Lars Henry Berge Olsen +#' @keywords internal +GaussCatParameters <- torch::nn_module( + # field classname Type of torch::nn_module + classname = "GaussCatParameters", + + # description + # Initialize a `GaussCatParameters` which extract the parameters from the + # generative distribution defined by the output of the neural network. + # return A new `GaussCatParameters` object. 
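+  # For example, with one_hot_max_sizes = c(1, 3), the module's forward method returns, per observation,
+  # c(mean_1, sd_1, prob_21, prob_22, prob_23): two Gaussian parameters for the continuous feature and
+  # three class probabilities for the categorical feature.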
+ initialize = function(one_hot_max_sizes, + min_sigma = 1e-4, + min_prob = 1e-4) { + self$one_hot_max_sizes <- one_hot_max_sizes + self$min_sigma <- min_sigma + self$min_prob <- min_prob + }, + + # param dist_params A matrix of form batch_size x (mu_1, sigma_1, ..., mu_p, sigma_p), when only + # considering continuous features. For categorical features, we do NOT have mu and sigma for the + # decoder at the end of the vaeac, but rather logits for the categorical distribution. + # return A tensor containing the final parameters of the generative distributions (after transformations). + forward = function(distr_params) { + # A counter to keep track of which + cur_distr_col <- 1 + + # List to store all the generative parameters from the normal and categorical distributions + parameters <- list() + + # Iterate over the features + for (i in seq_along(self$one_hot_max_sizes)) { + size <- self$one_hot_max_sizes[i] + + if (size <= 1) { + # Continuous. Gaussian distribution. Get the mu and sigma for the current feature, for each instance. + params <- distr_params[, cur_distr_col:(cur_distr_col + 1)] + cur_distr_col <- cur_distr_col + 2 + + # generative model distribution for the feature, i.e., create batch_size number of normal distributions. + distr <- vaeac_normal_parse_params(params, self$min_sigma) + + # Combine the current parameters + current_parameters <- torch::torch_cat(c(distr$mean, distr$scale), -1) + } else { + # Categorical distribution + + # Extract the logits of the different classes for the ith categorical variable + params <- distr_params[, cur_distr_col:(cur_distr_col + size - 1)] + cur_distr_col <- cur_distr_col + size + + # Generate the categorical distribution based on the logits, which are + # transformed and clamped in the 'vaeac_categorical_parse_params' function. + # distr is a "torch::distr_categorical" distribution. + distr <- vaeac_categorical_parse_params(params, self$min_prob) + + # Extract the current probabailities for each classs + current_parameters <- distr$probs + } + + # Add the tensor of current parameters for the i´th feature to the parameters list + parameters <- append(parameters, current_parameters) + } + + # Create a torch_tensor by column binding the tensors in the list + return(torch::torch_cat(parameters, -1)) + } +) + +## GaussCatLoss -------------------------------------------------------------------------------------------- +#' A [torch::nn_module()] Representing a GaussCatLoss +#' +#' @description +#' The GaussCatLoss module/layer computes the log probability +#' of the `groundtruth` for each object given the mask and the distribution parameters. +#' That is, the log-likelihoods of the true/full training observations based on the +#' generative distributions parameters `distr_params` inferred by the masked versions of the observations. +#' +#' @details +#' Note that the module works with mixed data represented as 2-dimensional inputs and it +#' works correctly with missing values in `groundtruth` as long as they are represented by NaNs. +#' +#' @param one_hot_max_sizes A vector of integers where the i-th entry is the number of +#' one-hot encoding for the i-th feature. +#' I.e., a categorical feature with 5 levels will have a one_hot_max_size of 5. +#' A feature with a one_hot_max_size of either 0 or 1 will be treated as a continuous feature. +#' @param min_sigma For stability it might be desirable that the minimal sigma is not too close to zero. 
+#' @param min_prob For stability it might be desirable that the minimal probability is not too close to zero. +#' +#' @author Lars Henry Berge Olsen +#' @keywords internal +GaussCatLoss <- torch::nn_module( + + # field classname Type of torch::nn_module + classname = "GaussCatLoss", + + # description Initialize a `GaussCatLoss`. + # return A new `GaussCatLoss` object. + initialize = function(one_hot_max_sizes, min_sigma = 1e-4, min_prob = 1e-4) { + self$one_hot_max_sizes <- one_hot_max_sizes + self$min_sigma <- min_sigma + self$min_prob <- min_prob + }, + forward = function(groundtruth, distr_params, mask) { + # Which column in distr_params we now consider. + # Either increases with 2 in cont case (mu and sigma) + # or in increases with one-hot encoding size in cat case. + cur_distr_col <- 1 + + # List to store the log probabilities. + log_prob <- list() + + # Iterate over the features + for (i in seq_along(self$one_hot_max_sizes)) { + size <- self$one_hot_max_sizes[i] + + if (size <= 1) { + # Continuous feature + # Gaussian distribution + + # select groundtruth, mask and distr_params for i-th feature + groundtruth_col <- groundtruth[, i, drop = FALSE] # Look at the ith column of the truth + mask_col <- mask[, i, drop = FALSE] # Get the ith column of the mask + + # These are the mean and sigma for the ith feature, + # so dimensions batch_size x 2 + params <- distr_params[, cur_distr_col:(cur_distr_col + 1), drop = FALSE] + cur_distr_col <- cur_distr_col + 2 + + # generative model distribution for the feature + distr <- vaeac_normal_parse_params(params, self$min_sigma) + # distr$mean + # distr$scale + # log(1 + exp(params[,2])) + + # copy ground truth column, so that zeroing nans will not affect the original data + gt_col_nansafe <- groundtruth_col$clone()$detach() + + # If groundtruth don't have any nans then this line does not change anything. Set 'NaN's to zero + nan_mask <- torch::torch_isnan(groundtruth_col) + gt_col_nansafe[nan_mask] <- 0 + + # Mask_col masks both the nan/missing values and the artificially masked values. We want to compute the the log + # prob only over the artificially missing features, so we omit the missing values. We remove the masking of the + # missing values. So those ones in mask_col which are there due to missing values are now turned in to zeros. + mask_col <- mask_col * (torch::torch_logical_not(nan_mask))$to(dtype = torch::torch_float()) + + # Get the log-likelihood, but only of the masked values i.e., the ones hat are masked by the masking scheme + # MCARGenerator. This one is batch_size x 1 and is the log-lik of observing the ground truth given the current + # parameters, for only the artificially masked features. + col_log_prob <- distr$log_prob(gt_col_nansafe) * mask_col + } else { + # Categorical feature and categorical distribution + + # Extract the ground truth and mask + groundtruth_col <- groundtruth[, i, drop = FALSE] # Look at the ith column of the truth + mask_col <- mask[, i, drop = FALSE] # Get the ith column of the mask + + # Extract the probabilities for each of the K-classes for the ith feature. The dimension is batch_size x size. + params <- distr_params[, cur_distr_col:(cur_distr_col + size - 1), drop = FALSE] + cur_distr_col <- cur_distr_col + size + + # Create a categorical distrbution based on the extracted parameters. Returns a "torch::distr_categorical" + # distribution. Ensures that the probabbility for each class is at least self$min_prob. 
+ distr <- vaeac_categorical_parse_params(params, self$min_prob) + + # copy ground truth column, so that zeroing nans will not affect the original data + gt_col_nansafe <- groundtruth_col$clone()$detach() + + # If groundtruth don't have any nans then this line does not change anything + nan_mask <- torch::torch_isnan(groundtruth_col) + gt_col_nansafe[nan_mask] <- 0 + + # Compute the mask of the values which we consider in the log probability. #e remove the masking of the missing + # values. So the ones in mask_col which are there due to missing values are now turned in to zeros. + mask_col <- mask_col * (torch::torch_logical_not(nan_mask))$to(dtype = torch::torch_float()) + col_log_prob <- distr$log_prob(gt_col_nansafe$squeeze())[, NULL] * mask_col + } + + # Append the column of log probabilities for the i-th feature for those instances that are masked into log_prob. + # log_prob is now a list of length n_features, where each element is a tensor batch_size x 1 containing the + # log-lik of the parameters of masked values. + log_prob <- append(log_prob, col_log_prob) + } + + # Concatenate the list into tensor of dim batch x features. Then sum along the the rows. + # That is, for each observation in the batch to get a tensor of length batch size. + return(torch::torch_cat(log_prob, 2)$sum(-1)) + } +) + + +## CategoricalToOneHotLayer ------------------------------------------------------------------------------------------- +#' A [torch::nn_module()] Representing a CategoricalToOneHotLayer +#' +#' @description +#' The CategoricalToOneHotLayer module/layer expands categorical features into one-hot vectors, +#' because multi-layer perceptrons are known to work better with this data representation. +#' It also replaces NaNs with zeros in order so that further layers may work correctly. +#' +#' @param one_hot_max_sizes A vector of integers where the i-th entry is the number of +#' one-hot encoding for the i-th feature. +#' I.e., a categorical feature with 5 levels will have a one_hot_max_size of 5. +#' A feature with a one_hot_max_size of either 0 or 1 will be treated as a continuous feature. +#' @param add_nans_map_for_columns Optional list which contains indices of columns which +#' is_nan masks are to be appended to the result tensor. This option is necessary for the full +#' encoder to distinguish whether value is to be reconstructed or not. +#' +#' @details +#' Note that the module works with mixed data represented as 2-dimensional inputs and it +#' works correctly with missing values in `groundtruth` as long as they are repsented by NaNs. +#' +#' @author Lars Henry Berge Olsen +#' @keywords internal +CategoricalToOneHotLayer <- torch::nn_module( + + # field classname Type of torch::nn_module + classname = "CategoricalToOneHotLayer", + initialize = function(one_hot_max_sizes, add_nans_map_for_columns = NULL) { + # Here one_hot_max_sizes includes zeros at the end of the list + # one_hot_max_sizes + [0] * len(one_hot_max_sizes) + # So if we have that features have this many categories [1, 2, 3, 1], + # then we get that one_hot_max_sizes = [1, 2, 3, 1, 0, 0, 0, 0] + self$one_hot_max_sizes <- one_hot_max_sizes + + # Is always an empty column for the Masked Encoder network + # while it is a list [0, 1, ..., length(one_hot_max_sizes)-1) + # for the Full Encoder network. 
+ # So for the Full Encoder network we apply the nan masks to each column/feature + self$add_nans_map_for_columns <- add_nans_map_for_columns + }, + forward = function(input) { + # input = torch::torch_cat(c(batch, mask), -1) + # Input is torch::torch_cat(c(batch, mask), -1), so a matrix of + # dimension batch_size x 2*sum(one_hot_max_sizes) + # At least for continuous data where one_hot_max_sizes + # only consists of ones. Recall that ONE_HOT_MAX_SIZES + # are padded with zeros at the end in this function. + + # Get the number of instances in the input batch. + n <- input$shape[1] + + # variable to store the out columns, i.e., the input columns / one hot encoding + is nan.mask. + out_cols <- NULL + + # We iterate over the features and get the number + # of categories for each feature. + # so i goes from 0 to 2*n_features-1 + # For i in [n_features, 2*n_features-1] will have size <= 1, + # even for categorical features. + i <- 1 + for (i in seq_along(self$one_hot_max_sizes)) { + size <- self$one_hot_max_sizes[i] + + # Distinguish between continuous and categorical features + if (size <= 1) { + # If size <= 1, then the feature is continuous + # just copy it and replace NaNs with zeros + # OR, the last half of self.one_hot_max_sizes + + # Take the ith column of the input + # NOTE THAT THIS IS NOT A DEEP COPY, so changing out_col changes input + out_col <- input[, i:i] # maybe add '$clone()$detach()'? + + # check if any of the values are nan, i.e., missing + nan_mask <- torch::torch_isnan(out_col) + + # set all the missing values to 0. + # THIS CHANGES THE INPUT VARIABLE. + out_col[nan_mask] <- 0 + } else { + # Categorical feature + + # Get the categories for each instance for the ith feature + # start to count at zero. So if we have 2 cat, then this + # vector will contains zeros and ones. + cat_idx <- input[, i:i] # $clone()$detach() + + # Check if any of the categories are nan / missing + nan_mask <- torch::torch_isnan(cat_idx) + + # Set the nan values to 0 + cat_idx[nan_mask] <- 0 + + # Create a matrix, where the jth row is the one-hot encoding of the ith feature of the jth instance. + out_col <- matrix(0, nrow = n, ncol = size) + out_col[cbind(seq(n), as.matrix(cat_idx))] <- 1 + out_col <- torch::torch_tensor(out_col, device = input$device) + } + + # append this feature column to the result + # out_col is n x size = batch_size x n_categories_for_this_feature + out_cols <- torch::torch_cat(c(out_cols, out_col), dim = -1) + + # if necessary, append isnan mask of this feature to the result + # which we always do for the proposal network. + # This only happens for the first half of the i's, + # so for i = 1, ..., n_features. + if (i %in% self$add_nans_map_for_columns) { + # so we add the columns of nan_mask + out_cols <- torch::torch_cat(c(out_cols, nan_mask$to(dtype = torch::torch_float())), dim = -1) + } + } + + # ONLY FOR CONTINUOUS FEATURES. + # out_cols now is a list of n_features tensors of shape n x size + # = n x 1 for continuous variables. 
So we concatenate them
+  # to get a matrix of dim n x 2*n_features (in the continuous case) for
+  # the prior net, but for the proposal net, it is n x 3*n_features.
+  # They take the form [batch1, is.nan1, batch2, is.nan2, …,
+  # batch12, is.nan12, mask1, mask2, …, mask12]
+    return(out_cols)
+  }
+)
+
+
+
+
+# Mask Generators =====================================================================================================
+## MCAR_mask_generator ------------------------------------------------------------------------------------------------
+
+#' Missing Completely at Random (MCAR) Mask Generator
+#'
+#' @description
+#' A mask generator which masks the entries in the input completely at random.
+#'
+#' @details
+#' The mask generator masks each element in the `batch` (N x p) using a component-wise independent Bernoulli
+#' distribution with probability `masking_ratio`. The default value for `masking_ratio` is 0.5, so all
+#' masks are equally likely to be generated, including the empty and full masks.
+#' The function returns a mask of the same shape as the input `batch`, and the `batch` can contain
+#' missing values, indicated by the "NaN" token, which will always be masked.
+#'
+#' @param masking_ratio Numeric between 0 and 1. The probability for an entry in the generated mask to be 1 (masked).
+#' @param paired_sampling Boolean. If we are doing paired sampling. So include both S and \eqn{\bar{S}}.
+#' If `TRUE`, then `batch` must be sampled using [shapr::paired_sampler()] which ensures that the `batch` contains
+#' two instances for each original observation. That is, `batch` \eqn{= [X_1, X_1, X_2, X_2, X_3, X_3, ...]}, where
+#' each entry \eqn{X_j} is a row of dimension \eqn{p} (i.e., the number of features).
+#'
+#' @section Shape:
+#' - Input: \eqn{(N, p)} where N is the number of observations in the `batch` and \eqn{p} is the number of features.
+#' - Output: \eqn{(N, p)}, same shape as the input
+#'
+#' @examples
+#' \dontrun{
+#' mask_gen <- MCAR_mask_generator(masking_ratio = 0.5, paired_sampling = FALSE)
+#' batch <- torch::torch_randn(c(5, 3))
+#' mask_gen(batch)
+#' }
+#'
+#' @author Lars Henry Berge Olsen
+#' @keywords internal
+MCAR_mask_generator <- torch::nn_module(
+
+  # field name Type of mask generator
+  name = "MCAR_mask_generator",
+
+  # description
+  # Initialize a missing completely at random mask generator.
+  # param masking_ratio The probability for an entry in the generated mask to be 1 (masked).
+  # param paired_sampling Boolean. If we are doing paired sampling. So include both S and \eqn{\bar{S}}.
+  # If TRUE, then batch must be sampled using `paired_sampler` which creates batches where
+  # the first half and second half of the rows are duplicates of each other. That is,
+  # batch = [row1, row1, row2, row2, row3, row3, ...].
+  # return A new `MCAR_mask_generator` object.
+  initialize = function(masking_ratio = 0.5, paired_sampling = FALSE) {
+    self$masking_ratio <- masking_ratio
+    self$paired_sampling <- paired_sampling
+  },
+
+  # description
+  # Generates a MCAR mask by calling the self$MCAR_mask_generator_function.
+  # param batch Matrix/Tensor. Only used to get the dimensions and to check if any of the
+  # entries are missing. If any are missing, then the returned mask will ensure that
+  # these missing entries are masked.
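+  # Illustrative sketch of the paired-sampling reordering used below (hypothetical batch of 4 rows,
+  # i.e., 2 sampled masks m1 and m2): torch_cat() first stacks the masks as [m1, m2, !m1, !m2], and the
+  # index vector c(matrix(1:4, nrow = 2, byrow = TRUE)) = c(1, 3, 2, 4) then interleaves them into
+  # [m1, !m1, m2, !m2], matching the duplicated rows produced by paired_sampler().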
+  forward = function(batch) {
+    self$MCAR_mask_generator_function(batch, prob = self$masking_ratio, paired_sampling = self$paired_sampling)
+  },
+
+  # description Missing Completely At Random Mask Generator: A mask generator where the masking
+  # is determined by a component-wise independent Bernoulli distribution.
+  #
+  # details
+  # Function that takes in a batch of observations and the probability
+  # of masking each element based on a component-wise independent Bernoulli
+  # distribution. The default value is 0.5, so all masks are equally likely to be generated.
+  # The function returns a mask of the same shape as the batch.
+  # Note that the batch can contain missing values, indicated by the "NaN" token.
+  # The mask will always mask missing values.
+  #
+  # param batch Matrix/Tensor. Only used to get the dimensions and to check if any of the
+  # entries are missing. If any are missing, then the returned mask will ensure that
+  # these missing entries are masked.
+  # param prob Numeric between 0 and 1. The probability that an entry will be masked.
+  # param seed Integer. Used to set the seed for the sampling process such that we
+  # can reproduce the same masks.
+  # param paired_sampling Boolean. If we are doing paired sampling. So include both S and \eqn{\bar{S}}.
+  # If TRUE, then batch must be sampled using 'paired_sampler' which creates batches where
+  # the first half and second half of the rows are duplicates of each other. That is,
+  # batch = [row1, row1, row2, row2, row3, row3, ...].
+  #
+  # examples
+  # MCAR_mask_generator_function(torch::torch_rand(c(5, 3)))
+  #
+  # return A binary matrix of the same size as 'batch'. An entry of '1' indicates that the
+  # observed feature value will be masked. '0' means that the entry is NOT masked,
+  # i.e., the feature value will be observed/given/available.
+  MCAR_mask_generator_function = function(batch, prob = 0.5, seed = NULL, paired_sampling = FALSE) {
+    # If the user specifies a seed for reproducibility
+    if (!is.null(seed)) set.seed(seed)
+
+    # Get the number of entries in the batch.
+    size <- prod(batch$shape)
+
+    # If doing paired sampling, divide size by two as we later concatenate with the inverse mask.
+    if (paired_sampling) size <- size / 2
+
+    # Check for missing values in the batch
+    nan_mask <- batch$isnan()$to(torch::torch_float())
+
+    # # Torch version, but marginally slower than the R version when batch_size <= 128 and n_features <= 50
+    # mask = torch::torch_bernoulli(torch::torch_full_like(batch, prob))
+    # Create the Bernoulli mask where an element is masked (1) with probability 'prob'.
+    mask <- torch::torch_tensor(
+      matrix(sample(c(0, 1), size = size, replace = TRUE, prob = c(1 - prob, prob)), ncol = ncol(batch)),
+      dtype = torch::torch_float()
+    )
+
+    # If paired sampling, then concatenate the inverse mask and reorder to ensure correct order [m1, !m1, m2, !m2, ...].
+    if (paired_sampling) {
+      mask <- torch::torch_cat(c(mask, !mask), 1L)[c(matrix(seq_len(nrow(batch)), nrow = 2, byrow = TRUE)), ]
+    }
+
+    # Mask all entries that are missing or artificially masked by the Bernoulli mask. 1 means that the entry is masked.
+    return(mask + nan_mask >= 1)
+  }
+)
+
+
+## Specified_prob_mask_generator -------------------------------------------------------------------------------
+#' A [torch::nn_module()] Representing a Specified_prob_mask_generator
+#'
+#' @description A mask generator which masks the entries based on specified probabilities.
+#'
+#' @details
+#' A class that takes in the probabilities of having d masked observations, i.e., for M-dimensional data,
+#' masking_probs is of length M+1, where the d'th entry is the probability of having d-1 masked values.
+#'
+#' A mask generator that first samples the number of entries 'd' to be masked in
+#' the 'M'-dimensional observation 'x' in the batch based on the given M+1 probabilities. The
+#' 'd' masked entries are uniformly sampled from the 'M' possible feature indices. The d'th entry
+#' is the probability of having d-1 masked values.
+#'
+#' Note that MCAR_mask_generator with p = 0.5 is the same as using [shapr::Specified_prob_mask_generator()]
+#' with `masking_probs` = choose(M, 0:M), where M is the number of features. This function was initially
+#' created to check if increasing the probability of having masks with many masked features improved
+#' vaeac's performance by focusing more on these situations during training.
+#'
+#' @param masking_probs A vector of M+1 numerics containing the probabilities of masking 'd' (d = 0, ..., M) entries
+#' for each observation.
+#' @param paired_sampling Boolean. If we are doing paired sampling. So include both S and \eqn{\bar{S}}.
+#' If TRUE, then batch must be sampled using 'paired_sampler' which creates batches where
+#' the first half and second half of the rows are duplicates of each other. That is,
+#' `batch = [row1, row1, row2, row2, row3, row3, ...]`.
+#'
+#' @examples
+#' \dontrun{
+#' probs <- c(1, 8, 6, 3, 2)
+#' mask_gen <- Specified_prob_mask_generator(probs)
+#' masks <- mask_gen(torch::torch_randn(c(10000, length(probs) - 1)))
+#' empirical_prob <- table(as.array(masks$sum(2)))
+#' empirical_prob / sum(empirical_prob)
+#' probs / sum(probs)
+#' }
+#'
+#' @keywords internal
+Specified_prob_mask_generator <- torch::nn_module(
+
+  # field name Type of mask generator
+  name = "Specified_prob_mask_generator",
+
+  # description Initialize a specified-probability mask generator.
+  initialize = function(masking_probs, paired_sampling = FALSE) {
+    self$masking_probs <- masking_probs / sum(masking_probs)
+    self$paired_sampling <- paired_sampling
+  },
+
+  # description Generates a specified probability mask by calling the
+  # self$Specified_prob_mask_generator_function.
+  # param batch Matrix/Tensor. Only used to get the dimensions and to check if any of the entries are
+  # missing. If any are missing, then the returned mask will ensure that these missing entries are masked.
+  forward = function(batch) {
+    self$Specified_prob_mask_generator_function(batch,
+      masking_probs = self$masking_probs,
+      paired_sampling = self$paired_sampling
+    )
+  },
+
+
+  # description Specified Probability Mask Generator:
+  # A mask generator that first samples the number of entries 'd' to be masked in
+  # the 'M'-dimensional observation 'x' in the batch based on the given M+1 probabilities. The
+  # 'd' masked entries are uniformly sampled from the 'M' possible feature indices. The d'th entry
+  # is the probability of having d-1 masked values.
+  #
+  # details Note that MCAR_mask_generator with p = 0.5 is the same as using Specified_prob_mask_generator
+  # with masking_probs = choose(M, 0:M), where M is the number of features. This function was initially
+  # created to check if increasing the probability of having masks with many masked features improved
+  # vaeac's performance by focusing more on these situations during training.
+  #
+  # param batch Matrix/Tensor. Only used to get the dimensions and to check if any of the
+  # entries are missing. If any are missing, then the returned mask will ensure that
+  # these missing entries are masked.
+  # param masking_probs A vector of M+1 numerics containing the probabilities of masking 'd' (d = 0, ..., M) entries
+  # for each observation.
+  # param seed Integer. Used to set the seed for the sampling process such that we
+  # can reproduce the same masks.
+  # param paired_sampling Boolean. If we are doing paired sampling. So include both S and \bar{S}.
+  # If TRUE, then batch must be sampled using 'paired_sampler' which creates batches where
+  # the first half and second half of the rows are duplicates of each other. That is,
+  # `batch = [row1, row1, row2, row2, row3, row3, ...]`.
+  #
+  # examples Specified_prob_mask_generator_function(torch::torch_rand(c(5, 4)), masking_probs = c(2, 7, 5, 3, 3))
+  #
+  # return A binary matrix of the same size as 'batch'. An entry of '1' indicates that the
+  # observed feature value will be masked. '0' means that the entry is NOT masked,
+  # i.e., the feature value will be observed/given/available.
+  Specified_prob_mask_generator_function = function(batch, masking_probs, seed = NULL, paired_sampling = FALSE) {
+    # If the user specifies a seed for reproducibility
+    if (!is.null(seed)) set.seed(seed)
+
+    # Get the number of features and observations in the batch
+    n_features <- ncol(batch)
+    size <- nrow(batch)
+
+    # Check for missing values in the batch
+    nan_mask <- batch$isnan()$to(torch::torch_float())
+
+    # If doing paired sampling, divide size by two as we later concatenate with the inverse mask.
+    if (paired_sampling) size <- size / 2
+
+    # Sample the number of masked features in each row.
+    n_masked_each_row <- sample(x = seq(0, n_features), size = size, replace = TRUE, prob = masking_probs)
+
+    # Create the mask matrix
+    mask <- torch::torch_zeros_like(batch)
+    for (i in seq(size)) {
+      if (n_masked_each_row[i] != 0) mask[i, sample(n_features, size = n_masked_each_row[i], replace = FALSE)] <- 1
+    }
+
+    # If paired sampling, then concatenate the inverse mask and reorder to ensure correct order [m1, !m1, m2, !m2, ...].
+    if (paired_sampling) {
+      mask <- torch::torch_cat(c(mask, !mask), 1L)[c(matrix(seq_len(nrow(batch)), nrow = 2, byrow = TRUE)), ]
+    }
+
+    # Mask all entries that are missing or artificially masked by the generated mask. 1 means that the entry is masked.
+    return(mask + nan_mask >= 1)
+  }
+)
+
+## Specified_masks_mask_generator -------------------------------------------------------------------------------------
+#' A [torch::nn_module()] Representing a Specified_masks_mask_generator
+#'
+#' @description
+#' A mask generator which masks the entries based on sampling provided 1D masks with corresponding probabilities.
+#' Used for Shapley value estimation when only a subset of coalitions are used to compute the Shapley values.
+#'
+#' @param masks Matrix/Tensor of possible/allowed 'masks' which we sample from.
+#' @param masks_probs Array of 'probabilities' for each of the masks specified in 'masks'.
+#' Note that they do not need to be between 0 and 1 (e.g. sampling frequency).
+#' They are scaled, hence, they only need to be positive.
+#' @param paired_sampling Boolean. If we are doing paired sampling. So include both S and \eqn{\bar{S}}.
+#' If TRUE, then batch must be sampled using 'paired_sampler' which creates batches where
+#' the first half and second half of the rows are duplicates of each other. That is,
+#' `batch = [row1, row1, row2, row2, row3, row3, ...]`.
+#' @param batch Matrix/Tensor. Only used to get the dimensions and to check if any of the
+#' entries are missing.
If any are missing, then the returned mask will ensure that +#' these missing entries are masked. +#' +#' @examples +#' \dontrun{ +#' masks <- torch_tensor(matrix(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1), +#' nrow = 3, ncol = 4, byrow = TRUE +#' )) +#' masks_probs <- c(3, 1, 6) +#' mask_gen <- Specified_masks_mask_generator(masks = masks, masks_probs = masks_probs) +#' empirical_prob <- +#' table(as.array(mask_gen(torch::torch_randn(c(10000, ncol(masks))))$sum(-1))) +#' empirical_prob / sum(empirical_prob) +#' masks_probs / sum(masks_probs) +#' } +#' +#' @author Lars Henry Berge Olsen +#' @keywords internal +Specified_masks_mask_generator <- torch::nn_module( + + #' @field name Type of mask generator + name = "Specified_masks_mask_generator", + + #' @description Initialize a specified masks mask generator. + initialize = function(masks, masks_probs, paired_sampling = FALSE) { + self$masks <- masks + self$masks_probs <- masks_probs / sum(masks_probs) + self$paired_sampling <- paired_sampling + }, + + # description Generates a mask by calling self$Specified_masks_mask_generator_function function. + # param batch Matrix/Tensor. Only used to get the dimensions and to check if any of the + # entries are missing. If any are missing, then the returned mask will ensure that + # these missing entries are masked. + forward = function(batch) { + self$Specified_masks_mask_generator_function(batch, + masks = self$masks, + masks_probs = self$masks_probs, + paired_sampling = self$paired_sampling + ) + }, + + # description + # Sampling Masks from the Provided Masks with the Given Probabilities + # + # details + # Function that takes in a 'batch' of observations and matrix of possible/allowed + # 'masks' which we are going to sample from based on the provided probability in 'masks_probs'. + # Function returns a mask of same shape as batch. Note that the batch can contain missing values, + # indicated by the "NaN" token. The mask will always mask missing values. + # + # param batch Matrix/Tensor. Only used to get the dimensions and to check if any of the + # entries are missing. If any are missing, then the returned mask will ensure that + # these missing entries are masked. + # param masks Matrix/Tensor of possible/allowed 'masks' which we sample from. + # param masks_probs Array of 'probabilities' for each of the masks specified in 'masks'. + # Note that they do not need to be between 0 and 1. They are scaled, hence, they only need to be positive. + # param seed Integer. Used to set the seed for the sampling process such that we + # can reproduce the same masks. + # param paired_sampling Boolean. If we are doing paired sampling. So include both S and \bar{S}. + # If TRUE, then batch must be sampled using 'paired_sampler' which creates batches where + # the first half and second half of the rows are duplicates of each other. That is, + # batch = [row1, row1, row2, row2, row3, row3, ...]. + # + # return A binary matrix of the same size as 'batch'. An entry of '1' indicates that the + # observed feature value will be masked. '0' means that the entry is NOT masked, + # i.e., the feature value will be observed/given/available. + Specified_masks_mask_generator_function = function(batch, masks, masks_probs, seed = NULL, paired_sampling = FALSE) { + # Set seed if the user specifies a seed for reproducibility. 
+ if (!is.null(seed)) set.seed(seed) + + # Check for missing values in the batch + nan_mask <- batch$isnan()$to(torch::torch_float()) + + # Get the number of masks to choose from + n_masks <- nrow(masks) + + # Get the number of observations in the batch + size <- nrow(batch) + + # If doing paired sampling, divide size by two as we later concatenate with the inverse mask. + if (paired_sampling) size <- size / 2 + + # Sample 'n_observation' masks from the possible masks by first sampling the row indices + # based on the given mask probabilities and then use these indices to extract the masks. + mask_rows_indices <- sample.int(n = n_masks, size = size, replace = TRUE, prob = masks_probs) + mask <- torch::torch_tensor(masks[mask_rows_indices, ], dtype = torch::torch_float()) + + # If paired sampling, then concatenate the inverse mask and reorder to ensure correct order [m1, !m1, m2, !m2, ...]. + if (paired_sampling) { + mask <- torch::torch_cat(c(mask, !mask), 1L)[c(matrix(seq_len(nrow(batch)), nrow = 2, byrow = TRUE)), ] + } + + # Mask all entries that are missing or artificially masked by the Bernoulli mask. 1 means that the entry is masked. + return(mask + nan_mask >= 1) + } +) diff --git a/R/explain.R b/R/explain.R index ca354684f..1e32e9e42 100644 --- a/R/explain.R +++ b/R/explain.R @@ -17,8 +17,8 @@ #' see details for more information. #' #' @param approach Character vector of length `1` or one less than the number of features. -#' All elements should, -#' either be `"gaussian"`, `"copula"`, `"empirical"`, `"ctree"`, `"categorical"`, `"timeseries"`, or `"independence"`. +#' All elements should, either be `"gaussian"`, `"copula"`, `"empirical"`, `"ctree"`, `"vaeac"`, +#' `"categorical"`, `"timeseries"`, or `"independence"`. #' See details for more information. #' #' @param prediction_zero Numeric. @@ -89,6 +89,12 @@ #' #' @param timing Logical. #' Whether the timing of the different parts of the `explain()` should saved in the model object. +#' +#' @param verbose An integer specifying the level of verbosity. If `0`, `shapr` will stay silent. +#' If `1`, it will print information about performance. If `2`, some additional information will be printed out. +#' Use `0` (default) for no verbosity, `1` for low verbose, and `2` for high verbose. +#' TODO: Make this clearer when we end up fixing this and if they should force a progressr bar. +#' #' @param ... Further arguments passed to specific approaches #' #' @inheritDotParams setup_approach.empirical @@ -96,12 +102,13 @@ #' @inheritDotParams setup_approach.gaussian #' @inheritDotParams setup_approach.copula #' @inheritDotParams setup_approach.ctree +#' @inheritDotParams setup_approach.vaeac #' @inheritDotParams setup_approach.categorical #' @inheritDotParams setup_approach.timeseries #' #' @details The most important thing to notice is that `shapr` has implemented six different #' approaches for estimating the conditional distributions of the data, namely `"empirical"`, -#' `"gaussian"`, `"copula"`, `"ctree"`, `"categorical"`, `"timeseries"`, and `"independence"`. +#' `"gaussian"`, `"copula"`, `"ctree"`, `"vaeac"`, `"categorical"`, `"timeseries"`, and `"independence"`. #' In addition, the user also has the option of combining the different approaches. 
#' E.g., if you're in a situation where you have trained a model that consists of 10 features, #' and you'd like to use the `"gaussian"` approach when you condition on a single feature, @@ -146,7 +153,7 @@ #' The difference between the prediction and `none` is distributed among the other features. #' In theory this value should be the expected prediction without conditioning on any features. #' Typically we set this value equal to the mean of the response variable in our training data, but other choices -#' such as the mean of the predictions in the training data are also reasonable. [explain()] [shapr::explain()] +#' such as the mean of the predictions in the training data are also reasonable. #' #' @examples #' @@ -266,6 +273,7 @@ explain <- function(model, get_model_specs = NULL, MSEv_uniform_comb_weights = TRUE, timing = TRUE, + verbose = 0, ...) { # ... is further arguments passed to specific approaches timing_list <- list( @@ -277,7 +285,6 @@ explain <- function(model, # Gets and check feature specs from the model feature_specs <- get_feature_specs(get_model_specs, model) - # Sets up and organizes input parameters # Checks the input parameters and their compatability # Checks data/model compatability @@ -295,6 +302,7 @@ explain <- function(model, feature_specs = feature_specs, MSEv_uniform_comb_weights = MSEv_uniform_comb_weights, timing = timing, + verbose = verbose, ... ) @@ -353,5 +361,13 @@ explain <- function(model, output$internal$objects$cols_per_horizon <- NULL output$internal$objects$W_list <- NULL + if (isFALSE(output$internal$parameters$vaeac.extra_parameters$vaeac.save_model)) { + output$internal$parameters[c( + "vaeac", "vaeac.sampler", "vaeac.model", "vaeac.activation_function", "vaeac.checkpoint" + )] <- NULL + output$internal$parameters$vaeac.extra_parameters[c("vaeac.folder_to_save_model", "vaeac.model_description")] <- + NULL + } + return(output) } diff --git a/R/explain_forecast.R b/R/explain_forecast.R index de6b43b57..f2a48eb5b 100644 --- a/R/explain_forecast.R +++ b/R/explain_forecast.R @@ -44,6 +44,7 @@ #' @inheritDotParams setup_approach.gaussian #' @inheritDotParams setup_approach.copula #' @inheritDotParams setup_approach.ctree +#' @inheritDotParams setup_approach.vaeac #' @inheritDotParams setup_approach.categorical #' @inheritDotParams setup_approach.timeseries #' @@ -103,6 +104,7 @@ explain_forecast <- function(model, predict_model = NULL, get_model_specs = NULL, timing = TRUE, + verbose = 0, ...) { # ... is further arguments passed to specific approaches timing_list <- list( init_time = Sys.time() @@ -143,6 +145,7 @@ explain_forecast <- function(model, group_lags = group_lags, group = group, timing = timing, + verbose = verbose, ... 
) @@ -193,6 +196,12 @@ explain_forecast <- function(model, output$timing <- compute_time(timing_list) } + # Temporary to avoid failing tests + if (isFALSE(output$internal$parameters$vaeac.save_model)) { + output$internal$parameters$vaeac$models <- NULL + output$internal$parameters$vaeac$parameters$folder_to_save_model <- NULL + output$internal$parameters$vaeac$parameters$model_description <- NULL + } return(output) } diff --git a/R/model.R b/R/model.R index 60d55bb82..b706cbb0d 100644 --- a/R/model.R +++ b/R/model.R @@ -167,7 +167,7 @@ get_supported_models <- function() { DT_predict_model[, predict_model := 1] DT_predict_model[, c("visible", "from", "generic", "isS4") := NULL] - DT <- merge(DT_get_model_specs, DT_predict_model, by = "rn", all = TRUE, allow.cartesian = TRUE, nomatch = 0) + DT <- merge(DT_get_model_specs, DT_predict_model, by = "rn", all = TRUE, allow.cartesian = TRUE) DT[, (colnames(DT)[-1]) := lapply(.SD, data.table::nafill, fill = 0), .SDcols = colnames(DT)[-1]] DT[, (colnames(DT)[2:3]) := lapply(.SD, as.logical), .SDcols = colnames(DT)[2:3]] data.table::setnames(DT, "rn", "model_class") diff --git a/R/plot.R b/R/plot.R index 162d564c7..ef80c9a32 100644 --- a/R/plot.R +++ b/R/plot.R @@ -291,32 +291,32 @@ plot.shapr <- function(x, return(gg) } -get_num_breaks <- function(dt_plot, feature_name) { +get_n_breaks <- function(dt_plot, feature_name) { n_feat_vals <- length(dt_plot[variable == feature_name, unique(feature_value)]) # number of unique points to plot type <- dt_plot[variable == feature_name, type][1] if (type == "numeric") { if (n_feat_vals > 500) { - num_breaks <- 50 + n_breaks <- 50 } else if (n_feat_vals > 200) { - num_breaks <- 20 + n_breaks <- 20 } else if (n_feat_vals > 100) { - num_breaks <- 10 + n_breaks <- 10 } else { - num_breaks <- min(5, n_feat_vals + 2) + n_breaks <- min(5, n_feat_vals + 2) } } else { # If factor - num_breaks <- n_feat_vals + n_breaks <- n_feat_vals } - return(num_breaks) + return(n_breaks) } compute_scatter_hist_values <- function(dt_plot, scatter_features) { dt_scatter_hist_list <- list() for (feature_name in scatter_features) { - num_breaks <- get_num_breaks(dt_plot, feature_name) + n_breaks <- get_n_breaks(dt_plot, feature_name) x <- dt_plot[variable == feature_name, feature_value] @@ -325,7 +325,7 @@ compute_scatter_hist_values <- function(dt_plot, scatter_features) { # scatter_hist_object$breaks = c(x[1] - .Machine$double.eps*10^10, x[1] + .Machine$double.eps*10^10) scatter_hist_object$breaks <- c(x[1] - 0.01, x[1] + 0.01) } else { - step <- (max(x) - min(x)) / (num_breaks - 1) + step <- (max(x) - min(x)) / (n_breaks - 1) scatter_hist_object <- hist(x, breaks = seq(min(x) - step / 2, max(x) + step / 2, by = step), plot = FALSE) } @@ -985,7 +985,7 @@ plot_MSEv_eval_crit <- function(explanation_list, # Check for valid plot type argument unknown_plot_type <- plot_type[!(plot_type %in% c("overall", "comb", "explicand"))] if (length(unknown_plot_type) > 0) { - error(paste0( + stop(paste0( "The `plot_type` must be one (or several) of 'overall', 'comb', 'explicand'. ", "Do not recognise: '", paste(unknown_plot_type, collapse = "', '"), "'." 
)) @@ -1082,7 +1082,7 @@ plot_MSEv_eval_crit <- function(explanation_list, } # Return ---------------------------------------------------------------------------------------------------------- - if (length(plot_type) == 1 && plot_type == "comb") { + if (length(plot_type) == 1 && plot_type == "overall") { return_object <- return_object$MSEv_bar } @@ -1090,6 +1090,7 @@ plot_MSEv_eval_crit <- function(explanation_list, } #' @keywords internal +#' @author Lars Henry Berge Olsen MSEv_name_explanation_list <- function(explanation_list) { # Give names to the entries in the `explanation_list` based on their used approach. @@ -1113,6 +1114,7 @@ MSEv_name_explanation_list <- function(explanation_list) { } #' @keywords internal +#' @author Lars Henry Berge Olsen MSEv_check_explanation_list <- function(explanation_list) { # Check that the explanation list is valid for plotting the MSEv evaluation criterion @@ -1161,6 +1163,7 @@ MSEv_check_explanation_list <- function(explanation_list) { } #' @keywords internal +#' @author Lars Henry Berge Olsen MSEv_extract_MSEv_values <- function(explanation_list, index_x_explain = NULL, id_combination = NULL) { @@ -1198,6 +1201,7 @@ MSEv_extract_MSEv_values <- function(explanation_list, } #' @keywords internal +#' @author Lars Henry Berge Olsen make_MSEv_bar_plot <- function(MSEv_dt, n_combinations, n_explain, @@ -1238,6 +1242,7 @@ make_MSEv_bar_plot <- function(MSEv_dt, } #' @keywords internal +#' @author Lars Henry Berge Olsen make_MSEv_explicand_plots <- function(MSEv_explicand_dt, n_combinations, geom_col_width = 0.9) { @@ -1272,6 +1277,7 @@ make_MSEv_explicand_plots <- function(MSEv_explicand_dt, } #' @keywords internal +#' @author Lars Henry Berge Olsen make_MSEv_combination_plots <- function(MSEv_combination_dt, n_explain, tfrac = NULL, @@ -1323,3 +1329,530 @@ make_MSEv_combination_plots <- function(MSEv_combination_dt, MSEv_combination_line_point = MSEv_combination_line_point )) } + +#' Shapley value bar plots for several explanation objects +#' +#' @description +#' Make plots to visualize and compare the estimated Shapley values for a list of +#' [shapr::explain()] objects applied to the same data and model. +#' +#' @param explanation_list A list of [shapr::explain()] objects applied to the same data and model. +#' If the entries in the list is named, then the function use these names. Otherwise, it defaults to +#' the approach names (with integer suffix for duplicates) for the explanation objects in `explanation_list`. +#' @param index_explicands Integer vector. Which of the explicands (test observations) to plot. +#' E.g. if you have explained 10 observations using [shapr::explain()], you can generate a plot for the +#' first 5 observations/explicands and the 10th by setting `index_x_explain = c(1:5, 10)`. +#' @param only_these_features String vector. Containing the names of the features which +#' are to be included in the bar plots. +#' @param plot_phi0 Boolean. If we are to include the \eqn{\phi_0} in the bar plots or not. +#' @param digits Integer. Number of significant digits to use in the feature description. +#' @param add_zero_line Boolean. If we are to add a black line for a feature contribution of 0. +#' @param brewer_palette String. Name of one of the color palettes from [RColorBrewer::RColorBrewer()]. +#' If `NULL`, then the function uses the default [ggplot2::ggplot()] color scheme. 
+#' The following palettes are available for use with these scales: +#' \describe{ +#' \item{Diverging}{BrBG, PiYG, PRGn, PuOr, RdBu, RdGy, RdYlBu, RdYlGn, Spectral} +#' \item{Qualitative}{Accent, Dark2, Paired, Pastel1, Pastel2, Set1, Set2, Set3} +#' \item{Sequential}{Blues, BuGn, BuPu, GnBu, Greens, Greys, Oranges, +#' OrRd, PuBu, PuBuGn, PuRd, Purples, RdPu, Reds, YlGn, YlGnBu, YlOrBr, YlOrRd} +#' } +#' @param axis_labels_n_dodge Integer. The number of rows that +#' should be used to render the labels. This is useful for displaying labels that would otherwise overlap. +#' @param axis_labels_rotate_angle Numeric. The angle of the axis label, where 0 means horizontal, 45 means tilted, +#' and 90 means vertical. Compared to setting the angle in[ggplot2::theme()] / [ggplot2::element_text()], this also +#' uses some heuristics to automatically pick the `hjust` and `vjust` that you probably want. +#' @param horizontal_bars Boolean. Flip Cartesian coordinates so that horizontal becomes vertical, +#' and vertical, horizontal. This is primarily useful for converting geoms and statistics which display +#' y conditional on x, to x conditional on y. See [ggplot2::coord_flip()]. +#' @param facet_scales Should scales be free ("`free`", the default), fixed ("`fixed`"), or free in one dimension +#' ("`free_x`", "`free_y`")? The user has to change the latter manually depending on the value of `horizontal_bars`. +#' @param facet_ncol Integer. The number of columns in the facet grid. Default is `facet_ncol = 2`. +#' @param geom_col_width Numeric. Bar width. By default, set to 85% of the [ggplot2::resolution()] of the data. +#' +#' @return A [ggplot2::ggplot()] object. +#' @export +#' +#' @examples +#' # Load necessary libraries +#' library(xgboost) +#' library(data.table) +#' +#' # Get the data +#' data("airquality") +#' data <- data.table::as.data.table(airquality) +#' data <- data[complete.cases(data), ] +#' +#' # Define the features and the response +#' x_var <- c("Solar.R", "Wind", "Temp", "Month") +#' y_var <- "Ozone" +#' +#' # Split data into test and training data set +#' ind_x_explain <- 1:12 +#' x_train <- data[-ind_x_explain, ..x_var] +#' y_train <- data[-ind_x_explain, get(y_var)] +#' x_explain <- data[ind_x_explain, ..x_var] +#' +#' # Fitting a basic xgboost model to the training data +#' model <- xgboost::xgboost( +#' data = as.matrix(x_train), +#' label = y_train, +#' nround = 20, +#' verbose = FALSE +#' ) +#' +#' # Specifying the phi_0, i.e. 
the expected prediction without any features +#' prediction_zero <- mean(y_train) +#' +#' # Independence approach +#' explanation_independence <- explain( +#' model = model, +#' x_explain = x_explain, +#' x_train = x_train, +#' approach = "independence", +#' prediction_zero = prediction_zero, +#' n_samples = 1e2 +#' ) +#' +#' # Empirical approach +#' explanation_empirical <- explain( +#' model = model, +#' x_explain = x_explain, +#' x_train = x_train, +#' approach = "empirical", +#' prediction_zero = prediction_zero, +#' n_samples = 1e2 +#' ) +#' +#' # Gaussian 1e1 approach +#' explanation_gaussian_1e1 <- explain( +#' model = model, +#' x_explain = x_explain, +#' x_train = x_train, +#' approach = "gaussian", +#' prediction_zero = prediction_zero, +#' n_samples = 1e1 +#' ) +#' +#' # Gaussian 1e2 approach +#' explanation_gaussian_1e2 <- explain( +#' model = model, +#' x_explain = x_explain, +#' x_train = x_train, +#' approach = "gaussian", +#' prediction_zero = prediction_zero, +#' n_samples = 1e2 +#' ) +#' +#' # Combined approach +#' explanation_combined <- explain( +#' model = model, +#' x_explain = x_explain, +#' x_train = x_train, +#' approach = c("gaussian", "ctree", "empirical"), +#' prediction_zero = prediction_zero, +#' n_samples = 1e2 +#' ) +#' +#' # Create a list of explanations with names +#' explanation_list <- list( +#' "Ind." = explanation_independence, +#' "Emp." = explanation_empirical, +#' "Gaus. 1e1" = explanation_gaussian_1e1, +#' "Gaus. 1e2" = explanation_gaussian_1e2, +#' "Combined" = explanation_combined +#' ) +#' +#' if (requireNamespace("ggplot2", quietly = TRUE)) { +#' # The function uses the provided names. +#' plot_SV_several_approaches(explanation_list) +#' +#' # We can change the number of columns in the grid of plots and add other visual alterations +#' plot_SV_several_approaches(explanation_list, +#' facet_ncol = 3, +#' facet_scales = "free_y", +#' add_zero_line = TRUE, +#' digits = 2, +#' brewer_palette = "Paired", +#' geom_col_width = 0.6 +#' ) + +#' ggplot2::theme_minimal() + +#' ggplot2::theme(legend.position = "bottom", plot.title = ggplot2::element_text(size = 0)) +#' +#' +#' # We can specify which explicands to plot to get less chaotic plots and make the bars vertical +#' plot_SV_several_approaches(explanation_list, +#' index_explicands = c(1:2, 5, 10), +#' horizontal_bars = FALSE, +#' axis_labels_rotate_angle = 45 +#' ) +#' +#' # We can change the order of the features by specifying the +#' # order using the `only_these_features` parameter. +#' plot_SV_several_approaches(explanation_list, +#' index_explicands = c(1:2, 5, 10), +#' only_these_features = c("Temp", "Solar.R", "Month", "Wind") +#' ) +#' +#' # We can also remove certain features if we are not interested in them +#' # or want to focus on, e.g., two features. The function will give a +#' # message to if the user specifies non-valid feature names. 
+#' plot_SV_several_approaches(explanation_list, +#' index_explicands = c(1:2, 5, 10), +#' only_these_features = c("Temp", "Solar.R"), +#' plot_phi0 = TRUE +#' ) +#' } +#' +#' @author Lars Henry Berge Olsen +plot_SV_several_approaches <- function(explanation_list, + index_explicands = NULL, + only_these_features = NULL, + plot_phi0 = FALSE, + digits = 4, + add_zero_line = FALSE, + axis_labels_n_dodge = NULL, + axis_labels_rotate_angle = NULL, + horizontal_bars = TRUE, + facet_scales = "free", + facet_ncol = 2, + geom_col_width = 0.85, + brewer_palette = NULL) { + # Setup and checks ---------------------------------------------------------------------------- + # Check that ggplot2 is installed + if (!requireNamespace("ggplot2", quietly = TRUE)) { + stop("ggplot2 is not installed. Please run install.packages('ggplot2')") + } + + # Ensure that even a single explanation object is in a list + if ("shapr" %in% class(explanation_list)) explanation_list <- list(explanation_list) + + # Name the elements in the explanation_list if no names have been provided + if (is.null(names(explanation_list))) explanation_list <- MSEv_name_explanation_list(explanation_list) + + # All entries must be named + if (any(names(explanation_list) == "")) stop("All the entries in `explanation_list` must be named.") + + # Check that the column names for the Shapley values are the same for all explanations in the `explanation_list` + if (length(unique(lapply(explanation_list, function(explanation) colnames(explanation$shapley_values)))) != 1) { + stop("The Shapley value feature names are not identical in all objects in the `explanation_list`.") + } + + # Check that all explanation objects use the same test observations + entries_using_diff_x_explain <- sapply(explanation_list, function(explanation) { + !identical(explanation_list[[1]]$internal$data$x_explain, explanation$internal$data$x_explain) + }) + if (any(entries_using_diff_x_explain)) { + methods_with_diff_comb_str <- + paste(names(entries_using_diff_x_explain)[entries_using_diff_x_explain], collapse = "', '") + stop(paste0( + "The object/objects '", methods_with_diff_comb_str, "' in `explanation_list` has/have a different ", + "`x_explain` than '", names(explanation_list)[1], "'. Cannot compare them." 
+ )) + } + + # Update the index_explicands to be all explicands if not specified + if (is.null(index_explicands)) index_explicands <- seq(explanation_list[[1]]$internal$parameters$n_explain) + + + # Creating data.tables -------------------------------------------------------------------------------------------- + # Update the `only_these_features` parameter vector based on `plot_phi0` or in case it is NULL + only_these_features <- update_only_these_features( + explanation_list = explanation_list, + only_these_features = only_these_features, + plot_phi0 = plot_phi0 + ) + + # Create a variable storing the features to use excluding `none` + only_these_features_wo_none <- only_these_features[only_these_features != "none"] + + # Create data.table of the Shapley values + dt_Shapley_values <- extract_Shapley_values_dt( + explanation_list = explanation_list, + index_explicands = index_explicands, + only_these_features = only_these_features + ) + + # Create data.table of feature descriptions + dt_desc_long <- create_feature_descriptions_dt( + explanation_list = explanation_list, + only_these_features_wo_none = only_these_features_wo_none, + index_explicands = index_explicands, + horizontal_bars = horizontal_bars, + digits = digits + ) + + # Melt `dt_Shapley_values` and merge with `dt_desc_long` to creat data.table ready to be plotted with ggplot2 + dt_Shapley_values_long <- create_Shapley_value_figure_dt( + dt_Shapley_values = dt_Shapley_values, + dt_desc_long = dt_desc_long, + digits = digits, + horizontal_bars = horizontal_bars + ) + + # Update the axis_labels parameters + axis_labels_list <- update_axis_labels( + axis_labels_rotate_angle = axis_labels_rotate_angle, + axis_labels_n_dodge = axis_labels_n_dodge, + horizontal_bars = horizontal_bars, + length_of_longest_description = max(nchar(levels(dt_desc_long$.description))) + ) + axis_labels_rotate_angle <- axis_labels_list[["axis_labels_rotate_angle"]] + axis_labels_n_dodge <- axis_labels_list[["axis_labels_n_dodge"]] + + # Get the breaks and direction for the fill aesthetic + breaks <- if (horizontal_bars) rev(levels(dt_Shapley_values_long$.method)) else levels(dt_Shapley_values_long$.method) + direction <- if (horizontal_bars) -1 else 1 + + # Plot -------------------------------------------------------------------------------- + figure <- ggplot2::ggplot(dt_Shapley_values_long, ggplot2::aes(x = .description, y = .phi)) + if (add_zero_line) figure <- figure + ggplot2::geom_abline(intercept = 0, slope = 0) + figure <- figure + + ggplot2::geom_col( + width = geom_col_width, + position = ggplot2::position_dodge(geom_col_width), + ggplot2::aes(fill = .method) + ) + + ggplot2::facet_wrap(~.header, scales = facet_scales, labeller = "label_value", ncol = facet_ncol) + + ggplot2::labs( + x = "Feature and value", + y = bquote("Feature contribution (Shapley value" ~ phi[j] * ")"), + fill = "Method" + ) + + ggplot2::guides(x = ggplot2::guide_axis(n.dodge = axis_labels_n_dodge, angle = axis_labels_rotate_angle)) + + ggplot2::labs(title = "Shapley value prediction explanation") + if (is.null(brewer_palette)) { + figure <- figure + ggplot2::scale_fill_discrete( + breaks = breaks, + direction = direction + ) + } + if (!is.null(brewer_palette)) { + figure <- figure + ggplot2::scale_fill_brewer( + breaks = breaks, + direction = direction, + palette = brewer_palette + ) + } + if (horizontal_bars) figure <- figure + ggplot2::coord_flip() + + # Return the figure + return(figure) +} + + +#' @keywords internal +#' @author Lars Henry Berge Olsen 
+update_only_these_features <- function(explanation_list, + only_these_features, + plot_phi0) { + # Update the `only_these_features` parameter vector based on `plot_phi0` or in case it is NULL + + # Get the common feature names for all explanation objects (including `none`) and one without `none` + feature_names_with_none <- colnames(explanation_list[[1]]$shapley_values) + feature_names_without_none <- feature_names_with_none[feature_names_with_none != "none"] + + # Only keep the desired features/columns + if (!is.null(only_these_features)) { + # Check if user has provided a non-valid feature name, note that `none` is a valid feature name + only_these_features_in_names <- only_these_features[only_these_features %in% feature_names_with_none] + only_these_features_not_names <- only_these_features[!only_these_features %in% feature_names_with_none] + + # Give the user a warning if the user provided non-valid feature names + if (length(only_these_features_not_names) > 0) { + message(paste0( + "User provided non-valid feature names in `only_these_features` (", + paste0("'", only_these_features_not_names, "'", collapse = ", "), + "). The function skips non-valid feature names." + )) + } + + # Stop if we have no valid feature names. + if (length(only_these_features_in_names[only_these_features_in_names != "none"]) == 0) { + stop(paste0( + "The parameter `only_these_features` must contain at least one of: ", + paste0("'", feature_names_without_none, "'", collapse = ", "), + "." + )) + } + + # If user has specified `plot_phi0 = TRUE`, then we ensure that it is included in our variable + if (plot_phi0) only_these_features_in_names <- unique(c("none", only_these_features_in_names)) + + # Overwrite the `only_these_features` with `only_these_features_in_names` to remove non-valid input + only_these_features <- only_these_features_in_names + } else { + # If user has specified `plot_phi0 = FALSE`, then we exclude the phi0/`none` from the feature names. + only_these_features <- if (plot_phi0) feature_names_with_none else feature_names_without_none + } + + return(only_these_features) +} + +#' @keywords internal +#' @author Lars Henry Berge Olsen +extract_Shapley_values_dt <- function(explanation_list, + index_explicands, + only_these_features) { + # Extract the Shapley values and combine them into a single data table. + # We add an id column (`.id`) for the explicands and a column indicating the method (`.method`) + dt_Shapley_values <- data.table::rbindlist( + lapply( + explanation_list, + function(explanation) { + data.table::copy(explanation$shapley_values)[, c(".id", ".pred") := list(.I, explanation$pred_explain)] + } + ), + use.names = TRUE, + idcol = ".method" + ) + + # Convert to factors + dt_Shapley_values$.method <- factor(dt_Shapley_values$.method, + levels = names(explanation_list), + ordered = TRUE + ) + + # Set the keys and change the order of the columns + data.table::setkeyv(dt_Shapley_values, c(".id", ".method")) + data.table::setcolorder(dt_Shapley_values, c(".id", ".pred", ".method")) + + # Only keep the desired explicands + if (!is.null(index_explicands)) dt_Shapley_values <- dt_Shapley_values[.id %in% index_explicands, ] + + # Give a small warning to the user if they have not specified the `index_explicands` and too many explicands + if (length(index_explicands) > 12) { + message(paste( + "It might be too many explicands to plot together in a nice fashion! 
Try for instance", + "setting `index_explicands = 1:10` to limit the number of explicands.\n" + )) + } + + # Keep only the needed columns, and ensure that .id, .pred, and .method are included + only_these_columns <- unique(c(".id", ".pred", ".method", only_these_features)) + dt_Shapley_values <- dt_Shapley_values[, only_these_columns, with = FALSE] + + return(dt_Shapley_values) +} + + +#' @keywords internal +#' @author Lars Henry Berge Olsen +update_axis_labels <- function(axis_labels_rotate_angle, + axis_labels_n_dodge, + horizontal_bars, + length_of_longest_description) { + # User has provided neither `axis_labels_n_dodge` nor `axis_labels_rotate_angle` + if (is.null(axis_labels_rotate_angle) && is.null(axis_labels_n_dodge)) { + # Set default values + axis_labels_rotate_angle <- 0 + axis_labels_n_dodge <- 1 + + # Get the length of the longest description + length_of_longest_description <- length_of_longest_description + + # If it is long, then we alter the default values set above and give message to user + if (length_of_longest_description > 12 && !horizontal_bars) { + message(paste( + "Long label names: consider specifying either `axis_labels_rotate_angle` or", + "`axis_labels_n_dodge`, to fix any potentially overlapping axis labels.", + "The function sets `axis_labels_rotate_angle = 45` internally.\n" + )) + + # Set it to rotate 45 degrees + axis_labels_rotate_angle <- 45 + } + } + + # User has specified `axis_labels_n_dodge` so set `axis_labels_rotate_angle` to default value + if (is.null(axis_labels_rotate_angle)) axis_labels_rotate_angle <- 0 + + # User has specified `axis_labels_rotate_angle` so set `axis_labels_n_dodge` to default value + if (is.null(axis_labels_n_dodge)) axis_labels_n_dodge <- 1 + + return(list( + axis_labels_rotate_angle = axis_labels_rotate_angle, + axis_labels_n_dodge = axis_labels_n_dodge + )) +} + + +#' @keywords internal +#' @author Lars Henry Berge Olsen +create_feature_descriptions_dt <- function(explanation_list, + only_these_features_wo_none, + index_explicands, + horizontal_bars, + digits) { + # Get the explicands + x_explain <- + explanation_list[[1]]$internal$data$x_explain[index_explicands, only_these_features_wo_none, with = FALSE] + + # Converting and melting the explicands + desc_mat <- trimws(format(x_explain, digits = digits)) + for (i in seq_len(ncol(desc_mat))) desc_mat[, i] <- paste0(colnames(desc_mat)[i], " = ", desc_mat[, i]) + dt_desc <- data.table::as.data.table(cbind(none = "None", desc_mat)) + dt_desc_long <- data.table::melt(dt_desc[, .id := index_explicands], + id.vars = ".id", + variable.name = ".feature", + value.name = ".description" + ) + + # Make the description into an ordered factor such that the features in the + # bar plots follow the same order of features as in the training data. + levels <- if (horizontal_bars) rev(unique(dt_desc_long$.description)) else unique(dt_desc_long$.description) + dt_desc_long$.description <- factor(dt_desc_long$.description, + levels = levels, + ordered = TRUE + ) + + return(dt_desc_long) +} + +#' @keywords internal +#' @author Lars Henry Berge Olsen +create_Shapley_value_figure_dt <- function(dt_Shapley_values, + dt_desc_long, + digits, + horizontal_bars) { + # This function takes in the wide `dt_Shapley_values` data.table, melt it and merge it with + # dt_desc_long. Add some headers and do some manipulations based on if the plots are horizontal or not. 
+ + # Melt the data.table from a wide to long format + dt_Shapley_values_long <- data.table::melt(dt_Shapley_values, + id.vars = c(".id", ".pred", ".method"), + variable.name = ".feature", + value.name = ".phi" + ) + dt_Shapley_values_long$.feature <- as.ordered(dt_Shapley_values_long$.feature) + + + # Data table for plotting + dt_Shapley_values_long <- merge(dt_Shapley_values_long, dt_desc_long) + + # Make the .id column into an ordered column + dt_Shapley_values_long$.id <- factor(dt_Shapley_values_long$.id, + levels = unique(dt_Shapley_values_long$.id), + ordered = TRUE + ) + + # Adding header for each individual plot + dt_Shapley_values_long[, .header := paste0("id: ", .id, ", pred = ", format(.pred, digits = digits))] + dt_Shapley_values_long$.header <- factor(dt_Shapley_values_long$.header, + levels = unique(dt_Shapley_values_long$.header), + ordered = TRUE + ) + + # If flip coordinates, then we need to change the order of the levels such that the order + # of the bars in the figure match the order in the legend. + if (horizontal_bars) { + dt_Shapley_values_long$.method <- factor(dt_Shapley_values_long$.method, + levels = rev(levels(dt_Shapley_values_long$.method)), + ordered = TRUE + ) + breaks <- rev(levels(dt_Shapley_values_long$.method)) + } else { + breaks <- levels(dt_Shapley_values_long$.method) + } + + return(dt_Shapley_values_long) +} diff --git a/R/setup.R b/R/setup.R index 018e03b30..b627cc6db 100644 --- a/R/setup.R +++ b/R/setup.R @@ -40,6 +40,7 @@ setup <- function(x_train, explain_xreg_lags = NULL, group_lags = NULL, timing, + verbose, is_python = FALSE, ...) { internal <- list() @@ -63,6 +64,7 @@ setup <- function(x_train, group_lags = group_lags, MSEv_uniform_comb_weights = MSEv_uniform_comb_weights, timing = timing, + verbose = verbose, is_python = is_python, ... ) @@ -387,7 +389,8 @@ get_extra_parameters <- function(internal) { #' @keywords internal get_parameters <- function(approach, prediction_zero, output_size = 1, n_combinations, group, n_samples, n_batches, seed, keep_samp_for_vS, type, horizon, train_idx, explain_idx, explain_y_lags, - explain_xreg_lags, group_lags = NULL, MSEv_uniform_comb_weights, timing, is_python, ...) { + explain_xreg_lags, group_lags = NULL, MSEv_uniform_comb_weights, timing, verbose, + is_python, ...) { # Check input type for approach # approach is checked more comprehensively later @@ -441,6 +444,11 @@ get_parameters <- function(approach, prediction_zero, output_size = 1, n_combina stop("`type` must be either `normal` or `forecast`.\n") } + # verbose + if (!is.numeric(verbose) || !(verbose %in% c(0, 1, 2))) { + stop("`verbose` must be either `0` (no verbosity), `1` (low verbosity), or `2` (high verbosity).") + } + # parameters only used for type "forecast" if (type == "forecast") { if (!(is.wholenumber(horizon) && all(horizon > 0))) { @@ -505,7 +513,8 @@ get_parameters <- function(approach, prediction_zero, output_size = 1, n_combina horizon = horizon, group_lags = group_lags, MSEv_uniform_comb_weights = MSEv_uniform_comb_weights, - timing = timing + timing = timing, + verbose = verbose ) # Getting additional parameters from ... 
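For reference, the `verbose` argument threaded through `explain()`, `explain_forecast()`, and `setup()` above only accepts 0, 1, or 2; any other value triggers the new check in `get_parameters()`. A minimal usage sketch, reusing the `model`, `x_train`, `x_explain`, and `prediction_zero` objects from the examples elsewhere in this changeset, with illustrative (not recommended) values for the vaeac-specific arguments:

# Hedged sketch: argument values are illustrative only.
explanation_vaeac <- explain(
  model = model,
  x_explain = x_explain,
  x_train = x_train,
  approach = "vaeac",
  prediction_zero = prediction_zero,
  n_samples = 1e2,
  verbose = 2, # 0 = silent, 1 = performance information, 2 = additional details
  vaeac.epochs = 10, # should be higher in real applications
  vaeac.n_vaeacs_initialize = 2
)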
diff --git a/R/shapr-package.R b/R/shapr-package.R index 75bb26ba7..df52ae4ce 100644 --- a/R/shapr-package.R +++ b/R/shapr-package.R @@ -1,29 +1,10 @@ -#' @importFrom data.table data.table fread fwrite setnames := year month -#' uniqueN setkey as.data.table copy between is.data.table setcolorder rbindlist +#' @importFrom data.table data.table as.data.table is.data.table := setnames setkey copy setcolorder rbindlist #' -#' @importFrom graphics plot hist rect +#' @importFrom graphics hist #' -#' @importFrom utils head tail methods +#' @importFrom utils head tail methods modifyList #' -#' @importFrom stats predict -#' -#' @importFrom stats as.formula -#' -#' @importFrom stats formula -#' -#' @importFrom stats model.matrix -#' -#' @importFrom stats model.frame -#' -#' @importFrom stats setNames -#' -#' @importFrom stats contrasts -#' -#' @importFrom stats embed -#' -#' @importFrom stats sd qt pt -#' -#' @importFrom stats rnorm +#' @importFrom stats predict as.formula formula setNames embed sd qt pt rnorm #' #' @importFrom Rcpp sourceCpp #' @@ -34,3 +15,14 @@ NULL #' @keywords internal "_PACKAGE" + +#' Auxiliary function for the vaeac vignette +#' @description Function that question if the main and vaeac vignette has been built using the +#' `rebuild-long-running-vignette.R` function. This is only useful when using devtools to release +#' `shapr` to cran. See [devtools::release()] for more information. +release_questions <- function() { + c(paste0( + "Did you re-build the `understanding_shapr.Rmd` and `understanding_shapr_vaeac.Rmd` vignettes using ", + "`rebuild-long-running-vignette.R`?" + )) +} diff --git a/R/zzz.R b/R/zzz.R index 9e2410a20..47318dbd9 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -82,7 +82,29 @@ "Method", "MSEv", "MSEv_sd", - "error" + "error", + ".header", + ".id", + ".pred", + ".only_these_features_wo_none", + ".only_these_columns", + "Epoch", + ".description", + ".phi", + ".method", + "Value", + "Criterion", + "checkpoint", + "..col_cont_names", + "n_train", + "one_hot_max_sizes", + "train_dataloader", + "vaeac_model_best_listmodel", + "vaeac_save_file_names", + "val_dataloader", + "x_train", + "x_train_preprocessed", + "x_train_torch" ) ) invisible() diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib index 84f9aa312..4f2e4df24 100644 --- a/inst/REFERENCES.bib +++ b/inst/REFERENCES.bib @@ -166,3 +166,18 @@ @article{covert2020understanding pages={17212--17223}, year={2020} } + +@inproceedings{kingma2014autoencoding, + title = {{Auto-Encoding Variational Bayes}}, + author = {Kingma, Diederik P. 
and Welling, Max}, + booktitle = {2nd International Conference on Learning Representations, {ICLR} 2014, Banff, AB, Canada, April 14-16, 2014, Conference Track Proceedings}, + year = 2014 +} + +@Manual{torch, + title = {torch: Tensors and Neural Networks with 'GPU' Acceleration}, + author = {Daniel Falbel and Javier Luraschi}, + year = {2023}, + note = {R package version 0.11.0}, + url = {https://CRAN.R-project.org/package=torch}, + } diff --git a/inst/scripts/example_plot_SV_several_approaches.R b/inst/scripts/example_plot_SV_several_approaches.R new file mode 100644 index 000000000..a25c66b36 --- /dev/null +++ b/inst/scripts/example_plot_SV_several_approaches.R @@ -0,0 +1,186 @@ +# Setup ----------------------------------------------------------------------------------------------------------- +# Load necessary libraries +library(xgboost) +library(data.table) + +# Get the data +data("airquality") +data = data.table::as.data.table(airquality) +data = data[complete.cases(data), ] + +# Define the features and the response +x_var = c("Solar.R", "Wind", "Temp", "Month") +y_var = "Ozone" + +# Split data into test and training data set +ind_x_explain = 1:12 +x_train = data[-ind_x_explain, ..x_var] +y_train = data[-ind_x_explain, get(y_var)] +x_explain = data[ind_x_explain, ..x_var] + +# Fitting a basic xgboost model to the training data +model = xgboost::xgboost( + data = as.matrix(x_train), + label = y_train, + nround = 20, + verbose = FALSE +) + +# Specifying the phi_0, i.e. the expected prediction without any features +prediction_zero = mean(y_train) + +# Independence approach +explanation_independence = explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "independence", + prediction_zero = prediction_zero, + n_samples = 1e2 +) + +# Empirical approach +explanation_empirical = explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "empirical", + prediction_zero = prediction_zero, + n_samples = 1e2 +) + +# Gaussian 1e1 approach +explanation_gaussian_1e1 = explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "gaussian", + prediction_zero = prediction_zero, + n_samples = 1e1 +) + +# Gaussian 1e2 approach +explanation_gaussian_1e2 = explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "gaussian", + prediction_zero = prediction_zero, + n_samples = 1e2 +) + +# Combined approach +explanation_combined = explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = c("gaussian", "ctree", "empirical"), + prediction_zero = prediction_zero, + n_samples = 1e2 +) + +# Create a list of explanations with names +explanation_list = list( + "Ind." = explanation_independence, + "Emp." = explanation_empirical, + "Gaus. 1e1" = explanation_gaussian_1e1, + "Gaus. 1e2" = explanation_gaussian_1e2, + "Combined" = explanation_combined +) + + +# Plots ----------------------------------------------------------------------------------------------------------- +# The function uses the provided names. 
+plot_SV_several_approaches(explanation_list) + +# We can change the number of columns in the grid of plots and add other visual alterations +plot_SV_several_approaches(explanation_list, + facet_ncol = 3, + facet_scales = "free_y", + add_zero_line = TRUE, + digits = 2, + brewer_palette = "Paired", + geom_col_width = 0.6) + + ggplot2::theme_minimal() + + ggplot2::theme(legend.position = "bottom", plot.title = ggplot2::element_text(size = 0)) + + +# We can specify which explicands to plot to get less chaotic plots and make the bars vertical +plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10), + horizontal_bars = FALSE, + axis_labels_rotate_angle = 45) + + +# We can change the order of the features by specifying the order using the `only_these_features` parameter. +plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10), + only_these_features = c("Temp", "Solar.R", "Month", "Wind")) + +# We can also remove certain features if we are not interested in them or want to focus on, e.g., two features. +# The function will give a message to if the user specifies non-valid feature names. +plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10), + only_these_features = c("Temp", "Solar.R"), + plot_phi0 = TRUE) + +# We can specify which explicands to plot to get less chaotic plots. +plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10)) + +# We can make the bars vertical by setting `horizontal_bars = FALSE`. +# Will then get message about long label names on the x-axis and how to fix it. +plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10), + horizontal_bars = FALSE) + +# We can change the order of the features by specifying the order using the `only_these_features` parameter. +plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10), + only_these_features = c("Temp", "Solar.R", "Month", "Wind")) + +# We can also remove certain features if we are not interested in them or want to focus on, e.g., two features. +# The function will give a message to if the user specifies non-valid feature names. +plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10), + only_these_features = c("Temp", "Solar.R")) + +# To more easily compare the magnitude of the Shapley values for different explicands we can fix the x-axis +# by specifying that only the scales on the y-axis are to be free. +plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10), + only_these_features = c("Temp", "Solar.R"), + facet_scales = "free_y") + +# If we rather want vertical bars and fix the y-axis, then we specify that the scales are only free on the x-axis. +plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10), + only_these_features = c("Temp", "Solar.R"), + facet_scales = "free_x", + axis_labels_rotate_angle = 0, + horizontal_bars = FALSE) + +# By default the function does not plot the phi0, but we can change that by setting `plot_phi0 = TRUE`. +plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10), + only_these_features = c("Temp", "Solar.R"), + plot_phi0 = TRUE) + +# Or we can include "none" in the `only_these_features` parameter. Note that phi0 will always be the first bars. 
+plot_SV_several_approaches(explanation_list,
+                           index_explicands = c(1:2, 5, 10),
+                           only_these_features = c("Temp", "Solar.R", "none"))
+
+# We can add a line at the Shapley value of zero and ensure non-overlapping labels by setting `axis_labels_n_dodge`.
+plot_SV_several_approaches(explanation_list,
+                           index_explicands = c(1:2, 5, 10),
+                           add_zero_line = TRUE,
+                           axis_labels_n_dodge = 2,
+                           horizontal_bars = FALSE)
+
+# We can increase the space between the features to make it easier to distinguish them from each other
+# by lowering `geom_col_width`. Note that the default is 0.85.
+plot_SV_several_approaches(explanation_list,
+                           index_explicands = c(1:2, 5, 10),
+                           geom_col_width = 0.6)
+
diff --git a/inst/scripts/example_plot_several_vaeacs_VLB_IWAE.R b/inst/scripts/example_plot_several_vaeacs_VLB_IWAE.R
new file mode 100644
index 000000000..667d5cfe2
--- /dev/null
+++ b/inst/scripts/example_plot_several_vaeacs_VLB_IWAE.R
@@ -0,0 +1,150 @@
+library(xgboost)
+data("airquality")
+data <- data.table::as.data.table(airquality)
+data <- data[complete.cases(data), ]
+
+x_var <- c("Solar.R", "Wind", "Temp", "Month")
+y_var <- "Ozone"
+
+ind_x_explain <- 1:6
+x_train <- data[-ind_x_explain, ..x_var]
+y_train <- data[-ind_x_explain, get(y_var)]
+x_explain <- data[ind_x_explain, ..x_var]
+
+# Fitting a basic xgboost model to the training data
+model <- xgboost(
+  data = as.matrix(x_train),
+  label = y_train,
+  nround = 100,
+  verbose = FALSE
+)
+
+# Specifying the phi_0, i.e. the expected prediction without any features
+p0 <- mean(y_train)
+
+# Train several different vaeac models
+explanation_paired_sampling_TRUE <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "vaeac",
+  prediction_zero = p0,
+  n_batches = 2,
+  n_samples = 1, # As we are only interested in the training of the vaeac
+  vaeac.epochs = 25, # Should be higher in applications.
+  vaeac.n_vaeacs_initialize = 5,
+  vaeac.extra_parameters = list(
+    vaeac.paired_sampling = TRUE,
+    vaeac.verbose = TRUE
+  )
+)
+
+explanation_paired_sampling_FALSE <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "vaeac",
+  prediction_zero = p0,
+  n_batches = 2,
+  n_samples = 1, # As we are only interested in the training of the vaeac
+  vaeac.epochs = 25, # Should be higher in applications.
+  vaeac.n_vaeacs_initialize = 5,
+  vaeac.extra_parameters = list(
+    vaeac.paired_sampling = FALSE,
+    vaeac.verbose = TRUE
+  )
+)
+
+# The other (larger) networks have 4.76 times more parameters.
+explanation_paired_sampling_FALSE_small <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "vaeac",
+  prediction_zero = p0,
+  n_batches = 2,
+  n_samples = 1, # As we are only interested in the training of the vaeac
+  vaeac.epochs = 25, # Should be higher in applications.
+  vaeac.width = 16, # Default is 32
+  vaeac.depth = 2, # Default is 3
+  vaeac.latent_dim = 4, # Default is 8
+  vaeac.n_vaeacs_initialize = 5,
+  vaeac.extra_parameters = list(
+    vaeac.paired_sampling = FALSE,
+    vaeac.verbose = TRUE
+  )
+)
+
+explanation_paired_sampling_TRUE_small <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "vaeac",
+  prediction_zero = p0,
+  n_batches = 2,
+  n_samples = 1, # As we are only interested in the training of the vaeac
+  vaeac.epochs = 25, # Should be higher in applications.
+  vaeac.width = 16, # Default is 32
+  vaeac.depth = 2, # Default is 3
+  vaeac.latent_dim = 4, # Default is 8
+  vaeac.n_vaeacs_initialize = 5,
+  vaeac.extra_parameters = list(
+    vaeac.paired_sampling = TRUE,
+    vaeac.verbose = TRUE
+  )
+)
+
+# Collect the explanation objects in an unnamed list
+explanation_list_unnamed <- list(
+  explanation_paired_sampling_FALSE,
+  explanation_paired_sampling_FALSE_small,
+  explanation_paired_sampling_TRUE,
+  explanation_paired_sampling_TRUE_small
+)
+
+# Collect the explanation objects in a named list
+explanation_list_named <- list(
+  "Regular samp. & large NN" = explanation_paired_sampling_FALSE,
+  "Regular samp. & small NN" = explanation_paired_sampling_FALSE_small,
+  "Paired samp. & large NN" = explanation_paired_sampling_TRUE,
+  "Paired samp. & small NN" = explanation_paired_sampling_TRUE_small
+)
+
+# Call the function with the unnamed list, which will create names
+vaeac_plot_evaluation_criteria(explanation_list = explanation_list_unnamed)
+
+# Call the function with the named list, which will use the provided names.
+# Note that paired sampling often produces more stable results.
+vaeac_plot_evaluation_criteria(explanation_list = explanation_list_named)
+
+# The function also works if we have only one method, but then one should only look at the method plot
+vaeac_plot_evaluation_criteria(explanation_list = list("Paired samp. & large NN" = explanation_paired_sampling_TRUE),
+                               plot_type = "method")
+
+# We can alter the plot
+vaeac_plot_evaluation_criteria(
+  explanation_list = explanation_list_named,
+  plot_from_nth_epoch = 5,
+  plot_every_nth_epoch = 3,
+  facet_wrap_scales = "free"
+)
+
+# If we only want the criterion version
+tmp_fig_criterion <- vaeac_plot_evaluation_criteria(
+  explanation_list = explanation_list_named,
+  plot_type = "criterion")
+
+# We can add points
+tmp_fig_criterion + ggplot2::geom_point(shape = "circle", size = 1, ggplot2::aes(col = Method))
+
+# If we rather want smooths with standard error bands
+tmp_fig_criterion$layers[[1]] <- NULL
+tmp_fig_criterion + ggplot2::geom_smooth(method = "loess", formula = y ~ x, se = TRUE) +
+  ggplot2::scale_color_brewer(palette = "Set1") +
+  ggplot2::theme_minimal()
+
+# If we only want the VLB
+vaeac_plot_evaluation_criteria(
+  explanation_list = explanation_list_named,
+  criteria = "VLB",
+  plot_type = "criterion")
diff --git a/man/CategoricalToOneHotLayer.Rd b/man/CategoricalToOneHotLayer.Rd
new file mode 100644
index 000000000..18e753953
--- /dev/null
+++ b/man/CategoricalToOneHotLayer.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/approach_vaeac_torch_modules.R
+\name{CategoricalToOneHotLayer}
+\alias{CategoricalToOneHotLayer}
+\title{A \code{\link[torch:nn_module]{torch::nn_module()}} Representing a CategoricalToOneHotLayer}
+\usage{
+CategoricalToOneHotLayer(one_hot_max_sizes, add_nans_map_for_columns = NULL)
+}
+\arguments{
+\item{one_hot_max_sizes}{A vector of integers where the i-th entry is the number of
+one-hot encoding for the i-th feature.
+I.e., a categorical feature with 5 levels will have a one_hot_max_size of 5.
+A feature with a one_hot_max_size of either 0 or 1 will be treated as a continuous feature.}
+
+\item{add_nans_map_for_columns}{Optional list containing the indices of the columns whose
+is_nan masks are to be appended to the result tensor.
This option is necessary for the full +encoder to distinguish whether value is to be reconstructed or not.} +} +\description{ +The CategoricalToOneHotLayer module/layer expands categorical features into one-hot vectors, +because multi-layer perceptrons are known to work better with this data representation. +It also replaces NaNs with zeros in order so that further layers may work correctly. +} +\details{ +Note that the module works with mixed data represented as 2-dimensional inputs and it +works correctly with missing values in \code{groundtruth} as long as they are repsented by NaNs. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/GaussCatLoss.Rd b/man/GaussCatLoss.Rd new file mode 100644 index 000000000..a465ddbb7 --- /dev/null +++ b/man/GaussCatLoss.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{GaussCatLoss} +\alias{GaussCatLoss} +\title{A \code{\link[torch:nn_module]{torch::nn_module()}} Representing a GaussCatLoss} +\usage{ +GaussCatLoss(one_hot_max_sizes, min_sigma = 1e-04, min_prob = 1e-04) +} +\arguments{ +\item{one_hot_max_sizes}{A vector of integers where the i-th entry is the number of +one-hot encoding for the i-th feature. +I.e., a categorical feature with 5 levels will have a one_hot_max_size of 5. +A feature with a one_hot_max_size of either 0 or 1 will be treated as a continuous feature.} + +\item{min_sigma}{For stability it might be desirable that the minimal sigma is not too close to zero.} + +\item{min_prob}{For stability it might be desirable that the minimal probability is not too close to zero.} +} +\description{ +The GaussCatLoss module/layer computes the log probability +of the \code{groundtruth} for each object given the mask and the distribution parameters. +That is, the log-likelihoods of the true/full training observations based on the +generative distributions parameters \code{distr_params} inferred by the masked versions of the observations. +} +\details{ +Note that the module works with mixed data represented as 2-dimensional inputs and it +works correctly with missing values in \code{groundtruth} as long as they are represented by NaNs. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/GaussCatParameters.Rd b/man/GaussCatParameters.Rd new file mode 100644 index 000000000..079b8d76f --- /dev/null +++ b/man/GaussCatParameters.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{GaussCatParameters} +\alias{GaussCatParameters} +\title{A \code{\link[torch:nn_module]{torch::nn_module()}} Representing a GaussCatParameters} +\usage{ +GaussCatParameters(one_hot_max_sizes, min_sigma = 1e-04, min_prob = 1e-04) +} +\arguments{ +\item{one_hot_max_sizes}{A vector of integers where the i-th entry is the number of +one-hot encoding for the i-th feature. +I.e., a categorical feature with 5 levels will have a one_hot_max_size of 5. +A feature with a one_hot_max_size of either 0 or 1 will be treated as a continuous feature.} + +\item{min_sigma}{For stability it might be desirable that the minimal sigma is not too close to zero.} + +\item{min_prob}{For stability it might be desirable that the minimal probability is not too close to zero.} +} +\description{ +The GaussCatParameters module extracts the parameters +from the inferred generative Gaussian and categorical distributions for the +continuous and categorical features, respectively. 
+ +If \code{one_hot_max_sizes} is \eqn{[4, 1, 1, 2]}, then the inferred distribution parameters for one observation is the +vector \eqn{[p_{00}, p_{01}, p_{02}, p_{03}, \mu_1, \sigma_1, \mu_2, \sigma_2, p_{30}, p_{31}]}, where +\eqn{\operatorname{Softmax}([p_{00}, p_{01}, p_{02}, p_{03}])} and \eqn{\operatorname{Softmax}([p_{30}, p_{31}])} +are probabilities of the first and the fourth feature categories respectively in the model generative distribution, +and Gaussian(\eqn{\mu_1, \sigma_1^2}) and Gaussian(\eqn{\mu_2, \sigma_2^2}) are the model generative distributions +on the second and the third features. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/GaussCatSamplerMostLikely.Rd b/man/GaussCatSamplerMostLikely.Rd new file mode 100644 index 000000000..5f3379f94 --- /dev/null +++ b/man/GaussCatSamplerMostLikely.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{GaussCatSamplerMostLikely} +\alias{GaussCatSamplerMostLikely} +\title{A \code{\link[torch:nn_module]{torch::nn_module()}} Representing a GaussCatSamplerMostLikely} +\usage{ +GaussCatSamplerMostLikely( + one_hot_max_sizes, + min_sigma = 1e-04, + min_prob = 1e-04 +) +} +\arguments{ +\item{one_hot_max_sizes}{A vector of integers where the i-th entry is the number of one-hot encoding +for the i-th feature. I.e., a categorical feature with 5 levels will have a one_hot_max_size of 5. +A feature with a one_hot_max_size of either 0 or 1 will be treated as a continuous feature.} + +\item{min_sigma}{For stability it might be desirable that the minimal sigma is not too close to zero.} + +\item{min_prob}{For stability it might be desirable that the minimal probability is not too close to zero.} +} +\value{ +A \code{GaussCatSamplerMostLikely} object. +} +\description{ +The GaussCatSamplerMostLikely generates the most likely samples from +the generative distribution defined by the output of the vaeac. +I.e., the layer will return the mean and most probable class for the Gaussian (continuous features) +and categorical (categorical features) distributions, respectively. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/GaussCatSamplerRandom.Rd b/man/GaussCatSamplerRandom.Rd new file mode 100644 index 000000000..666fbc3a5 --- /dev/null +++ b/man/GaussCatSamplerRandom.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{GaussCatSamplerRandom} +\alias{GaussCatSamplerRandom} +\title{A \code{\link[torch:nn_module]{torch::nn_module()}} Representing a GaussCatSamplerRandom} +\usage{ +GaussCatSamplerRandom(one_hot_max_sizes, min_sigma = 1e-04, min_prob = 1e-04) +} +\arguments{ +\item{one_hot_max_sizes}{A vector of integers where the i-th entry is the number of one-hot encoding +for the i-th feature. I.e., a categorical feature with 5 levels will have a one_hot_max_size of 5. +A feature with a one_hot_max_size of either 0 or 1 will be treated as a continuous feature.} + +\item{min_sigma}{For stability it might be desirable that the minimal sigma is not too close to zero.} + +\item{min_prob}{For stability it might be desirable that the minimal probability is not too close to zero.} +} +\description{ +The GaussCatSamplerRandom generates random samples from the generative +distribution defined by the output of the vaeac. 
The random sample is generated by +sampling from the inferred Gaussian and categorical distributions for the +continuous and categorical features, respectively. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/MCAR_mask_generator.Rd b/man/MCAR_mask_generator.Rd new file mode 100644 index 000000000..0ed4c1c29 --- /dev/null +++ b/man/MCAR_mask_generator.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{MCAR_mask_generator} +\alias{MCAR_mask_generator} +\title{Missing Completely at Random (MCAR) Mask Generator} +\usage{ +MCAR_mask_generator(masking_ratio = 0.5, paired_sampling = FALSE) +} +\arguments{ +\item{masking_ratio}{Numeric between 0 and 1. The probability for an entry in the generated mask to be 1 (masked).} + +\item{paired_sampling}{Boolean. If we are doing paired sampling. So include both S and \eqn{\bar{S}}. +If \code{TRUE}, then \code{batch} must be sampled using \code{\link[=paired_sampler]{paired_sampler()}} which ensures that the \code{batch} contains +two instances for each original observation. That is, \code{batch} \eqn{= [X_1, X_1, X_2, X_2, X_3, X_3, ...]}, where +each entry \eqn{X_j} is a row of dimension \eqn{p} (i.e., the number of features).} +} +\description{ +A mask generator which masks the entries in the input completely at random. +} +\details{ +The mask generator mask each element in the \code{batch} (N x p) using a component-wise independent Bernoulli +distribution with probability \code{masking_ratio}. Default values for \code{masking_ratio} is 0.5, so all +masks are equally likely to be generated, including the empty and full masks. +The function returns a mask of the same shape as the input \code{batch}, and the \code{batch} can contain +missing values, indicated by the "NaN" token, which will always be masked. +} +\section{Shape}{ + +\itemize{ +\item Input: \eqn{(N, p)} where N is the number of observations in the \code{batch} and \eqn{p} is the number of features. +\item Output: \eqn{(N, p)}, same shape as the input +} +} + +\examples{ +\dontrun{ +mask_gen <- MCAR_mask_generator(masking_ratio = 0.5, paired_sampling = FALSE) +batch <- torch::torch_randn(c(5, 3)) +mask_gen(batch) +} + +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/MemoryLayer.Rd b/man/MemoryLayer.Rd new file mode 100644 index 000000000..da5f56067 --- /dev/null +++ b/man/MemoryLayer.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{MemoryLayer} +\alias{MemoryLayer} +\title{A \code{\link[torch:nn_module]{torch::nn_module()}} Representing a Memory Layer} +\usage{ +MemoryLayer(id, output = FALSE, add = FALSE, verbose = FALSE) +} +\arguments{ +\item{id}{A unique id to use as a key in the storage list.} + +\item{output}{Boolean variable indicating if the memory layer is to store input in storage or extract from storage.} + +\item{add}{Boolean variable indicating if the extracted value are to be added or concatenated to the input. +Only applicable when \code{output = TRUE}.} + +\item{verbose}{Boolean variable indicating if we want to give printouts to the user.} +} +\description{ +The layer is used to make skip-connections inside a \link[torch:nn_sequential]{torch::nn_sequential} network +or between several \link[torch:nn_sequential]{torch::nn_sequential} networks without unnecessary code complication. 
+}
+\details{
+If \code{output = FALSE}, this layer stores its input in a static list \code{storage} with the key \code{id}
+and then passes the input to the next layer. I.e., when the memory layer is used in the masked encoder.
+If \code{output = TRUE}, this layer takes the stored tensor from the storage. I.e., when the memory layer
+is used in the decoder. If \code{add = TRUE}, it returns the sum of the stored tensor and the \code{input},
+otherwise it returns their concatenation. If the tensor with the specified \code{id} is not in storage when
+the layer with \code{output = TRUE} is called, an exception is raised.
+}
+\examples{
+\dontrun{
+net1 <- torch::nn_sequential(
+  MemoryLayer("#1"),
+  MemoryLayer("#0.1"),
+  torch::nn_linear(512, 256),
+  torch::nn_leaky_relu(),
+  # here add cannot be TRUE because the dimensions do not match
+  MemoryLayer("#0.1", output = TRUE, add = FALSE),
+  # the dimension after the concatenation with the skip-connection is 512 + 256 = 768
+  torch::nn_linear(768, 256)
+)
+net2 <- torch::nn_sequential(
+  torch::nn_linear(512, 512),
+  MemoryLayer("#1", output = TRUE, add = TRUE),
+  ...
+)
+b <- net1(a)
+d <- net2(c) # net2 must be called after net1, otherwise tensor '#1' will not be in storage.
+}
+}
+\author{
+Lars Henry Berge Olsen
+}
+\keyword{internal}
diff --git a/man/SkipConnection.Rd b/man/SkipConnection.Rd
new file mode 100644
index 000000000..9d4a371a9
--- /dev/null
+++ b/man/SkipConnection.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/approach_vaeac_torch_modules.R
+\name{SkipConnection}
+\alias{SkipConnection}
+\title{A \code{\link[torch:nn_module]{torch::nn_module()}} Representing a skip connection}
+\usage{
+SkipConnection(...)
+}
+\arguments{
+\item{...}{network modules such as, e.g., \code{\link[torch:nn_linear]{torch::nn_linear()}}, \code{\link[torch:nn_relu]{torch::nn_relu()}},
+and \code{\link[=MemoryLayer]{MemoryLayer()}} objects. See \code{\link[=vaeac]{vaeac()}} for more information.}
+}
+\description{
+Skip connection over the sequence of layers in the constructor. The module passes
+input data sequentially through these layers and then adds original data to the result.
+}
+\author{
+Lars Henry Berge Olsen
+}
+\keyword{internal}
diff --git a/man/Specified_masks_mask_generator.Rd b/man/Specified_masks_mask_generator.Rd
new file mode 100644
index 000000000..e35ce969e
--- /dev/null
+++ b/man/Specified_masks_mask_generator.Rd
@@ -0,0 +1,54 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/approach_vaeac_torch_modules.R
+\name{Specified_masks_mask_generator}
+\alias{Specified_masks_mask_generator}
+\title{A \code{\link[torch:nn_module]{torch::nn_module()}} Representing a Specified_masks_mask_generator}
+\usage{
+Specified_masks_mask_generator(masks, masks_probs, paired_sampling = FALSE)
+}
+\arguments{
+\item{masks}{Matrix/Tensor of possible/allowed 'masks' which we sample from.}
+
+\item{masks_probs}{Array of 'probabilities' for each of the masks specified in 'masks'.
+Note that they do not need to be between 0 and 1 (e.g. sampling frequency).
+They are scaled, hence, they only need to be positive.}
+
+\item{paired_sampling}{Boolean. If we are doing paired sampling. So include both S and \eqn{\bar{S}}.
+If TRUE, then batch must be sampled using 'paired_sampler' which creates batches where
+the first half and second half of the rows are duplicates of each other. That is,
+\verb{batch = [row1, row1, row2, row2, row3, row3, ...]}.}
+
+\item{batch}{Matrix/Tensor.
Only used to get the dimensions and to check if any of the +entries are missing. If any are missing, then the returned mask will ensure that +these missing entries are masked.} +} +\description{ +A mask generator which masks the entries based on sampling provided 1D masks with corresponding probabilities. +Used for Shapley value estimation when only a subset of coalitions are used to compute the Shapley values. + +Initialize a specified masks mask generator. +} +\section{Fields}{ + +\describe{ +\item{\code{name}}{Type of mask generator} +}} + +\examples{ +\dontrun{ +masks <- torch_tensor(matrix(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1), + nrow = 3, ncol = 4, byrow = TRUE +)) +masks_probs <- c(3, 1, 6) +mask_gen <- Specified_masks_mask_generator(masks = masks, masks_probs = masks_probs) +empirical_prob <- + table(as.array(mask_gen(torch::torch_randn(c(10000, ncol(masks))))$sum(-1))) +empirical_prob / sum(empirical_prob) +masks_probs / sum(masks_probs) +} + +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/Specified_prob_mask_generator.Rd b/man/Specified_prob_mask_generator.Rd new file mode 100644 index 000000000..66294e58e --- /dev/null +++ b/man/Specified_prob_mask_generator.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{Specified_prob_mask_generator} +\alias{Specified_prob_mask_generator} +\title{A \code{\link[torch:nn_module]{torch::nn_module()}} Representing a Specified_prob_mask_generator} +\usage{ +Specified_prob_mask_generator(masking_probs, paired_sampling = FALSE) +} +\arguments{ +\item{masking_probs}{An M+1 numerics containing the probabilities masking 'd' of the (0,...M) entries +for each observation.} + +\item{paired_sampling}{Boolean. If we are doing paired sampling. So include both S and \eqn{\bar{S}}. +If TRUE, then batch must be sampled using 'paired_sampler' which creates batches where +the first half and second half of the rows are duplicates of each other. That is, +\verb{batch = [row1, row1, row2, row2, row3, row3, ...]}.} +} +\description{ +A mask generator which masks the entries based on specified probabilities. +} +\details{ +A class that takes in the probabilities of having d masked observations. I.e., for M dimensional data, +masking_probs is of length M+1, where the d'th entry is the probability of having d-1 masked values. + +A mask generator that first samples the number of entries 'd' to be masked in +the 'M'-dimensional observation 'x' in the batch based on the given M+1 probabilities. The +'d' masked are uniformly sampled from the 'M' possible feature indices. The d'th entry +of the probability of having d-1 masked values. + +Note that MCAR_mask_generator with p = 0.5 is the same as using \code{\link[=Specified_prob_mask_generator]{Specified_prob_mask_generator()}} +with \code{masking_ratio} = choose(M, 0:M), where M is the number of features. This function was initially +created to check if increasing the probability of having a masks with many masked features improved +vaeac's performance by focusing more on these situations during training. 
+} +\examples{ +\dontrun{ +probs <- c(1, 8, 6, 3, 2) +mask_gen <- Specified_prob_mask_generator(probs) +masks <- mask_gen(torch::torch_randn(c(10000, length(probs)) - 1)) +empirical_prob <- table(as.array(masks$sum(2))) +empirical_prob / sum(empirical_prob) +probs / sum(probs) +} + +} +\keyword{internal} diff --git a/man/explain.Rd b/man/explain.Rd index e7c9deb4d..a45f7266e 100644 --- a/man/explain.Rd +++ b/man/explain.Rd @@ -20,6 +20,7 @@ explain( get_model_specs = NULL, MSEv_uniform_comb_weights = TRUE, timing = TRUE, + verbose = 0, ... ) } @@ -38,8 +39,8 @@ Contains the data used to estimate the (conditional) distributions for the featu needed to properly estimate the conditional expectations in the Shapley formula.} \item{approach}{Character vector of length \code{1} or one less than the number of features. -All elements should, -either be \code{"gaussian"}, \code{"copula"}, \code{"empirical"}, \code{"ctree"}, \code{"categorical"}, \code{"timeseries"}, or \code{"independence"}. +All elements should, either be \code{"gaussian"}, \code{"copula"}, \code{"empirical"}, \code{"ctree"}, \code{"vaeac"}, +\code{"categorical"}, \code{"timeseries"}, or \code{"independence"}. See details for more information.} \item{prediction_zero}{Numeric. @@ -111,8 +112,13 @@ sampling frequency when not all combinations are considered.} \item{timing}{Logical. Whether the timing of the different parts of the \code{explain()} should saved in the model object.} +\item{verbose}{An integer specifying the level of verbosity. If \code{0}, \code{shapr} will stay silent. +If \code{1}, it will print information about performance. If \code{2}, some additional information will be printed out. +Use \code{0} (default) for no verbosity, \code{1} for low verbose, and \code{2} for high verbose. +TODO: Make this clearer when we end up fixing this and if they should force a progressr bar.} + \item{...}{ - Arguments passed on to \code{\link[=setup_approach.empirical]{setup_approach.empirical}}, \code{\link[=setup_approach.independence]{setup_approach.independence}}, \code{\link[=setup_approach.gaussian]{setup_approach.gaussian}}, \code{\link[=setup_approach.copula]{setup_approach.copula}}, \code{\link[=setup_approach.ctree]{setup_approach.ctree}}, \code{\link[=setup_approach.categorical]{setup_approach.categorical}}, \code{\link[=setup_approach.timeseries]{setup_approach.timeseries}} + Arguments passed on to \code{\link[=setup_approach.empirical]{setup_approach.empirical}}, \code{\link[=setup_approach.independence]{setup_approach.independence}}, \code{\link[=setup_approach.gaussian]{setup_approach.gaussian}}, \code{\link[=setup_approach.copula]{setup_approach.copula}}, \code{\link[=setup_approach.ctree]{setup_approach.ctree}}, \code{\link[=setup_approach.vaeac]{setup_approach.vaeac}}, \code{\link[=setup_approach.categorical]{setup_approach.categorical}}, \code{\link[=setup_approach.timeseries]{setup_approach.timeseries}} \describe{ \item{\code{empirical.type}}{Character. (default = \code{"fixed_sigma"}) Should be equal to either \code{"independence"},\code{"fixed_sigma"}, \code{"AICc_each_k"} \code{"AICc_full"}. @@ -161,6 +167,21 @@ If FALSE and the number of observations in the leaf node is more than \code{n_sa the method will sample \code{n_samples} observations (with replacement). This means that there will always be sampling in the leaf unless \code{sample} = FALSE AND the number of obs in the node is less than \code{n_samples}.} + \item{\code{vaeac.depth}}{Positive integer (default is \code{3}). 
The number of hidden layers +in the neural networks of the masked encoder, full encoder, and decoder.} + \item{\code{vaeac.width}}{Positive integer (default is \code{32}). The number of neurons in each +hidden layer in the neural networks of the masked encoder, full encoder, and decoder.} + \item{\code{vaeac.latent_dim}}{Positive integer (default is \code{8}). The number of dimensions in the latent space.} + \item{\code{vaeac.lr}}{Positive numeric (default is \code{0.001}). The learning rate used in the \code{\link[torch:optim_adam]{torch::optim_adam()}} optimizer.} + \item{\code{vaeac.activation_function}}{An \code{\link[torch:nn_module]{torch::nn_module()}} representing an activation function such as, e.g., +\code{\link[torch:nn_relu]{torch::nn_relu()}} (default), \code{\link[torch:nn_leaky_relu]{torch::nn_leaky_relu()}}, \code{\link[torch:nn_selu]{torch::nn_selu()}}, or \code{\link[torch:nn_sigmoid]{torch::nn_sigmoid()}}.} + \item{\code{vaeac.n_vaeacs_initialize}}{Positive integer (default is \code{4}). The number of different vaeac models to initiate +in the start. Pick the best performing one after \code{vaeac.extra_parameters$epochs_initiation_phase} +epochs (default is \code{2}) and continue training that one.} + \item{\code{vaeac.epochs}}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model. +This includes \code{vaeac.extra_parameters$epochs_initiation_phase}, where the default is \code{2}.} + \item{\code{vaeac.extra_parameters}}{Named list with extra parameters to the \code{vaeac} approach. See +\code{\link[=vaeac_get_extra_para_default]{vaeac_get_extra_para_default()}} for description of possible additional parameters and their default values.} \item{\code{categorical.joint_prob_dt}}{Data.table. (Optional) Containing the joint probability distribution for each combination of feature values. @@ -206,7 +227,7 @@ It's equal for all observations and set by the user through the argument \code{p The difference between the prediction and \code{none} is distributed among the other features. In theory this value should be the expected prediction without conditioning on any features. Typically we set this value equal to the mean of the response variable in our training data, but other choices -such as the mean of the predictions in the training data are also reasonable. \code{\link[=explain]{explain()}} \code{\link[=explain]{explain()}} +such as the mean of the predictions in the training data are also reasonable. } \description{ Computes dependence-aware Shapley values for observations in \code{x_explain} from the specified @@ -215,7 +236,7 @@ Computes dependence-aware Shapley values for observations in \code{x_explain} fr \details{ The most important thing to notice is that \code{shapr} has implemented six different approaches for estimating the conditional distributions of the data, namely \code{"empirical"}, -\code{"gaussian"}, \code{"copula"}, \code{"ctree"}, \code{"categorical"}, \code{"timeseries"}, and \code{"independence"}. +\code{"gaussian"}, \code{"copula"}, \code{"ctree"}, \code{"vaeac"}, \code{"categorical"}, \code{"timeseries"}, and \code{"independence"}. In addition, the user also has the option of combining the different approaches. 
E.g., if you're in a situation where you have trained a model that consists of 10 features, and you'd like to use the \code{"gaussian"} approach when you condition on a single feature, diff --git a/man/explain_forecast.Rd b/man/explain_forecast.Rd index b7817e55c..57f351971 100644 --- a/man/explain_forecast.Rd +++ b/man/explain_forecast.Rd @@ -25,6 +25,7 @@ explain_forecast( predict_model = NULL, get_model_specs = NULL, timing = TRUE, + verbose = 0, ... ) } @@ -65,8 +66,8 @@ If \code{xreg != NULL}, denotes the number of lags that should be used for each The forecast horizon to explain. Passed to the \code{predict_model} function.} \item{approach}{Character vector of length \code{1} or one less than the number of features. -All elements should, -either be \code{"gaussian"}, \code{"copula"}, \code{"empirical"}, \code{"ctree"}, \code{"categorical"}, \code{"timeseries"}, or \code{"independence"}. +All elements should, either be \code{"gaussian"}, \code{"copula"}, \code{"empirical"}, \code{"ctree"}, \code{"vaeac"}, +\code{"categorical"}, \code{"timeseries"}, or \code{"independence"}. See details for more information.} \item{prediction_zero}{Numeric. @@ -137,8 +138,13 @@ Can also be used to override the default function for natively supported model c \item{timing}{Logical. Whether the timing of the different parts of the \code{explain()} should saved in the model object.} +\item{verbose}{An integer specifying the level of verbosity. If \code{0}, \code{shapr} will stay silent. +If \code{1}, it will print information about performance. If \code{2}, some additional information will be printed out. +Use \code{0} (default) for no verbosity, \code{1} for low verbose, and \code{2} for high verbose. +TODO: Make this clearer when we end up fixing this and if they should force a progressr bar.} + \item{...}{ - Arguments passed on to \code{\link[=setup_approach.empirical]{setup_approach.empirical}}, \code{\link[=setup_approach.independence]{setup_approach.independence}}, \code{\link[=setup_approach.gaussian]{setup_approach.gaussian}}, \code{\link[=setup_approach.copula]{setup_approach.copula}}, \code{\link[=setup_approach.ctree]{setup_approach.ctree}}, \code{\link[=setup_approach.categorical]{setup_approach.categorical}}, \code{\link[=setup_approach.timeseries]{setup_approach.timeseries}} + Arguments passed on to \code{\link[=setup_approach.empirical]{setup_approach.empirical}}, \code{\link[=setup_approach.independence]{setup_approach.independence}}, \code{\link[=setup_approach.gaussian]{setup_approach.gaussian}}, \code{\link[=setup_approach.copula]{setup_approach.copula}}, \code{\link[=setup_approach.ctree]{setup_approach.ctree}}, \code{\link[=setup_approach.vaeac]{setup_approach.vaeac}}, \code{\link[=setup_approach.categorical]{setup_approach.categorical}}, \code{\link[=setup_approach.timeseries]{setup_approach.timeseries}} \describe{ \item{\code{empirical.type}}{Character. (default = \code{"fixed_sigma"}) Should be equal to either \code{"independence"},\code{"fixed_sigma"}, \code{"AICc_each_k"} \code{"AICc_full"}. @@ -187,6 +193,21 @@ If FALSE and the number of observations in the leaf node is more than \code{n_sa the method will sample \code{n_samples} observations (with replacement). This means that there will always be sampling in the leaf unless \code{sample} = FALSE AND the number of obs in the node is less than \code{n_samples}.} + \item{\code{vaeac.depth}}{Positive integer (default is \code{3}). 
The number of hidden layers +in the neural networks of the masked encoder, full encoder, and decoder.} + \item{\code{vaeac.width}}{Positive integer (default is \code{32}). The number of neurons in each +hidden layer in the neural networks of the masked encoder, full encoder, and decoder.} + \item{\code{vaeac.latent_dim}}{Positive integer (default is \code{8}). The number of dimensions in the latent space.} + \item{\code{vaeac.lr}}{Positive numeric (default is \code{0.001}). The learning rate used in the \code{\link[torch:optim_adam]{torch::optim_adam()}} optimizer.} + \item{\code{vaeac.activation_function}}{An \code{\link[torch:nn_module]{torch::nn_module()}} representing an activation function such as, e.g., +\code{\link[torch:nn_relu]{torch::nn_relu()}} (default), \code{\link[torch:nn_leaky_relu]{torch::nn_leaky_relu()}}, \code{\link[torch:nn_selu]{torch::nn_selu()}}, or \code{\link[torch:nn_sigmoid]{torch::nn_sigmoid()}}.} + \item{\code{vaeac.n_vaeacs_initialize}}{Positive integer (default is \code{4}). The number of different vaeac models to initiate +in the start. Pick the best performing one after \code{vaeac.extra_parameters$epochs_initiation_phase} +epochs (default is \code{2}) and continue training that one.} + \item{\code{vaeac.epochs}}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model. +This includes \code{vaeac.extra_parameters$epochs_initiation_phase}, where the default is \code{2}.} + \item{\code{vaeac.extra_parameters}}{Named list with extra parameters to the \code{vaeac} approach. See +\code{\link[=vaeac_get_extra_para_default]{vaeac_get_extra_para_default()}} for description of possible additional parameters and their default values.} \item{\code{categorical.joint_prob_dt}}{Data.table. (Optional) Containing the joint probability distribution for each combination of feature values. @@ -232,7 +253,7 @@ It's equal for all observations and set by the user through the argument \code{p The difference between the prediction and \code{none} is distributed among the other features. In theory this value should be the expected prediction without conditioning on any features. Typically we set this value equal to the mean of the response variable in our training data, but other choices -such as the mean of the predictions in the training data are also reasonable. \code{\link[=explain]{explain()}} \code{\link[=explain]{explain()}} +such as the mean of the predictions in the training data are also reasonable. } \description{ Computes dependence-aware Shapley values for observations in \code{explain_idx} from the specified diff --git a/man/finalize_explanation.Rd b/man/finalize_explanation.Rd index aa97c7eb3..ffff80604 100644 --- a/man/finalize_explanation.Rd +++ b/man/finalize_explanation.Rd @@ -42,7 +42,7 @@ It's equal for all observations and set by the user through the argument \code{p The difference between the prediction and \code{none} is distributed among the other features. In theory this value should be the expected prediction without conditioning on any features. Typically we set this value equal to the mean of the response variable in our training data, but other choices -such as the mean of the predictions in the training data are also reasonable. \code{\link[=explain]{explain()}} \code{\link[=explain]{explain()}} +such as the mean of the predictions in the training data are also reasonable. 
} \description{ Computes dependence-aware Shapley values for observations in \code{x_explain} from the specified @@ -51,7 +51,7 @@ Computes dependence-aware Shapley values for observations in \code{x_explain} fr \details{ The most important thing to notice is that \code{shapr} has implemented six different approaches for estimating the conditional distributions of the data, namely \code{"empirical"}, -\code{"gaussian"}, \code{"copula"}, \code{"ctree"}, \code{"categorical"}, \code{"timeseries"}, and \code{"independence"}. +\code{"gaussian"}, \code{"copula"}, \code{"ctree"}, \code{"vaeac"}, \code{"categorical"}, \code{"timeseries"}, and \code{"independence"}. In addition, the user also has the option of combining the different approaches. E.g., if you're in a situation where you have trained a model that consists of 10 features, and you'd like to use the \code{"gaussian"} approach when you condition on a single feature, diff --git a/man/paired_sampler.Rd b/man/paired_sampler.Rd new file mode 100644 index 000000000..7bd8de1d0 --- /dev/null +++ b/man/paired_sampler.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{paired_sampler} +\alias{paired_sampler} +\title{Sampling Paired Observations} +\usage{ +paired_sampler(vaeac_dataset_object, shuffle = FALSE) +} +\arguments{ +\item{vaeac_dataset_object}{A \code{\link[=vaeac_dataset]{vaeac_dataset()}} object containing the data.} + +\item{shuffle}{Boolean. If \code{TRUE}, then the data is shuffled. If \code{FALSE}, +then the data is returned in chronological order.} +} +\description{ +A sampler used to samples the batches where each instances is sampled twice +} +\details{ +A sampler object that allows for paired sampling by always including each observation from the +\code{\link[=vaeac_dataset]{vaeac_dataset()}} twice. +A \code{\link[torch:sampler]{torch::sampler()}} object can be used with \code{\link[torch:dataloader]{torch::dataloader()}} +when creating batches from a torch dataset \code{\link[torch:dataset]{torch::dataset()}}. See more on +\url{https://rdrr.io/cran/torch/src/R/utils-data-sampler.R}. +This function does not use batch iterators, which might increase the speed. 
+} +\examples{ +\dontrun{ +# Example how to use it combined with mask generators with paired sampling activated +batch_size <- 4 +if (batch_size \%\% 2 == 1) batch_size <- batch_size - 1 # Make sure that batch size is even +n_features <- 3 +n_observations <- 5 +shuffle <- TRUE +data <- torch_tensor(matrix(rep(seq(n_observations), each = n_features), + ncol = n_features, byrow = TRUE +)) +data +dataset <- vaeac_dataset(data, rep(1, n_features)) +dataload <- torch::dataloader(dataset, + batch_size = batch_size, + sampler = paired_sampler(dataset, + shuffle = shuffle + ) +) +dataload$.length() # Number of batches, same as ceiling((2 * n_observations) / batch_size) +mask_generator <- MCAR_mask_generator(paired = TRUE) +coro::loop(for (batch in dataload) { + mask <- mask_generator(batch) + obs <- mask * batch + print(torch::torch_cat(c(batch, mask, obs), -1)) +}) +} +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/plot_SV_several_approaches.Rd b/man/plot_SV_several_approaches.Rd new file mode 100644 index 000000000..274b1a608 --- /dev/null +++ b/man/plot_SV_several_approaches.Rd @@ -0,0 +1,210 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.R +\name{plot_SV_several_approaches} +\alias{plot_SV_several_approaches} +\title{Shapley value bar plots for several explanation objects} +\usage{ +plot_SV_several_approaches( + explanation_list, + index_explicands = NULL, + only_these_features = NULL, + plot_phi0 = FALSE, + digits = 4, + add_zero_line = FALSE, + axis_labels_n_dodge = NULL, + axis_labels_rotate_angle = NULL, + horizontal_bars = TRUE, + facet_scales = "free", + facet_ncol = 2, + geom_col_width = 0.85, + brewer_palette = NULL +) +} +\arguments{ +\item{explanation_list}{A list of \code{\link[=explain]{explain()}} objects applied to the same data and model. +If the entries in the list is named, then the function use these names. Otherwise, it defaults to +the approach names (with integer suffix for duplicates) for the explanation objects in \code{explanation_list}.} + +\item{index_explicands}{Integer vector. Which of the explicands (test observations) to plot. +E.g. if you have explained 10 observations using \code{\link[=explain]{explain()}}, you can generate a plot for the +first 5 observations/explicands and the 10th by setting \code{index_x_explain = c(1:5, 10)}.} + +\item{only_these_features}{String vector. Containing the names of the features which +are to be included in the bar plots.} + +\item{plot_phi0}{Boolean. If we are to include the \eqn{\phi_0} in the bar plots or not.} + +\item{digits}{Integer. Number of significant digits to use in the feature description.} + +\item{add_zero_line}{Boolean. If we are to add a black line for a feature contribution of 0.} + +\item{axis_labels_n_dodge}{Integer. The number of rows that +should be used to render the labels. This is useful for displaying labels that would otherwise overlap.} + +\item{axis_labels_rotate_angle}{Numeric. The angle of the axis label, where 0 means horizontal, 45 means tilted, +and 90 means vertical. Compared to setting the angle in\code{\link[ggplot2:theme]{ggplot2::theme()}} / \code{\link[ggplot2:element]{ggplot2::element_text()}}, this also +uses some heuristics to automatically pick the \code{hjust} and \code{vjust} that you probably want.} + +\item{horizontal_bars}{Boolean. Flip Cartesian coordinates so that horizontal becomes vertical, +and vertical, horizontal. 
This is primarily useful for converting geoms and statistics which display +y conditional on x, to x conditional on y. See \code{\link[ggplot2:coord_flip]{ggplot2::coord_flip()}}.} + +\item{facet_scales}{Should scales be free ("\code{free}", the default), fixed ("\code{fixed}"), or free in one dimension +("\code{free_x}", "\code{free_y}")? The user has to change the latter manually depending on the value of \code{horizontal_bars}.} + +\item{facet_ncol}{Integer. The number of columns in the facet grid. Default is \code{facet_ncol = 2}.} + +\item{geom_col_width}{Numeric. Bar width. By default, set to 85\% of the \code{\link[ggplot2:resolution]{ggplot2::resolution()}} of the data.} + +\item{brewer_palette}{String. Name of one of the color palettes from \code{\link[RColorBrewer:ColorBrewer]{RColorBrewer::RColorBrewer()}}. +If \code{NULL}, then the function uses the default \code{\link[ggplot2:ggplot]{ggplot2::ggplot()}} color scheme. +The following palettes are available for use with these scales: +\describe{ +\item{Diverging}{BrBG, PiYG, PRGn, PuOr, RdBu, RdGy, RdYlBu, RdYlGn, Spectral} +\item{Qualitative}{Accent, Dark2, Paired, Pastel1, Pastel2, Set1, Set2, Set3} +\item{Sequential}{Blues, BuGn, BuPu, GnBu, Greens, Greys, Oranges, +OrRd, PuBu, PuBuGn, PuRd, Purples, RdPu, Reds, YlGn, YlGnBu, YlOrBr, YlOrRd} +}} +} +\value{ +A \code{\link[ggplot2:ggplot]{ggplot2::ggplot()}} object. +} +\description{ +Make plots to visualize and compare the estimated Shapley values for a list of +\code{\link[=explain]{explain()}} objects applied to the same data and model. +} +\examples{ +# Load necessary libraries +library(xgboost) +library(data.table) + +# Get the data +data("airquality") +data <- data.table::as.data.table(airquality) +data <- data[complete.cases(data), ] + +# Define the features and the response +x_var <- c("Solar.R", "Wind", "Temp", "Month") +y_var <- "Ozone" + +# Split data into test and training data set +ind_x_explain <- 1:12 +x_train <- data[-ind_x_explain, ..x_var] +y_train <- data[-ind_x_explain, get(y_var)] +x_explain <- data[ind_x_explain, ..x_var] + +# Fitting a basic xgboost model to the training data +model <- xgboost::xgboost( + data = as.matrix(x_train), + label = y_train, + nround = 20, + verbose = FALSE +) + +# Specifying the phi_0, i.e. the expected prediction without any features +prediction_zero <- mean(y_train) + +# Independence approach +explanation_independence <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "independence", + prediction_zero = prediction_zero, + n_samples = 1e2 +) + +# Empirical approach +explanation_empirical <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "empirical", + prediction_zero = prediction_zero, + n_samples = 1e2 +) + +# Gaussian 1e1 approach +explanation_gaussian_1e1 <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "gaussian", + prediction_zero = prediction_zero, + n_samples = 1e1 +) + +# Gaussian 1e2 approach +explanation_gaussian_1e2 <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "gaussian", + prediction_zero = prediction_zero, + n_samples = 1e2 +) + +# Combined approach +explanation_combined <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = c("gaussian", "ctree", "empirical"), + prediction_zero = prediction_zero, + n_samples = 1e2 +) + +# Create a list of explanations with names +explanation_list <- list( + "Ind." 
= explanation_independence, + "Emp." = explanation_empirical, + "Gaus. 1e1" = explanation_gaussian_1e1, + "Gaus. 1e2" = explanation_gaussian_1e2, + "Combined" = explanation_combined +) + +if (requireNamespace("ggplot2", quietly = TRUE)) { + # The function uses the provided names. + plot_SV_several_approaches(explanation_list) + + # We can change the number of columns in the grid of plots and add other visual alterations + plot_SV_several_approaches(explanation_list, + facet_ncol = 3, + facet_scales = "free_y", + add_zero_line = TRUE, + digits = 2, + brewer_palette = "Paired", + geom_col_width = 0.6 + ) + + ggplot2::theme_minimal() + + ggplot2::theme(legend.position = "bottom", plot.title = ggplot2::element_text(size = 0)) + + + # We can specify which explicands to plot to get less chaotic plots and make the bars vertical + plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10), + horizontal_bars = FALSE, + axis_labels_rotate_angle = 45 + ) + + # We can change the order of the features by specifying the + # order using the `only_these_features` parameter. + plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10), + only_these_features = c("Temp", "Solar.R", "Month", "Wind") + ) + + # We can also remove certain features if we are not interested in them + # or want to focus on, e.g., two features. The function will give a + # message to if the user specifies non-valid feature names. + plot_SV_several_approaches(explanation_list, + index_explicands = c(1:2, 5, 10), + only_these_features = c("Temp", "Solar.R"), + plot_phi0 = TRUE + ) +} + +} +\author{ +Lars Henry Berge Olsen +} diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd index 23e57b18d..0d6098204 100644 --- a/man/prepare_data.Rd +++ b/man/prepare_data.Rd @@ -1,7 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/approach.R, R/approach_categorical.R, % R/approach_copula.R, R/approach_ctree.R, R/approach_empirical.R, -% R/approach_gaussian.R, R/approach_independence.R, R/approach_timeseries.R +% R/approach_gaussian.R, R/approach_independence.R, R/approach_timeseries.R, +% R/approach_vaeac.R \name{prepare_data} \alias{prepare_data} \alias{prepare_data.categorical} @@ -11,6 +12,7 @@ \alias{prepare_data.gaussian} \alias{prepare_data.independence} \alias{prepare_data.timeseries} +\alias{prepare_data.vaeac} \title{Generate data used for predictions and Monte Carlo integration} \usage{ prepare_data(internal, index_features = NULL, ...) @@ -28,6 +30,8 @@ prepare_data(internal, index_features = NULL, ...) \method{prepare_data}{independence}(internal, index_features = NULL, ...) \method{prepare_data}{timeseries}(internal, index_features = NULL, ...) + +\method{prepare_data}{vaeac}(internal, index_features = NULL, ...) } \arguments{ \item{internal}{Not used.} diff --git a/man/release_questions.Rd b/man/release_questions.Rd new file mode 100644 index 000000000..b8455b820 --- /dev/null +++ b/man/release_questions.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shapr-package.R +\name{release_questions} +\alias{release_questions} +\title{Auxiliary function for the vaeac vignette} +\usage{ +release_questions() +} +\description{ +Function that question if the main and vaeac vignette has been built using the +\code{rebuild-long-running-vignette.R} function. This is only useful when using devtools to release +\code{shapr} to cran. See \code{\link[devtools:release]{devtools::release()}} for more information. 
+} diff --git a/man/setup.Rd b/man/setup.Rd index 45a4ef170..1b096533d 100644 --- a/man/setup.Rd +++ b/man/setup.Rd @@ -28,6 +28,7 @@ setup( explain_xreg_lags = NULL, group_lags = NULL, timing, + verbose, is_python = FALSE, ... ) @@ -41,8 +42,8 @@ needed to properly estimate the conditional expectations in the Shapley formula. Contains the the features, whose predictions ought to be explained.} \item{approach}{Character vector of length \code{1} or one less than the number of features. -All elements should, -either be \code{"gaussian"}, \code{"copula"}, \code{"empirical"}, \code{"ctree"}, \code{"categorical"}, \code{"timeseries"}, or \code{"independence"}. +All elements should, either be \code{"gaussian"}, \code{"copula"}, \code{"empirical"}, \code{"ctree"}, \code{"vaeac"}, +\code{"categorical"}, \code{"timeseries"}, or \code{"independence"}. See details for more information.} \item{prediction_zero}{Numeric. @@ -138,6 +139,11 @@ If \code{FALSE} all lags of each variable are explained individually.} \item{timing}{Logical. Whether the timing of the different parts of the \code{explain()} should saved in the model object.} +\item{verbose}{An integer specifying the level of verbosity. If \code{0}, \code{shapr} will stay silent. +If \code{1}, it will print information about performance. If \code{2}, some additional information will be printed out. +Use \code{0} (default) for no verbosity, \code{1} for low verbose, and \code{2} for high verbose. +TODO: Make this clearer when we end up fixing this and if they should force a progressr bar.} + \item{is_python}{Logical. Indicates whether the function is called from the Python wrapper. Default is FALSE which is never changed when calling the function via \code{explain()} in R. The parameter is later used to disallow running the AICc-versions of the empirical as that requires data based optimization.} diff --git a/man/setup_approach.Rd b/man/setup_approach.Rd index a87fbd106..d5efdf5b6 100644 --- a/man/setup_approach.Rd +++ b/man/setup_approach.Rd @@ -1,7 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/approach.R, R/approach_categorical.R, % R/approach_copula.R, R/approach_ctree.R, R/approach_empirical.R, -% R/approach_gaussian.R, R/approach_independence.R, R/approach_timeseries.R +% R/approach_gaussian.R, R/approach_independence.R, R/approach_timeseries.R, +% R/approach_vaeac.R \name{setup_approach} \alias{setup_approach} \alias{setup_approach.categorical} @@ -11,6 +12,7 @@ \alias{setup_approach.gaussian} \alias{setup_approach.independence} \alias{setup_approach.timeseries} +\alias{setup_approach.vaeac} \title{Set up the framework chosen approach} \usage{ setup_approach(internal, ...) @@ -57,6 +59,19 @@ setup_approach(internal, ...) timeseries.bounds = c(NULL, NULL), ... ) + +\method{setup_approach}{vaeac}( + internal, + vaeac.depth = 3, + vaeac.width = 32, + vaeac.latent_dim = 8, + vaeac.activation_function = torch::nn_relu, + vaeac.lr = 0.001, + vaeac.n_vaeacs_initialize = 4, + vaeac.epochs = 100, + vaeac.extra_parameters = list(), + ... +) } \arguments{ \item{internal}{Not used.} @@ -148,11 +163,48 @@ Represents the kernel bandwidth in the distance computation. TODO: What length s If one or both of these bounds are not NULL, we restrict the sampled time series to be between these bounds. This is useful if the underlying time series are scaled between 0 and 1, for example.} + +\item{vaeac.depth}{Positive integer (default is \code{3}). 
The number of hidden layers
+in the neural networks of the masked encoder, full encoder, and decoder.}
+
+\item{vaeac.width}{Positive integer (default is \code{32}). The number of neurons in each
+hidden layer in the neural networks of the masked encoder, full encoder, and decoder.}
+
+\item{vaeac.latent_dim}{Positive integer (default is \code{8}). The number of dimensions in the latent space.}
+
+\item{vaeac.activation_function}{An \code{\link[torch:nn_module]{torch::nn_module()}} representing an activation function such as, e.g.,
+\code{\link[torch:nn_relu]{torch::nn_relu()}} (default), \code{\link[torch:nn_leaky_relu]{torch::nn_leaky_relu()}}, \code{\link[torch:nn_selu]{torch::nn_selu()}}, or \code{\link[torch:nn_sigmoid]{torch::nn_sigmoid()}}.}
+
+\item{vaeac.lr}{Positive numeric (default is \code{0.001}). The learning rate used in the \code{\link[torch:optim_adam]{torch::optim_adam()}} optimizer.}
+
+\item{vaeac.n_vaeacs_initialize}{Positive integer (default is \code{4}). The number of different vaeac models to initiate
+in the start. Pick the best performing one after \code{vaeac.extra_parameters$epochs_initiation_phase}
+epochs (default is \code{2}) and continue training that one.}
+
+\item{vaeac.epochs}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model.
+This includes \code{vaeac.extra_parameters$epochs_initiation_phase}, where the default is \code{2}.}
+
+\item{vaeac.extra_parameters}{Named list with extra parameters to the \code{vaeac} approach. See
+\code{\link[=vaeac_get_extra_para_default]{vaeac_get_extra_para_default()}} for description of possible additional parameters and their default values.}
+}
+\description{
+The different choices of \code{approach} take different (optional) parameters,
+which are forwarded from \code{\link[=explain]{explain()}}.
+}
+\section{The vaeac approach}{

+The \code{vaeac} model consists of three neural networks (a full encoder, a masked encoder, and a decoder) based
+on the provided \code{vaeac.depth} and \code{vaeac.width}. The encoders map the full and masked input
+representations to latent representations, respectively, where the dimension is given by \code{vaeac.latent_dim}.
+The latent representations are sent to the decoder to go back to the real feature space and
+provide a samplable probabilistic representation, from which the Monte Carlo samples are generated.
+We use the \code{vaeac} method at the epoch with the lowest validation error (IWAE) by default, but
+other possibilities are available by setting the \code{vaeac.which_vaeac_model} parameter. See
+\href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)} for more details.
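
For concreteness, the following is a minimal usage sketch (not part of the generated documentation) of how the vaeac arguments documented above could be passed through explain(). The model, x_train, x_explain, and prediction_zero objects are assumed to be prepared as in the example scripts earlier in this changeset, and the parameter values shown are simply the documented defaults plus one of the extra parameters used in those scripts.

# Minimal sketch, assuming model/x_train/x_explain/prediction_zero already exist
library(shapr)
explanation_vaeac <- explain(
  model = model,
  x_explain = x_explain,
  x_train = x_train,
  approach = "vaeac",
  prediction_zero = prediction_zero,
  n_samples = 100,
  vaeac.depth = 3,              # hidden layers in the encoders and decoder (default)
  vaeac.width = 32,             # neurons per hidden layer (default)
  vaeac.latent_dim = 8,         # dimension of the latent space (default)
  vaeac.epochs = 100,           # epochs for the final vaeac model (default)
  vaeac.n_vaeacs_initialize = 4,
  vaeac.extra_parameters = list(vaeac.paired_sampling = TRUE)
)

Any of the additional parameters listed in vaeac_get_extra_para_default() can be supplied through vaeac.extra_parameters in the same way.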
+} + \author{ Martin Jullum + +Lars Henry Berge Olsen } diff --git a/man/shapr-package.Rd b/man/shapr-package.Rd index 2c6e66acd..1041460af 100644 --- a/man/shapr-package.Rd +++ b/man/shapr-package.Rd @@ -23,6 +23,7 @@ Useful links: Authors: \itemize{ \item Nikolai Sellereite \email{nikolaisellereite@gmail.com} (\href{https://orcid.org/0000-0002-4671-0337}{ORCID}) + \item Lars Henry Berge Olsen \email{lholsen@math.uio.no} (\href{https://orcid.org/0009-0006-9360-6993}{ORCID}) \item Annabelle Redelmeier \email{Annabelle.Redelmeier@nr.no} \item Jon Lachmann \email{Jon@lachmann.nu} } diff --git a/man/vaeac.Rd b/man/vaeac.Rd new file mode 100644 index 000000000..6b9c38ded --- /dev/null +++ b/man/vaeac.Rd @@ -0,0 +1,171 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{vaeac} +\alias{vaeac} +\title{Initializing a vaeac model} +\usage{ +vaeac( + one_hot_max_sizes, + width = 32, + depth = 3, + latent_dim = 8, + activation_function = torch::nn_relu, + skip_conn_layer = FALSE, + skip_conn_masked_enc_dec = FALSE, + batch_normalization = FALSE, + paired_sampling = FALSE, + mask_generator_name = c("MCAR_mask_generator", "Specified_prob_mask_generator", + "Specified_masks_mask_generator"), + masking_ratio = 0.5, + mask_gen_coalitions = NULL, + mask_gen_coalitions_prob = NULL, + sigma_mu = 10000, + sigma_sigma = 1e-04 +) +} +\arguments{ +\item{one_hot_max_sizes}{A torch tensor of dimension p containing the one hot sizes of the \code{n_features} features. +The sizes for the continuous features can either be \code{0} or \code{1}.} + +\item{width}{Integer. The number of neurons in each hidden layer in the neural networks +of the masked encoder, full encoder, and decoder.} + +\item{depth}{Integer. The number of hidden layers in the neural networks of the +masked encoder, full encoder, and decoder.} + +\item{latent_dim}{Integer. The number of dimensions in the latent space.} + +\item{activation_function}{A \code{\link[torch:nn_module]{torch::nn_module()}} representing an activation function such as, e.g., +\code{\link[torch:nn_relu]{torch::nn_relu()}}, \code{\link[torch:nn_leaky_relu]{torch::nn_leaky_relu()}}, \code{\link[torch:nn_selu]{torch::nn_selu()}}, +\code{\link[torch:nn_sigmoid]{torch::nn_sigmoid()}}.} + +\item{skip_conn_layer}{Boolean. If we are to use skip connections in each layer, see \code{\link[=SkipConnection]{SkipConnection()}}. +If \code{TRUE}, then we add the input to the outcome of each hidden layer, so the output becomes +\eqn{X + \operatorname{activation}(WX + b)}. I.e., the identity skip connection.} + +\item{skip_conn_masked_enc_dec}{Boolean. If we are to apply concatenating skip +connections between the layers in the masked encoder and decoder. The first layer of the masked encoder will be +linked to the last layer of the decoder. The second layer of the masked encoder will be +linked to the second to last layer of the decoder, and so on.} + +\item{batch_normalization}{Boolean. If we are to use batch normalization after the activation function. +Note that if \code{skip_conn_layer} is TRUE, then the normalization is +done after the adding from the skip connection. I.e, we batch normalize the whole quantity X + activation(WX + b).} + +\item{paired_sampling}{Boolean. If we are doing paired sampling. I.e., if we are to include both coalition S +and \eqn{\bar{S}} when we sample coalitions during training for each batch.} + +\item{mask_generator_name}{String specifying the type of mask generator to use. 
Must be one of +'MCAR_mask_generator', 'Specified_prob_mask_generator', and 'Specified_masks_mask_generator'.} + +\item{masking_ratio}{Scalar. The probability for an entry in the generated mask to be 1 (masked). +Not used if \code{mask_gen_coalitions} is given.} + +\item{mask_gen_coalitions}{Matrix containing the different coalitions to learn. +Must be given if \code{mask_generator_name = 'Specified_masks_mask_generator'}.} + +\item{mask_gen_coalitions_prob}{Numerics containing the probabilities +for sampling each coalition (mask) in \code{mask_gen_coalitions}.} + +\item{sigma_mu}{Numeric representing a hyperparameter in the normal-gamma prior used on the masked encoder, +see Section 3.3.1 in \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.} + +\item{sigma_sigma}{Numeric representing a hyperparameter in the normal-gamma prior used on the masked encoder, +see Section 3.3.1 in \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.} +} +\value{ +Returns a list with the neural networks of the masked encoder, full encoder, and decoder together +with the reconstruction log probability function, optimizer constructor, sampler from the decoder output, +mask generator, batch size, and scale factor for the stability of the variational lower bound optimization. +} +\description{ +Class that represents a vaeac model, i.e., the class creates the neural networks in the vaeac +model and the necessary training utilities. +For more details, see \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}. +} +\details{ +This function builds the neural networks (masked encoder, full encoder, decoder) given +the list of one-hot max sizes of the features in the dataset we use to train the vaeac model, +and the provided parameters for the networks. It also creates, e.g., the reconstruction log probability function and +methods for sampling from the decoder output, and then uses these to create the vaeac model. +} +\section{make_observed}{ + +Apply Mask to Batch to Create Observed Batch + +Create the observed batch by applying the mask to the batch, such that the masked encoder +only receives the observed part of each observation. +} + +\section{make_latent_distributions}{ + +Compute the Latent Distributions Inferred by the Encoders + +Compute the parameters for the latent normal distributions inferred by the encoders. +If \code{only_masked_encoder = TRUE}, then we only compute the latent normal distributions inferred by the +masked encoder. This is used in the deployment phase when we do not have access to the full observation. +} + +\section{masked_encoder_regularization}{ + +Compute the Regularizer for the Latent Distribution Inferred by the Masked Encoder. + +The masked encoder (prior) distribution regularization in the latent space. +This is used to compute the extended variational lower bound used to train vaeac, see +Section 3.3.1 in \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}. +Though regularizing prevents the masked encoder distribution parameters from going to infinity, +the model usually doesn't diverge even without this regularization.
With the recommended default regularization parameters, the regularization has almost +no effect on the learning process. +} + +\section{batch_vlb}{ + +Compute the Variational Lower Bound for the Observations in the Batch + +Compute the differentiable lower bound for the given batch of objects and mask. +Used as the (negative) loss function for training the vaeac model. +} + +\section{batch_iwae}{ + +Compute the IWAE log-likelihood estimate with K samples per object. + +Technically, it is differentiable, but it is recommended to use it for +evaluation purposes inside \code{\link[torch:with_no_grad]{torch::with_no_grad()}} in order to save memory. With \code{\link[torch:with_no_grad]{torch::with_no_grad()}} +the method almost doesn't require extra memory for very large K. The method makes K independent +passes through the decoder network, so the batch size is the same as for training with batch_vlb. +IWAE is an importance sampling estimate of the log-likelihood: +\deqn{ +\log p_{\theta, \psi}(x|y) \approx +\log {\frac{1}{K} \sum_{i=1}^K [p_\theta(x|z_i, y) * p_\psi(z_i|y) / q_\phi(z_i|x,y)]} \newline += +\log {\sum_{i=1}^K \exp(\log[p_\theta(x|z_i, y) * p_\psi(z_i|y) / q_\phi(z_i|x,y)])} - \log(K) \newline += +\log {\sum_{i=1}^K \exp(\log[p_\theta(x|z_i, y)] + \log[p_\psi(z_i|y)] - \log[q_\phi(z_i|x,y)])} - \log(K) \newline += +\operatorname{logsumexp}(\log[p_\theta(x|z_i, y)] + \log[p_\psi(z_i|y)] - \log[q_\phi(z_i|x,y)]) - \log(K) \newline += +\operatorname{logsumexp}(\text{rec}\_\text{loss} + \text{prior}\_\text{log}\_\text{prob} - + \text{proposal}\_\text{log}\_\text{prob}) - \log(K),} +where \eqn{z_i \sim q_\phi(z|x,y)}. +} + +\section{generate_samples_params}{ + +Generate the parameters of the generative distributions for samples from the batch. + +The function makes K latent representations for each object from the batch and sends these +latent representations through the decoder to obtain the parameters for the generative distributions. +I.e., means and variances for the normal distributions (continuous features) and probabilities +for the categorical distributions (categorical features). +The second axis is used to index samples for an object, i.e., if the batch shape is [n x D1 x D2], then +the result shape is [n x K x D1 x D2]. It is better to use it inside \code{\link[torch:with_no_grad]{torch::with_no_grad()}} in order to save +memory. With \code{\link[torch:with_no_grad]{torch::with_no_grad()}} the method doesn't require extra memory except the memory for the result. +} + +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_categorical_parse_params.Rd b/man/vaeac_categorical_parse_params.Rd new file mode 100644 index 000000000..cd33bb998 --- /dev/null +++ b/man/vaeac_categorical_parse_params.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{vaeac_categorical_parse_params} +\alias{vaeac_categorical_parse_params} +\title{Creates Categorical Distributions} +\usage{ +vaeac_categorical_parse_params(params, min_prob = 0, max_prob = 1) +} +\arguments{ +\item{params}{Tensor of dimension \code{batch_size} x \code{K} containing the logits for each +of the \code{K} classes and \code{batch_size} observations.} + +\item{min_prob}{For stability it might be desirable that the minimal probability is not exactly zero.} + +\item{max_prob}{For stability it might be desirable that the maximal probability is not exactly one.} +} +\value{ +torch::distr_categorical distributions with the provided probabilities for each class.
+} +\description{ +Function that takes in a tensor containing the logits for each of the K classes. Each row corresponds to +an observation. Each row is sent through the softmax function to convert the logits to probabilities that sum to one. +The function also clamps the probabilities between a minimum and maximum probability. Note that we still normalize +them afterward, so the final probabilities can be marginally below or above the thresholds. +} +\details{ +Take a Tensor (e.g., a part of the neural network output) and return a \code{\link[torch:distr_categorical]{torch::distr_categorical()}} +distribution. The input tensor after applying softmax over the last axis contains a batch of the categorical +probabilities. So there are no restrictions on the input tensor. Technically, this function treats the last axis as +the categorical probabilities, but Categorical takes only 2D input where the first axis is the batch axis and the +second one corresponds to the probabilities, so practically the function requires 2D input with the batch of +probabilities for one categorical feature. \code{min_prob} is the minimal probability for each class. +After clipping the probabilities from below and above they are renormalized in order to be a valid distribution. +This regularization is required for the numerical stability and may be considered as a neural network architecture +choice without any change to the probabilistic model. Note that the softmax function is given by +\eqn{\operatorname{Softmax}(x_i) = (\exp(x_i))/(\sum_{j} \exp(x_j))}, where \eqn{x_i} are the logits and can +take on any value, negative and positive. The output \eqn{\operatorname{Softmax}(x_i) \in [0,1]} +and \eqn{\sum_{i} \operatorname{Softmax}(x_i) = 1}. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_activation_func.Rd b/man/vaeac_check_activation_func.Rd new file mode 100644 index 000000000..75ac2973a --- /dev/null +++ b/man/vaeac_check_activation_func.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_activation_func} +\alias{vaeac_check_activation_func} +\title{Function that checks the provided activation function} +\usage{ +vaeac_check_activation_func(activation_function) +} +\arguments{ +\item{activation_function}{An \code{\link[torch:nn_module]{torch::nn_module()}} representing an activation function such as, e.g., +\code{\link[torch:nn_relu]{torch::nn_relu()}} (default), \code{\link[torch:nn_leaky_relu]{torch::nn_leaky_relu()}}, \code{\link[torch:nn_selu]{torch::nn_selu()}}, or \code{\link[torch:nn_sigmoid]{torch::nn_sigmoid()}}.} +} +\value{ +The function does not return anything. +} +\description{ +Function that checks the provided activation function +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_cuda.Rd b/man/vaeac_check_cuda.Rd new file mode 100644 index 000000000..b85777b77 --- /dev/null +++ b/man/vaeac_check_cuda.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_cuda} +\alias{vaeac_check_cuda} +\title{Function that checks for access to CUDA} +\usage{ +vaeac_check_cuda(cuda) +} +\arguments{ +\item{cuda}{Logical (default is \code{FALSE}). If \code{TRUE}, then the \code{vaeac} model will be trained using cuda/GPU. +If \code{\link[torch:cuda_is_available]{torch::cuda_is_available()}} is \code{FALSE}, then we fall back to using the CPU. If \code{FALSE}, we use the CPU.
Often this is +faster for tabular data sets. Note that cuda is not supported in the current version of the \code{shapr} package. +TODO: Update this when this is done.} +\value{ +The function does not return anything. +} +\description{ +Function that checks for access to CUDA +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_epoch_values.Rd b/man/vaeac_check_epoch_values.Rd new file mode 100644 index 000000000..767de3302 --- /dev/null +++ b/man/vaeac_check_epoch_values.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_epoch_values} +\alias{vaeac_check_epoch_values} +\title{Function that checks provided epoch arguments} +\usage{ +vaeac_check_epoch_values( + epochs, + epochs_initiation_phase, + epochs_early_stopping, + save_every_nth_epoch +) +} +\arguments{ +\item{epochs}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model. +This includes \code{epochs_initiation_phase}, where the default is \code{2}.} + +\item{epochs_initiation_phase}{Positive integer (default is \code{2}). The number of epochs to run each of the +\code{n_vaeacs_initialize} \code{vaeac} models before continuing to train only the best performing model.} + +\item{epochs_early_stopping}{Positive integer (default is \code{NULL}). The training stops if there has been no +improvement in the validation IWAE for \code{epochs_early_stopping} epochs. If the user wants the training process +to be solely based on this training criterion, then \code{epochs} in \code{\link[=explain]{explain()}} should be set to a large +number. If \code{NULL}, then \code{shapr} will internally set \code{epochs_early_stopping = vaeac.epochs} such that early +stopping does not occur.} + +\item{save_every_nth_epoch}{Positive integer (default is \code{NULL}). If provided, then the vaeac model after +every \code{save_every_nth_epoch}th epoch will be saved.} +} +\value{ +The function does not return anything. +} +\description{ +Function that checks provided epoch arguments +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_extra_named_list.Rd b/man/vaeac_check_extra_named_list.Rd new file mode 100644 index 000000000..32cd1228c --- /dev/null +++ b/man/vaeac_check_extra_named_list.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_extra_named_list} +\alias{vaeac_check_extra_named_list} +\title{Check vaeac.extra_parameters list} +\usage{ +vaeac_check_extra_named_list(vaeac.extra_parameters) +} +\arguments{ +\item{vaeac.extra_parameters}{List containing the extra parameters to the \code{vaeac} approach} +} +\description{ +Check vaeac.extra_parameters list +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_logicals.Rd b/man/vaeac_check_logicals.Rd new file mode 100644 index 000000000..41e31d065 --- /dev/null +++ b/man/vaeac_check_logicals.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_logicals} +\alias{vaeac_check_logicals} +\title{Function that checks logicals} +\usage{ +vaeac_check_logicals(named_list_logicals) +} +\arguments{ +\item{named_list_logicals}{List containing named entries. I.e., \code{list(a = TRUE, b = FALSE)}.} +} +\value{ +The function does not return anything.
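To illustrate the style of validation these internal check functions perform, here is a small hypothetical sketch (the function name, constraints, and messages are illustrative assumptions; this is not the package's actual implementation):

# Hypothetical sketch of an epoch-value check, assuming the documented relationship
# that the initiation phase is part of the total number of epochs.
check_epoch_values_sketch <- function(epochs, epochs_initiation_phase = 2) {
  is_pos_int <- function(x) is.numeric(x) && length(x) == 1 && x >= 1 && x %% 1 == 0
  if (!is_pos_int(epochs)) stop("`epochs` must be a positive integer.")
  if (!is_pos_int(epochs_initiation_phase)) stop("`epochs_initiation_phase` must be a positive integer.")
  if (epochs_initiation_phase >= epochs) {
    stop("`epochs` must be larger than `epochs_initiation_phase`.")
  }
  invisible(NULL)
}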
+} +\description{ +Function that checks logicals +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_mask_gen.Rd b/man/vaeac_check_mask_gen.Rd new file mode 100644 index 000000000..fc5209010 --- /dev/null +++ b/man/vaeac_check_mask_gen.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_mask_gen} +\alias{vaeac_check_mask_gen} +\title{Function that checks the specified masking scheme} +\usage{ +vaeac_check_mask_gen(mask_gen_coalitions, mask_gen_coalitions_prob, x_train) +} +\arguments{ +\item{mask_gen_coalitions}{Matrix (default is \code{NULL}). Matrix containing the coalitions that the +\code{vaeac} model will be trained on, see \code{\link[=Specified_masks_mask_generator]{Specified_masks_mask_generator()}}. This parameter is used internally +in \code{shapr} when we only consider a subset of coalitions/combinations, i.e., when +\code{n_combinations} \eqn{< 2^{n_{\text{features}}}}, and for group Shapley, i.e., +when \code{group} is specified in \code{\link[=explain]{explain()}}.} + +\item{mask_gen_coalitions_prob}{Numeric array (default is \code{NULL}). Array of length equal to the height +of \code{mask_gen_coalitions} containing the probabilities of sampling the corresponding coalitions in +\code{mask_gen_coalitions}.} + +\item{x_train}{A data.table containing the training data. Categorical data must have class names \eqn{1,2,\dots,K}.} +} +\value{ +The function does not return anything. +} +\description{ +Function that checks the specified masking scheme +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_masking_ratio.Rd b/man/vaeac_check_masking_ratio.Rd new file mode 100644 index 000000000..7a7defd21 --- /dev/null +++ b/man/vaeac_check_masking_ratio.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_masking_ratio} +\alias{vaeac_check_masking_ratio} +\title{Function that checks that the masking ratio argument is valid} +\usage{ +vaeac_check_masking_ratio(masking_ratio, n_features) +} +\arguments{ +\item{masking_ratio}{Numeric (default is \code{0.5}). Probability of masking a feature in the +\code{\link[=MCAR_mask_generator]{MCAR_mask_generator()}} (MCAR = Missing Completely At Random). The MCAR masking scheme ensures that \code{vaeac} +model can do arbitrary conditioning as all coalitions will be trained. \code{masking_ratio} will be overruled if +\code{mask_gen_coalitions} is specified.} + +\item{n_features}{The number of features, i.e., the number of columns in the training data.} +} +\value{ +The function does not return anything. 
+} +\description{ +Function that checks that the masking ratio argument is valid +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_parameters.Rd b/man/vaeac_check_parameters.Rd new file mode 100644 index 000000000..0fe3f3d8f --- /dev/null +++ b/man/vaeac_check_parameters.Rd @@ -0,0 +1,183 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_parameters} +\alias{vaeac_check_parameters} +\title{Function that calls all vaeac parameters check functions} +\usage{ +vaeac_check_parameters( + x_train, + model_description, + folder_to_save_model, + cuda, + n_vaeacs_initialize, + epochs_initiation_phase, + epochs, + epochs_early_stopping, + save_every_nth_epoch, + val_ratio, + val_iwae_n_samples, + depth, + width, + latent_dim, + lr, + batch_size, + running_avg_n_values, + activation_function, + skip_conn_layer, + skip_conn_masked_enc_dec, + batch_normalization, + paired_sampling, + masking_ratio, + mask_gen_coalitions, + mask_gen_coalitions_prob, + sigma_mu, + sigma_sigma, + save_data, + log_exp_cont_feat, + which_vaeac_model, + verbose, + seed, + ... +) +} +\arguments{ +\item{x_train}{A data.table containing the training data. Categorical data must have class names \eqn{1,2,\dots,K}.} + +\item{model_description}{String (default is \code{make.names(Sys.time())}). String containing, e.g., the name of the +data distribution or additional parameter information. Used in the save name of the fitted model. If not provided, +then a name will be generated based on \code{\link[base:Sys.time]{base::Sys.time()}} to ensure a unique name. We use \code{\link[base:make.names]{base::make.names()}} to +ensure a valid file name for all operating systems.} + +\item{folder_to_save_model}{String (default is \code{\link[base:tempfile]{base::tempdir()}}). String specifying a path to a folder where +the function is to save the fitted vaeac model. Note that the path will be removed from the returned +\code{\link[=explain]{explain()}} object if \code{vaeac.save_model = FALSE}.} + +\item{cuda}{cuda Logical (default is \code{FALSE}). If \code{TRUE}, then the \code{vaeac} model will be trained using cuda/GPU. +If \code{\link[torch:cuda_is_available]{torch::cuda_is_available()}} is \code{FALSE}, the we fall back to use CPU. If \code{FALSE}, we use the CPU. Often this is +faster for tabular data sets. Note, cuda is not not supported in the current version of the \code{shapr} package. +TODO: Update this when this is done.} + +\item{n_vaeacs_initialize}{Positive integer (default is \code{4}). The number of different vaeac models to initiate +in the start. Pick the best performing one after \code{epochs_initiation_phase} +epochs (default is \code{2}) and continue training that one.} + +\item{epochs_initiation_phase}{Positive integer (default is \code{2}). The number of epochs to run each of the +\code{n_vaeacs_initialize} \code{vaeac} models before continuing to train only the best performing model.} + +\item{epochs}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model. +This includes \code{epochs_initiation_phase}, where the default is \code{2}.} + +\item{epochs_early_stopping}{Positive integer (default is \code{NULL}). The training stops if there has been no +improvement in the validation IWAE for \code{epochs_early_stopping} epochs. 
If the user wants the training process +to be solely based on this training criterion, then \code{epochs} in \code{\link[=explain]{explain()}} should be set to a large +number. If \code{NULL}, then \code{shapr} will internally set \code{epochs_early_stopping = vaeac.epochs} such that early +stopping does not occur.} + +\item{save_every_nth_epoch}{Positive integer (default is \code{NULL}). If provided, then the vaeac model after +every \code{save_every_nth_epoch}th epoch will be saved.} + +\item{val_ratio}{Numeric (default is \code{0.25}). Scalar between \code{0} and \code{1} indicating the ratio of +instances from the input data which will be used as validation data. That is, \code{val_ratio = 0.25} means +that \verb{75\%} of the provided data is used as training data, while the remaining \verb{25\%} is used as validation data.} + +\item{val_iwae_n_samples}{Positive integer (default is \code{25}). The number of generated samples used +to compute the IWAE criterion when validating the vaeac model on the validation data.} + +\item{depth}{Positive integer (default is \code{3}). The number of hidden layers +in the neural networks of the masked encoder, full encoder, and decoder.} + +\item{width}{Positive integer (default is \code{32}). The number of neurons in each +hidden layer in the neural networks of the masked encoder, full encoder, and decoder.} + +\item{latent_dim}{Positive integer (default is \code{8}). The number of dimensions in the latent space.} + +\item{lr}{Positive numeric (default is \code{0.001}). The learning rate used in the \code{\link[torch:optim_adam]{torch::optim_adam()}} optimizer.} + +\item{batch_size}{Positive integer (default is \code{64}). The number of samples to include in each batch +during the training of the vaeac model. Used in \code{\link[torch:dataloader]{torch::dataloader()}}.} + +\item{running_avg_n_values}{running_avg_n_values Positive integer (default is \code{5}). +The number of previous IWAE values to include +when we compute the running means of the IWAE criterion.} + +\item{activation_function}{An \code{\link[torch:nn_module]{torch::nn_module()}} representing an activation function such as, e.g., +\code{\link[torch:nn_relu]{torch::nn_relu()}} (default), \code{\link[torch:nn_leaky_relu]{torch::nn_leaky_relu()}}, \code{\link[torch:nn_selu]{torch::nn_selu()}}, or \code{\link[torch:nn_sigmoid]{torch::nn_sigmoid()}}.} + +\item{skip_conn_layer}{Logical (default is \code{TRUE}). If \code{TRUE}, we apply identity skip connections in each +layer, see \code{\link[=SkipConnection]{SkipConnection()}}. That is, we add the input \eqn{X} to the outcome of each hidden layer, +so the output becomes \eqn{X + activation(WX + b)}.} + +\item{skip_conn_masked_enc_dec}{Logical (default is \code{TRUE}). If \code{TRUE}, we apply concatenate skip +connections between the layers in the masked encoder and decoder. The first layer of the masked encoder will be +linked to the last layer of the decoder. The second layer of the masked encoder will be +linked to the second to last layer of the decoder, and so on.} + +\item{batch_normalization}{Logical (default is \code{FALSE}). If \code{TRUE}, we apply batch normalization after the +activation function. Note that if \code{skip_conn_layer = TRUE}, then the normalization is applied after the +inclusion of the skip connection. That is, we batch normalize the whole quantity \eqn{X + activation(WX + b)}.} + +\item{paired_sampling}{Logical (default is \code{TRUE}). If \code{TRUE}, we apply paired sampling to the training +batches. 
That is, the training observations in each batch will be duplicated, where the first instance will be masked +by \eqn{S} while the second instance will be masked by \eqn{\bar{S}}. This ensures that the training of the +\code{vaeac} model becomes more stable as the model has access to the full version of each training observation. However, +this will increase the training time due to more complex implementation and doubling the size of each batch. See +\code{\link[=paired_sampler]{paired_sampler()}} for more information.} + +\item{masking_ratio}{Numeric (default is \code{0.5}). Probability of masking a feature in the +\code{\link[=MCAR_mask_generator]{MCAR_mask_generator()}} (MCAR = Missing Completely At Random). The MCAR masking scheme ensures that \code{vaeac} +model can do arbitrary conditioning as all coalitions will be trained. \code{masking_ratio} will be overruled if +\code{mask_gen_coalitions} is specified.} + +\item{mask_gen_coalitions}{Matrix (default is \code{NULL}). Matrix containing the coalitions that the +\code{vaeac} model will be trained on, see \code{\link[=Specified_masks_mask_generator]{Specified_masks_mask_generator()}}. This parameter is used internally +in \code{shapr} when we only consider a subset of coalitions/combinations, i.e., when +\code{n_combinations} \eqn{< 2^{n_{\text{features}}}}, and for group Shapley, i.e., +when \code{group} is specified in \code{\link[=explain]{explain()}}.} + +\item{mask_gen_coalitions_prob}{Numeric array (default is \code{NULL}). Array of length equal to the height +of \code{mask_gen_coalitions} containing the probabilities of sampling the corresponding coalitions in +\code{mask_gen_coalitions}.} + +\item{sigma_mu}{Numeric (default is \code{1e4}). One of two hyperparameter values in the normal-gamma prior +used in the masked encoder, see Section 3.3.1 in +\href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.} + +\item{sigma_sigma}{Numeric (default is \code{1e-4}). One of two hyperparameter values in the normal-gamma prior +used in the masked encoder, see Section 3.3.1 in +\href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.} + +\item{save_data}{Logical (default is \code{FALSE}). If \code{TRUE}, then the data is stored together with +the model. Useful if one are to continue to train the model later using \code{\link[=vaeac_continue_train_model]{vaeac_continue_train_model()}}.} + +\item{log_exp_cont_feat}{Logical (default is \code{FALSE}). If we are to \eqn{\log} transform all +continuous features before sending the data to \code{\link[=vaeac]{vaeac()}}. The \code{vaeac} model creates unbounded Monte Carlo +sample values. Thus, if the continuous features are strictly positive (as for, e.g., the Burr distribution and +Abalone data set), it can be advantageous to \eqn{\log} transform the data to unbounded form before using \code{vaeac}. +If \code{TRUE}, then \code{\link[=vaeac_postprocess_data]{vaeac_postprocess_data()}} will take the \eqn{\exp} of the results to get back to strictly +positive values when using the \code{vaeac} model to impute missing values/generate the Monte Carlo samples.} + +\item{which_vaeac_model}{String (default is \code{best}). The name of the \code{vaeac} model (snapshots from different +epochs) to use when generating the Monte Carlo samples. The standard choices are: \code{"best"} (epoch with lowest IWAE), +\code{"best_running"} (epoch with lowest running IWAE, see \code{vaeac.running_avg_n_values}), and \code{last} (the last epoch). 
+Note that additional choices are available if \code{vaeac.save_every_nth_epoch} is provided. For example, if +\code{vaeac.save_every_nth_epoch = 5}, then \code{vaeac.which_vaeac_model} can also take the values \code{"epoch_5"}, \code{"epoch_10"}, +\code{"epoch_15"}, and so on.} + +\item{verbose}{Boolean. An integer specifying the level of verbosity. Use \code{0} (default) for no verbosity, +\code{1} for low verbose, and \code{2} for high verbose.} + +\item{seed}{Positive integer (default is \code{1}). Seed for reproducibility. Specifies the seed before any randomness +based code is being run.} + +\item{...}{List of extra parameters, currently not used.} +} +\value{ +The function does not return anything. +} +\description{ +Function that calls all vaeac parameters check functions +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_positive_integers.Rd b/man/vaeac_check_positive_integers.Rd new file mode 100644 index 000000000..c532b516b --- /dev/null +++ b/man/vaeac_check_positive_integers.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_positive_integers} +\alias{vaeac_check_positive_integers} +\title{Function that checks positive integers} +\usage{ +vaeac_check_positive_integers(named_list_positive_integers) +} +\arguments{ +\item{named_list_positive_integers}{List containing named entries. I.e., \code{list(a = 1, b = 2)}.} +} +\value{ +The function does not return anything. +} +\description{ +Function that checks positive integers +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_positive_numerics.Rd b/man/vaeac_check_positive_numerics.Rd new file mode 100644 index 000000000..cfc61dc70 --- /dev/null +++ b/man/vaeac_check_positive_numerics.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_positive_numerics} +\alias{vaeac_check_positive_numerics} +\title{Function that checks positive numerics} +\usage{ +vaeac_check_positive_numerics(named_list_positive_numerics) +} +\arguments{ +\item{named_list_positive_numerics}{List containing named entries. I.e., \code{list(a = 0.2, b = 10^3)}.} +} +\value{ +The function does not return anything. +} +\description{ +Function that checks positive numerics +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_probabilities.Rd b/man/vaeac_check_probabilities.Rd new file mode 100644 index 000000000..ccbffc729 --- /dev/null +++ b/man/vaeac_check_probabilities.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_probabilities} +\alias{vaeac_check_probabilities} +\title{Function that checks probabilities} +\usage{ +vaeac_check_probabilities(named_list_probabilities) +} +\arguments{ +\item{named_list_probabilities}{List containing named entries. I.e., \code{list(a = 0.2, b = 0.9)}.} +} +\value{ +The function does not return anything. 
+} +\description{ +Function that checks probabilities +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_save_names.Rd b/man/vaeac_check_save_names.Rd new file mode 100644 index 000000000..e2bb63648 --- /dev/null +++ b/man/vaeac_check_save_names.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_save_names} +\alias{vaeac_check_save_names} +\title{Function that checks that the save folder exists and for a valid file name} +\usage{ +vaeac_check_save_names(folder_to_save_model, model_description) +} +\arguments{ +\item{folder_to_save_model}{String (default is \code{\link[base:tempfile]{base::tempdir()}}). String specifying a path to a folder where +the function is to save the fitted vaeac model. Note that the path will be removed from the returned +\code{\link[=explain]{explain()}} object if \code{vaeac.save_model = FALSE}.} + +\item{model_description}{String (default is \code{make.names(Sys.time())}). String containing, e.g., the name of the +data distribution or additional parameter information. Used in the save name of the fitted model. If not provided, +then a name will be generated based on \code{\link[base:Sys.time]{base::Sys.time()}} to ensure a unique name. We use \code{\link[base:make.names]{base::make.names()}} to +ensure a valid file name for all operating systems.} +} +\value{ +The function does not return anything. +} +\description{ +Function that checks that the save folder exists and for a valid file name +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_save_parameters.Rd b/man/vaeac_check_save_parameters.Rd new file mode 100644 index 000000000..4736dd14b --- /dev/null +++ b/man/vaeac_check_save_parameters.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_save_parameters} +\alias{vaeac_check_save_parameters} +\title{Function that gives a warning about disk usage} +\usage{ +vaeac_check_save_parameters( + save_data, + epochs, + save_every_nth_epoch, + x_train_size +) +} +\arguments{ +\item{save_data}{Logical (default is \code{FALSE}). If \code{TRUE}, then the data is stored together with +the model. Useful if one are to continue to train the model later using \code{\link[=vaeac_continue_train_model]{vaeac_continue_train_model()}}.} + +\item{epochs}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model. +This includes \code{epochs_initiation_phase}, where the default is \code{2}.} + +\item{save_every_nth_epoch}{Positive integer (default is \code{NULL}). If provided, then the vaeac model after +every \code{save_every_nth_epoch}th epoch will be saved.} + +\item{x_train_size}{The object size of the \code{x_train} object.} +} +\value{ +The function does not return anything. +} +\description{ +Function that gives a warning about disk usage +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_verbose.Rd b/man/vaeac_check_verbose.Rd new file mode 100644 index 000000000..c5aecac15 --- /dev/null +++ b/man/vaeac_check_verbose.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_verbose} +\alias{vaeac_check_verbose} +\title{Function the checks the verbose parameter} +\usage{ +vaeac_check_verbose(verbose) +} +\arguments{ +\item{verbose}{Boolean. An integer specifying the level of verbosity. 
Use \code{0} (default) for no verbosity, +\code{1} for low verbose, and \code{2} for high verbose.} +} +\value{ +The function does not return anything. +} +\description{ +Function the checks the verbose parameter +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_which_vaeac_model.Rd b/man/vaeac_check_which_vaeac_model.Rd new file mode 100644 index 000000000..3a3d89ac2 --- /dev/null +++ b/man/vaeac_check_which_vaeac_model.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_which_vaeac_model} +\alias{vaeac_check_which_vaeac_model} +\title{Function that checks for valid \code{vaeac} model name} +\usage{ +vaeac_check_which_vaeac_model( + which_vaeac_model, + epochs, + save_every_nth_epoch = NULL +) +} +\arguments{ +\item{which_vaeac_model}{String (default is \code{best}). The name of the \code{vaeac} model (snapshots from different +epochs) to use when generating the Monte Carlo samples. The standard choices are: \code{"best"} (epoch with lowest IWAE), +\code{"best_running"} (epoch with lowest running IWAE, see \code{vaeac.running_avg_n_values}), and \code{last} (the last epoch). +Note that additional choices are available if \code{vaeac.save_every_nth_epoch} is provided. For example, if +\code{vaeac.save_every_nth_epoch = 5}, then \code{vaeac.which_vaeac_model} can also take the values \code{"epoch_5"}, \code{"epoch_10"}, +\code{"epoch_15"}, and so on.} + +\item{epochs}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model. +This includes \code{epochs_initiation_phase}, where the default is \code{2}.} + +\item{save_every_nth_epoch}{Positive integer (default is \code{NULL}). If provided, then the vaeac model after +every \code{save_every_nth_epoch}th epoch will be saved.} +} +\value{ +The function does not return anything. +} +\description{ +Function that checks for valid \code{vaeac} model name +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_check_x_train_names.Rd b/man/vaeac_check_x_train_names.Rd new file mode 100644 index 000000000..1c2bab9a7 --- /dev/null +++ b/man/vaeac_check_x_train_names.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_check_x_train_names} +\alias{vaeac_check_x_train_names} +\title{Function that checks the feature names of data and \code{vaeac} model} +\usage{ +vaeac_check_x_train_names(feature_names_vaeac, feature_names_new) +} +\arguments{ +\item{feature_names_vaeac}{Array of strings containing the feature names of the \code{vaeac} model.} + +\item{feature_names_new}{Array of strings containing the feature names to compare with.} +} +\value{ +The function does not return anything. 
+} +\description{ +Function that checks the feature names of the data and the \code{vaeac} model +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_compute_normalization.Rd b/man/vaeac_compute_normalization.Rd new file mode 100644 index 000000000..0a4fd253e --- /dev/null +++ b/man/vaeac_compute_normalization.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{vaeac_compute_normalization} +\alias{vaeac_compute_normalization} +\title{Compute Featurewise Means and Standard Deviations} +\usage{ +vaeac_compute_normalization(data, one_hot_max_sizes) +} +\arguments{ +\item{data}{A torch_tensor of dimension N x p containing the data.} + +\item{one_hot_max_sizes}{A torch tensor of dimension p containing the one hot sizes of the p features. +The sizes for the continuous features can either be '0' or '1'.} +} +\value{ +List containing the means and the standard deviations of the different features. +} +\description{ +Returns the means and standard deviations for all continuous features in the data set. +Categorical features get mean=0 and sd=1 by default. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_continue_train_model.Rd b/man/vaeac_continue_train_model.Rd new file mode 100644 index 000000000..cca06ccf2 --- /dev/null +++ b/man/vaeac_continue_train_model.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_continue_train_model} +\alias{vaeac_continue_train_model} +\title{Continue to Train the vaeac Model} +\usage{ +vaeac_continue_train_model( + explanation, + epochs_new, + lr_new = NULL, + x_train = NULL, + save_data = FALSE, + verbose = 0, + seed = 1 +) +} +\arguments{ +\item{explanation}{An \code{\link[=explain]{explain()}} object where \code{vaeac} is the used approach.} + +\item{epochs_new}{Positive integer. The number of extra epochs to conduct.} + +\item{lr_new}{Positive numeric. If provided, it overwrites the old learning rate in the Adam optimizer.} + +\item{x_train}{A data.table containing the training data. Categorical data must have class names \eqn{1,2,\dots,K}.} + +\item{save_data}{Logical (default is \code{FALSE}). If \code{TRUE}, then the data is stored together with +the model. Useful if one is to continue training the model later using \code{\link[=vaeac_continue_train_model]{vaeac_continue_train_model()}}.} + +\item{verbose}{Integer specifying the level of verbosity. Use \code{0} (default) for no verbosity, +\code{1} for low verbosity, and \code{2} for high verbosity.} + +\item{seed}{Positive integer (default is \code{1}). Seed for reproducibility. Specifies the seed before any randomness +based code is being run.} +} +\value{ +A list containing the training/validation errors and paths to where the vaeac models are saved on the disk. +} +\description{ +Function that loads a previously trained vaeac model and continues the training, either +on new data or on the same dataset as it was trained on before. If we are given a new dataset, then +we assume that the new dataset has the same distribution and one_hot_max_sizes as the original dataset.
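A hedged usage sketch of the exported vaeac_continue_train_model() (assuming `explanation` is the result of a previous explain() call with approach = "vaeac"; the argument values are illustrative):

# Continue training the saved vaeac model for 20 extra epochs,
# optionally overwriting the learning rate of the Adam optimizer.
vaeac_training_results <- vaeac_continue_train_model(
  explanation = explanation,  # assumed existing explain() object using the vaeac approach
  epochs_new = 20,
  lr_new = 5e-4
)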
+} +\author{ +Lars Henry Berge Olsen +} diff --git a/man/vaeac_dataset.Rd b/man/vaeac_dataset.Rd new file mode 100644 index 000000000..b67517b7e --- /dev/null +++ b/man/vaeac_dataset.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{vaeac_dataset} +\alias{vaeac_dataset} +\title{Dataset used by the \code{vaeac} model} +\usage{ +vaeac_dataset(X, one_hot_max_sizes) +} +\arguments{ +\item{X}{A torch_tensor containing the data of shape N x p, where N and p are the number +of observations and features, respectively.} + +\item{one_hot_max_sizes}{A torch tensor of dimension p containing the one hot sizes of +the p features. The sizes for the continuous features can either be 0 or 1.} +} +\description{ +Convert the data into a \code{\link[torch:dataset]{torch::dataset()}} which the vaeac model creates batches from. +} +\details{ +This function creates a \code{\link[torch:dataset]{torch::dataset()}} object that represents a map from keys to data samples. +It is used by the \code{\link[torch:dataloader]{torch::dataloader()}} to load data which should be used to extract the +batches for all epochs in the training phase of the neural network. Note that a dataset object +is an R6 instance, see \url{https://r6.r-lib.org/articles/Introduction.html}, which is classical +object-oriented programming, with self reference. I.e., \code{\link[=vaeac_dataset]{vaeac_dataset()}} is a subclass +of type \code{\link[torch:dataset]{torch::dataset()}}. +} +\examples{ +\dontrun{ +p <- 5 +N <- 14 +batch_size <- 10 +one_hot_max_sizes <- rep(1, p) +vaeac_ds <- vaeac_dataset( + torch_tensor(matrix(rnorm(p * N), ncol = p), + dtype = torch_float() + ), + one_hot_max_sizes +) +vaeac_ds + +vaeac_dl <- torch::dataloader( + vaeac_ds, + batch_size = batch_size, + shuffle = TRUE, + drop_last = FALSE +) +vaeac_dl$.length() +vaeac_dl$.iter() + +vaeac_iterator <- vaeac_dl$.iter() +vaeac_iterator$.next() # batch1 +vaeac_iterator$.next() # batch2 +vaeac_iterator$.next() # Empty +} +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_extend_batch.Rd b/man/vaeac_extend_batch.Rd new file mode 100644 index 000000000..0cf8c49ba --- /dev/null +++ b/man/vaeac_extend_batch.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{vaeac_extend_batch} +\alias{vaeac_extend_batch} +\title{Extends Incomplete Batches by Sampling Extra Data from Dataloader} +\usage{ +vaeac_extend_batch(batch, dataloader, batch_size) +} +\arguments{ +\item{batch}{The batch we want to check has the right size, and if not, extend it until it has the right size.} + +\item{dataloader}{A \code{\link[torch:dataloader]{torch::dataloader()}} object from which we can create an iterator object +and load data to extend the batch.} + +\item{batch_size}{Integer. The number of samples to include in each batch.} +} +\value{ +Returns the extended batch with the correct batch_size. +} +\description{ +If the height of the \code{batch} is less than \code{batch_size}, this function extends the \code{batch} with +data from the \code{\link[torch:dataloader]{torch::dataloader()}} until the \code{batch} reaches the required size. +Note that \code{batch} is a tensor.
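As an illustration of the idea described above (and explicitly not the internal implementation), a sketch of extending a too-small batch could look as follows, assuming the dataloader yields plain 2D tensors:

# Illustrative sketch: row-bind extra batches drawn from the dataloader until the
# batch has at least `batch_size` rows, then truncate to exactly `batch_size` rows.
extend_batch_sketch <- function(batch, dataloader, batch_size) {
  iterator <- torch::dataloader_make_iter(dataloader)
  while (batch$shape[1] < batch_size) {
    extra <- torch::dataloader_next(iterator)
    batch <- torch::torch_cat(list(batch, extra), dim = 1)
  }
  batch[1:batch_size, ]
}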
+} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_get_current_save_state.Rd b/man/vaeac_get_current_save_state.Rd new file mode 100644 index 000000000..230eb4d92 --- /dev/null +++ b/man/vaeac_get_current_save_state.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_get_current_save_state} +\alias{vaeac_get_current_save_state} +\title{Function that extracts additional objects from the environment to the state list} +\usage{ +vaeac_get_current_save_state(environment) +} +\arguments{ +\item{environment}{The \code{\link[base:environment]{base::environment()}} where the objects are stored.} +} +\value{ +List containing the values of \code{epoch}, \code{train_vlb}, \code{val_iwae}, \code{val_iwae_running}, +and the \code{state_dict()} of the vaeac model and optimizer. +} +\description{ +The function extract the objects that we are going to save together with the \code{vaeac} model to make it possible to +train the model further and to evaluate it. +The environment should be the local environment inside the \code{\link[=vaeac_train_model_auxiliary]{vaeac_train_model_auxiliary()}} function. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_get_data_objects.Rd b/man/vaeac_get_data_objects.Rd new file mode 100644 index 000000000..bac7e5d16 --- /dev/null +++ b/man/vaeac_get_data_objects.Rd @@ -0,0 +1,85 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_get_data_objects} +\alias{vaeac_get_data_objects} +\title{Function to set up data loaders and save file names} +\usage{ +vaeac_get_data_objects( + x_train, + log_exp_cont_feat, + val_ratio, + batch_size, + paired_sampling, + model_description, + depth, + width, + latent_dim, + lr, + epochs, + save_every_nth_epoch, + folder_to_save_model, + train_indices = NULL, + val_indices = NULL +) +} +\arguments{ +\item{x_train}{A data.table containing the training data. Categorical data must have class names \eqn{1,2,\dots,K}.} + +\item{log_exp_cont_feat}{Logical (default is \code{FALSE}). If we are to \eqn{\log} transform all +continuous features before sending the data to \code{\link[=vaeac]{vaeac()}}. The \code{vaeac} model creates unbounded Monte Carlo +sample values. Thus, if the continuous features are strictly positive (as for, e.g., the Burr distribution and +Abalone data set), it can be advantageous to \eqn{\log} transform the data to unbounded form before using \code{vaeac}. +If \code{TRUE}, then \code{\link[=vaeac_postprocess_data]{vaeac_postprocess_data()}} will take the \eqn{\exp} of the results to get back to strictly +positive values when using the \code{vaeac} model to impute missing values/generate the Monte Carlo samples.} + +\item{val_ratio}{Numeric (default is \code{0.25}). Scalar between \code{0} and \code{1} indicating the ratio of +instances from the input data which will be used as validation data. That is, \code{val_ratio = 0.25} means +that \verb{75\%} of the provided data is used as training data, while the remaining \verb{25\%} is used as validation data.} + +\item{batch_size}{Positive integer (default is \code{64}). The number of samples to include in each batch +during the training of the vaeac model. Used in \code{\link[torch:dataloader]{torch::dataloader()}}.} + +\item{paired_sampling}{Logical (default is \code{TRUE}). If \code{TRUE}, we apply paired sampling to the training +batches. 
That is, the training observations in each batch will be duplicated, where the first instance will be masked +by \eqn{S} while the second instance will be masked by \eqn{\bar{S}}. This ensures that the training of the +\code{vaeac} model becomes more stable as the model has access to the full version of each training observation. However, +this will increase the training time due to more complex implementation and doubling the size of each batch. See +\code{\link[=paired_sampler]{paired_sampler()}} for more information.} + +\item{model_description}{String (default is \code{make.names(Sys.time())}). String containing, e.g., the name of the +data distribution or additional parameter information. Used in the save name of the fitted model. If not provided, +then a name will be generated based on \code{\link[base:Sys.time]{base::Sys.time()}} to ensure a unique name. We use \code{\link[base:make.names]{base::make.names()}} to +ensure a valid file name for all operating systems.} + +\item{depth}{Positive integer (default is \code{3}). The number of hidden layers +in the neural networks of the masked encoder, full encoder, and decoder.} + +\item{width}{Positive integer (default is \code{32}). The number of neurons in each +hidden layer in the neural networks of the masked encoder, full encoder, and decoder.} + +\item{latent_dim}{Positive integer (default is \code{8}). The number of dimensions in the latent space.} + +\item{lr}{Positive numeric (default is \code{0.001}). The learning rate used in the \code{\link[torch:optim_adam]{torch::optim_adam()}} optimizer.} + +\item{epochs}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model. +This includes \code{epochs_initiation_phase}, where the default is \code{2}.} + +\item{save_every_nth_epoch}{Positive integer (default is \code{NULL}). If provided, then the vaeac model after +every \code{save_every_nth_epoch}th epoch will be saved.} + +\item{folder_to_save_model}{String (default is \code{\link[base:tempfile]{base::tempdir()}}). String specifying a path to a folder where +the function is to save the fitted vaeac model. Note that the path will be removed from the returned +\code{\link[=explain]{explain()}} object if \code{vaeac.save_model = FALSE}.} + +\item{train_indices}{Numeric array (optional) containing the indices of the training observations. +No checks are conducted to validate the indices.} + +\item{val_indices}{Numeric array (optional) containing the indices of the validation observations. +No checks are conducted to validate the indices.} +} +\value{ +List of objects needed to train the \code{vaeac} model +} +\description{ +Function to set up data loaders and save file names +} diff --git a/man/vaeac_get_evaluation_criteria.Rd b/man/vaeac_get_evaluation_criteria.Rd new file mode 100644 index 000000000..7925c7285 --- /dev/null +++ b/man/vaeac_get_evaluation_criteria.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_get_evaluation_criteria} +\alias{vaeac_get_evaluation_criteria} +\title{Extract the Training VLB and Validation IWAE from a list of explanations objects using the vaeac approach} +\usage{ +vaeac_get_evaluation_criteria(explanation_list) +} +\arguments{ +\item{explanation_list}{A list of \code{\link[=explain]{explain()}} objects applied to the same data and model, where +\code{vaeac} is the used approach. If the entries in the list are named, then the function uses +these names.
Otherwise, it defaults to the approach names (with integer suffix for duplicates) +for the explanation objects in \code{explanation_list}.} +} +\value{ +A data.table containing the training VLB, validation IWAE, and running validation IWAE at each epoch for +each vaeac model. +} +\description{ +Extract the Training VLB and Validation IWAE from a list of explanations objects using the vaeac approach +} +\author{ +Lars Henry Berge Olsen +} diff --git a/man/vaeac_get_extra_para_default.Rd b/man/vaeac_get_extra_para_default.Rd new file mode 100644 index 000000000..870f2a774 --- /dev/null +++ b/man/vaeac_get_extra_para_default.Rd @@ -0,0 +1,165 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_get_extra_para_default} +\alias{vaeac_get_extra_para_default} +\title{Function to specify the extra parameters in the \code{vaeac} model} +\usage{ +vaeac_get_extra_para_default( + vaeac.model_description = make.names(Sys.time()), + vaeac.folder_to_save_model = tempdir(), + vaeac.pretrained_vaeac_model = NULL, + vaeac.cuda = FALSE, + vaeac.epochs_initiation_phase = 2, + vaeac.epochs_early_stopping = NULL, + vaeac.save_every_nth_epoch = NULL, + vaeac.val_ratio = 0.25, + vaeac.val_iwae_n_samples = 25, + vaeac.batch_size = 64, + vaeac.batch_size_sampling = NULL, + vaeac.running_avg_n_values = 5, + vaeac.skip_conn_layer = TRUE, + vaeac.skip_conn_masked_enc_dec = TRUE, + vaeac.batch_normalization = FALSE, + vaeac.paired_sampling = TRUE, + vaeac.masking_ratio = 0.5, + vaeac.mask_gen_coalitions = NULL, + vaeac.mask_gen_coalitions_prob = NULL, + vaeac.sigma_mu = 10000, + vaeac.sigma_sigma = 1e-04, + vaeac.sample_random = TRUE, + vaeac.save_data = FALSE, + vaeac.log_exp_cont_feat = FALSE, + vaeac.which_vaeac_model = "best", + vaeac.save_model = TRUE +) +} +\arguments{ +\item{vaeac.model_description}{String (default is \code{make.names(Sys.time())}). String containing, e.g., the name of the +data distribution or additional parameter information. Used in the save name of the fitted model. If not provided, +then a name will be generated based on \code{\link[base:Sys.time]{base::Sys.time()}} to ensure a unique name. We use \code{\link[base:make.names]{base::make.names()}} to +ensure a valid file name for all operating systems.} + +\item{vaeac.folder_to_save_model}{String (default is \code{\link[base:tempfile]{base::tempdir()}}). String specifying a path to a folder where +the function is to save the fitted vaeac model. Note that the path will be removed from the returned +\code{\link[=explain]{explain()}} object if \code{vaeac.save_model = FALSE}.} + +\item{vaeac.pretrained_vaeac_model}{List or String (default is \code{NULL}). 1) Either a list of class +\code{vaeac}, i.e., the list stored in \code{explanation$internal$parameters$vaeac} where \code{explanation} is the returned list +from an earlier call to the \code{\link[=explain]{explain()}} function. 2) A string containing the path to where the \code{vaeac} +model is stored on disk, for example, \code{explanation$internal$parameters$vaeac$models$best}.} + +\item{vaeac.cuda}{Logical (default is \code{FALSE}). If \code{TRUE}, then the \code{vaeac} model will be trained using cuda/GPU. +If \code{\link[torch:cuda_is_available]{torch::cuda_is_available()}} is \code{FALSE}, the we fall back to use CPU. If \code{FALSE}, we use the CPU. Often this is +faster for tabular data sets. Note, cuda is not not supported in the current version of the \code{shapr} package. 
+TODO: Update this when this is done.} + +\item{vaeac.epochs_initiation_phase}{Positive integer (default is \code{2}). The number of epochs to run each of the +\code{vaeac.n_vaeacs_initialize} \code{vaeac} models before continuing to train only the best performing model.} + +\item{vaeac.epochs_early_stopping}{Positive integer (default is \code{NULL}). The training stops if there has been no +improvement in the validation IWAE for \code{vaeac.epochs_early_stopping} epochs. If the user wants the training process +to be solely based on this training criterion, then \code{vaeac.epochs} in \code{\link[=explain]{explain()}} should be set to a large +number. If \code{NULL}, then \code{shapr} will internally set \code{vaeac.epochs_early_stopping = vaeac.epochs} such that early +stopping does not occur.} + +\item{vaeac.save_every_nth_epoch}{Positive integer (default is \code{NULL}). If provided, then the vaeac model after +every \code{vaeac.save_every_nth_epoch}th epoch will be saved.} + +\item{vaeac.val_ratio}{Numeric (default is \code{0.25}). Scalar between \code{0} and \code{1} indicating the ratio of +instances from the input data which will be used as validation data. That is, \code{vaeac.val_ratio = 0.25} means +that \verb{75\%} of the provided data is used as training data, while the remaining \verb{25\%} is used as validation data.} + +\item{vaeac.val_iwae_n_samples}{Positive integer (default is \code{25}). The number of generated samples used +to compute the IWAE criterion when validating the vaeac model on the validation data.} + +\item{vaeac.batch_size}{Positive integer (default is \code{64}). The number of samples to include in each batch +during the training of the vaeac model. Used in \code{\link[torch:dataloader]{torch::dataloader()}}.} + +\item{vaeac.batch_size_sampling}{Positive integer (default is \code{NULL}) The number of samples to include in +each batch when generating the Monte Carlo samples. If \code{NULL}, then the function generates the Monte Carlo samples +for the provided coalitions/combinations and all explicands sent to \code{\link[=explain]{explain()}} at the time. +The number of coalitions are determined by \code{n_batches} in \code{\link[=explain]{explain()}}. We recommend to tweak \code{n_batches} +rather than \code{vaeac.batch_size_sampling}. Larger batch sizes are often much faster provided sufficient memory.} + +\item{vaeac.running_avg_n_values}{Positive integer (default is \code{5}). The number of previous IWAE values to include +when we compute the running means of the IWAE criterion.} + +\item{vaeac.skip_conn_layer}{Logical (default is \code{TRUE}). If \code{TRUE}, we apply identity skip connections in each +layer, see \code{\link[=SkipConnection]{SkipConnection()}}. That is, we add the input \eqn{X} to the outcome of each hidden layer, +so the output becomes \eqn{X + activation(WX + b)}.} + +\item{vaeac.skip_conn_masked_enc_dec}{Logical (default is \code{TRUE}). If \code{TRUE}, we apply concatenate skip +connections between the layers in the masked encoder and decoder. The first layer of the masked encoder will be +linked to the last layer of the decoder. The second layer of the masked encoder will be +linked to the second to last layer of the decoder, and so on.} + +\item{vaeac.batch_normalization}{Logical (default is \code{FALSE}). If \code{TRUE}, we apply batch normalization after the +activation function. Note that if \code{vaeac.skip_conn_layer = TRUE}, then the normalization is applied after the +inclusion of the skip connection. 
That is, we batch normalize the whole quantity \eqn{X + activation(WX + b)}.} + +\item{vaeac.paired_sampling}{Logical (default is \code{TRUE}). If \code{TRUE}, we apply paired sampling to the training +batches. That is, the training observations in each batch will be duplicated, where the first instance will be masked +by \eqn{S} while the second instance will be masked by \eqn{\bar{S}}. This ensures that the training of the +\code{vaeac} model becomes more stable as the model has access to the full version of each training observation. However, +this will increase the training time due to more complex implementation and doubling the size of each batch. See +\code{\link[=paired_sampler]{paired_sampler()}} for more information.} + +\item{vaeac.masking_ratio}{Numeric (default is \code{0.5}). Probability of masking a feature in the +\code{\link[=MCAR_mask_generator]{MCAR_mask_generator()}} (MCAR = Missing Completely At Random). The MCAR masking scheme ensures that \code{vaeac} +model can do arbitrary conditioning as all coalitions will be trained. \code{vaeac.masking_ratio} will be overruled if +\code{vaeac.mask_gen_coalitions} is specified.} + +\item{vaeac.mask_gen_coalitions}{Matrix (default is \code{NULL}). Matrix containing the coalitions that the +\code{vaeac} model will be trained on, see \code{\link[=Specified_masks_mask_generator]{Specified_masks_mask_generator()}}. This parameter is used internally +in \code{shapr} when we only consider a subset of coalitions/combinations, i.e., when +\code{n_combinations} \eqn{< 2^{n_{\text{features}}}}, and for group Shapley, i.e., +when \code{group} is specified in \code{\link[=explain]{explain()}}.} + +\item{vaeac.mask_gen_coalitions_prob}{Numeric array (default is \code{NULL}). Array of length equal to the height +of \code{vaeac.mask_gen_coalitions} containing the probabilities of sampling the corresponding coalitions in +\code{vaeac.mask_gen_coalitions}.} + +\item{vaeac.sigma_mu}{Numeric (default is \code{1e4}). One of two hyperparameter values in the normal-gamma prior +used in the masked encoder, see Section 3.3.1 in +\href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.} + +\item{vaeac.sigma_sigma}{Numeric (default is \code{1e-4}). One of two hyperparameter values in the normal-gamma prior +used in the masked encoder, see Section 3.3.1 in +\href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.} + +\item{vaeac.sample_random}{Logcial (default is \code{TRUE}). If \code{TRUE}, the function generates random Monte Carlo samples +from the inferred generative distributions. If \code{FALSE}, the function use the most likely values, i.e., the mean and +class with highest probability for continuous and categorical, respectively.} + +\item{vaeac.save_data}{Logical (default is \code{FALSE}). If \code{TRUE}, then the data is stored together with +the model. Useful if one are to continue to train the model later using \code{\link[=vaeac_continue_train_model]{vaeac_continue_train_model()}}.} + +\item{vaeac.log_exp_cont_feat}{Logical (default is \code{FALSE}). If we are to \eqn{\log} transform all +continuous features before sending the data to \code{\link[=vaeac]{vaeac()}}. The \code{vaeac} model creates unbounded Monte Carlo +sample values. Thus, if the continuous features are strictly positive (as for, e.g., the Burr distribution and +Abalone data set), it can be advantageous to \eqn{\log} transform the data to unbounded form before using \code{vaeac}. 
+If \code{TRUE}, then \code{\link[=vaeac_postprocess_data]{vaeac_postprocess_data()}} will take the \eqn{\exp} of the results to get back to strictly +positive values when using the \code{vaeac} model to impute missing values/generate the Monte Carlo samples.} + +\item{vaeac.which_vaeac_model}{String (default is \code{best}). The name of the \code{vaeac} model (snapshots from different +epochs) to use when generating the Monte Carlo samples. The standard choices are: \code{"best"} (epoch with lowest IWAE), +\code{"best_running"} (epoch with lowest running IWAE, see \code{vaeac.running_avg_n_values}), and \code{last} (the last epoch). +Note that additional choices are available if \code{vaeac.save_every_nth_epoch} is provided. For example, if +\code{vaeac.save_every_nth_epoch = 5}, then \code{vaeac.which_vaeac_model} can also take the values \code{"epoch_5"}, \code{"epoch_10"}, +\code{"epoch_15"}, and so on.} + +\item{vaeac.save_model}{Boolean. If \code{TRUE} (default), the \code{vaeac} model will be saved either in a +\code{\link[base:tempfile]{base::tempdir()}} folder or in a user specified location in \code{vaeac.folder_to_save_model}. If \code{FALSE}, then +the paths to model and the model will will be deleted from the returned object from \code{\link[=explain]{explain()}}.} +} +\value{ +Named list of the default values \code{vaeac} extra parameter arguments specified in this function call. +Note that both \code{vaeac.model_description} and \code{vaeac.folder_to_save_model} will change with time and R session. +} +\description{ +In this function, we specify the default values for the extra parameters used in \code{\link[=explain]{explain()}} +for \code{approach = "vaeac"}. +} +\author{ +Lars Henry Berge Olsen +} diff --git a/man/vaeac_get_full_state_list.Rd b/man/vaeac_get_full_state_list.Rd new file mode 100644 index 000000000..59418372e --- /dev/null +++ b/man/vaeac_get_full_state_list.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_get_full_state_list} +\alias{vaeac_get_full_state_list} +\title{Function that extracts the state list objects from the environment} +\usage{ +vaeac_get_full_state_list(environment) +} +\arguments{ +\item{environment}{The \code{\link[base:environment]{base::environment()}} where the objects are stored.} +} +\value{ +List containing the values of \code{norm_mean}, \code{norm_std}, \code{model_description}, \code{folder_to_save_model}, +\code{n_train}, \code{n_features}, \code{one_hot_max_sizes}, \code{epochs}, \code{epochs_specified}, \code{epochs_early_stopping}, +\code{early_stopping_applied}, \code{running_avg_n_values}, \code{paired_sampling}, \code{mask_generator_name}, \code{masking_ratio}, +\code{mask_gen_coalitions}, \code{mask_gen_coalitions_prob}, \code{val_ratio}, \code{val_iwae_n_samples}, +\code{n_vaeacs_initialize}, \code{epochs_initiation_phase}, \code{width}, \code{depth}, \code{latent_dim}, \code{activation_function}, +\code{lr}, \code{batch_size}, \code{skip_conn_layer}, \code{skip_conn_masked_enc_dec}, \code{batch_normalization}, \code{cuda}, +\code{train_indices}, \code{val_indices}, \code{save_every_nth_epoch}, \code{sigma_mu}, +\code{sigma_sigma}, \code{feature_list}, \code{col_cat_names}, \code{col_cont_names}, \code{col_cat}, \code{col_cont}, \code{cat_in_dataset}, +\code{map_new_to_original_names}, \code{map_original_to_new_names}, \code{log_exp_cont_feat}, \code{save_data}, \code{verbose}, +\code{seed}, and \code{vaeac_save_file_names}. 
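For intuition only: a named state list of this form can be gathered from an environment with \code{base::mget()}. The sketch below is a hypothetical illustration using a small subset of the object names listed above with made-up values; it is not the exact implementation used in \code{shapr}.

env <- new.env()
env$norm_mean <- c(0.2, 1.5)   # made-up values, for illustration only
env$norm_std <- c(1.1, 0.9)
env$epochs <- 100
state_list <- mget(c("norm_mean", "norm_std", "epochs"), envir = env)
str(state_list)  # a named list with one entry per requested object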
+} +\description{ +The function extracts the objects that we are going to save together with the \code{vaeac} model to make it possible to +train the model further and to evaluate it. +The environment should be the local environment inside the \code{\link[=vaeac_train_model_auxiliary]{vaeac_train_model_auxiliary()}} function. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_get_mask_generator_name.Rd b/man/vaeac_get_mask_generator_name.Rd new file mode 100644 index 000000000..67db3c993 --- /dev/null +++ b/man/vaeac_get_mask_generator_name.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_get_mask_generator_name} +\alias{vaeac_get_mask_generator_name} +\title{Function that determines which mask generator to use} +\usage{ +vaeac_get_mask_generator_name( + mask_gen_coalitions, + mask_gen_coalitions_prob, + masking_ratio, + verbose +) +} +\arguments{ +\item{mask_gen_coalitions}{Matrix (default is \code{NULL}). Matrix containing the coalitions that the +\code{vaeac} model will be trained on, see \code{\link[=Specified_masks_mask_generator]{Specified_masks_mask_generator()}}. This parameter is used internally +in \code{shapr} when we only consider a subset of coalitions/combinations, i.e., when +\code{n_combinations} \eqn{< 2^{n_{\text{features}}}}, and for group Shapley, i.e., +when \code{group} is specified in \code{\link[=explain]{explain()}}.} + +\item{mask_gen_coalitions_prob}{Numeric array (default is \code{NULL}). Array of length equal to the height +of \code{mask_gen_coalitions} containing the probabilities of sampling the corresponding coalitions in +\code{mask_gen_coalitions}.} + +\item{masking_ratio}{Numeric (default is \code{0.5}). Probability of masking a feature in the +\code{\link[=MCAR_mask_generator]{MCAR_mask_generator()}} (MCAR = Missing Completely At Random). The MCAR masking scheme ensures that the \code{vaeac} +model can do arbitrary conditioning as all coalitions will be trained. \code{masking_ratio} will be overruled if +\code{mask_gen_coalitions} is specified.} + +\item{verbose}{Integer specifying the level of verbosity. Use \code{0} (default) for no verbosity, +\code{1} for low verbose, and \code{2} for high verbose.} +} +\value{ +The function does not return anything. +} +\description{ +Function that determines which mask generator to use +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_get_model_from_checkp.Rd b/man/vaeac_get_model_from_checkp.Rd new file mode 100644 index 000000000..cb2a3d2b4 --- /dev/null +++ b/man/vaeac_get_model_from_checkp.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_get_model_from_checkp} +\alias{vaeac_get_model_from_checkp} +\title{Function to load a \code{vaeac} model and set it in the right state and mode} +\usage{ +vaeac_get_model_from_checkp(checkpoint, cuda, mode_train) +} +\arguments{ +\item{checkpoint}{List. This must be a loaded \code{vaeac} save object. That is, \code{torch::torch_load('vaeac_save_path')}.} + +\item{cuda}{Logical (default is \code{FALSE}). If \code{TRUE}, then the \code{vaeac} model will be trained using cuda/GPU. +If \code{\link[torch:cuda_is_available]{torch::cuda_is_available()}} is \code{FALSE}, then we fall back to using the CPU. If \code{FALSE}, we use the CPU. Often this is +faster for tabular data sets. 
Note, cuda is not supported in the current version of the \code{shapr} package. +TODO: Update this when this is done.} + +\item{mode_train}{Logical. If \code{TRUE}, the returned \code{vaeac} model is set to be in training mode. +If \code{FALSE}, the returned \code{vaeac} model is set to be in evaluation mode.} +} +\value{ +A \code{vaeac} model with the correct state (based on \code{checkpoint}), sent to the desired hardware (based on +\code{cuda}), and in the right mode (based on \code{mode_train}). +} +\description{ +Function to load a \code{vaeac} model and set it in the right state and mode +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_get_n_decimals.Rd b/man/vaeac_get_n_decimals.Rd new file mode 100644 index 000000000..4e1445e8e --- /dev/null +++ b/man/vaeac_get_n_decimals.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_get_n_decimals} +\alias{vaeac_get_n_decimals} +\title{Function to get string of values with specific number of decimals} +\usage{ +vaeac_get_n_decimals(value, n_decimals = 3) +} +\arguments{ +\item{value}{The number to get \code{n_decimals} for.} + +\item{n_decimals}{Positive integer. The number of decimals. Default is three.} +} +\value{ +String of \code{value} with \code{n_decimals} decimals. +} +\description{ +Function to get string of values with specific number of decimals +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_get_optimizer.Rd b/man/vaeac_get_optimizer.Rd new file mode 100644 index 000000000..256d7704e --- /dev/null +++ b/man/vaeac_get_optimizer.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_get_optimizer} +\alias{vaeac_get_optimizer} +\title{Function to create the optimizer used to train \code{vaeac}} +\usage{ +vaeac_get_optimizer(vaeac_model, lr, optimizer_name = "adam") +} +\arguments{ +\item{vaeac_model}{A \code{vaeac} model created using \code{\link[=vaeac]{vaeac()}}.} + +\item{lr}{Positive numeric (default is \code{0.001}). The learning rate used in the \code{\link[torch:optim_adam]{torch::optim_adam()}} optimizer.} + +\item{optimizer_name}{String containing the name of the \code{\link[torch:optimizer]{torch::optimizer()}} to use.} +} +\value{ +An initialized \code{\link[torch:optim_adam]{torch::optim_adam()}} optimizer for the parameters of the provided \code{vaeac_model}, using the learning rate \code{lr}. +} +\description{ +Only \code{\link[torch:optim_adam]{torch::optim_adam()}} is currently supported, but it is easy to add additional options later. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_get_save_file_names.Rd b/man/vaeac_get_save_file_names.Rd new file mode 100644 index 000000000..18ff47925 --- /dev/null +++ b/man/vaeac_get_save_file_names.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_get_save_file_names} +\alias{vaeac_get_save_file_names} +\title{Function that creates the save file names for the \code{vaeac} model} +\usage{ +vaeac_get_save_file_names( + model_description, + n_features, + n_train, + depth, + width, + latent_dim, + lr, + epochs, + save_every_nth_epoch, + folder_to_save_model = NULL +) +} +\arguments{ +\item{model_description}{String (default is \code{make.names(Sys.time())}). 
String containing, e.g., the name of the +data distribution or additional parameter information. Used in the save name of the fitted model. If not provided, +then a name will be generated based on \code{\link[base:Sys.time]{base::Sys.time()}} to ensure a unique name. We use \code{\link[base:make.names]{base::make.names()}} to +ensure a valid file name for all operating systems.} + +\item{depth}{Positive integer (default is \code{3}). The number of hidden layers +in the neural networks of the masked encoder, full encoder, and decoder.} + +\item{width}{Positive integer (default is \code{32}). The number of neurons in each +hidden layer in the neural networks of the masked encoder, full encoder, and decoder.} + +\item{latent_dim}{Positive integer (default is \code{8}). The number of dimensions in the latent space.} + +\item{lr}{Positive numeric (default is \code{0.001}). The learning rate used in the \code{\link[torch:optim_adam]{torch::optim_adam()}} optimizer.} + +\item{epochs}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model. +This includes \code{epochs_initiation_phase}, where the default is \code{2}.} + +\item{save_every_nth_epoch}{Positive integer (default is \code{NULL}). If provided, then the vaeac model after +every \code{save_every_nth_epoch}th epoch will be saved.} + +\item{folder_to_save_model}{String (default is \code{\link[base:tempfile]{base::tempdir()}}). String specifying a path to a folder where +the function is to save the fitted vaeac model. Note that the path will be removed from the returned +\code{\link[=explain]{explain()}} object if \code{vaeac.save_model = FALSE}.} +} +\value{ +Array of string containing the save files to use when training the \code{vaeac} model. The first three names +corresponds to the best, best_running, and last epochs, in that order. +} +\description{ +Function that creates the save file names for the \code{vaeac} model +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_get_val_iwae.Rd b/man/vaeac_get_val_iwae.Rd new file mode 100644 index 000000000..b22519e63 --- /dev/null +++ b/man/vaeac_get_val_iwae.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{vaeac_get_val_iwae} +\alias{vaeac_get_val_iwae} +\title{Compute the Importance Sampling Estimator (Validation Error)} +\usage{ +vaeac_get_val_iwae( + val_dataloader, + mask_generator, + batch_size, + vaeac_model, + val_iwae_n_samples +) +} +\arguments{ +\item{val_dataloader}{A torch dataloader which loads the validation data.} + +\item{mask_generator}{A mask generator object that generates the masks.} + +\item{batch_size}{Integer. The number of samples to include in each batch.} + +\item{vaeac_model}{The vaeac model.} + +\item{val_iwae_n_samples}{Number of samples to generate for computing the IWAE for each validation sample.} +} +\value{ +The average iwae over all instances in the validation dataset. +} +\description{ +Compute the Importance Sampling Estimator which the vaeac model +uses to evaluate its performance on the validation data. +} +\details{ +Compute mean IWAE log likelihood estimation of the validation set. +Takes validation data loader, mask generator, batch size, vaeac_model (vaeac) +and number of IWAE latent samples per object.Returns one the estimation (float). 
+IWAE is an abbreviation for Importance Sampling Estimator +\deqn{\log p_{\theta, \psi}(x|y) \approx +\log {\frac{1}{S}\sum_{i=1}^S p_\theta(x|z_i, y) p_\psi(z_i|y) \big/ q_\phi(z_i|x,y),}} +where \eqn{z_i \sim q_\phi(z|x,y)}. +For more details, see \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_get_x_explain_extended.Rd b/man/vaeac_get_x_explain_extended.Rd new file mode 100644 index 000000000..91b76a56b --- /dev/null +++ b/man/vaeac_get_x_explain_extended.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_get_x_explain_extended} +\alias{vaeac_get_x_explain_extended} +\title{Function to extend the explicands and apply all relevant masks/coalitions} +\usage{ +vaeac_get_x_explain_extended(x_explain, S, index_features) +} +\arguments{ +\item{x_explain}{A matrix or data.frame/data.table. +Contains the the features, whose predictions ought to be explained.} + +\item{S}{The \code{internal$objects$S} matrix containing the possible coalitions.} + +\item{index_features}{Positive integer vector. Specifies the indices of combinations to +apply to the present method. \code{NULL} means all combinations. Only used internally.} +} +\value{ +The extended version of \code{x_explain} where the masks from \code{S} with indices \code{index_features} have been applied. +} +\description{ +Function to extend the explicands and apply all relevant masks/coalitions +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_impute_missing_entries.Rd b/man/vaeac_impute_missing_entries.Rd new file mode 100644 index 000000000..a3dda74f4 --- /dev/null +++ b/man/vaeac_impute_missing_entries.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_impute_missing_entries} +\alias{vaeac_impute_missing_entries} +\title{Impute Missing Values Using Vaeac} +\usage{ +vaeac_impute_missing_entries( + x_explain_with_NaNs, + n_samples, + vaeac_model, + checkpoint, + sampler, + batch_size, + verbose = 0, + seed = NULL, + n_explain = NULL, + index_features = NULL +) +} +\arguments{ +\item{x_explain_with_NaNs}{A 2D matrix, where the missing entries to impute are represented by \code{NaN}.} + +\item{n_samples}{Integer. The number of imputed versions we create for each row in \code{x_explain_with_NaNs}.} + +\item{vaeac_model}{An initialized \code{vaeac} model that we are going to use to generate the MC samples.} + +\item{checkpoint}{List containing the parameters of the \code{vaeac} model.} + +\item{sampler}{A sampler object used to sample the MC samples.} + +\item{batch_size}{Positive integer (default is \code{64}). The number of samples to include in each batch +during the training of the vaeac model. Used in \code{\link[torch:dataloader]{torch::dataloader()}}.} + +\item{verbose}{Boolean. An integer specifying the level of verbosity. Use \code{0} (default) for no verbosity, +\code{1} for low verbose, and \code{2} for high verbose.} + +\item{seed}{Positive integer (default is \code{1}). Seed for reproducibility. Specifies the seed before any randomness +based code is being run.} + +\item{n_explain}{Positive integer. The number of explicands.} + +\item{index_features}{Optional integer vector. 
Used internally in shapr package to index the coalitions.} +} +\value{ +A data.table where the missing values (\code{NaN}) in \code{x_explain_with_NaNs} have been imputed \code{n_samples} times. +The data table will contain extra id columns if \code{index_features} and \code{n_explain} are provided. +} +\description{ +Impute Missing Values Using Vaeac +} +\details{ +Function that imputes the missing values in 2D matrix where each row constitute an individual. +The values are sampled from the conditional distribution estimated by a vaeac model. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_kl_normal_normal.Rd b/man/vaeac_kl_normal_normal.Rd new file mode 100644 index 000000000..c9971ef09 --- /dev/null +++ b/man/vaeac_kl_normal_normal.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{vaeac_kl_normal_normal} +\alias{vaeac_kl_normal_normal} +\title{Compute the KL Divergence Between Two Gaussian Distributions.} +\usage{ +vaeac_kl_normal_normal(p, q) +} +\arguments{ +\item{p}{A \code{\link[torch:distr_normal]{torch::distr_normal()}} object.} + +\item{q}{A \code{\link[torch:distr_normal]{torch::distr_normal()}} object.} +} +\value{ +The KL divergence between the two Gaussian distributions. +} +\description{ +Computes the KL divergence between univariate normal distributions using the analytical formula, +see \url{https://en.wikipedia.org/wiki/Kullback\%E2\%80\%93Leibler_divergence#Multivariate_normal_distributions}. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_normal_parse_params.Rd b/man/vaeac_normal_parse_params.Rd new file mode 100644 index 000000000..9d0f85f6a --- /dev/null +++ b/man/vaeac_normal_parse_params.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{vaeac_normal_parse_params} +\alias{vaeac_normal_parse_params} +\title{Creates Normal Distributions} +\usage{ +vaeac_normal_parse_params(params, min_sigma = 0.001) +} +\arguments{ +\item{params}{Tensor containing the parameters for the normal distributions.} + +\item{min_sigma}{The minimal variance allowed.} +} +\value{ +\code{\link[torch:distr_normal]{torch::distr_normal()}} distributions with the provided means and standard deviations. +} +\description{ +Function that takes in the a tensor where the first half of the columns contains the means of the +normal distributions, while the latter half of the columns contains the standard deviations. The standard deviations +are clamped with \code{min_sigma} to ensure stable results. If \code{params} is of dimensions batch_size x 8, the function +will create 4 independent normal distributions for each of the observation (\code{batch_size} observations in total). +} +\details{ +Take a Tensor (e.g. neural network output) and return a \code{\link[torch:distr_normal]{torch::distr_normal()}} distribution. +This normal distribution is component-wise independent, and its dimensionality depends on the input shape. +First half of channels is mean (\eqn{\mu}) of the distribution, the softplus of the second half is +std (\eqn{\sigma}), so there is no restrictions on the input tensor. \code{min_sigma} is the minimal value of +\eqn{\sigma}. I.e., if the above softplus is less than \code{min_sigma}, then \eqn{\sigma} is clipped +from below with value \code{min_sigma}. 
This regularization is required for the numerical stability and may +be considered as a neural network architecture choice without any change to the probabilistic model. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_normalize_data.Rd b/man/vaeac_normalize_data.Rd new file mode 100644 index 000000000..b983aaa12 --- /dev/null +++ b/man/vaeac_normalize_data.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{vaeac_normalize_data} +\alias{vaeac_normalize_data} +\title{Normalize mixed data for \code{vaeac}} +\usage{ +vaeac_normalize_data( + data_torch, + one_hot_max_sizes, + norm_mean = NULL, + norm_std = NULL +) +} +\arguments{ +\item{one_hot_max_sizes}{A torch tensor of dimension p containing the one hot sizes of the \code{n_features} features. +The sizes for the continuous features can either be \code{0} or \code{1}.} + +\item{norm_mean}{Torch tensor (optional). A 1D array containing the means of the columns of \code{x_torch}.} + +\item{norm_std}{Torch tensor (optional). A 1D array containing the stds of the columns of \code{x_torch}.} +} +\value{ +A list containing the normalized version of \code{x_torch}, \code{norm_mean} and \code{norm_std}. +} +\description{ +Compute the mean and std for each continuous feature, while the categorical features will have mean 0 and std 1. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_plot_evaluation_criteria.Rd b/man/vaeac_plot_evaluation_criteria.Rd new file mode 100644 index 000000000..40dfa309d --- /dev/null +++ b/man/vaeac_plot_evaluation_criteria.Rd @@ -0,0 +1,152 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_plot_evaluation_criteria} +\alias{vaeac_plot_evaluation_criteria} +\title{Plot the training VLB and validation IWAE for \code{vaeac} models} +\usage{ +vaeac_plot_evaluation_criteria( + explanation_list, + plot_from_nth_epoch = 1, + plot_every_nth_epoch = 1, + criteria = c("VLB", "IWAE"), + plot_type = c("method", "criterion"), + facet_wrap_scales = "fixed", + facet_wrap_ncol = NULL +) +} +\arguments{ +\item{explanation_list}{A list of \code{\link[=explain]{explain()}} objects applied to the same data, model, and +\code{vaeac} must be the used approach. If the entries in the list is named, then the function use +these names. Otherwise, it defaults to the approach names (with integer suffix for duplicates) +for the explanation objects in \code{explanation_list}.} + +\item{plot_from_nth_epoch}{Integer. If we are only plot the results form the nth epoch and so forth. +The first epochs can be large in absolute value and make the rest of the plot difficult to interpret.} + +\item{plot_every_nth_epoch}{Integer. If we are only to plot every nth epoch. Usefully to illustrate +the overall trend, as there can be a lot of fluctuation and oscillation in the values between each epoch.} + +\item{criteria}{Character vector. The possible options are "VLB", "IWAE", "IWAE_running". Default is the first two.} + +\item{plot_type}{Character vector. The possible options are "method" and "criterion". Default is to plot both.} + +\item{facet_wrap_scales}{String. Should the scales be fixed ("\code{fixed}", the default), +free ("\code{free}"), or free in one dimension ("\code{free_x}", "\code{free_y}").} + +\item{facet_wrap_ncol}{Integer. 
Number of columns in the facet wrap.} +} +\value{ +Either a single \code{\link[ggplot2:ggplot]{ggplot2::ggplot()}} object or a list of \code{\link[ggplot2:ggplot]{ggplot2::ggplot()}} objects based on the +\code{plot_type} parameter. +} +\description{ +This function makes (\code{\link[ggplot2:ggplot]{ggplot2::ggplot()}}) figures of the training VLB and the validation IWAE for a list +of \code{\link[=explain]{explain()}} objects with \code{approach = "vaeac"}. See \code{\link[=setup_approach]{setup_approach()}} for more information about the +\code{vaeac} approach. Two figures are returned by the function. In the first figure, each object in \code{explanation_list} gets +its own facet, while in the second figure, we plot the criteria in each facet for all objects. +} +\details{ +See \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)} or the +\href{https://borea17.github.io/paper_summaries/iwae/}{blog post} for a summary of the VLB and IWAE. +} +\examples{ +\dontrun{ +library(xgboost) +library(data.table) +library(shapr) + +data("airquality") +data <- data.table::as.data.table(airquality) +data <- data[complete.cases(data), ] + +x_var <- c("Solar.R", "Wind", "Temp", "Month") +y_var <- "Ozone" + +ind_x_explain <- 1:6 +x_train <- data[-ind_x_explain, ..x_var] +y_train <- data[-ind_x_explain, get(y_var)] +x_explain <- data[ind_x_explain, ..x_var] + +# Fitting a basic xgboost model to the training data +model <- xgboost(data = as.matrix(x_train), label = y_train, nround = 100, verbose = FALSE) + +# Specifying the phi_0, i.e. the expected prediction without any features +p0 <- mean(y_train) + +# Train vaeac with and without paired sampling +explanation_paired <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = p0, + n_samples = 1, # As we are only interested in the training of the vaeac + vaeac.epochs = 10, # Should be higher in applications. + vaeac.n_vaeacs_initialize = 1, + vaeac.width = 16, + vaeac.depth = 2, + vaeac.extra_parameters = list(vaeac.paired_sampling = TRUE) +) + +explanation_regular <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = p0, + n_samples = 1, # As we are only interested in the training of the vaeac + vaeac.epochs = 10, # Should be higher in applications. + vaeac.width = 16, + vaeac.depth = 2, + vaeac.n_vaeacs_initialize = 1, + vaeac.extra_parameters = list(vaeac.paired_sampling = FALSE) +) + +# Collect the explanation objects in a named list +explanation_list <- list( + "Regular sampling" = explanation_regular, + "Paired sampling" = explanation_paired +) + +# Call the function with the named list, will use the provided names +vaeac_plot_evaluation_criteria(explanation_list = explanation_list) + +# The function also works if we have only one method, +# but then one should only look at the method plot. 
+vaeac_plot_evaluation_criteria( + explanation_list = explanation_list[2], + plot_type = "method" +) + +# Can alter the plot +vaeac_plot_evaluation_criteria( + explanation_list = explanation_list, + plot_from_nth_epoch = 2, + plot_every_nth_epoch = 2, + facet_wrap_scales = "free" +) + +# If we only want the VLB +vaeac_plot_evaluation_criteria( + explanation_list = explanation_list, + criteria = "VLB", + plot_type = "criterion" +) + +# If we want only want the criterion version +tmp_fig_criterion <- + vaeac_plot_evaluation_criteria(explanation_list = explanation_list, plot_type = "criterion") + +# Since tmp_fig_criterion is a ggplot2 object, we can alter it +# by, e.g,. adding points or smooths with se bands +tmp_fig_criterion + ggplot2::geom_point(shape = "circle", size = 1, ggplot2::aes(col = Method)) +tmp_fig_criterion$layers[[1]] <- NULL +tmp_fig_criterion + ggplot2::geom_smooth(method = "loess", formula = y ~ x, se = TRUE) + + ggplot2::scale_color_brewer(palette = "Set1") + + ggplot2::theme_minimal() +} + +} +\author{ +Lars Henry Berge Olsen +} diff --git a/man/vaeac_plot_imputed_ggpairs.Rd b/man/vaeac_plot_imputed_ggpairs.Rd new file mode 100644 index 000000000..b667281f6 --- /dev/null +++ b/man/vaeac_plot_imputed_ggpairs.Rd @@ -0,0 +1,134 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_plot_imputed_ggpairs} +\alias{vaeac_plot_imputed_ggpairs} +\title{Plot Pairwise Plots for Imputed and True Data} +\usage{ +vaeac_plot_imputed_ggpairs( + explanation, + which_vaeac_model = "best", + x_true = NULL, + add_title = TRUE, + alpha = 0.5, + upper_cont = c("cor", "points", "smooth", "smooth_loess", "density", "blank"), + upper_cat = c("count", "cross", "ratio", "facetbar", "blank"), + upper_mix = c("box", "box_no_facet", "dot", "dot_no_facet", "facethist", + "facetdensity", "denstrip", "blank"), + lower_cont = c("points", "smooth", "smooth_loess", "density", "cor", "blank"), + lower_cat = c("facetbar", "ratio", "count", "cross", "blank"), + lower_mix = c("facetdensity", "box", "box_no_facet", "dot", "dot_no_facet", + "facethist", "denstrip", "blank"), + diag_cont = c("densityDiag", "barDiag", "blankDiag"), + diag_cat = c("barDiag", "blankDiag"), + cor_method = c("pearson", "kendall", "spearman") +) +} +\arguments{ +\item{explanation}{Shapr list. The output list from the \code{\link[=explain]{explain()}} function.} + +\item{which_vaeac_model}{String. Indicating which \code{vaeac} model to use when generating the samples. +Possible options are always \code{'best'}, \code{'best_running'}, and \code{'last'}. All possible options can be obtained +by calling \code{names(explanation$internal$parameters$vaeac$models)}.} + +\item{x_true}{Data.table containing the data from the distribution that the \code{vaeac} model is fitted to.} + +\item{add_title}{Logical. If \code{TRUE}, then a title is added to the plot based on the internal description +of the \code{vaeac} model specified in \code{which_vaeac_model}.} + +\item{alpha}{Numeric between \code{0} and \code{1} (default is \code{0.5}). The degree of color transparency.} + +\item{upper_cont}{String. Type of plot to use in upper triangle for continuous features, see \code{\link[GGally:ggpairs]{GGally::ggpairs()}}. +Possible options are: \code{'cor'} (default), \code{'points'}, \code{'smooth'}, \code{'smooth_loess'}, \code{'density'}, and \code{'blank'}.} + +\item{upper_cat}{String. 
Type of plot to use in upper triangle for categorical features, see \code{\link[GGally:ggpairs]{GGally::ggpairs()}}. +Possible options are: \code{'count'} (default), \code{'cross'}, \code{'ratio'}, \code{'facetbar'}, and \code{'blank'}.} + +\item{upper_mix}{String. Type of plot to use in upper triangle for mixed features, see \code{\link[GGally:ggpairs]{GGally::ggpairs()}}. +Possible options are: \code{'box'} (default), \code{'box_no_facet'}, \code{'dot'}, \code{'dot_no_facet'}, \code{'facethist'}, +\code{'facetdensity'}, \code{'denstrip'}, and \code{'blank'}} + +\item{lower_cont}{String. Type of plot to use in lower triangle for continuous features, see \code{\link[GGally:ggpairs]{GGally::ggpairs()}}. +Possible options are: \code{'points'} (default), \code{'smooth'}, \code{'smooth_loess'}, \code{'density'}, \code{'cor'}, and \code{'blank'}.} + +\item{lower_cat}{String. Type of plot to use in lower triangle for categorical features, see \code{\link[GGally:ggpairs]{GGally::ggpairs()}}. +Possible options are: \code{'facetbar'} (default), \code{'ratio'}, \code{'count'}, \code{'cross'}, and \code{'blank'}.} + +\item{lower_mix}{String. Type of plot to use in lower triangle for mixed features, see \code{\link[GGally:ggpairs]{GGally::ggpairs()}}. +Possible options are: \code{'facetdensity'} (default), \code{'box'}, \code{'box_no_facet'}, \code{'dot'}, \code{'dot_no_facet'}, +\code{'facethist'}, \code{'denstrip'}, and \code{'blank'}.} + +\item{diag_cont}{String. Type of plot to use on the diagonal for continuous features, see \code{\link[GGally:ggpairs]{GGally::ggpairs()}}. +Possible options are: \code{'densityDiag'} (default), \code{'barDiag'}, and \code{'blankDiag'}.} + +\item{diag_cat}{String. Type of plot to use on the diagonal for categorical features, see \code{\link[GGally:ggpairs]{GGally::ggpairs()}}. +Possible options are: \code{'barDiag'} (default) and \code{'blankDiag'}.} + +\item{cor_method}{String. Type of correlation measure, see \code{\link[GGally:ggpairs]{GGally::ggpairs()}}. +Possible options are: \code{'pearson'} (default), \code{'kendall'}, and \code{'spearman'}.} +} +\value{ +A \code{\link[GGally:ggpairs]{GGally::ggpairs()}} figure. +} +\description{ +A function that creates a matrix of plots (\code{\link[GGally:ggpairs]{GGally::ggpairs()}}) from +generated imputations from the unconditioned distribution \eqn{p(\boldsymbol{x})} estimated by +a \code{vaeac} model, and then compares the imputed values with data from the true distribution (if provided). +See \href{https://www.blopig.com/blog/2019/06/a-brief-introduction-to-ggpairs/}{ggpairs} for an +introduction to \code{\link[GGally:ggpairs]{GGally::ggpairs()}}, and the corresponding +\href{https://ggobi.github.io/ggally/articles/ggally_plots.html}{vignette}. 
+} +\examples{ +\dontrun{ +library(xgboost) +library(data.table) +library(shapr) + +data("airquality") +data <- data.table::as.data.table(airquality) +data <- data[complete.cases(data), ] + +x_var <- c("Solar.R", "Wind", "Temp", "Month") +y_var <- "Ozone" + +ind_x_explain <- 1:6 +x_train <- data[-ind_x_explain, ..x_var] +y_train <- data[-ind_x_explain, get(y_var)] +x_explain <- data[ind_x_explain, ..x_var] + +# Fitting a basic xgboost model to the training data +model <- xgboost( + data = as.matrix(x_train), + label = y_train, + nround = 100, + verbose = FALSE +) + +explanation <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = mean(y_train), + n_samples = 1, + vaeac.epochs = 10, + vaeac.n_vaeacs_initialize = 1 +) + +# Plot the results +figure <- vaeac_plot_imputed_ggpairs( + explanation = explanation, + which_vaeac_model = "best", + x_true = x_train, + add_title = TRUE +) +figure + +# Note that this is an ggplot2 object which we can alter, e.g., we can change the colors. +figure + + ggplot2::scale_color_manual(values = c("#E69F00", "#999999")) + + ggplot2::scale_fill_manual(values = c("#E69F00", "#999999")) +} +} +\author{ +Lars Henry Berge Olsen +} diff --git a/man/vaeac_postprocess_data.Rd b/man/vaeac_postprocess_data.Rd new file mode 100644 index 000000000..cb5f4acb8 --- /dev/null +++ b/man/vaeac_postprocess_data.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{vaeac_postprocess_data} +\alias{vaeac_postprocess_data} +\title{Postprocess Data Generated by a vaeac Model} +\usage{ +vaeac_postprocess_data(data, vaeac_model_state_list) +} +\arguments{ +\item{data}{data.table containing the data generated by a vaeac model} + +\item{vaeac_model_state_list}{List. The returned list from the \code{vaeac_preprocess_data} function or +a loaded checkpoint list of a saved vaeac object.} +} +\value{ +data.table with the generated data from a vaeac model where the categorical features +now have the original class names. +} +\description{ +vaeac generates numerical values. This function converts categorical features +to from numerics with class labels 1,2,...,K, to factors with the original and class labels. +} +\examples{ +\dontrun{ +data <- data.table(matrix(rgamma(500 * 3, 2), ncol = 3)) +preprocessed <- vaeac_preprocess_data(data) +preprocessed$data_preprocessed +postprocessed <- vaeac_postprocess_data(preprocessed$data_preprocessed, preprocessed) +postprocessed +all.equal(data, postprocessed) +} +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_preprocess_data.Rd b/man/vaeac_preprocess_data.Rd new file mode 100644 index 000000000..93c8ca05f --- /dev/null +++ b/man/vaeac_preprocess_data.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac_torch_modules.R +\name{vaeac_preprocess_data} +\alias{vaeac_preprocess_data} +\title{Preprocess Data for the vaeac approach} +\usage{ +vaeac_preprocess_data( + data, + log_exp_cont_feat = FALSE, + normalize = TRUE, + norm_mean = NULL, + norm_std = NULL +) +} +\arguments{ +\item{data}{matrix/data.frame/data.table containing the training data. Only the features and +not the response.} + +\item{log_exp_cont_feat}{Boolean. If we are to log transform all continuous +features before sending the data to vaeac. 
vaeac creates unbounded values, so if the continuous +features are strictly positive, as for the Burr and Abalone data, it can be advantageous to log-transform +the data to unbounded form before using vaeac. If TRUE, then \code{vaeac_postprocess_data} will +take the exp of the results to get back to strictly positive values.} + +\item{norm_mean}{Torch tensor (optional). A 1D array containing the means of the columns of \code{x_torch}.} + +\item{norm_std}{Torch tensor (optional). A 1D array containing the stds of the columns of \code{x_torch}.} + +\item{x_torch}{Torch tensor. A 2D matrix containing the data to normalize.} +} +\value{ +List containing data which can be used in vaeac, maps between the original and new class +names for categorical features, one_hot_max_sizes, and a list of information about the data. +} +\description{ +vaeac only supports numerical values. This function converts categorical features +to numerics with class labels 1,2,...,K, and keeps track of the map between the original and +new class labels. It also computes the one_hot_max_sizes. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_print_train_summary.Rd b/man/vaeac_print_train_summary.Rd new file mode 100644 index 000000000..ab9a8a560 --- /dev/null +++ b/man/vaeac_print_train_summary.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_print_train_summary} +\alias{vaeac_print_train_summary} +\title{Function to print out a training summary for the \code{vaeac} model} +\usage{ +vaeac_print_train_summary(best_epoch, best_epoch_running, last_state) +} +\arguments{ +\item{best_epoch}{Positive integer. The epoch with the lowest validation error.} + +\item{best_epoch_running}{Positive integer. The epoch with the lowest running validation error.} + +\item{last_state}{The state list (i.e., the saved \code{vaeac} object) +of the \code{vaeac} model at the epoch with the lowest IWAE.} +} +\value{ +This function only prints out a message. 
+} +\description{ +Function to printout a training summary for the \code{vaeac} model +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_save_state.Rd b/man/vaeac_save_state.Rd new file mode 100644 index 000000000..5da0eaa0e --- /dev/null +++ b/man/vaeac_save_state.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_save_state} +\alias{vaeac_save_state} +\title{Function that saves the state list and the current save state of the \code{vaeac} model} +\usage{ +vaeac_save_state(state_list, file_name, return_state = FALSE) +} +\arguments{ +\item{state_list}{List containing all the parameters in the state.} + +\item{file_name}{String containing the file path.} + +\item{return_state}{Logical if we are to return the state list or not.} +} +\value{ +This function does not return anything +} +\description{ +Function that saves the state list and the current save state of the \code{vaeac} model +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_train_model.Rd b/man/vaeac_train_model.Rd new file mode 100644 index 000000000..946fc6939 --- /dev/null +++ b/man/vaeac_train_model.Rd @@ -0,0 +1,195 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_train_model} +\alias{vaeac_train_model} +\title{Train the Vaeac Model} +\usage{ +vaeac_train_model( + x_train, + model_description, + folder_to_save_model, + cuda, + n_vaeacs_initialize, + epochs_initiation_phase, + epochs, + epochs_early_stopping, + save_every_nth_epoch, + val_ratio, + val_iwae_n_samples, + depth, + width, + latent_dim, + lr, + batch_size, + running_avg_n_values, + activation_function, + skip_conn_layer, + skip_conn_masked_enc_dec, + batch_normalization, + paired_sampling, + masking_ratio, + mask_gen_coalitions, + mask_gen_coalitions_prob, + sigma_mu, + sigma_sigma, + save_data, + log_exp_cont_feat, + which_vaeac_model, + verbose, + seed, + ... +) +} +\arguments{ +\item{x_train}{A data.table containing the training data. Categorical data must have class names \eqn{1,2,\dots,K}.} + +\item{model_description}{String (default is \code{make.names(Sys.time())}). String containing, e.g., the name of the +data distribution or additional parameter information. Used in the save name of the fitted model. If not provided, +then a name will be generated based on \code{\link[base:Sys.time]{base::Sys.time()}} to ensure a unique name. We use \code{\link[base:make.names]{base::make.names()}} to +ensure a valid file name for all operating systems.} + +\item{folder_to_save_model}{String (default is \code{\link[base:tempfile]{base::tempdir()}}). String specifying a path to a folder where +the function is to save the fitted vaeac model. Note that the path will be removed from the returned +\code{\link[=explain]{explain()}} object if \code{vaeac.save_model = FALSE}.} + +\item{cuda}{cuda Logical (default is \code{FALSE}). If \code{TRUE}, then the \code{vaeac} model will be trained using cuda/GPU. +If \code{\link[torch:cuda_is_available]{torch::cuda_is_available()}} is \code{FALSE}, the we fall back to use CPU. If \code{FALSE}, we use the CPU. Often this is +faster for tabular data sets. Note, cuda is not not supported in the current version of the \code{shapr} package. +TODO: Update this when this is done.} + +\item{n_vaeacs_initialize}{Positive integer (default is \code{4}). The number of different vaeac models to initiate +in the start. 
Pick the best performing one after \code{epochs_initiation_phase} +epochs (default is \code{2}) and continue training that one.} + +\item{epochs_initiation_phase}{Positive integer (default is \code{2}). The number of epochs to run each of the +\code{n_vaeacs_initialize} \code{vaeac} models before continuing to train only the best performing model.} + +\item{epochs}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model. +This includes \code{epochs_initiation_phase}, where the default is \code{2}.} + +\item{epochs_early_stopping}{Positive integer (default is \code{NULL}). The training stops if there has been no +improvement in the validation IWAE for \code{epochs_early_stopping} epochs. If the user wants the training process +to be solely based on this training criterion, then \code{epochs} in \code{\link[=explain]{explain()}} should be set to a large +number. If \code{NULL}, then \code{shapr} will internally set \code{epochs_early_stopping = vaeac.epochs} such that early +stopping does not occur.} + +\item{save_every_nth_epoch}{Positive integer (default is \code{NULL}). If provided, then the vaeac model after +every \code{save_every_nth_epoch}th epoch will be saved.} + +\item{val_ratio}{Numeric (default is \code{0.25}). Scalar between \code{0} and \code{1} indicating the ratio of +instances from the input data which will be used as validation data. That is, \code{val_ratio = 0.25} means +that \verb{75\%} of the provided data is used as training data, while the remaining \verb{25\%} is used as validation data.} + +\item{val_iwae_n_samples}{Positive integer (default is \code{25}). The number of generated samples used +to compute the IWAE criterion when validating the vaeac model on the validation data.} + +\item{depth}{Positive integer (default is \code{3}). The number of hidden layers +in the neural networks of the masked encoder, full encoder, and decoder.} + +\item{width}{Positive integer (default is \code{32}). The number of neurons in each +hidden layer in the neural networks of the masked encoder, full encoder, and decoder.} + +\item{latent_dim}{Positive integer (default is \code{8}). The number of dimensions in the latent space.} + +\item{lr}{Positive numeric (default is \code{0.001}). The learning rate used in the \code{\link[torch:optim_adam]{torch::optim_adam()}} optimizer.} + +\item{batch_size}{Positive integer (default is \code{64}). The number of samples to include in each batch +during the training of the vaeac model. Used in \code{\link[torch:dataloader]{torch::dataloader()}}.} + +\item{running_avg_n_values}{running_avg_n_values Positive integer (default is \code{5}). +The number of previous IWAE values to include +when we compute the running means of the IWAE criterion.} + +\item{activation_function}{An \code{\link[torch:nn_module]{torch::nn_module()}} representing an activation function such as, e.g., +\code{\link[torch:nn_relu]{torch::nn_relu()}} (default), \code{\link[torch:nn_leaky_relu]{torch::nn_leaky_relu()}}, \code{\link[torch:nn_selu]{torch::nn_selu()}}, or \code{\link[torch:nn_sigmoid]{torch::nn_sigmoid()}}.} + +\item{skip_conn_layer}{Logical (default is \code{TRUE}). If \code{TRUE}, we apply identity skip connections in each +layer, see \code{\link[=SkipConnection]{SkipConnection()}}. That is, we add the input \eqn{X} to the outcome of each hidden layer, +so the output becomes \eqn{X + activation(WX + b)}.} + +\item{skip_conn_masked_enc_dec}{Logical (default is \code{TRUE}). 
If \code{TRUE}, we apply concatenate skip +connections between the layers in the masked encoder and decoder. The first layer of the masked encoder will be +linked to the last layer of the decoder. The second layer of the masked encoder will be +linked to the second to last layer of the decoder, and so on.} + +\item{batch_normalization}{Logical (default is \code{FALSE}). If \code{TRUE}, we apply batch normalization after the +activation function. Note that if \code{skip_conn_layer = TRUE}, then the normalization is applied after the +inclusion of the skip connection. That is, we batch normalize the whole quantity \eqn{X + activation(WX + b)}.} + +\item{paired_sampling}{Logical (default is \code{TRUE}). If \code{TRUE}, we apply paired sampling to the training +batches. That is, the training observations in each batch will be duplicated, where the first instance will be masked +by \eqn{S} while the second instance will be masked by \eqn{\bar{S}}. This ensures that the training of the +\code{vaeac} model becomes more stable as the model has access to the full version of each training observation. However, +this will increase the training time due to more complex implementation and doubling the size of each batch. See +\code{\link[=paired_sampler]{paired_sampler()}} for more information.} + +\item{masking_ratio}{Numeric (default is \code{0.5}). Probability of masking a feature in the +\code{\link[=MCAR_mask_generator]{MCAR_mask_generator()}} (MCAR = Missing Completely At Random). The MCAR masking scheme ensures that \code{vaeac} +model can do arbitrary conditioning as all coalitions will be trained. \code{masking_ratio} will be overruled if +\code{mask_gen_coalitions} is specified.} + +\item{mask_gen_coalitions}{Matrix (default is \code{NULL}). Matrix containing the coalitions that the +\code{vaeac} model will be trained on, see \code{\link[=Specified_masks_mask_generator]{Specified_masks_mask_generator()}}. This parameter is used internally +in \code{shapr} when we only consider a subset of coalitions/combinations, i.e., when +\code{n_combinations} \eqn{< 2^{n_{\text{features}}}}, and for group Shapley, i.e., +when \code{group} is specified in \code{\link[=explain]{explain()}}.} + +\item{mask_gen_coalitions_prob}{Numeric array (default is \code{NULL}). Array of length equal to the height +of \code{mask_gen_coalitions} containing the probabilities of sampling the corresponding coalitions in +\code{mask_gen_coalitions}.} + +\item{sigma_mu}{Numeric (default is \code{1e4}). One of two hyperparameter values in the normal-gamma prior +used in the masked encoder, see Section 3.3.1 in +\href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.} + +\item{sigma_sigma}{Numeric (default is \code{1e-4}). One of two hyperparameter values in the normal-gamma prior +used in the masked encoder, see Section 3.3.1 in +\href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}.} + +\item{save_data}{Logical (default is \code{FALSE}). If \code{TRUE}, then the data is stored together with +the model. Useful if one are to continue to train the model later using \code{\link[=vaeac_continue_train_model]{vaeac_continue_train_model()}}.} + +\item{log_exp_cont_feat}{Logical (default is \code{FALSE}). If we are to \eqn{\log} transform all +continuous features before sending the data to \code{\link[=vaeac]{vaeac()}}. The \code{vaeac} model creates unbounded Monte Carlo +sample values. 
Thus, if the continuous features are strictly positive (as for, e.g., the Burr distribution and +Abalone data set), it can be advantageous to \eqn{\log} transform the data to unbounded form before using \code{vaeac}. +If \code{TRUE}, then \code{\link[=vaeac_postprocess_data]{vaeac_postprocess_data()}} will take the \eqn{\exp} of the results to get back to strictly +positive values when using the \code{vaeac} model to impute missing values/generate the Monte Carlo samples.} + +\item{which_vaeac_model}{String (default is \code{best}). The name of the \code{vaeac} model (snapshots from different +epochs) to use when generating the Monte Carlo samples. The standard choices are: \code{"best"} (epoch with lowest IWAE), +\code{"best_running"} (epoch with lowest running IWAE, see \code{vaeac.running_avg_n_values}), and \code{last} (the last epoch). +Note that additional choices are available if \code{vaeac.save_every_nth_epoch} is provided. For example, if +\code{vaeac.save_every_nth_epoch = 5}, then \code{vaeac.which_vaeac_model} can also take the values \code{"epoch_5"}, \code{"epoch_10"}, +\code{"epoch_15"}, and so on.} + +\item{verbose}{Integer specifying the level of verbosity. Use \code{0} (default) for no verbosity, +\code{1} for low verbose, and \code{2} for high verbose.} + +\item{seed}{Positive integer (default is \code{1}). Seed for reproducibility. Specifies the seed before any randomness +based code is run.} + +\item{...}{List of extra parameters, currently not used.} +} +\value{ +A list containing the training/validation errors and paths to where the vaeac models are saved on the disk. +} +\description{ +Function that fits a vaeac model to the given dataset based on the provided parameters, +as described in \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)}. Note that +all default parameters specified below originate from \code{\link[=setup_approach.vaeac]{setup_approach.vaeac()}} and +\code{\link[=vaeac_get_extra_para_default]{vaeac_get_extra_para_default()}}. +} +\details{ +The vaeac model consists of three neural networks, i.e., a masked encoder, a full encoder, and a decoder. +The networks have shared \code{depth}, \code{width}, and \code{activation_function}. The encoders map the \code{x_train} +to a latent representation of dimension \code{latent_dim}, while the decoder maps the latent representations +back to the feature space. See \href{https://www.jmlr.org/papers/volume23/21-1413/21-1413.pdf}{Olsen et al. (2022)} +for more details. The function first initiates \code{n_vaeacs_initialize} vaeac models with different randomly +initialized network parameter values to remedy poorly initialized values. After \code{epochs_initiation_phase} epochs, the +\code{n_vaeacs_initialize} vaeac models are compared and the function continues to only train the best performing +one for a total of \code{epochs} epochs. The networks are trained using the ADAM optimizer with learning rate \code{lr}. 
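As an illustration of how these training parameters are typically reached from the user side, the following hedged sketch passes them through \code{explain()} using the \code{vaeac.} prefix, mirroring the examples elsewhere in this documentation. The data objects (\code{model}, \code{x_train}, \code{x_explain}, \code{y_train}) are assumed to be prepared as in those examples, and the parameter values simply restate the defaults mentioned above; this is not a prescription for good settings on any particular data set.

explanation <- explain(
  model = model,
  x_explain = x_explain,
  x_train = x_train,
  approach = "vaeac",
  prediction_zero = mean(y_train),
  vaeac.epochs = 100,             # total number of epochs, including the initiation phase
  vaeac.n_vaeacs_initialize = 4,  # number of randomly initialized models to start from
  vaeac.width = 32,
  vaeac.depth = 3,
  vaeac.extra_parameters = list(vaeac.paired_sampling = TRUE)
)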
+} +\author{ +Lars Henry Berge Olsen +} diff --git a/man/vaeac_train_model_auxiliary.Rd b/man/vaeac_train_model_auxiliary.Rd new file mode 100644 index 000000000..6c8a6cc21 --- /dev/null +++ b/man/vaeac_train_model_auxiliary.Rd @@ -0,0 +1,103 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_train_model_auxiliary} +\alias{vaeac_train_model_auxiliary} +\title{Function used to train a \code{vaeac} model} +\usage{ +vaeac_train_model_auxiliary( + vaeac_model, + optimizer, + train_dataloader, + val_dataloader, + val_iwae_n_samples, + running_avg_n_values, + verbose, + cuda, + epochs, + save_every_nth_epoch, + epochs_early_stopping, + epochs_start = 1, + progressr_bar = NULL, + vaeac_save_file_names = NULL, + state_list = NULL, + initialization_idx = NULL, + n_vaeacs_initialize = NULL, + train_vlb = NULL, + val_iwae = NULL, + val_iwae_running = NULL +) +} +\arguments{ +\item{vaeac_model}{A \code{\link[=vaeac]{vaeac()}} object. The \code{vaeac} model this function is to train.} + +\item{optimizer}{A \code{\link[torch:optimizer]{torch::optimizer()}} object. See \code{\link[=vaeac_get_optimizer]{vaeac_get_optimizer()}}.} + +\item{train_dataloader}{A \code{\link[torch:dataloader]{torch::dataloader()}} containing the training data for the \code{vaeac} model.} + +\item{val_dataloader}{A \code{\link[torch:dataloader]{torch::dataloader()}} containing the validation data for the \code{vaeac} model.} + +\item{val_iwae_n_samples}{Positive integer (default is \code{25}). The number of generated samples used +to compute the IWAE criterion when validating the vaeac model on the validation data.} + +\item{running_avg_n_values}{running_avg_n_values Positive integer (default is \code{5}). +The number of previous IWAE values to include +when we compute the running means of the IWAE criterion.} + +\item{verbose}{Boolean. An integer specifying the level of verbosity. Use \code{0} (default) for no verbosity, +\code{1} for low verbose, and \code{2} for high verbose.} + +\item{cuda}{cuda Logical (default is \code{FALSE}). If \code{TRUE}, then the \code{vaeac} model will be trained using cuda/GPU. +If \code{\link[torch:cuda_is_available]{torch::cuda_is_available()}} is \code{FALSE}, the we fall back to use CPU. If \code{FALSE}, we use the CPU. Often this is +faster for tabular data sets. Note, cuda is not not supported in the current version of the \code{shapr} package. +TODO: Update this when this is done.} + +\item{epochs}{Positive integer (default is \code{100}). The number of epochs to train the final vaeac model. +This includes \code{epochs_initiation_phase}, where the default is \code{2}.} + +\item{save_every_nth_epoch}{Positive integer (default is \code{NULL}). If provided, then the vaeac model after +every \code{save_every_nth_epoch}th epoch will be saved.} + +\item{epochs_early_stopping}{Positive integer (default is \code{NULL}). The training stops if there has been no +improvement in the validation IWAE for \code{epochs_early_stopping} epochs. If the user wants the training process +to be solely based on this training criterion, then \code{epochs} in \code{\link[=explain]{explain()}} should be set to a large +number. If \code{NULL}, then \code{shapr} will internally set \code{epochs_early_stopping = vaeac.epochs} such that early +stopping does not occur.} + +\item{epochs_start}{Positive integer (default is \code{1}). 
The epoch at which the training starts.} + +\item{progressr_bar}{A \code{\link[progressr:progressor]{progressr::progressor()}} object (default is \code{NULL}) to keep track of progress.} + +\item{vaeac_save_file_names}{Array of strings containing the save file names for the \code{vaeac} model.} + +\item{state_list}{Named list containing the objects returned from \code{\link[=vaeac_get_full_state_list]{vaeac_get_full_state_list()}}.} + +\item{initialization_idx}{Positive integer (default is \code{NULL}). The index +of the current \code{vaeac} model in the initialization phase.} + +\item{n_vaeacs_initialize}{Positive integer (default is \code{4}). The number of different vaeac models to initiate +at the start. The best-performing one is picked after \code{epochs_initiation_phase} +epochs (default is \code{2}) and training continues with that one.} + +\item{train_vlb}{A \code{\link[torch:torch_tensor]{torch::torch_tensor()}} (default is \code{NULL}) +of one dimension containing previous values for the training VLB.} + +\item{val_iwae}{A \code{\link[torch:torch_tensor]{torch::torch_tensor()}} (default is \code{NULL}) +of one dimension containing previous values for the validation IWAE.} + +\item{val_iwae_running}{A \code{\link[torch:torch_tensor]{torch::torch_tensor()}} (default is \code{NULL}) +of one dimension containing previous values for the running validation IWAE.} +} +\value{ +Depending on whether we are in the initialization phase or not: either the trained \code{vaeac} model, or +a list of where the \code{vaeac} models are stored on disk together with the parameters of the model. +} +\description{ +This function can be applied both in the initialization phase, when we train several initiated \code{vaeac} models, and +when we keep training the best-performing \code{vaeac} model for the remaining number of epochs. We are in the former setting +when \code{initialization_idx} is provided and in the latter when it is \code{NULL}. When it is \code{NULL}, we save to disk the \code{vaeac} models +with the lowest VLB, IWAE, and running IWAE, as well as the epochs specified by \code{save_every_nth_epoch}. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_update_para_locations.Rd b/man/vaeac_update_para_locations.Rd new file mode 100644 index 000000000..d8e3e53f1 --- /dev/null +++ b/man/vaeac_update_para_locations.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_update_para_locations} +\alias{vaeac_update_para_locations} +\title{Move \code{vaeac} parameters to correct location} +\usage{ +vaeac_update_para_locations(parameters) +} +\arguments{ +\item{parameters}{List. The \code{internal$parameters} list created inside the \code{\link[=explain]{explain()}} function.} +} +\value{ +Updated version of \code{parameters} where all \code{vaeac} parameters are located at the correct location. +} +\description{ +This function ensures that the main and extra parameters for the \code{vaeac} +approach are located at their correct locations.
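As a schematic sketch of the two locations this function reconciles: the split between main and extra \code{vaeac} parameters below is inferred from the \code{explain()} calls in the tests in this diff and from \code{vaeac_get_extra_para_default()}, so treat it as illustrative rather than a definitive list.

# Main vaeac parameters are passed directly to explain(), while extra parameters
# live inside the vaeac.extra_parameters list (see vaeac_get_extra_para_default()).
vaeac_parameters <- list(
  vaeac.epochs = 4,                     # main parameter
  vaeac.n_vaeacs_initialize = 2,        # main parameter
  vaeac.extra_parameters = list(
    vaeac.epochs_initiation_phase = 2,  # extra parameter
    vaeac.save_model = FALSE            # extra parameter
  )
)
# vaeac_update_para_locations() is expected to move any vaeac parameter that the
# user placed at the wrong level to its correct location in internal$parameters.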
+} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/man/vaeac_update_pretrained_model.Rd b/man/vaeac_update_pretrained_model.Rd new file mode 100644 index 000000000..d60785518 --- /dev/null +++ b/man/vaeac_update_pretrained_model.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/approach_vaeac.R +\name{vaeac_update_pretrained_model} +\alias{vaeac_update_pretrained_model} +\title{Function that checks and adds a pre-trained \code{vaeac} model} +\usage{ +vaeac_update_pretrained_model(parameters) +} +\arguments{ +\item{parameters}{List containing the parameters used within \code{\link[=explain]{explain()}}.} +} +\value{ +The \code{parameters} list updated with a valid pre-trained \code{vaeac} model. +} +\description{ +Function that checks and adds a pre-trained \code{vaeac} model. +} +\author{ +Lars Henry Berge Olsen +} +\keyword{internal} diff --git a/rebuild_long_running_vignette.R b/rebuild_long_running_vignette.R new file mode 100644 index 000000000..43517c56e --- /dev/null +++ b/rebuild_long_running_vignette.R @@ -0,0 +1,14 @@ +# This R file is based on https://www.kloppenborg.ca/2021/06/long-running-vignettes/ and +# https://ropensci.org/blog/2019/12/08/precompute-vignettes/ to allow for the `vaeac` vignette +# to be pre-built such that it is not run every time we check and/or build the package. + +old_wd <- getwd() + +setwd("vignettes/") +knitr::knit("understanding_shapr_vaeac.Rmd.orig", output = "understanding_shapr_vaeac.Rmd") +knitr::purl("understanding_shapr_vaeac.Rmd.orig", output = "understanding_shapr_vaeac.R") + +knitr::knit("understanding_shapr.Rmd.orig", output = "understanding_shapr.Rmd") +knitr::purl("understanding_shapr.Rmd.orig", output = "understanding_shapr.R") + +setwd(old_wd) diff --git a/tests/testthat/_snaps/.gitignore b/tests/testthat/_snaps/.gitignore new file mode 100644 index 000000000..e43b0f988 --- /dev/null +++ b/tests/testthat/_snaps/.gitignore @@ -0,0 +1 @@ +.DS_Store diff --git a/tests/testthat/_snaps/forecast-output.md b/tests/testthat/_snaps/forecast-output.md index 9d347b16a..dbc55f06f 100644 --- a/tests/testthat/_snaps/forecast-output.md +++ b/tests/testthat/_snaps/forecast-output.md @@ -8,6 +8,7 @@ Output explain_idx horizon none Temp.1 Temp.2 + 1: 152 1 77.88 -0.3972 -1.3912 2: 153 1 77.88 -6.6177 -0.1835 3: 152 2 77.88 -0.3285 -1.2034 @@ -25,6 +26,7 @@ Output explain_idx horizon none Temp.1 Temp.2 Wind.1 Wind.2 Wind.F1 Wind.F2 + 1: 149 1 77.88 -0.9588 -5.044 1.0543 -2.8958 -2.6627 NA 2: 150 1 77.88 1.1553 -3.137 -2.8802 0.7196 -1.4930 NA 3: 149 2 77.88 0.1327 -5.048 0.3337 -2.8249 -2.3014 -1.1764 @@ -32,6 +34,7 @@ 5: 149 3 77.88 -1.3878 -5.014 0.7964 -1.3881 -1.9652 -0.3295 6: 150 3 77.88 1.6690 -2.556 -2.3821 0.3835 -0.8644 -0.1648 Wind.F3 + 1: NA 2: NA 3: NA @@ -49,6 +52,7 @@ Output explain_idx horizon none Temp.1 Temp.2 + 1: 149 1 77.88 -1.7273 -7.033 2: 150 1 77.88 -0.2229 -4.492 3: 149 2 77.88 -1.7273 -7.033 @@ -66,6 +70,7 @@ Output explain_idx horizon none Temp Wind + 1: 149 1 77.88 -5.3063 -5.201 2: 150 1 77.88 -1.4435 -4.192 3: 149 2 77.88 -3.6824 -7.202 @@ -3620,6 +3625,7 @@ data length [2] is not a sub-multiple or multiple of the number of rows [3] Output explain_idx horizon none Wind.F1 Wind.F2 Wind.F3 + 1: 149 1 77.88 -9.391 NA NA 2: 150 1 77.88 -4.142 NA NA 3: 149 2 77.88 -4.699 -4.6989 NA diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_ar_numeric.rds b/tests/testthat/_snaps/forecast-output/forecast_output_ar_numeric.rds index bdb1e287f..ca1606114 100644
Binary files a/tests/testthat/_snaps/forecast-output/forecast_output_ar_numeric.rds and b/tests/testthat/_snaps/forecast-output/forecast_output_ar_numeric.rds differ diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric.rds index 7d65b7ac6..bc7ca40af 100644 Binary files a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric.rds and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric.rds differ diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_lags.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_lags.rds index 696f23e64..f0974f449 100644 Binary files a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_lags.rds and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_lags.rds differ diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_xreg.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_xreg.rds index 60331f9a6..8fecb3578 100644 Binary files a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_xreg.rds and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_xreg.rds differ diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_forecast_ARIMA_group_numeric.rds b/tests/testthat/_snaps/forecast-output/forecast_output_forecast_ARIMA_group_numeric.rds index 498bff71e..5dfbb93b2 100644 Binary files a/tests/testthat/_snaps/forecast-output/forecast_output_forecast_ARIMA_group_numeric.rds and b/tests/testthat/_snaps/forecast-output/forecast_output_forecast_ARIMA_group_numeric.rds differ diff --git a/tests/testthat/_snaps/output.md b/tests/testthat/_snaps/output.md index bd2dd91fe..d241853e1 100644 --- a/tests/testthat/_snaps/output.md +++ b/tests/testthat/_snaps/output.md @@ -4,6 +4,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -4.537 8.269 17.517 -5.581 -3.066 2: 42.44 2.250 -3.345 -5.232 -5.581 -1.971 3: 42.44 3.708 -18.610 -1.440 -2.541 1.316 @@ -14,6 +15,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -4.537 8.269 17.517 -5.581 -3.066 2: 42.44 2.250 -3.345 -5.232 -5.581 -1.971 3: 42.44 3.708 -18.610 -1.440 -2.541 1.316 @@ -24,6 +26,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -13.252 15.541 12.826 -5.77179 3.259 2: 42.44 2.758 -3.325 -7.992 -7.12800 1.808 3: 42.44 6.805 -22.126 3.730 -0.09235 -5.885 @@ -34,6 +37,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -5.795 15.320 8.557 -7.547 2.066 2: 42.44 3.266 -3.252 -7.693 -7.663 1.462 3: 42.44 4.290 -24.395 6.739 -1.006 -3.197 @@ -52,6 +56,7 @@ empirical.eta force set to 1 for empirical.type = 'independence' Output none Solar.R Wind Temp Month Day + 1: 42.44 -4.537 8.269 17.517 -5.581 -3.066 2: 42.44 2.250 -3.345 -5.232 -5.581 -1.971 3: 42.44 3.708 -18.610 -1.440 -2.541 1.316 @@ -62,6 +67,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -15.66 6.823 17.5092 0.2463 3.6847 2: 42.44 10.70 -1.063 -10.6804 -13.0305 0.1983 3: 42.44 14.65 -19.946 0.9675 -7.3433 -5.8946 @@ -72,6 +78,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -14.98 6.3170 17.4103 0.2876 3.5623 2: 42.44 12.42 0.1482 -10.2338 -16.4096 0.1967 3: 42.44 15.74 -19.7250 0.9992 -8.6950 -5.8886 @@ -82,6 +89,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -8.117 7.438 14.0026 0.8602 -1.5813 2: 
42.44 5.278 -5.219 -12.1079 -0.8073 -1.0235 3: 42.44 7.867 -25.995 -0.1377 -0.2368 0.9342 @@ -92,6 +100,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -5.960 7.046 13.863 -0.274 -2.074 2: 42.44 4.482 -4.892 -10.491 -1.659 -1.319 3: 42.44 6.587 -25.533 1.279 -1.043 1.142 @@ -102,26 +111,51 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -9.124 9.509 17.139 -1.4711 -3.451 2: 42.44 5.342 -6.097 -8.232 -2.8129 -2.079 3: 42.44 6.901 -21.079 -4.687 0.1494 1.146 +# output_lm_numeric_vaeac + + Code + (out <- code) + Output + none Solar.R Wind Temp Month Day + + 1: 42.44 -6.534 9.146 18.8166 -5.238 -3.5884 + 2: 42.44 1.421 -5.329 -6.8472 -3.668 0.5436 + 3: 42.44 7.073 -18.914 -0.6391 -6.038 0.9493 + # output_lm_categorical_ctree Code (out <- code) Output none Month_factor Ozone_sub30_factor Solar.R_factor Wind_factor + 1: 42.44 -6.206 15.38 -6.705 -2.973 2: 42.44 -5.764 -17.71 21.866 -13.219 3: 42.44 7.101 -21.78 1.730 -5.413 +# output_lm_categorical_vaeac + + Code + (out <- code) + Output + none Month_factor Ozone_sub30_factor Solar.R_factor Wind_factor + + 1: 42.44 1.795 10.32 -6.919 -5.704 + 2: 42.44 -2.438 -18.15 20.755 -14.999 + 3: 42.44 8.299 -23.71 8.751 -11.708 + # output_lm_categorical_categorical Code (out <- code) Output none Month_factor Ozone_sub30_factor Solar.R_factor Wind_factor + 1: 42.44 13.656 -19.73 4.369 -16.659 2: 42.44 -5.448 11.31 -11.445 5.078 3: 42.44 -7.493 -12.27 19.672 -14.744 @@ -132,6 +166,7 @@ (out <- code) Output none Month_factor Ozone_sub30_factor Solar.R_factor Wind_factor + 1: 42.44 -5.252 13.95 -7.041 -2.167 2: 42.44 -5.252 -15.61 20.086 -14.050 3: 42.44 4.833 -15.61 0.596 -8.178 @@ -142,6 +177,7 @@ (out <- code) Output none S1 S2 S3 S4 + 1: 4.895 -0.5261 0.7831 -0.21023 -0.3885 2: 4.895 -0.6310 1.6288 -0.04498 -2.9298 @@ -151,6 +187,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -8.746 9.03 15.366 -2.619 -0.4293 2: 42.44 3.126 -4.50 -7.789 -4.401 -0.3161 3: 42.44 7.037 -22.86 -1.837 0.607 -0.5181 @@ -161,6 +198,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -9.294 9.327 17.31641 -1.754 -2.9935 2: 42.44 5.194 -5.506 -8.45049 -2.935 -2.1810 3: 42.44 6.452 -22.967 -0.09553 -1.310 0.3519 @@ -171,6 +209,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -6.952 10.777 12.160 -3.641 0.25767 2: 42.44 2.538 -2.586 -8.503 -5.376 0.04789 3: 42.44 5.803 -22.122 3.362 -2.926 -1.68514 @@ -181,6 +220,7 @@ (out <- code) Output none Solar.R Wind Temp Day Month_factor + 1: 42.44 -4.730 7.750 17.753 -2.601 -7.588 2: 42.44 2.338 -3.147 -5.310 -1.676 -7.588 3: 42.44 3.857 -17.469 -1.466 1.099 3.379 @@ -191,16 +231,29 @@ (out <- code) Output none Solar.R Wind Temp Day Month_factor + 1: 42.44 -9.165 11.815 13.184 -0.4473 -4.802 2: 42.44 3.652 -5.782 -6.524 -0.4349 -6.295 3: 42.44 6.268 -21.441 -7.323 1.6330 10.262 +# output_lm_mixed_vaeac + + Code + (out <- code) + Output + none Solar.R Wind Temp Day Month_factor + + 1: 42.44 -3.629 8.898 17.330 -2.5409 -9.4742 + 2: 42.44 3.938 -3.933 -8.190 0.6284 -7.8259 + 3: 42.44 5.711 -15.928 -3.216 2.2431 0.5899 + # output_lm_mixed_comb Code (out <- code) Output none Solar.R Wind Temp Day Month_factor + 1: 42.44 -7.886 10.511 16.292 -0.9519 -7.382 2: 42.44 5.001 -4.925 -7.015 -1.0954 -7.349 3: 42.44 5.505 -20.583 -4.328 0.7825 8.023 @@ -211,6 +264,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -4.537 8.269 17.517 -5.581 -3.066 2: 42.44 2.250 -3.345 -5.232 -5.581 -1.971 3: 42.44 3.708 -18.610 -1.440 -2.541 1.316 @@ 
-225,6 +279,7 @@ Output none Solar.R Wind Temp Month Day + 1: 42.44 -4.537 8.269 17.517 -5.581 -3.066 2: 42.44 2.250 -3.345 -5.232 -5.581 -1.971 3: 42.44 3.708 -18.610 -1.440 -2.541 1.316 @@ -239,6 +294,7 @@ Output none Solar.R Wind Temp Day Month_factor + 1: 42.44 -5.603 13.05 20.43 0.08508 -0.2664 2: 42.44 4.645 -12.57 -16.65 1.29133 -2.1574 3: 42.44 5.451 -14.01 -19.72 1.32503 6.3851 @@ -249,6 +305,7 @@ (out <- code) Output none Solar.R Wind + 1: 42.44 -13.818 10.579 2: 42.44 4.642 -6.287 3: 42.44 4.452 -34.602 @@ -259,6 +316,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -9.124 9.509 17.139 -1.4711 -3.451 2: 42.44 5.342 -6.097 -8.232 -2.8129 -2.079 3: 42.44 6.901 -21.079 -4.687 0.1494 1.146 @@ -269,6 +327,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -4.537 8.269 17.517 -5.581 -3.066 2: 42.44 2.250 -3.345 -5.232 -5.581 -1.971 3: 42.44 3.708 -18.610 -1.440 -2.541 1.316 @@ -279,6 +338,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -13.252 15.541 12.826 -5.77179 3.259 2: 42.44 2.758 -3.325 -7.992 -7.12800 1.808 3: 42.44 6.805 -22.126 3.730 -0.09235 -5.885 @@ -289,6 +349,7 @@ (out <- code) Output none Solar.R Wind Temp Month Day + 1: 42.44 -4.537 8.269 17.517 -5.581 -3.066 2: 42.44 2.250 -3.345 -5.232 -5.581 -1.971 3: 42.44 3.708 -18.610 -1.440 -2.541 1.316 diff --git a/tests/testthat/_snaps/output/output_custom_lm_numeric_independence_1.rds b/tests/testthat/_snaps/output/output_custom_lm_numeric_independence_1.rds index 977b26135..faa720cd3 100644 Binary files a/tests/testthat/_snaps/output/output_custom_lm_numeric_independence_1.rds and b/tests/testthat/_snaps/output/output_custom_lm_numeric_independence_1.rds differ diff --git a/tests/testthat/_snaps/output/output_custom_lm_numeric_independence_2.rds b/tests/testthat/_snaps/output/output_custom_lm_numeric_independence_2.rds index 977b26135..faa720cd3 100644 Binary files a/tests/testthat/_snaps/output/output_custom_lm_numeric_independence_2.rds and b/tests/testthat/_snaps/output/output_custom_lm_numeric_independence_2.rds differ diff --git a/tests/testthat/_snaps/output/output_custom_xgboost_mixed_dummy_ctree.rds b/tests/testthat/_snaps/output/output_custom_xgboost_mixed_dummy_ctree.rds index 112c76a59..f6b3d80ca 100644 Binary files a/tests/testthat/_snaps/output/output_custom_xgboost_mixed_dummy_ctree.rds and b/tests/testthat/_snaps/output/output_custom_xgboost_mixed_dummy_ctree.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_categorical_ctree.rds b/tests/testthat/_snaps/output/output_lm_categorical_ctree.rds index 1e7994de4..eddfb6733 100644 Binary files a/tests/testthat/_snaps/output/output_lm_categorical_ctree.rds and b/tests/testthat/_snaps/output/output_lm_categorical_ctree.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_categorical_independence.rds b/tests/testthat/_snaps/output/output_lm_categorical_independence.rds index 22749a3a4..140ceb5d0 100644 Binary files a/tests/testthat/_snaps/output/output_lm_categorical_independence.rds and b/tests/testthat/_snaps/output/output_lm_categorical_independence.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_categorical_method.rds b/tests/testthat/_snaps/output/output_lm_categorical_method.rds index 4fa4304f8..e5c62746f 100644 Binary files a/tests/testthat/_snaps/output/output_lm_categorical_method.rds and b/tests/testthat/_snaps/output/output_lm_categorical_method.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_categorical_vaeac.rds 
b/tests/testthat/_snaps/output/output_lm_categorical_vaeac.rds new file mode 100644 index 000000000..94b04392c Binary files /dev/null and b/tests/testthat/_snaps/output/output_lm_categorical_vaeac.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_mixed_comb.rds b/tests/testthat/_snaps/output/output_lm_mixed_comb.rds index ff09bdb93..8300a78bc 100644 Binary files a/tests/testthat/_snaps/output/output_lm_mixed_comb.rds and b/tests/testthat/_snaps/output/output_lm_mixed_comb.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_mixed_ctree.rds b/tests/testthat/_snaps/output/output_lm_mixed_ctree.rds index 60ba014c8..429c7837a 100644 Binary files a/tests/testthat/_snaps/output/output_lm_mixed_ctree.rds and b/tests/testthat/_snaps/output/output_lm_mixed_ctree.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_mixed_independence.rds b/tests/testthat/_snaps/output/output_lm_mixed_independence.rds index 11bf76964..14024d680 100644 Binary files a/tests/testthat/_snaps/output/output_lm_mixed_independence.rds and b/tests/testthat/_snaps/output/output_lm_mixed_independence.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_mixed_vaeac.rds b/tests/testthat/_snaps/output/output_lm_mixed_vaeac.rds new file mode 100644 index 000000000..ab0abc134 Binary files /dev/null and b/tests/testthat/_snaps/output/output_lm_mixed_vaeac.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_comb1.rds b/tests/testthat/_snaps/output/output_lm_numeric_comb1.rds index 87eb74630..67e8ca982 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_comb1.rds and b/tests/testthat/_snaps/output/output_lm_numeric_comb1.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_comb2.rds b/tests/testthat/_snaps/output/output_lm_numeric_comb2.rds index fc125c859..aebe607e8 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_comb2.rds and b/tests/testthat/_snaps/output/output_lm_numeric_comb2.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_comb3.rds b/tests/testthat/_snaps/output/output_lm_numeric_comb3.rds index 7ab8d808d..8dfecc3eb 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_comb3.rds and b/tests/testthat/_snaps/output/output_lm_numeric_comb3.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_copula.rds b/tests/testthat/_snaps/output/output_lm_numeric_copula.rds index b74a9cf5e..f0ce11bc2 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_copula.rds and b/tests/testthat/_snaps/output/output_lm_numeric_copula.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_ctree.rds b/tests/testthat/_snaps/output/output_lm_numeric_ctree.rds index 30a4aa879..cd92b5926 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_ctree.rds and b/tests/testthat/_snaps/output/output_lm_numeric_ctree.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_ctree_parallelized.rds b/tests/testthat/_snaps/output/output_lm_numeric_ctree_parallelized.rds index 30a4aa879..cd92b5926 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_ctree_parallelized.rds and b/tests/testthat/_snaps/output/output_lm_numeric_ctree_parallelized.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_empirical.rds b/tests/testthat/_snaps/output/output_lm_numeric_empirical.rds index 8a7c73d52..cc396937f 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_empirical.rds and 
b/tests/testthat/_snaps/output/output_lm_numeric_empirical.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_empirical_AICc_each.rds b/tests/testthat/_snaps/output/output_lm_numeric_empirical_AICc_each.rds index 641ff5c7d..f3dd31c55 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_empirical_AICc_each.rds and b/tests/testthat/_snaps/output/output_lm_numeric_empirical_AICc_each.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_empirical_AICc_full.rds b/tests/testthat/_snaps/output/output_lm_numeric_empirical_AICc_full.rds index 3352c5c0f..45c1baa52 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_empirical_AICc_full.rds and b/tests/testthat/_snaps/output/output_lm_numeric_empirical_AICc_full.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_empirical_independence.rds b/tests/testthat/_snaps/output/output_lm_numeric_empirical_independence.rds index c1c8a06d6..873268bc8 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_empirical_independence.rds and b/tests/testthat/_snaps/output/output_lm_numeric_empirical_independence.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_empirical_n_combinations.rds b/tests/testthat/_snaps/output/output_lm_numeric_empirical_n_combinations.rds index b1a489f6e..acf7f5e78 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_empirical_n_combinations.rds and b/tests/testthat/_snaps/output/output_lm_numeric_empirical_n_combinations.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_empirical_progress.rds b/tests/testthat/_snaps/output/output_lm_numeric_empirical_progress.rds index 9f6ee8493..b7311ca0a 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_empirical_progress.rds and b/tests/testthat/_snaps/output/output_lm_numeric_empirical_progress.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_gaussian.rds b/tests/testthat/_snaps/output/output_lm_numeric_gaussian.rds index d941cac15..628f63a1c 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_gaussian.rds and b/tests/testthat/_snaps/output/output_lm_numeric_gaussian.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_independence.rds b/tests/testthat/_snaps/output/output_lm_numeric_independence.rds index 0cec9d0fe..46cdda26b 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_independence.rds and b/tests/testthat/_snaps/output/output_lm_numeric_independence.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_independence_MSEv_Shapley_weights.rds b/tests/testthat/_snaps/output/output_lm_numeric_independence_MSEv_Shapley_weights.rds index e76474f7a..5273db365 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_independence_MSEv_Shapley_weights.rds and b/tests/testthat/_snaps/output/output_lm_numeric_independence_MSEv_Shapley_weights.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_independence_keep_samp_for_vS.rds b/tests/testthat/_snaps/output/output_lm_numeric_independence_keep_samp_for_vS.rds index dcc65e157..b9142b857 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_independence_keep_samp_for_vS.rds and b/tests/testthat/_snaps/output/output_lm_numeric_independence_keep_samp_for_vS.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_independence_n_batches_10.rds b/tests/testthat/_snaps/output/output_lm_numeric_independence_n_batches_10.rds index 
a27de9805..e05527ffc 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_independence_n_batches_10.rds and b/tests/testthat/_snaps/output/output_lm_numeric_independence_n_batches_10.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_interaction.rds b/tests/testthat/_snaps/output/output_lm_numeric_interaction.rds index b9d755ebd..4696060f7 100644 Binary files a/tests/testthat/_snaps/output/output_lm_numeric_interaction.rds and b/tests/testthat/_snaps/output/output_lm_numeric_interaction.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_numeric_vaeac.rds b/tests/testthat/_snaps/output/output_lm_numeric_vaeac.rds new file mode 100644 index 000000000..e68838c69 Binary files /dev/null and b/tests/testthat/_snaps/output/output_lm_numeric_vaeac.rds differ diff --git a/tests/testthat/_snaps/output/output_lm_timeseries_method.rds b/tests/testthat/_snaps/output/output_lm_timeseries_method.rds index 88a29f9b0..cf15a0fb0 100644 Binary files a/tests/testthat/_snaps/output/output_lm_timeseries_method.rds and b/tests/testthat/_snaps/output/output_lm_timeseries_method.rds differ
diff --git a/tests/testthat/_snaps/plot/msev-bar-50-ci.svg b/tests/testthat/_snaps/plot/msev-bar-50-ci.svg index 7d5630803..20d6fe5e4 100644 [updated SVG snapshot, markup omitted: MSEv bar plot with 50% CI; x-axis "Method" (Emp., Gaus., Ctree, Comb.), y-axis "MSEv"]
diff --git a/tests/testthat/_snaps/plot/msev-bar-without-ci.svg b/tests/testthat/_snaps/plot/msev-bar-without-ci.svg index 1be15b58a..053323a67 100644 [updated SVG snapshot, markup omitted: MSEv bar plot without CI; x-axis "Method" (Emp., Gaus., Ctree, Comb.), y-axis "MSEv"]
diff --git a/tests/testthat/_snaps/plot/msev-combinations-for-specified-combinations.svg b/tests/testthat/_snaps/plot/msev-combinations-for-specified-combinations.svg index e69de29bb..a9beb90c2 100644 [SVG snapshot filled in (+186 lines), markup omitted: MSEv (combination) criterion averaged over the 3 explicands for each combination with 95% CI, shown for id_combination 3, 4, 9, 13, 14, 15 and the methods Emp., Gaus., Ctree, Comb.]
diff --git a/tests/testthat/_snaps/plot/plot-sv-several-approaches-default.svg b/tests/testthat/_snaps/plot/plot-sv-several-approaches-default.svg new file mode 100644 index 000000000..df8f70f0c [new SVG snapshot (+238 lines), markup omitted: "Shapley value prediction explanation", one facet per explicand (id: 1, pred = 55.05; id: 2, pred = 28.57; id: 3, pred = 24.88); x-axis "Feature contribution (Shapley value φj)", y-axis "Feature and value"; methods Emp., Gaus., Ctree, Comb.]
diff --git a/tests/testthat/_snaps/plot/plot-sv-several-div-input-1.svg b/tests/testthat/_snaps/plot/plot-sv-several-div-input-1.svg new file mode 100644 index 000000000..bb1098c82 [new SVG snapshot (+239 lines), markup omitted: same plot with the "None" (phi0) contribution included for each explicand]
diff --git a/tests/testthat/_snaps/plot/plot-sv-several-div-input-2.svg b/tests/testthat/_snaps/plot/plot-sv-several-div-input-2.svg new file mode 100644 index 000000000..ef5bde04a [new SVG snapshot (+166 lines), markup omitted: only explicands 1 and 3, vertical bars]
diff --git a/tests/testthat/_snaps/plot/plot-sv-several-div-input-3.svg b/tests/testthat/_snaps/plot/plot-sv-several-div-input-3.svg new file mode 100644 index 000000000..2c93513a3 [new SVG snapshot (+188 lines), markup omitted: only the features Solar.R, Day, and Month plus the "None" row for each explicand]
diff --git a/tests/testthat/_snaps/setup.md b/tests/testthat/_snaps/setup.md index d21d7c28c..2d040cb13 100644 --- a/tests/testthat/_snaps/setup.md +++ b/tests/testthat/_snaps/setup.md @@ -26,6 +26,7 @@ Output none Solar.R Wind Temp Day Month_factor + 1: 42.44 -4.730 7.750 17.753 -2.601 -7.588 2: 42.44 2.338 -3.147 -5.310 -1.676 -7.588 3: 42.44 3.857 -17.469 -1.466 1.099 3.379 @@ -45,6 +46,7 @@ Output none Solar.R Wind Temp Day Month_factor + 1: 42.44 -4.730 7.750 17.753 -2.601 -7.588 2: 42.44 2.338 -3.147 -5.310 -1.676 -7.588 3: 42.44 3.857 -17.469 -1.466 1.099 3.379 @@ -64,6 +66,7 @@ Output none Solar.R Wind Temp Day Month_factor + 1: 42.44 -4.730 7.750 17.753 -2.601 -7.588 2: 42.44 2.338 -3.147 -5.310 -1.676 -7.588 3: 42.44 3.857 -17.469 -1.466 1.099 3.379 @@ -84,6 +87,7 @@ Output none Solar.R Wind Temp Day Month_factor + 1: 42.44 -4.730 7.750 17.753 -2.601 -7.588 2: 42.44 2.338 -3.147 -5.310 -1.676 -7.588 3: 42.44 3.857 -17.469 -1.466 1.099 3.379 @@ -175,7 +179,7 @@ Condition Error in `check_approach()`: ! `approach` must be one of the following: - categorical, copula, ctree, empirical, gaussian, independence, timeseries + categorical, copula, ctree, empirical, gaussian, independence, timeseries, vaeac or a vector of length one less than the number of features ( 4 ), with only the above strings. --- @@ -188,7 +192,7 @@ Condition Error in `check_approach()`: ! `approach` must be one of the following: - categorical, copula, ctree, empirical, gaussian, independence, timeseries + categorical, copula, ctree, empirical, gaussian, independence, timeseries, vaeac or a vector of length one less than the number of features ( 4 ), with only the above strings. --- @@ -201,7 +205,7 @@ Condition Error in `check_approach()`: ! `approach` must be one of the following: - categorical, copula, ctree, empirical, gaussian, independence, timeseries + categorical, copula, ctree, empirical, gaussian, independence, timeseries, vaeac or a vector of length one less than the number of features ( 4 ), with only the above strings. # erroneous input: `prediction_zero` @@ -818,7 +822,7 @@ Error in `setup_approach.gaussian()`: ! The following feature(s) are factor(s): Month_factor. approach = 'gaussian' does not support factor features. - Please change approach to one of 'independence' (not recommended), 'ctree', 'categorical'. + Please change approach to one of 'independence' (not recommended), 'ctree', 'vaeac', 'categorical'. --- @@ -831,7 +835,7 @@ Error in `setup_approach.empirical()`: ! The following feature(s) are factor(s): Month_factor.
approach = 'empirical' does not support factor features. - Please change approach to one of 'independence' (not recommended), 'ctree', 'categorical'. + Please change approach to one of 'independence' (not recommended), 'ctree', 'vaeac', 'categorical'. --- @@ -844,5 +848,5 @@ Error in `setup_approach.copula()`: ! The following feature(s) are factor(s): Month_factor. approach = 'copula' does not support factor features. - Please change approach to one of 'independence' (not recommended), 'ctree', 'categorical'. + Please change approach to one of 'independence' (not recommended), 'ctree', 'vaeac', 'categorical'. diff --git a/tests/testthat/test-output.R b/tests/testthat/test-output.R index f6c6f975a..cea7c696a 100644 --- a/tests/testthat/test-output.R +++ b/tests/testthat/test-output.R @@ -160,6 +160,28 @@ test_that("output_lm_numeric_ctree", { ) }) +test_that("output_lm_numeric_vaeac", { + expect_snapshot_rds( + explain( + model = model_lm_numeric, + x_explain = x_explain_numeric, + x_train = x_train_numeric, + approach = "vaeac", + prediction_zero = p0, + n_batches = 1, + timing = FALSE, + n_samples = 10, # Low value here to speed up the time + vaeac.epochs = 4, # Low value here to speed up the time + vaeac.n_vaeacs_initialize = 2, # Low value here to speed up the time + vaeac.extra_parameters = list( + vaeac.epochs_initiation_phase = 2, # Low value here to speed up the time + vaeac.save_model = FALSE # Removes names and objects such as tmpdir and tmpfile + ) + ), + "output_lm_numeric_vaeac" + ) +}) + test_that("output_lm_categorical_ctree", { expect_snapshot_rds( explain( @@ -175,6 +197,28 @@ test_that("output_lm_categorical_ctree", { ) }) +test_that("output_lm_categorical_vaeac", { + expect_snapshot_rds( + explain( + model = model_lm_categorical, + x_explain = x_explain_categorical, + x_train = x_train_categorical, + approach = "vaeac", + prediction_zero = p0, + n_batches = 1, + timing = FALSE, + n_samples = 10, # Low value here to speed up the time + vaeac.epochs = 4, # Low value here to speed up the time + vaeac.n_vaeacs_initialize = 2, # Low value here to speed up the time + vaeac.extra_parameters = list( + vaeac.epochs_initiation_phase = 2, # Low value here to speed up the time + vaeac.save_model = FALSE # Removes tmpdir and tmpfiles + ) + ), + "output_lm_categorical_vaeac" + ) +}) + test_that("output_lm_categorical_categorical", { expect_snapshot_rds( explain( @@ -299,6 +343,28 @@ test_that("output_lm_mixed_ctree", { ) }) +test_that("output_lm_mixed_vaeac", { + expect_snapshot_rds( + explain( + model = model_lm_mixed, + x_explain = x_explain_mixed, + x_train = x_train_mixed, + approach = "vaeac", + prediction_zero = p0, + n_batches = 1, + timing = FALSE, + n_samples = 10, # Low value here to speed up the time + vaeac.epochs = 4, # Low value here to speed up the time + vaeac.n_vaeacs_initialize = 2, # Low value here to speed up the time + vaeac.extra_parameters = list( + vaeac.epochs_initiation_phase = 2, # Low value here to speed up the time + vaeac.save_model = FALSE # Removes tmpdir and tmpfiles + ) + ), + "output_lm_mixed_vaeac" + ) +}) + test_that("output_lm_mixed_comb", { set.seed(123) expect_snapshot_rds( diff --git a/tests/testthat/test-plot.R b/tests/testthat/test-plot.R index e8c34a2b8..5cb291cda 100644 --- a/tests/testthat/test-plot.R +++ b/tests/testthat/test-plot.R @@ -50,6 +50,14 @@ explain_numeric_combined <- explain( timing = FALSE ) +# Create a list of explanations with names +explanation_list_named <- list( + "Emp." = explain_numeric_empirical, + "Gaus." 
= explain_numeric_gaussian, + "Ctree" = explain_numeric_ctree, + "Comb." = explain_numeric_combined +) + test_that("checking default outputs", { skip_if_not_installed("vdiffr") @@ -173,14 +181,6 @@ test_that("beeswarm_plot_new_arguments", { test_that("MSEv evaluation criterion plots", { skip_if_not_installed("vdiffr") - # Create a list of explanations with names - explanation_list_named <- list( - "Emp." = explain_numeric_empirical, - "Gaus." = explain_numeric_gaussian, - "Ctree" = explain_numeric_ctree, - "Comb." = explain_numeric_combined - ) - MSEv_plots <- plot_MSEv_eval_crit( explanation_list_named, plot_type = c("overall", "comb", "explicand"), @@ -270,3 +270,47 @@ test_that("MSEv evaluation criterion plots", { )$MSEv_combination_bar ) }) + +test_that("plot_SV_several_approaches_explanations", { + skip_if_not_installed("vdiffr") + + vdiffr::expect_doppelganger( + title = "plot_SV_several_approaches_default", + fig = plot_SV_several_approaches(explanation_list_named) + ) + + + vdiffr::expect_doppelganger( + title = "plot_SV_several_div_input_1", + fig = plot_SV_several_approaches(explanation_list_named, + plot_phi0 = TRUE, + add_zero_line = TRUE, + facet_ncol = 3, + facet_scales = "free_y", + horizontal_bars = TRUE + ) + ) + + vdiffr::expect_doppelganger( + title = "plot_SV_several_div_input_2", + fig = plot_SV_several_approaches(explanation_list_named, + axis_labels_n_dodge = 1, + facet_ncol = 1, + facet_scales = "free_x", + horizontal_bars = FALSE, + index_explicands = c(1, 3), + add_zero_line = TRUE + ) + ) + + vdiffr::expect_doppelganger( + title = "plot_SV_several_div_input_3", + fig = plot_SV_several_approaches(explanation_list_named, + facet_ncol = 1, + facet_scales = "free_y", + brewer_palette = "Set1", + only_these_features = c("Month", "Day", "Solar.R"), + plot_phi0 = TRUE + ) + ) +}) diff --git a/tests/testthat/test-setup.R b/tests/testthat/test-setup.R index f60a15363..6fdb0b9e0 100644 --- a/tests/testthat/test-setup.R +++ b/tests/testthat/test-setup.R @@ -1599,7 +1599,7 @@ test_that("parallelization gives same output for any approach", { timing = FALSE ) - future::plan("multisession", workers = 5) # Parallelized with 2 cores + future::plan("multisession", workers = 2) # Parallelized with 2 cores explain.ctree_multisession <- explain( model = model_lm_numeric, x_explain = x_explain_numeric, @@ -1965,3 +1965,108 @@ test_that("counting the number of unique approaches", { expect_equal(explanation_combined_5$internal$parameters$n_approaches, 4) expect_equal(explanation_combined_5$internal$parameters$n_unique_approaches, 2) }) + + + +test_that("vaeac_set_seed_works", { + # Train two vaeac models with the same seed + explanation_vaeac_1 <- explain( + model = model_lm_mixed, + x_explain = x_explain_mixed, + x_train = x_train_mixed, + approach = "vaeac", + prediction_zero = p0, + n_samples = 10, + n_batches = 2, + seed = 1, + vaeac.epochs = 4, + vaeac.n_vaeacs_initialize = 2, + vaeac.extra_parameters = list( + vaeac.epochs_initiation_phase = 2 + ) + ) + + explanation_vaeac_2 <- explain( + model = model_lm_mixed, + x_explain = x_explain_mixed, + x_train = x_train_mixed, + approach = "vaeac", + prediction_zero = p0, + n_samples = 10, + n_batches = 2, + seed = 1, + vaeac.epochs = 4, + vaeac.n_vaeacs_initialize = 2, + vaeac.extra_parameters = list( + vaeac.epochs_initiation_phase = 2 + ) + ) + + # Check for equal Shapley values + expect_equal(explanation_vaeac_1$shapley_values, explanation_vaeac_2$shapley_values) +}) + +test_that("vaeac_pretreained_vaeac_model", { + # Test that we can 
skip training a new vaeac model if we already + # have trained it in a previous shapr::explain object. + + explanation_vaeac_1 <- explain( + model = model_lm_mixed, + x_explain = x_explain_mixed, + x_train = x_train_mixed, + approach = "vaeac", + prediction_zero = p0, + n_samples = 10, + n_batches = 2, + seed = 1, + vaeac.epochs = 4, + vaeac.n_vaeacs_initialize = 2, + vaeac.extra_parameters = list( + vaeac.epochs_initiation_phase = 2 + ) + ) + + #### We can do this by reusing the vaeac model OBJECT + # Get the pretrained vaeac model object + vaeac.pretrained_vaeac_model <- explanation_vaeac_1$internal$parameters$vaeac + + # send the pre-trained vaeac model to the explain function + explanation_pretrained_vaeac <- explain( + model = model_lm_mixed, + x_explain = x_explain_mixed, + x_train = x_train_mixed, + approach = "vaeac", + prediction_zero = p0, + n_samples = 10, + n_batches = 2, + seed = 1, + vaeac.extra_parameters = list( + vaeac.pretrained_vaeac_model = vaeac.pretrained_vaeac_model + ) + ) + + # Check for equal Shapley values + expect_equal(explanation_vaeac_1$shapley_values, explanation_pretrained_vaeac$shapley_values) + + #### We can also do this by reusing the vaeac model PATH + # Get the pre-trained vaeac model path + vaeac.pretrained_vaeac_path <- explanation_vaeac_1$internal$parameters$vaeac$models$best + + # send the pre-trained vaeac model to the explain function + explanation_pretrained_vaeac <- explain( + model = model_lm_mixed, + x_explain = x_explain_mixed, + x_train = x_train_mixed, + approach = "vaeac", + prediction_zero = p0, + n_samples = 10, + n_batches = 2, + seed = 1, + vaeac.extra_parameters = list( + vaeac.pretrained_vaeac_model = vaeac.pretrained_vaeac_path + ) + ) + + # Check for equal Shapley values + expect_equal(explanation_vaeac_1$shapley_values, explanation_pretrained_vaeac$shapley_values) +}) diff --git a/vignettes/.gitignore b/vignettes/.gitignore index 097b24163..98ec82408 100644 --- a/vignettes/.gitignore +++ b/vignettes/.gitignore @@ -1,2 +1,3 @@ *.html *.R +cache/* diff --git a/vignettes/cache_main/__packages b/vignettes/cache_main/__packages new file mode 100644 index 000000000..b6820f803 --- /dev/null +++ b/vignettes/cache_main/__packages @@ -0,0 +1,14 @@ +shapr +data.table +mvtnorm +condMVNorm +nlme +mgcv +ggplot2 +future +future.apply +testthat +xgboost +progressr +ranger +gbm diff --git a/vignettes/cache_main/vaeac-early-stop_250bb769b1d68f465dc693964cc773ca.RData b/vignettes/cache_main/vaeac-early-stop_250bb769b1d68f465dc693964cc773ca.RData new file mode 100644 index 000000000..4691d01e8 Binary files /dev/null and b/vignettes/cache_main/vaeac-early-stop_250bb769b1d68f465dc693964cc773ca.RData differ diff --git a/vignettes/cache_main/vaeac-early-stop_250bb769b1d68f465dc693964cc773ca.rdb b/vignettes/cache_main/vaeac-early-stop_250bb769b1d68f465dc693964cc773ca.rdb new file mode 100644 index 000000000..1d6053bee Binary files /dev/null and b/vignettes/cache_main/vaeac-early-stop_250bb769b1d68f465dc693964cc773ca.rdb differ diff --git a/vignettes/cache_main/vaeac-early-stop_250bb769b1d68f465dc693964cc773ca.rdx b/vignettes/cache_main/vaeac-early-stop_250bb769b1d68f465dc693964cc773ca.rdx new file mode 100644 index 000000000..fc03da995 Binary files /dev/null and b/vignettes/cache_main/vaeac-early-stop_250bb769b1d68f465dc693964cc773ca.rdx differ diff --git a/vignettes/cache_main/vaeac-plot-1_6a0b3339c76089c5cb60e3cb488922e4.RData b/vignettes/cache_main/vaeac-plot-1_6a0b3339c76089c5cb60e3cb488922e4.RData new file mode 100644 index 000000000..49d899d68 
Binary files /dev/null and b/vignettes/cache_main/vaeac-plot-1_6a0b3339c76089c5cb60e3cb488922e4.RData differ diff --git a/vignettes/cache_main/vaeac-plot-1_6a0b3339c76089c5cb60e3cb488922e4.rdb b/vignettes/cache_main/vaeac-plot-1_6a0b3339c76089c5cb60e3cb488922e4.rdb new file mode 100644 index 000000000..e69de29bb diff --git a/vignettes/cache_main/vaeac-plot-1_6a0b3339c76089c5cb60e3cb488922e4.rdx b/vignettes/cache_main/vaeac-plot-1_6a0b3339c76089c5cb60e3cb488922e4.rdx new file mode 100644 index 000000000..5940e3599 Binary files /dev/null and b/vignettes/cache_main/vaeac-plot-1_6a0b3339c76089c5cb60e3cb488922e4.rdx differ diff --git a/vignettes/cache_main/vaeac-plot-2_a4a0b4b51a5f30adfa7798e41301600b.RData b/vignettes/cache_main/vaeac-plot-2_a4a0b4b51a5f30adfa7798e41301600b.RData new file mode 100644 index 000000000..756a5f25f Binary files /dev/null and b/vignettes/cache_main/vaeac-plot-2_a4a0b4b51a5f30adfa7798e41301600b.RData differ diff --git a/vignettes/cache_main/vaeac-plot-2_a4a0b4b51a5f30adfa7798e41301600b.rdb b/vignettes/cache_main/vaeac-plot-2_a4a0b4b51a5f30adfa7798e41301600b.rdb new file mode 100644 index 000000000..e69de29bb diff --git a/vignettes/cache_main/vaeac-plot-2_a4a0b4b51a5f30adfa7798e41301600b.rdx b/vignettes/cache_main/vaeac-plot-2_a4a0b4b51a5f30adfa7798e41301600b.rdx new file mode 100644 index 000000000..5940e3599 Binary files /dev/null and b/vignettes/cache_main/vaeac-plot-2_a4a0b4b51a5f30adfa7798e41301600b.rdx differ diff --git a/vignettes/cache_main/vaeac-plot-3_e4241442e934deb23f8bc117389d3e93.RData b/vignettes/cache_main/vaeac-plot-3_e4241442e934deb23f8bc117389d3e93.RData new file mode 100644 index 000000000..9507eb8b6 Binary files /dev/null and b/vignettes/cache_main/vaeac-plot-3_e4241442e934deb23f8bc117389d3e93.RData differ diff --git a/vignettes/cache_main/vaeac-plot-3_e4241442e934deb23f8bc117389d3e93.rdb b/vignettes/cache_main/vaeac-plot-3_e4241442e934deb23f8bc117389d3e93.rdb new file mode 100644 index 000000000..e69de29bb diff --git a/vignettes/cache_main/vaeac-plot-3_e4241442e934deb23f8bc117389d3e93.rdx b/vignettes/cache_main/vaeac-plot-3_e4241442e934deb23f8bc117389d3e93.rdx new file mode 100644 index 000000000..5940e3599 Binary files /dev/null and b/vignettes/cache_main/vaeac-plot-3_e4241442e934deb23f8bc117389d3e93.rdx differ diff --git a/vignettes/cache_main/vaeac-train-first-time_aaf6fed245054f4af50b60646b3800be.RData b/vignettes/cache_main/vaeac-train-first-time_aaf6fed245054f4af50b60646b3800be.RData new file mode 100644 index 000000000..b313b4c47 Binary files /dev/null and b/vignettes/cache_main/vaeac-train-first-time_aaf6fed245054f4af50b60646b3800be.RData differ diff --git a/vignettes/cache_main/vaeac-train-first-time_aaf6fed245054f4af50b60646b3800be.rdb b/vignettes/cache_main/vaeac-train-first-time_aaf6fed245054f4af50b60646b3800be.rdb new file mode 100644 index 000000000..5f86771f0 Binary files /dev/null and b/vignettes/cache_main/vaeac-train-first-time_aaf6fed245054f4af50b60646b3800be.rdb differ diff --git a/vignettes/cache_main/vaeac-train-first-time_aaf6fed245054f4af50b60646b3800be.rdx b/vignettes/cache_main/vaeac-train-first-time_aaf6fed245054f4af50b60646b3800be.rdx new file mode 100644 index 000000000..97b787485 Binary files /dev/null and b/vignettes/cache_main/vaeac-train-first-time_aaf6fed245054f4af50b60646b3800be.rdx differ diff --git a/vignettes/cache_vaeac/__packages b/vignettes/cache_vaeac/__packages new file mode 100644 index 000000000..b6820f803 --- /dev/null +++ b/vignettes/cache_vaeac/__packages @@ -0,0 +1,14 @@ +shapr +data.table +mvtnorm 
+condMVNorm +nlme +mgcv +ggplot2 +future +future.apply +testthat +xgboost +progressr +ranger +gbm diff --git a/vignettes/cache_vaeac/check-n_combinations-and-more-batches-2_a562c1ee62fe137bd2a7c7cbd6a1d46e.RData b/vignettes/cache_vaeac/check-n_combinations-and-more-batches-2_a562c1ee62fe137bd2a7c7cbd6a1d46e.RData new file mode 100644 index 000000000..044f62b41 Binary files /dev/null and b/vignettes/cache_vaeac/check-n_combinations-and-more-batches-2_a562c1ee62fe137bd2a7c7cbd6a1d46e.RData differ diff --git a/vignettes/cache_vaeac/check-n_combinations-and-more-batches-2_a562c1ee62fe137bd2a7c7cbd6a1d46e.rdb b/vignettes/cache_vaeac/check-n_combinations-and-more-batches-2_a562c1ee62fe137bd2a7c7cbd6a1d46e.rdb new file mode 100644 index 000000000..b1e1c505a Binary files /dev/null and b/vignettes/cache_vaeac/check-n_combinations-and-more-batches-2_a562c1ee62fe137bd2a7c7cbd6a1d46e.rdb differ diff --git a/vignettes/cache_vaeac/check-n_combinations-and-more-batches-2_a562c1ee62fe137bd2a7c7cbd6a1d46e.rdx b/vignettes/cache_vaeac/check-n_combinations-and-more-batches-2_a562c1ee62fe137bd2a7c7cbd6a1d46e.rdx new file mode 100644 index 000000000..f4f5627f1 Binary files /dev/null and b/vignettes/cache_vaeac/check-n_combinations-and-more-batches-2_a562c1ee62fe137bd2a7c7cbd6a1d46e.rdx differ diff --git a/vignettes/cache_vaeac/check-n_combinations-and-more-batches_ac659ed1fa037c153639e26a028fc01f.RData b/vignettes/cache_vaeac/check-n_combinations-and-more-batches_ac659ed1fa037c153639e26a028fc01f.RData new file mode 100644 index 000000000..8cd96aa30 Binary files /dev/null and b/vignettes/cache_vaeac/check-n_combinations-and-more-batches_ac659ed1fa037c153639e26a028fc01f.RData differ diff --git a/vignettes/cache_vaeac/check-n_combinations-and-more-batches_ac659ed1fa037c153639e26a028fc01f.rdb b/vignettes/cache_vaeac/check-n_combinations-and-more-batches_ac659ed1fa037c153639e26a028fc01f.rdb new file mode 100644 index 000000000..5ab41056a Binary files /dev/null and b/vignettes/cache_vaeac/check-n_combinations-and-more-batches_ac659ed1fa037c153639e26a028fc01f.rdb differ diff --git a/vignettes/cache_vaeac/check-n_combinations-and-more-batches_ac659ed1fa037c153639e26a028fc01f.rdx b/vignettes/cache_vaeac/check-n_combinations-and-more-batches_ac659ed1fa037c153639e26a028fc01f.rdx new file mode 100644 index 000000000..299cf82e1 Binary files /dev/null and b/vignettes/cache_vaeac/check-n_combinations-and-more-batches_ac659ed1fa037c153639e26a028fc01f.rdx differ diff --git a/vignettes/cache_vaeac/continue-training_2864f63b172c16fa9a4fd41a2432802d.RData b/vignettes/cache_vaeac/continue-training_2864f63b172c16fa9a4fd41a2432802d.RData new file mode 100644 index 000000000..81e281489 Binary files /dev/null and b/vignettes/cache_vaeac/continue-training_2864f63b172c16fa9a4fd41a2432802d.RData differ diff --git a/vignettes/cache_vaeac/continue-training_2864f63b172c16fa9a4fd41a2432802d.rdb b/vignettes/cache_vaeac/continue-training_2864f63b172c16fa9a4fd41a2432802d.rdb new file mode 100644 index 000000000..59b514270 Binary files /dev/null and b/vignettes/cache_vaeac/continue-training_2864f63b172c16fa9a4fd41a2432802d.rdb differ diff --git a/vignettes/cache_vaeac/continue-training_2864f63b172c16fa9a4fd41a2432802d.rdx b/vignettes/cache_vaeac/continue-training_2864f63b172c16fa9a4fd41a2432802d.rdx new file mode 100644 index 000000000..ba143faeb Binary files /dev/null and b/vignettes/cache_vaeac/continue-training_2864f63b172c16fa9a4fd41a2432802d.rdx differ diff --git a/vignettes/cache_vaeac/ctree-mixed-data_e1d63dcc24d7738078fd1b26fd58af4e.RData 
b/vignettes/cache_vaeac/ctree-mixed-data_e1d63dcc24d7738078fd1b26fd58af4e.RData new file mode 100644 index 000000000..837736884 Binary files /dev/null and b/vignettes/cache_vaeac/ctree-mixed-data_e1d63dcc24d7738078fd1b26fd58af4e.RData differ diff --git a/vignettes/cache_vaeac/ctree-mixed-data_e1d63dcc24d7738078fd1b26fd58af4e.rdb b/vignettes/cache_vaeac/ctree-mixed-data_e1d63dcc24d7738078fd1b26fd58af4e.rdb new file mode 100644 index 000000000..662705fdc Binary files /dev/null and b/vignettes/cache_vaeac/ctree-mixed-data_e1d63dcc24d7738078fd1b26fd58af4e.rdb differ diff --git a/vignettes/cache_vaeac/ctree-mixed-data_e1d63dcc24d7738078fd1b26fd58af4e.rdx b/vignettes/cache_vaeac/ctree-mixed-data_e1d63dcc24d7738078fd1b26fd58af4e.rdx new file mode 100644 index 000000000..7cd6c6bdd Binary files /dev/null and b/vignettes/cache_vaeac/ctree-mixed-data_e1d63dcc24d7738078fd1b26fd58af4e.rdx differ diff --git a/vignettes/cache_vaeac/early-stopping-1_74ac40d2cdc680f0a1a6b07d82a6686f.RData b/vignettes/cache_vaeac/early-stopping-1_74ac40d2cdc680f0a1a6b07d82a6686f.RData new file mode 100644 index 000000000..3504b0a1f Binary files /dev/null and b/vignettes/cache_vaeac/early-stopping-1_74ac40d2cdc680f0a1a6b07d82a6686f.RData differ diff --git a/vignettes/cache_vaeac/early-stopping-1_74ac40d2cdc680f0a1a6b07d82a6686f.rdb b/vignettes/cache_vaeac/early-stopping-1_74ac40d2cdc680f0a1a6b07d82a6686f.rdb new file mode 100644 index 000000000..dabeb5036 Binary files /dev/null and b/vignettes/cache_vaeac/early-stopping-1_74ac40d2cdc680f0a1a6b07d82a6686f.rdb differ diff --git a/vignettes/cache_vaeac/early-stopping-1_74ac40d2cdc680f0a1a6b07d82a6686f.rdx b/vignettes/cache_vaeac/early-stopping-1_74ac40d2cdc680f0a1a6b07d82a6686f.rdx new file mode 100644 index 000000000..a0f61687c Binary files /dev/null and b/vignettes/cache_vaeac/early-stopping-1_74ac40d2cdc680f0a1a6b07d82a6686f.rdx differ diff --git a/vignettes/cache_vaeac/early-stopping-2_fc519d0da7e30e6e42052802434923a8.RData b/vignettes/cache_vaeac/early-stopping-2_fc519d0da7e30e6e42052802434923a8.RData new file mode 100644 index 000000000..a078bfbfc Binary files /dev/null and b/vignettes/cache_vaeac/early-stopping-2_fc519d0da7e30e6e42052802434923a8.RData differ diff --git a/vignettes/cache_vaeac/early-stopping-2_fc519d0da7e30e6e42052802434923a8.rdb b/vignettes/cache_vaeac/early-stopping-2_fc519d0da7e30e6e42052802434923a8.rdb new file mode 100644 index 000000000..c99c45fb0 Binary files /dev/null and b/vignettes/cache_vaeac/early-stopping-2_fc519d0da7e30e6e42052802434923a8.rdb differ diff --git a/vignettes/cache_vaeac/early-stopping-2_fc519d0da7e30e6e42052802434923a8.rdx b/vignettes/cache_vaeac/early-stopping-2_fc519d0da7e30e6e42052802434923a8.rdx new file mode 100644 index 000000000..678954f44 Binary files /dev/null and b/vignettes/cache_vaeac/early-stopping-2_fc519d0da7e30e6e42052802434923a8.rdx differ diff --git a/vignettes/cache_vaeac/early-stopping-3_d482cf8ddc63f4f8c9a67070f71c26c0.RData b/vignettes/cache_vaeac/early-stopping-3_d482cf8ddc63f4f8c9a67070f71c26c0.RData new file mode 100644 index 000000000..e14a17e7a Binary files /dev/null and b/vignettes/cache_vaeac/early-stopping-3_d482cf8ddc63f4f8c9a67070f71c26c0.RData differ diff --git a/vignettes/cache_vaeac/early-stopping-3_d482cf8ddc63f4f8c9a67070f71c26c0.rdb b/vignettes/cache_vaeac/early-stopping-3_d482cf8ddc63f4f8c9a67070f71c26c0.rdb new file mode 100644 index 000000000..7c908f43b Binary files /dev/null and b/vignettes/cache_vaeac/early-stopping-3_d482cf8ddc63f4f8c9a67070f71c26c0.rdb differ diff --git 
a/vignettes/cache_vaeac/early-stopping-3_d482cf8ddc63f4f8c9a67070f71c26c0.rdx b/vignettes/cache_vaeac/early-stopping-3_d482cf8ddc63f4f8c9a67070f71c26c0.rdx new file mode 100644 index 000000000..de028c9bb Binary files /dev/null and b/vignettes/cache_vaeac/early-stopping-3_d482cf8ddc63f4f8c9a67070f71c26c0.rdx differ diff --git a/vignettes/cache_vaeac/first-vaeac-plots_ae51e52bd1fd298cf3582a6757f8e42a.RData b/vignettes/cache_vaeac/first-vaeac-plots_ae51e52bd1fd298cf3582a6757f8e42a.RData new file mode 100644 index 000000000..8f59b2735 Binary files /dev/null and b/vignettes/cache_vaeac/first-vaeac-plots_ae51e52bd1fd298cf3582a6757f8e42a.RData differ diff --git a/vignettes/cache_vaeac/first-vaeac-plots_ae51e52bd1fd298cf3582a6757f8e42a.rdb b/vignettes/cache_vaeac/first-vaeac-plots_ae51e52bd1fd298cf3582a6757f8e42a.rdb new file mode 100644 index 000000000..e69de29bb diff --git a/vignettes/cache_vaeac/first-vaeac-plots_ae51e52bd1fd298cf3582a6757f8e42a.rdx b/vignettes/cache_vaeac/first-vaeac-plots_ae51e52bd1fd298cf3582a6757f8e42a.rdx new file mode 100644 index 000000000..5940e3599 Binary files /dev/null and b/vignettes/cache_vaeac/first-vaeac-plots_ae51e52bd1fd298cf3582a6757f8e42a.rdx differ diff --git a/vignettes/cache_vaeac/first-vaeac_b95d26569654d0a37b24c533cefdc17a.RData b/vignettes/cache_vaeac/first-vaeac_b95d26569654d0a37b24c533cefdc17a.RData new file mode 100644 index 000000000..6de0e4da0 Binary files /dev/null and b/vignettes/cache_vaeac/first-vaeac_b95d26569654d0a37b24c533cefdc17a.RData differ diff --git a/vignettes/cache_vaeac/first-vaeac_b95d26569654d0a37b24c533cefdc17a.rdb b/vignettes/cache_vaeac/first-vaeac_b95d26569654d0a37b24c533cefdc17a.rdb new file mode 100644 index 000000000..f1c35a608 Binary files /dev/null and b/vignettes/cache_vaeac/first-vaeac_b95d26569654d0a37b24c533cefdc17a.rdb differ diff --git a/vignettes/cache_vaeac/first-vaeac_b95d26569654d0a37b24c533cefdc17a.rdx b/vignettes/cache_vaeac/first-vaeac_b95d26569654d0a37b24c533cefdc17a.rdx new file mode 100644 index 000000000..607cf7d5b Binary files /dev/null and b/vignettes/cache_vaeac/first-vaeac_b95d26569654d0a37b24c533cefdc17a.rdx differ diff --git a/vignettes/cache_vaeac/paired-sampling-plotting_a0f8abd69dc2644c1d588681cb6cdd7a.RData b/vignettes/cache_vaeac/paired-sampling-plotting_a0f8abd69dc2644c1d588681cb6cdd7a.RData new file mode 100644 index 000000000..29d7cb2b6 Binary files /dev/null and b/vignettes/cache_vaeac/paired-sampling-plotting_a0f8abd69dc2644c1d588681cb6cdd7a.RData differ diff --git a/vignettes/cache_vaeac/paired-sampling-plotting_a0f8abd69dc2644c1d588681cb6cdd7a.rdb b/vignettes/cache_vaeac/paired-sampling-plotting_a0f8abd69dc2644c1d588681cb6cdd7a.rdb new file mode 100644 index 000000000..7e3b62e4f Binary files /dev/null and b/vignettes/cache_vaeac/paired-sampling-plotting_a0f8abd69dc2644c1d588681cb6cdd7a.rdb differ diff --git a/vignettes/cache_vaeac/paired-sampling-plotting_a0f8abd69dc2644c1d588681cb6cdd7a.rdx b/vignettes/cache_vaeac/paired-sampling-plotting_a0f8abd69dc2644c1d588681cb6cdd7a.rdx new file mode 100644 index 000000000..187307144 Binary files /dev/null and b/vignettes/cache_vaeac/paired-sampling-plotting_a0f8abd69dc2644c1d588681cb6cdd7a.rdx differ diff --git a/vignettes/cache_vaeac/paired-sampling-training_19f4a4f5ad368f895280dddb877e10e7.RData b/vignettes/cache_vaeac/paired-sampling-training_19f4a4f5ad368f895280dddb877e10e7.RData new file mode 100644 index 000000000..3e2b463ed Binary files /dev/null and b/vignettes/cache_vaeac/paired-sampling-training_19f4a4f5ad368f895280dddb877e10e7.RData 
differ diff --git a/vignettes/cache_vaeac/paired-sampling-training_19f4a4f5ad368f895280dddb877e10e7.rdb b/vignettes/cache_vaeac/paired-sampling-training_19f4a4f5ad368f895280dddb877e10e7.rdb new file mode 100644 index 000000000..468ff3a29 Binary files /dev/null and b/vignettes/cache_vaeac/paired-sampling-training_19f4a4f5ad368f895280dddb877e10e7.rdb differ diff --git a/vignettes/cache_vaeac/paired-sampling-training_19f4a4f5ad368f895280dddb877e10e7.rdx b/vignettes/cache_vaeac/paired-sampling-training_19f4a4f5ad368f895280dddb877e10e7.rdx new file mode 100644 index 000000000..ab0decabc Binary files /dev/null and b/vignettes/cache_vaeac/paired-sampling-training_19f4a4f5ad368f895280dddb877e10e7.rdx differ diff --git a/vignettes/cache_vaeac/pretrained-vaeac-model_ff0a4aad115676779de63f71528ecee4.RData b/vignettes/cache_vaeac/pretrained-vaeac-model_ff0a4aad115676779de63f71528ecee4.RData new file mode 100644 index 000000000..64843ebc6 Binary files /dev/null and b/vignettes/cache_vaeac/pretrained-vaeac-model_ff0a4aad115676779de63f71528ecee4.RData differ diff --git a/vignettes/cache_vaeac/pretrained-vaeac-model_ff0a4aad115676779de63f71528ecee4.rdb b/vignettes/cache_vaeac/pretrained-vaeac-model_ff0a4aad115676779de63f71528ecee4.rdb new file mode 100644 index 000000000..f5fb7b584 Binary files /dev/null and b/vignettes/cache_vaeac/pretrained-vaeac-model_ff0a4aad115676779de63f71528ecee4.rdb differ diff --git a/vignettes/cache_vaeac/pretrained-vaeac-model_ff0a4aad115676779de63f71528ecee4.rdx b/vignettes/cache_vaeac/pretrained-vaeac-model_ff0a4aad115676779de63f71528ecee4.rdx new file mode 100644 index 000000000..4c142b642 Binary files /dev/null and b/vignettes/cache_vaeac/pretrained-vaeac-model_ff0a4aad115676779de63f71528ecee4.rdx differ diff --git a/vignettes/cache_vaeac/pretrained-vaeac-path_a4733824359b47c812165a83a915cc35.RData b/vignettes/cache_vaeac/pretrained-vaeac-path_a4733824359b47c812165a83a915cc35.RData new file mode 100644 index 000000000..505e5106d Binary files /dev/null and b/vignettes/cache_vaeac/pretrained-vaeac-path_a4733824359b47c812165a83a915cc35.RData differ diff --git a/vignettes/cache_vaeac/pretrained-vaeac-path_a4733824359b47c812165a83a915cc35.rdb b/vignettes/cache_vaeac/pretrained-vaeac-path_a4733824359b47c812165a83a915cc35.rdb new file mode 100644 index 000000000..ff1601671 Binary files /dev/null and b/vignettes/cache_vaeac/pretrained-vaeac-path_a4733824359b47c812165a83a915cc35.rdb differ diff --git a/vignettes/cache_vaeac/pretrained-vaeac-path_a4733824359b47c812165a83a915cc35.rdx b/vignettes/cache_vaeac/pretrained-vaeac-path_a4733824359b47c812165a83a915cc35.rdx new file mode 100644 index 000000000..6a4a5383b Binary files /dev/null and b/vignettes/cache_vaeac/pretrained-vaeac-path_a4733824359b47c812165a83a915cc35.rdx differ diff --git a/vignettes/cache_vaeac/progressr-false-verbose-0_769210b7c13a6b26a23e5c70cd39ecc5.RData b/vignettes/cache_vaeac/progressr-false-verbose-0_769210b7c13a6b26a23e5c70cd39ecc5.RData new file mode 100644 index 000000000..d1f0b499b Binary files /dev/null and b/vignettes/cache_vaeac/progressr-false-verbose-0_769210b7c13a6b26a23e5c70cd39ecc5.RData differ diff --git a/vignettes/cache_vaeac/progressr-false-verbose-0_769210b7c13a6b26a23e5c70cd39ecc5.rdb b/vignettes/cache_vaeac/progressr-false-verbose-0_769210b7c13a6b26a23e5c70cd39ecc5.rdb new file mode 100644 index 000000000..8d3e24c9e Binary files /dev/null and b/vignettes/cache_vaeac/progressr-false-verbose-0_769210b7c13a6b26a23e5c70cd39ecc5.rdb differ diff --git 
a/vignettes/cache_vaeac/progressr-false-verbose-0_769210b7c13a6b26a23e5c70cd39ecc5.rdx b/vignettes/cache_vaeac/progressr-false-verbose-0_769210b7c13a6b26a23e5c70cd39ecc5.rdx new file mode 100644 index 000000000..141f907a8 Binary files /dev/null and b/vignettes/cache_vaeac/progressr-false-verbose-0_769210b7c13a6b26a23e5c70cd39ecc5.rdx differ diff --git a/vignettes/cache_vaeac/progressr-false-verbose-2_73fe9a90acb7b7a4cd08de26dd26d7f1.RData b/vignettes/cache_vaeac/progressr-false-verbose-2_73fe9a90acb7b7a4cd08de26dd26d7f1.RData new file mode 100644 index 000000000..ef039f73c Binary files /dev/null and b/vignettes/cache_vaeac/progressr-false-verbose-2_73fe9a90acb7b7a4cd08de26dd26d7f1.RData differ diff --git a/vignettes/cache_vaeac/progressr-false-verbose-2_73fe9a90acb7b7a4cd08de26dd26d7f1.rdb b/vignettes/cache_vaeac/progressr-false-verbose-2_73fe9a90acb7b7a4cd08de26dd26d7f1.rdb new file mode 100644 index 000000000..60e905f71 Binary files /dev/null and b/vignettes/cache_vaeac/progressr-false-verbose-2_73fe9a90acb7b7a4cd08de26dd26d7f1.rdb differ diff --git a/vignettes/cache_vaeac/progressr-false-verbose-2_73fe9a90acb7b7a4cd08de26dd26d7f1.rdx b/vignettes/cache_vaeac/progressr-false-verbose-2_73fe9a90acb7b7a4cd08de26dd26d7f1.rdx new file mode 100644 index 000000000..b903b4bee Binary files /dev/null and b/vignettes/cache_vaeac/progressr-false-verbose-2_73fe9a90acb7b7a4cd08de26dd26d7f1.rdx differ diff --git a/vignettes/cache_vaeac/progressr-true-verbose-2_76680bb309fbc9e166f903e4de509202.RData b/vignettes/cache_vaeac/progressr-true-verbose-2_76680bb309fbc9e166f903e4de509202.RData new file mode 100644 index 000000000..8cb2044fc Binary files /dev/null and b/vignettes/cache_vaeac/progressr-true-verbose-2_76680bb309fbc9e166f903e4de509202.RData differ diff --git a/vignettes/cache_vaeac/progressr-true-verbose-2_76680bb309fbc9e166f903e4de509202.rdb b/vignettes/cache_vaeac/progressr-true-verbose-2_76680bb309fbc9e166f903e4de509202.rdb new file mode 100644 index 000000000..0860e193b Binary files /dev/null and b/vignettes/cache_vaeac/progressr-true-verbose-2_76680bb309fbc9e166f903e4de509202.rdb differ diff --git a/vignettes/cache_vaeac/progressr-true-verbose-2_76680bb309fbc9e166f903e4de509202.rdx b/vignettes/cache_vaeac/progressr-true-verbose-2_76680bb309fbc9e166f903e4de509202.rdx new file mode 100644 index 000000000..15a4246f9 Binary files /dev/null and b/vignettes/cache_vaeac/progressr-true-verbose-2_76680bb309fbc9e166f903e4de509202.rdx differ diff --git a/vignettes/cache_vaeac/setup-2_d395cb1e9c51641367ed7995823c20d5.RData b/vignettes/cache_vaeac/setup-2_d395cb1e9c51641367ed7995823c20d5.RData new file mode 100644 index 000000000..6bfa7f468 Binary files /dev/null and b/vignettes/cache_vaeac/setup-2_d395cb1e9c51641367ed7995823c20d5.RData differ diff --git a/vignettes/cache_vaeac/setup-2_d395cb1e9c51641367ed7995823c20d5.rdb b/vignettes/cache_vaeac/setup-2_d395cb1e9c51641367ed7995823c20d5.rdb new file mode 100644 index 000000000..beed89265 Binary files /dev/null and b/vignettes/cache_vaeac/setup-2_d395cb1e9c51641367ed7995823c20d5.rdb differ diff --git a/vignettes/cache_vaeac/setup-2_d395cb1e9c51641367ed7995823c20d5.rdx b/vignettes/cache_vaeac/setup-2_d395cb1e9c51641367ed7995823c20d5.rdx new file mode 100644 index 000000000..229a0b910 Binary files /dev/null and b/vignettes/cache_vaeac/setup-2_d395cb1e9c51641367ed7995823c20d5.rdx differ diff --git a/vignettes/cache_vaeac/vaeac-grouping-of-features_61e26018b504b1830e8ec92a54365a70.RData 
b/vignettes/cache_vaeac/vaeac-grouping-of-features_61e26018b504b1830e8ec92a54365a70.RData new file mode 100644 index 000000000..4d8e94819 Binary files /dev/null and b/vignettes/cache_vaeac/vaeac-grouping-of-features_61e26018b504b1830e8ec92a54365a70.RData differ diff --git a/vignettes/cache_vaeac/vaeac-grouping-of-features_61e26018b504b1830e8ec92a54365a70.rdb b/vignettes/cache_vaeac/vaeac-grouping-of-features_61e26018b504b1830e8ec92a54365a70.rdb new file mode 100644 index 000000000..a76bba88d Binary files /dev/null and b/vignettes/cache_vaeac/vaeac-grouping-of-features_61e26018b504b1830e8ec92a54365a70.rdb differ diff --git a/vignettes/cache_vaeac/vaeac-grouping-of-features_61e26018b504b1830e8ec92a54365a70.rdx b/vignettes/cache_vaeac/vaeac-grouping-of-features_61e26018b504b1830e8ec92a54365a70.rdx new file mode 100644 index 000000000..b7e87a5f1 Binary files /dev/null and b/vignettes/cache_vaeac/vaeac-grouping-of-features_61e26018b504b1830e8ec92a54365a70.rdx differ diff --git a/vignettes/cache_vaeac/vaeac-mixed-data_886cad1d7ab2e55cd6e2c3e29e546715.RData b/vignettes/cache_vaeac/vaeac-mixed-data_886cad1d7ab2e55cd6e2c3e29e546715.RData new file mode 100644 index 000000000..329590114 Binary files /dev/null and b/vignettes/cache_vaeac/vaeac-mixed-data_886cad1d7ab2e55cd6e2c3e29e546715.RData differ diff --git a/vignettes/cache_vaeac/vaeac-mixed-data_886cad1d7ab2e55cd6e2c3e29e546715.rdb b/vignettes/cache_vaeac/vaeac-mixed-data_886cad1d7ab2e55cd6e2c3e29e546715.rdb new file mode 100644 index 000000000..f6f301fe8 Binary files /dev/null and b/vignettes/cache_vaeac/vaeac-mixed-data_886cad1d7ab2e55cd6e2c3e29e546715.rdb differ diff --git a/vignettes/cache_vaeac/vaeac-mixed-data_886cad1d7ab2e55cd6e2c3e29e546715.rdx b/vignettes/cache_vaeac/vaeac-mixed-data_886cad1d7ab2e55cd6e2c3e29e546715.rdx new file mode 100644 index 000000000..011388d66 Binary files /dev/null and b/vignettes/cache_vaeac/vaeac-mixed-data_886cad1d7ab2e55cd6e2c3e29e546715.rdx differ diff --git a/vignettes/figure_main/unnamed-chunk-12-1.png b/vignettes/figure_main/unnamed-chunk-12-1.png new file mode 100644 index 000000000..db086a46d Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-12-1.png differ diff --git a/vignettes/figure_main/unnamed-chunk-12-2.png b/vignettes/figure_main/unnamed-chunk-12-2.png new file mode 100644 index 000000000..9fae709fa Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-12-2.png differ diff --git a/vignettes/figure_main/unnamed-chunk-12-3.png b/vignettes/figure_main/unnamed-chunk-12-3.png new file mode 100644 index 000000000..e03fe9105 Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-12-3.png differ diff --git a/vignettes/figure_main/unnamed-chunk-13-1.png b/vignettes/figure_main/unnamed-chunk-13-1.png new file mode 100644 index 000000000..e08370b7f Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-13-1.png differ diff --git a/vignettes/figure_main/unnamed-chunk-13-2.png b/vignettes/figure_main/unnamed-chunk-13-2.png new file mode 100644 index 000000000..7d86a62a8 Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-13-2.png differ diff --git a/vignettes/figure_main/unnamed-chunk-14-1.png b/vignettes/figure_main/unnamed-chunk-14-1.png new file mode 100644 index 000000000..c3e047ece Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-14-1.png differ diff --git a/vignettes/figure_main/unnamed-chunk-2-1.png b/vignettes/figure_main/unnamed-chunk-2-1.png new file mode 100644 index 000000000..ac95b5818 Binary files /dev/null and 
b/vignettes/figure_main/unnamed-chunk-2-1.png differ diff --git a/vignettes/figure_main/unnamed-chunk-20-1.png b/vignettes/figure_main/unnamed-chunk-20-1.png new file mode 100644 index 000000000..f915a7961 Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-20-1.png differ diff --git a/vignettes/figure_main/unnamed-chunk-22-1.png b/vignettes/figure_main/unnamed-chunk-22-1.png new file mode 100644 index 000000000..dd32ab9fe Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-22-1.png differ diff --git a/vignettes/figure_main/unnamed-chunk-23-1.png b/vignettes/figure_main/unnamed-chunk-23-1.png new file mode 100644 index 000000000..4f01679fd Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-23-1.png differ diff --git a/vignettes/figure_main/unnamed-chunk-23-2.png b/vignettes/figure_main/unnamed-chunk-23-2.png new file mode 100644 index 000000000..4f01679fd Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-23-2.png differ diff --git a/vignettes/figure_main/unnamed-chunk-3-1.png b/vignettes/figure_main/unnamed-chunk-3-1.png new file mode 100644 index 000000000..90868c1fb Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-3-1.png differ diff --git a/vignettes/figure_main/unnamed-chunk-4-1.png b/vignettes/figure_main/unnamed-chunk-4-1.png new file mode 100644 index 000000000..469d16797 Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-4-1.png differ diff --git a/vignettes/figure_main/unnamed-chunk-5-1.png b/vignettes/figure_main/unnamed-chunk-5-1.png new file mode 100644 index 000000000..0290ecd84 Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-5-1.png differ diff --git a/vignettes/figure_main/unnamed-chunk-6-1.png b/vignettes/figure_main/unnamed-chunk-6-1.png new file mode 100644 index 000000000..271c82ed9 Binary files /dev/null and b/vignettes/figure_main/unnamed-chunk-6-1.png differ diff --git a/vignettes/figure_main/vaeac-plot-1-1.png b/vignettes/figure_main/vaeac-plot-1-1.png new file mode 100644 index 000000000..ed5a55db6 Binary files /dev/null and b/vignettes/figure_main/vaeac-plot-1-1.png differ diff --git a/vignettes/figure_main/vaeac-plot-2-1.png b/vignettes/figure_main/vaeac-plot-2-1.png new file mode 100644 index 000000000..16800591f Binary files /dev/null and b/vignettes/figure_main/vaeac-plot-2-1.png differ diff --git a/vignettes/figure_main/vaeac-plot-3-1.png b/vignettes/figure_main/vaeac-plot-3-1.png new file mode 100644 index 000000000..2d456bca8 Binary files /dev/null and b/vignettes/figure_main/vaeac-plot-3-1.png differ diff --git a/vignettes/figure_vaeac/check-n_combinations-and-more-batches-1.png b/vignettes/figure_vaeac/check-n_combinations-and-more-batches-1.png new file mode 100644 index 000000000..145c8f245 Binary files /dev/null and b/vignettes/figure_vaeac/check-n_combinations-and-more-batches-1.png differ diff --git a/vignettes/figure_vaeac/continue-training-1.png b/vignettes/figure_vaeac/continue-training-1.png new file mode 100644 index 000000000..443dcc38c Binary files /dev/null and b/vignettes/figure_vaeac/continue-training-1.png differ diff --git a/vignettes/figure_vaeac/continue-training-2.png b/vignettes/figure_vaeac/continue-training-2.png new file mode 100644 index 000000000..24653720c Binary files /dev/null and b/vignettes/figure_vaeac/continue-training-2.png differ diff --git a/vignettes/figure_vaeac/continue-training-3.png b/vignettes/figure_vaeac/continue-training-3.png new file mode 100644 index 000000000..ed7c68be3 Binary files /dev/null and 
b/vignettes/figure_vaeac/continue-training-3.png differ diff --git a/vignettes/figure_vaeac/continue-training-4.png b/vignettes/figure_vaeac/continue-training-4.png new file mode 100644 index 000000000..1e6be8da9 Binary files /dev/null and b/vignettes/figure_vaeac/continue-training-4.png differ diff --git a/vignettes/figure_vaeac/continue-training-5.png b/vignettes/figure_vaeac/continue-training-5.png new file mode 100644 index 000000000..b880e0e77 Binary files /dev/null and b/vignettes/figure_vaeac/continue-training-5.png differ diff --git a/vignettes/figure_vaeac/continue-training-6.png b/vignettes/figure_vaeac/continue-training-6.png new file mode 100644 index 000000000..7a11d30a5 Binary files /dev/null and b/vignettes/figure_vaeac/continue-training-6.png differ diff --git a/vignettes/figure_vaeac/continue-training-7.png b/vignettes/figure_vaeac/continue-training-7.png new file mode 100644 index 000000000..5f399e592 Binary files /dev/null and b/vignettes/figure_vaeac/continue-training-7.png differ diff --git a/vignettes/figure_vaeac/early-stopping-1-1.png b/vignettes/figure_vaeac/early-stopping-1-1.png new file mode 100644 index 000000000..c81b7bf88 Binary files /dev/null and b/vignettes/figure_vaeac/early-stopping-1-1.png differ diff --git a/vignettes/figure_vaeac/early-stopping-2-1.png b/vignettes/figure_vaeac/early-stopping-2-1.png new file mode 100644 index 000000000..e2c268815 Binary files /dev/null and b/vignettes/figure_vaeac/early-stopping-2-1.png differ diff --git a/vignettes/figure_vaeac/early-stopping-3-1.png b/vignettes/figure_vaeac/early-stopping-3-1.png new file mode 100644 index 000000000..6f39e740d Binary files /dev/null and b/vignettes/figure_vaeac/early-stopping-3-1.png differ diff --git a/vignettes/figure_vaeac/early-stopping-3-2.png b/vignettes/figure_vaeac/early-stopping-3-2.png new file mode 100644 index 000000000..61168a938 Binary files /dev/null and b/vignettes/figure_vaeac/early-stopping-3-2.png differ diff --git a/vignettes/figure_vaeac/first-vaeac-plots-1.png b/vignettes/figure_vaeac/first-vaeac-plots-1.png new file mode 100644 index 000000000..871d72873 Binary files /dev/null and b/vignettes/figure_vaeac/first-vaeac-plots-1.png differ diff --git a/vignettes/figure_vaeac/paired-sampling-plotting-1.png b/vignettes/figure_vaeac/paired-sampling-plotting-1.png new file mode 100644 index 000000000..6ab4794f1 Binary files /dev/null and b/vignettes/figure_vaeac/paired-sampling-plotting-1.png differ diff --git a/vignettes/figure_vaeac/paired-sampling-plotting-2.png b/vignettes/figure_vaeac/paired-sampling-plotting-2.png new file mode 100644 index 000000000..792c3a742 Binary files /dev/null and b/vignettes/figure_vaeac/paired-sampling-plotting-2.png differ diff --git a/vignettes/figure_vaeac/vaeac-grouping-of-features-1.png b/vignettes/figure_vaeac/vaeac-grouping-of-features-1.png new file mode 100644 index 000000000..f9ca0bf7e Binary files /dev/null and b/vignettes/figure_vaeac/vaeac-grouping-of-features-1.png differ diff --git a/vignettes/figure_vaeac/vaeac-mixed-data-1.png b/vignettes/figure_vaeac/vaeac-mixed-data-1.png new file mode 100644 index 000000000..eb3866df0 Binary files /dev/null and b/vignettes/figure_vaeac/vaeac-mixed-data-1.png differ diff --git a/vignettes/figure_vaeac/vaeac-mixed-data-2.png b/vignettes/figure_vaeac/vaeac-mixed-data-2.png new file mode 100644 index 000000000..d020c12d4 Binary files /dev/null and b/vignettes/figure_vaeac/vaeac-mixed-data-2.png differ diff --git a/vignettes/figure_vaeac/vaeac-mixed-data-3.png 
b/vignettes/figure_vaeac/vaeac-mixed-data-3.png new file mode 100644 index 000000000..085b3815d Binary files /dev/null and b/vignettes/figure_vaeac/vaeac-mixed-data-3.png differ diff --git a/vignettes/understanding_shapr.Rmd b/vignettes/understanding_shapr.Rmd index c975a67f7..594967913 100644 --- a/vignettes/understanding_shapr.Rmd +++ b/vignettes/understanding_shapr.Rmd @@ -7,25 +7,14 @@ vignette: > %\VignetteEncoding{UTF-8} %\VignetteIndexEntry{`shapr`: Explaining individual machine learning predictions with Shapley values} %\VignetteEngine{knitr::rmarkdown} -editor_options: - markdown: +editor_options: + markdown: wrap: 72 --- -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - fig.width = 7, - fig.height = 3, - warning = FALSE, - message = FALSE -) -``` -```{r setup, include=FALSE, warning=FALSE} -library(shapr) -``` + + > [Introduction](#intro) @@ -112,7 +101,7 @@ expectations can be written as ```{=tex} \begin{equation} \label{eq:CondExp} -E[f(\boldsymbol{x})|\boldsymbol{x}_s=\boldsymbol{x}_S^*] = E[f(\boldsymbol{x}_{\bar{S}},\boldsymbol{x}_S)|\boldsymbol{x}_S=\boldsymbol{x}_S^*] = +E[f(\boldsymbol{x})|\boldsymbol{x}_s=\boldsymbol{x}_S^*] = E[f(\boldsymbol{x}_{\bar{S}},\boldsymbol{x}_S)|\boldsymbol{x}_S=\boldsymbol{x}_S^*] = \int f(\boldsymbol{x}_{\bar{S}},\boldsymbol{x}_S^*)\,p(\boldsymbol{x}_{\bar{S}}|\boldsymbol{x}_S=\boldsymbol{x}_S^*)d\boldsymbol{x}_{\bar{S}}, \end{equation} ``` @@ -280,6 +269,51 @@ the conditional expectations as in Kernel SHAP. See The conditional inference trees are fit using the *party* and *partykit* packages (@partykit_package). + + + + +## Variational AutoEncoder with Arbitrary Conditioning (vaeac) Approach +Another approach that supports mixed features is the Variational AutoEncoder +with Arbitrary Conditioning (@olsen2022using), abbreviated to `vaeac`. +The `vaeac` is an extension of the regular variational autoencoder +(@kingma2014autoencoding), but instead of giving a probabilistic representation +of the distribution $p(\boldsymbol{x})$ it gives a probabilistic representation +of the conditional distribution +$p(\boldsymbol{x}_{\bar{\mathcal{S}}} \mid \boldsymbol{x}_{\mathcal{S}})$, +for all possible feature subsets $\mathcal{S}\subseteq\mathcal{M}$ simultaneously, +where $\mathcal{M}$ is the set of all features. That is, only a single `vaeac` +model is needed to model all conditional distributions. + +The `vaeac` consists of three neural networks: a *full encoder*, a *masked encoder*, +and a *decoder*. The encoders map the full and masked/conditional input representations, +i.e., $\boldsymbol{x}$ and $\boldsymbol{x}_{\mathcal{S}}$, respectively, +to latent probabilistic representations. Sampled instances from these latent probabilistic +representations are sent to the decoder, which maps them back to the feature space +and provides a samplable probabilistic representation for the unconditioned features +$\boldsymbol{x}_{\bar{\mathcal{S}}}$. The full encoder is only used during the +training phase of the `vaeac` model to guide the training process of the masked encoder, +as the former relies on the full input sample $\boldsymbol{x}$, which is not accessible +in the deployment phase (when we generate the Monte Carlo samples), as we only have access +to $\boldsymbol{x}_{\mathcal{S}}$. The networks are trained by minimizing a variational +lower bound; see Section 3 in @olsen2022using for an in-depth introduction to the +`vaeac` methodology.
We use the `vaeac` model at the epoch which obtains the lowest +validation IWAE score to generate the Monte Carlo samples used in the Shapley value computations. + +We fit the `vaeac` model using the *torch* package in $\textsf{R}$ (@torch). The main +parameters are the number of layers in the networks (`vaeac.depth`), the width of the layers +(`vaeac.width`), the number of dimensions in the latent space (`vaeac.latent_dim`), +the activation function between the layers in the networks (`vaeac.activation_function`), +the learning rate in the ADAM optimizer (`vaeac.lr`), the number of `vaeac` models to initiate +to remedy poorly initiated model parameter values (`vaeac.n_vaeacs_initialize`), and +the number of learning epochs (`vaeac.epochs`). Call `?shapr::setup_approach.vaeac` for +a more detailed description of the parameters. + +There are additional extra parameters which can be set by including a named list in the call to +the `explain()` function. For example, we can change the batch size to 32 by including +`vaeac.extra_parameters = list(vaeac.batch_size = 32)` as a parameter in the call to the `explain()` function. See `?shapr::vaeac_get_extra_para_default` for a description of the possible +extra parameters to the `vaeac` approach. We strongly encourage the user to specify the main and extra parameters to the `vaeac` approach at the correct place in the call to the `explain()` function. That is, the main parameters are entered directly into the `explain()` function, while the extra parameters are included in a named list called `vaeac.extra_parameters`. However, the `vaeac` approach will try to correct for misplaced and duplicated parameters and give warnings to the user. + ## Categorical Approach When the features are all categorical, we can estimate the conditional @@ -322,7 +356,8 @@ used instead by setting the argument `approach` to either `"gaussian"`, `"copula"`, `"ctree"`, `"categorical"` or `"independence"` in the code below. -```{r, warning=FALSE} + +```r library(xgboost) library(data.table) @@ -362,22 +397,35 @@ explanation <- explain( # Printing the Shapley values for the test data. # For more information about the interpretation of the values in the table, see ?shapr::explain. print(explanation$shapley_values) +#> none Solar.R Wind Temp Month +#> +#> 1: 43.08571 13.2117337 4.785645 -25.57222 -5.599230 +#> 2: 43.08571 -9.9727747 5.830694 -11.03873 -7.829954 +#> 3: 43.08571 -2.2916185 -7.053393 -10.15035 -4.452481 +#> 4: 43.08571 3.3254595 -3.240879 -10.22492 -6.663488 +#> 5: 43.08571 4.3039571 -2.627764 -14.15166 -12.266855 +#> 6: 43.08571 0.4786417 -5.248686 -12.55344 -6.645738 # Plot the resulting explanations for observations 1 and 6 plot(explanation, bar_plot_phi0 = FALSE, index_x_explain = c(1, 6)) ``` +![](figure_main/unnamed-chunk-2-1.png) + There are multiple plot options specified by the `plot_type` argument in `plot`. The `waterfall` option shows the changes in the prediction score due to each features contribution (their Shapley values): -There are multiple plot options specified by the `plot_type` argument in `plot`. -The `waterfall` option shows the changes in the prediction score due to each features contribution (their Shapley values): +There are multiple plot options specified by the `plot_type` argument in `plot`.
+The `waterfall` option shows the changes in the prediction score due to each features contribution (their Shapley values): -```{r} + +```r plot(explanation, plot_type = "waterfall", index_x_explain = c(1, 6)) ``` +![](figure_main/unnamed-chunk-3-1.png) + The other two plot options, `"beeswarm"` and `"scatter"`, can be useful when you have many observations that you want to explain. For the purpose of illustration, we explain the whole `airquality` dataset @@ -387,7 +435,8 @@ values along the x-axis across all features. Each point gives the Shapley value of a given instance, where the points are colored by the feature value of that instance: -```{r} + +```r x_explain_many <- data[, ..x_var] explanation_plot <- explain( model = model, @@ -399,18 +448,24 @@ explanation_plot <- explain( plot(explanation_plot, plot_type = "beeswarm") ``` +![](figure_main/unnamed-chunk-4-1.png) + The `plot_type = "scatter"` plots the feature values on the x-axis and Shapley values on the y-axis, as well as (optionally) a background scatter_hist showing the distribution of the feature data: -```{r} + +```r plot(explanation_plot, plot_type = "scatter", scatter_hist = TRUE) ``` -We can use mixed (i.e continuous, categorical, ordinal) data with ctree. +![](figure_main/unnamed-chunk-5-1.png) + +We can use mixed (i.e continuous, categorical, ordinal) data with ctree or vaeac. Use ctree with mixed data in the following manner: -```{r} + +```r # convert the month variable to a factor data[, Month_factor := as.factor(Month)] @@ -442,11 +497,14 @@ explanation_lm_cat <- explain( plot(explanation_lm_cat, bar_plot_phi0 = FALSE, index_x_explain = c(1, 6)) ``` +![](figure_main/unnamed-chunk-6-1.png) + We can specify parameters used to build the conditional inference trees in the following manner. Default values are based on @hothorn2006unbiased. -```{r} + +```r # Use the conditional inference tree approach # We can specify parameters used to building trees by specifying mincriterion, # minsplit, minbucket @@ -469,7 +527,8 @@ explanation_ctree <- explain( If **all** features are categorical, one may use the categorical approach as follows: -```{r} + +```r # For the sake of illustration, convert ALL features to factors data[, Solar.R_factor := as.factor(cut(Solar.R, 10))] data[, Wind_factor := as.factor(cut(Wind, 3))] @@ -510,7 +569,8 @@ achieved through the `group` attribute. Other optional parameters of `timeseries.bounds` (a vector indicating upper and lower bounds of the time series if necessary). -```{r} + +```r # Simulate time series data with AR(1)-structure set.seed(1) data_ts <- data.frame(matrix(NA, ncol = 41, nrow = 4)) @@ -568,7 +628,7 @@ and rank the approaches/methods. The $\operatorname{MSE}_{v}$ is given by ```{=tex} \begin{align} \label{eq:MSE_v} - \operatorname{MSE}_{v} = \operatorname{MSE}_{v}(\text{method } \texttt{q}) + \operatorname{MSE}_{v} = \operatorname{MSE}_{v}(\text{method } \texttt{q}) = \frac{1}{N_\mathcal{S}} \sum_{\mathcal{S} \in \mathcal{P}^*(\mathcal{M})} \frac{1}{N_\text{explain}} \sum_{i=1}^{N_\text{explain}} \left( f(\boldsymbol{x}^{[i]}) - {\hat{v}}_{\texttt{q}}(\mathcal{S}, \boldsymbol{x}^{[i]})\right)^2\!, @@ -577,21 +637,21 @@ and rank the approaches/methods. 
The $\operatorname{MSE}_{v}$ is given by where ${\hat{v}}_{\texttt{q}}$ is the estimated contribution function using method $\texttt{q}$ and $N_\mathcal{S} = |\mathcal{P}^*(\mathcal{M})| = 2^M-2$, i.e., we have removed the empty ($\mathcal{S} = \emptyset$) and the grand combinations ($\mathcal{S} = \mathcal{M}$) as they are method independent. Meaning that these two combinations do not influence the ranking of the methods as the methods are not used to compute the contribution function for them. The motivation behind the -$\operatorname{MSE}_{v}$ criterion is that +$\operatorname{MSE}_{v}$ criterion is that $\mathbb{E}_\mathcal{S}\mathbb{E}_{\boldsymbol{x}} (v_{\texttt{true}}(\mathcal{S},\boldsymbol{x}) - \hat{v}_{\texttt{q}}(\mathcal{S}, \boldsymbol{x}))^2$ can be decomposed as ```{=tex} \begin{align} \label{eq:expectation_decomposition} \begin{split} - \mathbb{E}_\mathcal{S}\mathbb{E}_{\boldsymbol{x}} (v_{\texttt{true}}(\mathcal{S}, \boldsymbol{x})- \hat{v}_{\texttt{q}}(\mathcal{S}, \boldsymbol{x}))^2 + \mathbb{E}_\mathcal{S}\mathbb{E}_{\boldsymbol{x}} (v_{\texttt{true}}(\mathcal{S}, \boldsymbol{x})- \hat{v}_{\texttt{q}}(\mathcal{S}, \boldsymbol{x}))^2 &= \mathbb{E}_\mathcal{S}\mathbb{E}_{\boldsymbol{x}} (f(\boldsymbol{x}) - \hat{v}_{\texttt{q}}(\mathcal{S}, \boldsymbol{x}))^2 \\ &\phantom{\,\,\,\,\,\,\,}- \mathbb{E}_\mathcal{S}\mathbb{E}_{\boldsymbol{x}} (f(\boldsymbol{x})-v_{\texttt{true}}(\mathcal{S}, \boldsymbol{x}))^2, \end{split} \end{align} ``` -see Appendix A in @covert2020understanding. The first term on the right-hand side of +see Appendix A in @covert2020understanding. The first term on the right-hand side of the equation above can be estimated by $\operatorname{MSE}_{v}$, while the second term is a fixed (unknown) constant not influenced by the approach \texttt{q}. Thus, a low value of $\operatorname{MSE}_{v}$ indicates that the estimated contribution function $\hat{v}_{\texttt{q}}$ @@ -599,7 +659,7 @@ is closer to the true counterpart $v_{\texttt{true}}$ than a high value. In `shapr`, we allow for weighting the combinations in the $\operatorname{MSE}_{v}$ evaluation criterion either uniformly or by using the corresponding Shapley kernel weights (or the sampling frequencies when sampling of -combinations is used). +combinations is used). This is determined by the logical parameter `MSEv_uniform_comb_weights` in the `explain()` function, and the default is to do uniform weighting, that is, `MSEv_uniform_comb_weights = TRUE`. @@ -613,8 +673,8 @@ First, we can only use the $\operatorname{MSE}_{v}$ criterion to rank the method their closeness to the optimum since the minimum value of the $\operatorname{MSE}_{v}$ criterion is unknown. Second, the criterion evaluates the contribution functions and not the Shapley values. -Note that @olsen2023comparative observed a relatively linear relationship between the -$\operatorname{MSE}_{v}$ criterion and the mean absolute error $(\operatorname{MAE})$ between the +Note that @olsen2023comparative observed a relatively linear relationship between the +$\operatorname{MSE}_{v}$ criterion and the mean absolute error $(\operatorname{MAE})$ between the true and estimated Shapley values in extensive simulation studies where the true Shapley values were known. That is, a method that achieves a low $\operatorname{MSE}_{v}$ score also tends to obtain a low $\operatorname{MAE}$ score, and vice versa. @@ -623,7 +683,7 @@ obtain a low $\operatorname{MAE}$ score, and vice versa. 
The $\operatorname{MSE}_{v}$ criterion can be written as $\operatorname{MSE}_{v} = \frac{1}{N_\text{explain}}\sum_{i=1}^{N_\text{explain}} \operatorname{MSE}_{v,\text{explain }i}$. We can therefore use the central limit theorem to compute an approximate -confidence interval for the $\operatorname{MSE}_{v}$ criterion. We have that +confidence interval for the $\operatorname{MSE}_{v}$ criterion. We have that $\operatorname{MSE}_{v} \pm t_{\alpha/2}\frac{\operatorname{SD}(\operatorname{MSE}_{v})}{\sqrt{N_\text{explain}}}$ is a $(1-\alpha/2)\%$ approximate confidence interval for the evaluation criterion, where $t_{\alpha/2}$ is the $\alpha/2$ percentile of the $T_{N_\text{explain}-1}$ distribution. @@ -640,7 +700,8 @@ each observation, as each combination is a different prediction tasks. ### MSEv examples Start by explaining the predictions by using different methods and combining them into lists. -```{r} + +```r # We use more explicands here for more stable confidence intervals ind_x_explain <- 1:25 x_train <- data[-ind_x_explain, ..x_var] @@ -731,7 +792,8 @@ explanation_list_named <- list( We can then compare the different approaches by creating plots of the $\operatorname{MSE}_{v}$ evaluation criterion. -```{r} + +```r # Create the MSEv plots with approximate 95% confidence intervals MSEv_plots <- plot_MSEv_eval_crit(explanation_list_named, plot_type = c("overall", "comb", "explicand"), @@ -740,35 +802,75 @@ MSEv_plots <- plot_MSEv_eval_crit(explanation_list_named, # 5 plots are made names(MSEv_plots) +#> [1] "MSEv_explicand_bar" "MSEv_explicand_line_point" "MSEv_combination_bar" "MSEv_combination_line_point" +#> [5] "MSEv_bar" ``` The main plot if interest is the `MSEv_bar`, which displays the $\operatorname{MSE}_{v}$ evaluation criterion for each method averaged over both the combinations/coalitions and test observations/explicands. However, we can also look at the other plots where we have only averaged over the observations or the combinations (both as bar and line plots). -```{r} + +```r # The main plot of the overall MSEv averaged over both the combinations and observations MSEv_plots$MSEv_bar +``` + +![](figure_main/unnamed-chunk-12-1.png) + +```r # The MSEv averaged over only the explicands for each combinations MSEv_plots$MSEv_combination_bar +``` + +![](figure_main/unnamed-chunk-12-2.png) + +```r # The MSEv averaged over only the combinations for each observation/explicand MSEv_plots$MSEv_explicand_bar +``` + +![](figure_main/unnamed-chunk-12-3.png) + +```r # To see which coalition S each of the `id_combination` corresponds to, # i.e., which features that are conditions on. explanation_list_named[[1]]$MSEv$MSEv_combination[, c("id_combination", "features")] +#> id_combination features +#> +#> 1: 2 1 +#> 2: 3 2 +#> 3: 4 3 +#> 4: 5 4 +#> 5: 6 1,2 +#> 6: 7 1,3 +#> 7: 8 1,4 +#> 8: 9 2,3 +#> 9: 10 2,4 +#> 10: 11 3,4 +#> 11: 12 1,2,3 +#> 12: 13 1,2,4 +#> 13: 14 1,3,4 +#> 14: 15 2,3,4 ``` -We can specify the `index_x_explain` and `id_combination` parameters in `plot_MSEv_eval_crit()` to only plot +We can specify the `index_x_explain` and `id_combination` parameters in `plot_MSEv_eval_crit()` to only plot certain test observations and combinations, respectively. 
-```{r} + +```r # We can specify which test observations or combinations to plot plot_MSEv_eval_crit(explanation_list_named, plot_type = "explicand", index_x_explain = c(1, 3:4, 6), CI_level = 0.95 )$MSEv_explicand_bar +``` + +![](figure_main/unnamed-chunk-13-1.png) + +```r plot_MSEv_eval_crit(explanation_list_named, plot_type = "comb", id_combination = c(3, 4, 9, 13:15), @@ -776,15 +878,16 @@ plot_MSEv_eval_crit(explanation_list_named, )$MSEv_combination_bar ``` +![](figure_main/unnamed-chunk-13-2.png) + We can also alter the plots design-wise as we do in the code below. -```{r} + +```r bar_text_n_decimals <- 1 -CI_level <- 0.95 -MSEv_plot <- plot_MSEv_eval_crit(explanation_list_named, CI_level = CI_level)$MSEv_bar -MSEv_plot + - ggplot2::scale_x_discrete(limits = rev(levels(MSEv_plot$data$Method))) + +plot_MSEv_eval_crit(explanation_list_named) + + ggplot2::scale_x_discrete(limits = rev(levels(MSEv_plots$MSEv_bar$data$Method))) + ggplot2::coord_flip() + ggplot2::scale_fill_brewer(palette = "Paired") + ggplot2::theme_minimal() + # This must be set before other theme calls @@ -805,6 +908,9 @@ MSEv_plot + ) ``` +![](figure_main/unnamed-chunk-14-1.png) + + ## Main arguments in `explain` When using `explain`, the default behavior is to use all feature @@ -887,7 +993,8 @@ may be of more interesting to explain the impact of each variable, rather than each lag of each variable. This can be done by setting `group_lags = TRUE`. -```{r} + +```r # Simulate time series data with AR(1)-structure. set.seed(1) data_ts <- data.frame(Y = arima.sim(list(order = c(1, 0, 0), ar = .5), n = 500)) @@ -914,6 +1021,14 @@ explanation_forecast <- explain_forecast( group_lags = FALSE ) explanation_forecast +#> explain_idx horizon none Y.1 Y.2 +#> +#> 1: 499 1 0.04018 0.5053 -0.07659 +#> 2: 500 1 0.04018 -0.3622 0.02497 +#> 3: 499 2 0.04018 0.5053 -0.07659 +#> 4: 500 2 0.04018 -0.3622 0.02497 +#> 5: 499 3 0.04018 0.5053 -0.07659 +#> 6: 500 3 0.04018 -0.3622 0.02497 ``` Note that for a multivariate model such as a VAR (Vector AutoRegressive @@ -936,12 +1051,18 @@ model is then used to make a forecast of the temperature of the day that comes after the last day in the data, this forecast starts from index 153. -```{r} + +```r data <- data.table::as.data.table(airquality) model_ar_temp <- ar(data$Temp, order = 2) predict(model_ar_temp, n.ahead = 2)$pred +#> Time Series: +#> Start = 154 +#> End = 155 +#> Frequency = 1 +#> [1] 71.08111 71.52445 ``` First, we pass the model and the data as `model` and `y`. Since we have @@ -968,7 +1089,8 @@ explained separately. Grouping lags may be more interesting to do in a model with multiple variables, as it is then possible to explain each variable separately. -```{r} + +```r explanation <- explain_forecast( model = model_ar_temp, y = data[, "Temp"], @@ -984,6 +1106,10 @@ explanation <- explain_forecast( ) print(explanation) +#> explain_idx horizon none Temp.1 Temp.2 +#> +#> 1: 153 1 77.88 -6.622 -0.1788 +#> 2: 153 2 77.88 -6.025 -0.3327 ``` The results are presented per value of `explain_idx` and forecast @@ -1000,7 +1126,8 @@ regressor must be available for the predicted time points, the model is just fit on the 151 first observations, leaving two observations of `Wind` to be used as exogenous values during the prediction phase. 
-```{r} + +```r data <- data.table::as.data.table(airquality) data_fit <- data[seq_len(151), ] @@ -1010,6 +1137,11 @@ model_arimax_temp <- arima(data_fit$Temp, order = c(2, 0, 0), xreg = data_fit$Wi newxreg <- data[-seq_len(151), "Wind", drop = FALSE] predict(model_arimax_temp, n.ahead = 2, newxreg = newxreg)$pred +#> Time Series: +#> Start = 152 +#> End = 153 +#> Frequency = 1 +#> [1] 77.49992 76.38062 ``` The `shapr` package can then explain not only the two autoregressive @@ -1021,7 +1153,8 @@ makes it possible for `shapr` to not only explain the effect of the first lag of the exogenous variable, but also the contemporary effect during the forecasting period. -```{r} + +```r explanation <- explain_forecast( model = model_ar_temp, y = data_fit[, "Temp"], @@ -1039,6 +1172,10 @@ explanation <- explain_forecast( ) print(explanation$shapley_values) +#> explain_idx horizon none Temp.1 Temp.2 Wind.1 Wind.F1 Wind.F2 +#> +#> 1: 151 1 77.96026 -0.6779272 -0.6734041 -1.268789 0.4934084 NA +#> 2: 151 2 77.96026 0.3996832 -0.5005937 -1.465464 0.0659129 -0.4742238 ``` @@ -1076,9 +1213,10 @@ by the complete prediction itself, and should thus not be part of the vector. The code below exemplifies this approach for a case where there are four features, using `"empirical", "copula"` and `"gaussian"` when -conditioning on respectively 1, 2 and 3 features. +conditioning on respectively 1, 2 and 3 features. -```{r} + +```r # Use the combined approach explanation_combined <- explain( model = model, @@ -1092,10 +1230,13 @@ explanation_combined <- explain( plot(explanation_combined, bar_plot_phi0 = FALSE, index_x_explain = c(1, 6)) ``` -As a second example using `"ctree"` to conditin on 1 and 2 features, and +![](figure_main/unnamed-chunk-20-1.png) + +As a second example using `"ctree"` to conditin on 1 and 2 features, and `"empirical"` when conditioning on 3 features: -```{r} + +```r # Use the combined approach explanation_combined <- explain( model = model, @@ -1114,7 +1255,8 @@ features instead of single features, see (@jullum2021efficient) for intuition and real world examples. Explaining prediction in terms of groups of features is very easy using `shapr`: -```{r} + +```r # Define the feature groups group_list <- list( A = c("Temp", "Month"), @@ -1132,10 +1274,40 @@ explanation_group <- explain( ) # Prints the group-wise explanations explanation_group +#> none A B +#> +#> 1: 47.27 -29.588 13.1628 +#> 2: 47.27 -11.834 -15.7011 +#> 3: 47.27 -15.976 -17.5729 +#> 4: 47.27 -25.067 -5.1374 +#> 5: 47.27 -35.848 20.2892 +#> 6: 47.27 -27.257 -8.4830 +#> 7: 47.27 -14.960 -21.3995 +#> 8: 47.27 -18.325 7.3791 +#> 9: 47.27 -23.012 9.6591 +#> 10: 47.27 -16.189 -5.6100 +#> 11: 47.27 -25.607 -10.1334 +#> 12: 47.27 -25.065 -5.1394 +#> 13: 47.27 -25.841 -0.7281 +#> 14: 47.27 -21.518 -13.3293 +#> 15: 47.27 -21.248 -1.3199 +#> 16: 47.27 -13.676 -16.9497 +#> 17: 47.27 -13.899 -14.8890 +#> 18: 47.27 -12.276 -8.2472 +#> 19: 47.27 -13.768 -13.5242 +#> 20: 47.27 -24.866 -10.8744 +#> 21: 47.27 -14.486 -22.7674 +#> 22: 47.27 -4.122 -14.2893 +#> 23: 47.27 -11.218 22.4682 +#> 24: 47.27 -33.002 14.2114 +#> 25: 47.27 -16.251 -8.6796 +#> none A B # Plots the group-wise explanations plot(explanation_group, bar_plot_phi0 = TRUE, index_x_explain = c(1, 6)) ``` +![](figure_main/unnamed-chunk-22-1.png) + ## Explain custom models `shapr` currently natively supports explanation of predictions from @@ -1184,7 +1356,8 @@ and a minimal version which skips the `get_model_specs` function. 
We do this for the `gbm` model class from the `gbm` package, fitted to the same airquality data set as used above. -```{r,warning=TRUE, message=TRUE} + +```r library(gbm) formula_gbm <- as.formula(paste0(y_var, "~", paste0(x_var, collapse = "+"))) @@ -1234,9 +1407,16 @@ explanation_custom <- explain( predict_model = MY_predict_model, get_model_specs = MY_get_model_specs ) +#> Setting parameter 'n_batches' to 2 as a fair trade-off between memory consumption and computation time. +#> Reducing 'n_batches' typically reduces the computation time at the cost of increased memory consumption. # Plot results plot(explanation_custom, index_x_explain = c(1, 6)) +``` + +![](figure_main/unnamed-chunk-23-1.png) + +```r #### Minimal version of the three required model functions #### @@ -1257,11 +1437,115 @@ explanation_custom_minimal <- explain( prediction_zero = p0, predict_model = MY_MINIMAL_predict_model ) +#> Note: You passed a model to explain() which is not natively supported, and did not supply a 'get_model_specs' function to explain(). +#> Consistency checks between model and data is therefore disabled. +#> +#> Setting parameter 'n_batches' to 2 as a fair trade-off between memory consumption and computation time. +#> Reducing 'n_batches' typically reduces the computation time at the cost of increased memory consumption. # Plot results plot(explanation_custom_minimal, index_x_explain = c(1, 6)) ``` +![](figure_main/unnamed-chunk-23-2.png) + +## The parameters of the `vaeac` approach + +The `vaeac` approach is a very flexible method that supports mixed data. The main +parameters are the the number of layers in the networks (`vaeac.depth`), the width of the layers +(`vaeac.width`), the number of dimensions in the latent space (`vaeac.latent_dim`), +the activation function between the layers in the networks (`vaeac.activation_function`), +the learning rate in the ADAM optimizer (`vaeac.lr`), the number of `vaeac` models to initiate +to remedy poorly initiated model parameter values (`vaeac.n_vaeacs_initialize`), and +the number of learning epochs (`vaeac.epochs`). Call `?shapr::setup_approach.vaeac` for +a more detailed description of the parameters. + +There are additional extra parameters which can be set by including a named list in the call to +the `explain()` function. For example, we can the change the batch size to 32 by including +`vaeac.extra_parameters = list(vaeac.batch_size = 32)` as a parameter in the call the `explain()` function. See `?shapr::vaeac_get_extra_para_default` for a description of the possible +extra parameters to the `vaeac` approach. We strongly encourage the user to specify the main and extra parameters to the `vaeac` approach at the correct place in the call to the `explain()` function. That is, the main parameters are directly entered to the `explain()` function, while the extra parameters are included in a named list called `vaeac.extra_parameters`. However, the `vaeac` approach will try to correct for misplaced and duplicated parameters and give warnings to the user. + + + +```r +explanation_vaeac <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = p0, + n_samples = 100, + vaeac.width = 16, + vaeac.depth = 2, + vaeac.epochs = 3, + vaeac.n_vaeacs_initialize = 2 +) +``` + +Can look at the training and validation error for the trained `vaeac` model and see that `vaeac.epochs = 3` is likely to few epochs as it still seems like the `vaeac` model is learning. 
+```r +# Look at the training and validation errors. +vaeac_plot_evaluation_criteria(list("Vaeac 3 epochs" = explanation_vaeac), plot_type = "method") +``` + +![](figure_main/vaeac-plot-1-1.png) + + + + +### Early stopping +If we are uncertain about the choice of `vaeac.epochs`, we can rather use `vaeac` with early stopping. +We will then set `vaeac.epochs` to a large number which will act as a maximum number of allowed epochs, +and in the `vaeac.extra_parameters` list, we set `vaeac.epochs_early_stopping` to the number of epochs we +allow the `vaeac` model to go without improving its validation score. That is, if `vaeac.epochs_early_stopping = 2`, +then `vaeac` will stop the training procedure if there has been no improvement in the validation score +for `2` consecutive epochs, or if `vaeac.epochs` is reached. Note that if early stopping and progress +updates are used simultaneously, then the estimated time remaining will obviously be incorrect if early stopping +is applied. Furthermore, a value of `2` is too low for real-world applications, but we set it so low here +to make the vignette faster to build. + + +```r +explanation_vaeac_early_stop <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = p0, + n_samples = 100, + vaeac.width = 16, + vaeac.depth = 2, + vaeac.epochs = 1000, # Set it to a large number + vaeac.n_vaeacs_initialize = 2, + vaeac.extra_parameters = list(vaeac.epochs_early_stopping = 2) +) +``` + +We can compare with the previous version and see that the results are more stable now. + +```r +# Look at the training and validation errors. +vaeac_plot_evaluation_criteria( + list("Vaeac 3 epochs" = explanation_vaeac, "Vaeac early stopping" = explanation_vaeac_early_stop), + plot_type = "method" +) +``` + +![](figure_main/vaeac-plot-2-1.png) + +We can also compare the $MSE_{v}$ evaluation scores. + + +```r +plot_MSEv_eval_crit(list("Vaeac 3 epochs" = explanation_vaeac, "Vaeac early stopping" = explanation_vaeac_early_stop)) +``` + +![](figure_main/vaeac-plot-3-1.png) + + + +
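As described in the parameter overview above, the main `vaeac` parameters are passed directly to `explain()`, while the extra parameters go in the `vaeac.extra_parameters` list. Below is a minimal sketch of changing the batch size to 32; the object names `model`, `x_explain`, `x_train`, and `p0` are the same placeholders as in the examples above, the result object name is hypothetical, and the remaining argument values are illustrative only.

```r
# Sketch: main vaeac parameters as direct arguments, extra parameters in a named list
explanation_vaeac_batch_32 <- explain(
  model = model,
  x_explain = x_explain,
  x_train = x_train,
  approach = "vaeac",
  prediction_zero = p0,
  n_samples = 100,
  vaeac.epochs = 3,                                     # main parameter: number of learning epochs
  vaeac.n_vaeacs_initialize = 2,                        # main parameter: number of initializations
  vaeac.extra_parameters = list(vaeac.batch_size = 32)  # extra parameter: batch size of 32
)
```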
@@ -1292,7 +1576,8 @@ computation speed and memory consumption which depends on is most apparent for models with more than say 6-7 features. Below we a basic example where `n_batches=10`: -```{r} + +```r explanation_batch <- explain( model = model, x_explain = x_explain, @@ -1306,7 +1591,7 @@ explanation_batch <- explain( ## Parallelized computation In addition to reducing the memory consumption, the introduction of the -`n_batch` argument allows computation within each batch to be performed in parallel. +`n_batch` argument allows computation within each batch to be performed in parallel. The parallelization in `shapr::explain()` is handled by the `future_apply` which builds on the `future` environment. The `future` package works on all OS, allows the user to decide the parallelization @@ -1317,16 +1602,17 @@ clusters, and also supports progress updates for the parallelized task Note that, since it takes some time to duplicate data into different processes/machines when running in parallel, it is not always preferrable to run `shapr::explain()` in parallel, at least not with -many parallel sessions (hereby called **workers**). Parallelizatiob also +many parallel sessions (hereby called **workers**). Parallelization also increases the memory consumption proportionally, so you want to limit the number of workers for that reason too. In a future version of `shapr` we will provide experienced based automatic selection of the -number of workers. In the meanwhile, this is all lef to the user, and we +number of workers. In the meanwhile, this is all let to the user, and we advice that `n_batches` equals some positive integer multiplied by the number of workers. Below is a basic example of a parallelization with two workers. -```{r} + +```r library(future) future::plan(multisession, workers = 2) @@ -1346,10 +1632,10 @@ future::plan(sequential) # To return to non-parallel computation `shapr` provides progress updates of the computation of the Shapley values through the R-package `progressr`. This gives the user full -control over the visual apperance of the progress updates, and also -intergrates seemlessly with the parallelization framework `future` used +control over the visual appearance of the progress updates, and also +integrates seamlessly with the parallelization framework `future` used by `shapr` (see above). Note that the progress is updated as the batches -are completed, meaning that if you have choosen `n_batches=1`, you will +are completed, meaning that if you have chosen `n_batches=1`, you will not get intermediate updates, while if you set `n_batches=10` you will get updates on every 10% of the computation. @@ -1358,7 +1644,7 @@ command `progressr::handlers(local=TRUE)`, before calling `shapr::explain()`. To use progress updates for only a single call to `shapr::explain()`, one can wrap the call using `progressr::with_progress` as follows: -`progressr::with_progress({ shapr::explain() })` The default appearence +`progressr::with_progress({ shapr::explain() })` The default appearance of the progress updates is a basic ASCII-based horizontal progress bar. Other variants can be chosen by passing different strings to `progressr::handlers()`, some of which require additional packages. If @@ -1374,7 +1660,8 @@ options available with `progressr`, see the `progressr` [vignette](https://cran.r-project.org/web/packages/progressr/vignettes/progressr-intro.html). 
A full code example of using `progressr` with `shapr` is shown below: -```{r,eval = FALSE} + +```r library(progressr) progressr::handlers(global = TRUE) # If no progression handler is specified, the txtprogressbar is used @@ -1391,9 +1678,13 @@ explanation <- explain( n_batches = 10 ) +handlers("progress") #| [=================================>----------------------] 60% Estimating v(S) ``` + + +
diff --git a/vignettes/understanding_shapr.Rmd.orig b/vignettes/understanding_shapr.Rmd.orig new file mode 100644 index 000000000..6539df6d3 --- /dev/null +++ b/vignettes/understanding_shapr.Rmd.orig @@ -0,0 +1,1570 @@ +--- +title: "`shapr`: Explaining individual machine learning predictions with Shapley values" +author: "Camilla Lingjærde, Martin Jullum, Lars Henry Berge Olsen & Nikolai Sellereite" +output: rmarkdown::html_vignette +bibliography: ../inst/REFERENCES.bib +vignette: > + %\VignetteEncoding{UTF-8} + %\VignetteIndexEntry{`shapr`: Explaining individual machine learning predictions with Shapley values} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + markdown: + wrap: 72 +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.cap = "", + fig.width = 9, + fig.height = 5, + fig.path = "figure_main/", # Ensure that figures are saved in the right folder (this vignette will be built manually) + cache.path = "cache_main/", # Ensure that cached objects are saved in the right folder + warning = FALSE, + message = FALSE +) +``` + +```{r setup, include=FALSE, warning=FALSE} +library(shapr) +``` + +> [Introduction](#intro) + +> [Overview of Package](#overview) + +> [The Kernel SHAP Method](#KSHAP) + +> [Examples](#ex) + +> [Advanced usage](#advanced) + +> [Scalability and efficency](#scalability) + +> [Comparison to Lundberg & Lee's implementation](#compare) + + + +# Introduction + +The `shapr` package implements an extended version of the Kernel SHAP +method for approximating Shapley values (@lundberg2017unified), in which +dependence between the features is taken into account +(@aas2019explaining). Estimation of Shapley values is of interest when +attempting to explain complex machine learning models. Of existing work +on interpreting individual predictions, Shapley values is regarded to be +the only model-agnostic explanation method with a solid theoretical +foundation (@lundberg2017unified). Kernel SHAP is a computationally +efficient approximation to Shapley values in higher dimensions, but it +assumes independent features. @aas2019explaining extend the Kernel SHAP +method to handle dependent features, resulting in more accurate +approximations to the true Shapley values. See the +[paper](https://www.sciencedirect.com/sdfe/reader/pii/S0004370221000539/pdf) +(@aas2019explaining) for further details. + + + +
+ +# Overview of Package + +## Functions + +Here is an overview of the main functions. You can read their +documentation and see examples with `?function_name`. + +| Function Name | Description | +|:---------------------|:-------------------------------------------------| +| `explain` | Computes kernel SHAP values for test data. | +| `explain_forecast` | Analogous to `explain`, but for explaining forecasts from time series models. | +| `plot.shapr` | Plots the individual prediction explanations. Uses the `ggplot` and `ggbeeswarm` package. | + +: Main functions in the `shapr` package. + + + +
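To give a feel for the interface before the methodology is described below, here is a minimal sketch of a typical workflow. The objects `model`, `x_train`, `x_explain`, and `p0` are placeholders; complete, runnable examples are given in the Examples section.

```r
library(shapr)

# Sketch of the typical workflow: explain predictions from a fitted model
explanation <- explain(
  model = model,           # the fitted model whose predictions we want to explain
  x_explain = x_explain,   # the observations to explain
  x_train = x_train,       # training data used to estimate the conditional distributions
  approach = "empirical",  # or e.g. "gaussian", "copula", "ctree", "categorical", "vaeac"
  prediction_zero = p0     # baseline prediction, e.g. the mean of the training response
)

# Plot the individual prediction explanations
plot(explanation)
```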
+ +# The Kernel SHAP Method + +Assume a predictive model $f(\boldsymbol{x})$ for a response value $y$ +with features $\boldsymbol{x}\in \mathbb{R}^M$, trained on a training +set, and that we want to explain the predictions for new sets of data. +This may be done using ideas from cooperative game theory, letting a +single prediction take the place of the game being played and the +features the place of the players. Letting $N$ denote the set of all $M$ +players, and $S \subseteq N$ be a subset of $|S|$ players, the +"contribution" function $v(S)$ describes the total expected sum of +payoffs the members of $S$ can obtain by cooperation. The Shapley value +(@Shapley53) is one way to distribute the total gains to the players, +assuming that they all collaborate. The amount that player $i$ gets is +then + +$$\phi_i(v) = \phi_i = \sum_{S \subseteq N \setminus\{i\}} \frac{|S| ! (M-| S| - 1)!}{M!}(v(S\cup \{i\})-v(S)),$$ + +that is, a weighted mean over all subsets $S$ of players not containing +player $i$. @lundberg2017unified define the contribution function for a +certain subset $S$ of these features $\boldsymbol{x}_S$ as +$v(S) = \mbox{E}[f(\boldsymbol{x})|\boldsymbol{x}_S]$, the expected +output of the predictive model conditional on the feature values of the +subset. @lundberg2017unified names this type of Shapley values SHAP +(SHapley Additive exPlanation) values. Since the conditional +expectations can be written as + +```{=tex} +\begin{equation} +\label{eq:CondExp} +E[f(\boldsymbol{x})|\boldsymbol{x}_s=\boldsymbol{x}_S^*] = E[f(\boldsymbol{x}_{\bar{S}},\boldsymbol{x}_S)|\boldsymbol{x}_S=\boldsymbol{x}_S^*] = +\int f(\boldsymbol{x}_{\bar{S}},\boldsymbol{x}_S^*)\,p(\boldsymbol{x}_{\bar{S}}|\boldsymbol{x}_S=\boldsymbol{x}_S^*)d\boldsymbol{x}_{\bar{S}}, +\end{equation} +``` +the conditional distributions +$p(\boldsymbol{x}_{\bar{S}}|\boldsymbol{x}_S=\boldsymbol{x}_S^*)$ are +needed to compute the contributions. The Kernel SHAP method of +@lundberg2017unified assumes feature independence, so that +$p(\boldsymbol{x}_{\bar{S}}|\boldsymbol{x}_S=\boldsymbol{x}_S^*)=p(\boldsymbol{x}_{\bar{S}})$. +If samples $\boldsymbol{x}_{\bar{S}}^{k}, k=1,\ldots,K$, from +$p(\boldsymbol{x}_{\bar{S}}|\boldsymbol{x}_S=\boldsymbol{x}_S^*)$ are +available, the conditional expectation in above can be approximated by + +```{=tex} +\begin{equation} + v_{\text{KerSHAP}}(S) = \frac{1}{K}\sum_{k=1}^K f(\boldsymbol{x}_{\bar{S}}^{k},\boldsymbol{x}_S^*). +\end{equation} +``` +In Kernel SHAP, $\boldsymbol{x}_{\bar{S}}^{k}, k=1,\ldots,K$ are sampled +from the $\bar{S}$-part of the training data, *independently* of +$\boldsymbol{x}_{S}$. This is motivated by using the training set as the +empirical distribution of $\boldsymbol{x}_{\bar{S}}$, and assuming that +$\boldsymbol{x}_{\bar{S}}$ is independent of +$\boldsymbol{x}_S=\boldsymbol{x}_S^*$. Due to the independence +assumption, if the features in a given model are highly dependent, the +Kernel SHAP method may give a completely wrong answer. This can be +avoided by estimating the conditional distribution +$p(\boldsymbol{x}_{\bar{S}}|\boldsymbol{x}_S=\boldsymbol{x}_S^*)$ +directly and generating samples from this distribution. With this small +change, the contributions and Shapley values may then be approximated as +in the ordinary Kernel SHAP framework. @aas2019explaining propose three +different approaches for estimating the conditional probabilities which +are implemented: `empirical`, `gaussian` and `copula`. 
The package also
+implements the `ctree` method from @redelmeier2020explaining. The
+original `independence` approach of @lundberg2017unified is also
+available. The methods may also be combined, such that e.g. one method
+is used when conditioning on a small number of features, while another
+method is used otherwise.
+
+
+
+## Multivariate Gaussian Distribution Approach
+
+The first approach arises from the assumption that the feature vector
+$\boldsymbol{x}$ stems from a multivariate Gaussian distribution with
+some mean vector $\boldsymbol{\mu}$ and covariance matrix
+$\boldsymbol{\Sigma}$. Under this assumption, the conditional
+distribution
+$p(\boldsymbol{x}_{\bar{\mathcal{S}}} |\boldsymbol{x}_{\mathcal{S}}=\boldsymbol{x}_{\mathcal{S}}^*)$
+is also multivariate Gaussian\
+$\text{N}_{|\bar{\mathcal{S}}|}(\boldsymbol{\mu}_{\bar{\mathcal{S}}|\mathcal{S}},\boldsymbol{\Sigma}_{\bar{\mathcal{S}}|\mathcal{S}})$,
+with analytical expressions for the conditional mean vector
+$\boldsymbol{\mu}_{\bar{\mathcal{S}}|\mathcal{S}}$ and covariance matrix
+$\boldsymbol{\Sigma}_{\bar{\mathcal{S}}|\mathcal{S}}$, see
+@aas2019explaining for details. Hence, instead of sampling from the
+marginal empirical distribution of $\boldsymbol{x}_{\bar{\mathcal{S}}}$
+approximated by the training data, we can sample from the Gaussian
+conditional distribution, which is fitted using the training data. Using
+the resulting samples
+$\boldsymbol{x}_{\bar{\mathcal{S}}}^k, k=1,\ldots,K$, the conditional
+expectations can be approximated as in Kernel SHAP.
+
+
+
+## Gaussian Copula Approach
+
+If the features are far from multivariate Gaussian, an alternative
+approach is to instead represent the marginals by their empirical
+distributions, and model the dependence structure by a Gaussian copula.
+Assuming a Gaussian copula, we may convert the marginals of the training
+data to Gaussian features using their empirical distributions, and then
+fit a multivariate Gaussian distribution to these.
+
+To produce samples from the conditional distribution
+$p(\boldsymbol{x}_{\bar{\mathcal{S}}} |\boldsymbol{x}_{\mathcal{S}}=\boldsymbol{x}_{\mathcal{S}}^*)$,
+we convert the marginals of $\boldsymbol{x}_{\mathcal{S}}$ to Gaussians,
+sample from the conditional Gaussian distribution as above, and convert
+the marginals of the samples back to the original distribution. The
+resulting samples are then used to approximate the conditional
+expectations as in Kernel SHAP. While other copulas may be used, the
+Gaussian copula has the benefit that we may use the analytical
+expressions for the conditional mean vector
+$\boldsymbol{\mu}_{\bar{\mathcal{S}}|\mathcal{S}}$ and covariance matrix
+$\boldsymbol{\Sigma}_{\bar{\mathcal{S}}|\mathcal{S}}$.
+
+
+
+## Empirical Conditional Distribution Approach
+
+If both the dependence structure and the marginal distributions of
+$\boldsymbol{x}$ are very far from the Gaussian, neither of the two
+aforementioned methods will work very well. Few methods exist for the
+non-parametric estimation of conditional densities, and the classic
+kernel estimator (@rosenblatt1956) for non-parametric density estimation
+suffers greatly from the curse of dimensionality and does not provide a
+way to generate samples from the estimated distribution.
For such
+situations, @aas2019explaining propose an empirical conditional approach
+to sample approximately from
+$p(\boldsymbol{x}_{\bar{\mathcal{S}}}|\boldsymbol{x}_{\mathcal{S}}^*)$.
+The idea is to compute weights
+$w_{\mathcal{S}}(\boldsymbol{x}^*,\boldsymbol{x}^i),\ i=1,...,n_{\text{train}}$
+for all training instances based on their Mahalanobis distances (in the
+$\mathcal{S}$ subset only) to the instance $\boldsymbol{x}^*$ to be explained.
+Instead of sampling from this weighted (conditional) empirical
+distribution, @aas2019explaining suggest a more efficient variant,
+using only the $K$ instances with the largest weights:
+
+$$v_{\text{condKerSHAP}}(\mathcal{S}) = \frac{\sum_{k=1}^K w_{\mathcal{S}}(\boldsymbol{x}^*,
+\boldsymbol{x}^{[k]}) f(\boldsymbol{x}_{\bar{\mathcal{S}}}^{[k]},
+\boldsymbol{x}_{\mathcal{S}}^*)}{\sum_{k=1}^K w_{\mathcal{S}}(\boldsymbol{x}^*,\boldsymbol{x}^{[k]})}.$$
+
+The number of samples $K$ to be used in the approximate prediction can
+for instance be chosen such that the $K$ largest weights account for a
+fraction $\eta$, for example $0.9$, of the total weight. If $K$ exceeds
+a certain limit, for instance $5,000$, it might be set to that limit. A
+bandwidth parameter $\sigma$, used to scale the weights, must also be
+specified. This choice may be viewed as a bias-variance trade-off. A
+small $\sigma$ puts most of the weight on a few of the closest training
+observations and thereby gives low bias, but high variance. When
+$\sigma \rightarrow \infty$, this method converges to the original
+Kernel SHAP assuming feature independence. When the features
+are highly dependent, a small $\sigma$ is typically needed such that the
+bias does not dominate. @aas2019explaining show that a proper criterion
+for selecting $\sigma$ is a small-sample-size corrected version of the
+AIC known as AICc. As calculation of it is computationally intensive, an
+approximate version of the selection criterion is also suggested.
+Details on this are found in @aas2019explaining.
+
+
+
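+In `shapr`, these choices surface as arguments to `explain()`. Below is a minimal sketch of how
+the weight fraction $\eta$ and a fixed bandwidth $\sigma$ could be specified, assuming a fitted
+`model`, training data `x_train`, explicands `x_explain`, and baseline `p0` as in the examples
+later in this vignette:
+
+```{r, eval = FALSE}
+explanation_empirical <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "empirical",
+  prediction_zero = p0,
+  empirical.eta = 0.9, # Fraction of the total weight covered by the K largest weights
+  empirical.fixed_sigma = 0.1 # Bandwidth used to scale the weights
+)
+```
+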
+ +## Conditional Inference Tree Approach + +The previous three methods can only handle numerical data. This means +that if the data contains categorical/discrete/ordinal features, the +features first have to be one-hot encoded. When the number of +levels/features is large, this is not feasible. An approach that handles +mixed (i.e numerical, categorical, discrete, ordinal) features and both +univariate and multivariate responses is conditional inference trees +(@hothorn2006unbiased). + +Conditional inference trees is a special tree fitting procedure that +relies on hypothesis tests to choose both the splitting feature and the +splitting point. The tree fitting procedure is sequential: first a +splitting feature is chosen (the feature that is least independent of +the response), and then a splitting point is chosen for this feature. +This decreases the chance of being biased towards features with many +splits (@hothorn2006unbiased). + +We use conditional inference trees (*ctree*) to model the conditional +distribution, +$p(\boldsymbol{x}_{\bar{\mathcal{S}}}|\boldsymbol{x}_{\mathcal{S}}^*)$, +found in the Shapley methodology. First, we fit a different conditional +inference tree to each conditional distribution. Once a tree is fit for +given dependent features, the end node of +$\boldsymbol{x}_{\mathcal{S}}^*$ is found. Then, we sample from this end +node and use the resulting samples, +$\boldsymbol{x}_{\bar{\mathcal{S}}}^k, k=1,\ldots,K$, when approximating +the conditional expectations as in Kernel SHAP. See +@redelmeier2020explaining for more details. + +The conditional inference trees are fit using the *party* and *partykit* +packages (@partykit_package). + + + + + +## Variational AutoEncoder with Arbitrary Conditioning (vaeac) Approach +Another approach that supports mixed features is the Variational AutoEncoder +with Arbitrary Conditioning (@olsen2022using), abbreviated to `vaeac`. +The `vaeac` is an extension of the regular variational autoencoder +(@kingma2014autoencoding), but instead of giving a probabilistic representation +of the distribution $p(\boldsymbol{x})$ it gives a probabilistic representation +of the conditional distribution +$p(\boldsymbol{x}_{\bar{\mathcal{S}}} \mid \boldsymbol{x}_{\mathcal{S}})$, +for all possible feature subsets $\mathcal{S}\subseteq\mathcal{M}$ simultaneously, +where $\mathcal{M}$ is the set of all features. That is, only a single `vaeac` +model is needed to model all conditional distributions. + +The `vaeac` consists of three neural networks: a *full encoder*, a *masked encoder*, +and a *decoder*. The encoders map the full and masked/conditional input representations, +i.e., $\boldsymbol{x}$ and $\boldsymbol{x}_{\mathcal{S}}$, respectively, +to latent probabilistic representations. Sampled instances from this latent probabilistic +representations are sent to the decoder, which maps them back to the feature space +and provides a samplable probabilistic representation for the unconditioned features +$\boldsymbol{x}_{\bar{\mathcal{S}}}$. The full encoder is only used during the +training phase of the `vaeac` model to guide the training process of the masked encoder, +as the former relies on the full input sample $\boldsymbol{x}$, which is not accessible +in the deployment phase (when we generate the Monte Carlo samples), as we only have access +to $\boldsymbol{x}_{\mathcal{S}}$. The networks are trained by minimizing a variational +lower bound, and see Section 3 in @olsen2022using for an in-depth introduction to the +`vaeac` methodology. 
We use the `vaeac` model at the epoch which obtains the lowest
+validation IWAE score to generate the Monte Carlo samples used in the Shapley value computations.
+
+We fit the `vaeac` model using the *torch* package in $\textsf{R}$ (@torch). The main
+parameters are the number of layers in the networks (`vaeac.depth`), the width of the layers
+(`vaeac.width`), the number of dimensions in the latent space (`vaeac.latent_dim`),
+the activation function between the layers in the networks (`vaeac.activation_function`),
+the learning rate in the ADAM optimizer (`vaeac.lr`), the number of `vaeac` models to initiate
+to remedy poorly initiated model parameter values (`vaeac.n_vaeacs_initialize`), and
+the number of learning epochs (`vaeac.epochs`). Call `?shapr::setup_approach.vaeac` for
+a more detailed description of the parameters.
+
+There are additional extra parameters which can be set by including a named list in the call to
+the `explain()` function. For example, we can change the batch size to 32 by including
+`vaeac.extra_parameters = list(vaeac.batch_size = 32)` as a parameter in the call to the `explain()` function. See `?shapr::vaeac_get_extra_para_default` for a description of the possible
+extra parameters to the `vaeac` approach. We strongly encourage the user to specify the main and extra parameters to the `vaeac` approach at the correct place in the call to the `explain()` function. That is, the main parameters are entered directly into the `explain()` function, while the extra parameters are included in a named list called `vaeac.extra_parameters`. However, the `vaeac` approach will try to correct for misplaced and duplicated parameters and give warnings to the user.
+
+## Categorical Approach
+
+When the features are all categorical, we can estimate the conditional
+expectations using basic statistical formulas. For example, if we have
+three features, $x_1, x_2, x_3$ with three levels each (indicated as 1,
+2, 3), and we are provided with a table of counts indicating how many
+times each combination of feature values occurs, we can estimate the
+marginal and conditional probabilities as follows. Marginal
+probabilities are estimated by dividing the number of times a given
+feature (or features) takes on a certain value in the data set by the
+total number of observations in the data set. Conditional
+probabilities (for example, $P(X_1 = 1 | X_2 = 1)$) are estimated by
+first subsetting the data set to reflect the conditioning (i.e.,
+extracting all rows where $X_2 = 1$), and then dividing the number of
+times the feature on the left hand side of $|$ takes the given value in
+this subset by the total number of observations in this subset. Once the
+marginal and conditional probabilities are estimated for all
+combinations of feature values, each conditional expectation can be
+calculated. For example, the expected value of $X_1$ given $X_2 = 1$ and
+$X_3 = 2$ is
+$$E(X_1 \mid X_2 = 1, X_3 = 2) = \sum_{x}x P(X_1 = x | X_2=1, X_3=2) = \sum_{x} x \frac{P(X_1 = x, X_2 = 1, X_3 = 2)}{P(X_2=1, X_3=2)}.$$
+
+
+
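+To make this estimation procedure concrete, the following small sketch (not part of the `shapr`
+package, and using simulated data purely for illustration) shows how the marginal and conditional
+probabilities, and the resulting conditional expectation, can be computed with `data.table`:
+
+```{r, eval = FALSE}
+library(data.table)
+
+# Simulated categorical data with three features, each with levels 1, 2, 3
+set.seed(1)
+dt <- data.table(
+  X1 = sample(1:3, 1000, replace = TRUE),
+  X2 = sample(1:3, 1000, replace = TRUE),
+  X3 = sample(1:3, 1000, replace = TRUE)
+)
+
+# Marginal probability P(X2 = 1): rows with X2 == 1 divided by the total number of rows
+p_marginal <- dt[X2 == 1, .N] / dt[, .N]
+
+# Conditional probabilities P(X1 = x | X2 = 1, X3 = 2):
+# subset to the conditioning event and tabulate X1 within the subset
+dt_subset <- dt[X2 == 1 & X3 == 2]
+p_conditional <- dt_subset[, .N, by = X1][, prob := N / sum(N)][]
+
+# Conditional expectation E(X1 | X2 = 1, X3 = 2) as in the formula above
+e_conditional <- p_conditional[, sum(X1 * prob)]
+```
+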
+
+# Examples
+
+`shapr` supports computation of Shapley values with any predictive model
+which takes a set of numeric features and produces a numeric outcome.
+Note that the ctree method takes both numeric and categorical variables.
+Check under "Advanced usage" for an example of how this can be done.
+
+The following example shows how a simple `xgboost` model is trained
+using the `airquality` dataset, and how `shapr` can be used to explain
+the individual predictions. Note that the empirical conditional
+distribution approach is the default (i.e. `approach = "empirical"`).
+The Gaussian, Gaussian copula, ctree, categorical or independence
+approaches can be used instead by setting the argument `approach` to
+either `"gaussian"`, `"copula"`, `"ctree"`, `"categorical"` or
+`"independence"` in the code below.
+
+```{r, warning=FALSE}
+library(xgboost)
+library(data.table)
+
+data("airquality")
+data <- data.table::as.data.table(airquality)
+data <- data[complete.cases(data), ]
+
+x_var <- c("Solar.R", "Wind", "Temp", "Month")
+y_var <- "Ozone"
+
+ind_x_explain <- 1:6
+x_train <- data[-ind_x_explain, ..x_var]
+y_train <- data[-ind_x_explain, get(y_var)]
+x_explain <- data[ind_x_explain, ..x_var]
+
+# Fitting a basic xgboost model to the training data
+model <- xgboost::xgboost(
+  data = as.matrix(x_train),
+  label = y_train,
+  nround = 20,
+  verbose = FALSE
+)
+
+# Specifying the phi_0, i.e. the expected prediction without any features
+p0 <- mean(y_train)
+
+# Computing the actual Shapley values with kernelSHAP accounting for feature dependence using
+# the empirical (conditional) distribution approach with bandwidth parameter sigma = 0.1 (default)
+explanation <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "empirical",
+  prediction_zero = p0
+)
+
+# Printing the Shapley values for the test data.
+# For more information about the interpretation of the values in the table, see ?shapr::explain.
+print(explanation$shapley_values)
+
+# Plot the resulting explanations for observations 1 and 6
+plot(explanation, bar_plot_phi0 = FALSE, index_x_explain = c(1, 6))
+```
+
+There are multiple plot options specified by the `plot_type` argument in
+`plot`. The `waterfall` option shows the changes in the prediction score
+due to each feature's contribution (their Shapley values):
+
+```{r}
+plot(explanation, plot_type = "waterfall", index_x_explain = c(1, 6))
+```
+
+The other two plot options, `"beeswarm"` and `"scatter"`, can be useful
+when you have many observations that you want to explain. For the
+purpose of illustration, we explain the whole `airquality` dataset
+(including the training data) for these plot types. The
+`plot_type = "beeswarm"` summarises the distribution of the Shapley
+values along the x-axis across all features.
Each point gives the +Shapley value of a given instance, where the points are colored by the +feature value of that instance: + +```{r} +x_explain_many <- data[, ..x_var] +explanation_plot <- explain( + model = model, + x_explain = x_explain_many, + x_train = x_train, + approach = "empirical", + prediction_zero = p0 +) +plot(explanation_plot, plot_type = "beeswarm") +``` + +The `plot_type = "scatter"` plots the feature values on the x-axis and +Shapley values on the y-axis, as well as (optionally) a background +scatter_hist showing the distribution of the feature data: + +```{r} +plot(explanation_plot, plot_type = "scatter", scatter_hist = TRUE) +``` + +We can use mixed (i.e continuous, categorical, ordinal) data with ctree or vaeac. +Use ctree with mixed data in the following manner: + +```{r} +# convert the month variable to a factor +data[, Month_factor := as.factor(Month)] + +data_train_cat <- data[-ind_x_explain, ] +data_explain_cat <- data[ind_x_explain, ] + +x_var_cat <- c("Solar.R", "Wind", "Temp", "Month_factor") + +x_train_cat <- data_train_cat[, ..x_var_cat] +x_explain_cat <- data_explain_cat[, ..x_var_cat] + +# Fitting an lm model here as xgboost does not handle categorical features directly +# (work around in example below) +lm_formula <- as.formula(paste0(y_var, " ~ ", paste0(x_var_cat, collapse = " + "))) + +model_lm_cat <- lm(lm_formula, data_train_cat) + +p0 <- mean(y_train) +explanation_lm_cat <- explain( + model = model_lm_cat, + x_explain = x_explain_cat, + x_train = x_train_cat, + approach = "ctree", + prediction_zero = p0 +) + +# Plot the resulting explanations for observations 1 and 6, excluding +# the no-covariate effect +plot(explanation_lm_cat, bar_plot_phi0 = FALSE, index_x_explain = c(1, 6)) +``` + +We can specify parameters used to build the conditional inference trees +in the following manner. Default values are based on +@hothorn2006unbiased. + +```{r} +# Use the conditional inference tree approach +# We can specify parameters used to building trees by specifying mincriterion, +# minsplit, minbucket +explanation_ctree <- explain( + model = model_lm_cat, + x_explain = x_explain_cat, + x_train = x_train_cat, + approach = "ctree", + prediction_zero = p0, + ctree.mincriterion = 0.80, + ctree.minsplit = 20, + ctree.minbucket = 20 +) +# Default parameters (based on (Hothorn, 2006)) are: +# mincriterion = 0.95 +# minsplit = 20 +# minbucket = 7 +``` + +If **all** features are categorical, one may use the categorical +approach as follows: + +```{r} +# For the sake of illustration, convert ALL features to factors +data[, Solar.R_factor := as.factor(cut(Solar.R, 10))] +data[, Wind_factor := as.factor(cut(Wind, 3))] +data[, Temp_factor := as.factor(cut(Temp, 2))] +data[, Month_factor := as.factor(Month)] + +data_train_all_cat <- data[-ind_x_explain, ] +data_explain_all_cat <- data[ind_x_explain, ] + + +x_var_all_cat <- c("Solar.R_factor", "Wind_factor", "Temp_factor", "Month_factor") + +x_train_all_cat <- data_train_all_cat[, ..x_var_all_cat] +x_explain_all_cat <- data_explain_all_cat[, ..x_var_all_cat] + +# Fit an lm model here +lm_formula_all_cat <- as.formula(paste0(y_var, " ~ ", paste0(x_var_all_cat, collapse = " + "))) + +model_lm_all_cat <- lm(lm_formula_all_cat, data_train_all_cat) + +explanation_cat_method <- explain( + model = model_lm_all_cat, + x_explain = x_explain_all_cat, + x_train = x_train_all_cat, + approach = "categorical", + prediction_zero = p0 +) +``` + +Shapley values can be used to explain any predictive model. 
For
+predictive models taking time series as input, `approach='timeseries'`
+can be used. In such models, joint behavior of consecutive time points
+is often more important for the outcome than the single time points.
+Therefore, it makes sense to derive Shapley values for segments of the
+time series instead of for each single time point. In `shapr` this can
+be achieved through the `group` argument. Other optional parameters of
+`approach='timeseries'` are `timeseries.fixed_sigma_vec` and
+`timeseries.bounds` (a vector indicating upper and lower bounds of the
+time series if necessary).
+
+```{r}
+# Simulate time series data with AR(1)-structure
+set.seed(1)
+data_ts <- data.frame(matrix(NA, ncol = 41, nrow = 100))
+for (n in 1:100) {
+  set.seed(n)
+  e <- rnorm(42, mean = 0, sd = 1)
+
+  m_1 <- 0
+  for (i in 2:length(e)) {
+    m_1[i] <- 1 + 0.8 * m_1[i - 1] + e[i]
+  }
+  data_ts[n, ] <- m_1[-1]
+}
+data_ts <- data.table::as.data.table(data_ts)
+
+x_var_ts <- paste0("X", 1:40)
+y_var_ts <- "X41"
+
+ind_x_explain <- 1:6
+data_ts_train <- data_ts[-ind_x_explain]
+
+# Creating a predictive model (for illustration just predicting the next point in the time series with a linear model)
+lm_ts_formula <- as.formula(X41 ~ .)
+model_lm_ts <- lm(lm_ts_formula, data_ts_train)
+
+x_explain_ts <- data_ts[ind_x_explain, ..x_var_ts]
+x_train_ts <- data_ts[-ind_x_explain, ..x_var_ts]
+
+# Splitting the time series into 4 segments
+group_ts <- list(
+  S1 = paste0("X", 1:10),
+  S2 = paste0("X", 11:20),
+  S3 = paste0("X", 21:30),
+  S4 = paste0("X", 31:40)
+)
+
+
+p0_ts <- mean(unlist(data_ts_train[, ..y_var_ts]))
+
+explanation_timeseries <- explain(
+  model = model_lm_ts,
+  x_explain = x_explain_ts,
+  x_train = x_train_ts,
+  approach = "timeseries",
+  prediction_zero = p0_ts,
+  group = group_ts
+)
+```
+
+
+## MSEv evaluation criterion
+We can use the $\operatorname{MSE}_{v}$ criterion proposed by @frye2020shapley,
+and later used by, e.g., @olsen2022using and @olsen2023comparative, to evaluate
+and rank the approaches/methods. The $\operatorname{MSE}_{v}$ is given by
+```{=tex}
+\begin{align}
+    \label{eq:MSE_v}
+    \operatorname{MSE}_{v} = \operatorname{MSE}_{v}(\text{method } \texttt{q})
+    =
+    \frac{1}{N_\mathcal{S}} \sum_{\mathcal{S} \in \mathcal{P}^*(\mathcal{M})} \frac{1}{N_\text{explain}}
+    \sum_{i=1}^{N_\text{explain}} \left( f(\boldsymbol{x}^{[i]}) - {\hat{v}}_{\texttt{q}}(\mathcal{S}, \boldsymbol{x}^{[i]})\right)^2\!,
+\end{align}
+```
+where ${\hat{v}}_{\texttt{q}}$ is the estimated contribution function using method $\texttt{q}$ and $N_\mathcal{S} = |\mathcal{P}^*(\mathcal{M})| = 2^M-2$, i.e., we have removed the empty ($\mathcal{S} = \emptyset$) and the grand ($\mathcal{S} = \mathcal{M}$) combinations as they are method independent. This means that these two combinations do not influence the ranking of the methods, as the methods are not used to compute the contribution function for them.
+ +The motivation behind the +$\operatorname{MSE}_{v}$ criterion is that +$\mathbb{E}_\mathcal{S}\mathbb{E}_{\boldsymbol{x}} (v_{\texttt{true}}(\mathcal{S},\boldsymbol{x}) - \hat{v}_{\texttt{q}}(\mathcal{S}, \boldsymbol{x}))^2$ +can be decomposed as +```{=tex} +\begin{align} + \label{eq:expectation_decomposition} + \begin{split} + \mathbb{E}_\mathcal{S}\mathbb{E}_{\boldsymbol{x}} (v_{\texttt{true}}(\mathcal{S}, \boldsymbol{x})- \hat{v}_{\texttt{q}}(\mathcal{S}, \boldsymbol{x}))^2 + &= + \mathbb{E}_\mathcal{S}\mathbb{E}_{\boldsymbol{x}} (f(\boldsymbol{x}) - \hat{v}_{\texttt{q}}(\mathcal{S}, \boldsymbol{x}))^2 \\ + &\phantom{\,\,\,\,\,\,\,}- \mathbb{E}_\mathcal{S}\mathbb{E}_{\boldsymbol{x}} (f(\boldsymbol{x})-v_{\texttt{true}}(\mathcal{S}, \boldsymbol{x}))^2, + \end{split} +\end{align} +``` +see Appendix A in @covert2020understanding. The first term on the right-hand side of +the equation above can be estimated by $\operatorname{MSE}_{v}$, while the second +term is a fixed (unknown) constant not influenced by the approach \texttt{q}. Thus, a low value +of $\operatorname{MSE}_{v}$ indicates that the estimated contribution function $\hat{v}_{\texttt{q}}$ +is closer to the true counterpart $v_{\texttt{true}}$ than a high value. + +In `shapr`, we allow for weighting the combinations in the $\operatorname{MSE}_{v}$ evaluation criterion either +uniformly or by using the corresponding Shapley kernel weights (or the sampling frequencies when sampling of +combinations is used). +This is determined by the logical parameter `MSEv_uniform_comb_weights` in the `explain()` function, and the +default is to do uniform weighting, that is, `MSEv_uniform_comb_weights = TRUE`. + +### Advantage: +An advantage of the $\operatorname{MSE}_{v}$ criterion is that $v_\texttt{true}$ is not involved. +Thus, we can apply it as an evaluation criterion to real-world data sets where the true +Shapley values are unknown. + +### Disadvantages: +First, we can only use the $\operatorname{MSE}_{v}$ criterion to rank the methods and not assess +their closeness to the optimum since the minimum value of the $\operatorname{MSE}_{v}$ criterion +is unknown. Second, the criterion evaluates the contribution functions and not the Shapley values. + +Note that @olsen2023comparative observed a relatively linear relationship between the +$\operatorname{MSE}_{v}$ criterion and the mean absolute error $(\operatorname{MAE})$ between the +true and estimated Shapley values in extensive simulation studies where the true Shapley values +were known. That is, a method that achieves a low $\operatorname{MSE}_{v}$ score also tends to +obtain a low $\operatorname{MAE}$ score, and vice versa. + +### Confidence intervals +The $\operatorname{MSE}_{v}$ criterion can be written as +$\operatorname{MSE}_{v} = \frac{1}{N_\text{explain}}\sum_{i=1}^{N_\text{explain}} \operatorname{MSE}_{v,\text{explain }i}$. +We can therefore use the central limit theorem to compute an approximate +confidence interval for the $\operatorname{MSE}_{v}$ criterion. We have that +$\operatorname{MSE}_{v} \pm t_{\alpha/2}\frac{\operatorname{SD}(\operatorname{MSE}_{v})}{\sqrt{N_\text{explain}}}$ +is a $(1-\alpha/2)\%$ approximate confidence interval for the evaluation criterion, +where $t_{\alpha/2}$ is the $\alpha/2$ percentile of the $T_{N_\text{explain}-1}$ distribution. +Note that $N_\text{explain}$ should be large (rule of thumb is at least $30$) for the +central limit theorem to be valid. 
The quantities $\operatorname{MSE}_{v}$ and +$\frac{\operatorname{SD}(\operatorname{MSE}_{v})}{\sqrt{N_\text{explain}}}$ are returned by +the `explain()` function in the `MSEv` list of data tables. We can also compute similar +approximate confidence interval for $\operatorname{MSE}_{v}$ criterion for each +combination/coalition when only averaging over the observations. However, it does not +make sense in the other direction, i.e., when only averaging over the combinations for +each observation, as each combination is a different prediction tasks. + + +### MSEv examples + +Start by explaining the predictions by using different methods and combining them into lists. +```{r} +# We use more explicands here for more stable confidence intervals +ind_x_explain <- 1:25 +x_train <- data[-ind_x_explain, ..x_var] +y_train <- data[-ind_x_explain, get(y_var)] +x_explain <- data[ind_x_explain, ..x_var] + +# Fitting a basic xgboost model to the training data +model <- xgboost::xgboost( + data = as.matrix(x_train), + label = y_train, + nround = 20, + verbose = FALSE +) + +# Specifying the phi_0, i.e. the expected prediction without any features +p0 <- mean(y_train) + +# Independence approach +explanation_independence <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "independence", + prediction_zero = p0, + n_samples = 1e2, + n_batches = 5, + MSEv_uniform_comb_weights = TRUE +) + +# Empirical approach +explanation_empirical <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "empirical", + prediction_zero = p0, + n_samples = 1e2, + n_batches = 5, + MSEv_uniform_comb_weights = TRUE +) + +# Gaussian 1e1 approach +explanation_gaussian_1e1 <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "gaussian", + prediction_zero = p0, + n_samples = 1e1, + n_batches = 5, + MSEv_uniform_comb_weights = TRUE +) + +# Gaussian 1e2 approach +explanation_gaussian_1e2 <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "gaussian", + prediction_zero = p0, + n_samples = 1e2, + n_batches = 5, + MSEv_uniform_comb_weights = TRUE +) + +# Combined approach +explanation_combined <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = c("gaussian", "empirical", "independence"), + prediction_zero = p0, + n_samples = 1e2, + n_batches = 5, + MSEv_uniform_comb_weights = TRUE +) + +# Create a list of explanations with names +explanation_list_named <- list( + "Ind." = explanation_independence, + "Emp." = explanation_empirical, + "Gaus. 1e1" = explanation_gaussian_1e1, + "Gaus. 1e2" = explanation_gaussian_1e2, + "Combined" = explanation_combined +) +``` + + +We can then compare the different approaches by creating plots of the $\operatorname{MSE}_{v}$ evaluation criterion. + +```{r} +# Create the MSEv plots with approximate 95% confidence intervals +MSEv_plots <- plot_MSEv_eval_crit(explanation_list_named, + plot_type = c("overall", "comb", "explicand"), + CI_level = 0.95 +) + +# 5 plots are made +names(MSEv_plots) +``` +The main plot if interest is the `MSEv_bar`, which displays the $\operatorname{MSE}_{v}$ evaluation criterion for each method averaged over both the combinations/coalitions and test observations/explicands. However, we can also look at the other plots where +we have only averaged over the observations or the combinations (both as bar and line plots). 
+ +```{r} +# The main plot of the overall MSEv averaged over both the combinations and observations +MSEv_plots$MSEv_bar + +# The MSEv averaged over only the explicands for each combinations +MSEv_plots$MSEv_combination_bar + +# The MSEv averaged over only the combinations for each observation/explicand +MSEv_plots$MSEv_explicand_bar + +# To see which coalition S each of the `id_combination` corresponds to, +# i.e., which features that are conditions on. +explanation_list_named[[1]]$MSEv$MSEv_combination[, c("id_combination", "features")] +``` + +We can specify the `index_x_explain` and `id_combination` parameters in `plot_MSEv_eval_crit()` to only plot +certain test observations and combinations, respectively. + +```{r} +# We can specify which test observations or combinations to plot +plot_MSEv_eval_crit(explanation_list_named, + plot_type = "explicand", + index_x_explain = c(1, 3:4, 6), + CI_level = 0.95 +)$MSEv_explicand_bar +plot_MSEv_eval_crit(explanation_list_named, + plot_type = "comb", + id_combination = c(3, 4, 9, 13:15), + CI_level = 0.95 +)$MSEv_combination_bar +``` + + +We can also alter the plots design-wise as we do in the code below. + +```{r} +bar_text_n_decimals <- 1 +plot_MSEv_eval_crit(explanation_list_named) + + ggplot2::scale_x_discrete(limits = rev(levels(MSEv_plots$MSEv_bar$data$Method))) + + ggplot2::coord_flip() + + ggplot2::scale_fill_brewer(palette = "Paired") + + ggplot2::theme_minimal() + # This must be set before other theme calls + ggplot2::theme( + plot.title = ggplot2::element_text(size = 10), + legend.position = "bottom" + ) + + ggplot2::geom_text( + ggplot2::aes(label = sprintf( + paste("%.", sprintf("%d", bar_text_n_decimals), "f", sep = ""), + round(MSEv, bar_text_n_decimals) + )), + vjust = -0.35, # This number might need altering for different plots sizes + hjust = 1.1, # This number might need altering for different plots sizes + color = "black", + position = ggplot2::position_dodge(0.9), + size = 4 + ) +``` + + +## Main arguments in `explain` + +When using `explain`, the default behavior is to use all feature +combinations in the Shapley formula. Kernel SHAP's sampling based +approach may be used by specifying `n_combinations`, which is the number +of unique feature combinations to sample. If not specified, the exact +method is used. The computation time grows approximately exponentially +with the number of features. The training data and the model whose +predictions we wish to explain must be provided through the arguments +`x_train` and `model`. The data whose predicted values we wish to +explain must be given by the argument `x_explain`. Note that both +`x_train` and `x_explain` must be a `data.frame` or a `matrix`, and all +elements must be finite numerical values. Currently we do not support +missing values. The default approach when computing the Shapley values +is the empirical approach (i.e. `approach = "empirical"`). If you'd like +to use a different approach you'll need to set `approach` equal to +either `copula` or `gaussian`, or a vector of them, with length equal to +the number of features. If a vector, a combined approach is used, and +element `i` indicates the approach to use when conditioning on `i` +variables. For more details see [Combined approach](#combined) below. + +When computing the kernel SHAP values by `explain`, the maximum number +of samples to use in the Monte Carlo integration for every conditional +expectation is controlled by the argument `n_samples` (default equals +`1000`). 
The computation time grows approximately linear with this +number. You will also need to pass a numeric value for the argument +`prediction_zero`, which represents the prediction value when not +conditioning on any features. We recommend setting this equal to the +mean of the response, but other values, like the mean prediction of a +large test data set is also a possibility. If the empirical method is +used, specific settings for that approach, like a vector of fixed +$\sigma$ values can be specified through the argument +`empirical.fixed_sigma`. See `?explain` for more information. If +`approach = "gaussian"`, you may specify the mean vector and covariance +matrix of the data generating distribution by the arguments +`gaussian.mu` and `gaussian.cov_mat`. If not specified, they are +estimated from the training data. + +## Explaining a forecasting model using `explain_forecast` + +`shapr` provides a specific function, `explain_forecast`, to explain +forecasts from time series models, at one or more steps into the future. +The main difference compared to `explain` is that the data is supplied +as (set of) time series, in addition to index arguments (`train_idx` and +`explain_idx`) specifying which time points that represents the train +and explain parts of the data. See `?explain_forecast` for more +information. + +To demonstrate how to use the function, 500 observations are generated +which follow an AR(1) structure, i.e. +$y_t = 0.5 y_{t-1} + \varepsilon_t$. To this data an arima model of +order (2, 0, 0) is fitted, and we therefore would like to explain the +forecasts in terms of the two previous lags of the time series. This is +is specified through the argument `explain_y_lags = 2`. Note that some +models may also put restrictions on the amount of data required to make +a forecast. The AR(2) model we used there, for instance, requires two +previous time point to make a forecast. + +In the example, two separate forecasts, each three steps ahead, are +explained. To set the starting points of the two forecasts, +`explain_idx` is set to `499:500`. This means that one forecast of +$t = (500, 501, 502)$ and another of $t = (501, 502, 503)$, will be +explained. In other words, `explain_idx` tells `shapr` at which points +in time data was available up until, when making the forecast to +explain. + +In the same way, `train_idx` denotes the points in time used to estimate +the conditional expectations used to explain the different forecasts. +Note that since we want to explain the forecasts in terms of the two +previous lags (`explain_y_lags = 2`), the smallest value of `train_idx` +must also be 2, because at time $t = 1$ there was only a single +observation available. + +Since the data is stationary, the mean of the data is used as value of +`prediction_zero` (i.e. $\phi_0$). This can however be chosen +differently depending on the data and application. + +For a multivariate model such as a VAR (Vector AutoRegressive model), it +may be of more interesting to explain the impact of each variable, +rather than each lag of each variable. This can be done by setting +`group_lags = TRUE`. + +```{r} +# Simulate time series data with AR(1)-structure. +set.seed(1) +data_ts <- data.frame(Y = arima.sim(list(order = c(1, 0, 0), ar = .5), n = 500)) +data_ts <- data.table::as.data.table(data_ts) + +# Fit an ARIMA(2, 0, 0) model. +arima_model <- arima(data_ts, order = c(2, 0, 0)) + +# Set prediction zero as the mean of the data for each forecast point. 
+p0_ar <- rep(mean(data_ts$Y), 3) + +# Explain forecasts from points t = 499 and t = 500. +explain_idx <- 499:500 + +explanation_forecast <- explain_forecast( + model = arima_model, + y = data_ts, + train_idx = 2:498, + explain_idx = 499:500, + explain_y_lags = 2, + horizon = 3, + approach = "empirical", + prediction_zero = p0_ar, + group_lags = FALSE +) +explanation_forecast +``` + +Note that for a multivariate model such as a VAR (Vector AutoRegressive +model), or for models also including several exogenous variables, it may +be of more informative to explain the impact of each variable, rather +than each lag of each variable. This can be done by setting +`group_lags = TRUE`. This does not make sense for this model, however, +as that would result in decomposing the forecast into a single group. + +We now give a more hands on example of how to use the `explain_forecast` +function. Say that we have an AR(2) model which describes the change +over time of the variable `Temp` in the dataset `airquality`. It seems +reasonable to assume that the temperature today should affect the +temperature tomorrow. To a lesser extent, we may also suggest that the +temperature today should also have an impact on that of the day after +tomorrow. + +We start by building our AR(2) model, naming it `model_ar_temp`. This +model is then used to make a forecast of the temperature of the day that +comes after the last day in the data, this forecast starts from index +153. + +```{r} +data <- data.table::as.data.table(airquality) + +model_ar_temp <- ar(data$Temp, order = 2) + +predict(model_ar_temp, n.ahead = 2)$pred +``` + +First, we pass the model and the data as `model` and `y`. Since we have +an AR(2) model, we want to explain the forecasts in terms of the two +previous lags, whihc we specify with `explain_y_lags = 2`. Then, we let +`shapr` know which time indices to use as training data through the +argument `train_idx`. We use `2:152`, meaning that we skip the first +index, as we want to explain the two previous lags. Letting the training +indices go up until 152 means that every point in time except the first +and last will be used as training data. + +The last index, 153 is passed as the argument `explain_idx`, which means +that we want to explain a forecast made from time point 153 in the data. +The argument `horizon` is set to 2 in order to explain a forecast of +length 2. + +The argument `prediction_zero` is set to the mean of the time series, +and is repeated two times. Each value of `prediction_zero` is the +baseline for each forecast horizon. In our example, we assume that given +no effect from the two lags, the temperature would just be the average +during the observed period. Finally, we opt to not group the lags by +setting `group_lags` to `FALSE`. This means that lag 1 and 2 will be +explained separately. Grouping lags may be more interesting to do in a +model with multiple variables, as it is then possible to explain each +variable separately. + +```{r} +explanation <- explain_forecast( + model = model_ar_temp, + y = data[, "Temp"], + train_idx = 2:152, + explain_idx = 153, + explain_y_lags = 2, + horizon = 2, + approach = "empirical", + prediction_zero = rep(mean(data$Temp), 2), + group_lags = FALSE, + n_batches = 1, + timing = FALSE +) + +print(explanation) +``` + +The results are presented per value of `explain_idx` and forecast +horizon. We can see that the mean temperature was around 77.9 degrees. 
+
+At horizon 1, the first lag in the model caused it to be 6.6 degrees
+lower, and the second lag had just a minor effect. At horizon 2, the
+first lag has a slightly smaller negative impact, and the second lag has
+a slightly larger impact.
+
+It is also possible to explain a forecasting model which uses exogenous
+regressors. The previous example is expanded to use an ARIMA(2,0,0)
+model with `Wind` as an exogenous regressor. Since the exogenous
+regressor must be available for the predicted time points, the model is
+fit on only the first 151 observations, leaving two observations of
+`Wind` to be used as exogenous values during the prediction phase.
+
+```{r}
+data <- data.table::as.data.table(airquality)
+
+data_fit <- data[seq_len(151), ]
+
+model_arimax_temp <- arima(data_fit$Temp, order = c(2, 0, 0), xreg = data_fit$Wind)
+
+newxreg <- data[-seq_len(151), "Wind", drop = FALSE]
+
+predict(model_arimax_temp, n.ahead = 2, newxreg = newxreg)$pred
+```
+
+The `shapr` package can then explain not only the two autoregressive
+lags, but also the single lag of the exogenous regressor. In order to do
+so, the `Wind` variable is passed as the argument `xreg`, and
+`explain_xreg_lags` is set to 1. Notice how only the first 151
+observations are used for `y` and all 153 are used for `xreg`. This
+makes it possible for `shapr` to not only explain the effect of the
+first lag of the exogenous variable, but also the contemporary effect
+during the forecasting period.
+
+```{r}
+explanation <- explain_forecast(
+  model = model_arimax_temp,
+  y = data_fit[, "Temp"],
+  xreg = data[, "Wind"],
+  train_idx = 2:150,
+  explain_idx = 151,
+  explain_y_lags = 2,
+  explain_xreg_lags = 1,
+  horizon = 2,
+  approach = "empirical",
+  prediction_zero = rep(mean(data_fit$Temp), 2),
+  group_lags = FALSE,
+  n_batches = 1,
+  timing = FALSE
+)
+
+print(explanation$shapley_values)
+```
+
+
+
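+As noted above, grouping the lags can be more informative when several variables enter the
+model. A minimal sketch of how this could look for the exogenous-regressor example, simply
+switching `group_lags` to `TRUE` so that all lags of `Temp` and all lags of `Wind` each receive
+a single Shapley value (the other arguments are kept as above):
+
+```{r, eval = FALSE}
+explanation_group_lags <- explain_forecast(
+  model = model_arimax_temp,
+  y = data_fit[, "Temp"],
+  xreg = data[, "Wind"],
+  train_idx = 2:150,
+  explain_idx = 151,
+  explain_y_lags = 2,
+  explain_xreg_lags = 1,
+  horizon = 2,
+  approach = "empirical",
+  prediction_zero = rep(mean(data_fit$Temp), 2),
+  group_lags = TRUE, # One Shapley value per variable instead of per lag
+  n_batches = 1,
+  timing = FALSE
+)
+
+print(explanation_group_lags$shapley_values)
+```
+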
+ +# Advanced usage + + + +## Combined approach + +In addition to letting the user select one of the five aforementioned +approaches for estimating the conditional distribution of the data (i.e. +`approach` equals either [`"gaussian"`](#gaussian), +[`"copula"`](#copula), [`"empirical"`](#empirical), +[`"ctree"`](#ctree), [`"categorical"`](#categorical)) or `"timeseries"`, the package +allows the user to combine the given approaches. To simplify the usage, +the flexibility is restricted such that the same approach is used when +conditioning on the same number of features. This is also in line +@aas2019explaining [, Section 3.4]. + +This can be done by setting `approach` equal to a character vector, +where the length of the vector is one less than the number of features in the +model. Consider a situation where you have trained a model that consists +of 10 features, and you would like to use the `"empirical"` approach +when you condition on 1-3 features, the `"copula"` approach when you +condition on 4-5 features, and the `"gaussian"` approach when +conditioning on 6 or more features. This can be applied by simply +passing +`approach = c(rep("empirical", 3), rep("copula", 2), rep("gaussian", 4))`, +i.e. `approach[i]` determines which method to use when conditioning on +`i` features. Conditioning on all features needs no approach as that is given +by the complete prediction itself, and should thus not be part of the vector. + +The code below exemplifies this approach for a case where there are four +features, using `"empirical", "copula"` and `"gaussian"` when +conditioning on respectively 1, 2 and 3 features. + +```{r} +# Use the combined approach +explanation_combined <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = c("empirical", "copula", "gaussian"), + prediction_zero = p0 +) +# Plot the resulting explanations for observations 1 and 6, excluding +# the no-covariate effect +plot(explanation_combined, bar_plot_phi0 = FALSE, index_x_explain = c(1, 6)) +``` + +As a second example using `"ctree"` to conditin on 1 and 2 features, and +`"empirical"` when conditioning on 3 features: + +```{r} +# Use the combined approach +explanation_combined <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = c("ctree", "ctree", "empirical"), + prediction_zero = p0 +) +``` + +## Explain groups of features + +In some cases, especially when the number of features is very large, it +may be more appropriate to explain predictions in terms of groups of +features instead of single features, see (@jullum2021efficient) for +intuition and real world examples. 
Explaining prediction in terms of +groups of features is very easy using `shapr`: + +```{r} +# Define the feature groups +group_list <- list( + A = c("Temp", "Month"), + B = c("Wind", "Solar.R") +) + +# Use the empirical approach +explanation_group <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "empirical", + prediction_zero = p0, + group = group_list +) +# Prints the group-wise explanations +explanation_group +# Plots the group-wise explanations +plot(explanation_group, bar_plot_phi0 = TRUE, index_x_explain = c(1, 6)) +``` + +## Explain custom models + +`shapr` currently natively supports explanation of predictions from +models fitted with the following functions: + +- `stats::lm` +- `stats::glm` +- `ranger::ranger` +- `mgcv::gam` +- `xgboost::xgboost`/`xgboost::xgb.train` + +Any continuous response regression model or binary classification model +of these model classes, can be explained with the package directly as +exemplified above. Moreover, essentially any feature dependent +prediction model can be explained by the package by specifying two (or +one) simple additional functions for your model. + +*Note: The below procedure for specifying custom models was changed in +shapr v0.3.0* The first function is `predict_model`, taking the model +and data (as a `matrix` or `data.frame/data.table`) as input and +outputting the corresponding prediction as a numeric vector. The second +(optional, but highly recommended) function is `get_model_specs`, taking +the model as input and outputting a list with the following elements: +*labels* (vector with the feature names to compute Shapley values for), +*classes* (a named vector with the labels as names and the class type as +elements), *factor_levels* (a named list with the labels as names and +vectors with the factor levels as elements (NULL if the feature is not a +factor)). The `get_model_specs` function is used to check that the +format of the data passed to `explain` have the correct format in terms +of the necessary feature columns being available and having the correct +class/attributes. It is highly recommended to do such checks in order to +ensure correct usage of `explain`. If, for some reason, such checking is +not desirable, one does not have to provide the `get_model_specs` +function. This will, however, throw a warning that all feature +consistency checking against the model is disabled. + +Once the above functions are created, you can explain predictions from +this model as before by passing the functions through the input +arguments `predict_model` and `get_model_specs` of `explain()`. + +These functions **can** be made general enough to handle all supported +model types of that class, or they can be made minimal, possibly only +allowing explanation of the specific version of the model class at hand. +Below we give examples of both full support versions of these functions +and a minimal version which skips the `get_model_specs` function. We do +this for the `gbm` model class from the `gbm` package, fitted to the +same airquality data set as used above. 
+ +```{r,warning=TRUE, message=TRUE} +library(gbm) + +formula_gbm <- as.formula(paste0(y_var, "~", paste0(x_var, collapse = "+"))) +# Fitting a gbm model +set.seed(825) +model_gbm <- gbm::gbm( + formula_gbm, + data = cbind(x_train, Ozone = y_train), + distribution = "gaussian" +) + +#### Full feature versions of the three required model functions #### +MY_predict_model <- function(x, newdata) { + if (!requireNamespace("gbm", quietly = TRUE)) { + stop("The gbm package is required for predicting train models") + } + model_type <- ifelse( + x$distribution$name %in% c("bernoulli", "adaboost"), + "classification", + "regression" + ) + if (model_type == "classification") { + predict(x, as.data.frame(newdata), type = "response", n.trees = x$n.trees) + } else { + predict(x, as.data.frame(newdata), n.trees = x$n.trees) + } +} +MY_get_model_specs <- function(x) { + feature_specs <- list() + feature_specs$labels <- labels(x$Terms) + m <- length(feature_specs$labels) + feature_specs$classes <- attr(x$Terms, "dataClasses")[-1] + feature_specs$factor_levels <- setNames(vector("list", m), feature_specs$labels) + feature_specs$factor_levels[feature_specs$classes == "factor"] <- NA # model object doesn't contain factor levels info + return(feature_specs) +} + +# Compute the Shapley values +set.seed(123) +p0 <- mean(y_train) +explanation_custom <- explain( + model = model_gbm, + x_explain = x_explain, + x_train = x_train, + approach = "empirical", + prediction_zero = p0, + predict_model = MY_predict_model, + get_model_specs = MY_get_model_specs +) + +# Plot results +plot(explanation_custom, index_x_explain = c(1, 6)) + + +#### Minimal version of the three required model functions #### +# Note: Working only for this exact version of the model class +# Avoiding to define get_model_specs skips all feature +# consistency checking between your data and model +MY_MINIMAL_predict_model <- function(x, newdata) { + predict(x, as.data.frame(newdata), n.trees = x$n.trees) +} + +# Compute the Shapley values +set.seed(123) +explanation_custom_minimal <- explain( + model = model_gbm, + x_explain = x_explain, + x_train = x_train, + approach = "empirical", + prediction_zero = p0, + predict_model = MY_MINIMAL_predict_model +) + +# Plot results +plot(explanation_custom_minimal, index_x_explain = c(1, 6)) +``` + +## The parameters of the `vaeac` approach + +The `vaeac` approach is a very flexible method that supports mixed data. The main +parameters are the the number of layers in the networks (`vaeac.depth`), the width of the layers +(`vaeac.width`), the number of dimensions in the latent space (`vaeac.latent_dim`), +the activation function between the layers in the networks (`vaeac.activation_function`), +the learning rate in the ADAM optimizer (`vaeac.lr`), the number of `vaeac` models to initiate +to remedy poorly initiated model parameter values (`vaeac.n_vaeacs_initialize`), and +the number of learning epochs (`vaeac.epochs`). Call `?shapr::setup_approach.vaeac` for +a more detailed description of the parameters. + +There are additional extra parameters which can be set by including a named list in the call to +the `explain()` function. For example, we can the change the batch size to 32 by including +`vaeac.extra_parameters = list(vaeac.batch_size = 32)` as a parameter in the call the `explain()` function. See `?shapr::vaeac_get_extra_para_default` for a description of the possible +extra parameters to the `vaeac` approach. 
We strongly encourage the user to specify the main and extra parameters to the `vaeac` approach at the correct place in the call to the `explain()` function. That is, the main parameters are entered directly into the `explain()` function, while the extra parameters are included in a named list called `vaeac.extra_parameters`. However, the `vaeac` approach will try to correct for misplaced and duplicated parameters and give warnings to the user.
+
+Below we fit a `vaeac` model to the data from the main example above, using a small network and
+only a few epochs to keep the build time of the vignette low.
+
+```{r vaeac-train-first-time, cache = TRUE}
+explanation_vaeac <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "vaeac",
+  prediction_zero = p0,
+  n_samples = 100,
+  vaeac.width = 16,
+  vaeac.depth = 2,
+  vaeac.epochs = 3,
+  vaeac.n_vaeacs_initialize = 2
+)
+```
+
+We can look at the training and validation errors for the trained `vaeac` model and see that `vaeac.epochs = 3` is likely too few epochs, as it still seems like the `vaeac` model is learning.
+```{r vaeac-plot-1, cache = TRUE}
+# Look at the training and validation errors.
+vaeac_plot_evaluation_criteria(list("Vaeac 3 epochs" = explanation_vaeac), plot_type = "method")
+```
+
+
+
+
+### Early stopping
+If we are uncertain about the choice of `vaeac.epochs`, we can rather use `vaeac` with early stopping.
+We will then set `vaeac.epochs` to a large number which will act as a maximum number of allowed epochs
+and, in the `vaeac.extra_parameters` list, we set `vaeac.epochs_early_stopping` to the number of epochs we
+allow the `vaeac` model to not improve its validation score. That is, if `vaeac.epochs_early_stopping = 2`,
+then `vaeac` will stop the training procedure if there has been no improvement in the validation score
+for `2` consecutive epochs, or if `vaeac.epochs` is reached. Note that if using early stopping and progress
+updates simultaneously, then the estimated time remaining will obviously be incorrect if early stopping
+is applied. Furthermore, a value of `2` is too low for real world applications, but we set it so low here
+to make the vignette faster to build.
+
+```{r vaeac-early-stop, cache = TRUE}
+explanation_vaeac_early_stop <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "vaeac",
+  prediction_zero = p0,
+  n_samples = 100,
+  vaeac.width = 16,
+  vaeac.depth = 2,
+  vaeac.epochs = 1000, # Set it to a large number
+  vaeac.n_vaeacs_initialize = 2,
+  vaeac.extra_parameters = list(vaeac.epochs_early_stopping = 2)
+)
+```
+
+We can compare with the previous version and see that the results are more stable now.
+```{r vaeac-plot-2, cache = TRUE}
+# Look at the training and validation errors.
+vaeac_plot_evaluation_criteria(
+  list("Vaeac 3 epochs" = explanation_vaeac, "Vaeac early stopping" = explanation_vaeac_early_stop),
+  plot_type = "method"
+)
+```
+
+We can also compare the $MSE_{v}$ evaluation scores.
+
+```{r vaeac-plot-3, cache = TRUE}
+plot_MSEv_eval_crit(list("Vaeac 3 epochs" = explanation_vaeac, "Vaeac early stopping" = explanation_vaeac_early_stop))
+```
+
+
+
+
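+If you want to see which extra parameters are available and what their default values are, the
+exported helper referenced above can be inspected directly. A minimal sketch, assuming the helper
+returns the defaults as a named list when called without arguments, and reusing the model and data
+objects from the example above:
+
+```{r, eval = FALSE}
+# Inspect the default values of the extra vaeac parameters
+str(vaeac_get_extra_para_default())
+
+# Override one of them, e.g. the batch size, through vaeac.extra_parameters
+explanation_vaeac_batch <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "vaeac",
+  prediction_zero = p0,
+  n_samples = 100,
+  vaeac.epochs = 3,
+  vaeac.n_vaeacs_initialize = 2,
+  vaeac.extra_parameters = list(vaeac.batch_size = 32)
+)
+```
+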
+
+# Scalability and efficiency
+
+## Batch computation
+
+The computational complexity of Shapley value based explanations grows
+fast in the number of features, as the number of conditional
+expectations one needs to estimate in the Shapley formula grows
+exponentially. As outlined [above](#KSHAP), estimating each of
+these conditional expectations is also computationally expensive,
+typically requiring estimation of a conditional probability
+distribution, followed by Monte Carlo integration. These computations
+are not only heavy for the CPU, they also require a lot of memory (RAM),
+which typically is a limited resource. By doing the most resource-hungry
+computations (the computation of v(S)) in sequential batches with
+different feature subsets $S$, the memory usage can be significantly
+reduced. Such batching comes at the cost of an increase in computation
+time, which depends on the number of feature subsets (`n_combinations`),
+the number of features, the estimation `approach` and so on. When
+calling `shapr::explain()`, we allow the user to set the number of
+batches with the argument `n_batches`. The default of this argument is
+`NULL`, which uses a (hopefully) reasonable trade-off between
+computation speed and memory consumption which depends on
+`n_combinations` and `approach`. The memory/computation time trade-off
+is most apparent for models with more than say 6-7 features. Below we
+give a basic example where `n_batches = 10`:
+
+```{r}
+explanation_batch <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "empirical",
+  prediction_zero = p0,
+  n_batches = 10
+)
+```
+
+## Parallelized computation
+
+In addition to reducing the memory consumption, the introduction of the
+`n_batches` argument allows computation within each batch to be performed in parallel.
+The parallelization in `shapr::explain()` is handled by the
+`future.apply` package, which builds on the `future` framework. The `future`
+package works on all operating systems, allows the user to decide the parallelization
+backend (multiple R processes or forking), works directly with HPC
+clusters, and also supports progress updates for the parallelized task
+(see below).
+
+Note that, since it takes some time to duplicate data into different
+processes/machines when running in parallel, it is not always
+preferable to run `shapr::explain()` in parallel, at least not with
+many parallel sessions (hereby called **workers**). Parallelization also
+increases the memory consumption proportionally, so you want to limit
+the number of workers for that reason too. In a future version of
+`shapr` we will provide experience-based automatic selection of the
+number of workers. In the meantime, this is all left to the user, and we
+advise that `n_batches` equals some positive integer multiplied by the
+number of workers. Below is a basic example of a parallelization with
+two workers.
+
+```{r}
+library(future)
+future::plan(multisession, workers = 2)
+
+explanation_par <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "empirical",
+  prediction_zero = p0,
+  n_batches = 10
+)
+
+future::plan(sequential) # To return to non-parallel computation
+```
+
+## Progress updates
+
+`shapr` provides progress updates of the computation of the Shapley
+values through the R-package `progressr`. This gives the user full
+control over the visual appearance of the progress updates, and also
+integrates seamlessly with the parallelization framework `future` used
+by `shapr` (see above).
Note that the progress is updated as the batches +are completed, meaning that if you have chosen `n_batches=1`, you will +not get intermediate updates, while if you set `n_batches=10` you will +get updates on every 10% of the computation. + +Progress updates are enabled for the current R-session by running the +command `progressr::handlers(local=TRUE)`, before calling +`shapr::explain()`. To use progress updates for only a single call to +`shapr::explain()`, one can wrap the call using +`progressr::with_progress` as follows: +`progressr::with_progress({ shapr::explain() })` The default appearance +of the progress updates is a basic ASCII-based horizontal progress bar. +Other variants can be chosen by passing different strings to +`progressr::handlers()`, some of which require additional packages. If +you are using Rstudio, the progress can be displayed directly in the gui +with `progressr::handlers('rstudio')` (requires the `rstudioapi` +package). If you are running Windows, you may use the pop-up gui +progress bar `progressr::handlers('handler_winprogressbar')`. A wrapper +for progressbar of the flexible `cli` package is also available +`progressr::handlers('cli')` (requires the `cli` package). + +For a full list of all progression handlers and the customization +options available with `progressr`, see the `progressr` +[vignette](https://cran.r-project.org/web/packages/progressr/vignettes/progressr-intro.html). +A full code example of using `progressr` with `shapr` is shown below: + +```{r,eval = FALSE} +library(progressr) +progressr::handlers(global = TRUE) +# If no progression handler is specified, the txtprogressbar is used +# Other progression handlers: +# progressr::handlers('rstudio') # requires the 'rstudioapi' package +# progressr::handlers('handler_winprogressbar') # Window only +# progressr::handlers('cli') # requires the 'cli' package +explanation <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "empirical", + prediction_zero = p0, + n_batches = 10 +) + +handlers("progress") +#| [=================================>----------------------] 60% Estimating v(S) +``` + + + + + + +
+
+# Comparison to Lundberg & Lee's implementation
+
+As mentioned above, the original (independence-assuming) Kernel SHAP
+implementation can be approximated by setting a large $\sigma$ value
+using our empirical approach. If we specify that the distances to *all*
+training observations should be used (i.e. setting
+`approach = "empirical"` and `empirical.eta = 1` when using `explain`),
+we can approximate the original method arbitrarily well by increasing
+$\sigma$. For completeness of the `shapr` package, we have also
+implemented a version of the original method, which samples training
+observations independently of their distances to the test
+observations (i.e. without the large-$\sigma$ approximation). This
+method is available by using `approach = "independence"` in `explain`.
+
+We have compared the results using these two variants with the original
+implementation of @lundberg2017unified, available through the Python
+library [`shap`](https://github.com/slundberg/shap). As above, we used
+the Boston housing data and a model trained via `xgboost`. We specify that *all*
+training observations should be used when explaining all of the 6 test
+observations. To run the individual explanation method in the `shap`
+Python library we use the `reticulate` `R`-package, allowing Python code
+to run within `R`. As this requires installation of the Python package, the
+comparison code and results are not included in this vignette, but can be
+found
+[here](https://github.com/NorskRegnesentral/shapr/blob/master/inst/scripts/compare_shap_python.R).
+As indicated by the (commented out) results in the file above, both
+methods in our `R`-package give (up to numerical approximation error)
+identical results to the original implementation in the Python `shap`
+library.
+
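+Below is a minimal sketch of the two variants discussed above, reusing
+the `model`, `x_explain`, `x_train` and `p0` objects from earlier. Note
+that the name of the bandwidth argument used to mimic a large $\sigma$
+(here assumed to be `empirical.fixed_sigma`) is our assumption; see the
+documentation of the empirical approach for the exact argument name.
+
+```{r,eval = FALSE}
+# The original method without the large-sigma approximation
+explanation_independence <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "independence",
+  prediction_zero = p0
+)
+
+# The large-sigma approximation: use all training observations
+# (empirical.eta = 1) and a large kernel bandwidth
+explanation_large_sigma <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "empirical",
+  empirical.eta = 1,
+  empirical.fixed_sigma = 1000, # Assumed argument name for the bandwidth
+  prediction_zero = p0
+)
+```
+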
+ +# References diff --git a/vignettes/understanding_shapr_vaeac.Rmd b/vignettes/understanding_shapr_vaeac.Rmd new file mode 100644 index 000000000..a0554e1b0 --- /dev/null +++ b/vignettes/understanding_shapr_vaeac.Rmd @@ -0,0 +1,872 @@ +--- +title: "More details and advanced usage of the `vaeac` approach" +author: "Lars Henry Berge Olsen" +output: rmarkdown::html_vignette +bibliography: ../inst/REFERENCES.bib +vignette: > + %\VignetteEncoding{UTF-8} + %\VignetteIndexEntry{The `vaeac` approach in `shapr`} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + markdown: + wrap: 72 +--- + + + + + +> [The vaeac method](#vaeac) + +> [Code](#Code) + +> [Basic Example](#basicexample) + +> [Pretrained vaeac](#pretrained_vaeac) + +> [Pretrained vaeac (path)](#pretrained_vaeac_path) + +> [Subset of coalitions](#n_combinations) + +> [Paired sampling](#paired_sampling) + +> [Progress bar](#progress_bar) + +> [Continue training](#continue_training) + +> [Early stopping](#early_stopping) + +> [Grouping of features](#grouping_of_features) + +> [Mixed data](#mixed_data) + +> [Future Updates](#FutureUpdates) + + + +In this vignette, we elaborate and illustrate the `vaeac` approach in more depth than in the main vignette. +In the main vignette, only a few basic examples of using `vaeac` is included, while we here showcase more +advanced usage. See the overview above for what topics that are covered in this vignette. + + + + +# vaeac {#vaeac} + +An approach that supports mixed features is the Variational AutoEncoder +with Arbitrary Conditioning (@olsen2022using), abbreviated to `vaeac`. +The `vaeac` is an extension of the regular variational autoencoder +(@kingma2014autoencoding), but instead of giving a probabilistic representation +of the distribution $p(\boldsymbol{x})$ it gives a probabilistic representation +of the conditional distribution +$p(\boldsymbol{x}_{\bar{\mathcal{S}}} \mid \boldsymbol{x}_{\mathcal{S}})$, +for all possible feature subsets $\mathcal{S}\subseteq\mathcal{M}$ simultaneously, +where $\mathcal{M}$ is the set of all features. That is, only a single `vaeac` +model is needed to model all conditional distributions. + +The `vaeac` consists of three neural networks: a *full encoder*, a *masked encoder*, +and a *decoder*. The encoders map the full and masked/conditional input representations, +i.e., $\boldsymbol{x}$ and $\boldsymbol{x}_{\mathcal{S}}$, respectively, +to latent probabilistic representations. Sampled instances from this latent probabilistic +representations are sent to the decoder, which maps them back to the feature space +and provides a samplable probabilistic representation for the unconditioned features +$\boldsymbol{x}_{\bar{\mathcal{S}}}$. The full encoder is only used during the +training phase of the `vaeac` model to guide the training process of the masked encoder, +as the former relies on the full input sample $\boldsymbol{x}$, which is not accessible +in the deployment phase (when we generate the Monte Carlo samples), as we only have access +to $\boldsymbol{x}_{\mathcal{S}}$. The networks are trained by minimizing a variational +lower bound, and see Section 3 in @olsen2022using for an in-depth introduction to the +`vaeac` methodology. We use the `vaeac` model at the epoch which obtains the lowest +validation IWAE score to generate the Monte Carlo samples used in the Shapley value computations. + +We fit the `vaeac` model using the *torch* package in $\textsf{R}$ (@torch). 
The main
+parameters are the number of layers in the networks (`vaeac.depth`), the width of the layers
+(`vaeac.width`), the number of dimensions in the latent space (`vaeac.latent_dim`),
+the activation function between the layers in the networks (`vaeac.activation_function`),
+the learning rate in the ADAM optimizer (`vaeac.lr`), the number of `vaeac` models to initiate
+to remedy poorly initiated model parameter values (`vaeac.n_vaeacs_initialize`), and
+the number of learning epochs (`vaeac.epochs`). Call `?shapr::setup_approach.vaeac` for
+a more detailed description of the parameters.
+
+There are additional extra parameters which can be set by including a named list in the call to
+the `explain()` function. For example, we can change the batch size to 32 by including
+`vaeac.extra_parameters = list(vaeac.batch_size = 32)` as a parameter in the call to the `explain()` function. See `?shapr::vaeac_get_extra_para_default` for a description of the possible
+extra parameters to the `vaeac` approach. We strongly encourage the user to specify the main and extra parameters to the `vaeac` approach at the correct place in the call to the `explain()` function. That is, the main parameters are entered directly into the `explain()` function, while the extra parameters are included in a named list called `vaeac.extra_parameters`. However, the `vaeac` approach will try to correct for misplaced and duplicated parameters and give warnings to the user.
+
+# Code Examples {#code}
+We now demonstrate the `vaeac` approach on several different use cases.
+
+
+## Basic Example {#basicexample}
+Here we go through how to use the `vaeac` approach on the same data as in the main vignette.
+
+First we set up the model we want to explain.
+
+
+```r
+library(xgboost)
+library(data.table)
+
+data("airquality")
+data <- data.table::as.data.table(airquality)
+data <- data[complete.cases(data), ]
+
+x_var <- c("Solar.R", "Wind", "Temp", "Month")
+y_var <- "Ozone"
+
+ind_x_explain <- 1:6
+x_train <- data[-ind_x_explain, ..x_var]
+y_train <- data[-ind_x_explain, get(y_var)]
+x_explain <- data[ind_x_explain, ..x_var]
+
+# Fitting a basic xgboost model to the training data
+model <- xgboost(
+  data = as.matrix(x_train),
+  label = y_train,
+  nround = 100,
+  verbose = FALSE
+)
+
+# Specifying the phi_0, i.e. the expected prediction without any features
+prediction_zero <- mean(y_train)
+```
+
+
+## First vaeac example
+
+We are now going to explain predictions made by the model using the `vaeac` approach.
+
+
+```r
+n_samples <- 25 # Low number of MC samples to make the vignette build faster
+n_batches <- 1 # Do all coalitions in one batch
+vaeac.save_every_nth_epoch <- 3 # Save the vaeac model every 3rd epoch + best and last epoch
+vaeac.n_vaeacs_initialize <- 2 # Number of vaeacs to initialize to counteract bad initialization values (extra para)
+vaeac.epochs <- 4 # The number of epochs
+
+explanation <- explain(
+  model = model,
+  x_explain = x_explain,
+  x_train = x_train,
+  approach = "vaeac",
+  prediction_zero = prediction_zero,
+  n_samples = n_samples,
+  n_batches = n_batches,
+  vaeac.epochs = vaeac.epochs,
+  vaeac.n_vaeacs_initialize = vaeac.n_vaeacs_initialize,
+  vaeac.extra_parameters = list(vaeac.save_every_nth_epoch = vaeac.save_every_nth_epoch)
+)
+```
+
+We can look at the Shapley values.
+
+
+```r
+# Printing and plotting the Shapley values. See ?shapr::explain for interpretation of the values.
+print(explanation$shapley_values) +#> none Solar.R Wind Temp Month +#> +#> 1: 43.08571 6.120728 3.143015 -18.677950 -2.8861381 +#> 2: 43.08572 -2.077884 -2.554779 -20.118234 0.6956914 +#> 3: 43.08572 3.038485 -5.512098 -18.257521 -2.5587136 +#> 4: 43.08571 3.000938 -4.722036 -8.945166 -3.9248570 +#> 5: 43.08572 -1.102200 -4.431948 -13.545894 -5.2956652 +#> 6: 43.08571 3.932041 -9.844483 -11.948902 -3.5601849 +plot(explanation) +``` + +![](figure_vaeac/first-vaeac-plots-1.png) + + +## Pre-trained vaeac {#pretrained_vaeac} +If the user has a pre-trained `vaeac` model (from a previous run), the user can send that to the `explain()` function +and `shapr` will skip the training of a new `vaeac` model and rather use the provided `vaeac` model. The `vaeac` model +is accessible via `explanation$internal$parameters$vaeac`. Note that if we set `verbose = 2` in `explain()`, then `shapr` will give a message that it loads a pretrained `vaeac` model instead of training it from scratch. + +In this example, we extract the trained `vaeac` model from the previous example and send it to `explain()`. + + +```r +# Send the pre-trained vaeac model +expl_pretrained_vaeac <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = n_batches, + n_samples = n_samples, + vaeac.extra_parameters = list(vaeac.pretrained_vaeac_model = explanation$internal$parameters$vaeac) +) + +# Check that this version provides the same Shapley values +all.equal(explanation$shapley_values, expl_pretrained_vaeac$shapley_values) +#> [1] TRUE +``` + +## Pre-trained vaeac (path) {#pretrained_vaeac_path} +We can also just provide a path to the stored `vaeac` model. This is beneficial if we have only stored the `vaeac` +model on the computer but not the whole `explanation` object. The possible save paths are stored in `explanation$internal$parameters$vaeac$model`. Note that if we set `verbose = 2` in `explain()`, then `shapr` will give a message that it loads a pretrained `vaeac` model instead of training it from scratch. + + +```r +# Call `explanation$internal$parameters$vaeac$model` to see possible vaeac models. We use `best` below. +# send the pre-trained vaeac path +expl_pretrained_vaeac_path <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = n_batches, + n_samples = n_samples, + vaeac.extra_parameters = list(vaeac.pretrained_vaeac_model = explanation$internal$parameters$vaeac$models$best) +) + +# Check that this version provides the same Shapley values +all.equal(explanation$shapley_values, expl_pretrained_vaeac_path$shapley_values) +#> [1] TRUE +``` + + + +## Specified n_combinations and more batches {#n_combinations} + +The user can limit the Shapley value computations to only a subset of coalitions by setting the +`n_combinations` parameter to a value lower than $2^{n_\text{features}}$. To lower the memory +usage, the user can split the coalitions into several batches by setting `n_batches` to a desired +number. In this example, we set `n_batches = 5` and `n_combinations = 10` which is less than +the maximum of `16`. + +Note that we do not need to train a new `vaeac` model as we can use the one above trained on +all `16` coalitions as we are now only using a subset of them. This is not applicable the other +way around. 
+ + +```r +# send the pre-trained vaeac path +expl_batches_combinations <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_combinations = 10, + n_batches = 5, + n_samples = n_samples, + verbose = 2, + vaeac.extra_parameters = list( + vaeac.pretrained_vaeac_model = explanation$internal$parameters$vaeac + ) +) + +# Gives different Shapley values as the latter one are only based on a subset of coalitions +plot_SV_several_approaches(list("Original" = explanation, "Other combi." = expl_batches_combinations)) +``` + +![](figure_vaeac/check-n_combinations-and-more-batches-1.png) + +```r + +# Here we can see that the samples coalitions are in different batches and have different weights +expl_batches_combinations$internal$objects$X +#> Key: +#> Index: +#> id_combination features n_features N shapley_weight approach batch +#> +#> 1: 1 0 1 1000000 NA +#> 2: 2 3 1 4 1 vaeac 1 +#> 3: 3 4 1 4 1 vaeac 3 +#> 4: 4 2 1 4 1 vaeac 2 +#> 5: 5 2,3 2 6 2 vaeac 5 +#> 6: 6 1,4 2 6 1 vaeac 2 +#> 7: 7 1,3,4 3 4 2 vaeac 5 +#> 8: 8 2,3,4 3 4 1 vaeac 4 +#> 9: 9 1,2,3 3 4 1 vaeac 4 +#> 10: 10 1,2,3,4 4 1 1000000 1 + +# Can compare that to the situation where we have exact computations (i.e., include all coalitions) +explanation$internal$objects$X +#> Key: +#> id_combination features n_features N shapley_weight approach batch +#> +#> 1: 1 0 1 1.00e+06 NA +#> 2: 2 1 1 4 2.50e-01 vaeac 1 +#> 3: 3 2 1 4 2.50e-01 vaeac 1 +#> 4: 4 3 1 4 2.50e-01 vaeac 1 +#> 5: 5 4 1 4 2.50e-01 vaeac 1 +#> 6: 6 1,2 2 6 1.25e-01 vaeac 1 +#> 7: 7 1,3 2 6 1.25e-01 vaeac 1 +#> 8: 8 1,4 2 6 1.25e-01 vaeac 1 +#> 9: 9 2,3 2 6 1.25e-01 vaeac 1 +#> 10: 10 2,4 2 6 1.25e-01 vaeac 1 +#> 11: 11 3,4 2 6 1.25e-01 vaeac 1 +#> 12: 12 1,2,3 3 4 2.50e-01 vaeac 1 +#> 13: 13 1,2,4 3 4 2.50e-01 vaeac 1 +#> 14: 14 1,3,4 3 4 2.50e-01 vaeac 1 +#> 15: 15 2,3,4 3 4 2.50e-01 vaeac 1 +#> 16: 16 1,2,3,4 4 1 1.00e+06 1 +``` + + + +Note that if we train a `vaeac` model from scratch with the setup above, then the `vaeac` model will use +the "Specified_masks_mask_generator" which ensures that the `vaeac` model only train on a specified set of coalitions. +In this case, it will be the `n_combinations - 2` sampled coalitions. The minus two is because the `vaeac` model will +not train on the empty and grand coalitions as they are not needed in the Shapley value computations. + +```r +expl_batches_combinations_2 <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_combinations = 10, + n_batches = 1, + n_samples = n_samples, + vaeac.n_vaeacs_initialize = 1, + vaeac.epochs = 3, + verbose = 2 +) +``` + + + +## Paired sampling {#paired_sampling} + +The `vaeac` approach can use paired sampling to improve the stability of the training procedure. +When using paired sampling, each observation in the training batches will be duplicated, but the first version will +be masked by $S$ and the second one is masked by the complement $\bar{S}$. The mask are taken from the +`explanation$internal$objects$S` matrix. Note that `vaeac` does not check if the complement is also in said matrix. +Furthermore, the masks are randomly selected for each observation in the batch. The training time when using paired +sampling is higher in comparison to random sampling due to more complex implementation. 
+ + +```r +expl_paired_sampling_TRUE <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = n_samples, + n_batches = n_batches, + vaeac.epochs = 10, + vaeac.n_vaeacs_initialize = 1, + vaeac.extra_parameters = list(vaeac.paired_sampling = TRUE) +) + +expl_paired_sampling_FALSE <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = n_samples, + n_batches = n_batches, + vaeac.epochs = 10, + vaeac.n_vaeacs_initialize = 1, + vaeac.extra_parameters = list(vaeac.paired_sampling = FALSE) +) +``` + +We can compare the results by looking at the training and validation errors and by the $MSE_v$ evaluation criterion. + +```r +explanation_list <- list("Regular samp." = expl_paired_sampling_FALSE, "Paired samp." = expl_paired_sampling_TRUE) +vaeac_plot_evaluation_criteria(explanation_list, plot_type = "criterion") +``` + +![](figure_vaeac/paired-sampling-plotting-1.png) + +```r +plot_MSEv_eval_crit(explanation_list) +``` + +![](figure_vaeac/paired-sampling-plotting-2.png) + +By looking at the time, we see that the paired version takes (a bit) longer time in the `setup_computation` +phase, that is, in the training phase. + +```r +rbind( + "Paired" = expl_paired_sampling_TRUE$timing$timing_secs, "Regular" = expl_paired_sampling_FALSE$timing$timing_secs +) +#> setup test_prediction setup_computation compute_vS shapley_computation +#> Paired 0.10554790 0.06238222 10.776925 0.4035740 0.006369114 +#> Regular 0.07739019 0.06001687 9.007903 0.4133561 0.008683920 +``` + + + +## Progressr {#progress_bar} + +The `shapr` package provides progress updates of the computation of the Shapley +values through the R-package `progressr`. If the user also sets `verbose = 2` in `explain()`, then we will get +extra information related to the `vaeac` approach. The `verbose` parameter works independently of the `progressr` +package. + +In this setup, we get no progress updates. + +```r +library(progressr) +handlers("void") # To silence all progressr updates +expl_without_messages <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = n_samples, + n_batches = 5, + verbose = 0, + vaeac.epochs = 5, + vaeac.n_vaeacs_initialize = 2 +) +``` + +By setting `verbose = 2`, we will get messages about the progress. + +```r +handlers("void") # To silence all progressr updates +expl_with_messages <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = n_samples, + n_batches = 5, + verbose = 2, + vaeac.epochs = 5, + vaeac.n_vaeacs_initialize = 2 +) +all.equal(expl_without_messages$shapley_values, expl_with_messages$shapley_values) +#> [1] TRUE +``` + +For more visual information, we can use the `progressr` package. This can help us see the progress of the training +step for the final `vaeac` model. Note that one can set `verbose = 0` to not get any messages from the `vaeac` +approach and only get the progress bars. 
+ +```r +progressr::handlers("cli") +# If no progression handler is specified, the txtprogressbar is used +# Other progression handlers: +# progressr::handlers('rstudio') # requires the 'rstudioapi' package +# progressr::handlers('handler_winprogressbar') # Window only +# progressr::handlers('cli') # requires the 'cli' package +# +# Another progressbar with sound which requires that `beepr` is installed +# progressr::handlers(list( +# progressr::handler_pbcol( +# adjust = 1.0, +# complete = function(s) cli::bg_red(cli::col_black(s)), +# incomplete = function(s) cli::bg_cyan(cli::col_black(s)) +# ), +# progressr::handler_beepr( +# finish = "wilhelm", +# interval = 2.0 +# ) +# )) +progressr::with_progress({ + expl_with_progressr <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = n_samples, + n_batches = 5, + verbose = 2, + vaeac.epochs = 5, + vaeac.n_vaeacs_initialize = 2 + ) +}) +all.equal(expl_without_messages$shapley_values, expl_with_progressr$shapley_values) +#> [1] TRUE +``` + +## Continue the training of the vaeac approach {#continue_training} + +In the case the user has set a too low number of training epochs and sees that the network is still learning, +then the user can continue to train the network from where it stopped. Thus, a good workflow can therefore +be to call the `explain()` function with a `n_samples = 1` (to not waste to much time to generate MC samples), +then look at the training and evaluation plots of the `vaeac`. If not satisfied, then train more. If satisfied, +then call the `explain()` function again but this time by using the extra parameter `vaeac.pretrained_vaeac_model`, +as illustrated above. Note that we have set the number of `vaeac.epochs` to be very low in this example and we +recommend to use many more epochs. + + + +```r +expl_little_training <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = 250, + n_batches = n_batches, + vaeac.epochs = 3, + vaeac.n_vaeacs_initialize = 2, + vaeac.extra_parameters = list(vaeac.save_every_nth_epoch = vaeac.save_every_nth_epoch) +) + +# Look at the training and validation errors. Not happy and want to train more. +vaeac_plot_evaluation_criteria(list("Original" = expl_little_training), plot_type = "method") +``` + +![](figure_vaeac/continue-training-1.png) + +```r + +# Can also see how well vaeac generates data from the full joint distribution. Quite good. 
+vaeac_plot_imputed_ggpairs(explanation = expl_little_training, which_vaeac_model = "best", x_true = x_train) +``` + +![](figure_vaeac/continue-training-2.png) + +```r + +# Make a copy of the explanation object and continue to train the vaeac model some more epochs +expl_train_more <- expl_little_training +expl_train_more$internal$parameters$vaeac <- + vaeac_continue_train_model(explanation = expl_train_more, epochs_new = 5, x_train = x_train, verbose = 0) + +# Compute the Shapley values again but this time using the extra trained vaeac model +expl_train_more_vaeac <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = n_batches, + n_samples = 250, + verbose = 0, + vaeac.extra_parameters = list(vaeac.pretrained_vaeac_model = expl_train_more$internal$parameters$vaeac) +) + +# Look at the training and validation errors and conclude that we want to train some more +vaeac_plot_evaluation_criteria(list("Original" = expl_little_training, "More epochs" = expl_train_more), + plot_type = "method" +) +``` + +![](figure_vaeac/continue-training-3.png) + +```r + +# Continue to train the vaeac model some more epochs +expl_train_even_more <- expl_train_more +expl_train_even_more$internal$parameters$vaeac <- + vaeac_continue_train_model( + explanation = expl_train_even_more, + epochs_new = 10, + x_train = x_train, + verbose = 0 + ) + +# Compute the Shapley values again but this time using the even more trained vaeac model +expl_train_even_more_vaeac <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = n_batches, + n_samples = 250, + verbose = 0, + vaeac.extra_parameters = list(vaeac.pretrained_vaeac_model = expl_train_even_more$internal$parameters$vaeac) +) + +# Look at the training and validation errors. +vaeac_plot_evaluation_criteria(list( + "Original" = expl_little_training, + "More epochs" = expl_train_more, + "Even more epochs" = expl_train_even_more +), plot_type = "method") +``` + +![](figure_vaeac/continue-training-4.png) + +```r + +# Can also see how well vaeac generates data from the full joint distribution +vaeac_plot_imputed_ggpairs(explanation = expl_train_even_more, which_vaeac_model = "best", x_true = x_train) +``` + +![](figure_vaeac/continue-training-5.png) + +```r + +# Can see that the extra training has decreased the MSEv score. +plot_MSEv_eval_crit(list( + "Few epochs" = expl_little_training, + "More epochs" = expl_train_more_vaeac, + "Even more epochs" = expl_train_even_more_vaeac +)) +``` + +![](figure_vaeac/continue-training-6.png) + +```r + +# We see that the Shapley values have changed, but they are often comparable. +plot_SV_several_approaches(list( + "Few epochs" = expl_little_training, + "More epochs" = expl_train_more_vaeac, + "Even more epochs" = expl_train_even_more_vaeac +)) +``` + +![](figure_vaeac/continue-training-7.png) + + +## Vaeac with early stopping {#early_stopping} +If we do not want to specify the number of `epochs`, as we are uncertain how many `epochs` it will take before the +`vaeac` model is properly trained, a good choice is to rather use early stopping. This means that we can set +`vaeac.epochs` to a large number and let `vaeac.epochs_early_stopping` be for example `5`. This means that the `vaeac` +model will stop the training procedure if there has been no improvement in the validation score for `5` epochs. 
+ + +```r +expl_early_stopping <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = 250, + n_batches = 1, + verbose = 2, + vaeac.epochs = 1000, # Set it to a big number + vaeac.n_vaeacs_initialize = 2, + vaeac.extra_parameters = list(vaeac.epochs_early_stopping = 2) # Low value here for faster building of the vignette +) + +# Look at the training and validation errors. We are quite happy with it. +vaeac_plot_evaluation_criteria(list("Vaeac early stopping" = expl_early_stopping), plot_type = "method") +``` + +![](figure_vaeac/early-stopping-1-1.png) + +However, we can train it further for a fixed amount of epochs if desired. This can be in a setting where we are not +happy with the IWAE curve or we feel that we set `vaeac.epochs_early_stopping` to a too low value or if the max +number of epochs (`vaeac.epochs`) were reached. + +```r +# Make a copy of the explanation object which we are to train further. +expl_early_stopping_train_more <- expl_early_stopping + +# Continue to train the vaeac model some more epochs +expl_early_stopping_train_more$internal$parameters$vaeac <- + vaeac_continue_train_model( + explanation = expl_early_stopping_train_more, + epochs_new = 15, + x_train = x_train, + verbose = 2 + ) + +# Can even do it twice if desired +expl_early_stopping_train_more$internal$parameters$vaeac <- + vaeac_continue_train_model( + explanation = expl_early_stopping_train_more, + epochs_new = 10, + x_train = x_train, + verbose = 2 + ) + +# Look at the training and validation errors. We see some improvement +vaeac_plot_evaluation_criteria(list( + "Vaeac early stopping" = expl_early_stopping, + "Vaeac early stopping more epochs" = expl_early_stopping_train_more +), plot_type = "method") +``` + +![](figure_vaeac/early-stopping-2-1.png) + +We can then use the extra trained version to compute the Shapley value explanations and compare it with the previous +version that used early stopping. We see a non-significant difference. + +```r +# Use extra trained vaeac model to compute Shapley values again. +expl_early_stopping_train_more <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = n_batches, + n_samples = 250, + vaeac.extra_parameters = list( + vaeac.pretrained_vaeac_model = expl_early_stopping_train_more$internal$parameters$vaeac + ) +) + +# We can compare their MSEv scores +plot_MSEv_eval_crit(list( + "Vaeac early stopping" = expl_early_stopping, + "Vaeac early stopping more epochs" = expl_early_stopping_train_more +)) +``` + +![](figure_vaeac/early-stopping-3-1.png) + +```r + +# We see that the Shapley values have changed, but only slightly +plot_SV_several_approaches(list( + "Vaeac early stopping" = expl_early_stopping, + "Vaeac early stopping more epochs" = expl_early_stopping_train_more +)) +``` + +![](figure_vaeac/early-stopping-3-2.png) + + + + + +## Grouping of features {#grouping_of_features} +When we train a `vaeac` model to explain groups of features, then the `vaeac` model will use +the "Specified_masks_mask_generator" which ensures that the `vaeac` model only train on a specified set of coalitions. +In this case, it will ensure that all features in group A will always either be conditioned on or be unconditioned. +The same goes for group B. 
Note that in this setup, there are only `4` possible coalitions, but `vaeac` only train on +`2` coalitions as the empty and grand coalitions as they are not needed in the Shapley value computations. + + +```r +expl_group <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + group = list(A = c("Temp", "Month"), B = c("Wind", "Solar.R")), + n_batches = 2, + n_samples = n_samples, + verbose = 2, + vaeac.epochs = 4, + vaeac.n_vaeacs_initialize = 2, +) + +# Plot the resulting explanations +plot(expl_group) +``` + +![](figure_vaeac/vaeac-grouping-of-features-1.png) + + + +## Mixed Data {#mixed_data} +Here we look at a setup with mixed data, i.e., the data contains both categorical and continuous features. First we set up the data and the model. + +```r +library(ranger) +data <- data.table::as.data.table(airquality) +data <- data[complete.cases(data), ] + +# convert the month variable to a factor +data[, Month_factor := as.factor(Month)] + +x_var_cat <- c("Solar.R", "Wind", "Temp", "Month_factor") +y_var <- "Ozone" + +ind_x_explain <- 1:6 + +data_train_cat <- data[-ind_x_explain, ] +x_train_cat <- data_train_cat[, ..x_var_cat] +x_explain_cat <- data[ind_x_explain, ][, ..x_var_cat] + +# Fit a random forest model to the training data +model <- ranger(as.formula(paste0(y_var, " ~ ", paste0(x_var_cat, collapse = " + "))), + data = data_train_cat +) + +# Specifying the phi_0, i.e. the expected prediction without any features +prediction_zero <- mean(data_train_cat[, get(y_var)]) +``` +Then we compute explanations using the `ctree` and `vaeac` approaches. For the `vaeac` approach, we +consider two setups: the default architecture, and a simpler one without skip connections. We do this +to illustrate that the skip connections improve the `vaeac` method. + + +```r +# Here we use the ctree approach +expl_ctree <- explain( + model = model, + x_explain = x_explain_cat, + x_train = x_train_cat, + approach = "ctree", + prediction_zero = prediction_zero, + n_batches = 1, + n_samples = 250 +) + +# Then we use the vaeac approach +expl_vaeac_with <- explain( + model = model, + x_explain = x_explain_cat, + x_train = x_train_cat, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = 1, + n_samples = 250, + vaeac.epochs = 50, + vaeac.n_vaeacs_initialize = 4 +) + +# Then we use the vaeac approach +expl_vaeac_without <- explain( + model = model, + x_explain = x_explain_cat, + x_train = x_train_cat, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = 1, + n_samples = 250, + vaeac.epochs = 50, + vaeac.n_vaeacs_initialize = 4, + vaeac.extra_parameters = list(vaeac.skip_conn_layer = FALSE, vaeac.skip_conn_masked_enc_dec = FALSE) +) + +# We see that the `vaeac` model without the skip connections perform worse +vaeac_plot_evaluation_criteria(list( + "Vaeac w.o. skip-con." = expl_vaeac_without, + "Vaeac w. skip-con." = expl_vaeac_with +), plot_type = "criterion") +``` + +![](figure_vaeac/vaeac-mixed-data-1.png) + +```r + +# The vaeac model with skip connections have the lowest/best MSE_Frye evaluation criterion score +plot_MSEv_eval_crit(list( + "Vaeac w.o. skip-con." = expl_vaeac_without, + "Vaeac w. skip-con." = expl_vaeac_with, + "Ctree" = expl_ctree +)) +``` + +![](figure_vaeac/vaeac-mixed-data-2.png) + +```r + +# Can compare the Shapley values. Ctree and vaeac with skip connections produce similar explanations +plot_SV_several_approaches(list( + "Vaeac w.o. skip-con." 
= expl_vaeac_without, + "Vaeac w. skip-con." = expl_vaeac_with, + "Ctree" = expl_ctree +), index_explicands = 1:6) +``` + +![](figure_vaeac/vaeac-mixed-data-3.png) + + + + +# Future Updates {#future} +- Add support for GPU in vaeac. We have not had access to GPU, so have only used CPU. diff --git a/vignettes/understanding_shapr_vaeac.Rmd.orig b/vignettes/understanding_shapr_vaeac.Rmd.orig new file mode 100644 index 000000000..83204a539 --- /dev/null +++ b/vignettes/understanding_shapr_vaeac.Rmd.orig @@ -0,0 +1,747 @@ +--- +title: "More details and advanced usage of the `vaeac` approach" +author: "Lars Henry Berge Olsen" +output: rmarkdown::html_vignette +bibliography: ../inst/REFERENCES.bib +vignette: > + %\VignetteEncoding{UTF-8} + %\VignetteIndexEntry{The `vaeac` approach in `shapr`} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + markdown: + wrap: 72 +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.cap = "", + fig.width = 9, + fig.height = 5, + fig.path = "figure_vaeac/", # Ensure that figures are saved in the right folder (this vignette will be built manually) + cache.path = "cache_vaeac/", # Ensure that cached objects are saved in the right folder + warning = FALSE, + message = FALSE + ) +``` + +```{r setup, include=FALSE, warning=FALSE} +library(shapr) +``` + +> [The vaeac method](#vaeac) + +> [Code](#Code) + +> [Basic Example](#basicexample) + +> [Pretrained vaeac](#pretrained_vaeac) + +> [Pretrained vaeac (path)](#pretrained_vaeac_path) + +> [Subset of coalitions](#n_combinations) + +> [Paired sampling](#paired_sampling) + +> [Progress bar](#progress_bar) + +> [Continue training](#continue_training) + +> [Early stopping](#early_stopping) + +> [Grouping of features](#grouping_of_features) + +> [Mixed data](#mixed_data) + +> [Future Updates](#FutureUpdates) + + + +In this vignette, we elaborate and illustrate the `vaeac` approach in more depth than in the main vignette. +In the main vignette, only a few basic examples of using `vaeac` is included, while we here showcase more +advanced usage. See the overview above for what topics that are covered in this vignette. + + + + +# vaeac {#vaeac} + +An approach that supports mixed features is the Variational AutoEncoder +with Arbitrary Conditioning (@olsen2022using), abbreviated to `vaeac`. +The `vaeac` is an extension of the regular variational autoencoder +(@kingma2014autoencoding), but instead of giving a probabilistic representation +of the distribution $p(\boldsymbol{x})$ it gives a probabilistic representation +of the conditional distribution +$p(\boldsymbol{x}_{\bar{\mathcal{S}}} \mid \boldsymbol{x}_{\mathcal{S}})$, +for all possible feature subsets $\mathcal{S}\subseteq\mathcal{M}$ simultaneously, +where $\mathcal{M}$ is the set of all features. That is, only a single `vaeac` +model is needed to model all conditional distributions. + +The `vaeac` consists of three neural networks: a *full encoder*, a *masked encoder*, +and a *decoder*. The encoders map the full and masked/conditional input representations, +i.e., $\boldsymbol{x}$ and $\boldsymbol{x}_{\mathcal{S}}$, respectively, +to latent probabilistic representations. Sampled instances from this latent probabilistic +representations are sent to the decoder, which maps them back to the feature space +and provides a samplable probabilistic representation for the unconditioned features +$\boldsymbol{x}_{\bar{\mathcal{S}}}$. 
The full encoder is only used during the
+training phase of the `vaeac` model to guide the training process of the masked encoder,
+as the former relies on the full input sample $\boldsymbol{x}$, which is not accessible
+in the deployment phase (when we generate the Monte Carlo samples), as we only have access
+to $\boldsymbol{x}_{\mathcal{S}}$. The networks are trained by minimizing a variational
+lower bound; see Section 3 in @olsen2022using for an in-depth introduction to the
+`vaeac` methodology. We use the `vaeac` model at the epoch which obtains the lowest
+validation IWAE score to generate the Monte Carlo samples used in the Shapley value computations.
+
+We fit the `vaeac` model using the *torch* package in $\textsf{R}$ (@torch). The main
+parameters are the number of layers in the networks (`vaeac.depth`), the width of the layers
+(`vaeac.width`), the number of dimensions in the latent space (`vaeac.latent_dim`),
+the activation function between the layers in the networks (`vaeac.activation_function`),
+the learning rate in the ADAM optimizer (`vaeac.lr`), the number of `vaeac` models to initiate
+to remedy poorly initiated model parameter values (`vaeac.n_vaeacs_initialize`), and
+the number of learning epochs (`vaeac.epochs`). Call `?shapr::setup_approach.vaeac` for
+a more detailed description of the parameters.
+
+There are additional extra parameters which can be set by including a named list in the call to
+the `explain()` function. For example, we can change the batch size to 32 by including
+`vaeac.extra_parameters = list(vaeac.batch_size = 32)` as a parameter in the call to the `explain()` function. See `?shapr::vaeac_get_extra_para_default` for a description of the possible
+extra parameters to the `vaeac` approach. We strongly encourage the user to specify the main and extra parameters to the `vaeac` approach at the correct place in the call to the `explain()` function. That is, the main parameters are entered directly into the `explain()` function, while the extra parameters are included in a named list called `vaeac.extra_parameters`. However, the `vaeac` approach will try to correct for misplaced and duplicated parameters and give warnings to the user.
+
+# Code Examples {#code}
+We now demonstrate the `vaeac` approach on several different use cases.
+
+
+## Basic Example {#basicexample}
+Here we go through how to use the `vaeac` approach on the same data as in the main vignette.
+
+First we set up the model we want to explain.
+
+```{r setup-2, cache = TRUE}
+library(xgboost)
+library(data.table)
+
+data("airquality")
+data <- data.table::as.data.table(airquality)
+data <- data[complete.cases(data), ]
+
+x_var <- c("Solar.R", "Wind", "Temp", "Month")
+y_var <- "Ozone"
+
+ind_x_explain <- 1:6
+x_train <- data[-ind_x_explain, ..x_var]
+y_train <- data[-ind_x_explain, get(y_var)]
+x_explain <- data[ind_x_explain, ..x_var]
+
+# Fitting a basic xgboost model to the training data
+model <- xgboost(
+  data = as.matrix(x_train),
+  label = y_train,
+  nround = 100,
+  verbose = FALSE
+)
+
+# Specifying the phi_0, i.e. the expected prediction without any features
+prediction_zero <- mean(y_train)
+```
+
+
+## First vaeac example
+
+We are now going to explain predictions made by the model using the `vaeac` approach.
+ +```{r first-vaeac, cache = TRUE} +n_samples <- 25 # Low number of MC samples to make the vignette build faster +n_batches <- 1 # Do all coalitions in one batch +vaeac.save_every_nth_epoch <- 3 # Save the vaeac model every 3th epoch + best and last epoch +vaeac.n_vaeacs_initialize <- 2 # Number of vaeacs to initialize to counteract bad initialization values (extra para) +vaeac.epochs <- 4 # The number of epochs + +explanation <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = n_samples, + n_batches = n_batches, + vaeac.epochs = vaeac.epochs, + vaeac.n_vaeacs_initialize = vaeac.n_vaeacs_initialize, + vaeac.extra_parameters = list(vaeac.save_every_nth_epoch = vaeac.save_every_nth_epoch) +) +``` + +We can look at the Shapley values. + +```{r first-vaeac-plots, cache = TRUE} +# Printing and ploting the Shapley values. see ?shapr::explain for interpretation of the values. +print(explanation$shapley_values) +plot(explanation) +``` + + +## Pre-trained vaeac {#pretrained_vaeac} +If the user has a pre-trained `vaeac` model (from a previous run), the user can send that to the `explain()` function +and `shapr` will skip the training of a new `vaeac` model and rather use the provided `vaeac` model. The `vaeac` model +is accessible via `explanation$internal$parameters$vaeac`. Note that if we set `verbose = 2` in `explain()`, then `shapr` will give a message that it loads a pretrained `vaeac` model instead of training it from scratch. + +In this example, we extract the trained `vaeac` model from the previous example and send it to `explain()`. + +```{r pretrained-vaeac-model, cache = TRUE} +# Send the pre-trained vaeac model +expl_pretrained_vaeac <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = n_batches, + n_samples = n_samples, + vaeac.extra_parameters = list(vaeac.pretrained_vaeac_model = explanation$internal$parameters$vaeac) +) + +# Check that this version provides the same Shapley values +all.equal(explanation$shapley_values, expl_pretrained_vaeac$shapley_values) +``` + +## Pre-trained vaeac (path) {#pretrained_vaeac_path} +We can also just provide a path to the stored `vaeac` model. This is beneficial if we have only stored the `vaeac` +model on the computer but not the whole `explanation` object. The possible save paths are stored in `explanation$internal$parameters$vaeac$model`. Note that if we set `verbose = 2` in `explain()`, then `shapr` will give a message that it loads a pretrained `vaeac` model instead of training it from scratch. + +```{r pretrained-vaeac-path, cache = TRUE} +# Call `explanation$internal$parameters$vaeac$model` to see possible vaeac models. We use `best` below. 
+# send the pre-trained vaeac path +expl_pretrained_vaeac_path <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = n_batches, + n_samples = n_samples, + vaeac.extra_parameters = list(vaeac.pretrained_vaeac_model = explanation$internal$parameters$vaeac$models$best) +) + +# Check that this version provides the same Shapley values +all.equal(explanation$shapley_values, expl_pretrained_vaeac_path$shapley_values) +``` + + + +## Specified n_combinations and more batches {#n_combinations} + +The user can limit the Shapley value computations to only a subset of coalitions by setting the +`n_combinations` parameter to a value lower than $2^{n_\text{features}}$. To lower the memory +usage, the user can split the coalitions into several batches by setting `n_batches` to a desired +number. In this example, we set `n_batches = 5` and `n_combinations = 10` which is less than +the maximum of `16`. + +Note that we do not need to train a new `vaeac` model as we can use the one above trained on +all `16` coalitions as we are now only using a subset of them. This is not applicable the other +way around. + +```{r check-n_combinations-and-more-batches, cache = TRUE} +# send the pre-trained vaeac path +expl_batches_combinations <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_combinations = 10, + n_batches = 5, + n_samples = n_samples, + verbose = 2, + vaeac.extra_parameters = list( + vaeac.pretrained_vaeac_model = explanation$internal$parameters$vaeac + ) +) + +# Gives different Shapley values as the latter one are only based on a subset of coalitions +plot_SV_several_approaches(list("Original" = explanation, "Other combi." = expl_batches_combinations)) + +# Here we can see that the samples coalitions are in different batches and have different weights +expl_batches_combinations$internal$objects$X + +# Can compare that to the situation where we have exact computations (i.e., include all coalitions) +explanation$internal$objects$X +``` + + + +Note that if we train a `vaeac` model from scratch with the setup above, then the `vaeac` model will use +the "Specified_masks_mask_generator" which ensures that the `vaeac` model only train on a specified set of coalitions. +In this case, it will be the `n_combinations - 2` sampled coalitions. The minus two is because the `vaeac` model will +not train on the empty and grand coalitions as they are not needed in the Shapley value computations. +```{r check-n_combinations-and-more-batches-2, cache = TRUE} +expl_batches_combinations_2 <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_combinations = 10, + n_batches = 1, + n_samples = n_samples, + vaeac.n_vaeacs_initialize = 1, + vaeac.epochs = 3, + verbose = 2 +) +``` + + + +## Paired sampling {#paired_sampling} + +The `vaeac` approach can use paired sampling to improve the stability of the training procedure. +When using paired sampling, each observation in the training batches will be duplicated, but the first version will +be masked by $S$ and the second one is masked by the complement $\bar{S}$. The mask are taken from the +`explanation$internal$objects$S` matrix. Note that `vaeac` does not check if the complement is also in said matrix. +Furthermore, the masks are randomly selected for each observation in the batch. 
The training time when using paired +sampling is higher in comparison to random sampling due to more complex implementation. + +```{r paired-sampling-training, cache = TRUE} +expl_paired_sampling_TRUE <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = n_samples, + n_batches = n_batches, + vaeac.epochs = 10, + vaeac.n_vaeacs_initialize = 1, + vaeac.extra_parameters = list(vaeac.paired_sampling = TRUE) +) + +expl_paired_sampling_FALSE <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = n_samples, + n_batches = n_batches, + vaeac.epochs = 10, + vaeac.n_vaeacs_initialize = 1, + vaeac.extra_parameters = list(vaeac.paired_sampling = FALSE) +) +``` + +We can compare the results by looking at the training and validation errors and by the $MSE_v$ evaluation criterion. +```{r paired-sampling-plotting, cache = TRUE} +explanation_list <- list("Regular samp." = expl_paired_sampling_FALSE, "Paired samp." = expl_paired_sampling_TRUE) +vaeac_plot_evaluation_criteria(explanation_list, plot_type = "criterion") +plot_MSEv_eval_crit(explanation_list) +``` + +By looking at the time, we see that the paired version takes (a bit) longer time in the `setup_computation` +phase, that is, in the training phase. +```{r paired-sampling-timing} +rbind( + "Paired" = expl_paired_sampling_TRUE$timing$timing_secs, "Regular" = expl_paired_sampling_FALSE$timing$timing_secs +) +``` + + + +## Progressr {#progress_bar} + +The `shapr` package provides progress updates of the computation of the Shapley +values through the R-package `progressr`. If the user also sets `verbose = 2` in `explain()`, then we will get +extra information related to the `vaeac` approach. The `verbose` parameter works independently of the `progressr` +package. + +In this setup, we get no progress updates. +```{r progressr-false-verbose-0, cache = TRUE} +library(progressr) +handlers("void") # To silence all progressr updates +expl_without_messages <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = n_samples, + n_batches = 5, + verbose = 0, + vaeac.epochs = 5, + vaeac.n_vaeacs_initialize = 2 +) +``` + +By setting `verbose = 2`, we will get messages about the progress. +```{r progressr-false-verbose-2, cache = TRUE} +handlers("void") # To silence all progressr updates +expl_with_messages <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = n_samples, + n_batches = 5, + verbose = 2, + vaeac.epochs = 5, + vaeac.n_vaeacs_initialize = 2 +) +all.equal(expl_without_messages$shapley_values, expl_with_messages$shapley_values) +``` + +For more visual information, we can use the `progressr` package. This can help us see the progress of the training +step for the final `vaeac` model. Note that one can set `verbose = 0` to not get any messages from the `vaeac` +approach and only get the progress bars. 
+```{r progressr-true-verbose-2, cache = TRUE} +progressr::handlers("cli") +# If no progression handler is specified, the txtprogressbar is used +# Other progression handlers: +# progressr::handlers('rstudio') # requires the 'rstudioapi' package +# progressr::handlers('handler_winprogressbar') # Window only +# progressr::handlers('cli') # requires the 'cli' package +# +# Another progressbar with sound which requires that `beepr` is installed +# progressr::handlers(list( +# progressr::handler_pbcol( +# adjust = 1.0, +# complete = function(s) cli::bg_red(cli::col_black(s)), +# incomplete = function(s) cli::bg_cyan(cli::col_black(s)) +# ), +# progressr::handler_beepr( +# finish = "wilhelm", +# interval = 2.0 +# ) +# )) +progressr::with_progress({ + expl_with_progressr <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = n_samples, + n_batches = 5, + verbose = 2, + vaeac.epochs = 5, + vaeac.n_vaeacs_initialize = 2 + ) +}) +all.equal(expl_without_messages$shapley_values, expl_with_progressr$shapley_values) +``` + +## Continue the training of the vaeac approach {#continue_training} + +In the case the user has set a too low number of training epochs and sees that the network is still learning, +then the user can continue to train the network from where it stopped. Thus, a good workflow can therefore +be to call the `explain()` function with a `n_samples = 1` (to not waste to much time to generate MC samples), +then look at the training and evaluation plots of the `vaeac`. If not satisfied, then train more. If satisfied, +then call the `explain()` function again but this time by using the extra parameter `vaeac.pretrained_vaeac_model`, +as illustrated above. Note that we have set the number of `vaeac.epochs` to be very low in this example and we +recommend to use many more epochs. + + +```{r continue-training, cache = TRUE} +expl_little_training <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = 250, + n_batches = n_batches, + vaeac.epochs = 3, + vaeac.n_vaeacs_initialize = 2, + vaeac.extra_parameters = list(vaeac.save_every_nth_epoch = vaeac.save_every_nth_epoch) +) + +# Look at the training and validation errors. Not happy and want to train more. +vaeac_plot_evaluation_criteria(list("Original" = expl_little_training), plot_type = "method") + +# Can also see how well vaeac generates data from the full joint distribution. Quite good. 
+vaeac_plot_imputed_ggpairs(explanation = expl_little_training, which_vaeac_model = "best", x_true = x_train) + +# Make a copy of the explanation object and continue to train the vaeac model some more epochs +expl_train_more <- expl_little_training +expl_train_more$internal$parameters$vaeac <- + vaeac_continue_train_model(explanation = expl_train_more, epochs_new = 5, x_train = x_train, verbose = 0) + +# Compute the Shapley values again but this time using the extra trained vaeac model +expl_train_more_vaeac <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = n_batches, + n_samples = 250, + verbose = 0, + vaeac.extra_parameters = list(vaeac.pretrained_vaeac_model = expl_train_more$internal$parameters$vaeac) +) + +# Look at the training and validation errors and conclude that we want to train some more +vaeac_plot_evaluation_criteria(list("Original" = expl_little_training, "More epochs" = expl_train_more), + plot_type = "method" +) + +# Continue to train the vaeac model some more epochs +expl_train_even_more <- expl_train_more +expl_train_even_more$internal$parameters$vaeac <- + vaeac_continue_train_model( + explanation = expl_train_even_more, + epochs_new = 10, + x_train = x_train, + verbose = 0 + ) + +# Compute the Shapley values again but this time using the even more trained vaeac model +expl_train_even_more_vaeac <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = n_batches, + n_samples = 250, + verbose = 0, + vaeac.extra_parameters = list(vaeac.pretrained_vaeac_model = expl_train_even_more$internal$parameters$vaeac) +) + +# Look at the training and validation errors. +vaeac_plot_evaluation_criteria(list( + "Original" = expl_little_training, + "More epochs" = expl_train_more, + "Even more epochs" = expl_train_even_more +), plot_type = "method") + +# Can also see how well vaeac generates data from the full joint distribution +vaeac_plot_imputed_ggpairs(explanation = expl_train_even_more, which_vaeac_model = "best", x_true = x_train) + +# Can see that the extra training has decreased the MSEv score. +plot_MSEv_eval_crit(list( + "Few epochs" = expl_little_training, + "More epochs" = expl_train_more_vaeac, + "Even more epochs" = expl_train_even_more_vaeac +)) + +# We see that the Shapley values have changed, but they are often comparable. +plot_SV_several_approaches(list( + "Few epochs" = expl_little_training, + "More epochs" = expl_train_more_vaeac, + "Even more epochs" = expl_train_even_more_vaeac +)) +``` + + +## Vaeac with early stopping {#early_stopping} +If we do not want to specify the number of `epochs`, as we are uncertain how many `epochs` it will take before the +`vaeac` model is properly trained, a good choice is to rather use early stopping. This means that we can set +`vaeac.epochs` to a large number and let `vaeac.epochs_early_stopping` be for example `5`. This means that the `vaeac` +model will stop the training procedure if there has been no improvement in the validation score for `5` epochs. 
+ +```{r early-stopping-1, cache = TRUE} +expl_early_stopping <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_samples = 250, + n_batches = 1, + verbose = 2, + vaeac.epochs = 1000, # Set it to a big number + vaeac.n_vaeacs_initialize = 2, + vaeac.extra_parameters = list(vaeac.epochs_early_stopping = 2) # Low value here for faster building of the vignette +) + +# Look at the training and validation errors. We are quite happy with it. +vaeac_plot_evaluation_criteria(list("Vaeac early stopping" = expl_early_stopping), plot_type = "method") +``` + +However, we can train it further for a fixed amount of epochs if desired. This can be in a setting where we are not +happy with the IWAE curve or we feel that we set `vaeac.epochs_early_stopping` to a too low value or if the max +number of epochs (`vaeac.epochs`) were reached. +```{r early-stopping-2, cache = TRUE} +# Make a copy of the explanation object which we are to train further. +expl_early_stopping_train_more <- expl_early_stopping + +# Continue to train the vaeac model some more epochs +expl_early_stopping_train_more$internal$parameters$vaeac <- + vaeac_continue_train_model( + explanation = expl_early_stopping_train_more, + epochs_new = 15, + x_train = x_train, + verbose = 2 + ) + +# Can even do it twice if desired +expl_early_stopping_train_more$internal$parameters$vaeac <- + vaeac_continue_train_model( + explanation = expl_early_stopping_train_more, + epochs_new = 10, + x_train = x_train, + verbose = 2 + ) + +# Look at the training and validation errors. We see some improvement +vaeac_plot_evaluation_criteria(list( + "Vaeac early stopping" = expl_early_stopping, + "Vaeac early stopping more epochs" = expl_early_stopping_train_more +), plot_type = "method") +``` + +We can then use the extra trained version to compute the Shapley value explanations and compare it with the previous +version that used early stopping. We see a non-significant difference. +```{r early-stopping-3, cache = TRUE} +# Use extra trained vaeac model to compute Shapley values again. +expl_early_stopping_train_more <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = n_batches, + n_samples = 250, + vaeac.extra_parameters = list( + vaeac.pretrained_vaeac_model = expl_early_stopping_train_more$internal$parameters$vaeac + ) +) + +# We can compare their MSEv scores +plot_MSEv_eval_crit(list( + "Vaeac early stopping" = expl_early_stopping, + "Vaeac early stopping more epochs" = expl_early_stopping_train_more +)) + +# We see that the Shapley values have changed, but only slightly +plot_SV_several_approaches(list( + "Vaeac early stopping" = expl_early_stopping, + "Vaeac early stopping more epochs" = expl_early_stopping_train_more +)) +``` + + + + + +## Grouping of features {#grouping_of_features} +When we train a `vaeac` model to explain groups of features, then the `vaeac` model will use +the "Specified_masks_mask_generator" which ensures that the `vaeac` model only train on a specified set of coalitions. +In this case, it will ensure that all features in group A will always either be conditioned on or be unconditioned. +The same goes for group B. Note that in this setup, there are only `4` possible coalitions, but `vaeac` only train on +`2` coalitions as the empty and grand coalitions as they are not needed in the Shapley value computations. 
+ +```{r vaeac-grouping-of-features, cache = TRUE} +expl_group <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "vaeac", + prediction_zero = prediction_zero, + group = list(A = c("Temp", "Month"), B = c("Wind", "Solar.R")), + n_batches = 2, + n_samples = n_samples, + verbose = 2, + vaeac.epochs = 4, + vaeac.n_vaeacs_initialize = 2, +) + +# Plot the resulting explanations +plot(expl_group) +``` + + + +## Mixed Data {#mixed_data} +Here we look at a setup with mixed data, i.e., the data contains both categorical and continuous features. First we set up the data and the model. +```{r ctree-mixed-data, cache = TRUE} +library(ranger) +data <- data.table::as.data.table(airquality) +data <- data[complete.cases(data), ] + +# convert the month variable to a factor +data[, Month_factor := as.factor(Month)] + +x_var_cat <- c("Solar.R", "Wind", "Temp", "Month_factor") +y_var <- "Ozone" + +ind_x_explain <- 1:6 + +data_train_cat <- data[-ind_x_explain, ] +x_train_cat <- data_train_cat[, ..x_var_cat] +x_explain_cat <- data[ind_x_explain, ][, ..x_var_cat] + +# Fit a random forest model to the training data +model <- ranger(as.formula(paste0(y_var, " ~ ", paste0(x_var_cat, collapse = " + "))), + data = data_train_cat +) + +# Specifying the phi_0, i.e. the expected prediction without any features +prediction_zero <- mean(data_train_cat[, get(y_var)]) +``` +Then we compute explanations using the `ctree` and `vaeac` approaches. For the `vaeac` approach, we +consider two setups: the default architecture, and a simpler one without skip connections. We do this +to illustrate that the skip connections improve the `vaeac` method. + +```{r vaeac-mixed-data, cache = TRUE} +# Here we use the ctree approach +expl_ctree <- explain( + model = model, + x_explain = x_explain_cat, + x_train = x_train_cat, + approach = "ctree", + prediction_zero = prediction_zero, + n_batches = 1, + n_samples = 250 +) + +# Then we use the vaeac approach +expl_vaeac_with <- explain( + model = model, + x_explain = x_explain_cat, + x_train = x_train_cat, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = 1, + n_samples = 250, + vaeac.epochs = 50, + vaeac.n_vaeacs_initialize = 4 +) + +# Then we use the vaeac approach +expl_vaeac_without <- explain( + model = model, + x_explain = x_explain_cat, + x_train = x_train_cat, + approach = "vaeac", + prediction_zero = prediction_zero, + n_batches = 1, + n_samples = 250, + vaeac.epochs = 50, + vaeac.n_vaeacs_initialize = 4, + vaeac.extra_parameters = list(vaeac.skip_conn_layer = FALSE, vaeac.skip_conn_masked_enc_dec = FALSE) +) + +# We see that the `vaeac` model without the skip connections perform worse +vaeac_plot_evaluation_criteria(list( + "Vaeac w.o. skip-con." = expl_vaeac_without, + "Vaeac w. skip-con." = expl_vaeac_with +), plot_type = "criterion") + +# The vaeac model with skip connections have the lowest/best MSE_Frye evaluation criterion score +plot_MSEv_eval_crit(list( + "Vaeac w.o. skip-con." = expl_vaeac_without, + "Vaeac w. skip-con." = expl_vaeac_with, + "Ctree" = expl_ctree +)) + +# Can compare the Shapley values. Ctree and vaeac with skip connections produce similar explanations +plot_SV_several_approaches(list( + "Vaeac w.o. skip-con." = expl_vaeac_without, + "Vaeac w. skip-con." = expl_vaeac_with, + "Ctree" = expl_ctree +), index_explicands = 1:6) +``` + + + + +# Future Updates {#future} +- Add support for GPU in vaeac. We have not had access to GPU, so have only used CPU.