Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update the cex argument in ggbeeswarm::geom_beeswarm() #406

Merged
merged 8 commits into from
Oct 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
26 changes: 16 additions & 10 deletions .Rprofile
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
if (requireNamespace("testthat", quietly = TRUE)) {
testthat::set_max_fails(Inf)
}

#' Helper function for package development
#'
#' This is a manual extension of [testthat::snapshot_review()] which works for the \code{.rds} files used in
Expand All @@ -7,17 +11,19 @@
#' @param ... Additional arguments passed to [waldo::compare()]
#' Gives the relative path to the test files to review
#'
snapshot_review_man <- function(path, tolerance = NULL, ...) {
changed <- testthat:::snapshot_meta(path)
these_rds <- (tools::file_ext(changed$name) == "rds")
if (any(these_rds)) {
for (i in which(these_rds)) {
old <- readRDS(changed[i, "cur"])
new <- readRDS(changed[i, "new"])
snapshot_review_man <- function(path, tolerance = 10^(-5), max_diffs = 200, ...) {
if (requireNamespace("testthat", quietly = TRUE) && requireNamespace("waldo", quietly = TRUE)) {
changed <- testthat:::snapshot_meta(path)
these_rds <- (tools::file_ext(changed$name) == "rds")
if (any(these_rds)) {
for (i in which(these_rds)) {
old <- readRDS(changed[i, "cur"])
new <- readRDS(changed[i, "new"])

cat(paste0("Difference for check ", changed[i, "name"], " in test ", changed[i, "test"], "\n"))
print(waldo::compare(old, new, max_diffs = 50, tolerance = tolerance, ...))
browser()
cat(paste0("Difference for check ", changed[i, "name"], " in test ", changed[i, "test"], "\n"))
print(waldo::compare(old, new, max_diffs = max_diffs, tolerance = tolerance, ...))
browser()
}
}
}
}
4 changes: 2 additions & 2 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@

on:
push:
branches: [main, master, cranversion, devel]
branches: [main, master, cranversion, devel, 'shapr-1.0.0']
pull_request:
branches: [main, master, cranversion, devel]
branches: [main, master, cranversion, devel, 'shapr-1.0.0']

name: R-CMD-check

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint-changed-files.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
pull_request:
branches: [main, master]
branches: [main, master, cranversion, devel, 'shapr-1.0.0']

name: lint-changed-files

Expand Down
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Encoding: UTF-8
LazyData: true
ByteCompile: true
Language: en-US
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Depends: R (>= 3.5.0)
Imports:
stats,
Expand Down Expand Up @@ -66,7 +66,8 @@ Suggests:
yardstick,
hardhat,
rsample,
rlang
rlang,
cli
LinkingTo:
RcppArmadillo,
Rcpp
Expand Down
23 changes: 21 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,22 @@ S3method(setup_approach,regression_separate)
S3method(setup_approach,regression_surrogate)
S3method(setup_approach,timeseries)
S3method(setup_approach,vaeac)
export(additional_regression_setup)
export(aicc_full_single_cpp)
export(check_convergence)
export(coalition_matrix_cpp)
export(compute_estimates)
export(compute_shapley_new)
export(compute_time)
export(compute_vS)
export(compute_vS_forecast)
export(correction_matrix_cpp)
export(create_coalition_table)
export(explain)
export(explain_forecast)
export(feature_combinations)
export(feature_matrix_cpp)
export(finalize_explanation)
export(finalize_explanation_forecast)
export(get_adaptive_arguments_default)
export(get_cov_mat)
export(get_data_specs)
export(get_model_specs)
Expand All @@ -73,19 +80,28 @@ export(plot_MSEv_eval_crit)
export(plot_SV_several_approaches)
export(predict_model)
export(prepare_data)
export(prepare_data_causal)
export(prepare_data_copula_cpp)
export(prepare_data_copula_cpp_caus)
export(prepare_data_gaussian_cpp)
export(prepare_data_gaussian_cpp_caus)
export(prepare_next_iteration)
export(print_iter)
export(regression.train_model)
export(rss_cpp)
export(save_results)
export(setup)
export(setup_approach)
export(setup_computation)
export(shapley_setup)
export(testing_cleanup)
export(vaeac_get_evaluation_criteria)
export(vaeac_get_extra_para_default)
export(vaeac_plot_eval_crit)
export(vaeac_plot_imputed_ggpairs)
export(vaeac_train_model)
export(vaeac_train_model_continue)
export(weight_matrix)
export(weight_matrix_cpp)
importFrom(Rcpp,sourceCpp)
importFrom(data.table,":=")
Expand All @@ -110,6 +126,7 @@ importFrom(stats,as.formula)
importFrom(stats,contrasts)
importFrom(stats,embed)
importFrom(stats,formula)
importFrom(stats,median)
importFrom(stats,model.frame)
importFrom(stats,model.matrix)
importFrom(stats,predict)
Expand All @@ -118,8 +135,10 @@ importFrom(stats,qt)
importFrom(stats,rnorm)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(utils,capture.output)
importFrom(utils,head)
importFrom(utils,methods)
importFrom(utils,modifyList)
importFrom(utils,relist)
importFrom(utils,tail)
useDynLib(shapr, .registration = TRUE)
8 changes: 6 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# shapr (development version)
# shapr 1.0.0

* Release a Python wrapper (`shaprpyr`, [#325](https://github.com/NorskRegnesentral/shapr/pull/325)) for explaining predictions from Python models (from Python) utilizing almost all functionality of `shapr`. The wrapper moves back and forth back and forth between Python and R, doing the prediction in Python, and almost everything else in R. This simplifies maintenance of `shaprpy` significantly. The wrapper is available [here](https://github.com/NorskRegnesentral/shapr/tree/master/python).
* (Just some notes so far)
* Adaptive estimatio/convergence detection
* Verbosity
* Complete restructuring motivated by introducing the Python wrapper. The restructuring splits the explanation tasks into smaller pieces, which was necessary to allow the Python wrapper to move back and forth between R and Python.
* As part of the restructuring, we also did a number of design changes, resulting in a series of breaking changes described below.

Expand All @@ -13,6 +15,8 @@

### New features

* Adatpive sampling of Shapley value subsets
* Release a Python wrapper (`shaprpyr`, [#325](https://github.com/NorskRegnesentral/shapr/pull/325)) for explaining predictions from Python models (from Python) utilizing almost all functionality of `shapr`. The wrapper moves back and forth back and forth between Python and R, doing the prediction in Python, and almost everything else in R. This simplifies maintenance of `shaprpy` significantly. The wrapper is available [here](https://github.com/NorskRegnesentral/shapr/tree/master/python).
* Introduce batch computation of conditional expectations ([#244](https://github.com/NorskRegnesentral/shapr/issues/244)).
This essentially compute $v(S)$ for a portion of the $S$-subsets at a time, to reduce the amount of data needed to be held in memory.
The user can control the number of batches herself, but we set a reasonable value by default ([#327](https://github.com/NorskRegnesentral/shapr/pull/327)).
Expand Down
94 changes: 74 additions & 20 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,15 @@ inv_gaussian_transform_cpp <- function(z, x) {

#' Generate (Gaussian) Copula MC samples
#'
#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the
#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_MC_samples`, `n_features`) containing samples from the
#' univariate standard normal.
#' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations
#' to explain on the original scale.
#' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the
#' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been
#' transformed to a standardized normal distribution.
#' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations.
#' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of
#' @param S arma::mat. Matrix of dimension (`n_coalitions`, `n_features`) containing binary representations of
#' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones.
#' This is not a problem internally in shapr as the empty and grand coalitions treated differently.
#' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed
Expand All @@ -127,8 +127,8 @@ inv_gaussian_transform_cpp <- function(z, x) {
#' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been
#' transformed to a standardized normal distribution.
#'
#' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where
#' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian
#' @return An arma::cube/3D array of dimension (`n_MC_samples`, `n_explain` * `n_coalitions`, `n_features`), where
#' the columns (_,j,_) are matrices of dimension (`n_MC_samples`, `n_features`) containing the conditional Gaussian
#' copula MC samples for each explicand and coalition on the original scale.
#'
#' @export
Expand All @@ -138,21 +138,51 @@ prepare_data_copula_cpp <- function(MC_samples_mat, x_explain_mat, x_explain_gau
.Call(`_shapr_prepare_data_copula_cpp`, MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat)
}

#' Generate (Gaussian) Copula MC samples for the causal setup with a single MC sample for each explicand
#'
#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing samples from the
#' univariate standard normal. The i'th row will be applied to the i'th row in `x_explain_mat`.
#' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations to
#' explain on the original scale. The MC sample for the i'th explicand is based on the i'th row in `MC_samples_mat`.
#' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the
#' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been
#' transformed to a standardized normal distribution.
#' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations.
#' @param S arma::mat. Matrix of dimension (`n_coalitions`, `n_features`) containing binary representations of
#' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones.
#' This is not a problem internally in shapr as the empty and grand coalitions treated differently.
#' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed
#' using the Gaussian transform, i.e., the samples have been transformed to a standardized normal distribution.
#' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance
#' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been
#' transformed to a standardized normal distribution.
#'
#' @return An arma::mat/2D array of dimension (`n_explain` * `n_coalitions`, `n_features`),
#' where the rows (n_explain * S_ind, n_explain * (S_ind + 1) - 1) contains the single
#' conditional Gaussian MC samples for each explicand and `S_ind` coalition.
#'
#' @export
#' @keywords internal
#' @author Lars Henry Berge Olsen
prepare_data_copula_cpp_caus <- function(MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat) {
.Call(`_shapr_prepare_data_copula_cpp_caus`, MC_samples_mat, x_explain_mat, x_explain_gaussian_mat, x_train_mat, S, mu, cov_mat)
}

#' Generate Gaussian MC samples
#'
#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the
#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_MC_samples`, `n_features`) containing samples from the
#' univariate standard normal.
#' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations
#' to explain.
#' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of
#' @param S arma::mat. Matrix of dimension (`n_coalitions`, `n_features`) containing binary representations of
#' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones.
#' This is not a problem internally in shapr as the empty and grand coalitions treated differently.
#' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature.
#' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance
#' between all pairs of features.
#'
#' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where
#' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian
#' @return An arma::cube/3D array of dimension (`n_MC_samples`, `n_explain` * `n_coalitions`, `n_features`), where
#' the columns (_,j,_) are matrices of dimension (`n_MC_samples`, `n_features`) containing the conditional Gaussian
#' MC samples for each explicand and coalition.
#'
#' @export
Expand All @@ -162,6 +192,30 @@ prepare_data_gaussian_cpp <- function(MC_samples_mat, x_explain_mat, S, mu, cov_
.Call(`_shapr_prepare_data_gaussian_cpp`, MC_samples_mat, x_explain_mat, S, mu, cov_mat)
}

#' Generate Gaussian MC samples for the causal setup with a single MC sample for each explicand
#'
#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing samples from the
#' univariate standard normal. The i'th row will be applied to the i'th row in `x_explain_mat`.
#' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations
#' to explain. The MC sample for the i'th explicand is based on the i'th row in `MC_samples_mat`
#' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of
#' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones.
#' This is not a problem internally in shapr as the empty and grand coalitions treated differently.
#' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature.
#' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance
#' between all pairs of features.
#'
#' @return An arma::mat/2D array of dimension (`n_explain` * `n_coalitions`, `n_features`),
#' where the rows (n_explain * S_ind, n_explain * (S_ind + 1) - 1) contains the single
#' conditional Gaussian MC samples for each explicand and `S_ind` coalition.
#'
#' @export
#' @keywords internal
#' @author Lars Henry Berge Olsen
prepare_data_gaussian_cpp_caus <- function(MC_samples_mat, x_explain_mat, S, mu, cov_mat) {
.Call(`_shapr_prepare_data_gaussian_cpp_caus`, MC_samples_mat, x_explain_mat, S, mu, cov_mat)
}

#' (Generalized) Mahalanobis distance
#'
#' Used to get the Euclidean distance as well by setting \code{mcov} = \code{diag(m)}.
Expand Down Expand Up @@ -199,7 +253,7 @@ sample_features_cpp <- function(m, n_features) {
#'
#' @param xtest Numeric matrix. Represents a single test observation.
#'
#' @param S Integer matrix of dimension \code{n_combinations x m}, where \code{n_combinations} equals
#' @param S Integer matrix of dimension \code{n_coalitions x m}, where \code{n_coalitions} equals
#' the total number of sampled/non-sampled feature combinations and \code{m} equals
#' the total number of unique features. Note that \code{m = ncol(xtrain)}. See details
#' for more information.
Expand Down Expand Up @@ -228,34 +282,34 @@ observation_impute_cpp <- function(index_xtrain, index_s, xtrain, xtest, S) {

#' Calculate weight matrix
#'
#' @param subsets List. Each of the elements equals an integer
#' @param coalitions List. Each of the elements equals an integer
#' vector representing a valid combination of features/feature groups.
#' @param m Integer. Number of features/feature groups
#' @param n Integer. Number of combinations
#' @param w Numeric vector of length \code{n}, i.e. \code{w[i]} equals
#' the Shapley weight of feature/feature group combination \code{i}, represented by
#' \code{subsets[[i]]}.
#' \code{coalitions[[i]]}.
#'
#' @export
#' @keywords internal
#'
#' @return Matrix of dimension n x m + 1
#' @author Nikolai Sellereite
weight_matrix_cpp <- function(subsets, m, n, w) {
.Call(`_shapr_weight_matrix_cpp`, subsets, m, n, w)
#' @author Nikolai Sellereite, Martin Jullum
weight_matrix_cpp <- function(coalitions, m, n, w) {
.Call(`_shapr_weight_matrix_cpp`, coalitions, m, n, w)
}

#' Get feature matrix
#' Get coalition matrix
#'
#' @param features List
#' @param m Positive integer. Total number of features
#' @param coalitions List
#' @param m Positive integer. Total number of coalitions
#'
#' @export
#' @keywords internal
#'
#' @return Matrix
#' @author Nikolai Sellereite
feature_matrix_cpp <- function(features, m) {
.Call(`_shapr_feature_matrix_cpp`, features, m)
#' @author Nikolai Sellereite, Martin Jullum
coalition_matrix_cpp <- function(coalitions, m) {
.Call(`_shapr_coalition_matrix_cpp`, coalitions, m)
}

Loading
Loading