diff --git a/DESCRIPTION b/DESCRIPTION index d75e841f0..d12ee9094 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: parameters Title: Processing of Model Parameters -Version: 0.21.2.10 +Version: 0.21.2.11 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/R/ci_profile_boot.R b/R/ci_profile_boot.R index 1d0b54672..0dab03861 100644 --- a/R/ci_profile_boot.R +++ b/R/ci_profile_boot.R @@ -1,7 +1,10 @@ .ci_profiled <- function(model, ci) { glm_ci <- tryCatch( { - out <- as.data.frame(stats::confint(model, level = ci), stringsAsFactors = FALSE) + out <- as.data.frame( + suppressWarnings(stats::confint(model, level = ci)), + stringsAsFactors = FALSE + ) names(out) <- c("CI_low", "CI_high") out$CI <- ci @@ -132,9 +135,9 @@ ) param_names <- switch(component, - "conditional" = pars$Parameter, - "zi" = , - "zero_inflated" = paste0("zi~", pars$Parameter), + conditional = pars$Parameter, + zi = , + zero_inflated = paste0("zi~", pars$Parameter), c( pars$Parameter[pars$Component == "conditional"], paste0("zi~", pars$Parameter[pars$Component == "zero_inflated"]) diff --git a/R/format.R b/R/format.R index b151fb4a7..496b5d34c 100644 --- a/R/format.R +++ b/R/format.R @@ -306,12 +306,12 @@ format.compare_parameters <- function(x, ran_pars <- which(x$Effects == "random") # find all random effect groups - if (!is.null(x$Group)) { - ran_groups <- unique(insight::compact_character(x$Group)) - ran_group_rows <- which(nchar(x$Group) > 0) - } else { + if (is.null(x$Group)) { ran_groups <- NULL ran_group_rows <- NULL + } else { + ran_groups <- unique(insight::compact_character(x$Group)) + ran_group_rows <- which(nchar(x$Group) > 0) } for (i in models) { @@ -646,10 +646,10 @@ format.parameters_sem <- function(x, .add_footer_sigma <- function(footer = NULL, digits = 3, sigma = NULL, residual_df = NULL, type = "text") { if (!is.null(sigma)) { # format residual df - if (!is.null(residual_df)) { - res_df <- paste0(" (df = ", residual_df, ")") - } else { + if (is.null(residual_df)) { res_df <- "" + } else { + res_df <- paste0(" (df = ", residual_df, ")") } if (type == "text" || type == "markdown") { @@ -785,10 +785,10 @@ format.parameters_sem <- function(x, .add_footer_formula <- function(footer = NULL, model_formula = NULL, n_obs = NULL, type = "text") { if (!is.null(model_formula)) { # format n of observations - if (!is.null(n_obs)) { - n <- paste0(" (", n_obs, " Observations)") - } else { + if (is.null(n_obs)) { n <- "" + } else { + n <- paste0(" (", n_obs, " Observations)") } if (type == "text" || type == "markdown") { @@ -919,18 +919,44 @@ format.parameters_sem <- function(x, .print_footer_exp <- function(x) { if (isTRUE(getOption("parameters_exponentiate", TRUE))) { msg <- NULL + # we need this to check whether we have extremely large cofficients + if (all(c("Coefficient", "Parameter") %in% colnames(x))) { + spurious_coefficients <- abs(x$Coefficient[!.in_intercepts(x$Parameter)]) + } else { + spurious_coefficients <- NULL + } exponentiate <- .additional_arguments(x, "exponentiate", FALSE) if (!.is_valid_exponentiate_argument(exponentiate)) { if (isTRUE(.additional_arguments(x, "log_link", FALSE))) { msg <- "The model has a log- or logit-link. Consider using `exponentiate = TRUE` to interpret coefficients as ratios." # nolint + # we only check for exp(coef), so exp() here soince coefficients are on logit-scale + spurious_coefficients <- exp(spurious_coefficients) } else if (isTRUE(.additional_arguments(x, "log_response", FALSE))) { msg <- "The model has a log-transformed response variable. Consider using `exponentiate = TRUE` to interpret coefficients as ratios." # nolint + # don't show warning about complete separation + spurious_coefficients <- NULL } } else if (.is_valid_exponentiate_argument(exponentiate) && isTRUE(.additional_arguments(x, "log_response", FALSE))) { # nolint msg <- c( "This model has a log-transformed response variable, and exponentiated parameters are reported.", "A one-unit increase in the predictor is associated with multiplying the outcome by that predictor's coefficient." # nolint ) + # don't show warning about complete separation + spurious_coefficients <- NULL + } + + # following check only for models with logit-link + logit_model <- isTRUE(.additional_arguments(x, "logit_link", FALSE)) || + isTRUE(attributes(x)$coefficient_name %in% c("Log-Odds", "Odds Ratio")) + + # check for complete separation coefficients or possible issues with + # too few data points + if (!is.null(spurious_coefficients) && logit_model) { + if (any(spurious_coefficients > 140)) { + msg <- c(msg, "Note that some coefficients are very large, which may indicate issues with complete separation.") # nolint + } else if (any(spurious_coefficients > 20)) { + msg <- c(msg, "Note that some coefficients are very large, which may indicate issues with (quasi) complete separation. Consider using bias-corrected or penalized regression models.") # nolint + } } if (!is.null(msg) && isTRUE(getOption("parameters_warning_exponentiate", TRUE))) { diff --git a/R/utils_model_parameters.R b/R/utils_model_parameters.R index eb62012a1..a59b48d22 100644 --- a/R/utils_model_parameters.R +++ b/R/utils_model_parameters.R @@ -56,6 +56,7 @@ attr(params, "ran_pars") <- isFALSE(group_level) attr(params, "show_summary") <- isTRUE(summary) attr(params, "log_link") <- isTRUE(grepl("log", info$link_function, fixed = TRUE)) + attr(params, "logit_link") <- isTRUE(identical(info$link_function, "logit")) # add parameters with value and variable attr(params, "pretty_labels") <- .format_value_labels(params, model) @@ -63,7 +64,7 @@ # use tryCatch, these might fail... attr(params, "test_statistic") <- .safe(insight::find_statistic(model)) attr(params, "log_response") <- .safe(isTRUE(grepl("log", insight::find_transformation(model), fixed = TRUE))) - attr(params, "log_predictors") <- .safe(any(grepl("log", unlist(insight::find_terms(model)[c("conditional", "zero_inflated", "instruments")]), fixed = TRUE))) + attr(params, "log_predictors") <- .safe(any(grepl("log", unlist(insight::find_terms(model)[c("conditional", "zero_inflated", "instruments")]), fixed = TRUE))) # nolint # save if model is multivariate response model if (isTRUE(info$is_multivariate)) { @@ -301,13 +302,13 @@ } # pattern for marginaleffects objects - if (!is.null(attr(params, "coefficient_name"))) { + if (is.null(attr(params, "coefficient_name"))) { + pattern <- "^(Coefficient|Mean|Median|MAP|Std_Coefficient|CI_|Std_CI)" + } else { pattern <- sprintf( "^(Coefficient|Mean|Median|MAP|Std_Coefficient|%s|CI_|Std_CI)", attr(params, "coefficient_name") ) - } else { - pattern <- "^(Coefficient|Mean|Median|MAP|Std_Coefficient|CI_|Std_CI)" } columns <- grepl(pattern = pattern, colnames(params)) @@ -319,10 +320,10 @@ rows <- !tolower(params$Component) %in% c("location", "scale") } else { # don't exponentiate dispersion - if (!is.null(params$Component)) { - rows <- !tolower(params$Component) %in% c("dispersion", "residual") - } else { + if (is.null(params$Component)) { rows <- seq_len(nrow(params)) + } else { + rows <- !tolower(params$Component) %in% c("dispersion", "residual") } } params[rows, columns] <- exp(params[rows, columns]) @@ -362,7 +363,7 @@ #' @keywords internal .add_anova_attributes <- function(params, model, ci, test = NULL, alternative = NULL, ...) { - dot.arguments <- lapply(match.call(expand.dots = FALSE)$`...`, function(x) x) + dot.arguments <- lapply(match.call(expand.dots = FALSE)$`...`, function(x) x) # nolint attr(params, "ci") <- ci attr(params, "model_class") <- class(model) @@ -448,8 +449,8 @@ if (verbose) { not_allowed_string <- datawizard::text_concatenate(not_allowed, enclose = "\"") insight::format_alert( - sprintf("Following arguments are not supported in `%s()` for models of class `%s` and will be ignored: %s", function_name, model_class, not_allowed_string), - sprintf("Please run `%s()` again without specifying the above mentioned arguments to obtain expected results.", function_name) + sprintf("Following arguments are not supported in `%s()` for models of class `%s` and will be ignored: %s", function_name, model_class, not_allowed_string), # nolint + sprintf("Please run `%s()` again without specifying the above mentioned arguments to obtain expected results.", function_name) # nolint ) } dots[not_allowed] <- NULL diff --git a/tests/testthat/_snaps/complete_separation.md b/tests/testthat/_snaps/complete_separation.md new file mode 100644 index 000000000..d433a37ee --- /dev/null +++ b/tests/testthat/_snaps/complete_separation.md @@ -0,0 +1,62 @@ +# print warning about complete separation + + Code + print(out) + Output + Parameter | Log-Odds | SE | 95% CI | z | p + ------------------------------------------------------------------------------ + (Intercept) | -66.10 | 1.83e+05 | [-10644.72, 10512.52] | -3.60e-04 | > .999 + x1 | 15.29 | 27362.84 | [ -3122.69, ] | 5.59e-04 | > .999 + x2 | 6.24 | 81543.72 | [-12797.28, ] | 7.65e-05 | > .999 + Message + + Uncertainty intervals (profile-likelihood) and p-values (two-tailed) + computed using a Wald z-distribution approximation. + + The model has a log- or logit-link. Consider using `exponentiate = + TRUE` to interpret coefficients as ratios. + + Note that some coefficients are very large, which may indicate issues + with complete separation. + +--- + + Code + print(out) + Output + Parameter | Log-Odds | SE | 95% CI | z | p + ------------------------------------------------------------------------- + (Intercept) | -83.33 | 15505.03 | [ , 816.56] | -5.37e-03 | 0.996 + gear | 21.01 | 3876.26 | [-248.93, ] | 5.42e-03 | 0.996 + Message + + Uncertainty intervals (profile-likelihood) and p-values (two-tailed) + computed using a Wald z-distribution approximation. + + The model has a log- or logit-link. Consider using `exponentiate = + TRUE` to interpret coefficients as ratios. + + Note that some coefficients are very large, which may indicate issues + with complete separation. + +# print warning about quasi complete separation + + Code + print(out) + Output + Parameter | Log-Odds | SE | 95% CI | z | p + ------------------------------------------------------------------ + (Intercept) | -56.36 | 21.45 | [-114.76, -25.33] | -2.63 | 0.009 + qsec | 3.12 | 1.19 | [ 1.40, 6.37] | 2.62 | 0.009 + Message + + Uncertainty intervals (profile-likelihood) and p-values (two-tailed) + computed using a Wald z-distribution approximation. + + The model has a log- or logit-link. Consider using `exponentiate = + TRUE` to interpret coefficients as ratios. + + Note that some coefficients are very large, which may indicate issues + with (quasi) complete separation. Consider using bias-corrected or + penalized regression models. + diff --git a/tests/testthat/test-complete_separation.R b/tests/testthat/test-complete_separation.R new file mode 100644 index 000000000..7b8594a30 --- /dev/null +++ b/tests/testthat/test-complete_separation.R @@ -0,0 +1,42 @@ +skip_if(getRversion() < "4.0.0") +skip_if_not_installed("withr") + +withr::with_options( + list(parameters_warning_exponentiate = TRUE), + { + test_that("print warning about complete separation", { + d_sep <- data.frame( + y = c(0, 0, 0, 0, 1, 1, 1, 1), + x1 = c(1, 2, 3, 3, 5, 6, 10, 11), + x2 = c(3, 2, -1, -1, 2, 4, 1, 0) + ) + m_sep <- suppressWarnings(glm(y ~ x1 + x2, data = d_sep, family = binomial)) + out <- model_parameters(m_sep) + expect_snapshot(print(out)) + }) + } +) + +withr::with_options( + list(parameters_warning_exponentiate = TRUE), + { + test_that("print warning about complete separation", { + data(mtcars) + m_sep2 <- suppressWarnings(glm(am ~ gear, data = mtcars, family = binomial)) + out <- model_parameters(m_sep2) + expect_snapshot(print(out)) + }) + } +) + +withr::with_options( + list(parameters_warning_exponentiate = TRUE), + { + test_that("print warning about quasi complete separation", { + data(mtcars) + m_sep3 <- suppressWarnings(glm(vs ~ qsec, data = mtcars, family = binomial)) + out <- model_parameters(m_sep3) + expect_snapshot(print(out)) + }) + } +) diff --git a/tests/testthat/test-model_parameters.cgam.R b/tests/testthat/test-model_parameters.cgam.R index 9b4828683..b9be2e5fc 100644 --- a/tests/testthat/test-model_parameters.cgam.R +++ b/tests/testthat/test-model_parameters.cgam.R @@ -47,6 +47,7 @@ test_that("model_parameters - cgam", { ran_pars = TRUE, show_summary = FALSE, log_link = FALSE, + logit_link = FALSE, pretty_labels = c(`(Intercept)` = "(Intercept)"), test_statistic = "t-statistic", log_response = FALSE, @@ -140,6 +141,7 @@ test_that("model_parameters - cgamm", { ran_pars = TRUE, show_summary = FALSE, log_link = FALSE, + logit_link = FALSE, pretty_labels = c( `(Intercept)` = "(Intercept)", `cgam::s.incr(x)` = "cgam::s.incr(x)" diff --git a/tests/testthat/test-printing.R b/tests/testthat/test-printing.R index ce3891fa1..3a9f9ea35 100644 --- a/tests/testthat/test-printing.R +++ b/tests/testthat/test-printing.R @@ -2,7 +2,7 @@ skip_if_not_installed("withr") skip_if(getRversion() < "4.0.0") withr::with_options( - list("parameters_interaction" = "*"), + list(parameters_interaction = "*"), { # Splitting model components ---- test_that("print model with multiple components", {