Skip to content

Commit

Permalink
AIC varies with order of factors in binomial? (#835)
Browse files Browse the repository at this point in the history
* AIC varies with order of factors in binomial?
Fixes #834

* lintr

* news

* fix

* reverse lintr

* lintr

* lintr

* fix?
  • Loading branch information
strengejacke authored Nov 27, 2023
1 parent 09551e9 commit f4e739f
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 110 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: insight
Title: Easy Access to Model Information for Various Model Objects
Version: 0.19.7
Version: 0.19.7.1
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# insight 0.19.8

## Bug fixes

* Fixed issue in `get_loglikelihood()` for glm-models with binary outcome, where
levels were defined in reversed order.

# insight 0.19.7

## General
Expand Down
147 changes: 69 additions & 78 deletions R/get_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ get_data <- function(x, ...) {

# handle arguments
effects <- match.arg(effects, choices = c("all", "fixed", "random"))
component <- match.arg(component, choices = c("all", "conditional", "zero_inflated", "zi", "smooth_terms", "dispersion"))
component <- match.arg(
component,
choices = c("all", "conditional", "zero_inflated", "zi", "smooth_terms", "dispersion")
)

# we want to add the variable for subsettig, too
model_call <- get_call(x)
Expand Down Expand Up @@ -156,10 +159,10 @@ get_data <- function(x, ...) {

# complete cases only, as in model frames, need to filter attributes
# only use model variables in complete.cases()
if (!is.null(vars)) {
cc <- stats::complete.cases(dat[, intersect(vars, colnames(dat))])
} else {
if (is.null(vars)) {
cc <- stats::complete.cases(dat)
} else {
cc <- stats::complete.cases(dat[, intersect(vars, colnames(dat))])
}

# only preserve random effects
Expand Down Expand Up @@ -284,16 +287,12 @@ get_data.default <- function(x, source = "environment", verbose = TRUE, ...) {
# fall back to extract data from model frame
if (is.null(model_data)) {
mf <- tryCatch(
{
if (inherits(x, "Zelig-relogit")) {
.get_zelig_relogit_frame(x)
} else {
stats::model.frame(x)
}
if (inherits(x, "Zelig-relogit")) {
.get_zelig_relogit_frame(x)
} else {
stats::model.frame(x)
},
error = function(x) {
NULL
}
error = function(x) NULL
)
# process arguments, check whether data should be recovered from
# environment or model frame
Expand Down Expand Up @@ -725,7 +724,7 @@ get_data.merMod <- function(x,
switch(effects,
fixed = stats::model.frame(x, fixed.only = TRUE),
all = stats::model.frame(x, fixed.only = FALSE),
random = stats::model.frame(x, fixed.only = FALSE)[, find_random(x, split_nested = TRUE, flatten = TRUE), drop = FALSE]
random = stats::model.frame(x, fixed.only = FALSE)[, find_random(x, split_nested = TRUE, flatten = TRUE), drop = FALSE] # nolint
)
})
.prepare_get_data(x, mf, effects, verbose = verbose)
Expand All @@ -745,16 +744,12 @@ get_data.mmrm <- function(x,
# data from model frame
mf <- .prepare_get_data(x, stats::model.frame(x, full = TRUE), effects, verbose = verbose)
tryCatch(
{
switch(effects,
fixed = mf[intersect(colnames(mf), fixed_vars)],
all = mf[intersect(colnames(mf), unique(c(fixed_vars, random_vars)))],
random = mf[intersect(colnames(mf), random_vars)]
)
},
error = function(x) {
NULL
}
switch(effects,
fixed = mf[intersect(colnames(mf), fixed_vars)],
all = mf[intersect(colnames(mf), unique(c(fixed_vars, random_vars)))],
random = mf[intersect(colnames(mf), random_vars)]
),
error = function(x) NULL
)
}

Expand Down Expand Up @@ -820,16 +815,12 @@ get_data.cpglmm <- function(x,
dat <- stats::model.frame(x)

mf <- tryCatch(
{
switch(effects,
fixed = dat[, find_predictors(x, effects = "fixed", flatten = TRUE, verbose = FALSE), drop = FALSE],
all = dat,
random = dat[, find_random(x, split_nested = TRUE, flatten = TRUE), drop = FALSE]
)
},
error = function(x) {
NULL
}
switch(effects,
fixed = dat[, find_predictors(x, effects = "fixed", flatten = TRUE, verbose = FALSE), drop = FALSE],
all = dat,
random = dat[, find_random(x, split_nested = TRUE, flatten = TRUE), drop = FALSE]
),
error = function(x) NULL
)
.prepare_get_data(x, mf, effects, verbose = verbose)
}
Expand Down Expand Up @@ -899,16 +890,12 @@ get_data.mixor <- function(x,
effects <- match.arg(effects, choices = c("all", "fixed", "random"))

mf <- tryCatch(
{
switch(effects,
fixed = stats::model.frame(x),
all = cbind(stats::model.frame(x), x$id),
random = data.frame(x$id)
)
},
error = function(x) {
NULL
}
switch(effects,
fixed = stats::model.frame(x),
all = cbind(stats::model.frame(x), x$id),
random = data.frame(x$id)
),
error = function(x) NULL
)
fix_cn <- which(colnames(mf) %in% c("x.id", "x$id"))
colnames(mf)[fix_cn] <- safe_deparse(x$call$id)
Expand Down Expand Up @@ -999,10 +986,10 @@ get_data.mixed <- function(x,
#' @param shape Return long or wide data? Only applicable in repeated measures
#' designs.
get_data.afex_aov <- function(x, shape = c("long", "wide"), ...) {
if (!length(attr(x, "within"))) {
shape <- "long"
} else {
if (length(attr(x, "within"))) {
shape <- match.arg(shape)
} else {
shape <- "long"
}
x$data[[shape]]
}
Expand Down Expand Up @@ -1484,7 +1471,12 @@ get_data.ivreg <- function(x, source = "environment", verbose = TRUE, ...) {
mf <- .safe(stats::model.frame(x))
ft <- find_variables(x, flatten = TRUE)

if (!insight::is_empty_object(mf)) {
if (is_empty_object(mf)) {
final_mf <- .safe({
dat <- .recover_data_from_environment(x)
dat[, ft, drop = FALSE]
})
} else {
cn <- clean_names(colnames(mf))
remain <- setdiff(ft, cn)
if (is_empty_object(remain)) {
Expand All @@ -1495,11 +1487,6 @@ get_data.ivreg <- function(x, source = "environment", verbose = TRUE, ...) {
cbind(mf, dat[, remain, drop = FALSE])
})
}
} else {
final_mf <- .safe({
dat <- .recover_data_from_environment(x)
dat[, ft, drop = FALSE]
})
}

.prepare_get_data(x, stats::na.omit(final_mf), verbose = verbose)
Expand Down Expand Up @@ -1559,7 +1546,15 @@ get_data.brmsfit <- function(x, effects = "all", component = "all", source = "en
# verbose is FALSE by default because `get_call()` often does not work on
# `brmsfit` objects, so we typically default to the `data` held in the object.
data_name <- attr(x$data, "data_name")
model_data <- .get_data_from_environment(x, effects = effects, component = component, source = source, verbose = verbose, data_name = data_name, ...)
model_data <- .get_data_from_environment(
x,
effects = effects,
component = component,
source = source,
verbose = verbose,
data_name = data_name,
...
)

if (!is.null(model_data)) {
return(model_data)
Expand Down Expand Up @@ -1651,15 +1646,15 @@ get_data.MCMCglmm <- function(x, effects = "all", source = "environment", verbos
all(pv %in% colnames(dat))
}))
mf <- env_dataframes[matchframe][1]
if (!is.na(mf)) {
if (is.na(mf)) {
NULL
} else {
dat <- get(mf)
switch(effects,
fixed = dat[, setdiff(colnames(dat), find_random(x, flatten = TRUE)), drop = FALSE],
all = dat,
random = dat[, find_random(x, flatten = TRUE), drop = FALSE]
)
} else {
NULL
}
},
error = function(x) {
Expand Down Expand Up @@ -1889,21 +1884,17 @@ get_data.vglm <- function(x, source = "environment", verbose = TRUE, ...) {

# fall back to extract data from model frame
mf <- tryCatch(
{
if (!length(x@model)) {
env <- environment(x@terms$terms)
if (is.null(env)) env <- parent.frame()
fcall <- x@call
fcall$method <- "model.frame"
fcall$smart <- FALSE
eval(fcall, env, parent.frame())
} else {
x@model
}
if (length(x@model)) {
x@model
} else {
env <- environment(x@terms$terms)
if (is.null(env)) env <- parent.frame()
fcall <- x@call
fcall$method <- "model.frame"
fcall$smart <- FALSE
eval(fcall, env, parent.frame())
},
error = function(x) {
NULL
}
error = function(x) NULL
)

.prepare_get_data(x, mf)
Expand Down Expand Up @@ -2030,7 +2021,7 @@ get_data.clmm2 <- function(x, source = "environment", verbose = TRUE, ...) {
}

data_complete <- cbind(data_complete, x$grFac)
colnames(data_complete)[ncol(data_complete)] <- unlist(.find_random_effects(x, f = find_formula(x, verbose = FALSE), split_nested = TRUE))
colnames(data_complete)[ncol(data_complete)] <- unlist(.find_random_effects(x, f = find_formula(x, verbose = FALSE), split_nested = TRUE)) # nolint

data_complete
},
Expand Down Expand Up @@ -2183,14 +2174,14 @@ get_data.rma <- function(x,
if (!is.function(transf)) {
format_error("`transf` must be a function.")
}
if (!is.null(transf_args)) {
mf[[find_response(x)]] <- sapply(mf[[find_response(x)]], transf, transf_args)
mf$CI_low <- sapply(mf$CI_low, transf, transf_args)
mf$CI_high <- sapply(mf$CI_high, transf, transf_args)
} else {
if (is.null(transf_args)) {
mf[[find_response(x)]] <- sapply(mf[[find_response(x)]], transf)
mf$CI_low <- sapply(mf$CI_low, transf)
mf$CI_high <- sapply(mf$CI_high, transf)
} else {
mf[[find_response(x)]] <- sapply(mf[[find_response(x)]], transf, transf_args)
mf$CI_low <- sapply(mf$CI_low, transf, transf_args)
mf$CI_high <- sapply(mf$CI_high, transf, transf_args)
}
}
}
Expand Down Expand Up @@ -2283,7 +2274,7 @@ get_data.htest <- function(x, ...) {
.check_data_source_arg <- function(source) {
source <- match.arg(source, choices = c("environment", "mf", "modelframe", "frame"))
switch(source,
"environment" = "environment",
environment = "environment",
"frame"
)
}
24 changes: 15 additions & 9 deletions R/helper_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

# remove values from vector
.remove_values <- function(x, values) {
remove <- x %in% values
if (any(remove)) {
x <- x[!remove]
to_remove <- x %in% values
if (any(to_remove)) {
x <- x[!to_remove]
}
x
}
Expand All @@ -24,7 +24,7 @@
# is string empty?
.is_empty_string <- function(x) {
x <- x[!is.na(x)]
length(x) == 0 || all(nchar(x) == 0)
length(x) == 0 || all(nchar(x) == 0) # nolint
}


Expand Down Expand Up @@ -53,12 +53,11 @@

# checks if a brms-models is a multi-membership-model
.is_multi_membership <- function(x) {
if (inherits(x, "brmsfit")) {
re <- find_random(x, split_nested = TRUE, flatten = TRUE)
any(grepl("^(mmc|mm)\\(", re))
} else {
if (!inherits(x, "brmsfit")) {
return(FALSE)
}
re <- find_random(x, split_nested = TRUE, flatten = TRUE)
any(grepl("^(mmc|mm)\\(", re))
}


Expand Down Expand Up @@ -115,7 +114,7 @@
# if there are any chars left, these come from further terms that come after
# random effects...
.formula_empty_after_random_effect <- function(f) {
nchar(gsub("(~|\\+|\\*|-|/|:)", "", gsub(" ", "", gsub("\\((.*)\\)", "", f), fixed = TRUE))) == 0
nchar(gsub("(~|\\+|\\*|-|/|:)", "", gsub(" ", "", gsub("\\((.*)\\)", "", f), fixed = TRUE))) == 0 # nolint
}


Expand Down Expand Up @@ -558,6 +557,13 @@
}
x <- droplevels(x)
levels(x) <- 1:nlevels(x)
} else if (is.unsorted(levels(x))) {
# for numeric factors, we need to check the order of levels
x_inverse <- rep(NA_real_, length(x))
for (i in 1:nlevels(x)) {
x_inverse[x == levels(x)[i]] <- as.numeric(levels(x)[nlevels(x) - i + 1])
}
x <- factor(x_inverse)
}

out <- as.numeric(as.character(x))
Expand Down
1 change: 1 addition & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ funcionts
gam
ggeffects
github
glm
glmm
glmmTMB
hglm
Expand Down
34 changes: 34 additions & 0 deletions tests/testthat/test-factor_to_numeric.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
test_that(".factor_to_numeric", {
f <- c(0, 0, 1, 1, 1, 0)
x1 <- factor(f, levels = c(0, 1), labels = c("a", "b"))
x2 <- factor(f, levels = c(1, 0), labels = c("b", "a"))
x3 <- factor(f, levels = c(1, 0), labels = c("a", "b"))
x4 <- factor(f, levels = c(0, 1), labels = c("b", "a"))
out1 <- insight:::.factor_to_numeric(x1, lowest = 0)
out2 <- insight:::.factor_to_numeric(x2, lowest = 0)
out3 <- insight:::.factor_to_numeric(x3, lowest = 0)
out4 <- insight:::.factor_to_numeric(x4, lowest = 0)
expect_identical(out1, c(0, 0, 1, 1, 1, 0))
expect_identical(out2, c(1, 1, 0, 0, 0, 1))
expect_identical(out1, out4)
expect_identical(out2, out3)

x1 <- factor(f, levels = c(0, 1))
x2 <- factor(f, levels = c(1, 0))
out1 <- insight:::.factor_to_numeric(x1, lowest = 0)
out2 <- insight:::.factor_to_numeric(x2, lowest = 0)
expect_identical(out1, c(0, 0, 1, 1, 1, 0))
expect_identical(out2, c(1, 1, 0, 0, 0, 1))

f <- c(1, 1, 2, 2, 2, 1)
x1 <- factor(f, levels = c(1, 2))
x2 <- factor(f, levels = c(2, 1))
out1 <- insight:::.factor_to_numeric(x1, lowest = 0)
out2 <- insight:::.factor_to_numeric(x2, lowest = 0)
expect_identical(out1, c(0, 0, 1, 1, 1, 0))
expect_identical(out2, c(1, 1, 0, 0, 0, 1))
out1 <- insight:::.factor_to_numeric(x1)
out2 <- insight:::.factor_to_numeric(x2)
expect_identical(out1, c(1, 1, 2, 2, 2, 1))
expect_identical(out2, c(2, 2, 1, 1, 1, 2))
})
Loading

0 comments on commit f4e739f

Please sign in to comment.