Function for term / parameter-wise deletion #602

mattansb · 2021-09-26T04:45:07Z

Original thread posted by @bwiernik in #570 (comment)

Current version take a model, re-fits models with term\parameter deletion on the fixed effects, and applies a function(s) to each subset vs the full model.

#' @param m A model
#' @fn A function or list of functions. Each function takes two arguments: the
#'   model and the subset model (in that order).
#' @param by Should subsetting bedone by term or by parameter?
compare_nested <- function(m, fn, by = c("term", "parameter")) {
  by <- match.arg(by)
  if (!is.list(fn)) fn <- list(fn)
  
  mf <- insight::get_data(m)
  m_formula <- insight::find_formula(m)
  mm <- insight::get_modelmatrix(m)
  
  if ("random" %in% names(m_formula)) {
    m_formula[["random"]] <- gsub("~", "", format(m_formula[["random"]]))
  }
  
  if (colnames(mm)[1] == "(Intercept)") {
    colnames(mm)[1] <- "Intercept"
  }
  new_dat <- cbind(mf, mm)
  
  if (by == "term") {
    v_assign <- attr(mm, "assign")
    k_assign <- unique(v_assign)
    subset_formula <- vector("list", length(k_assign))
    
    for (k in seq_along(k_assign)) {
      tmp_formula <- m_formula
      tmp_cond <- stats::update.formula(
        tmp_formula$conditional,
        paste(". ~ 0 +", paste0("`", colnames(mm)[v_assign!=k_assign[k]], "`", collapse = " + "))
      )
      tmp_formula$conditional <- paste(as.character(tmp_cond)[c(2, 1, 3)], collapse = " ")
      subset_formula[k] <- format(tmp_formula)
    }
    
    term_nm <- attr(terms(m_formula$conditional),"term.labels")
    if (colnames(mm)[1] == "Intercept") term_nm <- c("Intercept", term_nm)
    names(subset_formula) <- term_nm
  } else {
    subset_formula <- vector("list", ncol(mm))
    v_pars <- colnames(mm)
    
    for (k in seq_along(v_pars)) {
      tmp_formula <- m_formula
      tmp_cond <- stats::update.formula(
        tmp_formula$conditional,
        paste(". ~ 0 +", paste0("`", v_pars[-k], "`", collapse = " + "))
      )
      tmp_formula$conditional <- paste(as.character(tmp_cond)[c(2, 1, 3)], collapse = " ")
      subset_formula[k] <- format(tmp_formula)
    }
    names(subset_formula) <- v_pars
  }
  
  
  res <- lapply(subset_formula, function(sf) {
    s_mod <- update(m, formula = sf, data = new_dat)
    .out <- lapply(fn, function(.f) .f(m, s_mod))
    if (length(.out)==1L) .out <- .out[[1]]
    .out
  })
  return(res)
}

m2 <- glm(count ~ spp * mined, family = "poisson", data = glmmTMB::Salamanders)
#> Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
#> TMB was built with Matrix version 1.3.3
#> Current Matrix version is 1.3.4
#> Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package

do_LRT <- function(m, sub_m) {
  ll1 <- logLik(m)
  ll2 <- logLik(sub_m)
  
  chisq <- -2 * (ll2[1] - ll1[1])
  df <- attr(ll1, "df") - attr(ll2, "df")
  p <- pchisq(chisq, df, lower.tail = FALSE)
  
  data.frame(chisq, df, p)
}

do_spR2 <- function(m, sub_m) {
  data.frame(
    spR2 = performance::r2(m)[[1]] - performance::r2(sub_m)[[1]]  
  )
}


(term_res <- compare_nested(m2, list(LRT = do_LRT, spR2 = do_spR2)))
#> $Intercept
#> $Intercept$LRT
#>      chisq df           p
#> 1 71.63583  1 2.58814e-17
#> 
#> $Intercept$spR2
#>                        spR2
#> Nagelkerke's R2 0.007306225
#> 
#> 
#> $spp
#> $spp$LRT
#>     chisq df            p
#> 1 49.6665  6 5.483212e-09
#> 
#> $spp$spR2
#>                         spR2
#> Nagelkerke's R2 -0.002198223
#> 
#> 
#> $mined
#> $mined$LRT
#>      chisq df            p
#> 1 120.9563  1 3.906445e-28
#> 
#> $mined$spR2
#>                       spR2
#> Nagelkerke's R2 0.02986212
#> 
#> 
#> $`spp:mined`
#> $`spp:mined`$LRT
#>      chisq df            p
#> 1 34.55771  6 5.248732e-06
#> 
#> $`spp:mined`$spR2
#>                         spR2
#> Nagelkerke's R2 -0.008548994

term_res |>
  lapply(do.call, what = cbind) |>
  do.call(what = rbind)
#>           LRT.chisq LRT.df        LRT.p         spR2
#> Intercept  71.63583      1 2.588140e-17  0.007306225
#> spp        49.66650      6 5.483212e-09 -0.002198223
#> mined     120.95629      1 3.906445e-28  0.029862123
#> spp:mined  34.55771      6 5.248732e-06 -0.008548994

^{Created on 2021-09-26 by the reprex package (v2.0.1)}

strengejacke · 2022-02-06T08:37:09Z

I can't recall what the purpose of this feature is. Looks like it could be something for performance?

bwiernik · 2022-02-06T09:07:17Z

I think the purpose was for computing likelihood ratio test based p values

strengejacke added the Feature idea 🔥 New feature or request label Sep 30, 2021

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Function for term / parameter-wise deletion #602

Function for term / parameter-wise deletion #602

mattansb commented Sep 26, 2021

strengejacke commented Feb 6, 2022

bwiernik commented Feb 6, 2022

Function for term / parameter-wise deletion #602

Function for term / parameter-wise deletion #602

Comments

mattansb commented Sep 26, 2021

strengejacke commented Feb 6, 2022

bwiernik commented Feb 6, 2022