merge pr #981: broom 0.7.4

tidymodels · Jan 29, 2021 · c55ffd0 · c55ffd0
2 parents 1d10ae1 + 2d962f8
commit c55ffd0
Show file tree

Hide file tree

Showing 12 changed files with 701 additions and 12 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: broom
 Title: Convert Statistical Objects into Tidy Tibbles
-Version: 0.7.3.9000
+Version: 0.7.4
 Authors@R:
     c(person(given = "David",
              family = "Robinson",
@@ -527,7 +527,6 @@ Imports:
     stringr,
     tibble (>= 3.0.0),
     tidyr (>= 1.0.0)
-Remotes: alexpghayes/modeltests
 Suggests:
     AER,
     akima,
@@ -569,6 +568,7 @@ Suggests:
     Lahman,
     lavaan,
     leaps,
+    lfe,
     lm.beta,
     lme4,
     lmodel2,
@@ -659,6 +659,7 @@ Collate:
     'ks-tidiers.R'
     'lavaan-tidiers.R'
     'leaps.R'
+    'lfe-tidiers.R'
     'list-irlba.R'
     'list-optim-tidiers.R'
     'list-svd-tidiers.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -11,6 +11,7 @@ S3method(augment,decomposed.ts)
 S3method(augment,default)
 S3method(augment,drc)
 S3method(augment,factanal)
+S3method(augment,felm)
 S3method(augment,fixest)
 S3method(augment,gam)
 S3method(augment,glm)
@@ -72,6 +73,7 @@ S3method(glance,drc)
 S3method(glance,durbinWatsonTest)
 S3method(glance,ergm)
 S3method(glance,factanal)
+S3method(glance,felm)
 S3method(glance,fitdistr)
 S3method(glance,fixest)
 S3method(glance,gam)
@@ -176,6 +178,7 @@ S3method(tidy,emmGrid)
 S3method(tidy,epi.2by2)
 S3method(tidy,ergm)
 S3method(tidy,factanal)
+S3method(tidy,felm)
 S3method(tidy,fitdistr)
 S3method(tidy,fixest)
 S3method(tidy,ftable)

diff --git a/NEWS.md b/NEWS.md
@@ -1,19 +1,27 @@
-# broom 0.7.3.9000
+# broom 0.7.4
 
-To be released as 0.7.4.
+broom 0.7.4 introduces tidier support for a number of new model objects and 
+improves functionality of many existing tidiers!
+
+#### New Tidiers
 
 * Add tidiers for `Rchoice` objects (`#961` by `@vincentarelbundock` and `@Nateme16`)
+* Add tidiers for objects produced by `car::leveneTest` (`#968` by `@vincentarelbundock` and `@mkirzon`)
+* Add tidiers for objects produced by `cmprsk::crr` (`#971` and `#552` by `@vincentarelbundock` and `@margarethannum`)
+* Add an `augment()` method for `gam` objects (`#975` and `#645` by `@vincentarelbundock`)
+* Add tidiers for `vars` objects (`#979` and `#161` by `@vincentarelbundock` and `@Diego-MX`)
+
+This release also restores tidiers for `felm` objects from the `lfe` package, which was recently unarchived from CRAN.
+
+#### Improvements to existing tidiers
+
 * `tidy.emmGrid` can now return `std.error` and `conf.*` columns at the same time. (`#962` by `@vincentarelbundock` and `@jmbarbone`)
 * `tidy.garch` can now produce confidence intervals (`#964` by `@vincentarelbundock` and `@IndrajeetPatil`)
 * `tidy.coxph` can now report confidence intervals on models utilizing penalized/clustering terms (`#966` by `@vincentarelbundock` and `@matthieu-faron`)
 * `augment.lm` now works when some regression weights are equal to zero (`#965` by `@vincentarelbundock` and `@vnijs`)
 * `tidy.coxph` can now handle models utilizing penalized/clustering terms (`#966` and `#969` by `@vincentarelbundock`, `@matthieu-faron`, and `@KZARCA`)
-* Add tidiers for objects produced by `car::leveneTest` (`#968` by `@vincentarelbundock` and `@mkirzon`)
-* Add tidiers for objects produced by `cmprsk::crr` (`#971` and `#552` by `@vincentarelbundock` and `@margarethannum`)
 * Fix bug in `tidy.speedglm` on R 4.0.0+ (`#974` by `@uqzwang`)
-* Add `augment.gam` (`#975` and `#645` by `@vincentarelbundock`)
 * tidy.multinom works with matrix response (`#977` and `#666` by `@vincentarelbundock` and `@atyre2`)
-* Add tidiers for `vars` objects (`#979` and `#161` by `@vincentarelbundock` and `@Diego-MX`)
 * Various bug fixes and improvements to documentation and errors.
 
 # broom 0.7.3

diff --git a/R/lfe-tidiers.R b/R/lfe-tidiers.R
@@ -0,0 +1,241 @@
+#' @templateVar class felm
+#' @template title_desc_tidy
+#'
+#' @param x A `felm` object returned from [lfe::felm()].
+#' @template param_confint
+#' @param fe Logical indicating whether or not to include estimates of
+#'   fixed effects. Defaults to `FALSE`.
+#' @param se.type Character indicating the type of standard errors. Defaults to
+#'   using those of the underlying felm() model object, e.g. clustered errors
+#'   for models that were provided a cluster specification. Users can override
+#'   these defaults by specifying an appropriate alternative: "iid" (for 
+#'   homoskedastic errors), "robust" (for Eicker-Huber-White robust errors), or
+#'   "cluster" (for clustered standard errors; if the model object supports it).
+#' @template param_unused_dots
+#'
+#' @evalRd return_tidy(regression = TRUE)
+#'
+#' @examples
+#'
+#' library(lfe)
+#'
+#' # Use built-in "airquality" dataset
+#' head(airquality)
+#'
+#' # No FEs; same as lm()
+#' est0 <- felm(Ozone ~ Temp + Wind + Solar.R, airquality)
+#' tidy(est0)
+#' augment(est0)
+#' 
+#' # Add month fixed effects
+#' est1 <- felm(Ozone ~ Temp + Wind + Solar.R  | Month, airquality)
+#' tidy(est1)
+#' tidy(est1, fe = TRUE)
+#' augment(est1)
+#' glance(est1)
+#'
+#' # The "se.type" argument can be used to switch between different standard
+#' # error types on the fly. In turn, this can be useful for exploring the
+#' # effect of different error structures on model inference.
+#' tidy(est1, se.type = "iid")
+#' tidy(est1, se.type = "robust")
+#' 
+#' # Add clustered SEs (also by month)
+#' est2 <- felm(Ozone ~ Temp + Wind + Solar.R  | Month | 0 | Month, airquality)
+#' tidy(est2, conf.int = TRUE) 
+#' tidy(est2, conf.int = TRUE, se.type = "cluster")
+#' tidy(est2, conf.int = TRUE, se.type = "robust")
+#' tidy(est2, conf.int = TRUE, se.type = "iid")
+#' @export
+#' @aliases felm_tidiers lfe_tidiers
+#' @family felm tidiers
+#' @seealso [tidy()], [lfe::felm()]
+tidy.felm <- function(x, conf.int = FALSE, conf.level = .95, fe = FALSE,
+                      se.type = c("default", "iid", "robust", "cluster"),
+                      ...) {
+  has_multi_response <- length(x$lhs) > 1
+
+  # warn users about deprecated "robust" argument
+  # (`[[` is used instead of `$` so that only an argument named exactly
+  # "robust" triggers the warning; `$` on a list partially matches names)
+  dots <- list(...)
+  if (!is.null(dots[["robust"]])) {
+    warning('\nThe "robust" argument has been deprecated in tidy.felm and will be ignored. Please use the "se.type" argument instead.\n')
+  }
+
+  # match SE args; "default" is represented internally as NULL
+  se.type <- match.arg(se.type)
+  if (se.type == "default") {
+    se.type <- NULL
+  }
+
+  # get "robust" logical to pass on to summary.lfe
+  if (is.null(se.type)) {
+    robust <- !is.null(x$clustervar)
+  } else if (se.type == "iid") {
+    robust <- FALSE
+  } else if (se.type == "cluster" && is.null(x$clustervar)) {
+    # catch potential user error, asking for clusters where none exist.
+    # The model has no cluster variable here, so "default" means
+    # robust = FALSE (the same value the is.null(se.type) branch yields).
+    # This keeps the reported standard errors in line with the confidence
+    # intervals, which are requested below with the default type.
+    warning("Clustered SEs requested, but weren't calculated in underlying model object. Reverting to default SEs.\n")
+    se.type <- NULL
+    robust <- FALSE
+  } else {
+    robust <- TRUE
+  }
+
+  nn <- c("estimate", "std.error", "statistic", "p.value")
+  if (has_multi_response) {
+    # one coefficient table per response, stacked, with `response` first
+    ret <- map_df(x$lhs, function(y) {
+      stats::coef(summary(x, lhs = y, robust = robust)) %>%
+        as_tidy_tibble(new_names = nn) %>%
+        mutate(response = y)
+    }) %>%
+      select(response, dplyr::everything())
+  } else {
+    ret <- as_tidy_tibble(
+      stats::coef(summary(x, robust = robust)),
+      new_names = nn
+    )
+  }
+
+  # Catch edge case where users specify "robust" SEs on felm() object that
+  # contains clusters. Reason: Somewhat confusingly, summary.felm(robust = TRUE)
+  # reports clustered SEs even though robust SEs are available. In contrast,
+  # confint.felm distinguishes between robust and clustered SEs regardless
+  # of the underlying model. See also: https://github.com/sgaure/lfe/pull/17/files
+  if (!is.null(se.type)) {
+    if (se.type == "robust" && !is.null(x$clustervar)) {
+      ret$std.error <- x$rse
+      ret$statistic <- x$rtval
+      ret$p.value <- x$rpval
+    }
+  }
+
+
+  if (conf.int) {
+    if (has_multi_response) {
+      # Tag each response's intervals with the response name and join on
+      # both keys. Term names repeat across responses, so joining on "term"
+      # alone would match every coefficient row against every response's
+      # interval and duplicate rows.
+      # NOTE(review): this path always requests the default interval type,
+      # so `se.type` is not reflected in multi-response intervals -- confirm
+      # whether that is intended.
+      ci <- map_df(x$lhs, function(y) {
+        broom_confint_terms(x, level = conf.level, type = NULL, lhs = y) %>%
+          mutate(response = y)
+      })
+      ret <- dplyr::left_join(ret, ci, by = c("response", "term"))
+    } else {
+      ci <- broom_confint_terms(x, level = conf.level, type = se.type)
+      ret <- dplyr::left_join(ret, ci, by = "term")
+    }
+  }
+
+  if (fe) {
+    # pad the coefficient rows with the extra columns the FE rows carry
+    ret <- mutate(ret, N = NA, comp = NA)
+
+    nn <- c("estimate", "std.error", "N", "comp")
+    ret_fe_prep <- lfe::getfe(x, se = TRUE, bN = 100) %>%
+      tibble::rownames_to_column(var = "term") %>%
+      # effect and se are multiple if multiple y
+      select(term, contains("effect"), contains("se"), obs, comp) %>%
+      rename(N = obs)
+
+    if (has_multi_response) {
+      # reshape effect.<response> / se.<response> columns into one row per
+      # (term, response) with plain `effect` and `se` columns
+      ret_fe_prep <- ret_fe_prep %>%
+        tidyr::pivot_longer(
+          cols = c(
+            starts_with("effect."),
+            starts_with("se.")
+          ),
+          names_to = "stat_resp",
+          values_to = "value"
+        ) %>%
+        tidyr::separate(
+          col = "stat_resp",
+          c("stat", "response"),
+          sep = "\\."
+        ) %>%
+        tidyr::pivot_wider(
+          id_cols = c(term, N, comp, response),
+          names_from = stat,
+          values_from = value
+        ) %>%
+        dplyr::arrange(term) %>%
+        as.data.frame()
+    }
+    ret_fe <- ret_fe_prep %>%
+      rename(estimate = effect, std.error = se) %>%
+      select(contains("response"), dplyr::everything()) %>%
+      mutate(statistic = estimate / std.error) %>%
+      # two-sided p-value: take the upper tail of |statistic| so that
+      # negative statistics do not produce values greater than 1
+      mutate(
+        p.value = 2 * stats::pt(abs(statistic), df = N, lower.tail = FALSE)
+      )
+
+    if (conf.int) {
+      # symmetric normal-approximation interval around the FE estimate
+      crit_val <- stats::qnorm(1 - (1 - conf.level) / 2)
+
+      ret_fe <- ret_fe %>%
+        mutate(
+          conf.low = estimate - crit_val * std.error,
+          conf.high = estimate + crit_val * std.error
+        )
+    }
+    ret <- rbind(ret, ret_fe)
+  }
+  as_tibble(ret)
+}
+
+#' @templateVar class felm
+#' @template title_desc_augment
+#'
+#' @inherit tidy.felm params examples
+#' @template param_data
+#'
+#' @evalRd return_augment()
+#'
+#' @export
+#' @family felm tidiers
+#' @seealso [augment()], [lfe::felm()]
+augment.felm <- function(x, data = model.frame(x), ...) {
+  has_multi_response <- length(x$lhs) > 1
+
+  if (has_multi_response) {
+    stop(
+      "Augment does not support linear models with multiple responses.",
+      call. = FALSE
+    )
+  }
+
+  # Pull the values out of the model object *before* touching the data.
+  # Referring to `x` inside mutate() is unsafe: mutate() looks names up in
+  # the data first, so a column called `x` (e.g. from `felm(y ~ x)`) would
+  # shadow the model object.
+  fitted_values <- as.vector(x$fitted.values)
+  residual_values <- as.vector(x$residuals)
+
+  df <- as_augment_tibble(data)
+  # plain column assignment involves no data masking at all
+  df[[".fitted"]] <- fitted_values
+  df[[".resid"]] <- residual_values
+  df
+}
+
+#' @templateVar class felm
+#' @template title_desc_glance
+#'
+#' @inherit tidy.felm params examples
+#'
+#' @evalRd return_glance(
+#'   "r.squared",
+#'   "adj.r.squared",
+#'   "sigma",
+#'   "statistic",
+#'   "p.value",
+#'   "df",
+#'   "df.residual",
+#'   "nobs"
+#' )
+#'
+#' @export
+glance.felm <- function(x, ...) {
+  # Guard clause: a one-row summary is only produced for single-response fits.
+  if (length(x$lhs) > 1) {
+    stop(
+      "Glance does not support linear models with multiple responses.",
+      call. = FALSE
+    )
+  }
+
+  # All reported statistics except `nobs` are read off the model summary.
+  model_summary <- summary(x)
+
+  as_glance_tibble(
+    r.squared = model_summary$r2,
+    adj.r.squared = model_summary$r2adj,
+    sigma = model_summary$rse,
+    statistic = model_summary$fstat,
+    p.value = unname(model_summary$pval),
+    df = model_summary$df[1],
+    df.residual = model_summary$rdf,
+    nobs = stats::nobs(x),
+    na_types = "rrrrriii"
+  )
+}
diff --git a/R/survey-tidiers.R b/R/survey-tidiers.R
@@ -125,7 +125,6 @@ tidy.svyglm <- function(x, conf.int = FALSE, conf.level = 0.95,
 #' glance(m)
 #' @references Lumley T, Scott A (2015). AIC and BIC for modelling with complex
 #'   survey data. *Journal of Survey Statistics and Methodology*, 3(1).
-#'   <https://doi.org/10.1093/jssam/smu021>.
 #'
 #' @export
 #' @family lm tidiers

diff --git a/R/utilities.R b/R/utilities.R
@@ -540,6 +540,7 @@ globalVariables(
     "expCIWidth",
     "fit",
     "GCV",
+    "group",
     "group1",
     "group2",
     "hat",

diff --git a/cran-comments.md b/cran-comments.md
@@ -1,3 +1,17 @@
+# broom 0.7.4
+
+This is a resubmission following a request to notify the maintainers of 2 newly
+broken packages. We filed notes in the issue trackers of the two affected
+packages, rstatix and tadaatoolbox, on January 15th, describing the specific
+causes of the breakages and proposing changes to their codebases:
+
+* https://github.com/tadaadata/tadaatoolbox/issues/33
+* https://github.com/kassambara/rstatix/issues/89
+
+While we have not received responses from the maintainers of either project, we
+are resubmitting now, following the request on January 12th to fix package
+breakages by January 26th.
+
 ## Test environments
 
 - local mac OS  install: R 3.6.3
@@ -12,6 +26,6 @@
 
 # Reverse dependencies
 
-We checked 162 reverse dependencies (152 from CRAN + 10 from BioConductor), 
+We checked 170 reverse dependencies (158 from CRAN + 12 from BioConductor), 
 comparing R CMD check results across CRAN and dev versions of this package.
-We saw no new problems.
+We saw new ERRORs in 2 CRAN packages, rstatix and tadaatoolbox.