From f0ed45ff105717b02a9a5430e7c0ff5abf7d9b12 Mon Sep 17 00:00:00 2001 From: gowerc Date: Thu, 21 Mar 2024 16:01:12 +0000 Subject: [PATCH] Introduced "counterfactual" weighting --- R/ancova.R | 46 +++---- R/lsmeans.R | 176 +++++++++++++++++++------ man/ancova.Rd | 79 ++++++++--- man/ancova_single.Rd | 62 ++++++++- man/ls_design.Rd | 3 + man/lsmeans.Rd | 76 ++++++++--- tests/testthat/test-lsmeans.R | 242 +++++++++++----------------------- 7 files changed, 415 insertions(+), 269 deletions(-) diff --git a/R/ancova.R b/R/ancova.R index 57a08af44..caec031f6 100644 --- a/R/ancova.R +++ b/R/ancova.R @@ -12,9 +12,10 @@ #' fit the ancova model at. If `NULL`, a separate ancova model will be fit to the #' outcomes for each visit (as determined by `unique(data[[vars$visit]])`). #' See details. -#' @param weights Character, either `"proportional"` (default) or `"equal"`. Specifies the -#' weighting strategy to be used for categorical covariates when calculating the lsmeans. -#' See details. +#' @param weights Character, either `"counterfactual"` (default), `"equal"`, +#' `"proportional_em"` or `"proportional"`. +#' Specifies the weighting strategy to be used when calculating the lsmeans. +#' See the weighting section for more details. #' #' @details #' The function works as follows: @@ -49,29 +50,18 @@ #' by providing them to the `covariates` argument of [set_vars()] #' e.g. `set_vars(covariates = c("sex*age"))`. #' -#' -#' ## Weighting -#' -#' `"proportional"` is the default scheme that is used. This is equivalent to standardization, -#' i.e. the lsmeans in -#' each group are equal to the predicted mean outcome from the ancova model for -#' that group based on baseline characteristics of all subjects regardless of -#' their assigned group. The alternative weighting scheme, `"equal"`, creates hypothetical -#' patients by expanding out all combinations of the models categorical covariates. The -#' lsmeans are then calculated as the average of -#' the predicted mean outcome for these hypothetical patients assuming they come from each -#' group in turn. -#' -#' In short: -#' - `"proportional"` weights categorical covariates based upon their frequency of occurrence -#' in the data. -#' - `"equal"` weights categorical covariates equally across all theoretical combinations. +#' @inheritSection lsmeans Weighting #' #' @seealso [analyse()] #' @seealso [stats::lm()] #' @seealso [set_vars()] #' @export -ancova <- function(data, vars, visits = NULL, weights = c("proportional", "equal")) { +ancova <- function( + data, + vars, + visits = NULL, + weights = c("counterfactual", "equal", "proportional_em", "proportional") +) { outcome <- vars[["outcome"]] group <- vars[["group"]] @@ -154,13 +144,13 @@ ancova <- function(data, vars, visits = NULL, weights = c("proportional", "equal #' @description #' Performance analysis of covariance. See [ancova()] for full details. #' -#' @param data The `data.frame` containing all of the data required for the model. #' @param outcome Character, the name of the outcome variable in `data`. #' @param group Character, the name of the group variable in `data`. #' @param covariates Character vector containing the name of any additional covariates #' to be included in the model as well as any interaction terms. -#' @param weights Character, specifies whether to use "proportional" or "equal" weighting for each -#' categorical covariate combination when calculating the lsmeans. +#' +#' @inheritParams ancova +#' @inheritSection lsmeans Weighting #' #' @details #' - `group` must be a factor variable with only 2 levels. @@ -174,7 +164,13 @@ ancova <- function(data, vars, visits = NULL, weights = c("proportional", "equal #' } #' @seealso [ancova()] #' @importFrom stats lm coef vcov df.residual -ancova_single <- function(data, outcome, group, covariates, weights = c("proportional", "equal")) { +ancova_single <- function( + data, + outcome, + group, + covariates, + weights = c("counterfactual", "equal", "proportional_em", "proportional") +) { weights <- match.arg(weights) assert_that( diff --git a/R/lsmeans.R b/R/lsmeans.R index 4223c79bb..2c8694123 100644 --- a/R/lsmeans.R +++ b/R/lsmeans.R @@ -3,25 +3,55 @@ #' #' #' Estimates the least square means from a linear model. The exact implementation -#' / interpretation depends on the weighting scheme; see details for more information. -#' -#' @details -#' -#' For `.weights = "proportional"` (the default) the lsmeans are obtained by -#' taking the average of the fitted values for each patient. -#' In terms of the interactions between two continuous covariates -#' this is equivalent to constructing a hypothetical patient whose -#' interaction term is `mean(X * Y)`. This is opposed to the -#' `emmeans` package which calculates the interaction term -#' of the hypothetical patient as `mean(X) * mean(Y)`. The approach -#' outlined here is equivalent to standardization or g-computation. +#' / interpretation depends on the weighting scheme; see the weighting section for more +#' information. +#' #' -#' For `.weights = "equal"` the lsmeans are obtained by taking the model fitted +#' @section Weighting: +#' +#' ### Counterfactual +#' +#' For `weights = "counterfactual"` (the default) the lsmeans are obtained by +#' taking the average of the predicted values for each patient after assigning all patients +#' to each arm in turn. +#' This approach is equivalent to standardization or g-computation. +#' In comparison to `emmeans` this approach is equivalent to: +#' ``` +#' emmeans::emmeans(model, specs = "", counterfactual = "") +#' ``` +#' Note that `weights = "proportional"` is an alias for `weights = "counterfactual"`. +#' Whilst potentially confusing, this has been done to ensure backwards compatibility +#' with prior versions of `rbmi`. To get results +#' consistent with `emmeans`'s `weights = "proportional"` please use `weights = "proportional_em"`. +#' +#' ### Equal +#' +#' For `weights = "equal"` the lsmeans are obtained by taking the model fitted #' value of a hypothetical patient whose covariates are defined as follows: #' - Continuous covariates are set to `mean(X)` #' - Dummy categorical variables are set to `1/N` where `N` is the number of levels #' - Continuous * continuous interactions are set to `mean(X) * mean(Y)` #' - Continuous * categorical interactions are set to `mean(X) * 1/N` +#' - Dummary categorical * categorical interaction variables are set to `1/N * 1/M` +#' +#' In comparison to `emmeans` this approach is equivalent to: +#' ``` +#' emmeans::emmeans(model, specs = "", weights = "equal") +#' ``` +#' +#' ### Proportional +#' +#' For `weights = "proportional_em"` the lsmeans are obtained as per `weights = "equal"` +#' except instead of weighting each observation equally they are weighted by the proportion +#' in which the given combination of categorical values occurred in the data. +#' In comparison to `emmeans` this approach is equivalent to: +#' ``` +#' emmeans::emmeans(model, specs = "", weights = "proportional") +#' ``` +#' Note that this is not to be confused with `weights = "proportional"` which is an alias +#' for `weights = "counterfactual"`. +#' +#' @section Fixing: #' #' Regardless of the weighting scheme any named arguments passed via `...` will #' fix the value of the covariate to the specified value. @@ -35,8 +65,10 @@ #' @param ... Fixes specific variables to specific values i.e. #' `trt = 1` or `age = 50`. The name of the argument must be the name #' of the variable within the dataset. -#' @param .weights Character, specifies whether to use "proportional" or "equal" weighting for each -#' categorical covariate combination when calculating the lsmeans. +#' @param .weights Character, either `"counterfactual"` (default), `"equal"`, +#' `"proportional_em"` or `"proportional"`. +#' Specifies the weighting strategy to be used when calculating the lsmeans. +#' See the weighting section for more details. #' #' @references \url{https://CRAN.R-project.org/package=emmeans} #' @references \url{https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.3/statug/statug_glm_details41.htm} @@ -49,7 +81,11 @@ #' lsmeans(mod, Species = "versicolor", Petal.Length = 1) #' } #' @importFrom stats model.matrix terms reformulate -lsmeans <- function(model, ..., .weights = c("proportional", "equal")) { +lsmeans <- function( + model, + ..., + .weights = c("counterfactual", "equal", "proportional_em", "proportional") +) { .weights <- match.arg(arg = .weights) fix <- list(...) @@ -66,9 +102,22 @@ lsmeans <- function(model, ..., .weights = c("proportional", "equal")) { ) } + if (.weights == "proportional") { + message( + paste( + "NOTE: The `proportional` weighting scheme is an alias for `counterfactual`", + "and will be deprecated in the future. Please use `proportional_em` or", + "`counterfactual` as appropriate instead.", + sep = " " + ) + ) + } + design_fun <- switch(.weights, + counterfactual = ls_design_counterfactual, + proportional = ls_design_counterfactual, equal = ls_design_equal, - proportional = ls_design_proportional + proportional_em = ls_design_proportional ) design <- design_fun(data, frm, fix) @@ -81,6 +130,27 @@ lsmeans <- function(model, ..., .weights = c("proportional", "equal")) { } + +collapse_values <- function(x) { + UseMethod("collapse_values") +} + +#' @export +collapse_values.factor <- function(x) { + return(factor(levels(x), levels(x))) +} + +#' @export +collapse_values.numeric <- function(x) { + return(mean(x)) +} + +#' @export +collapse_values.default <- function(x) { + stop("invalid data type") +} + + #' Calculate design vector for the lsmeans #' #' Calculates the design vector as required to generate the lsmean @@ -99,7 +169,8 @@ ls_design_equal <- function(data, frm, fix) { inherits(frm, "formula"), is.data.frame(data), is.list(fix), - all(names(fix) %in% colnames(data)) + all(names(fix) %in% colnames(data)), + all(vapply(fix, length, numeric(1)) == 1) ) frm2 <- update(frm, NULL ~ .) data2 <- model.frame(frm2, data) @@ -125,28 +196,8 @@ ls_design_equal <- function(data, frm, fix) { } -collapse_values <- function(x) { - UseMethod("collapse_values") -} - -#' @export -collapse_values.factor <- function(x) { - return(factor(levels(x), levels(x))) -} - -#' @export -collapse_values.numeric <- function(x) { - return(mean(x)) -} - -#' @export -collapse_values.default <- function(x) { - stop("invalid data type") -} - - #' @rdname ls_design -ls_design_proportional <- function(data, frm, fix) { +ls_design_counterfactual <- function(data, frm, fix) { data2 <- data for (var in names(fix)) { value <- data[[var]] @@ -159,3 +210,50 @@ ls_design_proportional <- function(data, frm, fix) { design <- colMeans(model.matrix(frm, data = data2)) return(design) } + + +#' @rdname ls_design +ls_design_proportional <- function(data, frm, fix) { + assert_that( + inherits(frm, "formula"), + is.data.frame(data), + is.list(fix), + all(names(fix) %in% colnames(data)), + all(vapply(fix, length, numeric(1)) == 1) + ) + frm2 <- update(frm, NULL ~ .) + dat2 <- model.frame(frm2, data) + collection <- lapply(as.list(dat2), collapse_values) + + for (var in names(fix)) { + if (is.numeric(dat2[[var]])) { + dat2[[var]] <- fix[[var]] + collection[[var]] <- fix[[var]] + } else if (is.factor(dat2[[var]])) { + dat2[[var]] <- factor(fix[[var]], levels = levels(dat2[[var]])) + collection[[var]] <- factor(fix[[var]], levels = levels(dat2[[var]])) + } + } + + all_combinations <- expand.grid(collection) + design_matrix <- model.matrix(frm2, all_combinations) + + categorical_vars <- dat2 |> + vapply(\(x) is.character(x) || is.factor(x), logical(1)) |> + which() |> + names() + + wgts <- dat2[, categorical_vars[[1]]] |> + aggregate( + as.list(dat2[, categorical_vars]), + length + ) + assert_that( + all.equal(wgts[, categorical_vars], all_combinations[, categorical_vars]) + ) + + wgts_scaled <- wgts[["x"]] / sum(wgts[["x"]]) + + design <- apply(design_matrix, 2, \(x) sum(x * wgts_scaled)) + return(design) +} diff --git a/man/ancova.Rd b/man/ancova.Rd index 886bd48cf..1301ca298 100644 --- a/man/ancova.Rd +++ b/man/ancova.Rd @@ -4,7 +4,12 @@ \alias{ancova} \title{Analysis of Covariance} \usage{ -ancova(data, vars, visits = NULL, weights = c("proportional", "equal")) +ancova( + data, + vars, + visits = NULL, + weights = c("counterfactual", "equal", "proportional_em", "proportional") +) } \arguments{ \item{data}{A \code{data.frame} containing the data to be used in the model.} @@ -17,9 +22,10 @@ fit the ancova model at. If \code{NULL}, a separate ancova model will be fit to outcomes for each visit (as determined by \code{unique(data[[vars$visit]])}). See details.} -\item{weights}{Character, either \code{"proportional"} (default) or \code{"equal"}. Specifies the -weighting strategy to be used for categorical covariates when calculating the lsmeans. -See details.} +\item{weights}{Character, either \code{"counterfactual"} (default), \code{"equal"}, +\code{"proportional_em"} or \code{"proportional"}. +Specifies the weighting strategy to be used when calculating the lsmeans. +See the weighting section for more details.} } \description{ Performs an analysis of covariance between two groups returning the estimated @@ -60,26 +66,59 @@ coincide with the control arm. Analogously, "alt" refers to the second factor le If you want to include interaction terms in your model this can be done by providing them to the \code{covariates} argument of \code{\link[=set_vars]{set_vars()}} e.g. \code{set_vars(covariates = c("sex*age"))}. -\subsection{Weighting}{ - -\code{"proportional"} is the default scheme that is used. This is equivalent to standardization, -i.e. the lsmeans in -each group are equal to the predicted mean outcome from the ancova model for -that group based on baseline characteristics of all subjects regardless of -their assigned group. The alternative weighting scheme, \code{"equal"}, creates hypothetical -patients by expanding out all combinations of the models categorical covariates. The -lsmeans are then calculated as the average of -the predicted mean outcome for these hypothetical patients assuming they come from each -group in turn. - -In short: +} +\section{Weighting}{ + +\subsection{Counterfactual}{ + +For \code{weights = "counterfactual"} (the default) the lsmeans are obtained by +taking the average of the predicted values for each patient after assigning all patients +to each arm in turn. +This approach is equivalent to standardization or g-computation. +In comparison to \code{emmeans} this approach is equivalent to: + +\if{html}{\out{
}}\preformatted{emmeans::emmeans(model, specs = "", counterfactual = "") +}\if{html}{\out{
}} + +Note that \code{weights = "proportional"} is an alias for \code{weights = "counterfactual"}. +Whilst potentially confusing, this has been done to ensure backwards compatibility +with prior versions of \code{rbmi}. To get results +consistent with \code{emmeans}'s \code{weights = "proportional"} please use \code{weights = "proportional_em"}. +} + +\subsection{Equal}{ + +For \code{weights = "equal"} the lsmeans are obtained by taking the model fitted +value of a hypothetical patient whose covariates are defined as follows: \itemize{ -\item \code{"proportional"} weights categorical covariates based upon their frequency of occurrence -in the data. -\item \code{"equal"} weights categorical covariates equally across all theoretical combinations. +\item Continuous covariates are set to \code{mean(X)} +\item Dummy categorical variables are set to \code{1/N} where \code{N} is the number of levels +\item Continuous * continuous interactions are set to \code{mean(X) * mean(Y)} +\item Continuous * categorical interactions are set to \code{mean(X) * 1/N} +\item Dummary categorical * categorical interaction variables are set to \code{1/N * 1/M} } + +In comparison to \code{emmeans} this approach is equivalent to: + +\if{html}{\out{
}}\preformatted{emmeans::emmeans(model, specs = "", weights = "equal") +}\if{html}{\out{
}} +} + +\subsection{Proportional}{ + +For \code{weights = "proportional_em"} the lsmeans are obtained as per \code{weights = "equal"} +except instead of weighting each observation equally they are weighted by the proportion +in which the given combination of categorical values occurred in the data. +In comparison to \code{emmeans} this approach is equivalent to: + +\if{html}{\out{
}}\preformatted{emmeans::emmeans(model, specs = "", weights = "proportional") +}\if{html}{\out{
}} + +Note that this is not to be confused with \code{weights = "proportional"} which is an alias +for \code{weights = "counterfactual"}. } } + \seealso{ \code{\link[=analyse]{analyse()}} diff --git a/man/ancova_single.Rd b/man/ancova_single.Rd index 464e652aa..f10de1f6a 100644 --- a/man/ancova_single.Rd +++ b/man/ancova_single.Rd @@ -9,11 +9,11 @@ ancova_single( outcome, group, covariates, - weights = c("proportional", "equal") + weights = c("counterfactual", "equal", "proportional_em", "proportional") ) } \arguments{ -\item{data}{The \code{data.frame} containing all of the data required for the model.} +\item{data}{A \code{data.frame} containing the data to be used in the model.} \item{outcome}{Character, the name of the outcome variable in \code{data}.} @@ -22,8 +22,10 @@ ancova_single( \item{covariates}{Character vector containing the name of any additional covariates to be included in the model as well as any interaction terms.} -\item{weights}{Character, specifies whether to use "proportional" or "equal" weighting for each -categorical covariate combination when calculating the lsmeans.} +\item{weights}{Character, either \code{"counterfactual"} (default), \code{"equal"}, +\code{"proportional_em"} or \code{"proportional"}. +Specifies the weighting strategy to be used when calculating the lsmeans. +See the weighting section for more details.} } \description{ Performance analysis of covariance. See \code{\link[=ancova]{ancova()}} for full details. @@ -34,6 +36,58 @@ Performance analysis of covariance. See \code{\link[=ancova]{ancova()}} for full \item \code{outcome} must be a continuous numeric variable. } } +\section{Weighting}{ + +\subsection{Counterfactual}{ + +For \code{weights = "counterfactual"} (the default) the lsmeans are obtained by +taking the average of the predicted values for each patient after assigning all patients +to each arm in turn. +This approach is equivalent to standardization or g-computation. +In comparison to \code{emmeans} this approach is equivalent to: + +\if{html}{\out{
}}\preformatted{emmeans::emmeans(model, specs = "", counterfactual = "") +}\if{html}{\out{
}} + +Note that \code{weights = "proportional"} is an alias for \code{weights = "counterfactual"}. +Whilst potentially confusing, this has been done to ensure backwards compatibility +with prior versions of \code{rbmi}. To get results +consistent with \code{emmeans}'s \code{weights = "proportional"} please use \code{weights = "proportional_em"}. +} + +\subsection{Equal}{ + +For \code{weights = "equal"} the lsmeans are obtained by taking the model fitted +value of a hypothetical patient whose covariates are defined as follows: +\itemize{ +\item Continuous covariates are set to \code{mean(X)} +\item Dummy categorical variables are set to \code{1/N} where \code{N} is the number of levels +\item Continuous * continuous interactions are set to \code{mean(X) * mean(Y)} +\item Continuous * categorical interactions are set to \code{mean(X) * 1/N} +\item Dummary categorical * categorical interaction variables are set to \code{1/N * 1/M} +} + +In comparison to \code{emmeans} this approach is equivalent to: + +\if{html}{\out{
}}\preformatted{emmeans::emmeans(model, specs = "", weights = "equal") +}\if{html}{\out{
}} +} + +\subsection{Proportional}{ + +For \code{weights = "proportional_em"} the lsmeans are obtained as per \code{weights = "equal"} +except instead of weighting each observation equally they are weighted by the proportion +in which the given combination of categorical values occurred in the data. +In comparison to \code{emmeans} this approach is equivalent to: + +\if{html}{\out{
}}\preformatted{emmeans::emmeans(model, specs = "", weights = "proportional") +}\if{html}{\out{
}} + +Note that this is not to be confused with \code{weights = "proportional"} which is an alias +for \code{weights = "counterfactual"}. +} +} + \examples{ \dontrun{ iris2 <- iris[ iris$Species \%in\% c("versicolor", "virginica"), ] diff --git a/man/ls_design.Rd b/man/ls_design.Rd index aa3672495..cb239207c 100644 --- a/man/ls_design.Rd +++ b/man/ls_design.Rd @@ -3,11 +3,14 @@ \name{ls_design} \alias{ls_design} \alias{ls_design_equal} +\alias{ls_design_counterfactual} \alias{ls_design_proportional} \title{Calculate design vector for the lsmeans} \usage{ ls_design_equal(data, frm, fix) +ls_design_counterfactual(data, frm, fix) + ls_design_proportional(data, frm, fix) } \arguments{ diff --git a/man/lsmeans.Rd b/man/lsmeans.Rd index 795eda72c..a70fc8d97 100644 --- a/man/lsmeans.Rd +++ b/man/lsmeans.Rd @@ -4,7 +4,11 @@ \alias{lsmeans} \title{Least Square Means} \usage{ -lsmeans(model, ..., .weights = c("proportional", "equal")) +lsmeans( + model, + ..., + .weights = c("counterfactual", "equal", "proportional_em", "proportional") +) } \arguments{ \item{model}{A model created by \code{lm}.} @@ -13,32 +17,71 @@ lsmeans(model, ..., .weights = c("proportional", "equal")) \code{trt = 1} or \code{age = 50}. The name of the argument must be the name of the variable within the dataset.} -\item{.weights}{Character, specifies whether to use "proportional" or "equal" weighting for each -categorical covariate combination when calculating the lsmeans.} +\item{.weights}{Character, either \code{"counterfactual"} (default), \code{"equal"}, +\code{"proportional_em"} or \code{"proportional"}. +Specifies the weighting strategy to be used when calculating the lsmeans. +See the weighting section for more details.} } \description{ Estimates the least square means from a linear model. The exact implementation -/ interpretation depends on the weighting scheme; see details for more information. -} -\details{ -For \code{.weights = "proportional"} (the default) the lsmeans are obtained by -taking the average of the fitted values for each patient. -In terms of the interactions between two continuous covariates -this is equivalent to constructing a hypothetical patient whose -interaction term is \code{mean(X * Y)}. This is opposed to the -\code{emmeans} package which calculates the interaction term -of the hypothetical patient as \code{mean(X) * mean(Y)}. The approach -outlined here is equivalent to standardization or g-computation. - -For \code{.weights = "equal"} the lsmeans are obtained by taking the model fitted +/ interpretation depends on the weighting scheme; see the weighting section for more +information. +} +\section{Weighting}{ + +\subsection{Counterfactual}{ + +For \code{weights = "counterfactual"} (the default) the lsmeans are obtained by +taking the average of the predicted values for each patient after assigning all patients +to each arm in turn. +This approach is equivalent to standardization or g-computation. +In comparison to \code{emmeans} this approach is equivalent to: + +\if{html}{\out{
}}\preformatted{emmeans::emmeans(model, specs = "", counterfactual = "") +}\if{html}{\out{
}} + +Note that \code{weights = "proportional"} is an alias for \code{weights = "counterfactual"}. +Whilst potentially confusing, this has been done to ensure backwards compatibility +with prior versions of \code{rbmi}. To get results +consistent with \code{emmeans}'s \code{weights = "proportional"} please use \code{weights = "proportional_em"}. +} + +\subsection{Equal}{ + +For \code{weights = "equal"} the lsmeans are obtained by taking the model fitted value of a hypothetical patient whose covariates are defined as follows: \itemize{ \item Continuous covariates are set to \code{mean(X)} \item Dummy categorical variables are set to \code{1/N} where \code{N} is the number of levels \item Continuous * continuous interactions are set to \code{mean(X) * mean(Y)} \item Continuous * categorical interactions are set to \code{mean(X) * 1/N} +\item Dummary categorical * categorical interaction variables are set to \code{1/N * 1/M} +} + +In comparison to \code{emmeans} this approach is equivalent to: + +\if{html}{\out{
}}\preformatted{emmeans::emmeans(model, specs = "", weights = "equal") +}\if{html}{\out{
}} +} + +\subsection{Proportional}{ + +For \code{weights = "proportional_em"} the lsmeans are obtained as per \code{weights = "equal"} +except instead of weighting each observation equally they are weighted by the proportion +in which the given combination of categorical values occurred in the data. +In comparison to \code{emmeans} this approach is equivalent to: + +\if{html}{\out{
}}\preformatted{emmeans::emmeans(model, specs = "", weights = "proportional") +}\if{html}{\out{
}} + +Note that this is not to be confused with \code{weights = "proportional"} which is an alias +for \code{weights = "counterfactual"}. +} } +\section{Fixing}{ + + Regardless of the weighting scheme any named arguments passed via \code{...} will fix the value of the covariate to the specified value. For example, \code{lsmeans(model, trt = "A")} will fix the dummy variable \code{trtA} to 1 @@ -47,6 +90,7 @@ for all patients (real or hypothetical) when calculating the lsmeans. See the references for similar implementations as done in SAS and in R via the \code{emmeans} package. } + \examples{ \dontrun{ mod <- lm(Sepal.Length ~ Species + Petal.Length, data = iris) diff --git a/tests/testthat/test-lsmeans.R b/tests/testthat/test-lsmeans.R index fb3755c7c..06f5b9eba 100644 --- a/tests/testthat/test-lsmeans.R +++ b/tests/testthat/test-lsmeans.R @@ -1,7 +1,7 @@ library(dplyr) library(testthat) -test_that("Least square means works as expected - Part 1", { +test_that("Least square means works as expected", { ##### Check that treatment effect is unaffected set.seed(101) @@ -56,114 +56,6 @@ test_that("Least square means works as expected - Part 1", { }) - - - -test_that("Least square means works as expected - Part 2", { - n <- 8000 - - dat <- tibble( - trt = sample(c("C", "T"), size = n, replace = TRUE, prob = c(0.5, 0.5)), - age = rnorm(n), - sex = sample(c("M", "F", "O"), size = n, replace = TRUE, prob = c(0.3, 0.5, 0.2)), - cat = sample(c("A", "B"), size = n, replace = TRUE, prob = c(0.8, 0.2)), - visit = "vis1" - ) %>% - mutate(sex = factor(sex, levels = c("M", "F", "O"))) %>% - mutate(trt = factor(trt, levels = c("C", "T"))) %>% - mutate(cat = factor(cat, levels = c("A", "B"))) - - - outcome_vec <- ( - model.matrix(~ trt * sex * cat + age, dat) %*% - c(10, 5, 4, 3, 9, 4, 2, 8, 1, -5, -1, -2, -3) - ) + rnorm(n, 0, 5) - - dat2 <- dat %>% - mutate(outcome = outcome_vec) - - - mod <- lm(outcome ~ trt * sex * cat + age, data = dat2) - - - ########### - # - # Proportional weighting - # - - d <- suppressMessages({ - as.data.frame(emmeans::emmeans(mod, "trt", weights = "proportional")) - }) - expected <- list( - "est" = d[["emmean"]], - "se" = d[["SE"]], - "df" = d[["df"]] - ) - lsm1 <- lsmeans(mod, trt = "C") - lsm2 <- lsmeans(mod, trt = "T") - actual <- list( - "est" = c(lsm1$est, lsm2$est), - "se" = c(lsm1$se, lsm2$se), - "df" = c(lsm1$df, lsm2$df) - ) - expect_equal(actual, expected) - - result_actual <- ancova( - dat2, - set_vars( - visit = "visit", - covariates = c("age", "sex*cat*trt"), - outcome = "outcome", - group = "trt" - ) - )[c("lsm_ref_vis1", "lsm_alt_vis1")] - result_expected <- list( - "lsm_ref_vis1" = lsm1, - "lsm_alt_vis1" = lsm2 - ) - expect_equal(result_actual, result_expected) - - - ########### - # - # Equal weighting - # - - d <- suppressMessages({ - as.data.frame(emmeans::emmeans(mod, "trt")) - }) - expected <- list( - "est" = d[["emmean"]], - "se" = d[["SE"]], - "df" = d[["df"]] - ) - lsm1 <- lsmeans(mod, trt = "C", .weights = "equal") - lsm2 <- lsmeans(mod, trt = "T", .weights = "equal") - actual <- list( - "est" = c(lsm1$est, lsm2$est), - "se" = c(lsm1$se, lsm2$se), - "df" = c(lsm1$df, lsm2$df) - ) - expect_equal(actual, expected) - - result_actual <- ancova( - dat2, - set_vars( - visit = "visit", - covariates = c("age", "sex*cat*trt"), - outcome = "outcome", - group = "trt" - ), - weights = "equal" - )[c("lsm_ref_vis1", "lsm_alt_vis1")] - result_expected <- list( - "lsm_ref_vis1" = lsm1, - "lsm_alt_vis1" = lsm2 - ) - expect_equal(result_actual, result_expected) -}) - - test_that("LSmeans (proportional) from rbmi equals means of average prediction", { set.seed(124) @@ -218,15 +110,16 @@ test_that("LSmeans (proportional) from rbmi equals means of average prediction", test_that("LSmeans(proportional) returns equivalent results to 'counterfactual'", { set.seed(2412) - n <- 10000 - # Main goal here is to provide a more involved test of interactions between - # cont * cont, cont * cont * cont model terms + n <- 4000 dat <- tibble( v1 = rnorm(n), v2 = rnorm(n), v3 = rnorm(n), c1 = sample(c("A", "B"), size = n, replace = TRUE, prob = c(0.8, 0.2)), - c2 = sample(c("X", "Y"), size = n, replace = TRUE, prob = c(0.6, 0.4)), + c2 = sample(c("Y", "X"), size = n, replace = TRUE, prob = c(0.6, 0.4)) |> + factor(levels = c("Y", "X")), + c3 = sample(c("L", "K", "J"), size = n, replace = TRUE, prob = c(0.2, 0.5, 0.3)) |> + factor(levels = c("L", "K", "J")), error = rnorm(n, 0, 4), outcome = 30 + 5 * v1 + @@ -240,65 +133,84 @@ test_that("LSmeans(proportional) returns equivalent results to 'counterfactual'" 6 * (c2 == "Y") + 7 * (c1 == "B" & c2 == "Y") + 13 * (c1 == "B") * v1 + + 15 * (c3 == "K") + + 16 * (c3 == "J") + error ) - mod <- lm(outcome ~ v1 * v2 * v3 + c1 * c2 + v1 * c1, data = dat) - - expect_equal( - lsmeans(mod, c1 = "A", .weights = "proportional")$est, - mean(predict(mod, newdata = dat |> mutate(c1 = "A"))) - ) - - expect_equal( - lsm2 <- lsmeans(mod, c1 = "B", c2 = "Y", .weights = "proportional")$est, - mean(predict(mod, newdata = dat |> mutate(c1 = "B", c2 = "Y"))) - ) -}) - - -#### Toy code to experiment with - -# n <- 8000 - -# dat <- tibble( -# trt = sample(c("C", "T"), size = n, replace = TRUE, prob = c(0.5, 0.5)), -# age = rnorm(n), -# sex = sample(c("M", "F", "O"), size = n, replace = TRUE, prob = c(0.3, 0.5, 0.2)), -# cat = sample(c("A", "B"), size = n, replace = TRUE, prob = c(0.8, 0.2)), -# visit = "vis1" -# ) %>% -# mutate(sex = factor(sex, levels = c("M", "F", "O"))) %>% -# mutate(trt = factor(trt, levels = c("C", "T"))) %>% -# mutate(cat = factor(cat, levels = c("A", "B"))) - - -# outcome_vec <- ( -# model.matrix(~ trt * sex * cat + age, dat) %*% -# c(10, 5, 4, 3, 9, 4, 2, 8, 1, -5, -1, -2, -3) -# ) + rnorm(n, 0, 5) - -# dat2 <- dat %>% -# mutate(outcome = outcome_vec) - - -# mod <- lm(outcome ~ trt * sex * cat + age, data = dat2) - - -# as.data.frame(emmeans::emmeans(mod, "trt", weights = "equal")) -# as.data.frame(emmeans::emmeans(mod, "trt", weights = "proportional")) - -# dat_t <- dat2 %>% mutate(trt = factor("T", levels = c("C", "T"))) -# dat_c <- dat2 %>% mutate(trt = factor("C", levels = c("C", "T"))) -# mean(predict(mod, dat_c)) -# mean(predict(mod, dat_t)) - - - + mod <- lm(outcome ~ (v1 * v2 * v3) + (c1 * c2) + (v1 * c1) + c3, data = dat) + # + # + # Equal + # + # + emod <- suppressMessages({ + as.data.frame(emmeans::emmeans(mod, "c1", weights = "equal")) + }) + expected <- list( + "est" = emod[["emmean"]], + "se" = emod[["SE"]], + "df" = emod[["df"]] + ) + lsm1 <- lsmeans(mod, c1 = "A", .weights = "equal") + lsm2 <- lsmeans(mod, c1 = "B", .weights = "equal") + actual <- list( + "est" = c(lsm1$est, lsm2$est), + "se" = c(lsm1$se, lsm2$se), + "df" = c(lsm1$df, lsm2$df) + ) + expect_equal(actual, expected) + # + # + # Proportional + # + # + emod <- suppressMessages({ + as.data.frame(emmeans::emmeans(mod, "c1", weights = "proportional")) + }) + expected <- list( + "est" = emod[["emmean"]], + "se" = emod[["SE"]], + "df" = emod[["df"]] + ) + lsm1 <- lsmeans(mod, c1 = "A", .weights = "proportional_em") + lsm2 <- lsmeans(mod, c1 = "B", .weights = "proportional_em") + actual <- list( + "est" = c(lsm1$est, lsm2$est), + "se" = c(lsm1$se, lsm2$se), + "df" = c(lsm1$df, lsm2$df) + ) + expect_equal(actual, expected) + # + # + # Counterfactual + # + # + emod <- suppressMessages({ + as.data.frame(emmeans::emmeans(mod, "c1", counterfactual = "c1")) + }) + expected <- list( + "est" = emod[["emmean"]], + "se" = emod[["SE"]], + "df" = emod[["df"]] + ) + lsm1 <- lsmeans(mod, c1 = "A", .weights = "counterfactual") + lsm2 <- lsmeans(mod, c1 = "B", .weights = "counterfactual") + actual <- list( + "est" = c(lsm1$est, lsm2$est), + "se" = c(lsm1$se, lsm2$se), + "df" = c(lsm1$df, lsm2$df) + ) + expect_equal(actual, expected) + expect_message( + lsmeans(mod, c1 = "A", .weights = "proportional"), + "NOTE: The `proportional` weighting scheme is an alias for `counterfactual`" + ) +})