Skip to content

Commit

Permalink
Introduced "counterfactual" weighting
Browse files Browse the repository at this point in the history
  • Loading branch information
gowerc committed Mar 21, 2024
1 parent 6e989b4 commit f0ed45f
Show file tree
Hide file tree
Showing 7 changed files with 415 additions and 269 deletions.
46 changes: 21 additions & 25 deletions R/ancova.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
#' fit the ancova model at. If `NULL`, a separate ancova model will be fit to the
#' outcomes for each visit (as determined by `unique(data[[vars$visit]])`).
#' See details.
#' @param weights Character, either `"proportional"` (default) or `"equal"`. Specifies the
#' weighting strategy to be used for categorical covariates when calculating the lsmeans.
#' See details.
#' @param weights Character, either `"counterfactual"` (default), `"equal"`,
#' `"proportional_em"` or `"proportional"`.
#' Specifies the weighting strategy to be used when calculating the lsmeans.
#' See the weighting section for more details.
#'
#' @details
#' The function works as follows:
Expand Down Expand Up @@ -49,29 +50,18 @@
#' by providing them to the `covariates` argument of [set_vars()]
#' e.g. `set_vars(covariates = c("sex*age"))`.
#'
#'
#' ## Weighting
#'
#' `"proportional"` is the default scheme that is used. This is equivalent to standardization,
#' i.e. the lsmeans in
#' each group are equal to the predicted mean outcome from the ancova model for
#' that group based on baseline characteristics of all subjects regardless of
#' their assigned group. The alternative weighting scheme, `"equal"`, creates hypothetical
#' patients by expanding out all combinations of the models categorical covariates. The
#' lsmeans are then calculated as the average of
#' the predicted mean outcome for these hypothetical patients assuming they come from each
#' group in turn.
#'
#' In short:
#' - `"proportional"` weights categorical covariates based upon their frequency of occurrence
#' in the data.
#' - `"equal"` weights categorical covariates equally across all theoretical combinations.
#' @inheritSection lsmeans Weighting
#'
#' @seealso [analyse()]
#' @seealso [stats::lm()]
#' @seealso [set_vars()]
#' @export
ancova <- function(data, vars, visits = NULL, weights = c("proportional", "equal")) {
ancova <- function(
data,
vars,
visits = NULL,
weights = c("counterfactual", "equal", "proportional_em", "proportional")
) {

outcome <- vars[["outcome"]]
group <- vars[["group"]]
Expand Down Expand Up @@ -154,13 +144,13 @@ ancova <- function(data, vars, visits = NULL, weights = c("proportional", "equal
#' @description
#' Performance analysis of covariance. See [ancova()] for full details.
#'
#' @param data The `data.frame` containing all of the data required for the model.
#' @param outcome Character, the name of the outcome variable in `data`.
#' @param group Character, the name of the group variable in `data`.
#' @param covariates Character vector containing the name of any additional covariates
#' to be included in the model as well as any interaction terms.
#' @param weights Character, specifies whether to use "proportional" or "equal" weighting for each
#' categorical covariate combination when calculating the lsmeans.
#'
#' @inheritParams ancova
#' @inheritSection lsmeans Weighting
#'
#' @details
#' - `group` must be a factor variable with only 2 levels.
Expand All @@ -174,7 +164,13 @@ ancova <- function(data, vars, visits = NULL, weights = c("proportional", "equal
#' }
#' @seealso [ancova()]
#' @importFrom stats lm coef vcov df.residual
ancova_single <- function(data, outcome, group, covariates, weights = c("proportional", "equal")) {
ancova_single <- function(
data,
outcome,
group,
covariates,
weights = c("counterfactual", "equal", "proportional_em", "proportional")
) {

weights <- match.arg(weights)
assert_that(
Expand Down
176 changes: 137 additions & 39 deletions R/lsmeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,55 @@
#'
#'
#' Estimates the least square means from a linear model. The exact implementation
#' / interpretation depends on the weighting scheme; see details for more information.
#'
#' @details
#'
#' For `.weights = "proportional"` (the default) the lsmeans are obtained by
#' taking the average of the fitted values for each patient.
#' In terms of the interactions between two continuous covariates
#' this is equivalent to constructing a hypothetical patient whose
#' interaction term is `mean(X * Y)`. This is opposed to the
#' `emmeans` package which calculates the interaction term
#' of the hypothetical patient as `mean(X) * mean(Y)`. The approach
#' outlined here is equivalent to standardization or g-computation.
#' / interpretation depends on the weighting scheme; see the weighting section for more
#' information.
#'
#'
#' For `.weights = "equal"` the lsmeans are obtained by taking the model fitted
#' @section Weighting:
#'
#' ### Counterfactual
#'
#' For `weights = "counterfactual"` (the default) the lsmeans are obtained by
#' taking the average of the predicted values for each patient after assigning all patients
#' to each arm in turn.
#' This approach is equivalent to standardization or g-computation.
#' In comparison to `emmeans` this approach is equivalent to:
#' ```
#' emmeans::emmeans(model, specs = "<treatment>", counterfactual = "<treatment>")
#' ```
#' Note that `weights = "proportional"` is an alias for `weights = "counterfactual"`.
#' Whilst potentially confusing, this has been done to ensure backwards compatibility
#' with prior versions of `rbmi`. To get results
#' consistent with `emmeans`'s `weights = "proportional"` please use `weights = "proportional_em"`.
#'
#' ### Equal
#'
#' For `weights = "equal"` the lsmeans are obtained by taking the model fitted
#' value of a hypothetical patient whose covariates are defined as follows:
#' - Continuous covariates are set to `mean(X)`
#' - Dummy categorical variables are set to `1/N` where `N` is the number of levels
#' - Continuous * continuous interactions are set to `mean(X) * mean(Y)`
#' - Continuous * categorical interactions are set to `mean(X) * 1/N`
#' - Dummary categorical * categorical interaction variables are set to `1/N * 1/M`
#'
#' In comparison to `emmeans` this approach is equivalent to:
#' ```
#' emmeans::emmeans(model, specs = "<treatment>", weights = "equal")
#' ```
#'
#' ### Proportional
#'
#' For `weights = "proportional_em"` the lsmeans are obtained as per `weights = "equal"`
#' except instead of weighting each observation equally they are weighted by the proportion
#' in which the given combination of categorical values occurred in the data.
#' In comparison to `emmeans` this approach is equivalent to:
#' ```
#' emmeans::emmeans(model, specs = "<treatment>", weights = "proportional")
#' ```
#' Note that this is not to be confused with `weights = "proportional"` which is an alias
#' for `weights = "counterfactual"`.
#'
#' @section Fixing:
#'
#' Regardless of the weighting scheme any named arguments passed via `...` will
#' fix the value of the covariate to the specified value.
Expand All @@ -35,8 +65,10 @@
#' @param ... Fixes specific variables to specific values i.e.
#' `trt = 1` or `age = 50`. The name of the argument must be the name
#' of the variable within the dataset.
#' @param .weights Character, specifies whether to use "proportional" or "equal" weighting for each
#' categorical covariate combination when calculating the lsmeans.
#' @param .weights Character, either `"counterfactual"` (default), `"equal"`,
#' `"proportional_em"` or `"proportional"`.
#' Specifies the weighting strategy to be used when calculating the lsmeans.
#' See the weighting section for more details.
#'
#' @references \url{https://CRAN.R-project.org/package=emmeans}
#' @references \url{https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.3/statug/statug_glm_details41.htm}
Expand All @@ -49,7 +81,11 @@
#' lsmeans(mod, Species = "versicolor", Petal.Length = 1)
#' }
#' @importFrom stats model.matrix terms reformulate
lsmeans <- function(model, ..., .weights = c("proportional", "equal")) {
lsmeans <- function(
model,
...,
.weights = c("counterfactual", "equal", "proportional_em", "proportional")
) {
.weights <- match.arg(arg = .weights)
fix <- list(...)

Expand All @@ -66,9 +102,22 @@ lsmeans <- function(model, ..., .weights = c("proportional", "equal")) {
)
}

if (.weights == "proportional") {
message(
paste(
"NOTE: The `proportional` weighting scheme is an alias for `counterfactual`",
"and will be deprecated in the future. Please use `proportional_em` or",
"`counterfactual` as appropriate instead.",
sep = " "
)
)
}

design_fun <- switch(.weights,
counterfactual = ls_design_counterfactual,
proportional = ls_design_counterfactual,
equal = ls_design_equal,
proportional = ls_design_proportional
proportional_em = ls_design_proportional
)

design <- design_fun(data, frm, fix)
Expand All @@ -81,6 +130,27 @@ lsmeans <- function(model, ..., .weights = c("proportional", "equal")) {
}



collapse_values <- function(x) {
UseMethod("collapse_values")
}

#' @export
collapse_values.factor <- function(x) {
return(factor(levels(x), levels(x)))
}

#' @export
collapse_values.numeric <- function(x) {
return(mean(x))
}

#' @export
collapse_values.default <- function(x) {
stop("invalid data type")
}


#' Calculate design vector for the lsmeans
#'
#' Calculates the design vector as required to generate the lsmean
Expand All @@ -99,7 +169,8 @@ ls_design_equal <- function(data, frm, fix) {
inherits(frm, "formula"),
is.data.frame(data),
is.list(fix),
all(names(fix) %in% colnames(data))
all(names(fix) %in% colnames(data)),
all(vapply(fix, length, numeric(1)) == 1)
)
frm2 <- update(frm, NULL ~ .)
data2 <- model.frame(frm2, data)
Expand All @@ -125,28 +196,8 @@ ls_design_equal <- function(data, frm, fix) {
}


collapse_values <- function(x) {
UseMethod("collapse_values")
}

#' @export
collapse_values.factor <- function(x) {
return(factor(levels(x), levels(x)))
}

#' @export
collapse_values.numeric <- function(x) {
return(mean(x))
}

#' @export
collapse_values.default <- function(x) {
stop("invalid data type")
}


#' @rdname ls_design
ls_design_proportional <- function(data, frm, fix) {
ls_design_counterfactual <- function(data, frm, fix) {
data2 <- data
for (var in names(fix)) {
value <- data[[var]]
Expand All @@ -159,3 +210,50 @@ ls_design_proportional <- function(data, frm, fix) {
design <- colMeans(model.matrix(frm, data = data2))
return(design)
}


#' @rdname ls_design
ls_design_proportional <- function(data, frm, fix) {
assert_that(
inherits(frm, "formula"),
is.data.frame(data),
is.list(fix),
all(names(fix) %in% colnames(data)),
all(vapply(fix, length, numeric(1)) == 1)
)
frm2 <- update(frm, NULL ~ .)
dat2 <- model.frame(frm2, data)
collection <- lapply(as.list(dat2), collapse_values)

for (var in names(fix)) {
if (is.numeric(dat2[[var]])) {
dat2[[var]] <- fix[[var]]
collection[[var]] <- fix[[var]]
} else if (is.factor(dat2[[var]])) {
dat2[[var]] <- factor(fix[[var]], levels = levels(dat2[[var]]))
collection[[var]] <- factor(fix[[var]], levels = levels(dat2[[var]]))
}
}

all_combinations <- expand.grid(collection)
design_matrix <- model.matrix(frm2, all_combinations)

categorical_vars <- dat2 |>
vapply(\(x) is.character(x) || is.factor(x), logical(1)) |>
which() |>
names()

wgts <- dat2[, categorical_vars[[1]]] |>
aggregate(
as.list(dat2[, categorical_vars]),
length
)
assert_that(
all.equal(wgts[, categorical_vars], all_combinations[, categorical_vars])
)

wgts_scaled <- wgts[["x"]] / sum(wgts[["x"]])

design <- apply(design_matrix, 2, \(x) sum(x * wgts_scaled))
return(design)
}
Loading

0 comments on commit f0ed45f

Please sign in to comment.