Skip to content

Commit

Permalink
Merge pull request Merck#122 from Merck/116-develop-get_analysis_date
Browse files Browse the repository at this point in the history
116 develop a new function `get_analysis_date`
  • Loading branch information
nanxstats authored Nov 3, 2023
2 parents 85a19fe + f870fd5 commit 0ddb0d6
Show file tree
Hide file tree
Showing 5 changed files with 490 additions and 0 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Suggests:
dplyr,
ggplot2,
gsDesign,
gsDesign2,
knitr,
markdown,
remotes,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(counting_process)
export(cut_data_by_date)
export(cut_data_by_event)
export(fit_pwexp)
export(get_analysis_date)
export(get_cut_date_by_event)
export(mb_weight)
export(pvalue_maxcombo)
Expand Down
310 changes: 310 additions & 0 deletions R/get_analysis_date.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
# Copyright (c) 2023 Merck & Co., Inc., Rahway, NJ, USA and its affiliates.
# All rights reserved.
#
# This file is part of the simtrial program.
#
# simtrial is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

#' Get the analysis date under multiple conditions
#'
#' @param data A simulated data generated by [sim_pw_surv()].
#' @param planned_calendar_time A numerical value specifying the
#' planned calendar time for the analysis.
#' @param target_event_overall A numerical value specifying the
#' targeted events for the overall population.
#' @param target_event_per_stratum A numerical vector specifying the
#' targeted events per stratum.
#' @param max_extension_for_target_event A numerical value specifying the
#' maximum time extension to reach targeted events.
#' @param previous_analysis_date A numerical value specifying the
#' previous analysis date.
#' @param min_time_after_previous_analysis A numerical value specifying the
#' planned minimum time after the previous analysis.
#' @param enroll_rate Enrollment rates, see details and examples.
#' @param min_n_overall A numerical value specifying the
#' minimal overall sample size enrolled to kick off the analysis.
#' @param min_n_per_stratum A numerical value specifying the
#' minimal sample size enrolled per stratum to kick off the analysis.
#' @param min_followup A numerical value specifying the
#' minimal follow-up time after specified enrollment fraction in
#' `min_n_overall` or `min_n_per_stratum`.
#'
#' @return A numerical value of the analysis date.
#'
#' @export
#'
#' @examples
#' library(gsDesign2)
#' library(simtrial)
#' library(tibble)
#'
#' alpha <- 0.025
#' ratio <- 3
#' n <- 500
#' info_frac <- c(0.7, 1)
#' prevalence_ratio <- c(0.4, 0.6)
#' study_duration <- 48
#'
#' # Two strata
#' stratum <- c("Biomarker-positive", "Biomarker-negative")
#'
#' prevalence_ratio <- c(0.6, 0.4)
#' # enrollment rate
#' enroll_rate <- define_enroll_rate(
#' stratum = rep(stratum, each = 2),
#' duration = c(2, 10, 2, 10),
#' rate = c(c(1, 4) * prevalence_ratio[1], c(1, 4) * prevalence_ratio[2])
#' )
#' enroll_rate$rate <- enroll_rate$rate * n / sum(enroll_rate$duration * enroll_rate$rate)
#'
#' # Failure rate
#' med_pos <- 10 # Median of the biomarker positive population
#' med_neg <- 8 # Median of the biomarker negative population
#' hr_pos <- c(1, 0.7) # Hazard ratio of the biomarker positive population
#' hr_neg <- c(1, 0.8) # Hazard ratio of the biomarker negative population
#' fail_rate <- define_fail_rate(
#' stratum = rep(stratum, each = 2),
#' duration = 1000,
#' fail_rate = c(log(2) / c(med_pos, med_pos, med_neg, med_neg)),
#' hr = c(hr_pos, hr_neg),
#' dropout_rate = 0.01
#' )
#'
#' # Simulate data
#' temp <- simfix2simpwsurv(fail_rate) # Convert the failure rate
#' set.seed(2023)
#' simulated_data <- sim_pw_surv(
#' n = n, # Sample size
#' # Stratified design with prevalence ratio of 6:4
#' stratum = tibble(stratum = stratum, p = prevalence_ratio),
#' # Randomization ratio
#' block = c("control", "control", "experimental", "experimental"),
#' enroll_rate = enroll_rate, # Enrollment rate
#' fail_rate = temp$fail_rate, # Failure rate
#' dropout_rate = temp$dropout_rate # Dropout rate
#' )
#'
#' # Example 1: Cut for analysis at the 24th month.
#' get_analysis_date(
#' simulated_data,
#' planned_calendar_time = 24
#' )
#'
#' # Example 2: Cut for analysis when there are 300 events in the overall population.
#' get_analysis_date(
#' simulated_data,
#' target_event_overall = 300
#' )
#'
#' # Example 3: Cut for analysis at the 24th month and there are 300 events
#' # in the overall population, whichever arrives later.
#' get_analysis_date(
#' simulated_data,
#' planned_calendar_time = 24,
#' target_event_overall = 300
#' )
#'
#' # Example 4: Cut for analysis when there are at least 100 events
#' # in the biomarker-positive population, and at least 200 events
#' # in the biomarker-negative population, whichever arrives later.
#' get_analysis_date(
#' simulated_data,
#' target_event_per_stratum = c(100, 200)
#' )
#' get_analysis_date(
#' simulated_data,
#' target_event_overall = 150,
#' target_event_per_stratum = c(100, NA)
#' )
#'
#' # Example 5: Cut for analysis when there are at least 100 events
#' # in the biomarker positive population, and at least 200 events
#' # in the biomarker negative population, whichever arrives later.
#' # But will stop at the 30th month if events are fewer than 100/200.
#' get_analysis_date(
#' simulated_data,
#' target_event_per_stratum = c(100, 200),
#' max_extension_for_target_event = 30
#' )
#'
#' # Example 6: Cut for analysis after 12 months followup when 80%
#' # of the patients are enrolled in the overall population.
#' get_analysis_date(
#' simulated_data,
#' enroll_rate = enroll_rate,
#' min_n_overall = n * 0.8,
#' min_followup = 12
#' )
#'
#' # Example 7: Cut for analysis when 12 months after at least 200/160 patients
#' # are enrolled in the biomarker positive/negative population.
#' get_analysis_date(
#' simulated_data,
#' enroll_rate = enroll_rate,
#' min_n_per_stratum = c(200, 160),
#' min_followup = 12
#' )
#' get_analysis_date(
#' simulated_data,
#' enroll_rate = enroll_rate,
#' min_n_per_stratum = c(200, NA),
#' min_followup = 12
#' )
get_analysis_date <- function(
data,
# Option 1: Planned calendar time for the analysis
planned_calendar_time = NA,
# Option 2: Reach targeted events
target_event_overall = NA,
target_event_per_stratum = NA,
# Option 3: Max time extension to reach targeted events
max_extension_for_target_event = NA,
# Option 4: Planned minimum time after the previous analysis
previous_analysis_date = 0,
min_time_after_previous_analysis = NA,
# Option 5: Minimal follow-up time after specified enrollment fraction
enroll_rate = NA,
min_n_overall = NA,
min_n_per_stratum = NA,
min_followup = NA) {
input_check_scale(planned_calendar_time, label = "planned_calendar_time")
input_check_scale(target_event_overall, label = "target_event_overall")
input_check_scale(max_extension_for_target_event, label = "max_extension_for_target_event")
input_check_scale(min_time_after_previous_analysis, label = "min_time_after_previous_analysis")
input_check_scale(min_n_overall, label = "min_n_overall")
input_check_scale(min_followup, label = "min_followup")
input_check_vector(target_event_per_stratum, label = "target_event_per_stratum")
input_check_vector(min_n_per_stratum, label = "min_n_per_stratum")

# Check if enrollment is input by user
cond1 <- inherits(enroll_rate, c("tbl_df", "data.frame"))
# Check if `min_n_overall` is input by user
cond2 <- !is.na(min_n_overall)
# Check if `min_n_per_stratum` is input by user
cond3 <- !all(is.na(min_n_overall))

if (cond1) {
n_max <- sum(enroll_rate$rate * enroll_rate$duration)

if (is.na(min_followup)) {
stop("`min_followup` must be provided.")
}

if (cond2) {
if (min_n_overall > n_max) {
stop("`min_n_overall` must be a positive number smaller than the total sample size.")
}
}

if (cond3) {
if (sum(min_n_per_stratum, na.rm = TRUE) > n_max) {
stop("`min_n_per_stratum` must be a sum of positive numbers smaller than the total sample size.")
}
}
}

# Cutting option 1: Planned calendar time for the analysis
cut_date1 <- planned_calendar_time

# Cutting option 2: Reach targeted events
# 2a: Reach targeted events of the overall population
if (!is.na(target_event_overall)) {
cut_date2a <- get_cut_date_by_event(data, event = target_event_overall)
} else {
cut_date2a <- NA
}
# 2b: Reach targeted events per sub-population
if (!all(is.na(target_event_per_stratum))) {
stratum <- unique(data$stratum)
cut_date2b <- lapply(
seq_along(target_event_per_stratum),
function(x) {
get_cut_date_by_event(data %>% dplyr::filter(stratum == stratum[x]),
event = target_event_per_stratum[x]
)
}
) %>%
unlist() %>%
max()
} else {
cut_date2b <- NA
}
cut_date2 <- pmax(cut_date2a, cut_date2b, na.rm = TRUE)

# Cutting option 3: Max time extension to reach targeted events
cut_date3 <- max_extension_for_target_event

# Cutting option 4: Planned minimum time after the previous analysis
cut_date4 <- previous_analysis_date + min_time_after_previous_analysis

# Cutting option 5: Minimal follow-up time after specified enrollment fraction
# 5a: At least 10 months after the 80% of the patients are enrolled
if (!all(is.na(min_n_overall))) {
cut_date5a <- get_min_date(enroll_rate, min_n = min_n_overall) + min_followup
} else {
cut_date5a <- NA
}
# 5b: At least 10 months after the 80% biomarker positive patients are
# enrolled and 70% biomarker negative patients are enrolled
if (!all(is.na(min_n_per_stratum))) {
cut_date5b <- lapply(
seq_along(min_n_per_stratum),
function(x) {
get_min_date(enroll_rate %>% dplyr::filter(stratum == stratum[x]), min_n = min_n_per_stratum[x])
}
) %>%
unlist() %>%
max(na.rm = TRUE) + min_followup
} else {
cut_date5b <- NA
}
cut_date5 <- pmax(cut_date5a, cut_date5b, na.rm = TRUE)

# Combining all 5 cutting options
cut_date <- pmin(pmax(cut_date1, cut_date2, cut_date4, cut_date5, na.rm = TRUE), cut_date3, na.rm = TRUE)

cut_date
}

input_check_scale <- function(x = NA, label = "x") {
if (!is.na(x)) {
if (is.numeric(x) && x < 0) {
stop(paste0(label, " must be a positive number."))
} else if (!is.numeric(x)) {
stop(paste0(label, " must be a numerical value."))
}
}
}

input_check_vector <- function(x = NA, label = "x") {
if (!(all(is.na(x) | (is.numeric(x) & x > 0)))) {
stop(paste0(label, " must be a positive number with either `NA` or positive numbers."))
}
}

get_min_date <- function(enroll_rate, min_n = 400) {
if (!is.na(min_n)) {
res <- stats::uniroot(
f = function(x) {
gsDesign2::expected_accrual(time = x, enroll_rate = enroll_rate) - min_n
},
interval = c(0, sum(enroll_rate$duration) + 1)
)
ans <- res$root
} else {
ans <- NA
}

ans
}
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ reference:
- cut_data_by_date
- cut_data_by_event
- get_cut_date_by_event
- get_analysis_date

- title: "Compute p-values/test statistics"
contents:
Expand Down
Loading

0 comments on commit 0ddb0d6

Please sign in to comment.