Merge pull request Merck#122 from Merck/116-develop-get_analysis_date

116 develop a new function `get_analysis_date`
jdblischak · Nov 3, 2023 · 0ddb0d6 · 0ddb0d6
2 parents 85a19fe + f870fd5
commit 0ddb0d6
Show file tree

Hide file tree

Showing 5 changed files with 490 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -55,6 +55,7 @@ Suggests:
     dplyr,
     ggplot2,
     gsDesign,
+    gsDesign2,
     knitr,
     markdown,
     remotes,

diff --git a/NAMESPACE b/NAMESPACE
@@ -5,6 +5,7 @@ export(counting_process)
 export(cut_data_by_date)
 export(cut_data_by_event)
 export(fit_pwexp)
+export(get_analysis_date)
 export(get_cut_date_by_event)
 export(mb_weight)
 export(pvalue_maxcombo)

diff --git a/R/get_analysis_date.R b/R/get_analysis_date.R
@@ -0,0 +1,310 @@
+#  Copyright (c) 2023 Merck & Co., Inc., Rahway, NJ, USA and its affiliates.
+#  All rights reserved.
+#
+#  This file is part of the simtrial program.
+#
+#  simtrial is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 3 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#' Get the analysis date under multiple conditions
+#'
+#' @param data A simulated data generated by [sim_pw_surv()].
+#' @param planned_calendar_time A numerical value specifying the
+#'   planned calendar time for the analysis.
+#' @param target_event_overall A numerical value specifying the
+#'   targeted events for the overall population.
+#' @param target_event_per_stratum A numerical vector specifying the
+#'   targeted events per stratum.
+#' @param max_extension_for_target_event A numerical value specifying the
+#'   maximum time extension to reach targeted events.
+#' @param previous_analysis_date A numerical value specifying the
+#'   previous analysis date.
+#' @param min_time_after_previous_analysis A numerical value specifying the
+#'   planned minimum time after the previous analysis.
+#' @param enroll_rate Enrollment rates, see details and examples.
+#' @param min_n_overall A numerical value specifying the
+#'   minimal overall sample size enrolled to kick off the analysis.
+#' @param min_n_per_stratum A numerical value specifying the
+#'   minimal sample size enrolled per stratum to kick off the analysis.
+#' @param min_followup A numerical value specifying the
+#'   minimal follow-up time after specified enrollment fraction in
+#'   `min_n_overall` or `min_n_per_stratum`.
+#'
+#' @return A numerical value of the analysis date.
+#'
+#' @export
+#'
+#' @examples
+#' library(gsDesign2)
+#' library(simtrial)
+#' library(tibble)
+#'
+#' alpha <- 0.025
+#' ratio <- 3
+#' n <- 500
+#' info_frac <- c(0.7, 1)
+#' prevalence_ratio <- c(0.4, 0.6)
+#' study_duration <- 48
+#'
+#' # Two strata
+#' stratum <- c("Biomarker-positive", "Biomarker-negative")
+#'
+#' prevalence_ratio <- c(0.6, 0.4)
+#' # enrollment rate
+#' enroll_rate <- define_enroll_rate(
+#'   stratum = rep(stratum, each = 2),
+#'   duration = c(2, 10, 2, 10),
+#'   rate = c(c(1, 4) * prevalence_ratio[1], c(1, 4) * prevalence_ratio[2])
+#' )
+#' enroll_rate$rate <- enroll_rate$rate * n / sum(enroll_rate$duration * enroll_rate$rate)
+#'
+#' # Failure rate
+#' med_pos <- 10 # Median of the biomarker positive population
+#' med_neg <- 8 # Median of the biomarker negative population
+#' hr_pos <- c(1, 0.7) # Hazard ratio of the biomarker positive population
+#' hr_neg <- c(1, 0.8) # Hazard ratio of the biomarker negative population
+#' fail_rate <- define_fail_rate(
+#'   stratum = rep(stratum, each = 2),
+#'   duration = 1000,
+#'   fail_rate = c(log(2) / c(med_pos, med_pos, med_neg, med_neg)),
+#'   hr = c(hr_pos, hr_neg),
+#'   dropout_rate = 0.01
+#' )
+#'
+#' # Simulate data
+#' temp <- simfix2simpwsurv(fail_rate) # Convert the failure rate
+#' set.seed(2023)
+#' simulated_data <- sim_pw_surv(
+#'   n = n, # Sample size
+#'   # Stratified design with prevalence ratio of 6:4
+#'   stratum = tibble(stratum = stratum, p = prevalence_ratio),
+#'   # Randomization ratio
+#'   block = c("control", "control", "experimental", "experimental"),
+#'   enroll_rate = enroll_rate, # Enrollment rate
+#'   fail_rate = temp$fail_rate, # Failure rate
+#'   dropout_rate = temp$dropout_rate # Dropout rate
+#' )
+#'
+#' # Example 1: Cut for analysis at the 24th month.
+#' get_analysis_date(
+#'   simulated_data,
+#'   planned_calendar_time = 24
+#' )
+#'
+#' # Example 2: Cut for analysis when there are 300 events in the overall population.
+#' get_analysis_date(
+#'   simulated_data,
+#'   target_event_overall = 300
+#' )
+#'
+#' # Example 3: Cut for analysis at the 24th month and there are 300 events
+#' # in the overall population, whichever arrives later.
+#' get_analysis_date(
+#'   simulated_data,
+#'   planned_calendar_time = 24,
+#'   target_event_overall = 300
+#' )
+#'
+#' # Example 4: Cut for analysis when there are at least 100 events
+#' # in the biomarker-positive population, and at least 200 events
+#' # in the biomarker-negative population, whichever arrives later.
+#' get_analysis_date(
+#'   simulated_data,
+#'   target_event_per_stratum = c(100, 200)
+#' )
+#' get_analysis_date(
+#'   simulated_data,
+#'   target_event_overall = 150,
+#'   target_event_per_stratum = c(100, NA)
+#' )
+#'
+#' # Example 5: Cut for analysis when there are at least 100 events
+#' # in the biomarker positive population, and at least 200 events
+#' # in the biomarker negative population, whichever arrives later.
+#' # But will stop at the 30th month if events are fewer than 100/200.
+#' get_analysis_date(
+#'   simulated_data,
+#'   target_event_per_stratum = c(100, 200),
+#'   max_extension_for_target_event = 30
+#' )
+#'
+#' # Example 6: Cut for analysis after 12 months followup when 80%
+#' # of the patients are enrolled in the overall population.
+#' get_analysis_date(
+#'   simulated_data,
+#'   enroll_rate = enroll_rate,
+#'   min_n_overall = n * 0.8,
+#'   min_followup = 12
+#' )
+#'
+#' # Example 7: Cut for analysis when 12 months after at least 200/160 patients
+#' # are enrolled in the biomarker positive/negative population.
+#' get_analysis_date(
+#'   simulated_data,
+#'   enroll_rate = enroll_rate,
+#'   min_n_per_stratum = c(200, 160),
+#'   min_followup = 12
+#' )
+#' get_analysis_date(
+#'   simulated_data,
+#'   enroll_rate = enroll_rate,
+#'   min_n_per_stratum = c(200, NA),
+#'   min_followup = 12
+#' )
+get_analysis_date <- function(
+    data,
+    # Option 1: Planned calendar time for the analysis
+    planned_calendar_time = NA,
+    # Option 2: Reach targeted events
+    target_event_overall = NA,
+    target_event_per_stratum = NA,
+    # Option 3: Max time extension to reach targeted events
+    max_extension_for_target_event = NA,
+    # Option 4: Planned minimum time after the previous analysis
+    previous_analysis_date = 0,
+    min_time_after_previous_analysis = NA,
+    # Option 5: Minimal follow-up time after specified enrollment fraction
+    enroll_rate = NA,
+    min_n_overall = NA,
+    min_n_per_stratum = NA,
+    min_followup = NA) {
+  input_check_scale(planned_calendar_time, label = "planned_calendar_time")
+  input_check_scale(target_event_overall, label = "target_event_overall")
+  input_check_scale(max_extension_for_target_event, label = "max_extension_for_target_event")
+  input_check_scale(min_time_after_previous_analysis, label = "min_time_after_previous_analysis")
+  input_check_scale(min_n_overall, label = "min_n_overall")
+  input_check_scale(min_followup, label = "min_followup")
+  input_check_vector(target_event_per_stratum, label = "target_event_per_stratum")
+  input_check_vector(min_n_per_stratum, label = "min_n_per_stratum")
+
+  # Check if enrollment is input by user
+  cond1 <- inherits(enroll_rate, c("tbl_df", "data.frame"))
+  # Check if `min_n_overall` is input by user
+  cond2 <- !is.na(min_n_overall)
+  # Check if `min_n_per_stratum` is input by user
+  cond3 <- !all(is.na(min_n_overall))
+
+  if (cond1) {
+    n_max <- sum(enroll_rate$rate * enroll_rate$duration)
+
+    if (is.na(min_followup)) {
+      stop("`min_followup` must be provided.")
+    }
+
+    if (cond2) {
+      if (min_n_overall > n_max) {
+        stop("`min_n_overall` must be a positive number smaller than the total sample size.")
+      }
+    }
+
+    if (cond3) {
+      if (sum(min_n_per_stratum, na.rm = TRUE) > n_max) {
+        stop("`min_n_per_stratum` must be a sum of positive numbers smaller than the total sample size.")
+      }
+    }
+  }
+
+  # Cutting option 1: Planned calendar time for the analysis
+  cut_date1 <- planned_calendar_time
+
+  # Cutting option 2: Reach targeted events
+  # 2a: Reach targeted events of the overall population
+  if (!is.na(target_event_overall)) {
+    cut_date2a <- get_cut_date_by_event(data, event = target_event_overall)
+  } else {
+    cut_date2a <- NA
+  }
+  # 2b: Reach targeted events per sub-population
+  if (!all(is.na(target_event_per_stratum))) {
+    stratum <- unique(data$stratum)
+    cut_date2b <- lapply(
+      seq_along(target_event_per_stratum),
+      function(x) {
+        get_cut_date_by_event(data %>% dplyr::filter(stratum == stratum[x]),
+          event = target_event_per_stratum[x]
+        )
+      }
+    ) %>%
+      unlist() %>%
+      max()
+  } else {
+    cut_date2b <- NA
+  }
+  cut_date2 <- pmax(cut_date2a, cut_date2b, na.rm = TRUE)
+
+  # Cutting option 3: Max time extension to reach targeted events
+  cut_date3 <- max_extension_for_target_event
+
+  # Cutting option 4: Planned minimum time after the previous analysis
+  cut_date4 <- previous_analysis_date + min_time_after_previous_analysis
+
+  # Cutting option 5: Minimal follow-up time after specified enrollment fraction
+  # 5a: At least 10 months after the 80% of the patients are enrolled
+  if (!all(is.na(min_n_overall))) {
+    cut_date5a <- get_min_date(enroll_rate, min_n = min_n_overall) + min_followup
+  } else {
+    cut_date5a <- NA
+  }
+  # 5b: At least 10 months after the 80% biomarker positive patients are
+  # enrolled and 70% biomarker negative patients are enrolled
+  if (!all(is.na(min_n_per_stratum))) {
+    cut_date5b <- lapply(
+      seq_along(min_n_per_stratum),
+      function(x) {
+        get_min_date(enroll_rate %>% dplyr::filter(stratum == stratum[x]), min_n = min_n_per_stratum[x])
+      }
+    ) %>%
+      unlist() %>%
+      max(na.rm = TRUE) + min_followup
+  } else {
+    cut_date5b <- NA
+  }
+  cut_date5 <- pmax(cut_date5a, cut_date5b, na.rm = TRUE)
+
+  # Combining all 5 cutting options
+  cut_date <- pmin(pmax(cut_date1, cut_date2, cut_date4, cut_date5, na.rm = TRUE), cut_date3, na.rm = TRUE)
+
+  cut_date
+}
+
+input_check_scale <- function(x = NA, label = "x") {
+  if (!is.na(x)) {
+    if (is.numeric(x) && x < 0) {
+      stop(paste0(label, " must be a positive number."))
+    } else if (!is.numeric(x)) {
+      stop(paste0(label, " must be a numerical value."))
+    }
+  }
+}
+
+input_check_vector <- function(x = NA, label = "x") {
+  if (!(all(is.na(x) | (is.numeric(x) & x > 0)))) {
+    stop(paste0(label, " must be a positive number with either `NA` or positive numbers."))
+  }
+}
+
+get_min_date <- function(enroll_rate, min_n = 400) {
+  if (!is.na(min_n)) {
+    res <- stats::uniroot(
+      f = function(x) {
+        gsDesign2::expected_accrual(time = x, enroll_rate = enroll_rate) - min_n
+      },
+      interval = c(0, sum(enroll_rate$duration) + 1)
+    )
+    ans <- res$root
+  } else {
+    ans <- NA
+  }
+
+  ans
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -49,6 +49,7 @@ reference:
     - cut_data_by_date
     - cut_data_by_event
     - get_cut_date_by_event
+    - get_analysis_date
 
 - title: "Compute p-values/test statistics"
   contents: