Add support for unvalidated text fields

Address issues ctsit#3, ctsit#7. Add get_long_text_fields(). Add get_long_text_field_values(). Modify get_long_categorical_field_response_values(). Modify get_one_rectangle_of_values() Update proof_of_concept(). Update README to reflect the new feature.
pbchase · Jan 15, 2025 · 9db8c89 · 9db8c89
1 parent c5b4875
commit 9db8c89
Show file tree

Hide file tree

Showing 21 changed files with 449 additions and 21 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -37,6 +37,8 @@ Suggests:
 Config/testthat/edition: 3
 Imports: 
     dplyr,
+    lorem,
+    purrr,
     rlang,
     tidyr
 RoxygenNote: 7.3.2

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,5 +2,7 @@
 
 export(get_long_categorical_field_response_values)
 export(get_long_categorical_field_responses)
+export(get_long_text_field_values)
+export(get_long_text_fields)
 export(get_one_rectangle_of_values)
 importFrom(rlang,.data)
diff --git a/R/get_long_categorical_field_response_values.R b/R/get_long_categorical_field_response_values.R
@@ -16,13 +16,22 @@
 #' }
 get_long_categorical_field_response_values <- function(long_categorical_field_responses) {
   single_value_responses <- long_categorical_field_responses |>
+    # Filter for any categorical field_type
+    dplyr::filter(.data$field_type %in% c(
+      "checkbox",
+      "dropdown",
+      "radio",
+      "yesno"
+    )) |>
+    # Filter for anything but checkbox fields
     dplyr::filter(.data$field_type != "checkbox") |>
     dplyr::group_by(.data$field_name) |>
     dplyr::slice_sample(n = 1, weight_by = .data$weight) |>
     dplyr::ungroup()
 
   multi_value_responses <-
     long_categorical_field_responses |>
+    # Filter for checkbox fields
     dplyr::filter(.data$field_type == "checkbox") |>
     dplyr::group_by(.data$field_group) |>
     dplyr::slice_sample(prop = 0.5, weight_by = .data$weight) |>

diff --git a/R/get_long_text_field_values.R b/R/get_long_text_field_values.R
@@ -0,0 +1,48 @@
+#' @title generate text field values
+#' @description
+#' Provide a set of values for each field in the output of
+#' `get_long_text_fields`
+#'
+#' @param long_text_fields a long data set of text
+#'   fields, their parameters, and weights.
+#'
+#' @return a long dataframe of text field values with one row for each value set.
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' get_long_text_field_values(long_text_fields)
+#' }
+get_long_text_field_values <- function(long_text_fields) {
+  tvt_na <- function(df) {
+    df |>
+      dplyr::filter(.data$tvt == "tvt_na") |>
+      dplyr::mutate(value = replicate(length(.data$field_name), lorem::ipsum_words(round(stats::rnorm(mean = .data$mean, sd = .data$sd, n = 1)))))
+    # |>
+    #   dplyr::slice_sample(prop = 0.5, weight_by = .data$weight)
+  }
+
+  tvt_types <- c(
+    "tvt_na"
+  )
+
+  process_one_text_validation_type <- function(my_tvt, df) {
+    # exec (run) the function named in `alert` on the rows of data that
+    #  have an alert_type of `alert`
+    result <- rlang::exec(my_tvt, df |>
+      dplyr::filter(.data$tvt == my_tvt))
+    return(result)
+  }
+
+  text_field_values <-
+    purrr::map(tvt_types,
+               process_one_text_validation_type,
+               long_text_fields |> dplyr::filter(.data$field_type == "text")
+               ) |>
+    dplyr::bind_rows()
+
+  result <- text_field_values |>
+    dplyr::select("field_name", "value")
+
+  return(result)
+}
diff --git a/R/get_long_text_fields.R b/R/get_long_text_fields.R
@@ -0,0 +1,85 @@
+#' @title Get every text field response from a REDCap data dictionary
+#'
+#' @description
+#' Given a REDCap data dictionary, enumerate every text field in that data dictionary and return a dataset with default weights
+#'
+#' @param metadata A REDCap data dictionary
+#'
+#' @returns a dataframe with these columns
+#' \describe{
+#'   \item{field_name}{REDCap field name}
+#'   \item{form_name}{REDCap form name}
+#'   \item{field_type}{REDCap field type}
+#'   \item{text_validation_type}{REDCap text validation type}
+#'   \item{text_validation_min}{REDCap text validation min}
+#'   \item{text_validation_max}{REDCap text validation max}
+#'   \item{tvt}{text validation type function name}
+#'   \item{weight}{a default weight for the field}
+#'   \item{mean}{mean of data to be generated}
+#'   \item{sd}{standard deviation of data to be generated}
+#' }
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' long_text_fields <-
+#'   get_long_text_fields(metadata_to_populate)
+#' }
+get_long_text_fields <- function(metadata) {
+  text_fields <-
+    metadata |>
+    # include only categorical field types
+    dplyr::filter(.data$field_type %in% c("text")) |>
+    # excluding anything displayed by branching logic
+    dplyr::filter(is.na(.data$branching_logic)) |>
+    # narrow our focus to the required columns
+    dplyr::select(c(
+      "field_name",
+      "form_name",
+      "field_type",
+      "text_validation_type_or_show_slider_number",
+      "text_validation_min",
+      "text_validation_max"
+    )) |>
+    dplyr::rename(text_validation_type = "text_validation_type_or_show_slider_number") |>
+    dplyr::mutate(tvt = dplyr::case_when(
+      is.na(.data$text_validation_type) ~ "tvt_na",
+      TRUE ~ "tvt_unsupported"
+    )) |>
+    # set weights for each response
+    dplyr::mutate(weight = 100)
+
+  tvt_na <- function(text_fields) {
+    result <-
+      text_fields |>
+      dplyr::filter(is.na(.data$text_validation_type)) |>
+      dplyr::mutate(mean = 1.5, sd = 0.8)
+    return(result)
+  }
+
+  tvt_unsupported <- function(text_fields) {
+    result <-
+      text_fields |>
+      dplyr::filter(F)
+    return(result)
+  }
+
+  tvt_types <- c(
+    "tvt_na",
+    "tvt_unsupported"
+  )
+
+  process_one_text_validation_type <- function(my_tvt, df) {
+    # exec (run) the function named in `alert` on the rows of data that
+    #  have an alert_type of `alert`
+    result <- rlang::exec(my_tvt, df |>
+      dplyr::filter(.data$tvt == my_tvt))
+    return(result)
+  }
+
+  text_fields_and_weights <-
+    purrr::map(tvt_types, process_one_text_validation_type, text_fields) |>
+    dplyr::bind_rows()
+
+  return(text_fields_and_weights)
+}
diff --git a/R/get_one_rectangle_of_values.R b/R/get_one_rectangle_of_values.R
@@ -3,7 +3,8 @@
 #' @param one_record_id a single record_id
 #' @param record_id_name the column name the record_id should be returned in
 #' @param forms_to_fill the forms to fill for this rectangle
-#' @param long_categorical_field_responses the output of `get_long_categorical_field_responses()`
+#' @param long_fields_and_responses the output of `get_long_*_fields` and
+#'   `get_long_categorical_field_responses_responses` functions
 #'
 #' @returns a rectangle of data with appropriate REDCap identifiers ready to write to REDCap
 #' @export
@@ -17,7 +18,7 @@ get_one_rectangle_of_values <- function(
     one_record_id = 1,
     record_id_name,
     forms_to_fill,
-    long_categorical_field_responses) {
+    long_fields_and_responses) {
   # Build tibble of static REDCap identifiers
   redcap_identifiers <- dplyr::tibble(
     record_id = one_record_id
@@ -26,14 +27,25 @@ get_one_rectangle_of_values <- function(
   # fix the first column name
   names(redcap_identifiers) <- record_id_name
 
+  value_getter_functions <- c(
+    "get_long_categorical_field_response_values",
+    "get_long_text_field_values"
+  )
+
+  process_one_value_getter <- function(value_getter, df) {
+    rlang::exec(value_getter, df)
+  }
+
   # pick values for one record on one event
   #    ...by binding the output of each field_type / field_validation function
-  all_responses <- dplyr::bind_rows(
-    get_long_categorical_field_response_values(
-      long_categorical_field_responses |>
+  all_responses <-
+    purrr::map(
+      value_getter_functions,
+      process_one_value_getter,
+      long_fields_and_responses |>
         dplyr::filter(.data$form_name %in% forms_to_fill)
-    )
-  )
+    ) |>
+    dplyr::bind_rows()
 
   # prefix responses with redcap fields
   long_result <- dplyr::bind_cols(

diff --git a/README.md b/README.md
@@ -19,7 +19,7 @@ Copy [proof_of_concept.R](https://github.com/ctsit/redcapfiller/blob/main/proof_
 
 ### Limitations
 
-REDCap Filler does not yet understand all the dimensions of a modern REDCap project. It can fill the categorical fields checkbox, dropdown, and radio. It ignores all other field types and will not attempt to fill them. It doesn't even know how to fill a yesno field. :-( Filler only knows how to fill classic projects without repeating forms or events. It does not honor form display logic and ignores all fields governed by branching logic.
+REDCap Filler does not yet understand all the dimensions of a modern REDCap project. It can fill the categorical fields checkbox, dropdown, and radio. It can fill unvalidated text fields. It ignores all other field types and will not attempt to fill them. It doesn't even know how to fill a yesno field. :-( Filler only knows how to fill classic projects without repeating forms or events. It does not honor form display logic and ignores all fields governed by branching logic.
 
 Focusing more on what Filler _can_ do, the first release milestone will support these features:
 

diff --git a/man/get_long_text_field_values.Rd b/man/get_long_text_field_values.Rd
diff --git a/man/get_long_text_fields.Rd b/man/get_long_text_fields.Rd
diff --git a/man/get_one_rectangle_of_values.Rd b/man/get_one_rectangle_of_values.Rd
diff --git a/proof_of_concept.R b/proof_of_concept.R
@@ -19,7 +19,8 @@ library(redcapfiller)
 path_credential <- Sys.getenv("path_credential")
 credentials <- REDCapR::retrieve_credential_local(
   path_credential,
-  project_id = Sys.getenv("filler_demo_pid")
+  project_id = 16255
+    # Sys.getenv("filler_demo_pid")
 )
 
 metadata <- REDCapR::redcap_metadata_read(
@@ -43,15 +44,16 @@ field_types_we_know_how_to_fill <- c(
   "dropdown",
   # "notes",
   "radio",
-  # "text",
+  "text",
   "yesno"
 )
 
 metadata_to_populate <-
   metadata |>
-  # Filter for record ID and forms we want to fill
-  filter(field_name == record_id_name |
-    form_name %in% forms_to_fill) |>
+  # exclude the record_id field from filling
+  filter(field_name != record_id_name) |>
+  # Filter for forms we want to fill
+  filter(form_name %in% forms_to_fill) |>
   # Exclude descriptive fields because they are not fillable
   filter(field_type != "descriptive") |>
   # Exclude calc fields because we don't control them
@@ -85,13 +87,19 @@ record_ids <- seq(first_id, first_id + number_of_records_to_populate)
 
 # get the categorical field responses in a long table and populate them
 long_categorical_field_responses <- get_long_categorical_field_responses(metadata_to_populate)
+long_text_fields <- get_long_text_fields(metadata_to_populate)
+
+long_fields_and_responses <- bind_rows(
+  long_categorical_field_responses,
+  long_text_fields
+)
 
 picked_values <-
   purrr::map(record_ids,
              get_one_rectangle_of_values,
              record_id_name,
              forms_to_fill,
-             long_categorical_field_responses
+             long_fields_and_responses
              ) |>
   bind_rows()
 

diff --git a/tests/testthat/get_long_text_field_values/input.rds b/tests/testthat/get_long_text_field_values/input.rds
diff --git a/tests/testthat/get_long_text_field_values/make_test_data.R b/tests/testthat/get_long_text_field_values/make_test_data.R
@@ -0,0 +1,25 @@
+# Download the data dictionary from a copy of REDCapR's validation-types-1 project.
+# See https://ctsit.github.io/redcapfiller/articles/demonstration_and_testing.html for details
+
+library(tidyverse)
+library(REDCapR)
+# Read environment from ".env" in the project directory
+library(dotenv)
+load_dot_env(".env")
+
+# Get our credentials using environment variables to locate
+# the credentials file and describe the project in it
+path_credential <- Sys.getenv("path_credential")
+credentials <- REDCapR::retrieve_credential_local(
+  path_credential,
+  project_id = Sys.getenv("filler_demo_pid")
+)
+
+metadata <- REDCapR::redcap_metadata_read(
+  token = credentials$token,
+  redcap_uri = credentials$redcap_uri
+)$data
+
+long_text_fields <- metadata |> get_long_text_fields()
+
+long_text_fields |> saveRDS(testthat::test_path("get_long_text_field_values", "input.rds"))
-Original file line number
+Diff line change
@@ Expand Up / @@ -37,6 +37,8 @@ Suggests: @@
     Config/testthat/edition: 3
     Imports:
         dplyr,
+        lorem,
+        purrr,
         rlang,
         tidyr
     RoxygenNote: 7.3.2
@@ Expand Down @@