Skip to content

Commit

Permalink
Initial commit to redcapfiller
Browse files Browse the repository at this point in the history
Provides these functions:
  get_long_categorical_field_responses()
  get_long_categorical_field_response_values()
  get_one_rectangle_of_values()
  Tests for each of the 3 functions.

Also provides a proof_of_concept.R script with write-back to a demonstration project and properly managed secrets.

Supports filling of checkbox, dropdown, and radio field types on classic projects.
  • Loading branch information
pbchase committed Dec 28, 2024
0 parents commit 3ba8ce0
Show file tree
Hide file tree
Showing 23 changed files with 828 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
^.*\.Rproj$
^\.Rproj\.user$
^LICENSE\.md$
^\.env$
^example\.env$
^proof_of_concept\.R$
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
.env
40 changes: 40 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
Package: redcapfiller
Type: Package
Title: Fill a REDCap Project with Generated Data
Version: 0.1.0
Authors@R: c(
person("Philip", "Chase",
email = "[email protected]",
role = c("aut", "cre"),
comment=c(ORCID = "0000-0002-5318-9420")),
person("Sai Pavan", "Kamma",
email = "[email protected]",
role = "aut",
comment=c(ORCID = "0009-0004-4619-0409")),
person("Laurence", "James-Woodley",
email = "[email protected]",
role = "aut",
comment=c(ORCID = "0000-0002-6418-2742")),
person("Taryn", "Stoffs",
email = "[email protected]",
role = "ctb",
comment=c(ORCID = "0000-0002-0830-8179")),
person("Christopher", "Barnes",
email = "[email protected]",
role = "ctb",
comment=c(ORCID = "0000-0001-7114-1992"))
)
Description: Fill a REDCap project with generated data based on the project design and a minimal set of inputs.
License: Apache License (>= 2)
Encoding: UTF-8
LazyData: true
Suggests:
readr,
stringr,
testthat (>= 3.0.0)
Config/testthat/edition: 3
Imports:
dplyr,
rlang,
tidyr
RoxygenNote: 7.3.2
13 changes: 13 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Copyright 2024 University of Florida

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(get_long_categorical_field_response_values)
export(get_long_categorical_field_responses)
export(get_one_rectangle_of_values)
importFrom(rlang,.data)
38 changes: 38 additions & 0 deletions R/get_long_categorical_field_response_values.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#' @title Pick response values for categorical fields
#' @description
#' Randomly choose response values for the categorical fields described in
#' `long_categorical_field_responses`. Fields whose `field_type` is not
#' `"checkbox"` get exactly one weighted draw per `field_name`. Checkbox
#' rows are sampled per `field_group`, keeping half of each group's rows.
#'
#' @param long_categorical_field_responses a long data set of categorical
#'   field response values and weights. The columns `field_name`,
#'   `field_type`, `field_group`, `response_code`, and `weight` are used.
#'
#' @return a tall dataframe with the columns `field_name` and `value`, one
#'   row for each response that was picked.
#' @export
#'
#' @examples
#' \dontrun{
#' get_long_categorical_field_response_values(long_categorical_field_responses)
#' }
get_long_categorical_field_response_values <- function(long_categorical_field_responses) {
  # Non-checkbox fields hold a single value: one weighted draw per field.
  non_checkbox_rows <- dplyr::filter(
    long_categorical_field_responses,
    .data$field_type != "checkbox"
  )
  one_pick_per_field <- dplyr::ungroup(
    dplyr::slice_sample(
      dplyr::group_by(non_checkbox_rows, .data$field_name),
      n = 1,
      weight_by = .data$weight
    )
  )

  # Checkbox options are sampled within their parent field, keeping half of
  # the options of each field_group.
  checkbox_rows <- dplyr::filter(
    long_categorical_field_responses,
    .data$field_type == "checkbox"
  )
  checkbox_picks <- dplyr::ungroup(
    dplyr::slice_sample(
      dplyr::group_by(checkbox_rows, .data$field_group),
      prop = 0.5,
      weight_by = .data$weight
    )
  )

  # Stack both sets and keep only the field name and its chosen code.
  picked_rows <- dplyr::bind_rows(one_pick_per_field, checkbox_picks)

  dplyr::select(picked_rows, "field_name", value = "response_code")
}
62 changes: 62 additions & 0 deletions R/get_long_categorical_field_responses.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#' @title Get every categorical field response from a REDCap data dictionary
#'
#' @description
#' Given a REDCap data dictionary, enumerate every response value for every
#' categorical field (checkbox, radio, dropdown) in that data dictionary.
#' Fields whose `branching_logic` is not `NA` are excluded.
#'
#' @param metadata A REDCap data dictionary. The columns `field_name`,
#'   `form_name`, `field_type`, `select_choices_or_calculations`, and
#'   `branching_logic` are used.
#'
#' @returns a dataframe with one row per response and these columns
#' \describe{
#'   \item{field_name}{the field name; for checkbox fields this is the
#'     one-hot encoded name `<field_name>___<response_code>`}
#'   \item{form_name}{the form the field belongs to}
#'   \item{field_type}{one of `checkbox`, `radio`, or `dropdown`}
#'   \item{response_code}{the coded value of the response; always `"1"`
#'     for checkbox fields}
#'   \item{response_label}{the label paired with the response code}
#'   \item{field_group}{the field name as it appears in the data dictionary}
#'   \item{weight}{a set of uniform weights across the responses of each
#'     field: `100 / number of responses`, rounded, and never below 1}
#' }
#' @export
#'
#' @examples
#' \dontrun{
#' long_categorical_field_responses <-
#'   get_long_categorical_field_responses(metadata_to_populate)
#' }
get_long_categorical_field_responses <- function(metadata) {
  balanced_responses <-
    metadata |>
    # include only categorical field types
    dplyr::filter(.data$field_type %in% c("checkbox", "radio", "dropdown")) |>
    # excluding anything displayed by branching logic
    dplyr::filter(is.na(.data$branching_logic)) |>
    # narrow our focus to the required columns
    dplyr::select(
      c("field_name", "form_name", "field_type", "select_choices_or_calculations")
    ) |>
    # separate responses
    tidyr::separate_longer_delim(
      "select_choices_or_calculations",
      delim = " | "
    ) |>
    # separate response_codes from response_labels; "merge" keeps labels that
    # themselves contain ", " intact
    tidyr::separate_wider_delim(
      "select_choices_or_calculations",
      delim = ", ",
      names = c("response_code", "response_label"),
      too_many = "merge",
      too_few = "align_start"
    ) |>
    # apply one-hot encoding to checkbox fields, but leave others unmodified
    dplyr::mutate(
      field_group = .data$field_name,
      field_name = dplyr::if_else(
        .data$field_type == "checkbox",
        paste0(.data$field_name, "___", .data$response_code),
        .data$field_name
      ),
      response_code = dplyr::if_else(
        .data$field_type == "checkbox",
        "1",
        .data$response_code
      )
    ) |>
    # set weights for each response
    dplyr::group_by(.data$field_group) |>
    dplyr::mutate(
      # round(100 / n) becomes 0 once a field has 200 or more responses, and
      # all-zero weights cannot be used for weighted sampling, so floor at 1
      weight = max(1, round(100 / dplyr::n(), digits = 0))
    ) |>
    dplyr::ungroup()

  return(balanced_responses)
}
53 changes: 53 additions & 0 deletions R/get_one_rectangle_of_values.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#' Generate a rectangle of data for one record
#'
#' Picks response values for the categorical fields on the requested forms
#' and returns them as one wide row keyed by the record identifier.
#'
#' @param one_record_id a single record_id
#' @param record_id_name the column name the record_id should be returned in
#' @param forms_to_fill the forms to fill for this rectangle
#' @param long_categorical_field_responses the output of
#'   `get_long_categorical_field_responses()`
#'
#' @returns a rectangle of data with appropriate REDCap identifiers ready to
#'   write to REDCap
#' @export
#' @importFrom rlang .data
#'
#' @examples
#' \dontrun{
#' get_one_rectangle_of_values(
#'   1,
#'   record_id_name,
#'   forms_to_fill,
#'   long_categorical_field_responses
#' )
#' }
get_one_rectangle_of_values <- function(
    one_record_id = 1,
    record_id_name,
    forms_to_fill,
    long_categorical_field_responses) {
  # Static REDCap identifiers: a tibble whose single column is then renamed
  # to the caller-supplied record id column name.
  id_columns <- dplyr::tibble(record_id = one_record_id)
  names(id_columns) <- record_id_name

  # Only fields on the requested forms take part in this rectangle.
  responses_on_forms <- dplyr::filter(
    long_categorical_field_responses,
    .data$form_name %in% forms_to_fill
  )

  # Pick values for one record on one event. The list holds the output of
  # each field_type / field_validation generator; only one exists so far.
  generated_values <- dplyr::bind_rows(
    list(
      get_long_categorical_field_response_values(responses_on_forms)
    )
  )

  # Prefix the picked values with the REDCap identifier columns.
  # Later redcap_event_name, redcap_repeat_instrument, dag_name, etc. will
  # be added here where appropriate.
  long_rectangle <- dplyr::bind_cols(id_columns, generated_values)

  # One column per field, one row per identifier combination.
  tidyr::pivot_wider(
    long_rectangle,
    id_cols = dplyr::any_of(names(id_columns)),
    names_from = "field_name",
    values_from = "value"
  )
}
2 changes: 2 additions & 0 deletions example.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
filler_demo_pid=15321
path_credential = ~/credentials.csv
25 changes: 25 additions & 0 deletions man/get_long_categorical_field_response_values.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions man/get_long_categorical_field_responses.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions man/get_one_rectangle_of_values.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3ba8ce0

Please sign in to comment.