From b5821f58cf0a7948277807d21310d56c6993c94a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Th=C3=A9riault?= <13123390+rempsyc@users.noreply.github.com> Date: Sun, 20 Aug 2023 20:21:59 -0400 Subject: [PATCH] `report_participants`: gender supports alternate spellings, convert age to numeric (#389) * Gender now also supports more alternate spellings, and age converts the respective column to numeric. * snapshots * styler * comma * lints * lints2 (stringsAsFactors = FALSE) * lints3 (stringsAsFactors = FALSE in tests, etc.) --- DESCRIPTION | 6 +- NEWS.md | 2 + R/report_participants.R | 48 +++++++---- .../_snaps/windows/report.htest-chi2.md | 4 +- .../_snaps/windows/report_participants.md | 16 +++- tests/testthat/test-report_participants.R | 85 ++++++++++++------- 6 files changed, 112 insertions(+), 49 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cef47160..01ea95c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,7 +56,7 @@ Depends: R (>= 3.6) Imports: bayestestR (>= 0.13.0), - effectsize (> 0.8.2), + effectsize (>= 0.8.5), insight (>= 0.19.3.2), parameters (>= 0.20.2), performance (>= 0.10.2), @@ -143,4 +143,6 @@ Collate: 'utils_grouped_df.R' 'zzz.R' Roxygen: list(markdown = TRUE) -Remotes: easystats/insight +Remotes: + easystats/insight, + easystats/effectsize diff --git a/NEWS.md b/NEWS.md index dafbc243..54b302e0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -26,6 +26,8 @@ Bug fixes * Fixed issue in `report_participants`, which did not print the `"gender"` category for grouped output when that argument was written in lower-case. + Gender now also supports more alternate spellings, and age converts the + respective column to numeric. * Fixed printing issue for intercept-only models. diff --git a/R/report_participants.R b/R/report_participants.R index e23a77d4..efaa8ae3 100644 --- a/R/report_participants.R +++ b/R/report_participants.R @@ -133,7 +133,7 @@ report_participants <- function(data, x[which(x == "")] <- NA x }) - data <- as.data.frame(data_list) + data <- as.data.frame(data_list, stringsAsFactors = FALSE) # find age variable automatically if (is.null(age)) { @@ -256,33 +256,37 @@ report_participants <- function(data, country <- .replace_names(data, country) race <- .replace_names(data, race) + # Set age as numeric + data[[age]] <- as.numeric(data[[age]]) + # Grouped data if (!is.null(participants)) { data <- data.frame( - "Age" = stats::aggregate(data[[age]], + Age = stats::aggregate(data[[age]], by = list(data[[participants]]), FUN = mean )[[2]], - "Sex" = stats::aggregate(data[[sex]], + Sex = stats::aggregate(data[[sex]], by = list(data[[participants]]), FUN = utils::head, n = 1 )[[2]], - "Gender" = stats::aggregate(data[[gender]], + Gender = stats::aggregate(data[[gender]], by = list(data[[participants]]), FUN = utils::head, n = 1 )[[2]], - "Education" = stats::aggregate(data[[education]], + Education = stats::aggregate(data[[education]], by = list(data[[participants]]), FUN = utils::head, n = 1 )[[2]], - "Country" = stats::aggregate(data[[country]], + Country = stats::aggregate(data[[country]], by = list(data[[participants]]), FUN = utils::head, n = 1 )[[2]], - "Race" = stats::aggregate(data[[race]], + Race = stats::aggregate(data[[race]], by = list(data[[participants]]), FUN = utils::head, n = 1 - )[[2]] + )[[2]], + stringsAsFactors = FALSE ) age <- "Age" sex <- "Sex" @@ -344,6 +348,16 @@ report_participants <- function(data, ) } + genders_woman <- c( + "woman", "w", "female", "women", "girl", + "lady", "miss", "madam", "dame", "lass" + ) + genders_man <- c( + "man", "m", "male", "men", "boy", + "guy", "dude", "lad", "sir" + ) + both_genders <- c(genders_woman, genders_man, NA, "na") + text_gender <- if (all(is.na(data[[gender]]))) { "" } else { @@ -351,15 +365,15 @@ report_participants <- function(data, "Gender: ", insight::format_value(length(data[[gender]][tolower( data[[gender]] - ) %in% c("woman", "w", "f", "female")]) / nrow(data) * 100, digits = digits), + ) %in% genders_woman]) / nrow(data) * 100, digits = digits), "% women, ", insight::format_value(length(data[[gender]][tolower( data[[gender]] - ) %in% c("man", "m", "male")]) / nrow(data) * 100, digits = digits), + ) %in% genders_man]) / nrow(data) * 100, digits = digits), "% men, ", insight::format_value(100 - length(data[[gender]][tolower( data[[gender]] - ) %in% c("woman", "w", "f", "female", "man", "m", "male", NA, "na")]) / + ) %in% both_genders]) / nrow(data) * 100), "% non-binary", if (!insight::format_value(length(data[[gender]][tolower( data[[gender]] @@ -405,14 +419,16 @@ report_participants <- function(data, } else { data[[country]] <- as.character(data[[country]]) data[which(data[[country]] %in% c(NA, "NA")), country] <- "missing" - frequency_table <- as.data.frame(datawizard::data_tabulate(data[[country]]))[c(2, 4)] + frequency_table <- as.data.frame(datawizard::data_tabulate(data[[country]]), + stringsAsFactors = FALSE)[c(2, 4)] names(frequency_table)[2] <- "Percent" frequency_table <- frequency_table[-which(is.na(frequency_table$Value)), ] frequency_table <- frequency_table[order(-frequency_table$Percent), ] upper <- frequency_table[which(frequency_table$Percent >= threshold), ] lower <- frequency_table[which(frequency_table$Percent < threshold), ] if (nrow(lower) > 0) { - lower_sum <- data.frame(Value = "other", Percent = sum(lower$Percent), stringsAsFactors = FALSE) + lower_sum <- data.frame(Value = "other", Percent = sum(lower$Percent), + stringsAsFactors = FALSE) combined <- rbind(upper, lower_sum) } else { combined <- upper @@ -427,14 +443,16 @@ report_participants <- function(data, } else { data[[race]] <- as.character(data[[race]]) data[which(data[[race]] %in% c(NA, "NA")), race] <- "missing" - frequency_table <- as.data.frame(datawizard::data_tabulate(data[[race]]))[c(2, 4)] + frequency_table <- as.data.frame(datawizard::data_tabulate(data[[race]]), + stringsAsFactors = FALSE)[c(2, 4)] names(frequency_table)[2] <- "Percent" frequency_table <- frequency_table[-which(is.na(frequency_table$Value)), ] frequency_table <- frequency_table[order(-frequency_table$Percent), ] upper <- frequency_table[which(frequency_table$Percent >= threshold), ] lower <- frequency_table[which(frequency_table$Percent < threshold), ] if (nrow(lower) > 0) { - lower_sum <- data.frame(Value = "other", Percent = sum(lower$Percent), stringsAsFactors = FALSE) + lower_sum <- data.frame(Value = "other", Percent = sum(lower$Percent), + stringsAsFactors = FALSE) combined <- rbind(upper, lower_sum) } else { combined <- upper diff --git a/tests/testthat/_snaps/windows/report.htest-chi2.md b/tests/testthat/_snaps/windows/report.htest-chi2.md index 1b38000b..701b7a3f 100644 --- a/tests/testthat/_snaps/windows/report.htest-chi2.md +++ b/tests/testthat/_snaps/windows/report.htest-chi2.md @@ -84,9 +84,9 @@ Code report_effectsize(x, type = "tschuprows_t") Output - + Effect sizes were labelled following Funder's (2019) recommendations. - (Adjusted Tschuprow's t = 0.08, 95% CI [0.06, 1.00]) + very small (Adjusted Tschuprow's t = 0.08, 95% CI [0.06, 1.00]) --- diff --git a/tests/testthat/_snaps/windows/report_participants.md b/tests/testthat/_snaps/windows/report_participants.md index 8781ee24..f98c936e 100644 --- a/tests/testthat/_snaps/windows/report_participants.md +++ b/tests/testthat/_snaps/windows/report_participants.md @@ -59,5 +59,19 @@ Code report_participants(data, age = "Age", sex = "Sex") Output - [1] "6 participants (8, n = 1; 21, n = 1; 22, n = 1; 23, n = 1; 42, n = 1; 54, n = 1; Sex: 16.7% females, 33.3% males, 16.7% other, 33.33% missing; Gender: 33.3% women, 16.7% men, 16.67% non-binary, 33.33% missing; Education: -5, 16.67%; -3, 16.67%; 0, 16.67%; 3, 16.67%; 5, 16.67%; 8, 16.67%; Country: 33.33% Canada, 16.67% Germany, 16.67% India, 16.67% USA, 16.67% missing; Race: 16.67% A, 16.67% B, 16.67% C, 16.67% D, 16.67% E, 16.67% missing)" + [1] "6 participants (Mean age = 3.5, SD = 1.9, range: [1, 6]; Sex: 16.7% females, 33.3% males, 16.7% other, 33.33% missing; Gender: 33.3% women, 16.7% men, 16.67% non-binary, 33.33% missing; Education: -5, 16.67%; -3, 16.67%; 0, 16.67%; 3, 16.67%; 5, 16.67%; 8, 16.67%; Country: 33.33% Canada, 16.67% Germany, 16.67% India, 16.67% USA, 16.67% missing; Race: 16.67% A, 16.67% B, 16.67% C, 16.67% D, 16.67% E, 16.67% missing)" + +# report_participants age as character + + Code + report_participants(data) + Output + [1] "6 participants (Mean age = 29.5, SD = 15.0, range: [11, 52])" + +# report_participants different gender spellings + + Code + report_participants(data) + Output + [1] "20 participants (Gender: 50.0% women, 45.0% men, 0.00% non-binary, 5.00% missing)" diff --git a/tests/testthat/test-report_participants.R b/tests/testthat/test-report_participants.R index 0a5d097c..20319ddc 100644 --- a/tests/testthat/test-report_participants.R +++ b/tests/testthat/test-report_participants.R @@ -1,9 +1,9 @@ test_that("report_participants, argument gender works", { # Works when capitalized data <- data.frame( - "Age" = c(22, 22, 54, 54, 8, 8, 42, 42), - "Gender" = c("N", "N", "W", "M", "M", "M", "Non-Binary", "Non-Binary"), - "Condition" = c("A", "A", "A", "A", "B", "B", "B", "B"), + Age = c(22, 22, 54, 54, 8, 8, 42, 42), + Gender = c("N", "N", "W", "M", "M", "M", "Non-Binary", "Non-Binary"), + Condition = c("A", "A", "A", "A", "B", "B", "B", "B"), stringsAsFactors = FALSE ) out <- report_participants(data) @@ -41,10 +41,10 @@ test_that("report_participants, argument gender works", { test_that("report_participants", { data <- data.frame( - "Age" = c(22, 22, 54, 54, 8, 8), - "Sex" = c("F", "F", "M", "M", "I", "I"), - "Gender" = c("W", "W", "M", "M", "N", "N"), - "Participant" = c("S1", "S1", "s2", "s2", "s3", "s3"), + Age = c(22, 22, 54, 54, 8, 8), + Sex = c("F", "F", "M", "M", "I", "I"), + Gender = c("W", "W", "M", "M", "N", "N"), + Participant = c("S1", "S1", "s2", "s2", "s3", "s3"), stringsAsFactors = FALSE ) @@ -70,9 +70,9 @@ test_that("report_participants", { ) data2 <- data.frame( - "Age" = c(22, 22, 54, 54, 8, 8), - "Sex" = c("F", "F", "M", "O", "F", "O"), - "Gender" = c("W", "W", "M", "O", "W", "O"), + Age = c(22, 22, 54, 54, 8, 8), + Sex = c("F", "F", "M", "O", "F", "O"), + Gender = c("W", "W", "M", "O", "W", "O"), stringsAsFactors = FALSE ) @@ -82,9 +82,9 @@ test_that("report_participants", { ) data3 <- data.frame( - "Age" = c(22, 82, NA, NA, NA, NA), - "Sex" = c("F", "F", "", "", "NA", NA), - "Gender" = c("W", "W", "", "", "NA", NA), + Age = c(22, 82, NA, NA, NA, NA), + Sex = c("F", "F", "", "", "NA", NA), + Gender = c("W", "W", "", "", "NA", NA), stringsAsFactors = FALSE ) @@ -95,13 +95,13 @@ test_that("report_participants", { # Add tests for education, country, race data4 <- data.frame( - "Education" = c(0, 8, -3, -5, 3, 5, NA), - "Education2" = c( + Education = c(0, 8, -3, -5, 3, 5, NA), + Education2 = c( "Bachelor", "PhD", "Highschool", "Highschool", "Bachelor", "Bachelor", NA ), - "Country" = c("USA", "Canada", "Canada", "India", "Germany", "USA", NA), - "Race" = c("Black", NA, "White", "Asian", "Black", "Black", "White"), + Country = c("USA", "Canada", "Canada", "India", "Germany", "USA", NA), + Race = c("Black", NA, "White", "Asian", "Black", "Black", "White"), stringsAsFactors = FALSE ) @@ -121,12 +121,12 @@ test_that("report_participants", { test_that("report_participants test NAs no warning", { data <- data.frame( - "Age" = c(22, 23, 54, 21, 8, 42), - "Sex" = (c("Intersex", "F", "M", "M", "NA", NA)), - "Gender" = (c("N", "W", "W", "M", "NA", NA)), - "Country" = (c("USA", NA, "Canada", "Canada", "India", "Germany")), - "Education" = factor(c(0, 8, -3, -5, 3, 5)), - "Race" = c(LETTERS[1:5], NA) + Age = c(22, 23, 54, 21, 8, 42), + Sex = (c("Intersex", "F", "M", "M", "NA", NA)), + Gender = (c("N", "W", "W", "M", "NA", NA)), + Country = (c("USA", NA, "Canada", "Canada", "India", "Germany")), + Education = factor(c(0, 8, -3, -5, 3, 5)), + Race = c(LETTERS[1:5], NA) ) expect_snapshot( variant = "windows", @@ -134,15 +134,42 @@ test_that("report_participants test NAs no warning", { ) data <- data.frame( - "Age" = factor(c(22, 23, 54, 21, 8, 42)), - "Sex" = factor(c("Intersex", "F", "M", "M", "NA", NA)), - "Gender" = factor(c("N", "W", "W", "M", "NA", NA)), - "Country" = factor(c("USA", NA, "Canada", "Canada", "India", "Germany")), - "Education" = factor(c(0, 8, -3, -5, 3, 5)), - "Race" = factor(c(LETTERS[1:5], NA)) + Age = factor(c(22, 23, 54, 21, 8, 42)), + Sex = factor(c("Intersex", "F", "M", "M", "NA", NA)), + Gender = factor(c("N", "W", "W", "M", "NA", NA)), + Country = factor(c("USA", NA, "Canada", "Canada", "India", "Germany")), + Education = factor(c(0, 8, -3, -5, 3, 5)), + Race = factor(c(LETTERS[1:5], NA)) ) expect_snapshot( variant = "windows", report_participants(data, age = "Age", sex = "Sex") ) }) + +test_that("report_participants age as character", { + data <- data.frame( + Age = as.character(c(22, 22, 28, 11, 42, 52), stringsAsFactors = FALSE), + stringsAsFactors = FALSE + ) + expect_snapshot( + variant = "windows", + report_participants(data) + ) +}) + +test_that("report_participants different gender spellings", { + data <- data.frame( + Gender = c( + "Man", "M", "Male", "Men", "Boy", "Guy", + "Dude", "Lad", "Sir", + "Woman", "W", "Female", "Women", "Girl", + "Lady", "Miss", "Madam", "Dame", "Lass", + NA + ), stringsAsFactors = FALSE + ) + expect_snapshot( + variant = "windows", + report_participants(data) + ) +})