Skip to content

Commit

Permalink
report_participants: gender supports alternate spellings, convert a…
Browse files Browse the repository at this point in the history
…ge to numeric (#389)

* Gender now also supports more alternate spellings, and age converts the respective column to numeric.

* snapshots

* styler

* comma

* lints

* lints2 (stringsAsFactors = FALSE)

* lints3 (stringsAsFactors = FALSE in tests, etc.)
  • Loading branch information
rempsyc authored Aug 21, 2023
1 parent 0b1046d commit b5821f5
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 49 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ Depends:
R (>= 3.6)
Imports:
bayestestR (>= 0.13.0),
effectsize (> 0.8.2),
effectsize (>= 0.8.5),
insight (>= 0.19.3.2),
parameters (>= 0.20.2),
performance (>= 0.10.2),
Expand Down Expand Up @@ -143,4 +143,6 @@ Collate:
'utils_grouped_df.R'
'zzz.R'
Roxygen: list(markdown = TRUE)
Remotes: easystats/insight
Remotes:
easystats/insight,
easystats/effectsize
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ Bug fixes

* Fixed issue in `report_participants`, which did not print the `"gender"`
category for grouped output when that argument was written in lower-case.
Gender now also supports more alternate spellings, and age converts the
respective column to numeric.

* Fixed printing issue for intercept-only models.

Expand Down
48 changes: 33 additions & 15 deletions R/report_participants.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ report_participants <- function(data,
x[which(x == "")] <- NA
x
})
data <- as.data.frame(data_list)
data <- as.data.frame(data_list, stringsAsFactors = FALSE)

# find age variable automatically
if (is.null(age)) {
Expand Down Expand Up @@ -256,33 +256,37 @@ report_participants <- function(data,
country <- .replace_names(data, country)
race <- .replace_names(data, race)

# Set age as numeric
data[[age]] <- as.numeric(data[[age]])

# Grouped data
if (!is.null(participants)) {
data <- data.frame(
"Age" = stats::aggregate(data[[age]],
Age = stats::aggregate(data[[age]],
by = list(data[[participants]]),
FUN = mean
)[[2]],
"Sex" = stats::aggregate(data[[sex]],
Sex = stats::aggregate(data[[sex]],
by = list(data[[participants]]),
FUN = utils::head, n = 1
)[[2]],
"Gender" = stats::aggregate(data[[gender]],
Gender = stats::aggregate(data[[gender]],
by = list(data[[participants]]),
FUN = utils::head, n = 1
)[[2]],
"Education" = stats::aggregate(data[[education]],
Education = stats::aggregate(data[[education]],
by = list(data[[participants]]),
FUN = utils::head, n = 1
)[[2]],
"Country" = stats::aggregate(data[[country]],
Country = stats::aggregate(data[[country]],
by = list(data[[participants]]),
FUN = utils::head, n = 1
)[[2]],
"Race" = stats::aggregate(data[[race]],
Race = stats::aggregate(data[[race]],
by = list(data[[participants]]),
FUN = utils::head, n = 1
)[[2]]
)[[2]],
stringsAsFactors = FALSE
)
age <- "Age"
sex <- "Sex"
Expand Down Expand Up @@ -344,22 +348,32 @@ report_participants <- function(data,
)
}

genders_woman <- c(
"woman", "w", "female", "women", "girl",
"lady", "miss", "madam", "dame", "lass"
)
genders_man <- c(
"man", "m", "male", "men", "boy",
"guy", "dude", "lad", "sir"
)
both_genders <- c(genders_woman, genders_man, NA, "na")

text_gender <- if (all(is.na(data[[gender]]))) {
""
} else {
paste0(
"Gender: ",
insight::format_value(length(data[[gender]][tolower(
data[[gender]]
) %in% c("woman", "w", "f", "female")]) / nrow(data) * 100, digits = digits),
) %in% genders_woman]) / nrow(data) * 100, digits = digits),
"% women, ",
insight::format_value(length(data[[gender]][tolower(
data[[gender]]
) %in% c("man", "m", "male")]) / nrow(data) * 100, digits = digits),
) %in% genders_man]) / nrow(data) * 100, digits = digits),
"% men, ",
insight::format_value(100 - length(data[[gender]][tolower(
data[[gender]]
) %in% c("woman", "w", "f", "female", "man", "m", "male", NA, "na")]) /
) %in% both_genders]) /
nrow(data) * 100), "% non-binary",
if (!insight::format_value(length(data[[gender]][tolower(
data[[gender]]
Expand Down Expand Up @@ -405,14 +419,16 @@ report_participants <- function(data,
} else {
data[[country]] <- as.character(data[[country]])
data[which(data[[country]] %in% c(NA, "NA")), country] <- "missing"
frequency_table <- as.data.frame(datawizard::data_tabulate(data[[country]]))[c(2, 4)]
frequency_table <- as.data.frame(datawizard::data_tabulate(data[[country]]),
stringsAsFactors = FALSE)[c(2, 4)]
names(frequency_table)[2] <- "Percent"
frequency_table <- frequency_table[-which(is.na(frequency_table$Value)), ]
frequency_table <- frequency_table[order(-frequency_table$Percent), ]
upper <- frequency_table[which(frequency_table$Percent >= threshold), ]
lower <- frequency_table[which(frequency_table$Percent < threshold), ]
if (nrow(lower) > 0) {
lower_sum <- data.frame(Value = "other", Percent = sum(lower$Percent), stringsAsFactors = FALSE)
lower_sum <- data.frame(Value = "other", Percent = sum(lower$Percent),
stringsAsFactors = FALSE)
combined <- rbind(upper, lower_sum)
} else {
combined <- upper
Expand All @@ -427,14 +443,16 @@ report_participants <- function(data,
} else {
data[[race]] <- as.character(data[[race]])
data[which(data[[race]] %in% c(NA, "NA")), race] <- "missing"
frequency_table <- as.data.frame(datawizard::data_tabulate(data[[race]]))[c(2, 4)]
frequency_table <- as.data.frame(datawizard::data_tabulate(data[[race]]),
stringsAsFactors = FALSE)[c(2, 4)]
names(frequency_table)[2] <- "Percent"
frequency_table <- frequency_table[-which(is.na(frequency_table$Value)), ]
frequency_table <- frequency_table[order(-frequency_table$Percent), ]
upper <- frequency_table[which(frequency_table$Percent >= threshold), ]
lower <- frequency_table[which(frequency_table$Percent < threshold), ]
if (nrow(lower) > 0) {
lower_sum <- data.frame(Value = "other", Percent = sum(lower$Percent), stringsAsFactors = FALSE)
lower_sum <- data.frame(Value = "other", Percent = sum(lower$Percent),
stringsAsFactors = FALSE)
combined <- rbind(upper, lower_sum)
} else {
combined <- upper
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/_snaps/windows/report.htest-chi2.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@
Code
report_effectsize(x, type = "tschuprows_t")
Output
Effect sizes were labelled following Funder's (2019) recommendations.
(Adjusted Tschuprow's t = 0.08, 95% CI [0.06, 1.00])
very small (Adjusted Tschuprow's t = 0.08, 95% CI [0.06, 1.00])

---

Expand Down
16 changes: 15 additions & 1 deletion tests/testthat/_snaps/windows/report_participants.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,19 @@
Code
report_participants(data, age = "Age", sex = "Sex")
Output
[1] "6 participants (8, n = 1; 21, n = 1; 22, n = 1; 23, n = 1; 42, n = 1; 54, n = 1; Sex: 16.7% females, 33.3% males, 16.7% other, 33.33% missing; Gender: 33.3% women, 16.7% men, 16.67% non-binary, 33.33% missing; Education: -5, 16.67%; -3, 16.67%; 0, 16.67%; 3, 16.67%; 5, 16.67%; 8, 16.67%; Country: 33.33% Canada, 16.67% Germany, 16.67% India, 16.67% USA, 16.67% missing; Race: 16.67% A, 16.67% B, 16.67% C, 16.67% D, 16.67% E, 16.67% missing)"
[1] "6 participants (Mean age = 3.5, SD = 1.9, range: [1, 6]; Sex: 16.7% females, 33.3% males, 16.7% other, 33.33% missing; Gender: 33.3% women, 16.7% men, 16.67% non-binary, 33.33% missing; Education: -5, 16.67%; -3, 16.67%; 0, 16.67%; 3, 16.67%; 5, 16.67%; 8, 16.67%; Country: 33.33% Canada, 16.67% Germany, 16.67% India, 16.67% USA, 16.67% missing; Race: 16.67% A, 16.67% B, 16.67% C, 16.67% D, 16.67% E, 16.67% missing)"

# report_participants age as character

Code
report_participants(data)
Output
[1] "6 participants (Mean age = 29.5, SD = 15.0, range: [11, 52])"

# report_participants different gender spellings

Code
report_participants(data)
Output
[1] "20 participants (Gender: 50.0% women, 45.0% men, 0.00% non-binary, 5.00% missing)"

85 changes: 56 additions & 29 deletions tests/testthat/test-report_participants.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
test_that("report_participants, argument gender works", {
# Works when capitalized
data <- data.frame(
"Age" = c(22, 22, 54, 54, 8, 8, 42, 42),
"Gender" = c("N", "N", "W", "M", "M", "M", "Non-Binary", "Non-Binary"),
"Condition" = c("A", "A", "A", "A", "B", "B", "B", "B"),
Age = c(22, 22, 54, 54, 8, 8, 42, 42),
Gender = c("N", "N", "W", "M", "M", "M", "Non-Binary", "Non-Binary"),
Condition = c("A", "A", "A", "A", "B", "B", "B", "B"),
stringsAsFactors = FALSE
)
out <- report_participants(data)
Expand Down Expand Up @@ -41,10 +41,10 @@ test_that("report_participants, argument gender works", {

test_that("report_participants", {
data <- data.frame(
"Age" = c(22, 22, 54, 54, 8, 8),
"Sex" = c("F", "F", "M", "M", "I", "I"),
"Gender" = c("W", "W", "M", "M", "N", "N"),
"Participant" = c("S1", "S1", "s2", "s2", "s3", "s3"),
Age = c(22, 22, 54, 54, 8, 8),
Sex = c("F", "F", "M", "M", "I", "I"),
Gender = c("W", "W", "M", "M", "N", "N"),
Participant = c("S1", "S1", "s2", "s2", "s3", "s3"),
stringsAsFactors = FALSE
)

Expand All @@ -70,9 +70,9 @@ test_that("report_participants", {
)

data2 <- data.frame(
"Age" = c(22, 22, 54, 54, 8, 8),
"Sex" = c("F", "F", "M", "O", "F", "O"),
"Gender" = c("W", "W", "M", "O", "W", "O"),
Age = c(22, 22, 54, 54, 8, 8),
Sex = c("F", "F", "M", "O", "F", "O"),
Gender = c("W", "W", "M", "O", "W", "O"),
stringsAsFactors = FALSE
)

Expand All @@ -82,9 +82,9 @@ test_that("report_participants", {
)

data3 <- data.frame(
"Age" = c(22, 82, NA, NA, NA, NA),
"Sex" = c("F", "F", "", "", "NA", NA),
"Gender" = c("W", "W", "", "", "NA", NA),
Age = c(22, 82, NA, NA, NA, NA),
Sex = c("F", "F", "", "", "NA", NA),
Gender = c("W", "W", "", "", "NA", NA),
stringsAsFactors = FALSE
)

Expand All @@ -95,13 +95,13 @@ test_that("report_participants", {

# Add tests for education, country, race
data4 <- data.frame(
"Education" = c(0, 8, -3, -5, 3, 5, NA),
"Education2" = c(
Education = c(0, 8, -3, -5, 3, 5, NA),
Education2 = c(
"Bachelor", "PhD", "Highschool", "Highschool",
"Bachelor", "Bachelor", NA
),
"Country" = c("USA", "Canada", "Canada", "India", "Germany", "USA", NA),
"Race" = c("Black", NA, "White", "Asian", "Black", "Black", "White"),
Country = c("USA", "Canada", "Canada", "India", "Germany", "USA", NA),
Race = c("Black", NA, "White", "Asian", "Black", "Black", "White"),
stringsAsFactors = FALSE
)

Expand All @@ -121,28 +121,55 @@ test_that("report_participants", {

test_that("report_participants test NAs no warning", {
data <- data.frame(
"Age" = c(22, 23, 54, 21, 8, 42),
"Sex" = (c("Intersex", "F", "M", "M", "NA", NA)),
"Gender" = (c("N", "W", "W", "M", "NA", NA)),
"Country" = (c("USA", NA, "Canada", "Canada", "India", "Germany")),
"Education" = factor(c(0, 8, -3, -5, 3, 5)),
"Race" = c(LETTERS[1:5], NA)
Age = c(22, 23, 54, 21, 8, 42),
Sex = (c("Intersex", "F", "M", "M", "NA", NA)),
Gender = (c("N", "W", "W", "M", "NA", NA)),
Country = (c("USA", NA, "Canada", "Canada", "India", "Germany")),
Education = factor(c(0, 8, -3, -5, 3, 5)),
Race = c(LETTERS[1:5], NA)
)
expect_snapshot(
variant = "windows",
report_participants(data)
)

data <- data.frame(
"Age" = factor(c(22, 23, 54, 21, 8, 42)),
"Sex" = factor(c("Intersex", "F", "M", "M", "NA", NA)),
"Gender" = factor(c("N", "W", "W", "M", "NA", NA)),
"Country" = factor(c("USA", NA, "Canada", "Canada", "India", "Germany")),
"Education" = factor(c(0, 8, -3, -5, 3, 5)),
"Race" = factor(c(LETTERS[1:5], NA))
Age = factor(c(22, 23, 54, 21, 8, 42)),
Sex = factor(c("Intersex", "F", "M", "M", "NA", NA)),
Gender = factor(c("N", "W", "W", "M", "NA", NA)),
Country = factor(c("USA", NA, "Canada", "Canada", "India", "Germany")),
Education = factor(c(0, 8, -3, -5, 3, 5)),
Race = factor(c(LETTERS[1:5], NA))
)
expect_snapshot(
variant = "windows",
report_participants(data, age = "Age", sex = "Sex")
)
})

test_that("report_participants age as character", {
data <- data.frame(
Age = as.character(c(22, 22, 28, 11, 42, 52), stringsAsFactors = FALSE),
stringsAsFactors = FALSE
)
expect_snapshot(
variant = "windows",
report_participants(data)
)
})

test_that("report_participants different gender spellings", {
data <- data.frame(
Gender = c(
"Man", "M", "Male", "Men", "Boy", "Guy",
"Dude", "Lad", "Sir",
"Woman", "W", "Female", "Women", "Girl",
"Lady", "Miss", "Madam", "Dame", "Lass",
NA
), stringsAsFactors = FALSE
)
expect_snapshot(
variant = "windows",
report_participants(data)
)
})

0 comments on commit b5821f5

Please sign in to comment.