Skip to content

Commit

Permalink
add row_mean()
Browse files Browse the repository at this point in the history
close #142
  • Loading branch information
wibeasley committed Oct 3, 2024
1 parent 8e1916a commit f172b52
Show file tree
Hide file tree
Showing 4 changed files with 433 additions and 2 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ export(readr_spec_aligned)
export(replace_nas_with_explicit)
export(replace_with_nas)
export(retrieve_key_value)
export(row_mean)
export(row_sum)
export(snake_case)
export(trim_character)
Expand Down
97 changes: 96 additions & 1 deletion R/row.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' @name row_sum
#' @name row
#' @title Find the sum of selected columns within a row
#'
#' @description Sums across columns within a row,
Expand Down Expand Up @@ -53,6 +53,13 @@
#' nonmissing_count_name = "engine_nonmissing_count"
#' )
#'
#' mtcars |>
#' OuhscMunge::row_mean(
#' columns_to_process = c("cyl", "disp", "vs", "carb"),
#' new_column_name = "engine_mean",
#' nonmissing_count_name = "engine_nonmissing_count"
#' )
#'
#' if (require(tidyr))
#' tidyr::billboard |>
#' OuhscMunge::row_sum(
Expand All @@ -79,6 +86,7 @@
#' week_sum,
#' )

#' @rdname row
#' @export
row_sum <- function(
d,
Expand Down Expand Up @@ -177,3 +185,90 @@ row_sum <- function(

d
}

#' @rdname row
#' @export
row_mean <- function(
d,
columns_to_process = character(0),
pattern = "",
new_column_name = "row_mean",
threshold_proportion = .75,
nonmissing_count_name = NA_character_,
verbose = FALSE
) {
checkmate::assert_data_frame(d)
checkmate::assert_character(columns_to_process , any.missing = FALSE)
checkmate::assert_character(pattern , len = 1)
checkmate::assert_character(new_column_name , len = 1)
checkmate::assert_double( threshold_proportion, len = 1)
checkmate::assert_character(nonmissing_count_name, len = 1, min.chars = 1, any.missing = TRUE)
checkmate::assert_logical( verbose , len = 1)

if (length(columns_to_process) == 0L) {
columns_to_process <-
d |>
colnames() |>
grep(
x = _,
pattern = pattern,
value = TRUE,
perl = TRUE
)

if (verbose) {
message(
"The following columns will be processed:\n- ",
paste(columns_to_process, collapse = "\n- ")
)
}
}

.rm <- .nonmissing_count <- .nonmissing_proportion <- NULL
d <-
d |>
dplyr::mutate(
.rm =
rowMeans(
dplyr::across(!!columns_to_process),
na.rm = TRUE
),
.nonmissing_count =
rowSums(
dplyr::across(
!!columns_to_process,
.fns = \(x) {
!is.na(x)
}
)
),
.nonmissing_proportion = .nonmissing_count / length(columns_to_process),
{{new_column_name}} :=
dplyr::if_else(
threshold_proportion <= .nonmissing_proportion,
.rm,
# .rs / .nonmissing_count,
NA_real_
)
)

if (!is.na(nonmissing_count_name)) {
d <-
d |>
dplyr::mutate(
{{nonmissing_count_name}} := .nonmissing_count,
)
}

d <-
d |>
dplyr::select(
-.rm,
-.nonmissing_count,
-.nonmissing_proportion,
)
# Alternatively, return just the new columns
# dplyr::pull({{new_column_name}})

d
}
21 changes: 20 additions & 1 deletion man/row_sum.Rd → man/row.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f172b52

Please sign in to comment.