-
Notifications
You must be signed in to change notification settings - Fork 0
/
5-calculate_t-tests.R
114 lines (103 loc) · 3.32 KB
/
5-calculate_t-tests.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Calculate t-tests for each lab test and disease
library(tidyverse)
library(broom)
library(lsr)
count_patients <- function(data, disease_status) {
n <- data |> count(disease) |> filter(disease == disease_status) |> pull(n)
if (length(n) == 0) {
0
} else {
n
}
}
calculate_statistics <- function(disease) {
in_dir <- file.path("data", disease)
out_dir <- in_dir
labvalues_gendered <-
read_csv(file.path(
in_dir,
str_glue("matched_labvalues_selected_{disease}.csv")
), show_col_types = FALSE) |>
select(disease, gender, label, value = valuenum) |>
nest(.by = c(gender, label)) |>
mutate(
t_test = map(data, safely(
~ t.test(formula = value ~ disease, data = .x)
)),
cohens_d = map(data, safely(
~ cohensD(formula = value ~ disease, data = .x)
)),
successful = map(t_test, ~ pluck(.x, "result")),
effect_size = map(cohens_d, ~ pluck(.x, "result")),
effect_size = map_dbl(effect_size, ~ ifelse(is_null(.x), NA, .x)),
glanced = map(successful, ~ glance(.x)),
ncases = map_int(
data,
~ count_patients(.x, 1)
),
nmatched = map_int(
data,
~ count_patients(.x, 0)
),
ntotal = map_int(
data,
~ count_patients(.x, 1) + count_patients(.x, 0)
),
prop_cases = round(ncases / ntotal, 4),
prop_matched = round(nmatched / ntotal, 4)
) |>
select(-data, -t_test, -cohens_d, -successful) |>
unnest_wider(glanced) |>
mutate(across(where(is.numeric), ~ round(.x, 5)),
significant = p.value <= 0.05) |>
arrange(gender, label) |>
write_csv(str_glue("results/compared_lab_values_{disease}.csv"))
labvalues_overall <-
read_csv(file.path(
in_dir,
str_glue("matched_labvalues_selected_{disease}.csv")
), show_col_types = FALSE) |>
select(disease, label, value = valuenum) |>
nest(.by = c(label)) |>
mutate(
t_test = map(data, safely(
~ t.test(formula = value ~ disease, data = .x)
)),
cohens_d = map(data, safely(
~ cohensD(formula = value ~ disease, data = .x)
)),
successful = map(t_test, ~ pluck(.x, "result")),
effect_size = map(cohens_d, ~ pluck(.x, "result")),
effect_size = map_dbl(effect_size, ~ ifelse(is_null(.x), NA, .x)),
glanced = map(successful, ~ glance(.x)),
ncases = map_dbl(
data,
~ count_patients(.x, 1)
),
nmatched = map_dbl(
data,
~ count_patients(.x, 0)
),
ntotal = map_int(
data,
~ count_patients(.x, 1) + count_patients(.x, 0)
),
prop_cases = round(ncases / ntotal, 4),
prop_matched = round(nmatched / ntotal, 4)
) |>
select(-data, -t_test, -cohens_d, -successful) |>
unnest_wider(glanced) |>
mutate(across(where(is.numeric), ~ round(.x, 5)),
significant = p.value <= 0.05) |>
arrange(label) |>
write_csv(str_glue("results/compared_lab_values_overall_{disease}.csv"))
}
diseases <- list.files("ancillary/icd_codes/") |>
keep(~ str_detect(.x, c("diabetes"), negate = TRUE))
calculate_statistics(diseases[1])
calculate_statistics(diseases[2])
calculate_statistics(diseases[3])
calculate_statistics(diseases[4])
calculate_statistics(diseases[5])
calculate_statistics(diseases[6])
calculate_statistics(diseases[7])