forked from ed-wilkes/predictive-modelling
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcustomSample.R
35 lines (26 loc) · 1.39 KB
/
customSample.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
## Custom sampling method for caret models
##
## `custom` is a list with three elements:
##   name  - label for this sampling scheme ("customSample").
##   func  - function(x, y) that down-samples one class and returns
##           list(x = <data frame of predictors>, y = <factor of outcomes>).
##   first - TRUE. NOTE(review): presumably tells caret to run the sampling
##           before pre-processing - confirm against the caret documentation.
##
## What `func` does:
##   * `x` is coerced to a data frame if it is not one already.
##   * If `y` is not a factor, a warning is raised and the (coerced) input is
##     returned without any sampling.
##   * Every row whose outcome differs from the "normal" label is kept, in
##     its original order.
##   * Rows carrying the "normal" label are randomly sampled without
##     replacement down to the size of the largest remaining class (or to
##     however many "normal" rows exist, if that is fewer).
##   * Rows whose outcome is NA are dropped.
##   * Columns of `x` named "y" or ".outcome" are removed from the result.
custom <- list(
  name = "customSample",
  func = function(x, y) {
    ## Label of the class that is down-sampled.
    majority_label <- "No.significant.abnormality.detected."

    ## Check input data
    if (!is.data.frame(x)) {
      x <- as.data.frame(x)
    }
    if (!is.factor(y)) {
      warning(paste0(
        "Custom-sampling requires a factor variable as the response.\n",
        "The original data was returned."
      ))
      return(list(x = x, y = y))
    }

    ## Sample normals
    ## which() silently skips NA outcomes, so such rows land in neither group.
    is_majority <- y == majority_label
    minority_rows <- which(!is_majority)
    majority_rows <- which(is_majority)

    ## Size of the largest non-"normal" class (0 when there is none).
    max_freq <- 0L
    if (length(minority_rows) > 0L) {
      max_freq <- max(table(y[minority_rows]))
    }

    ## Never ask for more rows than exist: sampling without replacement
    ## would otherwise fail when the "normal" class is the smaller one.
    n_keep <- min(max_freq, length(majority_rows))
    sampled_rows <- integer(0)
    if (n_keep > 0L) {
      sampled_rows <- majority_rows[sample.int(length(majority_rows), n_keep)]
    }

    ## Minority rows first, sampled "normal" rows after them.
    keep_rows <- c(minority_rows, sampled_rows)
    keep_cols <- !(colnames(x) %in% c("y", ".outcome"))

    ## drop = FALSE keeps a data frame even when only one predictor remains.
    x <- x[keep_rows, keep_cols, drop = FALSE]
    rownames(x) <- NULL
    y <- y[keep_rows]

    ## Return as list
    list(x = x, y = y)
  },
  first = TRUE
)