-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpredict-individual.Rmd
118 lines (95 loc) · 3.35 KB
/
predict-individual.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
```{r init}
# Pull in the project's function library without echoing the sourced code.
source("lib/function_library.R", echo = FALSE)

# Load the remaining R source files from the lib directory.
load_all_code("lib")

# Load the required packages; automatic installation of missing ones is off.
load_all_packages(auto_install = FALSE)

# Shared configuration: a verbosity flag plus the directories that the later
# chunks read from and write to.
conf <- list(
  verbose = FALSE,
  dir = ".",
  data_dir = "data",
  tex_dir = "tex",
  visual_dir = "visuals"
)
```
```{r predict-setup}
# Setup multinode/multicore processing if available.
conf$cluster <- ck37r::parallelize(allow_multinode = TRUE)

# Method name kept in the configuration (not referenced by the visible chunks).
# conf$method <- "method.AUC"
conf$method <- "method.NNLS"

# Fold counts: the outer count is passed to gen_superlearner() below, the inner
# count is passed to fit_model_libs() in the predict chunk.
# conf$outer_cv_folds <- 10
conf$outer_cv_folds <- 20
conf$cv_folds <- 10

# If the cluster object is null (or a lone NA) we're set up for multicore, or
# nothing at all.
# isTRUE() keeps the condition scalar: is.na() on a multi-node cluster object
# returns one value per node, and `||` raises an error for operands longer
# than one as of R 4.3.
if (is.null(conf$cluster) || isTRUE(is.na(conf$cluster))) {
  # Use non-CV if we're running this in RStudio on a laptop.
  fn <- if (.Platform$GUI == "RStudio") "sl_fn" else "cv_sl_fn"

  # Manually change this to FALSE to force non-parallel SuperLearner.
  if (TRUE) {
    # Multicore version: pick the chosen element from the generated functions.
    conf$sl_fn <- ck37r::gen_superlearner(
      parallel = "multicore",
      outer_cv_folds = conf$outer_cv_folds
    )[[fn]]
  } else {
    # Sequential version.
    conf$sl_fn <- SuperLearner
  }

  # fn was only needed to select the function above.
  rm(fn)
} else {
  # Multinode version: always uses the cv_sl_fn element.
  conf$sl_fn <- ck37r::gen_superlearner(
    parallel = "snow",
    cluster = conf$cluster,
    outer_cv_folds = conf$outer_cv_folds
  )$cv_sl_fn
}
```
## Individual Prediction
```{r predict}
# Backup our current configuration before we load the RData file, in case the
# file carries its own `conf` object that would replace ours.
conf_backup <- conf

# Load the previously saved dataset, to avoid duplicating the code that
# builds it.
load(paste0(conf$dir, "/data/create-dataset.RData"))

# Restore our backup configuration.
conf <- conf_backup
rm(conf_backup)

# getDoParName() returns NULL when no foreach backend is registered, and
# `NULL == "doSNOW"` is logical(0), which if() rejects with "argument is of
# length zero". isTRUE() turns that case into a plain FALSE.
if (isTRUE(foreach::getDoParName() == "doSNOW")) {
  # Set multinode-compatible seed.
  parallel::clusterSetRNGStream(conf$cluster, iseed = 1)
} else {
  # Set multicore-compatible seed.
  set.seed(1, "L'Ecuyer-CMRG")
}

# Predictors and outcome taken from the loaded `dataset` object.
X <- dataset$X
Y <- dataset$outcomes$progression

# Build the list of learner libraries and report how many there are.
libs <- individual_library_seq(dim(X), type = "regression", glm = FALSE)
length(libs)

# Review final data dimensions.
dim(X)

# Temporarily reduce library size.
# rpartPrune not working :/
# libs <- libs[1:4]
# Test glmnet and XGB
libs <- libs[c(3, 11)]

# Fit every remaining library with the SuperLearner function chosen during
# setup.
results <- fit_model_libs(Y, X, libs, family = gaussian(), sl_fn = conf$sl_fn,
                          cv_folds = conf$cv_folds, cluster = conf$cluster)

# Persist the fits together with the inputs needed to analyze them later.
save(results, libs, conf, dataset,
     file = paste0(conf$data_dir, "/predict-indiv-results.RData"))
```
Process results.
```{r process-results, eval=F}
# The results file was saved together with `conf`, so loading it would replace
# the live configuration (including conf$cluster, which the cleanup chunk still
# needs). Back up and restore `conf` around the load, as the predict chunk does.
conf_backup <- conf
load(paste0(conf$data_dir, "/predict-indiv-results.RData"))
conf <- conf_backup
rm(conf_backup)

# TODO: update this for PPMI!!!
# Create AUC plots for each SL result, plus a summary table.
# NOTE(review): "Regresson" in the MARS label looks like a typo for
# "Regression"; left unchanged here because it is output text.
lib_names <- c("Mean", "Pruned Tree", "Elastic Net", "Elastic Net",
               "Support Vector Machine (radial)",
               "Multivariate Adaptive Regresson Spline (MARS)", "Polymars",
               "Elastic Net screener + OLS",
               "Random Forest", "Gradient Boosted Machine",
               "XGBoost (GBM)", "XGBoost (GBM)")

# Y is not part of the loaded file; it must still exist from the predict chunk.
# NOTE(review): with visual_dir = "visuals" and tex_dir = "tex" these paths
# resolve to "visuals/visuals/" and "tex/tex/" -- confirm that is intended.
analyze_results <- plot_all_results(
  results, Y, "indiv", libs,
  plot_dir = paste0(conf$visual_dir, "/visuals/"),
  tex_dir = paste0(conf$tex_dir, "/tex/"),
  lib_names = lib_names
)

save(analyze_results,
     file = paste0(conf$data_dir, "/predict-indiv-analysis.RData"))
```
```{r cleanup}
# Hand the cluster object obtained from ck37r::parallelize() during setup back
# to ck37r so it can be stopped.
ck37r::stop_cluster(conf$cluster)
```