-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathautoencoder_wine.R
126 lines (101 loc) · 3.13 KB
/
autoencoder_wine.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
## Housekeeping
library(tidyverse)
library(keras)
library(tensorflow)

# Directory that holds `wine.data`. The script no longer calls setwd(), so the
# caller's working directory is left untouched. Set the WINE_DATA_DIR
# environment variable to read the file from somewhere else.
data_dir <- Sys.getenv(
  "WINE_DATA_DIR",
  unset = "/Users/lena/documents/R/PCA"
)

## Load and prepare data
# Column names for the file: the class label first, then 13 features.
wine_columns <- c(
  "class",
  "alcohol",
  "malic_acid",
  "ash",
  "alca_ash",
  "magnesium",
  "total_phenols",
  "flav",
  "nonflav_phenols",
  "proan",
  "col_intens",
  "hue",
  "protein",
  "proline"
)

# fread() returns a data.table; later sections rely on that class.
df_wine <- data.table::fread(
  file.path(data_dir, "wine.data"),
  col.names = wine_columns
)
## Split into test and train and define x and y
# Every row independently draws a flag: 0 (train, p = .7) or 1 (test, p = .3).
set.seed(48)
df_wine <- mutate(
  df_wine,
  id = sample(
    x = c(0, 1),
    size = nrow(df_wine),
    replace = TRUE,
    prob = c(.7, .3)
  )
)

# Partition on the flag and drop it again.
train <- select(filter(df_wine, id == 0), -id)
test <- select(filter(df_wine, id == 1), -id)

# Columns 2 to 14 are the features, column 1 is the class label.
# The labels stay one-column tables (not vectors).
train_x <- as.matrix(train[, 2:14])
train_y <- train[, 1]
test_x <- as.matrix(test[, 2:14])
test_y <- test[, 1]
## scale values
# minmax_scale <- function(x) {
# (x-min(x))/(max(x)-min(x))
# }
# train_x <- apply(train_x, 2, minmax_scale)
# test_x <- apply(test_x, 2, minmax_scale)

# Standardise a numeric vector to mean 0 and standard deviation 1, using the
# vector's own mean and sd.
z_score <- function(x) {
  (x - mean(x)) / sd(x)
}

# Column statistics are estimated on the training data only. They must be
# taken before train_x is overwritten below.
train_center <- apply(train_x, 2, mean)
train_scale <- apply(train_x, 2, sd)

train_x <- apply(train_x, 2, z_score)

# The test set is transformed with the *training* mean and sd so that both
# sets live on the same scale and no test-set statistics enter preprocessing.
test_x <- sweep(sweep(test_x, 2, train_center, "-"), 2, train_scale, "/")
## define autoencoder
set_random_seed(123)
code_size <- 2
n_features <- ncol(train_x)

# Encoder: n_features -> 16 (tanh) -> code_size (tanh)
encoder_input <- layer_input(shape = n_features)
encoder_hidden <- layer_dense(
  encoder_input,
  units = 16,
  activation = "tanh",
  use_bias = TRUE
)
encoder_output <- layer_dense(
  encoder_hidden,
  units = code_size,
  activation = "tanh"
)
encoder <- keras_model(encoder_input, encoder_output)
summary(encoder)

# Decoder: code_size -> 16 (tanh) -> n_features (no activation specified)
decoder_input <- layer_input(shape = code_size)
decoder_hidden <- layer_dense(
  decoder_input,
  units = 16,
  activation = "tanh",
  use_bias = TRUE
)
decoder_output <- layer_dense(decoder_hidden, units = n_features)
decoder <- keras_model(decoder_input, decoder_output)
summary(decoder)

# Full autoencoder: the encoder followed by the decoder on a fresh input.
ae_input <- layer_input(shape = n_features)
ae_code <- encoder(ae_input)
ae_output <- decoder(ae_code)
ae <- keras_model(ae_input, ae_output)
summary(ae)
# Configure training: mean squared error as the loss, Adam with a learning
# rate of 0.01, and mean absolute error reported as an additional metric.
# (No trailing comma after the last argument: it would pass an empty argument.)
ae %>% compile(
  loss = "mean_squared_error",
  optimizer = optimizer_adam(learning_rate = 0.01),
  metrics = "mean_absolute_error"
)

# Train the autoencoder to reproduce its own input (x and y are both train_x).
ae %>% fit(
  x = train_x,
  y = train_x,
  epochs = 100,
  batch_size = 16,
  verbose = 1
)
# Run both feature matrices through the encoder to obtain their codes.
code_ae_train <- predict(encoder, train_x)
code_ae_test <- predict(encoder, test_x)
## Classification with logit regression
# Turn the encoder output into data frames with explicit column names
# (as.data.frame() names the columns of an unnamed matrix V1, V2, ...), so that
# the training and prediction inputs are guaranteed to use the same names.
code_train <- as.data.frame(code_ae_train)
code_test <- as.data.frame(code_ae_test)

# train_y is a one-column table; [[1]] extracts the label vector.
class_train <- data.frame(
  class = as.factor(train_y[[1]]),
  code_train
)

# multinom() lives in nnet, which this script never attaches, so the call is
# namespace-qualified. `class ~ .` uses every code dimension as a predictor,
# which equals `class ~ V1 + V2` for a two-dimensional code.
logit <- nnet::multinom(class ~ ., data = class_train)
logit_predict <- predict(logit, newdata = code_test)
## Evaluation
# test_y is a one-column table; [[1]] extracts the label vector.
predicted <- as.factor(logit_predict)
truth <- as.factor(test_y[[1]])

# Give both factors one shared level set: comparing factors with `==` fails
# when their level sets differ (e.g. a class missing from the test split).
class_levels <- union(levels(predicted), levels(truth))

validation <- data.frame(
  logit_predict = factor(as.character(predicted), levels = class_levels),
  class = factor(as.character(truth), levels = class_levels)
) %>%
  mutate(compare = logit_predict == class)

# Share of test observations whose predicted class equals the true class.
accuracy <- mean(validation$compare)

# conf_mat() lives in yardstick, which this script never attaches, so the call
# is namespace-qualified.
conf_matrix <- yardstick::conf_mat(
  validation,
  truth = class,
  estimate = logit_predict
)

autoplot(conf_matrix, type = "heatmap") +
  scale_fill_gradient(low = "#D6EAF8", high = "#2E86C1") +
  theme(legend.position = "right") +
  labs(fill = "Abs. freq")