-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtest_mlp2.py
96 lines (86 loc) · 3.88 KB
/
test_mlp2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python
# ------------------------------------------------------------------------------------------------------%
# Created by "Thieu" at 10:16, 06/07/2021 %
# %
# Email: [email protected] %
# Homepage: https://www.researchgate.net/profile/Nguyen_Thieu2 %
# Github: https://github.com/thieu1995 %
# ------------------------------------------------------------------------------------------------------%
from pandas import DataFrame, Series, concat, read_csv
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from math import sqrt
from utils.visual_util import draw_predict
from utils.timeseries_util import *
# fit an MLP network to training data
def fit_model(train, batch_size, nb_epoch, neurons):
    """Build and train a single-hidden-layer MLP regressor.

    Args:
        train: Array indexed as ``train[:, j]``; every column except the
            last is fed to the network as input, the last column is the
            regression target.
        batch_size: Mini-batch size passed to ``model.fit``.
        nb_epoch: Number of training epochs.
        neurons: Number of units in the hidden ReLU layer.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    features = train[:, :-1]
    targets = train[:, -1]
    n_inputs = features.shape[1]

    network = Sequential()
    # Hidden layer followed by a single linear output unit.
    network.add(Dense(neurons, activation='relu', input_dim=n_inputs))
    network.add(Dense(1))
    network.compile(loss='mean_squared_error', optimizer='adam')

    # shuffle=False keeps the rows in their given order during training.
    network.fit(
        features,
        targets,
        epochs=nb_epoch,
        batch_size=batch_size,
        verbose=2,
        shuffle=False,
    )
    return network
# run a repeated experiment
def experiment(repeats, series, epochs, lag, neurons, test_size, batch_size, dataname=None):
    """Fit and score the MLP ``repeats`` times for one lag setting.

    The raw values are differenced (interval 1), framed as a supervised
    table via ``timeseries_to_supervised``, split into train/test, and passed
    through ``scale``. On every repeat a fresh model is trained with
    ``fit_model``, its test forecasts are mapped back with ``invert_scale`` and
    ``inverse_difference``, an RMSE against the raw observations is computed,
    and a plot of the first 200 test points is written via ``draw_predict``.

    Args:
        repeats: Number of independent train/evaluate rounds.
        series: Object exposing ``.values`` with the raw observations.
        epochs: Training epochs for each fit.
        lag: Lag passed to ``timeseries_to_supervised``; the first ``lag``
            rows of the resulting table are discarded (presumably the rows
            padded by the lag shift -- confirm against the helper).
        neurons: Hidden-layer width passed to ``fit_model``.
        test_size: Number of trailing rows held out as the test set.
        batch_size: Batch size used for both fitting and prediction.
        dataname: Label embedded in the plot file name. When omitted, the
            module-level ``dataname`` is used if it exists (the historical
            behaviour), otherwise ``"unknown"``.

    Returns:
        list: One test RMSE per repeat, measured against the raw values.
    """
    if dataname is None:
        # Earlier versions read a module-level ``dataname`` directly; keep
        # honouring it so existing positional callers get the same file names,
        # but no longer crash with NameError when it is absent.
        dataname = globals().get("dataname", "unknown")

    # Make the series stationary, then frame it as a supervised problem.
    raw_values = series.values
    diff_values = difference(raw_values, 1)
    supervised = timeseries_to_supervised(diff_values, lag)
    supervised_values = supervised.values[lag:, :]

    # Hold out the last ``test_size`` rows, then rescale both partitions.
    train, test = supervised_values[:-test_size], supervised_values[-test_size:]
    scaler, train_scaled, test_scaled = scale(train, test)

    # Everything below is identical for every repeat, so compute it once.
    train_trimmed = train_scaled[2:, :]
    test_inputs = test_scaled[:, :-1]
    observed = raw_values[-test_size:]
    n_test = len(test_scaled)
    list_legends = ["Observed", "Predicted"]
    xy_labels = ["#Iteration", "CPU"]
    pathsave = "./data/app/results2"
    exts = [".png", ".pdf"]

    error_scores = []
    for r in range(repeats):
        model = fit_model(train_trimmed, batch_size, epochs, neurons)
        output = model.predict(test_inputs, batch_size=batch_size)

        predictions = []
        for i, row in enumerate(output):
            # Undo the scaling first, then the differencing. The interval
            # ``n_test + 1 - i`` selects the raw value one step before test
            # row ``i`` when counted from the end of ``raw_values``.
            yhat = invert_scale(scaler, test_inputs[i], row[0])
            yhat = inverse_difference(raw_values, yhat, n_test + 1 - i)
            predictions.append(yhat)

        rmse = sqrt(mean_squared_error(observed, predictions))

        # Plot only the first 200 test points of observed vs. predicted.
        list_lines = [observed[:200], predictions[:200]]
        filename = f"mlp2-{dataname}-trial_{r}-{lag}-{neurons}-{test_size}-{batch_size}-{epochs}"
        draw_predict(list_lines, list_legends, xy_labels, "", filename, pathsave, exts, True)

        print(f'{r + 1}) Test RMSE: {rmse:.3f}')
        error_scores.append(rmse)
    return error_scores
# Name of the dataset; also read by experiment() when naming the saved plots.
dataname = "gg_cpu_5m"

# Load only the first column of the cleaned CSV.
series = read_csv(f'./data/app/clean/{dataname}.csv', usecols=[0])
dataset = series.values  # not referenced elsewhere in the visible code

# Experiment configuration (fixed for every run below).
repeats = 3
lags = [18, 36, 48]
neurons = 6
test_size = 1000
batch_size = 128
epoch = 1000

# Vary the lag: one results column per lag, one row per repeat (RMSE).
results = DataFrame()
for lag in lags:
    results[str(lag)] = experiment(
        repeats=repeats,
        series=series,
        epochs=epoch,
        lag=lag,
        neurons=neurons,
        test_size=test_size,
        batch_size=batch_size,
    )

# Summary statistics of the RMSE scores per lag.
print(results.describe())