-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnotebookScriptARIMA.py
69 lines (58 loc) · 2.85 KB
/
notebookScriptARIMA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
def perform_adf_test(series):  # Augmented Dickey-Fuller test
    """Run an Augmented Dickey-Fuller test on *series* and print the outcome.

    NaN entries are dropped before the series is handed to ``adfuller``.
    The test statistic, the p-value and every critical value are written
    to stdout; nothing is returned.

    Args:
        series: Object exposing ``dropna()`` (the callers in this file pass
            a pandas Series) holding the observations to test.
    """
    adf_output = adfuller(series.dropna())

    # The first two entries of the adfuller result are the statistic and p-value.
    adf_statistic, p_value = adf_output[0], adf_output[1]
    print('ADF Statistic: %f' % adf_statistic)
    print('p-value: %f' % p_value)

    # Entry 4 is a mapping from significance level to its critical value.
    print('Critical Values:')
    critical_values = adf_output[4]
    for level in critical_values:
        print('\t%s: %.3f' % (level, critical_values[level]))
def run_arima(series, order):  # Autoregressive Integrated Moving Average
    """Fit an ARIMA model to *series*, print its summary and return the fit.

    Args:
        series: The observations to model; passed straight to ``ARIMA``.
        order: The ``order`` argument forwarded to ``ARIMA`` (the callers in
            this file pass a ``(p, d, q)`` tuple).

    Returns:
        The object returned by ``ARIMA(...).fit()``.
    """
    fitted = ARIMA(series, order=order).fit()
    print(fitted.summary())
    return fitted
# --- Configuration -----------------------------------------------------------
# 'min' replaces the 'T' offset alias, which is deprecated since pandas 2.2;
# both denote minutes, so the resampling grid is unchanged.
RESAMPLE_FREQ = '10min'
# 144 steps on a 10-minute grid cover 24 hours (matches the plot titles).
FORECAST_STEPS = 144
ARIMA_ORDER = (1, 0, 1)


def _read_memory_csv(csv_path):
    """Read a memory CSV, parsing the 'timestamp' column as day-first dates."""
    return pd.read_csv(csv_path, parse_dates=['timestamp'], dayfirst=True)


def _resample_memory(memory_df):
    """Index by timestamp, average onto the RESAMPLE_FREQ grid, interpolate gaps."""
    # NOTE(review): .mean() runs over every remaining column; this assumes
    # they are all numeric -- confirm against the CSV schema.
    return memory_df.set_index('timestamp').resample(RESAMPLE_FREQ).mean().interpolate()


def _forecast_index(resampled):
    """Return FORECAST_STEPS timestamps following the last resampled timestamp."""
    # Generate one extra point starting AT the last observation, then drop it,
    # so the forecast index begins one step after the observed data.
    return pd.date_range(
        start=resampled.index[-1],
        periods=FORECAST_STEPS + 1,
        freq=RESAMPLE_FREQ,
    )[1:]


def _plot_forecast(vm_label, resampled, forecast_index, forecast_mean, conf_int):
    """Plot observed memory usage, the forecast mean and its confidence band."""
    plt.figure(figsize=(12, 6))
    plt.plot(resampled.index, resampled['mem_used'], label='%s Memory Used' % vm_label)
    plt.plot(forecast_index, forecast_mean, label='%s Forecast' % vm_label, color='red')
    # First/second columns of conf_int are the lower/upper interval bounds.
    plt.fill_between(forecast_index, conf_int.iloc[:, 0], conf_int.iloc[:, 1], color='red', alpha=0.3)
    plt.title('%s Memory Usage Forecast for the Next 24 Hours' % vm_label)
    plt.xlabel('Time')
    plt.ylabel('Memory Used (MB)')
    plt.legend()
    plt.show()


# Load and resample the data for VM1 and VM2
vm1_memory_df = _read_memory_csv('/mnt/data/memory_data1.csv')
vm1_resampled = _resample_memory(vm1_memory_df)
vm2_memory_df = _read_memory_csv('/mnt/data/memory_data2.csv')
vm2_resampled = _resample_memory(vm2_memory_df)

# ADF test and ARIMA fit for VM1 and VM2
print("VM1 Memory Data:")
perform_adf_test(vm1_resampled['mem_used'])
vm1_arima_results = run_arima(vm1_resampled['mem_used'], order=ARIMA_ORDER)
print("\nVM2 Memory Data:")
perform_adf_test(vm2_resampled['mem_used'])
vm2_arima_results = run_arima(vm2_resampled['mem_used'], order=ARIMA_ORDER)

# Forecasting for VM1 and VM2
vm1_forecast = vm1_arima_results.get_forecast(steps=FORECAST_STEPS)
vm1_forecast_index = _forecast_index(vm1_resampled)
vm1_forecast_mean = vm1_forecast.predicted_mean
vm1_conf_int = vm1_forecast.conf_int()
vm2_forecast = vm2_arima_results.get_forecast(steps=FORECAST_STEPS)
vm2_forecast_index = _forecast_index(vm2_resampled)
vm2_forecast_mean = vm2_forecast.predicted_mean
vm2_conf_int = vm2_forecast.conf_int()

# Plot the charts for VM1 and VM2
_plot_forecast('VM1', vm1_resampled, vm1_forecast_index, vm1_forecast_mean, vm1_conf_int)
_plot_forecast('VM2', vm2_resampled, vm2_forecast_index, vm2_forecast_mean, vm2_conf_int)