# preprocessing.py
# Forked from wuhuihangsufe/AlphaAI.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pywt
class PreProcessing:
    """Slice a price table into windowed CSV datasets.

    The table is loaded once in ``__init__`` and is addressed purely by
    position: columns 1-5 are processed as features, and column 5 is also the
    column from which targets and hold-out prices are taken.  Every sample is
    a sliding window of 11 consecutive rows.

    ``feature_split`` is the leading fraction of windows written to the
    ``rbm_*`` files; ``split`` is the train fraction applied inside a
    partition.
    """

    # Number of consecutive rows that make up one sliding window.
    _WINDOW = 11
    # Directory that receives most of the generated CSV files.
    _OUTPUT_DIR = "preprocessing"

    def __init__(self, split, feature_split, stock_path="stock_data.csv"):
        """Store the split ratios and load the price table.

        Args:
            split: Train fraction used inside a partition (e.g. ``0.8``).
            feature_split: Leading fraction of windows written to the
                ``rbm_train`` / ``rbm_test`` files (e.g. ``0.25``).
            stock_path: CSV file to read; defaults to the historical
                hard-coded ``"stock_data.csv"``.
        """
        self.split = split
        self.feature_split = feature_split
        self.stock_data = pd.read_csv(stock_path)

    def _window_count(self):
        """Return how many sliding windows the table yields (never negative)."""
        # The row count is rounded down to a multiple of 10 and 11 rows are
        # held back, so row ``start + 11`` exists for every window start.
        return max(0, (len(self.stock_data) // 10) * 10 - self._WINDOW)

    def _ensure_output_dir(self):
        """Create the output directory so ``to_csv`` cannot fail on a fresh tree."""
        os.makedirs(self._OUTPUT_DIR, exist_ok=True)

    # wavelet transform and create autoencoder data
    def make_wavelet_train(self):
        """Write wavelet-denoised log returns, indicators and targets to CSV.

        For every window and every column 1-5 the window is Haar-decomposed,
        both coefficient sets are soft-thresholded at their own standard
        deviation, the signal is reconstructed, and 100x log differences of
        the reconstruction are collected.  Two simple indicators of the raw
        window are collected alongside.

        Files written (all under ``preprocessing/``):
            indicators.csv: per window, two indicators per column.
            log_train.csv: per window, the concatenated log differences.
            rbm_train.csv / rbm_test.csv: leading slices of ``log_train``.
            test_data.csv: 100x log ratio of column 5 between rows
                ``start + 11`` and ``start + 10``.
        """
        n_windows = self._window_count()
        indicator_rows = []
        log_return_rows = []

        for start in range(n_windows):
            indicators = []
            log_returns = []
            for col in range(1, 6):
                window = np.array(
                    self.stock_data.iloc[start:start + self._WINDOW, col]
                )
                approx, detail = pywt.dwt(window, "haar")
                approx = pywt.threshold(approx, np.std(approx), mode="soft")
                detail = pywt.threshold(detail, np.std(detail), mode="soft")
                denoised = pywt.idwt(approx, detail, "haar")
                log_returns.extend(np.diff(np.log(denoised)) * 100)

                # Mean of the window's tail minus the mean of the whole
                # window (the original code calls this value "macd").
                momentum = np.mean(window[5:]) - np.mean(window)
                indicators.extend((momentum * 10, np.std(window)))
            indicator_rows.append(indicators)
            log_return_rows.append(log_returns)

        self._ensure_output_dir()
        pd.DataFrame(indicator_rows).to_csv("preprocessing/indicators.csv")
        pd.DataFrame(log_return_rows, index=None).to_csv(
            "preprocessing/log_train.csv"
        )

        total = len(log_return_rows)
        train_end = int(self.split*self.feature_split*total)
        feature_end = int(self.feature_split*total)
        pd.DataFrame(log_return_rows[0:train_end], index=None).to_csv(
            "preprocessing/rbm_train.csv"
        )
        # NOTE(review): the test slice starts at train_end + 1, so the window
        # at index train_end lands in neither file.  Kept as-is because other
        # stages may assume these exact row counts - confirm before changing.
        pd.DataFrame(log_return_rows[train_end + 1:feature_end]).to_csv(
            "preprocessing/rbm_test.csv"
        )

        targets = [
            100*np.log(
                self.stock_data.iloc[start + 11, 5]
                / self.stock_data.iloc[start + 10, 5]
            )
            for start in range(n_windows)
        ]
        pd.DataFrame(targets).to_csv("preprocessing/test_data.csv")

    def make_test_data(self):
        """Write the column-5 value that follows each window, plus its tail.

        Files written:
            preprocessing/test_stock.csv: value at row ``start + 11`` of
                column 5 for every window start.
            stock_data_test.csv (current directory): the trailing part of
                that series, starting at index
                ``int(feature_split*N + split*(1 - feature_split)*N)``.
        """
        n_windows = self._window_count()
        next_values = [
            self.stock_data.iloc[start + 11, 5] for start in range(n_windows)
        ]

        self._ensure_output_dir()
        pd.DataFrame(next_values).to_csv("preprocessing/test_stock.csv")

        total = len(next_values)
        holdout_start = int(self.feature_split*total +
                            self.split*(1-self.feature_split)*total)
        holdout = np.array(next_values)[holdout_start:]
        pd.DataFrame(holdout, index=None).to_csv("stock_data_test.csv")
# print(train_data[1:5])
# print(test_data[1:5])
# plt.plot(train_data[1])
# plt.show()
if __name__ == "__main__":
    # Script entry point: build every CSV with split=0.8, feature_split=0.25.
    pipeline = PreProcessing(0.8, 0.25)
    # Run the two generation steps in their original order.
    for step in (pipeline.make_wavelet_train, pipeline.make_test_data):
        step()