model_evaluation.py
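"""Evaluate the trained gold price regressor on the held-out test set.

Loads the pickled model and test data, makes predictions, reports the
R squared score as an accuracy percentage, saves actual/predicted/residual
plots under media_plots/, and writes the score to accuracy_percent.txt.
"""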
import os.path

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn import metrics

from train_model import check_folder_exists

# Module-level holder so callers can read the last computed R squared score.
accuracy_percent = 0


def check_pickle_exists():
    """Run training via train_model.main() if any required pickle is missing."""
    if (
        not os.path.exists("pickle_files/X_test.pkl")
        or not os.path.exists("pickle_files/Y_test.pkl")
        or not os.path.exists("pickle_files/regressor.pkl")
    ):
        from train_model import main

        main()


def import_pickle_files():
    """Load the trained regressor and the test split from pickle_files/."""
    regressor = joblib.load("pickle_files/regressor.pkl")
    X_test = joblib.load("pickle_files/X_test.pkl")
    Y_test = joblib.load("pickle_files/Y_test.pkl")
    return regressor, X_test, Y_test


def master():
    """Evaluate the model on the test set, then save plots and the score."""
    check_folder_exists("pickle_files")
    check_folder_exists("media_plots")
    # Load the model from the file, training it first if the pickles are missing.
    try:
        regressor, X_test, Y_test = import_pickle_files()
    except FileNotFoundError:
        check_pickle_exists()
        regressor, X_test, Y_test = import_pickle_files()
    # Use the loaded model to make predictions on the test data.
    test_data_prediction = regressor.predict(X_test)
    # Pickle the predictions as a DataFrame for later use.
    pd.DataFrame(test_data_prediction).to_pickle("pickle_files/test_data_prediction.pkl")
    print("datatype of prediction made - ", type(test_data_prediction), end="\n\n")
    print("actual prediction made using non-gold values (first five values for simplicity) -")
    print(test_data_prediction[0:5], end="\n\n")
    # R squared score to measure how well the predictions fit the test data.
    error_score = metrics.r2_score(Y_test, test_data_prediction)
    print("ACCURACY - ", (error_score * 100), "%", sep="")
    # Visualize actual vs. predicted values.
    Y_test = list(Y_test)
    test_data_prediction = list(test_data_prediction)

    # Helper to avoid repeating the same plotting boilerplate for each figure.
    def save_line_plot(path, *series, alpha=None):
        """Plot each (values, color, label) series and save the figure to path."""
        for values, color, label in series:
            plt.plot(values, color=color, label=label, alpha=alpha)
        plt.title("Number of values")
        plt.ylabel("GOLD prices")
        plt.legend()
        plt.tight_layout()
        plt.savefig(path, bbox_inches="tight")
        # plt.show()  # left disabled; figures are written to disk instead
        plt.close()

    save_line_plot("media_plots/actual_values.png", (Y_test, "blue", "Actual Value"))
    save_line_plot(
        "media_plots/predicted_values.png",
        (test_data_prediction, "red", "Predicted Value"),
    )
    save_line_plot(
        "media_plots/price_comparison.png",
        (test_data_prediction, "red", "Predicted Value"),
        (Y_test, "blue", "Actual Value"),
        alpha=0.5,
    )
    # Residual plot: white tick labels and cyan spines on a black background.
    fig, ax = plt.subplots()
    sns.set_style("whitegrid")
    fig.set_facecolor("black")
    ax.set_facecolor("black")
    ax.set_xlabel("actual")
    ax.set_ylabel("residual")
    for tick_label in ax.get_yticklabels():
        tick_label.set_color("white")
    for tick_label in ax.get_xticklabels():
        tick_label.set_color("white")
    for _, spine in ax.spines.items():
        spine.set_color("cyan")
    plt.grid()
    sns.residplot(x=Y_test, y=test_data_prediction, color="cyan", label="Gold prices")
    fig.savefig("media_plots/error_rate.png")
    # plt.show()
    # Note: despite its name, accuracy_percent holds the raw R squared score.
    global accuracy_percent
    accuracy_percent = error_score
    with open("accuracy_percent.txt", "w") as my_file:
        my_file.write(str(error_score))


if __name__ == "__main__":
    master()
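

# Usage (a sketch, assuming a sibling train_model.py exposing main() and
# check_folder_exists(), as the imports above expect):
#
#     python model_evaluation.py
#
# The run saves figures under media_plots/ and writes the R squared score to
# accuracy_percent.txt in the current working directory.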