# RandomForests.py
import pandas as pd
import pydotplus
from sklearn import metrics
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

try:
    # Legacy location: deprecated in scikit-learn 0.21 and removed in 0.23.
    from sklearn.externals.six import StringIO
except ImportError:
    # On Python 3 the vendored six.StringIO was io.StringIO anyway.
    from io import StringIO
# Train a random forest on the skulls data set, report its accuracy on a
# held-out test split, and render one tree of the forest to a PNG file.

# Number of trees in the forest, and the index of the tree to visualize.
# tree_index must be smaller than n_trees.
n_trees = 10
tree_index = 9

# Create RandomForestClassifier object
skullsForest = RandomForestClassifier(n_estimators=n_trees, criterion='entropy')

# Read data from the file
my_data = pd.read_csv('skulls-data.csv', delimiter=',')

# Build the feature matrix by dropping the first three columns.
# NOTE(review): these presumably hold a row index, the 'epoch' target and one
# more non-feature column -- confirm against the CSV.
# axis=1 means we are removing columns instead of rows.
feature_frame = my_data.drop(my_data.columns[[0, 1, 2]], axis=1)
# Take the feature names from the same frame as X so the two can never
# disagree about which columns are features.
featuresNames = list(feature_frame.columns.values)
X = feature_frame.values

# Target labels. Sorted so the list follows the ascending class order that
# scikit-learn uses internally.
targetNames = sorted(my_data['epoch'].unique().tolist())
y = my_data['epoch']

# Hold-out evaluation: a single 70/30 train/test split (not cross-validation).
X_trainset, X_testset, y_trainset, y_testset = train_test_split(
    X, y, test_size=0.3, random_state=3)
skullsForest.fit(X_trainset, y_trainset)
predForest = skullsForest.predict(X_testset)

# Print the results
print(predForest)
print(y_testset)
print("RandomForests's Accuracy: ", metrics.accuracy_score(y_testset, predForest))

# Visualize the data
filename = 'skullforests.png'
# export_graphviz maps class index i to class_names[i], so the names must be
# in the forest's own (ascending) class order. Using order of appearance in
# the file would put the wrong label on the leaves.
class_names = [str(label) for label in skullsForest.classes_]
# Change tree_index above to view a different tree of the forest.
# With out_file=None, export_graphviz returns the DOT source as a string, so
# no intermediate StringIO buffer is needed.
dot_source = tree.export_graphviz(skullsForest.estimators_[tree_index],
                                  out_file=None,
                                  feature_names=featuresNames,
                                  class_names=class_names,
                                  filled=True, rounded=True,
                                  special_characters=True,
                                  leaves_parallel=True)
graph = pydotplus.graph_from_dot_data(dot_source)
# Write the output to the file
graph.write_png(filename)
print('File Created')