-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexample0_iris.py
172 lines (145 loc) · 5.95 KB
/
example0_iris.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# Copyright © The University of Edinburgh, 2024.
# Development has been supported by GSK.
"""Run example predict.profile on IRIS dataset
Uses fire to run classification, regression or view prediction (multiregression)
on the sklearn iris dataset.
Usage:
example0_iris.py COMMAND OUTPUT_DIR
COMMAND is one of the following:
c (shortcut for classification)
Run sklearn Iris example, predicting iris class from 1 and 2 views.
classification
Run sklearn Iris example, predicting iris class from 1 and 2 views.
r (shortcut for regression)
Run sklearn Iris example, predicting sepal length from 1 and 2 views.
regression
Run sklearn Iris example, predicting sepal length from 1 and 2 views.
mr (shortcut for multiregression)
Run sklearn Iris example, predicting the features of view 2 from the features of view 1.
multiregression
Run sklearn Iris example, predicting the features of view 2 from the features of view 1.
OUTPUT_DIR is the destination output directory
"""
import fire
from sklearn.datasets import load_iris
import phenonaut
import phenonaut.predict
from phenonaut.data import Dataset
def phenonaut_object_iris_2_views(iris_df, keep_only_class_1_and_2=False):
    """Split an iris DataFrame into two datasets held by one Phenonaut object.

    Columns are addressed by position: columns 0-1 become the features of
    the first view and columns 2-3 the features of the second view. Column 4
    is copied into both views (for the sklearn iris frame passed by the
    callers in this file this is presumably the "target" column).

    Parameters
    ----------
    iris_df : pd.DataFrame
        Iris data with at least five columns. Must contain a "target" column
        when ``keep_only_class_1_and_2`` is True.
    keep_only_class_1_and_2 : bool, optional
        If True, keep only the rows whose "target" value is 0 or 1,
        by default False.

    Returns
    -------
    phenonaut.Phenonaut
        Object named "Iris dataset from scikit-learn" holding the datasets
        "Iris_view1" and "Iris_view2".
    """
    if keep_only_class_1_and_2:
        iris_df = iris_df.query("target==0 or target==1")
    all_columns = iris_df.columns.to_list()

    views = []
    # Each view takes two consecutive feature columns plus shared column 4.
    for view_name, first_col in (("Iris_view1", 0), ("Iris_view2", 2)):
        view_df = iris_df.iloc[:, [first_col, first_col + 1, 4]].copy()
        feature_names = all_columns[first_col : first_col + 2]
        views.append(Dataset(view_name, view_df, {"features": feature_names}))

    return phenonaut.Phenonaut(views, name="Iris dataset from scikit-learn")
def run_phenonaut_iris_classification(
    output_path: str,
    n_optuna_trials: int = 20,
    run_predictors: bool = True,
    optuna_merge_folds: bool = False,
):
    """Run sklearn Iris example, predicting iris class from 1 and 2 views.

    The iris frame is loaded from scikit-learn, reduced to the rows with
    target 0 or 1, split into two views with phenonaut_object_iris_2_views
    and handed to phenonaut.predict.profile. No explicit target is passed.

    Parameters
    ----------
    output_path : str
        Output directory for the results of phenonaut.predict.profile.
    n_optuna_trials : int, optional
        Number of optuna trials to run for each predictor and each view,
        by default 20.
    run_predictors : bool, optional
        If True, run all predictors (``predictors=None`` is passed to
        profile); if False, an empty predictor list is passed instead,
        by default True.
    optuna_merge_folds : bool, optional
        If False, optuna optimises every fold of the train-validation split
        separately for each predictor and view pair. If True, the average
        validation score over the folds is reported to optuna, optimising
        models across all folds, by default False.
    """
    iris_frame = load_iris(as_frame=True).frame
    phe = phenonaut_object_iris_2_views(iris_frame, keep_only_class_1_and_2=True)

    # None requests all predictors; an empty list supplies none.
    if run_predictors:
        predictors = None
    else:
        predictors = []

    phenonaut.predict.profile(
        phe,
        output_path,
        n_optuna_trials=n_optuna_trials,
        optuna_merge_folds=optuna_merge_folds,
        predictors=predictors,
    )
def run_phenonaut_iris_regression(
    output_path: str,
    n_optuna_trials: int = 20,
    run_predictors: bool = True,
    optuna_merge_folds: bool = False,
):
    """Run sklearn Iris example, predicting sepal length from 1 and 2 views.

    A Phenonaut object is built directly from ``load_iris()``. The
    "sepal length (cm)" column of its first dataset is captured as the
    prediction target, then that column and "target" are dropped from the
    dataset before it is passed to phenonaut.predict.profile.

    Parameters
    ----------
    output_path : str
        Output directory for the results of phenonaut.predict.profile.
    n_optuna_trials : int, optional
        Number of optuna trials to run for each predictor and each view,
        by default 20.
    run_predictors : bool, optional
        If True, run all predictors (``predictors=None`` is passed to
        profile); if False, an empty predictor list is passed instead,
        by default True.
    optuna_merge_folds : bool, optional
        If False, optuna optimises every fold of the train-validation split
        separately for each predictor and view pair. If True, the average
        validation score over the folds is reported to optuna, optimising
        models across all folds, by default False.
    """
    target_column_name = "sepal length (cm)"
    phe = phenonaut.Phenonaut(
        load_iris(), dataframe_name="Iris, predict sepal length"
    )

    # Capture the target values before the column is removed from the dataset.
    sepal_length = phe[0].df[target_column_name]
    phe[0].drop_columns([target_column_name, "target"])

    # None requests all predictors; an empty list supplies none.
    if run_predictors:
        predictors = None
    else:
        predictors = []

    phenonaut.predict.profile(
        phe,
        output_path,
        n_optuna_trials=n_optuna_trials,
        optuna_merge_folds=optuna_merge_folds,
        predictors=predictors,
        target=sepal_length,
    )
def run_phenonaut_iris_multiregression(
    output_path: str,
    n_optuna_trials: int = 20,
    run_predictors: bool = True,
    optuna_merge_folds: bool = False,
):
    """Run sklearn Iris example, predicting the features of view 2 from view 1.

    The iris frame is loaded from scikit-learn, reduced to the rows with
    target 0 or 1 and split into two views with
    phenonaut_object_iris_2_views (view 1 holds iris columns 0-1, view 2
    holds iris columns 2-3). The "target" column is dropped from both views,
    then phenonaut.predict.profile is called with only dataset 0 as input
    (``dataset_combinations=((0,),)``) and dataset 1 as the target.

    Parameters
    ----------
    output_path : str
        Output directory for the results of phenonaut.predict.profile.
    n_optuna_trials : int, optional
        Number of optuna trials to run for each predictor, by default 20.
    run_predictors : bool, optional
        If True, run all predictors (``predictors=None`` is passed to
        profile); if False, an empty predictor list is passed instead,
        by default True.
    optuna_merge_folds : bool, optional
        If False, optuna optimises every fold of the train-validation split
        separately for each predictor and view pair. If True, the average
        validation score over the folds is reported to optuna, optimising
        models across all folds, by default False.
    """
    phe = phenonaut_object_iris_2_views(
        load_iris(as_frame=True).frame, keep_only_class_1_and_2=True
    )

    # Remove the "target" column from both views so it is neither an input
    # nor part of the predicted dataset.
    phe[0].drop_columns("target")
    phe[1].drop_columns("target")

    phenonaut.predict.profile(
        phe,
        output_path,
        n_optuna_trials=n_optuna_trials,
        optuna_merge_folds=optuna_merge_folds,
        # None requests all predictors; an empty list supplies none.
        predictors=None if run_predictors else [],
        # Use only view 1 as input; the whole of view 2 is the target.
        dataset_combinations=((0,),),
        target=phe[1],
    )
if __name__ == "__main__":
    # Each runner is exposed to fire under a shortcut and under its full name.
    # Insertion order matches the command listing in the module docstring.
    commands = {}
    for shortcut, full_name, runner in (
        ("c", "classification", run_phenonaut_iris_classification),
        ("r", "regression", run_phenonaut_iris_regression),
        ("mr", "multiregression", run_phenonaut_iris_multiregression),
    ):
        commands[shortcut] = runner
        commands[full_name] = runner
    fire.Fire(commands)