Skip to content

Commit

Permalink
final changes before other feats
Browse files Browse the repository at this point in the history
  • Loading branch information
Rafael-Silva-Oliveira committed Feb 28, 2024
1 parent c49fdc4 commit 606df00
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 14 deletions.
131 changes: 131 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -649,3 +649,134 @@ preprocessing/reports/PipelineRun_2024_02_27-09_09_11_PM/Plots/TrainingCurve_Per
preprocessing/reports/PipelineRun_2024_02_27-09_09_11_PM/Plots/TrainingCurve_RidgeClassifier.png
preprocessing/reports/PipelineRun_2024_02_27-09_09_11_PM/Plots/TrainingCurve_SGDClassifier.png
preprocessing/reports/PipelineRun_2024_02_27-09_09_11_PM/Plots/TrainingCurve_SVC.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_AdaBoostClassifier_2024_02_28_09_04_19_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_ComplementNB_2024_02_28_08_39_53_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_DecisionTreeClassifier_2024_02_28_08_40_44_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_HistGradientBoostingClassifier_2024_02_28_08_52_18_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_KNeighborsClassifier_2024_02_28_08_38_03_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_LogisticRegression_2024_02_28_08_36_45_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_MLPClassifier_2024_02_28_09_07_25_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_Perceptron_2024_02_28_08_36_57_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_RandomForestClassifier_2024_02_28_08_42_57_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_RidgeClassifier_2024_02_28_08_37_20_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_SGDClassifier_2024_02_28_08_37_10_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_SVC_2024_02_28_08_39_51_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Files/Test_classification_XGBClassifier_2024_02_28_09_13_40_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/RankedModelsByMetric.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_AdaBoostClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_ComplementNB.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_DecisionTreeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_HistGradientBoostingClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_KNeighborsClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_LogisticRegression.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_MLPClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_Perceptron.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_RandomForestClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_RidgeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_SGDClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_SVC.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Test_ConfMatrix_XGBClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_AdaBoostClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_ComplementNB.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_DecisionTreeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_HistGradientBoostingClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_KNeighborsClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_LogisticRegression.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_MLPClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_Perceptron.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_RandomForestClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_RidgeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_SGDClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_SVC.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/Training_ConfMatrix_XGBClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_AdaBoostClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_ComplementNB.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_DecisionTreeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_HistGradientBoostingClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_KNeighborsClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_LogisticRegression.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_MLPClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_Perceptron.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_RandomForestClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_RidgeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_SGDClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_SVC.png
preprocessing/reports/PipelineRun_2024_02_28-08_36_25_AM/Plots/TrainingCurve_XGBClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Files/Test_classification_ComplementNB_2024_02_28_09_20_08_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Files/Test_classification_DecisionTreeClassifier_2024_02_28_09_21_14_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Files/Test_classification_KNeighborsClassifier_2024_02_28_09_16_42_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Files/Test_classification_LogisticRegression_2024_02_28_09_15_08_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Files/Test_classification_Perceptron_2024_02_28_09_15_23_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Files/Test_classification_RandomForestClassifier_2024_02_28_09_26_13_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Files/Test_classification_RidgeClassifier_2024_02_28_09_15_54_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Files/Test_classification_SGDClassifier_2024_02_28_09_15_39_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Files/Test_classification_SVC_2024_02_28_09_20_02_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Test_ConfMatrix_ComplementNB.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Test_ConfMatrix_DecisionTreeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Test_ConfMatrix_KNeighborsClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Test_ConfMatrix_LogisticRegression.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Test_ConfMatrix_Perceptron.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Test_ConfMatrix_RandomForestClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Test_ConfMatrix_RidgeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Test_ConfMatrix_SGDClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Test_ConfMatrix_SVC.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Training_ConfMatrix_ComplementNB.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Training_ConfMatrix_DecisionTreeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Training_ConfMatrix_KNeighborsClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Training_ConfMatrix_LogisticRegression.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Training_ConfMatrix_Perceptron.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Training_ConfMatrix_RandomForestClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Training_ConfMatrix_RidgeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Training_ConfMatrix_SGDClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/Training_ConfMatrix_SVC.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/TrainingCurve_ComplementNB.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/TrainingCurve_DecisionTreeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/TrainingCurve_KNeighborsClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/TrainingCurve_LogisticRegression.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/TrainingCurve_Perceptron.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/TrainingCurve_RandomForestClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/TrainingCurve_RidgeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/TrainingCurve_SGDClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_14_44_AM/Plots/TrainingCurve_SVC.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_AdaBoostClassifier_2024_02_28_10_49_06_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_ComplementNB_2024_02_28_09_58_49_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_DecisionTreeClassifier_2024_02_28_09_59_55_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_HistGradientBoostingClassifier_2024_02_28_10_13_04_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_KNeighborsClassifier_2024_02_28_09_54_06_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_LogisticRegression_2024_02_28_09_51_06_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_MLPClassifier_2024_02_28_11_15_47_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_Perceptron_2024_02_28_09_51_33_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_RandomForestClassifier_2024_02_28_10_03_27_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_RidgeClassifier_2024_02_28_09_52_23_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_SGDClassifier_2024_02_28_09_52_00_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_SVC_2024_02_28_09_58_46_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Files/Test_classification_XGBClassifier_2024_02_28_11_22_40_AM.xlsx
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Model/Best Optimized Model/HistGradientBoostingClassifier.joblib
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/AUC.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/RankedModelsByMetric.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_AdaBoostClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_ComplementNB.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_DecisionTreeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_HistGradientBoostingClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_KNeighborsClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_LogisticRegression.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_MLPClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_Perceptron.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_RandomForestClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_RidgeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_SGDClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_SVC.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/Test_ConfMatrix_XGBClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_AdaBoostClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_ComplementNB.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_DecisionTreeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_HistGradientBoostingClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_KNeighborsClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_LogisticRegression.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_MLPClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_Perceptron.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_RandomForestClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_RidgeClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_SGDClassifier.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_SVC.png
preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_XGBClassifier.png
7 changes: 3 additions & 4 deletions preprocessing/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def run_ModelTraining(self) -> dict:
model_settings = config[modelling_problem_type]

# Load full dataset

# TODO: add NA_solver here
data.fillna(0, inplace=True)

# Drop all rows that contain NA
Expand Down Expand Up @@ -131,7 +131,7 @@ def run_ModelTraining(self) -> dict:
X_train, X_test, y_train, y_test = ModelTrainingPipeline.tabular_data_split(
X, y, modelling_problem_type
)
# Get best baseline model and optimize it
# Loop over the models, optimize each one, select the model that generalizes best, and get the predictions from that best model.
if modelling_problem_type == "classification":
best_clf, cv_results_dict, param_distribution, label_encoder, X_test_new = (
ModelTrainingPipeline.train_and_optimize_clf(
Expand All @@ -144,15 +144,14 @@ def run_ModelTraining(self) -> dict:
label_encoder,
)
)
# Save the best model
ModelTrainingPipeline.save_best_model(best_clf)

elif modelling_problem_type == "regression":
baseline_model = ModelTrainingPipeline.find_best_reg(
model_settings, X_train, X_test, y_train, y_test, modelling_problem_type
)

# Save best optimized model


def main(CONFIG_PATH: str):
config = json.load(open(CONFIG_PATH))
Expand Down
16 changes: 6 additions & 10 deletions preprocessing/src/train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,14 +321,7 @@ def train_and_optimize_clf(
"std": cv_results_test.std(),
}

# Plot CM
self.plot_conf_matrix(
randomized_search,
X_train_cp,
y_train_cp,
model_name=model_name,
prefix="Training",
)
# NOTE: it doesn't make sense to plot a confusion matrix for the training data, since the model has already been trained on (and has seen) that data. If we fit the model again on the training data, the confusion matrix will show much better results than the training and validation scores did. The confusion matrix is therefore only used on the test data.
self.plot_training_curves(
randomized_search,
eval_score=eval_score,
Expand Down Expand Up @@ -471,7 +464,8 @@ def predict_clf(

def save_best_model(self, best_model):

if hasattr(clf, "best_estimator_"):
logger.info(f"Saving the best model:\n\n{best_model}")
if hasattr(best_model, "best_estimator_"):
model = best_model.best_estimator_
else:
model = best_model
Expand Down Expand Up @@ -613,7 +607,9 @@ def plot_CV_results(
gap = 0.015 # Space between the text and the end of the bar
# You have to call ax.text() for each bar
# They are already sorted and you need the index of the bar
for i, (v, s) in enumerate(zip(df_sorted[f"test_{eval_score}"], df_sorted.Std)):
for i, (v, s) in enumerate(
zip(df_sorted[f"training_{eval_score}"], df_sorted.Std)
):
ax.text(
v + s + gap, i, f"{v} ± {s}", color="blue"
) # Place the text at x=v+gap and y= idx
Expand Down

0 comments on commit 606df00

Please sign in to comment.