From 3b3d7cd4a3bae604769317ad17d5496ce924b1a4 Mon Sep 17 00:00:00 2001
From: Rafael-Silva-Oliveira <rafael.slo@outlook.com>
Date: Sun, 3 Mar 2024 20:17:38 +0100
Subject: [PATCH] Update cross-validation strategy logging and launch
 configuration

---
 .gitignore                           | 309 ++++++++++++++++++++
 .vscode/launch.json                  |  30 +-
 catboost_info/catboost_training.json | 133 +++++++++
 preprocessing/src/config.json        | 403 +++++++++++++++------------
 preprocessing/src/main.py            |  35 ++-
 preprocessing/src/preprocessing.py   |  78 +++++-
 preprocessing/src/train_model.py     | 289 ++++++++++++++++---
 preprocessing/src/utils.py           |   4 +-
 8 files changed, 1032 insertions(+), 249 deletions(-)
 create mode 100644 catboost_info/catboost_training.json

diff --git a/.gitignore b/.gitignore
index f0a8f2e..c5b302c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -780,3 +780,312 @@ preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_Rid
 preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_SGDClassifier.png
 preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_SVC.png
 preprocessing/reports/PipelineRun_2024_02_28-09_50_17_AM/Plots/TrainingCurve_XGBClassifier.png
+test_na.xlsx
+test2.xlsx
+test4.xlsx
+catboost_info/learn_error.tsv
+catboost_info/time_left.tsv
+catboost_info/learn/events.out.tfevents
+catboost_info/tmp/cat_feature_index.7bab4e20-bfdf59bc-3205bca-68b4cf9a.tmp
+catboost_info/tmp/cat_feature_index.b8bae20f-32489242-e592c2bc-899fda70.tmp
+catboost_info/tmp/cat_feature_index.b84c19d7-a5052183-80a8f1dd-1feab2c1.tmp
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_AdaBoostClassifier_2024_02_28_07_58_48_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_ComplementNB_2024_02_28_07_14_44_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_DecisionTreeClassifier_2024_02_28_07_15_42_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_HistGradientBoostingClassifier_2024_02_28_07_39_53_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_KNeighborsClassifier_2024_02_28_07_00_06_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_LogisticRegression_2024_02_28_06_58_06_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_MLPClassifier_2024_02_28_08_02_25_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_Perceptron_2024_02_28_06_58_30_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_RandomForestClassifier_2024_02_28_07_20_06_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_RidgeClassifier_2024_02_28_06_59_10_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_SGDClassifier_2024_02_28_06_58_51_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_SVC_2024_02_28_07_14_37_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Files/Test_classification_XGBClassifier_2024_02_28_08_10_40_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Model/Best Optimized Model/HistGradientBoostingClassifier.joblib
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/AUC.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/RankedModelsByMetric.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_AdaBoostClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_ComplementNB.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_DecisionTreeClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_HistGradientBoostingClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_MLPClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_Perceptron.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_RandomForestClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_SVC.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/Test_ConfMatrix_XGBClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_AdaBoostClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_ComplementNB.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_DecisionTreeClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_HistGradientBoostingClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_MLPClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_Perceptron.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_RandomForestClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_SVC.png
+preprocessing/reports/PipelineRun_2024_02_28-06_32_18_PM/Plots/TrainingCurve_XGBClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_AdaBoostClassifier_2024_02_29_09_20_45_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_ComplementNB_2024_02_29_05_49_24_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_DecisionTreeClassifier_2024_02_29_05_50_04_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_HistGradientBoostingClassifier_2024_02_29_08_45_11_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_KNeighborsClassifier_2024_02_29_05_40_40_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_LogisticRegression_2024_02_29_05_37_41_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_MLPClassifier_2024_02_29_10_26_59_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_Perceptron_2024_02_29_05_38_03_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_RandomForestClassifier_2024_02_29_05_53_06_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_RidgeClassifier_2024_02_29_05_38_46_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_SGDClassifier_2024_02_29_05_38_25_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_SVC_2024_02_29_05_49_20_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Files/Test_classification_XGBClassifier_2024_02_29_10_38_13_PM.xlsx
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Model/Best Optimized Model/XGBClassifier.joblib
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/AUC.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/RankedModelsByMetric.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_AdaBoostClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_ComplementNB.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_DecisionTreeClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_HistGradientBoostingClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_MLPClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_Perceptron.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_RandomForestClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_SVC.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/Test_ConfMatrix_XGBClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_AdaBoostClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_ComplementNB.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_DecisionTreeClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_HistGradientBoostingClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_MLPClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_Perceptron.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_RandomForestClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_SVC.png
+preprocessing/reports/PipelineRun_2024_02_29-05_22_36_PM/Plots/TrainingCurve_XGBClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Files/Test_classification_KNeighborsClassifier_2024_03_01_02_38_18_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Files/Test_classification_LogisticRegression_2024_03_01_02_36_27_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Files/Test_classification_Perceptron_2024_03_01_02_36_40_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Files/Test_classification_RidgeClassifier_2024_03_01_02_37_08_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Files/Test_classification_SGDClassifier_2024_03_01_02_36_56_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Plots/Test_ConfMatrix_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Plots/Test_ConfMatrix_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Plots/Test_ConfMatrix_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Plots/Test_ConfMatrix_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Plots/Test_ConfMatrix_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Plots/TrainingCurve_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Plots/TrainingCurve_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Plots/TrainingCurve_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-02_31_34_PM/Plots/TrainingCurve_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-02_39_33_PM/Files/Test_classification_LogisticRegression_2024_03_01_02_43_26_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-02_39_33_PM/Files/Test_classification_Perceptron_2024_03_01_02_43_39_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-02_39_33_PM/Plots/Test_ConfMatrix_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_01-02_39_33_PM/Plots/Test_ConfMatrix_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_01-02_39_33_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_01-02_39_33_PM/Plots/TrainingCurve_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Files/Test_classification_ComplementNB_2024_03_01_06_51_58_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Files/Test_classification_DecisionTreeClassifier_2024_03_01_06_53_18_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Files/Test_classification_HistGradientBoostingClassifier_2024_03_01_07_35_19_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Files/Test_classification_KNeighborsClassifier_2024_03_01_06_48_22_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Files/Test_classification_LogisticRegression_2024_03_01_06_46_37_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Files/Test_classification_Perceptron_2024_03_01_06_46_50_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Files/Test_classification_RandomForestClassifier_2024_03_01_06_56_26_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Files/Test_classification_RidgeClassifier_2024_03_01_06_47_14_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Files/Test_classification_SGDClassifier_2024_03_01_06_47_03_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Files/Test_classification_SVC_2024_03_01_06_51_54_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/Test_ConfMatrix_ComplementNB.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/Test_ConfMatrix_DecisionTreeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/Test_ConfMatrix_HistGradientBoostingClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/Test_ConfMatrix_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/Test_ConfMatrix_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/Test_ConfMatrix_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/Test_ConfMatrix_RandomForestClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/Test_ConfMatrix_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/Test_ConfMatrix_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/Test_ConfMatrix_SVC.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/TrainingCurve_ComplementNB.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/TrainingCurve_DecisionTreeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/TrainingCurve_HistGradientBoostingClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/TrainingCurve_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/TrainingCurve_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/TrainingCurve_RandomForestClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/TrainingCurve_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/TrainingCurve_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_01-06_42_24_PM/Plots/TrainingCurve_SVC.png
+preprocessing/reports/PipelineRun_2024_03_02-01_34_49_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_02-01_50_54_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Files/Test_classification_KNeighborsClassifier_2024_03_02_05_23_56_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Files/Test_classification_LogisticRegression_2024_03_02_05_23_25_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Files/Test_classification_Perceptron_2024_03_02_05_23_30_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Files/Test_classification_RidgeClassifier_2024_03_02_05_23_39_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Files/Test_classification_SGDClassifier_2024_03_02_05_23_35_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Plots/Test_ConfMatrix_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Plots/Test_ConfMatrix_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Plots/Test_ConfMatrix_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Plots/Test_ConfMatrix_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Plots/Test_ConfMatrix_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Plots/TrainingCurve_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Plots/TrainingCurve_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Plots/TrainingCurve_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_19_58_PM/Plots/TrainingCurve_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Files/Test_classification_KNeighborsClassifier_2024_03_02_05_36_24_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Files/Test_classification_LogisticRegression_2024_03_02_05_36_04_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Files/Test_classification_Perceptron_2024_03_02_05_36_08_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Files/Test_classification_RidgeClassifier_2024_03_02_05_36_16_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Files/Test_classification_SGDClassifier_2024_03_02_05_36_13_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Plots/Test_ConfMatrix_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Plots/Test_ConfMatrix_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Plots/Test_ConfMatrix_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Plots/Test_ConfMatrix_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Plots/Test_ConfMatrix_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Plots/TrainingCurve_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Plots/TrainingCurve_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Plots/TrainingCurve_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_31_45_PM/Plots/TrainingCurve_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_AdaBoostClassifier_2024_03_02_06_50_57_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_CatBoostClassifier_2024_03_03_11_47_24_AM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_ComplementNB_2024_03_02_05_45_10_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_DecisionTreeClassifier_2024_03_02_05_46_30_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_HistGradientBoostingClassifier_2024_03_02_06_11_01_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_KNeighborsClassifier_2024_03_02_05_41_51_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_LogisticRegression_2024_03_02_05_41_23_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_MLPClassifier_2024_03_02_07_00_31_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_Perceptron_2024_03_02_05_41_28_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_RandomForestClassifier_2024_03_02_05_50_23_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_RidgeClassifier_2024_03_02_05_41_40_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_SGDClassifier_2024_03_02_05_41_36_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_SVC_2024_03_02_05_45_08_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Files/Test_classification_XGBClassifier_2024_03_02_07_06_38_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_AdaBoostClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_CatBoostClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_ComplementNB.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_DecisionTreeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_HistGradientBoostingClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_MLPClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_RandomForestClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_SVC.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/Test_ConfMatrix_XGBClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_AdaBoostClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_CatBoostClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_ComplementNB.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_DecisionTreeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_HistGradientBoostingClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_MLPClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_RandomForestClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_SVC.png
+preprocessing/reports/PipelineRun_2024_03_02-05_37_58_PM/Plots/TrainingCurve_XGBClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-05_45_49_PM/Files/Test_classification_LogisticRegression_2024_03_03_06_06_37_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-05_45_49_PM/Model/Best Optimized Model/LogisticRegression.joblib
+preprocessing/reports/PipelineRun_2024_03_03-05_45_49_PM/Plots/AUC.png
+preprocessing/reports/PipelineRun_2024_03_03-05_45_49_PM/Plots/RankedModelsByMetric.png
+preprocessing/reports/PipelineRun_2024_03_03-05_45_49_PM/Plots/Test_ConfMatrix_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_03-05_45_49_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_03-06_08_29_PM/Files/Test_classification_LogisticRegression_2024_03_03_06_28_42_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-06_08_29_PM/Model/Best Optimized Model/LogisticRegression.joblib
+preprocessing/reports/PipelineRun_2024_03_03-06_08_29_PM/Plots/AUC.png
+preprocessing/reports/PipelineRun_2024_03_03-06_08_29_PM/Plots/RankedModelsByMetric.png
+preprocessing/reports/PipelineRun_2024_03_03-06_08_29_PM/Plots/SHAP_Waterfall_Plot.png
+preprocessing/reports/PipelineRun_2024_03_03-06_08_29_PM/Plots/Test_ConfMatrix_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_03-06_08_29_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_03-12_21_38_PM/Files/Test_classification_LGBMClassifier_2024_03_03_12_28_20_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_21_38_PM/Plots/Test_ConfMatrix_LGBMClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_21_38_PM/Plots/TrainingCurve_LGBMClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_32_02_PM/Files/Test_classification_LGBMClassifier_2024_03_03_12_36_04_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_32_02_PM/Plots/Test_ConfMatrix_LGBMClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_32_02_PM/Plots/TrainingCurve_LGBMClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_38_16_PM/Files/Test_classification_LGBMClassifier_2024_03_03_12_42_34_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_38_16_PM/Model/Best Optimized Model/LGBMClassifier.joblib
+preprocessing/reports/PipelineRun_2024_03_03-12_38_16_PM/Plots/AUC.png
+preprocessing/reports/PipelineRun_2024_03_03-12_38_16_PM/Plots/RankedModelsByMetric.png
+preprocessing/reports/PipelineRun_2024_03_03-12_38_16_PM/Plots/Test_ConfMatrix_LGBMClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_38_16_PM/Plots/TrainingCurve_LGBMClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Files/Test_classification_ComplementNB_2024_03_03_12_51_38_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Files/Test_classification_DecisionTreeClassifier_2024_03_03_12_52_40_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Files/Test_classification_KNeighborsClassifier_2024_03_03_12_47_26_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Files/Test_classification_LogisticRegression_2024_03_03_12_46_40_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Files/Test_classification_Perceptron_2024_03_03_12_46_45_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Files/Test_classification_RandomForestClassifier_2024_03_03_12_57_15_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Files/Test_classification_RidgeClassifier_2024_03_03_12_47_00_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Files/Test_classification_SGDClassifier_2024_03_03_12_46_56_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Files/Test_classification_SVC_2024_03_03_12_51_35_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/Test_ConfMatrix_ComplementNB.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/Test_ConfMatrix_DecisionTreeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/Test_ConfMatrix_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/Test_ConfMatrix_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/Test_ConfMatrix_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/Test_ConfMatrix_RandomForestClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/Test_ConfMatrix_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/Test_ConfMatrix_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/Test_ConfMatrix_SVC.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/TrainingCurve_ComplementNB.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/TrainingCurve_DecisionTreeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/TrainingCurve_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/TrainingCurve_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/TrainingCurve_RandomForestClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/TrainingCurve_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/TrainingCurve_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_43_22_PM/Plots/TrainingCurve_SVC.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_AdaBoostClassifier_2024_03_03_04_19_16_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_ComplementNB_2024_03_03_01_21_35_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_DecisionTreeClassifier_2024_03_03_01_36_16_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_HistGradientBoostingClassifier_2024_03_03_03_28_24_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_KNeighborsClassifier_2024_03_03_01_05_51_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_LogisticRegression_2024_03_03_01_03_42_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_MLPClassifier_2024_03_03_05_08_45_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_Perceptron_2024_03_03_01_03_56_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_RandomForestClassifier_2024_03_03_02_00_10_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_RidgeClassifier_2024_03_03_01_04_20_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_SGDClassifier_2024_03_03_01_04_11_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_SVC_2024_03_03_01_21_15_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Files/Test_classification_XGBClassifier_2024_03_03_05_34_05_PM.xlsx
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_AdaBoostClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_ComplementNB.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_DecisionTreeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_HistGradientBoostingClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_MLPClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_RandomForestClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_SVC.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/Test_ConfMatrix_XGBClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_AdaBoostClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_ComplementNB.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_DecisionTreeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_HistGradientBoostingClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_KNeighborsClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_LogisticRegression.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_MLPClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_Perceptron.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_RandomForestClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_RidgeClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_SGDClassifier.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_SVC.png
+preprocessing/reports/PipelineRun_2024_03_03-12_59_52_PM/Plots/TrainingCurve_XGBClassifier.png
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 10daee9..07a5188 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -1,16 +1,16 @@
 {
-	// Use IntelliSense to learn about possible attributes.
-	// Hover to view descriptions of existing attributes.
-	// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
-	"version": "0.2.0",
-	"configurations": [
-		{
-			"name": "Python: Current File",
-			"type": "python",
-			"request": "launch",
-			"program": "${file}",
-			"console": "integratedTerminal",
-			"justMyCode": true
-		}
-	]
-}
\ No newline at end of file
+  // Use IntelliSense to learn about possible attributes.
+  // Hover to view descriptions of existing attributes.
+  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "Python: Current File",
+      "type": "python",
+      "request": "launch",
+      "program": "${file}",
+      "console": "integratedTerminal",
+      "justMyCode": true
+    }
+  ]
+}
diff --git a/catboost_info/catboost_training.json b/catboost_info/catboost_training.json
new file mode 100644
index 0000000..0fc7b16
--- /dev/null
+++ b/catboost_info/catboost_training.json
@@ -0,0 +1,133 @@
+{
+"meta":{"test_sets":[],"test_metrics":[],"learn_metrics":[{"best_value":"Min","name":"Logloss"}],"launch_mode":"Train","parameters":"","iteration_count":1000,"learn_sets":["learn"],"name":"experiment"},
+"iterations":[
+{"learn":[0.6883371991],"iteration":0,"passed_time":0.5897550884,"remaining_time":589.1653333},
+{"learn":[0.6849897039],"iteration":1,"passed_time":1.187687649,"remaining_time":592.6561367},
+{"learn":[0.6813085268],"iteration":2,"passed_time":1.799819596,"remaining_time":598.1400458},
+{"learn":[0.6766436236],"iteration":3,"passed_time":2.394584261,"remaining_time":596.251481},
+{"learn":[0.6728130128],"iteration":4,"passed_time":3.000550225,"remaining_time":597.1094947},
+{"learn":[0.6690699493],"iteration":5,"passed_time":3.593251275,"remaining_time":595.2819611},
+{"learn":[0.6652317087],"iteration":6,"passed_time":4.177751719,"remaining_time":592.6439224},
+{"learn":[0.6619248218],"iteration":7,"passed_time":4.786182625,"remaining_time":593.4866456},
+{"learn":[0.6578150004],"iteration":8,"passed_time":5.394714473,"remaining_time":594.0180048},
+{"learn":[0.6539802907],"iteration":9,"passed_time":6.005211823,"remaining_time":594.5159705},
+{"learn":[0.6505922907],"iteration":10,"passed_time":6.618117736,"remaining_time":595.0289491},
+{"learn":[0.6473904205],"iteration":11,"passed_time":7.205823959,"remaining_time":593.2795059},
+{"learn":[0.644521791],"iteration":12,"passed_time":7.821837614,"remaining_time":593.8579788},
+{"learn":[0.6407377941],"iteration":13,"passed_time":8.418556906,"remaining_time":592.9069364},
+{"learn":[0.6369871366],"iteration":14,"passed_time":9.007965337,"remaining_time":591.5230571},
+{"learn":[0.633097111],"iteration":15,"passed_time":9.587356145,"remaining_time":589.6224029},
+{"learn":[0.6297596283],"iteration":16,"passed_time":10.20146429,"remaining_time":589.8846707},
+{"learn":[0.6264339319],"iteration":17,"passed_time":10.7849311,"remaining_time":588.3779077},
+{"learn":[0.622517565],"iteration":18,"passed_time":11.35832983,"remaining_time":586.4485036},
+{"learn":[0.6189781207],"iteration":19,"passed_time":11.96102383,"remaining_time":586.0901675},
+{"learn":[0.614891844],"iteration":20,"passed_time":12.55943334,"remaining_time":585.508821},
+{"learn":[0.6112319396],"iteration":21,"passed_time":13.12664415,"remaining_time":583.5389991},
+{"learn":[0.6078995127],"iteration":22,"passed_time":13.72043904,"remaining_time":582.8203888},
+{"learn":[0.6043469811],"iteration":23,"passed_time":14.30089813,"remaining_time":581.5698573},
+{"learn":[0.6009359144],"iteration":24,"passed_time":14.86045648,"remaining_time":579.5578027},
+{"learn":[0.597810164],"iteration":25,"passed_time":15.44499837,"remaining_time":578.5934006},
+{"learn":[0.5944324293],"iteration":26,"passed_time":16.03185389,"remaining_time":577.7405123},
+{"learn":[0.5906859395],"iteration":27,"passed_time":16.60986365,"remaining_time":576.5995524},
+{"learn":[0.5870805433],"iteration":28,"passed_time":17.18989798,"remaining_time":575.5652047},
+{"learn":[0.583553606],"iteration":29,"passed_time":17.7855844,"remaining_time":575.0672289},
+{"learn":[0.5804469559],"iteration":30,"passed_time":18.38185485,"remaining_time":574.5812048},
+{"learn":[0.5777068204],"iteration":31,"passed_time":18.99339344,"remaining_time":574.5501515},
+{"learn":[0.5750315315],"iteration":32,"passed_time":19.59031715,"remaining_time":574.0556571},
+{"learn":[0.5721755113],"iteration":33,"passed_time":20.16550665,"remaining_time":572.93763},
+{"learn":[0.5695312201],"iteration":34,"passed_time":20.75657917,"remaining_time":572.2885399},
+{"learn":[0.5666199482],"iteration":35,"passed_time":21.34408789,"remaining_time":571.5472424},
+{"learn":[0.5635877528],"iteration":36,"passed_time":21.92614231,"remaining_time":570.6722984},
+{"learn":[0.5612142249],"iteration":37,"passed_time":22.49868862,"remaining_time":569.5720646},
+{"learn":[0.5581252003],"iteration":38,"passed_time":23.08822871,"remaining_time":568.9176357},
+{"learn":[0.5546548047],"iteration":39,"passed_time":23.67937912,"remaining_time":568.3050989},
+{"learn":[0.5516636951],"iteration":40,"passed_time":24.29208519,"remaining_time":568.1977975},
+{"learn":[0.5490912517],"iteration":41,"passed_time":24.92510334,"remaining_time":568.5297381},
+{"learn":[0.546783947],"iteration":42,"passed_time":25.66325347,"remaining_time":571.1565947},
+{"learn":[0.5439302142],"iteration":43,"passed_time":26.28247857,"remaining_time":571.0465798},
+{"learn":[0.5411590415],"iteration":44,"passed_time":26.95499626,"remaining_time":572.0449206},
+{"learn":[0.5383552222],"iteration":45,"passed_time":27.63202213,"remaining_time":573.0641112},
+{"learn":[0.5349591336],"iteration":46,"passed_time":28.28663996,"remaining_time":573.5567635},
+{"learn":[0.5324778918],"iteration":47,"passed_time":28.89373871,"remaining_time":573.0591512},
+{"learn":[0.5298075007],"iteration":48,"passed_time":29.48112755,"remaining_time":572.1745368},
+{"learn":[0.5271983529],"iteration":49,"passed_time":30.07979139,"remaining_time":571.5160363},
+{"learn":[0.5245196434],"iteration":50,"passed_time":30.74697146,"remaining_time":572.1348219},
+{"learn":[0.522480269],"iteration":51,"passed_time":31.36928571,"remaining_time":571.8862087},
+{"learn":[0.5202081219],"iteration":52,"passed_time":31.99439527,"remaining_time":571.67344},
+{"learn":[0.5176574295],"iteration":53,"passed_time":32.59369419,"remaining_time":570.9932353},
+{"learn":[0.5149622626],"iteration":54,"passed_time":33.18225985,"remaining_time":570.1315556},
+{"learn":[0.5121084264],"iteration":55,"passed_time":33.79355552,"remaining_time":569.662793},
+{"learn":[0.5104709002],"iteration":56,"passed_time":34.40009825,"remaining_time":569.1103974},
+{"learn":[0.5082245484],"iteration":57,"passed_time":35.01813422,"remaining_time":568.7428007},
+{"learn":[0.5063720641],"iteration":58,"passed_time":35.63463605,"remaining_time":568.3422462},
+{"learn":[0.5034715164],"iteration":59,"passed_time":36.2459967,"remaining_time":567.8539483},
+{"learn":[0.5014089854],"iteration":60,"passed_time":36.84431034,"remaining_time":567.1607772},
+{"learn":[0.4990082327],"iteration":61,"passed_time":37.46327598,"remaining_time":566.7831107},
+{"learn":[0.4965486394],"iteration":62,"passed_time":38.06121818,"remaining_time":566.0851022},
+{"learn":[0.493733377],"iteration":63,"passed_time":38.68254915,"remaining_time":565.7322813},
+{"learn":[0.4913317912],"iteration":64,"passed_time":39.30046774,"remaining_time":565.3221128},
+{"learn":[0.4889768819],"iteration":65,"passed_time":39.88614378,"remaining_time":564.449368},
+{"learn":[0.4870395262],"iteration":66,"passed_time":40.54811603,"remaining_time":564.6476457},
+{"learn":[0.4853275731],"iteration":67,"passed_time":41.19816658,"remaining_time":564.6572243},
+{"learn":[0.4829123608],"iteration":68,"passed_time":41.81805237,"remaining_time":564.2406776},
+{"learn":[0.480085898],"iteration":69,"passed_time":42.43752536,"remaining_time":563.8128369},
+{"learn":[0.4773598818],"iteration":70,"passed_time":43.07911795,"remaining_time":563.6690222},
+{"learn":[0.474927284],"iteration":71,"passed_time":43.7036856,"remaining_time":563.2919477},
+{"learn":[0.4727803503],"iteration":72,"passed_time":44.31707578,"remaining_time":562.766154},
+{"learn":[0.470746707],"iteration":73,"passed_time":44.94146827,"remaining_time":562.3756705},
+{"learn":[0.469145809],"iteration":74,"passed_time":45.56078983,"remaining_time":561.9164078},
+{"learn":[0.4673539586],"iteration":75,"passed_time":46.19994484,"remaining_time":561.6940662},
+{"learn":[0.4653330282],"iteration":76,"passed_time":46.8279224,"remaining_time":561.3269139},
+{"learn":[0.4629332073],"iteration":77,"passed_time":47.45549751,"remaining_time":560.9483167},
+{"learn":[0.461455625],"iteration":78,"passed_time":48.08567549,"remaining_time":560.5937611},
+{"learn":[0.4589075141],"iteration":79,"passed_time":48.70650966,"remaining_time":560.1248611},
+{"learn":[0.4572562213],"iteration":80,"passed_time":49.34735463,"remaining_time":559.8792457},
+{"learn":[0.4550896079],"iteration":81,"passed_time":49.97140368,"remaining_time":559.4359582},
+{"learn":[0.4529972409],"iteration":82,"passed_time":50.59338196,"remaining_time":558.9654368},
+{"learn":[0.4508416438],"iteration":83,"passed_time":51.20695941,"remaining_time":558.3997002},
+{"learn":[0.4483993361],"iteration":84,"passed_time":51.87347723,"remaining_time":558.4027254},
+{"learn":[0.4465511643],"iteration":85,"passed_time":52.52178308,"remaining_time":558.1966249},
+{"learn":[0.4447563341],"iteration":86,"passed_time":53.2167855,"remaining_time":558.4704041},
+{"learn":[0.4426378815],"iteration":87,"passed_time":53.97495745,"remaining_time":559.3768317},
+{"learn":[0.4403107091],"iteration":88,"passed_time":54.68105686,"remaining_time":559.7128405},
+{"learn":[0.4386413034],"iteration":89,"passed_time":55.30474307,"remaining_time":559.1924022},
+{"learn":[0.436209122],"iteration":90,"passed_time":55.9357208,"remaining_time":558.7425297},
+{"learn":[0.4339061358],"iteration":91,"passed_time":56.52480875,"remaining_time":557.8752864},
+{"learn":[0.4323550755],"iteration":92,"passed_time":57.13760773,"remaining_time":557.2452711},
+{"learn":[0.4298656851],"iteration":93,"passed_time":57.77018037,"remaining_time":556.8062066},
+{"learn":[0.4277456214],"iteration":94,"passed_time":58.38802047,"remaining_time":556.2227214},
+{"learn":[0.4256634719],"iteration":95,"passed_time":59.01425543,"remaining_time":555.717572},
+{"learn":[0.4234715671],"iteration":96,"passed_time":59.60482696,"remaining_time":554.8779252},
+{"learn":[0.4214558458],"iteration":97,"passed_time":60.2197512,"remaining_time":554.267506},
+{"learn":[0.41920427],"iteration":98,"passed_time":60.8689845,"remaining_time":553.9692428},
+{"learn":[0.4173560088],"iteration":99,"passed_time":61.51418262,"remaining_time":553.6276436},
+{"learn":[0.415247437],"iteration":100,"passed_time":62.18599253,"remaining_time":553.5169038},
+{"learn":[0.4135330358],"iteration":101,"passed_time":62.82241164,"remaining_time":553.0835848},
+{"learn":[0.4116642926],"iteration":102,"passed_time":63.49063654,"remaining_time":552.9233105},
+{"learn":[0.4100296579],"iteration":103,"passed_time":64.10720745,"remaining_time":552.3082488},
+{"learn":[0.408585013],"iteration":104,"passed_time":64.71153045,"remaining_time":551.5887595},
+{"learn":[0.4069770162],"iteration":105,"passed_time":65.29487328,"remaining_time":550.6944973},
+{"learn":[0.405438881],"iteration":106,"passed_time":65.90525614,"remaining_time":550.0317171},
+{"learn":[0.4037825713],"iteration":107,"passed_time":66.51263033,"remaining_time":549.3450579},
+{"learn":[0.4023672899],"iteration":108,"passed_time":67.10965729,"remaining_time":548.5752719},
+{"learn":[0.400733606],"iteration":109,"passed_time":67.71025538,"remaining_time":547.8375208},
+{"learn":[0.3991515476],"iteration":110,"passed_time":68.35400905,"remaining_time":547.4478743},
+{"learn":[0.3973838822],"iteration":111,"passed_time":69.00053318,"remaining_time":547.0756559},
+{"learn":[0.3954570656],"iteration":112,"passed_time":69.61302302,"remaining_time":546.4314285},
+{"learn":[0.3935102679],"iteration":113,"passed_time":70.24954863,"remaining_time":545.9745622},
+{"learn":[0.3913648742],"iteration":114,"passed_time":70.88590953,"remaining_time":545.5133038},
+{"learn":[0.3890903913],"iteration":115,"passed_time":71.52397284,"remaining_time":545.0619999},
+{"learn":[0.3878660192],"iteration":116,"passed_time":72.18511074,"remaining_time":544.7816477},
+{"learn":[0.3859520437],"iteration":117,"passed_time":72.85663239,"remaining_time":544.5724556},
+{"learn":[0.3844859767],"iteration":118,"passed_time":73.51858511,"remaining_time":544.2846511},
+{"learn":[0.3823509789],"iteration":119,"passed_time":74.14153168,"remaining_time":543.7045656},
+{"learn":[0.3808779491],"iteration":120,"passed_time":74.75052938,"remaining_time":543.0224407},
+{"learn":[0.379343205],"iteration":121,"passed_time":75.34032947,"remaining_time":542.2033547},
+{"learn":[0.3778756983],"iteration":122,"passed_time":75.93713484,"remaining_time":541.4379452},
+{"learn":[0.3759205283],"iteration":123,"passed_time":76.50914277,"remaining_time":540.5000731},
+{"learn":[0.3743925077],"iteration":124,"passed_time":77.10508808,"remaining_time":539.7356166},
+{"learn":[0.3728343177],"iteration":125,"passed_time":77.70636566,"remaining_time":539.0108221},
+{"learn":[0.37147335],"iteration":126,"passed_time":78.33222552,"remaining_time":538.4569518},
+{"learn":[0.3696634524],"iteration":127,"passed_time":78.91101072,"remaining_time":537.5812605},
+{"learn":[0.3685633831],"iteration":128,"passed_time":79.49336659,"remaining_time":536.7342814},
+{
\ No newline at end of file
diff --git a/preprocessing/src/config.json b/preprocessing/src/config.json
index bd93bc1..4f3d8a8 100644
--- a/preprocessing/src/config.json
+++ b/preprocessing/src/config.json
@@ -1,6 +1,6 @@
 {
   "path_backbone": "C://Users//rafaelo//OneDrive - NTNU//Documents//Projects//preprocessing//preprocessing//preprocessing",
-  "data": "data//raw//METABRIC_RNA_Mutation.csv",
+  "data": "data//raw//full_METABRIC_RNA_Mutation.csv",
   "preprocessing": {
     "numerical_preprocessing": {
       "StandardScaler": {
@@ -535,7 +535,180 @@
         "transformed__tulp4",
         "transformed__ugt2b15",
         "transformed__ugt2b17",
-        "transformed__ugt2b7"
+        "transformed__ugt2b7",
+        "transformed__pik3ca_mut",
+        "transformed__tp53_mut",
+        "transformed__muc16_mut",
+        "transformed__ahnak2_mut",
+        "transformed__kmt2c_mut",
+        "transformed__syne1_mut",
+        "transformed__gata3_mut",
+        "transformed__map3k1_mut",
+        "transformed__ahnak_mut",
+        "transformed__dnah11_mut",
+        "transformed__cdh1_mut",
+        "transformed__dnah2_mut",
+        "transformed__kmt2d_mut",
+        "transformed__ush2a_mut",
+        "transformed__ryr2_mut",
+        "transformed__dnah5_mut",
+        "transformed__herc2_mut",
+        "transformed__pde4dip_mut",
+        "transformed__akap9_mut",
+        "transformed__tg_mut",
+        "transformed__birc6_mut",
+        "transformed__utrn_mut",
+        "transformed__tbx3_mut",
+        "transformed__col6a3_mut",
+        "transformed__arid1a_mut",
+        "transformed__lama2_mut",
+        "transformed__notch1_mut",
+        "transformed__cbfb_mut",
+        "transformed__ncor2_mut",
+        "transformed__col12a1_mut",
+        "transformed__col22a1_mut",
+        "transformed__pten_mut",
+        "transformed__akt1_mut",
+        "transformed__atr_mut",
+        "transformed__thada_mut",
+        "transformed__ncor1_mut",
+        "transformed__stab2_mut",
+        "transformed__myh9_mut",
+        "transformed__runx1_mut",
+        "transformed__nf1_mut",
+        "transformed__map2k4_mut",
+        "transformed__ros1_mut",
+        "transformed__lamb3_mut",
+        "transformed__arid1b_mut",
+        "transformed__erbb2_mut",
+        "transformed__sf3b1_mut",
+        "transformed__shank2_mut",
+        "transformed__ep300_mut",
+        "transformed__ptprd_mut",
+        "transformed__usp9x_mut",
+        "transformed__setd2_mut",
+        "transformed__setd1a_mut",
+        "transformed__thsd7a_mut",
+        "transformed__afdn_mut",
+        "transformed__erbb3_mut",
+        "transformed__rb1_mut",
+        "transformed__myo1a_mut",
+        "transformed__alk_mut",
+        "transformed__fanca_mut",
+        "transformed__adgra2_mut",
+        "transformed__ubr5_mut",
+        "transformed__pik3r1_mut",
+        "transformed__myo3a_mut",
+        "transformed__asxl2_mut",
+        "transformed__apc_mut",
+        "transformed__ctcf_mut",
+        "transformed__asxl1_mut",
+        "transformed__fancd2_mut",
+        "transformed__taf1_mut",
+        "transformed__kdm6a_mut",
+        "transformed__ctnna3_mut",
+        "transformed__brca1_mut",
+        "transformed__ptprm_mut",
+        "transformed__foxo3_mut",
+        "transformed__usp28_mut",
+        "transformed__gldc_mut",
+        "transformed__brca2_mut",
+        "transformed__cacna2d3_mut",
+        "transformed__arid2_mut",
+        "transformed__aff2_mut",
+        "transformed__lifr_mut",
+        "transformed__sbno1_mut",
+        "transformed__kdm3a_mut",
+        "transformed__ncoa3_mut",
+        "transformed__bap1_mut",
+        "transformed__l1cam_mut",
+        "transformed__pbrm1_mut",
+        "transformed__chd1_mut",
+        "transformed__jak1_mut",
+        "transformed__setdb1_mut",
+        "transformed__fam20c_mut",
+        "transformed__arid5b_mut",
+        "transformed__egfr_mut",
+        "transformed__map3k10_mut",
+        "transformed__smarcc2_mut",
+        "transformed__erbb4_mut",
+        "transformed__npnt_mut",
+        "transformed__nek1_mut",
+        "transformed__agmo_mut",
+        "transformed__zfp36l1_mut",
+        "transformed__smad4_mut",
+        "transformed__sik1_mut",
+        "transformed__casp8_mut",
+        "transformed__prkcq_mut",
+        "transformed__smarcc1_mut",
+        "transformed__palld_mut",
+        "transformed__dcaf4l2_mut",
+        "transformed__bcas3_mut",
+        "transformed__cdkn1b_mut",
+        "transformed__gps2_mut",
+        "transformed__men1_mut",
+        "transformed__stk11_mut",
+        "transformed__sik2_mut",
+        "transformed__ptpn22_mut",
+        "transformed__brip1_mut",
+        "transformed__flt3_mut",
+        "transformed__nrg3_mut",
+        "transformed__fbxw7_mut",
+        "transformed__ttyh1_mut",
+        "transformed__taf4b_mut",
+        "transformed__or6a2_mut",
+        "transformed__map3k13_mut",
+        "transformed__hdac9_mut",
+        "transformed__prkacg_mut",
+        "transformed__rpgr_mut",
+        "transformed__large1_mut",
+        "transformed__foxp1_mut",
+        "transformed__clk3_mut",
+        "transformed__prkcz_mut",
+        "transformed__lipi_mut",
+        "transformed__ppp2r2a_mut",
+        "transformed__prkce_mut",
+        "transformed__gh1_mut",
+        "transformed__gpr32_mut",
+        "transformed__kras_mut",
+        "transformed__nf2_mut",
+        "transformed__chek2_mut",
+        "transformed__ldlrap1_mut",
+        "transformed__clrn2_mut",
+        "transformed__acvrl1_mut",
+        "transformed__agtr2_mut",
+        "transformed__cdkn2a_mut",
+        "transformed__ctnna1_mut",
+        "transformed__magea8_mut",
+        "transformed__prr16_mut",
+        "transformed__dtwd2_mut",
+        "transformed__akt2_mut",
+        "transformed__braf_mut",
+        "transformed__foxo1_mut",
+        "transformed__nt5e_mut",
+        "transformed__ccnd3_mut",
+        "transformed__nr3c1_mut",
+        "transformed__prkg1_mut",
+        "transformed__tbl1xr1_mut",
+        "transformed__frmd3_mut",
+        "transformed__smad2_mut",
+        "transformed__sgcd_mut",
+        "transformed__spaca1_mut",
+        "transformed__rasgef1b_mut",
+        "transformed__hist1h2bc_mut",
+        "transformed__nr2f1_mut",
+        "transformed__klrg1_mut",
+        "transformed__mbl2_mut",
+        "transformed__mtap_mut",
+        "transformed__ppp2cb_mut",
+        "transformed__smarcd1_mut",
+        "transformed__nras_mut",
+        "transformed__ndfip1_mut",
+        "transformed__hras_mut",
+        "transformed__prps2_mut",
+        "transformed__smarcb1_mut",
+        "transformed__stmn2_mut",
+        "transformed__siah1_mut"
       ]
     },
     "feature_selection": {
@@ -557,7 +730,7 @@
       "RFECV": {
         "usage": true,
         "params": {
-          "min_features_to_select": 40,
+          "min_features_to_select": 50,
           "scoring": "accuracy",
           "step": 0.1,
           "cv": {
@@ -573,7 +746,6 @@
         }
       }
     },
-    "feature_engineering": {},
     "classification": {
       "usage": 1,
       "train_test_split": {
@@ -585,24 +757,28 @@
       "DummyClassifier": {
         "strategy": "most_frequent"
       },
+      "cv_settings": {
+        "n_iter": 1,
+        "scoring": ["accuracy"],
+        "cv": {
+          "StratifiedKFold": {
+            "n_splits": 10,
+            "shuffle": true,
+            "random_state": 0
+          }
+        },
+        "n_jobs": 2
+      },
+      "optimization": {
+        "optuna": { "usage": true },
+        "RandomizedSearchCV": { "usage": false }
+      },
       "models": {
         "linear_model": {
           "LogisticRegression": {
             "usage": 1,
             "run_feature_selection": true,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "penalty": ["l1", "l2"],
                 "C": "np.logspace(-3, 2, 6)",
@@ -612,21 +788,9 @@
             }
           },
           "Perceptron": {
-            "usage": 1,
+            "usage": 0,
             "run_feature_selection": true,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "penalty": ["l2", "l1", "elasticnet"],
                 "alpha": [0.0001, 0.001, 0.01],
@@ -639,21 +803,9 @@
             }
           },
           "SGDClassifier": {
-            "usage": 1,
+            "usage": 0,
             "run_feature_selection": true,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "loss": [
                   "hinge",
@@ -677,21 +829,9 @@
             }
           },
           "RidgeClassifier": {
-            "usage": 1,
+            "usage": 0,
             "run_feature_selection": true,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "alpha": [0.1, 1.0, 10.0],
                 "solver": [
@@ -710,21 +850,9 @@
         },
         "neighbors": {
           "KNeighborsClassifier": {
-            "usage": 1,
+            "usage": 0,
             "run_feature_selection": false,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "n_neighbors": [3, 5, 7, 9],
                 "weights": ["uniform", "distance"],
@@ -737,21 +865,9 @@
         },
         "svm": {
           "SVC": {
-            "usage": 1,
+            "usage": 0,
             "run_feature_selection": false,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "C": [0.1, 1, 10],
                 "kernel": ["linear", "rbf", "poly"],
@@ -764,21 +880,9 @@
         },
         "naive_bayes": {
           "ComplementNB": {
-            "usage": 1,
+            "usage": 0,
             "run_feature_selection": false,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "alpha": [
                   1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 0.001, 0.01, 0.1, 0.5, 1
@@ -789,21 +893,9 @@
         },
         "tree": {
           "DecisionTreeClassifier": {
-            "usage": 1,
+            "usage": 0,
             "run_feature_selection": false,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "criterion": ["gini", "entropy"],
                 "splitter": ["best", "random"],
@@ -816,21 +908,9 @@
         },
         "ensemble": {
           "RandomForestClassifier": {
-            "usage": 1,
+            "usage": 0,
             "run_feature_selection": false,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "n_estimators": [10, 50, 100, 200, 300],
                 "criterion": ["gini", "entropy"],
@@ -842,21 +922,9 @@
             }
           },
           "HistGradientBoostingClassifier": {
-            "usage": 1,
+            "usage": 0,
             "run_feature_selection": false,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "max_leaf_nodes": [15, 31, 63, 127],
                 "max_depth": [10, 20, 30],
@@ -866,21 +934,9 @@
             }
           },
           "AdaBoostClassifier": {
-            "usage": 1,
+            "usage": 0,
             "run_feature_selection": false,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "n_estimators": [10, 50, 100, 200, 300],
                 "algorithm": ["SAMME.R"],
@@ -892,21 +948,9 @@
         },
         "neural_network": {
           "MLPClassifier": {
-            "usage": 1,
+            "usage": 0,
             "run_feature_selection": false,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "activation": ["identity", "logistic", "tanh", "relu"],
                 "solver": ["lbfgs", "sgd", "adam"],
@@ -921,21 +965,9 @@
         },
         "XGBoost": {
           "XGBClassifier": {
-            "usage": 1,
+            "usage": 0,
             "run_feature_selection": false,
             "hyperparameters": {
-              "hyperparameter_settings": {
-                "n_iter": 3,
-                "scoring": ["accuracy"],
-                "cv": {
-                  "RepeatedStratifiedKFold": {
-                    "n_splits": 7,
-                    "n_repeats": 3,
-                    "random_state": 0
-                  }
-                },
-                "n_jobs": 2
-              },
               "param_distribution": {
                 "n_estimators": [10, 50, 100, 200, 300],
                 "max_depth": [3, 5, 7, 9, 12],
@@ -943,6 +975,33 @@
               }
             }
           }
+        },
+        "CatBoost": {
+          "CatBoostClassifier": {
+            "usage": 0,
+            "run_feature_selection": false,
+            "hyperparameters": {
+              "param_distribution": {
+                "depth": [4, 6, 8, 10],
+                "learning_rate": [0.01, 0.05, 0.1, 0.2, 0.3],
+                "min_child_samples": [1, 5, 10, 20, 30]
+              }
+            }
+          }
+        },
+        "LightGBM": {
+          "LGBMClassifier": {
+            "usage": 0,
+            "run_feature_selection": false,
+            "hyperparameters": {
+              "param_distribution": {
+                "n_estimators": [10, 50, 100, 200, 300],
+                "max_depth": [3, 5, 7, 9, 12],
+                "learning_rate": [0.01, 0.05, 0.1, 0.2, 0.3],
+                "num_leaves": [15, 31, 63, 127, 255]
+              }
+            }
+          }
         }
       }
     }
diff --git a/preprocessing/src/main.py b/preprocessing/src/main.py
index 4479e16..d5d905b 100644
--- a/preprocessing/src/main.py
+++ b/preprocessing/src/main.py
@@ -57,27 +57,35 @@
 
 class Orchestrator(object):
 
-    def __init__(self, config, path_backbone, data_dict, saving_path):
+    def __init__(self, config, path_backbone, data_dict, saving_path, encoder_dict):
         self.config = config
         self.path_backbone = path_backbone
         self.data_dict = data_dict
         self.saving_path = saving_path
+        self.encoder_dict = encoder_dict
 
     def run_PreProcessor(self) -> dict:
         logger.warning(
             "Note: It is important that numerical columns are pre-processed first, before categorical ones. This is to avoid OneHotEncoded columns (binary 0 and 1) to be seen as numerical during numerical pre-processing."
         )
         config = self.config["preprocessing"]
-        PreProcessorPipeline = PreProcessor(config=config)
+        PreProcessorPipeline = PreProcessor(
+            config=config, encoder_dict=self.encoder_dict
+        )
 
         data = self.data_dict["raw_data"].copy()
 
+        data = PreProcessorPipeline.NA_solver(data=data)
+
         # Start by pre-processing selected numerical columns
-        data = PreProcessorPipeline.encoders(data=data, dtype="numerical")
+        data, encoder_dict = PreProcessorPipeline.encoders(data=data, dtype="numerical")
 
         # Finally, process categorical columns
-        data = PreProcessorPipeline.encoders(data=data, dtype="categorical")
+        data, encoder_dict = PreProcessorPipeline.encoders(
+            data=data, dtype="categorical"
+        )
 
+        self.encoder_dict = encoder_dict
         self.data_dict.setdefault("preprocessed_data", data)
 
     def run_ModelTraining(self) -> dict:
@@ -100,15 +108,6 @@ def run_ModelTraining(self) -> dict:
 
         model_settings = config[modelling_problem_type]
 
-        # Load full dataset
-        # TODO: add NA_solver here
-        data.fillna(0, inplace=True)
-
-        # Drop all rows that contain NA
-        # logger.info(f"Data shape before dropping NAs {data.shape}")
-        # data.dropna(inplace=True)
-        # logger.info(f"Data shape after dropping NAs {data.shape}")
-
         if "all" in features:
             features = data.columns.tolist()
             features.remove(target)
@@ -131,6 +130,9 @@ def run_ModelTraining(self) -> dict:
         X_train, X_test, y_train, y_test = ModelTrainingPipeline.tabular_data_split(
             X, y, modelling_problem_type
         )
+        self.data_dict["original_X_test"] = X_test.copy()
+        self.data_dict["original_X_train"] = X_train.copy()
+
         # Loop through each model, optimize them, find the model that generalizes best and get the predictions from this best model.
         if modelling_problem_type == "classification":
             best_clf, cv_results_dict, param_distribution, label_encoder, X_test_new = (
@@ -147,6 +149,11 @@ def run_ModelTraining(self) -> dict:
             # Save the best model
             ModelTrainingPipeline.save_best_model(best_clf)
 
+            # Shap values and feature importance analysis
+            ModelTrainingPipeline.shap_analysis(
+                best_clf, X_test_new, self.data_dict["original_X_test"], y_test
+            )
+
         elif modelling_problem_type == "regression":
             baseline_model = ModelTrainingPipeline.find_best_reg(
                 model_settings, X_train, X_test, y_train, y_test, modelling_problem_type
@@ -190,11 +197,13 @@ def main(CONFIG_PATH: str):
     logger.info(f"Directory where outputs will be saved: {directory}")
 
     data_dict = {"raw_data": data}
+    encoder_dict = {"numerical_encoder": {}, "categorical_encoder": {}}
     ORCHESTRATOR = Orchestrator(
         config=config,
         path_backbone=path_backbone,
         data_dict=data_dict,
         saving_path=directory,
+        encoder_dict=encoder_dict,
     )
     ORCHESTRATOR.run_PreProcessor()
     ORCHESTRATOR.run_ModelTraining()
diff --git a/preprocessing/src/preprocessing.py b/preprocessing/src/preprocessing.py
index b0cff2c..689df9f 100644
--- a/preprocessing/src/preprocessing.py
+++ b/preprocessing/src/preprocessing.py
@@ -16,14 +16,22 @@
 from loguru import logger
 import pandas as pd
 import warnings
+from sklearn.linear_model import LinearRegression
+from typing import List
+import pandas as pd
+import numpy as np
+from sklearn.linear_model import LinearRegression
+from sklearn.preprocessing import OrdinalEncoder
+from sklearn.ensemble import HistGradientBoostingClassifier
 
 warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
 
 
 class PreProcessor:
 
-    def __init__(self, config: dict):
+    def __init__(self, config: dict, encoder_dict: dict):
         self.config = config
+        self.encoder_dict = encoder_dict
 
     def encoders(self, data: pd.DataFrame, dtype: str):
 
@@ -116,6 +124,7 @@ def encoders(self, data: pd.DataFrame, dtype: str):
                     remainder="drop",
                     verbose_feature_names_out=True,
                 )
+
                 # Create a new dataframe with just the new transformed columns
                 new_data = pd.DataFrame(
                     preprocessor.fit_transform(data_cp),
@@ -139,6 +148,69 @@ def encoders(self, data: pd.DataFrame, dtype: str):
                         ],
                         axis=1,
                     )
-        return data_cp
 
-    def NA_solver(self): ...
+            if dtype == "categorical":
+                self.encoder_dict["categorical_encoder"][
+                    preprocessor_name
+                ] = preprocessor
+            elif dtype == "numerical":
+                self.encoder_dict["numerical_encoder"][preprocessor_name] = preprocessor
+
+        return data_cp, self.encoder_dict
+
+    def NA_solver(self, data: pd.DataFrame):
+        """Impute the missing values of ``data`` in place and return it.
+
+        Columns whose name contains ``_mut`` (except ``mutation_count``) have
+        their strings replaced by 1 and are filled with their mode; every
+        other column is ordinal-encoded and its gaps are predicted by a
+        HistGradientBoostingRegressor trained on the rows where it is observed.
+        """
+        from sklearn.ensemble import HistGradientBoostingRegressor
+
+        logger.info("Running NaN solver")
+
+        # Untouched snapshot: each column is encoded from its raw values
+        data_copy = data.copy()
+
+        for column in data.columns:
+            if column != "mutation_count" and "_mut" in column:
+                # NOTE(review): non-string entries (incl. NaN) are kept, so the
+                # mode fill below presumably turns missing flags into 1 - confirm
+                data[column] = data_copy[column].apply(
+                    lambda x: 1 if isinstance(x, str) else x
+                )
+                data[column] = data[column].astype(object)
+            else:
+                encoder = OrdinalEncoder()
+                data[column] = encoder.fit_transform(
+                    data_copy[column].values.reshape(-1, 1)
+                )
+
+        for column in data.columns:
+            if data[column].dtype == "object":
+                # Majority vote. Assign the result back: fillna(inplace=True)
+                # on a column selection may only update a temporary copy.
+                majority_vote = data[column].mode().iloc[0]
+                data[column] = data[column].fillna(majority_vote)
+            elif data[column].dtype == "float" or data[column].dtype == "int":
+                missing_indices = data[column].isnull()
+                if missing_indices.sum() > 0:
+                    non_missing_indices = ~missing_indices
+                    # Fit on the rows where the column is observed
+                    X = data[non_missing_indices].drop(column, axis=1)
+                    y = data.loc[non_missing_indices, column]
+                    model = HistGradientBoostingRegressor()
+                    model.fit(X, y)
+
+                    X_missing = data[missing_indices].drop(column, axis=1)
+                    predicted_values = model.predict(X_missing)
+
+                    # Round when the observed values are all whole numbers.
+                    # Only `y` is tested: NaN never counts as an integer, so a
+                    # whole-column test could not succeed here.
+                    if y.apply(float.is_integer).all():
+                        predicted_values = np.round(predicted_values)
+
+                    data.loc[missing_indices, column] = predicted_values
+        return data
diff --git a/preprocessing/src/train_model.py b/preprocessing/src/train_model.py
index 02ebf6e..bb09ba7 100644
--- a/preprocessing/src/train_model.py
+++ b/preprocessing/src/train_model.py
@@ -26,6 +26,9 @@
 )
 from sklearn.neighbors import KNeighborsClassifier
 import xgboost
+import catboost
+import lightgbm
+import optuna
 
 # from xgboost import XGBClassifier
 from sklearn.neural_network import MLPClassifier
@@ -45,6 +48,10 @@
     HistGradientBoostingRegressor,
     AdaBoostClassifier,
 )
+
+from catboost import CatBoostClassifier
+from lightgbm import LGBMClassifier
+
 from sklearn.model_selection import train_test_split, RandomizedSearchCV
 from sklearn.feature_selection import (
     SequentialFeatureSelector,
@@ -196,6 +203,29 @@ def train_and_optimize_clf(
         y_train = y_train.T.to_numpy()[0]
         y_test = y_test.T.to_numpy()[0]
 
+        # Gather CV settings
+        cv_settings = model_settings["cv_settings"]
+        eval_score = cv_settings["scoring"][0]
+
+        # Load cross validation strategy dynamically
+        cv = _return_cross_validation(cv_settings)
+
+        # Get optimization type
+        optimization = model_settings["optimization"]
+
+        optimization_type_list = [
+            key
+            for key, value in model_settings["optimization"].items()
+            if value["usage"]
+        ]
+
+        if len(optimization_type_list) > 1:
+            raise ValueError(
+                f"Only one optimization type can be used. You currently have {len(optimization_type_list)} types selected. Please, adjust the settings."
+            )
+
+        optimization_type = optimization_type_list[0]
+
         for model_type, model_config in models.items():
             for model_name, model_utils in model_config.items():
                 if model_utils["usage"] == 1:
@@ -207,6 +237,10 @@ def train_and_optimize_clf(
 
                     if "XGB" in model_name:
                         model = eval(f"xgboost.{model_name}()")
+                    elif "CatBoost" in model_name:
+                        model = eval(f"catboost.{model_name}()")
+                    elif "LGBMClassifier" in model_name:
+                        model = eval(f"lightgbm.{model_name}()")
                     else:
                         model = getattr(
                             importlib.import_module(f"sklearn.{model_type}"), model_name
@@ -244,12 +278,6 @@ def train_and_optimize_clf(
 
                     pipeline = make_pipeline(model)
 
-                    hyperparameter_settings = model_utils["hyperparameters"][
-                        "hyperparameter_settings"
-                    ]
-
-                    logger.info(f"Running CV for {model_name}.")
-
                     # Transform strings in the param_distribution to actual ranges
                     param_distribution_final = {
                         key: (
@@ -264,45 +292,64 @@ def train_and_optimize_clf(
 
                     # Optimizing the best baseline model on training data
                     logger.info(
-                        f"Optimizing baseline model using RandomizedSearchCV. \n Baseline model is {model_name} and it will be optimized with the following parameters: \n{param_distribution_final}"
+                        f"Optimizing baseline model using '{optimization_type}'. \n Baseline model is {model_name} and it will be optimized with the following parameters: \n{param_distribution_final}"
                     )
 
-                    # Load cross validation strategy dynamically
-                    cv = _return_cross_validation(hyperparameter_settings)
-
-                    # Optimize hyperparameters
-                    randomized_search = RandomizedSearchCV(
-                        model,  # Choose just the model and not the imputer
-                        param_distributions=param_distribution_final,
-                        n_iter=hyperparameter_settings["n_iter"],
-                        scoring=hyperparameter_settings["scoring"],
-                        cv=cv,
-                        random_state=0,
-                        n_jobs=hyperparameter_settings["n_jobs"],
-                        verbose=3,
-                        refit=hyperparameter_settings["scoring"][0],
-                        return_train_score=True,
-                    ).fit(X_train_cp, y_train_cp)
+                    if optimization_type == "optuna":
+                        study = self.optimize_clf(
+                            param_distribution_final,
+                            model,
+                            X_train_cp,
+                            y_train_cp,
+                            cv,
+                            cv_settings,
+                        )
+
+                        best_trial = study.best_trial
+                        best_params = best_trial.params
+                        best_cv_sore = best_trial.value
+
+                        print(f"Best hyperparameters for {model_name}: {best_params}")
+                        print(f"Best score: {best_cv_sore}")
+
+                        optimized_clf = model.set_params(**best_params)
+
+                        optimized_clf.fit(X_train_cp, y_train_cp)
+
+                    elif optimization_type == "RandomizedSearchCV":
+                        # Optimize hyperparameters
+                        optimized_clf = RandomizedSearchCV(
+                            model,  # Choose just the model and not the imputer
+                            param_distributions=param_distribution_final,
+                            n_iter=cv_settings["n_iter"],
+                            scoring=cv_settings["scoring"],
+                            cv=cv,
+                            random_state=0,
+                            n_jobs=cv_settings["n_jobs"],
+                            verbose=3,
+                            refit=cv_settings["scoring"][0],
+                            return_train_score=True,
+                        ).fit(X_train_cp, y_train_cp)
 
                     # Apply cross validation to get the best model
-                    if hasattr(randomized_search, "best_estimator_"):
+                    if hasattr(optimized_clf, "best_estimator_"):
                         cv_results = cross_validate(
-                            randomized_search.best_estimator_,
+                            optimized_clf.best_estimator_,
                             X_train_cp,
                             y_train_cp,
                             cv=cv,
-                            scoring=hyperparameter_settings["scoring"],
+                            scoring=cv_settings["scoring"],
                             return_estimator=True,
                             return_train_score=True,
                             error_score="raise",
                         )
                     else:
                         cv_results = cross_validate(
-                            randomized_search,
+                            optimized_clf,
                             X_train_cp,
                             y_train_cp,
                             cv=cv,
-                            scoring=hyperparameter_settings["scoring"],
+                            scoring=cv_settings["scoring"],
                             return_estimator=True,
                             return_train_score=True,
                             error_score="raise",
@@ -311,7 +358,6 @@ def train_and_optimize_clf(
                     cv_results_dict[model_name] = cv_results
                     # This "test_scoring_type" is actually the validation set. The actual test set will only be used to calculate the final classification report to avoid data leakage.Do note that if more scorings are added to the list, only the first one will be used to evaluate the best model score
 
-                    eval_score = hyperparameter_settings["scoring"][0]
                     cv_results_test = cv_results[f"test_{eval_score}"]
                     cv_results_train = cv_results[f"train_{eval_score}"]
                     current_model_score = cv_results_test.mean()
@@ -323,7 +369,7 @@ def train_and_optimize_clf(
 
                     # NOTE: it doesn't make sense to add the Confusion Matrix for the traning data since the model has already been trained and seen the training data. If we fit the model again on the training data, it will provide much better results in the confusion matrix than the results that were seen in the training and validation scores. Confusion Matrix is only used on the test data
                     self.plot_training_curves(
-                        randomized_search,
+                        optimized_clf,
                         eval_score=eval_score,
                         test_label_name="val",
                         model_name=model_name,
@@ -333,7 +379,7 @@ def train_and_optimize_clf(
 
                     # Use the optimized model to predict on the actual test set (unseen data)
                     test_results, pred_metrics_dict, y_pred_decoded = self.predict_clf(
-                        randomized_search,
+                        optimized_clf,
                         model_settings,
                         X_test_cp,
                         y_test,
@@ -354,7 +400,7 @@ def train_and_optimize_clf(
                     ][eval_score]
 
                     logger.info(
-                        f"\n\n- Score being used to attain the best model is '{eval_score}'.\n\n- The scores for the optimized version with RandomGridSearchCV of the '{model_name}' model are:\n\n- Validation score:\n"
+                        f"\n\n- Score being used to attain the best model is '{eval_score}'.\n\n- The scores for the optimized version with '{optimization_type}' of the '{model_name}' model are:\n\n- Validation score:\n"
                         f"{cv_results_test.mean():.3f} ± {cv_results_test.std():.3f}.\n\n- Training score:\n"
                         f"{cv_results_train.mean():.3f} ± {cv_results_train.std():.3f}. \n\n- Test score:\n{test_model_score:.3f}"
                     )
@@ -362,7 +408,7 @@ def train_and_optimize_clf(
                     # Check if test score is better than the previous best score. If so, save the current parameters and results as well as the best classifier (best classifier = the classifier that performed best on the unseen test data)
                     if test_model_score > best_score:
                         best_score = test_model_score
-                        best_baseline_model = randomized_search
+                        best_baseline_model = optimized_clf
                         param_distribution = model_utils["hyperparameters"][
                             "param_distribution"
                         ]
@@ -370,9 +416,14 @@ def train_and_optimize_clf(
                         best_model_test_results = pred_metrics_dict
                         best_model_X_test = X_test_cp
 
-        logger.info(
-            f"Best model is: {best_baseline_model.best_estimator_.__class__.__name__} and it has the following optimized parameters:\n\n {best_baseline_model.best_params_}\n\n... with a test '{eval_score}' of {best_score:.3f}"
-        )
+        # RandomizedSearchCV wraps the winner; the optuna path returns the bare estimator
+        searched = hasattr(best_baseline_model, "best_estimator_")
+        winner = best_baseline_model.best_estimator_ if searched else best_baseline_model
+        # get_params() must be called: the bare attribute logs a bound-method repr
+        winner_params = best_baseline_model.best_params_ if searched else winner.get_params()
+        logger.info(
+            f"Best model is: {winner.__class__.__name__} and it has the following optimized parameters:\n\n {winner_params}\n\n... with a test '{eval_score}' of {best_score:.3f}"
+        )
 
         # Plot the training metrics
         self.plot_CV_results(
@@ -398,6 +449,63 @@ def train_and_optimize_clf(
             best_model_X_test,
         )
 
+    # Define the objective function
+
+    def optimize_clf(
+        self,
+        param_distribution,
+        model,
+        X_train_cp,
+        y_train_cp,
+        cv,
+        cv_settings,
+    ):
+        """Tune ``model`` with an Optuna study and return the finished study.
+
+        Every trial draws one value per hyperparameter with
+        ``trial.suggest_categorical``, applies the draw to ``model`` through
+        ``set_params`` (so ``model`` is left holding the last trial's values)
+        and returns the mean ``cross_val_score`` over ``cv``.
+
+        Args:
+            param_distribution: hyperparameter name -> candidate values.
+            model: estimator exposing ``set_params``; mutated by the trials.
+            X_train_cp: training features passed to ``cross_val_score``.
+            y_train_cp: training labels passed to ``cross_val_score``.
+            cv: cross-validation strategy passed to ``cross_val_score``.
+            cv_settings: settings dict; only ``cv_settings["scoring"][0]`` is
+                read and used as the metric to maximize.
+
+        Returns:
+            The ``optuna`` study after 25 trials.
+        """
+        from sklearn.model_selection import cross_val_score
+
+        scoring = cv_settings["scoring"][0]
+
+        def objective(trial):
+            # The closure captures the data and settings, so no lambda
+            # adapter is needed to forward them on every trial.
+            params = {
+                name: trial.suggest_categorical(name, choices)
+                for name, choices in param_distribution.items()
+            }
+            model.set_params(**params)
+            return cross_val_score(
+                model,
+                X_train_cp,
+                y_train_cp,
+                cv=cv,
+                scoring=scoring,
+            ).mean()
+
+        # Create the study and run a fixed budget of 25 trials,
+        # keeping the trial with the highest mean score.
+        study = optuna.create_study(direction="maximize")
+        study.optimize(objective, n_trials=25)
+
+        return study
+
     def predict_clf(
         self,
         best_baseline_model,
@@ -410,7 +518,10 @@ def predict_clf(
     ):
 
         # Get information for the best optimized model
-        best_model = best_baseline_model.best_estimator_
+        if hasattr(best_baseline_model, "best_estimator_"):
+            best_model = best_baseline_model.best_estimator_
+        else:
+            best_model = best_baseline_model
         best_model_name = best_model.__class__.__name__
 
         # Get test score (no need for this as its already in the classifiction report down below)
@@ -476,7 +587,6 @@ def save_best_model(self, best_model):
             f"{self.saving_path}/Model/Best Optimized Model/{best_model_name}.joblib",
         )
 
-    def shapley_values(self): ...
     def plot_conf_matrix(self, clf, X, y, model_name, prefix):
         from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
 
@@ -503,9 +613,12 @@ def plot_training_curves(
         cv_results,
         use_from_cross_val=False,
     ):
-
-        val_scores = clf.cv_results_[f"mean_test_{eval_score}"]
-        train_scores = clf.cv_results_[f"mean_train_{eval_score}"]
+        if hasattr(clf, "cv_results_"):
+            val_scores = clf.cv_results_[f"mean_test_{eval_score}"]
+            train_scores = clf.cv_results_[f"mean_train_{eval_score}"]
+        else:
+            val_scores = cv_results[f"test_{eval_score}"]
+            train_scores = cv_results[f"train_{eval_score}"]
 
         plt.plot(val_scores, label=test_label_name)
         plt.plot(train_scores, label="train")
@@ -553,10 +666,12 @@ def feature_selection(self, selector_type, model, config, X_train, y_train):
                 importance_getter=local_config["importance_getter"],
             )
 
-            selector_fitted = selector.fit(X_train, y_train)
-            selected_features = [
-                col for col in selector_fitted.get_feature_names_out(X_train.columns)
-            ]
+        selector_fitted = selector.fit(X_train, y_train)
+        selected_features = (
+            selector_fitted.get_feature_names_out(data.columns)
+            if selector_type == "SequentialFeatureSelector"
+            else selector_fitted.get_feature_names_out(X_train.columns)
+        )
 
         return selector_fitted, selected_features
 
@@ -697,3 +812,87 @@ def plot_clfs(
         with plt.rc_context():  # Use this to set figure params like size and dpi
             plt.savefig(f"{saving_path}\\Plots\\AUC.png", bbox_inches="tight")
         # plt.close()
+
+    def shap_analysis(self, model, X_test, X_test_original, y_test):
+        """Explain ``model`` with SHAP and save bar/summary/decision/force plots.
+
+        ``model`` may be a search object exposing ``best_estimator_``; ``X_test``
+        and ``y_test`` are accepted but not used. A copy of ``X_test_original``
+        is passed through the stored encoders' ``inverse_transform`` and then
+        explained. Returns ``(shap_values, explainer)``; raises ``ValueError``
+        when no explainer is mapped to the model's class.
+        """
+        import shap
+
+        shap.initjs()
+        # Unwrap search objects the way predict_clf does, otherwise the
+        # isinstance checks below never see the fitted estimator.
+        if hasattr(model, "best_estimator_"):
+            model = model.best_estimator_
+
+        X_test_original_cp = X_test_original.copy()
+
+        # NOTE(review): assumes self.encoder_dict is set on this instance and
+        # each encoder's inverse_transform returns a DataFrame - confirm
+        for col_type, preprocessor_dict in self.encoder_dict.items():
+            if col_type in ("categorical_encoder", "numerical_encoder"):
+                for encoder in preprocessor_dict.values():
+                    X_test_original_cp = encoder.inverse_transform(X_test_original_cp)
+
+        if isinstance(
+            model, (LogisticRegression, Perceptron, SGDClassifier, RidgeClassifierCV)
+        ):
+            explainer = shap.LinearExplainer(model, X_test_original_cp)
+        elif isinstance(
+            model,
+            (
+                RandomForestClassifier,
+                DecisionTreeClassifier,
+                HistGradientBoostingClassifier,
+                AdaBoostClassifier,
+                xgboost.XGBClassifier,  # qualified via the imported xgboost module
+                CatBoostClassifier,
+                LGBMClassifier,
+            ),
+        ):
+            explainer = shap.TreeExplainer(model)
+        elif isinstance(
+            model, (KNeighborsClassifier, SVC, ComplementNB, MLPClassifier)
+        ):
+            explainer = shap.KernelExplainer(model.predict_proba, X_test_original_cp)
+        else:
+            raise ValueError(f"Unsupported model: {type(model)}")
+
+        explainer_shap = explainer(X_test_original_cp)
+        shap_values = explainer.shap_values(X_test_original_cp)
+        plots_dir = f"{self.saving_path}\\Plots"
+
+        # show=False leaves each figure open so savefig captures the plot
+        shap.plots.bar(explainer_shap, show=False)
+        plt.savefig(f"{plots_dir}\\SHAP_Bar_Plot.png", bbox_inches="tight")
+        plt.close()
+
+        shap.summary_plot(explainer_shap, X_test_original_cp, show=False)
+        plt.savefig(f"{plots_dir}\\SHAP_Summary_Plot.png", bbox_inches="tight")
+        plt.close()
+
+        # Decision and force plots for the first test instance
+        first_values = shap_values[0, :]
+        first_row = X_test_original_cp.iloc[0, :]
+        shap.decision_plot(
+            explainer.expected_value, first_values, first_row, show=False
+        )
+        plt.savefig(f"{plots_dir}\\SHAP_Decision_Plot.png", bbox_inches="tight")
+        plt.close()
+
+        shap.force_plot(
+            explainer.expected_value,
+            first_values,
+            first_row,
+            matplotlib=True,
+            show=False,
+        )
+        plt.savefig(f"{plots_dir}\\SHAP_Force_Plot.png", bbox_inches="tight")
+        plt.close()
+
+        return shap_values, explainer
diff --git a/preprocessing/src/utils.py b/preprocessing/src/utils.py
index 748fbac..c90d4e5 100644
--- a/preprocessing/src/utils.py
+++ b/preprocessing/src/utils.py
@@ -23,7 +23,9 @@ def _return_cross_validation(local_config: dict):
     cv_class_name = str(list(local_config["cv"].keys())[0])
     cv_params = local_config["cv"][cv_class_name]
 
-    logger.info(f"Loading cross-validation strategy: {cv_class_name}. ")
+    logger.info(
+        f"\n\nLoading cross-validation strategy '{cv_class_name}' with the following parameters:\n{cv_params}. "
+    )
     cv = getattr(
         importlib.import_module(f"sklearn.model_selection"),
         str(list(local_config["cv"].keys())[0]),