From a243669f85a9c009ffd6c8ff98335cc609c03afa Mon Sep 17 00:00:00 2001 From: shravya312 Date: Sun, 20 Oct 2024 20:45:40 +0530 Subject: [PATCH 1/2] Added Exception Handling in all the places where there is a possibility of error --- explainableai/feature_engineering.py | 91 +++++++++++++++++++--------- 1 file changed, 64 insertions(+), 27 deletions(-) diff --git a/explainableai/feature_engineering.py b/explainableai/feature_engineering.py index 78e2910..6957987 100644 --- a/explainableai/feature_engineering.py +++ b/explainableai/feature_engineering.py @@ -1,34 +1,71 @@ -from sklearn.preprocessing import PolynomialFeatures +# feature_interaction.py +import itertools import numpy as np -import pandas as pd +import matplotlib.pyplot as plt +from sklearn.inspection import partial_dependence +import time import logging -logger=logging.getLogger(__name__) +logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) -def automated_feature_engineering(X_train, X_test=None): - # Convert categorical variables to one-hot encoding - logger.debug("Convert categorical variables to one-hot encoding...") +def analyze_feature_interactions(model, X, feature_names, top_n=5, max_interactions=10): + logger.debug("Starting feature interaction analysis...") try: - X_train_encoded = pd.get_dummies(X_train, drop_first=True) - - if X_test is not None: - X_test_encoded = pd.get_dummies(X_test, drop_first=True) - # Ensure X_test has the same columns as X_train - logger.debug("Ensuring test data has the same columns as training data...") - for col in X_train_encoded.columns: - if col not in X_test_encoded.columns: - X_test_encoded[col] = 0 - X_test_encoded = X_test_encoded[X_train_encoded.columns] - - - feature_names = X_train_encoded.columns.tolist() - - if X_test is not None: - logger.info("Data Converted...") - return X_train_encoded.values, X_test_encoded.values, feature_names - - logger.info("Data Converted...") - return X_train_encoded.values, feature_names + # Ensure model has feature_importances_ + if not hasattr(model, 'feature_importances_'): + raise AttributeError("Model does not have 'feature_importances_' attribute.") + + # Calculate and sort feature importances + feature_importance = dict(zip(feature_names, model.feature_importances_)) + top_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)[:top_n] + top_feature_names = [f[0] for f in top_features] + + interactions = [] + for i, (f1, f2) in enumerate(itertools.combinations(top_feature_names, 2)): + if i >= max_interactions: + logger.info(f"Reached maximum number of interactions ({max_interactions}). Stopping analysis.") + break + + logger.info(f"Analyzing interaction between {f1} and {f2}...") + start_time = time.time() + + try: + f1_idx = feature_names.index(f1) + f2_idx = feature_names.index(f2) + except ValueError as ve: + logger.error(f"Feature {f1} or {f2} not found in feature_names: {ve}") + continue + + try: + pd_result = partial_dependence(model, X, features=[f1_idx, f2_idx], kind="average") + except Exception as pd_error: + logger.error(f"Partial dependence computation failed for {f1} and {f2}: {pd_error}") + continue + + interactions.append((f1, f2, pd_result)) + logger.info(f"Interaction analysis for {f1} and {f2} completed in {time.time() - start_time:.2f} seconds.") + + for i, (f1, f2, (pd_values, (ax1_values, ax2_values))) in enumerate(interactions): + try: + logger.debug(f"Plotting interaction {i+1} between {f1} and {f2}...") + fig, ax = plt.subplots(figsize=(10, 6)) + XX, YY = np.meshgrid(ax1_values, ax2_values) + Z = pd_values.reshape(XX.shape).T + contour = ax.contourf(XX, YY, Z, cmap="RdBu_r", alpha=0.5) + ax.set_xlabel(f1) + ax.set_ylabel(f2) + ax.set_title(f'Partial Dependence of {f1} and {f2}') + plt.colorbar(contour) + plt.savefig(f'interaction_{i+1}_{f1}_{f2}.png') + plt.close() + except Exception as plot_error: + logger.error(f"Failed to plot interaction for {f1} and {f2}: {plot_error}") + + logger.info("Feature interaction analysis completed.") + return interactions + + except AttributeError as attr_err: + logger.error(f"Model does not support feature importance or other attribute issue: {attr_err}") except Exception as e: - logger.error(f"Error occurred during automated feature engineering...{str(e)}") \ No newline at end of file + logger.error(f"An unexpected error occurred: {e}") From 1fe01338a55271edcd286f0bd0beabb1a8eefbe8 Mon Sep 17 00:00:00 2001 From: shravya312 Date: Sun, 20 Oct 2024 20:47:55 +0530 Subject: [PATCH 2/2] Added Exception Handling in all the places where there is a possibility of error --- explainableai/feature_interaction.py | 57 +++++++++++++++++++--------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/explainableai/feature_interaction.py b/explainableai/feature_interaction.py index 0830a39..3baa09f 100644 --- a/explainableai/feature_interaction.py +++ b/explainableai/feature_interaction.py @@ -1,4 +1,3 @@ -# feature_interaction.py import itertools import numpy as np import matplotlib.pyplot as plt @@ -6,12 +5,17 @@ import time import logging -logger=logging.getLogger(__name__) +logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) def analyze_feature_interactions(model, X, feature_names, top_n=5, max_interactions=10): logger.debug("Starting feature interaction analysis...") try: + # Ensure model has feature_importances_ + if not hasattr(model, 'feature_importances_'): + raise AttributeError("Model does not have 'feature_importances_' attribute.") + + # Calculate and sort feature importances feature_importance = dict(zip(feature_names, model.feature_importances_)) top_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)[:top_n] top_feature_names = [f[0] for f in top_features] @@ -21,29 +25,46 @@ def analyze_feature_interactions(model, X, feature_names, top_n=5, max_interacti if i >= max_interactions: logger.info(f"Reached maximum number of interactions ({max_interactions}). Stopping analysis.") break - + logger.info(f"Analyzing interaction between {f1} and {f2}...") start_time = time.time() - f1_idx = feature_names.index(f1) - f2_idx = feature_names.index(f2) - pd_result = partial_dependence(model, X, features=[f1_idx, f2_idx], kind="average") + + try: + f1_idx = feature_names.index(f1) + f2_idx = feature_names.index(f2) + except ValueError as ve: + logger.error(f"Feature {f1} or {f2} not found in feature_names: {ve}") + continue + + try: + pd_result = partial_dependence(model, X, features=[f1_idx, f2_idx], kind="average") + except Exception as pd_error: + logger.error(f"Partial dependence computation failed for {f1} and {f2}: {pd_error}") + continue + interactions.append((f1, f2, pd_result)) logger.info(f"Interaction analysis for {f1} and {f2} completed in {time.time() - start_time:.2f} seconds.") for i, (f1, f2, (pd_values, (ax1_values, ax2_values))) in enumerate(interactions): - logger.debug(f"Plotting interaction {i+1} between {f1} and {f2}...") - fig, ax = plt.subplots(figsize=(10, 6)) - XX, YY = np.meshgrid(ax1_values, ax2_values) - Z = pd_values.reshape(XX.shape).T - contour = ax.contourf(XX, YY, Z, cmap="RdBu_r", alpha=0.5) - ax.set_xlabel(f1) - ax.set_ylabel(f2) - ax.set_title(f'Partial Dependence of {f1} and {f2}') - plt.colorbar(contour) - plt.savefig(f'interaction_{i+1}_{f1}_{f2}.png') - plt.close() + try: + logger.debug(f"Plotting interaction {i+1} between {f1} and {f2}...") + fig, ax = plt.subplots(figsize=(10, 6)) + XX, YY = np.meshgrid(ax1_values, ax2_values) + Z = pd_values.reshape(XX.shape).T + contour = ax.contourf(XX, YY, Z, cmap="RdBu_r", alpha=0.5) + ax.set_xlabel(f1) + ax.set_ylabel(f2) + ax.set_title(f'Partial Dependence of {f1} and {f2}') + plt.colorbar(contour) + plt.savefig(f'interaction_{i+1}_{f1}_{f2}.png') + plt.close() + except Exception as plot_error: + logger.error(f"Failed to plot interaction for {f1} and {f2}: {plot_error}") logger.info("Feature interaction analysis completed.") return interactions + + except AttributeError as attr_err: + logger.error(f"Model does not support feature importance or other attribute issue: {attr_err}") except Exception as e: - logger.error(f"Some error occured in interaction...{str(e)}") \ No newline at end of file + logger.error(f"An unexpected error occurred: {e}")