Merge pull request #112 from shravya312/main

Added Exception Handling in feature_interaction.py
ombhojane · Oct 28, 2024 · 97aa237 · 97aa237
2 parents b40054f + 1fe0133
commit 97aa237
Show file tree

Hide file tree

Showing 2 changed files with 103 additions and 45 deletions.
diff --git a/explainableai/feature_engineering.py b/explainableai/feature_engineering.py
@@ -1,34 +1,71 @@
-from sklearn.preprocessing import PolynomialFeatures
+# feature_interaction.py
+import itertools
 import numpy as np
-import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.inspection import partial_dependence
+import time
 import logging
 
-logger=logging.getLogger(__name__)
+logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
 
-def automated_feature_engineering(X_train, X_test=None):
-    # Convert categorical variables to one-hot encoding
-    logger.debug("Convert categorical variables to one-hot encoding...")
+def analyze_feature_interactions(model, X, feature_names, top_n=5, max_interactions=10):
+    logger.debug("Starting feature interaction analysis...")
     try:
-        X_train_encoded = pd.get_dummies(X_train, drop_first=True)
-
-        if X_test is not None:
-            X_test_encoded = pd.get_dummies(X_test, drop_first=True)
-            # Ensure X_test has the same columns as X_train
-            logger.debug("Ensuring test data has the same columns as training data...")
-            for col in X_train_encoded.columns:
-                if col not in X_test_encoded.columns:
-                    X_test_encoded[col] = 0
-            X_test_encoded = X_test_encoded[X_train_encoded.columns]
-
-
-        feature_names = X_train_encoded.columns.tolist()
-
-        if X_test is not None:
-            logger.info("Data Converted...")
-            return X_train_encoded.values, X_test_encoded.values, feature_names
-
-        logger.info("Data Converted...")
-        return X_train_encoded.values, feature_names
+        # Ensure model has feature_importances_
+        if not hasattr(model, 'feature_importances_'):
+            raise AttributeError("Model does not have 'feature_importances_' attribute.")
+
+        # Calculate and sort feature importances
+        feature_importance = dict(zip(feature_names, model.feature_importances_))
+        top_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)[:top_n]
+        top_feature_names = [f[0] for f in top_features]
+
+        interactions = []
+        for i, (f1, f2) in enumerate(itertools.combinations(top_feature_names, 2)):
+            if i >= max_interactions:
+                logger.info(f"Reached maximum number of interactions ({max_interactions}). Stopping analysis.")
+                break
+
+            logger.info(f"Analyzing interaction between {f1} and {f2}...")
+            start_time = time.time()
+
+            try:
+                f1_idx = feature_names.index(f1)
+                f2_idx = feature_names.index(f2)
+            except ValueError as ve:
+                logger.error(f"Feature {f1} or {f2} not found in feature_names: {ve}")
+                continue
+
+            try:
+                pd_result = partial_dependence(model, X, features=[f1_idx, f2_idx], kind="average")
+            except Exception as pd_error:
+                logger.error(f"Partial dependence computation failed for {f1} and {f2}: {pd_error}")
+                continue
+
+            interactions.append((f1, f2, pd_result))
+            logger.info(f"Interaction analysis for {f1} and {f2} completed in {time.time() - start_time:.2f} seconds.")
+
+        for i, (f1, f2, (pd_values, (ax1_values, ax2_values))) in enumerate(interactions):
+            try:
+                logger.debug(f"Plotting interaction {i+1} between {f1} and {f2}...")
+                fig, ax = plt.subplots(figsize=(10, 6))
+                XX, YY = np.meshgrid(ax1_values, ax2_values)
+                Z = pd_values.reshape(XX.shape).T
+                contour = ax.contourf(XX, YY, Z, cmap="RdBu_r", alpha=0.5)
+                ax.set_xlabel(f1)
+                ax.set_ylabel(f2)
+                ax.set_title(f'Partial Dependence of {f1} and {f2}')
+                plt.colorbar(contour)
+                plt.savefig(f'interaction_{i+1}_{f1}_{f2}.png')
+                plt.close()
+            except Exception as plot_error:
+                logger.error(f"Failed to plot interaction for {f1} and {f2}: {plot_error}")
+
+        logger.info("Feature interaction analysis completed.")
+        return interactions
+
+    except AttributeError as attr_err:
+        logger.error(f"Model does not support feature importance or other attribute issue: {attr_err}")
     except Exception as e:
-        logger.error(f"Error occurred during automated feature engineering...{str(e)}")
+        logger.error(f"An unexpected error occurred: {e}")
diff --git a/explainableai/feature_interaction.py b/explainableai/feature_interaction.py
@@ -1,17 +1,21 @@
-# feature_interaction.py
 import itertools
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.inspection import partial_dependence
 import time
 import logging
 
-logger=logging.getLogger(__name__)
+logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
 
 def analyze_feature_interactions(model, X, feature_names, top_n=5, max_interactions=10):
     logger.debug("Starting feature interaction analysis...")
     try:
+        # Ensure model has feature_importances_
+        if not hasattr(model, 'feature_importances_'):
+            raise AttributeError("Model does not have 'feature_importances_' attribute.")
+
+        # Calculate and sort feature importances
         feature_importance = dict(zip(feature_names, model.feature_importances_))
         top_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)[:top_n]
         top_feature_names = [f[0] for f in top_features]
@@ -21,29 +25,46 @@ def analyze_feature_interactions(model, X, feature_names, top_n=5, max_interacti
             if i >= max_interactions:
                 logger.info(f"Reached maximum number of interactions ({max_interactions}). Stopping analysis.")
                 break
-            
+
             logger.info(f"Analyzing interaction between {f1} and {f2}...")
             start_time = time.time()
-            f1_idx = feature_names.index(f1)
-            f2_idx = feature_names.index(f2)
-            pd_result = partial_dependence(model, X, features=[f1_idx, f2_idx], kind="average")
+
+            try:
+                f1_idx = feature_names.index(f1)
+                f2_idx = feature_names.index(f2)
+            except ValueError as ve:
+                logger.error(f"Feature {f1} or {f2} not found in feature_names: {ve}")
+                continue
+
+            try:
+                pd_result = partial_dependence(model, X, features=[f1_idx, f2_idx], kind="average")
+            except Exception as pd_error:
+                logger.error(f"Partial dependence computation failed for {f1} and {f2}: {pd_error}")
+                continue
+
             interactions.append((f1, f2, pd_result))
             logger.info(f"Interaction analysis for {f1} and {f2} completed in {time.time() - start_time:.2f} seconds.")
 
         for i, (f1, f2, (pd_values, (ax1_values, ax2_values))) in enumerate(interactions):
-            logger.debug(f"Plotting interaction {i+1} between {f1} and {f2}...")
-            fig, ax = plt.subplots(figsize=(10, 6))
-            XX, YY = np.meshgrid(ax1_values, ax2_values)
-            Z = pd_values.reshape(XX.shape).T
-            contour = ax.contourf(XX, YY, Z, cmap="RdBu_r", alpha=0.5)
-            ax.set_xlabel(f1)
-            ax.set_ylabel(f2)
-            ax.set_title(f'Partial Dependence of {f1} and {f2}')
-            plt.colorbar(contour)
-            plt.savefig(f'interaction_{i+1}_{f1}_{f2}.png')
-            plt.close()
+            try:
+                logger.debug(f"Plotting interaction {i+1} between {f1} and {f2}...")
+                fig, ax = plt.subplots(figsize=(10, 6))
+                XX, YY = np.meshgrid(ax1_values, ax2_values)
+                Z = pd_values.reshape(XX.shape).T
+                contour = ax.contourf(XX, YY, Z, cmap="RdBu_r", alpha=0.5)
+                ax.set_xlabel(f1)
+                ax.set_ylabel(f2)
+                ax.set_title(f'Partial Dependence of {f1} and {f2}')
+                plt.colorbar(contour)
+                plt.savefig(f'interaction_{i+1}_{f1}_{f2}.png')
+                plt.close()
+            except Exception as plot_error:
+                logger.error(f"Failed to plot interaction for {f1} and {f2}: {plot_error}")
 
         logger.info("Feature interaction analysis completed.")
         return interactions
+
+    except AttributeError as attr_err:
+        logger.error(f"Model does not support feature importance or other attribute issue: {attr_err}")
     except Exception as e:
-        logger.error(f"Some error occured in interaction...{str(e)}")
+        logger.error(f"An unexpected error occurred: {e}")