Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Exception Handling in feature_interaction.py #112

Merged
merged 3 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 64 additions & 27 deletions explainableai/feature_engineering.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,71 @@
from sklearn.preprocessing import PolynomialFeatures
# feature_interaction.py
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.inspection import partial_dependence
import time
import logging

# Module-level logger named after this module. Its level is set to DEBUG
# at import time, so DEBUG records from this module pass the logger's own
# level check.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# NOTE(review): this span is a flattened diff hunk. Indentation and +/- markers
# are missing, and the lines of two different definitions are interleaved:
# automated_feature_engineering(X_train, X_test=None) and
# analyze_feature_interactions(model, X, feature_names, top_n, max_interactions).
# Presumably the first is the removed side and the second the added side of the
# diff -- confirm against the repository before relying on either.
def automated_feature_engineering(X_train, X_test=None):
# Convert categorical variables to one-hot encoding
logger.debug("Convert categorical variables to one-hot encoding...")
def analyze_feature_interactions(model, X, feature_names, top_n=5, max_interactions=10):
logger.debug("Starting feature interaction analysis...")
# The single `try:` below is shared by both interleaved bodies in this rendering.
try:
# --- automated_feature_engineering body ---
# One-hot encode the training data; drop_first=True drops the first level of
# each encoded column group.
X_train_encoded = pd.get_dummies(X_train, drop_first=True)

if X_test is not None:
X_test_encoded = pd.get_dummies(X_test, drop_first=True)
# Ensure X_test has the same columns as X_train
logger.debug("Ensuring test data has the same columns as training data...")
# Any training column absent from the encoded test data is added as a
# constant-0 column.
for col in X_train_encoded.columns:
if col not in X_test_encoded.columns:
X_test_encoded[col] = 0
# Select exactly the training columns, in training order, from the test data.
X_test_encoded = X_test_encoded[X_train_encoded.columns]


feature_names = X_train_encoded.columns.tolist()

# With X_test: returns (train values, test values, feature_names).
if X_test is not None:
logger.info("Data Converted...")
return X_train_encoded.values, X_test_encoded.values, feature_names

# Without X_test: returns (train values, feature_names).
logger.info("Data Converted...")
return X_train_encoded.values, feature_names
# --- analyze_feature_interactions body ---
# Ensure model has feature_importances_
if not hasattr(model, 'feature_importances_'):
raise AttributeError("Model does not have 'feature_importances_' attribute.")

# Calculate and sort feature importances
# Names are paired positionally with model.feature_importances_; the top_n
# names by descending importance are kept.
feature_importance = dict(zip(feature_names, model.feature_importances_))
top_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)[:top_n]
top_feature_names = [f[0] for f in top_features]

# Walk every unordered pair of top features; the loop stops once the pair
# counter reaches max_interactions (skipped pairs still advance the counter).
interactions = []
for i, (f1, f2) in enumerate(itertools.combinations(top_feature_names, 2)):
if i >= max_interactions:
logger.info(f"Reached maximum number of interactions ({max_interactions}). Stopping analysis.")
break

logger.info(f"Analyzing interaction between {f1} and {f2}...")
start_time = time.time()

# Translate names to positions in feature_names; a failed lookup skips the pair.
try:
f1_idx = feature_names.index(f1)
f2_idx = feature_names.index(f2)
except ValueError as ve:
logger.error(f"Feature {f1} or {f2} not found in feature_names: {ve}")
continue

# Any failure in the partial dependence call is logged and the pair is skipped.
try:
pd_result = partial_dependence(model, X, features=[f1_idx, f2_idx], kind="average")
except Exception as pd_error:
logger.error(f"Partial dependence computation failed for {f1} and {f2}: {pd_error}")
continue

interactions.append((f1, f2, pd_result))
logger.info(f"Interaction analysis for {f1} and {f2} completed in {time.time() - start_time:.2f} seconds.")

# NOTE(review): the loop header unpacks each stored pd_result as
# (pd_values, (ax1_values, ax2_values)). This assumes a 2-tuple return from
# partial_dependence -- confirm against the installed scikit-learn version.
for i, (f1, f2, (pd_values, (ax1_values, ax2_values))) in enumerate(interactions):
try:
logger.debug(f"Plotting interaction {i+1} between {f1} and {f2}...")
fig, ax = plt.subplots(figsize=(10, 6))
XX, YY = np.meshgrid(ax1_values, ax2_values)
Z = pd_values.reshape(XX.shape).T
contour = ax.contourf(XX, YY, Z, cmap="RdBu_r", alpha=0.5)
ax.set_xlabel(f1)
ax.set_ylabel(f2)
ax.set_title(f'Partial Dependence of {f1} and {f2}')
plt.colorbar(contour)
# Written to a relative path; the name embeds the 1-based index and both
# feature names.
plt.savefig(f'interaction_{i+1}_{f1}_{f2}.png')
plt.close()
except Exception as plot_error:
logger.error(f"Failed to plot interaction for {f1} and {f2}: {plot_error}")

logger.info("Feature interaction analysis completed.")
return interactions

# Both handlers only log; no return statement follows them in this span.
except AttributeError as attr_err:
logger.error(f"Model does not support feature importance or other attribute issue: {attr_err}")
except Exception as e:
logger.error(f"Error occurred during automated feature engineering...{str(e)}")
logger.error(f"An unexpected error occurred: {e}")
57 changes: 39 additions & 18 deletions explainableai/feature_interaction.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
# feature_interaction.py
import itertools
import numpy as np
import matplotlib.pyplot as plt
from sklearn.inspection import partial_dependence
import time
import logging

# Module-level logger named after this module. Its level is set to DEBUG
# at import time, so DEBUG records from this module pass the logger's own
# level check.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

def analyze_feature_interactions(model, X, feature_names, top_n=5, max_interactions=10):
    """Compute and plot pairwise partial dependence for the most important features.

    The ``top_n`` features with the largest ``model.feature_importances_`` are
    paired up; for each pair (up to ``max_interactions`` pairs) the averaged
    two-way partial dependence is computed and a filled contour plot is saved
    as ``interaction_<k>_<f1>_<f2>.png`` relative to the working directory.

    Args:
        model: Fitted estimator exposing ``feature_importances_``.
        X: Data handed to ``partial_dependence``.
        feature_names: Feature names, aligned by position with
            ``model.feature_importances_``; positions in this sequence are
            used as the feature indices passed to ``partial_dependence``.
        top_n: Number of most important features to pair up.
        max_interactions: Maximum number of pairs to consider.

    Returns:
        A list of ``(f1, f2, pd_result)`` tuples, one per pair whose partial
        dependence was computed (``pd_result`` is whatever
        ``partial_dependence`` returned). ``None`` if the analysis as a whole
        fails; the error is logged rather than raised.
    """
    logger.debug("Starting feature interaction analysis...")
    try:
        # Ensure model has feature_importances_
        if not hasattr(model, 'feature_importances_'):
            raise AttributeError("Model does not have 'feature_importances_' attribute.")

        # Arrays and pandas Index objects have no list-style .index(value);
        # normalising here keeps such inputs from being mis-reported as a
        # model attribute problem by the AttributeError handler below.
        feature_names = list(feature_names)

        # Calculate and sort feature importances
        feature_importance = dict(zip(feature_names, model.feature_importances_))
        top_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)[:top_n]
        top_feature_names = [f[0] for f in top_features]

        interactions = []
        for i, (f1, f2) in enumerate(itertools.combinations(top_feature_names, 2)):
            if i >= max_interactions:
                logger.info(f"Reached maximum number of interactions ({max_interactions}). Stopping analysis.")
                break

            logger.info(f"Analyzing interaction between {f1} and {f2}...")
            start_time = time.time()

            try:
                f1_idx = feature_names.index(f1)
                f2_idx = feature_names.index(f2)
            except ValueError as ve:
                logger.error(f"Feature {f1} or {f2} not found in feature_names: {ve}")
                continue

            try:
                pd_result = partial_dependence(model, X, features=[f1_idx, f2_idx], kind="average")
            except Exception as pd_error:
                logger.error(f"Partial dependence computation failed for {f1} and {f2}: {pd_error}")
                continue

            interactions.append((f1, f2, pd_result))
            logger.info(f"Interaction analysis for {f1} and {f2} completed in {time.time() - start_time:.2f} seconds.")

        for i, (f1, f2, pd_result) in enumerate(interactions):
            fig = None
            try:
                logger.debug(f"Plotting interaction {i+1} between {f1} and {f2}...")
                # Unpack inside the per-plot try: a malformed result then costs
                # one plot instead of aborting the whole analysis.
                pd_values, (ax1_values, ax2_values) = _unpack_partial_dependence(pd_result)
                fig, ax = plt.subplots(figsize=(10, 6))
                XX, YY = np.meshgrid(ax1_values, ax2_values)
                # The averaged surface is indexed (f1 grid, f2 grid); meshgrid
                # yields (f2 grid, f1 grid), hence reshape first, then transpose.
                Z = np.asarray(pd_values).reshape(len(ax1_values), len(ax2_values)).T
                contour = ax.contourf(XX, YY, Z, cmap="RdBu_r", alpha=0.5)
                ax.set_xlabel(f1)
                ax.set_ylabel(f2)
                ax.set_title(f'Partial Dependence of {f1} and {f2}')
                fig.colorbar(contour, ax=ax)
                fig.savefig(f'interaction_{i+1}_{f1}_{f2}.png')
            except Exception as plot_error:
                logger.error(f"Failed to plot interaction for {f1} and {f2}: {plot_error}")
            finally:
                # Close the figure even when plotting failed, so figures do not
                # accumulate across pairs.
                if fig is not None:
                    plt.close(fig)

        logger.info("Feature interaction analysis completed.")
        return interactions

    except AttributeError as attr_err:
        logger.error(f"Model does not support feature importance or other attribute issue: {attr_err}")
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
    # Reached only after a logged failure.
    return None


def _unpack_partial_dependence(pd_result):
    """Return ``(averaged_values, (grid_1, grid_2))`` from a partial dependence result.

    Accepts the dict-like result (key ``"average"`` plus ``"grid_values"``, or
    ``"values"`` when ``"grid_values"`` is absent) as well as a plain
    ``(averaged_values, grids)`` 2-tuple.
    """
    if hasattr(pd_result, "keys"):
        averaged = pd_result["average"]
        # Prefer "grid_values"; fall back to "values" only when it is missing.
        if "grid_values" in pd_result:
            grids = pd_result["grid_values"]
        else:
            grids = pd_result["values"]
    else:
        averaged, grids = pd_result
    grid_1, grid_2 = grids
    return averaged, (grid_1, grid_2)
Loading