Skip to content

Commit

Permalink
update math features wrapper
Browse files Browse the repository at this point in the history
  • Loading branch information
EdenWuyifan committed Aug 19, 2024
1 parent 16bda9d commit 971a8bf
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 20 deletions.
48 changes: 48 additions & 0 deletions alpha_automl/builtin_primitives/math_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import numpy as np
import pandas as pd
from alpha_automl.base_primitive import BasePrimitive
from feature_engine.creation import MathFeatures

class MathFeaturesSum(BasePrimitive):
    """Primitive that delegates to feature_engine's ``MathFeatures`` with ``func='sum'``.

    Parameters
    ----------
    columns : list
        Variables handed to ``MathFeatures`` as its ``variables`` argument.
    """

    def __init__(self, columns):
        # Keep the constructor argument under its own name so that
        # scikit-learn style parameter introspection (get_params / clone)
        # can read it back.
        # NOTE(review): assumes BasePrimitive follows the scikit-learn
        # estimator contract -- confirm against base_primitive.py.
        self.columns = columns
        self.math_features = MathFeatures(variables=columns, func='sum')

    def fit(self, X, y=None):
        """Fit the wrapped ``MathFeatures`` on ``X`` and return ``self``.

        ``y`` is accepted for API compatibility and is not used.
        """
        self.math_features.fit(X)
        return self

    def transform(self, X):
        """Return the result of the wrapped ``MathFeatures.transform`` on ``X``."""
        return self.math_features.transform(X)

class MathFeaturesMean(BasePrimitive):
    """Primitive that delegates to feature_engine's ``MathFeatures`` with ``func='mean'``.

    Parameters
    ----------
    columns : list
        Variables handed to ``MathFeatures`` as its ``variables`` argument.
    """

    def __init__(self, columns):
        # Keep the constructor argument under its own name so that
        # scikit-learn style parameter introspection (get_params / clone)
        # can read it back.
        # NOTE(review): assumes BasePrimitive follows the scikit-learn
        # estimator contract -- confirm against base_primitive.py.
        self.columns = columns
        self.math_features = MathFeatures(variables=columns, func='mean')

    def fit(self, X, y=None):
        """Fit the wrapped ``MathFeatures`` on ``X`` and return ``self``.

        ``y`` is accepted for API compatibility and is not used.
        """
        self.math_features.fit(X)
        return self

    def transform(self, X):
        """Return the result of the wrapped ``MathFeatures.transform`` on ``X``."""
        return self.math_features.transform(X)

class MathFeaturesStd(BasePrimitive):
    """Primitive that delegates to feature_engine's ``MathFeatures`` with ``func='std'``.

    Parameters
    ----------
    columns : list
        Variables handed to ``MathFeatures`` as its ``variables`` argument.
    """

    def __init__(self, columns):
        # Keep the constructor argument under its own name so that
        # scikit-learn style parameter introspection (get_params / clone)
        # can read it back.
        # NOTE(review): assumes BasePrimitive follows the scikit-learn
        # estimator contract -- confirm against base_primitive.py.
        self.columns = columns
        self.math_features = MathFeatures(variables=columns, func='std')

    def fit(self, X, y=None):
        """Fit the wrapped ``MathFeatures`` on ``X`` and return ``self``.

        ``y`` is accepted for API compatibility and is not used.
        """
        self.math_features.fit(X)
        return self

    def transform(self, X):
        """Return the result of the wrapped ``MathFeatures.transform`` on ``X``."""
        return self.math_features.transform(X)

class MathFeaturesProd(BasePrimitive):
    """Primitive that delegates to feature_engine's ``MathFeatures`` with ``func='prod'``.

    Parameters
    ----------
    columns : list
        Variables handed to ``MathFeatures`` as its ``variables`` argument.
    """

    def __init__(self, columns):
        # Keep the constructor argument under its own name so that
        # scikit-learn style parameter introspection (get_params / clone)
        # can read it back.
        # NOTE(review): assumes BasePrimitive follows the scikit-learn
        # estimator contract -- confirm against base_primitive.py.
        self.columns = columns
        self.math_features = MathFeatures(variables=columns, func='prod')

    def fit(self, X, y=None):
        """Fit the wrapped ``MathFeatures`` on ``X`` and return ``self``.

        ``y`` is accepted for API compatibility and is not used.
        """
        self.math_features.fit(X)
        return self

    def transform(self, X):
        """Return the result of the wrapped ``MathFeatures.transform`` on ``X``."""
        return self.math_features.transform(X)
4 changes: 2 additions & 2 deletions alpha_automl/data_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ def profile_data(X):
if 'missing_values_ratio' in profiled_column:
metadata['missing_values'] = True

metadata['numeric_columns'] = list(X.select_dtypes(include=['int64', 'float64']).columns)
metadata['categorical_columns'] = list(X.select_dtypes(include=['object', 'category']).columns)
metadata['numeric_columns'] = [(index_column, column_name) for index_column, column_name in enumerate(X.columns) if X[column_name].dtype in ['int64', 'float64']]
metadata['categorical_columns'] = [(index_column, column_name) for index_column, column_name in enumerate(X.columns) if X[column_name].dtype in ['object', 'category']]

logger.debug(f'Results of profiling data: non-numeric features = {str(metadata["nonnumeric_columns"].keys())}, '
f'useless columns = {str(metadata["useless_columns"])}, '
Expand Down
16 changes: 2 additions & 14 deletions alpha_automl/pipeline_synthesis/pipeline_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,6 @@ def change_default_hyperparams(primitive_object):
primitive_object.set_params(algorithm='SAMME')


def create_math_features(primitive_type, columns):
    """Build a ``MathFeatures`` object for one of the supported aggregations.

    ``primitive_type`` selects the aggregation ("sum", "mean", "std" or
    "prod") and ``columns`` is passed through as ``variables``. Any other
    ``primitive_type`` yields ``None``.
    """
    supported_funcs = ("sum", "mean", "std", "prod")
    if primitive_type not in supported_funcs:
        return None
    return MathFeatures(variables=columns, func=primitive_type)


def extract_estimators(pipeline_primitives, all_primitives):
estimators = []
estimator_name, estimator_obj = pipeline_primitives.pop()
Expand Down Expand Up @@ -118,9 +107,8 @@ def make_primitive_objects(self, primitives):
elif primitive_type == 'CLASSIFICATION_MULTI_ENSEMBLER' or primitive_type == 'REGRESSION_MULTI_ENSEMBLER':
estimators = extract_estimators(pipeline_primitives, self.all_primitives)
primitive_object = create_object(primitive_name, {'estimators': estimators})
elif "feature_engine.creation" in primitive_name:
primitive_name_type = primitive_name.split('-')[1]
primitive_object = create_math_features(primitive_name_type, list(range(len(numeric_columns))))
elif "alpha_automl.builtin_primitives.math_features" in primitive_name:
primitive_object = create_object(primitive_name, {'columns': [column_name for _, column_name in numeric_columns]})
elif self.all_primitives[primitive_name]['origin'] == NATIVE_PRIMITIVE: # It's an installed primitive
primitive_object = create_object(primitive_name, EXTRA_PARAMS.get(primitive_name, None))
else:
Expand Down
8 changes: 4 additions & 4 deletions alpha_automl/resource/primitives_hierarchy.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
],
"FEATURE_GENERATOR": [
"sklearn.preprocessing.PolynomialFeatures",
"feature_engine.creation.math_features.MathFeatures-sum",
"feature_engine.creation.math_features.MathFeatures-mean",
"feature_engine.creation.math_features.MathFeatures-prod",
"feature_engine.creation.math_features.MathFeatures-std"
"alpha_automl.builtin_primitives.math_features.MathFeaturesSum",
"alpha_automl.builtin_primitives.math_features.MathFeaturesMean",
"alpha_automl.builtin_primitives.math_features.MathFeaturesProd",
"alpha_automl.builtin_primitives.math_features.MathFeaturesStd"
],
"FEATURE_SCALER": [
"sklearn.preprocessing.MaxAbsScaler",
Expand Down

0 comments on commit 971a8bf

Please sign in to comment.