
Commit

update demo
Minqi824 committed Aug 5, 2022
1 parent ced8d06 commit e5961c0
Showing 3 changed files with 36 additions and 30 deletions.
1 change: 1 addition & 0 deletions baseline/FEAWAD/run.py
@@ -34,6 +34,7 @@
from tensorflow.keras.optimizers import Adam, RMSprop

# Disable TF eager execution mode to avoid errors caused by the custom loss function
+ # Note: disable_eager_execution may raise errors with DeepSVDD in pyod (2022.08.05)
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

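Context for the new comment: disable_eager_execution() flips a process-wide TensorFlow switch, so importing this module can break PyOD's DeepSVDD, which relies on TF 2.x eager mode. A minimal sketch of one way to scope the call, assuming the caller knows which model will run (the configure_tf helper and its argument are illustrative, not part of the repository):

    def configure_tf(model_name: str) -> None:
        # FEAWAD's custom loss needs TF1-style graph mode; DeepSVDD needs eager mode.
        if model_name != 'DeepSVDD':
            from tensorflow.python.framework.ops import disable_eager_execution
            disable_eager_execution()

    configure_tf('FEAWAD')  # disable eager mode only when FEAWAD is under test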
18 changes: 14 additions & 4 deletions data_generator.py
@@ -195,7 +195,7 @@ def add_label_contamination(self, X, y, noise_ratio:float):

return X, y

- def generator(self, X=None, y=None,
+ def generator(self, X=None, y=None, minmax=True,
la=None, at_least_one_labeled=False,
realistic_synthetic_mode=None, alpha:int=5, percentage:float=0.1,
noise_type=None, duplicate_times:int=2, contam_ratio=1.00, noise_ratio:float=0.05):
@@ -210,6 +210,15 @@ def generator(self, X=None, y=None,
# load dataset
if self.dataset is None:
assert X is not None and y is not None, "For a customized dataset, you should provide X and y!"
+ # datasets from https://github.com/GuansongPang/ADRepository-Anomaly-detection-datasets/tree/main/numerical%20data/DevNet%20datasets
+ elif self.dataset in ['bank-additional-full_normalised', 'celeba_baldvsnonbald_normalised',
+                       'census-income-full-mixed-binarized', 'creditcardfraud_normalised',
+                       'KDD2014_donors_10feat_nomissing_normalised', 'UNSW_NB15_traintest_backdoor']:
+     data = pd.read_csv(os.path.join('datasets', self.dataset+'.csv'))
+     X = data.drop(['class'], axis=1).values
+     y = data['class'].values
+
+     minmax = False
else:
data = np.load(os.path.join('datasets', self.dataset+'.npz'), allow_pickle=True)
X = data['X']
@@ -294,9 +303,10 @@ def generator(self, X=None, y=None,
X_train, y_train = self.add_label_contamination(X_train, y_train, noise_ratio=noise_ratio)

# minmax scaling
- scaler = MinMaxScaler().fit(X_train)
- X_train = scaler.transform(X_train)
- X_test = scaler.transform(X_test)
+ if minmax:
+     scaler = MinMaxScaler().fit(X_train)
+     X_train = scaler.transform(X_train)
+     X_test = scaler.transform(X_test)

# idx of normal samples and unlabeled/labeled anomalies
idx_normal = np.where(y_train == 0)[0]
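Net effect of the data_generator.py changes: the six DevNet datasets are read from CSV and, because they ship pre-normalised, generator() forces minmax = False for them so the stored scaling is kept. A hedged usage sketch, assuming DataGenerator takes the dataset name in its constructor as the self.dataset checks above suggest:

    from data_generator import DataGenerator

    # Pre-normalised DevNet dataset: no MinMax re-scaling is applied.
    datagen = DataGenerator(dataset='creditcardfraud_normalised')
    data = datagen.generator(la=0.1)  # la: share of labeled anomalies

    # Customized raw data: pass X and y directly; the default minmax=True fits a
    # MinMaxScaler on the training split and applies it to train and test alike.
    # data = DataGenerator(dataset=None).generator(X=X, y=y, la=0.1)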
47 changes: 21 additions & 26 deletions run.py
@@ -11,22 +11,6 @@
from data_generator import DataGenerator
from myutils import Utils

- # unsupervised models
- from baseline.PyOD import PYOD
- from baseline.DAGMM.run import DAGMM
-
- # semi-supervised models
- from baseline.GANomaly.run import GANomaly
- from baseline.DeepSAD.src.run import DeepSAD
- from baseline.REPEN.run import REPEN
- from baseline.DevNet.run import DevNet
- from baseline.PReNet.run import PReNet
- from baseline.FEAWAD.run import FEAWAD
-
- # fully-supervised models
- from baseline.Supervised import supervised
- from baseline.FTTransformer.run import FTTransformer

class RunPipeline():
def __init__(self, suffix:str=None, mode:str='rla', parallel:str=None,
generate_duplicates=True, n_samples_threshold=1000,
@@ -97,20 +81,27 @@ def __init__(self, suffix:str=None, mode:str='rla', parallel:str=None,

# unsupervised algorithms
if self.parallel == 'unsupervise':
+ from baseline.PyOD import PYOD
+ from baseline.DAGMM.run import DAGMM
+
- # from pyod
- # for _ in ['IForest', 'OCSVM', 'CBLOF', 'COF', 'COPOD', 'ECOD', 'FeatureBagging', 'HBOS', 'KNN', 'LODA',
- #           'LOF', 'LSCP', 'MCD', 'PCA', 'SOD', 'SOGAAL', 'MOGAAL', 'DeepSVDD']:
- #     self.model_dict[_] = PYOD
- #
- # # DAGMM
- # self.model_dict['DAGMM'] = DAGMM

- # DeepSVDD (note: the DeepSVDD wrapper only supports TensorFlow 2.0+)
- for _ in ['DeepSVDD']:
+ for _ in ['IForest', 'OCSVM', 'CBLOF', 'COF', 'COPOD', 'ECOD', 'FeatureBagging', 'HBOS', 'KNN', 'LODA',
+           'LOF', 'LSCP', 'MCD', 'PCA', 'SOD', 'SOGAAL', 'MOGAAL', 'DeepSVDD']:
self.model_dict[_] = PYOD

+ # DAGMM
+ self.model_dict['DAGMM'] = DAGMM

# semi-supervised algorithms
elif self.parallel == 'semi-supervise':
+ from baseline.PyOD import PYOD
+ from baseline.GANomaly.run import GANomaly
+ from baseline.DeepSAD.src.run import DeepSAD
+ from baseline.REPEN.run import REPEN
+ from baseline.DevNet.run import DevNet
+ from baseline.PReNet.run import PReNet
+ from baseline.FEAWAD.run import FEAWAD

self.model_dict = {'GANomaly': GANomaly,
'DeepSAD': DeepSAD,
'REPEN': REPEN,
@@ -121,6 +112,9 @@ def __init__(self, suffix:str=None, mode:str='rla', parallel:str=None,

# fully-supervised algorithms
elif self.parallel == 'supervise':
+ from baseline.Supervised import supervised
+ from baseline.FTTransformer.run import FTTransformer

# from sklearn
for _ in ['LR', 'NB', 'SVM', 'MLP', 'RF', 'LGB', 'XGB', 'CatB']:
self.model_dict[_] = supervised
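Every branch fills self.model_dict with name-to-wrapper-class entries that share one interface. A sketch of how the pipeline presumably drives a wrapper afterwards (fit/predict_score mirror the PYOD wrapper's documented usage; the seed argument is an assumption here):

    model = self.model_dict[name](seed=seed, model_name=name)
    model.fit(X_train, y_train)          # unsupervised models ignore y_train
    score = model.predict_score(X_test)  # anomaly scores; higher means more anomalous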
@@ -139,6 +133,7 @@ def __init__(self, suffix:str=None, mode:str='rla', parallel:str=None,
def dataset_filter(self):
# dataset list in the current folder
dataset_list_org = [os.path.splitext(_)[0] for _ in os.listdir('datasets')]
+ dataset_list_org = [_ for _ in dataset_list_org if not _.split('_')[0].isdigit()]

dataset_list = []
dataset_size = []
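The new filter drops dataset files whose names carry a purely numeric prefix before the first underscore. A tiny illustration of the condition (the file names are hypothetical):

    names = ['6_cardio', 'creditcardfraud_normalised']
    kept = [n for n in names if not n.split('_')[0].isdigit()]
    print(kept)  # ['creditcardfraud_normalised'] -- '6'.isdigit() filters out the first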
@@ -302,5 +297,5 @@ def run(self):
df_time.to_csv(os.path.join(os.getcwd(), 'result', 'Time_' + self.suffix + '.csv'), index=True)

# run the above pipeline to reproduce the results in the paper
- pipeline = RunPipeline(suffix='ADBench_test', parallel='unsupervise', realistic_synthetic_mode=None, noise_type=None)
+ pipeline = RunPipeline(suffix='ADBench_test', parallel='supervise', realistic_synthetic_mode=None, noise_type=None)
pipeline.run()
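Because the baseline imports now live inside each parallel branch, a run only pays the import cost and dependency footprint of the track it selects. A hedged sketch of invoking another track, assuming the constructor arguments shown above are the complete required set:

    # semi-supervised track: GANomaly, DeepSAD, REPEN, DevNet, PReNet and FEAWAD
    # are imported only when this branch executes
    pipeline = RunPipeline(suffix='ADBench_test', parallel='semi-supervise',
                           realistic_synthetic_mode=None, noise_type=None)
    pipeline.run()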
