Skip to content

Commit

Permalink
update to the latest version
Browse files Browse the repository at this point in the history
  • Loading branch information
Minqi824 committed Aug 6, 2022
1 parent ca66910 commit c7c0482
Showing 1 changed file with 21 additions and 14 deletions.
35 changes: 21 additions & 14 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,15 @@ def __init__(self, suffix:str=None, mode:str='rla', parallel:str=None,
raise NotImplementedError

# We remove the following model for considering the computational cost
for _ in ['MOGAAL', 'LSCP', 'MCD']:
for _ in ['SOGAAL', 'MOGAAL', 'LSCP', 'MCD', 'FeatureBagging']:
if _ in self.model_dict.keys():
self.model_dict.pop(_)

# dataset filter for delelting those datasets that do not satisfy the experimental requirement
def dataset_filter(self):
# dataset list in the current folder
dataset_list_org = [os.path.splitext(_)[0] for _ in os.listdir('datasets') if os.path.splitext(_)[1] in ['.npz', '.csv']]
dataset_list_org = [_ for _ in dataset_list_org if not _.split('_')[0].isdigit()]
# dataset_list_org = [_ for _ in dataset_list_org if not _.split('_')[0].isdigit()]

dataset_list = []
dataset_size = []
Expand Down Expand Up @@ -197,13 +197,19 @@ def model_fit(self):

try:
# fitting
start_time = time.time()
self.clf = self.clf.fit(X_train=self.data['X_train'], y_train=self.data['y_train'],
ratio=sum(self.data['y_test']) / len(self.data['y_test']))
# predicting score
end_time = time.time(); time_fit = end_time - start_time


# predicting score (inference)
start_time = time.time()
if self.model_name == 'DAGMM':
score_test = self.clf.predict_score(self.data['X_train'], self.data['X_test'])
else:
score_test = self.clf.predict_score(self.data['X_test'])
end_time = time.time(); time_inference = end_time - start_time

# performance
result = self.utils.metric(y_true=self.data['y_test'], y_score=score_test, pos_label=1)
Expand All @@ -216,14 +222,15 @@ def model_fit(self):

except Exception as error:
print(f'Error in model fitting. Model:{self.model_name}, Error: {error}')
time_fit, time_inference = None, None
result = {'aucroc': np.nan, 'aucpr': np.nan}
pass

return result
return time_fit, time_inference, result

# run the experiment
def run(self):
# filteting dataset that do not meet the experimental requirements
# filteting dataset that does not meet the experimental requirements
dataset_list = self.dataset_filter()

# experimental parameters
Expand All @@ -238,13 +245,13 @@ def run(self):
else:
experiment_params = list(product(dataset_list, self.rla_list, self.seed_list))


print(f'{len(dataset_list)} datasets, {len(self.model_dict.keys())} models')

# save the results
df_AUCROC = pd.DataFrame(data=None, index=experiment_params, columns=list(self.model_dict.keys()))
df_AUCPR = pd.DataFrame(data=None, index=experiment_params, columns=list(self.model_dict.keys()))
df_time = pd.DataFrame(data=None, index=experiment_params, columns=list(self.model_dict.keys()))
df_time_fit = pd.DataFrame(data=None, index=experiment_params, columns=list(self.model_dict.keys()))
df_time_inference = pd.DataFrame(data=None, index=experiment_params, columns=list(self.model_dict.keys()))

for i, params in tqdm(enumerate(experiment_params)):
if self.noise_type is not None:
Expand Down Expand Up @@ -296,19 +303,19 @@ def run(self):
self.clf = self.model_dict[self.model_name]

# fit model
start_time = time.time() # starting time
result = self.model_fit()
end_time = time.time() # ending time
time_fit, time_inference, result = self.model_fit()

# store and save the result (AUC-ROC, AUC-PR and runtime)
# store and save the result (AUC-ROC, AUC-PR and runtime / inference time)
df_AUCROC[model_name].iloc[i] = result['aucroc']
df_AUCPR[model_name].iloc[i] = result['aucpr']
df_time[model_name].iloc[i] = round(end_time - start_time, 2)
df_time_fit[model_name].iloc[i] = time_fit
df_time_inference[model_name].iloc[i] = time_inference

df_AUCROC.to_csv(os.path.join(os.getcwd(), 'result', 'AUCROC_' + self.suffix + '.csv'), index=True)
df_AUCPR.to_csv(os.path.join(os.getcwd(), 'result', 'AUCPR_' + self.suffix + '.csv'), index=True)
df_time.to_csv(os.path.join(os.getcwd(), 'result', 'Time_' + self.suffix + '.csv'), index=True)
df_time_fit.to_csv(os.path.join(os.getcwd(), 'result', 'Time(fit)_' + self.suffix + '.csv'), index=True)
df_time_inference.to_csv(os.path.join(os.getcwd(), 'result', 'Time(inference)_' + self.suffix + '.csv'), index=True)

# run the above pipeline for reproducing the results in the paper
pipeline = RunPipeline(suffix='ADBench', parallel='unsupervise', realistic_synthetic_mode='cluster', noise_type=None)
pipeline = RunPipeline(suffix='ADBench', parallel='unsupervise', realistic_synthetic_mode=None, noise_type=None)
pipeline.run()

0 comments on commit c7c0482

Please sign in to comment.