Commit f8bbf2d: update code
Minqi824 committed Jun 22, 2022 (1 parent: 0f24efc)
Showing 13 changed files with 200 additions and 294 deletions.
13 changes: 4 additions & 9 deletions baseline/DeepSAD/src/base/odds_dataset.py
@@ -22,19 +22,14 @@ class ODDSDataset(Dataset):
 
     def __init__(self, data, train=True):
         super(Dataset, self).__init__()
-        X_train = data['X_train']
-        y_train = data['y_train']
-        X_test = data['X_test']
-        y_test = data['y_test']
-
         self.train = train
 
         if self.train:
-            self.data = torch.tensor(X_train, dtype=torch.float32)
-            self.targets = torch.tensor(y_train, dtype=torch.int64)
+            self.data = torch.tensor(data['X_train'], dtype=torch.float32)
+            self.targets = torch.tensor(data['y_train'], dtype=torch.int64)
         else:
-            self.data = torch.tensor(X_test, dtype=torch.float32)
-            self.targets = torch.tensor(y_test, dtype=torch.int64)
+            self.data = torch.tensor(data['X_test'], dtype=torch.float32)
+            self.targets = torch.tensor(data['y_test'], dtype=torch.int64)
 
         # self.semi_targets = torch.zeros_like(self.targets)
         self.semi_targets = self.targets
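With this change each ODDSDataset instance carries only the split it was built for, so callers pass just that split's keys. A minimal sketch of the intended call pattern (X_train, y_train, X_test, y_test stand in for numpy arrays and are illustrative):

    train_set = ODDSDataset(data={'X_train': X_train, 'y_train': y_train}, train=True)
    test_set = ODDSDataset(data={'X_test': X_test, 'y_test': y_test}, train=False)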
34 changes: 3 additions & 31 deletions baseline/DeepSAD/src/datasets/main.py
@@ -4,38 +4,10 @@
 from .odds import ODDSADDataset
 
 
-def load_dataset(data):
+def load_dataset(data, train=True):
     """Loads the dataset."""
     # deploying DeepSAD on CV (image) datasets is not supported in this code yet; it will be updated later
-
-    # if dataset == 'mnist':
-    #     dataset = MNIST_Dataset(root=data_path,
-    #                             normal_class=normal_class,
-    #                             known_outlier_class=known_outlier_class,
-    #                             n_known_outlier_classes=n_known_outlier_classes,
-    #                             ratio_known_normal=ratio_known_normal,
-    #                             ratio_known_outlier=ratio_known_outlier,
-    #                             ratio_pollution=ratio_pollution)
-    #
-    # elif dataset == 'fmnist':
-    #     dataset = FashionMNIST_Dataset(root=data_path,
-    #                                    normal_class=normal_class,
-    #                                    known_outlier_class=known_outlier_class,
-    #                                    n_known_outlier_classes=n_known_outlier_classes,
-    #                                    ratio_known_normal=ratio_known_normal,
-    #                                    ratio_known_outlier=ratio_known_outlier,
-    #                                    ratio_pollution=ratio_pollution)
-    #
-    # elif dataset == 'cifar10':
-    #     dataset = CIFAR10_Dataset(root=data_path,
-    #                               normal_class=normal_class,
-    #                               known_outlier_class=known_outlier_class,
-    #                               n_known_outlier_classes=n_known_outlier_classes,
-    #                               ratio_known_normal=ratio_known_normal,
-    #                               ratio_known_outlier=ratio_known_outlier,
-    #                               ratio_pollution=ratio_pollution)
-
-    #tabular data
-    dataset = ODDSADDataset(data=data)
+    # for tabular data
+    dataset = ODDSADDataset(data=data, train=train)
 
     return dataset
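load_dataset() now forwards the train flag and expects a plain dict holding only the relevant split. A sketch of the two call patterns this commit uses in baseline/DeepSAD/src/run.py (array names are illustrative):

    train_dataset = load_dataset(data={'X_train': X_train, 'y_train': y_train}, train=True)
    test_dataset = load_dataset(data={'X_test': X_test, 'y_test': y_test}, train=False)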
29 changes: 18 additions & 11 deletions baseline/DeepSAD/src/datasets/odds.py
@@ -8,25 +8,32 @@
 
 class ODDSADDataset(BaseADDataset):
 
-    def __init__(self, data):
+    def __init__(self, data, train):
         super().__init__(self)
 
         # Define normal and outlier classes
         self.n_classes = 2  # 0: normal, 1: outlier
         self.normal_classes = (0,)
         self.outlier_classes = (1,)
 
-        # Get training set
-        self.train_set = ODDSDataset(data=data, train=True)
-        # Get testing set
-        self.test_set = ODDSDataset(data=data, train=False)
+        # training or testing dataset
+        self.train = train
+
+        if self.train:
+            # Get training set
+            self.train_set = ODDSDataset(data=data, train=True)
+        else:
+            # Get testing set
+            self.test_set = ODDSDataset(data=data, train=False)
 
     def loaders(self, batch_size: int, shuffle_train=True, shuffle_test=False, num_workers: int = 0) -> (
             DataLoader, DataLoader):
 
-        train_loader = DataLoader(dataset=self.train_set, batch_size=batch_size, shuffle=shuffle_train,
-                                  num_workers=num_workers, drop_last=True)
-        test_loader = DataLoader(dataset=self.test_set, batch_size=batch_size, shuffle=shuffle_test,
-                                 num_workers=num_workers, drop_last=False)
-
-        return train_loader, test_loader
+        if self.train:
+            train_loader = DataLoader(dataset=self.train_set, batch_size=batch_size, shuffle=shuffle_train,
+                                      num_workers=num_workers, drop_last=True)
+            return train_loader
+        else:
+            test_loader = DataLoader(dataset=self.test_set, batch_size=batch_size, shuffle=shuffle_test,
+                                     num_workers=num_workers, drop_last=False)
+            return test_loader
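loaders() now returns a single DataLoader chosen by the train flag fixed at construction (note the return annotation still advertises a (DataLoader, DataLoader) pair). A sketch of the new contract; the four-field batch layout is an assumption carried over from the wider DeepSAD codebase, not something shown in this diff:

    dataset = ODDSADDataset(data={'X_train': X_train, 'y_train': y_train}, train=True)
    loader = dataset.loaders(batch_size=128, num_workers=0)  # one loader, not a (train, test) pair
    for inputs, targets, semi_targets, idx in loader:  # assumed ODDSDataset item layout
        pass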
8 changes: 5 additions & 3 deletions baseline/DeepSAD/src/deepsad.py
@@ -80,14 +80,16 @@ def test(self, dataset: BaseADDataset, device: str = 'cuda', n_jobs_dataloader:
         if self.trainer is None:
             self.trainer = DeepSADTrainer(self.c, self.eta, device=device, n_jobs_dataloader=n_jobs_dataloader)
 
-        self.trainer.test(dataset, self.net)
+        score = self.trainer.test(dataset, self.net)
 
         # Get results
-        self.results['test_aucroc'] = self.trainer.test_aucroc
-        self.results['test_aucpr'] = self.trainer.test_aucpr
+        # self.results['test_aucroc'] = self.trainer.test_aucroc
+        # self.results['test_aucpr'] = self.trainer.test_aucpr
         self.results['test_time'] = self.trainer.test_time
         self.results['test_scores'] = self.trainer.test_scores
 
+        return score
+
     def pretrain(self, dataset: BaseADDataset, input_size ,optimizer_name: str = 'adam', lr: float = 0.001, n_epochs: int = 100,
                  lr_milestones: tuple = (), batch_size: int = 128, weight_decay: float = 1e-6, device: str = 'cuda',
                  n_jobs_dataloader: int = 0):
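deepsad.test() now returns the trainer's per-sample anomaly scores instead of only stashing AUC metrics in self.results, so a caller can do, for example:

    scores = deepSAD.test(dataset, device='cuda', n_jobs_dataloader=0)  # numpy array, one score per test sample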
16 changes: 9 additions & 7 deletions baseline/DeepSAD/src/optim/DeepSAD_trainer.py
@@ -36,7 +36,7 @@ def train(self, dataset: BaseADDataset, net: BaseNet):
         logger = logging.getLogger()
 
         # Get train data loader
-        train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
+        train_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
 
         # Set device for network
         net = net.to(self.device)
@@ -101,7 +101,7 @@ def test(self, dataset: BaseADDataset, net: BaseNet):
         logger = logging.getLogger()
 
         # Get test data loader
-        _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
+        test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
 
         # Set device for network
         net = net.to(self.device)
@@ -141,18 +141,20 @@ def test(self, dataset: BaseADDataset, net: BaseNet):
 
         # Compute AUC
         _, labels, scores = zip(*idx_label_score)
-        labels = np.array(labels)
+        # labels = np.array(labels)
         scores = np.array(scores)
-        self.test_aucroc = roc_auc_score(labels, scores)
-        self.test_aucpr = average_precision_score(labels, scores, pos_label = 1)
+        # self.test_aucroc = roc_auc_score(labels, scores)
+        # self.test_aucpr = average_precision_score(labels, scores, pos_label = 1)
 
         # Log results
         logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
-        logger.info('Test AUCROC: {:.2f}%'.format(100. * self.test_aucroc))
-        logger.info('Test AUCPR: {:.2f}%'.format(100. * self.test_aucpr))
+        # logger.info('Test AUCROC: {:.2f}%'.format(100. * self.test_aucroc))
+        # logger.info('Test AUCPR: {:.2f}%'.format(100. * self.test_aucpr))
         logger.info('Test Time: {:.3f}s'.format(self.test_time))
         logger.info('Finished testing.')
 
+        return scores
+
     def init_center_c(self, train_loader: DataLoader, net: BaseNet, eps=0.1):
         """Initialize hypersphere center c as the mean from an initial forward pass on the data."""
         n_samples = 0
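The body of init_center_c is collapsed in this view. For orientation, a minimal sketch of what its docstring describes, under the same assumed four-field batch layout; this is an illustration, not the committed code:

    import torch

    def init_center_c_sketch(train_loader, net, device='cuda', eps=0.1):
        """Average the network embeddings from one forward pass to get center c."""
        n_samples, c = 0, None
        net.eval()
        with torch.no_grad():
            for inputs, _, _, _ in train_loader:  # assumed (inputs, targets, semi_targets, idx)
                outputs = net(inputs.to(device))
                if c is None:
                    c = torch.zeros(outputs.shape[1], device=device)
                n_samples += outputs.shape[0]
                c += torch.sum(outputs, dim=0)
        c /= n_samples
        # nudge near-zero coordinates to +/- eps so a trivial all-zero center is avoided
        c[(abs(c) < eps) & (c < 0)] = -eps
        c[(abs(c) < eps) & (c > 0)] = eps
        return c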
4 changes: 2 additions & 2 deletions baseline/DeepSAD/src/optim/ae_trainer.py
@@ -27,7 +27,7 @@ def train(self, dataset: BaseADDataset, ae_net: BaseNet):
         logger = logging.getLogger()
 
         # Get train data loader
-        train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
+        train_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
 
         # Set loss
         criterion = nn.MSELoss(reduction='none')
@@ -86,7 +86,7 @@ def test(self, dataset: BaseADDataset, ae_net: BaseNet):
         logger = logging.getLogger()
 
         # Get test data loader
-        _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
+        test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
 
         # Set loss
         criterion = nn.MSELoss(reduction='none')
60 changes: 23 additions & 37 deletions baseline/DeepSAD/src/run.py
@@ -37,7 +37,7 @@ def __init__(self, seed, model_name='DeepSAD'):
         self.num_threads = 0
         self.n_jobs_dataloader = 0
 
-    def fit2test(self, data):
+    def fit(self, X_train, y_train, ratio=None):
         """
         Deep SAD, a method for deep semi-supervised anomaly detection.
@@ -56,23 +56,24 @@ def fit2test(self, data):
         logging.info('Number of threads: %d' % self.num_threads)
         logging.info('Number of dataloader workers: %d' % self.n_jobs_dataloader)
 
-        #Load data (modified)
-        dataset = load_dataset(data=data)
+        # Load data
+        data = {'X_train': X_train, 'y_train': y_train}
+        dataset = load_dataset(data=data, train=True)
         input_size = dataset.train_set.data.size(1)  # input size
 
         # Initialize DeepSAD model and set neural network phi
-        deepSAD = deepsad(self.eta)
-        deepSAD.set_network(self.net_name, input_size)
+        self.deepSAD = deepsad(self.eta)
+        self.deepSAD.set_network(self.net_name, input_size)
 
         # If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights)
         if self.load_model:
-            deepSAD.load_model(model_path=self.load_model, load_ae=True, map_location=self.device)
+            self.deepSAD.load_model(model_path=self.load_model, load_ae=True, map_location=self.device)
             logging.info('Loading model from %s.' % self.load_model)
 
         logging.info('Pretraining: %s' % self.pretrain)
         if self.pretrain:
             # Pretrain model on dataset (via autoencoder)
-            deepSAD.pretrain(dataset,
+            self.deepSAD.pretrain(dataset,
                              input_size,
                              optimizer_name=self.ae_optimizer_name,
                              lr=self.ae_lr,
@@ -84,18 +84,15 @@
                              n_jobs_dataloader=self.n_jobs_dataloader)
 
         # Train model on dataset
-        deepSAD.train(dataset,
-                      optimizer_name=self.optimizer_name,
-                      lr=self.lr,
-                      n_epochs=self.n_epochs,
-                      lr_milestones=self.lr_milestone,
-                      batch_size=self.batch_size,
-                      weight_decay=self.weight_decay,
-                      device=self.device,
-                      n_jobs_dataloader=self.n_jobs_dataloader)
-
-        # Test model
-        deepSAD.test(dataset, device=self.device, n_jobs_dataloader=self.n_jobs_dataloader)
+        self.deepSAD.train(dataset,
+                           optimizer_name=self.optimizer_name,
+                           lr=self.lr,
+                           n_epochs=self.n_epochs,
+                           lr_milestones=self.lr_milestone,
+                           batch_size=self.batch_size,
+                           weight_decay=self.weight_decay,
+                           device=self.device,
+                           n_jobs_dataloader=self.n_jobs_dataloader)
 
         # Save results, model, and configuration
         # deepSAD.save_results(export_json=xp_path + '/results.json')
@@ -108,23 +106,11 @@ def fit2test(self, data):
         # idx_all_sorted = indices[np.argsort(scores)]  # from lowest to highest score
         # idx_normal_sorted = indices[labels == 0][np.argsort(scores[labels == 0])]  # from lowest to highest score
 
-        if dataset in ('mnist', 'fmnist', 'cifar10'):
+        return self
 
-        if dataset in ('mnist', 'fmnist'):
-            X_all_low = dataset.test_set.data[idx_all_sorted[:32], ...].unsqueeze(1)
-            X_all_high = dataset.test_set.data[idx_all_sorted[-32:], ...].unsqueeze(1)
-            X_normal_low = dataset.test_set.data[idx_normal_sorted[:32], ...].unsqueeze(1)
-            X_normal_high = dataset.test_set.data[idx_normal_sorted[-32:], ...].unsqueeze(1)
+    def predict_score(self, X):
+        # input randomly generated y labels for consistency
+        dataset = load_dataset(data={'X_test': X, 'y_test': np.random.choice([0, 1], X.shape[0])}, train=False)
+        score = self.deepSAD.test(dataset, device=self.device, n_jobs_dataloader=self.n_jobs_dataloader)
 
-        if dataset == 'cifar10':
-            X_all_low = torch.tensor(np.transpose(dataset.test_set.data[idx_all_sorted[:32], ...], (0,3,1,2)))
-            X_all_high = torch.tensor(np.transpose(dataset.test_set.data[idx_all_sorted[-32:], ...], (0,3,1,2)))
-            X_normal_low = torch.tensor(np.transpose(dataset.test_set.data[idx_normal_sorted[:32], ...], (0,3,1,2)))
-            X_normal_high = torch.tensor(np.transpose(dataset.test_set.data[idx_normal_sorted[-32:], ...], (0,3,1,2)))
-
-        plot_images_grid(X_all_low, export_img=self.xp_path + '/all_low', padding=2)
-        plot_images_grid(X_all_high, export_img=self.xp_path + '/all_high', padding=2)
-        plot_images_grid(X_normal_low, export_img=self.xp_path + '/normals_low', padding=2)
-        plot_images_grid(X_normal_high, export_img=self.xp_path + '/normals_high', padding=2)
-
-        return {'aucroc':deepSAD.results['test_aucroc'], 'aucpr':deepSAD.results['test_aucpr']}
+        return score
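Taken together, these hunks split the old fit2test() into a scikit-learn-style pair: fit() trains on labeled data and keeps the model on self, and predict_score() scores new samples. A hedged usage sketch (the wrapper's class name is not visible in this diff and is assumed here):

    model = DeepSAD(seed=42)  # assumed name of the wrapper class defined in this file
    model.fit(X_train, y_train)  # optional autoencoder pretraining, then Deep SAD training
    scores = model.predict_score(X_test)  # higher score = more anomalous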
26 changes: 17 additions & 9 deletions baseline/DevNet/run.py
@@ -3,8 +3,8 @@
 @author: Guansong Pang
 The algorithm was implemented using Python 3.6.6, Keras 2.2.2 and TensorFlow 1.10.1.
 More details can be found in our KDD19 paper.
-Guansong Pang, Chunhua Shen, and Anton van den Hengel. 2019. 
-Deep Anomaly Detection with Deviation Networks. 
+Guansong Pang, Chunhua Shen, and Anton van den Hengel. 2019.
+Deep Anomaly Detection with Deviation Networks.
 In The 25th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD ’19),
 August 4–8, 2019, Anchorage, AK, USA. ACM, New York, NY, USA, 10 pages. https://doi.org/10.1145/3292500.3330871
 """
@@ -16,9 +16,13 @@
 from keras import backend as K
 from keras.models import Model, load_model
 from keras.layers import Input, Dense
-from keras.optimizers import RMSprop
 from keras.callbacks import ModelCheckpoint, TensorBoard
 
+try:
+    from keras.optimizers import RMSprop  # old tf version
+except ImportError:
+    from tensorflow.keras.optimizers import RMSprop
+
 import argparse
 import numpy as np
 import pandas as pd
@@ -67,6 +71,7 @@ def __init__(self, seed, model_name='DevNet', save_suffix=None):
         # random_seed = args.ramdn_seed
 
         self.save_suffix = save_suffix
+        self.ref = None  # normal-distribution reference sample, created once and reused across loss calls
 
     def dev_network_d(self,input_shape):
         '''
@@ -105,12 +110,15 @@ def deviation_loss(self, y_true, y_pred):
         '''
         z-score-based deviation loss
         '''
+
         confidence_margin = 5.
         ## size=5000 is the setting of l in algorithm 1 in the paper
-        ref = K.variable(np.random.normal(loc = 0., scale= 1.0, size = 5000) , dtype='float32')
-        dev = (y_pred - K.mean(ref)) / K.std(ref)
+        if self.ref is None:
+            self.ref = K.variable(np.random.normal(loc = 0., scale= 1.0, size = 5000), dtype='float32')
+        dev = (y_pred - K.mean(self.ref)) / K.std(self.ref)
         inlier_loss = K.abs(dev)
         outlier_loss = K.abs(K.maximum(confidence_margin - dev, 0.))
+
         return K.mean((1 - y_true) * inlier_loss + y_true * outlier_loss)
 
     def deviation_network(self, input_shape, network_depth):
@@ -160,7 +168,7 @@ def input_batch_generation_sup(self, X_train, outlier_indices, inlier_indices, b
             sid = rng.choice(n_outliers, 1)
             ref[i] = X_train[outlier_indices[sid]]
             training_labels += [1]
-        return np.array(ref), np.array(training_labels)
+        return np.array(ref), np.array(training_labels, dtype=float)
 
     def input_batch_generation_sup_sparse(self, X_train, outlier_indices, inlier_indices, batch_size, rng):
         '''
@@ -218,9 +226,9 @@ def fit(self, X_train, y_train, ratio=None):
                                        save_best_only = True, save_weights_only = True)
 
         self.model.fit_generator(self.batch_generator_sup(X_train, outlier_indices, inlier_indices, batch_size, nb_batch, rng),
-                                                          steps_per_epoch = nb_batch,
-                                                          epochs = epochs,
-                                                          callbacks=[checkpointer])
+                                 steps_per_epoch = nb_batch,
+                                 epochs = epochs,
+                                 callbacks=[checkpointer])
 
         return self
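For reference, the deviation_loss hunk above standardizes each predicted score against a fixed N(0, 1) reference sample of size l = 5000, pulls inliers toward the reference mean, and pushes labeled anomalies at least confidence_margin = 5 standard deviations above it; caching the sample on self.ref avoids rebuilding a Keras variable on every call. A plain-numpy restatement of the loss (an illustrative sketch, not the committed Keras code):

    import numpy as np

    def deviation_loss_np(y_true, y_pred, margin=5.0, seed=42):
        rng = np.random.RandomState(seed)
        ref = rng.normal(0.0, 1.0, 5000)  # l = 5000, as in Algorithm 1 of the paper
        dev = (y_pred - ref.mean()) / ref.std()  # z-score of each predicted anomaly score
        inlier_loss = np.abs(dev)  # normal points: keep scores near the reference mean
        outlier_loss = np.abs(np.maximum(margin - dev, 0.0))  # anomalies: push dev above the margin
        return np.mean((1 - y_true) * inlier_loss + y_true * outlier_loss)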
(Diffs for the remaining 5 changed files were not loaded in this view.)
