
Commit b74c28c: first commit
Minqi824 committed Jun 6, 2022 (1 parent: f666de0)
Showing 166 changed files with 14,306 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .idea/.gitignore

8 changes: 8 additions & 0 deletions .idea/ADBench.iml

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

4 changes: 4 additions & 0 deletions .idea/misc.xml

8 changes: 8 additions & 0 deletions .idea/modules.xml

6 changes: 6 additions & 0 deletions .idea/vcs.xml
26 changes: 26 additions & 0 deletions baseline/AE/fit.py
@@ -0,0 +1,26 @@
from torch import nn
from torch.autograd import Variable

def fit(train_loader, model, optimizer, epochs, print_loss, device):
    L1_criterion = nn.L1Loss(reduction='mean')

    for epoch in range(epochs):
        for i, data in enumerate(train_loader):
            X, _ = data
            X = X.to(device)  # .to() is not in-place; assign the result
            X = Variable(X)

            # clear gradients
            model.zero_grad()

            _, X_hat = model(X)

            # loss forward (reconstruction loss)
            loss = L1_criterion(X, X_hat)

            # backward and update
            loss.backward()
            optimizer.step()

            if (i % 50 == 0) and print_loss:
                print('[%d/%d] [%d/%d] Loss: %.4f' % (epoch + 1, epochs, i, len(train_loader), loss))
20 changes: 20 additions & 0 deletions baseline/AE/model.py
@@ -0,0 +1,20 @@
from torch import nn

class network(nn.Module):
    def __init__(self, input_size:int, hidden_size:int=20, act_fun=None):
        # act_fun is expected to be an activation module instance (e.g. nn.Tanh());
        # the None default would fail inside nn.Sequential
        super(network, self).__init__()

        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            act_fun,
        )

        self.decoder = nn.Sequential(
            nn.Linear(hidden_size, input_size),
        )

    def forward(self, input):
        z = self.encoder(input)
        X_hat = self.decoder(z)

        return z, X_hat
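
A minimal sketch of exercising this autoencoder on its own; the sizes below are illustrative assumptions, and the activation module is passed explicitly:

import torch
from torch import nn
from baseline.AE.model import network

# hypothetical smoke test: 16 samples with 10 features
model = network(input_size=10, hidden_size=4, act_fun=nn.Tanh())
X = torch.randn(16, 10)
z, X_hat = model(X)
print(z.shape, X_hat.shape)  # torch.Size([16, 4]) torch.Size([16, 10])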
71 changes: 71 additions & 0 deletions baseline/AE/run.py
@@ -0,0 +1,71 @@
import os
import sys

from myutils import Utils
import torch
from torch import nn
from torch.utils.data import Subset, DataLoader, TensorDataset

from baseline.AE.model import network
from baseline.AE.fit import fit

class AE():
    def __init__(self, model_name:str, seed:int, epochs:int=50, batch_size:int=64, act_fun=nn.Tanh(),
                 lr:float=1e-2, mom:float=0.7):

        self.utils = Utils()
        self.device = self.utils.get_device()  # get device
        self.seed = seed

        # hyper-parameters
        self.epochs = epochs
        self.batch_size = batch_size
        self.act_fun = act_fun
        self.lr = lr
        self.mom = mom

    def fit(self, X_train, y_train, ratio=None):
        # only use the normal data for training
        X_train = X_train[y_train == 0]
        y_train = y_train[y_train == 0]

        train_tensor = TensorDataset(torch.from_numpy(X_train).float(), torch.tensor(y_train).float())
        train_loader = DataLoader(train_tensor, batch_size=self.batch_size, shuffle=False, drop_last=True)

        input_size = X_train.shape[1]
        if input_size < 8:
            hidden_size = input_size // 2
        else:
            hidden_size = input_size // 4

        # model initialization; randomness comes from the weight initialization
        self.utils.set_seed(self.seed)
        self.model = network(input_size=input_size, hidden_size=hidden_size, act_fun=self.act_fun)  # use the hidden size chosen above
        self.model = self.model.to(self.device)

        # optimizer
        optimizer = torch.optim.SGD(self.model.parameters(), lr=self.lr, momentum=self.mom)
        # fitting
        fit(train_loader=train_loader, model=self.model, optimizer=optimizer, epochs=self.epochs,
            print_loss=False, device=self.device)

        return self

    # calculate the anomaly score based on the reconstruction loss
    def predict_score(self, X):
        L1_criterion = nn.L1Loss(reduction='none')
        self.model.eval()

        if not torch.is_tensor(X):
            X = torch.from_numpy(X)

        X = X.float()
        X = X.to(self.device)

        with torch.no_grad():
            _, X_hat = self.model(X)
            score = L1_criterion(X, X_hat)
            score = torch.mean(score, dim=1).cpu().detach().numpy()

        return score
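
A hedged usage sketch for the wrapper above: the numpy arrays are synthetic stand-ins for an ADBench dataset, and myutils.Utils is assumed to be importable from the repository root.

import numpy as np
from baseline.AE.run import AE

# hypothetical data: 500 samples, 20 features, binary labels (0 = normal, 1 = anomaly)
rng = np.random.RandomState(0)
X_train, y_train = rng.randn(500, 20), rng.binomial(1, 0.05, 500)
X_test = rng.randn(100, 20)

clf = AE(model_name='AE', seed=42, epochs=10)
clf.fit(X_train, y_train)           # trains on the normal samples only
scores = clf.predict_score(X_test)  # higher reconstruction error = more anomalous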
1 change: 1 addition & 0 deletions baseline/DAGMM/.gitignore
@@ -0,0 +1 @@
data/*
88 changes: 88 additions & 0 deletions baseline/DAGMM/forward_step.py
@@ -0,0 +1,88 @@
import torch
import torch.nn.functional as F
from torch.autograd import Variable

import numpy as np


class ComputeLoss:
    def __init__(self, model, lambda_energy, lambda_cov, device, n_gmm):
        self.model = model
        self.lambda_energy = lambda_energy
        self.lambda_cov = lambda_cov
        self.device = device
        self.n_gmm = n_gmm

    def forward(self, x, x_hat, z, gamma):
        """Computing the loss function for DAGMM."""
        reconst_loss = torch.mean((x - x_hat).pow(2))

        sample_energy, cov_diag = self.compute_energy(z, gamma)

        loss = reconst_loss + self.lambda_energy * sample_energy + self.lambda_cov * cov_diag
        return Variable(loss, requires_grad=True)

    def compute_energy(self, z, gamma, phi=None, mu=None, cov=None, sample_mean=True):
        """Computing the sample energy function"""
        if (phi is None) or (mu is None) or (cov is None):
            phi, mu, cov = self.compute_params(z, gamma)

        z_mu = (z.unsqueeze(1) - mu.unsqueeze(0))

        eps = 1e-12
        cov_inverse = []
        det_cov = []
        cov_diag = 0
        for k in range(self.n_gmm):
            cov_k = cov[k] + (torch.eye(cov[k].size(-1)) * eps).to(self.device)
            cov_inverse.append(torch.inverse(cov_k).unsqueeze(0))
            det_cov.append((Cholesky.apply(cov_k.cpu() * (2 * np.pi)).diag().prod()).unsqueeze(0))
            cov_diag += torch.sum(1 / cov_k.diag())

        cov_inverse = torch.cat(cov_inverse, dim=0)
        det_cov = torch.cat(det_cov).to(self.device)

        E_z = -0.5 * torch.sum(torch.sum(z_mu.unsqueeze(-1) * cov_inverse.unsqueeze(0), dim=-2) * z_mu, dim=-1)
        E_z = torch.exp(E_z)
        E_z = -torch.log(torch.sum(phi.unsqueeze(0) * E_z / (torch.sqrt(det_cov)).unsqueeze(0), dim=1) + eps)
        if sample_mean:
            E_z = torch.mean(E_z)
        return E_z, cov_diag

    def compute_params(self, z, gamma):
        """Computing the parameters phi, mu and cov for the sample energy function"""
        # K: number of Gaussian mixture components
        # N: number of samples
        # D: latent dimension
        # z     : N x D
        # gamma : N x K

        # phi : K
        phi = torch.sum(gamma, dim=0) / gamma.size(0)

        # mu : K x D
        mu = torch.sum(z.unsqueeze(1) * gamma.unsqueeze(-1), dim=0)
        mu /= torch.sum(gamma, dim=0).unsqueeze(-1)

        z_mu = (z.unsqueeze(1) - mu.unsqueeze(0))
        z_mu_z_mu_t = z_mu.unsqueeze(-1) * z_mu.unsqueeze(-2)

        # cov : K x D x D
        cov = torch.sum(gamma.unsqueeze(-1).unsqueeze(-1) * z_mu_z_mu_t, dim=0)
        cov /= torch.sum(gamma, dim=0).unsqueeze(-1).unsqueeze(-1)

        return phi, mu, cov


class Cholesky(torch.autograd.Function):
    def forward(ctx, a):
        l = torch.cholesky(a, False)
        ctx.save_for_backward(l)
        return l

    def backward(ctx, grad_output):
        l, = ctx.saved_variables
        linv = l.inverse()
        inner = torch.tril(torch.mm(l.t(), grad_output)) * torch.tril(
            1.0 - Variable(l.data.new(l.size(1)).fill_(0.5).diag()))
        s = torch.mm(linv.t(), torch.mm(inner, linv))
        return s
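
For reference, compute_energy above appears to implement the per-sample energy from the DAGMM paper, with a small eps term added inside the log for numerical stability:

E(\mathbf{z}) = -\log\left( \sum_{k=1}^{K} \hat{\phi}_k \, \frac{\exp\left(-\frac{1}{2}(\mathbf{z}-\hat{\mu}_k)^{\top}\hat{\Sigma}_k^{-1}(\mathbf{z}-\hat{\mu}_k)\right)}{\sqrt{\lvert 2\pi\hat{\Sigma}_k \rvert}} \right)

while cov_diag accumulates \sum_{k}\sum_{j} 1/(\hat{\Sigma}_k)_{jj}, the penalty that discourages degenerate (near-singular) covariance estimates.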
44 changes: 44 additions & 0 deletions baseline/DAGMM/main.py
@@ -0,0 +1,44 @@
# code based on https://github.com/danieltan07

import numpy as np
import argparse
import torch

from train import TrainerDAGMM
from test import eval
from preprocess import get_KDDCup99


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_epochs", type=int, default=200,
                        help="number of epochs")
    parser.add_argument("--patience", type=int, default=50,
                        help="Patience for Early Stopping")
    parser.add_argument('--lr', type=float, default=1e-4,
                        help='learning rate')
    parser.add_argument('--lr_milestones', type=int, nargs='+', default=[50],
                        help='Milestones at which the scheduler multiplies the lr by 0.1')
    parser.add_argument("--batch_size", type=int, default=1024,
                        help="Batch size")
    parser.add_argument('--latent_dim', type=int, default=1,
                        help='Dimension of the latent variable z')
    parser.add_argument('--n_gmm', type=int, default=4,
                        help='Number of Gaussian components')
    parser.add_argument('--lambda_energy', type=float, default=0.1,
                        help='Parameter lambda1 for the relative importance of sample energy.')
    parser.add_argument('--lambda_cov', type=float, default=0.005,
                        help='Parameter lambda2 for penalizing small values on '
                             'the diagonal of the covariance matrix')
    # parse arguments
    args = parser.parse_args()

    # check if cuda is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # get train and test dataloaders
    data = get_KDDCup99(args)

    DAGMM = TrainerDAGMM(args, data, device)
    DAGMM.train()
    DAGMM.eval(DAGMM.model, data[1], device)  # data[1]: test dataloader
53 changes: 53 additions & 0 deletions baseline/DAGMM/model.py
@@ -0,0 +1,53 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class DAGMM(nn.Module):
    def __init__(self, input_size, n_gmm=2, z_dim=1):
        """Network for DAGMM (KDDCup99)"""
        super(DAGMM, self).__init__()
        # Encoder network
        self.fc1 = nn.Linear(input_size, 60)
        self.fc2 = nn.Linear(60, 30)
        self.fc3 = nn.Linear(30, 10)
        self.fc4 = nn.Linear(10, z_dim)

        # Decoder network
        self.fc5 = nn.Linear(z_dim, 10)
        self.fc6 = nn.Linear(10, 30)
        self.fc7 = nn.Linear(30, 60)
        self.fc8 = nn.Linear(60, input_size)

        # Estimation network
        self.fc9 = nn.Linear(z_dim + 2, 10)
        self.fc10 = nn.Linear(10, n_gmm)

    def encode(self, x):
        h = torch.tanh(self.fc1(x))
        h = torch.tanh(self.fc2(h))
        h = torch.tanh(self.fc3(h))
        return self.fc4(h)

    def decode(self, x):
        h = torch.tanh(self.fc5(x))
        h = torch.tanh(self.fc6(h))
        h = torch.tanh(self.fc7(h))
        return self.fc8(h)

    def estimate(self, z):
        h = F.dropout(torch.tanh(self.fc9(z)), 0.5)
        return F.softmax(self.fc10(h), dim=1)

    def compute_reconstruction(self, x, x_hat):
        relative_euclidean_distance = (x - x_hat).norm(2, dim=1) / x.norm(2, dim=1)
        cosine_similarity = F.cosine_similarity(x, x_hat, dim=1)
        return relative_euclidean_distance, cosine_similarity

    def forward(self, x):
        z_c = self.encode(x)
        x_hat = self.decode(z_c)
        rec_1, rec_2 = self.compute_reconstruction(x, x_hat)
        z = torch.cat([z_c, rec_1.unsqueeze(-1), rec_2.unsqueeze(-1)], dim=1)
        gamma = self.estimate(z)
        return z_c, x_hat, z, gamma
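
A small shape-check sketch for the network above; the feature width 118 is an illustrative stand-in for the preprocessed KDDCup99 input, and ComputeLoss from forward_step.py would consume z and gamma during training:

import torch
from baseline.DAGMM.model import DAGMM

# hypothetical batch: 32 samples with 118 features
x = torch.randn(32, 118)
model = DAGMM(input_size=118, n_gmm=4, z_dim=1)

# z stacks [latent code, relative Euclidean distance, cosine similarity]
z_c, x_hat, z, gamma = model(x)
print(z_c.shape, x_hat.shape, z.shape, gamma.shape)
# torch.Size([32, 1]) torch.Size([32, 118]) torch.Size([32, 3]) torch.Size([32, 4])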
