Skip to content

Commit

Permalink
add readme
Browse files Browse the repository at this point in the history
  • Loading branch information
cgsdfc committed Mar 6, 2023
1 parent 67f27d7 commit 9110131
Show file tree
Hide file tree
Showing 17 changed files with 5,670 additions and 13,427 deletions.
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Outlier Detection UI

This is a simple UI for running different outlier detection algorithms on synthesized datasets. The backend of this UI is based on the `pyod` library, so the UI can also be viewed as a simple (unofficial) demonstration interface for `pyod`.

## Screenshots

Just opened with a splash,

![](docs/software-interface.jpg)

Detection results displayed,

![](docs/detection-results.png)

## Usage

1. Install dependencies in `requirements.txt`.
2. Run `app.py`.
3. In the `PARAMETERS` panel, set the parameters.
4. Press `RUN` to see the results.

Fixing the `Random Seed` and switching between models allows you to compare the performance of different models on the same dataset. Changing the seed or the parameters of the dataset (e.g., `Ratio of Outliers`, `Feature Dims`) will cause a different dataset to be loaded.

## TODO

1. Add an `EXPORT` button to allow exporting the dataset, the detection results, and the model.
2. ...

Binary file added docs/detection-results.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/software-interface.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 0 additions & 1 deletion draw/splash.drawio

This file was deleted.

Binary file removed draw/splash.png
Binary file not shown.
21 changes: 11 additions & 10 deletions src/Application.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
# SOFTWARE.

"""
这个文件要实现UI的回调,处理用户输入,以及启动异常检测程序。
This file implements the UI callbacks, handling of the user's input, and launching
the PyOD-based backend.
"""

import traceback
Expand Down Expand Up @@ -56,7 +57,7 @@ def __init__(self):
self.ui.leNumTrain.setValidator(QIntValidator(1, 10000, self))
self.ui.leNumTest.setValidator(QIntValidator(1, 5000, self))
self.ui.leOutlierRate.setValidator(QDoubleValidator(0.1, 0.5, 2, self))
self.ui.lbProgress.setText("就绪")
self.ui.lbProgress.setText("Ready")
self.ui.lbImage.setScaledContents(True)
self.data_config = DataConfig()
self.model_config = ModelConfig()
Expand Down Expand Up @@ -130,8 +131,8 @@ def on_pbRunDetect_clicked(self):
if self.job is not None:
QMessageBox.warning(
self,
"警告",
"检测进行中,请等待",
"Warning",
"Detection in progress. Please wait!",
QMessageBox.StandardButton.Yes,
QMessageBox.StandardButton.Yes,
)
Expand All @@ -140,7 +141,7 @@ def on_pbRunDetect_clicked(self):
pgb = self.ui.pgbEvaluator
pgb.reset()
pgb.setRange(0, len(RunEvaluator.ACTION_LIST) - 1)
self.ui.lbProgress.setText("检测中")
self.ui.lbProgress.setText("Detecting...")
self.job = RunEvaluator(
parent=self,
data_config=self.data_config,
Expand All @@ -150,10 +151,10 @@ def on_pbRunDetect_clicked(self):
self.job.start()

ACTION_TO_PROGRESS = dict(
load_data="数据加载完成",
load_model="模型加载完成",
detect="检测完成",
visualize="可视化完成",
load_data="Dataset loaded",
load_model="Model loaded",
detect="Detection done",
visualize="Visualization done",
)

def reset_job(self):
Expand All @@ -178,7 +179,7 @@ def on_error(tag: str, msg: str):
LOG.info(f"Error {msg}, tag {tag}")
QMessageBox.warning(
self,
"错误",
"Error",
msg,
QMessageBox.StandardButton.Yes,
QMessageBox.StandardButton.Yes,
Expand Down
2 changes: 1 addition & 1 deletion src/NAME.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

NAME = "异常流量数据分析软件"
NAME = "Outlier Detection Demon"
print(f"NAME IS {NAME}")
214 changes: 192 additions & 22 deletions src/OutlierDetect.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@
# SOFTWARE.

"""
这个文件要实现对异常检测框架PyOD的封装。
为Application提供接口。我们的输入:
1. 数据集,一般是numpy或者pandas格式;
2. 方法的种类(名称);
3. 方法的超参数;
我们的输出:
1. 方法运行的结果;
2. 其他异常情况;
3. 可视化的结果;
This file implements the backend based on the PyOD framework
and provides an interface to the Application module.
Inputs:
1. Datasets in numpy or pandas formats.
2. The name of the OD algorithm.
Outputs:
1. The visualization of outlier detection or
2. Error messages.
"""
from sklearn.manifold import TSNE
from dataclasses import dataclass
Expand All @@ -52,7 +52,7 @@ def ensure_dir(dir: P):
VERBOSE = 999
logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger("[OutlierDetect]")
PROJ_DIR = P(__file__).parent
PROJ_DIR = P(__file__).parent.parent
TMP_DIR = ensure_dir(PROJ_DIR.joinpath("tmp"))
CACHE_DIR = ensure_dir(PROJ_DIR.joinpath(".cache"))
MEMORY = Memory(location=CACHE_DIR, verbose=VERBOSE)
Expand Down Expand Up @@ -270,6 +270,174 @@ class DetectionResult:
y_test_pred_confidence = None


from pyod.utils.example import *


def visualize(
    clf_name,
    X_train,
    y_train,
    X_test,
    y_test,
    y_train_pred,
    y_test_pred,
    show_figure=True,
    save_figure=False,
):  # pragma: no cover
    """Plot ground truth against predictions for the train and test sets.

    Draws a 2x2 grid of scatter plots: train ground truth, train prediction,
    test ground truth and test prediction. Inliers and outliers are drawn
    with different colors and markers.

    Parameters
    ----------
    clf_name : str
        The name of the detector. Used as the stem of the saved file name.
    X_train : numpy array of shape (n_samples, 2)
        The training samples. Must have exactly two features.
    y_train : list or array of shape (n_samples,)
        The ground truth of training samples.
    X_test : numpy array of shape (n_samples, 2)
        The test samples.
    y_test : list or array of shape (n_samples,)
        The ground truth of test samples.
    y_train_pred : list or array of shape (n_samples,)
        The predicted binary labels of the training samples.
    y_test_pred : list or array of shape (n_samples,)
        The predicted binary labels of the test samples.
    show_figure : bool, optional (default=True)
        If set to True, show the figure.
    save_figure : bool, optional (default=False)
        If set to True, save the figure as ``"<clf_name>.png"`` (relative
        to the current working directory) at 300 dpi.

    Raises
    ------
    ValueError
        If ``X_train`` does not have exactly two columns.
    """

    def _add_sub_plot(
        X_inliers,
        X_outliers,
        sub_plot_title,
        inlier_color="blue",
        outlier_color="orange",
    ):
        """Draw inliers and outliers on the current axes.

        Parameters
        ----------
        X_inliers : numpy array of shape (n_samples, n_features)
            Inliers.
        X_outliers : numpy array of shape (n_samples, n_features)
            Outliers.
        sub_plot_title : str
            Subplot title.
        inlier_color : str, optional (default='blue')
            The color of inliers.
        outlier_color : str, optional (default='orange')
            The color of outliers.
        """
        plt.axis("equal")
        plt.scatter(
            X_inliers[:, 0],
            X_inliers[:, 1],
            label="inliers",
            color=inlier_color,
            s=20,
        )
        plt.scatter(
            X_outliers[:, 0],
            X_outliers[:, 1],
            label="outliers",
            color=outlier_color,
            s=20,
            marker="^",
        )
        plt.title(sub_plot_title, fontsize=10)
        plt.xticks([])
        plt.yticks([])
        plt.legend(loc=3, prop={"size": 10})

    # Check that the input data shapes are consistent.
    (
        X_train,
        y_train,
        X_test,
        y_test,
        y_train_pred,
        y_test_pred,
    ) = check_consistent_shape(
        X_train, y_train, X_test, y_test, y_train_pred, y_test_pred
    )

    if X_train.shape[1] != 2:
        raise ValueError(
            "Input data has to be 2-d for visualization. The "
            f"input data has {X_train.shape}."
        )

    # get_outliers_inliers returns the outliers first, then the inliers.
    X_train_outliers, X_train_inliers = get_outliers_inliers(X_train, y_train)
    X_train_outliers_pred, X_train_inliers_pred = get_outliers_inliers(
        X_train, y_train_pred
    )
    X_test_outliers, X_test_inliers = get_outliers_inliers(X_test, y_test)
    X_test_outliers_pred, X_test_inliers_pred = get_outliers_inliers(
        X_test, y_test_pred
    )

    # One row per panel:
    # (grid position, inliers, outliers, title, inlier color, outlier color).
    panels = (
        (221, X_train_inliers, X_train_outliers,
         "Train Set Ground Truth", "blue", "orange"),
        (222, X_train_inliers_pred, X_train_outliers_pred,
         "Train Set Prediction", "blue", "orange"),
        (223, X_test_inliers, X_test_outliers,
         "Test Set Ground Truth", "green", "red"),
        (224, X_test_inliers_pred, X_test_outliers_pred,
         "Test Set Prediction", "green", "red"),
    )

    # Plot ground truth vs. predicted results.
    fig = plt.figure(figsize=(10, 8))
    for position, inliers, outliers, title, inlier_color, outlier_color in panels:
        # add_subplot makes the new axes current, which is what the
        # pyplot calls inside _add_sub_plot draw on.
        fig.add_subplot(position)
        _add_sub_plot(
            inliers,
            outliers,
            title,
            inlier_color=inlier_color,
            outlier_color=outlier_color,
        )
    plt.tight_layout()

    if save_figure:
        plt.savefig(f"{clf_name}.png", dpi=300)

    if show_figure:
        plt.show()
    elif save_figure:
        # The figure has been written to disk and will not be displayed.
        # Close it so repeated runs do not accumulate open figures.
        plt.close(fig)


@dataclass
class DetectionEvaluator:
"""
Expand Down Expand Up @@ -314,18 +482,20 @@ def visualize(self, parent: P = None):
temp = P.cwd()
os.chdir(parent)
clf_name, data, res = self.model.name, self.data, self.result
from pyod.utils.example import visualize

visualize(
clf_name=clf_name,
show_figure=False,
save_figure=True,
X_train=data.X_train2d,
y_train=data.y_train,
X_test=data.X_test2d,
y_test=data.y_test,
y_train_pred=res.y_train_pred,
y_test_pred=res.y_test_pred,
# from pyod.utils.example import visualize
Parallel(2)(
delayed(visualize)(
clf_name=clf_name,
show_figure=False,
save_figure=True,
X_train=data.X_train2d,
y_train=data.y_train,
X_test=data.X_test2d,
y_test=data.y_test,
y_train_pred=res.y_train_pred,
y_test_pred=res.y_test_pred,
)
for _ in range(1)
)
os.chdir(temp)
image = parent.joinpath(f"{clf_name}.png")
Expand Down
20 changes: 10 additions & 10 deletions src/UserInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -1531,16 +1531,16 @@ def setupUi(self, MainWindow):

def retranslateUi(self, MainWindow):
_translate = QtCore.QCoreApplication.translate
MainWindow.setWindowTitle(_translate("MainWindow", "异常流量数据分析软件"))
self.groupBox_2.setTitle(_translate("MainWindow", "输入参数"))
self.label_4.setText(_translate("MainWindow", "异常点比例"))
self.label_3.setText(_translate("MainWindow", "测试样本数"))
self.label.setText(_translate("MainWindow", "选择模型"))
self.label_5.setText(_translate("MainWindow", "特征维数"))
self.label_2.setText(_translate("MainWindow", "训练样本数"))
self.label_6.setText(_translate("MainWindow", "随机种子"))
self.groupBox.setTitle(_translate("MainWindow", "检测结果"))
self.pbRunDetect.setText(_translate("MainWindow", "检测"))
MainWindow.setWindowTitle(_translate("MainWindow", "Outlier Detection Demonstration"))
self.groupBox_2.setTitle(_translate("MainWindow", "Parameters"))
self.label_4.setText(_translate("MainWindow", "Ratio of Outliers"))
self.label_3.setText(_translate("MainWindow", "#Testing Set"))
self.label.setText(_translate("MainWindow", "Select a Model"))
self.label_5.setText(_translate("MainWindow", "Feature Dims"))
self.label_2.setText(_translate("MainWindow", "#Training Set"))
self.label_6.setText(_translate("MainWindow", "Random Seed"))
self.groupBox.setTitle(_translate("MainWindow", "Detection Results"))
self.pbRunDetect.setText(_translate("MainWindow", "Run"))


import src.res_rc as res_rc
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes
Loading

0 comments on commit 9110131

Please sign in to comment.