add readme

cgsdfc · Mar 6, 2023 · 9110131 · 9110131
1 parent 67f27d7
commit 9110131
Show file tree

Hide file tree

Showing 17 changed files with 5,670 additions and 13,427 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,28 @@
+# Outlier Detection UI
+
+This is a simple UI for running different outlier detection algorithms on synthesized datasets. The backend of this UI is based on the `pyod` library, so the UI can also be viewed as a simple (unofficial) demonstration interface for `pyod`.
+
+## Screenshots
+
+Right after startup, with the splash image:
+
+![](docs/software-interface.jpg)
+
+With detection results displayed:
+
+![](docs/detection-results.png)
+
+## Usage
+
+1. Install the dependencies listed in `requirements.txt`.
+2. Run `app.py`.
+3. In the `PARAMETERS` panel, set the parameters.
+4. Press `RUN` to see the results.
+
+Fixing the `Random Seed` and switching between models allows you to compare the performance of different models on the same dataset. Changing the seed or the dataset parameters (e.g., `Ratio of Outliers`, `Feature Dims`) will cause a different dataset to be loaded.
+
+## TODO
+
+1. Add an `EXPORT` button for exporting the dataset, the detection results, and the model.
+2. ...
+
diff --git a/docs/detection-results.png b/docs/detection-results.png
diff --git a/docs/software-interface.jpg b/docs/software-interface.jpg
diff --git a/draw/splash.drawio b/draw/splash.drawio
diff --git a/draw/splash.png b/draw/splash.png
diff --git a/src/Application.py b/src/Application.py
@@ -21,7 +21,8 @@
 # SOFTWARE.
 
 """
-这个文件要实现UI的回调，处理用户输入，以及启动异常检测程序。
+This file implements the UI callbacks, handles the user's input, and launches
+the PyOD-based backend.
 """
 
 import traceback
@@ -56,7 +57,7 @@ def __init__(self):
         self.ui.leNumTrain.setValidator(QIntValidator(1, 10000, self))
         self.ui.leNumTest.setValidator(QIntValidator(1, 5000, self))
         self.ui.leOutlierRate.setValidator(QDoubleValidator(0.1, 0.5, 2, self))
-        self.ui.lbProgress.setText("就绪")
+        self.ui.lbProgress.setText("Ready")
         self.ui.lbImage.setScaledContents(True)
         self.data_config = DataConfig()
         self.model_config = ModelConfig()
@@ -130,8 +131,8 @@ def on_pbRunDetect_clicked(self):
         if self.job is not None:
             QMessageBox.warning(
                 self,
-                "警告",
-                "检测进行中，请等待",
+                "Warning",
+                "Detection in progress. Please wait!",
                 QMessageBox.StandardButton.Yes,
                 QMessageBox.StandardButton.Yes,
             )
@@ -140,7 +141,7 @@ def on_pbRunDetect_clicked(self):
         pgb = self.ui.pgbEvaluator
         pgb.reset()
         pgb.setRange(0, len(RunEvaluator.ACTION_LIST) - 1)
-        self.ui.lbProgress.setText("检测中")
+        self.ui.lbProgress.setText("Detecting...")
         self.job = RunEvaluator(
             parent=self,
             data_config=self.data_config,
@@ -150,10 +151,10 @@ def on_pbRunDetect_clicked(self):
         self.job.start()
 
     ACTION_TO_PROGRESS = dict(
-        load_data="数据加载完成",
-        load_model="模型加载完成",
-        detect="检测完成",
-        visualize="可视化完成",
+        load_data="Dataset loaded",
+        load_model="Model loaded",
+        detect="Detection done",
+        visualize="Visualization done",
     )
 
     def reset_job(self):
@@ -178,7 +179,7 @@ def on_error(tag: str, msg: str):
             LOG.info(f"Error {msg}, tag {tag}")
             QMessageBox.warning(
                 self,
-                "错误",
+                "Error",
                 msg,
                 QMessageBox.StandardButton.Yes,
                 QMessageBox.StandardButton.Yes,

diff --git a/src/NAME.py b/src/NAME.py
@@ -20,5 +20,5 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-NAME = "异常流量数据分析软件"
+NAME = "Outlier Detection Demon"
 print(f"NAME IS {NAME}")
diff --git a/src/OutlierDetect.py b/src/OutlierDetect.py
@@ -21,15 +21,15 @@
 # SOFTWARE.
 
 """
-这个文件要实现对异常检测框架PyOD的封装。
-为Application提供接口。我们的输入：
-1. 数据集，一般是numpy或者pandas格式；
-2. 方法的种类（名称）；
-3. 方法的超参数；
-我们的输出：
-1. 方法运行的结果；
-2. 其他异常情况；
-3. 可视化的结果；
+This file implements the backend based on the PyOD framework
+and provides an interface to the Application module.
+Inputs:
+1. Datasets in numpy or pandas formats.
+2. The name of the OD algorithm.
+
+Outputs:
+1. The visualization of the outlier detection results, or
+2. Error messages.
 """
 from sklearn.manifold import TSNE
 from dataclasses import dataclass
@@ -52,7 +52,7 @@ def ensure_dir(dir: P):
 VERBOSE = 999
 logging.basicConfig(level=logging.INFO)
 LOG = logging.getLogger("[OutlierDetect]")
-PROJ_DIR = P(__file__).parent
+PROJ_DIR = P(__file__).parent.parent
 TMP_DIR = ensure_dir(PROJ_DIR.joinpath("tmp"))
 CACHE_DIR = ensure_dir(PROJ_DIR.joinpath(".cache"))
 MEMORY = Memory(location=CACHE_DIR, verbose=VERBOSE)
@@ -270,6 +270,174 @@ class DetectionResult:
     y_test_pred_confidence = None
 
 
+from pyod.utils.example import *
+
+
+def visualize(
+    clf_name,
+    X_train,
+    y_train,
+    X_test,
+    y_test,
+    y_train_pred,
+    y_test_pred,
+    show_figure=True,
+    save_figure=False,
+):  # pragma: no cover
+    """Utility function for visualizing the results in examples.
+    Internal use only.
+
+    Parameters
+    ----------
+    clf_name : str
+        The name of the detector.
+
+    X_train : numpy array of shape (n_samples, n_features)
+        The training samples.
+
+    y_train : list or array of shape (n_samples,)
+        The ground truth of training samples.
+
+    X_test : numpy array of shape (n_samples, n_features)
+        The test samples.
+
+    y_test : list or array of shape (n_samples,)
+        The ground truth of test samples.
+
+    y_train_pred : numpy array of shape (n_samples,)
+        The predicted binary labels of the training samples.
+
+    y_test_pred : numpy array of shape (n_samples,)
+        The predicted binary labels of the test samples.
+
+    show_figure : bool, optional (default=True)
+        If set to True, show the figure.
+
+    save_figure : bool, optional (default=False)
+        If set to True, save the figure as ``{clf_name}.png`` (a relative
+        path, so it lands in the current working directory).
+
+    """
+
+    def _add_sub_plot(
+        X_inliers,
+        X_outliers,
+        sub_plot_title,
+        inlier_color="blue",
+        outlier_color="orange",
+    ):
+        """Internal method to add subplot of inliers and outliers.
+
+        Parameters
+        ----------
+        X_inliers : numpy array of shape (n_samples, n_features)
+            Inliers.
+
+        X_outliers : numpy array of shape (n_samples, n_features)
+            Outliers.
+
+        sub_plot_title : str
+            Subplot title.
+
+        inlier_color : str, optional (default='blue')
+            The color of inliers.
+
+        outlier_color : str, optional (default='orange')
+            The color of outliers.
+
+        """
+        plt.axis("equal")
+        plt.scatter(
+            X_inliers[:, 0], X_inliers[:, 1], label="inliers", color=inlier_color, s=20
+        )
+        plt.scatter(
+            X_outliers[:, 0],
+            X_outliers[:, 1],
+            label="outliers",
+            color=outlier_color,
+            s=20,
+            marker="^",
+        )
+        plt.title(sub_plot_title, fontsize=10)
+        plt.xticks([])
+        plt.yticks([])
+        plt.legend(loc=3, prop={"size": 10})
+
+    # check input data shapes are consistent
+    (
+        X_train,
+        y_train,
+        X_test,
+        y_test,
+        y_train_pred,
+        y_test_pred,
+    ) = check_consistent_shape(
+        X_train, y_train, X_test, y_test, y_train_pred, y_test_pred
+    )
+
+    if X_train.shape[1] != 2:
+        raise ValueError(
+            "Input data has to be 2-d for visualization. The "
+            "input data has {shape}.".format(shape=X_train.shape)
+        )
+
+    X_train_outliers, X_train_inliers = get_outliers_inliers(X_train, y_train)
+    X_train_outliers_pred, X_train_inliers_pred = get_outliers_inliers(
+        X_train, y_train_pred
+    )
+
+    X_test_outliers, X_test_inliers = get_outliers_inliers(X_test, y_test)
+    X_test_outliers_pred, X_test_inliers_pred = get_outliers_inliers(
+        X_test, y_test_pred
+    )
+
+    # plot ground truth vs. predicted results
+    fig = plt.figure(figsize=(10, 8))
+    # plt.suptitle("Demo of {clf_name} Detector".format(clf_name=clf_name), fontsize=15)
+
+    fig.add_subplot(221)
+    _add_sub_plot(
+        X_train_inliers,
+        X_train_outliers,
+        "Train Set Ground Truth",
+        inlier_color="blue",
+        outlier_color="orange",
+    )
+
+    fig.add_subplot(222)
+    _add_sub_plot(
+        X_train_inliers_pred,
+        X_train_outliers_pred,
+        "Train Set Prediction",
+        inlier_color="blue",
+        outlier_color="orange",
+    )
+
+    fig.add_subplot(223)
+    _add_sub_plot(
+        X_test_inliers,
+        X_test_outliers,
+        "Test Set Ground Truth",
+        inlier_color="green",
+        outlier_color="red",
+    )
+
+    fig.add_subplot(224)
+    _add_sub_plot(
+        X_test_inliers_pred,
+        X_test_outliers_pred,
+        "Test Set Prediction",
+        inlier_color="green",
+        outlier_color="red",
+    )
+    plt.tight_layout()
+
+    if save_figure:
+        plt.savefig("{clf_name}.png".format(clf_name=clf_name), dpi=300)
+
+    if show_figure:
+        plt.show()
+
+
 @dataclass
 class DetectionEvaluator:
     """
@@ -314,18 +482,20 @@ def visualize(self, parent: P = None):
         temp = P.cwd()
         os.chdir(parent)
         clf_name, data, res = self.model.name, self.data, self.result
-        from pyod.utils.example import visualize
-
-        visualize(
-            clf_name=clf_name,
-            show_figure=False,
-            save_figure=True,
-            X_train=data.X_train2d,
-            y_train=data.y_train,
-            X_test=data.X_test2d,
-            y_test=data.y_test,
-            y_train_pred=res.y_train_pred,
-            y_test_pred=res.y_test_pred,
+        # from pyod.utils.example import visualize
+        Parallel(2)(
+            delayed(visualize)(
+                clf_name=clf_name,
+                show_figure=False,
+                save_figure=True,
+                X_train=data.X_train2d,
+                y_train=data.y_train,
+                X_test=data.X_test2d,
+                y_test=data.y_test,
+                y_train_pred=res.y_train_pred,
+                y_test_pred=res.y_test_pred,
+            )
+            for _ in range(1)
         )
         os.chdir(temp)
         image = parent.joinpath(f"{clf_name}.png")

diff --git a/src/UserInterface.py b/src/UserInterface.py
@@ -1531,16 +1531,16 @@ def setupUi(self, MainWindow):
 
     def retranslateUi(self, MainWindow):
         _translate = QtCore.QCoreApplication.translate
-        MainWindow.setWindowTitle(_translate("MainWindow", "异常流量数据分析软件"))
-        self.groupBox_2.setTitle(_translate("MainWindow", "输入参数"))
-        self.label_4.setText(_translate("MainWindow", "异常点比例"))
-        self.label_3.setText(_translate("MainWindow", "测试样本数"))
-        self.label.setText(_translate("MainWindow", "选择模型"))
-        self.label_5.setText(_translate("MainWindow", "特征维数"))
-        self.label_2.setText(_translate("MainWindow", "训练样本数"))
-        self.label_6.setText(_translate("MainWindow", "随机种子"))
-        self.groupBox.setTitle(_translate("MainWindow", "检测结果"))
-        self.pbRunDetect.setText(_translate("MainWindow", "检测"))
+        MainWindow.setWindowTitle(_translate("MainWindow", "Outlier Detection Demonstration"))
+        self.groupBox_2.setTitle(_translate("MainWindow", "Parameters"))
+        self.label_4.setText(_translate("MainWindow", "Ratio of Outliers"))
+        self.label_3.setText(_translate("MainWindow", "#Testing Set"))
+        self.label.setText(_translate("MainWindow", "Select a Model"))
+        self.label_5.setText(_translate("MainWindow", "Feature Dims"))
+        self.label_2.setText(_translate("MainWindow", "#Training Set"))
+        self.label_6.setText(_translate("MainWindow", "Random Seed"))
+        self.groupBox.setTitle(_translate("MainWindow", "Detection Results"))
+        self.pbRunDetect.setText(_translate("MainWindow", "Run"))
 
 
 import src.res_rc as res_rc

diff --git a/draw/desk.drawio → src/desk.drawio b/draw/desk.drawio → src/desk.drawio
diff --git a/draw/desk.ico → src/desk.ico b/draw/desk.ico → src/desk.ico
diff --git a/draw/desk.png → src/desk.png b/draw/desk.png → src/desk.png