Merge pull request #472 from pallamadhavi/main

Automated Testing
UdayLab · Jul 2, 2024 · 96188af · 96188af
2 parents 06845c0 + a026060
commit 96188af
Show file tree

Hide file tree

Showing 21 changed files with 1,111 additions and 4 deletions.
diff --git a/tests/correlatedPattern/basic/abstract.py b/tests/correlatedPattern/basic/abstract.py
@@ -0,0 +1,174 @@
+#  Copyright (C)  2021 Rage Uday Kiran
+#
+#      This program is free software: you can redistribute it and/or modify
+#      it under the terms of the GNU General Public License as published by
+#      the Free Software Foundation, either version 3 of the License, or
+#      (at your option) any later version.
+#
+#      This program is distributed in the hope that it will be useful,
+#      but WITHOUT ANY WARRANTY; without even the implied warranty of
+#      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#      GNU General Public License for more details.
+#
+#      You should have received a copy of the GNU General Public License
+#      along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+from abc import ABC as _ABC, abstractmethod as _abstractmethod
+import time as _time
+import csv as _csv
+import pandas as _pd
+from collections import defaultdict as _defaultdict
+from itertools import combinations as _c
+import os as _os
+import os.path as _ospath
+import psutil as _psutil
+import validators as _validators
+from urllib.request import urlopen as _urlopen
+import sys as _sys
+import math as _math
+
+
+class _correlatedPatterns(_ABC):
+    """
+    :Description:   Abstract base class declaring the state and the methods that every correlated pattern
+                    mining algorithm in PAMI is expected to provide.
+
+    :Attributes:
+
+        All attributes are stored on the instance with a leading underscore (for example ``_iFile``).
+
+        iFile : str
+            Name or path of the input file
+        minSup : int or float or str
+            Minimum support threshold, given either as a count or as a proportion of the database size.
+            An integer value is interpreted as a count; any other value is treated as a float.
+            Example: minSup=10 is read as a count, whereas minSup=10.0 is read as a float
+        minAllConf : float
+            Minimum all-confidence ratio supplied by the user (expected to lie between 0 and 1)
+        sep : str
+            Separator that delimits the items of a transaction. A tab character is the default,
+            and callers may pass a different separator
+        startTime : float
+            Time at which the algorithm started
+        endTime : float
+            Time at which the algorithm finished
+        finalPatterns : dict
+            Dictionary holding the complete set of discovered patterns
+        oFile : str
+            Name of the output file that receives the complete set of correlated patterns
+        memoryUSS : float
+            Total amount of USS memory consumed by the program
+        memoryRSS : float
+            Total amount of RSS memory consumed by the program
+
+    :Methods:
+
+        startMine()
+            Entry point that starts the actual mining process
+        getPatterns()
+            Returns all interesting patterns discovered by the algorithm
+        save(oFile)
+            Stores the discovered patterns in the output file named by the user
+        getPatternsAsDataFrame()
+            Returns the discovered patterns as a data frame
+        getMemoryUSS()
+            Returns the total amount of USS memory consumed by the mining algorithm
+        getMemoryRSS()
+            Returns the total amount of RSS memory consumed by the mining algorithm
+        getRuntime()
+            Returns the total runtime of the mining algorithm
+        printResults()
+            Prints the results of the execution
+
+    """
+
+    def __init__(self, iFile, minSup, minAllConf, sep="\t"):
+        """
+        Record the user supplied parameters and reset the result and bookkeeping fields.
+
+        :param iFile: Name or path of the input file
+        :type iFile: str
+        :param minSup: Minimum support, given either as a count or as a proportion of the database size.
+            An integer value is interpreted as a count; any other value is treated as a float.
+            Example: minSup=10 is read as a count, whereas minSup=10.0 is read as a float
+        :type minSup: int or float or str
+        :param minAllConf: Minimum all-confidence ratio supplied by the user (expected to lie between 0 and 1)
+        :type minAllConf: float
+        :param sep: Separator that delimits the items of a transaction. Defaults to a tab character
+        :type sep: str
+        """
+
+        # Configuration handed in by the caller.
+        self._iFile, self._sep = iFile, sep
+        self._minSup, self._minAllConf = minSup, minAllConf
+
+        # Results and run statistics start out empty / zeroed.
+        self._finalPatterns = dict()
+        self._oFile = ""
+        self._memoryUSS = self._memoryRSS = 0.0
+        self._startTime = self._endTime = 0.0
+
+    @_abstractmethod
+    def startMine(self):
+        """
+        Entry point of the mining process; subclasses place the mining code here
+        """
+
+    @_abstractmethod
+    def getPatterns(self):
+        """
+        Retrieve the complete set of correlated patterns that were generated
+        """
+
+    @_abstractmethod
+    def save(self, oFile):
+        """
+        Save the complete set of correlated patterns to an output file
+
+        :param oFile: Name of the output file
+        :type oFile: csv file
+        """
+
+    @_abstractmethod
+    def getPatternsAsDataFrame(self):
+        """
+        Load the complete set of correlated patterns into a data frame
+        """
+
+    @_abstractmethod
+    def getMemoryUSS(self):
+        """
+        Retrieve the total amount of USS memory consumed by the program
+        """
+
+    @_abstractmethod
+    def getMemoryRSS(self):
+        """
+        Retrieve the total amount of RSS memory consumed by the program
+        """
+
+    @_abstractmethod
+    def getRuntime(self):
+        """
+        Retrieve the total runtime taken by the program
+        """
+
+    @_abstractmethod
+    def printResults(self):
+        """
+        Print the results of the execution
+        """
diff --git a/tests/correlatedPattern/basic/automated_test_CoMine.py b/tests/correlatedPattern/basic/automated_test_CoMine.py
@@ -0,0 +1,18 @@
+import pandas as pd
+from PAMI.correlatedPattern.basic.CoMine import CoMine as alg
+import warnings
+
+# Ignore all warnings from here on; presumably to keep the test output quiet.
+warnings.filterwarnings("ignore")
+
+# CoMine algorithm from PAMI
+def test_pami(dataset, min_sup=0.2, min_all_conf=0.2):
+    """
+    Mine ``dataset`` with CoMine and return the discovered patterns as a data frame.
+
+    The transactions are written to a comma separated input file inside a private temporary
+    directory, so nothing is left behind in the working directory and runs that happen at the
+    same time cannot overwrite each other's input.
+
+    :param dataset: transactions to mine; each transaction is an iterable of item strings
+    :param min_sup: value handed to the algorithm as ``minSup``
+    :param min_all_conf: value handed to the algorithm as ``minAllConf``
+    :return: the frame returned by ``getPatternsAsDataFrame()`` with every ``Patterns`` entry split
+             on whitespace into a list and every ``Support`` entry divided by the number of transactions
+    """
+    # Imported locally because the module itself does not import them.
+    import os
+    import tempfile
+
+    lines = [",".join(transaction) for transaction in dataset]
+    with tempfile.TemporaryDirectory() as work_dir:
+        input_file = os.path.join(work_dir, "sample.csv")
+        with open(input_file, "w") as f:
+            f.write("\n".join(lines))
+        obj = alg(iFile=input_file, minSup=min_sup, minAllConf=min_all_conf, sep=',')
+        obj.mine()
+        # Fetch the result before the temporary directory (and the input file) is removed.
+        res = obj.getPatternsAsDataFrame()
+    res["Patterns"] = res["Patterns"].apply(lambda x: x.split())
+    # Turn the reported support into a fraction of the number of transactions.
+    res["Support"] = res["Support"].apply(lambda x: x / len(lines))
+    return res
diff --git a/tests/correlatedPattern/basic/automated_test_CoMinePlus.py b/tests/correlatedPattern/basic/automated_test_CoMinePlus.py
@@ -0,0 +1,19 @@
+import pandas as pd
+from PAMI.correlatedPattern.basic.CoMinePlus import CoMinePlus as alg
+import warnings
+
+# Ignore all warnings from here on; presumably to keep the test output quiet.
+warnings.filterwarnings("ignore")
+
+# CoMinePlus algorithm from PAMI
+def test_pami(dataset, min_sup=0.2, min_all_conf=0.2):
+    """
+    Mine ``dataset`` with CoMinePlus and return the discovered patterns as a data frame.
+
+    The transactions are written to a comma separated input file inside a private temporary
+    directory, so nothing is left behind in the working directory and runs that happen at the
+    same time cannot overwrite each other's input.
+
+    :param dataset: transactions to mine; each transaction is an iterable of item strings
+    :param min_sup: value handed to the algorithm as ``minSup``
+    :param min_all_conf: value handed to the algorithm as ``minAllConf``
+    :return: the frame returned by ``getPatternsAsDataFrame()`` with every ``Patterns`` entry split
+             on whitespace into a list and every ``Support`` entry divided by the number of transactions
+    """
+    # Imported locally because the module itself does not import them.
+    import os
+    import tempfile
+
+    lines = [",".join(transaction) for transaction in dataset]
+    with tempfile.TemporaryDirectory() as work_dir:
+        input_file = os.path.join(work_dir, "sample.csv")
+        with open(input_file, "w") as f:
+            f.write("\n".join(lines))
+        obj = alg(iFile=input_file, minSup=min_sup, minAllConf=min_all_conf, sep=',')
+        obj.mine()
+        # Fetch the result before the temporary directory (and the input file) is removed.
+        res = obj.getPatternsAsDataFrame()
+    res["Patterns"] = res["Patterns"].apply(lambda x: x.split())
+    # Turn the reported support into a fraction of the number of transactions.
+    res["Support"] = res["Support"].apply(lambda x: x / len(lines))
+    return res
+
diff --git a/tests/correlatedPattern/basic/automated_test_case_CoMine.py b/tests/correlatedPattern/basic/automated_test_case_CoMine.py
@@ -0,0 +1,57 @@
+import unittest
+from gen import generate_transactional_dataset
+from automated_test_CoMine import test_pami
+import warnings
+
+# Ignore all warnings from here on; presumably to keep the test output quiet.
+warnings.filterwarnings("ignore")
+
+class TestExample(unittest.TestCase):
+    """
+    Smoke tests for the CoMine wrapper ``test_pami`` on randomly generated transactional data.
+
+    There is no second implementation to compare against, so the tests only check that patterns
+    are produced and that every reported support value is positive.
+    """
+
+    # Number of random datasets drawn per test and the shape of each dataset.
+    NUM_ROUNDS = 3
+    NUM_DISTINCT_ITEMS = 20
+    NUM_TRANSACTIONS = 1000
+    MAX_ITEMS_PER_TRANSACTION = 20
+
+    def _mine_random_dataset(self):
+        """Generate one random dataset and return what ``test_pami`` mines from it."""
+        items = ["item-{}".format(i) for i in range(1, self.NUM_DISTINCT_ITEMS + 1)]
+        dataset = generate_transactional_dataset(
+            self.NUM_TRANSACTIONS, items, self.MAX_ITEMS_PER_TRANSACTION)
+        return test_pami(dataset)
+
+    def test_num_patterns(self):
+        """The result frame contains at least one row for every random dataset."""
+        for _ in range(self.NUM_ROUNDS):
+            pami = self._mine_random_dataset()
+            # As we don't have a second method to compare, we just verify the length of pami
+            self.assertGreater(len(pami), 0, "No patterns were generated by CoMine")
+
+        print("{} test cases for number of patterns have been passed".format(self.NUM_ROUNDS))
+
+    def test_equality(self):
+        """The ``Patterns`` column is non-empty; no equality against a reference is checked."""
+        for _ in range(self.NUM_ROUNDS):
+            pami = self._mine_random_dataset()
+            # Since we have no second method to compare, we just verify the patterns are generated
+            pami_patterns = list(pami["Patterns"])
+            self.assertGreater(len(pami_patterns), 0, "No patterns were generated by CoMine")
+
+        print("{} test cases for Patterns equality are passed".format(self.NUM_ROUNDS))
+
+    def test_support(self):
+        """Every value in the ``Support`` column is greater than zero."""
+        for _ in range(self.NUM_ROUNDS):
+            pami = self._mine_random_dataset()
+            # Since we have no second method to compare, we just verify the support values are generated
+            for support in pami["Support"]:
+                self.assertGreater(support, 0, "Support value should be greater than 0")
+
+        print("{} test cases for support equality are passed".format(self.NUM_ROUNDS))
+
+# Run the tests in this file when it is executed directly as a script.
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/correlatedPattern/basic/automated_test_case_CoMinePlus.py b/tests/correlatedPattern/basic/automated_test_case_CoMinePlus.py
@@ -0,0 +1,47 @@
+import unittest
+from gen import generate_transactional_dataset
+from automated_test_CoMinePlus import test_pami
+import warnings
+
+# Ignore all warnings from here on; presumably to keep the test output quiet.
+warnings.filterwarnings("ignore")
+
+class TestExample(unittest.TestCase):
+    """
+    Smoke tests for the CoMinePlus wrapper ``test_pami`` on randomly generated transactional data.
+
+    There is no second implementation to compare against, so the tests only check that patterns
+    are produced and that every reported support value is positive.
+    """
+
+    # Number of random datasets drawn per test and the shape of each dataset.
+    NUM_ROUNDS = 3
+    NUM_DISTINCT_ITEMS = 20
+    NUM_TRANSACTIONS = 1000
+    MAX_ITEMS_PER_TRANSACTION = 20
+
+    def _mine_random_dataset(self):
+        """Generate one random dataset and return what ``test_pami`` mines from it."""
+        items = ["item-{}".format(i) for i in range(1, self.NUM_DISTINCT_ITEMS + 1)]
+        dataset = generate_transactional_dataset(
+            self.NUM_TRANSACTIONS, items, self.MAX_ITEMS_PER_TRANSACTION)
+        return test_pami(dataset)
+
+    def test_num_patterns(self):
+        """The result frame contains at least one row for every random dataset."""
+        for _ in range(self.NUM_ROUNDS):
+            pami = self._mine_random_dataset()
+            self.assertGreater(len(pami), 0, "No patterns were generated by PAMI")
+        print("{} test cases for number of patterns have been passed".format(self.NUM_ROUNDS))
+
+    def test_equality(self):
+        """The ``Patterns`` column is non-empty; no equality against a reference is checked."""
+        for _ in range(self.NUM_ROUNDS):
+            pami = self._mine_random_dataset()
+            pami_patterns = list(pami["Patterns"])
+            self.assertGreater(len(pami_patterns), 0, "No patterns were generated by PAMI")
+        print("{} test cases for Patterns equality are passed".format(self.NUM_ROUNDS))
+
+    def test_support(self):
+        """Every value in the ``Support`` column is greater than zero."""
+        for _ in range(self.NUM_ROUNDS):
+            pami = self._mine_random_dataset()
+            for support in pami["Support"]:
+                self.assertGreater(support, 0, "Support value should be greater than 0")
+        print("{} test cases for support equality are passed".format(self.NUM_ROUNDS))
+
+# Run the tests in this file when it is executed directly as a script.
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/correlatedPattern/basic/gen.py b/tests/correlatedPattern/basic/gen.py
@@ -0,0 +1,20 @@
+import random
+import warnings
+
+# Ignore all warnings from here on; presumably to keep the test output quiet.
+warnings.filterwarnings("ignore")
+
+def generate_transactional_dataset(num_transactions, items, max_items_per_transaction):
+    """
+    Build a random transactional dataset.
+
+    Each transaction holds between 1 and ``max_items_per_transaction`` entries drawn from ``items``
+    without replacement. The upper bound is capped at the number of available items, because
+    ``random.sample`` cannot draw more elements than the population contains.
+
+    :param num_transactions: number of transactions to generate; an empty list is returned when
+        this is zero or negative
+    :type num_transactions: int
+    :param items: the items to draw from
+    :type items: iterable
+    :param max_items_per_transaction: largest number of items a single transaction may contain
+    :type max_items_per_transaction: int
+    :return: list of transactions, each a list of items
+    :rtype: list
+    :raises ValueError: if transactions are requested but ``items`` is empty or
+        ``max_items_per_transaction`` is smaller than 1
+    """
+    rounds = range(num_transactions)
+    if not rounds:
+        return []
+
+    # Materialise once so any iterable (not only sequences) can be sampled from.
+    pool = list(items)
+    if not pool:
+        raise ValueError("items must contain at least one item")
+    if max_items_per_transaction < 1:
+        raise ValueError("max_items_per_transaction must be at least 1")
+
+    # Never ask random.sample for more elements than the pool holds.
+    largest = min(max_items_per_transaction, len(pool))
+    return [random.sample(pool, random.randint(1, largest)) for _ in rounds]
+
+# Example usage:
+# num_distinct_items=20
+# num_transactions = 1000
+# max_items_per_transaction = 20
+# items=["item-{}".format(i) for i in range(1,num_distinct_items+1)]
+# dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction)
+# print(dataset)