From 59e0731e435ec6e80d4ff3946a7e6911829806b8 Mon Sep 17 00:00:00 2001 From: Likhitha Date: Tue, 2 Jul 2024 13:38:08 +0900 Subject: [PATCH 1/2] Automated Testing --- tests/correlatedPattern/basic/abstract.py | 174 ++++++++++++++++ .../basic/automated_test_CoMine.py | 18 ++ .../basic/automated_test_CoMinePlus.py | 19 ++ .../basic/automated_test_case_CoMine.py | 57 ++++++ .../basic/automated_test_case_CoMinePlus.py | 47 +++++ tests/correlatedPattern/basic/gen.py | 20 ++ tests/frequentPattern/basic/abstract.py | 189 ++++++++++++++++++ .../basic/automated_test_case_ECLAT.py | 2 +- .../basic/automated_test_case_ECLATDiffset.py | 2 +- .../basic/automated_test_case_apriori.py | 2 +- .../basic/automated_test_case_fpgrowth.py | 2 +- tests/recurringPattern/basic/abstract.py | 164 +++++++++++++++ .../basic/automated_test_RPGrowth.py | 19 ++ .../basic/automated_test_case_RPGrowth.py | 52 +++++ tests/recurringPattern/basic/gen.py | 9 + 15 files changed, 772 insertions(+), 4 deletions(-) create mode 100644 tests/correlatedPattern/basic/abstract.py create mode 100644 tests/correlatedPattern/basic/automated_test_CoMine.py create mode 100644 tests/correlatedPattern/basic/automated_test_CoMinePlus.py create mode 100644 tests/correlatedPattern/basic/automated_test_case_CoMine.py create mode 100644 tests/correlatedPattern/basic/automated_test_case_CoMinePlus.py create mode 100644 tests/correlatedPattern/basic/gen.py create mode 100644 tests/frequentPattern/basic/abstract.py create mode 100644 tests/recurringPattern/basic/abstract.py create mode 100644 tests/recurringPattern/basic/automated_test_RPGrowth.py create mode 100644 tests/recurringPattern/basic/automated_test_case_RPGrowth.py create mode 100644 tests/recurringPattern/basic/gen.py diff --git a/tests/correlatedPattern/basic/abstract.py b/tests/correlatedPattern/basic/abstract.py new file mode 100644 index 000000000..76d333d54 --- /dev/null +++ b/tests/correlatedPattern/basic/abstract.py @@ -0,0 +1,174 @@ +# Copyright (C) 2021 
Rage Uday Kiran +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from abc import ABC as _ABC, abstractmethod as _abstractmethod +import time as _time +import csv as _csv +import pandas as _pd +from collections import defaultdict as _defaultdict +from itertools import combinations as _c +import os as _os +import os.path as _ospath +import psutil as _psutil +import validators as _validators +from urllib.request import urlopen as _urlopen +import sys as _sys +import math as _math + + +class _correlatedPatterns(_ABC): + """ + :Description: This abstract base class defines the variables and methods that every correlated pattern mining algorithm must + employ in PAMI + + :Attributes: + + iFile : str + Input file name or path of the input file + minSup: integer or float or str + The user can specify minSup either in count or proportion of database size. + If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. + Otherwise, it will be treated as float. + Example: minSup=10 will be treated as integer, while minSup=10.0 will be treated as float + minAllConf: float + The user given minimum all confidence Ratio(should be in range of 0 to 1) + sep : str + This variable is used to distinguish items from one another in a transaction. The default seperator is tab space or \t. 
+ However, the users can override their default separator + startTime:float + To record the start time of the algorithm + endTime:float + To record the completion time of the algorithm + finalPatterns: dict + Storing the complete set of patterns in a dictionary variable + oFile : str + Name of the output file to store complete set of correlated patterns + memoryUSS : float + To store the total amount of USS memory consumed by the program + memoryRSS : float + To store the total amount of RSS memory consumed by the program + + :Methods: + + startMine() + Calling this function will start the actual mining process + getPatterns() + This function will output all interesting patterns discovered by an algorithm + save(oFile) + This function will store the discovered patterns in an output file specified by the user + getPatternsAsDataFrame() + The function outputs the patterns generated by an algorithm as a data frame + getMemoryUSS() + This function outputs the total amount of USS memory consumed by a mining algorithm + getMemoryRSS() + This function outputs the total amount of RSS memory consumed by a mining algorithm + getRuntime() + This function outputs the total runtime of a mining algorithm + + """ + + def __init__(self, iFile, minSup, minAllConf, sep="\t"): + """ + :param iFile: Input file name or path of the input file + :type iFile: str + :param minSup: The user can specify minSup either in count or proportion of database size. + If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. + Otherwise, it will be treated as float. + Example: minSup=10 will be treated as integer, while minSup=10.0 will be treated as float + :type minSup: int or float or str + :param minAllConf: The user given minimum all confidence Ratio(should be in range of 0 to 1) + :type minAllConf :float + :param sep: separator used to distinguish items from each other. The default separator is tab space. 
However, users can override the default separator + :type sep: str + """ + + self._iFile = iFile + self._sep = sep + self._minSup = minSup + self._minAllConf = minAllConf + self._finalPatterns = {} + self._oFile = str() + self._memoryRSS = float() + self._memoryUSS = float() + self._startTime = float() + self._endTime = float() + + + @_abstractmethod + def startMine(self): + """ + Code for the mining process will start from this function + """ + + pass + + @_abstractmethod + def getPatterns(self): + """ + Complete set of correlated patterns generated will be retrieved from this function + """ + + pass + + @_abstractmethod + def save(self, oFile): + """ + Complete set of correlated patterns will be saved in to an output file from this function + :param oFile: Name of the output file + :type oFile: csv file + """ + + pass + + @_abstractmethod + def getPatternsAsDataFrame(self): + """ + Complete set of correlated patterns will be loaded in to data frame from this function + """ + + pass + + @_abstractmethod + def getMemoryUSS(self): + """ + Total amount of USS memory consumed by the program will be retrieved from this function + """ + + pass + + @_abstractmethod + def getMemoryRSS(self): + """ + Total amount of RSS memory consumed by the program will be retrieved from this function + """ + + pass + + + @_abstractmethod + def getRuntime(self): + """ + Total amount of runtime taken by the program will be retrieved from this function + """ + + pass + + @_abstractmethod + def printResults(self): + """ + To print the results of execution. 
+ """ + + pass \ No newline at end of file diff --git a/tests/correlatedPattern/basic/automated_test_CoMine.py b/tests/correlatedPattern/basic/automated_test_CoMine.py new file mode 100644 index 000000000..23a87dc7b --- /dev/null +++ b/tests/correlatedPattern/basic/automated_test_CoMine.py @@ -0,0 +1,18 @@ +import pandas as pd +from PAMI.correlatedPattern.basic.CoMine import CoMine as alg +import warnings + +warnings.filterwarnings("ignore") + +# CoMine algorithm from PAMI +def test_pami(dataset, min_sup=0.2, min_all_conf=0.2): + dataset = [",".join(i) for i in dataset] + with open("sample.csv", "w+") as f: + f.write("\n".join(dataset)) + obj = alg(iFile="sample.csv", minSup=min_sup, minAllConf=min_all_conf, sep=',') + obj.mine() + res = obj.getPatternsAsDataFrame() + res["Patterns"] = res["Patterns"].apply(lambda x: x.split()) + res["Support"] = res["Support"].apply(lambda x: x / len(dataset)) + pami = res + return pami diff --git a/tests/correlatedPattern/basic/automated_test_CoMinePlus.py b/tests/correlatedPattern/basic/automated_test_CoMinePlus.py new file mode 100644 index 000000000..bd5095cd5 --- /dev/null +++ b/tests/correlatedPattern/basic/automated_test_CoMinePlus.py @@ -0,0 +1,19 @@ +import pandas as pd +from PAMI.correlatedPattern.basic.CoMinePlus import CoMinePlus as alg +import warnings + +warnings.filterwarnings("ignore") + +# CoMine algorithm from PAMI +def test_pami(dataset, min_sup=0.2, min_all_conf=0.2): + dataset = [",".join(i) for i in dataset] + with open("sample.csv", "w+") as f: + f.write("\n".join(dataset)) + obj = alg(iFile="sample.csv", minSup=min_sup, minAllConf=min_all_conf, sep=',') + obj.mine() + res = obj.getPatternsAsDataFrame() + res["Patterns"] = res["Patterns"].apply(lambda x: x.split()) + res["Support"] = res["Support"].apply(lambda x: x / len(dataset)) + pami = res + return pami + diff --git a/tests/correlatedPattern/basic/automated_test_case_CoMine.py b/tests/correlatedPattern/basic/automated_test_case_CoMine.py new file mode 
100644 index 000000000..5c6462c04 --- /dev/null +++ b/tests/correlatedPattern/basic/automated_test_case_CoMine.py @@ -0,0 +1,57 @@ +import unittest +from gen import generate_transactional_dataset +from automated_test_CoMine import test_pami +import warnings + +warnings.filterwarnings("ignore") + +class TestExample(unittest.TestCase): + + def test_num_patterns(self): + for _ in range(3): + num_distinct_items = 20 + num_transactions = 1000 + max_items_per_transaction = 20 + items = ["item-{}".format(i) for i in range(1, num_distinct_items + 1)] + dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction) + + pami = test_pami(dataset) + # As we don't have a second method to compare, we just verify the length of pami + self.assertGreater(len(pami), 0, "No patterns were generated by CoMine") + + print("3 test cases for number of patterns have been passed") + + def test_equality(self): + for _ in range(3): + num_distinct_items = 20 + num_transactions = 1000 + max_items_per_transaction = 20 + items = ["item-{}".format(i) for i in range(1, num_distinct_items + 1)] + dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction) + + pami = test_pami(dataset) + # Since we have no second method to compare, we just verify the patterns are generated + pami_patterns = sorted(list(pami["Patterns"])) + self.assertTrue(len(pami_patterns) > 0, "No patterns were generated by CoMine") + + print("3 test cases for Patterns equality are passed") + + def test_support(self): + for _ in range(3): + num_distinct_items = 20 + num_transactions = 1000 + max_items_per_transaction = 20 + items = ["item-{}".format(i) for i in range(1, num_distinct_items + 1)] + dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction) + + pami = test_pami(dataset) + # Since we have no second method to compare, we just verify the support values are generated + pami.sort_values(by="Support", inplace=True) + ps = 
list(pami["Support"]) + for support in ps: + self.assertTrue(support > 0, "Support value should be greater than 0") + + print("3 test cases for support equality are passed") + +if __name__ == '__main__': + unittest.main() diff --git a/tests/correlatedPattern/basic/automated_test_case_CoMinePlus.py b/tests/correlatedPattern/basic/automated_test_case_CoMinePlus.py new file mode 100644 index 000000000..1c50f142a --- /dev/null +++ b/tests/correlatedPattern/basic/automated_test_case_CoMinePlus.py @@ -0,0 +1,47 @@ +import unittest +from gen import generate_transactional_dataset +from automated_test_CoMinePlus import test_pami +import warnings + +warnings.filterwarnings("ignore") + +class TestExample(unittest.TestCase): + def test_num_patterns(self): + for _ in range(3): + num_distinct_items = 20 + num_transactions = 1000 + max_items_per_transaction = 20 + items = ["item-{}".format(i) for i in range(1, num_distinct_items + 1)] + dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction) + pami = test_pami(dataset) + self.assertGreater(len(pami), 0, "No patterns were generated by PAMI") + print("3 test cases for number of patterns have been passed") + + def test_equality(self): + for _ in range(3): + num_distinct_items = 20 + num_transactions = 1000 + max_items_per_transaction = 20 + items = ["item-{}".format(i) for i in range(1, num_distinct_items + 1)] + dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction) + pami = test_pami(dataset) + pami_patterns = sorted(list(pami["Patterns"])) + self.assertTrue(len(pami_patterns) > 0, "No patterns were generated by PAMI") + print("3 test cases for Patterns equality are passed") + + def test_support(self): + for _ in range(3): + num_distinct_items = 20 + num_transactions = 1000 + max_items_per_transaction = 20 + items = ["item-{}".format(i) for i in range(1, num_distinct_items + 1)] + dataset = generate_transactional_dataset(num_transactions, items, 
max_items_per_transaction) + pami = test_pami(dataset) + pami.sort_values(by="Support", inplace=True) + ps = list(pami["Support"]) + for support in ps: + self.assertTrue(support > 0, "Support value should be greater than 0") + print("3 test cases for support equality are passed") + +if __name__ == '__main__': + unittest.main() diff --git a/tests/correlatedPattern/basic/gen.py b/tests/correlatedPattern/basic/gen.py new file mode 100644 index 000000000..4f5fba805 --- /dev/null +++ b/tests/correlatedPattern/basic/gen.py @@ -0,0 +1,20 @@ +import random +import warnings + +warnings.filterwarnings("ignore") + +def generate_transactional_dataset(num_transactions, items, max_items_per_transaction): + dataset = [] + for _ in range(num_transactions): + num_items = random.randint(1, max_items_per_transaction) + transaction = random.sample(items, num_items) + dataset.append(transaction) + return dataset + +# Example usage: +# num_distinct_items=20 +# num_transactions = 1000 +# max_items_per_transaction = 20 +# items=["item-{}".format(i) for i in range(1,num_distinct_items+1)] +# dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction) +# print(dataset) diff --git a/tests/frequentPattern/basic/abstract.py b/tests/frequentPattern/basic/abstract.py new file mode 100644 index 000000000..814f25b26 --- /dev/null +++ b/tests/frequentPattern/basic/abstract.py @@ -0,0 +1,189 @@ +# Copyright (C) 2021 Rage Uday Kiran +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# from abc import ABC as _ABC, abstractmethod as _abstractmethod +from abc import ABC as _ABC, abstractmethod as _abstractmethod +import time as _time +import csv as _csv +import pandas as _pd +from collections import defaultdict as _defaultdict +from itertools import combinations as _c +import os as _os +import os.path as _ospath +import psutil as _psutil +import sys as _sys +import validators as _validators +from urllib.request import urlopen as _urlopen +import functools as _functools + + +class _frequentPatterns(_ABC): + """ + :Description: This abstract base class defines the variables and methods that every frequent pattern mining algorithm must + employ in PAMI + + :Attributes: + + iFile : str + Input file name or path of the input file + minSup: integer or float or str + The user can specify minSup either in count or proportion of database size. + If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. + Otherwise, it will be treated as float. + Example: minSup=10 will be treated as integer, while minSup=10.0 will be treated as float + sep : str + This variable is used to distinguish items from one another in a transaction. The default seperator is tab space or \t. 
+ However, the users can override their default separator + startTime:float + To record the start time of the algorithm + endTime:float + To record the completion time of the algorithm + finalPatterns: dict + Storing the complete set of patterns in a dictionary variable + oFile : str + Name of the output file to store complete set of frequent patterns + memoryUSS : float + To store the total amount of USS memory consumed by the program + memoryRSS : float + To store the total amount of RSS memory consumed by the program + + :Methods: + + startMine() + Calling this function will start the actual mining process + getPatterns() + This function will output all interesting patterns discovered by an algorithm + save(oFile) + This function will store the discovered patterns in an output file specified by the user + getPatternsAsDataFrame() + The function outputs the patterns generated by an algorithm as a data frame + getMemoryUSS() + This function outputs the total amount of USS memory consumed by a mining algorithm + getMemoryRSS() + This function outputs the total amount of RSS memory consumed by a mining algorithm + getRuntime() + This function outputs the total runtime of a mining algorithm + + """ + + def __init__(self, iFile, minSup, sep="\t"): + """ + :param iFile: Input file name or path of the input file + :type iFile: str or DataFrame + :param minSup: The user can specify minSup either in count or proportion of database size. + If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. + Otherwise, it will be treated as float. + Example: minSup=10 will be treated as integer, while minSup=10.0 will be treated as float + :type minSup: int or float or str + :param sep: separator used to distinguish items from each other. The default separator is tab space. 
However, users can override the default separator + :type sep: str + """ + + self._iFile = iFile + self._sep = sep + self._minSup = minSup + self._finalPatterns = {} + self._oFile = str() + self._memoryUSS = float() + self._memoryRSS = float() + self._startTime = float() + self._endTime = float() + + @_abstractmethod + def startMine(self): + """ + Code for the mining process will start from this function + """ + + pass + + @_abstractmethod + def mine(self): + """ + Code for the mining process will start from this function + """ + + pass + + @_abstractmethod + def getPatterns(self): + """ + Complete set of frequent patterns generated will be retrieved from this function + """ + + pass + + @_abstractmethod + def save(self, oFile): + """ + Complete set of frequent patterns will be saved in to an output file from this function + :param oFile: Name of the output file + :type oFile: csvfile + """ + + pass + + @_abstractmethod + def getPatternsAsDataFrame(self): + """ + Complete set of frequent patterns will be loaded in to data frame from this function + """ + + pass + + @_abstractmethod + def getMemoryUSS(self): + """ + Total amount of USS memory consumed by the program will be retrieved from this function + """ + + pass + + @_abstractmethod + def getMemoryRSS(self): + """ + Total amount of RSS memory consumed by the program will be retrieved from this function + """ + + pass + + @_abstractmethod + def getRuntime(self): + """ + Total amount of runtime taken by the program will be retrieved from this function + """ + + pass + + @_abstractmethod + def printResults(self): + """ + To print result of the execution + """ + + pass \ No newline at end of file diff --git a/tests/frequentPattern/basic/automated_test_case_ECLAT.py b/tests/frequentPattern/basic/automated_test_case_ECLAT.py index a70248d3d..87969355e 100644 --- a/tests/frequentPattern/basic/automated_test_case_ECLAT.py +++ b/tests/frequentPattern/basic/automated_test_case_ECLAT.py @@ -33,7 +33,7 @@ def 
test_equality(self): pami_patterns = sorted(list(pami["Patterns"])) self.assertTrue(len(pami_patterns) > 0, "No patterns were generated by PAMI") - print("2 test cases for Patterns equality are passed") + print("3 test cases for Patterns equality are passed") def test_support(self): for _ in range(3): diff --git a/tests/frequentPattern/basic/automated_test_case_ECLATDiffset.py b/tests/frequentPattern/basic/automated_test_case_ECLATDiffset.py index 92014d008..e60076837 100644 --- a/tests/frequentPattern/basic/automated_test_case_ECLATDiffset.py +++ b/tests/frequentPattern/basic/automated_test_case_ECLATDiffset.py @@ -33,7 +33,7 @@ def test_equality(self): pami_patterns = sorted(list(pami["Patterns"])) self.assertTrue(len(pami_patterns) > 0, "No patterns were generated by PAMI") - print("2 test cases for Patterns equality are passed") + print("3 test cases for Patterns equality are passed") def test_support(self): for _ in range(3): diff --git a/tests/frequentPattern/basic/automated_test_case_apriori.py b/tests/frequentPattern/basic/automated_test_case_apriori.py index 808666dbd..25f6d6173 100644 --- a/tests/frequentPattern/basic/automated_test_case_apriori.py +++ b/tests/frequentPattern/basic/automated_test_case_apriori.py @@ -33,7 +33,7 @@ def test_equality(self): pami_patterns = sorted(list(pami["Patterns"])) self.assertTrue(len(pami_patterns) > 0, "No patterns were generated by PAMI") - print("2 test cases for Patterns equality are passed") + print("3 test cases for Patterns equality are passed") def test_support(self): for _ in range(3): diff --git a/tests/frequentPattern/basic/automated_test_case_fpgrowth.py b/tests/frequentPattern/basic/automated_test_case_fpgrowth.py index 6e115bc0a..70e2e7ede 100644 --- a/tests/frequentPattern/basic/automated_test_case_fpgrowth.py +++ b/tests/frequentPattern/basic/automated_test_case_fpgrowth.py @@ -33,7 +33,7 @@ def test_equality(self): pami_patterns = sorted(list(pami["Patterns"])) self.assertTrue(len(pami_patterns) > 0, 
"No patterns were generated by PAMI") - print("2 test cases for Patterns equality are passed") + print("3 test cases for Patterns equality are passed") def test_support(self): for _ in range(3): diff --git a/tests/recurringPattern/basic/abstract.py b/tests/recurringPattern/basic/abstract.py new file mode 100644 index 000000000..43d44d219 --- /dev/null +++ b/tests/recurringPattern/basic/abstract.py @@ -0,0 +1,164 @@ +# Copyright (C) 2021 Rage Uday Kiran +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from abc import ABC as _ABC, abstractmethod as _abstractmethod +import time as _time +import csv as _csv +import pandas as _pd +from collections import defaultdict as _defaultdict +from itertools import combinations as _c +import os as _os +import os.path as _ospath +import psutil as _psutil +import sys as _sys +import validators as _validators +from urllib.request import urlopen as _urlopen + + +class _recurringPatterns(_ABC): + """ + :Description: This abstract base class defines the variables and methods that every periodic-frequent pattern mining algorithm must + employ in PAMI + + :Attributes: + + iFile : str + Input file name or path of the input file + + maxPer : int or float or str + The user can specify maxPer either in count or proportion of database size. + If the program detects the data type of maxPer is integer, then it treats maxPer is expressed in count. + Otherwise, it will be treated as float. 
+ Example: maxPer=10 will be treated as integer, while maxPer=10.0 will be treated as float + minPS : int or float or str + The user can specify minPS either in count or proportion of database size. + If the program detects the data type of minPS is integer, then it treats minPS is expressed in count. + Otherwise, it will be treated as float. + Example: minPS=10 will be treated as integer, while minPS=10.0 will be treated as float + minRec : int or float or str + The user has to specify minRec in count. + sep : str + This variable is used to distinguish items from one another in a transaction. The default seperator is tab space or \t. + However, the users can override their default separator. + startTime : float + To record the start time of the algorithm + endTime : float + To record the completion time of the algorithm + finalPatterns : dict + Storing the complete set of patterns in a dictionary variable + oFile : str + Name of the output file to store complete set of periodic-frequent patterns + memoryUSS : float + To store the total amount of USS memory consumed by the program + memoryRSS : float + To store the total amount of RSS memory consumed by the program + + :Methods: + + startMine() + Mining process will start from here + getPatterns() + Complete set of patterns will be retrieved with this function + save(oFile) + Complete set of periodic-frequent patterns will be loaded in to a output file + getPatternsAsDataFrame() + Complete set of periodic-frequent patterns will be loaded in to data frame + getMemoryUSS() + Total amount of USS memory consumed by the program will be retrieved from this function + getMemoryRSS() + Total amount of RSS memory consumed by the program will be retrieved from this function + getRuntime() + Total amount of runtime taken by the program will be retrieved from this function + """ + + def __init__(self, iFile, maxPer, minPS, minRec,sep = '\t'): + """ + :param iFile: Input file name or path of the input file + :type iFile: str + 
:param minPS: The user can specify minPS either in count or proportion of database size. + If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. + Otherwise, it will be treated as float. + Example: minSup=10 will be treated as integer, while minSup=10.0 will be treated as float + :type minPS: int or float or str + :param maxPer: The user can specify maxPer either in count or proportion of database size. + If the program detects the data type of maxPer is integer, then it treats maxPer is expressed in count. + Otherwise, it will be treated as float. + Example: maxPer=10 will be treated as integer, while maxPer=10.0 will be treated as float + :type maxPer: int or float or str + :param sep: the separator used in the database + :type sep: str + """ + + self._iFile = iFile + self._minPS = minPS + self._maxPer = maxPer + self._minRec = minRec + self._sep = sep + self._oFile = str() + self._finalPatterns = {} + self._startTime = float() + self._endTime = float() + self._memoryRSS = float() + self._memoryUSS = float() + + @_abstractmethod + def startMine(self): + """Code for the mining process will start from this function""" + + pass + + @_abstractmethod + def getPatterns(self): + """Complete set of periodic-frequent patterns generated will be retrieved from this function""" + + pass + + @_abstractmethod + def save(self, oFile): + """Complete set of periodic-frequent patterns will be saved in to an output file from this function + + :param oFile: Name of the output file + :type oFile: csv file + """ + + pass + + @_abstractmethod + def getPatternsAsDataFrame(self): + """Complete set of periodic-frequent patterns will be loaded in to data frame from this function""" + + pass + + @_abstractmethod + def getMemoryUSS(self): + """Total amount of USS memory consumed by the program will be retrieved from this function""" + + pass + + @_abstractmethod + def getMemoryRSS(self): + """Total amount of RSS memory consumed by the program 
will be retrieved from this function"""
+        pass
+
+    @_abstractmethod
+    def getRuntime(self):
+        """Total amount of runtime taken by the program will be retrieved from this function"""
+
+        pass
+
+    @_abstractmethod
+    def printResults(self):
+        """ To print all the results of execution"""
+
+        pass
diff --git a/tests/recurringPattern/basic/automated_test_RPGrowth.py b/tests/recurringPattern/basic/automated_test_RPGrowth.py
new file mode 100644
index 000000000..94b1b4953
--- /dev/null
+++ b/tests/recurringPattern/basic/automated_test_RPGrowth.py
@@ -0,0 +1,19 @@
+import pandas as pd
+from gen import generate_transactional_dataset
+from PAMI.recurringPattern.basic import RPGrowth as alg
+import warnings
+
+warnings.filterwarnings("ignore")
+
+# Write the dataset to sample.csv, mine it with PAMI's RPGrowth and return the patterns as a DataFrame
+def test_pami(dataset, min_sup=0.2, max_period_count=5000, min_rec=1.8):
+    dataset = [",".join(map(str, i)) for i in dataset]
+    with open("sample.csv", "w") as f:  # write-only: the handle is never read back
+        f.write("\n".join(dataset))
+    obj = alg.RPGrowth(iFile="sample.csv", minPS=min_sup, maxPer=max_period_count, minRec=min_rec, sep=',')
+    obj.mine()  # Using mine() instead of the deprecated startMine()
+    res = obj.getPatternsAsDataFrame()
+    res["Patterns"] = res["Patterns"].apply(lambda x: x.split())
+    res["Support"] = res["Support"] / len(dataset)  # scale by the number of transactions
+    pami = res
+    return pami
diff --git a/tests/recurringPattern/basic/automated_test_case_RPGrowth.py b/tests/recurringPattern/basic/automated_test_case_RPGrowth.py
new file mode 100644
index 000000000..2ed284b69
--- /dev/null
+++ b/tests/recurringPattern/basic/automated_test_case_RPGrowth.py
@@ -0,0 +1,52 @@
+import unittest
+from gen import generate_transactional_dataset
+from automated_test_RPGrowth import test_pami
+import warnings
+
+warnings.filterwarnings("ignore")
+
+class TestRPGrowth(unittest.TestCase):
+    def test_num_patterns(self):
+        for _ in range(3):
+            num_distinct_items = 20
+            num_transactions = 1000
+            max_items_per_transaction = 20
+            dataset = generate_transactional_dataset(num_transactions, num_distinct_items, max_items_per_transaction)
+
+            pami = test_pami(dataset)
+            # As we don't have a second method to compare, we just verify the length of pami
+            self.assertGreater(len(pami), 0, "No patterns were generated by PAMI")
+
+        print("3 test cases for number of patterns have been passed")
+
+    def test_equality(self):
+        for _ in range(3):
+            num_distinct_items = 20
+            num_transactions = 1000
+            max_items_per_transaction = 20
+            dataset = generate_transactional_dataset(num_transactions, num_distinct_items, max_items_per_transaction)
+
+            pami = test_pami(dataset)
+            # Since we have no second method to compare, we just verify the patterns are generated
+            pami_patterns = sorted(pami["Patterns"])
+            self.assertGreater(len(pami_patterns), 0, "No patterns were generated by PAMI")
+        print("3 test cases for Patterns equality are passed")
+
+    def test_support(self):
+        for _ in range(3):
+            num_distinct_items = 20
+            num_transactions = 1000
+            max_items_per_transaction = 20
+            dataset = generate_transactional_dataset(num_transactions, num_distinct_items, max_items_per_transaction)
+
+            pami = test_pami(dataset)
+            # Since we have no second method to compare, we just verify the support values are generated
+            pami.sort_values(by="Support", inplace=True)
+            ps = list(pami["Support"])
+            for support in ps:
+                self.assertGreater(support, 0, "Support value should be greater than 0")
+
+        print("3 test cases for support equality are passed")
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/recurringPattern/basic/gen.py b/tests/recurringPattern/basic/gen.py
new file mode 100644
index 000000000..ea321eee2
--- /dev/null
+++ b/tests/recurringPattern/basic/gen.py
@@ -0,0 +1,9 @@
+import random
+
+def generate_transactional_dataset(num_transactions, num_distinct_items, max_items_per_transaction):
+    dataset = []
+    for _ in range(num_transactions):
+        num_items = random.randint(1, min(max_items_per_transaction, num_distinct_items))  # sample() cannot draw more than the population
+        transaction = random.sample(range(1, num_distinct_items + 1), num_items)
+        dataset.append(transaction)
+    return dataset
From a02606096eeb21f45f5c202fd362fe11b752b442 Mon Sep 17 00:00:00 2001
From: Likhitha
Date: Tue, 2 Jul 2024 14:29:30 +0900
Subject: [PATCH 2/2] Automated Testing

---
 tests/coveragePattern/basic/abstract.py       | 183 ++++++++++++++++++
 .../basic/automated_test_CMine.py             |  18 ++
 .../basic/automated_test_CPPG.py              |  20 ++
 .../basic/automated_test_case_CMine.py        |  53 +++++
 .../basic/automated_test_case_CPPG.py         |  56 ++++++
 tests/coveragePattern/basic/gen.py            |   9 +
 6 files changed, 339 insertions(+)
 create mode 100644 tests/coveragePattern/basic/abstract.py
 create mode 100644 tests/coveragePattern/basic/automated_test_CMine.py
 create mode 100644 tests/coveragePattern/basic/automated_test_CPPG.py
 create mode 100644 tests/coveragePattern/basic/automated_test_case_CMine.py
 create mode 100644 tests/coveragePattern/basic/automated_test_case_CPPG.py
 create mode 100644 tests/coveragePattern/basic/gen.py

diff --git a/tests/coveragePattern/basic/abstract.py b/tests/coveragePattern/basic/abstract.py
new file mode 100644
index 000000000..6d68eca45
--- /dev/null
+++ b/tests/coveragePattern/basic/abstract.py
@@ -0,0 +1,183 @@
+# Copyright (C) 2021 Rage Uday Kiran
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+from abc import ABC as _ABC, abstractmethod as _abstractmethod
+import time as _time
+import math as _math
+import csv as _csv
+import pandas as _pd
+from collections import defaultdict as _defaultdict
+from itertools import combinations as _c
+import os as _os
+import os.path as _ospath
+import psutil as _psutil
+import sys as _sys
+import validators as _validators
+from urllib.request import urlopen as _urlopen
+
+
+class _coveragePatterns(_ABC):
+    """
+    :Description: This abstract base class defines the variables and methods that every coverage pattern mining algorithm must
+                  employ in PAMI
+
+    :Attributes:
+
+        iFile : str
+            Input file name or path of the input file
+        minCS: int or float or str
+            The user can specify minCS either in count or proportion of database size.
+            If the program detects the data type of minCS is integer, then it treats minCS is expressed in count.
+            Otherwise, it will be treated as float.
+            Example: minCS=10 will be treated as integer, while minCS=10.0 will be treated as float
+        maxOR: int or float or str
+            The user can specify maxOR either in count or proportion of database size.
+            If the program detects the data type of maxOR is integer, then it treats maxOR is expressed in count.
+            Otherwise, it will be treated as float.
+            Example: maxOR=10 will be treated as integer, while maxOR=10.0 will be treated as float
+        minRF: int or float or str
+            The user can specify minRF either in count or proportion of database size.
+            If the program detects the data type of minRF is integer, then it treats minRF is expressed in count.
+            Otherwise, it will be treated as float.
+            Example: minRF=10 will be treated as integer, while minRF=10.0 will be treated as float
+        sep : str
+            This variable is used to distinguish items from one another in a transaction. The default separator is tab space or \t.
+            However, the users can override their default separator.
+        startTime: float
+            To record the start time of the algorithm
+        endTime: float
+            To record the completion time of the algorithm
+        finalPatterns: dict
+            Storing the complete set of patterns in a dictionary variable
+        oFile : str
+            Name of the output file to store complete set of coverage patterns
+        memoryUSS : float
+            To store the total amount of USS memory consumed by the program
+        memoryRSS : float
+            To store the total amount of RSS memory consumed by the program
+
+    :Methods:
+
+        startMine()
+            Mining process will start from here
+        getPatterns()
+            Complete set of patterns will be retrieved with this function
+        save(oFile)
+            Complete set of coverage patterns will be saved into an output file
+        getPatternsAsDataFrame()
+            Complete set of coverage patterns will be loaded into a data frame
+        getMemoryUSS()
+            Total amount of USS memory consumed by the program will be retrieved from this function
+        getMemoryRSS()
+            Total amount of RSS memory consumed by the program will be retrieved from this function
+        getRuntime()
+            Total amount of runtime taken by the program will be retrieved from this function
+    """
+
+    def __init__(self, iFile, minRF, minCS, maxOR, sep='\t'):
+        """
+        :param iFile: Input file name or path of the input file
+        :type iFile: str
+        :param minRF: The user can specify minimum relative frequency either in count or proportion of database size.
+            If the program detects the data type of minRF is integer, then it treats minRF is expressed in count.
+            Otherwise, it will be treated as float.
+            Example: minRF=10 will be treated as integer, while minRF=10.0 will be treated as float
+        :type minRF: int or float or str
+        :param minCS: The user can specify minimum coverage support either in count or proportion of database size.
+            If the program detects the data type of minCS is integer, then it treats minCS is expressed in count.
+            Otherwise, it will be treated as float.
+            Example: minCS=10 will be treated as integer, while minCS=10.0 will be treated as float
+        :param maxOR: The user can specify maximum overlap ratio either in count or proportion of database size.
+        :type maxOR: int or float or str
+        :param sep: separator used in user specified input file
+        :type sep: str
+        """
+
+        self._iFile = iFile
+        self._minCS = minCS
+        self._minRF = minRF
+        self._maxOR = maxOR
+        self._sep = sep
+        self._finalPatterns = {}
+        self._startTime = float()
+        self._endTime = float()
+        self._memoryRSS = float()
+        self._memoryUSS = float()
+        self._oFile = " "
+
+    @_abstractmethod
+    def startMine(self):
+        """
+        Code for the mining process will start from this function
+        """
+
+        pass
+
+    @_abstractmethod
+    def getPatterns(self):
+        """
+        Complete set of coverage patterns generated will be retrieved from this function
+        """
+
+        pass
+
+    @_abstractmethod
+    def save(self, oFile):
+        """
+        Complete set of coverage patterns will be saved into an output file from this function
+        :param oFile: Name of the output file
+        :type oFile: csv file
+        """
+
+        pass
+
+    @_abstractmethod
+    def getPatternsAsDataFrame(self):
+        """
+        Complete set of coverage patterns will be loaded into a data frame from this function
+        """
+
+        pass
+
+    @_abstractmethod
+    def getMemoryUSS(self):
+        """
+        Total amount of USS memory consumed by the program will be retrieved from this function
+        """
+
+        pass
+
+    @_abstractmethod
+    def getMemoryRSS(self):
+        """
+        Total amount of RSS memory consumed by the program will be retrieved from this function
+        """
+        pass
+
+    @_abstractmethod
+    def getRuntime(self):
+        """
+        Total amount of runtime taken by the program will be retrieved from this function
+        """
+
+        pass
+
+    @_abstractmethod
+    def printResults(self):
+        """
+        To print the results of the execution
+        """
+
+        pass
\ No newline at end of file
diff --git a/tests/coveragePattern/basic/automated_test_CMine.py b/tests/coveragePattern/basic/automated_test_CMine.py
new file mode 100644
index
000000000..39ff8c5d4
--- /dev/null
+++ b/tests/coveragePattern/basic/automated_test_CMine.py
@@ -0,0 +1,18 @@
+import pandas as pd
+from gen import generate_transactional_dataset
+from PAMI.coveragePattern.basic.CMine import CMine as alg
+import warnings
+
+warnings.filterwarnings("ignore")
+
+# Write the dataset to sample.csv, mine it with PAMI's CMine and return the patterns as a DataFrame
+def test_pami(dataset, min_rf=0.0006, min_cs=0.3, max_or=0.5):
+    dataset = [",".join(i) for i in dataset]
+    with open("sample.csv", "w") as f:  # write-only: the handle is never read back
+        f.write("\n".join(dataset))
+    obj = alg(iFile="sample.csv", minRF=min_rf, minCS=min_cs, maxOR=max_or, sep=',')
+    obj.mine()
+    res = obj.getPatternsAsDataFrame()
+    res["Patterns"] = res["Patterns"].apply(lambda x: x.split())
+    res["Support"] = res["Support"] / len(dataset)  # scale by the number of transactions
+    return res
diff --git a/tests/coveragePattern/basic/automated_test_CPPG.py b/tests/coveragePattern/basic/automated_test_CPPG.py
new file mode 100644
index 000000000..4817713fa
--- /dev/null
+++ b/tests/coveragePattern/basic/automated_test_CPPG.py
@@ -0,0 +1,20 @@
+import pandas as pd
+from gen import generate_transactional_dataset
+from PAMI.coveragePattern.basic.CPPG import CPPG as alg
+import warnings
+
+warnings.filterwarnings("ignore")
+
+# Write the dataset to sample_cppg.csv, mine it with PAMI's CPPG and return the patterns as a DataFrame
+def test_pami(dataset, min_rf=0.0006, min_cs=0.3, max_or=0.5):
+    dataset = [",".join(i) for i in dataset]
+    with open("sample_cppg.csv", "w") as f:  # write-only: the handle is never read back
+        f.write("\n".join(dataset))
+    obj = alg(iFile="sample_cppg.csv", minRF=min_rf, minCS=min_cs, maxOR=max_or, sep=',')
+    obj.mine()
+    res = obj.getPatternsAsDataFrame()
+    res["Patterns"] = res["Patterns"].apply(lambda x: x.split())
+    # Scale the reported support by the number of transactions (assumes Support is a count -- TODO confirm for CPPG)
+    res["Support"] = res["Support"] / len(dataset)
+    cppg = res
+    return cppg
diff --git a/tests/coveragePattern/basic/automated_test_case_CMine.py b/tests/coveragePattern/basic/automated_test_case_CMine.py
new file mode 100644
index 000000000..a9d768044
--- /dev/null
+++
b/tests/coveragePattern/basic/automated_test_case_CMine.py
@@ -0,0 +1,53 @@
+import unittest
+from gen import generate_transactional_dataset
+from automated_test_CMine import test_pami
+import warnings
+
+warnings.filterwarnings("ignore")
+
+class TestCMine(unittest.TestCase):
+    def test_num_patterns(self):
+        for _ in range(3):
+            num_distinct_items = 20
+            num_transactions = 1000
+            max_items_per_transaction = 20
+            items = ["item-{}".format(i) for i in range(1, num_distinct_items + 1)]
+            dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction)
+
+            cmine_result = test_pami(dataset)
+            self.assertGreater(len(cmine_result), 0, "No patterns were generated by CMine")
+
+        print("3 test cases for number of patterns have been passed")
+
+    def test_equality(self):
+        for _ in range(3):
+            num_distinct_items = 20
+            num_transactions = 1000
+            max_items_per_transaction = 20
+            items = ["item-{}".format(i) for i in range(1, num_distinct_items + 1)]
+            dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction)
+
+            cmine_result = test_pami(dataset)
+            cmine_patterns = sorted(cmine_result["Patterns"])  # no reference miner to compare with; only check patterns exist
+            self.assertGreater(len(cmine_patterns), 0, "No patterns were generated by CMine")
+
+        print("3 test cases for Patterns equality are passed")
+
+    def test_support(self):
+        for _ in range(3):
+            num_distinct_items = 20
+            num_transactions = 1000
+            max_items_per_transaction = 20
+            items = ["item-{}".format(i) for i in range(1, num_distinct_items + 1)]
+            dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction)
+
+            cmine_result = test_pami(dataset)
+            cmine_result.sort_values(by="Support", inplace=True)
+            supports = list(cmine_result["Support"])
+            for support in supports:
+                self.assertGreater(support, 0, "Support value should be greater than 0")
+
+        print("3 test cases for support equality are passed")
+
+if __name__ == '__main__':
+    unittest.main()
diff --git
a/tests/coveragePattern/basic/automated_test_case_CPPG.py b/tests/coveragePattern/basic/automated_test_case_CPPG.py
new file mode 100644
index 000000000..23c9990cf
--- /dev/null
+++ b/tests/coveragePattern/basic/automated_test_case_CPPG.py
@@ -0,0 +1,56 @@
+import unittest
+from gen import generate_transactional_dataset
+from automated_test_CPPG import test_pami
+import warnings
+
+warnings.filterwarnings("ignore")
+
+class TestExample(unittest.TestCase):
+    def test_num_patterns(self):
+        for _ in range(3):
+            num_distinct_items = 20
+            num_transactions = 1000
+            max_items_per_transaction = 20
+            items = ["item-{}".format(i) for i in range(1, num_distinct_items + 1)]
+            dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction)
+
+            cppg = test_pami(dataset)
+            # As we don't have a second method to compare, we just verify the length of cppg
+            self.assertGreater(len(cppg), 0, "No patterns were generated by CPPG")
+
+        print("3 test cases for number of patterns have been passed")
+
+    def test_equality(self):
+        for _ in range(3):
+            num_distinct_items = 20
+            num_transactions = 1000
+            max_items_per_transaction = 20
+            items = ["item-{}".format(i) for i in range(1, num_distinct_items + 1)]
+            dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction)
+
+            cppg = test_pami(dataset)
+            # Since we have no second method to compare, we just verify the patterns are generated
+            cppg_patterns = sorted(cppg["Patterns"])
+            self.assertGreater(len(cppg_patterns), 0, "No patterns were generated by CPPG")
+
+        print("3 test cases for Patterns equality are passed")
+
+    def test_support(self):
+        for _ in range(3):
+            num_distinct_items = 20
+            num_transactions = 1000
+            max_items_per_transaction = 20
+            items = ["item-{}".format(i) for i in range(1, num_distinct_items + 1)]
+            dataset = generate_transactional_dataset(num_transactions, items, max_items_per_transaction)
+
+            cppg = test_pami(dataset)
+            # Since we have no second method to compare, we just verify the support values are generated
+            cppg.sort_values(by="Support", inplace=True)
+            ps = list(cppg["Support"])
+            for support in ps:
+                self.assertGreater(support, 0, "Support value should be greater than 0")
+
+        print("3 test cases for support equality are passed")
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/coveragePattern/basic/gen.py b/tests/coveragePattern/basic/gen.py
new file mode 100644
index 000000000..fa820faa9
--- /dev/null
+++ b/tests/coveragePattern/basic/gen.py
@@ -0,0 +1,9 @@
+import random
+
+def generate_transactional_dataset(num_transactions, items, max_items_per_transaction):
+    dataset = []
+    for _ in range(num_transactions):
+        num_items = random.randint(1, min(max_items_per_transaction, len(items)))  # sample() cannot draw more than the population
+        transaction = random.sample(items, num_items)
+        dataset.append(transaction)
+    return dataset