From fbabe8e0faa7120ae4b8f5c0ab66265a3905f121 Mon Sep 17 00:00:00 2001 From: s1280130 Date: Fri, 11 Oct 2024 16:01:21 +0900 Subject: [PATCH] change around convert to get memory and runtime --- PAMI/extras/convert/DF2DB.py | 68 +++- PAMI/extras/convert/_DF2DB.py | 120 ++++++++ PAMI/extras/convert/_denseDF2DB.py | 291 ++++++++++++++++++ PAMI/extras/convert/_sparseDF2DB.py | 182 +++++++++++ .../TransactionalDatabase.py | 63 +++- .../_TransactionalDatabase.py | 201 ++++++++++++ .../_syntheticUtilityDatabase.py | 107 +++++++ .../syntheticUtilityDatabase.py | 76 ++++- 8 files changed, 1093 insertions(+), 15 deletions(-) create mode 100644 PAMI/extras/convert/_DF2DB.py create mode 100644 PAMI/extras/convert/_denseDF2DB.py create mode 100644 PAMI/extras/convert/_sparseDF2DB.py create mode 100644 PAMI/extras/syntheticDataGenerator/_TransactionalDatabase.py create mode 100644 PAMI/extras/syntheticDataGenerator/_syntheticUtilityDatabase.py diff --git a/PAMI/extras/convert/DF2DB.py b/PAMI/extras/convert/DF2DB.py index e0f3a5d79..e3979ab70 100644 --- a/PAMI/extras/convert/DF2DB.py +++ b/PAMI/extras/convert/DF2DB.py @@ -36,7 +36,7 @@ """ import PAMI.extras.convert.denseDF2DB as dense import PAMI.extras.convert.sparseDF2DB as sparse -import sys +import sys,psutil,os,time from typing import Union class DF2DB: @@ -54,6 +54,14 @@ class DF2DB: It is condition of all item :param DFtype: str : It is DataFrame type. It should be sparse or dense. Default DF is sparse. + :param memoryUSS : float + To store the total amount of USS memory consumed by the program + :param memoryRSS : float + To store the total amount of RSS memory consumed by the program + :param startTime : float + To record the start time of the mining process + endTime : float + To record the completion time of the mining process **Importing this algorithm into a python program** @@ -81,6 +89,11 @@ def __init__(self, inputDF, DFtype='dense') -> None: self.DF2DB = dense.denseDF2DB(self.inputDF) else: raise Exception('DF type should be sparse or dense') + self._startTime = float() + self._endTime = float() + self._memoryUSS = float() + self._memoryRSS = float() + def convert2TransactionalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> str: """ create transactional database and return oFileName @@ -89,7 +102,12 @@ def convert2TransactionalDatabase(self, oFile: str, condition: str, thresholdVal :return: oFile name :rtype: str """ + self._startTime = time.time() self.DF2DB.convert2TransactionalDatabase(oFile,condition,thresholdValue) + process = psutil.Process(os.getpid()) + self._memoryUSS = process.memory_full_info().uss + self._memoryRSS = process.memory_info().rss + self._endTime = time.time() return self.DF2DB.getFileName() def convert2TemporalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> str: @@ -100,7 +118,12 @@ def convert2TemporalDatabase(self, oFile: str, condition: str, thresholdValue: U :return: oFile name :rtype: str """ + self._startTime = time.time() self.DF2DB.convert2TemporalDatabase(oFile,condition,thresholdValue) + process = psutil.Process(os.getpid()) + self._memoryUSS = process.memory_full_info().uss + self._memoryRSS = process.memory_info().rss + self._endTime = time.time() return self.DF2DB.getFileName() def convert2UtilityDatabase(self, oFile: str) -> str: @@ -111,10 +134,51 @@ def convert2UtilityDatabase(self, oFile: str) -> str: :return: outputFile name :rtype: str """ + self._startTime = time.time() self.DF2DB.convert2UtilityDatabase(oFile) + process = psutil.Process(os.getpid()) + self._memoryUSS = process.memory_full_info().uss + self._memoryRSS = process.memory_info().rss + self._endTime = time.time() return self.DF2DB.getFileName() + def getMemoryUSS(self) -> float: + """ + Total amount of USS memory consumed by the mining process will be retrieved from this function + + :return: returning USS memory consumed by the mining process + :rtype: float + """ + + return self._memoryUSS + + def getMemoryRSS(self) -> float: + """ + Total amount of RSS memory consumed by the mining process will be retrieved from this function + + :return: returning RSS memory consumed by the mining process + :rtype: float + """ + + return self._memoryRSS + + def getRuntime(self) -> float: + """ + Calculating the total amount of runtime taken by the mining process + + + :return: returning total amount of runtime taken by the mining process + :rtype: float + """ + + return self._endTime - self._startTime + + if __name__ == '__main__': obj = DF2DB(sys.argv[1]) - obj.getTransactionalDatabase(sys.argv[2],sys.argv[3],sys.argv[4]) \ No newline at end of file + obj.getTransactionalDatabase(sys.argv[2],sys.argv[3],sys.argv[4]) + print("Conversion is complete.") + print("Total Memory in USS:", obj.getMemoryUSS()) + print("Total Memory in RSS", obj.getMemoryRSS()) + print("Total ExecutionTime in ms:", obj.getRuntime()) diff --git a/PAMI/extras/convert/_DF2DB.py b/PAMI/extras/convert/_DF2DB.py new file mode 100644 index 000000000..e0f3a5d79 --- /dev/null +++ b/PAMI/extras/convert/_DF2DB.py @@ -0,0 +1,120 @@ +# DF2DB in this code dataframe is converting databases into sparse or dense transactional, temporal, Utility. +# +# +# **Importing this algorithm into a python program** +# -------------------------------------------------------- +# +# from PAMI.extras.DF2DB import DF2DB as db +# +# obj = db.DF2DB(idf, "sparse/dense") +# +# obj.convert2Transactional("outputFileName", ">=", 16) # To create transactional database +# +# obj.convert2Temporal("outputFileName", ">=", 16) # To create temporal database +# +# obj.convert2Utility("outputFileName") # To create utility database +# + + + + +__copyright__ = """ +Copyright (C) 2021 Rage Uday Kiran + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" +import PAMI.extras.convert.denseDF2DB as dense +import PAMI.extras.convert.sparseDF2DB as sparse +import sys +from typing import Union + +class DF2DB: + """ + :Description: This class will create database for given DataFrame based on Threshold values and conditions are defined in the class. + Converts Dataframe into sparse or dense dataframes. + + :Attributes: + + :param inputDF: DataFrame : + It is sparse or dense DataFrame + :param thresholdValue: int or float : + It is threshold value of all item + :param condition: str : + It is condition of all item + :param DFtype: str : + It is DataFrame type. It should be sparse or dense. Default DF is sparse. + + + **Importing this algorithm into a python program** + -------------------------------------------------------- + .. code-block:: python + + from PAMI.extras.DF2DB import DF2DB as db + + obj = db.DF2DB(idf, "sparse/dense") + + obj.convert2Transactional("outputFileName",condition,threshold) # To create transactional database + + obj.convert2Temporal("outputFileName",condition,threshold) # To create temporal database + + obj.convert2Utility("outputFileName",condition,threshold) # To create utility database + """, + + + def __init__(self, inputDF, DFtype='dense') -> None: + self.inputDF = inputDF + self.DFtype = DFtype.lower() + if DFtype == 'sparse': + self.DF2DB = sparse.sparseDF2DB(self.inputDF) + elif DFtype == 'dense': + self.DF2DB = dense.denseDF2DB(self.inputDF) + else: + raise Exception('DF type should be sparse or dense') + def convert2TransactionalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> str: + """ + create transactional database and return oFileName + :param oFile: file name or path to store database + :type oFile: str + :return: oFile name + :rtype: str + """ + self.DF2DB.convert2TransactionalDatabase(oFile,condition,thresholdValue) + return self.DF2DB.getFileName() + + def convert2TemporalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> str: + """ + create temporal database and return oFile name + :param oFile: file name or path to store database + :type oFile: str + :return: oFile name + :rtype: str + """ + self.DF2DB.convert2TemporalDatabase(oFile,condition,thresholdValue) + return self.DF2DB.getFileName() + + def convert2UtilityDatabase(self, oFile: str) -> str: + """ + create utility database and return oFile name + :param oFile: file name or path to store database + :type oFile: str + :return: outputFile name + :rtype: str + """ + self.DF2DB.convert2UtilityDatabase(oFile) + return self.DF2DB.getFileName() + + +if __name__ == '__main__': + obj = DF2DB(sys.argv[1]) + obj.getTransactionalDatabase(sys.argv[2],sys.argv[3],sys.argv[4]) \ No newline at end of file diff --git a/PAMI/extras/convert/_denseDF2DB.py b/PAMI/extras/convert/_denseDF2DB.py new file mode 100644 index 000000000..18d301f6d --- /dev/null +++ b/PAMI/extras/convert/_denseDF2DB.py @@ -0,0 +1,291 @@ +# DenseFormatDF in this code the dense dataframe is converting databases into different transactional, temporal, utility types. +# +# **Importing this algorithm into a python program** +# -------------------------------------------------------- +# +# from PAMI.extras.convert import denseDF2DB as db +# +# obj = db.denseDF2DB(idf) +# +# obj.save(oFile) +# +# obj.convert2TransactionalDatabase("outputFileName", ">=", 16) # To create transactional database +# +# obj.convert2TemporalDatabase("outputFileName", ">=", 16) # To create temporal database +# +# obj.convert2MultipleTimeSeries("outputFileName", ">=", 16) # To create Mutliple TimeSeries database +# +# obj.convert2UtilityDatabase("outputFileName", ">=", 16) # To create utility database +# +# obj.getFileName() # To get file name of the database +# + + + + + +__copyright__ = """ +Copyright (C) 2021 Rage Uday Kiran + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" + +import operator +from typing import Union + +condition_operator = { + '<': operator.lt, + '>': operator.gt, + '<=': operator.le, + '>=': operator.ge, + '==': operator.eq, + '!=': operator.ne +} + + +class denseDF2DB: + """ + :Description: This class create Data Base from DataFrame. + + :Attributes: + + :param inputDF: dataframe : + It is dense DataFrame + :param condition: str : + It is condition to judge the value in dataframe + :param thresholdValue: int or float : + User defined value. + + + **Importing this algorithm into a python program** + -------------------------------------------------------- + .. code-block:: python + + from PAMI.extras.convert import denseDF2DB as db + + obj = db.denseDF2DB(iDdf ) + + obj.convert2TransactionalDatabase("outputFileName", ">=", 16) # To create transactional database + + obj.convert2TemporalDatabase("outputFileName", ">=", 16) # To create temporal database + + obj.convert2MultipleTimeSeries("outputFileName", ">=", 16) # To create Multiple TimeSeries database + + obj.convert2UtilityDatabase("outputFileName") # To create utility database + + obj.getFileName("outputFileName") # To get file name of the database + """ + + def __init__(self, inputDF) -> None: + self.inputDF = inputDF + self.tids = [] + self.items = [] + self.outputFile = ' ' + self.items = list(self.inputDF.columns.values) + self.tids = list(self.inputDF.index) + + def convert2TransactionalDatabase(self, outputFile: str, condition: str, thresholdValue: Union[int, float]) -> None: + """ + :Description: Create transactional data base + + :Attributes: + + :param outputFile: Write transactional database into outputFile + + :type outputFile: str + + :param condition: It is condition to judge the value in dataframe + + :type condition: str + + :param thresholdValue: User defined value. + + :type thresholdValue: Union[int, float] + """ + + + self.outputFile = outputFile + with open(outputFile, 'w') as f: + if condition not in condition_operator: + print('Condition error') + else: + for tid in self.tids: + transaction = [item for item in self.items if + condition_operator[condition](self.inputDF.at[tid, item], thresholdValue)] + if len(transaction) > 1: + f.write(f'{transaction[0]}') + for item in transaction[1:]: + f.write(f'\t{item}') + elif len(transaction) == 1: + f.write(f'{transaction[0]}') + else: + continue + f.write('\n') + + def convert2TemporalDatabase(self, outputFile: str, condition: str, thresholdValue: Union[int, float]) -> None: + """ + :Description: Create temporal database + + :param outputFile: Write temporal database into outputFile + + :type outputFile: str + + :param condition: It is condition to judge the value in dataframe + + :type condition: str + + :param thresholdValue: User defined value. + + :type thresholdValue: Union + """ + + self.outputFile = outputFile + with open(outputFile, 'w') as f: + if condition not in condition_operator: + print('Condition error') + else: + for tid in self.tids: + transaction = [item for item in self.items if + condition_operator[condition](self.inputDF.at[tid, item], thresholdValue)] + if len(transaction) > 1: + f.write(f'{tid + 1}') + for item in transaction: + f.write(f'\t{item}') + elif len(transaction) == 1: + f.write(f'{tid + 1}') + f.write(f'\t{transaction[0]}') + else: + continue + f.write('\n') + + def convert2MultipleTimeSeries(self, outputFile: str, condition: str, + thresholdValue: Union[int, float], interval: int) -> None: + """ + :Description: Create the multiple time series database. + + :param outputFile: Write multiple time series database into outputFile. + + :type outputFile: str + + :param interval: Breaks the given timeseries into intervals. + + :type interval: int + + :param condition: It is condition to judge the value in dataframe + + :param thresholdValue: User defined value. + + :type thresholdValue: int or float + """ + self.outputFile = outputFile + writer = open(self.outputFile, 'w+') + # with open(self.outputFile, 'w+') as f: + count = 0 + tids = [] + items = [] + values = [] + for tid in self.tids: + count += 1 + transaction = [item for item in self.items if + condition_operator[condition](self.inputDF.at[tid, item], thresholdValue)] + for i in transaction: + tids.append(count) + items.append(i) + values.append(self.inputDF.at[tid, i]) + if count == interval: + s1, s, ss = str(), str(), str() + if len(values) > 0: + + for j in range(len(tids)): + s1 = s1 + str(tids[j]) + '\t' + for j in range(len(items)): + s = s + items[j] + '\t' + for j in range(len(values)): + ss = ss + str(values[j]) + '\t' + + s2 = s1 + ':' + s + ':' + ss + writer.write("%s\n" % s2) + tids, items, values = [], [], [] + count = 0 + + def convert2UncertainTransactional(self, outputFile: str, condition: str, + thresholdValue: Union[int, float]) -> None: + self.outputFile = outputFile + with open(outputFile, 'w') as f: + if condition not in condition_operator: + print('Condition error') + else: + for tid in self.tids: + transaction = [item for item in self.items if + condition_operator[condition](self.inputDF.at[tid, item], thresholdValue)] + uncertain = [self.inputDF.at[tid, item] for item in self.items if + condition_operator[condition](self.inputDF.at[tid, item], thresholdValue)] + if len(transaction) > 1: + f.write(f'{transaction[0]}') + for item in transaction[1:]: + f.write(f'\t{item}') + f.write(f':') + for value in uncertain: + tt = 0.1 + 0.036 * abs(25 - value) + tt = round(tt, 2) + f.write(f'\t{tt}') + elif len(transaction) == 1: + f.write(f'{transaction[0]}') + tt = 0.1 + 0.036 * abs(25 - uncertain[0]) + tt = round(tt, 2) + f.write(f':{tt}') + else: + continue + f.write('\n') + + def convert2UtilityDatabase(self, outputFile: str) -> None: + """ + :Description: Create the utility database. + + :param outputFile: Write utility database into outputFile + + :type outputFile: str + + :return: None + """ + + self.outputFile = outputFile + with open(self.outputFile, 'w') as f: + for tid in self.tids: + df = self.inputDF.loc[tid].dropna() + f.write(f'{df.index[0]}') + for item in df.index[1:]: + f.write(f'\t{item}') + f.write(f':{df.sum()}:') + f.write(f'{df.at[df.index[0]]}') + + for item in df.index[1:]: + f.write(f'\t{df.at[item]}') + f.write('\n') + + def getFileName(self) -> str: + """ + :return: outputFile name + :rtype: str + """ + + return self.outputFile + +# Dataframes do not run from a terminal + +# if __name__ == '__main__': +# obj = denseDF2DB(sys.argv[1]) +# obj.convert2TransactionalDatabase( sys.argv[2], sys.argv[3]sys.argv[4]) +# transactionalDB = obj.getFileName() +# print(transactionalDB) \ No newline at end of file diff --git a/PAMI/extras/convert/_sparseDF2DB.py b/PAMI/extras/convert/_sparseDF2DB.py new file mode 100644 index 000000000..975891189 --- /dev/null +++ b/PAMI/extras/convert/_sparseDF2DB.py @@ -0,0 +1,182 @@ +# SparseFormatDF in this code the dense dataframe is converting databases into different transactional, temporal, utility types. +# +# **Importing this algorithm into a python program** +# -------------------------------------------------------- +# +# from PAMI.extras.convert import sparseDF2DB as db +# +# obj = db.sparseDF2DB(idf) +# +# obj.save(oFile) +# +# obj.convert2TransactionalDatabase("outputFileName", ">=", 16) # To create transactional database +# +# obj.convert2TemporalDatabase("outputFileName", ">=", 16) # To create temporal database +# +# obj.convert2UtilityDatabase("outputFileName") # To create utility database +# +# obj.getFileName() # To get file name of the database +# + + + + +__copyright__ = """ +Copyright (C) 2021 Rage Uday Kiran + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" +import pandas as pd +import sys +from typing import Union +class sparseDF2DB: + """ + :Description: This class create Data Base from DataFrame. + + :Attributes: + + :param inputDF: dataframe : + It is dense DataFrame + :param condition: str : + It is condition to judge the value in dataframe + :param thresholdValue: int or float : + User defined value. + + **Importing this algorithm into a python program** + -------------------------------------------------------- + .. code-block:: python + + from PAMI.extras.DF2DB import SparseFormatDF as db + + obj = db.SparseFormatDF(iDdf) + + obj.convert2TransactionalDatabase("outputFileName", ">=", 16) # To create transactional database + + obj.convert2TemporalDatabase("outputFileName", ">=", 16) # To create temporal database + + obj.convert2UtilityDatabase("outputFileName", ">=", 16) # To create utility database + + obj.getFileName("outputFileName", ">=", 16) # To get file name of the database + """ + + + def __init__(self, inputDF) -> None: + self.inputDF = inputDF + self.condition = "" + self.thresholdValue = 0 + self.outputFile = '' + + def setParametors(self,outputFile: str, condition: str, thresholdValue: Union[int, float]): + self.condition = condition + self.thresholdValue = thresholdValue + self.outputFile = outputFile + if self.condition == '>': + self.df = self.inputDF.query(f'value > {self.thresholdValue}') + elif self.condition == '>=': + self.df = self.inputDF.query(f'value >= {self.thresholdValue}') + elif self.condition == '<=': + self.df = self.inputDF.query(f'value <= {self.thresholdValue}') + elif self.condition == '<': + self.df = self.inputDF.query(f'value < {self.thresholdValue}') + else: + print('Condition error') + self.df = self.df.drop(columns='value') + self.df = self.df.groupby('tid')['item'].apply(list) + + def convert2TransactionalDatabase(self, outputFile: str, condition: str, thresholdValue: Union[int, float]) -> None: + """ + Create transactional data base + :param outputFile: str: + Write transactional data base into outputFile + + :param inputDF: dataframe : + It is dense DataFrame + :param condition: str : + It is condition to judge the value in dataframe + :param thresholdValue: int or float : + User defined value. + :return: None + """ + self.setParametors(outputFile, condition, thresholdValue) + with open(self.outputFile, 'w') as f: + for line in self.df: + f.write(f'{line[0]}') + for item in line[1:]: + f.write(f',{item}') + f.write('\n') + + def convert2TemporalDatabase(self, outputFile: str, condition: str, thresholdValue: float) -> None: + """ + Create temporal data base + :param outputFile: str: + Write transactional data base into outputFile + + :param inputDF: dataframe : + It is dense DataFrame + :param condition: str : + It is condition to judge the value in dataframe + :param thresholdValue: int or float : + User defined value. + :return: None + """ + self.setParametors(outputFile, condition, thresholdValue) + + with open(self.outputFile, 'w') as f: + for tid in self.df.index: + f.write(f'{tid}') + for item in self.df[tid]: + f.write(f',{item}') + f.write('\n') + + def convert2UtilityDatabase(self, outputFile: str) -> None: + """ + Create the utility database. + :param outputFile: str: + Write transactional data base into outputFile + + :param inputDF: dataframe : + It is dense DataFrame + :param condition: str : + It is condition to judge the value in dataframe + :param thresholdValue: int or float : + User defined value.r + :return: None + """ + + self.outputFile = outputFile + items = self.inputDF.groupby(level=0)['item'].apply(list) + values = self.inputDF.groupby(level=0)['value'].apply(list) + sums = self.inputDF.groupby(level=0)['value'].sum() + index = list(items.index) + with open(self.outputFile, 'w') as f: + for tid in index: + f.write(f'{items[tid][0]}') + for item in items[tid][1:]: + f.write(f'\t{item}') + f.write(f':{sums[tid]}:') + f.write(f'{values[tid][0]}') + for value in values[tid][1:]: + f.write(f'\t{value}') + f.write('\n') + + def getFileName(self) -> str: + + return self.outputFile + +if __name__ == '__main__': + + obj = sparseDF2DB(sys.argv[1]) + obj.createTemporal(sys.argv[2],sys.argv[3],sys.argv[4]) + obj.getFileName() + diff --git a/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py b/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py index 8e5413f52..0019a9c33 100644 --- a/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py +++ b/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py @@ -15,7 +15,7 @@ import numpy as np import pandas as pd -import sys +import sys,psutil,os,time __copyright__ = """ @@ -46,6 +46,14 @@ class TransactionalDatabase: Average number of items per line numItems: int Total number of items + memoryUSS : float + To store the total amount of USS memory consumed by the program + memoryRSS : float + To store the total amount of RSS memory consumed by the program + startTime : float + To record the start time of the mining process + endTime : float + To record the completion time of the mining process :Methods: @@ -55,7 +63,12 @@ class TransactionalDatabase: Save the transactional database to a user-specified file getTransactions: Get the transactional database - + getMemoryUSS() + Total amount of USS memory consumed by the mining process will be retrieved from this function + getMemoryRSS() + Total amount of RSS memory consumed by the mining process will be retrieved from this function + getRuntime() + Total amount of runtime taken by the mining process will be retrieved from this function **Methods to execute code on terminal** --------------------------------------------- @@ -106,7 +119,10 @@ def __init__(self, databaseSize, avgItemsPerTransaction, numItems,sep = "\t") -> self.numItems = numItems self.sep = sep self.db = [] - + self._startTime = float() + self._endTime = float() + self._memoryUSS = float() + self._memoryRSS = float() def _generateArray(self, nums, avg, maxItems) -> list: """ Generate a random array of length n whose values average to m @@ -154,7 +170,7 @@ def create(self) -> None: Generate the transactional database with the given input parameters. Returns: None """ - + self._startTime = time.time() values = self._generateArray(self.databaseSize, self.avgItemsPerTransaction, self.numItems) self.db = [] @@ -184,18 +200,55 @@ def getTransactions(self, sep = "\t") -> pd.DataFrame: db = pd.DataFrame(columns=[column]) db[column] = [sep.join(map(str, line)) for line in self.db] return db - + def getMemoryUSS(self) -> float: + """ + Total amount of USS memory consumed by the mining process will be retrieved from this function + + :return: returning USS memory consumed by the mining process + :rtype: float + """ + process = psutil.Process(os.getpid()) + self._memoryUSS = process.memory_full_info().uss + return self._memoryUSS + + def getMemoryRSS(self) -> float: + """ + Total amount of RSS memory consumed by the mining process will be retrieved from this function + + :return: returning RSS memory consumed by the mining process + :rtype: float + """ + process = psutil.Process(os.getpid()) + self._memoryRSS = process.memory_info().rss + return self._memoryRSS + + def getRuntime(self) -> float: + """ + Calculating the total amount of runtime taken by the mining process + + + :return: returning total amount of runtime taken by the mining process + :rtype: float + """ + self._endTime = time.time() + return self._endTime - self._startTime if __name__ == "__main__": if len(sys.argv) == 5: obj = TransactionalDatabase(int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3])) obj.create() obj.save(sys.argv[4]) + print("Total Memory in USS:", obj.getMemoryUSS()) + print("Total Memory in RSS", obj.getMemoryRSS()) + print("Total ExecutionTime in ms:", obj.getRuntime()) if len(sys.argv) == 6: obj = TransactionalDatabase(int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3]), sys.argv[4]) obj.create() obj.save(sys.argv[5]) + print("Total Memory in USS:", obj.getMemoryUSS()) + print("Total Memory in RSS", obj.getMemoryRSS()) + print("Total ExecutionTime in ms:", obj.getRuntime()) else: raise ValueError("Invalid number of arguments. Args: or Args: ") \ No newline at end of file diff --git a/PAMI/extras/syntheticDataGenerator/_TransactionalDatabase.py b/PAMI/extras/syntheticDataGenerator/_TransactionalDatabase.py new file mode 100644 index 000000000..8e5413f52 --- /dev/null +++ b/PAMI/extras/syntheticDataGenerator/_TransactionalDatabase.py @@ -0,0 +1,201 @@ +# TransactionalDatabase is a collection of transactions. It only considers the data in transactions and ignores the metadata. +# +# **Importing this algorithm into a python program** +# -------------------------------------------------------- +# from PAMI.extras.syntheticDataGenerator import TransactionalDatabase as db +# +# obj = db(10, 5, 10) +# +# obj.create() +# +# obj.save('db.txt') +# +# print(obj.getTransactions()) +# + +import numpy as np +import pandas as pd +import sys + + +__copyright__ = """ + Copyright (C) 2021 Rage Uday Kiran + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" + +class TransactionalDatabase: + """ + :Description: TransactionalDatabase is a collection of transactions. It only considers the data in transactions and ignores the metadata. + :Attributes: + + numLines: int + Number of lines + avgItemsPerLine: int + Average number of items per line + numItems: int + Total number of items + + :Methods: + + create: + Generate the transactional database + save: + Save the transactional database to a user-specified file + getTransactions: + Get the transactional database + + + **Methods to execute code on terminal** + --------------------------------------------- + + .. code-block:: console + + Format: + + (.venv) $ python3 TransactionalDatabase.py + + Example Usage: + + (.venv) $ python3 TransactionalDatabase.py 50.0 10.0 100 + + + + **Importing this algorithm into a python program** + -------------------------------------------------------- + from PAMI.extras.syntheticDataGenerator import TransactionalDatabase as db + + obj = db.TransactionalDatabase(10, 5, 10) + + obj.create() + + obj.save('db.txt') + + print(obj.getTransactions()) + + + """ + + def __init__(self, databaseSize, avgItemsPerTransaction, numItems,sep = "\t") -> None: + """ + Initialize the transactional database with the given parameters + + :param databaseSize: total number of transactions in the database + :type databaseSize: int + :param avgItemsPerTransaction: average number of items per transaction + :type avgItemsPerTransaction: int + :param numItems: total number of items + :type numItems: int + :param sep: separator to distinguish the items in a transaction + :type sep: str + """ + + self.databaseSize = databaseSize + self.avgItemsPerTransaction = avgItemsPerTransaction + self.numItems = numItems + self.sep = sep + self.db = [] + + def _generateArray(self, nums, avg, maxItems) -> list: + """ + Generate a random array of length n whose values average to m + + :param nums: number of values + :type nums: int + :param avg: average value + :type avg: int + :param maxItems: maximum value + :type maxItems: int + + Returns: + values: list - random array + """ + + # generate n random values + values = np.random.randint(1, avg * 2, nums) + sums = np.sum(values) + weights = values / sums + + # Calculate sumRes + sumRes = nums * avg + + # Adjust values based on weights and sumRes + new_values = np.round(sumRes * weights).astype(int) + + # if all transactions have 0 items, add 1 item to each transaction + for loc in np.where(new_values < 1)[0]: + new_values[loc] += 1 + + difference = sumRes - np.sum(new_values) + if difference > 0: + for i in range(difference): + index = np.random.randint(0, len(new_values)) + new_values[index] += 1 + else: + for i in range(abs(difference)): + index = np.random.randint(0, len(new_values)) + new_values[index] -= 1 + + return values + + def create(self) -> None: + """ + Generate the transactional database with the given input parameters. + Returns: None + """ + + values = self._generateArray(self.databaseSize, self.avgItemsPerTransaction, self.numItems) + + self.db = [] + for i in range(self.databaseSize): + self.db.append(np.random.choice(range(1, self.numItems + 1), values[i], replace=False)) + + def save(self, filename) -> None: + """ + Save the transactional database to a file + + :param filename: name of the file + :type filename: str + """ + + with open(filename, 'w') as f: + for line in self.db: + f.write(str(self.sep).join(map(str, line)) + '\n') + + def getTransactions(self, sep = "\t") -> pd.DataFrame: + """ + Get the transactional database in dataFrame format + + Returns: + db: pd.dataFrame - transactional database + """ + column = "Transactions" + db = pd.DataFrame(columns=[column]) + db[column] = [sep.join(map(str, line)) for line in self.db] + return db + + +if __name__ == "__main__": + + if len(sys.argv) == 5: + obj = TransactionalDatabase(int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3])) + obj.create() + obj.save(sys.argv[4]) + if len(sys.argv) == 6: + obj = TransactionalDatabase(int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3]), sys.argv[4]) + obj.create() + obj.save(sys.argv[5]) + else: + raise ValueError("Invalid number of arguments. Args: or Args: ") + \ No newline at end of file diff --git a/PAMI/extras/syntheticDataGenerator/_syntheticUtilityDatabase.py b/PAMI/extras/syntheticDataGenerator/_syntheticUtilityDatabase.py new file mode 100644 index 000000000..a91270900 --- /dev/null +++ b/PAMI/extras/syntheticDataGenerator/_syntheticUtilityDatabase.py @@ -0,0 +1,107 @@ +import random as _rd + + +class syntheticUtilityDatabase: + """ + This class creates a synthetic utility database. + + Attributes: + totalTransactions (int): Number of transactions. + numOfItems (int): Number of items. + maxUtilRange (int): Maximum utility range. + avgTransactionLength (int): The length of average transaction. + + Methods: + __init__(totalTransactions, numOfItems, maxUtilRange, avgTransactionLength) + Constructor to initialize the database parameters. + createSyntheticUtilityDatabase(outputFile) + Create utility database and store it in the specified output file. + createRandomNumbers(n, targetSum) + Generate a list of random numbers with a specified target sum. + save(outputFile) + Save the generated utility database to a CSV file. + + Credits: + The complete program was written by A.Hemanth sree sai under the supervision of Professor Rage Uday Kiran. + """ + + def __init__(self, totalTransactions: int, numOfItems: int, maxUtilRange: int, avgTransactionLength: int) -> None: + """ + Constructor to initialize the database parameters. + + Parameters: + totalTransactions (int): Number of transactions. + numOfItems (int): Number of items. + maxUtilRange (int): Maximum utility range. + avgTransactionLength (int): The length of average transaction. + """ + self.totalTransactions = totalTransactions + self.numOfItems = numOfItems + self.maxUtilRange = maxUtilRange + self.avgTransactionLength = avgTransactionLength + self.transactions = [] + + def createSyntheticUtilityDatabase(self, outputFile: str) -> None: + """ + Create utility database and store it in the specified output file. + + Parameters: + outputFile (str): File name or path to store the database. + """ + if self.avgTransactionLength > self.numOfItems: + print("Error: avgTransactionLength cannot exceed numOfItems.") + return + + with open(outputFile, 'w') as writer: + for _ in range(self.totalTransactions): + length = _rd.randint(1, self.avgTransactionLength + 20) + items = [_rd.randint(1, self.numOfItems) for _ in range(length)] + utilities = [_rd.randint(1, self.maxUtilRange) for _ in range(length)] + + # Generating 13 random numbers with a target sum of 2000 + randomNumbers = self.createRandomNumbers(13, 2000) + + # Checking if avgTransactionLength exceeds numOfItems + if self.avgTransactionLength > self.numOfItems: + print("Error: avgTransactionLength cannot exceed numOfItems.") + return + + st = '\t'.join(map(str, items)) + '\t:' + str(sum(utilities)) + ':' + st1 = '\t'.join(map(str, randomNumbers)) + '\t' + + writer.write(f"{st}{st1}\n") + + def createRandomNumbers(self, n: int, targetSum: int) -> list[float]: + """ + Generate a list of random numbers with a specified target sum. + + Parameters: + n (int): Number of random numbers to generate. + targetSum (int): Target sum for the generated random numbers. + + Returns: + list: List of generated random numbers normalized and multiplied by the target sum. + """ + randNumbers = [_rd.uniform(0, 1) for _ in range(n)] + randSum = sum(randNumbers) + normalizedNumbers = [num / randSum for num in randNumbers] + result = [round(num * targetSum) for num in normalizedNumbers] + return result + + def save(self, outputFile: str) -> None: + """ + Save the generated utility database to a CSV file. + + Parameters: + outputFile (str): File name or path to store the CSV file. + """ + with open(outputFile, 'w') as f: + for transaction in self.transactions: + f.write('\t'.join(map(str, transaction)) + '\n') + + +if __name__ == "__main__": + ap = syntheticUtilityDatabase(100000, 870, 100, 10) + ap.createSyntheticUtilityDatabase("T10_util-12.csv") +else: + print("Error! The number of input parameters does not match the total number of parameters provided") diff --git a/PAMI/extras/syntheticDataGenerator/syntheticUtilityDatabase.py b/PAMI/extras/syntheticDataGenerator/syntheticUtilityDatabase.py index a91270900..5d1d61986 100644 --- a/PAMI/extras/syntheticDataGenerator/syntheticUtilityDatabase.py +++ b/PAMI/extras/syntheticDataGenerator/syntheticUtilityDatabase.py @@ -1,15 +1,27 @@ import random as _rd - +import psutil,os,time class syntheticUtilityDatabase: """ This class creates a synthetic utility database. Attributes: - totalTransactions (int): Number of transactions. - numOfItems (int): Number of items. - maxUtilRange (int): Maximum utility range. - avgTransactionLength (int): The length of average transaction. + totalTransactions :int + Number of transactions. + numOfItems : int + Number of items. + maxUtilRange : int + Maximum utility range. + avgTransactionLength : int + The length of average transaction. + memoryUSS : float + To store the total amount of USS memory consumed by the program + memoryRSS : float + To store the total amount of RSS memory consumed by the program + startTime : float + To record the start time of the mining process + endTime : float + To record the completion time of the mining process Methods: __init__(totalTransactions, numOfItems, maxUtilRange, avgTransactionLength) @@ -20,7 +32,12 @@ class syntheticUtilityDatabase: Generate a list of random numbers with a specified target sum. save(outputFile) Save the generated utility database to a CSV file. - + getMemoryUSS() + Total amount of USS memory consumed by the mining process will be retrieved from this function + getMemoryRSS() + Total amount of RSS memory consumed by the mining process will be retrieved from this function + getRuntime() + Total amount of runtime taken by the mining process will be retrieved from this function Credits: The complete program was written by A.Hemanth sree sai under the supervision of Professor Rage Uday Kiran. """ @@ -40,6 +57,10 @@ def __init__(self, totalTransactions: int, numOfItems: int, maxUtilRange: int, a self.maxUtilRange = maxUtilRange self.avgTransactionLength = avgTransactionLength self.transactions = [] + self._startTime = float() + self._endTime = float() + self._memoryUSS = float() + self._memoryRSS = float() def createSyntheticUtilityDatabase(self, outputFile: str) -> None: """ @@ -48,6 +69,7 @@ def createSyntheticUtilityDatabase(self, outputFile: str) -> None: Parameters: outputFile (str): File name or path to store the database. """ + self._startTime = time.time() if self.avgTransactionLength > self.numOfItems: print("Error: avgTransactionLength cannot exceed numOfItems.") return @@ -70,6 +92,10 @@ def createSyntheticUtilityDatabase(self, outputFile: str) -> None: st1 = '\t'.join(map(str, randomNumbers)) + '\t' writer.write(f"{st}{st1}\n") + process = psutil.Process(os.getpid()) + self._memoryUSS = process.memory_full_info().uss + self._memoryRSS = process.memory_info().rss + self._endTime = time.time() def createRandomNumbers(self, n: int, targetSum: int) -> list[float]: """ @@ -99,9 +125,43 @@ def save(self, outputFile: str) -> None: for transaction in self.transactions: f.write('\t'.join(map(str, transaction)) + '\n') + def getMemoryUSS(self) -> float: + """ + Total amount of USS memory consumed by the mining process will be retrieved from this function + + :return: returning USS memory consumed by the mining process + :rtype: float + """ + + return self._memoryUSS + + def getMemoryRSS(self) -> float: + """ + Total amount of RSS memory consumed by the mining process will be retrieved from this function + + :return: returning RSS memory consumed by the mining process + :rtype: float + """ + + return self._memoryRSS + + def getRuntime(self) -> float: + """ + Calculating the total amount of runtime taken by the mining process + + + :return: returning total amount of runtime taken by the mining process + :rtype: float + """ + + return self._endTime - self._startTime if __name__ == "__main__": - ap = syntheticUtilityDatabase(100000, 870, 100, 10) - ap.createSyntheticUtilityDatabase("T10_util-12.csv") + obj = syntheticUtilityDatabase(100000, 870, 100, 10) + obj.createSyntheticUtilityDatabase("T10_util-12.csv") + print("create SyntheticUtilityDatabase is complete.") + print("Total Memory in USS:", obj.getMemoryUSS()) + print("Total Memory in RSS", obj.getMemoryRSS()) + print("Total ExecutionTime in ms:", obj.getRuntime()) else: print("Error! The number of input parameters does not match the total number of parameters provided")