diff --git a/PAMI/extras/convert/DF2DB.py b/PAMI/extras/convert/DF2DB.py
index e0f3a5d7..e3979ab7 100644
--- a/PAMI/extras/convert/DF2DB.py
+++ b/PAMI/extras/convert/DF2DB.py
@@ -36,7 +36,7 @@
"""
import PAMI.extras.convert.denseDF2DB as dense
import PAMI.extras.convert.sparseDF2DB as sparse
-import sys
+import sys,psutil,os,time
from typing import Union
class DF2DB:
@@ -54,6 +54,14 @@ class DF2DB:
It is condition of all item
:param DFtype: str :
It is DataFrame type. It should be sparse or dense. Default DF is sparse.
+ :param memoryUSS : float
+ To store the total amount of USS memory consumed by the program
+ :param memoryRSS : float
+ To store the total amount of RSS memory consumed by the program
+ :param startTime : float
+ To record the start time of the conversion process
+ :param endTime : float
+ To record the completion time of the conversion process
**Importing this algorithm into a python program**
@@ -81,6 +89,11 @@ def __init__(self, inputDF, DFtype='dense') -> None:
self.DF2DB = dense.denseDF2DB(self.inputDF)
else:
raise Exception('DF type should be sparse or dense')
+ self._startTime = float()
+ self._endTime = float()
+ self._memoryUSS = float()
+ self._memoryRSS = float()
+
def convert2TransactionalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> str:
"""
create transactional database and return oFileName
@@ -89,7 +102,12 @@ def convert2TransactionalDatabase(self, oFile: str, condition: str, thresholdVal
:return: oFile name
:rtype: str
"""
+ self._startTime = time.time()
self.DF2DB.convert2TransactionalDatabase(oFile,condition,thresholdValue)
+ process = psutil.Process(os.getpid())
+ self._memoryUSS = process.memory_full_info().uss
+ self._memoryRSS = process.memory_info().rss
+ self._endTime = time.time()
return self.DF2DB.getFileName()
def convert2TemporalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> str:
@@ -100,7 +118,12 @@ def convert2TemporalDatabase(self, oFile: str, condition: str, thresholdValue: U
:return: oFile name
:rtype: str
"""
+ self._startTime = time.time()
self.DF2DB.convert2TemporalDatabase(oFile,condition,thresholdValue)
+ process = psutil.Process(os.getpid())
+ self._memoryUSS = process.memory_full_info().uss
+ self._memoryRSS = process.memory_info().rss
+ self._endTime = time.time()
return self.DF2DB.getFileName()
def convert2UtilityDatabase(self, oFile: str) -> str:
@@ -111,10 +134,51 @@ def convert2UtilityDatabase(self, oFile: str) -> str:
:return: outputFile name
:rtype: str
"""
+ self._startTime = time.time()
self.DF2DB.convert2UtilityDatabase(oFile)
+ process = psutil.Process(os.getpid())
+ self._memoryUSS = process.memory_full_info().uss
+ self._memoryRSS = process.memory_info().rss
+ self._endTime = time.time()
return self.DF2DB.getFileName()
+ def getMemoryUSS(self) -> float:
+ """
+ Return the USS memory figure stored by the most recent convert2* call.
+
+ The convert2* methods sample psutil's memory_full_info().uss for the current process right after the
+ conversion finishes and keep it in self._memoryUSS; this getter only reads that stored value.
+ It is 0.0 when no conversion has been run on this object yet.
+
+ :return: USS value as reported by psutil (bytes, per the psutil documentation)
+ :rtype: float
+ """
+
+ return self._memoryUSS
+
+ def getMemoryRSS(self) -> float:
+ """
+ Return the RSS memory figure stored by the most recent convert2* call.
+
+ The convert2* methods sample psutil's memory_info().rss for the current process right after the
+ conversion finishes and keep it in self._memoryRSS; this getter only reads that stored value.
+ It is 0.0 when no conversion has been run on this object yet.
+
+ :return: RSS value as reported by psutil (bytes, per the psutil documentation)
+ :rtype: float
+ """
+
+ return self._memoryRSS
+
+ def getRuntime(self) -> float:
+ """
+ Return the elapsed wall-clock time of the most recent convert2* call.
+
+ The value is the difference between two time.time() readings taken by the convert2* method: one before
+ the conversion starts and one after the memory figures have been sampled, so it is expressed in seconds
+ and includes the time spent sampling memory. It is 0.0 when no conversion has been run yet.
+
+ :return: elapsed time in seconds
+ :rtype: float
+ """
+
+ return self._endTime - self._startTime
+
+
if __name__ == '__main__':
obj = DF2DB(sys.argv[1])
- obj.getTransactionalDatabase(sys.argv[2],sys.argv[3],sys.argv[4])
\ No newline at end of file
+    # NOTE(review): sys.argv[1] is handed over as inputDF unchanged; presumably the converters need a
+    # DataFrame rather than a path string -- confirm how this entry point is meant to be used.
+    # DF2DB defines convert2TransactionalDatabase; it has no getTransactionalDatabase method.
+    obj.convert2TransactionalDatabase(sys.argv[2], sys.argv[3], sys.argv[4])
+    print("Conversion is complete.")
+    print("Total Memory in USS:", obj.getMemoryUSS())
+    print("Total Memory in RSS", obj.getMemoryRSS())
+    # getRuntime() is a difference of time.time() readings, i.e. seconds rather than milliseconds.
+    print("Total ExecutionTime in seconds:", obj.getRuntime())
diff --git a/PAMI/extras/convert/_DF2DB.py b/PAMI/extras/convert/_DF2DB.py
new file mode 100644
index 00000000..e0f3a5d7
--- /dev/null
+++ b/PAMI/extras/convert/_DF2DB.py
@@ -0,0 +1,120 @@
+# DF2DB converts a sparse or dense dataframe into a transactional, temporal, or utility database.
+#
+#
+# **Importing this algorithm into a python program**
+# --------------------------------------------------------
+#
+# from PAMI.extras.convert import DF2DB as db
+#
+# obj = db.DF2DB(idf, "sparse/dense")
+#
+# obj.convert2TransactionalDatabase("outputFileName", ">=", 16) # To create transactional database
+#
+# obj.convert2TemporalDatabase("outputFileName", ">=", 16) # To create temporal database
+#
+# obj.convert2UtilityDatabase("outputFileName") # To create utility database
+#
+
+
+
+
+__copyright__ = """
+Copyright (C) 2021 Rage Uday Kiran
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+"""
+import PAMI.extras.convert.denseDF2DB as dense
+import PAMI.extras.convert.sparseDF2DB as sparse
+import sys
+from typing import Union
+
+class DF2DB:
+    """
+    :Description: This class creates a database file from a given DataFrame by delegating the work to the
+                  sparse or the dense converter, selected through DFtype.
+
+    :Attributes:
+
+        :param inputDF: DataFrame :
+            It is sparse or dense DataFrame
+        :param thresholdValue: int or float :
+            It is threshold value of all item
+        :param condition: str :
+            It is condition of all item
+        :param DFtype: str :
+            It is DataFrame type. It should be sparse or dense (letter case is ignored). Default DF is dense.
+
+
+    **Importing this algorithm into a python program**
+    --------------------------------------------------------
+    .. code-block:: python
+
+            from PAMI.extras.convert import DF2DB as db
+
+            obj = db.DF2DB(idf, "sparse/dense")
+
+            obj.convert2TransactionalDatabase("outputFileName", condition, threshold)    # To create transactional database
+
+            obj.convert2TemporalDatabase("outputFileName", condition, threshold)    # To create temporal database
+
+            obj.convert2UtilityDatabase("outputFileName")    # To create utility database
+    """
+
+    def __init__(self, inputDF, DFtype='dense') -> None:
+        """
+        Pick the converter that matches DFtype and bind it to inputDF.
+
+        :param inputDF: sparse or dense DataFrame to convert
+        :param DFtype: 'sparse' or 'dense', in any letter case
+        :type DFtype: str
+        :raises Exception: when DFtype is neither 'sparse' nor 'dense'
+        """
+        self.inputDF = inputDF
+        # Compare the normalized value, otherwise lower-casing has no effect and 'Dense' is rejected.
+        self.DFtype = DFtype.lower()
+        if self.DFtype == 'sparse':
+            self.DF2DB = sparse.sparseDF2DB(self.inputDF)
+        elif self.DFtype == 'dense':
+            self.DF2DB = dense.denseDF2DB(self.inputDF)
+        else:
+            raise Exception('DF type should be sparse or dense')
+
+    def convert2TransactionalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> str:
+        """
+        create transactional database and return oFileName
+
+        :param oFile: file name or path to store database
+        :type oFile: str
+        :param condition: comparison symbol applied between each value and thresholdValue
+        :type condition: str
+        :param thresholdValue: value every entry is compared against
+        :type thresholdValue: int or float
+        :return: oFile name
+        :rtype: str
+        """
+        self.DF2DB.convert2TransactionalDatabase(oFile, condition, thresholdValue)
+        return self.DF2DB.getFileName()
+
+    def convert2TemporalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> str:
+        """
+        create temporal database and return oFile name
+
+        :param oFile: file name or path to store database
+        :type oFile: str
+        :param condition: comparison symbol applied between each value and thresholdValue
+        :type condition: str
+        :param thresholdValue: value every entry is compared against
+        :type thresholdValue: int or float
+        :return: oFile name
+        :rtype: str
+        """
+        self.DF2DB.convert2TemporalDatabase(oFile, condition, thresholdValue)
+        return self.DF2DB.getFileName()
+
+    def convert2UtilityDatabase(self, oFile: str) -> str:
+        """
+        create utility database and return oFile name
+
+        :param oFile: file name or path to store database
+        :type oFile: str
+        :return: outputFile name
+        :rtype: str
+        """
+        self.DF2DB.convert2UtilityDatabase(oFile)
+        return self.DF2DB.getFileName()
+
+
+if __name__ == '__main__':
+    obj = DF2DB(sys.argv[1])
+    # DF2DB defines convert2TransactionalDatabase; it has no getTransactionalDatabase method.
+    obj.convert2TransactionalDatabase(sys.argv[2], sys.argv[3], sys.argv[4])
\ No newline at end of file
diff --git a/PAMI/extras/convert/_denseDF2DB.py b/PAMI/extras/convert/_denseDF2DB.py
new file mode 100644
index 00000000..18d301f6
--- /dev/null
+++ b/PAMI/extras/convert/_denseDF2DB.py
@@ -0,0 +1,291 @@
+# denseDF2DB converts a dense dataframe into transactional, temporal, multiple time series, or utility databases.
+#
+# **Importing this algorithm into a python program**
+# --------------------------------------------------------
+#
+# from PAMI.extras.convert import denseDF2DB as db
+#
+# obj = db.denseDF2DB(idf)
+#
+# obj.save(oFile)
+#
+# obj.convert2TransactionalDatabase("outputFileName", ">=", 16) # To create transactional database
+#
+# obj.convert2TemporalDatabase("outputFileName", ">=", 16) # To create temporal database
+#
+# obj.convert2MultipleTimeSeries("outputFileName", ">=", 16, interval) # To create Multiple TimeSeries database
+#
+# obj.convert2UtilityDatabase("outputFileName") # To create utility database
+#
+# obj.getFileName() # To get file name of the database
+#
+
+
+
+
+
+__copyright__ = """
+Copyright (C) 2021 Rage Uday Kiran
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+"""
+
+import operator
+from typing import Union
+
+# Maps every supported comparison symbol to the operator-module function that implements it.
+condition_operator = {
+    '<': operator.lt,
+    '>': operator.gt,
+    '<=': operator.le,
+    '>=': operator.ge,
+    '==': operator.eq,
+    '!=': operator.ne
+}
+
+
+class denseDF2DB:
+    """
+    :Description: This class creates database files from a dense DataFrame, treating every row as one
+                  transaction and every column label as one item.
+
+    :Attributes:
+
+        :param inputDF: dataframe :
+            It is dense DataFrame
+        :param condition: str :
+            It is condition to judge the value in dataframe. It must be a key of condition_operator.
+        :param thresholdValue: int or float :
+            User defined value.
+
+
+    **Importing this algorithm into a python program**
+    --------------------------------------------------------
+    .. code-block:: python
+
+            from PAMI.extras.convert import denseDF2DB as db
+
+            obj = db.denseDF2DB(iDdf)
+
+            obj.convert2TransactionalDatabase("outputFileName", ">=", 16)    # To create transactional database
+
+            obj.convert2TemporalDatabase("outputFileName", ">=", 16)    # To create temporal database
+
+            obj.convert2MultipleTimeSeries("outputFileName", ">=", 16, interval)    # To create Multiple TimeSeries database
+
+            obj.convert2UtilityDatabase("outputFileName")    # To create utility database
+
+            obj.getFileName()    # To get file name of the database
+    """
+
+    def __init__(self, inputDF) -> None:
+        """
+        Remember the DataFrame together with its column labels (items) and index labels (tids).
+
+        :param inputDF: dense DataFrame to convert
+        """
+        self.inputDF = inputDF
+        self.outputFile = ' '
+        self.items = list(self.inputDF.columns.values)
+        self.tids = list(self.inputDF.index)
+
+    @staticmethod
+    def _resolveCondition(condition: str):
+        """Return the comparison function for condition, or print 'Condition error' and return None."""
+        if condition not in condition_operator:
+            print('Condition error')
+            return None
+        return condition_operator[condition]
+
+    def _selectItems(self, tid, compare, thresholdValue) -> list:
+        """Return the items of row tid whose value satisfies compare(value, thresholdValue)."""
+        return [item for item in self.items if compare(self.inputDF.at[tid, item], thresholdValue)]
+
+    def convert2TransactionalDatabase(self, outputFile: str, condition: str, thresholdValue: Union[int, float]) -> None:
+        """
+        :Description: Create transactional data base
+
+        One tab-separated line of items is written per row; rows without a matching item are left out.
+        An unknown condition prints 'Condition error' and leaves outputFile empty.
+
+        :param outputFile: Write transactional database into outputFile
+        :type outputFile: str
+        :param condition: It is condition to judge the value in dataframe
+        :type condition: str
+        :param thresholdValue: User defined value.
+        :type thresholdValue: Union[int, float]
+        """
+        self.outputFile = outputFile
+        with open(outputFile, 'w') as f:
+            compare = self._resolveCondition(condition)
+            if compare is None:
+                return
+            for tid in self.tids:
+                transaction = self._selectItems(tid, compare, thresholdValue)
+                if transaction:
+                    f.write('\t'.join(f'{item}' for item in transaction) + '\n')
+
+    def convert2TemporalDatabase(self, outputFile: str, condition: str, thresholdValue: Union[int, float]) -> None:
+        """
+        :Description: Create temporal database
+
+        Every written line starts with the timestamp ``tid + 1`` followed by the tab-separated items, so the
+        index labels have to support ``+ 1``. Rows without a matching item are left out. An unknown condition
+        prints 'Condition error' and leaves outputFile empty.
+
+        :param outputFile: Write temporal database into outputFile
+        :type outputFile: str
+        :param condition: It is condition to judge the value in dataframe
+        :type condition: str
+        :param thresholdValue: User defined value.
+        :type thresholdValue: Union[int, float]
+        """
+        self.outputFile = outputFile
+        with open(outputFile, 'w') as f:
+            compare = self._resolveCondition(condition)
+            if compare is None:
+                return
+            for tid in self.tids:
+                transaction = self._selectItems(tid, compare, thresholdValue)
+                if transaction:
+                    f.write(f'{tid + 1}' + ''.join(f'\t{item}' for item in transaction) + '\n')
+
+    def convert2MultipleTimeSeries(self, outputFile: str, condition: str,
+                                   thresholdValue: Union[int, float], interval: int) -> None:
+        """
+        :Description: Create the multiple time series database.
+
+        Rows are consumed in groups of ``interval``. For each complete group one line
+        ``positions:items:values`` is written, where every entry is followed by a tab and positions count
+        rows from 1 inside the group. Rows left over after the last complete group are not written.
+        An unknown condition prints 'Condition error' and leaves outputFile empty.
+
+        :param outputFile: Write multiple time series database into outputFile.
+        :type outputFile: str
+        :param condition: It is condition to judge the value in dataframe
+        :type condition: str
+        :param thresholdValue: User defined value.
+        :type thresholdValue: int or float
+        :param interval: Breaks the given timeseries into intervals.
+        :type interval: int
+        """
+        self.outputFile = outputFile
+        # The context manager guarantees the file is flushed and closed, also when a comparison raises.
+        with open(self.outputFile, 'w+') as writer:
+            compare = self._resolveCondition(condition)
+            if compare is None:
+                return
+            count = 0
+            positions, items, values = [], [], []
+            for tid in self.tids:
+                count += 1
+                for item in self._selectItems(tid, compare, thresholdValue):
+                    positions.append(count)
+                    items.append(item)
+                    values.append(self.inputDF.at[tid, item])
+                if count == interval:
+                    # str() on every entry keeps non-string column labels from breaking the concatenation.
+                    fields = [''.join(str(entry) + '\t' for entry in column)
+                              for column in (positions, items, values)]
+                    writer.write("%s\n" % ':'.join(fields))
+                    positions, items, values = [], [], []
+                    count = 0
+
+    def convert2UncertainTransactional(self, outputFile: str, condition: str,
+                                       thresholdValue: Union[int, float]) -> None:
+        """
+        :Description: Create an uncertain transactional database.
+
+        Each written line holds the matching items, a colon and one number per item computed as
+        ``round(0.1 + 0.036 * abs(25 - value), 2)``. Rows without a matching item are left out.
+        An unknown condition prints 'Condition error' and leaves outputFile empty.
+
+        :param outputFile: Write uncertain transactional database into outputFile
+        :type outputFile: str
+        :param condition: It is condition to judge the value in dataframe
+        :type condition: str
+        :param thresholdValue: User defined value.
+        :type thresholdValue: Union[int, float]
+        """
+        self.outputFile = outputFile
+        with open(outputFile, 'w') as f:
+            compare = self._resolveCondition(condition)
+            if compare is None:
+                return
+            for tid in self.tids:
+                transaction = self._selectItems(tid, compare, thresholdValue)
+                if not transaction:
+                    continue
+                # NOTE(review): the constants 0.1, 0.036 and 25 are kept as found; their origin is not
+                # visible in this file.
+                weights = [round(0.1 + 0.036 * abs(25 - self.inputDF.at[tid, item]), 2) for item in transaction]
+                if len(transaction) > 1:
+                    # NOTE(review): in this branch every number is preceded by a tab (also the first one,
+                    # right after the colon), unlike the single-item branch; kept for output compatibility.
+                    f.write('\t'.join(f'{item}' for item in transaction))
+                    f.write(':' + ''.join(f'\t{weight}' for weight in weights))
+                else:
+                    f.write(f'{transaction[0]}:{weights[0]}')
+                f.write('\n')
+
+    def convert2UtilityDatabase(self, outputFile: str) -> None:
+        """
+        :Description: Create the utility database.
+
+        One line ``items:sum of values:values`` is written per row, using only the non-NaN cells of that
+        row; items and values are tab-separated. Rows whose cells are all NaN are skipped.
+
+        :param outputFile: Write utility database into outputFile
+        :type outputFile: str
+        :return: None
+        """
+        self.outputFile = outputFile
+        with open(self.outputFile, 'w') as f:
+            for tid in self.tids:
+                row = self.inputDF.loc[tid].dropna()
+                if len(row.index) == 0:
+                    # Nothing to write for this row; indexing its first label would raise IndexError.
+                    continue
+                labels = '\t'.join(f'{item}' for item in row.index)
+                utilities = '\t'.join(f'{row.at[item]}' for item in row.index)
+                f.write(f'{labels}:{row.sum()}:{utilities}\n')
+
+    def getFileName(self) -> str:
+        """
+        :return: outputFile name
+        :rtype: str
+        """
+        return self.outputFile
+
+# Dataframes do not run from a terminal
+
+# if __name__ == '__main__':
+# obj = denseDF2DB(sys.argv[1])
+# obj.convert2TransactionalDatabase( sys.argv[2], sys.argv[3]sys.argv[4])
+# transactionalDB = obj.getFileName()
+# print(transactionalDB)
\ No newline at end of file
diff --git a/PAMI/extras/convert/_sparseDF2DB.py b/PAMI/extras/convert/_sparseDF2DB.py
new file mode 100644
index 00000000..97589118
--- /dev/null
+++ b/PAMI/extras/convert/_sparseDF2DB.py
@@ -0,0 +1,182 @@
+# sparseDF2DB converts a sparse dataframe into transactional, temporal, or utility databases.
+#
+# **Importing this algorithm into a python program**
+# --------------------------------------------------------
+#
+# from PAMI.extras.convert import sparseDF2DB as db
+#
+# obj = db.sparseDF2DB(idf)
+#
+# obj.save(oFile)
+#
+# obj.convert2TransactionalDatabase("outputFileName", ">=", 16) # To create transactional database
+#
+# obj.convert2TemporalDatabase("outputFileName", ">=", 16) # To create temporal database
+#
+# obj.convert2UtilityDatabase("outputFileName") # To create utility database
+#
+# obj.getFileName() # To get file name of the database
+#
+
+
+
+
+__copyright__ = """
+Copyright (C) 2021 Rage Uday Kiran
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+"""
+import pandas as pd
+import sys
+from typing import Union
+class sparseDF2DB:
+    """
+    :Description: This class creates database files from a sparse DataFrame.
+
+    :Attributes:
+
+        :param inputDF: dataframe :
+            It is sparse DataFrame. The transactional and temporal writers read its 'tid', 'item' and
+            'value' columns; the utility writer groups the 'item' and 'value' columns by index level 0.
+        :param condition: str :
+            It is condition to judge the value in dataframe
+        :param thresholdValue: int or float :
+            User defined value.
+
+    **Importing this algorithm into a python program**
+    --------------------------------------------------------
+    .. code-block:: python
+
+            from PAMI.extras.convert import sparseDF2DB as db
+
+            obj = db.sparseDF2DB(iDdf)
+
+            obj.convert2TransactionalDatabase("outputFileName", ">=", 16)    # To create transactional database
+
+            obj.convert2TemporalDatabase("outputFileName", ">=", 16)    # To create temporal database
+
+            obj.convert2UtilityDatabase("outputFileName")    # To create utility database
+
+            obj.getFileName()    # To get file name of the database
+    """
+
+    # Comparison symbols accepted by setParametors; each one is placed verbatim into a query expression.
+    _conditions = ('>', '>=', '<=', '<', '==', '!=')
+
+    def __init__(self, inputDF) -> None:
+        """
+        :param inputDF: sparse DataFrame to convert
+        """
+        self.inputDF = inputDF
+        self.condition = ""
+        self.thresholdValue = 0
+        self.outputFile = ''
+
+    def setParametors(self, outputFile: str, condition: str, thresholdValue: Union[int, float]):
+        """
+        Store the parameters and build self.df, a Series that maps every tid to the list of its items whose
+        value satisfies ``value <condition> thresholdValue``.
+
+        :param outputFile: file the next writer call will create
+        :type outputFile: str
+        :param condition: one of '>', '>=', '<=', '<', '==', '!='
+        :type condition: str
+        :param thresholdValue: value every entry of the 'value' column is compared against
+        :type thresholdValue: int or float
+        :raises ValueError: when condition is not supported. Continuing would either fail on a missing
+            self.df or silently reuse the selection of an earlier call.
+        """
+        if condition not in self._conditions:
+            raise ValueError(f'Condition error: {condition!r} is not one of {self._conditions}')
+        self.condition = condition
+        self.thresholdValue = thresholdValue
+        self.outputFile = outputFile
+        selected = self.inputDF.query(f'value {self.condition} {self.thresholdValue}')
+        self.df = selected.drop(columns='value').groupby('tid')['item'].apply(list)
+
+    def convert2TransactionalDatabase(self, outputFile: str, condition: str, thresholdValue: Union[int, float]) -> None:
+        """
+        Create transactional data base: one comma-separated line of items per tid that has a matching item.
+
+        :param outputFile: str:
+            Write transactional data base into outputFile
+        :param condition: str :
+            It is condition to judge the value in dataframe
+        :param thresholdValue: int or float :
+            User defined value.
+        :raises ValueError: when condition is not supported
+        :return: None
+        """
+        self.setParametors(outputFile, condition, thresholdValue)
+        with open(self.outputFile, 'w') as f:
+            for line in self.df:
+                f.write(','.join(f'{item}' for item in line) + '\n')
+
+    def convert2TemporalDatabase(self, outputFile: str, condition: str, thresholdValue: float) -> None:
+        """
+        Create temporal data base: every line is the tid followed by its matching items, comma-separated.
+
+        :param outputFile: str:
+            Write temporal data base into outputFile
+        :param condition: str :
+            It is condition to judge the value in dataframe
+        :param thresholdValue: int or float :
+            User defined value.
+        :raises ValueError: when condition is not supported
+        :return: None
+        """
+        self.setParametors(outputFile, condition, thresholdValue)
+        with open(self.outputFile, 'w') as f:
+            for tid, line in self.df.items():
+                f.write(f'{tid}' + ''.join(f',{item}' for item in line) + '\n')
+
+    def convert2UtilityDatabase(self, outputFile: str) -> None:
+        """
+        Create the utility database: for every group of index level 0 one line
+        ``items:sum of values:values`` is written, items and values being tab-separated.
+
+        :param outputFile: str:
+            Write utility data base into outputFile
+        :return: None
+        """
+        self.outputFile = outputFile
+        grouped = self.inputDF.groupby(level=0)
+        items = grouped['item'].apply(list)
+        values = grouped['value'].apply(list)
+        sums = grouped['value'].sum()
+        with open(self.outputFile, 'w') as f:
+            for tid in list(items.index):
+                labels = '\t'.join(f'{item}' for item in items[tid])
+                utilities = '\t'.join(f'{value}' for value in values[tid])
+                f.write(f'{labels}:{sums[tid]}:{utilities}\n')
+
+    def getFileName(self) -> str:
+        """
+        :return: outputFile name
+        :rtype: str
+        """
+        return self.outputFile
+
+if __name__ == '__main__':
+
+    obj = sparseDF2DB(sys.argv[1])
+    # sparseDF2DB has no createTemporal method; the temporal writer is convert2TemporalDatabase.
+    obj.convert2TemporalDatabase(sys.argv[2], sys.argv[3], sys.argv[4])
+    obj.getFileName()
+
diff --git a/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py b/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py
index 8e5413f5..0019a9c3 100644
--- a/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py
+++ b/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py
@@ -15,7 +15,7 @@
import numpy as np
import pandas as pd
-import sys
+import sys,psutil,os,time
__copyright__ = """
@@ -46,6 +46,14 @@ class TransactionalDatabase:
Average number of items per line
numItems: int
Total number of items
+ memoryUSS : float
+ To store the total amount of USS memory consumed by the program
+ memoryRSS : float
+ To store the total amount of RSS memory consumed by the program
+ startTime : float
+ To record the start time of the mining process
+ endTime : float
+ To record the completion time of the mining process
:Methods:
@@ -55,7 +63,12 @@ class TransactionalDatabase:
Save the transactional database to a user-specified file
getTransactions:
Get the transactional database
-
+ getMemoryUSS()
+ Total amount of USS memory consumed by the mining process will be retrieved from this function
+ getMemoryRSS()
+ Total amount of RSS memory consumed by the mining process will be retrieved from this function
+ getRuntime()
+ Total amount of runtime taken by the mining process will be retrieved from this function
**Methods to execute code on terminal**
---------------------------------------------
@@ -106,7 +119,10 @@ def __init__(self, databaseSize, avgItemsPerTransaction, numItems,sep = "\t") ->
self.numItems = numItems
self.sep = sep
self.db = []
-
+ self._startTime = float()
+ self._endTime = float()
+ self._memoryUSS = float()
+ self._memoryRSS = float()
def _generateArray(self, nums, avg, maxItems) -> list:
"""
Generate a random array of length n whose values average to m
@@ -154,7 +170,7 @@ def create(self) -> None:
Generate the transactional database with the given input parameters.
Returns: None
"""
-
+ self._startTime = time.time()
values = self._generateArray(self.databaseSize, self.avgItemsPerTransaction, self.numItems)
self.db = []
@@ -184,18 +200,55 @@ def getTransactions(self, sep = "\t") -> pd.DataFrame:
db = pd.DataFrame(columns=[column])
db[column] = [sep.join(map(str, line)) for line in self.db]
return db
-
+ def getMemoryUSS(self) -> float:
+ """
+ Sample the USS memory of the current process, store it in self._memoryUSS and return it.
+
+ The sample is taken with psutil at the moment this method is called, so it describes the process at
+ call time rather than at the end of create().
+
+ :return: USS value as reported by psutil (bytes, per the psutil documentation)
+ :rtype: float
+ """
+ process = psutil.Process(os.getpid())
+ self._memoryUSS = process.memory_full_info().uss
+ return self._memoryUSS
+
+ def getMemoryRSS(self) -> float:
+ """
+ Sample the RSS memory of the current process, store it in self._memoryRSS and return it.
+
+ The sample is taken with psutil at the moment this method is called, so it describes the process at
+ call time rather than at the end of create().
+
+ :return: RSS value as reported by psutil (bytes, per the psutil documentation)
+ :rtype: float
+ """
+ process = psutil.Process(os.getpid())
+ self._memoryRSS = process.memory_info().rss
+ return self._memoryRSS
+
+ def getRuntime(self) -> float:
+ """
+ Return the time elapsed, in seconds, between the start of create() and this call.
+
+ self._endTime is stamped with time.time() here, at call time, and the start stamp written by create()
+ is subtracted from it.
+ NOTE(review): because the end stamp is taken in the getter, the result also covers whatever ran after
+ create() (for example save()) and grows with every call; confirm whether create() should record it.
+
+ :return: elapsed time in seconds
+ :rtype: float
+ """
+ self._endTime = time.time()
+ return self._endTime - self._startTime
if __name__ == "__main__":
if len(sys.argv) == 5:
obj = TransactionalDatabase(int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3]))
obj.create()
obj.save(sys.argv[4])
+        print("Total Memory in USS:", obj.getMemoryUSS())
+        print("Total Memory in RSS", obj.getMemoryRSS())
+        # getRuntime() is a difference of time.time() readings, i.e. seconds rather than milliseconds.
+        print("Total ExecutionTime in seconds:", obj.getRuntime())
-    if len(sys.argv) == 6:
+    # elif keeps the branches in one chain; with a second plain `if`, a successful 5-argument run fell
+    # through to the `else` below and raised ValueError after the database had been saved.
+    elif len(sys.argv) == 6:
obj = TransactionalDatabase(int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3]), sys.argv[4])
obj.create()
obj.save(sys.argv[5])
+        print("Total Memory in USS:", obj.getMemoryUSS())
+        print("Total Memory in RSS", obj.getMemoryRSS())
+        print("Total ExecutionTime in seconds:", obj.getRuntime())
else:
raise ValueError("Invalid number of arguments. Args: or Args: ")
\ No newline at end of file
diff --git a/PAMI/extras/syntheticDataGenerator/_TransactionalDatabase.py b/PAMI/extras/syntheticDataGenerator/_TransactionalDatabase.py
new file mode 100644
index 00000000..8e5413f5
--- /dev/null
+++ b/PAMI/extras/syntheticDataGenerator/_TransactionalDatabase.py
@@ -0,0 +1,201 @@
+# TransactionalDatabase is a collection of transactions. It only considers the data in transactions and ignores the metadata.
+#
+# **Importing this algorithm into a python program**
+# --------------------------------------------------------
+# from PAMI.extras.syntheticDataGenerator import TransactionalDatabase as db
+#
+# obj = db.TransactionalDatabase(10, 5, 10)
+#
+# obj.create()
+#
+# obj.save('db.txt')
+#
+# print(obj.getTransactions())
+#
+
+import numpy as np
+import pandas as pd
+import sys
+
+
+__copyright__ = """
+ Copyright (C) 2021 Rage Uday Kiran
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+"""
+
+class TransactionalDatabase:
+    """
+    :Description: TransactionalDatabase is a collection of transactions. It only considers the data in transactions and ignores the metadata.
+    :Attributes:
+
+        databaseSize: int
+            Number of transactions
+        avgItemsPerTransaction: int
+            Average number of items per transaction
+        numItems: int
+            Total number of items
+        sep: str
+            Separator written between the items of a transaction by save()
+        db: list
+            Generated transactions, one array of item identifiers per transaction
+
+    :Methods:
+
+        create:
+            Generate the transactional database
+        save:
+            Save the transactional database to a user-specified file
+        getTransactions:
+            Get the transactional database
+
+
+    **Methods to execute code on terminal**
+    ---------------------------------------------
+
+    .. code-block:: console
+
+      Format:
+
+      (.venv) $ python3 TransactionalDatabase.py <databaseSize> <avgItemsPerTransaction> <numItems> [<sep>] <outputFile>
+
+      Example Usage:
+
+      (.venv) $ python3 TransactionalDatabase.py 50 10 100 db.txt
+
+
+
+    **Importing this algorithm into a python program**
+    --------------------------------------------------------
+    .. code-block:: python
+
+            from PAMI.extras.syntheticDataGenerator import TransactionalDatabase as db
+
+            obj = db.TransactionalDatabase(10, 5, 10)
+
+            obj.create()
+
+            obj.save('db.txt')
+
+            print(obj.getTransactions())
+
+
+    """
+
+    def __init__(self, databaseSize, avgItemsPerTransaction, numItems, sep="\t") -> None:
+        """
+        Initialize the transactional database with the given parameters
+
+        :param databaseSize: total number of transactions in the database
+        :type databaseSize: int
+        :param avgItemsPerTransaction: average number of items per transaction
+        :type avgItemsPerTransaction: int
+        :param numItems: total number of items
+        :type numItems: int
+        :param sep: separator to distinguish the items in a transaction
+        :type sep: str
+        """
+
+        self.databaseSize = databaseSize
+        self.avgItemsPerTransaction = avgItemsPerTransaction
+        self.numItems = numItems
+        self.sep = sep
+        self.db = []
+
+    def _generateArray(self, nums, avg, maxItems) -> list:
+        """
+        Generate nums random transaction lengths, each within [1, maxItems], whose mean is avg.
+
+        Random weights are scaled so that the lengths sum to nums * avg; rounding and clipping errors are
+        then removed by moving single units on randomly chosen entries that still have room. When
+        nums * avg cannot be reached inside the bounds, the closest reachable total is returned.
+
+        :param nums: number of values
+        :type nums: int
+        :param avg: average value
+        :type avg: int
+        :param maxItems: maximum value
+        :type maxItems: int
+
+        Returns:
+            lengths: numpy array of int - one transaction length per transaction
+        """
+
+        # random positive weights decide how the total is spread over the transactions
+        values = np.random.randint(1, avg * 2, nums)
+        weights = values / np.sum(values)
+
+        target = int(round(nums * avg))
+
+        # every transaction needs at least one item and cannot hold more distinct items than exist
+        lengths = np.clip(np.round(target * weights).astype(int), 1, maxItems)
+
+        difference = target - int(np.sum(lengths))
+        step = 1 if difference > 0 else -1
+        for _ in range(abs(difference)):
+            # only touch entries that stay inside [1, maxItems] after the adjustment
+            adjustable = np.flatnonzero(lengths < maxItems) if step > 0 else np.flatnonzero(lengths > 1)
+            if adjustable.size == 0:
+                break
+            lengths[np.random.choice(adjustable)] += step
+
+        # return the adjusted lengths; the raw random draw does not average to avg
+        return lengths
+
+    def create(self) -> None:
+        """
+        Generate the transactional database with the given input parameters.
+        Every transaction is a random sample, without repetition, of the item identifiers 1..numItems.
+        Returns: None
+        """
+
+        lengths = self._generateArray(self.databaseSize, self.avgItemsPerTransaction, self.numItems)
+
+        # build the pool of item identifiers once instead of once per transaction
+        itemIds = np.arange(1, self.numItems + 1)
+        self.db = [np.random.choice(itemIds, lengths[i], replace=False) for i in range(self.databaseSize)]
+
+    def save(self, filename) -> None:
+        """
+        Save the transactional database to a file, one transaction per line with items joined by self.sep
+
+        :param filename: name of the file
+        :type filename: str
+        """
+
+        separator = str(self.sep)
+        with open(filename, 'w') as f:
+            f.writelines(separator.join(map(str, line)) + '\n' for line in self.db)
+
+    def getTransactions(self, sep="\t") -> pd.DataFrame:
+        """
+        Get the transactional database in dataFrame format
+
+        :param sep: separator placed between the items of a transaction; independent of self.sep
+        :type sep: str
+
+        Returns:
+            db: pd.dataFrame - transactional database with the single column "Transactions"
+        """
+        column = "Transactions"
+        db = pd.DataFrame(columns=[column])
+        db[column] = [sep.join(map(str, line)) for line in self.db]
+        return db
+
+
+if __name__ == "__main__":
+
+    # One if/elif/else chain: with two independent `if` statements a successful 5-argument run fell
+    # through to the `else` of the second one and raised ValueError after the database had been saved.
+    if len(sys.argv) == 5:
+        obj = TransactionalDatabase(int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3]))
+        obj.create()
+        obj.save(sys.argv[4])
+    elif len(sys.argv) == 6:
+        obj = TransactionalDatabase(int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3]), sys.argv[4])
+        obj.create()
+        obj.save(sys.argv[5])
+    else:
+        raise ValueError("Invalid number of arguments. Args: or Args: ")
+
\ No newline at end of file
diff --git a/PAMI/extras/syntheticDataGenerator/_syntheticUtilityDatabase.py b/PAMI/extras/syntheticDataGenerator/_syntheticUtilityDatabase.py
new file mode 100644
index 00000000..a9127090
--- /dev/null
+++ b/PAMI/extras/syntheticDataGenerator/_syntheticUtilityDatabase.py
@@ -0,0 +1,107 @@
+import random as _rd
+
+
+class syntheticUtilityDatabase:
+    """
+    This class creates a synthetic utility database.
+
+    Attributes:
+        totalTransactions (int): Number of transactions.
+        numOfItems (int): Number of items.
+        maxUtilRange (int): Maximum utility range.
+        avgTransactionLength (int): The length of average transaction.
+        transactions (list): Rows written out by save(); initialised empty and not filled by this class.
+
+    Methods:
+        __init__(totalTransactions, numOfItems, maxUtilRange, avgTransactionLength)
+            Constructor to initialize the database parameters.
+        createSyntheticUtilityDatabase(outputFile)
+            Create utility database and store it in the specified output file.
+        createRandomNumbers(n, targetSum)
+            Generate a list of random numbers with a specified target sum.
+        save(outputFile)
+            Save the generated utility database to a CSV file.
+
+    Credits:
+        The complete program was written by A.Hemanth sree sai under the supervision of Professor Rage Uday Kiran.
+    """
+
+    def __init__(self, totalTransactions: int, numOfItems: int, maxUtilRange: int, avgTransactionLength: int) -> None:
+        """
+        Constructor to initialize the database parameters.
+
+        Parameters:
+            totalTransactions (int): Number of transactions.
+            numOfItems (int): Number of items.
+            maxUtilRange (int): Maximum utility range.
+            avgTransactionLength (int): The length of average transaction.
+        """
+        self.totalTransactions = totalTransactions
+        self.numOfItems = numOfItems
+        self.maxUtilRange = maxUtilRange
+        self.avgTransactionLength = avgTransactionLength
+        self.transactions = []
+
+    def createSyntheticUtilityDatabase(self, outputFile: str) -> None:
+        """
+        Create utility database and store it in the specified output file.
+
+        One line is written per transaction: the tab-joined items, a tab and a
+        colon, the sum of the drawn utilities, a colon, then 13 tab-joined
+        numbers from createRandomNumbers(13, 2000) followed by a trailing tab.
+        The transaction length is drawn uniformly from
+        1..avgTransactionLength + 20 and items are drawn with replacement.
+
+        If avgTransactionLength exceeds numOfItems an error is printed and
+        nothing is written.
+
+        Parameters:
+            outputFile (str): File name or path to store the database.
+        """
+        # Validated once up front: the parameters cannot change while the file
+        # is being written, so repeating this check per transaction was dead code.
+        if self.avgTransactionLength > self.numOfItems:
+            print("Error: avgTransactionLength cannot exceed numOfItems.")
+            return
+
+        with open(outputFile, 'w') as writer:
+            for _ in range(self.totalTransactions):
+                length = _rd.randint(1, self.avgTransactionLength + 20)
+                items = [_rd.randint(1, self.numOfItems) for _pos in range(length)]
+                utilities = [_rd.randint(1, self.maxUtilRange) for _pos in range(length)]
+
+                # Generating 13 random numbers with a target sum of 2000
+                randomNumbers = self.createRandomNumbers(13, 2000)
+
+                st = '\t'.join(map(str, items)) + '\t:' + str(sum(utilities)) + ':'
+                st1 = '\t'.join(map(str, randomNumbers)) + '\t'
+
+                writer.write(f"{st}{st1}\n")
+
+    def createRandomNumbers(self, n: int, targetSum: int) -> list[int]:
+        """
+        Generate a list of random numbers with a specified target sum.
+
+        Each value is rounded to an integer independently, so the returned
+        numbers add up to approximately (not necessarily exactly) targetSum.
+
+        Parameters:
+            n (int): Number of random numbers to generate.
+            targetSum (int): Target sum for the generated random numbers.
+
+        Returns:
+            list: List of generated random numbers normalized, multiplied by the target sum and rounded.
+        """
+        rawValues = [_rd.uniform(0, 1) for _ in range(n)]
+        total = sum(rawValues)
+        return [round(value / total * targetSum) for value in rawValues]
+
+    def save(self, outputFile: str) -> None:
+        """
+        Save the generated utility database to a CSV file.
+
+        Every entry of self.transactions is written tab-joined on its own line.
+        NOTE(review): createSyntheticUtilityDatabase writes straight to its own
+        output file and never fills self.transactions, so unless a caller
+        populates that list this produces an empty file.
+
+        Parameters:
+            outputFile (str): File name or path to store the CSV file.
+        """
+        with open(outputFile, 'w') as f:
+            for transaction in self.transactions:
+                f.write('\t'.join(map(str, transaction)) + '\n')
+
+
+if __name__ == "__main__":
+    # Demo run with fixed parameters. There is deliberately no "else" branch on this
+    # guard: an else here executes on every import of the module, which made a plain
+    # import print a spurious parameter-count error.
+    ap = syntheticUtilityDatabase(100000, 870, 100, 10)
+    ap.createSyntheticUtilityDatabase("T10_util-12.csv")
diff --git a/PAMI/extras/syntheticDataGenerator/syntheticUtilityDatabase.py b/PAMI/extras/syntheticDataGenerator/syntheticUtilityDatabase.py
index a9127090..5d1d6198 100644
--- a/PAMI/extras/syntheticDataGenerator/syntheticUtilityDatabase.py
+++ b/PAMI/extras/syntheticDataGenerator/syntheticUtilityDatabase.py
@@ -1,15 +1,27 @@
import random as _rd
-
+import psutil,os,time
class syntheticUtilityDatabase:
"""
This class creates a synthetic utility database.
Attributes:
- totalTransactions (int): Number of transactions.
- numOfItems (int): Number of items.
- maxUtilRange (int): Maximum utility range.
- avgTransactionLength (int): The length of average transaction.
+ totalTransactions :int
+ Number of transactions.
+ numOfItems : int
+ Number of items.
+ maxUtilRange : int
+ Maximum utility range.
+ avgTransactionLength : int
+ The length of average transaction.
+ memoryUSS : float
+ To store the total amount of USS memory consumed by the program
+ memoryRSS : float
+ To store the total amount of RSS memory consumed by the program
+ startTime : float
+ To record the start time of the database generation process
+ endTime : float
+ To record the completion time of the database generation process
Methods:
__init__(totalTransactions, numOfItems, maxUtilRange, avgTransactionLength)
@@ -20,7 +32,12 @@ class syntheticUtilityDatabase:
Generate a list of random numbers with a specified target sum.
save(outputFile)
Save the generated utility database to a CSV file.
-
+ getMemoryUSS()
+ Total amount of USS memory consumed by the database generation process will be retrieved from this function
+ getMemoryRSS()
+ Total amount of RSS memory consumed by the database generation process will be retrieved from this function
+ getRuntime()
+ Total amount of runtime taken by the database generation process will be retrieved from this function
Credits:
The complete program was written by A.Hemanth sree sai under the supervision of Professor Rage Uday Kiran.
"""
@@ -40,6 +57,10 @@ def __init__(self, totalTransactions: int, numOfItems: int, maxUtilRange: int, a
self.maxUtilRange = maxUtilRange
self.avgTransactionLength = avgTransactionLength
self.transactions = []
+ self._startTime = float()
+ self._endTime = float()
+ self._memoryUSS = float()
+ self._memoryRSS = float()
def createSyntheticUtilityDatabase(self, outputFile: str) -> None:
"""
@@ -48,6 +69,7 @@ def createSyntheticUtilityDatabase(self, outputFile: str) -> None:
Parameters:
outputFile (str): File name or path to store the database.
"""
+ self._startTime = time.time()
if self.avgTransactionLength > self.numOfItems:
print("Error: avgTransactionLength cannot exceed numOfItems.")
return
@@ -70,6 +92,10 @@ def createSyntheticUtilityDatabase(self, outputFile: str) -> None:
st1 = '\t'.join(map(str, randomNumbers)) + '\t'
writer.write(f"{st}{st1}\n")
+ process = psutil.Process(os.getpid())
+ self._memoryUSS = process.memory_full_info().uss
+ self._memoryRSS = process.memory_info().rss
+ self._endTime = time.time()
def createRandomNumbers(self, n: int, targetSum: int) -> list[float]:
"""
@@ -99,9 +125,43 @@ def save(self, outputFile: str) -> None:
for transaction in self.transactions:
f.write('\t'.join(map(str, transaction)) + '\n')
+    def getMemoryUSS(self) -> float:
+        """
+        Return the USS memory figure stored in ``self._memoryUSS``.
+
+        :return: USS memory recorded for the database generation process
+        :rtype: float
+        """
+        recordedUSS = self._memoryUSS
+        return recordedUSS
+
+    def getMemoryRSS(self) -> float:
+        """
+        Return the RSS memory figure stored in ``self._memoryRSS``.
+
+        :return: RSS memory recorded for the database generation process
+        :rtype: float
+        """
+        recordedRSS = self._memoryRSS
+        return recordedRSS
+
+    def getRuntime(self) -> float:
+        """
+        Return the elapsed time between the recorded start and end timestamps.
+
+        :return: ``self._endTime - self._startTime``
+        :rtype: float
+        """
+        finished = self._endTime
+        started = self._startTime
+        return finished - started
if __name__ == "__main__":
- ap = syntheticUtilityDatabase(100000, 870, 100, 10)
- ap.createSyntheticUtilityDatabase("T10_util-12.csv")
+ obj = syntheticUtilityDatabase(100000, 870, 100, 10)
+ obj.createSyntheticUtilityDatabase("T10_util-12.csv")
+ print("create SyntheticUtilityDatabase is complete.")
+ print("Total Memory in USS:", obj.getMemoryUSS())
+ print("Total Memory in RSS", obj.getMemoryRSS())
+ print("Total ExecutionTime in seconds:", obj.getRuntime())
else:
 print("Error! The number of input parameters does not match the total number of parameters provided")