Skip to content

Commit

Permalink
Fixing and improving memory size accounting
Browse files (browse the repository at this point in the history)
  • Loading branch information
smastelini committed Oct 29, 2018
1 parent b131b58 commit 1bb55df
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 120 deletions.
2 changes: 1 addition & 1 deletion src/skmultiflow/evaluation/base_evaluator.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -431,7 +431,7 @@ def _update_metrics(self):
elif metric == constants.MODEL_SIZE:
values = []
for i in range(self.n_models):
values.append(calculate_object_size(self.model[i]))
values.append(calculate_object_size(self.model[i], 'kB'))

else:
raise ValueError('Unknown metric {}'.format(metric))
Expand Down
17 changes: 9 additions & 8 deletions src/skmultiflow/meta/adaptive_random_forests.py
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,14 @@
from copy import deepcopy
import numpy as np
from sklearn.preprocessing import normalize

import math
from skmultiflow.core.base import StreamModel
from skmultiflow.core.base_object import BaseObject
from skmultiflow.drift_detection.base_drift_detector import BaseDriftDetector
from skmultiflow.trees.hoeffding_tree import *
from skmultiflow.drift_detection.adwin import ADWIN
from skmultiflow.trees.arf_hoeffding_tree import ARFHoeffdingTree
from skmultiflow.metrics.measure_collection import ClassificationMeasurements
from skmultiflow.utils.utils import get_dimensions, normalize_values_in_dict
from skmultiflow.utils import check_random_state, check_weights


Expand Down Expand Up @@ -153,7 +154,7 @@ def __init__(self,
nominal_attributes=None,
random_state=None):
"""AdaptiveRandomForest class constructor."""
super().__init__()
super().__init__()
self.n_estimators = n_estimators
self.max_features = max_features
self.disable_weighted_vote = disable_weighted_vote
Expand Down Expand Up @@ -273,7 +274,7 @@ def predict_proba(self, X):

return normalize(y_proba_mean, norm='l1')

def reset(self):
def reset(self):
"""Reset ARF."""
self.ensemble = None
self.max_features = 0
Expand Down Expand Up @@ -380,7 +381,7 @@ def _set_max_features(self, n):

@staticmethod
def is_randomizable():
return True
return True


class ARFBaseLearner(BaseObject):
Expand Down Expand Up @@ -415,7 +416,7 @@ def __init__(self,
warning_detection_method: BaseDriftDetector,
is_background_learner):
self.index_original = index_original
self.classifier = classifier
self.classifier = classifier
self.created_on = instances_seen
self.is_background_learner = is_background_learner
self.evaluator_method = ClassificationMeasurements
Expand All @@ -427,7 +428,7 @@ def __init__(self,
self.last_drift_on = 0
self.last_warning_on = 0
self.nb_drifts_detected = 0
self.nb_warnings_detected = 0
self.nb_warnings_detected = 0

self.drift_detection = None
self.warning_detection = None
Expand All @@ -452,7 +453,7 @@ def reset(self, instances_seen):
self.warning_detection = self.background_learner.warning_detection
self.drift_detection = self.background_learner.drift_detection
self.evaluator_method = self.background_learner.evaluator_method
self.created_on = self.background_learner.created_on
self.created_on = self.background_learner.created_on
self.background_learner = None
else:
self.classifier.reset()
Expand Down
7 changes: 6 additions & 1 deletion src/skmultiflow/trees/arf_hoeffding_tree.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,10 @@
from skmultiflow.trees.hoeffding_tree import *
import numpy as np
from skmultiflow.trees.hoeffding_tree import HoeffdingTree, MAJORITY_CLASS, NAIVE_BAYES
from skmultiflow.utils import check_random_state
from skmultiflow.trees.nominal_attribute_class_observer import NominalAttributeClassObserver
from skmultiflow.trees.numeric_attribute_class_observer_gaussian import NumericAttributeClassObserverGaussian
from skmultiflow.utils.utils import get_dimensions
from skmultiflow.trees.utils import do_naive_bayes_prediction


class ARFHoeffdingTree(HoeffdingTree):
Expand Down
20 changes: 0 additions & 20 deletions src/skmultiflow/trees/hoeffding_adaptive_tree.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -145,20 +145,6 @@ def __init__(self, split_test, class_observations):
self._random_seed = 1
self._classifier_random = check_random_state(self._random_seed)

# Override SplitNode
def calc_byte_size_including_subtree(self):
byte_size = self.__sizeof__()
if self._alternate_tree is not None:
byte_size += self._alternate_tree.calc_byte_size_including_subtree()
if self._estimation_error_weight is not None:
byte_size += self._estimation_error_weight.get_length_estimation()

for child in self._children:
if child is not None:
byte_size += child.calc_byte_size_including_subtree()

return byte_size

# Override NewNode
def number_leaves(self):
num_of_leaves = 0
Expand Down Expand Up @@ -303,12 +289,6 @@ def __init__(self, initial_class_observations):
self._randomSeed = 1
self._classifier_random = check_random_state(self._randomSeed)

def calc_byte_size(self):
byte_size = self.__sizeof__()
if self._estimation_error_weight is not None:
byte_size += self._estimation_error_weight.get_length_estimation()
return byte_size

# Override NewNode
def number_leaves(self):
return 1
Expand Down
74 changes: 6 additions & 68 deletions src/skmultiflow/trees/hoeffding_tree.py
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
import sys
import logging
import textwrap
from abc import ABCMeta
from operator import attrgetter
from skmultiflow.utils.utils import *
import numpy as np
from skmultiflow.utils.utils import get_dimensions, normalize_values_in_dict, calculate_object_size
from skmultiflow.core.base import StreamModel
from skmultiflow.trees.numeric_attribute_class_observer_gaussian import NumericAttributeClassObserverGaussian
from skmultiflow.trees.nominal_attribute_class_observer import NominalAttributeClassObserver
Expand All @@ -12,7 +12,6 @@
from skmultiflow.trees.gini_split_criterion import GiniSplitCriterion
from skmultiflow.trees.info_gain_split_criterion import InfoGainSplitCriterion
from skmultiflow.trees.utils import do_naive_bayes_prediction
from skmultiflow.utils.utils import normalize_values_in_dict

GINI_SPLIT = 'gini'
INFO_GAIN_SPLIT = 'info_gain'
Expand Down Expand Up @@ -224,28 +223,6 @@ def calculate_promise(self):
else:
return 0

def __sizeof__(self):
""" Calculate the size of the node.
Returns
-------
int
Size of the node in bytes.
"""
return object.__sizeof__(self) + sys.getsizeof(self._observed_class_distribution)

def calc_byte_size_including_subtree(self):
""" Calculate the size of the node including its subtree.
Returns
-------
int
Size of the node and its subtree in bytes.
"""
return self.__sizeof__()

def describe_subtree(self, ht, buffer, indent=0):
""" Walk the tree and write its structure to a buffer string.
Expand Down Expand Up @@ -396,31 +373,6 @@ def subtree_depth(self):
max_child_depth = depth
return max_child_depth + 1

def __sizeof__(self):
""" Calculate the size of the node.
Returns
-------
int
Size of the node in bytes.
"""
return object.__sizeof__(self) + sys.getsizeof(self._children) + sys.getsizeof(self._split_test)

def calc_byte_size_including_subtree(self):
""" Calculate the size of the node including its subtree.
Returns
-------
int
Size of the node and its subtree in bytes.
"""
byte_size = self.__sizeof__()
for child in self._children.values():
if child is not None:
byte_size += child.calc_byte_size_including_subtree()
return byte_size

def describe_subtree(self, ht, buffer, indent=0):
""" Walk the tree and write its structure to a buffer string.
Expand Down Expand Up @@ -904,20 +856,6 @@ def classes(self):
def classes(self, value):
self._classes = value

def __sizeof__(self):
""" Calculate the size of the tree.
Returns
-------
int
Size of the tree in bytes.
"""
size = object.__sizeof__(self)
if self._tree_root is not None:
size += self._tree_root.calc_byte_size_including_subtree()
return size

def measure_byte_size(self):
""" Calculate the size of the tree.
Expand All @@ -927,7 +865,7 @@ def measure_byte_size(self):
Size of the tree in bytes.
"""
return self.__sizeof__()
return calculate_object_size(self)

def reset(self):
""" Reset the Hoeffding Tree to default values."""
Expand Down Expand Up @@ -1331,14 +1269,14 @@ def estimate_model_byte_size(self):
total_inactive_size = 0
for found_node in learning_nodes:
if isinstance(found_node.node, self.ActiveLearningNode):
total_active_size += sys.getsizeof(found_node.node)
total_active_size += calculate_object_size(found_node.node)
else:
total_inactive_size += sys.getsizeof(found_node.node)
total_inactive_size += calculate_object_size(found_node.node)
if total_active_size > 0:
self._active_leaf_byte_size_estimate = total_active_size / self._active_leaf_node_cnt
if total_inactive_size > 0:
self._inactive_leaf_byte_size_estimate = total_inactive_size / self._inactive_leaf_node_cnt
actual_model_size = self.measure_byte_size()
actual_model_size = calculate_object_size(self)
estimated_model_size = (self._active_leaf_node_cnt * self._active_leaf_byte_size_estimate
+ self._inactive_leaf_node_cnt * self._inactive_leaf_byte_size_estimate)
self._byte_size_estimate_overhead_fraction = actual_model_size / estimated_model_size
Expand Down
20 changes: 0 additions & 20 deletions src/skmultiflow/trees/regression_hoeffding_adaptive_tree.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -108,20 +108,6 @@ def __init__(self, split_test, class_observations):
self._random_seed = 1
self._classifier_random = check_random_state(self._random_seed)

# Override SplitNode
def calc_byte_size_including_subtree(self):
byte_size = self.__sizeof__()
if self._alternate_tree is not None:
byte_size += self._alternate_tree.calc_byte_size_including_subtree()
if self._estimation_error_weight is not None:
byte_size += self._estimation_error_weight.get_length_estimation()

for child in self._children:
if child is not None:
byte_size += child.calc_byte_size_including_subtree()

return byte_size

# Override NewNode
def number_leaves(self):
num_of_leaves = 0
Expand Down Expand Up @@ -274,12 +260,6 @@ def __init__(self, initial_class_observations, perceptron_weight, random_state=N
self._randomSeed = 1
self._classifier_random = check_random_state(self._randomSeed)

def calc_byte_size(self):
byte_size = self.__sizeof__()
if self._estimation_error_weight is not None:
byte_size += self._estimation_error_weight.get_length_estimation()
return byte_size

# Override NewNode
def number_leaves(self):
return 1
Expand Down
4 changes: 2 additions & 2 deletions src/skmultiflow/utils/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -79,7 +79,7 @@ def get_max_value_key(dictionary):
return 0


def calculate_object_size(obj, unit='kB'):
def calculate_object_size(obj, unit='byte'):
"""Iteratively calculates the `obj` size in bytes.
Visits all the elements related to obj accounting for their respective
Expand All @@ -91,7 +91,7 @@ def calculate_object_size(obj, unit='kB'):
Object to evaluate.
string: unit
The unit in which the accounted value is going to be returned.
Values: 'byte', 'kB', 'MB' (Default: 'kB').
Values: 'byte', 'kB', 'MB' (Default: 'byte').
Returns
-------
Expand Down

0 comments on commit 1bb55df

Please sign in to comment.