diff --git a/.gitignore b/.gitignore
index 8a9883a..84739a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,5 @@
 .idea
 .DS_Store
+
+.coverage
diff --git a/binpacking/__init__.py b/binpacking/__init__.py
index 761fa29..384a22e 100644
--- a/binpacking/__init__.py
+++ b/binpacking/__init__.py
@@ -1,3 +1,5 @@
 from binpacking.utilities import load_csv, save_csvs, print_binsizes
 from binpacking.to_constant_bin_number import to_constant_bin_number, csv_to_constant_bin_number
 from binpacking.to_constant_volume import to_constant_volume, csv_to_constant_volume
+
+__version__ = '1.5.0'
diff --git a/binpacking/tests/constant_bin_number.py b/binpacking/tests/constant_bin_number.py
index 19ca943..cc2d8f5 100644
--- a/binpacking/tests/constant_bin_number.py
+++ b/binpacking/tests/constant_bin_number.py
@@ -29,4 +29,44 @@ def test_key_func():
     for bin_ in bins:
         for item in bin_:
             assert 'x' in item
-            assert 'y' in item
\ No newline at end of file
+            assert 'y' in item
+
+def test_bounds_and_tuples():
+    c = [ ('a', 10, 'foo'), ('b', 10, 'log'), ('c', 11), ('d', 1, 'bar'), ('e', 2, 'bommel'), ('f',7,'floggo') ]
+    N_bin = 4
+
+    bins = to_constant_bin_number(c,N_bin,weight_pos=1,upper_bound=11)
+    bins = [ sorted(_bin, key=lambda x:x[0]) for _bin in bins ]
+    assert bins == [
+            [('a', 10, 'foo')],
+            [('b', 10, 'log')],
+            [('f', 7, 'floggo')],
+            [
+                ('d', 1, 'bar'),
+                ('e', 2, 'bommel'),
+            ]
+        ]
+
+    bins = to_constant_bin_number(c,N_bin,weight_pos=1,lower_bound=1)
+    bins = [ sorted(_bin, key=lambda x:x[0]) for _bin in bins ]
+    assert bins == [
+            [('c', 11,)],
+            [('a', 10, 'foo')],
+            [('b', 10, 'log')],
+            [
+                ('e', 2, 'bommel'),
+                ('f', 7, 'floggo'),
+            ],
+        ]
+
+    bins = to_constant_bin_number(c,N_bin,weight_pos=1,lower_bound=1,upper_bound=11)
+    bins = [ sorted(_bin, key=lambda x:x[0]) for _bin in bins ]
+    assert bins == [
+            [('a', 10, 'foo')],
+            [('b', 10, 'log')],
+            [('f', 7, 'floggo')],
+            [('e', 2, 'bommel')],
+        ]
+
+if __name__=="__main__":
+    test_bounds_and_tuples()
diff --git a/binpacking/tests/constant_volume.py b/binpacking/tests/constant_volume.py
index 7b63ccd..ad0d5d6 100644
--- a/binpacking/tests/constant_volume.py
+++ b/binpacking/tests/constant_volume.py
@@ -35,3 +35,47 @@ def test_no_fit():
     values = [42, 24]
     bins = to_constant_volume(values, 20)
     assert bins == [[42], [24]]
+
+
+def test_bounds_and_tuples():
+    c = [ ('a', 10, 'foo'), ('b', 10, 'log'), ('c', 11), ('d', 1, 'bar'), ('e', 2, 'bommel'), ('f',7,'floggo') ]
+    V_max = 11
+
+    bins = to_constant_volume(c,V_max,weight_pos=1,upper_bound=11)
+    bins = [ sorted(_bin, key=lambda x:x[0]) for _bin in bins ]
+    assert bins == [
+            [('a', 10, 'foo'), ('d', 1, 'bar')],
+            [('b', 10, 'log')],
+            [
+                ('e', 2, 'bommel'),
+                ('f', 7, 'floggo'),
+            ],
+        ]
+
+    bins = to_constant_volume(c,V_max,weight_pos=1,lower_bound=1)
+    bins = [ sorted(_bin, key=lambda x:x[0]) for _bin in bins ]
+    assert bins == [
+            [('c', 11,)],
+            [('a', 10, 'foo')],
+            [('b', 10, 'log')],
+            [
+                ('e', 2, 'bommel'),
+                ('f', 7, 'floggo'),
+            ],
+        ]
+
+    bins = to_constant_volume(c,V_max,weight_pos=1,lower_bound=1,upper_bound=11)
+    bins = [ sorted(_bin, key=lambda x:x[0]) for _bin in bins ]
+    assert bins == [
+            [('a', 10, 'foo')],
+            [('b', 10, 'log')],
+            [
+                ('e', 2, 'bommel'),
+                ('f', 7, 'floggo'),
+            ],
+        ]
+
+
+
+if __name__=="__main__":
+    test_bounds_and_tuples()
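For reference, the bound semantics these new tests pin down can be reproduced directly. This is a minimal sketch, not part of the diff; it assumes the package is importable as `binpacking` and mirrors the test fixtures above. Note that the bounds are exclusive: with `upper_bound=11` the item `('c', 11)` is dropped, and with `lower_bound=1` the item `('d', 1, 'bar')` is dropped.

```python
from binpacking import to_constant_bin_number, to_constant_volume

c = [('a', 10, 'foo'), ('b', 10, 'log'), ('c', 11), ('d', 1, 'bar'),
     ('e', 2, 'bommel'), ('f', 7, 'floggo')]

# weight_pos=1 selects the second tuple entry as the weight
bins = to_constant_bin_number(c, 4, weight_pos=1, upper_bound=11)
print(bins)   # ('c', 11) does not appear in any bin

bins = to_constant_volume(c, 11, weight_pos=1, lower_bound=1)
print(bins)   # ('d', 1, 'bar') does not appear in any bin
```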
diff --git a/binpacking/to_constant_bin_number.py b/binpacking/to_constant_bin_number.py
index 5ae6f54..ec779de 100644
--- a/binpacking/to_constant_bin_number.py
+++ b/binpacking/to_constant_bin_number.py
@@ -1,58 +1,102 @@
 from __future__ import print_function
 from builtins import range
-from binpacking.utilities import load_csv, save_csvs, print_binsizes
-
-import numpy as np
-
-def csv_to_constant_bin_number(filepath,weight_column,N_bin,has_header=False,delim=',',quotechar='"',lower_bound=None,upper_bound=None):
-
-    data, weight_column, header = load_csv(filepath,weight_column,has_header=has_header,delim=delim,quotechar=quotechar)
-
-    bins = to_constant_bin_number(data,N_bin,weight_pos=weight_column,lower_bound=lower_bound,upper_bound=upper_bound)
-    print_binsizes(bins,weight_column)
-
-    save_csvs(bins,filepath,header,delim=delim,quotechar=quotechar)
-
-
-def to_constant_bin_number(d,N_bin,weight_pos=None,key=None,lower_bound=None,upper_bound=None):
-    '''
+from binpacking.utilities import (
+        load_csv,
+        save_csvs,
+        print_binsizes,
+        get,
+        argmin,
+        revargsort,
+    )
+
+def csv_to_constant_bin_number(filepath,
+                               weight_column,
+                               N_bin,
+                               has_header=False,
+                               delim=',',
+                               quotechar='"',
+                               lower_bound=None,
+                               upper_bound=None,
+                               ):
+    """
+    Load a csv file, binpack the rows according to one of the columns
+    to a constant number of bins.
+    Write a new csv file for each bin, containing
+    the corresponding rows.
+    """
+
+    data, weight_column, header = load_csv(filepath,
+                                           weight_column,
+                                           has_header=has_header,
+                                           delim=delim,
+                                           quotechar=quotechar,
+                                           )
+
+    bins = to_constant_bin_number(data,
+                                  N_bin,
+                                  weight_pos=weight_column,
+                                  lower_bound=lower_bound,
+                                  upper_bound=upper_bound,
+                                  )
+    print_binsizes(bins, weight_column)
+
+    save_csvs(bins,
+              filepath,
+              header,
+              delim=delim,
+              quotechar=quotechar,
+              )
+
+
+def to_constant_bin_number(d,
+                           N_bin,
+                           weight_pos=None,
+                           key=None,
+                           lower_bound=None,
+                           upper_bound=None,
+                           ):
+    """
     Distributes a list of weights, a dictionary of weights or a list of tuples
     containing weights
     to a fixed number of bins while trying to keep the weight distribution constant.
-    INPUT:
-    --- d: list containing weights,
-           OR dictionary where each (key,value)-pair carries the weight as value,
-           OR list of tuples where one entry in the tuple is the weight. The position of
-           this weight has to be given in optional variable weight_pos
-
-    optional:
-    ~~~ weight_pos: int -- if d is a list of tuples, this integer number gives the position of the weight in a tuple
-    ~~~ key: function -- if d is a list, this key functions grabs the weight for an item
-    ~~~ lower_bound: weights under this bound are not considered
-    ~~~ upper_bound: weights exceeding this bound are not considered
-    '''
-
-    #define functions for the applying the bounds
-    if lower_bound is not None and upper_bound is not None and lower_bound<upper_bound:
-        get_valid_weight_ndcs = lambda a: np.nonzero(np.logical_and(a>lower_bound,a<upper_bound))[0]
-    elif lower_bound is not None:
-        get_valid_weight_ndcs = lambda a: np.nonzero(a>lower_bound)[0]
-    elif upper_bound is not None:
-        get_valid_weight_ndcs = lambda a: np.nonzero(a<upper_bound)[0]
-    elif lower_bound is None and upper_bound is None:
-        get_valid_weight_ndcs = lambda a: np.arange(len(a))
-    elif lower_bound>=upper_bound:
-        raise Exception("lower_bound is greater or equal to upper_bound")
-
+
+    Parameters
+    ==========
+    d : iterable
+        list containing weights,
+        OR dictionary where each (key,value)-pair carries the weight as value,
+        OR list of tuples where one entry in the tuple is the weight. The position of
+        this weight has to be given in optional variable weight_pos
+    N_bin : int
+        Number of bins to distribute items to.
+    weight_pos : int, default = None
+        if d is a list of tuples, this integer number gives the position of the weight in a tuple
+    key : function, default = None
+        if d is a list, this key functions grabs the weight for an item
+    lower_bound : float, default = None
+        weights under this bound are not considered
+    upper_bound : float, default = None
+        weights exceeding this bound are not considered
+
+    Returns
+    =======
+    bins : list
+        A list of length ``N_bin``. Each entry is a list of items or
+        a dict of items, depending on the type of ``d``.
+    """
+
     isdict = isinstance(d,dict)
 
-    if isinstance(d, list) and hasattr(d[0], '__len__'):
+    if not hasattr(d,'__len__'):
+        raise TypeError("d must be iterable")
+
+    if not isdict and hasattr(d[0], '__len__'):
         if weight_pos is not None:
             key = lambda x: x[weight_pos]
         if key is None:
             raise ValueError("Must provide weight_pos or key for tuple list")
-
-    if isinstance(d, list) and key:
+
+    if not isdict and key:
         new_dict = {i: val for i, val in enumerate(d)}
         d = {i: key(val) for i, val in enumerate(d)}
         isdict = True
@@ -64,45 +108,57 @@ def to_constant_bin_number(d,N_bin,weight_pos=None,key=None,lower_bound=None,upp
 
         #get keys and values (weights)
         keys_vals = d.items()
-        keys = np.array([ k for k,v in keys_vals ])
-        vals = np.array([ v for k,v in keys_vals ])
+        keys = [ k for k, v in keys_vals ]
+        vals = [ v for k, v in keys_vals ]
 
         #sort weights decreasingly
-        ndcs = np.argsort(vals)[::-1]
+        ndcs = revargsort(vals)
 
-        weights = vals[ndcs]
-        keys = keys[ndcs]
+        weights = get(vals, ndcs)
+        keys = get(keys, ndcs)
 
         bins = [ {} for i in range(N_bin) ]
     else:
-        weights = np.sort(np.array(d))[::-1]
+        weights = sorted(d,key=lambda x: -x)
         bins = [ [] for i in range(N_bin) ]
 
     #find the valid indices
-    valid_ndcs = get_valid_weight_ndcs(weights)
-    weights = weights[valid_ndcs]
+    if lower_bound is not None and upper_bound is not None and lower_bound<upper_bound:
+        valid_ndcs = filter(lambda i: lower_bound < weights[i] < upper_bound,range(len(weights)))
+    elif lower_bound is not None:
+        valid_ndcs = filter(lambda i: lower_bound < weights[i],range(len(weights)))
+    elif upper_bound is not None:
+        valid_ndcs = filter(lambda i: weights[i] < upper_bound,range(len(weights)))
+    elif lower_bound is None and upper_bound is None:
+        valid_ndcs = range(len(weights))
+    elif lower_bound >= upper_bound:
+        raise Exception("lower_bound is greater or equal to upper_bound")
+
+    valid_ndcs = list(valid_ndcs)
+
+    weights = get(weights, valid_ndcs)
 
     if isdict:
-        keys = keys[valid_ndcs]
+        keys = get(keys, valid_ndcs)
 
     #the total volume is the sum of all weights
-    V_total = weights.sum()
+    V_total = sum(weights)
 
     #the first estimate of the maximum bin volume is
     #the total volume divided to all bins
     V_bin_max = V_total / float(N_bin)
-
+
     #prepare array containing the current weight of the bins
-    weight_sum = np.zeros(N_bin)
+    weight_sum = [0. for n in range(N_bin) ]
 
     #iterate through the weight list, starting with heaviest
-    for item,weight in enumerate(weights):
-
+    for item, weight in enumerate(weights):
+
         if isdict:
             key = keys[item]
 
         #put next value in bin with lowest weight sum
-        b = np.argmin(weight_sum)
+        b = argmin(weight_sum)
 
         #calculate new weight of this bin
         new_weight_sum = weight_sum[b] + weight
@@ -127,7 +183,7 @@ def to_constant_bin_number(d,N_bin,weight_pos=None,key=None,lower_bound=None,upp
         else:
             #if not, increase the max volume by the sum of
             #the rest of the bins per bin
-            V_bin_max += np.sum(weights[item:]) / float(N_bin)
+            V_bin_max += sum(weights[item:]) / float(N_bin)
 
     if not is_tuple_list:
         return bins
@@ -138,13 +194,10 @@ def to_constant_bin_number(d,N_bin,weight_pos=None,key=None,lower_bound=None,upp
             for key in bins[b]:
                 new_bins[b].append(new_dict[key])
         return new_bins
-
-
-
-
 if __name__=="__main__":
     import pylab as pl
+    import numpy as np
 
     a = np.random.power(0.01,size=1000)
     N_bin = 9
@@ -158,7 +211,7 @@ def to_constant_bin_number(d,N_bin,weight_pos=None,key=None,lower_bound=None,upp
     #plot distribution
     pl.plot(np.arange(N_bin),[np.sum(b) for b in bins])
     pl.ylim([0,max([np.sum(b) for b in bins])+0.1])
-
+
     b = { 'a': 10, 'b': 10, 'c':11, 'd':1, 'e': 2,'f':7 }
     bins = to_constant_bin_number(b,4)
     print("===== dict\n",b,"\n",bins)
@@ -166,7 +219,7 @@ def to_constant_bin_number(d,N_bin,weight_pos=None,key=None,lower_bound=None,upp
     lower_bound = None
     upper_bound = None
 
-    b = ( ('a', 10), ('b', 10), ('c',11), ('d',1), ('e', 2),('f',7) )
+    b = [ ('a', 10), ('b', 10), ('c',11), ('d',1), ('e', 2),('f',7,'foo') ]
     bins = to_constant_bin_number(b,4,weight_pos=1,lower_bound=lower_bound,upper_bound=upper_bound)
     print("===== list of tuples\n",b,"\n",bins)
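A short usage sketch of the rewritten `to_constant_bin_number`, mirroring the dict and tuple examples in the `__main__` block above. This is illustrative only and not part of the diff; the exact assignment of equal-weight items to bins may vary, but each of the `N_bin` bins ends up close to the average weight.

```python
from binpacking import to_constant_bin_number

# dict input: keys are preserved, values are the weights
b = {'a': 10, 'b': 10, 'c': 11, 'd': 1, 'e': 2, 'f': 7}
bins = to_constant_bin_number(b, 4)
print(bins)
# roughly [{'c': 11}, {'a': 10}, {'b': 10}, {'f': 7, 'e': 2, 'd': 1}]
# each bin is close to sum(b.values()) / 4 == 10.25

# list-of-tuples input: weight_pos selects the weight entry,
# tuples of different lengths are allowed
c = [('a', 10), ('b', 10), ('c', 11), ('d', 1), ('e', 2), ('f', 7, 'foo')]
bins = to_constant_bin_number(c, 4, weight_pos=1)
print(bins)
```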
+ """ + + + data, weight_column, header = load_csv(filepath, + weight_column, + has_header=has_header, + delim=',', + quotechar='"', + ) + + bins = to_constant_volume(data, + V_max, + weight_pos=weight_column, + lower_bound=lower_bound, + upper_bound=upper_bound, + ) + + print_binsizes(bins, weight_column) + + save_csvs(bins, + filepath, + header, + delim=delim, + quotechar=quotechar, + ) + + +def to_constant_volume(d, + V_max, + weight_pos=None, + key=None, + lower_bound=None, + upper_bound=None, + ): + """ Distributes a list of weights, a dictionary of weights or a list of tuples containing weights - to a minimal number of bins which have a fixed volume. - INPUT: - --- d: list containing weights, - OR dictionary where each (key,value)-pair carries the weight as value, - OR list of tuples where one entry in the tuple is the weight. The position of - this weight has to be given in optional variable weight_pos - - optional: - ~~~ weight_pos: int -- if d is a list of tuples, this integer number gives the position of the weight in a tuple - ~~~ key: function -- if d is a list, this key functions grabs the weight for an item - ~~~ lower_bound: weights under this bound are not considered - ~~~ upper_bound: weights exceeding this bound are not considered - ''' - - #define functions for the applying the bounds - if lower_bound is not None and upper_bound is not None and lower_boundlower_bound,alower_bound)[0] - elif upper_bound is not None: - get_valid_weight_ndcs = lambda a: np.nonzero(a=upper_bound: - raise Exception("lower_bound is greater or equal to upper_bound") + to a minimal number of bins that have a fixed volume. + + Parameters + ========== + d : iterable + list containing weights, + OR dictionary where each (key,value)-pair carries the weight as value, + OR list of tuples where one entry in the tuple is the weight. The position of + this weight has to be given in optional variable weight_pos + V_max : int or float + Fixed bin volume + weight_pos : int, default = None + if d is a list of tuples, this integer number gives the position of the weight in a tuple + key : function, default = None + if d is a list, this key functions grabs the weight for an item + lower_bound : float, default = None + weights under this bound are not considered + upper_bound : float, default = None + weights exceeding this bound are not considered + + Returns + ======= + bins : list + A list. Each entry is a list of items or + a dict of items, depending on the type of ``d``. 
+ """ isdict = isinstance(d,dict) - if isinstance(d, list) and hasattr(d[0], '__len__'): + if not hasattr(d,'__len__'): + raise TypeError("d must be iterable") + + if not isdict and hasattr(d[0], '__len__'): if weight_pos is not None: key = lambda x: x[weight_pos] if key is None: raise ValueError("Must provide weight_pos or key for tuple list") - if isinstance(d, list) and key: + if not isdict and key: new_dict = {i: val for i, val in enumerate(d)} d = {i: key(val) for i, val in enumerate(d)} isdict = True @@ -64,47 +110,59 @@ def to_constant_volume(d,V_max,weight_pos=None,key=None,lower_bound=None,upper_b #get keys and values (weights) keys_vals = d.items() - keys = np.array([ k for k,v in keys_vals ]) - vals = np.array([ v for k,v in keys_vals ]) + keys = [ k for k, v in keys_vals ] + vals = [ v for k, v in keys_vals ] #sort weights decreasingly - ndcs = np.argsort(vals)[::-1] + ndcs = revargsort(vals) - weights = vals[ndcs] - keys = keys[ndcs] + weights = get(vals, ndcs) + keys = get(keys, ndcs) bins = [ {} ] else: - weights = np.sort(np.array(d))[::-1] + weights = sorted(d,key=lambda x:-x) bins = [ [] ] #find the valid indices - valid_ndcs = get_valid_weight_ndcs(weights) - weights = weights[valid_ndcs] + if lower_bound is not None and upper_bound is not None and lower_bound=upper_bound: + raise Exception("lower_bound is greater or equal to upper_bound") + + valid_ndcs = list(valid_ndcs) + + weights = get(weights, valid_ndcs) if isdict: - keys = keys[valid_ndcs] + keys = get(keys, valid_ndcs) #the total volume is the sum of all weights - V_total = weights.sum() + V_total = sum(weights) #prepare array containing the current weight of the bins - weight_sum = np.array([ 0. ]) + weight_sum = [ 0. ] #iterate through the weight list, starting with heaviest - for item,weight in enumerate(weights): + for item, weight in enumerate(weights): if isdict: key = keys[item] #find candidate bins where the weight might fit - candidate_bins = np.where(weight_sum+weight <= V_max)[0] + candidate_bins = list(filter(lambda i: weight_sum[i]+weight<=V_max, range(len(weight_sum)))) # if there are candidates where it fits if len(candidate_bins)>0: # find the fullest bin where this item fits and assign it - candidate_index = np.argmax(weight_sum[candidate_bins]) + candidate_index = argmax(get(weight_sum,candidate_bins)) b = candidate_bins[candidate_index] #if this weight doesn't fit in any existent bin @@ -114,7 +172,7 @@ def to_constant_volume(d,V_max,weight_pos=None,key=None,lower_bound=None,upper_b # open a new bin b = len(weight_sum) - weight_sum = np.append(weight_sum, 0.) + weight_sum.append(0.) if isdict: bins.append({}) else: @@ -140,13 +198,15 @@ def to_constant_volume(d,V_max,weight_pos=None,key=None,lower_bound=None,upper_b new_bins = [] for b in range(len(bins)): new_bins.append([]) - for key in bins[b]: - new_bins[b].append(new_dict[key]) + for _key in bins[b]: + new_bins[b].append(new_dict[_key]) return new_bins if __name__=="__main__": + import numpy as np + a = np.random.power(0.01,size=10000) V_max = 1. 
@@ -177,4 +237,18 @@ def to_constant_volume(d,V_max,weight_pos=None,key=None,lower_bound=None,upper_b
 
     print("needed", end-start, "sec")
 
+    c = [ ('a', 10, 'foo'), ('b', 10, 'log'), ('c', 11), ('d', 1, 'bar'), ('e', 2, 'bommel'), ('f',7,'floggo') ]
+    V_max = max(c,key=lambda x:x[1])[1]
+
+    bins = to_constant_volume(c,V_max,weight_pos=1)
+    print(bins)
+
+
+    import time
+    start = time.time()
+    for i in range(100):
+        bins = to_constant_volume(a,V_max)
+    end = time.time()
+    print("needed", end-start, "sec")
+
     pl.show()
diff --git a/binpacking/utilities.py b/binpacking/utilities.py
index 22b8a34..dc93735 100644
--- a/binpacking/utilities.py
+++ b/binpacking/utilities.py
@@ -57,3 +57,18 @@ def save_csvs(bins,filepath,header,delim=',',quotechar='"'):
                 writer.writerow(header)
             for row in b:
                 writer.writerow(row)
+
+def get(lst,ndx):
+    return [lst[n] for n in ndx]
+
+def argmin(lst):
+    return min(range(len(lst)), key=lst.__getitem__)
+
+def argmax(lst):
+    return max(range(len(lst)), key=lst.__getitem__)
+
+def argsort(lst):
+    return sorted(range(len(lst)), key=lst.__getitem__)
+
+def revargsort(lst):
+    return sorted(range(len(lst)), key=lambda i: -lst[i])
diff --git a/setup.py b/setup.py
index e3894fc..8614175 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup
 
 setup(name='binpacking',
-      version='1.4.5',
+      version='1.5.0',
      description='Heuristic distribution of weighted items to bins (either a fixed number of bins or a fixed number of volume per bin). Data may be in form of list, dictionary, list of tuples or csv-file.',
      url='https://www.github.com/benmaier/binpacking',
      author='Benjamin F. Maier',
@@ -10,7 +10,7 @@
      packages=['binpacking'],
      setup_requires=['pytest-runner'],
      install_requires=[
-                'numpy', 'future',
+                'future',
      ],
      tests_require=['pytest', 'pytest-cov'],
      dependency_links=[
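The helpers added to `binpacking/utilities.py` are pure-Python stand-ins for the numpy operations removed above, which is what allows `numpy` to be dropped from `install_requires`. A small sketch of their behaviour (values chosen arbitrarily, not part of the diff):

```python
from binpacking.utilities import get, argmin, argmax, revargsort

weights = [10.0, 1.0, 7.0, 2.0]

print(revargsort(weights))    # [0, 2, 3, 1]  ~ np.argsort(weights)[::-1]
print(get(weights, [0, 2]))   # [10.0, 7.0]   ~ fancy indexing weights[ndcs]
print(argmin(weights))        # 1             ~ np.argmin(weights)
print(argmax(weights))        # 0             ~ np.argmax(weights)
```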