From 1aab9b939cdb029349a5b7ac2442f4a2b94960e9 Mon Sep 17 00:00:00 2001 From: "Daniel J. Hofmann" Date: Tue, 12 Jun 2018 17:49:21 -0700 Subject: [PATCH] Implements reservoir sampler randomly sampling stream of features, closes #7 --- robosat/osm/sampler.py | 72 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 robosat/osm/sampler.py diff --git a/robosat/osm/sampler.py b/robosat/osm/sampler.py new file mode 100644 index 00000000..51a2a542 --- /dev/null +++ b/robosat/osm/sampler.py @@ -0,0 +1,72 @@ +import random + + +class ReservoirSampler: + '''Randomly samples k items from a stream of unknown n items. + ''' + + def __init__(self, capacity): + '''Creates an new `ReservoirSampler` instance. + + Args: + capacity: the number of items to randomly samples from a stream of unknown size. + ''' + + assert capacity > 0 + + self.capacity = capacity + self.reservoir = [] + self.pushed = 0 + + def push(self, v): + '''Adds an item to the reservoir. + + Args: + v: the item from the stream to add to the reservoir. + ''' + + size = len(self.reservoir) + + if size < self.capacity: + self.reservoir.append(v) + else: + assert size == self.capacity + assert size <= self.pushed + + p = self.capacity / self.pushed + + if random.random() < p: + i = random.randint(0, size - 1) + self.reservoir[i] = v + + self.pushed += 1 + + def __len__(self): + '''Returns the number of randomly sampled items. + + Returns: + The number of randomly sampled items in the reservoir. + ''' + + return len(self.reservoir) + + def __getitem__(self, k): + '''Returns a randomly sampled item in the reservoir. + + Args: + k: the index for the kth item from the reservoir to return. + + Returns: + The kth item in the reservoir of randomly sampled items. + ''' + + return self.reservoir[k] + + def __repr__(self): + '''Returns the representation for this class. + + Returns: + The string representation for this class. + ''' + + return '<{}: {}>'.format(self.__class__.__name__, list(self))