Skip to content

Commit

Permalink
Merge #32
Browse files Browse the repository at this point in the history
32: Improve hashing r=crepererum a=crepererum

Result:
![screenshot from 2018-08-25 18-57-43](https://user-images.githubusercontent.com/1529400/44620666-d2353d80-a898-11e8-97b2-a373c00130a5.png)

See #22.

Co-authored-by: Marco Neumann <[email protected]>
  • Loading branch information
bors[bot] and crepererum committed Aug 25, 2018
2 parents 931e7ce + 287c17e commit b74c233
Show file tree
Hide file tree
Showing 4 changed files with 234 additions and 49 deletions.
14 changes: 7 additions & 7 deletions src/bloomfilter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::hash::{BuildHasher, Hash};

use fixedbitset::FixedBitSet;

use hash_utils::{HashIter, MyBuildHasherDefault};
use hash_utils::{HashIterBuilder, MyBuildHasherDefault};

/// A BloomFilter is a set-like data structure that keeps track of elements it has seen without
/// the need to store them. Looking up values has a certain false positive rate, but a false
Expand Down Expand Up @@ -62,7 +62,7 @@ where
{
bs: FixedBitSet,
k: usize,
buildhasher: B,
builder: HashIterBuilder<B>,
}

impl BloomFilter {
Expand Down Expand Up @@ -97,7 +97,7 @@ where
Self {
bs: FixedBitSet::with_capacity(m),
k,
buildhasher,
builder: HashIterBuilder::new(m, k, buildhasher),
}
}

Expand Down Expand Up @@ -129,7 +129,7 @@ where

/// Get `BuildHasher`.
pub fn buildhasher(&self) -> &B {
&self.buildhasher
self.builder.buildhasher()
}

/// Add new element to the BloomFilter.
Expand All @@ -140,7 +140,7 @@ where
where
T: Hash,
{
for pos in HashIter::new(self.bs.len(), self.k, obj, &self.buildhasher) {
for pos in self.builder.iter_for(obj) {
self.bs.set(pos, true);
}
}
Expand All @@ -150,7 +150,7 @@ where
where
T: Hash,
{
for pos in HashIter::new(self.bs.len(), self.k, obj, &self.buildhasher) {
for pos in self.builder.iter_for(obj) {
if !self.bs[pos] {
return false;
}
Expand Down Expand Up @@ -188,7 +188,7 @@ where
other.bs.len()
);
assert!(
self.buildhasher == other.buildhasher,
self.buildhasher() == other.buildhasher(),
"buildhasher must be equal"
);

Expand Down
15 changes: 8 additions & 7 deletions src/countminsketch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::hash::{BuildHasher, Hash};

use num_traits::{CheckedAdd, One, Unsigned, Zero};

use hash_utils::{HashIter, MyBuildHasherDefault};
use hash_utils::{HashIterBuilder, MyBuildHasherDefault};

/// A CountMinSketch is a data structure to estimate the frequency of elements in a data stream.
///
Expand Down Expand Up @@ -116,7 +116,7 @@ where
table: Vec<C>,
w: usize,
d: usize,
buildhasher: B,
builder: HashIterBuilder<B>,
}

impl<C> CountMinSketch<C>
Expand Down Expand Up @@ -164,7 +164,7 @@ where
table,
w,
d,
buildhasher,
builder: HashIterBuilder::new(w, d, buildhasher),
}
}

Expand Down Expand Up @@ -198,7 +198,7 @@ where

/// Get `BuildHasher`.
pub fn buildhasher(&self) -> &B {
&self.buildhasher
self.builder.buildhasher()
}

/// Check whether the CountMinSketch is empty (i.e. no elements seen yet).
Expand All @@ -219,7 +219,7 @@ where
where
T: Hash,
{
for (i, pos) in HashIter::new(self.w, self.d, obj, &self.buildhasher).enumerate() {
for (i, pos) in self.builder.iter_for(obj).enumerate() {
let x = i * self.w + pos;
self.table[x] = self.table[x].checked_add(n).unwrap();
}
Expand All @@ -230,7 +230,8 @@ where
where
T: Hash,
{
HashIter::new(self.w, self.d, obj, &self.buildhasher)
self.builder
.iter_for(obj)
.enumerate()
.map(|(i, pos)| i * self.w + pos)
.map(|x| self.table[x].clone())
Expand All @@ -256,7 +257,7 @@ where
self.w, other.w
);
assert!(
self.buildhasher == other.buildhasher,
self.buildhasher() == other.buildhasher(),
"buildhasher must be equal"
);

Expand Down
Loading

0 comments on commit b74c233

Please sign in to comment.