Skip to content

Commit

Permalink
Add serializer functions
Browse files Browse the repository at this point in the history
Co-authored-by: Bryan Bennett <[email protected]>
  • Loading branch information
pascutto and bbenne10 committed Feb 10, 2021
1 parent 8ebb804 commit d581f77
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 1 deletion.
2 changes: 1 addition & 1 deletion bloomf.opam
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ build: [
depends: [
"ocaml" {>= "4.03.0"}
"dune" {>= "1.7.0"}
"bitv"
"bitv" {>= "1.4"}
"alcotest" {with-test}
]
synopsis: "Efficient Bloom filters for OCaml"
Expand Down
53 changes: 53 additions & 0 deletions src/bloomf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,55 @@ let size_estimate t =
let xf = float_of_int (Bitv.pop t.b) in
int_of_float (-.mf /. kf *. log (1. -. (xf /. mf)))

(* Serialisers *)

(* Bounds-checked 64-bit store in native byte order. The checked primitive is
   used (rather than the unchecked ["%caml_string_set64u"]) so that an offset
   outside the buffer raises [Invalid_argument] instead of writing out of
   bounds, matching the checked read primitive used when decoding. *)
external set_64 : bytes -> int -> int64 -> unit = "%caml_string_set64"

(* Reverses the byte order of a 64-bit integer. *)
external swap64 : int64 -> int64 = "%bswap_int64"

(* [set_uint64 buf off v] stores the 8 bytes of [v] into [buf] starting at
   offset [off], most significant byte first (big-endian), whatever the
   endianness of the host.
   @raise Invalid_argument if [off .. off + 7] is not within [buf]. *)
let set_uint64 buf off v =
  (* [set_64] writes in native order: swap first on little-endian hosts. *)
  let word = if Sys.big_endian then v else swap64 v in
  set_64 buf off word

(* type priv = { m : int; k : int; p_len : (int * int) list; b : Bitv.t } *)

(* [to_bytes t] serialises [t] into a freshly allocated buffer. Every integer
   is written with [set_uint64] as one 8-byte word, in this order:
   - offset 0: [t.m]
   - offset 8: [t.k]
   - offset 16: the number of pairs in [t.p_len]
   - offset 24: the pairs of [t.p_len], 16 bytes each (first component, then
     second)
   - after the pairs: the bytes returned by [Bitv.to_bytes t.b]. *)
let to_bytes t =
  let bits = Bitv.to_bytes t.b in
  let bits_len = Bytes.length bits in
  let header_len = 24 in
  let pair_count = List.length t.p_len in
  let pairs_len = 16 * pair_count in
  let out = Bytes.create (header_len + pairs_len + bits_len) in
  (* Local shorthand: write a native int as one 64-bit word at [off]. *)
  let put off n = set_uint64 out off (Int64.of_int n) in
  put 0 t.m;
  put 8 t.k;
  put 16 pair_count;
  List.iteri
    (fun idx (first, second) ->
      let off = header_len + (16 * idx) in
      put off first;
      put (off + 8) second)
    t.p_len;
  Bytes.blit bits 0 out (header_len + pairs_len) bits_len;
  out

(* Bounds-checked 64-bit load in native byte order. *)
external get_64 : bytes -> int -> int64 = "%caml_string_get64"

(* [get_uint64 buf off] reads the 8 bytes of [buf] starting at offset [off] as
   a big-endian 64-bit integer, whatever the endianness of the host. *)
let get_uint64 buf off =
  let raw = get_64 buf off in
  (* [get_64] reads in native order: swap afterwards on little-endian hosts. *)
  if Sys.big_endian then raw else swap64 raw

(* [of_bytes buf] decodes a filter from the layout produced by [to_bytes]:
   three 64-bit words read with [get_uint64] ([m], [k] and the number of
   [p_len] pairs), then two words per pair, then the [Bitv] encoding of the
   bit vector, which occupies the rest of [buf].

   Returns [Ok t] on success and [Error (`Msg _)] when [buf] cannot be
   decoded; it does not raise on malformed input. *)
let of_bytes buf =
  (* Read one word and reject values that do not survive the conversion to a
     native [int]: [Int64.to_int] wraps silently, which would turn a corrupt
     word into a different, plausible-looking value. *)
  let read_int off =
    let word = get_uint64 buf off in
    let n = Int64.to_int word in
    if Int64.equal (Int64.of_int n) word then n
    else invalid_arg "Bloomf.of_bytes: integer out of range"
  in
  try
    let header_len = 24 in
    let m = read_int 0 in
    let k = read_int 8 in
    let p_len_len = read_int 16 in
    (* Bound the pair count by the bytes actually present before it is used
       in any arithmetic, so [16 * p_len_len] below cannot overflow. *)
    if p_len_len < 0 || p_len_len > (Bytes.length buf - header_len) / 16 then
      invalid_arg "Bloomf.of_bytes: invalid pair count";
    let p_len =
      List.init p_len_len (fun i ->
          let off = header_len + (16 * i) in
          let i1 = read_int off in
          let i2 = read_int (off + 8) in
          (i1, i2))
    in
    let read = header_len + (16 * p_len_len) in
    let b = Bytes.sub buf read (Bytes.length buf - read) |> Bitv.of_bytes in
    Ok { m; k; p_len; b }
  with _ ->
    (* Deliberately broad: whatever goes wrong while decoding (short buffer,
       out-of-range word, failure inside [Bitv.of_bytes]) is reported to the
       caller as a malformed input rather than as an exception. *)
    Error (`Msg "invalid serialisation format")

module type Hashable = sig
type t

Expand All @@ -105,4 +154,8 @@ module Make (H : Hashable) = struct
(* The bindings below rebind functions of the same name from the enclosing
   scope inside this module, without changing them. *)
let clear = clear

let size_estimate = size_estimate

(* Serialiser: same function as the outer [to_bytes]. *)
let to_bytes = to_bytes

(* Deserialiser: same function as the outer [of_bytes]. *)
let of_bytes = of_bytes
end
10 changes: 10 additions & 0 deletions src/bloomf.mli
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ val size_estimate : 'a t -> int
the bloom filter. Please note that this operation is costly (see
benchmarks). *)

(** {2 Serializers/Deserializers} *)

(** [to_bytes t] returns a binary encoding of the filter [t] as a freshly
    allocated buffer. The result can be turned back into a filter with
    {!of_bytes}. *)
val to_bytes : 'a t -> bytes

(** [of_bytes buf] decodes a filter from [buf], which is expected to have been
    produced by {!to_bytes}. Returns [Error (`Msg _)] if [buf] is not a valid
    encoding. *)
val of_bytes : bytes -> ('a t, [ `Msg of string ]) result

(** {1 Functorial interface} *)

(** The functorial interface allows you to specify your own hash function. *)
Expand Down Expand Up @@ -82,4 +88,8 @@ module Make (H : Hashable) : sig
val clear : t -> unit

val size_estimate : t -> int

(** [to_bytes t] returns a binary encoding of the filter [t]. The result can
    be turned back into a filter with [of_bytes]. *)
val to_bytes : t -> bytes

(** [of_bytes buf] decodes a filter from [buf], which is expected to have been
    produced by [to_bytes]. Returns [Error (`Msg _)] if [buf] is not a valid
    encoding. *)
val of_bytes : bytes -> (t, [ `Msg of string ]) result
end
13 changes: 13 additions & 0 deletions test/main.ml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,18 @@ let test_union () = test_op "union" Bloomf.union StringSet.union

let test_inter () = test_op "intersection" Bloomf.inter StringSet.inter

(* Round-trip test: for each size, takes the filter returned by
   [create_and_fill], passes it through [Bloomf.to_bytes] and
   [Bloomf.of_bytes], and checks that the decoded filter is structurally
   equal to the original. A decoding error fails the test and reports both
   the size being tested and the decoder's message. *)
let test_bytes () =
  let sizes = [ 1_000; 10_000; 100_000 ] in
  List.iter
    (fun i ->
      let bf1, _ = create_and_fill i in
      match Bloomf.to_bytes bf1 |> Bloomf.of_bytes with
      | Ok bf2 ->
          Alcotest.(check bool)
            "serialisation / deserialisation" true (bf1 = bf2)
      | Error (`Msg msg) ->
          (* Surface the decoder's own message instead of discarding it. *)
          Alcotest.failf "deserialisation failed (size %d): %s" i msg)
    sizes

let suite =
[
("Mem returns true when element was added", `Quick, test_mem);
Expand All @@ -87,6 +99,7 @@ let suite =
("Size estimate is correct", `Slow, test_size);
("Union", `Quick, test_union);
("Intersection", `Quick, test_inter);
("Serialisation", `Quick, test_bytes);
]

(* Test entry point: hands [suite] to Alcotest as the single group "bloomf"
   of the run named "Bloomf". *)
let () = Alcotest.run "Bloomf" [ ("bloomf", suite) ]

0 comments on commit d581f77

Please sign in to comment.