From d581f770a58f23531f2dea54d2c4f83d44d141a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Pascutto?=
Date: Wed, 10 Feb 2021 14:27:32 +0100
Subject: [PATCH] Add serializer functions

Co-authored-by: Bryan Bennett
---
 bloomf.opam    |  4 ++--
 src/bloomf.ml  | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/bloomf.mli | 16 ++++++++++++++
 test/main.ml   | 13 +++++++++++
 4 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/bloomf.opam b/bloomf.opam
index 2bf67fa..992d8cc 100644
--- a/bloomf.opam
+++ b/bloomf.opam
@@ -16,7 +16,7 @@ build: [
 depends: [
-  "ocaml" {>= "4.03.0"}
+  "ocaml" {>= "4.06.0"}
   "dune" {>= "1.7.0"}
-  "bitv"
+  "bitv" {>= "1.4"}
   "alcotest" {with-test}
 ]
 synopsis: "Efficient Bloom filters for OCaml"
diff --git a/src/bloomf.ml b/src/bloomf.ml
index d28be6d..2f93075 100644
--- a/src/bloomf.ml
+++ b/src/bloomf.ml
@@ -87,6 +87,61 @@ let size_estimate t =
   let xf = float_of_int (Bitv.pop t.b) in
   int_of_float (-.mf /. kf *. log (1. -. (xf /. mf)))
 
+(* Serialisers *)
+
+external set_64 : bytes -> int -> int64 -> unit = "%caml_string_set64u"
+
+external swap64 : int64 -> int64 = "%bswap_int64"
+
+let set_uint64 buf off v =
+  if not Sys.big_endian then set_64 buf off (swap64 v) else set_64 buf off v
+
+(* type priv = { m : int; k : int; p_len : (int * int) list; b : Bitv.t } *)
+
+(* [to_bytes t] lays [t] out as three big-endian 64-bit integers ([t.m],
+   [t.k] and the number of pairs in [t.p_len]), then every pair of [t.p_len]
+   as two big-endian 64-bit integers, then [Bitv.to_bytes t.b]. *)
+let to_bytes t =
+  let enc_b = Bitv.to_bytes t.b in
+  let enc_b_len = Bytes.length enc_b in
+  let enc_p_len_len = 16 * List.length t.p_len in
+  let len = 8 + 8 + 8 + enc_p_len_len + enc_b_len in
+  let buf = Bytes.create len in
+  set_uint64 buf 0 (Int64.of_int t.m);
+  set_uint64 buf 8 (Int64.of_int t.k);
+  set_uint64 buf 16 (Int64.of_int (List.length t.p_len));
+  List.iteri
+    (fun i (i1, i2) ->
+      set_uint64 buf (24 + (8 * (2 * i))) (Int64.of_int i1);
+      set_uint64 buf (24 + (8 * ((2 * i) + 1))) (Int64.of_int i2))
+    t.p_len;
+  Bytes.blit enc_b 0 buf (24 + enc_p_len_len) enc_b_len;
+  buf
+
+external get_64 : bytes -> int -> int64 = "%caml_string_get64"
+
+let get_uint64 buf off =
+  if not Sys.big_endian then swap64 (get_64 buf off) else get_64 buf off
+
+(* [of_bytes buf] reads back the layout written by [to_bytes]. The catch-all
+   handler turns any exception raised while decoding, such as an
+   out-of-bounds read on a truncated buffer, into [Error]. *)
+let of_bytes buf =
+  try
+    let m = get_uint64 buf 0 |> Int64.to_int in
+    let k = get_uint64 buf 8 |> Int64.to_int in
+    let p_len_len = get_uint64 buf 16 |> Int64.to_int in
+    let p_len =
+      List.init p_len_len (fun i ->
+          let i1 = get_uint64 buf (24 + (8 * (2 * i))) |> Int64.to_int in
+          let i2 = get_uint64 buf (24 + (8 * ((2 * i) + 1))) |> Int64.to_int in
+          (i1, i2))
+    in
+    let read = 24 + (16 * p_len_len) in
+    let b = Bytes.sub buf read (Bytes.length buf - read) |> Bitv.of_bytes in
+    Ok { m; k; p_len; b }
+  with _ -> Error (`Msg "invalid serialisation format")
+
 module type Hashable = sig
   type t
 
@@ -105,4 +160,8 @@ module Make (H : Hashable) = struct
   let clear = clear
 
   let size_estimate = size_estimate
+
+  let to_bytes = to_bytes
+
+  let of_bytes = of_bytes
 end
diff --git a/src/bloomf.mli b/src/bloomf.mli
index 6d86b68..5b1c7b7 100644
--- a/src/bloomf.mli
+++ b/src/bloomf.mli
@@ -54,6 +54,18 @@ val size_estimate : 'a t -> int
     the bloom filter. Please note that this operation is costly (see
     benchmarks). *)
 
+(** {2 Serializers/Deserializers} *)
+
+val to_bytes : 'a t -> bytes
+(** [to_bytes t] is a binary serialisation of [t] that can be read back with
+    {!of_bytes}. *)
+
+val of_bytes : bytes -> ('a t, [ `Msg of string ]) result
+(** [of_bytes b] is the filter decoded from [b], which is expected to have
+    been produced by {!to_bytes}, or [Error (`Msg _)] if decoding fails. The
+    element type ['a] of the result is not recorded in [b] and is therefore
+    not checked. *)
+
 (** {1 Functorial interface} *)
 
 (** The functorial interface allows you to specify your own hash function. *)
@@ -82,4 +94,8 @@ module Make (H : Hashable) : sig
   val clear : t -> unit
 
   val size_estimate : t -> int
+
+  val to_bytes : t -> bytes
+
+  val of_bytes : bytes -> (t, [ `Msg of string ]) result
 end
diff --git a/test/main.ml b/test/main.ml
index aaa860b..8dcc516 100644
--- a/test/main.ml
+++ b/test/main.ml
@@ -78,6 +78,18 @@ let test_union () = test_op "union" Bloomf.union StringSet.union
 
 let test_inter () = test_op "intersection" Bloomf.inter StringSet.inter
 
+let test_bytes () =
+  let sizes = [ 1_000; 10_000; 100_000 ] in
+  List.iter
+    (fun i ->
+      let bf1, _ = create_and_fill i in
+      match Bloomf.to_bytes bf1 |> Bloomf.of_bytes with
+      | Ok bf2 ->
+          Alcotest.(check bool)
+            "serialisation / deserialisation" true (bf1 = bf2)
+      | Error _ -> Alcotest.failf "deserialisation failed")
+    sizes
+
 let suite =
   [
     ("Mem returns true when element was added", `Quick, test_mem);
@@ -87,6 +99,7 @@ let suite =
     ("Size estimate is correct", `Slow, test_size);
     ("Union", `Quick, test_union);
     ("Intersection", `Quick, test_inter);
+    ("Serialisation", `Quick, test_bytes);
   ]
 
 let () = Alcotest.run "Bloomf" [ ("bloomf", suite) ]