From b646af5c223f5d0f0382c3311e845e53851ab20f Mon Sep 17 00:00:00 2001 From: gwenaelle Date: Thu, 16 Mar 2023 17:25:58 +0100 Subject: [PATCH] irmin-pack: Use temp control files upon writing --- src/irmin-pack/layout.ml | 5 ++- src/irmin-pack/unix/control_file.ml | 53 ++++++++++++++---------- src/irmin-pack/unix/control_file_intf.ml | 26 ++++++++++-- src/irmin-pack/unix/errors.ml | 3 +- src/irmin-pack/unix/file_manager.ml | 20 +++++---- src/irmin-pack/unix/file_manager_intf.ml | 11 ++++- src/irmin-pack/unix/io_errors.ml | 3 +- src/irmin-pack/unix/lower.ml | 10 +++-- test/irmin-pack/test_lower.ml | 2 +- test/irmin-pack/test_pack.ml | 1 + 10 files changed, 90 insertions(+), 44 deletions(-) diff --git a/src/irmin-pack/layout.ml b/src/irmin-pack/layout.ml index 665df22281..a46fd607ae 100644 --- a/src/irmin-pack/layout.ml +++ b/src/irmin-pack/layout.ml @@ -54,6 +54,7 @@ module V4 = struct let branch = toplevel "store.branches" let dict = toplevel "store.dict" let control = toplevel "store.control" + let control_tmp = toplevel "store.control.tmp" let suffix_chunk ~chunk_idx = toplevel ("store." ^ string_of_int chunk_idx ^ ".suffix") @@ -81,7 +82,7 @@ module V5 = struct let directory ~idx = toplevel ("volume." ^ string_of_int idx) let control = toplevel "volume.control" - let control_tmp ~generation = + let control_gc_tmp ~generation = toplevel ("volume." ^ string_of_int generation ^ ".control") let mapping = toplevel "volume.mapping" @@ -107,6 +108,7 @@ module Classification = struct type t = [ `Branch | `Control + | `Control_tmp | `Dict | `Gc_result of int | `Mapping of int @@ -123,6 +125,7 @@ module Classification = struct | [ "store"; "pack" ] -> `V1_or_v2_pack | [ "store"; "branches" ] -> `Branch | [ "store"; "control" ] -> `Control + | [ "store"; "control"; "tmp" ] -> `Control_tmp | [ "store"; "dict" ] -> `Dict | [ "store"; g; "out" ] when is_number g -> `Gc_result (int_of_string g) | [ "store"; g; "reachable" ] when is_number g -> diff --git a/src/irmin-pack/unix/control_file.ml b/src/irmin-pack/unix/control_file.ml index 769b587fd0..b528ca8cfc 100644 --- a/src/irmin-pack/unix/control_file.ml +++ b/src/irmin-pack/unix/control_file.ml @@ -299,35 +299,48 @@ module Make (Serde : Serde.S) (Io : Io.S) = struct module Io = Io type payload = Serde.payload - type t = { io : Io.t; mutable payload : payload } + + type t = { + mutable io : Io.t; + mutable payload : payload; + path : string; + tmp_path : string option; + } let write io payload = let s = Serde.to_bin_string payload in - (* The data must fit inside a single page for atomic updates of the file. - This is only true for some file systems. This system will have to be - reworked for [V4]. *) - assert (String.length s <= Io.page_size); - Io.write_string io ~off:Int63.zero s + let set_payload t payload = + let open Result_syntax in + if Io.readonly t.io then Error `Ro_not_allowed + else + match t.tmp_path with + | None -> Error `No_tmp_path_provided + | Some tmp_path -> + let* () = Io.close t.io in + let* io_tmp = Io.create ~path:tmp_path ~overwrite:true in + t.io <- io_tmp; + let* () = write io_tmp payload in + let+ () = Io.move_file ~src:tmp_path ~dst:t.path in + t.payload <- payload + let read io = let open Result_syntax in let* string = Io.read_all_to_string io in - (* Since the control file is expected to fit in a page, - [read_all_to_string] should be atomic for most filesystems. *) Serde.of_bin_string string - let create_rw ~path ~overwrite (payload : payload) = + let create_rw ~path ~tmp_path ~overwrite (payload : payload) = let open Result_syntax in let* io = Io.create ~path ~overwrite in let+ () = write io payload in - { io; payload } + { io; payload; path; tmp_path } - let open_ ~path ~readonly = + let open_ ~path ~tmp_path ~readonly = let open Result_syntax in let* io = Io.open_ ~path ~readonly in let+ payload = read io in - { io; payload } + { io; payload; path; tmp_path } let close t = Io.close t.io let readonly t = Io.readonly t.io @@ -337,14 +350,17 @@ module Make (Serde : Serde.S) (Io : Io.S) = struct let open Result_syntax in if not @@ Io.readonly t.io then Error `Rw_not_allowed else - let+ payload = read t.io in + let* () = Io.close t.io in + let* io = Io.open_ ~path:t.path ~readonly:true in + t.io <- io; + let+ payload = read io in t.payload <- payload let read_payload ~path = let open Result_syntax in - let* t = open_ ~path ~readonly:true in - let payload = payload t in - let+ () = close t in + let* io = Io.open_ ~path ~readonly:true in + let* payload = read io in + let+ () = Io.close io in payload let read_raw_payload ~path = @@ -355,11 +371,6 @@ module Make (Serde : Serde.S) (Io : Io.S) = struct let+ () = Io.close io in payload - let set_payload t payload = - let open Result_syntax in - let+ () = write t.io payload in - t.payload <- payload - let fsync t = Io.fsync t.io end diff --git a/src/irmin-pack/unix/control_file_intf.ml b/src/irmin-pack/unix/control_file_intf.ml index 770d281a21..28acece9a0 100644 --- a/src/irmin-pack/unix/control_file_intf.ml +++ b/src/irmin-pack/unix/control_file_intf.ml @@ -302,6 +302,7 @@ module type S = sig val create_rw : path:string -> + tmp_path:string option -> overwrite:bool -> payload -> (t, [> Io.create_error | Io.write_error ]) result @@ -315,8 +316,14 @@ module type S = sig | `Closed | `Unknown_major_pack_version of string ] - val open_ : path:string -> readonly:bool -> (t, [> open_error ]) result - (** Create a rw instance of [t] by reading an existing file at [path]. *) + val open_ : + path:string -> + tmp_path:string option -> + readonly:bool -> + (t, [> open_error ]) result + (** Create a rw instance of [t] by reading an existing file at [path]. + [tmp_path] will be used by RW instances when updating it's content, it is + not required for RO instances or RW instances which will never be updated. *) val close : t -> (unit, [> Io.close_error ]) result @@ -348,7 +355,9 @@ module type S = sig | `Io_misc of Io.misc_error | `Closed | `Rw_not_allowed - | `Unknown_major_pack_version of string ] + | `Unknown_major_pack_version of string + | open_error + | Io.close_error ] val reload : t -> (unit, [> reload_error ]) result (** {3 RW mode} @@ -362,7 +371,16 @@ module type S = sig If the file changed since the last read, the payload in [t] is updated to match the content of the file. *) - val set_payload : t -> payload -> (unit, [> Io.write_error ]) result + type move_error := [ `Sys_error of string ] + + type set_error := + [ `No_tmp_path_provided + | Io.create_error + | Io.write_error + | move_error + | Io.close_error ] + + val set_payload : t -> payload -> (unit, [> set_error ]) result (** {3 RW mode} Write a new payload on disk. diff --git a/src/irmin-pack/unix/errors.ml b/src/irmin-pack/unix/errors.ml index eea615386f..88dd670c56 100644 --- a/src/irmin-pack/unix/errors.ml +++ b/src/irmin-pack/unix/errors.ml @@ -80,7 +80,8 @@ type base_error = | `Add_volume_requires_lower | `Volume_history_discontinuous | `Lower_has_no_volume - | `Volume_not_found of string ] + | `Volume_not_found of string + | `No_tmp_path_provided ] [@@deriving irmin ~pp] (** [base_error] is the type of most errors that can occur in a [result], except for errors that have associated exceptions (see below) and backend-specific diff --git a/src/irmin-pack/unix/file_manager.ml b/src/irmin-pack/unix/file_manager.ml index 018b249952..4742f92a01 100644 --- a/src/irmin-pack/unix/file_manager.ml +++ b/src/irmin-pack/unix/file_manager.ml @@ -270,7 +270,7 @@ struct | `Prefix g | `Mapping g -> g <> generation | `Suffix idx -> idx < chunk_start_idx || idx > chunk_start_idx + chunk_num - | `Reachable _ | `Sorted _ | `Gc_result _ -> true) + | `Reachable _ | `Sorted _ | `Gc_result _ | `Control_tmp -> true) files in List.iter @@ -387,7 +387,8 @@ struct let create_control_file ~overwrite config pl = let root = Irmin_pack.Conf.root config in let path = Layout.control ~root in - Control.create_rw ~path ~overwrite pl + let tmp_path = Layout.control_tmp ~root in + Control.create_rw ~path ~tmp_path:(Some tmp_path) ~overwrite pl (* Reload ***************************************************************** *) @@ -576,7 +577,8 @@ struct let lower_root = Irmin_pack.Conf.lower_root config in let* control = let path = Layout.control ~root in - Control.open_ ~readonly:false ~path + let tmp_path = Layout.control_tmp ~root in + Control.open_ ~readonly:false ~path ~tmp_path:(Some tmp_path) in let* Payload. { @@ -713,7 +715,7 @@ struct (* 1. Open the control file *) let* control = let path = Layout.control ~root in - Control.open_ ~readonly:true ~path + Control.open_ ~readonly:true ~path ~tmp_path:None (* If no control file, then check whether the store is in v1 or v2. *) |> Result.map_error (function | `No_such_file_or_directory _ -> ( @@ -797,7 +799,7 @@ struct | `File | `Other -> Error (`Not_a_directory root) | `Directory -> ( let path = Layout.control ~root in - match Control.open_ ~path ~readonly:true with + match Control.open_ ~path ~tmp_path:None ~readonly:true with | Ok _ -> Ok `V3 | Error (`No_such_file_or_directory _) -> v2_or_v1 () | Error `Not_a_file -> Error `Invalid_layout @@ -858,11 +860,11 @@ struct match volume_root with | None -> Ok () | Some root -> - let control_tmp = - Irmin_pack.Layout.V5.Volume.control_tmp ~generation ~root + let control_gc_tmp = + Irmin_pack.Layout.V5.Volume.control_gc_tmp ~generation ~root in let control = Irmin_pack.Layout.V5.Volume.control ~root in - let* () = Io.move_file ~src:control_tmp ~dst:control in + let* () = Io.move_file ~src:control_gc_tmp ~dst:control in reload_lower t ~volume_num:pl.volume_num in @@ -988,7 +990,7 @@ struct } in let path = Layout.control ~root:dst_root in - let* control = Control.create_rw ~path ~overwrite:false pl in + let* control = Control.create_rw ~path ~tmp_path:None ~overwrite:false pl in let* () = Control.close control in (* Step 4. Create the index. *) let* index = diff --git a/src/irmin-pack/unix/file_manager_intf.ml b/src/irmin-pack/unix/file_manager_intf.ml index 14814064ab..a4dc4a99da 100644 --- a/src/irmin-pack/unix/file_manager_intf.ml +++ b/src/irmin-pack/unix/file_manager_intf.ml @@ -90,7 +90,9 @@ module type S = sig | `Volume_missing of string | `Not_a_directory of string | `Multiple_empty_volumes - | `Index_failure of string ] + | `Index_failure of string + | `Sys_error of string + | `No_tmp_path_provided ] val create_rw : overwrite:bool -> Irmin.Backend.Conf.t -> (t, [> create_error ]) result @@ -120,6 +122,7 @@ module type S = sig | `No_such_file_or_directory of string | `Not_a_directory of string | `Not_a_file + | `No_tmp_path_provided | `Read_out_of_bounds | `Ro_not_allowed | `Sys_error of string @@ -204,7 +207,11 @@ module type S = sig [ `Index_failure of string | `Io_misc of Io.misc_error | `Ro_not_allowed - | `Closed ] + | `Closed + | `Double_close + | `File_exists of string + | `Sys_error of string + | `No_tmp_path_provided ] type flush_stages := [ `After_dict | `After_suffix ] type 'a hook := 'a -> unit diff --git a/src/irmin-pack/unix/io_errors.ml b/src/irmin-pack/unix/io_errors.ml index e460609b09..c05f05bdee 100644 --- a/src/irmin-pack/unix/io_errors.ml +++ b/src/irmin-pack/unix/io_errors.ml @@ -84,7 +84,8 @@ module Make (Io : Io.S) : S with module Io = Io = struct | `Lower_has_no_volume | `Add_volume_forbidden_during_gc | `Add_volume_requires_lower - | `Volume_not_found of string ] + | `Volume_not_found of string + | `No_tmp_path_provided ] [@@deriving irmin] let raise_error = function diff --git a/src/irmin-pack/unix/lower.ml b/src/irmin-pack/unix/lower.ml index 787a3c7e04..afad88362d 100644 --- a/src/irmin-pack/unix/lower.ml +++ b/src/irmin-pack/unix/lower.ml @@ -99,7 +99,8 @@ module Make_volume (Io : Io.S) (Errs : Io_errors.S with module Io = Io) = struct } in let control = Layout.control ~root in - Control.create_rw ~path:control ~overwrite:false payload >>= Control.close + Control.create_rw ~path:control ~tmp_path:None ~overwrite:false payload + >>= Control.close let path = function Empty { path } -> path | Nonempty { path; _ } -> path @@ -198,11 +199,12 @@ module Make_volume (Io : Io.S) (Errs : Io_errors.S with module Io = Io) = struct { start_offset; end_offset; mapping_end_poff; checksum = Int63.zero } in (* Write into temporary file on disk *) - let tmp_control_path = - Irmin_pack.Layout.V5.Volume.control_tmp ~generation ~root + let control_gc_tmp = + Irmin_pack.Layout.V5.Volume.control_gc_tmp ~generation ~root in let* c = - Control.create_rw ~path:tmp_control_path ~overwrite:true new_control + Control.create_rw ~path:control_gc_tmp ~tmp_path:None ~overwrite:true + new_control in let* () = Control.close c in Ok root diff --git a/test/irmin-pack/test_lower.ml b/test/irmin-pack/test_lower.ml index 5c52aa4bfa..bbc2cdc7f0 100644 --- a/test/irmin-pack/test_lower.ml +++ b/test/irmin-pack/test_lower.ml @@ -32,7 +32,7 @@ module Direct_tc = struct let create_control volume_path payload = let path = Irmin_pack.Layout.V5.Volume.control ~root:volume_path in - Control.create_rw ~path ~overwrite:true payload + Control.create_rw ~path ~tmp_path:None ~overwrite:true payload let test_empty () = let lower_root = create_lower_root () in diff --git a/test/irmin-pack/test_pack.ml b/test/irmin-pack/test_pack.ml index e44febc6d3..9c0a6c623d 100644 --- a/test/irmin-pack/test_pack.ml +++ b/test/irmin-pack/test_pack.ml @@ -505,6 +505,7 @@ module Layout = struct c `V1_or_v2_pack (V1_and_v2.pack ~root:"" |> classif); c `Branch (V4.branch ~root:"" |> classif); c `Control (V4.control ~root:"" |> classif); + c `Control_tmp (V4.control_tmp ~root:"" |> classif); c `Dict (V4.dict ~root:"" |> classif); c (`Gc_result 0) (V4.gc_result ~generation:0 ~root:"" |> classif); c (`Reachable 1) (V4.reachable ~generation:1 ~root:"" |> classif);