Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
at benchs+cache 440 lines 16 kB view raw
1(** Merkle proofs for content-addressed trees. *) 2 3type 'hash kinded_hash = [ `Contents of 'hash | `Node of 'hash ] 4 5type ('hash, 'contents) tree = 6 | Contents of 'contents 7 | Blinded_contents of 'hash 8 | Node of ('hash, 'contents) node 9 | Blinded_node of 'hash 10 11and ('hash, 'contents) node = (string * ('hash, 'contents) tree) list 12 13type ('hash, 'contents) t = { 14 before : 'hash kinded_hash; 15 after : 'hash kinded_hash; 16 state : ('hash, 'contents) tree; 17} 18 19let v ~before ~after state = { before; after; state } 20let before p = p.before 21let after p = p.after 22let state p = p.state 23 24module Make (C : Codec.S) = struct 25 type hash = C.hash 26 type contents = string 27 28 (* Path set for tracking accessed paths *) 29 module PathSet = Set.Make (struct 30 type t = string list 31 32 let compare = compare 33 end) 34 35 (* Internal tree representation during proof production *) 36 type tree_state = 37 | Producing of { 38 backend : hash Backend.t; 39 node_hash : hash; 40 mutable accessed : PathSet.t; 41 } 42 | From_proof of { tree : (hash, contents) tree } 43 44 module Tree = struct 45 type t = { state : tree_state } 46 47 let of_hash backend h = 48 { state = Producing { backend; node_hash = h; accessed = PathSet.empty } } 49 50 let of_proof_tree tree = { state = From_proof { tree } } 51 52 (* Read node from backend *) 53 let read_node backend h = 54 match backend.Backend.read h with 55 | None -> None 56 | Some data -> ( 57 match C.node_of_bytes data with Ok n -> Some n | Error _ -> None) 58 59 (* Read contents from backend *) 60 let read_contents backend h = backend.Backend.read h 61 62 (* Record access to a path *) 63 let record_access t path = 64 match t.state with 65 | Producing p -> p.accessed <- PathSet.add path p.accessed 66 | From_proof _ -> () 67 68 (* Navigate to a path in a backend-stored node *) 69 let rec find_in_node backend node path = 70 match path with 71 | [] -> None (* Can't find contents at node root *) 72 | [ key ] -> ( 73 match C.find node key with 74 | Some (`Contents h) -> read_contents backend h 75 | Some (`Contents_inlined data) -> Some data 76 | _ -> None) 77 | key :: rest -> ( 78 match C.find node key with 79 | Some (`Node h) -> ( 80 match read_node backend h with 81 | Some child -> find_in_node backend child rest 82 | None -> None) 83 | _ -> None) 84 85 (* Navigate in proof tree *) 86 let rec find_in_proof tree path = 87 match (tree, path) with 88 | Contents c, [] -> Some c 89 | Node entries, key :: rest -> ( 90 match List.assoc_opt key entries with 91 | Some child -> find_in_proof child rest 92 | None -> None) 93 | Blinded_contents _, [] -> None (* Can't read blinded *) 94 | Blinded_node _, _ -> None (* Can't traverse blinded *) 95 | _ -> None 96 97 let find t path = 98 record_access t path; 99 match t.state with 100 | Producing { backend; node_hash; _ } -> ( 101 match read_node backend node_hash with 102 | Some node -> find_in_node backend node path 103 | None -> None) 104 | From_proof { tree } -> find_in_proof tree path 105 106 let rec find_tree_in_node backend node path = 107 match path with 108 | [] -> Some (of_hash backend (C.hash_node node)) 109 | key :: rest -> ( 110 match C.find node key with 111 | Some (`Node h) -> ( 112 match read_node backend h with 113 | Some child -> find_tree_in_node backend child rest 114 | None -> None) 115 | Some (`Contents _ | `Contents_inlined _) -> None 116 | None -> None) 117 118 let rec find_tree_in_proof tree path = 119 match (tree, path) with 120 | (Node _ as n), [] -> Some (of_proof_tree n) 121 | Node entries, key :: rest -> ( 122 match List.assoc_opt key entries with 123 | Some child -> find_tree_in_proof child rest 124 | None -> None) 125 | Blinded_node _, _ -> None 126 | Contents _, _ | Blinded_contents _, _ -> None 127 128 let find_tree t path = 129 record_access t path; 130 match t.state with 131 | Producing { backend; node_hash; _ } -> ( 132 match read_node backend node_hash with 133 | Some node -> find_tree_in_node backend node path 134 | None -> None) 135 | From_proof { tree } -> find_tree_in_proof tree path 136 137 let mem t path = Option.is_some (find t path) 138 139 let list t path = 140 record_access t path; 141 match t.state with 142 | Producing { backend; node_hash; _ } -> ( 143 let rec navigate node = function 144 | [] -> 145 C.list node 146 |> List.map (fun (k, v) -> 147 let kind = 148 match v with 149 | `Node _ -> `Node 150 | `Contents _ | `Contents_inlined _ -> `Contents 151 in 152 (k, kind)) 153 | key :: rest -> ( 154 match C.find node key with 155 | Some (`Node h) -> ( 156 match read_node backend h with 157 | Some child -> navigate child rest 158 | None -> []) 159 | _ -> []) 160 in 161 match read_node backend node_hash with 162 | Some node -> navigate node path 163 | None -> []) 164 | From_proof { tree } -> 165 let rec navigate t = function 166 | [] -> ( 167 match t with 168 | Node entries -> 169 List.map 170 (fun (k, v) -> 171 let kind = 172 match v with 173 | Node _ | Blinded_node _ -> `Node 174 | Contents _ | Blinded_contents _ -> `Contents 175 in 176 (k, kind)) 177 entries 178 | _ -> []) 179 | key :: rest -> ( 180 match t with 181 | Node entries -> ( 182 match List.assoc_opt key entries with 183 | Some child -> navigate child rest 184 | None -> []) 185 | _ -> []) 186 in 187 navigate tree path 188 189 (* Write operations - only work on producing trees *) 190 let add t path contents = 191 record_access t path; 192 match t.state with 193 | Producing { backend; node_hash; accessed } -> 194 let rec add_to_node node = function 195 | [] -> failwith "Proof.Tree.add: empty path" 196 | [ key ] -> 197 if C.inline_threshold > 0 198 && String.length contents <= C.inline_threshold 199 then C.add node key (`Contents_inlined contents) 200 else begin 201 let h = C.hash_contents contents in 202 backend.write h contents; 203 C.add node key (`Contents h) 204 end 205 | key :: rest -> 206 let child_node = 207 match C.find node key with 208 | Some (`Node h) -> ( 209 match read_node backend h with 210 | Some n -> n 211 | None -> C.empty_node) 212 | _ -> C.empty_node 213 in 214 let updated = add_to_node child_node rest in 215 let data = C.bytes_of_node updated in 216 let h = C.hash_node updated in 217 backend.write h data; 218 C.add node key (`Node h) 219 in 220 let node = 221 match read_node backend node_hash with 222 | Some n -> n 223 | None -> C.empty_node 224 in 225 let updated = add_to_node node path in 226 let data = C.bytes_of_node updated in 227 let new_hash = C.hash_node updated in 228 backend.write new_hash data; 229 { state = Producing { backend; node_hash = new_hash; accessed } } 230 | From_proof _ -> failwith "Proof.Tree.add: cannot modify proof tree" 231 232 let add_tree t path child = 233 record_access t path; 234 match (t.state, child.state) with 235 | ( Producing { backend; node_hash; accessed }, 236 Producing { node_hash = child_hash; _ } ) -> 237 let rec add_tree_to_node node = function 238 | [] -> failwith "Proof.Tree.add_tree: empty path" 239 | [ key ] -> C.add node key (`Node child_hash) 240 | key :: rest -> 241 let sub_node = 242 match C.find node key with 243 | Some (`Node h) -> ( 244 match read_node backend h with 245 | Some n -> n 246 | None -> C.empty_node) 247 | _ -> C.empty_node 248 in 249 let updated = add_tree_to_node sub_node rest in 250 let data = C.bytes_of_node updated in 251 let h = C.hash_node updated in 252 backend.write h data; 253 C.add node key (`Node h) 254 in 255 let node = 256 match read_node backend node_hash with 257 | Some n -> n 258 | None -> C.empty_node 259 in 260 let updated = add_tree_to_node node path in 261 let data = C.bytes_of_node updated in 262 let new_hash = C.hash_node updated in 263 backend.write new_hash data; 264 { state = Producing { backend; node_hash = new_hash; accessed } } 265 | _ -> failwith "Proof.Tree.add_tree: incompatible trees" 266 267 let remove t path = 268 record_access t path; 269 match t.state with 270 | Producing { backend; node_hash; accessed } -> 271 let rec remove_from_node node = function 272 | [] -> node 273 | [ key ] -> C.remove node key 274 | key :: rest -> ( 275 match C.find node key with 276 | Some (`Node h) -> ( 277 match read_node backend h with 278 | Some child -> 279 let updated = remove_from_node child rest in 280 if C.is_empty updated then C.remove node key 281 else 282 let data = C.bytes_of_node updated in 283 let h = C.hash_node updated in 284 backend.write h data; 285 C.add node key (`Node h) 286 | None -> node) 287 | _ -> node) 288 in 289 let node = 290 match read_node backend node_hash with 291 | Some n -> n 292 | None -> C.empty_node 293 in 294 let updated = remove_from_node node path in 295 let data = C.bytes_of_node updated in 296 let new_hash = C.hash_node updated in 297 backend.write new_hash data; 298 { state = Producing { backend; node_hash = new_hash; accessed } } 299 | From_proof _ -> failwith "Proof.Tree.remove: cannot modify proof tree" 300 301 let hash t = 302 match t.state with 303 | Producing { node_hash; _ } -> node_hash 304 | From_proof { tree } -> 305 let rec hash_tree = function 306 | Contents c -> C.hash_contents c 307 | Blinded_contents h -> h 308 | Node entries -> 309 let node = 310 List.fold_left 311 (fun n (k, v) -> 312 let kind = 313 match v with 314 | Node _ | Blinded_node _ -> `Node (hash_tree v) 315 | Contents _ | Blinded_contents _ -> 316 `Contents (hash_tree v) 317 in 318 C.add n k kind) 319 C.empty_node entries 320 in 321 C.hash_node node 322 | Blinded_node h -> h 323 in 324 hash_tree tree 325 end 326 327 (* Build proof tree from accessed paths *) 328 let build_proof_tree backend node_hash accessed = 329 let rec build node prefix = 330 let dominated_by_access = 331 PathSet.exists 332 (fun path -> 333 let plen = List.length prefix in 334 List.length path >= plen 335 && List.filteri (fun i _ -> i < plen) path = prefix) 336 accessed 337 in 338 if not dominated_by_access then Blinded_node (C.hash_node node) 339 else 340 let entries = C.list node in 341 let children = 342 List.map 343 (fun (key, kind) -> 344 let child_path = prefix @ [ key ] in 345 let child_tree = 346 match kind with 347 | `Contents_inlined data -> 348 if PathSet.mem child_path accessed then Contents data 349 else Contents data (* inlined is always available *) 350 | `Contents h -> 351 if PathSet.mem child_path accessed then 352 match backend.Backend.read h with 353 | Some c -> Contents c 354 | None -> Blinded_contents h 355 else Blinded_contents h 356 | `Node h -> 357 if 358 PathSet.exists 359 (fun p -> 360 let clen = List.length child_path in 361 List.length p >= clen 362 && List.filteri (fun i _ -> i < clen) p = child_path) 363 accessed 364 then 365 match backend.Backend.read h with 366 | Some data -> ( 367 match C.node_of_bytes data with 368 | Ok child_node -> build child_node child_path 369 | Error _ -> Blinded_node h) 370 | None -> Blinded_node h 371 else Blinded_node h 372 in 373 (key, child_tree)) 374 entries 375 in 376 Node children 377 in 378 match backend.Backend.read node_hash with 379 | Some data -> ( 380 match C.node_of_bytes data with 381 | Ok node -> build node [] 382 | Error _ -> Blinded_node node_hash) 383 | None -> Blinded_node node_hash 384 385 let produce backend root_hash f = 386 let tree = Tree.of_hash backend root_hash in 387 let result_tree, result = f tree in 388 let after_hash = Tree.hash result_tree in 389 let accessed = 390 match tree.state with 391 | Producing { accessed; _ } -> accessed 392 | From_proof _ -> PathSet.empty 393 in 394 let proof_tree = build_proof_tree backend root_hash accessed in 395 let proof = 396 { before = `Node root_hash; after = `Node after_hash; state = proof_tree } 397 in 398 (proof, result) 399 400 let to_tree proof = Tree.of_proof_tree proof.state 401 402 let rec hash_of_tree = function 403 | Contents c -> `Contents (C.hash_contents c) 404 | Blinded_contents h -> `Contents h 405 | Node entries -> 406 let node = 407 List.fold_left 408 (fun n (k, v) -> 409 let h = 410 match hash_of_tree v with 411 | `Contents h -> `Contents h 412 | `Node h -> `Node h 413 in 414 C.add n k h) 415 C.empty_node entries 416 in 417 `Node (C.hash_node node) 418 | Blinded_node h -> `Node h 419 420 let verify proof f = 421 let tree = to_tree proof in 422 let result_tree, result = f tree in 423 let computed_after = `Node (Tree.hash result_tree) in 424 let expected_after = proof.after in 425 match (computed_after, expected_after) with 426 | `Node h1, `Node h2 when C.hash_equal h1 h2 -> Ok (result_tree, result) 427 | `Contents h1, `Contents h2 when C.hash_equal h1 h2 -> 428 Ok (result_tree, result) 429 | _ -> 430 Error 431 (`Proof_mismatch 432 (Printf.sprintf "expected %s, got %s" 433 (C.hash_to_hex 434 (match expected_after with `Node h | `Contents h -> h)) 435 (C.hash_to_hex 436 (match computed_after with `Node h | `Contents h -> h)))) 437end 438 439module Git = Make (Codec.Git) 440module Mst = Make (Codec.Mst)