Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
at perf 472 lines 17 kB view raw
1(** Merkle proofs for content-addressed trees. *) 2 3type 'hash kinded_hash = [ `Contents of 'hash | `Node of 'hash ] 4 5type ('hash, 'contents) tree = 6 | Contents of 'contents 7 | Blinded_contents of 'hash 8 | Node of ('hash, 'contents) node 9 | Blinded_node of 'hash 10 11and ('hash, 'contents) node = (string * ('hash, 'contents) tree) list 12 13type ('hash, 'contents) t = { 14 before : 'hash kinded_hash; 15 after : 'hash kinded_hash; 16 state : ('hash, 'contents) tree; 17} 18 19let v ~before ~after state = { before; after; state } 20let before p = p.before 21let after p = p.after 22let state p = p.state 23 24let pp pp_hash pp_contents fmt p = 25 let pp_kinded fmt = function 26 | `Contents h -> Fmt.pf fmt "contents:%a" pp_hash h 27 | `Node h -> Fmt.pf fmt "node:%a" pp_hash h 28 in 29 let rec pp_tree fmt = function 30 | Contents c -> Fmt.pf fmt "(%a)" pp_contents c 31 | Blinded_contents h -> Fmt.pf fmt "#(%a)" pp_hash h 32 | Node entries -> 33 Fmt.pf fmt "{%a}" 34 Fmt.(list ~sep:(any ", ") (pair ~sep:(any ":") string pp_tree)) 35 entries 36 | Blinded_node h -> Fmt.pf fmt "#{%a}" pp_hash h 37 in 38 Fmt.pf fmt "@[<2>proof{before=%a;@ after=%a;@ state=%a}@]" pp_kinded p.before 39 pp_kinded p.after pp_tree p.state 40 41module Make (C : Codec.S) = struct 42 type hash = C.hash 43 type contents = string 44 45 (* Path set for tracking accessed paths *) 46 module Path_set = Set.Make (struct 47 type t = string list 48 49 let compare = compare 50 end) 51 52 (* Internal tree representation during proof production *) 53 type tree_state = 54 | Producing of { 55 backend : hash Backend.t; 56 node_hash : hash; 57 mutable accessed : Path_set.t; 58 } 59 | From_proof of { tree : (hash, contents) tree } 60 61 module Tree = struct 62 type t = { state : tree_state } 63 64 let of_hash backend h = 65 { 66 state = Producing { backend; node_hash = h; accessed = Path_set.empty }; 67 } 68 69 let of_proof_tree tree = { state = From_proof { tree } } 70 71 (* Read node from backend *) 72 let read_node backend h = 73 match backend.Backend.read h with 74 | None -> None 75 | Some data -> ( 76 match C.node_of_bytes data with Ok n -> Some n | Error _ -> None) 77 78 (* Read contents from backend *) 79 let read_contents backend h = backend.Backend.read h 80 81 (* Record access to a path *) 82 let record_access t path = 83 match t.state with 84 | Producing p -> p.accessed <- Path_set.add path p.accessed 85 | From_proof _ -> () 86 87 (* Navigate to a path in a backend-stored node *) 88 let rec find_in_node backend node path = 89 match path with 90 | [] -> None (* Can't find contents at node root *) 91 | [ key ] -> ( 92 match C.find node key with 93 | Some (`Contents h) -> read_contents backend h 94 | Some (`Contents_inlined data) -> Some data 95 | _ -> None) 96 | key :: rest -> ( 97 match C.find node key with 98 | Some (`Node h) -> ( 99 match read_node backend h with 100 | Some child -> find_in_node backend child rest 101 | None -> None) 102 | _ -> None) 103 104 (* Navigate in proof tree *) 105 let rec find_in_proof tree path = 106 match (tree, path) with 107 | Contents c, [] -> Some c 108 | Node entries, key :: rest -> ( 109 match List.assoc_opt key entries with 110 | Some child -> find_in_proof child rest 111 | None -> None) 112 | Blinded_contents _, [] -> None (* Can't read blinded *) 113 | Blinded_node _, _ -> None (* Can't traverse blinded *) 114 | _ -> None 115 116 let find t path = 117 record_access t path; 118 match t.state with 119 | Producing { backend; node_hash; _ } -> ( 120 match read_node backend node_hash with 121 | Some node -> find_in_node backend node path 122 | None -> None) 123 | From_proof { tree } -> find_in_proof tree path 124 125 let rec find_tree_in_node backend node path = 126 match path with 127 | [] -> Some (of_hash backend (C.hash_node node)) 128 | key :: rest -> ( 129 match C.find node key with 130 | Some (`Node h) -> ( 131 match read_node backend h with 132 | Some child -> find_tree_in_node backend child rest 133 | None -> None) 134 | Some (`Contents _ | `Contents_inlined _) -> None 135 | None -> None) 136 137 let rec find_tree_in_proof tree path = 138 match (tree, path) with 139 | (Node _ as n), [] -> Some (of_proof_tree n) 140 | Node entries, key :: rest -> ( 141 match List.assoc_opt key entries with 142 | Some child -> find_tree_in_proof child rest 143 | None -> None) 144 | Blinded_node _, _ -> None 145 | Contents _, _ | Blinded_contents _, _ -> None 146 147 let find_tree t path = 148 record_access t path; 149 match t.state with 150 | Producing { backend; node_hash; _ } -> ( 151 match read_node backend node_hash with 152 | Some node -> find_tree_in_node backend node path 153 | None -> None) 154 | From_proof { tree } -> find_tree_in_proof tree path 155 156 let mem t path = Option.is_some (find t path) 157 158 let list t path = 159 record_access t path; 160 match t.state with 161 | Producing { backend; node_hash; _ } -> ( 162 let rec navigate node = function 163 | [] -> 164 C.list node 165 |> List.map (fun (k, v) -> 166 let kind = 167 match v with 168 | `Node _ -> `Node 169 | `Contents _ | `Contents_inlined _ -> `Contents 170 in 171 (k, kind)) 172 | key :: rest -> ( 173 match C.find node key with 174 | Some (`Node h) -> ( 175 match read_node backend h with 176 | Some child -> navigate child rest 177 | None -> []) 178 | _ -> []) 179 in 180 match read_node backend node_hash with 181 | Some node -> navigate node path 182 | None -> []) 183 | From_proof { tree } -> 184 let rec navigate t = function 185 | [] -> ( 186 match t with 187 | Node entries -> 188 List.map 189 (fun (k, v) -> 190 let kind = 191 match v with 192 | Node _ | Blinded_node _ -> `Node 193 | Contents _ | Blinded_contents _ -> `Contents 194 in 195 (k, kind)) 196 entries 197 | _ -> []) 198 | key :: rest -> ( 199 match t with 200 | Node entries -> ( 201 match List.assoc_opt key entries with 202 | Some child -> navigate child rest 203 | None -> []) 204 | _ -> []) 205 in 206 navigate tree path 207 208 (* Write operations - only work on producing trees *) 209 let add t path contents = 210 record_access t path; 211 match t.state with 212 | Producing { backend; node_hash; accessed } -> 213 let rec add_to_node node = function 214 | [] -> failwith "Proof.Tree.add: empty path" 215 | [ key ] -> 216 if C.inline_threshold > 0 217 && String.length contents <= C.inline_threshold 218 then C.add node key (`Contents_inlined contents) 219 else begin 220 let h = C.hash_contents contents in 221 backend.write h contents; 222 C.add node key (`Contents h) 223 end 224 | key :: rest -> 225 let child_node = 226 match C.find node key with 227 | Some (`Node h) -> ( 228 match read_node backend h with 229 | Some n -> n 230 | None -> C.empty_node) 231 | _ -> C.empty_node 232 in 233 let updated = add_to_node child_node rest in 234 let data = C.bytes_of_node updated in 235 let h = C.hash_node updated in 236 backend.write h data; 237 C.add node key (`Node h) 238 in 239 let node = 240 match read_node backend node_hash with 241 | Some n -> n 242 | None -> C.empty_node 243 in 244 let updated = add_to_node node path in 245 let data = C.bytes_of_node updated in 246 let new_hash = C.hash_node updated in 247 backend.write new_hash data; 248 { state = Producing { backend; node_hash = new_hash; accessed } } 249 | From_proof _ -> failwith "Proof.Tree.add: cannot modify proof tree" 250 251 let add_tree t path child = 252 record_access t path; 253 match (t.state, child.state) with 254 | ( Producing { backend; node_hash; accessed }, 255 Producing { node_hash = child_hash; _ } ) -> 256 let rec add_tree_to_node node = function 257 | [] -> failwith "Proof.Tree.add_tree: empty path" 258 | [ key ] -> C.add node key (`Node child_hash) 259 | key :: rest -> 260 let sub_node = 261 match C.find node key with 262 | Some (`Node h) -> ( 263 match read_node backend h with 264 | Some n -> n 265 | None -> C.empty_node) 266 | _ -> C.empty_node 267 in 268 let updated = add_tree_to_node sub_node rest in 269 let data = C.bytes_of_node updated in 270 let h = C.hash_node updated in 271 backend.write h data; 272 C.add node key (`Node h) 273 in 274 let node = 275 match read_node backend node_hash with 276 | Some n -> n 277 | None -> C.empty_node 278 in 279 let updated = add_tree_to_node node path in 280 let data = C.bytes_of_node updated in 281 let new_hash = C.hash_node updated in 282 backend.write new_hash data; 283 { state = Producing { backend; node_hash = new_hash; accessed } } 284 | _ -> failwith "Proof.Tree.add_tree: incompatible trees" 285 286 let remove t path = 287 record_access t path; 288 match t.state with 289 | Producing { backend; node_hash; accessed } -> 290 let rec remove_from_node node = function 291 | [] -> node 292 | [ key ] -> C.remove node key 293 | key :: rest -> ( 294 match C.find node key with 295 | Some (`Node h) -> ( 296 match read_node backend h with 297 | Some child -> 298 let updated = remove_from_node child rest in 299 if C.is_empty updated then C.remove node key 300 else 301 let data = C.bytes_of_node updated in 302 let h = C.hash_node updated in 303 backend.write h data; 304 C.add node key (`Node h) 305 | None -> node) 306 | _ -> node) 307 in 308 let node = 309 match read_node backend node_hash with 310 | Some n -> n 311 | None -> C.empty_node 312 in 313 let updated = remove_from_node node path in 314 let data = C.bytes_of_node updated in 315 let new_hash = C.hash_node updated in 316 backend.write new_hash data; 317 { state = Producing { backend; node_hash = new_hash; accessed } } 318 | From_proof _ -> failwith "Proof.Tree.remove: cannot modify proof tree" 319 320 let hash t = 321 match t.state with 322 | Producing { node_hash; _ } -> node_hash 323 | From_proof { tree } -> 324 let rec hash_tree = function 325 | Contents c -> C.hash_contents c 326 | Blinded_contents h -> h 327 | Node entries -> 328 let node = 329 List.fold_left 330 (fun n (k, v) -> 331 let kind = 332 match v with 333 | Node _ | Blinded_node _ -> `Node (hash_tree v) 334 | Contents c 335 when C.inline_threshold > 0 336 && String.length c <= C.inline_threshold -> 337 `Contents_inlined c 338 | Contents _ | Blinded_contents _ -> 339 `Contents (hash_tree v) 340 in 341 C.add n k kind) 342 C.empty_node entries 343 in 344 C.hash_node node 345 | Blinded_node h -> h 346 in 347 hash_tree tree 348 end 349 350 (* Build proof tree from accessed paths *) 351 let build_proof_tree backend node_hash accessed = 352 let rec build node prefix = 353 let dominated_by_access = 354 Path_set.exists 355 (fun path -> 356 let plen = List.length prefix in 357 List.length path >= plen 358 && List.filteri (fun i _ -> i < plen) path = prefix) 359 accessed 360 in 361 if not dominated_by_access then Blinded_node (C.hash_node node) 362 else 363 let entries = C.list node in 364 let children = 365 List.map 366 (fun (key, kind) -> 367 let child_path = prefix @ [ key ] in 368 let child_tree = 369 match kind with 370 | `Contents_inlined data -> 371 (* Inlined contents are part of the node structure and 372 must always be included in proofs to preserve the 373 node hash. They are small (≤ inline_threshold bytes) 374 so this doesn't leak significant data. *) 375 Contents data 376 | `Contents h -> 377 if Path_set.mem child_path accessed then 378 match backend.Backend.read h with 379 | Some c -> Contents c 380 | None -> Blinded_contents h 381 else Blinded_contents h 382 | `Node h -> 383 if 384 Path_set.exists 385 (fun p -> 386 let clen = List.length child_path in 387 List.length p >= clen 388 && List.filteri (fun i _ -> i < clen) p = child_path) 389 accessed 390 then 391 match backend.Backend.read h with 392 | Some data -> ( 393 match C.node_of_bytes data with 394 | Ok child_node -> build child_node child_path 395 | Error _ -> Blinded_node h) 396 | None -> Blinded_node h 397 else Blinded_node h 398 in 399 (key, child_tree)) 400 entries 401 in 402 Node children 403 in 404 match backend.Backend.read node_hash with 405 | Some data -> ( 406 match C.node_of_bytes data with 407 | Ok node -> build node [] 408 | Error _ -> Blinded_node node_hash) 409 | None -> Blinded_node node_hash 410 411 let produce backend root_hash f = 412 let tree = Tree.of_hash backend root_hash in 413 let result_tree, result = f tree in 414 let after_hash = Tree.hash result_tree in 415 let accessed = 416 match tree.state with 417 | Producing { accessed; _ } -> accessed 418 | From_proof _ -> Path_set.empty 419 in 420 let proof_tree = build_proof_tree backend root_hash accessed in 421 let proof = 422 { before = `Node root_hash; after = `Node after_hash; state = proof_tree } 423 in 424 (proof, result) 425 426 let to_tree proof = Tree.of_proof_tree proof.state 427 428 let rec hash_of_tree = function 429 | Contents c -> `Contents (C.hash_contents c) 430 | Blinded_contents h -> `Contents h 431 | Node entries -> 432 let node = 433 List.fold_left 434 (fun n (k, v) -> 435 let entry = 436 match v with 437 | Contents c 438 when C.inline_threshold > 0 439 && String.length c <= C.inline_threshold -> 440 `Contents_inlined c 441 | _ -> ( 442 match hash_of_tree v with 443 | `Contents h -> `Contents h 444 | `Node h -> `Node h) 445 in 446 C.add n k entry) 447 C.empty_node entries 448 in 449 `Node (C.hash_node node) 450 | Blinded_node h -> `Node h 451 452 let verify proof f = 453 let tree = to_tree proof in 454 let result_tree, result = f tree in 455 let computed_after = `Node (Tree.hash result_tree) in 456 let expected_after = proof.after in 457 match (computed_after, expected_after) with 458 | `Node h1, `Node h2 when C.hash_equal h1 h2 -> Ok (result_tree, result) 459 | `Contents h1, `Contents h2 when C.hash_equal h1 h2 -> 460 Ok (result_tree, result) 461 | _ -> 462 Error 463 (`Proof_mismatch 464 (Fmt.str "expected %s, got %s" 465 (C.hash_to_hex 466 (match expected_after with `Node h | `Contents h -> h)) 467 (C.hash_to_hex 468 (match computed_after with `Node h | `Contents h -> h)))) 469end 470 471module Git = Make (Codec.Git) 472module Mst = Make (Codec.Mst)