Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
at inode 450 lines 16 kB view raw
1(** Merkle proofs for content-addressed trees. *) 2 3type 'hash kinded_hash = [ `Contents of 'hash | `Node of 'hash ] 4 5type ('hash, 'contents) tree = 6 | Contents of 'contents 7 | Blinded_contents of 'hash 8 | Node of ('hash, 'contents) node 9 | Blinded_node of 'hash 10 11and ('hash, 'contents) node = (string * ('hash, 'contents) tree) list 12 13type ('hash, 'contents) t = { 14 before : 'hash kinded_hash; 15 after : 'hash kinded_hash; 16 state : ('hash, 'contents) tree; 17} 18 19let v ~before ~after state = { before; after; state } 20let before p = p.before 21let after p = p.after 22let state p = p.state 23 24module Make (C : Codec.S) = struct 25 type hash = C.hash 26 type contents = string 27 28 (* Path set for tracking accessed paths *) 29 module PathSet = Set.Make (struct 30 type t = string list 31 32 let compare = compare 33 end) 34 35 (* Internal tree representation during proof production *) 36 type tree_state = 37 | Producing of { 38 backend : hash Backend.t; 39 node_hash : hash; 40 mutable accessed : PathSet.t; 41 } 42 | From_proof of { tree : (hash, contents) tree } 43 44 module Tree = struct 45 type t = { state : tree_state } 46 47 let of_hash backend h = 48 { state = Producing { backend; node_hash = h; accessed = PathSet.empty } } 49 50 let of_proof_tree tree = { state = From_proof { tree } } 51 52 (* Read node from backend *) 53 let read_node backend h = 54 match backend.Backend.read h with 55 | None -> None 56 | Some data -> ( 57 match C.node_of_bytes data with Ok n -> Some n | Error _ -> None) 58 59 (* Read contents from backend *) 60 let read_contents backend h = backend.Backend.read h 61 62 (* Record access to a path *) 63 let record_access t path = 64 match t.state with 65 | Producing p -> p.accessed <- PathSet.add path p.accessed 66 | From_proof _ -> () 67 68 (* Navigate to a path in a backend-stored node *) 69 let rec find_in_node backend node path = 70 match path with 71 | [] -> None (* Can't find contents at node root *) 72 | [ key ] -> ( 73 match C.find node key with 74 | Some (`Contents h) -> read_contents backend h 75 | Some (`Contents_inlined data) -> Some data 76 | _ -> None) 77 | key :: rest -> ( 78 match C.find node key with 79 | Some (`Node h) -> ( 80 match read_node backend h with 81 | Some child -> find_in_node backend child rest 82 | None -> None) 83 | _ -> None) 84 85 (* Navigate in proof tree *) 86 let rec find_in_proof tree path = 87 match (tree, path) with 88 | Contents c, [] -> Some c 89 | Node entries, key :: rest -> ( 90 match List.assoc_opt key entries with 91 | Some child -> find_in_proof child rest 92 | None -> None) 93 | Blinded_contents _, [] -> None (* Can't read blinded *) 94 | Blinded_node _, _ -> None (* Can't traverse blinded *) 95 | _ -> None 96 97 let find t path = 98 record_access t path; 99 match t.state with 100 | Producing { backend; node_hash; _ } -> ( 101 match read_node backend node_hash with 102 | Some node -> find_in_node backend node path 103 | None -> None) 104 | From_proof { tree } -> find_in_proof tree path 105 106 let rec find_tree_in_node backend node path = 107 match path with 108 | [] -> Some (of_hash backend (C.hash_node node)) 109 | key :: rest -> ( 110 match C.find node key with 111 | Some (`Node h) -> ( 112 match read_node backend h with 113 | Some child -> find_tree_in_node backend child rest 114 | None -> None) 115 | Some (`Contents _ | `Contents_inlined _) -> None 116 | None -> None) 117 118 let rec find_tree_in_proof tree path = 119 match (tree, path) with 120 | (Node _ as n), [] -> Some (of_proof_tree n) 121 | Node entries, key :: rest -> ( 122 match List.assoc_opt key entries with 123 | Some child -> find_tree_in_proof child rest 124 | None -> None) 125 | Blinded_node _, _ -> None 126 | Contents _, _ | Blinded_contents _, _ -> None 127 128 let find_tree t path = 129 record_access t path; 130 match t.state with 131 | Producing { backend; node_hash; _ } -> ( 132 match read_node backend node_hash with 133 | Some node -> find_tree_in_node backend node path 134 | None -> None) 135 | From_proof { tree } -> find_tree_in_proof tree path 136 137 let mem t path = Option.is_some (find t path) 138 139 let list t path = 140 record_access t path; 141 match t.state with 142 | Producing { backend; node_hash; _ } -> ( 143 let rec navigate node = function 144 | [] -> 145 C.list node 146 |> List.map (fun (k, v) -> 147 let kind = 148 match v with 149 | `Node _ -> `Node 150 | `Contents _ | `Contents_inlined _ -> `Contents 151 in 152 (k, kind)) 153 | key :: rest -> ( 154 match C.find node key with 155 | Some (`Node h) -> ( 156 match read_node backend h with 157 | Some child -> navigate child rest 158 | None -> []) 159 | _ -> []) 160 in 161 match read_node backend node_hash with 162 | Some node -> navigate node path 163 | None -> []) 164 | From_proof { tree } -> 165 let rec navigate t = function 166 | [] -> ( 167 match t with 168 | Node entries -> 169 List.map 170 (fun (k, v) -> 171 let kind = 172 match v with 173 | Node _ | Blinded_node _ -> `Node 174 | Contents _ | Blinded_contents _ -> `Contents 175 in 176 (k, kind)) 177 entries 178 | _ -> []) 179 | key :: rest -> ( 180 match t with 181 | Node entries -> ( 182 match List.assoc_opt key entries with 183 | Some child -> navigate child rest 184 | None -> []) 185 | _ -> []) 186 in 187 navigate tree path 188 189 (* Write operations - only work on producing trees *) 190 let add t path contents = 191 record_access t path; 192 match t.state with 193 | Producing { backend; node_hash; accessed } -> 194 let rec add_to_node node = function 195 | [] -> failwith "Proof.Tree.add: empty path" 196 | [ key ] -> 197 if C.inline_threshold > 0 198 && String.length contents <= C.inline_threshold 199 then C.add node key (`Contents_inlined contents) 200 else begin 201 let h = C.hash_contents contents in 202 backend.write h contents; 203 C.add node key (`Contents h) 204 end 205 | key :: rest -> 206 let child_node = 207 match C.find node key with 208 | Some (`Node h) -> ( 209 match read_node backend h with 210 | Some n -> n 211 | None -> C.empty_node) 212 | _ -> C.empty_node 213 in 214 let updated = add_to_node child_node rest in 215 let data = C.bytes_of_node updated in 216 let h = C.hash_node updated in 217 backend.write h data; 218 C.add node key (`Node h) 219 in 220 let node = 221 match read_node backend node_hash with 222 | Some n -> n 223 | None -> C.empty_node 224 in 225 let updated = add_to_node node path in 226 let data = C.bytes_of_node updated in 227 let new_hash = C.hash_node updated in 228 backend.write new_hash data; 229 { state = Producing { backend; node_hash = new_hash; accessed } } 230 | From_proof _ -> failwith "Proof.Tree.add: cannot modify proof tree" 231 232 let add_tree t path child = 233 record_access t path; 234 match (t.state, child.state) with 235 | ( Producing { backend; node_hash; accessed }, 236 Producing { node_hash = child_hash; _ } ) -> 237 let rec add_tree_to_node node = function 238 | [] -> failwith "Proof.Tree.add_tree: empty path" 239 | [ key ] -> C.add node key (`Node child_hash) 240 | key :: rest -> 241 let sub_node = 242 match C.find node key with 243 | Some (`Node h) -> ( 244 match read_node backend h with 245 | Some n -> n 246 | None -> C.empty_node) 247 | _ -> C.empty_node 248 in 249 let updated = add_tree_to_node sub_node rest in 250 let data = C.bytes_of_node updated in 251 let h = C.hash_node updated in 252 backend.write h data; 253 C.add node key (`Node h) 254 in 255 let node = 256 match read_node backend node_hash with 257 | Some n -> n 258 | None -> C.empty_node 259 in 260 let updated = add_tree_to_node node path in 261 let data = C.bytes_of_node updated in 262 let new_hash = C.hash_node updated in 263 backend.write new_hash data; 264 { state = Producing { backend; node_hash = new_hash; accessed } } 265 | _ -> failwith "Proof.Tree.add_tree: incompatible trees" 266 267 let remove t path = 268 record_access t path; 269 match t.state with 270 | Producing { backend; node_hash; accessed } -> 271 let rec remove_from_node node = function 272 | [] -> node 273 | [ key ] -> C.remove node key 274 | key :: rest -> ( 275 match C.find node key with 276 | Some (`Node h) -> ( 277 match read_node backend h with 278 | Some child -> 279 let updated = remove_from_node child rest in 280 if C.is_empty updated then C.remove node key 281 else 282 let data = C.bytes_of_node updated in 283 let h = C.hash_node updated in 284 backend.write h data; 285 C.add node key (`Node h) 286 | None -> node) 287 | _ -> node) 288 in 289 let node = 290 match read_node backend node_hash with 291 | Some n -> n 292 | None -> C.empty_node 293 in 294 let updated = remove_from_node node path in 295 let data = C.bytes_of_node updated in 296 let new_hash = C.hash_node updated in 297 backend.write new_hash data; 298 { state = Producing { backend; node_hash = new_hash; accessed } } 299 | From_proof _ -> failwith "Proof.Tree.remove: cannot modify proof tree" 300 301 let hash t = 302 match t.state with 303 | Producing { node_hash; _ } -> node_hash 304 | From_proof { tree } -> 305 let rec hash_tree = function 306 | Contents c -> C.hash_contents c 307 | Blinded_contents h -> h 308 | Node entries -> 309 let node = 310 List.fold_left 311 (fun n (k, v) -> 312 let kind = 313 match v with 314 | Node _ | Blinded_node _ -> `Node (hash_tree v) 315 | Contents c 316 when C.inline_threshold > 0 317 && String.length c <= C.inline_threshold -> 318 `Contents_inlined c 319 | Contents _ | Blinded_contents _ -> 320 `Contents (hash_tree v) 321 in 322 C.add n k kind) 323 C.empty_node entries 324 in 325 C.hash_node node 326 | Blinded_node h -> h 327 in 328 hash_tree tree 329 end 330 331 (* Build proof tree from accessed paths *) 332 let build_proof_tree backend node_hash accessed = 333 let rec build node prefix = 334 let dominated_by_access = 335 PathSet.exists 336 (fun path -> 337 let plen = List.length prefix in 338 List.length path >= plen 339 && List.filteri (fun i _ -> i < plen) path = prefix) 340 accessed 341 in 342 if not dominated_by_access then Blinded_node (C.hash_node node) 343 else 344 let entries = C.list node in 345 let children = 346 List.map 347 (fun (key, kind) -> 348 let child_path = prefix @ [ key ] in 349 let child_tree = 350 match kind with 351 | `Contents_inlined data -> 352 if PathSet.mem child_path accessed then Contents data 353 else Contents data (* inlined is always available *) 354 | `Contents h -> 355 if PathSet.mem child_path accessed then 356 match backend.Backend.read h with 357 | Some c -> Contents c 358 | None -> Blinded_contents h 359 else Blinded_contents h 360 | `Node h -> 361 if 362 PathSet.exists 363 (fun p -> 364 let clen = List.length child_path in 365 List.length p >= clen 366 && List.filteri (fun i _ -> i < clen) p = child_path) 367 accessed 368 then 369 match backend.Backend.read h with 370 | Some data -> ( 371 match C.node_of_bytes data with 372 | Ok child_node -> build child_node child_path 373 | Error _ -> Blinded_node h) 374 | None -> Blinded_node h 375 else Blinded_node h 376 in 377 (key, child_tree)) 378 entries 379 in 380 Node children 381 in 382 match backend.Backend.read node_hash with 383 | Some data -> ( 384 match C.node_of_bytes data with 385 | Ok node -> build node [] 386 | Error _ -> Blinded_node node_hash) 387 | None -> Blinded_node node_hash 388 389 let produce backend root_hash f = 390 let tree = Tree.of_hash backend root_hash in 391 let result_tree, result = f tree in 392 let after_hash = Tree.hash result_tree in 393 let accessed = 394 match tree.state with 395 | Producing { accessed; _ } -> accessed 396 | From_proof _ -> PathSet.empty 397 in 398 let proof_tree = build_proof_tree backend root_hash accessed in 399 let proof = 400 { before = `Node root_hash; after = `Node after_hash; state = proof_tree } 401 in 402 (proof, result) 403 404 let to_tree proof = Tree.of_proof_tree proof.state 405 406 let rec hash_of_tree = function 407 | Contents c -> `Contents (C.hash_contents c) 408 | Blinded_contents h -> `Contents h 409 | Node entries -> 410 let node = 411 List.fold_left 412 (fun n (k, v) -> 413 let entry = 414 match v with 415 | Contents c 416 when C.inline_threshold > 0 417 && String.length c <= C.inline_threshold -> 418 `Contents_inlined c 419 | _ -> ( 420 match hash_of_tree v with 421 | `Contents h -> `Contents h 422 | `Node h -> `Node h) 423 in 424 C.add n k entry) 425 C.empty_node entries 426 in 427 `Node (C.hash_node node) 428 | Blinded_node h -> `Node h 429 430 let verify proof f = 431 let tree = to_tree proof in 432 let result_tree, result = f tree in 433 let computed_after = `Node (Tree.hash result_tree) in 434 let expected_after = proof.after in 435 match (computed_after, expected_after) with 436 | `Node h1, `Node h2 when C.hash_equal h1 h2 -> Ok (result_tree, result) 437 | `Contents h1, `Contents h2 when C.hash_equal h1 h2 -> 438 Ok (result_tree, result) 439 | _ -> 440 Error 441 (`Proof_mismatch 442 (Printf.sprintf "expected %s, got %s" 443 (C.hash_to_hex 444 (match expected_after with `Node h | `Contents h -> h)) 445 (C.hash_to_hex 446 (match computed_after with `Node h | `Contents h -> h)))) 447end 448 449module Git = Make (Codec.Git) 450module Mst = Make (Codec.Mst)