Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
at benchs 429 lines 15 kB view raw
1(** Merkle proofs for content-addressed trees. *) 2 3type 'hash kinded_hash = [ `Contents of 'hash | `Node of 'hash ] 4 5type ('hash, 'contents) tree = 6 | Contents of 'contents 7 | Blinded_contents of 'hash 8 | Node of ('hash, 'contents) node 9 | Blinded_node of 'hash 10 11and ('hash, 'contents) node = (string * ('hash, 'contents) tree) list 12 13type ('hash, 'contents) t = { 14 before : 'hash kinded_hash; 15 after : 'hash kinded_hash; 16 state : ('hash, 'contents) tree; 17} 18 19let v ~before ~after state = { before; after; state } 20let before p = p.before 21let after p = p.after 22let state p = p.state 23 24module Make (C : Codec.S) = struct 25 type hash = C.hash 26 type contents = string 27 28 (* Path set for tracking accessed paths *) 29 module PathSet = Set.Make (struct 30 type t = string list 31 32 let compare = compare 33 end) 34 35 (* Internal tree representation during proof production *) 36 type tree_state = 37 | Producing of { 38 backend : hash Backend.t; 39 node_hash : hash; 40 mutable accessed : PathSet.t; 41 } 42 | From_proof of { tree : (hash, contents) tree } 43 44 module Tree = struct 45 type t = { state : tree_state } 46 47 let of_hash backend h = 48 { state = Producing { backend; node_hash = h; accessed = PathSet.empty } } 49 50 let of_proof_tree tree = { state = From_proof { tree } } 51 52 (* Read node from backend *) 53 let read_node backend h = 54 match backend.Backend.read h with 55 | None -> None 56 | Some data -> ( 57 match C.node_of_bytes data with Ok n -> Some n | Error _ -> None) 58 59 (* Read contents from backend *) 60 let read_contents backend h = backend.Backend.read h 61 62 (* Record access to a path *) 63 let record_access t path = 64 match t.state with 65 | Producing p -> p.accessed <- PathSet.add path p.accessed 66 | From_proof _ -> () 67 68 (* Navigate to a path in a backend-stored node *) 69 let rec find_in_node backend node path = 70 match path with 71 | [] -> None (* Can't find contents at node root *) 72 | [ key ] -> ( 73 match C.find node key with 74 | Some (`Contents h) -> read_contents backend h 75 | _ -> None) 76 | key :: rest -> ( 77 match C.find node key with 78 | Some (`Node h) -> ( 79 match read_node backend h with 80 | Some child -> find_in_node backend child rest 81 | None -> None) 82 | _ -> None) 83 84 (* Navigate in proof tree *) 85 let rec find_in_proof tree path = 86 match (tree, path) with 87 | Contents c, [] -> Some c 88 | Node entries, key :: rest -> ( 89 match List.assoc_opt key entries with 90 | Some child -> find_in_proof child rest 91 | None -> None) 92 | Blinded_contents _, [] -> None (* Can't read blinded *) 93 | Blinded_node _, _ -> None (* Can't traverse blinded *) 94 | _ -> None 95 96 let find t path = 97 record_access t path; 98 match t.state with 99 | Producing { backend; node_hash; _ } -> ( 100 match read_node backend node_hash with 101 | Some node -> find_in_node backend node path 102 | None -> None) 103 | From_proof { tree } -> find_in_proof tree path 104 105 let rec find_tree_in_node backend node path = 106 match path with 107 | [] -> Some (of_hash backend (C.hash_node node)) 108 | key :: rest -> ( 109 match C.find node key with 110 | Some (`Node h) -> ( 111 match read_node backend h with 112 | Some child -> find_tree_in_node backend child rest 113 | None -> None) 114 | Some (`Contents _) -> None 115 | None -> None) 116 117 let rec find_tree_in_proof tree path = 118 match (tree, path) with 119 | (Node _ as n), [] -> Some (of_proof_tree n) 120 | Node entries, key :: rest -> ( 121 match List.assoc_opt key entries with 122 | Some child -> find_tree_in_proof child rest 123 | None -> None) 124 | Blinded_node _, _ -> None 125 | Contents _, _ | Blinded_contents _, _ -> None 126 127 let find_tree t path = 128 record_access t path; 129 match t.state with 130 | Producing { backend; node_hash; _ } -> ( 131 match read_node backend node_hash with 132 | Some node -> find_tree_in_node backend node path 133 | None -> None) 134 | From_proof { tree } -> find_tree_in_proof tree path 135 136 let mem t path = Option.is_some (find t path) 137 138 let list t path = 139 record_access t path; 140 match t.state with 141 | Producing { backend; node_hash; _ } -> ( 142 let rec navigate node = function 143 | [] -> 144 C.list node 145 |> List.map (fun (k, v) -> 146 let kind = 147 match v with `Node _ -> `Node | `Contents _ -> `Contents 148 in 149 (k, kind)) 150 | key :: rest -> ( 151 match C.find node key with 152 | Some (`Node h) -> ( 153 match read_node backend h with 154 | Some child -> navigate child rest 155 | None -> []) 156 | _ -> []) 157 in 158 match read_node backend node_hash with 159 | Some node -> navigate node path 160 | None -> []) 161 | From_proof { tree } -> 162 let rec navigate t = function 163 | [] -> ( 164 match t with 165 | Node entries -> 166 List.map 167 (fun (k, v) -> 168 let kind = 169 match v with 170 | Node _ | Blinded_node _ -> `Node 171 | Contents _ | Blinded_contents _ -> `Contents 172 in 173 (k, kind)) 174 entries 175 | _ -> []) 176 | key :: rest -> ( 177 match t with 178 | Node entries -> ( 179 match List.assoc_opt key entries with 180 | Some child -> navigate child rest 181 | None -> []) 182 | _ -> []) 183 in 184 navigate tree path 185 186 (* Write operations - only work on producing trees *) 187 let add t path contents = 188 record_access t path; 189 match t.state with 190 | Producing { backend; node_hash; accessed } -> 191 let rec add_to_node node = function 192 | [] -> failwith "Proof.Tree.add: empty path" 193 | [ key ] -> 194 let h = C.hash_contents contents in 195 backend.write h contents; 196 C.add node key (`Contents h) 197 | key :: rest -> 198 let child_node = 199 match C.find node key with 200 | Some (`Node h) -> ( 201 match read_node backend h with 202 | Some n -> n 203 | None -> C.empty_node) 204 | _ -> C.empty_node 205 in 206 let updated = add_to_node child_node rest in 207 let data = C.bytes_of_node updated in 208 let h = C.hash_node updated in 209 backend.write h data; 210 C.add node key (`Node h) 211 in 212 let node = 213 match read_node backend node_hash with 214 | Some n -> n 215 | None -> C.empty_node 216 in 217 let updated = add_to_node node path in 218 let data = C.bytes_of_node updated in 219 let new_hash = C.hash_node updated in 220 backend.write new_hash data; 221 { state = Producing { backend; node_hash = new_hash; accessed } } 222 | From_proof _ -> failwith "Proof.Tree.add: cannot modify proof tree" 223 224 let add_tree t path child = 225 record_access t path; 226 match (t.state, child.state) with 227 | ( Producing { backend; node_hash; accessed }, 228 Producing { node_hash = child_hash; _ } ) -> 229 let rec add_tree_to_node node = function 230 | [] -> failwith "Proof.Tree.add_tree: empty path" 231 | [ key ] -> C.add node key (`Node child_hash) 232 | key :: rest -> 233 let sub_node = 234 match C.find node key with 235 | Some (`Node h) -> ( 236 match read_node backend h with 237 | Some n -> n 238 | None -> C.empty_node) 239 | _ -> C.empty_node 240 in 241 let updated = add_tree_to_node sub_node rest in 242 let data = C.bytes_of_node updated in 243 let h = C.hash_node updated in 244 backend.write h data; 245 C.add node key (`Node h) 246 in 247 let node = 248 match read_node backend node_hash with 249 | Some n -> n 250 | None -> C.empty_node 251 in 252 let updated = add_tree_to_node node path in 253 let data = C.bytes_of_node updated in 254 let new_hash = C.hash_node updated in 255 backend.write new_hash data; 256 { state = Producing { backend; node_hash = new_hash; accessed } } 257 | _ -> failwith "Proof.Tree.add_tree: incompatible trees" 258 259 let remove t path = 260 record_access t path; 261 match t.state with 262 | Producing { backend; node_hash; accessed } -> 263 let rec remove_from_node node = function 264 | [] -> node 265 | [ key ] -> C.remove node key 266 | key :: rest -> ( 267 match C.find node key with 268 | Some (`Node h) -> ( 269 match read_node backend h with 270 | Some child -> 271 let updated = remove_from_node child rest in 272 if C.is_empty updated then C.remove node key 273 else 274 let data = C.bytes_of_node updated in 275 let h = C.hash_node updated in 276 backend.write h data; 277 C.add node key (`Node h) 278 | None -> node) 279 | _ -> node) 280 in 281 let node = 282 match read_node backend node_hash with 283 | Some n -> n 284 | None -> C.empty_node 285 in 286 let updated = remove_from_node node path in 287 let data = C.bytes_of_node updated in 288 let new_hash = C.hash_node updated in 289 backend.write new_hash data; 290 { state = Producing { backend; node_hash = new_hash; accessed } } 291 | From_proof _ -> failwith "Proof.Tree.remove: cannot modify proof tree" 292 293 let hash t = 294 match t.state with 295 | Producing { node_hash; _ } -> node_hash 296 | From_proof { tree } -> 297 let rec hash_tree = function 298 | Contents c -> C.hash_contents c 299 | Blinded_contents h -> h 300 | Node entries -> 301 let node = 302 List.fold_left 303 (fun n (k, v) -> 304 let kind = 305 match v with 306 | Node _ | Blinded_node _ -> `Node (hash_tree v) 307 | Contents _ | Blinded_contents _ -> 308 `Contents (hash_tree v) 309 in 310 C.add n k kind) 311 C.empty_node entries 312 in 313 C.hash_node node 314 | Blinded_node h -> h 315 in 316 hash_tree tree 317 end 318 319 (* Build proof tree from accessed paths *) 320 let build_proof_tree backend node_hash accessed = 321 let rec build node prefix = 322 let dominated_by_access = 323 PathSet.exists 324 (fun path -> 325 let plen = List.length prefix in 326 List.length path >= plen 327 && List.filteri (fun i _ -> i < plen) path = prefix) 328 accessed 329 in 330 if not dominated_by_access then Blinded_node (C.hash_node node) 331 else 332 let entries = C.list node in 333 let children = 334 List.map 335 (fun (key, kind) -> 336 let child_path = prefix @ [ key ] in 337 let child_tree = 338 match kind with 339 | `Contents h -> 340 if PathSet.mem child_path accessed then 341 match backend.Backend.read h with 342 | Some c -> Contents c 343 | None -> Blinded_contents h 344 else Blinded_contents h 345 | `Node h -> 346 if 347 PathSet.exists 348 (fun p -> 349 let clen = List.length child_path in 350 List.length p >= clen 351 && List.filteri (fun i _ -> i < clen) p = child_path) 352 accessed 353 then 354 match backend.Backend.read h with 355 | Some data -> ( 356 match C.node_of_bytes data with 357 | Ok child_node -> build child_node child_path 358 | Error _ -> Blinded_node h) 359 | None -> Blinded_node h 360 else Blinded_node h 361 in 362 (key, child_tree)) 363 entries 364 in 365 Node children 366 in 367 match backend.Backend.read node_hash with 368 | Some data -> ( 369 match C.node_of_bytes data with 370 | Ok node -> build node [] 371 | Error _ -> Blinded_node node_hash) 372 | None -> Blinded_node node_hash 373 374 let produce backend root_hash f = 375 let tree = Tree.of_hash backend root_hash in 376 let result_tree, result = f tree in 377 let after_hash = Tree.hash result_tree in 378 let accessed = 379 match tree.state with 380 | Producing { accessed; _ } -> accessed 381 | From_proof _ -> PathSet.empty 382 in 383 let proof_tree = build_proof_tree backend root_hash accessed in 384 let proof = 385 { before = `Node root_hash; after = `Node after_hash; state = proof_tree } 386 in 387 (proof, result) 388 389 let to_tree proof = Tree.of_proof_tree proof.state 390 391 let rec hash_of_tree = function 392 | Contents c -> `Contents (C.hash_contents c) 393 | Blinded_contents h -> `Contents h 394 | Node entries -> 395 let node = 396 List.fold_left 397 (fun n (k, v) -> 398 let h = 399 match hash_of_tree v with 400 | `Contents h -> `Contents h 401 | `Node h -> `Node h 402 in 403 C.add n k h) 404 C.empty_node entries 405 in 406 `Node (C.hash_node node) 407 | Blinded_node h -> `Node h 408 409 let verify proof f = 410 let tree = to_tree proof in 411 let result_tree, result = f tree in 412 let computed_after = `Node (Tree.hash result_tree) in 413 let expected_after = proof.after in 414 match (computed_after, expected_after) with 415 | `Node h1, `Node h2 when C.hash_equal h1 h2 -> Ok (result_tree, result) 416 | `Contents h1, `Contents h2 when C.hash_equal h1 h2 -> 417 Ok (result_tree, result) 418 | _ -> 419 Error 420 (`Proof_mismatch 421 (Printf.sprintf "expected %s, got %s" 422 (C.hash_to_hex 423 (match expected_after with `Node h | `Contents h -> h)) 424 (C.hash_to_hex 425 (match computed_after with `Node h | `Contents h -> h)))) 426end 427 428module Git = Make (Codec.Git) 429module Mst = Make (Codec.Mst)