Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
at main 448 lines 16 kB view raw
1(** Merkle proofs for content-addressed trees. *) 2 3type 'hash kinded_hash = [ `Contents of 'hash | `Node of 'hash ] 4 5type ('hash, 'contents) tree = 6 | Contents of 'contents 7 | Blinded_contents of 'hash 8 | Node of ('hash, 'contents) node 9 | Blinded_node of 'hash 10 11and ('hash, 'contents) node = (string * ('hash, 'contents) tree) list 12 13type ('hash, 'contents) t = { 14 before : 'hash kinded_hash; 15 after : 'hash kinded_hash; 16 state : ('hash, 'contents) tree; 17} 18 19let v ~before ~after state = { before; after; state } 20let before p = p.before 21let after p = p.after 22let state p = p.state 23 24let pp pp_hash pp_contents fmt p = 25 let pp_kinded fmt = function 26 | `Contents h -> Fmt.pf fmt "contents:%a" pp_hash h 27 | `Node h -> Fmt.pf fmt "node:%a" pp_hash h 28 in 29 let rec pp_tree fmt = function 30 | Contents c -> Fmt.pf fmt "(%a)" pp_contents c 31 | Blinded_contents h -> Fmt.pf fmt "#(%a)" pp_hash h 32 | Node entries -> 33 Fmt.pf fmt "{%a}" 34 Fmt.(list ~sep:(any ", ") (pair ~sep:(any ":") string pp_tree)) 35 entries 36 | Blinded_node h -> Fmt.pf fmt "#{%a}" pp_hash h 37 in 38 Fmt.pf fmt "@[<2>proof{before=%a;@ after=%a;@ state=%a}@]" pp_kinded p.before 39 pp_kinded p.after pp_tree p.state 40 41module Make (C : Codec.S) = struct 42 type hash = C.hash 43 type contents = string 44 45 (* Path set for tracking accessed paths *) 46 module Path_set = Set.Make (struct 47 type t = string list 48 49 let compare = compare 50 end) 51 52 (* Internal tree representation during proof production *) 53 type tree_state = 54 | Producing of { 55 backend : hash Backend.t; 56 node_hash : hash; 57 mutable accessed : Path_set.t; 58 } 59 | From_proof of { tree : (hash, contents) tree } 60 61 module Tree = struct 62 type t = { state : tree_state } 63 64 let of_hash backend h = 65 { 66 state = Producing { backend; node_hash = h; accessed = Path_set.empty }; 67 } 68 69 let of_proof_tree tree = { state = From_proof { tree } } 70 71 (* Read node from backend *) 72 let read_node backend h = 73 match backend.Backend.read h with 74 | None -> None 75 | Some data -> ( 76 match C.node_of_bytes data with Ok n -> Some n | Error _ -> None) 77 78 (* Read contents from backend *) 79 let read_contents backend h = backend.Backend.read h 80 81 (* Record access to a path *) 82 let record_access t path = 83 match t.state with 84 | Producing p -> p.accessed <- Path_set.add path p.accessed 85 | From_proof _ -> () 86 87 (* Navigate to a path in a backend-stored node *) 88 let rec find_in_node backend node path = 89 match path with 90 | [] -> None (* Can't find contents at node root *) 91 | [ key ] -> ( 92 match C.find node key with 93 | Some (`Contents h) -> read_contents backend h 94 | _ -> None) 95 | key :: rest -> ( 96 match C.find node key with 97 | Some (`Node h) -> ( 98 match read_node backend h with 99 | Some child -> find_in_node backend child rest 100 | None -> None) 101 | _ -> None) 102 103 (* Navigate in proof tree *) 104 let rec find_in_proof tree path = 105 match (tree, path) with 106 | Contents c, [] -> Some c 107 | Node entries, key :: rest -> ( 108 match List.assoc_opt key entries with 109 | Some child -> find_in_proof child rest 110 | None -> None) 111 | Blinded_contents _, [] -> None (* Can't read blinded *) 112 | Blinded_node _, _ -> None (* Can't traverse blinded *) 113 | _ -> None 114 115 let find t path = 116 record_access t path; 117 match t.state with 118 | Producing { backend; node_hash; _ } -> ( 119 match read_node backend node_hash with 120 | Some node -> find_in_node backend node path 121 | None -> None) 122 | From_proof { tree } -> find_in_proof tree path 123 124 let rec find_tree_in_node backend node path = 125 match path with 126 | [] -> Some (of_hash backend (C.hash_node node)) 127 | key :: rest -> ( 128 match C.find node key with 129 | Some (`Node h) -> ( 130 match read_node backend h with 131 | Some child -> find_tree_in_node backend child rest 132 | None -> None) 133 | Some (`Contents _) -> None 134 | None -> None) 135 136 let rec find_tree_in_proof tree path = 137 match (tree, path) with 138 | (Node _ as n), [] -> Some (of_proof_tree n) 139 | Node entries, key :: rest -> ( 140 match List.assoc_opt key entries with 141 | Some child -> find_tree_in_proof child rest 142 | None -> None) 143 | Blinded_node _, _ -> None 144 | Contents _, _ | Blinded_contents _, _ -> None 145 146 let find_tree t path = 147 record_access t path; 148 match t.state with 149 | Producing { backend; node_hash; _ } -> ( 150 match read_node backend node_hash with 151 | Some node -> find_tree_in_node backend node path 152 | None -> None) 153 | From_proof { tree } -> find_tree_in_proof tree path 154 155 let mem t path = Option.is_some (find t path) 156 157 let list t path = 158 record_access t path; 159 match t.state with 160 | Producing { backend; node_hash; _ } -> ( 161 let rec navigate node = function 162 | [] -> 163 C.list node 164 |> List.map (fun (k, v) -> 165 let kind = 166 match v with `Node _ -> `Node | `Contents _ -> `Contents 167 in 168 (k, kind)) 169 | key :: rest -> ( 170 match C.find node key with 171 | Some (`Node h) -> ( 172 match read_node backend h with 173 | Some child -> navigate child rest 174 | None -> []) 175 | _ -> []) 176 in 177 match read_node backend node_hash with 178 | Some node -> navigate node path 179 | None -> []) 180 | From_proof { tree } -> 181 let rec navigate t = function 182 | [] -> ( 183 match t with 184 | Node entries -> 185 List.map 186 (fun (k, v) -> 187 let kind = 188 match v with 189 | Node _ | Blinded_node _ -> `Node 190 | Contents _ | Blinded_contents _ -> `Contents 191 in 192 (k, kind)) 193 entries 194 | _ -> []) 195 | key :: rest -> ( 196 match t with 197 | Node entries -> ( 198 match List.assoc_opt key entries with 199 | Some child -> navigate child rest 200 | None -> []) 201 | _ -> []) 202 in 203 navigate tree path 204 205 (* Write operations - only work on producing trees *) 206 let add t path contents = 207 record_access t path; 208 match t.state with 209 | Producing { backend; node_hash; accessed } -> 210 let rec add_to_node node = function 211 | [] -> failwith "Proof.Tree.add: empty path" 212 | [ key ] -> 213 let h = C.hash_contents contents in 214 backend.write h contents; 215 C.add node key (`Contents h) 216 | key :: rest -> 217 let child_node = 218 match C.find node key with 219 | Some (`Node h) -> ( 220 match read_node backend h with 221 | Some n -> n 222 | None -> C.empty_node) 223 | _ -> C.empty_node 224 in 225 let updated = add_to_node child_node rest in 226 let data = C.bytes_of_node updated in 227 let h = C.hash_node updated in 228 backend.write h data; 229 C.add node key (`Node h) 230 in 231 let node = 232 match read_node backend node_hash with 233 | Some n -> n 234 | None -> C.empty_node 235 in 236 let updated = add_to_node node path in 237 let data = C.bytes_of_node updated in 238 let new_hash = C.hash_node updated in 239 backend.write new_hash data; 240 { state = Producing { backend; node_hash = new_hash; accessed } } 241 | From_proof _ -> failwith "Proof.Tree.add: cannot modify proof tree" 242 243 let add_tree t path child = 244 record_access t path; 245 match (t.state, child.state) with 246 | ( Producing { backend; node_hash; accessed }, 247 Producing { node_hash = child_hash; _ } ) -> 248 let rec add_tree_to_node node = function 249 | [] -> failwith "Proof.Tree.add_tree: empty path" 250 | [ key ] -> C.add node key (`Node child_hash) 251 | key :: rest -> 252 let sub_node = 253 match C.find node key with 254 | Some (`Node h) -> ( 255 match read_node backend h with 256 | Some n -> n 257 | None -> C.empty_node) 258 | _ -> C.empty_node 259 in 260 let updated = add_tree_to_node sub_node rest in 261 let data = C.bytes_of_node updated in 262 let h = C.hash_node updated in 263 backend.write h data; 264 C.add node key (`Node h) 265 in 266 let node = 267 match read_node backend node_hash with 268 | Some n -> n 269 | None -> C.empty_node 270 in 271 let updated = add_tree_to_node node path in 272 let data = C.bytes_of_node updated in 273 let new_hash = C.hash_node updated in 274 backend.write new_hash data; 275 { state = Producing { backend; node_hash = new_hash; accessed } } 276 | _ -> failwith "Proof.Tree.add_tree: incompatible trees" 277 278 let remove t path = 279 record_access t path; 280 match t.state with 281 | Producing { backend; node_hash; accessed } -> 282 let rec remove_from_node node = function 283 | [] -> node 284 | [ key ] -> C.remove node key 285 | key :: rest -> ( 286 match C.find node key with 287 | Some (`Node h) -> ( 288 match read_node backend h with 289 | Some child -> 290 let updated = remove_from_node child rest in 291 if C.is_empty updated then C.remove node key 292 else 293 let data = C.bytes_of_node updated in 294 let h = C.hash_node updated in 295 backend.write h data; 296 C.add node key (`Node h) 297 | None -> node) 298 | _ -> node) 299 in 300 let node = 301 match read_node backend node_hash with 302 | Some n -> n 303 | None -> C.empty_node 304 in 305 let updated = remove_from_node node path in 306 let data = C.bytes_of_node updated in 307 let new_hash = C.hash_node updated in 308 backend.write new_hash data; 309 { state = Producing { backend; node_hash = new_hash; accessed } } 310 | From_proof _ -> failwith "Proof.Tree.remove: cannot modify proof tree" 311 312 let hash t = 313 match t.state with 314 | Producing { node_hash; _ } -> node_hash 315 | From_proof { tree } -> 316 let rec hash_tree = function 317 | Contents c -> C.hash_contents c 318 | Blinded_contents h -> h 319 | Node entries -> 320 let node = 321 List.fold_left 322 (fun n (k, v) -> 323 let kind = 324 match v with 325 | Node _ | Blinded_node _ -> `Node (hash_tree v) 326 | Contents _ | Blinded_contents _ -> 327 `Contents (hash_tree v) 328 in 329 C.add n k kind) 330 C.empty_node entries 331 in 332 C.hash_node node 333 | Blinded_node h -> h 334 in 335 hash_tree tree 336 end 337 338 (* Build proof tree from accessed paths *) 339 let build_proof_tree backend node_hash accessed = 340 let rec build node prefix = 341 let dominated_by_access = 342 Path_set.exists 343 (fun path -> 344 let plen = List.length prefix in 345 List.length path >= plen 346 && List.filteri (fun i _ -> i < plen) path = prefix) 347 accessed 348 in 349 if not dominated_by_access then Blinded_node (C.hash_node node) 350 else 351 let entries = C.list node in 352 let children = 353 List.map 354 (fun (key, kind) -> 355 let child_path = prefix @ [ key ] in 356 let child_tree = 357 match kind with 358 | `Contents h -> 359 if Path_set.mem child_path accessed then 360 match backend.Backend.read h with 361 | Some c -> Contents c 362 | None -> Blinded_contents h 363 else Blinded_contents h 364 | `Node h -> 365 if 366 Path_set.exists 367 (fun p -> 368 let clen = List.length child_path in 369 List.length p >= clen 370 && List.filteri (fun i _ -> i < clen) p = child_path) 371 accessed 372 then 373 match backend.Backend.read h with 374 | Some data -> ( 375 match C.node_of_bytes data with 376 | Ok child_node -> build child_node child_path 377 | Error _ -> Blinded_node h) 378 | None -> Blinded_node h 379 else Blinded_node h 380 in 381 (key, child_tree)) 382 entries 383 in 384 Node children 385 in 386 match backend.Backend.read node_hash with 387 | Some data -> ( 388 match C.node_of_bytes data with 389 | Ok node -> build node [] 390 | Error _ -> Blinded_node node_hash) 391 | None -> Blinded_node node_hash 392 393 let produce backend root_hash f = 394 let tree = Tree.of_hash backend root_hash in 395 let result_tree, result = f tree in 396 let after_hash = Tree.hash result_tree in 397 let accessed = 398 match tree.state with 399 | Producing { accessed; _ } -> accessed 400 | From_proof _ -> Path_set.empty 401 in 402 let proof_tree = build_proof_tree backend root_hash accessed in 403 let proof = 404 { before = `Node root_hash; after = `Node after_hash; state = proof_tree } 405 in 406 (proof, result) 407 408 let to_tree proof = Tree.of_proof_tree proof.state 409 410 let rec hash_of_tree = function 411 | Contents c -> `Contents (C.hash_contents c) 412 | Blinded_contents h -> `Contents h 413 | Node entries -> 414 let node = 415 List.fold_left 416 (fun n (k, v) -> 417 let h = 418 match hash_of_tree v with 419 | `Contents h -> `Contents h 420 | `Node h -> `Node h 421 in 422 C.add n k h) 423 C.empty_node entries 424 in 425 `Node (C.hash_node node) 426 | Blinded_node h -> `Node h 427 428 let verify proof f = 429 let tree = to_tree proof in 430 let result_tree, result = f tree in 431 let computed_after = `Node (Tree.hash result_tree) in 432 let expected_after = proof.after in 433 match (computed_after, expected_after) with 434 | `Node h1, `Node h2 when C.hash_equal h1 h2 -> Ok (result_tree, result) 435 | `Contents h1, `Contents h2 when C.hash_equal h1 h2 -> 436 Ok (result_tree, result) 437 | _ -> 438 Error 439 (`Proof_mismatch 440 (Fmt.str "expected %s, got %s" 441 (C.hash_to_hex 442 (match expected_after with `Node h | `Contents h -> h)) 443 (C.hash_to_hex 444 (match computed_after with `Node h | `Contents h -> h)))) 445end 446 447module Git = Make (Codec.Git) 448module Mst = Make (Codec.Mst)