Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
at perf 410 lines 15 kB view raw
1module SMap = Map.Make (String) 2module SSet = Set.Make (String) 3 4module Make (F : Codec.S) = struct 5 module Inode = Inode.Make (F) 6 7 type hash = F.hash 8 type path = string list 9 type concrete = [ `Contents of string | `Tree of (string * concrete) list ] 10 11 (* Internal tree representation with lazy loading *) 12 type node_state = 13 | Loaded of F.node 14 | Inode of { backend : hash Backend.t; hash : hash } 15 | Lazy of { backend : hash Backend.t; hash : hash } 16 | Shallow of hash 17 | Pruned of hash 18 19 type tree_node = 20 | Contents of string 21 | Node of node_record 22 23 and node_record = { 24 mutable state : node_state; 25 backend : hash Backend.t option; 26 mutable children : tree_node SMap.t; (* modifications *) 27 mutable removed : SSet.t; 28 resolved : (string, tree_node) Hashtbl.t; (* read cache *) 29 } 30 31 type t = tree_node 32 33 let empty () = 34 Node { state = Loaded F.empty_node; backend = None; 35 children = SMap.empty; removed = SSet.empty; resolved = Hashtbl.create 0 } 36 37 let of_hash ~backend hash = 38 Node { state = Lazy { backend; hash }; backend = Some backend; 39 children = SMap.empty; removed = SSet.empty; resolved = Hashtbl.create 0 } 40 41 let shallow hash = 42 Node { state = Shallow hash; backend = None; 43 children = SMap.empty; removed = SSet.empty; resolved = Hashtbl.create 0 } 44 45 let pruned hash = 46 Node { state = Pruned hash; backend = None; 47 children = SMap.empty; removed = SSet.empty; resolved = Hashtbl.create 0 } 48 49 let rec of_concrete : concrete -> t = function 50 | `Contents s -> Contents s 51 | `Tree entries -> 52 let children = 53 List.fold_left (fun m (name, c) -> SMap.add name (of_concrete c) m) 54 SMap.empty entries 55 in 56 Node { state = Loaded F.empty_node; backend = None; 57 children; removed = SSet.empty; resolved = Hashtbl.create 0 } 58 59 (* Resolve a lazy node: load from backend, detect inode format. *) 60 let resolve_state node = 61 match node.state with 62 | Loaded _ | Inode _ | Shallow _ | Pruned _ -> () 63 | Lazy { backend; hash } -> ( 64 match backend.read hash with 65 | None -> () 66 | Some data -> 67 if Inode.is_inode data then 68 node.state <- Inode { backend; hash } 69 else ( 70 match F.node_of_bytes data with 71 | Ok n -> node.state <- Loaded n 72 | Error _ -> ())) 73 74 (* Look up a single entry by name, handling both flat nodes and inodes. *) 75 let resolve_entry node name = 76 resolve_state node; 77 match node.state with 78 | Loaded n -> F.find n name 79 | Inode { backend; hash } -> Inode.find ~backend hash name 80 | _ -> None 81 82 (* List all entries, handling both flat nodes and inodes. *) 83 let resolve_entries node = 84 resolve_state node; 85 match node.state with 86 | Loaded n -> Some (F.list n) 87 | Inode { backend; hash } -> Some (Inode.list_all ~backend hash) 88 | _ -> None 89 90 (* Navigate to a path, returning the node and remaining path. 91 Resolved children are cached in [node.resolved] to avoid repeated 92 deserialization on subsequent reads. *) 93 let rec navigate t path = 94 match (t, path) with 95 | _, [] -> Some (t, []) 96 | Contents _, _ :: _ -> None 97 | Node node, name :: rest -> ( 98 (* Check modifications first *) 99 match SMap.find_opt name node.children with 100 | Some child -> navigate child rest 101 | None -> ( 102 if SSet.mem name node.removed then None 103 else 104 (* Check read cache *) 105 match Hashtbl.find_opt node.resolved name with 106 | Some child -> navigate child rest 107 | None -> 108 let resolved = 109 match resolve_entry node name with 110 | None -> None 111 | Some (`Contents_inlined data) -> 112 Some (Contents data) 113 | Some (`Contents hash) -> ( 114 match node.backend with 115 | Some backend -> ( 116 match backend.read hash with 117 | Some data -> Some (Contents data) 118 | None -> None) 119 | None -> None) 120 | Some (`Node hash) -> ( 121 match node.backend with 122 | Some backend -> Some (of_hash ~backend hash) 123 | None -> None) 124 in 125 match resolved with 126 | None -> None 127 | Some child -> 128 Hashtbl.replace node.resolved name child; 129 navigate child rest)) 130 131 let find t path = 132 match navigate t path with Some (Contents s, []) -> Some s | _ -> None 133 134 let find_tree t path = 135 match navigate t path with Some ((Node _ as n), []) -> Some n | _ -> None 136 137 let mem t path = Option.is_some (navigate t path) 138 139 let mem_tree t path = 140 match navigate t path with Some (Node _, []) -> true | _ -> false 141 142 let list t path = 143 match navigate t path with 144 | Some (Node node, []) -> 145 let base_entries = 146 match resolve_entries node with 147 | None -> [] 148 | Some entries -> 149 entries 150 |> List.filter (fun (name, _) -> 151 (not (SSet.mem name node.removed)) 152 && not (SMap.mem name node.children)) 153 |> List.map (fun (name, kind) -> 154 let k = 155 match kind with 156 | `Node _ -> `Node 157 | `Contents _ | `Contents_inlined _ -> `Contents 158 in 159 (name, k)) 160 in 161 let child_entries = 162 SMap.fold 163 (fun name child acc -> 164 let k = 165 match child with Node _ -> `Node | Contents _ -> `Contents 166 in 167 (name, k) :: acc) 168 node.children [] 169 in 170 List.sort 171 (fun (a, _) (b, _) -> String.compare a b) 172 (base_entries @ child_entries) 173 | _ -> [] 174 175 (* Resolve a child node for modification (add/remove at depth). *) 176 let resolve_child node name = 177 match SMap.find_opt name node.children with 178 | Some c -> c 179 | None -> ( 180 if SSet.mem name node.removed then empty () 181 else 182 match resolve_entry node name with 183 | Some (`Node hash) -> ( 184 match node.backend with 185 | Some backend -> of_hash ~backend hash 186 | None -> empty ()) 187 | Some (`Contents _ | `Contents_inlined _) | None -> empty ()) 188 189 (* Add contents at path, creating intermediate nodes as needed *) 190 let rec add_at t path value = 191 match (t, path) with 192 | _, [] -> value 193 | Contents _, _ :: _ -> 194 (* Replace contents with a tree *) 195 add_at (empty ()) path value 196 | Node node, [ name ] -> 197 let children = SMap.add name value node.children in 198 let removed = SSet.remove name node.removed in 199 Node { node with children; removed } 200 | Node node, name :: rest -> 201 let child = resolve_child node name in 202 let new_child = add_at child rest value in 203 let children = SMap.add name new_child node.children in 204 Node { node with children } 205 206 let add t path contents = add_at t path (Contents contents) 207 let add_tree t path subtree = add_at t path subtree 208 209 let rec remove t path = 210 match (t, path) with 211 | _, [] -> empty () 212 | Contents _, _ :: _ -> t 213 | Node node, [ name ] -> 214 let children = SMap.remove name node.children in 215 let removed = SSet.add name node.removed in 216 Node { node with children; removed } 217 | Node node, name :: rest -> 218 let child = resolve_child node name in 219 let new_child = remove child rest in 220 let children = SMap.add name new_child node.children in 221 Node { node with children } 222 223 let rec to_concrete t = 224 match t with 225 | Contents s -> `Contents s 226 | Node node -> 227 let entries = 228 match resolve_entries node with 229 | None -> [] 230 | Some all_entries -> 231 all_entries 232 |> List.filter_map (fun (name, kind) -> 233 if SSet.mem name node.removed then None 234 else if SMap.mem name node.children then None 235 else 236 match kind with 237 | `Contents_inlined data -> Some (name, `Contents data) 238 | `Contents hash -> ( 239 match node.backend with 240 | Some backend -> ( 241 match backend.read hash with 242 | Some data -> Some (name, `Contents data) 243 | None -> None) 244 | None -> None) 245 | `Node hash -> ( 246 match node.backend with 247 | Some backend -> 248 let child = of_hash ~backend hash in 249 Some (name, to_concrete child) 250 | None -> None)) 251 in 252 let child_entries = 253 SMap.fold 254 (fun name child acc -> (name, to_concrete child) :: acc) 255 node.children [] 256 in 257 let all = 258 List.sort 259 (fun (a, _) (b, _) -> String.compare a b) 260 (entries @ child_entries) 261 in 262 `Tree all 263 264 (* Write tree to backend and return hash *) 265 let rec write_tree t ~inline_threshold ~inode ~(backend : hash Backend.t) : hash = 266 match t with 267 | Contents s -> 268 let h = F.hash_contents s in 269 backend.write h s; 270 h 271 | Node node -> 272 (* Fast path: unmodified node with known hash — skip entirely *) 273 let dominated = SMap.is_empty node.children && SSet.is_empty node.removed in 274 if dominated then 275 match node.state with 276 | Inode { hash; _ } | Lazy { hash; _ } -> hash 277 | _ -> write_tree_slow node ~inline_threshold ~inode ~backend 278 else 279 write_tree_slow node ~inline_threshold ~inode ~backend 280 281 and write_tree_slow node ~inline_threshold ~inode ~(backend : hash Backend.t) : hash = 282 resolve_state node; 283 (* Compute child entries (recursively writing children) *) 284 let child_entries = 285 SMap.fold 286 (fun name child acc -> 287 let entry = 288 match child with 289 | Contents s 290 when inline_threshold > 0 291 && String.length s <= inline_threshold -> 292 (name, (`Contents_inlined s : F.entry)) 293 | Contents s -> 294 let h = F.hash_contents s in 295 backend.write h s; 296 (name, (`Contents h : F.entry)) 297 | Node _ -> 298 let child_hash = 299 write_tree child ~inline_threshold ~inode ~backend 300 in 301 (name, (`Node child_hash : F.entry)) 302 in 303 entry :: acc) 304 node.children [] 305 in 306 (match node.state with 307 | Inode { hash; backend = ib } when inode -> 308 (* Incremental update: only modify affected inode buckets *) 309 Inode.update ~backend hash ~additions:child_entries 310 ~removals:(SSet.elements node.removed) 311 | Inode { hash; backend = ib } -> 312 (* Inodes disabled: expand to flat node *) 313 let base_entries = Inode.list_all ~backend:ib hash in 314 let base = 315 List.fold_left 316 (fun n (name, entry) -> F.add n name entry) 317 F.empty_node base_entries 318 in 319 let base = 320 SSet.fold (fun name n -> F.remove n name) node.removed base 321 in 322 let final = 323 List.fold_left 324 (fun n (name, entry) -> F.add n name entry) 325 base child_entries 326 in 327 let data = F.bytes_of_node final in 328 let h = F.hash_node final in 329 backend.write h data; 330 h 331 | _ -> 332 (* Flat node: apply modifications, promote to inode if too large *) 333 let base = 334 match node.state with 335 | Loaded n -> n 336 | _ -> F.empty_node 337 in 338 let base = 339 SSet.fold (fun name n -> F.remove n name) node.removed base 340 in 341 let final = 342 List.fold_left 343 (fun n (name, entry) -> F.add n name entry) 344 base child_entries 345 in 346 if inode then begin 347 let entries = F.list final in 348 if List.length entries > Inode.max_entries then 349 Inode.write entries ~backend 350 else begin 351 let data = F.bytes_of_node final in 352 let h = F.hash_node final in 353 backend.write h data; 354 h 355 end 356 end else begin 357 let data = F.bytes_of_node final in 358 let h = F.hash_node final in 359 backend.write h data; 360 h 361 end) 362 363 let hash ?(inline_threshold = F.inline_threshold) ?(inode = true) t ~backend = 364 write_tree t ~inline_threshold ~inode ~backend 365 366 type 'a force = [ `True | `False of hash -> 'a | `Shallow of hash -> 'a ] 367 368 let fold ?(force = `True) t init f = 369 let rec go path t acc = 370 match t with 371 | Contents s -> f path (`Contents s) acc 372 | Node node -> ( 373 let acc = f path `Tree acc in 374 match force with 375 | `True -> ( 376 resolve_state node; 377 match node.state with 378 | Loaded _ | Inode _ -> 379 SMap.fold 380 (fun name child acc -> go (path @ [ name ]) child acc) 381 node.children acc 382 | _ -> acc) 383 | `False fn -> ( 384 match node.state with 385 | Lazy { hash; _ } -> fn hash 386 | Shallow hash -> fn hash 387 | Pruned hash -> fn hash 388 | Loaded _ | Inode _ -> 389 SMap.fold 390 (fun name child acc -> go (path @ [ name ]) child acc) 391 node.children acc) 392 | `Shallow fn -> ( 393 match node.state with 394 | Shallow hash -> fn hash 395 | _ -> 396 SMap.fold 397 (fun name child acc -> go (path @ [ name ]) child acc) 398 node.children acc)) 399 in 400 go [] t init 401 402 let clear ?depth:_ _t = () 403 404 let equal t1 t2 = 405 (* Simple structural equality - could be optimized with hash comparison *) 406 to_concrete t1 = to_concrete t2 407end 408 409module Git = Make (Codec.Git) 410module Mst = Make (Codec.Mst)