Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
at inode 404 lines 14 kB view raw
1module Make (F : Codec.S) = struct 2 module Inode = Inode.Make (F) 3 4 type hash = F.hash 5 type path = string list 6 type concrete = [ `Contents of string | `Tree of (string * concrete) list ] 7 8 (* Internal tree representation with lazy loading *) 9 type node_state = 10 | Loaded of F.node 11 | Inode of { backend : hash Backend.t; hash : hash } 12 | Lazy of { backend : hash Backend.t; hash : hash } 13 | Shallow of hash 14 | Pruned of hash 15 16 type tree_node = 17 | Contents of string 18 | Node of node_record 19 20 and node_record = { 21 mutable state : node_state; 22 backend : hash Backend.t option; 23 mutable children : (string * tree_node) list; (* modifications *) 24 mutable removed : string list; 25 resolved : (string, tree_node) Hashtbl.t; (* read cache *) 26 } 27 28 type t = tree_node 29 30 let empty () = 31 Node { state = Loaded F.empty_node; backend = None; 32 children = []; removed = []; resolved = Hashtbl.create 0 } 33 34 let of_hash ~backend hash = 35 Node { state = Lazy { backend; hash }; backend = Some backend; 36 children = []; removed = []; resolved = Hashtbl.create 0 } 37 38 let shallow hash = 39 Node { state = Shallow hash; backend = None; 40 children = []; removed = []; resolved = Hashtbl.create 0 } 41 42 let pruned hash = 43 Node { state = Pruned hash; backend = None; 44 children = []; removed = []; resolved = Hashtbl.create 0 } 45 46 let rec of_concrete : concrete -> t = function 47 | `Contents s -> Contents s 48 | `Tree entries -> 49 let children = 50 List.map (fun (name, c) -> (name, of_concrete c)) entries 51 in 52 Node { state = Loaded F.empty_node; backend = None; 53 children; removed = []; resolved = Hashtbl.create 0 } 54 55 (* Resolve a lazy node: load from backend, detect inode format. *) 56 let resolve_state node = 57 match node.state with 58 | Loaded _ | Inode _ | Shallow _ | Pruned _ -> () 59 | Lazy { backend; hash } -> ( 60 match backend.read hash with 61 | None -> () 62 | Some data -> 63 if Inode.is_inode data then 64 node.state <- Inode { backend; hash } 65 else ( 66 match F.node_of_bytes data with 67 | Ok n -> node.state <- Loaded n 68 | Error _ -> ())) 69 70 (* Look up a single entry by name, handling both flat nodes and inodes. *) 71 let resolve_entry node name = 72 resolve_state node; 73 match node.state with 74 | Loaded n -> F.find n name 75 | Inode { backend; hash } -> Inode.find ~backend hash name 76 | _ -> None 77 78 (* List all entries, handling both flat nodes and inodes. *) 79 let resolve_entries node = 80 resolve_state node; 81 match node.state with 82 | Loaded n -> Some (F.list n) 83 | Inode { backend; hash } -> Some (Inode.list_all ~backend hash) 84 | _ -> None 85 86 (* Navigate to a path, returning the node and remaining path. 87 Resolved children are cached in [node.resolved] to avoid repeated 88 deserialization on subsequent reads. *) 89 let rec navigate t path = 90 match (t, path) with 91 | _, [] -> Some (t, []) 92 | Contents _, _ :: _ -> None 93 | Node node, name :: rest -> ( 94 (* Check modifications first *) 95 match List.assoc_opt name node.children with 96 | Some child -> navigate child rest 97 | None -> ( 98 if List.mem name node.removed then None 99 else 100 (* Check read cache *) 101 match Hashtbl.find_opt node.resolved name with 102 | Some child -> navigate child rest 103 | None -> 104 let resolved = 105 match resolve_entry node name with 106 | None -> None 107 | Some (`Contents_inlined data) -> 108 Some (Contents data) 109 | Some (`Contents hash) -> ( 110 match node.backend with 111 | Some backend -> ( 112 match backend.read hash with 113 | Some data -> Some (Contents data) 114 | None -> None) 115 | None -> None) 116 | Some (`Node hash) -> ( 117 match node.backend with 118 | Some backend -> Some (of_hash ~backend hash) 119 | None -> None) 120 in 121 match resolved with 122 | None -> None 123 | Some child -> 124 Hashtbl.replace node.resolved name child; 125 navigate child rest)) 126 127 let find t path = 128 match navigate t path with Some (Contents s, []) -> Some s | _ -> None 129 130 let find_tree t path = 131 match navigate t path with Some ((Node _ as n), []) -> Some n | _ -> None 132 133 let mem t path = Option.is_some (navigate t path) 134 135 let mem_tree t path = 136 match navigate t path with Some (Node _, []) -> true | _ -> false 137 138 let list t path = 139 match navigate t path with 140 | Some (Node node, []) -> 141 let base_entries = 142 match resolve_entries node with 143 | None -> [] 144 | Some entries -> 145 entries 146 |> List.filter (fun (name, _) -> 147 (not (List.mem name node.removed)) 148 && not (List.mem_assoc name node.children)) 149 |> List.map (fun (name, kind) -> 150 let k = 151 match kind with 152 | `Node _ -> `Node 153 | `Contents _ | `Contents_inlined _ -> `Contents 154 in 155 (name, k)) 156 in 157 let child_entries = 158 List.map 159 (fun (name, child) -> 160 let k = 161 match child with Node _ -> `Node | Contents _ -> `Contents 162 in 163 (name, k)) 164 node.children 165 in 166 List.sort 167 (fun (a, _) (b, _) -> String.compare a b) 168 (base_entries @ child_entries) 169 | _ -> [] 170 171 (* Resolve a child node for modification (add/remove at depth). *) 172 let resolve_child node name = 173 match List.assoc_opt name node.children with 174 | Some c -> c 175 | None -> ( 176 if List.mem name node.removed then empty () 177 else 178 match resolve_entry node name with 179 | Some (`Node hash) -> ( 180 match node.backend with 181 | Some backend -> of_hash ~backend hash 182 | None -> empty ()) 183 | Some (`Contents _ | `Contents_inlined _) | None -> empty ()) 184 185 (* Add contents at path, creating intermediate nodes as needed *) 186 let rec add_at t path value = 187 match (t, path) with 188 | _, [] -> value 189 | Contents _, _ :: _ -> 190 (* Replace contents with a tree *) 191 add_at (empty ()) path value 192 | Node node, [ name ] -> 193 let children = 194 (name, value) :: List.filter (fun (n, _) -> n <> name) node.children 195 in 196 let removed = List.filter (( <> ) name) node.removed in 197 Node { node with children; removed } 198 | Node node, name :: rest -> 199 let child = resolve_child node name in 200 let new_child = add_at child rest value in 201 let children = 202 (name, new_child) 203 :: List.filter (fun (n, _) -> n <> name) node.children 204 in 205 Node { node with children } 206 207 let add t path contents = add_at t path (Contents contents) 208 let add_tree t path subtree = add_at t path subtree 209 210 let rec remove t path = 211 match (t, path) with 212 | _, [] -> empty () 213 | Contents _, _ :: _ -> t 214 | Node node, [ name ] -> 215 let children = List.filter (fun (n, _) -> n <> name) node.children in 216 let removed = 217 if List.mem name node.removed then node.removed 218 else name :: node.removed 219 in 220 Node { node with children; removed } 221 | Node node, name :: rest -> 222 let child = resolve_child node name in 223 let new_child = remove child rest in 224 let children = 225 (name, new_child) 226 :: List.filter (fun (n, _) -> n <> name) node.children 227 in 228 Node { node with children } 229 230 let rec to_concrete t = 231 match t with 232 | Contents s -> `Contents s 233 | Node node -> 234 let entries = 235 match resolve_entries node with 236 | None -> [] 237 | Some all_entries -> 238 all_entries 239 |> List.filter_map (fun (name, kind) -> 240 if List.mem name node.removed then None 241 else if List.mem_assoc name node.children then None 242 else 243 match kind with 244 | `Contents_inlined data -> Some (name, `Contents data) 245 | `Contents hash -> ( 246 match node.backend with 247 | Some backend -> ( 248 match backend.read hash with 249 | Some data -> Some (name, `Contents data) 250 | None -> None) 251 | None -> None) 252 | `Node hash -> ( 253 match node.backend with 254 | Some backend -> 255 let child = of_hash ~backend hash in 256 Some (name, to_concrete child) 257 | None -> None)) 258 in 259 let child_entries = 260 List.map 261 (fun (name, child) -> (name, to_concrete child)) 262 node.children 263 in 264 let all = 265 List.sort 266 (fun (a, _) (b, _) -> String.compare a b) 267 (entries @ child_entries) 268 in 269 `Tree all 270 271 (* Write tree to backend and return hash *) 272 let rec write_tree t ~inline_threshold ~inode ~(backend : hash Backend.t) : hash = 273 match t with 274 | Contents s -> 275 let h = F.hash_contents s in 276 backend.write h s; 277 h 278 | Node node -> 279 resolve_state node; 280 (* Compute child entries (recursively writing children) *) 281 let child_entries = 282 List.map 283 (fun (name, child) -> 284 match child with 285 | Contents s 286 when inline_threshold > 0 287 && String.length s <= inline_threshold -> 288 (name, (`Contents_inlined s : F.entry)) 289 | Contents s -> 290 let h = F.hash_contents s in 291 backend.write h s; 292 (name, (`Contents h : F.entry)) 293 | Node _ -> 294 let child_hash = 295 write_tree child ~inline_threshold ~inode ~backend 296 in 297 (name, (`Node child_hash : F.entry))) 298 node.children 299 in 300 (match node.state with 301 | Inode { hash; backend = ib } when inode -> 302 (* Incremental update: only modify affected inode buckets *) 303 Inode.update ~backend hash ~additions:child_entries 304 ~removals:node.removed 305 | Inode { hash; backend = ib } -> 306 (* Inodes disabled: expand to flat node *) 307 let base_entries = Inode.list_all ~backend:ib hash in 308 let base = 309 List.fold_left 310 (fun n (name, entry) -> F.add n name entry) 311 F.empty_node base_entries 312 in 313 let base = 314 List.fold_left (fun n name -> F.remove n name) base node.removed 315 in 316 let final = 317 List.fold_left 318 (fun n (name, entry) -> F.add n name entry) 319 base child_entries 320 in 321 let data = F.bytes_of_node final in 322 let h = F.hash_node final in 323 backend.write h data; 324 h 325 | _ -> 326 (* Flat node: apply modifications, promote to inode if too large *) 327 let base = 328 match node.state with 329 | Loaded n -> n 330 | _ -> F.empty_node 331 in 332 let base = 333 List.fold_left (fun n name -> F.remove n name) base node.removed 334 in 335 let final = 336 List.fold_left 337 (fun n (name, entry) -> F.add n name entry) 338 base child_entries 339 in 340 if inode then begin 341 let entries = F.list final in 342 if List.length entries > Inode.max_entries then 343 Inode.write entries ~backend 344 else begin 345 let data = F.bytes_of_node final in 346 let h = F.hash_node final in 347 backend.write h data; 348 h 349 end 350 end else begin 351 let data = F.bytes_of_node final in 352 let h = F.hash_node final in 353 backend.write h data; 354 h 355 end) 356 357 let hash ?(inline_threshold = F.inline_threshold) ?(inode = true) t ~backend = 358 write_tree t ~inline_threshold ~inode ~backend 359 360 type 'a force = [ `True | `False of hash -> 'a | `Shallow of hash -> 'a ] 361 362 let fold ?(force = `True) t init f = 363 let rec go path t acc = 364 match t with 365 | Contents s -> f path (`Contents s) acc 366 | Node node -> ( 367 let acc = f path `Tree acc in 368 match force with 369 | `True -> ( 370 resolve_state node; 371 match node.state with 372 | Loaded _ | Inode _ -> 373 List.fold_left 374 (fun acc (name, child) -> go (path @ [ name ]) child acc) 375 acc node.children 376 | _ -> acc) 377 | `False fn -> ( 378 match node.state with 379 | Lazy { hash; _ } -> fn hash 380 | Shallow hash -> fn hash 381 | Pruned hash -> fn hash 382 | Loaded _ | Inode _ -> 383 List.fold_left 384 (fun acc (name, child) -> go (path @ [ name ]) child acc) 385 acc node.children) 386 | `Shallow fn -> ( 387 match node.state with 388 | Shallow hash -> fn hash 389 | _ -> 390 List.fold_left 391 (fun acc (name, child) -> go (path @ [ name ]) child acc) 392 acc node.children)) 393 in 394 go [] t init 395 396 let clear ?depth:_ _t = () 397 398 let equal t1 t2 = 399 (* Simple structural equality - could be optimized with hash comparison *) 400 to_concrete t1 = to_concrete t2 401end 402 403module Git = Make (Codec.Git) 404module Mst = Make (Codec.Mst)