Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
at inline-small-objects 338 lines 12 kB view raw
1module Make (F : Codec.S) = struct 2 type hash = F.hash 3 type path = string list 4 type concrete = [ `Contents of string | `Tree of (string * concrete) list ] 5 6 (* Internal tree representation with lazy loading *) 7 type node_state = 8 | Loaded of F.node 9 | Lazy of { backend : hash Backend.t; hash : hash } 10 | Shallow of hash 11 | Pruned of hash 12 13 type tree_node = 14 | Contents of string 15 | Node of { 16 mutable state : node_state; 17 mutable children : (string * tree_node) list; (* modifications *) 18 mutable removed : string list; 19 } 20 21 type t = tree_node 22 23 let empty () = 24 Node { state = Loaded F.empty_node; children = []; removed = [] } 25 26 let of_hash ~backend hash = 27 Node { state = Lazy { backend; hash }; children = []; removed = [] } 28 29 let shallow hash = Node { state = Shallow hash; children = []; removed = [] } 30 let pruned hash = Node { state = Pruned hash; children = []; removed = [] } 31 32 let rec of_concrete : concrete -> t = function 33 | `Contents s -> Contents s 34 | `Tree entries -> 35 let children = 36 List.map (fun (name, c) -> (name, of_concrete c)) entries 37 in 38 Node { state = Loaded F.empty_node; children; removed = [] } 39 40 (* Force loading of a lazy node *) 41 let force_node state = 42 match state with 43 | Loaded n -> Some n 44 | Lazy { backend; hash } -> ( 45 match backend.read hash with 46 | Some data -> ( 47 match F.node_of_bytes data with Ok n -> Some n | Error _ -> None) 48 | None -> None) 49 | Shallow _ -> None 50 | Pruned _ -> None 51 52 (* Navigate to a path, returning the node and remaining path *) 53 let rec navigate t path = 54 match (t, path) with 55 | _, [] -> Some (t, []) 56 | Contents _, _ :: _ -> None 57 | Node node, name :: rest -> ( 58 (* Check modifications first *) 59 match List.assoc_opt name node.children with 60 | Some child -> navigate child rest 61 | None -> ( 62 if List.mem name node.removed then None 63 else 64 (* Try to load from underlying node *) 65 match force_node node.state with 66 | None -> None 67 | Some loaded -> ( 68 match F.find loaded name with 69 | None -> None 70 | Some (`Contents_inlined data) -> 71 navigate (Contents data) rest 72 | Some (`Contents hash) -> ( 73 (* Load the content blob *) 74 match node.state with 75 | Lazy { backend; _ } -> ( 76 match backend.read hash with 77 | Some data -> navigate (Contents data) rest 78 | None -> None) 79 | _ -> None) 80 | Some (`Node hash) -> ( 81 match node.state with 82 | Lazy { backend; _ } -> 83 let child = of_hash ~backend hash in 84 navigate child rest 85 | _ -> None)))) 86 87 let find t path = 88 match navigate t path with Some (Contents s, []) -> Some s | _ -> None 89 90 let find_tree t path = 91 match navigate t path with Some ((Node _ as n), []) -> Some n | _ -> None 92 93 let mem t path = Option.is_some (navigate t path) 94 95 let mem_tree t path = 96 match navigate t path with Some (Node _, []) -> true | _ -> false 97 98 let list t path = 99 match navigate t path with 100 | Some (Node node, []) -> ( 101 match force_node node.state with 102 | None -> [] 103 | Some loaded -> 104 let base_entries = 105 F.list loaded 106 |> List.filter (fun (name, _) -> 107 (not (List.mem name node.removed)) 108 && not (List.mem_assoc name node.children)) 109 |> List.map (fun (name, kind) -> 110 let k = 111 match kind with 112 | `Node _ -> `Node 113 | `Contents _ | `Contents_inlined _ -> `Contents 114 in 115 (name, k)) 116 in 117 let child_entries = 118 List.map 119 (fun (name, child) -> 120 let k = 121 match child with Node _ -> `Node | Contents _ -> `Contents 122 in 123 (name, k)) 124 node.children 125 in 126 List.sort 127 (fun (a, _) (b, _) -> String.compare a b) 128 (base_entries @ child_entries)) 129 | _ -> [] 130 131 (* Add contents at path, creating intermediate nodes as needed *) 132 let rec add_at t path value = 133 match (t, path) with 134 | _, [] -> value 135 | Contents _, _ :: _ -> 136 (* Replace contents with a tree *) 137 add_at (empty ()) path value 138 | Node node, [ name ] -> 139 let children = 140 (name, value) :: List.filter (fun (n, _) -> n <> name) node.children 141 in 142 let removed = List.filter (( <> ) name) node.removed in 143 Node { node with children; removed } 144 | Node node, name :: rest -> 145 let child = 146 match List.assoc_opt name node.children with 147 | Some c -> c 148 | None -> ( 149 if List.mem name node.removed then empty () 150 else 151 match force_node node.state with 152 | None -> empty () 153 | Some loaded -> ( 154 match F.find loaded name with 155 | Some (`Node hash) -> ( 156 match node.state with 157 | Lazy { backend; _ } -> of_hash ~backend hash 158 | _ -> empty ()) 159 | Some (`Contents _ | `Contents_inlined _) | None -> 160 empty ())) 161 in 162 let new_child = add_at child rest value in 163 let children = 164 (name, new_child) 165 :: List.filter (fun (n, _) -> n <> name) node.children 166 in 167 Node { node with children } 168 169 let add t path contents = add_at t path (Contents contents) 170 let add_tree t path subtree = add_at t path subtree 171 172 let rec remove t path = 173 match (t, path) with 174 | _, [] -> empty () 175 | Contents _, _ :: _ -> t 176 | Node node, [ name ] -> 177 let children = List.filter (fun (n, _) -> n <> name) node.children in 178 let removed = 179 if List.mem name node.removed then node.removed 180 else name :: node.removed 181 in 182 Node { node with children; removed } 183 | Node node, name :: rest -> 184 let child = 185 match List.assoc_opt name node.children with 186 | Some c -> c 187 | None -> ( 188 if List.mem name node.removed then empty () 189 else 190 match force_node node.state with 191 | None -> empty () 192 | Some loaded -> ( 193 match F.find loaded name with 194 | Some (`Node hash) -> ( 195 match node.state with 196 | Lazy { backend; _ } -> of_hash ~backend hash 197 | _ -> empty ()) 198 | Some (`Contents _ | `Contents_inlined _) | None -> 199 empty ())) 200 in 201 let new_child = remove child rest in 202 let children = 203 (name, new_child) 204 :: List.filter (fun (n, _) -> n <> name) node.children 205 in 206 Node { node with children } 207 208 let rec to_concrete t = 209 match t with 210 | Contents s -> `Contents s 211 | Node node -> 212 let entries = 213 match force_node node.state with 214 | None -> [] 215 | Some loaded -> 216 F.list loaded 217 |> List.filter_map (fun (name, kind) -> 218 if List.mem name node.removed then None 219 else if List.mem_assoc name node.children then None 220 else 221 match kind with 222 | `Contents_inlined data -> 223 Some (name, `Contents data) 224 | `Contents hash -> ( 225 match node.state with 226 | Lazy { backend; _ } -> ( 227 match backend.read hash with 228 | Some data -> Some (name, `Contents data) 229 | None -> None) 230 | _ -> None) 231 | `Node hash -> ( 232 match node.state with 233 | Lazy { backend; _ } -> 234 let child = of_hash ~backend hash in 235 Some (name, to_concrete child) 236 | _ -> None)) 237 in 238 let child_entries = 239 List.map 240 (fun (name, child) -> (name, to_concrete child)) 241 node.children 242 in 243 let all = 244 List.sort 245 (fun (a, _) (b, _) -> String.compare a b) 246 (entries @ child_entries) 247 in 248 `Tree all 249 250 (* Write tree to backend and return hash *) 251 let rec write_tree t ~inline_threshold ~(backend : hash Backend.t) : hash = 252 match t with 253 | Contents s -> 254 let h = F.hash_contents s in 255 backend.write h s; 256 h 257 | Node node -> 258 (* First, get the base node *) 259 let base = 260 match force_node node.state with Some n -> n | None -> F.empty_node 261 in 262 (* Apply removals *) 263 let base = 264 List.fold_left (fun n name -> F.remove n name) base node.removed 265 in 266 (* Apply additions (recursively writing children) *) 267 let final = 268 List.fold_left 269 (fun n (name, child) -> 270 match child with 271 | Contents s when inline_threshold > 0 272 && String.length s <= inline_threshold -> 273 (* Inline small contents directly in the node *) 274 F.add n name (`Contents_inlined s) 275 | Contents s -> 276 let h = F.hash_contents s in 277 backend.write h s; 278 F.add n name (`Contents h) 279 | Node _ -> 280 let child_hash = 281 write_tree child ~inline_threshold ~backend 282 in 283 F.add n name (`Node child_hash)) 284 base node.children 285 in 286 let data = F.bytes_of_node final in 287 let h = F.hash_node final in 288 backend.write h data; 289 h 290 291 let hash ?(inline_threshold = F.inline_threshold) t ~backend = 292 write_tree t ~inline_threshold ~backend 293 294 type 'a force = [ `True | `False of hash -> 'a | `Shallow of hash -> 'a ] 295 296 let fold ?(force = `True) t init f = 297 let rec go path t acc = 298 match t with 299 | Contents s -> f path (`Contents s) acc 300 | Node node -> ( 301 let acc = f path `Tree acc in 302 match force with 303 | `True -> ( 304 match force_node node.state with 305 | None -> acc 306 | Some _loaded -> 307 (* Fold over children *) 308 List.fold_left 309 (fun acc (name, child) -> go (path @ [ name ]) child acc) 310 acc node.children) 311 | `False fn -> ( 312 match node.state with 313 | Lazy { hash; _ } -> fn hash 314 | Shallow hash -> fn hash 315 | Pruned hash -> fn hash 316 | Loaded _ -> 317 List.fold_left 318 (fun acc (name, child) -> go (path @ [ name ]) child acc) 319 acc node.children) 320 | `Shallow fn -> ( 321 match node.state with 322 | Shallow hash -> fn hash 323 | _ -> 324 List.fold_left 325 (fun acc (name, child) -> go (path @ [ name ]) child acc) 326 acc node.children)) 327 in 328 go [] t init 329 330 let clear ?depth:_ _t = () 331 332 let equal t1 t2 = 333 (* Simple structural equality - could be optimized with hash comparison *) 334 to_concrete t1 = to_concrete t2 335end 336 337module Git = Make (Codec.Git) 338module Mst = Make (Codec.Mst)