forked from
gazagnaire.org/irmin
Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1module SMap = Map.Make (String)
2module SSet = Set.Make (String)
3
4module Make (F : Codec.S) = struct
5 module Inode = Inode.Make (F)
6
7 type hash = F.hash
8 type path = string list
9 type concrete = [ `Contents of string | `Tree of (string * concrete) list ]
10
11 (* Internal tree representation with lazy loading *)
12 type node_state =
13 | Loaded of F.node
14 | Inode of { backend : hash Backend.t; hash : hash }
15 | Lazy of { backend : hash Backend.t; hash : hash }
16 | Shallow of hash
17 | Pruned of hash
18
19 type tree_node =
20 | Contents of string
21 | Node of node_record
22
23 and node_record = {
24 mutable state : node_state;
25 backend : hash Backend.t option;
26 mutable children : tree_node SMap.t; (* modifications *)
27 mutable removed : SSet.t;
28 resolved : (string, tree_node) Hashtbl.t; (* read cache *)
29 }
30
31 type t = tree_node
32
33 let empty () =
34 Node { state = Loaded F.empty_node; backend = None;
35 children = SMap.empty; removed = SSet.empty; resolved = Hashtbl.create 0 }
36
37 let of_hash ~backend hash =
38 Node { state = Lazy { backend; hash }; backend = Some backend;
39 children = SMap.empty; removed = SSet.empty; resolved = Hashtbl.create 0 }
40
41 let shallow hash =
42 Node { state = Shallow hash; backend = None;
43 children = SMap.empty; removed = SSet.empty; resolved = Hashtbl.create 0 }
44
45 let pruned hash =
46 Node { state = Pruned hash; backend = None;
47 children = SMap.empty; removed = SSet.empty; resolved = Hashtbl.create 0 }
48
49 let rec of_concrete : concrete -> t = function
50 | `Contents s -> Contents s
51 | `Tree entries ->
52 let children =
53 List.fold_left (fun m (name, c) -> SMap.add name (of_concrete c) m)
54 SMap.empty entries
55 in
56 Node { state = Loaded F.empty_node; backend = None;
57 children; removed = SSet.empty; resolved = Hashtbl.create 0 }
58
59 (* Resolve a lazy node: load from backend, detect inode format. *)
60 let resolve_state node =
61 match node.state with
62 | Loaded _ | Inode _ | Shallow _ | Pruned _ -> ()
63 | Lazy { backend; hash } -> (
64 match backend.read hash with
65 | None -> ()
66 | Some data ->
67 if Inode.is_inode data then
68 node.state <- Inode { backend; hash }
69 else (
70 match F.node_of_bytes data with
71 | Ok n -> node.state <- Loaded n
72 | Error _ -> ()))
73
74 (* Look up a single entry by name, handling both flat nodes and inodes. *)
75 let resolve_entry node name =
76 resolve_state node;
77 match node.state with
78 | Loaded n -> F.find n name
79 | Inode { backend; hash } -> Inode.find ~backend hash name
80 | _ -> None
81
82 (* List all entries, handling both flat nodes and inodes. *)
83 let resolve_entries node =
84 resolve_state node;
85 match node.state with
86 | Loaded n -> Some (F.list n)
87 | Inode { backend; hash } -> Some (Inode.list_all ~backend hash)
88 | _ -> None
89
90 (* Navigate to a path, returning the node and remaining path.
91 Resolved children are cached in [node.resolved] to avoid repeated
92 deserialization on subsequent reads. *)
93 let rec navigate t path =
94 match (t, path) with
95 | _, [] -> Some (t, [])
96 | Contents _, _ :: _ -> None
97 | Node node, name :: rest -> (
98 (* Check modifications first *)
99 match SMap.find_opt name node.children with
100 | Some child -> navigate child rest
101 | None -> (
102 if SSet.mem name node.removed then None
103 else
104 (* Check read cache *)
105 match Hashtbl.find_opt node.resolved name with
106 | Some child -> navigate child rest
107 | None ->
108 let resolved =
109 match resolve_entry node name with
110 | None -> None
111 | Some (`Contents_inlined data) ->
112 Some (Contents data)
113 | Some (`Contents hash) -> (
114 match node.backend with
115 | Some backend -> (
116 match backend.read hash with
117 | Some data -> Some (Contents data)
118 | None -> None)
119 | None -> None)
120 | Some (`Node hash) -> (
121 match node.backend with
122 | Some backend -> Some (of_hash ~backend hash)
123 | None -> None)
124 in
125 match resolved with
126 | None -> None
127 | Some child ->
128 Hashtbl.replace node.resolved name child;
129 navigate child rest))
130
131 let find t path =
132 match navigate t path with Some (Contents s, []) -> Some s | _ -> None
133
134 let find_tree t path =
135 match navigate t path with Some ((Node _ as n), []) -> Some n | _ -> None
136
137 let mem t path = Option.is_some (navigate t path)
138
139 let mem_tree t path =
140 match navigate t path with Some (Node _, []) -> true | _ -> false
141
142 let list t path =
143 match navigate t path with
144 | Some (Node node, []) ->
145 let base_entries =
146 match resolve_entries node with
147 | None -> []
148 | Some entries ->
149 entries
150 |> List.filter (fun (name, _) ->
151 (not (SSet.mem name node.removed))
152 && not (SMap.mem name node.children))
153 |> List.map (fun (name, kind) ->
154 let k =
155 match kind with
156 | `Node _ -> `Node
157 | `Contents _ | `Contents_inlined _ -> `Contents
158 in
159 (name, k))
160 in
161 let child_entries =
162 SMap.fold
163 (fun name child acc ->
164 let k =
165 match child with Node _ -> `Node | Contents _ -> `Contents
166 in
167 (name, k) :: acc)
168 node.children []
169 in
170 List.sort
171 (fun (a, _) (b, _) -> String.compare a b)
172 (base_entries @ child_entries)
173 | _ -> []
174
175 (* Resolve a child node for modification (add/remove at depth). *)
176 let resolve_child node name =
177 match SMap.find_opt name node.children with
178 | Some c -> c
179 | None -> (
180 if SSet.mem name node.removed then empty ()
181 else
182 match resolve_entry node name with
183 | Some (`Node hash) -> (
184 match node.backend with
185 | Some backend -> of_hash ~backend hash
186 | None -> empty ())
187 | Some (`Contents _ | `Contents_inlined _) | None -> empty ())
188
189 (* Add contents at path, creating intermediate nodes as needed *)
190 let rec add_at t path value =
191 match (t, path) with
192 | _, [] -> value
193 | Contents _, _ :: _ ->
194 (* Replace contents with a tree *)
195 add_at (empty ()) path value
196 | Node node, [ name ] ->
197 let children = SMap.add name value node.children in
198 let removed = SSet.remove name node.removed in
199 Node { node with children; removed }
200 | Node node, name :: rest ->
201 let child = resolve_child node name in
202 let new_child = add_at child rest value in
203 let children = SMap.add name new_child node.children in
204 Node { node with children }
205
206 let add t path contents = add_at t path (Contents contents)
207 let add_tree t path subtree = add_at t path subtree
208
209 let rec remove t path =
210 match (t, path) with
211 | _, [] -> empty ()
212 | Contents _, _ :: _ -> t
213 | Node node, [ name ] ->
214 let children = SMap.remove name node.children in
215 let removed = SSet.add name node.removed in
216 Node { node with children; removed }
217 | Node node, name :: rest ->
218 let child = resolve_child node name in
219 let new_child = remove child rest in
220 let children = SMap.add name new_child node.children in
221 Node { node with children }
222
223 let rec to_concrete t =
224 match t with
225 | Contents s -> `Contents s
226 | Node node ->
227 let entries =
228 match resolve_entries node with
229 | None -> []
230 | Some all_entries ->
231 all_entries
232 |> List.filter_map (fun (name, kind) ->
233 if SSet.mem name node.removed then None
234 else if SMap.mem name node.children then None
235 else
236 match kind with
237 | `Contents_inlined data -> Some (name, `Contents data)
238 | `Contents hash -> (
239 match node.backend with
240 | Some backend -> (
241 match backend.read hash with
242 | Some data -> Some (name, `Contents data)
243 | None -> None)
244 | None -> None)
245 | `Node hash -> (
246 match node.backend with
247 | Some backend ->
248 let child = of_hash ~backend hash in
249 Some (name, to_concrete child)
250 | None -> None))
251 in
252 let child_entries =
253 SMap.fold
254 (fun name child acc -> (name, to_concrete child) :: acc)
255 node.children []
256 in
257 let all =
258 List.sort
259 (fun (a, _) (b, _) -> String.compare a b)
260 (entries @ child_entries)
261 in
262 `Tree all
263
264 (* Write tree to backend and return hash *)
265 let rec write_tree t ~inline_threshold ~inode ~(backend : hash Backend.t) : hash =
266 match t with
267 | Contents s ->
268 let h = F.hash_contents s in
269 backend.write h s;
270 h
271 | Node node ->
272 (* Fast path: unmodified node with known hash — skip entirely *)
273 let dominated = SMap.is_empty node.children && SSet.is_empty node.removed in
274 if dominated then
275 match node.state with
276 | Inode { hash; _ } | Lazy { hash; _ } -> hash
277 | _ -> write_tree_slow node ~inline_threshold ~inode ~backend
278 else
279 write_tree_slow node ~inline_threshold ~inode ~backend
280
281 and write_tree_slow node ~inline_threshold ~inode ~(backend : hash Backend.t) : hash =
282 resolve_state node;
283 (* Compute child entries (recursively writing children) *)
284 let child_entries =
285 SMap.fold
286 (fun name child acc ->
287 let entry =
288 match child with
289 | Contents s
290 when inline_threshold > 0
291 && String.length s <= inline_threshold ->
292 (name, (`Contents_inlined s : F.entry))
293 | Contents s ->
294 let h = F.hash_contents s in
295 backend.write h s;
296 (name, (`Contents h : F.entry))
297 | Node _ ->
298 let child_hash =
299 write_tree child ~inline_threshold ~inode ~backend
300 in
301 (name, (`Node child_hash : F.entry))
302 in
303 entry :: acc)
304 node.children []
305 in
306 (match node.state with
307 | Inode { hash; backend = ib } when inode ->
308 (* Incremental update: only modify affected inode buckets *)
309 Inode.update ~backend hash ~additions:child_entries
310 ~removals:(SSet.elements node.removed)
311 | Inode { hash; backend = ib } ->
312 (* Inodes disabled: expand to flat node *)
313 let base_entries = Inode.list_all ~backend:ib hash in
314 let base =
315 List.fold_left
316 (fun n (name, entry) -> F.add n name entry)
317 F.empty_node base_entries
318 in
319 let base =
320 SSet.fold (fun name n -> F.remove n name) node.removed base
321 in
322 let final =
323 List.fold_left
324 (fun n (name, entry) -> F.add n name entry)
325 base child_entries
326 in
327 let data = F.bytes_of_node final in
328 let h = F.hash_node final in
329 backend.write h data;
330 h
331 | _ ->
332 (* Flat node: apply modifications, promote to inode if too large *)
333 let base =
334 match node.state with
335 | Loaded n -> n
336 | _ -> F.empty_node
337 in
338 let base =
339 SSet.fold (fun name n -> F.remove n name) node.removed base
340 in
341 let final =
342 List.fold_left
343 (fun n (name, entry) -> F.add n name entry)
344 base child_entries
345 in
346 if inode then begin
347 let entries = F.list final in
348 if List.length entries > Inode.max_entries then
349 Inode.write entries ~backend
350 else begin
351 let data = F.bytes_of_node final in
352 let h = F.hash_node final in
353 backend.write h data;
354 h
355 end
356 end else begin
357 let data = F.bytes_of_node final in
358 let h = F.hash_node final in
359 backend.write h data;
360 h
361 end)
362
363 let hash ?(inline_threshold = F.inline_threshold) ?(inode = true) t ~backend =
364 write_tree t ~inline_threshold ~inode ~backend
365
366 type 'a force = [ `True | `False of hash -> 'a | `Shallow of hash -> 'a ]
367
368 let fold ?(force = `True) t init f =
369 let rec go path t acc =
370 match t with
371 | Contents s -> f path (`Contents s) acc
372 | Node node -> (
373 let acc = f path `Tree acc in
374 match force with
375 | `True -> (
376 resolve_state node;
377 match node.state with
378 | Loaded _ | Inode _ ->
379 SMap.fold
380 (fun name child acc -> go (path @ [ name ]) child acc)
381 node.children acc
382 | _ -> acc)
383 | `False fn -> (
384 match node.state with
385 | Lazy { hash; _ } -> fn hash
386 | Shallow hash -> fn hash
387 | Pruned hash -> fn hash
388 | Loaded _ | Inode _ ->
389 SMap.fold
390 (fun name child acc -> go (path @ [ name ]) child acc)
391 node.children acc)
392 | `Shallow fn -> (
393 match node.state with
394 | Shallow hash -> fn hash
395 | _ ->
396 SMap.fold
397 (fun name child acc -> go (path @ [ name ]) child acc)
398 node.children acc))
399 in
400 go [] t init
401
402 let clear ?depth:_ _t = ()
403
404 let equal t1 t2 =
405 (* Simple structural equality - could be optimized with hash comparison *)
406 to_concrete t1 = to_concrete t2
407end
408
409module Git = Make (Codec.Git)
410module Mst = Make (Codec.Mst)