forked from
gazagnaire.org/irmin
Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1module Make (F : Codec.S) = struct
2 module Inode = Inode.Make (F)
3
4 type hash = F.hash
5 type path = string list
6 type concrete = [ `Contents of string | `Tree of (string * concrete) list ]
7
8 (* Internal tree representation with lazy loading *)
9 type node_state =
10 | Loaded of F.node
11 | Inode of { backend : hash Backend.t; hash : hash }
12 | Lazy of { backend : hash Backend.t; hash : hash }
13 | Shallow of hash
14 | Pruned of hash
15
16 type tree_node =
17 | Contents of string
18 | Node of node_record
19
20 and node_record = {
21 mutable state : node_state;
22 backend : hash Backend.t option;
23 mutable children : (string * tree_node) list; (* modifications *)
24 mutable removed : string list;
25 resolved : (string, tree_node) Hashtbl.t; (* read cache *)
26 }
27
28 type t = tree_node
29
30 let empty () =
31 Node { state = Loaded F.empty_node; backend = None;
32 children = []; removed = []; resolved = Hashtbl.create 0 }
33
34 let of_hash ~backend hash =
35 Node { state = Lazy { backend; hash }; backend = Some backend;
36 children = []; removed = []; resolved = Hashtbl.create 0 }
37
38 let shallow hash =
39 Node { state = Shallow hash; backend = None;
40 children = []; removed = []; resolved = Hashtbl.create 0 }
41
42 let pruned hash =
43 Node { state = Pruned hash; backend = None;
44 children = []; removed = []; resolved = Hashtbl.create 0 }
45
46 let rec of_concrete : concrete -> t = function
47 | `Contents s -> Contents s
48 | `Tree entries ->
49 let children =
50 List.map (fun (name, c) -> (name, of_concrete c)) entries
51 in
52 Node { state = Loaded F.empty_node; backend = None;
53 children; removed = []; resolved = Hashtbl.create 0 }
54
55 (* Resolve a lazy node: load from backend, detect inode format. *)
56 let resolve_state node =
57 match node.state with
58 | Loaded _ | Inode _ | Shallow _ | Pruned _ -> ()
59 | Lazy { backend; hash } -> (
60 match backend.read hash with
61 | None -> ()
62 | Some data ->
63 if Inode.is_inode data then
64 node.state <- Inode { backend; hash }
65 else (
66 match F.node_of_bytes data with
67 | Ok n -> node.state <- Loaded n
68 | Error _ -> ()))
69
70 (* Look up a single entry by name, handling both flat nodes and inodes. *)
71 let resolve_entry node name =
72 resolve_state node;
73 match node.state with
74 | Loaded n -> F.find n name
75 | Inode { backend; hash } -> Inode.find ~backend hash name
76 | _ -> None
77
78 (* List all entries, handling both flat nodes and inodes. *)
79 let resolve_entries node =
80 resolve_state node;
81 match node.state with
82 | Loaded n -> Some (F.list n)
83 | Inode { backend; hash } -> Some (Inode.list_all ~backend hash)
84 | _ -> None
85
86 (* Navigate to a path, returning the node and remaining path.
87 Resolved children are cached in [node.resolved] to avoid repeated
88 deserialization on subsequent reads. *)
89 let rec navigate t path =
90 match (t, path) with
91 | _, [] -> Some (t, [])
92 | Contents _, _ :: _ -> None
93 | Node node, name :: rest -> (
94 (* Check modifications first *)
95 match List.assoc_opt name node.children with
96 | Some child -> navigate child rest
97 | None -> (
98 if List.mem name node.removed then None
99 else
100 (* Check read cache *)
101 match Hashtbl.find_opt node.resolved name with
102 | Some child -> navigate child rest
103 | None ->
104 let resolved =
105 match resolve_entry node name with
106 | None -> None
107 | Some (`Contents_inlined data) ->
108 Some (Contents data)
109 | Some (`Contents hash) -> (
110 match node.backend with
111 | Some backend -> (
112 match backend.read hash with
113 | Some data -> Some (Contents data)
114 | None -> None)
115 | None -> None)
116 | Some (`Node hash) -> (
117 match node.backend with
118 | Some backend -> Some (of_hash ~backend hash)
119 | None -> None)
120 in
121 match resolved with
122 | None -> None
123 | Some child ->
124 Hashtbl.replace node.resolved name child;
125 navigate child rest))
126
127 let find t path =
128 match navigate t path with Some (Contents s, []) -> Some s | _ -> None
129
130 let find_tree t path =
131 match navigate t path with Some ((Node _ as n), []) -> Some n | _ -> None
132
133 let mem t path = Option.is_some (navigate t path)
134
135 let mem_tree t path =
136 match navigate t path with Some (Node _, []) -> true | _ -> false
137
138 let list t path =
139 match navigate t path with
140 | Some (Node node, []) ->
141 let base_entries =
142 match resolve_entries node with
143 | None -> []
144 | Some entries ->
145 entries
146 |> List.filter (fun (name, _) ->
147 (not (List.mem name node.removed))
148 && not (List.mem_assoc name node.children))
149 |> List.map (fun (name, kind) ->
150 let k =
151 match kind with
152 | `Node _ -> `Node
153 | `Contents _ | `Contents_inlined _ -> `Contents
154 in
155 (name, k))
156 in
157 let child_entries =
158 List.map
159 (fun (name, child) ->
160 let k =
161 match child with Node _ -> `Node | Contents _ -> `Contents
162 in
163 (name, k))
164 node.children
165 in
166 List.sort
167 (fun (a, _) (b, _) -> String.compare a b)
168 (base_entries @ child_entries)
169 | _ -> []
170
171 (* Resolve a child node for modification (add/remove at depth). *)
172 let resolve_child node name =
173 match List.assoc_opt name node.children with
174 | Some c -> c
175 | None -> (
176 if List.mem name node.removed then empty ()
177 else
178 match resolve_entry node name with
179 | Some (`Node hash) -> (
180 match node.backend with
181 | Some backend -> of_hash ~backend hash
182 | None -> empty ())
183 | Some (`Contents _ | `Contents_inlined _) | None -> empty ())
184
185 (* Add contents at path, creating intermediate nodes as needed *)
186 let rec add_at t path value =
187 match (t, path) with
188 | _, [] -> value
189 | Contents _, _ :: _ ->
190 (* Replace contents with a tree *)
191 add_at (empty ()) path value
192 | Node node, [ name ] ->
193 let children =
194 (name, value) :: List.filter (fun (n, _) -> n <> name) node.children
195 in
196 let removed = List.filter (( <> ) name) node.removed in
197 Node { node with children; removed }
198 | Node node, name :: rest ->
199 let child = resolve_child node name in
200 let new_child = add_at child rest value in
201 let children =
202 (name, new_child)
203 :: List.filter (fun (n, _) -> n <> name) node.children
204 in
205 Node { node with children }
206
207 let add t path contents = add_at t path (Contents contents)
208 let add_tree t path subtree = add_at t path subtree
209
210 let rec remove t path =
211 match (t, path) with
212 | _, [] -> empty ()
213 | Contents _, _ :: _ -> t
214 | Node node, [ name ] ->
215 let children = List.filter (fun (n, _) -> n <> name) node.children in
216 let removed =
217 if List.mem name node.removed then node.removed
218 else name :: node.removed
219 in
220 Node { node with children; removed }
221 | Node node, name :: rest ->
222 let child = resolve_child node name in
223 let new_child = remove child rest in
224 let children =
225 (name, new_child)
226 :: List.filter (fun (n, _) -> n <> name) node.children
227 in
228 Node { node with children }
229
230 let rec to_concrete t =
231 match t with
232 | Contents s -> `Contents s
233 | Node node ->
234 let entries =
235 match resolve_entries node with
236 | None -> []
237 | Some all_entries ->
238 all_entries
239 |> List.filter_map (fun (name, kind) ->
240 if List.mem name node.removed then None
241 else if List.mem_assoc name node.children then None
242 else
243 match kind with
244 | `Contents_inlined data -> Some (name, `Contents data)
245 | `Contents hash -> (
246 match node.backend with
247 | Some backend -> (
248 match backend.read hash with
249 | Some data -> Some (name, `Contents data)
250 | None -> None)
251 | None -> None)
252 | `Node hash -> (
253 match node.backend with
254 | Some backend ->
255 let child = of_hash ~backend hash in
256 Some (name, to_concrete child)
257 | None -> None))
258 in
259 let child_entries =
260 List.map
261 (fun (name, child) -> (name, to_concrete child))
262 node.children
263 in
264 let all =
265 List.sort
266 (fun (a, _) (b, _) -> String.compare a b)
267 (entries @ child_entries)
268 in
269 `Tree all
270
271 (* Write tree to backend and return hash *)
272 let rec write_tree t ~inline_threshold ~inode ~(backend : hash Backend.t) : hash =
273 match t with
274 | Contents s ->
275 let h = F.hash_contents s in
276 backend.write h s;
277 h
278 | Node node ->
279 resolve_state node;
280 (* Compute child entries (recursively writing children) *)
281 let child_entries =
282 List.map
283 (fun (name, child) ->
284 match child with
285 | Contents s
286 when inline_threshold > 0
287 && String.length s <= inline_threshold ->
288 (name, (`Contents_inlined s : F.entry))
289 | Contents s ->
290 let h = F.hash_contents s in
291 backend.write h s;
292 (name, (`Contents h : F.entry))
293 | Node _ ->
294 let child_hash =
295 write_tree child ~inline_threshold ~inode ~backend
296 in
297 (name, (`Node child_hash : F.entry)))
298 node.children
299 in
300 (match node.state with
301 | Inode { hash; backend = ib } when inode ->
302 (* Incremental update: only modify affected inode buckets *)
303 Inode.update ~backend hash ~additions:child_entries
304 ~removals:node.removed
305 | Inode { hash; backend = ib } ->
306 (* Inodes disabled: expand to flat node *)
307 let base_entries = Inode.list_all ~backend:ib hash in
308 let base =
309 List.fold_left
310 (fun n (name, entry) -> F.add n name entry)
311 F.empty_node base_entries
312 in
313 let base =
314 List.fold_left (fun n name -> F.remove n name) base node.removed
315 in
316 let final =
317 List.fold_left
318 (fun n (name, entry) -> F.add n name entry)
319 base child_entries
320 in
321 let data = F.bytes_of_node final in
322 let h = F.hash_node final in
323 backend.write h data;
324 h
325 | _ ->
326 (* Flat node: apply modifications, promote to inode if too large *)
327 let base =
328 match node.state with
329 | Loaded n -> n
330 | _ -> F.empty_node
331 in
332 let base =
333 List.fold_left (fun n name -> F.remove n name) base node.removed
334 in
335 let final =
336 List.fold_left
337 (fun n (name, entry) -> F.add n name entry)
338 base child_entries
339 in
340 if inode then begin
341 let entries = F.list final in
342 if List.length entries > Inode.max_entries then
343 Inode.write entries ~backend
344 else begin
345 let data = F.bytes_of_node final in
346 let h = F.hash_node final in
347 backend.write h data;
348 h
349 end
350 end else begin
351 let data = F.bytes_of_node final in
352 let h = F.hash_node final in
353 backend.write h data;
354 h
355 end)
356
357 let hash ?(inline_threshold = F.inline_threshold) ?(inode = true) t ~backend =
358 write_tree t ~inline_threshold ~inode ~backend
359
360 type 'a force = [ `True | `False of hash -> 'a | `Shallow of hash -> 'a ]
361
362 let fold ?(force = `True) t init f =
363 let rec go path t acc =
364 match t with
365 | Contents s -> f path (`Contents s) acc
366 | Node node -> (
367 let acc = f path `Tree acc in
368 match force with
369 | `True -> (
370 resolve_state node;
371 match node.state with
372 | Loaded _ | Inode _ ->
373 List.fold_left
374 (fun acc (name, child) -> go (path @ [ name ]) child acc)
375 acc node.children
376 | _ -> acc)
377 | `False fn -> (
378 match node.state with
379 | Lazy { hash; _ } -> fn hash
380 | Shallow hash -> fn hash
381 | Pruned hash -> fn hash
382 | Loaded _ | Inode _ ->
383 List.fold_left
384 (fun acc (name, child) -> go (path @ [ name ]) child acc)
385 acc node.children)
386 | `Shallow fn -> (
387 match node.state with
388 | Shallow hash -> fn hash
389 | _ ->
390 List.fold_left
391 (fun acc (name, child) -> go (path @ [ name ]) child acc)
392 acc node.children))
393 in
394 go [] t init
395
396 let clear ?depth:_ _t = ()
397
398 let equal t1 t2 =
399 (* Simple structural equality - could be optimized with hash comparison *)
400 to_concrete t1 = to_concrete t2
401end
402
403module Git = Make (Codec.Git)
404module Mst = Make (Codec.Mst)