forked from
gazagnaire.org/irmin
Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1module Make (F : Codec.S) = struct
2 type hash = F.hash
3 type path = string list
4 type concrete = [ `Contents of string | `Tree of (string * concrete) list ]
5
6 (* Internal tree representation with lazy loading *)
7 type node_state =
8 | Loaded of F.node
9 | Lazy of { backend : hash Backend.t; hash : hash }
10 | Shallow of hash
11 | Pruned of hash
12
13 type tree_node =
14 | Contents of string
15 | Node of {
16 mutable state : node_state;
17 mutable children : (string * tree_node) list; (* modifications *)
18 mutable removed : string list;
19 }
20
21 type t = tree_node
22
23 let empty () =
24 Node { state = Loaded F.empty_node; children = []; removed = [] }
25
26 let of_hash ~backend hash =
27 Node { state = Lazy { backend; hash }; children = []; removed = [] }
28
29 let shallow hash = Node { state = Shallow hash; children = []; removed = [] }
30 let pruned hash = Node { state = Pruned hash; children = []; removed = [] }
31
32 let rec of_concrete : concrete -> t = function
33 | `Contents s -> Contents s
34 | `Tree entries ->
35 let children =
36 List.map (fun (name, c) -> (name, of_concrete c)) entries
37 in
38 Node { state = Loaded F.empty_node; children; removed = [] }
39
40 (* Force loading of a lazy node *)
41 let force_node state =
42 match state with
43 | Loaded n -> Some n
44 | Lazy { backend; hash } -> (
45 match backend.read hash with
46 | Some data -> (
47 match F.node_of_bytes data with Ok n -> Some n | Error _ -> None)
48 | None -> None)
49 | Shallow _ -> None
50 | Pruned _ -> None
51
52 (* Navigate to a path, returning the node and remaining path *)
53 let rec navigate t path =
54 match (t, path) with
55 | _, [] -> Some (t, [])
56 | Contents _, _ :: _ -> None
57 | Node node, name :: rest -> (
58 (* Check modifications first *)
59 match List.assoc_opt name node.children with
60 | Some child -> navigate child rest
61 | None -> (
62 if List.mem name node.removed then None
63 else
64 (* Try to load from underlying node *)
65 match force_node node.state with
66 | None -> None
67 | Some loaded -> (
68 match F.find loaded name with
69 | None -> None
70 | Some (`Contents_inlined data) ->
71 navigate (Contents data) rest
72 | Some (`Contents hash) -> (
73 (* Load the content blob *)
74 match node.state with
75 | Lazy { backend; _ } -> (
76 match backend.read hash with
77 | Some data -> navigate (Contents data) rest
78 | None -> None)
79 | _ -> None)
80 | Some (`Node hash) -> (
81 match node.state with
82 | Lazy { backend; _ } ->
83 let child = of_hash ~backend hash in
84 navigate child rest
85 | _ -> None))))
86
87 let find t path =
88 match navigate t path with Some (Contents s, []) -> Some s | _ -> None
89
90 let find_tree t path =
91 match navigate t path with Some ((Node _ as n), []) -> Some n | _ -> None
92
93 let mem t path = Option.is_some (navigate t path)
94
95 let mem_tree t path =
96 match navigate t path with Some (Node _, []) -> true | _ -> false
97
98 let list t path =
99 match navigate t path with
100 | Some (Node node, []) -> (
101 match force_node node.state with
102 | None -> []
103 | Some loaded ->
104 let base_entries =
105 F.list loaded
106 |> List.filter (fun (name, _) ->
107 (not (List.mem name node.removed))
108 && not (List.mem_assoc name node.children))
109 |> List.map (fun (name, kind) ->
110 let k =
111 match kind with
112 | `Node _ -> `Node
113 | `Contents _ | `Contents_inlined _ -> `Contents
114 in
115 (name, k))
116 in
117 let child_entries =
118 List.map
119 (fun (name, child) ->
120 let k =
121 match child with Node _ -> `Node | Contents _ -> `Contents
122 in
123 (name, k))
124 node.children
125 in
126 List.sort
127 (fun (a, _) (b, _) -> String.compare a b)
128 (base_entries @ child_entries))
129 | _ -> []
130
131 (* Add contents at path, creating intermediate nodes as needed *)
132 let rec add_at t path value =
133 match (t, path) with
134 | _, [] -> value
135 | Contents _, _ :: _ ->
136 (* Replace contents with a tree *)
137 add_at (empty ()) path value
138 | Node node, [ name ] ->
139 let children =
140 (name, value) :: List.filter (fun (n, _) -> n <> name) node.children
141 in
142 let removed = List.filter (( <> ) name) node.removed in
143 Node { node with children; removed }
144 | Node node, name :: rest ->
145 let child =
146 match List.assoc_opt name node.children with
147 | Some c -> c
148 | None -> (
149 if List.mem name node.removed then empty ()
150 else
151 match force_node node.state with
152 | None -> empty ()
153 | Some loaded -> (
154 match F.find loaded name with
155 | Some (`Node hash) -> (
156 match node.state with
157 | Lazy { backend; _ } -> of_hash ~backend hash
158 | _ -> empty ())
159 | Some (`Contents _ | `Contents_inlined _) | None ->
160 empty ()))
161 in
162 let new_child = add_at child rest value in
163 let children =
164 (name, new_child)
165 :: List.filter (fun (n, _) -> n <> name) node.children
166 in
167 Node { node with children }
168
169 let add t path contents = add_at t path (Contents contents)
170 let add_tree t path subtree = add_at t path subtree
171
172 let rec remove t path =
173 match (t, path) with
174 | _, [] -> empty ()
175 | Contents _, _ :: _ -> t
176 | Node node, [ name ] ->
177 let children = List.filter (fun (n, _) -> n <> name) node.children in
178 let removed =
179 if List.mem name node.removed then node.removed
180 else name :: node.removed
181 in
182 Node { node with children; removed }
183 | Node node, name :: rest ->
184 let child =
185 match List.assoc_opt name node.children with
186 | Some c -> c
187 | None -> (
188 if List.mem name node.removed then empty ()
189 else
190 match force_node node.state with
191 | None -> empty ()
192 | Some loaded -> (
193 match F.find loaded name with
194 | Some (`Node hash) -> (
195 match node.state with
196 | Lazy { backend; _ } -> of_hash ~backend hash
197 | _ -> empty ())
198 | Some (`Contents _ | `Contents_inlined _) | None ->
199 empty ()))
200 in
201 let new_child = remove child rest in
202 let children =
203 (name, new_child)
204 :: List.filter (fun (n, _) -> n <> name) node.children
205 in
206 Node { node with children }
207
208 let rec to_concrete t =
209 match t with
210 | Contents s -> `Contents s
211 | Node node ->
212 let entries =
213 match force_node node.state with
214 | None -> []
215 | Some loaded ->
216 F.list loaded
217 |> List.filter_map (fun (name, kind) ->
218 if List.mem name node.removed then None
219 else if List.mem_assoc name node.children then None
220 else
221 match kind with
222 | `Contents_inlined data ->
223 Some (name, `Contents data)
224 | `Contents hash -> (
225 match node.state with
226 | Lazy { backend; _ } -> (
227 match backend.read hash with
228 | Some data -> Some (name, `Contents data)
229 | None -> None)
230 | _ -> None)
231 | `Node hash -> (
232 match node.state with
233 | Lazy { backend; _ } ->
234 let child = of_hash ~backend hash in
235 Some (name, to_concrete child)
236 | _ -> None))
237 in
238 let child_entries =
239 List.map
240 (fun (name, child) -> (name, to_concrete child))
241 node.children
242 in
243 let all =
244 List.sort
245 (fun (a, _) (b, _) -> String.compare a b)
246 (entries @ child_entries)
247 in
248 `Tree all
249
250 (* Write tree to backend and return hash *)
251 let rec write_tree t ~inline_threshold ~(backend : hash Backend.t) : hash =
252 match t with
253 | Contents s ->
254 let h = F.hash_contents s in
255 backend.write h s;
256 h
257 | Node node ->
258 (* First, get the base node *)
259 let base =
260 match force_node node.state with Some n -> n | None -> F.empty_node
261 in
262 (* Apply removals *)
263 let base =
264 List.fold_left (fun n name -> F.remove n name) base node.removed
265 in
266 (* Apply additions (recursively writing children) *)
267 let final =
268 List.fold_left
269 (fun n (name, child) ->
270 match child with
271 | Contents s when inline_threshold > 0
272 && String.length s <= inline_threshold ->
273 (* Inline small contents directly in the node *)
274 F.add n name (`Contents_inlined s)
275 | Contents s ->
276 let h = F.hash_contents s in
277 backend.write h s;
278 F.add n name (`Contents h)
279 | Node _ ->
280 let child_hash =
281 write_tree child ~inline_threshold ~backend
282 in
283 F.add n name (`Node child_hash))
284 base node.children
285 in
286 let data = F.bytes_of_node final in
287 let h = F.hash_node final in
288 backend.write h data;
289 h
290
291 let hash ?(inline_threshold = F.inline_threshold) t ~backend =
292 write_tree t ~inline_threshold ~backend
293
294 type 'a force = [ `True | `False of hash -> 'a | `Shallow of hash -> 'a ]
295
296 let fold ?(force = `True) t init f =
297 let rec go path t acc =
298 match t with
299 | Contents s -> f path (`Contents s) acc
300 | Node node -> (
301 let acc = f path `Tree acc in
302 match force with
303 | `True -> (
304 match force_node node.state with
305 | None -> acc
306 | Some _loaded ->
307 (* Fold over children *)
308 List.fold_left
309 (fun acc (name, child) -> go (path @ [ name ]) child acc)
310 acc node.children)
311 | `False fn -> (
312 match node.state with
313 | Lazy { hash; _ } -> fn hash
314 | Shallow hash -> fn hash
315 | Pruned hash -> fn hash
316 | Loaded _ ->
317 List.fold_left
318 (fun acc (name, child) -> go (path @ [ name ]) child acc)
319 acc node.children)
320 | `Shallow fn -> (
321 match node.state with
322 | Shallow hash -> fn hash
323 | _ ->
324 List.fold_left
325 (fun acc (name, child) -> go (path @ [ name ]) child acc)
326 acc node.children))
327 in
328 go [] t init
329
330 let clear ?depth:_ _t = ()
331
332 let equal t1 t2 =
333 (* Simple structural equality - could be optimized with hash comparison *)
334 to_concrete t1 = to_concrete t2
335end
336
337module Git = Make (Codec.Git)
338module Mst = Make (Codec.Mst)