forked from
gazagnaire.org/irmin
Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1(** Merkle proofs for content-addressed trees. *)
2
3type 'hash kinded_hash = [ `Contents of 'hash | `Node of 'hash ]
4
5type ('hash, 'contents) tree =
6 | Contents of 'contents
7 | Blinded_contents of 'hash
8 | Node of ('hash, 'contents) node
9 | Blinded_node of 'hash
10
11and ('hash, 'contents) node = (string * ('hash, 'contents) tree) list
12
13type ('hash, 'contents) t = {
14 before : 'hash kinded_hash;
15 after : 'hash kinded_hash;
16 state : ('hash, 'contents) tree;
17}
18
19let v ~before ~after state = { before; after; state }
20let before p = p.before
21let after p = p.after
22let state p = p.state
23
24module Make (C : Codec.S) = struct
25 type hash = C.hash
26 type contents = string
27
28 (* Path set for tracking accessed paths *)
29 module PathSet = Set.Make (struct
30 type t = string list
31
32 let compare = compare
33 end)
34
35 (* Internal tree representation during proof production *)
36 type tree_state =
37 | Producing of {
38 backend : hash Backend.t;
39 node_hash : hash;
40 mutable accessed : PathSet.t;
41 }
42 | From_proof of { tree : (hash, contents) tree }
43
44 module Tree = struct
45 type t = { state : tree_state }
46
47 let of_hash backend h =
48 { state = Producing { backend; node_hash = h; accessed = PathSet.empty } }
49
50 let of_proof_tree tree = { state = From_proof { tree } }
51
52 (* Read node from backend *)
53 let read_node backend h =
54 match backend.Backend.read h with
55 | None -> None
56 | Some data -> (
57 match C.node_of_bytes data with Ok n -> Some n | Error _ -> None)
58
59 (* Read contents from backend *)
60 let read_contents backend h = backend.Backend.read h
61
62 (* Record access to a path *)
63 let record_access t path =
64 match t.state with
65 | Producing p -> p.accessed <- PathSet.add path p.accessed
66 | From_proof _ -> ()
67
68 (* Navigate to a path in a backend-stored node *)
69 let rec find_in_node backend node path =
70 match path with
71 | [] -> None (* Can't find contents at node root *)
72 | [ key ] -> (
73 match C.find node key with
74 | Some (`Contents h) -> read_contents backend h
75 | Some (`Contents_inlined data) -> Some data
76 | _ -> None)
77 | key :: rest -> (
78 match C.find node key with
79 | Some (`Node h) -> (
80 match read_node backend h with
81 | Some child -> find_in_node backend child rest
82 | None -> None)
83 | _ -> None)
84
85 (* Navigate in proof tree *)
86 let rec find_in_proof tree path =
87 match (tree, path) with
88 | Contents c, [] -> Some c
89 | Node entries, key :: rest -> (
90 match List.assoc_opt key entries with
91 | Some child -> find_in_proof child rest
92 | None -> None)
93 | Blinded_contents _, [] -> None (* Can't read blinded *)
94 | Blinded_node _, _ -> None (* Can't traverse blinded *)
95 | _ -> None
96
97 let find t path =
98 record_access t path;
99 match t.state with
100 | Producing { backend; node_hash; _ } -> (
101 match read_node backend node_hash with
102 | Some node -> find_in_node backend node path
103 | None -> None)
104 | From_proof { tree } -> find_in_proof tree path
105
106 let rec find_tree_in_node backend node path =
107 match path with
108 | [] -> Some (of_hash backend (C.hash_node node))
109 | key :: rest -> (
110 match C.find node key with
111 | Some (`Node h) -> (
112 match read_node backend h with
113 | Some child -> find_tree_in_node backend child rest
114 | None -> None)
115 | Some (`Contents _ | `Contents_inlined _) -> None
116 | None -> None)
117
118 let rec find_tree_in_proof tree path =
119 match (tree, path) with
120 | (Node _ as n), [] -> Some (of_proof_tree n)
121 | Node entries, key :: rest -> (
122 match List.assoc_opt key entries with
123 | Some child -> find_tree_in_proof child rest
124 | None -> None)
125 | Blinded_node _, _ -> None
126 | Contents _, _ | Blinded_contents _, _ -> None
127
128 let find_tree t path =
129 record_access t path;
130 match t.state with
131 | Producing { backend; node_hash; _ } -> (
132 match read_node backend node_hash with
133 | Some node -> find_tree_in_node backend node path
134 | None -> None)
135 | From_proof { tree } -> find_tree_in_proof tree path
136
137 let mem t path = Option.is_some (find t path)
138
139 let list t path =
140 record_access t path;
141 match t.state with
142 | Producing { backend; node_hash; _ } -> (
143 let rec navigate node = function
144 | [] ->
145 C.list node
146 |> List.map (fun (k, v) ->
147 let kind =
148 match v with
149 | `Node _ -> `Node
150 | `Contents _ | `Contents_inlined _ -> `Contents
151 in
152 (k, kind))
153 | key :: rest -> (
154 match C.find node key with
155 | Some (`Node h) -> (
156 match read_node backend h with
157 | Some child -> navigate child rest
158 | None -> [])
159 | _ -> [])
160 in
161 match read_node backend node_hash with
162 | Some node -> navigate node path
163 | None -> [])
164 | From_proof { tree } ->
165 let rec navigate t = function
166 | [] -> (
167 match t with
168 | Node entries ->
169 List.map
170 (fun (k, v) ->
171 let kind =
172 match v with
173 | Node _ | Blinded_node _ -> `Node
174 | Contents _ | Blinded_contents _ -> `Contents
175 in
176 (k, kind))
177 entries
178 | _ -> [])
179 | key :: rest -> (
180 match t with
181 | Node entries -> (
182 match List.assoc_opt key entries with
183 | Some child -> navigate child rest
184 | None -> [])
185 | _ -> [])
186 in
187 navigate tree path
188
189 (* Write operations - only work on producing trees *)
190 let add t path contents =
191 record_access t path;
192 match t.state with
193 | Producing { backend; node_hash; accessed } ->
194 let rec add_to_node node = function
195 | [] -> failwith "Proof.Tree.add: empty path"
196 | [ key ] ->
197 if C.inline_threshold > 0
198 && String.length contents <= C.inline_threshold
199 then C.add node key (`Contents_inlined contents)
200 else begin
201 let h = C.hash_contents contents in
202 backend.write h contents;
203 C.add node key (`Contents h)
204 end
205 | key :: rest ->
206 let child_node =
207 match C.find node key with
208 | Some (`Node h) -> (
209 match read_node backend h with
210 | Some n -> n
211 | None -> C.empty_node)
212 | _ -> C.empty_node
213 in
214 let updated = add_to_node child_node rest in
215 let data = C.bytes_of_node updated in
216 let h = C.hash_node updated in
217 backend.write h data;
218 C.add node key (`Node h)
219 in
220 let node =
221 match read_node backend node_hash with
222 | Some n -> n
223 | None -> C.empty_node
224 in
225 let updated = add_to_node node path in
226 let data = C.bytes_of_node updated in
227 let new_hash = C.hash_node updated in
228 backend.write new_hash data;
229 { state = Producing { backend; node_hash = new_hash; accessed } }
230 | From_proof _ -> failwith "Proof.Tree.add: cannot modify proof tree"
231
232 let add_tree t path child =
233 record_access t path;
234 match (t.state, child.state) with
235 | ( Producing { backend; node_hash; accessed },
236 Producing { node_hash = child_hash; _ } ) ->
237 let rec add_tree_to_node node = function
238 | [] -> failwith "Proof.Tree.add_tree: empty path"
239 | [ key ] -> C.add node key (`Node child_hash)
240 | key :: rest ->
241 let sub_node =
242 match C.find node key with
243 | Some (`Node h) -> (
244 match read_node backend h with
245 | Some n -> n
246 | None -> C.empty_node)
247 | _ -> C.empty_node
248 in
249 let updated = add_tree_to_node sub_node rest in
250 let data = C.bytes_of_node updated in
251 let h = C.hash_node updated in
252 backend.write h data;
253 C.add node key (`Node h)
254 in
255 let node =
256 match read_node backend node_hash with
257 | Some n -> n
258 | None -> C.empty_node
259 in
260 let updated = add_tree_to_node node path in
261 let data = C.bytes_of_node updated in
262 let new_hash = C.hash_node updated in
263 backend.write new_hash data;
264 { state = Producing { backend; node_hash = new_hash; accessed } }
265 | _ -> failwith "Proof.Tree.add_tree: incompatible trees"
266
267 let remove t path =
268 record_access t path;
269 match t.state with
270 | Producing { backend; node_hash; accessed } ->
271 let rec remove_from_node node = function
272 | [] -> node
273 | [ key ] -> C.remove node key
274 | key :: rest -> (
275 match C.find node key with
276 | Some (`Node h) -> (
277 match read_node backend h with
278 | Some child ->
279 let updated = remove_from_node child rest in
280 if C.is_empty updated then C.remove node key
281 else
282 let data = C.bytes_of_node updated in
283 let h = C.hash_node updated in
284 backend.write h data;
285 C.add node key (`Node h)
286 | None -> node)
287 | _ -> node)
288 in
289 let node =
290 match read_node backend node_hash with
291 | Some n -> n
292 | None -> C.empty_node
293 in
294 let updated = remove_from_node node path in
295 let data = C.bytes_of_node updated in
296 let new_hash = C.hash_node updated in
297 backend.write new_hash data;
298 { state = Producing { backend; node_hash = new_hash; accessed } }
299 | From_proof _ -> failwith "Proof.Tree.remove: cannot modify proof tree"
300
301 let hash t =
302 match t.state with
303 | Producing { node_hash; _ } -> node_hash
304 | From_proof { tree } ->
305 let rec hash_tree = function
306 | Contents c -> C.hash_contents c
307 | Blinded_contents h -> h
308 | Node entries ->
309 let node =
310 List.fold_left
311 (fun n (k, v) ->
312 let kind =
313 match v with
314 | Node _ | Blinded_node _ -> `Node (hash_tree v)
315 | Contents c
316 when C.inline_threshold > 0
317 && String.length c <= C.inline_threshold ->
318 `Contents_inlined c
319 | Contents _ | Blinded_contents _ ->
320 `Contents (hash_tree v)
321 in
322 C.add n k kind)
323 C.empty_node entries
324 in
325 C.hash_node node
326 | Blinded_node h -> h
327 in
328 hash_tree tree
329 end
330
331 (* Build proof tree from accessed paths *)
332 let build_proof_tree backend node_hash accessed =
333 let rec build node prefix =
334 let dominated_by_access =
335 PathSet.exists
336 (fun path ->
337 let plen = List.length prefix in
338 List.length path >= plen
339 && List.filteri (fun i _ -> i < plen) path = prefix)
340 accessed
341 in
342 if not dominated_by_access then Blinded_node (C.hash_node node)
343 else
344 let entries = C.list node in
345 let children =
346 List.map
347 (fun (key, kind) ->
348 let child_path = prefix @ [ key ] in
349 let child_tree =
350 match kind with
351 | `Contents_inlined data ->
352 if PathSet.mem child_path accessed then Contents data
353 else Contents data (* inlined is always available *)
354 | `Contents h ->
355 if PathSet.mem child_path accessed then
356 match backend.Backend.read h with
357 | Some c -> Contents c
358 | None -> Blinded_contents h
359 else Blinded_contents h
360 | `Node h ->
361 if
362 PathSet.exists
363 (fun p ->
364 let clen = List.length child_path in
365 List.length p >= clen
366 && List.filteri (fun i _ -> i < clen) p = child_path)
367 accessed
368 then
369 match backend.Backend.read h with
370 | Some data -> (
371 match C.node_of_bytes data with
372 | Ok child_node -> build child_node child_path
373 | Error _ -> Blinded_node h)
374 | None -> Blinded_node h
375 else Blinded_node h
376 in
377 (key, child_tree))
378 entries
379 in
380 Node children
381 in
382 match backend.Backend.read node_hash with
383 | Some data -> (
384 match C.node_of_bytes data with
385 | Ok node -> build node []
386 | Error _ -> Blinded_node node_hash)
387 | None -> Blinded_node node_hash
388
389 let produce backend root_hash f =
390 let tree = Tree.of_hash backend root_hash in
391 let result_tree, result = f tree in
392 let after_hash = Tree.hash result_tree in
393 let accessed =
394 match tree.state with
395 | Producing { accessed; _ } -> accessed
396 | From_proof _ -> PathSet.empty
397 in
398 let proof_tree = build_proof_tree backend root_hash accessed in
399 let proof =
400 { before = `Node root_hash; after = `Node after_hash; state = proof_tree }
401 in
402 (proof, result)
403
404 let to_tree proof = Tree.of_proof_tree proof.state
405
406 let rec hash_of_tree = function
407 | Contents c -> `Contents (C.hash_contents c)
408 | Blinded_contents h -> `Contents h
409 | Node entries ->
410 let node =
411 List.fold_left
412 (fun n (k, v) ->
413 let entry =
414 match v with
415 | Contents c
416 when C.inline_threshold > 0
417 && String.length c <= C.inline_threshold ->
418 `Contents_inlined c
419 | _ -> (
420 match hash_of_tree v with
421 | `Contents h -> `Contents h
422 | `Node h -> `Node h)
423 in
424 C.add n k entry)
425 C.empty_node entries
426 in
427 `Node (C.hash_node node)
428 | Blinded_node h -> `Node h
429
430 let verify proof f =
431 let tree = to_tree proof in
432 let result_tree, result = f tree in
433 let computed_after = `Node (Tree.hash result_tree) in
434 let expected_after = proof.after in
435 match (computed_after, expected_after) with
436 | `Node h1, `Node h2 when C.hash_equal h1 h2 -> Ok (result_tree, result)
437 | `Contents h1, `Contents h2 when C.hash_equal h1 h2 ->
438 Ok (result_tree, result)
439 | _ ->
440 Error
441 (`Proof_mismatch
442 (Printf.sprintf "expected %s, got %s"
443 (C.hash_to_hex
444 (match expected_after with `Node h | `Contents h -> h))
445 (C.hash_to_hex
446 (match computed_after with `Node h | `Contents h -> h))))
447end
448
449module Git = Make (Codec.Git)
450module Mst = Make (Codec.Mst)