forked from
gazagnaire.org/irmin
Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1(** Merkle proofs for content-addressed trees. *)
2
3type 'hash kinded_hash = [ `Contents of 'hash | `Node of 'hash ]
4
5type ('hash, 'contents) tree =
6 | Contents of 'contents
7 | Blinded_contents of 'hash
8 | Node of ('hash, 'contents) node
9 | Blinded_node of 'hash
10
11and ('hash, 'contents) node = (string * ('hash, 'contents) tree) list
12
13type ('hash, 'contents) t = {
14 before : 'hash kinded_hash;
15 after : 'hash kinded_hash;
16 state : ('hash, 'contents) tree;
17}
18
19let v ~before ~after state = { before; after; state }
20let before p = p.before
21let after p = p.after
22let state p = p.state
23
24let pp pp_hash pp_contents fmt p =
25 let pp_kinded fmt = function
26 | `Contents h -> Fmt.pf fmt "contents:%a" pp_hash h
27 | `Node h -> Fmt.pf fmt "node:%a" pp_hash h
28 in
29 let rec pp_tree fmt = function
30 | Contents c -> Fmt.pf fmt "(%a)" pp_contents c
31 | Blinded_contents h -> Fmt.pf fmt "#(%a)" pp_hash h
32 | Node entries ->
33 Fmt.pf fmt "{%a}"
34 Fmt.(list ~sep:(any ", ") (pair ~sep:(any ":") string pp_tree))
35 entries
36 | Blinded_node h -> Fmt.pf fmt "#{%a}" pp_hash h
37 in
38 Fmt.pf fmt "@[<2>proof{before=%a;@ after=%a;@ state=%a}@]" pp_kinded p.before
39 pp_kinded p.after pp_tree p.state
40
41module Make (C : Codec.S) = struct
42 type hash = C.hash
43 type contents = string
44
45 (* Path set for tracking accessed paths *)
46 module Path_set = Set.Make (struct
47 type t = string list
48
49 let compare = compare
50 end)
51
52 (* Internal tree representation during proof production *)
53 type tree_state =
54 | Producing of {
55 backend : hash Backend.t;
56 node_hash : hash;
57 mutable accessed : Path_set.t;
58 }
59 | From_proof of { tree : (hash, contents) tree }
60
61 module Tree = struct
62 type t = { state : tree_state }
63
64 let of_hash backend h =
65 {
66 state = Producing { backend; node_hash = h; accessed = Path_set.empty };
67 }
68
69 let of_proof_tree tree = { state = From_proof { tree } }
70
71 (* Read node from backend *)
72 let read_node backend h =
73 match backend.Backend.read h with
74 | None -> None
75 | Some data -> (
76 match C.node_of_bytes data with Ok n -> Some n | Error _ -> None)
77
78 (* Read contents from backend *)
79 let read_contents backend h = backend.Backend.read h
80
81 (* Record access to a path *)
82 let record_access t path =
83 match t.state with
84 | Producing p -> p.accessed <- Path_set.add path p.accessed
85 | From_proof _ -> ()
86
87 (* Navigate to a path in a backend-stored node *)
88 let rec find_in_node backend node path =
89 match path with
90 | [] -> None (* Can't find contents at node root *)
91 | [ key ] -> (
92 match C.find node key with
93 | Some (`Contents h) -> read_contents backend h
94 | Some (`Contents_inlined data) -> Some data
95 | _ -> None)
96 | key :: rest -> (
97 match C.find node key with
98 | Some (`Node h) -> (
99 match read_node backend h with
100 | Some child -> find_in_node backend child rest
101 | None -> None)
102 | _ -> None)
103
104 (* Navigate in proof tree *)
105 let rec find_in_proof tree path =
106 match (tree, path) with
107 | Contents c, [] -> Some c
108 | Node entries, key :: rest -> (
109 match List.assoc_opt key entries with
110 | Some child -> find_in_proof child rest
111 | None -> None)
112 | Blinded_contents _, [] -> None (* Can't read blinded *)
113 | Blinded_node _, _ -> None (* Can't traverse blinded *)
114 | _ -> None
115
116 let find t path =
117 record_access t path;
118 match t.state with
119 | Producing { backend; node_hash; _ } -> (
120 match read_node backend node_hash with
121 | Some node -> find_in_node backend node path
122 | None -> None)
123 | From_proof { tree } -> find_in_proof tree path
124
125 let rec find_tree_in_node backend node path =
126 match path with
127 | [] -> Some (of_hash backend (C.hash_node node))
128 | key :: rest -> (
129 match C.find node key with
130 | Some (`Node h) -> (
131 match read_node backend h with
132 | Some child -> find_tree_in_node backend child rest
133 | None -> None)
134 | Some (`Contents _ | `Contents_inlined _) -> None
135 | None -> None)
136
137 let rec find_tree_in_proof tree path =
138 match (tree, path) with
139 | (Node _ as n), [] -> Some (of_proof_tree n)
140 | Node entries, key :: rest -> (
141 match List.assoc_opt key entries with
142 | Some child -> find_tree_in_proof child rest
143 | None -> None)
144 | Blinded_node _, _ -> None
145 | Contents _, _ | Blinded_contents _, _ -> None
146
147 let find_tree t path =
148 record_access t path;
149 match t.state with
150 | Producing { backend; node_hash; _ } -> (
151 match read_node backend node_hash with
152 | Some node -> find_tree_in_node backend node path
153 | None -> None)
154 | From_proof { tree } -> find_tree_in_proof tree path
155
156 let mem t path = Option.is_some (find t path)
157
158 let list t path =
159 record_access t path;
160 match t.state with
161 | Producing { backend; node_hash; _ } -> (
162 let rec navigate node = function
163 | [] ->
164 C.list node
165 |> List.map (fun (k, v) ->
166 let kind =
167 match v with
168 | `Node _ -> `Node
169 | `Contents _ | `Contents_inlined _ -> `Contents
170 in
171 (k, kind))
172 | key :: rest -> (
173 match C.find node key with
174 | Some (`Node h) -> (
175 match read_node backend h with
176 | Some child -> navigate child rest
177 | None -> [])
178 | _ -> [])
179 in
180 match read_node backend node_hash with
181 | Some node -> navigate node path
182 | None -> [])
183 | From_proof { tree } ->
184 let rec navigate t = function
185 | [] -> (
186 match t with
187 | Node entries ->
188 List.map
189 (fun (k, v) ->
190 let kind =
191 match v with
192 | Node _ | Blinded_node _ -> `Node
193 | Contents _ | Blinded_contents _ -> `Contents
194 in
195 (k, kind))
196 entries
197 | _ -> [])
198 | key :: rest -> (
199 match t with
200 | Node entries -> (
201 match List.assoc_opt key entries with
202 | Some child -> navigate child rest
203 | None -> [])
204 | _ -> [])
205 in
206 navigate tree path
207
208 (* Write operations - only work on producing trees *)
209 let add t path contents =
210 record_access t path;
211 match t.state with
212 | Producing { backend; node_hash; accessed } ->
213 let rec add_to_node node = function
214 | [] -> failwith "Proof.Tree.add: empty path"
215 | [ key ] ->
216 if C.inline_threshold > 0
217 && String.length contents <= C.inline_threshold
218 then C.add node key (`Contents_inlined contents)
219 else begin
220 let h = C.hash_contents contents in
221 backend.write h contents;
222 C.add node key (`Contents h)
223 end
224 | key :: rest ->
225 let child_node =
226 match C.find node key with
227 | Some (`Node h) -> (
228 match read_node backend h with
229 | Some n -> n
230 | None -> C.empty_node)
231 | _ -> C.empty_node
232 in
233 let updated = add_to_node child_node rest in
234 let data = C.bytes_of_node updated in
235 let h = C.hash_node updated in
236 backend.write h data;
237 C.add node key (`Node h)
238 in
239 let node =
240 match read_node backend node_hash with
241 | Some n -> n
242 | None -> C.empty_node
243 in
244 let updated = add_to_node node path in
245 let data = C.bytes_of_node updated in
246 let new_hash = C.hash_node updated in
247 backend.write new_hash data;
248 { state = Producing { backend; node_hash = new_hash; accessed } }
249 | From_proof _ -> failwith "Proof.Tree.add: cannot modify proof tree"
250
251 let add_tree t path child =
252 record_access t path;
253 match (t.state, child.state) with
254 | ( Producing { backend; node_hash; accessed },
255 Producing { node_hash = child_hash; _ } ) ->
256 let rec add_tree_to_node node = function
257 | [] -> failwith "Proof.Tree.add_tree: empty path"
258 | [ key ] -> C.add node key (`Node child_hash)
259 | key :: rest ->
260 let sub_node =
261 match C.find node key with
262 | Some (`Node h) -> (
263 match read_node backend h with
264 | Some n -> n
265 | None -> C.empty_node)
266 | _ -> C.empty_node
267 in
268 let updated = add_tree_to_node sub_node rest in
269 let data = C.bytes_of_node updated in
270 let h = C.hash_node updated in
271 backend.write h data;
272 C.add node key (`Node h)
273 in
274 let node =
275 match read_node backend node_hash with
276 | Some n -> n
277 | None -> C.empty_node
278 in
279 let updated = add_tree_to_node node path in
280 let data = C.bytes_of_node updated in
281 let new_hash = C.hash_node updated in
282 backend.write new_hash data;
283 { state = Producing { backend; node_hash = new_hash; accessed } }
284 | _ -> failwith "Proof.Tree.add_tree: incompatible trees"
285
286 let remove t path =
287 record_access t path;
288 match t.state with
289 | Producing { backend; node_hash; accessed } ->
290 let rec remove_from_node node = function
291 | [] -> node
292 | [ key ] -> C.remove node key
293 | key :: rest -> (
294 match C.find node key with
295 | Some (`Node h) -> (
296 match read_node backend h with
297 | Some child ->
298 let updated = remove_from_node child rest in
299 if C.is_empty updated then C.remove node key
300 else
301 let data = C.bytes_of_node updated in
302 let h = C.hash_node updated in
303 backend.write h data;
304 C.add node key (`Node h)
305 | None -> node)
306 | _ -> node)
307 in
308 let node =
309 match read_node backend node_hash with
310 | Some n -> n
311 | None -> C.empty_node
312 in
313 let updated = remove_from_node node path in
314 let data = C.bytes_of_node updated in
315 let new_hash = C.hash_node updated in
316 backend.write new_hash data;
317 { state = Producing { backend; node_hash = new_hash; accessed } }
318 | From_proof _ -> failwith "Proof.Tree.remove: cannot modify proof tree"
319
320 let hash t =
321 match t.state with
322 | Producing { node_hash; _ } -> node_hash
323 | From_proof { tree } ->
324 let rec hash_tree = function
325 | Contents c -> C.hash_contents c
326 | Blinded_contents h -> h
327 | Node entries ->
328 let node =
329 List.fold_left
330 (fun n (k, v) ->
331 let kind =
332 match v with
333 | Node _ | Blinded_node _ -> `Node (hash_tree v)
334 | Contents c
335 when C.inline_threshold > 0
336 && String.length c <= C.inline_threshold ->
337 `Contents_inlined c
338 | Contents _ | Blinded_contents _ ->
339 `Contents (hash_tree v)
340 in
341 C.add n k kind)
342 C.empty_node entries
343 in
344 C.hash_node node
345 | Blinded_node h -> h
346 in
347 hash_tree tree
348 end
349
350 (* Build proof tree from accessed paths *)
351 let build_proof_tree backend node_hash accessed =
352 let rec build node prefix =
353 let dominated_by_access =
354 Path_set.exists
355 (fun path ->
356 let plen = List.length prefix in
357 List.length path >= plen
358 && List.filteri (fun i _ -> i < plen) path = prefix)
359 accessed
360 in
361 if not dominated_by_access then Blinded_node (C.hash_node node)
362 else
363 let entries = C.list node in
364 let children =
365 List.map
366 (fun (key, kind) ->
367 let child_path = prefix @ [ key ] in
368 let child_tree =
369 match kind with
370 | `Contents_inlined data ->
371 (* Inlined contents are part of the node structure and
372 must always be included in proofs to preserve the
373 node hash. They are small (≤ inline_threshold bytes)
374 so this doesn't leak significant data. *)
375 Contents data
376 | `Contents h ->
377 if Path_set.mem child_path accessed then
378 match backend.Backend.read h with
379 | Some c -> Contents c
380 | None -> Blinded_contents h
381 else Blinded_contents h
382 | `Node h ->
383 if
384 Path_set.exists
385 (fun p ->
386 let clen = List.length child_path in
387 List.length p >= clen
388 && List.filteri (fun i _ -> i < clen) p = child_path)
389 accessed
390 then
391 match backend.Backend.read h with
392 | Some data -> (
393 match C.node_of_bytes data with
394 | Ok child_node -> build child_node child_path
395 | Error _ -> Blinded_node h)
396 | None -> Blinded_node h
397 else Blinded_node h
398 in
399 (key, child_tree))
400 entries
401 in
402 Node children
403 in
404 match backend.Backend.read node_hash with
405 | Some data -> (
406 match C.node_of_bytes data with
407 | Ok node -> build node []
408 | Error _ -> Blinded_node node_hash)
409 | None -> Blinded_node node_hash
410
411 let produce backend root_hash f =
412 let tree = Tree.of_hash backend root_hash in
413 let result_tree, result = f tree in
414 let after_hash = Tree.hash result_tree in
415 let accessed =
416 match tree.state with
417 | Producing { accessed; _ } -> accessed
418 | From_proof _ -> Path_set.empty
419 in
420 let proof_tree = build_proof_tree backend root_hash accessed in
421 let proof =
422 { before = `Node root_hash; after = `Node after_hash; state = proof_tree }
423 in
424 (proof, result)
425
426 let to_tree proof = Tree.of_proof_tree proof.state
427
428 let rec hash_of_tree = function
429 | Contents c -> `Contents (C.hash_contents c)
430 | Blinded_contents h -> `Contents h
431 | Node entries ->
432 let node =
433 List.fold_left
434 (fun n (k, v) ->
435 let entry =
436 match v with
437 | Contents c
438 when C.inline_threshold > 0
439 && String.length c <= C.inline_threshold ->
440 `Contents_inlined c
441 | _ -> (
442 match hash_of_tree v with
443 | `Contents h -> `Contents h
444 | `Node h -> `Node h)
445 in
446 C.add n k entry)
447 C.empty_node entries
448 in
449 `Node (C.hash_node node)
450 | Blinded_node h -> `Node h
451
452 let verify proof f =
453 let tree = to_tree proof in
454 let result_tree, result = f tree in
455 let computed_after = `Node (Tree.hash result_tree) in
456 let expected_after = proof.after in
457 match (computed_after, expected_after) with
458 | `Node h1, `Node h2 when C.hash_equal h1 h2 -> Ok (result_tree, result)
459 | `Contents h1, `Contents h2 when C.hash_equal h1 h2 ->
460 Ok (result_tree, result)
461 | _ ->
462 Error
463 (`Proof_mismatch
464 (Fmt.str "expected %s, got %s"
465 (C.hash_to_hex
466 (match expected_after with `Node h | `Contents h -> h))
467 (C.hash_to_hex
468 (match computed_after with `Node h | `Contents h -> h))))
469end
470
471module Git = Make (Codec.Git)
472module Mst = Make (Codec.Mst)