forked from
gazagnaire.org/irmin
Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1(** Merkle proofs for content-addressed trees. *)
2
3type 'hash kinded_hash = [ `Contents of 'hash | `Node of 'hash ]
4
5type ('hash, 'contents) tree =
6 | Contents of 'contents
7 | Blinded_contents of 'hash
8 | Node of ('hash, 'contents) node
9 | Blinded_node of 'hash
10
11and ('hash, 'contents) node = (string * ('hash, 'contents) tree) list
12
13type ('hash, 'contents) t = {
14 before : 'hash kinded_hash;
15 after : 'hash kinded_hash;
16 state : ('hash, 'contents) tree;
17}
18
19let v ~before ~after state = { before; after; state }
20let before p = p.before
21let after p = p.after
22let state p = p.state
23
24module Make (C : Codec.S) = struct
25 type hash = C.hash
26 type contents = string
27
28 (* Path set for tracking accessed paths *)
29 module PathSet = Set.Make (struct
30 type t = string list
31
32 let compare = compare
33 end)
34
35 (* Internal tree representation during proof production *)
36 type tree_state =
37 | Producing of {
38 backend : hash Backend.t;
39 node_hash : hash;
40 mutable accessed : PathSet.t;
41 }
42 | From_proof of { tree : (hash, contents) tree }
43
44 module Tree = struct
45 type t = { state : tree_state }
46
47 let of_hash backend h =
48 { state = Producing { backend; node_hash = h; accessed = PathSet.empty } }
49
50 let of_proof_tree tree = { state = From_proof { tree } }
51
52 (* Read node from backend *)
53 let read_node backend h =
54 match backend.Backend.read h with
55 | None -> None
56 | Some data -> (
57 match C.node_of_bytes data with Ok n -> Some n | Error _ -> None)
58
59 (* Read contents from backend *)
60 let read_contents backend h = backend.Backend.read h
61
62 (* Record access to a path *)
63 let record_access t path =
64 match t.state with
65 | Producing p -> p.accessed <- PathSet.add path p.accessed
66 | From_proof _ -> ()
67
68 (* Navigate to a path in a backend-stored node *)
69 let rec find_in_node backend node path =
70 match path with
71 | [] -> None (* Can't find contents at node root *)
72 | [ key ] -> (
73 match C.find node key with
74 | Some (`Contents h) -> read_contents backend h
75 | _ -> None)
76 | key :: rest -> (
77 match C.find node key with
78 | Some (`Node h) -> (
79 match read_node backend h with
80 | Some child -> find_in_node backend child rest
81 | None -> None)
82 | _ -> None)
83
84 (* Navigate in proof tree *)
85 let rec find_in_proof tree path =
86 match (tree, path) with
87 | Contents c, [] -> Some c
88 | Node entries, key :: rest -> (
89 match List.assoc_opt key entries with
90 | Some child -> find_in_proof child rest
91 | None -> None)
92 | Blinded_contents _, [] -> None (* Can't read blinded *)
93 | Blinded_node _, _ -> None (* Can't traverse blinded *)
94 | _ -> None
95
96 let find t path =
97 record_access t path;
98 match t.state with
99 | Producing { backend; node_hash; _ } -> (
100 match read_node backend node_hash with
101 | Some node -> find_in_node backend node path
102 | None -> None)
103 | From_proof { tree } -> find_in_proof tree path
104
105 let rec find_tree_in_node backend node path =
106 match path with
107 | [] -> Some (of_hash backend (C.hash_node node))
108 | key :: rest -> (
109 match C.find node key with
110 | Some (`Node h) -> (
111 match read_node backend h with
112 | Some child -> find_tree_in_node backend child rest
113 | None -> None)
114 | Some (`Contents _) -> None
115 | None -> None)
116
117 let rec find_tree_in_proof tree path =
118 match (tree, path) with
119 | (Node _ as n), [] -> Some (of_proof_tree n)
120 | Node entries, key :: rest -> (
121 match List.assoc_opt key entries with
122 | Some child -> find_tree_in_proof child rest
123 | None -> None)
124 | Blinded_node _, _ -> None
125 | Contents _, _ | Blinded_contents _, _ -> None
126
127 let find_tree t path =
128 record_access t path;
129 match t.state with
130 | Producing { backend; node_hash; _ } -> (
131 match read_node backend node_hash with
132 | Some node -> find_tree_in_node backend node path
133 | None -> None)
134 | From_proof { tree } -> find_tree_in_proof tree path
135
136 let mem t path = Option.is_some (find t path)
137
138 let list t path =
139 record_access t path;
140 match t.state with
141 | Producing { backend; node_hash; _ } -> (
142 let rec navigate node = function
143 | [] ->
144 C.list node
145 |> List.map (fun (k, v) ->
146 let kind =
147 match v with `Node _ -> `Node | `Contents _ -> `Contents
148 in
149 (k, kind))
150 | key :: rest -> (
151 match C.find node key with
152 | Some (`Node h) -> (
153 match read_node backend h with
154 | Some child -> navigate child rest
155 | None -> [])
156 | _ -> [])
157 in
158 match read_node backend node_hash with
159 | Some node -> navigate node path
160 | None -> [])
161 | From_proof { tree } ->
162 let rec navigate t = function
163 | [] -> (
164 match t with
165 | Node entries ->
166 List.map
167 (fun (k, v) ->
168 let kind =
169 match v with
170 | Node _ | Blinded_node _ -> `Node
171 | Contents _ | Blinded_contents _ -> `Contents
172 in
173 (k, kind))
174 entries
175 | _ -> [])
176 | key :: rest -> (
177 match t with
178 | Node entries -> (
179 match List.assoc_opt key entries with
180 | Some child -> navigate child rest
181 | None -> [])
182 | _ -> [])
183 in
184 navigate tree path
185
186 (* Write operations - only work on producing trees *)
187 let add t path contents =
188 record_access t path;
189 match t.state with
190 | Producing { backend; node_hash; accessed } ->
191 let rec add_to_node node = function
192 | [] -> failwith "Proof.Tree.add: empty path"
193 | [ key ] ->
194 let h = C.hash_contents contents in
195 backend.write h contents;
196 C.add node key (`Contents h)
197 | key :: rest ->
198 let child_node =
199 match C.find node key with
200 | Some (`Node h) -> (
201 match read_node backend h with
202 | Some n -> n
203 | None -> C.empty_node)
204 | _ -> C.empty_node
205 in
206 let updated = add_to_node child_node rest in
207 let data = C.bytes_of_node updated in
208 let h = C.hash_node updated in
209 backend.write h data;
210 C.add node key (`Node h)
211 in
212 let node =
213 match read_node backend node_hash with
214 | Some n -> n
215 | None -> C.empty_node
216 in
217 let updated = add_to_node node path in
218 let data = C.bytes_of_node updated in
219 let new_hash = C.hash_node updated in
220 backend.write new_hash data;
221 { state = Producing { backend; node_hash = new_hash; accessed } }
222 | From_proof _ -> failwith "Proof.Tree.add: cannot modify proof tree"
223
224 let add_tree t path child =
225 record_access t path;
226 match (t.state, child.state) with
227 | ( Producing { backend; node_hash; accessed },
228 Producing { node_hash = child_hash; _ } ) ->
229 let rec add_tree_to_node node = function
230 | [] -> failwith "Proof.Tree.add_tree: empty path"
231 | [ key ] -> C.add node key (`Node child_hash)
232 | key :: rest ->
233 let sub_node =
234 match C.find node key with
235 | Some (`Node h) -> (
236 match read_node backend h with
237 | Some n -> n
238 | None -> C.empty_node)
239 | _ -> C.empty_node
240 in
241 let updated = add_tree_to_node sub_node rest in
242 let data = C.bytes_of_node updated in
243 let h = C.hash_node updated in
244 backend.write h data;
245 C.add node key (`Node h)
246 in
247 let node =
248 match read_node backend node_hash with
249 | Some n -> n
250 | None -> C.empty_node
251 in
252 let updated = add_tree_to_node node path in
253 let data = C.bytes_of_node updated in
254 let new_hash = C.hash_node updated in
255 backend.write new_hash data;
256 { state = Producing { backend; node_hash = new_hash; accessed } }
257 | _ -> failwith "Proof.Tree.add_tree: incompatible trees"
258
259 let remove t path =
260 record_access t path;
261 match t.state with
262 | Producing { backend; node_hash; accessed } ->
263 let rec remove_from_node node = function
264 | [] -> node
265 | [ key ] -> C.remove node key
266 | key :: rest -> (
267 match C.find node key with
268 | Some (`Node h) -> (
269 match read_node backend h with
270 | Some child ->
271 let updated = remove_from_node child rest in
272 if C.is_empty updated then C.remove node key
273 else
274 let data = C.bytes_of_node updated in
275 let h = C.hash_node updated in
276 backend.write h data;
277 C.add node key (`Node h)
278 | None -> node)
279 | _ -> node)
280 in
281 let node =
282 match read_node backend node_hash with
283 | Some n -> n
284 | None -> C.empty_node
285 in
286 let updated = remove_from_node node path in
287 let data = C.bytes_of_node updated in
288 let new_hash = C.hash_node updated in
289 backend.write new_hash data;
290 { state = Producing { backend; node_hash = new_hash; accessed } }
291 | From_proof _ -> failwith "Proof.Tree.remove: cannot modify proof tree"
292
293 let hash t =
294 match t.state with
295 | Producing { node_hash; _ } -> node_hash
296 | From_proof { tree } ->
297 let rec hash_tree = function
298 | Contents c -> C.hash_contents c
299 | Blinded_contents h -> h
300 | Node entries ->
301 let node =
302 List.fold_left
303 (fun n (k, v) ->
304 let kind =
305 match v with
306 | Node _ | Blinded_node _ -> `Node (hash_tree v)
307 | Contents _ | Blinded_contents _ ->
308 `Contents (hash_tree v)
309 in
310 C.add n k kind)
311 C.empty_node entries
312 in
313 C.hash_node node
314 | Blinded_node h -> h
315 in
316 hash_tree tree
317 end
318
319 (* Build proof tree from accessed paths *)
320 let build_proof_tree backend node_hash accessed =
321 let rec build node prefix =
322 let dominated_by_access =
323 PathSet.exists
324 (fun path ->
325 let plen = List.length prefix in
326 List.length path >= plen
327 && List.filteri (fun i _ -> i < plen) path = prefix)
328 accessed
329 in
330 if not dominated_by_access then Blinded_node (C.hash_node node)
331 else
332 let entries = C.list node in
333 let children =
334 List.map
335 (fun (key, kind) ->
336 let child_path = prefix @ [ key ] in
337 let child_tree =
338 match kind with
339 | `Contents h ->
340 if PathSet.mem child_path accessed then
341 match backend.Backend.read h with
342 | Some c -> Contents c
343 | None -> Blinded_contents h
344 else Blinded_contents h
345 | `Node h ->
346 if
347 PathSet.exists
348 (fun p ->
349 let clen = List.length child_path in
350 List.length p >= clen
351 && List.filteri (fun i _ -> i < clen) p = child_path)
352 accessed
353 then
354 match backend.Backend.read h with
355 | Some data -> (
356 match C.node_of_bytes data with
357 | Ok child_node -> build child_node child_path
358 | Error _ -> Blinded_node h)
359 | None -> Blinded_node h
360 else Blinded_node h
361 in
362 (key, child_tree))
363 entries
364 in
365 Node children
366 in
367 match backend.Backend.read node_hash with
368 | Some data -> (
369 match C.node_of_bytes data with
370 | Ok node -> build node []
371 | Error _ -> Blinded_node node_hash)
372 | None -> Blinded_node node_hash
373
374 let produce backend root_hash f =
375 let tree = Tree.of_hash backend root_hash in
376 let result_tree, result = f tree in
377 let after_hash = Tree.hash result_tree in
378 let accessed =
379 match tree.state with
380 | Producing { accessed; _ } -> accessed
381 | From_proof _ -> PathSet.empty
382 in
383 let proof_tree = build_proof_tree backend root_hash accessed in
384 let proof =
385 { before = `Node root_hash; after = `Node after_hash; state = proof_tree }
386 in
387 (proof, result)
388
389 let to_tree proof = Tree.of_proof_tree proof.state
390
391 let rec hash_of_tree = function
392 | Contents c -> `Contents (C.hash_contents c)
393 | Blinded_contents h -> `Contents h
394 | Node entries ->
395 let node =
396 List.fold_left
397 (fun n (k, v) ->
398 let h =
399 match hash_of_tree v with
400 | `Contents h -> `Contents h
401 | `Node h -> `Node h
402 in
403 C.add n k h)
404 C.empty_node entries
405 in
406 `Node (C.hash_node node)
407 | Blinded_node h -> `Node h
408
409 let verify proof f =
410 let tree = to_tree proof in
411 let result_tree, result = f tree in
412 let computed_after = `Node (Tree.hash result_tree) in
413 let expected_after = proof.after in
414 match (computed_after, expected_after) with
415 | `Node h1, `Node h2 when C.hash_equal h1 h2 -> Ok (result_tree, result)
416 | `Contents h1, `Contents h2 when C.hash_equal h1 h2 ->
417 Ok (result_tree, result)
418 | _ ->
419 Error
420 (`Proof_mismatch
421 (Printf.sprintf "expected %s, got %s"
422 (C.hash_to_hex
423 (match expected_after with `Node h | `Contents h -> h))
424 (C.hash_to_hex
425 (match computed_after with `Node h | `Contents h -> h))))
426end
427
428module Git = Make (Codec.Git)
429module Mst = Make (Codec.Mst)