forked from
gazagnaire.org/irmin
Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1(** Merkle proofs for content-addressed trees. *)
2
3type 'hash kinded_hash = [ `Contents of 'hash | `Node of 'hash ]
4
5type ('hash, 'contents) tree =
6 | Contents of 'contents
7 | Blinded_contents of 'hash
8 | Node of ('hash, 'contents) node
9 | Blinded_node of 'hash
10
11and ('hash, 'contents) node = (string * ('hash, 'contents) tree) list
12
13type ('hash, 'contents) t = {
14 before : 'hash kinded_hash;
15 after : 'hash kinded_hash;
16 state : ('hash, 'contents) tree;
17}
18
19let v ~before ~after state = { before; after; state }
20let before p = p.before
21let after p = p.after
22let state p = p.state
23
24module Make (C : Codec.S) = struct
25 type hash = C.hash
26 type contents = string
27
28 (* Path set for tracking accessed paths *)
29 module PathSet = Set.Make (struct
30 type t = string list
31
32 let compare = compare
33 end)
34
35 (* Internal tree representation during proof production *)
36 type tree_state =
37 | Producing of {
38 backend : hash Backend.t;
39 node_hash : hash;
40 mutable accessed : PathSet.t;
41 }
42 | From_proof of { tree : (hash, contents) tree }
43
44 module Tree = struct
45 type t = { state : tree_state }
46
47 let of_hash backend h =
48 { state = Producing { backend; node_hash = h; accessed = PathSet.empty } }
49
50 let of_proof_tree tree = { state = From_proof { tree } }
51
52 (* Read node from backend *)
53 let read_node backend h =
54 match backend.Backend.read h with
55 | None -> None
56 | Some data -> (
57 match C.node_of_bytes data with Ok n -> Some n | Error _ -> None)
58
59 (* Read contents from backend *)
60 let read_contents backend h = backend.Backend.read h
61
62 (* Record access to a path *)
63 let record_access t path =
64 match t.state with
65 | Producing p -> p.accessed <- PathSet.add path p.accessed
66 | From_proof _ -> ()
67
68 (* Navigate to a path in a backend-stored node *)
69 let rec find_in_node backend node path =
70 match path with
71 | [] -> None (* Can't find contents at node root *)
72 | [ key ] -> (
73 match C.find node key with
74 | Some (`Contents h) -> read_contents backend h
75 | Some (`Contents_inlined data) -> Some data
76 | _ -> None)
77 | key :: rest -> (
78 match C.find node key with
79 | Some (`Node h) -> (
80 match read_node backend h with
81 | Some child -> find_in_node backend child rest
82 | None -> None)
83 | _ -> None)
84
85 (* Navigate in proof tree *)
86 let rec find_in_proof tree path =
87 match (tree, path) with
88 | Contents c, [] -> Some c
89 | Node entries, key :: rest -> (
90 match List.assoc_opt key entries with
91 | Some child -> find_in_proof child rest
92 | None -> None)
93 | Blinded_contents _, [] -> None (* Can't read blinded *)
94 | Blinded_node _, _ -> None (* Can't traverse blinded *)
95 | _ -> None
96
97 let find t path =
98 record_access t path;
99 match t.state with
100 | Producing { backend; node_hash; _ } -> (
101 match read_node backend node_hash with
102 | Some node -> find_in_node backend node path
103 | None -> None)
104 | From_proof { tree } -> find_in_proof tree path
105
106 let rec find_tree_in_node backend node path =
107 match path with
108 | [] -> Some (of_hash backend (C.hash_node node))
109 | key :: rest -> (
110 match C.find node key with
111 | Some (`Node h) -> (
112 match read_node backend h with
113 | Some child -> find_tree_in_node backend child rest
114 | None -> None)
115 | Some (`Contents _ | `Contents_inlined _) -> None
116 | None -> None)
117
118 let rec find_tree_in_proof tree path =
119 match (tree, path) with
120 | (Node _ as n), [] -> Some (of_proof_tree n)
121 | Node entries, key :: rest -> (
122 match List.assoc_opt key entries with
123 | Some child -> find_tree_in_proof child rest
124 | None -> None)
125 | Blinded_node _, _ -> None
126 | Contents _, _ | Blinded_contents _, _ -> None
127
128 let find_tree t path =
129 record_access t path;
130 match t.state with
131 | Producing { backend; node_hash; _ } -> (
132 match read_node backend node_hash with
133 | Some node -> find_tree_in_node backend node path
134 | None -> None)
135 | From_proof { tree } -> find_tree_in_proof tree path
136
137 let mem t path = Option.is_some (find t path)
138
139 let list t path =
140 record_access t path;
141 match t.state with
142 | Producing { backend; node_hash; _ } -> (
143 let rec navigate node = function
144 | [] ->
145 C.list node
146 |> List.map (fun (k, v) ->
147 let kind =
148 match v with
149 | `Node _ -> `Node
150 | `Contents _ | `Contents_inlined _ -> `Contents
151 in
152 (k, kind))
153 | key :: rest -> (
154 match C.find node key with
155 | Some (`Node h) -> (
156 match read_node backend h with
157 | Some child -> navigate child rest
158 | None -> [])
159 | _ -> [])
160 in
161 match read_node backend node_hash with
162 | Some node -> navigate node path
163 | None -> [])
164 | From_proof { tree } ->
165 let rec navigate t = function
166 | [] -> (
167 match t with
168 | Node entries ->
169 List.map
170 (fun (k, v) ->
171 let kind =
172 match v with
173 | Node _ | Blinded_node _ -> `Node
174 | Contents _ | Blinded_contents _ -> `Contents
175 in
176 (k, kind))
177 entries
178 | _ -> [])
179 | key :: rest -> (
180 match t with
181 | Node entries -> (
182 match List.assoc_opt key entries with
183 | Some child -> navigate child rest
184 | None -> [])
185 | _ -> [])
186 in
187 navigate tree path
188
189 (* Write operations - only work on producing trees *)
190 let add t path contents =
191 record_access t path;
192 match t.state with
193 | Producing { backend; node_hash; accessed } ->
194 let rec add_to_node node = function
195 | [] -> failwith "Proof.Tree.add: empty path"
196 | [ key ] ->
197 if C.inline_threshold > 0
198 && String.length contents <= C.inline_threshold
199 then C.add node key (`Contents_inlined contents)
200 else begin
201 let h = C.hash_contents contents in
202 backend.write h contents;
203 C.add node key (`Contents h)
204 end
205 | key :: rest ->
206 let child_node =
207 match C.find node key with
208 | Some (`Node h) -> (
209 match read_node backend h with
210 | Some n -> n
211 | None -> C.empty_node)
212 | _ -> C.empty_node
213 in
214 let updated = add_to_node child_node rest in
215 let data = C.bytes_of_node updated in
216 let h = C.hash_node updated in
217 backend.write h data;
218 C.add node key (`Node h)
219 in
220 let node =
221 match read_node backend node_hash with
222 | Some n -> n
223 | None -> C.empty_node
224 in
225 let updated = add_to_node node path in
226 let data = C.bytes_of_node updated in
227 let new_hash = C.hash_node updated in
228 backend.write new_hash data;
229 { state = Producing { backend; node_hash = new_hash; accessed } }
230 | From_proof _ -> failwith "Proof.Tree.add: cannot modify proof tree"
231
232 let add_tree t path child =
233 record_access t path;
234 match (t.state, child.state) with
235 | ( Producing { backend; node_hash; accessed },
236 Producing { node_hash = child_hash; _ } ) ->
237 let rec add_tree_to_node node = function
238 | [] -> failwith "Proof.Tree.add_tree: empty path"
239 | [ key ] -> C.add node key (`Node child_hash)
240 | key :: rest ->
241 let sub_node =
242 match C.find node key with
243 | Some (`Node h) -> (
244 match read_node backend h with
245 | Some n -> n
246 | None -> C.empty_node)
247 | _ -> C.empty_node
248 in
249 let updated = add_tree_to_node sub_node rest in
250 let data = C.bytes_of_node updated in
251 let h = C.hash_node updated in
252 backend.write h data;
253 C.add node key (`Node h)
254 in
255 let node =
256 match read_node backend node_hash with
257 | Some n -> n
258 | None -> C.empty_node
259 in
260 let updated = add_tree_to_node node path in
261 let data = C.bytes_of_node updated in
262 let new_hash = C.hash_node updated in
263 backend.write new_hash data;
264 { state = Producing { backend; node_hash = new_hash; accessed } }
265 | _ -> failwith "Proof.Tree.add_tree: incompatible trees"
266
267 let remove t path =
268 record_access t path;
269 match t.state with
270 | Producing { backend; node_hash; accessed } ->
271 let rec remove_from_node node = function
272 | [] -> node
273 | [ key ] -> C.remove node key
274 | key :: rest -> (
275 match C.find node key with
276 | Some (`Node h) -> (
277 match read_node backend h with
278 | Some child ->
279 let updated = remove_from_node child rest in
280 if C.is_empty updated then C.remove node key
281 else
282 let data = C.bytes_of_node updated in
283 let h = C.hash_node updated in
284 backend.write h data;
285 C.add node key (`Node h)
286 | None -> node)
287 | _ -> node)
288 in
289 let node =
290 match read_node backend node_hash with
291 | Some n -> n
292 | None -> C.empty_node
293 in
294 let updated = remove_from_node node path in
295 let data = C.bytes_of_node updated in
296 let new_hash = C.hash_node updated in
297 backend.write new_hash data;
298 { state = Producing { backend; node_hash = new_hash; accessed } }
299 | From_proof _ -> failwith "Proof.Tree.remove: cannot modify proof tree"
300
301 let hash t =
302 match t.state with
303 | Producing { node_hash; _ } -> node_hash
304 | From_proof { tree } ->
305 let rec hash_tree = function
306 | Contents c -> C.hash_contents c
307 | Blinded_contents h -> h
308 | Node entries ->
309 let node =
310 List.fold_left
311 (fun n (k, v) ->
312 let kind =
313 match v with
314 | Node _ | Blinded_node _ -> `Node (hash_tree v)
315 | Contents _ | Blinded_contents _ ->
316 `Contents (hash_tree v)
317 in
318 C.add n k kind)
319 C.empty_node entries
320 in
321 C.hash_node node
322 | Blinded_node h -> h
323 in
324 hash_tree tree
325 end
326
327 (* Build proof tree from accessed paths *)
328 let build_proof_tree backend node_hash accessed =
329 let rec build node prefix =
330 let dominated_by_access =
331 PathSet.exists
332 (fun path ->
333 let plen = List.length prefix in
334 List.length path >= plen
335 && List.filteri (fun i _ -> i < plen) path = prefix)
336 accessed
337 in
338 if not dominated_by_access then Blinded_node (C.hash_node node)
339 else
340 let entries = C.list node in
341 let children =
342 List.map
343 (fun (key, kind) ->
344 let child_path = prefix @ [ key ] in
345 let child_tree =
346 match kind with
347 | `Contents_inlined data ->
348 if PathSet.mem child_path accessed then Contents data
349 else Contents data (* inlined is always available *)
350 | `Contents h ->
351 if PathSet.mem child_path accessed then
352 match backend.Backend.read h with
353 | Some c -> Contents c
354 | None -> Blinded_contents h
355 else Blinded_contents h
356 | `Node h ->
357 if
358 PathSet.exists
359 (fun p ->
360 let clen = List.length child_path in
361 List.length p >= clen
362 && List.filteri (fun i _ -> i < clen) p = child_path)
363 accessed
364 then
365 match backend.Backend.read h with
366 | Some data -> (
367 match C.node_of_bytes data with
368 | Ok child_node -> build child_node child_path
369 | Error _ -> Blinded_node h)
370 | None -> Blinded_node h
371 else Blinded_node h
372 in
373 (key, child_tree))
374 entries
375 in
376 Node children
377 in
378 match backend.Backend.read node_hash with
379 | Some data -> (
380 match C.node_of_bytes data with
381 | Ok node -> build node []
382 | Error _ -> Blinded_node node_hash)
383 | None -> Blinded_node node_hash
384
385 let produce backend root_hash f =
386 let tree = Tree.of_hash backend root_hash in
387 let result_tree, result = f tree in
388 let after_hash = Tree.hash result_tree in
389 let accessed =
390 match tree.state with
391 | Producing { accessed; _ } -> accessed
392 | From_proof _ -> PathSet.empty
393 in
394 let proof_tree = build_proof_tree backend root_hash accessed in
395 let proof =
396 { before = `Node root_hash; after = `Node after_hash; state = proof_tree }
397 in
398 (proof, result)
399
400 let to_tree proof = Tree.of_proof_tree proof.state
401
402 let rec hash_of_tree = function
403 | Contents c -> `Contents (C.hash_contents c)
404 | Blinded_contents h -> `Contents h
405 | Node entries ->
406 let node =
407 List.fold_left
408 (fun n (k, v) ->
409 let h =
410 match hash_of_tree v with
411 | `Contents h -> `Contents h
412 | `Node h -> `Node h
413 in
414 C.add n k h)
415 C.empty_node entries
416 in
417 `Node (C.hash_node node)
418 | Blinded_node h -> `Node h
419
420 let verify proof f =
421 let tree = to_tree proof in
422 let result_tree, result = f tree in
423 let computed_after = `Node (Tree.hash result_tree) in
424 let expected_after = proof.after in
425 match (computed_after, expected_after) with
426 | `Node h1, `Node h2 when C.hash_equal h1 h2 -> Ok (result_tree, result)
427 | `Contents h1, `Contents h2 when C.hash_equal h1 h2 ->
428 Ok (result_tree, result)
429 | _ ->
430 Error
431 (`Proof_mismatch
432 (Printf.sprintf "expected %s, got %s"
433 (C.hash_to_hex
434 (match expected_after with `Node h | `Contents h -> h))
435 (C.hash_to_hex
436 (match computed_after with `Node h | `Contents h -> h))))
437end
438
439module Git = Make (Codec.Git)
440module Mst = Make (Codec.Mst)