Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1module type S = sig
2 type node
3 type hash
4
5 val hash_node : node -> hash
6 val hash_contents : string -> hash
7 val node_of_bytes : string -> (node, [> `Msg of string ]) result
8 val bytes_of_node : node -> string
9 val empty_node : node
10 val find : node -> string -> [ `Node of hash | `Contents of hash ] option
11 val add : node -> string -> [ `Node of hash | `Contents of hash ] -> node
12 val remove : node -> string -> node
13 val list : node -> (string * [ `Node of hash | `Contents of hash ]) list
14 val is_empty : node -> bool
15
16 (* Hash operations *)
17 val hash_to_bytes : hash -> string
18 val hash_to_hex : hash -> string
19 val hash_of_hex : string -> (hash, [> `Msg of string ]) result
20 val hash_equal : hash -> hash -> bool
21 val hash_compare : hash -> hash -> int
22
23 (* Commit operations *)
24 type commit
25
26 val commit_make :
27 tree:hash ->
28 parents:hash list ->
29 author:string ->
30 committer:string ->
31 message:string ->
32 timestamp:int64 ->
33 commit
34
35 val commit_tree : commit -> hash
36 val commit_parents : commit -> hash list
37 val commit_author : commit -> string
38 val commit_committer : commit -> string
39 val commit_message : commit -> string
40 val commit_timestamp : commit -> int64
41 val commit_of_bytes : string -> (commit, [> `Msg of string ]) result
42 val commit_to_bytes : commit -> string
43 val commit_hash : commit -> hash
44end
45
46module type SHA1 = S with type hash = Hash.sha1
47module type SHA256 = S with type hash = Hash.sha256
48
49(** Git tree object format using ocaml-git. *)
50module Git : SHA1 = struct
51 type hash = Hash.sha1
52 type node = Git.Tree.t
53
54 (* Convert between irmin Hash.sha1 and Git.Hash.t *)
55 let git_hash_of_sha1 (h : hash) : Git.Hash.t =
56 Git.Hash.of_raw_string (Hash.to_bytes h)
57
58 let sha1_of_git_hash (h : Git.Hash.t) : hash =
59 Hash.sha1_of_bytes (Git.Hash.to_raw_string h)
60
61 let empty_node = Git.Tree.empty
62 let is_empty = Git.Tree.is_empty
63
64 let find node name =
65 match Git.Tree.find ~name node with
66 | None -> None
67 | Some entry -> (
68 let h = sha1_of_git_hash entry.hash in
69 match entry.perm with `Dir -> Some (`Node h) | _ -> Some (`Contents h))
70
71 let add node name kind =
72 let perm, hash =
73 match kind with
74 | `Node h -> (`Dir, git_hash_of_sha1 h)
75 | `Contents h -> (`Normal, git_hash_of_sha1 h)
76 in
77 let entry = Git.Tree.entry ~perm ~name hash in
78 Git.Tree.add entry node
79
80 let remove node name = Git.Tree.remove ~name node
81
82 let list node =
83 Git.Tree.to_list node
84 |> List.map (fun (entry : Git.Tree.entry) ->
85 let h = sha1_of_git_hash entry.hash in
86 let kind = match entry.perm with `Dir -> `Node h | _ -> `Contents h in
87 (entry.name, kind))
88
89 let bytes_of_node = Git.Tree.to_string
90
91 let node_of_bytes s : (node, [> `Msg of string ]) result =
92 match Git.Tree.of_string s with
93 | Ok n -> Ok n
94 | Error (`Msg m) -> Error (`Msg m)
95
96 let hash_node node = sha1_of_git_hash (Git.Tree.digest node)
97
98 let hash_contents data =
99 sha1_of_git_hash (Git.Hash.digest_string ~kind:`Blob data)
100
101 let hash_to_bytes = Hash.to_bytes
102 let hash_to_hex = Hash.to_hex
103 let hash_of_hex s : (hash, [> `Msg of string ]) result = Hash.sha1_of_hex s
104 let hash_equal = Hash.equal
105 let hash_compare = Hash.compare
106
107 (* Commit operations using ocaml-git *)
108 type commit = Git.Commit.t
109
110 let commit_make ~tree ~parents ~author ~committer ~message ~timestamp =
111 let user_of_string s =
112 (* Parse "Name <email>" format *)
113 match String.index_opt s '<' with
114 | None -> Git.User.v ~name:s ~email:"" ~date:timestamp ()
115 | Some i ->
116 let name = String.trim (String.sub s 0 i) in
117 let rest = String.sub s (i + 1) (String.length s - i - 1) in
118 let email =
119 match String.index_opt rest '>' with
120 | None -> rest
121 | Some j -> String.sub rest 0 j
122 in
123 Git.User.v ~name ~email ~date:timestamp ()
124 in
125 Git.Commit.v ~tree:(git_hash_of_sha1 tree)
126 ~parents:(List.map git_hash_of_sha1 parents)
127 ~author:(user_of_string author) ~committer:(user_of_string committer)
128 (Some message)
129
130 let commit_tree c = sha1_of_git_hash (Git.Commit.tree c)
131 let commit_parents c = List.map sha1_of_git_hash (Git.Commit.parents c)
132
133 let user_to_string u =
134 let name = Git.User.name u in
135 let email = Git.User.email u in
136 if email = "" then name else Fmt.str "%s <%s>" name email
137
138 let commit_author c = user_to_string (Git.Commit.author c)
139 let commit_committer c = user_to_string (Git.Commit.committer c)
140 let commit_message c = Option.value ~default:"" (Git.Commit.message c)
141 let commit_timestamp c = Git.User.date (Git.Commit.author c)
142
143 let commit_of_bytes s : (commit, [> `Msg of string ]) result =
144 match Git.Commit.of_string s with
145 | Ok c -> Ok c
146 | Error (`Msg m) -> Error (`Msg m)
147
148 let commit_to_bytes = Git.Commit.to_string
149 let commit_hash c = sha1_of_git_hash (Git.Commit.digest c)
150end
151
152(** ATProto Merkle Search Tree format using ocaml-atp.
153
154 MST uses SHA-256 with 2-bit prefix counting for tree depth. Keys are stored
155 sorted with common prefix compression. Encoded as DAG-CBOR. *)
156module Mst : SHA256 = struct
157 type hash = Hash.sha256
158
159 (* Convert between irmin Hash.sha256 and Atp.Cid.t *)
160 let cid_of_sha256 (h : hash) : Atp.Cid.t =
161 Atp.Cid.of_digest `Dag_cbor (Hash.to_bytes h)
162
163 let sha256_of_cid (cid : Atp.Cid.t) : hash =
164 Hash.sha256_of_bytes (Atp.Cid.digest cid)
165
166 (* Our node wraps Atp.Mst.Raw.node for serialization *)
167 type node = Atp.Mst.Raw.node
168
169 let empty_node : node = { l = None; e = [] }
170 let is_empty (node : node) = node.l = None && node.e = []
171
172 (* Decompress key from entry list *)
173 let decompress_keys (entries : Atp.Mst.Raw.entry list) :
174 (string * Atp.Mst.Raw.entry) list =
175 let rec loop prev_key acc = function
176 | [] -> List.rev acc
177 | (e : Atp.Mst.Raw.entry) :: rest ->
178 let key = String.sub prev_key 0 e.p ^ e.k in
179 loop key ((key, e) :: acc) rest
180 in
181 loop "" [] entries
182
183 let find (node : node) name =
184 let entries = decompress_keys node.e in
185 match List.find_opt (fun (k, _) -> k = name) entries with
186 | None -> None
187 | Some (_, e) ->
188 (* In MST, all values are content CIDs, subtrees are in 't' field *)
189 Some (`Contents (sha256_of_cid e.v))
190
191 (* Compress keys for serialization *)
192 let compress_keys entries =
193 let sorted =
194 List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) entries
195 in
196 let rec loop prev_key acc = function
197 | [] -> List.rev acc
198 | (key, (v, t)) :: rest ->
199 let p =
200 let rec shared i =
201 if i >= String.length prev_key || i >= String.length key then i
202 else if prev_key.[i] = key.[i] then shared (i + 1)
203 else i
204 in
205 shared 0
206 in
207 let k = String.sub key p (String.length key - p) in
208 let entry : Atp.Mst.Raw.entry = { p; k; v; t } in
209 loop key (entry :: acc) rest
210 in
211 loop "" [] sorted
212
213 let add (node : node) name kind =
214 let entries = decompress_keys node.e in
215 let v, t =
216 match kind with
217 | `Contents h -> (cid_of_sha256 h, None)
218 | `Node h -> (cid_of_sha256 h, None)
219 (* TODO: Handle subtree pointers *)
220 in
221 let _ = t in
222 (* suppress unused warning *)
223 let entries = List.filter (fun (k, _) -> k <> name) entries in
224 let entries =
225 (name, (v, None))
226 :: List.map (fun (k, (e : Atp.Mst.Raw.entry)) -> (k, (e.v, e.t))) entries
227 in
228 let compressed = compress_keys entries in
229 { node with e = compressed }
230
231 let remove (node : node) name =
232 let entries = decompress_keys node.e in
233 let entries = List.filter (fun (k, _) -> k <> name) entries in
234 let entries =
235 List.map (fun (k, (e : Atp.Mst.Raw.entry)) -> (k, (e.v, e.t))) entries
236 in
237 let compressed = compress_keys entries in
238 { node with e = compressed }
239
240 let list (node : node) =
241 let entries = decompress_keys node.e in
242 List.map
243 (fun (key, (e : Atp.Mst.Raw.entry)) ->
244 (key, `Contents (sha256_of_cid e.v)))
245 entries
246
247 let bytes_of_node node = Atp.Mst.Raw.encode_bytes node
248
249 let node_of_bytes data : (node, [> `Msg of string ]) result =
250 try Ok (Atp.Mst.Raw.decode_bytes data)
251 with exn ->
252 Error (`Msg ("failed to decode MST node: " ^ Printexc.to_string exn))
253
254 let hash_node node =
255 let data = bytes_of_node node in
256 Hash.sha256 data
257
258 let hash_contents data = Hash.sha256 data
259 let hash_to_bytes = Hash.to_bytes
260 let hash_to_hex = Hash.to_hex
261 let hash_of_hex s : (hash, [> `Msg of string ]) result = Hash.sha256_of_hex s
262 let hash_equal = Hash.equal
263 let hash_compare = Hash.compare
264
265 (* Commit operations for MST format using DAG-CBOR *)
266 type commit = {
267 tree : hash;
268 parents : hash list;
269 author : string;
270 committer : string;
271 message : string;
272 timestamp : int64;
273 }
274
275 let commit_make ~tree ~parents ~author ~committer ~message ~timestamp =
276 { tree; parents; author; committer; message; timestamp }
277
278 let commit_tree c = c.tree
279 let commit_parents c = c.parents
280 let commit_author c = c.author
281 let commit_committer c = c.committer
282 let commit_message c = c.message
283 let commit_timestamp c = c.timestamp
284
285 let commit_of_bytes s : (commit, [> `Msg of string ]) result =
286 try
287 let v = Atp.Dagcbor.decode_string ~cid_format:`Atproto s in
288 match v with
289 | `Map fields ->
290 let get_string key =
291 match List.assoc_opt key fields with
292 | Some (`String s) -> s
293 | _ -> ""
294 in
295 let get_int64 key =
296 match List.assoc_opt key fields with Some (`Int i) -> i | _ -> 0L
297 in
298 let get_link key =
299 match List.assoc_opt key fields with
300 | Some (`Link cid) -> sha256_of_cid cid
301 | _ -> Hash.sha256 ""
302 in
303 let get_links key =
304 match List.assoc_opt key fields with
305 | Some (`List links) ->
306 List.filter_map
307 (function `Link cid -> Some (sha256_of_cid cid) | _ -> None)
308 links
309 | _ -> []
310 in
311 Ok
312 {
313 tree = get_link "tree";
314 parents = get_links "parents";
315 author = get_string "author";
316 committer = get_string "committer";
317 message = get_string "message";
318 timestamp = get_int64 "timestamp";
319 }
320 | _ -> Error (`Msg "expected map for commit")
321 with Eio.Io _ as e -> Error (`Msg (Printexc.to_string e))
322
323 let commit_to_bytes c =
324 let v : Atp.Dagcbor.value =
325 `Map
326 [
327 ("author", `String c.author);
328 ("committer", `String c.committer);
329 ("message", `String c.message);
330 ( "parents",
331 `List (List.map (fun h -> `Link (cid_of_sha256 h)) c.parents) );
332 ("timestamp", `Int c.timestamp);
333 ("tree", `Link (cid_of_sha256 c.tree));
334 ]
335 in
336 Atp.Dagcbor.encode_string ~cid_format:`Atproto v
337
338 let commit_hash c =
339 let data = commit_to_bytes c in
340 Hash.sha256 data
341end