Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
at benchs 340 lines 11 kB view raw
1module type S = sig 2 type node 3 type hash 4 5 val hash_node : node -> hash 6 val hash_contents : string -> hash 7 val node_of_bytes : string -> (node, [> `Msg of string ]) result 8 val bytes_of_node : node -> string 9 val empty_node : node 10 val find : node -> string -> [ `Node of hash | `Contents of hash ] option 11 val add : node -> string -> [ `Node of hash | `Contents of hash ] -> node 12 val remove : node -> string -> node 13 val list : node -> (string * [ `Node of hash | `Contents of hash ]) list 14 val is_empty : node -> bool 15 16 (* Hash operations *) 17 val hash_to_bytes : hash -> string 18 val hash_to_hex : hash -> string 19 val hash_of_hex : string -> (hash, [> `Msg of string ]) result 20 val hash_equal : hash -> hash -> bool 21 val hash_compare : hash -> hash -> int 22 23 (* Commit operations *) 24 type commit 25 26 val commit_make : 27 tree:hash -> 28 parents:hash list -> 29 author:string -> 30 committer:string -> 31 message:string -> 32 timestamp:int64 -> 33 commit 34 35 val commit_tree : commit -> hash 36 val commit_parents : commit -> hash list 37 val commit_author : commit -> string 38 val commit_committer : commit -> string 39 val commit_message : commit -> string 40 val commit_timestamp : commit -> int64 41 val commit_of_bytes : string -> (commit, [> `Msg of string ]) result 42 val commit_to_bytes : commit -> string 43 val commit_hash : commit -> hash 44end 45 46module type SHA1 = S with type hash = Hash.sha1 47module type SHA256 = S with type hash = Hash.sha256 48 49(** Git tree object format using ocaml-git. *) 50module Git : SHA1 = struct 51 type hash = Hash.sha1 52 type node = Git.Tree.t 53 54 (* Convert between irmin Hash.sha1 and Git.Hash.t *) 55 let git_hash_of_sha1 (h : hash) : Git.Hash.t = 56 Git.Hash.of_raw_string (Hash.to_bytes h) 57 58 let sha1_of_git_hash (h : Git.Hash.t) : hash = 59 Hash.sha1_of_bytes (Git.Hash.to_raw_string h) 60 61 let empty_node = Git.Tree.empty 62 let is_empty = Git.Tree.is_empty 63 64 let find node name = 65 match Git.Tree.find ~name node with 66 | None -> None 67 | Some entry -> ( 68 let h = sha1_of_git_hash entry.hash in 69 match entry.perm with `Dir -> Some (`Node h) | _ -> Some (`Contents h)) 70 71 let add node name kind = 72 let perm, hash = 73 match kind with 74 | `Node h -> (`Dir, git_hash_of_sha1 h) 75 | `Contents h -> (`Normal, git_hash_of_sha1 h) 76 in 77 let entry = Git.Tree.entry ~perm ~name hash in 78 Git.Tree.add entry node 79 80 let remove node name = Git.Tree.remove ~name node 81 82 let list node = 83 Git.Tree.to_list node 84 |> List.map (fun (entry : Git.Tree.entry) -> 85 let h = sha1_of_git_hash entry.hash in 86 let kind = match entry.perm with `Dir -> `Node h | _ -> `Contents h in 87 (entry.name, kind)) 88 89 let bytes_of_node = Git.Tree.to_string 90 91 let node_of_bytes s : (node, [> `Msg of string ]) result = 92 match Git.Tree.of_string s with 93 | Ok n -> Ok n 94 | Error (`Msg m) -> Error (`Msg m) 95 96 let hash_node node = sha1_of_git_hash (Git.Tree.digest node) 97 98 let hash_contents data = 99 sha1_of_git_hash (Git.Hash.digest_string ~kind:`Blob data) 100 101 let hash_to_bytes = Hash.to_bytes 102 let hash_to_hex = Hash.to_hex 103 let hash_of_hex s : (hash, [> `Msg of string ]) result = Hash.sha1_of_hex s 104 let hash_equal = Hash.equal 105 let hash_compare = Hash.compare 106 107 (* Commit operations using ocaml-git *) 108 type commit = Git.Commit.t 109 110 let commit_make ~tree ~parents ~author ~committer ~message ~timestamp = 111 let user_of_string s = 112 (* Parse "Name <email>" format *) 113 match String.index_opt s '<' with 114 | None -> Git.User.v ~name:s ~email:"" ~date:timestamp () 115 | Some i -> 116 let name = String.trim (String.sub s 0 i) in 117 let rest = String.sub s (i + 1) (String.length s - i - 1) in 118 let email = 119 match String.index_opt rest '>' with 120 | None -> rest 121 | Some j -> String.sub rest 0 j 122 in 123 Git.User.v ~name ~email ~date:timestamp () 124 in 125 Git.Commit.v ~tree:(git_hash_of_sha1 tree) 126 ~parents:(List.map git_hash_of_sha1 parents) 127 ~author:(user_of_string author) ~committer:(user_of_string committer) 128 (Some message) 129 130 let commit_tree c = sha1_of_git_hash (Git.Commit.tree c) 131 let commit_parents c = List.map sha1_of_git_hash (Git.Commit.parents c) 132 133 let user_to_string u = 134 let name = Git.User.name u in 135 let email = Git.User.email u in 136 if email = "" then name else Printf.sprintf "%s <%s>" name email 137 138 let commit_author c = user_to_string (Git.Commit.author c) 139 let commit_committer c = user_to_string (Git.Commit.committer c) 140 let commit_message c = Option.value ~default:"" (Git.Commit.message c) 141 let commit_timestamp c = Git.User.date (Git.Commit.author c) 142 143 let commit_of_bytes s : (commit, [> `Msg of string ]) result = 144 match Git.Commit.of_string s with 145 | Ok c -> Ok c 146 | Error (`Msg m) -> Error (`Msg m) 147 148 let commit_to_bytes = Git.Commit.to_string 149 let commit_hash c = sha1_of_git_hash (Git.Commit.digest c) 150end 151 152(** ATProto Merkle Search Tree format using ocaml-atp. 153 154 MST uses SHA-256 with 2-bit prefix counting for tree depth. Keys are stored 155 sorted with common prefix compression. Encoded as DAG-CBOR. *) 156module Mst : SHA256 = struct 157 type hash = Hash.sha256 158 159 (* Convert between irmin Hash.sha256 and Atp.Cid.t *) 160 let cid_of_sha256 (h : hash) : Atp.Cid.t = 161 Atp.Cid.of_digest `Dag_cbor (Hash.to_bytes h) 162 163 let sha256_of_cid (cid : Atp.Cid.t) : hash = 164 Hash.sha256_of_bytes (Atp.Cid.digest cid) 165 166 (* Our node wraps Atp.Mst.Raw.node for serialization *) 167 type node = Atp.Mst.Raw.node 168 169 let empty_node : node = { l = None; e = [] } 170 let is_empty (node : node) = node.l = None && node.e = [] 171 172 (* Decompress key from entry list *) 173 let decompress_keys (entries : Atp.Mst.Raw.entry list) : 174 (string * Atp.Mst.Raw.entry) list = 175 let rec loop prev_key acc = function 176 | [] -> List.rev acc 177 | (e : Atp.Mst.Raw.entry) :: rest -> 178 let key = String.sub prev_key 0 e.p ^ e.k in 179 loop key ((key, e) :: acc) rest 180 in 181 loop "" [] entries 182 183 let find (node : node) name = 184 let entries = decompress_keys node.e in 185 match List.find_opt (fun (k, _) -> k = name) entries with 186 | None -> None 187 | Some (_, e) -> 188 (* In MST, all values are content CIDs, subtrees are in 't' field *) 189 Some (`Contents (sha256_of_cid e.v)) 190 191 (* Compress keys for serialization *) 192 let compress_keys entries = 193 let sorted = 194 List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) entries 195 in 196 let rec loop prev_key acc = function 197 | [] -> List.rev acc 198 | (key, (v, t)) :: rest -> 199 let p = 200 let rec shared i = 201 if i >= String.length prev_key || i >= String.length key then i 202 else if prev_key.[i] = key.[i] then shared (i + 1) 203 else i 204 in 205 shared 0 206 in 207 let k = String.sub key p (String.length key - p) in 208 let entry : Atp.Mst.Raw.entry = { p; k; v; t } in 209 loop key (entry :: acc) rest 210 in 211 loop "" [] sorted 212 213 let add (node : node) name kind = 214 let entries = decompress_keys node.e in 215 let v, t = 216 match kind with 217 | `Contents h -> (cid_of_sha256 h, None) 218 | `Node h -> (cid_of_sha256 h, None) 219 (* TODO: Handle subtree pointers *) 220 in 221 let _ = t in 222 (* suppress unused warning *) 223 let entries = List.filter (fun (k, _) -> k <> name) entries in 224 let entries = 225 (name, (v, None)) 226 :: List.map (fun (k, (e : Atp.Mst.Raw.entry)) -> (k, (e.v, e.t))) entries 227 in 228 let compressed = compress_keys entries in 229 { node with e = compressed } 230 231 let remove (node : node) name = 232 let entries = decompress_keys node.e in 233 let entries = List.filter (fun (k, _) -> k <> name) entries in 234 let entries = 235 List.map (fun (k, (e : Atp.Mst.Raw.entry)) -> (k, (e.v, e.t))) entries 236 in 237 let compressed = compress_keys entries in 238 { node with e = compressed } 239 240 let list (node : node) = 241 let entries = decompress_keys node.e in 242 List.map 243 (fun (key, (e : Atp.Mst.Raw.entry)) -> 244 (key, `Contents (sha256_of_cid e.v))) 245 entries 246 247 let bytes_of_node node = Atp.Mst.Raw.encode_bytes node 248 249 let node_of_bytes data : (node, [> `Msg of string ]) result = 250 try Ok (Atp.Mst.Raw.decode_bytes data) 251 with _ -> Error (`Msg "failed to decode MST node") 252 253 let hash_node node = 254 let data = bytes_of_node node in 255 Hash.sha256 data 256 257 let hash_contents data = Hash.sha256 data 258 let hash_to_bytes = Hash.to_bytes 259 let hash_to_hex = Hash.to_hex 260 let hash_of_hex s : (hash, [> `Msg of string ]) result = Hash.sha256_of_hex s 261 let hash_equal = Hash.equal 262 let hash_compare = Hash.compare 263 264 (* Commit operations for MST format using DAG-CBOR *) 265 type commit = { 266 tree : hash; 267 parents : hash list; 268 author : string; 269 committer : string; 270 message : string; 271 timestamp : int64; 272 } 273 274 let commit_make ~tree ~parents ~author ~committer ~message ~timestamp = 275 { tree; parents; author; committer; message; timestamp } 276 277 let commit_tree c = c.tree 278 let commit_parents c = c.parents 279 let commit_author c = c.author 280 let commit_committer c = c.committer 281 let commit_message c = c.message 282 let commit_timestamp c = c.timestamp 283 284 let commit_of_bytes s : (commit, [> `Msg of string ]) result = 285 try 286 let v = Atp.Dagcbor.decode_string ~cid_format:`Atproto s in 287 match v with 288 | `Map fields -> 289 let get_string key = 290 match List.assoc_opt key fields with 291 | Some (`String s) -> s 292 | _ -> "" 293 in 294 let get_int64 key = 295 match List.assoc_opt key fields with Some (`Int i) -> i | _ -> 0L 296 in 297 let get_link key = 298 match List.assoc_opt key fields with 299 | Some (`Link cid) -> sha256_of_cid cid 300 | _ -> Hash.sha256 "" 301 in 302 let get_links key = 303 match List.assoc_opt key fields with 304 | Some (`List links) -> 305 List.filter_map 306 (function `Link cid -> Some (sha256_of_cid cid) | _ -> None) 307 links 308 | _ -> [] 309 in 310 Ok 311 { 312 tree = get_link "tree"; 313 parents = get_links "parents"; 314 author = get_string "author"; 315 committer = get_string "committer"; 316 message = get_string "message"; 317 timestamp = get_int64 "timestamp"; 318 } 319 | _ -> Error (`Msg "expected map for commit") 320 with Eio.Io _ as e -> Error (`Msg (Printexc.to_string e)) 321 322 let commit_to_bytes c = 323 let v : Atp.Dagcbor.value = 324 `Map 325 [ 326 ("author", `String c.author); 327 ("committer", `String c.committer); 328 ("message", `String c.message); 329 ( "parents", 330 `List (List.map (fun h -> `Link (cid_of_sha256 h)) c.parents) ); 331 ("timestamp", `Int c.timestamp); 332 ("tree", `Link (cid_of_sha256 c.tree)); 333 ] 334 in 335 Atp.Dagcbor.encode_string ~cid_format:`Atproto v 336 337 let commit_hash c = 338 let data = commit_to_bytes c in 339 Hash.sha256 data 340end