Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
at main 341 lines 11 kB view raw
1module type S = sig 2 type node 3 type hash 4 5 val hash_node : node -> hash 6 val hash_contents : string -> hash 7 val node_of_bytes : string -> (node, [> `Msg of string ]) result 8 val bytes_of_node : node -> string 9 val empty_node : node 10 val find : node -> string -> [ `Node of hash | `Contents of hash ] option 11 val add : node -> string -> [ `Node of hash | `Contents of hash ] -> node 12 val remove : node -> string -> node 13 val list : node -> (string * [ `Node of hash | `Contents of hash ]) list 14 val is_empty : node -> bool 15 16 (* Hash operations *) 17 val hash_to_bytes : hash -> string 18 val hash_to_hex : hash -> string 19 val hash_of_hex : string -> (hash, [> `Msg of string ]) result 20 val hash_equal : hash -> hash -> bool 21 val hash_compare : hash -> hash -> int 22 23 (* Commit operations *) 24 type commit 25 26 val commit_make : 27 tree:hash -> 28 parents:hash list -> 29 author:string -> 30 committer:string -> 31 message:string -> 32 timestamp:int64 -> 33 commit 34 35 val commit_tree : commit -> hash 36 val commit_parents : commit -> hash list 37 val commit_author : commit -> string 38 val commit_committer : commit -> string 39 val commit_message : commit -> string 40 val commit_timestamp : commit -> int64 41 val commit_of_bytes : string -> (commit, [> `Msg of string ]) result 42 val commit_to_bytes : commit -> string 43 val commit_hash : commit -> hash 44end 45 46module type SHA1 = S with type hash = Hash.sha1 47module type SHA256 = S with type hash = Hash.sha256 48 49(** Git tree object format using ocaml-git. *) 50module Git : SHA1 = struct 51 type hash = Hash.sha1 52 type node = Git.Tree.t 53 54 (* Convert between irmin Hash.sha1 and Git.Hash.t *) 55 let git_hash_of_sha1 (h : hash) : Git.Hash.t = 56 Git.Hash.of_raw_string (Hash.to_bytes h) 57 58 let sha1_of_git_hash (h : Git.Hash.t) : hash = 59 Hash.sha1_of_bytes (Git.Hash.to_raw_string h) 60 61 let empty_node = Git.Tree.empty 62 let is_empty = Git.Tree.is_empty 63 64 let find node name = 65 match Git.Tree.find ~name node with 66 | None -> None 67 | Some entry -> ( 68 let h = sha1_of_git_hash entry.hash in 69 match entry.perm with `Dir -> Some (`Node h) | _ -> Some (`Contents h)) 70 71 let add node name kind = 72 let perm, hash = 73 match kind with 74 | `Node h -> (`Dir, git_hash_of_sha1 h) 75 | `Contents h -> (`Normal, git_hash_of_sha1 h) 76 in 77 let entry = Git.Tree.entry ~perm ~name hash in 78 Git.Tree.add entry node 79 80 let remove node name = Git.Tree.remove ~name node 81 82 let list node = 83 Git.Tree.to_list node 84 |> List.map (fun (entry : Git.Tree.entry) -> 85 let h = sha1_of_git_hash entry.hash in 86 let kind = match entry.perm with `Dir -> `Node h | _ -> `Contents h in 87 (entry.name, kind)) 88 89 let bytes_of_node = Git.Tree.to_string 90 91 let node_of_bytes s : (node, [> `Msg of string ]) result = 92 match Git.Tree.of_string s with 93 | Ok n -> Ok n 94 | Error (`Msg m) -> Error (`Msg m) 95 96 let hash_node node = sha1_of_git_hash (Git.Tree.digest node) 97 98 let hash_contents data = 99 sha1_of_git_hash (Git.Hash.digest_string ~kind:`Blob data) 100 101 let hash_to_bytes = Hash.to_bytes 102 let hash_to_hex = Hash.to_hex 103 let hash_of_hex s : (hash, [> `Msg of string ]) result = Hash.sha1_of_hex s 104 let hash_equal = Hash.equal 105 let hash_compare = Hash.compare 106 107 (* Commit operations using ocaml-git *) 108 type commit = Git.Commit.t 109 110 let commit_make ~tree ~parents ~author ~committer ~message ~timestamp = 111 let user_of_string s = 112 (* Parse "Name <email>" format *) 113 match String.index_opt s '<' with 114 | None -> Git.User.v ~name:s ~email:"" ~date:timestamp () 115 | Some i -> 116 let name = String.trim (String.sub s 0 i) in 117 let rest = String.sub s (i + 1) (String.length s - i - 1) in 118 let email = 119 match String.index_opt rest '>' with 120 | None -> rest 121 | Some j -> String.sub rest 0 j 122 in 123 Git.User.v ~name ~email ~date:timestamp () 124 in 125 Git.Commit.v ~tree:(git_hash_of_sha1 tree) 126 ~parents:(List.map git_hash_of_sha1 parents) 127 ~author:(user_of_string author) ~committer:(user_of_string committer) 128 (Some message) 129 130 let commit_tree c = sha1_of_git_hash (Git.Commit.tree c) 131 let commit_parents c = List.map sha1_of_git_hash (Git.Commit.parents c) 132 133 let user_to_string u = 134 let name = Git.User.name u in 135 let email = Git.User.email u in 136 if email = "" then name else Fmt.str "%s <%s>" name email 137 138 let commit_author c = user_to_string (Git.Commit.author c) 139 let commit_committer c = user_to_string (Git.Commit.committer c) 140 let commit_message c = Option.value ~default:"" (Git.Commit.message c) 141 let commit_timestamp c = Git.User.date (Git.Commit.author c) 142 143 let commit_of_bytes s : (commit, [> `Msg of string ]) result = 144 match Git.Commit.of_string s with 145 | Ok c -> Ok c 146 | Error (`Msg m) -> Error (`Msg m) 147 148 let commit_to_bytes = Git.Commit.to_string 149 let commit_hash c = sha1_of_git_hash (Git.Commit.digest c) 150end 151 152(** ATProto Merkle Search Tree format using ocaml-atp. 153 154 MST uses SHA-256 with 2-bit prefix counting for tree depth. Keys are stored 155 sorted with common prefix compression. Encoded as DAG-CBOR. *) 156module Mst : SHA256 = struct 157 type hash = Hash.sha256 158 159 (* Convert between irmin Hash.sha256 and Atp.Cid.t *) 160 let cid_of_sha256 (h : hash) : Atp.Cid.t = 161 Atp.Cid.of_digest `Dag_cbor (Hash.to_bytes h) 162 163 let sha256_of_cid (cid : Atp.Cid.t) : hash = 164 Hash.sha256_of_bytes (Atp.Cid.digest cid) 165 166 (* Our node wraps Atp.Mst.Raw.node for serialization *) 167 type node = Atp.Mst.Raw.node 168 169 let empty_node : node = { l = None; e = [] } 170 let is_empty (node : node) = node.l = None && node.e = [] 171 172 (* Decompress key from entry list *) 173 let decompress_keys (entries : Atp.Mst.Raw.entry list) : 174 (string * Atp.Mst.Raw.entry) list = 175 let rec loop prev_key acc = function 176 | [] -> List.rev acc 177 | (e : Atp.Mst.Raw.entry) :: rest -> 178 let key = String.sub prev_key 0 e.p ^ e.k in 179 loop key ((key, e) :: acc) rest 180 in 181 loop "" [] entries 182 183 let find (node : node) name = 184 let entries = decompress_keys node.e in 185 match List.find_opt (fun (k, _) -> k = name) entries with 186 | None -> None 187 | Some (_, e) -> 188 (* In MST, all values are content CIDs, subtrees are in 't' field *) 189 Some (`Contents (sha256_of_cid e.v)) 190 191 (* Compress keys for serialization *) 192 let compress_keys entries = 193 let sorted = 194 List.sort (fun (k1, _) (k2, _) -> String.compare k1 k2) entries 195 in 196 let rec loop prev_key acc = function 197 | [] -> List.rev acc 198 | (key, (v, t)) :: rest -> 199 let p = 200 let rec shared i = 201 if i >= String.length prev_key || i >= String.length key then i 202 else if prev_key.[i] = key.[i] then shared (i + 1) 203 else i 204 in 205 shared 0 206 in 207 let k = String.sub key p (String.length key - p) in 208 let entry : Atp.Mst.Raw.entry = { p; k; v; t } in 209 loop key (entry :: acc) rest 210 in 211 loop "" [] sorted 212 213 let add (node : node) name kind = 214 let entries = decompress_keys node.e in 215 let v, t = 216 match kind with 217 | `Contents h -> (cid_of_sha256 h, None) 218 | `Node h -> (cid_of_sha256 h, None) 219 (* TODO: Handle subtree pointers *) 220 in 221 let _ = t in 222 (* suppress unused warning *) 223 let entries = List.filter (fun (k, _) -> k <> name) entries in 224 let entries = 225 (name, (v, None)) 226 :: List.map (fun (k, (e : Atp.Mst.Raw.entry)) -> (k, (e.v, e.t))) entries 227 in 228 let compressed = compress_keys entries in 229 { node with e = compressed } 230 231 let remove (node : node) name = 232 let entries = decompress_keys node.e in 233 let entries = List.filter (fun (k, _) -> k <> name) entries in 234 let entries = 235 List.map (fun (k, (e : Atp.Mst.Raw.entry)) -> (k, (e.v, e.t))) entries 236 in 237 let compressed = compress_keys entries in 238 { node with e = compressed } 239 240 let list (node : node) = 241 let entries = decompress_keys node.e in 242 List.map 243 (fun (key, (e : Atp.Mst.Raw.entry)) -> 244 (key, `Contents (sha256_of_cid e.v))) 245 entries 246 247 let bytes_of_node node = Atp.Mst.Raw.encode_bytes node 248 249 let node_of_bytes data : (node, [> `Msg of string ]) result = 250 try Ok (Atp.Mst.Raw.decode_bytes data) 251 with exn -> 252 Error (`Msg ("failed to decode MST node: " ^ Printexc.to_string exn)) 253 254 let hash_node node = 255 let data = bytes_of_node node in 256 Hash.sha256 data 257 258 let hash_contents data = Hash.sha256 data 259 let hash_to_bytes = Hash.to_bytes 260 let hash_to_hex = Hash.to_hex 261 let hash_of_hex s : (hash, [> `Msg of string ]) result = Hash.sha256_of_hex s 262 let hash_equal = Hash.equal 263 let hash_compare = Hash.compare 264 265 (* Commit operations for MST format using DAG-CBOR *) 266 type commit = { 267 tree : hash; 268 parents : hash list; 269 author : string; 270 committer : string; 271 message : string; 272 timestamp : int64; 273 } 274 275 let commit_make ~tree ~parents ~author ~committer ~message ~timestamp = 276 { tree; parents; author; committer; message; timestamp } 277 278 let commit_tree c = c.tree 279 let commit_parents c = c.parents 280 let commit_author c = c.author 281 let commit_committer c = c.committer 282 let commit_message c = c.message 283 let commit_timestamp c = c.timestamp 284 285 let commit_of_bytes s : (commit, [> `Msg of string ]) result = 286 try 287 let v = Atp.Dagcbor.decode_string ~cid_format:`Atproto s in 288 match v with 289 | `Map fields -> 290 let get_string key = 291 match List.assoc_opt key fields with 292 | Some (`String s) -> s 293 | _ -> "" 294 in 295 let get_int64 key = 296 match List.assoc_opt key fields with Some (`Int i) -> i | _ -> 0L 297 in 298 let get_link key = 299 match List.assoc_opt key fields with 300 | Some (`Link cid) -> sha256_of_cid cid 301 | _ -> Hash.sha256 "" 302 in 303 let get_links key = 304 match List.assoc_opt key fields with 305 | Some (`List links) -> 306 List.filter_map 307 (function `Link cid -> Some (sha256_of_cid cid) | _ -> None) 308 links 309 | _ -> [] 310 in 311 Ok 312 { 313 tree = get_link "tree"; 314 parents = get_links "parents"; 315 author = get_string "author"; 316 committer = get_string "committer"; 317 message = get_string "message"; 318 timestamp = get_int64 "timestamp"; 319 } 320 | _ -> Error (`Msg "expected map for commit") 321 with Eio.Io _ as e -> Error (`Msg (Printexc.to_string e)) 322 323 let commit_to_bytes c = 324 let v : Atp.Dagcbor.value = 325 `Map 326 [ 327 ("author", `String c.author); 328 ("committer", `String c.committer); 329 ("message", `String c.message); 330 ( "parents", 331 `List (List.map (fun h -> `Link (cid_of_sha256 h)) c.parents) ); 332 ("timestamp", `Int c.timestamp); 333 ("tree", `Link (cid_of_sha256 c.tree)); 334 ] 335 in 336 Atp.Dagcbor.encode_string ~cid_format:`Atproto v 337 338 let commit_hash c = 339 let data = commit_to_bytes c in 340 Hash.sha256 data 341end