Git object storage and pack files for Eio
at main 242 lines 8.1 kB view raw
1(* Copyright (c) 2013-2017 Thomas Gazagnaire <thomas@gazagnaire.org> 2 Copyright (c) 2017-2024 Romain Calascibetta <romain.calascibetta@gmail.com> 3 Copyright (c) 2024-2026 Thomas Gazagnaire <thomas@gazagnaire.org> 4 5 Permission to use, copy, modify, and distribute this software for any 6 purpose with or without fee is hereby granted, provided that the above 7 copyright notice and this permission notice appear in all copies. 8 9 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) 16 17(** Git values (union of all object types). *) 18 19module Reader = Bytesrw.Bytes.Reader 20module Writer = Bytesrw.Bytes.Writer 21module Slice = Bytesrw.Bytes.Slice 22 23type t = Blob of Blob.t | Commit of Commit.t | Tree of Tree.t | Tag of Tag.t 24 25let blob b = Blob b 26let commit c = Commit c 27let tree t = Tree t 28let tag t = Tag t 29 30let kind = function 31 | Blob _ -> `Blob 32 | Commit _ -> `Commit 33 | Tree _ -> `Tree 34 | Tag _ -> `Tag 35 36let pp ppf = function 37 | Blob b -> Fmt.pf ppf "(Blob %a)" Blob.pp b 38 | Commit c -> Fmt.pf ppf "(Commit %a)" Commit.pp c 39 | Tree t -> Fmt.pf ppf "(Tree %a)" Tree.pp t 40 | Tag t -> Fmt.pf ppf "(Tag %a)" Tag.pp t 41 42let equal a b = 43 match (a, b) with 44 | Blob a, Blob b -> Blob.equal a b 45 | Commit a, Commit b -> Commit.equal a b 46 | Tree a, Tree b -> Tree.equal a b 47 | Tag a, Tag b -> Tag.equal a b 48 | _ -> false 49 50let compare a b = 51 match (a, b) with 52 | Blob a, Blob b -> Blob.compare a b 53 | Commit a, Commit b -> Commit.compare a b 54 | Tree a, Tree b -> Tree.compare a b 55 | Tag a, Tag b -> Tag.compare a b 56 | Blob _, _ -> -1 57 | _, Blob _ -> 1 58 | Commit _, _ -> -1 59 | _, Commit _ -> 1 60 | Tree _, _ -> -1 61 | _, Tree _ -> 1 62 63let hash = function 64 | Blob b -> Blob.hash b 65 | Commit c -> Commit.hash c 66 | Tree t -> Tree.hash t 67 | Tag t -> Tag.hash t 68 69let digest = function 70 | Blob b -> Blob.digest b 71 | Commit c -> Commit.digest c 72 | Tree t -> Tree.digest t 73 | Tag t -> Tag.digest t 74 75(** Get the raw content of a value (without header). *) 76let to_string_without_header = function 77 | Blob b -> Blob.to_string b 78 | Commit c -> Commit.to_string c 79 | Tree t -> Tree.to_string t 80 | Tag t -> Tag.to_string t 81 82let length v = String.length (to_string_without_header v) 83 84(** Get the raw content of a value with git header. Format: "type 85 length\x00content" *) 86let to_string v = 87 let content = to_string_without_header v in 88 let kind_str = 89 match kind v with 90 | `Blob -> "blob" 91 | `Commit -> "commit" 92 | `Tree -> "tree" 93 | `Tag -> "tag" 94 in 95 Fmt.str "%s %d\x00%s" kind_str (String.length content) content 96 97(** Parse a value from raw content (without header). *) 98let of_string ~kind content = 99 match kind with 100 | `Blob -> Ok (Blob (Blob.of_string content)) 101 | `Commit -> Result.map commit (Commit.of_string content) 102 | `Tree -> Result.map tree (Tree.of_string content) 103 | `Tag -> Result.map tag (Tag.of_string content) 104 105let of_string_exn ~kind content = 106 match of_string ~kind content with Ok v -> v | Error (`Msg m) -> failwith m 107 108(** Parse a value from raw content with git header. *) 109let of_string_with_header s = 110 (* Find the space after type *) 111 match String.index_opt s ' ' with 112 | None -> Error (`Msg "Invalid git object: missing space after type") 113 | Some sp_pos -> ( 114 let type_str = String.sub s 0 sp_pos in 115 (* Find the null byte after length *) 116 match String.index_from_opt s (sp_pos + 1) '\x00' with 117 | None -> Error (`Msg "Invalid git object: missing null after length") 118 | Some null_pos -> ( 119 let length_str = String.sub s (sp_pos + 1) (null_pos - sp_pos - 1) in 120 let content_start = null_pos + 1 in 121 let content = 122 String.sub s content_start (String.length s - content_start) 123 in 124 (* Verify length *) 125 match int_of_string_opt length_str with 126 | None -> Error (`Msg ("Invalid length: " ^ length_str)) 127 | Some expected_len -> 128 if String.length content <> expected_len then 129 Error 130 (`Msg 131 (Fmt.str "Length mismatch: expected %d, got %d" 132 expected_len (String.length content))) 133 else 134 let kind = 135 match type_str with 136 | "blob" -> Ok `Blob 137 | "commit" -> Ok `Commit 138 | "tree" -> Ok `Tree 139 | "tag" -> Ok `Tag 140 | _ -> Error (`Msg ("Unknown object type: " ^ type_str)) 141 in 142 Result.bind kind (fun kind -> of_string ~kind content))) 143 144let of_string_with_header_exn s = 145 match of_string_with_header s with Ok v -> v | Error (`Msg m) -> failwith m 146 147(** {1 Bytesrw support} *) 148 149(** Read until a specific byte is found *) 150let read_until reader byte = 151 let buf = Buffer.create 64 in 152 let rec loop () = 153 match Reader.read reader with 154 | slice when Slice.is_eod slice -> Error (`Msg "unexpected end of data") 155 | slice -> ( 156 let str = Slice.to_string slice in 157 match String.index_opt str byte with 158 | Some pos -> 159 Buffer.add_substring buf str 0 pos; 160 if pos + 1 < String.length str then begin 161 let leftover = 162 String.sub str (pos + 1) (String.length str - pos - 1) 163 in 164 Reader.push_back reader (Slice.of_string leftover) 165 end; 166 Ok (Buffer.contents buf) 167 | None -> 168 Buffer.add_string buf str; 169 loop ()) 170 in 171 loop () 172 173(** Read a git object from a reader. Returns (kind, length, content). *) 174let read_header reader = 175 let open Result.Syntax in 176 let* type_str = read_until reader ' ' in 177 let* length_str = read_until reader '\x00' in 178 let* kind = 179 match type_str with 180 | "blob" -> Ok `Blob 181 | "commit" -> Ok `Commit 182 | "tree" -> Ok `Tree 183 | "tag" -> Ok `Tag 184 | _ -> Error (`Msg ("Unknown object type: " ^ type_str)) 185 in 186 let* length = 187 match int_of_string_opt length_str with 188 | Some n -> Ok n 189 | None -> Error (`Msg ("Invalid length: " ^ length_str)) 190 in 191 Ok (kind, length) 192 193(** Parse from a reader, dispatching to reader-based parsers for commit and tree 194 to avoid materialising the full content string. Blobs and tags still go 195 through [of_string] (blobs are opaque bytes; tags are rare). *) 196let of_reader ~kind reader = 197 match kind with 198 | `Commit -> Result.map commit (Commit.of_reader reader) 199 | `Tree -> Result.map tree (Tree.of_reader reader) 200 | `Blob -> 201 let content = Reader.to_string reader in 202 Ok (Blob (Blob.of_string content)) 203 | `Tag -> 204 let content = Reader.to_string reader in 205 Result.map tag (Tag.of_string content) 206 207(** Read a git object from a bytesrw reader. *) 208let read reader = 209 let open Result.Syntax in 210 let* kind, _length = read_header reader in 211 of_reader ~kind reader 212 213(** Write a git object to a bytesrw writer. *) 214let write writer v = 215 let content = to_string_without_header v in 216 let kind_str = 217 match kind v with 218 | `Blob -> "blob" 219 | `Commit -> "commit" 220 | `Tree -> "tree" 221 | `Tag -> "tag" 222 in 223 let header = Fmt.str "%s %d\x00" kind_str (String.length content) in 224 Writer.write writer (Slice.of_string header); 225 Writer.write writer (Slice.of_string content) 226 227(** Write only the content (without header) to a bytesrw writer. *) 228let write_content writer v = 229 let content = to_string_without_header v in 230 Writer.write writer (Slice.of_string content) 231 232module Set = Set.Make (struct 233 type nonrec t = t 234 235 let compare = compare 236end) 237 238module Map = Map.Make (struct 239 type nonrec t = t 240 241 let compare = compare 242end)