Git object storage and pack files for Eio
1(* Copyright (c) 2013-2017 Thomas Gazagnaire <thomas@gazagnaire.org>
2 Copyright (c) 2017-2024 Romain Calascibetta <romain.calascibetta@gmail.com>
3 Copyright (c) 2024-2026 Thomas Gazagnaire <thomas@gazagnaire.org>
4
5 Permission to use, copy, modify, and distribute this software for any
6 purpose with or without fee is hereby granted, provided that the above
7 copyright notice and this permission notice appear in all copies.
8
9 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *)
16
17(** Git values (union of all object types). *)
18
19module Reader = Bytesrw.Bytes.Reader
20module Writer = Bytesrw.Bytes.Writer
21module Slice = Bytesrw.Bytes.Slice
22
23type t = Blob of Blob.t | Commit of Commit.t | Tree of Tree.t | Tag of Tag.t
24
25let blob b = Blob b
26let commit c = Commit c
27let tree t = Tree t
28let tag t = Tag t
29
30let kind = function
31 | Blob _ -> `Blob
32 | Commit _ -> `Commit
33 | Tree _ -> `Tree
34 | Tag _ -> `Tag
35
36let pp ppf = function
37 | Blob b -> Fmt.pf ppf "(Blob %a)" Blob.pp b
38 | Commit c -> Fmt.pf ppf "(Commit %a)" Commit.pp c
39 | Tree t -> Fmt.pf ppf "(Tree %a)" Tree.pp t
40 | Tag t -> Fmt.pf ppf "(Tag %a)" Tag.pp t
41
42let equal a b =
43 match (a, b) with
44 | Blob a, Blob b -> Blob.equal a b
45 | Commit a, Commit b -> Commit.equal a b
46 | Tree a, Tree b -> Tree.equal a b
47 | Tag a, Tag b -> Tag.equal a b
48 | _ -> false
49
50let compare a b =
51 match (a, b) with
52 | Blob a, Blob b -> Blob.compare a b
53 | Commit a, Commit b -> Commit.compare a b
54 | Tree a, Tree b -> Tree.compare a b
55 | Tag a, Tag b -> Tag.compare a b
56 | Blob _, _ -> -1
57 | _, Blob _ -> 1
58 | Commit _, _ -> -1
59 | _, Commit _ -> 1
60 | Tree _, _ -> -1
61 | _, Tree _ -> 1
62
63let hash = function
64 | Blob b -> Blob.hash b
65 | Commit c -> Commit.hash c
66 | Tree t -> Tree.hash t
67 | Tag t -> Tag.hash t
68
69let digest = function
70 | Blob b -> Blob.digest b
71 | Commit c -> Commit.digest c
72 | Tree t -> Tree.digest t
73 | Tag t -> Tag.digest t
74
75(** Get the raw content of a value (without header). *)
76let to_string_without_header = function
77 | Blob b -> Blob.to_string b
78 | Commit c -> Commit.to_string c
79 | Tree t -> Tree.to_string t
80 | Tag t -> Tag.to_string t
81
82let length v = String.length (to_string_without_header v)
83
84(** Get the raw content of a value with git header. Format: "type
85 length\x00content" *)
86let to_string v =
87 let content = to_string_without_header v in
88 let kind_str =
89 match kind v with
90 | `Blob -> "blob"
91 | `Commit -> "commit"
92 | `Tree -> "tree"
93 | `Tag -> "tag"
94 in
95 Fmt.str "%s %d\x00%s" kind_str (String.length content) content
96
97(** Parse a value from raw content (without header). *)
98let of_string ~kind content =
99 match kind with
100 | `Blob -> Ok (Blob (Blob.of_string content))
101 | `Commit -> Result.map commit (Commit.of_string content)
102 | `Tree -> Result.map tree (Tree.of_string content)
103 | `Tag -> Result.map tag (Tag.of_string content)
104
105let of_string_exn ~kind content =
106 match of_string ~kind content with Ok v -> v | Error (`Msg m) -> failwith m
107
108(** Parse a value from raw content with git header. *)
109let of_string_with_header s =
110 (* Find the space after type *)
111 match String.index_opt s ' ' with
112 | None -> Error (`Msg "Invalid git object: missing space after type")
113 | Some sp_pos -> (
114 let type_str = String.sub s 0 sp_pos in
115 (* Find the null byte after length *)
116 match String.index_from_opt s (sp_pos + 1) '\x00' with
117 | None -> Error (`Msg "Invalid git object: missing null after length")
118 | Some null_pos -> (
119 let length_str = String.sub s (sp_pos + 1) (null_pos - sp_pos - 1) in
120 let content_start = null_pos + 1 in
121 let content =
122 String.sub s content_start (String.length s - content_start)
123 in
124 (* Verify length *)
125 match int_of_string_opt length_str with
126 | None -> Error (`Msg ("Invalid length: " ^ length_str))
127 | Some expected_len ->
128 if String.length content <> expected_len then
129 Error
130 (`Msg
131 (Fmt.str "Length mismatch: expected %d, got %d"
132 expected_len (String.length content)))
133 else
134 let kind =
135 match type_str with
136 | "blob" -> Ok `Blob
137 | "commit" -> Ok `Commit
138 | "tree" -> Ok `Tree
139 | "tag" -> Ok `Tag
140 | _ -> Error (`Msg ("Unknown object type: " ^ type_str))
141 in
142 Result.bind kind (fun kind -> of_string ~kind content)))
143
144let of_string_with_header_exn s =
145 match of_string_with_header s with Ok v -> v | Error (`Msg m) -> failwith m
146
147(** {1 Bytesrw support} *)
148
149(** Read until a specific byte is found *)
150let read_until reader byte =
151 let buf = Buffer.create 64 in
152 let rec loop () =
153 match Reader.read reader with
154 | slice when Slice.is_eod slice -> Error (`Msg "unexpected end of data")
155 | slice -> (
156 let str = Slice.to_string slice in
157 match String.index_opt str byte with
158 | Some pos ->
159 Buffer.add_substring buf str 0 pos;
160 if pos + 1 < String.length str then begin
161 let leftover =
162 String.sub str (pos + 1) (String.length str - pos - 1)
163 in
164 Reader.push_back reader (Slice.of_string leftover)
165 end;
166 Ok (Buffer.contents buf)
167 | None ->
168 Buffer.add_string buf str;
169 loop ())
170 in
171 loop ()
172
173(** Read a git object from a reader. Returns (kind, length, content). *)
174let read_header reader =
175 let open Result.Syntax in
176 let* type_str = read_until reader ' ' in
177 let* length_str = read_until reader '\x00' in
178 let* kind =
179 match type_str with
180 | "blob" -> Ok `Blob
181 | "commit" -> Ok `Commit
182 | "tree" -> Ok `Tree
183 | "tag" -> Ok `Tag
184 | _ -> Error (`Msg ("Unknown object type: " ^ type_str))
185 in
186 let* length =
187 match int_of_string_opt length_str with
188 | Some n -> Ok n
189 | None -> Error (`Msg ("Invalid length: " ^ length_str))
190 in
191 Ok (kind, length)
192
193(** Parse from a reader, dispatching to reader-based parsers for commit and tree
194 to avoid materialising the full content string. Blobs and tags still go
195 through [of_string] (blobs are opaque bytes; tags are rare). *)
196let of_reader ~kind reader =
197 match kind with
198 | `Commit -> Result.map commit (Commit.of_reader reader)
199 | `Tree -> Result.map tree (Tree.of_reader reader)
200 | `Blob ->
201 let content = Reader.to_string reader in
202 Ok (Blob (Blob.of_string content))
203 | `Tag ->
204 let content = Reader.to_string reader in
205 Result.map tag (Tag.of_string content)
206
207(** Read a git object from a bytesrw reader. *)
208let read reader =
209 let open Result.Syntax in
210 let* kind, _length = read_header reader in
211 of_reader ~kind reader
212
213(** Write a git object to a bytesrw writer. *)
214let write writer v =
215 let content = to_string_without_header v in
216 let kind_str =
217 match kind v with
218 | `Blob -> "blob"
219 | `Commit -> "commit"
220 | `Tree -> "tree"
221 | `Tag -> "tag"
222 in
223 let header = Fmt.str "%s %d\x00" kind_str (String.length content) in
224 Writer.write writer (Slice.of_string header);
225 Writer.write writer (Slice.of_string content)
226
227(** Write only the content (without header) to a bytesrw writer. *)
228let write_content writer v =
229 let content = to_string_without_header v in
230 Writer.write writer (Slice.of_string content)
231
232module Set = Set.Make (struct
233 type nonrec t = t
234
235 let compare = compare
236end)
237
238module Map = Map.Make (struct
239 type nonrec t = t
240
241 let compare = compare
242end)