Git object storage and pack files for Eio
1(* Copyright (c) 2013-2017 Thomas Gazagnaire <thomas@gazagnaire.org>
2 Copyright (c) 2017-2024 Romain Calascibetta <romain.calascibetta@gmail.com>
3 Copyright (c) 2024-2026 Thomas Gazagnaire <thomas@gazagnaire.org>
4
5 Permission to use, copy, modify, and distribute this software for any
6 purpose with or without fee is hereby granted, provided that the above
7 copyright notice and this permission notice appear in all copies.
8
9 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *)
16
17(** Git tree objects (directory listings). *)
18
19type perm = [ `Normal | `Everybody | `Exec | `Link | `Dir | `Commit ]
20
21let perm_to_string = function
22 | `Normal -> "100644"
23 | `Everybody -> "100664"
24 | `Exec -> "100755"
25 | `Link -> "120000"
26 | `Dir -> "40000"
27 | `Commit -> "160000"
28
29let perm_of_string = function
30 | "44" | "100644" -> Ok `Normal
31 | "100664" -> Ok `Everybody
32 | "100755" -> Ok `Exec
33 | "120000" -> Ok `Link
34 | "40000" | "040000" -> Ok `Dir
35 | "160000" -> Ok `Commit
36 | v -> Error (`Msg ("Invalid permission: " ^ v))
37
38let perm_of_string_exn s =
39 match perm_of_string s with Ok p -> p | Error (`Msg m) -> invalid_arg m
40
41type entry = { perm : perm; name : string; hash : Hash.t }
42
43let entry ~perm ~name hash =
44 if String.contains name '\x00' then invalid_arg "Invalid entry name";
45 { perm; name; hash }
46
47let pp_entry ppf { perm; name; hash } =
48 Fmt.pf ppf "%s %s %a" (perm_to_string perm) name Hash.pp hash
49
50type t = entry list
51
52let empty = []
53let is_empty t = t = []
54
55let v entries =
56 (* Git sorts tree entries in a specific way *)
57 let name_with_suffix e =
58 match e.perm with `Dir -> e.name ^ "/" | _ -> e.name
59 in
60 List.sort
61 (fun a b -> String.compare (name_with_suffix a) (name_with_suffix b))
62 entries
63
64let add entry t =
65 v (entry :: List.filter (fun e -> not (String.equal e.name entry.name)) t)
66
67let remove ~name t = List.filter (fun e -> not (String.equal e.name name)) t
68let find ~name t = List.find_opt (fun e -> String.equal e.name name) t
69let to_list t = t
70let of_list entries = v entries
71let hashes t = List.map (fun e -> e.hash) t
72let iter f t = List.iter f t
73let pp ppf t = Fmt.(list ~sep:(any "@,") pp_entry) ppf t
74
75let equal a b = List.equal (fun x y -> compare x y = 0) a b
76
77and compare a b =
78 let rec loop a b =
79 match (a, b) with
80 | [], [] -> 0
81 | [], _ -> -1
82 | _, [] -> 1
83 | x :: xs, y :: ys ->
84 let c = String.compare x.name y.name in
85 if c <> 0 then c else loop xs ys
86 in
87 loop a b
88
89let hash t = Hashtbl.hash t
90
91(** Encode tree to git format. Format: "mode name\x00<20-byte-hash>" repeated *)
92let to_string t =
93 let buf = Buffer.create 256 in
94 List.iter
95 (fun { perm; name; hash } ->
96 (* Git uses octal mode without leading zeros for dirs *)
97 let mode = match perm with `Dir -> "40000" | _ -> perm_to_string perm in
98 Buffer.add_string buf mode;
99 Buffer.add_char buf ' ';
100 Buffer.add_string buf name;
101 Buffer.add_char buf '\x00';
102 Buffer.add_string buf (Hash.to_raw_string hash))
103 t;
104 Buffer.contents buf
105
106(** Parse tree from git format. *)
107let of_string s =
108 let len = String.length s in
109 let rec loop acc pos =
110 if pos >= len then Ok (v (List.rev acc))
111 else
112 (* Find the space after mode *)
113 match String.index_from_opt s pos ' ' with
114 | None -> Error (`Msg "Invalid tree: missing space after mode")
115 | Some sp_pos -> (
116 let mode = String.sub s pos (sp_pos - pos) in
117 match perm_of_string mode with
118 | Error _ as e -> e
119 | Ok perm -> (
120 (* Find the null byte after name *)
121 match String.index_from_opt s (sp_pos + 1) '\x00' with
122 | None -> Error (`Msg "Invalid tree: missing null after name")
123 | Some null_pos ->
124 let name =
125 String.sub s (sp_pos + 1) (null_pos - sp_pos - 1)
126 in
127 let hash_start = null_pos + 1 in
128 if hash_start + Hash.digest_size > len then
129 Error (`Msg "Invalid tree: truncated hash")
130 else
131 let hash_str = String.sub s hash_start Hash.digest_size in
132 let hash = Hash.of_raw_string hash_str in
133 let entry = { perm; name; hash } in
134 loop (entry :: acc) (hash_start + Hash.digest_size)))
135 in
136 loop [] 0
137
138let of_string_exn s =
139 match of_string s with Ok t -> t | Error (`Msg m) -> failwith m
140
141(** {1 Reader-based parsing}
142
143 Parse directly from a {!Bytesrw.Bytes.Reader.t} without materialising the
144 full object into a string. The reader must be positioned at the start of the
145 tree body. *)
146
147module Reader = Bytesrw.Bytes.Reader
148module Slice = Bytesrw.Bytes.Slice
149
150(** Read until [delim] from a reader. Returns the bytes before the delimiter;
151 the delimiter is consumed. Returns [None] at end-of-data. *)
152let read_until_byte reader delim =
153 let buf = Buffer.create 64 in
154 let rec loop () =
155 match Reader.read reader with
156 | slice when Slice.is_eod slice ->
157 if Buffer.length buf = 0 then None else Some (Buffer.contents buf)
158 | slice ->
159 let bytes = Slice.bytes slice in
160 let first = Slice.first slice in
161 let len = Slice.length slice in
162 let rec scan i =
163 if i >= first + len then begin
164 Buffer.add_subbytes buf bytes first len;
165 loop ()
166 end
167 else if Bytes.get_uint8 bytes i = Char.code delim then begin
168 Buffer.add_subbytes buf bytes first (i - first);
169 let rest_off = i + 1 in
170 let rest_len = first + len - rest_off in
171 if rest_len > 0 then
172 Reader.push_back reader
173 (Slice.make bytes ~first:rest_off ~length:rest_len);
174 Some (Buffer.contents buf)
175 end
176 else scan (i + 1)
177 in
178 scan first
179 in
180 loop ()
181
182(** Read exactly [n] bytes from a reader. Works at the slice level to avoid
183 intermediate string allocation. *)
184let read_exactly_bytes reader n =
185 let buf = Bytes.create n in
186 let rec loop off remaining =
187 if remaining = 0 then Ok buf
188 else
189 match Reader.read reader with
190 | slice when Slice.is_eod slice ->
191 Error (`Msg "Unexpected end of tree data")
192 | slice ->
193 let src = Slice.bytes slice in
194 let src_off = Slice.first slice in
195 let available = Slice.length slice in
196 let to_copy = min available remaining in
197 Bytes.blit src src_off buf off to_copy;
198 if to_copy < available then begin
199 let rest_off = src_off + to_copy in
200 let rest_len = available - to_copy in
201 Reader.push_back reader
202 (Slice.make src ~first:rest_off ~length:rest_len)
203 end;
204 loop (off + to_copy) (remaining - to_copy)
205 in
206 loop 0 n
207
208let of_reader reader =
209 let rec loop acc =
210 match read_until_byte reader ' ' with
211 | None ->
212 (* End of data — return accumulated entries *)
213 Ok (v (List.rev acc))
214 | Some mode -> (
215 match perm_of_string mode with
216 | Error _ as e -> e
217 | Ok perm -> (
218 match read_until_byte reader '\x00' with
219 | None -> Error (`Msg "Invalid tree: missing null after name")
220 | Some name -> (
221 match read_exactly_bytes reader Hash.digest_size with
222 | Error _ as e -> e
223 | Ok hash_bytes ->
224 let hash =
225 Hash.of_raw_string (Bytes.unsafe_to_string hash_bytes)
226 in
227 let entry = { perm; name; hash } in
228 loop (entry :: acc))))
229 in
230 loop []
231
232let digest t =
233 let s = to_string t in
234 Hash.digest_string ~kind:`Tree s
235
236module Set = Set.Make (struct
237 type nonrec t = t
238
239 let compare = compare
240end)
241
242module Map = Map.Make (struct
243 type nonrec t = t
244
245 let compare = compare
246end)