Git object storage and pack files for Eio
1(* Copyright (c) 2024-2026 Thomas Gazagnaire <thomas@gazagnaire.org>
2
3 Permission to use, copy, modify, and distribute this software for any
4 purpose with or without fee is hereby granted, provided that the above
5 copyright notice and this permission notice appear in all copies.
6
7 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *)
14
15let pack_file =
16 let ic = open_in_bin "pack-testzone-0.pack" in
17 let len = in_channel_length ic in
18 let data = really_input_string ic len in
19 close_in ic;
20 data
21
22let index_file =
23 let ic = open_in_bin "pack-testzone-0.idx" in
24 let len = in_channel_length ic in
25 let data = really_input_string ic len in
26 close_in ic;
27 data
28
29let test_header () =
30 match Git.Pack.of_string pack_file with
31 | Ok pack ->
32 Alcotest.(check int) "version" 2 (Git.Pack.version pack);
33 (* pack-testzone-0 has 192 objects *)
34 Alcotest.(check int) "count" 192 (Git.Pack.count pack)
35 | Error (`Msg m) -> Alcotest.fail m
36
37let test_read_first_object () =
38 match Git.Pack.of_string pack_file with
39 | Ok pack -> (
40 (* Read first object at offset 12 (after header) *)
41 match Git.Pack.read_object_at pack 12 with
42 | Ok (kind, data) ->
43 (* Just verify we got some data *)
44 Alcotest.(check bool) "has data" true (String.length data > 0);
45 Alcotest.(check bool)
46 "valid kind" true
47 (match kind with `Commit | `Tree | `Blob | `Tag -> true)
48 | Error (`Msg m) -> Alcotest.fail m)
49 | Error (`Msg m) -> Alcotest.fail m
50
51let test_zlib_inflate () =
52 (* Simple zlib test - compress and decompress *)
53 let original = "Hello, World! This is a test string for zlib compression." in
54 (* Create zlib-compressed data *)
55 let compressed =
56 let reader = Bytesrw.Bytes.Reader.of_string original in
57 let compressed_reader = Bytesrw_zlib.Zlib.compress_reads () reader in
58 Bytesrw.Bytes.Reader.to_string compressed_reader
59 in
60 match Git.Pack.inflate compressed with
61 | Ok decompressed -> Alcotest.(check string) "roundtrip" original decompressed
62 | Error (`Msg m) -> Alcotest.fail m
63
64let test_delta () =
65 (* Test delta application with a simple example *)
66 let source = "Hello, World!" in
67 (* Delta format: source_size, target_size, then commands *)
68 (* Source size = 13 (0x0D), Target size = 14 (0x0E) *)
69 (* Command: COPY offset=0, size=13: cmd=0x90 (0x80|0x10), size=0x0D *)
70 (* - bit 7 (0x80): COPY command
71 - bit 4 (0x10): size byte 0 present
72 - offset is 0 by default (no offset bytes) *)
73 (* Command: INSERT 1 byte '!' = 0x01 '!' *)
74 let delta =
75 String.concat ""
76 [
77 "\x0D";
78 (* source size = 13 *)
79 "\x0E";
80 (* target size = 14 *)
81 "\x90\x0D";
82 (* COPY: cmd=0x90, size=13 *)
83 "\x01!";
84 (* INSERT: 1 byte '!' *)
85 ]
86 in
87 match Git.Pack.apply_delta ~source ~delta with
88 | Ok target -> Alcotest.(check string) "delta applied" "Hello, World!!" target
89 | Error (`Msg m) -> Alcotest.fail m
90
91let test_fold () =
92 (* Test that fold iterates over all objects exactly once *)
93 match Git.Pack.of_string pack_file with
94 | Ok pack -> (
95 let result =
96 Git.Pack.fold
97 (fun ~offset ~kind ~data acc ->
98 Alcotest.(check bool) "has data" true (String.length data > 0);
99 Alcotest.(check bool) "valid offset" true (offset >= 12);
100 Alcotest.(check bool)
101 "valid kind" true
102 (match kind with `Commit | `Tree | `Blob | `Tag -> true);
103 acc + 1)
104 0 pack
105 in
106 match result with
107 | Ok count -> Alcotest.(check int) "fold count matches header" 192 count
108 | Error (`Msg m) -> Alcotest.fail m)
109 | Error (`Msg m) -> Alcotest.fail m
110
111let test_fold_matches_index () =
112 (* Verify fold visits objects at the same offsets as the index *)
113 match (Git.Pack.of_string pack_file, Git.Pack_index.of_string index_file) with
114 | Ok pack, Ok idx ->
115 (* Collect all offsets from fold *)
116 let fold_offsets =
117 match
118 Git.Pack.fold
119 (fun ~offset ~kind:_ ~data:_ acc -> offset :: acc)
120 [] pack
121 with
122 | Ok offsets -> List.sort compare offsets
123 | Error (`Msg m) -> Alcotest.fail m
124 in
125 (* Collect all offsets from index *)
126 let index_offsets =
127 let offsets = ref [] in
128 Git.Pack_index.iter
129 (fun ~hash:_ ~offset ~crc:_ -> offsets := offset :: !offsets)
130 idx;
131 List.sort compare !offsets
132 in
133 Alcotest.(check int)
134 "same count"
135 (List.length index_offsets)
136 (List.length fold_offsets);
137 Alcotest.(check (list int)) "same offsets" index_offsets fold_offsets
138 | Error (`Msg m), _ -> Alcotest.fail ("pack: " ^ m)
139 | _, Error (`Msg m) -> Alcotest.fail ("index: " ^ m)
140
141(* Helper to run git commands in a directory *)
142let run_git dir args =
143 let cmd =
144 String.concat " "
145 ([ "git"; "-C"; dir ] @ List.map Filename.quote args @ [ "2>/dev/null" ])
146 in
147 let ic = Unix.open_process_in cmd in
148 let output = In_channel.input_all ic in
149 let _ = Unix.close_process_in ic in
150 String.trim output
151
152let test_from_git () =
153 (* Create a fresh repo with git, generate a pack, and verify we can read it *)
154 let tmp_dir = Filename.temp_dir "git_pack_test" "" in
155 Fun.protect
156 ~finally:(fun () -> ignore (Sys.command ("rm -rf " ^ tmp_dir)))
157 (fun () ->
158 (* Initialize repo *)
159 ignore (run_git tmp_dir [ "init" ]);
160 ignore (run_git tmp_dir [ "config"; "user.email"; "test@test.com" ]);
161 ignore (run_git tmp_dir [ "config"; "user.name"; "Test" ]);
162 (* Create some content *)
163 let file1 = Filename.concat tmp_dir "file1.txt" in
164 let file2 = Filename.concat tmp_dir "file2.txt" in
165 let oc = open_out file1 in
166 output_string oc "Hello, World!\n";
167 close_out oc;
168 ignore (run_git tmp_dir [ "add"; "file1.txt" ]);
169 ignore (run_git tmp_dir [ "commit"; "-m"; "First commit" ]);
170 (* Second commit *)
171 let oc = open_out file2 in
172 output_string oc "Second file content\n";
173 close_out oc;
174 ignore (run_git tmp_dir [ "add"; "file2.txt" ]);
175 ignore (run_git tmp_dir [ "commit"; "-m"; "Second commit" ]);
176 (* Create pack file *)
177 ignore (run_git tmp_dir [ "gc"; "--aggressive" ]);
178 (* Find the pack file *)
179 let pack_dir = Filename.concat tmp_dir ".git/objects/pack" in
180 let files = Sys.readdir pack_dir in
181 let pack_file_path =
182 Array.to_list files
183 |> List.find (fun f -> Filename.check_suffix f ".pack")
184 |> Filename.concat pack_dir
185 in
186 (* Read and parse the pack *)
187 let ic = open_in_bin pack_file_path in
188 let data = In_channel.input_all ic in
189 close_in ic;
190 match Git.Pack.of_string data with
191 | Ok pack -> (
192 (* Should have at least: 2 commits, 2 trees, 2 blobs *)
193 Alcotest.(check bool) "has objects" true (Git.Pack.count pack >= 4);
194 (* Verify we can fold over all objects *)
195 let result =
196 Git.Pack.fold
197 (fun ~offset:_ ~kind:_ ~data acc -> acc + String.length data)
198 0 pack
199 in
200 match result with
201 | Ok total_size ->
202 Alcotest.(check bool) "has content" true (total_size > 0)
203 | Error (`Msg m) -> Alcotest.fail ("fold: " ^ m))
204 | Error (`Msg m) -> Alcotest.fail m)
205
206let test_with_deltas () =
207 (* Create objects that will result in delta encoding *)
208 let tmp_dir = Filename.temp_dir "git_delta_test" "" in
209 Fun.protect
210 ~finally:(fun () -> ignore (Sys.command ("rm -rf " ^ tmp_dir)))
211 (fun () ->
212 (* Initialize repo *)
213 ignore (run_git tmp_dir [ "init" ]);
214 ignore (run_git tmp_dir [ "config"; "user.email"; "test@test.com" ]);
215 ignore (run_git tmp_dir [ "config"; "user.name"; "Test" ]);
216 (* Create a large file that will be delta-compressed when modified *)
217 let file = Filename.concat tmp_dir "large.txt" in
218 let oc = open_out file in
219 let ppf = Format.formatter_of_out_channel oc in
220 (* Write 10KB of content - enough to trigger delta compression *)
221 for i = 1 to 500 do
222 Fmt.pf ppf "Line %d: This is some repetitive content.\n" i
223 done;
224 Format.pp_print_flush ppf ();
225 close_out oc;
226 ignore (run_git tmp_dir [ "add"; "large.txt" ]);
227 ignore (run_git tmp_dir [ "commit"; "-m"; "Add large file" ]);
228 (* Modify just a few lines - this will create a delta *)
229 let oc = open_out file in
230 let ppf = Format.formatter_of_out_channel oc in
231 for i = 1 to 500 do
232 if i = 250 then Fmt.pf ppf "Line %d: MODIFIED CONTENT HERE!\n" i
233 else Fmt.pf ppf "Line %d: This is some repetitive content.\n" i
234 done;
235 Format.pp_print_flush ppf ();
236 close_out oc;
237 ignore (run_git tmp_dir [ "add"; "large.txt" ]);
238 ignore (run_git tmp_dir [ "commit"; "-m"; "Modify large file" ]);
239 (* Create pack with aggressive delta compression *)
240 ignore
241 (run_git tmp_dir [ "repack"; "-a"; "-d"; "--depth=50"; "--window=250" ]);
242 (* Find and read the pack *)
243 let pack_dir = Filename.concat tmp_dir ".git/objects/pack" in
244 let files = Sys.readdir pack_dir in
245 let pack_file_path =
246 Array.to_list files
247 |> List.find (fun f -> Filename.check_suffix f ".pack")
248 |> Filename.concat pack_dir
249 in
250 let ic = open_in_bin pack_file_path in
251 let data = In_channel.input_all ic in
252 close_in ic;
253 match Git.Pack.of_string data with
254 | Ok pack -> (
255 (* Fold should handle deltas correctly *)
256 let blobs = ref [] in
257 let result =
258 Git.Pack.fold
259 (fun ~offset:_ ~kind ~data acc ->
260 (match kind with `Blob -> blobs := data :: !blobs | _ -> ());
261 acc + 1)
262 0 pack
263 in
264 match result with
265 | Ok count ->
266 Alcotest.(check bool) "parsed all" true (count >= 4);
267 (* Verify we got the blob content *)
268 let has_line_content =
269 List.exists
270 (fun b -> String.length b > 5 && String.sub b 0 5 = "Line ")
271 !blobs
272 in
273 Alcotest.(check bool) "found blob content" true has_line_content
274 | Error (`Msg m) -> Alcotest.fail ("fold: " ^ m))
275 | Error (`Msg m) -> Alcotest.fail m)
276
277let tests =
278 [
279 Alcotest.test_case "header" `Quick test_header;
280 Alcotest.test_case "read_first_object" `Quick test_read_first_object;
281 Alcotest.test_case "zlib_inflate" `Quick test_zlib_inflate;
282 Alcotest.test_case "delta" `Quick test_delta;
283 Alcotest.test_case "fold" `Quick test_fold;
284 Alcotest.test_case "fold_matches_index" `Quick test_fold_matches_index;
285 Alcotest.test_case "from_git" `Slow test_from_git;
286 Alcotest.test_case "with_deltas" `Slow test_with_deltas;
287 ]
288
289let suite = ("pack", tests)