Git object storage and pack files for Eio
at main 289 lines 11 kB view raw
1(* Copyright (c) 2024-2026 Thomas Gazagnaire <thomas@gazagnaire.org> 2 3 Permission to use, copy, modify, and distribute this software for any 4 purpose with or without fee is hereby granted, provided that the above 5 copyright notice and this permission notice appear in all copies. 6 7 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) 14 15let pack_file = 16 let ic = open_in_bin "pack-testzone-0.pack" in 17 let len = in_channel_length ic in 18 let data = really_input_string ic len in 19 close_in ic; 20 data 21 22let index_file = 23 let ic = open_in_bin "pack-testzone-0.idx" in 24 let len = in_channel_length ic in 25 let data = really_input_string ic len in 26 close_in ic; 27 data 28 29let test_header () = 30 match Git.Pack.of_string pack_file with 31 | Ok pack -> 32 Alcotest.(check int) "version" 2 (Git.Pack.version pack); 33 (* pack-testzone-0 has 192 objects *) 34 Alcotest.(check int) "count" 192 (Git.Pack.count pack) 35 | Error (`Msg m) -> Alcotest.fail m 36 37let test_read_first_object () = 38 match Git.Pack.of_string pack_file with 39 | Ok pack -> ( 40 (* Read first object at offset 12 (after header) *) 41 match Git.Pack.read_object_at pack 12 with 42 | Ok (kind, data) -> 43 (* Just verify we got some data *) 44 Alcotest.(check bool) "has data" true (String.length data > 0); 45 Alcotest.(check bool) 46 "valid kind" true 47 (match kind with `Commit | `Tree | `Blob | `Tag -> true) 48 | Error (`Msg m) -> Alcotest.fail m) 49 | Error (`Msg m) -> Alcotest.fail m 50 51let test_zlib_inflate () = 52 (* Simple zlib test - compress and decompress *) 53 let original = "Hello, World! This is a test string for zlib compression." in 54 (* Create zlib-compressed data *) 55 let compressed = 56 let reader = Bytesrw.Bytes.Reader.of_string original in 57 let compressed_reader = Bytesrw_zlib.Zlib.compress_reads () reader in 58 Bytesrw.Bytes.Reader.to_string compressed_reader 59 in 60 match Git.Pack.inflate compressed with 61 | Ok decompressed -> Alcotest.(check string) "roundtrip" original decompressed 62 | Error (`Msg m) -> Alcotest.fail m 63 64let test_delta () = 65 (* Test delta application with a simple example *) 66 let source = "Hello, World!" in 67 (* Delta format: source_size, target_size, then commands *) 68 (* Source size = 13 (0x0D), Target size = 14 (0x0E) *) 69 (* Command: COPY offset=0, size=13: cmd=0x90 (0x80|0x10), size=0x0D *) 70 (* - bit 7 (0x80): COPY command 71 - bit 4 (0x10): size byte 0 present 72 - offset is 0 by default (no offset bytes) *) 73 (* Command: INSERT 1 byte '!' = 0x01 '!' *) 74 let delta = 75 String.concat "" 76 [ 77 "\x0D"; 78 (* source size = 13 *) 79 "\x0E"; 80 (* target size = 14 *) 81 "\x90\x0D"; 82 (* COPY: cmd=0x90, size=13 *) 83 "\x01!"; 84 (* INSERT: 1 byte '!' *) 85 ] 86 in 87 match Git.Pack.apply_delta ~source ~delta with 88 | Ok target -> Alcotest.(check string) "delta applied" "Hello, World!!" target 89 | Error (`Msg m) -> Alcotest.fail m 90 91let test_fold () = 92 (* Test that fold iterates over all objects exactly once *) 93 match Git.Pack.of_string pack_file with 94 | Ok pack -> ( 95 let result = 96 Git.Pack.fold 97 (fun ~offset ~kind ~data acc -> 98 Alcotest.(check bool) "has data" true (String.length data > 0); 99 Alcotest.(check bool) "valid offset" true (offset >= 12); 100 Alcotest.(check bool) 101 "valid kind" true 102 (match kind with `Commit | `Tree | `Blob | `Tag -> true); 103 acc + 1) 104 0 pack 105 in 106 match result with 107 | Ok count -> Alcotest.(check int) "fold count matches header" 192 count 108 | Error (`Msg m) -> Alcotest.fail m) 109 | Error (`Msg m) -> Alcotest.fail m 110 111let test_fold_matches_index () = 112 (* Verify fold visits objects at the same offsets as the index *) 113 match (Git.Pack.of_string pack_file, Git.Pack_index.of_string index_file) with 114 | Ok pack, Ok idx -> 115 (* Collect all offsets from fold *) 116 let fold_offsets = 117 match 118 Git.Pack.fold 119 (fun ~offset ~kind:_ ~data:_ acc -> offset :: acc) 120 [] pack 121 with 122 | Ok offsets -> List.sort compare offsets 123 | Error (`Msg m) -> Alcotest.fail m 124 in 125 (* Collect all offsets from index *) 126 let index_offsets = 127 let offsets = ref [] in 128 Git.Pack_index.iter 129 (fun ~hash:_ ~offset ~crc:_ -> offsets := offset :: !offsets) 130 idx; 131 List.sort compare !offsets 132 in 133 Alcotest.(check int) 134 "same count" 135 (List.length index_offsets) 136 (List.length fold_offsets); 137 Alcotest.(check (list int)) "same offsets" index_offsets fold_offsets 138 | Error (`Msg m), _ -> Alcotest.fail ("pack: " ^ m) 139 | _, Error (`Msg m) -> Alcotest.fail ("index: " ^ m) 140 141(* Helper to run git commands in a directory *) 142let run_git dir args = 143 let cmd = 144 String.concat " " 145 ([ "git"; "-C"; dir ] @ List.map Filename.quote args @ [ "2>/dev/null" ]) 146 in 147 let ic = Unix.open_process_in cmd in 148 let output = In_channel.input_all ic in 149 let _ = Unix.close_process_in ic in 150 String.trim output 151 152let test_from_git () = 153 (* Create a fresh repo with git, generate a pack, and verify we can read it *) 154 let tmp_dir = Filename.temp_dir "git_pack_test" "" in 155 Fun.protect 156 ~finally:(fun () -> ignore (Sys.command ("rm -rf " ^ tmp_dir))) 157 (fun () -> 158 (* Initialize repo *) 159 ignore (run_git tmp_dir [ "init" ]); 160 ignore (run_git tmp_dir [ "config"; "user.email"; "test@test.com" ]); 161 ignore (run_git tmp_dir [ "config"; "user.name"; "Test" ]); 162 (* Create some content *) 163 let file1 = Filename.concat tmp_dir "file1.txt" in 164 let file2 = Filename.concat tmp_dir "file2.txt" in 165 let oc = open_out file1 in 166 output_string oc "Hello, World!\n"; 167 close_out oc; 168 ignore (run_git tmp_dir [ "add"; "file1.txt" ]); 169 ignore (run_git tmp_dir [ "commit"; "-m"; "First commit" ]); 170 (* Second commit *) 171 let oc = open_out file2 in 172 output_string oc "Second file content\n"; 173 close_out oc; 174 ignore (run_git tmp_dir [ "add"; "file2.txt" ]); 175 ignore (run_git tmp_dir [ "commit"; "-m"; "Second commit" ]); 176 (* Create pack file *) 177 ignore (run_git tmp_dir [ "gc"; "--aggressive" ]); 178 (* Find the pack file *) 179 let pack_dir = Filename.concat tmp_dir ".git/objects/pack" in 180 let files = Sys.readdir pack_dir in 181 let pack_file_path = 182 Array.to_list files 183 |> List.find (fun f -> Filename.check_suffix f ".pack") 184 |> Filename.concat pack_dir 185 in 186 (* Read and parse the pack *) 187 let ic = open_in_bin pack_file_path in 188 let data = In_channel.input_all ic in 189 close_in ic; 190 match Git.Pack.of_string data with 191 | Ok pack -> ( 192 (* Should have at least: 2 commits, 2 trees, 2 blobs *) 193 Alcotest.(check bool) "has objects" true (Git.Pack.count pack >= 4); 194 (* Verify we can fold over all objects *) 195 let result = 196 Git.Pack.fold 197 (fun ~offset:_ ~kind:_ ~data acc -> acc + String.length data) 198 0 pack 199 in 200 match result with 201 | Ok total_size -> 202 Alcotest.(check bool) "has content" true (total_size > 0) 203 | Error (`Msg m) -> Alcotest.fail ("fold: " ^ m)) 204 | Error (`Msg m) -> Alcotest.fail m) 205 206let test_with_deltas () = 207 (* Create objects that will result in delta encoding *) 208 let tmp_dir = Filename.temp_dir "git_delta_test" "" in 209 Fun.protect 210 ~finally:(fun () -> ignore (Sys.command ("rm -rf " ^ tmp_dir))) 211 (fun () -> 212 (* Initialize repo *) 213 ignore (run_git tmp_dir [ "init" ]); 214 ignore (run_git tmp_dir [ "config"; "user.email"; "test@test.com" ]); 215 ignore (run_git tmp_dir [ "config"; "user.name"; "Test" ]); 216 (* Create a large file that will be delta-compressed when modified *) 217 let file = Filename.concat tmp_dir "large.txt" in 218 let oc = open_out file in 219 let ppf = Format.formatter_of_out_channel oc in 220 (* Write 10KB of content - enough to trigger delta compression *) 221 for i = 1 to 500 do 222 Fmt.pf ppf "Line %d: This is some repetitive content.\n" i 223 done; 224 Format.pp_print_flush ppf (); 225 close_out oc; 226 ignore (run_git tmp_dir [ "add"; "large.txt" ]); 227 ignore (run_git tmp_dir [ "commit"; "-m"; "Add large file" ]); 228 (* Modify just a few lines - this will create a delta *) 229 let oc = open_out file in 230 let ppf = Format.formatter_of_out_channel oc in 231 for i = 1 to 500 do 232 if i = 250 then Fmt.pf ppf "Line %d: MODIFIED CONTENT HERE!\n" i 233 else Fmt.pf ppf "Line %d: This is some repetitive content.\n" i 234 done; 235 Format.pp_print_flush ppf (); 236 close_out oc; 237 ignore (run_git tmp_dir [ "add"; "large.txt" ]); 238 ignore (run_git tmp_dir [ "commit"; "-m"; "Modify large file" ]); 239 (* Create pack with aggressive delta compression *) 240 ignore 241 (run_git tmp_dir [ "repack"; "-a"; "-d"; "--depth=50"; "--window=250" ]); 242 (* Find and read the pack *) 243 let pack_dir = Filename.concat tmp_dir ".git/objects/pack" in 244 let files = Sys.readdir pack_dir in 245 let pack_file_path = 246 Array.to_list files 247 |> List.find (fun f -> Filename.check_suffix f ".pack") 248 |> Filename.concat pack_dir 249 in 250 let ic = open_in_bin pack_file_path in 251 let data = In_channel.input_all ic in 252 close_in ic; 253 match Git.Pack.of_string data with 254 | Ok pack -> ( 255 (* Fold should handle deltas correctly *) 256 let blobs = ref [] in 257 let result = 258 Git.Pack.fold 259 (fun ~offset:_ ~kind ~data acc -> 260 (match kind with `Blob -> blobs := data :: !blobs | _ -> ()); 261 acc + 1) 262 0 pack 263 in 264 match result with 265 | Ok count -> 266 Alcotest.(check bool) "parsed all" true (count >= 4); 267 (* Verify we got the blob content *) 268 let has_line_content = 269 List.exists 270 (fun b -> String.length b > 5 && String.sub b 0 5 = "Line ") 271 !blobs 272 in 273 Alcotest.(check bool) "found blob content" true has_line_content 274 | Error (`Msg m) -> Alcotest.fail ("fold: " ^ m)) 275 | Error (`Msg m) -> Alcotest.fail m) 276 277let tests = 278 [ 279 Alcotest.test_case "header" `Quick test_header; 280 Alcotest.test_case "read_first_object" `Quick test_read_first_object; 281 Alcotest.test_case "zlib_inflate" `Quick test_zlib_inflate; 282 Alcotest.test_case "delta" `Quick test_delta; 283 Alcotest.test_case "fold" `Quick test_fold; 284 Alcotest.test_case "fold_matches_index" `Quick test_fold_matches_index; 285 Alcotest.test_case "from_git" `Slow test_from_git; 286 Alcotest.test_case "with_deltas" `Slow test_with_deltas; 287 ] 288 289let suite = ("pack", tests)