Git fork
at reftables-rust 5291 lines 147 kB view raw
1#define USE_THE_REPOSITORY_VARIABLE 2#define DISABLE_SIGN_COMPARE_WARNINGS 3 4#include "builtin.h" 5#include "environment.h" 6#include "gettext.h" 7#include "hex.h" 8#include "config.h" 9#include "attr.h" 10#include "object.h" 11#include "commit.h" 12#include "tag.h" 13#include "delta.h" 14#include "pack.h" 15#include "pack-revindex.h" 16#include "csum-file.h" 17#include "tree-walk.h" 18#include "diff.h" 19#include "revision.h" 20#include "list-objects.h" 21#include "list-objects-filter-options.h" 22#include "pack-objects.h" 23#include "progress.h" 24#include "refs.h" 25#include "streaming.h" 26#include "thread-utils.h" 27#include "pack-bitmap.h" 28#include "delta-islands.h" 29#include "reachable.h" 30#include "oid-array.h" 31#include "strvec.h" 32#include "list.h" 33#include "packfile.h" 34#include "object-file.h" 35#include "odb.h" 36#include "replace-object.h" 37#include "dir.h" 38#include "midx.h" 39#include "trace2.h" 40#include "shallow.h" 41#include "promisor-remote.h" 42#include "pack-mtimes.h" 43#include "parse-options.h" 44#include "blob.h" 45#include "tree.h" 46#include "path-walk.h" 47#include "trace2.h" 48 49/* 50 * Objects we are going to pack are collected in the `to_pack` structure. 51 * It contains an array (dynamically expanded) of the object data, and a map 52 * that can resolve SHA1s to their position in the array. 
 */
static struct packing_data to_pack;

/*
 * Return the delta base of "e", or NULL if "e" is not deltified.
 * delta_idx is a 1-based index; 0 means "no delta". ext_base selects
 * the out-of-pack ext_bases[] array instead of objects[].
 */
static inline struct object_entry *oe_delta(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	if (!e->delta_idx)
		return NULL;
	if (e->ext_base)
		return &pack->ext_bases[e->delta_idx - 1];
	else
		return &pack->objects[e->delta_idx - 1];
}

/*
 * Size of the delta data for "e". Uses the cached bit-field when it
 * was wide enough, otherwise the out-of-line pack->delta_size[] array.
 */
static inline unsigned long oe_delta_size(struct packing_data *pack,
					  const struct object_entry *e)
{
	if (e->delta_size_valid)
		return e->delta_size_;

	/*
	 * pack->delta_size[] can't be NULL because oe_set_delta_size()
	 * must have been called when a new delta is saved with
	 * oe_set_delta().
	 * If oe_delta() returns NULL (i.e. default state, which means
	 * delta_size_valid is also false), then the caller must never
	 * call oe_delta_size().
	 */
	return pack->delta_size[e - pack->objects];
}

unsigned long oe_get_size_slow(struct packing_data *pack,
			       const struct object_entry *e);

/* Cached object size, falling back to the slow path when not cached. */
static inline unsigned long oe_size(struct packing_data *pack,
				    const struct object_entry *e)
{
	if (e->size_valid)
		return e->size_;

	return oe_get_size_slow(pack, e);
}

/* Record "delta" (an entry of pack->objects[]) as the base of "e". */
static inline void oe_set_delta(struct packing_data *pack,
				struct object_entry *e,
				struct object_entry *delta)
{
	if (delta)
		e->delta_idx = (delta - pack->objects) + 1;
	else
		e->delta_idx = 0;
}

/* Next entry sharing the same delta base as "e", or NULL. */
static inline struct object_entry *oe_delta_sibling(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	if (e->delta_sibling_idx)
		return &pack->objects[e->delta_sibling_idx - 1];
	return NULL;
}

/* First entry whose delta base is "e", or NULL. */
static inline struct object_entry *oe_delta_child(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	if (e->delta_child_idx)
		return &pack->objects[e->delta_child_idx - 1];
	return NULL;
}

static inline void oe_set_delta_child(struct packing_data *pack,
				      struct object_entry *e,
				      struct object_entry *delta)
{
	if (delta)
		e->delta_child_idx = (delta - pack->objects) + 1;
	else
		e->delta_child_idx = 0;
}

static inline void oe_set_delta_sibling(struct packing_data *pack,
					struct object_entry *e,
					struct object_entry *delta)
{
	if (delta)
		e->delta_sibling_idx = (delta - pack->objects) + 1;
	else
		e->delta_sibling_idx = 0;
}

/*
 * Cache "size" in the entry when it fits below pack->oe_size_limit;
 * otherwise mark the cache invalid so readers take the slow path
 * (and sanity-check that the slow path agrees with the caller).
 */
static inline void oe_set_size(struct packing_data *pack,
			       struct object_entry *e,
			       unsigned long size)
{
	if (size < pack->oe_size_limit) {
		e->size_ = size;
		e->size_valid = 1;
	} else {
		e->size_valid = 0;
		if (oe_get_size_slow(pack, e) != size)
			BUG("'size' is supposed to be the object size!");
	}
}

/*
 * Store the delta size, spilling to the lazily-allocated
 * pack->delta_size[] array when it does not fit in the bit-field.
 * NOTE(review): the lock guards only the allocation of the array;
 * the per-entry store is presumably owned by one thread -- confirm.
 */
static inline void oe_set_delta_size(struct packing_data *pack,
				     struct object_entry *e,
				     unsigned long size)
{
	if (size < pack->oe_delta_size_limit) {
		e->delta_size_ = size;
		e->delta_size_valid = 1;
	} else {
		packing_data_lock(pack);
		if (!pack->delta_size)
			ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
		packing_data_unlock(pack);

		pack->delta_size[e - pack->objects] = size;
		e->delta_size_valid = 0;
	}
}

/* Convenience wrappers that implicitly operate on to_pack. */
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
#define SIZE(obj) oe_size(&to_pack, obj)
#define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
#define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
#define DELTA(obj) oe_delta(&to_pack, obj)
#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)

static const char *const pack_usage[] = {
	N_("git pack-objects 
[-q | --progress | --all-progress] [--all-progress-implied]\n"
	   " [--no-reuse-delta] [--delta-base-offset] [--non-empty]\n"
	   " [--local] [--incremental] [--window=<n>] [--depth=<n>]\n"
	   " [--revs [--unpacked | --all]] [--keep-pack=<pack-name>]\n"
	   " [--cruft] [--cruft-expiration=<time>]\n"
	   " [--stdout [--filter=<filter-spec>] | <base-name>]\n"
	   " [--shallow] [--keep-true-parents] [--[no-]sparse]\n"
	   " [--name-hash-version=<n>] [--path-walk] < <object-list>"),
	NULL
};

/* idx entries of the objects actually written, filled in write order. */
static struct pack_idx_entry **written_list;
static uint32_t nr_result, nr_written, nr_seen;
static struct bitmap_index *bitmap_git;
/* Current delta-island layer being emitted by compute_write_order(). */
static uint32_t write_layer;

/* Command-line knobs and derived state. */
static int non_empty;
static int reuse_delta = 1, reuse_object = 1;
static int keep_unreachable, unpack_unreachable, include_tag;
static timestamp_t unpack_unreachable_expiration;
static int pack_loose_unreachable;
static int cruft;
static int shallow = 0;
static timestamp_t cruft_expiration;
static int local;
static int have_non_local_packs;
static int incremental;
static int ignore_packed_keep_on_disk;
static int ignore_packed_keep_in_core;
static int ignore_packed_keep_in_core_has_cruft;
static int allow_ofs_delta;
static struct pack_idx_option pack_idx_opts;
static const char *base_name;
static int progress = 1;
static int window = 10;
static unsigned long pack_size_limit;
static int depth = 50;
static int delta_search_threads;
static int pack_to_stdout;
static int sparse;
static int thin;
static int path_walk = -1;
static int num_preferred_base;
static struct progress *progress_state;

/* State for reusing object data verbatim out of existing bitmapped packs. */
static struct bitmapped_pack *reuse_packfiles;
static size_t reuse_packfiles_nr;
static size_t reuse_packfiles_used_nr;
static uint32_t reuse_packfile_objects;
static struct bitmap *reuse_packfile_bitmap;

static int use_bitmap_index_default = 1;
static int use_bitmap_index = -1;
static enum {
	NO_PACK_REUSE = 0,
	SINGLE_PACK_REUSE,
	MULTI_PACK_REUSE,
} allow_pack_reuse = SINGLE_PACK_REUSE;
static enum {
	WRITE_BITMAP_FALSE = 0,
	WRITE_BITMAP_QUIET,
	WRITE_BITMAP_TRUE,
} write_bitmap_index;
static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE;

static int exclude_promisor_objects;
static int exclude_promisor_objects_best_effort;

static int use_delta_islands;

/* Delta cache accounting, in bytes. */
static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
static unsigned long cache_max_small_delta_size = 1000;

static unsigned long window_memory_limit = 0;

static struct string_list uri_protocols = STRING_LIST_INIT_NODUP;

/* How to react to objects that are missing from the local repository. */
enum missing_action {
	MA_ERROR = 0,	/* fail if any missing objects are encountered */
	MA_ALLOW_ANY,	/* silently allow ALL missing objects */
	MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
};
static enum missing_action arg_missing_action;
static show_object_fn fn_show_object;

/* An object excluded from the pack, keyed by OID, with its pack hash/URI. */
struct configured_exclusion {
	struct oidmap_entry e;
	char *pack_hash_hex;
	char *uri;
};
static struct oidmap configured_exclusions;

static struct oidset excluded_by_config;
static int name_hash_version = -1;

enum stdin_packs_mode {
	STDIN_PACKS_MODE_NONE,
	STDIN_PACKS_MODE_STANDARD,
	STDIN_PACKS_MODE_FOLLOW,
};

/**
 * Check whether the name_hash_version chosen by user input is appropriate,
 * and also validate whether it is compatible with other features.
296 */ 297static void validate_name_hash_version(void) 298{ 299 if (name_hash_version < 1 || name_hash_version > 2) 300 die(_("invalid --name-hash-version option: %d"), name_hash_version); 301 if (write_bitmap_index && name_hash_version != 1) { 302 warning(_("currently, --write-bitmap-index requires --name-hash-version=1")); 303 name_hash_version = 1; 304 } 305} 306 307static inline uint32_t pack_name_hash_fn(const char *name) 308{ 309 static int seen_version = -1; 310 311 if (seen_version < 0) 312 seen_version = name_hash_version; 313 else if (seen_version != name_hash_version) 314 BUG("name hash version changed from %d to %d mid-process", 315 seen_version, name_hash_version); 316 317 switch (name_hash_version) { 318 case 1: 319 return pack_name_hash(name); 320 321 case 2: 322 return pack_name_hash_v2((const unsigned char *)name); 323 324 default: 325 BUG("invalid name-hash version: %d", name_hash_version); 326 } 327} 328 329/* 330 * stats 331 */ 332static uint32_t written, written_delta; 333static uint32_t reused, reused_delta; 334 335/* 336 * Indexed commits 337 */ 338static struct commit **indexed_commits; 339static unsigned int indexed_commits_nr; 340static unsigned int indexed_commits_alloc; 341 342static void index_commit_for_bitmap(struct commit *commit) 343{ 344 if (indexed_commits_nr >= indexed_commits_alloc) { 345 indexed_commits_alloc = (indexed_commits_alloc + 32) * 2; 346 REALLOC_ARRAY(indexed_commits, indexed_commits_alloc); 347 } 348 349 indexed_commits[indexed_commits_nr++] = commit; 350} 351 352static void *get_delta(struct object_entry *entry) 353{ 354 unsigned long size, base_size, delta_size; 355 void *buf, *base_buf, *delta_buf; 356 enum object_type type; 357 358 buf = odb_read_object(the_repository->objects, &entry->idx.oid, 359 &type, &size); 360 if (!buf) 361 die(_("unable to read %s"), oid_to_hex(&entry->idx.oid)); 362 base_buf = odb_read_object(the_repository->objects, 363 &DELTA(entry)->idx.oid, &type, 364 &base_size); 365 if (!base_buf) 
366 die("unable to read %s", 367 oid_to_hex(&DELTA(entry)->idx.oid)); 368 delta_buf = diff_delta(base_buf, base_size, 369 buf, size, &delta_size, 0); 370 /* 371 * We successfully computed this delta once but dropped it for 372 * memory reasons. Something is very wrong if this time we 373 * recompute and create a different delta. 374 */ 375 if (!delta_buf || delta_size != DELTA_SIZE(entry)) 376 BUG("delta size changed"); 377 free(buf); 378 free(base_buf); 379 return delta_buf; 380} 381 382static unsigned long do_compress(void **pptr, unsigned long size) 383{ 384 git_zstream stream; 385 void *in, *out; 386 unsigned long maxsize; 387 388 git_deflate_init(&stream, pack_compression_level); 389 maxsize = git_deflate_bound(&stream, size); 390 391 in = *pptr; 392 out = xmalloc(maxsize); 393 *pptr = out; 394 395 stream.next_in = in; 396 stream.avail_in = size; 397 stream.next_out = out; 398 stream.avail_out = maxsize; 399 while (git_deflate(&stream, Z_FINISH) == Z_OK) 400 ; /* nothing */ 401 git_deflate_end(&stream); 402 403 free(in); 404 return stream.total_out; 405} 406 407static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f, 408 const struct object_id *oid) 409{ 410 git_zstream stream; 411 unsigned char ibuf[1024 * 16]; 412 unsigned char obuf[1024 * 16]; 413 unsigned long olen = 0; 414 415 git_deflate_init(&stream, pack_compression_level); 416 417 for (;;) { 418 ssize_t readlen; 419 int zret = Z_OK; 420 readlen = read_istream(st, ibuf, sizeof(ibuf)); 421 if (readlen == -1) 422 die(_("unable to read %s"), oid_to_hex(oid)); 423 424 stream.next_in = ibuf; 425 stream.avail_in = readlen; 426 while ((stream.avail_in || readlen == 0) && 427 (zret == Z_OK || zret == Z_BUF_ERROR)) { 428 stream.next_out = obuf; 429 stream.avail_out = sizeof(obuf); 430 zret = git_deflate(&stream, readlen ? 
0 : Z_FINISH);
			hashwrite(f, obuf, stream.next_out - obuf);
			olen += stream.next_out - obuf;
		}
		if (stream.avail_in)
			die(_("deflate error (%d)"), zret);
		if (readlen == 0) {
			if (zret != Z_STREAM_END)
				die(_("deflate error (%d)"), zret);
			break;
		}
	}
	git_deflate_end(&stream);
	return olen;
}

/*
 * we are going to reuse the existing object data as is. make
 * sure it is not corrupt.
 */
static int check_pack_inflate(struct packed_git *p,
		struct pack_window **w_curs,
		off_t offset,
		off_t len,
		unsigned long expect)
{
	git_zstream stream;
	unsigned char fakebuf[4096], *in;
	int st;

	memset(&stream, 0, sizeof(stream));
	git_inflate_init(&stream);
	do {
		in = use_pack(p, w_curs, offset, &stream.avail_in);
		stream.next_in = in;
		/* output is discarded; we only care about sizes and status */
		stream.next_out = fakebuf;
		stream.avail_out = sizeof(fakebuf);
		st = git_inflate(&stream, Z_FINISH);
		offset += stream.next_in - in;
	} while (st == Z_OK || st == Z_BUF_ERROR);
	git_inflate_end(&stream);
	/* 0 only when the stream ended cleanly with the expected sizes */
	return (st == Z_STREAM_END &&
		stream.total_out == expect &&
		stream.total_in == len) ? 0 : -1;
}

/* Copy "len" raw bytes of pack data from "p" at "offset" into "f". */
static void copy_pack_data(struct hashfile *f,
		struct packed_git *p,
		struct pack_window **w_curs,
		off_t offset,
		off_t len)
{
	unsigned char *in;
	unsigned long avail;

	while (len) {
		in = use_pack(p, w_curs, offset, &avail);
		if (avail > len)
			avail = (unsigned long)len;
		hashwrite(f, in, avail);
		offset += avail;
		len -= avail;
	}
}

/*
 * Is the size of "lhs" strictly greater than "rhs"? Avoids the slow
 * size lookup when the cached size or the cache limit already decides.
 */
static inline int oe_size_greater_than(struct packing_data *pack,
				       const struct object_entry *lhs,
				       unsigned long rhs)
{
	if (lhs->size_valid)
		return lhs->size_ > rhs;
	if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
		return 1;
	return oe_get_size_slow(pack, lhs) > rhs;
}

/* Return 0 if we will bust the pack-size limit */
static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry,
					   unsigned long limit, int usable_delta)
{
	unsigned long size, datalen;
	unsigned char header[MAX_PACK_OBJECT_HEADER],
		      dheader[MAX_PACK_OBJECT_HEADER];
	unsigned hdrlen;
	enum object_type type;
	void *buf;
	struct git_istream *st = NULL;
	const unsigned hashsz = the_hash_algo->rawsz;

	if (!usable_delta) {
		/* Big blobs are streamed rather than loaded whole. */
		if (oe_type(entry) == OBJ_BLOB &&
		    oe_size_greater_than(&to_pack, entry,
					 repo_settings_get_big_file_threshold(the_repository)) &&
		    (st = open_istream(the_repository, &entry->idx.oid, &type,
				       &size, NULL)) != NULL)
			buf = NULL;
		else {
			buf = odb_read_object(the_repository->objects,
					      &entry->idx.oid, &type,
					      &size);
			if (!buf)
				die(_("unable to read %s"),
				    oid_to_hex(&entry->idx.oid));
		}
		/*
		 * make sure no cached delta data remains from a
		 * previous attempt before a pack split occurred.
		 */
		FREE_AND_NULL(entry->delta_data);
		entry->z_delta_size = 0;
	} else if (entry->delta_data) {
		/* cached delta from the delta search phase */
		size = DELTA_SIZE(entry);
		buf = entry->delta_data;
		entry->delta_data = NULL;
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	} else {
		/* delta was dropped for memory reasons; recompute it */
		buf = get_delta(entry);
		size = DELTA_SIZE(entry);
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	}

	if (st)	/* large blob case, just assume we don't compress well */
		datalen = size;
	else if (entry->z_delta_size)
		datalen = entry->z_delta_size;
	else
		datalen = do_compress(&buf, size);

	/*
	 * The object header is a byte of 'type' followed by zero or
	 * more bytes of length.
 */
	hdrlen = encode_in_pack_object_header(header, sizeof(header),
					      type, size);

	if (type == OBJ_OFS_DELTA) {
		/*
		 * Deltas with relative base contain an additional
		 * encoding of the relative offset for the delta
		 * base from this object's position in the pack.
		 */
		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		/* varint-encode "ofs" backwards from the end of dheader */
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
	} else if (type == OBJ_REF_DELTA) {
		/*
		 * Deltas with a base reference contain
		 * additional bytes for the base object ID.
		 */
		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
		hdrlen += hashsz;
	} else {
		if (limit && hdrlen + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
	}
	if (st) {
		datalen = write_large_blob_data(st, f, &entry->idx.oid);
		close_istream(st);
	} else {
		hashwrite(f, buf, datalen);
		free(buf);
	}

	return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
				unsigned long limit, int usable_delta)
{
	struct packed_git *p = IN_PACK(entry);
	struct pack_window *w_curs = NULL;
	uint32_t pos;
	off_t offset;
	enum object_type type = oe_type(entry);
	off_t datalen;
	unsigned char header[MAX_PACK_OBJECT_HEADER],
		      dheader[MAX_PACK_OBJECT_HEADER];
	unsigned hdrlen;
	const unsigned hashsz = the_hash_algo->rawsz;
	unsigned long entry_size = SIZE(entry);

	if (DELTA(entry))
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	hdrlen = encode_in_pack_object_header(header, sizeof(header),
					      type, entry_size);

	offset = entry->in_pack_offset;
	if (offset_to_pack_pos(p, offset, &pos) < 0)
		die(_("write_reuse_object: could not locate %s, expected at "
		      "offset %"PRIuMAX" in pack %s"),
		    oid_to_hex(&entry->idx.oid), (uintmax_t)offset,
		    p->pack_name);
	datalen = pack_pos_to_offset(p, pos + 1) - offset;
	if (!pack_to_stdout && p->index_version > 1 &&
	    check_pack_crc(p, &w_curs, offset, datalen,
			   pack_pos_to_index(p, pos))) {
		error(_("bad packed object CRC for %s"),
		      oid_to_hex(&entry->idx.oid));
		unuse_pack(&w_curs);
		/* fall back to recompressing the object from scratch */
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	offset += entry->in_pack_header_size;
	datalen -= entry->in_pack_header_size;

	if (!pack_to_stdout && p->index_version == 1 &&
	    check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
		error(_("corrupt packed object for %s"),
		      oid_to_hex(&entry->idx.oid));
		unuse_pack(&w_curs);
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	if (type == OBJ_OFS_DELTA) {
		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
		reused_delta++;
	} else if (type == OBJ_REF_DELTA) {
		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
		hdrlen += hashsz;
		reused_delta++;
	} else {
		if (limit && hdrlen + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
	}
	copy_pack_data(f, p, &w_curs, offset, datalen);
	unuse_pack(&w_curs);
	reused++;
	return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static off_t write_object(struct hashfile *f,
			  struct object_entry *entry,
			  off_t write_offset)
{
	unsigned long limit;
	off_t len;
	int usable_delta, to_reuse;

	if (!pack_to_stdout)
		crc32_begin(f);

	/* apply size limit if limited packsize and not first object */
	if (!pack_size_limit || !nr_written)
		limit = 0;
	else if (pack_size_limit <= write_offset)
		/*
		 * the earlier object did not fit the limit; avoid
		 * mistaking this with unlimited (i.e. limit = 0).
		 */
		limit = 1;
	else
		limit = pack_size_limit - write_offset;

	/* Can we write this entry as a delta in this pack? */
	if (!DELTA(entry))
		usable_delta = 0;	/* no delta */
	else if (!pack_size_limit)
		usable_delta = 1;	/* unlimited packfile */
	else if (DELTA(entry)->idx.offset == (off_t)-1)
		usable_delta = 0;	/* base was written to another pack */
	else if (DELTA(entry)->idx.offset)
		usable_delta = 1;	/* base already exists in this pack */
	else
		usable_delta = 0;	/* base could end up in another pack */

	/* Can we reuse the existing on-disk representation? */
	if (!reuse_object)
		to_reuse = 0;	/* explicit */
	else if (!IN_PACK(entry))
		to_reuse = 0;	/* can't reuse what we don't have */
	else if (oe_type(entry) == OBJ_REF_DELTA ||
		 oe_type(entry) == OBJ_OFS_DELTA)
				/* check_object() decided it for us ... */
		to_reuse = usable_delta;
				/* ... but pack split may override that */
	else if (oe_type(entry) != entry->in_pack_type)
		to_reuse = 0;	/* pack has delta which is unusable */
	else if (DELTA(entry))
		to_reuse = 0;	/* we want to pack afresh */
	else
		to_reuse = 1;	/* we have it in-pack undeltified,
				 * and we do not need to deltify it.
				 */

	if (!to_reuse)
		len = write_no_reuse_object(f, entry, limit, usable_delta);
	else
		len = write_reuse_object(f, entry, limit, usable_delta);
	if (!len)
		return 0;

	if (usable_delta)
		written_delta++;
	written++;
	if (!pack_to_stdout)
		entry->idx.crc32 = crc32_end(f);
	return len;
}

/* Outcome of trying to write a single object; see write_one(). */
enum write_one_status {
	WRITE_ONE_SKIP = -1, /* already written */
	WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
	WRITE_ONE_WRITTEN = 1, /* normal */
	WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
};

/*
 * Write "e" to the pack (writing its delta base first if needed) and
 * advance *offset by the number of bytes emitted.
 */
static enum write_one_status write_one(struct hashfile *f,
				       struct object_entry *e,
				       off_t *offset)
{
	off_t size;
	int recursing;

	/*
	 * we set offset to 1 (which is an impossible value) to mark
	 * the fact that this object is involved in "write its base
	 * first before writing a deltified object" recursion.
	 */
	recursing = (e->idx.offset == 1);
	if (recursing) {
		warning(_("recursive delta detected for object %s"),
			oid_to_hex(&e->idx.oid));
		return WRITE_ONE_RECURSIVE;
	} else if (e->idx.offset || e->preferred_base) {
		/* offset is non zero if object is written already. */
		return WRITE_ONE_SKIP;
	}

	/* if we are deltified, write out base object first.
 */
	if (DELTA(e)) {
		e->idx.offset = 1; /* now recurse */
		switch (write_one(f, DELTA(e), offset)) {
		case WRITE_ONE_RECURSIVE:
			/* we cannot depend on this one */
			SET_DELTA(e, NULL);
			break;
		default:
			break;
		case WRITE_ONE_BREAK:
			e->idx.offset = recursing;
			return WRITE_ONE_BREAK;
		}
	}

	e->idx.offset = *offset;
	size = write_object(f, e, *offset);
	if (!size) {
		e->idx.offset = recursing;
		return WRITE_ONE_BREAK;
	}
	written_list[nr_written++] = &e->idx;

	/* make sure off_t is sufficiently large not to wrap */
	if (signed_add_overflows(*offset, size))
		die(_("pack too large for current definition of off_t"));
	*offset += size;
	return WRITE_ONE_WRITTEN;
}

/* Ref callback: mark tag tips (and the objects they peel to) as tagged. */
static int mark_tagged(const char *path UNUSED, const char *referent UNUSED, const struct object_id *oid,
		       int flag UNUSED, void *cb_data UNUSED)
{
	struct object_id peeled;
	struct object_entry *entry = packlist_find(&to_pack, oid);

	if (entry)
		entry->tagged = 1;
	if (!peel_iterated_oid(the_repository, oid, &peeled)) {
		entry = packlist_find(&to_pack, &peeled);
		if (entry)
			entry->tagged = 1;
	}
	return 0;
}

/* Delta-island layer of "e"; 0 when no layers were computed. */
static inline unsigned char oe_layer(struct packing_data *pack,
				     struct object_entry *e)
{
	if (!pack->layer)
		return 0;
	return pack->layer[e - pack->objects];
}

/* Append "e" to the write order unless already added or in another layer. */
static inline void add_to_write_order(struct object_entry **wo,
				      unsigned int *endp,
				      struct object_entry *e)
{
	if (e->filled || oe_layer(&to_pack, e) != write_layer)
		return;
	wo[(*endp)++] = e;
	e->filled = 1;
}

/*
 * Iteratively walk the delta tree rooted at "e", appending every node
 * (and its siblings) to the write order.
 */
static void add_descendants_to_write_order(struct object_entry **wo,
					   unsigned int *endp,
					   struct object_entry *e)
{
	int add_to_order = 1;
	while (e) {
		if (add_to_order) {
			struct object_entry *s;
			/* add this node... */
			add_to_write_order(wo, endp, e);
			/* all its siblings... */
			for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
				add_to_write_order(wo, endp, s);
			}
		}
		/* drop down a level to add left subtree nodes if possible */
		if (DELTA_CHILD(e)) {
			add_to_order = 1;
			e = DELTA_CHILD(e);
		} else {
			add_to_order = 0;
			/* our sibling might have some children, it is next */
			if (DELTA_SIBLING(e)) {
				e = DELTA_SIBLING(e);
				continue;
			}
			/* go back to our parent node */
			e = DELTA(e);
			while (e && !DELTA_SIBLING(e)) {
				/* we're on the right side of a subtree, keep
				 * going up until we can go right again */
				e = DELTA(e);
			}
			if (!e) {
				/* done- we hit our original root node */
				return;
			}
			/* pass it off to sibling at this level */
			e = DELTA_SIBLING(e);
		}
	};
}

/* Add "e"'s entire delta family, starting from its topmost base. */
static void add_family_to_write_order(struct object_entry **wo,
				      unsigned int *endp,
				      struct object_entry *e)
{
	struct object_entry *root;

	for (root = e; DELTA(root); root = DELTA(root))
		; /* nothing */
	add_descendants_to_write_order(wo, endp, root);
}

/* Fill "wo" with the write order for the current write_layer. */
static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end)
{
	unsigned int i, last_untagged;
	struct object_entry *objects = to_pack.objects;

	/* First add the objects before the first tagged one. */
	for (i = 0; i < to_pack.nr_objects; i++) {
		if (objects[i].tagged)
			break;
		add_to_write_order(wo, wo_end, &objects[i]);
	}
	last_untagged = i;

	/*
	 * Then fill all the tagged tips.
	 */
	for (; i < to_pack.nr_objects; i++) {
		if (objects[i].tagged)
			add_to_write_order(wo, wo_end, &objects[i]);
	}

	/*
	 * And then all remaining commits and tags.
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (oe_type(&objects[i]) != OBJ_COMMIT &&
		    oe_type(&objects[i]) != OBJ_TAG)
			continue;
		add_to_write_order(wo, wo_end, &objects[i]);
	}

	/*
	 * And then all the trees.
 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (oe_type(&objects[i]) != OBJ_TREE)
			continue;
		add_to_write_order(wo, wo_end, &objects[i]);
	}

	/*
	 * Finally all the rest in really tight order
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer)
			add_family_to_write_order(wo, wo_end, &objects[i]);
	}
}

/*
 * Compute the order in which objects are written: layer by layer
 * (when delta islands are in use), with each layer ordered by
 * compute_layer_order(). Returns an allocated array of
 * to_pack.nr_objects entries, owned by the caller.
 */
static struct object_entry **compute_write_order(void)
{
	uint32_t max_layers = 1;
	unsigned int i, wo_end;

	struct object_entry **wo;
	struct object_entry *objects = to_pack.objects;

	for (i = 0; i < to_pack.nr_objects; i++) {
		objects[i].tagged = 0;
		objects[i].filled = 0;
		SET_DELTA_CHILD(&objects[i], NULL);
		SET_DELTA_SIBLING(&objects[i], NULL);
	}

	/*
	 * Fully connect delta_child/delta_sibling network.
	 * Make sure delta_sibling is sorted in the original
	 * recency order.
	 */
	for (i = to_pack.nr_objects; i > 0;) {
		struct object_entry *e = &objects[--i];
		if (!DELTA(e))
			continue;
		/* Mark me as the first child */
		e->delta_sibling_idx = DELTA(e)->delta_child_idx;
		SET_DELTA_CHILD(DELTA(e), e);
	}

	/*
	 * Mark objects that are at the tip of tags.
	 */
	refs_for_each_tag_ref(get_main_ref_store(the_repository), mark_tagged,
			      NULL);

	if (use_delta_islands) {
		max_layers = compute_pack_layers(&to_pack);
		free_island_marks();
	}

	ALLOC_ARRAY(wo, to_pack.nr_objects);
	wo_end = 0;

	for (; write_layer < max_layers; ++write_layer)
		compute_layer_order(wo, &wo_end);

	if (wo_end != to_pack.nr_objects)
		die(_("ordered %u objects, expected %"PRIu32),
		    wo_end, to_pack.nr_objects);

	return wo;
}


/*
 * A reused set of objects.
All objects in a chunk have the same
 * relative position in the original packfile and the generated
 * packfile.
 */

static struct reused_chunk {
	/* The offset of the first object of this chunk in the original
	 * packfile. */
	off_t original;
	/* The difference for "original" minus the offset of the first object of
	 * this chunk in the generated packfile. */
	off_t difference;
} *reused_chunks;
static int reused_chunks_nr;
static int reused_chunks_alloc;

/*
 * Record that the object at original offset "where" was written with
 * displacement "offset"; consecutive objects with the same
 * displacement are coalesced into one chunk.
 */
static void record_reused_object(off_t where, off_t offset)
{
	if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset)
		return;

	ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
		   reused_chunks_alloc);
	reused_chunks[reused_chunks_nr].original = where;
	reused_chunks[reused_chunks_nr].difference = offset;
	reused_chunks_nr++;
}

/*
 * Binary search to find the chunk that "where" is in. Note
 * that we're not looking for an exact match, just the first
 * chunk that contains it (which implicitly ends at the start
 * of the next chunk.
 */
static off_t find_reused_offset(off_t where)
{
	int lo = 0, hi = reused_chunks_nr;
	while (lo < hi) {
		int mi = lo + ((hi - lo) / 2);
		if (where == reused_chunks[mi].original)
			return reused_chunks[mi].difference;
		if (where < reused_chunks[mi].original)
			hi = mi;
		else
			lo = mi + 1;
	}

	/*
	 * The first chunk starts at zero, so we can't have gone below
	 * there.
	 */
	assert(lo);
	return reused_chunks[lo-1].difference;
}

/*
 * Copy the object at pack position "pos" of "reuse_packfile" into
 * "out", converting OBJ_OFS_DELTA entries to OBJ_REF_DELTA when
 * offsets are not allowed, or rewriting the base offset when earlier
 * chunks have shifted.
 */
static void write_reused_pack_one(struct packed_git *reuse_packfile,
				  size_t pos, struct hashfile *out,
				  off_t pack_start,
				  struct pack_window **w_curs)
{
	off_t offset, next, cur;
	enum object_type type;
	unsigned long size;

	offset = pack_pos_to_offset(reuse_packfile, pos);
	next = pack_pos_to_offset(reuse_packfile, pos + 1);

	record_reused_object(offset,
			     offset - (hashfile_total(out) - pack_start));

	cur = offset;
	type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
	assert(type >= 0);

	if (type == OBJ_OFS_DELTA) {
		off_t base_offset;
		off_t fixup;

		unsigned char header[MAX_PACK_OBJECT_HEADER];
		unsigned len;

		base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
		assert(base_offset != 0);

		/* Convert to REF_DELTA if we must... */
		if (!allow_ofs_delta) {
			uint32_t base_pos;
			struct object_id base_oid;

			if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0)
				die(_("expected object at offset %"PRIuMAX" "
				      "in pack %s"),
				    (uintmax_t)base_offset,
				    reuse_packfile->pack_name);

			nth_packed_object_id(&base_oid, reuse_packfile,
					     pack_pos_to_index(reuse_packfile, base_pos));

			len = encode_in_pack_object_header(header, sizeof(header),
							   OBJ_REF_DELTA, size);
			hashwrite(out, header, len);
			hashwrite(out, base_oid.hash, the_hash_algo->rawsz);
			copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
			return;
		}

		/* Otherwise see if we need to rewrite the offset...
*/ 1132 fixup = find_reused_offset(offset) - 1133 find_reused_offset(base_offset); 1134 if (fixup) { 1135 unsigned char ofs_header[MAX_PACK_OBJECT_HEADER]; 1136 unsigned i, ofs_len; 1137 off_t ofs = offset - base_offset - fixup; 1138 1139 len = encode_in_pack_object_header(header, sizeof(header), 1140 OBJ_OFS_DELTA, size); 1141 1142 i = sizeof(ofs_header) - 1; 1143 ofs_header[i] = ofs & 127; 1144 while (ofs >>= 7) 1145 ofs_header[--i] = 128 | (--ofs & 127); 1146 1147 ofs_len = sizeof(ofs_header) - i; 1148 1149 hashwrite(out, header, len); 1150 hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len); 1151 copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur); 1152 return; 1153 } 1154 1155 /* ...otherwise we have no fixup, and can write it verbatim */ 1156 } 1157 1158 copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset); 1159} 1160 1161static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile, 1162 struct hashfile *out, 1163 struct pack_window **w_curs) 1164{ 1165 size_t pos = 0; 1166 size_t end; 1167 1168 if (reuse_packfile->bitmap_pos) { 1169 /* 1170 * We can't reuse whole chunks verbatim out of 1171 * non-preferred packs since we can't guarantee that 1172 * all duplicate objects were resolved in favor of 1173 * that pack. 1174 * 1175 * Even if we have a whole eword_t worth of bits that 1176 * could be reused, there may be objects between the 1177 * objects corresponding to the first and last bit of 1178 * that word which were selected from a different 1179 * pack, causing us to send duplicate or unwanted 1180 * objects. 1181 * 1182 * Handle non-preferred packs from within 1183 * write_reused_pack(), which inspects and reuses 1184 * individual bits. 1185 */ 1186 return reuse_packfile->bitmap_pos / BITS_IN_EWORD; 1187 } 1188 1189 /* 1190 * Only read through the last word whose bits all correspond 1191 * to objects in the given packfile, since we must stop at a 1192 * word boundary. 
1193 * 1194 * If there is no whole word to read (i.e. the packfile 1195 * contains fewer than BITS_IN_EWORD objects), then we'll 1196 * inspect bits one-by-one in write_reused_pack(). 1197 */ 1198 end = reuse_packfile->bitmap_nr / BITS_IN_EWORD; 1199 if (reuse_packfile_bitmap->word_alloc < end) 1200 BUG("fewer words than expected in reuse_packfile_bitmap"); 1201 1202 while (pos < end && reuse_packfile_bitmap->words[pos] == (eword_t)~0) 1203 pos++; 1204 1205 if (pos) { 1206 off_t to_write; 1207 1208 written = (pos * BITS_IN_EWORD); 1209 to_write = pack_pos_to_offset(reuse_packfile->p, written) 1210 - sizeof(struct pack_header); 1211 1212 /* We're recording one chunk, not one object. */ 1213 record_reused_object(sizeof(struct pack_header), 0); 1214 hashflush(out); 1215 copy_pack_data(out, reuse_packfile->p, w_curs, 1216 sizeof(struct pack_header), to_write); 1217 1218 display_progress(progress_state, written); 1219 } 1220 return pos; 1221} 1222 1223static void write_reused_pack(struct bitmapped_pack *reuse_packfile, 1224 struct hashfile *f) 1225{ 1226 size_t i = reuse_packfile->bitmap_pos / BITS_IN_EWORD; 1227 uint32_t offset; 1228 off_t pack_start = hashfile_total(f) - sizeof(struct pack_header); 1229 struct pack_window *w_curs = NULL; 1230 1231 if (allow_ofs_delta) 1232 i = write_reused_pack_verbatim(reuse_packfile, f, &w_curs); 1233 1234 for (; i < reuse_packfile_bitmap->word_alloc; ++i) { 1235 eword_t word = reuse_packfile_bitmap->words[i]; 1236 size_t pos = (i * BITS_IN_EWORD); 1237 1238 for (offset = 0; offset < BITS_IN_EWORD; ++offset) { 1239 uint32_t pack_pos; 1240 if ((word >> offset) == 0) 1241 break; 1242 1243 offset += ewah_bit_ctz64(word >> offset); 1244 if (pos + offset < reuse_packfile->bitmap_pos) 1245 continue; 1246 if (pos + offset >= reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr) 1247 goto done; 1248 1249 if (reuse_packfile->bitmap_pos) { 1250 /* 1251 * When doing multi-pack reuse on a 1252 * non-preferred pack, translate bit positions 
1253 * from the MIDX pseudo-pack order back to their 1254 * pack-relative positions before attempting 1255 * reuse. 1256 */ 1257 struct multi_pack_index *m = reuse_packfile->from_midx; 1258 uint32_t midx_pos; 1259 off_t pack_ofs; 1260 1261 if (!m) 1262 BUG("non-zero bitmap position without MIDX"); 1263 1264 midx_pos = pack_pos_to_midx(m, pos + offset); 1265 pack_ofs = nth_midxed_offset(m, midx_pos); 1266 1267 if (offset_to_pack_pos(reuse_packfile->p, 1268 pack_ofs, &pack_pos) < 0) 1269 BUG("could not find expected object at offset %"PRIuMAX" in pack %s", 1270 (uintmax_t)pack_ofs, 1271 pack_basename(reuse_packfile->p)); 1272 } else { 1273 /* 1274 * Can use bit positions directly, even for MIDX 1275 * bitmaps. See comment in try_partial_reuse() 1276 * for why. 1277 */ 1278 pack_pos = pos + offset; 1279 } 1280 1281 write_reused_pack_one(reuse_packfile->p, pack_pos, f, 1282 pack_start, &w_curs); 1283 display_progress(progress_state, ++written); 1284 } 1285 } 1286 1287done: 1288 unuse_pack(&w_curs); 1289} 1290 1291static void write_excluded_by_configs(void) 1292{ 1293 struct oidset_iter iter; 1294 const struct object_id *oid; 1295 1296 oidset_iter_init(&excluded_by_config, &iter); 1297 while ((oid = oidset_iter_next(&iter))) { 1298 struct configured_exclusion *ex = 1299 oidmap_get(&configured_exclusions, oid); 1300 1301 if (!ex) 1302 BUG("configured exclusion wasn't configured"); 1303 write_in_full(1, ex->pack_hash_hex, strlen(ex->pack_hash_hex)); 1304 write_in_full(1, " ", 1); 1305 write_in_full(1, ex->uri, strlen(ex->uri)); 1306 write_in_full(1, "\n", 1); 1307 } 1308} 1309 1310static const char no_split_warning[] = N_( 1311"disabling bitmap writing, packs are split due to pack.packSizeLimit" 1312); 1313 1314static void write_pack_file(void) 1315{ 1316 uint32_t i = 0, j; 1317 struct hashfile *f; 1318 off_t offset; 1319 uint32_t nr_remaining = nr_result; 1320 time_t last_mtime = 0; 1321 struct object_entry **write_order; 1322 1323 if (progress > pack_to_stdout) 1324 
progress_state = start_progress(the_repository, 1325 _("Writing objects"), nr_result); 1326 ALLOC_ARRAY(written_list, to_pack.nr_objects); 1327 write_order = compute_write_order(); 1328 1329 do { 1330 unsigned char hash[GIT_MAX_RAWSZ]; 1331 char *pack_tmp_name = NULL; 1332 1333 if (pack_to_stdout) 1334 f = hashfd_throughput(the_repository->hash_algo, 1, 1335 "<stdout>", progress_state); 1336 else 1337 f = create_tmp_packfile(the_repository, &pack_tmp_name); 1338 1339 offset = write_pack_header(f, nr_remaining); 1340 1341 if (reuse_packfiles_nr) { 1342 assert(pack_to_stdout); 1343 for (j = 0; j < reuse_packfiles_nr; j++) { 1344 reused_chunks_nr = 0; 1345 write_reused_pack(&reuse_packfiles[j], f); 1346 if (reused_chunks_nr) 1347 reuse_packfiles_used_nr++; 1348 } 1349 offset = hashfile_total(f); 1350 } 1351 1352 nr_written = 0; 1353 for (; i < to_pack.nr_objects; i++) { 1354 struct object_entry *e = write_order[i]; 1355 if (write_one(f, e, &offset) == WRITE_ONE_BREAK) 1356 break; 1357 display_progress(progress_state, written); 1358 } 1359 1360 if (pack_to_stdout) { 1361 /* 1362 * We never fsync when writing to stdout since we may 1363 * not be writing to an actual pack file. For instance, 1364 * the upload-pack code passes a pipe here. Calling 1365 * fsync on a pipe results in unnecessary 1366 * synchronization with the reader on some platforms. 1367 */ 1368 finalize_hashfile(f, hash, FSYNC_COMPONENT_NONE, 1369 CSUM_HASH_IN_STREAM | CSUM_CLOSE); 1370 } else if (nr_written == nr_remaining) { 1371 finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 1372 CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); 1373 } else { 1374 /* 1375 * If we wrote the wrong number of entries in the 1376 * header, rewrite it like in fast-import. 
1377 */ 1378 1379 int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0); 1380 fixup_pack_header_footer(the_hash_algo, fd, hash, 1381 pack_tmp_name, nr_written, 1382 hash, offset); 1383 close(fd); 1384 if (write_bitmap_index) { 1385 if (write_bitmap_index != WRITE_BITMAP_QUIET) 1386 warning(_(no_split_warning)); 1387 write_bitmap_index = 0; 1388 } 1389 } 1390 1391 if (!pack_to_stdout) { 1392 struct stat st; 1393 struct strbuf tmpname = STRBUF_INIT; 1394 struct bitmap_writer bitmap_writer; 1395 char *idx_tmp_name = NULL; 1396 1397 /* 1398 * Packs are runtime accessed in their mtime 1399 * order since newer packs are more likely to contain 1400 * younger objects. So if we are creating multiple 1401 * packs then we should modify the mtime of later ones 1402 * to preserve this property. 1403 */ 1404 if (stat(pack_tmp_name, &st) < 0) { 1405 warning_errno(_("failed to stat %s"), pack_tmp_name); 1406 } else if (!last_mtime) { 1407 last_mtime = st.st_mtime; 1408 } else { 1409 struct utimbuf utb; 1410 utb.actime = st.st_atime; 1411 utb.modtime = --last_mtime; 1412 if (utime(pack_tmp_name, &utb) < 0) 1413 warning_errno(_("failed utime() on %s"), pack_tmp_name); 1414 } 1415 1416 strbuf_addf(&tmpname, "%s-%s.", base_name, 1417 hash_to_hex(hash)); 1418 1419 if (write_bitmap_index) { 1420 bitmap_writer_init(&bitmap_writer, 1421 the_repository, &to_pack, 1422 NULL); 1423 bitmap_writer_set_checksum(&bitmap_writer, hash); 1424 bitmap_writer_build_type_index(&bitmap_writer, 1425 written_list); 1426 } 1427 1428 if (cruft) 1429 pack_idx_opts.flags |= WRITE_MTIMES; 1430 1431 stage_tmp_packfiles(the_repository, &tmpname, 1432 pack_tmp_name, written_list, 1433 nr_written, &to_pack, 1434 &pack_idx_opts, hash, 1435 &idx_tmp_name); 1436 1437 if (write_bitmap_index) { 1438 size_t tmpname_len = tmpname.len; 1439 1440 strbuf_addstr(&tmpname, "bitmap"); 1441 stop_progress(&progress_state); 1442 1443 bitmap_writer_show_progress(&bitmap_writer, 1444 progress); 1445 
bitmap_writer_select_commits(&bitmap_writer, 1446 indexed_commits, 1447 indexed_commits_nr); 1448 if (bitmap_writer_build(&bitmap_writer) < 0) 1449 die(_("failed to write bitmap index")); 1450 bitmap_writer_finish(&bitmap_writer, 1451 written_list, 1452 tmpname.buf, write_bitmap_options); 1453 bitmap_writer_free(&bitmap_writer); 1454 write_bitmap_index = 0; 1455 strbuf_setlen(&tmpname, tmpname_len); 1456 } 1457 1458 rename_tmp_packfile_idx(the_repository, &tmpname, &idx_tmp_name); 1459 1460 free(idx_tmp_name); 1461 strbuf_release(&tmpname); 1462 free(pack_tmp_name); 1463 puts(hash_to_hex(hash)); 1464 } 1465 1466 /* mark written objects as written to previous pack */ 1467 for (j = 0; j < nr_written; j++) { 1468 written_list[j]->offset = (off_t)-1; 1469 } 1470 nr_remaining -= nr_written; 1471 } while (nr_remaining && i < to_pack.nr_objects); 1472 1473 free(written_list); 1474 free(write_order); 1475 stop_progress(&progress_state); 1476 if (written != nr_result) 1477 die(_("wrote %"PRIu32" objects while expecting %"PRIu32), 1478 written, nr_result); 1479 trace2_data_intmax("pack-objects", the_repository, 1480 "write_pack_file/wrote", nr_result); 1481} 1482 1483static int no_try_delta(const char *path) 1484{ 1485 static struct attr_check *check; 1486 1487 if (!check) 1488 check = attr_check_initl("delta", NULL); 1489 git_check_attr(the_repository->index, path, check); 1490 if (ATTR_FALSE(check->items[0].value)) 1491 return 1; 1492 return 0; 1493} 1494 1495/* 1496 * When adding an object, check whether we have already added it 1497 * to our packing list. If so, we can skip. However, if we are 1498 * being asked to excludei t, but the previous mention was to include 1499 * it, make sure to adjust its flags and tweak our numbers accordingly. 1500 * 1501 * As an optimization, we pass out the index position where we would have 1502 * found the item, since that saves us from having to look it up again a 1503 * few lines later when we want to add the new entry. 
1504 */ 1505static int have_duplicate_entry(const struct object_id *oid, 1506 int exclude) 1507{ 1508 struct object_entry *entry; 1509 1510 if (reuse_packfile_bitmap && 1511 bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid)) 1512 return 1; 1513 1514 entry = packlist_find(&to_pack, oid); 1515 if (!entry) 1516 return 0; 1517 1518 if (exclude) { 1519 if (!entry->preferred_base) 1520 nr_result--; 1521 entry->preferred_base = 1; 1522 } 1523 1524 return 1; 1525} 1526 1527static int want_cruft_object_mtime(struct repository *r, 1528 const struct object_id *oid, 1529 unsigned flags, uint32_t mtime) 1530{ 1531 struct packed_git **cache; 1532 1533 for (cache = kept_pack_cache(r, flags); *cache; cache++) { 1534 struct packed_git *p = *cache; 1535 off_t ofs; 1536 uint32_t candidate_mtime; 1537 1538 ofs = find_pack_entry_one(oid, p); 1539 if (!ofs) 1540 continue; 1541 1542 /* 1543 * We have a copy of the object 'oid' in a non-cruft 1544 * pack. We can avoid packing an additional copy 1545 * regardless of what the existing copy's mtime is since 1546 * it is outside of a cruft pack. 1547 */ 1548 if (!p->is_cruft) 1549 return 0; 1550 1551 /* 1552 * If we have a copy of the object 'oid' in a cruft 1553 * pack, then either read the cruft pack's mtime for 1554 * that object, or, if that can't be loaded, assume the 1555 * pack's mtime itself. 1556 */ 1557 if (!load_pack_mtimes(p)) { 1558 uint32_t pos; 1559 if (offset_to_pack_pos(p, ofs, &pos) < 0) 1560 continue; 1561 candidate_mtime = nth_packed_mtime(p, pos); 1562 } else { 1563 candidate_mtime = p->mtime; 1564 } 1565 1566 /* 1567 * We have a surviving copy of the object in a cruft 1568 * pack whose mtime is greater than or equal to the one 1569 * we are considering. We can thus avoid packing an 1570 * additional copy of that object. 
1571 */ 1572 if (mtime <= candidate_mtime) 1573 return 0; 1574 } 1575 1576 return -1; 1577} 1578 1579static int want_found_object(const struct object_id *oid, int exclude, 1580 struct packed_git *p, uint32_t mtime) 1581{ 1582 if (exclude) 1583 return 1; 1584 if (incremental) 1585 return 0; 1586 1587 if (!is_pack_valid(p)) 1588 return -1; 1589 1590 /* 1591 * When asked to do --local (do not include an object that appears in a 1592 * pack we borrow from elsewhere) or --honor-pack-keep (do not include 1593 * an object that appears in a pack marked with .keep), finding a pack 1594 * that matches the criteria is sufficient for us to decide to omit it. 1595 * However, even if this pack does not satisfy the criteria, we need to 1596 * make sure no copy of this object appears in _any_ pack that makes us 1597 * to omit the object, so we need to check all the packs. 1598 * 1599 * We can however first check whether these options can possibly matter; 1600 * if they do not matter we know we want the object in generated pack. 1601 * Otherwise, we signal "-1" at the end to tell the caller that we do 1602 * not know either way, and it needs to check more packs. 1603 */ 1604 1605 /* 1606 * Objects in packs borrowed from elsewhere are discarded regardless of 1607 * if they appear in other packs that weren't borrowed. 1608 */ 1609 if (local && !p->pack_local) 1610 return 0; 1611 1612 /* 1613 * Then handle .keep first, as we have a fast(er) path there. 1614 */ 1615 if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) { 1616 /* 1617 * Set the flags for the kept-pack cache to be the ones we want 1618 * to ignore. 1619 * 1620 * That is, if we are ignoring objects in on-disk keep packs, 1621 * then we want to search through the on-disk keep and ignore 1622 * the in-core ones. 
1623 */ 1624 unsigned flags = 0; 1625 if (ignore_packed_keep_on_disk) 1626 flags |= ON_DISK_KEEP_PACKS; 1627 if (ignore_packed_keep_in_core) 1628 flags |= IN_CORE_KEEP_PACKS; 1629 1630 /* 1631 * If the object is in a pack that we want to ignore, *and* we 1632 * don't have any cruft packs that are being retained, we can 1633 * abort quickly. 1634 */ 1635 if (!ignore_packed_keep_in_core_has_cruft) { 1636 if (ignore_packed_keep_on_disk && p->pack_keep) 1637 return 0; 1638 if (ignore_packed_keep_in_core && p->pack_keep_in_core) 1639 return 0; 1640 if (has_object_kept_pack(p->repo, oid, flags)) 1641 return 0; 1642 } else { 1643 /* 1644 * But if there is at least one cruft pack which 1645 * is being kept, we only want to include the 1646 * provided object if it has a strictly greater 1647 * mtime than any existing cruft copy. 1648 */ 1649 if (!want_cruft_object_mtime(p->repo, oid, flags, 1650 mtime)) 1651 return 0; 1652 } 1653 } 1654 1655 /* 1656 * At this point we know definitively that either we don't care about 1657 * keep-packs, or the object is not in one. Keep checking other 1658 * conditions... 
1659 */ 1660 if (!local || !have_non_local_packs) 1661 return 1; 1662 1663 /* we don't know yet; keep looking for more packs */ 1664 return -1; 1665} 1666 1667static int want_object_in_pack_one(struct packed_git *p, 1668 const struct object_id *oid, 1669 int exclude, 1670 struct packed_git **found_pack, 1671 off_t *found_offset, 1672 uint32_t found_mtime) 1673{ 1674 off_t offset; 1675 1676 if (p == *found_pack) 1677 offset = *found_offset; 1678 else 1679 offset = find_pack_entry_one(oid, p); 1680 1681 if (offset) { 1682 if (!*found_pack) { 1683 if (!is_pack_valid(p)) 1684 return -1; 1685 *found_offset = offset; 1686 *found_pack = p; 1687 } 1688 return want_found_object(oid, exclude, p, found_mtime); 1689 } 1690 return -1; 1691} 1692 1693/* 1694 * Check whether we want the object in the pack (e.g., we do not want 1695 * objects found in non-local stores if the "--local" option was used). 1696 * 1697 * If the caller already knows an existing pack it wants to take the object 1698 * from, that is passed in *found_pack and *found_offset; otherwise this 1699 * function finds if there is any pack that has the object and returns the pack 1700 * and its offset in these variables. 1701 */ 1702static int want_object_in_pack_mtime(const struct object_id *oid, 1703 int exclude, 1704 struct packed_git **found_pack, 1705 off_t *found_offset, 1706 uint32_t found_mtime) 1707{ 1708 int want; 1709 struct odb_source *source; 1710 struct list_head *pos; 1711 1712 if (!exclude && local) { 1713 /* 1714 * Note that we start iterating at `sources->next` so that we 1715 * skip the local object source. 
1716 */ 1717 struct odb_source *source = the_repository->objects->sources->next; 1718 for (; source; source = source->next) 1719 if (has_loose_object(source, oid)) 1720 return 0; 1721 } 1722 1723 /* 1724 * If we already know the pack object lives in, start checks from that 1725 * pack - in the usual case when neither --local was given nor .keep files 1726 * are present we will determine the answer right now. 1727 */ 1728 if (*found_pack) { 1729 want = want_found_object(oid, exclude, *found_pack, 1730 found_mtime); 1731 if (want != -1) 1732 return want; 1733 1734 *found_pack = NULL; 1735 *found_offset = 0; 1736 } 1737 1738 odb_prepare_alternates(the_repository->objects); 1739 1740 for (source = the_repository->objects->sources; source; source = source->next) { 1741 struct multi_pack_index *m = get_multi_pack_index(source); 1742 struct pack_entry e; 1743 1744 if (m && fill_midx_entry(m, oid, &e)) { 1745 want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset, found_mtime); 1746 if (want != -1) 1747 return want; 1748 } 1749 } 1750 1751 list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) { 1752 struct packed_git *p = list_entry(pos, struct packed_git, mru); 1753 want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); 1754 if (!exclude && want > 0) 1755 list_move(&p->mru, 1756 packfile_store_get_packs_mru(the_repository->objects->packfiles)); 1757 if (want != -1) 1758 return want; 1759 } 1760 1761 if (uri_protocols.nr) { 1762 struct configured_exclusion *ex = 1763 oidmap_get(&configured_exclusions, oid); 1764 int i; 1765 const char *p; 1766 1767 if (ex) { 1768 for (i = 0; i < uri_protocols.nr; i++) { 1769 if (skip_prefix(ex->uri, 1770 uri_protocols.items[i].string, 1771 &p) && 1772 *p == ':') { 1773 oidset_insert(&excluded_by_config, oid); 1774 return 0; 1775 } 1776 } 1777 } 1778 } 1779 1780 return 1; 1781} 1782 1783static inline int want_object_in_pack(const struct object_id *oid, 1784 
int exclude, 1785 struct packed_git **found_pack, 1786 off_t *found_offset) 1787{ 1788 return want_object_in_pack_mtime(oid, exclude, found_pack, found_offset, 1789 0); 1790} 1791 1792static struct object_entry *create_object_entry(const struct object_id *oid, 1793 enum object_type type, 1794 uint32_t hash, 1795 int exclude, 1796 int no_try_delta, 1797 struct packed_git *found_pack, 1798 off_t found_offset) 1799{ 1800 struct object_entry *entry; 1801 1802 entry = packlist_alloc(&to_pack, oid); 1803 entry->hash = hash; 1804 oe_set_type(entry, type); 1805 if (exclude) 1806 entry->preferred_base = 1; 1807 else 1808 nr_result++; 1809 if (found_pack) { 1810 oe_set_in_pack(&to_pack, entry, found_pack); 1811 entry->in_pack_offset = found_offset; 1812 } 1813 1814 entry->no_try_delta = no_try_delta; 1815 1816 return entry; 1817} 1818 1819static const char no_closure_warning[] = N_( 1820"disabling bitmap writing, as some objects are not being packed" 1821); 1822 1823static int add_object_entry(const struct object_id *oid, enum object_type type, 1824 const char *name, int exclude) 1825{ 1826 struct packed_git *found_pack = NULL; 1827 off_t found_offset = 0; 1828 1829 display_progress(progress_state, ++nr_seen); 1830 1831 if (have_duplicate_entry(oid, exclude)) 1832 return 0; 1833 1834 if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) { 1835 /* The pack is missing an object, so it will not have closure */ 1836 if (write_bitmap_index) { 1837 if (write_bitmap_index != WRITE_BITMAP_QUIET) 1838 warning(_(no_closure_warning)); 1839 write_bitmap_index = 0; 1840 } 1841 return 0; 1842 } 1843 1844 create_object_entry(oid, type, pack_name_hash_fn(name), 1845 exclude, name && no_try_delta(name), 1846 found_pack, found_offset); 1847 return 1; 1848} 1849 1850static int add_object_entry_from_bitmap(const struct object_id *oid, 1851 enum object_type type, 1852 int flags UNUSED, uint32_t name_hash, 1853 struct packed_git *pack, off_t offset, 1854 void *payload UNUSED) 1855{ 
1856 display_progress(progress_state, ++nr_seen); 1857 1858 if (have_duplicate_entry(oid, 0)) 1859 return 0; 1860 1861 if (!want_object_in_pack(oid, 0, &pack, &offset)) 1862 return 0; 1863 1864 create_object_entry(oid, type, name_hash, 0, 0, pack, offset); 1865 return 1; 1866} 1867 1868struct pbase_tree_cache { 1869 struct object_id oid; 1870 int ref; 1871 int temporary; 1872 void *tree_data; 1873 unsigned long tree_size; 1874}; 1875 1876static struct pbase_tree_cache *(pbase_tree_cache[256]); 1877static int pbase_tree_cache_ix(const struct object_id *oid) 1878{ 1879 return oid->hash[0] % ARRAY_SIZE(pbase_tree_cache); 1880} 1881static int pbase_tree_cache_ix_incr(int ix) 1882{ 1883 return (ix+1) % ARRAY_SIZE(pbase_tree_cache); 1884} 1885 1886static struct pbase_tree { 1887 struct pbase_tree *next; 1888 /* This is a phony "cache" entry; we are not 1889 * going to evict it or find it through _get() 1890 * mechanism -- this is for the toplevel node that 1891 * would almost always change with any commit. 1892 */ 1893 struct pbase_tree_cache pcache; 1894} *pbase_tree; 1895 1896static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid) 1897{ 1898 struct pbase_tree_cache *ent, *nent; 1899 void *data; 1900 unsigned long size; 1901 enum object_type type; 1902 int neigh; 1903 int my_ix = pbase_tree_cache_ix(oid); 1904 int available_ix = -1; 1905 1906 /* pbase-tree-cache acts as a limited hashtable. 1907 * your object will be found at your index or within a few 1908 * slots after that slot if it is cached. 1909 */ 1910 for (neigh = 0; neigh < 8; neigh++) { 1911 ent = pbase_tree_cache[my_ix]; 1912 if (ent && oideq(&ent->oid, oid)) { 1913 ent->ref++; 1914 return ent; 1915 } 1916 else if (((available_ix < 0) && (!ent || !ent->ref)) || 1917 ((0 <= available_ix) && 1918 (!ent && pbase_tree_cache[available_ix]))) 1919 available_ix = my_ix; 1920 if (!ent) 1921 break; 1922 my_ix = pbase_tree_cache_ix_incr(my_ix); 1923 } 1924 1925 /* Did not find one. 
Either we got a bogus request or 1926 * we need to read and perhaps cache. 1927 */ 1928 data = odb_read_object(the_repository->objects, oid, &type, &size); 1929 if (!data) 1930 return NULL; 1931 if (type != OBJ_TREE) { 1932 free(data); 1933 return NULL; 1934 } 1935 1936 /* We need to either cache or return a throwaway copy */ 1937 1938 if (available_ix < 0) 1939 ent = NULL; 1940 else { 1941 ent = pbase_tree_cache[available_ix]; 1942 my_ix = available_ix; 1943 } 1944 1945 if (!ent) { 1946 nent = xmalloc(sizeof(*nent)); 1947 nent->temporary = (available_ix < 0); 1948 } 1949 else { 1950 /* evict and reuse */ 1951 free(ent->tree_data); 1952 nent = ent; 1953 } 1954 oidcpy(&nent->oid, oid); 1955 nent->tree_data = data; 1956 nent->tree_size = size; 1957 nent->ref = 1; 1958 if (!nent->temporary) 1959 pbase_tree_cache[my_ix] = nent; 1960 return nent; 1961} 1962 1963static void pbase_tree_put(struct pbase_tree_cache *cache) 1964{ 1965 if (!cache->temporary) { 1966 cache->ref--; 1967 return; 1968 } 1969 free(cache->tree_data); 1970 free(cache); 1971} 1972 1973static size_t name_cmp_len(const char *name) 1974{ 1975 return strcspn(name, "\n/"); 1976} 1977 1978static void add_pbase_object(struct tree_desc *tree, 1979 const char *name, 1980 size_t cmplen, 1981 const char *fullname) 1982{ 1983 struct name_entry entry; 1984 int cmp; 1985 1986 while (tree_entry(tree,&entry)) { 1987 if (S_ISGITLINK(entry.mode)) 1988 continue; 1989 cmp = tree_entry_len(&entry) != cmplen ? 
1 : 1990 memcmp(name, entry.path, cmplen); 1991 if (cmp > 0) 1992 continue; 1993 if (cmp < 0) 1994 return; 1995 if (name[cmplen] != '/') { 1996 add_object_entry(&entry.oid, 1997 object_type(entry.mode), 1998 fullname, 1); 1999 return; 2000 } 2001 if (S_ISDIR(entry.mode)) { 2002 struct tree_desc sub; 2003 struct pbase_tree_cache *tree; 2004 const char *down = name+cmplen+1; 2005 size_t downlen = name_cmp_len(down); 2006 2007 tree = pbase_tree_get(&entry.oid); 2008 if (!tree) 2009 return; 2010 init_tree_desc(&sub, &tree->oid, 2011 tree->tree_data, tree->tree_size); 2012 2013 add_pbase_object(&sub, down, downlen, fullname); 2014 pbase_tree_put(tree); 2015 } 2016 } 2017} 2018 2019static unsigned *done_pbase_paths; 2020static int done_pbase_paths_num; 2021static int done_pbase_paths_alloc; 2022static int done_pbase_path_pos(unsigned hash) 2023{ 2024 int lo = 0; 2025 int hi = done_pbase_paths_num; 2026 while (lo < hi) { 2027 int mi = lo + (hi - lo) / 2; 2028 if (done_pbase_paths[mi] == hash) 2029 return mi; 2030 if (done_pbase_paths[mi] < hash) 2031 hi = mi; 2032 else 2033 lo = mi + 1; 2034 } 2035 return -lo-1; 2036} 2037 2038static int check_pbase_path(unsigned hash) 2039{ 2040 int pos = done_pbase_path_pos(hash); 2041 if (0 <= pos) 2042 return 1; 2043 pos = -pos - 1; 2044 ALLOC_GROW(done_pbase_paths, 2045 done_pbase_paths_num + 1, 2046 done_pbase_paths_alloc); 2047 done_pbase_paths_num++; 2048 if (pos < done_pbase_paths_num) 2049 MOVE_ARRAY(done_pbase_paths + pos + 1, done_pbase_paths + pos, 2050 done_pbase_paths_num - pos - 1); 2051 done_pbase_paths[pos] = hash; 2052 return 0; 2053} 2054 2055static void add_preferred_base_object(const char *name) 2056{ 2057 struct pbase_tree *it; 2058 size_t cmplen; 2059 unsigned hash = pack_name_hash_fn(name); 2060 2061 if (!num_preferred_base || check_pbase_path(hash)) 2062 return; 2063 2064 cmplen = name_cmp_len(name); 2065 for (it = pbase_tree; it; it = it->next) { 2066 if (cmplen == 0) { 2067 add_object_entry(&it->pcache.oid, 
OBJ_TREE, NULL, 1); 2068 } 2069 else { 2070 struct tree_desc tree; 2071 init_tree_desc(&tree, &it->pcache.oid, 2072 it->pcache.tree_data, it->pcache.tree_size); 2073 add_pbase_object(&tree, name, cmplen, name); 2074 } 2075 } 2076} 2077 2078static void add_preferred_base(struct object_id *oid) 2079{ 2080 struct pbase_tree *it; 2081 void *data; 2082 unsigned long size; 2083 struct object_id tree_oid; 2084 2085 if (window <= num_preferred_base++) 2086 return; 2087 2088 data = odb_read_object_peeled(the_repository->objects, oid, 2089 OBJ_TREE, &size, &tree_oid); 2090 if (!data) 2091 return; 2092 2093 for (it = pbase_tree; it; it = it->next) { 2094 if (oideq(&it->pcache.oid, &tree_oid)) { 2095 free(data); 2096 return; 2097 } 2098 } 2099 2100 CALLOC_ARRAY(it, 1); 2101 it->next = pbase_tree; 2102 pbase_tree = it; 2103 2104 oidcpy(&it->pcache.oid, &tree_oid); 2105 it->pcache.tree_data = data; 2106 it->pcache.tree_size = size; 2107} 2108 2109static void cleanup_preferred_base(void) 2110{ 2111 struct pbase_tree *it; 2112 unsigned i; 2113 2114 it = pbase_tree; 2115 pbase_tree = NULL; 2116 while (it) { 2117 struct pbase_tree *tmp = it; 2118 it = tmp->next; 2119 free(tmp->pcache.tree_data); 2120 free(tmp); 2121 } 2122 2123 for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) { 2124 if (!pbase_tree_cache[i]) 2125 continue; 2126 free(pbase_tree_cache[i]->tree_data); 2127 FREE_AND_NULL(pbase_tree_cache[i]); 2128 } 2129 2130 FREE_AND_NULL(done_pbase_paths); 2131 done_pbase_paths_num = done_pbase_paths_alloc = 0; 2132} 2133 2134/* 2135 * Return 1 iff the object specified by "delta" can be sent 2136 * literally as a delta against the base in "base_sha1". If 2137 * so, then *base_out will point to the entry in our packing 2138 * list, or NULL if we must use the external-base list. 2139 * 2140 * Depth value does not matter - find_deltas() will 2141 * never consider reused delta as the base object to 2142 * deltify other objects against, in order to avoid 2143 * circular deltas. 
 */
static int can_reuse_delta(const struct object_id *base_oid,
			   struct object_entry *delta,
			   struct object_entry **base_out)
{
	struct object_entry *base;

	/*
	 * First see if we're already sending the base (or it's explicitly in
	 * our "excluded" list).
	 */
	base = packlist_find(&to_pack, base_oid);
	if (base) {
		if (!in_same_island(&delta->idx.oid, &base->idx.oid))
			return 0;
		*base_out = base;
		return 1;
	}

	/*
	 * Otherwise, reachability bitmaps may tell us if the receiver has it,
	 * even if it was buried too deep in history to make it into the
	 * packing list.
	 */
	if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, base_oid)) {
		if (use_delta_islands) {
			if (!in_same_island(&delta->idx.oid, base_oid))
				return 0;
		}
		/* base is on the receiving side but not in our packlist */
		*base_out = NULL;
		return 1;
	}

	return 0;
}

/*
 * Batch-fetch from the promisor remote every object at or after
 * object_index_start in to_pack that cannot be read from the local
 * object database. Issuing one bulk request avoids a lazy fetch per
 * missing object when check_object() falls back to reading objects.
 */
static void prefetch_to_pack(uint32_t object_index_start) {
	struct oid_array to_fetch = OID_ARRAY_INIT;
	uint32_t i;

	for (i = object_index_start; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = to_pack.objects + i;

		/* a zero return means the object is already available locally */
		if (!odb_read_object_info_extended(the_repository->objects,
						   &entry->idx.oid,
						   NULL,
						   OBJECT_INFO_FOR_PREFETCH))
			continue;
		oid_array_append(&to_fetch, &entry->idx.oid);
	}
	promisor_remote_get_direct(the_repository,
				   to_fetch.oid, to_fetch.nr);
	oid_array_clear(&to_fetch);
}

/*
 * Determine the type and size of "entry", as cheaply as possible. When
 * the object lives in a pack we inspect its header in place, and for
 * deltas decide whether the on-disk delta representation can be reused
 * (recording base/child/sibling links via SET_DELTA*). Otherwise, or on
 * any failure, fall back to odb_read_object_info_extended(); for
 * promisor-remote repositories a failed read triggers a bulk prefetch
 * (see prefetch_to_pack) before one retry.
 */
static void check_object(struct object_entry *entry, uint32_t object_index)
{
	unsigned long canonical_size;
	enum object_type type;
	struct object_info oi = {.typep = &type, .sizep = &canonical_size};

	if (IN_PACK(entry)) {
		struct packed_git *p = IN_PACK(entry);
		struct pack_window *w_curs = NULL;
		int have_base = 0;
		struct object_id base_ref;
		struct object_entry *base_entry;
		unsigned long used, used_0;
		unsigned long avail;
		off_t ofs;
		unsigned char *buf, c;
		enum object_type type;
		unsigned long in_pack_size;

		buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);

		/*
		 * We want in_pack_type even if we do not reuse delta
		 * since non-delta representations could still be reused.
		 */
		used = unpack_object_header_buffer(buf, avail,
						   &type,
						   &in_pack_size);
		if (used == 0)
			goto give_up;

		if (type < 0)
			BUG("invalid type %d", type);
		entry->in_pack_type = type;

		/*
		 * Determine if this is a delta and if so whether we can
		 * reuse it or not.  Otherwise let's find out as cheaply as
		 * possible what the actual type and size for this object is.
		 */
		switch (entry->in_pack_type) {
		default:
			/* Not a delta hence we've already got all we need. */
			oe_set_type(entry, entry->in_pack_type);
			SET_SIZE(entry, in_pack_size);
			entry->in_pack_header_size = used;
			if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
				goto give_up;
			unuse_pack(&w_curs);
			return;
		case OBJ_REF_DELTA:
			if (reuse_delta && !entry->preferred_base) {
				/* base oid is stored right after the header */
				oidread(&base_ref,
					use_pack(p, &w_curs,
						 entry->in_pack_offset + used,
						 NULL),
					the_repository->hash_algo);
				have_base = 1;
			}
			entry->in_pack_header_size = used + the_hash_algo->rawsz;
			break;
		case OBJ_OFS_DELTA:
			buf = use_pack(p, &w_curs,
				       entry->in_pack_offset + used, NULL);
			used_0 = 0;
			c = buf[used_0++];
			/* decode the varint-encoded negative offset to the base */
			ofs = c & 127;
			while (c & 128) {
				ofs += 1;
				if (!ofs || MSB(ofs, 7)) {
					error(_("delta base offset overflow in pack for %s"),
					      oid_to_hex(&entry->idx.oid));
					goto give_up;
				}
				c = buf[used_0++];
				ofs = (ofs << 7) + (c & 127);
			}
			ofs = entry->in_pack_offset - ofs;
			if (ofs <= 0 || ofs >= entry->in_pack_offset) {
				error(_("delta base offset out of bound for %s"),
				      oid_to_hex(&entry->idx.oid));
				goto give_up;
			}
			if (reuse_delta && !entry->preferred_base) {
				uint32_t pos;
				if (offset_to_pack_pos(p, ofs, &pos) < 0)
					goto give_up;
				if (!nth_packed_object_id(&base_ref, p,
							  pack_pos_to_index(p, pos)))
					have_base = 1;
			}
			entry->in_pack_header_size = used + used_0;
			break;
		}

		if (have_base &&
		    can_reuse_delta(&base_ref, entry, &base_entry)) {
			oe_set_type(entry, entry->in_pack_type);
			SET_SIZE(entry, in_pack_size); /* delta size */
			SET_DELTA_SIZE(entry, in_pack_size);

			if (base_entry) {
				SET_DELTA(entry, base_entry);
				entry->delta_sibling_idx = base_entry->delta_child_idx;
				SET_DELTA_CHILD(base_entry, entry);
			} else {
				/* base is known to the receiver but not packed by us */
				SET_DELTA_EXT(entry, &base_ref);
			}

			unuse_pack(&w_curs);
			return;
		}

		if (oe_type(entry)) {
			off_t delta_pos;

			/*
			 * This must be a delta and we already know what the
			 * final object type is.  Let's extract the actual
			 * object size from the delta header.
			 */
			delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
			canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
			if (canonical_size == 0)
				goto give_up;
			SET_SIZE(entry, canonical_size);
			unuse_pack(&w_curs);
			return;
		}

		/*
		 * No choice but to fall back to the recursive delta walk
		 * with odb_read_object_info() to find out about the object type
		 * at this point...
		 */
give_up:
		unuse_pack(&w_curs);
	}

	if (odb_read_object_info_extended(the_repository->objects, &entry->idx.oid, &oi,
					  OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
		if (repo_has_promisor_remote(the_repository)) {
			/* prefetch the remaining objects in bulk, then retry once */
			prefetch_to_pack(object_index);
			if (odb_read_object_info_extended(the_repository->objects, &entry->idx.oid, &oi,
							  OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
				type = -1;
		} else {
			type = -1;
		}
	}
	oe_set_type(entry, type);
	if (entry->type_valid) {
		SET_SIZE(entry, canonical_size);
	} else {
		/*
		 * Bad object type is checked in prepare_pack().  This is
		 * to permit a missing preferred base object to be ignored
		 * as a preferred base.  Doing so can result in a larger
		 * pack file, but the transfer will still take place.
		 */
	}
}

/*
 * qsort comparator: group entries by pack, ordered by in-pack offset,
 * with all loose (non-packed) objects sorted by oid. Used so that
 * check_object() walks each pack sequentially.
 */
static int pack_offset_sort(const void *_a, const void *_b)
{
	const struct object_entry *a = *(struct object_entry **)_a;
	const struct object_entry *b = *(struct object_entry **)_b;
	const struct packed_git *a_in_pack = IN_PACK(a);
	const struct packed_git *b_in_pack = IN_PACK(b);

	/* avoid filesystem thrashing with loose objects */
	if (!a_in_pack && !b_in_pack)
		return oidcmp(&a->idx.oid, &b->idx.oid);

	if (a_in_pack < b_in_pack)
		return -1;
	if (a_in_pack > b_in_pack)
		return 1;
	return a->in_pack_offset < b->in_pack_offset ? -1 :
	       (a->in_pack_offset > b->in_pack_offset);
}

/*
 * Drop an on-disk delta we were planning to reuse. Naively, this would
 * just involve blanking out the "delta" field, but we have to deal
 * with some extra book-keeping:
 *
 *   1. Removing ourselves from the delta_sibling linked list.
 *
 *   2. Updating our size/type to the non-delta representation.
 These were
 * either not recorded initially (size) or overwritten with the delta type
 * (type) when check_object() decided to reuse the delta.
 *
 *   3. Resetting our delta depth, as we are now a base object.
 */
static void drop_reused_delta(struct object_entry *entry)
{
	unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
	struct object_info oi = OBJECT_INFO_INIT;
	enum object_type type;
	unsigned long size;

	/* unlink "entry" from its base's delta_child/delta_sibling list */
	while (*idx) {
		struct object_entry *oe = &to_pack.objects[*idx - 1];

		if (oe == entry)
			*idx = oe->delta_sibling_idx;
		else
			idx = &oe->delta_sibling_idx;
	}
	SET_DELTA(entry, NULL);
	entry->depth = 0;

	oi.sizep = &size;
	oi.typep = &type;
	if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
		/*
		 * We failed to get the info from this pack for some reason;
		 * fall back to odb_read_object_info, which may find another copy.
		 * And if that fails, the error will be recorded in oe_type(entry)
		 * and dealt with in prepare_pack().
		 */
		oe_set_type(entry,
			    odb_read_object_info(the_repository->objects,
						 &entry->idx.oid, &size));
	} else {
		oe_set_type(entry, type);
	}
	SET_SIZE(entry, size);
}

/*
 * Follow the chain of deltas from this entry onward, throwing away any links
 * that cause us to hit a cycle (as determined by the DFS state flags in
 * the entries).
 *
 * We also detect too-long reused chains that would violate our --depth
 * limit.
 */
static void break_delta_chains(struct object_entry *entry)
{
	/*
	 * The actual depth of each object we will write is stored as an int,
	 * as it cannot exceed our int "depth" limit. But before we break
	 * changes based on that limit, we may potentially go as deep as the
	 * number of objects, which is elsewhere bounded to a uint32_t.
	 */
	uint32_t total_depth;
	struct object_entry *cur, *next;

	for (cur = entry, total_depth = 0;
	     cur;
	     cur = DELTA(cur), total_depth++) {
		if (cur->dfs_state == DFS_DONE) {
			/*
			 * We've already seen this object and know it isn't
			 * part of a cycle. We do need to append its depth
			 * to our count.
			 */
			total_depth += cur->depth;
			break;
		}

		/*
		 * We break cycles before looping, so an ACTIVE state (or any
		 * other cruft which made its way into the state variable)
		 * is a bug.
		 */
		if (cur->dfs_state != DFS_NONE)
			BUG("confusing delta dfs state in first pass: %d",
			    cur->dfs_state);

		/*
		 * Now we know this is the first time we've seen the object. If
		 * it's not a delta, we're done traversing, but we'll mark it
		 * done to save time on future traversals.
		 */
		if (!DELTA(cur)) {
			cur->dfs_state = DFS_DONE;
			break;
		}

		/*
		 * Mark ourselves as active and see if the next step causes
		 * us to cycle to another active object. It's important to do
		 * this _before_ we loop, because it impacts where we make the
		 * cut, and thus how our total_depth counter works.
		 * E.g., We may see a partial loop like:
		 *
		 *   A -> B -> C -> D -> B
		 *
		 * Cutting B->C breaks the cycle. But now the depth of A is
		 * only 1, and our total_depth counter is at 3. The size of the
		 * error is always one less than the size of the cycle we
		 * broke. Commits C and D were "lost" from A's chain.
		 *
		 * If we instead cut D->B, then the depth of A is correct at 3.
		 * We keep all commits in the chain that we examined.
		 */
		cur->dfs_state = DFS_ACTIVE;
		if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
			drop_reused_delta(cur);
			cur->dfs_state = DFS_DONE;
			break;
		}
	}

	/*
	 * And now that we've gone all the way to the bottom of the chain, we
	 * need to clear the active flags and set the depth fields as
	 * appropriate. Unlike the loop above, which can quit when it drops a
	 * delta, we need to keep going to look for more depth cuts. So we need
	 * an extra "next" pointer to keep going after we reset cur->delta.
	 */
	for (cur = entry; cur; cur = next) {
		next = DELTA(cur);

		/*
		 * We should have a chain of zero or more ACTIVE states down to
		 * a final DONE. We can quit after the DONE, because either it
		 * has no bases, or we've already handled them in a previous
		 * call.
		 */
		if (cur->dfs_state == DFS_DONE)
			break;
		else if (cur->dfs_state != DFS_ACTIVE)
			BUG("confusing delta dfs state in second pass: %d",
			    cur->dfs_state);

		/*
		 * If the total_depth is more than depth, then we need to snip
		 * the chain into two or more smaller chains that don't exceed
		 * the maximum depth. Most of the resulting chains will contain
		 * (depth + 1) entries (i.e., depth deltas plus one base), and
		 * the last chain (i.e., the one containing entry) will contain
		 * whatever entries are left over, namely
		 * (total_depth % (depth + 1)) of them.
		 *
		 * Since we are iterating towards decreasing depth, we need to
		 * decrement total_depth as we go, and we need to write to the
		 * entry what its final depth will be after all of the
		 * snipping. Since we're snipping into chains of length (depth
		 * + 1) entries, the final depth of an entry will be its
		 * original depth modulo (depth + 1). Any time we encounter an
		 * entry whose final depth is supposed to be zero, we snip it
		 * from its delta base, thereby making it so.
		 */
		cur->depth = (total_depth--) % (depth + 1);
		if (!cur->depth)
			drop_reused_delta(cur);

		cur->dfs_state = DFS_DONE;
	}
}

/*
 * Fill in type/size (and reusable delta links) for every entry in
 * to_pack: sort by pack/offset for sequential pack access, run
 * check_object() over each entry, mark very large objects no_try_delta,
 * then break delta cycles and over-deep chains in a second pass.
 */
static void get_object_details(void)
{
	uint32_t i;
	struct object_entry **sorted_by_offset;

	if (progress)
		progress_state = start_progress(the_repository,
						_("Counting objects"),
						to_pack.nr_objects);

	CALLOC_ARRAY(sorted_by_offset, to_pack.nr_objects);
	for (i = 0; i < to_pack.nr_objects; i++)
		sorted_by_offset[i] = to_pack.objects + i;
	QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);

	for (i = 0; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = sorted_by_offset[i];
		check_object(entry, i);
		if (entry->type_valid &&
		    oe_size_greater_than(&to_pack, entry,
					 repo_settings_get_big_file_threshold(the_repository)))
			entry->no_try_delta = 1;
		display_progress(progress_state, i + 1);
	}
	stop_progress(&progress_state);

	/*
	 * This must happen in a second pass, since we rely on the delta
	 * information for the whole list being completed.
	 */
	for (i = 0; i < to_pack.nr_objects; i++)
		break_delta_chains(&to_pack.objects[i]);

	free(sorted_by_offset);
}

/*
 * We search for deltas in a list sorted by type, by filename hash, and then
 * by size, so that we see progressively smaller and smaller files.
 * That's because we prefer deltas to be from the bigger file
 * to the smaller -- deletes are potentially cheaper, but perhaps
 * more importantly, the bigger file is likely the more recent
 * one. The deepest deltas are therefore the oldest objects which are
 * less susceptible to be accessed often.
 */
static int type_size_sort(const void *_a, const void *_b)
{
	const struct object_entry *a = *(struct object_entry **)_a;
	const struct object_entry *b = *(struct object_entry **)_b;
	const enum object_type a_type = oe_type(a);
	const enum object_type b_type = oe_type(b);
	const unsigned long a_size = SIZE(a);
	const unsigned long b_size = SIZE(b);

	if (a_type > b_type)
		return -1;
	if (a_type < b_type)
		return 1;
	if (a->hash > b->hash)
		return -1;
	if (a->hash < b->hash)
		return 1;
	if (a->preferred_base > b->preferred_base)
		return -1;
	if (a->preferred_base < b->preferred_base)
		return 1;
	if (use_delta_islands) {
		const int island_cmp = island_delta_cmp(&a->idx.oid, &b->idx.oid);
		if (island_cmp)
			return island_cmp;
	}
	if (a_size > b_size)
		return -1;
	if (a_size < b_size)
		return 1;
	/* final tie-break on list position, so the sort is stable */
	return a < b ? -1 : (a > b);  /* newest first */
}

/* One slot of the sliding delta-search window (see find_deltas()). */
struct unpacked {
	struct object_entry *entry;
	void *data;                 /* inflated object contents, lazily loaded */
	struct delta_index *index;  /* delta index over "data", lazily built */
	unsigned depth;             /* delta-chain depth of "entry" */
};

/*
 * Decide whether a freshly computed delta is worth keeping in the
 * in-memory delta cache, honoring max_delta_cache_size and
 * cache_max_small_delta_size.
 */
static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
			   unsigned long delta_size)
{
	if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
		return 0;

	if (delta_size < cache_max_small_delta_size)
		return 1;

	/* cache delta, if objects are large enough compared to delta size */
	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
		return 1;

	return 0;
}

/* Protect delta_cache_size */
static pthread_mutex_t cache_mutex;
#define cache_lock()		pthread_mutex_lock(&cache_mutex)
#define cache_unlock()		pthread_mutex_unlock(&cache_mutex)

/*
 * Protect object list partitioning (e.g. struct thread_param) and
 * progress_state
 */
static pthread_mutex_t progress_mutex;
#define progress_lock()		pthread_mutex_lock(&progress_mutex)
#define progress_unlock()	pthread_mutex_unlock(&progress_mutex)

/*
 * Access to struct object_entry is unprotected since each thread owns
 * a portion of the main object list. Just don't access object entries
 * ahead in the list because they can be stolen and would need
 * progress_mutex for protection.
 */

/*
 * Cheap "SIZE(lhs) < rhs" test: use the cached size when valid and only
 * fall back to the slow path when rhs is small enough that the cached
 * limit cannot answer the comparison.
 */
static inline int oe_size_less_than(struct packing_data *pack,
				    const struct object_entry *lhs,
				    unsigned long rhs)
{
	if (lhs->size_valid)
		return lhs->size_ < rhs;
	if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
		return 0;
	return oe_get_size_slow(pack, lhs) < rhs;
}

/*
 * Record the tree depth of "e" in pack->tree_depth, allocating the
 * side array on first use.
 */
static inline void oe_set_tree_depth(struct packing_data *pack,
				     struct object_entry *e,
				     unsigned int tree_depth)
{
	if (!pack->tree_depth)
		CALLOC_ARRAY(pack->tree_depth, pack->nr_alloc);
	pack->tree_depth[e - pack->objects] = tree_depth;
}

/*
 * Return the size of the object without doing any delta
 * reconstruction (so non-deltas are true object sizes, but deltas
 * return the size of the delta data).
 */
unsigned long oe_get_size_slow(struct packing_data *pack,
			       const struct object_entry *e)
{
	struct packed_git *p;
	struct pack_window *w_curs;
	unsigned char *buf;
	enum object_type type;
	unsigned long used, avail, size;

	if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
		/* non-delta: ask the object database for the canonical size */
		packing_data_lock(&to_pack);
		if (odb_read_object_info(the_repository->objects,
					 &e->idx.oid, &size) < 0)
			die(_("unable to get size of %s"),
			    oid_to_hex(&e->idx.oid));
		packing_data_unlock(&to_pack);
		return size;
	}

	p = oe_in_pack(pack, e);
	if (!p)
		BUG("when e->type is a delta, it must belong to a pack");

	/* delta: the size is encoded in the in-pack object header */
	packing_data_lock(&to_pack);
	w_curs = NULL;
	buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
	used = unpack_object_header_buffer(buf, avail, &type, &size);
	if (used == 0)
		die(_("unable to parse object header of %s"),
		    oid_to_hex(&e->idx.oid));

	unuse_pack(&w_curs);
	packing_data_unlock(&to_pack);
	return size;
}

/*
 * Attempt to deltify trg against src. Returns 1 when a (better) delta
 * was recorded on trg's entry, 0 when the pair was skipped or no
 * improvement was found, and -1 to tell the caller to stop scanning
 * this window slot (the types differ, and the window is type-sorted).
 */
static int try_delta(struct unpacked *trg, struct unpacked *src,
		     unsigned max_depth, unsigned long *mem_usage)
{
	struct object_entry *trg_entry = trg->entry;
	struct object_entry *src_entry = src->entry;
	unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
	unsigned ref_depth;
	enum object_type type;
	void *delta_buf;

	/* Don't bother doing diffs between different types */
	if (oe_type(trg_entry) != oe_type(src_entry))
		return -1;

	/*
	 * We do not bother to try a delta that we discarded on an
	 * earlier try, but only when reusing delta data.  Note that
	 * src_entry that is marked as the preferred_base should always
	 * be considered, as even if we produce a suboptimal delta against
	 * it, we will still save the transfer cost, as we already know
	 * the other side has it and we won't send src_entry at all.
	 */
	if (reuse_delta && IN_PACK(trg_entry) &&
	    IN_PACK(trg_entry) == IN_PACK(src_entry) &&
	    !src_entry->preferred_base &&
	    trg_entry->in_pack_type != OBJ_REF_DELTA &&
	    trg_entry->in_pack_type != OBJ_OFS_DELTA)
		return 0;

	/* Let's not bust the allowed depth. */
	if (src->depth >= max_depth)
		return 0;

	/* Now some size filtering heuristics. */
	trg_size = SIZE(trg_entry);
	if (!DELTA(trg_entry)) {
		/* only beat a from-scratch encoding (size/2 minus base ref) */
		max_size = trg_size/2 - the_hash_algo->rawsz;
		ref_depth = 1;
	} else {
		/* only beat the delta we already have */
		max_size = DELTA_SIZE(trg_entry);
		ref_depth = trg->depth;
	}
	/* penalize candidates that would land deeper in the chain */
	max_size = (uint64_t)max_size * (max_depth - src->depth) /
						(max_depth - ref_depth + 1);
	if (max_size == 0)
		return 0;
	src_size = SIZE(src_entry);
	sizediff = src_size < trg_size ? trg_size - src_size : 0;
	if (sizediff >= max_size)
		return 0;
	if (trg_size < src_size / 32)
		return 0;

	if (!in_same_island(&trg->entry->idx.oid, &src->entry->idx.oid))
		return 0;

	/* Load data if not already done */
	if (!trg->data) {
		packing_data_lock(&to_pack);
		trg->data = odb_read_object(the_repository->objects,
					    &trg_entry->idx.oid, &type,
					    &sz);
		packing_data_unlock(&to_pack);
		if (!trg->data)
			die(_("object %s cannot be read"),
			    oid_to_hex(&trg_entry->idx.oid));
		if (sz != trg_size)
			die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
			    oid_to_hex(&trg_entry->idx.oid), (uintmax_t)sz,
			    (uintmax_t)trg_size);
		*mem_usage += sz;
	}
	if (!src->data) {
		packing_data_lock(&to_pack);
		src->data = odb_read_object(the_repository->objects,
					    &src_entry->idx.oid, &type,
					    &sz);
		packing_data_unlock(&to_pack);
		if (!src->data) {
			if (src_entry->preferred_base) {
				static int warned = 0;
				if (!warned++)
					warning(_("object %s cannot be read"),
						oid_to_hex(&src_entry->idx.oid));
				/*
				 * Those objects are not included in the
				 * resulting pack.  Be resilient and ignore
				 * them if they can't be read, in case the
				 * pack could be created nevertheless.
				 */
				return 0;
			}
			die(_("object %s cannot be read"),
			    oid_to_hex(&src_entry->idx.oid));
		}
		if (sz != src_size)
			die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
			    oid_to_hex(&src_entry->idx.oid), (uintmax_t)sz,
			    (uintmax_t)src_size);
		*mem_usage += sz;
	}
	if (!src->index) {
		src->index = create_delta_index(src->data, src_size);
		if (!src->index) {
			static int warned = 0;
			if (!warned++)
				warning(_("suboptimal pack - out of memory"));
			return 0;
		}
		*mem_usage += sizeof_delta_index(src->index);
	}

	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
	if (!delta_buf)
		return 0;

	if (DELTA(trg_entry)) {
		/* Prefer only shallower same-sized deltas. */
		if (delta_size == DELTA_SIZE(trg_entry) &&
		    src->depth + 1 >= trg->depth) {
			free(delta_buf);
			return 0;
		}
	}

	/*
	 * Handle memory allocation outside of the cache
	 * accounting lock.  Compiler will optimize the strangeness
	 * away when NO_PTHREADS is defined.
	 */
	free(trg_entry->delta_data);
	cache_lock();
	if (trg_entry->delta_data) {
		delta_cache_size -= DELTA_SIZE(trg_entry);
		trg_entry->delta_data = NULL;
	}
	if (delta_cacheable(src_size, trg_size, delta_size)) {
		delta_cache_size += delta_size;
		cache_unlock();
		trg_entry->delta_data = xrealloc(delta_buf, delta_size);
	} else {
		cache_unlock();
		free(delta_buf);
	}

	SET_DELTA(trg_entry, src_entry);
	SET_DELTA_SIZE(trg_entry, delta_size);
	trg->depth = src->depth + 1;

	return 1;
}

/*
 * Return the length of the deepest delta chain hanging off of "me",
 * counted from depth n (recurses over delta children and siblings).
 */
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{
	struct object_entry *child = DELTA_CHILD(me);
	unsigned int m = n;
	while (child) {
		const unsigned int c = check_delta_limit(child, n + 1);
		if (m < c)
			m = c;
		child = DELTA_SIBLING(child);
	}
	return m;
}

/*
 * Release the delta index and object data held by a window slot and
 * return the number of bytes freed, so the caller can adjust its
 * memory accounting.
 */
static unsigned long free_unpacked(struct unpacked *n)
{
	unsigned long freed_mem = sizeof_delta_index(n->index);
	free_delta_index(n->index);
	n->index = NULL;
	if (n->data) {
		freed_mem += SIZE(n->entry);
		FREE_AND_NULL(n->data);
	}
	n->entry = NULL;
	n->depth = 0;
	return freed_mem;
}

/*
 * Core delta search: slide a circular window of "window" slots over
 * *list, trying each new entry against every populated slot via
 * try_delta(). *list_size is consumed under progress_lock because
 * ll_find_deltas() may steal work from the tail concurrently.
 */
static void find_deltas(struct object_entry **list, unsigned *list_size,
			int window, int depth, unsigned *processed)
{
	uint32_t i, idx = 0, count = 0;
	struct unpacked *array;
	unsigned long mem_usage = 0;

	CALLOC_ARRAY(array, window);

	for (;;) {
		struct object_entry *entry;
		struct unpacked *n = array + idx;
		int j, max_depth, best_base = -1;

		progress_lock();
		if (!*list_size) {
			progress_unlock();
			break;
		}
		entry = *list++;
		(*list_size)--;
		if (!entry->preferred_base) {
			(*processed)++;
			display_progress(progress_state, *processed);
		}
		progress_unlock();

		mem_usage -= free_unpacked(n);
		n->entry = entry;

		/* evict oldest window slots until under the memory limit */
		while (window_memory_limit &&
		       mem_usage > window_memory_limit &&
		       count > 1) {
			const uint32_t tail = (idx + window - count) % window;
			mem_usage -= free_unpacked(array + tail);
			count--;
		}

		/* We do not compute delta to *create* objects we are not
		 * going to pack.
		 */
		if (entry->preferred_base)
			goto next;

		/*
		 * If the current object is at pack edge, take the depth the
		 * objects that depend on the current object into account
		 * otherwise they would become too deep.
		 */
		max_depth = depth;
		if (DELTA_CHILD(entry)) {
			max_depth -= check_delta_limit(entry, 0);
			if (max_depth <= 0)
				goto next;
		}

		j = window;
		while (--j > 0) {
			int ret;
			uint32_t other_idx = idx + j;
			struct unpacked *m;
			if (other_idx >= window)
				other_idx -= window;
			m = array + other_idx;
			if (!m->entry)
				break;
			ret = try_delta(n, m, max_depth, &mem_usage);
			if (ret < 0)
				break;
			else if (ret > 0)
				best_base = other_idx;
		}

		/*
		 * If we decided to cache the delta data, then it is best
		 * to compress it right away.  First because we have to do
		 * it anyway, and doing it here while we're threaded will
		 * save a lot of time in the non threaded write phase,
		 * as well as allow for caching more deltas within
		 * the same cache size limit.
		 * ...
		 * But only if not writing to stdout, since in that case
		 * the network is most likely throttling writes anyway,
		 * and therefore it is best to go to the write phase ASAP
		 * instead, as we can afford spending more time compressing
		 * between writes at that moment.
		 */
		if (entry->delta_data && !pack_to_stdout) {
			unsigned long size;

			size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
			if (size < (1U << OE_Z_DELTA_BITS)) {
				entry->z_delta_size = size;
				cache_lock();
				delta_cache_size -= DELTA_SIZE(entry);
				delta_cache_size += entry->z_delta_size;
				cache_unlock();
			} else {
				FREE_AND_NULL(entry->delta_data);
				entry->z_delta_size = 0;
			}
		}

		/* if we made n a delta, and if n is already at max
		 * depth, leaving it in the window is pointless.  we
		 * should evict it first.
		 */
		if (DELTA(entry) && max_depth <= n->depth)
			continue;

		/*
		 * Move the best delta base up in the window, after the
		 * currently deltified object, to keep it longer.  It will
		 * be the first base object to be attempted next.
		 */
		if (DELTA(entry)) {
			struct unpacked swap = array[best_base];
			int dist = (window + idx - best_base) % window;
			int dst = best_base;
			while (dist--) {
				int src = (dst + 1) % window;
				array[dst] = array[src];
				dst = src;
			}
			array[dst] = swap;
		}

		next:
		idx++;
		if (count + 1 < window)
			count++;
		if (idx >= window)
			idx = 0;
	}

	for (i = 0; i < window; ++i) {
		free_delta_index(array[i].index);
		free(array[i].data);
	}
	free(array);
}

/*
 * The main object list is split into smaller lists, each is handed to
 * one worker.
 *
 * The main thread waits on the condition that (at least) one of the workers
 * has stopped working (which is indicated in the .working member of
 * struct thread_params).
 *
 * When a work thread has completed its work, it sets .working to 0 and
 * signals the main thread and waits on the condition that .data_ready
 * becomes 1.
 *
 * The main thread steals half of the work from the worker that has
 * most work left to hand it to the idle worker.
 */

struct thread_params {
	pthread_t thread;
	struct object_entry **list;      /* this worker's slice of the list */
	struct packing_region *regions;  /* used by the by-path variant */
	unsigned list_size;
	unsigned remaining;              /* unprocessed entries/regions left */
	int window;
	int depth;
	int working;                     /* 0 = idle, waiting for more work */
	int data_ready;                  /* main thread handed out new work */
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	unsigned *processed;             /* shared progress counter */
};

static pthread_cond_t progress_cond;

/*
 * Mutex and conditional variable can't be statically-initialized on Windows.
 */
static void init_threaded_search(void)
{
	pthread_mutex_init(&cache_mutex, NULL);
	pthread_mutex_init(&progress_mutex, NULL);
	pthread_cond_init(&progress_cond, NULL);
}

static void cleanup_threaded_search(void)
{
	pthread_cond_destroy(&progress_cond);
	pthread_mutex_destroy(&cache_mutex);
	pthread_mutex_destroy(&progress_mutex);
}

/*
 * Worker thread body: run find_deltas() over the assigned slice, then
 * announce idleness via progress_cond and sleep until the main thread
 * hands over stolen work (data_ready).
 */
static void *threaded_find_deltas(void *arg)
{
	struct thread_params *me = arg;

	progress_lock();
	while (me->remaining) {
		progress_unlock();

		find_deltas(me->list, &me->remaining,
			    me->window, me->depth, me->processed);

		progress_lock();
		me->working = 0;
		pthread_cond_signal(&progress_cond);
		progress_unlock();

		/*
		 * We must not set ->data_ready before we wait on the
		 * condition because the main thread may have set it to 1
		 * before we get here. In order to be sure that new
		 * work is available if we see 1 in ->data_ready, it
		 * was initialized to 0 before this thread was spawned
		 * and we reset it to 0 right away.
		 */
		pthread_mutex_lock(&me->mutex);
		while (!me->data_ready)
			pthread_cond_wait(&me->cond, &me->mutex);
		me->data_ready = 0;
		pthread_mutex_unlock(&me->mutex);

		progress_lock();
	}
	progress_unlock();
	/* leave ->working 1 so that this doesn't get more work assigned */
	return NULL;
}

/*
 * Multi-threaded driver for find_deltas(): partition the list among
 * delta_search_threads workers (splitting on name-hash boundaries) and
 * rebalance by stealing half the largest remaining slice whenever a
 * worker goes idle. Falls back to a direct call for <= 1 thread.
 */
static void ll_find_deltas(struct object_entry **list, unsigned list_size,
			   int window, int depth, unsigned *processed)
{
	struct thread_params *p;
	int i, ret, active_threads = 0;

	init_threaded_search();

	if (delta_search_threads <= 1) {
		find_deltas(list, &list_size, window, depth, processed);
		cleanup_threaded_search();
		return;
	}
	if (progress > pack_to_stdout)
		fprintf_ln(stderr, _("Delta compression using up to %d threads"),
			   delta_search_threads);
	CALLOC_ARRAY(p, delta_search_threads);

	/* Partition the work amongst work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		unsigned sub_size = list_size / (delta_search_threads - i);

		/* don't use too small segments or no deltas will be found */
		if (sub_size < 2*window && i+1 < delta_search_threads)
			sub_size = 0;

		p[i].window = window;
		p[i].depth = depth;
		p[i].processed = processed;
		p[i].working = 1;
		p[i].data_ready = 0;

		/* try to split chunks on "path" boundaries */
		while (sub_size && sub_size < list_size &&
		       list[sub_size]->hash &&
		       list[sub_size]->hash == list[sub_size-1]->hash)
			sub_size++;

		p[i].list = list;
		p[i].list_size = sub_size;
		p[i].remaining = sub_size;

		list += sub_size;
		list_size -= sub_size;
	}

	/* Start work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		if (!p[i].list_size)
			continue;
		pthread_mutex_init(&p[i].mutex, NULL);
		pthread_cond_init(&p[i].cond, NULL);
		ret = pthread_create(&p[i].thread, NULL,
				     threaded_find_deltas, &p[i]);
		if (ret)
			die(_("unable to create thread: %s"), strerror(ret));
		active_threads++;
	}

	/*
	 * Now let's wait for work completion.  Each time a thread is done
	 * with its work, we steal half of the remaining work from the
	 * thread with the largest number of unprocessed objects and give
	 * it to that newly idle thread.  This ensures good load balancing
	 * until the remaining object list segments are simply too short
	 * to be worth splitting anymore.
	 */
	while (active_threads) {
		struct thread_params *target = NULL;
		struct thread_params *victim = NULL;
		unsigned sub_size = 0;

		progress_lock();
		for (;;) {
			for (i = 0; !target && i < delta_search_threads; i++)
				if (!p[i].working)
					target = &p[i];
			if (target)
				break;
			pthread_cond_wait(&progress_cond, &progress_mutex);
		}

		for (i = 0; i < delta_search_threads; i++)
			if (p[i].remaining > 2*window &&
			    (!victim || victim->remaining < p[i].remaining))
				victim = &p[i];
		if (victim) {
			sub_size = victim->remaining / 2;
			list = victim->list + victim->list_size - sub_size;
			while (sub_size && list[0]->hash &&
			       list[0]->hash == list[-1]->hash) {
				list++;
				sub_size--;
			}
			if (!sub_size) {
				/*
				 * It is possible for some "paths" to have
				 * so many objects that no hash boundary
				 * might be found.  Let's just steal the
				 * exact half in that case.
				 */
				sub_size = victim->remaining / 2;
				list -= sub_size;
			}
			target->list = list;
			victim->list_size -= sub_size;
			victim->remaining -= sub_size;
		}
		target->list_size = sub_size;
		target->remaining = sub_size;
		target->working = 1;
		progress_unlock();

		pthread_mutex_lock(&target->mutex);
		target->data_ready = 1;
		pthread_cond_signal(&target->cond);
		pthread_mutex_unlock(&target->mutex);

		/* no work left to hand out: let this thread exit */
		if (!sub_size) {
			pthread_join(target->thread, NULL);
			pthread_cond_destroy(&target->cond);
			pthread_mutex_destroy(&target->mutex);
			active_threads--;
		}
	}
	cleanup_threaded_search();
	free(p);
}

/*
 * Is this object going into the pack, either directly (packlist) or via
 * the reused-packfile bitmap?
 */
static int obj_is_packed(const struct object_id *oid)
{
	return packlist_find(&to_pack, oid) ||
		(reuse_packfile_bitmap &&
		 bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
}

/*
 * Add "oid" (a tag) and every nested tag it points at to the pack,
 * stopping at the first non-tag tagged object.
 */
static void add_tag_chain(const struct object_id *oid)
{
	struct tag *tag;

	/*
	 * We catch duplicates already in add_object_entry(), but we'd
	 * prefer to do this extra check to avoid having to parse the
	 * tag at all if we already know that it's being packed (e.g., if
	 * it was included via bitmaps, we would not have parsed it
	 * previously).
	 */
	if (obj_is_packed(oid))
		return;

	tag = lookup_tag(the_repository, oid);
	while (1) {
		if (!tag || parse_tag(tag) || !tag->tagged)
			die(_("unable to pack objects reachable from tag %s"),
			    oid_to_hex(oid));

		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);

		if (tag->tagged->type != OBJ_TAG)
			return;

		tag = (struct tag *)tag->tagged;
	}
}

/*
 * for_each_ref() callback: include the tag chain for any ref whose
 * peeled object is already being packed (--include-tag support).
 */
static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, const struct object_id *oid,
		       int flag UNUSED, void *cb_data UNUSED)
{
	struct object_id peeled;

	if (!peel_iterated_oid(the_repository, oid, &peeled) && obj_is_packed(&peeled))
		add_tag_chain(oid);
	return 0;
}

/*
 * Filter for the delta-search phase: skip entries that already have a
 * reused delta, are tiny (< 50 bytes), unreadable, or explicitly marked
 * no_try_delta.
 */
static int should_attempt_deltas(struct object_entry *entry)
{
	if (DELTA(entry))
		/* This happens if we decided to reuse existing
		 * delta from a pack.  "reuse_delta &&" is implied.
		 */
		return 0;

	if (!entry->type_valid ||
	    oe_size_less_than(&to_pack, entry, 50))
		return 0;

	if (entry->no_try_delta)
		return 0;

	if (!entry->preferred_base) {
		if (oe_type(entry) < 0)
			die(_("unable to get type of object %s"),
			    oid_to_hex(&entry->idx.oid));
	} else if (oe_type(entry) < 0) {
		/*
		 * This object is not found, but we
		 * don't have to include it anyway.
		 */
		return 0;
	}

	return 1;
}

/*
 * Run the delta search over a single path-walk region: collect the
 * region's delta-eligible entries, sort them with type_size_sort(), and
 * hand them to find_deltas().
 */
static void find_deltas_for_region(struct object_entry *list,
				   struct packing_region *region,
				   unsigned int *processed)
{
	struct object_entry **delta_list;
	unsigned int delta_list_nr = 0;

	ALLOC_ARRAY(delta_list, region->nr);
	for (size_t i = 0; i < region->nr; i++) {
		struct object_entry *entry = list + region->start + i;
		if (should_attempt_deltas(entry))
			delta_list[delta_list_nr++] = entry;
	}

	QSORT(delta_list, delta_list_nr, type_size_sort);
	find_deltas(delta_list, &delta_list_nr, window, depth, processed);
	free(delta_list);
}

/*
 * Single-threaded driver: process "nr" regions starting at "start",
 * with progress reporting sized by the last region's extent.
 */
static void find_deltas_by_region(struct object_entry *list,
				  struct packing_region *regions,
				  size_t start, size_t nr)
{
	unsigned int processed = 0;
	size_t progress_nr;

	if (!nr)
		return;

	progress_nr = regions[nr - 1].start + regions[nr - 1].nr;

	if (progress)
		progress_state = start_progress(the_repository,
						_("Compressing objects by path"),
						progress_nr);

	while (nr--)
		find_deltas_for_region(list,
				       &regions[start++],
				       &processed);

	display_progress(progress_state, progress_nr);
	stop_progress(&progress_state);
}

/*
 * Worker thread body for the by-path search: drain the assigned
 * regions one at a time, then follow the same idle/steal handshake as
 * threaded_find_deltas().
 */
static void *threaded_find_deltas_by_path(void *arg)
{
	struct thread_params *me = arg;

	progress_lock();
	while (me->remaining) {
		while (me->remaining) {
			progress_unlock();
			find_deltas_for_region(to_pack.objects,
					       me->regions,
					       me->processed);
			progress_lock();
			me->remaining--;
			me->regions++;
		}

		me->working = 0;
		pthread_cond_signal(&progress_cond);
		progress_unlock();

		/*
		 * We must not set ->data_ready before we wait on the
		 * condition because the main thread may have set it to 1
		 * before we get here.
		 * In order to be sure that new
		 * work is available if we see 1 in ->data_ready, it
		 * was initialized to 0 before this thread was spawned
		 * and we reset it to 0 right away.
		 */
		pthread_mutex_lock(&me->mutex);
		while (!me->data_ready)
			pthread_cond_wait(&me->cond, &me->mutex);
		me->data_ready = 0;
		pthread_mutex_unlock(&me->mutex);

		progress_lock();
	}
	progress_unlock();
	/* leave ->working 1 so that this doesn't get more work assigned */
	return NULL;
}

/*
 * Multi-threaded driver for the path-based delta search: partition
 * the `nr` regions across delta_search_threads workers, then keep the
 * workers busy by stealing half of the largest remaining share for
 * each thread that runs out of work (mirrors ll_find_deltas()).
 */
static void ll_find_deltas_by_region(struct object_entry *list,
				     struct packing_region *regions,
				     uint32_t start, uint32_t nr)
{
	struct thread_params *p;
	int i, ret, active_threads = 0;
	unsigned int processed = 0;
	uint32_t progress_nr;
	/*
	 * NOTE(review): when nr == 0 we return below without a matching
	 * cleanup_threaded_search(), unlike the single-threaded path —
	 * confirm whether the early return should clean up too.
	 */
	init_threaded_search();

	if (!nr)
		return;

	progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
	if (delta_search_threads <= 1) {
		find_deltas_by_region(list, regions, start, nr);
		cleanup_threaded_search();
		return;
	}

	if (progress > pack_to_stdout)
		fprintf_ln(stderr,
			   Q_("Path-based delta compression using up to %d thread",
			      "Path-based delta compression using up to %d threads",
			      delta_search_threads),
			   delta_search_threads);
	CALLOC_ARRAY(p, delta_search_threads);

	if (progress)
		progress_state = start_progress(the_repository,
						_("Compressing objects by path"),
						progress_nr);
	/* Partition the work amongst work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		unsigned sub_size = nr / (delta_search_threads - i);

		p[i].window = window;
		p[i].depth = depth;
		p[i].processed = &processed;
		p[i].working = 1;
		p[i].data_ready = 0;

		p[i].regions = regions;
		p[i].list_size = sub_size;
		p[i].remaining = sub_size;

		regions += sub_size;
		nr -= sub_size;
	}

	/* Start work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		if (!p[i].list_size)
			continue;
		pthread_mutex_init(&p[i].mutex, NULL);
		pthread_cond_init(&p[i].cond, NULL);
		ret = pthread_create(&p[i].thread, NULL,
				     threaded_find_deltas_by_path, &p[i]);
		if (ret)
			die(_("unable to create thread: %s"), strerror(ret));
		active_threads++;
	}

	/*
	 * Now let's wait for work completion. Each time a thread is done
	 * with its work, we steal half of the remaining work from the
	 * thread with the largest number of unprocessed objects and give
	 * it to that newly idle thread. This ensure good load balancing
	 * until the remaining object list segments are simply too short
	 * to be worth splitting anymore.
	 */
	while (active_threads) {
		struct thread_params *target = NULL;
		struct thread_params *victim = NULL;
		unsigned sub_size = 0;

		progress_lock();
		for (;;) {
			for (i = 0; !target && i < delta_search_threads; i++)
				if (!p[i].working)
					target = &p[i];
			if (target)
				break;
			pthread_cond_wait(&progress_cond, &progress_mutex);
		}

		/* Steal from the thread with the most remaining regions. */
		for (i = 0; i < delta_search_threads; i++)
			if (p[i].remaining > 2*window &&
			    (!victim || victim->remaining < p[i].remaining))
				victim = &p[i];
		if (victim) {
			sub_size = victim->remaining / 2;
			target->regions = victim->regions + victim->remaining - sub_size;
			victim->list_size -= sub_size;
			victim->remaining -= sub_size;
		}
		target->list_size = sub_size;
		target->remaining = sub_size;
		target->working = 1;
		progress_unlock();

		pthread_mutex_lock(&target->mutex);
		target->data_ready = 1;
		pthread_cond_signal(&target->cond);
		pthread_mutex_unlock(&target->mutex);

		/* Nothing left to steal: the thread terminates; reap it. */
		if (!sub_size) {
			pthread_join(target->thread, NULL);
			pthread_cond_destroy(&target->cond);
			pthread_mutex_destroy(&target->mutex);
			active_threads--;
		}
	}
	cleanup_threaded_search();
	free(p);

	display_progress(progress_state, progress_nr);
	stop_progress(&progress_state);
}

/*
 * Select delta candidates among the collected objects and run the
 * delta search over them (path-walk regions first if enabled, then
 * the global type/size-sorted list).
 */
static void prepare_pack(int window, int depth)
{
	struct object_entry **delta_list;
	uint32_t i, nr_deltas;
	unsigned n;

	if (use_delta_islands)
		resolve_tree_islands(the_repository, progress, &to_pack);

	get_object_details();

	/*
	 * If we're locally repacking then we need to be doubly careful
	 * from now on in order to make sure no stealth corruption gets
	 * propagated to the new pack. Clients receiving streamed packs
	 * should validate everything they get anyway so no need to incur
	 * the additional cost here in that case.
	 */
	if (!pack_to_stdout)
		do_check_packed_object_crc = 1;

	if (!to_pack.nr_objects || !window || !depth)
		return;

	if (path_walk)
		ll_find_deltas_by_region(to_pack.objects, to_pack.regions,
					 0, to_pack.nr_regions);

	ALLOC_ARRAY(delta_list, to_pack.nr_objects);
	nr_deltas = n = 0;

	for (i = 0; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = to_pack.objects + i;

		if (!should_attempt_deltas(entry))
			continue;

		/* Preferred bases are delta bases only, not results. */
		if (!entry->preferred_base)
			nr_deltas++;

		delta_list[n++] = entry;
	}

	if (nr_deltas && n > 1) {
		unsigned nr_done = 0;

		if (progress)
			progress_state = start_progress(the_repository,
							_("Compressing objects"),
							nr_deltas);
		QSORT(delta_list, n, type_size_sort);
		ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
		stop_progress(&progress_state);
		if (nr_done != nr_deltas)
			die(_("inconsistency with delta count"));
	}
	free(delta_list);
}

/*
 * Config callback: parse pack.* (and uploadpack.blobpackfileuri)
 * configuration into the file-scope option globals; anything not
 * handled here falls through to git_default_config().
 */
static int git_pack_config(const char *k, const char *v,
			   const struct config_context *ctx, void *cb)
{
	if (!strcmp(k, "pack.window")) {
		window = git_config_int(k, v,
ctx->kvi); 3619 return 0; 3620 } 3621 if (!strcmp(k, "pack.windowmemory")) { 3622 window_memory_limit = git_config_ulong(k, v, ctx->kvi); 3623 return 0; 3624 } 3625 if (!strcmp(k, "pack.depth")) { 3626 depth = git_config_int(k, v, ctx->kvi); 3627 return 0; 3628 } 3629 if (!strcmp(k, "pack.deltacachesize")) { 3630 max_delta_cache_size = git_config_int(k, v, ctx->kvi); 3631 return 0; 3632 } 3633 if (!strcmp(k, "pack.deltacachelimit")) { 3634 cache_max_small_delta_size = git_config_int(k, v, ctx->kvi); 3635 return 0; 3636 } 3637 if (!strcmp(k, "pack.writebitmaphashcache")) { 3638 if (git_config_bool(k, v)) 3639 write_bitmap_options |= BITMAP_OPT_HASH_CACHE; 3640 else 3641 write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE; 3642 } 3643 3644 if (!strcmp(k, "pack.writebitmaplookuptable")) { 3645 if (git_config_bool(k, v)) 3646 write_bitmap_options |= BITMAP_OPT_LOOKUP_TABLE; 3647 else 3648 write_bitmap_options &= ~BITMAP_OPT_LOOKUP_TABLE; 3649 } 3650 3651 if (!strcmp(k, "pack.usebitmaps")) { 3652 use_bitmap_index_default = git_config_bool(k, v); 3653 return 0; 3654 } 3655 if (!strcmp(k, "pack.allowpackreuse")) { 3656 int res = git_parse_maybe_bool_text(v); 3657 if (res < 0) { 3658 if (!strcasecmp(v, "single")) 3659 allow_pack_reuse = SINGLE_PACK_REUSE; 3660 else if (!strcasecmp(v, "multi")) 3661 allow_pack_reuse = MULTI_PACK_REUSE; 3662 else 3663 die(_("invalid pack.allowPackReuse value: '%s'"), v); 3664 } else if (res) { 3665 allow_pack_reuse = SINGLE_PACK_REUSE; 3666 } else { 3667 allow_pack_reuse = NO_PACK_REUSE; 3668 } 3669 return 0; 3670 } 3671 if (!strcmp(k, "pack.threads")) { 3672 delta_search_threads = git_config_int(k, v, ctx->kvi); 3673 if (delta_search_threads < 0) 3674 die(_("invalid number of threads specified (%d)"), 3675 delta_search_threads); 3676 if (!HAVE_THREADS && delta_search_threads != 1) { 3677 warning(_("no threads support, ignoring %s"), k); 3678 delta_search_threads = 0; 3679 } 3680 return 0; 3681 } 3682 if (!strcmp(k, "pack.indexversion")) { 3683 
		pack_idx_opts.version = git_config_int(k, v, ctx->kvi);
		if (pack_idx_opts.version > 2)
			die(_("bad pack.indexVersion=%"PRIu32),
			    pack_idx_opts.version);
		return 0;
	}
	if (!strcmp(k, "pack.writereverseindex")) {
		if (git_config_bool(k, v))
			pack_idx_opts.flags |= WRITE_REV;
		else
			pack_idx_opts.flags &= ~WRITE_REV;
		return 0;
	}
	if (!strcmp(k, "uploadpack.blobpackfileuri")) {
		struct configured_exclusion *ex;
		const char *oid_end, *pack_end;
		/*
		 * Stores the pack hash. This is not a true object ID, but is
		 * of the same form.
		 */
		struct object_id pack_hash;

		if (!v)
			return config_error_nonbool(k);

		ex = xmalloc(sizeof(*ex));
		if (parse_oid_hex(v, &ex->e.oid, &oid_end) ||
		    *oid_end != ' ' ||
		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
		    *pack_end != ' ')
			die(_("value of uploadpack.blobpackfileuri must be "
			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
		if (oidmap_get(&configured_exclusions, &ex->e.oid))
			die(_("object already configured in another "
			      "uploadpack.blobpackfileuri (got '%s')"), v);
		/* Keep the pack hash as a NUL-terminated hex string. */
		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
		ex->uri = xstrdup(pack_end + 1);
		oidmap_put(&configured_exclusions, ex);
	}
	return git_default_config(k, v, ctx, cb);
}

/* Counters for trace2 output when in --stdin-packs mode. */
static int stdin_packs_found_nr;
static int stdin_packs_hints_nr;

/*
 * for_each_object_in_pack() callback for --stdin-packs: add every
 * object of an included pack to to_pack, and queue commits as
 * starting points for the subsequent revision walk.
 */
static int add_object_entry_from_pack(const struct object_id *oid,
				      struct packed_git *p,
				      uint32_t pos,
				      void *_data)
{
	off_t ofs;
	enum object_type type = OBJ_NONE;

	display_progress(progress_state, ++nr_seen);

	if (have_duplicate_entry(oid, 0))
		return 0;

	ofs = nth_packed_object_offset(p, pos);
	if (!want_object_in_pack(oid, 0, &p, &ofs))
		return 0;

	if (p) {
		struct object_info oi = OBJECT_INFO_INIT;

		oi.typep = &type;
		if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
			die(_("could not get type of object %s in pack %s"),
			    oid_to_hex(oid), p->pack_name);
		} else if (type == OBJ_COMMIT) {
			struct rev_info *revs = _data;
			/*
			 * commits in included packs are used as starting points for the
			 * subsequent revision walk
			 */
			add_pending_oid(revs, NULL, oid, 0);
		}

		stdin_packs_found_nr++;
	}

	create_object_entry(oid, type, 0, 0, 0, p, ofs);

	return 0;
}

/*
 * Traversal callback for --stdin-packs: in "follow" mode add the
 * object itself; otherwise only backfill name-hash hints on entries
 * that are already in to_pack.
 */
static void show_object_pack_hint(struct object *object, const char *name,
				  void *data)
{
	enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
	if (mode == STDIN_PACKS_MODE_FOLLOW) {
		/* Skip blobs we do not actually have. */
		if (object->type == OBJ_BLOB &&
		    !odb_has_object(the_repository->objects, &object->oid, 0))
			return;
		add_object_entry(&object->oid, object->type, name, 0);
	} else {
		struct object_entry *oe = packlist_find(&to_pack, &object->oid);
		if (!oe)
			return;

		/*
		 * Our 'to_pack' list was constructed by iterating all
		 * objects packed in included packs, and so doesn't have
		 * a non-zero hash field that you would typically pick
		 * up during a reachability traversal.
		 *
		 * Make a best-effort attempt to fill in the ->hash and
		 * ->no_try_delta fields here in order to perhaps
		 * improve the delta selection process.
		 */
		oe->hash = pack_name_hash_fn(name);
		oe->no_try_delta = name && no_try_delta(name);

		stdin_packs_hints_nr++;
	}
}

/*
 * Commit counterpart of show_object_pack_hint(); in "follow" mode the
 * commit is added like any other object.
 */
static void show_commit_pack_hint(struct commit *commit, void *data)
{
	enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;

	if (mode == STDIN_PACKS_MODE_FOLLOW) {
		show_object_pack_hint((struct object *)commit, "", data);
		return;
	}

	/* nothing to do; commits don't have a namehash */

}

/* QSORT comparator on the packed_git stored in string_list ->util. */
static int pack_mtime_cmp(const void *_a, const void *_b)
{
	struct packed_git *a = ((const struct string_list_item*)_a)->util;
	struct packed_git *b = ((const struct string_list_item*)_b)->util;

	/*
	 * order packs by descending mtime so that objects are laid out
	 * roughly as newest-to-oldest
	 */
	if (a->mtime < b->mtime)
		return 1;
	else if (b->mtime < a->mtime)
		return -1;
	else
		return 0;
}

/*
 * Read pack names from stdin ("^name" marks an exclusion), resolve
 * them to packed_git handles, mark excluded packs as kept in-core,
 * and add every object of the included packs to to_pack.
 */
static void read_packs_list_from_stdin(struct rev_info *revs)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct strbuf buf = STRBUF_INIT;
	struct string_list include_packs = STRING_LIST_INIT_DUP;
	struct string_list exclude_packs = STRING_LIST_INIT_DUP;
	struct string_list_item *item = NULL;

	struct packed_git *p;

	while (strbuf_getline(&buf, stdin) != EOF) {
		if (!buf.len)
			continue;

		if (*buf.buf == '^')
			string_list_append(&exclude_packs, buf.buf + 1);
		else
			string_list_append(&include_packs, buf.buf);

		strbuf_reset(&buf);
	}

	string_list_sort(&include_packs);
	string_list_remove_duplicates(&include_packs, 0);
	string_list_sort(&exclude_packs);
	string_list_remove_duplicates(&exclude_packs, 0);

	/* Resolve each listed name to its packed_git, if we have it. */
	for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
		const char *pack_name = pack_basename(p);

		if ((item = string_list_lookup(&include_packs, pack_name)))
			item->util = p;
		if ((item = string_list_lookup(&exclude_packs, pack_name)))
			item->util = p;
	}

	/*
	 * Arguments we got on stdin may not even be packs. First
	 * check that to avoid segfaulting later on in
	 * e.g. pack_mtime_cmp(), excluded packs are handled below.
	 *
	 * Since we first parsed our STDIN and then sorted the input
	 * lines the pack we error on will be whatever line happens to
	 * sort first. This is lazy, it's enough that we report one
	 * bad case here, we don't need to report the first/last one,
	 * or all of them.
	 */
	for_each_string_list_item(item, &include_packs) {
		struct packed_git *p = item->util;
		if (!p)
			die(_("could not find pack '%s'"), item->string);
		if (!is_pack_valid(p))
			die(_("packfile %s cannot be accessed"), p->pack_name);
	}

	/*
	 * Then, handle all of the excluded packs, marking them as
	 * kept in-core so that later calls to add_object_entry()
	 * discard any objects that are also found in excluded packs.
	 */
	for_each_string_list_item(item, &exclude_packs) {
		struct packed_git *p = item->util;
		if (!p)
			die(_("could not find pack '%s'"), item->string);
		p->pack_keep_in_core = 1;
	}

	/*
	 * Order packs by descending mtime (newest first, matching
	 * pack_mtime_cmp()); use QSORT directly to access the
	 * string_list_item's ->util pointer, which string_list_sort()
	 * does not provide.
	 */
	QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);

	for_each_string_list_item(item, &include_packs) {
		struct packed_git *p = item->util;
		for_each_object_in_pack(p,
					add_object_entry_from_pack,
					revs,
					FOR_EACH_OBJECT_PACK_ORDER);
	}

	strbuf_release(&buf);
	string_list_clear(&include_packs, 0);
	string_list_clear(&exclude_packs, 0);
}

static void add_unreachable_loose_objects(struct rev_info *revs);

/*
 * Entry point for --stdin-packs: collect objects from the listed
 * packs, then walk revisions to backfill name hashes (and, in
 * "follow" mode, extra objects).
 */
static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
{
	struct rev_info revs;

	repo_init_revisions(the_repository, &revs, NULL);
	/*
	 * Use a revision walk to fill in the namehash of objects in the include
	 * packs. To save time, we'll avoid traversing through objects that are
	 * in excluded packs.
	 *
	 * That may cause us to avoid populating all of the namehash fields of
	 * all included objects, but our goal is best-effort, since this is only
	 * an optimization during delta selection.
	 */
	revs.no_kept_objects = 1;
	revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
	revs.blob_objects = 1;
	revs.tree_objects = 1;
	revs.tag_objects = 1;
	revs.ignore_missing_links = 1;

	/* avoids adding objects in excluded packs */
	ignore_packed_keep_in_core = 1;
	read_packs_list_from_stdin(&revs);
	if (rev_list_unpacked)
		add_unreachable_loose_objects(&revs);

	if (prepare_revision_walk(&revs))
		die(_("revision walk setup failed"));
	traverse_commit_list(&revs,
			     show_commit_pack_hint,
			     show_object_pack_hint,
			     &mode);

	trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
			   stdin_packs_found_nr);
	trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
			   stdin_packs_hints_nr);
}

/*
 * Add (or update) an entry for a cruft pack. If the object is already
 * in to_pack, optionally refresh its name hash; otherwise create a
 * fresh entry. In both cases keep the newest mtime seen so far.
 */
static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
				   struct packed_git *pack, off_t offset,
				   const char *name, uint32_t mtime)
{
	struct object_entry *entry;

	display_progress(progress_state, ++nr_seen);

	entry = packlist_find(&to_pack, oid);
	if (entry) {
		if (name) {
			entry->hash = pack_name_hash_fn(name);
			entry->no_try_delta = no_try_delta(name);
		}
	} else {
		if (!want_object_in_pack_mtime(oid, 0, &pack, &offset, mtime))
			return;
		if (!pack && type == OBJ_BLOB) {
			/* Look for the blob loose in any object source. */
			struct odb_source *source = the_repository->objects->sources;
			int found = 0;

			for (; !found && source; source = source->next)
				if (has_loose_object(source, oid))
					found = 1;

			/*
			 * If a traversed tree has a missing blob then we want
			 * to avoid adding that missing object to our pack.
			 *
			 * This only applies to missing blobs, not trees,
			 * because the traversal needs to parse sub-trees but
			 * not blobs.
			 *
			 * Note we only perform this check when we couldn't
			 * already find the object in a pack, so we're really
			 * limited to "ensure non-tip blobs which don't exist in
			 * packs do exist via loose objects". Confused?
			 */
			if (!found)
				return;
		}

		entry = create_object_entry(oid, type, pack_name_hash_fn(name),
					    0, name && no_try_delta(name),
					    pack, offset);
	}

	/* Record the most recent mtime observed for this object. */
	if (mtime > oe_cruft_mtime(&to_pack, entry))
		oe_set_cruft_mtime(&to_pack, entry, mtime);
	return;
}

/*
 * Traversal callback for cruft packs: record a reachable-from-recent
 * object with the expiration cutoff as its mtime.
 */
static void show_cruft_object(struct object *obj, const char *name, void *data UNUSED)
{
	/*
	 * if we did not record it earlier, it's at least as old as our
	 * expiration value. Rather than find it exactly, just use that
	 * value. This may bump it forward from its real mtime, but it
	 * will still be "too old" next time we run with the same
	 * expiration.
	 *
	 * if obj does appear in the packing list, this call is a noop (or may
	 * set the namehash).
	 */
	add_cruft_object_entry(&obj->oid, obj->type, NULL, 0, name, cruft_expiration);
}

/* Commit counterpart of show_cruft_object(); commits get no name. */
static void show_cruft_commit(struct commit *commit, void *data)
{
	show_cruft_object((struct object*)commit, NULL, data);
}

/* Traverse only objects that are not in an in-core kept pack. */
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
	return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
}

static int cruft_include_check(struct commit *commit, void *data)
{
	return cruft_include_check_obj((struct object*)commit, data);
}

/*
 * Callback for add_unseen_recent_objects_to_traversal(): record a
 * recent object together with the mtime it was discovered with.
 */
static void set_cruft_mtime(const struct object *object,
			    struct packed_git *pack,
			    off_t offset, time_t mtime)
{
	add_cruft_object_entry(&object->oid, object->type, pack, offset, NULL,
			       mtime);
}

/*
 * Set (or clear) the in-core "kept" bit on every pack listed in
 * `packs`; every entry's ->util must already point at its packed_git.
 */
static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
{
	struct string_list_item *item = NULL;
	for_each_string_list_item(item, packs) {
		struct packed_git *p = item->util;
		if (!p)
			die(_("could not find pack '%s'"), item->string);
		if (p->is_cruft && keep)
			ignore_packed_keep_in_core_has_cruft = 1;
		p->pack_keep_in_core = keep;
	}
}

static void add_objects_in_unpacked_packs(void);

/*
 * Cruft enumeration without an expiration: every unreachable object
 * (packed in non-kept packs, or loose) goes into the cruft pack.
 */
static void enumerate_cruft_objects(void)
{
	if (progress)
		progress_state = start_progress(the_repository,
						_("Enumerating cruft objects"), 0);

	add_objects_in_unpacked_packs();
	add_unreachable_loose_objects(NULL);

	stop_progress(&progress_state);
}

/*
 * Cruft enumeration with an expiration: seed a traversal with recent
 * objects, then walk to keep anything reachable from them, pruning
 * the rest.
 */
static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct packed_git *p;
	struct rev_info revs;
	int ret;

	repo_init_revisions(the_repository, &revs, NULL);

	revs.tag_objects = 1;
	revs.tree_objects = 1;
	revs.blob_objects = 1;

	revs.include_check = cruft_include_check;
	revs.include_check_obj = cruft_include_check_obj;

	revs.ignore_missing_links = 1;

	if (progress)
		progress_state = start_progress(the_repository,
						_("Enumerating cruft objects"), 0);
	ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration,
						     set_cruft_mtime, 1);
	stop_progress(&progress_state);

	if (ret)
		die(_("unable to add cruft objects"));

	/*
	 * Re-mark only the fresh packs as kept so that objects in
	 * unknown packs do not halt the reachability traversal early.
	 */
	for (p = packfile_store_get_all_packs(packs); p; p = p->next)
		p->pack_keep_in_core = 0;
	mark_pack_kept_in_core(fresh_packs, 1);

	if (prepare_revision_walk(&revs))
		die(_("revision walk setup failed"));
	if (progress)
		progress_state = start_progress(the_repository,
						_("Traversing cruft objects"), 0);
	nr_seen = 0;
	traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL);

	stop_progress(&progress_state);
}

/*
 * Entry point for --cruft: read "fresh" and "-discard" pack names
 * from stdin, mark them accordingly, and enumerate the cruft objects
 * (with or without an expiration-bounded traversal).
 */
static void read_cruft_objects(void)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct strbuf buf = STRBUF_INIT;
	struct string_list discard_packs = STRING_LIST_INIT_DUP;
	struct string_list fresh_packs = STRING_LIST_INIT_DUP;
	struct packed_git *p;

	ignore_packed_keep_in_core = 1;

	while (strbuf_getline(&buf, stdin) != EOF) {
		if (!buf.len)
			continue;

		if (*buf.buf == '-')
			string_list_append(&discard_packs, buf.buf + 1);
		else
			string_list_append(&fresh_packs, buf.buf);
	}

	string_list_sort(&discard_packs);
	string_list_sort(&fresh_packs);

	for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
		const char *pack_name = pack_basename(p);
		struct string_list_item *item;

		item = string_list_lookup(&fresh_packs, pack_name);
		if (!item)
			item = string_list_lookup(&discard_packs, pack_name);

		if (item) {
			item->util = p;
		} else {
			/*
			 * This pack wasn't mentioned in either the "fresh" or
			 * "discard" list, so the caller didn't know about it.
			 *
			 * Mark it as kept so that its objects are ignored by
			 * add_unseen_recent_objects_to_traversal(). We'll
			 * unmark it before starting the traversal so it doesn't
			 * halt the traversal early.
			 */
			p->pack_keep_in_core = 1;
		}
	}

	mark_pack_kept_in_core(&fresh_packs, 1);
	mark_pack_kept_in_core(&discard_packs, 0);

	if (cruft_expiration)
		enumerate_and_traverse_cruft_objects(&fresh_packs);
	else
		enumerate_cruft_objects();

	strbuf_release(&buf);
	string_list_clear(&discard_packs, 0);
	string_list_clear(&fresh_packs, 0);
}

/*
 * Read object IDs from stdin, one per line: "-<oid>" marks a
 * preferred base (edge), "<oid> <name>" adds an object to pack.
 */
static void read_object_list_from_stdin(void)
{
	char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
	struct object_id oid;
	const char *p;

	for (;;) {
		if (!fgets(line, sizeof(line), stdin)) {
			if (feof(stdin))
				break;
			if (!ferror(stdin))
				BUG("fgets returned NULL, not EOF, not error!");
			if (errno != EINTR)
				die_errno("fgets");
			/* EINTR: clear the error state and retry. */
			clearerr(stdin);
			continue;
		}
		if (line[0] == '-') {
			if (get_oid_hex(line+1, &oid))
				die(_("expected edge object ID, got garbage:\n %s"),
				    line);
			add_preferred_base(&oid);
			continue;
		}
		if (parse_oid_hex(line, &oid, &p))
			die(_("expected object ID, got garbage:\n %s"), line);

		add_preferred_base_object(p + 1);
		add_object_entry(&oid, OBJ_NONE, p + 1, 0);
	}
}

/* Revision-walk callback: pack a commit (and index it for bitmaps). */
static void show_commit(struct commit *commit, void *data UNUSED)
{
	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);

	if (write_bitmap_index)
		index_commit_for_bitmap(commit);

	if (use_delta_islands)
		propagate_island_marks(the_repository, commit);
}

/*
 * Revision-walk callback: pack a non-commit object, tracking its
 * path depth when delta islands are in use.
 */
static void show_object(struct object *obj, const char *name,
			void *data UNUSED)
{
	add_preferred_base_object(name);
	add_object_entry(&obj->oid, obj->type, name, 0);

	if (use_delta_islands) {
		const char *p;
		unsigned depth;
		struct object_entry *ent;

		/* the empty string is a root tree, which is depth 0 */
		depth = *name ? 1 : 0;
		for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
			depth++;

		ent = packlist_find(&to_pack, &obj->oid);
		if (ent && depth > oe_tree_depth(&to_pack, ent))
			oe_set_tree_depth(&to_pack, ent, depth);
	}
}

/* show_object variant for --missing=allow-any. */
static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
{
	assert(arg_missing_action == MA_ALLOW_ANY);

	/*
	 * Quietly ignore ALL missing objects. This avoids problems with
	 * staging them now and getting an odd error later.
	 */
	if (!odb_has_object(the_repository->objects, &obj->oid, 0))
		return;

	show_object(obj, name, data);
}

/* show_object variant for --missing=allow-promisor. */
static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
{
	assert(arg_missing_action == MA_ALLOW_PROMISOR);

	/*
	 * Quietly ignore EXPECTED missing objects. This avoids problems with
	 * staging them now and getting an odd error later.
	 */
	if (!odb_has_object(the_repository->objects, &obj->oid, 0) &&
	    is_promisor_object(to_pack.repo, &obj->oid))
		return;

	show_object(obj, name, data);
}

/*
 * Parse --missing=<action>, selecting the matching fn_show_object
 * callback; dies on an unknown action.
 */
static int option_parse_missing_action(const struct option *opt UNUSED,
				       const char *arg, int unset)
{
	assert(arg);
	assert(!unset);

	if (!strcmp(arg, "error")) {
		arg_missing_action = MA_ERROR;
		fn_show_object = show_object;
		return 0;
	}

	if (!strcmp(arg, "allow-any")) {
		arg_missing_action = MA_ALLOW_ANY;
		fetch_if_missing = 0;
		fn_show_object = show_object__ma_allow_any;
		return 0;
	}

	if (!strcmp(arg, "allow-promisor")) {
		arg_missing_action = MA_ALLOW_PROMISOR;
		fetch_if_missing = 0;
		fn_show_object = show_object__ma_allow_promisor;
		return 0;
	}

	die(_("invalid value for '%s': '%s'"), "--missing", arg);
	return 0;
}

/* Edge-commit callback: edges become preferred (excluded) bases. */
static void show_edge(struct commit *commit)
{
	add_preferred_base(&commit->object.oid);
}

/*
 * for_each_packed_object() callback: add an object from a non-kept
 * pack, carrying its per-object mtime along in cruft mode.
 */
static int add_object_in_unpacked_pack(const struct object_id *oid,
				       struct packed_git *pack,
				       uint32_t pos,
				       void *data UNUSED)
{
	if (cruft) {
		off_t offset;
		time_t mtime;

		if (pack->is_cruft) {
			/* Cruft packs carry per-object mtimes in .mtimes. */
			if (load_pack_mtimes(pack) < 0)
				die(_("could not load cruft pack .mtimes"));
			mtime = nth_packed_mtime(pack, pos);
		} else {
			mtime = pack->mtime;
		}
		offset = nth_packed_object_offset(pack, pos);

		add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
				       NULL, mtime);
	} else {
		add_object_entry(oid, OBJ_NONE, "", 0);
	}
	return 0;
}

/* Add every object from local, non-kept packs to the packing list. */
static void add_objects_in_unpacked_packs(void)
{
	if (for_each_packed_object(to_pack.repo,
				   add_object_in_unpacked_pack,
				   NULL,
				   FOR_EACH_OBJECT_PACK_ORDER |
				   FOR_EACH_OBJECT_LOCAL_ONLY |
				   FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
				   FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
		die(_("cannot open pack index"));
}

/*
 * Loose-object iteration callback: add a loose object to the pack
 * (with its file mtime in cruft mode) and, if a rev walk is supplied,
 * queue loose commits as traversal tips.
 */
static int add_loose_object(const struct object_id *oid, const char *path,
			    void *data)
{
	struct rev_info *revs = data;
	enum object_type type = odb_read_object_info(the_repository->objects, oid, NULL);

	if (type < 0) {
		warning(_("loose object at %s could not be examined"), path);
		return 0;
	}

	if (cruft) {
		struct stat st;
		if (stat(path, &st) < 0) {
			/* The object may have been pruned concurrently. */
			if (errno == ENOENT)
				return 0;
			return error_errno("unable to stat %s", oid_to_hex(oid));
		}

		add_cruft_object_entry(oid, type, NULL, 0, NULL,
				       st.st_mtime);
	} else {
		add_object_entry(oid, type, "", 0);
	}

	if (revs && type == OBJ_COMMIT)
		add_pending_oid(revs, NULL, oid, 0);

	return 0;
}

/*
 * We actually don't even have to worry about reachability here.
 * add_object_entry will weed out duplicates, so we just add every
 * loose object we find.
 */
static void add_unreachable_loose_objects(struct rev_info *revs)
{
	for_each_loose_file_in_source(the_repository->objects->sources,
				      add_loose_object, NULL, NULL, revs);
}

/*
 * Return 1 if the object exists in a kept or non-local pack. Caches
 * the last pack that matched; (void *)1 is the "no cache yet"
 * sentinel for the static last_found.
 */
static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	static struct packed_git *last_found = (void *)1;
	struct packed_git *p;

	p = (last_found != (void *)1) ? last_found :
					packfile_store_get_all_packs(packs);

	while (p) {
		if ((!p->pack_local || p->pack_keep ||
		     p->pack_keep_in_core) &&
		    find_pack_entry_one(oid, p)) {
			last_found = p;
			return 1;
		}
		/* After the cached pack, scan the full list, skipping it. */
		if (p == last_found)
			p = packfile_store_get_all_packs(packs);
		else
			p = p->next;
		if (p == last_found)
			p = p->next;
	}
	return 0;
}

/*
 * Store a list of sha1s that should not be discarded
 * because they are either written too recently, or are
 * reachable from another object that was.
 *
 * This is filled by get_object_list.
 */
static struct oid_array recent_objects;

/*
 * An object loosened from a pack may be discarded only when an
 * expiration is set, its mtime is past it, and it is not recent.
 */
static int loosened_object_can_be_discarded(const struct object_id *oid,
					    timestamp_t mtime)
{
	if (!unpack_unreachable_expiration)
		return 0;
	if (mtime > unpack_unreachable_expiration)
		return 0;
	if (oid_array_lookup(&recent_objects, oid) >= 0)
		return 0;
	return 1;
}

/*
 * Write out, as loose objects, the packed objects we are about to
 * drop (in non-kept local packs but not in to_pack), unless they may
 * be discarded outright per loosened_object_can_be_discarded().
 */
static void loosen_unused_packed_objects(void)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct packed_git *p;
	uint32_t i;
	uint32_t loosened_objects_nr = 0;
	struct object_id oid;

	for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
		if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
			continue;

		if (open_pack_index(p))
			die(_("cannot open pack index"));

		for (i = 0; i < p->num_objects; i++) {
			nth_packed_object_id(&oid, p, i);
			if (!packlist_find(&to_pack, &oid) &&
			    !has_sha1_pack_kept_or_nonlocal(&oid) &&
			    !loosened_object_can_be_discarded(&oid, p->mtime)) {
				if (force_object_loose(the_repository->objects->sources,
						       &oid, p->mtime))
					die(_("unable to force loose object"));
				loosened_objects_nr++;
			}
		}
	}

	trace2_data_intmax("pack-objects", the_repository,
			   "loosen_unused_packed_objects/loosened",
			   loosened_objects_nr);
}

/*
 * This tracks any options which pack-reuse code expects to be on, or which a
 * reader of the pack might not understand, and which would therefore prevent
 * blind reuse of what we have on disk.
 */
static int pack_options_allow_reuse(void)
{
	return allow_pack_reuse != NO_PACK_REUSE &&
	       pack_to_stdout &&
	       !ignore_packed_keep_on_disk &&
	       !ignore_packed_keep_in_core &&
	       (!local || !have_non_local_packs) &&
	       !incremental;
}

/*
 * Build the object list from reachability bitmaps instead of a full
 * traversal. Returns -1 if no bitmap walk is possible (caller falls
 * back), 0 on success.
 */
static int get_object_list_from_bitmap(struct rev_info *revs)
{
	if (!(bitmap_git = prepare_bitmap_walk(revs, 0)))
		return -1;

	/*
	 * For now, force the name-hash version to be 1 since that
	 * is the version implied by the bitmap format. Later, the
	 * format can include this version explicitly in its format,
	 * allowing readers to know the version that was used during
	 * the bitmap write.
	 */
	name_hash_version = 1;

	if (pack_options_allow_reuse())
		reuse_partial_packfile_from_bitmap(bitmap_git,
						   &reuse_packfiles,
						   &reuse_packfiles_nr,
						   &reuse_packfile_bitmap,
						   allow_pack_reuse == MULTI_PACK_REUSE);

	if (reuse_packfiles) {
		reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap);
		if (!reuse_packfile_objects)
			BUG("expected non-empty reuse bitmap");

		nr_result += reuse_packfile_objects;
		nr_seen += reuse_packfile_objects;
		display_progress(progress_state, nr_seen);
	}

	traverse_bitmap_commit_list(bitmap_git, revs,
				    &add_object_entry_from_bitmap);
	return 0;
}

/* Traversal callbacks that record recent objects (see recent_objects). */
static void record_recent_object(struct object *obj,
				 const char *name UNUSED,
				 void *data UNUSED)
{
	oid_array_append(&recent_objects, &obj->oid);
}

static void record_recent_commit(struct commit *commit, void *data UNUSED)
{
	oid_array_append(&recent_objects, &commit->object.oid);
}

/*
 * Ref iteration callback: mark the commit at a preferred bitmap tip with
 * NEEDS_BITMAP. Annotated tags are peeled first so the flag lands on the
 * pointed-to commit; non-commit objects are parsed but left unflagged.
 */
static int mark_bitmap_preferred_tip(const char *refname,
				     const char *referent UNUSED,
				     const struct object_id *oid,
				     int flags UNUSED,
				     void *data UNUSED)
{
	struct object_id peeled;
	struct object *object;

	if (!peel_iterated_oid(the_repository, oid, &peeled))
		oid = &peeled;

	object = parse_object_or_die(the_repository, oid, refname);
	if (object->type == OBJ_COMMIT)
		object->flags |= NEEDS_BITMAP;

	return 0;
}

/*
 * For every configured preferred-tip ref pattern, flag the tip commits
 * (via mark_bitmap_preferred_tip) so the bitmap writer favors them.
 * A no-op when no preferred tips are configured.
 */
static void mark_bitmap_preferred_tips(void)
{
	struct string_list_item *item;
	const struct string_list *preferred_tips;

	preferred_tips = bitmap_preferred_tips(the_repository);
	if (!preferred_tips)
		return;

	for_each_string_list_item(item, preferred_tips) {
		refs_for_each_ref_in(get_main_ref_store(the_repository),
				     item->string, mark_bitmap_preferred_tip,
				     NULL);
	}
}

/*
 * An object is "uninteresting" for the path-walk if it was never looked
 * up during the walk, or carries the UNINTERESTING flag.
 */
static inline int is_oid_uninteresting(struct repository *repo,
				       struct object_id *oid)
{
	struct object *o = lookup_object(repo, oid);
	return !o || (o->flags & UNINTERESTING);
}

/*
 * Path-walk callback: add all objects found at 'path' to the packing
 * data and, when delta search is enabled, record them as one delta-search
 * region. Uninteresting objects are added as excluded entries only when
 * building a thin pack. 'data' is the running processed-object counter
 * used for progress display.
 */
static int add_objects_by_path(const char *path,
			       struct oid_array *oids,
			       enum object_type type,
			       void *data)
{
	size_t oe_start = to_pack.nr_objects;
	size_t oe_end;
	unsigned int *processed = data;

	/*
	 * First, add all objects to the packing data, including the ones
	 * marked UNINTERESTING (translated to 'exclude') as they can be
	 * used as delta bases.
	 */
	for (size_t i = 0; i < oids->nr; i++) {
		int exclude;
		struct object_info oi = OBJECT_INFO_INIT;
		struct object_id *oid = &oids->oid[i];

		/* Skip objects that do not exist locally. */
		if ((exclude_promisor_objects || arg_missing_action != MA_ERROR) &&
		    odb_read_object_info_extended(the_repository->objects, oid, &oi,
						  OBJECT_INFO_FOR_PREFETCH) < 0)
			continue;

		exclude = is_oid_uninteresting(the_repository, oid);

		if (exclude && !thin)
			continue;

		add_object_entry(oid, type, path, exclude);
	}

	oe_end = to_pack.nr_objects;

	/* We can skip delta calculations if it is a no-op. */
	if (oe_end == oe_start || !window)
		return 0;

	ALLOC_GROW(to_pack.regions,
		   to_pack.nr_regions + 1,
		   to_pack.nr_regions_alloc);

	to_pack.regions[to_pack.nr_regions].start = oe_start;
	to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
	to_pack.nr_regions++;

	*processed += oids->nr;
	display_progress(progress_state, *processed);

	return 0;
}

/*
 * Enumerate objects with the path-walk API instead of an ordinary
 * revision traversal, grouping objects by path via add_objects_by_path().
 * Dies if the walk fails.
 */
static void get_object_list_path_walk(struct rev_info *revs)
{
	struct path_walk_info info = PATH_WALK_INFO_INIT;
	unsigned int processed = 0;
	int result;

	info.revs = revs;
	info.path_fn = add_objects_by_path;
	info.path_fn_data = &processed;

	/*
	 * Allow the --[no-]sparse option to be interesting here, if only
	 * for testing purposes. Paths with no interesting objects will not
	 * contribute to the resulting pack, but only create noisy preferred
	 * base objects.
	 */
	info.prune_all_uninteresting = sparse;
	info.edge_aggressive = shallow;

	trace2_region_enter("pack-objects", "path-walk", revs->repo);
	result = walk_objects_by_path(&info);
	trace2_region_leave("pack-objects", "path-walk", revs->repo);

	if (result)
		die(_("failed to pack objects via path-walk"));
}

/*
 * Build the full object list for the pack. Revision arguments come from
 * 'argv' plus additional revs read line-by-line from stdin (terminated by
 * an empty line); "--not" and "--shallow <oid>" are the only accepted
 * option lines on stdin. Tries a bitmap walk first when enabled, then
 * falls back to the path-walk or a standard commit traversal, and finally
 * handles the keep/pack/unpack-unreachable variants.
 */
static void get_object_list(struct rev_info *revs, struct strvec *argv)
{
	struct setup_revision_opt s_r_opt = {
		.allow_exclude_promisor_objects = 1,
	};
	char line[1000];
	int flags = 0;
	int save_warning;

	save_commit_buffer = 0;
	setup_revisions_from_strvec(argv, revs, &s_r_opt);

	/* make sure shallows are read */
	is_repository_shallow(the_repository);

	save_warning = warn_on_object_refname_ambiguity;
	warn_on_object_refname_ambiguity = 0;

	while (fgets(line, sizeof(line), stdin) != NULL) {
		int len = strlen(line);
		if (len && line[len - 1] == '\n')
			line[--len] = 0;
		if (!len)
			break;
		if (*line == '-') {
			if (!strcmp(line, "--not")) {
				/* negated revs invalidate a bitmap-friendly walk */
				flags ^= UNINTERESTING;
				write_bitmap_index = 0;
				continue;
			}
			if (starts_with(line, "--shallow ")) {
				struct object_id oid;
				if (get_oid_hex(line + 10, &oid))
					die("not an object name '%s'", line + 10);
				register_shallow(the_repository, &oid);
				use_bitmap_index = 0;
				continue;
			}
			die(_("not a rev '%s'"), line);
		}
		if (handle_revision_arg(line, revs, flags, REVARG_CANNOT_BE_FILENAME))
			die(_("bad revision '%s'"), line);
	}

	warn_on_object_refname_ambiguity = save_warning;

	if (use_bitmap_index && !get_object_list_from_bitmap(revs))
		return;

	if (use_delta_islands)
		load_delta_islands(the_repository, progress);

	if (write_bitmap_index)
		mark_bitmap_preferred_tips();

	if (!fn_show_object)
		fn_show_object = show_object;

	if (path_walk) {
		get_object_list_path_walk(revs);
	} else {
		if (prepare_revision_walk(revs))
			die(_("revision walk setup failed"));
		mark_edges_uninteresting(revs, show_edge, sparse);
		traverse_commit_list(revs,
				     show_commit, fn_show_object,
				     NULL);
	}

	if (unpack_unreachable_expiration) {
		/* collect recent objects so they are not discarded later */
		revs->ignore_missing_links = 1;
		if (add_unseen_recent_objects_to_traversal(revs,
				unpack_unreachable_expiration, NULL, 0))
			die(_("unable to add recent objects"));
		if (prepare_revision_walk(revs))
			die(_("revision walk setup failed"));
		traverse_commit_list(revs, record_recent_commit,
				     record_recent_object, NULL);
	}

	if (keep_unreachable)
		add_objects_in_unpacked_packs();
	if (pack_loose_unreachable)
		add_unreachable_loose_objects(NULL);
	if (unpack_unreachable)
		loosen_unused_packed_objects();

	oid_array_clear(&recent_objects);
}

/*
 * Mark every local pack whose basename appears in 'names' (--keep-pack)
 * as kept in-core, and remember that some packs are kept that way.
 */
static void add_extra_kept_packs(const struct string_list *names)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct packed_git *p;

	if (!names->nr)
		return;

	for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
		const char *name = basename(p->pack_name);
		int i;

		if (!p->pack_local)
			continue;

		for (i = 0; i < names->nr; i++)
			if (!fspathcmp(name, names->items[i].string))
				break;

		if (i < names->nr) {
			p->pack_keep_in_core = 1;
			ignore_packed_keep_in_core = 1;
			continue;
		}
	}
}

/*
 * -q/--quiet: silence progress (value 0); --no-quiet restores normal
 * progress (value 1) only if it was previously silenced.
 */
static int option_parse_quiet(const struct option *opt, const char *arg,
			      int unset)
{
	int *val = opt->value;

	BUG_ON_OPT_ARG(arg);

	if (!unset)
		*val = 0;
	else if (!*val)
		*val = 1;
	return 0;
}

/*
 * --index-version=<version>[,<offset>]: parse the idx format version
 * (only <= 2 supported) and the optional 32-bit offset limit, which must
 * fit in 31 bits.
 */
static int option_parse_index_version(const struct option *opt,
				      const char *arg, int unset)
{
	struct pack_idx_option *popts = opt->value;
	char *c;
	const char *val = arg;

	BUG_ON_OPT_NEG(unset);

	popts->version = strtoul(val, &c, 10);
	if (popts->version > 2)
		die(_("unsupported index version %s"), val);
	if (*c == ',' && c[1])
		popts->off32_limit = strtoul(c+1, &c, 0);
	if (*c || popts->off32_limit & 0x80000000)
		die(_("bad index version '%s'"), val);
	return 0;
}

/*
 * --unpack-unreachable[=<time>]: enable loosening of unreachable packed
 * objects, optionally with an expiration cutoff; --no-unpack-unreachable
 * clears both.
 */
static int option_parse_unpack_unreachable(const struct option *opt UNUSED,
					   const char *arg, int unset)
{
	if (unset) {
		unpack_unreachable = 0;
		unpack_unreachable_expiration = 0;
	}
	else {
		unpack_unreachable = 1;
		if (arg)
			unpack_unreachable_expiration = approxidate(arg);
	}
	return 0;
}

/*
 * --cruft-expiration[=<time>]: enable cruft-pack generation, optionally
 * with an expiration cutoff; --no-cruft-expiration clears both.
 */
static int option_parse_cruft_expiration(const struct option *opt UNUSED,
					 const char *arg, int unset)
{
	if (unset) {
		cruft = 0;
		cruft_expiration = 0;
	} else {
		cruft = 1;
		if (arg)
			cruft_expiration = approxidate(arg);
	}
	return 0;
}

/*
 * Return 1 if 'obj' is NOT stored in a promisor pack (i.e. it should be
 * packed when excluding promisor objects best-effort). The object must
 * exist; a lookup failure is a BUG.
 */
static int is_not_in_promisor_pack_obj(struct object *obj, void *data UNUSED)
{
	struct object_info info = OBJECT_INFO_INIT;
	if (odb_read_object_info_extended(the_repository->objects, &obj->oid, &info, 0))
		BUG("should_include_obj should only be called on existing objects");
	return info.whence != OI_PACKED || !info.u.packed.pack->pack_promisor;
}

/* Commit-typed wrapper around is_not_in_promisor_pack_obj(). */
static int is_not_in_promisor_pack(struct commit *commit, void *data)
{
	return is_not_in_promisor_pack_obj((struct object *) commit, data);
}

/*
 * --stdin-packs[=<mode>]: no argument selects the standard mode, "follow"
 * selects follow mode, --no-stdin-packs disables; anything else dies.
 */
static int parse_stdin_packs_mode(const struct option *opt, const char *arg,
				  int unset)
{
	enum stdin_packs_mode *mode = opt->value;

	if (unset)
		*mode = STDIN_PACKS_MODE_NONE;
	else if (!arg || !*arg)
		*mode = STDIN_PACKS_MODE_STANDARD;
	else if (!strcmp(arg, "follow"))
		*mode = STDIN_PACKS_MODE_FOLLOW;
	else
		die(_("invalid value for '%s': '%s'"),
opt->long_name, arg); 4862 4863 return 0; 4864} 4865 4866int cmd_pack_objects(int argc, 4867 const char **argv, 4868 const char *prefix, 4869 struct repository *repo UNUSED) 4870{ 4871 int use_internal_rev_list = 0; 4872 int all_progress_implied = 0; 4873 struct strvec rp = STRVEC_INIT; 4874 int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; 4875 int rev_list_index = 0; 4876 enum stdin_packs_mode stdin_packs = STDIN_PACKS_MODE_NONE; 4877 struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; 4878 struct list_objects_filter_options filter_options = 4879 LIST_OBJECTS_FILTER_INIT; 4880 4881 struct option pack_objects_options[] = { 4882 OPT_CALLBACK_F('q', "quiet", &progress, NULL, 4883 N_("do not show progress meter"), 4884 PARSE_OPT_NOARG, option_parse_quiet), 4885 OPT_SET_INT(0, "progress", &progress, 4886 N_("show progress meter"), 1), 4887 OPT_SET_INT(0, "all-progress", &progress, 4888 N_("show progress meter during object writing phase"), 2), 4889 OPT_BOOL(0, "all-progress-implied", 4890 &all_progress_implied, 4891 N_("similar to --all-progress when progress meter is shown")), 4892 OPT_CALLBACK_F(0, "index-version", &pack_idx_opts, N_("<version>[,<offset>]"), 4893 N_("write the pack index file in the specified idx format version"), 4894 PARSE_OPT_NONEG, option_parse_index_version), 4895 OPT_UNSIGNED(0, "max-pack-size", &pack_size_limit, 4896 N_("maximum size of each output pack file")), 4897 OPT_BOOL(0, "local", &local, 4898 N_("ignore borrowed objects from alternate object store")), 4899 OPT_BOOL(0, "incremental", &incremental, 4900 N_("ignore packed objects")), 4901 OPT_INTEGER(0, "window", &window, 4902 N_("limit pack window by objects")), 4903 OPT_UNSIGNED(0, "window-memory", &window_memory_limit, 4904 N_("limit pack window by memory in addition to object limit")), 4905 OPT_INTEGER(0, "depth", &depth, 4906 N_("maximum length of delta chain allowed in the resulting pack")), 4907 OPT_BOOL(0, "reuse-delta", &reuse_delta, 4908 N_("reuse 
existing deltas")), 4909 OPT_BOOL(0, "reuse-object", &reuse_object, 4910 N_("reuse existing objects")), 4911 OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta, 4912 N_("use OFS_DELTA objects")), 4913 OPT_INTEGER(0, "threads", &delta_search_threads, 4914 N_("use threads when searching for best delta matches")), 4915 OPT_BOOL(0, "non-empty", &non_empty, 4916 N_("do not create an empty pack output")), 4917 OPT_BOOL(0, "revs", &use_internal_rev_list, 4918 N_("read revision arguments from standard input")), 4919 OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked, 4920 N_("limit the objects to those that are not yet packed"), 4921 1, PARSE_OPT_NONEG), 4922 OPT_SET_INT_F(0, "all", &rev_list_all, 4923 N_("include objects reachable from any reference"), 4924 1, PARSE_OPT_NONEG), 4925 OPT_SET_INT_F(0, "reflog", &rev_list_reflog, 4926 N_("include objects referred by reflog entries"), 4927 1, PARSE_OPT_NONEG), 4928 OPT_SET_INT_F(0, "indexed-objects", &rev_list_index, 4929 N_("include objects referred to by the index"), 4930 1, PARSE_OPT_NONEG), 4931 OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"), 4932 N_("read packs from stdin"), 4933 PARSE_OPT_OPTARG, parse_stdin_packs_mode), 4934 OPT_BOOL(0, "stdin-packs", &stdin_packs, 4935 N_("read packs from stdin")), 4936 OPT_BOOL(0, "stdout", &pack_to_stdout, 4937 N_("output pack to stdout")), 4938 OPT_BOOL(0, "include-tag", &include_tag, 4939 N_("include tag objects that refer to objects to be packed")), 4940 OPT_BOOL(0, "keep-unreachable", &keep_unreachable, 4941 N_("keep unreachable objects")), 4942 OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable, 4943 N_("pack loose unreachable objects")), 4944 OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"), 4945 N_("unpack unreachable objects newer than <time>"), 4946 PARSE_OPT_OPTARG, option_parse_unpack_unreachable), 4947 OPT_BOOL(0, "cruft", &cruft, N_("create a cruft pack")), 4948 OPT_CALLBACK_F(0, "cruft-expiration", NULL, N_("time"), 4949 N_("expire cruft 
objects older than <time>"), 4950 PARSE_OPT_OPTARG, option_parse_cruft_expiration), 4951 OPT_BOOL(0, "sparse", &sparse, 4952 N_("use the sparse reachability algorithm")), 4953 OPT_BOOL(0, "thin", &thin, 4954 N_("create thin packs")), 4955 OPT_BOOL(0, "path-walk", &path_walk, 4956 N_("use the path-walk API to walk objects when possible")), 4957 OPT_BOOL(0, "shallow", &shallow, 4958 N_("create packs suitable for shallow fetches")), 4959 OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk, 4960 N_("ignore packs that have companion .keep file")), 4961 OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), 4962 N_("ignore this pack")), 4963 OPT_INTEGER(0, "compression", &pack_compression_level, 4964 N_("pack compression level")), 4965 OPT_BOOL(0, "keep-true-parents", &grafts_keep_true_parents, 4966 N_("do not hide commits by grafts")), 4967 OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index, 4968 N_("use a bitmap index if available to speed up counting objects")), 4969 OPT_SET_INT(0, "write-bitmap-index", &write_bitmap_index, 4970 N_("write a bitmap index together with the pack index"), 4971 WRITE_BITMAP_TRUE), 4972 OPT_SET_INT_F(0, "write-bitmap-index-quiet", 4973 &write_bitmap_index, 4974 N_("write a bitmap index if possible"), 4975 WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN), 4976 OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), 4977 OPT_CALLBACK_F(0, "missing", NULL, N_("action"), 4978 N_("handling for missing objects"), PARSE_OPT_NONEG, 4979 option_parse_missing_action), 4980 OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects, 4981 N_("do not pack objects in promisor packfiles")), 4982 OPT_BOOL(0, "exclude-promisor-objects-best-effort", 4983 &exclude_promisor_objects_best_effort, 4984 N_("implies --missing=allow-any")), 4985 OPT_BOOL(0, "delta-islands", &use_delta_islands, 4986 N_("respect islands during delta compression")), 4987 OPT_STRING_LIST(0, "uri-protocol", &uri_protocols, 4988 N_("protocol"), 4989 N_("exclude any configured 
uploadpack.blobpackfileuri with this protocol")), 4990 OPT_INTEGER(0, "name-hash-version", &name_hash_version, 4991 N_("use the specified name-hash function to group similar objects")), 4992 OPT_END(), 4993 }; 4994 4995 if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS)) 4996 BUG("too many dfs states, increase OE_DFS_STATE_BITS"); 4997 4998 disable_replace_refs(); 4999 5000 sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1); 5001 if (the_repository->gitdir) { 5002 prepare_repo_settings(the_repository); 5003 if (sparse < 0) 5004 sparse = the_repository->settings.pack_use_sparse; 5005 if (the_repository->settings.pack_use_multi_pack_reuse) 5006 allow_pack_reuse = MULTI_PACK_REUSE; 5007 } 5008 5009 reset_pack_idx_option(&pack_idx_opts); 5010 pack_idx_opts.flags |= WRITE_REV; 5011 repo_config(the_repository, git_pack_config, NULL); 5012 if (git_env_bool(GIT_TEST_NO_WRITE_REV_INDEX, 0)) 5013 pack_idx_opts.flags &= ~WRITE_REV; 5014 5015 progress = isatty(2); 5016 argc = parse_options(argc, argv, prefix, pack_objects_options, 5017 pack_usage, 0); 5018 5019 if (argc) { 5020 base_name = argv[0]; 5021 argc--; 5022 } 5023 if (pack_to_stdout != !base_name || argc) 5024 usage_with_options(pack_usage, pack_objects_options); 5025 5026 if (path_walk < 0) { 5027 if (use_bitmap_index > 0 || 5028 !use_internal_rev_list) 5029 path_walk = 0; 5030 else if (the_repository->gitdir && 5031 the_repository->settings.pack_use_path_walk) 5032 path_walk = 1; 5033 else 5034 path_walk = git_env_bool("GIT_TEST_PACK_PATH_WALK", 0); 5035 } 5036 5037 if (depth < 0) 5038 depth = 0; 5039 if (depth >= (1 << OE_DEPTH_BITS)) { 5040 warning(_("delta chain depth %d is too deep, forcing %d"), 5041 depth, (1 << OE_DEPTH_BITS) - 1); 5042 depth = (1 << OE_DEPTH_BITS) - 1; 5043 } 5044 if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) { 5045 warning(_("pack.deltaCacheLimit is too high, forcing %d"), 5046 (1U << OE_Z_DELTA_BITS) - 1); 5047 cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1; 5048 } 5049 if 
(window < 0) 5050 window = 0; 5051 5052 strvec_push(&rp, "pack-objects"); 5053 5054 if (path_walk) { 5055 const char *option = NULL; 5056 if (filter_options.choice) 5057 option = "--filter"; 5058 else if (use_delta_islands) 5059 option = "--delta-islands"; 5060 5061 if (option) { 5062 warning(_("cannot use %s with %s"), 5063 option, "--path-walk"); 5064 path_walk = 0; 5065 } 5066 } 5067 if (path_walk) { 5068 strvec_push(&rp, "--boundary"); 5069 /* 5070 * We must disable the bitmaps because we are removing 5071 * the --objects / --objects-edge[-aggressive] options. 5072 */ 5073 use_bitmap_index = 0; 5074 } else if (thin) { 5075 use_internal_rev_list = 1; 5076 strvec_push(&rp, shallow 5077 ? "--objects-edge-aggressive" 5078 : "--objects-edge"); 5079 } else 5080 strvec_push(&rp, "--objects"); 5081 5082 if (rev_list_all) { 5083 use_internal_rev_list = 1; 5084 strvec_push(&rp, "--all"); 5085 } 5086 if (rev_list_reflog) { 5087 use_internal_rev_list = 1; 5088 strvec_push(&rp, "--reflog"); 5089 } 5090 if (rev_list_index) { 5091 use_internal_rev_list = 1; 5092 strvec_push(&rp, "--indexed-objects"); 5093 } 5094 if (rev_list_unpacked && !stdin_packs) { 5095 use_internal_rev_list = 1; 5096 strvec_push(&rp, "--unpacked"); 5097 } 5098 5099 die_for_incompatible_opt2(exclude_promisor_objects, 5100 "--exclude-promisor-objects", 5101 exclude_promisor_objects_best_effort, 5102 "--exclude-promisor-objects-best-effort"); 5103 if (exclude_promisor_objects) { 5104 use_internal_rev_list = 1; 5105 fetch_if_missing = 0; 5106 strvec_push(&rp, "--exclude-promisor-objects"); 5107 } else if (exclude_promisor_objects_best_effort) { 5108 use_internal_rev_list = 1; 5109 fetch_if_missing = 0; 5110 option_parse_missing_action(NULL, "allow-any", 0); 5111 /* revs configured below */ 5112 } 5113 if (unpack_unreachable || keep_unreachable || pack_loose_unreachable) 5114 use_internal_rev_list = 1; 5115 5116 if (!reuse_object) 5117 reuse_delta = 0; 5118 if (pack_compression_level == -1) 5119 
pack_compression_level = Z_DEFAULT_COMPRESSION; 5120 else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION) 5121 die(_("bad pack compression level %d"), pack_compression_level); 5122 5123 if (!delta_search_threads) /* --threads=0 means autodetect */ 5124 delta_search_threads = online_cpus(); 5125 5126 if (!HAVE_THREADS && delta_search_threads != 1) 5127 warning(_("no threads support, ignoring --threads")); 5128 if (!pack_to_stdout && !pack_size_limit) 5129 pack_size_limit = pack_size_limit_cfg; 5130 if (pack_to_stdout && pack_size_limit) 5131 die(_("--max-pack-size cannot be used to build a pack for transfer")); 5132 if (pack_size_limit && pack_size_limit < 1024*1024) { 5133 warning(_("minimum pack size limit is 1 MiB")); 5134 pack_size_limit = 1024*1024; 5135 } 5136 5137 if (!pack_to_stdout && thin) 5138 die(_("--thin cannot be used to build an indexable pack")); 5139 5140 die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable", 5141 unpack_unreachable, "--unpack-unreachable"); 5142 if (!rev_list_all || !rev_list_reflog || !rev_list_index) 5143 unpack_unreachable_expiration = 0; 5144 5145 die_for_incompatible_opt2(stdin_packs, "--stdin-packs", 5146 filter_options.choice, "--filter"); 5147 5148 5149 if (stdin_packs && use_internal_rev_list) 5150 die(_("cannot use internal rev list with --stdin-packs")); 5151 5152 if (cruft) { 5153 if (use_internal_rev_list) 5154 die(_("cannot use internal rev list with --cruft")); 5155 die_for_incompatible_opt2(stdin_packs, "--stdin-packs", 5156 cruft, "--cruft"); 5157 } 5158 5159 /* 5160 * "soft" reasons not to use bitmaps - for on-disk repack by default we want 5161 * 5162 * - to produce good pack (with bitmap index not-yet-packed objects are 5163 * packed in suboptimal order). 5164 * 5165 * - to use more robust pack-generation codepath (avoiding possible 5166 * bugs in bitmap code and possible bitmap index corruption). 
5167 */ 5168 if (!pack_to_stdout) 5169 use_bitmap_index_default = 0; 5170 5171 if (use_bitmap_index < 0) 5172 use_bitmap_index = use_bitmap_index_default; 5173 5174 /* "hard" reasons not to use bitmaps; these just won't work at all */ 5175 if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow(the_repository)) 5176 use_bitmap_index = 0; 5177 5178 if (pack_to_stdout || !rev_list_all) 5179 write_bitmap_index = 0; 5180 5181 if (name_hash_version < 0) 5182 name_hash_version = (int)git_env_ulong("GIT_TEST_NAME_HASH_VERSION", 1); 5183 5184 validate_name_hash_version(); 5185 5186 if (use_delta_islands) 5187 strvec_push(&rp, "--topo-order"); 5188 5189 if (progress && all_progress_implied) 5190 progress = 2; 5191 5192 add_extra_kept_packs(&keep_pack_list); 5193 if (ignore_packed_keep_on_disk) { 5194 struct packfile_store *packs = the_repository->objects->packfiles; 5195 struct packed_git *p; 5196 5197 for (p = packfile_store_get_all_packs(packs); p; p = p->next) 5198 if (p->pack_local && p->pack_keep) 5199 break; 5200 if (!p) /* no keep-able packs found */ 5201 ignore_packed_keep_on_disk = 0; 5202 } 5203 if (local) { 5204 /* 5205 * unlike ignore_packed_keep_on_disk above, we do not 5206 * want to unset "local" based on looking at packs, as 5207 * it also covers non-local objects 5208 */ 5209 struct packfile_store *packs = the_repository->objects->packfiles; 5210 struct packed_git *p; 5211 5212 for (p = packfile_store_get_all_packs(packs); p; p = p->next) { 5213 if (!p->pack_local) { 5214 have_non_local_packs = 1; 5215 break; 5216 } 5217 } 5218 } 5219 5220 trace2_region_enter("pack-objects", "enumerate-objects", 5221 the_repository); 5222 prepare_packing_data(the_repository, &to_pack); 5223 5224 if (progress && !cruft) 5225 progress_state = start_progress(the_repository, 5226 _("Enumerating objects"), 0); 5227 if (stdin_packs) { 5228 read_stdin_packs(stdin_packs, rev_list_unpacked); 5229 } else if (cruft) { 5230 read_cruft_objects(); 
5231 } else if (!use_internal_rev_list) { 5232 read_object_list_from_stdin(); 5233 } else { 5234 struct rev_info revs; 5235 5236 repo_init_revisions(the_repository, &revs, NULL); 5237 list_objects_filter_copy(&revs.filter, &filter_options); 5238 if (exclude_promisor_objects_best_effort) { 5239 revs.include_check = is_not_in_promisor_pack; 5240 revs.include_check_obj = is_not_in_promisor_pack_obj; 5241 } 5242 get_object_list(&revs, &rp); 5243 release_revisions(&revs); 5244 } 5245 cleanup_preferred_base(); 5246 if (include_tag && nr_result) 5247 refs_for_each_tag_ref(get_main_ref_store(the_repository), 5248 add_ref_tag, NULL); 5249 stop_progress(&progress_state); 5250 trace2_region_leave("pack-objects", "enumerate-objects", 5251 the_repository); 5252 5253 if (non_empty && !nr_result) 5254 goto cleanup; 5255 if (nr_result) { 5256 trace2_region_enter("pack-objects", "prepare-pack", 5257 the_repository); 5258 prepare_pack(window, depth); 5259 trace2_region_leave("pack-objects", "prepare-pack", 5260 the_repository); 5261 } 5262 5263 trace2_region_enter("pack-objects", "write-pack-file", the_repository); 5264 write_excluded_by_configs(); 5265 write_pack_file(); 5266 trace2_region_leave("pack-objects", "write-pack-file", the_repository); 5267 5268 if (progress) 5269 fprintf_ln(stderr, 5270 _("Total %"PRIu32" (delta %"PRIu32")," 5271 " reused %"PRIu32" (delta %"PRIu32")," 5272 " pack-reused %"PRIu32" (from %"PRIuMAX")"), 5273 written, written_delta, reused, reused_delta, 5274 reuse_packfile_objects, 5275 (uintmax_t)reuse_packfiles_used_nr); 5276 5277 trace2_data_intmax("pack-objects", the_repository, "written", written); 5278 trace2_data_intmax("pack-objects", the_repository, "written/delta", written_delta); 5279 trace2_data_intmax("pack-objects", the_repository, "reused", reused); 5280 trace2_data_intmax("pack-objects", the_repository, "reused/delta", reused_delta); 5281 trace2_data_intmax("pack-objects", the_repository, "pack-reused", reuse_packfile_objects); 5282 
trace2_data_intmax("pack-objects", the_repository, "packs-reused", reuse_packfiles_used_nr); 5283 5284cleanup: 5285 clear_packing_data(&to_pack); 5286 list_objects_filter_release(&filter_options); 5287 string_list_clear(&keep_pack_list, 0); 5288 strvec_clear(&rp); 5289 5290 return 0; 5291}