Git fork
at reftables-rust 5291 lines 147 kB view raw
1#define USE_THE_REPOSITORY_VARIABLE 2#define DISABLE_SIGN_COMPARE_WARNINGS 3 4#include "builtin.h" 5#include "environment.h" 6#include "gettext.h" 7#include "hex.h" 8#include "config.h" 9#include "attr.h" 10#include "object.h" 11#include "commit.h" 12#include "tag.h" 13#include "delta.h" 14#include "pack.h" 15#include "pack-revindex.h" 16#include "csum-file.h" 17#include "tree-walk.h" 18#include "diff.h" 19#include "revision.h" 20#include "list-objects.h" 21#include "list-objects-filter-options.h" 22#include "pack-objects.h" 23#include "progress.h" 24#include "refs.h" 25#include "streaming.h" 26#include "thread-utils.h" 27#include "pack-bitmap.h" 28#include "delta-islands.h" 29#include "reachable.h" 30#include "oid-array.h" 31#include "strvec.h" 32#include "list.h" 33#include "packfile.h" 34#include "object-file.h" 35#include "odb.h" 36#include "replace-object.h" 37#include "dir.h" 38#include "midx.h" 39#include "trace2.h" 40#include "shallow.h" 41#include "promisor-remote.h" 42#include "pack-mtimes.h" 43#include "parse-options.h" 44#include "blob.h" 45#include "tree.h" 46#include "path-walk.h" 47#include "trace2.h" 48 49/* 50 * Objects we are going to pack are collected in the `to_pack` structure. 51 * It contains an array (dynamically expanded) of the object data, and a map 52 * that can resolve SHA1s to their position in the array. 
 */
static struct packing_data to_pack;

/*
 * Return the delta base of "e", or NULL if "e" is not deltified.
 * delta_idx is a 1-based index; 0 means "no delta". ext_base selects
 * the out-of-pack ext_bases[] array instead of objects[].
 */
static inline struct object_entry *oe_delta(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	if (!e->delta_idx)
		return NULL;
	if (e->ext_base)
		return &pack->ext_bases[e->delta_idx - 1];
	else
		return &pack->objects[e->delta_idx - 1];
}

/*
 * Size of the delta data for "e". Uses the cached bit-field when it
 * was wide enough, otherwise the out-of-line pack->delta_size[] array.
 */
static inline unsigned long oe_delta_size(struct packing_data *pack,
					  const struct object_entry *e)
{
	if (e->delta_size_valid)
		return e->delta_size_;

	/*
	 * pack->delta_size[] can't be NULL because oe_set_delta_size()
	 * must have been called when a new delta is saved with
	 * oe_set_delta().
	 * If oe_delta() returns NULL (i.e. default state, which means
	 * delta_size_valid is also false), then the caller must never
	 * call oe_delta_size().
	 */
	return pack->delta_size[e - pack->objects];
}

unsigned long oe_get_size_slow(struct packing_data *pack,
			       const struct object_entry *e);

/* Cached object size, falling back to the slow path when not cached. */
static inline unsigned long oe_size(struct packing_data *pack,
				    const struct object_entry *e)
{
	if (e->size_valid)
		return e->size_;

	return oe_get_size_slow(pack, e);
}

/* Record "delta" (an entry of pack->objects[]) as the base of "e". */
static inline void oe_set_delta(struct packing_data *pack,
				struct object_entry *e,
				struct object_entry *delta)
{
	if (delta)
		e->delta_idx = (delta - pack->objects) + 1;
	else
		e->delta_idx = 0;
}

/* Next entry sharing the same delta base as "e", or NULL. */
static inline struct object_entry *oe_delta_sibling(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	if (e->delta_sibling_idx)
		return &pack->objects[e->delta_sibling_idx - 1];
	return NULL;
}

/* First entry whose delta base is "e", or NULL. */
static inline struct object_entry *oe_delta_child(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	if (e->delta_child_idx)
		return &pack->objects[e->delta_child_idx - 1];
	return NULL;
}

static inline void oe_set_delta_child(struct packing_data *pack,
				      struct object_entry *e,
				      struct object_entry *delta)
{
	if (delta)
		e->delta_child_idx = (delta - pack->objects) + 1;
	else
		e->delta_child_idx = 0;
}

static inline void oe_set_delta_sibling(struct packing_data *pack,
					struct object_entry *e,
					struct object_entry *delta)
{
	if (delta)
		e->delta_sibling_idx = (delta - pack->objects) + 1;
	else
		e->delta_sibling_idx = 0;
}

/*
 * Cache "size" in the entry when it fits below pack->oe_size_limit;
 * otherwise mark the cache invalid so readers take the slow path
 * (and sanity-check that the slow path agrees with the caller).
 */
static inline void oe_set_size(struct packing_data *pack,
			       struct object_entry *e,
			       unsigned long size)
{
	if (size < pack->oe_size_limit) {
		e->size_ = size;
		e->size_valid = 1;
	} else {
		e->size_valid = 0;
		if (oe_get_size_slow(pack, e) != size)
			BUG("'size' is supposed to be the object size!");
	}
}

/*
 * Store the delta size, spilling to the lazily-allocated
 * pack->delta_size[] array when it does not fit in the bit-field.
 * NOTE(review): the lock guards only the allocation of the array;
 * the per-entry store is presumably owned by one thread -- confirm.
 */
static inline void oe_set_delta_size(struct packing_data *pack,
				     struct object_entry *e,
				     unsigned long size)
{
	if (size < pack->oe_delta_size_limit) {
		e->delta_size_ = size;
		e->delta_size_valid = 1;
	} else {
		packing_data_lock(pack);
		if (!pack->delta_size)
			ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
		packing_data_unlock(pack);

		pack->delta_size[e - pack->objects] = size;
		e->delta_size_valid = 0;
	}
}

/* Convenience wrappers that implicitly operate on to_pack. */
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
#define SIZE(obj) oe_size(&to_pack, obj)
#define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
#define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
#define DELTA(obj) oe_delta(&to_pack, obj)
#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)

static const char *const pack_usage[] = {
	N_("git pack-objects 
[-q | --progress | --all-progress] [--all-progress-implied]\n"
	   " [--no-reuse-delta] [--delta-base-offset] [--non-empty]\n"
	   " [--local] [--incremental] [--window=<n>] [--depth=<n>]\n"
	   " [--revs [--unpacked | --all]] [--keep-pack=<pack-name>]\n"
	   " [--cruft] [--cruft-expiration=<time>]\n"
	   " [--stdout [--filter=<filter-spec>] | <base-name>]\n"
	   " [--shallow] [--keep-true-parents] [--[no-]sparse]\n"
	   " [--name-hash-version=<n>] [--path-walk] < <object-list>"),
	NULL
};

/* idx entries of the objects actually written, filled in write order. */
static struct pack_idx_entry **written_list;
static uint32_t nr_result, nr_written, nr_seen;
static struct bitmap_index *bitmap_git;
/* Current delta-island layer being emitted by compute_write_order(). */
static uint32_t write_layer;

/* Command-line knobs and derived state. */
static int non_empty;
static int reuse_delta = 1, reuse_object = 1;
static int keep_unreachable, unpack_unreachable, include_tag;
static timestamp_t unpack_unreachable_expiration;
static int pack_loose_unreachable;
static int cruft;
static int shallow = 0;
static timestamp_t cruft_expiration;
static int local;
static int have_non_local_packs;
static int incremental;
static int ignore_packed_keep_on_disk;
static int ignore_packed_keep_in_core;
static int ignore_packed_keep_in_core_has_cruft;
static int allow_ofs_delta;
static struct pack_idx_option pack_idx_opts;
static const char *base_name;
static int progress = 1;
static int window = 10;
static unsigned long pack_size_limit;
static int depth = 50;
static int delta_search_threads;
static int pack_to_stdout;
static int sparse;
static int thin;
static int path_walk = -1;
static int num_preferred_base;
static struct progress *progress_state;

/* State for reusing object data verbatim out of existing bitmapped packs. */
static struct bitmapped_pack *reuse_packfiles;
static size_t reuse_packfiles_nr;
static size_t reuse_packfiles_used_nr;
static uint32_t reuse_packfile_objects;
static struct bitmap *reuse_packfile_bitmap;

static int use_bitmap_index_default = 1;
static int use_bitmap_index = -1;
static enum {
	NO_PACK_REUSE = 0,
	SINGLE_PACK_REUSE,
	MULTI_PACK_REUSE,
} allow_pack_reuse = SINGLE_PACK_REUSE;
static enum {
	WRITE_BITMAP_FALSE = 0,
	WRITE_BITMAP_QUIET,
	WRITE_BITMAP_TRUE,
} write_bitmap_index;
static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE;

static int exclude_promisor_objects;
static int exclude_promisor_objects_best_effort;

static int use_delta_islands;

/* Delta cache accounting, in bytes. */
static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
static unsigned long cache_max_small_delta_size = 1000;

static unsigned long window_memory_limit = 0;

static struct string_list uri_protocols = STRING_LIST_INIT_NODUP;

/* How to react to objects that are missing from the local repository. */
enum missing_action {
	MA_ERROR = 0,	/* fail if any missing objects are encountered */
	MA_ALLOW_ANY,	/* silently allow ALL missing objects */
	MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
};
static enum missing_action arg_missing_action;
static show_object_fn fn_show_object;

/* An object excluded from the pack, keyed by OID, with its pack hash/URI. */
struct configured_exclusion {
	struct oidmap_entry e;
	char *pack_hash_hex;
	char *uri;
};
static struct oidmap configured_exclusions;

static struct oidset excluded_by_config;
static int name_hash_version = -1;

enum stdin_packs_mode {
	STDIN_PACKS_MODE_NONE,
	STDIN_PACKS_MODE_STANDARD,
	STDIN_PACKS_MODE_FOLLOW,
};

/**
 * Check whether the name_hash_version chosen by user input is appropriate,
 * and also validate whether it is compatible with other features.
296 */ 297static void validate_name_hash_version(void) 298{ 299 if (name_hash_version < 1 || name_hash_version > 2) 300 die(_("invalid --name-hash-version option: %d"), name_hash_version); 301 if (write_bitmap_index && name_hash_version != 1) { 302 warning(_("currently, --write-bitmap-index requires --name-hash-version=1")); 303 name_hash_version = 1; 304 } 305} 306 307static inline uint32_t pack_name_hash_fn(const char *name) 308{ 309 static int seen_version = -1; 310 311 if (seen_version < 0) 312 seen_version = name_hash_version; 313 else if (seen_version != name_hash_version) 314 BUG("name hash version changed from %d to %d mid-process", 315 seen_version, name_hash_version); 316 317 switch (name_hash_version) { 318 case 1: 319 return pack_name_hash(name); 320 321 case 2: 322 return pack_name_hash_v2((const unsigned char *)name); 323 324 default: 325 BUG("invalid name-hash version: %d", name_hash_version); 326 } 327} 328 329/* 330 * stats 331 */ 332static uint32_t written, written_delta; 333static uint32_t reused, reused_delta; 334 335/* 336 * Indexed commits 337 */ 338static struct commit **indexed_commits; 339static unsigned int indexed_commits_nr; 340static unsigned int indexed_commits_alloc; 341 342static void index_commit_for_bitmap(struct commit *commit) 343{ 344 if (indexed_commits_nr >= indexed_commits_alloc) { 345 indexed_commits_alloc = (indexed_commits_alloc + 32) * 2; 346 REALLOC_ARRAY(indexed_commits, indexed_commits_alloc); 347 } 348 349 indexed_commits[indexed_commits_nr++] = commit; 350} 351 352static void *get_delta(struct object_entry *entry) 353{ 354 unsigned long size, base_size, delta_size; 355 void *buf, *base_buf, *delta_buf; 356 enum object_type type; 357 358 buf = odb_read_object(the_repository->objects, &entry->idx.oid, 359 &type, &size); 360 if (!buf) 361 die(_("unable to read %s"), oid_to_hex(&entry->idx.oid)); 362 base_buf = odb_read_object(the_repository->objects, 363 &DELTA(entry)->idx.oid, &type, 364 &base_size); 365 if (!base_buf) 
366 die("unable to read %s", 367 oid_to_hex(&DELTA(entry)->idx.oid)); 368 delta_buf = diff_delta(base_buf, base_size, 369 buf, size, &delta_size, 0); 370 /* 371 * We successfully computed this delta once but dropped it for 372 * memory reasons. Something is very wrong if this time we 373 * recompute and create a different delta. 374 */ 375 if (!delta_buf || delta_size != DELTA_SIZE(entry)) 376 BUG("delta size changed"); 377 free(buf); 378 free(base_buf); 379 return delta_buf; 380} 381 382static unsigned long do_compress(void **pptr, unsigned long size) 383{ 384 git_zstream stream; 385 void *in, *out; 386 unsigned long maxsize; 387 388 git_deflate_init(&stream, pack_compression_level); 389 maxsize = git_deflate_bound(&stream, size); 390 391 in = *pptr; 392 out = xmalloc(maxsize); 393 *pptr = out; 394 395 stream.next_in = in; 396 stream.avail_in = size; 397 stream.next_out = out; 398 stream.avail_out = maxsize; 399 while (git_deflate(&stream, Z_FINISH) == Z_OK) 400 ; /* nothing */ 401 git_deflate_end(&stream); 402 403 free(in); 404 return stream.total_out; 405} 406 407static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f, 408 const struct object_id *oid) 409{ 410 git_zstream stream; 411 unsigned char ibuf[1024 * 16]; 412 unsigned char obuf[1024 * 16]; 413 unsigned long olen = 0; 414 415 git_deflate_init(&stream, pack_compression_level); 416 417 for (;;) { 418 ssize_t readlen; 419 int zret = Z_OK; 420 readlen = read_istream(st, ibuf, sizeof(ibuf)); 421 if (readlen == -1) 422 die(_("unable to read %s"), oid_to_hex(oid)); 423 424 stream.next_in = ibuf; 425 stream.avail_in = readlen; 426 while ((stream.avail_in || readlen == 0) && 427 (zret == Z_OK || zret == Z_BUF_ERROR)) { 428 stream.next_out = obuf; 429 stream.avail_out = sizeof(obuf); 430 zret = git_deflate(&stream, readlen ? 
0 : Z_FINISH);
			hashwrite(f, obuf, stream.next_out - obuf);
			olen += stream.next_out - obuf;
		}
		if (stream.avail_in)
			die(_("deflate error (%d)"), zret);
		if (readlen == 0) {
			if (zret != Z_STREAM_END)
				die(_("deflate error (%d)"), zret);
			break;
		}
	}
	git_deflate_end(&stream);
	return olen;
}

/*
 * we are going to reuse the existing object data as is. make
 * sure it is not corrupt.
 */
static int check_pack_inflate(struct packed_git *p,
		struct pack_window **w_curs,
		off_t offset,
		off_t len,
		unsigned long expect)
{
	git_zstream stream;
	unsigned char fakebuf[4096], *in;
	int st;

	memset(&stream, 0, sizeof(stream));
	git_inflate_init(&stream);
	do {
		in = use_pack(p, w_curs, offset, &stream.avail_in);
		stream.next_in = in;
		/* output is discarded; we only care about sizes and status */
		stream.next_out = fakebuf;
		stream.avail_out = sizeof(fakebuf);
		st = git_inflate(&stream, Z_FINISH);
		offset += stream.next_in - in;
	} while (st == Z_OK || st == Z_BUF_ERROR);
	git_inflate_end(&stream);
	/* 0 only when the stream ended cleanly with the expected sizes */
	return (st == Z_STREAM_END &&
		stream.total_out == expect &&
		stream.total_in == len) ? 0 : -1;
}

/* Copy "len" raw bytes of pack data from "p" at "offset" into "f". */
static void copy_pack_data(struct hashfile *f,
		struct packed_git *p,
		struct pack_window **w_curs,
		off_t offset,
		off_t len)
{
	unsigned char *in;
	unsigned long avail;

	while (len) {
		in = use_pack(p, w_curs, offset, &avail);
		if (avail > len)
			avail = (unsigned long)len;
		hashwrite(f, in, avail);
		offset += avail;
		len -= avail;
	}
}

/*
 * Is the size of "lhs" strictly greater than "rhs"? Avoids the slow
 * size lookup when the cached size or the cache limit already decides.
 */
static inline int oe_size_greater_than(struct packing_data *pack,
				       const struct object_entry *lhs,
				       unsigned long rhs)
{
	if (lhs->size_valid)
		return lhs->size_ > rhs;
	if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
		return 1;
	return oe_get_size_slow(pack, lhs) > rhs;
}

/* Return 0 if we will bust the pack-size limit */
static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry,
					   unsigned long limit, int usable_delta)
{
	unsigned long size, datalen;
	unsigned char header[MAX_PACK_OBJECT_HEADER],
		      dheader[MAX_PACK_OBJECT_HEADER];
	unsigned hdrlen;
	enum object_type type;
	void *buf;
	struct git_istream *st = NULL;
	const unsigned hashsz = the_hash_algo->rawsz;

	if (!usable_delta) {
		/* Big blobs are streamed rather than loaded whole. */
		if (oe_type(entry) == OBJ_BLOB &&
		    oe_size_greater_than(&to_pack, entry,
					 repo_settings_get_big_file_threshold(the_repository)) &&
		    (st = open_istream(the_repository, &entry->idx.oid, &type,
				       &size, NULL)) != NULL)
			buf = NULL;
		else {
			buf = odb_read_object(the_repository->objects,
					      &entry->idx.oid, &type,
					      &size);
			if (!buf)
				die(_("unable to read %s"),
				    oid_to_hex(&entry->idx.oid));
		}
		/*
		 * make sure no cached delta data remains from a
		 * previous attempt before a pack split occurred.
		 */
		FREE_AND_NULL(entry->delta_data);
		entry->z_delta_size = 0;
	} else if (entry->delta_data) {
		/* cached delta from the delta search phase */
		size = DELTA_SIZE(entry);
		buf = entry->delta_data;
		entry->delta_data = NULL;
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	} else {
		/* delta was dropped for memory reasons; recompute it */
		buf = get_delta(entry);
		size = DELTA_SIZE(entry);
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	}

	if (st)	/* large blob case, just assume we don't compress well */
		datalen = size;
	else if (entry->z_delta_size)
		datalen = entry->z_delta_size;
	else
		datalen = do_compress(&buf, size);

	/*
	 * The object header is a byte of 'type' followed by zero or
	 * more bytes of length.
 */
	hdrlen = encode_in_pack_object_header(header, sizeof(header),
					      type, size);

	if (type == OBJ_OFS_DELTA) {
		/*
		 * Deltas with relative base contain an additional
		 * encoding of the relative offset for the delta
		 * base from this object's position in the pack.
		 */
		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		/* varint-encode "ofs" backwards from the end of dheader */
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
	} else if (type == OBJ_REF_DELTA) {
		/*
		 * Deltas with a base reference contain
		 * additional bytes for the base object ID.
		 */
		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
		hdrlen += hashsz;
	} else {
		if (limit && hdrlen + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
	}
	if (st) {
		datalen = write_large_blob_data(st, f, &entry->idx.oid);
		close_istream(st);
	} else {
		hashwrite(f, buf, datalen);
		free(buf);
	}

	return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
				unsigned long limit, int usable_delta)
{
	struct packed_git *p = IN_PACK(entry);
	struct pack_window *w_curs = NULL;
	uint32_t pos;
	off_t offset;
	enum object_type type = oe_type(entry);
	off_t datalen;
	unsigned char header[MAX_PACK_OBJECT_HEADER],
		      dheader[MAX_PACK_OBJECT_HEADER];
	unsigned hdrlen;
	const unsigned hashsz = the_hash_algo->rawsz;
	unsigned long entry_size = SIZE(entry);

	if (DELTA(entry))
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	hdrlen = encode_in_pack_object_header(header, sizeof(header),
					      type, entry_size);

	offset = entry->in_pack_offset;
	if (offset_to_pack_pos(p, offset, &pos) < 0)
		die(_("write_reuse_object: could not locate %s, expected at "
		      "offset %"PRIuMAX" in pack %s"),
		    oid_to_hex(&entry->idx.oid), (uintmax_t)offset,
		    p->pack_name);
	datalen = pack_pos_to_offset(p, pos + 1) - offset;
	if (!pack_to_stdout && p->index_version > 1 &&
	    check_pack_crc(p, &w_curs, offset, datalen,
			   pack_pos_to_index(p, pos))) {
		error(_("bad packed object CRC for %s"),
		      oid_to_hex(&entry->idx.oid));
		unuse_pack(&w_curs);
		/* fall back to recompressing the object from scratch */
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	offset += entry->in_pack_header_size;
	datalen -= entry->in_pack_header_size;

	if (!pack_to_stdout && p->index_version == 1 &&
	    check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
		error(_("corrupt packed object for %s"),
		      oid_to_hex(&entry->idx.oid));
		unuse_pack(&w_curs);
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	if (type == OBJ_OFS_DELTA) {
		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
		reused_delta++;
	} else if (type == OBJ_REF_DELTA) {
		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
		hdrlen += hashsz;
		reused_delta++;
	} else {
		if (limit && hdrlen + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
	}
	copy_pack_data(f, p, &w_curs, offset, datalen);
	unuse_pack(&w_curs);
	reused++;
	return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static off_t write_object(struct hashfile *f,
			  struct object_entry *entry,
			  off_t write_offset)
{
	unsigned long limit;
	off_t len;
	int usable_delta, to_reuse;

	if (!pack_to_stdout)
		crc32_begin(f);

	/* apply size limit if limited packsize and not first object */
	if (!pack_size_limit || !nr_written)
		limit = 0;
	else if (pack_size_limit <= write_offset)
		/*
		 * the earlier object did not fit the limit; avoid
		 * mistaking this with unlimited (i.e. limit = 0).
		 */
		limit = 1;
	else
		limit = pack_size_limit - write_offset;

	/* Can we write this entry as a delta in this pack? */
	if (!DELTA(entry))
		usable_delta = 0;	/* no delta */
	else if (!pack_size_limit)
		usable_delta = 1;	/* unlimited packfile */
	else if (DELTA(entry)->idx.offset == (off_t)-1)
		usable_delta = 0;	/* base was written to another pack */
	else if (DELTA(entry)->idx.offset)
		usable_delta = 1;	/* base already exists in this pack */
	else
		usable_delta = 0;	/* base could end up in another pack */

	/* Can we reuse the existing on-disk representation? */
	if (!reuse_object)
		to_reuse = 0;	/* explicit */
	else if (!IN_PACK(entry))
		to_reuse = 0;	/* can't reuse what we don't have */
	else if (oe_type(entry) == OBJ_REF_DELTA ||
		 oe_type(entry) == OBJ_OFS_DELTA)
				/* check_object() decided it for us ... */
		to_reuse = usable_delta;
				/* ... but pack split may override that */
	else if (oe_type(entry) != entry->in_pack_type)
		to_reuse = 0;	/* pack has delta which is unusable */
	else if (DELTA(entry))
		to_reuse = 0;	/* we want to pack afresh */
	else
		to_reuse = 1;	/* we have it in-pack undeltified,
				 * and we do not need to deltify it.
				 */

	if (!to_reuse)
		len = write_no_reuse_object(f, entry, limit, usable_delta);
	else
		len = write_reuse_object(f, entry, limit, usable_delta);
	if (!len)
		return 0;

	if (usable_delta)
		written_delta++;
	written++;
	if (!pack_to_stdout)
		entry->idx.crc32 = crc32_end(f);
	return len;
}

/* Outcome of trying to write a single object; see write_one(). */
enum write_one_status {
	WRITE_ONE_SKIP = -1, /* already written */
	WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
	WRITE_ONE_WRITTEN = 1, /* normal */
	WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
};

/*
 * Write "e" to the pack (writing its delta base first if needed) and
 * advance *offset by the number of bytes emitted.
 */
static enum write_one_status write_one(struct hashfile *f,
				       struct object_entry *e,
				       off_t *offset)
{
	off_t size;
	int recursing;

	/*
	 * we set offset to 1 (which is an impossible value) to mark
	 * the fact that this object is involved in "write its base
	 * first before writing a deltified object" recursion.
	 */
	recursing = (e->idx.offset == 1);
	if (recursing) {
		warning(_("recursive delta detected for object %s"),
			oid_to_hex(&e->idx.oid));
		return WRITE_ONE_RECURSIVE;
	} else if (e->idx.offset || e->preferred_base) {
		/* offset is non zero if object is written already. */
		return WRITE_ONE_SKIP;
	}

	/* if we are deltified, write out base object first.
 */
	if (DELTA(e)) {
		e->idx.offset = 1; /* now recurse */
		switch (write_one(f, DELTA(e), offset)) {
		case WRITE_ONE_RECURSIVE:
			/* we cannot depend on this one */
			SET_DELTA(e, NULL);
			break;
		default:
			break;
		case WRITE_ONE_BREAK:
			e->idx.offset = recursing;
			return WRITE_ONE_BREAK;
		}
	}

	e->idx.offset = *offset;
	size = write_object(f, e, *offset);
	if (!size) {
		e->idx.offset = recursing;
		return WRITE_ONE_BREAK;
	}
	written_list[nr_written++] = &e->idx;

	/* make sure off_t is sufficiently large not to wrap */
	if (signed_add_overflows(*offset, size))
		die(_("pack too large for current definition of off_t"));
	*offset += size;
	return WRITE_ONE_WRITTEN;
}

/* Ref callback: mark tag tips (and the objects they peel to) as tagged. */
static int mark_tagged(const char *path UNUSED, const char *referent UNUSED, const struct object_id *oid,
		       int flag UNUSED, void *cb_data UNUSED)
{
	struct object_id peeled;
	struct object_entry *entry = packlist_find(&to_pack, oid);

	if (entry)
		entry->tagged = 1;
	if (!peel_iterated_oid(the_repository, oid, &peeled)) {
		entry = packlist_find(&to_pack, &peeled);
		if (entry)
			entry->tagged = 1;
	}
	return 0;
}

/* Delta-island layer of "e"; 0 when no layers were computed. */
static inline unsigned char oe_layer(struct packing_data *pack,
				     struct object_entry *e)
{
	if (!pack->layer)
		return 0;
	return pack->layer[e - pack->objects];
}

/* Append "e" to the write order unless already added or in another layer. */
static inline void add_to_write_order(struct object_entry **wo,
				      unsigned int *endp,
				      struct object_entry *e)
{
	if (e->filled || oe_layer(&to_pack, e) != write_layer)
		return;
	wo[(*endp)++] = e;
	e->filled = 1;
}

/*
 * Iteratively walk the delta tree rooted at "e", appending every node
 * (and its siblings) to the write order.
 */
static void add_descendants_to_write_order(struct object_entry **wo,
					   unsigned int *endp,
					   struct object_entry *e)
{
	int add_to_order = 1;
	while (e) {
		if (add_to_order) {
			struct object_entry *s;
			/* add this node... */
			add_to_write_order(wo, endp, e);
			/* all its siblings... */
			for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
				add_to_write_order(wo, endp, s);
			}
		}
		/* drop down a level to add left subtree nodes if possible */
		if (DELTA_CHILD(e)) {
			add_to_order = 1;
			e = DELTA_CHILD(e);
		} else {
			add_to_order = 0;
			/* our sibling might have some children, it is next */
			if (DELTA_SIBLING(e)) {
				e = DELTA_SIBLING(e);
				continue;
			}
			/* go back to our parent node */
			e = DELTA(e);
			while (e && !DELTA_SIBLING(e)) {
				/* we're on the right side of a subtree, keep
				 * going up until we can go right again */
				e = DELTA(e);
			}
			if (!e) {
				/* done- we hit our original root node */
				return;
			}
			/* pass it off to sibling at this level */
			e = DELTA_SIBLING(e);
		}
	};
}

/* Add "e"'s entire delta family, starting from its topmost base. */
static void add_family_to_write_order(struct object_entry **wo,
				      unsigned int *endp,
				      struct object_entry *e)
{
	struct object_entry *root;

	for (root = e; DELTA(root); root = DELTA(root))
		; /* nothing */
	add_descendants_to_write_order(wo, endp, root);
}

/* Fill "wo" with the write order for the current write_layer. */
static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end)
{
	unsigned int i, last_untagged;
	struct object_entry *objects = to_pack.objects;

	/* First add the objects before the first tagged one. */
	for (i = 0; i < to_pack.nr_objects; i++) {
		if (objects[i].tagged)
			break;
		add_to_write_order(wo, wo_end, &objects[i]);
	}
	last_untagged = i;

	/*
	 * Then fill all the tagged tips.
	 */
	for (; i < to_pack.nr_objects; i++) {
		if (objects[i].tagged)
			add_to_write_order(wo, wo_end, &objects[i]);
	}

	/*
	 * And then all remaining commits and tags.
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (oe_type(&objects[i]) != OBJ_COMMIT &&
		    oe_type(&objects[i]) != OBJ_TAG)
			continue;
		add_to_write_order(wo, wo_end, &objects[i]);
	}

	/*
	 * And then all the trees.
 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (oe_type(&objects[i]) != OBJ_TREE)
			continue;
		add_to_write_order(wo, wo_end, &objects[i]);
	}

	/*
	 * Finally all the rest in really tight order
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer)
			add_family_to_write_order(wo, wo_end, &objects[i]);
	}
}

/*
 * Compute the order in which objects are written: layer by layer
 * (when delta islands are in use), with each layer ordered by
 * compute_layer_order(). Returns an allocated array of
 * to_pack.nr_objects entries, owned by the caller.
 */
static struct object_entry **compute_write_order(void)
{
	uint32_t max_layers = 1;
	unsigned int i, wo_end;

	struct object_entry **wo;
	struct object_entry *objects = to_pack.objects;

	for (i = 0; i < to_pack.nr_objects; i++) {
		objects[i].tagged = 0;
		objects[i].filled = 0;
		SET_DELTA_CHILD(&objects[i], NULL);
		SET_DELTA_SIBLING(&objects[i], NULL);
	}

	/*
	 * Fully connect delta_child/delta_sibling network.
	 * Make sure delta_sibling is sorted in the original
	 * recency order.
	 */
	for (i = to_pack.nr_objects; i > 0;) {
		struct object_entry *e = &objects[--i];
		if (!DELTA(e))
			continue;
		/* Mark me as the first child */
		e->delta_sibling_idx = DELTA(e)->delta_child_idx;
		SET_DELTA_CHILD(DELTA(e), e);
	}

	/*
	 * Mark objects that are at the tip of tags.
	 */
	refs_for_each_tag_ref(get_main_ref_store(the_repository), mark_tagged,
			      NULL);

	if (use_delta_islands) {
		max_layers = compute_pack_layers(&to_pack);
		free_island_marks();
	}

	ALLOC_ARRAY(wo, to_pack.nr_objects);
	wo_end = 0;

	for (; write_layer < max_layers; ++write_layer)
		compute_layer_order(wo, &wo_end);

	if (wo_end != to_pack.nr_objects)
		die(_("ordered %u objects, expected %"PRIu32),
		    wo_end, to_pack.nr_objects);

	return wo;
}


/*
 * A reused set of objects.
All objects in a chunk have the same
 * relative position in the original packfile and the generated
 * packfile.
 */

static struct reused_chunk {
	/* The offset of the first object of this chunk in the original
	 * packfile. */
	off_t original;
	/* The difference for "original" minus the offset of the first object of
	 * this chunk in the generated packfile. */
	off_t difference;
} *reused_chunks;
static int reused_chunks_nr;
static int reused_chunks_alloc;

/*
 * Record that the object at original offset "where" was written with
 * displacement "offset"; consecutive objects with the same
 * displacement are coalesced into one chunk.
 */
static void record_reused_object(off_t where, off_t offset)
{
	if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset)
		return;

	ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
		   reused_chunks_alloc);
	reused_chunks[reused_chunks_nr].original = where;
	reused_chunks[reused_chunks_nr].difference = offset;
	reused_chunks_nr++;
}

/*
 * Binary search to find the chunk that "where" is in. Note
 * that we're not looking for an exact match, just the first
 * chunk that contains it (which implicitly ends at the start
 * of the next chunk.
 */
static off_t find_reused_offset(off_t where)
{
	int lo = 0, hi = reused_chunks_nr;
	while (lo < hi) {
		int mi = lo + ((hi - lo) / 2);
		if (where == reused_chunks[mi].original)
			return reused_chunks[mi].difference;
		if (where < reused_chunks[mi].original)
			hi = mi;
		else
			lo = mi + 1;
	}

	/*
	 * The first chunk starts at zero, so we can't have gone below
	 * there.
	 */
	assert(lo);
	return reused_chunks[lo-1].difference;
}

/*
 * Copy the object at pack position "pos" of "reuse_packfile" into
 * "out", converting OBJ_OFS_DELTA entries to OBJ_REF_DELTA when
 * offsets are not allowed, or rewriting the base offset when earlier
 * chunks have shifted.
 */
static void write_reused_pack_one(struct packed_git *reuse_packfile,
				  size_t pos, struct hashfile *out,
				  off_t pack_start,
				  struct pack_window **w_curs)
{
	off_t offset, next, cur;
	enum object_type type;
	unsigned long size;

	offset = pack_pos_to_offset(reuse_packfile, pos);
	next = pack_pos_to_offset(reuse_packfile, pos + 1);

	record_reused_object(offset,
			     offset - (hashfile_total(out) - pack_start));

	cur = offset;
	type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
	assert(type >= 0);

	if (type == OBJ_OFS_DELTA) {
		off_t base_offset;
		off_t fixup;

		unsigned char header[MAX_PACK_OBJECT_HEADER];
		unsigned len;

		base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
		assert(base_offset != 0);

		/* Convert to REF_DELTA if we must... */
		if (!allow_ofs_delta) {
			uint32_t base_pos;
			struct object_id base_oid;

			if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0)
				die(_("expected object at offset %"PRIuMAX" "
				      "in pack %s"),
				    (uintmax_t)base_offset,
				    reuse_packfile->pack_name);

			nth_packed_object_id(&base_oid, reuse_packfile,
					     pack_pos_to_index(reuse_packfile, base_pos));

			len = encode_in_pack_object_header(header, sizeof(header),
							   OBJ_REF_DELTA, size);
			hashwrite(out, header, len);
			hashwrite(out, base_oid.hash, the_hash_algo->rawsz);
			copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
			return;
		}

		/* Otherwise see if we need to rewrite the offset...
*/ 1132 fixup = find_reused_offset(offset) - 1133 find_reused_offset(base_offset); 1134 if (fixup) { 1135 unsigned char ofs_header[MAX_PACK_OBJECT_HEADER]; 1136 unsigned i, ofs_len; 1137 off_t ofs = offset - base_offset - fixup; 1138 1139 len = encode_in_pack_object_header(header, sizeof(header), 1140 OBJ_OFS_DELTA, size); 1141 1142 i = sizeof(ofs_header) - 1; 1143 ofs_header[i] = ofs & 127; 1144 while (ofs >>= 7) 1145 ofs_header[--i] = 128 | (--ofs & 127); 1146 1147 ofs_len = sizeof(ofs_header) - i; 1148 1149 hashwrite(out, header, len); 1150 hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len); 1151 copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur); 1152 return; 1153 } 1154 1155 /* ...otherwise we have no fixup, and can write it verbatim */ 1156 } 1157 1158 copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset); 1159} 1160 1161static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile, 1162 struct hashfile *out, 1163 struct pack_window **w_curs) 1164{ 1165 size_t pos = 0; 1166 size_t end; 1167 1168 if (reuse_packfile->bitmap_pos) { 1169 /* 1170 * We can't reuse whole chunks verbatim out of 1171 * non-preferred packs since we can't guarantee that 1172 * all duplicate objects were resolved in favor of 1173 * that pack. 1174 * 1175 * Even if we have a whole eword_t worth of bits that 1176 * could be reused, there may be objects between the 1177 * objects corresponding to the first and last bit of 1178 * that word which were selected from a different 1179 * pack, causing us to send duplicate or unwanted 1180 * objects. 1181 * 1182 * Handle non-preferred packs from within 1183 * write_reused_pack(), which inspects and reuses 1184 * individual bits. 1185 */ 1186 return reuse_packfile->bitmap_pos / BITS_IN_EWORD; 1187 } 1188 1189 /* 1190 * Only read through the last word whose bits all correspond 1191 * to objects in the given packfile, since we must stop at a 1192 * word boundary. 
1193 * 1194 * If there is no whole word to read (i.e. the packfile 1195 * contains fewer than BITS_IN_EWORD objects), then we'll 1196 * inspect bits one-by-one in write_reused_pack(). 1197 */ 1198 end = reuse_packfile->bitmap_nr / BITS_IN_EWORD; 1199 if (reuse_packfile_bitmap->word_alloc < end) 1200 BUG("fewer words than expected in reuse_packfile_bitmap"); 1201 1202 while (pos < end && reuse_packfile_bitmap->words[pos] == (eword_t)~0) 1203 pos++; 1204 1205 if (pos) { 1206 off_t to_write; 1207 1208 written = (pos * BITS_IN_EWORD); 1209 to_write = pack_pos_to_offset(reuse_packfile->p, written) 1210 - sizeof(struct pack_header); 1211 1212 /* We're recording one chunk, not one object. */ 1213 record_reused_object(sizeof(struct pack_header), 0); 1214 hashflush(out); 1215 copy_pack_data(out, reuse_packfile->p, w_curs, 1216 sizeof(struct pack_header), to_write); 1217 1218 display_progress(progress_state, written); 1219 } 1220 return pos; 1221} 1222 1223static void write_reused_pack(struct bitmapped_pack *reuse_packfile, 1224 struct hashfile *f) 1225{ 1226 size_t i = reuse_packfile->bitmap_pos / BITS_IN_EWORD; 1227 uint32_t offset; 1228 off_t pack_start = hashfile_total(f) - sizeof(struct pack_header); 1229 struct pack_window *w_curs = NULL; 1230 1231 if (allow_ofs_delta) 1232 i = write_reused_pack_verbatim(reuse_packfile, f, &w_curs); 1233 1234 for (; i < reuse_packfile_bitmap->word_alloc; ++i) { 1235 eword_t word = reuse_packfile_bitmap->words[i]; 1236 size_t pos = (i * BITS_IN_EWORD); 1237 1238 for (offset = 0; offset < BITS_IN_EWORD; ++offset) { 1239 uint32_t pack_pos; 1240 if ((word >> offset) == 0) 1241 break; 1242 1243 offset += ewah_bit_ctz64(word >> offset); 1244 if (pos + offset < reuse_packfile->bitmap_pos) 1245 continue; 1246 if (pos + offset >= reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr) 1247 goto done; 1248 1249 if (reuse_packfile->bitmap_pos) { 1250 /* 1251 * When doing multi-pack reuse on a 1252 * non-preferred pack, translate bit positions 
1253 * from the MIDX pseudo-pack order back to their 1254 * pack-relative positions before attempting 1255 * reuse. 1256 */ 1257 struct multi_pack_index *m = reuse_packfile->from_midx; 1258 uint32_t midx_pos; 1259 off_t pack_ofs; 1260 1261 if (!m) 1262 BUG("non-zero bitmap position without MIDX"); 1263 1264 midx_pos = pack_pos_to_midx(m, pos + offset); 1265 pack_ofs = nth_midxed_offset(m, midx_pos); 1266 1267 if (offset_to_pack_pos(reuse_packfile->p, 1268 pack_ofs, &pack_pos) < 0) 1269 BUG("could not find expected object at offset %"PRIuMAX" in pack %s", 1270 (uintmax_t)pack_ofs, 1271 pack_basename(reuse_packfile->p)); 1272 } else { 1273 /* 1274 * Can use bit positions directly, even for MIDX 1275 * bitmaps. See comment in try_partial_reuse() 1276 * for why. 1277 */ 1278 pack_pos = pos + offset; 1279 } 1280 1281 write_reused_pack_one(reuse_packfile->p, pack_pos, f, 1282 pack_start, &w_curs); 1283 display_progress(progress_state, ++written); 1284 } 1285 } 1286 1287done: 1288 unuse_pack(&w_curs); 1289} 1290 1291static void write_excluded_by_configs(void) 1292{ 1293 struct oidset_iter iter; 1294 const struct object_id *oid; 1295 1296 oidset_iter_init(&excluded_by_config, &iter); 1297 while ((oid = oidset_iter_next(&iter))) { 1298 struct configured_exclusion *ex = 1299 oidmap_get(&configured_exclusions, oid); 1300 1301 if (!ex) 1302 BUG("configured exclusion wasn't configured"); 1303 write_in_full(1, ex->pack_hash_hex, strlen(ex->pack_hash_hex)); 1304 write_in_full(1, " ", 1); 1305 write_in_full(1, ex->uri, strlen(ex->uri)); 1306 write_in_full(1, "\n", 1); 1307 } 1308} 1309 1310static const char no_split_warning[] = N_( 1311"disabling bitmap writing, packs are split due to pack.packSizeLimit" 1312); 1313 1314static void write_pack_file(void) 1315{ 1316 uint32_t i = 0, j; 1317 struct hashfile *f; 1318 off_t offset; 1319 uint32_t nr_remaining = nr_result; 1320 time_t last_mtime = 0; 1321 struct object_entry **write_order; 1322 1323 if (progress > pack_to_stdout) 1324 
progress_state = start_progress(the_repository, 1325 _("Writing objects"), nr_result); 1326 ALLOC_ARRAY(written_list, to_pack.nr_objects); 1327 write_order = compute_write_order(); 1328 1329 do { 1330 unsigned char hash[GIT_MAX_RAWSZ]; 1331 char *pack_tmp_name = NULL; 1332 1333 if (pack_to_stdout) 1334 f = hashfd_throughput(the_repository->hash_algo, 1, 1335 "<stdout>", progress_state); 1336 else 1337 f = create_tmp_packfile(the_repository, &pack_tmp_name); 1338 1339 offset = write_pack_header(f, nr_remaining); 1340 1341 if (reuse_packfiles_nr) { 1342 assert(pack_to_stdout); 1343 for (j = 0; j < reuse_packfiles_nr; j++) { 1344 reused_chunks_nr = 0; 1345 write_reused_pack(&reuse_packfiles[j], f); 1346 if (reused_chunks_nr) 1347 reuse_packfiles_used_nr++; 1348 } 1349 offset = hashfile_total(f); 1350 } 1351 1352 nr_written = 0; 1353 for (; i < to_pack.nr_objects; i++) { 1354 struct object_entry *e = write_order[i]; 1355 if (write_one(f, e, &offset) == WRITE_ONE_BREAK) 1356 break; 1357 display_progress(progress_state, written); 1358 } 1359 1360 if (pack_to_stdout) { 1361 /* 1362 * We never fsync when writing to stdout since we may 1363 * not be writing to an actual pack file. For instance, 1364 * the upload-pack code passes a pipe here. Calling 1365 * fsync on a pipe results in unnecessary 1366 * synchronization with the reader on some platforms. 1367 */ 1368 finalize_hashfile(f, hash, FSYNC_COMPONENT_NONE, 1369 CSUM_HASH_IN_STREAM | CSUM_CLOSE); 1370 } else if (nr_written == nr_remaining) { 1371 finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 1372 CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); 1373 } else { 1374 /* 1375 * If we wrote the wrong number of entries in the 1376 * header, rewrite it like in fast-import. 
1377 */ 1378 1379 int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0); 1380 fixup_pack_header_footer(the_hash_algo, fd, hash, 1381 pack_tmp_name, nr_written, 1382 hash, offset); 1383 close(fd); 1384 if (write_bitmap_index) { 1385 if (write_bitmap_index != WRITE_BITMAP_QUIET) 1386 warning(_(no_split_warning)); 1387 write_bitmap_index = 0; 1388 } 1389 } 1390 1391 if (!pack_to_stdout) { 1392 struct stat st; 1393 struct strbuf tmpname = STRBUF_INIT; 1394 struct bitmap_writer bitmap_writer; 1395 char *idx_tmp_name = NULL; 1396 1397 /* 1398 * Packs are runtime accessed in their mtime 1399 * order since newer packs are more likely to contain 1400 * younger objects. So if we are creating multiple 1401 * packs then we should modify the mtime of later ones 1402 * to preserve this property. 1403 */ 1404 if (stat(pack_tmp_name, &st) < 0) { 1405 warning_errno(_("failed to stat %s"), pack_tmp_name); 1406 } else if (!last_mtime) { 1407 last_mtime = st.st_mtime; 1408 } else { 1409 struct utimbuf utb; 1410 utb.actime = st.st_atime; 1411 utb.modtime = --last_mtime; 1412 if (utime(pack_tmp_name, &utb) < 0) 1413 warning_errno(_("failed utime() on %s"), pack_tmp_name); 1414 } 1415 1416 strbuf_addf(&tmpname, "%s-%s.", base_name, 1417 hash_to_hex(hash)); 1418 1419 if (write_bitmap_index) { 1420 bitmap_writer_init(&bitmap_writer, 1421 the_repository, &to_pack, 1422 NULL); 1423 bitmap_writer_set_checksum(&bitmap_writer, hash); 1424 bitmap_writer_build_type_index(&bitmap_writer, 1425 written_list); 1426 } 1427 1428 if (cruft) 1429 pack_idx_opts.flags |= WRITE_MTIMES; 1430 1431 stage_tmp_packfiles(the_repository, &tmpname, 1432 pack_tmp_name, written_list, 1433 nr_written, &to_pack, 1434 &pack_idx_opts, hash, 1435 &idx_tmp_name); 1436 1437 if (write_bitmap_index) { 1438 size_t tmpname_len = tmpname.len; 1439 1440 strbuf_addstr(&tmpname, "bitmap"); 1441 stop_progress(&progress_state); 1442 1443 bitmap_writer_show_progress(&bitmap_writer, 1444 progress); 1445 
bitmap_writer_select_commits(&bitmap_writer, 1446 indexed_commits, 1447 indexed_commits_nr); 1448 if (bitmap_writer_build(&bitmap_writer) < 0) 1449 die(_("failed to write bitmap index")); 1450 bitmap_writer_finish(&bitmap_writer, 1451 written_list, 1452 tmpname.buf, write_bitmap_options); 1453 bitmap_writer_free(&bitmap_writer); 1454 write_bitmap_index = 0; 1455 strbuf_setlen(&tmpname, tmpname_len); 1456 } 1457 1458 rename_tmp_packfile_idx(the_repository, &tmpname, &idx_tmp_name); 1459 1460 free(idx_tmp_name); 1461 strbuf_release(&tmpname); 1462 free(pack_tmp_name); 1463 puts(hash_to_hex(hash)); 1464 } 1465 1466 /* mark written objects as written to previous pack */ 1467 for (j = 0; j < nr_written; j++) { 1468 written_list[j]->offset = (off_t)-1; 1469 } 1470 nr_remaining -= nr_written; 1471 } while (nr_remaining && i < to_pack.nr_objects); 1472 1473 free(written_list); 1474 free(write_order); 1475 stop_progress(&progress_state); 1476 if (written != nr_result) 1477 die(_("wrote %"PRIu32" objects while expecting %"PRIu32), 1478 written, nr_result); 1479 trace2_data_intmax("pack-objects", the_repository, 1480 "write_pack_file/wrote", nr_result); 1481} 1482 1483static int no_try_delta(const char *path) 1484{ 1485 static struct attr_check *check; 1486 1487 if (!check) 1488 check = attr_check_initl("delta", NULL); 1489 git_check_attr(the_repository->index, path, check); 1490 if (ATTR_FALSE(check->items[0].value)) 1491 return 1; 1492 return 0; 1493} 1494 1495/* 1496 * When adding an object, check whether we have already added it 1497 * to our packing list. If so, we can skip. However, if we are 1498 * being asked to excludei t, but the previous mention was to include 1499 * it, make sure to adjust its flags and tweak our numbers accordingly. 1500 * 1501 * As an optimization, we pass out the index position where we would have 1502 * found the item, since that saves us from having to look it up again a 1503 * few lines later when we want to add the new entry. 
1504 */ 1505static int have_duplicate_entry(const struct object_id *oid, 1506 int exclude) 1507{ 1508 struct object_entry *entry; 1509 1510 if (reuse_packfile_bitmap && 1511 bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid)) 1512 return 1; 1513 1514 entry = packlist_find(&to_pack, oid); 1515 if (!entry) 1516 return 0; 1517 1518 if (exclude) { 1519 if (!entry->preferred_base) 1520 nr_result--; 1521 entry->preferred_base = 1; 1522 } 1523 1524 return 1; 1525} 1526 1527static int want_cruft_object_mtime(struct repository *r, 1528 const struct object_id *oid, 1529 unsigned flags, uint32_t mtime) 1530{ 1531 struct packed_git **cache; 1532 1533 for (cache = kept_pack_cache(r, flags); *cache; cache++) { 1534 struct packed_git *p = *cache; 1535 off_t ofs; 1536 uint32_t candidate_mtime; 1537 1538 ofs = find_pack_entry_one(oid, p); 1539 if (!ofs) 1540 continue; 1541 1542 /* 1543 * We have a copy of the object 'oid' in a non-cruft 1544 * pack. We can avoid packing an additional copy 1545 * regardless of what the existing copy's mtime is since 1546 * it is outside of a cruft pack. 1547 */ 1548 if (!p->is_cruft) 1549 return 0; 1550 1551 /* 1552 * If we have a copy of the object 'oid' in a cruft 1553 * pack, then either read the cruft pack's mtime for 1554 * that object, or, if that can't be loaded, assume the 1555 * pack's mtime itself. 1556 */ 1557 if (!load_pack_mtimes(p)) { 1558 uint32_t pos; 1559 if (offset_to_pack_pos(p, ofs, &pos) < 0) 1560 continue; 1561 candidate_mtime = nth_packed_mtime(p, pos); 1562 } else { 1563 candidate_mtime = p->mtime; 1564 } 1565 1566 /* 1567 * We have a surviving copy of the object in a cruft 1568 * pack whose mtime is greater than or equal to the one 1569 * we are considering. We can thus avoid packing an 1570 * additional copy of that object. 
1571 */ 1572 if (mtime <= candidate_mtime) 1573 return 0; 1574 } 1575 1576 return -1; 1577} 1578 1579static int want_found_object(const struct object_id *oid, int exclude, 1580 struct packed_git *p, uint32_t mtime) 1581{ 1582 if (exclude) 1583 return 1; 1584 if (incremental) 1585 return 0; 1586 1587 if (!is_pack_valid(p)) 1588 return -1; 1589 1590 /* 1591 * When asked to do --local (do not include an object that appears in a 1592 * pack we borrow from elsewhere) or --honor-pack-keep (do not include 1593 * an object that appears in a pack marked with .keep), finding a pack 1594 * that matches the criteria is sufficient for us to decide to omit it. 1595 * However, even if this pack does not satisfy the criteria, we need to 1596 * make sure no copy of this object appears in _any_ pack that makes us 1597 * to omit the object, so we need to check all the packs. 1598 * 1599 * We can however first check whether these options can possibly matter; 1600 * if they do not matter we know we want the object in generated pack. 1601 * Otherwise, we signal "-1" at the end to tell the caller that we do 1602 * not know either way, and it needs to check more packs. 1603 */ 1604 1605 /* 1606 * Objects in packs borrowed from elsewhere are discarded regardless of 1607 * if they appear in other packs that weren't borrowed. 1608 */ 1609 if (local && !p->pack_local) 1610 return 0; 1611 1612 /* 1613 * Then handle .keep first, as we have a fast(er) path there. 1614 */ 1615 if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) { 1616 /* 1617 * Set the flags for the kept-pack cache to be the ones we want 1618 * to ignore. 1619 * 1620 * That is, if we are ignoring objects in on-disk keep packs, 1621 * then we want to search through the on-disk keep and ignore 1622 * the in-core ones. 
1623 */ 1624 unsigned flags = 0; 1625 if (ignore_packed_keep_on_disk) 1626 flags |= ON_DISK_KEEP_PACKS; 1627 if (ignore_packed_keep_in_core) 1628 flags |= IN_CORE_KEEP_PACKS; 1629 1630 /* 1631 * If the object is in a pack that we want to ignore, *and* we 1632 * don't have any cruft packs that are being retained, we can 1633 * abort quickly. 1634 */ 1635 if (!ignore_packed_keep_in_core_has_cruft) { 1636 if (ignore_packed_keep_on_disk && p->pack_keep) 1637 return 0; 1638 if (ignore_packed_keep_in_core && p->pack_keep_in_core) 1639 return 0; 1640 if (has_object_kept_pack(p->repo, oid, flags)) 1641 return 0; 1642 } else { 1643 /* 1644 * But if there is at least one cruft pack which 1645 * is being kept, we only want to include the 1646 * provided object if it has a strictly greater 1647 * mtime than any existing cruft copy. 1648 */ 1649 if (!want_cruft_object_mtime(p->repo, oid, flags, 1650 mtime)) 1651 return 0; 1652 } 1653 } 1654 1655 /* 1656 * At this point we know definitively that either we don't care about 1657 * keep-packs, or the object is not in one. Keep checking other 1658 * conditions... 
1659 */ 1660 if (!local || !have_non_local_packs) 1661 return 1; 1662 1663 /* we don't know yet; keep looking for more packs */ 1664 return -1; 1665} 1666 1667static int want_object_in_pack_one(struct packed_git *p, 1668 const struct object_id *oid, 1669 int exclude, 1670 struct packed_git **found_pack, 1671 off_t *found_offset, 1672 uint32_t found_mtime) 1673{ 1674 off_t offset; 1675 1676 if (p == *found_pack) 1677 offset = *found_offset; 1678 else 1679 offset = find_pack_entry_one(oid, p); 1680 1681 if (offset) { 1682 if (!*found_pack) { 1683 if (!is_pack_valid(p)) 1684 return -1; 1685 *found_offset = offset; 1686 *found_pack = p; 1687 } 1688 return want_found_object(oid, exclude, p, found_mtime); 1689 } 1690 return -1; 1691} 1692 1693/* 1694 * Check whether we want the object in the pack (e.g., we do not want 1695 * objects found in non-local stores if the "--local" option was used). 1696 * 1697 * If the caller already knows an existing pack it wants to take the object 1698 * from, that is passed in *found_pack and *found_offset; otherwise this 1699 * function finds if there is any pack that has the object and returns the pack 1700 * and its offset in these variables. 1701 */ 1702static int want_object_in_pack_mtime(const struct object_id *oid, 1703 int exclude, 1704 struct packed_git **found_pack, 1705 off_t *found_offset, 1706 uint32_t found_mtime) 1707{ 1708 int want; 1709 struct odb_source *source; 1710 struct list_head *pos; 1711 1712 if (!exclude && local) { 1713 /* 1714 * Note that we start iterating at `sources->next` so that we 1715 * skip the local object source. 
1716 */ 1717 struct odb_source *source = the_repository->objects->sources->next; 1718 for (; source; source = source->next) 1719 if (has_loose_object(source, oid)) 1720 return 0; 1721 } 1722 1723 /* 1724 * If we already know the pack object lives in, start checks from that 1725 * pack - in the usual case when neither --local was given nor .keep files 1726 * are present we will determine the answer right now. 1727 */ 1728 if (*found_pack) { 1729 want = want_found_object(oid, exclude, *found_pack, 1730 found_mtime); 1731 if (want != -1) 1732 return want; 1733 1734 *found_pack = NULL; 1735 *found_offset = 0; 1736 } 1737 1738 odb_prepare_alternates(the_repository->objects); 1739 1740 for (source = the_repository->objects->sources; source; source = source->next) { 1741 struct multi_pack_index *m = get_multi_pack_index(source); 1742 struct pack_entry e; 1743 1744 if (m && fill_midx_entry(m, oid, &e)) { 1745 want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset, found_mtime); 1746 if (want != -1) 1747 return want; 1748 } 1749 } 1750 1751 list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) { 1752 struct packed_git *p = list_entry(pos, struct packed_git, mru); 1753 want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); 1754 if (!exclude && want > 0) 1755 list_move(&p->mru, 1756 packfile_store_get_packs_mru(the_repository->objects->packfiles)); 1757 if (want != -1) 1758 return want; 1759 } 1760 1761 if (uri_protocols.nr) { 1762 struct configured_exclusion *ex = 1763 oidmap_get(&configured_exclusions, oid); 1764 int i; 1765 const char *p; 1766 1767 if (ex) { 1768 for (i = 0; i < uri_protocols.nr; i++) { 1769 if (skip_prefix(ex->uri, 1770 uri_protocols.items[i].string, 1771 &p) && 1772 *p == ':') { 1773 oidset_insert(&excluded_by_config, oid); 1774 return 0; 1775 } 1776 } 1777 } 1778 } 1779 1780 return 1; 1781} 1782 1783static inline int want_object_in_pack(const struct object_id *oid, 1784 
int exclude, 1785 struct packed_git **found_pack, 1786 off_t *found_offset) 1787{ 1788 return want_object_in_pack_mtime(oid, exclude, found_pack, found_offset, 1789 0); 1790} 1791 1792static struct object_entry *create_object_entry(const struct object_id *oid, 1793 enum object_type type, 1794 uint32_t hash, 1795 int exclude, 1796 int no_try_delta, 1797 struct packed_git *found_pack, 1798 off_t found_offset) 1799{ 1800 struct object_entry *entry; 1801 1802 entry = packlist_alloc(&to_pack, oid); 1803 entry->hash = hash; 1804 oe_set_type(entry, type); 1805 if (exclude) 1806 entry->preferred_base = 1; 1807 else 1808 nr_result++; 1809 if (found_pack) { 1810 oe_set_in_pack(&to_pack, entry, found_pack); 1811 entry->in_pack_offset = found_offset; 1812 } 1813 1814 entry->no_try_delta = no_try_delta; 1815 1816 return entry; 1817} 1818 1819static const char no_closure_warning[] = N_( 1820"disabling bitmap writing, as some objects are not being packed" 1821); 1822 1823static int add_object_entry(const struct object_id *oid, enum object_type type, 1824 const char *name, int exclude) 1825{ 1826 struct packed_git *found_pack = NULL; 1827 off_t found_offset = 0; 1828 1829 display_progress(progress_state, ++nr_seen); 1830 1831 if (have_duplicate_entry(oid, exclude)) 1832 return 0; 1833 1834 if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) { 1835 /* The pack is missing an object, so it will not have closure */ 1836 if (write_bitmap_index) { 1837 if (write_bitmap_index != WRITE_BITMAP_QUIET) 1838 warning(_(no_closure_warning)); 1839 write_bitmap_index = 0; 1840 } 1841 return 0; 1842 } 1843 1844 create_object_entry(oid, type, pack_name_hash_fn(name), 1845 exclude, name && no_try_delta(name), 1846 found_pack, found_offset); 1847 return 1; 1848} 1849 1850static int add_object_entry_from_bitmap(const struct object_id *oid, 1851 enum object_type type, 1852 int flags UNUSED, uint32_t name_hash, 1853 struct packed_git *pack, off_t offset, 1854 void *payload UNUSED) 1855{ 
1856 display_progress(progress_state, ++nr_seen); 1857 1858 if (have_duplicate_entry(oid, 0)) 1859 return 0; 1860 1861 if (!want_object_in_pack(oid, 0, &pack, &offset)) 1862 return 0; 1863 1864 create_object_entry(oid, type, name_hash, 0, 0, pack, offset); 1865 return 1; 1866} 1867 1868struct pbase_tree_cache { 1869 struct object_id oid; 1870 int ref; 1871 int temporary; 1872 void *tree_data; 1873 unsigned long tree_size; 1874}; 1875 1876static struct pbase_tree_cache *(pbase_tree_cache[256]); 1877static int pbase_tree_cache_ix(const struct object_id *oid) 1878{ 1879 return oid->hash[0] % ARRAY_SIZE(pbase_tree_cache); 1880} 1881static int pbase_tree_cache_ix_incr(int ix) 1882{ 1883 return (ix+1) % ARRAY_SIZE(pbase_tree_cache); 1884} 1885 1886static struct pbase_tree { 1887 struct pbase_tree *next; 1888 /* This is a phony "cache" entry; we are not 1889 * going to evict it or find it through _get() 1890 * mechanism -- this is for the toplevel node that 1891 * would almost always change with any commit. 1892 */ 1893 struct pbase_tree_cache pcache; 1894} *pbase_tree; 1895 1896static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid) 1897{ 1898 struct pbase_tree_cache *ent, *nent; 1899 void *data; 1900 unsigned long size; 1901 enum object_type type; 1902 int neigh; 1903 int my_ix = pbase_tree_cache_ix(oid); 1904 int available_ix = -1; 1905 1906 /* pbase-tree-cache acts as a limited hashtable. 1907 * your object will be found at your index or within a few 1908 * slots after that slot if it is cached. 1909 */ 1910 for (neigh = 0; neigh < 8; neigh++) { 1911 ent = pbase_tree_cache[my_ix]; 1912 if (ent && oideq(&ent->oid, oid)) { 1913 ent->ref++; 1914 return ent; 1915 } 1916 else if (((available_ix < 0) && (!ent || !ent->ref)) || 1917 ((0 <= available_ix) && 1918 (!ent && pbase_tree_cache[available_ix]))) 1919 available_ix = my_ix; 1920 if (!ent) 1921 break; 1922 my_ix = pbase_tree_cache_ix_incr(my_ix); 1923 } 1924 1925 /* Did not find one. 
Either we got a bogus request or 1926 * we need to read and perhaps cache. 1927 */ 1928 data = odb_read_object(the_repository->objects, oid, &type, &size); 1929 if (!data) 1930 return NULL; 1931 if (type != OBJ_TREE) { 1932 free(data); 1933 return NULL; 1934 } 1935 1936 /* We need to either cache or return a throwaway copy */ 1937 1938 if (available_ix < 0) 1939 ent = NULL; 1940 else { 1941 ent = pbase_tree_cache[available_ix]; 1942 my_ix = available_ix; 1943 } 1944 1945 if (!ent) { 1946 nent = xmalloc(sizeof(*nent)); 1947 nent->temporary = (available_ix < 0); 1948 } 1949 else { 1950 /* evict and reuse */ 1951 free(ent->tree_data); 1952 nent = ent; 1953 } 1954 oidcpy(&nent->oid, oid); 1955 nent->tree_data = data; 1956 nent->tree_size = size; 1957 nent->ref = 1; 1958 if (!nent->temporary) 1959 pbase_tree_cache[my_ix] = nent; 1960 return nent; 1961} 1962 1963static void pbase_tree_put(struct pbase_tree_cache *cache) 1964{ 1965 if (!cache->temporary) { 1966 cache->ref--; 1967 return; 1968 } 1969 free(cache->tree_data); 1970 free(cache); 1971} 1972 1973static size_t name_cmp_len(const char *name) 1974{ 1975 return strcspn(name, "\n/"); 1976} 1977 1978static void add_pbase_object(struct tree_desc *tree, 1979 const char *name, 1980 size_t cmplen, 1981 const char *fullname) 1982{ 1983 struct name_entry entry; 1984 int cmp; 1985 1986 while (tree_entry(tree,&entry)) { 1987 if (S_ISGITLINK(entry.mode)) 1988 continue; 1989 cmp = tree_entry_len(&entry) != cmplen ? 
1 : 1990 memcmp(name, entry.path, cmplen); 1991 if (cmp > 0) 1992 continue; 1993 if (cmp < 0) 1994 return; 1995 if (name[cmplen] != '/') { 1996 add_object_entry(&entry.oid, 1997 object_type(entry.mode), 1998 fullname, 1); 1999 return; 2000 } 2001 if (S_ISDIR(entry.mode)) { 2002 struct tree_desc sub; 2003 struct pbase_tree_cache *tree; 2004 const char *down = name+cmplen+1; 2005 size_t downlen = name_cmp_len(down); 2006 2007 tree = pbase_tree_get(&entry.oid); 2008 if (!tree) 2009 return; 2010 init_tree_desc(&sub, &tree->oid, 2011 tree->tree_data, tree->tree_size); 2012 2013 add_pbase_object(&sub, down, downlen, fullname); 2014 pbase_tree_put(tree); 2015 } 2016 } 2017} 2018 2019static unsigned *done_pbase_paths; 2020static int done_pbase_paths_num; 2021static int done_pbase_paths_alloc; 2022static int done_pbase_path_pos(unsigned hash) 2023{ 2024 int lo = 0; 2025 int hi = done_pbase_paths_num; 2026 while (lo < hi) { 2027 int mi = lo + (hi - lo) / 2; 2028 if (done_pbase_paths[mi] == hash) 2029 return mi; 2030 if (done_pbase_paths[mi] < hash) 2031 hi = mi; 2032 else 2033 lo = mi + 1; 2034 } 2035 return -lo-1; 2036} 2037 2038static int check_pbase_path(unsigned hash) 2039{ 2040 int pos = done_pbase_path_pos(hash); 2041 if (0 <= pos) 2042 return 1; 2043 pos = -pos - 1; 2044 ALLOC_GROW(done_pbase_paths, 2045 done_pbase_paths_num + 1, 2046 done_pbase_paths_alloc); 2047 done_pbase_paths_num++; 2048 if (pos < done_pbase_paths_num) 2049 MOVE_ARRAY(done_pbase_paths + pos + 1, done_pbase_paths + pos, 2050 done_pbase_paths_num - pos - 1); 2051 done_pbase_paths[pos] = hash; 2052 return 0; 2053} 2054 2055static void add_preferred_base_object(const char *name) 2056{ 2057 struct pbase_tree *it; 2058 size_t cmplen; 2059 unsigned hash = pack_name_hash_fn(name); 2060 2061 if (!num_preferred_base || check_pbase_path(hash)) 2062 return; 2063 2064 cmplen = name_cmp_len(name); 2065 for (it = pbase_tree; it; it = it->next) { 2066 if (cmplen == 0) { 2067 add_object_entry(&it->pcache.oid, 
OBJ_TREE, NULL, 1); 2068 } 2069 else { 2070 struct tree_desc tree; 2071 init_tree_desc(&tree, &it->pcache.oid, 2072 it->pcache.tree_data, it->pcache.tree_size); 2073 add_pbase_object(&tree, name, cmplen, name); 2074 } 2075 } 2076} 2077 2078static void add_preferred_base(struct object_id *oid) 2079{ 2080 struct pbase_tree *it; 2081 void *data; 2082 unsigned long size; 2083 struct object_id tree_oid; 2084 2085 if (window <= num_preferred_base++) 2086 return; 2087 2088 data = odb_read_object_peeled(the_repository->objects, oid, 2089 OBJ_TREE, &size, &tree_oid); 2090 if (!data) 2091 return; 2092 2093 for (it = pbase_tree; it; it = it->next) { 2094 if (oideq(&it->pcache.oid, &tree_oid)) { 2095 free(data); 2096 return; 2097 } 2098 } 2099 2100 CALLOC_ARRAY(it, 1); 2101 it->next = pbase_tree; 2102 pbase_tree = it; 2103 2104 oidcpy(&it->pcache.oid, &tree_oid); 2105 it->pcache.tree_data = data; 2106 it->pcache.tree_size = size; 2107} 2108 2109static void cleanup_preferred_base(void) 2110{ 2111 struct pbase_tree *it; 2112 unsigned i; 2113 2114 it = pbase_tree; 2115 pbase_tree = NULL; 2116 while (it) { 2117 struct pbase_tree *tmp = it; 2118 it = tmp->next; 2119 free(tmp->pcache.tree_data); 2120 free(tmp); 2121 } 2122 2123 for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) { 2124 if (!pbase_tree_cache[i]) 2125 continue; 2126 free(pbase_tree_cache[i]->tree_data); 2127 FREE_AND_NULL(pbase_tree_cache[i]); 2128 } 2129 2130 FREE_AND_NULL(done_pbase_paths); 2131 done_pbase_paths_num = done_pbase_paths_alloc = 0; 2132} 2133 2134/* 2135 * Return 1 iff the object specified by "delta" can be sent 2136 * literally as a delta against the base in "base_sha1". If 2137 * so, then *base_out will point to the entry in our packing 2138 * list, or NULL if we must use the external-base list. 2139 * 2140 * Depth value does not matter - find_deltas() will 2141 * never consider reused delta as the base object to 2142 * deltify other objects against, in order to avoid 2143 * circular deltas. 
 */
static int can_reuse_delta(const struct object_id *base_oid,
			   struct object_entry *delta,
			   struct object_entry **base_out)
{
	struct object_entry *base;

	/*
	 * First see if we're already sending the base (or it's explicitly in
	 * our "excluded" list).
	 */
	base = packlist_find(&to_pack, base_oid);
	if (base) {
		if (!in_same_island(&delta->idx.oid, &base->idx.oid))
			return 0;
		*base_out = base;
		return 1;
	}

	/*
	 * Otherwise, reachability bitmaps may tell us if the receiver has it,
	 * even if it was buried too deep in history to make it into the
	 * packing list.
	 */
	if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, base_oid)) {
		if (use_delta_islands) {
			if (!in_same_island(&delta->idx.oid, base_oid))
				return 0;
		}
		/* base is on the receiving side but not in our packlist */
		*base_out = NULL;
		return 1;
	}

	return 0;
}

/*
 * Batch-fetch from the promisor remote every object at or after
 * object_index_start in to_pack that cannot be read from the local
 * object database. Issuing one bulk request avoids a lazy fetch per
 * missing object when check_object() falls back to reading objects.
 */
static void prefetch_to_pack(uint32_t object_index_start) {
	struct oid_array to_fetch = OID_ARRAY_INIT;
	uint32_t i;

	for (i = object_index_start; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = to_pack.objects + i;

		/* a zero return means the object is already available locally */
		if (!odb_read_object_info_extended(the_repository->objects,
						   &entry->idx.oid,
						   NULL,
						   OBJECT_INFO_FOR_PREFETCH))
			continue;
		oid_array_append(&to_fetch, &entry->idx.oid);
	}
	promisor_remote_get_direct(the_repository,
				   to_fetch.oid, to_fetch.nr);
	oid_array_clear(&to_fetch);
}

/*
 * Determine the type and size of "entry", as cheaply as possible. When
 * the object lives in a pack we inspect its header in place, and for
 * deltas decide whether the on-disk delta representation can be reused
 * (recording base/child/sibling links via SET_DELTA*). Otherwise, or on
 * any failure, fall back to odb_read_object_info_extended(); for
 * promisor-remote repositories a failed read triggers a bulk prefetch
 * (see prefetch_to_pack) before one retry.
 */
static void check_object(struct object_entry *entry, uint32_t object_index)
{
	unsigned long canonical_size;
	enum object_type type;
	struct object_info oi = {.typep = &type, .sizep = &canonical_size};

	if (IN_PACK(entry)) {
		struct packed_git *p = IN_PACK(entry);
		struct pack_window *w_curs = NULL;
		int have_base = 0;
		struct object_id base_ref;
		struct object_entry *base_entry;
		unsigned long used, used_0;
		unsigned long avail;
		off_t ofs;
		unsigned char *buf, c;
		enum object_type type;
		unsigned long in_pack_size;

		buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);

		/*
		 * We want in_pack_type even if we do not reuse delta
		 * since non-delta representations could still be reused.
		 */
		used = unpack_object_header_buffer(buf, avail,
						   &type,
						   &in_pack_size);
		if (used == 0)
			goto give_up;

		if (type < 0)
			BUG("invalid type %d", type);
		entry->in_pack_type = type;

		/*
		 * Determine if this is a delta and if so whether we can
		 * reuse it or not.  Otherwise let's find out as cheaply as
		 * possible what the actual type and size for this object is.
		 */
		switch (entry->in_pack_type) {
		default:
			/* Not a delta hence we've already got all we need. */
			oe_set_type(entry, entry->in_pack_type);
			SET_SIZE(entry, in_pack_size);
			entry->in_pack_header_size = used;
			if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
				goto give_up;
			unuse_pack(&w_curs);
			return;
		case OBJ_REF_DELTA:
			if (reuse_delta && !entry->preferred_base) {
				/* base oid is stored right after the header */
				oidread(&base_ref,
					use_pack(p, &w_curs,
						 entry->in_pack_offset + used,
						 NULL),
					the_repository->hash_algo);
				have_base = 1;
			}
			entry->in_pack_header_size = used + the_hash_algo->rawsz;
			break;
		case OBJ_OFS_DELTA:
			buf = use_pack(p, &w_curs,
				       entry->in_pack_offset + used, NULL);
			used_0 = 0;
			c = buf[used_0++];
			/* decode the varint-encoded negative offset to the base */
			ofs = c & 127;
			while (c & 128) {
				ofs += 1;
				if (!ofs || MSB(ofs, 7)) {
					error(_("delta base offset overflow in pack for %s"),
					      oid_to_hex(&entry->idx.oid));
					goto give_up;
				}
				c = buf[used_0++];
				ofs = (ofs << 7) + (c & 127);
			}
			ofs = entry->in_pack_offset - ofs;
			if (ofs <= 0 || ofs >= entry->in_pack_offset) {
				error(_("delta base offset out of bound for %s"),
				      oid_to_hex(&entry->idx.oid));
				goto give_up;
			}
			if (reuse_delta && !entry->preferred_base) {
				uint32_t pos;
				if (offset_to_pack_pos(p, ofs, &pos) < 0)
					goto give_up;
				if (!nth_packed_object_id(&base_ref, p,
							  pack_pos_to_index(p, pos)))
					have_base = 1;
			}
			entry->in_pack_header_size = used + used_0;
			break;
		}

		if (have_base &&
		    can_reuse_delta(&base_ref, entry, &base_entry)) {
			oe_set_type(entry, entry->in_pack_type);
			SET_SIZE(entry, in_pack_size); /* delta size */
			SET_DELTA_SIZE(entry, in_pack_size);

			if (base_entry) {
				SET_DELTA(entry, base_entry);
				entry->delta_sibling_idx = base_entry->delta_child_idx;
				SET_DELTA_CHILD(base_entry, entry);
			} else {
				/* base is known to the receiver but not packed by us */
				SET_DELTA_EXT(entry, &base_ref);
			}

			unuse_pack(&w_curs);
			return;
		}

		if (oe_type(entry)) {
			off_t delta_pos;

			/*
			 * This must be a delta and we already know what the
			 * final object type is.  Let's extract the actual
			 * object size from the delta header.
			 */
			delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
			canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
			if (canonical_size == 0)
				goto give_up;
			SET_SIZE(entry, canonical_size);
			unuse_pack(&w_curs);
			return;
		}

		/*
		 * No choice but to fall back to the recursive delta walk
		 * with odb_read_object_info() to find out about the object type
		 * at this point...
		 */
give_up:
		unuse_pack(&w_curs);
	}

	if (odb_read_object_info_extended(the_repository->objects, &entry->idx.oid, &oi,
					  OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
		if (repo_has_promisor_remote(the_repository)) {
			/* prefetch the remaining objects in bulk, then retry once */
			prefetch_to_pack(object_index);
			if (odb_read_object_info_extended(the_repository->objects, &entry->idx.oid, &oi,
							  OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
				type = -1;
		} else {
			type = -1;
		}
	}
	oe_set_type(entry, type);
	if (entry->type_valid) {
		SET_SIZE(entry, canonical_size);
	} else {
		/*
		 * Bad object type is checked in prepare_pack().  This is
		 * to permit a missing preferred base object to be ignored
		 * as a preferred base.  Doing so can result in a larger
		 * pack file, but the transfer will still take place.
		 */
	}
}

/*
 * qsort comparator: group entries by pack, ordered by in-pack offset,
 * with all loose (non-packed) objects sorted by oid. Used so that
 * check_object() walks each pack sequentially.
 */
static int pack_offset_sort(const void *_a, const void *_b)
{
	const struct object_entry *a = *(struct object_entry **)_a;
	const struct object_entry *b = *(struct object_entry **)_b;
	const struct packed_git *a_in_pack = IN_PACK(a);
	const struct packed_git *b_in_pack = IN_PACK(b);

	/* avoid filesystem thrashing with loose objects */
	if (!a_in_pack && !b_in_pack)
		return oidcmp(&a->idx.oid, &b->idx.oid);

	if (a_in_pack < b_in_pack)
		return -1;
	if (a_in_pack > b_in_pack)
		return 1;
	return a->in_pack_offset < b->in_pack_offset ? -1 :
	       (a->in_pack_offset > b->in_pack_offset);
}

/*
 * Drop an on-disk delta we were planning to reuse. Naively, this would
 * just involve blanking out the "delta" field, but we have to deal
 * with some extra book-keeping:
 *
 *   1. Removing ourselves from the delta_sibling linked list.
 *
 *   2. Updating our size/type to the non-delta representation.
 These were
 * either not recorded initially (size) or overwritten with the delta type
 * (type) when check_object() decided to reuse the delta.
 *
 *   3. Resetting our delta depth, as we are now a base object.
 */
static void drop_reused_delta(struct object_entry *entry)
{
	unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
	struct object_info oi = OBJECT_INFO_INIT;
	enum object_type type;
	unsigned long size;

	/* unlink "entry" from its base's delta_child/delta_sibling list */
	while (*idx) {
		struct object_entry *oe = &to_pack.objects[*idx - 1];

		if (oe == entry)
			*idx = oe->delta_sibling_idx;
		else
			idx = &oe->delta_sibling_idx;
	}
	SET_DELTA(entry, NULL);
	entry->depth = 0;

	oi.sizep = &size;
	oi.typep = &type;
	if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
		/*
		 * We failed to get the info from this pack for some reason;
		 * fall back to odb_read_object_info, which may find another copy.
		 * And if that fails, the error will be recorded in oe_type(entry)
		 * and dealt with in prepare_pack().
		 */
		oe_set_type(entry,
			    odb_read_object_info(the_repository->objects,
						 &entry->idx.oid, &size));
	} else {
		oe_set_type(entry, type);
	}
	SET_SIZE(entry, size);
}

/*
 * Follow the chain of deltas from this entry onward, throwing away any links
 * that cause us to hit a cycle (as determined by the DFS state flags in
 * the entries).
 *
 * We also detect too-long reused chains that would violate our --depth
 * limit.
 */
static void break_delta_chains(struct object_entry *entry)
{
	/*
	 * The actual depth of each object we will write is stored as an int,
	 * as it cannot exceed our int "depth" limit. But before we break
	 * changes based on that limit, we may potentially go as deep as the
	 * number of objects, which is elsewhere bounded to a uint32_t.
	 */
	uint32_t total_depth;
	struct object_entry *cur, *next;

	for (cur = entry, total_depth = 0;
	     cur;
	     cur = DELTA(cur), total_depth++) {
		if (cur->dfs_state == DFS_DONE) {
			/*
			 * We've already seen this object and know it isn't
			 * part of a cycle. We do need to append its depth
			 * to our count.
			 */
			total_depth += cur->depth;
			break;
		}

		/*
		 * We break cycles before looping, so an ACTIVE state (or any
		 * other cruft which made its way into the state variable)
		 * is a bug.
		 */
		if (cur->dfs_state != DFS_NONE)
			BUG("confusing delta dfs state in first pass: %d",
			    cur->dfs_state);

		/*
		 * Now we know this is the first time we've seen the object. If
		 * it's not a delta, we're done traversing, but we'll mark it
		 * done to save time on future traversals.
		 */
		if (!DELTA(cur)) {
			cur->dfs_state = DFS_DONE;
			break;
		}

		/*
		 * Mark ourselves as active and see if the next step causes
		 * us to cycle to another active object. It's important to do
		 * this _before_ we loop, because it impacts where we make the
		 * cut, and thus how our total_depth counter works.
		 * E.g., We may see a partial loop like:
		 *
		 *   A -> B -> C -> D -> B
		 *
		 * Cutting B->C breaks the cycle. But now the depth of A is
		 * only 1, and our total_depth counter is at 3. The size of the
		 * error is always one less than the size of the cycle we
		 * broke. Commits C and D were "lost" from A's chain.
		 *
		 * If we instead cut D->B, then the depth of A is correct at 3.
		 * We keep all commits in the chain that we examined.
		 */
		cur->dfs_state = DFS_ACTIVE;
		if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
			drop_reused_delta(cur);
			cur->dfs_state = DFS_DONE;
			break;
		}
	}

	/*
	 * And now that we've gone all the way to the bottom of the chain, we
	 * need to clear the active flags and set the depth fields as
	 * appropriate. Unlike the loop above, which can quit when it drops a
	 * delta, we need to keep going to look for more depth cuts. So we need
	 * an extra "next" pointer to keep going after we reset cur->delta.
	 */
	for (cur = entry; cur; cur = next) {
		next = DELTA(cur);

		/*
		 * We should have a chain of zero or more ACTIVE states down to
		 * a final DONE. We can quit after the DONE, because either it
		 * has no bases, or we've already handled them in a previous
		 * call.
		 */
		if (cur->dfs_state == DFS_DONE)
			break;
		else if (cur->dfs_state != DFS_ACTIVE)
			BUG("confusing delta dfs state in second pass: %d",
			    cur->dfs_state);

		/*
		 * If the total_depth is more than depth, then we need to snip
		 * the chain into two or more smaller chains that don't exceed
		 * the maximum depth. Most of the resulting chains will contain
		 * (depth + 1) entries (i.e., depth deltas plus one base), and
		 * the last chain (i.e., the one containing entry) will contain
		 * whatever entries are left over, namely
		 * (total_depth % (depth + 1)) of them.
		 *
		 * Since we are iterating towards decreasing depth, we need to
		 * decrement total_depth as we go, and we need to write to the
		 * entry what its final depth will be after all of the
		 * snipping. Since we're snipping into chains of length (depth
		 * + 1) entries, the final depth of an entry will be its
		 * original depth modulo (depth + 1). Any time we encounter an
		 * entry whose final depth is supposed to be zero, we snip it
		 * from its delta base, thereby making it so.
		 */
		cur->depth = (total_depth--) % (depth + 1);
		if (!cur->depth)
			drop_reused_delta(cur);

		cur->dfs_state = DFS_DONE;
	}
}

/*
 * Fill in type/size (and reusable delta links) for every entry in
 * to_pack: sort by pack/offset for sequential pack access, run
 * check_object() over each entry, mark very large objects no_try_delta,
 * then break delta cycles and over-deep chains in a second pass.
 */
static void get_object_details(void)
{
	uint32_t i;
	struct object_entry **sorted_by_offset;

	if (progress)
		progress_state = start_progress(the_repository,
						_("Counting objects"),
						to_pack.nr_objects);

	CALLOC_ARRAY(sorted_by_offset, to_pack.nr_objects);
	for (i = 0; i < to_pack.nr_objects; i++)
		sorted_by_offset[i] = to_pack.objects + i;
	QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);

	for (i = 0; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = sorted_by_offset[i];
		check_object(entry, i);
		if (entry->type_valid &&
		    oe_size_greater_than(&to_pack, entry,
					 repo_settings_get_big_file_threshold(the_repository)))
			entry->no_try_delta = 1;
		display_progress(progress_state, i + 1);
	}
	stop_progress(&progress_state);

	/*
	 * This must happen in a second pass, since we rely on the delta
	 * information for the whole list being completed.
	 */
	for (i = 0; i < to_pack.nr_objects; i++)
		break_delta_chains(&to_pack.objects[i]);

	free(sorted_by_offset);
}

/*
 * We search for deltas in a list sorted by type, by filename hash, and then
 * by size, so that we see progressively smaller and smaller files.
 * That's because we prefer deltas to be from the bigger file
 * to the smaller -- deletes are potentially cheaper, but perhaps
 * more importantly, the bigger file is likely the more recent
 * one. The deepest deltas are therefore the oldest objects which are
 * less susceptible to be accessed often.
 */
static int type_size_sort(const void *_a, const void *_b)
{
	const struct object_entry *a = *(struct object_entry **)_a;
	const struct object_entry *b = *(struct object_entry **)_b;
	const enum object_type a_type = oe_type(a);
	const enum object_type b_type = oe_type(b);
	const unsigned long a_size = SIZE(a);
	const unsigned long b_size = SIZE(b);

	if (a_type > b_type)
		return -1;
	if (a_type < b_type)
		return 1;
	if (a->hash > b->hash)
		return -1;
	if (a->hash < b->hash)
		return 1;
	if (a->preferred_base > b->preferred_base)
		return -1;
	if (a->preferred_base < b->preferred_base)
		return 1;
	if (use_delta_islands) {
		const int island_cmp = island_delta_cmp(&a->idx.oid, &b->idx.oid);
		if (island_cmp)
			return island_cmp;
	}
	if (a_size > b_size)
		return -1;
	if (a_size < b_size)
		return 1;
	/* final tie-break on list position, so the sort is stable */
	return a < b ? -1 : (a > b);  /* newest first */
}

/* One slot of the sliding delta-search window (see find_deltas()). */
struct unpacked {
	struct object_entry *entry;
	void *data;                 /* inflated object contents, lazily loaded */
	struct delta_index *index;  /* delta index over "data", lazily built */
	unsigned depth;             /* delta-chain depth of "entry" */
};

/*
 * Decide whether a freshly computed delta is worth keeping in the
 * in-memory delta cache, honoring max_delta_cache_size and
 * cache_max_small_delta_size.
 */
static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
			   unsigned long delta_size)
{
	if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
		return 0;

	if (delta_size < cache_max_small_delta_size)
		return 1;

	/* cache delta, if objects are large enough compared to delta size */
	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
		return 1;

	return 0;
}

/* Protect delta_cache_size */
static pthread_mutex_t cache_mutex;
#define cache_lock()		pthread_mutex_lock(&cache_mutex)
#define cache_unlock()		pthread_mutex_unlock(&cache_mutex)

/*
 * Protect object list partitioning (e.g. struct thread_param) and
 * progress_state
 */
static pthread_mutex_t progress_mutex;
#define progress_lock()		pthread_mutex_lock(&progress_mutex)
#define progress_unlock()	pthread_mutex_unlock(&progress_mutex)

/*
 * Access to struct object_entry is unprotected since each thread owns
 * a portion of the main object list. Just don't access object entries
 * ahead in the list because they can be stolen and would need
 * progress_mutex for protection.
 */

/*
 * Cheap "SIZE(lhs) < rhs" test: use the cached size when valid and only
 * fall back to the slow path when rhs is small enough that the cached
 * limit cannot answer the comparison.
 */
static inline int oe_size_less_than(struct packing_data *pack,
				    const struct object_entry *lhs,
				    unsigned long rhs)
{
	if (lhs->size_valid)
		return lhs->size_ < rhs;
	if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
		return 0;
	return oe_get_size_slow(pack, lhs) < rhs;
}

/*
 * Record the tree depth of "e" in pack->tree_depth, allocating the
 * side array on first use.
 */
static inline void oe_set_tree_depth(struct packing_data *pack,
				     struct object_entry *e,
				     unsigned int tree_depth)
{
	if (!pack->tree_depth)
		CALLOC_ARRAY(pack->tree_depth, pack->nr_alloc);
	pack->tree_depth[e - pack->objects] = tree_depth;
}

/*
 * Return the size of the object without doing any delta
 * reconstruction (so non-deltas are true object sizes, but deltas
 * return the size of the delta data).
 */
unsigned long oe_get_size_slow(struct packing_data *pack,
			       const struct object_entry *e)
{
	struct packed_git *p;
	struct pack_window *w_curs;
	unsigned char *buf;
	enum object_type type;
	unsigned long used, avail, size;

	if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
		/* non-delta: ask the object database for the canonical size */
		packing_data_lock(&to_pack);
		if (odb_read_object_info(the_repository->objects,
					 &e->idx.oid, &size) < 0)
			die(_("unable to get size of %s"),
			    oid_to_hex(&e->idx.oid));
		packing_data_unlock(&to_pack);
		return size;
	}

	p = oe_in_pack(pack, e);
	if (!p)
		BUG("when e->type is a delta, it must belong to a pack");

	/* delta: the size is encoded in the in-pack object header */
	packing_data_lock(&to_pack);
	w_curs = NULL;
	buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
	used = unpack_object_header_buffer(buf, avail, &type, &size);
	if (used == 0)
		die(_("unable to parse object header of %s"),
		    oid_to_hex(&e->idx.oid));

	unuse_pack(&w_curs);
	packing_data_unlock(&to_pack);
	return size;
}

/*
 * Attempt to deltify trg against src. Returns 1 when a (better) delta
 * was recorded on trg's entry, 0 when the pair was skipped or no
 * improvement was found, and -1 to tell the caller to stop scanning
 * this window slot (the types differ, and the window is type-sorted).
 */
static int try_delta(struct unpacked *trg, struct unpacked *src,
		     unsigned max_depth, unsigned long *mem_usage)
{
	struct object_entry *trg_entry = trg->entry;
	struct object_entry *src_entry = src->entry;
	unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
	unsigned ref_depth;
	enum object_type type;
	void *delta_buf;

	/* Don't bother doing diffs between different types */
	if (oe_type(trg_entry) != oe_type(src_entry))
		return -1;

	/*
	 * We do not bother to try a delta that we discarded on an
	 * earlier try, but only when reusing delta data.  Note that
	 * src_entry that is marked as the preferred_base should always
	 * be considered, as even if we produce a suboptimal delta against
	 * it, we will still save the transfer cost, as we already know
	 * the other side has it and we won't send src_entry at all.
	 */
	if (reuse_delta && IN_PACK(trg_entry) &&
	    IN_PACK(trg_entry) == IN_PACK(src_entry) &&
	    !src_entry->preferred_base &&
	    trg_entry->in_pack_type != OBJ_REF_DELTA &&
	    trg_entry->in_pack_type != OBJ_OFS_DELTA)
		return 0;

	/* Let's not bust the allowed depth. */
	if (src->depth >= max_depth)
		return 0;

	/* Now some size filtering heuristics. */
	trg_size = SIZE(trg_entry);
	if (!DELTA(trg_entry)) {
		/* only beat a from-scratch encoding (size/2 minus base ref) */
		max_size = trg_size/2 - the_hash_algo->rawsz;
		ref_depth = 1;
	} else {
		/* only beat the delta we already have */
		max_size = DELTA_SIZE(trg_entry);
		ref_depth = trg->depth;
	}
	/* penalize candidates that would land deeper in the chain */
	max_size = (uint64_t)max_size * (max_depth - src->depth) /
						(max_depth - ref_depth + 1);
	if (max_size == 0)
		return 0;
	src_size = SIZE(src_entry);
	sizediff = src_size < trg_size ? trg_size - src_size : 0;
	if (sizediff >= max_size)
		return 0;
	if (trg_size < src_size / 32)
		return 0;

	if (!in_same_island(&trg->entry->idx.oid, &src->entry->idx.oid))
		return 0;

	/* Load data if not already done */
	if (!trg->data) {
		packing_data_lock(&to_pack);
		trg->data = odb_read_object(the_repository->objects,
					    &trg_entry->idx.oid, &type,
					    &sz);
		packing_data_unlock(&to_pack);
		if (!trg->data)
			die(_("object %s cannot be read"),
			    oid_to_hex(&trg_entry->idx.oid));
		if (sz != trg_size)
			die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
			    oid_to_hex(&trg_entry->idx.oid), (uintmax_t)sz,
			    (uintmax_t)trg_size);
		*mem_usage += sz;
	}
	if (!src->data) {
		packing_data_lock(&to_pack);
		src->data = odb_read_object(the_repository->objects,
					    &src_entry->idx.oid, &type,
					    &sz);
		packing_data_unlock(&to_pack);
		if (!src->data) {
			if (src_entry->preferred_base) {
				static int warned = 0;
				if (!warned++)
					warning(_("object %s cannot be read"),
						oid_to_hex(&src_entry->idx.oid));
				/*
				 * Those objects are not included in the
				 * resulting pack.  Be resilient and ignore
				 * them if they can't be read, in case the
				 * pack could be created nevertheless.
				 */
				return 0;
			}
			die(_("object %s cannot be read"),
			    oid_to_hex(&src_entry->idx.oid));
		}
		if (sz != src_size)
			die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
			    oid_to_hex(&src_entry->idx.oid), (uintmax_t)sz,
			    (uintmax_t)src_size);
		*mem_usage += sz;
	}
	if (!src->index) {
		src->index = create_delta_index(src->data, src_size);
		if (!src->index) {
			static int warned = 0;
			if (!warned++)
				warning(_("suboptimal pack - out of memory"));
			return 0;
		}
		*mem_usage += sizeof_delta_index(src->index);
	}

	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
	if (!delta_buf)
		return 0;

	if (DELTA(trg_entry)) {
		/* Prefer only shallower same-sized deltas. */
		if (delta_size == DELTA_SIZE(trg_entry) &&
		    src->depth + 1 >= trg->depth) {
			free(delta_buf);
			return 0;
		}
	}

	/*
	 * Handle memory allocation outside of the cache
	 * accounting lock.  Compiler will optimize the strangeness
	 * away when NO_PTHREADS is defined.
	 */
	free(trg_entry->delta_data);
	cache_lock();
	if (trg_entry->delta_data) {
		delta_cache_size -= DELTA_SIZE(trg_entry);
		trg_entry->delta_data = NULL;
	}
	if (delta_cacheable(src_size, trg_size, delta_size)) {
		delta_cache_size += delta_size;
		cache_unlock();
		trg_entry->delta_data = xrealloc(delta_buf, delta_size);
	} else {
		cache_unlock();
		free(delta_buf);
	}

	SET_DELTA(trg_entry, src_entry);
	SET_DELTA_SIZE(trg_entry, delta_size);
	trg->depth = src->depth + 1;

	return 1;
}

/*
 * Return the length of the deepest delta chain hanging off of "me",
 * counted from depth n (recurses over delta children and siblings).
 */
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{
	struct object_entry *child = DELTA_CHILD(me);
	unsigned int m = n;
	while (child) {
		const unsigned int c = check_delta_limit(child, n + 1);
		if (m < c)
			m = c;
		child = DELTA_SIBLING(child);
	}
	return m;
}

/*
 * Release the delta index and object data held by a window slot and
 * return the number of bytes freed, so the caller can adjust its
 * memory accounting.
 */
static unsigned long free_unpacked(struct unpacked *n)
{
	unsigned long freed_mem = sizeof_delta_index(n->index);
	free_delta_index(n->index);
	n->index = NULL;
	if (n->data) {
		freed_mem += SIZE(n->entry);
		FREE_AND_NULL(n->data);
	}
	n->entry = NULL;
	n->depth = 0;
	return freed_mem;
}

/*
 * Core delta search: slide a circular window of "window" slots over
 * *list, trying each new entry against every populated slot via
 * try_delta(). *list_size is consumed under progress_lock because
 * ll_find_deltas() may steal work from the tail concurrently.
 */
static void find_deltas(struct object_entry **list, unsigned *list_size,
			int window, int depth, unsigned *processed)
{
	uint32_t i, idx = 0, count = 0;
	struct unpacked *array;
	unsigned long mem_usage = 0;

	CALLOC_ARRAY(array, window);

	for (;;) {
		struct object_entry *entry;
		struct unpacked *n = array + idx;
		int j, max_depth, best_base = -1;

		progress_lock();
		if (!*list_size) {
			progress_unlock();
			break;
		}
		entry = *list++;
		(*list_size)--;
		if (!entry->preferred_base) {
			(*processed)++;
			display_progress(progress_state, *processed);
		}
		progress_unlock();

		mem_usage -= free_unpacked(n);
		n->entry = entry;

		/* evict oldest window slots until under the memory limit */
		while (window_memory_limit &&
		       mem_usage > window_memory_limit &&
		       count > 1) {
			const uint32_t tail = (idx + window - count) % window;
			mem_usage -= free_unpacked(array + tail);
			count--;
		}

		/* We do not compute delta to *create* objects we are not
		 * going to pack.
		 */
		if (entry->preferred_base)
			goto next;

		/*
		 * If the current object is at pack edge, take the depth the
		 * objects that depend on the current object into account
		 * otherwise they would become too deep.
		 */
		max_depth = depth;
		if (DELTA_CHILD(entry)) {
			max_depth -= check_delta_limit(entry, 0);
			if (max_depth <= 0)
				goto next;
		}

		j = window;
		while (--j > 0) {
			int ret;
			uint32_t other_idx = idx + j;
			struct unpacked *m;
			if (other_idx >= window)
				other_idx -= window;
			m = array + other_idx;
			if (!m->entry)
				break;
			ret = try_delta(n, m, max_depth, &mem_usage);
			if (ret < 0)
				break;
			else if (ret > 0)
				best_base = other_idx;
		}

		/*
		 * If we decided to cache the delta data, then it is best
		 * to compress it right away.  First because we have to do
		 * it anyway, and doing it here while we're threaded will
		 * save a lot of time in the non threaded write phase,
		 * as well as allow for caching more deltas within
		 * the same cache size limit.
		 * ...
		 * But only if not writing to stdout, since in that case
		 * the network is most likely throttling writes anyway,
		 * and therefore it is best to go to the write phase ASAP
		 * instead, as we can afford spending more time compressing
		 * between writes at that moment.
		 */
		if (entry->delta_data && !pack_to_stdout) {
			unsigned long size;

			size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
			if (size < (1U << OE_Z_DELTA_BITS)) {
				entry->z_delta_size = size;
				cache_lock();
				delta_cache_size -= DELTA_SIZE(entry);
				delta_cache_size += entry->z_delta_size;
				cache_unlock();
			} else {
				FREE_AND_NULL(entry->delta_data);
				entry->z_delta_size = 0;
			}
		}

		/* if we made n a delta, and if n is already at max
		 * depth, leaving it in the window is pointless.  we
		 * should evict it first.
		 */
		if (DELTA(entry) && max_depth <= n->depth)
			continue;

		/*
		 * Move the best delta base up in the window, after the
		 * currently deltified object, to keep it longer.  It will
		 * be the first base object to be attempted next.
		 */
		if (DELTA(entry)) {
			struct unpacked swap = array[best_base];
			int dist = (window + idx - best_base) % window;
			int dst = best_base;
			while (dist--) {
				int src = (dst + 1) % window;
				array[dst] = array[src];
				dst = src;
			}
			array[dst] = swap;
		}

		next:
		idx++;
		if (count + 1 < window)
			count++;
		if (idx >= window)
			idx = 0;
	}

	for (i = 0; i < window; ++i) {
		free_delta_index(array[i].index);
		free(array[i].data);
	}
	free(array);
}

/*
 * The main object list is split into smaller lists, each is handed to
 * one worker.
 *
 * The main thread waits on the condition that (at least) one of the workers
 * has stopped working (which is indicated in the .working member of
 * struct thread_params).
 *
 * When a work thread has completed its work, it sets .working to 0 and
 * signals the main thread and waits on the condition that .data_ready
 * becomes 1.
 *
 * The main thread steals half of the work from the worker that has
 * most work left to hand it to the idle worker.
 */

struct thread_params {
	pthread_t thread;
	struct object_entry **list;      /* this worker's slice of the list */
	struct packing_region *regions;  /* used by the by-path variant */
	unsigned list_size;
	unsigned remaining;              /* unprocessed entries/regions left */
	int window;
	int depth;
	int working;                     /* 0 = idle, waiting for more work */
	int data_ready;                  /* main thread handed out new work */
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	unsigned *processed;             /* shared progress counter */
};

static pthread_cond_t progress_cond;

/*
 * Mutex and conditional variable can't be statically-initialized on Windows.
 */
static void init_threaded_search(void)
{
	pthread_mutex_init(&cache_mutex, NULL);
	pthread_mutex_init(&progress_mutex, NULL);
	pthread_cond_init(&progress_cond, NULL);
}

static void cleanup_threaded_search(void)
{
	pthread_cond_destroy(&progress_cond);
	pthread_mutex_destroy(&cache_mutex);
	pthread_mutex_destroy(&progress_mutex);
}

/*
 * Worker thread body: run find_deltas() over the assigned slice, then
 * announce idleness via progress_cond and sleep until the main thread
 * hands over stolen work (data_ready).
 */
static void *threaded_find_deltas(void *arg)
{
	struct thread_params *me = arg;

	progress_lock();
	while (me->remaining) {
		progress_unlock();

		find_deltas(me->list, &me->remaining,
			    me->window, me->depth, me->processed);

		progress_lock();
		me->working = 0;
		pthread_cond_signal(&progress_cond);
		progress_unlock();

		/*
		 * We must not set ->data_ready before we wait on the
		 * condition because the main thread may have set it to 1
		 * before we get here. In order to be sure that new
		 * work is available if we see 1 in ->data_ready, it
		 * was initialized to 0 before this thread was spawned
		 * and we reset it to 0 right away.
		 */
		pthread_mutex_lock(&me->mutex);
		while (!me->data_ready)
			pthread_cond_wait(&me->cond, &me->mutex);
		me->data_ready = 0;
		pthread_mutex_unlock(&me->mutex);

		progress_lock();
	}
	progress_unlock();
	/* leave ->working 1 so that this doesn't get more work assigned */
	return NULL;
}

/*
 * Multi-threaded driver for find_deltas(): partition the list among
 * delta_search_threads workers (splitting on name-hash boundaries) and
 * rebalance by stealing half the largest remaining slice whenever a
 * worker goes idle. Falls back to a direct call for <= 1 thread.
 */
static void ll_find_deltas(struct object_entry **list, unsigned list_size,
			   int window, int depth, unsigned *processed)
{
	struct thread_params *p;
	int i, ret, active_threads = 0;

	init_threaded_search();

	if (delta_search_threads <= 1) {
		find_deltas(list, &list_size, window, depth, processed);
		cleanup_threaded_search();
		return;
	}
	if (progress > pack_to_stdout)
		fprintf_ln(stderr, _("Delta compression using up to %d threads"),
			   delta_search_threads);
	CALLOC_ARRAY(p, delta_search_threads);

	/* Partition the work amongst work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		unsigned sub_size = list_size / (delta_search_threads - i);

		/* don't use too small segments or no deltas will be found */
		if (sub_size < 2*window && i+1 < delta_search_threads)
			sub_size = 0;

		p[i].window = window;
		p[i].depth = depth;
		p[i].processed = processed;
		p[i].working = 1;
		p[i].data_ready = 0;

		/* try to split chunks on "path" boundaries */
		while (sub_size && sub_size < list_size &&
		       list[sub_size]->hash &&
		       list[sub_size]->hash == list[sub_size-1]->hash)
			sub_size++;

		p[i].list = list;
		p[i].list_size = sub_size;
		p[i].remaining = sub_size;

		list += sub_size;
		list_size -= sub_size;
	}

	/* Start work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		if (!p[i].list_size)
			continue;
		pthread_mutex_init(&p[i].mutex, NULL);
		pthread_cond_init(&p[i].cond, NULL);
		ret = pthread_create(&p[i].thread, NULL,
				     threaded_find_deltas, &p[i]);
		if (ret)
			die(_("unable to create thread: %s"), strerror(ret));
		active_threads++;
	}

	/*
	 * Now let's wait for work completion.  Each time a thread is done
	 * with its work, we steal half of the remaining work from the
	 * thread with the largest number of unprocessed objects and give
	 * it to that newly idle thread.  This ensures good load balancing
	 * until the remaining object list segments are simply too short
	 * to be worth splitting anymore.
	 */
	while (active_threads) {
		struct thread_params *target = NULL;
		struct thread_params *victim = NULL;
		unsigned sub_size = 0;

		progress_lock();
		for (;;) {
			for (i = 0; !target && i < delta_search_threads; i++)
				if (!p[i].working)
					target = &p[i];
			if (target)
				break;
			pthread_cond_wait(&progress_cond, &progress_mutex);
		}

		for (i = 0; i < delta_search_threads; i++)
			if (p[i].remaining > 2*window &&
			    (!victim || victim->remaining < p[i].remaining))
				victim = &p[i];
		if (victim) {
			sub_size = victim->remaining / 2;
			list = victim->list + victim->list_size - sub_size;
			while (sub_size && list[0]->hash &&
			       list[0]->hash == list[-1]->hash) {
				list++;
				sub_size--;
			}
			if (!sub_size) {
				/*
				 * It is possible for some "paths" to have
				 * so many objects that no hash boundary
				 * might be found.  Let's just steal the
				 * exact half in that case.
				 */
				sub_size = victim->remaining / 2;
				list -= sub_size;
			}
			target->list = list;
			victim->list_size -= sub_size;
			victim->remaining -= sub_size;
		}
		target->list_size = sub_size;
		target->remaining = sub_size;
		target->working = 1;
		progress_unlock();

		pthread_mutex_lock(&target->mutex);
		target->data_ready = 1;
		pthread_cond_signal(&target->cond);
		pthread_mutex_unlock(&target->mutex);

		/* no work left to hand out: let this thread exit */
		if (!sub_size) {
			pthread_join(target->thread, NULL);
			pthread_cond_destroy(&target->cond);
			pthread_mutex_destroy(&target->mutex);
			active_threads--;
		}
	}
	cleanup_threaded_search();
	free(p);
}

/*
 * Is this object going into the pack, either directly (packlist) or via
 * the reused-packfile bitmap?
 */
static int obj_is_packed(const struct object_id *oid)
{
	return packlist_find(&to_pack, oid) ||
		(reuse_packfile_bitmap &&
		 bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
}

/*
 * Add "oid" (a tag) and every nested tag it points at to the pack,
 * stopping at the first non-tag tagged object.
 */
static void add_tag_chain(const struct object_id *oid)
{
	struct tag *tag;

	/*
	 * We catch duplicates already in add_object_entry(), but we'd
	 * prefer to do this extra check to avoid having to parse the
	 * tag at all if we already know that it's being packed (e.g., if
	 * it was included via bitmaps, we would not have parsed it
	 * previously).
	 */
	if (obj_is_packed(oid))
		return;

	tag = lookup_tag(the_repository, oid);
	while (1) {
		if (!tag || parse_tag(tag) || !tag->tagged)
			die(_("unable to pack objects reachable from tag %s"),
			    oid_to_hex(oid));

		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);

		if (tag->tagged->type != OBJ_TAG)
			return;

		tag = (struct tag *)tag->tagged;
	}
}

/*
 * for_each_ref() callback: include the tag chain for any ref whose
 * peeled object is already being packed (--include-tag support).
 */
static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, const struct object_id *oid,
		       int flag UNUSED, void *cb_data UNUSED)
{
	struct object_id peeled;

	if (!peel_iterated_oid(the_repository, oid, &peeled) && obj_is_packed(&peeled))
		add_tag_chain(oid);
	return 0;
}

/*
 * Filter for the delta-search phase: skip entries that already have a
 * reused delta, are tiny (< 50 bytes), unreadable, or explicitly marked
 * no_try_delta.
 */
static int should_attempt_deltas(struct object_entry *entry)
{
	if (DELTA(entry))
		/* This happens if we decided to reuse existing
		 * delta from a pack.  "reuse_delta &&" is implied.
		 */
		return 0;

	if (!entry->type_valid ||
	    oe_size_less_than(&to_pack, entry, 50))
		return 0;

	if (entry->no_try_delta)
		return 0;

	if (!entry->preferred_base) {
		if (oe_type(entry) < 0)
			die(_("unable to get type of object %s"),
			    oid_to_hex(&entry->idx.oid));
	} else if (oe_type(entry) < 0) {
		/*
		 * This object is not found, but we
		 * don't have to include it anyway.
		 */
		return 0;
	}

	return 1;
}

/*
 * Run the delta search over a single path-walk region: collect the
 * region's delta-eligible entries, sort them with type_size_sort(), and
 * hand them to find_deltas().
 */
static void find_deltas_for_region(struct object_entry *list,
				   struct packing_region *region,
				   unsigned int *processed)
{
	struct object_entry **delta_list;
	unsigned int delta_list_nr = 0;

	ALLOC_ARRAY(delta_list, region->nr);
	for (size_t i = 0; i < region->nr; i++) {
		struct object_entry *entry = list + region->start + i;
		if (should_attempt_deltas(entry))
			delta_list[delta_list_nr++] = entry;
	}

	QSORT(delta_list, delta_list_nr, type_size_sort);
	find_deltas(delta_list, &delta_list_nr, window, depth, processed);
	free(delta_list);
}

/*
 * Single-threaded driver: process "nr" regions starting at "start",
 * with progress reporting sized by the last region's extent.
 */
static void find_deltas_by_region(struct object_entry *list,
				  struct packing_region *regions,
				  size_t start, size_t nr)
{
	unsigned int processed = 0;
	size_t progress_nr;

	if (!nr)
		return;

	progress_nr = regions[nr - 1].start + regions[nr - 1].nr;

	if (progress)
		progress_state = start_progress(the_repository,
						_("Compressing objects by path"),
						progress_nr);

	while (nr--)
		find_deltas_for_region(list,
				       &regions[start++],
				       &processed);

	display_progress(progress_state, progress_nr);
	stop_progress(&progress_state);
}

/*
 * Worker thread body for the by-path search: drain the assigned
 * regions one at a time, then follow the same idle/steal handshake as
 * threaded_find_deltas().
 */
static void *threaded_find_deltas_by_path(void *arg)
{
	struct thread_params *me = arg;

	progress_lock();
	while (me->remaining) {
		while (me->remaining) {
			progress_unlock();
			find_deltas_for_region(to_pack.objects,
					       me->regions,
					       me->processed);
			progress_lock();
			me->remaining--;
			me->regions++;
		}

		me->working = 0;
		pthread_cond_signal(&progress_cond);
		progress_unlock();

		/*
		 * We must not set ->data_ready before we wait on the
		 * condition because the main thread may have set it to 1
		 * before we get here.
		 * In order to be sure that new
		 * work is available if we see 1 in ->data_ready, it
		 * was initialized to 0 before this thread was spawned
		 * and we reset it to 0 right away.
		 */
		pthread_mutex_lock(&me->mutex);
		while (!me->data_ready)
			pthread_cond_wait(&me->cond, &me->mutex);
		me->data_ready = 0;
		pthread_mutex_unlock(&me->mutex);

		progress_lock();
	}
	progress_unlock();
	/* leave ->working 1 so that this doesn't get more work assigned */
	return NULL;
}

/*
 * Multi-threaded driver for the path-based delta search: partition
 * the `nr` regions across delta_search_threads workers, then keep the
 * workers busy by stealing half of the largest remaining share for
 * each thread that runs out of work (mirrors ll_find_deltas()).
 */
static void ll_find_deltas_by_region(struct object_entry *list,
				     struct packing_region *regions,
				     uint32_t start, uint32_t nr)
{
	struct thread_params *p;
	int i, ret, active_threads = 0;
	unsigned int processed = 0;
	uint32_t progress_nr;
	/*
	 * NOTE(review): when nr == 0 we return below without a matching
	 * cleanup_threaded_search(), unlike the single-threaded path —
	 * confirm whether the early return should clean up too.
	 */
	init_threaded_search();

	if (!nr)
		return;

	progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
	if (delta_search_threads <= 1) {
		find_deltas_by_region(list, regions, start, nr);
		cleanup_threaded_search();
		return;
	}

	if (progress > pack_to_stdout)
		fprintf_ln(stderr,
			   Q_("Path-based delta compression using up to %d thread",
			      "Path-based delta compression using up to %d threads",
			      delta_search_threads),
			   delta_search_threads);
	CALLOC_ARRAY(p, delta_search_threads);

	if (progress)
		progress_state = start_progress(the_repository,
						_("Compressing objects by path"),
						progress_nr);
	/* Partition the work amongst work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		unsigned sub_size = nr / (delta_search_threads - i);

		p[i].window = window;
		p[i].depth = depth;
		p[i].processed = &processed;
		p[i].working = 1;
		p[i].data_ready = 0;

		p[i].regions = regions;
		p[i].list_size = sub_size;
		p[i].remaining = sub_size;

		regions += sub_size;
		nr -= sub_size;
	}

	/* Start work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		if (!p[i].list_size)
			continue;
		pthread_mutex_init(&p[i].mutex, NULL);
		pthread_cond_init(&p[i].cond, NULL);
		ret = pthread_create(&p[i].thread, NULL,
				     threaded_find_deltas_by_path, &p[i]);
		if (ret)
			die(_("unable to create thread: %s"), strerror(ret));
		active_threads++;
	}

	/*
	 * Now let's wait for work completion. Each time a thread is done
	 * with its work, we steal half of the remaining work from the
	 * thread with the largest number of unprocessed objects and give
	 * it to that newly idle thread. This ensure good load balancing
	 * until the remaining object list segments are simply too short
	 * to be worth splitting anymore.
	 */
	while (active_threads) {
		struct thread_params *target = NULL;
		struct thread_params *victim = NULL;
		unsigned sub_size = 0;

		progress_lock();
		for (;;) {
			for (i = 0; !target && i < delta_search_threads; i++)
				if (!p[i].working)
					target = &p[i];
			if (target)
				break;
			pthread_cond_wait(&progress_cond, &progress_mutex);
		}

		/* Steal from the thread with the most remaining regions. */
		for (i = 0; i < delta_search_threads; i++)
			if (p[i].remaining > 2*window &&
			    (!victim || victim->remaining < p[i].remaining))
				victim = &p[i];
		if (victim) {
			sub_size = victim->remaining / 2;
			target->regions = victim->regions + victim->remaining - sub_size;
			victim->list_size -= sub_size;
			victim->remaining -= sub_size;
		}
		target->list_size = sub_size;
		target->remaining = sub_size;
		target->working = 1;
		progress_unlock();

		pthread_mutex_lock(&target->mutex);
		target->data_ready = 1;
		pthread_cond_signal(&target->cond);
		pthread_mutex_unlock(&target->mutex);

		/* Nothing left to steal: the thread terminates; reap it. */
		if (!sub_size) {
			pthread_join(target->thread, NULL);
			pthread_cond_destroy(&target->cond);
			pthread_mutex_destroy(&target->mutex);
			active_threads--;
		}
	}
	cleanup_threaded_search();
	free(p);

	display_progress(progress_state, progress_nr);
	stop_progress(&progress_state);
}

/*
 * Select delta candidates among the collected objects and run the
 * delta search over them (path-walk regions first if enabled, then
 * the global type/size-sorted list).
 */
static void prepare_pack(int window, int depth)
{
	struct object_entry **delta_list;
	uint32_t i, nr_deltas;
	unsigned n;

	if (use_delta_islands)
		resolve_tree_islands(the_repository, progress, &to_pack);

	get_object_details();

	/*
	 * If we're locally repacking then we need to be doubly careful
	 * from now on in order to make sure no stealth corruption gets
	 * propagated to the new pack. Clients receiving streamed packs
	 * should validate everything they get anyway so no need to incur
	 * the additional cost here in that case.
	 */
	if (!pack_to_stdout)
		do_check_packed_object_crc = 1;

	if (!to_pack.nr_objects || !window || !depth)
		return;

	if (path_walk)
		ll_find_deltas_by_region(to_pack.objects, to_pack.regions,
					 0, to_pack.nr_regions);

	ALLOC_ARRAY(delta_list, to_pack.nr_objects);
	nr_deltas = n = 0;

	for (i = 0; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = to_pack.objects + i;

		if (!should_attempt_deltas(entry))
			continue;

		/* Preferred bases are delta bases only, not results. */
		if (!entry->preferred_base)
			nr_deltas++;

		delta_list[n++] = entry;
	}

	if (nr_deltas && n > 1) {
		unsigned nr_done = 0;

		if (progress)
			progress_state = start_progress(the_repository,
							_("Compressing objects"),
							nr_deltas);
		QSORT(delta_list, n, type_size_sort);
		ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
		stop_progress(&progress_state);
		if (nr_done != nr_deltas)
			die(_("inconsistency with delta count"));
	}
	free(delta_list);
}

/*
 * Config callback: parse pack.* (and uploadpack.blobpackfileuri)
 * configuration into the file-scope option globals; anything not
 * handled here falls through to git_default_config().
 */
static int git_pack_config(const char *k, const char *v,
			   const struct config_context *ctx, void *cb)
{
	if (!strcmp(k, "pack.window")) {
		window = git_config_int(k, v,
ctx->kvi); 3619 return 0; 3620 } 3621 if (!strcmp(k, "pack.windowmemory")) { 3622 window_memory_limit = git_config_ulong(k, v, ctx->kvi); 3623 return 0; 3624 } 3625 if (!strcmp(k, "pack.depth")) { 3626 depth = git_config_int(k, v, ctx->kvi); 3627 return 0; 3628 } 3629 if (!strcmp(k, "pack.deltacachesize")) { 3630 max_delta_cache_size = git_config_int(k, v, ctx->kvi); 3631 return 0; 3632 } 3633 if (!strcmp(k, "pack.deltacachelimit")) { 3634 cache_max_small_delta_size = git_config_int(k, v, ctx->kvi); 3635 return 0; 3636 } 3637 if (!strcmp(k, "pack.writebitmaphashcache")) { 3638 if (git_config_bool(k, v)) 3639 write_bitmap_options |= BITMAP_OPT_HASH_CACHE; 3640 else 3641 write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE; 3642 } 3643 3644 if (!strcmp(k, "pack.writebitmaplookuptable")) { 3645 if (git_config_bool(k, v)) 3646 write_bitmap_options |= BITMAP_OPT_LOOKUP_TABLE; 3647 else 3648 write_bitmap_options &= ~BITMAP_OPT_LOOKUP_TABLE; 3649 } 3650 3651 if (!strcmp(k, "pack.usebitmaps")) { 3652 use_bitmap_index_default = git_config_bool(k, v); 3653 return 0; 3654 } 3655 if (!strcmp(k, "pack.allowpackreuse")) { 3656 int res = git_parse_maybe_bool_text(v); 3657 if (res < 0) { 3658 if (!strcasecmp(v, "single")) 3659 allow_pack_reuse = SINGLE_PACK_REUSE; 3660 else if (!strcasecmp(v, "multi")) 3661 allow_pack_reuse = MULTI_PACK_REUSE; 3662 else 3663 die(_("invalid pack.allowPackReuse value: '%s'"), v); 3664 } else if (res) { 3665 allow_pack_reuse = SINGLE_PACK_REUSE; 3666 } else { 3667 allow_pack_reuse = NO_PACK_REUSE; 3668 } 3669 return 0; 3670 } 3671 if (!strcmp(k, "pack.threads")) { 3672 delta_search_threads = git_config_int(k, v, ctx->kvi); 3673 if (delta_search_threads < 0) 3674 die(_("invalid number of threads specified (%d)"), 3675 delta_search_threads); 3676 if (!HAVE_THREADS && delta_search_threads != 1) { 3677 warning(_("no threads support, ignoring %s"), k); 3678 delta_search_threads = 0; 3679 } 3680 return 0; 3681 } 3682 if (!strcmp(k, "pack.indexversion")) { 3683 
		pack_idx_opts.version = git_config_int(k, v, ctx->kvi);
		if (pack_idx_opts.version > 2)
			die(_("bad pack.indexVersion=%"PRIu32),
			    pack_idx_opts.version);
		return 0;
	}
	if (!strcmp(k, "pack.writereverseindex")) {
		if (git_config_bool(k, v))
			pack_idx_opts.flags |= WRITE_REV;
		else
			pack_idx_opts.flags &= ~WRITE_REV;
		return 0;
	}
	if (!strcmp(k, "uploadpack.blobpackfileuri")) {
		struct configured_exclusion *ex;
		const char *oid_end, *pack_end;
		/*
		 * Stores the pack hash. This is not a true object ID, but is
		 * of the same form.
		 */
		struct object_id pack_hash;

		if (!v)
			return config_error_nonbool(k);

		ex = xmalloc(sizeof(*ex));
		if (parse_oid_hex(v, &ex->e.oid, &oid_end) ||
		    *oid_end != ' ' ||
		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
		    *pack_end != ' ')
			die(_("value of uploadpack.blobpackfileuri must be "
			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
		if (oidmap_get(&configured_exclusions, &ex->e.oid))
			die(_("object already configured in another "
			      "uploadpack.blobpackfileuri (got '%s')"), v);
		/* Keep the pack hash as a NUL-terminated hex string. */
		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
		ex->uri = xstrdup(pack_end + 1);
		oidmap_put(&configured_exclusions, ex);
	}
	return git_default_config(k, v, ctx, cb);
}

/* Counters for trace2 output when in --stdin-packs mode. */
static int stdin_packs_found_nr;
static int stdin_packs_hints_nr;

/*
 * for_each_object_in_pack() callback for --stdin-packs: add every
 * object of an included pack to to_pack, and queue commits as
 * starting points for the subsequent revision walk.
 */
static int add_object_entry_from_pack(const struct object_id *oid,
				      struct packed_git *p,
				      uint32_t pos,
				      void *_data)
{
	off_t ofs;
	enum object_type type = OBJ_NONE;

	display_progress(progress_state, ++nr_seen);

	if (have_duplicate_entry(oid, 0))
		return 0;

	ofs = nth_packed_object_offset(p, pos);
	if (!want_object_in_pack(oid, 0, &p, &ofs))
		return 0;

	if (p) {
		struct object_info oi = OBJECT_INFO_INIT;

		oi.typep = &type;
		if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
			die(_("could not get type of object %s in pack %s"),
			    oid_to_hex(oid), p->pack_name);
		} else if (type == OBJ_COMMIT) {
			struct rev_info *revs = _data;
			/*
			 * commits in included packs are used as starting points for the
			 * subsequent revision walk
			 */
			add_pending_oid(revs, NULL, oid, 0);
		}

		stdin_packs_found_nr++;
	}

	create_object_entry(oid, type, 0, 0, 0, p, ofs);

	return 0;
}

/*
 * Traversal callback for --stdin-packs: in "follow" mode add the
 * object itself; otherwise only backfill name-hash hints on entries
 * that are already in to_pack.
 */
static void show_object_pack_hint(struct object *object, const char *name,
				  void *data)
{
	enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
	if (mode == STDIN_PACKS_MODE_FOLLOW) {
		/* Skip blobs we do not actually have. */
		if (object->type == OBJ_BLOB &&
		    !odb_has_object(the_repository->objects, &object->oid, 0))
			return;
		add_object_entry(&object->oid, object->type, name, 0);
	} else {
		struct object_entry *oe = packlist_find(&to_pack, &object->oid);
		if (!oe)
			return;

		/*
		 * Our 'to_pack' list was constructed by iterating all
		 * objects packed in included packs, and so doesn't have
		 * a non-zero hash field that you would typically pick
		 * up during a reachability traversal.
		 *
		 * Make a best-effort attempt to fill in the ->hash and
		 * ->no_try_delta fields here in order to perhaps
		 * improve the delta selection process.
		 */
		oe->hash = pack_name_hash_fn(name);
		oe->no_try_delta = name && no_try_delta(name);

		stdin_packs_hints_nr++;
	}
}

/*
 * Commit counterpart of show_object_pack_hint(); in "follow" mode the
 * commit is added like any other object.
 */
static void show_commit_pack_hint(struct commit *commit, void *data)
{
	enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;

	if (mode == STDIN_PACKS_MODE_FOLLOW) {
		show_object_pack_hint((struct object *)commit, "", data);
		return;
	}

	/* nothing to do; commits don't have a namehash */

}

/* QSORT comparator on the packed_git stored in string_list ->util. */
static int pack_mtime_cmp(const void *_a, const void *_b)
{
	struct packed_git *a = ((const struct string_list_item*)_a)->util;
	struct packed_git *b = ((const struct string_list_item*)_b)->util;

	/*
	 * order packs by descending mtime so that objects are laid out
	 * roughly as newest-to-oldest
	 */
	if (a->mtime < b->mtime)
		return 1;
	else if (b->mtime < a->mtime)
		return -1;
	else
		return 0;
}

/*
 * Read pack names from stdin ("^name" marks an exclusion), resolve
 * them to packed_git handles, mark excluded packs as kept in-core,
 * and add every object of the included packs to to_pack.
 */
static void read_packs_list_from_stdin(struct rev_info *revs)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct strbuf buf = STRBUF_INIT;
	struct string_list include_packs = STRING_LIST_INIT_DUP;
	struct string_list exclude_packs = STRING_LIST_INIT_DUP;
	struct string_list_item *item = NULL;

	struct packed_git *p;

	while (strbuf_getline(&buf, stdin) != EOF) {
		if (!buf.len)
			continue;

		if (*buf.buf == '^')
			string_list_append(&exclude_packs, buf.buf + 1);
		else
			string_list_append(&include_packs, buf.buf);

		strbuf_reset(&buf);
	}

	string_list_sort(&include_packs);
	string_list_remove_duplicates(&include_packs, 0);
	string_list_sort(&exclude_packs);
	string_list_remove_duplicates(&exclude_packs, 0);

	/* Resolve each listed name to its packed_git, if we have it. */
	for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
		const char *pack_name = pack_basename(p);

		if ((item = string_list_lookup(&include_packs, pack_name)))
			item->util = p;
		if ((item = string_list_lookup(&exclude_packs, pack_name)))
			item->util = p;
	}

	/*
	 * Arguments we got on stdin may not even be packs. First
	 * check that to avoid segfaulting later on in
	 * e.g. pack_mtime_cmp(), excluded packs are handled below.
	 *
	 * Since we first parsed our STDIN and then sorted the input
	 * lines the pack we error on will be whatever line happens to
	 * sort first. This is lazy, it's enough that we report one
	 * bad case here, we don't need to report the first/last one,
	 * or all of them.
	 */
	for_each_string_list_item(item, &include_packs) {
		struct packed_git *p = item->util;
		if (!p)
			die(_("could not find pack '%s'"), item->string);
		if (!is_pack_valid(p))
			die(_("packfile %s cannot be accessed"), p->pack_name);
	}

	/*
	 * Then, handle all of the excluded packs, marking them as
	 * kept in-core so that later calls to add_object_entry()
	 * discard any objects that are also found in excluded packs.
	 */
	for_each_string_list_item(item, &exclude_packs) {
		struct packed_git *p = item->util;
		if (!p)
			die(_("could not find pack '%s'"), item->string);
		p->pack_keep_in_core = 1;
	}

	/*
	 * Order packs by descending mtime (newest first, matching
	 * pack_mtime_cmp()); use QSORT directly to access the
	 * string_list_item's ->util pointer, which string_list_sort()
	 * does not provide.
	 */
	QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);

	for_each_string_list_item(item, &include_packs) {
		struct packed_git *p = item->util;
		for_each_object_in_pack(p,
					add_object_entry_from_pack,
					revs,
					FOR_EACH_OBJECT_PACK_ORDER);
	}

	strbuf_release(&buf);
	string_list_clear(&include_packs, 0);
	string_list_clear(&exclude_packs, 0);
}

static void add_unreachable_loose_objects(struct rev_info *revs);

/*
 * Entry point for --stdin-packs: collect objects from the listed
 * packs, then walk revisions to backfill name hashes (and, in
 * "follow" mode, extra objects).
 */
static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
{
	struct rev_info revs;

	repo_init_revisions(the_repository, &revs, NULL);
	/*
	 * Use a revision walk to fill in the namehash of objects in the include
	 * packs. To save time, we'll avoid traversing through objects that are
	 * in excluded packs.
	 *
	 * That may cause us to avoid populating all of the namehash fields of
	 * all included objects, but our goal is best-effort, since this is only
	 * an optimization during delta selection.
	 */
	revs.no_kept_objects = 1;
	revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
	revs.blob_objects = 1;
	revs.tree_objects = 1;
	revs.tag_objects = 1;
	revs.ignore_missing_links = 1;

	/* avoids adding objects in excluded packs */
	ignore_packed_keep_in_core = 1;
	read_packs_list_from_stdin(&revs);
	if (rev_list_unpacked)
		add_unreachable_loose_objects(&revs);

	if (prepare_revision_walk(&revs))
		die(_("revision walk setup failed"));
	traverse_commit_list(&revs,
			     show_commit_pack_hint,
			     show_object_pack_hint,
			     &mode);

	trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
			   stdin_packs_found_nr);
	trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
			   stdin_packs_hints_nr);
}

/*
 * Add (or update) an entry for a cruft pack. If the object is already
 * in to_pack, optionally refresh its name hash; otherwise create a
 * fresh entry. In both cases keep the newest mtime seen so far.
 */
static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
				   struct packed_git *pack, off_t offset,
				   const char *name, uint32_t mtime)
{
	struct object_entry *entry;

	display_progress(progress_state, ++nr_seen);

	entry = packlist_find(&to_pack, oid);
	if (entry) {
		if (name) {
			entry->hash = pack_name_hash_fn(name);
			entry->no_try_delta = no_try_delta(name);
		}
	} else {
		if (!want_object_in_pack_mtime(oid, 0, &pack, &offset, mtime))
			return;
		if (!pack && type == OBJ_BLOB) {
			/* Look for the blob loose in any object source. */
			struct odb_source *source = the_repository->objects->sources;
			int found = 0;

			for (; !found && source; source = source->next)
				if (has_loose_object(source, oid))
					found = 1;

			/*
			 * If a traversed tree has a missing blob then we want
			 * to avoid adding that missing object to our pack.
			 *
			 * This only applies to missing blobs, not trees,
			 * because the traversal needs to parse sub-trees but
			 * not blobs.
			 *
			 * Note we only perform this check when we couldn't
			 * already find the object in a pack, so we're really
			 * limited to "ensure non-tip blobs which don't exist in
			 * packs do exist via loose objects". Confused?
			 */
			if (!found)
				return;
		}

		entry = create_object_entry(oid, type, pack_name_hash_fn(name),
					    0, name && no_try_delta(name),
					    pack, offset);
	}

	/* Record the most recent mtime observed for this object. */
	if (mtime > oe_cruft_mtime(&to_pack, entry))
		oe_set_cruft_mtime(&to_pack, entry, mtime);
	return;
}

/*
 * Traversal callback for cruft packs: record a reachable-from-recent
 * object with the expiration cutoff as its mtime.
 */
static void show_cruft_object(struct object *obj, const char *name, void *data UNUSED)
{
	/*
	 * if we did not record it earlier, it's at least as old as our
	 * expiration value. Rather than find it exactly, just use that
	 * value. This may bump it forward from its real mtime, but it
	 * will still be "too old" next time we run with the same
	 * expiration.
	 *
	 * if obj does appear in the packing list, this call is a noop (or may
	 * set the namehash).
	 */
	add_cruft_object_entry(&obj->oid, obj->type, NULL, 0, name, cruft_expiration);
}

/* Commit counterpart of show_cruft_object(); commits get no name. */
static void show_cruft_commit(struct commit *commit, void *data)
{
	show_cruft_object((struct object*)commit, NULL, data);
}

/* Traverse only objects that are not in an in-core kept pack. */
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
	return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
}

static int cruft_include_check(struct commit *commit, void *data)
{
	return cruft_include_check_obj((struct object*)commit, data);
}

/*
 * Callback for add_unseen_recent_objects_to_traversal(): record a
 * recent object together with the mtime it was discovered with.
 */
static void set_cruft_mtime(const struct object *object,
			    struct packed_git *pack,
			    off_t offset, time_t mtime)
{
	add_cruft_object_entry(&object->oid, object->type, pack, offset, NULL,
			       mtime);
}

/*
 * Set (or clear) the in-core "kept" bit on every pack listed in
 * `packs`; every entry's ->util must already point at its packed_git.
 */
static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
{
	struct string_list_item *item = NULL;
	for_each_string_list_item(item, packs) {
		struct packed_git *p = item->util;
		if (!p)
			die(_("could not find pack '%s'"), item->string);
		if (p->is_cruft && keep)
			ignore_packed_keep_in_core_has_cruft = 1;
		p->pack_keep_in_core = keep;
	}
}

static void add_objects_in_unpacked_packs(void);

/*
 * Cruft enumeration without an expiration: every unreachable object
 * (packed in non-kept packs, or loose) goes into the cruft pack.
 */
static void enumerate_cruft_objects(void)
{
	if (progress)
		progress_state = start_progress(the_repository,
						_("Enumerating cruft objects"), 0);

	add_objects_in_unpacked_packs();
	add_unreachable_loose_objects(NULL);

	stop_progress(&progress_state);
}

/*
 * Cruft enumeration with an expiration: seed a traversal with recent
 * objects, then walk to keep anything reachable from them, pruning
 * the rest.
 */
static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct packed_git *p;
	struct rev_info revs;
	int ret;

	repo_init_revisions(the_repository, &revs, NULL);

	revs.tag_objects = 1;
	revs.tree_objects = 1;
	revs.blob_objects = 1;

	revs.include_check = cruft_include_check;
	revs.include_check_obj = cruft_include_check_obj;

	revs.ignore_missing_links = 1;

	if (progress)
		progress_state = start_progress(the_repository,
						_("Enumerating cruft objects"), 0);
	ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration,
						     set_cruft_mtime, 1);
	stop_progress(&progress_state);

	if (ret)
		die(_("unable to add cruft objects"));

	/*
	 * Re-mark only the fresh packs as kept so that objects in
	 * unknown packs do not halt the reachability traversal early.
	 */
	for (p = packfile_store_get_all_packs(packs); p; p = p->next)
		p->pack_keep_in_core = 0;
	mark_pack_kept_in_core(fresh_packs, 1);

	if (prepare_revision_walk(&revs))
		die(_("revision walk setup failed"));
	if (progress)
		progress_state = start_progress(the_repository,
						_("Traversing cruft objects"), 0);
	nr_seen = 0;
	traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL);

	stop_progress(&progress_state);
}

/*
 * Entry point for --cruft: read "fresh" and "-discard" pack names
 * from stdin, mark them accordingly, and enumerate the cruft objects
 * (with or without an expiration-bounded traversal).
 */
static void read_cruft_objects(void)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct strbuf buf = STRBUF_INIT;
	struct string_list discard_packs = STRING_LIST_INIT_DUP;
	struct string_list fresh_packs = STRING_LIST_INIT_DUP;
	struct packed_git *p;

	ignore_packed_keep_in_core = 1;

	while (strbuf_getline(&buf, stdin) != EOF) {
		if (!buf.len)
			continue;

		if (*buf.buf == '-')
			string_list_append(&discard_packs, buf.buf + 1);
		else
			string_list_append(&fresh_packs, buf.buf);
	}

	string_list_sort(&discard_packs);
	string_list_sort(&fresh_packs);

	for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
		const char *pack_name = pack_basename(p);
		struct string_list_item *item;

		item = string_list_lookup(&fresh_packs, pack_name);
		if (!item)
			item = string_list_lookup(&discard_packs, pack_name);

		if (item) {
			item->util = p;
		} else {
			/*
			 * This pack wasn't mentioned in either the "fresh" or
			 * "discard" list, so the caller didn't know about it.
			 *
			 * Mark it as kept so that its objects are ignored by
			 * add_unseen_recent_objects_to_traversal(). We'll
			 * unmark it before starting the traversal so it doesn't
			 * halt the traversal early.
			 */
			p->pack_keep_in_core = 1;
		}
	}

	mark_pack_kept_in_core(&fresh_packs, 1);
	mark_pack_kept_in_core(&discard_packs, 0);

	if (cruft_expiration)
		enumerate_and_traverse_cruft_objects(&fresh_packs);
	else
		enumerate_cruft_objects();

	strbuf_release(&buf);
	string_list_clear(&discard_packs, 0);
	string_list_clear(&fresh_packs, 0);
}

/*
 * Read object IDs from stdin, one per line: "-<oid>" marks a
 * preferred base (edge), "<oid> <name>" adds an object to pack.
 */
static void read_object_list_from_stdin(void)
{
	char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
	struct object_id oid;
	const char *p;

	for (;;) {
		if (!fgets(line, sizeof(line), stdin)) {
			if (feof(stdin))
				break;
			if (!ferror(stdin))
				BUG("fgets returned NULL, not EOF, not error!");
			if (errno != EINTR)
				die_errno("fgets");
			/* EINTR: clear the error state and retry. */
			clearerr(stdin);
			continue;
		}
		if (line[0] == '-') {
			if (get_oid_hex(line+1, &oid))
				die(_("expected edge object ID, got garbage:\n %s"),
				    line);
			add_preferred_base(&oid);
			continue;
		}
		if (parse_oid_hex(line, &oid, &p))
			die(_("expected object ID, got garbage:\n %s"), line);

		add_preferred_base_object(p + 1);
		add_object_entry(&oid, OBJ_NONE, p + 1, 0);
	}
}

/* Revision-walk callback: pack a commit (and index it for bitmaps). */
static void show_commit(struct commit *commit, void *data UNUSED)
{
	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);

	if (write_bitmap_index)
		index_commit_for_bitmap(commit);

	if (use_delta_islands)
		propagate_island_marks(the_repository, commit);
}

/*
 * Revision-walk callback: pack a non-commit object, tracking its
 * path depth when delta islands are in use.
 */
static void show_object(struct object *obj, const char *name,
			void *data UNUSED)
{
	add_preferred_base_object(name);
	add_object_entry(&obj->oid, obj->type, name, 0);

	if (use_delta_islands) {
		const char *p;
		unsigned depth;
		struct object_entry *ent;

		/* the empty string is a root tree, which is depth 0 */
		depth = *name ? 1 : 0;
		for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
			depth++;

		ent = packlist_find(&to_pack, &obj->oid);
		if (ent && depth > oe_tree_depth(&to_pack, ent))
			oe_set_tree_depth(&to_pack, ent, depth);
	}
}

/* show_object variant for --missing=allow-any. */
static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
{
	assert(arg_missing_action == MA_ALLOW_ANY);

	/*
	 * Quietly ignore ALL missing objects. This avoids problems with
	 * staging them now and getting an odd error later.
	 */
	if (!odb_has_object(the_repository->objects, &obj->oid, 0))
		return;

	show_object(obj, name, data);
}

/* show_object variant for --missing=allow-promisor. */
static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
{
	assert(arg_missing_action == MA_ALLOW_PROMISOR);

	/*
	 * Quietly ignore EXPECTED missing objects. This avoids problems with
	 * staging them now and getting an odd error later.
	 */
	if (!odb_has_object(the_repository->objects, &obj->oid, 0) &&
	    is_promisor_object(to_pack.repo, &obj->oid))
		return;

	show_object(obj, name, data);
}

/*
 * Parse --missing=<action>, selecting the matching fn_show_object
 * callback; dies on an unknown action.
 */
static int option_parse_missing_action(const struct option *opt UNUSED,
				       const char *arg, int unset)
{
	assert(arg);
	assert(!unset);

	if (!strcmp(arg, "error")) {
		arg_missing_action = MA_ERROR;
		fn_show_object = show_object;
		return 0;
	}

	if (!strcmp(arg, "allow-any")) {
		arg_missing_action = MA_ALLOW_ANY;
		fetch_if_missing = 0;
		fn_show_object = show_object__ma_allow_any;
		return 0;
	}

	if (!strcmp(arg, "allow-promisor")) {
		arg_missing_action = MA_ALLOW_PROMISOR;
		fetch_if_missing = 0;
		fn_show_object = show_object__ma_allow_promisor;
		return 0;
	}

	die(_("invalid value for '%s': '%s'"), "--missing", arg);
	return 0;
}

/* Edge-commit callback: edges become preferred (excluded) bases. */
static void show_edge(struct commit *commit)
{
	add_preferred_base(&commit->object.oid);
}

/*
 * for_each_packed_object() callback: add an object from a non-kept
 * pack, carrying its per-object mtime along in cruft mode.
 */
static int add_object_in_unpacked_pack(const struct object_id *oid,
				       struct packed_git *pack,
				       uint32_t pos,
				       void *data UNUSED)
{
	if (cruft) {
		off_t offset;
		time_t mtime;

		if (pack->is_cruft) {
			/* Cruft packs carry per-object mtimes in .mtimes. */
			if (load_pack_mtimes(pack) < 0)
				die(_("could not load cruft pack .mtimes"));
			mtime = nth_packed_mtime(pack, pos);
		} else {
			mtime = pack->mtime;
		}
		offset = nth_packed_object_offset(pack, pos);

		add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
				       NULL, mtime);
	} else {
		add_object_entry(oid, OBJ_NONE, "", 0);
	}
	return 0;
}

/* Add every object from local, non-kept packs to the packing list. */
static void add_objects_in_unpacked_packs(void)
{
	if (for_each_packed_object(to_pack.repo,
				   add_object_in_unpacked_pack,
				   NULL,
				   FOR_EACH_OBJECT_PACK_ORDER |
				   FOR_EACH_OBJECT_LOCAL_ONLY |
				   FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
				   FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
		die(_("cannot open pack index"));
}

/*
 * Loose-object iteration callback: add a loose object to the pack
 * (with its file mtime in cruft mode) and, if a rev walk is supplied,
 * queue loose commits as traversal tips.
 */
static int add_loose_object(const struct object_id *oid, const char *path,
			    void *data)
{
	struct rev_info *revs = data;
	enum object_type type = odb_read_object_info(the_repository->objects, oid, NULL);

	if (type < 0) {
		warning(_("loose object at %s could not be examined"), path);
		return 0;
	}

	if (cruft) {
		struct stat st;
		if (stat(path, &st) < 0) {
			/* The object may have been pruned concurrently. */
			if (errno == ENOENT)
				return 0;
			return error_errno("unable to stat %s", oid_to_hex(oid));
		}

		add_cruft_object_entry(oid, type, NULL, 0, NULL,
				       st.st_mtime);
	} else {
		add_object_entry(oid, type, "", 0);
	}

	if (revs && type == OBJ_COMMIT)
		add_pending_oid(revs, NULL, oid, 0);

	return 0;
}

/*
 * We actually don't even have to worry about reachability here.
 * add_object_entry will weed out duplicates, so we just add every
 * loose object we find.
 */
static void add_unreachable_loose_objects(struct rev_info *revs)
{
	for_each_loose_file_in_source(the_repository->objects->sources,
				      add_loose_object, NULL, NULL, revs);
}

/*
 * Return 1 if the object exists in a kept or non-local pack. Caches
 * the last pack that matched; (void *)1 is the "no cache yet"
 * sentinel for the static last_found.
 */
static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	static struct packed_git *last_found = (void *)1;
	struct packed_git *p;

	p = (last_found != (void *)1) ? last_found :
					packfile_store_get_all_packs(packs);

	while (p) {
		if ((!p->pack_local || p->pack_keep ||
		     p->pack_keep_in_core) &&
		    find_pack_entry_one(oid, p)) {
			last_found = p;
			return 1;
		}
		/* After the cached pack, scan the full list, skipping it. */
		if (p == last_found)
			p = packfile_store_get_all_packs(packs);
		else
			p = p->next;
		if (p == last_found)
			p = p->next;
	}
	return 0;
}

/*
 * Store a list of sha1s that should not be discarded
 * because they are either written too recently, or are
 * reachable from another object that was.
 *
 * This is filled by get_object_list.
 */
static struct oid_array recent_objects;

/*
 * An object loosened from a pack may be discarded only when an
 * expiration is set, its mtime is past it, and it is not recent.
 */
static int loosened_object_can_be_discarded(const struct object_id *oid,
					    timestamp_t mtime)
{
	if (!unpack_unreachable_expiration)
		return 0;
	if (mtime > unpack_unreachable_expiration)
		return 0;
	if (oid_array_lookup(&recent_objects, oid) >= 0)
		return 0;
	return 1;
}

/*
 * Write out, as loose objects, the packed objects we are about to
 * drop (in non-kept local packs but not in to_pack), unless they may
 * be discarded outright per loosened_object_can_be_discarded().
 */
static void loosen_unused_packed_objects(void)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct packed_git *p;
	uint32_t i;
	uint32_t loosened_objects_nr = 0;
	struct object_id oid;

	for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
		if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
			continue;

		if (open_pack_index(p))
			die(_("cannot open pack index"));

		for (i = 0; i < p->num_objects; i++) {
			nth_packed_object_id(&oid, p, i);
			if (!packlist_find(&to_pack, &oid) &&
			    !has_sha1_pack_kept_or_nonlocal(&oid) &&
			    !loosened_object_can_be_discarded(&oid, p->mtime)) {
				if (force_object_loose(the_repository->objects->sources,
						       &oid, p->mtime))
					die(_("unable to force loose object"));
				loosened_objects_nr++;
			}
		}
	}

	trace2_data_intmax("pack-objects", the_repository,
			   "loosen_unused_packed_objects/loosened",
			   loosened_objects_nr);
}

/*
 * This tracks any options which pack-reuse code expects to be on, or which a
 * reader of the pack might not understand, and which would therefore prevent
 * blind reuse of what we have on disk.
 */
static int pack_options_allow_reuse(void)
{
	return allow_pack_reuse != NO_PACK_REUSE &&
	       pack_to_stdout &&
	       !ignore_packed_keep_on_disk &&
	       !ignore_packed_keep_in_core &&
	       (!local || !have_non_local_packs) &&
	       !incremental;
}

/*
 * Build the object list from reachability bitmaps instead of a full
 * traversal. Returns -1 if no bitmap walk is possible (caller falls
 * back), 0 on success.
 */
static int get_object_list_from_bitmap(struct rev_info *revs)
{
	if (!(bitmap_git = prepare_bitmap_walk(revs, 0)))
		return -1;

	/*
	 * For now, force the name-hash version to be 1 since that
	 * is the version implied by the bitmap format. Later, the
	 * format can include this version explicitly in its format,
	 * allowing readers to know the version that was used during
	 * the bitmap write.
	 */
	name_hash_version = 1;

	if (pack_options_allow_reuse())
		reuse_partial_packfile_from_bitmap(bitmap_git,
						   &reuse_packfiles,
						   &reuse_packfiles_nr,
						   &reuse_packfile_bitmap,
						   allow_pack_reuse == MULTI_PACK_REUSE);

	if (reuse_packfiles) {
		reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap);
		if (!reuse_packfile_objects)
			BUG("expected non-empty reuse bitmap");

		nr_result += reuse_packfile_objects;
		nr_seen += reuse_packfile_objects;
		display_progress(progress_state, nr_seen);
	}

	traverse_bitmap_commit_list(bitmap_git, revs,
				    &add_object_entry_from_bitmap);
	return 0;
}

/* Traversal callbacks that record recent objects (see recent_objects). */
static void record_recent_object(struct object *obj,
				 const char *name UNUSED,
				 void *data UNUSED)
{
	oid_array_append(&recent_objects, &obj->oid);
}

static void record_recent_commit(struct commit *commit, void *data UNUSED)
{
	oid_array_append(&recent_objects, &commit->object.oid);
}

/*
 * Ref iteration callback: mark the commit at a preferred bitmap tip with
 * NEEDS_BITMAP. Annotated tags are peeled first so the flag lands on the
 * pointed-to commit; non-commit objects are parsed but left unflagged.
 */
static int mark_bitmap_preferred_tip(const char *refname,
				     const char *referent UNUSED,
				     const struct object_id *oid,
				     int flags UNUSED,
				     void *data UNUSED)
{
	struct object_id peeled;
	struct object *object;

	if (!peel_iterated_oid(the_repository, oid, &peeled))
		oid = &peeled;

	object = parse_object_or_die(the_repository, oid, refname);
	if (object->type == OBJ_COMMIT)
		object->flags |= NEEDS_BITMAP;

	return 0;
}

/*
 * For every configured preferred-tip ref pattern, flag the tip commits
 * (via mark_bitmap_preferred_tip) so the bitmap writer favors them.
 * A no-op when no preferred tips are configured.
 */
static void mark_bitmap_preferred_tips(void)
{
	struct string_list_item *item;
	const struct string_list *preferred_tips;

	preferred_tips = bitmap_preferred_tips(the_repository);
	if (!preferred_tips)
		return;

	for_each_string_list_item(item, preferred_tips) {
		refs_for_each_ref_in(get_main_ref_store(the_repository),
				     item->string, mark_bitmap_preferred_tip,
				     NULL);
	}
}

/*
 * An object is "uninteresting" for the path-walk if it was never looked
 * up during the walk, or carries the UNINTERESTING flag.
 */
static inline int is_oid_uninteresting(struct repository *repo,
				       struct object_id *oid)
{
	struct object *o = lookup_object(repo, oid);
	return !o || (o->flags & UNINTERESTING);
}

/*
 * Path-walk callback: add all objects found at 'path' to the packing
 * data and, when delta search is enabled, record them as one delta-search
 * region. Uninteresting objects are added as excluded entries only when
 * building a thin pack. 'data' is the running processed-object counter
 * used for progress display.
 */
static int add_objects_by_path(const char *path,
			       struct oid_array *oids,
			       enum object_type type,
			       void *data)
{
	size_t oe_start = to_pack.nr_objects;
	size_t oe_end;
	unsigned int *processed = data;

	/*
	 * First, add all objects to the packing data, including the ones
	 * marked UNINTERESTING (translated to 'exclude') as they can be
	 * used as delta bases.
	 */
	for (size_t i = 0; i < oids->nr; i++) {
		int exclude;
		struct object_info oi = OBJECT_INFO_INIT;
		struct object_id *oid = &oids->oid[i];

		/* Skip objects that do not exist locally. */
		if ((exclude_promisor_objects || arg_missing_action != MA_ERROR) &&
		    odb_read_object_info_extended(the_repository->objects, oid, &oi,
						  OBJECT_INFO_FOR_PREFETCH) < 0)
			continue;

		exclude = is_oid_uninteresting(the_repository, oid);

		if (exclude && !thin)
			continue;

		add_object_entry(oid, type, path, exclude);
	}

	oe_end = to_pack.nr_objects;

	/* We can skip delta calculations if it is a no-op. */
	if (oe_end == oe_start || !window)
		return 0;

	ALLOC_GROW(to_pack.regions,
		   to_pack.nr_regions + 1,
		   to_pack.nr_regions_alloc);

	to_pack.regions[to_pack.nr_regions].start = oe_start;
	to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
	to_pack.nr_regions++;

	*processed += oids->nr;
	display_progress(progress_state, *processed);

	return 0;
}

/*
 * Enumerate objects with the path-walk API instead of an ordinary
 * revision traversal, grouping objects by path via add_objects_by_path().
 * Dies if the walk fails.
 */
static void get_object_list_path_walk(struct rev_info *revs)
{
	struct path_walk_info info = PATH_WALK_INFO_INIT;
	unsigned int processed = 0;
	int result;

	info.revs = revs;
	info.path_fn = add_objects_by_path;
	info.path_fn_data = &processed;

	/*
	 * Allow the --[no-]sparse option to be interesting here, if only
	 * for testing purposes. Paths with no interesting objects will not
	 * contribute to the resulting pack, but only create noisy preferred
	 * base objects.
	 */
	info.prune_all_uninteresting = sparse;
	info.edge_aggressive = shallow;

	trace2_region_enter("pack-objects", "path-walk", revs->repo);
	result = walk_objects_by_path(&info);
	trace2_region_leave("pack-objects", "path-walk", revs->repo);

	if (result)
		die(_("failed to pack objects via path-walk"));
}

/*
 * Build the full object list for the pack. Revision arguments come from
 * 'argv' plus additional revs read line-by-line from stdin (terminated by
 * an empty line); "--not" and "--shallow <oid>" are the only accepted
 * option lines on stdin. Tries a bitmap walk first when enabled, then
 * falls back to the path-walk or a standard commit traversal, and finally
 * handles the keep/pack/unpack-unreachable variants.
 */
static void get_object_list(struct rev_info *revs, struct strvec *argv)
{
	struct setup_revision_opt s_r_opt = {
		.allow_exclude_promisor_objects = 1,
	};
	char line[1000];
	int flags = 0;
	int save_warning;

	save_commit_buffer = 0;
	setup_revisions_from_strvec(argv, revs, &s_r_opt);

	/* make sure shallows are read */
	is_repository_shallow(the_repository);

	save_warning = warn_on_object_refname_ambiguity;
	warn_on_object_refname_ambiguity = 0;

	while (fgets(line, sizeof(line), stdin) != NULL) {
		int len = strlen(line);
		if (len && line[len - 1] == '\n')
			line[--len] = 0;
		if (!len)
			break;
		if (*line == '-') {
			if (!strcmp(line, "--not")) {
				/* negated revs invalidate a bitmap-friendly walk */
				flags ^= UNINTERESTING;
				write_bitmap_index = 0;
				continue;
			}
			if (starts_with(line, "--shallow ")) {
				struct object_id oid;
				if (get_oid_hex(line + 10, &oid))
					die("not an object name '%s'", line + 10);
				register_shallow(the_repository, &oid);
				use_bitmap_index = 0;
				continue;
			}
			die(_("not a rev '%s'"), line);
		}
		if (handle_revision_arg(line, revs, flags, REVARG_CANNOT_BE_FILENAME))
			die(_("bad revision '%s'"), line);
	}

	warn_on_object_refname_ambiguity = save_warning;

	if (use_bitmap_index && !get_object_list_from_bitmap(revs))
		return;

	if (use_delta_islands)
		load_delta_islands(the_repository, progress);

	if (write_bitmap_index)
		mark_bitmap_preferred_tips();

	if (!fn_show_object)
		fn_show_object = show_object;

	if (path_walk) {
		get_object_list_path_walk(revs);
	} else {
		if (prepare_revision_walk(revs))
			die(_("revision walk setup failed"));
		mark_edges_uninteresting(revs, show_edge, sparse);
		traverse_commit_list(revs,
				     show_commit, fn_show_object,
				     NULL);
	}

	if (unpack_unreachable_expiration) {
		/* collect recent objects so they are not discarded later */
		revs->ignore_missing_links = 1;
		if (add_unseen_recent_objects_to_traversal(revs,
				unpack_unreachable_expiration, NULL, 0))
			die(_("unable to add recent objects"));
		if (prepare_revision_walk(revs))
			die(_("revision walk setup failed"));
		traverse_commit_list(revs, record_recent_commit,
				     record_recent_object, NULL);
	}

	if (keep_unreachable)
		add_objects_in_unpacked_packs();
	if (pack_loose_unreachable)
		add_unreachable_loose_objects(NULL);
	if (unpack_unreachable)
		loosen_unused_packed_objects();

	oid_array_clear(&recent_objects);
}

/*
 * Mark every local pack whose basename appears in 'names' (--keep-pack)
 * as kept in-core, and remember that some packs are kept that way.
 */
static void add_extra_kept_packs(const struct string_list *names)
{
	struct packfile_store *packs = the_repository->objects->packfiles;
	struct packed_git *p;

	if (!names->nr)
		return;

	for (p = packfile_store_get_all_packs(packs); p; p = p->next) {
		const char *name = basename(p->pack_name);
		int i;

		if (!p->pack_local)
			continue;

		for (i = 0; i < names->nr; i++)
			if (!fspathcmp(name, names->items[i].string))
				break;

		if (i < names->nr) {
			p->pack_keep_in_core = 1;
			ignore_packed_keep_in_core = 1;
			continue;
		}
	}
}

/*
 * -q/--quiet: silence progress (value 0); --no-quiet restores normal
 * progress (value 1) only if it was previously silenced.
 */
static int option_parse_quiet(const struct option *opt, const char *arg,
			      int unset)
{
	int *val = opt->value;

	BUG_ON_OPT_ARG(arg);

	if (!unset)
		*val = 0;
	else if (!*val)
		*val = 1;
	return 0;
}

/*
 * --index-version=<version>[,<offset>]: parse the idx format version
 * (only <= 2 supported) and the optional 32-bit offset limit, which must
 * fit in 31 bits.
 */
static int option_parse_index_version(const struct option *opt,
				      const char *arg, int unset)
{
	struct pack_idx_option *popts = opt->value;
	char *c;
	const char *val = arg;

	BUG_ON_OPT_NEG(unset);

	popts->version = strtoul(val, &c, 10);
	if (popts->version > 2)
		die(_("unsupported index version %s"), val);
	if (*c == ',' && c[1])
		popts->off32_limit = strtoul(c+1, &c, 0);
	if (*c || popts->off32_limit & 0x80000000)
		die(_("bad index version '%s'"), val);
	return 0;
}

/*
 * --unpack-unreachable[=<time>]: enable loosening of unreachable packed
 * objects, optionally with an expiration cutoff; --no-unpack-unreachable
 * clears both.
 */
static int option_parse_unpack_unreachable(const struct option *opt UNUSED,
					   const char *arg, int unset)
{
	if (unset) {
		unpack_unreachable = 0;
		unpack_unreachable_expiration = 0;
	}
	else {
		unpack_unreachable = 1;
		if (arg)
			unpack_unreachable_expiration = approxidate(arg);
	}
	return 0;
}

/*
 * --cruft-expiration[=<time>]: enable cruft-pack generation, optionally
 * with an expiration cutoff; --no-cruft-expiration clears both.
 */
static int option_parse_cruft_expiration(const struct option *opt UNUSED,
					 const char *arg, int unset)
{
	if (unset) {
		cruft = 0;
		cruft_expiration = 0;
	} else {
		cruft = 1;
		if (arg)
			cruft_expiration = approxidate(arg);
	}
	return 0;
}

/*
 * Return 1 if 'obj' is NOT stored in a promisor pack (i.e. it should be
 * packed when excluding promisor objects best-effort). The object must
 * exist; a lookup failure is a BUG.
 */
static int is_not_in_promisor_pack_obj(struct object *obj, void *data UNUSED)
{
	struct object_info info = OBJECT_INFO_INIT;
	if (odb_read_object_info_extended(the_repository->objects, &obj->oid, &info, 0))
		BUG("should_include_obj should only be called on existing objects");
	return info.whence != OI_PACKED || !info.u.packed.pack->pack_promisor;
}

/* Commit-typed wrapper around is_not_in_promisor_pack_obj(). */
static int is_not_in_promisor_pack(struct commit *commit, void *data)
{
	return is_not_in_promisor_pack_obj((struct object *) commit, data);
}

/*
 * --stdin-packs[=<mode>]: no argument selects the standard mode, "follow"
 * selects follow mode, --no-stdin-packs disables; anything else dies.
 */
static int parse_stdin_packs_mode(const struct option *opt, const char *arg,
				  int unset)
{
	enum stdin_packs_mode *mode = opt->value;

	if (unset)
		*mode = STDIN_PACKS_MODE_NONE;
	else if (!arg || !*arg)
		*mode = STDIN_PACKS_MODE_STANDARD;
	else if (!strcmp(arg, "follow"))
		*mode = STDIN_PACKS_MODE_FOLLOW;
	else
		die(_("invalid value for '%s': '%s'"),
opt->long_name, arg); 4862 4863 return 0; 4864} 4865 4866int cmd_pack_objects(int argc, 4867 const char **argv, 4868 const char *prefix, 4869 struct repository *repo UNUSED) 4870{ 4871 int use_internal_rev_list = 0; 4872 int all_progress_implied = 0; 4873 struct strvec rp = STRVEC_INIT; 4874 int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; 4875 int rev_list_index = 0; 4876 enum stdin_packs_mode stdin_packs = STDIN_PACKS_MODE_NONE; 4877 struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; 4878 struct list_objects_filter_options filter_options = 4879 LIST_OBJECTS_FILTER_INIT; 4880 4881 struct option pack_objects_options[] = { 4882 OPT_CALLBACK_F('q', "quiet", &progress, NULL, 4883 N_("do not show progress meter"), 4884 PARSE_OPT_NOARG, option_parse_quiet), 4885 OPT_SET_INT(0, "progress", &progress, 4886 N_("show progress meter"), 1), 4887 OPT_SET_INT(0, "all-progress", &progress, 4888 N_("show progress meter during object writing phase"), 2), 4889 OPT_BOOL(0, "all-progress-implied", 4890 &all_progress_implied, 4891 N_("similar to --all-progress when progress meter is shown")), 4892 OPT_CALLBACK_F(0, "index-version", &pack_idx_opts, N_("<version>[,<offset>]"), 4893 N_("write the pack index file in the specified idx format version"), 4894 PARSE_OPT_NONEG, option_parse_index_version), 4895 OPT_UNSIGNED(0, "max-pack-size", &pack_size_limit, 4896 N_("maximum size of each output pack file")), 4897 OPT_BOOL(0, "local", &local, 4898 N_("ignore borrowed objects from alternate object store")), 4899 OPT_BOOL(0, "incremental", &incremental, 4900 N_("ignore packed objects")), 4901 OPT_INTEGER(0, "window", &window, 4902 N_("limit pack window by objects")), 4903 OPT_UNSIGNED(0, "window-memory", &window_memory_limit, 4904 N_("limit pack window by memory in addition to object limit")), 4905 OPT_INTEGER(0, "depth", &depth, 4906 N_("maximum length of delta chain allowed in the resulting pack")), 4907 OPT_BOOL(0, "reuse-delta", &reuse_delta, 4908 N_("reuse 
existing deltas")), 4909 OPT_BOOL(0, "reuse-object", &reuse_object, 4910 N_("reuse existing objects")), 4911 OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta, 4912 N_("use OFS_DELTA objects")), 4913 OPT_INTEGER(0, "threads", &delta_search_threads, 4914 N_("use threads when searching for best delta matches")), 4915 OPT_BOOL(0, "non-empty", &non_empty, 4916 N_("do not create an empty pack output")), 4917 OPT_BOOL(0, "revs", &use_internal_rev_list, 4918 N_("read revision arguments from standard input")), 4919 OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked, 4920 N_("limit the objects to those that are not yet packed"), 4921 1, PARSE_OPT_NONEG), 4922 OPT_SET_INT_F(0, "all", &rev_list_all, 4923 N_("include objects reachable from any reference"), 4924 1, PARSE_OPT_NONEG), 4925 OPT_SET_INT_F(0, "reflog", &rev_list_reflog, 4926 N_("include objects referred by reflog entries"), 4927 1, PARSE_OPT_NONEG), 4928 OPT_SET_INT_F(0, "indexed-objects", &rev_list_index, 4929 N_("include objects referred to by the index"), 4930 1, PARSE_OPT_NONEG), 4931 OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"), 4932 N_("read packs from stdin"), 4933 PARSE_OPT_OPTARG, parse_stdin_packs_mode), 4934 OPT_BOOL(0, "stdin-packs", &stdin_packs, 4935 N_("read packs from stdin")), 4936 OPT_BOOL(0, "stdout", &pack_to_stdout, 4937 N_("output pack to stdout")), 4938 OPT_BOOL(0, "include-tag", &include_tag, 4939 N_("include tag objects that refer to objects to be packed")), 4940 OPT_BOOL(0, "keep-unreachable", &keep_unreachable, 4941 N_("keep unreachable objects")), 4942 OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable, 4943 N_("pack loose unreachable objects")), 4944 OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"), 4945 N_("unpack unreachable objects newer than <time>"), 4946 PARSE_OPT_OPTARG, option_parse_unpack_unreachable), 4947 OPT_BOOL(0, "cruft", &cruft, N_("create a cruft pack")), 4948 OPT_CALLBACK_F(0, "cruft-expiration", NULL, N_("time"), 4949 N_("expire cruft 
objects older than <time>"), 4950 PARSE_OPT_OPTARG, option_parse_cruft_expiration), 4951 OPT_BOOL(0, "sparse", &sparse, 4952 N_("use the sparse reachability algorithm")), 4953 OPT_BOOL(0, "thin", &thin, 4954 N_("create thin packs")), 4955 OPT_BOOL(0, "path-walk", &path_walk, 4956 N_("use the path-walk API to walk objects when possible")), 4957 OPT_BOOL(0, "shallow", &shallow, 4958 N_("create packs suitable for shallow fetches")), 4959 OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk, 4960 N_("ignore packs that have companion .keep file")), 4961 OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), 4962 N_("ignore this pack")), 4963 OPT_INTEGER(0, "compression", &pack_compression_level, 4964 N_("pack compression level")), 4965 OPT_BOOL(0, "keep-true-parents", &grafts_keep_true_parents, 4966 N_("do not hide commits by grafts")), 4967 OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index, 4968 N_("use a bitmap index if available to speed up counting objects")), 4969 OPT_SET_INT(0, "write-bitmap-index", &write_bitmap_index, 4970 N_("write a bitmap index together with the pack index"), 4971 WRITE_BITMAP_TRUE), 4972 OPT_SET_INT_F(0, "write-bitmap-index-quiet", 4973 &write_bitmap_index, 4974 N_("write a bitmap index if possible"), 4975 WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN), 4976 OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), 4977 OPT_CALLBACK_F(0, "missing", NULL, N_("action"), 4978 N_("handling for missing objects"), PARSE_OPT_NONEG, 4979 option_parse_missing_action), 4980 OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects, 4981 N_("do not pack objects in promisor packfiles")), 4982 OPT_BOOL(0, "exclude-promisor-objects-best-effort", 4983 &exclude_promisor_objects_best_effort, 4984 N_("implies --missing=allow-any")), 4985 OPT_BOOL(0, "delta-islands", &use_delta_islands, 4986 N_("respect islands during delta compression")), 4987 OPT_STRING_LIST(0, "uri-protocol", &uri_protocols, 4988 N_("protocol"), 4989 N_("exclude any configured 
uploadpack.blobpackfileuri with this protocol")), 4990 OPT_INTEGER(0, "name-hash-version", &name_hash_version, 4991 N_("use the specified name-hash function to group similar objects")), 4992 OPT_END(), 4993 }; 4994 4995 if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS)) 4996 BUG("too many dfs states, increase OE_DFS_STATE_BITS"); 4997 4998 disable_replace_refs(); 4999 5000 sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1); 5001 if (the_repository->gitdir) { 5002 prepare_repo_settings(the_repository); 5003 if (sparse < 0) 5004 sparse = the_repository->settings.pack_use_sparse; 5005 if (the_repository->settings.pack_use_multi_pack_reuse) 5006 allow_pack_reuse = MULTI_PACK_REUSE; 5007 } 5008 5009 reset_pack_idx_option(&pack_idx_opts); 5010 pack_idx_opts.flags |= WRITE_REV; 5011 repo_config(the_repository, git_pack_config, NULL); 5012 if (git_env_bool(GIT_TEST_NO_WRITE_REV_INDEX, 0)) 5013 pack_idx_opts.flags &= ~WRITE_REV; 5014 5015 progress = isatty(2); 5016 argc = parse_options(argc, argv, prefix, pack_objects_options, 5017 pack_usage, 0); 5018 5019 if (argc) { 5020 base_name = argv[0]; 5021 argc--; 5022 } 5023 if (pack_to_stdout != !base_name || argc) 5024 usage_with_options(pack_usage, pack_objects_options); 5025 5026 if (path_walk < 0) { 5027 if (use_bitmap_index > 0 || 5028 !use_internal_rev_list) 5029 path_walk = 0; 5030 else if (the_repository->gitdir && 5031 the_repository->settings.pack_use_path_walk) 5032 path_walk = 1; 5033 else 5034 path_walk = git_env_bool("GIT_TEST_PACK_PATH_WALK", 0); 5035 } 5036 5037 if (depth < 0) 5038 depth = 0; 5039 if (depth >= (1 << OE_DEPTH_BITS)) { 5040 warning(_("delta chain depth %d is too deep, forcing %d"), 5041 depth, (1 << OE_DEPTH_BITS) - 1); 5042 depth = (1 << OE_DEPTH_BITS) - 1; 5043 } 5044 if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) { 5045 warning(_("pack.deltaCacheLimit is too high, forcing %d"), 5046 (1U << OE_Z_DELTA_BITS) - 1); 5047 cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1; 5048 } 5049 if 
(window < 0) 5050 window = 0; 5051 5052 strvec_push(&rp, "pack-objects"); 5053 5054 if (path_walk) { 5055 const char *option = NULL; 5056 if (filter_options.choice) 5057 option = "--filter"; 5058 else if (use_delta_islands) 5059 option = "--delta-islands"; 5060 5061 if (option) { 5062 warning(_("cannot use %s with %s"), 5063 option, "--path-walk"); 5064 path_walk = 0; 5065 } 5066 } 5067 if (path_walk) { 5068 strvec_push(&rp, "--boundary"); 5069 /* 5070 * We must disable the bitmaps because we are removing 5071 * the --objects / --objects-edge[-aggressive] options. 5072 */ 5073 use_bitmap_index = 0; 5074 } else if (thin) { 5075 use_internal_rev_list = 1; 5076 strvec_push(&rp, shallow 5077 ? "--objects-edge-aggressive" 5078 : "--objects-edge"); 5079 } else 5080 strvec_push(&rp, "--objects"); 5081 5082 if (rev_list_all) { 5083 use_internal_rev_list = 1; 5084 strvec_push(&rp, "--all"); 5085 } 5086 if (rev_list_reflog) { 5087 use_internal_rev_list = 1; 5088 strvec_push(&rp, "--reflog"); 5089 } 5090 if (rev_list_index) { 5091 use_internal_rev_list = 1; 5092 strvec_push(&rp, "--indexed-objects"); 5093 } 5094 if (rev_list_unpacked && !stdin_packs) { 5095 use_internal_rev_list = 1; 5096 strvec_push(&rp, "--unpacked"); 5097 } 5098 5099 die_for_incompatible_opt2(exclude_promisor_objects, 5100 "--exclude-promisor-objects", 5101 exclude_promisor_objects_best_effort, 5102 "--exclude-promisor-objects-best-effort"); 5103 if (exclude_promisor_objects) { 5104 use_internal_rev_list = 1; 5105 fetch_if_missing = 0; 5106 strvec_push(&rp, "--exclude-promisor-objects"); 5107 } else if (exclude_promisor_objects_best_effort) { 5108 use_internal_rev_list = 1; 5109 fetch_if_missing = 0; 5110 option_parse_missing_action(NULL, "allow-any", 0); 5111 /* revs configured below */ 5112 } 5113 if (unpack_unreachable || keep_unreachable || pack_loose_unreachable) 5114 use_internal_rev_list = 1; 5115 5116 if (!reuse_object) 5117 reuse_delta = 0; 5118 if (pack_compression_level == -1) 5119 
pack_compression_level = Z_DEFAULT_COMPRESSION; 5120 else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION) 5121 die(_("bad pack compression level %d"), pack_compression_level); 5122 5123 if (!delta_search_threads) /* --threads=0 means autodetect */ 5124 delta_search_threads = online_cpus(); 5125 5126 if (!HAVE_THREADS && delta_search_threads != 1) 5127 warning(_("no threads support, ignoring --threads")); 5128 if (!pack_to_stdout && !pack_size_limit) 5129 pack_size_limit = pack_size_limit_cfg; 5130 if (pack_to_stdout && pack_size_limit) 5131 die(_("--max-pack-size cannot be used to build a pack for transfer")); 5132 if (pack_size_limit && pack_size_limit < 1024*1024) { 5133 warning(_("minimum pack size limit is 1 MiB")); 5134 pack_size_limit = 1024*1024; 5135 } 5136 5137 if (!pack_to_stdout && thin) 5138 die(_("--thin cannot be used to build an indexable pack")); 5139 5140 die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable", 5141 unpack_unreachable, "--unpack-unreachable"); 5142 if (!rev_list_all || !rev_list_reflog || !rev_list_index) 5143 unpack_unreachable_expiration = 0; 5144 5145 die_for_incompatible_opt2(stdin_packs, "--stdin-packs", 5146 filter_options.choice, "--filter"); 5147 5148 5149 if (stdin_packs && use_internal_rev_list) 5150 die(_("cannot use internal rev list with --stdin-packs")); 5151 5152 if (cruft) { 5153 if (use_internal_rev_list) 5154 die(_("cannot use internal rev list with --cruft")); 5155 die_for_incompatible_opt2(stdin_packs, "--stdin-packs", 5156 cruft, "--cruft"); 5157 } 5158 5159 /* 5160 * "soft" reasons not to use bitmaps - for on-disk repack by default we want 5161 * 5162 * - to produce good pack (with bitmap index not-yet-packed objects are 5163 * packed in suboptimal order). 5164 * 5165 * - to use more robust pack-generation codepath (avoiding possible 5166 * bugs in bitmap code and possible bitmap index corruption). 
5167 */ 5168 if (!pack_to_stdout) 5169 use_bitmap_index_default = 0; 5170 5171 if (use_bitmap_index < 0) 5172 use_bitmap_index = use_bitmap_index_default; 5173 5174 /* "hard" reasons not to use bitmaps; these just won't work at all */ 5175 if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow(the_repository)) 5176 use_bitmap_index = 0; 5177 5178 if (pack_to_stdout || !rev_list_all) 5179 write_bitmap_index = 0; 5180 5181 if (name_hash_version < 0) 5182 name_hash_version = (int)git_env_ulong("GIT_TEST_NAME_HASH_VERSION", 1); 5183 5184 validate_name_hash_version(); 5185 5186 if (use_delta_islands) 5187 strvec_push(&rp, "--topo-order"); 5188 5189 if (progress && all_progress_implied) 5190 progress = 2; 5191 5192 add_extra_kept_packs(&keep_pack_list); 5193 if (ignore_packed_keep_on_disk) { 5194 struct packfile_store *packs = the_repository->objects->packfiles; 5195 struct packed_git *p; 5196 5197 for (p = packfile_store_get_all_packs(packs); p; p = p->next) 5198 if (p->pack_local && p->pack_keep) 5199 break; 5200 if (!p) /* no keep-able packs found */ 5201 ignore_packed_keep_on_disk = 0; 5202 } 5203 if (local) { 5204 /* 5205 * unlike ignore_packed_keep_on_disk above, we do not 5206 * want to unset "local" based on looking at packs, as 5207 * it also covers non-local objects 5208 */ 5209 struct packfile_store *packs = the_repository->objects->packfiles; 5210 struct packed_git *p; 5211 5212 for (p = packfile_store_get_all_packs(packs); p; p = p->next) { 5213 if (!p->pack_local) { 5214 have_non_local_packs = 1; 5215 break; 5216 } 5217 } 5218 } 5219 5220 trace2_region_enter("pack-objects", "enumerate-objects", 5221 the_repository); 5222 prepare_packing_data(the_repository, &to_pack); 5223 5224 if (progress && !cruft) 5225 progress_state = start_progress(the_repository, 5226 _("Enumerating objects"), 0); 5227 if (stdin_packs) { 5228 read_stdin_packs(stdin_packs, rev_list_unpacked); 5229 } else if (cruft) { 5230 read_cruft_objects(); 
5231 } else if (!use_internal_rev_list) { 5232 read_object_list_from_stdin(); 5233 } else { 5234 struct rev_info revs; 5235 5236 repo_init_revisions(the_repository, &revs, NULL); 5237 list_objects_filter_copy(&revs.filter, &filter_options); 5238 if (exclude_promisor_objects_best_effort) { 5239 revs.include_check = is_not_in_promisor_pack; 5240 revs.include_check_obj = is_not_in_promisor_pack_obj; 5241 } 5242 get_object_list(&revs, &rp); 5243 release_revisions(&revs); 5244 } 5245 cleanup_preferred_base(); 5246 if (include_tag && nr_result) 5247 refs_for_each_tag_ref(get_main_ref_store(the_repository), 5248 add_ref_tag, NULL); 5249 stop_progress(&progress_state); 5250 trace2_region_leave("pack-objects", "enumerate-objects", 5251 the_repository); 5252 5253 if (non_empty && !nr_result) 5254 goto cleanup; 5255 if (nr_result) { 5256 trace2_region_enter("pack-objects", "prepare-pack", 5257 the_repository); 5258 prepare_pack(window, depth); 5259 trace2_region_leave("pack-objects", "prepare-pack", 5260 the_repository); 5261 } 5262 5263 trace2_region_enter("pack-objects", "write-pack-file", the_repository); 5264 write_excluded_by_configs(); 5265 write_pack_file(); 5266 trace2_region_leave("pack-objects", "write-pack-file", the_repository); 5267 5268 if (progress) 5269 fprintf_ln(stderr, 5270 _("Total %"PRIu32" (delta %"PRIu32")," 5271 " reused %"PRIu32" (delta %"PRIu32")," 5272 " pack-reused %"PRIu32" (from %"PRIuMAX")"), 5273 written, written_delta, reused, reused_delta, 5274 reuse_packfile_objects, 5275 (uintmax_t)reuse_packfiles_used_nr); 5276 5277 trace2_data_intmax("pack-objects", the_repository, "written", written); 5278 trace2_data_intmax("pack-objects", the_repository, "written/delta", written_delta); 5279 trace2_data_intmax("pack-objects", the_repository, "reused", reused); 5280 trace2_data_intmax("pack-objects", the_repository, "reused/delta", reused_delta); 5281 trace2_data_intmax("pack-objects", the_repository, "pack-reused", reuse_packfile_objects); 5282 
trace2_data_intmax("pack-objects", the_repository, "packs-reused", reuse_packfiles_used_nr); 5283 5284cleanup: 5285 clear_packing_data(&to_pack); 5286 list_objects_filter_release(&filter_options); 5287 string_list_clear(&keep_pack_list, 0); 5288 strvec_clear(&rp); 5289 5290 return 0; 5291}