Git fork

Merge branch 'kn/bundle-dedup-optim'

Optimize the code that deduplicates the references recorded in a bundle file.

* kn/bundle-dedup-optim:
bundle: fix non-linear performance scaling with refs
t6020: test for duplicate refnames in bundle creation

+61 -41
+7 -1
bundle.c
··· 384 384 { 385 385 int i; 386 386 int ref_count = 0; 387 + struct strset objects = STRSET_INIT; 387 388 388 389 for (i = 0; i < revs->pending.nr; i++) { 389 390 struct object_array_entry *e = revs->pending.objects + i; ··· 400 401 if (refs_read_ref_full(get_main_ref_store(the_repository), e->name, RESOLVE_REF_READING, &oid, &flag)) 401 402 flag = 0; 402 403 display_ref = (flag & REF_ISSYMREF) ? e->name : ref; 404 + 405 + if (strset_contains(&objects, display_ref)) 406 + goto skip_write_ref; 403 407 404 408 if (e->item->type == OBJ_TAG && 405 409 !is_tag_in_date_range(e->item, revs)) { ··· 423 427 } 424 428 425 429 ref_count++; 430 + strset_add(&objects, display_ref); 426 431 write_or_die(bundle_fd, oid_to_hex(&e->item->oid), the_hash_algo->hexsz); 427 432 write_or_die(bundle_fd, " ", 1); 428 433 write_or_die(bundle_fd, display_ref, strlen(display_ref)); ··· 430 435 skip_write_ref: 431 436 free(ref); 432 437 } 438 + 439 + strset_clear(&objects); 433 440 434 441 /* end header */ 435 442 write_or_die(bundle_fd, "\n", 1); ··· 566 573 */ 567 574 revs.blob_objects = revs.tree_objects = 0; 568 575 traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi); 569 - object_array_remove_duplicates(&revs_copy.pending); 570 576 571 577 /* write bundle refs */ 572 578 ref_count = write_bundle_refs(bundle_fd, &revs_copy);
+1 -34
object.c
··· 492 492 array->nr = array->alloc = 0; 493 493 } 494 494 495 - /* 496 - * Return true if array already contains an entry. 497 - */ 498 - static int contains_object(struct object_array *array, 499 - const struct object *item, const char *name) 500 - { 501 - unsigned nr = array->nr, i; 502 - struct object_array_entry *object = array->objects; 503 - 504 - for (i = 0; i < nr; i++, object++) 505 - if (item == object->item && !strcmp(object->name, name)) 506 - return 1; 507 - return 0; 508 - } 509 - 510 - void object_array_remove_duplicates(struct object_array *array) 511 - { 512 - unsigned nr = array->nr, src; 513 - struct object_array_entry *objects = array->objects; 514 - 515 - array->nr = 0; 516 - for (src = 0; src < nr; src++) { 517 - if (!contains_object(array, objects[src].item, 518 - objects[src].name)) { 519 - if (src != array->nr) 520 - objects[array->nr] = objects[src]; 521 - array->nr++; 522 - } else { 523 - object_array_release_entry(&objects[src]); 524 - } 525 - } 526 - } 527 - 528 495 void clear_object_flags(struct repository *repo, unsigned flags) 529 496 { 530 497 int i; 531 498 532 - for (i=0; i < repo->parsed_objects->obj_hash_size; i++) { 499 + for (i = 0; i < repo->parsed_objects->obj_hash_size; i++) { 533 500 struct object *obj = repo->parsed_objects->obj_hash[i]; 534 501 if (obj) 535 502 obj->flags &= ~flags;
-6
object.h
··· 327 327 object_array_each_func_t want, void *cb_data); 328 328 329 329 /* 330 - * Remove from array all but the first entry with a given name. 331 - * Warning: this function uses an O(N^2) algorithm. 332 - */ 333 - void object_array_remove_duplicates(struct object_array *array); 334 - 335 - /* 336 330 * Remove any objects from the array, freeing all used memory; afterwards 337 331 * the array is ready to store more objects with add_object_array(). 338 332 */
+53
t/t6020-bundle-misc.sh
··· 673 673 grep "%" err 674 674 ' 675 675 676 + test_expect_success 'create bundle with duplicate refnames' ' 677 + git bundle create out.bdl "main" "main" && 678 + 679 + git bundle list-heads out.bdl | 680 + make_user_friendly_and_stable_output >actual && 681 + cat >expect <<-\EOF && 682 + <COMMIT-P> refs/heads/main 683 + EOF 684 + test_cmp expect actual 685 + ' 686 + 687 + test_expect_success 'create bundle with duplicate refnames and --all' ' 688 + git bundle create out.bdl --all "main" "main" && 689 + 690 + git bundle list-heads out.bdl | 691 + make_user_friendly_and_stable_output >actual && 692 + cat >expect <<-\EOF && 693 + <COMMIT-P> refs/heads/main 694 + <COMMIT-N> refs/heads/release 695 + <COMMIT-D> refs/heads/topic/1 696 + <COMMIT-H> refs/heads/topic/2 697 + <COMMIT-D> refs/pull/1/head 698 + <COMMIT-G> refs/pull/2/head 699 + <TAG-1> refs/tags/v1 700 + <TAG-2> refs/tags/v2 701 + <TAG-3> refs/tags/v3 702 + <COMMIT-P> HEAD 703 + EOF 704 + test_cmp expect actual 705 + ' 706 + 707 + test_expect_success 'create bundle with duplicate exlusion refnames' ' 708 + git bundle create out.bdl "main" "main^!" && 709 + 710 + git bundle list-heads out.bdl | 711 + make_user_friendly_and_stable_output >actual && 712 + cat >expect <<-\EOF && 713 + <COMMIT-P> refs/heads/main 714 + EOF 715 + test_cmp expect actual 716 + ' 717 + 718 + test_expect_success 'create bundle with duplicate refname short-form' ' 719 + git bundle create out.bdl "main" "main" "refs/heads/main" "refs/heads/main" && 720 + 721 + git bundle list-heads out.bdl | 722 + make_user_friendly_and_stable_output >actual && 723 + cat >expect <<-\EOF && 724 + <COMMIT-P> refs/heads/main 725 + EOF 726 + test_cmp expect actual 727 + ' 728 + 676 729 test_expect_success 'read bundle over stdin' ' 677 730 git bundle create some.bundle HEAD && 678 731