Git reftable library — stack implementation (reftable/stack.c), mirrored from git.git.
1/* 2 * Copyright 2020 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style 5 * license that can be found in the LICENSE file or at 6 * https://developers.google.com/open-source/licenses/bsd 7 */ 8 9#include "stack.h" 10 11#include "system.h" 12#include "constants.h" 13#include "merged.h" 14#include "reftable-error.h" 15#include "reftable-record.h" 16#include "reftable-merged.h" 17#include "table.h" 18#include "writer.h" 19 20static int stack_filename(struct reftable_buf *dest, struct reftable_stack *st, 21 const char *name) 22{ 23 int err; 24 reftable_buf_reset(dest); 25 if ((err = reftable_buf_addstr(dest, st->reftable_dir)) < 0 || 26 (err = reftable_buf_addstr(dest, "/")) < 0 || 27 (err = reftable_buf_addstr(dest, name)) < 0) 28 return err; 29 return 0; 30} 31 32static int stack_fsync(const struct reftable_write_options *opts, int fd) 33{ 34 if (opts->fsync) 35 return opts->fsync(fd); 36 return fsync(fd); 37} 38 39static ssize_t reftable_write_data(int fd, const void *data, size_t size) 40{ 41 size_t total_written = 0; 42 const char *p = data; 43 44 while (total_written < size) { 45 ssize_t bytes_written = write(fd, p, size - total_written); 46 if (bytes_written < 0 && (errno == EAGAIN || errno == EINTR)) 47 continue; 48 if (bytes_written < 0) 49 return REFTABLE_IO_ERROR; 50 51 total_written += bytes_written; 52 p += bytes_written; 53 } 54 55 return total_written; 56} 57 58struct fd_writer { 59 const struct reftable_write_options *opts; 60 int fd; 61}; 62 63static ssize_t fd_writer_write(void *arg, const void *data, size_t sz) 64{ 65 struct fd_writer *writer = arg; 66 return reftable_write_data(writer->fd, data, sz); 67} 68 69static int fd_writer_flush(void *arg) 70{ 71 struct fd_writer *writer = arg; 72 return stack_fsync(writer->opts, writer->fd); 73} 74 75static int fd_read_lines(int fd, char ***namesp) 76{ 77 char *buf = NULL; 78 int err = 0; 79 off_t size; 80 81 size = lseek(fd, 0, SEEK_END); 82 if (size < 0) { 83 err = REFTABLE_IO_ERROR; 
84 goto done; 85 } 86 87 err = lseek(fd, 0, SEEK_SET); 88 if (err < 0) { 89 err = REFTABLE_IO_ERROR; 90 goto done; 91 } 92 93 REFTABLE_ALLOC_ARRAY(buf, size + 1); 94 if (!buf) { 95 err = REFTABLE_OUT_OF_MEMORY_ERROR; 96 goto done; 97 } 98 99 for (off_t total_read = 0; total_read < size; ) { 100 ssize_t bytes_read = read(fd, buf + total_read, size - total_read); 101 if (bytes_read < 0 && (errno == EAGAIN || errno == EINTR)) 102 continue; 103 if (bytes_read < 0 || !bytes_read) { 104 err = REFTABLE_IO_ERROR; 105 goto done; 106 } 107 108 total_read += bytes_read; 109 } 110 buf[size] = 0; 111 112 err = parse_names(buf, size, namesp); 113done: 114 reftable_free(buf); 115 return err; 116} 117 118int read_lines(const char *filename, char ***namesp) 119{ 120 int fd = open(filename, O_RDONLY); 121 int err = 0; 122 if (fd < 0) { 123 if (errno == ENOENT) { 124 REFTABLE_CALLOC_ARRAY(*namesp, 1); 125 if (!*namesp) 126 return REFTABLE_OUT_OF_MEMORY_ERROR; 127 return 0; 128 } 129 130 return REFTABLE_IO_ERROR; 131 } 132 err = fd_read_lines(fd, namesp); 133 close(fd); 134 return err; 135} 136 137int reftable_stack_init_ref_iterator(struct reftable_stack *st, 138 struct reftable_iterator *it) 139{ 140 return merged_table_init_iter(reftable_stack_merged_table(st), 141 it, REFTABLE_BLOCK_TYPE_REF); 142} 143 144int reftable_stack_init_log_iterator(struct reftable_stack *st, 145 struct reftable_iterator *it) 146{ 147 return merged_table_init_iter(reftable_stack_merged_table(st), 148 it, REFTABLE_BLOCK_TYPE_LOG); 149} 150 151struct reftable_merged_table * 152reftable_stack_merged_table(struct reftable_stack *st) 153{ 154 return st->merged; 155} 156 157static int has_name(char **names, const char *name) 158{ 159 while (*names) { 160 if (!strcmp(*names, name)) 161 return 1; 162 names++; 163 } 164 return 0; 165} 166 167/* Close and free the stack */ 168void reftable_stack_destroy(struct reftable_stack *st) 169{ 170 char **names = NULL; 171 int err = 0; 172 173 if (!st) 174 return; 175 176 if 
(st->merged) { 177 reftable_merged_table_free(st->merged); 178 st->merged = NULL; 179 } 180 181 err = read_lines(st->list_file, &names); 182 if (err < 0) { 183 REFTABLE_FREE_AND_NULL(names); 184 } 185 186 if (st->tables) { 187 struct reftable_buf filename = REFTABLE_BUF_INIT; 188 189 for (size_t i = 0; i < st->tables_len; i++) { 190 const char *name = reftable_table_name(st->tables[i]); 191 int try_unlinking = 1; 192 193 reftable_buf_reset(&filename); 194 if (names && !has_name(names, name)) { 195 if (stack_filename(&filename, st, name) < 0) 196 try_unlinking = 0; 197 } 198 reftable_table_decref(st->tables[i]); 199 200 if (try_unlinking && filename.len) { 201 /* On Windows, can only unlink after closing. */ 202 unlink(filename.buf); 203 } 204 } 205 206 reftable_buf_release(&filename); 207 st->tables_len = 0; 208 REFTABLE_FREE_AND_NULL(st->tables); 209 } 210 211 if (st->list_fd >= 0) { 212 close(st->list_fd); 213 st->list_fd = -1; 214 } 215 216 REFTABLE_FREE_AND_NULL(st->list_file); 217 REFTABLE_FREE_AND_NULL(st->reftable_dir); 218 reftable_free(st); 219 free_names(names); 220} 221 222static struct reftable_table **stack_copy_tables(struct reftable_stack *st, 223 size_t cur_len) 224{ 225 struct reftable_table **cur = reftable_calloc(cur_len, sizeof(*cur)); 226 if (!cur) 227 return NULL; 228 for (size_t i = 0; i < cur_len; i++) 229 cur[i] = st->tables[i]; 230 return cur; 231} 232 233static int reftable_stack_reload_once(struct reftable_stack *st, 234 const char **names, 235 int reuse_open) 236{ 237 size_t cur_len = !st->merged ? 
0 : st->merged->tables_len; 238 struct reftable_table **cur = NULL; 239 struct reftable_table **reused = NULL; 240 struct reftable_table **new_tables = NULL; 241 size_t reused_len = 0, reused_alloc = 0, names_len; 242 size_t new_tables_len = 0; 243 struct reftable_merged_table *new_merged = NULL; 244 struct reftable_buf table_path = REFTABLE_BUF_INIT; 245 int err = 0; 246 size_t i; 247 248 if (cur_len) { 249 cur = stack_copy_tables(st, cur_len); 250 if (!cur) { 251 err = REFTABLE_OUT_OF_MEMORY_ERROR; 252 goto done; 253 } 254 } 255 256 names_len = names_length(names); 257 258 if (names_len) { 259 new_tables = reftable_calloc(names_len, sizeof(*new_tables)); 260 if (!new_tables) { 261 err = REFTABLE_OUT_OF_MEMORY_ERROR; 262 goto done; 263 } 264 } 265 266 while (*names) { 267 struct reftable_table *table = NULL; 268 const char *name = *names++; 269 270 /* this is linear; we assume compaction keeps the number of 271 tables under control so this is not quadratic. */ 272 for (i = 0; reuse_open && i < cur_len; i++) { 273 if (cur[i] && 0 == strcmp(cur[i]->name, name)) { 274 table = cur[i]; 275 cur[i] = NULL; 276 277 /* 278 * When reloading the stack fails, we end up 279 * releasing all new tables. This also 280 * includes the reused tables, even though 281 * they are still in used by the old stack. We 282 * thus need to keep them alive here, which we 283 * do by bumping their refcount. 
284 */ 285 REFTABLE_ALLOC_GROW_OR_NULL(reused, 286 reused_len + 1, 287 reused_alloc); 288 if (!reused) { 289 err = REFTABLE_OUT_OF_MEMORY_ERROR; 290 goto done; 291 } 292 reused[reused_len++] = table; 293 reftable_table_incref(table); 294 break; 295 } 296 } 297 298 if (!table) { 299 struct reftable_block_source src = { NULL }; 300 301 err = stack_filename(&table_path, st, name); 302 if (err < 0) 303 goto done; 304 305 err = reftable_block_source_from_file(&src, 306 table_path.buf); 307 if (err < 0) 308 goto done; 309 310 err = reftable_table_new(&table, &src, name); 311 if (err < 0) 312 goto done; 313 } 314 315 new_tables[new_tables_len] = table; 316 new_tables_len++; 317 } 318 319 /* success! */ 320 err = reftable_merged_table_new(&new_merged, new_tables, 321 new_tables_len, st->opts.hash_id); 322 if (err < 0) 323 goto done; 324 325 /* 326 * Close the old, non-reused tables and proactively try to unlink 327 * them. This is done for systems like Windows, where the underlying 328 * file of such an open table wouldn't have been possible to be 329 * unlinked by the compacting process. 330 */ 331 for (i = 0; i < cur_len; i++) { 332 if (cur[i]) { 333 const char *name = reftable_table_name(cur[i]); 334 335 err = stack_filename(&table_path, st, name); 336 if (err < 0) 337 goto done; 338 339 reftable_table_decref(cur[i]); 340 unlink(table_path.buf); 341 } 342 } 343 344 /* Update the stack to point to the new tables. */ 345 if (st->merged) 346 reftable_merged_table_free(st->merged); 347 new_merged->suppress_deletions = 1; 348 st->merged = new_merged; 349 350 if (st->tables) 351 reftable_free(st->tables); 352 st->tables = new_tables; 353 st->tables_len = new_tables_len; 354 new_tables = NULL; 355 new_tables_len = 0; 356 357 /* 358 * Decrement the refcount of reused tables again. This only needs to 359 * happen on the successful case, because on the unsuccessful one we 360 * decrement their refcount via `new_tables`. 
361 */ 362 for (i = 0; i < reused_len; i++) 363 reftable_table_decref(reused[i]); 364 365done: 366 for (i = 0; i < new_tables_len; i++) 367 reftable_table_decref(new_tables[i]); 368 reftable_free(new_tables); 369 reftable_free(reused); 370 reftable_free(cur); 371 reftable_buf_release(&table_path); 372 return err; 373} 374 375/* return negative if a before b. */ 376static int tv_cmp(struct timeval *a, struct timeval *b) 377{ 378 time_t diff = a->tv_sec - b->tv_sec; 379 int udiff = a->tv_usec - b->tv_usec; 380 381 if (diff != 0) 382 return diff; 383 384 return udiff; 385} 386 387static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, 388 int reuse_open) 389{ 390 char **names = NULL, **names_after = NULL; 391 struct timeval deadline; 392 int64_t delay = 0; 393 int tries = 0, err; 394 int fd = -1; 395 396 err = gettimeofday(&deadline, NULL); 397 if (err < 0) 398 goto out; 399 deadline.tv_sec += 3; 400 401 while (1) { 402 struct timeval now; 403 404 err = gettimeofday(&now, NULL); 405 if (err < 0) 406 goto out; 407 408 /* 409 * Only look at deadlines after the first few times. This 410 * simplifies debugging in GDB. 411 */ 412 tries++; 413 if (tries > 3 && tv_cmp(&now, &deadline) >= 0) 414 goto out; 415 416 fd = open(st->list_file, O_RDONLY); 417 if (fd < 0) { 418 if (errno != ENOENT) { 419 err = REFTABLE_IO_ERROR; 420 goto out; 421 } 422 423 REFTABLE_CALLOC_ARRAY(names, 1); 424 if (!names) { 425 err = REFTABLE_OUT_OF_MEMORY_ERROR; 426 goto out; 427 } 428 } else { 429 err = fd_read_lines(fd, &names); 430 if (err < 0) 431 goto out; 432 } 433 434 err = reftable_stack_reload_once(st, (const char **) names, reuse_open); 435 if (!err) 436 break; 437 if (err != REFTABLE_NOT_EXIST_ERROR) 438 goto out; 439 440 /* 441 * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent 442 * writer. Check if there was one by checking if the name list 443 * changed. 
444 */ 445 err = read_lines(st->list_file, &names_after); 446 if (err < 0) 447 goto out; 448 if (names_equal((const char **) names_after, 449 (const char **) names)) { 450 err = REFTABLE_NOT_EXIST_ERROR; 451 goto out; 452 } 453 454 free_names(names); 455 names = NULL; 456 free_names(names_after); 457 names_after = NULL; 458 close(fd); 459 fd = -1; 460 461 delay = delay + (delay * reftable_rand()) / UINT32_MAX + 1; 462 poll(NULL, 0, delay); 463 } 464 465out: 466 /* 467 * Invalidate the stat cache. It is sufficient to only close the file 468 * descriptor and keep the cached stat info because we never use the 469 * latter when the former is negative. 470 */ 471 if (st->list_fd >= 0) { 472 close(st->list_fd); 473 st->list_fd = -1; 474 } 475 476 /* 477 * Cache stat information in case it provides a useful signal to us. 478 * According to POSIX, "The st_ino and st_dev fields taken together 479 * uniquely identify the file within the system." That being said, 480 * Windows is not POSIX compliant and we do not have these fields 481 * available. So the information we have there is insufficient to 482 * determine whether two file descriptors point to the same file. 483 * 484 * While we could fall back to using other signals like the file's 485 * mtime, those are not sufficient to avoid races. We thus refrain from 486 * using the stat cache on such systems and fall back to the secondary 487 * caching mechanism, which is to check whether contents of the file 488 * have changed. 489 * 490 * On other systems which are POSIX compliant we must keep the file 491 * descriptor open. This is to avoid a race condition where two 492 * processes access the reftable stack at the same point in time: 493 * 494 * 1. A reads the reftable stack and caches its stat info. 495 * 496 * 2. B updates the stack, appending a new table to "tables.list". 497 * This will both use a new inode and result in a different file 498 * size, thus invalidating A's cache in theory. 499 * 500 * 3. 
B decides to auto-compact the stack and merges two tables. The 501 * file size now matches what A has cached again. Furthermore, the 502 * filesystem may decide to recycle the inode number of the file 503 * we have replaced in (2) because it is not in use anymore. 504 * 505 * 4. A reloads the reftable stack. Neither the inode number nor the 506 * file size changed. If the timestamps did not change either then 507 * we think the cached copy of our stack is up-to-date. 508 * 509 * By keeping the file descriptor open the inode number cannot be 510 * recycled, mitigating the race. 511 */ 512 if (!err && fd >= 0 && !fstat(fd, &st->list_st) && 513 st->list_st.st_dev && st->list_st.st_ino) { 514 st->list_fd = fd; 515 fd = -1; 516 } 517 518 if (fd >= 0) 519 close(fd); 520 free_names(names); 521 free_names(names_after); 522 523 if (st->opts.on_reload) 524 st->opts.on_reload(st->opts.on_reload_payload); 525 526 return err; 527} 528 529int reftable_new_stack(struct reftable_stack **dest, const char *dir, 530 const struct reftable_write_options *_opts) 531{ 532 struct reftable_buf list_file_name = REFTABLE_BUF_INIT; 533 struct reftable_write_options opts = { 0 }; 534 struct reftable_stack *p; 535 int err; 536 537 p = reftable_calloc(1, sizeof(*p)); 538 if (!p) { 539 err = REFTABLE_OUT_OF_MEMORY_ERROR; 540 goto out; 541 } 542 543 if (_opts) 544 opts = *_opts; 545 if (opts.hash_id == 0) 546 opts.hash_id = REFTABLE_HASH_SHA1; 547 548 *dest = NULL; 549 550 reftable_buf_reset(&list_file_name); 551 if ((err = reftable_buf_addstr(&list_file_name, dir)) < 0 || 552 (err = reftable_buf_addstr(&list_file_name, "/tables.list")) < 0) 553 goto out; 554 555 p->list_file = reftable_buf_detach(&list_file_name); 556 p->list_fd = -1; 557 p->opts = opts; 558 p->reftable_dir = reftable_strdup(dir); 559 if (!p->reftable_dir) { 560 err = REFTABLE_OUT_OF_MEMORY_ERROR; 561 goto out; 562 } 563 564 err = reftable_stack_reload_maybe_reuse(p, 1); 565 if (err < 0) 566 goto out; 567 568 *dest = p; 569 err = 
0; 570 571out: 572 if (err < 0) 573 reftable_stack_destroy(p); 574 return err; 575} 576 577/* 578 * Check whether the given stack is up-to-date with what we have in memory. 579 * Returns 0 if so, 1 if the stack is out-of-date or a negative error code 580 * otherwise. 581 */ 582static int stack_uptodate(struct reftable_stack *st) 583{ 584 char **names = NULL; 585 int err; 586 587 /* 588 * When we have cached stat information available then we use it to 589 * verify whether the file has been rewritten. 590 * 591 * Note that we explicitly do not want to use `stat_validity_check()` 592 * and friends here because they may end up not comparing the `st_dev` 593 * and `st_ino` fields. These functions thus cannot guarantee that we 594 * indeed still have the same file. 595 */ 596 if (st->list_fd >= 0) { 597 struct stat list_st; 598 599 if (stat(st->list_file, &list_st) < 0) { 600 /* 601 * It's fine for "tables.list" to not exist. In that 602 * case, we have to refresh when the loaded stack has 603 * any tables. 604 */ 605 if (errno == ENOENT) 606 return !!st->tables_len; 607 return REFTABLE_IO_ERROR; 608 } 609 610 /* 611 * When "tables.list" refers to the same file we can assume 612 * that it didn't change. This is because we always use 613 * rename(3P) to update the file and never write to it 614 * directly. 
615 */ 616 if (st->list_st.st_dev == list_st.st_dev && 617 st->list_st.st_ino == list_st.st_ino) 618 return 0; 619 } 620 621 err = read_lines(st->list_file, &names); 622 if (err < 0) 623 return err; 624 625 for (size_t i = 0; i < st->tables_len; i++) { 626 if (!names[i]) { 627 err = 1; 628 goto done; 629 } 630 631 if (strcmp(st->tables[i]->name, names[i])) { 632 err = 1; 633 goto done; 634 } 635 } 636 637 if (names[st->merged->tables_len]) { 638 err = 1; 639 goto done; 640 } 641 642done: 643 free_names(names); 644 return err; 645} 646 647int reftable_stack_reload(struct reftable_stack *st) 648{ 649 int err = stack_uptodate(st); 650 if (err > 0) 651 return reftable_stack_reload_maybe_reuse(st, 1); 652 return err; 653} 654 655struct reftable_addition { 656 struct reftable_flock tables_list_lock; 657 struct reftable_stack *stack; 658 659 char **new_tables; 660 size_t new_tables_len, new_tables_cap; 661 uint64_t next_update_index; 662}; 663 664static void reftable_addition_close(struct reftable_addition *add) 665{ 666 struct reftable_buf nm = REFTABLE_BUF_INIT; 667 size_t i; 668 669 for (i = 0; i < add->new_tables_len; i++) { 670 if (!stack_filename(&nm, add->stack, add->new_tables[i])) 671 unlink(nm.buf); 672 reftable_free(add->new_tables[i]); 673 add->new_tables[i] = NULL; 674 } 675 reftable_free(add->new_tables); 676 add->new_tables = NULL; 677 add->new_tables_len = 0; 678 add->new_tables_cap = 0; 679 680 flock_release(&add->tables_list_lock); 681 reftable_buf_release(&nm); 682} 683 684static int reftable_stack_init_addition(struct reftable_addition *add, 685 struct reftable_stack *st, 686 unsigned int flags) 687{ 688 struct reftable_buf lock_file_name = REFTABLE_BUF_INIT; 689 int err; 690 691 memset(add, 0, sizeof(*add)); 692 add->stack = st; 693 694 err = flock_acquire(&add->tables_list_lock, st->list_file, 695 st->opts.lock_timeout_ms); 696 if (err < 0) 697 goto done; 698 699 if (st->opts.default_permissions) { 700 if (chmod(add->tables_list_lock.path, 701 
st->opts.default_permissions) < 0) { 702 err = REFTABLE_IO_ERROR; 703 goto done; 704 } 705 } 706 707 err = stack_uptodate(st); 708 if (err < 0) 709 goto done; 710 if (err > 0 && flags & REFTABLE_STACK_NEW_ADDITION_RELOAD) { 711 err = reftable_stack_reload_maybe_reuse(add->stack, 1); 712 if (err) 713 goto done; 714 } 715 if (err > 0) { 716 err = REFTABLE_OUTDATED_ERROR; 717 goto done; 718 } 719 720 add->next_update_index = reftable_stack_next_update_index(st); 721done: 722 if (err) 723 reftable_addition_close(add); 724 reftable_buf_release(&lock_file_name); 725 return err; 726} 727 728static int stack_try_add(struct reftable_stack *st, 729 int (*write_table)(struct reftable_writer *wr, 730 void *arg), 731 void *arg, unsigned flags) 732{ 733 struct reftable_addition add; 734 int err; 735 736 err = reftable_stack_init_addition(&add, st, flags); 737 if (err < 0) 738 goto done; 739 740 err = reftable_addition_add(&add, write_table, arg); 741 if (err < 0) 742 goto done; 743 744 err = reftable_addition_commit(&add); 745done: 746 reftable_addition_close(&add); 747 return err; 748} 749 750int reftable_stack_add(struct reftable_stack *st, 751 int (*write)(struct reftable_writer *wr, void *arg), 752 void *arg, unsigned flags) 753{ 754 int err = stack_try_add(st, write, arg, flags); 755 if (err < 0) { 756 if (err == REFTABLE_OUTDATED_ERROR) { 757 /* Ignore error return, we want to propagate 758 REFTABLE_OUTDATED_ERROR. 
759 */ 760 reftable_stack_reload(st); 761 } 762 return err; 763 } 764 765 return 0; 766} 767 768static int format_name(struct reftable_buf *dest, uint64_t min, uint64_t max) 769{ 770 char buf[100]; 771 uint32_t rnd = reftable_rand(); 772 snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x", 773 min, max, rnd); 774 reftable_buf_reset(dest); 775 return reftable_buf_addstr(dest, buf); 776} 777 778void reftable_addition_destroy(struct reftable_addition *add) 779{ 780 if (!add) { 781 return; 782 } 783 reftable_addition_close(add); 784 reftable_free(add); 785} 786 787int reftable_addition_commit(struct reftable_addition *add) 788{ 789 struct reftable_buf table_list = REFTABLE_BUF_INIT; 790 int err = 0; 791 size_t i; 792 793 if (add->new_tables_len == 0) 794 goto done; 795 796 for (i = 0; i < add->stack->merged->tables_len; i++) { 797 if ((err = reftable_buf_addstr(&table_list, add->stack->tables[i]->name)) < 0 || 798 (err = reftable_buf_addstr(&table_list, "\n")) < 0) 799 goto done; 800 } 801 for (i = 0; i < add->new_tables_len; i++) { 802 if ((err = reftable_buf_addstr(&table_list, add->new_tables[i])) < 0 || 803 (err = reftable_buf_addstr(&table_list, "\n")) < 0) 804 goto done; 805 } 806 807 err = reftable_write_data(add->tables_list_lock.fd, 808 table_list.buf, table_list.len); 809 reftable_buf_release(&table_list); 810 if (err < 0) { 811 err = REFTABLE_IO_ERROR; 812 goto done; 813 } 814 815 err = stack_fsync(&add->stack->opts, add->tables_list_lock.fd); 816 if (err < 0) { 817 err = REFTABLE_IO_ERROR; 818 goto done; 819 } 820 821 err = flock_commit(&add->tables_list_lock); 822 if (err < 0) { 823 err = REFTABLE_IO_ERROR; 824 goto done; 825 } 826 827 /* success, no more state to clean up. 
*/ 828 for (i = 0; i < add->new_tables_len; i++) 829 reftable_free(add->new_tables[i]); 830 reftable_free(add->new_tables); 831 add->new_tables = NULL; 832 add->new_tables_len = 0; 833 add->new_tables_cap = 0; 834 835 err = reftable_stack_reload_maybe_reuse(add->stack, 1); 836 if (err) 837 goto done; 838 839 if (!add->stack->opts.disable_auto_compact) { 840 /* 841 * Auto-compact the stack to keep the number of tables in 842 * control. It is possible that a concurrent writer is already 843 * trying to compact parts of the stack, which would lead to a 844 * `REFTABLE_LOCK_ERROR` because parts of the stack are locked 845 * already. Similarly, the stack may have been rewritten by a 846 * concurrent writer, which causes `REFTABLE_OUTDATED_ERROR`. 847 * Both of these errors are benign, so we simply ignore them. 848 */ 849 err = reftable_stack_auto_compact(add->stack); 850 if (err < 0 && err != REFTABLE_LOCK_ERROR && 851 err != REFTABLE_OUTDATED_ERROR) 852 goto done; 853 err = 0; 854 } 855 856done: 857 reftable_addition_close(add); 858 return err; 859} 860 861int reftable_stack_new_addition(struct reftable_addition **dest, 862 struct reftable_stack *st, 863 unsigned int flags) 864{ 865 int err; 866 867 REFTABLE_CALLOC_ARRAY(*dest, 1); 868 if (!*dest) 869 return REFTABLE_OUT_OF_MEMORY_ERROR; 870 871 err = reftable_stack_init_addition(*dest, st, flags); 872 if (err) { 873 reftable_free(*dest); 874 *dest = NULL; 875 } 876 877 return err; 878} 879 880int reftable_addition_add(struct reftable_addition *add, 881 int (*write_table)(struct reftable_writer *wr, 882 void *arg), 883 void *arg) 884{ 885 struct reftable_buf temp_tab_file_name = REFTABLE_BUF_INIT; 886 struct reftable_buf tab_file_name = REFTABLE_BUF_INIT; 887 struct reftable_buf next_name = REFTABLE_BUF_INIT; 888 struct reftable_writer *wr = NULL; 889 struct reftable_tmpfile tab_file = REFTABLE_TMPFILE_INIT; 890 struct fd_writer writer = { 891 .opts = &add->stack->opts, 892 }; 893 int err = 0; 894 895 
reftable_buf_reset(&next_name); 896 897 err = format_name(&next_name, add->next_update_index, add->next_update_index); 898 if (err < 0) 899 goto done; 900 901 err = stack_filename(&temp_tab_file_name, add->stack, next_name.buf); 902 if (err < 0) 903 goto done; 904 905 err = reftable_buf_addstr(&temp_tab_file_name, ".temp.XXXXXX"); 906 if (err < 0) 907 goto done; 908 909 err = tmpfile_from_pattern(&tab_file, temp_tab_file_name.buf); 910 if (err < 0) 911 goto done; 912 if (add->stack->opts.default_permissions) { 913 if (chmod(tab_file.path, 914 add->stack->opts.default_permissions)) { 915 err = REFTABLE_IO_ERROR; 916 goto done; 917 } 918 } 919 920 writer.fd = tab_file.fd; 921 err = reftable_writer_new(&wr, fd_writer_write, fd_writer_flush, 922 &writer, &add->stack->opts); 923 if (err < 0) 924 goto done; 925 926 err = write_table(wr, arg); 927 if (err < 0) 928 goto done; 929 930 err = reftable_writer_close(wr); 931 if (err == REFTABLE_EMPTY_TABLE_ERROR) { 932 err = 0; 933 goto done; 934 } 935 if (err < 0) 936 goto done; 937 938 err = tmpfile_close(&tab_file); 939 if (err < 0) 940 goto done; 941 942 if (wr->min_update_index < add->next_update_index) { 943 err = REFTABLE_API_ERROR; 944 goto done; 945 } 946 947 err = format_name(&next_name, wr->min_update_index, wr->max_update_index); 948 if (err < 0) 949 goto done; 950 951 err = reftable_buf_addstr(&next_name, ".ref"); 952 if (err < 0) 953 goto done; 954 955 err = stack_filename(&tab_file_name, add->stack, next_name.buf); 956 if (err < 0) 957 goto done; 958 959 /* 960 On windows, this relies on rand() picking a unique destination name. 961 Maybe we should do retry loop as well? 
962 */ 963 err = tmpfile_rename(&tab_file, tab_file_name.buf); 964 if (err < 0) 965 goto done; 966 967 REFTABLE_ALLOC_GROW_OR_NULL(add->new_tables, add->new_tables_len + 1, 968 add->new_tables_cap); 969 if (!add->new_tables) { 970 err = REFTABLE_OUT_OF_MEMORY_ERROR; 971 goto done; 972 } 973 add->new_tables[add->new_tables_len++] = reftable_buf_detach(&next_name); 974 975done: 976 tmpfile_delete(&tab_file); 977 reftable_buf_release(&temp_tab_file_name); 978 reftable_buf_release(&tab_file_name); 979 reftable_buf_release(&next_name); 980 reftable_writer_free(wr); 981 return err; 982} 983 984uint64_t reftable_stack_next_update_index(struct reftable_stack *st) 985{ 986 int sz = st->merged->tables_len; 987 if (sz > 0) 988 return reftable_table_max_update_index(st->tables[sz - 1]) + 989 1; 990 return 1; 991} 992 993static int stack_write_compact(struct reftable_stack *st, 994 struct reftable_writer *wr, 995 size_t first, size_t last, 996 struct reftable_log_expiry_config *config) 997{ 998 struct reftable_merged_table *mt = NULL; 999 struct reftable_iterator it = { NULL }; 1000 struct reftable_ref_record ref = { NULL }; 1001 struct reftable_log_record log = { NULL }; 1002 size_t subtabs_len = last - first + 1; 1003 uint64_t entries = 0; 1004 int err = 0; 1005 1006 for (size_t i = first; i <= last; i++) 1007 st->stats.bytes += st->tables[i]->size; 1008 err = reftable_writer_set_limits(wr, st->tables[first]->min_update_index, 1009 st->tables[last]->max_update_index); 1010 if (err < 0) 1011 goto done; 1012 1013 err = reftable_merged_table_new(&mt, st->tables + first, subtabs_len, 1014 st->opts.hash_id); 1015 if (err < 0) 1016 goto done; 1017 1018 err = merged_table_init_iter(mt, &it, REFTABLE_BLOCK_TYPE_REF); 1019 if (err < 0) 1020 goto done; 1021 1022 err = reftable_iterator_seek_ref(&it, ""); 1023 if (err < 0) 1024 goto done; 1025 1026 while (1) { 1027 err = reftable_iterator_next_ref(&it, &ref); 1028 if (err > 0) { 1029 err = 0; 1030 break; 1031 } 1032 if (err < 0) 1033 
goto done; 1034 1035 if (first == 0 && reftable_ref_record_is_deletion(&ref)) { 1036 continue; 1037 } 1038 1039 err = reftable_writer_add_ref(wr, &ref); 1040 if (err < 0) 1041 goto done; 1042 entries++; 1043 } 1044 reftable_iterator_destroy(&it); 1045 1046 err = merged_table_init_iter(mt, &it, REFTABLE_BLOCK_TYPE_LOG); 1047 if (err < 0) 1048 goto done; 1049 1050 err = reftable_iterator_seek_log(&it, ""); 1051 if (err < 0) 1052 goto done; 1053 1054 while (1) { 1055 err = reftable_iterator_next_log(&it, &log); 1056 if (err > 0) { 1057 err = 0; 1058 break; 1059 } 1060 if (err < 0) 1061 goto done; 1062 if (first == 0 && reftable_log_record_is_deletion(&log)) { 1063 continue; 1064 } 1065 1066 if (config && config->min_update_index > 0 && 1067 log.update_index < config->min_update_index) { 1068 continue; 1069 } 1070 1071 if (config && config->time > 0 && 1072 log.value.update.time < config->time) { 1073 continue; 1074 } 1075 1076 err = reftable_writer_add_log(wr, &log); 1077 if (err < 0) 1078 goto done; 1079 entries++; 1080 } 1081 1082done: 1083 reftable_iterator_destroy(&it); 1084 if (mt) 1085 reftable_merged_table_free(mt); 1086 reftable_ref_record_release(&ref); 1087 reftable_log_record_release(&log); 1088 st->stats.entries_written += entries; 1089 return err; 1090} 1091 1092static int stack_compact_locked(struct reftable_stack *st, 1093 size_t first, size_t last, 1094 struct reftable_log_expiry_config *config, 1095 struct reftable_tmpfile *tab_file_out) 1096{ 1097 struct reftable_buf next_name = REFTABLE_BUF_INIT; 1098 struct reftable_buf tab_file_path = REFTABLE_BUF_INIT; 1099 struct reftable_writer *wr = NULL; 1100 struct fd_writer writer= { 1101 .opts = &st->opts, 1102 }; 1103 struct reftable_tmpfile tab_file = REFTABLE_TMPFILE_INIT; 1104 int err = 0; 1105 1106 err = format_name(&next_name, reftable_table_min_update_index(st->tables[first]), 1107 reftable_table_max_update_index(st->tables[last])); 1108 if (err < 0) 1109 goto done; 1110 1111 err = 
stack_filename(&tab_file_path, st, next_name.buf); 1112 if (err < 0) 1113 goto done; 1114 1115 err = reftable_buf_addstr(&tab_file_path, ".temp.XXXXXX"); 1116 if (err < 0) 1117 goto done; 1118 1119 err = tmpfile_from_pattern(&tab_file, tab_file_path.buf); 1120 if (err < 0) 1121 goto done; 1122 1123 if (st->opts.default_permissions && 1124 chmod(tab_file.path, st->opts.default_permissions) < 0) { 1125 err = REFTABLE_IO_ERROR; 1126 goto done; 1127 } 1128 1129 writer.fd = tab_file.fd; 1130 err = reftable_writer_new(&wr, fd_writer_write, fd_writer_flush, 1131 &writer, &st->opts); 1132 if (err < 0) 1133 goto done; 1134 1135 err = stack_write_compact(st, wr, first, last, config); 1136 if (err < 0) 1137 goto done; 1138 1139 err = reftable_writer_close(wr); 1140 if (err < 0) 1141 goto done; 1142 1143 err = tmpfile_close(&tab_file); 1144 if (err < 0) 1145 goto done; 1146 1147 *tab_file_out = tab_file; 1148 tab_file = REFTABLE_TMPFILE_INIT; 1149 1150done: 1151 tmpfile_delete(&tab_file); 1152 reftable_writer_free(wr); 1153 reftable_buf_release(&next_name); 1154 reftable_buf_release(&tab_file_path); 1155 return err; 1156} 1157 1158enum stack_compact_range_flags { 1159 /* 1160 * Perform a best-effort compaction. That is, even if we cannot lock 1161 * all tables in the specified range, we will try to compact the 1162 * remaining slice. 1163 */ 1164 STACK_COMPACT_RANGE_BEST_EFFORT = (1 << 0), 1165}; 1166 1167/* 1168 * Compact all tables in the range `[first, last)` into a single new table. 1169 * 1170 * This function returns `0` on success or a code `< 0` on failure. When the 1171 * stack or any of the tables in the specified range are already locked then 1172 * this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that 1173 * callers can either ignore, or they may choose to retry compaction after some 1174 * amount of time. 
 */
static int stack_compact_range(struct reftable_stack *st,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *expiry,
			       unsigned int flags)
{
	struct reftable_buf tables_list_buf = REFTABLE_BUF_INIT;
	struct reftable_buf new_table_name = REFTABLE_BUF_INIT;
	struct reftable_buf new_table_path = REFTABLE_BUF_INIT;
	struct reftable_buf table_name = REFTABLE_BUF_INIT;
	struct reftable_flock tables_list_lock = REFTABLE_FLOCK_INIT;
	struct reftable_flock *table_locks = NULL;
	struct reftable_tmpfile new_table = REFTABLE_TMPFILE_INIT;
	int is_empty_table = 0, err = 0;
	size_t first_to_replace, last_to_replace;
	size_t i, nlocks = 0;
	char **names = NULL;

	/*
	 * Nothing to do for an empty range. A single-table range only makes
	 * sense when expiring logs, where rewriting one table can still drop
	 * expired records.
	 */
	if (first > last || (!expiry && first == last)) {
		err = 0;
		goto done;
	}

	st->stats.attempts++;

	/*
	 * Hold the lock so that we can read "tables.list" and lock all tables
	 * which are part of the user-specified range.
	 */
	err = flock_acquire(&tables_list_lock, st->list_file, st->opts.lock_timeout_ms);
	if (err < 0)
		goto done;

	/*
	 * Check whether the stack is up-to-date. We unfortunately cannot
	 * handle the situation gracefully in case it's _not_ up-to-date
	 * because the range of tables that the user has requested us to
	 * compact may have been changed. So instead we abort.
	 *
	 * We could in theory improve the situation by having the caller not
	 * pass in a range, but instead the list of tables to compact. If so,
	 * we could check that relevant tables still exist. But for now it's
	 * good enough to just abort.
	 */
	err = stack_uptodate(st);
	if (err < 0)
		goto done;
	if (err > 0) {
		err = REFTABLE_OUTDATED_ERROR;
		goto done;
	}

	/*
	 * Lock all tables in the user-provided range. This is the slice of our
	 * stack which we'll compact.
	 *
	 * Note that we lock tables in reverse order from last to first. The
	 * intent behind this is to allow a newer process to perform best
	 * effort compaction of tables that it has added in the case where an
	 * older process is still busy compacting tables which are preexisting
	 * from the point of view of the newer process.
	 */
	REFTABLE_ALLOC_ARRAY(table_locks, last - first + 1);
	if (!table_locks) {
		err = REFTABLE_OUT_OF_MEMORY_ERROR;
		goto done;
	}
	for (i = 0; i < last - first + 1; i++)
		table_locks[i] = REFTABLE_FLOCK_INIT;

	for (i = last + 1; i > first; i--) {
		err = stack_filename(&table_name, st, reftable_table_name(st->tables[i - 1]));
		if (err < 0)
			goto done;

		/* Timeout of 0: do not wait for tables locked by others. */
		err = flock_acquire(&table_locks[nlocks], table_name.buf, 0);
		if (err < 0) {
			/*
			 * When the table is locked already we may do a
			 * best-effort compaction and compact only the tables
			 * that we have managed to lock so far. This of course
			 * requires that we have been able to lock at least two
			 * tables, otherwise there would be nothing to compact.
			 * In that case, we return a lock error to our caller.
			 */
			if (err == REFTABLE_LOCK_ERROR && last - (i - 1) >= 2 &&
			    flags & STACK_COMPACT_RANGE_BEST_EFFORT) {
				err = 0;
				/*
				 * The subtraction is to offset the index, the
				 * addition is to only compact up to the table
				 * of the preceding iteration. They obviously
				 * cancel each other out, but that may be
				 * non-obvious when it was omitted.
				 */
				first = (i - 1) + 1;
				break;
			}

			goto done;
		}

		/*
		 * We need to close the lockfiles as we might otherwise easily
		 * run into file descriptor exhaustion when we compress a lot
		 * of tables.
		 */
		err = flock_close(&table_locks[nlocks++]);
		if (err < 0)
			goto done;
	}

	/*
	 * We have locked all tables in our range and can thus release the
	 * "tables.list" lock while compacting the locked tables. This allows
	 * concurrent updates to the stack to proceed.
	 */
	err = flock_release(&tables_list_lock);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	/*
	 * Compact the now-locked tables into a new table. Note that compacting
	 * these tables may end up with an empty new table in case tombstones
	 * end up cancelling out all refs in that range.
	 */
	err = stack_compact_locked(st, first, last, expiry, &new_table);
	if (err < 0) {
		if (err != REFTABLE_EMPTY_TABLE_ERROR)
			goto done;
		is_empty_table = 1;
	}

	/*
	 * Now that we have written the new, compacted table we need to re-lock
	 * "tables.list". We'll then replace the compacted range of tables with
	 * the new table.
	 */
	err = flock_acquire(&tables_list_lock, st->list_file, st->opts.lock_timeout_ms);
	if (err < 0)
		goto done;

	/* The lockfile will be renamed into place, so fix its mode up front. */
	if (st->opts.default_permissions) {
		if (chmod(tables_list_lock.path,
			  st->opts.default_permissions) < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	/*
	 * As we have unlocked the stack while compacting our slice of tables
	 * it may have happened that a concurrently running process has updated
	 * the stack while we were compacting. In that case, we need to check
	 * whether the tables that we have just compacted still exist in the
	 * stack in the exact same order as we have compacted them.
	 *
	 * If they do exist, then it is fine to continue and replace those
	 * tables with our compacted version. If they don't, then we need to
	 * abort.
	 */
	err = stack_uptodate(st);
	if (err < 0)
		goto done;
	if (err > 0) {
		ssize_t new_offset = -1;
		int fd;

		fd = open(st->list_file, O_RDONLY);
		if (fd < 0) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}

		err = fd_read_lines(fd, &names);
		close(fd);
		if (err < 0)
			goto done;

		/*
		 * Search for the offset of the first table that we have
		 * compacted in the updated "tables.list" file.
		 */
		for (size_t i = 0; names[i]; i++) {
			if (strcmp(names[i], st->tables[first]->name))
				continue;

			/*
			 * We have found the first entry. Verify that all the
			 * subsequent tables we have compacted still exist in
			 * the modified stack in the exact same order as we
			 * have compacted them.
			 */
			for (size_t j = 1; j < last - first + 1; j++) {
				const char *old = first + j < st->merged->tables_len ?
					st->tables[first + j]->name : NULL;
				const char *new = names[i + j];

				/*
				 * If some entries are missing or in case the tables
				 * have changed then we need to bail out. Again, this
				 * shouldn't ever happen because we have locked the
				 * tables we are compacting.
				 */
				if (!old || !new || strcmp(old, new)) {
					err = REFTABLE_OUTDATED_ERROR;
					goto done;
				}
			}

			new_offset = i;
			break;
		}

		/*
		 * In case we didn't find our compacted tables in the stack we
		 * need to bail out. In theory, this should have never happened
		 * because we locked the tables we are compacting.
		 */
		if (new_offset < 0) {
			err = REFTABLE_OUTDATED_ERROR;
			goto done;
		}

		/*
		 * We have found the new range that we want to replace, so
		 * let's update the range of tables that we want to replace.
		 */
		first_to_replace = new_offset;
		last_to_replace = last + (new_offset - first);
	} else {
		/*
		 * `fd_read_lines()` uses a `NULL` sentinel to indicate that
		 * the array is at its end. As we use `free_names()` to free
		 * the array, we need to include this sentinel value here and
		 * thus have to allocate `tables_len + 1` many entries.
		 */
		REFTABLE_CALLOC_ARRAY(names, st->merged->tables_len + 1);
		if (!names) {
			err = REFTABLE_OUT_OF_MEMORY_ERROR;
			goto done;
		}

		for (size_t i = 0; i < st->merged->tables_len; i++) {
			names[i] = reftable_strdup(st->tables[i]->name);
			if (!names[i]) {
				err = REFTABLE_OUT_OF_MEMORY_ERROR;
				goto done;
			}
		}
		first_to_replace = first;
		last_to_replace = last;
	}

	/*
	 * If the resulting compacted table is not empty, then we need to move
	 * it into place now.
	 */
	if (!is_empty_table) {
		err = format_name(&new_table_name, st->tables[first]->min_update_index,
				  st->tables[last]->max_update_index);
		if (err < 0)
			goto done;

		err = reftable_buf_addstr(&new_table_name, ".ref");
		if (err < 0)
			goto done;

		err = stack_filename(&new_table_path, st, new_table_name.buf);
		if (err < 0)
			goto done;

		err = tmpfile_rename(&new_table, new_table_path.buf);
		if (err < 0)
			goto done;
	}

	/*
	 * Write the new "tables.list" contents with the compacted table we
	 * have just written. In case the compacted table became empty we
	 * simply skip writing it.
	 */
	for (i = 0; i < first_to_replace; i++) {
		if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 ||
		    (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)
			goto done;
	}
	if (!is_empty_table) {
		if ((err = reftable_buf_addstr(&tables_list_buf, new_table_name.buf)) < 0 ||
		    (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)
			goto done;
	}
	for (i = last_to_replace + 1; names[i]; i++) {
		if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 ||
		    (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)
			goto done;
	}

	err = reftable_write_data(tables_list_lock.fd,
				  tables_list_buf.buf, tables_list_buf.len);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	err = stack_fsync(&st->opts, tables_list_lock.fd);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	err = flock_commit(&tables_list_lock);
	if (err < 0) {
		err = REFTABLE_IO_ERROR;
		unlink(new_table_path.buf);
		goto done;
	}

	/*
	 * Reload the stack before deleting the compacted tables. We can only
	 * delete the files after we closed them on Windows, so this needs to
	 * happen first.
	 */
	err = reftable_stack_reload_maybe_reuse(st, first < last);
	if (err < 0)
		goto done;

	/*
	 * Delete the old tables. They may still be in use by concurrent
	 * readers, so it is expected that unlinking tables may fail.
	 */
	for (i = 0; i < nlocks; i++) {
		struct reftable_flock *table_lock = &table_locks[i];

		/* Derive the table path by stripping the ".lock" suffix. */
		reftable_buf_reset(&table_name);
		err = reftable_buf_add(&table_name, table_lock->path,
				       strlen(table_lock->path) - strlen(".lock"));
		if (err)
			continue;

		unlink(table_name.buf);
	}

done:
	flock_release(&tables_list_lock);
	for (i = 0; table_locks && i < nlocks; i++)
		flock_release(&table_locks[i]);
	reftable_free(table_locks);

	tmpfile_delete(&new_table);
	reftable_buf_release(&new_table_name);
	reftable_buf_release(&new_table_path);
	reftable_buf_release(&tables_list_buf);
	reftable_buf_release(&table_name);
	free_names(names);

	if (err == REFTABLE_LOCK_ERROR)
		st->stats.failures++;

	return err;
}

/*
 * Compact the whole stack into a single table, expiring logs according to
 * `config`. Returns 0 on success or a negative error code.
 */
int reftable_stack_compact_all(struct reftable_stack *st,
			       struct reftable_log_expiry_config *config)
{
	size_t last = st->merged->tables_len ? st->merged->tables_len - 1 : 0;
	return stack_compact_range(st, 0, last, config, 0);
}

/* Number of tables covered by the segment; `end` is exclusive. */
static int segment_size(struct segment *s)
{
	return s->end - s->start;
}

/*
 * Given the (newest-last? NOTE(review): ordering assumed from the examples
 * below — oldest first, sizes descending) table sizes of a stack, suggest the
 * range of tables to compact so that the sequence of sizes becomes geometric
 * again with the given factor. Returns a zeroed segment when no compaction is
 * needed.
 */
struct segment suggest_compaction_segment(uint64_t *sizes, size_t n,
					  uint8_t factor)
{
	struct segment seg = { 0 };
	uint64_t bytes;
	size_t i;

	if (!factor)
		factor = DEFAULT_GEOMETRIC_FACTOR;

	/*
	 * If there are no tables or only a single one then we don't have to
	 * compact anything. The sequence is geometric by definition already.
	 */
	if (n <= 1)
		return seg;

	/*
	 * Find the ending table of the compaction segment needed to restore the
	 * geometric sequence. Note that the segment end is exclusive.
	 *
	 * To do so, we iterate backwards starting from the most recent table
	 * until a valid segment end is found. If the preceding table is smaller
	 * than the current table multiplied by the geometric factor (2), the
	 * compaction segment end has been identified.
	 *
	 * Tables after the ending point are not added to the byte count because
	 * they are already valid members of the geometric sequence. Due to the
	 * properties of a geometric sequence, it is not possible for the sum of
	 * these tables to exceed the value of the ending point table.
	 *
	 * Example table size sequence requiring no compaction:
	 * 	64, 32, 16, 8, 4, 2, 1
	 *
	 * Example table size sequence where compaction segment end is set to
	 * the last table. Since the segment end is exclusive, the last table is
	 * excluded during subsequent compaction and the table with size 3 is
	 * the final table included:
	 * 	64, 32, 16, 8, 4, 3, 1
	 */
	for (i = n - 1; i > 0; i--) {
		if (sizes[i - 1] < sizes[i] * factor) {
			seg.end = i + 1;
			bytes = sizes[i];
			break;
		}
	}
	/*
	 * Note: if the loop above never breaks, i == 0 here and the loop below
	 * does not run, so `bytes` is only read after being initialized.
	 */

	/*
	 * Find the starting table of the compaction segment by iterating
	 * through the remaining tables and keeping track of the accumulated
	 * size of all tables seen from the segment end table. The previous
	 * table is compared to the accumulated size because the tables from the
	 * segment end are merged backwards recursively.
	 *
	 * Note that we keep iterating even after we have found the first
	 * starting point. This is because there may be tables in the stack
	 * preceding that first starting point which violate the geometric
	 * sequence.
	 *
	 * Example compaction segment start set to table with size 32:
	 * 	128, 32, 16, 8, 4, 3, 1
	 */
	for (; i > 0; i--) {
		uint64_t curr = bytes;
		bytes += sizes[i - 1];

		if (sizes[i - 1] < curr * factor) {
			seg.start = i - 1;
			seg.bytes = bytes;
		}
	}

	return seg;
}

/*
 * Collect per-table sizes with the format-header overhead subtracted, for use
 * by the compaction heuristic. Returns a heap-allocated array of
 * `st->merged->tables_len` entries, or NULL on allocation failure. The caller
 * owns the result and must free it with `reftable_free()`.
 */
static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)
{
	/* Format version: 1 for SHA-1 stacks, 2 otherwise. */
	int version = (st->opts.hash_id == REFTABLE_HASH_SHA1) ? 1 : 2;
	int overhead = header_size(version) - 1;
	uint64_t *sizes;

	REFTABLE_CALLOC_ARRAY(sizes, st->merged->tables_len);
	if (!sizes)
		return NULL;

	for (size_t i = 0; i < st->merged->tables_len; i++)
		sizes[i] = st->tables[i]->size - overhead;

	return sizes;
}

/*
 * Heuristically compact the stack: compute the segment that restores the
 * geometric size sequence and compact it best-effort. A no-op for stacks with
 * fewer than two tables. Returns 0 on success or a negative error code.
 */
int reftable_stack_auto_compact(struct reftable_stack *st)
{
	struct segment seg;
	uint64_t *sizes;

	if (st->merged->tables_len < 2)
		return 0;

	sizes = stack_table_sizes_for_compaction(st);
	if (!sizes)
		return REFTABLE_OUT_OF_MEMORY_ERROR;

	seg = suggest_compaction_segment(sizes, st->merged->tables_len,
					 st->opts.auto_compaction_factor);
	reftable_free(sizes);

	/* seg.end is exclusive, stack_compact_range() wants inclusive `last`. */
	if (segment_size(&seg) > 0)
		return stack_compact_range(st, seg.start, seg.end - 1,
					   NULL, STACK_COMPACT_RANGE_BEST_EFFORT);

	return 0;
}

/* Expose the stack's compaction statistics; owned by the stack, do not free. */
struct reftable_compaction_stats *
reftable_stack_compaction_stats(struct reftable_stack *st)
{
	return &st->stats;
}

/*
 * Look up a single ref in the stack. Returns 0 on success with `ref`
 * populated (caller releases it), 1 if the ref does not exist or is a
 * deletion tombstone, and a negative error code otherwise.
 */
int reftable_stack_read_ref(struct reftable_stack *st, const char *refname,
			    struct reftable_ref_record *ref)
{
	struct reftable_iterator it = { 0 };
	int ret;

	ret = reftable_merged_table_init_ref_iterator(st->merged, &it);
	if (ret)
		goto out;

	ret = reftable_iterator_seek_ref(&it, refname);
	if (ret)
		goto out;

	ret = reftable_iterator_next_ref(&it, ref);
	if (ret)
		goto out;

	/*
	 * The seek may have landed on a different ref; treat that, and
	 * deletion tombstones, as "not found".
	 */
	if (strcmp(ref->refname, refname) ||
	    reftable_ref_record_is_deletion(ref)) {
		reftable_ref_record_release(ref);
		ret = 1;
		goto out;
	}

out:
	reftable_iterator_destroy(&it);
	return ret;
}

/*
 * Look up the most recent log record for `refname`. Returns 0 on success
 * with `log` populated (caller releases it), 1 if no log record exists or it
 * is a deletion, and a negative error code otherwise. On any non-zero return,
 * `log` has been released.
 */
int reftable_stack_read_log(struct reftable_stack *st, const char *refname,
			    struct reftable_log_record *log)
{
	struct reftable_iterator it = {0};
	int err;

	err = reftable_stack_init_log_iterator(st, &it);
	if (err)
		goto done;

	err = reftable_iterator_seek_log(&it, refname);
	if (err)
		goto done;

	err = reftable_iterator_next_log(&it, log);
	if (err)
		goto done;

	/* Same "not found" handling as reading a ref record. */
	if (strcmp(log->refname, refname) ||
	    reftable_log_record_is_deletion(log)) {
		err = 1;
		goto done;
	}

done:
	if (err) {
		reftable_log_record_release(log);
	}
	reftable_iterator_destroy(&it);
	return err;
}

/* Check whether `s` looks like a table file name, i.e. has a ".ref" suffix. */
static int is_table_name(const char *s)
{
	const char *dot = strrchr(s, '.');
	return dot && !strcmp(dot, ".ref");
}

/*
 * Unlink the table `name` from the stack's directory if its maximum update
 * index does not exceed `max`, i.e. its contents are already covered by the
 * current stack. Errors are deliberately ignored: this is best-effort
 * cleanup of stale files.
 */
static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
				     const char *name)
{
	int err = 0;
	uint64_t update_idx = 0;
	struct reftable_block_source src = { NULL };
	struct reftable_table *table = NULL;
	struct reftable_buf table_path = REFTABLE_BUF_INIT;

	err = stack_filename(&table_path, st, name);
	if (err < 0)
		goto done;

	err = reftable_block_source_from_file(&src, table_path.buf);
	if (err < 0)
		goto done;

	err = reftable_table_new(&table, &src, name);
	if (err < 0)
		goto done;

	update_idx = reftable_table_max_update_index(table);
	reftable_table_decref(table);

	if (update_idx <= max) {
		unlink(table_path.buf);
	}
done:
	reftable_buf_release(&table_path);
}

/*
 * Scan the reftable directory for ".ref" files that are not part of the
 * current stack and remove the ones whose data is already superseded.
 * Assumes the caller holds the stack's addition lock (see
 * reftable_stack_clean()). Returns 0 on success, REFTABLE_IO_ERROR if the
 * directory cannot be opened.
 */
static int reftable_stack_clean_locked(struct reftable_stack *st)
{
	uint64_t max = reftable_merged_table_max_update_index(
		reftable_stack_merged_table(st));
	DIR *dir = opendir(st->reftable_dir);
	struct dirent *d = NULL;
	if (!dir) {
		return REFTABLE_IO_ERROR;
	}

	while ((d = readdir(dir))) {
		int found = 0;
		if (!is_table_name(d->d_name))
			continue;

		/* Skip tables that are still referenced by the stack. */
		for (size_t i = 0; !found && i < st->tables_len; i++)
			found = !strcmp(reftable_table_name(st->tables[i]), d->d_name);
		if (found)
			continue;

		remove_maybe_stale_table(st, max, d->d_name);
	}

	closedir(dir);
	return 0;
}

/*
 * Remove stale table files from the stack's directory. Takes the addition
 * lock so no writer can add tables concurrently, reloads the stack to get a
 * current view, then cleans. Returns 0 on success or a negative error code.
 */
int reftable_stack_clean(struct reftable_stack *st)
{
	struct reftable_addition *add = NULL;
	int err = reftable_stack_new_addition(&add, st, 0);
	if (err < 0) {
		goto done;
	}

	err = reftable_stack_reload(st);
	if (err < 0) {
		goto done;
	}

	err = reftable_stack_clean_locked(st);

done:
	reftable_addition_destroy(add);
	return err;
}

/* Return the hash function used by the tables in this stack. */
enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st)
{
	return reftable_merged_table_hash_id(st->merged);
}