/* Git fork */
1/*
2 * Copyright 2020 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style
5 * license that can be found in the LICENSE file or at
6 * https://developers.google.com/open-source/licenses/bsd
7 */
8
9#include "stack.h"
10
11#include "system.h"
12#include "constants.h"
13#include "merged.h"
14#include "reftable-error.h"
15#include "reftable-record.h"
16#include "reftable-merged.h"
17#include "table.h"
18#include "writer.h"
19
20static int stack_filename(struct reftable_buf *dest, struct reftable_stack *st,
21 const char *name)
22{
23 int err;
24 reftable_buf_reset(dest);
25 if ((err = reftable_buf_addstr(dest, st->reftable_dir)) < 0 ||
26 (err = reftable_buf_addstr(dest, "/")) < 0 ||
27 (err = reftable_buf_addstr(dest, name)) < 0)
28 return err;
29 return 0;
30}
31
32static int stack_fsync(const struct reftable_write_options *opts, int fd)
33{
34 if (opts->fsync)
35 return opts->fsync(fd);
36 return fsync(fd);
37}
38
39static ssize_t reftable_write_data(int fd, const void *data, size_t size)
40{
41 size_t total_written = 0;
42 const char *p = data;
43
44 while (total_written < size) {
45 ssize_t bytes_written = write(fd, p, size - total_written);
46 if (bytes_written < 0 && (errno == EAGAIN || errno == EINTR))
47 continue;
48 if (bytes_written < 0)
49 return REFTABLE_IO_ERROR;
50
51 total_written += bytes_written;
52 p += bytes_written;
53 }
54
55 return total_written;
56}
57
/*
 * Adapter state handed to the reftable writer callbacks so that table data
 * can be written to a plain file descriptor.
 */
struct fd_writer {
	/* Write options, consulted for the optional custom fsync callback. */
	const struct reftable_write_options *opts;
	/* Destination file descriptor. */
	int fd;
};
62
63static ssize_t fd_writer_write(void *arg, const void *data, size_t sz)
64{
65 struct fd_writer *writer = arg;
66 return reftable_write_data(writer->fd, data, sz);
67}
68
69static int fd_writer_flush(void *arg)
70{
71 struct fd_writer *writer = arg;
72 return stack_fsync(writer->opts, writer->fd);
73}
74
75static int fd_read_lines(int fd, char ***namesp)
76{
77 char *buf = NULL;
78 int err = 0;
79 off_t size;
80
81 size = lseek(fd, 0, SEEK_END);
82 if (size < 0) {
83 err = REFTABLE_IO_ERROR;
84 goto done;
85 }
86
87 err = lseek(fd, 0, SEEK_SET);
88 if (err < 0) {
89 err = REFTABLE_IO_ERROR;
90 goto done;
91 }
92
93 REFTABLE_ALLOC_ARRAY(buf, size + 1);
94 if (!buf) {
95 err = REFTABLE_OUT_OF_MEMORY_ERROR;
96 goto done;
97 }
98
99 for (off_t total_read = 0; total_read < size; ) {
100 ssize_t bytes_read = read(fd, buf + total_read, size - total_read);
101 if (bytes_read < 0 && (errno == EAGAIN || errno == EINTR))
102 continue;
103 if (bytes_read < 0 || !bytes_read) {
104 err = REFTABLE_IO_ERROR;
105 goto done;
106 }
107
108 total_read += bytes_read;
109 }
110 buf[size] = 0;
111
112 err = parse_names(buf, size, namesp);
113done:
114 reftable_free(buf);
115 return err;
116}
117
118int read_lines(const char *filename, char ***namesp)
119{
120 int fd = open(filename, O_RDONLY);
121 int err = 0;
122 if (fd < 0) {
123 if (errno == ENOENT) {
124 REFTABLE_CALLOC_ARRAY(*namesp, 1);
125 if (!*namesp)
126 return REFTABLE_OUT_OF_MEMORY_ERROR;
127 return 0;
128 }
129
130 return REFTABLE_IO_ERROR;
131 }
132 err = fd_read_lines(fd, namesp);
133 close(fd);
134 return err;
135}
136
137int reftable_stack_init_ref_iterator(struct reftable_stack *st,
138 struct reftable_iterator *it)
139{
140 return merged_table_init_iter(reftable_stack_merged_table(st),
141 it, REFTABLE_BLOCK_TYPE_REF);
142}
143
144int reftable_stack_init_log_iterator(struct reftable_stack *st,
145 struct reftable_iterator *it)
146{
147 return merged_table_init_iter(reftable_stack_merged_table(st),
148 it, REFTABLE_BLOCK_TYPE_LOG);
149}
150
151struct reftable_merged_table *
152reftable_stack_merged_table(struct reftable_stack *st)
153{
154 return st->merged;
155}
156
/* Return 1 if the NULL-terminated list `names` contains `name`, else 0. */
static int has_name(char **names, const char *name)
{
	for (char **cur = names; *cur; cur++)
		if (strcmp(*cur, name) == 0)
			return 1;
	return 0;
}
166
/*
 * Close and free the stack. Also removes table files that are no longer
 * listed in "tables.list", which an earlier compaction may have been unable
 * to unlink while the tables were still open (relevant on Windows).
 */
void reftable_stack_destroy(struct reftable_stack *st)
{
	char **names = NULL;
	int err = 0;

	if (!st)
		return;

	if (st->merged) {
		reftable_merged_table_free(st->merged);
		st->merged = NULL;
	}

	/*
	 * Best-effort read of the current table list; on failure we simply
	 * skip the unlinking below instead of erroring out of a destructor.
	 */
	err = read_lines(st->list_file, &names);
	if (err < 0) {
		REFTABLE_FREE_AND_NULL(names);
	}

	if (st->tables) {
		struct reftable_buf filename = REFTABLE_BUF_INIT;

		for (size_t i = 0; i < st->tables_len; i++) {
			const char *name = reftable_table_name(st->tables[i]);
			int try_unlinking = 1;

			reftable_buf_reset(&filename);
			/* Only unlink tables that are no longer listed. */
			if (names && !has_name(names, name)) {
				if (stack_filename(&filename, st, name) < 0)
					try_unlinking = 0;
			}
			reftable_table_decref(st->tables[i]);

			if (try_unlinking && filename.len) {
				/* On Windows, can only unlink after closing. */
				unlink(filename.buf);
			}
		}

		reftable_buf_release(&filename);
		st->tables_len = 0;
		REFTABLE_FREE_AND_NULL(st->tables);
	}

	/* Drop the cached "tables.list" descriptor used for the stat cache. */
	if (st->list_fd >= 0) {
		close(st->list_fd);
		st->list_fd = -1;
	}

	REFTABLE_FREE_AND_NULL(st->list_file);
	REFTABLE_FREE_AND_NULL(st->reftable_dir);
	reftable_free(st);
	free_names(names);
}
221
222static struct reftable_table **stack_copy_tables(struct reftable_stack *st,
223 size_t cur_len)
224{
225 struct reftable_table **cur = reftable_calloc(cur_len, sizeof(*cur));
226 if (!cur)
227 return NULL;
228 for (size_t i = 0; i < cur_len; i++)
229 cur[i] = st->tables[i];
230 return cur;
231}
232
/*
 * Replace the in-memory stack with the tables listed in `names`. When
 * `reuse_open` is set, tables that are already open under the same name are
 * reused instead of being re-opened from disk. On success the old merged
 * table is freed, non-reused old tables are closed and unlinked, and the
 * stack points at the new table array. On failure the stack is unchanged.
 */
static int reftable_stack_reload_once(struct reftable_stack *st,
				      const char **names,
				      int reuse_open)
{
	size_t cur_len = !st->merged ? 0 : st->merged->tables_len;
	struct reftable_table **cur = NULL;
	struct reftable_table **reused = NULL;
	struct reftable_table **new_tables = NULL;
	size_t reused_len = 0, reused_alloc = 0, names_len;
	size_t new_tables_len = 0;
	struct reftable_merged_table *new_merged = NULL;
	struct reftable_buf table_path = REFTABLE_BUF_INIT;
	int err = 0;
	size_t i;

	if (cur_len) {
		cur = stack_copy_tables(st, cur_len);
		if (!cur) {
			err = REFTABLE_OUT_OF_MEMORY_ERROR;
			goto done;
		}
	}

	names_len = names_length(names);

	if (names_len) {
		new_tables = reftable_calloc(names_len, sizeof(*new_tables));
		if (!new_tables) {
			err = REFTABLE_OUT_OF_MEMORY_ERROR;
			goto done;
		}
	}

	while (*names) {
		struct reftable_table *table = NULL;
		const char *name = *names++;

		/* this is linear; we assume compaction keeps the number of
		   tables under control so this is not quadratic. */
		for (i = 0; reuse_open && i < cur_len; i++) {
			if (cur[i] && 0 == strcmp(cur[i]->name, name)) {
				table = cur[i];
				cur[i] = NULL;

				/*
				 * When reloading the stack fails, we end up
				 * releasing all new tables. This also
				 * includes the reused tables, even though
				 * they are still in used by the old stack. We
				 * thus need to keep them alive here, which we
				 * do by bumping their refcount.
				 */
				REFTABLE_ALLOC_GROW_OR_NULL(reused,
							    reused_len + 1,
							    reused_alloc);
				if (!reused) {
					err = REFTABLE_OUT_OF_MEMORY_ERROR;
					goto done;
				}
				reused[reused_len++] = table;
				reftable_table_incref(table);
				break;
			}
		}

		/* Not reusable: open the table file from disk. */
		if (!table) {
			struct reftable_block_source src = { NULL };

			err = stack_filename(&table_path, st, name);
			if (err < 0)
				goto done;

			err = reftable_block_source_from_file(&src,
							      table_path.buf);
			if (err < 0)
				goto done;

			err = reftable_table_new(&table, &src, name);
			if (err < 0)
				goto done;
		}

		new_tables[new_tables_len] = table;
		new_tables_len++;
	}

	/* success! */
	err = reftable_merged_table_new(&new_merged, new_tables,
					new_tables_len, st->opts.hash_id);
	if (err < 0)
		goto done;

	/*
	 * Close the old, non-reused tables and proactively try to unlink
	 * them. This is done for systems like Windows, where the underlying
	 * file of such an open table wouldn't have been possible to be
	 * unlinked by the compacting process.
	 */
	for (i = 0; i < cur_len; i++) {
		if (cur[i]) {
			const char *name = reftable_table_name(cur[i]);

			err = stack_filename(&table_path, st, name);
			if (err < 0)
				goto done;

			reftable_table_decref(cur[i]);
			unlink(table_path.buf);
		}
	}

	/* Update the stack to point to the new tables. */
	if (st->merged)
		reftable_merged_table_free(st->merged);
	new_merged->suppress_deletions = 1;
	st->merged = new_merged;

	if (st->tables)
		reftable_free(st->tables);
	st->tables = new_tables;
	st->tables_len = new_tables_len;
	new_tables = NULL;
	new_tables_len = 0;

	/*
	 * Decrement the refcount of reused tables again. This only needs to
	 * happen on the successful case, because on the unsuccessful one we
	 * decrement their refcount via `new_tables`.
	 */
	for (i = 0; i < reused_len; i++)
		reftable_table_decref(reused[i]);

done:
	/* On success new_tables_len is 0 here, so this loop is a no-op. */
	for (i = 0; i < new_tables_len; i++)
		reftable_table_decref(new_tables[i]);
	reftable_free(new_tables);
	reftable_free(reused);
	reftable_free(cur);
	reftable_buf_release(&table_path);
	return err;
}
374
/*
 * Three-way compare two timevals: return negative if a is before b, zero if
 * equal, positive if after. Fix: the previous version returned the raw
 * time_t/suseconds_t differences truncated to `int`, which could wrap and
 * flip sign for large gaps; normalize to -1/0/1 instead.
 */
static int tv_cmp(struct timeval *a, struct timeval *b)
{
	if (a->tv_sec != b->tv_sec)
		return a->tv_sec < b->tv_sec ? -1 : 1;
	if (a->tv_usec != b->tv_usec)
		return a->tv_usec < b->tv_usec ? -1 : 1;
	return 0;
}
386
/*
 * Reload the stack from "tables.list", retrying for up to ~3 seconds when a
 * concurrent writer races with us (detected via REFTABLE_NOT_EXIST_ERROR
 * plus a changed name list). On success the open "tables.list" descriptor
 * and its stat info are cached to cheaply detect future changes.
 */
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
					     int reuse_open)
{
	char **names = NULL, **names_after = NULL;
	struct timeval deadline;
	int64_t delay = 0;
	int tries = 0, err;
	int fd = -1;

	err = gettimeofday(&deadline, NULL);
	if (err < 0)
		goto out;
	deadline.tv_sec += 3;

	while (1) {
		struct timeval now;

		err = gettimeofday(&now, NULL);
		if (err < 0)
			goto out;

		/*
		 * Only look at deadlines after the first few times. This
		 * simplifies debugging in GDB.
		 */
		tries++;
		if (tries > 3 && tv_cmp(&now, &deadline) >= 0)
			goto out;

		fd = open(st->list_file, O_RDONLY);
		if (fd < 0) {
			if (errno != ENOENT) {
				err = REFTABLE_IO_ERROR;
				goto out;
			}

			/* A missing list file means an empty stack. */
			REFTABLE_CALLOC_ARRAY(names, 1);
			if (!names) {
				err = REFTABLE_OUT_OF_MEMORY_ERROR;
				goto out;
			}
		} else {
			err = fd_read_lines(fd, &names);
			if (err < 0)
				goto out;
		}

		err = reftable_stack_reload_once(st, (const char **) names, reuse_open);
		if (!err)
			break;
		if (err != REFTABLE_NOT_EXIST_ERROR)
			goto out;

		/*
		 * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent
		 * writer. Check if there was one by checking if the name list
		 * changed.
		 */
		err = read_lines(st->list_file, &names_after);
		if (err < 0)
			goto out;
		if (names_equal((const char **) names_after,
				(const char **) names)) {
			err = REFTABLE_NOT_EXIST_ERROR;
			goto out;
		}

		free_names(names);
		names = NULL;
		free_names(names_after);
		names_after = NULL;
		close(fd);
		fd = -1;

		/* Randomized exponential backoff before retrying. */
		delay = delay + (delay * reftable_rand()) / UINT32_MAX + 1;
		poll(NULL, 0, delay);
	}

out:
	/*
	 * Invalidate the stat cache. It is sufficient to only close the file
	 * descriptor and keep the cached stat info because we never use the
	 * latter when the former is negative.
	 */
	if (st->list_fd >= 0) {
		close(st->list_fd);
		st->list_fd = -1;
	}

	/*
	 * Cache stat information in case it provides a useful signal to us.
	 * According to POSIX, "The st_ino and st_dev fields taken together
	 * uniquely identify the file within the system." That being said,
	 * Windows is not POSIX compliant and we do not have these fields
	 * available. So the information we have there is insufficient to
	 * determine whether two file descriptors point to the same file.
	 *
	 * While we could fall back to using other signals like the file's
	 * mtime, those are not sufficient to avoid races. We thus refrain from
	 * using the stat cache on such systems and fall back to the secondary
	 * caching mechanism, which is to check whether contents of the file
	 * have changed.
	 *
	 * On other systems which are POSIX compliant we must keep the file
	 * descriptor open. This is to avoid a race condition where two
	 * processes access the reftable stack at the same point in time:
	 *
	 * 1. A reads the reftable stack and caches its stat info.
	 *
	 * 2. B updates the stack, appending a new table to "tables.list".
	 *    This will both use a new inode and result in a different file
	 *    size, thus invalidating A's cache in theory.
	 *
	 * 3. B decides to auto-compact the stack and merges two tables. The
	 *    file size now matches what A has cached again. Furthermore, the
	 *    filesystem may decide to recycle the inode number of the file
	 *    we have replaced in (2) because it is not in use anymore.
	 *
	 * 4. A reloads the reftable stack. Neither the inode number nor the
	 *    file size changed. If the timestamps did not change either then
	 *    we think the cached copy of our stack is up-to-date.
	 *
	 * By keeping the file descriptor open the inode number cannot be
	 * recycled, mitigating the race.
	 */
	if (!err && fd >= 0 && !fstat(fd, &st->list_st) &&
	    st->list_st.st_dev && st->list_st.st_ino) {
		st->list_fd = fd;
		fd = -1;
	}

	if (fd >= 0)
		close(fd);
	free_names(names);
	free_names(names_after);

	/* Notify the user-provided reload hook, if any. */
	if (st->opts.on_reload)
		st->opts.on_reload(st->opts.on_reload_payload);

	return err;
}
528
529int reftable_new_stack(struct reftable_stack **dest, const char *dir,
530 const struct reftable_write_options *_opts)
531{
532 struct reftable_buf list_file_name = REFTABLE_BUF_INIT;
533 struct reftable_write_options opts = { 0 };
534 struct reftable_stack *p;
535 int err;
536
537 p = reftable_calloc(1, sizeof(*p));
538 if (!p) {
539 err = REFTABLE_OUT_OF_MEMORY_ERROR;
540 goto out;
541 }
542
543 if (_opts)
544 opts = *_opts;
545 if (opts.hash_id == 0)
546 opts.hash_id = REFTABLE_HASH_SHA1;
547
548 *dest = NULL;
549
550 reftable_buf_reset(&list_file_name);
551 if ((err = reftable_buf_addstr(&list_file_name, dir)) < 0 ||
552 (err = reftable_buf_addstr(&list_file_name, "/tables.list")) < 0)
553 goto out;
554
555 p->list_file = reftable_buf_detach(&list_file_name);
556 p->list_fd = -1;
557 p->opts = opts;
558 p->reftable_dir = reftable_strdup(dir);
559 if (!p->reftable_dir) {
560 err = REFTABLE_OUT_OF_MEMORY_ERROR;
561 goto out;
562 }
563
564 err = reftable_stack_reload_maybe_reuse(p, 1);
565 if (err < 0)
566 goto out;
567
568 *dest = p;
569 err = 0;
570
571out:
572 if (err < 0)
573 reftable_stack_destroy(p);
574 return err;
575}
576
577/*
578 * Check whether the given stack is up-to-date with what we have in memory.
579 * Returns 0 if so, 1 if the stack is out-of-date or a negative error code
580 * otherwise.
581 */
582static int stack_uptodate(struct reftable_stack *st)
583{
584 char **names = NULL;
585 int err;
586
587 /*
588 * When we have cached stat information available then we use it to
589 * verify whether the file has been rewritten.
590 *
591 * Note that we explicitly do not want to use `stat_validity_check()`
592 * and friends here because they may end up not comparing the `st_dev`
593 * and `st_ino` fields. These functions thus cannot guarantee that we
594 * indeed still have the same file.
595 */
596 if (st->list_fd >= 0) {
597 struct stat list_st;
598
599 if (stat(st->list_file, &list_st) < 0) {
600 /*
601 * It's fine for "tables.list" to not exist. In that
602 * case, we have to refresh when the loaded stack has
603 * any tables.
604 */
605 if (errno == ENOENT)
606 return !!st->tables_len;
607 return REFTABLE_IO_ERROR;
608 }
609
610 /*
611 * When "tables.list" refers to the same file we can assume
612 * that it didn't change. This is because we always use
613 * rename(3P) to update the file and never write to it
614 * directly.
615 */
616 if (st->list_st.st_dev == list_st.st_dev &&
617 st->list_st.st_ino == list_st.st_ino)
618 return 0;
619 }
620
621 err = read_lines(st->list_file, &names);
622 if (err < 0)
623 return err;
624
625 for (size_t i = 0; i < st->tables_len; i++) {
626 if (!names[i]) {
627 err = 1;
628 goto done;
629 }
630
631 if (strcmp(st->tables[i]->name, names[i])) {
632 err = 1;
633 goto done;
634 }
635 }
636
637 if (names[st->merged->tables_len]) {
638 err = 1;
639 goto done;
640 }
641
642done:
643 free_names(names);
644 return err;
645}
646
/* Reload the stack from disk if "tables.list" changed since the last load. */
int reftable_stack_reload(struct reftable_stack *st)
{
	int uptodate = stack_uptodate(st);

	if (uptodate <= 0)
		return uptodate;
	return reftable_stack_reload_maybe_reuse(st, 1);
}
654
/*
 * In-progress transaction that appends new tables to a stack. Holds the
 * "tables.list" lock for its whole lifetime.
 */
struct reftable_addition {
	/* Lock on "tables.list", held until commit or abort. */
	struct reftable_flock tables_list_lock;
	/* Stack this addition applies to (not owned). */
	struct reftable_stack *stack;

	/* Names of tables written so far, committed or unlinked on close. */
	char **new_tables;
	size_t new_tables_len, new_tables_cap;
	/* Minimum update index new tables must start at. */
	uint64_t next_update_index;
};
663
664static void reftable_addition_close(struct reftable_addition *add)
665{
666 struct reftable_buf nm = REFTABLE_BUF_INIT;
667 size_t i;
668
669 for (i = 0; i < add->new_tables_len; i++) {
670 if (!stack_filename(&nm, add->stack, add->new_tables[i]))
671 unlink(nm.buf);
672 reftable_free(add->new_tables[i]);
673 add->new_tables[i] = NULL;
674 }
675 reftable_free(add->new_tables);
676 add->new_tables = NULL;
677 add->new_tables_len = 0;
678 add->new_tables_cap = 0;
679
680 flock_release(&add->tables_list_lock);
681 reftable_buf_release(&nm);
682}
683
684static int reftable_stack_init_addition(struct reftable_addition *add,
685 struct reftable_stack *st,
686 unsigned int flags)
687{
688 struct reftable_buf lock_file_name = REFTABLE_BUF_INIT;
689 int err;
690
691 memset(add, 0, sizeof(*add));
692 add->stack = st;
693
694 err = flock_acquire(&add->tables_list_lock, st->list_file,
695 st->opts.lock_timeout_ms);
696 if (err < 0)
697 goto done;
698
699 if (st->opts.default_permissions) {
700 if (chmod(add->tables_list_lock.path,
701 st->opts.default_permissions) < 0) {
702 err = REFTABLE_IO_ERROR;
703 goto done;
704 }
705 }
706
707 err = stack_uptodate(st);
708 if (err < 0)
709 goto done;
710 if (err > 0 && flags & REFTABLE_STACK_NEW_ADDITION_RELOAD) {
711 err = reftable_stack_reload_maybe_reuse(add->stack, 1);
712 if (err)
713 goto done;
714 }
715 if (err > 0) {
716 err = REFTABLE_OUTDATED_ERROR;
717 goto done;
718 }
719
720 add->next_update_index = reftable_stack_next_update_index(st);
721done:
722 if (err)
723 reftable_addition_close(add);
724 reftable_buf_release(&lock_file_name);
725 return err;
726}
727
728static int stack_try_add(struct reftable_stack *st,
729 int (*write_table)(struct reftable_writer *wr,
730 void *arg),
731 void *arg, unsigned flags)
732{
733 struct reftable_addition add;
734 int err;
735
736 err = reftable_stack_init_addition(&add, st, flags);
737 if (err < 0)
738 goto done;
739
740 err = reftable_addition_add(&add, write_table, arg);
741 if (err < 0)
742 goto done;
743
744 err = reftable_addition_commit(&add);
745done:
746 reftable_addition_close(&add);
747 return err;
748}
749
750int reftable_stack_add(struct reftable_stack *st,
751 int (*write)(struct reftable_writer *wr, void *arg),
752 void *arg, unsigned flags)
753{
754 int err = stack_try_add(st, write, arg, flags);
755 if (err < 0) {
756 if (err == REFTABLE_OUTDATED_ERROR) {
757 /* Ignore error return, we want to propagate
758 REFTABLE_OUTDATED_ERROR.
759 */
760 reftable_stack_reload(st);
761 }
762 return err;
763 }
764
765 return 0;
766}
767
/*
 * Format a table name of the form "0x<min>-0x<max>-<random>" into `dest`,
 * where the random suffix disambiguates concurrent writers.
 */
static int format_name(struct reftable_buf *dest, uint64_t min, uint64_t max)
{
	char name[100];

	snprintf(name, sizeof(name), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x",
		 min, max, reftable_rand());
	reftable_buf_reset(dest);
	return reftable_buf_addstr(dest, name);
}
777
/* Abort a heap-allocated addition and free it. Accepts NULL. */
void reftable_addition_destroy(struct reftable_addition *add)
{
	if (!add)
		return;

	reftable_addition_close(add);
	reftable_free(add);
}
786
787int reftable_addition_commit(struct reftable_addition *add)
788{
789 struct reftable_buf table_list = REFTABLE_BUF_INIT;
790 int err = 0;
791 size_t i;
792
793 if (add->new_tables_len == 0)
794 goto done;
795
796 for (i = 0; i < add->stack->merged->tables_len; i++) {
797 if ((err = reftable_buf_addstr(&table_list, add->stack->tables[i]->name)) < 0 ||
798 (err = reftable_buf_addstr(&table_list, "\n")) < 0)
799 goto done;
800 }
801 for (i = 0; i < add->new_tables_len; i++) {
802 if ((err = reftable_buf_addstr(&table_list, add->new_tables[i])) < 0 ||
803 (err = reftable_buf_addstr(&table_list, "\n")) < 0)
804 goto done;
805 }
806
807 err = reftable_write_data(add->tables_list_lock.fd,
808 table_list.buf, table_list.len);
809 reftable_buf_release(&table_list);
810 if (err < 0) {
811 err = REFTABLE_IO_ERROR;
812 goto done;
813 }
814
815 err = stack_fsync(&add->stack->opts, add->tables_list_lock.fd);
816 if (err < 0) {
817 err = REFTABLE_IO_ERROR;
818 goto done;
819 }
820
821 err = flock_commit(&add->tables_list_lock);
822 if (err < 0) {
823 err = REFTABLE_IO_ERROR;
824 goto done;
825 }
826
827 /* success, no more state to clean up. */
828 for (i = 0; i < add->new_tables_len; i++)
829 reftable_free(add->new_tables[i]);
830 reftable_free(add->new_tables);
831 add->new_tables = NULL;
832 add->new_tables_len = 0;
833 add->new_tables_cap = 0;
834
835 err = reftable_stack_reload_maybe_reuse(add->stack, 1);
836 if (err)
837 goto done;
838
839 if (!add->stack->opts.disable_auto_compact) {
840 /*
841 * Auto-compact the stack to keep the number of tables in
842 * control. It is possible that a concurrent writer is already
843 * trying to compact parts of the stack, which would lead to a
844 * `REFTABLE_LOCK_ERROR` because parts of the stack are locked
845 * already. Similarly, the stack may have been rewritten by a
846 * concurrent writer, which causes `REFTABLE_OUTDATED_ERROR`.
847 * Both of these errors are benign, so we simply ignore them.
848 */
849 err = reftable_stack_auto_compact(add->stack);
850 if (err < 0 && err != REFTABLE_LOCK_ERROR &&
851 err != REFTABLE_OUTDATED_ERROR)
852 goto done;
853 err = 0;
854 }
855
856done:
857 reftable_addition_close(add);
858 return err;
859}
860
861int reftable_stack_new_addition(struct reftable_addition **dest,
862 struct reftable_stack *st,
863 unsigned int flags)
864{
865 int err;
866
867 REFTABLE_CALLOC_ARRAY(*dest, 1);
868 if (!*dest)
869 return REFTABLE_OUT_OF_MEMORY_ERROR;
870
871 err = reftable_stack_init_addition(*dest, st, flags);
872 if (err) {
873 reftable_free(*dest);
874 *dest = NULL;
875 }
876
877 return err;
878}
879
/*
 * Write one new table into the addition: create a uniquely named tempfile
 * in the reftable directory, let `write_table` fill it through a reftable
 * writer, then rename it to its final "0x<min>-0x<max>-<rand>.ref" name and
 * record that name for commit. An empty table is silently discarded.
 * Returns 0 on success or a negative error code.
 */
int reftable_addition_add(struct reftable_addition *add,
			  int (*write_table)(struct reftable_writer *wr,
					     void *arg),
			  void *arg)
{
	struct reftable_buf temp_tab_file_name = REFTABLE_BUF_INIT;
	struct reftable_buf tab_file_name = REFTABLE_BUF_INIT;
	struct reftable_buf next_name = REFTABLE_BUF_INIT;
	struct reftable_writer *wr = NULL;
	struct reftable_tmpfile tab_file = REFTABLE_TMPFILE_INIT;
	struct fd_writer writer = {
		.opts = &add->stack->opts,
	};
	int err = 0;

	reftable_buf_reset(&next_name);

	/* Provisional name based on the expected update index. */
	err = format_name(&next_name, add->next_update_index, add->next_update_index);
	if (err < 0)
		goto done;

	err = stack_filename(&temp_tab_file_name, add->stack, next_name.buf);
	if (err < 0)
		goto done;

	err = reftable_buf_addstr(&temp_tab_file_name, ".temp.XXXXXX");
	if (err < 0)
		goto done;

	err = tmpfile_from_pattern(&tab_file, temp_tab_file_name.buf);
	if (err < 0)
		goto done;
	if (add->stack->opts.default_permissions) {
		if (chmod(tab_file.path,
			  add->stack->opts.default_permissions)) {
			err = REFTABLE_IO_ERROR;
			goto done;
		}
	}

	writer.fd = tab_file.fd;
	err = reftable_writer_new(&wr, fd_writer_write, fd_writer_flush,
				  &writer, &add->stack->opts);
	if (err < 0)
		goto done;

	err = write_table(wr, arg);
	if (err < 0)
		goto done;

	err = reftable_writer_close(wr);
	if (err == REFTABLE_EMPTY_TABLE_ERROR) {
		/* Nothing was written; drop the tempfile and succeed. */
		err = 0;
		goto done;
	}
	if (err < 0)
		goto done;

	err = tmpfile_close(&tab_file);
	if (err < 0)
		goto done;

	/* The caller must not write below the reserved update index. */
	if (wr->min_update_index < add->next_update_index) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	/* Final name uses the indices the writer actually produced. */
	err = format_name(&next_name, wr->min_update_index, wr->max_update_index);
	if (err < 0)
		goto done;

	err = reftable_buf_addstr(&next_name, ".ref");
	if (err < 0)
		goto done;

	err = stack_filename(&tab_file_name, add->stack, next_name.buf);
	if (err < 0)
		goto done;

	/*
	  On windows, this relies on rand() picking a unique destination name.
	  Maybe we should do retry loop as well?
	 */
	err = tmpfile_rename(&tab_file, tab_file_name.buf);
	if (err < 0)
		goto done;

	REFTABLE_ALLOC_GROW_OR_NULL(add->new_tables, add->new_tables_len + 1,
				    add->new_tables_cap);
	if (!add->new_tables) {
		err = REFTABLE_OUT_OF_MEMORY_ERROR;
		goto done;
	}
	add->new_tables[add->new_tables_len++] = reftable_buf_detach(&next_name);

done:
	/* No-op after a successful rename. */
	tmpfile_delete(&tab_file);
	reftable_buf_release(&temp_tab_file_name);
	reftable_buf_release(&tab_file_name);
	reftable_buf_release(&next_name);
	reftable_writer_free(wr);
	return err;
}
983
984uint64_t reftable_stack_next_update_index(struct reftable_stack *st)
985{
986 int sz = st->merged->tables_len;
987 if (sz > 0)
988 return reftable_table_max_update_index(st->tables[sz - 1]) +
989 1;
990 return 1;
991}
992
/*
 * Stream the merged contents of tables [first, last] into the writer `wr`.
 * Deletion records are dropped when the range starts at the bottom of the
 * stack (first == 0), since there is nothing below for them to shadow. Log
 * records older than the expiry configuration are skipped. Returns 0 on
 * success or a negative error code.
 */
static int stack_write_compact(struct reftable_stack *st,
			       struct reftable_writer *wr,
			       size_t first, size_t last,
			       struct reftable_log_expiry_config *config)
{
	struct reftable_merged_table *mt = NULL;
	struct reftable_iterator it = { NULL };
	struct reftable_ref_record ref = { NULL };
	struct reftable_log_record log = { NULL };
	size_t subtabs_len = last - first + 1;
	uint64_t entries = 0;
	int err = 0;

	for (size_t i = first; i <= last; i++)
		st->stats.bytes += st->tables[i]->size;
	err = reftable_writer_set_limits(wr, st->tables[first]->min_update_index,
					 st->tables[last]->max_update_index);
	if (err < 0)
		goto done;

	err = reftable_merged_table_new(&mt, st->tables + first, subtabs_len,
					st->opts.hash_id);
	if (err < 0)
		goto done;

	/* First pass: copy all ref records. */
	err = merged_table_init_iter(mt, &it, REFTABLE_BLOCK_TYPE_REF);
	if (err < 0)
		goto done;

	err = reftable_iterator_seek_ref(&it, "");
	if (err < 0)
		goto done;

	while (1) {
		err = reftable_iterator_next_ref(&it, &ref);
		if (err > 0) {
			err = 0;
			break;
		}
		if (err < 0)
			goto done;

		if (first == 0 && reftable_ref_record_is_deletion(&ref)) {
			continue;
		}

		err = reftable_writer_add_ref(wr, &ref);
		if (err < 0)
			goto done;
		entries++;
	}
	reftable_iterator_destroy(&it);

	/* Second pass: copy log records, applying expiry filters. */
	err = merged_table_init_iter(mt, &it, REFTABLE_BLOCK_TYPE_LOG);
	if (err < 0)
		goto done;

	err = reftable_iterator_seek_log(&it, "");
	if (err < 0)
		goto done;

	while (1) {
		err = reftable_iterator_next_log(&it, &log);
		if (err > 0) {
			err = 0;
			break;
		}
		if (err < 0)
			goto done;
		if (first == 0 && reftable_log_record_is_deletion(&log)) {
			continue;
		}

		if (config && config->min_update_index > 0 &&
		    log.update_index < config->min_update_index) {
			continue;
		}

		if (config && config->time > 0 &&
		    log.value.update.time < config->time) {
			continue;
		}

		err = reftable_writer_add_log(wr, &log);
		if (err < 0)
			goto done;
		entries++;
	}

done:
	reftable_iterator_destroy(&it);
	if (mt)
		reftable_merged_table_free(mt);
	reftable_ref_record_release(&ref);
	reftable_log_record_release(&log);
	st->stats.entries_written += entries;
	return err;
}
1091
/*
 * Compact tables [first, last] into a single new table written to a
 * tempfile in the reftable directory. On success ownership of the tempfile
 * is transferred to `tab_file_out`; the caller is responsible for renaming
 * it into place. Returns 0 on success or a negative error code.
 */
static int stack_compact_locked(struct reftable_stack *st,
				size_t first, size_t last,
				struct reftable_log_expiry_config *config,
				struct reftable_tmpfile *tab_file_out)
{
	struct reftable_buf next_name = REFTABLE_BUF_INIT;
	struct reftable_buf tab_file_path = REFTABLE_BUF_INIT;
	struct reftable_writer *wr = NULL;
	struct fd_writer writer= {
		.opts = &st->opts,
	};
	struct reftable_tmpfile tab_file = REFTABLE_TMPFILE_INIT;
	int err = 0;

	/* Name covers the whole update-index range being merged. */
	err = format_name(&next_name, reftable_table_min_update_index(st->tables[first]),
			  reftable_table_max_update_index(st->tables[last]));
	if (err < 0)
		goto done;

	err = stack_filename(&tab_file_path, st, next_name.buf);
	if (err < 0)
		goto done;

	err = reftable_buf_addstr(&tab_file_path, ".temp.XXXXXX");
	if (err < 0)
		goto done;

	err = tmpfile_from_pattern(&tab_file, tab_file_path.buf);
	if (err < 0)
		goto done;

	if (st->opts.default_permissions &&
	    chmod(tab_file.path, st->opts.default_permissions) < 0) {
		err = REFTABLE_IO_ERROR;
		goto done;
	}

	writer.fd = tab_file.fd;
	err = reftable_writer_new(&wr, fd_writer_write, fd_writer_flush,
				  &writer, &st->opts);
	if (err < 0)
		goto done;

	err = stack_write_compact(st, wr, first, last, config);
	if (err < 0)
		goto done;

	err = reftable_writer_close(wr);
	if (err < 0)
		goto done;

	err = tmpfile_close(&tab_file);
	if (err < 0)
		goto done;

	/* Hand the tempfile to the caller; prevent deletion below. */
	*tab_file_out = tab_file;
	tab_file = REFTABLE_TMPFILE_INIT;

done:
	tmpfile_delete(&tab_file);
	reftable_writer_free(wr);
	reftable_buf_release(&next_name);
	reftable_buf_release(&tab_file_path);
	return err;
}
1157
/* Behavior flags for stack_compact_range(). */
enum stack_compact_range_flags {
	/*
	 * Perform a best-effort compaction. That is, even if we cannot lock
	 * all tables in the specified range, we will try to compact the
	 * remaining slice.
	 */
	STACK_COMPACT_RANGE_BEST_EFFORT = (1 << 0),
};
1166
1167/*
1168 * Compact all tables in the range `[first, last)` into a single new table.
1169 *
1170 * This function returns `0` on success or a code `< 0` on failure. When the
1171 * stack or any of the tables in the specified range are already locked then
1172 * this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that
1173 * callers can either ignore, or they may choose to retry compaction after some
1174 * amount of time.
1175 */
1176static int stack_compact_range(struct reftable_stack *st,
1177 size_t first, size_t last,
1178 struct reftable_log_expiry_config *expiry,
1179 unsigned int flags)
1180{
1181 struct reftable_buf tables_list_buf = REFTABLE_BUF_INIT;
1182 struct reftable_buf new_table_name = REFTABLE_BUF_INIT;
1183 struct reftable_buf new_table_path = REFTABLE_BUF_INIT;
1184 struct reftable_buf table_name = REFTABLE_BUF_INIT;
1185 struct reftable_flock tables_list_lock = REFTABLE_FLOCK_INIT;
1186 struct reftable_flock *table_locks = NULL;
1187 struct reftable_tmpfile new_table = REFTABLE_TMPFILE_INIT;
1188 int is_empty_table = 0, err = 0;
1189 size_t first_to_replace, last_to_replace;
1190 size_t i, nlocks = 0;
1191 char **names = NULL;
1192
1193 if (first > last || (!expiry && first == last)) {
1194 err = 0;
1195 goto done;
1196 }
1197
1198 st->stats.attempts++;
1199
1200 /*
1201 * Hold the lock so that we can read "tables.list" and lock all tables
1202 * which are part of the user-specified range.
1203 */
1204 err = flock_acquire(&tables_list_lock, st->list_file, st->opts.lock_timeout_ms);
1205 if (err < 0)
1206 goto done;
1207
1208 /*
1209 * Check whether the stack is up-to-date. We unfortunately cannot
1210 * handle the situation gracefully in case it's _not_ up-to-date
1211 * because the range of tables that the user has requested us to
1212 * compact may have been changed. So instead we abort.
1213 *
1214 * We could in theory improve the situation by having the caller not
1215 * pass in a range, but instead the list of tables to compact. If so,
1216 * we could check that relevant tables still exist. But for now it's
1217 * good enough to just abort.
1218 */
1219 err = stack_uptodate(st);
1220 if (err < 0)
1221 goto done;
1222 if (err > 0) {
1223 err = REFTABLE_OUTDATED_ERROR;
1224 goto done;
1225 }
1226
1227 /*
1228 * Lock all tables in the user-provided range. This is the slice of our
1229 * stack which we'll compact.
1230 *
1231 * Note that we lock tables in reverse order from last to first. The
1232 * intent behind this is to allow a newer process to perform best
1233 * effort compaction of tables that it has added in the case where an
1234 * older process is still busy compacting tables which are preexisting
1235 * from the point of view of the newer process.
1236 */
1237 REFTABLE_ALLOC_ARRAY(table_locks, last - first + 1);
1238 if (!table_locks) {
1239 err = REFTABLE_OUT_OF_MEMORY_ERROR;
1240 goto done;
1241 }
1242 for (i = 0; i < last - first + 1; i++)
1243 table_locks[i] = REFTABLE_FLOCK_INIT;
1244
1245 for (i = last + 1; i > first; i--) {
1246 err = stack_filename(&table_name, st, reftable_table_name(st->tables[i - 1]));
1247 if (err < 0)
1248 goto done;
1249
1250 err = flock_acquire(&table_locks[nlocks], table_name.buf, 0);
1251 if (err < 0) {
1252 /*
1253 * When the table is locked already we may do a
1254 * best-effort compaction and compact only the tables
1255 * that we have managed to lock so far. This of course
1256 * requires that we have been able to lock at least two
1257 * tables, otherwise there would be nothing to compact.
1258 * In that case, we return a lock error to our caller.
1259 */
1260 if (err == REFTABLE_LOCK_ERROR && last - (i - 1) >= 2 &&
1261 flags & STACK_COMPACT_RANGE_BEST_EFFORT) {
1262 err = 0;
1263 /*
1264 * The subtraction is to offset the index, the
1265 * addition is to only compact up to the table
1266 * of the preceding iteration. They obviously
1267 * cancel each other out, but that may be
1268 * non-obvious when it was omitted.
1269 */
1270 first = (i - 1) + 1;
1271 break;
1272 }
1273
1274 goto done;
1275 }
1276
1277 /*
1278 * We need to close the lockfiles as we might otherwise easily
1279 * run into file descriptor exhaustion when we compress a lot
1280 * of tables.
1281 */
1282 err = flock_close(&table_locks[nlocks++]);
1283 if (err < 0)
1284 goto done;
1285 }
1286
1287 /*
1288 * We have locked all tables in our range and can thus release the
1289 * "tables.list" lock while compacting the locked tables. This allows
1290 * concurrent updates to the stack to proceed.
1291 */
1292 err = flock_release(&tables_list_lock);
1293 if (err < 0) {
1294 err = REFTABLE_IO_ERROR;
1295 goto done;
1296 }
1297
1298 /*
1299 * Compact the now-locked tables into a new table. Note that compacting
1300 * these tables may end up with an empty new table in case tombstones
1301 * end up cancelling out all refs in that range.
1302 */
1303 err = stack_compact_locked(st, first, last, expiry, &new_table);
1304 if (err < 0) {
1305 if (err != REFTABLE_EMPTY_TABLE_ERROR)
1306 goto done;
1307 is_empty_table = 1;
1308 }
1309
1310 /*
1311 * Now that we have written the new, compacted table we need to re-lock
1312 * "tables.list". We'll then replace the compacted range of tables with
1313 * the new table.
1314 */
1315 err = flock_acquire(&tables_list_lock, st->list_file, st->opts.lock_timeout_ms);
1316 if (err < 0)
1317 goto done;
1318
1319 if (st->opts.default_permissions) {
1320 if (chmod(tables_list_lock.path,
1321 st->opts.default_permissions) < 0) {
1322 err = REFTABLE_IO_ERROR;
1323 goto done;
1324 }
1325 }
1326
1327 /*
1328 * As we have unlocked the stack while compacting our slice of tables
1329 * it may have happened that a concurrently running process has updated
1330 * the stack while we were compacting. In that case, we need to check
1331 * whether the tables that we have just compacted still exist in the
1332 * stack in the exact same order as we have compacted them.
1333 *
1334 * If they do exist, then it is fine to continue and replace those
1335 * tables with our compacted version. If they don't, then we need to
1336 * abort.
1337 */
1338 err = stack_uptodate(st);
1339 if (err < 0)
1340 goto done;
1341 if (err > 0) {
1342 ssize_t new_offset = -1;
1343 int fd;
1344
1345 fd = open(st->list_file, O_RDONLY);
1346 if (fd < 0) {
1347 err = REFTABLE_IO_ERROR;
1348 goto done;
1349 }
1350
1351 err = fd_read_lines(fd, &names);
1352 close(fd);
1353 if (err < 0)
1354 goto done;
1355
1356 /*
1357 * Search for the offset of the first table that we have
1358 * compacted in the updated "tables.list" file.
1359 */
1360 for (size_t i = 0; names[i]; i++) {
1361 if (strcmp(names[i], st->tables[first]->name))
1362 continue;
1363
1364 /*
1365 * We have found the first entry. Verify that all the
1366 * subsequent tables we have compacted still exist in
1367 * the modified stack in the exact same order as we
1368 * have compacted them.
1369 */
1370 for (size_t j = 1; j < last - first + 1; j++) {
1371 const char *old = first + j < st->merged->tables_len ?
1372 st->tables[first + j]->name : NULL;
1373 const char *new = names[i + j];
1374
1375 /*
1376 * If some entries are missing or in case the tables
1377 * have changed then we need to bail out. Again, this
1378 * shouldn't ever happen because we have locked the
1379 * tables we are compacting.
1380 */
1381 if (!old || !new || strcmp(old, new)) {
1382 err = REFTABLE_OUTDATED_ERROR;
1383 goto done;
1384 }
1385 }
1386
1387 new_offset = i;
1388 break;
1389 }
1390
1391 /*
1392 * In case we didn't find our compacted tables in the stack we
1393 * need to bail out. In theory, this should have never happened
1394 * because we locked the tables we are compacting.
1395 */
1396 if (new_offset < 0) {
1397 err = REFTABLE_OUTDATED_ERROR;
1398 goto done;
1399 }
1400
1401 /*
1402 * We have found the new range that we want to replace, so
1403 * let's update the range of tables that we want to replace.
1404 */
1405 first_to_replace = new_offset;
1406 last_to_replace = last + (new_offset - first);
1407 } else {
1408 /*
1409 * `fd_read_lines()` uses a `NULL` sentinel to indicate that
1410 * the array is at its end. As we use `free_names()` to free
1411 * the array, we need to include this sentinel value here and
1412 * thus have to allocate `tables_len + 1` many entries.
1413 */
1414 REFTABLE_CALLOC_ARRAY(names, st->merged->tables_len + 1);
1415 if (!names) {
1416 err = REFTABLE_OUT_OF_MEMORY_ERROR;
1417 goto done;
1418 }
1419
1420 for (size_t i = 0; i < st->merged->tables_len; i++) {
1421 names[i] = reftable_strdup(st->tables[i]->name);
1422 if (!names[i]) {
1423 err = REFTABLE_OUT_OF_MEMORY_ERROR;
1424 goto done;
1425 }
1426 }
1427 first_to_replace = first;
1428 last_to_replace = last;
1429 }
1430
1431 /*
1432 * If the resulting compacted table is not empty, then we need to move
1433 * it into place now.
1434 */
1435 if (!is_empty_table) {
1436 err = format_name(&new_table_name, st->tables[first]->min_update_index,
1437 st->tables[last]->max_update_index);
1438 if (err < 0)
1439 goto done;
1440
1441 err = reftable_buf_addstr(&new_table_name, ".ref");
1442 if (err < 0)
1443 goto done;
1444
1445 err = stack_filename(&new_table_path, st, new_table_name.buf);
1446 if (err < 0)
1447 goto done;
1448
1449 err = tmpfile_rename(&new_table, new_table_path.buf);
1450 if (err < 0)
1451 goto done;
1452 }
1453
1454 /*
1455 * Write the new "tables.list" contents with the compacted table we
1456 * have just written. In case the compacted table became empty we
1457 * simply skip writing it.
1458 */
1459 for (i = 0; i < first_to_replace; i++) {
1460 if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 ||
1461 (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)
1462 goto done;
1463 }
1464 if (!is_empty_table) {
1465 if ((err = reftable_buf_addstr(&tables_list_buf, new_table_name.buf)) < 0 ||
1466 (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)
1467 goto done;
1468 }
1469 for (i = last_to_replace + 1; names[i]; i++) {
1470 if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 ||
1471 (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)
1472 goto done;
1473 }
1474
1475 err = reftable_write_data(tables_list_lock.fd,
1476 tables_list_buf.buf, tables_list_buf.len);
1477 if (err < 0) {
1478 err = REFTABLE_IO_ERROR;
1479 unlink(new_table_path.buf);
1480 goto done;
1481 }
1482
1483 err = stack_fsync(&st->opts, tables_list_lock.fd);
1484 if (err < 0) {
1485 err = REFTABLE_IO_ERROR;
1486 unlink(new_table_path.buf);
1487 goto done;
1488 }
1489
1490 err = flock_commit(&tables_list_lock);
1491 if (err < 0) {
1492 err = REFTABLE_IO_ERROR;
1493 unlink(new_table_path.buf);
1494 goto done;
1495 }
1496
1497 /*
1498 * Reload the stack before deleting the compacted tables. We can only
1499 * delete the files after we closed them on Windows, so this needs to
1500 * happen first.
1501 */
1502 err = reftable_stack_reload_maybe_reuse(st, first < last);
1503 if (err < 0)
1504 goto done;
1505
1506 /*
1507 * Delete the old tables. They may still be in use by concurrent
1508 * readers, so it is expected that unlinking tables may fail.
1509 */
1510 for (i = 0; i < nlocks; i++) {
1511 struct reftable_flock *table_lock = &table_locks[i];
1512
1513 reftable_buf_reset(&table_name);
1514 err = reftable_buf_add(&table_name, table_lock->path,
1515 strlen(table_lock->path) - strlen(".lock"));
1516 if (err)
1517 continue;
1518
1519 unlink(table_name.buf);
1520 }
1521
1522done:
1523 flock_release(&tables_list_lock);
1524 for (i = 0; table_locks && i < nlocks; i++)
1525 flock_release(&table_locks[i]);
1526 reftable_free(table_locks);
1527
1528 tmpfile_delete(&new_table);
1529 reftable_buf_release(&new_table_name);
1530 reftable_buf_release(&new_table_path);
1531 reftable_buf_release(&tables_list_buf);
1532 reftable_buf_release(&table_name);
1533 free_names(names);
1534
1535 if (err == REFTABLE_LOCK_ERROR)
1536 st->stats.failures++;
1537
1538 return err;
1539}
1540
1541int reftable_stack_compact_all(struct reftable_stack *st,
1542 struct reftable_log_expiry_config *config)
1543{
1544 size_t last = st->merged->tables_len ? st->merged->tables_len - 1 : 0;
1545 return stack_compact_range(st, 0, last, config, 0);
1546}
1547
1548static int segment_size(struct segment *s)
1549{
1550 return s->end - s->start;
1551}
1552
/*
 * Suggest the slice of tables that should be compacted in order to restore
 * the geometric sequence of table sizes. Returns an empty segment (for which
 * `segment_size()` is 0) when no compaction is required.
 */
struct segment suggest_compaction_segment(uint64_t *sizes, size_t n,
					  uint8_t factor)
{
	struct segment seg = { 0 };
	uint64_t bytes;
	size_t i;

	/* A factor of 0 means "use the default geometric factor". */
	if (!factor)
		factor = DEFAULT_GEOMETRIC_FACTOR;

	/*
	 * If there are no tables or only a single one then we don't have to
	 * compact anything. The sequence is geometric by definition already.
	 */
	if (n <= 1)
		return seg;

	/*
	 * Find the ending table of the compaction segment needed to restore the
	 * geometric sequence. Note that the segment end is exclusive.
	 *
	 * To do so, we iterate backwards starting from the most recent table
	 * until a valid segment end is found. If the preceding table is smaller
	 * than the current table multiplied by the geometric factor (2), the
	 * compaction segment end has been identified.
	 *
	 * Tables after the ending point are not added to the byte count because
	 * they are already valid members of the geometric sequence. Due to the
	 * properties of a geometric sequence, it is not possible for the sum of
	 * these tables to exceed the value of the ending point table.
	 *
	 * Example table size sequence requiring no compaction:
	 * 	64, 32, 16, 8, 4, 2, 1
	 *
	 * Example table size sequence where compaction segment end is set to
	 * the last table. Since the segment end is exclusive, the last table is
	 * excluded during subsequent compaction and the table with size 3 is
	 * the final table included:
	 * 	64, 32, 16, 8, 4, 3, 1
	 */
	for (i = n - 1; i > 0; i--) {
		if (sizes[i - 1] < sizes[i] * factor) {
			seg.end = i + 1;
			bytes = sizes[i];
			break;
		}
	}

	/*
	 * Find the starting table of the compaction segment by iterating
	 * through the remaining tables and keeping track of the accumulated
	 * size of all tables seen from the segment end table. The previous
	 * table is compared to the accumulated size because the tables from the
	 * segment end are merged backwards recursively.
	 *
	 * Note that we keep iterating even after we have found the first
	 * starting point. This is because there may be tables in the stack
	 * preceding that first starting point which violate the geometric
	 * sequence.
	 *
	 * Example compaction segment start set to table with size 32:
	 * 	128, 32, 16, 8, 4, 3, 1
	 *
	 * Note: `i` deliberately carries over from the loop above. When no
	 * segment end was found there, `i` is 0, this loop does not execute,
	 * and `bytes` is consequently never read uninitialized.
	 */
	for (; i > 0; i--) {
		uint64_t curr = bytes;
		bytes += sizes[i - 1];

		if (sizes[i - 1] < curr * factor) {
			seg.start = i - 1;
			seg.bytes = bytes;
		}
	}

	return seg;
}
1628
1629static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)
1630{
1631 int version = (st->opts.hash_id == REFTABLE_HASH_SHA1) ? 1 : 2;
1632 int overhead = header_size(version) - 1;
1633 uint64_t *sizes;
1634
1635 REFTABLE_CALLOC_ARRAY(sizes, st->merged->tables_len);
1636 if (!sizes)
1637 return NULL;
1638
1639 for (size_t i = 0; i < st->merged->tables_len; i++)
1640 sizes[i] = st->tables[i]->size - overhead;
1641
1642 return sizes;
1643}
1644
1645int reftable_stack_auto_compact(struct reftable_stack *st)
1646{
1647 struct segment seg;
1648 uint64_t *sizes;
1649
1650 if (st->merged->tables_len < 2)
1651 return 0;
1652
1653 sizes = stack_table_sizes_for_compaction(st);
1654 if (!sizes)
1655 return REFTABLE_OUT_OF_MEMORY_ERROR;
1656
1657 seg = suggest_compaction_segment(sizes, st->merged->tables_len,
1658 st->opts.auto_compaction_factor);
1659 reftable_free(sizes);
1660
1661 if (segment_size(&seg) > 0)
1662 return stack_compact_range(st, seg.start, seg.end - 1,
1663 NULL, STACK_COMPACT_RANGE_BEST_EFFORT);
1664
1665 return 0;
1666}
1667
/*
 * Expose the stack's compaction statistics (attempts and lock failures, see
 * `stack_compact_range()`). The returned pointer refers to memory owned by
 * the stack and stays valid for the stack's lifetime.
 */
struct reftable_compaction_stats *
reftable_stack_compaction_stats(struct reftable_stack *st)
{
	return &st->stats;
}
1673
1674int reftable_stack_read_ref(struct reftable_stack *st, const char *refname,
1675 struct reftable_ref_record *ref)
1676{
1677 struct reftable_iterator it = { 0 };
1678 int ret;
1679
1680 ret = reftable_merged_table_init_ref_iterator(st->merged, &it);
1681 if (ret)
1682 goto out;
1683
1684 ret = reftable_iterator_seek_ref(&it, refname);
1685 if (ret)
1686 goto out;
1687
1688 ret = reftable_iterator_next_ref(&it, ref);
1689 if (ret)
1690 goto out;
1691
1692 if (strcmp(ref->refname, refname) ||
1693 reftable_ref_record_is_deletion(ref)) {
1694 reftable_ref_record_release(ref);
1695 ret = 1;
1696 goto out;
1697 }
1698
1699out:
1700 reftable_iterator_destroy(&it);
1701 return ret;
1702}
1703
1704int reftable_stack_read_log(struct reftable_stack *st, const char *refname,
1705 struct reftable_log_record *log)
1706{
1707 struct reftable_iterator it = {0};
1708 int err;
1709
1710 err = reftable_stack_init_log_iterator(st, &it);
1711 if (err)
1712 goto done;
1713
1714 err = reftable_iterator_seek_log(&it, refname);
1715 if (err)
1716 goto done;
1717
1718 err = reftable_iterator_next_log(&it, log);
1719 if (err)
1720 goto done;
1721
1722 if (strcmp(log->refname, refname) ||
1723 reftable_log_record_is_deletion(log)) {
1724 err = 1;
1725 goto done;
1726 }
1727
1728done:
1729 if (err) {
1730 reftable_log_record_release(log);
1731 }
1732 reftable_iterator_destroy(&it);
1733 return err;
1734}
1735
/* Check whether a filename looks like a table file, i.e. ends in ".ref". */
static int is_table_name(const char *s)
{
	const char *suffix = strrchr(s, '.');

	if (!suffix)
		return 0;
	return strcmp(suffix, ".ref") == 0;
}
1741
1742static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
1743 const char *name)
1744{
1745 int err = 0;
1746 uint64_t update_idx = 0;
1747 struct reftable_block_source src = { NULL };
1748 struct reftable_table *table = NULL;
1749 struct reftable_buf table_path = REFTABLE_BUF_INIT;
1750
1751 err = stack_filename(&table_path, st, name);
1752 if (err < 0)
1753 goto done;
1754
1755 err = reftable_block_source_from_file(&src, table_path.buf);
1756 if (err < 0)
1757 goto done;
1758
1759 err = reftable_table_new(&table, &src, name);
1760 if (err < 0)
1761 goto done;
1762
1763 update_idx = reftable_table_max_update_index(table);
1764 reftable_table_decref(table);
1765
1766 if (update_idx <= max) {
1767 unlink(table_path.buf);
1768 }
1769done:
1770 reftable_buf_release(&table_path);
1771}
1772
1773static int reftable_stack_clean_locked(struct reftable_stack *st)
1774{
1775 uint64_t max = reftable_merged_table_max_update_index(
1776 reftable_stack_merged_table(st));
1777 DIR *dir = opendir(st->reftable_dir);
1778 struct dirent *d = NULL;
1779 if (!dir) {
1780 return REFTABLE_IO_ERROR;
1781 }
1782
1783 while ((d = readdir(dir))) {
1784 int found = 0;
1785 if (!is_table_name(d->d_name))
1786 continue;
1787
1788 for (size_t i = 0; !found && i < st->tables_len; i++)
1789 found = !strcmp(reftable_table_name(st->tables[i]), d->d_name);
1790 if (found)
1791 continue;
1792
1793 remove_maybe_stale_table(st, max, d->d_name);
1794 }
1795
1796 closedir(dir);
1797 return 0;
1798}
1799
1800int reftable_stack_clean(struct reftable_stack *st)
1801{
1802 struct reftable_addition *add = NULL;
1803 int err = reftable_stack_new_addition(&add, st, 0);
1804 if (err < 0) {
1805 goto done;
1806 }
1807
1808 err = reftable_stack_reload(st);
1809 if (err < 0) {
1810 goto done;
1811 }
1812
1813 err = reftable_stack_clean_locked(st);
1814
1815done:
1816 reftable_addition_destroy(add);
1817 return err;
1818}
1819
/* Return the hash function ID used by the stack's merged table. */
enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st)
{
	return reftable_merged_table_hash_id(st->merged);
}