Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcache: option to automatically run gc thread after writeback

The option gc_after_writeback is disabled by default, because garbage
collection discards clean data from the SSD, which reduces the amount of
cached data.

Echo 1 into /sys/fs/bcache/<UUID>/internal/gc_after_writeback to
enable this option, which wakes up the gc thread when writeback is
accomplished and all cached data is clean.

This option is helpful for people who care more about write performance.
Under a heavy write workload, all cached data becomes clean only when the
writeback thread cleans all cached data during I/O idle time. In such a
situation a subsequent gc run may help to shrink the bcache B+ tree and
discard more clean data, which may be helpful for future write
requests.

If you are not sure whether this is helpful for your own workload,
please leave it disabled (the default).

Signed-off-by: Coly Li <colyli@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Coly Li and committed by
Jens Axboe
7a671d8e cb07ad63

+52
+14
drivers/md/bcache/bcache.h
··· 627 627 struct bkey gc_done; 628 628 629 629 /* 630 + * For automatical garbage collection after writeback completed, this 631 + * varialbe is used as bit fields, 632 + * - 0000 0001b (BCH_ENABLE_AUTO_GC): enable gc after writeback 633 + * - 0000 0010b (BCH_DO_AUTO_GC): do gc after writeback 634 + * This is an optimization for following write request after writeback 635 + * finished, but read hit rate dropped due to clean data on cache is 636 + * discarded. Unless user explicitly sets it via sysfs, it won't be 637 + * enabled. 638 + */ 639 + #define BCH_ENABLE_AUTO_GC 1 640 + #define BCH_DO_AUTO_GC 2 641 + uint8_t gc_after_writeback; 642 + 643 + /* 630 644 * The allocation code needs gc_mark in struct bucket to be correct, but 631 645 * it's not while a gc is in progress. Protected by bucket_lock. 632 646 */
+9
drivers/md/bcache/sysfs.c
··· 128 128 rw_attribute(cache_replacement_policy); 129 129 rw_attribute(btree_shrinker_disabled); 130 130 rw_attribute(copy_gc_enabled); 131 + rw_attribute(gc_after_writeback); 131 132 rw_attribute(size); 132 133 133 134 static ssize_t bch_snprint_string_list(char *buf, ··· 694 693 sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite); 695 694 sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled); 696 695 sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); 696 + sysfs_printf(gc_after_writeback, "%i", c->gc_after_writeback); 697 697 sysfs_printf(io_disable, "%i", 698 698 test_bit(CACHE_SET_IO_DISABLE, &c->flags)); 699 699 ··· 795 793 sysfs_strtoul(gc_always_rewrite, c->gc_always_rewrite); 796 794 sysfs_strtoul(btree_shrinker_disabled, c->shrinker_disabled); 797 795 sysfs_strtoul(copy_gc_enabled, c->copy_gc_enabled); 796 + /* 797 + * write gc_after_writeback here may overwrite an already set 798 + * BCH_DO_AUTO_GC, it doesn't matter because this flag will be 799 + * set in next chance. 800 + */ 801 + sysfs_strtoul_clamp(gc_after_writeback, c->gc_after_writeback, 0, 1); 798 802 799 803 return size; 800 804 } ··· 881 873 &sysfs_gc_always_rewrite, 882 874 &sysfs_btree_shrinker_disabled, 883 875 &sysfs_copy_gc_enabled, 876 + &sysfs_gc_after_writeback, 884 877 &sysfs_io_disable, 885 878 NULL 886 879 };
+27
drivers/md/bcache/writeback.c
··· 17 17 #include <linux/sched/clock.h> 18 18 #include <trace/events/bcache.h> 19 19 20 + static void update_gc_after_writeback(struct cache_set *c) 21 + { 22 + if (c->gc_after_writeback != (BCH_ENABLE_AUTO_GC) || 23 + c->gc_stats.in_use < BCH_AUTO_GC_DIRTY_THRESHOLD) 24 + return; 25 + 26 + c->gc_after_writeback |= BCH_DO_AUTO_GC; 27 + } 28 + 20 29 /* Rate limiting */ 21 30 static uint64_t __calc_target_rate(struct cached_dev *dc) 22 31 { ··· 200 191 if (!set_at_max_writeback_rate(c, dc)) { 201 192 down_read(&dc->writeback_lock); 202 193 __update_writeback_rate(dc); 194 + update_gc_after_writeback(c); 203 195 up_read(&dc->writeback_lock); 204 196 } 205 197 } ··· 698 688 if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) { 699 689 up_write(&dc->writeback_lock); 700 690 break; 691 + } 692 + 693 + /* 694 + * When dirty data rate is high (e.g. 50%+), there might 695 + * be heavy buckets fragmentation after writeback 696 + * finished, which hurts following write performance. 697 + * If users really care about write performance they 698 + * may set BCH_ENABLE_AUTO_GC via sysfs, then when 699 + * BCH_DO_AUTO_GC is set, garbage collection thread 700 + * will be wake up here. After moving gc, the shrunk 701 + * btree and discarded free buckets SSD space may be 702 + * helpful for following write requests. 703 + */ 704 + if (c->gc_after_writeback == 705 + (BCH_ENABLE_AUTO_GC|BCH_DO_AUTO_GC)) { 706 + c->gc_after_writeback &= ~BCH_DO_AUTO_GC; 707 + force_wake_up_gc(c); 701 708 } 702 709 } 703 710
+2
drivers/md/bcache/writeback.h
··· 11 11 #define WRITEBACK_RATE_UPDATE_SECS_MAX 60 12 12 #define WRITEBACK_RATE_UPDATE_SECS_DEFAULT 5 13 13 14 + #define BCH_AUTO_GC_DIRTY_THRESHOLD 50 15 + 14 16 /* 15 17 * 14 (16384ths) is chosen here as something that each backing device 16 18 * should be a reasonable fraction of the share, and not to blow up