Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: Run btree key cache shrinker less aggressively

The btree key cache maintains lists of items that have been freed, but
can't yet be reclaimed because a bch2_trans_relock() call might find
them - we're waiting for SRCU readers to release.

Previously, we wouldn't count these items against the number we're
attempting to scan for, which would mean we'd evict more live key cache
entries - doing quite a bit of potentially unnecessary work.

With recent work to make sure we don't hold SRCU locks for too long, it
should be safe to count all the items on the freelists against the number to
scan - even if we can't reclaim them yet, we will be able to soon.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+23 -4
+19 -4
fs/bcachefs/btree_key_cache.c
··· 89 89 ck->btree_trans_barrier_seq = 90 90 start_poll_synchronize_srcu(&c->btree_trans_barrier); 91 91 92 - if (ck->c.lock.readers) 92 + if (ck->c.lock.readers) { 93 93 list_move_tail(&ck->list, &bc->freed_pcpu); 94 - else 94 + bc->nr_freed_pcpu++; 95 + } else { 95 96 list_move_tail(&ck->list, &bc->freed_nonpcpu); 97 + bc->nr_freed_nonpcpu++; 98 + } 96 99 atomic_long_inc(&bc->nr_freed); 97 100 98 101 kfree(ck->k); ··· 111 108 struct bkey_cached *ck) 112 109 { 113 110 struct bkey_cached *pos; 111 + 112 + bc->nr_freed_nonpcpu++; 114 113 115 114 list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) { 116 115 if (ULONG_CMP_GE(ck->btree_trans_barrier_seq, ··· 163 158 #else 164 159 mutex_lock(&bc->lock); 165 160 list_move_tail(&ck->list, &bc->freed_nonpcpu); 161 + bc->nr_freed_nonpcpu++; 166 162 mutex_unlock(&bc->lock); 167 163 #endif 168 164 } else { ··· 223 217 f->nr < ARRAY_SIZE(f->objs) / 2) { 224 218 ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list); 225 219 list_del_init(&ck->list); 220 + bc->nr_freed_nonpcpu--; 226 221 f->objs[f->nr++] = ck; 227 222 } 228 223 ··· 236 229 if (!list_empty(&bc->freed_nonpcpu)) { 237 230 ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list); 238 231 list_del_init(&ck->list); 232 + bc->nr_freed_nonpcpu--; 239 233 } 240 234 mutex_unlock(&bc->lock); 241 235 #endif ··· 858 850 * Newest freed entries are at the end of the list - once we hit one 859 851 * that's too new to be freed, we can bail out: 860 852 */ 853 + scanned += bc->nr_freed_nonpcpu; 854 + 861 855 list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) { 862 856 if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, 863 857 ck->btree_trans_barrier_seq)) ··· 869 859 six_lock_exit(&ck->c.lock); 870 860 kmem_cache_free(bch2_key_cache, ck); 871 861 atomic_long_dec(&bc->nr_freed); 872 - scanned++; 873 862 freed++; 863 + bc->nr_freed_nonpcpu--; 874 864 } 875 865 876 866 if (scanned >= nr) 877 867 goto out; 868 + 869 + scanned += bc->nr_freed_pcpu; 878 870 879 871 list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) { 880 872 if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ··· 887 875 six_lock_exit(&ck->c.lock); 888 876 kmem_cache_free(bch2_key_cache, ck); 889 877 atomic_long_dec(&bc->nr_freed); 890 - scanned++; 891 878 freed++; 879 + bc->nr_freed_pcpu--; 892 880 } 893 881 894 882 if (scanned >= nr) ··· 993 981 } 994 982 } 995 983 #endif 984 + 985 + BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu); 986 + BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu); 996 987 997 988 list_splice(&bc->freed_pcpu, &items); 998 989 list_splice(&bc->freed_nonpcpu, &items);
+4
fs/bcachefs/btree_key_cache_types.h
··· 11 11 struct mutex lock; 12 12 struct rhashtable table; 13 13 bool table_init_done; 14 + 14 15 struct list_head freed_pcpu; 16 + size_t nr_freed_pcpu; 15 17 struct list_head freed_nonpcpu; 18 + size_t nr_freed_nonpcpu; 19 + 16 20 struct shrinker *shrink; 17 21 unsigned shrink_iter; 18 22 struct btree_key_cache_freelist __percpu *pcpu_freed;