Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: shrinkers: introduce debugfs interface for memory shrinkers

This commit introduces the /sys/kernel/debug/shrinker debugfs interface,
which provides the ability to observe the state of individual kernel memory
shrinkers.

Because the feature adds some memory overhead (which shouldn't be large
unless there is a huge number of registered shrinkers), it's guarded by a
config option (enabled by default).

This commit introduces the "count" interface for each shrinker registered
in the system.

The output is in the following format:
<cgroup inode id> <nr of objects on node 0> <nr of objects on node 1>...
<cgroup inode id> <nr of objects on node 0> <nr of objects on node 1>...
...

To reduce the size of output on machines with many thousands of cgroups, if
the total number of objects on all nodes is 0, the line is omitted.

If the shrinker is not memcg-aware or CONFIG_MEMCG is off, 0 is printed as
cgroup inode id. If the shrinker is not numa-aware, 0's are printed for
all nodes except the first one.

This commit gives debugfs entries simple numeric names, which are not very
convenient. The following commit in the series will provide shrinkers
with more meaningful names.

[akpm@linux-foundation.org: remove WARN_ON_ONCE(), per Roman]
Reported-by: syzbot+300d27c79fe6d4cbcc39@syzkaller.appspotmail.com
Link: https://lkml.kernel.org/r/20220601032227.4076670-3-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>
Acked-by: Muchun Song <songmuchun@bytedance.com>
Cc: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Hillf Danton <hdanton@sina.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Roman Gushchin and committed by
akpm
5035ebc6 c15187a4

+200 -3
+18 -1
include/linux/shrinker.h
··· 73 73 /* ID in shrinker_idr */ 74 74 int id; 75 75 #endif 76 + #ifdef CONFIG_SHRINKER_DEBUG 77 + int debugfs_id; 78 + struct dentry *debugfs_entry; 79 + #endif 76 80 /* objs pending delete, per node */ 77 81 atomic_long_t *nr_deferred; 78 82 }; ··· 98 94 extern void unregister_shrinker(struct shrinker *shrinker); 99 95 extern void free_prealloced_shrinker(struct shrinker *shrinker); 100 96 extern void synchronize_shrinkers(void); 101 - #endif 97 + 98 + #ifdef CONFIG_SHRINKER_DEBUG 99 + extern int shrinker_debugfs_add(struct shrinker *shrinker); 100 + extern void shrinker_debugfs_remove(struct shrinker *shrinker); 101 + #else /* CONFIG_SHRINKER_DEBUG */ 102 + static inline int shrinker_debugfs_add(struct shrinker *shrinker) 103 + { 104 + return 0; 105 + } 106 + static inline void shrinker_debugfs_remove(struct shrinker *shrinker) 107 + { 108 + } 109 + #endif /* CONFIG_SHRINKER_DEBUG */ 110 + #endif /* _LINUX_SHRINKER_H */
+9
lib/Kconfig.debug
··· 699 699 help 700 700 Debug objects boot parameter default value 701 701 702 + config SHRINKER_DEBUG 703 + default y 704 + bool "Enable shrinker debugging support" 705 + depends on DEBUG_FS 706 + help 707 + Say Y to enable the shrinker debugfs interface which provides 708 + visibility into the kernel memory shrinkers subsystem. 709 + Disable it to avoid an extra memory footprint. 710 + 702 711 config HAVE_DEBUG_KMEMLEAK 703 712 bool 704 713
+1
mm/Makefile
··· 133 133 obj-$(CONFIG_IO_MAPPING) += io-mapping.o 134 134 obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o 135 135 obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o 136 + obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o
+168
mm/shrinker_debug.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/idr.h> 3 + #include <linux/slab.h> 4 + #include <linux/debugfs.h> 5 + #include <linux/seq_file.h> 6 + #include <linux/shrinker.h> 7 + #include <linux/memcontrol.h> 8 + 9 + /* defined in vmscan.c */ 10 + extern struct rw_semaphore shrinker_rwsem; 11 + extern struct list_head shrinker_list; 12 + 13 + static DEFINE_IDA(shrinker_debugfs_ida); 14 + static struct dentry *shrinker_debugfs_root; 15 + 16 + static unsigned long shrinker_count_objects(struct shrinker *shrinker, 17 + struct mem_cgroup *memcg, 18 + unsigned long *count_per_node) 19 + { 20 + unsigned long nr, total = 0; 21 + int nid; 22 + 23 + for_each_node(nid) { 24 + if (nid == 0 || (shrinker->flags & SHRINKER_NUMA_AWARE)) { 25 + struct shrink_control sc = { 26 + .gfp_mask = GFP_KERNEL, 27 + .nid = nid, 28 + .memcg = memcg, 29 + }; 30 + 31 + nr = shrinker->count_objects(shrinker, &sc); 32 + if (nr == SHRINK_EMPTY) 33 + nr = 0; 34 + } else { 35 + nr = 0; 36 + } 37 + 38 + count_per_node[nid] = nr; 39 + total += nr; 40 + } 41 + 42 + return total; 43 + } 44 + 45 + static int shrinker_debugfs_count_show(struct seq_file *m, void *v) 46 + { 47 + struct shrinker *shrinker = m->private; 48 + unsigned long *count_per_node; 49 + struct mem_cgroup *memcg; 50 + unsigned long total; 51 + bool memcg_aware; 52 + int ret, nid; 53 + 54 + count_per_node = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL); 55 + if (!count_per_node) 56 + return -ENOMEM; 57 + 58 + ret = down_read_killable(&shrinker_rwsem); 59 + if (ret) { 60 + kfree(count_per_node); 61 + return ret; 62 + } 63 + rcu_read_lock(); 64 + 65 + memcg_aware = shrinker->flags & SHRINKER_MEMCG_AWARE; 66 + 67 + memcg = mem_cgroup_iter(NULL, NULL, NULL); 68 + do { 69 + if (memcg && !mem_cgroup_online(memcg)) 70 + continue; 71 + 72 + total = shrinker_count_objects(shrinker, 73 + memcg_aware ? 
memcg : NULL, 74 + count_per_node); 75 + if (total) { 76 + seq_printf(m, "%lu", mem_cgroup_ino(memcg)); 77 + for_each_node(nid) 78 + seq_printf(m, " %lu", count_per_node[nid]); 79 + seq_putc(m, '\n'); 80 + } 81 + 82 + if (!memcg_aware) { 83 + mem_cgroup_iter_break(NULL, memcg); 84 + break; 85 + } 86 + 87 + if (signal_pending(current)) { 88 + mem_cgroup_iter_break(NULL, memcg); 89 + ret = -EINTR; 90 + break; 91 + } 92 + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); 93 + 94 + rcu_read_unlock(); 95 + up_read(&shrinker_rwsem); 96 + 97 + kfree(count_per_node); 98 + return ret; 99 + } 100 + DEFINE_SHOW_ATTRIBUTE(shrinker_debugfs_count); 101 + 102 + int shrinker_debugfs_add(struct shrinker *shrinker) 103 + { 104 + struct dentry *entry; 105 + char buf[16]; 106 + int id; 107 + 108 + lockdep_assert_held(&shrinker_rwsem); 109 + 110 + /* debugfs isn't initialized yet, add debugfs entries later. */ 111 + if (!shrinker_debugfs_root) 112 + return 0; 113 + 114 + id = ida_alloc(&shrinker_debugfs_ida, GFP_KERNEL); 115 + if (id < 0) 116 + return id; 117 + shrinker->debugfs_id = id; 118 + 119 + snprintf(buf, sizeof(buf), "%d", id); 120 + 121 + /* create debugfs entry */ 122 + entry = debugfs_create_dir(buf, shrinker_debugfs_root); 123 + if (IS_ERR(entry)) { 124 + ida_free(&shrinker_debugfs_ida, id); 125 + return PTR_ERR(entry); 126 + } 127 + shrinker->debugfs_entry = entry; 128 + 129 + debugfs_create_file("count", 0220, entry, shrinker, 130 + &shrinker_debugfs_count_fops); 131 + return 0; 132 + } 133 + 134 + void shrinker_debugfs_remove(struct shrinker *shrinker) 135 + { 136 + lockdep_assert_held(&shrinker_rwsem); 137 + 138 + if (!shrinker->debugfs_entry) 139 + return; 140 + 141 + debugfs_remove_recursive(shrinker->debugfs_entry); 142 + ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id); 143 + } 144 + 145 + static int __init shrinker_debugfs_init(void) 146 + { 147 + struct shrinker *shrinker; 148 + struct dentry *dentry; 149 + int ret = 0; 150 + 151 + dentry = 
debugfs_create_dir("shrinker", NULL); 152 + if (IS_ERR(dentry)) 153 + return PTR_ERR(dentry); 154 + shrinker_debugfs_root = dentry; 155 + 156 + /* Create debugfs entries for shrinkers registered at boot */ 157 + down_write(&shrinker_rwsem); 158 + list_for_each_entry(shrinker, &shrinker_list, list) 159 + if (!shrinker->debugfs_entry) { 160 + ret = shrinker_debugfs_add(shrinker); 161 + if (ret) 162 + break; 163 + } 164 + up_write(&shrinker_rwsem); 165 + 166 + return ret; 167 + } 168 + late_initcall(shrinker_debugfs_init);
+4 -2
mm/vmscan.c
··· 190 190 task->reclaim_state = rs; 191 191 } 192 192 193 - static LIST_HEAD(shrinker_list); 194 - static DECLARE_RWSEM(shrinker_rwsem); 193 + LIST_HEAD(shrinker_list); 194 + DECLARE_RWSEM(shrinker_rwsem); 195 195 196 196 #ifdef CONFIG_MEMCG 197 197 static int shrinker_nr_max; ··· 650 650 down_write(&shrinker_rwsem); 651 651 list_add_tail(&shrinker->list, &shrinker_list); 652 652 shrinker->flags |= SHRINKER_REGISTERED; 653 + shrinker_debugfs_add(shrinker); 653 654 up_write(&shrinker_rwsem); 654 655 } 655 656 ··· 678 677 shrinker->flags &= ~SHRINKER_REGISTERED; 679 678 if (shrinker->flags & SHRINKER_MEMCG_AWARE) 680 679 unregister_memcg_shrinker(shrinker); 680 + shrinker_debugfs_remove(shrinker); 681 681 up_write(&shrinker_rwsem); 682 682 683 683 kfree(shrinker->nr_deferred);