Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tools/cgroup: add memcg_slabinfo.py tool

Add a drgn-based tool to display slab information for a given memcg. Can
replace cgroup v1 memory.kmem.slabinfo interface on cgroup v2, but in a
more flexible way.

Currently supports only SLUB configuration, but SLAB can be trivially
added later.

Output example:
$ sudo ./tools/cgroup/memcg_slabinfo.py /sys/fs/cgroup/user.slice/user-111017.slice/user\@111017.service
shmem_inode_cache 92 92 704 46 8 : tunables 0 0 0 : slabdata 2 2 0
eventpoll_pwq 56 56 72 56 1 : tunables 0 0 0 : slabdata 1 1 0
eventpoll_epi 32 32 128 32 1 : tunables 0 0 0 : slabdata 1 1 0
kmalloc-8 0 0 8 512 1 : tunables 0 0 0 : slabdata 0 0 0
kmalloc-96 0 0 96 42 1 : tunables 0 0 0 : slabdata 0 0 0
kmalloc-2048 0 0 2048 16 8 : tunables 0 0 0 : slabdata 0 0 0
kmalloc-64 128 128 64 64 1 : tunables 0 0 0 : slabdata 2 2 0
mm_struct 160 160 1024 32 8 : tunables 0 0 0 : slabdata 5 5 0
signal_cache 96 96 1024 32 8 : tunables 0 0 0 : slabdata 3 3 0
sighand_cache 45 45 2112 15 8 : tunables 0 0 0 : slabdata 3 3 0
files_cache 138 138 704 46 8 : tunables 0 0 0 : slabdata 3 3 0
task_delay_info 153 153 80 51 1 : tunables 0 0 0 : slabdata 3 3 0
task_struct 27 27 3520 9 8 : tunables 0 0 0 : slabdata 3 3 0
radix_tree_node 56 56 584 28 4 : tunables 0 0 0 : slabdata 2 2 0
btrfs_inode 140 140 1136 28 8 : tunables 0 0 0 : slabdata 5 5 0
kmalloc-1024 64 64 1024 32 8 : tunables 0 0 0 : slabdata 2 2 0
kmalloc-192 84 84 192 42 2 : tunables 0 0 0 : slabdata 2 2 0
inode_cache 54 54 600 27 4 : tunables 0 0 0 : slabdata 2 2 0
kmalloc-128 0 0 128 32 1 : tunables 0 0 0 : slabdata 0 0 0
kmalloc-512 32 32 512 32 4 : tunables 0 0 0 : slabdata 1 1 0
skbuff_head_cache 32 32 256 32 2 : tunables 0 0 0 : slabdata 1 1 0
sock_inode_cache 46 46 704 46 8 : tunables 0 0 0 : slabdata 1 1 0
cred_jar 378 378 192 42 2 : tunables 0 0 0 : slabdata 9 9 0
proc_inode_cache 96 96 672 24 4 : tunables 0 0 0 : slabdata 4 4 0
dentry 336 336 192 42 2 : tunables 0 0 0 : slabdata 8 8 0
filp 697 864 256 32 2 : tunables 0 0 0 : slabdata 27 27 0
anon_vma 644 644 88 46 1 : tunables 0 0 0 : slabdata 14 14 0
pid 1408 1408 64 64 1 : tunables 0 0 0 : slabdata 22 22 0
vm_area_struct 1200 1200 200 40 2 : tunables 0 0 0 : slabdata 30 30 0

Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Link: http://lkml.kernel.org/r/20200623174037.3951353-20-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Roman Gushchin and committed by
Linus Torvalds
fbc1ac9d 933dc80e

+226
+226
tools/cgroup/memcg_slabinfo.py
#!/usr/bin/env drgn
#
# Copyright (C) 2020 Roman Gushchin <guro@fb.com>
# Copyright (C) 2020 Facebook

from os import stat
import argparse
import sys

from drgn.helpers.linux import list_for_each_entry, list_empty
from drgn.helpers.linux import for_each_page
from drgn.helpers.linux.cpumask import for_each_online_cpu
from drgn.helpers.linux.percpu import per_cpu_ptr
from drgn import container_of, FaultError, Object


DESC = """
This is a drgn script to provide slab statistics for memory cgroups.
It supports cgroup v2 and v1 and can emulate memory.kmem.slabinfo
interface of cgroup v1.
For drgn, visit https://github.com/osandov/drgn.
"""


# Maps a cgroup's kernfs node id (css.cgroup.kn.id) to its
# 'struct mem_cgroup' object; filled in by find_memcg_ids().
MEMCGS = {}

# s.oo.x packs two values: the bits above OO_SHIFT are read by oo_order(),
# the bits below by oo_objects().
OO_SHIFT = 16
OO_MASK = ((1 << OO_SHIFT) - 1)


def err(s):
    """Print an error message to stderr and exit with status 1."""
    print('slabinfo.py: error: %s' % s, file=sys.stderr, flush=True)
    sys.exit(1)


# NOTE: 'prog' is not defined in this file; it is expected to be provided
# as a global by the drgn runtime executing the script.  The default
# argument below is evaluated once, when the function is defined.
def find_memcg_ids(css=prog['root_mem_cgroup'].css, prefix=''):
    """Recursively record all descendants of css in MEMCGS.

    Walks css.children and, for every child, stores the containing
    'struct mem_cgroup' in MEMCGS under the child's cgroup.kn.id value.
    The css passed in is itself not recorded, only its descendants.

    css:    a 'struct cgroup_subsys_state' object to start from
    prefix: path accumulated so far, extended with each child's name
    """
    if not list_empty(css.children.address_of_()):
        for child in list_for_each_entry('struct cgroup_subsys_state',
                                         css.children.address_of_(),
                                         'sibling'):
            name = (prefix + '/' +
                    child.cgroup.kn.name.string_().decode('utf-8'))
            memcg = container_of(child, 'struct mem_cgroup', 'css')
            MEMCGS[child.cgroup.kn.id.value_()] = memcg
            find_memcg_ids(child, name)


def is_root_cache(s):
    """Return True if s has no memcg_params.root_cache set.

    A kmem_cache type without the memcg_params member (AttributeError)
    is treated as a root cache as well.
    """
    try:
        return not s.memcg_params.root_cache
    except AttributeError:
        return True


def cache_name(s):
    """Return the cache name, taken from the root cache for non-root caches."""
    if is_root_cache(s):
        return s.name.string_().decode('utf-8')
    return s.memcg_params.root_cache.name.string_().decode('utf-8')


# SLUB

def oo_order(s):
    """Return the part of s.oo.x stored above OO_SHIFT."""
    return s.oo.x >> OO_SHIFT


def oo_objects(s):
    """Return the part of s.oo.x selected by OO_MASK."""
    return s.oo.x & OO_MASK


def count_partial(n, fn):
    """Sum fn(page) over every page on the n.partial list."""
    total = 0
    for page in list_for_each_entry('struct page',
                                    n.partial.address_of_(),
                                    'lru'):
        total += fn(page)
    return total


def count_free(page):
    """Return the number of objects on the page that are not in use."""
    return page.objects - page.inuse


def slub_get_slabinfo(s, cfg):
    """Collect slabinfo-style counters for the kmem_cache s.

    Sums nr_slabs, total_objects and the free objects found on partial
    lists over the first cfg['nr_nodes'] entries of s.node.  Returns a
    dict with the keys consumed by cache_show(); the tunables-related
    entries are always 0.
    """
    nr_slabs = 0
    nr_objs = 0
    nr_free = 0

    for node in range(cfg['nr_nodes']):
        n = s.node[node]
        nr_slabs += n.nr_slabs.counter.value_()
        nr_objs += n.total_objects.counter.value_()
        nr_free += count_partial(n, count_free)

    return {'active_objs': nr_objs - nr_free,
            'num_objs': nr_objs,
            'active_slabs': nr_slabs,
            'num_slabs': nr_slabs,
            'objects_per_slab': oo_objects(s),
            'cache_order': oo_order(s),
            'limit': 0,
            'batchcount': 0,
            'shared': 0,
            'shared_avail': 0}


def cache_show(s, cfg, objs):
    """Print one slabinfo-formatted line for the kmem_cache s.

    s:    a 'struct kmem_cache' object
    cfg:  the dict produced by detect_kernel_config()
    objs: object count used for both <active_objs> and <num_objs> when
          cfg['shared_slab_pages'] is set; ignored otherwise

    Exits via err() if the allocator is not SLUB.
    """
    if cfg['allocator'] == 'SLUB':
        sinfo = slub_get_slabinfo(s, cfg)
    else:
        err('SLAB isn\'t supported yet')

    if cfg['shared_slab_pages']:
        sinfo['active_objs'] = objs
        sinfo['num_objs'] = objs

    print('%-17s %6lu %6lu %6u %4u %4d'
          ' : tunables %4u %4u %4u'
          ' : slabdata %6lu %6lu %6lu' % (
              cache_name(s), sinfo['active_objs'], sinfo['num_objs'],
              s.size, sinfo['objects_per_slab'], 1 << sinfo['cache_order'],
              sinfo['limit'], sinfo['batchcount'], sinfo['shared'],
              sinfo['active_slabs'], sinfo['num_slabs'],
              sinfo['shared_avail']))


def _member_name(member):
    """Return the name of a struct member description.

    Prefers the 'name' attribute and falls back to index 1 for member
    descriptions that are plain tuples.
    """
    try:
        return member.name
    except AttributeError:
        return member[1]


def detect_kernel_config():
    """Probe the running kernel and return a configuration dict.

    Keys:
      'nr_nodes':          value of nr_online_nodes
      'allocator':         'SLUB' or 'SLAB', decided by the name of the
                           second member of 'struct kmem_cache'
      'shared_slab_pages': True if 'struct obj_cgroup' exists

    Exits via err() if the allocator can't be determined.
    """
    cfg = {}

    cfg['nr_nodes'] = prog['nr_online_nodes'].value_()

    second_member = _member_name(prog.type('struct kmem_cache').members[1])
    if second_member == 'flags':
        cfg['allocator'] = 'SLUB'
    elif second_member == 'batchcount':
        cfg['allocator'] = 'SLAB'
    else:
        err('Can\'t determine the slab allocator')

    cfg['shared_slab_pages'] = False
    try:
        if prog.type('struct obj_cgroup'):
            cfg['shared_slab_pages'] = True
    except LookupError:
        # The type is unknown to this kernel: keep the default (False).
        pass

    return cfg


def for_each_slab_page(prog):
    """Yield every page whose flags have the PG_slab bit set.

    Pages whose flags can't be read (FaultError) are silently skipped.
    """
    pg_slab = 1 << prog.constant('PG_slab')

    for page in for_each_page(prog):
        try:
            if page.flags.value_() & pg_slab:
                yield page
        except FaultError:
            pass


def main():
    """Parse the command line and print slab statistics for one cgroup."""
    parser = argparse.ArgumentParser(description=DESC,
                                     formatter_class=
                                     argparse.RawTextHelpFormatter)
    parser.add_argument('cgroup', metavar='CGROUP',
                        help='Target memory cgroup')
    args = parser.parse_args()

    # The inode number of the cgroup directory is used as the key
    # into MEMCGS.
    try:
        cgroup_id = stat(args.cgroup).st_ino
    except OSError as e:
        err('Can\'t access the cgroup path: %s' % e)

    find_memcg_ids()
    try:
        memcg = MEMCGS[cgroup_id]
    except KeyError:
        err('Can\'t find the memory cgroup')

    cfg = detect_kernel_config()

    print('# name            <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>'
          ' : tunables <limit> <batchcount> <sharedfactor>'
          ' : slabdata <active_slabs> <num_slabs> <sharedavail>')

    if cfg['shared_slab_pages']:
        obj_cgroups = set()
        stats = {}
        caches = {}

        # find memcg pointers belonging to the specified cgroup
        obj_cgroups.add(memcg.objcg.value_())
        for ptr in list_for_each_entry('struct obj_cgroup',
                                       memcg.objcg_list.address_of_(),
                                       'list'):
            obj_cgroups.add(ptr.value_())

        # look over all slab pages, belonging to non-root memcgs
        # and look for objects belonging to the given memory cgroup
        for page in for_each_slab_page(prog):
            objcg_vec_raw = page.obj_cgroups.value_()
            if objcg_vec_raw == 0:
                continue
            cache = page.slab_cache
            if not cache:
                continue
            addr = cache.value_()
            caches[addr] = cache
            # clear the lowest bit to get the true obj_cgroups
            objcg_vec = Object(prog, page.obj_cgroups.type_,
                               value=objcg_vec_raw & ~1)

            stats.setdefault(addr, 0)

            for i in range(oo_objects(cache)):
                if objcg_vec[i].value_() in obj_cgroups:
                    stats[addr] += 1

        for addr in caches:
            if stats[addr] > 0:
                cache_show(caches[addr], cfg, stats[addr])

    else:
        for s in list_for_each_entry('struct kmem_cache',
                                     memcg.kmem_caches.address_of_(),
                                     'memcg_params.kmem_caches_node'):
            cache_show(s, cfg, None)


main()