Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

kselftests: cgroup: add kernel memory accounting tests

Add some tests to cover the kernel memory accounting functionality. These
are covering some issues (and changes) we had recently.

1) A test which allocates a lot of negative dentries, checks memcg slab
statistics, creates memory pressure by setting memory.high to some low
value and checks that some number of slabs was reclaimed.

2) A test which covers side effects of memcg destruction: it creates
and destroys a large number of sub-cgroups, each containing a
multi-threaded workload which allocates and releases some kernel
memory. Then it checks that the charge and memory.stat do add up on
the parent level.

3) A test which reads /proc/kpagecgroup and implicitly checks that it
doesn't crash the system.

4) A test which spawns a large number of threads and checks that the
kernel stacks accounting works as expected.

5) A test which checks that living charged slab objects are not
preventing the memory cgroup from being released after being deleted by
a user.

Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Link: http://lkml.kernel.org/r/20200623174037.3951353-19-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Roman Gushchin and committed by
Linus Torvalds
933dc80e 10befea9

+385
+1
tools/testing/selftests/cgroup/.gitignore
··· 2 2 test_memcontrol 3 3 test_core 4 4 test_freezer 5 + test_kmem
+2
tools/testing/selftests/cgroup/Makefile
··· 6 6 TEST_FILES := with_stress.sh 7 7 TEST_PROGS := test_stress.sh 8 8 TEST_GEN_PROGS = test_memcontrol 9 + TEST_GEN_PROGS += test_kmem 9 10 TEST_GEN_PROGS += test_core 10 11 TEST_GEN_PROGS += test_freezer 11 12 12 13 include ../lib.mk 13 14 14 15 $(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h 16 + $(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h 15 17 $(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h 16 18 $(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
+382
tools/testing/selftests/cgroup/test_kmem.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #define _GNU_SOURCE 3 + 4 + #include <linux/limits.h> 5 + #include <fcntl.h> 6 + #include <stdio.h> 7 + #include <stdlib.h> 8 + #include <string.h> 9 + #include <sys/stat.h> 10 + #include <sys/types.h> 11 + #include <unistd.h> 12 + #include <sys/wait.h> 13 + #include <errno.h> 14 + #include <sys/sysinfo.h> 15 + #include <pthread.h> 16 + 17 + #include "../kselftest.h" 18 + #include "cgroup_util.h" 19 + 20 + 21 + static int alloc_dcache(const char *cgroup, void *arg) 22 + { 23 + unsigned long i; 24 + struct stat st; 25 + char buf[128]; 26 + 27 + for (i = 0; i < (unsigned long)arg; i++) { 28 + snprintf(buf, sizeof(buf), 29 + "/something-non-existent-with-a-long-name-%64lu-%d", 30 + i, getpid()); 31 + stat(buf, &st); 32 + } 33 + 34 + return 0; 35 + } 36 + 37 + /* 38 + * This test allocates 100000 of negative dentries with long names. 39 + * Then it checks that "slab" in memory.stat is larger than 1M. 40 + * Then it sets memory.high to 1M and checks that at least 1/2 41 + * of slab memory has been reclaimed. 
42 + */ 43 + static int test_kmem_basic(const char *root) 44 + { 45 + int ret = KSFT_FAIL; 46 + char *cg = NULL; 47 + long slab0, slab1, current; 48 + 49 + cg = cg_name(root, "kmem_basic_test"); 50 + if (!cg) 51 + goto cleanup; 52 + 53 + if (cg_create(cg)) 54 + goto cleanup; 55 + 56 + if (cg_run(cg, alloc_dcache, (void *)100000)) 57 + goto cleanup; 58 + 59 + slab0 = cg_read_key_long(cg, "memory.stat", "slab "); 60 + if (slab0 < (1 << 20)) 61 + goto cleanup; 62 + 63 + cg_write(cg, "memory.high", "1M"); 64 + slab1 = cg_read_key_long(cg, "memory.stat", "slab "); 65 + if (slab1 <= 0) 66 + goto cleanup; 67 + 68 + current = cg_read_long(cg, "memory.current"); 69 + if (current <= 0) 70 + goto cleanup; 71 + 72 + if (slab1 < slab0 / 2 && current < slab0 / 2) 73 + ret = KSFT_PASS; 74 + cleanup: 75 + cg_destroy(cg); 76 + free(cg); 77 + 78 + return ret; 79 + } 80 + 81 + static void *alloc_kmem_fn(void *arg) 82 + { 83 + alloc_dcache(NULL, (void *)100); 84 + return NULL; 85 + } 86 + 87 + static int alloc_kmem_smp(const char *cgroup, void *arg) 88 + { 89 + int nr_threads = 2 * get_nprocs(); 90 + pthread_t *tinfo; 91 + unsigned long i; 92 + int ret = -1; 93 + 94 + tinfo = calloc(nr_threads, sizeof(pthread_t)); 95 + if (tinfo == NULL) 96 + return -1; 97 + 98 + for (i = 0; i < nr_threads; i++) { 99 + if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn, 100 + (void *)i)) { 101 + free(tinfo); 102 + return -1; 103 + } 104 + } 105 + 106 + for (i = 0; i < nr_threads; i++) { 107 + ret = pthread_join(tinfo[i], NULL); 108 + if (ret) 109 + break; 110 + } 111 + 112 + free(tinfo); 113 + return ret; 114 + } 115 + 116 + static int cg_run_in_subcgroups(const char *parent, 117 + int (*fn)(const char *cgroup, void *arg), 118 + void *arg, int times) 119 + { 120 + char *child; 121 + int i; 122 + 123 + for (i = 0; i < times; i++) { 124 + child = cg_name_indexed(parent, "child", i); 125 + if (!child) 126 + return -1; 127 + 128 + if (cg_create(child)) { 129 + cg_destroy(child); 130 + free(child); 131 + 
return -1; 132 + } 133 + 134 + if (cg_run(child, fn, NULL)) { 135 + cg_destroy(child); 136 + free(child); 137 + return -1; 138 + } 139 + 140 + cg_destroy(child); 141 + free(child); 142 + } 143 + 144 + return 0; 145 + } 146 + 147 + /* 148 + * The test creates and destroys a large number of cgroups. In each cgroup it 149 + * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS 150 + * threads. Then it checks the sanity of numbers on the parent level: 151 + * the total size of the cgroups should be roughly equal to 152 + * anon + file + slab + kernel_stack. 153 + */ 154 + static int test_kmem_memcg_deletion(const char *root) 155 + { 156 + long current, slab, anon, file, kernel_stack, sum; 157 + int ret = KSFT_FAIL; 158 + char *parent; 159 + 160 + parent = cg_name(root, "kmem_memcg_deletion_test"); 161 + if (!parent) 162 + goto cleanup; 163 + 164 + if (cg_create(parent)) 165 + goto cleanup; 166 + 167 + if (cg_write(parent, "cgroup.subtree_control", "+memory")) 168 + goto cleanup; 169 + 170 + if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100)) 171 + goto cleanup; 172 + 173 + current = cg_read_long(parent, "memory.current"); 174 + slab = cg_read_key_long(parent, "memory.stat", "slab "); 175 + anon = cg_read_key_long(parent, "memory.stat", "anon "); 176 + file = cg_read_key_long(parent, "memory.stat", "file "); 177 + kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack "); 178 + if (current < 0 || slab < 0 || anon < 0 || file < 0 || 179 + kernel_stack < 0) 180 + goto cleanup; 181 + 182 + sum = slab + anon + file + kernel_stack; 183 + if (abs(sum - current) < 4096 * 32 * 2 * get_nprocs()) { 184 + ret = KSFT_PASS; 185 + } else { 186 + printf("memory.current = %ld\n", current); 187 + printf("slab + anon + file + kernel_stack = %ld\n", sum); 188 + printf("slab = %ld\n", slab); 189 + printf("anon = %ld\n", anon); 190 + printf("file = %ld\n", file); 191 + printf("kernel_stack = %ld\n", kernel_stack); 192 + } 193 + 194 + cleanup: 195 + 
cg_destroy(parent); 196 + free(parent); 197 + 198 + return ret; 199 + } 200 + 201 + /* 202 + * The test reads the entire /proc/kpagecgroup. If the operation went 203 + * successfully (and the kernel didn't panic), the test is treated as passed. 204 + */ 205 + static int test_kmem_proc_kpagecgroup(const char *root) 206 + { 207 + unsigned long buf[128]; 208 + int ret = KSFT_FAIL; 209 + ssize_t len; 210 + int fd; 211 + 212 + fd = open("/proc/kpagecgroup", O_RDONLY); 213 + if (fd < 0) 214 + return ret; 215 + 216 + do { 217 + len = read(fd, buf, sizeof(buf)); 218 + } while (len > 0); 219 + 220 + if (len == 0) 221 + ret = KSFT_PASS; 222 + 223 + close(fd); 224 + return ret; 225 + } 226 + 227 + static void *pthread_wait_fn(void *arg) 228 + { 229 + sleep(100); 230 + return NULL; 231 + } 232 + 233 + static int spawn_1000_threads(const char *cgroup, void *arg) 234 + { 235 + int nr_threads = 1000; 236 + pthread_t *tinfo; 237 + unsigned long i; 238 + long stack; 239 + int ret = -1; 240 + 241 + tinfo = calloc(nr_threads, sizeof(pthread_t)); 242 + if (tinfo == NULL) 243 + return -1; 244 + 245 + for (i = 0; i < nr_threads; i++) { 246 + if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn, 247 + (void *)i)) { 248 + free(tinfo); 249 + return(-1); 250 + } 251 + } 252 + 253 + stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack "); 254 + if (stack >= 4096 * 1000) 255 + ret = 0; 256 + 257 + free(tinfo); 258 + return ret; 259 + } 260 + 261 + /* 262 + * The test spawns a process, which spawns 1000 threads. Then it checks 263 + * that memory.stat's kernel_stack is at least 1000 pages large. 
264 + */ 265 + static int test_kmem_kernel_stacks(const char *root) 266 + { 267 + int ret = KSFT_FAIL; 268 + char *cg = NULL; 269 + 270 + cg = cg_name(root, "kmem_kernel_stacks_test"); 271 + if (!cg) 272 + goto cleanup; 273 + 274 + if (cg_create(cg)) 275 + goto cleanup; 276 + 277 + if (cg_run(cg, spawn_1000_threads, NULL)) 278 + goto cleanup; 279 + 280 + ret = KSFT_PASS; 281 + cleanup: 282 + cg_destroy(cg); 283 + free(cg); 284 + 285 + return ret; 286 + } 287 + 288 + /* 289 + * This test sequentionally creates 30 child cgroups, allocates some 290 + * kernel memory in each of them, and deletes them. Then it checks 291 + * that the number of dying cgroups on the parent level is 0. 292 + */ 293 + static int test_kmem_dead_cgroups(const char *root) 294 + { 295 + int ret = KSFT_FAIL; 296 + char *parent; 297 + long dead; 298 + int i; 299 + 300 + parent = cg_name(root, "kmem_dead_cgroups_test"); 301 + if (!parent) 302 + goto cleanup; 303 + 304 + if (cg_create(parent)) 305 + goto cleanup; 306 + 307 + if (cg_write(parent, "cgroup.subtree_control", "+memory")) 308 + goto cleanup; 309 + 310 + if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30)) 311 + goto cleanup; 312 + 313 + for (i = 0; i < 5; i++) { 314 + dead = cg_read_key_long(parent, "cgroup.stat", 315 + "nr_dying_descendants "); 316 + if (dead == 0) { 317 + ret = KSFT_PASS; 318 + break; 319 + } 320 + /* 321 + * Reclaiming cgroups might take some time, 322 + * let's wait a bit and repeat. 
323 + */ 324 + sleep(1); 325 + } 326 + 327 + cleanup: 328 + cg_destroy(parent); 329 + free(parent); 330 + 331 + return ret; 332 + } 333 + 334 + #define T(x) { x, #x } 335 + struct kmem_test { 336 + int (*fn)(const char *root); 337 + const char *name; 338 + } tests[] = { 339 + T(test_kmem_basic), 340 + T(test_kmem_memcg_deletion), 341 + T(test_kmem_proc_kpagecgroup), 342 + T(test_kmem_kernel_stacks), 343 + T(test_kmem_dead_cgroups), 344 + }; 345 + #undef T 346 + 347 + int main(int argc, char **argv) 348 + { 349 + char root[PATH_MAX]; 350 + int i, ret = EXIT_SUCCESS; 351 + 352 + if (cg_find_unified_root(root, sizeof(root))) 353 + ksft_exit_skip("cgroup v2 isn't mounted\n"); 354 + 355 + /* 356 + * Check that memory controller is available: 357 + * memory is listed in cgroup.controllers 358 + */ 359 + if (cg_read_strstr(root, "cgroup.controllers", "memory")) 360 + ksft_exit_skip("memory controller isn't available\n"); 361 + 362 + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 363 + if (cg_write(root, "cgroup.subtree_control", "+memory")) 364 + ksft_exit_skip("Failed to set memory controller\n"); 365 + 366 + for (i = 0; i < ARRAY_SIZE(tests); i++) { 367 + switch (tests[i].fn(root)) { 368 + case KSFT_PASS: 369 + ksft_test_result_pass("%s\n", tests[i].name); 370 + break; 371 + case KSFT_SKIP: 372 + ksft_test_result_skip("%s\n", tests[i].name); 373 + break; 374 + default: 375 + ret = EXIT_FAILURE; 376 + ksft_test_result_fail("%s\n", tests[i].name); 377 + break; 378 + } 379 + } 380 + 381 + return ret; 382 + }