/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE

#include <linux/limits.h>
#include <linux/oom.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
#include <sys/mman.h>

#include "kselftest.h"
#include "cgroup_util.h"

#define MEMCG_SOCKSTAT_WAIT_RETRIES 30

static bool has_localevents;
static bool has_recursiveprot;

int get_temp_fd(void)
{
	return open(".", O_TMPFILE | O_RDWR | O_EXCL);
}

int alloc_pagecache(int fd, size_t size)
{
	char buf[PAGE_SIZE];
	struct stat st;
	int i;

	if (fstat(fd, &st))
		goto cleanup;

	size += st.st_size;

	if (ftruncate(fd, size))
		goto cleanup;

	for (i = 0; i < size; i += sizeof(buf))
		read(fd, buf, sizeof(buf));

	return 0;

cleanup:
	return -1;
}

int alloc_anon(const char *cgroup, void *arg)
{
	size_t size = (unsigned long)arg;
	char *buf, *ptr;

	buf = malloc(size);
	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	free(buf);
	return 0;
}

int is_swap_enabled(void)
{
	char buf[PAGE_SIZE];
	const char delim[] = "\n";
	int cnt = 0;
	char *line;

	if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
		return -1;

	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
		cnt++;

	return cnt > 1;
}

int set_oom_adj_score(int pid, int score)
{
	char path[PATH_MAX];
	int fd, len;

	sprintf(path, "/proc/%d/oom_score_adj", pid);

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return fd;

	len = dprintf(fd, "%d", score);
	if (len < 0) {
		close(fd);
		return len;
	}

	close(fd);
	return 0;
}

/*
 * This test creates two nested cgroups with and without enabling
 * the memory controller.
 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling the memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}

static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	char *buf, *ptr;
	long anon, current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	if (!values_close(size, current, 3))
		goto cleanup;

	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
	if (anon < 0)
		goto cleanup;

	if (!values_close(anon, current, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, file;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0)
		goto cleanup;

	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test creates a memory cgroup, allocates
 * some anonymous memory and some pagecache
 * and checks memory.current, memory.peak, and some memory.stat values.
 */
static int test_memcg_current_peak(const char *root)
{
	int ret = KSFT_FAIL;
	long current, peak, peak_reset;
	char *memcg;
	bool fd2_closed = false, fd3_closed = false, fd4_closed = false;
	int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1;
	struct stat ss;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak != 0)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/*
	 * We'll open a few FDs for the same memory.peak file to exercise the
	 * free path. We need at least three to be closed in a different order
	 * than writes occurred, to test the linked-list handling.
	 */
	peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd == -1) {
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.peak's fd, try to figure out whether
	 * this kernel supports writing to that file in the first place. (by
	 * checking the writable bit on the file's st_mode)
	 */
	if (fstat(peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd2 == -1)
		goto cleanup;

	peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd3 == -1)
		goto cleanup;

	/* any non-empty string resets, but make it clear */
	static const char reset_string[] = "reset\n";

	peak_reset = write(peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd2, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd3, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	/* Make sure a completely independent read isn't affected by our FD-local reset above */
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	fd2_closed = true;
	if (close(peak_fd2))
		goto cleanup;

	peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd4 == -1)
		goto cleanup;

	peak_reset = write(peak_fd4, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd);
	if (peak > MB(30) || peak < 0)
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/* Make sure everything is back to normal */
	peak = cg_read_long_fd(peak_fd);
	if (peak < MB(50))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd4);
	if (peak < MB(50))
		goto cleanup;

	fd3_closed = true;
	if (close(peak_fd3))
		goto cleanup;

	fd4_closed = true;
	if (close(peak_fd4))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	close(peak_fd);
	if (!fd2_closed)
		close(peak_fd2);
	if (!fd3_closed)
		close(peak_fd3);
	if (!fd4_closed)
		close(peak_fd4);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int ppid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

	while (getppid() == ppid)
		sleep(1);

	return 0;
}

static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	int ppid = getppid();
	size_t size = (unsigned long)arg;
	char *buf, *ptr;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	while (getppid() == ppid)
		sleep(1);

	free(buf);
	return 0;
}

/*
 * Wait until processes are killed asynchronously by the OOM killer.
 * If we exceed a timeout, fail.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int limit;

	for (limit = 10; limit > 0; limit--) {
		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
			return 0;

		usleep(100000);
	}
	return -1;
}

static bool reclaim_until(const char *memcg, long goal);

/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 0,    memory.max = 200M
 * A/B     memory.min = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
 * (or memory.low if we test soft protection)
 *
 * Usages are pagecache and the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates significant
 * memory pressure in A.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 29M [memory.events:low > 0]
 * A/B/D  memory.current ~= 21M [memory.events:low > 0]
 * A/B/E  memory.current ~= 0   [memory.events:low == 0 if !memory_recursiveprot,
 *                               undefined otherwise]
 * A/B/F  memory.current  = 0   [memory.events:low == 0]
 * (for origin of the numbers, see model in memcg_protection.m.)
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks that:
 * a) memory.min protects pagecache even in this case,
 * b) memory.low allows reclaiming page cache with low events.
 *
 * Then we try to reclaim from A/B/C using memory.reclaim until its
 * usage reaches 10M.
 * This makes sure that:
 * (a) We ignore the protection of the reclaim target memcg.
 * (b) The previously calculated emin value (~29M) should be dismissed.
 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	long current;
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[1], attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(29), 15))
		goto cleanup;

	if (!values_close(c[1], MB(21), 20))
		goto cleanup;

	if (c[3] != 0)
		goto cleanup;

	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents from allocating anon memory\n");
		goto cleanup;
	}

	current = min ? MB(50) : MB(30);
	if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
		goto cleanup;

	if (!reclaim_until(children[0], MB(10)))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	/*
	 * Child 2 has memory.low=0, but some low protection may still be
	 * distributed down from its parent with memory.low=50M if cgroup2
	 * memory_recursiveprot mount option is enabled. Ignore the low
	 * event count in this case.
	 */
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int ignore_low_events_index = has_recursiveprot ? 2 : -1;
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i == ignore_low_events_index)
			continue;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;

	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}

static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}

static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, high, max;
	int fd;

	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(current, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;

}

/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

static int alloc_anon_mlock(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	void *buf;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
		   0, 0);
	if (buf == MAP_FAILED)
		return -1;

	mlock(buf, size);
	munmap(buf, size);
	return 0;
}

/*
 * This test checks that memory.high is able to throttle big single shot
 * allocation i.e. large allocation within one kernel entry.
 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Reclaim from @memcg until usage reaches @goal by writing to
 * memory.reclaim.
 *
 * This function will return false if the usage is already below the
 * goal.
 *
 * This function assumes that writing to memory.reclaim is the only
 * source of change in memory.current (no concurrent allocations or
 * reclaim).
 *
 * This function makes sure memory.reclaim is sane. It will return
 * false if memory.reclaim's error codes do not make sense, even if
 * the usage goal was satisfied.
 */
static bool reclaim_until(const char *memcg, long goal)
{
	char buf[64];
	int retries, err;
	long current, to_reclaim;
	bool reclaimed = false;

	for (retries = 5; retries > 0; retries--) {
		current = cg_read_long(memcg, "memory.current");

		if (current < goal || values_close(current, goal, 3))
			break;
		/* Did memory.reclaim return 0 incorrectly? */
		else if (reclaimed)
			return false;

		to_reclaim = current - goal;
		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err)
			reclaimed = true;
		else if (err != -EAGAIN)
			return false;
	}
	return reclaimed;
}

/*
 * This test checks that memory.reclaim reclaims the given
 * amount of memory (from both anon and file, if possible).
 */
static int test_memcg_reclaim(const char *root)
{
	int ret = KSFT_FAIL;
	int fd = -1;
	int retries;
	char *memcg;
	long current, expected_usage;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);

	/*
	 * If swap is enabled, try to reclaim from both anon and file, else try
	 * to reclaim from file only.
	 */
	if (is_swap_enabled()) {
		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
		expected_usage = MB(100);
	} else
		expected_usage = MB(50);

	/*
	 * Wait until current usage reaches the expected usage (or we run out of
	 * retries).
	 */
	retries = 5;
	while (!values_close(cg_read_long(memcg, "memory.current"),
			     expected_usage, 10)) {
		if (retries--) {
			sleep(1);
			continue;
		} else {
			fprintf(stderr,
				"failed to allocate %ld for memcg reclaim test\n",
				expected_usage);
			goto cleanup;
		}
	}

	/*
	 * Reclaim until current reaches 30M, this makes sure we hit both anon
	 * and file if swap is enabled.
	 */
	if (!reclaim_until(memcg, MB(30)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(memcg);
	free(memcg);
	close(fd);

	return ret;
}

static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out. Additionally, it verifies that
 * memory.swap.peak reflects the high watermark and can be reset.
 */
static int test_memcg_swap_max_peak(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max, peak;
	struct stat ss;
	int swap_peak_fd = -1, mem_peak_fd = -1;

	/* any non-empty string resets */
	static const char reset_string[] = "foobarbaz";

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	swap_peak_fd = cg_open(memcg, "memory.swap.peak",
			       O_RDWR | O_APPEND | O_CLOEXEC);

	if (swap_peak_fd == -1) {
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.swap.peak's fd, try to figure out
	 * whether this kernel supports writing to that file in the first
	 * place. (by checking the writable bit on the file's st_mode)
	 */
	if (fstat(swap_peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (mem_peak_fd == -1)
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak"))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	/* switch the swap and mem fds into local-peak tracking mode */
	int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));

	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak"))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	/*
	 * open, reset and close the peak swap on another FD to make sure
	 * multiple extant fds don't corrupt the linked-list
	 */
	peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	/* actually reset on the fds */
	peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak > MB(10))
		goto cleanup;

	/*
	 * The cgroup is now empty, but there may be a page or two associated
	 * with the open FD accounted to it.
	 */
	peak = cg_read_long_fd(mem_peak_fd);
	if (peak > MB(1))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak") < MB(29))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak") < MB(29))
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(19))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (mem_peak_fd != -1 && close(mem_peak_fd))
		ret = KSFT_FAIL;
	if (swap_peak_fd != -1 && close(swap_peak_fd))
		ret = KSFT_FAIL;
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};

static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

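	/*
	 * Serve a single client: accept one connection and keep writing to it
	 * until the peer disconnects; ECONNRESET on write is treated as the
	 * expected, clean end of the exchange.
	 */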
	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;
	long allocated;

	allocated = cg_read_long(cgroup, "memory.current");
	snprintf(servport, sizeof(servport), "%hd", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		/* exclude the memory not related to socket connection */
		if (values_close(current - allocated, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}

/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between 1000
 * and 61000. Once it gets a client connection, it starts writing to
 * its socket.
 * The TCP client interleaves reads from the socket with checks that
 * memory.current and memory.stat's sock counter stay similar.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;
	long sock_post = -1;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		waitpid(pid, NULL, 0);
	}

	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	/*
	 * memory.stat is updated asynchronously via the memcg rstat
	 * flushing worker, which runs periodically (every 2 seconds,
	 * see FLUSH_TIME). On a busy system, the "sock " counter may
	 * stay non-zero for a short period of time after the TCP
	 * connection is closed and all socket memory has been
	 * uncharged.
	 *
	 * Poll memory.stat for up to 3 seconds (~FLUSH_TIME plus some
	 * scheduling slack) and require that the "sock " counter
	 * eventually drops to zero.
	 */
	sock_post = cg_read_key_long_poll(memcg, "memory.stat", "sock ", 0,
					  MEMCG_SOCKSTAT_WAIT_RETRIES,
					  DEFAULT_WAIT_INTERVAL_US);
	if (sock_post)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the leaf were killed. It also checks that oom_kill
 * events were propagated to the parent level.
 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long parent_oom_events;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	parent_oom_events = cg_read_key_long(
			parent, "memory.events", "oom_kill ");
	/*
	 * If memory_localevents is not enabled (the default), the parent should
	 * count OOM events in its children groups. Otherwise, it should not
	 * have observed any events.
	 */
	if (has_localevents && parent_oom_events != 0)
		goto cleanup;
	else if (!has_localevents && parent_oom_events <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the parent and leaf were killed.
 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes were killed except those set with OOM_SCORE_ADJ_MIN.
 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;

	memcg = cg_name(root, "memcg_test_0");

	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}

#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current_peak),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max_peak),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T

int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, proc_status;

	ksft_print_header();
	ksft_set_plan(ARRAY_SIZE(tests));
	if (cg_find_unified_root(root, sizeof(root), NULL))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	proc_status = proc_mount_contains("memory_recursiveprot");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_recursiveprot = proc_status;

	proc_status = proc_mount_contains("memory_localevents");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_localevents = proc_status;

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	ksft_finished();
}