/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE

#include <linux/limits.h>
#include <linux/oom.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
#include <sys/mman.h>

#include "kselftest.h"
#include "cgroup_util.h"

#define MEMCG_SOCKSTAT_WAIT_RETRIES	30

static bool has_localevents;
static bool has_recursiveprot;

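/*
 * Return an fd for an unlinked temporary file in the current directory,
 * used as a backing file for pagecache allocations.
 */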
int get_temp_fd(void)
{
	return open(".", O_TMPFILE | O_RDWR | O_EXCL);
}

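/*
 * Grow the file behind @fd by @size bytes and read the new range back,
 * so that it becomes resident in the pagecache.
 */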
int alloc_pagecache(int fd, size_t size)
{
	char buf[PAGE_SIZE];
	struct stat st;
	int i;

	if (fstat(fd, &st))
		goto cleanup;

	size += st.st_size;

	if (ftruncate(fd, size))
		goto cleanup;

	for (i = 0; i < size; i += sizeof(buf))
		read(fd, buf, sizeof(buf));

	return 0;

cleanup:
	return -1;
}

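/*
 * Allocate @arg bytes of anonymous memory and touch every page once,
 * so that the whole range is actually charged to the cgroup.
 */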
int alloc_anon(const char *cgroup, void *arg)
{
	size_t size = (unsigned long)arg;
	char *buf, *ptr;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	free(buf);
	return 0;
}

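/*
 * Returns 1 if at least one swap device is configured (i.e. /proc/swaps
 * contains more than its header line), 0 if none is, and -1 on read errors.
 */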
int is_swap_enabled(void)
{
	char buf[PAGE_SIZE];
	const char delim[] = "\n";
	int cnt = 0;
	char *line;

	if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
		return -1;

	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
		cnt++;

	return cnt > 1;
}

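/* Write @score to /proc/@pid/oom_score_adj. Returns 0 on success. */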
int set_oom_adj_score(int pid, int score)
{
	char path[PATH_MAX];
	int fd, len;

	sprintf(path, "/proc/%d/oom_score_adj", pid);

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return fd;

	len = dprintf(fd, "%d", score);
	if (len < 0) {
		close(fd);
		return len;
	}

	close(fd);
	return 0;
}

/*
 * This test creates two nested cgroups with and without enabling
 * the memory controller.
 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}

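/*
 * Allocate 50M of anonymous memory inside @cgroup and check that both
 * memory.current and the "anon" memory.stat counter track it closely.
 */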
static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	char *buf, *ptr;
	long anon, current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	if (!values_close(size, current, 3))
		goto cleanup;

	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
	if (anon < 0)
		goto cleanup;

	if (!values_close(anon, current, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

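/*
 * Allocate 50M of pagecache inside @cgroup and check that both
 * memory.current and the "file" memory.stat counter track it.
 */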
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, file;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0)
		goto cleanup;

	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test creates a memory cgroup, allocates
 * some anonymous memory and some pagecache,
 * and checks memory.current, memory.peak, and some memory.stat values.
 */
static int test_memcg_current_peak(const char *root)
{
	int ret = KSFT_FAIL;
	long current, peak, peak_reset;
	char *memcg;
	bool fd2_closed = false, fd3_closed = false, fd4_closed = false;
	int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1;
	struct stat ss;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak != 0)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/*
	 * We'll open a few FDs for the same memory.peak file to exercise the
	 * free path. We need at least three to be closed in a different order
	 * than the writes occurred, to test the linked-list handling.
	 */
	peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd == -1) {
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.peak's fd, try to figure out whether
	 * this kernel supports writing to that file in the first place. (by
	 * checking the writable bit on the file's st_mode)
	 */
	if (fstat(peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd2 == -1)
		goto cleanup;

	peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd3 == -1)
		goto cleanup;

	/* any non-empty string resets, but make it clear */
	static const char reset_string[] = "reset\n";

	peak_reset = write(peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd2, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd3, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	/* Make sure a completely independent read isn't affected by our FD-local reset above */
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	fd2_closed = true;
	if (close(peak_fd2))
		goto cleanup;

	peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd4 == -1)
		goto cleanup;

	peak_reset = write(peak_fd4, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd);
	if (peak > MB(30) || peak < 0)
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/* Make sure everything is back to normal */
	peak = cg_read_long_fd(peak_fd);
	if (peak < MB(50))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd4);
	if (peak < MB(50))
		goto cleanup;

	fd3_closed = true;
	if (close(peak_fd3))
		goto cleanup;

	fd4_closed = true;
	if (close(peak_fd4))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	close(peak_fd);
	if (!fd2_closed)
		close(peak_fd2);
	if (!fd3_closed)
		close(peak_fd3);
	if (!fd4_closed)
		close(peak_fd4);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

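/*
 * Allocate 50M of pagecache against the fd passed in @arg, then spin
 * until the parent exits (detected by a change of getppid() after the
 * process gets reparented).
 */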
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int ppid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

	while (getppid() == ppid)
		sleep(1);

	return 0;
}

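/*
 * Allocate and touch @arg bytes of anonymous memory, then keep it
 * mapped until the parent exits.
 */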
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	int ppid = getppid();
	size_t size = (unsigned long)arg;
	char *buf, *ptr;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	while (getppid() == ppid)
		sleep(1);

	free(buf);
	return 0;
}

/*
 * Wait until processes are killed asynchronously by the OOM killer.
 * If we exceed a timeout, fail.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int limit;

	for (limit = 10; limit > 0; limit--) {
		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
			return 0;

		usleep(100000);
	}
	return -1;
}

static bool reclaim_until(const char *memcg, long goal);

/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 0,    memory.max = 200M
 * A/B     memory.min = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
 * (or memory.low if we test soft protection)
 *
 * Usages are pagecache and the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates a significant
 * memory pressure in A.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 29M [memory.events:low > 0]
 * A/B/D  memory.current ~= 21M [memory.events:low > 0]
 * A/B/E  memory.current ~= 0   [memory.events:low == 0 if !memory_recursiveprot,
 *				 undefined otherwise]
 * A/B/F  memory.current  = 0   [memory.events:low == 0]
 * (for origin of the numbers, see model in memcg_protection.m.)
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks that:
 * a) memory.min protects pagecache even in this case,
 * b) memory.low allows reclaiming page cache with low events.
 *
 * Then we try to reclaim from A/B/C using memory.reclaim until its
 * usage reaches 10M.
 * This makes sure that:
 * (a) We ignore the protection of the reclaim target memcg.
 * (b) The previously calculated emin value (~29M) should be dismissed.
 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	long current;
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[1], attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(29), 15))
		goto cleanup;

	if (!values_close(c[1], MB(21), 20))
		goto cleanup;

	if (c[3] != 0)
		goto cleanup;

	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents allocating anon memory\n");
		goto cleanup;
	}

	current = min ? MB(50) : MB(30);
	if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
		goto cleanup;

	if (!reclaim_until(children[0], MB(10)))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	/*
	 * Child 2 has memory.low=0, but some low protection may still be
	 * distributed down from its parent with memory.low=50M if cgroup2
	 * memory_recursiveprot mount option is enabled. Ignore the low
	 * event count in this case.
	 */
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int ignore_low_events_index = has_recursiveprot ? 2 : -1;
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i == ignore_low_events_index)
			continue;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;
	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}

static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}

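/*
 * Allocate 50M of pagecache in a cgroup limited (via memory.high or
 * memory.max) to 30M, and check that usage is throttled around 30M.
 */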
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, high, max;
	int fd;

	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(current, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

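/*
 * mmap() and mlock() @arg bytes of anonymous memory, so that the whole
 * range is faulted in and charged within a single kernel entry.
 */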
static int alloc_anon_mlock(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	void *buf;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
		   0, 0);
	if (buf == MAP_FAILED)
		return -1;

	mlock(buf, size);
	munmap(buf, size);
	return 0;
}

/*
 * This test checks that memory.high is able to throttle a big single-shot
 * allocation, i.e. a large allocation within one kernel entry.
 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Reclaim from @memcg until usage reaches @goal by writing to
 * memory.reclaim.
 *
 * This function will return false if the usage is already below the
 * goal.
 *
 * This function assumes that writing to memory.reclaim is the only
 * source of change in memory.current (no concurrent allocations or
 * reclaim).
 *
 * This function makes sure memory.reclaim is sane. It will return
 * false if memory.reclaim's error codes do not make sense, even if
 * the usage goal was satisfied.
 */
static bool reclaim_until(const char *memcg, long goal)
{
	char buf[64];
	int retries, err;
	long current, to_reclaim;
	bool reclaimed = false;

	for (retries = 5; retries > 0; retries--) {
		current = cg_read_long(memcg, "memory.current");

		if (current < goal || values_close(current, goal, 3))
			break;
		/* Did memory.reclaim return 0 incorrectly? */
		else if (reclaimed)
			return false;

		to_reclaim = current - goal;
		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err)
			reclaimed = true;
		else if (err != -EAGAIN)
			return false;
	}
	return reclaimed;
}

/*
 * This test checks that memory.reclaim reclaims the given
 * amount of memory (from both anon and file, if possible).
 */
static int test_memcg_reclaim(const char *root)
{
	int ret = KSFT_FAIL;
	int fd = -1;
	int retries;
	char *memcg;
	long current, expected_usage;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);

	/*
	 * If swap is enabled, try to reclaim from both anon and file, else try
	 * to reclaim from file only.
	 */
	if (is_swap_enabled()) {
		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
		expected_usage = MB(100);
	} else
		expected_usage = MB(50);

	/*
	 * Wait until current usage reaches the expected usage (or we run out of
	 * retries).
	 */
	retries = 5;
	while (!values_close(cg_read_long(memcg, "memory.current"),
			     expected_usage, 10)) {
		if (retries--) {
			sleep(1);
			continue;
		} else {
			fprintf(stderr,
				"failed to allocate %ld for memcg reclaim test\n",
				expected_usage);
			goto cleanup;
		}
	}

	/*
	 * Reclaim until current reaches 30M, this makes sure we hit both anon
	 * and file if swap is enabled.
	 */
	if (!reclaim_until(memcg, MB(30)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(memcg);
	free(memcg);
	close(fd);

	return ret;
}

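/*
 * Allocate 50M of anonymous memory in a cgroup whose memory.max is
 * @arg, and verify that the overflow went to swap: memory.current
 * stays close to the limit, while memory.current + memory.swap.current
 * is close to the total allocation.
 */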
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out. Additionally, it verifies that
 * memory.swap.peak reflects the high watermark and can be reset.
 */
static int test_memcg_swap_max_peak(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max, peak;
	struct stat ss;
	int swap_peak_fd = -1, mem_peak_fd = -1;

	/* any non-empty string resets */
	static const char reset_string[] = "foobarbaz";

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	swap_peak_fd = cg_open(memcg, "memory.swap.peak",
			       O_RDWR | O_APPEND | O_CLOEXEC);

	if (swap_peak_fd == -1) {
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.swap.peak's fd, try to figure out
	 * whether this kernel supports writing to that file in the first
	 * place. (by checking the writable bit on the file's st_mode)
	 */
	if (fstat(swap_peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (mem_peak_fd == -1)
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak"))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	/* switch the swap and mem fds into local-peak tracking mode */
	int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));

	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak"))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	/*
	 * open, reset and close the peak swap on another FD to make sure
	 * multiple extant fds don't corrupt the linked-list
	 */
	peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	/* actually reset on the fds */
	peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak > MB(10))
		goto cleanup;

	/*
	 * The cgroup is now empty, but there may be a page or two associated
	 * with the open FD accounted to it.
	 */
	peak = cg_read_long_fd(mem_peak_fd);
	if (peak > MB(1))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak") < MB(29))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak") < MB(29))
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(19))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (mem_peak_fd != -1 && close(mem_peak_fd))
		ret = KSFT_FAIL;
	if (swap_peak_fd != -1 && close(swap_peak_fd))
		ret = KSFT_FAIL;
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

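/*
 * Control structure for the forked TCP server: the port to bind and a
 * pipe used to report the bind status back to the parent.
 */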
struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};

static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

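/*
 * Connect to the server on @port and keep reading from the socket
 * until memory.current minus the pre-connection baseline closely
 * matches the "sock" memory.stat counter, or the retry budget runs out.
 */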
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;
	long allocated;

	allocated = cg_read_long(cgroup, "memory.current");
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		/* exclude the memory not related to socket connection */
		if (values_close(current - allocated, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}

/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between
 * 1000 and 61000. Once it gets a client connection, it starts writing
 * to its socket.
 * The TCP client interleaves reads from the socket with checks that
 * memory.current and memory.stat.sock stay close to each other.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;
	long sock_post = -1;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		waitpid(pid, NULL, 0);
	}

	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	/*
	 * memory.stat is updated asynchronously via the memcg rstat
	 * flushing worker, which runs periodically (every 2 seconds,
	 * see FLUSH_TIME). On a busy system, the "sock " counter may
	 * stay non-zero for a short period of time after the TCP
	 * connection is closed and all socket memory has been
	 * uncharged.
	 *
	 * Poll memory.stat for up to 3 seconds (~FLUSH_TIME plus some
	 * scheduling slack) and require that the "sock " counter
	 * eventually drops to zero.
	 */
	sock_post = cg_read_key_long_poll(memcg, "memory.stat", "sock ", 0,
					  MEMCG_SOCKSTAT_WAIT_RETRIES,
					  DEFAULT_WAIT_INTERVAL_US);
	if (sock_post)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the leaf were killed. It also checks that oom_kill
 * events were propagated to the parent level.
 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long parent_oom_events;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	parent_oom_events = cg_read_key_long(
			parent, "memory.events", "oom_kill ");
	/*
	 * If memory_localevents is not enabled (the default), the parent should
	 * count OOM events in its children groups. Otherwise, it should not
	 * have observed any events.
	 */
	if (has_localevents && parent_oom_events != 0)
		goto cleanup;
	else if (!has_localevents && parent_oom_events <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the parent and leaf were killed.
 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes were killed except those set with OOM_SCORE_ADJ_MIN.
 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;

	memcg = cg_name(root, "memcg_test_0");

	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}

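/* T() pairs each test function with its stringified name for reporting. */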
#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current_peak),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max_peak),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T

int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, proc_status;

	ksft_print_header();
	ksft_set_plan(ARRAY_SIZE(tests));
	if (cg_find_unified_root(root, sizeof(root), NULL))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	proc_status = proc_mount_contains("memory_recursiveprot");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_recursiveprot = proc_status;

	proc_status = proc_mount_contains("memory_localevents");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_localevents = proc_status;

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	ksft_finished();
}