Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2#define _GNU_SOURCE
3
4#include <linux/limits.h>
5#include <unistd.h>
6#include <stdio.h>
7#include <signal.h>
8#include <sys/sysinfo.h>
9#include <string.h>
10#include <sys/wait.h>
11#include <sys/mman.h>
12
13#include "../kselftest.h"
14#include "cgroup_util.h"
15
/*
 * Read one unsigned decimal integer from the file at @path into @value.
 *
 * Returns 0 on success, -1 if the file cannot be opened or if no integer
 * could be parsed from it.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* %zu is the conversion matching size_t *; %ld expects a long *. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
29
/*
 * Write @value, followed by a newline, to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the (positive) number of characters formatted on success and a
 * negative value if the file cannot be opened or the write fails.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* %zu is the conversion matching size_t. */
	ret = fprintf(file, "%zu\n", value);
	/*
	 * stdio buffers the output, so the data may only reach the file when
	 * fclose() flushes the stream; treat a failure there as a failed
	 * write as well.
	 */
	if (fclose(file) != 0 && ret >= 0)
		ret = -1;
	return ret;
}
42
/* Fetch the value stored in /proc/sys/vm/min_free_kbytes via read_int(). */
static int read_min_free_kb(size_t *value)
{
	static const char sysctl_path[] = "/proc/sys/vm/min_free_kbytes";

	return read_int(sysctl_path, value);
}
47
/* Fetch the zswap debugfs "stored_pages" value via read_int(). */
static int get_zswap_stored_pages(size_t *value)
{
	static const char debugfs_path[] = "/sys/kernel/debug/zswap/stored_pages";

	return read_int(debugfs_path, value);
}
52
/* Fetch the zswap debugfs "written_back_pages" value via read_int(). */
static int get_zswap_written_back_pages(size_t *value)
{
	static const char debugfs_path[] =
		"/sys/kernel/debug/zswap/written_back_pages";

	return read_int(debugfs_path, value);
}
57
/* Look up the "zswpout " key in @cgroup's memory.stat and return its value. */
static long get_zswpout(const char *cgroup)
{
	long count = cg_read_key_long(cgroup, "memory.stat", "zswpout ");

	return count;
}
62
/*
 * Callback handed to cg_run(): allocate a buffer and write one byte every
 * 4095 bytes so that the memory is actually touched, then release it.
 *
 * @cgroup is unused. @arg carries the size in bytes, passed by value in the
 * pointer itself.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/*
	 * The index must be a size_t: an int index would overflow once
	 * size exceeds INT_MAX.
	 */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
75
76/*
77 * Sanity test to check that pages are written into zswap.
78 */
79static int test_zswap_usage(const char *root)
80{
81 long zswpout_before, zswpout_after;
82 int ret = KSFT_FAIL;
83 char *test_group;
84
85 /* Set up */
86 test_group = cg_name(root, "no_shrink_test");
87 if (!test_group)
88 goto out;
89 if (cg_create(test_group))
90 goto out;
91 if (cg_write(test_group, "memory.max", "1M"))
92 goto out;
93
94 zswpout_before = get_zswpout(test_group);
95 if (zswpout_before < 0) {
96 ksft_print_msg("Failed to get zswpout\n");
97 goto out;
98 }
99
100 /* Allocate more than memory.max to push memory into zswap */
101 if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
102 goto out;
103
104 /* Verify that pages come into zswap */
105 zswpout_after = get_zswpout(test_group);
106 if (zswpout_after <= zswpout_before) {
107 ksft_print_msg("zswpout does not increase after test program\n");
108 goto out;
109 }
110 ret = KSFT_PASS;
111
112out:
113 cg_destroy(test_group);
114 free(test_group);
115 return ret;
116}
117
118/*
119 * When trying to store a memcg page in zswap, if the memcg hits its memory
120 * limit in zswap, writeback should not be triggered.
121 *
122 * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may
123 * not zswap"). Needs to be revised when a per memcg writeback mechanism is
124 * implemented.
125 */
126static int test_no_invasive_cgroup_shrink(const char *root)
127{
128 size_t written_back_before, written_back_after;
129 int ret = KSFT_FAIL;
130 char *test_group;
131
132 /* Set up */
133 test_group = cg_name(root, "no_shrink_test");
134 if (!test_group)
135 goto out;
136 if (cg_create(test_group))
137 goto out;
138 if (cg_write(test_group, "memory.max", "1M"))
139 goto out;
140 if (cg_write(test_group, "memory.zswap.max", "10K"))
141 goto out;
142 if (get_zswap_written_back_pages(&written_back_before))
143 goto out;
144
145 /* Allocate 10x memory.max to push memory into zswap */
146 if (cg_run(test_group, allocate_bytes, (void *)MB(10)))
147 goto out;
148
149 /* Verify that no writeback happened because of the memcg allocation */
150 if (get_zswap_written_back_pages(&written_back_after))
151 goto out;
152 if (written_back_after == written_back_before)
153 ret = KSFT_PASS;
154out:
155 cg_destroy(test_group);
156 free(test_group);
157 return ret;
158}
159
/*
 * Arguments exchanged between test_no_kmem_bypass() and the child it runs:
 * the parent fills in the size, the child reports back via child_allocated.
 */
struct no_kmem_bypass_child_args {
	size_t target_alloc_bytes;	/* number of bytes the child allocates */
	size_t child_allocated;		/* set to true once the child has tried */
};

/*
 * Child body: allocate target_alloc_bytes, write one byte every 4095 bytes,
 * flag completion and then block in pause().
 *
 * Returns -1 if the allocation fails (completion is flagged in that case
 * too), 0 once pause() has returned and the buffer has been freed.
 */
static int no_kmem_bypass_child(const char *cgroup, void *arg)
{
	struct no_kmem_bypass_child_args *args = arg;
	char *buf = malloc(args->target_alloc_bytes);

	if (buf) {
		for (long off = 0; off < args->target_alloc_bytes; off += 4095)
			buf[off] = 'a';
	}

	/* Raised on both paths so a waiter polling the flag is released */
	args->child_allocated = true;
	if (!buf)
		return -1;

	pause();
	free(buf);
	return 0;
}
182
183/*
184 * When pages owned by a memcg are pushed to zswap by kswapd, they should be
185 * charged to that cgroup. This wasn't the case before commit
186 * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
187 *
188 * The test first allocates memory in a memcg, then raises min_free_kbytes to
189 * a very high value so that the allocation falls below low wm, then makes
190 * another allocation to trigger kswapd that should push the memcg-owned pages
191 * to zswap and verifies that the zswap pages are correctly charged.
192 *
193 * To be run on a VM with at most 4G of memory.
194 */
195static int test_no_kmem_bypass(const char *root)
196{
197 size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
198 struct no_kmem_bypass_child_args *values;
199 size_t trigger_allocation_size;
200 int wait_child_iteration = 0;
201 long stored_pages_threshold;
202 struct sysinfo sys_info;
203 int ret = KSFT_FAIL;
204 int child_status;
205 char *test_group;
206 pid_t child_pid;
207
208 /* Read sys info and compute test values accordingly */
209 if (sysinfo(&sys_info) != 0)
210 return KSFT_FAIL;
211 if (sys_info.totalram > 5000000000)
212 return KSFT_SKIP;
213 values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
214 PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
215 if (values == MAP_FAILED)
216 return KSFT_FAIL;
217 if (read_min_free_kb(&min_free_kb_original))
218 return KSFT_FAIL;
219 min_free_kb_high = sys_info.totalram / 2000;
220 min_free_kb_low = sys_info.totalram / 500000;
221 values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
222 sys_info.totalram * 5 / 100;
223 stored_pages_threshold = sys_info.totalram / 5 / 4096;
224 trigger_allocation_size = sys_info.totalram / 20;
225
226 /* Set up test memcg */
227 if (cg_write(root, "cgroup.subtree_control", "+memory"))
228 goto out;
229 test_group = cg_name(root, "kmem_bypass_test");
230 if (!test_group)
231 goto out;
232
233 /* Spawn memcg child and wait for it to allocate */
234 set_min_free_kb(min_free_kb_low);
235 if (cg_create(test_group))
236 goto out;
237 values->child_allocated = false;
238 child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
239 if (child_pid < 0)
240 goto out;
241 while (!values->child_allocated && wait_child_iteration++ < 10000)
242 usleep(1000);
243
244 /* Try to wakeup kswapd and let it push child memory to zswap */
245 set_min_free_kb(min_free_kb_high);
246 for (int i = 0; i < 20; i++) {
247 size_t stored_pages;
248 char *trigger_allocation = malloc(trigger_allocation_size);
249
250 if (!trigger_allocation)
251 break;
252 for (int i = 0; i < trigger_allocation_size; i += 4095)
253 trigger_allocation[i] = 'b';
254 usleep(100000);
255 free(trigger_allocation);
256 if (get_zswap_stored_pages(&stored_pages))
257 break;
258 if (stored_pages < 0)
259 break;
260 /* If memory was pushed to zswap, verify it belongs to memcg */
261 if (stored_pages > stored_pages_threshold) {
262 int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
263 int delta = stored_pages * 4096 - zswapped;
264 int result_ok = delta < stored_pages * 4096 / 4;
265
266 ret = result_ok ? KSFT_PASS : KSFT_FAIL;
267 break;
268 }
269 }
270
271 kill(child_pid, SIGTERM);
272 waitpid(child_pid, &child_status, 0);
273out:
274 set_min_free_kb(min_free_kb_original);
275 cg_destroy(test_group);
276 free(test_group);
277 return ret;
278}
279
280#define T(x) { x, #x }
281struct zswap_test {
282 int (*fn)(const char *root);
283 const char *name;
284} tests[] = {
285 T(test_zswap_usage),
286 T(test_no_kmem_bypass),
287 T(test_no_invasive_cgroup_shrink),
288};
289#undef T
290
/* Report whether /sys/module/zswap exists. */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;
	return true;
}
295
296int main(int argc, char **argv)
297{
298 char root[PATH_MAX];
299 int i, ret = EXIT_SUCCESS;
300
301 if (cg_find_unified_root(root, sizeof(root)))
302 ksft_exit_skip("cgroup v2 isn't mounted\n");
303
304 if (!zswap_configured())
305 ksft_exit_skip("zswap isn't configured\n");
306
307 /*
308 * Check that memory controller is available:
309 * memory is listed in cgroup.controllers
310 */
311 if (cg_read_strstr(root, "cgroup.controllers", "memory"))
312 ksft_exit_skip("memory controller isn't available\n");
313
314 if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
315 if (cg_write(root, "cgroup.subtree_control", "+memory"))
316 ksft_exit_skip("Failed to set memory controller\n");
317
318 for (i = 0; i < ARRAY_SIZE(tests); i++) {
319 switch (tests[i].fn(root)) {
320 case KSFT_PASS:
321 ksft_test_result_pass("%s\n", tests[i].name);
322 break;
323 case KSFT_SKIP:
324 ksft_test_result_skip("%s\n", tests[i].name);
325 break;
326 default:
327 ret = EXIT_FAILURE;
328 ksft_test_result_fail("%s\n", tests[i].name);
329 break;
330 }
331 }
332
333 return ret;
334}