Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#define _GNU_SOURCE
4
5#include <errno.h>
6#include <fcntl.h>
7#include <linux/limits.h>
8#include <poll.h>
9#include <signal.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13#include <sys/inotify.h>
14#include <sys/stat.h>
15#include <sys/types.h>
16#include <sys/wait.h>
17#include <unistd.h>
18
19#include "cgroup_util.h"
20#include "../../clone3/clone3_selftests.h"
21
/*
 * Read at most max_len - 1 bytes from the file at @path into @buf and
 * NUL-terminate the result.
 *
 * Returns the number of bytes read on success, or -errno on failure.
 * A zero-sized buffer is rejected with -EINVAL because it has no room for
 * the terminator (and max_len - 1 would wrap around).
 */
ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int err = 0;
	int fd;

	if (!max_len)
		return -EINVAL;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	len = read(fd, buf, max_len - 1);
	if (len < 0)
		err = errno;	/* save it: close() below may overwrite errno */
	else
		buf[len] = 0;

	close(fd);
	return len < 0 ? -err : len;
}
40
/*
 * Append @len bytes from @buf to the existing file at @path (the file is
 * opened with O_WRONLY | O_APPEND and is not created).
 *
 * Returns the number of bytes written on success, or -errno on failure.
 */
ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	int err = 0;
	int fd;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	len = write(fd, buf, len);
	if (len < 0)
		err = errno;	/* save it: close() below may overwrite errno */

	close(fd);
	return len < 0 ? -err : len;
}
54
/*
 * Build the path "<root>/<name>" in a freshly allocated string.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* +2: one byte for the '/' separator, one for the terminating NUL */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
64
/*
 * Build the path "<root>/<name>_<index>" in a freshly allocated string.
 *
 * The buffer is sized from the formatted length, so any int value of
 * @index (including negative ones) fits without truncation.
 *
 * Returns the new string, which the caller must free(), or NULL if
 * formatting or allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	char *ret;
	int need;

	/* A NULL/0 destination makes snprintf() report the required length. */
	need = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	if (need < 0)
		return NULL;

	ret = malloc((size_t)need + 1);
	if (!ret)
		return NULL;

	snprintf(ret, (size_t)need + 1, "%s/%s_%d", root, name, index);

	return ret;
}
74
/*
 * Build the path "<cgroup>/<control>" in a freshly allocated string.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* +2: one byte for the '/' separator, one for the terminating NUL */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
84
/*
 * Read the file "<cgroup>/<control>" into @buf (at most len - 1 bytes,
 * NUL-terminated by read_text()).
 *
 * Returns 0 on success, or the negative value reported by read_text()
 * on failure.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char file[PATH_MAX];
	ssize_t nread;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	nread = read_text(file, buf, len);
	if (nread < 0)
		return nread;

	return 0;
}
96
/*
 * Compare the contents of "<cgroup>/<control>" with @expected.
 *
 * The read buffer holds strlen(expected) + 1 bytes, so at most
 * strlen(expected) bytes of the file are read: a file that merely starts
 * with @expected therefore compares equal.
 *
 * Returns the strcmp() result (0 when equal), or -1 if @expected is NULL,
 * the buffer cannot be allocated, or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	int result = -1;
	size_t capacity;
	char *content;

	/* A NULL expectation can never match. */
	if (!expected)
		return -1;

	capacity = strlen(expected) + 1;
	content = malloc(capacity);
	if (!content)
		return -1;

	if (!cg_read(cgroup, control, content, capacity))
		result = strcmp(expected, content);

	free(content);
	return result;
}
123
124int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
125{
126 char buf[PAGE_SIZE];
127
128 if (cg_read(cgroup, control, buf, sizeof(buf)))
129 return -1;
130
131 return strstr(buf, needle) ? 0 : -1;
132}
133
/*
 * Read "<cgroup>/<control>" and parse its contents with atol().
 *
 * Returns the parsed value, or -1 if the file cannot be read.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];

	return cg_read(cgroup, control, text, sizeof(text)) ? -1 : atol(text);
}
143
/*
 * Read from offset 0 of the already open descriptor @fd and parse the
 * data with atol().
 *
 * Returns the parsed value, or -1 if nothing could be read.
 */
long cg_read_long_fd(int fd)
{
	char buf[128];
	ssize_t len;

	/* Leave room for the terminator: pread() does not add one. */
	len = pread(fd, buf, sizeof(buf) - 1, 0);
	if (len <= 0)
		return -1;

	buf[len] = '\0';
	return atol(buf);
}
153
154long cg_read_key_long(const char *cgroup, const char *control, const char *key)
155{
156 char buf[PAGE_SIZE];
157 char *ptr;
158
159 if (cg_read(cgroup, control, buf, sizeof(buf)))
160 return -1;
161
162 ptr = strstr(buf, key);
163 if (!ptr)
164 return -1;
165
166 return atol(ptr + strlen(key));
167}
168
169long cg_read_lc(const char *cgroup, const char *control)
170{
171 char buf[PAGE_SIZE];
172 const char delim[] = "\n";
173 char *line;
174 long cnt = 0;
175
176 if (cg_read(cgroup, control, buf, sizeof(buf)))
177 return -1;
178
179 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
180 cnt++;
181
182 return cnt;
183}
184
/*
 * Write the NUL-terminated string @buf to "<cgroup>/<control>".
 *
 * Returns 0 when write_text() reports that the whole string was written;
 * otherwise returns the write_text() result unchanged.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char file[PATH_MAX];
	ssize_t want = strlen(buf);
	ssize_t written;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	written = write_text(file, buf, want);
	if (written == want)
		return 0;

	return written;
}
195
/*
 * Open "<cgroup>/<control>" with the given open(2) @flags.
 *
 * Returns the file descriptor on success, or -1 on failure. The caller
 * closes the descriptor with close() as usual.
 */
int cg_open(const char *cgroup, const char *control, int flags)
{
	char file[PATH_MAX];
	int fd;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	fd = open(file, flags);

	return fd;
}
207
/*
 * Format @value as a signed decimal number and write it to
 * "<cgroup>/<control>".
 *
 * Returns the cg_write() result, or a negative value if formatting fails.
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
	char buf[64];
	int ret;

	/* %ld matches the signed long argument (the old %lu did not). */
	ret = snprintf(buf, sizeof(buf), "%ld", value);
	if (ret < 0)
		return ret;

	return cg_write(cgroup, control, buf);
}
219
220static int cg_find_root(char *root, size_t len, const char *controller,
221 bool *nsdelegate)
222{
223 char buf[10 * PAGE_SIZE];
224 char *fs, *mount, *type, *options;
225 const char delim[] = "\n\t ";
226
227 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
228 return -1;
229
230 /*
231 * Example:
232 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
233 */
234 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
235 mount = strtok(NULL, delim);
236 type = strtok(NULL, delim);
237 options = strtok(NULL, delim);
238 strtok(NULL, delim);
239 strtok(NULL, delim);
240 if (strcmp(type, "cgroup") == 0) {
241 if (!controller || !strstr(options, controller))
242 continue;
243 } else if (strcmp(type, "cgroup2") == 0) {
244 if (controller &&
245 cg_read_strstr(mount, "cgroup.controllers", controller))
246 continue;
247 } else {
248 continue;
249 }
250 strncpy(root, mount, len);
251
252 if (nsdelegate)
253 *nsdelegate = !!strstr(options, "nsdelegate");
254 return 0;
255
256 }
257
258 return -1;
259}
260
/*
 * Find the cgroup mount that provides @controller and copy its mount
 * point into @root (@len bytes). The nsdelegate mount option is not
 * reported.
 *
 * Returns the cg_find_root() result.
 */
int cg_find_controller_root(char *root, size_t len, const char *controller)
{
	bool *nsdelegate_unused = NULL;

	return cg_find_root(root, len, controller, nsdelegate_unused);
}
265
/*
 * Find a cgroup mount without asking for a particular controller and copy
 * its mount point into @root (@len bytes). @nsdelegate is passed through
 * to cg_find_root().
 *
 * Returns the cg_find_root() result.
 */
int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
{
	const char *any_controller = NULL;

	return cg_find_root(root, len, any_controller, nsdelegate);
}
270
/*
 * Create the directory @cgroup with mode 0755.
 *
 * Returns the mkdir() result.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;
	int err;

	err = mkdir(cgroup, mode);
	return err;
}
275
276int cg_wait_for_proc_count(const char *cgroup, int count)
277{
278 char buf[10 * PAGE_SIZE] = {0};
279 int attempts;
280 char *ptr;
281
282 for (attempts = 10; attempts >= 0; attempts--) {
283 int nr = 0;
284
285 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
286 break;
287
288 for (ptr = buf; *ptr; ptr++)
289 if (*ptr == '\n')
290 nr++;
291
292 if (nr >= count)
293 return 0;
294
295 usleep(100000);
296 }
297
298 return -1;
299}
300
301int cg_killall(const char *cgroup)
302{
303 char buf[PAGE_SIZE];
304 char *ptr = buf;
305
306 /* If cgroup.kill exists use it. */
307 if (!cg_write(cgroup, "cgroup.kill", "1"))
308 return 0;
309
310 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
311 return -1;
312
313 while (ptr < buf + sizeof(buf)) {
314 int pid = strtol(ptr, &ptr, 10);
315
316 if (pid == 0)
317 break;
318 if (*ptr)
319 ptr++;
320 else
321 break;
322 if (kill(pid, SIGKILL))
323 return -1;
324 }
325
326 return 0;
327}
328
/*
 * Remove the directory @cgroup. While rmdir() fails with EBUSY, the
 * cgroup's processes are killed via cg_killall() and the removal is
 * retried after a 100 us pause.
 *
 * Returns 0 on success, when @cgroup is NULL, or when the directory does
 * not exist; otherwise returns the failing rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	if (!cgroup)
		return 0;

	for (;;) {
		ret = rmdir(cgroup);
		if (!(ret && errno == EBUSY))
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	/* A directory that is already gone counts as destroyed. */
	if (ret && errno == ENOENT)
		ret = 0;

	return ret;
}
348
/*
 * Write @pid, formatted as a decimal number, to "<cgroup>/cgroup.procs".
 *
 * Returns the cg_write() result.
 */
int cg_enter(const char *cgroup, int pid)
{
	char pid_text[64];
	int err;

	snprintf(pid_text, sizeof(pid_text), "%d", pid);
	err = cg_write(cgroup, "cgroup.procs", pid_text);

	return err;
}
356
/*
 * Write "0" to "<cgroup>/cgroup.procs".
 *
 * Returns the cg_write() result.
 */
int cg_enter_current(const char *cgroup)
{
	int err = cg_write(cgroup, "cgroup.procs", "0");

	return err;
}
361
/*
 * Write "0" to "<cgroup>/cgroup.threads".
 *
 * Returns the cg_write() result.
 */
int cg_enter_current_thread(const char *cgroup)
{
	int err = cg_write(cgroup, "cgroup.threads", "0");

	return err;
}
366
/*
 * Fork a child, move it into @cgroup by writing its pid to cgroup.procs,
 * run @fn(@cgroup, @arg) in it and wait for it to finish.
 *
 * The child exits with EXIT_FAILURE if it cannot enter the cgroup, and
 * with the return value of @fn otherwise.
 *
 * Returns the child's exit status if it exited normally, the negative
 * fork() result if the fork failed, or -1 if waiting failed or the child
 * did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only meaningful once waitpid() has succeeded, so retry
	 * when interrupted and give up on any other error.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
391
/*
 * Create a child process directly inside the cgroup referred to by
 * @cgroup_fd, using clone3() with CLONE_INTO_CGROUP.
 *
 * Returns the sys_clone3() result when the call is attempted and does not
 * fail with ENOSYS or E2BIG. In every other case (including builds where
 * CLONE_ARGS_SIZE_VER2 is not defined) errno is set to ENOSYS and -ENOSYS
 * is returned, so callers can tell "not supported" from a real failure.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t child = sys_clone3(&args, sizeof(args));

	if (child >= 0)
		return child;

	/*
	 * Only these two errors mean "unsupported":
	 *   ENOSYS -> clone3() not available
	 *   E2BIG  -> CLONE_INTO_CGROUP not available
	 * Anything else is reported to the caller as is.
	 */
	if (errno != ENOSYS && errno != E2BIG)
		return child;
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
419
/*
 * Wait for a state change of child @pid with waitid(), retrying when the
 * call is interrupted. @options selects the state changes of interest
 * (WEXITED, WSTOPPED, WCONTINUED); __WALL and __WNOTHREAD are always
 * added.
 *
 * waitid() reports the kind of change in si_code and the matching exit
 * code or signal number in si_status, so si_status is interpreted through
 * si_code rather than through the W*() wait-status macros.
 *
 * Returns:
 *   the child's exit code if WEXITED was requested and it exited,
 *   the signal number     if WSTOPPED was requested and it stopped,
 *   0                     if WCONTINUED was requested and it continued,
 *   -1                    on waitid() failure or any other outcome.
 */
int clone_reap(pid_t pid, int options)
{
	int ret;
	siginfo_t info = {
		.si_signo = 0,
	};

again:
	ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	if (ret < 0) {
		if (errno == EINTR)
			goto again;
		return -1;
	}

	if ((options & WEXITED) && info.si_code == CLD_EXITED)
		return info.si_status;

	if ((options & WSTOPPED) &&
	    (info.si_code == CLD_STOPPED || info.si_code == CLD_TRAPPED))
		return info.si_status;

	if ((options & WCONTINUED) && info.si_code == CLD_CONTINUED)
		return 0;

	return -1;
}
452
/*
 * Open @dir with O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC.
 *
 * Returns the open() result.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
457
/*
 * Close @fd if it is non-negative, leaving errno exactly as it was before
 * the call. The argument is evaluated once, and the do/while wrapper
 * makes the macro behave as a single statement (safe inside if/else).
 */
#define close_prot_errno(fd)				\
	do {						\
		int _fd_ = (fd);			\
		if (_fd_ >= 0) {			\
			int _e_ = errno;		\
			close(_fd_);			\
			errno = _e_;			\
		}					\
	} while (0)
464
/*
 * Start a child directly inside @cgroup via clone_into_cgroup() and run
 * @fn(@cgroup, @arg) in it; the child exits with the value @fn returns.
 * The parent does not wait for the child.
 *
 * Returns the clone_into_cgroup() result in the parent, or -1 if @cgroup
 * cannot be opened. errno from the clone attempt is preserved across the
 * close of the cgroup descriptor.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	int saved_errno;
	pid_t child;
	int dirfd;

	dirfd = dirfd_open_opath(cgroup);
	if (dirfd < 0)
		return -1;

	child = clone_into_cgroup(dirfd);

	/* dirfd is valid here; close it without disturbing errno. */
	saved_errno = errno;
	close(dirfd);
	errno = saved_errno;

	if (child == 0)
		exit(fn(cgroup, arg));

	return child;
}
483
/*
 * Run @fn(@cgroup, @arg) in a child process placed in @cgroup, without
 * waiting for the child.
 *
 * The child is first created with clone_into_cgroup_run_nowait(). If
 * that reports ENOSYS, a plain fork() is used instead and the child
 * writes its own pid to cgroup.procs before calling @fn (exiting with
 * EXIT_FAILURE if that write fails).
 *
 * Returns the child's pid in the parent, -1 if the clone attempt failed
 * for a reason other than ENOSYS, or the fork() result on the fallback
 * path.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char pid_text[64];
	int child;

	child = clone_into_cgroup_run_nowait(cgroup, fn, arg);
	if (child > 0)
		return child;

	/* Genuine test failure. */
	if (child < 0 && errno != ENOSYS)
		return -1;

	child = fork();
	if (child != 0)
		return child;

	/* Forked child: enter the cgroup, then run the payload. */
	snprintf(pid_text, sizeof(pid_text), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", pid_text))
		exit(EXIT_FAILURE);
	exit(fn(cgroup, arg));
}
510
511int proc_mount_contains(const char *option)
512{
513 char buf[4 * PAGE_SIZE];
514 ssize_t read;
515
516 read = read_text("/proc/mounts", buf, sizeof(buf));
517 if (read < 0)
518 return read;
519
520 return strstr(buf, option) != NULL;
521}
522
/*
 * Read "/proc/<pid>/<item>" into @buf (@size bytes). When @pid is 0 the
 * path uses "thread-self" if @thread is true and "self" otherwise.
 *
 * Returns the number of bytes read, or -1 on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char file[PATH_MAX];
	ssize_t nread;

	if (pid) {
		snprintf(file, sizeof(file), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(file, sizeof(file), "/proc/%s/%s", self, item);
	}

	nread = read_text(file, buf, size);
	if (nread < 0)
		return -1;

	return nread;
}
537
538int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
539{
540 char buf[PAGE_SIZE];
541
542 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
543 return -1;
544
545 return strstr(buf, needle) ? 0 : -1;
546}
547
/*
 * Create a child directly inside @cgroup via clone_into_cgroup(); the
 * child exits immediately with EXIT_SUCCESS and the parent reaps it.
 *
 * Returns 0 if the clone succeeded, or -1 if @cgroup cannot be opened or
 * the clone failed. The outcome of reaping the child is ignored.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int dirfd;

	dirfd = dirfd_open_opath(cgroup);
	if (dirfd < 0)
		return -1;

	child = clone_into_cgroup(dirfd);
	close_prot_errno(dirfd);

	if (child == 0)
		exit(EXIT_SUCCESS);
	if (child < 0)
		return -1;

	/*
	 * Only the success of the initial clone matters here, so a failed
	 * reap is deliberately not reported.
	 */
	(void)clone_reap(child, WEXITED);
	return 0;
}
572
/*
 * Create an inotify instance that watches "<cgroup>/<filename>" for
 * IN_MODIFY events.
 *
 * Returns the inotify file descriptor on success, or -1 on failure.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char *path;
	int fd, wd;

	fd = inotify_init1(0);
	if (fd == -1)
		return -1;

	path = cg_control(cgroup, filename);
	if (!path) {
		close(fd);
		return -1;
	}

	wd = inotify_add_watch(fd, path, IN_MODIFY);
	/* The path string is not needed after the watch call; don't leak it. */
	free(path);

	if (wd == -1) {
		close(fd);
		fd = -1;
	}

	return fd;
}
589
/*
 * Set up an inotify watch on "<cgroup>/cgroup.events".
 *
 * Returns the __prepare_for_wait() result.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	int fd = __prepare_for_wait(cgroup, "cgroup.events");

	return fd;
}
594
/*
 * Set up an inotify watch on "<cgroup>/memory.events".
 *
 * Returns the __prepare_for_wait() result.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	int fd = __prepare_for_wait(cgroup, "memory.events");

	return fd;
}
599
/*
 * Block until @fd becomes readable. poll() is called with a 10 second
 * timeout and simply called again when it times out or is interrupted,
 * or when it reports events that do not include POLLIN.
 *
 * Returns 0 once POLLIN is reported, or -1 if poll() fails with an error
 * other than EINTR.
 */
int cg_wait_for(int fd)
{
	struct pollfd watch = {
		.fd = fd,
		.events = POLLIN,
	};

	for (;;) {
		int ready = poll(&watch, 1, 10000);

		if (ready == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		if (ready > 0 && (watch.revents & POLLIN))
			return 0;
	}
}