Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v6.16-rc5 625 lines 12 kB view raw
1/* SPDX-License-Identifier: GPL-2.0 */ 2 3#define _GNU_SOURCE 4 5#include <errno.h> 6#include <fcntl.h> 7#include <linux/limits.h> 8#include <poll.h> 9#include <signal.h> 10#include <stdio.h> 11#include <stdlib.h> 12#include <string.h> 13#include <sys/inotify.h> 14#include <sys/stat.h> 15#include <sys/types.h> 16#include <sys/wait.h> 17#include <unistd.h> 18 19#include "cgroup_util.h" 20#include "../../clone3/clone3_selftests.h" 21 22/* Returns read len on success, or -errno on failure. */ 23ssize_t read_text(const char *path, char *buf, size_t max_len) 24{ 25 ssize_t len; 26 int fd; 27 28 fd = open(path, O_RDONLY); 29 if (fd < 0) 30 return -errno; 31 32 len = read(fd, buf, max_len - 1); 33 34 if (len >= 0) 35 buf[len] = 0; 36 37 close(fd); 38 return len < 0 ? -errno : len; 39} 40 41/* Returns written len on success, or -errno on failure. */ 42ssize_t write_text(const char *path, char *buf, ssize_t len) 43{ 44 int fd; 45 46 fd = open(path, O_WRONLY | O_APPEND); 47 if (fd < 0) 48 return -errno; 49 50 len = write(fd, buf, len); 51 close(fd); 52 return len < 0 ? -errno : len; 53} 54 55char *cg_name(const char *root, const char *name) 56{ 57 size_t len = strlen(root) + strlen(name) + 2; 58 char *ret = malloc(len); 59 60 snprintf(ret, len, "%s/%s", root, name); 61 62 return ret; 63} 64 65char *cg_name_indexed(const char *root, const char *name, int index) 66{ 67 size_t len = strlen(root) + strlen(name) + 10; 68 char *ret = malloc(len); 69 70 snprintf(ret, len, "%s/%s_%d", root, name, index); 71 72 return ret; 73} 74 75char *cg_control(const char *cgroup, const char *control) 76{ 77 size_t len = strlen(cgroup) + strlen(control) + 2; 78 char *ret = malloc(len); 79 80 snprintf(ret, len, "%s/%s", cgroup, control); 81 82 return ret; 83} 84 85/* Returns 0 on success, or -errno on failure. */ 86int cg_read(const char *cgroup, const char *control, char *buf, size_t len) 87{ 88 char path[PATH_MAX]; 89 ssize_t ret; 90 91 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 92 93 ret = read_text(path, buf, len); 94 return ret >= 0 ? 0 : ret; 95} 96 97int cg_read_strcmp(const char *cgroup, const char *control, 98 const char *expected) 99{ 100 size_t size; 101 char *buf; 102 int ret; 103 104 /* Handle the case of comparing against empty string */ 105 if (!expected) 106 return -1; 107 else 108 size = strlen(expected) + 1; 109 110 buf = malloc(size); 111 if (!buf) 112 return -1; 113 114 if (cg_read(cgroup, control, buf, size)) { 115 free(buf); 116 return -1; 117 } 118 119 ret = strcmp(expected, buf); 120 free(buf); 121 return ret; 122} 123 124int cg_read_strstr(const char *cgroup, const char *control, const char *needle) 125{ 126 char buf[PAGE_SIZE]; 127 128 if (cg_read(cgroup, control, buf, sizeof(buf))) 129 return -1; 130 131 return strstr(buf, needle) ? 0 : -1; 132} 133 134long cg_read_long(const char *cgroup, const char *control) 135{ 136 char buf[128]; 137 138 if (cg_read(cgroup, control, buf, sizeof(buf))) 139 return -1; 140 141 return atol(buf); 142} 143 144long cg_read_long_fd(int fd) 145{ 146 char buf[128]; 147 148 if (pread(fd, buf, sizeof(buf), 0) <= 0) 149 return -1; 150 151 return atol(buf); 152} 153 154long cg_read_key_long(const char *cgroup, const char *control, const char *key) 155{ 156 char buf[PAGE_SIZE]; 157 char *ptr; 158 159 if (cg_read(cgroup, control, buf, sizeof(buf))) 160 return -1; 161 162 ptr = strstr(buf, key); 163 if (!ptr) 164 return -1; 165 166 return atol(ptr + strlen(key)); 167} 168 169long cg_read_lc(const char *cgroup, const char *control) 170{ 171 char buf[PAGE_SIZE]; 172 const char delim[] = "\n"; 173 char *line; 174 long cnt = 0; 175 176 if (cg_read(cgroup, control, buf, sizeof(buf))) 177 return -1; 178 179 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 180 cnt++; 181 182 return cnt; 183} 184 185/* Returns 0 on success, or -errno on failure. */ 186int cg_write(const char *cgroup, const char *control, char *buf) 187{ 188 char path[PATH_MAX]; 189 ssize_t len = strlen(buf), ret; 190 191 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 192 ret = write_text(path, buf, len); 193 return ret == len ? 0 : ret; 194} 195 196/* 197 * Returns fd on success, or -1 on failure. 198 * (fd should be closed with close() as usual) 199 */ 200int cg_open(const char *cgroup, const char *control, int flags) 201{ 202 char path[PATH_MAX]; 203 204 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 205 return open(path, flags); 206} 207 208int cg_write_numeric(const char *cgroup, const char *control, long value) 209{ 210 char buf[64]; 211 int ret; 212 213 ret = sprintf(buf, "%lu", value); 214 if (ret < 0) 215 return ret; 216 217 return cg_write(cgroup, control, buf); 218} 219 220static int cg_find_root(char *root, size_t len, const char *controller, 221 bool *nsdelegate) 222{ 223 char buf[10 * PAGE_SIZE]; 224 char *fs, *mount, *type, *options; 225 const char delim[] = "\n\t "; 226 227 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) 228 return -1; 229 230 /* 231 * Example: 232 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 233 */ 234 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { 235 mount = strtok(NULL, delim); 236 type = strtok(NULL, delim); 237 options = strtok(NULL, delim); 238 strtok(NULL, delim); 239 strtok(NULL, delim); 240 if (strcmp(type, "cgroup") == 0) { 241 if (!controller || !strstr(options, controller)) 242 continue; 243 } else if (strcmp(type, "cgroup2") == 0) { 244 if (controller && 245 cg_read_strstr(mount, "cgroup.controllers", controller)) 246 continue; 247 } else { 248 continue; 249 } 250 strncpy(root, mount, len); 251 252 if (nsdelegate) 253 *nsdelegate = !!strstr(options, "nsdelegate"); 254 return 0; 255 256 } 257 258 return -1; 259} 260 261int cg_find_controller_root(char *root, size_t len, const char *controller) 262{ 263 return cg_find_root(root, len, controller, NULL); 264} 265 266int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) 267{ 268 return cg_find_root(root, len, NULL, nsdelegate); 269} 270 271int cg_create(const char *cgroup) 272{ 273 return mkdir(cgroup, 0755); 274} 275 276int cg_wait_for_proc_count(const char *cgroup, int count) 277{ 278 char buf[10 * PAGE_SIZE] = {0}; 279 int attempts; 280 char *ptr; 281 282 for (attempts = 10; attempts >= 0; attempts--) { 283 int nr = 0; 284 285 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 286 break; 287 288 for (ptr = buf; *ptr; ptr++) 289 if (*ptr == '\n') 290 nr++; 291 292 if (nr >= count) 293 return 0; 294 295 usleep(100000); 296 } 297 298 return -1; 299} 300 301int cg_killall(const char *cgroup) 302{ 303 char buf[PAGE_SIZE]; 304 char *ptr = buf; 305 306 /* If cgroup.kill exists use it. */ 307 if (!cg_write(cgroup, "cgroup.kill", "1")) 308 return 0; 309 310 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 311 return -1; 312 313 while (ptr < buf + sizeof(buf)) { 314 int pid = strtol(ptr, &ptr, 10); 315 316 if (pid == 0) 317 break; 318 if (*ptr) 319 ptr++; 320 else 321 break; 322 if (kill(pid, SIGKILL)) 323 return -1; 324 } 325 326 return 0; 327} 328 329int cg_destroy(const char *cgroup) 330{ 331 int ret; 332 333 if (!cgroup) 334 return 0; 335retry: 336 ret = rmdir(cgroup); 337 if (ret && errno == EBUSY) { 338 cg_killall(cgroup); 339 usleep(100); 340 goto retry; 341 } 342 343 if (ret && errno == ENOENT) 344 ret = 0; 345 346 return ret; 347} 348 349int cg_enter(const char *cgroup, int pid) 350{ 351 char pidbuf[64]; 352 353 snprintf(pidbuf, sizeof(pidbuf), "%d", pid); 354 return cg_write(cgroup, "cgroup.procs", pidbuf); 355} 356 357int cg_enter_current(const char *cgroup) 358{ 359 return cg_write(cgroup, "cgroup.procs", "0"); 360} 361 362int cg_enter_current_thread(const char *cgroup) 363{ 364 return cg_write(cgroup, "cgroup.threads", "0"); 365} 366 367int cg_run(const char *cgroup, 368 int (*fn)(const char *cgroup, void *arg), 369 void *arg) 370{ 371 int pid, retcode; 372 373 pid = fork(); 374 if (pid < 0) { 375 return pid; 376 } else if (pid == 0) { 377 char buf[64]; 378 379 snprintf(buf, sizeof(buf), "%d", getpid()); 380 if (cg_write(cgroup, "cgroup.procs", buf)) 381 exit(EXIT_FAILURE); 382 exit(fn(cgroup, arg)); 383 } else { 384 waitpid(pid, &retcode, 0); 385 if (WIFEXITED(retcode)) 386 return WEXITSTATUS(retcode); 387 else 388 return -1; 389 } 390} 391 392pid_t clone_into_cgroup(int cgroup_fd) 393{ 394#ifdef CLONE_ARGS_SIZE_VER2 395 pid_t pid; 396 397 struct __clone_args args = { 398 .flags = CLONE_INTO_CGROUP, 399 .exit_signal = SIGCHLD, 400 .cgroup = cgroup_fd, 401 }; 402 403 pid = sys_clone3(&args, sizeof(struct __clone_args)); 404 /* 405 * Verify that this is a genuine test failure: 406 * ENOSYS -> clone3() not available 407 * E2BIG -> CLONE_INTO_CGROUP not available 408 */ 409 if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) 410 goto pretend_enosys; 411 412 return pid; 413 414pretend_enosys: 415#endif 416 errno = ENOSYS; 417 return -ENOSYS; 418} 419 420int clone_reap(pid_t pid, int options) 421{ 422 int ret; 423 siginfo_t info = { 424 .si_signo = 0, 425 }; 426 427again: 428 ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); 429 if (ret < 0) { 430 if (errno == EINTR) 431 goto again; 432 return -1; 433 } 434 435 if (options & WEXITED) { 436 if (WIFEXITED(info.si_status)) 437 return WEXITSTATUS(info.si_status); 438 } 439 440 if (options & WSTOPPED) { 441 if (WIFSTOPPED(info.si_status)) 442 return WSTOPSIG(info.si_status); 443 } 444 445 if (options & WCONTINUED) { 446 if (WIFCONTINUED(info.si_status)) 447 return 0; 448 } 449 450 return -1; 451} 452 453int dirfd_open_opath(const char *dir) 454{ 455 return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); 456} 457 458#define close_prot_errno(fd) \ 459 if (fd >= 0) { \ 460 int _e_ = errno; \ 461 close(fd); \ 462 errno = _e_; \ 463 } 464 465static int clone_into_cgroup_run_nowait(const char *cgroup, 466 int (*fn)(const char *cgroup, void *arg), 467 void *arg) 468{ 469 int cgroup_fd; 470 pid_t pid; 471 472 cgroup_fd = dirfd_open_opath(cgroup); 473 if (cgroup_fd < 0) 474 return -1; 475 476 pid = clone_into_cgroup(cgroup_fd); 477 close_prot_errno(cgroup_fd); 478 if (pid == 0) 479 exit(fn(cgroup, arg)); 480 481 return pid; 482} 483 484int cg_run_nowait(const char *cgroup, 485 int (*fn)(const char *cgroup, void *arg), 486 void *arg) 487{ 488 int pid; 489 490 pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); 491 if (pid > 0) 492 return pid; 493 494 /* Genuine test failure. */ 495 if (pid < 0 && errno != ENOSYS) 496 return -1; 497 498 pid = fork(); 499 if (pid == 0) { 500 char buf[64]; 501 502 snprintf(buf, sizeof(buf), "%d", getpid()); 503 if (cg_write(cgroup, "cgroup.procs", buf)) 504 exit(EXIT_FAILURE); 505 exit(fn(cgroup, arg)); 506 } 507 508 return pid; 509} 510 511int proc_mount_contains(const char *option) 512{ 513 char buf[4 * PAGE_SIZE]; 514 ssize_t read; 515 516 read = read_text("/proc/mounts", buf, sizeof(buf)); 517 if (read < 0) 518 return read; 519 520 return strstr(buf, option) != NULL; 521} 522 523ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) 524{ 525 char path[PATH_MAX]; 526 ssize_t ret; 527 528 if (!pid) 529 snprintf(path, sizeof(path), "/proc/%s/%s", 530 thread ? "thread-self" : "self", item); 531 else 532 snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); 533 534 ret = read_text(path, buf, size); 535 return ret < 0 ? -1 : ret; 536} 537 538int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) 539{ 540 char buf[PAGE_SIZE]; 541 542 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) 543 return -1; 544 545 return strstr(buf, needle) ? 0 : -1; 546} 547 548int clone_into_cgroup_run_wait(const char *cgroup) 549{ 550 int cgroup_fd; 551 pid_t pid; 552 553 cgroup_fd = dirfd_open_opath(cgroup); 554 if (cgroup_fd < 0) 555 return -1; 556 557 pid = clone_into_cgroup(cgroup_fd); 558 close_prot_errno(cgroup_fd); 559 if (pid < 0) 560 return -1; 561 562 if (pid == 0) 563 exit(EXIT_SUCCESS); 564 565 /* 566 * We don't care whether this fails. We only care whether the initial 567 * clone succeeded. 568 */ 569 (void)clone_reap(pid, WEXITED); 570 return 0; 571} 572 573static int __prepare_for_wait(const char *cgroup, const char *filename) 574{ 575 int fd, ret = -1; 576 577 fd = inotify_init1(0); 578 if (fd == -1) 579 return fd; 580 581 ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY); 582 if (ret == -1) { 583 close(fd); 584 fd = -1; 585 } 586 587 return fd; 588} 589 590int cg_prepare_for_wait(const char *cgroup) 591{ 592 return __prepare_for_wait(cgroup, "cgroup.events"); 593} 594 595int memcg_prepare_for_wait(const char *cgroup) 596{ 597 return __prepare_for_wait(cgroup, "memory.events"); 598} 599 600int cg_wait_for(int fd) 601{ 602 int ret = -1; 603 struct pollfd fds = { 604 .fd = fd, 605 .events = POLLIN, 606 }; 607 608 while (true) { 609 ret = poll(&fds, 1, 10000); 610 611 if (ret == -1) { 612 if (errno == EINTR) 613 continue; 614 615 break; 616 } 617 618 if (ret > 0 && fds.revents & POLLIN) { 619 ret = 0; 620 break; 621 } 622 } 623 624 return ret; 625}