at master 15 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2 3#define _GNU_SOURCE 4#include <errno.h> 5#include <fcntl.h> 6#include <linux/kernel.h> 7#include <limits.h> 8#include <stdbool.h> 9#include <stdio.h> 10#include <stdlib.h> 11#include <string.h> 12#include <syscall.h> 13#include <unistd.h> 14#include <sys/resource.h> 15#include <linux/close_range.h> 16 17#include "kselftest_harness.h" 18#include "../clone3/clone3_selftests.h" 19 20 21#ifndef F_LINUX_SPECIFIC_BASE 22#define F_LINUX_SPECIFIC_BASE 1024 23#endif 24 25#ifndef F_DUPFD_QUERY 26#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3) 27#endif 28 29#ifndef F_CREATED_QUERY 30#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4) 31#endif 32 33static inline int sys_close_range(unsigned int fd, unsigned int max_fd, 34 unsigned int flags) 35{ 36 return syscall(__NR_close_range, fd, max_fd, flags); 37} 38 39TEST(core_close_range) 40{ 41 int i, ret; 42 int open_fds[101]; 43 44 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 45 int fd; 46 47 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 48 ASSERT_GE(fd, 0) { 49 if (errno == ENOENT) 50 SKIP(return, "Skipping test since /dev/null does not exist"); 51 } 52 53 open_fds[i] = fd; 54 } 55 56 EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { 57 if (errno == ENOSYS) 58 SKIP(return, "close_range() syscall not supported"); 59 } 60 61 for (i = 0; i < 100; i++) { 62 ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]); 63 if (ret < 0) { 64 EXPECT_EQ(errno, EINVAL); 65 } else { 66 EXPECT_EQ(ret, 0); 67 } 68 } 69 70 EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); 71 72 for (i = 0; i <= 50; i++) 73 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 74 75 for (i = 51; i <= 100; i++) 76 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 77 78 /* create a couple of gaps */ 79 close(57); 80 close(78); 81 close(81); 82 close(82); 83 close(84); 84 close(90); 85 86 EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0)); 87 88 for (i = 51; i <= 92; i++) 89 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 90 91 for (i = 93; i <= 100; i++) 92 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 93 94 /* test that the kernel caps and still closes all fds */ 95 EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0)); 96 97 for (i = 93; i <= 99; i++) 98 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 99 100 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 101 102 EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0)); 103 104 EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL)); 105} 106 107TEST(close_range_unshare) 108{ 109 int i, ret, status; 110 pid_t pid; 111 int open_fds[101]; 112 struct __clone_args args = { 113 .flags = CLONE_FILES, 114 .exit_signal = SIGCHLD, 115 }; 116 117 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 118 int fd; 119 120 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 121 ASSERT_GE(fd, 0) { 122 if (errno == ENOENT) 123 SKIP(return, "Skipping test since /dev/null does not exist"); 124 } 125 126 open_fds[i] = fd; 127 } 128 129 pid = sys_clone3(&args, sizeof(args)); 130 ASSERT_GE(pid, 0); 131 132 if (pid == 0) { 133 ret = sys_close_range(open_fds[0], open_fds[50], 134 CLOSE_RANGE_UNSHARE); 135 if (ret) 136 exit(EXIT_FAILURE); 137 138 for (i = 0; i <= 50; i++) 139 if (fcntl(open_fds[i], F_GETFL) != -1) 140 exit(EXIT_FAILURE); 141 142 for (i = 51; i <= 100; i++) 143 if (fcntl(open_fds[i], F_GETFL) == -1) 144 exit(EXIT_FAILURE); 145 146 /* create a couple of gaps */ 147 close(57); 148 close(78); 149 close(81); 150 close(82); 151 close(84); 152 close(90); 153 154 ret = sys_close_range(open_fds[51], open_fds[92], 155 CLOSE_RANGE_UNSHARE); 156 if (ret) 157 exit(EXIT_FAILURE); 158 159 for (i = 51; i <= 92; i++) 160 if (fcntl(open_fds[i], F_GETFL) != -1) 161 exit(EXIT_FAILURE); 162 163 for (i = 93; i <= 100; i++) 164 if (fcntl(open_fds[i], F_GETFL) == -1) 165 exit(EXIT_FAILURE); 166 167 /* test that the kernel caps and still closes all fds */ 168 ret = sys_close_range(open_fds[93], open_fds[99], 169 CLOSE_RANGE_UNSHARE); 170 if (ret) 171 exit(EXIT_FAILURE); 172 173 for (i = 93; i <= 99; i++) 174 if (fcntl(open_fds[i], F_GETFL) != -1) 175 exit(EXIT_FAILURE); 176 177 if (fcntl(open_fds[100], F_GETFL) == -1) 178 exit(EXIT_FAILURE); 179 180 ret = sys_close_range(open_fds[100], open_fds[100], 181 CLOSE_RANGE_UNSHARE); 182 if (ret) 183 exit(EXIT_FAILURE); 184 185 if (fcntl(open_fds[100], F_GETFL) != -1) 186 exit(EXIT_FAILURE); 187 188 exit(EXIT_SUCCESS); 189 } 190 191 EXPECT_EQ(waitpid(pid, &status, 0), pid); 192 EXPECT_EQ(true, WIFEXITED(status)); 193 EXPECT_EQ(0, WEXITSTATUS(status)); 194} 195 196TEST(close_range_unshare_capped) 197{ 198 int i, ret, status; 199 pid_t pid; 200 int open_fds[101]; 201 struct __clone_args args = { 202 .flags = CLONE_FILES, 203 .exit_signal = SIGCHLD, 204 }; 205 206 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 207 int fd; 208 209 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 210 ASSERT_GE(fd, 0) { 211 if (errno == ENOENT) 212 SKIP(return, "Skipping test since /dev/null does not exist"); 213 } 214 215 open_fds[i] = fd; 216 } 217 218 pid = sys_clone3(&args, sizeof(args)); 219 ASSERT_GE(pid, 0); 220 221 if (pid == 0) { 222 ret = sys_close_range(open_fds[0], UINT_MAX, 223 CLOSE_RANGE_UNSHARE); 224 if (ret) 225 exit(EXIT_FAILURE); 226 227 for (i = 0; i <= 100; i++) 228 if (fcntl(open_fds[i], F_GETFL) != -1) 229 exit(EXIT_FAILURE); 230 231 exit(EXIT_SUCCESS); 232 } 233 234 EXPECT_EQ(waitpid(pid, &status, 0), pid); 235 EXPECT_EQ(true, WIFEXITED(status)); 236 EXPECT_EQ(0, WEXITSTATUS(status)); 237} 238 239TEST(close_range_cloexec) 240{ 241 int i, ret; 242 int open_fds[101]; 243 struct rlimit rlimit; 244 245 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 246 int fd; 247 248 fd = open("/dev/null", O_RDONLY); 249 ASSERT_GE(fd, 0) { 250 if (errno == ENOENT) 251 SKIP(return, "Skipping test since /dev/null does not exist"); 252 } 253 254 open_fds[i] = fd; 255 } 256 257 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); 258 if (ret < 0) { 259 if (errno == ENOSYS) 260 SKIP(return, "close_range() syscall not supported"); 261 if (errno == EINVAL) 262 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); 263 } 264 265 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ 266 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); 267 rlimit.rlim_cur = 25; 268 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)); 269 270 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */ 271 ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC); 272 ASSERT_EQ(0, ret); 273 ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC); 274 ASSERT_EQ(0, ret); 275 276 for (i = 0; i <= 50; i++) { 277 int flags = fcntl(open_fds[i], F_GETFD); 278 279 EXPECT_GT(flags, -1); 280 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 281 } 282 283 for (i = 51; i <= 74; i++) { 284 int flags = fcntl(open_fds[i], F_GETFD); 285 286 EXPECT_GT(flags, -1); 287 EXPECT_EQ(flags & FD_CLOEXEC, 0); 288 } 289 290 for (i = 75; i <= 100; i++) { 291 int flags = fcntl(open_fds[i], F_GETFD); 292 293 EXPECT_GT(flags, -1); 294 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 295 } 296 297 /* Test a common pattern. */ 298 ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC); 299 for (i = 0; i <= 100; i++) { 300 int flags = fcntl(open_fds[i], F_GETFD); 301 302 EXPECT_GT(flags, -1); 303 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 304 } 305} 306 307TEST(close_range_cloexec_unshare) 308{ 309 int i, ret; 310 int open_fds[101]; 311 struct rlimit rlimit; 312 313 for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 314 int fd; 315 316 fd = open("/dev/null", O_RDONLY); 317 ASSERT_GE(fd, 0) { 318 if (errno == ENOENT) 319 SKIP(return, "Skipping test since /dev/null does not exist"); 320 } 321 322 open_fds[i] = fd; 323 } 324 325 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC); 326 if (ret < 0) { 327 if (errno == ENOSYS) 328 SKIP(return, "close_range() syscall not supported"); 329 if (errno == EINVAL) 330 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC"); 331 } 332 333 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */ 334 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); 335 rlimit.rlim_cur = 25; 336 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)); 337 338 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */ 339 ret = sys_close_range(open_fds[0], open_fds[50], 340 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); 341 ASSERT_EQ(0, ret); 342 ret = sys_close_range(open_fds[75], open_fds[100], 343 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); 344 ASSERT_EQ(0, ret); 345 346 for (i = 0; i <= 50; i++) { 347 int flags = fcntl(open_fds[i], F_GETFD); 348 349 EXPECT_GT(flags, -1); 350 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 351 } 352 353 for (i = 51; i <= 74; i++) { 354 int flags = fcntl(open_fds[i], F_GETFD); 355 356 EXPECT_GT(flags, -1); 357 EXPECT_EQ(flags & FD_CLOEXEC, 0); 358 } 359 360 for (i = 75; i <= 100; i++) { 361 int flags = fcntl(open_fds[i], F_GETFD); 362 363 EXPECT_GT(flags, -1); 364 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 365 } 366 367 /* Test a common pattern. */ 368 ret = sys_close_range(3, UINT_MAX, 369 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE); 370 for (i = 0; i <= 100; i++) { 371 int flags = fcntl(open_fds[i], F_GETFD); 372 373 EXPECT_GT(flags, -1); 374 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 375 } 376} 377 378/* 379 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com 380 */ 381TEST(close_range_cloexec_syzbot) 382{ 383 int fd1, fd2, fd3, fd4, flags, ret, status; 384 pid_t pid; 385 struct __clone_args args = { 386 .flags = CLONE_FILES, 387 .exit_signal = SIGCHLD, 388 }; 389 390 /* Create a huge gap in the fd table. */ 391 fd1 = open("/dev/null", O_RDWR); 392 EXPECT_GT(fd1, 0); 393 394 fd2 = dup2(fd1, 1000); 395 EXPECT_GT(fd2, 0); 396 397 flags = fcntl(fd1, F_DUPFD_QUERY, fd2); 398 if (flags < 0) { 399 EXPECT_EQ(errno, EINVAL); 400 } else { 401 EXPECT_EQ(flags, 1); 402 } 403 404 pid = sys_clone3(&args, sizeof(args)); 405 ASSERT_GE(pid, 0); 406 407 if (pid == 0) { 408 ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC); 409 if (ret) 410 exit(EXIT_FAILURE); 411 412 /* 413 * We now have a private file descriptor table and all 414 * our open fds should still be open but made 415 * close-on-exec. 416 */ 417 flags = fcntl(fd1, F_GETFD); 418 EXPECT_GT(flags, -1); 419 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 420 421 flags = fcntl(fd2, F_GETFD); 422 EXPECT_GT(flags, -1); 423 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 424 425 fd3 = dup2(fd1, 42); 426 EXPECT_GT(fd3, 0); 427 428 flags = fcntl(fd1, F_DUPFD_QUERY, fd3); 429 if (flags < 0) { 430 EXPECT_EQ(errno, EINVAL); 431 } else { 432 EXPECT_EQ(flags, 1); 433 } 434 435 436 437 /* 438 * Duplicating the file descriptor must remove the 439 * FD_CLOEXEC flag. 440 */ 441 flags = fcntl(fd3, F_GETFD); 442 EXPECT_GT(flags, -1); 443 EXPECT_EQ(flags & FD_CLOEXEC, 0); 444 445 exit(EXIT_SUCCESS); 446 } 447 448 EXPECT_EQ(waitpid(pid, &status, 0), pid); 449 EXPECT_EQ(true, WIFEXITED(status)); 450 EXPECT_EQ(0, WEXITSTATUS(status)); 451 452 /* 453 * We had a shared file descriptor table before along with requesting 454 * close-on-exec so the original fds must not be close-on-exec. 455 */ 456 flags = fcntl(fd1, F_GETFD); 457 EXPECT_GT(flags, -1); 458 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 459 460 flags = fcntl(fd2, F_GETFD); 461 EXPECT_GT(flags, -1); 462 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 463 464 fd3 = dup2(fd1, 42); 465 EXPECT_GT(fd3, 0); 466 467 flags = fcntl(fd1, F_DUPFD_QUERY, fd3); 468 if (flags < 0) { 469 EXPECT_EQ(errno, EINVAL); 470 } else { 471 EXPECT_EQ(flags, 1); 472 } 473 474 fd4 = open("/dev/null", O_RDWR); 475 EXPECT_GT(fd4, 0); 476 477 /* Same inode, different file pointers. */ 478 flags = fcntl(fd1, F_DUPFD_QUERY, fd4); 479 if (flags < 0) { 480 EXPECT_EQ(errno, EINVAL); 481 } else { 482 EXPECT_EQ(flags, 0); 483 } 484 485 flags = fcntl(fd3, F_GETFD); 486 EXPECT_GT(flags, -1); 487 EXPECT_EQ(flags & FD_CLOEXEC, 0); 488 489 EXPECT_EQ(close(fd1), 0); 490 EXPECT_EQ(close(fd2), 0); 491 EXPECT_EQ(close(fd3), 0); 492 EXPECT_EQ(close(fd4), 0); 493} 494 495/* 496 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com 497 */ 498TEST(close_range_cloexec_unshare_syzbot) 499{ 500 int i, fd1, fd2, fd3, flags, ret, status; 501 pid_t pid; 502 struct __clone_args args = { 503 .flags = CLONE_FILES, 504 .exit_signal = SIGCHLD, 505 }; 506 507 /* 508 * Create a huge gap in the fd table. When we now call 509 * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper 510 * bound the kernel will only copy up to fd1 file descriptors into the 511 * new fd table. If the kernel is buggy and doesn't handle 512 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file 513 * descriptors and we will oops! 514 * 515 * On a buggy kernel this should immediately oops. But let's loop just 516 * to be sure. 517 */ 518 fd1 = open("/dev/null", O_RDWR); 519 EXPECT_GT(fd1, 0); 520 521 fd2 = dup2(fd1, 1000); 522 EXPECT_GT(fd2, 0); 523 524 for (i = 0; i < 100; i++) { 525 526 pid = sys_clone3(&args, sizeof(args)); 527 ASSERT_GE(pid, 0); 528 529 if (pid == 0) { 530 ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE | 531 CLOSE_RANGE_CLOEXEC); 532 if (ret) 533 exit(EXIT_FAILURE); 534 535 /* 536 * We now have a private file descriptor table and all 537 * our open fds should still be open but made 538 * close-on-exec. 539 */ 540 flags = fcntl(fd1, F_GETFD); 541 EXPECT_GT(flags, -1); 542 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 543 544 flags = fcntl(fd2, F_GETFD); 545 EXPECT_GT(flags, -1); 546 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 547 548 fd3 = dup2(fd1, 42); 549 EXPECT_GT(fd3, 0); 550 551 /* 552 * Duplicating the file descriptor must remove the 553 * FD_CLOEXEC flag. 554 */ 555 flags = fcntl(fd3, F_GETFD); 556 EXPECT_GT(flags, -1); 557 EXPECT_EQ(flags & FD_CLOEXEC, 0); 558 559 EXPECT_EQ(close(fd1), 0); 560 EXPECT_EQ(close(fd2), 0); 561 EXPECT_EQ(close(fd3), 0); 562 563 exit(EXIT_SUCCESS); 564 } 565 566 EXPECT_EQ(waitpid(pid, &status, 0), pid); 567 EXPECT_EQ(true, WIFEXITED(status)); 568 EXPECT_EQ(0, WEXITSTATUS(status)); 569 } 570 571 /* 572 * We created a private file descriptor table before along with 573 * requesting close-on-exec so the original fds must not be 574 * close-on-exec. 575 */ 576 flags = fcntl(fd1, F_GETFD); 577 EXPECT_GT(flags, -1); 578 EXPECT_EQ(flags & FD_CLOEXEC, 0); 579 580 flags = fcntl(fd2, F_GETFD); 581 EXPECT_GT(flags, -1); 582 EXPECT_EQ(flags & FD_CLOEXEC, 0); 583 584 fd3 = dup2(fd1, 42); 585 EXPECT_GT(fd3, 0); 586 587 flags = fcntl(fd3, F_GETFD); 588 EXPECT_GT(flags, -1); 589 EXPECT_EQ(flags & FD_CLOEXEC, 0); 590 591 EXPECT_EQ(close(fd1), 0); 592 EXPECT_EQ(close(fd2), 0); 593 EXPECT_EQ(close(fd3), 0); 594} 595 596TEST(close_range_bitmap_corruption) 597{ 598 pid_t pid; 599 int status; 600 struct __clone_args args = { 601 .flags = CLONE_FILES, 602 .exit_signal = SIGCHLD, 603 }; 604 605 /* get the first 128 descriptors open */ 606 for (int i = 2; i < 128; i++) 607 EXPECT_GE(dup2(0, i), 0); 608 609 /* get descriptor table shared */ 610 pid = sys_clone3(&args, sizeof(args)); 611 ASSERT_GE(pid, 0); 612 613 if (pid == 0) { 614 /* unshare and truncate descriptor table down to 64 */ 615 if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE)) 616 exit(EXIT_FAILURE); 617 618 ASSERT_EQ(fcntl(64, F_GETFD), -1); 619 /* ... and verify that the range 64..127 is not 620 stuck "fully used" according to secondary bitmap */ 621 EXPECT_EQ(dup(0), 64) 622 exit(EXIT_FAILURE); 623 exit(EXIT_SUCCESS); 624 } 625 626 EXPECT_EQ(waitpid(pid, &status, 0), pid); 627 EXPECT_EQ(true, WIFEXITED(status)); 628 EXPECT_EQ(0, WEXITSTATUS(status)); 629} 630 631TEST(fcntl_created) 632{ 633 for (int i = 0; i < 101; i++) { 634 int fd; 635 char path[PATH_MAX]; 636 637 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 638 ASSERT_GE(fd, 0) { 639 if (errno == ENOENT) 640 SKIP(return, 641 "Skipping test since /dev/null does not exist"); 642 } 643 644 /* We didn't create "/dev/null". */ 645 EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0); 646 close(fd); 647 648 sprintf(path, "aaaa_%d", i); 649 fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0600); 650 ASSERT_GE(fd, 0); 651 652 /* We created "aaaa_%d". */ 653 EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 1); 654 close(fd); 655 656 fd = open(path, O_RDONLY | O_CLOEXEC); 657 ASSERT_GE(fd, 0); 658 659 /* We're opening it again, so no positive creation check. */ 660 EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0); 661 close(fd); 662 unlink(path); 663 } 664} 665 666TEST_HARNESS_MAIN