Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/core: add regression test for CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC

This test is a minimalized version of the reproducer given by syzbot
(cf. [1]).

After introducing CLOSE_RANGE_CLOEXEC syzbot reported a crash when
CLOSE_RANGE_CLOEXEC is specified in conjunction with
CLOSE_RANGE_UNSHARE. When CLOSE_RANGE_UNSHARE is specified the caller
will receive a private file descriptor table in case their file
descriptor table is currently shared.
For the case where the caller has requested all file descriptors to be
actually closed via e.g. close_range(3, ~0U, 0) the kernel knows that
the caller does not need any of the file descriptors anymore and will
optimize the close operation by only copying all files in the range from
0 to 3 and no others.

However, if the caller requested CLOSE_RANGE_CLOEXEC together with
CLOSE_RANGE_UNSHARE the caller wants to still make use of the file
descriptors so the kernel needs to copy all of them and can't optimize.

The original patch didn't account for this and thus could cause oopses
as evidenced by the syzbot report. Add tests for this regression.

We first create a huge gap in the fd table. When we now call
CLOSE_RANGE_UNSHARE with a shared fd table and with ~0U as upper
bound the kernel will only copy up to fd1 file descriptors into the new
fd table. If the kernel is buggy and doesn't handle CLOSE_RANGE_CLOEXEC
correctly it will not have copied all file descriptors and we will oops!

This test passes on a fixed kernel and will trigger an oops on a buggy
kernel.

[1]: https://syzkaller.appspot.com/text?tag=KernelConfig&x=db720fe37a6a41d8

Cc: Giuseppe Scrivano <gscrivan@redhat.com>
Cc: linux-fsdevel@vger.kernel.org
Link: syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
Link: https://lore.kernel.org/r/20201218145415.801063-4-christian.brauner@ubuntu.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>

+183
+183
tools/testing/selftests/core/close_range_test.c
··· 384 384 } 385 385 } 386 386 387 + /* 388 + * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com: a child created with CLONE_FILES calls close_range() with CLOSE_RANGE_CLOEXEC only (no CLOSE_RANGE_UNSHARE), then the close-on-exec flags are checked in both the child and the parent. 389 + */ 390 + TEST(close_range_cloexec_syzbot) 391 + { 392 + int fd1, fd2, fd3, flags, ret, status; 393 + pid_t pid; 394 + struct __clone_args args = { 395 + .flags = CLONE_FILES, 396 + .exit_signal = SIGCHLD, 397 + }; 398 + 399 + /* Create a huge gap in the fd table by duplicating fd1 onto fd 1000. */ 400 + fd1 = open("/dev/null", O_RDWR); 401 + EXPECT_GT(fd1, 0); 402 + 403 + fd2 = dup2(fd1, 1000); 404 + EXPECT_GT(fd2, 0); 405 + 406 + pid = sys_clone3(&args, sizeof(args)); 407 + ASSERT_GE(pid, 0); 408 + 409 + if (pid == 0) { 410 + ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC); 411 + if (ret) 412 + exit(EXIT_FAILURE); 413 + 414 + /* 415 + * CLOSE_RANGE_UNSHARE was not requested here, so no private 416 + * file descriptor table was asked for. All our open fds should 417 + * still be open but made close-on-exec. 418 + */ 419 + flags = fcntl(fd1, F_GETFD); 420 + EXPECT_GT(flags, -1); 421 + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 422 + 423 + flags = fcntl(fd2, F_GETFD); 424 + EXPECT_GT(flags, -1); 425 + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 426 + 427 + fd3 = dup2(fd1, 42); 428 + EXPECT_GT(fd3, 0); 429 + 430 + /* 431 + * Duplicating the file descriptor must remove the 432 + * FD_CLOEXEC flag. 433 + */ 434 + flags = fcntl(fd3, F_GETFD); 435 + EXPECT_GT(flags, -1); 436 + EXPECT_EQ(flags & FD_CLOEXEC, 0); 437 + 438 + exit(EXIT_SUCCESS); 439 + } 440 + /* Reap the child; it is expected to have exited normally with status 0. */ 441 + EXPECT_EQ(waitpid(pid, &status, 0), pid); 442 + EXPECT_EQ(true, WIFEXITED(status)); 443 + EXPECT_EQ(0, WEXITSTATUS(status)); 444 + 445 + /* 446 + * The child was created with CLONE_FILES and requested close-on-exec 447 + * without CLOSE_RANGE_UNSHARE, so the original fds are expected to be close-on-exec here as well. 
448 + */ 449 + flags = fcntl(fd1, F_GETFD); 450 + EXPECT_GT(flags, -1); 451 + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 452 + 453 + flags = fcntl(fd2, F_GETFD); 454 + EXPECT_GT(flags, -1); 455 + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 456 + 457 + fd3 = dup2(fd1, 42); 458 + EXPECT_GT(fd3, 0); 459 + /* As in the child: the duplicate is expected not to carry FD_CLOEXEC. */ 460 + flags = fcntl(fd3, F_GETFD); 461 + EXPECT_GT(flags, -1); 462 + EXPECT_EQ(flags & FD_CLOEXEC, 0); 463 + 464 + EXPECT_EQ(close(fd1), 0); 465 + EXPECT_EQ(close(fd2), 0); 466 + EXPECT_EQ(close(fd3), 0); 467 + } 468 + 469 + /* 470 + * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com: same setup as above, but the child passes CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC and the clone/check cycle is repeated 100 times. 471 + */ 472 + TEST(close_range_cloexec_unshare_syzbot) 473 + { 474 + int i, fd1, fd2, fd3, flags, ret, status; 475 + pid_t pid; 476 + struct __clone_args args = { 477 + .flags = CLONE_FILES, 478 + .exit_signal = SIGCHLD, 479 + }; 480 + 481 + /* 482 + * Create a huge gap in the fd table. When we now call 483 + * CLOSE_RANGE_UNSHARE with a shared fd table and with ~0U as upper 484 + * bound the kernel will only copy up to fd1 file descriptors into the 485 + * new fd table. If the kernel is buggy and doesn't handle 486 + * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file 487 + * descriptors and we will oops! 488 + * 489 + * On a buggy kernel this should immediately oops. But let's loop just 490 + * to be sure. 491 + */ 492 + fd1 = open("/dev/null", O_RDWR); 493 + EXPECT_GT(fd1, 0); 494 + 495 + fd2 = dup2(fd1, 1000); 496 + EXPECT_GT(fd2, 0); 497 + 498 + for (i = 0; i < 100; i++) { 499 + 500 + pid = sys_clone3(&args, sizeof(args)); 501 + ASSERT_GE(pid, 0); 502 + 503 + if (pid == 0) { 504 + ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE | 505 + CLOSE_RANGE_CLOEXEC); 506 + if (ret) 507 + exit(EXIT_FAILURE); 508 + 509 + /* 510 + * We now have a private file descriptor table and all 511 + * our open fds should still be open but made 512 + * close-on-exec. 
513 + */ 514 + flags = fcntl(fd1, F_GETFD); 515 + EXPECT_GT(flags, -1); 516 + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 517 + 518 + flags = fcntl(fd2, F_GETFD); 519 + EXPECT_GT(flags, -1); 520 + EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC); 521 + 522 + fd3 = dup2(fd1, 42); 523 + EXPECT_GT(fd3, 0); 524 + 525 + /* 526 + * Duplicating the file descriptor must remove the 527 + * FD_CLOEXEC flag. 528 + */ 529 + flags = fcntl(fd3, F_GETFD); 530 + EXPECT_GT(flags, -1); 531 + EXPECT_EQ(flags & FD_CLOEXEC, 0); 532 + 533 + EXPECT_EQ(close(fd1), 0); 534 + EXPECT_EQ(close(fd2), 0); 535 + EXPECT_EQ(close(fd3), 0); 536 + 537 + exit(EXIT_SUCCESS); 538 + } 539 + /* Reap this iteration's child; it is expected to have exited normally with status 0. */ 540 + EXPECT_EQ(waitpid(pid, &status, 0), pid); 541 + EXPECT_EQ(true, WIFEXITED(status)); 542 + EXPECT_EQ(0, WEXITSTATUS(status)); 543 + } 544 + 545 + /* 546 + * Each child created a private file descriptor table along with 547 + * requesting close-on-exec so the original fds must not be 548 + * close-on-exec. 549 + */ 550 + flags = fcntl(fd1, F_GETFD); 551 + EXPECT_GT(flags, -1); 552 + EXPECT_EQ(flags & FD_CLOEXEC, 0); 553 + 554 + flags = fcntl(fd2, F_GETFD); 555 + EXPECT_GT(flags, -1); 556 + EXPECT_EQ(flags & FD_CLOEXEC, 0); 557 + 558 + fd3 = dup2(fd1, 42); 559 + EXPECT_GT(fd3, 0); 560 + 561 + flags = fcntl(fd3, F_GETFD); 562 + EXPECT_GT(flags, -1); 563 + EXPECT_EQ(flags & FD_CLOEXEC, 0); 564 + 565 + EXPECT_EQ(close(fd1), 0); 566 + EXPECT_EQ(close(fd2), 0); 567 + EXPECT_EQ(close(fd3), 0); 568 + } 569 + 387 570 TEST_HARNESS_MAIN