Merge tag 'kselftest-fix-vfork-2024-05-12' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux

Pull Kselftest fixes from Mickaël Salaün:
"Fix Kselftest's vfork() side effects.

As reported by Kernel Test Robot and Sean Christopherson, some
tests fail since v6.9-rc1. This is due to the use of vfork() which
introduced some side effects. Similarly, while making it more generic,
a previous commit made some Landlock file system tests flaky, and
subject to the host's file system mount configuration.

This fixes all these side effects by replacing vfork() with clone3()
and CLONE_VFORK, which is cleaner (no arbitrary shared memory) and
makes the Kselftest framework more robust"

Link: https://lore.kernel.org/oe-lkp/202403291015.1fcfa957-oliver.sang@intel.com
Link: https://lore.kernel.org/r/ZjPelW6-AbtYvslu@google.com
Link: https://lore.kernel.org/r/20240511171445.904356-1-mic@digikod.net

* tag 'kselftest-fix-vfork-2024-05-12' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux:
selftests/harness: Handle TEST_F()'s explicit exit codes
selftests/harness: Fix vfork() side effects
selftests/harness: Share _metadata between forked processes
selftests/pidfd: Fix wrong expectation
selftests/harness: Constify fixture variants
selftests/landlock: Do not allocate memory in fixture data
selftests/harness: Fix interleaved scheduling leading to race conditions
selftests/harness: Fix fixture teardown
selftests/landlock: Fix FS tests when run on a private mount point
selftests/pidfd: Fix config for pidfd_setns_test

Changed files
+147 -67
tools
testing
+94 -33
tools/testing/selftests/kselftest_harness.h
··· 66 66 #include <sys/wait.h> 67 67 #include <unistd.h> 68 68 #include <setjmp.h> 69 + #include <syscall.h> 70 + #include <linux/sched.h> 69 71 70 72 #include "kselftest.h" 71 73 ··· 81 79 #ifndef TH_LOG_ENABLED 82 80 # define TH_LOG_ENABLED 1 83 81 #endif 82 + 83 + /* Wait for the child process to end but without sharing memory mapping. */ 84 + static inline pid_t clone3_vfork(void) 85 + { 86 + struct clone_args args = { 87 + .flags = CLONE_VFORK, 88 + .exit_signal = SIGCHLD, 89 + }; 90 + 91 + return syscall(__NR_clone3, &args, sizeof(args)); 92 + } 84 93 85 94 /** 86 95 * TH_LOG() ··· 294 281 * A bare "return;" statement may be used to return early. 295 282 */ 296 283 #define FIXTURE_TEARDOWN(fixture_name) \ 284 + static const bool fixture_name##_teardown_parent; \ 285 + __FIXTURE_TEARDOWN(fixture_name) 286 + 287 + /** 288 + * FIXTURE_TEARDOWN_PARENT() 289 + * *_metadata* is included so that EXPECT_*, ASSERT_* etc. work correctly. 290 + * 291 + * @fixture_name: fixture name 292 + * 293 + * .. code-block:: c 294 + * 295 + * FIXTURE_TEARDOWN_PARENT(fixture_name) { implementation } 296 + * 297 + * Same as FIXTURE_TEARDOWN() but run this code in a parent process. This 298 + * enables the test process to drop its privileges without impacting the 299 + * related FIXTURE_TEARDOWN_PARENT() (e.g. to remove files from a directory 300 + * where write access was dropped). 301 + * 302 + * To make it possible for the parent process to use *self*, share (MAP_SHARED) 303 + * the fixture data between all forked processes. 304 + */ 305 + #define FIXTURE_TEARDOWN_PARENT(fixture_name) \ 306 + static const bool fixture_name##_teardown_parent = true; \ 307 + __FIXTURE_TEARDOWN(fixture_name) 308 + 309 + #define __FIXTURE_TEARDOWN(fixture_name) \ 297 310 void fixture_name##_teardown( \ 298 311 struct __test_metadata __attribute__((unused)) *_metadata, \ 299 312 FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ ··· 364 325 * variant. 
365 326 */ 366 327 #define FIXTURE_VARIANT_ADD(fixture_name, variant_name) \ 367 - extern FIXTURE_VARIANT(fixture_name) \ 328 + extern const FIXTURE_VARIANT(fixture_name) \ 368 329 _##fixture_name##_##variant_name##_variant; \ 369 330 static struct __fixture_variant_metadata \ 370 331 _##fixture_name##_##variant_name##_object = \ ··· 376 337 __register_fixture_variant(&_##fixture_name##_fixture_object, \ 377 338 &_##fixture_name##_##variant_name##_object); \ 378 339 } \ 379 - FIXTURE_VARIANT(fixture_name) \ 340 + const FIXTURE_VARIANT(fixture_name) \ 380 341 _##fixture_name##_##variant_name##_variant = 381 342 382 343 /** ··· 394 355 * Very similar to TEST() except that *self* is the setup instance of fixture's 395 356 * datatype exposed for use by the implementation. 396 357 * 397 - * The @test_name code is run in a separate process sharing the same memory 398 - * (i.e. vfork), which means that the test process can update its privileges 399 - * without impacting the related FIXTURE_TEARDOWN() (e.g. to remove files from 400 - * a directory where write access was dropped). 358 + * The _metadata object is shared (MAP_SHARED) with all the potential forked 359 + * processes, which enables them to use EXCEPT_*() and ASSERT_*(). 360 + * 361 + * The *self* object is only shared with the potential forked processes if 362 + * FIXTURE_TEARDOWN_PARENT() is used instead of FIXTURE_TEARDOWN(). 401 363 */ 402 364 #define TEST_F(fixture_name, test_name) \ 403 365 __TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT) ··· 419 379 struct __fixture_variant_metadata *variant) \ 420 380 { \ 421 381 /* fixture data is alloced, setup, and torn down per call. 
*/ \ 422 - FIXTURE_DATA(fixture_name) self; \ 382 + FIXTURE_DATA(fixture_name) self_private, *self = NULL; \ 423 383 pid_t child = 1; \ 424 384 int status = 0; \ 425 - bool jmp = false; \ 426 - memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \ 385 + /* Makes sure there is only one teardown, even when child forks again. */ \ 386 + bool *teardown = mmap(NULL, sizeof(*teardown), \ 387 + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); \ 388 + *teardown = false; \ 389 + if (sizeof(*self) > 0) { \ 390 + if (fixture_name##_teardown_parent) { \ 391 + self = mmap(NULL, sizeof(*self), PROT_READ | PROT_WRITE, \ 392 + MAP_SHARED | MAP_ANONYMOUS, -1, 0); \ 393 + } else { \ 394 + memset(&self_private, 0, sizeof(self_private)); \ 395 + self = &self_private; \ 396 + } \ 397 + } \ 427 398 if (setjmp(_metadata->env) == 0) { \ 428 - /* Use the same _metadata. */ \ 429 - child = vfork(); \ 399 + /* _metadata and potentially self are shared with all forks. */ \ 400 + child = clone3_vfork(); \ 430 401 if (child == 0) { \ 431 - fixture_name##_setup(_metadata, &self, variant->data); \ 402 + fixture_name##_setup(_metadata, self, variant->data); \ 432 403 /* Let setup failure terminate early. 
*/ \ 433 404 if (_metadata->exit_code) \ 434 405 _exit(0); \ 435 406 _metadata->setup_completed = true; \ 436 - fixture_name##_##test_name(_metadata, &self, variant->data); \ 407 + fixture_name##_##test_name(_metadata, self, variant->data); \ 437 408 } else if (child < 0 || child != waitpid(child, &status, 0)) { \ 438 409 ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \ 439 410 _metadata->exit_code = KSFT_FAIL; \ 440 411 } \ 441 412 } \ 442 - else \ 443 - jmp = true; \ 444 413 if (child == 0) { \ 445 - if (_metadata->setup_completed && !_metadata->teardown_parent && !jmp) \ 446 - fixture_name##_teardown(_metadata, &self, variant->data); \ 414 + if (_metadata->setup_completed && !fixture_name##_teardown_parent && \ 415 + __sync_bool_compare_and_swap(teardown, false, true)) \ 416 + fixture_name##_teardown(_metadata, self, variant->data); \ 447 417 _exit(0); \ 448 418 } \ 449 - if (_metadata->setup_completed && _metadata->teardown_parent) \ 450 - fixture_name##_teardown(_metadata, &self, variant->data); \ 451 - if (!WIFEXITED(status) && WIFSIGNALED(status)) \ 419 + if (_metadata->setup_completed && fixture_name##_teardown_parent && \ 420 + __sync_bool_compare_and_swap(teardown, false, true)) \ 421 + fixture_name##_teardown(_metadata, self, variant->data); \ 422 + munmap(teardown, sizeof(*teardown)); \ 423 + if (self && fixture_name##_teardown_parent) \ 424 + munmap(self, sizeof(*self)); \ 425 + if (WIFEXITED(status)) { \ 426 + if (WEXITSTATUS(status)) \ 427 + _metadata->exit_code = WEXITSTATUS(status); \ 428 + } else if (WIFSIGNALED(status)) { \ 452 429 /* Forward signal to __wait_for_test(). 
*/ \ 453 430 kill(getpid(), WTERMSIG(status)); \ 431 + } \ 454 432 __test_check_assert(_metadata); \ 455 433 } \ 456 - static struct __test_metadata \ 457 - _##fixture_name##_##test_name##_object = { \ 458 - .name = #test_name, \ 459 - .fn = &wrapper_##fixture_name##_##test_name, \ 460 - .fixture = &_##fixture_name##_fixture_object, \ 461 - .termsig = signal, \ 462 - .timeout = tmout, \ 463 - .teardown_parent = false, \ 464 - }; \ 434 + static struct __test_metadata *_##fixture_name##_##test_name##_object; \ 465 435 static void __attribute__((constructor)) \ 466 436 _register_##fixture_name##_##test_name(void) \ 467 437 { \ 468 - __register_test(&_##fixture_name##_##test_name##_object); \ 438 + struct __test_metadata *object = mmap(NULL, sizeof(*object), \ 439 + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); \ 440 + object->name = #test_name; \ 441 + object->fn = &wrapper_##fixture_name##_##test_name; \ 442 + object->fixture = &_##fixture_name##_fixture_object; \ 443 + object->termsig = signal; \ 444 + object->timeout = tmout; \ 445 + _##fixture_name##_##test_name##_object = object; \ 446 + __register_test(object); \ 469 447 } \ 470 448 static void fixture_name##_##test_name( \ 471 449 struct __test_metadata __attribute__((unused)) *_metadata, \ ··· 891 833 { \ 892 834 .fixture = &_##fixture_name##_fixture_object, \ 893 835 .variant = &_##fixture_name##_##variant_name##_object, \ 894 - .test = &_##fixture_name##_##test_name##_object, \ 895 836 }; \ 896 837 static void __attribute__((constructor)) \ 897 838 _register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \ 898 839 { \ 840 + _##fixture_name##_##variant_name##_##test_name##_xfail.test = \ 841 + _##fixture_name##_##test_name##_object; \ 899 842 __register_xfail(&_##fixture_name##_##variant_name##_##test_name##_xfail); \ 900 843 } 901 844 ··· 939 880 bool timed_out; /* did this test timeout instead of exiting? 
*/ 940 881 bool aborted; /* stopped test due to failed ASSERT */ 941 882 bool setup_completed; /* did setup finish? */ 942 - bool teardown_parent; /* run teardown in a parent process */ 943 883 jmp_buf env; /* for exiting out of test early */ 944 884 struct __test_results *results; 945 885 struct __test_metadata *prev, *next; ··· 1222 1164 /* reset test struct */ 1223 1165 t->exit_code = KSFT_PASS; 1224 1166 t->trigger = 0; 1167 + t->aborted = false; 1168 + t->setup_completed = false; 1169 + memset(t->env, 0, sizeof(t->env)); 1225 1170 memset(t->results->reason, 0, sizeof(t->results->reason)); 1226 1171 1227 1172 if (asprintf(&test_name, "%s%s%s.%s", f->name, ··· 1240 1179 fflush(stdout); 1241 1180 fflush(stderr); 1242 1181 1243 - t->pid = fork(); 1182 + t->pid = clone3_vfork(); 1244 1183 if (t->pid < 0) { 1245 1184 ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); 1246 1185 t->exit_code = KSFT_FAIL;
+50 -33
tools/testing/selftests/landlock/fs_test.c
··· 9 9 10 10 #define _GNU_SOURCE 11 11 #include <fcntl.h> 12 + #include <libgen.h> 12 13 #include <linux/landlock.h> 13 14 #include <linux/magic.h> 14 15 #include <sched.h> ··· 286 285 287 286 static void prepare_layout(struct __test_metadata *const _metadata) 288 287 { 289 - _metadata->teardown_parent = true; 290 - 291 288 prepare_layout_opt(_metadata, &mnt_tmp); 292 289 } 293 290 294 291 static void cleanup_layout(struct __test_metadata *const _metadata) 295 292 { 296 293 set_cap(_metadata, CAP_SYS_ADMIN); 297 - EXPECT_EQ(0, umount(TMP_DIR)); 294 + if (umount(TMP_DIR)) { 295 + /* 296 + * According to the test environment, the mount point of the 297 + * current directory may be shared or not, which changes the 298 + * visibility of the nested TMP_DIR mount point for the test's 299 + * parent process doing this cleanup. 300 + */ 301 + ASSERT_EQ(EINVAL, errno); 302 + } 298 303 clear_cap(_metadata, CAP_SYS_ADMIN); 299 304 EXPECT_EQ(0, remove_path(TMP_DIR)); 300 305 } ··· 314 307 prepare_layout(_metadata); 315 308 } 316 309 317 - FIXTURE_TEARDOWN(layout0) 310 + FIXTURE_TEARDOWN_PARENT(layout0) 318 311 { 319 312 cleanup_layout(_metadata); 320 313 } ··· 377 370 create_layout1(_metadata); 378 371 } 379 372 380 - FIXTURE_TEARDOWN(layout1) 373 + FIXTURE_TEARDOWN_PARENT(layout1) 381 374 { 382 375 remove_layout1(_metadata); 383 376 ··· 3690 3683 create_file(_metadata, file1_s1d1); 3691 3684 } 3692 3685 3693 - FIXTURE_TEARDOWN(ftruncate) 3686 + FIXTURE_TEARDOWN_PARENT(ftruncate) 3694 3687 { 3695 3688 EXPECT_EQ(0, remove_path(file1_s1d1)); 3696 3689 cleanup_layout(_metadata); ··· 3868 3861 clear_cap(_metadata, CAP_SYS_ADMIN); 3869 3862 } 3870 3863 3871 - FIXTURE_TEARDOWN(layout1_bind) 3864 + FIXTURE_TEARDOWN_PARENT(layout1_bind) 3872 3865 { 3873 3866 /* umount(dir_s2d2)) is handled by namespace lifetime. 
*/ 3874 3867 ··· 4273 4266 clear_cap(_metadata, CAP_SYS_ADMIN); 4274 4267 } 4275 4268 4276 - FIXTURE_TEARDOWN(layout2_overlay) 4269 + FIXTURE_TEARDOWN_PARENT(layout2_overlay) 4277 4270 { 4278 4271 if (self->skip_test) 4279 4272 SKIP(return, "overlayfs is not supported (teardown)"); ··· 4623 4616 { 4624 4617 bool has_created_dir; 4625 4618 bool has_created_file; 4626 - char *dir_path; 4627 4619 bool skip_test; 4628 4620 }; 4629 4621 ··· 4681 4675 .cwd_fs_magic = HOSTFS_SUPER_MAGIC, 4682 4676 }; 4683 4677 4678 + static char *dirname_alloc(const char *path) 4679 + { 4680 + char *dup; 4681 + 4682 + if (!path) 4683 + return NULL; 4684 + 4685 + dup = strdup(path); 4686 + if (!dup) 4687 + return NULL; 4688 + 4689 + return dirname(dup); 4690 + } 4691 + 4684 4692 FIXTURE_SETUP(layout3_fs) 4685 4693 { 4686 4694 struct stat statbuf; 4687 - const char *slash; 4688 - size_t dir_len; 4695 + char *dir_path = dirname_alloc(variant->file_path); 4689 4696 4690 4697 if (!supports_filesystem(variant->mnt.type) || 4691 4698 !cwd_matches_fs(variant->cwd_fs_magic)) { ··· 4706 4687 SKIP(return, "this filesystem is not supported (setup)"); 4707 4688 } 4708 4689 4709 - _metadata->teardown_parent = true; 4710 - 4711 - slash = strrchr(variant->file_path, '/'); 4712 - ASSERT_NE(slash, NULL); 4713 - dir_len = (size_t)slash - (size_t)variant->file_path; 4714 - ASSERT_LT(0, dir_len); 4715 - self->dir_path = malloc(dir_len + 1); 4716 - self->dir_path[dir_len] = '\0'; 4717 - strncpy(self->dir_path, variant->file_path, dir_len); 4718 - 4719 4690 prepare_layout_opt(_metadata, &variant->mnt); 4720 4691 4721 4692 /* Creates directory when required. 
*/ 4722 - if (stat(self->dir_path, &statbuf)) { 4693 + if (stat(dir_path, &statbuf)) { 4723 4694 set_cap(_metadata, CAP_DAC_OVERRIDE); 4724 - EXPECT_EQ(0, mkdir(self->dir_path, 0700)) 4695 + EXPECT_EQ(0, mkdir(dir_path, 0700)) 4725 4696 { 4726 4697 TH_LOG("Failed to create directory \"%s\": %s", 4727 - self->dir_path, strerror(errno)); 4728 - free(self->dir_path); 4729 - self->dir_path = NULL; 4698 + dir_path, strerror(errno)); 4730 4699 } 4731 4700 self->has_created_dir = true; 4732 4701 clear_cap(_metadata, CAP_DAC_OVERRIDE); ··· 4735 4728 self->has_created_file = true; 4736 4729 clear_cap(_metadata, CAP_DAC_OVERRIDE); 4737 4730 } 4731 + 4732 + free(dir_path); 4738 4733 } 4739 4734 4740 - FIXTURE_TEARDOWN(layout3_fs) 4735 + FIXTURE_TEARDOWN_PARENT(layout3_fs) 4741 4736 { 4742 4737 if (self->skip_test) 4743 4738 SKIP(return, "this filesystem is not supported (teardown)"); ··· 4755 4746 } 4756 4747 4757 4748 if (self->has_created_dir) { 4749 + char *dir_path = dirname_alloc(variant->file_path); 4750 + 4758 4751 set_cap(_metadata, CAP_DAC_OVERRIDE); 4759 4752 /* 4760 4753 * Don't check for error because the directory might already 4761 4754 * have been removed (cf. release_inode test). 
4762 4755 */ 4763 - rmdir(self->dir_path); 4756 + rmdir(dir_path); 4764 4757 clear_cap(_metadata, CAP_DAC_OVERRIDE); 4758 + free(dir_path); 4765 4759 } 4766 - free(self->dir_path); 4767 - self->dir_path = NULL; 4768 4760 4769 4761 cleanup_layout(_metadata); 4770 4762 } ··· 4832 4822 4833 4823 TEST_F_FORK(layout3_fs, tag_inode_dir_child) 4834 4824 { 4835 - layer3_fs_tag_inode(_metadata, self, variant, self->dir_path); 4825 + char *dir_path = dirname_alloc(variant->file_path); 4826 + 4827 + layer3_fs_tag_inode(_metadata, self, variant, dir_path); 4828 + free(dir_path); 4836 4829 } 4837 4830 4838 4831 TEST_F_FORK(layout3_fs, tag_inode_file) ··· 4862 4849 if (self->has_created_file) 4863 4850 EXPECT_EQ(0, remove_path(variant->file_path)); 4864 4851 4865 - if (self->has_created_dir) 4852 + if (self->has_created_dir) { 4853 + char *dir_path = dirname_alloc(variant->file_path); 4854 + 4866 4855 /* Don't check for error because of cgroup specificities. */ 4867 - remove_path(self->dir_path); 4856 + remove_path(dir_path); 4857 + free(dir_path); 4858 + } 4868 4859 4869 4860 ruleset_fd = 4870 4861 create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1);
+2
tools/testing/selftests/pidfd/config
··· 3 3 CONFIG_USER_NS=y 4 4 CONFIG_PID_NS=y 5 5 CONFIG_NET_NS=y 6 + CONFIG_TIME_NS=y 7 + CONFIG_GENERIC_VDSO_TIME_NS=y 6 8 CONFIG_CGROUPS=y 7 9 CONFIG_CHECKPOINT_RESTORE=y
+1 -1
tools/testing/selftests/pidfd/pidfd_setns_test.c
··· 158 158 /* Create task that exits right away. */ 159 159 self->child_pid_exited = create_child(&self->child_pidfd_exited, 160 160 CLONE_NEWUSER | CLONE_NEWNET); 161 - EXPECT_GT(self->child_pid_exited, 0); 161 + EXPECT_GE(self->child_pid_exited, 0); 162 162 163 163 if (self->child_pid_exited == 0) 164 164 _exit(EXIT_SUCCESS);