Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

pidfs: add selftests for new namespace ioctls

Add selftests to verify that deriving namespace file descriptors from
pidfd file descriptors works correctly.

Link: https://lore.kernel.org/r/20240722-work-pidfs-69dbea91edab@brauner
Signed-off-by: Christian Brauner <brauner@kernel.org>

+227 -31
+227 -31
tools/testing/selftests/pidfd/pidfd_setns_test.c
··· 16 16 #include <unistd.h> 17 17 #include <sys/socket.h> 18 18 #include <sys/stat.h> 19 + #include <linux/ioctl.h> 19 20 20 21 #include "pidfd.h" 21 22 #include "../clone3/clone3_selftests.h" 22 23 #include "../kselftest_harness.h" 24 + 25 + #ifndef PIDFS_IOCTL_MAGIC 26 + #define PIDFS_IOCTL_MAGIC 0xFF 27 + #endif 28 + 29 + #ifndef PIDFD_GET_CGROUP_NAMESPACE 30 + #define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) 31 + #endif 32 + 33 + #ifndef PIDFD_GET_IPC_NAMESPACE 34 + #define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) 35 + #endif 36 + 37 + #ifndef PIDFD_GET_MNT_NAMESPACE 38 + #define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) 39 + #endif 40 + 41 + #ifndef PIDFD_GET_NET_NAMESPACE 42 + #define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) 43 + #endif 44 + 45 + #ifndef PIDFD_GET_PID_NAMESPACE 46 + #define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) 47 + #endif 48 + 49 + #ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE 50 + #define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) 51 + #endif 52 + 53 + #ifndef PIDFD_GET_TIME_NAMESPACE 54 + #define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) 55 + #endif 56 + 57 + #ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE 58 + #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) 59 + #endif 60 + 61 + #ifndef PIDFD_GET_USER_NAMESPACE 62 + #define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) 63 + #endif 64 + 65 + #ifndef PIDFD_GET_UTS_NAMESPACE 66 + #define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) 67 + #endif 23 68 24 69 enum { 25 70 PIDFD_NS_USER, ··· 76 31 PIDFD_NS_CGROUP, 77 32 PIDFD_NS_PIDCLD, 78 33 PIDFD_NS_TIME, 34 + PIDFD_NS_TIMECLD, 79 35 PIDFD_NS_MAX 80 36 }; 81 37 82 38 const struct ns_info { 83 39 const char *name; 84 40 int flag; 41 + unsigned int pidfd_ioctl; 85 42 } ns_info[] = { 86 - [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, }, 87 - [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, }, 88 - [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, }, 89 - [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, }, 90 - [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, }, 91 - [PIDFD_NS_NET] = { "net", CLONE_NEWNET, }, 92 - [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, }, 93 - [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, }, 94 - [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, }, 43 + [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, PIDFD_GET_USER_NAMESPACE, }, 44 + [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, PIDFD_GET_MNT_NAMESPACE, }, 45 + [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, PIDFD_GET_PID_NAMESPACE, }, 46 + [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, PIDFD_GET_UTS_NAMESPACE, }, 47 + [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, PIDFD_GET_IPC_NAMESPACE, }, 48 + [PIDFD_NS_NET] = { "net", CLONE_NEWNET, PIDFD_GET_NET_NAMESPACE, }, 49 + [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, }, 50 + [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, PIDFD_GET_TIME_NAMESPACE, }, 51 + [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE, }, 52 + [PIDFD_NS_TIMECLD] = { "time_for_children", 0, PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE, }, 95 53 }; 96 54 97 55 FIXTURE(current_nsset) ··· 102 54 pid_t pid; 103 55 int pidfd; 104 56 int nsfds[PIDFD_NS_MAX]; 57 + int child_pidfd_derived_nsfds[PIDFD_NS_MAX]; 105 58 106 59 pid_t child_pid_exited; 107 60 int child_pidfd_exited; ··· 110 61 pid_t child_pid1; 111 62 int child_pidfd1; 112 63 int child_nsfds1[PIDFD_NS_MAX]; 64 + int child_pidfd_derived_nsfds1[PIDFD_NS_MAX]; 113 65 114 66 pid_t child_pid2; 115 67 int child_pidfd2; 116 68 int child_nsfds2[PIDFD_NS_MAX]; 69 + int child_pidfd_derived_nsfds2[PIDFD_NS_MAX]; 117 70 }; 118 71 119 72 static int sys_waitid(int which, pid_t pid, int options) ··· 179 128 char c; 180 129 181 130 for (i = 0; i < PIDFD_NS_MAX; i++) { 182 - self->nsfds[i] = -EBADF; 183 - self->child_nsfds1[i] = -EBADF; 184 - self->child_nsfds2[i] = -EBADF; 131 + self->nsfds[i] = -EBADF; 132 + self->child_nsfds1[i] = -EBADF; 133 + self->child_nsfds2[i] = -EBADF; 134 + self->child_pidfd_derived_nsfds[i] = -EBADF; 135 + self->child_pidfd_derived_nsfds1[i] = -EBADF; 136 + self->child_pidfd_derived_nsfds2[i] = -EBADF; 185 137 } 186 138 187 139 proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC); ··· 193 139 } 194 140 195 141 self->pid = getpid(); 142 + self->pidfd = sys_pidfd_open(self->pid, 0); 143 + EXPECT_GT(self->pidfd, 0) { 144 + TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 145 + } 146 + 196 147 for (i = 0; i < PIDFD_NS_MAX; i++) { 197 148 const struct ns_info *info = &ns_info[i]; 198 149 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); ··· 207 148 info->name, self->pid); 208 149 } 209 150 } 210 - } 211 151 212 - self->pidfd = sys_pidfd_open(self->pid, 0); 213 - EXPECT_GT(self->pidfd, 0) { 214 - TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 152 + self->child_pidfd_derived_nsfds[i] = ioctl(self->pidfd, info->pidfd_ioctl, 0); 153 + if (self->child_pidfd_derived_nsfds[i] < 0) { 154 + EXPECT_EQ(errno, EOPNOTSUPP) { 155 + TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d", 156 + info->name, self->pid); 157 + } 158 + } 215 159 } 216 160 217 161 /* Create task that exits right away. */ 218 - self->child_pid_exited = create_child(&self->child_pidfd_exited, 219 - CLONE_NEWUSER | CLONE_NEWNET); 162 + self->child_pid_exited = create_child(&self->child_pidfd_exited, 0); 220 163 EXPECT_GE(self->child_pid_exited, 0); 221 164 222 - if (self->child_pid_exited == 0) 165 + if (self->child_pid_exited == 0) { 166 + if (self->nsfds[PIDFD_NS_USER] >= 0 && unshare(CLONE_NEWUSER) < 0) 167 + _exit(EXIT_FAILURE); 168 + if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) 169 + _exit(EXIT_FAILURE); 223 170 _exit(EXIT_SUCCESS); 171 + } 224 172 225 173 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); 226 174 ··· 240 174 EXPECT_EQ(ret, 0); 241 175 242 176 /* Create tasks that will be stopped. */ 243 - self->child_pid1 = create_child(&self->child_pidfd1, 244 - CLONE_NEWUSER | CLONE_NEWNS | 245 - CLONE_NEWCGROUP | CLONE_NEWIPC | 246 - CLONE_NEWUTS | CLONE_NEWPID | 247 - CLONE_NEWNET); 177 + if (self->nsfds[PIDFD_NS_USER] >= 0 && self->nsfds[PIDFD_NS_PID] >= 0) 178 + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER | CLONE_NEWPID); 179 + else if (self->nsfds[PIDFD_NS_PID] >= 0) 180 + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWPID); 181 + else if (self->nsfds[PIDFD_NS_USER] >= 0) 182 + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER); 183 + else 184 + self->child_pid1 = create_child(&self->child_pidfd1, 0); 248 185 EXPECT_GE(self->child_pid1, 0); 249 186 250 187 if (self->child_pid1 == 0) { 251 188 close(ipc_sockets[0]); 252 189 253 - if (!switch_timens()) 190 + if (self->nsfds[PIDFD_NS_MNT] >= 0 && unshare(CLONE_NEWNS) < 0) { 191 + TH_LOG("%m - Failed to unshare mount namespace for process %d", self->pid); 254 192 _exit(EXIT_FAILURE); 193 + } 194 + if (self->nsfds[PIDFD_NS_CGROUP] >= 0 && unshare(CLONE_NEWCGROUP) < 0) { 195 + TH_LOG("%m - Failed to unshare cgroup namespace for process %d", self->pid); 196 + _exit(EXIT_FAILURE); 197 + } 198 + if (self->nsfds[PIDFD_NS_IPC] >= 0 && unshare(CLONE_NEWIPC) < 0) { 199 + TH_LOG("%m - Failed to unshare ipc namespace for process %d", self->pid); 200 + _exit(EXIT_FAILURE); 201 + } 202 + if (self->nsfds[PIDFD_NS_UTS] >= 0 && unshare(CLONE_NEWUTS) < 0) { 203 + TH_LOG("%m - Failed to unshare uts namespace for process %d", self->pid); 204 + _exit(EXIT_FAILURE); 205 + } 206 + if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) { 207 + TH_LOG("%m - Failed to unshare net namespace for process %d", self->pid); 208 + _exit(EXIT_FAILURE); 209 + } 210 + if (self->nsfds[PIDFD_NS_TIME] >= 0 && !switch_timens()) { 211 + TH_LOG("%m - Failed to unshare time namespace for process %d", self->pid); 212 + _exit(EXIT_FAILURE); 213 + } 255 214 256 215 if (write_nointr(ipc_sockets[1], "1", 1) < 0) 257 216 _exit(EXIT_FAILURE); ··· 294 203 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 295 204 EXPECT_EQ(ret, 0); 296 205 297 - self->child_pid2 = create_child(&self->child_pidfd2, 298 - CLONE_NEWUSER | CLONE_NEWNS | 299 - CLONE_NEWCGROUP | CLONE_NEWIPC | 300 - CLONE_NEWUTS | CLONE_NEWPID | 301 - CLONE_NEWNET); 206 + if (self->nsfds[PIDFD_NS_USER] >= 0 && self->nsfds[PIDFD_NS_PID] >= 0) 207 + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID); 208 + else if (self->nsfds[PIDFD_NS_PID] >= 0) 209 + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWPID); 210 + else if (self->nsfds[PIDFD_NS_USER] >= 0) 211 + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER); 212 + else 213 + self->child_pid2 = create_child(&self->child_pidfd2, 0); 302 214 EXPECT_GE(self->child_pid2, 0); 303 215 304 216 if (self->child_pid2 == 0) { 305 217 close(ipc_sockets[0]); 306 218 307 - if (!switch_timens()) 219 + if (self->nsfds[PIDFD_NS_MNT] >= 0 && unshare(CLONE_NEWNS) < 0) { 220 + TH_LOG("%m - Failed to unshare mount namespace for process %d", self->pid); 308 221 _exit(EXIT_FAILURE); 222 + } 223 + if (self->nsfds[PIDFD_NS_CGROUP] >= 0 && unshare(CLONE_NEWCGROUP) < 0) { 224 + TH_LOG("%m - Failed to unshare cgroup namespace for process %d", self->pid); 225 + _exit(EXIT_FAILURE); 226 + } 227 + if (self->nsfds[PIDFD_NS_IPC] >= 0 && unshare(CLONE_NEWIPC) < 0) { 228 + TH_LOG("%m - Failed to unshare ipc namespace for process %d", self->pid); 229 + _exit(EXIT_FAILURE); 230 + } 231 + if (self->nsfds[PIDFD_NS_UTS] >= 0 && unshare(CLONE_NEWUTS) < 0) { 232 + TH_LOG("%m - Failed to unshare uts namespace for process %d", self->pid); 233 + _exit(EXIT_FAILURE); 234 + } 235 + if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) { 236 + TH_LOG("%m - Failed to unshare net namespace for process %d", self->pid); 237 + _exit(EXIT_FAILURE); 238 + } 239 + if (self->nsfds[PIDFD_NS_TIME] >= 0 && !switch_timens()) { 240 + TH_LOG("%m - Failed to unshare time namespace for process %d", self->pid); 241 + _exit(EXIT_FAILURE); 242 + } 309 243 310 244 if (write_nointr(ipc_sockets[1], "1", 1) < 0) 311 245 _exit(EXIT_FAILURE); ··· 383 267 info->name, self->child_pid1); 384 268 } 385 269 } 270 + 271 + self->child_pidfd_derived_nsfds1[i] = ioctl(self->child_pidfd1, info->pidfd_ioctl, 0); 272 + if (self->child_pidfd_derived_nsfds1[i] < 0) { 273 + EXPECT_EQ(errno, EOPNOTSUPP) { 274 + TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d", 275 + info->name, self->child_pid1); 276 + } 277 + } 278 + 279 + self->child_pidfd_derived_nsfds2[i] = ioctl(self->child_pidfd2, info->pidfd_ioctl, 0); 280 + if (self->child_pidfd_derived_nsfds2[i] < 0) { 281 + EXPECT_EQ(errno, EOPNOTSUPP) { 282 + TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d", 283 + info->name, self->child_pid2); 284 + } 285 + } 386 286 } 387 287 388 288 close(proc_fd); ··· 420 288 close(self->child_nsfds1[i]); 421 289 if (self->child_nsfds2[i] >= 0) 422 290 close(self->child_nsfds2[i]); 291 + if (self->child_pidfd_derived_nsfds[i] >= 0) 292 + close(self->child_pidfd_derived_nsfds[i]); 293 + if (self->child_pidfd_derived_nsfds1[i] >= 0) 294 + close(self->child_pidfd_derived_nsfds1[i]); 295 + if (self->child_pidfd_derived_nsfds2[i] >= 0) 296 + close(self->child_pidfd_derived_nsfds2[i]); 423 297 } 424 298 425 299 if (self->child_pidfd1 >= 0) ··· 584 446 } 585 447 } 586 448 449 + TEST_F(current_nsset, pidfd_derived_nsfd_incremental_setns) 450 + { 451 + int i; 452 + pid_t pid; 453 + 454 + pid = getpid(); 455 + for (i = 0; i < PIDFD_NS_MAX; i++) { 456 + const struct ns_info *info = &ns_info[i]; 457 + int nsfd; 458 + 459 + if (self->child_pidfd_derived_nsfds1[i] < 0) 460 + continue; 461 + 462 + if (info->flag) { 463 + ASSERT_EQ(setns(self->child_pidfd_derived_nsfds1[i], info->flag), 0) { 464 + TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d", 465 + info->name, self->child_pid1, 466 + self->child_pidfd_derived_nsfds1[i]); 467 + } 468 + } 469 + 470 + /* Verify that we have changed to the correct namespaces. */ 471 + if (info->flag == CLONE_NEWPID) 472 + nsfd = self->child_pidfd_derived_nsfds[i]; 473 + else 474 + nsfd = self->child_pidfd_derived_nsfds1[i]; 475 + ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 476 + TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d", 477 + info->name, self->child_pid1, 478 + self->child_pidfd_derived_nsfds1[i]); 479 + } 480 + TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d", 481 + info->name, self->child_pid1, self->child_pidfd_derived_nsfds1[i]); 482 + } 483 + } 484 + 587 485 TEST_F(current_nsset, pidfd_one_shot_setns) 588 486 { 589 487 unsigned flags = 0; ··· 715 541 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d", 716 542 info->name, self->child_pid2, 717 543 self->child_nsfds2[i]); 544 + } 545 + 546 + /* 547 + * Can't setns to a user namespace outside of our hierarchy since we 548 + * don't have caps in there and didn't create it. That means that under 549 + * no circumstances should we be able to setns to any of the other 550 + * ones since they aren't owned by our user namespace. 551 + */ 552 + for (i = 0; i < PIDFD_NS_MAX; i++) { 553 + const struct ns_info *info = &ns_info[i]; 554 + 555 + if (self->child_pidfd_derived_nsfds2[i] < 0 || !info->flag) 556 + continue; 557 + 558 + ASSERT_NE(setns(self->child_pidfd_derived_nsfds2[i], info->flag), 0) { 559 + TH_LOG("Managed to setns to %s namespace of %d via nsfd %d", 560 + info->name, self->child_pid2, 561 + self->child_pidfd_derived_nsfds2[i]); 562 + } 563 + TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d", 564 + info->name, self->child_pid2, 565 + self->child_pidfd_derived_nsfds2[i]); 718 566 } 719 567 } 720 568