Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/pidfd: add pidfd setns tests

This is basically a test-suite for setns() and as of now contains:
- test that we can't pass garbage flags
- test that we can't attach to the namespaces of a task that has already exited
- test that we can incrementally setns into all namespaces of a target task
using a pidfd
- test that we can setns atomically into all namespaces of a target task
- test that we can't cross setns into a user namespace outside of our user
namespace hierarchy
- test that we can't setns into namespaces owned by user namespaces over which
we are not privileged

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/20200505140432.181565-4-christian.brauner@ubuntu.com

+482 -1
+1
tools/testing/selftests/pidfd/.gitignore
··· 5 5 pidfd_wait 6 6 pidfd_fdinfo_test 7 7 pidfd_getfd_test 8 + pidfd_setns_test
+2 -1
tools/testing/selftests/pidfd/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 CFLAGS += -g -I../../../../usr/include/ -pthread 3 3 4 - TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait pidfd_getfd_test 4 + TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ 5 + pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test 5 6 6 7 include ../lib.mk 7 8
+6
tools/testing/selftests/pidfd/config
··· 1 + CONFIG_UTS_NS=y 2 + CONFIG_IPC_NS=y 3 + CONFIG_USER_NS=y 4 + CONFIG_PID_NS=y 5 + CONFIG_NET_NS=y 6 + CONFIG_CGROUPS=y
+473
tools/testing/selftests/pidfd/pidfd_setns_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #define _GNU_SOURCE 4 + #include <errno.h> 5 + #include <fcntl.h> 6 + #include <limits.h> 7 + #include <linux/types.h> 8 + #include <sched.h> 9 + #include <signal.h> 10 + #include <stdio.h> 11 + #include <stdlib.h> 12 + #include <string.h> 13 + #include <syscall.h> 14 + #include <sys/prctl.h> 15 + #include <sys/wait.h> 16 + #include <unistd.h> 17 + #include <sys/socket.h> 18 + #include <sys/stat.h> 19 + #include <linux/kcmp.h> 20 + 21 + #include "pidfd.h" 22 + #include "../clone3/clone3_selftests.h" 23 + #include "../kselftest.h" 24 + #include "../kselftest_harness.h" 25 + 26 + enum { 27 + PIDFD_NS_USER, 28 + PIDFD_NS_MNT, 29 + PIDFD_NS_PID, 30 + PIDFD_NS_UTS, 31 + PIDFD_NS_IPC, 32 + PIDFD_NS_NET, 33 + PIDFD_NS_CGROUP, 34 + PIDFD_NS_PIDCLD, 35 + PIDFD_NS_MAX 36 + }; 37 + 38 + const struct ns_info { 39 + const char *name; 40 + int flag; 41 + } ns_info[] = { 42 + [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, }, 43 + [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, }, 44 + [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, }, 45 + [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, }, 46 + [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, }, 47 + [PIDFD_NS_NET] = { "net", CLONE_NEWNET, }, 48 + [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, }, 49 + [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, }, 50 + }; 51 + 52 + FIXTURE(current_nsset) 53 + { 54 + pid_t pid; 55 + int pidfd; 56 + int nsfds[PIDFD_NS_MAX]; 57 + 58 + pid_t child_pid_exited; 59 + int child_pidfd_exited; 60 + 61 + pid_t child_pid1; 62 + int child_pidfd1; 63 + int child_nsfds1[PIDFD_NS_MAX]; 64 + 65 + pid_t child_pid2; 66 + int child_pidfd2; 67 + int child_nsfds2[PIDFD_NS_MAX]; 68 + }; 69 + 70 + static int sys_waitid(int which, pid_t pid, int options) 71 + { 72 + return syscall(__NR_waitid, which, pid, NULL, options, NULL); 73 + } 74 + 75 + pid_t create_child(int *pidfd, unsigned flags) 76 + { 77 + struct clone_args args = { 78 + .flags = CLONE_PIDFD | flags, 79 + .exit_signal = SIGCHLD, 80 + .pidfd = 
ptr_to_u64(pidfd), 81 + }; 82 + 83 + return sys_clone3(&args, sizeof(struct clone_args)); 84 + } 85 + 86 + FIXTURE_SETUP(current_nsset) 87 + { 88 + int i, proc_fd, ret; 89 + 90 + for (i = 0; i < PIDFD_NS_MAX; i++) { 91 + self->nsfds[i] = -EBADF; 92 + self->child_nsfds1[i] = -EBADF; 93 + self->child_nsfds2[i] = -EBADF; 94 + } 95 + 96 + proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC); 97 + ASSERT_GE(proc_fd, 0) { 98 + TH_LOG("%m - Failed to open /proc/self/ns"); 99 + } 100 + 101 + self->pid = getpid(); 102 + for (i = 0; i < PIDFD_NS_MAX; i++) { 103 + const struct ns_info *info = &ns_info[i]; 104 + self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); 105 + if (self->nsfds[i] < 0) { 106 + EXPECT_EQ(errno, ENOENT) { 107 + TH_LOG("%m - Failed to open %s namespace for process %d", 108 + info->name, self->pid); 109 + } 110 + } 111 + } 112 + 113 + self->pidfd = sys_pidfd_open(self->pid, 0); 114 + EXPECT_GT(self->pidfd, 0) { 115 + TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 116 + } 117 + 118 + /* Create task that exits right away. */ 119 + self->child_pid_exited = create_child(&self->child_pidfd_exited, 120 + CLONE_NEWUSER | CLONE_NEWNET); 121 + EXPECT_GT(self->child_pid_exited, 0); 122 + 123 + if (self->child_pid_exited == 0) 124 + _exit(EXIT_SUCCESS); 125 + 126 + ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); 127 + 128 + self->pidfd = sys_pidfd_open(self->pid, 0); 129 + EXPECT_GE(self->pidfd, 0) { 130 + TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 131 + } 132 + 133 + /* Create tasks that will be stopped. 
*/ 134 + self->child_pid1 = create_child(&self->child_pidfd1, 135 + CLONE_NEWUSER | CLONE_NEWNS | 136 + CLONE_NEWCGROUP | CLONE_NEWIPC | 137 + CLONE_NEWUTS | CLONE_NEWPID | 138 + CLONE_NEWNET); 139 + EXPECT_GE(self->child_pid1, 0); 140 + 141 + if (self->child_pid1 == 0) { 142 + pause(); 143 + _exit(EXIT_SUCCESS); 144 + } 145 + 146 + self->child_pid2 = create_child(&self->child_pidfd2, 147 + CLONE_NEWUSER | CLONE_NEWNS | 148 + CLONE_NEWCGROUP | CLONE_NEWIPC | 149 + CLONE_NEWUTS | CLONE_NEWPID | 150 + CLONE_NEWNET); 151 + EXPECT_GE(self->child_pid2, 0); 152 + 153 + if (self->child_pid2 == 0) { 154 + pause(); 155 + _exit(EXIT_SUCCESS); 156 + } 157 + 158 + for (i = 0; i < PIDFD_NS_MAX; i++) { 159 + char p[100]; 160 + 161 + const struct ns_info *info = &ns_info[i]; 162 + 163 + self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); 164 + if (self->nsfds[i] < 0) { 165 + EXPECT_EQ(errno, ENOENT) { 166 + TH_LOG("%m - Failed to open %s namespace for process %d", 167 + info->name, self->pid); 168 + } 169 + } 170 + 171 + ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", 172 + self->child_pid1, info->name); 173 + EXPECT_GT(ret, 0); 174 + EXPECT_LT(ret, sizeof(p)); 175 + 176 + self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC); 177 + if (self->child_nsfds1[i] < 0) { 178 + EXPECT_EQ(errno, ENOENT) { 179 + TH_LOG("%m - Failed to open %s namespace for process %d", 180 + info->name, self->child_pid1); 181 + } 182 + } 183 + 184 + ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", 185 + self->child_pid2, info->name); 186 + EXPECT_GT(ret, 0); 187 + EXPECT_LT(ret, sizeof(p)); 188 + 189 + self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC); 190 + if (self->child_nsfds2[i] < 0) { 191 + EXPECT_EQ(errno, ENOENT) { 192 + TH_LOG("%m - Failed to open %s namespace for process %d", 193 + info->name, self->child_pid1); 194 + } 195 + } 196 + } 197 + 198 + close(proc_fd); 199 + } 200 + 201 + FIXTURE_TEARDOWN(current_nsset) 202 + { 203 + int i; 204 + 205 + 
ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1, 206 + SIGKILL, NULL, 0), 0); 207 + ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2, 208 + SIGKILL, NULL, 0), 0); 209 + 210 + for (i = 0; i < PIDFD_NS_MAX; i++) { 211 + if (self->nsfds[i] >= 0) 212 + close(self->nsfds[i]); 213 + if (self->child_nsfds1[i] >= 0) 214 + close(self->child_nsfds1[i]); 215 + if (self->child_nsfds2[i] >= 0) 216 + close(self->child_nsfds2[i]); 217 + } 218 + 219 + if (self->child_pidfd1 >= 0) 220 + EXPECT_EQ(0, close(self->child_pidfd1)); 221 + if (self->child_pidfd2 >= 0) 222 + EXPECT_EQ(0, close(self->child_pidfd2)); 223 + ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0); 224 + ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0); 225 + ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0); 226 + } 227 + 228 + static int preserve_ns(const int pid, const char *ns) 229 + { 230 + int ret; 231 + char path[50]; 232 + 233 + ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns); 234 + if (ret < 0 || (size_t)ret >= sizeof(path)) 235 + return -EIO; 236 + 237 + return open(path, O_RDONLY | O_CLOEXEC); 238 + } 239 + 240 + static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns) 241 + { 242 + int ns_fd2 = -EBADF; 243 + int ret = -1; 244 + struct stat ns_st1, ns_st2; 245 + 246 + ret = fstat(ns_fd1, &ns_st1); 247 + if (ret < 0) 248 + return -1; 249 + 250 + ns_fd2 = preserve_ns(pid2, ns); 251 + if (ns_fd2 < 0) 252 + return -1; 253 + 254 + ret = fstat(ns_fd2, &ns_st2); 255 + close(ns_fd2); 256 + if (ret < 0) 257 + return -1; 258 + 259 + /* processes are in the same namespace */ 260 + if ((ns_st1.st_dev == ns_st2.st_dev) && 261 + (ns_st1.st_ino == ns_st2.st_ino)) 262 + return 1; 263 + 264 + /* processes are in different namespaces */ 265 + return 0; 266 + } 267 + 268 + /* Test that we can't pass garbage to the kernel. 
*/ 269 + TEST_F(current_nsset, invalid_flags) 270 + { 271 + ASSERT_NE(setns(self->pidfd, 0), 0); 272 + EXPECT_EQ(errno, EINVAL); 273 + 274 + ASSERT_NE(setns(self->pidfd, -1), 0); 275 + EXPECT_EQ(errno, EINVAL); 276 + 277 + ASSERT_NE(setns(self->pidfd, CLONE_VM), 0); 278 + EXPECT_EQ(errno, EINVAL); 279 + 280 + ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0); 281 + EXPECT_EQ(errno, EINVAL); 282 + } 283 + 284 + /* Test that we can't attach to a task that has already exited. */ 285 + TEST_F(current_nsset, pidfd_exited_child) 286 + { 287 + int i; 288 + pid_t pid; 289 + 290 + ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET), 291 + 0); 292 + EXPECT_EQ(errno, ESRCH); 293 + 294 + pid = getpid(); 295 + for (i = 0; i < PIDFD_NS_MAX; i++) { 296 + const struct ns_info *info = &ns_info[i]; 297 + /* Verify that we haven't changed any namespaces. */ 298 + if (self->nsfds[i] >= 0) 299 + ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1); 300 + } 301 + } 302 + 303 + TEST_F(current_nsset, pidfd_incremental_setns) 304 + { 305 + int i; 306 + pid_t pid; 307 + 308 + pid = getpid(); 309 + for (i = 0; i < PIDFD_NS_MAX; i++) { 310 + const struct ns_info *info = &ns_info[i]; 311 + int nsfd; 312 + 313 + if (self->child_nsfds1[i] < 0) 314 + continue; 315 + 316 + if (info->flag) { 317 + ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) { 318 + TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d", 319 + info->name, self->child_pid1, 320 + self->child_pidfd1); 321 + } 322 + } 323 + 324 + /* Verify that we have changed to the correct namespaces. 
*/ 325 + if (info->flag == CLONE_NEWPID) 326 + nsfd = self->nsfds[i]; 327 + else 328 + nsfd = self->child_nsfds1[i]; 329 + ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 330 + TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d", 331 + info->name, self->child_pid1, 332 + self->child_pidfd1); 333 + } 334 + TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d", 335 + info->name, self->child_pid1, self->child_pidfd1); 336 + } 337 + } 338 + 339 + TEST_F(current_nsset, nsfd_incremental_setns) 340 + { 341 + int i; 342 + pid_t pid; 343 + 344 + pid = getpid(); 345 + for (i = 0; i < PIDFD_NS_MAX; i++) { 346 + const struct ns_info *info = &ns_info[i]; 347 + int nsfd; 348 + 349 + if (self->child_nsfds1[i] < 0) 350 + continue; 351 + 352 + if (info->flag) { 353 + ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) { 354 + TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d", 355 + info->name, self->child_pid1, 356 + self->child_nsfds1[i]); 357 + } 358 + } 359 + 360 + /* Verify that we have changed to the correct namespaces. 
*/ 361 + if (info->flag == CLONE_NEWPID) 362 + nsfd = self->nsfds[i]; 363 + else 364 + nsfd = self->child_nsfds1[i]; 365 + ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 366 + TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d", 367 + info->name, self->child_pid1, 368 + self->child_nsfds1[i]); 369 + } 370 + TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d", 371 + info->name, self->child_pid1, self->child_nsfds1[i]); 372 + } 373 + } 374 + 375 + TEST_F(current_nsset, pidfd_one_shot_setns) 376 + { 377 + unsigned flags = 0; 378 + int i; 379 + pid_t pid; 380 + 381 + for (i = 0; i < PIDFD_NS_MAX; i++) { 382 + const struct ns_info *info = &ns_info[i]; 383 + 384 + if (self->child_nsfds1[i] < 0) 385 + continue; 386 + 387 + flags |= info->flag; 388 + TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", 389 + info->name, self->child_pid1); 390 + } 391 + 392 + ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { 393 + TH_LOG("%m - Failed to setns to namespaces of %d", 394 + self->child_pid1); 395 + } 396 + 397 + pid = getpid(); 398 + for (i = 0; i < PIDFD_NS_MAX; i++) { 399 + const struct ns_info *info = &ns_info[i]; 400 + int nsfd; 401 + 402 + if (self->child_nsfds1[i] < 0) 403 + continue; 404 + 405 + /* Verify that we have changed to the correct namespaces. 
*/ 406 + if (info->flag == CLONE_NEWPID) 407 + nsfd = self->nsfds[i]; 408 + else 409 + nsfd = self->child_nsfds1[i]; 410 + ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 411 + TH_LOG("setns failed to place us correctly into %s namespace of %d", 412 + info->name, self->child_pid1); 413 + } 414 + TH_LOG("Managed to correctly setns to %s namespace of %d", 415 + info->name, self->child_pid1); 416 + } 417 + } 418 + 419 + TEST_F(current_nsset, no_foul_play) 420 + { 421 + unsigned flags = 0; 422 + int i; 423 + 424 + for (i = 0; i < PIDFD_NS_MAX; i++) { 425 + const struct ns_info *info = &ns_info[i]; 426 + 427 + if (self->child_nsfds1[i] < 0) 428 + continue; 429 + 430 + flags |= info->flag; 431 + if (info->flag) /* No use logging pid_for_children. */ 432 + TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", 433 + info->name, self->child_pid1); 434 + } 435 + 436 + ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { 437 + TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d", 438 + self->child_pid1, self->child_pidfd1); 439 + } 440 + 441 + /* 442 + * Can't setns to a user namespace outside of our hierarchy since we 443 + * don't have caps in there and didn't create it. That means that under 444 + * no circumstances should we be able to setns to any of the other 445 + * ones since they aren't owned by our user namespace. 
446 + */ 447 + for (i = 0; i < PIDFD_NS_MAX; i++) { 448 + const struct ns_info *info = &ns_info[i]; 449 + 450 + if (self->child_nsfds2[i] < 0 || !info->flag) 451 + continue; 452 + 453 + ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) { 454 + TH_LOG("Managed to setns to %s namespace of %d via pidfd %d", 455 + info->name, self->child_pid2, 456 + self->child_pidfd2); 457 + } 458 + TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d", 459 + info->name, self->child_pid2, 460 + self->child_pidfd2); 461 + 462 + ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) { 463 + TH_LOG("Managed to setns to %s namespace of %d via nsfd %d", 464 + info->name, self->child_pid2, 465 + self->child_nsfds2[i]); 466 + } 467 + TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d", 468 + info->name, self->child_pid2, 469 + self->child_nsfds2[i]); 470 + } 471 + } 472 + 473 + TEST_HARNESS_MAIN