/*
 * Source: Linux kernel mirror (for testing)
 * git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
 * Topics: kernel, os, linux
 */
1// SPDX-License-Identifier: GPL-2.0
2
3#define _GNU_SOURCE
4#include <errno.h>
5#include <fcntl.h>
6#include <limits.h>
7#include <linux/types.h>
8#include <sched.h>
9#include <signal.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13#include <syscall.h>
14#include <sys/prctl.h>
15#include <sys/wait.h>
16#include <unistd.h>
17#include <sys/socket.h>
18#include <sys/stat.h>
19#include <linux/kcmp.h>
20
21#include "pidfd.h"
22#include "../clone3/clone3_selftests.h"
23#include "../kselftest_harness.h"
24
/*
 * Indices used for the per-task namespace fd arrays in the fixture and for
 * the ns_info[] table. PIDFD_NS_MAX is the number of entries, not a namespace.
 */
enum {
	PIDFD_NS_USER = 0,
	PIDFD_NS_MNT,
	PIDFD_NS_PID,
	PIDFD_NS_UTS,
	PIDFD_NS_IPC,
	PIDFD_NS_NET,
	PIDFD_NS_CGROUP,
	PIDFD_NS_PIDCLD,
	PIDFD_NS_TIME,
	PIDFD_NS_MAX,
};
37
38const struct ns_info {
39 const char *name;
40 int flag;
41} ns_info[] = {
42 [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, },
43 [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, },
44 [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, },
45 [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, },
46 [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, },
47 [PIDFD_NS_NET] = { "net", CLONE_NEWNET, },
48 [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, },
49 [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, },
50 [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, },
51};
52
53FIXTURE(current_nsset)
54{
55 pid_t pid;
56 int pidfd;
57 int nsfds[PIDFD_NS_MAX];
58
59 pid_t child_pid_exited;
60 int child_pidfd_exited;
61
62 pid_t child_pid1;
63 int child_pidfd1;
64 int child_nsfds1[PIDFD_NS_MAX];
65
66 pid_t child_pid2;
67 int child_pidfd2;
68 int child_nsfds2[PIDFD_NS_MAX];
69};
70
/*
 * Thin wrapper around the raw waitid system call for callers that only need
 * to know whether the wait succeeded: the third and fifth syscall arguments
 * (the result buffers) are always passed as NULL.
 *
 * Returns the syscall() result truncated to int.
 */
static int sys_waitid(int which, pid_t pid, int options)
{
	long ret = syscall(__NR_waitid, which, pid, NULL, options, NULL);

	return ret;
}
75
76pid_t create_child(int *pidfd, unsigned flags)
77{
78 struct clone_args args = {
79 .flags = CLONE_PIDFD | flags,
80 .exit_signal = SIGCHLD,
81 .pidfd = ptr_to_u64(pidfd),
82 };
83
84 return sys_clone3(&args, sizeof(struct clone_args));
85}
86
87static bool switch_timens(void)
88{
89 int fd, ret;
90
91 if (unshare(CLONE_NEWTIME))
92 return false;
93
94 fd = open("/proc/self/ns/time_for_children", O_RDONLY | O_CLOEXEC);
95 if (fd < 0)
96 return false;
97
98 ret = setns(fd, CLONE_NEWTIME);
99 close(fd);
100 return ret == 0;
101}
102
/*
 * read() that is transparently restarted when it fails with EINTR.
 *
 * Returns the result of the first read() call that either succeeded or
 * failed with an errno other than EINTR.
 */
static ssize_t read_nointr(int fd, void *buf, size_t count)
{
	for (;;) {
		ssize_t nread = read(fd, buf, count);

		if (nread >= 0 || errno != EINTR)
			return nread;
	}
}
113
/*
 * write() that is transparently restarted when it fails with EINTR.
 *
 * Returns the result of the first write() call that either succeeded or
 * failed with an errno other than EINTR. Short writes are not retried.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t nwritten = write(fd, buf, count);

		if (nwritten >= 0 || errno != EINTR)
			return nwritten;
	}
}
124
125FIXTURE_SETUP(current_nsset)
126{
127 int i, proc_fd, ret;
128 int ipc_sockets[2];
129 char c;
130
131 for (i = 0; i < PIDFD_NS_MAX; i++) {
132 self->nsfds[i] = -EBADF;
133 self->child_nsfds1[i] = -EBADF;
134 self->child_nsfds2[i] = -EBADF;
135 }
136
137 proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
138 ASSERT_GE(proc_fd, 0) {
139 TH_LOG("%m - Failed to open /proc/self/ns");
140 }
141
142 self->pid = getpid();
143 for (i = 0; i < PIDFD_NS_MAX; i++) {
144 const struct ns_info *info = &ns_info[i];
145 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
146 if (self->nsfds[i] < 0) {
147 EXPECT_EQ(errno, ENOENT) {
148 TH_LOG("%m - Failed to open %s namespace for process %d",
149 info->name, self->pid);
150 }
151 }
152 }
153
154 self->pidfd = sys_pidfd_open(self->pid, 0);
155 EXPECT_GT(self->pidfd, 0) {
156 TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
157 }
158
159 /* Create task that exits right away. */
160 self->child_pid_exited = create_child(&self->child_pidfd_exited,
161 CLONE_NEWUSER | CLONE_NEWNET);
162 EXPECT_GT(self->child_pid_exited, 0);
163
164 if (self->child_pid_exited == 0)
165 _exit(EXIT_SUCCESS);
166
167 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
168
169 self->pidfd = sys_pidfd_open(self->pid, 0);
170 EXPECT_GE(self->pidfd, 0) {
171 TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
172 }
173
174 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
175 EXPECT_EQ(ret, 0);
176
177 /* Create tasks that will be stopped. */
178 self->child_pid1 = create_child(&self->child_pidfd1,
179 CLONE_NEWUSER | CLONE_NEWNS |
180 CLONE_NEWCGROUP | CLONE_NEWIPC |
181 CLONE_NEWUTS | CLONE_NEWPID |
182 CLONE_NEWNET);
183 EXPECT_GE(self->child_pid1, 0);
184
185 if (self->child_pid1 == 0) {
186 close(ipc_sockets[0]);
187
188 if (!switch_timens())
189 _exit(EXIT_FAILURE);
190
191 if (write_nointr(ipc_sockets[1], "1", 1) < 0)
192 _exit(EXIT_FAILURE);
193
194 close(ipc_sockets[1]);
195
196 pause();
197 _exit(EXIT_SUCCESS);
198 }
199
200 close(ipc_sockets[1]);
201 ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
202 close(ipc_sockets[0]);
203
204 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
205 EXPECT_EQ(ret, 0);
206
207 self->child_pid2 = create_child(&self->child_pidfd2,
208 CLONE_NEWUSER | CLONE_NEWNS |
209 CLONE_NEWCGROUP | CLONE_NEWIPC |
210 CLONE_NEWUTS | CLONE_NEWPID |
211 CLONE_NEWNET);
212 EXPECT_GE(self->child_pid2, 0);
213
214 if (self->child_pid2 == 0) {
215 close(ipc_sockets[0]);
216
217 if (!switch_timens())
218 _exit(EXIT_FAILURE);
219
220 if (write_nointr(ipc_sockets[1], "1", 1) < 0)
221 _exit(EXIT_FAILURE);
222
223 close(ipc_sockets[1]);
224
225 pause();
226 _exit(EXIT_SUCCESS);
227 }
228
229 close(ipc_sockets[1]);
230 ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
231 close(ipc_sockets[0]);
232
233 for (i = 0; i < PIDFD_NS_MAX; i++) {
234 char p[100];
235
236 const struct ns_info *info = &ns_info[i];
237
238 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
239 if (self->nsfds[i] < 0) {
240 EXPECT_EQ(errno, ENOENT) {
241 TH_LOG("%m - Failed to open %s namespace for process %d",
242 info->name, self->pid);
243 }
244 }
245
246 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
247 self->child_pid1, info->name);
248 EXPECT_GT(ret, 0);
249 EXPECT_LT(ret, sizeof(p));
250
251 self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
252 if (self->child_nsfds1[i] < 0) {
253 EXPECT_EQ(errno, ENOENT) {
254 TH_LOG("%m - Failed to open %s namespace for process %d",
255 info->name, self->child_pid1);
256 }
257 }
258
259 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
260 self->child_pid2, info->name);
261 EXPECT_GT(ret, 0);
262 EXPECT_LT(ret, sizeof(p));
263
264 self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
265 if (self->child_nsfds2[i] < 0) {
266 EXPECT_EQ(errno, ENOENT) {
267 TH_LOG("%m - Failed to open %s namespace for process %d",
268 info->name, self->child_pid1);
269 }
270 }
271 }
272
273 close(proc_fd);
274}
275
276FIXTURE_TEARDOWN(current_nsset)
277{
278 int i;
279
280 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
281 SIGKILL, NULL, 0), 0);
282 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
283 SIGKILL, NULL, 0), 0);
284
285 for (i = 0; i < PIDFD_NS_MAX; i++) {
286 if (self->nsfds[i] >= 0)
287 close(self->nsfds[i]);
288 if (self->child_nsfds1[i] >= 0)
289 close(self->child_nsfds1[i]);
290 if (self->child_nsfds2[i] >= 0)
291 close(self->child_nsfds2[i]);
292 }
293
294 if (self->child_pidfd1 >= 0)
295 EXPECT_EQ(0, close(self->child_pidfd1));
296 if (self->child_pidfd2 >= 0)
297 EXPECT_EQ(0, close(self->child_pidfd2));
298 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
299 ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
300 ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
301}
302
/*
 * Open /proc/<pid>/ns/<ns> read-only with O_CLOEXEC.
 *
 * Returns the open() result (an fd, or -1 on failure), or -EIO when the path
 * could not be formatted or does not fit into the local buffer.
 */
static int preserve_ns(const int pid, const char *ns)
{
	char path[50];
	int len = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns);

	if (len < 0)
		return -EIO;

	/* snprintf() reports the untruncated length; reject truncated paths. */
	if ((size_t)len >= sizeof(path))
		return -EIO;

	return open(path, O_RDONLY | O_CLOEXEC);
}
314
/*
 * Check whether the namespace referred to by @ns_fd1 is the @ns namespace
 * that process @pid2 currently has, by comparing device and inode numbers
 * of @ns_fd1 and of /proc/<pid2>/ns/<ns>.
 *
 * Returns 1 if they match, 0 if they differ, and -1 if either file could
 * not be opened or stat'ed.
 */
static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
{
	struct stat ns_st1, ns_st2;
	int ns_fd2;
	int err;

	if (fstat(ns_fd1, &ns_st1) < 0)
		return -1;

	ns_fd2 = preserve_ns(pid2, ns);
	if (ns_fd2 < 0)
		return -1;

	/* The fd is only needed for the stat; drop it before checking. */
	err = fstat(ns_fd2, &ns_st2);
	close(ns_fd2);
	if (err < 0)
		return -1;

	/* Same device and inode: same namespace (1), otherwise different (0). */
	return ns_st1.st_dev == ns_st2.st_dev &&
	       ns_st1.st_ino == ns_st2.st_ino;
}
342
343/* Test that we can't pass garbage to the kernel. */
344TEST_F(current_nsset, invalid_flags)
345{
346 ASSERT_NE(setns(self->pidfd, 0), 0);
347 EXPECT_EQ(errno, EINVAL);
348
349 ASSERT_NE(setns(self->pidfd, -1), 0);
350 EXPECT_EQ(errno, EINVAL);
351
352 ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
353 EXPECT_EQ(errno, EINVAL);
354
355 ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
356 EXPECT_EQ(errno, EINVAL);
357}
358
359/* Test that we can't attach to a task that has already exited. */
360TEST_F(current_nsset, pidfd_exited_child)
361{
362 int i;
363 pid_t pid;
364
365 ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
366 0);
367 EXPECT_EQ(errno, ESRCH);
368
369 pid = getpid();
370 for (i = 0; i < PIDFD_NS_MAX; i++) {
371 const struct ns_info *info = &ns_info[i];
372 /* Verify that we haven't changed any namespaces. */
373 if (self->nsfds[i] >= 0)
374 ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
375 }
376}
377
378TEST_F(current_nsset, pidfd_incremental_setns)
379{
380 int i;
381 pid_t pid;
382
383 pid = getpid();
384 for (i = 0; i < PIDFD_NS_MAX; i++) {
385 const struct ns_info *info = &ns_info[i];
386 int nsfd;
387
388 if (self->child_nsfds1[i] < 0)
389 continue;
390
391 if (info->flag) {
392 ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
393 TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
394 info->name, self->child_pid1,
395 self->child_pidfd1);
396 }
397 }
398
399 /* Verify that we have changed to the correct namespaces. */
400 if (info->flag == CLONE_NEWPID)
401 nsfd = self->nsfds[i];
402 else
403 nsfd = self->child_nsfds1[i];
404 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
405 TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
406 info->name, self->child_pid1,
407 self->child_pidfd1);
408 }
409 TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
410 info->name, self->child_pid1, self->child_pidfd1);
411 }
412}
413
414TEST_F(current_nsset, nsfd_incremental_setns)
415{
416 int i;
417 pid_t pid;
418
419 pid = getpid();
420 for (i = 0; i < PIDFD_NS_MAX; i++) {
421 const struct ns_info *info = &ns_info[i];
422 int nsfd;
423
424 if (self->child_nsfds1[i] < 0)
425 continue;
426
427 if (info->flag) {
428 ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
429 TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
430 info->name, self->child_pid1,
431 self->child_nsfds1[i]);
432 }
433 }
434
435 /* Verify that we have changed to the correct namespaces. */
436 if (info->flag == CLONE_NEWPID)
437 nsfd = self->nsfds[i];
438 else
439 nsfd = self->child_nsfds1[i];
440 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
441 TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
442 info->name, self->child_pid1,
443 self->child_nsfds1[i]);
444 }
445 TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
446 info->name, self->child_pid1, self->child_nsfds1[i]);
447 }
448}
449
450TEST_F(current_nsset, pidfd_one_shot_setns)
451{
452 unsigned flags = 0;
453 int i;
454 pid_t pid;
455
456 for (i = 0; i < PIDFD_NS_MAX; i++) {
457 const struct ns_info *info = &ns_info[i];
458
459 if (self->child_nsfds1[i] < 0)
460 continue;
461
462 flags |= info->flag;
463 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
464 info->name, self->child_pid1);
465 }
466
467 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
468 TH_LOG("%m - Failed to setns to namespaces of %d",
469 self->child_pid1);
470 }
471
472 pid = getpid();
473 for (i = 0; i < PIDFD_NS_MAX; i++) {
474 const struct ns_info *info = &ns_info[i];
475 int nsfd;
476
477 if (self->child_nsfds1[i] < 0)
478 continue;
479
480 /* Verify that we have changed to the correct namespaces. */
481 if (info->flag == CLONE_NEWPID)
482 nsfd = self->nsfds[i];
483 else
484 nsfd = self->child_nsfds1[i];
485 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
486 TH_LOG("setns failed to place us correctly into %s namespace of %d",
487 info->name, self->child_pid1);
488 }
489 TH_LOG("Managed to correctly setns to %s namespace of %d",
490 info->name, self->child_pid1);
491 }
492}
493
494TEST_F(current_nsset, no_foul_play)
495{
496 unsigned flags = 0;
497 int i;
498
499 for (i = 0; i < PIDFD_NS_MAX; i++) {
500 const struct ns_info *info = &ns_info[i];
501
502 if (self->child_nsfds1[i] < 0)
503 continue;
504
505 flags |= info->flag;
506 if (info->flag) /* No use logging pid_for_children. */
507 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
508 info->name, self->child_pid1);
509 }
510
511 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
512 TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d",
513 self->child_pid1, self->child_pidfd1);
514 }
515
516 /*
517 * Can't setns to a user namespace outside of our hierarchy since we
518 * don't have caps in there and didn't create it. That means that under
519 * no circumstances should we be able to setns to any of the other
520 * ones since they aren't owned by our user namespace.
521 */
522 for (i = 0; i < PIDFD_NS_MAX; i++) {
523 const struct ns_info *info = &ns_info[i];
524
525 if (self->child_nsfds2[i] < 0 || !info->flag)
526 continue;
527
528 ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
529 TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
530 info->name, self->child_pid2,
531 self->child_pidfd2);
532 }
533 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
534 info->name, self->child_pid2,
535 self->child_pidfd2);
536
537 ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
538 TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
539 info->name, self->child_pid2,
540 self->child_nsfds2[i]);
541 }
542 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
543 info->name, self->child_pid2,
544 self->child_nsfds2[i]);
545 }
546}
547
/*
 * setns() on a file descriptor that is neither a namespace fd nor a pidfd
 * (here: a memfd) must fail with EINVAL.
 */
TEST(setns_einval)
{
	int fd;

	/*
	 * Bail out if no memfd could be created: calling setns() on an
	 * invalid fd would fail for a different reason and make the errno
	 * check below misleading. Any non-negative fd is valid.
	 */
	fd = sys_memfd_create("rostock", 0);
	ASSERT_GE(fd, 0);

	ASSERT_NE(setns(fd, 0), 0);
	EXPECT_EQ(errno, EINVAL);
	close(fd);
}
559
560TEST_HARNESS_MAIN