Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/namespaces: test for efault

Ensure that put_user() can fail and that namespace cleanup works
correctly.

Link: https://patch.msgid.link/20251109-namespace-6-19-fixes-v1-8-ae8a4ad5a3b3@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>

+533
+1
tools/testing/selftests/namespaces/.gitignore
··· 4 4 ns_active_ref_test 5 5 listns_test 6 6 listns_permissions_test 7 + listns_efault_test 7 8 siocgskns_test 8 9 cred_change_test 9 10 stress_test
+2
tools/testing/selftests/namespaces/Makefile
··· 8 8 ns_active_ref_test \ 9 9 listns_test \ 10 10 listns_permissions_test \ 11 + listns_efault_test \ 11 12 siocgskns_test \ 12 13 cred_change_test \ 13 14 stress_test \ ··· 20 19 $(OUTPUT)/ns_active_ref_test: ../filesystems/utils.c 21 20 $(OUTPUT)/listns_test: ../filesystems/utils.c 22 21 $(OUTPUT)/listns_permissions_test: ../filesystems/utils.c 22 + $(OUTPUT)/listns_efault_test: ../filesystems/utils.c 23 23 $(OUTPUT)/siocgskns_test: ../filesystems/utils.c 24 24 $(OUTPUT)/cred_change_test: ../filesystems/utils.c 25 25 $(OUTPUT)/stress_test: ../filesystems/utils.c
+530
tools/testing/selftests/namespaces/listns_efault_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #define _GNU_SOURCE 3 + #include <errno.h> 4 + #include <fcntl.h> 5 + #include <limits.h> 6 + #include <sched.h> 7 + #include <signal.h> 8 + #include <stdio.h> 9 + #include <stdlib.h> 10 + #include <string.h> 11 + #include <linux/nsfs.h> 12 + #include <sys/ioctl.h> 13 + #include <sys/mman.h> 14 + #include <sys/mount.h> 15 + #include <sys/socket.h> 16 + #include <sys/stat.h> 17 + #include <sys/syscall.h> 18 + #include <sys/types.h> 19 + #include <sys/wait.h> 20 + #include <unistd.h> 21 + #include "../kselftest_harness.h" 22 + #include "../filesystems/utils.h" 23 + #include "../pidfd/pidfd.h" 24 + #include "wrappers.h" 25 + 26 + /* 27 + * Test listns() error handling with invalid buffer addresses. 28 + * 29 + * When the buffer pointer is invalid (e.g., crossing page boundaries 30 + * into unmapped memory), listns() returns EINVAL. 31 + * 32 + * This test also creates mount namespaces that get destroyed during 33 + * iteration, testing that namespace cleanup happens outside the RCU 34 + * read lock. 35 + */ 36 + TEST(listns_partial_fault_with_ns_cleanup) 37 + { 38 + void *map; 39 + __u64 *ns_ids; 40 + ssize_t ret; 41 + long page_size; 42 + pid_t pid, iter_pid; 43 + int pidfds[5]; 44 + int sv[5][2]; 45 + int iter_pidfd; 46 + int i, status; 47 + char c; 48 + 49 + page_size = sysconf(_SC_PAGESIZE); 50 + ASSERT_GT(page_size, 0); 51 + 52 + /* 53 + * Map two pages: 54 + * - First page: readable and writable 55 + * - Second page: will be unmapped to trigger EFAULT 56 + */ 57 + map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE, 58 + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 59 + ASSERT_NE(map, MAP_FAILED); 60 + 61 + /* Unmap the second page */ 62 + ret = munmap((char *)map + page_size, page_size); 63 + ASSERT_EQ(ret, 0); 64 + 65 + /* 66 + * Position the buffer pointer so there's room for exactly one u64 67 + * before the page boundary. The second u64 would fall into the 68 + * unmapped page. 69 + */ 70 + ns_ids = ((__u64 *)((char *)map + page_size)) - 1; 71 + 72 + /* 73 + * Create a separate process to run listns() in a loop concurrently 74 + * with namespace creation and destruction. 75 + */ 76 + iter_pid = create_child(&iter_pidfd, 0); 77 + ASSERT_NE(iter_pid, -1); 78 + 79 + if (iter_pid == 0) { 80 + struct ns_id_req req = { 81 + .size = sizeof(req), 82 + .spare = 0, 83 + .ns_id = 0, 84 + .ns_type = 0, /* All types */ 85 + .spare2 = 0, 86 + .user_ns_id = 0, /* Global listing */ 87 + }; 88 + int iter_ret; 89 + 90 + /* 91 + * Loop calling listns() until killed. 92 + * The kernel should: 93 + * 1. Successfully write the first namespace ID (within valid page) 94 + * 2. Fail with EFAULT when trying to write the second ID (unmapped page) 95 + * 3. Handle concurrent namespace destruction without deadlock 96 + */ 97 + while (1) { 98 + iter_ret = sys_listns(&req, ns_ids, 2, 0); 99 + 100 + if (iter_ret == -1 && errno == ENOSYS) 101 + _exit(PIDFD_SKIP); 102 + } 103 + } 104 + 105 + /* Small delay to let iterator start looping */ 106 + usleep(50000); 107 + 108 + /* 109 + * Create several child processes, each in its own mount namespace. 110 + * These will be destroyed while the iterator is running listns(). 111 + */ 112 + for (i = 0; i < 5; i++) { 113 + /* Create socketpair for synchronization */ 114 + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0); 115 + 116 + pid = create_child(&pidfds[i], CLONE_NEWNS); 117 + ASSERT_NE(pid, -1); 118 + 119 + if (pid == 0) { 120 + close(sv[i][0]); /* Close parent end */ 121 + 122 + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0)) 123 + _exit(1); 124 + 125 + /* Child: create a couple of tmpfs mounts */ 126 + if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST) 127 + _exit(1); 128 + if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST) 129 + _exit(1); 130 + 131 + if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1) 132 + _exit(1); 133 + if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1) 134 + _exit(1); 135 + 136 + /* Signal parent that setup is complete */ 137 + if (write_nointr(sv[i][1], "R", 1) != 1) 138 + _exit(1); 139 + 140 + /* Wait for parent to signal us to exit */ 141 + if (read_nointr(sv[i][1], &c, 1) != 1) 142 + _exit(1); 143 + 144 + close(sv[i][1]); 145 + _exit(0); 146 + } 147 + 148 + close(sv[i][1]); /* Close child end */ 149 + } 150 + 151 + /* Wait for all children to finish setup */ 152 + for (i = 0; i < 5; i++) { 153 + ret = read_nointr(sv[i][0], &c, 1); 154 + ASSERT_EQ(ret, 1); 155 + ASSERT_EQ(c, 'R'); 156 + } 157 + 158 + /* 159 + * Signal children to exit. This will destroy their mount namespaces 160 + * while listns() is iterating the namespace tree. 161 + * This tests that cleanup happens outside the RCU read lock. 162 + */ 163 + for (i = 0; i < 5; i++) 164 + write_nointr(sv[i][0], "X", 1); 165 + 166 + /* Wait for all mount namespace children to exit and cleanup */ 167 + for (i = 0; i < 5; i++) { 168 + waitpid(-1, NULL, 0); 169 + close(sv[i][0]); 170 + close(pidfds[i]); 171 + } 172 + 173 + /* Kill iterator and wait for it */ 174 + sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0); 175 + ret = waitpid(iter_pid, &status, 0); 176 + ASSERT_EQ(ret, iter_pid); 177 + close(iter_pidfd); 178 + 179 + /* Should have been killed */ 180 + ASSERT_TRUE(WIFSIGNALED(status)); 181 + ASSERT_EQ(WTERMSIG(status), SIGKILL); 182 + 183 + /* Clean up */ 184 + munmap(map, page_size); 185 + } 186 + 187 + /* 188 + * Test listns() error handling when the entire buffer is invalid. 189 + * This is a sanity check that basic invalid pointer detection works. 190 + */ 191 + TEST(listns_complete_fault) 192 + { 193 + struct ns_id_req req = { 194 + .size = sizeof(req), 195 + .spare = 0, 196 + .ns_id = 0, 197 + .ns_type = 0, 198 + .spare2 = 0, 199 + .user_ns_id = 0, 200 + }; 201 + __u64 *ns_ids; 202 + ssize_t ret; 203 + 204 + /* Use a clearly invalid pointer */ 205 + ns_ids = (__u64 *)0xdeadbeef; 206 + 207 + ret = sys_listns(&req, ns_ids, 10, 0); 208 + 209 + if (ret == -1 && errno == ENOSYS) 210 + SKIP(return, "listns() not supported"); 211 + 212 + /* Should fail with EFAULT */ 213 + ASSERT_EQ(ret, -1); 214 + ASSERT_EQ(errno, EFAULT); 215 + } 216 + 217 + /* 218 + * Test listns() error handling when the buffer is NULL. 219 + */ 220 + TEST(listns_null_buffer) 221 + { 222 + struct ns_id_req req = { 223 + .size = sizeof(req), 224 + .spare = 0, 225 + .ns_id = 0, 226 + .ns_type = 0, 227 + .spare2 = 0, 228 + .user_ns_id = 0, 229 + }; 230 + ssize_t ret; 231 + 232 + /* NULL buffer with non-zero count should fail */ 233 + ret = sys_listns(&req, NULL, 10, 0); 234 + 235 + if (ret == -1 && errno == ENOSYS) 236 + SKIP(return, "listns() not supported"); 237 + 238 + /* Should fail with EFAULT */ 239 + ASSERT_EQ(ret, -1); 240 + ASSERT_EQ(errno, EFAULT); 241 + } 242 + 243 + /* 244 + * Test listns() with a buffer that becomes invalid mid-iteration 245 + * (after several successful writes), combined with mount namespace 246 + * destruction to test RCU cleanup logic. 247 + */ 248 + TEST(listns_late_fault_with_ns_cleanup) 249 + { 250 + void *map; 251 + __u64 *ns_ids; 252 + ssize_t ret; 253 + long page_size; 254 + pid_t pid, iter_pid; 255 + int pidfds[10]; 256 + int sv[10][2]; 257 + int iter_pidfd; 258 + int i, status; 259 + char c; 260 + 261 + page_size = sysconf(_SC_PAGESIZE); 262 + ASSERT_GT(page_size, 0); 263 + 264 + /* Map two pages */ 265 + map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE, 266 + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 267 + ASSERT_NE(map, MAP_FAILED); 268 + 269 + /* Unmap the second page */ 270 + ret = munmap((char *)map + page_size, page_size); 271 + ASSERT_EQ(ret, 0); 272 + 273 + /* 274 + * Position buffer so we can write several u64s successfully 275 + * before hitting the page boundary. 276 + */ 277 + ns_ids = ((__u64 *)((char *)map + page_size)) - 5; 278 + 279 + /* 280 + * Create a separate process to run listns() concurrently. 281 + */ 282 + iter_pid = create_child(&iter_pidfd, 0); 283 + ASSERT_NE(iter_pid, -1); 284 + 285 + if (iter_pid == 0) { 286 + struct ns_id_req req = { 287 + .size = sizeof(req), 288 + .spare = 0, 289 + .ns_id = 0, 290 + .ns_type = 0, 291 + .spare2 = 0, 292 + .user_ns_id = 0, 293 + }; 294 + int iter_ret; 295 + 296 + /* 297 + * Loop calling listns() until killed. 298 + * Request 10 namespace IDs while namespaces are being destroyed. 299 + * This tests: 300 + * 1. EFAULT handling when buffer becomes invalid 301 + * 2. Namespace cleanup outside RCU read lock during iteration 302 + */ 303 + while (1) { 304 + iter_ret = sys_listns(&req, ns_ids, 10, 0); 305 + 306 + if (iter_ret == -1 && errno == ENOSYS) 307 + _exit(PIDFD_SKIP); 308 + } 309 + } 310 + 311 + /* Small delay to let iterator start looping */ 312 + usleep(50000); 313 + 314 + /* 315 + * Create more children with mount namespaces to increase the 316 + * likelihood that namespace cleanup happens during iteration. 317 + */ 318 + for (i = 0; i < 10; i++) { 319 + /* Create socketpair for synchronization */ 320 + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0); 321 + 322 + pid = create_child(&pidfds[i], CLONE_NEWNS); 323 + ASSERT_NE(pid, -1); 324 + 325 + if (pid == 0) { 326 + close(sv[i][0]); /* Close parent end */ 327 + 328 + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0)) 329 + _exit(1); 330 + 331 + /* Child: create tmpfs mounts */ 332 + if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST) 333 + _exit(1); 334 + if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST) 335 + _exit(1); 336 + 337 + if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1) 338 + _exit(1); 339 + if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1) 340 + _exit(1); 341 + 342 + /* Signal parent that setup is complete */ 343 + if (write_nointr(sv[i][1], "R", 1) != 1) 344 + _exit(1); 345 + 346 + /* Wait for parent to signal us to exit */ 347 + if (read_nointr(sv[i][1], &c, 1) != 1) 348 + _exit(1); 349 + 350 + close(sv[i][1]); 351 + _exit(0); 352 + } 353 + 354 + close(sv[i][1]); /* Close child end */ 355 + } 356 + 357 + /* Wait for all children to finish setup */ 358 + for (i = 0; i < 10; i++) { 359 + ret = read_nointr(sv[i][0], &c, 1); 360 + ASSERT_EQ(ret, 1); 361 + ASSERT_EQ(c, 'R'); 362 + } 363 + 364 + /* Kill half the children */ 365 + for (i = 0; i < 5; i++) 366 + write_nointr(sv[i][0], "X", 1); 367 + 368 + /* Small delay to let some exit */ 369 + usleep(10000); 370 + 371 + /* Kill remaining children */ 372 + for (i = 5; i < 10; i++) 373 + write_nointr(sv[i][0], "X", 1); 374 + 375 + /* Wait for all children and cleanup */ 376 + for (i = 0; i < 10; i++) { 377 + waitpid(-1, NULL, 0); 378 + close(sv[i][0]); 379 + close(pidfds[i]); 380 + } 381 + 382 + /* Kill iterator and wait for it */ 383 + sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0); 384 + ret = waitpid(iter_pid, &status, 0); 385 + ASSERT_EQ(ret, iter_pid); 386 + close(iter_pidfd); 387 + 388 + /* Should have been killed */ 389 + ASSERT_TRUE(WIFSIGNALED(status)); 390 + ASSERT_EQ(WTERMSIG(status), SIGKILL); 391 + 392 + /* Clean up */ 393 + munmap(map, page_size); 394 + } 395 + 396 + /* 397 + * Test specifically focused on mount namespace cleanup during EFAULT. 398 + * Filter for mount namespaces only. 399 + */ 400 + TEST(listns_mnt_ns_cleanup_on_fault) 401 + { 402 + void *map; 403 + __u64 *ns_ids; 404 + ssize_t ret; 405 + long page_size; 406 + pid_t pid, iter_pid; 407 + int pidfds[8]; 408 + int sv[8][2]; 409 + int iter_pidfd; 410 + int i, status; 411 + char c; 412 + 413 + page_size = sysconf(_SC_PAGESIZE); 414 + ASSERT_GT(page_size, 0); 415 + 416 + /* Set up partial fault buffer */ 417 + map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE, 418 + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 419 + ASSERT_NE(map, MAP_FAILED); 420 + 421 + ret = munmap((char *)map + page_size, page_size); 422 + ASSERT_EQ(ret, 0); 423 + 424 + /* Position for 3 successful writes, then fault */ 425 + ns_ids = ((__u64 *)((char *)map + page_size)) - 3; 426 + 427 + /* 428 + * Create a separate process to run listns() concurrently. 429 + */ 430 + iter_pid = create_child(&iter_pidfd, 0); 431 + ASSERT_NE(iter_pid, -1); 432 + 433 + if (iter_pid == 0) { 434 + struct ns_id_req req = { 435 + .size = sizeof(req), 436 + .spare = 0, 437 + .ns_id = 0, 438 + .ns_type = CLONE_NEWNS, /* Only mount namespaces */ 439 + .spare2 = 0, 440 + .user_ns_id = 0, 441 + }; 442 + int iter_ret; 443 + 444 + /* 445 + * Loop calling listns() until killed. 446 + * Call listns() to race with namespace destruction. 447 + */ 448 + while (1) { 449 + iter_ret = sys_listns(&req, ns_ids, 10, 0); 450 + 451 + if (iter_ret == -1 && errno == ENOSYS) 452 + _exit(PIDFD_SKIP); 453 + } 454 + } 455 + 456 + /* Small delay to let iterator start looping */ 457 + usleep(50000); 458 + 459 + /* Create children with mount namespaces */ 460 + for (i = 0; i < 8; i++) { 461 + /* Create socketpair for synchronization */ 462 + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0); 463 + 464 + pid = create_child(&pidfds[i], CLONE_NEWNS); 465 + ASSERT_NE(pid, -1); 466 + 467 + if (pid == 0) { 468 + close(sv[i][0]); /* Close parent end */ 469 + 470 + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0)) 471 + _exit(1); 472 + 473 + /* Do some mount operations to make cleanup more interesting */ 474 + if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST) 475 + _exit(1); 476 + if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST) 477 + _exit(1); 478 + 479 + if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1) 480 + _exit(1); 481 + if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1) 482 + _exit(1); 483 + 484 + /* Signal parent that setup is complete */ 485 + if (write_nointr(sv[i][1], "R", 1) != 1) 486 + _exit(1); 487 + 488 + /* Wait for parent to signal us to exit */ 489 + if (read_nointr(sv[i][1], &c, 1) != 1) 490 + _exit(1); 491 + 492 + close(sv[i][1]); 493 + _exit(0); 494 + } 495 + 496 + close(sv[i][1]); /* Close child end */ 497 + } 498 + 499 + /* Wait for all children to finish setup */ 500 + for (i = 0; i < 8; i++) { 501 + ret = read_nointr(sv[i][0], &c, 1); 502 + ASSERT_EQ(ret, 1); 503 + ASSERT_EQ(c, 'R'); 504 + } 505 + 506 + /* Kill children to trigger namespace destruction during iteration */ 507 + for (i = 0; i < 8; i++) 508 + write_nointr(sv[i][0], "X", 1); 509 + 510 + /* Wait for children and cleanup */ 511 + for (i = 0; i < 8; i++) { 512 + waitpid(-1, NULL, 0); 513 + close(sv[i][0]); 514 + close(pidfds[i]); 515 + } 516 + 517 + /* Kill iterator and wait for it */ 518 + sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0); 519 + ret = waitpid(iter_pid, &status, 0); 520 + ASSERT_EQ(ret, iter_pid); 521 + close(iter_pidfd); 522 + 523 + /* Should have been killed */ 524 + ASSERT_TRUE(WIFSIGNALED(status)); 525 + ASSERT_EQ(WTERMSIG(status), SIGKILL); 526 + 527 + munmap(map, page_size); 528 + } 529 + 530 + TEST_HARNESS_MAIN