Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'locking-futex-2025-12-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull futex updates from Ingo Molnar:

- Standardize on ktime_t in restart_block::time as well (Thomas
Weißschuh)

- Futex selftests:
- Add robust list testcases (André Almeida)
- Formatting fixes/cleanups (Carlos Llamas)

* tag 'locking-futex-2025-12-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
futex: Store time as ktime_t in restart block
selftests/futex: Create test for robust list
selftests/futex: Skip tests if shmget unsupported
selftests/futex: Add newline to ksft_exit_fail_msg()
selftests/futex: Remove unused test_futex_mpol()

+567 -15
+1 -1
include/linux/restart_block.h
··· 32 32 u32 val; 33 33 u32 flags; 34 34 u32 bitset; 35 - u64 time; 35 + ktime_t time; 36 36 u32 __user *uaddr2; 37 37 } futex; 38 38 /* For nanosleep */
+4 -5
kernel/futex/waitwake.c
··· 738 738 static long futex_wait_restart(struct restart_block *restart) 739 739 { 740 740 u32 __user *uaddr = restart->futex.uaddr; 741 - ktime_t t, *tp = NULL; 741 + ktime_t *tp = NULL; 742 742 743 - if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { 744 - t = restart->futex.time; 745 - tp = &t; 746 - } 743 + if (restart->futex.flags & FLAGS_HAS_TIMEOUT) 744 + tp = &restart->futex.time; 745 + 747 746 restart->fn = do_no_restart_syscall; 748 747 749 748 return (long)futex_wait(uaddr, restart->futex.flags,
+1
tools/testing/selftests/futex/functional/.gitignore
··· 12 12 futex_wait_wouldblock 13 13 futex_waitv 14 14 futex_numa 15 + robust_list
+2 -1
tools/testing/selftests/futex/functional/Makefile
··· 22 22 futex_priv_hash \ 23 23 futex_numa_mpol \ 24 24 futex_waitv \ 25 - futex_numa 25 + futex_numa \ 26 + robust_list 26 27 27 28 TEST_PROGS := run.sh 28 29
-5
tools/testing/selftests/futex/functional/futex_numa_mpol.c
··· 131 131 __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA); 132 132 } 133 133 134 - static void test_futex_mpol(void *futex_ptr, int err_value) 135 - { 136 - __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); 137 - } 138 - 139 134 TEST(futex_numa_mpol) 140 135 { 141 136 struct futex32_numa *futex_numa;
+5 -3
tools/testing/selftests/futex/functional/futex_wait.c
··· 71 71 /* Testing an anon page shared memory */ 72 72 shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666); 73 73 if (shm_id < 0) { 74 + if (errno == ENOSYS) 75 + ksft_exit_skip("shmget syscall not supported\n"); 74 76 perror("shmget"); 75 77 exit(1); 76 78 } ··· 110 108 /* Testing a file backed shared memory */ 111 109 fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 112 110 if (fd < 0) 113 - ksft_exit_fail_msg("open"); 111 + ksft_exit_fail_msg("open\n"); 114 112 115 113 if (ftruncate(fd, sizeof(f_private))) 116 - ksft_exit_fail_msg("ftruncate"); 114 + ksft_exit_fail_msg("ftruncate\n"); 117 115 118 116 shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 119 117 if (shm == MAP_FAILED) 120 - ksft_exit_fail_msg("mmap"); 118 + ksft_exit_fail_msg("mmap\n"); 121 119 122 120 memcpy(shm, &f_private, sizeof(f_private)); 123 121
+2
tools/testing/selftests/futex/functional/futex_waitv.c
··· 86 86 int shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666); 87 87 88 88 if (shm_id < 0) { 89 + if (errno == ENOSYS) 90 + ksft_exit_skip("shmget syscall not supported\n"); 89 91 perror("shmget"); 90 92 exit(1); 91 93 }
+552
tools/testing/selftests/futex/functional/robust_list.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (C) 2025 Igalia S.L. 4 + * 5 + * Robust list test by André Almeida <andrealmeid@igalia.com> 6 + * 7 + * The robust list uAPI allows userspace to create "robust" locks, in the sense 8 + * that if the lock holder thread dies, the remaining threads that are waiting 9 + * for the lock won't block forever, waiting for a lock that will never be 10 + * released. 11 + * 12 + * This is achieved by userspace setting a list where a thread can enter all the 13 + * locks (futexes) that it is holding. The robust list is a linked list, and 14 + * userspace registers the start of the list with the syscall set_robust_list(). 15 + * If such thread eventually dies, the kernel will walk this list, waking up one 16 + * thread waiting for each futex and marking the futex word with the flag 17 + * FUTEX_OWNER_DIED. 18 + * 19 + * See also 20 + * man set_robust_list 21 + * Documentation/locking/robust-futex-ABI.rst 22 + * Documentation/locking/robust-futexes.rst 23 + */ 24 + 25 + #define _GNU_SOURCE 26 + 27 + #include "futextest.h" 28 + #include "../../kselftest_harness.h" 29 + 30 + #include <errno.h> 31 + #include <pthread.h> 32 + #include <signal.h> 33 + #include <stdatomic.h> 34 + #include <stdbool.h> 35 + #include <stddef.h> 36 + #include <sys/mman.h> 37 + #include <sys/wait.h> 38 + 39 + #define STACK_SIZE (1024 * 1024) 40 + 41 + #define FUTEX_TIMEOUT 3 42 + 43 + #define SLEEP_US 100 44 + 45 + static pthread_barrier_t barrier, barrier2; 46 + 47 + static int set_robust_list(struct robust_list_head *head, size_t len) 48 + { 49 + return syscall(SYS_set_robust_list, head, len); 50 + } 51 + 52 + static int get_robust_list(int pid, struct robust_list_head **head, size_t *len_ptr) 53 + { 54 + return syscall(SYS_get_robust_list, pid, head, len_ptr); 55 + } 56 + 57 + /* 58 + * Basic lock struct, contains just the futex word and the robust list element 59 + * Real implementations also have a *prev to easily walk in 
the list 60 + */ 61 + struct lock_struct { 62 + _Atomic(unsigned int) futex; 63 + struct robust_list list; 64 + }; 65 + 66 + /* 67 + * Helper function to spawn a child thread. Returns -1 on error, pid on success 68 + */ 69 + static int create_child(int (*fn)(void *arg), void *arg) 70 + { 71 + char *stack; 72 + pid_t pid; 73 + 74 + stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, 75 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); 76 + if (stack == MAP_FAILED) 77 + return -1; 78 + 79 + stack += STACK_SIZE; 80 + 81 + pid = clone(fn, stack, CLONE_VM | SIGCHLD, arg); 82 + 83 + if (pid == -1) 84 + return -1; 85 + 86 + return pid; 87 + } 88 + 89 + /* 90 + * Helper function to prepare and register a robust list 91 + */ 92 + static int set_list(struct robust_list_head *head) 93 + { 94 + int ret; 95 + 96 + ret = set_robust_list(head, sizeof(*head)); 97 + if (ret) 98 + return ret; 99 + 100 + head->futex_offset = (size_t) offsetof(struct lock_struct, futex) - 101 + (size_t) offsetof(struct lock_struct, list); 102 + head->list.next = &head->list; 103 + head->list_op_pending = NULL; 104 + 105 + return 0; 106 + } 107 + 108 + /* 109 + * A basic (and incomplete) mutex lock function with robustness 110 + */ 111 + static int mutex_lock(struct lock_struct *lock, struct robust_list_head *head, bool error_inject) 112 + { 113 + _Atomic(unsigned int) *futex = &lock->futex; 114 + unsigned int zero = 0; 115 + pid_t tid = gettid(); 116 + int ret = -1; 117 + 118 + /* 119 + * Set list_op_pending before starting the lock, so the kernel can catch 120 + * the case where the thread died during the lock operation 121 + */ 122 + head->list_op_pending = &lock->list; 123 + 124 + if (atomic_compare_exchange_strong(futex, &zero, tid)) { 125 + /* 126 + * We took the lock, insert it in the robust list 127 + */ 128 + struct robust_list *list = &head->list; 129 + 130 + /* Error injection to test list_op_pending */ 131 + if (error_inject) 132 + return 0; 133 + 134 + while (list->next != &head->list) 
135 + list = list->next; 136 + 137 + list->next = &lock->list; 138 + lock->list.next = &head->list; 139 + 140 + ret = 0; 141 + } else { 142 + /* 143 + * We didn't take the lock, wait until the owner wakes (or dies) 144 + */ 145 + struct timespec to; 146 + 147 + to.tv_sec = FUTEX_TIMEOUT; 148 + to.tv_nsec = 0; 149 + 150 + tid = atomic_load(futex); 151 + /* Kernel ignores futexes without the waiters flag */ 152 + tid |= FUTEX_WAITERS; 153 + atomic_store(futex, tid); 154 + 155 + ret = futex_wait((futex_t *) futex, tid, &to, 0); 156 + 157 + /* 158 + * A real mutex_lock() implementation would loop here to finally 159 + * take the lock. We don't care about that, so we stop here. 160 + */ 161 + } 162 + 163 + head->list_op_pending = NULL; 164 + 165 + return ret; 166 + } 167 + 168 + /* 169 + * This child thread will succeed taking the lock, and then will exit holding it 170 + */ 171 + static int child_fn_lock(void *arg) 172 + { 173 + struct lock_struct *lock = arg; 174 + struct robust_list_head head; 175 + int ret; 176 + 177 + ret = set_list(&head); 178 + if (ret) { 179 + ksft_test_result_fail("set_robust_list error\n"); 180 + return ret; 181 + } 182 + 183 + ret = mutex_lock(lock, &head, false); 184 + if (ret) { 185 + ksft_test_result_fail("mutex_lock error\n"); 186 + return ret; 187 + } 188 + 189 + pthread_barrier_wait(&barrier); 190 + 191 + /* 192 + * There's a race here: the parent thread needs to be inside 193 + * futex_wait() before the child thread dies, otherwise it will miss the 194 + * wakeup from handle_futex_death() that this child will emit. We wait a 195 + * little bit just to make sure that this happens. 196 + */ 197 + usleep(SLEEP_US); 198 + 199 + return 0; 200 + } 201 + 202 + /* 203 + * Spawns a child thread that will set a robust list, take the lock, register it 204 + * in the robust list and die. The parent thread will wait on this futex, and 205 + * should be woken up when the child exits. 
206 + */ 207 + TEST(test_robustness) 208 + { 209 + struct lock_struct lock = { .futex = 0 }; 210 + _Atomic(unsigned int) *futex = &lock.futex; 211 + struct robust_list_head head; 212 + int ret, pid, wstatus; 213 + 214 + ret = set_list(&head); 215 + ASSERT_EQ(ret, 0); 216 + 217 + /* 218 + * Let's use a barrier to ensure that the child thread takes the lock 219 + * before the parent 220 + */ 221 + ret = pthread_barrier_init(&barrier, NULL, 2); 222 + ASSERT_EQ(ret, 0); 223 + 224 + pid = create_child(&child_fn_lock, &lock); 225 + ASSERT_NE(pid, -1); 226 + 227 + pthread_barrier_wait(&barrier); 228 + ret = mutex_lock(&lock, &head, false); 229 + 230 + /* 231 + * futex_wait() should return 0 and the futex word should be marked with 232 + * FUTEX_OWNER_DIED 233 + */ 234 + ASSERT_EQ(ret, 0); 235 + 236 + ASSERT_TRUE(*futex & FUTEX_OWNER_DIED); 237 + 238 + wait(&wstatus); 239 + pthread_barrier_destroy(&barrier); 240 + 241 + /* Pass only if the child hasn't returned an error */ 242 + if (!WEXITSTATUS(wstatus)) 243 + ksft_test_result_pass("%s\n", __func__); 244 + } 245 + 246 + /* 247 + * The only valid value for len is sizeof(*head) 248 + */ 249 + TEST(test_set_robust_list_invalid_size) 250 + { 251 + struct robust_list_head head; 252 + size_t head_size = sizeof(head); 253 + int ret; 254 + 255 + ret = set_robust_list(&head, head_size); 256 + ASSERT_EQ(ret, 0); 257 + 258 + ret = set_robust_list(&head, head_size * 2); 259 + ASSERT_EQ(ret, -1); 260 + ASSERT_EQ(errno, EINVAL); 261 + 262 + ret = set_robust_list(&head, head_size - 1); 263 + ASSERT_EQ(ret, -1); 264 + ASSERT_EQ(errno, EINVAL); 265 + 266 + ret = set_robust_list(&head, 0); 267 + ASSERT_EQ(ret, -1); 268 + ASSERT_EQ(errno, EINVAL); 269 + 270 + ksft_test_result_pass("%s\n", __func__); 271 + } 272 + 273 + /* 274 + * Test get_robust_list with pid = 0, getting the list of the running thread 275 + */ 276 + TEST(test_get_robust_list_self) 277 + { 278 + struct robust_list_head head, head2, *get_head; 279 + size_t head_size = 
sizeof(head), len_ptr; 280 + int ret; 281 + 282 + ret = set_robust_list(&head, head_size); 283 + ASSERT_EQ(ret, 0); 284 + 285 + ret = get_robust_list(0, &get_head, &len_ptr); 286 + ASSERT_EQ(ret, 0); 287 + ASSERT_EQ(get_head, &head); 288 + ASSERT_EQ(head_size, len_ptr); 289 + 290 + ret = set_robust_list(&head2, head_size); 291 + ASSERT_EQ(ret, 0); 292 + 293 + ret = get_robust_list(0, &get_head, &len_ptr); 294 + ASSERT_EQ(ret, 0); 295 + ASSERT_EQ(get_head, &head2); 296 + ASSERT_EQ(head_size, len_ptr); 297 + 298 + ksft_test_result_pass("%s\n", __func__); 299 + } 300 + 301 + static int child_list(void *arg) 302 + { 303 + struct robust_list_head *head = arg; 304 + int ret; 305 + 306 + ret = set_robust_list(head, sizeof(*head)); 307 + if (ret) { 308 + ksft_test_result_fail("set_robust_list error\n"); 309 + return -1; 310 + } 311 + 312 + /* 313 + * After setting the list head, wait until the main thread can call 314 + * get_robust_list() for this thread before exiting. 315 + */ 316 + pthread_barrier_wait(&barrier); 317 + pthread_barrier_wait(&barrier2); 318 + 319 + return 0; 320 + } 321 + 322 + /* 323 + * Test get_robust_list from another thread. 
We use two barriers here to ensure 324 + * that: 325 + * 1) the child thread sets the list before we try to get it from the 326 + * parent 327 + * 2) the child thread is still alive when we try to get the list from it 328 + */ 329 + TEST(test_get_robust_list_child) 330 + { 331 + struct robust_list_head head, *get_head; 332 + int ret, wstatus; 333 + size_t len_ptr; 334 + pid_t tid; 335 + 336 + ret = pthread_barrier_init(&barrier, NULL, 2); 337 + ret = pthread_barrier_init(&barrier2, NULL, 2); 338 + ASSERT_EQ(ret, 0); 339 + 340 + tid = create_child(&child_list, &head); 341 + ASSERT_NE(tid, -1); 342 + 343 + pthread_barrier_wait(&barrier); 344 + 345 + ret = get_robust_list(tid, &get_head, &len_ptr); 346 + ASSERT_EQ(ret, 0); 347 + ASSERT_EQ(&head, get_head); 348 + 349 + pthread_barrier_wait(&barrier2); 350 + 351 + wait(&wstatus); 352 + pthread_barrier_destroy(&barrier); 353 + pthread_barrier_destroy(&barrier2); 354 + 355 + /* Pass only if the child hasn't returned an error */ 356 + if (!WEXITSTATUS(wstatus)) 357 + ksft_test_result_pass("%s\n", __func__); 358 + } 359 + 360 + static int child_fn_lock_with_error(void *arg) 361 + { 362 + struct lock_struct *lock = arg; 363 + struct robust_list_head head; 364 + int ret; 365 + 366 + ret = set_list(&head); 367 + if (ret) { 368 + ksft_test_result_fail("set_robust_list error\n"); 369 + return -1; 370 + } 371 + 372 + ret = mutex_lock(lock, &head, true); 373 + if (ret) { 374 + ksft_test_result_fail("mutex_lock error\n"); 375 + return -1; 376 + } 377 + 378 + pthread_barrier_wait(&barrier); 379 + 380 + /* See comment at child_fn_lock() */ 381 + usleep(SLEEP_US); 382 + 383 + return 0; 384 + } 385 + 386 + /* 387 + * Same as robustness test, but inject an error where the mutex_lock() exits 388 + * earlier, just after setting list_op_pending and taking the lock, to test the 389 + * list_op_pending mechanism 390 + */ 391 + TEST(test_set_list_op_pending) 392 + { 393 + struct lock_struct lock = { .futex = 0 }; 394 + _Atomic(unsigned int) *futex = 
&lock.futex; 395 + struct robust_list_head head; 396 + int ret, wstatus; 397 + 398 + ret = set_list(&head); 399 + ASSERT_EQ(ret, 0); 400 + 401 + ret = pthread_barrier_init(&barrier, NULL, 2); 402 + ASSERT_EQ(ret, 0); 403 + 404 + ret = create_child(&child_fn_lock_with_error, &lock); 405 + ASSERT_NE(ret, -1); 406 + 407 + pthread_barrier_wait(&barrier); 408 + ret = mutex_lock(&lock, &head, false); 409 + 410 + ASSERT_EQ(ret, 0); 411 + 412 + ASSERT_TRUE(*futex & FUTEX_OWNER_DIED); 413 + 414 + wait(&wstatus); 415 + pthread_barrier_destroy(&barrier); 416 + 417 + /* Pass only if the child hasn't returned an error */ 418 + if (!WEXITSTATUS(wstatus)) 419 + ksft_test_result_pass("%s\n", __func__); 420 + else 421 + ksft_test_result_fail("%s\n", __func__); 422 + } 423 + 424 + #define CHILD_NR 10 425 + 426 + static int child_lock_holder(void *arg) 427 + { 428 + struct lock_struct *locks = arg; 429 + struct robust_list_head head; 430 + int i; 431 + 432 + set_list(&head); 433 + 434 + for (i = 0; i < CHILD_NR; i++) { 435 + locks[i].futex = 0; 436 + mutex_lock(&locks[i], &head, false); 437 + } 438 + 439 + pthread_barrier_wait(&barrier); 440 + pthread_barrier_wait(&barrier2); 441 + 442 + /* See comment at child_fn_lock() */ 443 + usleep(SLEEP_US); 444 + 445 + return 0; 446 + } 447 + 448 + static int child_wait_lock(void *arg) 449 + { 450 + struct lock_struct *lock = arg; 451 + struct robust_list_head head; 452 + int ret; 453 + 454 + pthread_barrier_wait(&barrier2); 455 + ret = mutex_lock(lock, &head, false); 456 + 457 + if (ret) { 458 + ksft_test_result_fail("mutex_lock error\n"); 459 + return -1; 460 + } 461 + 462 + if (!(lock->futex & FUTEX_OWNER_DIED)) { 463 + ksft_test_result_fail("futex not marked with FUTEX_OWNER_DIED\n"); 464 + return -1; 465 + } 466 + 467 + return 0; 468 + } 469 + 470 + /* 471 + * Test a robust list of more than one element. 
All the waiters should wake when 472 + * the holder dies 473 + */ 474 + TEST(test_robust_list_multiple_elements) 475 + { 476 + struct lock_struct locks[CHILD_NR]; 477 + pid_t pids[CHILD_NR + 1]; 478 + int i, ret, wstatus; 479 + 480 + ret = pthread_barrier_init(&barrier, NULL, 2); 481 + ASSERT_EQ(ret, 0); 482 + ret = pthread_barrier_init(&barrier2, NULL, CHILD_NR + 1); 483 + ASSERT_EQ(ret, 0); 484 + 485 + pids[0] = create_child(&child_lock_holder, &locks); 486 + 487 + /* Wait until the locker thread takes the lock */ 488 + pthread_barrier_wait(&barrier); 489 + 490 + for (i = 0; i < CHILD_NR; i++) 491 + pids[i+1] = create_child(&child_wait_lock, &locks[i]); 492 + 493 + /* Wait for all children to return */ 494 + ret = 0; 495 + 496 + for (i = 0; i < CHILD_NR; i++) { 497 + waitpid(pids[i], &wstatus, 0); 498 + if (WEXITSTATUS(wstatus)) 499 + ret = -1; 500 + } 501 + 502 + pthread_barrier_destroy(&barrier); 503 + pthread_barrier_destroy(&barrier2); 504 + 505 + /* Pass only if the child hasn't returned an error */ 506 + if (!ret) 507 + ksft_test_result_pass("%s\n", __func__); 508 + } 509 + 510 + static int child_circular_list(void *arg) 511 + { 512 + static struct robust_list_head head; 513 + struct lock_struct a, b, c; 514 + int ret; 515 + 516 + ret = set_list(&head); 517 + if (ret) { 518 + ksft_test_result_fail("set_list error\n"); 519 + return -1; 520 + } 521 + 522 + head.list.next = &a.list; 523 + 524 + /* 525 + * The last element should point to head list, but we short circuit it 526 + */ 527 + a.list.next = &b.list; 528 + b.list.next = &c.list; 529 + c.list.next = &a.list; 530 + 531 + return 0; 532 + } 533 + 534 + /* 535 + * Create a circular robust list. 
The kernel should be able to destroy the list 536 + * while processing it so it won't be trapped in an infinite loop while handling 537 + * a process exit 538 + */ 539 + TEST(test_circular_list) 540 + { 541 + int wstatus; 542 + 543 + create_child(child_circular_list, NULL); 544 + 545 + wait(&wstatus); 546 + 547 + /* Pass only if the child hasn't returned an error */ 548 + if (!WEXITSTATUS(wstatus)) 549 + ksft_test_result_pass("%s\n", __func__); 550 + } 551 + 552 + TEST_HARNESS_MAIN