Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

futex: Fault/error injection capabilities

Although futexes are well known for being a royal pita,
we really have very few debugging capabilities - except
for relying on tglx's eye half the time.

By simply making use of the existing fault-injection machinery,
we can improve this situation by allowing the generation of artificial
uaddr faults and deadlock scenarios. Of course, when this is
disabled in production systems, the overhead for failure checks
is practically zero -- so this is very cheap at the same time.
As future work, it would be nice to enhance trinity to make use of
this.

There is a special tunable 'ignore-private', which can filter
out private futexes. Given the tsk->make_it_fail filter and
this option, pi futexes can be narrowed down pretty closely.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Darren Hart <darren@dvhart.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Link: http://lkml.kernel.org/r/1435645562-975-3-git-send-email-dave@stgolabs.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

authored by

Davidlohr Bueso and committed by
Thomas Gleixner
ab51fbab 767f509c

+105 -2
+11
Documentation/fault-injection/fault-injection.txt
··· 15 15 16 16 injects page allocation failures. (alloc_pages(), get_free_pages(), ...) 17 17 18 + o fail_futex 19 + 20 + injects futex deadlock and uaddr fault errors. 21 + 18 22 o fail_make_request 19 23 20 24 injects disk IO errors on devices permitted by setting ··· 117 113 specifies the minimum page allocation order to be injected 118 114 failures. 119 115 116 + - /sys/kernel/debug/fail_futex/ignore-private: 117 + 118 + Format: { 'Y' | 'N' } 119 + default is 'N', setting it to 'Y' will disable failure injections 120 + when dealing with private (address space) futexes. 121 + 120 122 o Boot option 121 123 122 124 In order to inject faults while debugfs is not available (early boot time), ··· 131 121 failslab= 132 122 fail_page_alloc= 133 123 fail_make_request= 124 + fail_futex= 134 125 mmc_core.fail_request=<interval>,<probability>,<space>,<times> 135 126 136 127 How to add new fault injection capability
+87 -2
kernel/futex.c
··· 64 64 #include <linux/hugetlb.h> 65 65 #include <linux/freezer.h> 66 66 #include <linux/bootmem.h> 67 + #include <linux/fault-inject.h> 67 68 68 69 #include <asm/futex.h> 69 70 ··· 259 258 260 259 static struct futex_hash_bucket *futex_queues; 261 260 261 + /* 262 + * Fault injections for futexes. 263 + */ 264 + #ifdef CONFIG_FAIL_FUTEX 265 + 266 + static struct { 267 + struct fault_attr attr; 268 + 269 + u32 ignore_private; 270 + } fail_futex = { 271 + .attr = FAULT_ATTR_INITIALIZER, 272 + .ignore_private = 0, 273 + }; 274 + 275 + static int __init setup_fail_futex(char *str) 276 + { 277 + return setup_fault_attr(&fail_futex.attr, str); 278 + } 279 + __setup("fail_futex=", setup_fail_futex); 280 + 281 + bool should_fail_futex(bool fshared) 282 + { 283 + if (fail_futex.ignore_private && !fshared) 284 + return false; 285 + 286 + return should_fail(&fail_futex.attr, 1); 287 + } 288 + 289 + #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS 290 + 291 + static int __init fail_futex_debugfs(void) 292 + { 293 + umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; 294 + struct dentry *dir; 295 + 296 + dir = fault_create_debugfs_attr("fail_futex", NULL, 297 + &fail_futex.attr); 298 + if (IS_ERR(dir)) 299 + return PTR_ERR(dir); 300 + 301 + if (!debugfs_create_bool("ignore-private", mode, dir, 302 + &fail_futex.ignore_private)) { 303 + debugfs_remove_recursive(dir); 304 + return -ENOMEM; 305 + } 306 + 307 + return 0; 308 + } 309 + 310 + late_initcall(fail_futex_debugfs); 311 + 312 + #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ 313 + 314 + #else 315 + static inline bool should_fail_futex(bool fshared) 316 + { 317 + return false; 318 + } 319 + #endif /* CONFIG_FAIL_FUTEX */ 320 + 262 321 static inline void futex_get_mm(union futex_key *key) 263 322 { 264 323 atomic_inc(&key->private.mm->mm_count); ··· 474 413 if (unlikely(!access_ok(rw, uaddr, sizeof(u32)))) 475 414 return -EFAULT; 476 415 416 + if (unlikely(should_fail_futex(fshared))) 417 + return -EFAULT; 418 + 477 419 /* 478 420 * 
PROCESS_PRIVATE futexes are fast. 479 421 * As the mm cannot disappear under us and the 'key' only needs ··· 492 428 } 493 429 494 430 again: 431 + /* Ignore any VERIFY_READ mapping (futex common case) */ 432 + if (unlikely(should_fail_futex(fshared))) 433 + return -EFAULT; 434 + 495 435 err = get_user_pages_fast(address, 1, 1, &page); 496 436 /* 497 437 * If write access is not required (eg. FUTEX_WAIT), try ··· 584 516 * A RO anonymous page will never change and thus doesn't make 585 517 * sense for futex operations. 586 518 */ 587 - if (ro) { 519 + if (unlikely(should_fail_futex(fshared)) || ro) { 588 520 err = -EFAULT; 589 521 goto out; 590 522 } ··· 1042 974 { 1043 975 u32 uninitialized_var(curval); 1044 976 977 + if (unlikely(should_fail_futex(true))) 978 + return -EFAULT; 979 + 1045 980 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) 1046 981 return -EFAULT; 1047 982 ··· 1086 1015 if (get_futex_value_locked(&uval, uaddr)) 1087 1016 return -EFAULT; 1088 1017 1018 + if (unlikely(should_fail_futex(true))) 1019 + return -EFAULT; 1020 + 1089 1021 /* 1090 1022 * Detect deadlocks. 1091 1023 */ 1092 1024 if ((unlikely((uval & FUTEX_TID_MASK) == vpid))) 1025 + return -EDEADLK; 1026 + 1027 + if ((unlikely(should_fail_futex(true)))) 1093 1028 return -EDEADLK; 1094 1029 1095 1030 /* ··· 1231 1154 * owner died bit, because we are the owner. 
1232 1155 */ 1233 1156 newval = FUTEX_WAITERS | task_pid_vnr(new_owner); 1157 + 1158 + if (unlikely(should_fail_futex(true))) 1159 + ret = -EFAULT; 1234 1160 1235 1161 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) 1236 1162 ret = -EFAULT; ··· 1535 1455 int ret, vpid; 1536 1456 1537 1457 if (get_futex_value_locked(&curval, pifutex)) 1458 + return -EFAULT; 1459 + 1460 + if (unlikely(should_fail_futex(true))) 1538 1461 return -EFAULT; 1539 1462 1540 1463 /* ··· 2620 2537 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 2621 2538 * @uaddr: the futex we initially wait on (non-pi) 2622 2539 * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be 2623 - * the same type, no requeueing from private to shared, etc. 2540 + * the same type, no requeueing from private to shared, etc. 2624 2541 * @val: the expected value of uaddr 2625 2542 * @abs_time: absolute timeout 2626 2543 * @bitset: 32 bit wakeup bitset set by userspace, defaults to all ··· 3095 3012 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || 3096 3013 cmd == FUTEX_WAIT_BITSET || 3097 3014 cmd == FUTEX_WAIT_REQUEUE_PI)) { 3015 + if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) 3016 + return -EFAULT; 3098 3017 if (copy_from_user(&ts, utime, sizeof(ts)) != 0) 3099 3018 return -EFAULT; 3100 3019 if (!timespec_valid(&ts))
+7
lib/Kconfig.debug
··· 1542 1542 and to test how the mmc host driver handles retries from 1543 1543 the block device. 1544 1544 1545 + config FAIL_FUTEX 1546 + bool "Fault-injection capability for futexes" 1547 + select DEBUG_FS 1548 + depends on FAULT_INJECTION && FUTEX 1549 + help 1550 + Provide fault-injection capability for futexes. 1551 + 1545 1552 config FAULT_INJECTION_DEBUG_FS 1546 1553 bool "Debugfs entries for fault-injection capabilities" 1547 1554 depends on FAULT_INJECTION && SYSFS && DEBUG_FS