Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'locking-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:

- rtmutex cleanup & spring cleaning pass that removes ~400 lines of
code

- Futex simplifications & cleanups

- Add debugging to the CSD code, to help track down a tenacious race
(or hw problem)

- Add lockdep_assert_not_held(), to allow code to require a lock to not
be held, and propagate this into the ath10k driver

- Misc LKMM documentation updates

- Misc KCSAN updates: cleanups & documentation updates

- Misc fixes and cleanups

- Fix locktorture bugs with ww_mutexes

* tag 'locking-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
kcsan: Fix printk format string
static_call: Relax static_call_update() function argument type
static_call: Fix unused variable warn w/o MODULE
locking/rtmutex: Clean up signal handling in __rt_mutex_slowlock()
locking/rtmutex: Restrict the trylock WARN_ON() to debug
locking/rtmutex: Fix misleading comment in rt_mutex_postunlock()
locking/rtmutex: Consolidate the fast/slowpath invocation
locking/rtmutex: Make text section and inlining consistent
locking/rtmutex: Move debug functions as inlines into common header
locking/rtmutex: Decrapify __rt_mutex_init()
locking/rtmutex: Remove pointless CONFIG_RT_MUTEXES=n stubs
locking/rtmutex: Inline chainwalk depth check
locking/rtmutex: Move rt_mutex_debug_task_free() to rtmutex.c
locking/rtmutex: Remove empty and unused debug stubs
locking/rtmutex: Consolidate rt_mutex_init()
locking/rtmutex: Remove output from deadlock detector
locking/rtmutex: Remove rtmutex deadlock tester leftovers
locking/rtmutex: Remove rt_mutex_timed_lock()
MAINTAINERS: Add myself as futex reviewer
locking/mutex: Remove repeated declaration
...

+1248 -829
+10
Documentation/admin-guide/kernel-parameters.txt
···
 	cs89x0_media=	[HW,NET]
 			Format: { rj45 | aui | bnc }
 
+	csdlock_debug=	[KNL] Enable debug add-ons of cross-CPU function call
+			handling. When switched on, additional debug data is
+			printed to the console in case a hanging CPU is
+			detected, and that CPU is pinged again in order to try
+			to resolve the hang situation.
+			0: disable csdlock debugging (default)
+			1: enable basic csdlock debugging (minor impact)
+			ext: enable extended csdlock debugging (more impact,
+			     but more data)
+
 	dasd=		[HW,NET]
 			See header of drivers/s390/block/dasd_devmap.c.
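The new boot parameter accepts exactly three values. As a hedged illustration of the documented semantics (a hypothetical userspace sketch, not the kernel's actual parameter-parsing code):

```c
#include <string.h>
#include <assert.h>

/* Hypothetical model of the three documented csdlock_debug= settings. */
enum csd_debug {
	CSD_DEBUG_INVALID = -1,
	CSD_DEBUG_OFF     = 0,	/* default: no CSD lock debugging */
	CSD_DEBUG_BASIC   = 1,	/* basic debugging, minor impact */
	CSD_DEBUG_EXT     = 2,	/* extended debugging, more impact but more data */
};

static enum csd_debug parse_csdlock_debug(const char *val)
{
	if (!strcmp(val, "0"))
		return CSD_DEBUG_OFF;
	if (!strcmp(val, "1"))
		return CSD_DEBUG_BASIC;
	if (!strcmp(val, "ext"))
		return CSD_DEBUG_EXT;
	return CSD_DEBUG_INVALID;
}
```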
+3
Documentation/dev-tools/kcsan.rst
···
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright (C) 2019, Google LLC.
+
 The Kernel Concurrency Sanitizer (KCSAN)
 ========================================
+1
MAINTAINERS
···
 M:	Ingo Molnar <mingo@redhat.com>
 R:	Peter Zijlstra <peterz@infradead.org>
 R:	Darren Hart <dvhart@infradead.org>
+R:	Davidlohr Bueso <dave@stgolabs.net>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
+1 -1
arch/arm/include/asm/spinlock.h
···
  * assembler to insert a extra (16-bit) IT instruction, depending on the
  * presence or absence of neighbouring conditional instructions.
  *
- * To avoid this unpredictableness, an approprite IT is inserted explicitly:
+ * To avoid this unpredictability, an appropriate IT is inserted explicitly:
  * the assembler won't change IT instructions which are explicitly present
  * in the input.
  */
+2 -2
arch/x86/include/asm/jump_label.h
···
 #include <linux/stringify.h>
 #include <linux/types.h>
 
-static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
 		".byte " __stringify(BYTES_NOP5) "\n\t"
···
 	return true;
 }
 
-static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
 		".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
+2
drivers/net/wireless/ath/ath10k/mac.c
···
 /* Must not be called with conf_mutex held as workers can use that also. */
 void ath10k_drain_tx(struct ath10k *ar)
 {
+	lockdep_assert_not_held(&ar->conf_mutex);
+
 	/* make sure rcu-protected mac80211 tx path itself is drained */
 	synchronize_net();
+6
include/linux/kcsan-checks.h
···
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KCSAN access checks and modifiers. These can be used to explicitly check
+ * uninstrumented accesses, or change KCSAN checking behaviour of accesses.
+ *
+ * Copyright (C) 2019, Google LLC.
+ */
 
 #ifndef _LINUX_KCSAN_CHECKS_H
 #define _LINUX_KCSAN_CHECKS_H
+7
include/linux/kcsan.h
···
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The Kernel Concurrency Sanitizer (KCSAN) infrastructure. Public interface and
+ * data structures to set up runtime. See kcsan-checks.h for explicit checks and
+ * modifiers. For more info please see Documentation/dev-tools/kcsan.rst.
+ *
+ * Copyright (C) 2019, Google LLC.
+ */
 
 #ifndef _LINUX_KCSAN_H
 #define _LINUX_KCSAN_H
+16 -4
include/linux/lockdep.h
···
 extern void lockdep_init_task(struct task_struct *task);
 
 /*
- * Split the recrursion counter in two to readily detect 'off' vs recursion.
+ * Split the recursion counter in two to readily detect 'off' vs recursion.
  */
 #define LOCKDEP_RECURSION_BITS	16
 #define LOCKDEP_OFF		(1U << LOCKDEP_RECURSION_BITS)
···
 extern void lock_release(struct lockdep_map *lock, unsigned long ip);
 
+/* lock_is_held_type() returns */
+#define LOCK_STATE_UNKNOWN	-1
+#define LOCK_STATE_NOT_HELD	0
+#define LOCK_STATE_HELD		1
+
 /*
  * Same "read" as for lock_acquire(), except -1 means any.
  */
···
 #define lockdep_depth(tsk)	(debug_locks ? (tsk)->lockdep_depth : 0)
 
-#define lockdep_assert_held(l)	do {				\
-		WARN_ON(debug_locks && !lockdep_is_held(l));	\
+#define lockdep_assert_held(l)	do {					\
+		WARN_ON(debug_locks &&					\
+			lockdep_is_held(l) == LOCK_STATE_NOT_HELD);	\
+	} while (0)
+
+#define lockdep_assert_not_held(l)	do {				\
+		WARN_ON(debug_locks &&					\
+			lockdep_is_held(l) == LOCK_STATE_HELD);		\
 	} while (0)
 
 #define lockdep_assert_held_write(l)	do {			\
···
 #define lockdep_is_held_type(l, r)		(1)
 
 #define lockdep_assert_held(l)			do { (void)(l); } while (0)
-#define lockdep_assert_held_write(l)	do { (void)(l); } while (0)
+#define lockdep_assert_not_held(l)		do { (void)(l); } while (0)
+#define lockdep_assert_held_write(l)		do { (void)(l); } while (0)
 #define lockdep_assert_held_read(l)		do { (void)(l); } while (0)
 #define lockdep_assert_held_once(l)		do { (void)(l); } while (0)
 #define lockdep_assert_none_held_once()	do { } while (0)
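The reason lock_is_held_type() becomes tri-state rather than boolean: with lockdep disabled, a boolean return had to claim "held" to keep lockdep_assert_held() quiet, but that same answer would make the new lockdep_assert_not_held() fire spuriously. A minimal userspace model of the decision logic (a sketch of the tri-state idea, not the kernel macros themselves):

```c
#include <assert.h>

/* Userspace model of the tri-state return values. */
#define LOCK_STATE_UNKNOWN	-1	/* lockdep disabled: don't warn either way */
#define LOCK_STATE_NOT_HELD	0
#define LOCK_STATE_HELD		1

/* Returns 1 if a lockdep_assert_held()-style check would warn for this state. */
static int would_warn_held(int state)
{
	return state == LOCK_STATE_NOT_HELD;
}

/* Returns 1 if a lockdep_assert_not_held()-style check would warn. */
static int would_warn_not_held(int state)
{
	return state == LOCK_STATE_HELD;
}
```

With LOCK_STATE_UNKNOWN, both assertions stay silent when lockdep cannot give a trustworthy answer, avoiding false positives in either direction.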
+1 -3
include/linux/mutex.h
···
 #include <linux/osq_lock.h>
 #include <linux/debug_locks.h>
 
+struct ww_class;
 struct ww_acquire_ctx;
 
 /*
···
 	struct lockdep_map	dep_map;
 #endif
 };
-
-struct ww_class;
-struct ww_acquire_ctx;
 
 struct ww_mutex {
 	struct mutex base;
+3 -32
include/linux/rtmutex.h
···
 	raw_spinlock_t		wait_lock;
 	struct rb_root_cached	waiters;
 	struct task_struct	*owner;
-#ifdef CONFIG_DEBUG_RT_MUTEXES
-	int			save_state;
-	const char		*name, *file;
-	int			line;
-	void			*magic;
-#endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	dep_map;
 #endif
···
 struct hrtimer_sleeper;
 
 #ifdef CONFIG_DEBUG_RT_MUTEXES
-extern int rt_mutex_debug_check_no_locks_freed(const void *from,
-					       unsigned long len);
-extern void rt_mutex_debug_check_no_locks_held(struct task_struct *task);
+extern void rt_mutex_debug_task_free(struct task_struct *tsk);
 #else
-static inline int rt_mutex_debug_check_no_locks_freed(const void *from,
-						      unsigned long len)
-{
-	return 0;
-}
-# define rt_mutex_debug_check_no_locks_held(task)	do { } while (0)
+static inline void rt_mutex_debug_task_free(struct task_struct *tsk) { }
 #endif
 
-#ifdef CONFIG_DEBUG_RT_MUTEXES
-# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
-	, .name = #mutexname, .file = __FILE__, .line = __LINE__
-
-# define rt_mutex_init(mutex) \
+#define rt_mutex_init(mutex) \
 do { \
 	static struct lock_class_key __key; \
 	__rt_mutex_init(mutex, __func__, &__key); \
 } while (0)
-
-extern void rt_mutex_debug_task_free(struct task_struct *tsk);
-#else
-# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
-# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, NULL, NULL)
-# define rt_mutex_debug_task_free(t)		do { } while (0)
-#endif
···
 { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
 , .waiters = RB_ROOT_CACHED \
 , .owner = NULL \
-	__DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
 	__DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)}
 
 #define DEFINE_RT_MUTEX(mutexname) \
···
 }
 
 extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key);
-extern void rt_mutex_destroy(struct rt_mutex *lock);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass);
···
 #endif
 
 extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
-extern int rt_mutex_timed_lock(struct rt_mutex *lock,
-			       struct hrtimer_sleeper *timeout);
-
 extern int rt_mutex_trylock(struct rt_mutex *lock);
 
 extern void rt_mutex_unlock(struct rt_mutex *lock);
+1 -1
include/linux/rwsem.h
···
 /*
  * This is the same regardless of which rwsem implementation that is being used.
- * It is just a heuristic meant to be called by somebody alreadying holding the
+ * It is just a heuristic meant to be called by somebody already holding the
  * rwsem to see if somebody from an incompatible type is wanting access to the
  * lock.
  */
+2 -2
include/linux/static_call.h
···
 #define static_call_update(name, func)					\
 ({									\
-	BUILD_BUG_ON(!__same_type(*(func), STATIC_CALL_TRAMP(name)));	\
+	typeof(&STATIC_CALL_TRAMP(name)) __F = (func);			\
 	__static_call_update(&STATIC_CALL_KEY(name),			\
-			     STATIC_CALL_TRAMP_ADDR(name), func);	\
+			     STATIC_CALL_TRAMP_ADDR(name), __F);	\
 })
 
 #define static_call_query(name)	(READ_ONCE(STATIC_CALL_KEY(name).func))
+2 -15
include/linux/ww_mutex.h
···
 #endif
 };
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) \
-		, .ww_class = class
-#else
-# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class)
-#endif
-
 #define __WW_CLASS_INITIALIZER(ww_class, _is_wait_die)	    \
 		{ .stamp = ATOMIC_LONG_INIT(0) \
 		, .acquire_name = #ww_class "_acquire" \
 		, .mutex_name = #ww_class "_mutex" \
 		, .is_wait_die = _is_wait_die }
-
-#define __WW_MUTEX_INITIALIZER(lockname, class) \
-		{ .base =  __MUTEX_INITIALIZER(lockname.base) \
-		__WW_CLASS_MUTEX_INITIALIZER(lockname, class) }
 
 #define DEFINE_WD_CLASS(classname) \
 	struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 1)
···
 #define DEFINE_WW_CLASS(classname) \
 	struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 0)
 
-#define DEFINE_WW_MUTEX(mutexname, ww_class) \
-	struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class)
-
 /**
  * ww_mutex_init - initialize the w/w mutex
  * @lock: the mutex to be initialized
  * @ww_class: the w/w class the mutex should belong to
  *
  * Initialize the w/w mutex to unlocked state and associate it with the given
- * class.
+ * class. Static define macro for w/w mutex is not provided and this function
+ * is the only way to properly initialize the w/w mutex.
  *
  * It is not allowed to initialize an already locked mutex.
  */
+14 -15
kernel/futex.c
···
  * p->pi_lock:
  *
  *	p->pi_state_list -> pi_state->list, relation
+ *	pi_mutex->owner -> pi_state->owner, relation
  *
  * pi_state->refcount:
  *
···
 static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
 {
 	u32 curval, newval;
+	struct rt_mutex_waiter *top_waiter;
 	struct task_struct *new_owner;
 	bool postunlock = false;
 	DEFINE_WAKE_Q(wake_q);
 	int ret = 0;
 
-	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
-	if (WARN_ON_ONCE(!new_owner)) {
+	top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
+	if (WARN_ON_ONCE(!top_waiter)) {
 		/*
 		 * As per the comment in futex_unlock_pi() this should not happen.
 		 *
···
 		ret = -EAGAIN;
 		goto out_unlock;
 	}
+
+	new_owner = top_waiter->task;
 
 	/*
 	 * We pass it to the next owner. The WAITERS bit is always kept
···
 /*
  * PI futexes can not be requeued and must remove themself from the
- * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
- * and dropped here.
+ * hash bucket. The hash bucket lock (i.e. lock_ptr) is held.
  */
 static void unqueue_me_pi(struct futex_q *q)
-	__releases(q->lock_ptr)
 {
 	__unqueue_futex(q);
 
 	BUG_ON(!q->pi_state);
 	put_pi_state(q->pi_state);
 	q->pi_state = NULL;
-
-	spin_unlock(q->lock_ptr);
 }
 
 static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
···
 	if (res)
 		ret = (res < 0) ? res : 0;
 
-	/* Unqueue and drop the lock */
 	unqueue_me_pi(&q);
+	spin_unlock(q.lock_ptr);
 	goto out;
 
 out_unlock_put_key:
···
 	 * reference count.
 	 */
 
-	/* Check if the requeue code acquired the second futex for us. */
+	/*
+	 * Check if the requeue code acquired the second futex for us and do
+	 * any pertinent fixup.
+	 */
 	if (!q.rt_waiter) {
-		/*
-		 * Got the lock. We might not be the anticipated owner if we
-		 * did a lock-steal - fix up the PI-state in that case.
-		 */
 		if (q.pi_state && (q.pi_state->owner != current)) {
 			spin_lock(q.lock_ptr);
-			ret = fixup_pi_state_owner(uaddr2, &q, current);
+			ret = fixup_owner(uaddr2, &q, true);
 			/*
 			 * Drop the reference to the pi state which
 			 * the requeue_pi() code acquired for us.
···
 		if (res)
 			ret = (res < 0) ? res : 0;
 
-		/* Unqueue and drop the lock. */
 		unqueue_me_pi(&q);
+		spin_unlock(q.lock_ptr);
 	}
 
 	if (ret == -EINTR) {
+2 -2
kernel/kcsan/Makefile
···
 obj-y := core.o debugfs.o report.o
 obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o
 
-CFLAGS_kcsan-test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer
-obj-$(CONFIG_KCSAN_TEST) += kcsan-test.o
+CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer
+obj-$(CONFIG_KCSAN_KUNIT_TEST) += kcsan_test.o
+5
kernel/kcsan/atomic.h
···
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Rules for implicitly atomic memory accesses.
+ *
+ * Copyright (C) 2019, Google LLC.
+ */
 
 #ifndef _KERNEL_KCSAN_ATOMIC_H
 #define _KERNEL_KCSAN_ATOMIC_H
+5 -2
kernel/kcsan/core.c
···
 // SPDX-License-Identifier: GPL-2.0
+/*
+ * KCSAN core runtime.
+ *
+ * Copyright (C) 2019, Google LLC.
+ */
 
 #define pr_fmt(fmt) "kcsan: " fmt
 
···
 	int cpu;
 
 	BUG_ON(!in_task());
-
-	kcsan_debugfs_init();
 
 	for_each_possible_cpu(cpu)
 		per_cpu(kcsan_rand_state, cpu) = (u32)get_cycles();
+8 -1
kernel/kcsan/debugfs.c
···
 // SPDX-License-Identifier: GPL-2.0
+/*
+ * KCSAN debugfs interface.
+ *
+ * Copyright (C) 2019, Google LLC.
+ */
 
 #define pr_fmt(fmt) "kcsan: " fmt
 
···
 	.release = single_release
 };
 
-void __init kcsan_debugfs_init(void)
+static void __init kcsan_debugfs_init(void)
 {
 	debugfs_create_file("kcsan", 0644, NULL, NULL, &debugfs_ops);
 }
+
+late_initcall(kcsan_debugfs_init);
+5
kernel/kcsan/encoding.h
···
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KCSAN watchpoint encoding.
+ *
+ * Copyright (C) 2019, Google LLC.
+ */
 
 #ifndef _KERNEL_KCSAN_ENCODING_H
 #define _KERNEL_KCSAN_ENCODING_H
+56 -64
kernel/kcsan/kcsan-test.c → kernel/kcsan/kcsan_test.c
···
  * Author: Marco Elver <elver@google.com>
  */
 
+#define pr_fmt(fmt) "kcsan_test: " fmt
+
 #include <kunit/test.h>
 #include <linux/jiffies.h>
 #include <linux/kcsan-checks.h>
···
 }
 
 /*
- * Each test case is run with different numbers of threads. Until KUnit supports
- * passing arguments for each test case, we encode #threads in the test case
- * name (read by get_num_threads()). [The '-' was chosen as a stylistic
- * preference to separate test name and #threads.]
+ * Generate thread counts for all test cases. Values generated are in interval
+ * [2, 5] followed by exponentially increasing thread counts from 8 to 32.
  *
  * The thread counts are chosen to cover potentially interesting boundaries and
- * corner cases (range 2-5), and then stress the system with larger counts.
+ * corner cases (2 to 5), and then stress the system with larger counts.
  */
-#define KCSAN_KUNIT_CASE(test_name)                                            \
-	{ .run_case = test_name, .name = #test_name "-02" },                   \
-	{ .run_case = test_name, .name = #test_name "-03" },                   \
-	{ .run_case = test_name, .name = #test_name "-04" },                   \
-	{ .run_case = test_name, .name = #test_name "-05" },                   \
-	{ .run_case = test_name, .name = #test_name "-08" },                   \
-	{ .run_case = test_name, .name = #test_name "-16" }
+static const void *nthreads_gen_params(const void *prev, char *desc)
+{
+	long nthreads = (long)prev;
+
+	if (nthreads < 0 || nthreads >= 32)
+		nthreads = 0; /* stop */
+	else if (!nthreads)
+		nthreads = 2; /* initial value */
+	else if (nthreads < 5)
+		nthreads++;
+	else if (nthreads == 5)
+		nthreads = 8;
+	else
+		nthreads *= 2;
+
+	if (!IS_ENABLED(CONFIG_PREEMPT) || !IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) {
+		/*
+		 * Without any preemption, keep 2 CPUs free for other tasks, one
+		 * of which is the main test case function checking for
+		 * completion or failure.
+		 */
+		const long min_unused_cpus = IS_ENABLED(CONFIG_PREEMPT_NONE) ? 2 : 0;
+		const long min_required_cpus = 2 + min_unused_cpus;
+
+		if (num_online_cpus() < min_required_cpus) {
+			pr_err_once("Too few online CPUs (%u < %ld) for test\n",
+				    num_online_cpus(), min_required_cpus);
+			nthreads = 0;
+		} else if (nthreads >= num_online_cpus() - min_unused_cpus) {
+			/* Use negative value to indicate last param. */
+			nthreads = -(num_online_cpus() - min_unused_cpus);
+			pr_warn_once("Limiting number of threads to %ld (only %d online CPUs)\n",
+				     -nthreads, num_online_cpus());
+		}
+	}
+
+	snprintf(desc, KUNIT_PARAM_DESC_SIZE, "threads=%ld", abs(nthreads));
+	return (void *)nthreads;
+}
+
+#define KCSAN_KUNIT_CASE(test_name) KUNIT_CASE_PARAM(test_name, nthreads_gen_params)
 
 static struct kunit_case kcsan_test_cases[] = {
 	KCSAN_KUNIT_CASE(test_basic),
 	KCSAN_KUNIT_CASE(test_concurrent_races),
···
 };
 
 /* ===== End test cases ===== */
-
-/* Get number of threads encoded in test name. */
-static bool __no_kcsan
-get_num_threads(const char *test, int *nthreads)
-{
-	int len = strlen(test);
-
-	if (WARN_ON(len < 3))
-		return false;
-
-	*nthreads = test[len - 1] - '0';
-	*nthreads += (test[len - 2] - '0') * 10;
-
-	if (WARN_ON(*nthreads < 0))
-		return false;
-
-	return true;
-}
 
 /* Concurrent accesses from interrupts. */
 __no_kcsan
···
 	if (!torture_init_begin((char *)test->name, 1))
 		return -EBUSY;
 
-	if (!get_num_threads(test->name, &nthreads))
-		goto err;
-
 	if (WARN_ON(threads))
 		goto err;
···
 		goto err;
 	}
 
-	if (!IS_ENABLED(CONFIG_PREEMPT) || !IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) {
-		/*
-		 * Without any preemption, keep 2 CPUs free for other tasks, one
-		 * of which is the main test case function checking for
-		 * completion or failure.
-		 */
-		const int min_unused_cpus = IS_ENABLED(CONFIG_PREEMPT_NONE) ? 2 : 0;
-		const int min_required_cpus = 2 + min_unused_cpus;
+	nthreads = abs((long)test->param_value);
+	if (WARN_ON(!nthreads))
+		goto err;
 
-		if (num_online_cpus() < min_required_cpus) {
-			pr_err("%s: too few online CPUs (%u < %d) for test",
-			       test->name, num_online_cpus(), min_required_cpus);
+	threads = kcalloc(nthreads + 1, sizeof(struct task_struct *), GFP_KERNEL);
+	if (WARN_ON(!threads))
+		goto err;
+
+	threads[nthreads] = NULL;
+	for (i = 0; i < nthreads; ++i) {
+		if (torture_create_kthread(access_thread, NULL, threads[i]))
 			goto err;
-		} else if (nthreads > num_online_cpus() - min_unused_cpus) {
-			nthreads = num_online_cpus() - min_unused_cpus;
-			pr_warn("%s: limiting number of threads to %d\n",
-				test->name, nthreads);
-		}
-	}
-
-	if (nthreads) {
-		threads = kcalloc(nthreads + 1, sizeof(struct task_struct *),
-				  GFP_KERNEL);
-		if (WARN_ON(!threads))
-			goto err;
-
-		threads[nthreads] = NULL;
-		for (i = 0; i < nthreads; ++i) {
-			if (torture_create_kthread(access_thread, NULL,
-						   threads[i]))
-				goto err;
-		}
 	}
 
 	torture_init_end();
···
 }
 
 static struct kunit_suite kcsan_test_suite = {
-	.name = "kcsan-test",
+	.name = "kcsan",
 	.test_cases = kcsan_test_cases,
 	.init = test_init,
 	.exit = test_exit,
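Ignoring the CPU-count clamping, the new nthreads_gen_params() generator above walks a fixed sequence: 2, 3, 4, 5, then 8, 16, 32, then stops. A userspace re-implementation of just that arithmetic (an illustrative sketch; the real function also clamps to online CPUs and writes a KUnit description string):

```c
#include <assert.h>

/* Next thread count, mirroring nthreads_gen_params() without the CPU clamp. */
static long next_nthreads(long nthreads)
{
	if (nthreads < 0 || nthreads >= 32)
		return 0;		/* stop after 32 */
	if (!nthreads)
		return 2;		/* initial value */
	if (nthreads < 5)
		return nthreads + 1;	/* 2..5 cover small corner cases */
	if (nthreads == 5)
		return 8;
	return nthreads * 2;		/* 8, 16, 32 stress the system */
}
```

Starting from 0 (KUnit passes NULL as the first `prev`), repeated calls produce 2, 3, 4, 5, 8, 16, 32 and then 0, which terminates parameter generation.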
+2 -6
kernel/kcsan/kcsan.h
···
 /* SPDX-License-Identifier: GPL-2.0 */
-
 /*
  * The Kernel Concurrency Sanitizer (KCSAN) infrastructure. For more info please
  * see Documentation/dev-tools/kcsan.rst.
+ *
+ * Copyright (C) 2019, Google LLC.
  */
 
 #ifndef _KERNEL_KCSAN_KCSAN_H
···
  */
 void kcsan_save_irqtrace(struct task_struct *task);
 void kcsan_restore_irqtrace(struct task_struct *task);
-
-/*
- * Initialize debugfs file.
- */
-void kcsan_debugfs_init(void);
 
 /*
  * Statistics counters displayed via debugfs; should only be modified in
+5
kernel/kcsan/report.c
···
 // SPDX-License-Identifier: GPL-2.0
+/*
+ * KCSAN reporting.
+ *
+ * Copyright (C) 2019, Google LLC.
+ */
 
 #include <linux/debug_locks.h>
 #include <linux/delay.h>
+5
kernel/kcsan/selftest.c
···
 // SPDX-License-Identifier: GPL-2.0
+/*
+ * KCSAN short boot-time selftests.
+ *
+ * Copyright (C) 2019, Google LLC.
+ */
 
 #define pr_fmt(fmt) "kcsan: " fmt
-2
kernel/locking/Makefile
···
 CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_lockdep_proc.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
 endif
···
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
 obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
-obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
+12 -7
kernel/locking/lockdep.c
···
 #include <linux/nmi.h>
 #include <linux/rcupdate.h>
 #include <linux/kprobes.h>
+#include <linux/lockdep.h>
 
 #include <asm/sections.h>
···
 	/*
 	 * Step 4: if not match, expand the path by adding the
-	 * forward or backwards dependencis in the search
+	 * forward or backwards dependencies in the search
 	 *
 	 */
 	first = true;
···
 * -> B is -(ER)-> or -(EN)->, then we don't need to add A -> B into the
 * dependency graph, as any strong path ..-> A -> B ->.. we can get with
 * having dependency A -> B, we could already get a equivalent path ..-> A ->
- * .. -> B -> .. with A -> .. -> B. Therefore A -> B is reduntant.
+ * .. -> B -> .. with A -> .. -> B. Therefore A -> B is redundant.
 *
 * We need to make sure both the start and the end of A -> .. -> B is not
 * weaker than A -> B. For the start part, please see the comment in
···
 		if (match_held_lock(hlock, lock)) {
 			if (read == -1 || hlock->read == read)
-				return 1;
+				return LOCK_STATE_HELD;
 
-			return 0;
+			return LOCK_STATE_NOT_HELD;
 		}
 	}
 
-	return 0;
+	return LOCK_STATE_NOT_HELD;
 }
 
 static struct pin_cookie __lock_pin_lock(struct lockdep_map *lock)
···
 noinstr int lock_is_held_type(const struct lockdep_map *lock, int read)
 {
 	unsigned long flags;
-	int ret = 0;
+	int ret = LOCK_STATE_NOT_HELD;
 
+	/*
+	 * Avoid false negative lockdep_assert_held() and
+	 * lockdep_assert_not_held().
+	 */
 	if (unlikely(!lockdep_enabled()))
-		return 1; /* avoid false negative lockdep_assert_held() */
+		return LOCK_STATE_UNKNOWN;
 
 	raw_local_irq_save(flags);
 	check_flags(flags);
+1 -1
kernel/locking/lockdep_proc.c
···
 			debug_locks);
 
 	/*
-	 * Zappped classes and lockdep data buffers reuse statistics.
+	 * Zapped classes and lockdep data buffers reuse statistics.
 	 */
 	seq_puts(m, "\n");
 	seq_printf(m, " zapped classes:	%11lu\n",
+97 -49
kernel/locking/locktorture.c
···
 struct lock_torture_ops {
 	void (*init)(void);
 	void (*exit)(void);
-	int (*writelock)(void);
+	int (*writelock)(int tid);
 	void (*write_delay)(struct torture_random_state *trsp);
 	void (*task_boost)(struct torture_random_state *trsp);
-	void (*writeunlock)(void);
-	int (*readlock)(void);
+	void (*writeunlock)(int tid);
+	int (*readlock)(int tid);
 	void (*read_delay)(struct torture_random_state *trsp);
-	void (*readunlock)(void);
+	void (*readunlock)(int tid);
 
 	unsigned long flags; /* for irq spinlocks */
 	const char *name;
···
  * Definitions for lock torture testing.
  */
 
-static int torture_lock_busted_write_lock(void)
+static int torture_lock_busted_write_lock(int tid __maybe_unused)
 {
 	return 0;  /* BUGGY, do not use in real life!!! */
 }
···
 	torture_preempt_schedule();  /* Allow test to be preempted. */
 }
 
-static void torture_lock_busted_write_unlock(void)
+static void torture_lock_busted_write_unlock(int tid __maybe_unused)
 {
 	  /* BUGGY, do not use in real life!!! */
 }
···
 static DEFINE_SPINLOCK(torture_spinlock);
 
-static int torture_spin_lock_write_lock(void) __acquires(torture_spinlock)
+static int torture_spin_lock_write_lock(int tid __maybe_unused)
+__acquires(torture_spinlock)
 {
 	spin_lock(&torture_spinlock);
 	return 0;
···
 	torture_preempt_schedule();  /* Allow test to be preempted. */
 }
 
-static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock)
+static void torture_spin_lock_write_unlock(int tid __maybe_unused)
+__releases(torture_spinlock)
 {
 	spin_unlock(&torture_spinlock);
 }
···
 	.name = "spin_lock"
 };
 
-static int torture_spin_lock_write_lock_irq(void)
+static int torture_spin_lock_write_lock_irq(int tid __maybe_unused)
 __acquires(torture_spinlock)
 {
 	unsigned long flags;
···
 	return 0;
 }
 
-static void torture_lock_spin_write_unlock_irq(void)
+static void torture_lock_spin_write_unlock_irq(int tid __maybe_unused)
 __releases(torture_spinlock)
 {
 	spin_unlock_irqrestore(&torture_spinlock, cxt.cur_ops->flags);
···
 static DEFINE_RWLOCK(torture_rwlock);
 
-static int torture_rwlock_write_lock(void) __acquires(torture_rwlock)
+static int torture_rwlock_write_lock(int tid __maybe_unused)
+__acquires(torture_rwlock)
 {
 	write_lock(&torture_rwlock);
 	return 0;
···
 	udelay(shortdelay_us);
 }
 
-static void torture_rwlock_write_unlock(void) __releases(torture_rwlock)
+static void torture_rwlock_write_unlock(int tid __maybe_unused)
+__releases(torture_rwlock)
 {
 	write_unlock(&torture_rwlock);
 }
 
-static int torture_rwlock_read_lock(void) __acquires(torture_rwlock)
+static int torture_rwlock_read_lock(int tid __maybe_unused)
+__acquires(torture_rwlock)
 {
 	read_lock(&torture_rwlock);
 	return 0;
···
 	udelay(shortdelay_us);
 }
 
-static void torture_rwlock_read_unlock(void) __releases(torture_rwlock)
+static void torture_rwlock_read_unlock(int tid __maybe_unused)
+__releases(torture_rwlock)
 {
 	read_unlock(&torture_rwlock);
 }
···
 	.name = "rw_lock"
 };
 
-static int torture_rwlock_write_lock_irq(void) __acquires(torture_rwlock)
+static int torture_rwlock_write_lock_irq(int tid __maybe_unused)
+__acquires(torture_rwlock)
 {
 	unsigned long flags;
···
 	return 0;
 }
 
-static void torture_rwlock_write_unlock_irq(void)
+static void torture_rwlock_write_unlock_irq(int tid __maybe_unused)
 __releases(torture_rwlock)
 {
 	write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
 }
 
-static int torture_rwlock_read_lock_irq(void) __acquires(torture_rwlock)
+static int torture_rwlock_read_lock_irq(int tid __maybe_unused)
+__acquires(torture_rwlock)
 {
 	unsigned long flags;
···
 	return 0;
 }
 
-static void torture_rwlock_read_unlock_irq(void)
+static void torture_rwlock_read_unlock_irq(int tid __maybe_unused)
 __releases(torture_rwlock)
 {
 	read_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
···
 static DEFINE_MUTEX(torture_mutex);
 
-static int torture_mutex_lock(void) __acquires(torture_mutex)
+static int torture_mutex_lock(int tid __maybe_unused)
+__acquires(torture_mutex)
 {
 	mutex_lock(&torture_mutex);
 	return 0;
···
 	torture_preempt_schedule();  /* Allow test to be preempted. */
 }
 
-static void torture_mutex_unlock(void) __releases(torture_mutex)
+static void torture_mutex_unlock(int tid __maybe_unused)
+__releases(torture_mutex)
 {
 	mutex_unlock(&torture_mutex);
 }
···
 };
 
 #include <linux/ww_mutex.h>
+/*
+ * The torture ww_mutexes should belong to the same lock class as
+ * torture_ww_class to avoid lockdep problem. The ww_mutex_init()
+ * function is called for initialization to ensure that.
+ */
 static DEFINE_WD_CLASS(torture_ww_class);
-static DEFINE_WW_MUTEX(torture_ww_mutex_0, &torture_ww_class);
-static DEFINE_WW_MUTEX(torture_ww_mutex_1, &torture_ww_class);
-static DEFINE_WW_MUTEX(torture_ww_mutex_2, &torture_ww_class);
+static struct ww_mutex torture_ww_mutex_0, torture_ww_mutex_1, torture_ww_mutex_2;
+static struct ww_acquire_ctx *ww_acquire_ctxs;
 
-static int torture_ww_mutex_lock(void)
+static void torture_ww_mutex_init(void)
+{
+	ww_mutex_init(&torture_ww_mutex_0, &torture_ww_class);
+	ww_mutex_init(&torture_ww_mutex_1, &torture_ww_class);
+	ww_mutex_init(&torture_ww_mutex_2, &torture_ww_class);
+
+	ww_acquire_ctxs = kmalloc_array(cxt.nrealwriters_stress,
+					sizeof(*ww_acquire_ctxs),
+					GFP_KERNEL);
+	if (!ww_acquire_ctxs)
+		VERBOSE_TOROUT_STRING("ww_acquire_ctx: Out of memory");
+}
+
+static void torture_ww_mutex_exit(void)
+{
+	kfree(ww_acquire_ctxs);
+}
+
+static int torture_ww_mutex_lock(int tid)
 __acquires(torture_ww_mutex_0)
 __acquires(torture_ww_mutex_1)
 __acquires(torture_ww_mutex_2)
···
 		struct list_head link;
 		struct ww_mutex *lock;
 	} locks[3], *ll, *ln;
-	struct ww_acquire_ctx ctx;
+	struct ww_acquire_ctx *ctx = &ww_acquire_ctxs[tid];
 
 	locks[0].lock = &torture_ww_mutex_0;
 	list_add(&locks[0].link, &list);
···
 	locks[2].lock = &torture_ww_mutex_2;
 	list_add(&locks[2].link, &list);
 
-	ww_acquire_init(&ctx, &torture_ww_class);
+	ww_acquire_init(ctx, &torture_ww_class);
 
 	list_for_each_entry(ll, &list, link) {
 		int err;
 
-		err = ww_mutex_lock(ll->lock, &ctx);
+		err = ww_mutex_lock(ll->lock, ctx);
 		if (!err)
 			continue;
···
 		if (err != -EDEADLK)
 			return err;
 
-		ww_mutex_lock_slow(ll->lock, &ctx);
+		ww_mutex_lock_slow(ll->lock, ctx);
 		list_move(&ll->link, &list);
 	}
 
-	ww_acquire_fini(&ctx);
 	return 0;
 }
 
-static void torture_ww_mutex_unlock(void)
+static void torture_ww_mutex_unlock(int tid)
 __releases(torture_ww_mutex_0)
 __releases(torture_ww_mutex_1)
 __releases(torture_ww_mutex_2)
 {
+	struct ww_acquire_ctx *ctx = &ww_acquire_ctxs[tid];
+
 	ww_mutex_unlock(&torture_ww_mutex_0);
 	ww_mutex_unlock(&torture_ww_mutex_1);
 	ww_mutex_unlock(&torture_ww_mutex_2);
+	ww_acquire_fini(ctx);
 }
 
 static struct lock_torture_ops ww_mutex_lock_ops = {
+	.init = torture_ww_mutex_init,
+	.exit = torture_ww_mutex_exit,
 	.writelock = torture_ww_mutex_lock,
 	.write_delay = torture_mutex_delay,
 	.task_boost = torture_boost_dummy,
···
 #ifdef CONFIG_RT_MUTEXES
 static DEFINE_RT_MUTEX(torture_rtmutex);
 
-static int torture_rtmutex_lock(void) __acquires(torture_rtmutex)
+static int torture_rtmutex_lock(int tid __maybe_unused)
+__acquires(torture_rtmutex)
 {
 	rt_mutex_lock(&torture_rtmutex);
 	return 0;
···
 	torture_preempt_schedule();  /* Allow test to be preempted. */
 }
 
-static void torture_rtmutex_unlock(void) __releases(torture_rtmutex)
+static void torture_rtmutex_unlock(int tid __maybe_unused)
+__releases(torture_rtmutex)
 {
 	rt_mutex_unlock(&torture_rtmutex);
 }
···
 #endif
 
 static DECLARE_RWSEM(torture_rwsem);
-static int torture_rwsem_down_write(void) __acquires(torture_rwsem)
+static int torture_rwsem_down_write(int tid __maybe_unused)
+__acquires(torture_rwsem)
 {
 	down_write(&torture_rwsem);
 	return 0;
···
 	torture_preempt_schedule();  /* Allow test to be preempted. 
*/ 565 526 } 566 527 567 - static void torture_rwsem_up_write(void) __releases(torture_rwsem) 528 + static void torture_rwsem_up_write(int tid __maybe_unused) 529 + __releases(torture_rwsem) 568 530 { 569 531 up_write(&torture_rwsem); 570 532 } 571 533 572 - static int torture_rwsem_down_read(void) __acquires(torture_rwsem) 534 + static int torture_rwsem_down_read(int tid __maybe_unused) 535 + __acquires(torture_rwsem) 573 536 { 574 537 down_read(&torture_rwsem); 575 538 return 0; ··· 591 550 torture_preempt_schedule(); /* Allow test to be preempted. */ 592 551 } 593 552 594 - static void torture_rwsem_up_read(void) __releases(torture_rwsem) 553 + static void torture_rwsem_up_read(int tid __maybe_unused) 554 + __releases(torture_rwsem) 595 555 { 596 556 up_read(&torture_rwsem); 597 557 } ··· 621 579 percpu_free_rwsem(&pcpu_rwsem); 622 580 } 623 581 624 - static int torture_percpu_rwsem_down_write(void) __acquires(pcpu_rwsem) 582 + static int torture_percpu_rwsem_down_write(int tid __maybe_unused) 583 + __acquires(pcpu_rwsem) 625 584 { 626 585 percpu_down_write(&pcpu_rwsem); 627 586 return 0; 628 587 } 629 588 630 - static void torture_percpu_rwsem_up_write(void) __releases(pcpu_rwsem) 589 + static void torture_percpu_rwsem_up_write(int tid __maybe_unused) 590 + __releases(pcpu_rwsem) 631 591 { 632 592 percpu_up_write(&pcpu_rwsem); 633 593 } 634 594 635 - static int torture_percpu_rwsem_down_read(void) __acquires(pcpu_rwsem) 595 + static int torture_percpu_rwsem_down_read(int tid __maybe_unused) 596 + __acquires(pcpu_rwsem) 636 597 { 637 598 percpu_down_read(&pcpu_rwsem); 638 599 return 0; 639 600 } 640 601 641 - static void torture_percpu_rwsem_up_read(void) __releases(pcpu_rwsem) 602 + static void torture_percpu_rwsem_up_read(int tid __maybe_unused) 603 + __releases(pcpu_rwsem) 642 604 { 643 605 percpu_up_read(&pcpu_rwsem); 644 606 } ··· 667 621 static int lock_torture_writer(void *arg) 668 622 { 669 623 struct lock_stress_stats *lwsp = arg; 624 + int tid = lwsp - 
cxt.lwsa; 670 625 DEFINE_TORTURE_RANDOM(rand); 671 626 672 627 VERBOSE_TOROUT_STRING("lock_torture_writer task started"); ··· 678 631 schedule_timeout_uninterruptible(1); 679 632 680 633 cxt.cur_ops->task_boost(&rand); 681 - cxt.cur_ops->writelock(); 634 + cxt.cur_ops->writelock(tid); 682 635 if (WARN_ON_ONCE(lock_is_write_held)) 683 636 lwsp->n_lock_fail++; 684 637 lock_is_write_held = true; ··· 689 642 cxt.cur_ops->write_delay(&rand); 690 643 lock_is_write_held = false; 691 644 WRITE_ONCE(last_lock_release, jiffies); 692 - cxt.cur_ops->writeunlock(); 645 + cxt.cur_ops->writeunlock(tid); 693 646 694 647 stutter_wait("lock_torture_writer"); 695 648 } while (!torture_must_stop()); ··· 706 659 static int lock_torture_reader(void *arg) 707 660 { 708 661 struct lock_stress_stats *lrsp = arg; 662 + int tid = lrsp - cxt.lrsa; 709 663 DEFINE_TORTURE_RANDOM(rand); 710 664 711 665 VERBOSE_TOROUT_STRING("lock_torture_reader task started"); ··· 716 668 if ((torture_random(&rand) & 0xfffff) == 0) 717 669 schedule_timeout_uninterruptible(1); 718 670 719 - cxt.cur_ops->readlock(); 671 + cxt.cur_ops->readlock(tid); 720 672 lock_is_read_held = true; 721 673 if (WARN_ON_ONCE(lock_is_write_held)) 722 674 lrsp->n_lock_fail++; /* rare, but... 
*/ ··· 724 676 lrsp->n_lock_acquired++; 725 677 cxt.cur_ops->read_delay(&rand); 726 678 lock_is_read_held = false; 727 - cxt.cur_ops->readunlock(); 679 + cxt.cur_ops->readunlock(tid); 728 680 729 681 stutter_wait("lock_torture_reader"); 730 682 } while (!torture_must_stop()); ··· 939 891 goto unwind; 940 892 } 941 893 942 - if (cxt.cur_ops->init) { 943 - cxt.cur_ops->init(); 944 - cxt.init_called = true; 945 - } 946 - 947 894 if (nwriters_stress >= 0) 948 895 cxt.nrealwriters_stress = nwriters_stress; 949 896 else 950 897 cxt.nrealwriters_stress = 2 * num_online_cpus(); 898 + 899 + if (cxt.cur_ops->init) { 900 + cxt.cur_ops->init(); 901 + cxt.init_called = true; 902 + } 951 903 952 904 #ifdef CONFIG_DEBUG_MUTEXES 953 905 if (str_has_prefix(torture_type, "mutex"))
+1 -1
kernel/locking/mcs_spinlock.h
··· 7 7 * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock 8 8 * with the desirable properties of being fair, and with each cpu trying 9 9 * to acquire the lock spinning on a local variable. 10 - * It avoids expensive cache bouncings that common test-and-set spin-lock 10 + * It avoids expensive cache bounces that common test-and-set spin-lock 11 11 * implementations incur. 12 12 */ 13 13 #ifndef __LINUX_MCS_SPINLOCK_H
+2 -2
kernel/locking/mutex.c
··· 92 92 } 93 93 94 94 /* 95 - * Trylock variant that retuns the owning task on failure. 95 + * Trylock variant that returns the owning task on failure. 96 96 */ 97 97 static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock) 98 98 { ··· 207 207 208 208 /* 209 209 * Give up ownership to a specific task, when @task = NULL, this is equivalent 210 - * to a regular unlock. Sets PICKUP on a handoff, clears HANDOF, preserves 210 + * to a regular unlock. Sets PICKUP on a handoff, clears HANDOFF, preserves 211 211 * WAITERS. Provides RELEASE semantics like a regular unlock, the 212 212 * __mutex_trylock() provides a matching ACQUIRE semantics for the handoff. 213 213 */
+2 -2
kernel/locking/osq_lock.c
··· 135 135 */ 136 136 137 137 /* 138 - * Wait to acquire the lock or cancelation. Note that need_resched() 138 + * Wait to acquire the lock or cancellation. Note that need_resched() 139 139 * will come with an IPI, which will wake smp_cond_load_relaxed() if it 140 140 * is implemented with a monitor-wait. vcpu_is_preempted() relies on 141 141 * polling, be careful. ··· 164 164 165 165 /* 166 166 * We can only fail the cmpxchg() racing against an unlock(), 167 - * in which case we should observe @node->locked becomming 167 + * in which case we should observe @node->locked becoming 168 168 * true. 169 169 */ 170 170 if (smp_load_acquire(&node->locked))
-182
kernel/locking/rtmutex-debug.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * RT-Mutexes: blocking mutual exclusion locks with PI support 4 - * 5 - * started by Ingo Molnar and Thomas Gleixner: 6 - * 7 - * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 8 - * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> 9 - * 10 - * This code is based on the rt.c implementation in the preempt-rt tree. 11 - * Portions of said code are 12 - * 13 - * Copyright (C) 2004 LynuxWorks, Inc., Igor Manyilov, Bill Huey 14 - * Copyright (C) 2006 Esben Nielsen 15 - * Copyright (C) 2006 Kihon Technologies Inc., 16 - * Steven Rostedt <rostedt@goodmis.org> 17 - * 18 - * See rt.c in preempt-rt for proper credits and further information 19 - */ 20 - #include <linux/sched.h> 21 - #include <linux/sched/rt.h> 22 - #include <linux/sched/debug.h> 23 - #include <linux/delay.h> 24 - #include <linux/export.h> 25 - #include <linux/spinlock.h> 26 - #include <linux/kallsyms.h> 27 - #include <linux/syscalls.h> 28 - #include <linux/interrupt.h> 29 - #include <linux/rbtree.h> 30 - #include <linux/fs.h> 31 - #include <linux/debug_locks.h> 32 - 33 - #include "rtmutex_common.h" 34 - 35 - static void printk_task(struct task_struct *p) 36 - { 37 - if (p) 38 - printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio); 39 - else 40 - printk("<none>"); 41 - } 42 - 43 - static void printk_lock(struct rt_mutex *lock, int print_owner) 44 - { 45 - if (lock->name) 46 - printk(" [%p] {%s}\n", 47 - lock, lock->name); 48 - else 49 - printk(" [%p] {%s:%d}\n", 50 - lock, lock->file, lock->line); 51 - 52 - if (print_owner && rt_mutex_owner(lock)) { 53 - printk(".. ->owner: %p\n", lock->owner); 54 - printk(".. 
held by: "); 55 - printk_task(rt_mutex_owner(lock)); 56 - printk("\n"); 57 - } 58 - } 59 - 60 - void rt_mutex_debug_task_free(struct task_struct *task) 61 - { 62 - DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root)); 63 - DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); 64 - } 65 - 66 - /* 67 - * We fill out the fields in the waiter to store the information about 68 - * the deadlock. We print when we return. act_waiter can be NULL in 69 - * case of a remove waiter operation. 70 - */ 71 - void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk, 72 - struct rt_mutex_waiter *act_waiter, 73 - struct rt_mutex *lock) 74 - { 75 - struct task_struct *task; 76 - 77 - if (!debug_locks || chwalk == RT_MUTEX_FULL_CHAINWALK || !act_waiter) 78 - return; 79 - 80 - task = rt_mutex_owner(act_waiter->lock); 81 - if (task && task != current) { 82 - act_waiter->deadlock_task_pid = get_pid(task_pid(task)); 83 - act_waiter->deadlock_lock = lock; 84 - } 85 - } 86 - 87 - void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) 88 - { 89 - struct task_struct *task; 90 - 91 - if (!waiter->deadlock_lock || !debug_locks) 92 - return; 93 - 94 - rcu_read_lock(); 95 - task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID); 96 - if (!task) { 97 - rcu_read_unlock(); 98 - return; 99 - } 100 - 101 - if (!debug_locks_off()) { 102 - rcu_read_unlock(); 103 - return; 104 - } 105 - 106 - pr_warn("\n"); 107 - pr_warn("============================================\n"); 108 - pr_warn("WARNING: circular locking deadlock detected!\n"); 109 - pr_warn("%s\n", print_tainted()); 110 - pr_warn("--------------------------------------------\n"); 111 - printk("%s/%d is deadlocking current task %s/%d\n\n", 112 - task->comm, task_pid_nr(task), 113 - current->comm, task_pid_nr(current)); 114 - 115 - printk("\n1) %s/%d is trying to acquire this lock:\n", 116 - current->comm, task_pid_nr(current)); 117 - printk_lock(waiter->lock, 1); 118 - 119 - printk("\n2) %s/%d is blocked on this lock:\n", 120 - 
task->comm, task_pid_nr(task)); 121 - printk_lock(waiter->deadlock_lock, 1); 122 - 123 - debug_show_held_locks(current); 124 - debug_show_held_locks(task); 125 - 126 - printk("\n%s/%d's [blocked] stackdump:\n\n", 127 - task->comm, task_pid_nr(task)); 128 - show_stack(task, NULL, KERN_DEFAULT); 129 - printk("\n%s/%d's [current] stackdump:\n\n", 130 - current->comm, task_pid_nr(current)); 131 - dump_stack(); 132 - debug_show_all_locks(); 133 - rcu_read_unlock(); 134 - 135 - printk("[ turning off deadlock detection." 136 - "Please report this trace. ]\n\n"); 137 - } 138 - 139 - void debug_rt_mutex_lock(struct rt_mutex *lock) 140 - { 141 - } 142 - 143 - void debug_rt_mutex_unlock(struct rt_mutex *lock) 144 - { 145 - DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current); 146 - } 147 - 148 - void 149 - debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner) 150 - { 151 - } 152 - 153 - void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) 154 - { 155 - DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock)); 156 - } 157 - 158 - void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) 159 - { 160 - memset(waiter, 0x11, sizeof(*waiter)); 161 - waiter->deadlock_task_pid = NULL; 162 - } 163 - 164 - void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) 165 - { 166 - put_pid(waiter->deadlock_task_pid); 167 - memset(waiter, 0x22, sizeof(*waiter)); 168 - } 169 - 170 - void debug_rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key) 171 - { 172 - /* 173 - * Make sure we are not reinitializing a held lock: 174 - */ 175 - debug_check_no_locks_freed((void *)lock, sizeof(*lock)); 176 - lock->name = name; 177 - 178 - #ifdef CONFIG_DEBUG_LOCK_ALLOC 179 - lockdep_init_map(&lock->dep_map, name, key, 0); 180 - #endif 181 - } 182 -
-37
kernel/locking/rtmutex-debug.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - /* 3 - * RT-Mutexes: blocking mutual exclusion locks with PI support 4 - * 5 - * started by Ingo Molnar and Thomas Gleixner: 6 - * 7 - * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 8 - * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> 9 - * 10 - * This file contains macros used solely by rtmutex.c. Debug version. 11 - */ 12 - 13 - extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); 14 - extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); 15 - extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key); 16 - extern void debug_rt_mutex_lock(struct rt_mutex *lock); 17 - extern void debug_rt_mutex_unlock(struct rt_mutex *lock); 18 - extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, 19 - struct task_struct *powner); 20 - extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); 21 - extern void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk, 22 - struct rt_mutex_waiter *waiter, 23 - struct rt_mutex *lock); 24 - extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter); 25 - # define debug_rt_mutex_reset_waiter(w) \ 26 - do { (w)->deadlock_lock = NULL; } while (0) 27 - 28 - static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, 29 - enum rtmutex_chainwalk walk) 30 - { 31 - return (waiter != NULL); 32 - } 33 - 34 - static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) 35 - { 36 - debug_rt_mutex_print_deadlock(w); 37 - }
+156 -278
kernel/locking/rtmutex.c
··· 49 49 * set this bit before looking at the lock. 50 50 */ 51 51 52 - static void 52 + static __always_inline void 53 53 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner) 54 54 { 55 55 unsigned long val = (unsigned long)owner; ··· 60 60 WRITE_ONCE(lock->owner, (struct task_struct *)val); 61 61 } 62 62 63 - static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) 63 + static __always_inline void clear_rt_mutex_waiters(struct rt_mutex *lock) 64 64 { 65 65 lock->owner = (struct task_struct *) 66 66 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); 67 67 } 68 68 69 - static void fixup_rt_mutex_waiters(struct rt_mutex *lock) 69 + static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex *lock) 70 70 { 71 71 unsigned long owner, *p = (unsigned long *) &lock->owner; 72 72 ··· 149 149 * all future threads that attempt to [Rmw] the lock to the slowpath. As such 150 150 * relaxed semantics suffice. 151 151 */ 152 - static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 152 + static __always_inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 153 153 { 154 154 unsigned long owner, *p = (unsigned long *) &lock->owner; 155 155 ··· 165 165 * 2) Drop lock->wait_lock 166 166 * 3) Try to unlock the lock with cmpxchg 167 167 */ 168 - static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, 169 - unsigned long flags) 168 + static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, 169 + unsigned long flags) 170 170 __releases(lock->wait_lock) 171 171 { 172 172 struct task_struct *owner = rt_mutex_owner(lock); ··· 204 204 # define rt_mutex_cmpxchg_acquire(l,c,n) (0) 205 205 # define rt_mutex_cmpxchg_release(l,c,n) (0) 206 206 207 - static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 207 + static __always_inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 208 208 { 209 209 lock->owner = (struct task_struct *) 210 210 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); ··· 213 213 /* 214 214 * 
Simple slow path only version: lock->owner is protected by lock->wait_lock. 215 215 */ 216 - static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, 217 - unsigned long flags) 216 + static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, 217 + unsigned long flags) 218 218 __releases(lock->wait_lock) 219 219 { 220 220 lock->owner = NULL; ··· 229 229 #define task_to_waiter(p) \ 230 230 &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } 231 231 232 - static inline int 233 - rt_mutex_waiter_less(struct rt_mutex_waiter *left, 234 - struct rt_mutex_waiter *right) 232 + static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, 233 + struct rt_mutex_waiter *right) 235 234 { 236 235 if (left->prio < right->prio) 237 236 return 1; ··· 247 248 return 0; 248 249 } 249 250 250 - static inline int 251 - rt_mutex_waiter_equal(struct rt_mutex_waiter *left, 252 - struct rt_mutex_waiter *right) 251 + static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, 252 + struct rt_mutex_waiter *right) 253 253 { 254 254 if (left->prio != right->prio) 255 255 return 0; ··· 268 270 #define __node_2_waiter(node) \ 269 271 rb_entry((node), struct rt_mutex_waiter, tree_entry) 270 272 271 - static inline bool __waiter_less(struct rb_node *a, const struct rb_node *b) 273 + static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b) 272 274 { 273 275 return rt_mutex_waiter_less(__node_2_waiter(a), __node_2_waiter(b)); 274 276 } 275 277 276 - static void 278 + static __always_inline void 277 279 rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) 278 280 { 279 281 rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less); 280 282 } 281 283 282 - static void 284 + static __always_inline void 283 285 rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) 284 286 { 285 287 if (RB_EMPTY_NODE(&waiter->tree_entry)) ··· 292 294 #define 
__node_2_pi_waiter(node) \ 293 295 rb_entry((node), struct rt_mutex_waiter, pi_tree_entry) 294 296 295 - static inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b) 297 + static __always_inline bool 298 + __pi_waiter_less(struct rb_node *a, const struct rb_node *b) 296 299 { 297 300 return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b)); 298 301 } 299 302 300 - static void 303 + static __always_inline void 301 304 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 302 305 { 303 306 rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less); 304 307 } 305 308 306 - static void 309 + static __always_inline void 307 310 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 308 311 { 309 312 if (RB_EMPTY_NODE(&waiter->pi_tree_entry)) ··· 314 315 RB_CLEAR_NODE(&waiter->pi_tree_entry); 315 316 } 316 317 317 - static void rt_mutex_adjust_prio(struct task_struct *p) 318 + static __always_inline void rt_mutex_adjust_prio(struct task_struct *p) 318 319 { 319 320 struct task_struct *pi_task = NULL; 320 321 ··· 339 340 * deadlock detection is disabled independent of the detect argument 340 341 * and the config settings. 341 342 */ 342 - static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, 343 - enum rtmutex_chainwalk chwalk) 343 + static __always_inline bool 344 + rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, 345 + enum rtmutex_chainwalk chwalk) 344 346 { 345 - /* 346 - * This is just a wrapper function for the following call, 347 - * because debug_rt_mutex_detect_deadlock() smells like a magic 348 - * debug feature and I wanted to keep the cond function in the 349 - * main source file along with the comments instead of having 350 - * two of the same in the headers. 
351 - */ 352 - return debug_rt_mutex_detect_deadlock(waiter, chwalk); 347 + if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEX)) 348 + return waiter != NULL; 349 + return chwalk == RT_MUTEX_FULL_CHAINWALK; 353 350 } 354 351 355 352 /* ··· 353 358 */ 354 359 int max_lock_depth = 1024; 355 360 356 - static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) 361 + static __always_inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) 357 362 { 358 363 return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; 359 364 } ··· 421 426 * unlock(lock->wait_lock); release [L] 422 427 * goto again; 423 428 */ 424 - static int rt_mutex_adjust_prio_chain(struct task_struct *task, 425 - enum rtmutex_chainwalk chwalk, 426 - struct rt_mutex *orig_lock, 427 - struct rt_mutex *next_lock, 428 - struct rt_mutex_waiter *orig_waiter, 429 - struct task_struct *top_task) 429 + static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, 430 + enum rtmutex_chainwalk chwalk, 431 + struct rt_mutex *orig_lock, 432 + struct rt_mutex *next_lock, 433 + struct rt_mutex_waiter *orig_waiter, 434 + struct task_struct *top_task) 430 435 { 431 436 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; 432 437 struct rt_mutex_waiter *prerequeue_top_waiter; ··· 574 579 * walk, we detected a deadlock. 575 580 */ 576 581 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 577 - debug_rt_mutex_deadlock(chwalk, orig_waiter, lock); 578 582 raw_spin_unlock(&lock->wait_lock); 579 583 ret = -EDEADLK; 580 584 goto out_unlock_pi; ··· 700 706 } else if (prerequeue_top_waiter == waiter) { 701 707 /* 702 708 * The waiter was the top waiter on the lock, but is 703 - * no longer the top prority waiter. Replace waiter in 709 + * no longer the top priority waiter. Replace waiter in 704 710 * the owner tasks pi waiters tree with the new top 705 711 * (highest priority) waiter and adjust the priority 706 712 * of the owner. 
··· 778 784 * @waiter: The waiter that is queued to the lock's wait tree if the 779 785 * callsite called task_blocked_on_lock(), otherwise NULL 780 786 */ 781 - static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, 782 - struct rt_mutex_waiter *waiter) 787 + static int __sched 788 + try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, 789 + struct rt_mutex_waiter *waiter) 783 790 { 784 791 lockdep_assert_held(&lock->wait_lock); 785 792 ··· 881 886 raw_spin_unlock(&task->pi_lock); 882 887 883 888 takeit: 884 - /* We got the lock. */ 885 - debug_rt_mutex_lock(lock); 886 - 887 889 /* 888 890 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there 889 891 * are still waiters or clears it. ··· 897 905 * 898 906 * This must be called with lock->wait_lock held and interrupts disabled 899 907 */ 900 - static int task_blocks_on_rt_mutex(struct rt_mutex *lock, 901 - struct rt_mutex_waiter *waiter, 902 - struct task_struct *task, 903 - enum rtmutex_chainwalk chwalk) 908 + static int __sched task_blocks_on_rt_mutex(struct rt_mutex *lock, 909 + struct rt_mutex_waiter *waiter, 910 + struct task_struct *task, 911 + enum rtmutex_chainwalk chwalk) 904 912 { 905 913 struct task_struct *owner = rt_mutex_owner(lock); 906 914 struct rt_mutex_waiter *top_waiter = waiter; ··· 986 994 * 987 995 * Called with lock->wait_lock held and interrupts disabled. 988 996 */ 989 - static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, 990 - struct rt_mutex *lock) 997 + static void __sched mark_wakeup_next_waiter(struct wake_q_head *wake_q, 998 + struct rt_mutex *lock) 991 999 { 992 1000 struct rt_mutex_waiter *waiter; 993 1001 ··· 1036 1044 * Must be called with lock->wait_lock held and interrupts disabled. I must 1037 1045 * have just failed to try_to_take_rt_mutex(). 
1038 1046 */ 1039 - static void remove_waiter(struct rt_mutex *lock, 1040 - struct rt_mutex_waiter *waiter) 1047 + static void __sched remove_waiter(struct rt_mutex *lock, 1048 + struct rt_mutex_waiter *waiter) 1041 1049 { 1042 1050 bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); 1043 1051 struct task_struct *owner = rt_mutex_owner(lock); ··· 1094 1102 * 1095 1103 * Called from sched_setscheduler 1096 1104 */ 1097 - void rt_mutex_adjust_pi(struct task_struct *task) 1105 + void __sched rt_mutex_adjust_pi(struct task_struct *task) 1098 1106 { 1099 1107 struct rt_mutex_waiter *waiter; 1100 1108 struct rt_mutex *next_lock; ··· 1117 1125 next_lock, NULL, task); 1118 1126 } 1119 1127 1120 - void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) 1128 + void __sched rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) 1121 1129 { 1122 1130 debug_rt_mutex_init_waiter(waiter); 1123 1131 RB_CLEAR_NODE(&waiter->pi_tree_entry); ··· 1135 1143 * 1136 1144 * Must be called with lock->wait_lock held and interrupts disabled 1137 1145 */ 1138 - static int __sched 1139 - __rt_mutex_slowlock(struct rt_mutex *lock, int state, 1140 - struct hrtimer_sleeper *timeout, 1141 - struct rt_mutex_waiter *waiter) 1146 + static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state, 1147 + struct hrtimer_sleeper *timeout, 1148 + struct rt_mutex_waiter *waiter) 1142 1149 { 1143 1150 int ret = 0; 1144 1151 ··· 1146 1155 if (try_to_take_rt_mutex(lock, current, waiter)) 1147 1156 break; 1148 1157 1149 - /* 1150 - * TASK_INTERRUPTIBLE checks for signals and 1151 - * timeout. Ignored otherwise. 1152 - */ 1153 - if (likely(state == TASK_INTERRUPTIBLE)) { 1154 - /* Signal pending? 
*/ 1155 - if (signal_pending(current)) 1156 - ret = -EINTR; 1157 - if (timeout && !timeout->task) 1158 - ret = -ETIMEDOUT; 1159 - if (ret) 1160 - break; 1158 + if (timeout && !timeout->task) { 1159 + ret = -ETIMEDOUT; 1160 + break; 1161 + } 1162 + if (signal_pending_state(state, current)) { 1163 + ret = -EINTR; 1164 + break; 1161 1165 } 1162 1166 1163 1167 raw_spin_unlock_irq(&lock->wait_lock); 1164 - 1165 - debug_rt_mutex_print_deadlock(waiter); 1166 1168 1167 1169 schedule(); 1168 1170 ··· 1167 1183 return ret; 1168 1184 } 1169 1185 1170 - static void rt_mutex_handle_deadlock(int res, int detect_deadlock, 1171 - struct rt_mutex_waiter *w) 1186 + static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, 1187 + struct rt_mutex_waiter *w) 1172 1188 { 1173 1189 /* 1174 1190 * If the result is not -EDEADLOCK or the caller requested ··· 1178 1194 return; 1179 1195 1180 1196 /* 1181 - * Yell lowdly and stop the task right here. 1197 + * Yell loudly and stop the task right here. 
1182 1198 */ 1183 - rt_mutex_print_deadlock(w); 1199 + WARN(1, "rtmutex deadlock detected\n"); 1184 1200 while (1) { 1185 1201 set_current_state(TASK_INTERRUPTIBLE); 1186 1202 schedule(); ··· 1190 1206 /* 1191 1207 * Slow path lock function: 1192 1208 */ 1193 - static int __sched 1194 - rt_mutex_slowlock(struct rt_mutex *lock, int state, 1195 - struct hrtimer_sleeper *timeout, 1196 - enum rtmutex_chainwalk chwalk) 1209 + static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, 1210 + struct hrtimer_sleeper *timeout, 1211 + enum rtmutex_chainwalk chwalk) 1197 1212 { 1198 1213 struct rt_mutex_waiter waiter; 1199 1214 unsigned long flags; ··· 1251 1268 return ret; 1252 1269 } 1253 1270 1254 - static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock) 1271 + static int __sched __rt_mutex_slowtrylock(struct rt_mutex *lock) 1255 1272 { 1256 1273 int ret = try_to_take_rt_mutex(lock, current, NULL); 1257 1274 ··· 1267 1284 /* 1268 1285 * Slow path try-lock function: 1269 1286 */ 1270 - static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) 1287 + static int __sched rt_mutex_slowtrylock(struct rt_mutex *lock) 1271 1288 { 1272 1289 unsigned long flags; 1273 1290 int ret; ··· 1294 1311 } 1295 1312 1296 1313 /* 1314 + * Performs the wakeup of the top-waiter and re-enables preemption. 1315 + */ 1316 + void __sched rt_mutex_postunlock(struct wake_q_head *wake_q) 1317 + { 1318 + wake_up_q(wake_q); 1319 + 1320 + /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ 1321 + preempt_enable(); 1322 + } 1323 + 1324 + /* 1297 1325 * Slow path to release a rt-mutex. 1298 1326 * 1299 1327 * Return whether the current task needs to call rt_mutex_postunlock(). 
1300 1328 */ 1301 - static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, 1302 - struct wake_q_head *wake_q) 1329 + static void __sched rt_mutex_slowunlock(struct rt_mutex *lock) 1303 1330 { 1331 + DEFINE_WAKE_Q(wake_q); 1304 1332 unsigned long flags; 1305 1333 1306 1334 /* irqsave required to support early boot calls */ ··· 1353 1359 while (!rt_mutex_has_waiters(lock)) { 1354 1360 /* Drops lock->wait_lock ! */ 1355 1361 if (unlock_rt_mutex_safe(lock, flags) == true) 1356 - return false; 1362 + return; 1357 1363 /* Relock the rtmutex and try again */ 1358 1364 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1359 1365 } ··· 1364 1370 * 1365 1371 * Queue the next waiter for wakeup once we release the wait_lock. 1366 1372 */ 1367 - mark_wakeup_next_waiter(wake_q, lock); 1373 + mark_wakeup_next_waiter(&wake_q, lock); 1368 1374 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1369 1375 1370 - return true; /* call rt_mutex_postunlock() */ 1376 + rt_mutex_postunlock(&wake_q); 1371 1377 } 1372 1378 1373 1379 /* ··· 1376 1382 * The atomic acquire/release ops are compiled away, when either the 1377 1383 * architecture does not support cmpxchg or when debugging is enabled. 
1378 1384 */ 1379 - static inline int 1380 - rt_mutex_fastlock(struct rt_mutex *lock, int state, 1381 - int (*slowfn)(struct rt_mutex *lock, int state, 1382 - struct hrtimer_sleeper *timeout, 1383 - enum rtmutex_chainwalk chwalk)) 1385 + static __always_inline int __rt_mutex_lock(struct rt_mutex *lock, long state, 1386 + unsigned int subclass) 1384 1387 { 1385 - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) 1386 - return 0; 1388 + int ret; 1387 1389 1388 - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); 1389 - } 1390 - 1391 - static inline int 1392 - rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, 1393 - struct hrtimer_sleeper *timeout, 1394 - enum rtmutex_chainwalk chwalk, 1395 - int (*slowfn)(struct rt_mutex *lock, int state, 1396 - struct hrtimer_sleeper *timeout, 1397 - enum rtmutex_chainwalk chwalk)) 1398 - { 1399 - if (chwalk == RT_MUTEX_MIN_CHAINWALK && 1400 - likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) 1401 - return 0; 1402 - 1403 - return slowfn(lock, state, timeout, chwalk); 1404 - } 1405 - 1406 - static inline int 1407 - rt_mutex_fasttrylock(struct rt_mutex *lock, 1408 - int (*slowfn)(struct rt_mutex *lock)) 1409 - { 1410 - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) 1411 - return 1; 1412 - 1413 - return slowfn(lock); 1414 - } 1415 - 1416 - /* 1417 - * Performs the wakeup of the top-waiter and re-enables preemption. 
1418 - */ 1419 - void rt_mutex_postunlock(struct wake_q_head *wake_q) 1420 - { 1421 - wake_up_q(wake_q); 1422 - 1423 - /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ 1424 - preempt_enable(); 1425 - } 1426 - 1427 - static inline void 1428 - rt_mutex_fastunlock(struct rt_mutex *lock, 1429 - bool (*slowfn)(struct rt_mutex *lock, 1430 - struct wake_q_head *wqh)) 1431 - { 1432 - DEFINE_WAKE_Q(wake_q); 1433 - 1434 - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) 1435 - return; 1436 - 1437 - if (slowfn(lock, &wake_q)) 1438 - rt_mutex_postunlock(&wake_q); 1439 - } 1440 - 1441 - static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass) 1442 - { 1443 1390 might_sleep(); 1444 - 1445 1391 mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); 1446 - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); 1392 + 1393 + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) 1394 + return 0; 1395 + 1396 + ret = rt_mutex_slowlock(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); 1397 + if (ret) 1398 + mutex_release(&lock->dep_map, _RET_IP_); 1399 + return ret; 1447 1400 } 1448 1401 1449 1402 #ifdef CONFIG_DEBUG_LOCK_ALLOC ··· 1402 1461 */ 1403 1462 void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) 1404 1463 { 1405 - __rt_mutex_lock(lock, subclass); 1464 + __rt_mutex_lock(lock, TASK_UNINTERRUPTIBLE, subclass); 1406 1465 } 1407 1466 EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); 1408 1467 ··· 1415 1474 */ 1416 1475 void __sched rt_mutex_lock(struct rt_mutex *lock) 1417 1476 { 1418 - __rt_mutex_lock(lock, 0); 1477 + __rt_mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0); 1419 1478 } 1420 1479 EXPORT_SYMBOL_GPL(rt_mutex_lock); 1421 1480 #endif ··· 1431 1490 */ 1432 1491 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) 1433 1492 { 1434 - int ret; 1435 - 1436 - might_sleep(); 1437 - 1438 - mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); 1439 - ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, 
rt_mutex_slowlock); 1440 - if (ret) 1441 - mutex_release(&lock->dep_map, _RET_IP_); 1442 - 1443 - return ret; 1493 + return __rt_mutex_lock(lock, TASK_INTERRUPTIBLE, 0); 1444 1494 } 1445 1495 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); 1446 - 1447 - /* 1448 - * Futex variant, must not use fastpath. 1449 - */ 1450 - int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) 1451 - { 1452 - return rt_mutex_slowtrylock(lock); 1453 - } 1454 - 1455 - int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) 1456 - { 1457 - return __rt_mutex_slowtrylock(lock); 1458 - } 1459 - 1460 - /** 1461 - * rt_mutex_timed_lock - lock a rt_mutex interruptible 1462 - * the timeout structure is provided 1463 - * by the caller 1464 - * 1465 - * @lock: the rt_mutex to be locked 1466 - * @timeout: timeout structure or NULL (no timeout) 1467 - * 1468 - * Returns: 1469 - * 0 on success 1470 - * -EINTR when interrupted by a signal 1471 - * -ETIMEDOUT when the timeout expired 1472 - */ 1473 - int 1474 - rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) 1475 - { 1476 - int ret; 1477 - 1478 - might_sleep(); 1479 - 1480 - mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); 1481 - ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, 1482 - RT_MUTEX_MIN_CHAINWALK, 1483 - rt_mutex_slowlock); 1484 - if (ret) 1485 - mutex_release(&lock->dep_map, _RET_IP_); 1486 - 1487 - return ret; 1488 - } 1489 - EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); 1490 1496 1491 1497 /** 1492 1498 * rt_mutex_trylock - try to lock a rt_mutex 1493 1499 * 1494 1500 * @lock: the rt_mutex to be locked 1495 1501 * 1496 - * This function can only be called in thread context. It's safe to 1497 - * call it from atomic regions, but not from hard interrupt or soft 1498 - * interrupt context. 1502 + * This function can only be called in thread context. It's safe to call it 1503 + * from atomic regions, but not from hard or soft interrupt context. 
1499 1504 * 1500 - * Returns 1 on success and 0 on contention 1505 + * Returns: 1506 + * 1 on success 1507 + * 0 on contention 1501 1508 */ 1502 1509 int __sched rt_mutex_trylock(struct rt_mutex *lock) 1503 1510 { 1504 1511 int ret; 1505 1512 1506 - if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) 1513 + if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) 1507 1514 return 0; 1508 1515 1509 - ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); 1516 + /* 1517 + * No lockdep annotation required because lockdep disables the fast 1518 + * path. 1519 + */ 1520 + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) 1521 + return 1; 1522 + 1523 + ret = rt_mutex_slowtrylock(lock); 1510 1524 if (ret) 1511 1525 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); 1512 1526 ··· 1477 1581 void __sched rt_mutex_unlock(struct rt_mutex *lock) 1478 1582 { 1479 1583 mutex_release(&lock->dep_map, _RET_IP_); 1480 - rt_mutex_fastunlock(lock, rt_mutex_slowunlock); 1584 + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) 1585 + return; 1586 + 1587 + rt_mutex_slowunlock(lock); 1481 1588 } 1482 1589 EXPORT_SYMBOL_GPL(rt_mutex_unlock); 1590 + 1591 + /* 1592 + * Futex variants, must not use fastpath. 
1593 + */ 1594 + int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) 1595 + { 1596 + return rt_mutex_slowtrylock(lock); 1597 + } 1598 + 1599 + int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) 1600 + { 1601 + return __rt_mutex_slowtrylock(lock); 1602 + } 1483 1603 1484 1604 /** 1485 1605 * __rt_mutex_futex_unlock - Futex variant, that since futex variants ··· 1505 1593 * @wake_q: The wake queue head from which to get the next lock waiter 1506 1594 */ 1507 1595 bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, 1508 - struct wake_q_head *wake_q) 1596 + struct wake_q_head *wake_q) 1509 1597 { 1510 1598 lockdep_assert_held(&lock->wait_lock); 1511 1599 ··· 1542 1630 } 1543 1631 1544 1632 /** 1545 - * rt_mutex_destroy - mark a mutex unusable 1546 - * @lock: the mutex to be destroyed 1547 - * 1548 - * This function marks the mutex uninitialized, and any subsequent 1549 - * use of the mutex is forbidden. The mutex must not be locked when 1550 - * this function is called. 
1551 - */ 1552 - void rt_mutex_destroy(struct rt_mutex *lock) 1553 - { 1554 - WARN_ON(rt_mutex_is_locked(lock)); 1555 - #ifdef CONFIG_DEBUG_RT_MUTEXES 1556 - lock->magic = NULL; 1557 - #endif 1558 - } 1559 - EXPORT_SYMBOL_GPL(rt_mutex_destroy); 1560 - 1561 - /** 1562 1633 * __rt_mutex_init - initialize the rt_mutex 1563 1634 * 1564 1635 * @lock: The rt_mutex to be initialized ··· 1552 1657 * 1553 1658 * Initializing of a locked rt_mutex is not allowed 1554 1659 */ 1555 - void __rt_mutex_init(struct rt_mutex *lock, const char *name, 1660 + void __sched __rt_mutex_init(struct rt_mutex *lock, const char *name, 1556 1661 struct lock_class_key *key) 1557 1662 { 1558 - lock->owner = NULL; 1559 - raw_spin_lock_init(&lock->wait_lock); 1560 - lock->waiters = RB_ROOT_CACHED; 1663 + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); 1664 + lockdep_init_map(&lock->dep_map, name, key, 0); 1561 1665 1562 - if (name && key) 1563 - debug_rt_mutex_init(lock, name, key); 1666 + __rt_mutex_basic_init(lock); 1564 1667 } 1565 1668 EXPORT_SYMBOL_GPL(__rt_mutex_init); 1566 1669 ··· 1576 1683 * possible at this point because the pi_state which contains the rtmutex 1577 1684 * is not yet visible to other tasks. 1578 1685 */ 1579 - void rt_mutex_init_proxy_locked(struct rt_mutex *lock, 1580 - struct task_struct *proxy_owner) 1686 + void __sched rt_mutex_init_proxy_locked(struct rt_mutex *lock, 1687 + struct task_struct *proxy_owner) 1581 1688 { 1582 - __rt_mutex_init(lock, NULL, NULL); 1583 - debug_rt_mutex_proxy_lock(lock, proxy_owner); 1689 + __rt_mutex_basic_init(lock); 1584 1690 rt_mutex_set_owner(lock, proxy_owner); 1585 1691 } 1586 1692 ··· 1595 1703 * possible because it belongs to the pi_state which is about to be freed 1596 1704 * and it is not longer visible to other tasks. 
1597 1705 */ 1598 - void rt_mutex_proxy_unlock(struct rt_mutex *lock) 1706 + void __sched rt_mutex_proxy_unlock(struct rt_mutex *lock) 1599 1707 { 1600 1708 debug_rt_mutex_proxy_unlock(lock); 1601 1709 rt_mutex_set_owner(lock, NULL); ··· 1620 1728 * 1621 1729 * Special API call for PI-futex support. 1622 1730 */ 1623 - int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, 1624 - struct rt_mutex_waiter *waiter, 1625 - struct task_struct *task) 1731 + int __sched __rt_mutex_start_proxy_lock(struct rt_mutex *lock, 1732 + struct rt_mutex_waiter *waiter, 1733 + struct task_struct *task) 1626 1734 { 1627 1735 int ret; 1628 1736 ··· 1644 1752 */ 1645 1753 ret = 0; 1646 1754 } 1647 - 1648 - debug_rt_mutex_print_deadlock(waiter); 1649 1755 1650 1756 return ret; 1651 1757 } ··· 1667 1777 * 1668 1778 * Special API call for PI-futex support. 1669 1779 */ 1670 - int rt_mutex_start_proxy_lock(struct rt_mutex *lock, 1671 - struct rt_mutex_waiter *waiter, 1672 - struct task_struct *task) 1780 + int __sched rt_mutex_start_proxy_lock(struct rt_mutex *lock, 1781 + struct rt_mutex_waiter *waiter, 1782 + struct task_struct *task) 1673 1783 { 1674 1784 int ret; 1675 1785 ··· 1680 1790 raw_spin_unlock_irq(&lock->wait_lock); 1681 1791 1682 1792 return ret; 1683 - } 1684 - 1685 - /** 1686 - * rt_mutex_next_owner - return the next owner of the lock 1687 - * 1688 - * @lock: the rt lock query 1689 - * 1690 - * Returns the next owner of the lock or NULL 1691 - * 1692 - * Caller has to serialize against other accessors to the lock 1693 - * itself. 
1694 - * 1695 - * Special API call for PI-futex support 1696 - */ 1697 - struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) 1698 - { 1699 - if (!rt_mutex_has_waiters(lock)) 1700 - return NULL; 1701 - 1702 - return rt_mutex_top_waiter(lock)->task; 1703 1793 } 1704 1794 1705 1795 /** ··· 1699 1829 * 1700 1830 * Special API call for PI-futex support 1701 1831 */ 1702 - int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, 1703 - struct hrtimer_sleeper *to, 1704 - struct rt_mutex_waiter *waiter) 1832 + int __sched rt_mutex_wait_proxy_lock(struct rt_mutex *lock, 1833 + struct hrtimer_sleeper *to, 1834 + struct rt_mutex_waiter *waiter) 1705 1835 { 1706 1836 int ret; 1707 1837 ··· 1739 1869 * 1740 1870 * Special API call for PI-futex support 1741 1871 */ 1742 - bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, 1743 - struct rt_mutex_waiter *waiter) 1872 + bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, 1873 + struct rt_mutex_waiter *waiter) 1744 1874 { 1745 1875 bool cleanup = false; 1746 1876 ··· 1775 1905 1776 1906 return cleanup; 1777 1907 } 1908 + 1909 + #ifdef CONFIG_DEBUG_RT_MUTEXES 1910 + void rt_mutex_debug_task_free(struct task_struct *task) 1911 + { 1912 + DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root)); 1913 + DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); 1914 + } 1915 + #endif
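The consolidation above open-codes the former `rt_mutex_fastlock()`/`rt_mutex_fastunlock()` wrappers: the fast path is a single acquire cmpxchg of the owner field from NULL to `current`, and only contention falls through to the slow path. A rough userspace sketch of that shape with C11 atomics (the `toy_*` names are illustrative, not kernel API; the real slow path blocks and does PI boosting rather than spinning):

```c
#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>

struct toy_task { int id; };

struct toy_mutex {
	_Atomic(struct toy_task *) owner;	/* NULL == unlocked */
};

static int slowpath_calls;	/* counts contended acquisitions */

/* Stand-in for the contended path; the kernel would enqueue and block here. */
static void toy_mutex_slowlock(struct toy_mutex *lock, struct toy_task *me)
{
	struct toy_task *expected = NULL;

	slowpath_calls++;
	/* Naive spin until the NULL -> me transition succeeds. */
	while (!atomic_compare_exchange_weak_explicit(&lock->owner, &expected, me,
						      memory_order_acquire,
						      memory_order_relaxed))
		expected = NULL;
}

static void toy_mutex_lock(struct toy_mutex *lock, struct toy_task *me)
{
	struct toy_task *expected = NULL;

	/* Fastpath: one acquire cmpxchg, mirroring rt_mutex_cmpxchg_acquire(). */
	if (atomic_compare_exchange_strong_explicit(&lock->owner, &expected, me,
						    memory_order_acquire,
						    memory_order_relaxed))
		return;
	toy_mutex_slowlock(lock, me);
}

static int toy_mutex_unlock(struct toy_mutex *lock, struct toy_task *me)
{
	struct toy_task *expected = me;

	/* Fastpath: release cmpxchg me -> NULL; 0 on success. */
	if (atomic_compare_exchange_strong_explicit(&lock->owner, &expected, NULL,
						    memory_order_release,
						    memory_order_relaxed))
		return 0;
	return -1;	/* would be the slow unlock path (wake top waiter) */
}
```

On an uncontended lock/unlock pair the slow-path counter stays at zero, which is exactly the property the fastpath exists to provide.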
-35
kernel/locking/rtmutex.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - /* 3 - * RT-Mutexes: blocking mutual exclusion locks with PI support 4 - * 5 - * started by Ingo Molnar and Thomas Gleixner: 6 - * 7 - * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 8 - * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> 9 - * 10 - * This file contains macros used solely by rtmutex.c. 11 - * Non-debug version. 12 - */ 13 - 14 - #define rt_mutex_deadlock_check(l) (0) 15 - #define debug_rt_mutex_init_waiter(w) do { } while (0) 16 - #define debug_rt_mutex_free_waiter(w) do { } while (0) 17 - #define debug_rt_mutex_lock(l) do { } while (0) 18 - #define debug_rt_mutex_proxy_lock(l,p) do { } while (0) 19 - #define debug_rt_mutex_proxy_unlock(l) do { } while (0) 20 - #define debug_rt_mutex_unlock(l) do { } while (0) 21 - #define debug_rt_mutex_init(m, n, k) do { } while (0) 22 - #define debug_rt_mutex_deadlock(d, a ,l) do { } while (0) 23 - #define debug_rt_mutex_print_deadlock(w) do { } while (0) 24 - #define debug_rt_mutex_reset_waiter(w) do { } while (0) 25 - 26 - static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) 27 - { 28 - WARN(1, "rtmutex deadlock detected\n"); 29 - } 30 - 31 - static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *w, 32 - enum rtmutex_chainwalk walk) 33 - { 34 - return walk == RT_MUTEX_FULL_CHAINWALK; 35 - }
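The deleted header compiled every debug hook down to an empty `do { } while (0)` macro in the non-debug build, which means the arguments were never even type-checked there. The replacement pattern in this series keeps the hooks as always-visible inlines guarded by `IS_ENABLED()`, so both configurations parse the same code. A minimal userspace sketch of that guard style (the `TOY_*` names are made up, and the toggle is set to 1 here so the waiter poisoning is observable):

```c
#include <assert.h>
#include <string.h>

/* Build-time toggle; 1 mimics CONFIG_DEBUG_RT_MUTEXES=y. */
#ifndef TOY_DEBUG
#define TOY_DEBUG 1
#endif

/* Hypothetical IS_ENABLED() stand-in: a compile-time constant the optimizer
 * folds away, but the guarded branch is still parsed and type-checked. */
#define TOY_IS_ENABLED(x) (x)

struct toy_waiter { int prio; char pad[12]; };

static inline void toy_debug_init_waiter(struct toy_waiter *w)
{
	if (TOY_IS_ENABLED(TOY_DEBUG))
		memset(w, 0x11, sizeof(*w));	/* poison, as the kernel hook does */
}
```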
+52 -53
kernel/locking/rtmutex_common.h
··· 13 13 #ifndef __KERNEL_RTMUTEX_COMMON_H 14 14 #define __KERNEL_RTMUTEX_COMMON_H 15 15 16 + #include <linux/debug_locks.h> 16 17 #include <linux/rtmutex.h> 17 18 #include <linux/sched/wake_q.h> 18 19 ··· 24 23 * @tree_entry: pi node to enqueue into the mutex waiters tree 25 24 * @pi_tree_entry: pi node to enqueue into the mutex owner waiters tree 26 25 * @task: task reference to the blocked task 26 + * @lock: Pointer to the rt_mutex on which the waiter blocks 27 + * @prio: Priority of the waiter 28 + * @deadline: Deadline of the waiter if applicable 27 29 */ 28 30 struct rt_mutex_waiter { 29 - struct rb_node tree_entry; 30 - struct rb_node pi_tree_entry; 31 + struct rb_node tree_entry; 32 + struct rb_node pi_tree_entry; 31 33 struct task_struct *task; 32 34 struct rt_mutex *lock; 33 - #ifdef CONFIG_DEBUG_RT_MUTEXES 34 - unsigned long ip; 35 - struct pid *deadlock_task_pid; 36 - struct rt_mutex *deadlock_lock; 37 - #endif 38 - int prio; 39 - u64 deadline; 35 + int prio; 36 + u64 deadline; 40 37 }; 41 38 42 39 /* 43 - * Various helpers to access the waiters-tree: 40 + * Must be guarded because this header is included from rcu/tree_plugin.h 41 + * unconditionally. 
44 42 */ 45 - 46 43 #ifdef CONFIG_RT_MUTEXES 47 - 48 44 static inline int rt_mutex_has_waiters(struct rt_mutex *lock) 49 45 { 50 46 return !RB_EMPTY_ROOT(&lock->waiters.rb_root); 51 47 } 52 48 53 - static inline struct rt_mutex_waiter * 54 - rt_mutex_top_waiter(struct rt_mutex *lock) 49 + static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex *lock) 55 50 { 56 51 struct rb_node *leftmost = rb_first_cached(&lock->waiters); 57 52 struct rt_mutex_waiter *w = NULL; ··· 64 67 return !RB_EMPTY_ROOT(&p->pi_waiters.rb_root); 65 68 } 66 69 67 - static inline struct rt_mutex_waiter * 68 - task_top_pi_waiter(struct task_struct *p) 70 + static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p) 69 71 { 70 - return rb_entry(p->pi_waiters.rb_leftmost, 71 - struct rt_mutex_waiter, pi_tree_entry); 72 + return rb_entry(p->pi_waiters.rb_leftmost, struct rt_mutex_waiter, 73 + pi_tree_entry); 72 74 } 73 75 74 - #else 75 - 76 - static inline int rt_mutex_has_waiters(struct rt_mutex *lock) 77 - { 78 - return false; 79 - } 80 - 81 - static inline struct rt_mutex_waiter * 82 - rt_mutex_top_waiter(struct rt_mutex *lock) 83 - { 84 - return NULL; 85 - } 86 - 87 - static inline int task_has_pi_waiters(struct task_struct *p) 88 - { 89 - return false; 90 - } 91 - 92 - static inline struct rt_mutex_waiter * 93 - task_top_pi_waiter(struct task_struct *p) 94 - { 95 - return NULL; 96 - } 97 - 98 - #endif 99 - 100 - /* 101 - * lock->owner state tracking: 102 - */ 103 76 #define RT_MUTEX_HAS_WAITERS 1UL 104 77 105 78 static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) ··· 78 111 79 112 return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS); 80 113 } 114 + #else /* CONFIG_RT_MUTEXES */ 115 + /* Used in rcu/tree_plugin.h */ 116 + static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) 117 + { 118 + return NULL; 119 + } 120 + #endif /* !CONFIG_RT_MUTEXES */ 81 121 82 122 /* 83 123 * Constants for rt mutex functions which 
have a selectable deadlock ··· 101 127 RT_MUTEX_FULL_CHAINWALK, 102 128 }; 103 129 130 + static inline void __rt_mutex_basic_init(struct rt_mutex *lock) 131 + { 132 + lock->owner = NULL; 133 + raw_spin_lock_init(&lock->wait_lock); 134 + lock->waiters = RB_ROOT_CACHED; 135 + } 136 + 104 137 /* 105 138 * PI-futex support (proxy locking functions, etc.): 106 139 */ 107 - extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); 108 140 extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, 109 141 struct task_struct *proxy_owner); 110 142 extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); ··· 136 156 137 157 extern void rt_mutex_postunlock(struct wake_q_head *wake_q); 138 158 139 - #ifdef CONFIG_DEBUG_RT_MUTEXES 140 - # include "rtmutex-debug.h" 141 - #else 142 - # include "rtmutex.h" 143 - #endif 159 + /* Debug functions */ 160 + static inline void debug_rt_mutex_unlock(struct rt_mutex *lock) 161 + { 162 + if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES)) 163 + DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current); 164 + } 165 + 166 + static inline void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) 167 + { 168 + if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES)) 169 + DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock)); 170 + } 171 + 172 + static inline void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) 173 + { 174 + if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES)) 175 + memset(waiter, 0x11, sizeof(*waiter)); 176 + } 177 + 178 + static inline void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) 179 + { 180 + if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES)) 181 + memset(waiter, 0x22, sizeof(*waiter)); 182 + } 144 183 145 184 #endif
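The retained `rt_mutex_owner()` above relies on pointer tagging: because task_struct pointers are word-aligned, the lowest bit of `lock->owner` is free to encode `RT_MUTEX_HAS_WAITERS`, and readers mask it off. A small userspace sketch of that encoding (illustrative names, not kernel API):

```c
#include <assert.h>
#include <stdint.h>
#include <stddef.h>

#define HAS_WAITERS 1UL	/* mirrors RT_MUTEX_HAS_WAITERS */

struct task { int id; };

/* Pack an owner pointer plus the waiters bit into one word; valid because
 * struct task is at least 4-byte aligned, so bit 0 of the pointer is 0. */
static uintptr_t owner_encode(struct task *t, int has_waiters)
{
	return (uintptr_t)t | (has_waiters ? HAS_WAITERS : 0);
}

/* Strip the flag bit, as rt_mutex_owner() does. */
static struct task *owner_decode(uintptr_t word)
{
	return (struct task *)(word & ~HAS_WAITERS);
}

static int owner_has_waiters(uintptr_t word)
{
	return (int)(word & HAS_WAITERS);
}
```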
+2 -2
kernel/locking/rwsem.c
··· 632 632 } 633 633 634 634 /* 635 - * The rwsem_spin_on_owner() function returns the folowing 4 values 635 + * The rwsem_spin_on_owner() function returns the following 4 values 636 636 * depending on the lock owner state. 637 637 * OWNER_NULL : owner is currently NULL 638 638 * OWNER_WRITER: when owner changes and is a writer ··· 819 819 * we try to get it. The new owner may be a spinnable 820 820 * writer. 821 821 * 822 - * To take advantage of two scenarios listed agove, the RT 822 + * To take advantage of two scenarios listed above, the RT 823 823 * task is made to retry one more time to see if it can 824 824 * acquire the lock or continue spinning on the new owning 825 825 * writer. Of course, if the time lag is long enough or the
+2 -2
kernel/locking/spinlock.c
··· 58 58 /* 59 59 * We build the __lock_function inlines here. They are too large for 60 60 * inlining all over the place, but here is only one user per function 61 - * which embedds them into the calling _lock_function below. 61 + * which embeds them into the calling _lock_function below. 62 62 * 63 63 * This could be a long-held lock. We both prepare to spin for a long 64 - * time (making _this_ CPU preemptable if possible), and we also signal 64 + * time (making _this_ CPU preemptible if possible), and we also signal 65 65 * towards that other CPU that it should break the lock ASAP. 66 66 */ 67 67 #define BUILD_LOCK_OPS(op, locktype) \
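The comment being fixed here describes `BUILD_LOCK_OPS`: one macro stamps out a family of `_lock_function` bodies so the large inline is embedded exactly once per generated function. A hypothetical miniature of that code-generation idea (not the kernel macro; `toy_*` names are invented):

```c
#include <assert.h>
#include <stdatomic.h>

struct toy_lock { atomic_flag f; };

/* Stamp out one op function per invocation; 'prologue' is a statement
 * spliced in before the spin, standing in for preempt_disable() etc. */
#define BUILD_TOY_OPS(op, prologue)				\
	static void toy_##op(struct toy_lock *l)		\
	{							\
		prologue;					\
		while (atomic_flag_test_and_set(&l->f))		\
			;	/* spin until previously clear */ \
	}

static int preempt_disables;	/* bookkeeping stand-in */

BUILD_TOY_OPS(lock, (void)0)
BUILD_TOY_OPS(lock_preempt, preempt_disables++)
```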
+9 -9
kernel/sched/core.c
··· 5396 5396 switch (mode) { 5397 5397 case preempt_dynamic_none: 5398 5398 static_call_update(cond_resched, __cond_resched); 5399 - static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); 5400 - static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); 5401 - static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); 5402 - static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); 5399 + static_call_update(might_resched, (void *)&__static_call_return0); 5400 + static_call_update(preempt_schedule, NULL); 5401 + static_call_update(preempt_schedule_notrace, NULL); 5402 + static_call_update(irqentry_exit_cond_resched, NULL); 5403 5403 pr_info("Dynamic Preempt: none\n"); 5404 5404 break; 5405 5405 5406 5406 case preempt_dynamic_voluntary: 5407 5407 static_call_update(cond_resched, __cond_resched); 5408 5408 static_call_update(might_resched, __cond_resched); 5409 - static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); 5410 - static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); 5411 - static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); 5409 + static_call_update(preempt_schedule, NULL); 5410 + static_call_update(preempt_schedule_notrace, NULL); 5411 + static_call_update(irqentry_exit_cond_resched, NULL); 5412 5412 pr_info("Dynamic Preempt: voluntary\n"); 5413 5413 break; 5414 5414 5415 5415 case preempt_dynamic_full: 5416 - static_call_update(cond_resched, (typeof(&__cond_resched)) __static_call_return0); 5417 - static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); 5416 + static_call_update(cond_resched, (void *)&__static_call_return0); 5417 + static_call_update(might_resched, (void *)&__static_call_return0); 5418 5418 static_call_update(preempt_schedule, __preempt_schedule_func); 5419 5419 static_call_update(preempt_schedule_notrace, 
__preempt_schedule_notrace_func); 5420 5420 static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
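The hunk above works because `static_call_update()` now accepts a plain `NULL` (patch the site to a no-op) and `(void *)&__static_call_return0` (patch it to return 0) without per-site casts. The real mechanism patches call instructions in place; a userspace model of just the semantics, using an indirect slot (all `toy_*` names are illustrative):

```c
#include <assert.h>
#include <stddef.h>

/* Model a static call site as an indirect call through a slot. */
typedef int (*toy_call_fn)(void);

static int toy_ret0(void) { return 0; }	/* like __static_call_return0 */

static toy_call_fn toy_slot = toy_ret0;

/* NULL means "patch to a stub returning 0" for an int-returning site. */
static void toy_static_call_update(toy_call_fn fn)
{
	toy_slot = fn ? fn : toy_ret0;
}

/* A hypothetical real target, standing in for __cond_resched(). */
static int toy_cond_resched_impl(void) { return 1; }
```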
+264 -12
kernel/smp.c
··· 24 24 #include <linux/sched/clock.h> 25 25 #include <linux/nmi.h> 26 26 #include <linux/sched/debug.h> 27 + #include <linux/jump_label.h> 27 28 28 29 #include "smpboot.h" 29 30 #include "sched/smp.h" 30 31 31 32 #define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK) 32 33 34 + #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG 35 + union cfd_seq_cnt { 36 + u64 val; 37 + struct { 38 + u64 src:16; 39 + u64 dst:16; 40 + #define CFD_SEQ_NOCPU 0xffff 41 + u64 type:4; 42 + #define CFD_SEQ_QUEUE 0 43 + #define CFD_SEQ_IPI 1 44 + #define CFD_SEQ_NOIPI 2 45 + #define CFD_SEQ_PING 3 46 + #define CFD_SEQ_PINGED 4 47 + #define CFD_SEQ_HANDLE 5 48 + #define CFD_SEQ_DEQUEUE 6 49 + #define CFD_SEQ_IDLE 7 50 + #define CFD_SEQ_GOTIPI 8 51 + #define CFD_SEQ_HDLEND 9 52 + u64 cnt:28; 53 + } u; 54 + }; 55 + 56 + static char *seq_type[] = { 57 + [CFD_SEQ_QUEUE] = "queue", 58 + [CFD_SEQ_IPI] = "ipi", 59 + [CFD_SEQ_NOIPI] = "noipi", 60 + [CFD_SEQ_PING] = "ping", 61 + [CFD_SEQ_PINGED] = "pinged", 62 + [CFD_SEQ_HANDLE] = "handle", 63 + [CFD_SEQ_DEQUEUE] = "dequeue (src CPU 0 == empty)", 64 + [CFD_SEQ_IDLE] = "idle", 65 + [CFD_SEQ_GOTIPI] = "gotipi", 66 + [CFD_SEQ_HDLEND] = "hdlend (src CPU 0 == early)", 67 + }; 68 + 69 + struct cfd_seq_local { 70 + u64 ping; 71 + u64 pinged; 72 + u64 handle; 73 + u64 dequeue; 74 + u64 idle; 75 + u64 gotipi; 76 + u64 hdlend; 77 + }; 78 + #endif 79 + 80 + struct cfd_percpu { 81 + call_single_data_t csd; 82 + #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG 83 + u64 seq_queue; 84 + u64 seq_ipi; 85 + u64 seq_noipi; 86 + #endif 87 + }; 88 + 33 89 struct call_function_data { 34 - call_single_data_t __percpu *csd; 90 + struct cfd_percpu __percpu *pcpu; 35 91 cpumask_var_t cpumask; 36 92 cpumask_var_t cpumask_ipi; 37 93 }; ··· 110 54 free_cpumask_var(cfd->cpumask); 111 55 return -ENOMEM; 112 56 } 113 - cfd->csd = alloc_percpu(call_single_data_t); 114 - if (!cfd->csd) { 57 + cfd->pcpu = alloc_percpu(struct cfd_percpu); 58 + if (!cfd->pcpu) { 115 59 free_cpumask_var(cfd->cpumask); 
116 60 free_cpumask_var(cfd->cpumask_ipi); 117 61 return -ENOMEM; ··· 126 70 127 71 free_cpumask_var(cfd->cpumask); 128 72 free_cpumask_var(cfd->cpumask_ipi); 129 - free_percpu(cfd->csd); 73 + free_percpu(cfd->pcpu); 130 74 return 0; 131 75 } 132 76 ··· 158 102 159 103 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG 160 104 105 + static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled); 106 + static DEFINE_STATIC_KEY_FALSE(csdlock_debug_extended); 107 + 108 + static int __init csdlock_debug(char *str) 109 + { 110 + unsigned int val = 0; 111 + 112 + if (str && !strcmp(str, "ext")) { 113 + val = 1; 114 + static_branch_enable(&csdlock_debug_extended); 115 + } else 116 + get_option(&str, &val); 117 + 118 + if (val) 119 + static_branch_enable(&csdlock_debug_enabled); 120 + 121 + return 0; 122 + } 123 + early_param("csdlock_debug", csdlock_debug); 124 + 161 125 static DEFINE_PER_CPU(call_single_data_t *, cur_csd); 162 126 static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); 163 127 static DEFINE_PER_CPU(void *, cur_csd_info); 128 + static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local); 164 129 165 130 #define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC) 166 131 static atomic_t csd_bug_count = ATOMIC_INIT(0); 132 + static u64 cfd_seq; 133 + 134 + #define CFD_SEQ(s, d, t, c) \ 135 + (union cfd_seq_cnt){ .u.src = s, .u.dst = d, .u.type = t, .u.cnt = c } 136 + 137 + static u64 cfd_seq_inc(unsigned int src, unsigned int dst, unsigned int type) 138 + { 139 + union cfd_seq_cnt new, old; 140 + 141 + new = CFD_SEQ(src, dst, type, 0); 142 + 143 + do { 144 + old.val = READ_ONCE(cfd_seq); 145 + new.u.cnt = old.u.cnt + 1; 146 + } while (cmpxchg(&cfd_seq, old.val, new.val) != old.val); 147 + 148 + return old.val; 149 + } 150 + 151 + #define cfd_seq_store(var, src, dst, type) \ 152 + do { \ 153 + if (static_branch_unlikely(&csdlock_debug_extended)) \ 154 + var = cfd_seq_inc(src, dst, type); \ 155 + } while (0) 167 156 168 157 /* Record current CSD work for current CPU, NULL to erase. 
*/ 169 - static void csd_lock_record(call_single_data_t *csd) 158 + static void __csd_lock_record(call_single_data_t *csd) 170 159 { 171 160 if (!csd) { 172 161 smp_mb(); /* NULL cur_csd after unlock. */ ··· 226 125 /* Or before unlock, as the case may be. */ 227 126 } 228 127 229 - static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd) 128 + static __always_inline void csd_lock_record(call_single_data_t *csd) 129 + { 130 + if (static_branch_unlikely(&csdlock_debug_enabled)) 131 + __csd_lock_record(csd); 132 + } 133 + 134 + static int csd_lock_wait_getcpu(call_single_data_t *csd) 230 135 { 231 136 unsigned int csd_type; 232 137 ··· 242 135 return -1; 243 136 } 244 137 138 + static void cfd_seq_data_add(u64 val, unsigned int src, unsigned int dst, 139 + unsigned int type, union cfd_seq_cnt *data, 140 + unsigned int *n_data, unsigned int now) 141 + { 142 + union cfd_seq_cnt new[2]; 143 + unsigned int i, j, k; 144 + 145 + new[0].val = val; 146 + new[1] = CFD_SEQ(src, dst, type, new[0].u.cnt + 1); 147 + 148 + for (i = 0; i < 2; i++) { 149 + if (new[i].u.cnt <= now) 150 + new[i].u.cnt |= 0x80000000U; 151 + for (j = 0; j < *n_data; j++) { 152 + if (new[i].u.cnt == data[j].u.cnt) { 153 + /* Direct read value trumps generated one. */ 154 + if (i == 0) 155 + data[j].val = new[i].val; 156 + break; 157 + } 158 + if (new[i].u.cnt < data[j].u.cnt) { 159 + for (k = *n_data; k > j; k--) 160 + data[k].val = data[k - 1].val; 161 + data[j].val = new[i].val; 162 + (*n_data)++; 163 + break; 164 + } 165 + } 166 + if (j == *n_data) { 167 + data[j].val = new[i].val; 168 + (*n_data)++; 169 + } 170 + } 171 + } 172 + 173 + static const char *csd_lock_get_type(unsigned int type) 174 + { 175 + return (type >= ARRAY_SIZE(seq_type)) ? "?" 
: seq_type[type]; 176 + } 177 + 178 + static void csd_lock_print_extended(call_single_data_t *csd, int cpu) 179 + { 180 + struct cfd_seq_local *seq = &per_cpu(cfd_seq_local, cpu); 181 + unsigned int srccpu = csd->node.src; 182 + struct call_function_data *cfd = per_cpu_ptr(&cfd_data, srccpu); 183 + struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); 184 + unsigned int now; 185 + union cfd_seq_cnt data[2 * ARRAY_SIZE(seq_type)]; 186 + unsigned int n_data = 0, i; 187 + 188 + data[0].val = READ_ONCE(cfd_seq); 189 + now = data[0].u.cnt; 190 + 191 + cfd_seq_data_add(pcpu->seq_queue, srccpu, cpu, CFD_SEQ_QUEUE, data, &n_data, now); 192 + cfd_seq_data_add(pcpu->seq_ipi, srccpu, cpu, CFD_SEQ_IPI, data, &n_data, now); 193 + cfd_seq_data_add(pcpu->seq_noipi, srccpu, cpu, CFD_SEQ_NOIPI, data, &n_data, now); 194 + 195 + cfd_seq_data_add(per_cpu(cfd_seq_local.ping, srccpu), srccpu, CFD_SEQ_NOCPU, CFD_SEQ_PING, data, &n_data, now); 196 + cfd_seq_data_add(per_cpu(cfd_seq_local.pinged, srccpu), srccpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED, data, &n_data, now); 197 + 198 + cfd_seq_data_add(seq->idle, CFD_SEQ_NOCPU, cpu, CFD_SEQ_IDLE, data, &n_data, now); 199 + cfd_seq_data_add(seq->gotipi, CFD_SEQ_NOCPU, cpu, CFD_SEQ_GOTIPI, data, &n_data, now); 200 + cfd_seq_data_add(seq->handle, CFD_SEQ_NOCPU, cpu, CFD_SEQ_HANDLE, data, &n_data, now); 201 + cfd_seq_data_add(seq->dequeue, CFD_SEQ_NOCPU, cpu, CFD_SEQ_DEQUEUE, data, &n_data, now); 202 + cfd_seq_data_add(seq->hdlend, CFD_SEQ_NOCPU, cpu, CFD_SEQ_HDLEND, data, &n_data, now); 203 + 204 + for (i = 0; i < n_data; i++) { 205 + pr_alert("\tcsd: cnt(%07x): %04x->%04x %s\n", 206 + data[i].u.cnt & ~0x80000000U, data[i].u.src, 207 + data[i].u.dst, csd_lock_get_type(data[i].u.type)); 208 + } 209 + pr_alert("\tcsd: cnt now: %07x\n", now); 210 + } 211 + 245 212 /* 246 213 * Complain if too much time spent waiting. 
Note that only 247 214 * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU, 248 215 * so waiting on other types gets much less information. 249 216 */ 250 - static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id) 217 + static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id) 251 218 { 252 219 int cpu = -1; 253 220 int cpux; ··· 365 184 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request"); 366 185 } 367 186 if (cpu >= 0) { 187 + if (static_branch_unlikely(&csdlock_debug_extended)) 188 + csd_lock_print_extended(csd, cpu); 368 189 if (!trigger_single_cpu_backtrace(cpu)) 369 190 dump_cpu_task(cpu); 370 191 if (!cpu_cur_csd) { ··· 387 204 * previous function call. For multi-cpu calls its even more interesting 388 205 * as we'll have to ensure no other cpu is observing our csd. 389 206 */ 390 - static __always_inline void csd_lock_wait(call_single_data_t *csd) 207 + static void __csd_lock_wait(call_single_data_t *csd) 391 208 { 392 209 int bug_id = 0; 393 210 u64 ts0, ts1; ··· 401 218 smp_acquire__after_ctrl_dep(); 402 219 } 403 220 221 + static __always_inline void csd_lock_wait(call_single_data_t *csd) 222 + { 223 + if (static_branch_unlikely(&csdlock_debug_enabled)) { 224 + __csd_lock_wait(csd); 225 + return; 226 + } 227 + 228 + smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK)); 229 + } 230 + 231 + static void __smp_call_single_queue_debug(int cpu, struct llist_node *node) 232 + { 233 + unsigned int this_cpu = smp_processor_id(); 234 + struct cfd_seq_local *seq = this_cpu_ptr(&cfd_seq_local); 235 + struct call_function_data *cfd = this_cpu_ptr(&cfd_data); 236 + struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); 237 + 238 + cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE); 239 + if (llist_add(node, &per_cpu(call_single_queue, cpu))) { 240 + cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI); 241 + cfd_seq_store(seq->ping, 
this_cpu, cpu, CFD_SEQ_PING); 242 + send_call_function_single_ipi(cpu); 243 + cfd_seq_store(seq->pinged, this_cpu, cpu, CFD_SEQ_PINGED); 244 + } else { 245 + cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI); 246 + } 247 + } 404 248 #else 249 + #define cfd_seq_store(var, src, dst, type) 250 + 405 251 static void csd_lock_record(call_single_data_t *csd) 406 252 { 407 253 } ··· 468 256 469 257 void __smp_call_single_queue(int cpu, struct llist_node *node) 470 258 { 259 + #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG 260 + if (static_branch_unlikely(&csdlock_debug_extended)) { 261 + unsigned int type; 262 + 263 + type = CSD_TYPE(container_of(node, call_single_data_t, 264 + node.llist)); 265 + if (type == CSD_TYPE_SYNC || type == CSD_TYPE_ASYNC) { 266 + __smp_call_single_queue_debug(cpu, node); 267 + return; 268 + } 269 + } 270 + #endif 271 + 471 272 /* 472 273 * The list addition should be visible before sending the IPI 473 274 * handler locks the list to pull the entry off it because of ··· 539 314 */ 540 315 void generic_smp_call_function_single_interrupt(void) 541 316 { 317 + cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->gotipi, CFD_SEQ_NOCPU, 318 + smp_processor_id(), CFD_SEQ_GOTIPI); 542 319 flush_smp_call_function_queue(true); 543 320 } 544 321 ··· 568 341 lockdep_assert_irqs_disabled(); 569 342 570 343 head = this_cpu_ptr(&call_single_queue); 344 + cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->handle, CFD_SEQ_NOCPU, 345 + smp_processor_id(), CFD_SEQ_HANDLE); 571 346 entry = llist_del_all(head); 347 + cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->dequeue, 348 + /* Special meaning of source cpu: 0 == queue empty */ 349 + entry ? CFD_SEQ_NOCPU : 0, 350 + smp_processor_id(), CFD_SEQ_DEQUEUE); 572 351 entry = llist_reverse_order(entry); 573 352 574 353 /* There shouldn't be any pending callbacks on an offline CPU. 
*/ ··· 633 400 } 634 401 } 635 402 636 - if (!entry) 403 + if (!entry) { 404 + cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend, 405 + 0, smp_processor_id(), 406 + CFD_SEQ_HDLEND); 637 407 return; 408 + } 638 409 639 410 /* 640 411 * Second; run all !SYNC callbacks. ··· 676 439 */ 677 440 if (entry) 678 441 sched_ttwu_pending(entry); 442 + 443 + cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend, CFD_SEQ_NOCPU, 444 + smp_processor_id(), CFD_SEQ_HDLEND); 679 445 } 680 446 681 447 void flush_smp_call_function_from_idle(void) ··· 688 448 if (llist_empty(this_cpu_ptr(&call_single_queue))) 689 449 return; 690 450 451 + cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU, 452 + smp_processor_id(), CFD_SEQ_IDLE); 691 453 local_irq_save(flags); 692 454 flush_smp_call_function_queue(true); 693 455 if (local_softirq_pending()) ··· 906 664 907 665 cpumask_clear(cfd->cpumask_ipi); 908 666 for_each_cpu(cpu, cfd->cpumask) { 909 - call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu); 667 + struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); 668 + call_single_data_t *csd = &pcpu->csd; 910 669 911 670 if (cond_func && !cond_func(cpu, info)) 912 671 continue; ··· 921 678 csd->node.src = smp_processor_id(); 922 679 csd->node.dst = cpu; 923 680 #endif 924 - if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) 681 + cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE); 682 + if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) { 925 683 __cpumask_set_cpu(cpu, cfd->cpumask_ipi); 684 + cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI); 685 + } else { 686 + cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI); 687 + } 926 688 } 927 689 928 690 /* Send a message to all CPUs in the map */ 691 + cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu, 692 + CFD_SEQ_NOCPU, CFD_SEQ_PING); 929 693 arch_send_call_function_ipi_mask(cfd->cpumask_ipi); 694 + cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu, 695 + 
CFD_SEQ_NOCPU, CFD_SEQ_PINGED); 930 696 931 697 if (wait) { 932 698 for_each_cpu(cpu, cfd->cpumask) { 933 699 call_single_data_t *csd; 934 700 935 - csd = per_cpu_ptr(cfd->csd, cpu); 701 + csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd; 936 702 csd_lock_wait(csd); 937 703 } 938 704 }
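The debug path added above is gated by a static branch so that, with csdlock_debug off, csd_lock_wait() remains a plain acquire spin. As a rough userspace sketch of that shape (the function names, the spin-count "clock", and the report format are stand-ins for illustration, not kernel APIs):

```c
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define CSD_FLAG_LOCK 0x01U

/* Stand-ins for the kernel's static branch and a csd's node.u_flags. */
static bool csdlock_debug_enabled;
static _Atomic unsigned int demo_csd_flags;

static int csd_toolong_reports; /* diagnostics emitted by the debug path */

static void csd_lock_wait_sketch(_Atomic unsigned int *flags)
{
	unsigned long spins = 0;

	if (!csdlock_debug_enabled) {
		/* Fast path: equivalent in spirit to smp_cond_load_acquire(). */
		while (atomic_load_explicit(flags, memory_order_acquire) & CSD_FLAG_LOCK)
			;
		return;
	}

	/* Debug path: same wait, plus a periodic "waiting too long" report. */
	while (atomic_load_explicit(flags, memory_order_acquire) & CSD_FLAG_LOCK) {
		if (++spins % 1000000 == 0) {
			csd_toolong_reports++;
			fprintf(stderr, "csd: still waiting after %lu spins\n", spins);
		}
	}
}
```

The kernel version additionally records who sent the IPI and re-pings the hung CPU; the sketch only shows the gating structure.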
+2 -2
kernel/static_call.c
··· 165 165 166 166 stop = __stop_static_call_sites; 167 167 168 - #ifdef CONFIG_MODULES 169 168 if (mod) { 169 + #ifdef CONFIG_MODULES 170 170 stop = mod->static_call_sites + 171 171 mod->num_static_call_sites; 172 172 init = mod->state == MODULE_STATE_COMING; 173 - } 174 173 #endif 174 + } 175 175 176 176 for (site = site_mod->sites; 177 177 site < stop && static_call_key(site) == key; site++) {
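The static_call hunk above follows a common kernel idiom: keep the `if (mod)` test outside the #ifdef so the variable is always referenced (no unused-variable warning when CONFIG_MODULES is off), while only the module-specific work stays guarded. A hedged standalone sketch with invented names:

```c
#include <assert.h>
#include <stddef.h>

/* Uncomment to emulate CONFIG_MODULES=y; both variants compile warning-free. */
/* #define CONFIG_MODULES 1 */

struct fake_mod {
	int num_static_call_sites;
};

static int count_sites(struct fake_mod *mod)
{
	int n = 0;

	if (mod) {	/* always compiled, so "mod" is never unused */
#ifdef CONFIG_MODULES
		n = mod->num_static_call_sites; /* guarded module-only work */
#endif
	}
	return n;
}
```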
+3 -2
lib/Kconfig.kcsan
··· 69 69 panic. Recommended to be enabled, ensuring critical functionality 70 70 works as intended. 71 71 72 - config KCSAN_TEST 73 - tristate "KCSAN test for integrated runtime behaviour" 72 + config KCSAN_KUNIT_TEST 73 + tristate "KCSAN test for integrated runtime behaviour" if !KUNIT_ALL_TESTS 74 + default KUNIT_ALL_TESTS 74 75 depends on TRACEPOINTS && KUNIT 75 76 select TORTURE_TEST 76 77 help
+479
tools/memory-model/Documentation/access-marking.txt
··· 1 + MARKING SHARED-MEMORY ACCESSES 2 + ============================== 3 + 4 + This document provides guidelines for marking intentionally concurrent 5 + normal accesses to shared memory, that is "normal" as in accesses that do 6 + not use read-modify-write atomic operations. It also describes how to 7 + document these accesses, both with comments and with special assertions 8 + processed by the Kernel Concurrency Sanitizer (KCSAN). This discussion 9 + builds on an earlier LWN article [1]. 10 + 11 + 12 + ACCESS-MARKING OPTIONS 13 + ====================== 14 + 15 + The Linux kernel provides the following access-marking options: 16 + 17 + 1. Plain C-language accesses (unmarked), for example, "a = b;" 18 + 19 + 2. Data-race marking, for example, "data_race(a = b);" 20 + 21 + 3. READ_ONCE(), for example, "a = READ_ONCE(b);" 22 + The various forms of atomic_read() also fit in here. 23 + 24 + 4. WRITE_ONCE(), for example, "WRITE_ONCE(a, b);" 25 + The various forms of atomic_set() also fit in here. 26 + 27 + 28 + These may be used in combination, as shown in this admittedly improbable 29 + example: 30 + 31 + WRITE_ONCE(a, b + data_race(c + d) + READ_ONCE(e)); 32 + 33 + Neither plain C-language accesses nor data_race() (#1 and #2 above) place 34 + any sort of constraint on the compiler's choice of optimizations [2]. 35 + In contrast, READ_ONCE() and WRITE_ONCE() (#3 and #4 above) restrict the 36 + compiler's use of code-motion and common-subexpression optimizations. 37 + Therefore, if a given access is involved in an intentional data race, 38 + using READ_ONCE() for loads and WRITE_ONCE() for stores is usually 39 + preferable to data_race(), which in turn is usually preferable to plain 40 + C-language accesses. 41 + 42 + KCSAN will complain about many types of data races involving plain 43 + C-language accesses, but marking all accesses involved in a given data 44 + race with one of data_race(), READ_ONCE(), or WRITE_ONCE(), will prevent 45 + KCSAN from complaining. 
Of course, lack of KCSAN complaints does not 46 + imply correct code. Therefore, please take a thoughtful approach 47 + when responding to KCSAN complaints. Churning the code base with 48 + ill-considered additions of data_race(), READ_ONCE(), and WRITE_ONCE() 49 + is unhelpful. 50 + 51 + In fact, the following sections describe situations where use of 52 + data_race() and even plain C-language accesses is preferable to 53 + READ_ONCE() and WRITE_ONCE(). 54 + 55 + 56 + Use of the data_race() Macro 57 + ---------------------------- 58 + 59 + Here are some situations where data_race() should be used instead of 60 + READ_ONCE() and WRITE_ONCE(): 61 + 62 + 1. Data-racy loads from shared variables whose values are used only 63 + for diagnostic purposes. 64 + 65 + 2. Data-racy reads whose values are checked against marked reload. 66 + 67 + 3. Reads whose values feed into error-tolerant heuristics. 68 + 69 + 4. Writes setting values that feed into error-tolerant heuristics. 70 + 71 + 72 + Data-Racy Reads for Approximate Diagnostics 73 + 74 + Approximate diagnostics include lockdep reports, monitoring/statistics 75 + (including /proc and /sys output), WARN*()/BUG*() checks whose return 76 + values are ignored, and other situations where reads from shared variables 77 + are not an integral part of the core concurrency design. 78 + 79 + In fact, use of data_race() instead of READ_ONCE() for these diagnostic 80 + reads can enable better checking of the remaining accesses implementing 81 + the core concurrency design. For example, suppose that the core design 82 + prevents any non-diagnostic reads from shared variable x from running 83 + concurrently with updates to x. Then using plain C-language writes 84 + to x allows KCSAN to detect reads from x from within regions of code 85 + that fail to exclude the updates.
In this case, it is important to use 86 + data_race() for the diagnostic reads because otherwise KCSAN would give 87 + false-positive warnings about these diagnostic reads. 88 + 89 + In theory, plain C-language loads can also be used for this use case. 90 + However, in practice this will have the disadvantage of causing KCSAN 91 + to generate false positives because KCSAN will have no way of knowing 92 + that the resulting data race was intentional. 93 + 94 + 95 + Data-Racy Reads That Are Checked Against Marked Reload 96 + 97 + The values from some reads are not implicitly trusted. They are instead 98 + fed into some operation that checks the full value against a later marked 99 + load from memory, which means that the occasional arbitrarily bogus value 100 + is not a problem. For example, if a bogus value is fed into cmpxchg(), 101 + all that happens is that this cmpxchg() fails, which normally results 102 + in a retry. Unless the race condition that resulted in the bogus value 103 + recurs, this retry will with high probability succeed, so no harm done. 104 + 105 + However, please keep in mind that a data_race() load feeding into 106 + a cmpxchg_relaxed() might still be subject to load fusing on some 107 + architectures. Therefore, it is best to capture the return value from 108 + the failing cmpxchg() for the next iteration of the loop, an approach 109 + that provides the compiler much less scope for mischievous optimizations. 110 + Capturing the return value from cmpxchg() also saves a memory reference 111 + in many cases. 112 + 113 + In theory, plain C-language loads can also be used for this use case. 114 + However, in practice this will have the disadvantage of causing KCSAN 115 + to generate false positives because KCSAN will have no way of knowing 116 + that the resulting data race was intentional. 
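The advice above about capturing the failing cmpxchg()'s return value maps directly onto userspace C11, where atomic_compare_exchange_weak() refreshes "expected" from memory on failure. A hedged sketch (variable names are illustrative, not from the kernel):

```c
#include <assert.h>
#include <stdatomic.h>

static _Atomic int counter;

/* Atomically double the counter, reusing the value observed by a failed
 * compare-and-swap instead of performing a fresh racy load each time. */
static int double_counter(void)
{
	/* The initial racy load plays the role of data_race() in the text. */
	int old = atomic_load_explicit(&counter, memory_order_relaxed);

	/* On failure, C11 writes the current value back into "old": this is
	 * the "capture the return value from the failing cmpxchg()" pattern,
	 * with no extra memory reference and no scope for load fusing. */
	while (!atomic_compare_exchange_weak(&counter, &old, old * 2))
		;
	return old;
}
```

An occasional bogus initial value simply makes the first compare-and-swap fail, after which the loop proceeds with the freshly captured value.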
117 + 118 + 119 + Reads Feeding Into Error-Tolerant Heuristics 120 + 121 + Values from some reads feed into heuristics that can tolerate occasional 122 + errors. Such reads can use data_race(), thus allowing KCSAN to focus on 123 + the other accesses to the relevant shared variables. But please note 124 + that data_race() loads are subject to load fusing, which can result in 125 + consistent errors, which in turn are quite capable of breaking heuristics. 126 + Therefore use of data_race() should be limited to cases where some other 127 + code (such as a barrier() call) will force the occasional reload. 128 + 129 + In theory, plain C-language loads can also be used for this use case. 130 + However, in practice this will have the disadvantage of causing KCSAN 131 + to generate false positives because KCSAN will have no way of knowing 132 + that the resulting data race was intentional. 133 + 134 + 135 + Writes Setting Values Feeding Into Error-Tolerant Heuristics 136 + 137 + The values read into error-tolerant heuristics come from somewhere, 138 + for example, from sysfs. This means that some code in sysfs writes 139 + to this same variable, and these writes can also use data_race(). 140 + After all, if the heuristic can tolerate the occasional bogus value 141 + due to compiler-mangled reads, it can also tolerate the occasional 142 + compiler-mangled write, at least assuming that the proper value is in 143 + place once the write completes. 144 + 145 + Plain C-language stores can also be used for this use case. However, 146 + in kernels built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n, this 147 + will have the disadvantage of causing KCSAN to generate false positives 148 + because KCSAN will have no way of knowing that the resulting data race 149 + was intentional. 
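As a hedged userspace illustration of the error-tolerant-heuristic advice (the kernel's data_race() is approximated by a relaxed atomic load, and re-reading the tunable on every iteration stands in for the barrier()-forced reload the text recommends; all names are invented):

```c
#include <assert.h>
#include <stdatomic.h>

/* Tunable written from elsewhere, e.g. a sysfs-like control path. */
static _Atomic int batch_limit;

/* Error-tolerant heuristic: a stale or mangled read of batch_limit only
 * makes a batch somewhat too large or too small, never incorrect. */
static int process_items(int nitems)
{
	int done = 0;

	while (done < nitems) {
		/* Fresh racy read each iteration (~ data_race() + barrier()). */
		int limit = atomic_load_explicit(&batch_limit, memory_order_relaxed);

		if (limit < 1)
			limit = 1; /* tolerate bogus values from the racy read */
		done += (nitems - done < limit) ? (nitems - done) : limit;
	}
	return done;
}
```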
150 + 151 + 152 + Use of Plain C-Language Accesses 153 + -------------------------------- 154 + 155 + Here are some example situations where plain C-language accesses should 156 + be used instead of READ_ONCE(), WRITE_ONCE(), and data_race(): 157 + 158 + 1. Accesses protected by mutual exclusion, including strict locking 159 + and sequence locking. 160 + 161 + 2. Initialization-time and cleanup-time accesses. This covers a 162 + wide variety of situations, including the uniprocessor phase of 163 + system boot, variables to be used by not-yet-spawned kthreads, 164 + structures not yet published to reference-counted or RCU-protected 165 + data structures, and the cleanup side of any of these situations. 166 + 167 + 3. Per-CPU variables that are not accessed from other CPUs. 168 + 169 + 4. Private per-task variables, including on-stack variables, some 170 + fields in the task_struct structure, and task-private heap data. 171 + 172 + 5. Any other loads for which there is not supposed to be a concurrent 173 + store to that same variable. 174 + 175 + 6. Any other stores for which there should be neither concurrent 176 + loads nor concurrent stores to that same variable. 177 + 178 + But note that KCSAN makes three explicit exceptions to this rule 179 + by default, refraining from flagging plain C-language stores: 180 + 181 + a. No matter what. You can override this default by building 182 + with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n. 183 + 184 + b. When the store writes the value already contained in 185 + that variable. You can override this default by building 186 + with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n. 187 + 188 + c. When one of the stores is in an interrupt handler and 189 + the other in the interrupted code. You can override this 190 + default by building with CONFIG_KCSAN_INTERRUPT_WATCHER=y.
191 + 192 + Note that it is important to use plain C-language accesses in these cases, 193 + because doing otherwise prevents KCSAN from detecting violations of your 194 + code's synchronization rules. 195 + 196 + 197 + ACCESS-DOCUMENTATION OPTIONS 198 + ============================ 199 + 200 + It is important to comment marked accesses so that people reading your 201 + code, yourself included, are reminded of the synchronization design. 202 + However, it is even more important to comment plain C-language accesses 203 + that are intentionally involved in data races. Such comments are 204 + needed to remind people reading your code, again, yourself included, 205 + of how the compiler has been prevented from optimizing those accesses 206 + into concurrency bugs. 207 + 208 + It is also possible to tell KCSAN about your synchronization design. 209 + For example, ASSERT_EXCLUSIVE_ACCESS(foo) tells KCSAN that any 210 + concurrent access to variable foo by any other CPU is an error, even 211 + if that concurrent access is marked with READ_ONCE(). In addition, 212 + ASSERT_EXCLUSIVE_WRITER(foo) tells KCSAN that although it is OK for there 213 + to be concurrent reads from foo from other CPUs, it is an error for some 214 + other CPU to be concurrently writing to foo, even if that concurrent 215 + write is marked with data_race() or WRITE_ONCE(). 216 + 217 + Note that although KCSAN will call out data races involving either 218 + ASSERT_EXCLUSIVE_ACCESS() or ASSERT_EXCLUSIVE_WRITER() on the one hand 219 + and data_race() writes on the other, KCSAN will not report the location 220 + of these data_race() writes. 221 + 222 + 223 + EXAMPLES 224 + ======== 225 + 226 + As noted earlier, the goal is to prevent the compiler from destroying 227 + your concurrent algorithm, to help the human reader, and to inform 228 + KCSAN of aspects of your concurrency design. This section looks at a 229 + few examples showing how this can be done. 
230 + 231 + 232 + Lock Protection With Lockless Diagnostic Access 233 + ----------------------------------------------- 234 + 235 + For example, suppose a shared variable "foo" is read only while a 236 + reader-writer spinlock is read-held, written only while that same 237 + spinlock is write-held, except that it is also read locklessly for 238 + diagnostic purposes. The code might look as follows: 239 + 240 + int foo; 241 + DEFINE_RWLOCK(foo_rwlock); 242 + 243 + void update_foo(int newval) 244 + { 245 + write_lock(&foo_rwlock); 246 + foo = newval; 247 + do_something(newval); 248 + write_unlock(&foo_rwlock); 249 + } 250 + 251 + int read_foo(void) 252 + { 253 + int ret; 254 + 255 + read_lock(&foo_rwlock); 256 + do_something_else(); 257 + ret = foo; 258 + read_unlock(&foo_rwlock); 259 + return ret; 260 + } 261 + 262 + int read_foo_diagnostic(void) 263 + { 264 + return data_race(foo); 265 + } 266 + 267 + The reader-writer lock prevents the compiler from introducing concurrency 268 + bugs into any part of the main algorithm using foo, which means that 269 + the accesses to foo within both update_foo() and read_foo() can (and 270 + should) be plain C-language accesses. One benefit of making them be 271 + plain C-language accesses is that KCSAN can detect any erroneous lockless 272 + reads from or updates to foo. The data_race() in read_foo_diagnostic() 273 + tells KCSAN that data races are expected, and should be silently 274 + ignored. This data_race() also tells the human reading the code that 275 + read_foo_diagnostic() might sometimes return a bogus value. 276 + 277 + However, please note that your kernel must be built with 278 + CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n in order for KCSAN to 279 + detect a buggy lockless write. If you need KCSAN to detect such a 280 + write even if that write did not change the value of foo, you also 281 + need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n. 
If you need KCSAN to 282 + detect such a write happening in an interrupt handler running on the 283 + same CPU doing the legitimate lock-protected write, you also need 284 + CONFIG_KCSAN_INTERRUPT_WATCHER=y. With some or all of these Kconfig 285 + options set properly, KCSAN can be quite helpful, although it is not 286 + necessarily a full replacement for hardware watchpoints. On the other 287 + hand, neither are hardware watchpoints a full replacement for KCSAN 288 + because it is not always easy to tell a hardware watchpoint to conditionally 289 + trap on accesses. 290 + 291 + 292 + Lock-Protected Writes With Lockless Reads 293 + ----------------------------------------- 294 + 295 + For another example, suppose a shared variable "foo" is updated only 296 + while holding a spinlock, but is read locklessly. The code might look 297 + as follows: 298 + 299 + int foo; 300 + DEFINE_SPINLOCK(foo_lock); 301 + 302 + void update_foo(int newval) 303 + { 304 + spin_lock(&foo_lock); 305 + WRITE_ONCE(foo, newval); 306 + ASSERT_EXCLUSIVE_WRITER(foo); 307 + do_something(newval); 308 + spin_unlock(&foo_lock); 309 + } 310 + 311 + int read_foo(void) 312 + { 313 + do_something_else(); 314 + return READ_ONCE(foo); 315 + } 316 + 317 + Because foo is read locklessly, all accesses are marked. The purpose 318 + of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy 319 + concurrent lockless write. 320 + 321 + 322 + Lockless Reads and Writes 323 + ------------------------- 324 + 325 + For another example, suppose a shared variable "foo" is both read and 326 + updated locklessly. The code might look as follows: 327 + 328 + int foo; 329 + 330 + int update_foo(int newval) 331 + { 332 + int ret; 333 + 334 + ret = xchg(&foo, newval); 335 + do_something(newval); 336 + return ret; 337 + } 338 + 339 + int read_foo(void) 340 + { 341 + do_something_else(); 342 + return READ_ONCE(foo); 343 + } 344 + 345 + Because foo is accessed locklessly, all accesses are marked.
It does 346 + not make sense to use ASSERT_EXCLUSIVE_WRITER() in this case because 347 + there really can be concurrent lockless writers. KCSAN would 348 + flag any concurrent plain C-language reads from foo, and given 349 + CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n, also any concurrent plain 350 + C-language writes to foo. 351 + 352 + 353 + Lockless Reads and Writes, But With Single-Threaded Initialization 354 + ------------------------------------------------------------------ 355 + 356 + For yet another example, suppose that foo is initialized in a 357 + single-threaded manner, but that a number of kthreads are then created 358 + that locklessly and concurrently access foo. Some snippets of this code 359 + might look as follows: 360 + 361 + int foo; 362 + 363 + void initialize_foo(int initval, int nkthreads) 364 + { 365 + int i; 366 + 367 + foo = initval; 368 + ASSERT_EXCLUSIVE_ACCESS(foo); 369 + for (i = 0; i < nkthreads; i++) 370 + kthread_run(access_foo_concurrently, ...); 371 + } 372 + 373 + /* Called from access_foo_concurrently(). */ 374 + int update_foo(int newval) 375 + { 376 + int ret; 377 + 378 + ret = xchg(&foo, newval); 379 + do_something(newval); 380 + return ret; 381 + } 382 + 383 + /* Also called from access_foo_concurrently(). */ 384 + int read_foo(void) 385 + { 386 + do_something_else(); 387 + return READ_ONCE(foo); 388 + } 389 + 390 + The initialize_foo() uses a plain C-language write to foo because there 391 + are not supposed to be concurrent accesses during initialization. The 392 + ASSERT_EXCLUSIVE_ACCESS() allows KCSAN to flag buggy concurrent unmarked 393 + reads, and the ASSERT_EXCLUSIVE_ACCESS() call further allows KCSAN to 394 + flag buggy concurrent writes, even if: (1) Those writes are marked or 395 + (2) The kernel was built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=y. 
396 + 397 + 398 + Checking Stress-Test Race Coverage 399 + ---------------------------------- 400 + 401 + When designing stress tests it is important to ensure that race conditions 402 + of interest really do occur. For example, consider the following code 403 + fragment: 404 + 405 + int foo; 406 + 407 + int update_foo(int newval) 408 + { 409 + return xchg(&foo, newval); 410 + } 411 + 412 + int xor_shift_foo(int shift, int mask) 413 + { 414 + int old, new, newold; 415 + 416 + newold = data_race(foo); /* Checked by cmpxchg(). */ 417 + do { 418 + old = newold; 419 + new = (old << shift) ^ mask; 420 + newold = cmpxchg(&foo, old, new); 421 + } while (newold != old); 422 + return old; 423 + } 424 + 425 + int read_foo(void) 426 + { 427 + return READ_ONCE(foo); 428 + } 429 + 430 + If it is possible for update_foo(), xor_shift_foo(), and read_foo() to be 431 + invoked concurrently, the stress test should force this concurrency to 432 + actually happen. KCSAN can evaluate the stress test when the above code 433 + is modified to read as follows: 434 + 435 + int foo; 436 + 437 + int update_foo(int newval) 438 + { 439 + ASSERT_EXCLUSIVE_ACCESS(foo); 440 + return xchg(&foo, newval); 441 + } 442 + 443 + int xor_shift_foo(int shift, int mask) 444 + { 445 + int old, new, newold; 446 + 447 + newold = data_race(foo); /* Checked by cmpxchg(). */ 448 + do { 449 + old = newold; 450 + new = (old << shift) ^ mask; 451 + ASSERT_EXCLUSIVE_ACCESS(foo); 452 + newold = cmpxchg(&foo, old, new); 453 + } while (newold != old); 454 + return old; 455 + } 456 + 457 + 458 + int read_foo(void) 459 + { 460 + ASSERT_EXCLUSIVE_ACCESS(foo); 461 + return READ_ONCE(foo); 462 + } 463 + 464 + If a given stress-test run does not result in KCSAN complaints from 465 + each possible pair of ASSERT_EXCLUSIVE_ACCESS() invocations, the 466 + stress test needs improvement. 
If the stress test is to be evaluated 467 + on a regular basis, it would be wise to place the above instances of 468 + ASSERT_EXCLUSIVE_ACCESS() under #ifdef so that they do not result in 469 + false positives when not evaluating the stress test. 470 + 471 + 472 + REFERENCES 473 + ========== 474 + 475 + [1] "Concurrency bugs should fear the big bad data-race detector (part 2)" 476 + https://lwn.net/Articles/816854/ 477 + 478 + [2] "Who's afraid of a big bad optimizing compiler?" 479 + https://lwn.net/Articles/793253/
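For readers who want to experiment with these patterns outside the kernel, the effect of READ_ONCE() and WRITE_ONCE() can be approximated with volatile casts. This is a simplification of the kernel's actual definitions (it ignores the kernel's handling of accesses wider than a machine word):

```c
#include <assert.h>

/* Simplified userspace approximations: the volatile access forbids the
 * compiler from fusing, tearing (for word-sized types), or eliding these
 * particular loads and stores. __typeof__ is a GCC/Clang extension. */
#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, val) \
	do { *(volatile __typeof__(x) *)&(x) = (val); } while (0)

static int foo;

static void set_foo(int v)
{
	WRITE_ONCE(foo, v);
}

static int get_foo(void)
{
	return READ_ONCE(foo);
}
```

Such a harness lets the document's examples be compiled and inspected (e.g. at -O2 in a disassembler) to see which optimizations the markings actually suppress.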
-1
tools/memory-model/Documentation/simple.txt
··· 189 189 190 190 Documentation/atomic_t.txt 191 191 Documentation/atomic_bitops.txt 192 - Documentation/core-api/atomic_ops.rst 193 192 Documentation/core-api/refcount-vs-atomic.rst 194 193 195 194 Reading code using these primitives is often also quite helpful.