Merge tag 'locking-urgent-2024-09-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:
 "lockdep:
   - Fix a potential deadlock between lockdep and RCU (Zhiguo Niu)
   - Use str_plural() to address a Coccinelle warning (Thorsten Blum)
   - Add a debuggability enhancement (Luis Claudio R. Goncalves)

  static keys & calls:
   - Fix static_key_slow_dec() yet again (Peter Zijlstra)
   - Handle module init failure correctly in static_call_del_module()
     (Thomas Gleixner)
   - Replace pointless WARN_ON() in static_call_module_notify()
     (Thomas Gleixner)

  <linux/cleanup.h>:
   - Add usage and style documentation (Dan Williams)

  rwsems:
   - Move is_rwsem_reader_owned() and rwsem_owner() under
     CONFIG_DEBUG_RWSEMS (Waiman Long)

  atomic ops, x86:
   - Redeclare x86_32 arch_atomic64_{add,sub}() as void (Uros Bizjak)
   - Introduce the read64_nonatomic macro to x86_32 with cx8
     (Uros Bizjak)"

Signed-off-by: Ingo Molnar <mingo@kernel.org>

* tag 'locking-urgent-2024-09-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
locking/rwsem: Move is_rwsem_reader_owned() and rwsem_owner() under CONFIG_DEBUG_RWSEMS
jump_label: Fix static_key_slow_dec() yet again
static_call: Replace pointless WARN_ON() in static_call_module_notify()
static_call: Handle module init failure correctly in static_call_del_module()
locking/lockdep: Simplify character output in seq_line()
lockdep: fix deadlock issue between lockdep and rcu
lockdep: Use str_plural() to fix Coccinelle warning
cleanup: Add usage and style documentation
lockdep: suggest the fix for "lockdep bfs error:-1" on print_bfs_bug
locking/atomic/x86: Redeclare x86_32 arch_atomic64_{add,sub}() as void
locking/atomic/x86: Introduce the read64_nonatomic macro to x86_32 with cx8

10 files changed, 241 insertions(+), 45 deletions(-)

Documentation/core-api/cleanup.rst | +8
···
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Scope-based Cleanup Helpers
+===========================
+
+.. kernel-doc:: include/linux/cleanup.h
+   :doc: scope-based cleanup helpers

Documentation/core-api/index.rst | +1
···
    kobject
    kref
+   cleanup
    assoc_array
    xarray
    maple_tree

arch/x86/include/asm/atomic64_32.h | +2 -4
···
 }
 #define arch_atomic64_dec_return arch_atomic64_dec_return

-static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
 {
         __alternative_atomic64(add, add_return,
                                ASM_OUTPUT2("+A" (i), "+c" (v)),
                                ASM_NO_INPUT_CLOBBER("memory"));
-        return i;
 }

-static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
 {
         __alternative_atomic64(sub, sub_return,
                                ASM_OUTPUT2("+A" (i), "+c" (v)),
                                ASM_NO_INPUT_CLOBBER("memory"));
-        return i;
 }

 static __always_inline void arch_atomic64_inc(atomic64_t *v)

arch/x86/lib/atomic64_cx8_32.S | +7 -2
···
         cmpxchg8b (\reg)
 .endm

+.macro read64_nonatomic reg
+        movl (\reg), %eax
+        movl 4(\reg), %edx
+.endm
+
 SYM_FUNC_START(atomic64_read_cx8)
         read64 %ecx
         RET
···
         movl %edx, %edi
         movl %ecx, %ebp

-        read64 %ecx
+        read64_nonatomic %ecx
 1:
         movl %eax, %ebx
         movl %edx, %ecx
···
 SYM_FUNC_START(atomic64_\func\()_return_cx8)
         pushl %ebx

-        read64 %esi
+        read64_nonatomic %esi
 1:
         movl %eax, %ebx
         movl %edx, %ecx
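
A note on why the nonatomic read above is safe: it only produces the initial snapshot that feeds a cmpxchg8b retry loop, so a torn value simply makes the first compare-and-exchange fail and reload. The following is a minimal userspace C sketch of that pattern, not code from this pull; the names are hypothetical and GCC/Clang __atomic builtins stand in for cmpxchg8b.

#include <stdint.h>

/* Hypothetical stand-in for the read64_nonatomic asm macro: two 32-bit
 * loads, so the 64-bit result may be torn if *p changes in between. */
static uint64_t read64_nonatomic_sketch(const volatile uint64_t *p)
{
        const volatile uint32_t *w = (const volatile uint32_t *)p;

        return (uint64_t)w[0] | ((uint64_t)w[1] << 32);
}

/* Sketch of the add_return structure: a possibly torn initial snapshot is
 * fine because the compare-and-exchange (cmpxchg8b in the real code) only
 * succeeds if *v still holds exactly that snapshot. */
static uint64_t atomic64_add_return_sketch(uint64_t i, volatile uint64_t *v)
{
        uint64_t old = read64_nonatomic_sketch(v);

        while (!__atomic_compare_exchange_n(v, &old, old + i, 0,
                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
                ;       /* 'old' has been refreshed with the current value */

        return old + i;
}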

include/linux/cleanup.h | +136
···

 #include <linux/compiler.h>

+/**
+ * DOC: scope-based cleanup helpers
+ *
+ * The "goto error" pattern is notorious for introducing subtle resource
+ * leaks. It is tedious and error prone to add new resource acquisition
+ * constraints into code paths that already have several unwind
+ * conditions. The "cleanup" helpers enable the compiler to help with
+ * this tedium and can aid in maintaining LIFO (last in first out)
+ * unwind ordering to avoid unintentional leaks.
+ *
+ * As drivers make up the majority of the kernel code base, here is an
+ * example of using these helpers to clean up PCI drivers. The target of
+ * the cleanups are occasions where a goto is used to unwind a device
+ * reference (pci_dev_put()), or unlock the device (pci_dev_unlock())
+ * before returning.
+ *
+ * The DEFINE_FREE() macro can arrange for PCI device references to be
+ * dropped when the associated variable goes out of scope::
+ *
+ *        DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T))
+ *        ...
+ *        struct pci_dev *dev __free(pci_dev_put) =
+ *                pci_get_slot(parent, PCI_DEVFN(0, 0));
+ *
+ * The above will automatically call pci_dev_put() if @dev is non-NULL
+ * when @dev goes out of scope (automatic variable scope). If a function
+ * wants to invoke pci_dev_put() on error, but return @dev (i.e. without
+ * freeing it) on success, it can do::
+ *
+ *        return no_free_ptr(dev);
+ *
+ * ...or::
+ *
+ *        return_ptr(dev);
+ *
+ * The DEFINE_GUARD() macro can arrange for the PCI device lock to be
+ * dropped when the scope where guard() is invoked ends::
+ *
+ *        DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T))
+ *        ...
+ *        guard(pci_dev)(dev);
+ *
+ * The lifetime of the lock obtained by the guard() helper follows the
+ * scope of automatic variable declaration. Take the following example::
+ *
+ *        func(...)
+ *        {
+ *                if (...) {
+ *                        ...
+ *                        guard(pci_dev)(dev); // pci_dev_lock() invoked here
+ *                        ...
+ *                } // <- implied pci_dev_unlock() triggered here
+ *        }
+ *
+ * Observe the lock is held for the remainder of the "if ()" block not
+ * the remainder of "func()".
+ *
+ * Now, when a function uses both __free() and guard(), or multiple
+ * instances of __free(), the LIFO order of variable definition order
+ * matters. GCC documentation says:
+ *
+ * "When multiple variables in the same scope have cleanup attributes,
+ * at exit from the scope their associated cleanup functions are run in
+ * reverse order of definition (last defined, first cleanup)."
+ *
+ * When the unwind order matters it requires that variables be defined
+ * mid-function scope rather than at the top of the file. Take the
+ * following example and notice the bug highlighted by "!!"::
+ *
+ *        LIST_HEAD(list);
+ *        DEFINE_MUTEX(lock);
+ *
+ *        struct object {
+ *                struct list_head node;
+ *        };
+ *
+ *        static struct object *alloc_add(void)
+ *        {
+ *                struct object *obj;
+ *
+ *                lockdep_assert_held(&lock);
+ *                obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ *                if (obj) {
+ *                        LIST_HEAD_INIT(&obj->node);
+ *                        list_add(obj->node, &list):
+ *                }
+ *                return obj;
+ *        }
+ *
+ *        static void remove_free(struct object *obj)
+ *        {
+ *                lockdep_assert_held(&lock);
+ *                list_del(&obj->node);
+ *                kfree(obj);
+ *        }
+ *
+ *        DEFINE_FREE(remove_free, struct object *, if (_T) remove_free(_T))
+ *        static int init(void)
+ *        {
+ *                struct object *obj __free(remove_free) = NULL;
+ *                int err;
+ *
+ *                guard(mutex)(&lock);
+ *                obj = alloc_add();
+ *
+ *                if (!obj)
+ *                        return -ENOMEM;
+ *
+ *                err = other_init(obj);
+ *                if (err)
+ *                        return err; // remove_free() called without the lock!!
+ *
+ *                no_free_ptr(obj);
+ *                return 0;
+ *        }
+ *
+ * That bug is fixed by changing init() to call guard() and define +
+ * initialize @obj in this order::
+ *
+ *        guard(mutex)(&lock);
+ *        struct object *obj __free(remove_free) = alloc_add();
+ *
+ * Given that the "__free(...) = NULL" pattern for variables defined at
+ * the top of the function poses this potential interdependency problem
+ * the recommendation is to always define and assign variables in one
+ * statement and not group variable definitions at the top of the
+ * function when __free() is used.
+ *
+ * Lastly, given that the benefit of cleanup helpers is removal of
+ * "goto", and that the "goto" statement can jump between scopes, the
+ * expectation is that usage of "goto" and cleanup helpers is never
+ * mixed in the same function. I.e. for a given routine, convert all
+ * resources that need a "goto" cleanup to scope-based cleanup, or
+ * convert none of them.
+ */
+
 /*
  * DEFINE_FREE(name, type, free):
  *        simple helper macro that defines the required wrapper for a __free()
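
To make the documented fix concrete, here is what the corrected init() from the example above could look like in full. This is only a sketch built from the documentation's own hypothetical helpers (alloc_add(), other_init(), remove_free(), lock), with guard() taken before the __free() variable is defined so the LIFO unwind releases the object while the mutex is still held.

DEFINE_FREE(remove_free, struct object *, if (_T) remove_free(_T))
static int init(void)
{
        int err;

        guard(mutex)(&lock);                            /* unwinds last */
        struct object *obj __free(remove_free) = alloc_add();  /* unwinds first */

        if (!obj)
                return -ENOMEM;

        err = other_init(obj);
        if (err)
                return err;     /* remove_free() now runs with the lock held */

        no_free_ptr(obj);       /* success: keep the object on the list */
        return 0;
}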

kernel/jump_label.c | +27 -7
···
         jump_label_update(key);
         /*
          * Ensure that when static_key_fast_inc_not_disabled() or
-         * static_key_slow_try_dec() observe the positive value,
+         * static_key_dec_not_one() observe the positive value,
          * they must also observe all the text changes.
          */
         atomic_set_release(&key->enabled, 1);
···
 }
 EXPORT_SYMBOL_GPL(static_key_disable);

-static bool static_key_slow_try_dec(struct static_key *key)
+static bool static_key_dec_not_one(struct static_key *key)
 {
         int v;

···
                 * enabled. This suggests an ordering problem on the user side.
                 */
                WARN_ON_ONCE(v < 0);
+
+               /*
+                * Warn about underflow, and lie about success in an attempt to
+                * not make things worse.
+                */
+               if (WARN_ON_ONCE(v == 0))
+                       return true;
+
                if (v <= 1)
                        return false;
        } while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v - 1)));
···
 static void __static_key_slow_dec_cpuslocked(struct static_key *key)
 {
         lockdep_assert_cpus_held();
+        int val;

-        if (static_key_slow_try_dec(key))
+        if (static_key_dec_not_one(key))
                 return;

         guard(mutex)(&jump_label_mutex);
-        if (atomic_cmpxchg(&key->enabled, 1, 0) == 1)
+        val = atomic_read(&key->enabled);
+        /*
+         * It should be impossible to observe -1 with jump_label_mutex held,
+         * see static_key_slow_inc_cpuslocked().
+         */
+        if (WARN_ON_ONCE(val == -1))
+                return;
+        /*
+         * Cannot already be 0, something went sideways.
+         */
+        if (WARN_ON_ONCE(val == 0))
+                return;
+
+        if (atomic_dec_and_test(&key->enabled))
                 jump_label_update(key);
-        else
-                WARN_ON_ONCE(!static_key_slow_try_dec(key));
 }

 static void __static_key_slow_dec(struct static_key *key)
···
 {
         STATIC_KEY_CHECK_USE(key);

-        if (static_key_slow_try_dec(key))
+        if (static_key_dec_not_one(key))
                 return;

         schedule_delayed_work(work, timeout);
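
For context on what the new WARN_ON_ONCE(v == 0) underflow check protects, here is a minimal sketch of the balanced increment/decrement usage static keys expect. The driver code and key name are hypothetical, not part of this pull; the jump_label API calls are the standard ones.

#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(my_feature_key);  /* hypothetical key */

static void my_feature_enable(void)
{
        static_branch_inc(&my_feature_key);      /* enabled: 0 -> 1 patches the text */
}

static void my_feature_disable(void)
{
        /*
         * Must pair 1:1 with my_feature_enable(). With this fix an extra
         * decrement trips the WARN_ON_ONCE() in static_key_dec_not_one()
         * instead of silently corrupting the enabled count.
         */
        static_branch_dec(&my_feature_key);
}

static bool my_feature_active(void)
{
        return static_branch_unlikely(&my_feature_key);
}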

kernel/locking/lockdep.c | +37 -18
···
                 printk("no locks held by %s/%d.\n", p->comm, task_pid_nr(p));
         else
                 printk("%d lock%s held by %s/%d:\n", depth,
-                       depth > 1 ? "s" : "", p->comm, task_pid_nr(p));
+                       str_plural(depth), p->comm, task_pid_nr(p));
         /*
          * It's not reliable to print a task's held locks if it's not sleeping
          * and it's not the current task.
···
         /*
          * Breadth-first-search failed, graph got corrupted?
          */
+        if (ret == BFS_EQUEUEFULL)
+                pr_warn("Increase LOCKDEP_CIRCULAR_QUEUE_BITS to avoid this warning:\n");
+
         WARN(1, "lockdep bfs error:%d\n", ret);
 }
···
 static void free_zapped_rcu(struct rcu_head *cb);

 /*
- * Schedule an RCU callback if no RCU callback is pending. Must be called with
- * the graph lock held.
- */
-static void call_rcu_zapped(struct pending_free *pf)
+ * See if we need to queue an RCU callback, must called with
+ * the lockdep lock held, returns false if either we don't have
+ * any pending free or the callback is already scheduled.
+ * Otherwise, a call_rcu() must follow this function call.
+ */
+static bool prepare_call_rcu_zapped(struct pending_free *pf)
 {
         WARN_ON_ONCE(inside_selftest());

         if (list_empty(&pf->zapped))
-                return;
+                return false;

         if (delayed_free.scheduled)
-                return;
+                return false;

         delayed_free.scheduled = true;

         WARN_ON_ONCE(delayed_free.pf + delayed_free.index != pf);
         delayed_free.index ^= 1;

-        call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
+        return true;
 }

 /* The caller must hold the graph lock. May be called from RCU context. */
···
 {
         struct pending_free *pf;
         unsigned long flags;
+        bool need_callback;

         if (WARN_ON_ONCE(ch != &delayed_free.rcu_head))
                 return;
···
         pf = delayed_free.pf + (delayed_free.index ^ 1);
         __free_zapped_classes(pf);
         delayed_free.scheduled = false;
-
-        /*
-         * If there's anything on the open list, close and start a new callback.
-         */
-        call_rcu_zapped(delayed_free.pf + delayed_free.index);
-
+        need_callback =
+                prepare_call_rcu_zapped(delayed_free.pf + delayed_free.index);
         lockdep_unlock();
         raw_local_irq_restore(flags);
+
+        /*
+         * If there's pending free and its callback has not been scheduled,
+         * queue an RCU callback.
+         */
+        if (need_callback)
+                call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
+
 }
···
 {
         struct pending_free *pf;
         unsigned long flags;
+        bool need_callback;

         init_data_structures_once();

···
         lockdep_lock();
         pf = get_pending_free();
         __lockdep_free_key_range(pf, start, size);
-        call_rcu_zapped(pf);
+        need_callback = prepare_call_rcu_zapped(pf);
         lockdep_unlock();
         raw_local_irq_restore(flags);
-
+        if (need_callback)
+                call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
         /*
          * Wait for any possible iterators from look_up_lock_class() to pass
          * before continuing to free the memory they refer to.
···
         struct pending_free *pf;
         unsigned long flags;
         int locked;
+        bool need_callback = false;

         raw_local_irq_save(flags);
         locked = graph_lock();
···

         pf = get_pending_free();
         __lockdep_reset_lock(pf, lock);
-        call_rcu_zapped(pf);
+        need_callback = prepare_call_rcu_zapped(pf);

         graph_unlock();
 out_irq:
         raw_local_irq_restore(flags);
+        if (need_callback)
+                call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
 }
···
         struct pending_free *pf;
         unsigned long flags;
         bool found = false;
+        bool need_callback = false;

         might_sleep();

···
         if (found) {
                 pf = get_pending_free();
                 __lockdep_free_key_range(pf, key, 1);
-                call_rcu_zapped(pf);
+                need_callback = prepare_call_rcu_zapped(pf);
         }
         lockdep_unlock();
         raw_local_irq_restore(flags);
+
+        if (need_callback)
+                call_rcu(&delayed_free.rcu_head, free_zapped_rcu);

         /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */
         synchronize_rcu();
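
The shape of the lockdep/RCU deadlock fix above generalizes: decide under your own lock whether an RCU callback is needed, but only invoke call_rcu() after that lock is dropped, so the two locking domains never nest. A minimal sketch of that pattern follows; the names are hypothetical and this is not code from the pull, only an illustration of the "prepare under lock, call_rcu() after unlock" split.

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(zap_lock);   /* stand-in for the lockdep graph lock */
static struct rcu_head zap_rcu_head;
static bool zap_scheduled;

static void zap_rcu_cb(struct rcu_head *rh)
{
        /* free the entries that were zapped under zap_lock */
}

static void zap_and_maybe_schedule(void)
{
        unsigned long flags;
        bool need_callback;

        raw_spin_lock_irqsave(&zap_lock, flags);
        /* ... move entries onto a zapped list ... */
        need_callback = !zap_scheduled;         /* like prepare_call_rcu_zapped() */
        if (need_callback)
                zap_scheduled = true;
        raw_spin_unlock_irqrestore(&zap_lock, flags);

        if (need_callback)                      /* call_rcu() with zap_lock dropped */
                call_rcu(&zap_rcu_head, zap_rcu_cb);
}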

kernel/locking/lockdep_proc.c | +1 -1
···
         for (i = 0; i < offset; i++)
                 seq_puts(m, " ");
         for (i = 0; i < length; i++)
-                seq_printf(m, "%c", c);
+                seq_putc(m, c);
         seq_puts(m, "\n");
 }

kernel/locking/rwsem.c | +10 -12
···
         __rwsem_set_reader_owned(sem, current);
 }

+#ifdef CONFIG_DEBUG_RWSEMS
+/*
+ * Return just the real task structure pointer of the owner
+ */
+static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
+{
+        return (struct task_struct *)
+                (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
+}
+
 /*
  * Return true if the rwsem is owned by a reader.
  */
 static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
 {
-#ifdef CONFIG_DEBUG_RWSEMS
         /*
          * Check the count to see if it is write-locked.
          */
···

         if (count & RWSEM_WRITER_MASK)
                 return false;
-#endif
         return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
 }

-#ifdef CONFIG_DEBUG_RWSEMS
 /*
  * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
  * is a task pointer in owner of a reader-owned rwsem, it will be the
···
         }

         return false;
-}
-
-/*
- * Return just the real task structure pointer of the owner
- */
-static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
-{
-        return (struct task_struct *)
-                (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
 }

 /*

kernel/static_call_inline.c | +12 -1
···

         for (site = start; site < stop; site++) {
                 key = static_call_key(site);
+
+                /*
+                 * If the key was not updated due to a memory allocation
+                 * failure in __static_call_init() then treating key::sites
+                 * as key::mods in the code below would cause random memory
+                 * access and #GP. In that case all subsequent sites have
+                 * not been touched either, so stop iterating.
+                 */
+                if (!static_call_key_has_mods(key))
+                        break;
+
                 if (key == prev_key)
                         continue;

···
         case MODULE_STATE_COMING:
                 ret = static_call_add_module(mod);
                 if (ret) {
-                        WARN(1, "Failed to allocate memory for static calls");
+                        pr_warn("Failed to allocate memory for static calls\n");
                         static_call_del_module(mod);
                 }
                 break;