Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

tjh.dev / kernel

fork atom

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork atom

Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull core locking changes from Ingo Molnar:
"Main changes:

- jump label asm preparatory work for PowerPC (Anton Blanchard)

- rwsem optimizations and cleanups (Davidlohr Bueso)

- mutex optimizations and cleanups (Jason Low)

- futex fix (Oleg Nesterov)

- remove broken atomicity checks from {READ,WRITE}_ONCE() (Peter
Zijlstra)"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
powerpc, jump_label: Include linux/jump_label.h to get HAVE_JUMP_LABEL define
jump_label: Allow jump labels to be used in assembly
jump_label: Allow asm/jump_label.h to be included in assembly
locking/mutex: Further simplify mutex_spin_on_owner()
locking: Remove atomicity checks from {READ,WRITE}_ONCE
locking/rtmutex: Rename argument in the rt_mutex_adjust_prio_chain() documentation as well
locking/rwsem: Fix lock optimistic spinning when owner is not running
locking: Remove ACCESS_ONCE() usage
locking/rwsem: Check for active lock before bailing on spinning
locking/rwsem: Avoid deceiving lock spinners
locking/rwsem: Set lock ownership ASAP
locking/rwsem: Document barrier need when waking tasks
locking/futex: Check PF_KTHREAD rather than !p->mm to filter out kthreads
locking/mutex: Refactor mutex_spin_on_owner()
locking/mutex: In mutex_spin_on_owner(), return true when owner changes

Linus Torvalds 11 years ago cc76ee75 9c65e12a

+160 -147

23 changed files

expand all collapse all

Makefile

arch

arm

include

asm

jump_label.h

arm64

include

asm

jump_label.h

mips

include

asm

jump_label.h

powerpc

platforms

powernv

opal-wrappers.S

pseries

hvCall.S

lpar.c

s390

include

asm

jump_label.h

sparc

include

asm

jump_label.h

x86

include

asm

jump_label.h

include

linux

compiler.h

jump_label.h

seqlock.h

kernel

futex.c

locking

mcs_spinlock.h

mutex.c

osq_lock.c

rtmutex.c

rwsem-spinlock.c

rwsem-xadd.c

rwsem.c

rwsem.h

lib

lockref.c

Makefile

reviewed

··· 779 779 # check for 'asm goto' 780 780 ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) 781 781 KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO 782 782 + KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO 782 783 endif 783 784 784 785 include $(srctree)/scripts/Makefile.kasan

+2 -3

arch/arm/include/asm/jump_label.h

reviewed

··· 1 1 #ifndef _ASM_ARM_JUMP_LABEL_H 2 2 #define _ASM_ARM_JUMP_LABEL_H 3 3 4 4 - #ifdef __KERNEL__ 4 4 + #ifndef __ASSEMBLY__ 5 5 6 6 #include <linux/types.h> 7 7 ··· 27 27 return true; 28 28 } 29 29 30 30 - #endif /* __KERNEL__ */ 31 31 - 32 30 typedef u32 jump_label_t; 33 31 34 32 struct jump_entry { ··· 35 37 jump_label_t key; 36 38 }; 37 39 40 40 + #endif /* __ASSEMBLY__ */ 38 41 #endif

+4 -4

arch/arm64/include/asm/jump_label.h

reviewed

··· 18 18 */ 19 19 #ifndef __ASM_JUMP_LABEL_H 20 20 #define __ASM_JUMP_LABEL_H 21 21 + 22 22 + #ifndef __ASSEMBLY__ 23 23 + 21 24 #include <linux/types.h> 22 25 #include <asm/insn.h> 23 23 - 24 24 - #ifdef __KERNEL__ 25 26 26 27 #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE 27 28 ··· 40 39 return true; 41 40 } 42 41 43 43 - #endif /* __KERNEL__ */ 44 44 - 45 42 typedef u64 jump_label_t; 46 43 47 44 struct jump_entry { ··· 48 49 jump_label_t key; 49 50 }; 50 51 52 52 + #endif /* __ASSEMBLY__ */ 51 53 #endif /* __ASM_JUMP_LABEL_H */

+3 -4

arch/mips/include/asm/jump_label.h

reviewed

··· 8 8 #ifndef _ASM_MIPS_JUMP_LABEL_H 9 9 #define _ASM_MIPS_JUMP_LABEL_H 10 10 11 11 - #include <linux/types.h> 11 11 + #ifndef __ASSEMBLY__ 12 12 13 13 - #ifdef __KERNEL__ 13 13 + #include <linux/types.h> 14 14 15 15 #define JUMP_LABEL_NOP_SIZE 4 16 16 ··· 39 39 return true; 40 40 } 41 41 42 42 - #endif /* __KERNEL__ */ 43 43 - 44 42 #ifdef CONFIG_64BIT 45 43 typedef u64 jump_label_t; 46 44 #else ··· 51 53 jump_label_t key; 52 54 }; 53 55 56 56 + #endif /* __ASSEMBLY__ */ 54 57 #endif /* _ASM_MIPS_JUMP_LABEL_H */

+1 -1

arch/powerpc/platforms/powernv/opal-wrappers.S

reviewed

··· 9 9 * 2 of the License, or (at your option) any later version. 10 10 */ 11 11 12 12 + #include <linux/jump_label.h> 12 13 #include <asm/ppc_asm.h> 13 14 #include <asm/hvcall.h> 14 15 #include <asm/asm-offsets.h> 15 16 #include <asm/opal.h> 16 16 - #include <asm/jump_label.h> 17 17 18 18 .section ".text" 19 19

+1 -1

arch/powerpc/platforms/pseries/hvCall.S

reviewed

··· 7 7 * as published by the Free Software Foundation; either version 8 8 * 2 of the License, or (at your option) any later version. 9 9 */ 10 10 + #include <linux/jump_label.h> 10 11 #include <asm/hvcall.h> 11 12 #include <asm/processor.h> 12 13 #include <asm/ppc_asm.h> 13 14 #include <asm/asm-offsets.h> 14 15 #include <asm/ptrace.h> 15 15 - #include <asm/jump_label.h> 16 16 17 17 .section ".text" 18 18

+1 -1

arch/powerpc/platforms/pseries/lpar.c

reviewed

··· 26 26 #include <linux/dma-mapping.h> 27 27 #include <linux/console.h> 28 28 #include <linux/export.h> 29 29 - #include <linux/static_key.h> 29 29 + #include <linux/jump_label.h> 30 30 #include <asm/processor.h> 31 31 #include <asm/mmu.h> 32 32 #include <asm/page.h>

arch/s390/include/asm/jump_label.h

reviewed

··· 1 1 #ifndef _ASM_S390_JUMP_LABEL_H 2 2 #define _ASM_S390_JUMP_LABEL_H 3 3 4 4 + #ifndef __ASSEMBLY__ 5 5 + 4 6 #include <linux/types.h> 5 7 6 8 #define JUMP_LABEL_NOP_SIZE 6 ··· 41 39 jump_label_t key; 42 40 }; 43 41 42 42 + #endif /* __ASSEMBLY__ */ 44 43 #endif

+2 -3

arch/sparc/include/asm/jump_label.h

reviewed

··· 1 1 #ifndef _ASM_SPARC_JUMP_LABEL_H 2 2 #define _ASM_SPARC_JUMP_LABEL_H 3 3 4 4 - #ifdef __KERNEL__ 4 4 + #ifndef __ASSEMBLY__ 5 5 6 6 #include <linux/types.h> 7 7 ··· 22 22 return true; 23 23 } 24 24 25 25 - #endif /* __KERNEL__ */ 26 26 - 27 25 typedef u32 jump_label_t; 28 26 29 27 struct jump_entry { ··· 30 32 jump_label_t key; 31 33 }; 32 34 35 35 + #endif /* __ASSEMBLY__ */ 33 36 #endif

+2 -3

arch/x86/include/asm/jump_label.h

reviewed

··· 1 1 #ifndef _ASM_X86_JUMP_LABEL_H 2 2 #define _ASM_X86_JUMP_LABEL_H 3 3 4 4 - #ifdef __KERNEL__ 4 4 + #ifndef __ASSEMBLY__ 5 5 6 6 #include <linux/stringify.h> 7 7 #include <linux/types.h> ··· 30 30 return true; 31 31 } 32 32 33 33 - #endif /* __KERNEL__ */ 34 34 - 35 33 #ifdef CONFIG_X86_64 36 34 typedef u64 jump_label_t; 37 35 #else ··· 42 44 jump_label_t key; 43 45 }; 44 46 47 47 + #endif /* __ASSEMBLY__ */ 45 48 #endif

-16

include/linux/compiler.h

reviewed

··· 192 192 193 193 #include <uapi/linux/types.h> 194 194 195 195 - static __always_inline void data_access_exceeds_word_size(void) 196 196 - #ifdef __compiletime_warning 197 197 - __compiletime_warning("data access exceeds word size and won't be atomic") 198 198 - #endif 199 199 - ; 200 200 - 201 201 - static __always_inline void data_access_exceeds_word_size(void) 202 202 - { 203 203 - } 204 204 - 205 195 static __always_inline void __read_once_size(const volatile void *p, void *res, int size) 206 196 { 207 197 switch (size) { 208 198 case 1: *(__u8 *)res = *(volatile __u8 *)p; break; 209 199 case 2: *(__u16 *)res = *(volatile __u16 *)p; break; 210 200 case 4: *(__u32 *)res = *(volatile __u32 *)p; break; 211 211 - #ifdef CONFIG_64BIT 212 201 case 8: *(__u64 *)res = *(volatile __u64 *)p; break; 213 213 - #endif 214 202 default: 215 203 barrier(); 216 204 __builtin_memcpy((void *)res, (const void *)p, size); 217 217 - data_access_exceeds_word_size(); 218 205 barrier(); 219 206 } 220 207 } ··· 212 225 case 1: *(volatile __u8 *)p = *(__u8 *)res; break; 213 226 case 2: *(volatile __u16 *)p = *(__u16 *)res; break; 214 227 case 4: *(volatile __u32 *)p = *(__u32 *)res; break; 215 215 - #ifdef CONFIG_64BIT 216 228 case 8: *(volatile __u64 *)p = *(__u64 *)res; break; 217 217 - #endif 218 229 default: 219 230 barrier(); 220 231 __builtin_memcpy((void *)p, (const void *)res, size); 221 221 - data_access_exceeds_word_size(); 222 232 barrier(); 223 233 } 224 234 }

+17 -4

include/linux/jump_label.h

reviewed

··· 45 45 * same as using STATIC_KEY_INIT_FALSE. 46 46 */ 47 47 48 48 + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) 49 49 + # define HAVE_JUMP_LABEL 50 50 + #endif 51 51 + 52 52 + #ifndef __ASSEMBLY__ 53 53 + 48 54 #include <linux/types.h> 49 55 #include <linux/compiler.h> 50 56 #include <linux/bug.h> ··· 61 55 "%s used before call to jump_label_init", \ 62 56 __func__) 63 57 64 64 - #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) 58 58 + #ifdef HAVE_JUMP_LABEL 65 59 66 60 struct static_key { 67 61 atomic_t enabled; ··· 72 66 #endif 73 67 }; 74 68 75 75 - # include <asm/jump_label.h> 76 76 - # define HAVE_JUMP_LABEL 77 69 #else 78 70 struct static_key { 79 71 atomic_t enabled; 80 72 }; 81 81 - #endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ 73 73 + #endif /* HAVE_JUMP_LABEL */ 74 74 + #endif /* __ASSEMBLY__ */ 75 75 + 76 76 + #ifdef HAVE_JUMP_LABEL 77 77 + #include <asm/jump_label.h> 78 78 + #endif 79 79 + 80 80 + #ifndef __ASSEMBLY__ 82 81 83 82 enum jump_label_type { 84 83 JUMP_LABEL_DISABLE = 0, ··· 214 203 } 215 204 216 205 #endif /* _LINUX_JUMP_LABEL_H */ 206 206 + 207 207 + #endif /* __ASSEMBLY__ */

+3 -3

include/linux/seqlock.h

reviewed

··· 108 108 unsigned ret; 109 109 110 110 repeat: 111 111 - ret = ACCESS_ONCE(s->sequence); 111 111 + ret = READ_ONCE(s->sequence); 112 112 if (unlikely(ret & 1)) { 113 113 cpu_relax(); 114 114 goto repeat; ··· 127 127 */ 128 128 static inline unsigned raw_read_seqcount(const seqcount_t *s) 129 129 { 130 130 - unsigned ret = ACCESS_ONCE(s->sequence); 130 130 + unsigned ret = READ_ONCE(s->sequence); 131 131 smp_rmb(); 132 132 return ret; 133 133 } ··· 179 179 */ 180 180 static inline unsigned raw_seqcount_begin(const seqcount_t *s) 181 181 { 182 182 - unsigned ret = ACCESS_ONCE(s->sequence); 182 182 + unsigned ret = READ_ONCE(s->sequence); 183 183 smp_rmb(); 184 184 return ret & ~1; 185 185 }

+1 -1

kernel/futex.c

reviewed

··· 900 900 if (!p) 901 901 return -ESRCH; 902 902 903 903 - if (!p->mm) { 903 903 + if (unlikely(p->flags & PF_KTHREAD)) { 904 904 put_task_struct(p); 905 905 return -EPERM; 906 906 }

+3 -3

kernel/locking/mcs_spinlock.h

reviewed

··· 78 78 */ 79 79 return; 80 80 } 81 81 - ACCESS_ONCE(prev->next) = node; 81 81 + WRITE_ONCE(prev->next, node); 82 82 83 83 /* Wait until the lock holder passes the lock down. */ 84 84 arch_mcs_spin_lock_contended(&node->locked); ··· 91 91 static inline 92 92 void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) 93 93 { 94 94 - struct mcs_spinlock *next = ACCESS_ONCE(node->next); 94 94 + struct mcs_spinlock *next = READ_ONCE(node->next); 95 95 96 96 if (likely(!next)) { 97 97 /* ··· 100 100 if (likely(cmpxchg(lock, node, NULL) == node)) 101 101 return; 102 102 /* Wait until the next pointer is set */ 103 103 - while (!(next = ACCESS_ONCE(node->next))) 103 103 + while (!(next = READ_ONCE(node->next))) 104 104 cpu_relax_lowlatency(); 105 105 } 106 106

+21 -30

kernel/locking/mutex.c

reviewed

··· 25 25 #include <linux/spinlock.h> 26 26 #include <linux/interrupt.h> 27 27 #include <linux/debug_locks.h> 28 28 - #include "mcs_spinlock.h" 28 28 + #include <linux/osq_lock.h> 29 29 30 30 /* 31 31 * In the DEBUG case we are using the "NULL fastpath" for mutexes, ··· 217 217 } 218 218 219 219 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER 220 220 - static inline bool owner_running(struct mutex *lock, struct task_struct *owner) 221 221 - { 222 222 - if (lock->owner != owner) 223 223 - return false; 224 224 - 225 225 - /* 226 226 - * Ensure we emit the owner->on_cpu, dereference _after_ checking 227 227 - * lock->owner still matches owner, if that fails, owner might 228 228 - * point to free()d memory, if it still matches, the rcu_read_lock() 229 229 - * ensures the memory stays valid. 230 230 - */ 231 231 - barrier(); 232 232 - 233 233 - return owner->on_cpu; 234 234 - } 235 235 - 236 220 /* 237 221 * Look out! "owner" is an entirely speculative pointer 238 222 * access and not reliable. 239 223 */ 240 224 static noinline 241 241 - int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) 225 225 + bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) 242 226 { 227 227 + bool ret = true; 228 228 + 243 229 rcu_read_lock(); 244 244 - while (owner_running(lock, owner)) { 245 245 - if (need_resched()) 230 230 + while (lock->owner == owner) { 231 231 + /* 232 232 + * Ensure we emit the owner->on_cpu, dereference _after_ 233 233 + * checking lock->owner still matches owner. If that fails, 234 234 + * owner might point to freed memory. If it still matches, 235 235 + * the rcu_read_lock() ensures the memory stays valid. 
236 236 + */ 237 237 + barrier(); 238 238 + 239 239 + if (!owner->on_cpu || need_resched()) { 240 240 + ret = false; 246 241 break; 242 242 + } 247 243 248 244 cpu_relax_lowlatency(); 249 245 } 250 246 rcu_read_unlock(); 251 247 252 252 - /* 253 253 - * We break out the loop above on need_resched() and when the 254 254 - * owner changed, which is a sign for heavy contention. Return 255 255 - * success only when lock->owner is NULL. 256 256 - */ 257 257 - return lock->owner == NULL; 248 248 + return ret; 258 249 } 259 250 260 251 /* ··· 260 269 return 0; 261 270 262 271 rcu_read_lock(); 263 263 - owner = ACCESS_ONCE(lock->owner); 272 272 + owner = READ_ONCE(lock->owner); 264 273 if (owner) 265 274 retval = owner->on_cpu; 266 275 rcu_read_unlock(); ··· 334 343 * As such, when deadlock detection needs to be 335 344 * performed the optimistic spinning cannot be done. 336 345 */ 337 337 - if (ACCESS_ONCE(ww->ctx)) 346 346 + if (READ_ONCE(ww->ctx)) 338 347 break; 339 348 } 340 349 ··· 342 351 * If there's an owner, wait for it to either 343 352 * release the lock or go to sleep. 344 353 */ 345 345 - owner = ACCESS_ONCE(lock->owner); 354 354 + owner = READ_ONCE(lock->owner); 346 355 if (owner && !mutex_spin_on_owner(lock, owner)) 347 356 break; 348 357 ··· 481 490 __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) 482 491 { 483 492 struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); 484 484 - struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); 493 493 + struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); 485 494 486 495 if (!hold_ctx) 487 496 return 0;

+7 -7

kernel/locking/osq_lock.c

reviewed

··· 98 98 99 99 prev = decode_cpu(old); 100 100 node->prev = prev; 101 101 - ACCESS_ONCE(prev->next) = node; 101 101 + WRITE_ONCE(prev->next, node); 102 102 103 103 /* 104 104 * Normally @prev is untouchable after the above store; because at that ··· 109 109 * cmpxchg in an attempt to undo our queueing. 110 110 */ 111 111 112 112 - while (!ACCESS_ONCE(node->locked)) { 112 112 + while (!READ_ONCE(node->locked)) { 113 113 /* 114 114 * If we need to reschedule bail... so we can block. 115 115 */ ··· 148 148 * Or we race against a concurrent unqueue()'s step-B, in which 149 149 * case its step-C will write us a new @node->prev pointer. 150 150 */ 151 151 - prev = ACCESS_ONCE(node->prev); 151 151 + prev = READ_ONCE(node->prev); 152 152 } 153 153 154 154 /* ··· 170 170 * it will wait in Step-A. 171 171 */ 172 172 173 173 - ACCESS_ONCE(next->prev) = prev; 174 174 - ACCESS_ONCE(prev->next) = next; 173 173 + WRITE_ONCE(next->prev, prev); 174 174 + WRITE_ONCE(prev->next, next); 175 175 176 176 return false; 177 177 } ··· 193 193 node = this_cpu_ptr(&osq_node); 194 194 next = xchg(&node->next, NULL); 195 195 if (next) { 196 196 - ACCESS_ONCE(next->locked) = 1; 196 196 + WRITE_ONCE(next->locked, 1); 197 197 return; 198 198 } 199 199 200 200 next = osq_wait_next(lock, node, NULL); 201 201 if (next) 202 202 - ACCESS_ONCE(next->locked) = 1; 202 202 + WRITE_ONCE(next->locked, 1); 203 203 }

+1 -1

kernel/locking/rtmutex.c

reviewed

··· 349 349 * 350 350 * @task: the task owning the mutex (owner) for which a chain walk is 351 351 * probably needed 352 352 - * @deadlock_detect: do we have to carry out deadlock detection? 352 352 + * @chwalk: do we have to carry out deadlock detection? 353 353 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck 354 354 * things for a task that has just got its priority adjusted, and 355 355 * is waiting on a mutex)

kernel/locking/rwsem-spinlock.c

reviewed

··· 85 85 86 86 list_del(&waiter->list); 87 87 tsk = waiter->task; 88 88 + /* 89 89 + * Make sure we do not wakeup the next reader before 90 90 + * setting the nil condition to grant the next reader; 91 91 + * otherwise we could miss the wakeup on the other 92 92 + * side and end up sleeping again. See the pairing 93 93 + * in rwsem_down_read_failed(). 94 94 + */ 88 95 smp_mb(); 89 96 waiter->task = NULL; 90 97 wake_up_process(tsk);

+58 -40

kernel/locking/rwsem-xadd.c

reviewed

··· 14 14 #include <linux/init.h> 15 15 #include <linux/export.h> 16 16 #include <linux/sched/rt.h> 17 17 + #include <linux/osq_lock.h> 17 18 18 18 - #include "mcs_spinlock.h" 19 19 + #include "rwsem.h" 19 20 20 21 /* 21 22 * Guide to the rw_semaphore's count field for common values. ··· 187 186 waiter = list_entry(next, struct rwsem_waiter, list); 188 187 next = waiter->list.next; 189 188 tsk = waiter->task; 189 189 + /* 190 190 + * Make sure we do not wakeup the next reader before 191 191 + * setting the nil condition to grant the next reader; 192 192 + * otherwise we could miss the wakeup on the other 193 193 + * side and end up sleeping again. See the pairing 194 194 + * in rwsem_down_read_failed(). 195 195 + */ 190 196 smp_mb(); 191 197 waiter->task = NULL; 192 198 wake_up_process(tsk); ··· 266 258 RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) { 267 259 if (!list_is_singular(&sem->wait_list)) 268 260 rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); 261 261 + rwsem_set_owner(sem); 269 262 return true; 270 263 } 271 264 ··· 279 270 */ 280 271 static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) 281 272 { 282 282 - long old, count = ACCESS_ONCE(sem->count); 273 273 + long old, count = READ_ONCE(sem->count); 283 274 284 275 while (true) { 285 276 if (!(count == 0 || count == RWSEM_WAITING_BIAS)) 286 277 return false; 287 278 288 279 old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS); 289 289 - if (old == count) 280 280 + if (old == count) { 281 281 + rwsem_set_owner(sem); 290 282 return true; 283 283 + } 291 284 292 285 count = old; 293 286 } ··· 298 287 static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) 299 288 { 300 289 struct task_struct *owner; 301 301 - bool on_cpu = false; 290 290 + bool ret = true; 302 291 303 292 if (need_resched()) 304 293 return false; 305 294 306 295 rcu_read_lock(); 307 307 - owner = ACCESS_ONCE(sem->owner); 308 308 - if (owner) 309 309 - on_cpu = owner->on_cpu; 296 296 + owner = 
READ_ONCE(sem->owner); 297 297 + if (!owner) { 298 298 + long count = READ_ONCE(sem->count); 299 299 + /* 300 300 + * If sem->owner is not set, yet we have just recently entered the 301 301 + * slowpath with the lock being active, then there is a possibility 302 302 + * reader(s) may have the lock. To be safe, bail spinning in these 303 303 + * situations. 304 304 + */ 305 305 + if (count & RWSEM_ACTIVE_MASK) 306 306 + ret = false; 307 307 + goto done; 308 308 + } 309 309 + 310 310 + ret = owner->on_cpu; 311 311 + done: 310 312 rcu_read_unlock(); 311 311 - 312 312 - /* 313 313 - * If sem->owner is not set, yet we have just recently entered the 314 314 - * slowpath, then there is a possibility reader(s) may have the lock. 315 315 - * To be safe, avoid spinning in these situations. 316 316 - */ 317 317 - return on_cpu; 318 318 - } 319 319 - 320 320 - static inline bool owner_running(struct rw_semaphore *sem, 321 321 - struct task_struct *owner) 322 322 - { 323 323 - if (sem->owner != owner) 324 324 - return false; 325 325 - 326 326 - /* 327 327 - * Ensure we emit the owner->on_cpu, dereference _after_ checking 328 328 - * sem->owner still matches owner, if that fails, owner might 329 329 - * point to free()d memory, if it still matches, the rcu_read_lock() 330 330 - * ensures the memory stays valid. 
331 331 - */ 332 332 - barrier(); 333 333 - 334 334 - return owner->on_cpu; 313 313 + return ret; 335 314 } 336 315 337 316 static noinline 338 317 bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) 339 318 { 319 319 + long count; 320 320 + 340 321 rcu_read_lock(); 341 341 - while (owner_running(sem, owner)) { 342 342 - if (need_resched()) 343 343 - break; 322 322 + while (sem->owner == owner) { 323 323 + /* 324 324 + * Ensure we emit the owner->on_cpu, dereference _after_ 325 325 + * checking sem->owner still matches owner, if that fails, 326 326 + * owner might point to free()d memory, if it still matches, 327 327 + * the rcu_read_lock() ensures the memory stays valid. 328 328 + */ 329 329 + barrier(); 330 330 + 331 331 + /* abort spinning when need_resched or owner is not running */ 332 332 + if (!owner->on_cpu || need_resched()) { 333 333 + rcu_read_unlock(); 334 334 + return false; 335 335 + } 344 336 345 337 cpu_relax_lowlatency(); 346 338 } 347 339 rcu_read_unlock(); 348 340 341 341 + if (READ_ONCE(sem->owner)) 342 342 + return true; /* new owner, continue spinning */ 343 343 + 349 344 /* 350 350 - * We break out the loop above on need_resched() or when the 351 351 - * owner changed, which is a sign for heavy contention. Return 352 352 - * success only when sem->owner is NULL. 345 345 + * When the owner is not set, the lock could be free or 346 346 + * held by readers. Check the counter to verify the 347 347 + * state. 
353 348 */ 354 354 - return sem->owner == NULL; 349 349 + count = READ_ONCE(sem->count); 350 350 + return (count == 0 || count == RWSEM_WAITING_BIAS); 355 351 } 356 352 357 353 static bool rwsem_optimistic_spin(struct rw_semaphore *sem) ··· 376 358 goto done; 377 359 378 360 while (true) { 379 379 - owner = ACCESS_ONCE(sem->owner); 361 361 + owner = READ_ONCE(sem->owner); 380 362 if (owner && !rwsem_spin_on_owner(sem, owner)) 381 363 break; 382 364 ··· 450 432 451 433 /* we're now waiting on the lock, but no longer actively locking */ 452 434 if (waiting) { 453 453 - count = ACCESS_ONCE(sem->count); 435 435 + count = READ_ONCE(sem->count); 454 436 455 437 /* 456 438 * If there were already threads queued before us and there are

+1 -21

kernel/locking/rwsem.c

reviewed

··· 9 9 #include <linux/sched.h> 10 10 #include <linux/export.h> 11 11 #include <linux/rwsem.h> 12 12 - 13 12 #include <linux/atomic.h> 14 13 15 15 - #ifdef CONFIG_RWSEM_SPIN_ON_OWNER 16 16 - static inline void rwsem_set_owner(struct rw_semaphore *sem) 17 17 - { 18 18 - sem->owner = current; 19 19 - } 20 20 - 21 21 - static inline void rwsem_clear_owner(struct rw_semaphore *sem) 22 22 - { 23 23 - sem->owner = NULL; 24 24 - } 25 25 - 26 26 - #else 27 27 - static inline void rwsem_set_owner(struct rw_semaphore *sem) 28 28 - { 29 29 - } 30 30 - 31 31 - static inline void rwsem_clear_owner(struct rw_semaphore *sem) 32 32 - { 33 33 - } 34 34 - #endif 14 14 + #include "rwsem.h" 35 15 36 16 /* 37 17 * lock for reading

+20

kernel/locking/rwsem.h

reviewed

··· 1 1 + #ifdef CONFIG_RWSEM_SPIN_ON_OWNER 2 2 + static inline void rwsem_set_owner(struct rw_semaphore *sem) 3 3 + { 4 4 + sem->owner = current; 5 5 + } 6 6 + 7 7 + static inline void rwsem_clear_owner(struct rw_semaphore *sem) 8 8 + { 9 9 + sem->owner = NULL; 10 10 + } 11 11 + 12 12 + #else 13 13 + static inline void rwsem_set_owner(struct rw_semaphore *sem) 14 14 + { 15 15 + } 16 16 + 17 17 + static inline void rwsem_clear_owner(struct rw_semaphore *sem) 18 18 + { 19 19 + } 20 20 + #endif

+1 -1

lib/lockref.c

reviewed

··· 18 18 #define CMPXCHG_LOOP(CODE, SUCCESS) do { \ 19 19 struct lockref old; \ 20 20 BUILD_BUG_ON(sizeof(old) != 8); \ 21 21 - old.lock_count = ACCESS_ONCE(lockref->lock_count); \ 21 21 + old.lock_count = READ_ONCE(lockref->lock_count); \ 22 22 while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ 23 23 struct lockref new = old, prev = old; \ 24 24 CODE \