Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'topic/qspinlock' into next

Merge Nick's powerpc qspinlock implementation. From his cover letter:

This replaces the generic queued spinlock code (like s390 does) with our
own implementation.

Generic PV qspinlock code is causing latency / starvation regressions on
large systems that are resulting in reported hard lockups (mostly in
pathological cases). The generic qspinlock code has a number of issues
important for powerpc hardware and hypervisors that aren't easily solved
without changing code that would impact other architectures. Follow
s390's lead and implement our own for now.

Issues for powerpc using generic qspinlocks:
- The previous lock value should not be loaded with simple loads, and
need not be passed around from previous loads or cmpxchg results,
because powerpc uses ll/sc-style atomics which can perform more
complex operations that do not require this. powerpc implementations
tend to prefer that loads use larx for improved coherency performance.
- The queueing process should absolutely minimise the number of stores
to the lock word to reduce exclusive coherency probes, important for
large system scalability. The pending logic is counterproductive
here.
- Non-atomic unlock for paravirt locks is important (atomic
instructions still tend to be more expensive than on x86 CPUs).
- Yielding to the lock owner is important in the oversubscribed
paravirt case, which requires storing the owner CPU in the lock
word.
- More control of lock stealing for the paravirt case is important to
keep latency down on large systems.
- The lock acquisition operation should always be made with a special
variant of atomic instructions with the lock hint bit set,
including (especially) in the queueing paths. This is more a matter
of adding more arch lock helpers so not an insurmountable problem
for generic code.

+1234 -82
+1 -2
arch/powerpc/Kconfig
··· 99 99 config GENERIC_LOCKBREAK 100 100 bool 101 101 default y 102 - depends on SMP && PREEMPTION 102 + depends on SMP && PREEMPTION && !PPC_QUEUED_SPINLOCKS 103 103 104 104 config GENERIC_HWEIGHT 105 105 bool ··· 158 158 select ARCH_USE_CMPXCHG_LOCKREF if PPC64 159 159 select ARCH_USE_MEMTEST 160 160 select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS 161 - select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS 162 161 select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT 163 162 select ARCH_WANT_IPC_PARSE_VERSION 164 163 select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
+160 -70
arch/powerpc/include/asm/qspinlock.h
··· 2 2 #ifndef _ASM_POWERPC_QSPINLOCK_H 3 3 #define _ASM_POWERPC_QSPINLOCK_H 4 4 5 - #include <asm-generic/qspinlock_types.h> 5 + #include <linux/compiler.h> 6 + #include <asm/qspinlock_types.h> 6 7 #include <asm/paravirt.h> 7 8 8 - #define _Q_PENDING_LOOPS (1 << 9) /* not tuned */ 9 - 10 - #ifdef CONFIG_PARAVIRT_SPINLOCKS 11 - extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); 12 - extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); 13 - extern void __pv_queued_spin_unlock(struct qspinlock *lock); 14 - 15 - static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) 16 - { 17 - if (!is_shared_processor()) 18 - native_queued_spin_lock_slowpath(lock, val); 19 - else 20 - __pv_queued_spin_lock_slowpath(lock, val); 21 - } 22 - 23 - #define queued_spin_unlock queued_spin_unlock 24 - static inline void queued_spin_unlock(struct qspinlock *lock) 25 - { 26 - if (!is_shared_processor()) 27 - smp_store_release(&lock->locked, 0); 28 - else 29 - __pv_queued_spin_unlock(lock); 30 - } 31 - 9 + #ifdef CONFIG_PPC64 10 + /* 11 + * Use the EH=1 hint for accesses that result in the lock being acquired. 12 + * The hardware is supposed to optimise this pattern by holding the lock 13 + * cacheline longer, and releasing when a store to the same memory (the 14 + * unlock) is performed. 
15 + */ 16 + #define _Q_SPIN_EH_HINT 1 32 17 #else 33 - extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); 34 - #endif 35 - 36 - static __always_inline void queued_spin_lock(struct qspinlock *lock) 37 - { 38 - u32 val = 0; 39 - 40 - if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL))) 41 - return; 42 - 43 - queued_spin_lock_slowpath(lock, val); 44 - } 45 - #define queued_spin_lock queued_spin_lock 46 - 47 - #ifdef CONFIG_PARAVIRT_SPINLOCKS 48 - #define SPIN_THRESHOLD (1<<15) /* not tuned */ 49 - 50 - static __always_inline void pv_wait(u8 *ptr, u8 val) 51 - { 52 - if (*ptr != val) 53 - return; 54 - yield_to_any(); 55 - /* 56 - * We could pass in a CPU here if waiting in the queue and yield to 57 - * the previous CPU in the queue. 58 - */ 59 - } 60 - 61 - static __always_inline void pv_kick(int cpu) 62 - { 63 - prod_cpu(cpu); 64 - } 65 - 66 - extern void __pv_init_lock_hash(void); 67 - 68 - static inline void pv_spinlocks_init(void) 69 - { 70 - __pv_init_lock_hash(); 71 - } 72 - 18 + #define _Q_SPIN_EH_HINT 0 73 19 #endif 74 20 75 21 /* 76 - * Queued spinlocks rely heavily on smp_cond_load_relaxed() to busy-wait, 77 - * which was found to have performance problems if implemented with 78 - * the preferred spin_begin()/spin_end() SMT priority pattern. Use the 79 - * generic version instead. 22 + * The trylock itself may steal. This makes trylocks slightly stronger, and 23 + * makes locks slightly more efficient when stealing. 24 + * 25 + * This is compile-time, so if true then there may always be stealers, so the 26 + * nosteal paths become unused. 80 27 */ 28 + #define _Q_SPIN_TRY_LOCK_STEAL 1 81 29 82 - #include <asm-generic/qspinlock.h> 30 + /* 31 + * Put a speculation barrier after testing the lock/node and finding it 32 + * busy. Try to prevent pointless speculation in slow paths. 33 + * 34 + * Slows down the lockstorm microbenchmark with no stealing, where locking 35 + * is purely FIFO through the queue. 
May have more benefit in real workload 36 + * where speculating into the wrong place could have a greater cost. 37 + */ 38 + #define _Q_SPIN_SPEC_BARRIER 0 39 + 40 + #ifdef CONFIG_PPC64 41 + /* 42 + * Execute a miso instruction after passing the MCS lock ownership to the 43 + * queue head. Miso is intended to make stores visible to other CPUs sooner. 44 + * 45 + * This seems to make the lockstorm microbenchmark nospin test go slightly 46 + * faster on POWER10, but disable for now. 47 + */ 48 + #define _Q_SPIN_MISO 0 49 + #else 50 + #define _Q_SPIN_MISO 0 51 + #endif 52 + 53 + #ifdef CONFIG_PPC64 54 + /* 55 + * This executes miso after an unlock of the lock word, having ownership 56 + * pass to the next CPU sooner. This will slow the uncontended path to some 57 + * degree. Not evidence it helps yet. 58 + */ 59 + #define _Q_SPIN_MISO_UNLOCK 0 60 + #else 61 + #define _Q_SPIN_MISO_UNLOCK 0 62 + #endif 63 + 64 + /* 65 + * Seems to slow down lockstorm microbenchmark, suspect queue node just 66 + * has to become shared again right afterwards when its waiter spins on 67 + * the lock field. 68 + */ 69 + #define _Q_SPIN_PREFETCH_NEXT 0 70 + 71 + static __always_inline int queued_spin_is_locked(struct qspinlock *lock) 72 + { 73 + return READ_ONCE(lock->val); 74 + } 75 + 76 + static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) 77 + { 78 + return !lock.val; 79 + } 80 + 81 + static __always_inline int queued_spin_is_contended(struct qspinlock *lock) 82 + { 83 + return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK); 84 + } 85 + 86 + static __always_inline u32 queued_spin_encode_locked_val(void) 87 + { 88 + /* XXX: make this use lock value in paca like simple spinlocks? 
*/ 89 + return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET); 90 + } 91 + 92 + static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock) 93 + { 94 + u32 new = queued_spin_encode_locked_val(); 95 + u32 prev; 96 + 97 + /* Trylock succeeds only when unlocked and no queued nodes */ 98 + asm volatile( 99 + "1: lwarx %0,0,%1,%3 # __queued_spin_trylock_nosteal \n" 100 + " cmpwi 0,%0,0 \n" 101 + " bne- 2f \n" 102 + " stwcx. %2,0,%1 \n" 103 + " bne- 1b \n" 104 + "\t" PPC_ACQUIRE_BARRIER " \n" 105 + "2: \n" 106 + : "=&r" (prev) 107 + : "r" (&lock->val), "r" (new), 108 + "i" (_Q_SPIN_EH_HINT) 109 + : "cr0", "memory"); 110 + 111 + return likely(prev == 0); 112 + } 113 + 114 + static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock) 115 + { 116 + u32 new = queued_spin_encode_locked_val(); 117 + u32 prev, tmp; 118 + 119 + /* Trylock may get ahead of queued nodes if it finds unlocked */ 120 + asm volatile( 121 + "1: lwarx %0,0,%2,%5 # __queued_spin_trylock_steal \n" 122 + " andc. %1,%0,%4 \n" 123 + " bne- 2f \n" 124 + " and %1,%0,%4 \n" 125 + " or %1,%1,%3 \n" 126 + " stwcx. 
%1,0,%2 \n" 127 + " bne- 1b \n" 128 + "\t" PPC_ACQUIRE_BARRIER " \n" 129 + "2: \n" 130 + : "=&r" (prev), "=&r" (tmp) 131 + : "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK), 132 + "i" (_Q_SPIN_EH_HINT) 133 + : "cr0", "memory"); 134 + 135 + return likely(!(prev & ~_Q_TAIL_CPU_MASK)); 136 + } 137 + 138 + static __always_inline int queued_spin_trylock(struct qspinlock *lock) 139 + { 140 + if (!_Q_SPIN_TRY_LOCK_STEAL) 141 + return __queued_spin_trylock_nosteal(lock); 142 + else 143 + return __queued_spin_trylock_steal(lock); 144 + } 145 + 146 + void queued_spin_lock_slowpath(struct qspinlock *lock); 147 + 148 + static __always_inline void queued_spin_lock(struct qspinlock *lock) 149 + { 150 + if (!queued_spin_trylock(lock)) 151 + queued_spin_lock_slowpath(lock); 152 + } 153 + 154 + static inline void queued_spin_unlock(struct qspinlock *lock) 155 + { 156 + smp_store_release(&lock->locked, 0); 157 + if (_Q_SPIN_MISO_UNLOCK) 158 + asm volatile("miso" ::: "memory"); 159 + } 160 + 161 + #define arch_spin_is_locked(l) queued_spin_is_locked(l) 162 + #define arch_spin_is_contended(l) queued_spin_is_contended(l) 163 + #define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l) 164 + #define arch_spin_lock(l) queued_spin_lock(l) 165 + #define arch_spin_trylock(l) queued_spin_trylock(l) 166 + #define arch_spin_unlock(l) queued_spin_unlock(l) 167 + 168 + #ifdef CONFIG_PARAVIRT_SPINLOCKS 169 + void pv_spinlocks_init(void); 170 + #else 171 + static inline void pv_spinlocks_init(void) { } 172 + #endif 83 173 84 174 #endif /* _ASM_POWERPC_QSPINLOCK_H */
-7
arch/powerpc/include/asm/qspinlock_paravirt.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 - #ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H 3 - #define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H 4 - 5 - EXPORT_SYMBOL(__pv_queued_spin_unlock); 6 - 7 - #endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */
+72
arch/powerpc/include/asm/qspinlock_types.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + #ifndef _ASM_POWERPC_QSPINLOCK_TYPES_H 3 + #define _ASM_POWERPC_QSPINLOCK_TYPES_H 4 + 5 + #include <linux/types.h> 6 + #include <asm/byteorder.h> 7 + 8 + typedef struct qspinlock { 9 + union { 10 + u32 val; 11 + 12 + #ifdef __LITTLE_ENDIAN 13 + struct { 14 + u16 locked; 15 + u8 reserved[2]; 16 + }; 17 + #else 18 + struct { 19 + u8 reserved[2]; 20 + u16 locked; 21 + }; 22 + #endif 23 + }; 24 + } arch_spinlock_t; 25 + 26 + #define __ARCH_SPIN_LOCK_UNLOCKED { { .val = 0 } } 27 + 28 + /* 29 + * Bitfields in the lock word: 30 + * 31 + * 0: locked bit 32 + * 1-14: lock holder cpu 33 + * 15: lock owner or queuer vcpus observed to be preempted bit 34 + * 16: must queue bit 35 + * 17-31: tail cpu (+1) 36 + */ 37 + #define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ 38 + << _Q_ ## type ## _OFFSET) 39 + /* 0x00000001 */ 40 + #define _Q_LOCKED_OFFSET 0 41 + #define _Q_LOCKED_BITS 1 42 + #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) 43 + 44 + /* 0x00007ffe */ 45 + #define _Q_OWNER_CPU_OFFSET 1 46 + #define _Q_OWNER_CPU_BITS 14 47 + #define _Q_OWNER_CPU_MASK _Q_SET_MASK(OWNER_CPU) 48 + 49 + #if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS) 50 + #error "qspinlock does not support such large CONFIG_NR_CPUS" 51 + #endif 52 + 53 + /* 0x00008000 */ 54 + #define _Q_SLEEPY_OFFSET 15 55 + #define _Q_SLEEPY_BITS 1 56 + #define _Q_SLEEPY_VAL (1U << _Q_SLEEPY_OFFSET) 57 + 58 + /* 0x00010000 */ 59 + #define _Q_MUST_Q_OFFSET 16 60 + #define _Q_MUST_Q_BITS 1 61 + #define _Q_MUST_Q_VAL (1U << _Q_MUST_Q_OFFSET) 62 + 63 + /* 0xfffe0000 */ 64 + #define _Q_TAIL_CPU_OFFSET 17 65 + #define _Q_TAIL_CPU_BITS 15 66 + #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) 67 + 68 + #if CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS) 69 + #error "qspinlock does not support such large CONFIG_NR_CPUS" 70 + #endif 71 + 72 + #endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */
+1 -1
arch/powerpc/include/asm/spinlock.h
··· 13 13 /* See include/linux/spinlock.h */ 14 14 #define smp_mb__after_spinlock() smp_mb() 15 15 16 - #ifndef CONFIG_PARAVIRT_SPINLOCKS 16 + #ifndef CONFIG_PPC_QUEUED_SPINLOCKS 17 17 static inline void pv_spinlocks_init(void) { } 18 18 #endif 19 19
+1 -1
arch/powerpc/include/asm/spinlock_types.h
··· 7 7 #endif 8 8 9 9 #ifdef CONFIG_PPC_QUEUED_SPINLOCKS 10 - #include <asm-generic/qspinlock_types.h> 10 + #include <asm/qspinlock_types.h> 11 11 #include <asm-generic/qrwlock_types.h> 12 12 #else 13 13 #include <asm/simple_spinlock_types.h>
+3 -1
arch/powerpc/lib/Makefile
··· 52 52 obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ 53 53 memcpy_64.o copy_mc_64.o 54 54 55 - ifndef CONFIG_PPC_QUEUED_SPINLOCKS 55 + ifdef CONFIG_PPC_QUEUED_SPINLOCKS 56 + obj-$(CONFIG_SMP) += qspinlock.o 57 + else 56 58 obj64-$(CONFIG_SMP) += locks.o 57 59 endif 58 60
+996
arch/powerpc/lib/qspinlock.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + #include <linux/bug.h> 3 + #include <linux/compiler.h> 4 + #include <linux/export.h> 5 + #include <linux/percpu.h> 6 + #include <linux/smp.h> 7 + #include <linux/topology.h> 8 + #include <linux/sched/clock.h> 9 + #include <asm/qspinlock.h> 10 + #include <asm/paravirt.h> 11 + 12 + #define MAX_NODES 4 13 + 14 + struct qnode { 15 + struct qnode *next; 16 + struct qspinlock *lock; 17 + int cpu; 18 + int yield_cpu; 19 + u8 locked; /* 1 if lock acquired */ 20 + }; 21 + 22 + struct qnodes { 23 + int count; 24 + struct qnode nodes[MAX_NODES]; 25 + }; 26 + 27 + /* Tuning parameters */ 28 + static int steal_spins __read_mostly = (1 << 5); 29 + static int remote_steal_spins __read_mostly = (1 << 2); 30 + #if _Q_SPIN_TRY_LOCK_STEAL == 1 31 + static const bool maybe_stealers = true; 32 + #else 33 + static bool maybe_stealers __read_mostly = true; 34 + #endif 35 + static int head_spins __read_mostly = (1 << 8); 36 + 37 + static bool pv_yield_owner __read_mostly = true; 38 + static bool pv_yield_allow_steal __read_mostly = false; 39 + static bool pv_spin_on_preempted_owner __read_mostly = false; 40 + static bool pv_sleepy_lock __read_mostly = true; 41 + static bool pv_sleepy_lock_sticky __read_mostly = false; 42 + static u64 pv_sleepy_lock_interval_ns __read_mostly = 0; 43 + static int pv_sleepy_lock_factor __read_mostly = 256; 44 + static bool pv_yield_prev __read_mostly = true; 45 + static bool pv_yield_propagate_owner __read_mostly = true; 46 + static bool pv_prod_head __read_mostly = false; 47 + 48 + static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); 49 + static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock); 50 + 51 + #if _Q_SPIN_SPEC_BARRIER == 1 52 + #define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0) 53 + #else 54 + #define spec_barrier() do { } while (0) 55 + #endif 56 + 57 + static __always_inline bool recently_sleepy(void) 58 + { 59 + /* pv_sleepy_lock is true when this is 
called */ 60 + if (pv_sleepy_lock_interval_ns) { 61 + u64 seen = this_cpu_read(sleepy_lock_seen_clock); 62 + 63 + if (seen) { 64 + u64 delta = sched_clock() - seen; 65 + if (delta < pv_sleepy_lock_interval_ns) 66 + return true; 67 + this_cpu_write(sleepy_lock_seen_clock, 0); 68 + } 69 + } 70 + 71 + return false; 72 + } 73 + 74 + static __always_inline int get_steal_spins(bool paravirt, bool sleepy) 75 + { 76 + if (paravirt && sleepy) 77 + return steal_spins * pv_sleepy_lock_factor; 78 + else 79 + return steal_spins; 80 + } 81 + 82 + static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy) 83 + { 84 + if (paravirt && sleepy) 85 + return remote_steal_spins * pv_sleepy_lock_factor; 86 + else 87 + return remote_steal_spins; 88 + } 89 + 90 + static __always_inline int get_head_spins(bool paravirt, bool sleepy) 91 + { 92 + if (paravirt && sleepy) 93 + return head_spins * pv_sleepy_lock_factor; 94 + else 95 + return head_spins; 96 + } 97 + 98 + static inline u32 encode_tail_cpu(int cpu) 99 + { 100 + return (cpu + 1) << _Q_TAIL_CPU_OFFSET; 101 + } 102 + 103 + static inline int decode_tail_cpu(u32 val) 104 + { 105 + return (val >> _Q_TAIL_CPU_OFFSET) - 1; 106 + } 107 + 108 + static inline int get_owner_cpu(u32 val) 109 + { 110 + return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET; 111 + } 112 + 113 + /* 114 + * Try to acquire the lock if it was not already locked. If the tail matches 115 + * mytail then clear it, otherwise leave it unchnaged. Return previous value. 116 + * 117 + * This is used by the head of the queue to acquire the lock and clean up 118 + * its tail if it was the last one queued. 119 + */ 120 + static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail) 121 + { 122 + u32 newval = queued_spin_encode_locked_val(); 123 + u32 prev, tmp; 124 + 125 + asm volatile( 126 + "1: lwarx %0,0,%2,%7 # trylock_clean_tail \n" 127 + /* This test is necessary if there could be stealers */ 128 + " andi. 
%1,%0,%5 \n" 129 + " bne 3f \n" 130 + /* Test whether the lock tail == mytail */ 131 + " and %1,%0,%6 \n" 132 + " cmpw 0,%1,%3 \n" 133 + /* Merge the new locked value */ 134 + " or %1,%1,%4 \n" 135 + " bne 2f \n" 136 + /* If the lock tail matched, then clear it, otherwise leave it. */ 137 + " andc %1,%1,%6 \n" 138 + "2: stwcx. %1,0,%2 \n" 139 + " bne- 1b \n" 140 + "\t" PPC_ACQUIRE_BARRIER " \n" 141 + "3: \n" 142 + : "=&r" (prev), "=&r" (tmp) 143 + : "r" (&lock->val), "r"(tail), "r" (newval), 144 + "i" (_Q_LOCKED_VAL), 145 + "r" (_Q_TAIL_CPU_MASK), 146 + "i" (_Q_SPIN_EH_HINT) 147 + : "cr0", "memory"); 148 + 149 + return prev; 150 + } 151 + 152 + /* 153 + * Publish our tail, replacing previous tail. Return previous value. 154 + * 155 + * This provides a release barrier for publishing node, this pairs with the 156 + * acquire barrier in get_tail_qnode() when the next CPU finds this tail 157 + * value. 158 + */ 159 + static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail) 160 + { 161 + u32 prev, tmp; 162 + 163 + asm volatile( 164 + "\t" PPC_RELEASE_BARRIER " \n" 165 + "1: lwarx %0,0,%2 # publish_tail_cpu \n" 166 + " andc %1,%0,%4 \n" 167 + " or %1,%1,%3 \n" 168 + " stwcx. %1,0,%2 \n" 169 + " bne- 1b \n" 170 + : "=&r" (prev), "=&r"(tmp) 171 + : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK) 172 + : "cr0", "memory"); 173 + 174 + return prev; 175 + } 176 + 177 + static __always_inline u32 set_mustq(struct qspinlock *lock) 178 + { 179 + u32 prev; 180 + 181 + asm volatile( 182 + "1: lwarx %0,0,%1 # set_mustq \n" 183 + " or %0,%0,%2 \n" 184 + " stwcx. %0,0,%1 \n" 185 + " bne- 1b \n" 186 + : "=&r" (prev) 187 + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) 188 + : "cr0", "memory"); 189 + 190 + return prev; 191 + } 192 + 193 + static __always_inline u32 clear_mustq(struct qspinlock *lock) 194 + { 195 + u32 prev; 196 + 197 + asm volatile( 198 + "1: lwarx %0,0,%1 # clear_mustq \n" 199 + " andc %0,%0,%2 \n" 200 + " stwcx. 
%0,0,%1 \n" 201 + " bne- 1b \n" 202 + : "=&r" (prev) 203 + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) 204 + : "cr0", "memory"); 205 + 206 + return prev; 207 + } 208 + 209 + static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old) 210 + { 211 + u32 prev; 212 + u32 new = old | _Q_SLEEPY_VAL; 213 + 214 + BUG_ON(!(old & _Q_LOCKED_VAL)); 215 + BUG_ON(old & _Q_SLEEPY_VAL); 216 + 217 + asm volatile( 218 + "1: lwarx %0,0,%1 # try_set_sleepy \n" 219 + " cmpw 0,%0,%2 \n" 220 + " bne- 2f \n" 221 + " stwcx. %3,0,%1 \n" 222 + " bne- 1b \n" 223 + "2: \n" 224 + : "=&r" (prev) 225 + : "r" (&lock->val), "r"(old), "r" (new) 226 + : "cr0", "memory"); 227 + 228 + return likely(prev == old); 229 + } 230 + 231 + static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val) 232 + { 233 + if (pv_sleepy_lock) { 234 + if (pv_sleepy_lock_interval_ns) 235 + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); 236 + if (!(val & _Q_SLEEPY_VAL)) 237 + try_set_sleepy(lock, val); 238 + } 239 + } 240 + 241 + static __always_inline void seen_sleepy_lock(void) 242 + { 243 + if (pv_sleepy_lock && pv_sleepy_lock_interval_ns) 244 + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); 245 + } 246 + 247 + static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val) 248 + { 249 + if (pv_sleepy_lock) { 250 + if (pv_sleepy_lock_interval_ns) 251 + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); 252 + if (val & _Q_LOCKED_VAL) { 253 + if (!(val & _Q_SLEEPY_VAL)) 254 + try_set_sleepy(lock, val); 255 + } 256 + } 257 + } 258 + 259 + static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) 260 + { 261 + int cpu = decode_tail_cpu(val); 262 + struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu); 263 + int idx; 264 + 265 + /* 266 + * After publishing the new tail and finding a previous tail in the 267 + * previous val (which is the control dependency), this barrier 268 + * orders the release barrier in publish_tail_cpu performed by the 269 + * last 
CPU, with subsequently looking at its qnode structures 270 + * after the barrier. 271 + */ 272 + smp_acquire__after_ctrl_dep(); 273 + 274 + for (idx = 0; idx < MAX_NODES; idx++) { 275 + struct qnode *qnode = &qnodesp->nodes[idx]; 276 + if (qnode->lock == lock) 277 + return qnode; 278 + } 279 + 280 + BUG(); 281 + } 282 + 283 + /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ 284 + static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) 285 + { 286 + int owner; 287 + u32 yield_count; 288 + bool preempted = false; 289 + 290 + BUG_ON(!(val & _Q_LOCKED_VAL)); 291 + 292 + if (!paravirt) 293 + goto relax; 294 + 295 + if (!pv_yield_owner) 296 + goto relax; 297 + 298 + owner = get_owner_cpu(val); 299 + yield_count = yield_count_of(owner); 300 + 301 + if ((yield_count & 1) == 0) 302 + goto relax; /* owner vcpu is running */ 303 + 304 + spin_end(); 305 + 306 + seen_sleepy_owner(lock, val); 307 + preempted = true; 308 + 309 + /* 310 + * Read the lock word after sampling the yield count. On the other side 311 + * there may a wmb because the yield count update is done by the 312 + * hypervisor preemption and the value update by the OS, however this 313 + * ordering might reduce the chance of out of order accesses and 314 + * improve the heuristic. 315 + */ 316 + smp_rmb(); 317 + 318 + if (READ_ONCE(lock->val) == val) { 319 + if (mustq) 320 + clear_mustq(lock); 321 + yield_to_preempted(owner, yield_count); 322 + if (mustq) 323 + set_mustq(lock); 324 + spin_begin(); 325 + 326 + /* Don't relax if we yielded. Maybe we should? */ 327 + return preempted; 328 + } 329 + spin_begin(); 330 + relax: 331 + spin_cpu_relax(); 332 + 333 + return preempted; 334 + } 335 + 336 + /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. 
*/ 337 + static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) 338 + { 339 + return __yield_to_locked_owner(lock, val, paravirt, false); 340 + } 341 + 342 + /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ 343 + static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) 344 + { 345 + bool mustq = false; 346 + 347 + if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal) 348 + mustq = true; 349 + 350 + return __yield_to_locked_owner(lock, val, paravirt, mustq); 351 + } 352 + 353 + static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt) 354 + { 355 + struct qnode *next; 356 + int owner; 357 + 358 + if (!paravirt) 359 + return; 360 + if (!pv_yield_propagate_owner) 361 + return; 362 + 363 + owner = get_owner_cpu(val); 364 + if (*set_yield_cpu == owner) 365 + return; 366 + 367 + next = READ_ONCE(node->next); 368 + if (!next) 369 + return; 370 + 371 + if (vcpu_is_preempted(owner)) { 372 + next->yield_cpu = owner; 373 + *set_yield_cpu = owner; 374 + } else if (*set_yield_cpu != -1) { 375 + next->yield_cpu = owner; 376 + *set_yield_cpu = owner; 377 + } 378 + } 379 + 380 + /* Called inside spin_begin() */ 381 + static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) 382 + { 383 + int prev_cpu = decode_tail_cpu(val); 384 + u32 yield_count; 385 + int yield_cpu; 386 + bool preempted = false; 387 + 388 + if (!paravirt) 389 + goto relax; 390 + 391 + if (!pv_yield_propagate_owner) 392 + goto yield_prev; 393 + 394 + yield_cpu = READ_ONCE(node->yield_cpu); 395 + if (yield_cpu == -1) { 396 + /* Propagate back the -1 CPU */ 397 + if (node->next && node->next->yield_cpu != -1) 398 + node->next->yield_cpu = yield_cpu; 399 + goto yield_prev; 400 + } 401 + 402 + yield_count = yield_count_of(yield_cpu); 403 + if ((yield_count & 1) == 0) 404 + goto yield_prev; /* owner 
vcpu is running */ 405 + 406 + spin_end(); 407 + 408 + preempted = true; 409 + seen_sleepy_node(lock, val); 410 + 411 + smp_rmb(); 412 + 413 + if (yield_cpu == node->yield_cpu) { 414 + if (node->next && node->next->yield_cpu != yield_cpu) 415 + node->next->yield_cpu = yield_cpu; 416 + yield_to_preempted(yield_cpu, yield_count); 417 + spin_begin(); 418 + return preempted; 419 + } 420 + spin_begin(); 421 + 422 + yield_prev: 423 + if (!pv_yield_prev) 424 + goto relax; 425 + 426 + yield_count = yield_count_of(prev_cpu); 427 + if ((yield_count & 1) == 0) 428 + goto relax; /* owner vcpu is running */ 429 + 430 + spin_end(); 431 + 432 + preempted = true; 433 + seen_sleepy_node(lock, val); 434 + 435 + smp_rmb(); /* See __yield_to_locked_owner comment */ 436 + 437 + if (!node->locked) { 438 + yield_to_preempted(prev_cpu, yield_count); 439 + spin_begin(); 440 + return preempted; 441 + } 442 + spin_begin(); 443 + 444 + relax: 445 + spin_cpu_relax(); 446 + 447 + return preempted; 448 + } 449 + 450 + static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy) 451 + { 452 + if (iters >= get_steal_spins(paravirt, sleepy)) 453 + return true; 454 + 455 + if (IS_ENABLED(CONFIG_NUMA) && 456 + (iters >= get_remote_steal_spins(paravirt, sleepy))) { 457 + int cpu = get_owner_cpu(val); 458 + if (numa_node_id() != cpu_to_node(cpu)) 459 + return true; 460 + } 461 + return false; 462 + } 463 + 464 + static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) 465 + { 466 + bool seen_preempted = false; 467 + bool sleepy = false; 468 + int iters = 0; 469 + u32 val; 470 + 471 + if (!steal_spins) { 472 + /* XXX: should spin_on_preempted_owner do anything here? 
*/ 473 + return false; 474 + } 475 + 476 + /* Attempt to steal the lock */ 477 + spin_begin(); 478 + do { 479 + bool preempted = false; 480 + 481 + val = READ_ONCE(lock->val); 482 + if (val & _Q_MUST_Q_VAL) 483 + break; 484 + spec_barrier(); 485 + 486 + if (unlikely(!(val & _Q_LOCKED_VAL))) { 487 + spin_end(); 488 + if (__queued_spin_trylock_steal(lock)) 489 + return true; 490 + spin_begin(); 491 + } else { 492 + preempted = yield_to_locked_owner(lock, val, paravirt); 493 + } 494 + 495 + if (paravirt && pv_sleepy_lock) { 496 + if (!sleepy) { 497 + if (val & _Q_SLEEPY_VAL) { 498 + seen_sleepy_lock(); 499 + sleepy = true; 500 + } else if (recently_sleepy()) { 501 + sleepy = true; 502 + } 503 + } 504 + if (pv_sleepy_lock_sticky && seen_preempted && 505 + !(val & _Q_SLEEPY_VAL)) { 506 + if (try_set_sleepy(lock, val)) 507 + val |= _Q_SLEEPY_VAL; 508 + } 509 + } 510 + 511 + if (preempted) { 512 + seen_preempted = true; 513 + sleepy = true; 514 + if (!pv_spin_on_preempted_owner) 515 + iters++; 516 + /* 517 + * pv_spin_on_preempted_owner don't increase iters 518 + * while the owner is preempted -- we won't interfere 519 + * with it by definition. This could introduce some 520 + * latency issue if we continually observe preempted 521 + * owners, but hopefully that's a rare corner case of 522 + * a badly oversubscribed system. 
523 + */ 524 + } else { 525 + iters++; 526 + } 527 + } while (!steal_break(val, iters, paravirt, sleepy)); 528 + 529 + spin_end(); 530 + 531 + return false; 532 + } 533 + 534 + static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt) 535 + { 536 + struct qnodes *qnodesp; 537 + struct qnode *next, *node; 538 + u32 val, old, tail; 539 + bool seen_preempted = false; 540 + bool sleepy = false; 541 + bool mustq = false; 542 + int idx; 543 + int set_yield_cpu = -1; 544 + int iters = 0; 545 + 546 + BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); 547 + 548 + qnodesp = this_cpu_ptr(&qnodes); 549 + if (unlikely(qnodesp->count >= MAX_NODES)) { 550 + spec_barrier(); 551 + while (!queued_spin_trylock(lock)) 552 + cpu_relax(); 553 + return; 554 + } 555 + 556 + idx = qnodesp->count++; 557 + /* 558 + * Ensure that we increment the head node->count before initialising 559 + * the actual node. If the compiler is kind enough to reorder these 560 + * stores, then an IRQ could overwrite our assignments. 561 + */ 562 + barrier(); 563 + node = &qnodesp->nodes[idx]; 564 + node->next = NULL; 565 + node->lock = lock; 566 + node->cpu = smp_processor_id(); 567 + node->yield_cpu = -1; 568 + node->locked = 0; 569 + 570 + tail = encode_tail_cpu(node->cpu); 571 + 572 + old = publish_tail_cpu(lock, tail); 573 + 574 + /* 575 + * If there was a previous node; link it and wait until reaching the 576 + * head of the waitqueue. 577 + */ 578 + if (old & _Q_TAIL_CPU_MASK) { 579 + struct qnode *prev = get_tail_qnode(lock, old); 580 + 581 + /* Link @node into the waitqueue. 
*/ 582 + WRITE_ONCE(prev->next, node); 583 + 584 + /* Wait for mcs node lock to be released */ 585 + spin_begin(); 586 + while (!node->locked) { 587 + spec_barrier(); 588 + 589 + if (yield_to_prev(lock, node, old, paravirt)) 590 + seen_preempted = true; 591 + } 592 + spec_barrier(); 593 + spin_end(); 594 + 595 + /* Clear out stale propagated yield_cpu */ 596 + if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1) 597 + node->yield_cpu = -1; 598 + 599 + smp_rmb(); /* acquire barrier for the mcs lock */ 600 + 601 + /* 602 + * Generic qspinlocks have this prefetch here, but it seems 603 + * like it could cause additional line transitions because 604 + * the waiter will keep loading from it. 605 + */ 606 + if (_Q_SPIN_PREFETCH_NEXT) { 607 + next = READ_ONCE(node->next); 608 + if (next) 609 + prefetchw(next); 610 + } 611 + } 612 + 613 + /* We're at the head of the waitqueue, wait for the lock. */ 614 + again: 615 + spin_begin(); 616 + for (;;) { 617 + bool preempted; 618 + 619 + val = READ_ONCE(lock->val); 620 + if (!(val & _Q_LOCKED_VAL)) 621 + break; 622 + spec_barrier(); 623 + 624 + if (paravirt && pv_sleepy_lock && maybe_stealers) { 625 + if (!sleepy) { 626 + if (val & _Q_SLEEPY_VAL) { 627 + seen_sleepy_lock(); 628 + sleepy = true; 629 + } else if (recently_sleepy()) { 630 + sleepy = true; 631 + } 632 + } 633 + if (pv_sleepy_lock_sticky && seen_preempted && 634 + !(val & _Q_SLEEPY_VAL)) { 635 + if (try_set_sleepy(lock, val)) 636 + val |= _Q_SLEEPY_VAL; 637 + } 638 + } 639 + 640 + propagate_yield_cpu(node, val, &set_yield_cpu, paravirt); 641 + preempted = yield_head_to_locked_owner(lock, val, paravirt); 642 + if (!maybe_stealers) 643 + continue; 644 + 645 + if (preempted) 646 + seen_preempted = true; 647 + 648 + if (paravirt && preempted) { 649 + sleepy = true; 650 + 651 + if (!pv_spin_on_preempted_owner) 652 + iters++; 653 + } else { 654 + iters++; 655 + } 656 + 657 + if (!mustq && iters >= get_head_spins(paravirt, sleepy)) { 658 + mustq = true; 659 + 
set_mustq(lock); 660 + val |= _Q_MUST_Q_VAL; 661 + } 662 + } 663 + spec_barrier(); 664 + spin_end(); 665 + 666 + /* If we're the last queued, must clean up the tail. */ 667 + old = trylock_clean_tail(lock, tail); 668 + if (unlikely(old & _Q_LOCKED_VAL)) { 669 + BUG_ON(!maybe_stealers); 670 + goto again; /* Can only be true if maybe_stealers. */ 671 + } 672 + 673 + if ((old & _Q_TAIL_CPU_MASK) == tail) 674 + goto release; /* We were the tail, no next. */ 675 + 676 + /* There is a next, must wait for node->next != NULL (MCS protocol) */ 677 + next = READ_ONCE(node->next); 678 + if (!next) { 679 + spin_begin(); 680 + while (!(next = READ_ONCE(node->next))) 681 + cpu_relax(); 682 + spin_end(); 683 + } 684 + spec_barrier(); 685 + 686 + /* 687 + * Unlock the next mcs waiter node. Release barrier is not required 688 + * here because the acquirer is only accessing the lock word, and 689 + * the acquire barrier we took the lock with orders that update vs 690 + * this store to locked. The corresponding barrier is the smp_rmb() 691 + * acquire barrier for mcs lock, above. 692 + */ 693 + if (paravirt && pv_prod_head) { 694 + int next_cpu = next->cpu; 695 + WRITE_ONCE(next->locked, 1); 696 + if (_Q_SPIN_MISO) 697 + asm volatile("miso" ::: "memory"); 698 + if (vcpu_is_preempted(next_cpu)) 699 + prod_cpu(next_cpu); 700 + } else { 701 + WRITE_ONCE(next->locked, 1); 702 + if (_Q_SPIN_MISO) 703 + asm volatile("miso" ::: "memory"); 704 + } 705 + 706 + release: 707 + qnodesp->count--; /* release the node */ 708 + } 709 + 710 + void queued_spin_lock_slowpath(struct qspinlock *lock) 711 + { 712 + /* 713 + * This looks funny, but it induces the compiler to inline both 714 + * sides of the branch rather than share code as when the condition 715 + * is passed as the paravirt argument to the functions. 
716 + */ 717 + if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) { 718 + if (try_to_steal_lock(lock, true)) { 719 + spec_barrier(); 720 + return; 721 + } 722 + queued_spin_lock_mcs_queue(lock, true); 723 + } else { 724 + if (try_to_steal_lock(lock, false)) { 725 + spec_barrier(); 726 + return; 727 + } 728 + queued_spin_lock_mcs_queue(lock, false); 729 + } 730 + } 731 + EXPORT_SYMBOL(queued_spin_lock_slowpath); 732 + 733 + #ifdef CONFIG_PARAVIRT_SPINLOCKS 734 + void pv_spinlocks_init(void) 735 + { 736 + } 737 + #endif 738 + 739 + #include <linux/debugfs.h> 740 + static int steal_spins_set(void *data, u64 val) 741 + { 742 + #if _Q_SPIN_TRY_LOCK_STEAL == 1 743 + /* MAYBE_STEAL remains true */ 744 + steal_spins = val; 745 + #else 746 + static DEFINE_MUTEX(lock); 747 + 748 + /* 749 + * The lock slow path has a !maybe_stealers case that can assume 750 + * the head of queue will not see concurrent waiters. That waiter 751 + * is unsafe in the presence of stealers, so must keep them away 752 + * from one another. 
753 + */ 754 + 755 + mutex_lock(&lock); 756 + if (val && !steal_spins) { 757 + maybe_stealers = true; 758 + /* wait for queue head waiter to go away */ 759 + synchronize_rcu(); 760 + steal_spins = val; 761 + } else if (!val && steal_spins) { 762 + steal_spins = val; 763 + /* wait for all possible stealers to go away */ 764 + synchronize_rcu(); 765 + maybe_stealers = false; 766 + } else { 767 + steal_spins = val; 768 + } 769 + mutex_unlock(&lock); 770 + #endif 771 + 772 + return 0; 773 + } 774 + 775 + static int steal_spins_get(void *data, u64 *val) 776 + { 777 + *val = steal_spins; 778 + 779 + return 0; 780 + } 781 + 782 + DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n"); 783 + 784 + static int remote_steal_spins_set(void *data, u64 val) 785 + { 786 + remote_steal_spins = val; 787 + 788 + return 0; 789 + } 790 + 791 + static int remote_steal_spins_get(void *data, u64 *val) 792 + { 793 + *val = remote_steal_spins; 794 + 795 + return 0; 796 + } 797 + 798 + DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n"); 799 + 800 + static int head_spins_set(void *data, u64 val) 801 + { 802 + head_spins = val; 803 + 804 + return 0; 805 + } 806 + 807 + static int head_spins_get(void *data, u64 *val) 808 + { 809 + *val = head_spins; 810 + 811 + return 0; 812 + } 813 + 814 + DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n"); 815 + 816 + static int pv_yield_owner_set(void *data, u64 val) 817 + { 818 + pv_yield_owner = !!val; 819 + 820 + return 0; 821 + } 822 + 823 + static int pv_yield_owner_get(void *data, u64 *val) 824 + { 825 + *val = pv_yield_owner; 826 + 827 + return 0; 828 + } 829 + 830 + DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); 831 + 832 + static int pv_yield_allow_steal_set(void *data, u64 val) 833 + { 834 + pv_yield_allow_steal = !!val; 835 + 836 + return 0; 837 + } 838 + 839 + static int 
pv_yield_allow_steal_get(void *data, u64 *val) 840 + { 841 + *val = pv_yield_allow_steal; 842 + 843 + return 0; 844 + } 845 + 846 + DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n"); 847 + 848 + static int pv_spin_on_preempted_owner_set(void *data, u64 val) 849 + { 850 + pv_spin_on_preempted_owner = !!val; 851 + 852 + return 0; 853 + } 854 + 855 + static int pv_spin_on_preempted_owner_get(void *data, u64 *val) 856 + { 857 + *val = pv_spin_on_preempted_owner; 858 + 859 + return 0; 860 + } 861 + 862 + DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n"); 863 + 864 + static int pv_sleepy_lock_set(void *data, u64 val) 865 + { 866 + pv_sleepy_lock = !!val; 867 + 868 + return 0; 869 + } 870 + 871 + static int pv_sleepy_lock_get(void *data, u64 *val) 872 + { 873 + *val = pv_sleepy_lock; 874 + 875 + return 0; 876 + } 877 + 878 + DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n"); 879 + 880 + static int pv_sleepy_lock_sticky_set(void *data, u64 val) 881 + { 882 + pv_sleepy_lock_sticky = !!val; 883 + 884 + return 0; 885 + } 886 + 887 + static int pv_sleepy_lock_sticky_get(void *data, u64 *val) 888 + { 889 + *val = pv_sleepy_lock_sticky; 890 + 891 + return 0; 892 + } 893 + 894 + DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n"); 895 + 896 + static int pv_sleepy_lock_interval_ns_set(void *data, u64 val) 897 + { 898 + pv_sleepy_lock_interval_ns = val; 899 + 900 + return 0; 901 + } 902 + 903 + static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val) 904 + { 905 + *val = pv_sleepy_lock_interval_ns; 906 + 907 + return 0; 908 + } 909 + 910 + DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n"); 911 + 912 + static int pv_sleepy_lock_factor_set(void 
*data, u64 val) 913 + { 914 + pv_sleepy_lock_factor = val; 915 + 916 + return 0; 917 + } 918 + 919 + static int pv_sleepy_lock_factor_get(void *data, u64 *val) 920 + { 921 + *val = pv_sleepy_lock_factor; 922 + 923 + return 0; 924 + } 925 + 926 + DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n"); 927 + 928 + static int pv_yield_prev_set(void *data, u64 val) 929 + { 930 + pv_yield_prev = !!val; 931 + 932 + return 0; 933 + } 934 + 935 + static int pv_yield_prev_get(void *data, u64 *val) 936 + { 937 + *val = pv_yield_prev; 938 + 939 + return 0; 940 + } 941 + 942 + DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); 943 + 944 + static int pv_yield_propagate_owner_set(void *data, u64 val) 945 + { 946 + pv_yield_propagate_owner = !!val; 947 + 948 + return 0; 949 + } 950 + 951 + static int pv_yield_propagate_owner_get(void *data, u64 *val) 952 + { 953 + *val = pv_yield_propagate_owner; 954 + 955 + return 0; 956 + } 957 + 958 + DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n"); 959 + 960 + static int pv_prod_head_set(void *data, u64 val) 961 + { 962 + pv_prod_head = !!val; 963 + 964 + return 0; 965 + } 966 + 967 + static int pv_prod_head_get(void *data, u64 *val) 968 + { 969 + *val = pv_prod_head; 970 + 971 + return 0; 972 + } 973 + 974 + DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n"); 975 + 976 + static __init int spinlock_debugfs_init(void) 977 + { 978 + debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); 979 + debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins); 980 + debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); 981 + if (is_shared_processor()) { 982 + debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, 
NULL, &fops_pv_yield_owner); 983 + debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); 984 + debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner); 985 + debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock); 986 + debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky); 987 + debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns); 988 + debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor); 989 + debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); 990 + debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); 991 + debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); 992 + } 993 + 994 + return 0; 995 + } 996 + device_initcall(spinlock_debugfs_init);