// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/processor.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>
#include <trace/events/lock.h>

#define MAX_NODES	4

struct qnode {
	struct qnode	*next;
	struct qspinlock *lock;
	int		cpu;
	u8		sleepy; /* 1 if the previous vCPU was preempted or
				 * if the previous node was sleepy */
	u8		locked; /* 1 if lock acquired */
};

struct qnodes {
	int		count;
	struct qnode nodes[MAX_NODES];
};

/* Tuning parameters */
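/*
 * steal_spins: how many times a waiter may try to steal the lock before
 * queueing. remote_steal_spins: a lower limit applied when the owner sits
 * on a different NUMA node (see steal_break()). head_spins: how long the
 * queue head spins before setting _Q_MUST_Q_VAL to shut out stealers.
 * The pv_* options tune paravirt behaviour (yielding to preempted owners
 * or predecessors, and "sleepy lock" scaling by pv_sleepy_lock_factor).
 * All of these are runtime-adjustable via debugfs, see the bottom of
 * this file.
 */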
static int steal_spins __read_mostly = (1 << 5);
static int remote_steal_spins __read_mostly = (1 << 2);
#if _Q_SPIN_TRY_LOCK_STEAL == 1
static const bool maybe_stealers = true;
#else
static bool maybe_stealers __read_mostly = true;
#endif
static int head_spins __read_mostly = (1 << 8);

static bool pv_yield_owner __read_mostly = true;
static bool pv_yield_allow_steal __read_mostly = false;
static bool pv_spin_on_preempted_owner __read_mostly = false;
static bool pv_sleepy_lock __read_mostly = true;
static bool pv_sleepy_lock_sticky __read_mostly = false;
static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
static int pv_sleepy_lock_factor __read_mostly = 256;
static bool pv_yield_prev __read_mostly = true;
static bool pv_yield_sleepy_owner __read_mostly = true;
static bool pv_prod_head __read_mostly = false;

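/*
 * Per-CPU stack of MCS queue nodes. MAX_NODES slots are needed because the
 * slowpath can nest (presumably one level each for task, soft IRQ, hard IRQ
 * and NMI context); if the stack ever overflows we fall back to plain
 * trylock spinning in queued_spin_lock_mcs_queue().
 */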
static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);

#if _Q_SPIN_SPEC_BARRIER == 1
#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
#else
#define spec_barrier() do { } while (0)
#endif

static __always_inline bool recently_sleepy(void)
{
	/* pv_sleepy_lock is true when this is called */
	if (pv_sleepy_lock_interval_ns) {
		u64 seen = this_cpu_read(sleepy_lock_seen_clock);

		if (seen) {
			u64 delta = sched_clock() - seen;
			if (delta < pv_sleepy_lock_interval_ns)
				return true;
			this_cpu_write(sleepy_lock_seen_clock, 0);
		}
	}

	return false;
}

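/*
 * Spin count limits. When running paravirtualised and the lock has been
 * marked "sleepy" (its holders have recently been seen preempted), the base
 * limits are scaled up by pv_sleepy_lock_factor so waiters tolerate the
 * longer hold times before queueing or forcing the queue.
 */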
static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return steal_spins * pv_sleepy_lock_factor;
	else
		return steal_spins;
}

static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return remote_steal_spins * pv_sleepy_lock_factor;
	else
		return remote_steal_spins;
}

static __always_inline int get_head_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return head_spins * pv_sleepy_lock_factor;
	else
		return head_spins;
}

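/*
 * The tail field of the lock word encodes "CPU number + 1", so a tail of 0
 * means no CPU is queued. decode_tail_cpu() reverses encode_tail_cpu().
 */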
static inline u32 encode_tail_cpu(int cpu)
{
	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
}

static inline int decode_tail_cpu(u32 val)
{
	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
}

static inline int get_owner_cpu(u32 val)
{
	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
}

/*
 * Try to acquire the lock if it was not already locked. If the tail matches
 * mytail then clear it, otherwise leave it unchanged. Return previous value.
 *
 * This is used by the head of the queue to acquire the lock and clean up
 * its tail if it was the last one queued.
 */
static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
{
	u32 newval = queued_spin_encode_locked_val();
	u32 prev, tmp;

	asm volatile(
"1:	lwarx	%0,0,%2,%7	# trylock_clean_tail		\n"
	/* This test is necessary if there could be stealers */
"	andi.	%1,%0,%5					\n"
"	bne	3f						\n"
	/* Test whether the lock tail == mytail */
"	and	%1,%0,%6					\n"
"	cmpw	0,%1,%3						\n"
	/* Merge the new locked value */
"	or	%1,%1,%4					\n"
"	bne	2f						\n"
	/* If the lock tail matched, then clear it, otherwise leave it. */
"	andc	%1,%1,%6					\n"
"2:	stwcx.	%1,0,%2						\n"
"	bne-	1b						\n"
"\t" PPC_ACQUIRE_BARRIER "					\n"
"3:								\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r"(tail), "r" (newval),
	  "i" (_Q_LOCKED_VAL),
	  "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return prev;
}

/*
 * Publish our tail, replacing the previous tail. Return the previous value.
 *
 * This provides a release barrier for publishing the node; it pairs with
 * the acquire barrier in get_tail_qnode() when the next CPU finds this
 * tail value.
 */
static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
{
	u32 prev, tmp;

	kcsan_release();

	asm volatile(
"\t" PPC_RELEASE_BARRIER "					\n"
"1:	lwarx	%0,0,%2		# publish_tail_cpu		\n"
"	andc	%1,%0,%4					\n"
"	or	%1,%1,%3					\n"
"	stwcx.	%1,0,%2						\n"
"	bne-	1b						\n"
	: "=&r" (prev), "=&r"(tmp)
	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
	: "cr0", "memory");

	return prev;
}

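/*
 * _Q_MUST_Q_VAL tells would-be stealers to queue up. The queue head sets it
 * after spinning for get_head_spins() iterations without getting the lock,
 * and may temporarily clear it while yielding to a preempted owner when
 * pv_yield_allow_steal is enabled.
 */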
static __always_inline u32 set_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# set_mustq			\n"
"	or	%0,%0,%2					\n"
"	stwcx.	%0,0,%1						\n"
"	bne-	1b						\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

static __always_inline u32 clear_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# clear_mustq			\n"
"	andc	%0,%0,%2					\n"
"	stwcx.	%0,0,%1						\n"
"	bne-	1b						\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
{
	u32 prev;
	u32 new = old | _Q_SLEEPY_VAL;

	BUG_ON(!(old & _Q_LOCKED_VAL));
	BUG_ON(old & _Q_SLEEPY_VAL);

	asm volatile(
"1:	lwarx	%0,0,%1		# try_set_sleepy		\n"
"	cmpw	0,%0,%2						\n"
"	bne-	2f						\n"
"	stwcx.	%3,0,%1						\n"
"	bne-	1b						\n"
"2:								\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r"(old), "r" (new)
	: "cr0", "memory");

	return likely(prev == old);
}

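/*
 * "Sleepy lock" tracking: when a waiter notices that the lock holder (or a
 * queued predecessor) has been preempted, it marks the lock with
 * _Q_SLEEPY_VAL and/or records a per-CPU timestamp. recently_sleepy() and
 * the _Q_SLEEPY_VAL bit are then used to scale up the spin limits above.
 */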
static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (!(val & _Q_SLEEPY_VAL))
			try_set_sleepy(lock, val);
	}
}

static __always_inline void seen_sleepy_lock(void)
{
	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
}

static __always_inline void seen_sleepy_node(void)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		/* Don't set sleepy because we likely have a stale val */
	}
}

static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
{
	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
	int idx;

	/*
	 * After publishing the new tail and finding a previous tail in the
	 * previous val (which is the control dependency), this barrier
	 * orders the release barrier in publish_tail_cpu performed by the
	 * last CPU against our subsequent loads of its qnode structures.
	 */
	smp_acquire__after_ctrl_dep();

	for (idx = 0; idx < MAX_NODES; idx++) {
		struct qnode *qnode = &qnodesp->nodes[idx];
		if (qnode->lock == lock)
			return qnode;
	}

	BUG();
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
{
	int owner;
	u32 yield_count;
	bool preempted = false;

	BUG_ON(!(val & _Q_LOCKED_VAL));

	if (!paravirt)
		goto relax;

	if (!pv_yield_owner)
		goto relax;

	owner = get_owner_cpu(val);
	yield_count = yield_count_of(owner);

	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	seen_sleepy_owner(lock, val);
	preempted = true;

	/*
	 * Read the lock word after sampling the yield count. On the other side
	 * there may be a wmb, because the yield count update is done by the
	 * hypervisor preemption and the value update by the OS; however, this
	 * ordering might reduce the chance of out of order accesses and
	 * improve the heuristic.
	 */
	smp_rmb();

	if (READ_ONCE(lock->val) == val) {
		if (mustq)
			clear_mustq(lock);
		yield_to_preempted(owner, yield_count);
		if (mustq)
			set_mustq(lock);
		spin_begin();

		/* Don't relax if we yielded. Maybe we should? */
		return preempted;
	}
	spin_begin();
relax:
	spin_cpu_relax();

	return preempted;
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	return __yield_to_locked_owner(lock, val, paravirt, false);
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	bool mustq = false;

	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
		mustq = true;

	return __yield_to_locked_owner(lock, val, paravirt, mustq);
}

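/*
 * If the lock owner is currently preempted, pass the "sleepy" hint on to the
 * next queued waiter so it also scales up its spin limits. The hint is only
 * propagated while the owner is seen preempted, which lets the queue become
 * non-sleepy again once vCPU preemption stops.
 */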
static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
{
	struct qnode *next;
	int owner;

	if (!paravirt)
		return;
	if (!pv_yield_sleepy_owner)
		return;

	next = READ_ONCE(node->next);
	if (!next)
		return;

	if (next->sleepy)
		return;

	owner = get_owner_cpu(val);
	if (vcpu_is_preempted(owner))
		next->sleepy = 1;
}

/* Called inside spin_begin() */
static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
{
	u32 yield_count;
	bool preempted = false;

	if (!paravirt)
		goto relax;

	if (!pv_yield_sleepy_owner)
		goto yield_prev;

	/*
	 * If the previous waiter was preempted it might not be able to
	 * propagate sleepy to us, so check the lock in that case too.
	 */
	if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
		u32 val = READ_ONCE(lock->val);

		if (val & _Q_LOCKED_VAL) {
			if (node->next && !node->next->sleepy) {
				/*
				 * Propagate sleepy to next waiter. Only if
				 * owner is preempted, which allows the queue
				 * to become "non-sleepy" if vCPU preemption
				 * ceases to occur, even if the lock remains
				 * highly contended.
				 */
				if (vcpu_is_preempted(get_owner_cpu(val)))
					node->next->sleepy = 1;
			}

			preempted = yield_to_locked_owner(lock, val, paravirt);
			if (preempted)
				return preempted;
		}
		node->sleepy = false;
	}

yield_prev:
	if (!pv_yield_prev)
		goto relax;

	yield_count = yield_count_of(prev_cpu);
	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	preempted = true;
	seen_sleepy_node();

	smp_rmb(); /* See __yield_to_locked_owner comment */

	if (!READ_ONCE(node->locked)) {
		yield_to_preempted(prev_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

relax:
	spin_cpu_relax();

	return preempted;
}

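/*
 * Decide when a would-be stealer should give up and queue: after
 * get_steal_spins() iterations, or earlier (after get_remote_steal_spins()
 * iterations) when the current owner is on a different NUMA node, presumably
 * to limit cache-line bouncing across nodes.
 */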
static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
{
	if (iters >= get_steal_spins(paravirt, sleepy))
		return true;

	if (IS_ENABLED(CONFIG_NUMA) &&
	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
		int cpu = get_owner_cpu(val);
		if (numa_node_id() != cpu_to_node(cpu))
			return true;
	}
	return false;
}

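/*
 * Attempt to take the lock without queueing. Spins while the lock is held,
 * yielding to a preempted owner under paravirt, and uses a trylock whenever
 * the lock is observed free. Gives up once steal_break() fires or the
 * _Q_MUST_Q_VAL bit tells stealers to get in line. Returns true if the lock
 * was taken.
 */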
static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
{
	bool seen_preempted = false;
	bool sleepy = false;
	int iters = 0;
	u32 val;

	if (!steal_spins) {
		/* XXX: should spin_on_preempted_owner do anything here? */
		return false;
	}

	/* Attempt to steal the lock */
	spin_begin();
	do {
		bool preempted = false;

		val = READ_ONCE(lock->val);
		if (val & _Q_MUST_Q_VAL)
			break;
		spec_barrier();

		if (unlikely(!(val & _Q_LOCKED_VAL))) {
			spin_end();
			if (__queued_spin_trylock_steal(lock))
				return true;
			spin_begin();
		} else {
			preempted = yield_to_locked_owner(lock, val, paravirt);
		}

		if (paravirt && pv_sleepy_lock) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		if (preempted) {
			seen_preempted = true;
			sleepy = true;
			if (!pv_spin_on_preempted_owner)
				iters++;
			/*
			 * With pv_spin_on_preempted_owner, don't increase
			 * iters while the owner is preempted -- we won't
			 * interfere with it by definition. This could
			 * introduce some latency if we continually observe
			 * preempted owners, but hopefully that's a rare
			 * corner case of a badly oversubscribed system.
			 */
		} else {
			iters++;
		}
	} while (!steal_break(val, iters, paravirt, sleepy));

	spin_end();

	return false;
}

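/*
 * Queue onto the MCS waitqueue: grab a per-CPU qnode, publish our tail in
 * the lock word, spin on our own node until the predecessor hands over the
 * MCS "locked" bit, then spin at the head of the queue for the lock itself
 * (setting _Q_MUST_Q_VAL if that takes too long). Finally acquire the lock,
 * clean up the tail if we were last, and release the successor.
 */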
static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
{
	struct qnodes *qnodesp;
	struct qnode *next, *node;
	u32 val, old, tail;
	bool seen_preempted = false;
	bool sleepy = false;
	bool mustq = false;
	int idx;
	int iters = 0;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	qnodesp = this_cpu_ptr(&qnodes);
	if (unlikely(qnodesp->count >= MAX_NODES)) {
		spec_barrier();
		while (!queued_spin_trylock(lock))
			cpu_relax();
		return;
	}

	idx = qnodesp->count++;
	/*
	 * Ensure that we increment qnodesp->count before initialising the
	 * actual node. If the compiler were to reorder these stores, an
	 * IRQ could overwrite our assignments.
	 */
	barrier();
	node = &qnodesp->nodes[idx];
	node->next = NULL;
	node->lock = lock;
	node->cpu = smp_processor_id();
	node->sleepy = 0;
	node->locked = 0;

	tail = encode_tail_cpu(node->cpu);

	/*
	 * Assign all attributes of a node before it can be published.
	 * Issues an lwsync, serving as a release barrier, as well as a
	 * compiler barrier.
	 */
	old = publish_tail_cpu(lock, tail);

	/*
	 * If there was a previous node; link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_CPU_MASK) {
		int prev_cpu = decode_tail_cpu(old);
		struct qnode *prev = get_tail_qnode(lock, prev_cpu);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		/* Wait for mcs node lock to be released */
		spin_begin();
		while (!READ_ONCE(node->locked)) {
			spec_barrier();

			if (yield_to_prev(lock, node, prev_cpu, paravirt))
				seen_preempted = true;
		}
		spec_barrier();
		spin_end();

		smp_rmb(); /* acquire barrier for the mcs lock */

		/*
		 * Generic qspinlocks have this prefetch here, but it seems
		 * like it could cause additional line transitions because
		 * the waiter will keep loading from it.
		 */
		if (_Q_SPIN_PREFETCH_NEXT) {
			next = READ_ONCE(node->next);
			if (next)
				prefetchw(next);
		}
	}

	/* We're at the head of the waitqueue, wait for the lock. */
again:
	spin_begin();
	for (;;) {
		bool preempted;

		val = READ_ONCE(lock->val);
		if (!(val & _Q_LOCKED_VAL))
			break;
		spec_barrier();

		if (paravirt && pv_sleepy_lock && maybe_stealers) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		propagate_sleepy(node, val, paravirt);
		preempted = yield_head_to_locked_owner(lock, val, paravirt);
		if (!maybe_stealers)
			continue;

		if (preempted)
			seen_preempted = true;

		if (paravirt && preempted) {
			sleepy = true;

			if (!pv_spin_on_preempted_owner)
				iters++;
		} else {
			iters++;
		}

		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
			mustq = true;
			set_mustq(lock);
			val |= _Q_MUST_Q_VAL;
		}
	}
	spec_barrier();
	spin_end();

	/* If we're the last queued, must clean up the tail. */
	old = trylock_clean_tail(lock, tail);
	if (unlikely(old & _Q_LOCKED_VAL)) {
		BUG_ON(!maybe_stealers);
		goto again; /* Can only be true if maybe_stealers. */
	}

	if ((old & _Q_TAIL_CPU_MASK) == tail)
		goto release; /* We were the tail, no next. */

	/* There is a next, must wait for node->next != NULL (MCS protocol) */
	next = READ_ONCE(node->next);
	if (!next) {
		spin_begin();
		while (!(next = READ_ONCE(node->next)))
			cpu_relax();
		spin_end();
	}
	spec_barrier();

	/*
	 * Unlock the next mcs waiter node. Release barrier is not required
	 * here because the acquirer is only accessing the lock word, and
	 * the acquire barrier we took the lock with orders that update vs
	 * this store to locked. The corresponding barrier is the smp_rmb()
	 * acquire barrier for mcs lock, above.
	 */
	if (paravirt && pv_prod_head) {
		int next_cpu = next->cpu;
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
		if (vcpu_is_preempted(next_cpu))
			prod_cpu(next_cpu);
	} else {
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
	}

release:
	/*
	 * Clear the lock before releasing the node, as another CPU might see stale
	 * values if an interrupt occurs after we increment qnodesp->count
	 * but before node->lock is initialized. The barrier ensures that
	 * there are no further stores to the node after it has been released.
	 */
	node->lock = NULL;
	barrier();
	qnodesp->count--;
}

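/*
 * Slowpath entry point, presumably reached from the queued_spin_lock()
 * fastpath once its atomic trylock of the lock word has failed. First try
 * to steal the lock outright, otherwise join the MCS queue.
 */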
void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock)
{
	trace_contention_begin(lock, LCB_F_SPIN);
	/*
	 * This looks funny, but it induces the compiler to inline both
	 * sides of the branch rather than share code as when the condition
	 * is passed as the paravirt argument to the functions.
	 */
	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
		if (try_to_steal_lock(lock, true))
			spec_barrier();
		else
			queued_spin_lock_mcs_queue(lock, true);
	} else {
		if (try_to_steal_lock(lock, false))
			spec_barrier();
		else
			queued_spin_lock_mcs_queue(lock, false);
	}
	trace_contention_end(lock, 0);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);

#ifdef CONFIG_PARAVIRT_SPINLOCKS
void pv_spinlocks_init(void)
{
}
#endif

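/*
 * Runtime tuning knobs, exposed as qspl_* files under the arch debugfs
 * directory. The pv_* files are only created when running as a shared
 * processor (paravirtualised) guest, see spinlock_debugfs_init() below.
 */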
#include <linux/debugfs.h>
static int steal_spins_set(void *data, u64 val)
{
#if _Q_SPIN_TRY_LOCK_STEAL == 1
	/* MAYBE_STEAL remains true */
	steal_spins = val;
#else
	static DEFINE_MUTEX(lock);

	/*
	 * The lock slow path has a !maybe_stealers case that can assume
	 * the head of queue will not see concurrent waiters. That waiter
	 * is unsafe in the presence of stealers, so the two must be kept
	 * from running concurrently.
	 */

	mutex_lock(&lock);
	if (val && !steal_spins) {
		maybe_stealers = true;
		/* wait for queue head waiter to go away */
		synchronize_rcu();
		steal_spins = val;
	} else if (!val && steal_spins) {
		steal_spins = val;
		/* wait for all possible stealers to go away */
		synchronize_rcu();
		maybe_stealers = false;
	} else {
		steal_spins = val;
	}
	mutex_unlock(&lock);
#endif

	return 0;
}

static int steal_spins_get(void *data, u64 *val)
{
	*val = steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");

static int remote_steal_spins_set(void *data, u64 val)
{
	remote_steal_spins = val;

	return 0;
}

static int remote_steal_spins_get(void *data, u64 *val)
{
	*val = remote_steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");

static int head_spins_set(void *data, u64 val)
{
	head_spins = val;

	return 0;
}

static int head_spins_get(void *data, u64 *val)
{
	*val = head_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");

static int pv_yield_owner_set(void *data, u64 val)
{
	pv_yield_owner = !!val;

	return 0;
}

static int pv_yield_owner_get(void *data, u64 *val)
{
	*val = pv_yield_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");

static int pv_yield_allow_steal_set(void *data, u64 val)
{
	pv_yield_allow_steal = !!val;

	return 0;
}

static int pv_yield_allow_steal_get(void *data, u64 *val)
{
	*val = pv_yield_allow_steal;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");

static int pv_spin_on_preempted_owner_set(void *data, u64 val)
{
	pv_spin_on_preempted_owner = !!val;

	return 0;
}

static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
{
	*val = pv_spin_on_preempted_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");

static int pv_sleepy_lock_set(void *data, u64 val)
{
	pv_sleepy_lock = !!val;

	return 0;
}

static int pv_sleepy_lock_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");

static int pv_sleepy_lock_sticky_set(void *data, u64 val)
{
	pv_sleepy_lock_sticky = !!val;

	return 0;
}

static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_sticky;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");

static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
{
	pv_sleepy_lock_interval_ns = val;

	return 0;
}

static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_interval_ns;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");

static int pv_sleepy_lock_factor_set(void *data, u64 val)
{
	pv_sleepy_lock_factor = val;

	return 0;
}

static int pv_sleepy_lock_factor_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_factor;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");

static int pv_yield_prev_set(void *data, u64 val)
{
	pv_yield_prev = !!val;

	return 0;
}

static int pv_yield_prev_get(void *data, u64 *val)
{
	*val = pv_yield_prev;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");

static int pv_yield_sleepy_owner_set(void *data, u64 val)
{
	pv_yield_sleepy_owner = !!val;

	return 0;
}

static int pv_yield_sleepy_owner_get(void *data, u64 *val)
{
	*val = pv_yield_sleepy_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n");

static int pv_prod_head_set(void *data, u64 val)
{
	pv_prod_head = !!val;

	return 0;
}

static int pv_prod_head_get(void *data, u64 *val)
{
	*val = pv_prod_head;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");

static __init int spinlock_debugfs_init(void)
{
	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
	if (is_shared_processor()) {
		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
		debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
		debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
		debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
		debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
		debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner);
		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
	}

	return 0;
}
device_initcall(spinlock_debugfs_init);
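/*
 * Example usage (a sketch, assuming debugfs is mounted at /sys/kernel/debug
 * and arch_debugfs_dir is the "powerpc" directory there):
 *
 *   # read the current steal limit
 *   cat /sys/kernel/debug/powerpc/qspl_steal_spins
 *   # allow more lock stealing before queueing
 *   echo 64 > /sys/kernel/debug/powerpc/qspl_steal_spins
 *   # disable yielding to preempted lock owners (shared-processor guests only)
 *   echo 0 > /sys/kernel/debug/powerpc/qspl_pv_yield_owner
 */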