/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_SEQLOCK_H
#define __LINUX_SEQLOCK_H

/*
 * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
 * lockless readers (read-only retry loops), and no writer starvation.
 *
 * See Documentation/locking/seqlock.rst
 *
 * Copyrights:
 * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
 * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH
 */

#include <linux/compiler.h>
#include <linux/kcsan-checks.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>

#include <asm/processor.h>

/*
 * The seqlock seqcount_t interface does not prescribe a precise sequence of
 * read begin/retry/end. For readers, typically there is a call to
 * read_seqcount_begin() and read_seqcount_retry(), however, there are more
 * esoteric cases which do not follow this pattern.
 *
 * As a consequence, we take the following best-effort approach for raw usage
 * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
 * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
 * atomics; if there is a matching read_seqcount_retry() call, no following
 * memory operations are considered atomic. Usage of the seqlock_t interface
 * is not affected.
 */
#define KCSAN_SEQLOCK_REGION_MAX 1000

/*
 * Sequence counters (seqcount_t)
 *
 * This is the raw counting mechanism, without any writer protection.
 *
 * Write side critical sections must be serialized and non-preemptible.
 *
 * If readers can be invoked from hardirq or softirq contexts,
 * interrupts or bottom halves must also be respectively disabled before
 * entering the write section.
 *
 * This mechanism can't be used if the protected data contains pointers,
 * as the writer can invalidate a pointer that a reader is following.
 *
 * If the write serialization mechanism is one of the common kernel
 * locking primitives, use a sequence counter with associated lock
 * (seqcount_LOCKTYPE_t) instead.
 *
 * If it's desired to automatically handle the sequence counter writer
 * serialization and non-preemptibility requirements, use a sequential
 * lock (seqlock_t) instead.
 *
 * See Documentation/locking/seqlock.rst
 */
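
/*
 * Minimal usage sketch: a plain seqcount_t protecting two plain variables.
 * All identifiers below (foo_seq, foo_x, foo_y, ...) are hypothetical. The
 * writer must provide its own serialization and non-preemptibility, as
 * described above::
 *
 *	static seqcount_t foo_seq = SEQCNT_ZERO(foo_seq);
 *	static int foo_x, foo_y;
 *
 *	// Writer: caller is serialized and non-preemptible
 *	void foo_update(int x, int y)
 *	{
 *		write_seqcount_begin(&foo_seq);
 *		foo_x = x;
 *		foo_y = y;
 *		write_seqcount_end(&foo_seq);
 *	}
 *
 *	// Lockless reader: retry if a write was in progress
 *	void foo_read(int *x, int *y)
 *	{
 *		unsigned int seq;
 *
 *		do {
 *			seq = read_seqcount_begin(&foo_seq);
 *			*x = foo_x;
 *			*y = foo_y;
 *		} while (read_seqcount_retry(&foo_seq, seq));
 *	}
 */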

typedef struct seqcount {
	unsigned sequence;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} seqcount_t;

static inline void __seqcount_init(seqcount_t *s, const char *name,
				   struct lock_class_key *key)
{
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	lockdep_init_map(&s->dep_map, name, key, 0);
	s->sequence = 0;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC

# define SEQCOUNT_DEP_MAP_INIT(lockname)				\
		.dep_map = { .name = #lockname }

/**
 * seqcount_init() - runtime initializer for seqcount_t
 * @s: Pointer to the seqcount_t instance
 */
# define seqcount_init(s)						\
	do {								\
		static struct lock_class_key __key;			\
		__seqcount_init((s), #s, &__key);			\
	} while (0)

static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
{
	seqcount_t *l = (seqcount_t *)s;
	unsigned long flags;

	local_irq_save(flags);
	seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
	seqcount_release(&l->dep_map, _RET_IP_);
	local_irq_restore(flags);
}

#else
# define SEQCOUNT_DEP_MAP_INIT(lockname)
# define seqcount_init(s) __seqcount_init(s, NULL, NULL)
# define seqcount_lockdep_reader_access(x)
#endif

/**
 * SEQCNT_ZERO() - static initializer for seqcount_t
 * @name: Name of the seqcount_t instance
 */
#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }

/*
 * Sequence counters with associated locks (seqcount_LOCKTYPE_t)
 *
 * A sequence counter which associates the lock used for writer
 * serialization at initialization time. This enables lockdep to validate
 * that the write side critical section is properly serialized.
 *
 * For associated locks which do not implicitly disable preemption,
 * preemption protection is enforced in the write side function.
 *
 * Lockdep is never used in any of the raw write variants.
 *
 * See Documentation/locking/seqlock.rst
 */

#ifdef CONFIG_LOCKDEP
#define __SEQ_LOCK(expr)	expr
#else
#define __SEQ_LOCK(expr)
#endif

/**
 * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPE associated
 * @seqcount:	The real sequence counter
 * @lock:	Pointer to the associated spinlock
 *
 * A plain sequence counter with external writer synchronization by a
 * spinlock. The spinlock is associated to the sequence count in the
 * static initializer or init function. This enables lockdep to validate
 * that the write side critical section is properly serialized.
 */

/**
 * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t
 * @s:		Pointer to the seqcount_LOCKNAME_t instance
 * @lock:	Pointer to the associated LOCKTYPE
 */

/*
 * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers
 * @locktype:		actual typename
 * @lockname:		name
 * @preemptible:	preemptibility of above locktype
 * @lockmember:		argument for lockdep_assert_held()
 */
#define SEQCOUNT_LOCKTYPE(locktype, lockname, preemptible, lockmember)	\
typedef struct seqcount_##lockname {					\
	seqcount_t		seqcount;				\
	__SEQ_LOCK(locktype	*lock);					\
} seqcount_##lockname##_t;						\
									\
static __always_inline void						\
seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock)	\
{									\
	seqcount_init(&s->seqcount);					\
	__SEQ_LOCK(s->lock = lock);					\
}									\
									\
static __always_inline seqcount_t *					\
__seqcount_##lockname##_ptr(seqcount_##lockname##_t *s)		\
{									\
	return &s->seqcount;						\
}									\
									\
static __always_inline bool						\
__seqcount_##lockname##_preemptible(seqcount_##lockname##_t *s)	\
{									\
	return preemptible;						\
}									\
									\
static __always_inline void						\
__seqcount_##lockname##_assert(seqcount_##lockname##_t *s)		\
{									\
	__SEQ_LOCK(lockdep_assert_held(lockmember));			\
}

/*
 * __seqprop() for seqcount_t
 */

static inline seqcount_t *__seqcount_ptr(seqcount_t *s)
{
	return s;
}

static inline bool __seqcount_preemptible(seqcount_t *s)
{
	return false;
}

static inline void __seqcount_assert(seqcount_t *s)
{
	lockdep_assert_preemption_disabled();
}

SEQCOUNT_LOCKTYPE(raw_spinlock_t,	raw_spinlock,	false,	s->lock)
SEQCOUNT_LOCKTYPE(spinlock_t,		spinlock,	false,	s->lock)
SEQCOUNT_LOCKTYPE(rwlock_t,		rwlock,		false,	s->lock)
SEQCOUNT_LOCKTYPE(struct mutex,		mutex,		true,	s->lock)
SEQCOUNT_LOCKTYPE(struct ww_mutex,	ww_mutex,	true,	&s->lock->base)

/**
 * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
 * @name:	Name of the seqcount_LOCKNAME_t instance
 * @lock:	Pointer to the associated LOCKTYPE
 */

#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) {			\
	.seqcount		= SEQCNT_ZERO(seq_name.seqcount),	\
	__SEQ_LOCK(.lock	= (assoc_lock))				\
}

#define SEQCNT_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
#define SEQCNT_RWLOCK_ZERO(name, lock)		SEQCOUNT_LOCKTYPE_ZERO(name, lock)
#define SEQCNT_MUTEX_ZERO(name, lock)		SEQCOUNT_LOCKTYPE_ZERO(name, lock)
#define SEQCNT_WW_MUTEX_ZERO(name, lock)	SEQCOUNT_LOCKTYPE_ZERO(name, lock)

#define __seqprop_case(s, lockname, prop)				\
	seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s))

#define __seqprop(s, prop) _Generic(*(s),				\
	seqcount_t:		__seqcount_##prop((void *)(s)),		\
	__seqprop_case((s),	raw_spinlock,	prop),			\
	__seqprop_case((s),	spinlock,	prop),			\
	__seqprop_case((s),	rwlock,		prop),			\
	__seqprop_case((s),	mutex,		prop),			\
	__seqprop_case((s),	ww_mutex,	prop))

#define __seqcount_ptr(s)		__seqprop(s, ptr)
#define __seqcount_lock_preemptible(s)	__seqprop(s, preemptible)
#define __seqcount_assert_lock_held(s)	__seqprop(s, assert)
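
/*
 * Minimal sketch of a sequence counter with an associated lock. All names
 * below (foo_lock, foo_seq, foo_data, foo_set, foo_get) are hypothetical.
 * Because the counter is associated with a spinlock, lockdep can verify
 * that the lock is held in the write section, and no manual preemption
 * control is needed (the spinlock already disables preemption)::
 *
 *	static DEFINE_SPINLOCK(foo_lock);
 *	static seqcount_spinlock_t foo_seq =
 *		SEQCNT_SPINLOCK_ZERO(foo_seq, &foo_lock);
 *	static u64 foo_data;
 *
 *	void foo_set(u64 val)
 *	{
 *		spin_lock(&foo_lock);
 *		write_seqcount_begin(&foo_seq);
 *		foo_data = val;
 *		write_seqcount_end(&foo_seq);
 *		spin_unlock(&foo_lock);
 *	}
 *
 *	u64 foo_get(void)
 *	{
 *		unsigned int seq;
 *		u64 val;
 *
 *		do {
 *			seq = read_seqcount_begin(&foo_seq);
 *			val = foo_data;
 *		} while (read_seqcount_retry(&foo_seq, seq));
 *
 *		return val;
 *	}
 */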

/**
 * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define __read_seqcount_begin(s)					\
	__read_seqcount_t_begin(__seqcount_ptr(s))

static inline unsigned __read_seqcount_t_begin(const seqcount_t *s)
{
	unsigned ret;

repeat:
	ret = READ_ONCE(s->sequence);
	if (unlikely(ret & 1)) {
		cpu_relax();
		goto repeat;
	}
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	return ret;
}

/**
 * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define raw_read_seqcount_begin(s)					\
	raw_read_seqcount_t_begin(__seqcount_ptr(s))

static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
{
	unsigned ret = __read_seqcount_t_begin(s);
	smp_rmb();
	return ret;
}

/**
 * read_seqcount_begin() - begin a seqcount_t read critical section
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define read_seqcount_begin(s)						\
	read_seqcount_t_begin(__seqcount_ptr(s))

static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
{
	seqcount_lockdep_reader_access(s);
	return raw_read_seqcount_t_begin(s);
}

/**
 * raw_read_seqcount() - read the raw seqcount_t counter value
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * raw_read_seqcount opens a read critical section of the given
 * seqcount_t, without any lockdep checking, and without checking or
 * masking the sequence counter LSB. Calling code is responsible for
 * handling that.
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define raw_read_seqcount(s)						\
	raw_read_seqcount_t(__seqcount_ptr(s))

static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
{
	unsigned ret = READ_ONCE(s->sequence);
	smp_rmb();
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	return ret;
}

/**
 * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
 *                        lockdep and w/o counter stabilization
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * raw_seqcount_begin opens a read critical section of the given
 * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
 * for the count to stabilize. If a writer is active when it begins, it
 * will fail the read_seqcount_retry() at the end of the read critical
 * section instead of stabilizing at the beginning of it.
 *
 * Use this only in special kernel hot paths where the read section is
 * small and has a high probability of success through other external
 * means. It will save a single branching instruction.
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define raw_seqcount_begin(s)						\
	raw_seqcount_t_begin(__seqcount_ptr(s))

static inline unsigned raw_seqcount_t_begin(const seqcount_t *s)
{
	/*
	 * If the counter is odd, let read_seqcount_retry() fail
	 * by decrementing the counter.
	 */
	return raw_read_seqcount_t(s) & ~1;
}

/**
 * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 * @start: count, from read_seqcount_begin()
 *
 * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 *
 * Return: true if a read section retry is required, else false
 */
#define __read_seqcount_retry(s, start)					\
	__read_seqcount_t_retry(__seqcount_ptr(s), start)

static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
{
	kcsan_atomic_next(0);
	return unlikely(READ_ONCE(s->sequence) != start);
}

/**
 * read_seqcount_retry() - end a seqcount_t read critical section
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 * @start: count, from read_seqcount_begin()
 *
 * read_seqcount_retry closes the read critical section of given
 * seqcount_t. If the critical section was invalid, it must be ignored
 * (and typically retried).
 *
 * Return: true if a read section retry is required, else false
 */
#define read_seqcount_retry(s, start)					\
	read_seqcount_t_retry(__seqcount_ptr(s), start)

static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
{
	smp_rmb();
	return __read_seqcount_t_retry(s, start);
}

/**
 * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 */
#define raw_write_seqcount_begin(s)					\
do {									\
	if (__seqcount_lock_preemptible(s))				\
		preempt_disable();					\
									\
	raw_write_seqcount_t_begin(__seqcount_ptr(s));			\
} while (0)

static inline void raw_write_seqcount_t_begin(seqcount_t *s)
{
	kcsan_nestable_atomic_begin();
	s->sequence++;
	smp_wmb();
}

/**
 * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 */
#define raw_write_seqcount_end(s)					\
do {									\
	raw_write_seqcount_t_end(__seqcount_ptr(s));			\
									\
	if (__seqcount_lock_preemptible(s))				\
		preempt_enable();					\
} while (0)

static inline void raw_write_seqcount_t_end(seqcount_t *s)
{
	smp_wmb();
	s->sequence++;
	kcsan_nestable_atomic_end();
}
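
/*
 * Minimal sketch of the raw write variants above, assuming a hypothetical
 * foo_seq counter whose write side serialization and non-preemptibility
 * are already provided by the caller (for example, under a raw spinlock
 * with interrupts disabled). No lockdep checks are performed::
 *
 *	// Caller provides serialization and non-preemptibility
 *	raw_write_seqcount_begin(&foo_seq);
 *	foo_data = new_value;	// update the protected data
 *	raw_write_seqcount_end(&foo_seq);
 */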

/**
 * write_seqcount_begin_nested() - start a seqcount_t write section with
 *                                 custom lockdep nesting level
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 * @subclass: lockdep nesting level
 *
 * See Documentation/locking/lockdep-design.rst
 */
#define write_seqcount_begin_nested(s, subclass)			\
do {									\
	__seqcount_assert_lock_held(s);					\
									\
	if (__seqcount_lock_preemptible(s))				\
		preempt_disable();					\
									\
	write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass);	\
} while (0)

static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
{
	raw_write_seqcount_t_begin(s);
	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
}

/**
 * write_seqcount_begin() - start a seqcount_t write side critical section
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * write_seqcount_begin opens a write side critical section of the given
 * seqcount_t.
 *
 * Context: seqcount_t write side critical sections must be serialized and
 * non-preemptible. If readers can be invoked from hardirq or softirq
 * context, interrupts or bottom halves must be respectively disabled.
 */
#define write_seqcount_begin(s)						\
do {									\
	__seqcount_assert_lock_held(s);					\
									\
	if (__seqcount_lock_preemptible(s))				\
		preempt_disable();					\
									\
	write_seqcount_t_begin(__seqcount_ptr(s));			\
} while (0)

static inline void write_seqcount_t_begin(seqcount_t *s)
{
	write_seqcount_t_begin_nested(s, 0);
}

/**
 * write_seqcount_end() - end a seqcount_t write side critical section
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * The write section must've been opened with write_seqcount_begin().
 */
#define write_seqcount_end(s)						\
do {									\
	write_seqcount_t_end(__seqcount_ptr(s));			\
									\
	if (__seqcount_lock_preemptible(s))				\
		preempt_enable();					\
} while (0)

static inline void write_seqcount_t_end(seqcount_t *s)
{
	seqcount_release(&s->dep_map, _RET_IP_);
	raw_write_seqcount_t_end(s);
}

/**
 * raw_write_seqcount_barrier() - do a seqcount_t write barrier
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * This can be used to provide an ordering guarantee instead of the usual
 * consistency guarantee. It is one wmb cheaper, because it can collapse
 * the two back-to-back wmb()s.
 *
 * Note that writes surrounding the barrier should be declared atomic (e.g.
 * via WRITE_ONCE): a) to ensure the writes become visible to other threads
 * atomically, avoiding compiler optimizations; b) to document which writes are
 * meant to propagate to the reader critical section. This is necessary
 * because neither the writes before nor the writes after the barrier are
 * enclosed in a seq-writer critical section that would ensure readers are
 * aware of ongoing writes::
 *
 *	seqcount_t seq;
 *	bool X = true, Y = false;
 *
 *	void read(void)
 *	{
 *		bool x, y;
 *
 *		do {
 *			int s = read_seqcount_begin(&seq);
 *
 *			x = X; y = Y;
 *
 *		} while (read_seqcount_retry(&seq, s));
 *
 *		BUG_ON(!x && !y);
 *	}
 *
 *	void write(void)
 *	{
 *		WRITE_ONCE(Y, true);
 *
 *		raw_write_seqcount_barrier(seq);
 *
 *		WRITE_ONCE(X, false);
 *	}
 */
#define raw_write_seqcount_barrier(s)					\
	raw_write_seqcount_t_barrier(__seqcount_ptr(s))

static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
{
	kcsan_nestable_atomic_begin();
	s->sequence++;
	smp_wmb();
	s->sequence++;
	kcsan_nestable_atomic_end();
}

/**
 * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
 *                               side operations
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * After write_seqcount_invalidate, no seqcount_t read side operations
 * will complete successfully and see data older than this.
 */
#define write_seqcount_invalidate(s)					\
	write_seqcount_t_invalidate(__seqcount_ptr(s))

static inline void write_seqcount_t_invalidate(seqcount_t *s)
{
	smp_wmb();
	kcsan_nestable_atomic_begin();
	s->sequence += 2;
	kcsan_nestable_atomic_end();
}

/**
 * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * Use seqcount_t latching to switch between two storage places protected
 * by a sequence counter. Doing so allows having interruptible, preemptible,
 * seqcount_t write side critical sections.
 *
 * Check raw_write_seqcount_latch() for more details and a full reader and
 * writer usage example.
 *
 * Return: sequence counter raw value. Use the lowest bit as an index for
 * picking which data copy to read. The full counter value must then be
 * checked with read_seqcount_retry().
 */
#define raw_read_seqcount_latch(s)					\
	raw_read_seqcount_t_latch(__seqcount_ptr(s))

static inline int raw_read_seqcount_t_latch(seqcount_t *s)
{
	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
	int seq = READ_ONCE(s->sequence); /* ^^^ */
	return seq;
}

/**
 * raw_write_seqcount_latch() - redirect readers to even/odd copy
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * The latch technique is a multiversion concurrency control method that allows
 * queries during non-atomic modifications. If you can guarantee queries never
 * interrupt the modification -- e.g. the concurrency is strictly between CPUs
 * -- you most likely do not need this.
 *
 * Where the traditional RCU/lockless data structures rely on atomic
 * modifications to ensure queries observe either the old or the new state,
 * the latch allows the same for non-atomic updates. The trade-off is doubling
 * the cost of storage; we have to maintain two copies of the entire data
 * structure.
 *
 * Very simply put: we first modify one copy and then the other. This ensures
 * there is always one copy in a stable state, ready to give us an answer.
 *
 * The basic form is a data structure like::
 *
 *	struct latch_struct {
 *		seqcount_t		seq;
 *		struct data_struct	data[2];
 *	};
 *
 * Where a modification, which is assumed to be externally serialized, does the
 * following::
 *
 *	void latch_modify(struct latch_struct *latch, ...)
 *	{
 *		smp_wmb();	// Ensure that the last data[1] update is visible
 *		latch->seq++;
 *		smp_wmb();	// Ensure that the seqcount update is visible
 *
 *		modify(latch->data[0], ...);
 *
 *		smp_wmb();	// Ensure that the data[0] update is visible
 *		latch->seq++;
 *		smp_wmb();	// Ensure that the seqcount update is visible
 *
 *		modify(latch->data[1], ...);
 *	}
 *
 * The query will have a form like::
 *
 *	struct entry *latch_query(struct latch_struct *latch, ...)
 *	{
 *		struct entry *entry;
 *		unsigned seq, idx;
 *
 *		do {
 *			seq = raw_read_seqcount_latch(&latch->seq);
 *
 *			idx = seq & 0x01;
 *			entry = data_query(latch->data[idx], ...);
 *
 *			// read_seqcount_retry() includes needed smp_rmb()
 *		} while (read_seqcount_retry(&latch->seq, seq));
 *
 *		return entry;
 *	}
 *
 * So during the modification, queries are first redirected to data[1]. Then we
 * modify data[0]. When that is complete, we redirect queries back to data[0]
 * and we can modify data[1].
 *
 * NOTE:
 *
 *	The non-requirement for atomic modifications does _NOT_ include
 *	the publishing of new entries in the case where data is a dynamic
 *	data structure.
 *
 *	An iteration might start in data[0] and get suspended long enough
 *	to miss an entire modification sequence; once it resumes, it might
 *	observe the new entry.
 *
 * NOTE:
 *
 *	When data is a dynamic data structure, one should use regular RCU
 *	patterns to manage the lifetimes of the objects within.
 */
#define raw_write_seqcount_latch(s)					\
	raw_write_seqcount_t_latch(__seqcount_ptr(s))

static inline void raw_write_seqcount_t_latch(seqcount_t *s)
{
	smp_wmb();	/* prior stores before incrementing "sequence" */
	s->sequence++;
	smp_wmb();	/* increment "sequence" before following stores */
}

/*
 * Sequential locks (seqlock_t)
 *
 * Sequence counters with an embedded spinlock for writer serialization
 * and non-preemptibility.
 *
 * For more info, see:
 *    - Comments on top of seqcount_t
 *    - Documentation/locking/seqlock.rst
 */
typedef struct {
	struct seqcount seqcount;
	spinlock_t lock;
} seqlock_t;

#define __SEQLOCK_UNLOCKED(lockname)					\
	{								\
		.seqcount = SEQCNT_ZERO(lockname),			\
		.lock =	__SPIN_LOCK_UNLOCKED(lockname)			\
	}

/**
 * seqlock_init() - dynamic initializer for seqlock_t
 * @sl: Pointer to the seqlock_t instance
 */
#define seqlock_init(sl)						\
	do {								\
		seqcount_init(&(sl)->seqcount);				\
		spin_lock_init(&(sl)->lock);				\
	} while (0)

/**
 * DEFINE_SEQLOCK() - Define a statically allocated seqlock_t
 * @sl: Name of the seqlock_t instance
 */
#define DEFINE_SEQLOCK(sl)						\
		seqlock_t sl = __SEQLOCK_UNLOCKED(sl)

/**
 * read_seqbegin() - start a seqlock_t read side critical section
 * @sl: Pointer to seqlock_t
 *
 * Return: count, to be passed to read_seqretry()
 */
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
	unsigned ret = read_seqcount_begin(&sl->seqcount);

	kcsan_atomic_next(0);  /* non-raw usage, assume closing read_seqretry() */
	kcsan_flat_atomic_begin();
	return ret;
}

/**
 * read_seqretry() - end a seqlock_t read side section
 * @sl: Pointer to seqlock_t
 * @start: count, from read_seqbegin()
 *
 * read_seqretry closes the read side critical section of given seqlock_t.
 * If the critical section was invalid, it must be ignored (and typically
 * retried).
 *
 * Return: true if a read section retry is required, else false
 */
static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
	/*
	 * Assume not nested: read_seqretry() may be called multiple times when
	 * completing read critical section.
	 */
	kcsan_flat_atomic_end();

	return read_seqcount_retry(&sl->seqcount, start);
}

/**
 * write_seqlock() - start a seqlock_t write side critical section
 * @sl: Pointer to seqlock_t
 *
 * write_seqlock opens a write side critical section for the given
 * seqlock_t. It also implicitly acquires the spinlock_t embedded inside
 * that sequential lock. All seqlock_t write side sections are thus
 * automatically serialized and non-preemptible.
 *
 * Context: if the seqlock_t read section, or other write side critical
 * sections, can be invoked from hardirq or softirq contexts, use the
 * _irqsave or _bh variants of this function instead.
 */
static inline void write_seqlock(seqlock_t *sl)
{
	spin_lock(&sl->lock);
	write_seqcount_t_begin(&sl->seqcount);
}

/**
 * write_sequnlock() - end a seqlock_t write side critical section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock closes the (serialized and non-preemptible) write side
 * critical section of given seqlock_t.
 */
static inline void write_sequnlock(seqlock_t *sl)
{
	write_seqcount_t_end(&sl->seqcount);
	spin_unlock(&sl->lock);
}
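
/*
 * Minimal seqlock_t usage sketch, with hypothetical names (foo_seqlock,
 * foo_x, foo_y). The writer is serialized and made non-preemptible by the
 * embedded spinlock; readers never block and simply retry if they raced
 * with a writer::
 *
 *	static DEFINE_SEQLOCK(foo_seqlock);
 *	static int foo_x, foo_y;
 *
 *	void foo_write(int x, int y)
 *	{
 *		write_seqlock(&foo_seqlock);
 *		foo_x = x;
 *		foo_y = y;
 *		write_sequnlock(&foo_seqlock);
 *	}
 *
 *	void foo_read(int *x, int *y)
 *	{
 *		unsigned int seq;
 *
 *		do {
 *			seq = read_seqbegin(&foo_seqlock);
 *			*x = foo_x;
 *			*y = foo_y;
 *		} while (read_seqretry(&foo_seqlock, seq));
 *	}
 */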

/**
 * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * _bh variant of write_seqlock(). Use only if the read side section, or
 * other write side sections, can be invoked from softirq contexts.
 */
static inline void write_seqlock_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
	write_seqcount_t_begin(&sl->seqcount);
}

/**
 * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock_bh closes the serialized, non-preemptible, and
 * softirqs-disabled, seqlock_t write side critical section opened with
 * write_seqlock_bh().
 */
static inline void write_sequnlock_bh(seqlock_t *sl)
{
	write_seqcount_t_end(&sl->seqcount);
	spin_unlock_bh(&sl->lock);
}

/**
 * write_seqlock_irq() - start a non-interruptible seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * _irq variant of write_seqlock(). Use only if the read side section, or
 * other write sections, can be invoked from hardirq contexts.
 */
static inline void write_seqlock_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
	write_seqcount_t_begin(&sl->seqcount);
}

/**
 * write_sequnlock_irq() - end a non-interruptible seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock_irq closes the serialized and non-interruptible
 * seqlock_t write side section opened with write_seqlock_irq().
 */
static inline void write_sequnlock_irq(seqlock_t *sl)
{
	write_seqcount_t_end(&sl->seqcount);
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	write_seqcount_t_begin(&sl->seqcount);
	return flags;
}

/**
 * write_seqlock_irqsave() - start a non-interruptible seqlock_t write
 *                           section
 * @lock:  Pointer to seqlock_t
 * @flags: Stack-allocated storage for saving caller's local interrupt
 *         state, to be passed to write_sequnlock_irqrestore().
 *
 * _irqsave variant of write_seqlock(). Use it only if the read side
 * section, or other write sections, can be invoked from hardirq context.
 */
#define write_seqlock_irqsave(lock, flags)				\
	do { flags = __write_seqlock_irqsave(lock); } while (0)

/**
 * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write
 *                                section
 * @sl:    Pointer to seqlock_t
 * @flags: Caller's saved interrupt state, from write_seqlock_irqsave()
 *
 * write_sequnlock_irqrestore closes the serialized and non-interruptible
 * seqlock_t write section previously opened with write_seqlock_irqsave().
 */
static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
	write_seqcount_t_end(&sl->seqcount);
	spin_unlock_irqrestore(&sl->lock, flags);
}
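
/*
 * Sketch of the _irqsave write path above, with a hypothetical foo_seqlock
 * whose readers may run from hardirq context. @flags must be a plain
 * unsigned long on the caller's stack::
 *
 *	unsigned long flags;
 *
 *	write_seqlock_irqsave(&foo_seqlock, flags);
 *	foo_x++;	// update the protected data
 *	write_sequnlock_irqrestore(&foo_seqlock, flags);
 */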

/**
 * read_seqlock_excl() - begin a seqlock_t locking reader section
 * @sl: Pointer to seqlock_t
 *
 * read_seqlock_excl opens a seqlock_t locking reader critical section. A
 * locking reader exclusively locks out *both* other writers *and* other
 * locking readers, but it does not update the embedded sequence number.
 *
 * Locking readers act like a normal spin_lock()/spin_unlock().
 *
 * Context: if the seqlock_t write section, *or other read sections*, can
 * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
 * variant of this function instead.
 *
 * The opened read section must be closed with read_sequnlock_excl().
 */
static inline void read_seqlock_excl(seqlock_t *sl)
{
	spin_lock(&sl->lock);
}

/**
 * read_sequnlock_excl() - end a seqlock_t locking reader critical section
 * @sl: Pointer to seqlock_t
 */
static inline void read_sequnlock_excl(seqlock_t *sl)
{
	spin_unlock(&sl->lock);
}
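
/*
 * Locking reader sketch, again with a hypothetical foo_seqlock. Unlike the
 * lockless read_seqbegin()/read_seqretry() loop, no retry is needed because
 * writers (and other locking readers) are excluded for the duration::
 *
 *	read_seqlock_excl(&foo_seqlock);
 *	x = foo_x;	// read the protected data, no retry loop
 *	read_sequnlock_excl(&foo_seqlock);
 */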

/**
 * read_seqlock_excl_bh() - start a seqlock_t locking reader section with
 *			    softirqs disabled
 * @sl: Pointer to seqlock_t
 *
 * _bh variant of read_seqlock_excl(). Use this variant only if the
 * seqlock_t write side section, *or other read sections*, can be invoked
 * from softirq contexts.
 */
static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
}

/**
 * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking
 *			      reader section
 * @sl: Pointer to seqlock_t
 */
static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
	spin_unlock_bh(&sl->lock);
}

/**
 * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking
 *			     reader section
 * @sl: Pointer to seqlock_t
 *
 * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t
 * write side section, *or other read sections*, can be invoked from a
 * hardirq context.
 */
static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
}

/**
 * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t
 *			       locking reader section
 * @sl: Pointer to seqlock_t
 */
static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	return flags;
}

/**
 * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t
 *				 locking reader section
 * @lock:  Pointer to seqlock_t
 * @flags: Stack-allocated storage for saving caller's local interrupt
 *         state, to be passed to read_sequnlock_excl_irqrestore().
 *
 * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t
 * write side section, *or other read sections*, can be invoked from a
 * hardirq context.
 */
#define read_seqlock_excl_irqsave(lock, flags)				\
	do { flags = __read_seqlock_excl_irqsave(lock); } while (0)

/**
 * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t
 *				      locking reader section
 * @sl:    Pointer to seqlock_t
 * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave()
 */
static inline void
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
{
	spin_unlock_irqrestore(&sl->lock, flags);
}

/**
 * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader
 * @lock: Pointer to seqlock_t
 * @seq : Marker and return parameter. If the passed value is even, the
 *        reader will become a *lockless* seqlock_t reader as in read_seqbegin().
 *        If the passed value is odd, the reader will become a *locking* reader
 *        as in read_seqlock_excl(). In the first call to this function, the
 *        caller *must* initialize and pass an even value to @seq; this way, a
 *        lockless read can be optimistically tried first.
 *
 * read_seqbegin_or_lock is an API designed to optimistically try a normal
 * lockless seqlock_t read section first. If an odd counter is found, the
 * lockless read trial has failed, and the next read iteration transforms
 * itself into a full seqlock_t locking reader.
 *
 * This is typically used to avoid lockless seqlock_t reader starvation
 * (too many retry loops) in the case of a sharp spike in write side
 * activity.
 *
 * Context: if the seqlock_t write section, *or other read sections*, can
 * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
 * variant of this function instead.
 *
 * Check Documentation/locking/seqlock.rst for template example code.
 *
 * Return: the encountered sequence counter value, through the @seq
 * parameter, which is overloaded as a return parameter. This returned
 * value must be checked with need_seqretry(). If the read section needs to
 * be retried, this returned value must also be passed as the @seq
 * parameter of the next read_seqbegin_or_lock() iteration.
 */
static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
{
	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl(lock);
}

/**
 * need_seqretry() - validate seqlock_t "locking or lockless" read section
 * @lock: Pointer to seqlock_t
 * @seq: sequence count, from read_seqbegin_or_lock()
 *
 * Return: true if a read section retry is required, false otherwise
 */
static inline int need_seqretry(seqlock_t *lock, int seq)
{
	return !(seq & 1) && read_seqretry(lock, seq);
}

/**
 * done_seqretry() - end seqlock_t "locking or lockless" reader section
 * @lock: Pointer to seqlock_t
 * @seq: count, from read_seqbegin_or_lock()
 *
 * done_seqretry finishes the seqlock_t read side critical section started
 * with read_seqbegin_or_lock() and validated by need_seqretry().
 */
static inline void done_seqretry(seqlock_t *lock, int seq)
{
	if (seq & 1)
		read_sequnlock_excl(lock);
}
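
/*
 * Sketch of the "lockless or locking reader" pattern built from the three
 * helpers above, with a hypothetical foo_seqlock. The first pass is a
 * lockless read; if it has to be retried, the marker is forced odd so the
 * next pass takes the lock and is guaranteed to complete::
 *
 *	int seq, nextseq = 0;
 *
 *	do {
 *		seq = nextseq;
 *		read_seqbegin_or_lock(&foo_seqlock, &seq);
 *
 *		x = foo_x;	// read the protected data
 *
 *		// On a retry, become a locking reader
 *		nextseq = 1;
 *	} while (need_seqretry(&foo_seqlock, seq));
 *	done_seqretry(&foo_seqlock, seq);
 */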

/**
 * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or
 *                                   a non-interruptible locking reader
 * @lock: Pointer to seqlock_t
 * @seq:  Marker and return parameter. Check read_seqbegin_or_lock().
 *
 * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if
 * the seqlock_t write section, *or other read sections*, can be invoked
 * from hardirq context.
 *
 * Note: Interrupts will be disabled only for "locking reader" mode.
 *
 * Return:
 *
 *   1. The saved local interrupts state in case of a locking reader, to
 *      be passed to done_seqretry_irqrestore().
 *
 *   2. The encountered sequence counter value, returned through @seq
 *      overloaded as a return parameter. Check read_seqbegin_or_lock().
 */
static inline unsigned long
read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
{
	unsigned long flags = 0;

	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl_irqsave(lock, flags);

	return flags;
}

/**
 * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a
 *				non-interruptible locking reader section
 * @lock:  Pointer to seqlock_t
 * @seq:   Count, from read_seqbegin_or_lock_irqsave()
 * @flags: Caller's saved local interrupt state in case of a locking
 *	   reader, also from read_seqbegin_or_lock_irqsave()
 *
 * This is the _irqrestore variant of done_seqretry(). The read section
 * must've been opened with read_seqbegin_or_lock_irqsave(), and validated
 * by need_seqretry().
 */
static inline void
done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
{
	if (seq & 1)
		read_sequnlock_excl_irqrestore(lock, flags);
}
#endif /* __LINUX_SEQLOCK_H */