/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_SEQLOCK_H
#define __LINUX_SEQLOCK_H

/*
 * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
 * lockless readers (read-only retry loops), and no writer starvation.
 *
 * See Documentation/locking/seqlock.rst
 *
 * Copyrights:
 * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
 * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH
 */

#include <linux/compiler.h>
#include <linux/kcsan-checks.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>

#include <asm/processor.h>

/*
 * The seqlock seqcount_t interface does not prescribe a precise sequence of
 * read begin/retry/end. For readers, typically there is a call to
 * read_seqcount_begin() and read_seqcount_retry(), however, there are more
 * esoteric cases which do not follow this pattern.
 *
 * As a consequence, we take the following best-effort approach for raw usage
 * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
 * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
 * atomics; if there is a matching read_seqcount_retry() call, no following
 * memory operations are considered atomic. Usage of the seqlock_t interface
 * is not affected.
 */
#define KCSAN_SEQLOCK_REGION_MAX 1000

/*
 * Sequence counters (seqcount_t)
 *
 * This is the raw counting mechanism, without any writer protection.
 *
 * Write side critical sections must be serialized and non-preemptible.
 *
 * If readers can be invoked from hardirq or softirq contexts,
 * interrupts or bottom halves must also be respectively disabled before
 * entering the write section.
 *
 * This mechanism can't be used if the protected data contains pointers,
 * as the writer can invalidate a pointer that a reader is following.
 *
 * If the write serialization mechanism is one of the common kernel
 * locking primitives, use a sequence counter with associated lock
 * (seqcount_LOCKTYPE_t) instead.
 *
 * If it's desired to automatically handle the sequence counter writer
 * serialization and non-preemptibility requirements, use a sequential
 * lock (seqlock_t) instead.
 *
 * See Documentation/locking/seqlock.rst
 */
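
/*
 * Minimal usage sketch: a plain seqcount_t protecting two plain variables.
 * All identifiers below (foo_seq, foo_x, foo_y, ...) are hypothetical. The
 * writer must provide its own serialization and non-preemptibility, as
 * described above::
 *
 *	static seqcount_t foo_seq = SEQCNT_ZERO(foo_seq);
 *	static int foo_x, foo_y;
 *
 *	// Writer: caller is serialized and non-preemptible
 *	void foo_update(int x, int y)
 *	{
 *		write_seqcount_begin(&foo_seq);
 *		foo_x = x;
 *		foo_y = y;
 *		write_seqcount_end(&foo_seq);
 *	}
 *
 *	// Lockless reader: retry if a write was in progress
 *	void foo_read(int *x, int *y)
 *	{
 *		unsigned int seq;
 *
 *		do {
 *			seq = read_seqcount_begin(&foo_seq);
 *			*x = foo_x;
 *			*y = foo_y;
 *		} while (read_seqcount_retry(&foo_seq, seq));
 *	}
 */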

typedef struct seqcount {
	unsigned sequence;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} seqcount_t;

static inline void __seqcount_init(seqcount_t *s, const char *name,
				   struct lock_class_key *key)
{
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	lockdep_init_map(&s->dep_map, name, key, 0);
	s->sequence = 0;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC

# define SEQCOUNT_DEP_MAP_INIT(lockname)				\
		.dep_map = { .name = #lockname }

/**
 * seqcount_init() - runtime initializer for seqcount_t
 * @s: Pointer to the seqcount_t instance
 */
# define seqcount_init(s)						\
	do {								\
		static struct lock_class_key __key;			\
		__seqcount_init((s), #s, &__key);			\
	} while (0)

static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
{
	seqcount_t *l = (seqcount_t *)s;
	unsigned long flags;

	local_irq_save(flags);
	seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
	seqcount_release(&l->dep_map, _RET_IP_);
	local_irq_restore(flags);
}

#else
# define SEQCOUNT_DEP_MAP_INIT(lockname)
# define seqcount_init(s) __seqcount_init(s, NULL, NULL)
# define seqcount_lockdep_reader_access(x)
#endif

/**
 * SEQCNT_ZERO() - static initializer for seqcount_t
 * @name: Name of the seqcount_t instance
 */
#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }

/*
 * Sequence counters with associated locks (seqcount_LOCKTYPE_t)
 *
 * A sequence counter which associates the lock used for writer
 * serialization at initialization time. This enables lockdep to validate
 * that the write side critical section is properly serialized.
 *
 * For associated locks which do not implicitly disable preemption,
 * preemption protection is enforced in the write side function.
 *
 * Lockdep is never used in any of the raw write variants.
 *
 * See Documentation/locking/seqlock.rst
 */

#ifdef CONFIG_LOCKDEP
#define __SEQ_LOCK(expr)	expr
#else
#define __SEQ_LOCK(expr)
#endif

/**
 * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPE associated
 * @seqcount:	The real sequence counter
 * @lock:	Pointer to the associated spinlock
 *
 * A plain sequence counter with external writer synchronization by a
 * spinlock. The spinlock is associated to the sequence count in the
 * static initializer or init function. This enables lockdep to validate
 * that the write side critical section is properly serialized.
 */

/**
 * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t
 * @s:		Pointer to the seqcount_LOCKNAME_t instance
 * @lock:	Pointer to the associated LOCKTYPE
 */

/*
 * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers
 * @locktype:		actual typename
 * @lockname:		name
 * @preemptible:	preemptibility of above locktype
 * @lockmember:		argument for lockdep_assert_held()
 */
#define SEQCOUNT_LOCKTYPE(locktype, lockname, preemptible, lockmember)	\
typedef struct seqcount_##lockname {					\
	seqcount_t		seqcount;				\
	__SEQ_LOCK(locktype	*lock);					\
} seqcount_##lockname##_t;						\
									\
static __always_inline void						\
seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock)	\
{									\
	seqcount_init(&s->seqcount);					\
	__SEQ_LOCK(s->lock = lock);					\
}									\
									\
static __always_inline seqcount_t *					\
__seqcount_##lockname##_ptr(seqcount_##lockname##_t *s)		\
{									\
	return &s->seqcount;						\
}									\
									\
static __always_inline bool						\
__seqcount_##lockname##_preemptible(seqcount_##lockname##_t *s)	\
{									\
	return preemptible;						\
}									\
									\
static __always_inline void						\
__seqcount_##lockname##_assert(seqcount_##lockname##_t *s)		\
{									\
	__SEQ_LOCK(lockdep_assert_held(lockmember));			\
}

/*
 * __seqprop() for seqcount_t
 */

static inline seqcount_t *__seqcount_ptr(seqcount_t *s)
{
	return s;
}

static inline bool __seqcount_preemptible(seqcount_t *s)
{
	return false;
}

static inline void __seqcount_assert(seqcount_t *s)
{
	lockdep_assert_preemption_disabled();
}

SEQCOUNT_LOCKTYPE(raw_spinlock_t,	raw_spinlock,	false,	s->lock)
SEQCOUNT_LOCKTYPE(spinlock_t,		spinlock,	false,	s->lock)
SEQCOUNT_LOCKTYPE(rwlock_t,		rwlock,		false,	s->lock)
SEQCOUNT_LOCKTYPE(struct mutex,		mutex,		true,	s->lock)
SEQCOUNT_LOCKTYPE(struct ww_mutex,	ww_mutex,	true,	&s->lock->base)

/**
 * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
 * @name:	Name of the seqcount_LOCKNAME_t instance
 * @lock:	Pointer to the associated LOCKTYPE
 */

#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) {			\
	.seqcount		= SEQCNT_ZERO(seq_name.seqcount),	\
	__SEQ_LOCK(.lock	= (assoc_lock))				\
}

#define SEQCNT_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
#define SEQCNT_RWLOCK_ZERO(name, lock)		SEQCOUNT_LOCKTYPE_ZERO(name, lock)
#define SEQCNT_MUTEX_ZERO(name, lock)		SEQCOUNT_LOCKTYPE_ZERO(name, lock)
#define SEQCNT_WW_MUTEX_ZERO(name, lock)	SEQCOUNT_LOCKTYPE_ZERO(name, lock)

#define __seqprop_case(s, lockname, prop)				\
	seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s))

#define __seqprop(s, prop) _Generic(*(s),				\
	seqcount_t:		__seqcount_##prop((void *)(s)),		\
	__seqprop_case((s),	raw_spinlock,	prop),			\
	__seqprop_case((s),	spinlock,	prop),			\
	__seqprop_case((s),	rwlock,		prop),			\
	__seqprop_case((s),	mutex,		prop),			\
	__seqprop_case((s),	ww_mutex,	prop))

#define __seqcount_ptr(s)		__seqprop(s, ptr)
#define __seqcount_lock_preemptible(s)	__seqprop(s, preemptible)
#define __seqcount_assert_lock_held(s)	__seqprop(s, assert)
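
/*
 * Minimal sketch of a sequence counter with an associated lock. All names
 * below (foo_lock, foo_seq, foo_data, foo_set, foo_get) are hypothetical.
 * Because the counter is associated with a spinlock, lockdep can verify
 * that the lock is held in the write section, and no manual preemption
 * control is needed (the spinlock already disables preemption)::
 *
 *	static DEFINE_SPINLOCK(foo_lock);
 *	static seqcount_spinlock_t foo_seq =
 *		SEQCNT_SPINLOCK_ZERO(foo_seq, &foo_lock);
 *	static u64 foo_data;
 *
 *	void foo_set(u64 val)
 *	{
 *		spin_lock(&foo_lock);
 *		write_seqcount_begin(&foo_seq);
 *		foo_data = val;
 *		write_seqcount_end(&foo_seq);
 *		spin_unlock(&foo_lock);
 *	}
 *
 *	u64 foo_get(void)
 *	{
 *		unsigned int seq;
 *		u64 val;
 *
 *		do {
 *			seq = read_seqcount_begin(&foo_seq);
 *			val = foo_data;
 *		} while (read_seqcount_retry(&foo_seq, seq));
 *
 *		return val;
 *	}
 */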

/**
 * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define __read_seqcount_begin(s)					\
	__read_seqcount_t_begin(__seqcount_ptr(s))

static inline unsigned __read_seqcount_t_begin(const seqcount_t *s)
{
	unsigned ret;

repeat:
	ret = READ_ONCE(s->sequence);
	if (unlikely(ret & 1)) {
		cpu_relax();
		goto repeat;
	}
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	return ret;
}

/**
 * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define raw_read_seqcount_begin(s)					\
	raw_read_seqcount_t_begin(__seqcount_ptr(s))

static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
{
	unsigned ret = __read_seqcount_t_begin(s);
	smp_rmb();
	return ret;
}

/**
 * read_seqcount_begin() - begin a seqcount_t read critical section
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define read_seqcount_begin(s)						\
	read_seqcount_t_begin(__seqcount_ptr(s))

static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
{
	seqcount_lockdep_reader_access(s);
	return raw_read_seqcount_t_begin(s);
}

/**
 * raw_read_seqcount() - read the raw seqcount_t counter value
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * raw_read_seqcount opens a read critical section of the given
 * seqcount_t, without any lockdep checking, and without checking or
 * masking the sequence counter LSB. Calling code is responsible for
 * handling that.
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define raw_read_seqcount(s)						\
	raw_read_seqcount_t(__seqcount_ptr(s))

static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
{
	unsigned ret = READ_ONCE(s->sequence);
	smp_rmb();
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	return ret;
}

/**
 * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
 *                        lockdep and w/o counter stabilization
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * raw_seqcount_begin opens a read critical section of the given
 * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
 * for the count to stabilize. If a writer is active when it begins, it
 * will fail the read_seqcount_retry() at the end of the read critical
 * section instead of stabilizing at the beginning of it.
 *
 * Use this only in special kernel hot paths where the read section is
 * small and has a high probability of success through other external
 * means. It will save a single branching instruction.
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define raw_seqcount_begin(s)						\
	raw_seqcount_t_begin(__seqcount_ptr(s))

static inline unsigned raw_seqcount_t_begin(const seqcount_t *s)
{
	/*
	 * If the counter is odd, let read_seqcount_retry() fail
	 * by decrementing the counter.
	 */
	return raw_read_seqcount_t(s) & ~1;
}

/**
 * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 * @start: count, from read_seqcount_begin()
 *
 * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 *
 * Return: true if a read section retry is required, else false
 */
#define __read_seqcount_retry(s, start)					\
	__read_seqcount_t_retry(__seqcount_ptr(s), start)

static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
{
	kcsan_atomic_next(0);
	return unlikely(READ_ONCE(s->sequence) != start);
}

/**
 * read_seqcount_retry() - end a seqcount_t read critical section
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 * @start: count, from read_seqcount_begin()
 *
 * read_seqcount_retry closes the read critical section of given
 * seqcount_t. If the critical section was invalid, it must be ignored
 * (and typically retried).
 *
 * Return: true if a read section retry is required, else false
 */
#define read_seqcount_retry(s, start)					\
	read_seqcount_t_retry(__seqcount_ptr(s), start)

static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
{
	smp_rmb();
	return __read_seqcount_t_retry(s, start);
}

/**
 * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 */
#define raw_write_seqcount_begin(s)					\
do {									\
	if (__seqcount_lock_preemptible(s))				\
		preempt_disable();					\
									\
	raw_write_seqcount_t_begin(__seqcount_ptr(s));			\
} while (0)

static inline void raw_write_seqcount_t_begin(seqcount_t *s)
{
	kcsan_nestable_atomic_begin();
	s->sequence++;
	smp_wmb();
}

/**
 * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 */
#define raw_write_seqcount_end(s)					\
do {									\
	raw_write_seqcount_t_end(__seqcount_ptr(s));			\
									\
	if (__seqcount_lock_preemptible(s))				\
		preempt_enable();					\
} while (0)

static inline void raw_write_seqcount_t_end(seqcount_t *s)
{
	smp_wmb();
	s->sequence++;
	kcsan_nestable_atomic_end();
}
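
/*
 * Minimal sketch of the raw write variants above, assuming a hypothetical
 * foo_seq counter whose write side serialization and non-preemptibility
 * are already provided by the caller (for example, under a raw spinlock
 * with interrupts disabled). No lockdep checks are performed::
 *
 *	// Caller provides serialization and non-preemptibility
 *	raw_write_seqcount_begin(&foo_seq);
 *	foo_data = new_value;	// update the protected data
 *	raw_write_seqcount_end(&foo_seq);
 */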

/**
 * write_seqcount_begin_nested() - start a seqcount_t write section with
 *                                 custom lockdep nesting level
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 * @subclass: lockdep nesting level
 *
 * See Documentation/locking/lockdep-design.rst
 */
#define write_seqcount_begin_nested(s, subclass)			\
do {									\
	__seqcount_assert_lock_held(s);					\
									\
	if (__seqcount_lock_preemptible(s))				\
		preempt_disable();					\
									\
	write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass);	\
} while (0)

static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
{
	raw_write_seqcount_t_begin(s);
	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
}

/**
 * write_seqcount_begin() - start a seqcount_t write side critical section
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * write_seqcount_begin opens a write side critical section of the given
 * seqcount_t.
 *
 * Context: seqcount_t write side critical sections must be serialized and
 * non-preemptible. If readers can be invoked from hardirq or softirq
 * context, interrupts or bottom halves must be respectively disabled.
 */
#define write_seqcount_begin(s)						\
do {									\
	__seqcount_assert_lock_held(s);					\
									\
	if (__seqcount_lock_preemptible(s))				\
		preempt_disable();					\
									\
	write_seqcount_t_begin(__seqcount_ptr(s));			\
} while (0)

static inline void write_seqcount_t_begin(seqcount_t *s)
{
	write_seqcount_t_begin_nested(s, 0);
}

/**
 * write_seqcount_end() - end a seqcount_t write side critical section
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * The write section must've been opened with write_seqcount_begin().
 */
#define write_seqcount_end(s)						\
do {									\
	write_seqcount_t_end(__seqcount_ptr(s));			\
									\
	if (__seqcount_lock_preemptible(s))				\
		preempt_enable();					\
} while (0)

static inline void write_seqcount_t_end(seqcount_t *s)
{
	seqcount_release(&s->dep_map, _RET_IP_);
	raw_write_seqcount_t_end(s);
}

/**
 * raw_write_seqcount_barrier() - do a seqcount_t write barrier
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * This can be used to provide an ordering guarantee instead of the usual
 * consistency guarantee. It is one wmb cheaper, because it can collapse
 * the two back-to-back wmb()s.
 *
 * Note that writes surrounding the barrier should be declared atomic (e.g.
 * via WRITE_ONCE): a) to ensure the writes become visible to other threads
 * atomically, avoiding compiler optimizations; b) to document which writes are
 * meant to propagate to the reader critical section. This is necessary
 * because neither the writes before nor the writes after the barrier are
 * enclosed in a seq-writer critical section that would ensure readers are
 * aware of ongoing writes::
 *
 *	seqcount_t seq;
 *	bool X = true, Y = false;
 *
 *	void read(void)
 *	{
 *		bool x, y;
 *
 *		do {
 *			int s = read_seqcount_begin(&seq);
 *
 *			x = X; y = Y;
 *
 *		} while (read_seqcount_retry(&seq, s));
 *
 *		BUG_ON(!x && !y);
 *	}
 *
 *	void write(void)
 *	{
 *		WRITE_ONCE(Y, true);
 *
 *		raw_write_seqcount_barrier(seq);
 *
 *		WRITE_ONCE(X, false);
 *	}
 */
#define raw_write_seqcount_barrier(s)					\
	raw_write_seqcount_t_barrier(__seqcount_ptr(s))

static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
{
	kcsan_nestable_atomic_begin();
	s->sequence++;
	smp_wmb();
	s->sequence++;
	kcsan_nestable_atomic_end();
}

/**
 * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
 *                               side operations
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * After write_seqcount_invalidate, no seqcount_t read side operations
 * will complete successfully and see data older than this.
 */
#define write_seqcount_invalidate(s)					\
	write_seqcount_t_invalidate(__seqcount_ptr(s))

static inline void write_seqcount_t_invalidate(seqcount_t *s)
{
	smp_wmb();
	kcsan_nestable_atomic_begin();
	s->sequence += 2;
	kcsan_nestable_atomic_end();
}

/**
 * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * Use seqcount_t latching to switch between two storage places protected
 * by a sequence counter. Doing so allows having interruptible, preemptible,
 * seqcount_t write side critical sections.
 *
 * Check raw_write_seqcount_latch() for more details and a full reader and
 * writer usage example.
 *
 * Return: sequence counter raw value. Use the lowest bit as an index for
 * picking which data copy to read. The full counter value must then be
 * checked with read_seqcount_retry().
 */
#define raw_read_seqcount_latch(s)					\
	raw_read_seqcount_t_latch(__seqcount_ptr(s))

static inline int raw_read_seqcount_t_latch(seqcount_t *s)
{
	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
	int seq = READ_ONCE(s->sequence); /* ^^^ */
	return seq;
}

/**
 * raw_write_seqcount_latch() - redirect readers to even/odd copy
 * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
 *
 * The latch technique is a multiversion concurrency control method that allows
 * queries during non-atomic modifications. If you can guarantee queries never
 * interrupt the modification -- e.g. the concurrency is strictly between CPUs
 * -- you most likely do not need this.
 *
 * Where the traditional RCU/lockless data structures rely on atomic
 * modifications to ensure queries observe either the old or the new state,
 * the latch allows the same for non-atomic updates. The trade-off is doubling
 * the cost of storage; we have to maintain two copies of the entire data
 * structure.
 *
 * Very simply put: we first modify one copy and then the other. This ensures
 * there is always one copy in a stable state, ready to give us an answer.
 *
 * The basic form is a data structure like::
 *
 *	struct latch_struct {
 *		seqcount_t		seq;
 *		struct data_struct	data[2];
 *	};
 *
 * Where a modification, which is assumed to be externally serialized, does the
 * following::
 *
 *	void latch_modify(struct latch_struct *latch, ...)
 *	{
 *		smp_wmb();	// Ensure that the last data[1] update is visible
 *		latch->seq++;
 *		smp_wmb();	// Ensure that the seqcount update is visible
 *
 *		modify(latch->data[0], ...);
 *
 *		smp_wmb();	// Ensure that the data[0] update is visible
 *		latch->seq++;
 *		smp_wmb();	// Ensure that the seqcount update is visible
 *
 *		modify(latch->data[1], ...);
 *	}
 *
 * The query will have a form like::
 *
 *	struct entry *latch_query(struct latch_struct *latch, ...)
 *	{
 *		struct entry *entry;
 *		unsigned seq, idx;
 *
 *		do {
 *			seq = raw_read_seqcount_latch(&latch->seq);
 *
 *			idx = seq & 0x01;
 *			entry = data_query(latch->data[idx], ...);
 *
 *			// read_seqcount_retry() includes needed smp_rmb()
 *		} while (read_seqcount_retry(&latch->seq, seq));
 *
 *		return entry;
 *	}
 *
 * So during the modification, queries are first redirected to data[1]. Then we
 * modify data[0]. When that is complete, we redirect queries back to data[0]
 * and we can modify data[1].
 *
 * NOTE:
 *
 *	The non-requirement for atomic modifications does _NOT_ include
 *	the publishing of new entries in the case where data is a dynamic
 *	data structure.
 *
 *	An iteration might start in data[0] and get suspended long enough
 *	to miss an entire modification sequence; once it resumes, it might
 *	observe the new entry.
 *
 * NOTE:
 *
 *	When data is a dynamic data structure, one should use regular RCU
 *	patterns to manage the lifetimes of the objects within.
 */
#define raw_write_seqcount_latch(s)					\
	raw_write_seqcount_t_latch(__seqcount_ptr(s))

static inline void raw_write_seqcount_t_latch(seqcount_t *s)
{
	smp_wmb();	/* prior stores before incrementing "sequence" */
	s->sequence++;
	smp_wmb();	/* increment "sequence" before following stores */
}

/*
 * Sequential locks (seqlock_t)
 *
 * Sequence counters with an embedded spinlock for writer serialization
 * and non-preemptibility.
 *
 * For more info, see:
 *    - Comments on top of seqcount_t
 *    - Documentation/locking/seqlock.rst
 */
typedef struct {
	struct seqcount seqcount;
	spinlock_t lock;
} seqlock_t;

#define __SEQLOCK_UNLOCKED(lockname)					\
	{								\
		.seqcount = SEQCNT_ZERO(lockname),			\
		.lock =	__SPIN_LOCK_UNLOCKED(lockname)			\
	}

/**
 * seqlock_init() - dynamic initializer for seqlock_t
 * @sl: Pointer to the seqlock_t instance
 */
#define seqlock_init(sl)						\
	do {								\
		seqcount_init(&(sl)->seqcount);				\
		spin_lock_init(&(sl)->lock);				\
	} while (0)

/**
 * DEFINE_SEQLOCK() - Define a statically allocated seqlock_t
 * @sl: Name of the seqlock_t instance
 */
#define DEFINE_SEQLOCK(sl)						\
		seqlock_t sl = __SEQLOCK_UNLOCKED(sl)

/**
 * read_seqbegin() - start a seqlock_t read side critical section
 * @sl: Pointer to seqlock_t
 *
 * Return: count, to be passed to read_seqretry()
 */
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
	unsigned ret = read_seqcount_begin(&sl->seqcount);

	kcsan_atomic_next(0);  /* non-raw usage, assume closing read_seqretry() */
	kcsan_flat_atomic_begin();
	return ret;
}

/**
 * read_seqretry() - end a seqlock_t read side section
 * @sl: Pointer to seqlock_t
 * @start: count, from read_seqbegin()
 *
 * read_seqretry closes the read side critical section of given seqlock_t.
 * If the critical section was invalid, it must be ignored (and typically
 * retried).
 *
 * Return: true if a read section retry is required, else false
 */
static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
	/*
	 * Assume not nested: read_seqretry() may be called multiple times when
	 * completing read critical section.
	 */
	kcsan_flat_atomic_end();

	return read_seqcount_retry(&sl->seqcount, start);
}

/**
 * write_seqlock() - start a seqlock_t write side critical section
 * @sl: Pointer to seqlock_t
 *
 * write_seqlock opens a write side critical section for the given
 * seqlock_t. It also implicitly acquires the spinlock_t embedded inside
 * that sequential lock. All seqlock_t write side sections are thus
 * automatically serialized and non-preemptible.
 *
 * Context: if the seqlock_t read section, or other write side critical
 * sections, can be invoked from hardirq or softirq contexts, use the
 * _irqsave or _bh variants of this function instead.
 */
static inline void write_seqlock(seqlock_t *sl)
{
	spin_lock(&sl->lock);
	write_seqcount_t_begin(&sl->seqcount);
}

/**
 * write_sequnlock() - end a seqlock_t write side critical section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock closes the (serialized and non-preemptible) write side
 * critical section of given seqlock_t.
 */
static inline void write_sequnlock(seqlock_t *sl)
{
	write_seqcount_t_end(&sl->seqcount);
	spin_unlock(&sl->lock);
}
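
/*
 * Minimal seqlock_t usage sketch, with hypothetical names (foo_seqlock,
 * foo_x, foo_y). The writer is serialized and made non-preemptible by the
 * embedded spinlock; readers never block and simply retry if they raced
 * with a writer::
 *
 *	static DEFINE_SEQLOCK(foo_seqlock);
 *	static int foo_x, foo_y;
 *
 *	void foo_write(int x, int y)
 *	{
 *		write_seqlock(&foo_seqlock);
 *		foo_x = x;
 *		foo_y = y;
 *		write_sequnlock(&foo_seqlock);
 *	}
 *
 *	void foo_read(int *x, int *y)
 *	{
 *		unsigned int seq;
 *
 *		do {
 *			seq = read_seqbegin(&foo_seqlock);
 *			*x = foo_x;
 *			*y = foo_y;
 *		} while (read_seqretry(&foo_seqlock, seq));
 *	}
 */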

/**
 * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * _bh variant of write_seqlock(). Use only if the read side section, or
 * other write side sections, can be invoked from softirq contexts.
 */
static inline void write_seqlock_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
	write_seqcount_t_begin(&sl->seqcount);
}

/**
 * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock_bh closes the serialized, non-preemptible, and
 * softirqs-disabled, seqlock_t write side critical section opened with
 * write_seqlock_bh().
 */
static inline void write_sequnlock_bh(seqlock_t *sl)
{
	write_seqcount_t_end(&sl->seqcount);
	spin_unlock_bh(&sl->lock);
}

/**
 * write_seqlock_irq() - start a non-interruptible seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * _irq variant of write_seqlock(). Use only if the read side section, or
 * other write sections, can be invoked from hardirq contexts.
 */
static inline void write_seqlock_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
	write_seqcount_t_begin(&sl->seqcount);
}

/**
 * write_sequnlock_irq() - end a non-interruptible seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock_irq closes the serialized and non-interruptible
 * seqlock_t write side section opened with write_seqlock_irq().
 */
static inline void write_sequnlock_irq(seqlock_t *sl)
{
	write_seqcount_t_end(&sl->seqcount);
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	write_seqcount_t_begin(&sl->seqcount);
	return flags;
}

/**
 * write_seqlock_irqsave() - start a non-interruptible seqlock_t write
 *                           section
 * @lock:  Pointer to seqlock_t
 * @flags: Stack-allocated storage for saving caller's local interrupt
 *         state, to be passed to write_sequnlock_irqrestore().
 *
 * _irqsave variant of write_seqlock(). Use it only if the read side
 * section, or other write sections, can be invoked from hardirq context.
 */
#define write_seqlock_irqsave(lock, flags)				\
	do { flags = __write_seqlock_irqsave(lock); } while (0)

/**
 * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write
 *                                section
 * @sl:    Pointer to seqlock_t
 * @flags: Caller's saved interrupt state, from write_seqlock_irqsave()
 *
 * write_sequnlock_irqrestore closes the serialized and non-interruptible
 * seqlock_t write section previously opened with write_seqlock_irqsave().
 */
static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
	write_seqcount_t_end(&sl->seqcount);
	spin_unlock_irqrestore(&sl->lock, flags);
}
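
/*
 * Sketch of the _irqsave write path above, with a hypothetical foo_seqlock
 * whose readers may run from hardirq context. @flags must be a plain
 * unsigned long on the caller's stack::
 *
 *	unsigned long flags;
 *
 *	write_seqlock_irqsave(&foo_seqlock, flags);
 *	foo_x++;	// update the protected data
 *	write_sequnlock_irqrestore(&foo_seqlock, flags);
 */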

/**
 * read_seqlock_excl() - begin a seqlock_t locking reader section
 * @sl: Pointer to seqlock_t
 *
 * read_seqlock_excl opens a seqlock_t locking reader critical section. A
 * locking reader exclusively locks out *both* other writers *and* other
 * locking readers, but it does not update the embedded sequence number.
 *
 * Locking readers act like a normal spin_lock()/spin_unlock().
 *
 * Context: if the seqlock_t write section, *or other read sections*, can
 * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
 * variant of this function instead.
 *
 * The opened read section must be closed with read_sequnlock_excl().
 */
static inline void read_seqlock_excl(seqlock_t *sl)
{
	spin_lock(&sl->lock);
}

/**
 * read_sequnlock_excl() - end a seqlock_t locking reader critical section
 * @sl: Pointer to seqlock_t
 */
static inline void read_sequnlock_excl(seqlock_t *sl)
{
	spin_unlock(&sl->lock);
}
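
/*
 * Locking reader sketch, again with a hypothetical foo_seqlock. Unlike the
 * lockless read_seqbegin()/read_seqretry() loop, no retry is needed because
 * writers (and other locking readers) are excluded for the duration::
 *
 *	read_seqlock_excl(&foo_seqlock);
 *	x = foo_x;	// read the protected data, no retry loop
 *	read_sequnlock_excl(&foo_seqlock);
 */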

/**
 * read_seqlock_excl_bh() - start a seqlock_t locking reader section with
 *			    softirqs disabled
 * @sl: Pointer to seqlock_t
 *
 * _bh variant of read_seqlock_excl(). Use this variant only if the
 * seqlock_t write side section, *or other read sections*, can be invoked
 * from softirq contexts.
 */
static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
}

/**
 * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking
 *			      reader section
 * @sl: Pointer to seqlock_t
 */
static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
	spin_unlock_bh(&sl->lock);
}

/**
 * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking
 *			     reader section
 * @sl: Pointer to seqlock_t
 *
 * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t
 * write side section, *or other read sections*, can be invoked from a
 * hardirq context.
 */
static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
}

/**
 * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t
 *			       locking reader section
 * @sl: Pointer to seqlock_t
 */
static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	return flags;
}

/**
 * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t
 *				 locking reader section
 * @lock:  Pointer to seqlock_t
 * @flags: Stack-allocated storage for saving caller's local interrupt
 *         state, to be passed to read_sequnlock_excl_irqrestore().
 *
 * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t
 * write side section, *or other read sections*, can be invoked from a
 * hardirq context.
 */
#define read_seqlock_excl_irqsave(lock, flags)				\
	do { flags = __read_seqlock_excl_irqsave(lock); } while (0)

/**
 * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t
 *				      locking reader section
 * @sl:    Pointer to seqlock_t
 * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave()
 */
static inline void
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
{
	spin_unlock_irqrestore(&sl->lock, flags);
}

/**
 * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader
 * @lock: Pointer to seqlock_t
 * @seq : Marker and return parameter. If the passed value is even, the
 *        reader will become a *lockless* seqlock_t reader as in read_seqbegin().
 *        If the passed value is odd, the reader will become a *locking* reader
 *        as in read_seqlock_excl(). In the first call to this function, the
 *        caller *must* initialize and pass an even value to @seq; this way, a
 *        lockless read can be optimistically tried first.
 *
 * read_seqbegin_or_lock is an API designed to optimistically try a normal
 * lockless seqlock_t read section first. If an odd counter is found, the
 * lockless read trial has failed, and the next read iteration transforms
 * itself into a full seqlock_t locking reader.
 *
 * This is typically used to avoid lockless seqlock_t reader starvation
 * (too many retry loops) in the case of a sharp spike in write side
 * activity.
 *
 * Context: if the seqlock_t write section, *or other read sections*, can
 * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
 * variant of this function instead.
 *
 * Check Documentation/locking/seqlock.rst for template example code.
 *
 * Return: the encountered sequence counter value, through the @seq
 * parameter, which is overloaded as a return parameter. This returned
 * value must be checked with need_seqretry(). If the read section needs to
 * be retried, this returned value must also be passed as the @seq
 * parameter of the next read_seqbegin_or_lock() iteration.
 */
static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
{
	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl(lock);
}

/**
 * need_seqretry() - validate seqlock_t "locking or lockless" read section
 * @lock: Pointer to seqlock_t
 * @seq: sequence count, from read_seqbegin_or_lock()
 *
 * Return: true if a read section retry is required, false otherwise
 */
static inline int need_seqretry(seqlock_t *lock, int seq)
{
	return !(seq & 1) && read_seqretry(lock, seq);
}

/**
 * done_seqretry() - end seqlock_t "locking or lockless" reader section
 * @lock: Pointer to seqlock_t
 * @seq: count, from read_seqbegin_or_lock()
 *
 * done_seqretry finishes the seqlock_t read side critical section started
 * with read_seqbegin_or_lock() and validated by need_seqretry().
 */
static inline void done_seqretry(seqlock_t *lock, int seq)
{
	if (seq & 1)
		read_sequnlock_excl(lock);
}
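
/*
 * Sketch of the "lockless or locking reader" pattern built from the three
 * helpers above, with a hypothetical foo_seqlock. The first pass is a
 * lockless read; if it has to be retried, the marker is forced odd so the
 * next pass takes the lock and is guaranteed to complete::
 *
 *	int seq, nextseq = 0;
 *
 *	do {
 *		seq = nextseq;
 *		read_seqbegin_or_lock(&foo_seqlock, &seq);
 *
 *		x = foo_x;	// read the protected data
 *
 *		// On a retry, become a locking reader
 *		nextseq = 1;
 *	} while (need_seqretry(&foo_seqlock, seq));
 *	done_seqretry(&foo_seqlock, seq);
 */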

/**
 * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or
 *                                   a non-interruptible locking reader
 * @lock: Pointer to seqlock_t
 * @seq:  Marker and return parameter. Check read_seqbegin_or_lock().
 *
 * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if
 * the seqlock_t write section, *or other read sections*, can be invoked
 * from hardirq context.
 *
 * Note: Interrupts will be disabled only for "locking reader" mode.
 *
 * Return:
 *
 *   1. The saved local interrupts state in case of a locking reader, to
 *      be passed to done_seqretry_irqrestore().
 *
 *   2. The encountered sequence counter value, returned through @seq
 *      overloaded as a return parameter. Check read_seqbegin_or_lock().
 */
static inline unsigned long
read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
{
	unsigned long flags = 0;

	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl_irqsave(lock, flags);

	return flags;
}

/**
 * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a
 *				non-interruptible locking reader section
 * @lock:  Pointer to seqlock_t
 * @seq:   Count, from read_seqbegin_or_lock_irqsave()
 * @flags: Caller's saved local interrupt state in case of a locking
 *	   reader, also from read_seqbegin_or_lock_irqsave()
 *
 * This is the _irqrestore variant of done_seqretry(). The read section
 * must've been opened with read_seqbegin_or_lock_irqsave(), and validated
 * by need_seqretry().
 */
static inline void
done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
{
	if (seq & 1)
		read_sequnlock_excl_irqrestore(lock, flags);
}
#endif /* __LINUX_SEQLOCK_H */