/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_SEQLOCK_H
#define __LINUX_SEQLOCK_H
/*
 * Reader/writer consistent mechanism without starving writers. This type of
 * lock is for data where the reader wants a consistent set of information
 * and is willing to retry if the information changes. There are two types
 * of readers:
 * 1. Sequence readers which never block a writer but they may have to retry
 *    if a writer is in progress by detecting change in sequence number.
 *    Writers do not wait for a sequence reader.
 * 2. Locking readers which will wait if a writer or another locking reader
 *    is in progress. A locking reader in progress will also block a writer
 *    from going forward. Unlike the regular rwlock, the read lock here is
 *    exclusive so that only one locking reader can get it.
 *
 * This is not as cache friendly as brlock. Also, this may not work well
 * for data that contains pointers, because any writer could
 * invalidate a pointer that a reader was following.
 *
 * Expected non-blocking reader usage:
 *	do {
 *		seq = read_seqbegin(&foo);
 *		...
 *	} while (read_seqretry(&foo, seq));
 *
 *
 * On non-SMP the spin locks disappear but the writer still needs
 * to increment the sequence variables because an interrupt routine could
 * change the state of the data.
 *
 * Based on x86_64 vsyscall gettimeofday
 * by Keith Owens and Andrea Arcangeli
 */

#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <linux/lockdep.h>
#include <linux/compiler.h>
#include <linux/kcsan-checks.h>
#include <asm/processor.h>

/*
 * The seqlock interface does not prescribe a precise sequence of read
 * begin/retry/end. For readers, typically there is a call to
 * read_seqcount_begin() and read_seqcount_retry(), however, there are more
 * esoteric cases which do not follow this pattern.
 *
 * As a consequence, we take the following best-effort approach for raw usage
 * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
 * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
 * atomics; if there is a matching read_seqcount_retry() call, no following
 * memory operations are considered atomic. Usage of seqlocks via seqlock_t
 * interface is not affected.
 */
#define KCSAN_SEQLOCK_REGION_MAX 1000

/*
 * Version using sequence counter only.
 * This can be used when code has its own mutex protecting the
 * updating starting before the write_seqcount_begin() and ending
 * after the write_seqcount_end().
 */
typedef struct seqcount {
	unsigned sequence;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} seqcount_t;
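/*
 * For illustration, a minimal sketch of the pattern described above: embed a
 * seqcount_t next to the lock that already serializes the writers, and let
 * lockless readers retry around it. The struct foo, foo_init(), foo_update()
 * and foo_read_sum() names below are hypothetical, not part of this API; the
 * seqcount read/write helpers used are declared later in this file.
 *
 *	struct foo {
 *		spinlock_t	lock;		<- serializes writers
 *		seqcount_t	seq;
 *		u64		a, b;
 *	};
 *
 *	void foo_init(struct foo *f)
 *	{
 *		spin_lock_init(&f->lock);
 *		seqcount_init(&f->seq);
 *	}
 *
 *	void foo_update(struct foo *f, u64 a, u64 b)
 *	{
 *		spin_lock(&f->lock);
 *		write_seqcount_begin(&f->seq);
 *		f->a = a;
 *		f->b = b;
 *		write_seqcount_end(&f->seq);
 *		spin_unlock(&f->lock);
 *	}
 *
 *	u64 foo_read_sum(struct foo *f)
 *	{
 *		unsigned int seq;
 *		u64 a, b;
 *
 *		do {
 *			seq = read_seqcount_begin(&f->seq);
 *			a = f->a;
 *			b = f->b;
 *		} while (read_seqcount_retry(&f->seq, seq));
 *
 *		return a + b;
 *	}
 */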

static inline void __seqcount_init(seqcount_t *s, const char *name,
					  struct lock_class_key *key)
{
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	lockdep_init_map(&s->dep_map, name, key, 0);
	s->sequence = 0;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define SEQCOUNT_DEP_MAP_INIT(lockname) \
		.dep_map = { .name = #lockname } \

# define seqcount_init(s)				\
	do {						\
		static struct lock_class_key __key;	\
		__seqcount_init((s), #s, &__key);	\
	} while (0)

static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
{
	seqcount_t *l = (seqcount_t *)s;
	unsigned long flags;

	local_irq_save(flags);
	seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
	seqcount_release(&l->dep_map, _RET_IP_);
	local_irq_restore(flags);
}

#else
# define SEQCOUNT_DEP_MAP_INIT(lockname)
# define seqcount_init(s) __seqcount_init(s, NULL, NULL)
# define seqcount_lockdep_reader_access(x)
#endif

#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}


/**
 * __read_seqcount_begin - begin a seq-read critical section (without barrier)
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 */
static inline unsigned __read_seqcount_begin(const seqcount_t *s)
{
	unsigned ret;

repeat:
	ret = READ_ONCE(s->sequence);
	if (unlikely(ret & 1)) {
		cpu_relax();
		goto repeat;
	}
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	return ret;
}

/**
 * raw_read_seqcount - Read the raw seqcount
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_read_seqcount opens a read critical section of the given
 * seqcount without any lockdep checking and without checking or
 * masking the LSB. Calling code is responsible for handling that.
 */
static inline unsigned raw_read_seqcount(const seqcount_t *s)
{
	unsigned ret = READ_ONCE(s->sequence);
	smp_rmb();
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	return ret;
}

/**
 * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_read_seqcount_begin opens a read critical section of the given
 * seqcount, but without any lockdep checking. Validity of the critical
 * section is tested by calling read_seqcount_retry().
 */
static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
{
	unsigned ret = __read_seqcount_begin(s);
	smp_rmb();
	return ret;
}
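/*
 * As the __read_seqcount_begin() comment above (and the __read_seqcount_retry()
 * comment later in this file) note, the barrier-less variants leave the
 * required ordering to the caller. A sketch of what such a caller might look
 * like, with the barriers spelled out; foo_seq and foo_data are hypothetical
 * names used only for illustration:
 *
 *	unsigned int seq;
 *
 *	do {
 *		seq = __read_seqcount_begin(&foo_seq);
 *		smp_rmb();	<- order the sequence load before the data loads
 *		a = foo_data.a;
 *		b = foo_data.b;
 *		smp_rmb();	<- order the data loads before the retry check
 *	} while (__read_seqcount_retry(&foo_seq, seq));
 *
 * This is exactly the ordering raw_read_seqcount_begin()/read_seqcount_retry()
 * provide; open-coding it only makes sense when the barrier can be combined
 * with other ordering the caller already needs.
 */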
/**
 * read_seqcount_begin - begin a seq-read critical section
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * read_seqcount_begin opens a read critical section of the given seqcount.
 * Validity of the critical section is tested by calling read_seqcount_retry().
 */
static inline unsigned read_seqcount_begin(const seqcount_t *s)
{
	seqcount_lockdep_reader_access(s);
	return raw_read_seqcount_begin(s);
}

/**
 * raw_seqcount_begin - begin a seq-read critical section
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_seqcount_begin opens a read critical section of the given seqcount.
 * Validity of the critical section is tested by calling read_seqcount_retry().
 *
 * Unlike read_seqcount_begin(), this function will not wait for the count
 * to stabilize. If a writer is active when we begin, we will fail the
 * read_seqcount_retry() instead of stabilizing at the beginning of the
 * critical section.
 */
static inline unsigned raw_seqcount_begin(const seqcount_t *s)
{
	unsigned ret = READ_ONCE(s->sequence);
	smp_rmb();
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	return ret & ~1;
}

/**
 * __read_seqcount_retry - end a seq-read critical section (without barrier)
 * @s: pointer to seqcount_t
 * @start: count, from read_seqcount_begin
 * Returns: 1 if retry is required, else 0
 *
 * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided after the loads of the variables protected by this critical
 * section, before the retry check is performed.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 */
static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
{
	kcsan_atomic_next(0);
	return unlikely(READ_ONCE(s->sequence) != start);
}

/**
 * read_seqcount_retry - end a seq-read critical section
 * @s: pointer to seqcount_t
 * @start: count, from read_seqcount_begin
 * Returns: 1 if retry is required, else 0
 *
 * read_seqcount_retry closes a read critical section of the given seqcount.
 * If the critical section was invalid, it must be ignored (and typically
 * retried).
 */
static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
{
	smp_rmb();
	return __read_seqcount_retry(s, start);
}

static inline void raw_write_seqcount_begin(seqcount_t *s)
{
	kcsan_nestable_atomic_begin();
	s->sequence++;
	smp_wmb();
}

static inline void raw_write_seqcount_end(seqcount_t *s)
{
	smp_wmb();
	s->sequence++;
	kcsan_nestable_atomic_end();
}
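/*
 * A sketch of how the raw write helpers above might be used when the write
 * side is already serialized by other means and lockdep annotations are not
 * wanted, for example per-CPU data that is only ever modified with preemption
 * disabled. The stats object and its fields (seq, packets, bytes) and the len
 * variable are hypothetical, used only for illustration:
 *
 *	preempt_disable();
 *	raw_write_seqcount_begin(&stats->seq);
 *	stats->packets += 1;
 *	stats->bytes += len;
 *	raw_write_seqcount_end(&stats->seq);
 *	preempt_enable();
 *
 * Readers pair this with read_seqcount_begin()/read_seqcount_retry() (or the
 * raw variants) as usual.
 */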
/**
 * raw_write_seqcount_barrier - do a seq write barrier
 * @s: pointer to seqcount_t
 *
 * This can be used to provide an ordering guarantee instead of the
 * usual consistency guarantee. It is one wmb cheaper, because we can
 * collapse the two back-to-back wmb()s.
 *
 * Note that writes surrounding the barrier should be declared atomic (e.g.
 * via WRITE_ONCE): a) to ensure the writes become visible to other threads
 * atomically, avoiding compiler optimizations; b) to document which writes are
 * meant to propagate to the reader critical section. This is necessary because
 * neither the writes before nor the writes after the barrier are enclosed in a
 * seq-writer critical section that would ensure readers are aware of ongoing
 * writes.
 *
 *	seqcount_t seq;
 *	bool X = true, Y = false;
 *
 *	void read(void)
 *	{
 *		bool x, y;
 *		int s;
 *
 *		do {
 *			s = read_seqcount_begin(&seq);
 *
 *			x = X; y = Y;
 *
 *		} while (read_seqcount_retry(&seq, s));
 *
 *		BUG_ON(!x && !y);
 *	}
 *
 *	void write(void)
 *	{
 *		WRITE_ONCE(Y, true);
 *
 *		raw_write_seqcount_barrier(&seq);
 *
 *		WRITE_ONCE(X, false);
 *	}
 */
static inline void raw_write_seqcount_barrier(seqcount_t *s)
{
	kcsan_nestable_atomic_begin();
	s->sequence++;
	smp_wmb();
	s->sequence++;
	kcsan_nestable_atomic_end();
}

static inline int raw_read_seqcount_latch(seqcount_t *s)
{
	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
	int seq = READ_ONCE(s->sequence); /* ^^^ */
	return seq;
}

/**
 * raw_write_seqcount_latch - redirect readers to even/odd copy
 * @s: pointer to seqcount_t
 *
 * The latch technique is a multiversion concurrency control method that allows
 * queries during non-atomic modifications. If you can guarantee queries never
 * interrupt the modification -- e.g. the concurrency is strictly between CPUs
 * -- you most likely do not need this.
 *
 * Where the traditional RCU/lockless data structures rely on atomic
 * modifications to ensure queries observe either the old or the new state,
 * the latch allows the same for non-atomic updates. The trade-off is doubling
 * the cost of storage; we have to maintain two copies of the entire data
 * structure.
 *
 * Very simply put: we first modify one copy and then the other. This ensures
 * there is always one copy in a stable state, ready to give us an answer.
 *
 * The basic form is a data structure like:
 *
 *	struct latch_struct {
 *		seqcount_t		seq;
 *		struct data_struct	data[2];
 *	};
 *
 * Where a modification, which is assumed to be externally serialized, does the
 * following:
 *
 *	void latch_modify(struct latch_struct *latch, ...)
 *	{
 *		smp_wmb();	<- Ensure that the last data[1] update is visible
 *		latch->seq++;
 *		smp_wmb();	<- Ensure that the seqcount update is visible
 *
 *		modify(latch->data[0], ...);
 *
 *		smp_wmb();	<- Ensure that the data[0] update is visible
 *		latch->seq++;
 *		smp_wmb();	<- Ensure that the seqcount update is visible
 *
 *		modify(latch->data[1], ...);
 *	}
 *
 * The query will have a form like:
 *
 *	struct entry *latch_query(struct latch_struct *latch, ...)
 *	{
 *		struct entry *entry;
 *		unsigned seq, idx;
 *
 *		do {
 *			seq = raw_read_seqcount_latch(&latch->seq);
 *
 *			idx = seq & 0x01;
 *			entry = data_query(latch->data[idx], ...);
 *
 *			smp_rmb();
 *		} while (seq != latch->seq);
 *
 *		return entry;
 *	}
 *
 * So during the modification, queries are first redirected to data[1]. Then we
 * modify data[0]. When that is complete, we redirect queries back to data[0]
 * and we can modify data[1].
 *
 * NOTE: The non-requirement for atomic modifications does _NOT_ include
 *       the publishing of new entries in the case where data is a dynamic
 *       data structure.
 *
 *       An iteration might start in data[0] and get suspended long enough
 *       to miss an entire modification sequence; once it resumes it might
 *       observe the new entry.
 *
 * NOTE: When data is a dynamic data structure, one should use regular RCU
 *       patterns to manage the lifetimes of the objects within.
 */
static inline void raw_write_seqcount_latch(seqcount_t *s)
{
	smp_wmb();	/* prior stores before incrementing "sequence" */
	s->sequence++;
	smp_wmb();	/* increment "sequence" before following stores */
}
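/*
 * The latch_modify() example in the comment above open-codes the barriers and
 * increments. Expressed in terms of raw_write_seqcount_latch() it collapses
 * to the sketch below; latch_struct and modify() are the hypothetical names
 * from that example:
 *
 *	void latch_modify(struct latch_struct *latch, ...)
 *	{
 *		raw_write_seqcount_latch(&latch->seq);
 *		modify(latch->data[0], ...);
 *
 *		raw_write_seqcount_latch(&latch->seq);
 *		modify(latch->data[1], ...);
 *	}
 */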

/*
 * Sequence counter only version assumes that callers are using their
 * own mutexing.
 */
static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
{
	raw_write_seqcount_begin(s);
	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
}

static inline void write_seqcount_begin(seqcount_t *s)
{
	write_seqcount_begin_nested(s, 0);
}

static inline void write_seqcount_end(seqcount_t *s)
{
	seqcount_release(&s->dep_map, _RET_IP_);
	raw_write_seqcount_end(s);
}

/**
 * write_seqcount_invalidate - invalidate in-progress read-side seq operations
 * @s: pointer to seqcount_t
 *
 * After write_seqcount_invalidate, no read-side seq operations will complete
 * successfully and see data older than this.
 */
static inline void write_seqcount_invalidate(seqcount_t *s)
{
	smp_wmb();
	kcsan_nestable_atomic_begin();
	s->sequence += 2;
	kcsan_nestable_atomic_end();
}

typedef struct {
	struct seqcount seqcount;
	spinlock_t lock;
} seqlock_t;

/*
 * These macros triggered gcc-3.x compile-time problems. We think these are
 * OK now. Be cautious.
 */
#define __SEQLOCK_UNLOCKED(lockname)			\
	{						\
		.seqcount = SEQCNT_ZERO(lockname),	\
		.lock =	__SPIN_LOCK_UNLOCKED(lockname)	\
	}

#define seqlock_init(x)					\
	do {						\
		seqcount_init(&(x)->seqcount);		\
		spin_lock_init(&(x)->lock);		\
	} while (0)

#define DEFINE_SEQLOCK(x) \
		seqlock_t x = __SEQLOCK_UNLOCKED(x)

/*
 * Read side functions for starting and finalizing a read side section.
 */
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
	unsigned ret = read_seqcount_begin(&sl->seqcount);

	kcsan_atomic_next(0);  /* non-raw usage, assume closing read_seqretry() */
	kcsan_flat_atomic_begin();
	return ret;
}

static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
	/*
	 * Assume not nested: read_seqretry() may be called multiple times when
	 * completing read critical section.
	 */
	kcsan_flat_atomic_end();

	return read_seqcount_retry(&sl->seqcount, start);
}
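/*
 * For illustration, the expected pairing of the seqlock_t read side above
 * with the write side helpers below (a sketch; foo_lock, foo_a, foo_b,
 * foo_write() and foo_read() are hypothetical names):
 *
 *	static DEFINE_SEQLOCK(foo_lock);
 *	static u64 foo_a, foo_b;
 *
 *	void foo_write(u64 a, u64 b)
 *	{
 *		write_seqlock(&foo_lock);
 *		foo_a = a;
 *		foo_b = b;
 *		write_sequnlock(&foo_lock);
 *	}
 *
 *	u64 foo_read(void)
 *	{
 *		unsigned int seq;
 *		u64 a, b;
 *
 *		do {
 *			seq = read_seqbegin(&foo_lock);
 *			a = foo_a;
 *			b = foo_b;
 *		} while (read_seqretry(&foo_lock, seq));
 *
 *		return a + b;
 *	}
 */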
/*
 * Lock out other writers and update the count.
 * Acts like a normal spin_lock/unlock.
 * Don't need preempt_disable() because that is in the spin_lock already.
 */
static inline void write_seqlock(seqlock_t *sl)
{
	spin_lock(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock(&sl->lock);
}

static inline void write_seqlock_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock_bh(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_bh(&sl->lock);
}

static inline void write_seqlock_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock_irq(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	write_seqcount_begin(&sl->seqcount);
	return flags;
}

#define write_seqlock_irqsave(lock, flags)				\
	do { flags = __write_seqlock_irqsave(lock); } while (0)

static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_irqrestore(&sl->lock, flags);
}

/*
 * A locking reader exclusively locks out other writers and locking readers,
 * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
 * Don't need preempt_disable() because that is in the spin_lock already.
 */
static inline void read_seqlock_excl(seqlock_t *sl)
{
	spin_lock(&sl->lock);
}

static inline void read_sequnlock_excl(seqlock_t *sl)
{
	spin_unlock(&sl->lock);
}

/**
 * read_seqbegin_or_lock - begin a sequence number check or locking block
 * @lock: sequence lock
 * @seq : sequence number to be checked
 *
 * First try it once optimistically without taking the lock. If that fails,
 * take the lock. The sequence number is also used as a marker for deciding
 * whether to be a lockless reader (even) or a locking reader (odd).
 * N.B. seq must be initialized to an even number to begin with.
 */
static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
{
	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl(lock);
}

static inline int need_seqretry(seqlock_t *lock, int seq)
{
	return !(seq & 1) && read_seqretry(lock, seq);
}

static inline void done_seqretry(seqlock_t *lock, int seq)
{
	if (seq & 1)
		read_sequnlock_excl(lock);
}
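/*
 * Typical use of the read_seqbegin_or_lock() family above, as a sketch: start
 * as a lockless reader and, if that pass raced with a writer, retry once as a
 * locking reader, which cannot be starved. The foo_lock and walk_foo() names
 * below are hypothetical:
 *
 *	int seq = 0;	<- even: first pass is a lockless reader
 *
 * again:
 *	read_seqbegin_or_lock(&foo_lock, &seq);
 *
 *	walk_foo();
 *
 *	if (need_seqretry(&foo_lock, seq)) {
 *		seq = 1;	<- odd: retry as a locking reader
 *		goto again;
 *	}
 *	done_seqretry(&foo_lock, seq);
 */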

static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
}

static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
	spin_unlock_bh(&sl->lock);
}

static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
}

static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	return flags;
}

#define read_seqlock_excl_irqsave(lock, flags)				\
	do { flags = __read_seqlock_excl_irqsave(lock); } while (0)

static inline void
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
{
	spin_unlock_irqrestore(&sl->lock, flags);
}

static inline unsigned long
read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
{
	unsigned long flags = 0;

	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl_irqsave(lock, flags);

	return flags;
}

static inline void
done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
{
	if (seq & 1)
		read_sequnlock_excl_irqrestore(lock, flags);
}
#endif /* __LINUX_SEQLOCK_H */