Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

slub: Replace cmpxchg_double()

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20230531132323.924677086@infradead.org

+137 -67
+9 -3
include/linux/slub_def.h
··· 39 39 CPU_PARTIAL_FREE, /* Refill cpu partial on free */ 40 40 CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */ 41 41 CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */ 42 - NR_SLUB_STAT_ITEMS }; 42 + NR_SLUB_STAT_ITEMS 43 + }; 43 44 44 45 #ifndef CONFIG_SLUB_TINY 45 46 /* ··· 48 47 * with this_cpu_cmpxchg_double() alignment requirements. 49 48 */ 50 49 struct kmem_cache_cpu { 51 - void **freelist; /* Pointer to next available object */ 52 - unsigned long tid; /* Globally unique transaction id */ 50 + union { 51 + struct { 52 + void **freelist; /* Pointer to next available object */ 53 + unsigned long tid; /* Globally unique transaction id */ 54 + }; 55 + freelist_aba_t freelist_tid; 56 + }; 53 57 struct slab *slab; /* The slab from which we are allocating */ 54 58 #ifdef CONFIG_SLUB_CPU_PARTIAL 55 59 struct slab *partial; /* Partially allocated frozen slabs */
+46 -7
mm/slab.h
··· 6 6 */ 7 7 void __init kmem_cache_init(void); 8 8 9 + #ifdef CONFIG_64BIT 10 + # ifdef system_has_cmpxchg128 11 + # define system_has_freelist_aba() system_has_cmpxchg128() 12 + # define try_cmpxchg_freelist try_cmpxchg128 13 + # endif 14 + #define this_cpu_try_cmpxchg_freelist this_cpu_try_cmpxchg128 15 + typedef u128 freelist_full_t; 16 + #else /* CONFIG_64BIT */ 17 + # ifdef system_has_cmpxchg64 18 + # define system_has_freelist_aba() system_has_cmpxchg64() 19 + # define try_cmpxchg_freelist try_cmpxchg64 20 + # endif 21 + #define this_cpu_try_cmpxchg_freelist this_cpu_try_cmpxchg64 22 + typedef u64 freelist_full_t; 23 + #endif /* CONFIG_64BIT */ 24 + 25 + #if defined(system_has_freelist_aba) && !defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 26 + #undef system_has_freelist_aba 27 + #endif 28 + 29 + /* 30 + * Freelist pointer and counter to cmpxchg together, avoids the typical ABA 31 + * problems with cmpxchg of just a pointer. 32 + */ 33 + typedef union { 34 + struct { 35 + void *freelist; 36 + unsigned long counter; 37 + }; 38 + freelist_full_t full; 39 + } freelist_aba_t; 40 + 9 41 /* Reuses the bits in struct page */ 10 42 struct slab { 11 43 unsigned long __page_flags; ··· 70 38 #endif 71 39 }; 72 40 /* Double-word boundary */ 73 - void *freelist; /* first free object */ 74 41 union { 75 - unsigned long counters; 76 42 struct { 77 - unsigned inuse:16; 78 - unsigned objects:15; 79 - unsigned frozen:1; 43 + void *freelist; /* first free object */ 44 + union { 45 + unsigned long counters; 46 + struct { 47 + unsigned inuse:16; 48 + unsigned objects:15; 49 + unsigned frozen:1; 50 + }; 51 + }; 80 52 }; 53 + #ifdef system_has_freelist_aba 54 + freelist_aba_t freelist_counter; 55 + #endif 81 56 }; 82 57 }; 83 58 struct rcu_head rcu_head; ··· 111 72 #endif 112 73 #undef SLAB_MATCH 113 74 static_assert(sizeof(struct slab) <= sizeof(struct page)); 114 - #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && defined(CONFIG_SLUB) 115 - static_assert(IS_ALIGNED(offsetof(struct slab, freelist), 2*sizeof(void *))); 75 + #if defined(system_has_freelist_aba) && defined(CONFIG_SLUB) 76 + static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t))); 116 77 #endif 117 78 118 79 /**
+82 -57
mm/slub.c
··· 292 292 /* Poison object */ 293 293 #define __OBJECT_POISON ((slab_flags_t __force)0x80000000U) 294 294 /* Use cmpxchg_double */ 295 + 296 + #ifdef system_has_freelist_aba 295 297 #define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U) 298 + #else 299 + #define __CMPXCHG_DOUBLE ((slab_flags_t __force)0U) 300 + #endif 296 301 297 302 /* 298 303 * Tracking user of a slab. ··· 517 512 __bit_spin_unlock(PG_locked, &page->flags); 518 513 } 519 514 515 + static inline bool 516 + __update_freelist_fast(struct slab *slab, 517 + void *freelist_old, unsigned long counters_old, 518 + void *freelist_new, unsigned long counters_new) 519 + { 520 + #ifdef system_has_freelist_aba 521 + freelist_aba_t old = { .freelist = freelist_old, .counter = counters_old }; 522 + freelist_aba_t new = { .freelist = freelist_new, .counter = counters_new }; 523 + 524 + return try_cmpxchg_freelist(&slab->freelist_counter.full, &old.full, new.full); 525 + #else 526 + return false; 527 + #endif 528 + } 529 + 530 + static inline bool 531 + __update_freelist_slow(struct slab *slab, 532 + void *freelist_old, unsigned long counters_old, 533 + void *freelist_new, unsigned long counters_new) 534 + { 535 + bool ret = false; 536 + 537 + slab_lock(slab); 538 + if (slab->freelist == freelist_old && 539 + slab->counters == counters_old) { 540 + slab->freelist = freelist_new; 541 + slab->counters = counters_new; 542 + ret = true; 543 + } 544 + slab_unlock(slab); 545 + 546 + return ret; 547 + } 548 + 520 549 /* 521 550 * Interrupts must be disabled (for the fallback code to work right), typically 522 551 * by an _irqsave() lock variant. On PREEMPT_RT the preempt_disable(), which is ··· 558 519 * allocation/ free operation in hardirq context. Therefore nothing can 559 520 * interrupt the operation. 560 521 */ 561 - static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab, 522 + static inline bool __slab_update_freelist(struct kmem_cache *s, struct slab *slab, 562 523 void *freelist_old, unsigned long counters_old, 563 524 void *freelist_new, unsigned long counters_new, 564 525 const char *n) 565 526 { 527 + bool ret; 528 + 566 529 if (USE_LOCKLESS_FAST_PATH()) 567 530 lockdep_assert_irqs_disabled(); 568 - #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ 569 - defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 531 + 570 532 if (s->flags & __CMPXCHG_DOUBLE) { 571 - if (cmpxchg_double(&slab->freelist, &slab->counters, 572 - freelist_old, counters_old, 573 - freelist_new, counters_new)) 574 - return true; 575 - } else 576 - #endif 577 - { 578 - slab_lock(slab); 579 - if (slab->freelist == freelist_old && 580 - slab->counters == counters_old) { 581 - slab->freelist = freelist_new; 582 - slab->counters = counters_new; 583 - slab_unlock(slab); 584 - return true; 585 - } 586 - slab_unlock(slab); 533 + ret = __update_freelist_fast(slab, freelist_old, counters_old, 534 + freelist_new, counters_new); 535 + } else { 536 + ret = __update_freelist_slow(slab, freelist_old, counters_old, 537 + freelist_new, counters_new); 587 538 } 539 + if (likely(ret)) 540 + return true; 588 541 589 542 cpu_relax(); 590 543 stat(s, CMPXCHG_DOUBLE_FAIL); ··· 588 557 return false; 589 558 } 590 559 591 - static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab, 560 + static inline bool slab_update_freelist(struct kmem_cache *s, struct slab *slab, 592 561 void *freelist_old, unsigned long counters_old, 593 562 void *freelist_new, unsigned long counters_new, 594 563 const char *n) 595 564 { 596 - #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ 597 - defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 565 + bool ret; 566 + 598 567 if (s->flags & __CMPXCHG_DOUBLE) { 599 - if (cmpxchg_double(&slab->freelist, &slab->counters, 600 - freelist_old, counters_old, 601 - freelist_new, counters_new)) 602 - return true; 603 - } else 604 - #endif 605 - { 568 + ret = __update_freelist_fast(slab, freelist_old, counters_old, 569 + freelist_new, counters_new); 570 + } else { 606 571 unsigned long flags; 607 572 608 573 local_irq_save(flags); 609 - slab_lock(slab); 610 - if (slab->freelist == freelist_old && 611 - slab->counters == counters_old) { 612 - slab->freelist = freelist_new; 613 - slab->counters = counters_new; 614 - slab_unlock(slab); 615 - local_irq_restore(flags); 616 - return true; 617 - } 618 - slab_unlock(slab); 574 + ret = __update_freelist_slow(slab, freelist_old, counters_old, 575 + freelist_new, counters_new); 619 576 local_irq_restore(flags); 620 577 } 578 + if (likely(ret)) 579 + return true; 621 580 622 581 cpu_relax(); 623 582 stat(s, CMPXCHG_DOUBLE_FAIL); ··· 2249 2228 VM_BUG_ON(new.frozen); 2250 2229 new.frozen = 1; 2251 2230 2252 - if (!__cmpxchg_double_slab(s, slab, 2231 + if (!__slab_update_freelist(s, slab, 2253 2232 freelist, counters, 2254 2233 new.freelist, new.counters, 2255 2234 "acquire_slab")) ··· 2575 2554 } 2576 2555 2577 2556 2578 - if (!cmpxchg_double_slab(s, slab, 2557 + if (!slab_update_freelist(s, slab, 2579 2558 old.freelist, old.counters, 2580 2559 new.freelist, new.counters, 2581 2560 "unfreezing slab")) { ··· 2632 2611 2633 2612 new.frozen = 0; 2634 2613 2635 - } while (!__cmpxchg_double_slab(s, slab, 2614 + } while (!__slab_update_freelist(s, slab, 2636 2615 old.freelist, old.counters, 2637 2616 new.freelist, new.counters, 2638 2617 "unfreezing slab")); ··· 3029 3008 } 3030 3009 3031 3010 #ifndef CONFIG_SLUB_TINY 3011 + static inline bool 3012 + __update_cpu_freelist_fast(struct kmem_cache *s, 3013 + void *freelist_old, void *freelist_new, 3014 + unsigned long tid) 3015 + { 3016 + freelist_aba_t old = { .freelist = freelist_old, .counter = tid }; 3017 + freelist_aba_t new = { .freelist = freelist_new, .counter = next_tid(tid) }; 3018 + 3019 + return this_cpu_try_cmpxchg_freelist(s->cpu_slab->freelist_tid.full, 3020 + &old.full, new.full); 3021 + } 3022 + 3032 3023 /* 3033 3024 * Check the slab->freelist and either transfer the freelist to the 3034 3025 * per cpu freelist or deactivate the slab. ··· 3067 3034 new.inuse = slab->objects; 3068 3035 new.frozen = freelist != NULL; 3069 3036 3070 - } while (!__cmpxchg_double_slab(s, slab, 3037 + } while (!__slab_update_freelist(s, slab, 3071 3038 freelist, counters, 3072 3039 NULL, new.counters, 3073 3040 "get_freelist")); ··· 3392 3359 * against code executing on this cpu *not* from access by 3393 3360 * other cpus. 3394 3361 */ 3395 - if (unlikely(!this_cpu_cmpxchg_double( 3396 - s->cpu_slab->freelist, s->cpu_slab->tid, 3397 - object, tid, 3398 - next_object, next_tid(tid)))) { 3399 - 3362 + if (unlikely(!__update_cpu_freelist_fast(s, object, next_object, tid))) { 3400 3363 note_cmpxchg_failure("slab_alloc", s, tid); 3401 3364 goto redo; 3402 3365 } ··· 3660 3631 } 3661 3632 } 3662 3633 3663 - } while (!cmpxchg_double_slab(s, slab, 3634 + } while (!slab_update_freelist(s, slab, 3664 3635 prior, counters, 3665 3636 head, new.counters, 3666 3637 "__slab_free")); ··· 3765 3736 3766 3737 set_freepointer(s, tail_obj, freelist); 3767 3738 3768 - if (unlikely(!this_cpu_cmpxchg_double( 3769 - s->cpu_slab->freelist, s->cpu_slab->tid, 3770 - freelist, tid, 3771 - head, next_tid(tid)))) { 3772 - 3739 + if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) { 3773 3740 note_cmpxchg_failure("slab_free", s, tid); 3774 3741 goto redo; 3775 3742 } ··· 4530 4505 } 4531 4506 } 4532 4507 4533 - #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ 4534 - defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 4535 - if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0) 4508 + #ifdef system_has_freelist_aba 4509 + if (system_has_freelist_aba() && !(s->flags & SLAB_NO_CMPXCHG)) { 4536 4510 /* Enable fast mode */ 4537 4511 s->flags |= __CMPXCHG_DOUBLE; 4512 + } 4513 #endif 4539 4514 4540 4515 /*