at master 19 kB view raw
1/* SPDX-License-Identifier: GPL-2.0 */ 2#ifndef MM_SLAB_H 3#define MM_SLAB_H 4 5#include <linux/reciprocal_div.h> 6#include <linux/list_lru.h> 7#include <linux/local_lock.h> 8#include <linux/random.h> 9#include <linux/kobject.h> 10#include <linux/sched/mm.h> 11#include <linux/memcontrol.h> 12#include <linux/kfence.h> 13#include <linux/kasan.h> 14 15/* 16 * Internal slab definitions 17 */ 18 19#ifdef CONFIG_64BIT 20# ifdef system_has_cmpxchg128 21# define system_has_freelist_aba() system_has_cmpxchg128() 22# define try_cmpxchg_freelist try_cmpxchg128 23# endif 24#define this_cpu_try_cmpxchg_freelist this_cpu_try_cmpxchg128 25typedef u128 freelist_full_t; 26#else /* CONFIG_64BIT */ 27# ifdef system_has_cmpxchg64 28# define system_has_freelist_aba() system_has_cmpxchg64() 29# define try_cmpxchg_freelist try_cmpxchg64 30# endif 31#define this_cpu_try_cmpxchg_freelist this_cpu_try_cmpxchg64 32typedef u64 freelist_full_t; 33#endif /* CONFIG_64BIT */ 34 35#if defined(system_has_freelist_aba) && !defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 36#undef system_has_freelist_aba 37#endif 38 39/* 40 * Freelist pointer and counter to cmpxchg together, avoids the typical ABA 41 * problems with cmpxchg of just a pointer. 42 */ 43struct freelist_counters { 44 union { 45 struct { 46 void *freelist; 47 union { 48 unsigned long counters; 49 struct { 50 unsigned inuse:16; 51 unsigned objects:15; 52 /* 53 * If slab debugging is enabled then the 54 * frozen bit can be reused to indicate 55 * that the slab was corrupted 56 */ 57 unsigned frozen:1; 58 }; 59 }; 60 }; 61#ifdef system_has_freelist_aba 62 freelist_full_t freelist_counters; 63#endif 64 }; 65}; 66 67/* Reuses the bits in struct page */ 68struct slab { 69 memdesc_flags_t flags; 70 71 struct kmem_cache *slab_cache; 72 union { 73 struct { 74 union { 75 struct list_head slab_list; 76 struct { /* For deferred deactivate_slab() */ 77 struct llist_node llnode; 78 void *flush_freelist; 79 }; 80#ifdef CONFIG_SLUB_CPU_PARTIAL 81 struct { 82 struct slab *next; 83 int slabs; /* Nr of slabs left */ 84 }; 85#endif 86 }; 87 /* Double-word boundary */ 88 struct freelist_counters; 89 }; 90 struct rcu_head rcu_head; 91 }; 92 93 unsigned int __page_type; 94 atomic_t __page_refcount; 95#ifdef CONFIG_SLAB_OBJ_EXT 96 unsigned long obj_exts; 97#endif 98}; 99 100#define SLAB_MATCH(pg, sl) \ 101 static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl)) 102SLAB_MATCH(flags, flags); 103SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */ 104SLAB_MATCH(_refcount, __page_refcount); 105#ifdef CONFIG_MEMCG 106SLAB_MATCH(memcg_data, obj_exts); 107#elif defined(CONFIG_SLAB_OBJ_EXT) 108SLAB_MATCH(_unused_slab_obj_exts, obj_exts); 109#endif 110#undef SLAB_MATCH 111static_assert(sizeof(struct slab) <= sizeof(struct page)); 112#if defined(system_has_freelist_aba) 113static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(struct freelist_counters))); 114#endif 115 116/** 117 * slab_folio - The folio allocated for a slab 118 * @s: The slab. 119 * 120 * Slabs are allocated as folios that contain the individual objects and are 121 * using some fields in the first struct page of the folio - those fields are 122 * now accessed by struct slab. It is occasionally necessary to convert back to 123 * a folio in order to communicate with the rest of the mm. Please use this 124 * helper function instead of casting yourself, as the implementation may change 125 * in the future. 126 */ 127#define slab_folio(s) (_Generic((s), \ 128 const struct slab *: (const struct folio *)s, \ 129 struct slab *: (struct folio *)s)) 130 131/** 132 * page_slab - Converts from struct page to its slab. 133 * @page: A page which may or may not belong to a slab. 134 * 135 * Return: The slab which contains this page or NULL if the page does 136 * not belong to a slab. This includes pages returned from large kmalloc. 137 */ 138static inline struct slab *page_slab(const struct page *page) 139{ 140 unsigned long head; 141 142 head = READ_ONCE(page->compound_head); 143 if (head & 1) 144 page = (struct page *)(head - 1); 145 if (data_race(page->page_type >> 24) != PGTY_slab) 146 page = NULL; 147 148 return (struct slab *)page; 149} 150 151/** 152 * slab_page - The first struct page allocated for a slab 153 * @s: The slab. 154 * 155 * A convenience wrapper for converting slab to the first struct page of the 156 * underlying folio, to communicate with code not yet converted to folio or 157 * struct slab. 158 */ 159#define slab_page(s) folio_page(slab_folio(s), 0) 160 161static inline void *slab_address(const struct slab *slab) 162{ 163 return folio_address(slab_folio(slab)); 164} 165 166static inline int slab_nid(const struct slab *slab) 167{ 168 return memdesc_nid(slab->flags); 169} 170 171static inline pg_data_t *slab_pgdat(const struct slab *slab) 172{ 173 return NODE_DATA(slab_nid(slab)); 174} 175 176static inline struct slab *virt_to_slab(const void *addr) 177{ 178 return page_slab(virt_to_page(addr)); 179} 180 181static inline int slab_order(const struct slab *slab) 182{ 183 return folio_order(slab_folio(slab)); 184} 185 186static inline size_t slab_size(const struct slab *slab) 187{ 188 return PAGE_SIZE << slab_order(slab); 189} 190 191#ifdef CONFIG_SLUB_CPU_PARTIAL 192#define slub_percpu_partial(c) ((c)->partial) 193 194#define slub_set_percpu_partial(c, p) \ 195({ \ 196 slub_percpu_partial(c) = (p)->next; \ 197}) 198 199#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c)) 200#else 201#define slub_percpu_partial(c) NULL 202 203#define slub_set_percpu_partial(c, p) 204 205#define slub_percpu_partial_read_once(c) NULL 206#endif // CONFIG_SLUB_CPU_PARTIAL 207 208/* 209 * Word size structure that can be atomically updated or read and that 210 * contains both the order and the number of objects that a slab of the 211 * given order would contain. 212 */ 213struct kmem_cache_order_objects { 214 unsigned int x; 215}; 216 217/* 218 * Slab cache management. 219 */ 220struct kmem_cache { 221 struct kmem_cache_cpu __percpu *cpu_slab; 222 struct lock_class_key lock_key; 223 struct slub_percpu_sheaves __percpu *cpu_sheaves; 224 /* Used for retrieving partial slabs, etc. */ 225 slab_flags_t flags; 226 unsigned long min_partial; 227 unsigned int size; /* Object size including metadata */ 228 unsigned int object_size; /* Object size without metadata */ 229 struct reciprocal_value reciprocal_size; 230 unsigned int offset; /* Free pointer offset */ 231#ifdef CONFIG_SLUB_CPU_PARTIAL 232 /* Number of per cpu partial objects to keep around */ 233 unsigned int cpu_partial; 234 /* Number of per cpu partial slabs to keep around */ 235 unsigned int cpu_partial_slabs; 236#endif 237 unsigned int sheaf_capacity; 238 struct kmem_cache_order_objects oo; 239 240 /* Allocation and freeing of slabs */ 241 struct kmem_cache_order_objects min; 242 gfp_t allocflags; /* gfp flags to use on each alloc */ 243 int refcount; /* Refcount for slab cache destroy */ 244 void (*ctor)(void *object); /* Object constructor */ 245 unsigned int inuse; /* Offset to metadata */ 246 unsigned int align; /* Alignment */ 247 unsigned int red_left_pad; /* Left redzone padding size */ 248 const char *name; /* Name (only for display!) */ 249 struct list_head list; /* List of slab caches */ 250#ifdef CONFIG_SYSFS 251 struct kobject kobj; /* For sysfs */ 252#endif 253#ifdef CONFIG_SLAB_FREELIST_HARDENED 254 unsigned long random; 255#endif 256 257#ifdef CONFIG_NUMA 258 /* 259 * Defragmentation by allocating from a remote node. 260 */ 261 unsigned int remote_node_defrag_ratio; 262#endif 263 264#ifdef CONFIG_SLAB_FREELIST_RANDOM 265 unsigned int *random_seq; 266#endif 267 268#ifdef CONFIG_KASAN_GENERIC 269 struct kasan_cache kasan_info; 270#endif 271 272#ifdef CONFIG_HARDENED_USERCOPY 273 unsigned int useroffset; /* Usercopy region offset */ 274 unsigned int usersize; /* Usercopy region size */ 275#endif 276 277 struct kmem_cache_node *node[MAX_NUMNODES]; 278}; 279 280#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY) 281#define SLAB_SUPPORTS_SYSFS 1 282void sysfs_slab_unlink(struct kmem_cache *s); 283void sysfs_slab_release(struct kmem_cache *s); 284#else 285static inline void sysfs_slab_unlink(struct kmem_cache *s) { } 286static inline void sysfs_slab_release(struct kmem_cache *s) { } 287#endif 288 289void *fixup_red_left(struct kmem_cache *s, void *p); 290 291static inline void *nearest_obj(struct kmem_cache *cache, 292 const struct slab *slab, void *x) 293{ 294 void *object = x - (x - slab_address(slab)) % cache->size; 295 void *last_object = slab_address(slab) + 296 (slab->objects - 1) * cache->size; 297 void *result = (unlikely(object > last_object)) ? last_object : object; 298 299 result = fixup_red_left(cache, result); 300 return result; 301} 302 303/* Determine object index from a given position */ 304static inline unsigned int __obj_to_index(const struct kmem_cache *cache, 305 void *addr, void *obj) 306{ 307 return reciprocal_divide(kasan_reset_tag(obj) - addr, 308 cache->reciprocal_size); 309} 310 311static inline unsigned int obj_to_index(const struct kmem_cache *cache, 312 const struct slab *slab, void *obj) 313{ 314 if (is_kfence_address(obj)) 315 return 0; 316 return __obj_to_index(cache, slab_address(slab), obj); 317} 318 319static inline int objs_per_slab(const struct kmem_cache *cache, 320 const struct slab *slab) 321{ 322 return slab->objects; 323} 324 325/* 326 * State of the slab allocator. 327 * 328 * This is used to describe the states of the allocator during bootup. 329 * Allocators use this to gradually bootstrap themselves. Most allocators 330 * have the problem that the structures used for managing slab caches are 331 * allocated from slab caches themselves. 332 */ 333enum slab_state { 334 DOWN, /* No slab functionality yet */ 335 PARTIAL, /* SLUB: kmem_cache_node available */ 336 UP, /* Slab caches usable but not all extras yet */ 337 FULL /* Everything is working */ 338}; 339 340extern enum slab_state slab_state; 341 342/* The slab cache mutex protects the management structures during changes */ 343extern struct mutex slab_mutex; 344 345/* The list of all slab caches on the system */ 346extern struct list_head slab_caches; 347 348/* The slab cache that manages slab cache information */ 349extern struct kmem_cache *kmem_cache; 350 351/* A table of kmalloc cache names and sizes */ 352extern const struct kmalloc_info_struct { 353 const char *name[NR_KMALLOC_TYPES]; 354 unsigned int size; 355} kmalloc_info[]; 356 357/* Kmalloc array related functions */ 358void setup_kmalloc_cache_index_table(void); 359void create_kmalloc_caches(void); 360 361extern u8 kmalloc_size_index[24]; 362 363static inline unsigned int size_index_elem(unsigned int bytes) 364{ 365 return (bytes - 1) / 8; 366} 367 368/* 369 * Find the kmem_cache structure that serves a given size of 370 * allocation 371 * 372 * This assumes size is larger than zero and not larger than 373 * KMALLOC_MAX_CACHE_SIZE and the caller must check that. 374 */ 375static inline struct kmem_cache * 376kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller) 377{ 378 unsigned int index; 379 380 if (!b) 381 b = &kmalloc_caches[kmalloc_type(flags, caller)]; 382 if (size <= 192) 383 index = kmalloc_size_index[size_index_elem(size)]; 384 else 385 index = fls(size - 1); 386 387 return (*b)[index]; 388} 389 390gfp_t kmalloc_fix_flags(gfp_t flags); 391 392/* Functions provided by the slab allocators */ 393int do_kmem_cache_create(struct kmem_cache *s, const char *name, 394 unsigned int size, struct kmem_cache_args *args, 395 slab_flags_t flags); 396 397void __init kmem_cache_init(void); 398extern void create_boot_cache(struct kmem_cache *, const char *name, 399 unsigned int size, slab_flags_t flags, 400 unsigned int useroffset, unsigned int usersize); 401 402int slab_unmergeable(struct kmem_cache *s); 403struct kmem_cache *find_mergeable(unsigned size, unsigned align, 404 slab_flags_t flags, const char *name, void (*ctor)(void *)); 405struct kmem_cache * 406__kmem_cache_alias(const char *name, unsigned int size, unsigned int align, 407 slab_flags_t flags, void (*ctor)(void *)); 408 409slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name); 410 411static inline bool is_kmalloc_cache(struct kmem_cache *s) 412{ 413 return (s->flags & SLAB_KMALLOC); 414} 415 416static inline bool is_kmalloc_normal(struct kmem_cache *s) 417{ 418 if (!is_kmalloc_cache(s)) 419 return false; 420 return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT)); 421} 422 423bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj); 424void flush_all_rcu_sheaves(void); 425void flush_rcu_sheaves_on_cache(struct kmem_cache *s); 426 427#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \ 428 SLAB_CACHE_DMA32 | SLAB_PANIC | \ 429 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS | \ 430 SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ 431 SLAB_TEMPORARY | SLAB_ACCOUNT | \ 432 SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE) 433 434#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ 435 SLAB_TRACE | SLAB_CONSISTENCY_CHECKS) 436 437#define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS) 438 439bool __kmem_cache_empty(struct kmem_cache *); 440int __kmem_cache_shutdown(struct kmem_cache *); 441void __kmem_cache_release(struct kmem_cache *); 442int __kmem_cache_shrink(struct kmem_cache *); 443void slab_kmem_cache_release(struct kmem_cache *); 444 445struct seq_file; 446struct file; 447 448struct slabinfo { 449 unsigned long active_objs; 450 unsigned long num_objs; 451 unsigned long active_slabs; 452 unsigned long num_slabs; 453 unsigned long shared_avail; 454 unsigned int limit; 455 unsigned int batchcount; 456 unsigned int shared; 457 unsigned int objects_per_slab; 458 unsigned int cache_order; 459}; 460 461void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo); 462 463#ifdef CONFIG_SLUB_DEBUG 464#ifdef CONFIG_SLUB_DEBUG_ON 465DECLARE_STATIC_KEY_TRUE(slub_debug_enabled); 466#else 467DECLARE_STATIC_KEY_FALSE(slub_debug_enabled); 468#endif 469extern void print_tracking(struct kmem_cache *s, void *object); 470long validate_slab_cache(struct kmem_cache *s); 471static inline bool __slub_debug_enabled(void) 472{ 473 return static_branch_unlikely(&slub_debug_enabled); 474} 475#else 476static inline void print_tracking(struct kmem_cache *s, void *object) 477{ 478} 479static inline bool __slub_debug_enabled(void) 480{ 481 return false; 482} 483#endif 484 485/* 486 * Returns true if any of the specified slab_debug flags is enabled for the 487 * cache. Use only for flags parsed by setup_slub_debug() as it also enables 488 * the static key. 489 */ 490static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags) 491{ 492 if (IS_ENABLED(CONFIG_SLUB_DEBUG)) 493 VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS)); 494 if (__slub_debug_enabled()) 495 return s->flags & flags; 496 return false; 497} 498 499#if IS_ENABLED(CONFIG_SLUB_DEBUG) && IS_ENABLED(CONFIG_KUNIT) 500bool slab_in_kunit_test(void); 501#else 502static inline bool slab_in_kunit_test(void) { return false; } 503#endif 504 505#ifdef CONFIG_SLAB_OBJ_EXT 506 507/* 508 * slab_obj_exts - get the pointer to the slab object extension vector 509 * associated with a slab. 510 * @slab: a pointer to the slab struct 511 * 512 * Returns a pointer to the object extension vector associated with the slab, 513 * or NULL if no such vector has been associated yet. 514 */ 515static inline struct slabobj_ext *slab_obj_exts(struct slab *slab) 516{ 517 unsigned long obj_exts = READ_ONCE(slab->obj_exts); 518 519#ifdef CONFIG_MEMCG 520 /* 521 * obj_exts should be either NULL, a valid pointer with 522 * MEMCG_DATA_OBJEXTS bit set or be equal to OBJEXTS_ALLOC_FAIL. 523 */ 524 VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS) && 525 obj_exts != OBJEXTS_ALLOC_FAIL, slab_page(slab)); 526 VM_BUG_ON_PAGE(obj_exts & MEMCG_DATA_KMEM, slab_page(slab)); 527#endif 528 return (struct slabobj_ext *)(obj_exts & ~OBJEXTS_FLAGS_MASK); 529} 530 531int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, 532 gfp_t gfp, bool new_slab); 533 534#else /* CONFIG_SLAB_OBJ_EXT */ 535 536static inline struct slabobj_ext *slab_obj_exts(struct slab *slab) 537{ 538 return NULL; 539} 540 541#endif /* CONFIG_SLAB_OBJ_EXT */ 542 543static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s) 544{ 545 return (s->flags & SLAB_RECLAIM_ACCOUNT) ? 546 NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B; 547} 548 549#ifdef CONFIG_MEMCG 550bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, 551 gfp_t flags, size_t size, void **p); 552void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, 553 void **p, int objects, struct slabobj_ext *obj_exts); 554#endif 555 556void kvfree_rcu_cb(struct rcu_head *head); 557 558size_t __ksize(const void *objp); 559 560static inline size_t slab_ksize(const struct kmem_cache *s) 561{ 562#ifdef CONFIG_SLUB_DEBUG 563 /* 564 * Debugging requires use of the padding between object 565 * and whatever may come after it. 566 */ 567 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) 568 return s->object_size; 569#endif 570 if (s->flags & SLAB_KASAN) 571 return s->object_size; 572 /* 573 * If we have the need to store the freelist pointer 574 * back there or track user information then we can 575 * only use the space before that information. 576 */ 577 if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) 578 return s->inuse; 579 /* 580 * Else we can use all the padding etc for the allocation 581 */ 582 return s->size; 583} 584 585static inline unsigned int large_kmalloc_order(const struct page *page) 586{ 587 return page[1].flags.f & 0xff; 588} 589 590static inline size_t large_kmalloc_size(const struct page *page) 591{ 592 return PAGE_SIZE << large_kmalloc_order(page); 593} 594 595#ifdef CONFIG_SLUB_DEBUG 596void dump_unreclaimable_slab(void); 597#else 598static inline void dump_unreclaimable_slab(void) 599{ 600} 601#endif 602 603void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr); 604 605#ifdef CONFIG_SLAB_FREELIST_RANDOM 606int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count, 607 gfp_t gfp); 608void cache_random_seq_destroy(struct kmem_cache *cachep); 609#else 610static inline int cache_random_seq_create(struct kmem_cache *cachep, 611 unsigned int count, gfp_t gfp) 612{ 613 return 0; 614} 615static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { } 616#endif /* CONFIG_SLAB_FREELIST_RANDOM */ 617 618static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) 619{ 620 if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, 621 &init_on_alloc)) { 622 if (c->ctor) 623 return false; 624 if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) 625 return flags & __GFP_ZERO; 626 return true; 627 } 628 return flags & __GFP_ZERO; 629} 630 631static inline bool slab_want_init_on_free(struct kmem_cache *c) 632{ 633 if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON, 634 &init_on_free)) 635 return !(c->ctor || 636 (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))); 637 return false; 638} 639 640#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG) 641void debugfs_slab_release(struct kmem_cache *); 642#else 643static inline void debugfs_slab_release(struct kmem_cache *s) { } 644#endif 645 646#ifdef CONFIG_PRINTK 647#define KS_ADDRS_COUNT 16 648struct kmem_obj_info { 649 void *kp_ptr; 650 struct slab *kp_slab; 651 void *kp_objp; 652 unsigned long kp_data_offset; 653 struct kmem_cache *kp_slab_cache; 654 void *kp_ret; 655 void *kp_stack[KS_ADDRS_COUNT]; 656 void *kp_free_stack[KS_ADDRS_COUNT]; 657}; 658void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab); 659#endif 660 661void __check_heap_object(const void *ptr, unsigned long n, 662 const struct slab *slab, bool to_user); 663 664void defer_free_barrier(void); 665 666static inline bool slub_debug_orig_size(struct kmem_cache *s) 667{ 668 return (kmem_cache_debug_flags(s, SLAB_STORE_USER) && 669 (s->flags & SLAB_KMALLOC)); 670} 671 672#ifdef CONFIG_SLUB_DEBUG 673void skip_orig_size_check(struct kmem_cache *s, const void *object); 674#endif 675 676#endif /* MM_SLAB_H */