at for-next 38 kB view raw
1/* SPDX-License-Identifier: GPL-2.0 */ 2/* 3 * Written by Mark Hemment, 1996 (markhe@nextd.demon.co.uk). 4 * 5 * (C) SGI 2006, Christoph Lameter 6 * Cleaned up and restructured to ease the addition of alternative 7 * implementations of SLAB allocators. 8 * (C) Linux Foundation 2008-2013 9 * Unified interface for all slab allocators 10 */ 11 12#ifndef _LINUX_SLAB_H 13#define _LINUX_SLAB_H 14 15#include <linux/cache.h> 16#include <linux/gfp.h> 17#include <linux/overflow.h> 18#include <linux/types.h> 19#include <linux/workqueue.h> 20#include <linux/percpu-refcount.h> 21#include <linux/cleanup.h> 22#include <linux/hash.h> 23 24enum _slab_flag_bits { 25 _SLAB_CONSISTENCY_CHECKS, 26 _SLAB_RED_ZONE, 27 _SLAB_POISON, 28 _SLAB_KMALLOC, 29 _SLAB_HWCACHE_ALIGN, 30 _SLAB_CACHE_DMA, 31 _SLAB_CACHE_DMA32, 32 _SLAB_STORE_USER, 33 _SLAB_PANIC, 34 _SLAB_TYPESAFE_BY_RCU, 35 _SLAB_TRACE, 36#ifdef CONFIG_DEBUG_OBJECTS 37 _SLAB_DEBUG_OBJECTS, 38#endif 39 _SLAB_NOLEAKTRACE, 40 _SLAB_NO_MERGE, 41#ifdef CONFIG_FAILSLAB 42 _SLAB_FAILSLAB, 43#endif 44#ifdef CONFIG_MEMCG 45 _SLAB_ACCOUNT, 46#endif 47#ifdef CONFIG_KASAN_GENERIC 48 _SLAB_KASAN, 49#endif 50 _SLAB_NO_USER_FLAGS, 51#ifdef CONFIG_KFENCE 52 _SLAB_SKIP_KFENCE, 53#endif 54#ifndef CONFIG_SLUB_TINY 55 _SLAB_RECLAIM_ACCOUNT, 56#endif 57 _SLAB_OBJECT_POISON, 58 _SLAB_CMPXCHG_DOUBLE, 59#ifdef CONFIG_SLAB_OBJ_EXT 60 _SLAB_NO_OBJ_EXT, 61#endif 62 _SLAB_FLAGS_LAST_BIT 63}; 64 65#define __SLAB_FLAG_BIT(nr) ((slab_flags_t __force)(1U << (nr))) 66#define __SLAB_FLAG_UNUSED ((slab_flags_t __force)(0U)) 67 68/* 69 * Flags to pass to kmem_cache_create(). 70 * The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op 71 */ 72/* DEBUG: Perform (expensive) checks on alloc/free */ 73#define SLAB_CONSISTENCY_CHECKS __SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS) 74/* DEBUG: Red zone objs in a cache */ 75#define SLAB_RED_ZONE __SLAB_FLAG_BIT(_SLAB_RED_ZONE) 76/* DEBUG: Poison objects */ 77#define SLAB_POISON __SLAB_FLAG_BIT(_SLAB_POISON) 78/* Indicate a kmalloc slab */ 79#define SLAB_KMALLOC __SLAB_FLAG_BIT(_SLAB_KMALLOC) 80/** 81 * define SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries. 82 * 83 * Sufficiently large objects are aligned on cache line boundary. For object 84 * size smaller than a half of cache line size, the alignment is on the half of 85 * cache line size. In general, if object size is smaller than 1/2^n of cache 86 * line size, the alignment is adjusted to 1/2^n. 87 * 88 * If explicit alignment is also requested by the respective 89 * &struct kmem_cache_args field, the greater of both is alignments is applied. 90 */ 91#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) 92/* Use GFP_DMA memory */ 93#define SLAB_CACHE_DMA __SLAB_FLAG_BIT(_SLAB_CACHE_DMA) 94/* Use GFP_DMA32 memory */ 95#define SLAB_CACHE_DMA32 __SLAB_FLAG_BIT(_SLAB_CACHE_DMA32) 96/* DEBUG: Store the last owner for bug hunting */ 97#define SLAB_STORE_USER __SLAB_FLAG_BIT(_SLAB_STORE_USER) 98/* Panic if kmem_cache_create() fails */ 99#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) 100/** 101 * define SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! 102 * 103 * This delays freeing the SLAB page by a grace period, it does _NOT_ 104 * delay object freeing. This means that if you do kmem_cache_free() 105 * that memory location is free to be reused at any time. Thus it may 106 * be possible to see another object there in the same RCU grace period. 107 * 108 * This feature only ensures the memory location backing the object 109 * stays valid, the trick to using this is relying on an independent 110 * object validation pass. Something like: 111 * 112 * :: 113 * 114 * begin: 115 * rcu_read_lock(); 116 * obj = lockless_lookup(key); 117 * if (obj) { 118 * if (!try_get_ref(obj)) // might fail for free objects 119 * rcu_read_unlock(); 120 * goto begin; 121 * 122 * if (obj->key != key) { // not the object we expected 123 * put_ref(obj); 124 * rcu_read_unlock(); 125 * goto begin; 126 * } 127 * } 128 * rcu_read_unlock(); 129 * 130 * This is useful if we need to approach a kernel structure obliquely, 131 * from its address obtained without the usual locking. We can lock 132 * the structure to stabilize it and check it's still at the given address, 133 * only if we can be sure that the memory has not been meanwhile reused 134 * for some other kind of object (which our subsystem's lock might corrupt). 135 * 136 * rcu_read_lock before reading the address, then rcu_read_unlock after 137 * taking the spinlock within the structure expected at that address. 138 * 139 * Note that it is not possible to acquire a lock within a structure 140 * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference 141 * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages 142 * are not zeroed before being given to the slab, which means that any 143 * locks must be initialized after each and every kmem_struct_alloc(). 144 * Alternatively, make the ctor passed to kmem_cache_create() initialize 145 * the locks at page-allocation time, as is done in __i915_request_ctor(), 146 * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers 147 * to safely acquire those ctor-initialized locks under rcu_read_lock() 148 * protection. 149 * 150 * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. 151 */ 152#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) 153/* Trace allocations and frees */ 154#define SLAB_TRACE __SLAB_FLAG_BIT(_SLAB_TRACE) 155 156/* Flag to prevent checks on free */ 157#ifdef CONFIG_DEBUG_OBJECTS 158# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS) 159#else 160# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_UNUSED 161#endif 162 163/* Avoid kmemleak tracing */ 164#define SLAB_NOLEAKTRACE __SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE) 165 166/* 167 * Prevent merging with compatible kmem caches. This flag should be used 168 * cautiously. Valid use cases: 169 * 170 * - caches created for self-tests (e.g. kunit) 171 * - general caches created and used by a subsystem, only when a 172 * (subsystem-specific) debug option is enabled 173 * - performance critical caches, should be very rare and consulted with slab 174 * maintainers, and not used together with CONFIG_SLUB_TINY 175 */ 176#define SLAB_NO_MERGE __SLAB_FLAG_BIT(_SLAB_NO_MERGE) 177 178/* Fault injection mark */ 179#ifdef CONFIG_FAILSLAB 180# define SLAB_FAILSLAB __SLAB_FLAG_BIT(_SLAB_FAILSLAB) 181#else 182# define SLAB_FAILSLAB __SLAB_FLAG_UNUSED 183#endif 184/** 185 * define SLAB_ACCOUNT - Account allocations to memcg. 186 * 187 * All object allocations from this cache will be memcg accounted, regardless of 188 * __GFP_ACCOUNT being or not being passed to individual allocations. 189 */ 190#ifdef CONFIG_MEMCG 191# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) 192#else 193# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED 194#endif 195 196#ifdef CONFIG_KASAN_GENERIC 197#define SLAB_KASAN __SLAB_FLAG_BIT(_SLAB_KASAN) 198#else 199#define SLAB_KASAN __SLAB_FLAG_UNUSED 200#endif 201 202/* 203 * Ignore user specified debugging flags. 204 * Intended for caches created for self-tests so they have only flags 205 * specified in the code and other flags are ignored. 206 */ 207#define SLAB_NO_USER_FLAGS __SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS) 208 209#ifdef CONFIG_KFENCE 210#define SLAB_SKIP_KFENCE __SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE) 211#else 212#define SLAB_SKIP_KFENCE __SLAB_FLAG_UNUSED 213#endif 214 215/* The following flags affect the page allocator grouping pages by mobility */ 216/** 217 * define SLAB_RECLAIM_ACCOUNT - Objects are reclaimable. 218 * 219 * Use this flag for caches that have an associated shrinker. As a result, slab 220 * pages are allocated with __GFP_RECLAIMABLE, which affects grouping pages by 221 * mobility, and are accounted in SReclaimable counter in /proc/meminfo 222 */ 223#ifndef CONFIG_SLUB_TINY 224#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT) 225#else 226#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_UNUSED 227#endif 228#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ 229 230/* Slab created using create_boot_cache */ 231#ifdef CONFIG_SLAB_OBJ_EXT 232#define SLAB_NO_OBJ_EXT __SLAB_FLAG_BIT(_SLAB_NO_OBJ_EXT) 233#else 234#define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED 235#endif 236 237/* 238 * freeptr_t represents a SLUB freelist pointer, which might be encoded 239 * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. 240 */ 241typedef struct { unsigned long v; } freeptr_t; 242 243/* 244 * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. 245 * 246 * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault. 247 * 248 * ZERO_SIZE_PTR can be passed to kfree though in the same way that NULL can. 249 * Both make kfree a no-op. 250 */ 251#define ZERO_SIZE_PTR ((void *)16) 252 253#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ 254 (unsigned long)ZERO_SIZE_PTR) 255 256#include <linux/kasan.h> 257 258struct list_lru; 259struct mem_cgroup; 260/* 261 * struct kmem_cache related prototypes 262 */ 263bool slab_is_available(void); 264 265/** 266 * struct kmem_cache_args - Less common arguments for kmem_cache_create() 267 * 268 * Any uninitialized fields of the structure are interpreted as unused. The 269 * exception is @freeptr_offset where %0 is a valid value, so 270 * @use_freeptr_offset must be also set to %true in order to interpret the field 271 * as used. For @useroffset %0 is also valid, but only with non-%0 272 * @usersize. 273 * 274 * When %NULL args is passed to kmem_cache_create(), it is equivalent to all 275 * fields unused. 276 */ 277struct kmem_cache_args { 278 /** 279 * @align: The required alignment for the objects. 280 * 281 * %0 means no specific alignment is requested. 282 */ 283 unsigned int align; 284 /** 285 * @useroffset: Usercopy region offset. 286 * 287 * %0 is a valid offset, when @usersize is non-%0 288 */ 289 unsigned int useroffset; 290 /** 291 * @usersize: Usercopy region size. 292 * 293 * %0 means no usercopy region is specified. 294 */ 295 unsigned int usersize; 296 /** 297 * @freeptr_offset: Custom offset for the free pointer 298 * in &SLAB_TYPESAFE_BY_RCU caches 299 * 300 * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer 301 * outside of the object. This might cause the object to grow in size. 302 * Cache creators that have a reason to avoid this can specify a custom 303 * free pointer offset in their struct where the free pointer will be 304 * placed. 305 * 306 * Note that placing the free pointer inside the object requires the 307 * caller to ensure that no fields are invalidated that are required to 308 * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for 309 * details). 310 * 311 * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset 312 * is specified, %use_freeptr_offset must be set %true. 313 * 314 * Note that @ctor currently isn't supported with custom free pointers 315 * as a @ctor requires an external free pointer. 316 */ 317 unsigned int freeptr_offset; 318 /** 319 * @use_freeptr_offset: Whether a @freeptr_offset is used. 320 */ 321 bool use_freeptr_offset; 322 /** 323 * @ctor: A constructor for the objects. 324 * 325 * The constructor is invoked for each object in a newly allocated slab 326 * page. It is the cache user's responsibility to free object in the 327 * same state as after calling the constructor, or deal appropriately 328 * with any differences between a freshly constructed and a reallocated 329 * object. 330 * 331 * %NULL means no constructor. 332 */ 333 void (*ctor)(void *); 334}; 335 336struct kmem_cache *__kmem_cache_create_args(const char *name, 337 unsigned int object_size, 338 struct kmem_cache_args *args, 339 slab_flags_t flags); 340static inline struct kmem_cache * 341__kmem_cache_create(const char *name, unsigned int size, unsigned int align, 342 slab_flags_t flags, void (*ctor)(void *)) 343{ 344 struct kmem_cache_args kmem_args = { 345 .align = align, 346 .ctor = ctor, 347 }; 348 349 return __kmem_cache_create_args(name, size, &kmem_args, flags); 350} 351 352/** 353 * kmem_cache_create_usercopy - Create a kmem cache with a region suitable 354 * for copying to userspace. 355 * @name: A string which is used in /proc/slabinfo to identify this cache. 356 * @size: The size of objects to be created in this cache. 357 * @align: The required alignment for the objects. 358 * @flags: SLAB flags 359 * @useroffset: Usercopy region offset 360 * @usersize: Usercopy region size 361 * @ctor: A constructor for the objects, or %NULL. 362 * 363 * This is a legacy wrapper, new code should use either KMEM_CACHE_USERCOPY() 364 * if whitelisting a single field is sufficient, or kmem_cache_create() with 365 * the necessary parameters passed via the args parameter (see 366 * &struct kmem_cache_args) 367 * 368 * Return: a pointer to the cache on success, NULL on failure. 369 */ 370static inline struct kmem_cache * 371kmem_cache_create_usercopy(const char *name, unsigned int size, 372 unsigned int align, slab_flags_t flags, 373 unsigned int useroffset, unsigned int usersize, 374 void (*ctor)(void *)) 375{ 376 struct kmem_cache_args kmem_args = { 377 .align = align, 378 .ctor = ctor, 379 .useroffset = useroffset, 380 .usersize = usersize, 381 }; 382 383 return __kmem_cache_create_args(name, size, &kmem_args, flags); 384} 385 386/* If NULL is passed for @args, use this variant with default arguments. */ 387static inline struct kmem_cache * 388__kmem_cache_default_args(const char *name, unsigned int size, 389 struct kmem_cache_args *args, 390 slab_flags_t flags) 391{ 392 struct kmem_cache_args kmem_default_args = {}; 393 394 /* Make sure we don't get passed garbage. */ 395 if (WARN_ON_ONCE(args)) 396 return ERR_PTR(-EINVAL); 397 398 return __kmem_cache_create_args(name, size, &kmem_default_args, flags); 399} 400 401/** 402 * kmem_cache_create - Create a kmem cache. 403 * @__name: A string which is used in /proc/slabinfo to identify this cache. 404 * @__object_size: The size of objects to be created in this cache. 405 * @__args: Optional arguments, see &struct kmem_cache_args. Passing %NULL 406 * means defaults will be used for all the arguments. 407 * 408 * This is currently implemented as a macro using ``_Generic()`` to call 409 * either the new variant of the function, or a legacy one. 410 * 411 * The new variant has 4 parameters: 412 * ``kmem_cache_create(name, object_size, args, flags)`` 413 * 414 * See __kmem_cache_create_args() which implements this. 415 * 416 * The legacy variant has 5 parameters: 417 * ``kmem_cache_create(name, object_size, align, flags, ctor)`` 418 * 419 * The align and ctor parameters map to the respective fields of 420 * &struct kmem_cache_args 421 * 422 * Context: Cannot be called within a interrupt, but can be interrupted. 423 * 424 * Return: a pointer to the cache on success, NULL on failure. 425 */ 426#define kmem_cache_create(__name, __object_size, __args, ...) \ 427 _Generic((__args), \ 428 struct kmem_cache_args *: __kmem_cache_create_args, \ 429 void *: __kmem_cache_default_args, \ 430 default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__) 431 432void kmem_cache_destroy(struct kmem_cache *s); 433int kmem_cache_shrink(struct kmem_cache *s); 434 435/* 436 * Please use this macro to create slab caches. Simply specify the 437 * name of the structure and maybe some flags that are listed above. 438 * 439 * The alignment of the struct determines object alignment. If you 440 * f.e. add ____cacheline_aligned_in_smp to the struct declaration 441 * then the objects will be properly aligned in SMP configurations. 442 */ 443#define KMEM_CACHE(__struct, __flags) \ 444 __kmem_cache_create_args(#__struct, sizeof(struct __struct), \ 445 &(struct kmem_cache_args) { \ 446 .align = __alignof__(struct __struct), \ 447 }, (__flags)) 448 449/* 450 * To whitelist a single field for copying to/from usercopy, use this 451 * macro instead for KMEM_CACHE() above. 452 */ 453#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ 454 __kmem_cache_create_args(#__struct, sizeof(struct __struct), \ 455 &(struct kmem_cache_args) { \ 456 .align = __alignof__(struct __struct), \ 457 .useroffset = offsetof(struct __struct, __field), \ 458 .usersize = sizeof_field(struct __struct, __field), \ 459 }, (__flags)) 460 461/* 462 * Common kmalloc functions provided by all allocators 463 */ 464void * __must_check krealloc_noprof(const void *objp, size_t new_size, 465 gfp_t flags) __realloc_size(2); 466#define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__)) 467 468void kfree(const void *objp); 469void kfree_sensitive(const void *objp); 470size_t __ksize(const void *objp); 471 472DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T)) 473DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T)) 474 475/** 476 * ksize - Report actual allocation size of associated object 477 * 478 * @objp: Pointer returned from a prior kmalloc()-family allocation. 479 * 480 * This should not be used for writing beyond the originally requested 481 * allocation size. Either use krealloc() or round up the allocation size 482 * with kmalloc_size_roundup() prior to allocation. If this is used to 483 * access beyond the originally requested allocation size, UBSAN_BOUNDS 484 * and/or FORTIFY_SOURCE may trip, since they only know about the 485 * originally allocated size via the __alloc_size attribute. 486 */ 487size_t ksize(const void *objp); 488 489#ifdef CONFIG_PRINTK 490bool kmem_dump_obj(void *object); 491#else 492static inline bool kmem_dump_obj(void *object) { return false; } 493#endif 494 495/* 496 * Some archs want to perform DMA into kmalloc caches and need a guaranteed 497 * alignment larger than the alignment of a 64-bit integer. 498 * Setting ARCH_DMA_MINALIGN in arch headers allows that. 499 */ 500#ifdef ARCH_HAS_DMA_MINALIGN 501#if ARCH_DMA_MINALIGN > 8 && !defined(ARCH_KMALLOC_MINALIGN) 502#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN 503#endif 504#endif 505 506#ifndef ARCH_KMALLOC_MINALIGN 507#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) 508#elif ARCH_KMALLOC_MINALIGN > 8 509#define KMALLOC_MIN_SIZE ARCH_KMALLOC_MINALIGN 510#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) 511#endif 512 513/* 514 * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. 515 * Intended for arches that get misalignment faults even for 64 bit integer 516 * aligned buffers. 517 */ 518#ifndef ARCH_SLAB_MINALIGN 519#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) 520#endif 521 522/* 523 * Arches can define this function if they want to decide the minimum slab 524 * alignment at runtime. The value returned by the function must be a power 525 * of two and >= ARCH_SLAB_MINALIGN. 526 */ 527#ifndef arch_slab_minalign 528static inline unsigned int arch_slab_minalign(void) 529{ 530 return ARCH_SLAB_MINALIGN; 531} 532#endif 533 534/* 535 * kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN. 536 * kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN 537 * and ARCH_SLAB_MINALIGN, but here we only assume the former alignment. 538 */ 539#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN) 540#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN) 541#define __assume_page_alignment __assume_aligned(PAGE_SIZE) 542 543/* 544 * Kmalloc array related definitions 545 */ 546 547/* 548 * SLUB directly allocates requests fitting in to an order-1 page 549 * (PAGE_SIZE*2). Larger requests are passed to the page allocator. 550 */ 551#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) 552#define KMALLOC_SHIFT_MAX (MAX_PAGE_ORDER + PAGE_SHIFT) 553#ifndef KMALLOC_SHIFT_LOW 554#define KMALLOC_SHIFT_LOW 3 555#endif 556 557/* Maximum allocatable size */ 558#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) 559/* Maximum size for which we actually use a slab cache */ 560#define KMALLOC_MAX_CACHE_SIZE (1UL << KMALLOC_SHIFT_HIGH) 561/* Maximum order allocatable via the slab allocator */ 562#define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_MAX - PAGE_SHIFT) 563 564/* 565 * Kmalloc subsystem. 566 */ 567#ifndef KMALLOC_MIN_SIZE 568#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW) 569#endif 570 571/* 572 * This restriction comes from byte sized index implementation. 573 * Page size is normally 2^12 bytes and, in this case, if we want to use 574 * byte sized index which can represent 2^8 entries, the size of the object 575 * should be equal or greater to 2^12 / 2^8 = 2^4 = 16. 576 * If minimum size of kmalloc is less than 16, we use it as minimum object 577 * size and give up to use byte sized index. 578 */ 579#define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \ 580 (KMALLOC_MIN_SIZE) : 16) 581 582#ifdef CONFIG_RANDOM_KMALLOC_CACHES 583#define RANDOM_KMALLOC_CACHES_NR 15 // # of cache copies 584#else 585#define RANDOM_KMALLOC_CACHES_NR 0 586#endif 587 588/* 589 * Whenever changing this, take care of that kmalloc_type() and 590 * create_kmalloc_caches() still work as intended. 591 * 592 * KMALLOC_NORMAL can contain only unaccounted objects whereas KMALLOC_CGROUP 593 * is for accounted but unreclaimable and non-dma objects. All the other 594 * kmem caches can have both accounted and unaccounted objects. 595 */ 596enum kmalloc_cache_type { 597 KMALLOC_NORMAL = 0, 598#ifndef CONFIG_ZONE_DMA 599 KMALLOC_DMA = KMALLOC_NORMAL, 600#endif 601#ifndef CONFIG_MEMCG 602 KMALLOC_CGROUP = KMALLOC_NORMAL, 603#endif 604 KMALLOC_RANDOM_START = KMALLOC_NORMAL, 605 KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR, 606#ifdef CONFIG_SLUB_TINY 607 KMALLOC_RECLAIM = KMALLOC_NORMAL, 608#else 609 KMALLOC_RECLAIM, 610#endif 611#ifdef CONFIG_ZONE_DMA 612 KMALLOC_DMA, 613#endif 614#ifdef CONFIG_MEMCG 615 KMALLOC_CGROUP, 616#endif 617 NR_KMALLOC_TYPES 618}; 619 620typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1]; 621 622extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES]; 623 624/* 625 * Define gfp bits that should not be set for KMALLOC_NORMAL. 626 */ 627#define KMALLOC_NOT_NORMAL_BITS \ 628 (__GFP_RECLAIMABLE | \ 629 (IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \ 630 (IS_ENABLED(CONFIG_MEMCG) ? __GFP_ACCOUNT : 0)) 631 632extern unsigned long random_kmalloc_seed; 633 634static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller) 635{ 636 /* 637 * The most common case is KMALLOC_NORMAL, so test for it 638 * with a single branch for all the relevant flags. 639 */ 640 if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0)) 641#ifdef CONFIG_RANDOM_KMALLOC_CACHES 642 /* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */ 643 return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed, 644 ilog2(RANDOM_KMALLOC_CACHES_NR + 1)); 645#else 646 return KMALLOC_NORMAL; 647#endif 648 649 /* 650 * At least one of the flags has to be set. Their priorities in 651 * decreasing order are: 652 * 1) __GFP_DMA 653 * 2) __GFP_RECLAIMABLE 654 * 3) __GFP_ACCOUNT 655 */ 656 if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA)) 657 return KMALLOC_DMA; 658 if (!IS_ENABLED(CONFIG_MEMCG) || (flags & __GFP_RECLAIMABLE)) 659 return KMALLOC_RECLAIM; 660 else 661 return KMALLOC_CGROUP; 662} 663 664/* 665 * Figure out which kmalloc slab an allocation of a certain size 666 * belongs to. 667 * 0 = zero alloc 668 * 1 = 65 .. 96 bytes 669 * 2 = 129 .. 192 bytes 670 * n = 2^(n-1)+1 .. 2^n 671 * 672 * Note: __kmalloc_index() is compile-time optimized, and not runtime optimized; 673 * typical usage is via kmalloc_index() and therefore evaluated at compile-time. 674 * Callers where !size_is_constant should only be test modules, where runtime 675 * overheads of __kmalloc_index() can be tolerated. Also see kmalloc_slab(). 676 */ 677static __always_inline unsigned int __kmalloc_index(size_t size, 678 bool size_is_constant) 679{ 680 if (!size) 681 return 0; 682 683 if (size <= KMALLOC_MIN_SIZE) 684 return KMALLOC_SHIFT_LOW; 685 686 if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96) 687 return 1; 688 if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192) 689 return 2; 690 if (size <= 8) return 3; 691 if (size <= 16) return 4; 692 if (size <= 32) return 5; 693 if (size <= 64) return 6; 694 if (size <= 128) return 7; 695 if (size <= 256) return 8; 696 if (size <= 512) return 9; 697 if (size <= 1024) return 10; 698 if (size <= 2 * 1024) return 11; 699 if (size <= 4 * 1024) return 12; 700 if (size <= 8 * 1024) return 13; 701 if (size <= 16 * 1024) return 14; 702 if (size <= 32 * 1024) return 15; 703 if (size <= 64 * 1024) return 16; 704 if (size <= 128 * 1024) return 17; 705 if (size <= 256 * 1024) return 18; 706 if (size <= 512 * 1024) return 19; 707 if (size <= 1024 * 1024) return 20; 708 if (size <= 2 * 1024 * 1024) return 21; 709 710 if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant) 711 BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()"); 712 else 713 BUG(); 714 715 /* Will never be reached. Needed because the compiler may complain */ 716 return -1; 717} 718static_assert(PAGE_SHIFT <= 20); 719#define kmalloc_index(s) __kmalloc_index(s, true) 720 721#include <linux/alloc_tag.h> 722 723/** 724 * kmem_cache_alloc - Allocate an object 725 * @cachep: The cache to allocate from. 726 * @flags: See kmalloc(). 727 * 728 * Allocate an object from this cache. 729 * See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags. 730 * 731 * Return: pointer to the new object or %NULL in case of error 732 */ 733void *kmem_cache_alloc_noprof(struct kmem_cache *cachep, 734 gfp_t flags) __assume_slab_alignment __malloc; 735#define kmem_cache_alloc(...) alloc_hooks(kmem_cache_alloc_noprof(__VA_ARGS__)) 736 737void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru, 738 gfp_t gfpflags) __assume_slab_alignment __malloc; 739#define kmem_cache_alloc_lru(...) alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__)) 740 741/** 742 * kmem_cache_charge - memcg charge an already allocated slab memory 743 * @objp: address of the slab object to memcg charge 744 * @gfpflags: describe the allocation context 745 * 746 * kmem_cache_charge allows charging a slab object to the current memcg, 747 * primarily in cases where charging at allocation time might not be possible 748 * because the target memcg is not known (i.e. softirq context) 749 * 750 * The objp should be pointer returned by the slab allocator functions like 751 * kmalloc (with __GFP_ACCOUNT in flags) or kmem_cache_alloc. The memcg charge 752 * behavior can be controlled through gfpflags parameter, which affects how the 753 * necessary internal metadata can be allocated. Including __GFP_NOFAIL denotes 754 * that overcharging is requested instead of failure, but is not applied for the 755 * internal metadata allocation. 756 * 757 * There are several cases where it will return true even if the charging was 758 * not done: 759 * More specifically: 760 * 761 * 1. For !CONFIG_MEMCG or cgroup_disable=memory systems. 762 * 2. Already charged slab objects. 763 * 3. For slab objects from KMALLOC_NORMAL caches - allocated by kmalloc() 764 * without __GFP_ACCOUNT 765 * 4. Allocating internal metadata has failed 766 * 767 * Return: true if charge was successful otherwise false. 768 */ 769bool kmem_cache_charge(void *objp, gfp_t gfpflags); 770void kmem_cache_free(struct kmem_cache *s, void *objp); 771 772kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, 773 unsigned int useroffset, unsigned int usersize, 774 void (*ctor)(void *)); 775 776/* 777 * Bulk allocation and freeing operations. These are accelerated in an 778 * allocator specific way to avoid taking locks repeatedly or building 779 * metadata structures unnecessarily. 780 * 781 * Note that interrupts must be enabled when calling these functions. 782 */ 783void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); 784 785int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, void **p); 786#define kmem_cache_alloc_bulk(...) alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__)) 787 788static __always_inline void kfree_bulk(size_t size, void **p) 789{ 790 kmem_cache_free_bulk(NULL, size, p); 791} 792 793void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, 794 int node) __assume_slab_alignment __malloc; 795#define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) 796 797/* 798 * These macros allow declaring a kmem_buckets * parameter alongside size, which 799 * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call 800 * sites don't have to pass NULL. 801 */ 802#ifdef CONFIG_SLAB_BUCKETS 803#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size), kmem_buckets *(_b) 804#define PASS_BUCKET_PARAMS(_size, _b) (_size), (_b) 805#define PASS_BUCKET_PARAM(_b) (_b) 806#else 807#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size) 808#define PASS_BUCKET_PARAMS(_size, _b) (_size) 809#define PASS_BUCKET_PARAM(_b) NULL 810#endif 811 812/* 813 * The following functions are not to be used directly and are intended only 814 * for internal use from kmalloc() and kmalloc_node() 815 * with the exception of kunit tests 816 */ 817 818void *__kmalloc_noprof(size_t size, gfp_t flags) 819 __assume_kmalloc_alignment __alloc_size(1); 820 821void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) 822 __assume_kmalloc_alignment __alloc_size(1); 823 824void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size) 825 __assume_kmalloc_alignment __alloc_size(3); 826 827void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags, 828 int node, size_t size) 829 __assume_kmalloc_alignment __alloc_size(4); 830 831void *__kmalloc_large_noprof(size_t size, gfp_t flags) 832 __assume_page_alignment __alloc_size(1); 833 834void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) 835 __assume_page_alignment __alloc_size(1); 836 837/** 838 * kmalloc - allocate kernel memory 839 * @size: how many bytes of memory are required. 840 * @flags: describe the allocation context 841 * 842 * kmalloc is the normal method of allocating memory 843 * for objects smaller than page size in the kernel. 844 * 845 * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN 846 * bytes. For @size of power of two bytes, the alignment is also guaranteed 847 * to be at least to the size. For other sizes, the alignment is guaranteed to 848 * be at least the largest power-of-two divisor of @size. 849 * 850 * The @flags argument may be one of the GFP flags defined at 851 * include/linux/gfp_types.h and described at 852 * :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>` 853 * 854 * The recommended usage of the @flags is described at 855 * :ref:`Documentation/core-api/memory-allocation.rst <memory_allocation>` 856 * 857 * Below is a brief outline of the most useful GFP flags 858 * 859 * %GFP_KERNEL 860 * Allocate normal kernel ram. May sleep. 861 * 862 * %GFP_NOWAIT 863 * Allocation will not sleep. 864 * 865 * %GFP_ATOMIC 866 * Allocation will not sleep. May use emergency pools. 867 * 868 * Also it is possible to set different flags by OR'ing 869 * in one or more of the following additional @flags: 870 * 871 * %__GFP_ZERO 872 * Zero the allocated memory before returning. Also see kzalloc(). 873 * 874 * %__GFP_HIGH 875 * This allocation has high priority and may use emergency pools. 876 * 877 * %__GFP_NOFAIL 878 * Indicate that this allocation is in no way allowed to fail 879 * (think twice before using). 880 * 881 * %__GFP_NORETRY 882 * If memory is not immediately available, 883 * then give up at once. 884 * 885 * %__GFP_NOWARN 886 * If allocation fails, don't issue any warnings. 887 * 888 * %__GFP_RETRY_MAYFAIL 889 * Try really hard to succeed the allocation but fail 890 * eventually. 891 */ 892static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t flags) 893{ 894 if (__builtin_constant_p(size) && size) { 895 unsigned int index; 896 897 if (size > KMALLOC_MAX_CACHE_SIZE) 898 return __kmalloc_large_noprof(size, flags); 899 900 index = kmalloc_index(size); 901 return __kmalloc_cache_noprof( 902 kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], 903 flags, size); 904 } 905 return __kmalloc_noprof(size, flags); 906} 907#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__)) 908 909#define kmem_buckets_alloc(_b, _size, _flags) \ 910 alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) 911 912#define kmem_buckets_alloc_track_caller(_b, _size, _flags) \ 913 alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_)) 914 915static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node) 916{ 917 if (__builtin_constant_p(size) && size) { 918 unsigned int index; 919 920 if (size > KMALLOC_MAX_CACHE_SIZE) 921 return __kmalloc_large_node_noprof(size, flags, node); 922 923 index = kmalloc_index(size); 924 return __kmalloc_cache_node_noprof( 925 kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], 926 flags, node, size); 927 } 928 return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node); 929} 930#define kmalloc_node(...) alloc_hooks(kmalloc_node_noprof(__VA_ARGS__)) 931 932/** 933 * kmalloc_array - allocate memory for an array. 934 * @n: number of elements. 935 * @size: element size. 936 * @flags: the type of memory to allocate (see kmalloc). 937 */ 938static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t size, gfp_t flags) 939{ 940 size_t bytes; 941 942 if (unlikely(check_mul_overflow(n, size, &bytes))) 943 return NULL; 944 if (__builtin_constant_p(n) && __builtin_constant_p(size)) 945 return kmalloc_noprof(bytes, flags); 946 return kmalloc_noprof(bytes, flags); 947} 948#define kmalloc_array(...) alloc_hooks(kmalloc_array_noprof(__VA_ARGS__)) 949 950/** 951 * krealloc_array - reallocate memory for an array. 952 * @p: pointer to the memory chunk to reallocate 953 * @new_n: new number of elements to alloc 954 * @new_size: new size of a single member of the array 955 * @flags: the type of memory to allocate (see kmalloc) 956 * 957 * If __GFP_ZERO logic is requested, callers must ensure that, starting with the 958 * initial memory allocation, every subsequent call to this API for the same 959 * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that 960 * __GFP_ZERO is not fully honored by this API. 961 * 962 * See krealloc_noprof() for further details. 963 * 964 * In any case, the contents of the object pointed to are preserved up to the 965 * lesser of the new and old sizes. 966 */ 967static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p, 968 size_t new_n, 969 size_t new_size, 970 gfp_t flags) 971{ 972 size_t bytes; 973 974 if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) 975 return NULL; 976 977 return krealloc_noprof(p, bytes, flags); 978} 979#define krealloc_array(...) alloc_hooks(krealloc_array_noprof(__VA_ARGS__)) 980 981/** 982 * kcalloc - allocate memory for an array. The memory is set to zero. 983 * @n: number of elements. 984 * @size: element size. 985 * @flags: the type of memory to allocate (see kmalloc). 986 */ 987#define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO) 988 989void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node, 990 unsigned long caller) __alloc_size(1); 991#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \ 992 __kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller) 993#define kmalloc_node_track_caller(...) \ 994 alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_)) 995 996/* 997 * kmalloc_track_caller is a special version of kmalloc that records the 998 * calling function of the routine calling it for slab leak tracking instead 999 * of just the calling function (confusing, eh?). 1000 * It's useful when the call to kmalloc comes from a widely-used standard 1001 * allocator where we care about the real place the memory allocation 1002 * request comes from. 1003 */ 1004#define kmalloc_track_caller(...) kmalloc_node_track_caller(__VA_ARGS__, NUMA_NO_NODE) 1005 1006#define kmalloc_track_caller_noprof(...) \ 1007 kmalloc_node_track_caller_noprof(__VA_ARGS__, NUMA_NO_NODE, _RET_IP_) 1008 1009static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, 1010 int node) 1011{ 1012 size_t bytes; 1013 1014 if (unlikely(check_mul_overflow(n, size, &bytes))) 1015 return NULL; 1016 if (__builtin_constant_p(n) && __builtin_constant_p(size)) 1017 return kmalloc_node_noprof(bytes, flags, node); 1018 return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node); 1019} 1020#define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__)) 1021 1022#define kcalloc_node(_n, _size, _flags, _node) \ 1023 kmalloc_array_node(_n, _size, (_flags) | __GFP_ZERO, _node) 1024 1025/* 1026 * Shortcuts 1027 */ 1028#define kmem_cache_zalloc(_k, _flags) kmem_cache_alloc(_k, (_flags)|__GFP_ZERO) 1029 1030/** 1031 * kzalloc - allocate memory. The memory is set to zero. 1032 * @size: how many bytes of memory are required. 1033 * @flags: the type of memory to allocate (see kmalloc). 1034 */ 1035static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags) 1036{ 1037 return kmalloc_noprof(size, flags | __GFP_ZERO); 1038} 1039#define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__)) 1040#define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node) 1041 1042void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1); 1043#define kvmalloc_node_noprof(size, flags, node) \ 1044 __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node) 1045#define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__)) 1046 1047#define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE) 1048#define kvmalloc_noprof(_size, _flags) kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE) 1049#define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO) 1050 1051#define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node) 1052#define kmem_buckets_valloc(_b, _size, _flags) \ 1053 alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) 1054 1055static inline __alloc_size(1, 2) void * 1056kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) 1057{ 1058 size_t bytes; 1059 1060 if (unlikely(check_mul_overflow(n, size, &bytes))) 1061 return NULL; 1062 1063 return kvmalloc_node_noprof(bytes, flags, node); 1064} 1065 1066#define kvmalloc_array_noprof(...) kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE) 1067#define kvcalloc_node_noprof(_n,_s,_f,_node) kvmalloc_array_node_noprof(_n,_s,(_f)|__GFP_ZERO,_node) 1068#define kvcalloc_noprof(...) kvcalloc_node_noprof(__VA_ARGS__, NUMA_NO_NODE) 1069 1070#define kvmalloc_array(...) alloc_hooks(kvmalloc_array_noprof(__VA_ARGS__)) 1071#define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__)) 1072#define kvcalloc(...) alloc_hooks(kvcalloc_noprof(__VA_ARGS__)) 1073 1074void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags) 1075 __realloc_size(2); 1076#define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__)) 1077 1078extern void kvfree(const void *addr); 1079DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T)) 1080 1081extern void kvfree_sensitive(const void *addr, size_t len); 1082 1083unsigned int kmem_cache_size(struct kmem_cache *s); 1084 1085/** 1086 * kmalloc_size_roundup - Report allocation bucket size for the given size 1087 * 1088 * @size: Number of bytes to round up from. 1089 * 1090 * This returns the number of bytes that would be available in a kmalloc() 1091 * allocation of @size bytes. For example, a 126 byte request would be 1092 * rounded up to the next sized kmalloc bucket, 128 bytes. (This is strictly 1093 * for the general-purpose kmalloc()-based allocations, and is not for the 1094 * pre-sized kmem_cache_alloc()-based allocations.) 1095 * 1096 * Use this to kmalloc() the full bucket size ahead of time instead of using 1097 * ksize() to query the size after an allocation. 1098 */ 1099size_t kmalloc_size_roundup(size_t size); 1100 1101void __init kmem_cache_init_late(void); 1102 1103#endif /* _LINUX_SLAB_H */