Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'slab/for-6.11/buckets' into slab/for-next

Merge all the slab patches previously collected on top of v6.10-rc1,
over cleanups/fixes that had to be based on rc6.

+316 -137
+4 -2
Documentation/core-api/memory-allocation.rst
··· 144 144 smaller than page size. 145 145 146 146 The address of a chunk allocated with `kmalloc` is aligned to at least 147 - ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the 148 - alignment is also guaranteed to be at least the respective size. 147 + ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the 148 + alignment is also guaranteed to be at least the respective size. For other 149 + sizes, the alignment is guaranteed to be at least the largest power-of-two 150 + divisor of the size. 149 151 150 152 Chunks allocated with kmalloc() can be resized with krealloc(). Similarly 151 153 to kmalloc_array(): a helper for resizing arrays is provided in the form of
+3 -3
include/linux/mm.h
··· 1110 1110 * 1111 1111 * Return: The order of the folio. 1112 1112 */ 1113 - static inline unsigned int folio_order(struct folio *folio) 1113 + static inline unsigned int folio_order(const struct folio *folio) 1114 1114 { 1115 1115 if (!folio_test_large(folio)) 1116 1116 return 0; ··· 2150 2150 * it from being split. It is not necessary for the folio to be locked. 2151 2151 * Return: The base-2 logarithm of the size of this folio. 2152 2152 */ 2153 - static inline unsigned int folio_shift(struct folio *folio) 2153 + static inline unsigned int folio_shift(const struct folio *folio) 2154 2154 { 2155 2155 return PAGE_SHIFT + folio_order(folio); 2156 2156 } ··· 2163 2163 * it from being split. It is not necessary for the folio to be locked. 2164 2164 * Return: The number of bytes in this folio. 2165 2165 */ 2166 - static inline size_t folio_size(struct folio *folio) 2166 + static inline size_t folio_size(const struct folio *folio) 2167 2167 { 2168 2168 return PAGE_SIZE << folio_order(folio); 2169 2169 }
+2 -5
include/linux/poison.h
··· 38 38 * Magic nums for obj red zoning. 39 39 * Placed in the first word before and the first word after an obj. 40 40 */ 41 - #define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ 42 - #define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ 43 - 44 - #define SLUB_RED_INACTIVE 0xbb 45 - #define SLUB_RED_ACTIVE 0xcc 41 + #define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */ 42 + #define SLUB_RED_ACTIVE 0xcc /* when obj is active */ 46 43 47 44 /* ...and for poisoning */ 48 45 #define POISON_INUSE 0x5a /* for use-uninitialised poisoning */
+65 -32
include/linux/slab.h
··· 426 426 NR_KMALLOC_TYPES 427 427 }; 428 428 429 - extern struct kmem_cache * 430 - kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1]; 429 + typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1]; 430 + 431 + extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES]; 431 432 432 433 /* 433 434 * Define gfp bits that should not be set for KMALLOC_NORMAL. ··· 529 528 530 529 #include <linux/alloc_tag.h> 531 530 532 - void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); 533 - #define __kmalloc(...) alloc_hooks(__kmalloc_noprof(__VA_ARGS__)) 534 - 535 531 /** 536 532 * kmem_cache_alloc - Allocate an object 537 533 * @cachep: The cache to allocate from. ··· 549 551 550 552 void kmem_cache_free(struct kmem_cache *s, void *objp); 551 553 554 + kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, 555 + unsigned int useroffset, unsigned int usersize, 556 + void (*ctor)(void *)); 557 + 552 558 /* 553 559 * Bulk allocation and freeing operations. These are accelerated in an 554 560 * allocator specific way to avoid taking locks repeatedly or building ··· 570 568 kmem_cache_free_bulk(NULL, size, p); 571 569 } 572 570 573 - void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment 574 - __alloc_size(1); 575 - #define __kmalloc_node(...) alloc_hooks(__kmalloc_node_noprof(__VA_ARGS__)) 576 - 577 571 void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, 578 572 int node) __assume_slab_alignment __malloc; 579 573 #define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) 580 574 581 - void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t flags, size_t size) 582 - __assume_kmalloc_alignment __alloc_size(3); 575 + /* 576 + * These macros allow declaring a kmem_buckets * parameter alongside size, which 577 + * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call 578 + * sites don't have to pass NULL. 
579 + */ 580 + #ifdef CONFIG_SLAB_BUCKETS 581 + #define DECL_BUCKET_PARAMS(_size, _b) size_t (_size), kmem_buckets *(_b) 582 + #define PASS_BUCKET_PARAMS(_size, _b) (_size), (_b) 583 + #define PASS_BUCKET_PARAM(_b) (_b) 584 + #else 585 + #define DECL_BUCKET_PARAMS(_size, _b) size_t (_size) 586 + #define PASS_BUCKET_PARAMS(_size, _b) (_size) 587 + #define PASS_BUCKET_PARAM(_b) NULL 588 + #endif 583 589 584 - void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, 585 - int node, size_t size) __assume_kmalloc_alignment 586 - __alloc_size(4); 587 - #define kmalloc_trace(...) alloc_hooks(kmalloc_trace_noprof(__VA_ARGS__)) 590 + /* 591 + * The following functions are not to be used directly and are intended only 592 + * for internal use from kmalloc() and kmalloc_node() 593 + * with the exception of kunit tests 594 + */ 588 595 589 - #define kmalloc_node_trace(...) alloc_hooks(kmalloc_node_trace_noprof(__VA_ARGS__)) 596 + void *__kmalloc_noprof(size_t size, gfp_t flags) 597 + __assume_kmalloc_alignment __alloc_size(1); 590 598 591 - void *kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment 592 - __alloc_size(1); 593 - #define kmalloc_large(...) alloc_hooks(kmalloc_large_noprof(__VA_ARGS__)) 599 + void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) 600 + __assume_kmalloc_alignment __alloc_size(1); 594 601 595 - void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment 596 - __alloc_size(1); 597 - #define kmalloc_large_node(...) 
alloc_hooks(kmalloc_large_node_noprof(__VA_ARGS__)) 602 + void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size) 603 + __assume_kmalloc_alignment __alloc_size(3); 604 + 605 + void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags, 606 + int node, size_t size) 607 + __assume_kmalloc_alignment __alloc_size(4); 608 + 609 + void *__kmalloc_large_noprof(size_t size, gfp_t flags) 610 + __assume_page_alignment __alloc_size(1); 611 + 612 + void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) 613 + __assume_page_alignment __alloc_size(1); 598 614 599 615 /** 600 616 * kmalloc - allocate kernel memory ··· 624 604 * 625 605 * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN 626 606 * bytes. For @size of power of two bytes, the alignment is also guaranteed 627 - * to be at least to the size. 607 + * to be at least to the size. For other sizes, the alignment is guaranteed to 608 + * be at least the largest power-of-two divisor of @size. 628 609 * 629 610 * The @flags argument may be one of the GFP flags defined at 630 611 * include/linux/gfp_types.h and described at ··· 675 654 unsigned int index; 676 655 677 656 if (size > KMALLOC_MAX_CACHE_SIZE) 678 - return kmalloc_large_noprof(size, flags); 657 + return __kmalloc_large_noprof(size, flags); 679 658 680 659 index = kmalloc_index(size); 681 - return kmalloc_trace_noprof( 660 + return __kmalloc_cache_noprof( 682 661 kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], 683 662 flags, size); 684 663 } ··· 686 665 } 687 666 #define kmalloc(...) 
alloc_hooks(kmalloc_noprof(__VA_ARGS__)) 688 667 668 + #define kmem_buckets_alloc(_b, _size, _flags) \ 669 + alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) 670 + 671 + #define kmem_buckets_alloc_track_caller(_b, _size, _flags) \ 672 + alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_)) 673 + 689 674 static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node) 690 675 { 691 676 if (__builtin_constant_p(size) && size) { 692 677 unsigned int index; 693 678 694 679 if (size > KMALLOC_MAX_CACHE_SIZE) 695 - return kmalloc_large_node_noprof(size, flags, node); 680 + return __kmalloc_large_node_noprof(size, flags, node); 696 681 697 682 index = kmalloc_index(size); 698 - return kmalloc_node_trace_noprof( 683 + return __kmalloc_cache_node_noprof( 699 684 kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], 700 685 flags, node, size); 701 686 } 702 - return __kmalloc_node_noprof(size, flags, node); 687 + return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node); 703 688 } 704 689 #define kmalloc_node(...) alloc_hooks(kmalloc_node_noprof(__VA_ARGS__)) 705 690 ··· 756 729 */ 757 730 #define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO) 758 731 759 - void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node, 760 - unsigned long caller) __alloc_size(1); 732 + void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node, 733 + unsigned long caller) __alloc_size(1); 734 + #define kmalloc_node_track_caller_noprof(size, flags, node, caller) \ 735 + __kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller) 761 736 #define kmalloc_node_track_caller(...) 
\ 762 737 alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_)) 763 738 ··· 785 756 return NULL; 786 757 if (__builtin_constant_p(n) && __builtin_constant_p(size)) 787 758 return kmalloc_node_noprof(bytes, flags, node); 788 - return __kmalloc_node_noprof(bytes, flags, node); 759 + return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node); 789 760 } 790 761 #define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__)) 791 762 ··· 809 780 #define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__)) 810 781 #define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node) 811 782 812 - extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_size(1); 783 + void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1); 784 + #define kvmalloc_node_noprof(size, flags, node) \ 785 + __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node) 813 786 #define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__)) 814 787 815 788 #define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE) ··· 819 788 #define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO) 820 789 821 790 #define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node) 791 + #define kmem_buckets_valloc(_b, _size, _flags) \ 792 + alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) 822 793 823 794 static inline __alloc_size(1, 2) void * 824 795 kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
+12 -1
ipc/msgutil.c
··· 42 42 #define DATALEN_MSG ((size_t)PAGE_SIZE-sizeof(struct msg_msg)) 43 43 #define DATALEN_SEG ((size_t)PAGE_SIZE-sizeof(struct msg_msgseg)) 44 44 45 + static kmem_buckets *msg_buckets __ro_after_init; 46 + 47 + static int __init init_msg_buckets(void) 48 + { 49 + msg_buckets = kmem_buckets_create("msg_msg", SLAB_ACCOUNT, 50 + sizeof(struct msg_msg), 51 + DATALEN_MSG, NULL); 52 + 53 + return 0; 54 + } 55 + subsys_initcall(init_msg_buckets); 45 56 46 57 static struct msg_msg *alloc_msg(size_t len) 47 58 { ··· 61 50 size_t alen; 62 51 63 52 alen = min(len, DATALEN_MSG); 64 - msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT); 53 + msg = kmem_buckets_alloc(msg_buckets, sizeof(*msg) + alen, GFP_KERNEL); 65 54 if (msg == NULL) 66 55 return NULL; 67 56
+1
kernel/configs/hardening.config
··· 20 20 # Randomize allocator freelists, harden metadata. 21 21 CONFIG_SLAB_FREELIST_RANDOM=y 22 22 CONFIG_SLAB_FREELIST_HARDENED=y 23 + CONFIG_SLAB_BUCKETS=y 23 24 CONFIG_SHUFFLE_PAGE_ALLOCATOR=y 24 25 CONFIG_RANDOM_KMALLOC_CACHES=y 25 26
-2
lib/fortify_kunit.c
··· 234 234 checker(expected_size, \ 235 235 kmalloc_array_node(alloc_size, 1, gfp, NUMA_NO_NODE), \ 236 236 kfree(p)); \ 237 - checker(expected_size, __kmalloc(alloc_size, gfp), \ 238 - kfree(p)); \ 239 237 \ 240 238 orig = kmalloc(alloc_size, gfp); \ 241 239 KUNIT_EXPECT_TRUE(test, orig != NULL); \
+1 -1
lib/slub_kunit.c
··· 140 140 { 141 141 struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32, 142 142 SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE); 143 - u8 *p = kmalloc_trace(s, GFP_KERNEL, 18); 143 + u8 *p = __kmalloc_cache_noprof(s, GFP_KERNEL, 18); 144 144 145 145 kasan_disable_current(); 146 146
+17
mm/Kconfig
··· 273 273 sacrifices to harden the kernel slab allocator against common 274 274 freelist exploit methods. 275 275 276 + config SLAB_BUCKETS 277 + bool "Support allocation from separate kmalloc buckets" 278 + depends on !SLUB_TINY 279 + default SLAB_FREELIST_HARDENED 280 + help 281 + Kernel heap attacks frequently depend on being able to create 282 + specifically-sized allocations with user-controlled contents 283 + that will be allocated into the same kmalloc bucket as a 284 + target object. To avoid sharing these allocation buckets, 285 + provide an explicitly separated set of buckets to be used for 286 + user-controlled allocations. This may very slightly increase 287 + memory fragmentation, though in practice it's only a handful 288 + of extra pages since the bulk of user-controlled allocations 289 + are relatively long-lived. 290 + 291 + If unsure, say Y. 292 + 276 293 config SLUB_STATS 277 294 default n 278 295 bool "Enable performance statistics"
+6 -4
mm/slab.h
··· 168 168 */ 169 169 static inline bool slab_test_pfmemalloc(const struct slab *slab) 170 170 { 171 - return folio_test_active((struct folio *)slab_folio(slab)); 171 + return folio_test_active(slab_folio(slab)); 172 172 } 173 173 174 174 static inline void slab_set_pfmemalloc(struct slab *slab) ··· 213 213 214 214 static inline int slab_order(const struct slab *slab) 215 215 { 216 - return folio_order((struct folio *)slab_folio(slab)); 216 + return folio_order(slab_folio(slab)); 217 217 } 218 218 219 219 static inline size_t slab_size(const struct slab *slab) ··· 405 405 * KMALLOC_MAX_CACHE_SIZE and the caller must check that. 406 406 */ 407 407 static inline struct kmem_cache * 408 - kmalloc_slab(size_t size, gfp_t flags, unsigned long caller) 408 + kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller) 409 409 { 410 410 unsigned int index; 411 411 412 + if (!b) 413 + b = &kmalloc_caches[kmalloc_type(flags, caller)]; 412 414 if (size <= 192) 413 415 index = kmalloc_size_index[size_index_elem(size)]; 414 416 else 415 417 index = fls(size - 1); 416 418 417 - return kmalloc_caches[kmalloc_type(flags, caller)][index]; 419 + return (*b)[index]; 418 420 } 419 421 420 422 gfp_t kmalloc_fix_flags(gfp_t flags);
+104 -7
mm/slab_common.c
··· 392 392 } 393 393 EXPORT_SYMBOL(kmem_cache_create); 394 394 395 + static struct kmem_cache *kmem_buckets_cache __ro_after_init; 396 + 397 + /** 398 + * kmem_buckets_create - Create a set of caches that handle dynamic sized 399 + * allocations via kmem_buckets_alloc() 400 + * @name: A prefix string which is used in /proc/slabinfo to identify this 401 + * cache. The individual caches will have their sizes as the suffix. 402 + * @flags: SLAB flags (see kmem_cache_create() for details). 403 + * @useroffset: Starting offset within an allocation that may be copied 404 + * to/from userspace. 405 + * @usersize: How many bytes, starting at @useroffset, may be copied 406 + * to/from userspace. 407 + * @ctor: A constructor for the objects, run when new allocations are made. 408 + * 409 + * Cannot be called within an interrupt, but can be interrupted. 410 + * 411 + * Return: a pointer to the cache on success, NULL on failure. When 412 + * CONFIG_SLAB_BUCKETS is not enabled, ZERO_SIZE_PTR is returned, and 413 + * subsequent calls to kmem_buckets_alloc() will fall back to kmalloc(). 414 + * (i.e. callers only need to check for NULL on failure.) 415 + */ 416 + kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, 417 + unsigned int useroffset, 418 + unsigned int usersize, 419 + void (*ctor)(void *)) 420 + { 421 + kmem_buckets *b; 422 + int idx; 423 + 424 + /* 425 + * When the separate buckets API is not built in, just return 426 + * a non-NULL value for the kmem_buckets pointer, which will be 427 + * unused when performing allocations. 
428 + */ 429 + if (!IS_ENABLED(CONFIG_SLAB_BUCKETS)) 430 + return ZERO_SIZE_PTR; 431 + 432 + if (WARN_ON(!kmem_buckets_cache)) 433 + return NULL; 434 + 435 + b = kmem_cache_alloc(kmem_buckets_cache, GFP_KERNEL|__GFP_ZERO); 436 + if (WARN_ON(!b)) 437 + return NULL; 438 + 439 + flags |= SLAB_NO_MERGE; 440 + 441 + for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) { 442 + char *short_size, *cache_name; 443 + unsigned int cache_useroffset, cache_usersize; 444 + unsigned int size; 445 + 446 + if (!kmalloc_caches[KMALLOC_NORMAL][idx]) 447 + continue; 448 + 449 + size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size; 450 + if (!size) 451 + continue; 452 + 453 + short_size = strchr(kmalloc_caches[KMALLOC_NORMAL][idx]->name, '-'); 454 + if (WARN_ON(!short_size)) 455 + goto fail; 456 + 457 + cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1); 458 + if (WARN_ON(!cache_name)) 459 + goto fail; 460 + 461 + if (useroffset >= size) { 462 + cache_useroffset = 0; 463 + cache_usersize = 0; 464 + } else { 465 + cache_useroffset = useroffset; 466 + cache_usersize = min(size - cache_useroffset, usersize); 467 + } 468 + (*b)[idx] = kmem_cache_create_usercopy(cache_name, size, 469 + 0, flags, cache_useroffset, 470 + cache_usersize, ctor); 471 + kfree(cache_name); 472 + if (WARN_ON(!(*b)[idx])) 473 + goto fail; 474 + } 475 + 476 + return b; 477 + 478 + fail: 479 + for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) 480 + kmem_cache_destroy((*b)[idx]); 481 + kfree(b); 482 + 483 + return NULL; 484 + } 485 + EXPORT_SYMBOL(kmem_buckets_create); 486 + 395 487 #ifdef SLAB_SUPPORTS_SYSFS 396 488 /* 397 489 * For a given kmem_cache, kmem_cache_destroy() should only be called ··· 709 617 s->size = s->object_size = size; 710 618 711 619 /* 712 - * For power of two sizes, guarantee natural alignment for kmalloc 713 - * caches, regardless of SL*B debugging options. 
620 + * kmalloc caches guarantee alignment of at least the largest 621 + * power-of-two divisor of the size. For power-of-two sizes, 622 + * it is the size itself. 714 623 */ 715 - if (is_power_of_2(size)) 716 - align = max(align, size); 624 + if (flags & SLAB_KMALLOC) 625 + align = max(align, 1U << (ffs(size) - 1)); 717 626 s->align = calculate_alignment(flags, align, size); 718 627 719 628 #ifdef CONFIG_HARDENED_USERCOPY ··· 746 653 return s; 747 654 } 748 655 749 - struct kmem_cache * 750 - kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init = 656 + kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES] __ro_after_init = 751 657 { /* initialization for https://llvm.org/pr42570 */ }; 752 658 EXPORT_SYMBOL(kmalloc_caches); 753 659 ··· 795 703 * The flags don't matter since size_index is common to all. 796 704 * Neither does the caller for just getting ->object_size. 797 705 */ 798 - return kmalloc_slab(size, GFP_KERNEL, 0)->object_size; 706 + return kmalloc_slab(size, NULL, GFP_KERNEL, 0)->object_size; 799 707 } 800 708 801 709 /* Above the smaller buckets, size is a multiple of page size. */ ··· 1024 932 1025 933 /* Kmalloc array is now usable */ 1026 934 slab_state = UP; 935 + 936 + if (IS_ENABLED(CONFIG_SLAB_BUCKETS)) 937 + kmem_buckets_cache = kmem_cache_create("kmalloc_buckets", 938 + sizeof(kmem_buckets), 939 + 0, SLAB_NO_MERGE, NULL); 1027 940 } 1028 941 1029 942 /**
+75 -56
mm/slub.c
··· 788 788 kunit_put_resource(resource); 789 789 return true; 790 790 } 791 + 792 + static bool slab_in_kunit_test(void) 793 + { 794 + struct kunit_resource *resource; 795 + 796 + if (!kunit_get_current_test()) 797 + return false; 798 + 799 + resource = kunit_find_named_resource(current->kunit_test, "slab_errors"); 800 + if (!resource) 801 + return false; 802 + 803 + kunit_put_resource(resource); 804 + return true; 805 + } 791 806 #else 792 807 static inline bool slab_add_kunit_errors(void) { return false; } 808 + static inline bool slab_in_kunit_test(void) { return false; } 793 809 #endif 794 810 795 811 static inline unsigned int size_from_object(struct kmem_cache *s) ··· 978 962 979 963 static void print_slab_info(const struct slab *slab) 980 964 { 981 - struct folio *folio = (struct folio *)slab_folio(slab); 982 - 983 965 pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n", 984 966 slab, slab->objects, slab->inuse, slab->freelist, 985 - folio_flags(folio, 0)); 967 + &slab->__page_flags); 986 968 } 987 969 988 970 /* ··· 1206 1192 pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n", 1207 1193 fault, end - 1, fault - addr, 1208 1194 fault[0], value); 1209 - print_trailer(s, slab, object); 1210 - add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); 1211 1195 1212 1196 skip_bug_print: 1213 1197 restore_bytes(s, what, value, fault, end); ··· 1228 1216 * Padding is extended by another word if Redzoning is enabled and 1229 1217 * object_size == inuse. 1230 1218 * 1231 - * We fill with 0xbb (RED_INACTIVE) for inactive objects and with 1232 - * 0xcc (RED_ACTIVE) for objects in use. 1219 + * We fill with 0xbb (SLUB_RED_INACTIVE) for inactive objects and with 1220 + * 0xcc (SLUB_RED_ACTIVE) for objects in use. 1233 1221 * 1234 1222 * object + s->inuse 1235 1223 * Meta data starts here. 
··· 1314 1302 u8 *p = object; 1315 1303 u8 *endobject = object + s->object_size; 1316 1304 unsigned int orig_size, kasan_meta_size; 1305 + int ret = 1; 1317 1306 1318 1307 if (s->flags & SLAB_RED_ZONE) { 1319 1308 if (!check_bytes_and_report(s, slab, object, "Left Redzone", 1320 1309 object - s->red_left_pad, val, s->red_left_pad)) 1321 - return 0; 1310 + ret = 0; 1322 1311 1323 1312 if (!check_bytes_and_report(s, slab, object, "Right Redzone", 1324 1313 endobject, val, s->inuse - s->object_size)) 1325 - return 0; 1314 + ret = 0; 1326 1315 1327 1316 if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) { 1328 1317 orig_size = get_orig_size(s, object); ··· 1332 1319 !check_bytes_and_report(s, slab, object, 1333 1320 "kmalloc Redzone", p + orig_size, 1334 1321 val, s->object_size - orig_size)) { 1335 - return 0; 1322 + ret = 0; 1336 1323 } 1337 1324 } 1338 1325 } else { 1339 1326 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) { 1340 - check_bytes_and_report(s, slab, p, "Alignment padding", 1327 + if (!check_bytes_and_report(s, slab, p, "Alignment padding", 1341 1328 endobject, POISON_INUSE, 1342 - s->inuse - s->object_size); 1329 + s->inuse - s->object_size)) 1330 + ret = 0; 1343 1331 } 1344 1332 } 1345 1333 ··· 1356 1342 !check_bytes_and_report(s, slab, p, "Poison", 1357 1343 p + kasan_meta_size, POISON_FREE, 1358 1344 s->object_size - kasan_meta_size - 1)) 1359 - return 0; 1345 + ret = 0; 1360 1346 if (kasan_meta_size < s->object_size && 1361 1347 !check_bytes_and_report(s, slab, p, "End Poison", 1362 1348 p + s->object_size - 1, POISON_END, 1)) 1363 - return 0; 1349 + ret = 0; 1364 1350 } 1365 1351 /* 1366 1352 * check_pad_bytes cleans up on its own. 1367 1353 */ 1368 - check_pad_bytes(s, slab, p); 1354 + if (!check_pad_bytes(s, slab, p)) 1355 + ret = 0; 1369 1356 } 1370 1357 1371 - if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE) 1372 - /* 1373 - * Object and freepointer overlap. Cannot check 1374 - * freepointer while object is allocated. 
1375 - */ 1376 - return 1; 1377 - 1378 - /* Check free pointer validity */ 1379 - if (!check_valid_pointer(s, slab, get_freepointer(s, p))) { 1358 + /* 1359 + * Cannot check freepointer while object is allocated if 1360 + * object and freepointer overlap. 1361 + */ 1362 + if ((freeptr_outside_object(s) || val != SLUB_RED_ACTIVE) && 1363 + !check_valid_pointer(s, slab, get_freepointer(s, p))) { 1380 1364 object_err(s, slab, p, "Freepointer corrupt"); 1381 1365 /* 1382 1366 * No choice but to zap it and thus lose the remainder ··· 1382 1370 * another error because the object count is now wrong. 1383 1371 */ 1384 1372 set_freepointer(s, p, NULL); 1385 - return 0; 1373 + ret = 0; 1386 1374 } 1387 - return 1; 1375 + 1376 + if (!ret && !slab_in_kunit_test()) { 1377 + print_trailer(s, slab, object); 1378 + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); 1379 + } 1380 + 1381 + return ret; 1388 1382 } 1389 1383 1390 1384 static int check_slab(struct kmem_cache *s, struct slab *slab) ··· 2572 2554 */ 2573 2555 static inline bool slab_test_node_partial(const struct slab *slab) 2574 2556 { 2575 - return folio_test_workingset((struct folio *)slab_folio(slab)); 2557 + return folio_test_workingset(slab_folio(slab)); 2576 2558 } 2577 2559 2578 2560 static inline void slab_set_node_partial(struct slab *slab) ··· 4081 4063 * directly to the page allocator. We use __GFP_COMP, because we will need to 4082 4064 * know the allocation order to free the pages properly in kfree. 
4083 4065 */ 4084 - static void *__kmalloc_large_node(size_t size, gfp_t flags, int node) 4066 + static void *___kmalloc_large_node(size_t size, gfp_t flags, int node) 4085 4067 { 4086 4068 struct folio *folio; 4087 4069 void *ptr = NULL; ··· 4106 4088 return ptr; 4107 4089 } 4108 4090 4109 - void *kmalloc_large_noprof(size_t size, gfp_t flags) 4091 + void *__kmalloc_large_noprof(size_t size, gfp_t flags) 4110 4092 { 4111 - void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE); 4093 + void *ret = ___kmalloc_large_node(size, flags, NUMA_NO_NODE); 4112 4094 4113 4095 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), 4114 4096 flags, NUMA_NO_NODE); 4115 4097 return ret; 4116 4098 } 4117 - EXPORT_SYMBOL(kmalloc_large_noprof); 4099 + EXPORT_SYMBOL(__kmalloc_large_noprof); 4118 4100 4119 - void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) 4101 + void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) 4120 4102 { 4121 - void *ret = __kmalloc_large_node(size, flags, node); 4103 + void *ret = ___kmalloc_large_node(size, flags, node); 4122 4104 4123 4105 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), 4124 4106 flags, node); 4125 4107 return ret; 4126 4108 } 4127 - EXPORT_SYMBOL(kmalloc_large_node_noprof); 4109 + EXPORT_SYMBOL(__kmalloc_large_node_noprof); 4128 4110 4129 4111 static __always_inline 4130 - void *__do_kmalloc_node(size_t size, gfp_t flags, int node, 4112 + void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node, 4131 4113 unsigned long caller) 4132 4114 { 4133 4115 struct kmem_cache *s; 4134 4116 void *ret; 4135 4117 4136 4118 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { 4137 - ret = __kmalloc_large_node(size, flags, node); 4119 + ret = __kmalloc_large_node_noprof(size, flags, node); 4138 4120 trace_kmalloc(caller, ret, size, 4139 4121 PAGE_SIZE << get_order(size), flags, node); 4140 4122 return ret; ··· 4143 4125 if (unlikely(!size)) 4144 4126 return ZERO_SIZE_PTR; 
4145 4127 4146 - s = kmalloc_slab(size, flags, caller); 4128 + s = kmalloc_slab(size, b, flags, caller); 4147 4129 4148 4130 ret = slab_alloc_node(s, NULL, flags, node, caller, size); 4149 4131 ret = kasan_kmalloc(s, ret, size, flags); 4150 4132 trace_kmalloc(caller, ret, size, s->size, flags, node); 4151 4133 return ret; 4152 4134 } 4153 - 4154 - void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) 4135 + void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) 4155 4136 { 4156 - return __do_kmalloc_node(size, flags, node, _RET_IP_); 4137 + return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, _RET_IP_); 4157 4138 } 4158 4139 EXPORT_SYMBOL(__kmalloc_node_noprof); 4159 4140 4160 4141 void *__kmalloc_noprof(size_t size, gfp_t flags) 4161 4142 { 4162 - return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_); 4143 + return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_); 4163 4144 } 4164 4145 EXPORT_SYMBOL(__kmalloc_noprof); 4165 4146 4166 - void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, 4167 - int node, unsigned long caller) 4147 + void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, 4148 + int node, unsigned long caller) 4168 4149 { 4169 - return __do_kmalloc_node(size, flags, node, caller); 4170 - } 4171 - EXPORT_SYMBOL(kmalloc_node_track_caller_noprof); 4150 + return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, caller); 4172 4151 4173 - void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size) 4152 + } 4153 + EXPORT_SYMBOL(__kmalloc_node_track_caller_noprof); 4154 + 4155 + void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size) 4174 4156 { 4175 4157 void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, 4176 4158 _RET_IP_, size); ··· 4180 4162 ret = kasan_kmalloc(s, ret, size, gfpflags); 4181 4163 return ret; 4182 4164 } 4183 - EXPORT_SYMBOL(kmalloc_trace_noprof); 4165 + 
EXPORT_SYMBOL(__kmalloc_cache_noprof); 4184 4166 4185 - void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, 4186 - int node, size_t size) 4167 + void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags, 4168 + int node, size_t size) 4187 4169 { 4188 4170 void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size); 4189 4171 ··· 4192 4174 ret = kasan_kmalloc(s, ret, size, gfpflags); 4193 4175 return ret; 4194 4176 } 4195 - EXPORT_SYMBOL(kmalloc_node_trace_noprof); 4177 + EXPORT_SYMBOL(__kmalloc_cache_node_noprof); 4196 4178 4197 4179 static noinline void free_to_partial_list( 4198 4180 struct kmem_cache *s, struct slab *slab, ··· 5177 5159 */ 5178 5160 s->inuse = size; 5179 5161 5180 - if (slub_debug_orig_size(s) || 5181 - (flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || 5182 - ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) || 5183 - s->ctor) { 5162 + if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || s->ctor || 5163 + ((flags & SLAB_RED_ZONE) && 5164 + (s->object_size < sizeof(void *) || slub_debug_orig_size(s)))) { 5184 5165 /* 5185 5166 * Relocate free pointer after the object if it is not 5186 5167 * permitted to overwrite the first word of the object on ··· 5187 5170 * 5188 5171 * This is the case if we do RCU, have a constructor or 5189 5172 * destructor, are poisoning the objects, or are 5190 - * redzoning an object smaller than sizeof(void *). 5173 + * redzoning an object smaller than sizeof(void *) or are 5174 + * redzoning an object with slub_debug_orig_size() enabled, 5175 + * in which case the right redzone may be extended. 5191 5176 * 5192 5177 * The assumption that s->offset >= s->inuse means free 5193 5178 * pointer is outside of the object is used in the
+17 -6
mm/util.c
··· 198 198 } 199 199 EXPORT_SYMBOL(kmemdup_nul); 200 200 201 + static kmem_buckets *user_buckets __ro_after_init; 202 + 203 + static int __init init_user_buckets(void) 204 + { 205 + user_buckets = kmem_buckets_create("memdup_user", 0, 0, INT_MAX, NULL); 206 + 207 + return 0; 208 + } 209 + subsys_initcall(init_user_buckets); 210 + 201 211 /** 202 212 * memdup_user - duplicate memory region from user space 203 213 * ··· 221 211 { 222 212 void *p; 223 213 224 - p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN); 214 + p = kmem_buckets_alloc_track_caller(user_buckets, len, GFP_USER | __GFP_NOWARN); 225 215 if (!p) 226 216 return ERR_PTR(-ENOMEM); 227 217 ··· 247 237 { 248 238 void *p; 249 239 250 - p = kvmalloc(len, GFP_USER); 240 + p = kmem_buckets_valloc(user_buckets, len, GFP_USER); 251 241 if (!p) 252 242 return ERR_PTR(-ENOMEM); 253 243 ··· 604 594 EXPORT_SYMBOL(vm_mmap); 605 595 606 596 /** 607 - * kvmalloc_node - attempt to allocate physically contiguous memory, but upon 597 + * __kvmalloc_node - attempt to allocate physically contiguous memory, but upon 608 598 * failure, fall back to non-contiguous (vmalloc) allocation. 609 599 * @size: size of the request. 600 + * @b: which set of kmalloc buckets to allocate from. 610 601 * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL. 
611 602 * @node: numa node to allocate from 612 603 * ··· 620 609 * 621 610 * Return: pointer to the allocated memory of %NULL in case of failure 622 611 */ 623 - void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) 612 + void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) 624 613 { 625 614 gfp_t kmalloc_flags = flags; 626 615 void *ret; ··· 642 631 kmalloc_flags &= ~__GFP_NOFAIL; 643 632 } 644 633 645 - ret = kmalloc_node_noprof(size, kmalloc_flags, node); 634 + ret = __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, b), kmalloc_flags, node); 646 635 647 636 /* 648 637 * It doesn't really make sense to fallback to vmalloc for sub page ··· 671 660 flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP, 672 661 node, __builtin_return_address(0)); 673 662 } 674 - EXPORT_SYMBOL(kvmalloc_node_noprof); 663 + EXPORT_SYMBOL(__kvmalloc_node_noprof); 675 664 676 665 /** 677 666 * kvfree() - Free memory.
+6 -13
rust/kernel/alloc/allocator.rs
··· 18 18 // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first. 19 19 let layout = new_layout.pad_to_align(); 20 20 21 - let mut size = layout.size(); 22 - 23 - if layout.align() > bindings::ARCH_SLAB_MINALIGN { 24 - // The alignment requirement exceeds the slab guarantee, thus try to enlarge the size 25 - // to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for 26 - // more information). 27 - // 28 - // Note that `layout.size()` (after padding) is guaranteed to be a multiple of 29 - // `layout.align()`, so `next_power_of_two` gives enough alignment guarantee. 30 - size = size.next_power_of_two(); 31 - } 21 + // Note that `layout.size()` (after padding) is guaranteed to be a multiple of `layout.align()` 22 + // which together with the slab guarantees means the `krealloc` will return a properly aligned 23 + // object (see comments in `kmalloc()` for more information). 24 + let size = layout.size(); 32 25 33 26 // SAFETY: 34 27 // - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the 35 28 // function safety requirement. 36 - // - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero 37 - // according to the function safety requirement) or a result from `next_power_of_two()`. 29 + // - `size` is greater than 0 since it's from `layout.size()` (which cannot be zero according 30 + // to the function safety requirement) 38 31 unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags.0) as *mut u8 } 39 32 } 40 33
+1
scripts/kernel-doc
··· 1729 1729 $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//; 1730 1730 $prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//; 1731 1731 $prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//; 1732 + $prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/; 1732 1733 my $define = $prototype =~ s/^#\s*define\s+//; #ak added 1733 1734 $prototype =~ s/__attribute_const__ +//; 1734 1735 $prototype =~ s/__attribute__\s*\(\(
+2 -5
tools/include/linux/poison.h
··· 47 47 * Magic nums for obj red zoning. 48 48 * Placed in the first word before and the first word after an obj. 49 49 */ 50 - #define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ 51 - #define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ 52 - 53 - #define SLUB_RED_INACTIVE 0xbb 54 - #define SLUB_RED_ACTIVE 0xcc 50 + #define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */ 51 + #define SLUB_RED_ACTIVE 0xcc /* when obj is active */ 55 52 56 53 /* ...and for poisoning */ 57 54 #define POISON_INUSE 0x5a /* for use-uninitialised poisoning */