Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm/slab: Plumb kmem_buckets into __do_kmalloc_node()

Introduce CONFIG_SLAB_BUCKETS which provides the infrastructure to
support separated kmalloc buckets (in the following kmem_buckets_create()
patches and future codetag-based separation). Since this will provide
a mitigation for a very common case of exploits, it is recommended to
enable this feature for general purpose distros. By default, the new
Kconfig will be enabled if CONFIG_SLAB_FREELIST_HARDENED is enabled (and
it is added to the hardening.config Kconfig fragment).

To be able to choose which buckets to allocate from, make the buckets
available to the internal kmalloc interfaces by adding them as the
second argument, rather than depending on the buckets being chosen from
the fixed set of global buckets. Where the bucket is not available,
pass NULL, which means "use the default system kmalloc bucket set"
(the prior existing behavior), as implemented in kmalloc_slab().

To avoid adding the extra argument when !CONFIG_SLAB_BUCKETS, only the
top-level macros and static inlines use the buckets argument (where
they are stripped out and compiled out respectively). The actual extern
functions can then be built without the argument, and the internals
fall back to the global kmalloc buckets unconditionally.

Co-developed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Kees Cook <kees@kernel.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

Authored by Kees Cook and committed by Vlastimil Babka

commit 67f2df3b (parent 72e0fe22)

+56 -18
+22 -5
include/linux/slab.h
··· 571 571 #define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) 572 572 573 573 /* 574 + * These macros allow declaring a kmem_buckets * parameter alongside size, which 575 + * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call 576 + * sites don't have to pass NULL. 577 + */ 578 + #ifdef CONFIG_SLAB_BUCKETS 579 + #define DECL_BUCKET_PARAMS(_size, _b) size_t (_size), kmem_buckets *(_b) 580 + #define PASS_BUCKET_PARAMS(_size, _b) (_size), (_b) 581 + #define PASS_BUCKET_PARAM(_b) (_b) 582 + #else 583 + #define DECL_BUCKET_PARAMS(_size, _b) size_t (_size) 584 + #define PASS_BUCKET_PARAMS(_size, _b) (_size) 585 + #define PASS_BUCKET_PARAM(_b) NULL 586 + #endif 587 + 588 + /* 574 589 * The following functions are not to be used directly and are intended only 575 590 * for internal use from kmalloc() and kmalloc_node() 576 591 * with the exception of kunit tests ··· 594 579 void *__kmalloc_noprof(size_t size, gfp_t flags) 595 580 __assume_kmalloc_alignment __alloc_size(1); 596 581 597 - void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) 582 + void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) 598 583 __assume_kmalloc_alignment __alloc_size(1); 599 584 600 585 void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size) ··· 695 680 kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], 696 681 flags, node, size); 697 682 } 698 - return __kmalloc_node_noprof(size, flags, node); 683 + return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node); 699 684 } 700 685 #define kmalloc_node(...) 
alloc_hooks(kmalloc_node_noprof(__VA_ARGS__)) 701 686 ··· 746 731 */ 747 732 #define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO) 748 733 749 - void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node, 750 - unsigned long caller) __alloc_size(1); 734 + void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node, 735 + unsigned long caller) __alloc_size(1); 736 + #define kmalloc_node_track_caller_noprof(size, flags, node, caller) \ 737 + __kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller) 751 738 #define kmalloc_node_track_caller(...) \ 752 739 alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_)) 753 740 ··· 775 758 return NULL; 776 759 if (__builtin_constant_p(n) && __builtin_constant_p(size)) 777 760 return kmalloc_node_noprof(bytes, flags, node); 778 - return __kmalloc_node_noprof(bytes, flags, node); 761 + return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node); 779 762 } 780 763 #define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__)) 781 764
+1
kernel/configs/hardening.config
··· 20 20 # Randomize allocator freelists, harden metadata. 21 21 CONFIG_SLAB_FREELIST_RANDOM=y 22 22 CONFIG_SLAB_FREELIST_HARDENED=y 23 + CONFIG_SLAB_BUCKETS=y 23 24 CONFIG_SHUFFLE_PAGE_ALLOCATOR=y 24 25 CONFIG_RANDOM_KMALLOC_CACHES=y 25 26
+17
mm/Kconfig
··· 273 273 sacrifices to harden the kernel slab allocator against common 274 274 freelist exploit methods. 275 275 276 + config SLAB_BUCKETS 277 + bool "Support allocation from separate kmalloc buckets" 278 + depends on !SLUB_TINY 279 + default SLAB_FREELIST_HARDENED 280 + help 281 + Kernel heap attacks frequently depend on being able to create 282 + specifically-sized allocations with user-controlled contents 283 + that will be allocated into the same kmalloc bucket as a 284 + target object. To avoid sharing these allocation buckets, 285 + provide an explicitly separated set of buckets to be used for 286 + user-controlled allocations. This may very slightly increase 287 + memory fragmentation, though in practice it's only a handful 288 + of extra pages since the bulk of user-controlled allocations 289 + are relatively long-lived. 290 + 291 + If unsure, say Y. 292 + 276 293 config SLUB_STATS 277 294 default n 278 295 bool "Enable performance statistics"
+4 -2
mm/slab.h
··· 403 403 * KMALLOC_MAX_CACHE_SIZE and the caller must check that. 404 404 */ 405 405 static inline struct kmem_cache * 406 - kmalloc_slab(size_t size, gfp_t flags, unsigned long caller) 406 + kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller) 407 407 { 408 408 unsigned int index; 409 409 410 + if (!b) 411 + b = &kmalloc_caches[kmalloc_type(flags, caller)]; 410 412 if (size <= 192) 411 413 index = kmalloc_size_index[size_index_elem(size)]; 412 414 else 413 415 index = fls(size - 1); 414 416 415 - return kmalloc_caches[kmalloc_type(flags, caller)][index]; 417 + return (*b)[index]; 416 418 } 417 419 418 420 gfp_t kmalloc_fix_flags(gfp_t flags);
+1 -1
mm/slab_common.c
··· 703 703 * The flags don't matter since size_index is common to all. 704 704 * Neither does the caller for just getting ->object_size. 705 705 */ 706 - return kmalloc_slab(size, GFP_KERNEL, 0)->object_size; 706 + return kmalloc_slab(size, NULL, GFP_KERNEL, 0)->object_size; 707 707 } 708 708 709 709 /* Above the smaller buckets, size is a multiple of page size. */
+10 -10
mm/slub.c
··· 4117 4117 EXPORT_SYMBOL(__kmalloc_large_node_noprof); 4118 4118 4119 4119 static __always_inline 4120 - void *__do_kmalloc_node(size_t size, gfp_t flags, int node, 4120 + void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node, 4121 4121 unsigned long caller) 4122 4122 { 4123 4123 struct kmem_cache *s; ··· 4133 4133 if (unlikely(!size)) 4134 4134 return ZERO_SIZE_PTR; 4135 4135 4136 - s = kmalloc_slab(size, flags, caller); 4136 + s = kmalloc_slab(size, b, flags, caller); 4137 4137 4138 4138 ret = slab_alloc_node(s, NULL, flags, node, caller, size); 4139 4139 ret = kasan_kmalloc(s, ret, size, flags); 4140 4140 trace_kmalloc(caller, ret, size, s->size, flags, node); 4141 4141 return ret; 4142 4142 } 4143 - 4144 - void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) 4143 + void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) 4145 4144 { 4146 - return __do_kmalloc_node(size, flags, node, _RET_IP_); 4145 + return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, _RET_IP_); 4147 4146 } 4148 4147 EXPORT_SYMBOL(__kmalloc_node_noprof); 4149 4148 4150 4149 void *__kmalloc_noprof(size_t size, gfp_t flags) 4151 4150 { 4152 - return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_); 4151 + return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_); 4153 4152 } 4154 4153 EXPORT_SYMBOL(__kmalloc_noprof); 4155 4154 4156 - void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, 4157 - int node, unsigned long caller) 4155 + void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, 4156 + int node, unsigned long caller) 4158 4157 { 4159 - return __do_kmalloc_node(size, flags, node, caller); 4158 + return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, caller); 4159 + 4160 4160 } 4161 - EXPORT_SYMBOL(kmalloc_node_track_caller_noprof); 4161 + EXPORT_SYMBOL(__kmalloc_node_track_caller_noprof); 4162 4162 4163 4163 void *__kmalloc_cache_noprof(struct kmem_cache 
*s, gfp_t gfpflags, size_t size) 4164 4164 {
+1
scripts/kernel-doc
··· 1729 1729 $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//; 1730 1730 $prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//; 1731 1731 $prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//; 1732 + $prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/; 1732 1733 my $define = $prototype =~ s/^#\s*define\s+//; #ak added 1733 1734 $prototype =~ s/__attribute_const__ +//; 1734 1735 $prototype =~ s/__attribute__\s*\(\(