Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'slab-for-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:
"This happens to be a small one (due to summer I guess), and all
hardening related:

- Randomized kmalloc caches, by GONG, Ruiqi.

A new opt-in hardening feature to make heap spraying harder. It
creates multiple (16) copies of kmalloc caches, reducing the chance
that an attacker-controllable allocation site lands in the same
slab as e.g. an allocation site with a use-after-free vulnerability.

The selection of the copy is derived from the allocation site
address, including a per-boot random seed.

- Stronger typing for hardened freelists in SLUB, by Jann Horn

Introduces a custom type for hardened freelist entries instead of
"void *" as those are not directly dereferenceable. While reviewing
this, I've noticed opportunities for further cleanups in that code
and added those on top"

* tag 'slab-for-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
Randomized slab caches for kmalloc()
mm/slub: remove freelist_dereference()
mm/slub: remove redundant kasan_reset_tag() from freelist_ptr calculations
mm/slub: refactor freelist to use custom type

+130 -40
+9 -3
include/linux/percpu.h
··· 35 35 #define PCPU_BITMAP_BLOCK_BITS (PCPU_BITMAP_BLOCK_SIZE >> \ 36 36 PCPU_MIN_ALLOC_SHIFT) 37 37 38 + #ifdef CONFIG_RANDOM_KMALLOC_CACHES 39 + #define PERCPU_DYNAMIC_SIZE_SHIFT 12 40 + #else 41 + #define PERCPU_DYNAMIC_SIZE_SHIFT 10 42 + #endif 43 + 38 44 /* 39 45 * Percpu allocator can serve percpu allocations before slab is 40 46 * initialized which allows slab to depend on the percpu allocator. ··· 48 42 * for this. Keep PERCPU_DYNAMIC_RESERVE equal to or larger than 49 43 * PERCPU_DYNAMIC_EARLY_SIZE. 50 44 */ 51 - #define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10) 45 + #define PERCPU_DYNAMIC_EARLY_SIZE (20 << PERCPU_DYNAMIC_SIZE_SHIFT) 52 46 53 47 /* 54 48 * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy ··· 62 56 * intelligent way to determine this would be nice. 63 57 */ 64 58 #if BITS_PER_LONG > 32 65 - #define PERCPU_DYNAMIC_RESERVE (28 << 10) 59 + #define PERCPU_DYNAMIC_RESERVE (28 << PERCPU_DYNAMIC_SIZE_SHIFT) 66 60 #else 67 - #define PERCPU_DYNAMIC_RESERVE (20 << 10) 61 + #define PERCPU_DYNAMIC_RESERVE (20 << PERCPU_DYNAMIC_SIZE_SHIFT) 68 62 #endif 69 63 70 64 extern void *pcpu_base_addr;
+20 -3
include/linux/slab.h
··· 19 19 #include <linux/workqueue.h> 20 20 #include <linux/percpu-refcount.h> 21 21 #include <linux/cleanup.h> 22 + #include <linux/hash.h> 22 23 23 24 24 25 /* ··· 346 345 #define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \ 347 346 (KMALLOC_MIN_SIZE) : 16) 348 347 348 + #ifdef CONFIG_RANDOM_KMALLOC_CACHES 349 + #define RANDOM_KMALLOC_CACHES_NR 15 // # of cache copies 350 + #else 351 + #define RANDOM_KMALLOC_CACHES_NR 0 352 + #endif 353 + 349 354 /* 350 355 * Whenever changing this, take care of that kmalloc_type() and 351 356 * create_kmalloc_caches() still work as intended. ··· 368 361 #ifndef CONFIG_MEMCG_KMEM 369 362 KMALLOC_CGROUP = KMALLOC_NORMAL, 370 363 #endif 364 + KMALLOC_RANDOM_START = KMALLOC_NORMAL, 365 + KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR, 371 366 #ifdef CONFIG_SLUB_TINY 372 367 KMALLOC_RECLAIM = KMALLOC_NORMAL, 373 368 #else ··· 395 386 (IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \ 396 387 (IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0)) 397 388 398 - static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags) 389 + extern unsigned long random_kmalloc_seed; 390 + 391 + static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller) 399 392 { 400 393 /* 401 394 * The most common case is KMALLOC_NORMAL, so test for it 402 395 * with a single branch for all the relevant flags. 403 396 */ 404 397 if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0)) 398 + #ifdef CONFIG_RANDOM_KMALLOC_CACHES 399 + /* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */ 400 + return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed, 401 + ilog2(RANDOM_KMALLOC_CACHES_NR + 1)); 402 + #else 405 403 return KMALLOC_NORMAL; 404 + #endif 406 405 407 406 /* 408 407 * At least one of the flags has to be set. 
Their priorities in ··· 597 580 598 581 index = kmalloc_index(size); 599 582 return kmalloc_trace( 600 - kmalloc_caches[kmalloc_type(flags)][index], 583 + kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], 601 584 flags, size); 602 585 } 603 586 return __kmalloc(size, flags); ··· 613 596 614 597 index = kmalloc_index(size); 615 598 return kmalloc_node_trace( 616 - kmalloc_caches[kmalloc_type(flags)][index], 599 + kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], 617 600 flags, node, size); 618 601 } 619 602 return __kmalloc_node(size, flags, node);
+17
mm/Kconfig
··· 337 337 which requires the taking of locks that may cause latency spikes. 338 338 Typically one would choose no for a realtime system. 339 339 340 + config RANDOM_KMALLOC_CACHES 341 + default n 342 + depends on SLUB && !SLUB_TINY 343 + bool "Randomize slab caches for normal kmalloc" 344 + help 345 + A hardening feature that creates multiple copies of slab caches for 346 + normal kmalloc allocation and makes kmalloc randomly pick one based 347 + on code address, which makes the attackers more difficult to spray 348 + vulnerable memory objects on the heap for the purpose of exploiting 349 + memory vulnerabilities. 350 + 351 + Currently the number of copies is set to 16, a reasonably large value 352 + that effectively diverges the memory objects allocated for different 353 + subsystems or modules into different caches, at the expense of a 354 + limited degree of memory and CPU overhead that relates to hardware and 355 + system workload. 356 + 340 357 endmenu # SLAB allocator options 341 358 342 359 config SHUFFLE_PAGE_ALLOCATOR
+5 -2
mm/kfence/kfence_test.c
··· 212 212 213 213 static inline size_t kmalloc_cache_alignment(size_t size) 214 214 { 215 - return kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)]->align; 215 + /* just to get ->align so no need to pass in the real caller */ 216 + enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, 0); 217 + return kmalloc_caches[type][__kmalloc_index(size, false)]->align; 216 218 } 217 219 218 220 /* Must always inline to match stack trace against caller. */ ··· 284 282 285 283 if (is_kfence_address(alloc)) { 286 284 struct slab *slab = virt_to_slab(alloc); 285 + enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, _RET_IP_); 287 286 struct kmem_cache *s = test_cache ?: 288 - kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)]; 287 + kmalloc_caches[type][__kmalloc_index(size, false)]; 289 288 290 289 /* 291 290 * Verify that various helpers return the right values
+1 -1
mm/slab.c
··· 1670 1670 if (freelist_size > KMALLOC_MAX_CACHE_SIZE) { 1671 1671 freelist_cache_size = PAGE_SIZE << get_order(freelist_size); 1672 1672 } else { 1673 - freelist_cache = kmalloc_slab(freelist_size, 0u); 1673 + freelist_cache = kmalloc_slab(freelist_size, 0u, _RET_IP_); 1674 1674 if (!freelist_cache) 1675 1675 continue; 1676 1676 freelist_cache_size = freelist_cache->size;
+1 -1
mm/slab.h
··· 282 282 void create_kmalloc_caches(slab_flags_t); 283 283 284 284 /* Find the kmalloc slab corresponding for a certain size */ 285 - struct kmem_cache *kmalloc_slab(size_t, gfp_t); 285 + struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller); 286 286 287 287 void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, 288 288 int node, size_t orig_size,
+44 -5
mm/slab_common.c
··· 678 678 { /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ }; 679 679 EXPORT_SYMBOL(kmalloc_caches); 680 680 681 + #ifdef CONFIG_RANDOM_KMALLOC_CACHES 682 + unsigned long random_kmalloc_seed __ro_after_init; 683 + EXPORT_SYMBOL(random_kmalloc_seed); 684 + #endif 685 + 681 686 /* 682 687 * Conversion table for small slabs sizes / 8 to the index in the 683 688 * kmalloc array. This is necessary for slabs < 192 since we have non power ··· 725 720 * Find the kmem_cache structure that serves a given size of 726 721 * allocation 727 722 */ 728 - struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) 723 + struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller) 729 724 { 730 725 unsigned int index; 731 726 ··· 740 735 index = fls(size - 1); 741 736 } 742 737 743 - return kmalloc_caches[kmalloc_type(flags)][index]; 738 + return kmalloc_caches[kmalloc_type(flags, caller)][index]; 744 739 } 745 740 746 741 size_t kmalloc_size_roundup(size_t size) ··· 757 752 if (size > KMALLOC_MAX_CACHE_SIZE) 758 753 return PAGE_SIZE << get_order(size); 759 754 760 - /* The flags don't matter since size_index is common to all. */ 761 - c = kmalloc_slab(size, GFP_KERNEL); 755 + /* 756 + * The flags don't matter since size_index is common to all. 757 + * Neither does the caller for just getting ->object_size. 758 + */ 759 + c = kmalloc_slab(size, GFP_KERNEL, 0); 762 760 return c ? 
c->object_size : 0; 763 761 } 764 762 EXPORT_SYMBOL(kmalloc_size_roundup); ··· 784 776 #define KMALLOC_RCL_NAME(sz) 785 777 #endif 786 778 779 + #ifdef CONFIG_RANDOM_KMALLOC_CACHES 780 + #define __KMALLOC_RANDOM_CONCAT(a, b) a ## b 781 + #define KMALLOC_RANDOM_NAME(N, sz) __KMALLOC_RANDOM_CONCAT(KMA_RAND_, N)(sz) 782 + #define KMA_RAND_1(sz) .name[KMALLOC_RANDOM_START + 1] = "kmalloc-rnd-01-" #sz, 783 + #define KMA_RAND_2(sz) KMA_RAND_1(sz) .name[KMALLOC_RANDOM_START + 2] = "kmalloc-rnd-02-" #sz, 784 + #define KMA_RAND_3(sz) KMA_RAND_2(sz) .name[KMALLOC_RANDOM_START + 3] = "kmalloc-rnd-03-" #sz, 785 + #define KMA_RAND_4(sz) KMA_RAND_3(sz) .name[KMALLOC_RANDOM_START + 4] = "kmalloc-rnd-04-" #sz, 786 + #define KMA_RAND_5(sz) KMA_RAND_4(sz) .name[KMALLOC_RANDOM_START + 5] = "kmalloc-rnd-05-" #sz, 787 + #define KMA_RAND_6(sz) KMA_RAND_5(sz) .name[KMALLOC_RANDOM_START + 6] = "kmalloc-rnd-06-" #sz, 788 + #define KMA_RAND_7(sz) KMA_RAND_6(sz) .name[KMALLOC_RANDOM_START + 7] = "kmalloc-rnd-07-" #sz, 789 + #define KMA_RAND_8(sz) KMA_RAND_7(sz) .name[KMALLOC_RANDOM_START + 8] = "kmalloc-rnd-08-" #sz, 790 + #define KMA_RAND_9(sz) KMA_RAND_8(sz) .name[KMALLOC_RANDOM_START + 9] = "kmalloc-rnd-09-" #sz, 791 + #define KMA_RAND_10(sz) KMA_RAND_9(sz) .name[KMALLOC_RANDOM_START + 10] = "kmalloc-rnd-10-" #sz, 792 + #define KMA_RAND_11(sz) KMA_RAND_10(sz) .name[KMALLOC_RANDOM_START + 11] = "kmalloc-rnd-11-" #sz, 793 + #define KMA_RAND_12(sz) KMA_RAND_11(sz) .name[KMALLOC_RANDOM_START + 12] = "kmalloc-rnd-12-" #sz, 794 + #define KMA_RAND_13(sz) KMA_RAND_12(sz) .name[KMALLOC_RANDOM_START + 13] = "kmalloc-rnd-13-" #sz, 795 + #define KMA_RAND_14(sz) KMA_RAND_13(sz) .name[KMALLOC_RANDOM_START + 14] = "kmalloc-rnd-14-" #sz, 796 + #define KMA_RAND_15(sz) KMA_RAND_14(sz) .name[KMALLOC_RANDOM_START + 15] = "kmalloc-rnd-15-" #sz, 797 + #else // CONFIG_RANDOM_KMALLOC_CACHES 798 + #define KMALLOC_RANDOM_NAME(N, sz) 799 + #endif 800 + 787 801 #define INIT_KMALLOC_INFO(__size, __short_size) \ 788 
802 { \ 789 803 .name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \ 790 804 KMALLOC_RCL_NAME(__short_size) \ 791 805 KMALLOC_CGROUP_NAME(__short_size) \ 792 806 KMALLOC_DMA_NAME(__short_size) \ 807 + KMALLOC_RANDOM_NAME(RANDOM_KMALLOC_CACHES_NR, __short_size) \ 793 808 .size = __size, \ 794 809 } 795 810 ··· 921 890 flags |= SLAB_CACHE_DMA; 922 891 } 923 892 893 + #ifdef CONFIG_RANDOM_KMALLOC_CACHES 894 + if (type >= KMALLOC_RANDOM_START && type <= KMALLOC_RANDOM_END) 895 + flags |= SLAB_NO_MERGE; 896 + #endif 897 + 924 898 /* 925 899 * If CONFIG_MEMCG_KMEM is enabled, disable cache merging for 926 900 * KMALLOC_NORMAL caches. ··· 977 941 new_kmalloc_cache(2, type, flags); 978 942 } 979 943 } 944 + #ifdef CONFIG_RANDOM_KMALLOC_CACHES 945 + random_kmalloc_seed = get_random_u64(); 946 + #endif 980 947 981 948 /* Kmalloc array is now usable */ 982 949 slab_state = UP; ··· 1015 976 return ret; 1016 977 } 1017 978 1018 - s = kmalloc_slab(size, flags); 979 + s = kmalloc_slab(size, flags, caller); 1019 980 1020 981 if (unlikely(ZERO_OR_NULL_PTR(s))) 1021 982 return s;
+33 -25
mm/slub.c
··· 361 361 *******************************************************************/ 362 362 363 363 /* 364 + * freeptr_t represents a SLUB freelist pointer, which might be encoded 365 + * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. 366 + */ 367 + typedef struct { unsigned long v; } freeptr_t; 368 + 369 + /* 364 370 * Returns freelist pointer (ptr). With hardening, this is obfuscated 365 371 * with an XOR of the address where the pointer is held and a per-cache 366 372 * random number. 367 373 */ 368 - static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr, 369 - unsigned long ptr_addr) 374 + static inline freeptr_t freelist_ptr_encode(const struct kmem_cache *s, 375 + void *ptr, unsigned long ptr_addr) 370 376 { 377 + unsigned long encoded; 378 + 371 379 #ifdef CONFIG_SLAB_FREELIST_HARDENED 372 - /* 373 - * When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged. 374 - * Normally, this doesn't cause any issues, as both set_freepointer() 375 - * and get_freepointer() are called with a pointer with the same tag. 376 - * However, there are some issues with CONFIG_SLUB_DEBUG code. For 377 - * example, when __free_slub() iterates over objects in a cache, it 378 - * passes untagged pointers to check_object(). check_object() in turns 379 - * calls get_freepointer() with an untagged pointer, which causes the 380 - * freepointer to be restored incorrectly. 381 - */ 382 - return (void *)((unsigned long)ptr ^ s->random ^ 383 - swab((unsigned long)kasan_reset_tag((void *)ptr_addr))); 380 + encoded = (unsigned long)ptr ^ s->random ^ swab(ptr_addr); 384 381 #else 385 - return ptr; 382 + encoded = (unsigned long)ptr; 386 383 #endif 384 + return (freeptr_t){.v = encoded}; 387 385 } 388 386 389 - /* Returns the freelist pointer recorded at location ptr_addr. 
*/ 390 - static inline void *freelist_dereference(const struct kmem_cache *s, 391 - void *ptr_addr) 387 + static inline void *freelist_ptr_decode(const struct kmem_cache *s, 388 + freeptr_t ptr, unsigned long ptr_addr) 392 389 { 393 - return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr), 394 - (unsigned long)ptr_addr); 390 + void *decoded; 391 + 392 + #ifdef CONFIG_SLAB_FREELIST_HARDENED 393 + decoded = (void *)(ptr.v ^ s->random ^ swab(ptr_addr)); 394 + #else 395 + decoded = (void *)ptr.v; 396 + #endif 397 + return decoded; 395 398 } 396 399 397 400 static inline void *get_freepointer(struct kmem_cache *s, void *object) 398 401 { 402 + unsigned long ptr_addr; 403 + freeptr_t p; 404 + 399 405 object = kasan_reset_tag(object); 400 - return freelist_dereference(s, object + s->offset); 406 + ptr_addr = (unsigned long)object + s->offset; 407 + p = *(freeptr_t *)(ptr_addr); 408 + return freelist_ptr_decode(s, p, ptr_addr); 401 409 } 402 410 403 411 #ifndef CONFIG_SLUB_TINY ··· 429 421 static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) 430 422 { 431 423 unsigned long freepointer_addr; 432 - void *p; 424 + freeptr_t p; 433 425 434 426 if (!debug_pagealloc_enabled_static()) 435 427 return get_freepointer(s, object); 436 428 437 429 object = kasan_reset_tag(object); 438 430 freepointer_addr = (unsigned long)object + s->offset; 439 - copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p)); 440 - return freelist_ptr(s, p, freepointer_addr); 431 + copy_from_kernel_nofault(&p, (freeptr_t *)freepointer_addr, sizeof(p)); 432 + return freelist_ptr_decode(s, p, freepointer_addr); 441 433 } 442 434 443 435 static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) ··· 449 441 #endif 450 442 451 443 freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr); 452 - *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr); 444 + *(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr); 453 445 } 
454 446 455 447 /* Loop over all objects in a slab */