Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm, kfence: insert KFENCE hooks for SLUB

Inserts KFENCE hooks into the SLUB allocator.

To pass the originally requested size to KFENCE, add an argument
'orig_size' to slab_alloc*(). The additional argument is required to
preserve the requested original size for kmalloc() allocations, which
use size classes (e.g. an allocation of 272 bytes will return an object
of size 512). Therefore, kmem_cache::size does not represent the
kmalloc-caller's requested size, and we must introduce the argument
'orig_size' to propagate the originally requested size to KFENCE.

Without the originally requested size, we would not be able to detect
out-of-bounds accesses for objects placed at the end of a KFENCE object
page if that object is not equal to the kmalloc-size class it was
bucketed into.

When KFENCE is disabled, there is no additional overhead, since
slab_alloc*() functions are __always_inline.

Link: https://lkml.kernel.org/r/20201103175841.3495947-6-elver@google.com
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Jann Horn <jannh@google.com>
Co-developed-by: Marco Elver <elver@google.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christopher Lameter <cl@linux.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hillf Danton <hdanton@sina.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joern Engel <joern@purestorage.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: SeongJae Park <sjpark@amazon.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Alexander Potapenko and committed by
Linus Torvalds
b89fb5ef d3fb45f3

+51 -14
+3
include/linux/slub_def.h
··· 7 7 * 8 8 * (C) 2007 SGI, Christoph Lameter 9 9 */ 10 + #include <linux/kfence.h> 10 11 #include <linux/kobject.h> 11 12 #include <linux/reciprocal_div.h> 12 13 ··· 186 185 static inline unsigned int obj_to_index(const struct kmem_cache *cache, 187 186 const struct page *page, void *obj) 188 187 { 188 + if (is_kfence_address(obj)) 189 + return 0; 189 190 return __obj_to_index(cache, page_address(page), obj); 190 191 } 191 192
+2
mm/kfence/core.c
··· 317 317 /* Set required struct page fields. */ 318 318 page = virt_to_page(meta->addr); 319 319 page->slab_cache = cache; 320 + if (IS_ENABLED(CONFIG_SLUB)) 321 + page->objects = 1; 320 322 if (IS_ENABLED(CONFIG_SLAB)) 321 323 page->s_mem = addr; 322 324
+46 -14
mm/slub.c
··· 27 27 #include <linux/ctype.h> 28 28 #include <linux/debugobjects.h> 29 29 #include <linux/kallsyms.h> 30 + #include <linux/kfence.h> 30 31 #include <linux/memory.h> 31 32 #include <linux/math64.h> 32 33 #include <linux/fault-inject.h> ··· 1571 1570 void *old_tail = *tail ? *tail : *head; 1572 1571 int rsize; 1573 1572 1573 + if (is_kfence_address(next)) { 1574 + slab_free_hook(s, next); 1575 + return true; 1576 + } 1577 + 1574 1578 /* Head and tail of the reconstructed freelist */ 1575 1579 *head = NULL; 1576 1580 *tail = NULL; ··· 2815 2809 * Otherwise we can simply pick the next object from the lockless free list. 2816 2810 */ 2817 2811 static __always_inline void *slab_alloc_node(struct kmem_cache *s, 2818 - gfp_t gfpflags, int node, unsigned long addr) 2812 + gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) 2819 2813 { 2820 2814 void *object; 2821 2815 struct kmem_cache_cpu *c; ··· 2826 2820 s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags); 2827 2821 if (!s) 2828 2822 return NULL; 2823 + 2824 + object = kfence_alloc(s, orig_size, gfpflags); 2825 + if (unlikely(object)) 2826 + goto out; 2827 + 2829 2828 redo: 2830 2829 /* 2831 2830 * Must read kmem_cache cpu data via this cpu ptr. 
Preemption is ··· 2903 2892 if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) 2904 2893 memset(kasan_reset_tag(object), 0, s->object_size); 2905 2894 2895 + out: 2906 2896 slab_post_alloc_hook(s, objcg, gfpflags, 1, &object); 2907 2897 2908 2898 return object; 2909 2899 } 2910 2900 2911 2901 static __always_inline void *slab_alloc(struct kmem_cache *s, 2912 - gfp_t gfpflags, unsigned long addr) 2902 + gfp_t gfpflags, unsigned long addr, size_t orig_size) 2913 2903 { 2914 - return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr); 2904 + return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size); 2915 2905 } 2916 2906 2917 2907 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) 2918 2908 { 2919 - void *ret = slab_alloc(s, gfpflags, _RET_IP_); 2909 + void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size); 2920 2910 2921 2911 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, 2922 2912 s->size, gfpflags); ··· 2929 2917 #ifdef CONFIG_TRACING 2930 2918 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) 2931 2919 { 2932 - void *ret = slab_alloc(s, gfpflags, _RET_IP_); 2920 + void *ret = slab_alloc(s, gfpflags, _RET_IP_, size); 2933 2921 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); 2934 2922 ret = kasan_kmalloc(s, ret, size, gfpflags); 2935 2923 return ret; ··· 2940 2928 #ifdef CONFIG_NUMA 2941 2929 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) 2942 2930 { 2943 - void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_); 2931 + void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size); 2944 2932 2945 2933 trace_kmem_cache_alloc_node(_RET_IP_, ret, 2946 2934 s->object_size, s->size, gfpflags, node); ··· 2954 2942 gfp_t gfpflags, 2955 2943 int node, size_t size) 2956 2944 { 2957 - void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_); 2945 + void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size); 2958 2946 2959 2947 
trace_kmalloc_node(_RET_IP_, ret, 2960 2948 size, s->size, gfpflags, node); ··· 2987 2975 unsigned long flags; 2988 2976 2989 2977 stat(s, FREE_SLOWPATH); 2978 + 2979 + if (kfence_free(head)) 2980 + return; 2990 2981 2991 2982 if (kmem_cache_debug(s) && 2992 2983 !free_debug_processing(s, page, head, tail, cnt, addr)) ··· 3235 3220 df->s = cache_from_obj(s, object); /* Support for memcg */ 3236 3221 } 3237 3222 3223 + if (is_kfence_address(object)) { 3224 + slab_free_hook(df->s, object); 3225 + __kfence_free(object); 3226 + p[size] = NULL; /* mark object processed */ 3227 + return size; 3228 + } 3229 + 3238 3230 /* Start new detached freelist */ 3239 3231 df->page = page; 3240 3232 set_freepointer(df->s, object, NULL); ··· 3317 3295 c = this_cpu_ptr(s->cpu_slab); 3318 3296 3319 3297 for (i = 0; i < size; i++) { 3320 - void *object = c->freelist; 3298 + void *object = kfence_alloc(s, s->object_size, flags); 3321 3299 3300 + if (unlikely(object)) { 3301 + p[i] = object; 3302 + continue; 3303 + } 3304 + 3305 + object = c->freelist; 3322 3306 if (unlikely(!object)) { 3323 3307 /* 3324 3308 * We may have removed an object from c->freelist using ··· 4049 4021 if (unlikely(ZERO_OR_NULL_PTR(s))) 4050 4022 return s; 4051 4023 4052 - ret = slab_alloc(s, flags, _RET_IP_); 4024 + ret = slab_alloc(s, flags, _RET_IP_, size); 4053 4025 4054 4026 trace_kmalloc(_RET_IP_, ret, size, s->size, flags); 4055 4027 ··· 4097 4069 if (unlikely(ZERO_OR_NULL_PTR(s))) 4098 4070 return s; 4099 4071 4100 - ret = slab_alloc_node(s, flags, node, _RET_IP_); 4072 + ret = slab_alloc_node(s, flags, node, _RET_IP_, size); 4101 4073 4102 4074 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); 4103 4075 ··· 4123 4095 struct kmem_cache *s; 4124 4096 unsigned int offset; 4125 4097 size_t object_size; 4098 + bool is_kfence = is_kfence_address(ptr); 4126 4099 4127 4100 ptr = kasan_reset_tag(ptr); 4128 4101 ··· 4136 4107 to_user, 0, n); 4137 4108 4138 4109 /* Find offset within object. 
*/ 4139 - offset = (ptr - page_address(page)) % s->size; 4110 + if (is_kfence) 4111 + offset = ptr - kfence_object_start(ptr); 4112 + else 4113 + offset = (ptr - page_address(page)) % s->size; 4140 4114 4141 4115 /* Adjust for redzone and reject if within the redzone. */ 4142 - if (kmem_cache_debug_flags(s, SLAB_RED_ZONE)) { 4116 + if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) { 4143 4117 if (offset < s->red_left_pad) 4144 4118 usercopy_abort("SLUB object in left red zone", 4145 4119 s->name, to_user, offset, n); ··· 4559 4527 if (unlikely(ZERO_OR_NULL_PTR(s))) 4560 4528 return s; 4561 4529 4562 - ret = slab_alloc(s, gfpflags, caller); 4530 + ret = slab_alloc(s, gfpflags, caller, size); 4563 4531 4564 4532 /* Honor the call site pointer we received. */ 4565 4533 trace_kmalloc(caller, ret, size, s->size, gfpflags); ··· 4590 4558 if (unlikely(ZERO_OR_NULL_PTR(s))) 4591 4559 return s; 4592 4560 4593 - ret = slab_alloc_node(s, gfpflags, node, caller); 4561 + ret = slab_alloc_node(s, gfpflags, node, caller, size); 4594 4562 4595 4563 /* Honor the call site pointer we received. */ 4596 4564 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);