Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

slub: Introduce CONFIG_SLUB_RCU_DEBUG

Currently, KASAN is unable to catch use-after-free in SLAB_TYPESAFE_BY_RCU
slabs because use-after-free is allowed within the RCU grace period by
design.

Add a SLUB debugging feature which RCU-delays every individual
kmem_cache_free() before either actually freeing the object or handing it
off to KASAN, and change KASAN to poison freed objects as normal when this
option is enabled.

For now I've configured Kconfig.debug to default-enable this feature in the
KASAN GENERIC and SW_TAGS modes; I'm not enabling it by default in HW_TAGS
mode because I'm not sure if it might have unwanted performance degradation
effects there.

Note that this is mostly useful with KASAN in the quarantine-based GENERIC
mode; SLAB_TYPESAFE_BY_RCU slabs are basically always also slabs with a
->ctor, and KASAN's assign_tag() currently has to assign fixed tags for
those, reducing the effectiveness of SW_TAGS/HW_TAGS mode.
(A possible future extension of this work would be to also let SLUB call
the ->ctor() on every allocation instead of only when the slab page is
allocated; then tag-based modes would be able to assign new tags on every
reallocation.)

Tested-by: syzbot+263726e59eab6b442723@syzkaller.appspotmail.com
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Acked-by: Marco Elver <elver@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz> #slab
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

Authored by Jann Horn and committed by Vlastimil Babka.
b8c8ba73 b3c34245

+182 -19
+11 -6
include/linux/kasan.h
··· 196 196 return false; 197 197 } 198 198 199 - bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init); 199 + bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init, 200 + bool still_accessible); 200 201 /** 201 202 * kasan_slab_free - Poison, initialize, and quarantine a slab object. 202 203 * @object: Object to be freed. 203 204 * @init: Whether to initialize the object. 205 + * @still_accessible: Whether the object contents are still accessible. 204 206 * 205 207 * This function informs that a slab object has been freed and is not 206 - * supposed to be accessed anymore, except for objects in 207 - * SLAB_TYPESAFE_BY_RCU caches. 208 + * supposed to be accessed anymore, except when @still_accessible is set 209 + * (indicating that the object is in a SLAB_TYPESAFE_BY_RCU cache and an RCU 210 + * grace period might not have passed yet). 208 211 * 209 212 * For KASAN modes that have integrated memory initialization 210 213 * (kasan_has_integrated_init() == true), this function also initializes ··· 223 220 * @Return true if KASAN took ownership of the object; false otherwise. 224 221 */ 225 222 static __always_inline bool kasan_slab_free(struct kmem_cache *s, 226 - void *object, bool init) 223 + void *object, bool init, 224 + bool still_accessible) 227 225 { 228 226 if (kasan_enabled()) 229 - return __kasan_slab_free(s, object, init); 227 + return __kasan_slab_free(s, object, init, still_accessible); 230 228 return false; 231 229 } 232 230 ··· 423 419 return false; 424 420 } 425 421 426 - static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init) 422 + static inline bool kasan_slab_free(struct kmem_cache *s, void *object, 423 + bool init, bool still_accessible) 427 424 { 428 425 return false; 429 426 }
+32
mm/Kconfig.debug
··· 70 70 off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying 71 71 "slab_debug=-". 72 72 73 + config SLUB_RCU_DEBUG 74 + bool "Enable UAF detection in TYPESAFE_BY_RCU caches (for KASAN)" 75 + depends on SLUB_DEBUG 76 + # SLUB_RCU_DEBUG should build fine without KASAN, but is currently useless 77 + # without KASAN, so mark it as a dependency of KASAN for now. 78 + depends on KASAN 79 + default KASAN_GENERIC || KASAN_SW_TAGS 80 + help 81 + Make SLAB_TYPESAFE_BY_RCU caches behave approximately as if the cache 82 + was not marked as SLAB_TYPESAFE_BY_RCU and every caller used 83 + kfree_rcu() instead. 84 + 85 + This is intended for use in combination with KASAN, to enable KASAN to 86 + detect use-after-free accesses in such caches. 87 + (KFENCE is able to do that independent of this flag.) 88 + 89 + This might degrade performance. 90 + Unfortunately this also prevents a very specific bug pattern from 91 + triggering (insufficient checks against an object being recycled 92 + within the RCU grace period); so this option can be turned off even on 93 + KASAN builds, in case you want to test for such a bug. 94 + 95 + If you're using this for testing bugs / fuzzing and care about 96 + catching all the bugs WAY more than performance, you might want to 97 + also turn on CONFIG_RCU_STRICT_GRACE_PERIOD. 98 + 99 + WARNING: 100 + This is designed as a debugging feature, not a security feature. 101 + Objects are sometimes recycled without RCU delay under memory pressure. 102 + 103 + If unsure, say N. 104 + 73 105 config PAGE_OWNER 74 106 bool "Track page owner" 75 107 depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
+6 -5
mm/kasan/common.c
··· 230 230 } 231 231 232 232 static inline void poison_slab_object(struct kmem_cache *cache, void *object, 233 - bool init) 233 + bool init, bool still_accessible) 234 234 { 235 235 void *tagged_object = object; 236 236 237 237 object = kasan_reset_tag(object); 238 238 239 239 /* RCU slabs could be legally used after free within the RCU period. */ 240 - if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) 240 + if (unlikely(still_accessible)) 241 241 return; 242 242 243 243 kasan_poison(object, round_up(cache->object_size, KASAN_GRANULE_SIZE), ··· 255 255 return check_slab_allocation(cache, object, ip); 256 256 } 257 257 258 - bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init) 258 + bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init, 259 + bool still_accessible) 259 260 { 260 261 if (!kasan_arch_is_ready() || is_kfence_address(object)) 261 262 return false; 262 263 263 - poison_slab_object(cache, object, init); 264 + poison_slab_object(cache, object, init, still_accessible); 264 265 265 266 /* 266 267 * If the object is put into quarantine, do not let slab put the object ··· 519 518 if (check_slab_allocation(slab->slab_cache, ptr, ip)) 520 519 return false; 521 520 522 - poison_slab_object(slab->slab_cache, ptr, false); 521 + poison_slab_object(slab->slab_cache, ptr, false, false); 523 522 return true; 524 523 } 525 524
+46
mm/kasan/kasan_test.c
··· 996 996 kmem_cache_destroy(cache); 997 997 } 998 998 999 + static void kmem_cache_rcu_uaf(struct kunit *test) 1000 + { 1001 + char *p; 1002 + size_t size = 200; 1003 + struct kmem_cache *cache; 1004 + 1005 + KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB_RCU_DEBUG); 1006 + 1007 + cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU, 1008 + NULL); 1009 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); 1010 + 1011 + p = kmem_cache_alloc(cache, GFP_KERNEL); 1012 + if (!p) { 1013 + kunit_err(test, "Allocation failed: %s\n", __func__); 1014 + kmem_cache_destroy(cache); 1015 + return; 1016 + } 1017 + *p = 1; 1018 + 1019 + rcu_read_lock(); 1020 + 1021 + /* Free the object - this will internally schedule an RCU callback. */ 1022 + kmem_cache_free(cache, p); 1023 + 1024 + /* 1025 + * We should still be allowed to access the object at this point because 1026 + * the cache is SLAB_TYPESAFE_BY_RCU and we've been in an RCU read-side 1027 + * critical section since before the kmem_cache_free(). 1028 + */ 1029 + READ_ONCE(*p); 1030 + 1031 + rcu_read_unlock(); 1032 + 1033 + /* 1034 + * Wait for the RCU callback to execute; after this, the object should 1035 + * have actually been freed from KASAN's perspective. 1036 + */ 1037 + rcu_barrier(); 1038 + 1039 + KUNIT_EXPECT_KASAN_FAIL(test, READ_ONCE(*p)); 1040 + 1041 + kmem_cache_destroy(cache); 1042 + } 1043 + 999 1044 static void empty_cache_ctor(void *object) { } 1000 1045 1001 1046 static void kmem_cache_double_destroy(struct kunit *test) ··· 1982 1937 KUNIT_CASE(kmem_cache_oob), 1983 1938 KUNIT_CASE(kmem_cache_double_free), 1984 1939 KUNIT_CASE(kmem_cache_invalid_free), 1940 + KUNIT_CASE(kmem_cache_rcu_uaf), 1985 1941 KUNIT_CASE(kmem_cache_double_destroy), 1986 1942 KUNIT_CASE(kmem_cache_accounted), 1987 1943 KUNIT_CASE(kmem_cache_bulk),
+16
mm/slab_common.c
··· 511 511 /* in-flight kfree_rcu()'s may include objects from our cache */ 512 512 kvfree_rcu_barrier(); 513 513 514 + if (IS_ENABLED(CONFIG_SLUB_RCU_DEBUG) && 515 + (s->flags & SLAB_TYPESAFE_BY_RCU)) { 516 + /* 517 + * Under CONFIG_SLUB_RCU_DEBUG, when objects in a 518 + * SLAB_TYPESAFE_BY_RCU slab are freed, SLUB will internally 519 + * defer their freeing with call_rcu(). 520 + * Wait for such call_rcu() invocations here before actually 521 + * destroying the cache. 522 + * 523 + * It doesn't matter that we haven't looked at the slab refcount 524 + * yet - slabs with SLAB_TYPESAFE_BY_RCU can't be merged, so 525 + * the refcount should be 1 here. 526 + */ 527 + rcu_barrier(); 528 + } 529 + 514 530 cpus_read_lock(); 515 531 mutex_lock(&slab_mutex); 516 532
+71 -8
mm/slub.c
··· 2200 2200 } 2201 2201 #endif /* CONFIG_MEMCG */ 2202 2202 2203 + #ifdef CONFIG_SLUB_RCU_DEBUG 2204 + static void slab_free_after_rcu_debug(struct rcu_head *rcu_head); 2205 + 2206 + struct rcu_delayed_free { 2207 + struct rcu_head head; 2208 + void *object; 2209 + }; 2210 + #endif 2211 + 2203 2212 /* 2204 2213 * Hooks for other subsystems that check memory allocations. In a typical 2205 2214 * production configuration these hooks all should produce no code at all. 2206 2215 * 2207 2216 * Returns true if freeing of the object can proceed, false if its reuse 2208 - * was delayed by KASAN quarantine, or it was returned to KFENCE. 2217 + * was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned 2218 + * to KFENCE. 2209 2219 */ 2210 2220 static __always_inline 2211 - bool slab_free_hook(struct kmem_cache *s, void *x, bool init) 2221 + bool slab_free_hook(struct kmem_cache *s, void *x, bool init, 2222 + bool after_rcu_delay) 2212 2223 { 2224 + /* Are the object contents still accessible? */ 2225 + bool still_accessible = (s->flags & SLAB_TYPESAFE_BY_RCU) && !after_rcu_delay; 2226 + 2213 2227 kmemleak_free_recursive(x, s->flags); 2214 2228 kmsan_slab_free(s, x); 2215 2229 ··· 2233 2219 debug_check_no_obj_freed(x, s->object_size); 2234 2220 2235 2221 /* Use KCSAN to help debug racy use-after-free. */ 2236 - if (!(s->flags & SLAB_TYPESAFE_BY_RCU)) 2222 + if (!still_accessible) 2237 2223 __kcsan_check_access(x, s->object_size, 2238 2224 KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT); 2239 2225 ··· 2246 2232 */ 2247 2233 if (kasan_slab_pre_free(s, x)) 2248 2234 return false; 2235 + 2236 + #ifdef CONFIG_SLUB_RCU_DEBUG 2237 + if (still_accessible) { 2238 + struct rcu_delayed_free *delayed_free; 2239 + 2240 + delayed_free = kmalloc(sizeof(*delayed_free), GFP_NOWAIT); 2241 + if (delayed_free) { 2242 + /* 2243 + * Let KASAN track our call stack as a "related work 2244 + * creation", just like if the object had been freed 2245 + * normally via kfree_rcu(). 
2246 + * We have to do this manually because the rcu_head is 2247 + * not located inside the object. 2248 + */ 2249 + kasan_record_aux_stack_noalloc(x); 2250 + 2251 + delayed_free->object = x; 2252 + call_rcu(&delayed_free->head, slab_free_after_rcu_debug); 2253 + return false; 2254 + } 2255 + } 2256 + #endif /* CONFIG_SLUB_RCU_DEBUG */ 2249 2257 2250 2258 /* 2251 2259 * As memory initialization might be integrated into KASAN, ··· 2292 2256 s->size - inuse - rsize); 2293 2257 } 2294 2258 /* KASAN might put x into memory quarantine, delaying its reuse. */ 2295 - return !kasan_slab_free(s, x, init); 2259 + return !kasan_slab_free(s, x, init, still_accessible); 2296 2260 } 2297 2261 2298 2262 static __fastpath_inline ··· 2306 2270 bool init; 2307 2271 2308 2272 if (is_kfence_address(next)) { 2309 - slab_free_hook(s, next, false); 2273 + slab_free_hook(s, next, false, false); 2310 2274 return false; 2311 2275 } 2312 2276 ··· 2321 2285 next = get_freepointer(s, object); 2322 2286 2323 2287 /* If object's reuse doesn't have to be delayed */ 2324 - if (likely(slab_free_hook(s, object, init))) { 2288 + if (likely(slab_free_hook(s, object, init, false))) { 2325 2289 /* Move object to the new freelist */ 2326 2290 set_freepointer(s, object, *head); 2327 2291 *head = object; ··· 4513 4477 memcg_slab_free_hook(s, slab, &object, 1); 4514 4478 alloc_tagging_slab_free_hook(s, slab, &object, 1); 4515 4479 4516 - if (likely(slab_free_hook(s, object, slab_want_init_on_free(s)))) 4480 + if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false))) 4517 4481 do_slab_free(s, slab, object, object, 1, addr); 4518 4482 } 4519 4483 ··· 4522 4486 static noinline 4523 4487 void memcg_alloc_abort_single(struct kmem_cache *s, void *object) 4524 4488 { 4525 - if (likely(slab_free_hook(s, object, slab_want_init_on_free(s)))) 4489 + if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false))) 4526 4490 do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_); 4527 
4491 } 4528 4492 #endif ··· 4540 4504 if (likely(slab_free_freelist_hook(s, &head, &tail, &cnt))) 4541 4505 do_slab_free(s, slab, head, tail, cnt, addr); 4542 4506 } 4507 + 4508 + #ifdef CONFIG_SLUB_RCU_DEBUG 4509 + static void slab_free_after_rcu_debug(struct rcu_head *rcu_head) 4510 + { 4511 + struct rcu_delayed_free *delayed_free = 4512 + container_of(rcu_head, struct rcu_delayed_free, head); 4513 + void *object = delayed_free->object; 4514 + struct slab *slab = virt_to_slab(object); 4515 + struct kmem_cache *s; 4516 + 4517 + kfree(delayed_free); 4518 + 4519 + if (WARN_ON(is_kfence_address(object))) 4520 + return; 4521 + 4522 + /* find the object and the cache again */ 4523 + if (WARN_ON(!slab)) 4524 + return; 4525 + s = slab->slab_cache; 4526 + if (WARN_ON(!(s->flags & SLAB_TYPESAFE_BY_RCU))) 4527 + return; 4528 + 4529 + /* resume freeing */ 4530 + if (slab_free_hook(s, object, slab_want_init_on_free(s), true)) 4531 + do_slab_free(s, slab, object, object, 1, _THIS_IP_); 4532 + } 4533 + #endif /* CONFIG_SLUB_RCU_DEBUG */ 4543 4534 4544 4535 #ifdef CONFIG_KASAN_GENERIC 4545 4536 void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)