slub: Support 4k kmallocs again to compensate for page allocator slowness

Currently we hand off PAGE_SIZEd kmallocs to the page allocator in the
mistaken belief that the page allocator can handle these allocations
effectively. However, measurements indicate a slowdown of at least a factor
of 8 (and that is only on SMP; NUMA is much worse) compared to the slub
fastpath, which causes regressions in tbench.

Increase the number of kmalloc caches by one so that we again handle 4k
kmallocs directly from slub. 4k page buffering for the page allocator will
then be performed by slub, as is already done by slab.
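
Purely as illustration (this is not kernel code, and the real kmalloc_index()
also has dedicated 96 and 192 byte caches), the userspace sketch below shows
why the cache array needs PAGE_SHIFT + 1 slots: the general cache for 2^i byte
objects lives at index i, so serving a full 4k (2^12) request from slub needs
index 12 == PAGE_SHIFT to be a valid slot. The helper name
example_kmalloc_index() and the hard-coded shift values are assumptions made
for this example only.

/* Illustrative userspace sketch only -- not the kernel implementation. */
#include <stdio.h>

#define EX_PAGE_SHIFT		12	/* assume 4k pages for the example */
#define EX_KMALLOC_SHIFT_LOW	3	/* smallest general cache: 8 bytes */

/*
 * Map a request size to a power-of-two cache index (cache for 2^i bytes
 * lives at index i). Returns -1 for sizes that must go to kmalloc_large().
 */
static int example_kmalloc_index(unsigned long size)
{
	int i;

	for (i = EX_KMALLOC_SHIFT_LOW; i <= EX_PAGE_SHIFT; i++)
		if (size <= (1UL << i))
			return i;
	return -1;
}

int main(void)
{
	unsigned long sizes[] = { 8, 192, 2048, 4096, 8192 };
	unsigned int n;

	for (n = 0; n < sizeof(sizes) / sizeof(sizes[0]); n++) {
		int idx = example_kmalloc_index(sizes[n]);

		if (idx < 0)
			printf("%5lu bytes -> kmalloc_large()\n", sizes[n]);
		else
			printf("%5lu bytes -> kmalloc_caches[%d]\n",
			       sizes[n], idx);
	}
	return 0;
}

With the previous loop bound (i < PAGE_SHIFT rather than i <= PAGE_SHIFT) the
4096 byte case above would have fallen through to kmalloc_large(), i.e. to the
page allocator.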

At some point the page allocator fastpath should be fixed. A lot of the kernel
would benefit from being able to allocate a single page more cheaply. If that
is done, then 4k allocations can again be forwarded to the page allocator and
this patch can be reverted.

Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Christoph Lameter <clameter@sgi.com>

commit 331dc558 (parent 71c7a06f), authored and committed by Christoph Lameter

2 files changed, 12 insertions(+), 12 deletions(-)
include/linux/slub_def.h (+3 -3)
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -111,7 +111,7 @@
  * We keep the general caches in an array of slab caches that are used for
  * 2^x bytes of allocations.
  */
-extern struct kmem_cache kmalloc_caches[PAGE_SHIFT];
+extern struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1];
 
 /*
  * Sorry that the following has to be that ugly but some versions of GCC
@@ -197,7 +197,7 @@
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
-		if (size > PAGE_SIZE / 2)
+		if (size > PAGE_SIZE)
 			return kmalloc_large(size, flags);
 
 		if (!(flags & SLUB_DMA)) {
@@ -219,7 +219,7 @@
 static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	if (__builtin_constant_p(size) &&
-		size <= PAGE_SIZE / 2 && !(flags & SLUB_DMA)) {
+		size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
 			struct kmem_cache *s = kmalloc_slab(size);
 
 		if (!s)
mm/slub.c (+9 -9)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2517,11 +2517,11 @@
  *		Kmalloc subsystem
  *******************************************************************/
 
-struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
 #ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT];
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
 #endif
 
 static int __init setup_slub_min_order(char *str)
@@ -2703,7 +2703,7 @@
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
@@ -2720,7 +2720,7 @@
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
@@ -3032,7 +3032,7 @@
 		caches++;
 	}
 
-	for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) {
+	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
 		create_kmalloc_cache(&kmalloc_caches[i],
 			"kmalloc", 1 << i, GFP_KERNEL);
 		caches++;
@@ -3059,7 +3059,7 @@
 	slab_state = UP;
 
 	/* Provide the correct kmalloc names now that the caches are up */
-	for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++)
+	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
 		kmalloc_caches[i]. name =
 			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 
@@ -3088,7 +3088,7 @@
 	if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
 		return 1;
 
-	if ((s->flags & __PAGE_ALLOC_FALLBACK)
+	if ((s->flags & __PAGE_ALLOC_FALLBACK))
 		return 1;
 
 	if (s->ctor)
@@ -3252,7 +3252,7 @@
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, gfpflags);
 
 	s = get_slab(size, gfpflags);
@@ -3268,7 +3268,7 @@
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, gfpflags);
 
 	s = get_slab(size, gfpflags);