slub: Support 4k kmallocs again to compensate for page allocator slowness

Currently we hand off PAGE_SIZE-sized kmallocs to the page allocator in the
mistaken belief that the page allocator can handle these allocations
effectively. However, measurements indicate a minimum slowdown by a factor
of 8 (and that is on SMP only; NUMA is much worse) vs. the slub fastpath,
which causes regressions in tbench.
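
For context, the compile-time-constant path in include/linux/slub_def.h currently
cuts over to the page allocator at half a page, so a full 4k request never reaches
the slub fastpath. A minimal sketch of the pre-patch dispatch, paraphrased from the
code touched by the diff below (simplified, not a verbatim quote):

static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
        if (__builtin_constant_p(size)) {
                /* A 4k request trips this test and bypasses slub entirely */
                if (size > PAGE_SIZE / 2)
                        return kmalloc_large(size, flags);

                if (!(flags & SLUB_DMA)) {
                        /* Otherwise pick the matching power-of-two cache */
                        struct kmem_cache *s = kmalloc_slab(size);

                        if (!s)
                                return ZERO_SIZE_PTR;

                        return kmem_cache_alloc(s, flags);
                }
        }
        return __kmalloc(size, flags);
}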

Increase the number of kmalloc caches by one so that we again handle 4k
kmallocs directly from slub. 4k page buffering for the page allocator will
be performed by slub, as slab already does.
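
Concretely, the kmalloc_caches[] array gains one slot and the cache-creation loop
runs up to and including PAGE_SHIFT. A minimal sketch, assuming 4k pages
(PAGE_SHIFT == 12) so the new top cache is kmalloc-4096; the two fragments come
from different places in mm/slub.c and are shown together only for illustration:

/* One extra slot: indices KMALLOC_SHIFT_LOW .. PAGE_SHIFT are all usable */
struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;

/* In kmem_cache_init(): the "<=" now also creates kmalloc_caches[12] ("kmalloc-4096") */
for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
        create_kmalloc_cache(&kmalloc_caches[i],
                "kmalloc", 1 << i, GFP_KERNEL);
        caches++;
}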

At some point the page allocator fastpath should be fixed. A lot of the kernel
would benefit from being able to allocate a single page more quickly. Once that
is done, the 4k allocations can again be forwarded to the page allocator and
this patch can be reverted.

Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Christoph Lameter <clameter@sgi.com>

Commit 331dc558 (parent 71c7a06f), authored and committed by Christoph Lameter.

 include/linux/slub_def.h |  6 +++---
 mm/slub.c                | 18 +++++++++---------
 2 files changed, 12 insertions(+), 12 deletions(-)

--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -111,7 +111,7 @@
  * We keep the general caches in an array of slab caches that are used for
  * 2^x bytes of allocations.
  */
-extern struct kmem_cache kmalloc_caches[PAGE_SHIFT];
+extern struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1];
 
 /*
  * Sorry that the following has to be that ugly but some versions of GCC
@@ -197,7 +197,7 @@
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
         if (__builtin_constant_p(size)) {
-                if (size > PAGE_SIZE / 2)
+                if (size > PAGE_SIZE)
                         return kmalloc_large(size, flags);
 
                 if (!(flags & SLUB_DMA)) {
@@ -219,7 +219,7 @@
 static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
         if (__builtin_constant_p(size) &&
-                size <= PAGE_SIZE / 2 && !(flags & SLUB_DMA)) {
+                size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
                 struct kmem_cache *s = kmalloc_slab(size);
 
                 if (!s)

--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2517,11 +2517,11 @@
  *              Kmalloc subsystem
  *******************************************************************/
 
-struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
 #ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT];
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
 #endif
 
 static int __init setup_slub_min_order(char *str)
@@ -2703,7 +2703,7 @@
 {
         struct kmem_cache *s;
 
-        if (unlikely(size > PAGE_SIZE / 2))
+        if (unlikely(size > PAGE_SIZE))
                 return kmalloc_large(size, flags);
 
         s = get_slab(size, flags);
@@ -2720,7 +2720,7 @@
 {
         struct kmem_cache *s;
 
-        if (unlikely(size > PAGE_SIZE / 2))
+        if (unlikely(size > PAGE_SIZE))
                 return kmalloc_large(size, flags);
 
         s = get_slab(size, flags);
@@ -3032,7 +3032,7 @@
                 caches++;
         }
 
-        for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) {
+        for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
                 create_kmalloc_cache(&kmalloc_caches[i],
                         "kmalloc", 1 << i, GFP_KERNEL);
                 caches++;
@@ -3059,7 +3059,7 @@
         slab_state = UP;
 
         /* Provide the correct kmalloc names now that the caches are up */
-        for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++)
+        for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
                 kmalloc_caches[i]. name =
                         kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 
@@ -3088,7 +3088,7 @@
         if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
                 return 1;
 
-        if ((s->flags & __PAGE_ALLOC_FALLBACK)
+        if ((s->flags & __PAGE_ALLOC_FALLBACK))
                 return 1;
 
         if (s->ctor)
@@ -3252,7 +3252,7 @@
 {
         struct kmem_cache *s;
 
-        if (unlikely(size > PAGE_SIZE / 2))
+        if (unlikely(size > PAGE_SIZE))
                 return kmalloc_large(size, gfpflags);
 
         s = get_slab(size, gfpflags);
@@ -3268,7 +3268,7 @@
 {
         struct kmem_cache *s;
 
-        if (unlikely(size > PAGE_SIZE / 2))
+        if (unlikely(size > PAGE_SIZE))
                 return kmalloc_large(size, gfpflags);
 
         s = get_slab(size, gfpflags);
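
Purely as an illustration of the effect at a call site (this snippet is not part of
the patch): with 4k pages, a full-page allocation is now served from the new
kmalloc-4096 cache via the slub fastpath, while anything larger than a page still
takes the kmalloc_large() route to the page allocator.

        void *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);     /* now from kmalloc-4096 */
        void *big = kmalloc(2 * PAGE_SIZE, GFP_KERNEL); /* still kmalloc_large() */

        kfree(buf);
        kfree(big);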