Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
slob: fix lockup in slob_free()
slub: use get_track()
slub: rename calculate_min_partial() to set_min_partial()
slub: add min_partial sysfs tunable
slub: move min_partial to struct kmem_cache
SLUB: Fix default slab order for big object sizes
SLUB: Do not pass 8k objects through to the page allocator
SLUB: Introduce and use SLUB_MAX_SIZE and SLUB_PAGE_SHIFT constants
slob: clean up the code
SLUB: Use ->objsize from struct kmem_cache_cpu in slab_free()

+96 -50
+17 -4
include/linux/slub_def.h
··· 46 46 struct kmem_cache_node { 47 47 spinlock_t list_lock; /* Protect partial list and nr_partial */ 48 48 unsigned long nr_partial; 49 - unsigned long min_partial; 50 49 struct list_head partial; 51 50 #ifdef CONFIG_SLUB_DEBUG 52 51 atomic_long_t nr_slabs; ··· 88 89 void (*ctor)(void *); 89 90 int inuse; /* Offset to metadata */ 90 91 int align; /* Alignment */ 92 + unsigned long min_partial; 91 93 const char *name; /* Name (only for display!) */ 92 94 struct list_head list; /* List of slab caches */ 93 95 #ifdef CONFIG_SLUB_DEBUG ··· 121 121 #define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) 122 122 123 123 /* 124 + * Maximum kmalloc object size handled by SLUB. Larger object allocations 125 + * are passed through to the page allocator. The page allocator "fastpath" 126 + * is relatively slow so we need this value sufficiently high so that 127 + * performance critical objects are allocated through the SLUB fastpath. 128 + * 129 + * This should be dropped to PAGE_SIZE / 2 once the page allocator 130 + * "fastpath" becomes competitive with the slab allocator fastpaths. 131 + */ 132 + #define SLUB_MAX_SIZE (2 * PAGE_SIZE) 133 + 134 + #define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2) 135 + 136 + /* 124 137 * We keep the general caches in an array of slab caches that are used for 125 138 * 2^x bytes of allocations. 126 139 */ 127 - extern struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1]; 140 + extern struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT]; 128 141 129 142 /* 130 143 * Sorry that the following has to be that ugly but some versions of GCC ··· 225 212 static __always_inline void *kmalloc(size_t size, gfp_t flags) 226 213 { 227 214 if (__builtin_constant_p(size)) { 228 - if (size > PAGE_SIZE) 215 + if (size > SLUB_MAX_SIZE) 229 216 return kmalloc_large(size, flags); 230 217 231 218 if (!(flags & SLUB_DMA)) { ··· 247 234 static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) 248 235 { 249 236 if (__builtin_constant_p(size) && 250 - size <= PAGE_SIZE && !(flags & SLUB_DMA)) { 237 + size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) { 251 238 struct kmem_cache *s = kmalloc_slab(size); 252 239 253 240 if (!s)
+27 -16
mm/slob.c
··· 126 126 static LIST_HEAD(free_slob_large); 127 127 128 128 /* 129 - * slob_page: True for all slob pages (false for bigblock pages) 129 + * is_slob_page: True for all slob pages (false for bigblock pages) 130 130 */ 131 - static inline int slob_page(struct slob_page *sp) 131 + static inline int is_slob_page(struct slob_page *sp) 132 132 { 133 133 return PageSlobPage((struct page *)sp); 134 134 } ··· 141 141 static inline void clear_slob_page(struct slob_page *sp) 142 142 { 143 143 __ClearPageSlobPage((struct page *)sp); 144 + } 145 + 146 + static inline struct slob_page *slob_page(const void *addr) 147 + { 148 + return (struct slob_page *)virt_to_page(addr); 144 149 } 145 150 146 151 /* ··· 235 230 return !((unsigned long)slob_next(s) & ~PAGE_MASK); 236 231 } 237 232 238 - static void *slob_new_page(gfp_t gfp, int order, int node) 233 + static void *slob_new_pages(gfp_t gfp, int order, int node) 239 234 { 240 235 void *page; 241 236 ··· 252 247 return page_address(page); 253 248 } 254 249 250 + static void slob_free_pages(void *b, int order) 251 + { 252 + free_pages((unsigned long)b, order); 253 + } 254 + 255 255 /* 256 256 * Allocate a slob block within a given slob_page sp. 257 257 */ 258 258 static void *slob_page_alloc(struct slob_page *sp, size_t size, int align) 259 259 { 260 - slob_t *prev, *cur, *aligned = 0; 260 + slob_t *prev, *cur, *aligned = NULL; 261 261 int delta = 0, units = SLOB_UNITS(size); 262 262 263 263 for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) { ··· 359 349 360 350 /* Not enough space: must allocate a new page */ 361 351 if (!b) { 362 - b = slob_new_page(gfp & ~__GFP_ZERO, 0, node); 352 + b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node); 363 353 if (!b) 364 - return 0; 365 - sp = (struct slob_page *)virt_to_page(b); 354 + return NULL; 355 + sp = slob_page(b); 366 356 set_slob_page(sp); 367 357 368 358 spin_lock_irqsave(&slob_lock, flags); ··· 394 384 return; 395 385 BUG_ON(!size); 396 386 397 - sp = (struct slob_page *)virt_to_page(block); 387 + sp = slob_page(block); 398 388 units = SLOB_UNITS(size); 399 389 400 390 spin_lock_irqsave(&slob_lock, flags); ··· 403 393 /* Go directly to page allocator. Do not pass slob allocator */ 404 394 if (slob_page_free(sp)) 405 395 clear_slob_page_free(sp); 396 + spin_unlock_irqrestore(&slob_lock, flags); 406 397 clear_slob_page(sp); 407 398 free_slob_page(sp); 408 399 free_page((unsigned long)b); 409 - goto out; 400 + return; 410 401 } 411 402 412 403 if (!slob_page_free(sp)) { ··· 487 476 } else { 488 477 void *ret; 489 478 490 - ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node); 479 + ret = slob_new_pages(gfp | __GFP_COMP, get_order(size), node); 491 480 if (ret) { 492 481 struct page *page; 493 482 page = virt_to_page(ret); ··· 505 494 if (unlikely(ZERO_OR_NULL_PTR(block))) 506 495 return; 507 496 508 - sp = (struct slob_page *)virt_to_page(block); 509 - if (slob_page(sp)) { 497 + sp = slob_page(block); 498 + if (is_slob_page(sp)) { 510 499 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); 511 500 unsigned int *m = (unsigned int *)(block - align); 512 501 slob_free(m, *m + align); ··· 524 513 if (unlikely(block == ZERO_SIZE_PTR)) 525 514 return 0; 526 515 527 - sp = (struct slob_page *)virt_to_page(block); 528 - if (slob_page(sp)) { 516 + sp = slob_page(block); 517 + if (is_slob_page(sp)) { 529 518 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); 530 519 unsigned int *m = (unsigned int *)(block - align); 531 520 return SLOB_UNITS(*m) * SLOB_UNIT; ··· 584 573 if (c->size < PAGE_SIZE) 585 574 b = slob_alloc(c->size, flags, c->align, node); 586 575 else 587 - b = slob_new_page(flags, get_order(c->size), node); 576 + b = slob_new_pages(flags, get_order(c->size), node); 588 577 589 578 if (c->ctor) 590 579 c->ctor(b); ··· 598 587 if (size < PAGE_SIZE) 599 588 slob_free(b, size); 600 589 else 601 - free_pages((unsigned long)b, get_order(size)); 590 + slob_free_pages(b, get_order(size)); 602 591 } 603 592 604 593 static void kmem_rcu_free(struct rcu_head *head)
+52 -30
mm/slub.c
··· 374 374 static void set_track(struct kmem_cache *s, void *object, 375 375 enum track_item alloc, unsigned long addr) 376 376 { 377 - struct track *p; 377 + struct track *p = get_track(s, object, alloc); 378 378 379 - if (s->offset) 380 - p = object + s->offset + sizeof(void *); 381 - else 382 - p = object + s->inuse; 383 - 384 - p += alloc; 385 379 if (addr) { 386 380 p->addr = addr; 387 381 p->cpu = smp_processor_id(); ··· 1329 1335 n = get_node(s, zone_to_nid(zone)); 1330 1336 1331 1337 if (n && cpuset_zone_allowed_hardwall(zone, flags) && 1332 - n->nr_partial > n->min_partial) { 1338 + n->nr_partial > s->min_partial) { 1333 1339 page = get_partial_node(n); 1334 1340 if (page) 1335 1341 return page; ··· 1381 1387 slab_unlock(page); 1382 1388 } else { 1383 1389 stat(c, DEACTIVATE_EMPTY); 1384 - if (n->nr_partial < n->min_partial) { 1390 + if (n->nr_partial < s->min_partial) { 1385 1391 /* 1386 1392 * Adding an empty slab to the partial slabs in order 1387 1393 * to avoid page allocator overhead. This slab needs ··· 1718 1724 c = get_cpu_slab(s, smp_processor_id()); 1719 1725 debug_check_no_locks_freed(object, c->objsize); 1720 1726 if (!(s->flags & SLAB_DEBUG_OBJECTS)) 1721 - debug_check_no_obj_freed(object, s->objsize); 1727 + debug_check_no_obj_freed(object, c->objsize); 1722 1728 if (likely(page == c->page && c->node >= 0)) { 1723 1729 object[c->offset] = c->freelist; 1724 1730 c->freelist = object; ··· 1838 1844 int order; 1839 1845 int min_objects; 1840 1846 int fraction; 1847 + int max_objects; 1841 1848 1842 1849 /* 1843 1850 * Attempt to find best configuration for a slab. This ··· 1851 1856 min_objects = slub_min_objects; 1852 1857 if (!min_objects) 1853 1858 min_objects = 4 * (fls(nr_cpu_ids) + 1); 1859 + max_objects = (PAGE_SIZE << slub_max_order)/size; 1860 + min_objects = min(min_objects, max_objects); 1861 + 1854 1862 while (min_objects > 1) { 1855 1863 fraction = 16; 1856 1864 while (fraction >= 4) { ··· 1863 1865 return order; 1864 1866 fraction /= 2; 1865 1867 } 1866 - min_objects /= 2; 1868 + min_objects --; 1867 1869 } 1868 1870 1869 1871 /* ··· 1926 1928 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) 1927 1929 { 1928 1930 n->nr_partial = 0; 1929 - 1930 - /* 1931 - * The larger the object size is, the more pages we want on the partial 1932 - * list to avoid pounding the page allocator excessively. 1933 - */ 1934 - n->min_partial = ilog2(s->size); 1935 - if (n->min_partial < MIN_PARTIAL) 1936 - n->min_partial = MIN_PARTIAL; 1937 - else if (n->min_partial > MAX_PARTIAL) 1938 - n->min_partial = MAX_PARTIAL; 1939 - 1940 1931 spin_lock_init(&n->list_lock); 1941 1932 INIT_LIST_HEAD(&n->partial); 1942 1933 #ifdef CONFIG_SLUB_DEBUG ··· 2168 2181 } 2169 2182 #endif 2170 2183 2184 + static void set_min_partial(struct kmem_cache *s, unsigned long min) 2185 + { 2186 + if (min < MIN_PARTIAL) 2187 + min = MIN_PARTIAL; 2188 + else if (min > MAX_PARTIAL) 2189 + min = MAX_PARTIAL; 2190 + s->min_partial = min; 2191 + } 2192 + 2171 2193 /* 2172 2194 * calculate_sizes() determines the order and the distribution of data within 2173 2195 * a slab object. ··· 2315 2319 if (!calculate_sizes(s, -1)) 2316 2320 goto error; 2317 2321 2322 + /* 2323 + * The larger the object size is, the more pages we want on the partial 2324 + * list to avoid pounding the page allocator excessively. 2325 + */ 2326 + set_min_partial(s, ilog2(s->size)); 2318 2327 s->refcount = 1; 2319 2328 #ifdef CONFIG_NUMA 2320 2329 s->remote_node_defrag_ratio = 1000; ··· 2476 2475 * Kmalloc subsystem 2477 2476 *******************************************************************/ 2478 2477 2479 - struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned; 2478 + struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned; 2480 2479 EXPORT_SYMBOL(kmalloc_caches); 2481 2480 2482 2481 static int __init setup_slub_min_order(char *str) ··· 2538 2537 } 2539 2538 2540 2539 #ifdef CONFIG_ZONE_DMA 2541 - static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1]; 2540 + static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT]; 2542 2541 2543 2542 static void sysfs_add_func(struct work_struct *w) 2544 2543 { ··· 2659 2658 { 2660 2659 struct kmem_cache *s; 2661 2660 2662 - if (unlikely(size > PAGE_SIZE)) 2661 + if (unlikely(size > SLUB_MAX_SIZE)) 2663 2662 return kmalloc_large(size, flags); 2664 2663 2665 2664 s = get_slab(size, flags); ··· 2687 2686 { 2688 2687 struct kmem_cache *s; 2689 2688 2690 - if (unlikely(size > PAGE_SIZE)) 2689 + if (unlikely(size > SLUB_MAX_SIZE)) 2691 2690 return kmalloc_large_node(size, flags, node); 2692 2691 2693 2692 s = get_slab(size, flags); ··· 2987 2986 caches++; 2988 2987 } 2989 2988 2990 - for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) { 2989 + for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { 2991 2990 create_kmalloc_cache(&kmalloc_caches[i], 2992 2991 "kmalloc", 1 << i, GFP_KERNEL); 2993 2992 caches++; ··· 3024 3023 slab_state = UP; 3025 3024 3026 3025 /* Provide the correct kmalloc names now that the caches are up */ 3027 - for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) 3026 + for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) 3028 3027 kmalloc_caches[i]. name = 3029 3028 kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); 3030 3029 ··· 3224 3223 { 3225 3224 struct kmem_cache *s; 3226 3225 3227 - if (unlikely(size > PAGE_SIZE)) 3226 + if (unlikely(size > SLUB_MAX_SIZE)) 3228 3227 return kmalloc_large(size, gfpflags); 3229 3228 3230 3229 s = get_slab(size, gfpflags); ··· 3240 3239 { 3241 3240 struct kmem_cache *s; 3242 3241 3243 - if (unlikely(size > PAGE_SIZE)) 3242 + if (unlikely(size > SLUB_MAX_SIZE)) 3244 3243 return kmalloc_large_node(size, gfpflags, node); 3245 3244 3246 3245 s = get_slab(size, gfpflags); ··· 3837 3836 } 3838 3837 SLAB_ATTR(order); 3839 3838 3839 + static ssize_t min_partial_show(struct kmem_cache *s, char *buf) 3840 + { 3841 + return sprintf(buf, "%lu\n", s->min_partial); 3842 + } 3843 + 3844 + static ssize_t min_partial_store(struct kmem_cache *s, const char *buf, 3845 + size_t length) 3846 + { 3847 + unsigned long min; 3848 + int err; 3849 + 3850 + err = strict_strtoul(buf, 10, &min); 3851 + if (err) 3852 + return err; 3853 + 3854 + set_min_partial(s, min); 3855 + return length; 3856 + } 3857 + SLAB_ATTR(min_partial); 3858 + 3840 3859 static ssize_t ctor_show(struct kmem_cache *s, char *buf) 3841 3860 { 3842 3861 if (s->ctor) { ··· 4172 4151 &object_size_attr.attr, 4173 4152 &objs_per_slab_attr.attr, 4174 4153 &order_attr.attr, 4154 + &min_partial_attr.attr, 4175 4155 &objects_attr.attr, 4176 4156 &objects_partial_attr.attr, 4177 4157 &total_objects_attr.attr,