Merge branch 'slub-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/christoph/vm

* 'slub-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/christoph/vm:
  Explain kmem_cache_cpu fields
  SLUB: Do not upset lockdep
  SLUB: Fix coding style violations
  Add parameter to add_partial to avoid having two functions
  SLUB: rename defrag to remote_node_defrag_ratio
  Move count_partial before kmem_cache_shrink
  SLUB: Fix sysfs refcounting
  slub: fix shadowed variable sparse warnings

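As a quick illustration of the add_partial() consolidation listed above (one helper with a "tail" flag instead of separate head- and tail-insert functions), here is a minimal userspace sketch. It is illustrative only and not the kernel code touched by this merge; the names (struct list, list_add_node) are made up for the example. The full diffstat and diff follow.

/*
 * Illustrative userspace analogue only (not kernel code): the merged
 * series replaces add_partial()/add_partial_tail() with one helper
 * that takes a "tail" flag; the same consolidation is sketched here
 * with a plain singly linked list.
 */
#include <stdio.h>

struct node {
	int value;
	struct node *next;
};

struct list {
	struct node *head;
	struct node *tail;
	unsigned long nr;
};

/* One helper instead of two: 'tail' selects head or tail insertion. */
static void list_add_node(struct list *l, struct node *n, int tail)
{
	n->next = NULL;
	if (!l->head) {
		l->head = l->tail = n;
	} else if (tail) {
		l->tail->next = n;
		l->tail = n;
	} else {
		n->next = l->head;
		l->head = n;
	}
	l->nr++;
}

int main(void)
{
	struct list partial = { NULL, NULL, 0 };
	struct node a = { 1, NULL }, b = { 2, NULL }, c = { 3, NULL };

	list_add_node(&partial, &a, 0);	/* front: "hot" entry, reused first */
	list_add_node(&partial, &b, 1);	/* back: colder, used later */
	list_add_node(&partial, &c, 0);

	for (struct node *n = partial.head; n; n = n->next)
		printf("%d\n", n->value);	/* prints 3, 1, 2 */
	return 0;
}

The kernel change applies the same idea to the per-node partial list: callers pass tail=1 for slabs that should be used later and tail=0 for cache-hot slabs that should be allocated from first.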
 include/linux/slub_def.h |  +9  -6
 mm/slub.c                | +99 -83
 2 files changed, 108 insertions(+), 89 deletions(-)

include/linux/slub_def.h:
··· 12 12
 #include <linux/kobject.h>

 struct kmem_cache_cpu {
-	void **freelist;
-	struct page *page;
-	int node;
-	unsigned int offset;
-	unsigned int objsize;
+	void **freelist;	/* Pointer to first free per cpu object */
+	struct page *page;	/* The slab from which we are allocating */
+	int node;		/* The node of the page (or -1 for debug) */
+	unsigned int offset;	/* Freepointer offset (in word units) */
+	unsigned int objsize;	/* Size of an object (from kmem_cache) */
 };

 struct kmem_cache_node {
··· 59 59
 #endif

 #ifdef CONFIG_NUMA
-	int defrag_ratio;
+	/*
+	 * Defragmentation by allocating from a remote node.
+	 */
+	int remote_node_defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 #endif
 #ifdef CONFIG_SMP
mm/slub.c:
··· 247 247
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
 							{ return 0; }
-static inline void sysfs_slab_remove(struct kmem_cache *s) {}
+static inline void sysfs_slab_remove(struct kmem_cache *s)
+{
+	kfree(s);
+}
 #endif

 /********************************************************************
··· 357 354
 			printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
 			newline = 0;
 		}
-		printk(" %02x", addr[i]);
+		printk(KERN_CONT " %02x", addr[i]);
 		offset = i % 16;
 		ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
 		if (offset == 15) {
-			printk(" %s\n",ascii);
+			printk(KERN_CONT " %s\n", ascii);
 			newline = 1;
 		}
 	}
 	if (!newline) {
 		i %= 16;
 		while (i < 16) {
-			printk("   ");
+			printk(KERN_CONT "   ");
 			ascii[i] = ' ';
 			i++;
 		}
-		printk(" %s\n", ascii);
+		printk(KERN_CONT " %s\n", ascii);
 	}
 }

··· 532 529

 	if (s->flags & __OBJECT_POISON) {
 		memset(p, POISON_FREE, s->objsize - 1);
-		p[s->objsize -1] = POISON_END;
+		p[s->objsize - 1] = POISON_END;
 	}

 	if (s->flags & SLAB_RED_ZONE)
··· 561 558

 static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
 			u8 *object, char *what,
-			u8* start, unsigned int value, unsigned int bytes)
+			u8 *start, unsigned int value, unsigned int bytes)
 {
 	u8 *fault;
 	u8 *end;
··· 695 692
 		(!check_bytes_and_report(s, page, p, "Poison", p,
 				POISON_FREE, s->objsize - 1) ||
 		 !check_bytes_and_report(s, page, p, "Poison",
-			p + s->objsize -1, POISON_END, 1)))
+			p + s->objsize - 1, POISON_END, 1)))
 			return 0;
 		/*
 		 * check_pad_bytes cleans up on its own.
··· 903 900
 				"SLUB <none>: no slab for object 0x%p.\n",
 						object);
 			dump_stack();
-		}
-		else
+		} else
 			object_err(s, page, object,
 					"page slab pointer corrupt.");
 		goto fail;
··· 949 947
 	/*
 	 * Determine which debug features should be switched on
 	 */
-	for ( ;*str && *str != ','; str++) {
+	for (; *str && *str != ','; str++) {
 		switch (tolower(*str)) {
 		case 'f':
 			slub_debug |= SLAB_DEBUG_FREE;
··· 968 966
 			break;
 		default:
 			printk(KERN_ERR "slub_debug option '%c' "
-				"unknown. skipped\n",*str);
+				"unknown. skipped\n", *str);
 		}
 	}

··· 1041 1039
  */
 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 {
-	struct page * page;
+	struct page *page;
 	int pages = 1 << s->order;

 	if (s->order)
··· 1137 1135
 	mod_zone_page_state(page_zone(page),
 		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
-		- pages);
+		-pages);

 	__free_pages(page, s->order);
 }
··· 1197 1195
 /*
  * Management of partially allocated slabs
  */
-static void add_partial_tail(struct kmem_cache_node *n, struct page *page)
+static void add_partial(struct kmem_cache_node *n,
+				struct page *page, int tail)
 {
 	spin_lock(&n->list_lock);
 	n->nr_partial++;
-	list_add_tail(&page->lru, &n->partial);
-	spin_unlock(&n->list_lock);
-}
-
-static void add_partial(struct kmem_cache_node *n, struct page *page)
-{
-	spin_lock(&n->list_lock);
-	n->nr_partial++;
-	list_add(&page->lru, &n->partial);
+	if (tail)
+		list_add_tail(&page->lru, &n->partial);
+	else
+		list_add(&page->lru, &n->partial);
 	spin_unlock(&n->list_lock);
 }

··· 1290 1292
 	 * expensive if we do it every time we are trying to find a slab
 	 * with available objects.
 	 */
-	if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio)
+	if (!s->remote_node_defrag_ratio ||
+			get_cycles() % 1024 > s->remote_node_defrag_ratio)
 		return NULL;

 	zonelist = &NODE_DATA(slab_node(current->mempolicy))
··· 1334 1335
  *
  * On exit the slab lock will have been dropped.
  */
-static void unfreeze_slab(struct kmem_cache *s, struct page *page)
+static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));

··· 1342 1343
 	if (page->inuse) {

 		if (page->freelist)
-			add_partial(n, page);
+			add_partial(n, page, tail);
 		else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
 			add_full(n, page);
 		slab_unlock(page);
··· 1357 1358
 			 * partial list stays small. kmem_cache_shrink can
 			 * reclaim empty slabs from the partial list.
 			 */
-			add_partial_tail(n, page);
+			add_partial(n, page, 1);
 			slab_unlock(page);
 		} else {
 			slab_unlock(page);
··· 1372 1373
 static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
 	struct page *page = c->page;
+	int tail = 1;
 	/*
 	 * Merge cpu freelist into freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
··· 1380 1380
 	 */
 	while (unlikely(c->freelist)) {
 		void **object;
+
+		tail = 0;	/* Hot objects. Put the slab first */

 		/* Retrieve object from cpu_freelist */
 		object = c->freelist;
··· 1393 1391
 		page->inuse--;
 	}
 	c->page = NULL;
-	unfreeze_slab(s, page);
+	unfreeze_slab(s, page, tail);
 }

··· 1541 1539
  *
  * Otherwise we can simply pick the next object from the lockless free list.
  */
-static void __always_inline *slab_alloc(struct kmem_cache *s,
+static __always_inline void *slab_alloc(struct kmem_cache *s,
 		gfp_t gfpflags, int node, void *addr)
 {
 	void **object;
··· 1615 1613
 	 * then add it.
 	 */
 	if (unlikely(!prior))
-		add_partial_tail(get_node(s, page_to_nid(page)), page);
+		add_partial(get_node(s, page_to_nid(page)), page, 1);

 out_unlock:
 	slab_unlock(page);
··· 1649 1647
  * If fastpath is not possible then fall back to __slab_free where we deal
  * with all sorts of special processing.
  */
-static void __always_inline slab_free(struct kmem_cache *s,
+static __always_inline void slab_free(struct kmem_cache *s,
 			struct page *page, void *x, void *addr)
 {
 	void **object = (void *)x;
··· 1999 1997
 {
 	struct page *page;
 	struct kmem_cache_node *n;
+	unsigned long flags;

 	BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
··· 2024 2021
 #endif
 	init_kmem_cache_node(n);
 	atomic_long_inc(&n->nr_slabs);
-	add_partial(n, page);
+	/*
+	 * lockdep requires consistent irq usage for each lock
+	 * so even though there cannot be a race this early in
+	 * the boot sequence, we still disable irqs.
+	 */
+	local_irq_save(flags);
+	add_partial(n, page, 0);
+	local_irq_restore(flags);
 	return n;
 }

··· 2216 2206

 	s->refcount = 1;
 #ifdef CONFIG_NUMA
-	s->defrag_ratio = 100;
+	s->remote_node_defrag_ratio = 100;
 #endif
 	if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
 		goto error;
··· 2238 2228
  */
 int kmem_ptr_validate(struct kmem_cache *s, const void *object)
 {
-	struct page * page;
+	struct page *page;

 	page = get_object_page(object);

··· 2332 2322
 		if (kmem_cache_close(s))
 			WARN_ON(1);
 		sysfs_slab_remove(s);
-		kfree(s);
 	} else
 		up_write(&slub_lock);
 }
··· 2350 2341

 static int __init setup_slub_min_order(char *str)
 {
-	get_option (&str, &slub_min_order);
+	get_option(&str, &slub_min_order);

 	return 1;
 }
··· 2359 2350

 static int __init setup_slub_max_order(char *str)
 {
-	get_option (&str, &slub_max_order);
+	get_option(&str, &slub_max_order);

 	return 1;
 }
··· 2368 2359

 static int __init setup_slub_min_objects(char *str)
 {
-	get_option (&str, &slub_min_objects);
+	get_option(&str, &slub_min_objects);

 	return 1;
 }
··· 2613 2604
 	slab_free(page->slab, page, (void *)x, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kfree);
+
+static unsigned long count_partial(struct kmem_cache_node *n)
+{
+	unsigned long flags;
+	unsigned long x = 0;
+	struct page *page;
+
+	spin_lock_irqsave(&n->list_lock, flags);
+	list_for_each_entry(page, &n->partial, lru)
+		x += page->inuse;
+	spin_unlock_irqrestore(&n->list_lock, flags);
+	return x;
+}

 /*
  * kmem_cache_shrink removes empty slabs from the partial lists and sorts
··· 2953 2931
 		 * Check if alignment is compatible.
 		 * Courtesy of Adrian Drzewiecki
 		 */
-		if ((s->size & ~(align -1)) != s->size)
+		if ((s->size & ~(align - 1)) != s->size)
 			continue;

 		if (s->size - size >= sizeof(void *))
··· 3062 3040
 	return NOTIFY_OK;
 }

-static struct notifier_block __cpuinitdata slab_notifier =
-	{ &slab_cpuup_callback, NULL, 0 };
+static struct notifier_block __cpuinitdata slab_notifier = {
+	&slab_cpuup_callback, NULL, 0
+};

 #endif

··· 3097 3074
 		return s;

 	return slab_alloc(s, gfpflags, node, caller);
-}
-
-static unsigned long count_partial(struct kmem_cache_node *n)
-{
-	unsigned long flags;
-	unsigned long x = 0;
-	struct page *page;
-
-	spin_lock_irqsave(&n->list_lock, flags);
-	list_for_each_entry(page, &n->partial, lru)
-		x += page->inuse;
-	spin_unlock_irqrestore(&n->list_lock, flags);
-	return x;
 }
··· 3400 3390
 static int list_locations(struct kmem_cache *s, char *buf,
 					enum track_item alloc)
 {
-	int n = 0;
+	int len = 0;
 	unsigned long i;
 	struct loc_track t = { 0, 0, NULL };
 	int node;
··· 3431 3421
 	for (i = 0; i < t.count; i++) {
 		struct location *l = &t.loc[i];

-		if (n > PAGE_SIZE - 100)
+		if (len > PAGE_SIZE - 100)
 			break;
-		n += sprintf(buf + n, "%7ld ", l->count);
+		len += sprintf(buf + len, "%7ld ", l->count);

 		if (l->addr)
-			n += sprint_symbol(buf + n, (unsigned long)l->addr);
+			len += sprint_symbol(buf + len, (unsigned long)l->addr);
 		else
-			n += sprintf(buf + n, "<not-available>");
+			len += sprintf(buf + len, "<not-available>");

 		if (l->sum_time != l->min_time) {
 			unsigned long remainder;

-			n += sprintf(buf + n, " age=%ld/%ld/%ld",
+			len += sprintf(buf + len, " age=%ld/%ld/%ld",
 			l->min_time,
 			div_long_long_rem(l->sum_time, l->count, &remainder),
 			l->max_time);
 		} else
-			n += sprintf(buf + n, " age=%ld",
+			len += sprintf(buf + len, " age=%ld",
 				l->min_time);

 		if (l->min_pid != l->max_pid)
-			n += sprintf(buf + n, " pid=%ld-%ld",
+			len += sprintf(buf + len, " pid=%ld-%ld",
 				l->min_pid, l->max_pid);
 		else
-			n += sprintf(buf + n, " pid=%ld",
+			len += sprintf(buf + len, " pid=%ld",
 				l->min_pid);

 		if (num_online_cpus() > 1 && !cpus_empty(l->cpus) &&
-				n < PAGE_SIZE - 60) {
-			n += sprintf(buf + n, " cpus=");
-			n += cpulist_scnprintf(buf + n, PAGE_SIZE - n - 50,
+				len < PAGE_SIZE - 60) {
+			len += sprintf(buf + len, " cpus=");
+			len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
 					l->cpus);
 		}

 		if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
-				n < PAGE_SIZE - 60) {
-			n += sprintf(buf + n, " nodes=");
-			n += nodelist_scnprintf(buf + n, PAGE_SIZE - n - 50,
+				len < PAGE_SIZE - 60) {
+			len += sprintf(buf + len, " nodes=");
+			len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
 					l->nodes);
 		}

-		n += sprintf(buf + n, "\n");
+		len += sprintf(buf + len, "\n");
 	}

 	free_loc_track(&t);
 	if (!t.count)
-		n += sprintf(buf, "No data\n");
-	return n;
+		len += sprintf(buf, "No data\n");
+	return len;
 }

··· 3508 3498

 	for_each_possible_cpu(cpu) {
 		struct page *page;
-		int node;
 		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);

 		if (!c)
··· 3519 3510
 			continue;
 		if (page) {
 			if (flags & SO_CPU) {
-				int x = 0;
-
 				if (flags & SO_OBJECTS)
 					x = page->inuse;
 				else
··· 3855 3848
 SLAB_ATTR_RO(free_calls);

 #ifdef CONFIG_NUMA
-static ssize_t defrag_ratio_show(struct kmem_cache *s, char *buf)
+static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
 {
-	return sprintf(buf, "%d\n", s->defrag_ratio / 10);
+	return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
 }

-static ssize_t defrag_ratio_store(struct kmem_cache *s,
+static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
 				const char *buf, size_t length)
 {
 	int n = simple_strtoul(buf, NULL, 10);

 	if (n < 100)
-		s->defrag_ratio = n * 10;
+		s->remote_node_defrag_ratio = n * 10;
 	return length;
 }
-SLAB_ATTR(defrag_ratio);
+SLAB_ATTR(remote_node_defrag_ratio);
 #endif

-static struct attribute * slab_attrs[] = {
+static struct attribute *slab_attrs[] = {
 	&slab_size_attr.attr,
 	&object_size_attr.attr,
 	&objs_per_slab_attr.attr,
··· 3900 3893
 	&cache_dma_attr.attr,
 #endif
 #ifdef CONFIG_NUMA
-	&defrag_ratio_attr.attr,
+	&remote_node_defrag_ratio_attr.attr,
 #endif
 	NULL
 };
··· 3947 3940
 	return err;
 }

+static void kmem_cache_release(struct kobject *kobj)
+{
+	struct kmem_cache *s = to_slab(kobj);
+
+	kfree(s);
+}
+
 static struct sysfs_ops slab_sysfs_ops = {
 	.show = slab_attr_show,
 	.store = slab_attr_store,
··· 3961 3947

 static struct kobj_type slab_ktype = {
 	.sysfs_ops = &slab_sysfs_ops,
+	.release = kmem_cache_release
 };

 static int uevent_filter(struct kset *kset, struct kobject *kobj)
··· 4063 4048
 {
 	kobject_uevent(&s->kobj, KOBJ_REMOVE);
 	kobject_del(&s->kobj);
+	kobject_put(&s->kobj);
 }

 /*