Merge branch 'slab-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6

* 'slab-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
SLUB: Fix per-cpu merge conflict
failslab: add ability to filter slab caches
slab: fix regression in touched logic
dma kmalloc handling fixes
slub: remove impossible condition
slab: initialize unused alien cache entry as NULL at alloc_alien_cache().
SLUB: Make slub statistics use this_cpu_inc
SLUB: this_cpu: Remove slub kmem_cache fields
SLUB: Get rid of dynamic DMA kmalloc cache allocation
SLUB: Use this_cpu operations in slub

+147 -261
+1
Documentation/vm/slub.txt
···
 P		Poisoning (object and padding)
 U		User tracking (free and alloc)
 T		Trace (please only use on single slabs)
+A		Toggle failslab filter mark for the cache
 O		Switch debugging off for caches that would have
 		caused higher minimum slab orders
 -		Switch all debugging off (useful if the kernel is
+3 -2
include/linux/fault-inject.h
···
 #endif /* CONFIG_FAULT_INJECTION */
 
 #ifdef CONFIG_FAILSLAB
-extern bool should_failslab(size_t size, gfp_t gfpflags);
+extern bool should_failslab(size_t size, gfp_t gfpflags, unsigned long flags);
 #else
-static inline bool should_failslab(size_t size, gfp_t gfpflags)
+static inline bool should_failslab(size_t size, gfp_t gfpflags,
+				unsigned long flags)
 {
 	return false;
 }
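The third argument carries the allocating cache's flags into the fault-injection core so that failures can be limited to caches marked SLAB_FAILSLAB. A minimal sketch of a caller follows; the hook name is invented for illustration, only should_failslab() and kmem_cache_alloc() are real interfaces:

/* Illustrative caller only; the hook name is made up for this example. */
static inline void *alloc_with_failslab_check(struct kmem_cache *s,
					      size_t objsize, gfp_t gfpflags)
{
	/* Hand the cache's flags over so SLAB_FAILSLAB can gate injection. */
	if (should_failslab(objsize, gfpflags, s->flags))
		return NULL;			/* injected allocation failure */

	return kmem_cache_alloc(s, gfpflags);
}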
+5
include/linux/slab.h
···
 #else
 # define SLAB_NOTRACK		0x00000000UL
 #endif
+#ifdef CONFIG_FAILSLAB
+# define SLAB_FAILSLAB		0x02000000UL	/* Fault injection mark */
+#else
+# define SLAB_FAILSLAB		0x00000000UL
+#endif
 
 /* The following flags affect the page allocator grouping pages by mobility */
 #define SLAB_RECLAIM_ACCOUNT	0x00020000UL		/* Objects are reclaimable */
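A cache opts in to the filter by carrying this flag; it is the same bit that the new 'A' slub_debug option and the per-cache failslab attribute toggle. A hedged creation-time example, with the struct and cache name invented for illustration:

/* Illustrative only: create a cache that is a failslab target from the start. */
struct widget {				/* hypothetical object type */
	int id;
};

static struct kmem_cache *widget_cache;

static int __init widget_cache_init(void)
{
	widget_cache = kmem_cache_create("widget_example", sizeof(struct widget),
					 0, SLAB_FAILSLAB, NULL);
	return widget_cache ? 0 : -ENOMEM;
}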
+12 -15
include/linux/slub_def.h
···
 	void **freelist;	/* Pointer to first free per cpu object */
 	struct page *page;	/* The slab from which we are allocating */
 	int node;		/* The node of the page (or -1 for debug) */
-	unsigned int offset;	/* Freepointer offset (in word units) */
-	unsigned int objsize;	/* Size of an object (from kmem_cache) */
 #ifdef CONFIG_SLUB_STATS
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
···
  * Slab cache management.
  */
 struct kmem_cache {
+	struct kmem_cache_cpu *cpu_slab;
 	/* Used for retriving partial slabs etc */
 	unsigned long flags;
 	int size;		/* The size of an object including meta data */
···
 	int remote_node_defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 #endif
-#ifdef CONFIG_SMP
-	struct kmem_cache_cpu *cpu_slab[NR_CPUS];
-#else
-	struct kmem_cache_cpu cpu_slab;
-#endif
 };
 
 /*
···
 
 #define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2)
 
+#ifdef CONFIG_ZONE_DMA
+#define SLUB_DMA __GFP_DMA
+/* Reserve extra caches for potential DMA use */
+#define KMALLOC_CACHES (2 * SLUB_PAGE_SHIFT - 6)
+#else
+/* Disable DMA functionality */
+#define SLUB_DMA (__force gfp_t)0
+#define KMALLOC_CACHES SLUB_PAGE_SHIFT
+#endif
+
 /*
  * We keep the general caches in an array of slab caches that are used for
  * 2^x bytes of allocations.
  */
-extern struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT];
+extern struct kmem_cache kmalloc_caches[KMALLOC_CACHES];
 
 /*
  * Sorry that the following has to be that ugly but some versions of GCC
···
 
 	return &kmalloc_caches[index];
 }
-
-#ifdef CONFIG_ZONE_DMA
-#define SLUB_DMA __GFP_DMA
-#else
-/* Disable DMA functionality */
-#define SLUB_DMA (__force gfp_t)0
-#endif
 
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
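Sizing the kmalloc array statically is what lets "SLUB: Get rid of dynamic DMA kmalloc cache allocation" drop the runtime kmem_cache allocation: the spare slots at the end of kmalloc_caches[] are claimed lazily when a DMA kmalloc cache is first needed (see the mm/slub.c hunks below). A worked example of the arithmetic, assuming 4 KB pages (PAGE_SHIFT == 12):

/* Worked example of the new sizing, assuming PAGE_SHIFT == 12 (4 KB pages). */
#define EXAMPLE_PAGE_SHIFT	12
#define EXAMPLE_SLUB_PAGE_SHIFT	(EXAMPLE_PAGE_SHIFT + 2)	  /* 14 regular kmalloc slots  */
#define EXAMPLE_KMALLOC_CACHES	(2 * EXAMPLE_SLUB_PAGE_SHIFT - 6) /* 22 = 14 regular + 8 spare */
/* The 8 spare kmem_cache slots are reserved for kmalloc DMA caches. */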
+15 -3
mm/failslab.c
···
 #include <linux/fault-inject.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
 
 static struct {
 	struct fault_attr attr;
 	u32 ignore_gfp_wait;
+	int cache_filter;
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
 	struct dentry *ignore_gfp_wait_file;
+	struct dentry *cache_filter_file;
 #endif
 } failslab = {
 	.attr = FAULT_ATTR_INITIALIZER,
 	.ignore_gfp_wait = 1,
+	.cache_filter = 0,
 };
 
-bool should_failslab(size_t size, gfp_t gfpflags)
+bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags)
 {
 	if (gfpflags & __GFP_NOFAIL)
 		return false;
 
 	if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT))
 		return false;
 
+	if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB))
+		return false;
+
 	return should_fail(&failslab.attr, size);
···
 __setup("failslab=", setup_failslab);
 
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-
 static int __init failslab_debugfs_init(void)
 {
 	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
···
 		debugfs_create_bool("ignore-gfp-wait", mode, dir,
 				&failslab.ignore_gfp_wait);
 
-	if (!failslab.ignore_gfp_wait_file) {
+	failslab.cache_filter_file =
+		debugfs_create_bool("cache-filter", mode, dir,
+				&failslab.cache_filter);
+
+	if (!failslab.ignore_gfp_wait_file ||
+	    !failslab.cache_filter_file) {
 		err = -ENOMEM;
+		debugfs_remove(failslab.cache_filter_file);
 		debugfs_remove(failslab.ignore_gfp_wait_file);
 		cleanup_fault_attr_dentries(&failslab.attr);
 	}
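Taken together, the existing debugfs knob and the new cache filter gate injection as follows; this is a condensed restatement of should_failslab() above under an illustrative name, not another copy in the tree:

/* Condensed restatement of the gating in should_failslab() (sketch only). */
static bool failslab_would_consider(size_t size, gfp_t gfpflags,
				    unsigned long cache_flags)
{
	if (gfpflags & __GFP_NOFAIL)
		return false;	/* __GFP_NOFAIL allocations are never failed */

	if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT))
		return false;	/* default: leave sleepable allocations alone */

	if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB))
		return false;	/* cache-filter set: only marked caches can fail */

	return should_fail(&failslab.attr, size);	/* usual probability check */
}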
+6 -7
mm/slab.c
···
 
 	from->avail -= nr;
 	to->avail += nr;
-	to->touched = 1;
 	return nr;
 }
 
···
 
 	if (limit > 1)
 		limit = 12;
-	ac_ptr = kmalloc_node(memsize, gfp, node);
+	ac_ptr = kzalloc_node(memsize, gfp, node);
 	if (ac_ptr) {
 		for_each_node(i) {
-			if (i == node || !node_online(i)) {
-				ac_ptr[i] = NULL;
+			if (i == node || !node_online(i))
 				continue;
-			}
 			ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
 			if (!ac_ptr[i]) {
 				for (i--; i >= 0; i--)
···
 	spin_lock(&l3->list_lock);
 
 	/* See if we can refill from the shared array */
-	if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
+	if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) {
+		l3->shared->touched = 1;
 		goto alloc_done;
+	}
 
 	while (batchcount > 0) {
 		struct list_head *entry;
···
 	if (cachep == &cache_cache)
 		return false;
 
-	return should_failslab(obj_size(cachep), flags);
+	return should_failslab(obj_size(cachep), flags, cachep->flags);
 }
 
 static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
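The touched change matters because the cache reaper uses that bit to age arrays: an array should only count as recently used when it actually satisfied an allocation, not merely because transfer_objects() happened to move objects into it. A simplified sketch of the aging idea; field and function names are illustrative, the real logic lives in cache_reap()/drain_array():

/* Simplified aging sketch; not the kernel's drain_array() implementation. */
struct shared_array_sketch {
	unsigned int avail;	/* objects currently held */
	unsigned int touched;	/* set when the array supplied an allocation */
};

static void reap_pass(struct shared_array_sketch *shared)
{
	if (shared->touched) {
		/* Recently useful: clear the mark, skip draining this pass. */
		shared->touched = 0;
	} else {
		/* Idle since the last pass: return part of it to the slabs. */
		shared->avail -= shared->avail / 2;
	}
}

With the mark applied inside transfer_objects(), the receiving array was flagged rather than the shared array being drawn from; the hunk above instead sets l3->shared->touched on the refill path that actually consumed from it.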
+105 -234
mm/slub.c
··· 151 * Set of flags that will prevent slab merging 152 */ 153 #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ 154 - SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE) 155 156 #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ 157 SLAB_CACHE_DMA | SLAB_NOTRACK) ··· 218 219 #endif 220 221 - static inline void stat(struct kmem_cache_cpu *c, enum stat_item si) 222 { 223 #ifdef CONFIG_SLUB_STATS 224 - c->stat[si]++; 225 #endif 226 } 227 ··· 243 #endif 244 } 245 246 - static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) 247 - { 248 - #ifdef CONFIG_SMP 249 - return s->cpu_slab[cpu]; 250 - #else 251 - return &s->cpu_slab; 252 - #endif 253 - } 254 - 255 /* Verify that a pointer has an address that is valid within a slab page */ 256 static inline int check_valid_pointer(struct kmem_cache *s, 257 struct page *page, const void *object) ··· 261 return 1; 262 } 263 264 - /* 265 - * Slow version of get and set free pointer. 266 - * 267 - * This version requires touching the cache lines of kmem_cache which 268 - * we avoid to do in the fast alloc free paths. There we obtain the offset 269 - * from the page struct. 270 - */ 271 static inline void *get_freepointer(struct kmem_cache *s, void *object) 272 { 273 return *(void **)(object + s->offset); ··· 1005 case 't': 1006 slub_debug |= SLAB_TRACE; 1007 break; 1008 default: 1009 printk(KERN_ERR "slub_debug option '%c' " 1010 "unknown. skipped\n", *str); ··· 1112 if (!page) 1113 return NULL; 1114 1115 - stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); 1116 } 1117 1118 if (kmemcheck_enabled ··· 1410 static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) 1411 { 1412 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1413 - struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id()); 1414 1415 __ClearPageSlubFrozen(page); 1416 if (page->inuse) { 1417 1418 if (page->freelist) { 1419 add_partial(n, page, tail); 1420 - stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); 1421 } else { 1422 - stat(c, DEACTIVATE_FULL); 1423 if (SLABDEBUG && PageSlubDebug(page) && 1424 (s->flags & SLAB_STORE_USER)) 1425 add_full(n, page); 1426 } 1427 slab_unlock(page); 1428 } else { 1429 - stat(c, DEACTIVATE_EMPTY); 1430 if (n->nr_partial < s->min_partial) { 1431 /* 1432 * Adding an empty slab to the partial slabs in order ··· 1441 slab_unlock(page); 1442 } else { 1443 slab_unlock(page); 1444 - stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB); 1445 discard_slab(s, page); 1446 } 1447 } ··· 1456 int tail = 1; 1457 1458 if (page->freelist) 1459 - stat(c, DEACTIVATE_REMOTE_FREES); 1460 /* 1461 * Merge cpu freelist into slab freelist. Typically we get here 1462 * because both freelists are empty. 
So this is unlikely ··· 1469 1470 /* Retrieve object from cpu_freelist */ 1471 object = c->freelist; 1472 - c->freelist = c->freelist[c->offset]; 1473 1474 /* And put onto the regular freelist */ 1475 - object[c->offset] = page->freelist; 1476 page->freelist = object; 1477 page->inuse--; 1478 } ··· 1482 1483 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1484 { 1485 - stat(c, CPUSLAB_FLUSH); 1486 slab_lock(c->page); 1487 deactivate_slab(s, c); 1488 } ··· 1494 */ 1495 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) 1496 { 1497 - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); 1498 1499 if (likely(c && c->page)) 1500 flush_slab(s, c); ··· 1622 if (unlikely(!node_match(c, node))) 1623 goto another_slab; 1624 1625 - stat(c, ALLOC_REFILL); 1626 1627 load_freelist: 1628 object = c->page->freelist; ··· 1631 if (unlikely(SLABDEBUG && PageSlubDebug(c->page))) 1632 goto debug; 1633 1634 - c->freelist = object[c->offset]; 1635 c->page->inuse = c->page->objects; 1636 c->page->freelist = NULL; 1637 c->node = page_to_nid(c->page); 1638 unlock_out: 1639 slab_unlock(c->page); 1640 - stat(c, ALLOC_SLOWPATH); 1641 return object; 1642 1643 another_slab: ··· 1647 new = get_partial(s, gfpflags, node); 1648 if (new) { 1649 c->page = new; 1650 - stat(c, ALLOC_FROM_PARTIAL); 1651 goto load_freelist; 1652 } 1653 ··· 1660 local_irq_disable(); 1661 1662 if (new) { 1663 - c = get_cpu_slab(s, smp_processor_id()); 1664 - stat(c, ALLOC_SLAB); 1665 if (c->page) 1666 flush_slab(s, c); 1667 slab_lock(new); ··· 1677 goto another_slab; 1678 1679 c->page->inuse++; 1680 - c->page->freelist = object[c->offset]; 1681 c->node = -1; 1682 goto unlock_out; 1683 } ··· 1698 void **object; 1699 struct kmem_cache_cpu *c; 1700 unsigned long flags; 1701 - unsigned int objsize; 1702 1703 gfpflags &= gfp_allowed_mask; 1704 1705 lockdep_trace_alloc(gfpflags); 1706 might_sleep_if(gfpflags & __GFP_WAIT); 1707 1708 - if (should_failslab(s->objsize, gfpflags)) 1709 return NULL; 1710 1711 local_irq_save(flags); 1712 - c = get_cpu_slab(s, smp_processor_id()); 1713 - objsize = c->objsize; 1714 - if (unlikely(!c->freelist || !node_match(c, node))) 1715 1716 object = __slab_alloc(s, gfpflags, node, addr, c); 1717 1718 else { 1719 - object = c->freelist; 1720 - c->freelist = object[c->offset]; 1721 - stat(c, ALLOC_FASTPATH); 1722 } 1723 local_irq_restore(flags); 1724 1725 if (unlikely(gfpflags & __GFP_ZERO) && object) 1726 - memset(object, 0, objsize); 1727 1728 - kmemcheck_slab_alloc(s, gfpflags, object, c->objsize); 1729 - kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags); 1730 1731 return object; 1732 } ··· 1779 * handling required then we can return immediately. 
1780 */ 1781 static void __slab_free(struct kmem_cache *s, struct page *page, 1782 - void *x, unsigned long addr, unsigned int offset) 1783 { 1784 void *prior; 1785 void **object = (void *)x; 1786 - struct kmem_cache_cpu *c; 1787 1788 - c = get_cpu_slab(s, raw_smp_processor_id()); 1789 - stat(c, FREE_SLOWPATH); 1790 slab_lock(page); 1791 1792 if (unlikely(SLABDEBUG && PageSlubDebug(page))) 1793 goto debug; 1794 1795 checks_ok: 1796 - prior = object[offset] = page->freelist; 1797 page->freelist = object; 1798 page->inuse--; 1799 1800 if (unlikely(PageSlubFrozen(page))) { 1801 - stat(c, FREE_FROZEN); 1802 goto out_unlock; 1803 } 1804 ··· 1810 */ 1811 if (unlikely(!prior)) { 1812 add_partial(get_node(s, page_to_nid(page)), page, 1); 1813 - stat(c, FREE_ADD_PARTIAL); 1814 } 1815 1816 out_unlock: ··· 1823 * Slab still on the partial list. 1824 */ 1825 remove_partial(s, page); 1826 - stat(c, FREE_REMOVE_PARTIAL); 1827 } 1828 slab_unlock(page); 1829 - stat(c, FREE_SLAB); 1830 discard_slab(s, page); 1831 return; 1832 ··· 1856 1857 kmemleak_free_recursive(x, s->flags); 1858 local_irq_save(flags); 1859 - c = get_cpu_slab(s, smp_processor_id()); 1860 - kmemcheck_slab_free(s, object, c->objsize); 1861 - debug_check_no_locks_freed(object, c->objsize); 1862 if (!(s->flags & SLAB_DEBUG_OBJECTS)) 1863 - debug_check_no_obj_freed(object, c->objsize); 1864 if (likely(page == c->page && c->node >= 0)) { 1865 - object[c->offset] = c->freelist; 1866 c->freelist = object; 1867 - stat(c, FREE_FASTPATH); 1868 } else 1869 - __slab_free(s, page, x, addr, c->offset); 1870 1871 local_irq_restore(flags); 1872 } ··· 2053 return ALIGN(align, sizeof(void *)); 2054 } 2055 2056 - static void init_kmem_cache_cpu(struct kmem_cache *s, 2057 - struct kmem_cache_cpu *c) 2058 - { 2059 - c->page = NULL; 2060 - c->freelist = NULL; 2061 - c->node = 0; 2062 - c->offset = s->offset / sizeof(void *); 2063 - c->objsize = s->objsize; 2064 - #ifdef CONFIG_SLUB_STATS 2065 - memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned)); 2066 - #endif 2067 - } 2068 - 2069 static void 2070 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) 2071 { ··· 2066 #endif 2067 } 2068 2069 - #ifdef CONFIG_SMP 2070 - /* 2071 - * Per cpu array for per cpu structures. 2072 - * 2073 - * The per cpu array places all kmem_cache_cpu structures from one processor 2074 - * close together meaning that it becomes possible that multiple per cpu 2075 - * structures are contained in one cacheline. This may be particularly 2076 - * beneficial for the kmalloc caches. 2077 - * 2078 - * A desktop system typically has around 60-80 slabs. With 100 here we are 2079 - * likely able to get per cpu structures for all caches from the array defined 2080 - * here. We must be able to cover all kmalloc caches during bootstrap. 2081 - * 2082 - * If the per cpu array is exhausted then fall back to kmalloc 2083 - * of individual cachelines. No sharing is possible then. 
2084 - */ 2085 - #define NR_KMEM_CACHE_CPU 100 2086 - 2087 - static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU], 2088 - kmem_cache_cpu); 2089 - 2090 - static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free); 2091 - static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS); 2092 - 2093 - static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, 2094 - int cpu, gfp_t flags) 2095 - { 2096 - struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu); 2097 - 2098 - if (c) 2099 - per_cpu(kmem_cache_cpu_free, cpu) = 2100 - (void *)c->freelist; 2101 - else { 2102 - /* Table overflow: So allocate ourselves */ 2103 - c = kmalloc_node( 2104 - ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()), 2105 - flags, cpu_to_node(cpu)); 2106 - if (!c) 2107 - return NULL; 2108 - } 2109 - 2110 - init_kmem_cache_cpu(s, c); 2111 - return c; 2112 - } 2113 - 2114 - static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) 2115 - { 2116 - if (c < per_cpu(kmem_cache_cpu, cpu) || 2117 - c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { 2118 - kfree(c); 2119 - return; 2120 - } 2121 - c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu); 2122 - per_cpu(kmem_cache_cpu_free, cpu) = c; 2123 - } 2124 - 2125 - static void free_kmem_cache_cpus(struct kmem_cache *s) 2126 - { 2127 - int cpu; 2128 - 2129 - for_each_online_cpu(cpu) { 2130 - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); 2131 - 2132 - if (c) { 2133 - s->cpu_slab[cpu] = NULL; 2134 - free_kmem_cache_cpu(c, cpu); 2135 - } 2136 - } 2137 - } 2138 - 2139 - static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) 2140 - { 2141 - int cpu; 2142 - 2143 - for_each_online_cpu(cpu) { 2144 - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); 2145 - 2146 - if (c) 2147 - continue; 2148 - 2149 - c = alloc_kmem_cache_cpu(s, cpu, flags); 2150 - if (!c) { 2151 - free_kmem_cache_cpus(s); 2152 - return 0; 2153 - } 2154 - s->cpu_slab[cpu] = c; 2155 - } 2156 - return 1; 2157 - } 2158 - 2159 - /* 2160 - * Initialize the per cpu array. 
2161 - */ 2162 - static void init_alloc_cpu_cpu(int cpu) 2163 - { 2164 - int i; 2165 - 2166 - if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once))) 2167 - return; 2168 - 2169 - for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--) 2170 - free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu); 2171 - 2172 - cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)); 2173 - } 2174 - 2175 - static void __init init_alloc_cpu(void) 2176 - { 2177 - int cpu; 2178 - 2179 - for_each_online_cpu(cpu) 2180 - init_alloc_cpu_cpu(cpu); 2181 - } 2182 - 2183 - #else 2184 - static inline void free_kmem_cache_cpus(struct kmem_cache *s) {} 2185 - static inline void init_alloc_cpu(void) {} 2186 2187 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) 2188 { 2189 - init_kmem_cache_cpu(s, &s->cpu_slab); 2190 return 1; 2191 } 2192 - #endif 2193 2194 #ifdef CONFIG_NUMA 2195 /* ··· 2152 int node; 2153 int local_node; 2154 2155 - if (slab_state >= UP) 2156 local_node = page_to_nid(virt_to_page(s)); 2157 else 2158 local_node = 0; ··· 2368 2369 if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) 2370 return 1; 2371 free_kmem_cache_nodes(s); 2372 error: 2373 if (flags & SLAB_PANIC) ··· 2476 int node; 2477 2478 flush_all(s); 2479 - 2480 /* Attempt to free all objects */ 2481 - free_kmem_cache_cpus(s); 2482 for_each_node_state(node, N_NORMAL_MEMORY) { 2483 struct kmem_cache_node *n = get_node(s, node); 2484 ··· 2517 * Kmalloc subsystem 2518 *******************************************************************/ 2519 2520 - struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned; 2521 EXPORT_SYMBOL(kmalloc_caches); 2522 2523 static int __init setup_slub_min_order(char *str) ··· 2607 char *text; 2608 size_t realsize; 2609 unsigned long slabflags; 2610 2611 s = kmalloc_caches_dma[index]; 2612 if (s) ··· 2627 realsize = kmalloc_caches[index].objsize; 2628 text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", 2629 (unsigned int)realsize); 2630 - s = kmalloc(kmem_size, flags & ~SLUB_DMA); 2631 2632 /* 2633 * Must defer sysfs creation to a workqueue because we don't know ··· 2646 if (slab_state >= SYSFS) 2647 slabflags |= __SYSFS_ADD_DEFERRED; 2648 2649 - if (!s || !text || !kmem_cache_open(s, flags, text, 2650 realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { 2651 - kfree(s); 2652 kfree(text); 2653 goto unlock_out; 2654 } ··· 3050 int i; 3051 int caches = 0; 3052 3053 - init_alloc_cpu(); 3054 - 3055 #ifdef CONFIG_NUMA 3056 /* 3057 * Must first have the slab cache available for the allocations of the ··· 3133 3134 #ifdef CONFIG_SMP 3135 register_cpu_notifier(&slab_notifier); 3136 - kmem_size = offsetof(struct kmem_cache, cpu_slab) + 3137 - nr_cpu_ids * sizeof(struct kmem_cache_cpu *); 3138 #else 3139 kmem_size = sizeof(struct kmem_cache); 3140 #endif ··· 3225 down_write(&slub_lock); 3226 s = find_mergeable(size, align, flags, name, ctor); 3227 if (s) { 3228 - int cpu; 3229 - 3230 s->refcount++; 3231 /* 3232 * Adjust the object sizes so that we clear 3233 * the complete object on kzalloc. 
3234 */ 3235 s->objsize = max(s->objsize, (int)size); 3236 - 3237 - /* 3238 - * And then we need to update the object size in the 3239 - * per cpu structures 3240 - */ 3241 - for_each_online_cpu(cpu) 3242 - get_cpu_slab(s, cpu)->objsize = s->objsize; 3243 - 3244 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3245 up_write(&slub_lock); 3246 ··· 3284 unsigned long flags; 3285 3286 switch (action) { 3287 - case CPU_UP_PREPARE: 3288 - case CPU_UP_PREPARE_FROZEN: 3289 - init_alloc_cpu_cpu(cpu); 3290 - down_read(&slub_lock); 3291 - list_for_each_entry(s, &slab_caches, list) 3292 - s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu, 3293 - GFP_KERNEL); 3294 - up_read(&slub_lock); 3295 - break; 3296 - 3297 case CPU_UP_CANCELED: 3298 case CPU_UP_CANCELED_FROZEN: 3299 case CPU_DEAD: 3300 case CPU_DEAD_FROZEN: 3301 down_read(&slub_lock); 3302 list_for_each_entry(s, &slab_caches, list) { 3303 - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); 3304 - 3305 local_irq_save(flags); 3306 __flush_cpu_slab(s, cpu); 3307 local_irq_restore(flags); 3308 - free_kmem_cache_cpu(c, cpu); 3309 - s->cpu_slab[cpu] = NULL; 3310 } 3311 up_read(&slub_lock); 3312 break; ··· 3778 int cpu; 3779 3780 for_each_possible_cpu(cpu) { 3781 - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); 3782 3783 if (!c || c->node < 0) 3784 continue; ··· 4021 } 4022 SLAB_ATTR(trace); 4023 4024 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) 4025 { 4026 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); ··· 4220 return -ENOMEM; 4221 4222 for_each_online_cpu(cpu) { 4223 - unsigned x = get_cpu_slab(s, cpu)->stat[si]; 4224 4225 data[cpu] = x; 4226 sum += x; ··· 4243 int cpu; 4244 4245 for_each_online_cpu(cpu) 4246 - get_cpu_slab(s, cpu)->stat[si] = 0; 4247 } 4248 4249 #define STAT_ATTR(si, text) \ ··· 4334 &deactivate_remote_frees_attr.attr, 4335 &order_fallback_attr.attr, 4336 #endif 4337 NULL 4338 }; 4339
··· 151 * Set of flags that will prevent slab merging 152 */ 153 #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ 154 + SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ 155 + SLAB_FAILSLAB) 156 157 #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ 158 SLAB_CACHE_DMA | SLAB_NOTRACK) ··· 217 218 #endif 219 220 + static inline void stat(struct kmem_cache *s, enum stat_item si) 221 { 222 #ifdef CONFIG_SLUB_STATS 223 + __this_cpu_inc(s->cpu_slab->stat[si]); 224 #endif 225 } 226 ··· 242 #endif 243 } 244 245 /* Verify that a pointer has an address that is valid within a slab page */ 246 static inline int check_valid_pointer(struct kmem_cache *s, 247 struct page *page, const void *object) ··· 269 return 1; 270 } 271 272 static inline void *get_freepointer(struct kmem_cache *s, void *object) 273 { 274 return *(void **)(object + s->offset); ··· 1020 case 't': 1021 slub_debug |= SLAB_TRACE; 1022 break; 1023 + case 'a': 1024 + slub_debug |= SLAB_FAILSLAB; 1025 + break; 1026 default: 1027 printk(KERN_ERR "slub_debug option '%c' " 1028 "unknown. skipped\n", *str); ··· 1124 if (!page) 1125 return NULL; 1126 1127 + stat(s, ORDER_FALLBACK); 1128 } 1129 1130 if (kmemcheck_enabled ··· 1422 static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) 1423 { 1424 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1425 1426 __ClearPageSlubFrozen(page); 1427 if (page->inuse) { 1428 1429 if (page->freelist) { 1430 add_partial(n, page, tail); 1431 + stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); 1432 } else { 1433 + stat(s, DEACTIVATE_FULL); 1434 if (SLABDEBUG && PageSlubDebug(page) && 1435 (s->flags & SLAB_STORE_USER)) 1436 add_full(n, page); 1437 } 1438 slab_unlock(page); 1439 } else { 1440 + stat(s, DEACTIVATE_EMPTY); 1441 if (n->nr_partial < s->min_partial) { 1442 /* 1443 * Adding an empty slab to the partial slabs in order ··· 1454 slab_unlock(page); 1455 } else { 1456 slab_unlock(page); 1457 + stat(s, FREE_SLAB); 1458 discard_slab(s, page); 1459 } 1460 } ··· 1469 int tail = 1; 1470 1471 if (page->freelist) 1472 + stat(s, DEACTIVATE_REMOTE_FREES); 1473 /* 1474 * Merge cpu freelist into slab freelist. Typically we get here 1475 * because both freelists are empty. 
So this is unlikely ··· 1482 1483 /* Retrieve object from cpu_freelist */ 1484 object = c->freelist; 1485 + c->freelist = get_freepointer(s, c->freelist); 1486 1487 /* And put onto the regular freelist */ 1488 + set_freepointer(s, object, page->freelist); 1489 page->freelist = object; 1490 page->inuse--; 1491 } ··· 1495 1496 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1497 { 1498 + stat(s, CPUSLAB_FLUSH); 1499 slab_lock(c->page); 1500 deactivate_slab(s, c); 1501 } ··· 1507 */ 1508 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) 1509 { 1510 + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 1511 1512 if (likely(c && c->page)) 1513 flush_slab(s, c); ··· 1635 if (unlikely(!node_match(c, node))) 1636 goto another_slab; 1637 1638 + stat(s, ALLOC_REFILL); 1639 1640 load_freelist: 1641 object = c->page->freelist; ··· 1644 if (unlikely(SLABDEBUG && PageSlubDebug(c->page))) 1645 goto debug; 1646 1647 + c->freelist = get_freepointer(s, object); 1648 c->page->inuse = c->page->objects; 1649 c->page->freelist = NULL; 1650 c->node = page_to_nid(c->page); 1651 unlock_out: 1652 slab_unlock(c->page); 1653 + stat(s, ALLOC_SLOWPATH); 1654 return object; 1655 1656 another_slab: ··· 1660 new = get_partial(s, gfpflags, node); 1661 if (new) { 1662 c->page = new; 1663 + stat(s, ALLOC_FROM_PARTIAL); 1664 goto load_freelist; 1665 } 1666 ··· 1673 local_irq_disable(); 1674 1675 if (new) { 1676 + c = __this_cpu_ptr(s->cpu_slab); 1677 + stat(s, ALLOC_SLAB); 1678 if (c->page) 1679 flush_slab(s, c); 1680 slab_lock(new); ··· 1690 goto another_slab; 1691 1692 c->page->inuse++; 1693 + c->page->freelist = get_freepointer(s, object); 1694 c->node = -1; 1695 goto unlock_out; 1696 } ··· 1711 void **object; 1712 struct kmem_cache_cpu *c; 1713 unsigned long flags; 1714 1715 gfpflags &= gfp_allowed_mask; 1716 1717 lockdep_trace_alloc(gfpflags); 1718 might_sleep_if(gfpflags & __GFP_WAIT); 1719 1720 + if (should_failslab(s->objsize, gfpflags, s->flags)) 1721 return NULL; 1722 1723 local_irq_save(flags); 1724 + c = __this_cpu_ptr(s->cpu_slab); 1725 + object = c->freelist; 1726 + if (unlikely(!object || !node_match(c, node))) 1727 1728 object = __slab_alloc(s, gfpflags, node, addr, c); 1729 1730 else { 1731 + c->freelist = get_freepointer(s, object); 1732 + stat(s, ALLOC_FASTPATH); 1733 } 1734 local_irq_restore(flags); 1735 1736 if (unlikely(gfpflags & __GFP_ZERO) && object) 1737 + memset(object, 0, s->objsize); 1738 1739 + kmemcheck_slab_alloc(s, gfpflags, object, s->objsize); 1740 + kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags); 1741 1742 return object; 1743 } ··· 1794 * handling required then we can return immediately. 1795 */ 1796 static void __slab_free(struct kmem_cache *s, struct page *page, 1797 + void *x, unsigned long addr) 1798 { 1799 void *prior; 1800 void **object = (void *)x; 1801 1802 + stat(s, FREE_SLOWPATH); 1803 slab_lock(page); 1804 1805 if (unlikely(SLABDEBUG && PageSlubDebug(page))) 1806 goto debug; 1807 1808 checks_ok: 1809 + prior = page->freelist; 1810 + set_freepointer(s, object, prior); 1811 page->freelist = object; 1812 page->inuse--; 1813 1814 if (unlikely(PageSlubFrozen(page))) { 1815 + stat(s, FREE_FROZEN); 1816 goto out_unlock; 1817 } 1818 ··· 1826 */ 1827 if (unlikely(!prior)) { 1828 add_partial(get_node(s, page_to_nid(page)), page, 1); 1829 + stat(s, FREE_ADD_PARTIAL); 1830 } 1831 1832 out_unlock: ··· 1839 * Slab still on the partial list. 
1840 */ 1841 remove_partial(s, page); 1842 + stat(s, FREE_REMOVE_PARTIAL); 1843 } 1844 slab_unlock(page); 1845 + stat(s, FREE_SLAB); 1846 discard_slab(s, page); 1847 return; 1848 ··· 1872 1873 kmemleak_free_recursive(x, s->flags); 1874 local_irq_save(flags); 1875 + c = __this_cpu_ptr(s->cpu_slab); 1876 + kmemcheck_slab_free(s, object, s->objsize); 1877 + debug_check_no_locks_freed(object, s->objsize); 1878 if (!(s->flags & SLAB_DEBUG_OBJECTS)) 1879 + debug_check_no_obj_freed(object, s->objsize); 1880 if (likely(page == c->page && c->node >= 0)) { 1881 + set_freepointer(s, object, c->freelist); 1882 c->freelist = object; 1883 + stat(s, FREE_FASTPATH); 1884 } else 1885 + __slab_free(s, page, x, addr); 1886 1887 local_irq_restore(flags); 1888 } ··· 2069 return ALIGN(align, sizeof(void *)); 2070 } 2071 2072 static void 2073 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) 2074 { ··· 2095 #endif 2096 } 2097 2098 + static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]); 2099 2100 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) 2101 { 2102 + if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) 2103 + /* 2104 + * Boot time creation of the kmalloc array. Use static per cpu data 2105 + * since the per cpu allocator is not available yet. 2106 + */ 2107 + s->cpu_slab = kmalloc_percpu + (s - kmalloc_caches); 2108 + else 2109 + s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); 2110 + 2111 + if (!s->cpu_slab) 2112 + return 0; 2113 + 2114 return 1; 2115 } 2116 2117 #ifdef CONFIG_NUMA 2118 /* ··· 2287 int node; 2288 int local_node; 2289 2290 + if (slab_state >= UP && (s < kmalloc_caches || 2291 + s > kmalloc_caches + KMALLOC_CACHES)) 2292 local_node = page_to_nid(virt_to_page(s)); 2293 else 2294 local_node = 0; ··· 2502 2503 if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) 2504 return 1; 2505 + 2506 free_kmem_cache_nodes(s); 2507 error: 2508 if (flags & SLAB_PANIC) ··· 2609 int node; 2610 2611 flush_all(s); 2612 + free_percpu(s->cpu_slab); 2613 /* Attempt to free all objects */ 2614 for_each_node_state(node, N_NORMAL_MEMORY) { 2615 struct kmem_cache_node *n = get_node(s, node); 2616 ··· 2651 * Kmalloc subsystem 2652 *******************************************************************/ 2653 2654 + struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; 2655 EXPORT_SYMBOL(kmalloc_caches); 2656 2657 static int __init setup_slub_min_order(char *str) ··· 2741 char *text; 2742 size_t realsize; 2743 unsigned long slabflags; 2744 + int i; 2745 2746 s = kmalloc_caches_dma[index]; 2747 if (s) ··· 2760 realsize = kmalloc_caches[index].objsize; 2761 text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", 2762 (unsigned int)realsize); 2763 + 2764 + s = NULL; 2765 + for (i = 0; i < KMALLOC_CACHES; i++) 2766 + if (!kmalloc_caches[i].size) 2767 + break; 2768 + 2769 + BUG_ON(i >= KMALLOC_CACHES); 2770 + s = kmalloc_caches + i; 2771 2772 /* 2773 * Must defer sysfs creation to a workqueue because we don't know ··· 2772 if (slab_state >= SYSFS) 2773 slabflags |= __SYSFS_ADD_DEFERRED; 2774 2775 + if (!text || !kmem_cache_open(s, flags, text, 2776 realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { 2777 + s->size = 0; 2778 kfree(text); 2779 goto unlock_out; 2780 } ··· 3176 int i; 3177 int caches = 0; 3178 3179 #ifdef CONFIG_NUMA 3180 /* 3181 * Must first have the slab cache available for the allocations of the ··· 3261 3262 #ifdef CONFIG_SMP 3263 register_cpu_notifier(&slab_notifier); 3264 + #endif 3265 + #ifdef CONFIG_NUMA 3266 + 
kmem_size = offsetof(struct kmem_cache, node) + 3267 + nr_node_ids * sizeof(struct kmem_cache_node *); 3268 #else 3269 kmem_size = sizeof(struct kmem_cache); 3270 #endif ··· 3351 down_write(&slub_lock); 3352 s = find_mergeable(size, align, flags, name, ctor); 3353 if (s) { 3354 s->refcount++; 3355 /* 3356 * Adjust the object sizes so that we clear 3357 * the complete object on kzalloc. 3358 */ 3359 s->objsize = max(s->objsize, (int)size); 3360 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3361 up_write(&slub_lock); 3362 ··· 3420 unsigned long flags; 3421 3422 switch (action) { 3423 case CPU_UP_CANCELED: 3424 case CPU_UP_CANCELED_FROZEN: 3425 case CPU_DEAD: 3426 case CPU_DEAD_FROZEN: 3427 down_read(&slub_lock); 3428 list_for_each_entry(s, &slab_caches, list) { 3429 local_irq_save(flags); 3430 __flush_cpu_slab(s, cpu); 3431 local_irq_restore(flags); 3432 } 3433 up_read(&slub_lock); 3434 break; ··· 3928 int cpu; 3929 3930 for_each_possible_cpu(cpu) { 3931 + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 3932 3933 if (!c || c->node < 0) 3934 continue; ··· 4171 } 4172 SLAB_ATTR(trace); 4173 4174 + #ifdef CONFIG_FAILSLAB 4175 + static ssize_t failslab_show(struct kmem_cache *s, char *buf) 4176 + { 4177 + return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); 4178 + } 4179 + 4180 + static ssize_t failslab_store(struct kmem_cache *s, const char *buf, 4181 + size_t length) 4182 + { 4183 + s->flags &= ~SLAB_FAILSLAB; 4184 + if (buf[0] == '1') 4185 + s->flags |= SLAB_FAILSLAB; 4186 + return length; 4187 + } 4188 + SLAB_ATTR(failslab); 4189 + #endif 4190 + 4191 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) 4192 { 4193 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); ··· 4353 return -ENOMEM; 4354 4355 for_each_online_cpu(cpu) { 4356 + unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si]; 4357 4358 data[cpu] = x; 4359 sum += x; ··· 4376 int cpu; 4377 4378 for_each_online_cpu(cpu) 4379 + per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0; 4380 } 4381 4382 #define STAT_ATTR(si, text) \ ··· 4467 &deactivate_remote_frees_attr.attr, 4468 &order_fallback_attr.attr, 4469 #endif 4470 + #ifdef CONFIG_FAILSLAB 4471 + &failslab_attr.attr, 4472 + #endif 4473 + 4474 NULL 4475 }; 4476
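The bulk of the mm/slub.c churn is the this_cpu conversion: the open-coded per-CPU pointer array (cpu_slab[NR_CPUS]), its bootstrap allocator and the CPU-hotplug plumbing are replaced by a single percpu allocation addressed with this_cpu/per_cpu_ptr operations. A generic sketch of that pattern, using invented names rather than the SLUB structures:

/* Generic before/after pattern for the conversion above; names are invented. */
struct foo_pcpu {
	unsigned long hits;
};

struct foo {
	struct foo_pcpu *pcpu;	/* was: struct foo_pcpu *pcpu[NR_CPUS] */
};

static int foo_init(struct foo *f)
{
	f->pcpu = alloc_percpu(struct foo_pcpu);
	return f->pcpu ? 0 : -ENOMEM;
}

static void foo_hit(struct foo *f)
{
	/* No smp_processor_id() lookup or array indexing on the fast path. */
	this_cpu_inc(f->pcpu->hits);
}

static unsigned long foo_total(struct foo *f)
{
	unsigned long sum = 0;
	int cpu;

	for_each_online_cpu(cpu)
		sum += per_cpu_ptr(f->pcpu, cpu)->hits;
	return sum;
}

The same pattern explains why kmem_size no longer reserves an nr_cpu_ids-sized tail and why the CPU_UP_PREPARE notifier cases disappear: the percpu allocator provides storage for all possible CPUs up front.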