Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
slub: Don't define useless label in the !CONFIG_CMPXCHG_LOCAL case
slab,rcu: don't assume the size of struct rcu_head
slub,rcu: don't assume the size of struct rcu_head
slub: automatically reserve bytes at the end of slab
Lockless (and preemptless) fastpaths for slub
slub: Get rid of slab_free_hook_irq()
slub: min_partial needs to be in first cacheline
slub: fix ksize() build error
slub: fix kmemcheck calls to match ksize() hints
Revert "slab: Fix missing DEBUG_SLAB last user"
mm: Remove support for kmem_cache_name()

+333 -103
-1
include/linux/slab.h
··· 105 105 int kmem_cache_shrink(struct kmem_cache *); 106 106 void kmem_cache_free(struct kmem_cache *, void *); 107 107 unsigned int kmem_cache_size(struct kmem_cache *); 108 - const char *kmem_cache_name(struct kmem_cache *); 109 108 110 109 /* 111 110 * Please use this macro to create slab caches. Simply specify the
+6 -2
include/linux/slub_def.h
··· 35 35 NR_SLUB_STAT_ITEMS }; 36 36 37 37 struct kmem_cache_cpu { 38 - void **freelist; /* Pointer to first free per cpu object */ 38 + void **freelist; /* Pointer to next available object */ 39 + #ifdef CONFIG_CMPXCHG_LOCAL 40 + unsigned long tid; /* Globally unique transaction id */ 41 + #endif 39 42 struct page *page; /* The slab from which we are allocating */ 40 43 int node; /* The node of the page (or -1 for debug) */ 41 44 #ifdef CONFIG_SLUB_STATS ··· 73 70 struct kmem_cache_cpu __percpu *cpu_slab; 74 71 /* Used for retriving partial slabs etc */ 75 72 unsigned long flags; 73 + unsigned long min_partial; 76 74 int size; /* The size of an object including meta data */ 77 75 int objsize; /* The size of an object without meta data */ 78 76 int offset; /* Free pointer offset. */ ··· 87 83 void (*ctor)(void *); 88 84 int inuse; /* Offset to metadata */ 89 85 int align; /* Alignment */ 90 - unsigned long min_partial; 86 + int reserved; /* Reserved bytes at the end of slabs */ 91 87 const char *name; /* Name (only for display!) */ 92 88 struct list_head list; /* List of slab caches */ 93 89 #ifdef CONFIG_SYSFS
+25 -30
mm/slab.c
··· 191 191 #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) 192 192 193 193 /* 194 - * struct slab 195 - * 196 - * Manages the objs in a slab. Placed either at the beginning of mem allocated 197 - * for a slab, or allocated from an general cache. 198 - * Slabs are chained into three list: fully used, partial, fully free slabs. 199 - */ 200 - struct slab { 201 - struct list_head list; 202 - unsigned long colouroff; 203 - void *s_mem; /* including colour offset */ 204 - unsigned int inuse; /* num of objs active in slab */ 205 - kmem_bufctl_t free; 206 - unsigned short nodeid; 207 - }; 208 - 209 - /* 210 194 * struct slab_rcu 211 195 * 212 196 * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to ··· 203 219 * 204 220 * rcu_read_lock before reading the address, then rcu_read_unlock after 205 221 * taking the spinlock within the structure expected at that address. 206 - * 207 - * We assume struct slab_rcu can overlay struct slab when destroying. 208 222 */ 209 223 struct slab_rcu { 210 224 struct rcu_head head; 211 225 struct kmem_cache *cachep; 212 226 void *addr; 227 + }; 228 + 229 + /* 230 + * struct slab 231 + * 232 + * Manages the objs in a slab. Placed either at the beginning of mem allocated 233 + * for a slab, or allocated from an general cache. 234 + * Slabs are chained into three list: fully used, partial, fully free slabs. 235 + */ 236 + struct slab { 237 + union { 238 + struct { 239 + struct list_head list; 240 + unsigned long colouroff; 241 + void *s_mem; /* including colour offset */ 242 + unsigned int inuse; /* num of objs active in slab */ 243 + kmem_bufctl_t free; 244 + unsigned short nodeid; 245 + }; 246 + struct slab_rcu __slab_cover_slab_rcu; 247 + }; 213 248 }; 214 249 215 250 /* ··· 2150 2147 * 2151 2148 * @name must be valid until the cache is destroyed. This implies that 2152 2149 * the module calling this has to destroy the cache before getting unloaded. 
2153 - * Note that kmem_cache_name() is not guaranteed to return the same pointer, 2154 - * therefore applications must manage it themselves. 2155 2150 * 2156 2151 * The flags are 2157 2152 * ··· 2289 2288 if (ralign < align) { 2290 2289 ralign = align; 2291 2290 } 2292 - /* disable debug if not aligning with REDZONE_ALIGN */ 2293 - if (ralign & (__alignof__(unsigned long long) - 1)) 2291 + /* disable debug if necessary */ 2292 + if (ralign > __alignof__(unsigned long long)) 2294 2293 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); 2295 2294 /* 2296 2295 * 4) Store it. ··· 2316 2315 */ 2317 2316 if (flags & SLAB_RED_ZONE) { 2318 2317 /* add space for red zone words */ 2319 - cachep->obj_offset += align; 2320 - size += align + sizeof(unsigned long long); 2318 + cachep->obj_offset += sizeof(unsigned long long); 2319 + size += 2 * sizeof(unsigned long long); 2321 2320 } 2322 2321 if (flags & SLAB_STORE_USER) { 2323 2322 /* user store requires one word storage behind the end of ··· 3840 3839 return obj_size(cachep); 3841 3840 } 3842 3841 EXPORT_SYMBOL(kmem_cache_size); 3843 - 3844 - const char *kmem_cache_name(struct kmem_cache *cachep) 3845 - { 3846 - return cachep->name; 3847 - } 3848 - EXPORT_SYMBOL_GPL(kmem_cache_name); 3849 3842 3850 3843 /* 3851 3844 * This initializes kmem_list3 or resizes various caches for all nodes.
-6
mm/slob.c
··· 666 666 } 667 667 EXPORT_SYMBOL(kmem_cache_size); 668 668 669 - const char *kmem_cache_name(struct kmem_cache *c) 670 - { 671 - return c->name; 672 - } 673 - EXPORT_SYMBOL(kmem_cache_name); 674 - 675 669 int kmem_cache_shrink(struct kmem_cache *d) 676 670 { 677 671 return 0;
+302 -64
mm/slub.c
··· 281 281 return (p - addr) / s->size; 282 282 } 283 283 284 + static inline size_t slab_ksize(const struct kmem_cache *s) 285 + { 286 + #ifdef CONFIG_SLUB_DEBUG 287 + /* 288 + * Debugging requires use of the padding between object 289 + * and whatever may come after it. 290 + */ 291 + if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) 292 + return s->objsize; 293 + 294 + #endif 295 + /* 296 + * If we have the need to store the freelist pointer 297 + * back there or track user information then we can 298 + * only use the space before that information. 299 + */ 300 + if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) 301 + return s->inuse; 302 + /* 303 + * Else we can use all the padding etc for the allocation 304 + */ 305 + return s->size; 306 + } 307 + 308 + static inline int order_objects(int order, unsigned long size, int reserved) 309 + { 310 + return ((PAGE_SIZE << order) - reserved) / size; 311 + } 312 + 284 313 static inline struct kmem_cache_order_objects oo_make(int order, 285 - unsigned long size) 314 + unsigned long size, int reserved) 286 315 { 287 316 struct kmem_cache_order_objects x = { 288 - (order << OO_SHIFT) + (PAGE_SIZE << order) / size 317 + (order << OO_SHIFT) + order_objects(order, size, reserved) 289 318 }; 290 319 291 320 return x; ··· 646 617 return 1; 647 618 648 619 start = page_address(page); 649 - length = (PAGE_SIZE << compound_order(page)); 620 + length = (PAGE_SIZE << compound_order(page)) - s->reserved; 650 621 end = start + length; 651 622 remainder = length % s->size; 652 623 if (!remainder) ··· 727 698 return 0; 728 699 } 729 700 730 - maxobj = (PAGE_SIZE << compound_order(page)) / s->size; 701 + maxobj = order_objects(compound_order(page), s->size, s->reserved); 731 702 if (page->objects > maxobj) { 732 703 slab_err(s, page, "objects %u > max %u", 733 704 s->name, page->objects, maxobj); ··· 777 748 nr++; 778 749 } 779 750 780 - max_objects = (PAGE_SIZE << compound_order(page)) / s->size; 751 + max_objects = 
order_objects(compound_order(page), s->size, s->reserved); 781 752 if (max_objects > MAX_OBJS_PER_PAGE) 782 753 max_objects = MAX_OBJS_PER_PAGE; 783 754 ··· 829 800 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) 830 801 { 831 802 flags &= gfp_allowed_mask; 832 - kmemcheck_slab_alloc(s, flags, object, s->objsize); 803 + kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); 833 804 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); 834 805 } 835 806 836 807 static inline void slab_free_hook(struct kmem_cache *s, void *x) 837 808 { 838 809 kmemleak_free_recursive(x, s->flags); 839 - } 840 810 841 - static inline void slab_free_hook_irq(struct kmem_cache *s, void *object) 842 - { 843 - kmemcheck_slab_free(s, object, s->objsize); 844 - debug_check_no_locks_freed(object, s->objsize); 845 - if (!(s->flags & SLAB_DEBUG_OBJECTS)) 846 - debug_check_no_obj_freed(object, s->objsize); 811 + /* 812 + * Trouble is that we may no longer disable interrupts in the fast path. 813 + * So in order to make the debug calls that expect irqs to be 814 + * disabled we need to disable interrupts temporarily. 
815 + */ 816 + #if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) 817 + { 818 + unsigned long flags; 819 + 820 + local_irq_save(flags); 821 + kmemcheck_slab_free(s, x, s->objsize); 822 + debug_check_no_locks_freed(x, s->objsize); 823 + if (!(s->flags & SLAB_DEBUG_OBJECTS)) 824 + debug_check_no_obj_freed(x, s->objsize); 825 + local_irq_restore(flags); 826 + } 827 + #endif 847 828 } 848 829 849 830 /* ··· 1140 1101 1141 1102 static inline void slab_free_hook(struct kmem_cache *s, void *x) {} 1142 1103 1143 - static inline void slab_free_hook_irq(struct kmem_cache *s, 1144 - void *object) {} 1145 - 1146 1104 #endif /* CONFIG_SLUB_DEBUG */ 1147 1105 1148 1106 /* ··· 1285 1249 __free_pages(page, order); 1286 1250 } 1287 1251 1252 + #define need_reserve_slab_rcu \ 1253 + (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) 1254 + 1288 1255 static void rcu_free_slab(struct rcu_head *h) 1289 1256 { 1290 1257 struct page *page; 1291 1258 1292 - page = container_of((struct list_head *)h, struct page, lru); 1259 + if (need_reserve_slab_rcu) 1260 + page = virt_to_head_page(h); 1261 + else 1262 + page = container_of((struct list_head *)h, struct page, lru); 1263 + 1293 1264 __free_slab(page->slab, page); 1294 1265 } 1295 1266 1296 1267 static void free_slab(struct kmem_cache *s, struct page *page) 1297 1268 { 1298 1269 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { 1299 - /* 1300 - * RCU free overloads the RCU head over the LRU 1301 - */ 1302 - struct rcu_head *head = (void *)&page->lru; 1270 + struct rcu_head *head; 1271 + 1272 + if (need_reserve_slab_rcu) { 1273 + int order = compound_order(page); 1274 + int offset = (PAGE_SIZE << order) - s->reserved; 1275 + 1276 + VM_BUG_ON(s->reserved != sizeof(*head)); 1277 + head = page_address(page) + offset; 1278 + } else { 1279 + /* 1280 + * RCU free overloads the RCU head over the LRU 1281 + */ 1282 + head = (void *)&page->lru; 1283 + } 1303 1284 1304 1285 call_rcu(head, rcu_free_slab); 1305 1286 } else ··· 1540 1487 
} 1541 1488 } 1542 1489 1490 + #ifdef CONFIG_CMPXCHG_LOCAL 1491 + #ifdef CONFIG_PREEMPT 1492 + /* 1493 + * Calculate the next globally unique transaction for disambiguiation 1494 + * during cmpxchg. The transactions start with the cpu number and are then 1495 + * incremented by CONFIG_NR_CPUS. 1496 + */ 1497 + #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS) 1498 + #else 1499 + /* 1500 + * No preemption supported therefore also no need to check for 1501 + * different cpus. 1502 + */ 1503 + #define TID_STEP 1 1504 + #endif 1505 + 1506 + static inline unsigned long next_tid(unsigned long tid) 1507 + { 1508 + return tid + TID_STEP; 1509 + } 1510 + 1511 + static inline unsigned int tid_to_cpu(unsigned long tid) 1512 + { 1513 + return tid % TID_STEP; 1514 + } 1515 + 1516 + static inline unsigned long tid_to_event(unsigned long tid) 1517 + { 1518 + return tid / TID_STEP; 1519 + } 1520 + 1521 + static inline unsigned int init_tid(int cpu) 1522 + { 1523 + return cpu; 1524 + } 1525 + 1526 + static inline void note_cmpxchg_failure(const char *n, 1527 + const struct kmem_cache *s, unsigned long tid) 1528 + { 1529 + #ifdef SLUB_DEBUG_CMPXCHG 1530 + unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid); 1531 + 1532 + printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name); 1533 + 1534 + #ifdef CONFIG_PREEMPT 1535 + if (tid_to_cpu(tid) != tid_to_cpu(actual_tid)) 1536 + printk("due to cpu change %d -> %d\n", 1537 + tid_to_cpu(tid), tid_to_cpu(actual_tid)); 1538 + else 1539 + #endif 1540 + if (tid_to_event(tid) != tid_to_event(actual_tid)) 1541 + printk("due to cpu running other code. 
Event %ld->%ld\n", 1542 + tid_to_event(tid), tid_to_event(actual_tid)); 1543 + else 1544 + printk("for unknown reason: actual=%lx was=%lx target=%lx\n", 1545 + actual_tid, tid, next_tid(tid)); 1546 + #endif 1547 + } 1548 + 1549 + #endif 1550 + 1551 + void init_kmem_cache_cpus(struct kmem_cache *s) 1552 + { 1553 + #if defined(CONFIG_CMPXCHG_LOCAL) && defined(CONFIG_PREEMPT) 1554 + int cpu; 1555 + 1556 + for_each_possible_cpu(cpu) 1557 + per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); 1558 + #endif 1559 + 1560 + } 1543 1561 /* 1544 1562 * Remove the cpu slab 1545 1563 */ ··· 1642 1518 page->inuse--; 1643 1519 } 1644 1520 c->page = NULL; 1521 + #ifdef CONFIG_CMPXCHG_LOCAL 1522 + c->tid = next_tid(c->tid); 1523 + #endif 1645 1524 unfreeze_slab(s, page, tail); 1646 1525 } 1647 1526 ··· 1779 1652 { 1780 1653 void **object; 1781 1654 struct page *new; 1655 + #ifdef CONFIG_CMPXCHG_LOCAL 1656 + unsigned long flags; 1657 + 1658 + local_irq_save(flags); 1659 + #ifdef CONFIG_PREEMPT 1660 + /* 1661 + * We may have been preempted and rescheduled on a different 1662 + * cpu before disabling interrupts. Need to reload cpu area 1663 + * pointer. 
1664 + */ 1665 + c = this_cpu_ptr(s->cpu_slab); 1666 + #endif 1667 + #endif 1782 1668 1783 1669 /* We handle __GFP_ZERO in the caller */ 1784 1670 gfpflags &= ~__GFP_ZERO; ··· 1818 1678 c->node = page_to_nid(c->page); 1819 1679 unlock_out: 1820 1680 slab_unlock(c->page); 1681 + #ifdef CONFIG_CMPXCHG_LOCAL 1682 + c->tid = next_tid(c->tid); 1683 + local_irq_restore(flags); 1684 + #endif 1821 1685 stat(s, ALLOC_SLOWPATH); 1822 1686 return object; 1823 1687 ··· 1883 1739 { 1884 1740 void **object; 1885 1741 struct kmem_cache_cpu *c; 1742 + #ifdef CONFIG_CMPXCHG_LOCAL 1743 + unsigned long tid; 1744 + #else 1886 1745 unsigned long flags; 1746 + #endif 1887 1747 1888 1748 if (slab_pre_alloc_hook(s, gfpflags)) 1889 1749 return NULL; 1890 1750 1751 + #ifndef CONFIG_CMPXCHG_LOCAL 1891 1752 local_irq_save(flags); 1753 + #else 1754 + redo: 1755 + #endif 1756 + 1757 + /* 1758 + * Must read kmem_cache cpu data via this cpu ptr. Preemption is 1759 + * enabled. We may switch back and forth between cpus while 1760 + * reading from one cpu area. That does not matter as long 1761 + * as we end up on the original cpu again when doing the cmpxchg. 1762 + */ 1892 1763 c = __this_cpu_ptr(s->cpu_slab); 1764 + 1765 + #ifdef CONFIG_CMPXCHG_LOCAL 1766 + /* 1767 + * The transaction ids are globally unique per cpu and per operation on 1768 + * a per cpu queue. Thus they can guarantee that the cmpxchg_double 1769 + * occurs on the right processor and that there was no operation on the 1770 + * linked list in between. 1771 + */ 1772 + tid = c->tid; 1773 + barrier(); 1774 + #endif 1775 + 1893 1776 object = c->freelist; 1894 1777 if (unlikely(!object || !node_match(c, node))) 1895 1778 1896 1779 object = __slab_alloc(s, gfpflags, node, addr, c); 1897 1780 1898 1781 else { 1782 + #ifdef CONFIG_CMPXCHG_LOCAL 1783 + /* 1784 + * The cmpxchg will only match if there was no additional 1785 + * operation and if we are on the right processor. 
1786 + * 1787 + * The cmpxchg does the following atomically (without lock semantics!) 1788 + * 1. Relocate first pointer to the current per cpu area. 1789 + * 2. Verify that tid and freelist have not been changed 1790 + * 3. If they were not changed replace tid and freelist 1791 + * 1792 + * Since this is without lock semantics the protection is only against 1793 + * code executing on this cpu *not* from access by other cpus. 1794 + */ 1795 + if (unlikely(!this_cpu_cmpxchg_double( 1796 + s->cpu_slab->freelist, s->cpu_slab->tid, 1797 + object, tid, 1798 + get_freepointer(s, object), next_tid(tid)))) { 1799 + 1800 + note_cmpxchg_failure("slab_alloc", s, tid); 1801 + goto redo; 1802 + } 1803 + #else 1899 1804 c->freelist = get_freepointer(s, object); 1805 + #endif 1900 1806 stat(s, ALLOC_FASTPATH); 1901 1807 } 1808 + 1809 + #ifndef CONFIG_CMPXCHG_LOCAL 1902 1810 local_irq_restore(flags); 1811 + #endif 1903 1812 1904 1813 if (unlikely(gfpflags & __GFP_ZERO) && object) 1905 1814 memset(object, 0, s->objsize); ··· 2030 1833 { 2031 1834 void *prior; 2032 1835 void **object = (void *)x; 1836 + #ifdef CONFIG_CMPXCHG_LOCAL 1837 + unsigned long flags; 2033 1838 2034 - stat(s, FREE_SLOWPATH); 1839 + local_irq_save(flags); 1840 + #endif 2035 1841 slab_lock(page); 1842 + stat(s, FREE_SLOWPATH); 2036 1843 2037 1844 if (kmem_cache_debug(s)) 2038 1845 goto debug; ··· 2066 1865 2067 1866 out_unlock: 2068 1867 slab_unlock(page); 1868 + #ifdef CONFIG_CMPXCHG_LOCAL 1869 + local_irq_restore(flags); 1870 + #endif 2069 1871 return; 2070 1872 2071 1873 slab_empty: ··· 2080 1876 stat(s, FREE_REMOVE_PARTIAL); 2081 1877 } 2082 1878 slab_unlock(page); 1879 + #ifdef CONFIG_CMPXCHG_LOCAL 1880 + local_irq_restore(flags); 1881 + #endif 2083 1882 stat(s, FREE_SLAB); 2084 1883 discard_slab(s, page); 2085 1884 return; ··· 2109 1902 { 2110 1903 void **object = (void *)x; 2111 1904 struct kmem_cache_cpu *c; 1905 + #ifdef CONFIG_CMPXCHG_LOCAL 1906 + unsigned long tid; 1907 + #else 2112 1908 unsigned 
long flags; 1909 + #endif 2113 1910 2114 1911 slab_free_hook(s, x); 2115 1912 1913 + #ifndef CONFIG_CMPXCHG_LOCAL 2116 1914 local_irq_save(flags); 1915 + 1916 + #else 1917 + redo: 1918 + #endif 1919 + 1920 + /* 1921 + * Determine the current cpu's per cpu slab. 1922 + * The cpu may change afterward. However that does not matter since 1923 + * data is retrieved via this pointer. If we are on the same cpu 1924 + * during the cmpxchg then the free will succeed. 1925 + */ 2117 1926 c = __this_cpu_ptr(s->cpu_slab); 2118 1927 2119 - slab_free_hook_irq(s, x); 1928 + #ifdef CONFIG_CMPXCHG_LOCAL 1929 + tid = c->tid; 1930 + barrier(); 1931 + #endif 2120 1932 2121 1933 if (likely(page == c->page && c->node != NUMA_NO_NODE)) { 2122 1934 set_freepointer(s, object, c->freelist); 1935 + 1936 + #ifdef CONFIG_CMPXCHG_LOCAL 1937 + if (unlikely(!this_cpu_cmpxchg_double( 1938 + s->cpu_slab->freelist, s->cpu_slab->tid, 1939 + c->freelist, tid, 1940 + object, next_tid(tid)))) { 1941 + 1942 + note_cmpxchg_failure("slab_free", s, tid); 1943 + goto redo; 1944 + } 1945 + #else 2123 1946 c->freelist = object; 1947 + #endif 2124 1948 stat(s, FREE_FASTPATH); 2125 1949 } else 2126 1950 __slab_free(s, page, x, addr); 2127 1951 1952 + #ifndef CONFIG_CMPXCHG_LOCAL 2128 1953 local_irq_restore(flags); 1954 + #endif 2129 1955 } 2130 1956 2131 1957 void kmem_cache_free(struct kmem_cache *s, void *x) ··· 2228 1988 * the smallest order which will fit the object. 
2229 1989 */ 2230 1990 static inline int slab_order(int size, int min_objects, 2231 - int max_order, int fract_leftover) 1991 + int max_order, int fract_leftover, int reserved) 2232 1992 { 2233 1993 int order; 2234 1994 int rem; 2235 1995 int min_order = slub_min_order; 2236 1996 2237 - if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE) 1997 + if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE) 2238 1998 return get_order(size * MAX_OBJS_PER_PAGE) - 1; 2239 1999 2240 2000 for (order = max(min_order, ··· 2243 2003 2244 2004 unsigned long slab_size = PAGE_SIZE << order; 2245 2005 2246 - if (slab_size < min_objects * size) 2006 + if (slab_size < min_objects * size + reserved) 2247 2007 continue; 2248 2008 2249 - rem = slab_size % size; 2009 + rem = (slab_size - reserved) % size; 2250 2010 2251 2011 if (rem <= slab_size / fract_leftover) 2252 2012 break; ··· 2256 2016 return order; 2257 2017 } 2258 2018 2259 - static inline int calculate_order(int size) 2019 + static inline int calculate_order(int size, int reserved) 2260 2020 { 2261 2021 int order; 2262 2022 int min_objects; ··· 2274 2034 min_objects = slub_min_objects; 2275 2035 if (!min_objects) 2276 2036 min_objects = 4 * (fls(nr_cpu_ids) + 1); 2277 - max_objects = (PAGE_SIZE << slub_max_order)/size; 2037 + max_objects = order_objects(slub_max_order, size, reserved); 2278 2038 min_objects = min(min_objects, max_objects); 2279 2039 2280 2040 while (min_objects > 1) { 2281 2041 fraction = 16; 2282 2042 while (fraction >= 4) { 2283 2043 order = slab_order(size, min_objects, 2284 - slub_max_order, fraction); 2044 + slub_max_order, fraction, reserved); 2285 2045 if (order <= slub_max_order) 2286 2046 return order; 2287 2047 fraction /= 2; ··· 2293 2053 * We were unable to place multiple objects in a slab. Now 2294 2054 * lets see if we can place a single object there. 
2295 2055 */ 2296 - order = slab_order(size, 1, slub_max_order, 1); 2056 + order = slab_order(size, 1, slub_max_order, 1, reserved); 2297 2057 if (order <= slub_max_order) 2298 2058 return order; 2299 2059 2300 2060 /* 2301 2061 * Doh this slab cannot be placed using slub_max_order. 2302 2062 */ 2303 - order = slab_order(size, 1, MAX_ORDER, 1); 2063 + order = slab_order(size, 1, MAX_ORDER, 1, reserved); 2304 2064 if (order < MAX_ORDER) 2305 2065 return order; 2306 2066 return -ENOSYS; ··· 2350 2110 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < 2351 2111 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); 2352 2112 2113 + #ifdef CONFIG_CMPXCHG_LOCAL 2114 + /* 2115 + * Must align to double word boundary for the double cmpxchg instructions 2116 + * to work. 2117 + */ 2118 + s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *)); 2119 + #else 2120 + /* Regular alignment is sufficient */ 2353 2121 s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); 2122 + #endif 2354 2123 2355 - return s->cpu_slab != NULL; 2124 + if (!s->cpu_slab) 2125 + return 0; 2126 + 2127 + init_kmem_cache_cpus(s); 2128 + 2129 + return 1; 2356 2130 } 2357 2131 2358 2132 static struct kmem_cache *kmem_cache_node; ··· 2565 2311 if (forced_order >= 0) 2566 2312 order = forced_order; 2567 2313 else 2568 - order = calculate_order(size); 2314 + order = calculate_order(size, s->reserved); 2569 2315 2570 2316 if (order < 0) 2571 2317 return 0; ··· 2583 2329 /* 2584 2330 * Determine the number of objects per slab 2585 2331 */ 2586 - s->oo = oo_make(order, size); 2587 - s->min = oo_make(get_order(size), size); 2332 + s->oo = oo_make(order, size, s->reserved); 2333 + s->min = oo_make(get_order(size), size, s->reserved); 2588 2334 if (oo_objects(s->oo) > oo_objects(s->max)) 2589 2335 s->max = s->oo; 2590 2336 ··· 2603 2349 s->objsize = size; 2604 2350 s->align = align; 2605 2351 s->flags = kmem_cache_flags(size, flags, name, ctor); 2352 + s->reserved = 0; 2353 + 2354 + if (need_reserve_slab_rcu 
&& (s->flags & SLAB_DESTROY_BY_RCU)) 2355 + s->reserved = sizeof(struct rcu_head); 2606 2356 2607 2357 if (!calculate_sizes(s, -1)) 2608 2358 goto error; ··· 2656 2398 return s->objsize; 2657 2399 } 2658 2400 EXPORT_SYMBOL(kmem_cache_size); 2659 - 2660 - const char *kmem_cache_name(struct kmem_cache *s) 2661 - { 2662 - return s->name; 2663 - } 2664 - EXPORT_SYMBOL(kmem_cache_name); 2665 2401 2666 2402 static void list_slab_objects(struct kmem_cache *s, struct page *page, 2667 2403 const char *text) ··· 2948 2696 size_t ksize(const void *object) 2949 2697 { 2950 2698 struct page *page; 2951 - struct kmem_cache *s; 2952 2699 2953 2700 if (unlikely(object == ZERO_SIZE_PTR)) 2954 2701 return 0; ··· 2958 2707 WARN_ON(!PageCompound(page)); 2959 2708 return PAGE_SIZE << compound_order(page); 2960 2709 } 2961 - s = page->slab; 2962 2710 2963 - #ifdef CONFIG_SLUB_DEBUG 2964 - /* 2965 - * Debugging requires use of the padding between object 2966 - * and whatever may come after it. 2967 - */ 2968 - if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) 2969 - return s->objsize; 2970 - 2971 - #endif 2972 - /* 2973 - * If we have the need to store the freelist pointer 2974 - * back there or track user information then we can 2975 - * only use the space before that information. 
2976 - */ 2977 - if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) 2978 - return s->inuse; 2979 - /* 2980 - * Else we can use all the padding etc for the allocation 2981 - */ 2982 - return s->size; 2711 + return slab_ksize(page->slab); 2983 2712 } 2984 2713 EXPORT_SYMBOL(ksize); 2985 2714 ··· 4248 4017 } 4249 4018 SLAB_ATTR_RO(destroy_by_rcu); 4250 4019 4020 + static ssize_t reserved_show(struct kmem_cache *s, char *buf) 4021 + { 4022 + return sprintf(buf, "%d\n", s->reserved); 4023 + } 4024 + SLAB_ATTR_RO(reserved); 4025 + 4251 4026 #ifdef CONFIG_SLUB_DEBUG 4252 4027 static ssize_t slabs_show(struct kmem_cache *s, char *buf) 4253 4028 { ··· 4540 4303 &reclaim_account_attr.attr, 4541 4304 &destroy_by_rcu_attr.attr, 4542 4305 &shrink_attr.attr, 4306 + &reserved_attr.attr, 4543 4307 #ifdef CONFIG_SLUB_DEBUG 4544 4308 &total_objects_attr.attr, 4545 4309 &slabs_attr.attr,