SLUB: Use unique end pointer for each slab page.

We use a NULL pointer on freelists to signal that there are no more objects.
However, the NULL pointers of all slabs are identical, in contrast to the
pointers to real objects, which lie in different address ranges for different
slab pages.

Change the end pointer to be a pointer to the slab's first object with bit 0
set. Every slab then has a distinct end pointer. This is necessary so that end
markers can be matched to their source slab during cmpxchg_local operations.
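
For illustration, here is a minimal userspace sketch of the bit-0 end-marker
idea (make_end() and END_MARKER_BIT are hypothetical stand-ins used only in
this sketch; the patch itself stores the marker as start + 1 in page->end and
tests it with is_end(), as seen in the mm/slub.c hunks below):

	#include <assert.h>
	#include <stdint.h>

	#define END_MARKER_BIT	1UL	/* plays the role of PAGE_MAPPING_ANON */

	/* Per-slab end marker: the slab's first object address with bit 0 set. */
	static inline void *make_end(void *first_object)
	{
		return (void *)((uintptr_t)first_object | END_MARKER_BIT);
	}

	static inline int is_end(const void *addr)
	{
		return (uintptr_t)addr & END_MARKER_BIT;
	}

	int main(void)
	{
		long slab_a[8], slab_b[8];	/* stand-ins for two slab pages */
		void *end_a = make_end(slab_a);
		void *end_b = make_end(slab_b);

		/* Unlike a shared NULL, end markers of different slabs never
		 * compare equal, so a marker cannot be confused with the end
		 * of some other slab's freelist. */
		assert(end_a != end_b);
		assert(is_end(end_a) && !is_end(slab_a));
		return 0;
	}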

Bring back SLUB's use of the page mapping field, since we would otherwise have
to call the relatively expensive page_address() in __slab_alloc(). Using the
mapping field also lets various other functions avoid calls to page_address().
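
For reference, the replacement for page_address() is this tiny helper, copied
from the mm/slub.c hunk below; since page->end is the first object's address
plus one, clearing bit 0 recovers the slab's base address without a
page-to-virtual lookup:

	void *slab_address(struct page *page)
	{
		return page->end - PAGE_MAPPING_ANON;
	}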

There is no need to change the page_mapping() function, since bit 0 is set on
the mapping just as it is for anonymous pages. page_mapping(slab_page) will
therefore still return NULL even though the mapping field is overloaded.
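
This works because page_mapping() already treats a mapping with bit 0 set as
"no mapping". Roughly, paraphrasing the check in include/linux/mm.h (swap
cache handling omitted):

	struct address_space *page_mapping(struct page *page)
	{
		struct address_space *mapping = page->mapping;

		/* Bit 0 (PAGE_MAPPING_ANON) marks anonymous pages -- and,
		 * with this patch, SLUB pages whose mapping word holds the
		 * end marker. Either way there is no address_space. */
		if ((unsigned long)mapping & PAGE_MAPPING_ANON)
			mapping = NULL;
		return mapping;
	}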

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored and committed by Christoph Lameter (683d0baa, 5bb983b0)

2 files changed: +51 -24

include/linux/mm_types.h (+4 -1)
···
 #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
 	spinlock_t ptl;
 #endif
-	struct kmem_cache *slab;	/* SLUB: Pointer to slab */
+	struct {
+		struct kmem_cache *slab;	/* SLUB: Pointer to slab */
+		void *end;			/* SLUB: end marker */
+	};
 	struct page *first_page;	/* Compound tail pages */
 };
 union {
mm/slub.c (+47 -23)
···
 #endif
 }
 
+/*
+ * The end pointer in a slab is special. It points to the first object in the
+ * slab but has bit 0 set to mark it.
+ *
+ * Note that SLUB relies on page_mapping returning NULL for pages with bit 0
+ * in the mapping set.
+ */
+static inline int is_end(void *addr)
+{
+	return (unsigned long)addr & PAGE_MAPPING_ANON;
+}
+
+void *slab_address(struct page *page)
+{
+	return page->end - PAGE_MAPPING_ANON;
+}
+
 static inline int check_valid_pointer(struct kmem_cache *s,
 				struct page *page, const void *object)
 {
 	void *base;
 
-	if (!object)
+	if (object == page->end)
 		return 1;
 
-	base = page_address(page);
+	base = slab_address(page);
 	if (object < base || object >= base + s->objects * s->size ||
 		(object - base) % s->size) {
 		return 0;
···
 
 /* Scan freelist */
 #define for_each_free_object(__p, __s, __free) \
-	for (__p = (__free); __p; __p = get_freepointer((__s), __p))
+	for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\
+			__p))
 
 /* Determine object index from a given position */
 static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
···
 static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 {
 	unsigned int off;	/* Offset of last byte */
-	u8 *addr = page_address(page);
+	u8 *addr = slab_address(page);
 
 	print_tracking(s, p);
 
···
 	if (!(s->flags & SLAB_POISON))
 		return 1;
 
-	start = page_address(page);
+	start = slab_address(page);
 	end = start + (PAGE_SIZE << s->order);
 	length = s->objects * s->size;
 	remainder = end - (start + length);
···
 		 * of the free objects in this slab. May cause
 		 * another error because the object count is now wrong.
 		 */
-		set_freepointer(s, p, NULL);
+		set_freepointer(s, p, page->end);
 		return 0;
 	}
 	return 1;
···
 	void *fp = page->freelist;
 	void *object = NULL;
 
-	while (fp && nr <= s->objects) {
+	while (fp != page->end && nr <= s->objects) {
 		if (fp == search)
 			return 1;
 		if (!check_valid_pointer(s, page, fp)) {
 			if (object) {
 				object_err(s, page, object,
 					"Freechain corrupt");
-				set_freepointer(s, object, NULL);
+				set_freepointer(s, object, page->end);
 				break;
 			} else {
 				slab_err(s, page, "Freepointer corrupt");
-				page->freelist = NULL;
+				page->freelist = page->end;
 				page->inuse = s->objects;
 				slab_fix(s, "Freelist cleared");
 				return 0;
···
 		 */
 		slab_fix(s, "Marking all objects used");
 		page->inuse = s->objects;
-		page->freelist = NULL;
+		page->freelist = page->end;
 	}
 	return 0;
 }
···
 	}
 
 	/* Special debug activities for freeing objects */
-	if (!SlabFrozen(page) && !page->freelist)
+	if (!SlabFrozen(page) && page->freelist == page->end)
 		remove_full(s, page);
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_FREE, addr);
···
 		SetSlabDebug(page);
 
 	start = page_address(page);
+	page->end = start + 1;
 
 	if (unlikely(s->flags & SLAB_POISON))
 		memset(start, POISON_INUSE, PAGE_SIZE << s->order);
···
 		last = p;
 	}
 	setup_object(s, page, last);
-	set_freepointer(s, last, NULL);
+	set_freepointer(s, last, page->end);
 
 	page->freelist = start;
 	page->inuse = 0;
···
 		void *p;
 
 		slab_pad_check(s, page);
-		for_each_object(p, s, page_address(page))
+		for_each_object(p, s, slab_address(page))
 			check_object(s, page, p, 0);
 		ClearSlabDebug(page);
 	}
···
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 		-pages);
 
+	page->mapping = NULL;
 	__free_pages(page, s->order);
 }
···
 	ClearSlabFrozen(page);
 	if (page->inuse) {
 
-		if (page->freelist)
+		if (page->freelist != page->end)
 			add_partial(n, page, tail);
 		else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
 			add_full(n, page);
···
 	 * Merge cpu freelist into freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
 	 * to occur.
+	 *
+	 * We need to use _is_end here because deactivate slab may
+	 * be called for a debug slab. Then c->freelist may contain
+	 * a dummy pointer.
 	 */
-	while (unlikely(c->freelist)) {
+	while (unlikely(!is_end(c->freelist))) {
 		void **object;
 
 		tail = 0;	/* Hot objects. Put the slab first */
···
 		goto another_slab;
 load_freelist:
 	object = c->page->freelist;
-	if (unlikely(!object))
+	if (unlikely(object == c->page->end))
 		goto another_slab;
 	if (unlikely(SlabDebug(c->page)))
 		goto debug;
···
 	object = c->page->freelist;
 	c->freelist = object[c->offset];
 	c->page->inuse = s->objects;
-	c->page->freelist = NULL;
+	c->page->freelist = c->page->end;
 	c->node = page_to_nid(c->page);
 	slab_unlock(c->page);
 	return object;
···
 
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
-	if (unlikely(!c->freelist || !node_match(c, node)))
+	if (unlikely(is_end(c->freelist) || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
···
 		 * was not on the partial list before
 		 * then add it.
 		 */
-		if (unlikely(!prior))
+		if (unlikely(prior == page->end))
			add_partial(get_node(s, page_to_nid(page)), page, 1);
 
 out_unlock:
···
 	return;
 
 slab_empty:
-	if (prior)
+	if (prior != page->end)
 		/*
 		 * Slab still on the partial list.
 		 */
···
 			struct kmem_cache_cpu *c)
 {
 	c->page = NULL;
-	c->freelist = NULL;
+	c->freelist = (void *)PAGE_MAPPING_ANON;
 	c->node = 0;
 	c->offset = s->offset / sizeof(void *);
 	c->objsize = s->objsize;
···
 					unsigned long *map)
 {
 	void *p;
-	void *addr = page_address(page);
+	void *addr = slab_address(page);
 
 	if (!check_slab(s, page) ||
 			!on_freelist(s, page, NULL))
···
 static void process_slab(struct loc_track *t, struct kmem_cache *s,
 		struct page *page, enum track_item alloc)
 {
-	void *addr = page_address(page);
+	void *addr = slab_address(page);
 	DECLARE_BITMAP(map, s->objects);
 	void *p;
 