SLUB: Use unique end pointer for each slab page.

We use a NULL pointer on freelists to signal that there are no more objects.
However, the NULL pointer is the same for every slab, in contrast to the
pointers to real objects, which lie in different address ranges for different
slab pages.

Change the end marker to be a pointer to the slab's first object with bit 0
set. Every slab then has a different end pointer. This is necessary to ensure
that end markers can be matched to their source slab during cmpxchg_local.
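
To make the uniqueness concrete, here is a minimal userspace sketch of the
encoding. make_end() is a hypothetical helper for illustration (the patch
itself open-codes the marker as start + 1 in allocate_slab()), and objects
are assumed to be at least 2-byte aligned so that bit 0 is free to serve as
the tag:

/*
 * Minimal userspace sketch of the end-marker encoding, assuming only
 * that objects are at least 2-byte aligned so bit 0 is free for a tag.
 * make_end() is a hypothetical helper; the patch open-codes the marker
 * as "start + 1" in allocate_slab() (see the diff below).
 */
#include <assert.h>
#include <stdint.h>

#define PAGE_MAPPING_ANON 1	/* bit 0, as in the kernel */

static void *make_end(void *first_object)
{
	return (void *)((uintptr_t)first_object | PAGE_MAPPING_ANON);
}

static int is_end(const void *addr)
{
	return (uintptr_t)addr & PAGE_MAPPING_ANON;
}

int main(void)
{
	/*
	 * Two different "slabs": their end markers can never be
	 * confused, unlike a shared NULL terminator.
	 */
	long slab_a[8], slab_b[8];
	void *end_a = make_end(slab_a);
	void *end_b = make_end(slab_b);

	assert(is_end(end_a) && is_end(end_b));
	assert(end_a != end_b);
	return 0;
}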

Bring back SLUB's use of the mapping field, since we would otherwise have to
call the relatively expensive page_address() in __slab_alloc(). Using the
mapping field also avoids calls to page_address() in various other functions.
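
The shortcut this buys is plain pointer arithmetic: clearing the tag bit from
the end marker yields the slab's base address directly, where page_address()
may have to translate the page through the memory model. A sketch under the
same assumptions as above; struct toy_page models only the SLUB field and is
not struct page:

/*
 * Sketch of the page_address() shortcut. Subtracting the tag from the
 * end marker recovers the slab base address with pure arithmetic, the
 * same arithmetic as the patch's slab_address().
 */
#include <assert.h>
#include <stdint.h>

#define PAGE_MAPPING_ANON 1

struct toy_page {
	void *end;	/* SLUB end marker; overlays page->mapping */
};

static void *toy_slab_address(struct toy_page *page)
{
	return (char *)page->end - PAGE_MAPPING_ANON;
}

int main(void)
{
	long objects[8];
	struct toy_page page = {
		.end = (void *)((uintptr_t)objects | PAGE_MAPPING_ANON),
	};

	assert(toy_slab_address(&page) == (void *)objects);
	return 0;
}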

There is no need to change the page_mapping() function, since bit 0 is set on
the mapping just as it is for anonymous pages. page_mapping(slab_page) will
therefore still return NULL even though the mapping field is overloaded.
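
For illustration, a userspace model of the page_mapping() guarantee relied
on here; the real function also handles the swap-cache case, which is
irrelevant for slab pages and omitted:

/*
 * Userspace model of the page_mapping() convention: any mapping value
 * with bit 0 set (anonymous pages, and now SLUB's end marker) reads
 * back as NULL.
 */
#include <assert.h>
#include <stdint.h>

#define PAGE_MAPPING_ANON 1

struct toy_page {
	void *mapping;	/* overlaid by SLUB's end marker */
};

static void *toy_page_mapping(struct toy_page *page)
{
	if ((uintptr_t)page->mapping & PAGE_MAPPING_ANON)
		return NULL;	/* anon page, or a SLUB end marker */
	return page->mapping;
}

int main(void)
{
	long first_object[8];
	struct toy_page page = {
		.mapping = (void *)((uintptr_t)first_object |
				    PAGE_MAPPING_ANON),
	};

	/* The overloaded field stays invisible to page_mapping() users. */
	assert(toy_page_mapping(&page) == NULL);
	return 0;
}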

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored and committed by Christoph Lameter (683d0baa, 5bb983b0)

2 files changed: +51 -24

include/linux/mm_types.h  (+4 -1)
···
 #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
 	spinlock_t ptl;
 #endif
-	struct kmem_cache *slab;	/* SLUB: Pointer to slab */
+	struct {
+		struct kmem_cache *slab;	/* SLUB: Pointer to slab */
+		void *end;			/* SLUB: end marker */
+	};
 	struct page *first_page;	/* Compound tail pages */
 };
 union {
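
Because the new anonymous struct sits in the same union as the { private,
mapping } pair, end shares its storage with page->mapping; that overlap is
what the message above means by bringing back the mapping field, and it is
why free_slab() in the mm/slub.c diff below clears page->mapping before
returning the pages. A toy model of the overlap, assuming the 2.6.24-era
layout and sizeof(unsigned long) == sizeof(void *):

/*
 * Toy model of the union overlap: writing page->end writes the storage
 * the rest of the VM reads as page->mapping. Not the real struct page.
 */
#include <assert.h>
#include <stddef.h>

struct toy_page {
	union {
		struct {
			unsigned long private;
			void *mapping;
		};
		struct {
			void *slab;	/* SLUB: pointer to kmem_cache */
			void *end;	/* SLUB: end marker (this patch) */
		};
	};
};

int main(void)
{
	assert(offsetof(struct toy_page, end) ==
	       offsetof(struct toy_page, mapping));
	return 0;
}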
mm/slub.c  (+47 -23)
···
 #endif
 }
 
+/*
+ * The end pointer in a slab is special. It points to the first object in the
+ * slab but has bit 0 set to mark it.
+ *
+ * Note that SLUB relies on page_mapping returning NULL for pages with bit 0
+ * in the mapping set.
+ */
+static inline int is_end(void *addr)
+{
+	return (unsigned long)addr & PAGE_MAPPING_ANON;
+}
+
+void *slab_address(struct page *page)
+{
+	return page->end - PAGE_MAPPING_ANON;
+}
+
 static inline int check_valid_pointer(struct kmem_cache *s,
 				struct page *page, const void *object)
 {
 	void *base;
 
-	if (!object)
+	if (object == page->end)
 		return 1;
 
-	base = page_address(page);
+	base = slab_address(page);
 	if (object < base || object >= base + s->objects * s->size ||
 		(object - base) % s->size) {
 		return 0;
···
 
 /* Scan freelist */
 #define for_each_free_object(__p, __s, __free) \
-	for (__p = (__free); __p; __p = get_freepointer((__s), __p))
+	for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\
+			__p))
 
 /* Determine object index from a given position */
 static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
···
 static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 {
 	unsigned int off;	/* Offset of last byte */
-	u8 *addr = page_address(page);
+	u8 *addr = slab_address(page);
 
 	print_tracking(s, p);
 
···
 	if (!(s->flags & SLAB_POISON))
 		return 1;
 
-	start = page_address(page);
+	start = slab_address(page);
 	end = start + (PAGE_SIZE << s->order);
 	length = s->objects * s->size;
 	remainder = end - (start + length);
···
 		 * of the free objects in this slab. May cause
 		 * another error because the object count is now wrong.
 		 */
-		set_freepointer(s, p, NULL);
+		set_freepointer(s, p, page->end);
 		return 0;
 	}
 	return 1;
···
 	void *fp = page->freelist;
 	void *object = NULL;
 
-	while (fp && nr <= s->objects) {
+	while (fp != page->end && nr <= s->objects) {
 		if (fp == search)
 			return 1;
 		if (!check_valid_pointer(s, page, fp)) {
 			if (object) {
 				object_err(s, page, object,
 					"Freechain corrupt");
-				set_freepointer(s, object, NULL);
+				set_freepointer(s, object, page->end);
 				break;
 			} else {
 				slab_err(s, page, "Freepointer corrupt");
-				page->freelist = NULL;
+				page->freelist = page->end;
 				page->inuse = s->objects;
 				slab_fix(s, "Freelist cleared");
 				return 0;
···
 		 */
 		slab_fix(s, "Marking all objects used");
 		page->inuse = s->objects;
-		page->freelist = NULL;
+		page->freelist = page->end;
 	}
 	return 0;
 }
···
 	}
 
 	/* Special debug activities for freeing objects */
-	if (!SlabFrozen(page) && !page->freelist)
+	if (!SlabFrozen(page) && page->freelist == page->end)
 		remove_full(s, page);
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_FREE, addr);
···
 		SetSlabDebug(page);
 
 	start = page_address(page);
+	page->end = start + 1;
 
 	if (unlikely(s->flags & SLAB_POISON))
 		memset(start, POISON_INUSE, PAGE_SIZE << s->order);
···
 		last = p;
 	}
 	setup_object(s, page, last);
-	set_freepointer(s, last, NULL);
+	set_freepointer(s, last, page->end);
 
 	page->freelist = start;
 	page->inuse = 0;
···
 	void *p;
 
 	slab_pad_check(s, page);
-	for_each_object(p, s, page_address(page))
+	for_each_object(p, s, slab_address(page))
 		check_object(s, page, p, 0);
 	ClearSlabDebug(page);
 }
···
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 		-pages);
 
+	page->mapping = NULL;
 	__free_pages(page, s->order);
 }
···
 	ClearSlabFrozen(page);
 	if (page->inuse) {
 
-		if (page->freelist)
+		if (page->freelist != page->end)
 			add_partial(n, page, tail);
 		else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
 			add_full(n, page);
···
 	 * Merge cpu freelist into freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
 	 * to occur.
+	 *
+	 * We need to use _is_end here because deactivate slab may
+	 * be called for a debug slab. Then c->freelist may contain
+	 * a dummy pointer.
 	 */
-	while (unlikely(c->freelist)) {
+	while (unlikely(!is_end(c->freelist))) {
 		void **object;
 
 		tail = 0;	/* Hot objects. Put the slab first */
···
 		goto another_slab;
 load_freelist:
 	object = c->page->freelist;
-	if (unlikely(!object))
+	if (unlikely(object == c->page->end))
 		goto another_slab;
 	if (unlikely(SlabDebug(c->page)))
 		goto debug;
···
 	object = c->page->freelist;
 	c->freelist = object[c->offset];
 	c->page->inuse = s->objects;
-	c->page->freelist = NULL;
+	c->page->freelist = c->page->end;
 	c->node = page_to_nid(c->page);
 	slab_unlock(c->page);
 	return object;
···
 
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
-	if (unlikely(!c->freelist || !node_match(c, node)))
+	if (unlikely(is_end(c->freelist) || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
···
 	 * was not on the partial list before
 	 * then add it.
 	 */
-	if (unlikely(!prior))
+	if (unlikely(prior == page->end))
 		add_partial(get_node(s, page_to_nid(page)), page, 1);
 
 out_unlock:
···
 	return;
 
 slab_empty:
-	if (prior)
+	if (prior != page->end)
 		/*
 		 * Slab still on the partial list.
 		 */
···
 			struct kmem_cache_cpu *c)
 {
 	c->page = NULL;
-	c->freelist = NULL;
+	c->freelist = (void *)PAGE_MAPPING_ANON;
 	c->node = 0;
 	c->offset = s->offset / sizeof(void *);
 	c->objsize = s->objsize;
···
 				unsigned long *map)
 {
 	void *p;
-	void *addr = page_address(page);
+	void *addr = slab_address(page);
 
 	if (!check_slab(s, page) ||
 			!on_freelist(s, page, NULL))
···
 static void process_slab(struct loc_track *t, struct kmem_cache *s,
 		struct page *page, enum track_item alloc)
 {
-	void *addr = page_address(page);
+	void *addr = slab_address(page);
 	DECLARE_BITMAP(map, s->objects);
 	void *p;
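
One subtlety in the hunks above: a per-cpu structure with no slab attached
cannot point its freelist into any page, so init_kmem_cache_cpu() seeds it
with the bare tag bit, (void *)PAGE_MAPPING_ANON. Since is_end() tests only
bit 0, this generic marker counts as an empty freelist just like a real
per-slab marker, which is also why the deactivate_slab() comment warns about
a dummy pointer. A compact model of both cases, same assumptions as the
sketches above:

#include <assert.h>
#include <stdint.h>

#define PAGE_MAPPING_ANON 1UL

static int is_end(const void *addr)
{
	return (uintptr_t)addr & PAGE_MAPPING_ANON;
}

int main(void)
{
	/* Freshly initialized cpu slab: no page, generic end marker. */
	void *cpu_freelist = (void *)PAGE_MAPPING_ANON;
	assert(is_end(cpu_freelist));

	/* A real slab's marker is also "end", but identifies its slab. */
	long slab[8];
	void *end = (void *)((uintptr_t)slab | PAGE_MAPPING_ANON);
	assert(is_end(end) && end != cpu_freelist);
	return 0;
}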