Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

thp: remove PG_buddy

PG_buddy can be converted to _mapcount == -2. So the PG_compound_lock can
be added to page->flags without overflowing (because of the sparse section
bits increasing) with CONFIG_X86_PAE=y and CONFIG_X86_PAT=y. This also
has to move the memory hotplug code from _mapcount to lru.next to avoid
any risk of clashes. We can't use lru.next for PG_buddy removal, but
memory hotplug can use lru.next even more easily than it could use the
mapcount.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Andrea Arcangeli and committed by Linus Torvalds
5f24ce5f 21ae5b01

+52 -29
+8 -6
fs/proc/page.c
···
116 116  	if (PageHuge(page))
117 117  		u |= 1 << KPF_HUGE;
118 118  
    119 +	/*
    120 +	 * Caveats on high order pages: page->_count will only be set
    121 +	 * -1 on the head page; SLUB/SLQB do the same for PG_slab;
    122 +	 * SLOB won't set PG_slab at all on compound pages.
    123 +	 */
    124 +	if (PageBuddy(page))
    125 +		u |= 1 << KPF_BUDDY;
    126 +
119 127  	u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
120 128  
121     -	/*
122     -	 * Caveats on high order pages:
123     -	 * PG_buddy will only be set on the head page; SLUB/SLQB do the same
124     -	 * for PG_slab; SLOB won't set PG_slab at all on compound pages.
125     -	 */
126 129  	u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
127     -	u |= kpf_copy_bit(k, KPF_BUDDY, PG_buddy);
128 130  
129 131  	u |= kpf_copy_bit(k, KPF_ERROR, PG_error);
130 132  	u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
+9 -5
include/linux/memory_hotplug.h
···
13 13  #ifdef CONFIG_MEMORY_HOTPLUG
14 14  
15 15  /*
16    - * Types for free bootmem.
17    - * The normal smallest mapcount is -1. Here is smaller value than it.
   16 + * Types for free bootmem stored in page->lru.next. These have to be in
   17 + * some random range in unsigned long space for debugging purposes.
18 18   */
19    - #define SECTION_INFO		(-1 - 1)
20    - #define MIX_SECTION_INFO	(-1 - 2)
21    - #define NODE_INFO		(-1 - 3)
   19 + enum {
   20 + 	MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12,
   21 + 	SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
   22 + 	MIX_SECTION_INFO,
   23 + 	NODE_INFO,
   24 + 	MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
   25 + };
22 26  
23 27  /*
24 28   * pgdat resizing functions
+21
include/linux/mm.h
···
397 397  	atomic_set(&page->_count, 1);
398 398  }
399 399  
    400 + /*
    401 +  * PageBuddy() indicate that the page is free and in the buddy system
    402 +  * (see mm/page_alloc.c).
    403 +  */
    404 + static inline int PageBuddy(struct page *page)
    405 + {
    406 + 	return atomic_read(&page->_mapcount) == -2;
    407 + }
    408 +
    409 + static inline void __SetPageBuddy(struct page *page)
    410 + {
    411 + 	VM_BUG_ON(atomic_read(&page->_mapcount) != -1);
    412 + 	atomic_set(&page->_mapcount, -2);
    413 + }
    414 +
    415 + static inline void __ClearPageBuddy(struct page *page)
    416 + {
    417 + 	VM_BUG_ON(!PageBuddy(page));
    418 + 	atomic_set(&page->_mapcount, -1);
    419 + }
    420 +
400 421  void put_page(struct page *page);
401 422  void put_pages_list(struct list_head *pages);
402 423  
+1 -6
include/linux/page-flags.h
···
48 48   * struct page (these bits with information) are always mapped into kernel
49 49   * address space...
50 50   *
51    - * PG_buddy is set to indicate that the page is free and in the buddy system
52    - * (see mm/page_alloc.c).
53    - *
54 51   * PG_hwpoison indicates that a page got corrupted in hardware and contains
55 52   * data with incorrect ECC bits that triggered a machine check. Accessing is
56 53   * not safe since it may cause another machine check. Don't touch!
···
93 96   	PG_swapcache,		/* Swap page: swp_entry_t in private */
94 97   	PG_mappedtodisk,	/* Has blocks allocated on-disk */
95 98   	PG_reclaim,		/* To be reclaimed asap */
96    - 	PG_buddy,		/* Page is free, on buddy lists */
97 99   	PG_swapbacked,		/* Page is backed by RAM/swap */
98 100  	PG_unevictable,		/* Page is "unevictable" */
99 101  #ifdef CONFIG_MMU
···
229 233  * risky: they bypass page accounting.
230 234  */
231 235 TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
232     - __PAGEFLAG(Buddy, buddy)
233 236 PAGEFLAG(MappedToDisk, mappedtodisk)
234 237 
235 238 /* PG_readahead is only used for file reads; PG_reclaim is only for writes */
···
456 461 #define PAGE_FLAGS_CHECK_AT_FREE \
457 462 	(1 << PG_lru | 1 << PG_locked | \
458 463 	 1 << PG_private | 1 << PG_private_2 | \
459     - 	 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
    464 + 	 1 << PG_writeback | 1 << PG_reserved | \
460 465 	 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
461 466 	 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
462 467 	 __PG_COMPOUND_LOCK)
+8 -6
mm/memory_hotplug.c
···
82 82   
83 83   #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
84 84   #ifndef CONFIG_SPARSEMEM_VMEMMAP
85    - static void get_page_bootmem(unsigned long info, struct page *page, int type)
   85 + static void get_page_bootmem(unsigned long info, struct page *page,
   86 + 			     unsigned long type)
86 87   {
87    - 	atomic_set(&page->_mapcount, type);
   88 + 	page->lru.next = (struct list_head *) type;
88 89   	SetPagePrivate(page);
89 90   	set_page_private(page, info);
90 91   	atomic_inc(&page->_count);
···
95 94    * so use __ref to tell modpost not to generate a warning */
96 95   void __ref put_page_bootmem(struct page *page)
97 96   {
98    - 	int type;
   97 + 	unsigned long type;
99 98   
100    - 	type = atomic_read(&page->_mapcount);
101    - 	BUG_ON(type >= -1);
   99  + 	type = (unsigned long) page->lru.next;
   100 + 	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
   101 + 	       type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
102 102  
103 103  	if (atomic_dec_return(&page->_count) == 1) {
104 104  		ClearPagePrivate(page);
105 105  		set_page_private(page, 0);
106     - 		reset_page_mapcount(page);
    106 + 		INIT_LIST_HEAD(&page->lru);
107 107  		__free_pages_bootmem(page, 0);
108 108  
109 109  
+3 -4
mm/page_alloc.c
···
449 449  * (c) a page and its buddy have the same order &&
450 450  * (d) a page and its buddy are in the same zone.
451 451  *
452    - * For recording whether a page is in the buddy system, we use PG_buddy.
453    - * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
   452 + * For recording whether a page is in the buddy system, we set ->_mapcount -2.
   453 + * Setting, clearing, and testing _mapcount -2 is serialized by zone->lock.
454 454  *
455 455  * For recording page's order, we use page_private(page).
456 456  */
···
483 483  * as necessary, plus some accounting needed to play nicely with other
484 484  * parts of the VM system.
485 485  * At each level, we keep a list of pages, which are heads of continuous
486    - * free pages of length of (1 << order) and marked with PG_buddy. Page's
   486 + * free pages of length of (1 << order) and marked with _mapcount -2. Page's
487 487  * order is recorded in page_private(page) field.
488 488  * So when we are allocating or freeing one, we can derive the state of the
489 489  * other. That is, if we allocate a small block, and both were
···
5574 5574 	{1UL << PG_swapcache,		"swapcache"	},
5575 5575 	{1UL << PG_mappedtodisk,	"mappedtodisk"	},
5576 5576 	{1UL << PG_reclaim,		"reclaim"	},
5577      -	{1UL << PG_buddy,		"buddy"		},
5578 5577 	{1UL << PG_swapbacked,		"swapbacked"	},
5579 5578 	{1UL << PG_unevictable,		"unevictable"	},
5580 5579 #ifdef CONFIG_MMU
+2 -2
mm/sparse.c
···
671 671 static void free_map_bootmem(struct page *page, unsigned long nr_pages)
672 672 {
673 673 	unsigned long maps_section_nr, removing_section_nr, i;
674    -	int magic;
   674 +	unsigned long magic;
675 675 
676 676 	for (i = 0; i < nr_pages; i++, page++) {
677    -		magic = atomic_read(&page->_mapcount);
   677 +		magic = (unsigned long) page->lru.next;
678 678 
679 679 		BUG_ON(magic == NODE_INFO);
680 680 