x86: introduce page pool in cpa

DEBUG_PAGEALLOC was not possible on 64-bit due to its hardcoded
early-boot reliance on PSE pages and the fragility of the runtime
split-up of large pages. The split-up ended in recursive calls to
alloc_pages() when a page for a pte split was requested.

Avoid the recursion with a preallocated page pool, which is used to
split up large mappings and is refilled in the return path of
kernel_map_pages() after the split has been done. The size of the page
pool is scaled with the available memory.

This part only implements the page pool and its initialization,
without using it yet.
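
For illustration only, here is a minimal sketch of how a follow-up
change might take a page out of the pool during a pte split instead of
calling alloc_pages(); the helper name cpa_get_pool_page() is
hypothetical and the exact list handling is an assumption based on the
pool data added in pageattr.c below, not the actual follow-up patch:

	/*
	 * Hypothetical pool consumer (sketch only, not part of this
	 * patch): take one preallocated page under pgd_lock so that the
	 * pte split path never calls alloc_pages() itself.
	 */
	static struct page *cpa_get_pool_page(void)
	{
		struct page *base = NULL;
		unsigned long flags;

		spin_lock_irqsave(&pgd_lock, flags);
		if (!list_empty(&page_pool)) {
			base = list_first_entry(&page_pool, struct page, lru);
			list_del(&base->lru);
			pool_pages--;
			if (pool_pages < pool_low)
				pool_low = pool_pages;
		}
		spin_unlock_irqrestore(&pgd_lock, flags);

		return base;
	}

Whether the real consumer also bumps pool_used or retries via
cpa_fill_pool() on an empty pool is left to the patch that starts
using the pool.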

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

+87 -1
+2
arch/x86/mm/init_32.c
···
 	if (boot_cpu_data.wp_works_ok < 0)
 		test_wp_bit();
 
+	cpa_init();
+
 	/*
 	 * Subtle. SMP is doing it's boot stuff late (because it has to
 	 * fork idle threads) - but it also needs low mappings for the
+2
arch/x86/mm/init_64.c
···
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10);
+
+	cpa_init();
 }
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
+81 -1
arch/x86/mm/pageattr.c
···
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/interrupt.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
···
 	return do_split;
 }
 
+static LIST_HEAD(page_pool);
+static unsigned long pool_size, pool_pages, pool_low;
+static unsigned long pool_used, pool_failed, pool_refill;
+
+static void cpa_fill_pool(void)
+{
+	struct page *p;
+	gfp_t gfp = GFP_KERNEL;
+
+	/* Do not allocate from interrupt context */
+	if (in_irq() || irqs_disabled())
+		return;
+	/*
+	 * Check unlocked. I does not matter when we have one more
+	 * page in the pool. The bit lock avoids recursive pool
+	 * allocations:
+	 */
+	if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+		return;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/*
+	 * We could do:
+	 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
+	 * but this fails on !PREEMPT kernels
+	 */
+	gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
+#endif
+
+	while (pool_pages < pool_size) {
+		p = alloc_pages(gfp, 0);
+		if (!p) {
+			pool_failed++;
+			break;
+		}
+		spin_lock_irq(&pgd_lock);
+		list_add(&p->lru, &page_pool);
+		pool_pages++;
+		spin_unlock_irq(&pgd_lock);
+	}
+	clear_bit_unlock(0, &pool_refill);
+}
+
+#define SHIFT_MB (20 - PAGE_SHIFT)
+#define ROUND_MB_GB ((1 << 10) - 1)
+#define SHIFT_MB_GB 10
+#define POOL_PAGES_PER_GB 16
+
+void __init cpa_init(void)
+{
+	struct sysinfo si;
+	unsigned long gb;
+
+	si_meminfo(&si);
+	/*
+	 * Calculate the number of pool pages:
+	 *
+	 * Convert totalram (nr of pages) to MiB and round to the next
+	 * GiB. Shift MiB to Gib and multiply the result by
+	 * POOL_PAGES_PER_GB:
+	 */
+	gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+	pool_size = POOL_PAGES_PER_GB * gb;
+	pool_low = pool_size;
+
+	cpa_fill_pool();
+	printk(KERN_DEBUG
+	       "CPA: page pool initialized %lu of %lu pages preallocated\n",
+	       pool_pages, pool_size);
+}
+
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
 	unsigned long flags, pfn, pfninc = 1;
···
 	 * Check whether we really changed something:
 	 */
 	if (!cpa.flushtlb)
-		return ret;
+		goto out;
 
 	/*
 	 * No need to flush, when we did not set any of the caching
···
 	else
 		cpa_flush_all(cache);
 
+out:
+	cpa_fill_pool();
 	return ret;
 }
 
···
 	 * but that can deadlock->flush only current cpu:
 	 */
 	__flush_tlb_all();
+
+	/*
+	 * Try to refill the page pool here. We can do this only after
+	 * the tlb flush.
+	 */
+	cpa_fill_pool();
 }
 #endif
 
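
As an editor's aside, the sizing arithmetic in cpa_init() above can be
checked with a small user-space mock-up; the PAGE_SHIFT value and the
two totalram figures below are assumed example inputs, not
measurements:

	/* Stand-alone mock-up of the cpa_init() pool sizing math. */
	#include <stdio.h>

	#define PAGE_SHIFT		12			/* assume 4 KiB pages */
	#define SHIFT_MB		(20 - PAGE_SHIFT)	/* pages -> MiB */
	#define ROUND_MB_GB		((1 << 10) - 1)		/* round MiB up to full GiB */
	#define SHIFT_MB_GB		10			/* MiB -> GiB */
	#define POOL_PAGES_PER_GB	16

	int main(void)
	{
		/* example totalram values: 2 GiB and 4 GiB in 4 KiB pages */
		unsigned long totalram[] = { 524288UL, 1048576UL };

		for (int i = 0; i < 2; i++) {
			unsigned long gb, pool_size;

			gb = ((totalram[i] >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
			pool_size = POOL_PAGES_PER_GB * gb;

			printf("totalram %lu pages -> %lu GiB -> pool_size %lu pages\n",
			       totalram[i], gb, pool_size);
		}
		return 0;
	}

With 4 KiB pages this comes out to 32 pool pages for a 2 GiB machine
and 64 for a 4 GiB machine, i.e. 16 pages (64 KiB) of pool per GiB of
RAM, matching the POOL_PAGES_PER_GB define in the hunk above.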
+2
include/asm-x86/cacheflush.h
···
 
 void clflush_cache_range(void *addr, unsigned int size);
 
+void cpa_init(void);
+
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void);
 #endif