Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sparc64: Eliminate PTE table memory wastage.

We've split up the PTE tables so that they take up half a page instead of
a full page. This is in order to facilitate transparent huge page
support, which works much better if our PMDs cover 4MB instead of 8MB.

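What we do is keep a one-behind cache for PTE table allocations in the
mm struct: when a fresh page is allocated, its first half is handed out
and the page is remembered so that the next allocation can be satisfied
from the second half.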

This logic triggers only on allocations. For example, we don't try to
keep track of freed-up page table blocks in the style that the s390 port
does.

There were only two slightly annoying aspects to this change:

1) Changing pgtable_t to be a "pte_t *". The special logic in the TLB
free paths needed adjustments, as did the PMD populate interfaces.

2) init_new_context() needs to zap the pointer, since the mm struct
just gets copied from the parent on fork.

Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

David Miller and committed by
Linus Torvalds
c460bec7 56a70b8c

+123 -44
+1
arch/sparc/include/asm/mmu_64.h
··· 93 93 spinlock_t lock; 94 94 unsigned long sparc64_ctx_val; 95 95 unsigned long huge_pte_count; 96 + struct page *pgtable_page; 96 97 struct tsb_config tsb_block[MM_NUM_TSBS]; 97 98 struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; 98 99 } mm_context_t;
+1 -1
arch/sparc/include/asm/page_64.h
··· 86 86 87 87 #endif /* (STRICT_MM_TYPECHECKS) */ 88 88 89 - typedef struct page *pgtable_t; 89 + typedef pte_t *pgtable_t; 90 90 91 91 #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ 92 92 (_AC(0x0000000070000000,UL)) : \
+11 -43
arch/sparc/include/asm/pgalloc_64.h
··· 38 38 kmem_cache_free(pgtable_cache, pmd); 39 39 } 40 40 41 - static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, 42 - unsigned long address) 43 - { 44 - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); 45 - } 46 - 47 - static inline pgtable_t pte_alloc_one(struct mm_struct *mm, 48 - unsigned long address) 49 - { 50 - struct page *page; 51 - pte_t *pte; 52 - 53 - pte = pte_alloc_one_kernel(mm, address); 54 - if (!pte) 55 - return NULL; 56 - page = virt_to_page(pte); 57 - pgtable_page_ctor(page); 58 - return page; 59 - } 60 - 61 - static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) 62 - { 63 - free_page((unsigned long)pte); 64 - } 65 - 66 - static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) 67 - { 68 - pgtable_page_dtor(ptepage); 69 - __free_page(ptepage); 70 - } 41 + extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, 42 + unsigned long address); 43 + extern pgtable_t pte_alloc_one(struct mm_struct *mm, 44 + unsigned long address); 45 + extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte); 46 + extern void pte_free(struct mm_struct *mm, pgtable_t ptepage); 71 47 72 48 #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE) 73 - #define pmd_populate(MM,PMD,PTE_PAGE) \ 74 - pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE)) 75 - #define pmd_pgtable(pmd) pmd_page(pmd) 49 + #define pmd_populate(MM, PMD, PTE) pmd_set(PMD, PTE) 50 + #define pmd_pgtable(PMD) ((pte_t *)__pmd_page(PMD)) 76 51 77 52 #define check_pgt_cache() do { } while (0) 78 53 79 - static inline void pgtable_free(void *table, bool is_page) 80 - { 81 - if (is_page) 82 - free_page((unsigned long)table); 83 - else 84 - kmem_cache_free(pgtable_cache, table); 85 - } 54 + extern void pgtable_free(void *table, bool is_page); 86 55 87 56 #ifdef CONFIG_SMP 88 57 ··· 82 113 } 83 114 #endif /* !CONFIG_SMP */ 84 115 85 - static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage, 116 + static inline void 
__pte_free_tlb(struct mmu_gather *tlb, pte_t *pte, 86 117 unsigned long address) 87 118 { 88 - pgtable_page_dtor(ptepage); 89 - pgtable_free_tlb(tlb, page_address(ptepage), true); 119 + pgtable_free_tlb(tlb, pte, true); 90 120 } 91 121 92 122 #define __pmd_free_tlb(tlb, pmd, addr) \
+101
arch/sparc/mm/init_64.c
··· 2467 2467 __asm__ __volatile__("wrpr %0, 0, %%pstate" 2468 2468 : : "r" (pstate)); 2469 2469 } 2470 + 2471 + static pte_t *get_from_cache(struct mm_struct *mm) 2472 + { 2473 + struct page *page; 2474 + pte_t *ret; 2475 + 2476 + spin_lock(&mm->page_table_lock); 2477 + page = mm->context.pgtable_page; 2478 + ret = NULL; 2479 + if (page) { 2480 + void *p = page_address(page); 2481 + 2482 + mm->context.pgtable_page = NULL; 2483 + 2484 + ret = (pte_t *) (p + (PAGE_SIZE / 2)); 2485 + } 2486 + spin_unlock(&mm->page_table_lock); 2487 + 2488 + return ret; 2489 + } 2490 + 2491 + static struct page *__alloc_for_cache(struct mm_struct *mm) 2492 + { 2493 + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | 2494 + __GFP_REPEAT | __GFP_ZERO); 2495 + 2496 + if (page) { 2497 + spin_lock(&mm->page_table_lock); 2498 + if (!mm->context.pgtable_page) { 2499 + atomic_set(&page->_count, 2); 2500 + mm->context.pgtable_page = page; 2501 + } 2502 + spin_unlock(&mm->page_table_lock); 2503 + } 2504 + return page; 2505 + } 2506 + 2507 + pte_t *pte_alloc_one_kernel(struct mm_struct *mm, 2508 + unsigned long address) 2509 + { 2510 + struct page *page; 2511 + pte_t *pte; 2512 + 2513 + pte = get_from_cache(mm); 2514 + if (pte) 2515 + return pte; 2516 + 2517 + page = __alloc_for_cache(mm); 2518 + if (page) 2519 + pte = (pte_t *) page_address(page); 2520 + 2521 + return pte; 2522 + } 2523 + 2524 + pgtable_t pte_alloc_one(struct mm_struct *mm, 2525 + unsigned long address) 2526 + { 2527 + struct page *page; 2528 + pte_t *pte; 2529 + 2530 + pte = get_from_cache(mm); 2531 + if (pte) 2532 + return pte; 2533 + 2534 + page = __alloc_for_cache(mm); 2535 + if (page) { 2536 + pgtable_page_ctor(page); 2537 + pte = (pte_t *) page_address(page); 2538 + } 2539 + 2540 + return pte; 2541 + } 2542 + 2543 + void pte_free_kernel(struct mm_struct *mm, pte_t *pte) 2544 + { 2545 + struct page *page = virt_to_page(pte); 2546 + if (put_page_testzero(page)) 2547 + free_hot_cold_page(page, 0); 2548 + } 2549 + 
2550 + static void __pte_free(pgtable_t pte) 2551 + { 2552 + struct page *page = virt_to_page(pte); 2553 + if (put_page_testzero(page)) { 2554 + pgtable_page_dtor(page); 2555 + free_hot_cold_page(page, 0); 2556 + } 2557 + } 2558 + 2559 + void pte_free(struct mm_struct *mm, pgtable_t pte) 2560 + { 2561 + __pte_free(pte); 2562 + } 2563 + 2564 + void pgtable_free(void *table, bool is_page) 2565 + { 2566 + if (is_page) 2567 + __pte_free(table); 2568 + else 2569 + kmem_cache_free(pgtable_cache, table); 2570 + }
+9
arch/sparc/mm/tsb.c
··· 445 445 mm->context.huge_pte_count = 0; 446 446 #endif 447 447 448 + mm->context.pgtable_page = NULL; 449 + 448 450 /* copy_mm() copies over the parent's mm_struct before calling 449 451 * us, so we need to zero out the TSB pointer or else tsb_grow() 450 452 * will be confused and think there is an older TSB to free up. ··· 485 483 void destroy_context(struct mm_struct *mm) 486 484 { 487 485 unsigned long flags, i; 486 + struct page *page; 488 487 489 488 for (i = 0; i < MM_NUM_TSBS; i++) 490 489 tsb_destroy_one(&mm->context.tsb_block[i]); 490 + 491 + page = mm->context.pgtable_page; 492 + if (page && put_page_testzero(page)) { 493 + pgtable_page_dtor(page); 494 + free_hot_cold_page(page, 0); 495 + } 491 496 492 497 spin_lock_irqsave(&ctx_alloc_lock, flags); 493 498