Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/mm: Make page table size a variable

Radix and hash MMU models support different page table sizes. Make
the #defines a variable so that existing code can work with variable
sizes.

Slice related code is only used by hash, so use hash constants there. We
will replicate some of the boundary conditions with respect to TASK_SIZE
using radix values too. Right now we do boundary condition check using
hash constants.

Swapper pgdir size is initialized in asm code. We select the max pgd
size to keep it simple. For now we select hash pgdir. When adding radix
we will switch that to radix pgdir which is 64K.

BUILD_BUG_ON check which is removed is already done in hugepage_init()
using MAYBE_BUILD_BUG_ON().

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

authored by

Aneesh Kumar K.V and committed by
Michael Ellerman
dd1842a2 13f829a5

+135 -97
+8 -37
arch/powerpc/include/asm/book3s/64/hash-4k.h
··· 5 5 * for each page table entry. The PMD and PGD level use a 32b record for 6 6 * each entry by assuming that each entry is page aligned. 7 7 */ 8 - #define PTE_INDEX_SIZE 9 9 - #define PMD_INDEX_SIZE 7 10 - #define PUD_INDEX_SIZE 9 11 - #define PGD_INDEX_SIZE 9 8 + #define H_PTE_INDEX_SIZE 9 9 + #define H_PMD_INDEX_SIZE 7 10 + #define H_PUD_INDEX_SIZE 9 11 + #define H_PGD_INDEX_SIZE 9 12 12 13 13 #ifndef __ASSEMBLY__ 14 - #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) 15 - #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) 16 - #define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) 17 - #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) 18 - #endif /* __ASSEMBLY__ */ 19 - 20 - #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) 21 - #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) 22 - #define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) 23 - #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) 24 - 25 - /* PMD_SHIFT determines what a second-level page table entry can map */ 26 - #define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) 27 - #define PMD_SIZE (1UL << PMD_SHIFT) 28 - #define PMD_MASK (~(PMD_SIZE-1)) 14 + #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) 15 + #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE) 16 + #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) 17 + #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) 29 18 30 19 /* With 4k base page size, hugepage PTEs go at the PMD level */ 31 20 #define MIN_HUGEPTE_SHIFT PMD_SHIFT 32 - 33 - /* PUD_SHIFT determines what a third-level page table entry can map */ 34 - #define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) 35 - #define PUD_SIZE (1UL << PUD_SHIFT) 36 - #define PUD_MASK (~(PUD_SIZE-1)) 37 - 38 - /* PGDIR_SHIFT determines what a fourth-level page table entry can map */ 39 - #define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) 40 - #define PGDIR_SIZE (1UL << PGDIR_SHIFT) 41 - #define PGDIR_MASK (~(PGDIR_SIZE-1)) 42 - 43 - /* Bits to mask out from a PMD to get to the PTE page 
*/ 44 - #define PMD_MASKED_BITS 0 45 - /* Bits to mask out from a PUD to get to the PMD page */ 46 - #define PUD_MASKED_BITS 0 47 - /* Bits to mask out from a PGD to get to the PUD page */ 48 - #define PGD_MASKED_BITS 0 49 21 50 22 /* PTE flags to conserve for HPTE identification */ 51 23 #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ ··· 28 56 #define H_PAGE_4K_PFN 0x0 29 57 #define H_PAGE_THP_HUGE 0x0 30 58 #define H_PAGE_COMBO 0x0 31 - #ifndef __ASSEMBLY__ 32 59 /* 33 60 * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() 34 61 */
+10 -36
arch/powerpc/include/asm/book3s/64/hash-64k.h
··· 1 1 #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H 2 2 #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H 3 3 4 - #define PTE_INDEX_SIZE 8 5 - #define PMD_INDEX_SIZE 5 6 - #define PUD_INDEX_SIZE 5 7 - #define PGD_INDEX_SIZE 12 8 - 9 - #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) 10 - #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) 11 - #define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) 12 - #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) 4 + #define H_PTE_INDEX_SIZE 8 5 + #define H_PMD_INDEX_SIZE 5 6 + #define H_PUD_INDEX_SIZE 5 7 + #define H_PGD_INDEX_SIZE 12 13 8 14 9 /* With 4k base page size, hugepage PTEs go at the PMD level */ 15 10 #define MIN_HUGEPTE_SHIFT PAGE_SHIFT 16 - 17 - /* PMD_SHIFT determines what a second-level page table entry can map */ 18 - #define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) 19 - #define PMD_SIZE (1UL << PMD_SHIFT) 20 - #define PMD_MASK (~(PMD_SIZE-1)) 21 - 22 - /* PUD_SHIFT determines what a third-level page table entry can map */ 23 - #define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) 24 - #define PUD_SIZE (1UL << PUD_SHIFT) 25 - #define PUD_MASK (~(PUD_SIZE-1)) 26 - 27 - /* PGDIR_SHIFT determines what a fourth-level page table entry can map */ 28 - #define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) 29 - #define PGDIR_SIZE (1UL << PGDIR_SHIFT) 30 - #define PGDIR_MASK (~(PGDIR_SIZE-1)) 31 11 32 12 #define H_PAGE_COMBO 0x00001000 /* this is a combo 4k page */ 33 13 #define H_PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ ··· 36 56 */ 37 57 #define PTE_FRAG_SIZE_SHIFT 12 38 58 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) 39 - 40 - /* Bits to mask out from a PMD to get to the PTE page */ 41 - #define PMD_MASKED_BITS 0xc0000000000000ffUL 42 - /* Bits to mask out from a PUD to get to the PMD page */ 43 - #define PUD_MASKED_BITS 0xc0000000000000ffUL 44 - /* Bits to mask out from a PGD to get to the PUD page */ 45 - #define PGD_MASKED_BITS 0xc0000000000000ffUL 46 59 47 60 #ifndef __ASSEMBLY__ 48 61 #include <asm/errno.h> ··· 108 135 __pgprot(pgprot_val(prot) 
| H_PAGE_4K_PFN)); 109 136 } 110 137 111 - #define PTE_TABLE_SIZE PTE_FRAG_SIZE 138 + #define H_PTE_TABLE_SIZE PTE_FRAG_SIZE 112 139 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 113 - #define PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + (sizeof(unsigned long) << PMD_INDEX_SIZE)) 140 + #define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \ 141 + (sizeof(unsigned long) << PMD_INDEX_SIZE)) 114 142 #else 115 - #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) 143 + #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) 116 144 #endif 117 - #define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) 118 - #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) 145 + #define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) 146 + #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) 119 147 120 148 #ifdef CONFIG_HUGETLB_PAGE 121 149 /*
+9 -5
arch/powerpc/include/asm/book3s/64/hash.h
··· 29 29 /* 30 30 * Size of EA range mapped by our pagetables. 31 31 */ 32 - #define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ 33 - PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) 34 - #define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) 32 + #define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \ 33 + H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) 34 + #define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) 35 35 36 36 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 37 - #define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1) 37 + /* 38 + * only with hash we need to use the second half of pmd page table 39 + * to store pointer to deposited pgtable_t 40 + */ 41 + #define H_PMD_CACHE_INDEX (H_PMD_INDEX_SIZE + 1) 38 42 #else 39 - #define PMD_CACHE_INDEX PMD_INDEX_SIZE 43 + #define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE 40 44 #endif 41 45 /* 42 46 * Define the address range of the kernel non-linear virtual area
+2 -2
arch/powerpc/include/asm/book3s/64/mmu-hash.h
··· 462 462 add rt,rt,rx 463 463 464 464 /* 4 bits per slice and we have one slice per 1TB */ 465 - #define SLICE_ARRAY_SIZE (PGTABLE_RANGE >> 41) 465 + #define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41) 466 466 467 467 #ifndef __ASSEMBLY__ 468 468 ··· 533 533 /* 534 534 * Bad address. We return VSID 0 for that 535 535 */ 536 - if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) 536 + if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) 537 537 return 0; 538 538 539 539 if (ssize == MMU_SEGSIZE_256M)
+60
arch/powerpc/include/asm/book3s/64/pgtable.h
··· 144 144 #define PAGE_KERNEL_EXEC PAGE_KERNEL_X 145 145 #define PAGE_AGP (PAGE_KERNEL_NC) 146 146 147 + #ifndef __ASSEMBLY__ 148 + /* 149 + * page table defines 150 + */ 151 + extern unsigned long __pte_index_size; 152 + extern unsigned long __pmd_index_size; 153 + extern unsigned long __pud_index_size; 154 + extern unsigned long __pgd_index_size; 155 + extern unsigned long __pmd_cache_index; 156 + #define PTE_INDEX_SIZE __pte_index_size 157 + #define PMD_INDEX_SIZE __pmd_index_size 158 + #define PUD_INDEX_SIZE __pud_index_size 159 + #define PGD_INDEX_SIZE __pgd_index_size 160 + #define PMD_CACHE_INDEX __pmd_cache_index 161 + /* 162 + * Because of use of pte fragments and THP, size of page table 163 + * are not always derived out of index size above. 164 + */ 165 + extern unsigned long __pte_table_size; 166 + extern unsigned long __pmd_table_size; 167 + extern unsigned long __pud_table_size; 168 + extern unsigned long __pgd_table_size; 169 + #define PTE_TABLE_SIZE __pte_table_size 170 + #define PMD_TABLE_SIZE __pmd_table_size 171 + #define PUD_TABLE_SIZE __pud_table_size 172 + #define PGD_TABLE_SIZE __pgd_table_size 173 + /* 174 + * Pgtable size used by swapper, init in asm code 175 + * We will switch this later to radix PGD 176 + */ 177 + #define MAX_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) 178 + 179 + #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) 180 + #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) 181 + #define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) 182 + #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) 183 + 184 + /* PMD_SHIFT determines what a second-level page table entry can map */ 185 + #define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) 186 + #define PMD_SIZE (1UL << PMD_SHIFT) 187 + #define PMD_MASK (~(PMD_SIZE-1)) 188 + 189 + /* PUD_SHIFT determines what a third-level page table entry can map */ 190 + #define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) 191 + #define PUD_SIZE (1UL << PUD_SHIFT) 192 + #define PUD_MASK (~(PUD_SIZE-1)) 193 + 194 + /* PGDIR_SHIFT 
determines what a fourth-level page table entry can map */ 195 + #define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) 196 + #define PGDIR_SIZE (1UL << PGDIR_SHIFT) 197 + #define PGDIR_MASK (~(PGDIR_SIZE-1)) 198 + 199 + /* Bits to mask out from a PMD to get to the PTE page */ 200 + #define PMD_MASKED_BITS 0xc0000000000000ffUL 201 + /* Bits to mask out from a PUD to get to the PMD page */ 202 + #define PUD_MASKED_BITS 0xc0000000000000ffUL 203 + /* Bits to mask out from a PGD to get to the PUD page */ 204 + #define PGD_MASKED_BITS 0xc0000000000000ffUL 205 + #endif /* __ASSEMBLY__ */ 206 + 147 207 #include <asm/book3s/64/hash.h> 148 208 #include <asm/barrier.h> 149 209
+1 -1
arch/powerpc/include/asm/page_64.h
··· 93 93 94 94 #define SLICE_LOW_TOP (0x100000000ul) 95 95 #define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) 96 - #define SLICE_NUM_HIGH (PGTABLE_RANGE >> SLICE_HIGH_SHIFT) 96 + #define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT) 97 97 98 98 #define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) 99 99 #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
+4
arch/powerpc/kernel/asm-offsets.c
··· 438 438 DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); 439 439 #endif 440 440 441 + #ifdef MAX_PGD_TABLE_SIZE 442 + DEFINE(PGD_TABLE_SIZE, MAX_PGD_TABLE_SIZE); 443 + #else 441 444 DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); 445 + #endif 442 446 DEFINE(PTE_SIZE, sizeof(pte_t)); 443 447 444 448 #ifdef CONFIG_KVM
+12
arch/powerpc/mm/hash_utils_64.c
··· 870 870 871 871 void __init early_init_mmu(void) 872 872 { 873 + /* 874 + * initialize page table size 875 + */ 876 + __pte_index_size = H_PTE_INDEX_SIZE; 877 + __pmd_index_size = H_PMD_INDEX_SIZE; 878 + __pud_index_size = H_PUD_INDEX_SIZE; 879 + __pgd_index_size = H_PGD_INDEX_SIZE; 880 + __pmd_cache_index = H_PMD_CACHE_INDEX; 881 + __pte_table_size = H_PTE_TABLE_SIZE; 882 + __pmd_table_size = H_PMD_TABLE_SIZE; 883 + __pud_table_size = H_PUD_TABLE_SIZE; 884 + __pgd_table_size = H_PGD_TABLE_SIZE; 873 885 /* Initialize the MMU Hash table and create the linear mapping 874 886 * of memory. Has to be done before SLB initialization as this is 875 887 * currently where the page size encoding is obtained.
+2 -2
arch/powerpc/mm/init_64.c
··· 66 66 #include "mmu_decl.h" 67 67 68 68 #ifdef CONFIG_PPC_STD_MMU_64 69 - #if PGTABLE_RANGE > USER_VSID_RANGE 69 + #if H_PGTABLE_RANGE > USER_VSID_RANGE 70 70 #warning Limited user VSID range means pagetable space is wasted 71 71 #endif 72 72 73 - #if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) 73 + #if (TASK_SIZE_USER64 < H_PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) 74 74 #warning TASK_SIZE is smaller than it needs to be. 75 75 #endif 76 76 #endif /* CONFIG_PPC_STD_MMU_64 */
+1
arch/powerpc/mm/pgtable-book3e.c
··· 77 77 pmd_t *pmdp; 78 78 pte_t *ptep; 79 79 80 + BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE); 80 81 if (slab_is_available()) { 81 82 pgdp = pgd_offset_k(ea); 82 83 pudp = pud_alloc(&init_mm, pgdp, ea);
+1
arch/powerpc/mm/pgtable-hash64.c
··· 61 61 pmd_t *pmdp; 62 62 pte_t *ptep; 63 63 64 + BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE); 64 65 if (slab_is_available()) { 65 66 pgdp = pgd_offset_k(ea); 66 67 pudp = pud_alloc(&init_mm, pgdp, ea);
+22 -11
arch/powerpc/mm/pgtable_64.c
··· 58 58 #define CREATE_TRACE_POINTS 59 59 #include <trace/events/thp.h> 60 60 61 - /* Some sanity checking */ 62 - #if TASK_SIZE_USER64 > PGTABLE_RANGE 63 - #error TASK_SIZE_USER64 exceeds pagetable range 64 - #endif 65 - 66 61 #ifdef CONFIG_PPC_STD_MMU_64 67 62 #if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT)) 68 63 #error TASK_SIZE_USER64 exceeds user VSID range ··· 70 75 */ 71 76 struct prtb_entry *process_tb; 72 77 struct patb_entry *partition_tb; 78 + /* 79 + * page table size 80 + */ 81 + unsigned long __pte_index_size; 82 + EXPORT_SYMBOL(__pte_index_size); 83 + unsigned long __pmd_index_size; 84 + EXPORT_SYMBOL(__pmd_index_size); 85 + unsigned long __pud_index_size; 86 + EXPORT_SYMBOL(__pud_index_size); 87 + unsigned long __pgd_index_size; 88 + EXPORT_SYMBOL(__pgd_index_size); 89 + unsigned long __pmd_cache_index; 90 + EXPORT_SYMBOL(__pmd_cache_index); 91 + unsigned long __pte_table_size; 92 + EXPORT_SYMBOL(__pte_table_size); 93 + unsigned long __pmd_table_size; 94 + EXPORT_SYMBOL(__pmd_table_size); 95 + unsigned long __pud_table_size; 96 + EXPORT_SYMBOL(__pud_table_size); 97 + unsigned long __pgd_table_size; 98 + EXPORT_SYMBOL(__pgd_table_size); 99 + 73 100 #endif 74 101 unsigned long ioremap_bot = IOREMAP_BASE; 75 102 ··· 755 738 756 739 int has_transparent_hugepage(void) 757 740 { 758 - 759 - BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) >= MAX_ORDER, 760 - "hugepages can't be allocated by the buddy allocator"); 761 - 762 - BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) < 2, 763 - "We need more than 2 pages to do deferred thp split"); 764 741 765 742 if (!mmu_has_feature(MMU_FTR_16M_PAGE)) 766 743 return 0;
+1 -1
arch/powerpc/mm/slb_low.S
··· 35 35 * check for bad kernel/user address 36 36 * (ea & ~REGION_MASK) >= PGTABLE_RANGE 37 37 */ 38 - rldicr. r9,r3,4,(63 - PGTABLE_EADDR_SIZE - 4) 38 + rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4) 39 39 bne- 8f 40 40 41 41 srdi r9,r3,60 /* get region */
+2 -2
arch/powerpc/mm/slice.c
··· 37 37 #include <asm/hugetlb.h> 38 38 39 39 /* some sanity checks */ 40 - #if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE 41 - #error PGTABLE_RANGE exceeds slice_mask high_slices size 40 + #if (H_PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE 41 + #error H_PGTABLE_RANGE exceeds slice_mask high_slices size 42 42 #endif 43 43 44 44 static DEFINE_SPINLOCK(slice_convert_lock);