Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sparc32: mm: Restructure sparc32 MMU page-table layout

The "SRMMU" supports 4k pages using a fixed three-level walk with a
256-entry PGD and 64-entry PMD/PTE levels. In order to fill a page
with a 'pgtable_t', the SRMMU code allocates four native PTE tables
into a single PTE allocation and similarly for the PMD level, leading
to an array of 16 physical pointers in a 'pmd_t'.

This breaks the generic code which assumes READ_ONCE(*pmd) will be
word sized.

In a manner similar to ef22d8abd876 ("m68k: mm: Restructure Motorola
MMU page-table layout"), this patch implements the native page-table
setup directly. This significantly increases the page-table memory
overhead, but this will be addressed in a subsequent patch.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Will Deacon and committed by
David S. Miller
8e958839 ed894bf5

+58 -103
+5 -5
arch/sparc/include/asm/page_32.h
··· 54 54 */ 55 55 typedef struct { unsigned long pte; } pte_t; 56 56 typedef struct { unsigned long iopte; } iopte_t; 57 - typedef struct { unsigned long pmdv[16]; } pmd_t; 57 + typedef struct { unsigned long pmd; } pmd_t; 58 58 typedef struct { unsigned long pgd; } pgd_t; 59 59 typedef struct { unsigned long ctxd; } ctxd_t; 60 60 typedef struct { unsigned long pgprot; } pgprot_t; ··· 62 62 63 63 #define pte_val(x) ((x).pte) 64 64 #define iopte_val(x) ((x).iopte) 65 - #define pmd_val(x) ((x).pmdv[0]) 65 + #define pmd_val(x) ((x).pmd) 66 66 #define pgd_val(x) ((x).pgd) 67 67 #define ctxd_val(x) ((x).ctxd) 68 68 #define pgprot_val(x) ((x).pgprot) ··· 82 82 */ 83 83 typedef unsigned long pte_t; 84 84 typedef unsigned long iopte_t; 85 - typedef struct { unsigned long pmdv[16]; } pmd_t; 85 + typedef unsigned long pmd_t; 86 86 typedef unsigned long pgd_t; 87 87 typedef unsigned long ctxd_t; 88 88 typedef unsigned long pgprot_t; ··· 90 90 91 91 #define pte_val(x) (x) 92 92 #define iopte_val(x) (x) 93 - #define pmd_val(x) ((x).pmdv[0]) 93 + #define pmd_val(x) (x) 94 94 #define pgd_val(x) (x) 95 95 #define ctxd_val(x) (x) 96 96 #define pgprot_val(x) (x) 97 97 #define iopgprot_val(x) (x) 98 98 99 99 #define __pte(x) (x) 100 - #define __pmd(x) ((pmd_t) { { (x) }, }) 100 + #define __pmd(x) (x) 101 101 #define __iopte(x) (x) 102 102 #define __pgd(x) (x) 103 103 #define __ctxd(x) (x)
+3 -2
arch/sparc/include/asm/pgalloc_32.h
··· 60 60 61 61 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) 62 62 { 63 - return srmmu_get_nocache(PTE_SIZE, PTE_SIZE); 63 + return srmmu_get_nocache(SRMMU_PTE_TABLE_SIZE, 64 + SRMMU_PTE_TABLE_SIZE); 64 65 } 65 66 66 67 67 68 static inline void free_pte_fast(pte_t *pte) 68 69 { 69 - srmmu_free_nocache(pte, PTE_SIZE); 70 + srmmu_free_nocache(pte, SRMMU_PTE_TABLE_SIZE); 70 71 } 71 72 72 73 #define pte_free_kernel(mm, pte) free_pte_fast(pte)
+15 -14
arch/sparc/include/asm/pgtable_32.h
··· 11 11 12 12 #include <linux/const.h> 13 13 14 + #define PMD_SHIFT 18 15 + #define PMD_SIZE (1UL << PMD_SHIFT) 16 + #define PMD_MASK (~(PMD_SIZE-1)) 17 + #define PMD_ALIGN(__addr) (((__addr) + ~PMD_MASK) & PMD_MASK) 18 + 19 + #define PGDIR_SHIFT 24 20 + #define PGDIR_SIZE (1UL << PGDIR_SHIFT) 21 + #define PGDIR_MASK (~(PGDIR_SIZE-1)) 22 + #define PGDIR_ALIGN(__addr) (((__addr) + ~PGDIR_MASK) & PGDIR_MASK) 23 + 14 24 #ifndef __ASSEMBLY__ 15 25 #include <asm-generic/pgtable-nopud.h> 16 26 ··· 44 34 #define pmd_ERROR(e) __builtin_trap() 45 35 #define pgd_ERROR(e) __builtin_trap() 46 36 47 - #define PMD_SHIFT 22 48 - #define PMD_SIZE (1UL << PMD_SHIFT) 49 - #define PMD_MASK (~(PMD_SIZE-1)) 50 - #define PMD_ALIGN(__addr) (((__addr) + ~PMD_MASK) & PMD_MASK) 51 - #define PGDIR_SHIFT SRMMU_PGDIR_SHIFT 52 - #define PGDIR_SIZE SRMMU_PGDIR_SIZE 53 - #define PGDIR_MASK SRMMU_PGDIR_MASK 54 - #define PTRS_PER_PTE 1024 55 - #define PTRS_PER_PMD SRMMU_PTRS_PER_PMD 56 - #define PTRS_PER_PGD SRMMU_PTRS_PER_PGD 57 - #define USER_PTRS_PER_PGD PAGE_OFFSET / SRMMU_PGDIR_SIZE 37 + #define PTRS_PER_PTE 64 38 + #define PTRS_PER_PMD 64 39 + #define PTRS_PER_PGD 256 40 + #define USER_PTRS_PER_PGD PAGE_OFFSET / PGDIR_SIZE 58 41 #define FIRST_USER_ADDRESS 0UL 59 42 #define PTE_SIZE (PTRS_PER_PTE*4) 60 43 ··· 182 179 183 180 static inline void pmd_clear(pmd_t *pmdp) 184 181 { 185 - int i; 186 - for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) 187 - set_pte((pte_t *)&pmdp->pmdv[i], __pte(0)); 182 + set_pte((pte_t *)&pmd_val(*pmdp), __pte(0)); 188 183 } 189 184 190 185 static inline int pud_none(pud_t pud)
+3 -33
arch/sparc/include/asm/pgtsrmmu.h
··· 17 17 /* Number of contexts is implementation-dependent; 64k is the most we support */ 18 18 #define SRMMU_MAX_CONTEXTS 65536 19 19 20 - /* PMD_SHIFT determines the size of the area a second-level page table entry can map */ 21 - #define SRMMU_REAL_PMD_SHIFT 18 22 - #define SRMMU_REAL_PMD_SIZE (1UL << SRMMU_REAL_PMD_SHIFT) 23 - #define SRMMU_REAL_PMD_MASK (~(SRMMU_REAL_PMD_SIZE-1)) 24 - #define SRMMU_REAL_PMD_ALIGN(__addr) (((__addr)+SRMMU_REAL_PMD_SIZE-1)&SRMMU_REAL_PMD_MASK) 25 - 26 - /* PGDIR_SHIFT determines what a third-level page table entry can map */ 27 - #define SRMMU_PGDIR_SHIFT 24 28 - #define SRMMU_PGDIR_SIZE (1UL << SRMMU_PGDIR_SHIFT) 29 - #define SRMMU_PGDIR_MASK (~(SRMMU_PGDIR_SIZE-1)) 30 - #define SRMMU_PGDIR_ALIGN(addr) (((addr)+SRMMU_PGDIR_SIZE-1)&SRMMU_PGDIR_MASK) 31 - 32 - #define SRMMU_REAL_PTRS_PER_PTE 64 33 - #define SRMMU_REAL_PTRS_PER_PMD 64 34 - #define SRMMU_PTRS_PER_PGD 256 35 - 36 - #define SRMMU_REAL_PTE_TABLE_SIZE (SRMMU_REAL_PTRS_PER_PTE*4) 37 - #define SRMMU_PMD_TABLE_SIZE (SRMMU_REAL_PTRS_PER_PMD*4) 38 - #define SRMMU_PGD_TABLE_SIZE (SRMMU_PTRS_PER_PGD*4) 39 - 40 - /* 41 - * To support pagetables in highmem, Linux introduces APIs which 42 - * return struct page* and generally manipulate page tables when 43 - * they are not mapped into kernel space. Our hardware page tables 44 - * are smaller than pages. We lump hardware tabes into big, page sized 45 - * software tables. 46 - * 47 - * PMD_SHIFT determines the size of the area a second-level page table entry 48 - * can map, and our pmd_t is 16 times larger than normal. The values which 49 - * were once defined here are now generic for 4c and srmmu, so they're 50 - * found in pgtable.h. 51 - */ 52 - #define SRMMU_PTRS_PER_PMD 4 20 + #define SRMMU_PTE_TABLE_SIZE (PAGE_SIZE) 21 + #define SRMMU_PMD_TABLE_SIZE (PAGE_SIZE) 22 + #define SRMMU_PGD_TABLE_SIZE (PTRS_PER_PGD*4) 53 23 54 24 /* Definition of the values in the ET field of PTD's and PTE's */ 55 25 #define SRMMU_ET_MASK 0x3
+3 -2
arch/sparc/include/asm/viking.h
··· 10 10 11 11 #include <asm/asi.h> 12 12 #include <asm/mxcc.h> 13 + #include <asm/pgtable.h> 13 14 #include <asm/pgtsrmmu.h> 14 15 15 16 /* Bits in the SRMMU control register for GNU/Viking modules. ··· 228 227 : "=r" (val) 229 228 : "r" (vaddr | 0x200), "i" (ASI_M_FLUSH_PROBE)); 230 229 if ((val & SRMMU_ET_MASK) == SRMMU_ET_PTE) { 231 - vaddr &= ~SRMMU_PGDIR_MASK; 230 + vaddr &= ~PGDIR_MASK; 232 231 vaddr >>= PAGE_SHIFT; 233 232 return val | (vaddr << 8); 234 233 } ··· 238 237 : "=r" (val) 239 238 : "r" (vaddr | 0x100), "i" (ASI_M_FLUSH_PROBE)); 240 239 if ((val & SRMMU_ET_MASK) == SRMMU_ET_PTE) { 241 - vaddr &= ~SRMMU_REAL_PMD_MASK; 240 + vaddr &= ~PMD_MASK; 242 241 vaddr >>= PAGE_SHIFT; 243 242 return val | (vaddr << 8); 244 243 }
+4 -4
arch/sparc/kernel/head_32.S
··· 24 24 #include <asm/winmacro.h> 25 25 #include <asm/thread_info.h> /* TI_UWINMASK */ 26 26 #include <asm/errno.h> 27 - #include <asm/pgtsrmmu.h> /* SRMMU_PGDIR_SHIFT */ 27 + #include <asm/pgtable.h> /* PGDIR_SHIFT */ 28 28 #include <asm/export.h> 29 29 30 30 .data ··· 273 273 lda [%o1] ASI_M_BYPASS, %o2 ! This is the 0x0 16MB pgd 274 274 275 275 /* Calculate to KERNBASE entry. */ 276 - add %o1, KERNBASE >> (SRMMU_PGDIR_SHIFT - 2), %o3 276 + add %o1, KERNBASE >> (PGDIR_SHIFT - 2), %o3 277 277 278 278 /* Poke the entry into the calculated address. */ 279 279 sta %o2, [%o3] ASI_M_BYPASS ··· 317 317 sll %g1, 0x8, %g1 ! make phys addr for l1 tbl 318 318 319 319 lda [%g1] ASI_M_BYPASS, %g2 ! get level1 entry for 0x0 320 - add %g1, KERNBASE >> (SRMMU_PGDIR_SHIFT - 2), %g3 320 + add %g1, KERNBASE >> (PGDIR_SHIFT - 2), %g3 321 321 sta %g2, [%g3] ASI_M_BYPASS ! place at KERNBASE entry 322 322 b go_to_highmem 323 323 nop ! wheee.... ··· 341 341 sll %g1, 0x8, %g1 ! make phys addr for l1 tbl 342 342 343 343 lda [%g1] ASI_M_BYPASS, %g2 ! get level1 entry for 0x0 344 - add %g1, KERNBASE >> (SRMMU_PGDIR_SHIFT - 2), %g3 344 + add %g1, KERNBASE >> (PGDIR_SHIFT - 2), %g3 345 345 sta %g2, [%g3] ASI_M_BYPASS ! place at KERNBASE entry 346 346 b go_to_highmem 347 347 nop ! wheee....
+2 -1
arch/sparc/mm/hypersparc.S
··· 10 10 #include <asm/asm-offsets.h> 11 11 #include <asm/asi.h> 12 12 #include <asm/page.h> 13 + #include <asm/pgtable.h> 13 14 #include <asm/pgtsrmmu.h> 14 15 #include <linux/init.h> 15 16 ··· 294 293 cmp %o3, -1 295 294 be hypersparc_flush_tlb_range_out 296 295 #endif 297 - sethi %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4 296 + sethi %hi(~((1 << PGDIR_SHIFT) - 1)), %o4 298 297 sta %o3, [%g1] ASI_M_MMUREGS 299 298 and %o1, %o4, %o1 300 299 add %o1, 0x200, %o1
+20 -40
arch/sparc/mm/srmmu.c
··· 136 136 137 137 void pmd_set(pmd_t *pmdp, pte_t *ptep) 138 138 { 139 - unsigned long ptp; /* Physical address, shifted right by 4 */ 140 - int i; 141 - 142 - ptp = __nocache_pa(ptep) >> 4; 143 - for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) { 144 - set_pte((pte_t *)&pmdp->pmdv[i], __pte(SRMMU_ET_PTD | ptp)); 145 - ptp += (SRMMU_REAL_PTRS_PER_PTE * sizeof(pte_t) >> 4); 146 - } 139 + unsigned long ptp = __nocache_pa(ptep) >> 4; 140 + set_pte((pte_t *)&pmd_val(*pmdp), __pte(SRMMU_ET_PTD | ptp)); 147 141 } 148 142 149 143 void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep) 150 144 { 151 - unsigned long ptp; /* Physical address, shifted right by 4 */ 152 - int i; 153 - 154 - ptp = page_to_pfn(ptep) << (PAGE_SHIFT-4); /* watch for overflow */ 155 - for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) { 156 - set_pte((pte_t *)&pmdp->pmdv[i], __pte(SRMMU_ET_PTD | ptp)); 157 - ptp += (SRMMU_REAL_PTRS_PER_PTE * sizeof(pte_t) >> 4); 158 - } 145 + unsigned long ptp = page_to_pfn(ptep) << (PAGE_SHIFT-4); /* watch for overflow */ 146 + set_pte((pte_t *)&pmd_val(*pmdp), __pte(SRMMU_ET_PTD | ptp)); 159 147 } 160 148 161 149 /* Find an entry in the third-level page table.. 
*/ ··· 151 163 { 152 164 void *pte; 153 165 154 - pte = __nocache_va((dir->pmdv[0] & SRMMU_PTD_PMASK) << 4); 166 + pte = __nocache_va((pmd_val(*dir) & SRMMU_PTD_PMASK) << 4); 155 167 return (pte_t *) pte + 156 168 ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)); 157 169 } ··· 388 400 p = page_to_pfn(pte) << PAGE_SHIFT; /* Physical address */ 389 401 390 402 /* free non cached virtual address*/ 391 - srmmu_free_nocache(__nocache_va(p), PTE_SIZE); 403 + srmmu_free_nocache(__nocache_va(p), SRMMU_PTE_TABLE_SIZE); 392 404 } 393 405 394 406 /* context handling - a dynamically sized pool is used */ ··· 810 822 what = 0; 811 823 addr = start - PAGE_SIZE; 812 824 813 - if (!(start & ~(SRMMU_REAL_PMD_MASK))) { 814 - if (srmmu_probe(addr + SRMMU_REAL_PMD_SIZE) == probed) 825 + if (!(start & ~(PMD_MASK))) { 826 + if (srmmu_probe(addr + PMD_SIZE) == probed) 815 827 what = 1; 816 828 } 817 829 818 - if (!(start & ~(SRMMU_PGDIR_MASK))) { 819 - if (srmmu_probe(addr + SRMMU_PGDIR_SIZE) == probed) 830 + if (!(start & ~(PGDIR_MASK))) { 831 + if (srmmu_probe(addr + PGDIR_SIZE) == probed) 820 832 what = 2; 821 833 } 822 834 ··· 825 837 pudp = pud_offset(p4dp, start); 826 838 if (what == 2) { 827 839 *(pgd_t *)__nocache_fix(pgdp) = __pgd(probed); 828 - start += SRMMU_PGDIR_SIZE; 840 + start += PGDIR_SIZE; 829 841 continue; 830 842 } 831 843 if (pud_none(*(pud_t *)__nocache_fix(pudp))) { ··· 837 849 pud_set(__nocache_fix(pudp), pmdp); 838 850 } 839 851 pmdp = pmd_offset(__nocache_fix(pgdp), start); 852 + if (what == 1) { 853 + *(pmd_t *)__nocache_fix(pmdp) = __pmd(probed); 854 + start += PMD_SIZE; 855 + continue; 856 + } 840 857 if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) { 841 858 ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE); 842 859 if (ptep == NULL) 843 860 early_pgtable_allocfail("pte"); 844 861 memset(__nocache_fix(ptep), 0, PTE_SIZE); 845 862 pmd_set(__nocache_fix(pmdp), ptep); 846 - } 847 - if (what == 1) { 848 - /* We bend the rule where all 16 PTPs in a pmd_t point 849 - 
* inside the same PTE page, and we leak a perfectly 850 - * good hardware PTE piece. Alternatives seem worse. 851 - */ 852 - unsigned int x; /* Index of HW PMD in soft cluster */ 853 - unsigned long *val; 854 - x = (start >> PMD_SHIFT) & 15; 855 - val = &pmdp->pmdv[x]; 856 - *(unsigned long *)__nocache_fix(val) = probed; 857 - start += SRMMU_REAL_PMD_SIZE; 858 - continue; 859 863 } 860 864 ptep = pte_offset_kernel(__nocache_fix(pmdp), start); 861 865 *(pte_t *)__nocache_fix(ptep) = __pte(probed); ··· 870 890 /* Map sp_bank entry SP_ENTRY, starting at virtual address VBASE. */ 871 891 static unsigned long __init map_spbank(unsigned long vbase, int sp_entry) 872 892 { 873 - unsigned long pstart = (sp_banks[sp_entry].base_addr & SRMMU_PGDIR_MASK); 874 - unsigned long vstart = (vbase & SRMMU_PGDIR_MASK); 875 - unsigned long vend = SRMMU_PGDIR_ALIGN(vbase + sp_banks[sp_entry].num_bytes); 893 + unsigned long pstart = (sp_banks[sp_entry].base_addr & PGDIR_MASK); 894 + unsigned long vstart = (vbase & PGDIR_MASK); 895 + unsigned long vend = PGDIR_ALIGN(vbase + sp_banks[sp_entry].num_bytes); 876 896 /* Map "low" memory only */ 877 897 const unsigned long min_vaddr = PAGE_OFFSET; 878 898 const unsigned long max_vaddr = PAGE_OFFSET + SRMMU_MAXMEM; ··· 885 905 886 906 while (vstart < vend) { 887 907 do_large_mapping(vstart, pstart); 888 - vstart += SRMMU_PGDIR_SIZE; pstart += SRMMU_PGDIR_SIZE; 908 + vstart += PGDIR_SIZE; pstart += PGDIR_SIZE; 889 909 } 890 910 return vstart; 891 911 }
+3 -2
arch/sparc/mm/viking.S
··· 13 13 #include <asm/asi.h> 14 14 #include <asm/mxcc.h> 15 15 #include <asm/page.h> 16 + #include <asm/pgtable.h> 16 17 #include <asm/pgtsrmmu.h> 17 18 #include <asm/viking.h> 18 19 ··· 158 157 cmp %o3, -1 159 158 be 2f 160 159 #endif 161 - sethi %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4 160 + sethi %hi(~((1 << PGDIR_SHIFT) - 1)), %o4 162 161 sta %o3, [%g1] ASI_M_MMUREGS 163 162 and %o1, %o4, %o1 164 163 add %o1, 0x200, %o1 ··· 244 243 ld [%o0 + VMA_VM_MM], %o0 245 244 ld [%o0 + AOFF_mm_context], %o3 246 245 lda [%g1] ASI_M_MMUREGS, %g5 247 - sethi %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4 246 + sethi %hi(~((1 << PGDIR_SHIFT) - 1)), %o4 248 247 sta %o3, [%g1] ASI_M_MMUREGS 249 248 and %o1, %o4, %o1 250 249 add %o1, 0x200, %o1