Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARM: LPAE: Page table maintenance for the 3-level format

This patch modifies the pgd/pmd/pte manipulation functions to support
the 3-level page table format. Since there is no need for an 'ext'
argument to cpu_set_pte_ext(), this patch conditionally defines a
different prototype for this function when CONFIG_ARM_LPAE is enabled.

The patch also introduces the L_PGD_SWAPPER flag to mark pgd entries
pointing to pmd tables pre-allocated in the swapper_pg_dir and avoid
trying to free them at run-time. This flag is 0 with the classic page
table format.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

+150 -7
+24
arch/arm/include/asm/pgalloc.h
··· 25 25 #define _PAGE_USER_TABLE (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_USER)) 26 26 #define _PAGE_KERNEL_TABLE (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_KERNEL)) 27 27 28 + #ifdef CONFIG_ARM_LPAE 29 + 30 + static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) 31 + { 32 + return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); 33 + } 34 + 35 + static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) 36 + { 37 + BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); 38 + free_page((unsigned long)pmd); 39 + } 40 + 41 + static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) 42 + { 43 + set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE)); 44 + } 45 + 46 + #else /* !CONFIG_ARM_LPAE */ 47 + 28 48 /* 29 49 * Since we have only two-level page tables, these are trivial 30 50 */ 31 51 #define pmd_alloc_one(mm,addr) ({ BUG(); ((pmd_t *)2); }) 32 52 #define pmd_free(mm, pmd) do { } while (0) 33 53 #define pud_populate(mm,pmd,pte) BUG() 54 + 55 + #endif /* CONFIG_ARM_LPAE */ 34 56 35 57 extern pgd_t *pgd_alloc(struct mm_struct *mm); 36 58 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); ··· 131 109 { 132 110 pmdval_t pmdval = (pte + PTE_HWTABLE_OFF) | prot; 133 111 pmdp[0] = __pmd(pmdval); 112 + #ifndef CONFIG_ARM_LPAE 134 113 pmdp[1] = __pmd(pmdval + 256 * sizeof(pte_t)); 114 + #endif 135 115 flush_pmd_entry(pmdp); 136 116 } 137 117
+53
arch/arm/include/asm/pgtable-3level.h
··· 99 99 #define L_PTE_MT_DEV_CACHED (_AT(pteval_t, 3) << 2) /* normal inner write-back */ 100 100 #define L_PTE_MT_MASK (_AT(pteval_t, 7) << 2) 101 101 102 + /* 103 + * Software PGD flags. 104 + */ 105 + #define L_PGD_SWAPPER (_AT(pgdval_t, 1) << 55) /* swapper_pg_dir entry */ 106 + 107 + #ifndef __ASSEMBLY__ 108 + 109 + #define pud_none(pud) (!pud_val(pud)) 110 + #define pud_bad(pud) (!(pud_val(pud) & 2)) 111 + #define pud_present(pud) (pud_val(pud)) 112 + 113 + #define pud_clear(pudp) \ 114 + do { \ 115 + *pudp = __pud(0); \ 116 + clean_pmd_entry(pudp); \ 117 + } while (0) 118 + 119 + #define set_pud(pudp, pud) \ 120 + do { \ 121 + *pudp = pud; \ 122 + flush_pmd_entry(pudp); \ 123 + } while (0) 124 + 125 + static inline pmd_t *pud_page_vaddr(pud_t pud) 126 + { 127 + return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); 128 + } 129 + 130 + /* Find an entry in the second-level page table.. */ 131 + #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) 132 + static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) 133 + { 134 + return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); 135 + } 136 + 137 + #define pmd_bad(pmd) (!(pmd_val(pmd) & 2)) 138 + 139 + #define copy_pmd(pmdpd,pmdps) \ 140 + do { \ 141 + *pmdpd = *pmdps; \ 142 + flush_pmd_entry(pmdpd); \ 143 + } while (0) 144 + 145 + #define pmd_clear(pmdp) \ 146 + do { \ 147 + *pmdp = __pmd(0); \ 148 + clean_pmd_entry(pmdp); \ 149 + } while (0) 150 + 151 + #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,__pte(pte_val(pte)|(ext))) 152 + 153 + #endif /* __ASSEMBLY__ */ 154 + 102 155 #endif /* _ASM_PGTABLE_3LEVEL_H */
+21
arch/arm/include/asm/proc-fns.h
··· 65 65 * Set a possibly extended PTE. Non-extended PTEs should 66 66 * ignore 'ext'. 67 67 */ 68 + #ifdef CONFIG_ARM_LPAE 69 + void (*set_pte_ext)(pte_t *ptep, pte_t pte); 70 + #else 68 71 void (*set_pte_ext)(pte_t *ptep, pte_t pte, unsigned int ext); 72 + #endif 69 73 70 74 /* Suspend/resume */ 71 75 unsigned int suspend_size; ··· 83 79 extern int cpu_do_idle(void); 84 80 extern void cpu_dcache_clean_area(void *, int); 85 81 extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); 82 + #ifdef CONFIG_ARM_LPAE 83 + extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte); 84 + #else 86 85 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); 86 + #endif 87 87 extern void cpu_reset(unsigned long addr) __attribute__((noreturn)); 88 88 89 89 /* These three are private to arch/arm/kernel/suspend.c */ ··· 115 107 116 108 #define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm) 117 109 110 + #ifdef CONFIG_ARM_LPAE 111 + #define cpu_get_pgd() \ 112 + ({ \ 113 + unsigned long pg, pg2; \ 114 + __asm__("mrrc p15, 0, %0, %1, c2" \ 115 + : "=r" (pg), "=r" (pg2) \ 116 + : \ 117 + : "cc"); \ 118 + pg &= ~(PTRS_PER_PGD*sizeof(pgd_t)-1); \ 119 + (pgd_t *)phys_to_virt(pg); \ 120 + }) 121 + #else 118 122 #define cpu_get_pgd() \ 119 123 ({ \ 120 124 unsigned long pg; \ ··· 135 115 pg &= ~0x3fff; \ 136 116 (pgd_t *)phys_to_virt(pg); \ 137 117 }) 118 + #endif 138 119 139 120 #endif 140 121
+5 -3
arch/arm/mm/ioremap.c
··· 64 64 } while (seq != init_mm.context.kvm_seq); 65 65 } 66 66 67 - #ifndef CONFIG_SMP 67 + #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) 68 68 /* 69 69 * Section support is unsafe on SMP - If you iounmap and ioremap a region, 70 70 * the other CPUs will not see this change until their next context switch. ··· 202 202 unsigned long addr; 203 203 struct vm_struct * area; 204 204 205 + #ifndef CONFIG_ARM_LPAE 205 206 /* 206 207 * High mappings must be supersection aligned 207 208 */ 208 209 if (pfn >= 0x100000 && (__pfn_to_phys(pfn) & ~SUPERSECTION_MASK)) 209 210 return NULL; 211 + #endif 210 212 211 213 /* 212 214 * Don't allow RAM to be mapped - this causes problems with ARMv6+ ··· 230 228 return NULL; 231 229 addr = (unsigned long)area->addr; 232 230 233 - #ifndef CONFIG_SMP 231 + #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) 234 232 if (DOMAIN_IO == 0 && 235 233 (((cpu_architecture() >= CPU_ARCH_ARMv6) && (get_cr() & CR_XP)) || 236 234 cpu_is_xsc3()) && pfn >= 0x100000 && ··· 322 320 void __iounmap(volatile void __iomem *io_addr) 323 321 { 324 322 void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr); 325 - #ifndef CONFIG_SMP 323 + #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) 326 324 struct vm_struct **p, *tmp; 327 325 328 326 /*
+47 -4
arch/arm/mm/pgd.c
··· 10 10 #include <linux/mm.h> 11 11 #include <linux/gfp.h> 12 12 #include <linux/highmem.h> 13 + #include <linux/slab.h> 13 14 14 15 #include <asm/pgalloc.h> 15 16 #include <asm/page.h> 16 17 #include <asm/tlbflush.h> 17 18 18 19 #include "mm.h" 20 + 21 + #ifdef CONFIG_ARM_LPAE 22 + #define __pgd_alloc() kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL) 23 + #define __pgd_free(pgd) kfree(pgd) 24 + #else 25 + #define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL, 2) 26 + #define __pgd_free(pgd) free_pages((unsigned long)pgd, 2) 27 + #endif 19 28 20 29 /* 21 30 * need to get a 16k page for level 1 ··· 36 27 pmd_t *new_pmd, *init_pmd; 37 28 pte_t *new_pte, *init_pte; 38 29 39 - new_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 2); 30 + new_pgd = __pgd_alloc(); 40 31 if (!new_pgd) 41 32 goto no_pgd; 42 33 ··· 51 42 52 43 clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t)); 53 44 45 + #ifdef CONFIG_ARM_LPAE 46 + /* 47 + * Allocate PMD table for modules and pkmap mappings. 48 + */ 49 + new_pud = pud_alloc(mm, new_pgd + pgd_index(MODULES_VADDR), 50 + MODULES_VADDR); 51 + if (!new_pud) 52 + goto no_pud; 53 + 54 + new_pmd = pmd_alloc(mm, new_pud, 0); 55 + if (!new_pmd) 56 + goto no_pmd; 57 + #endif 58 + 54 59 if (!vectors_high()) { 55 60 /* 56 61 * On ARM, first page must always be allocated since it 57 - * contains the machine vectors. 62 + * contains the machine vectors. The vectors are always high 63 + * with LPAE. 58 64 */ 59 65 new_pud = pud_alloc(mm, new_pgd, 0); 60 66 if (!new_pud) ··· 98 74 no_pmd: 99 75 pud_free(mm, new_pud); 100 76 no_pud: 101 - free_pages((unsigned long)new_pgd, 2); 77 + __pgd_free(new_pgd); 102 78 no_pgd: 103 79 return NULL; 104 80 } ··· 135 111 pgd_clear(pgd); 136 112 pud_free(mm, pud); 137 113 no_pgd: 138 - free_pages((unsigned long) pgd_base, 2); 114 + #ifdef CONFIG_ARM_LPAE 115 + /* 116 + * Free modules/pkmap or identity pmd tables. 
117 + */ 118 + for (pgd = pgd_base; pgd < pgd_base + PTRS_PER_PGD; pgd++) { 119 + if (pgd_none_or_clear_bad(pgd)) 120 + continue; 121 + if (pgd_val(*pgd) & L_PGD_SWAPPER) 122 + continue; 123 + pud = pud_offset(pgd, 0); 124 + if (pud_none_or_clear_bad(pud)) 125 + continue; 126 + pmd = pmd_offset(pud, 0); 127 + pud_clear(pud); 128 + pmd_free(mm, pmd); 129 + pgd_clear(pgd); 130 + pud_free(mm, pud); 131 + } 132 + #endif 133 + __pgd_free(pgd_base); 139 134 }