Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm, x86/mm: Untangle address space layout definitions from basic pgtable type definitions

- Untangle the somewhat incestuous way in which VMALLOC_START is used all across the
kernel, yet is, on x86, defined deep inside one of the lowest-level page table headers.
It doesn't help that vmalloc.h only includes a single asm header:

#include <asm/page.h> /* pgprot_t */

So there was no existing cross-architecture way to decouple address-layout
definitions from page.h details. I used this approach:

#ifndef VMALLOC_START
# include <asm/vmalloc.h>
#endif

This way every architecture that wants to simplify page.h can do so.

- Also, on x86 we had a couple of LDT-related inline functions that used
the late-stage address space layout positions - but these could be
uninlined without real trouble - and the end result is cleaner this way as
well.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

+183 -154
+1 -9
arch/x86/include/asm/cpu_entry_area.h
··· 6 6 #include <linux/percpu-defs.h> 7 7 #include <asm/processor.h> 8 8 #include <asm/intel_ds.h> 9 + #include <asm/pgtable_areas.h> 9 10 10 11 #ifdef CONFIG_X86_64 11 12 ··· 134 133 135 134 extern void setup_cpu_entry_areas(void); 136 135 extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); 137 - 138 - /* Single page reserved for the readonly IDT mapping: */ 139 - #define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE 140 - #define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) 141 - 142 - #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) 143 - 144 - #define CPU_ENTRY_AREA_MAP_SIZE \ 145 - (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) 146 136 147 137 extern struct cpu_entry_area *get_cpu_entry_area(int cpu); 148 138
+6 -80
arch/x86/include/asm/mmu_context.h
··· 69 69 int slot; 70 70 }; 71 71 72 - /* This is a multiple of PAGE_SIZE. */ 73 - #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) 74 - 75 - static inline void *ldt_slot_va(int slot) 76 - { 77 - return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); 78 - } 79 - 80 72 /* 81 73 * Used for LDT copy/destruction. 82 74 */ ··· 91 99 static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } 92 100 #endif 93 101 102 + #ifdef CONFIG_MODIFY_LDT_SYSCALL 103 + extern void load_mm_ldt(struct mm_struct *mm); 104 + extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next); 105 + #else 94 106 static inline void load_mm_ldt(struct mm_struct *mm) 95 107 { 96 - #ifdef CONFIG_MODIFY_LDT_SYSCALL 97 - struct ldt_struct *ldt; 98 - 99 - /* READ_ONCE synchronizes with smp_store_release */ 100 - ldt = READ_ONCE(mm->context.ldt); 101 - 102 - /* 103 - * Any change to mm->context.ldt is followed by an IPI to all 104 - * CPUs with the mm active. The LDT will not be freed until 105 - * after the IPI is handled by all such CPUs. This means that, 106 - * if the ldt_struct changes before we return, the values we see 107 - * will be safe, and the new values will be loaded before we run 108 - * any user code. 109 - * 110 - * NB: don't try to convert this to use RCU without extreme care. 111 - * We would still need IRQs off, because we don't want to change 112 - * the local LDT after an IPI loaded a newer value than the one 113 - * that we can see. 114 - */ 115 - 116 - if (unlikely(ldt)) { 117 - if (static_cpu_has(X86_FEATURE_PTI)) { 118 - if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { 119 - /* 120 - * Whoops -- either the new LDT isn't mapped 121 - * (if slot == -1) or is mapped into a bogus 122 - * slot (if slot > 1). 123 - */ 124 - clear_LDT(); 125 - return; 126 - } 127 - 128 - /* 129 - * If page table isolation is enabled, ldt->entries 130 - * will not be mapped in the userspace pagetables. 
131 - * Tell the CPU to access the LDT through the alias 132 - * at ldt_slot_va(ldt->slot). 133 - */ 134 - set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); 135 - } else { 136 - set_ldt(ldt->entries, ldt->nr_entries); 137 - } 138 - } else { 139 - clear_LDT(); 140 - } 141 - #else 142 108 clear_LDT(); 143 - #endif 144 109 } 145 - 146 110 static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) 147 111 { 148 - #ifdef CONFIG_MODIFY_LDT_SYSCALL 149 - /* 150 - * Load the LDT if either the old or new mm had an LDT. 151 - * 152 - * An mm will never go from having an LDT to not having an LDT. Two 153 - * mms never share an LDT, so we don't gain anything by checking to 154 - * see whether the LDT changed. There's also no guarantee that 155 - * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, 156 - * then prev->context.ldt will also be non-NULL. 157 - * 158 - * If we really cared, we could optimize the case where prev == next 159 - * and we're exiting lazy mode. Most of the time, if this happens, 160 - * we don't actually need to reload LDTR, but modify_ldt() is mostly 161 - * used by legacy code and emulators where we don't need this level of 162 - * performance. 163 - * 164 - * This uses | instead of || because it generates better code. 165 - */ 166 - if (unlikely((unsigned long)prev->context.ldt | 167 - (unsigned long)next->context.ldt)) 168 - load_mm_ldt(next); 169 - #endif 170 - 171 112 DEBUG_LOCKS_WARN_ON(preemptible()); 172 113 } 114 + #endif 173 115 174 - void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); 116 + extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); 175 117 176 118 /* 177 119 * Init a new mm. Used on mm copies, like at fork()
+53
arch/x86/include/asm/pgtable_32_areas.h
··· 1 + #ifndef _ASM_X86_PGTABLE_32_AREAS_H 2 + #define _ASM_X86_PGTABLE_32_AREAS_H 3 + 4 + #include <asm/cpu_entry_area.h> 5 + 6 + /* 7 + * Just any arbitrary offset to the start of the vmalloc VM area: the 8 + * current 8MB value just means that there will be a 8MB "hole" after the 9 + * physical memory until the kernel virtual memory starts. That means that 10 + * any out-of-bounds memory accesses will hopefully be caught. 11 + * The vmalloc() routines leaves a hole of 4kB between each vmalloced 12 + * area for the same reason. ;) 13 + */ 14 + #define VMALLOC_OFFSET (8 * 1024 * 1024) 15 + 16 + #ifndef __ASSEMBLY__ 17 + extern bool __vmalloc_start_set; /* set once high_memory is set */ 18 + #endif 19 + 20 + #define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) 21 + #ifdef CONFIG_X86_PAE 22 + #define LAST_PKMAP 512 23 + #else 24 + #define LAST_PKMAP 1024 25 + #endif 26 + 27 + #define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE)) 28 + 29 + /* The +1 is for the readonly IDT page: */ 30 + #define CPU_ENTRY_AREA_BASE \ 31 + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) 32 + 33 + #define LDT_BASE_ADDR \ 34 + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) 35 + 36 + #define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) 37 + 38 + #define PKMAP_BASE \ 39 + ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) 40 + 41 + #ifdef CONFIG_HIGHMEM 42 + # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) 43 + #else 44 + # define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) 45 + #endif 46 + 47 + #define MODULES_VADDR VMALLOC_START 48 + #define MODULES_END VMALLOC_END 49 + #define MODULES_LEN (MODULES_VADDR - MODULES_END) 50 + 51 + #define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) 52 + 53 + #endif /* _ASM_X86_PGTABLE_32_AREAS_H */
+3 -54
arch/x86/include/asm/pgtable_32_types.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef _ASM_X86_PGTABLE_32_DEFS_H 3 - #define _ASM_X86_PGTABLE_32_DEFS_H 2 + #ifndef _ASM_X86_PGTABLE_32_TYPES_H 3 + #define _ASM_X86_PGTABLE_32_TYPES_H 4 4 5 5 /* 6 6 * The Linux x86 paging architecture is 'compile-time dual-mode', it ··· 20 20 #define PGDIR_SIZE (1UL << PGDIR_SHIFT) 21 21 #define PGDIR_MASK (~(PGDIR_SIZE - 1)) 22 22 23 - /* Just any arbitrary offset to the start of the vmalloc VM area: the 24 - * current 8MB value just means that there will be a 8MB "hole" after the 25 - * physical memory until the kernel virtual memory starts. That means that 26 - * any out-of-bounds memory accesses will hopefully be caught. 27 - * The vmalloc() routines leaves a hole of 4kB between each vmalloced 28 - * area for the same reason. ;) 29 - */ 30 - #define VMALLOC_OFFSET (8 * 1024 * 1024) 31 - 32 - #ifndef __ASSEMBLY__ 33 - extern bool __vmalloc_start_set; /* set once high_memory is set */ 34 - #endif 35 - 36 - #define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) 37 - #ifdef CONFIG_X86_PAE 38 - #define LAST_PKMAP 512 39 - #else 40 - #define LAST_PKMAP 1024 41 - #endif 42 - 43 - /* 44 - * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE. 45 - * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c 46 - * to avoid include recursion hell. 
47 - */ 48 - #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 43) 49 - 50 - /* The +1 is for the readonly IDT page: */ 51 - #define CPU_ENTRY_AREA_BASE \ 52 - ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) 53 - 54 - #define LDT_BASE_ADDR \ 55 - ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) 56 - 57 - #define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) 58 - 59 - #define PKMAP_BASE \ 60 - ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) 61 - 62 - #ifdef CONFIG_HIGHMEM 63 - # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) 64 - #else 65 - # define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) 66 - #endif 67 - 68 - #define MODULES_VADDR VMALLOC_START 69 - #define MODULES_END VMALLOC_END 70 - #define MODULES_LEN (MODULES_VADDR - MODULES_END) 71 - 72 - #define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) 73 - 74 - #endif /* _ASM_X86_PGTABLE_32_DEFS_H */ 23 + #endif /* _ASM_X86_PGTABLE_32_TYPES_H */
+16
arch/x86/include/asm/pgtable_areas.h
··· 1 + #ifndef _ASM_X86_PGTABLE_AREAS_H 2 + #define _ASM_X86_PGTABLE_AREAS_H 3 + 4 + #ifdef CONFIG_X86_32 5 + # include <asm/pgtable_32_areas.h> 6 + #endif 7 + 8 + /* Single page reserved for the readonly IDT mapping: */ 9 + #define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE 10 + #define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) 11 + 12 + #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) 13 + 14 + #define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) 15 + 16 + #endif /* _ASM_X86_PGTABLE_AREAS_H */
+2
arch/x86/include/asm/vmalloc.h
··· 1 1 #ifndef _ASM_X86_VMALLOC_H 2 2 #define _ASM_X86_VMALLOC_H 3 3 4 + #include <asm/pgtable_areas.h> 5 + 4 6 #endif /* _ASM_X86_VMALLOC_H */
+83
arch/x86/kernel/ldt.c
··· 28 28 #include <asm/desc.h> 29 29 #include <asm/mmu_context.h> 30 30 #include <asm/syscalls.h> 31 + #include <asm/pgtable_areas.h> 32 + 33 + /* This is a multiple of PAGE_SIZE. */ 34 + #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) 35 + 36 + static inline void *ldt_slot_va(int slot) 37 + { 38 + return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); 39 + } 40 + 41 + void load_mm_ldt(struct mm_struct *mm) 42 + { 43 + struct ldt_struct *ldt; 44 + 45 + /* READ_ONCE synchronizes with smp_store_release */ 46 + ldt = READ_ONCE(mm->context.ldt); 47 + 48 + /* 49 + * Any change to mm->context.ldt is followed by an IPI to all 50 + * CPUs with the mm active. The LDT will not be freed until 51 + * after the IPI is handled by all such CPUs. This means that, 52 + * if the ldt_struct changes before we return, the values we see 53 + * will be safe, and the new values will be loaded before we run 54 + * any user code. 55 + * 56 + * NB: don't try to convert this to use RCU without extreme care. 57 + * We would still need IRQs off, because we don't want to change 58 + * the local LDT after an IPI loaded a newer value than the one 59 + * that we can see. 60 + */ 61 + 62 + if (unlikely(ldt)) { 63 + if (static_cpu_has(X86_FEATURE_PTI)) { 64 + if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { 65 + /* 66 + * Whoops -- either the new LDT isn't mapped 67 + * (if slot == -1) or is mapped into a bogus 68 + * slot (if slot > 1). 69 + */ 70 + clear_LDT(); 71 + return; 72 + } 73 + 74 + /* 75 + * If page table isolation is enabled, ldt->entries 76 + * will not be mapped in the userspace pagetables. 77 + * Tell the CPU to access the LDT through the alias 78 + * at ldt_slot_va(ldt->slot). 
79 + */ 80 + set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); 81 + } else { 82 + set_ldt(ldt->entries, ldt->nr_entries); 83 + } 84 + } else { 85 + clear_LDT(); 86 + } 87 + } 88 + 89 + void switch_ldt(struct mm_struct *prev, struct mm_struct *next) 90 + { 91 + /* 92 + * Load the LDT if either the old or new mm had an LDT. 93 + * 94 + * An mm will never go from having an LDT to not having an LDT. Two 95 + * mms never share an LDT, so we don't gain anything by checking to 96 + * see whether the LDT changed. There's also no guarantee that 97 + * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, 98 + * then prev->context.ldt will also be non-NULL. 99 + * 100 + * If we really cared, we could optimize the case where prev == next 101 + * and we're exiting lazy mode. Most of the time, if this happens, 102 + * we don't actually need to reload LDTR, but modify_ldt() is mostly 103 + * used by legacy code and emulators where we don't need this level of 104 + * performance. 105 + * 106 + * This uses | instead of || because it generates better code. 107 + */ 108 + if (unlikely((unsigned long)prev->context.ldt | 109 + (unsigned long)next->context.ldt)) 110 + load_mm_ldt(next); 111 + 112 + DEBUG_LOCKS_WARN_ON(preemptible()); 113 + } 31 114 32 115 static void refresh_ldt_segments(void) 33 116 {
+1
arch/x86/kernel/setup.c
··· 41 41 #include <asm/proto.h> 42 42 #include <asm/unwind.h> 43 43 #include <asm/vsyscall.h> 44 + #include <linux/vmalloc.h> 44 45 45 46 /* 46 47 * max_low_pfn_mapped: highest directly mapped pfn < 4 GB
+1
arch/x86/mm/fault.c
··· 29 29 #include <asm/efi.h> /* efi_recover_from_page_fault()*/ 30 30 #include <asm/desc.h> /* store_idt(), ... */ 31 31 #include <asm/cpu_entry_area.h> /* exception stack */ 32 + #include <asm/pgtable_areas.h> /* VMALLOC_START, ... */ 32 33 33 34 #define CREATE_TRACE_POINTS 34 35 #include <asm/trace/exceptions.h>
+1
arch/x86/mm/init_32.c
··· 52 52 #include <asm/page_types.h> 53 53 #include <asm/cpu_entry_area.h> 54 54 #include <asm/init.h> 55 + #include <asm/pgtable_areas.h> 55 56 56 57 #include "mm_internal.h" 57 58
+1
arch/x86/mm/pgtable_32.c
··· 18 18 #include <asm/tlb.h> 19 19 #include <asm/tlbflush.h> 20 20 #include <asm/io.h> 21 + #include <linux/vmalloc.h> 21 22 22 23 unsigned int __VMALLOC_RESERVE = 128 << 20; 23 24
+1
arch/x86/mm/physaddr.c
··· 5 5 #include <linux/mm.h> 6 6 7 7 #include <asm/page.h> 8 + #include <linux/vmalloc.h> 8 9 9 10 #include "physaddr.h" 10 11
+5 -10
include/linux/mm.h
··· 625 625 * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there 626 626 * is no special casing required. 627 627 */ 628 - static inline bool is_vmalloc_addr(const void *x) 629 - { 630 - #ifdef CONFIG_MMU 631 - unsigned long addr = (unsigned long)x; 632 - 633 - return addr >= VMALLOC_START && addr < VMALLOC_END; 634 - #else 635 - return false; 636 - #endif 637 - } 638 628 639 629 #ifndef is_ioremap_addr 640 630 #define is_ioremap_addr(x) is_vmalloc_addr(x) 641 631 #endif 642 632 643 633 #ifdef CONFIG_MMU 634 + extern bool is_vmalloc_addr(const void *x); 644 635 extern int is_vmalloc_or_module_addr(const void *x); 645 636 #else 637 + static inline bool is_vmalloc_addr(const void *x) 638 + { 639 + return false; 640 + } 646 641 static inline int is_vmalloc_or_module_addr(const void *x) 647 642 { 648 643 return 0;
+1 -1
mm/highmem.c
··· 29 29 #include <linux/highmem.h> 30 30 #include <linux/kgdb.h> 31 31 #include <asm/tlbflush.h> 32 - 32 + #include <linux/vmalloc.h> 33 33 34 34 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) 35 35 DEFINE_PER_CPU(int, __kmap_atomic_idx);
+8
mm/vmalloc.c
··· 41 41 42 42 #include "internal.h" 43 43 44 + bool is_vmalloc_addr(const void *x) 45 + { 46 + unsigned long addr = (unsigned long)x; 47 + 48 + return addr >= VMALLOC_START && addr < VMALLOC_END; 49 + } 50 + EXPORT_SYMBOL(is_vmalloc_addr); 51 + 44 52 struct vfree_deferred { 45 53 struct llist_head list; 46 54 struct work_struct wq;