Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARM: mm: make vmalloc_seq handling SMP safe

Rework the vmalloc_seq handling so it can be used safely under SMP, as
we started using it to ensure that vmap'ed stacks are guaranteed to be
mapped by the active mm before switching to a task, and here we need to
ensure that changes to the page tables are visible to other CPUs when
they observe a change in the sequence count.

Since LPAE needs none of this, break the vmalloc_seq counter check out
into a separate static inline helper, and fold a check for LPAE into it
so the helper becomes a no-op on LPAE configurations.

Given that vmap'ed stacks are now also supported on !SMP configurations,
let's drop the WARN() that could now fire spuriously.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>

+41 -32
+1 -1
arch/arm/include/asm/mmu.h
··· 10 10 #else 11 11 int switch_pending; 12 12 #endif 13 - unsigned int vmalloc_seq; 13 + atomic_t vmalloc_seq; 14 14 unsigned long sigpage; 15 15 #ifdef CONFIG_VDSO 16 16 unsigned long vdso;
+20 -2
arch/arm/include/asm/mmu_context.h
··· 23 23 24 24 void __check_vmalloc_seq(struct mm_struct *mm); 25 25 26 + #ifdef CONFIG_MMU 27 + static inline void check_vmalloc_seq(struct mm_struct *mm) 28 + { 29 + if (!IS_ENABLED(CONFIG_ARM_LPAE) && 30 + unlikely(atomic_read(&mm->context.vmalloc_seq) != 31 + atomic_read(&init_mm.context.vmalloc_seq))) 32 + __check_vmalloc_seq(mm); 33 + } 34 + #endif 35 + 26 36 #ifdef CONFIG_CPU_HAS_ASID 27 37 28 38 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); ··· 62 52 static inline void check_and_switch_context(struct mm_struct *mm, 63 53 struct task_struct *tsk) 64 54 { 65 - if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) 66 - __check_vmalloc_seq(mm); 55 + check_vmalloc_seq(mm); 67 56 68 57 if (irqs_disabled()) 69 58 /* ··· 137 128 } 138 129 #endif 139 130 } 131 + 132 + #ifdef CONFIG_VMAP_STACK 133 + static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 134 + { 135 + if (mm != &init_mm) 136 + check_vmalloc_seq(mm); 137 + } 138 + #define enter_lazy_tlb enter_lazy_tlb 139 + #endif 140 140 141 141 #include <asm-generic/mmu_context.h> 142 142
+1 -2
arch/arm/include/asm/page.h
··· 147 147 #include <asm/pgtable-3level-types.h> 148 148 #else 149 149 #include <asm/pgtable-2level-types.h> 150 - #endif 151 - 152 150 #ifdef CONFIG_VMAP_STACK 153 151 #define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED 152 + #endif 154 153 #endif 155 154 156 155 #endif /* CONFIG_MMU */
+7 -18
arch/arm/kernel/traps.c
··· 885 885 die("kernel stack overflow", regs, 0); 886 886 } 887 887 888 + #ifndef CONFIG_ARM_LPAE 888 889 /* 889 890 * Normally, we rely on the logic in do_translation_fault() to update stale PMD 890 891 * entries covering the vmalloc space in a task's page tables when it first ··· 896 895 * So we need to ensure that these PMD entries are up to date *before* the MM 897 896 * switch. As we already have some logic in the MM switch path that takes care 898 897 * of this, let's trigger it by bumping the counter every time the core vmalloc 899 - * code modifies a PMD entry in the vmalloc region. 898 + * code modifies a PMD entry in the vmalloc region. Use release semantics on 899 + * the store so that other CPUs observing the counter's new value are 900 + * guaranteed to see the updated page table entries as well. 900 901 */ 901 902 void arch_sync_kernel_mappings(unsigned long start, unsigned long end) 902 903 { 903 - if (start > VMALLOC_END || end < VMALLOC_START) 904 - return; 905 - 906 - /* 907 - * This hooks into the core vmalloc code to receive notifications of 908 - * any PMD level changes that have been made to the kernel page tables. 909 - * This means it should only be triggered once for every MiB worth of 910 - * vmalloc space, given that we don't support huge vmalloc/vmap on ARM, 911 - * and that kernel PMD level table entries are rarely (if ever) 912 - * updated. 913 - * 914 - * This means that the counter is going to max out at ~250 for the 915 - * typical case. If it overflows, something entirely unexpected has 916 - * occurred so let's throw a warning if that happens. 917 - */ 918 - WARN_ON(++init_mm.context.vmalloc_seq == UINT_MAX); 904 + if (start < VMALLOC_END && end > VMALLOC_START) 905 + atomic_inc_return_release(&init_mm.context.vmalloc_seq); 919 906 } 920 - 907 + #endif 921 908 #endif
+1 -2
arch/arm/mm/context.c
··· 240 240 unsigned int cpu = smp_processor_id(); 241 241 u64 asid; 242 242 243 - if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) 244 - __check_vmalloc_seq(mm); 243 + check_vmalloc_seq(mm); 245 244 246 245 /* 247 246 * We cannot update the pgd and the ASID atomicly with classic
+11 -7
arch/arm/mm/ioremap.c
··· 117 117 118 118 void __check_vmalloc_seq(struct mm_struct *mm) 119 119 { 120 - unsigned int seq; 120 + int seq; 121 121 122 122 do { 123 - seq = init_mm.context.vmalloc_seq; 123 + seq = atomic_read(&init_mm.context.vmalloc_seq); 124 124 memcpy(pgd_offset(mm, VMALLOC_START), 125 125 pgd_offset_k(VMALLOC_START), 126 126 sizeof(pgd_t) * (pgd_index(VMALLOC_END) - 127 127 pgd_index(VMALLOC_START))); 128 - mm->context.vmalloc_seq = seq; 129 - } while (seq != init_mm.context.vmalloc_seq); 128 + /* 129 + * Use a store-release so that other CPUs that observe the 130 + * counter's new value are guaranteed to see the results of the 131 + * memcpy as well. 132 + */ 133 + atomic_set_release(&mm->context.vmalloc_seq, seq); 134 + } while (seq != atomic_read(&init_mm.context.vmalloc_seq)); 130 135 } 131 136 132 137 #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) ··· 162 157 * Note: this is still racy on SMP machines. 163 158 */ 164 159 pmd_clear(pmdp); 165 - init_mm.context.vmalloc_seq++; 160 + atomic_inc_return_release(&init_mm.context.vmalloc_seq); 166 161 167 162 /* 168 163 * Free the page table, if there was one. ··· 179 174 * Ensure that the active_mm is up to date - we want to 180 175 * catch any use-after-iounmap cases. 181 176 */ 182 - if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq) 183 - __check_vmalloc_seq(current->active_mm); 177 + check_vmalloc_seq(current->active_mm); 184 178 185 179 flush_tlb_kernel_range(virt, end); 186 180 }