Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RISC-V: Flush I$ when making a dirty page executable

The RISC-V ISA allows for instruction caches that are not coherent WRT
stores, even on a single hart. As a result, we need to explicitly flush
the instruction cache whenever marking a dirty page as executable in
order to preserve the correct system behavior.

Local instruction caches aren't that scary (our implementations actually
flush the cache, but RISC-V is defined to allow higher-performance
implementations to exist), but RISC-V defines no way to perform an
instruction cache shootdown. When explicitly asked to do so we can
shoot down remote instruction caches via an IPI, but this is a bit on
the slow side.

Instead of requiring an IPI to all harts whenever marking a page as
executable, we simply flush the harts currently running the affected
context. In order to maintain correct behavior, we additionally mark
every other hart as needing a deferred instruction cache flush, which
will be performed before anything from that context runs on it.

Signed-off-by: Andrew Waterman <andrew@sifive.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>

authored by

Andrew Waterman and committed by
Palmer Dabbelt
08f051ed 28dfbe6e

+174 -30
+20 -4
arch/riscv/include/asm/cacheflush.h
··· 18 18 19 19 #undef flush_icache_range 20 20 #undef flush_icache_user_range 21 + #undef flush_dcache_page 21 22 22 23 static inline void local_flush_icache_all(void) 23 24 { 24 25 asm volatile ("fence.i" ::: "memory"); 25 26 } 26 27 28 + #define PG_dcache_clean PG_arch_1 29 + 30 + static inline void flush_dcache_page(struct page *page) 31 + { 32 + if (test_bit(PG_dcache_clean, &page->flags)) 33 + clear_bit(PG_dcache_clean, &page->flags); 34 + } 35 + 36 + /* 37 + * RISC-V doesn't have an instruction to flush parts of the instruction cache, 38 + * so instead we just flush the whole thing. 39 + */ 40 + #define flush_icache_range(start, end) flush_icache_all() 41 + #define flush_icache_user_range(vma, pg, addr, len) flush_icache_all() 42 + 27 43 #ifndef CONFIG_SMP 28 44 29 - #define flush_icache_range(start, end) local_flush_icache_all() 30 - #define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all() 45 + #define flush_icache_all() local_flush_icache_all() 46 + #define flush_icache_mm(mm, local) flush_icache_all() 31 47 32 48 #else /* CONFIG_SMP */ 33 49 34 - #define flush_icache_range(start, end) sbi_remote_fence_i(0) 35 - #define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0) 50 + #define flush_icache_all() sbi_remote_fence_i(0) 51 + void flush_icache_mm(struct mm_struct *mm, bool local); 36 52 37 53 #endif /* CONFIG_SMP */ 38 54
+4
arch/riscv/include/asm/mmu.h
··· 19 19 20 20 typedef struct { 21 21 void *vdso; 22 + #ifdef CONFIG_SMP 23 + /* A local icache flush is needed before user execution can resume. */ 24 + cpumask_t icache_stale_mask; 25 + #endif 22 26 } mm_context_t; 23 27 24 28 #endif /* __ASSEMBLY__ */
+44
arch/riscv/include/asm/mmu_context.h
··· 1 1 /* 2 2 * Copyright (C) 2012 Regents of the University of California 3 + * Copyright (C) 2017 SiFive 3 4 * 4 5 * This program is free software; you can redistribute it and/or 5 6 * modify it under the terms of the GNU General Public License ··· 20 19 #include <linux/mm.h> 21 20 #include <linux/sched.h> 22 21 #include <asm/tlbflush.h> 22 + #include <asm/cacheflush.h> 23 23 24 24 static inline void enter_lazy_tlb(struct mm_struct *mm, 25 25 struct task_struct *task) ··· 48 46 csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE); 49 47 } 50 48 49 + /* 50 + * When necessary, performs a deferred icache flush for the given MM context, 51 + * on the local CPU. RISC-V has no direct mechanism for instruction cache 52 + * shoot downs, so instead we send an IPI that informs the remote harts they 53 + * need to flush their local instruction caches. To avoid pathologically slow 54 + * behavior in a common case (a bunch of single-hart processes on a many-hart 55 + * machine, ie 'make -j') we avoid the IPIs for harts that are not currently 56 + * executing a MM context and instead schedule a deferred local instruction 57 + * cache flush to be performed before execution resumes on each hart. This 58 + * actually performs that local instruction cache flush, which implicitly only 59 + * refers to the current hart. 60 + */ 61 + static inline void flush_icache_deferred(struct mm_struct *mm) 62 + { 63 + #ifdef CONFIG_SMP 64 + unsigned int cpu = smp_processor_id(); 65 + cpumask_t *mask = &mm->context.icache_stale_mask; 66 + 67 + if (cpumask_test_cpu(cpu, mask)) { 68 + cpumask_clear_cpu(cpu, mask); 69 + /* 70 + * Ensure the remote hart's writes are visible to this hart. 71 + * This pairs with a barrier in flush_icache_mm. 
72 + */ 73 + smp_mb(); 74 + local_flush_icache_all(); 75 + } 76 + #endif 77 + } 78 + 51 79 static inline void switch_mm(struct mm_struct *prev, 52 80 struct mm_struct *next, struct task_struct *task) 53 81 { 54 82 if (likely(prev != next)) { 83 + /* 84 + * Mark the current MM context as inactive, and the next as 85 + * active. This is at least used by the icache flushing 86 + * routines in order to determine who should 87 + */ 88 + unsigned int cpu = smp_processor_id(); 89 + 90 + cpumask_clear_cpu(cpu, mm_cpumask(prev)); 91 + cpumask_set_cpu(cpu, mm_cpumask(next)); 92 + 55 93 set_pgdir(next->pgd); 56 94 local_flush_tlb_all(); 95 + 96 + flush_icache_deferred(next); 57 97 } 58 98 } 59 99
+32 -26
arch/riscv/include/asm/pgtable.h
··· 178 178 #define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) 179 179 #define pte_unmap(pte) ((void)(pte)) 180 180 181 - /* 182 - * Certain architectures need to do special things when PTEs within 183 - * a page table are directly modified. Thus, the following hook is 184 - * made available. 185 - */ 186 - static inline void set_pte(pte_t *ptep, pte_t pteval) 187 - { 188 - *ptep = pteval; 189 - } 190 - 191 - static inline void set_pte_at(struct mm_struct *mm, 192 - unsigned long addr, pte_t *ptep, pte_t pteval) 193 - { 194 - set_pte(ptep, pteval); 195 - } 196 - 197 - static inline void pte_clear(struct mm_struct *mm, 198 - unsigned long addr, pte_t *ptep) 199 - { 200 - set_pte_at(mm, addr, ptep, __pte(0)); 201 - } 202 - 203 181 static inline int pte_present(pte_t pte) 204 182 { 205 183 return (pte_val(pte) & _PAGE_PRESENT); ··· 188 210 return (pte_val(pte) == 0); 189 211 } 190 212 191 - /* static inline int pte_read(pte_t pte) */ 192 - 193 213 static inline int pte_write(pte_t pte) 194 214 { 195 215 return pte_val(pte) & _PAGE_WRITE; 216 + } 217 + 218 + static inline int pte_exec(pte_t pte) 219 + { 220 + return pte_val(pte) & _PAGE_EXEC; 196 221 } 197 222 198 223 static inline int pte_huge(pte_t pte) ··· 203 222 return pte_present(pte) 204 223 && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); 205 224 } 206 - 207 - /* static inline int pte_exec(pte_t pte) */ 208 225 209 226 static inline int pte_dirty(pte_t pte) 210 227 { ··· 288 309 static inline int pte_same(pte_t pte_a, pte_t pte_b) 289 310 { 290 311 return pte_val(pte_a) == pte_val(pte_b); 312 + } 313 + 314 + /* 315 + * Certain architectures need to do special things when PTEs within 316 + * a page table are directly modified. Thus, the following hook is 317 + * made available. 
318 + */ 319 + static inline void set_pte(pte_t *ptep, pte_t pteval) 320 + { 321 + *ptep = pteval; 322 + } 323 + 324 + void flush_icache_pte(pte_t pte); 325 + 326 + static inline void set_pte_at(struct mm_struct *mm, 327 + unsigned long addr, pte_t *ptep, pte_t pteval) 328 + { 329 + if (pte_present(pteval) && pte_exec(pteval)) 330 + flush_icache_pte(pteval); 331 + 332 + set_pte(ptep, pteval); 333 + } 334 + 335 + static inline void pte_clear(struct mm_struct *mm, 336 + unsigned long addr, pte_t *ptep) 337 + { 338 + set_pte_at(mm, addr, ptep, __pte(0)); 291 339 } 292 340 293 341 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+2
arch/riscv/include/asm/tlbflush.h
··· 17 17 18 18 #ifdef CONFIG_MMU 19 19 20 + #include <linux/mm_types.h> 21 + 20 22 /* Flush entire local TLB */ 21 23 static inline void local_flush_tlb_all(void) 22 24 {
+48
arch/riscv/kernel/smp.c
··· 108 108 { 109 109 send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); 110 110 } 111 + 112 + /* 113 + * Performs an icache flush for the given MM context. RISC-V has no direct 114 + * mechanism for instruction cache shoot downs, so instead we send an IPI that 115 + * informs the remote harts they need to flush their local instruction caches. 116 + * To avoid pathologically slow behavior in a common case (a bunch of 117 + * single-hart processes on a many-hart machine, ie 'make -j') we avoid the 118 + * IPIs for harts that are not currently executing a MM context and instead 119 + * schedule a deferred local instruction cache flush to be performed before 120 + * execution resumes on each hart. 121 + */ 122 + void flush_icache_mm(struct mm_struct *mm, bool local) 123 + { 124 + unsigned int cpu; 125 + cpumask_t others, *mask; 126 + 127 + preempt_disable(); 128 + 129 + /* Mark every hart's icache as needing a flush for this MM. */ 130 + mask = &mm->context.icache_stale_mask; 131 + cpumask_setall(mask); 132 + /* Flush this hart's I$ now, and mark it as flushed. */ 133 + cpu = smp_processor_id(); 134 + cpumask_clear_cpu(cpu, mask); 135 + local_flush_icache_all(); 136 + 137 + /* 138 + * Flush the I$ of other harts concurrently executing, and mark them as 139 + * flushed. 140 + */ 141 + cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu)); 142 + local |= cpumask_empty(&others); 143 + if (mm != current->active_mm || !local) 144 + sbi_remote_fence_i(others.bits); 145 + else { 146 + /* 147 + * It's assumed that at least one strongly ordered operation is 148 + * performed on this hart between setting a hart's cpumask bit 149 + * and scheduling this MM context on that hart. Sending an SBI 150 + * remote message will do this, but in the case where no 151 + * messages are sent we still need to order this hart's writes 152 + * with flush_icache_deferred(). 153 + */ 154 + smp_mb(); 155 + } 156 + 157 + preempt_enable(); 158 + }
+1
arch/riscv/mm/Makefile
··· 2 2 obj-y += fault.o 3 3 obj-y += extable.o 4 4 obj-y += ioremap.o 5 + obj-y += cacheflush.o
+23
arch/riscv/mm/cacheflush.c
··· 1 + /* 2 + * Copyright (C) 2017 SiFive 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of the GNU General Public License 6 + * as published by the Free Software Foundation, version 2. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + */ 13 + 14 + #include <asm/pgtable.h> 15 + #include <asm/cacheflush.h> 16 + 17 + void flush_icache_pte(pte_t pte) 18 + { 19 + struct page *page = pte_page(pte); 20 + 21 + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) 22 + flush_icache_all(); 23 + }