[PATCH] Optimize D-cache alias handling on fork

Virtually indexed, physically tagged cache architectures can get away
without cache flushing when forking. This patch adds a new cache
flushing function flush_cache_dup_mm(struct mm_struct *) which for the
moment I've implemented to do the same thing on all architectures
except on MIPS where it's a no-op.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by Ralf Baechle and committed by Linus Torvalds ec8c0446 bcd02280

+54 -7
+17 -6
Documentation/cachetlb.txt
··· 179 179 lines associated with 'mm'. 180 180 181 181 This interface is used to handle whole address space 182 - page table operations such as what happens during 183 - fork, exit, and exec. 182 + page table operations such as what happens during exit and exec. 184 183 185 - 2) void flush_cache_range(struct vm_area_struct *vma, 184 + 2) void flush_cache_dup_mm(struct mm_struct *mm) 185 + 186 + This interface flushes an entire user address space from 187 + the caches. That is, after running, there will be no cache 188 + lines associated with 'mm'. 189 + 190 + This interface is used to handle whole address space 191 + page table operations such as what happens during fork. 192 + 193 + This option is separate from flush_cache_mm to allow some 194 + optimizations for VIPT caches. 195 + 196 + 3) void flush_cache_range(struct vm_area_struct *vma, 186 197 unsigned long start, unsigned long end) 187 198 188 199 Here we are flushing a specific range of (user) virtual ··· 210 199 call flush_cache_page (see below) for each entry which may be 211 200 modified. 212 201 213 - 3) void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) 202 + 4) void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) 214 203 215 204 This time we need to remove a PAGE_SIZE sized range 216 205 from the cache. The 'vma' is the backing structure used by ··· 231 220 232 221 This is used primarily during fault processing. 233 222 234 - 4) void flush_cache_kmaps(void) 223 + 5) void flush_cache_kmaps(void) 235 224 236 225 This routine need only be implemented if the platform utilizes 237 226 highmem. 
It will be called right before all of the kmaps ··· 243 232 244 233 This routine should be implemented in asm/highmem.h 245 234 246 - 5) void flush_cache_vmap(unsigned long start, unsigned long end) 235 + 6) void flush_cache_vmap(unsigned long start, unsigned long end) 247 236 void flush_cache_vunmap(unsigned long start, unsigned long end) 248 237 249 238 Here in these two interfaces we are flushing a specific range
+1
include/asm-alpha/cacheflush.h
··· 6 6 /* Caches aren't brain-dead on the Alpha. */ 7 7 #define flush_cache_all() do { } while (0) 8 8 #define flush_cache_mm(mm) do { } while (0) 9 + #define flush_cache_dup_mm(mm) do { } while (0) 9 10 #define flush_cache_range(vma, start, end) do { } while (0) 10 11 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 11 12 #define flush_dcache_page(page) do { } while (0)
+2
include/asm-arm/cacheflush.h
··· 319 319 unsigned long len, int write); 320 320 #endif 321 321 322 + #define flush_cache_dup_mm(mm) flush_cache_mm(mm) 323 + 322 324 /* 323 325 * flush_cache_user_range is used when we want to ensure that the 324 326 * Harvard caches are synchronised for the user space address range.
+1
include/asm-arm26/cacheflush.h
··· 22 22 23 23 #define flush_cache_all() do { } while (0) 24 24 #define flush_cache_mm(mm) do { } while (0) 25 + #define flush_cache_dup_mm(mm) do { } while (0) 25 26 #define flush_cache_range(vma,start,end) do { } while (0) 26 27 #define flush_cache_page(vma,vmaddr,pfn) do { } while (0) 27 28 #define flush_cache_vmap(start, end) do { } while (0)
+1
include/asm-avr32/cacheflush.h
··· 87 87 */ 88 88 #define flush_cache_all() do { } while (0) 89 89 #define flush_cache_mm(mm) do { } while (0) 90 + #define flush_cache_dup_mm(mm) do { } while (0) 90 91 #define flush_cache_range(vma, start, end) do { } while (0) 91 92 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 92 93 #define flush_cache_vmap(start, end) do { } while (0)
+1
include/asm-cris/cacheflush.h
··· 9 9 */ 10 10 #define flush_cache_all() do { } while (0) 11 11 #define flush_cache_mm(mm) do { } while (0) 12 + #define flush_cache_dup_mm(mm) do { } while (0) 12 13 #define flush_cache_range(vma, start, end) do { } while (0) 13 14 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 14 15 #define flush_dcache_page(page) do { } while (0)
+1
include/asm-frv/cacheflush.h
··· 20 20 */ 21 21 #define flush_cache_all() do {} while(0) 22 22 #define flush_cache_mm(mm) do {} while(0) 23 + #define flush_cache_dup_mm(mm) do {} while(0) 23 24 #define flush_cache_range(mm, start, end) do {} while(0) 24 25 #define flush_cache_page(vma, vmaddr, pfn) do {} while(0) 25 26 #define flush_cache_vmap(start, end) do {} while(0)
+1
include/asm-h8300/cacheflush.h
··· 12 12 13 13 #define flush_cache_all() 14 14 #define flush_cache_mm(mm) 15 + #define flush_cache_dup_mm(mm) do { } while (0) 15 16 #define flush_cache_range(vma,a,b) 16 17 #define flush_cache_page(vma,p,pfn) 17 18 #define flush_dcache_page(page)
+1
include/asm-i386/cacheflush.h
··· 7 7 /* Caches aren't brain-dead on the intel. */ 8 8 #define flush_cache_all() do { } while (0) 9 9 #define flush_cache_mm(mm) do { } while (0) 10 + #define flush_cache_dup_mm(mm) do { } while (0) 10 11 #define flush_cache_range(vma, start, end) do { } while (0) 11 12 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 12 13 #define flush_dcache_page(page) do { } while (0)
+1
include/asm-ia64/cacheflush.h
··· 18 18 19 19 #define flush_cache_all() do { } while (0) 20 20 #define flush_cache_mm(mm) do { } while (0) 21 + #define flush_cache_dup_mm(mm) do { } while (0) 21 22 #define flush_cache_range(vma, start, end) do { } while (0) 22 23 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 23 24 #define flush_icache_page(vma,page) do { } while (0)
+3
include/asm-m32r/cacheflush.h
··· 9 9 #if defined(CONFIG_CHIP_M32700) || defined(CONFIG_CHIP_OPSP) || defined(CONFIG_CHIP_M32104) 10 10 #define flush_cache_all() do { } while (0) 11 11 #define flush_cache_mm(mm) do { } while (0) 12 + #define flush_cache_dup_mm(mm) do { } while (0) 12 13 #define flush_cache_range(vma, start, end) do { } while (0) 13 14 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 14 15 #define flush_dcache_page(page) do { } while (0) ··· 30 29 #elif defined(CONFIG_CHIP_M32102) 31 30 #define flush_cache_all() do { } while (0) 32 31 #define flush_cache_mm(mm) do { } while (0) 32 + #define flush_cache_dup_mm(mm) do { } while (0) 33 33 #define flush_cache_range(vma, start, end) do { } while (0) 34 34 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 35 35 #define flush_dcache_page(page) do { } while (0) ··· 43 41 #else 44 42 #define flush_cache_all() do { } while (0) 45 43 #define flush_cache_mm(mm) do { } while (0) 44 + #define flush_cache_dup_mm(mm) do { } while (0) 46 45 #define flush_cache_range(vma, start, end) do { } while (0) 47 46 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 48 47 #define flush_dcache_page(page) do { } while (0)
+2
include/asm-m68k/cacheflush.h
··· 89 89 __flush_cache_030(); 90 90 } 91 91 92 + #define flush_cache_dup_mm(mm) flush_cache_mm(mm) 93 + 92 94 /* flush_cache_range/flush_cache_page must be macros to avoid 93 95 a dependency on linux/mm.h, which includes this file... */ 94 96 static inline void flush_cache_range(struct vm_area_struct *vma,
+1
include/asm-m68knommu/cacheflush.h
··· 8 8 9 9 #define flush_cache_all() __flush_cache_all() 10 10 #define flush_cache_mm(mm) do { } while (0) 11 + #define flush_cache_dup_mm(mm) do { } while (0) 11 12 #define flush_cache_range(vma, start, end) __flush_cache_all() 12 13 #define flush_cache_page(vma, vmaddr) do { } while (0) 13 14 #define flush_dcache_range(start,len) __flush_cache_all()
+2
include/asm-mips/cacheflush.h
··· 17 17 * 18 18 * - flush_cache_all() flushes entire cache 19 19 * - flush_cache_mm(mm) flushes the specified mm context's cache lines 20 + * - flush_cache_dup_mm(mm) handles cache flushing when forking 20 21 * - flush_cache_page(mm, vmaddr, pfn) flushes a single page 21 22 * - flush_cache_range(vma, start, end) flushes a range of pages 22 23 * - flush_icache_range(start, end) flush a range of instructions ··· 32 31 extern void (*flush_cache_all)(void); 33 32 extern void (*__flush_cache_all)(void); 34 33 extern void (*flush_cache_mm)(struct mm_struct *mm); 34 + #define flush_cache_dup_mm(mm) do { (void) (mm); } while (0) 35 35 extern void (*flush_cache_range)(struct vm_area_struct *vma, 36 36 unsigned long start, unsigned long end); 37 37 extern void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn);
+2
include/asm-parisc/cacheflush.h
··· 15 15 #define flush_cache_mm(mm) flush_cache_all_local() 16 16 #endif 17 17 18 + #define flush_cache_dup_mm(mm) flush_cache_mm(mm) 19 + 18 20 #define flush_kernel_dcache_range(start,size) \ 19 21 flush_kernel_dcache_range_asm((start), (start)+(size)); 20 22
+1
include/asm-powerpc/cacheflush.h
··· 18 18 */ 19 19 #define flush_cache_all() do { } while (0) 20 20 #define flush_cache_mm(mm) do { } while (0) 21 + #define flush_cache_dup_mm(mm) do { } while (0) 21 22 #define flush_cache_range(vma, start, end) do { } while (0) 22 23 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 23 24 #define flush_icache_page(vma, page) do { } while (0)
+1
include/asm-s390/cacheflush.h
··· 7 7 /* Caches aren't brain-dead on the s390. */ 8 8 #define flush_cache_all() do { } while (0) 9 9 #define flush_cache_mm(mm) do { } while (0) 10 + #define flush_cache_dup_mm(mm) do { } while (0) 10 11 #define flush_cache_range(vma, start, end) do { } while (0) 11 12 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 12 13 #define flush_dcache_page(page) do { } while (0)
+2
include/asm-sh/cpu-sh2/cacheflush.h
··· 15 15 * 16 16 * - flush_cache_all() flushes entire cache 17 17 * - flush_cache_mm(mm) flushes the specified mm context's cache lines 18 + * - flush_cache_dup_mm(mm) handles cache flushing when forking 18 19 * - flush_cache_page(mm, vmaddr, pfn) flushes a single page 19 20 * - flush_cache_range(vma, start, end) flushes a range of pages 20 21 * ··· 28 27 */ 29 28 #define flush_cache_all() do { } while (0) 30 29 #define flush_cache_mm(mm) do { } while (0) 30 + #define flush_cache_dup_mm(mm) do { } while (0) 31 31 #define flush_cache_range(vma, start, end) do { } while (0) 32 32 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 33 33 #define flush_dcache_page(page) do { } while (0)
+3
include/asm-sh/cpu-sh3/cacheflush.h
··· 15 15 * 16 16 * - flush_cache_all() flushes entire cache 17 17 * - flush_cache_mm(mm) flushes the specified mm context's cache lines 18 + * - flush_cache_dup mm(mm) handles cache flushing when forking 18 19 * - flush_cache_page(mm, vmaddr, pfn) flushes a single page 19 20 * - flush_cache_range(vma, start, end) flushes a range of pages 20 21 * ··· 40 39 41 40 void flush_cache_all(void); 42 41 void flush_cache_mm(struct mm_struct *mm); 42 + #define flush_cache_dup_mm(mm) flush_cache_mm(mm) 43 43 void flush_cache_range(struct vm_area_struct *vma, unsigned long start, 44 44 unsigned long end); 45 45 void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); ··· 50 48 #else 51 49 #define flush_cache_all() do { } while (0) 52 50 #define flush_cache_mm(mm) do { } while (0) 51 + #define flush_cache_dup_mm(mm) do { } while (0) 53 52 #define flush_cache_range(vma, start, end) do { } while (0) 54 53 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 55 54 #define flush_dcache_page(page) do { } while (0)
+1
include/asm-sh/cpu-sh4/cacheflush.h
··· 18 18 */ 19 19 void flush_cache_all(void); 20 20 void flush_cache_mm(struct mm_struct *mm); 21 + #define flush_cache_dup_mm(mm) flush_cache_mm(mm) 21 22 void flush_cache_range(struct vm_area_struct *vma, unsigned long start, 22 23 unsigned long end); 23 24 void flush_cache_page(struct vm_area_struct *vma, unsigned long addr,
+2
include/asm-sh64/cacheflush.h
··· 21 21 struct page *page, unsigned long addr, 22 22 int len); 23 23 24 + #define flush_cache_dup_mm(mm) flush_cache_mm(mm) 25 + 24 26 #define flush_dcache_mmap_lock(mapping) do { } while (0) 25 27 #define flush_dcache_mmap_unlock(mapping) do { } while (0) 26 28
+1
include/asm-sparc/cacheflush.h
··· 48 48 49 49 #define flush_cache_all() BTFIXUP_CALL(flush_cache_all)() 50 50 #define flush_cache_mm(mm) BTFIXUP_CALL(flush_cache_mm)(mm) 51 + #define flush_cache_dup_mm(mm) BTFIXUP_CALL(flush_cache_mm)(mm) 51 52 #define flush_cache_range(vma,start,end) BTFIXUP_CALL(flush_cache_range)(vma,start,end) 52 53 #define flush_cache_page(vma,addr,pfn) BTFIXUP_CALL(flush_cache_page)(vma,addr) 53 54 #define flush_icache_range(start, end) do { } while (0)
+1
include/asm-sparc64/cacheflush.h
··· 12 12 /* These are the same regardless of whether this is an SMP kernel or not. */ 13 13 #define flush_cache_mm(__mm) \ 14 14 do { if ((__mm) == current->mm) flushw_user(); } while(0) 15 + #define flush_cache_dup_mm(mm) flush_cache_mm(mm) 15 16 #define flush_cache_range(vma, start, end) \ 16 17 flush_cache_mm((vma)->vm_mm) 17 18 #define flush_cache_page(vma, page, pfn) \
+1
include/asm-v850/cacheflush.h
··· 24 24 systems with MMUs, so we don't need them. */ 25 25 #define flush_cache_all() ((void)0) 26 26 #define flush_cache_mm(mm) ((void)0) 27 + #define flush_cache_dup_mm(mm) ((void)0) 27 28 #define flush_cache_range(vma, start, end) ((void)0) 28 29 #define flush_cache_page(vma, vmaddr, pfn) ((void)0) 29 30 #define flush_dcache_page(page) ((void)0)
+1
include/asm-x86_64/cacheflush.h
··· 7 7 /* Caches aren't brain-dead on the intel. */ 8 8 #define flush_cache_all() do { } while (0) 9 9 #define flush_cache_mm(mm) do { } while (0) 10 + #define flush_cache_dup_mm(mm) do { } while (0) 10 11 #define flush_cache_range(vma, start, end) do { } while (0) 11 12 #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) 12 13 #define flush_dcache_page(page) do { } while (0)
+2
include/asm-xtensa/cacheflush.h
··· 75 75 76 76 #define flush_cache_all() __flush_invalidate_cache_all(); 77 77 #define flush_cache_mm(mm) __flush_invalidate_cache_all(); 78 + #define flush_cache_dup_mm(mm) __flush_invalidate_cache_all(); 78 79 79 80 #define flush_cache_vmap(start,end) __flush_invalidate_cache_all(); 80 81 #define flush_cache_vunmap(start,end) __flush_invalidate_cache_all(); ··· 89 88 90 89 #define flush_cache_all() do { } while (0) 91 90 #define flush_cache_mm(mm) do { } while (0) 91 + #define flush_cache_dup_mm(mm) do { } while (0) 92 92 93 93 #define flush_cache_vmap(start,end) do { } while (0) 94 94 #define flush_cache_vunmap(start,end) do { } while (0)
+1 -1
kernel/fork.c
··· 203 203 struct mempolicy *pol; 204 204 205 205 down_write(&oldmm->mmap_sem); 206 - flush_cache_mm(oldmm); 206 + flush_cache_dup_mm(oldmm); 207 207 /* 208 208 * Not linked in yet - no deadlock potential: 209 209 */