Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARM: mm: implement LoUIS API for cache maintenance ops

ARM v7 architecture introduced the concept of cache levels and related
control registers. New processors like A7 and A15 embed an L2 unified cache
controller that becomes part of the cache level hierarchy. Some operations in
the kernel like cpu_suspend and __cpu_disable do not require a flush of the
entire cache hierarchy to DRAM but just the cache levels belonging to the
Level of Unification Inner Shareable (LoUIS), which in most ARM v7 systems
corresponds to L1.

The current cache flushing API used in cpu_suspend and __cpu_disable,
flush_cache_all(), ends up flushing the whole cache hierarchy since for
v7 it cleans and invalidates all cache levels up to the Level of Coherency
(LoC), which cripples system performance when used in hot paths like hotplug
and cpuidle.

Therefore a new kernel cache maintenance API must be added to cope with
the latest ARM system requirements.

This patch adds flush_cache_louis() to the ARM kernel cache maintenance API.

This function cleans and invalidates all data cache levels up to the
Level of Unification Inner Shareable (LoUIS) and invalidates the instruction
cache for processors that support it (>= v7).

This patch also creates an alias of the cache LoUIS function to flush_kern_all
for all processor versions prior to v7, so that the current cache flushing
behaviour is unchanged for those processors.

v7 cache maintenance code implements a cache LoUIS function that cleans and
invalidates the D-cache up to LoUIS and invalidates the I-cache, according
to the new API.

Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Tested-by: Shawn Guo <shawn.guo@linaro.org>

+113
+15
arch/arm/include/asm/cacheflush.h
··· 49 49 * 50 50 * Unconditionally clean and invalidate the entire cache. 51 51 * 52 + * flush_kern_louis() 53 + * 54 + * Flush data cache levels up to the level of unification 55 + * inner shareable and invalidate the I-cache. 56 + * Only needed from v7 onwards, falls back to flush_cache_all() 57 + * for all other processor versions. 58 + * 52 59 * flush_user_all() 53 60 * 54 61 * Clean and invalidate all user space cache entries ··· 104 97 struct cpu_cache_fns { 105 98 void (*flush_icache_all)(void); 106 99 void (*flush_kern_all)(void); 100 + void (*flush_kern_louis)(void); 107 101 void (*flush_user_all)(void); 108 102 void (*flush_user_range)(unsigned long, unsigned long, unsigned int); 109 103 ··· 127 119 128 120 #define __cpuc_flush_icache_all cpu_cache.flush_icache_all 129 121 #define __cpuc_flush_kern_all cpu_cache.flush_kern_all 122 + #define __cpuc_flush_kern_louis cpu_cache.flush_kern_louis 130 123 #define __cpuc_flush_user_all cpu_cache.flush_user_all 131 124 #define __cpuc_flush_user_range cpu_cache.flush_user_range 132 125 #define __cpuc_coherent_kern_range cpu_cache.coherent_kern_range ··· 148 139 149 140 extern void __cpuc_flush_icache_all(void); 150 141 extern void __cpuc_flush_kern_all(void); 142 + extern void __cpuc_flush_kern_louis(void); 151 143 extern void __cpuc_flush_user_all(void); 152 144 extern void __cpuc_flush_user_range(unsigned long, unsigned long, unsigned int); 153 145 extern void __cpuc_coherent_kern_range(unsigned long, unsigned long); ··· 213 203 { 214 204 __flush_icache_preferred(); 215 205 } 206 + 207 + /* 208 + * Flush caches up to Level of Unification Inner Shareable 209 + */ 210 + #define flush_cache_louis() __cpuc_flush_kern_louis() 216 211 217 212 #define flush_cache_all() __cpuc_flush_kern_all() 218 213
+1
arch/arm/include/asm/glue-cache.h
··· 132 132 #ifndef MULTI_CACHE 133 133 #define __cpuc_flush_icache_all __glue(_CACHE,_flush_icache_all) 134 134 #define __cpuc_flush_kern_all __glue(_CACHE,_flush_kern_cache_all) 135 + #define __cpuc_flush_kern_louis __glue(_CACHE,_flush_kern_cache_louis) 135 136 #define __cpuc_flush_user_all __glue(_CACHE,_flush_user_cache_all) 136 137 #define __cpuc_flush_user_range __glue(_CACHE,_flush_user_cache_range) 137 138 #define __cpuc_coherent_kern_range __glue(_CACHE,_coherent_kern_range)
+3
arch/arm/mm/cache-fa.S
··· 240 240 mov pc, lr 241 241 ENDPROC(fa_dma_unmap_area) 242 242 243 + .globl fa_flush_kern_cache_louis 244 + .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all 245 + 243 246 __INITDATA 244 247 245 248 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+3
arch/arm/mm/cache-v3.S
··· 128 128 ENDPROC(v3_dma_unmap_area) 129 129 ENDPROC(v3_dma_map_area) 130 130 131 + .globl v3_flush_kern_cache_louis 132 + .equ v3_flush_kern_cache_louis, v3_flush_kern_cache_all 133 + 131 134 __INITDATA 132 135 133 136 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+3
arch/arm/mm/cache-v4.S
··· 140 140 ENDPROC(v4_dma_unmap_area) 141 141 ENDPROC(v4_dma_map_area) 142 142 143 + .globl v4_flush_kern_cache_louis 144 + .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all 145 + 143 146 __INITDATA 144 147 145 148 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+3
arch/arm/mm/cache-v4wb.S
··· 251 251 mov pc, lr 252 252 ENDPROC(v4wb_dma_unmap_area) 253 253 254 + .globl v4wb_flush_kern_cache_louis 255 + .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all 256 + 254 257 __INITDATA 255 258 256 259 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+3
arch/arm/mm/cache-v4wt.S
··· 196 196 ENDPROC(v4wt_dma_unmap_area) 197 197 ENDPROC(v4wt_dma_map_area) 198 198 199 + .globl v4wt_flush_kern_cache_louis 200 + .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all 201 + 199 202 __INITDATA 200 203 201 204 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+3
arch/arm/mm/cache-v6.S
··· 326 326 mov pc, lr 327 327 ENDPROC(v6_dma_unmap_area) 328 328 329 + .globl v6_flush_kern_cache_louis 330 + .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all 331 + 329 332 __INITDATA 330 333 331 334 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+36
arch/arm/mm/cache-v7.S
··· 33 33 mov pc, lr 34 34 ENDPROC(v7_flush_icache_all) 35 35 36 + /* 37 + * v7_flush_dcache_louis() 38 + * 39 + * Flush the D-cache up to the Level of Unification Inner Shareable 40 + * 41 + * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) 42 + */ 43 + 44 + ENTRY(v7_flush_dcache_louis) 45 + dmb @ ensure ordering with previous memory accesses 46 + mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr 47 + ands r3, r0, #0xe00000 @ extract LoUIS from clidr 48 + mov r3, r3, lsr #20 @ r3 = LoUIS * 2 49 + moveq pc, lr @ return if level == 0 50 + mov r10, #0 @ r10 (starting level) = 0 51 + b loop1 @ start flushing cache levels 52 + ENDPROC(v7_flush_dcache_louis) 53 + 36 54 /* 37 55 * v7_flush_dcache_all() 38 56 * ··· 137 119 THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) 138 120 mov pc, lr 139 121 ENDPROC(v7_flush_kern_cache_all) 122 + 123 + /* 124 + * v7_flush_kern_cache_louis(void) 125 + * 126 + * Flush the data cache up to Level of Unification Inner Shareable. 127 + * Invalidate the I-cache to the point of unification. 128 + */ 129 + ENTRY(v7_flush_kern_cache_louis) 130 + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) 131 + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) 132 + bl v7_flush_dcache_louis 133 + mov r0, #0 134 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable 135 + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate 136 + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) 137 + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) 138 + mov pc, lr 139 + ENDPROC(v7_flush_kern_cache_louis) 140 140 141 141 /* 142 142 * v7_flush_cache_all()
+3
arch/arm/mm/proc-arm1020.S
··· 368 368 mov pc, lr 369 369 ENDPROC(arm1020_dma_unmap_area) 370 370 371 + .globl arm1020_flush_kern_cache_louis 372 + .equ arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all 373 + 371 374 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 372 375 define_cache_functions arm1020 373 376
+3
arch/arm/mm/proc-arm1020e.S
··· 354 354 mov pc, lr 355 355 ENDPROC(arm1020e_dma_unmap_area) 356 356 357 + .globl arm1020e_flush_kern_cache_louis 358 + .equ arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all 359 + 357 360 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 358 361 define_cache_functions arm1020e 359 362
+3
arch/arm/mm/proc-arm1022.S
··· 343 343 mov pc, lr 344 344 ENDPROC(arm1022_dma_unmap_area) 345 345 346 + .globl arm1022_flush_kern_cache_louis 347 + .equ arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all 348 + 346 349 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 347 350 define_cache_functions arm1022 348 351
+3
arch/arm/mm/proc-arm1026.S
··· 337 337 mov pc, lr 338 338 ENDPROC(arm1026_dma_unmap_area) 339 339 340 + .globl arm1026_flush_kern_cache_louis 341 + .equ arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all 342 + 340 343 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 341 344 define_cache_functions arm1026 342 345
+3
arch/arm/mm/proc-arm920.S
··· 319 319 mov pc, lr 320 320 ENDPROC(arm920_dma_unmap_area) 321 321 322 + .globl arm920_flush_kern_cache_louis 323 + .equ arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all 324 + 322 325 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 323 326 define_cache_functions arm920 324 327 #endif
+3
arch/arm/mm/proc-arm922.S
··· 321 321 mov pc, lr 322 322 ENDPROC(arm922_dma_unmap_area) 323 323 324 + .globl arm922_flush_kern_cache_louis 325 + .equ arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all 326 + 324 327 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 325 328 define_cache_functions arm922 326 329 #endif
+3
arch/arm/mm/proc-arm925.S
··· 376 376 mov pc, lr 377 377 ENDPROC(arm925_dma_unmap_area) 378 378 379 + .globl arm925_flush_kern_cache_louis 380 + .equ arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all 381 + 379 382 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 380 383 define_cache_functions arm925 381 384
+3
arch/arm/mm/proc-arm926.S
··· 339 339 mov pc, lr 340 340 ENDPROC(arm926_dma_unmap_area) 341 341 342 + .globl arm926_flush_kern_cache_louis 343 + .equ arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all 344 + 342 345 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 343 346 define_cache_functions arm926 344 347
+3
arch/arm/mm/proc-arm940.S
··· 267 267 mov pc, lr 268 268 ENDPROC(arm940_dma_unmap_area) 269 269 270 + .globl arm940_flush_kern_cache_louis 271 + .equ arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all 272 + 270 273 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 271 274 define_cache_functions arm940 272 275
+3
arch/arm/mm/proc-arm946.S
··· 310 310 mov pc, lr 311 311 ENDPROC(arm946_dma_unmap_area) 312 312 313 + .globl arm946_flush_kern_cache_louis 314 + .equ arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all 315 + 313 316 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 314 317 define_cache_functions arm946 315 318
+3
arch/arm/mm/proc-feroceon.S
··· 415 415 mov pc, lr 416 416 ENDPROC(feroceon_dma_unmap_area) 417 417 418 + .globl feroceon_flush_kern_cache_louis 419 + .equ feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all 420 + 418 421 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 419 422 define_cache_functions feroceon 420 423
+1
arch/arm/mm/proc-macros.S
··· 299 299 ENTRY(\name\()_cache_fns) 300 300 .long \name\()_flush_icache_all 301 301 .long \name\()_flush_kern_cache_all 302 + .long \name\()_flush_kern_cache_louis 302 303 .long \name\()_flush_user_cache_all 303 304 .long \name\()_flush_user_cache_range 304 305 .long \name\()_coherent_kern_range
+3
arch/arm/mm/proc-mohawk.S
··· 303 303 mov pc, lr 304 304 ENDPROC(mohawk_dma_unmap_area) 305 305 306 + .globl mohawk_flush_kern_cache_louis 307 + .equ mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all 308 + 306 309 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 307 310 define_cache_functions mohawk 308 311
+3
arch/arm/mm/proc-xsc3.S
··· 337 337 mov pc, lr 338 338 ENDPROC(xsc3_dma_unmap_area) 339 339 340 + .globl xsc3_flush_kern_cache_louis 341 + .equ xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all 342 + 340 343 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 341 344 define_cache_functions xsc3 342 345
+3
arch/arm/mm/proc-xscale.S
··· 410 410 mov pc, lr 411 411 ENDPROC(xscale_dma_unmap_area) 412 412 413 + .globl xscale_flush_kern_cache_louis 414 + .equ xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all 415 + 413 416 @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) 414 417 define_cache_functions xscale 415 418