Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARM: cacheflush: avoid clobbering the frame pointer

Thumb2 uses R7 rather than R11 as the frame pointer, and even if we
rarely use a frame pointer to begin with when building in Thumb2 mode,
there are cases where it is required by the compiler (Clang when
inserting profiling hooks via -pg).

However, preserving and restoring the frame pointer is risky, as any
unhandled exceptions raised in the meantime will produce a bogus
backtrace, and it would be better not to touch the frame pointer at all.
This is the case even when CONFIG_FRAME_POINTER is not set, as the
unwind directive used by the unwinder may also use R7 or R11 as the
unwind anchor, even if the frame pointer is not managed strictly
according to the frame pointer ABI.

So let's tweak the cacheflush asm code not to clobber R7 or R11 at all,
so that we can drop R7 from the clobber lists of the inline asm blocks
that call these routines, and remove the code that preserves/restores
R11.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>

+23 -35
+3 -9
arch/arm/include/asm/cacheflush.h
··· 446 446 * however some exceptions may exist. Caveat emptor. 447 447 * 448 448 * - The clobber list is dictated by the call to v7_flush_dcache_*. 449 - * fp is preserved to the stack explicitly prior disabling the cache 450 - * since adding it to the clobber list is incompatible with having 451 - * CONFIG_FRAME_POINTER=y. ip is saved as well if ever r12-clobbering 452 - * trampoline are inserted by the linker and to keep sp 64-bit aligned. 453 449 */ 454 450 #define v7_exit_coherency_flush(level) \ 455 451 asm volatile( \ 456 452 ".arch armv7-a \n\t" \ 457 - "stmfd sp!, {fp, ip} \n\t" \ 458 453 "mrc p15, 0, r0, c1, c0, 0 @ get SCTLR \n\t" \ 459 454 "bic r0, r0, #"__stringify(CR_C)" \n\t" \ 460 455 "mcr p15, 0, r0, c1, c0, 0 @ set SCTLR \n\t" \ ··· 459 464 "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" \ 460 465 "mcr p15, 0, r0, c1, c0, 1 @ set ACTLR \n\t" \ 461 466 "isb \n\t" \ 462 - "dsb \n\t" \ 463 - "ldmfd sp!, {fp, ip}" \ 464 - : : : "r0","r1","r2","r3","r4","r5","r6","r7", \ 465 - "r9","r10","lr","memory" ) 467 + "dsb" \ 468 + : : : "r0","r1","r2","r3","r4","r5","r6", \ 469 + "r9","r10","ip","lr","memory" ) 466 470 467 471 void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, 468 472 void *kaddr, unsigned long len);
+2 -4
arch/arm/mach-exynos/mcpm-exynos.c
··· 35 35 */ 36 36 #define exynos_v7_exit_coherency_flush(level) \ 37 37 asm volatile( \ 38 - "stmfd sp!, {fp, ip}\n\t"\ 39 38 "mrc p15, 0, r0, c1, c0, 0 @ get SCTLR\n\t" \ 40 39 "bic r0, r0, #"__stringify(CR_C)"\n\t" \ 41 40 "mcr p15, 0, r0, c1, c0, 0 @ set SCTLR\n\t" \ ··· 49 50 "mcr p15, 0, r0, c1, c0, 1 @ set ACTLR\n\t" \ 50 51 "isb\n\t" \ 51 52 "dsb\n\t" \ 52 - "ldmfd sp!, {fp, ip}" \ 53 53 : \ 54 54 : "Ir" (pmu_base_addr + S5P_INFORM0) \ 55 - : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ 56 - "r9", "r10", "lr", "memory") 55 + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", \ 56 + "r9", "r10", "ip", "lr", "memory") 57 57 58 58 static int exynos_cpu_powerup(unsigned int cpu, unsigned int cluster) 59 59 {
+18 -22
arch/arm/mm/cache-v7.S
··· 90 90 * 91 91 * Flush the D-cache up to the Level of Unification Inner Shareable 92 92 * 93 - * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) 93 + * Corrupted registers: r0-r6, r9-r10 94 94 */ 95 95 96 96 ENTRY(v7_flush_dcache_louis) ··· 117 117 * 118 118 * Flush the whole D-cache. 119 119 * 120 - * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) 120 + * Corrupted registers: r0-r6, r9-r10 121 121 * 122 122 * - mm - mm_struct describing address space 123 123 */ ··· 149 149 movw r4, #0x3ff 150 150 ands r4, r4, r1, lsr #3 @ find maximum number on the way size 151 151 clz r5, r4 @ find bit position of way size increment 152 - movw r7, #0x7fff 153 - ands r7, r7, r1, lsr #13 @ extract max number of the index size 152 + movw r6, #0x7fff 153 + and r1, r6, r1, lsr #13 @ extract max number of the index size 154 + mov r6, #1 155 + movne r4, r4, lsl r5 @ # of ways shifted into bits [31:...] 156 + movne r6, r6, lsl r5 @ 1 shifted left by same amount 154 157 loop1: 155 - mov r9, r7 @ create working copy of max index 158 + mov r9, r1 @ create working copy of max index 156 159 loop2: 157 - ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11 158 - THUMB( lsl r6, r4, r5 ) 159 - THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 160 - ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11 161 - THUMB( lsl r6, r9, r2 ) 162 - THUMB( orr r11, r11, r6 ) @ factor index number into r11 163 - mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way 160 + mov r5, r9, lsl r2 @ factor set number into r5 161 + orr r5, r5, r4 @ factor way number into r5 162 + orr r5, r5, r10 @ factor cache level into r5 163 + mcr p15, 0, r5, c7, c14, 2 @ clean & invalidate by set/way 164 164 subs r9, r9, #1 @ decrement the index 165 165 bge loop2 166 - subs r4, r4, #1 @ decrement the way 167 - bge loop1 166 + subs r4, r4, r6 @ decrement the way 167 + bcs loop1 168 168 skip: 169 169 add r10, r10, #2 @ increment cache number 170 170 cmp r3, 
r10 ··· 192 192 * 193 193 */ 194 194 ENTRY(v7_flush_kern_cache_all) 195 - ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) 196 - THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) 195 + stmfd sp!, {r4-r6, r9-r10, lr} 197 196 bl v7_flush_dcache_all 198 197 mov r0, #0 199 198 ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable 200 199 ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate 201 - ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) 202 - THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) 200 + ldmfd sp!, {r4-r6, r9-r10, lr} 203 201 ret lr 204 202 ENDPROC(v7_flush_kern_cache_all) 205 203 ··· 208 210 * Invalidate the I-cache to the point of unification. 209 211 */ 210 212 ENTRY(v7_flush_kern_cache_louis) 211 - ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) 212 - THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) 213 + stmfd sp!, {r4-r6, r9-r10, lr} 213 214 bl v7_flush_dcache_louis 214 215 mov r0, #0 215 216 ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable 216 217 ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate 217 - ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) 218 - THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) 218 + ldmfd sp!, {r4-r6, r9-r10, lr} 219 219 ret lr 220 220 ENDPROC(v7_flush_kern_cache_louis) 221 221