Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

crypto: x86/aes - Don't use %rbp as temporary register

When using the "aes-asm" implementation of AES (*not* the AES-NI
implementation) on an x86_64, v4.12-rc1 kernel with lockdep enabled, the
following warning was reported, along with a long unwinder dump:

WARNING: kernel stack regs at ffffc90000643558 in kworker/u4:2:155 has bad 'bp' value 000000000000001c

The problem is that aes_enc_block() and aes_dec_block() use %rbp as a
temporary register, which breaks stack traces if an interrupt occurs.

Fix this by replacing %rbp with %r9, which was being used to hold the
saved value of %rbp. This required rearranging the AES round macro
slightly since %r9d cannot be used as the target of a move from %ah-%dh.

Performance is essentially unchanged --- actually about 0.2% faster than
before. Interestingly, I also measured aes-generic as being nearly 7%
faster than aes-asm, so perhaps aes-asm has outlived its usefulness...

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Eric Biggers and committed by Herbert Xu.
9417cd1c 248c6505

+22 -25
arch/x86/crypto/aes-x86_64-asm_64.S
···
42 42   #define R5E %esi
43 43   #define R6 %rdi
44 44   #define R6E %edi
45    - #define R7 %rbp
46    - #define R7E %ebp
   45 + #define R7 %r9	/* don't use %rbp; it breaks stack traces */
   46 + #define R7E %r9d
47 47   #define R8 %r8
48    - #define R9 %r9
49 48   #define R10 %r10
50 49   #define R11 %r11
51 50
52    - #define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
   51 + #define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
53 52   ENTRY(FUNC); \
54 53   movq r1,r2; \
55    - movq r3,r4; \
56 54   leaq KEY+48(r8),r9; \
57 55   movq r10,r11; \
58 56   movl (r7),r5 ## E; \
···
68 70   je B192; \
69 71   leaq 32(r9),r9;
70 72
71    - #define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
   73 + #define epilogue(FUNC,r1,r2,r5,r6,r7,r8,r9) \
72 74   movq r1,r2; \
73    - movq r3,r4; \
74 75   movl r5 ## E,(r9); \
75 76   movl r6 ## E,4(r9); \
76 77   movl r7 ## E,8(r9); \
···
85 88   movl TAB(,r6,4),r6 ## E; \
86 89   roll $16,r2 ## E; \
87 90   shrl $16,r4 ## E; \
88    - movzbl r4 ## H,r7 ## E; \
89    - movzbl r4 ## L,r4 ## E; \
   91 + movzbl r4 ## L,r7 ## E; \
   92 + movzbl r4 ## H,r4 ## E; \
90 93   xorl OFFSET(r8),ra ## E; \
91 94   xorl OFFSET+4(r8),rb ## E; \
92    - xorl TAB+3072(,r7,4),r5 ## E;\
93    - xorl TAB+2048(,r4,4),r6 ## E;\
   95 + xorl TAB+3072(,r4,4),r5 ## E;\
   96 + xorl TAB+2048(,r7,4),r6 ## E;\
94 97   movzbl r1 ## L,r7 ## E; \
95 98   movzbl r1 ## H,r4 ## E; \
96 99   movl TAB+1024(,r4,4),r4 ## E;\
···
98 101  roll $16,r1 ## E; \
99 102  shrl $16,r3 ## E; \
100 103 xorl TAB(,r7,4),r5 ## E; \
101    - movzbl r3 ## H,r7 ## E; \
102    - movzbl r3 ## L,r3 ## E; \
103    - xorl TAB+3072(,r7,4),r4 ## E;\
104    - xorl TAB+2048(,r3,4),r5 ## E;\
105    - movzbl r1 ## H,r7 ## E; \
106    - movzbl r1 ## L,r3 ## E; \
    104 + movzbl r3 ## L,r7 ## E; \
    105 + movzbl r3 ## H,r3 ## E; \
    106 + xorl TAB+3072(,r3,4),r4 ## E;\
    107 + xorl TAB+2048(,r7,4),r5 ## E;\
    108 + movzbl r1 ## L,r7 ## E; \
    109 + movzbl r1 ## H,r3 ## E; \
107 110 shrl $16,r1 ## E; \
108    - xorl TAB+3072(,r7,4),r6 ## E;\
109    - movl TAB+2048(,r3,4),r3 ## E;\
110    - movzbl r1 ## H,r7 ## E; \
111    - movzbl r1 ## L,r1 ## E; \
112    - xorl TAB+1024(,r7,4),r6 ## E;\
113    - xorl TAB(,r1,4),r3 ## E; \
    111 + xorl TAB+3072(,r3,4),r6 ## E;\
    112 + movl TAB+2048(,r7,4),r3 ## E;\
    113 + movzbl r1 ## L,r7 ## E; \
    114 + movzbl r1 ## H,r1 ## E; \
    115 + xorl TAB+1024(,r1,4),r6 ## E;\
    116 + xorl TAB(,r7,4),r3 ## E; \
114 117 movzbl r2 ## H,r1 ## E; \
115 118 movzbl r2 ## L,r7 ## E; \
116 119 shrl $16,r2 ## E; \
···
128 131 movl r4 ## E,r2 ## E;
129 132
130 133 #define entry(FUNC,KEY,B128,B192) \
131    - prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
    134 + prologue(FUNC,KEY,B128,B192,R2,R8,R1,R3,R4,R6,R10,R5,R11)
132 135
133    - #define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
    136 + #define return(FUNC) epilogue(FUNC,R8,R2,R5,R6,R3,R4,R11)
134 137
135 138 #define encrypt_round(TAB,OFFSET) \
136 139 round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \