Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

crypto: arm64/aes-ce - add 5 way interleave routines

In preparation for tweaking the accelerated AES chaining mode routines
to be able to use a 5-way stride, implement the core routines to
support processing 5 blocks of input at a time. While at it, drop
the 2 way versions, which have been unused for a while now.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Ard Biesheuvel; committed by Herbert Xu.
e2174139 d45b1714

+52 -68
+35 -23
arch/arm64/crypto/aes-ce.S
··· 52 52 load_round_keys \rounds, \temp 53 53 .endm 54 54 55 - .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 55 + .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4 56 56 aes\de \i0\().16b, \k\().16b 57 57 aes\mc \i0\().16b, \i0\().16b 58 58 .ifnb \i1 ··· 63 63 aes\mc \i2\().16b, \i2\().16b 64 64 aes\de \i3\().16b, \k\().16b 65 65 aes\mc \i3\().16b, \i3\().16b 66 + .ifnb \i4 67 + aes\de \i4\().16b, \k\().16b 68 + aes\mc \i4\().16b, \i4\().16b 69 + .endif 66 70 .endif 67 71 .endif 68 72 .endm 69 73 70 - /* up to 4 interleaved encryption rounds with the same round key */ 71 - .macro round_Nx, enc, k, i0, i1, i2, i3 74 + /* up to 5 interleaved encryption rounds with the same round key */ 75 + .macro round_Nx, enc, k, i0, i1, i2, i3, i4 72 76 .ifc \enc, e 73 - do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3 77 + do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3, \i4 74 78 .else 75 - do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3 79 + do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3, \i4 76 80 .endif 77 81 .endm 78 82 79 - /* up to 4 interleaved final rounds */ 80 - .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3 83 + /* up to 5 interleaved final rounds */ 84 + .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3, i4 81 85 aes\de \i0\().16b, \k\().16b 82 86 .ifnb \i1 83 87 aes\de \i1\().16b, \k\().16b 84 88 .ifnb \i3 85 89 aes\de \i2\().16b, \k\().16b 86 90 aes\de \i3\().16b, \k\().16b 91 + .ifnb \i4 92 + aes\de \i4\().16b, \k\().16b 93 + .endif 87 94 .endif 88 95 .endif 89 96 eor \i0\().16b, \i0\().16b, \k2\().16b ··· 99 92 .ifnb \i3 100 93 eor \i2\().16b, \i2\().16b, \k2\().16b 101 94 eor \i3\().16b, \i3\().16b, \k2\().16b 95 + .ifnb \i4 96 + eor \i4\().16b, \i4\().16b, \k2\().16b 97 + .endif 102 98 .endif 103 99 .endif 104 100 .endm 105 101 106 - /* up to 4 interleaved blocks */ 107 - .macro do_block_Nx, enc, rounds, i0, i1, i2, i3 102 + /* up to 5 interleaved blocks */ 103 + .macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4 108 104 cmp \rounds, #12 109 105 blo 2222f /* 128 bits */ 110 106 beq 1111f /* 192 
bits */ 111 - round_Nx \enc, v17, \i0, \i1, \i2, \i3 112 - round_Nx \enc, v18, \i0, \i1, \i2, \i3 113 - 1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3 114 - round_Nx \enc, v20, \i0, \i1, \i2, \i3 107 + round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4 108 + round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4 109 + 1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4 110 + round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4 115 111 2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 116 - round_Nx \enc, \key, \i0, \i1, \i2, \i3 112 + round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4 117 113 .endr 118 - fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3 114 + fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4 119 115 .endm 120 116 121 117 .macro encrypt_block, in, rounds, t0, t1, t2 122 118 do_block_Nx e, \rounds, \in 123 119 .endm 124 120 125 - .macro encrypt_block2x, i0, i1, rounds, t0, t1, t2 126 - do_block_Nx e, \rounds, \i0, \i1 127 - .endm 128 - 129 121 .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 130 122 do_block_Nx e, \rounds, \i0, \i1, \i2, \i3 123 + .endm 124 + 125 + .macro encrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2 126 + do_block_Nx e, \rounds, \i0, \i1, \i2, \i3, \i4 131 127 .endm 132 128 133 129 .macro decrypt_block, in, rounds, t0, t1, t2 134 130 do_block_Nx d, \rounds, \in 135 131 .endm 136 132 137 - .macro decrypt_block2x, i0, i1, rounds, t0, t1, t2 138 - do_block_Nx d, \rounds, \i0, \i1 139 - .endm 140 - 141 133 .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 142 134 do_block_Nx d, \rounds, \i0, \i1, \i2, \i3 143 135 .endm 136 + 137 + .macro decrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2 138 + do_block_Nx d, \rounds, \i0, \i1, \i2, \i3, \i4 139 + .endm 140 + 141 + #define MAX_STRIDE 5 144 142 145 143 #include "aes-modes.S"
+16
arch/arm64/crypto/aes-modes.S
··· 13 13 .text 14 14 .align 4 15 15 16 + #ifndef MAX_STRIDE 17 + #define MAX_STRIDE 4 18 + #endif 19 + 16 20 aes_encrypt_block4x: 17 21 encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 18 22 ret ··· 26 22 decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 27 23 ret 28 24 ENDPROC(aes_decrypt_block4x) 25 + 26 + #if MAX_STRIDE == 5 27 + aes_encrypt_block5x: 28 + encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 29 + ret 30 + ENDPROC(aes_encrypt_block5x) 31 + 32 + aes_decrypt_block5x: 33 + decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 34 + ret 35 + ENDPROC(aes_decrypt_block5x) 36 + #endif 29 37 30 38 /* 31 39 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+1 -45
arch/arm64/crypto/aes-neon.S
··· 117 117 118 118 /* 119 119 * Interleaved versions: functionally equivalent to the 120 - * ones above, but applied to 2 or 4 AES states in parallel. 120 + * ones above, but applied to AES states in parallel. 121 121 */ 122 - 123 - .macro sub_bytes_2x, in0, in1 124 - sub v8.16b, \in0\().16b, v15.16b 125 - tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b 126 - sub v9.16b, \in1\().16b, v15.16b 127 - tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b 128 - sub v10.16b, v8.16b, v15.16b 129 - tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b 130 - sub v11.16b, v9.16b, v15.16b 131 - tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b 132 - sub v8.16b, v10.16b, v15.16b 133 - tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b 134 - sub v9.16b, v11.16b, v15.16b 135 - tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b 136 - tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b 137 - tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b 138 - .endm 139 122 140 123 .macro sub_bytes_4x, in0, in1, in2, in3 141 124 sub v8.16b, \in0\().16b, v15.16b ··· 198 215 eor \in1\().16b, \in1\().16b, v11.16b 199 216 .endm 200 217 201 - .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i 202 - ld1 {v15.4s}, [\rk] 203 - add \rkp, \rk, #16 204 - mov \i, \rounds 205 - 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ 206 - eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ 207 - movi v15.16b, #0x40 208 - tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ 209 - tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ 210 - sub_bytes_2x \in0, \in1 211 - subs \i, \i, #1 212 - ld1 {v15.4s}, [\rkp], #16 213 - beq 2222f 214 - mix_columns_2x \in0, \in1, \enc 215 - b 1111b 216 - 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ 217 - eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ 218 - .endm 219 - 220 218 .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i 221 219 ld1 {v15.4s}, [\rk] 222 220 add \rkp, \rk, #16 ··· 222 258 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ 223 259 eor \in2\().16b, 
\in2\().16b, v15.16b /* ^round key */ 224 260 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ 225 - .endm 226 - 227 - .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i 228 - do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i 229 - .endm 230 - 231 - .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i 232 - do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i 233 261 .endm 234 262 235 263 .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i