Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

crypto: arm64/aes-ccm - Reuse existing MAC update for AAD input

CCM combines the counter (CTR) encryption mode with a MAC based on the
same block cipher. This MAC construction is a bit clunky: it invokes the
block cipher in a way that cannot be parallelized, resulting in poor CPU
pipeline efficiency.

The arm64 CCM code mitigates this by interleaving the encryption and MAC
at the AES round level, resulting in a substantial speedup. But this
approach does not apply to the additional authenticated data (AAD) which
is not encrypted.

This means the special asm routine dealing with the AAD is not any
better than the MAC update routine used by the arm64 AES block
encryption driver, so let's reuse that, and drop the special AES-CCM
version.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

authored by

Ard Biesheuvel and committed by
Herbert Xu
948ffc66 c131098d

+43 -79
+1
arch/arm64/crypto/Kconfig
··· 268 268 depends on ARM64 && KERNEL_MODE_NEON 269 269 select CRYPTO_ALGAPI 270 270 select CRYPTO_AES_ARM64_CE 271 + select CRYPTO_AES_ARM64_CE_BLK 271 272 select CRYPTO_AEAD 272 273 select CRYPTO_LIB_AES 273 274 help
-71
arch/arm64/crypto/aes-ce-ccm-core.S
··· 15 15 .arch armv8-a+crypto 16 16 17 17 /* 18 - * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, 19 - * u32 macp, u8 const rk[], u32 rounds); 20 - */ 21 - SYM_FUNC_START(ce_aes_ccm_auth_data) 22 - ld1 {v0.16b}, [x0] /* load mac */ 23 - cbz w3, 1f 24 - sub w3, w3, #16 25 - eor v1.16b, v1.16b, v1.16b 26 - 0: ldrb w7, [x1], #1 /* get 1 byte of input */ 27 - subs w2, w2, #1 28 - add w3, w3, #1 29 - ins v1.b[0], w7 30 - ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ 31 - beq 8f /* out of input? */ 32 - cbnz w3, 0b 33 - eor v0.16b, v0.16b, v1.16b 34 - 1: ld1 {v3.4s}, [x4] /* load first round key */ 35 - prfm pldl1strm, [x1] 36 - cmp w5, #12 /* which key size? */ 37 - add x6, x4, #16 38 - sub w7, w5, #2 /* modified # of rounds */ 39 - bmi 2f 40 - bne 5f 41 - mov v5.16b, v3.16b 42 - b 4f 43 - 2: mov v4.16b, v3.16b 44 - ld1 {v5.4s}, [x6], #16 /* load 2nd round key */ 45 - 3: aese v0.16b, v4.16b 46 - aesmc v0.16b, v0.16b 47 - 4: ld1 {v3.4s}, [x6], #16 /* load next round key */ 48 - aese v0.16b, v5.16b 49 - aesmc v0.16b, v0.16b 50 - 5: ld1 {v4.4s}, [x6], #16 /* load next round key */ 51 - subs w7, w7, #3 52 - aese v0.16b, v3.16b 53 - aesmc v0.16b, v0.16b 54 - ld1 {v5.4s}, [x6], #16 /* load next round key */ 55 - bpl 3b 56 - aese v0.16b, v4.16b 57 - subs w2, w2, #16 /* last data? */ 58 - eor v0.16b, v0.16b, v5.16b /* final round */ 59 - bmi 6f 60 - ld1 {v1.16b}, [x1], #16 /* load next input block */ 61 - eor v0.16b, v0.16b, v1.16b /* xor with mac */ 62 - bne 1b 63 - 6: st1 {v0.16b}, [x0] /* store mac */ 64 - beq 10f 65 - adds w2, w2, #16 66 - beq 10f 67 - mov w3, w2 68 - 7: ldrb w7, [x1], #1 69 - umov w6, v0.b[0] 70 - eor w6, w6, w7 71 - strb w6, [x0], #1 72 - subs w2, w2, #1 73 - beq 10f 74 - ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ 75 - b 7b 76 - 8: cbz w3, 91f 77 - mov w7, w3 78 - add w3, w3, #16 79 - 9: ext v1.16b, v1.16b, v1.16b, #1 80 - adds w7, w7, #1 81 - bne 9b 82 - 91: eor v0.16b, v0.16b, v1.16b 83 - st1 {v0.16b}, [x0] 84 - 10: mov w0, w3 85 - ret 86 - SYM_FUNC_END(ce_aes_ccm_auth_data) 87 - 88 - /* 89 18 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[], 90 19 * u32 rounds); 91 20 */
+41 -8
arch/arm64/crypto/aes-ce-ccm-glue.c
··· 18 18 19 19 #include "aes-ce-setkey.h" 20 20 21 + MODULE_IMPORT_NS(CRYPTO_INTERNAL); 22 + 21 23 static int num_rounds(struct crypto_aes_ctx *ctx) 22 24 { 23 25 /* ··· 32 30 return 6 + ctx->key_length / 4; 33 31 } 34 32 35 - asmlinkage u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, 36 - u32 macp, u32 const rk[], u32 rounds); 33 + asmlinkage u32 ce_aes_mac_update(u8 const in[], u32 const rk[], int rounds, 34 + int blocks, u8 dg[], int enc_before, 35 + int enc_after); 37 36 38 37 asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, 39 38 u32 const rk[], u32 rounds, u8 mac[], ··· 100 97 return 0; 101 98 } 102 99 100 + static u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, 101 + u32 macp, u32 const rk[], u32 rounds) 102 + { 103 + int enc_after = (macp + abytes) % AES_BLOCK_SIZE; 104 + 105 + do { 106 + u32 blocks = abytes / AES_BLOCK_SIZE; 107 + 108 + if (macp == AES_BLOCK_SIZE || (!macp && blocks > 0)) { 109 + u32 rem = ce_aes_mac_update(in, rk, rounds, blocks, mac, 110 + macp, enc_after); 111 + u32 adv = (blocks - rem) * AES_BLOCK_SIZE; 112 + 113 + macp = enc_after ? 0 : AES_BLOCK_SIZE; 114 + in += adv; 115 + abytes -= adv; 116 + 117 + if (unlikely(rem)) { 118 + kernel_neon_end(); 119 + kernel_neon_begin(); 120 + macp = 0; 121 + } 122 + } else { 123 + u32 l = min(AES_BLOCK_SIZE - macp, abytes); 124 + 125 + crypto_xor(&mac[macp], in, l); 126 + in += l; 127 + macp += l; 128 + abytes -= l; 129 + } 130 + } while (abytes > 0); 131 + 132 + return macp; 133 + } 134 + 103 135 static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) 104 136 { 105 137 struct crypto_aead *aead = crypto_aead_reqtfm(req); ··· 142 104 struct __packed { __be16 l; __be32 h; u16 len; } ltag; 143 105 struct scatter_walk walk; 144 106 u32 len = req->assoclen; 145 - u32 macp = 0; 107 + u32 macp = AES_BLOCK_SIZE; 146 108 147 109 /* prepend the AAD with a length tag */ 148 110 if (len < 0xff00) { ··· 166 128 scatterwalk_start(&walk, sg_next(walk.sg)); 167 129 n = scatterwalk_clamp(&walk, len); 168 130 } 169 - n = min_t(u32, n, SZ_4K); /* yield NEON at least every 4k */ 170 131 p = scatterwalk_map(&walk); 171 132 172 133 macp = ce_aes_ccm_auth_data(mac, p, n, macp, ctx->key_enc, 173 134 num_rounds(ctx)); 174 135 175 - if (len / SZ_4K > (len - n) / SZ_4K) { 176 - kernel_neon_end(); 177 - kernel_neon_begin(); 178 - } 179 136 len -= n; 180 137 181 138 scatterwalk_unmap(p);
+1
arch/arm64/crypto/aes-glue.c
··· 1048 1048 1049 1049 #ifdef USE_V8_CRYPTO_EXTENSIONS 1050 1050 module_cpu_feature_match(AES, aes_init); 1051 + EXPORT_SYMBOL_NS(ce_aes_mac_update, CRYPTO_INTERNAL); 1051 1052 #else 1052 1053 module_init(aes_init); 1053 1054 EXPORT_SYMBOL(neon_aes_ecb_encrypt);