Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

crypto: arm64/sm4 - add CE implementation for CCM mode

This patch is a CE-optimized assembly implementation for CCM mode.

Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 223 and 225
modes of tcrypt, and compared the performance before and after this patch (the
driver used before this patch is ccm_base(ctr-sm4-ce,cbcmac-sm4-ce)).
The abscissas are blocks of different lengths. The data is tabulated and the
unit is Mb/s:

Before (rfc4309(ccm_base(ctr-sm4-ce,cbcmac-sm4-ce))):

ccm(sm4) | 16 64 256 512 1024 1420 4096 8192
-------------+---------------------------------------------------------------
CCM enc | 35.07 125.40 336.47 468.17 581.97 619.18 712.56 736.01
CCM dec | 34.87 124.40 335.08 466.75 581.04 618.81 712.25 735.89
CCM mb enc | 34.71 123.96 333.92 465.39 579.91 617.49 711.45 734.92
CCM mb dec | 34.42 122.80 331.02 462.81 578.28 616.42 709.88 734.19

After (rfc4309(ccm-sm4-ce)):

ccm-sm4-ce | 16 64 256 512 1024 1420 4096 8192
-------------+---------------------------------------------------------------
CCM enc | 77.12 249.82 569.94 725.17 839.27 867.71 952.87 969.89
CCM dec | 75.90 247.26 566.29 722.12 836.90 865.95 951.74 968.57
CCM mb enc | 75.98 245.25 562.91 718.99 834.76 864.70 950.17 967.90
CCM mb dec | 75.06 243.78 560.58 717.13 833.68 862.70 949.35 967.11

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Tianjia Zhang and committed by Herbert Xu.
Commit 67fa3a7f (parent 6b5360a5).

+650 lines added in total; the per-file addition count precedes each filename below.
+16
arch/arm64/crypto/Kconfig
··· 281 281 - ARMv8 Crypto Extensions 282 282 - NEON (Advanced SIMD) extensions 283 283 284 + config CRYPTO_SM4_ARM64_CE_CCM 285 + tristate "AEAD cipher: SM4 in CCM mode (ARMv8 Crypto Extensions)" 286 + depends on KERNEL_MODE_NEON 287 + select CRYPTO_ALGAPI 288 + select CRYPTO_AEAD 289 + select CRYPTO_SM4 290 + select CRYPTO_SM4_ARM64_CE_BLK 291 + help 292 + AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with 293 + CCM (Counter with Cipher Block Chaining-Message Authentication Code) 294 + authenticated encryption mode (NIST SP800-38C) 295 + 296 + Architecture: arm64 using: 297 + - ARMv8 Crypto Extensions 298 + - NEON (Advanced SIMD) extensions 299 + 284 300 config CRYPTO_CRCT10DIF_ARM64_CE 285 301 tristate "CRCT10DIF (PMULL)" 286 302 depends on KERNEL_MODE_NEON && CRC_T10DIF
+3
arch/arm64/crypto/Makefile
··· 29 29 obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_BLK) += sm4-ce.o 30 30 sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o 31 31 32 + obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_CCM) += sm4-ce-ccm.o 33 + sm4-ce-ccm-y := sm4-ce-ccm-glue.o sm4-ce-ccm-core.o 34 + 32 35 obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o 33 36 sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o 34 37
+328
arch/arm64/crypto/sm4-ce-ccm-core.S
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
 * as specified in rfc8998
 * https://datatracker.ietf.org/doc/html/rfc8998
 *
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"

.arch	armv8-a+crypto

/*
 * Register name -> encoding table for the raw-encoded sm4e instruction
 * below (only the registers this file actually uses are listed).
 */
.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
	.set .Lv\b\().4s, \b
.endr

/* SM4 round instruction, emitted by raw encoding for older assemblers. */
.macro sm4e, vd, vn
	.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm

/* Register macros */

#define RMAC	v16	/* running CBC-MAC value, live across all loops */

/* Helper macros. */

/*
 * Materialize the 128-bit big-endian counter held in x7 (high) / x8 (low)
 * into \vctr, then increment the x7:x8 pair by one with carry
 * (adds/adc chain).  rev64 converts the little-endian register pair to
 * the big-endian byte order CTR mode feeds to the cipher.
 */
#define inc_le128(vctr)				\
	mov	vctr.d[1], x8;			\
	mov	vctr.d[0], x7;			\
	adds	x8, x8, #1;			\
	rev64	vctr.16b, vctr.16b;		\
	adc	x7, x7, xzr;


.align 3
SYM_FUNC_START(sm4_ce_cbcmac_update)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: mac (in/out, one SM4 block)
	 *   x2: src (whole blocks only)
	 *   w3: nblocks
	 *
	 * CBC-MAC over nblocks full blocks: mac = E(mac) ^ block, repeated.
	 * Note the serial dependency on RMAC: blocks are loaded four at a
	 * time to amortize the load, but encrypted one step at a time.
	 */
	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x1]

.Lcbcmac_loop_4x:
	cmp		w3, #4
	blt		.Lcbcmac_loop_1x

	sub		w3, w3, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v3.16b

	cbz		w3, .Lcbcmac_end
	b		.Lcbcmac_loop_4x

.Lcbcmac_loop_1x:
	sub		w3, w3, #1

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b

	cbnz		w3, .Lcbcmac_loop_1x

.Lcbcmac_end:
	st1		{RMAC.16b}, [x1]
	ret
SYM_FUNC_END(sm4_ce_cbcmac_update)

.align 3
SYM_FUNC_START(sm4_ce_ccm_final)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: ctr0 (big endian, 128 bit) - the initial counter block A0
	 *   x2: mac (in/out)
	 *
	 * Produce the CCM tag: mac = E(mac) ^ E(ctr0).
	 */
	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x2]
	ld1		{v0.16b}, [x1]

	SM4_CRYPT_BLK2(RMAC, v0)

	/* en-/decrypt the mac with ctr0 */
	eor		RMAC.16b, RMAC.16b, v0.16b
	st1		{RMAC.16b}, [x2]

	ret
SYM_FUNC_END(sm4_ce_ccm_final)

.align 3
SYM_FUNC_START(sm4_ce_ccm_enc)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   w4: nbytes
	 *   x3: ctr (big endian, 128 bit) - updated on return
	 *   x5: mac (in/out)
	 *
	 * Combined CTR encryption and CBC-MAC over the plaintext.
	 * Each step encrypts one counter block and the MAC together
	 * (SM4_CRYPT_BLK2), then XORs the plaintext into both.
	 */
	SM4_PREPARE(x0)

	/* load counter into x7 (high) / x8 (low) as native integers */
	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

.Lccm_enc_loop_4x:
	cmp		w4, #(4 * 16)
	blt		.Lccm_enc_loop_1x

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b		/* ciphertext = E(ctr) ^ pt */
	eor		RMAC.16b, RMAC.16b, v0.16b	/* MAC absorbs plaintext */
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v3.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbz		w4, .Lccm_enc_end
	b		.Lccm_enc_loop_4x

.Lccm_enc_loop_1x:
	cmp		w4, #16
	blt		.Lccm_enc_tail

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v0.16b

	st1		{v8.16b}, [x1], #16

	cbz		w4, .Lccm_enc_end
	b		.Lccm_enc_loop_1x

.Lccm_enc_tail:
	/* Partial final block (< 16 bytes). Pre-encrypt the MAC and the
	 * keystream block, then fold input in byte by byte; XORing into
	 * the encrypted MAC is equivalent to CBC-MAC over the
	 * zero-padded final block.
	 */
	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1		{RMAC.16b}, [x5]

.Lccm_enc_tail_loop:
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w0		/* w6 = MAC ^ input */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	beq		.Lccm_enc_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_enc_tail_loop

.Lccm_enc_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR (back in big-endian memory order) */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_enc_ret:
	ret
SYM_FUNC_END(sm4_ce_ccm_enc)

.align 3
SYM_FUNC_START(sm4_ce_ccm_dec)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   w4: nbytes
	 *   x3: ctr (big endian, 128 bit) - updated on return
	 *   x5: mac (in/out)
	 *
	 * Same structure as sm4_ce_ccm_enc, but the MAC absorbs the
	 * recovered plaintext (v8..v11 after the keystream XOR), since
	 * CCM authenticates the plaintext on both directions.
	 */
	SM4_PREPARE(x0)

	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

.Lccm_dec_loop_4x:
	cmp		w4, #(4 * 16)
	blt		.Lccm_dec_loop_1x

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b		/* plaintext = E(ctr) ^ ct */
	eor		RMAC.16b, RMAC.16b, v8.16b	/* MAC absorbs plaintext */
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v9.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v10.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v11.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbz		w4, .Lccm_dec_end
	b		.Lccm_dec_loop_4x

.Lccm_dec_loop_1x:
	cmp		w4, #16
	blt		.Lccm_dec_tail

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v8.16b

	st1		{v8.16b}, [x1], #16

	cbz		w4, .Lccm_dec_end
	b		.Lccm_dec_loop_1x

.Lccm_dec_tail:
	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1		{RMAC.16b}, [x5]

.Lccm_dec_tail_loop:
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w9		/* w6 = MAC ^ output */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	beq		.Lccm_dec_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_dec_tail_loop

.Lccm_dec_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_dec_ret:
	ret
SYM_FUNC_END(sm4_ce_ccm_dec)
+303
arch/arm64/crypto/sm4-ce-ccm-glue.c
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
 * as specified in rfc8998
 * https://datatracker.ietf.org/doc/html/rfc8998
 *
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 *
 * NOTE(review): the SPDX tag says GPL-2.0-or-later while
 * MODULE_LICENSE() below says "GPL v2" (v2-only in module-license
 * terms) - confirm which is intended.
 */

#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#include "sm4-ce.h"

/*
 * Assembly helpers from sm4-ce-ccm-core.S.  All of them use NEON/CE
 * registers and therefore must be called between kernel_neon_begin()
 * and kernel_neon_end().
 */
asmlinkage void sm4_ce_cbcmac_update(const u32 *rkey_enc, u8 *mac,
				     const u8 *src, unsigned int nblocks);
asmlinkage void sm4_ce_ccm_enc(const u32 *rkey_enc, u8 *dst, const u8 *src,
			       u8 *iv, unsigned int nbytes, u8 *mac);
asmlinkage void sm4_ce_ccm_dec(const u32 *rkey_enc, u8 *dst, const u8 *src,
			       u8 *iv, unsigned int nbytes, u8 *mac);
asmlinkage void sm4_ce_ccm_final(const u32 *rkey_enc, u8 *iv, u8 *mac);


/* Expand the 128-bit SM4 key into encryption round keys (CE helper). */
static int ccm_setkey(struct crypto_aead *tfm, const u8 *key,
		      unsigned int key_len)
{
	struct sm4_ctx *ctx = crypto_aead_ctx(tfm);

	if (key_len != SM4_KEY_SIZE)
		return -EINVAL;

	kernel_neon_begin();
	sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
			  crypto_sm4_fk, crypto_sm4_ck);
	kernel_neon_end();

	return 0;
}

/* CCM tag length must be an even value in [4, maxauthsize]. */
static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
	if ((authsize & 1) || authsize < 4)
		return -EINVAL;
	return 0;
}

/*
 * Build the CCM B0 block in @info from the request IV, the tag length
 * and @msglen, per RFC 3610 / NIST SP800-38C.  Also zeroes the counter
 * portion of req->iv so it can serve as the initial CTR block (A0).
 * Returns -EINVAL for an invalid L, -EOVERFLOW if msglen does not fit
 * in L bytes.
 */
static int ccm_format_input(u8 info[], struct aead_request *req,
			    unsigned int msglen)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	unsigned int l = req->iv[0] + 1;
	unsigned int m;
	__be32 len;

	/* verify that CCM dimension 'L': 2 <= L <= 8 */
	if (l < 2 || l > 8)
		return -EINVAL;
	if (l < 4 && msglen >> (8 * l))
		return -EOVERFLOW;

	memset(&req->iv[SM4_BLOCK_SIZE - l], 0, l);

	memcpy(info, req->iv, SM4_BLOCK_SIZE);

	m = crypto_aead_authsize(aead);

	/* format flags field per RFC 3610/NIST 800-38C */
	*info |= ((m - 2) / 2) << 3;
	if (req->assoclen)
		*info |= (1 << 6);	/* Adata flag */

	/*
	 * format message length field,
	 * Linux uses a u32 type to represent msglen
	 */
	if (l >= 4)
		l = 4;

	len = cpu_to_be32(msglen);
	memcpy(&info[SM4_BLOCK_SIZE - l], (u8 *)&len + 4 - l, l);

	return 0;
}

/*
 * Fold the associated data into the CBC-MAC in @mac: length encoding
 * first (2 or 6 bytes depending on assoclen, per RFC 3610), then the
 * AAD itself walked via scatterwalk.  @len tracks how many bytes of
 * the current MAC block have been filled; full blocks go through the
 * assembly fast path.  Must be called with NEON enabled (the caller,
 * ccm_crypt, holds kernel_neon_begin()).
 */
static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct sm4_ctx *ctx = crypto_aead_ctx(aead);
	struct __packed { __be16 l; __be32 h; } aadlen;
	u32 assoclen = req->assoclen;
	struct scatter_walk walk;
	unsigned int len;

	if (assoclen < 0xff00) {
		aadlen.l = cpu_to_be16(assoclen);
		len = 2;
	} else {
		/* 0xfffe marker + 32-bit big-endian length */
		aadlen.l = cpu_to_be16(0xfffe);
		put_unaligned_be32(assoclen, &aadlen.h);
		len = 6;
	}

	/* absorb B0, then start the AAD block with the length encoding */
	sm4_ce_crypt_block(ctx->rkey_enc, mac, mac);
	crypto_xor(mac, (const u8 *)&aadlen, len);

	scatterwalk_start(&walk, req->src);

	do {
		u32 n = scatterwalk_clamp(&walk, assoclen);
		u8 *p, *ptr;

		if (!n) {
			scatterwalk_start(&walk, sg_next(walk.sg));
			n = scatterwalk_clamp(&walk, assoclen);
		}

		p = ptr = scatterwalk_map(&walk);
		assoclen -= n;
		scatterwalk_advance(&walk, n);

		while (n > 0) {
			unsigned int l, nblocks;

			if (len == SM4_BLOCK_SIZE) {
				if (n < SM4_BLOCK_SIZE) {
					/* block full but remainder is
					 * partial: encrypt and restart */
					sm4_ce_crypt_block(ctx->rkey_enc,
							   mac, mac);

					len = 0;
				} else {
					/* fast path: whole blocks in asm */
					nblocks = n / SM4_BLOCK_SIZE;
					sm4_ce_cbcmac_update(ctx->rkey_enc,
							     mac, ptr, nblocks);

					ptr += nblocks * SM4_BLOCK_SIZE;
					n %= SM4_BLOCK_SIZE;

					continue;
				}
			}

			l = min(n, SM4_BLOCK_SIZE - len);
			if (l) {
				crypto_xor(mac + len, ptr, l);
				len += l;
				ptr += l;
				n -= l;
			}
		}

		scatterwalk_unmap(p);
		scatterwalk_done(&walk, 0, assoclen);
	} while (assoclen);
}

/*
 * Common en/decrypt path.  Absorbs the AAD, then walks the payload,
 * calling @sm4_ce_ccm_crypt per chunk and sm4_ce_ccm_final on the last
 * one.  Partial trailing blocks (tail) are deferred to the final walk
 * step so that sm4_ce_ccm_final sees the complete MAC state.
 *
 * NEON discipline is intricate here: kernel_neon_begin() is taken
 * before the loop, released inside each iteration before
 * skcipher_walk_done() (which may sleep/copy), and re-taken only if
 * another chunk follows - keep this ordering intact.
 */
static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk,
		     u32 *rkey_enc, u8 mac[],
		     void (*sm4_ce_ccm_crypt)(const u32 *rkey_enc, u8 *dst,
					const u8 *src, u8 *iv,
					unsigned int nbytes, u8 *mac))
{
	u8 __aligned(8) ctr0[SM4_BLOCK_SIZE];
	int err;

	/* preserve the initial ctr0 for the TAG */
	memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE);
	crypto_inc(walk->iv, SM4_BLOCK_SIZE);

	kernel_neon_begin();

	if (req->assoclen)
		ccm_calculate_auth_mac(req, mac);

	do {
		unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
		const u8 *src = walk->src.virt.addr;
		u8 *dst = walk->dst.virt.addr;

		/* last chunk: process the partial block too */
		if (walk->nbytes == walk->total)
			tail = 0;

		if (walk->nbytes - tail)
			sm4_ce_ccm_crypt(rkey_enc, dst, src, walk->iv,
					 walk->nbytes - tail, mac);

		if (walk->nbytes == walk->total)
			sm4_ce_ccm_final(rkey_enc, ctr0, mac);

		kernel_neon_end();

		if (walk->nbytes) {
			err = skcipher_walk_done(walk, tail);
			if (err)
				return err;
			if (walk->nbytes)
				kernel_neon_begin();
		}
	} while (walk->nbytes > 0);

	return 0;
}

/* AEAD .encrypt: encrypt payload, append tag of authsize bytes to dst. */
static int ccm_encrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct sm4_ctx *ctx = crypto_aead_ctx(aead);
	u8 __aligned(8) mac[SM4_BLOCK_SIZE];
	struct skcipher_walk walk;
	int err;

	err = ccm_format_input(mac, req, req->cryptlen);
	if (err)
		return err;

	err = skcipher_walk_aead_encrypt(&walk, req, false);
	if (err)
		return err;

	err = ccm_crypt(req, &walk, ctx->rkey_enc, mac, sm4_ce_ccm_enc);
	if (err)
		return err;

	/* copy authtag to end of dst */
	scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
				 crypto_aead_authsize(aead), 1);

	return 0;
}

/* AEAD .decrypt: decrypt payload, verify trailing tag (-EBADMSG on fail). */
static int ccm_decrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	unsigned int authsize = crypto_aead_authsize(aead);
	struct sm4_ctx *ctx = crypto_aead_ctx(aead);
	u8 __aligned(8) mac[SM4_BLOCK_SIZE];
	u8 authtag[SM4_BLOCK_SIZE];
	struct skcipher_walk walk;
	int err;

	/* cryptlen includes the tag; MAC only covers the payload */
	err = ccm_format_input(mac, req, req->cryptlen - authsize);
	if (err)
		return err;

	err = skcipher_walk_aead_decrypt(&walk, req, false);
	if (err)
		return err;

	err = ccm_crypt(req, &walk, ctx->rkey_enc, mac, sm4_ce_ccm_dec);
	if (err)
		return err;

	/* compare calculated auth tag with the stored one */
	scatterwalk_map_and_copy(authtag, req->src,
				 req->assoclen + req->cryptlen - authsize,
				 authsize, 0);

	/* constant-time compare to avoid a timing oracle */
	if (crypto_memneq(authtag, mac, authsize))
		return -EBADMSG;

	return 0;
}

static struct aead_alg sm4_ccm_alg = {
	.base = {
		.cra_name		= "ccm(sm4)",
		.cra_driver_name	= "ccm-sm4-ce",
		.cra_priority		= 400,
		.cra_blocksize		= 1,
		.cra_ctxsize		= sizeof(struct sm4_ctx),
		.cra_module		= THIS_MODULE,
	},
	.ivsize		= SM4_BLOCK_SIZE,
	.chunksize	= SM4_BLOCK_SIZE,
	.maxauthsize	= SM4_BLOCK_SIZE,
	.setkey		= ccm_setkey,
	.setauthsize	= ccm_setauthsize,
	.encrypt	= ccm_encrypt,
	.decrypt	= ccm_decrypt,
};

static int __init sm4_ce_ccm_init(void)
{
	return crypto_register_aead(&sm4_ccm_alg);
}

static void __exit sm4_ce_ccm_exit(void)
{
	crypto_unregister_aead(&sm4_ccm_alg);
}

/* register only on CPUs exposing the SM4 CE feature */
module_cpu_feature_match(SM4, sm4_ce_ccm_init);
module_exit(sm4_ce_ccm_exit);

MODULE_DESCRIPTION("Synchronous SM4 in CCM mode using ARMv8 Crypto Extensions");
MODULE_ALIAS_CRYPTO("ccm(sm4)");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_LICENSE("GPL v2");