Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'arm64-fpsimd-on-stack-for-v6.19' into libcrypto-fpsimd-on-stack

Pull fpsimd-on-stack changes from Ard Biesheuvel:

"Shared tag/branch for arm64 FP/SIMD changes going through libcrypto"

Signed-off-by: Eric Biggers <ebiggers@kernel.org>
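Every conversion in this merge follows the same shape; as a minimal before/after sketch (my_neon_helper() is a hypothetical NEON routine, not part of the series):

    /* before */
    kernel_neon_begin();
    my_neon_helper(dst, src, len);
    kernel_neon_end();

    /* after */
    scoped_ksimd()
        my_neon_helper(dst, src, len);

The guard allocates the FP/SIMD save area as an anonymous struct user_fpsimd_state on the caller's stack (hence "fpsimd-on-stack") instead of relying on the buffer previously embedded in thread_struct; compare the <asm/processor.h> and <asm/simd.h> hunks below.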

+541 -627
+55 -61
arch/arm64/crypto/aes-ce-ccm-glue.c
··· 8 8 * Author: Ard Biesheuvel <ardb@kernel.org> 9 9 */ 10 10 11 - #include <asm/neon.h> 12 11 #include <linux/unaligned.h> 13 12 #include <crypto/aes.h> 14 13 #include <crypto/scatterwalk.h> 15 14 #include <crypto/internal/aead.h> 16 15 #include <crypto/internal/skcipher.h> 17 16 #include <linux/module.h> 17 + 18 + #include <asm/simd.h> 18 19 19 20 #include "aes-ce-setkey.h" 20 21 ··· 115 114 in += adv; 116 115 abytes -= adv; 117 116 118 - if (unlikely(rem)) { 119 - kernel_neon_end(); 120 - kernel_neon_begin(); 117 + if (unlikely(rem)) 121 118 macp = 0; 122 - } 123 119 } else { 124 120 u32 l = min(AES_BLOCK_SIZE - macp, abytes); 125 121 ··· 185 187 if (unlikely(err)) 186 188 return err; 187 189 188 - kernel_neon_begin(); 190 + scoped_ksimd() { 191 + if (req->assoclen) 192 + ccm_calculate_auth_mac(req, mac); 189 193 190 - if (req->assoclen) 191 - ccm_calculate_auth_mac(req, mac); 194 + do { 195 + u32 tail = walk.nbytes % AES_BLOCK_SIZE; 196 + const u8 *src = walk.src.virt.addr; 197 + u8 *dst = walk.dst.virt.addr; 198 + u8 buf[AES_BLOCK_SIZE]; 199 + u8 *final_iv = NULL; 192 200 193 - do { 194 - u32 tail = walk.nbytes % AES_BLOCK_SIZE; 195 - const u8 *src = walk.src.virt.addr; 196 - u8 *dst = walk.dst.virt.addr; 197 - u8 buf[AES_BLOCK_SIZE]; 198 - u8 *final_iv = NULL; 201 + if (walk.nbytes == walk.total) { 202 + tail = 0; 203 + final_iv = orig_iv; 204 + } 199 205 200 - if (walk.nbytes == walk.total) { 201 - tail = 0; 202 - final_iv = orig_iv; 203 - } 206 + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) 207 + src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], 208 + src, walk.nbytes); 204 209 205 - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) 206 - src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], 207 - src, walk.nbytes); 210 + ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail, 211 + ctx->key_enc, num_rounds(ctx), 212 + mac, walk.iv, final_iv); 208 213 209 - ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail, 210 - ctx->key_enc, num_rounds(ctx), 211 - mac, walk.iv, final_iv); 214 + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) 215 + memcpy(walk.dst.virt.addr, dst, walk.nbytes); 212 216 213 - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) 214 - memcpy(walk.dst.virt.addr, dst, walk.nbytes); 215 - 216 - if (walk.nbytes) { 217 - err = skcipher_walk_done(&walk, tail); 218 - } 219 - } while (walk.nbytes); 220 - 221 - kernel_neon_end(); 217 + if (walk.nbytes) { 218 + err = skcipher_walk_done(&walk, tail); 219 + } 220 + } while (walk.nbytes); 221 + } 222 222 223 223 if (unlikely(err)) 224 224 return err; ··· 250 254 if (unlikely(err)) 251 255 return err; 252 256 253 - kernel_neon_begin(); 257 + scoped_ksimd() { 258 + if (req->assoclen) 259 + ccm_calculate_auth_mac(req, mac); 254 260 255 - if (req->assoclen) 256 - ccm_calculate_auth_mac(req, mac); 261 + do { 262 + u32 tail = walk.nbytes % AES_BLOCK_SIZE; 263 + const u8 *src = walk.src.virt.addr; 264 + u8 *dst = walk.dst.virt.addr; 265 + u8 buf[AES_BLOCK_SIZE]; 266 + u8 *final_iv = NULL; 257 267 258 - do { 259 - u32 tail = walk.nbytes % AES_BLOCK_SIZE; 260 - const u8 *src = walk.src.virt.addr; 261 - u8 *dst = walk.dst.virt.addr; 262 - u8 buf[AES_BLOCK_SIZE]; 263 - u8 *final_iv = NULL; 268 + if (walk.nbytes == walk.total) { 269 + tail = 0; 270 + final_iv = orig_iv; 271 + } 264 272 265 - if (walk.nbytes == walk.total) { 266 - tail = 0; 267 - final_iv = orig_iv; 268 - } 273 + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) 274 + src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], 275 + src, walk.nbytes); 269 276 270 - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) 271 - src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], 272 - src, walk.nbytes); 277 + ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail, 278 + ctx->key_enc, num_rounds(ctx), 279 + mac, walk.iv, final_iv); 273 280 274 - ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail, 275 - ctx->key_enc, num_rounds(ctx), 276 - mac, walk.iv, final_iv); 281 + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) 282 + memcpy(walk.dst.virt.addr, dst, walk.nbytes); 277 283 278 - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) 279 - memcpy(walk.dst.virt.addr, dst, walk.nbytes); 280 - 281 - if (walk.nbytes) { 282 - err = skcipher_walk_done(&walk, tail); 283 - } 284 - } while (walk.nbytes); 285 - 286 - kernel_neon_end(); 284 + if (walk.nbytes) { 285 + err = skcipher_walk_done(&walk, tail); 286 + } 287 + } while (walk.nbytes); 288 + } 287 289 288 290 if (unlikely(err)) 289 291 return err;
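The deleted kernel_neon_end()/kernel_neon_begin() pair in ccm_calculate_auth_mac() appears to have served as a manual scheduling point between chunks. With the save area supplied by the caller, the task's kernel-mode FPSIMD state can simply be preserved if it is scheduled out (see the thread.kernel_fpsimd_state comment in the <asm/processor.h> hunk below), so a long-running section no longer needs to be split up. A sketch of the resulting idiom, assuming task context (more_blocks() and process_one_block() are hypothetical):

    scoped_ksimd() {
        while (more_blocks())
            process_one_block();    /* NEON-using step; may be preempted */
    }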
+41 -42
arch/arm64/crypto/aes-ce-glue.c
··· 52 52 return; 53 53 } 54 54 55 - kernel_neon_begin(); 56 - __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx)); 57 - kernel_neon_end(); 55 + scoped_ksimd() 56 + __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx)); 58 57 } 59 58 60 59 static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) ··· 65 66 return; 66 67 } 67 68 68 - kernel_neon_begin(); 69 - __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx)); 70 - kernel_neon_end(); 69 + scoped_ksimd() 70 + __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx)); 71 71 } 72 72 73 73 int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, ··· 92 94 for (i = 0; i < kwords; i++) 93 95 ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32)); 94 96 95 - kernel_neon_begin(); 96 - for (i = 0; i < sizeof(rcon); i++) { 97 - u32 *rki = ctx->key_enc + (i * kwords); 98 - u32 *rko = rki + kwords; 97 + scoped_ksimd() { 98 + for (i = 0; i < sizeof(rcon); i++) { 99 + u32 *rki = ctx->key_enc + (i * kwords); 100 + u32 *rko = rki + kwords; 99 101 100 - rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; 101 - rko[1] = rko[0] ^ rki[1]; 102 - rko[2] = rko[1] ^ rki[2]; 103 - rko[3] = rko[2] ^ rki[3]; 102 + rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ 103 + rcon[i] ^ rki[0]; 104 + rko[1] = rko[0] ^ rki[1]; 105 + rko[2] = rko[1] ^ rki[2]; 106 + rko[3] = rko[2] ^ rki[3]; 104 107 105 - if (key_len == AES_KEYSIZE_192) { 106 - if (i >= 7) 107 - break; 108 - rko[4] = rko[3] ^ rki[4]; 109 - rko[5] = rko[4] ^ rki[5]; 110 - } else if (key_len == AES_KEYSIZE_256) { 111 - if (i >= 6) 112 - break; 113 - rko[4] = __aes_ce_sub(rko[3]) ^ rki[4]; 114 - rko[5] = rko[4] ^ rki[5]; 115 - rko[6] = rko[5] ^ rki[6]; 116 - rko[7] = rko[6] ^ rki[7]; 108 + if (key_len == AES_KEYSIZE_192) { 109 + if (i >= 7) 110 + break; 111 + rko[4] = rko[3] ^ rki[4]; 112 + rko[5] = rko[4] ^ rki[5]; 113 + } else if (key_len == AES_KEYSIZE_256) { 114 + if (i >= 6) 115 + break; 116 + rko[4] = __aes_ce_sub(rko[3]) ^ rki[4]; 117 + rko[5] = rko[4] ^ rki[5]; 118 + rko[6] = rko[5] ^ rki[6]; 119 + rko[7] = rko[6] ^ rki[7]; 120 + } 117 121 } 122 + 123 + /* 124 + * Generate the decryption keys for the Equivalent Inverse 125 + * Cipher. This involves reversing the order of the round 126 + * keys, and applying the Inverse Mix Columns transformation on 127 + * all but the first and the last one. 128 + */ 129 + key_enc = (struct aes_block *)ctx->key_enc; 130 + key_dec = (struct aes_block *)ctx->key_dec; 131 + j = num_rounds(ctx); 132 + 133 + key_dec[0] = key_enc[j]; 134 + for (i = 1, j--; j > 0; i++, j--) 135 + __aes_ce_invert(key_dec + i, key_enc + j); 136 + key_dec[i] = key_enc[0]; 118 137 } 119 138 120 - /* 121 - * Generate the decryption keys for the Equivalent Inverse Cipher. 122 - * This involves reversing the order of the round keys, and applying 123 - * the Inverse Mix Columns transformation on all but the first and 124 - * the last one. 125 - */ 126 - key_enc = (struct aes_block *)ctx->key_enc; 127 - key_dec = (struct aes_block *)ctx->key_dec; 128 - j = num_rounds(ctx); 129 - 130 - key_dec[0] = key_enc[j]; 131 - for (i = 1, j--; j > 0; i++, j--) 132 - __aes_ce_invert(key_dec + i, key_enc + j); 133 - key_dec[i] = key_enc[0]; 134 - 135 - kernel_neon_end(); 136 139 return 0; 137 140 } 138 141 EXPORT_SYMBOL(ce_aes_expandkey);
+63 -76
arch/arm64/crypto/aes-glue.c
··· 5 5 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> 6 6 */ 7 7 8 - #include <asm/hwcap.h> 9 - #include <asm/neon.h> 10 8 #include <crypto/aes.h> 11 9 #include <crypto/ctr.h> 12 10 #include <crypto/internal/hash.h> ··· 17 19 #include <linux/kernel.h> 18 20 #include <linux/module.h> 19 21 #include <linux/string.h> 22 + 23 + #include <asm/hwcap.h> 24 + #include <asm/simd.h> 20 25 21 26 #include "aes-ce-setkey.h" 22 27 ··· 187 186 err = skcipher_walk_virt(&walk, req, false); 188 187 189 188 while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { 190 - kernel_neon_begin(); 191 - aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 192 - ctx->key_enc, rounds, blocks); 193 - kernel_neon_end(); 189 + scoped_ksimd() 190 + aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 191 + ctx->key_enc, rounds, blocks); 194 192 err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); 195 193 } 196 194 return err; ··· 206 206 err = skcipher_walk_virt(&walk, req, false); 207 207 208 208 while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { 209 - kernel_neon_begin(); 210 - aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 211 - ctx->key_dec, rounds, blocks); 212 - kernel_neon_end(); 209 + scoped_ksimd() 210 + aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 211 + ctx->key_dec, rounds, blocks); 213 212 err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); 214 213 } 215 214 return err; ··· 223 224 unsigned int blocks; 224 225 225 226 while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { 226 - kernel_neon_begin(); 227 - aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr, 228 - ctx->key_enc, rounds, blocks, walk->iv); 229 - kernel_neon_end(); 227 + scoped_ksimd() 228 + aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr, 229 + ctx->key_enc, rounds, blocks, walk->iv); 230 230 err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); 231 231 } 232 232 return err; ··· 251 253 unsigned int blocks; 252 254 253 255 while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { 254 - kernel_neon_begin(); 255 - aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr, 256 - ctx->key_dec, rounds, blocks, walk->iv); 257 - kernel_neon_end(); 256 + scoped_ksimd() 257 + aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr, 258 + ctx->key_dec, rounds, blocks, walk->iv); 258 259 err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); 259 260 } 260 261 return err; ··· 319 322 if (err) 320 323 return err; 321 324 322 - kernel_neon_begin(); 323 - aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 324 - ctx->key_enc, rounds, walk.nbytes, walk.iv); 325 - kernel_neon_end(); 325 + scoped_ksimd() 326 + aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 327 + ctx->key_enc, rounds, walk.nbytes, walk.iv); 326 328 327 329 return skcipher_walk_done(&walk, 0); 328 330 } ··· 375 379 if (err) 376 380 return err; 377 381 378 - kernel_neon_begin(); 379 - aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 380 - ctx->key_dec, rounds, walk.nbytes, walk.iv); 381 - kernel_neon_end(); 382 + scoped_ksimd() 383 + aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 384 + ctx->key_dec, rounds, walk.nbytes, walk.iv); 382 385 383 386 return skcipher_walk_done(&walk, 0); 384 387 } ··· 394 399 395 400 blocks = walk.nbytes / AES_BLOCK_SIZE; 396 401 if (blocks) { 397 - kernel_neon_begin(); 398 - aes_essiv_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 399 - ctx->key1.key_enc, rounds, blocks, 400 - req->iv, ctx->key2.key_enc); 401 - kernel_neon_end(); 402 + scoped_ksimd() 403 + aes_essiv_cbc_encrypt(walk.dst.virt.addr, 404 + walk.src.virt.addr, 405 + ctx->key1.key_enc, rounds, blocks, 406 + req->iv, ctx->key2.key_enc); 402 407 err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); 403 408 } 404 409 return err ?: cbc_encrypt_walk(req, &walk); ··· 416 421 417 422 blocks = walk.nbytes / AES_BLOCK_SIZE; 418 423 if (blocks) { 419 - kernel_neon_begin(); 420 - aes_essiv_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 421 - ctx->key1.key_dec, rounds, blocks, 422 - req->iv, ctx->key2.key_enc); 423 - kernel_neon_end(); 424 + scoped_ksimd() 425 + aes_essiv_cbc_decrypt(walk.dst.virt.addr, 426 + walk.src.virt.addr, 427 + ctx->key1.key_dec, rounds, blocks, 428 + req->iv, ctx->key2.key_enc); 424 429 err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); 425 430 } 426 431 return err ?: cbc_decrypt_walk(req, &walk); ··· 456 461 else if (nbytes < walk.total) 457 462 nbytes &= ~(AES_BLOCK_SIZE - 1); 458 463 459 - kernel_neon_begin(); 460 - aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, 461 - walk.iv, byte_ctr); 462 - kernel_neon_end(); 464 + scoped_ksimd() 465 + aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, 466 + walk.iv, byte_ctr); 463 467 464 468 if (unlikely(nbytes < AES_BLOCK_SIZE)) 465 469 memcpy(walk.dst.virt.addr, ··· 500 506 else if (nbytes < walk.total) 501 507 nbytes &= ~(AES_BLOCK_SIZE - 1); 502 508 503 - kernel_neon_begin(); 504 - aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, 505 - walk.iv); 506 - kernel_neon_end(); 509 + scoped_ksimd() 510 + aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, 511 + walk.iv); 507 512 508 513 if (unlikely(nbytes < AES_BLOCK_SIZE)) 509 514 memcpy(walk.dst.virt.addr, ··· 555 562 if (walk.nbytes < walk.total) 556 563 nbytes &= ~(AES_BLOCK_SIZE - 1); 557 564 558 - kernel_neon_begin(); 559 - aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 560 - ctx->key1.key_enc, rounds, nbytes, 561 - ctx->key2.key_enc, walk.iv, first); 562 - kernel_neon_end(); 565 + scoped_ksimd() 566 + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 567 + ctx->key1.key_enc, rounds, nbytes, 568 + ctx->key2.key_enc, walk.iv, first); 563 569 err = skcipher_walk_done(&walk, walk.nbytes - nbytes); 564 570 } 565 571 ··· 576 584 if (err) 577 585 return err; 578 586 579 - kernel_neon_begin(); 580 - aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 581 - ctx->key1.key_enc, rounds, walk.nbytes, 582 - ctx->key2.key_enc, walk.iv, first); 583 - kernel_neon_end(); 587 + scoped_ksimd() 588 + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 589 + ctx->key1.key_enc, rounds, walk.nbytes, 590 + ctx->key2.key_enc, walk.iv, first); 584 591 585 592 return skcipher_walk_done(&walk, 0); 586 593 } ··· 625 634 if (walk.nbytes < walk.total) 626 635 nbytes &= ~(AES_BLOCK_SIZE - 1); 627 636 628 - kernel_neon_begin(); 629 - aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 630 - ctx->key1.key_dec, rounds, nbytes, 631 - ctx->key2.key_enc, walk.iv, first); 632 - kernel_neon_end(); 637 + scoped_ksimd() 638 + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 639 + ctx->key1.key_dec, rounds, nbytes, 640 + ctx->key2.key_enc, walk.iv, first); 633 641 err = skcipher_walk_done(&walk, walk.nbytes - nbytes); 634 642 } 635 643 ··· 647 657 return err; 648 658 649 659 650 - kernel_neon_begin(); 651 - aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 652 - ctx->key1.key_dec, rounds, walk.nbytes, 653 - ctx->key2.key_enc, walk.iv, first); 654 - kernel_neon_end(); 660 + scoped_ksimd() 661 + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 662 + ctx->key1.key_dec, rounds, walk.nbytes, 663 + ctx->key2.key_enc, walk.iv, first); 655 664 656 665 return skcipher_walk_done(&walk, 0); 657 666 } ··· 797 808 return err; 798 809 799 810 /* encrypt the zero vector */ 800 - kernel_neon_begin(); 801 - aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc, 802 - rounds, 1); 803 - kernel_neon_end(); 811 + scoped_ksimd() 812 + aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, 813 + ctx->key.key_enc, rounds, 1); 804 814 805 815 cmac_gf128_mul_by_x(consts, consts); 806 816 cmac_gf128_mul_by_x(consts + 1, consts); ··· 825 837 if (err) 826 838 return err; 827 839 828 - kernel_neon_begin(); 829 - aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1); 830 - aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2); 831 - kernel_neon_end(); 840 + scoped_ksimd() { 841 + aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1); 842 + aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2); 843 + } 832 844 833 845 return cbcmac_setkey(tfm, key, sizeof(key)); 834 846 } ··· 848 860 int rem; 849 861 850 862 do { 851 - kernel_neon_begin(); 852 - rem = aes_mac_update(in, ctx->key_enc, rounds, blocks, 853 - dg, enc_before, !enc_before); 854 - kernel_neon_end(); 863 + scoped_ksimd() 864 + rem = aes_mac_update(in, ctx->key_enc, rounds, blocks, 865 + dg, enc_before, !enc_before); 855 866 in += (blocks - rem) * AES_BLOCK_SIZE; 856 867 blocks = rem; 857 868 } while (blocks);
+75 -75
arch/arm64/crypto/aes-neonbs-glue.c
··· 85 85 86 86 ctx->rounds = 6 + key_len / 4; 87 87 88 - kernel_neon_begin(); 89 - aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); 90 - kernel_neon_end(); 88 + scoped_ksimd() 89 + aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); 91 90 92 91 return 0; 93 92 } ··· 109 110 blocks = round_down(blocks, 110 111 walk.stride / AES_BLOCK_SIZE); 111 112 112 - kernel_neon_begin(); 113 - fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, 114 - ctx->rounds, blocks); 115 - kernel_neon_end(); 113 + scoped_ksimd() 114 + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, 115 + ctx->rounds, blocks); 116 116 err = skcipher_walk_done(&walk, 117 117 walk.nbytes - blocks * AES_BLOCK_SIZE); 118 118 } ··· 144 146 145 147 memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc)); 146 148 147 - kernel_neon_begin(); 148 - aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); 149 - kernel_neon_end(); 149 + scoped_ksimd() 150 + aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); 150 151 memzero_explicit(&rk, sizeof(rk)); 151 152 152 153 return 0; ··· 164 167 unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; 165 168 166 169 /* fall back to the non-bitsliced NEON implementation */ 167 - kernel_neon_begin(); 168 - neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 169 - ctx->enc, ctx->key.rounds, blocks, 170 - walk.iv); 171 - kernel_neon_end(); 170 + scoped_ksimd() 171 + neon_aes_cbc_encrypt(walk.dst.virt.addr, 172 + walk.src.virt.addr, 173 + ctx->enc, ctx->key.rounds, blocks, 174 + walk.iv); 172 175 err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); 173 176 } 174 177 return err; ··· 190 193 blocks = round_down(blocks, 191 194 walk.stride / AES_BLOCK_SIZE); 192 195 193 - kernel_neon_begin(); 194 - aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 195 - ctx->key.rk, ctx->key.rounds, blocks, 196 - walk.iv); 197 - kernel_neon_end(); 196 + scoped_ksimd() 197 + aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 198 + ctx->key.rk, ctx->key.rounds, blocks, 199 + walk.iv); 198 200 err = skcipher_walk_done(&walk, 199 201 walk.nbytes - blocks * AES_BLOCK_SIZE); 200 202 } ··· 216 220 const u8 *src = walk.src.virt.addr; 217 221 u8 *dst = walk.dst.virt.addr; 218 222 219 - kernel_neon_begin(); 220 - if (blocks >= 8) { 221 - aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds, 222 - blocks, walk.iv); 223 - dst += blocks * AES_BLOCK_SIZE; 224 - src += blocks * AES_BLOCK_SIZE; 223 + scoped_ksimd() { 224 + if (blocks >= 8) { 225 + aesbs_ctr_encrypt(dst, src, ctx->key.rk, 226 + ctx->key.rounds, blocks, 227 + walk.iv); 228 + dst += blocks * AES_BLOCK_SIZE; 229 + src += blocks * AES_BLOCK_SIZE; 230 + } 231 + if (nbytes && walk.nbytes == walk.total) { 232 + u8 buf[AES_BLOCK_SIZE]; 233 + u8 *d = dst; 234 + 235 + if (unlikely(nbytes < AES_BLOCK_SIZE)) 236 + src = dst = memcpy(buf + sizeof(buf) - 237 + nbytes, src, nbytes); 238 + 239 + neon_aes_ctr_encrypt(dst, src, ctx->enc, 240 + ctx->key.rounds, nbytes, 241 + walk.iv); 242 + 243 + if (unlikely(nbytes < AES_BLOCK_SIZE)) 244 + memcpy(d, dst, nbytes); 245 + 246 + nbytes = 0; 247 + } 225 248 } 226 - if (nbytes && walk.nbytes == walk.total) { 227 - u8 buf[AES_BLOCK_SIZE]; 228 - u8 *d = dst; 229 - 230 - if (unlikely(nbytes < AES_BLOCK_SIZE)) 231 - src = dst = memcpy(buf + sizeof(buf) - nbytes, 232 - src, nbytes); 233 - 234 - neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, 235 - nbytes, walk.iv); 236 - 237 - if (unlikely(nbytes < AES_BLOCK_SIZE)) 238 - memcpy(d, dst, nbytes); 239 - 240 - nbytes = 0; 241 - } 242 - kernel_neon_end(); 243 249 err = skcipher_walk_done(&walk, nbytes); 244 250 } 245 251 return err; ··· 318 320 in = walk.src.virt.addr; 319 321 nbytes = walk.nbytes; 320 322 321 - kernel_neon_begin(); 322 - if (blocks >= 8) { 323 - if (first == 1) 324 - neon_aes_ecb_encrypt(walk.iv, walk.iv, 325 - ctx->twkey, 326 - ctx->key.rounds, 1); 327 - first = 2; 323 + scoped_ksimd() { 324 + if (blocks >= 8) { 325 + if (first == 1) 326 + neon_aes_ecb_encrypt(walk.iv, walk.iv, 327 + ctx->twkey, 328 + ctx->key.rounds, 1); 329 + first = 2; 328 330 329 - fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, 330 - walk.iv); 331 + fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, 332 + walk.iv); 331 333 332 - out += blocks * AES_BLOCK_SIZE; 333 - in += blocks * AES_BLOCK_SIZE; 334 - nbytes -= blocks * AES_BLOCK_SIZE; 334 + out += blocks * AES_BLOCK_SIZE; 335 + in += blocks * AES_BLOCK_SIZE; 336 + nbytes -= blocks * AES_BLOCK_SIZE; 337 + } 338 + if (walk.nbytes == walk.total && nbytes > 0) { 339 + if (encrypt) 340 + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, 341 + ctx->key.rounds, nbytes, 342 + ctx->twkey, walk.iv, first); 343 + else 344 + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, 345 + ctx->key.rounds, nbytes, 346 + ctx->twkey, walk.iv, first); 347 + nbytes = first = 0; 348 + } 335 349 } 336 - if (walk.nbytes == walk.total && nbytes > 0) { 337 - if (encrypt) 338 - neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, 339 - ctx->key.rounds, nbytes, 340 - ctx->twkey, walk.iv, first); 341 - else 342 - neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, 343 - ctx->key.rounds, nbytes, 344 - ctx->twkey, walk.iv, first); 345 - nbytes = first = 0; 346 - } 347 - kernel_neon_end(); 348 350 err = skcipher_walk_done(&walk, nbytes); 349 351 } 350 352 ··· 367 369 in = walk.src.virt.addr; 368 370 nbytes = walk.nbytes; 369 371 370 - kernel_neon_begin(); 371 - if (encrypt) 372 - neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds, 373 - nbytes, ctx->twkey, walk.iv, first); 374 - else 375 - neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds, 376 - nbytes, ctx->twkey, walk.iv, first); 377 - kernel_neon_end(); 372 + scoped_ksimd() { 373 + if (encrypt) 374 + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, 375 + ctx->key.rounds, nbytes, ctx->twkey, 376 + walk.iv, first); 377 + else 378 + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, 379 + ctx->key.rounds, nbytes, ctx->twkey, 380 + walk.iv, first); 381 + } 378 382 379 383 return skcipher_walk_done(&walk, 0); 380 384 }
+13 -14
arch/arm64/crypto/ghash-ce-glue.c
··· 5 5 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org> 6 6 */ 7 7 8 - #include <asm/neon.h> 9 8 #include <crypto/aes.h> 10 9 #include <crypto/b128ops.h> 11 10 #include <crypto/gcm.h> ··· 20 21 #include <linux/module.h> 21 22 #include <linux/string.h> 22 23 #include <linux/unaligned.h> 24 + 25 + #include <asm/simd.h> 23 26 24 27 MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions"); 25 28 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); ··· 75 74 u64 const h[][2], 76 75 const char *head)) 77 76 { 78 - kernel_neon_begin(); 79 - simd_update(blocks, dg, src, key->h, head); 80 - kernel_neon_end(); 77 + scoped_ksimd() 78 + simd_update(blocks, dg, src, key->h, head); 81 79 } 82 80 83 81 /* avoid hogging the CPU for too long */ ··· 329 329 tag = NULL; 330 330 } 331 331 332 - kernel_neon_begin(); 333 - pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h, 334 - dg, iv, ctx->aes_key.key_enc, nrounds, 335 - tag); 336 - kernel_neon_end(); 332 + scoped_ksimd() 333 + pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h, 334 + dg, iv, ctx->aes_key.key_enc, nrounds, 335 + tag); 337 336 338 337 if (unlikely(!nbytes)) 339 338 break; ··· 398 399 tag = NULL; 399 400 } 400 401 401 - kernel_neon_begin(); 402 - ret = pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h, 403 - dg, iv, ctx->aes_key.key_enc, 404 - nrounds, tag, otag, authsize); 405 - kernel_neon_end(); 402 + scoped_ksimd() 403 + ret = pmull_gcm_decrypt(nbytes, dst, src, 404 + ctx->ghash_key.h, 405 + dg, iv, ctx->aes_key.key_enc, 406 + nrounds, tag, otag, authsize); 406 407 407 408 if (unlikely(!nbytes)) 408 409 break;
+2 -3
arch/arm64/crypto/nhpoly1305-neon-glue.c
··· 25 25 do { 26 26 unsigned int n = min_t(unsigned int, srclen, SZ_4K); 27 27 28 - kernel_neon_begin(); 29 - crypto_nhpoly1305_update_helper(desc, src, n, nh_neon); 30 - kernel_neon_end(); 28 + scoped_ksimd() 29 + crypto_nhpoly1305_update_helper(desc, src, n, nh_neon); 31 30 src += n; 32 31 srclen -= n; 33 32 } while (srclen);
+8 -7
arch/arm64/crypto/sm3-ce-glue.c
··· 5 5 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> 6 6 */ 7 7 8 - #include <asm/neon.h> 9 8 #include <crypto/internal/hash.h> 10 9 #include <crypto/sm3.h> 11 10 #include <crypto/sm3_base.h> 12 11 #include <linux/cpufeature.h> 13 12 #include <linux/kernel.h> 14 13 #include <linux/module.h> 14 + 15 + #include <asm/simd.h> 15 16 16 17 MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions"); 17 18 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); ··· 26 25 { 27 26 int remain; 28 27 29 - kernel_neon_begin(); 30 - remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform); 31 - kernel_neon_end(); 28 + scoped_ksimd() { 29 + remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform); 30 + } 32 31 return remain; 33 32 } 34 33 35 34 static int sm3_ce_finup(struct shash_desc *desc, const u8 *data, 36 35 unsigned int len, u8 *out) 37 36 { 38 - kernel_neon_begin(); 39 - sm3_base_do_finup(desc, data, len, sm3_ce_transform); 40 - kernel_neon_end(); 37 + scoped_ksimd() { 38 + sm3_base_do_finup(desc, data, len, sm3_ce_transform); 39 + } 41 40 return sm3_base_finish(desc, out); 42 41 } 43 42
+6 -10
arch/arm64/crypto/sm3-neon-glue.c
··· 5 5 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> 6 6 */ 7 7 8 - #include <asm/neon.h> 8 + #include <asm/simd.h> 9 9 #include <crypto/internal/hash.h> 10 10 #include <crypto/sm3.h> 11 11 #include <crypto/sm3_base.h> ··· 20 20 static int sm3_neon_update(struct shash_desc *desc, const u8 *data, 21 21 unsigned int len) 22 22 { 23 - int remain; 24 - 25 - kernel_neon_begin(); 26 - remain = sm3_base_do_update_blocks(desc, data, len, sm3_neon_transform); 27 - kernel_neon_end(); 28 - return remain; 23 + scoped_ksimd() 24 + return sm3_base_do_update_blocks(desc, data, len, 25 + sm3_neon_transform); 29 26 } 30 27 31 28 static int sm3_neon_finup(struct shash_desc *desc, const u8 *data, 32 29 unsigned int len, u8 *out) 33 30 { 34 - kernel_neon_begin(); 35 - sm3_base_do_finup(desc, data, len, sm3_neon_transform); 36 - kernel_neon_end(); 31 + scoped_ksimd() 32 + sm3_base_do_finup(desc, data, len, sm3_neon_transform); 37 33 return sm3_base_finish(desc, out); 38 34 } 39 35
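Note the early return from inside the guarded scope in sm3_neon_update(): scoped_ksimd() is built on DEFINE_LOCK_GUARD_1 (see the <asm/simd.h> hunk below), so kernel_neon_end() still runs via the guard's cleanup handler. Hand-expanded, the update path amounts to something like this sketch:

    struct user_fpsimd_state st = {};
    int remain;

    kernel_neon_begin(&st);
    remain = sm3_base_do_update_blocks(desc, data, len, sm3_neon_transform);
    kernel_neon_end(&st);    /* the guard runs this on scope exit, even on return */
    return remain;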
+16 -33
arch/arm64/crypto/sm4-ce-ccm-glue.c
··· 11 11 #include <linux/crypto.h> 12 12 #include <linux/kernel.h> 13 13 #include <linux/cpufeature.h> 14 - #include <asm/neon.h> 14 + #include <asm/simd.h> 15 15 #include <crypto/scatterwalk.h> 16 16 #include <crypto/internal/aead.h> 17 17 #include <crypto/internal/skcipher.h> ··· 35 35 if (key_len != SM4_KEY_SIZE) 36 36 return -EINVAL; 37 37 38 - kernel_neon_begin(); 39 - sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, 40 - crypto_sm4_fk, crypto_sm4_ck); 41 - kernel_neon_end(); 38 + scoped_ksimd() 39 + sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, 40 + crypto_sm4_fk, crypto_sm4_ck); 42 41 43 42 return 0; 44 43 } ··· 166 167 memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE); 167 168 crypto_inc(walk->iv, SM4_BLOCK_SIZE); 168 169 169 - kernel_neon_begin(); 170 + scoped_ksimd() { 171 + if (req->assoclen) 172 + ccm_calculate_auth_mac(req, mac); 170 173 171 - if (req->assoclen) 172 - ccm_calculate_auth_mac(req, mac); 174 + while (walk->nbytes) { 175 + unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; 173 176 174 - while (walk->nbytes && walk->nbytes != walk->total) { 175 - unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; 177 + if (walk->nbytes == walk->total) 178 + tail = 0; 176 179 177 - sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, 178 - walk->src.virt.addr, walk->iv, 179 - walk->nbytes - tail, mac); 180 + sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, 181 + walk->src.virt.addr, walk->iv, 182 + walk->nbytes - tail, mac); 180 183 181 - kernel_neon_end(); 182 - 183 - err = skcipher_walk_done(walk, tail); 184 - 185 - kernel_neon_begin(); 186 - } 187 - 188 - if (walk->nbytes) { 189 - sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, 190 - walk->src.virt.addr, walk->iv, 191 - walk->nbytes, mac); 192 - 184 + err = skcipher_walk_done(walk, tail); 185 + } 193 186 sm4_ce_ccm_final(rkey_enc, ctr0, mac); 194 - 195 - kernel_neon_end(); 196 - 197 - err = skcipher_walk_done(walk, 0); 198 - } else { 199 - sm4_ce_ccm_final(rkey_enc, ctr0, mac); 200 - 201 - kernel_neon_end(); 202 187 } 203 188 204 189 return err;
+4 -6
arch/arm64/crypto/sm4-ce-cipher-glue.c
··· 32 32 if (!crypto_simd_usable()) { 33 33 sm4_crypt_block(ctx->rkey_enc, out, in); 34 34 } else { 35 - kernel_neon_begin(); 36 - sm4_ce_do_crypt(ctx->rkey_enc, out, in); 37 - kernel_neon_end(); 35 + scoped_ksimd() 36 + sm4_ce_do_crypt(ctx->rkey_enc, out, in); 38 37 } 39 38 } 40 39 ··· 44 45 if (!crypto_simd_usable()) { 45 46 sm4_crypt_block(ctx->rkey_dec, out, in); 46 47 } else { 47 - kernel_neon_begin(); 48 - sm4_ce_do_crypt(ctx->rkey_dec, out, in); 49 - kernel_neon_end(); 48 + scoped_ksimd() 49 + sm4_ce_do_crypt(ctx->rkey_dec, out, in); 50 50 } 51 51 } 52 52
+22 -40
arch/arm64/crypto/sm4-ce-gcm-glue.c
··· 11 11 #include <linux/crypto.h> 12 12 #include <linux/kernel.h> 13 13 #include <linux/cpufeature.h> 14 - #include <asm/neon.h> 14 + #include <asm/simd.h> 15 15 #include <crypto/b128ops.h> 16 16 #include <crypto/scatterwalk.h> 17 17 #include <crypto/internal/aead.h> ··· 48 48 if (key_len != SM4_KEY_SIZE) 49 49 return -EINVAL; 50 50 51 - kernel_neon_begin(); 52 - 53 - sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, 54 - crypto_sm4_fk, crypto_sm4_ck); 55 - sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table); 56 - 57 - kernel_neon_end(); 51 + scoped_ksimd() { 52 + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, 53 + crypto_sm4_fk, crypto_sm4_ck); 54 + sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table); 55 + } 58 56 return 0; 59 57 } 60 58 ··· 147 149 memcpy(iv, req->iv, GCM_IV_SIZE); 148 150 put_unaligned_be32(2, iv + GCM_IV_SIZE); 149 151 150 - kernel_neon_begin(); 152 + scoped_ksimd() { 153 + if (req->assoclen) 154 + gcm_calculate_auth_mac(req, ghash); 151 155 152 - if (req->assoclen) 153 - gcm_calculate_auth_mac(req, ghash); 156 + do { 157 + unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; 158 + const u8 *src = walk->src.virt.addr; 159 + u8 *dst = walk->dst.virt.addr; 160 + const u8 *l = NULL; 154 161 155 - while (walk->nbytes) { 156 - unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; 157 - const u8 *src = walk->src.virt.addr; 158 - u8 *dst = walk->dst.virt.addr; 162 + if (walk->nbytes == walk->total) { 163 + l = (const u8 *)&lengths; 164 + tail = 0; 165 + } 159 166 160 - if (walk->nbytes == walk->total) { 161 167 sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, 162 - walk->nbytes, ghash, 163 - ctx->ghash_table, 164 - (const u8 *)&lengths); 168 + walk->nbytes - tail, ghash, 169 + ctx->ghash_table, l); 165 170 166 - kernel_neon_end(); 167 - 168 - return skcipher_walk_done(walk, 0); 169 - } 170 - 171 - sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, 172 - walk->nbytes - tail, ghash, 173 - ctx->ghash_table, NULL); 174 - 175 - kernel_neon_end(); 176 - 177 - err = skcipher_walk_done(walk, tail); 178 - 179 - kernel_neon_begin(); 171 + err = skcipher_walk_done(walk, tail); 172 + } while (walk->nbytes); 180 173 } 181 - 182 - sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv, 183 - walk->nbytes, ghash, ctx->ghash_table, 184 - (const u8 *)&lengths); 185 - 186 - kernel_neon_end(); 187 - 188 174 return err; 189 175 } 190 176
+95 -117
arch/arm64/crypto/sm4-ce-glue.c
··· 8 8 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> 9 9 */ 10 10 11 - #include <asm/neon.h> 11 + #include <asm/simd.h> 12 12 #include <crypto/b128ops.h> 13 13 #include <crypto/internal/hash.h> 14 14 #include <crypto/internal/skcipher.h> ··· 74 74 if (key_len != SM4_KEY_SIZE) 75 75 return -EINVAL; 76 76 77 - kernel_neon_begin(); 78 - sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, 79 - crypto_sm4_fk, crypto_sm4_ck); 80 - kernel_neon_end(); 77 + scoped_ksimd() 78 + sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, 79 + crypto_sm4_fk, crypto_sm4_ck); 81 80 return 0; 82 81 } ··· 93 94 if (ret) 94 95 return ret; 95 96 96 - kernel_neon_begin(); 97 - sm4_ce_expand_key(key, ctx->key1.rkey_enc, 98 - ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); 99 - sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc, 100 - ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); 101 - kernel_neon_end(); 97 + scoped_ksimd() { 98 + sm4_ce_expand_key(key, ctx->key1.rkey_enc, 99 + ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); 100 + sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc, 101 + ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); 102 + } 102 103 103 104 return 0; 104 105 } ··· 116 117 u8 *dst = walk.dst.virt.addr; 117 118 unsigned int nblks; 118 119 119 - kernel_neon_begin(); 120 - 121 - nblks = BYTES2BLKS(nbytes); 122 - if (nblks) { 123 - sm4_ce_crypt(rkey, dst, src, nblks); 124 - nbytes -= nblks * SM4_BLOCK_SIZE; 120 + scoped_ksimd() { 121 + nblks = BYTES2BLKS(nbytes); 122 + if (nblks) { 123 + sm4_ce_crypt(rkey, dst, src, nblks); 124 + nbytes -= nblks * SM4_BLOCK_SIZE; 125 + } 125 126 } 126 - 127 - kernel_neon_end(); 128 127 129 128 err = skcipher_walk_done(&walk, nbytes); 130 129 } ··· 164 167 165 168 nblocks = nbytes / SM4_BLOCK_SIZE; 166 169 if (nblocks) { 167 - kernel_neon_begin(); 168 - 169 - if (encrypt) 170 - sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, 171 - walk.iv, nblocks); 172 - else 173 - sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, 174 - walk.iv, nblocks); 175 - 176 - kernel_neon_end(); 170 + scoped_ksimd() { 171 + if (encrypt) 172 + sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, 173 + walk.iv, nblocks); 174 + else 175 + sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, 176 + walk.iv, nblocks); 177 + } 177 178 } 178 179 179 180 err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); ··· 244 249 if (err) 245 250 return err; 246 251 247 - kernel_neon_begin(); 248 - 249 - if (encrypt) 250 - sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr, 251 - walk.src.virt.addr, walk.iv, walk.nbytes); 252 - else 253 - sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr, 254 - walk.src.virt.addr, walk.iv, walk.nbytes); 255 - 256 - kernel_neon_end(); 252 + scoped_ksimd() { 253 + if (encrypt) 254 + sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr, 255 + walk.src.virt.addr, walk.iv, walk.nbytes); 256 + else 257 + sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr, 258 + walk.src.virt.addr, walk.iv, walk.nbytes); 259 + } 257 260 258 261 return skcipher_walk_done(&walk, 0); 259 262 } ··· 281 288 u8 *dst = walk.dst.virt.addr; 282 289 unsigned int nblks; 283 290 284 - kernel_neon_begin(); 291 + scoped_ksimd() { 292 + nblks = BYTES2BLKS(nbytes); 293 + if (nblks) { 294 + sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); 295 + dst += nblks * SM4_BLOCK_SIZE; 296 + src += nblks * SM4_BLOCK_SIZE; 297 + nbytes -= nblks * SM4_BLOCK_SIZE; 298 + } 285 299 286 - nblks = BYTES2BLKS(nbytes); 287 - if (nblks) { 288 - sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); 289 - dst += nblks * SM4_BLOCK_SIZE; 290 - src += nblks * SM4_BLOCK_SIZE; 291 - nbytes -= nblks * SM4_BLOCK_SIZE; 300 + /* tail */ 301 + if (walk.nbytes == walk.total && nbytes > 0) { 302 + u8 keystream[SM4_BLOCK_SIZE]; 303 + 304 + sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv); 305 + crypto_inc(walk.iv, SM4_BLOCK_SIZE); 306 + crypto_xor_cpy(dst, src, keystream, nbytes); 307 + nbytes = 0; 308 + } 292 309 } 293 - 294 - /* tail */ 295 - if (walk.nbytes == walk.total && nbytes > 0) { 296 - u8 keystream[SM4_BLOCK_SIZE]; 297 - 298 - sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv); 299 - crypto_inc(walk.iv, SM4_BLOCK_SIZE); 300 - crypto_xor_cpy(dst, src, keystream, nbytes); 301 - nbytes = 0; 302 - } 303 - 304 - kernel_neon_end(); 305 310 306 311 err = skcipher_walk_done(&walk, nbytes); 307 312 } ··· 350 359 if (nbytes < walk.total) 351 360 nbytes &= ~(SM4_BLOCK_SIZE - 1); 352 361 353 - kernel_neon_begin(); 354 - 355 - if (encrypt) 356 - sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, 357 - walk.src.virt.addr, walk.iv, nbytes, 358 - rkey2_enc); 359 - else 360 - sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, 361 - walk.src.virt.addr, walk.iv, nbytes, 362 - rkey2_enc); 363 - 364 - kernel_neon_end(); 362 + scoped_ksimd() { 363 + if (encrypt) 364 + sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, 365 + walk.src.virt.addr, walk.iv, nbytes, 366 + rkey2_enc); 367 + else 368 + sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, 369 + walk.src.virt.addr, walk.iv, nbytes, 370 + rkey2_enc); 371 + } 365 372 366 373 rkey2_enc = NULL; 367 374 ··· 384 395 if (err) 385 396 return err; 386 397 387 - kernel_neon_begin(); 388 - 389 - if (encrypt) 390 - sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, 391 - walk.src.virt.addr, walk.iv, walk.nbytes, 392 - rkey2_enc); 393 - else 394 - sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, 395 - walk.src.virt.addr, walk.iv, walk.nbytes, 396 - rkey2_enc); 397 - 398 - kernel_neon_end(); 398 + scoped_ksimd() { 399 + if (encrypt) 400 + sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, 401 + walk.src.virt.addr, walk.iv, walk.nbytes, 402 + rkey2_enc); 403 + else 404 + sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, 405 + walk.src.virt.addr, walk.iv, walk.nbytes, 406 + rkey2_enc); 407 + } 399 408 400 409 return skcipher_walk_done(&walk, 0); 401 410 } ··· 497 510 if (key_len != SM4_KEY_SIZE) 498 511 return -EINVAL; 499 512 500 - kernel_neon_begin(); 501 - sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, 502 - crypto_sm4_fk, crypto_sm4_ck); 503 - kernel_neon_end(); 504 - 513 + scoped_ksimd() 514 + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, 515 + crypto_sm4_fk, crypto_sm4_ck); 505 516 return 0; 506 517 } ··· 515 530 516 531 memset(consts, 0, SM4_BLOCK_SIZE); 517 532 518 - kernel_neon_begin(); 533 + scoped_ksimd() { 534 + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, 535 + crypto_sm4_fk, crypto_sm4_ck); 519 536 520 - sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, 521 - crypto_sm4_fk, crypto_sm4_ck); 522 - 523 - /* encrypt the zero block */ 524 - sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts); 525 - 526 - kernel_neon_end(); 537 + /* encrypt the zero block */ 538 + sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts); 539 + } 527 540 528 541 /* gf(2^128) multiply zero-ciphertext with u and u^2 */ 529 542 a = be64_to_cpu(consts[0].a); ··· 551 568 if (key_len != SM4_KEY_SIZE) 552 569 return -EINVAL; 553 570 554 - kernel_neon_begin(); 571 + scoped_ksimd() { 572 + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, 573 + crypto_sm4_fk, crypto_sm4_ck); 555 574 556 - sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, 557 - crypto_sm4_fk, crypto_sm4_ck); 575 + sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]); 576 + sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2); 558 577 559 - sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]); 560 - sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2); 561 - 562 - sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec, 563 - crypto_sm4_fk, crypto_sm4_ck); 564 - 565 - kernel_neon_end(); 578 + sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec, 579 + crypto_sm4_fk, crypto_sm4_ck); 580 + } 566 581 567 582 return 0; 568 583 } ··· 581 600 unsigned int nblocks = len / SM4_BLOCK_SIZE; 582 601 583 602 len %= SM4_BLOCK_SIZE; 584 - kernel_neon_begin(); 585 - sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p, 586 - nblocks, false, true); 587 - kernel_neon_end(); 603 + scoped_ksimd() 604 + sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p, 605 + nblocks, false, true); 588 606 return len; 589 608 } ··· 599 619 ctx->digest[len] ^= 0x80; 600 620 consts += SM4_BLOCK_SIZE; 601 621 } 602 - kernel_neon_begin(); 603 - sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1, 604 - false, true); 605 - kernel_neon_end(); 622 + scoped_ksimd() 623 + sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1, 624 + false, true); 606 625 memcpy(out, ctx->digest, SM4_BLOCK_SIZE); 607 626 return 0; 608 627 } ··· 614 635 615 636 if (len) { 616 637 crypto_xor(ctx->digest, src, len); 617 - kernel_neon_begin(); 618 - sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest, 619 - ctx->digest); 620 - kernel_neon_end(); 638 + scoped_ksimd() 639 + sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest, 640 + ctx->digest); 621 641 } 622 642 memcpy(out, ctx->digest, SM4_BLOCK_SIZE); 623 643 return 0;
+8 -17
arch/arm64/crypto/sm4-neon-glue.c
··· 48 48 49 49 nblocks = nbytes / SM4_BLOCK_SIZE; 50 50 if (nblocks) { 51 - kernel_neon_begin(); 52 - 53 - sm4_neon_crypt(rkey, dst, src, nblocks); 54 - 55 - kernel_neon_end(); 51 + scoped_ksimd() 52 + sm4_neon_crypt(rkey, dst, src, nblocks); 56 53 } 57 54 58 55 err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); ··· 123 126 124 127 nblocks = nbytes / SM4_BLOCK_SIZE; 125 128 if (nblocks) { 126 - kernel_neon_begin(); 127 - 128 - sm4_neon_cbc_dec(ctx->rkey_dec, dst, src, 129 - walk.iv, nblocks); 130 - 131 - kernel_neon_end(); 129 + scoped_ksimd() 130 + sm4_neon_cbc_dec(ctx->rkey_dec, dst, src, 131 + walk.iv, nblocks); 132 132 } 133 133 134 134 err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); ··· 151 157 152 158 nblocks = nbytes / SM4_BLOCK_SIZE; 153 159 if (nblocks) { 154 - kernel_neon_begin(); 155 - 156 - sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src, 157 - walk.iv, nblocks); 158 - 159 - kernel_neon_end(); 160 + scoped_ksimd() 161 + sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src, 162 + walk.iv, nblocks); 160 163 161 164 dst += nblocks * SM4_BLOCK_SIZE; 162 165 src += nblocks * SM4_BLOCK_SIZE;
+14 -2
arch/arm64/include/asm/fpu.h
··· 6 6 #ifndef __ASM_FPU_H 7 7 #define __ASM_FPU_H 8 8 9 + #include <linux/preempt.h> 9 10 #include <asm/neon.h> 10 11 11 12 #define kernel_fpu_available() cpu_has_neon() 12 - #define kernel_fpu_begin() kernel_neon_begin() 13 - #define kernel_fpu_end() kernel_neon_end() 13 + 14 + static inline void kernel_fpu_begin(void) 15 + { 16 + BUG_ON(!in_task()); 17 + preempt_disable(); 18 + kernel_neon_begin(NULL); 19 + } 20 + 21 + static inline void kernel_fpu_end(void) 22 + { 23 + kernel_neon_end(NULL); 24 + preempt_enable(); 25 + } 14 26 15 27 #endif /* ! __ASM_FPU_H */
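The generic kernel_fpu_*() API keeps its argument-free form: it pins the task with preempt_disable() and passes a NULL state pointer, which the new kernel_neon_begin() accepts only in non-preemptible task context (see the WARN_ON in the fpsimd.c hunk below). A usage sketch, with do_fp_math() as a hypothetical FP-using routine:

    if (kernel_fpu_available()) {
        kernel_fpu_begin();    /* task context; preemption is now off */
        do_fp_math();
        kernel_fpu_end();
    }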
+2 -2
arch/arm64/include/asm/neon.h
··· 13 13 14 14 #define cpu_has_neon() system_supports_fpsimd() 15 15 16 - void kernel_neon_begin(void); 17 - void kernel_neon_end(void); 16 + void kernel_neon_begin(struct user_fpsimd_state *); 17 + void kernel_neon_end(struct user_fpsimd_state *); 18 18 19 19 #endif /* ! __ASM_NEON_H */
+6 -1
arch/arm64/include/asm/processor.h
··· 172 172 unsigned long fault_code; /* ESR_EL1 value */ 173 173 struct debug_info debug; /* debugging */ 174 174 175 - struct user_fpsimd_state kernel_fpsimd_state; 175 + /* 176 + * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the 177 + * address of a caller provided buffer that will be used to preserve a 178 + * task's kernel mode FPSIMD state while it is scheduled out. 179 + */ 180 + struct user_fpsimd_state *kernel_fpsimd_state; 176 181 unsigned int kernel_fpsimd_cpu; 177 182 #ifdef CONFIG_ARM64_PTR_AUTH 178 183 struct ptrauth_keys_user keys_user;
+5 -2
arch/arm64/include/asm/simd.h
··· 43 43 44 44 #endif /* ! CONFIG_KERNEL_MODE_NEON */ 45 45 46 - DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end()) 46 + DEFINE_LOCK_GUARD_1(ksimd, 47 + struct user_fpsimd_state, 48 + kernel_neon_begin(_T->lock), 49 + kernel_neon_end(_T->lock)) 47 50 48 - #define scoped_ksimd() scoped_guard(ksimd) 51 + #define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){}) 49 52 50 53 #endif
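Roughly, a scoped_ksimd() { ... } block hand-expands to the following sketch (the guard's cleanup also fires on break or return):

    {
        struct user_fpsimd_state __st = {};

        kernel_neon_begin(&__st);
        /* ... FPSIMD code ... */
        kernel_neon_end(&__st);
    }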
+9 -13
arch/arm64/include/asm/xor.h
··· 9 9 #include <linux/hardirq.h> 10 10 #include <asm-generic/xor.h> 11 11 #include <asm/hwcap.h> 12 - #include <asm/neon.h> 12 + #include <asm/simd.h> 13 13 14 14 #ifdef CONFIG_KERNEL_MODE_NEON 15 15 ··· 19 19 xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, 20 20 const unsigned long * __restrict p2) 21 21 { 22 - kernel_neon_begin(); 23 - xor_block_inner_neon.do_2(bytes, p1, p2); 24 - kernel_neon_end(); 22 + scoped_ksimd() 23 + xor_block_inner_neon.do_2(bytes, p1, p2); 25 24 } 26 25 27 26 static void ··· 28 29 const unsigned long * __restrict p2, 29 30 const unsigned long * __restrict p3) 30 31 { 31 - kernel_neon_begin(); 32 - xor_block_inner_neon.do_3(bytes, p1, p2, p3); 33 - kernel_neon_end(); 32 + scoped_ksimd() 33 + xor_block_inner_neon.do_3(bytes, p1, p2, p3); 34 34 } 35 35 36 36 static void ··· 38 40 const unsigned long * __restrict p3, 39 41 const unsigned long * __restrict p4) 40 42 { 41 - kernel_neon_begin(); 42 - xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); 43 - kernel_neon_end(); 43 + scoped_ksimd() 44 + xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); 44 45 } 45 46 46 47 static void ··· 49 52 const unsigned long * __restrict p4, 50 53 const unsigned long * __restrict p5) 51 54 { 52 - kernel_neon_begin(); 53 - xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); 54 - kernel_neon_end(); 55 + scoped_ksimd() 56 + xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); 55 57 } 56 58 57 59 static struct xor_block_template xor_block_arm64 = {
+40 -14
arch/arm64/kernel/fpsimd.c
··· 1489 1489 * Elide the load if this CPU holds the most recent kernel mode 1490 1490 * FPSIMD context of the current task. 1491 1491 */ 1492 - if (last->st == &task->thread.kernel_fpsimd_state && 1492 + if (last->st == task->thread.kernel_fpsimd_state && 1493 1493 task->thread.kernel_fpsimd_cpu == smp_processor_id()) 1494 1494 return; 1495 1495 1496 - fpsimd_load_state(&task->thread.kernel_fpsimd_state); 1496 + fpsimd_load_state(task->thread.kernel_fpsimd_state); 1497 1497 } 1498 1498 1499 1499 static void fpsimd_save_kernel_state(struct task_struct *task) 1500 1500 { 1501 1501 struct cpu_fp_state cpu_fp_state = { 1502 - .st = &task->thread.kernel_fpsimd_state, 1502 + .st = task->thread.kernel_fpsimd_state, 1503 1503 .to_save = FP_STATE_FPSIMD, 1504 1504 }; 1505 1505 1506 - fpsimd_save_state(&task->thread.kernel_fpsimd_state); 1506 + BUG_ON(!cpu_fp_state.st); 1507 + 1508 + fpsimd_save_state(task->thread.kernel_fpsimd_state); 1507 1509 fpsimd_bind_state_to_cpu(&cpu_fp_state); 1508 1510 1509 1511 task->thread.kernel_fpsimd_cpu = smp_processor_id(); ··· 1776 1774 void fpsimd_flush_task_state(struct task_struct *t) 1777 1775 { 1778 1776 t->thread.fpsimd_cpu = NR_CPUS; 1779 1777 t->thread.kernel_fpsimd_state = NULL; 1779 1778 /* 1780 1779 * If we don't support fpsimd, bail out after we have 1781 1780 * reset the fpsimd_cpu for this task and clear the ··· 1836 1833 * 1837 1834 * The caller may freely use the FPSIMD registers until kernel_neon_end() is 1838 1835 * called. 1836 + * 1837 + * Unless called from non-preemptible task context, @state must point to a 1838 + * caller provided buffer that will be used to preserve the task's kernel mode 1839 + * FPSIMD context when it is scheduled out, or if it is interrupted by kernel 1840 + * mode FPSIMD occurring in softirq context. May be %NULL otherwise. 1839 1841 */ 1840 - void kernel_neon_begin(void) 1842 + void kernel_neon_begin(struct user_fpsimd_state *state) 1841 1843 { 1842 1844 if (WARN_ON(!system_supports_fpsimd())) 1843 1845 return; 1846 + 1847 + WARN_ON((preemptible() || in_serving_softirq()) && !state); 1844 1848 1845 1849 BUG_ON(!may_use_simd()); 1846 1850 ··· 1856 1846 /* Save unsaved fpsimd state, if any: */ 1857 1847 if (test_thread_flag(TIF_KERNEL_FPSTATE)) { 1858 1848 BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()); 1859 - fpsimd_save_kernel_state(current); 1849 + fpsimd_save_state(state); 1860 1850 } else { 1861 1851 fpsimd_save_user_state(); 1862 1852 ··· 1877 1867 * mode in task context. So in this case, setting the flag here 1878 1868 * is always appropriate. 1879 1869 */ 1880 - if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) 1870 + if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) { 1871 + /* 1872 + * Record the caller provided buffer as the kernel mode 1873 + * FP/SIMD buffer for this task, so that the state can 1874 + * be preserved and restored on a context switch. 1875 + */ 1876 + WARN_ON(current->thread.kernel_fpsimd_state != NULL); 1877 + current->thread.kernel_fpsimd_state = state; 1881 1878 set_thread_flag(TIF_KERNEL_FPSTATE); 1879 + } 1882 1880 } 1883 1881 1884 1882 /* Invalidate any task state remaining in the fpsimd regs: */ ··· 1904 1886 * 1905 1887 * The caller must not use the FPSIMD registers after this function is called, 1906 1888 * unless kernel_neon_begin() is called again in the meantime. 1889 + * 1890 + * The value of @state must match the value passed to the preceding call to 1891 + * kernel_neon_begin(). 1907 1892 */ 1908 - void kernel_neon_end(void) 1893 + void kernel_neon_end(struct user_fpsimd_state *state) 1909 1894 { 1910 1895 if (!system_supports_fpsimd()) 1896 + return; 1897 + 1898 + if (!test_thread_flag(TIF_KERNEL_FPSTATE)) 1911 1899 return; 1912 1900 1913 1901 /* ··· 1921 1897 * the task context kernel mode FPSIMD state. This can only happen when 1922 1898 * running in softirq context on non-PREEMPT_RT. 1923 1899 */ 1924 - if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() && 1925 - test_thread_flag(TIF_KERNEL_FPSTATE)) 1926 - fpsimd_load_kernel_state(current); 1927 - else 1900 + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) { 1901 + fpsimd_load_state(state); 1902 + } else { 1928 1903 clear_thread_flag(TIF_KERNEL_FPSTATE); 1904 + WARN_ON(current->thread.kernel_fpsimd_state != state); 1905 + current->thread.kernel_fpsimd_state = NULL; 1906 + } 1929 1907 } 1930 1908 EXPORT_SYMBOL_GPL(kernel_neon_end); 1931 1909 ··· 1963 1937 WARN_ON(preemptible()); 1964 1938 1965 1939 if (may_use_simd()) { 1966 - kernel_neon_begin(); 1940 + kernel_neon_begin(&efi_fpsimd_state); 1967 1941 } else { 1968 1942 /* 1969 1943 * If !efi_sve_state, SVE can't be in use yet and doesn't need ··· 2012 1986 return; 2013 1987 2014 1988 if (!efi_fpsimd_state_used) { 2015 - kernel_neon_end(); 1989 + kernel_neon_end(&efi_fpsimd_state); 2016 1990 } else { 2017 1991 if (system_supports_sve() && efi_sve_state_used) { 2018 1992 bool ffr = true;
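Taken together with the WARN_ON added to kernel_neon_begin(), the contract is: pass a stack buffer whenever the section can be preempted or can be interrupted by softirq-context FPSIMD, and NULL only when neither applies. Both shapes as a sketch:

    /* preemptible task context: what scoped_ksimd() does for you */
    struct user_fpsimd_state st = {};

    kernel_neon_begin(&st);
    /* ... */
    kernel_neon_end(&st);

    /* non-preemptible task context: NULL is acceptable, as in
     * kernel_fpu_begin() above */
    preempt_disable();
    kernel_neon_begin(NULL);
    /* ... */
    kernel_neon_end(NULL);
    preempt_enable();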
+12 -21
crypto/aegis128-neon.c
··· 4 4 */ 5 5 6 6 #include <asm/cpufeature.h> 7 - #include <asm/neon.h> 7 + #include <asm/simd.h> 8 8 9 9 #include "aegis.h" 10 10 #include "aegis-neon.h" ··· 24 24 const union aegis_block *key, 25 25 const u8 *iv) 26 26 { 27 - kernel_neon_begin(); 28 - crypto_aegis128_init_neon(state, key, iv); 29 - kernel_neon_end(); 27 + scoped_ksimd() 28 + crypto_aegis128_init_neon(state, key, iv); 30 29 } 31 30 32 31 void crypto_aegis128_update_simd(struct aegis_state *state, const void *msg) 33 32 { 34 - kernel_neon_begin(); 35 - crypto_aegis128_update_neon(state, msg); 36 - kernel_neon_end(); 33 + scoped_ksimd() 34 + crypto_aegis128_update_neon(state, msg); 37 35 } 38 36 39 37 void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst, 40 38 const u8 *src, unsigned int size) 41 39 { 42 - kernel_neon_begin(); 43 - crypto_aegis128_encrypt_chunk_neon(state, dst, src, size); 44 - kernel_neon_end(); 40 + scoped_ksimd() 41 + crypto_aegis128_encrypt_chunk_neon(state, dst, src, size); 45 42 } 46 43 47 44 void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst, 48 45 const u8 *src, unsigned int size) 49 46 { 50 - kernel_neon_begin(); 51 - crypto_aegis128_decrypt_chunk_neon(state, dst, src, size); 52 - kernel_neon_end(); 47 + scoped_ksimd() 48 + crypto_aegis128_decrypt_chunk_neon(state, dst, src, size); 53 49 } 54 50 55 51 int crypto_aegis128_final_simd(struct aegis_state *state, ··· 54 58 unsigned int cryptlen, 55 59 unsigned int authsize) 56 60 { 57 - int ret; 58 - 59 - kernel_neon_begin(); 60 - ret = crypto_aegis128_final_neon(state, tag_xor, assoclen, cryptlen, 61 - authsize); 62 - kernel_neon_end(); 63 - 64 - return ret; 61 + scoped_ksimd() 62 + return crypto_aegis128_final_neon(state, tag_xor, assoclen, 63 + cryptlen, authsize); 65 64 }
+10 -9
drivers/net/ethernet/mellanox/mlx5/core/wc.c
··· 9 9 10 10 #if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && IS_ENABLED(CONFIG_ARM64) 11 11 #include <asm/neon.h> 12 + #include <asm/simd.h> 12 13 #endif 13 14 14 15 #define TEST_WC_NUM_WQES 255 ··· 265 264 { 266 265 #if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && IS_ENABLED(CONFIG_ARM64) 267 266 if (cpu_has_neon()) { 268 - kernel_neon_begin(); 269 - asm volatile 270 - (".arch_extension simd\n\t" 271 - "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%0]\n\t" 272 - "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%1]" 273 - : 274 - : "r"(mmio_wqe), "r"(sq->bfreg.map + offset) 275 - : "memory", "v0", "v1", "v2", "v3"); 276 - kernel_neon_end(); 267 + scoped_ksimd() { 268 + asm volatile( 269 + ".arch_extension simd\n\t" 270 + "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%0]\n\t" 271 + "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%1]" 272 + : 273 + : "r"(mmio_wqe), "r"(sq->bfreg.map + offset) 274 + : "memory", "v0", "v1", "v2", "v3"); 275 + } 277 276 return; 278 277 } 279 278 #endif
+6 -13
lib/crc/arm/crc-t10dif.h
··· 5 5 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> 6 6 */ 7 7 8 - #include <asm/neon.h> 9 8 #include <asm/simd.h> 10 9 11 10 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); ··· 18 19 19 20 static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) 20 21 { 21 - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { 22 + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) { 22 23 if (static_branch_likely(&have_pmull)) { 23 - if (likely(may_use_simd())) { 24 - kernel_neon_begin(); 25 - crc = crc_t10dif_pmull64(crc, data, length); 26 - kernel_neon_end(); 27 - return crc; 28 - } 24 + scoped_ksimd() 25 + return crc_t10dif_pmull64(crc, data, length); 29 26 } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && 30 - static_branch_likely(&have_neon) && 31 - likely(may_use_simd())) { 27 + static_branch_likely(&have_neon)) { 32 28 u8 buf[16] __aligned(16); 33 29 34 - kernel_neon_begin(); 35 - crc_t10dif_pmull8(crc, data, length, buf); 36 - kernel_neon_end(); 30 + scoped_ksimd() 31 + crc_t10dif_pmull8(crc, data, length, buf); 37 32 38 33 return crc_t10dif_generic(0, buf, sizeof(buf)); 39 34 }
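The restructuring above hoists the may_use_simd() check so it is evaluated once, up front. In isolation, and with the static-key checks omitted, the gating pattern looks like this sketch (crc_arch_sketch() is a hypothetical name):

    static inline u16 crc_arch_sketch(u16 crc, const u8 *data, size_t length)
    {
        if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd()))
            scoped_ksimd()
                return crc_t10dif_pmull64(crc, data, length);

        return crc_t10dif_generic(crc, data, length);
    }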
+4 -7
lib/crc/arm/crc32.h
··· 8 8 #include <linux/cpufeature.h> 9 9 10 10 #include <asm/hwcap.h> 11 - #include <asm/neon.h> 12 11 #include <asm/simd.h> 13 12 14 13 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); ··· 41 42 len -= n; 42 43 } 43 44 n = round_down(len, 16); 44 - kernel_neon_begin(); 45 - crc = crc32_pmull_le(p, n, crc); 46 - kernel_neon_end(); 45 + scoped_ksimd() 46 + crc = crc32_pmull_le(p, n, crc); 47 47 p += n; 48 48 len -= n; 49 49 } ··· 69 71 len -= n; 70 72 } 71 73 n = round_down(len, 16); 72 - kernel_neon_begin(); 73 - crc = crc32c_pmull_le(p, n, crc); 74 - kernel_neon_end(); 74 + scoped_ksimd() 75 + crc = crc32c_pmull_le(p, n, crc); 75 76 p += n; 76 77 len -= n; 77 78 }
+6 -13
lib/crc/arm64/crc-t10dif.h
··· 7 7 8 8 #include <linux/cpufeature.h> 9 9 10 - #include <asm/neon.h> 11 10 #include <asm/simd.h> 12 11 13 12 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd); ··· 20 21 21 22 static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) 22 23 { 23 - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { 24 + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) { 24 25 if (static_branch_likely(&have_pmull)) { 25 - if (likely(may_use_simd())) { 26 - kernel_neon_begin(); 27 - crc = crc_t10dif_pmull_p64(crc, data, length); 28 - kernel_neon_end(); 29 - return crc; 30 - } 26 + scoped_ksimd() 27 + return crc_t10dif_pmull_p64(crc, data, length); 31 28 } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && 32 - static_branch_likely(&have_asimd) && 33 - likely(may_use_simd())) { 29 + static_branch_likely(&have_asimd)) { 34 30 u8 buf[16]; 35 31 36 - kernel_neon_begin(); 37 - crc_t10dif_pmull_p8(crc, data, length, buf); 38 - kernel_neon_end(); 32 + scoped_ksimd() 33 + crc_t10dif_pmull_p8(crc, data, length, buf); 39 34 40 35 return crc_t10dif_generic(0, buf, sizeof(buf)); 41 36 }
+6 -10
lib/crc/arm64/crc32.h
··· 2 2 3 3 #include <asm/alternative.h> 4 4 #include <asm/cpufeature.h> 5 - #include <asm/neon.h> 6 5 #include <asm/simd.h> 7 6 8 7 // The minimum input length to consider the 4-way interleaved code path ··· 22 23 23 24 if (len >= min_len && cpu_have_named_feature(PMULL) && 24 25 likely(may_use_simd())) { 25 - kernel_neon_begin(); 26 - crc = crc32_le_arm64_4way(crc, p, len); 27 - kernel_neon_end(); 26 + scoped_ksimd() 27 + crc = crc32_le_arm64_4way(crc, p, len); 28 28 29 29 p += round_down(len, 64); 30 30 len %= 64; ··· 42 44 43 45 if (len >= min_len && cpu_have_named_feature(PMULL) && 44 46 likely(may_use_simd())) { 45 - kernel_neon_begin(); 46 - crc = crc32c_le_arm64_4way(crc, p, len); 47 - kernel_neon_end(); 47 + scoped_ksimd() 48 + crc = crc32c_le_arm64_4way(crc, p, len); 48 49 49 50 p += round_down(len, 64); 50 51 len %= 64; ··· 62 65 63 66 if (len >= min_len && cpu_have_named_feature(PMULL) && 64 67 likely(may_use_simd())) { 65 - kernel_neon_begin(); 66 - crc = crc32_be_arm64_4way(crc, p, len); 67 - kernel_neon_end(); 68 + scoped_ksimd() 69 + crc = crc32_be_arm64_4way(crc, p, len); 68 70 69 71 p += round_down(len, 64); 70 72 len %= 64;
+7 -10
lib/raid6/neon.c
··· 8 8 #include <linux/raid/pq.h> 9 9 10 10 #ifdef __KERNEL__ 11 - #include <asm/neon.h> 11 + #include <asm/simd.h> 12 12 #else 13 - #define kernel_neon_begin() 14 - #define kernel_neon_end() 13 + #define scoped_ksimd() 15 14 #define cpu_has_neon() (1) 16 15 #endif 17 16 ··· 31 32 { \ 32 33 void raid6_neon ## _n ## _gen_syndrome_real(int, \ 33 34 unsigned long, void**); \ 34 - kernel_neon_begin(); \ 35 - raid6_neon ## _n ## _gen_syndrome_real(disks, \ 35 + scoped_ksimd() \ 36 + raid6_neon ## _n ## _gen_syndrome_real(disks, \ 36 37 (unsigned long)bytes, ptrs); \ 37 - kernel_neon_end(); \ 38 38 } \ 39 39 static void raid6_neon ## _n ## _xor_syndrome(int disks, \ 40 40 int start, int stop, \ ··· 41 43 { \ 42 44 void raid6_neon ## _n ## _xor_syndrome_real(int, \ 43 45 int, int, unsigned long, void**); \ 44 - kernel_neon_begin(); \ 45 - raid6_neon ## _n ## _xor_syndrome_real(disks, \ 46 - start, stop, (unsigned long)bytes, ptrs); \ 47 - kernel_neon_end(); \ 46 + scoped_ksimd() \ 47 + raid6_neon ## _n ## _xor_syndrome_real(disks, \ 48 + start, stop, (unsigned long)bytes, ptrs);\ 48 49 } \ 49 50 struct raid6_calls const raid6_neonx ## _n = { \ 50 51 raid6_neon ## _n ## _gen_syndrome, \
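In the user-space raid6 test harness the guard has to compile away, which is why scoped_ksimd() is defined to nothing outside __KERNEL__: each guarded call is kept as a single statement, so dropping the guard leaves a plain call. Taking the _n = 4 instantiation as an example (sketch):

    /* kernel build: begin/end wrapped around the call by the guard */
    scoped_ksimd()
        raid6_neon4_gen_syndrome_real(disks, (unsigned long)bytes, ptrs);

    /* user-space build: scoped_ksimd() expands to nothing, leaving */
    raid6_neon4_gen_syndrome_real(disks, (unsigned long)bytes, ptrs);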
+6 -9
lib/raid6/recov_neon.c
··· 7 7 #include <linux/raid/pq.h> 8 8 9 9 #ifdef __KERNEL__ 10 - #include <asm/neon.h> 10 + #include <asm/simd.h> 11 11 #include "neon.h" 12 12 #else 13 - #define kernel_neon_begin() 14 - #define kernel_neon_end() 13 + #define scoped_ksimd() 15 14 #define cpu_has_neon() (1) 16 15 #endif 17 16 ··· 54 55 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ 55 56 raid6_gfexp[failb]]]; 56 57 57 - kernel_neon_begin(); 58 - __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul); 59 - kernel_neon_end(); 58 + scoped_ksimd() 59 + __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul); 60 60 } 61 61 62 62 static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, ··· 84 86 /* Now, pick the proper data tables */ 85 87 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; 86 88 87 - kernel_neon_begin(); 88 - __raid6_datap_recov_neon(bytes, p, q, dq, qmul); 89 - kernel_neon_end(); 89 + scoped_ksimd() 90 + __raid6_datap_recov_neon(bytes, p, q, dq, qmul); 90 91 } 91 92 92 93 const struct raid6_recov_calls raid6_recov_neon = {