git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

crypto: aesni - replace function pointers with static branches

Replace the function pointers in the GCM implementation with static branches,
which rely on code patching performed once at module load time. This avoids
the severe performance penalty that retpolines impose on indirect calls.
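
For readers unfamiliar with static branches, a minimal illustrative sketch of
the jump_label API they rely on follows. It is not part of the patch: the key
and helper names are hypothetical, and only the API calls themselves
(DEFINE_STATIC_KEY_FALSE, static_branch_likely, static_branch_enable) are real.

	#include <linux/jump_label.h>
	#include <linux/init.h>
	#include <linux/module.h>
	#include <asm/cpufeature.h>

	static DEFINE_STATIC_KEY_FALSE(use_avx_impl);	/* hypothetical key */

	static void avx_impl(void) { }			/* hypothetical fast path */
	static void sse_impl(void) { }			/* hypothetical fallback */

	static void do_crypt(void)
	{
		/*
		 * Compiles to a patched NOP/JMP rather than a pointer load
		 * plus indirect call, so no retpoline sits on the hot path.
		 */
		if (static_branch_likely(&use_avx_impl))
			avx_impl();
		else
			sse_impl();
	}

	static int __init sketch_init(void)
	{
		/* Code patching happens here, once, at module load time. */
		if (boot_cpu_has(X86_FEATURE_AVX))
			static_branch_enable(&use_avx_impl);
		do_crypt();
		return 0;
	}
	module_init(sketch_init);
	MODULE_LICENSE("GPL");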

To retain the ability to switch between different versions of the
implementation based on the input size, on cores that support AVX and AVX2,
use static branches instead of static calls.
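
The dispatch pattern this enables can be condensed as in the sketch below. The
two static keys, the OPTSIZE thresholds, and the aesni_gcm_*init functions are
taken from the patch itself; the wrapper function is a hypothetical
condensation of the init step of gcmaes_crypt_by_sg().

	static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx);
	static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx2);

	/* Hypothetical wrapper condensing the init step of gcmaes_crypt_by_sg(). */
	static void gcm_init_dispatch(void *aes_ctx, struct gcm_context_data *data,
				      u8 *iv, u8 *hash_subkey, const u8 *assoc,
				      unsigned long assoclen, unsigned long left)
	{
		/* The AVX paths only pay off for sufficiently large inputs. */
		bool do_avx = (left >= AVX_GEN2_OPTSIZE);
		bool do_avx2 = (left >= AVX_GEN4_OPTSIZE);

		if (static_branch_likely(&gcm_use_avx2) && do_avx2)
			aesni_gcm_init_avx_gen4(aes_ctx, data, iv, hash_subkey,
						assoc, assoclen);
		else if (static_branch_likely(&gcm_use_avx) && do_avx)
			aesni_gcm_init_avx_gen2(aes_ctx, data, iv, hash_subkey,
						assoc, assoclen);
		else
			aesni_gcm_init(aes_ctx, data, iv, hash_subkey, assoc,
				       assoclen);
	}

A static call would pin a single target at patch time, whereas the
static-branch form keeps the per-request size check on a straight-line path
while still avoiding indirect calls.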

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Authored by Ard Biesheuvel, committed by Herbert Xu
d6cbf4ea 83c83e65

+54 -44
arch/x86/crypto/aesni-intel_glue.c
···
 #include <crypto/internal/aead.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <linux/jump_label.h>
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>

···
				struct gcm_context_data *gdata,
				u8 *auth_tag, unsigned long auth_tag_len);

-static const struct aesni_gcm_tfm_s {
-	void (*init)(void *ctx, struct gcm_context_data *gdata, u8 *iv,
-		     u8 *hash_subkey, const u8 *aad, unsigned long aad_len);
-	void (*enc_update)(void *ctx, struct gcm_context_data *gdata, u8 *out,
-			   const u8 *in, unsigned long plaintext_len);
-	void (*dec_update)(void *ctx, struct gcm_context_data *gdata, u8 *out,
-			   const u8 *in, unsigned long ciphertext_len);
-	void (*finalize)(void *ctx, struct gcm_context_data *gdata,
-			 u8 *auth_tag, unsigned long auth_tag_len);
-} *aesni_gcm_tfm;
-
-static const struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = {
-	.init = &aesni_gcm_init,
-	.enc_update = &aesni_gcm_enc_update,
-	.dec_update = &aesni_gcm_dec_update,
-	.finalize = &aesni_gcm_finalize,
-};
-
 asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
		void *keys, u8 *out, unsigned int num_bytes);
 asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
···
				struct gcm_context_data *gdata,
				u8 *auth_tag, unsigned long auth_tag_len);

-static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
-	.init = &aesni_gcm_init_avx_gen2,
-	.enc_update = &aesni_gcm_enc_update_avx_gen2,
-	.dec_update = &aesni_gcm_dec_update_avx_gen2,
-	.finalize = &aesni_gcm_finalize_avx_gen2,
-};
-
 /*
  * asmlinkage void aesni_gcm_init_avx_gen4()
  * gcm_data *my_ctx_data, context data
···
				struct gcm_context_data *gdata,
				u8 *auth_tag, unsigned long auth_tag_len);

-static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
-	.init = &aesni_gcm_init_avx_gen4,
-	.enc_update = &aesni_gcm_enc_update_avx_gen4,
-	.dec_update = &aesni_gcm_dec_update_avx_gen4,
-	.finalize = &aesni_gcm_finalize_avx_gen4,
-};
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx2);

 static inline struct
 aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
···
			      u8 *iv, void *aes_ctx, u8 *auth_tag,
			      unsigned long auth_tag_len)
 {
-	const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
	u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8);
	struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN);
	unsigned long left = req->cryptlen;
	struct scatter_walk assoc_sg_walk;
	struct skcipher_walk walk;
+	bool do_avx, do_avx2;
	u8 *assocmem = NULL;
	u8 *assoc;
	int err;
···
	if (!enc)
		left -= auth_tag_len;

-	if (left < AVX_GEN4_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen4)
-		gcm_tfm = &aesni_gcm_tfm_avx_gen2;
-	if (left < AVX_GEN2_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen2)
-		gcm_tfm = &aesni_gcm_tfm_sse;
+	do_avx = (left >= AVX_GEN2_OPTSIZE);
+	do_avx2 = (left >= AVX_GEN4_OPTSIZE);

	/* Linearize assoc, if not already linear */
	if (req->src->length >= assoclen && req->src->length) {
···
	}

	kernel_fpu_begin();
-	gcm_tfm->init(aes_ctx, data, iv, hash_subkey, assoc, assoclen);
+	if (static_branch_likely(&gcm_use_avx2) && do_avx2)
+		aesni_gcm_init_avx_gen4(aes_ctx, data, iv, hash_subkey, assoc,
+					assoclen);
+	else if (static_branch_likely(&gcm_use_avx) && do_avx)
+		aesni_gcm_init_avx_gen2(aes_ctx, data, iv, hash_subkey, assoc,
+					assoclen);
+	else
+		aesni_gcm_init(aes_ctx, data, iv, hash_subkey, assoc, assoclen);
	kernel_fpu_end();

	if (!assocmem)
···

	while (walk.nbytes > 0) {
		kernel_fpu_begin();
-		(enc ? gcm_tfm->enc_update
-		     : gcm_tfm->dec_update)(aes_ctx, data, walk.dst.virt.addr,
-					    walk.src.virt.addr, walk.nbytes);
+		if (static_branch_likely(&gcm_use_avx2) && do_avx2) {
+			if (enc)
+				aesni_gcm_enc_update_avx_gen4(aes_ctx, data,
+							      walk.dst.virt.addr,
+							      walk.src.virt.addr,
+							      walk.nbytes);
+			else
+				aesni_gcm_dec_update_avx_gen4(aes_ctx, data,
+							      walk.dst.virt.addr,
+							      walk.src.virt.addr,
+							      walk.nbytes);
+		} else if (static_branch_likely(&gcm_use_avx) && do_avx) {
+			if (enc)
+				aesni_gcm_enc_update_avx_gen2(aes_ctx, data,
+							      walk.dst.virt.addr,
+							      walk.src.virt.addr,
+							      walk.nbytes);
+			else
+				aesni_gcm_dec_update_avx_gen2(aes_ctx, data,
+							      walk.dst.virt.addr,
+							      walk.src.virt.addr,
+							      walk.nbytes);
+		} else if (enc) {
+			aesni_gcm_enc_update(aes_ctx, data, walk.dst.virt.addr,
+					     walk.src.virt.addr, walk.nbytes);
+		} else {
+			aesni_gcm_dec_update(aes_ctx, data, walk.dst.virt.addr,
+					     walk.src.virt.addr, walk.nbytes);
+		}
		kernel_fpu_end();

		err = skcipher_walk_done(&walk, 0);
···
		return err;

	kernel_fpu_begin();
-	gcm_tfm->finalize(aes_ctx, data, auth_tag, auth_tag_len);
+	if (static_branch_likely(&gcm_use_avx2) && do_avx2)
+		aesni_gcm_finalize_avx_gen4(aes_ctx, data, auth_tag,
+					    auth_tag_len);
+	else if (static_branch_likely(&gcm_use_avx) && do_avx)
+		aesni_gcm_finalize_avx_gen2(aes_ctx, data, auth_tag,
+					    auth_tag_len);
+	else
+		aesni_gcm_finalize(aes_ctx, data, auth_tag, auth_tag_len);
	kernel_fpu_end();

	return 0;
···
 #ifdef CONFIG_X86_64
	if (boot_cpu_has(X86_FEATURE_AVX2)) {
		pr_info("AVX2 version of gcm_enc/dec engaged.\n");
-		aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen4;
+		static_branch_enable(&gcm_use_avx);
+		static_branch_enable(&gcm_use_avx2);
	} else
	if (boot_cpu_has(X86_FEATURE_AVX)) {
		pr_info("AVX version of gcm_enc/dec engaged.\n");
-		aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen2;
+		static_branch_enable(&gcm_use_avx);
	} else {
		pr_info("SSE version of gcm_enc/dec engaged.\n");
-		aesni_gcm_tfm = &aesni_gcm_tfm_sse;
	}
	aesni_ctr_enc_tfm = aesni_ctr_enc;
	if (boot_cpu_has(X86_FEATURE_AVX)) {