Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: update AS_* macros to binutils >=2.23, supporting ADX and AVX2

Now that the kernel specifies binutils 2.23 as the minimum version, we
can remove ifdefs for AVX2 and ADX throughout.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>

Authored by Jason A. Donenfeld; committed by Masahiro Yamada.
e6abef61 d7e40ea8

+15 -90
-10
arch/x86/Kconfig.assembler
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 # Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 3 3 4 - config AS_AVX2 5 - def_bool $(as-instr,vpbroadcastb %xmm0$(comma)%ymm1) 6 - help 7 - Supported by binutils >= 2.22 and LLVM integrated assembler 8 - 9 4 config AS_AVX512 10 5 def_bool $(as-instr,vpmovm2b %k1$(comma)%zmm5) 11 6 help ··· 15 20 def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1) 16 21 help 17 22 Supported by binutils >= 2.24 and LLVM integrated assembler 18 - 19 - config AS_ADX 20 - def_bool $(as-instr,adox %eax$(comma)%eax) 21 - help 22 - Supported by binutils >= 2.23 and LLVM integrated assembler
+2 -4
arch/x86/crypto/Makefile
··· 47 47 aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o 48 48 49 49 obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o 50 - chacha-x86_64-y := chacha-ssse3-x86_64.o chacha_glue.o 51 - chacha-x86_64-$(CONFIG_AS_AVX2) += chacha-avx2-x86_64.o 50 + chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha_glue.o 52 51 chacha-x86_64-$(CONFIG_AS_AVX512) += chacha-avx512vl-x86_64.o 53 52 54 53 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o ··· 55 56 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o 56 57 57 58 obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o 58 - sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o 59 - sha1-ssse3-$(CONFIG_AS_AVX2) += sha1_avx2_x86_64_asm.o 59 + sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ssse3_glue.o 60 60 sha1-ssse3-$(CONFIG_AS_SHA1_NI) += sha1_ni_asm.o 61 61 62 62 obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
-3
arch/x86/crypto/aesni-intel_avx-x86_64.S
··· 1868 1868 ret 1869 1869 SYM_FUNC_END(aesni_gcm_finalize_avx_gen2) 1870 1870 1871 - #ifdef CONFIG_AS_AVX2 1872 1871 ############################################################################### 1873 1872 # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) 1874 1873 # Input: A and B (128-bits each, bit-reflected) ··· 2835 2836 FUNC_RESTORE 2836 2837 ret 2837 2838 SYM_FUNC_END(aesni_gcm_finalize_avx_gen4) 2838 - 2839 - #endif /* CONFIG_AS_AVX2 */
-7
arch/x86/crypto/aesni-intel_glue.c
··· 233 233 .finalize = &aesni_gcm_finalize_avx_gen2, 234 234 }; 235 235 236 - #ifdef CONFIG_AS_AVX2 237 236 /* 238 237 * asmlinkage void aesni_gcm_init_avx_gen4() 239 238 * gcm_data *my_ctx_data, context data ··· 274 275 .dec_update = &aesni_gcm_dec_update_avx_gen4, 275 276 .finalize = &aesni_gcm_finalize_avx_gen4, 276 277 }; 277 - 278 - #endif 279 278 280 279 static inline struct 281 280 aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) ··· 703 706 if (!enc) 704 707 left -= auth_tag_len; 705 708 706 - #ifdef CONFIG_AS_AVX2 707 709 if (left < AVX_GEN4_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen4) 708 710 gcm_tfm = &aesni_gcm_tfm_avx_gen2; 709 - #endif 710 711 if (left < AVX_GEN2_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen2) 711 712 gcm_tfm = &aesni_gcm_tfm_sse; 712 713 ··· 1064 1069 if (!x86_match_cpu(aesni_cpu_id)) 1065 1070 return -ENODEV; 1066 1071 #ifdef CONFIG_X86_64 1067 - #ifdef CONFIG_AS_AVX2 1068 1072 if (boot_cpu_has(X86_FEATURE_AVX2)) { 1069 1073 pr_info("AVX2 version of gcm_enc/dec engaged.\n"); 1070 1074 aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen4; 1071 1075 } else 1072 - #endif 1073 1076 if (boot_cpu_has(X86_FEATURE_AVX)) { 1074 1077 pr_info("AVX version of gcm_enc/dec engaged.\n"); 1075 1078 aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen2;
+2 -4
arch/x86/crypto/chacha_glue.c
··· 79 79 } 80 80 } 81 81 82 - if (IS_ENABLED(CONFIG_AS_AVX2) && 83 - static_branch_likely(&chacha_use_avx2)) { 82 + if (static_branch_likely(&chacha_use_avx2)) { 84 83 while (bytes >= CHACHA_BLOCK_SIZE * 8) { 85 84 chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); 86 85 bytes -= CHACHA_BLOCK_SIZE * 8; ··· 287 288 288 289 static_branch_enable(&chacha_use_simd); 289 290 290 - if (IS_ENABLED(CONFIG_AS_AVX2) && 291 - boot_cpu_has(X86_FEATURE_AVX) && 291 + if (boot_cpu_has(X86_FEATURE_AVX) && 292 292 boot_cpu_has(X86_FEATURE_AVX2) && 293 293 cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { 294 294 static_branch_enable(&chacha_use_avx2);
-8
arch/x86/crypto/poly1305-x86_64-cryptogams.pl
··· 1514 1514 1515 1515 if ($avx>1) { 1516 1516 1517 - if ($kernel) { 1518 - $code .= "#ifdef CONFIG_AS_AVX2\n"; 1519 - } 1520 - 1521 1517 my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) = 1522 1518 map("%ymm$_",(0..15)); 1523 1519 my $S4=$MASK; ··· 2803 2807 &declare_function("poly1305_blocks_avx2", 32, 4); 2804 2808 poly1305_blocks_avxN(0); 2805 2809 &end_function("poly1305_blocks_avx2"); 2806 - 2807 - if($kernel) { 2808 - $code .= "#endif\n"; 2809 - } 2810 2810 2811 2811 ####################################################################### 2812 2812 if ($avx>2) {
+2 -3
arch/x86/crypto/poly1305_glue.c
··· 108 108 kernel_fpu_begin(); 109 109 if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512)) 110 110 poly1305_blocks_avx512(ctx, inp, bytes, padbit); 111 - else if (IS_ENABLED(CONFIG_AS_AVX2) && static_branch_likely(&poly1305_use_avx2)) 111 + else if (static_branch_likely(&poly1305_use_avx2)) 112 112 poly1305_blocks_avx2(ctx, inp, bytes, padbit); 113 113 else 114 114 poly1305_blocks_avx(ctx, inp, bytes, padbit); ··· 264 264 if (boot_cpu_has(X86_FEATURE_AVX) && 265 265 cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) 266 266 static_branch_enable(&poly1305_use_avx); 267 - if (IS_ENABLED(CONFIG_AS_AVX2) && boot_cpu_has(X86_FEATURE_AVX) && 268 - boot_cpu_has(X86_FEATURE_AVX2) && 267 + if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && 269 268 cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) 270 269 static_branch_enable(&poly1305_use_avx2); 271 270 if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) &&
-6
arch/x86/crypto/sha1_ssse3_glue.c
··· 174 174 crypto_unregister_shash(&sha1_avx_alg); 175 175 } 176 176 177 - #if defined(CONFIG_AS_AVX2) 178 177 #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ 179 178 180 179 asmlinkage void sha1_transform_avx2(struct sha1_state *state, ··· 244 245 if (avx2_usable()) 245 246 crypto_unregister_shash(&sha1_avx2_alg); 246 247 } 247 - 248 - #else 249 - static inline int register_sha1_avx2(void) { return 0; } 250 - static inline void unregister_sha1_avx2(void) { } 251 - #endif 252 248 253 249 #ifdef CONFIG_AS_SHA1_NI 254 250 asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data,
-3
arch/x86/crypto/sha256-avx2-asm.S
··· 48 48 # This code schedules 2 blocks at a time, with 4 lanes per block 49 49 ######################################################################## 50 50 51 - #ifdef CONFIG_AS_AVX2 52 51 #include <linux/linkage.h> 53 52 54 53 ## assume buffers not aligned ··· 766 767 .align 32 767 768 _SHUF_DC00: 768 769 .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF 769 - 770 - #endif
-6
arch/x86/crypto/sha256_ssse3_glue.c
··· 220 220 ARRAY_SIZE(sha256_avx_algs)); 221 221 } 222 222 223 - #if defined(CONFIG_AS_AVX2) 224 223 asmlinkage void sha256_transform_rorx(struct sha256_state *state, 225 224 const u8 *data, int blocks); 226 225 ··· 293 294 crypto_unregister_shashes(sha256_avx2_algs, 294 295 ARRAY_SIZE(sha256_avx2_algs)); 295 296 } 296 - 297 - #else 298 - static inline int register_sha256_avx2(void) { return 0; } 299 - static inline void unregister_sha256_avx2(void) { } 300 - #endif 301 297 302 298 #ifdef CONFIG_AS_SHA256_NI 303 299 asmlinkage void sha256_ni_transform(struct sha256_state *digest,
-3
arch/x86/crypto/sha512-avx2-asm.S
··· 49 49 # This code schedules 1 blocks at a time, with 4 lanes per block 50 50 ######################################################################## 51 51 52 - #ifdef CONFIG_AS_AVX2 53 52 #include <linux/linkage.h> 54 53 55 54 .text ··· 748 749 MASK_YMM_LO: 749 750 .octa 0x00000000000000000000000000000000 750 751 .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 751 - 752 - #endif
-5
arch/x86/crypto/sha512_ssse3_glue.c
··· 218 218 ARRAY_SIZE(sha512_avx_algs)); 219 219 } 220 220 221 - #if defined(CONFIG_AS_AVX2) 222 221 asmlinkage void sha512_transform_rorx(struct sha512_state *state, 223 222 const u8 *data, int blocks); 224 223 ··· 292 293 crypto_unregister_shashes(sha512_avx2_algs, 293 294 ARRAY_SIZE(sha512_avx2_algs)); 294 295 } 295 - #else 296 - static inline int register_sha512_avx2(void) { return 0; } 297 - static inline void unregister_sha512_avx2(void) { } 298 - #endif 299 296 300 297 static int __init sha512_ssse3_mod_init(void) 301 298 {
+4 -4
crypto/Kconfig
··· 267 267 268 268 config CRYPTO_CURVE25519_X86 269 269 tristate "x86_64 accelerated Curve25519 scalar multiplication library" 270 - depends on X86 && 64BIT && AS_ADX 270 + depends on X86 && 64BIT 271 271 select CRYPTO_LIB_CURVE25519_GENERIC 272 272 select CRYPTO_ARCH_HAVE_LIB_CURVE25519 273 273 ··· 465 465 466 466 config CRYPTO_NHPOLY1305_AVX2 467 467 tristate "NHPoly1305 hash function (x86_64 AVX2 implementation)" 468 - depends on X86 && 64BIT && AS_AVX2 468 + depends on X86 && 64BIT 469 469 select CRYPTO_NHPOLY1305 470 470 help 471 471 AVX2 optimized implementation of the hash function used by the ··· 1303 1303 1304 1304 config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 1305 1305 tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX2)" 1306 - depends on X86 && 64BIT && AS_AVX2 1306 + depends on X86 && 64BIT 1307 1307 depends on CRYPTO 1308 1308 select CRYPTO_CAMELLIA_AESNI_AVX_X86_64 1309 1309 help ··· 1573 1573 1574 1574 config CRYPTO_SERPENT_AVX2_X86_64 1575 1575 tristate "Serpent cipher algorithm (x86_64/AVX2)" 1576 - depends on X86 && 64BIT && AS_AVX2 1576 + depends on X86 && 64BIT 1577 1577 select CRYPTO_SERPENT_AVX_X86_64 1578 1578 help 1579 1579 Serpent cipher algorithm, by Anderson, Biham & Knudsen.
-6
lib/raid6/algos.c
··· 34 34 &raid6_avx512x2, 35 35 &raid6_avx512x1, 36 36 #endif 37 - #ifdef CONFIG_AS_AVX2 38 37 &raid6_avx2x2, 39 38 &raid6_avx2x1, 40 - #endif 41 39 &raid6_sse2x2, 42 40 &raid6_sse2x1, 43 41 &raid6_sse1x2, ··· 49 51 &raid6_avx512x2, 50 52 &raid6_avx512x1, 51 53 #endif 52 - #ifdef CONFIG_AS_AVX2 53 54 &raid6_avx2x4, 54 55 &raid6_avx2x2, 55 56 &raid6_avx2x1, 56 - #endif 57 57 &raid6_sse2x4, 58 58 &raid6_sse2x2, 59 59 &raid6_sse2x1, ··· 97 101 #ifdef CONFIG_AS_AVX512 98 102 &raid6_recov_avx512, 99 103 #endif 100 - #ifdef CONFIG_AS_AVX2 101 104 &raid6_recov_avx2, 102 - #endif 103 105 &raid6_recov_ssse3, 104 106 #endif 105 107 #ifdef CONFIG_S390
-4
lib/raid6/avx2.c
··· 13 13 * 14 14 */ 15 15 16 - #ifdef CONFIG_AS_AVX2 17 - 18 16 #include <linux/raid/pq.h> 19 17 #include "x86.h" 20 18 ··· 468 470 1 /* Has cache hints */ 469 471 }; 470 472 #endif 471 - 472 - #endif /* CONFIG_AS_AVX2 */
-6
lib/raid6/recov_avx2.c
··· 4 4 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com> 5 5 */ 6 6 7 - #ifdef CONFIG_AS_AVX2 8 - 9 7 #include <linux/raid/pq.h> 10 8 #include "x86.h" 11 9 ··· 311 313 #endif 312 314 .priority = 2, 313 315 }; 314 - 315 - #else 316 - #warning "your version of binutils lacks AVX2 support" 317 - #endif
-3
lib/raid6/test/Makefile
··· 35 35 ifeq ($(IS_X86),yes) 36 36 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o 37 37 CFLAGS += -DCONFIG_X86 38 - CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ 39 - gcc -c -x assembler - >/dev/null 2>&1 && \ 40 - rm ./-.o && echo -DCONFIG_AS_AVX2=1) 41 38 CFLAGS += $(shell echo "vpmovm2b %k1, %zmm5" | \ 42 39 gcc -c -x assembler - >/dev/null 2>&1 && \ 43 40 rm ./-.o && echo -DCONFIG_AS_AVX512=1)
+1 -1
net/netfilter/Makefile
··· 83 83 nft_set_pipapo.o 84 84 85 85 ifdef CONFIG_X86_64 86 - ifdef CONFIG_AS_AVX2 86 + ifndef CONFIG_UML 87 87 nf_tables-objs += nft_set_pipapo_avx2.o 88 88 endif 89 89 endif
+1 -1
net/netfilter/nf_tables_api.c
··· 3291 3291 &nft_set_rhash_type, 3292 3292 &nft_set_bitmap_type, 3293 3293 &nft_set_rbtree_type, 3294 - #if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2) 3294 + #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) 3295 3295 &nft_set_pipapo_avx2_type, 3296 3296 #endif 3297 3297 &nft_set_pipapo_type,
+1 -1
net/netfilter/nft_set_pipapo.c
··· 2201 2201 }, 2202 2202 }; 2203 2203 2204 - #if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2) 2204 + #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) 2205 2205 const struct nft_set_type nft_set_pipapo_avx2_type = { 2206 2206 .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | 2207 2207 NFT_SET_TIMEOUT,
+2 -2
net/netfilter/nft_set_pipapo_avx2.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 #ifndef _NFT_SET_PIPAPO_AVX2_H 3 3 4 - #ifdef CONFIG_AS_AVX2 4 + #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) 5 5 #include <asm/fpu/xstate.h> 6 6 #define NFT_PIPAPO_ALIGN (XSAVE_YMM_SIZE / BITS_PER_BYTE) 7 7 ··· 9 9 const u32 *key, const struct nft_set_ext **ext); 10 10 bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features, 11 11 struct nft_set_estimate *est); 12 - #endif /* CONFIG_AS_AVX2 */ 12 + #endif /* defined(CONFIG_X86_64) && !defined(CONFIG_UML) */ 13 13 14 14 #endif /* _NFT_SET_PIPAPO_AVX2_H */