Merge local branch 'x86-codegen'

Merge fixes for trivial x86 code generation annoyances

- Introduce helper macros to work around clang asm input constraint problems

- Use said macros to fix trivially stupid code generation issues in
bitops and array_index_mask_nospec

- Also improve codegen for 32-bit array index comparisons

None of these really matter, but I look at code generation and profiles
fairly regularly, and these misfeatures caused the generated code to
look really odd and distract from the real issues.

* branch 'x86-codegen' of local tree:
x86: improve bitop code generation with clang
x86: improve array_index_mask_nospec() code generation
clang: work around asm input constraint problems

+34 -19
+10 -14
arch/x86/include/asm/barrier.h
··· 33 33 * Returns: 34 34 * 0 - (index < size) 35 35 */ 36 - static __always_inline unsigned long array_index_mask_nospec(unsigned long index, 37 - unsigned long size) 38 - { 39 - unsigned long mask; 40 - 41 - asm volatile ("cmp %1,%2; sbb %0,%0;" 42 - :"=r" (mask) 43 - :"g"(size),"r" (index) 44 - :"cc"); 45 - return mask; 46 - } 47 - 48 - /* Override the default implementation from linux/nospec.h. */ 49 - #define array_index_mask_nospec array_index_mask_nospec 36 + #define array_index_mask_nospec(idx,sz) ({ \ 37 + typeof((idx)+(sz)) __idx = (idx); \ 38 + typeof(__idx) __sz = (sz); \ 39 + unsigned long __mask; \ 40 + asm volatile ("cmp %1,%2; sbb %0,%0" \ 41 + :"=r" (__mask) \ 42 + :ASM_INPUT_G (__sz), \ 43 + "r" (__idx) \ 44 + :"cc"); \ 45 + __mask; }) 50 46 51 47 /* Prevent speculative execution past this barrier. */ 52 48 #define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
+5 -5
arch/x86/include/asm/bitops.h
··· 250 250 { 251 251 asm("rep; bsf %1,%0" 252 252 : "=r" (word) 253 - : "rm" (word)); 253 + : ASM_INPUT_RM (word)); 254 254 return word; 255 255 } 256 256 ··· 297 297 298 298 asm("bsr %1,%0" 299 299 : "=r" (word) 300 - : "rm" (word)); 300 + : ASM_INPUT_RM (word)); 301 301 return word; 302 302 } 303 303 ··· 320 320 */ 321 321 asm("bsfl %1,%0" 322 322 : "=r" (r) 323 - : "rm" (x), "0" (-1)); 323 + : ASM_INPUT_RM (x), "0" (-1)); 324 324 #elif defined(CONFIG_X86_CMOV) 325 325 asm("bsfl %1,%0\n\t" 326 326 "cmovzl %2,%0" ··· 377 377 */ 378 378 asm("bsrl %1,%0" 379 379 : "=r" (r) 380 - : "rm" (x), "0" (-1)); 380 + : ASM_INPUT_RM (x), "0" (-1)); 381 381 #elif defined(CONFIG_X86_CMOV) 382 382 asm("bsrl %1,%0\n\t" 383 383 "cmovzl %2,%0" ··· 416 416 */ 417 417 asm("bsrq %1,%q0" 418 418 : "+r" (bitpos) 419 - : "rm" (x)); 419 + : ASM_INPUT_RM (x)); 420 420 return bitpos + 1; 421 421 } 422 422 #else
+10
include/linux/compiler-clang.h
··· 118 118 119 119 #define __diag_ignore_all(option, comment) \ 120 120 __diag_clang(13, ignore, option) 121 + 122 + /* 123 + * clang has horrible behavior with "g" or "rm" constraints for asm 124 + * inputs, turning them into something worse than "m". Avoid using 125 + * constraints with multiple possible uses (but "ir" seems to be ok): 126 + * 127 + * https://github.com/llvm/llvm-project/issues/20571 128 + */ 129 + #define ASM_INPUT_G "ir" 130 + #define ASM_INPUT_RM "r"
+9
include/linux/compiler_types.h
··· 409 409 #define asm_goto_output(x...) asm volatile goto(x) 410 410 #endif 411 411 412 + /* 413 + * Clang has trouble with constraints with multiple 414 + * alternative behaviors (mainly "g" and "rm"). 415 + */ 416 + #ifndef ASM_INPUT_G 417 + #define ASM_INPUT_G "g" 418 + #define ASM_INPUT_RM "rm" 419 + #endif 420 + 412 421 #ifdef CONFIG_CC_HAS_ASM_INLINE 413 422 #define asm_inline asm __inline 414 423 #else