Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/cpufeature: Replace the old static_cpu_has() with safe variant

So the old one didn't work properly before alternatives had run.
And it was supposed to provide an optimized JMP because the
assumption was that the offset it was jumping to was within a
signed byte, and thus could use a two-byte JMP.

So I did an x86_64 allyesconfig build and dumped all possible
sites where static_cpu_has() was used. The optimization amounted
to all in all 12(!) places where static_cpu_has() had generated
a 2-byte JMP. Which has saved us a whopping 36 bytes!

This clearly is not worth the trouble so we can remove it. The
only place where the optimization might count - in __switch_to()
- we will handle differently. But that's not subject of this
patch.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1453842730-28463-6-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Borislav Petkov and committed by
Ingo Molnar
bc696ca0 cd4d09ec

+21 -125
-10
arch/x86/Kconfig.debug
··· 350 350 351 351 If unsure say N here. 352 352 353 - config X86_DEBUG_STATIC_CPU_HAS 354 - bool "Debug alternatives" 355 - depends on DEBUG_KERNEL 356 - ---help--- 357 - This option causes additional code to be generated which 358 - fails if static_cpu_has() is used before alternatives have 359 - run. 360 - 361 - If unsure, say N. 362 - 363 353 config X86_DEBUG_FPU 364 354 bool "Debug the x86 FPU code" 365 355 depends on DEBUG_KERNEL
+7 -93
arch/x86/include/asm/cpufeature.h
··· 125 125 #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) 126 126 #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) 127 127 /* 128 - * Do not add any more of those clumsy macros - use static_cpu_has_safe() for 128 + * Do not add any more of those clumsy macros - use static_cpu_has() for 129 129 * fast paths and boot_cpu_has() otherwise! 130 130 */ 131 131 132 132 #if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS) 133 - extern void warn_pre_alternatives(void); 134 - extern bool __static_cpu_has_safe(u16 bit); 133 + extern bool __static_cpu_has(u16 bit); 135 134 136 135 /* 137 136 * Static testing of CPU features. Used the same as boot_cpu_has(). 138 137 * These are only valid after alternatives have run, but will statically 139 138 * patch the target code for additional performance. 140 139 */ 141 - static __always_inline __pure bool __static_cpu_has(u16 bit) 142 - { 143 - #ifdef CC_HAVE_ASM_GOTO 144 - 145 - #ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS 146 - 147 - /* 148 - * Catch too early usage of this before alternatives 149 - * have run. 
150 - */ 151 - asm_volatile_goto("1: jmp %l[t_warn]\n" 152 - "2:\n" 153 - ".section .altinstructions,\"a\"\n" 154 - " .long 1b - .\n" 155 - " .long 0\n" /* no replacement */ 156 - " .word %P0\n" /* 1: do replace */ 157 - " .byte 2b - 1b\n" /* source len */ 158 - " .byte 0\n" /* replacement len */ 159 - " .byte 0\n" /* pad len */ 160 - ".previous\n" 161 - /* skipping size check since replacement size = 0 */ 162 - : : "i" (X86_FEATURE_ALWAYS) : : t_warn); 163 - 164 - #endif 165 - 166 - asm_volatile_goto("1: jmp %l[t_no]\n" 167 - "2:\n" 168 - ".section .altinstructions,\"a\"\n" 169 - " .long 1b - .\n" 170 - " .long 0\n" /* no replacement */ 171 - " .word %P0\n" /* feature bit */ 172 - " .byte 2b - 1b\n" /* source len */ 173 - " .byte 0\n" /* replacement len */ 174 - " .byte 0\n" /* pad len */ 175 - ".previous\n" 176 - /* skipping size check since replacement size = 0 */ 177 - : : "i" (bit) : : t_no); 178 - return true; 179 - t_no: 180 - return false; 181 - 182 - #ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS 183 - t_warn: 184 - warn_pre_alternatives(); 185 - return false; 186 - #endif 187 - 188 - #else /* CC_HAVE_ASM_GOTO */ 189 - 190 - u8 flag; 191 - /* Open-coded due to __stringify() in ALTERNATIVE() */ 192 - asm volatile("1: movb $0,%0\n" 193 - "2:\n" 194 - ".section .altinstructions,\"a\"\n" 195 - " .long 1b - .\n" 196 - " .long 3f - .\n" 197 - " .word %P1\n" /* feature bit */ 198 - " .byte 2b - 1b\n" /* source len */ 199 - " .byte 4f - 3f\n" /* replacement len */ 200 - " .byte 0\n" /* pad len */ 201 - ".previous\n" 202 - ".section .discard,\"aw\",@progbits\n" 203 - " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ 204 - ".previous\n" 205 - ".section .altinstr_replacement,\"ax\"\n" 206 - "3: movb $1,%0\n" 207 - "4:\n" 208 - ".previous\n" 209 - : "=qm" (flag) : "i" (bit)); 210 - return flag; 211 - 212 - #endif /* CC_HAVE_ASM_GOTO */ 213 - } 214 - 215 - #define static_cpu_has(bit) \ 216 - ( \ 217 - __builtin_constant_p(boot_cpu_has(bit)) ? 
\ 218 - boot_cpu_has(bit) : \ 219 - __builtin_constant_p(bit) ? \ 220 - __static_cpu_has(bit) : \ 221 - boot_cpu_has(bit) \ 222 - ) 223 - 224 - static __always_inline __pure bool _static_cpu_has_safe(u16 bit) 140 + static __always_inline __pure bool _static_cpu_has(u16 bit) 225 141 { 226 142 #ifdef CC_HAVE_ASM_GOTO 227 143 asm_volatile_goto("1: jmp %l[t_dynamic]\n" ··· 171 255 t_no: 172 256 return false; 173 257 t_dynamic: 174 - return __static_cpu_has_safe(bit); 258 + return __static_cpu_has(bit); 175 259 #else 176 260 u8 flag; 177 261 /* Open-coded due to __stringify() in ALTERNATIVE() */ ··· 209 293 ".previous\n" 210 294 : "=qm" (flag) 211 295 : "i" (bit), "i" (X86_FEATURE_ALWAYS)); 212 - return (flag == 2 ? __static_cpu_has_safe(bit) : flag); 296 + return (flag == 2 ? __static_cpu_has(bit) : flag); 213 297 #endif /* CC_HAVE_ASM_GOTO */ 214 298 } 215 299 216 - #define static_cpu_has_safe(bit) \ 300 + #define static_cpu_has(bit) \ 217 301 ( \ 218 302 __builtin_constant_p(boot_cpu_has(bit)) ? \ 219 303 boot_cpu_has(bit) : \ 220 - _static_cpu_has_safe(bit) \ 304 + _static_cpu_has(bit) \ 221 305 ) 222 306 #else 223 307 /* 224 308 * gcc 3.x is too stupid to do the static test; fall back to dynamic. 225 309 */ 226 310 #define static_cpu_has(bit) boot_cpu_has(bit) 227 - #define static_cpu_has_safe(bit) boot_cpu_has(bit) 228 311 #endif 229 312 230 313 #define cpu_has_bug(c, bit) cpu_has(c, (bit)) ··· 231 316 #define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit)) 232 317 233 318 #define static_cpu_has_bug(bit) static_cpu_has((bit)) 234 - #define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit)) 235 319 #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) 236 320 237 321 #define MAX_CPU_FEATURES (NCAPINTS * 32)
+7 -7
arch/x86/include/asm/fpu/internal.h
··· 59 59 */ 60 60 static __always_inline __pure bool use_eager_fpu(void) 61 61 { 62 - return static_cpu_has_safe(X86_FEATURE_EAGER_FPU); 62 + return static_cpu_has(X86_FEATURE_EAGER_FPU); 63 63 } 64 64 65 65 static __always_inline __pure bool use_xsaveopt(void) 66 66 { 67 - return static_cpu_has_safe(X86_FEATURE_XSAVEOPT); 67 + return static_cpu_has(X86_FEATURE_XSAVEOPT); 68 68 } 69 69 70 70 static __always_inline __pure bool use_xsave(void) 71 71 { 72 - return static_cpu_has_safe(X86_FEATURE_XSAVE); 72 + return static_cpu_has(X86_FEATURE_XSAVE); 73 73 } 74 74 75 75 static __always_inline __pure bool use_fxsr(void) 76 76 { 77 - return static_cpu_has_safe(X86_FEATURE_FXSR); 77 + return static_cpu_has(X86_FEATURE_FXSR); 78 78 } 79 79 80 80 /* ··· 301 301 302 302 WARN_ON(system_state != SYSTEM_BOOTING); 303 303 304 - if (static_cpu_has_safe(X86_FEATURE_XSAVES)) 304 + if (static_cpu_has(X86_FEATURE_XSAVES)) 305 305 XSTATE_OP(XSAVES, xstate, lmask, hmask, err); 306 306 else 307 307 XSTATE_OP(XSAVE, xstate, lmask, hmask, err); ··· 323 323 324 324 WARN_ON(system_state != SYSTEM_BOOTING); 325 325 326 - if (static_cpu_has_safe(X86_FEATURE_XSAVES)) 326 + if (static_cpu_has(X86_FEATURE_XSAVES)) 327 327 XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); 328 328 else 329 329 XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); ··· 461 461 * pending. Clear the x87 state here by setting it to fixed values. 462 462 * "m" is a random variable that should be in L1. 463 463 */ 464 - if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { 464 + if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) { 465 465 asm volatile( 466 466 "fnclex\n\t" 467 467 "emms\n\t"
+2 -2
arch/x86/kernel/apic/apic_numachip.c
··· 30 30 unsigned long value; 31 31 unsigned int id = (x >> 24) & 0xff; 32 32 33 - if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { 33 + if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { 34 34 rdmsrl(MSR_FAM10H_NODE_ID, value); 35 35 id |= (value << 2) & 0xff00; 36 36 } ··· 178 178 this_cpu_write(cpu_llc_id, node); 179 179 180 180 /* Account for nodes per socket in multi-core-module processors */ 181 - if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { 181 + if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { 182 182 rdmsrl(MSR_FAM10H_NODE_ID, val); 183 183 nodes = ((val >> 3) & 7) + 1; 184 184 }
+2 -10
arch/x86/kernel/cpu/common.c
··· 1475 1475 } 1476 1476 #endif 1477 1477 1478 - #ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS 1479 - void warn_pre_alternatives(void) 1480 - { 1481 - WARN(1, "You're using static_cpu_has before alternatives have run!\n"); 1482 - } 1483 - EXPORT_SYMBOL_GPL(warn_pre_alternatives); 1484 - #endif 1485 - 1486 - inline bool __static_cpu_has_safe(u16 bit) 1478 + inline bool __static_cpu_has(u16 bit) 1487 1479 { 1488 1480 return boot_cpu_has(bit); 1489 1481 } 1490 - EXPORT_SYMBOL_GPL(__static_cpu_has_safe); 1482 + EXPORT_SYMBOL_GPL(__static_cpu_has); 1491 1483 1492 1484 static void bsp_resume(void) 1493 1485 {
+1 -1
arch/x86/kernel/vm86_32.c
··· 362 362 /* make room for real-mode segments */ 363 363 tsk->thread.sp0 += 16; 364 364 365 - if (static_cpu_has_safe(X86_FEATURE_SEP)) 365 + if (static_cpu_has(X86_FEATURE_SEP)) 366 366 tsk->thread.sysenter_cs = 0; 367 367 368 368 load_sp0(tss, &tsk->thread);
+1 -1
drivers/cpufreq/intel_pstate.c
··· 1431 1431 if (!all_cpu_data) 1432 1432 return -ENOMEM; 1433 1433 1434 - if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) { 1434 + if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) { 1435 1435 pr_info("intel_pstate: HWP enabled\n"); 1436 1436 hwp_active++; 1437 1437 }
+1 -1
fs/btrfs/disk-io.c
··· 930 930 if (bio_flags & EXTENT_BIO_TREE_LOG) 931 931 return 0; 932 932 #ifdef CONFIG_X86 933 - if (static_cpu_has_safe(X86_FEATURE_XMM4_2)) 933 + if (static_cpu_has(X86_FEATURE_XMM4_2)) 934 934 return 0; 935 935 #endif 936 936 return 1;