Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'x86-alternatives-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 alternative instruction updates from Ingo Molnar:
"Small RDTSCP optimization, enabled by the newly added ALTERNATIVE_3(),
and other small improvements"

* 'x86-alternatives-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/TSC: Use RDTSCP
x86/alternatives: Add an ALTERNATIVE_3() macro
x86/alternatives: Print containing function
x86/alternatives: Add macro comments

+49 -12
+33 -8
arch/x86/include/asm/alternative.h
··· 94 94 #define alt_total_slen alt_end_marker"b-661b" 95 95 #define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" 96 96 97 - #define __OLDINSTR(oldinstr, num) \ 98 - "661:\n\t" oldinstr "\n662:\n" \ 99 - ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ 100 - "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" 101 - 102 97 #define OLDINSTR(oldinstr, num) \ 103 - __OLDINSTR(oldinstr, num) \ 98 + "# ALT: oldnstr\n" \ 99 + "661:\n\t" oldinstr "\n662:\n" \ 100 + "# ALT: padding\n" \ 101 + ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ 102 + "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" \ 104 103 alt_end_marker ":\n" 105 104 106 105 /* ··· 115 116 * additionally longer than the first replacement alternative. 116 117 */ 117 118 #define OLDINSTR_2(oldinstr, num1, num2) \ 119 + "# ALT: oldinstr2\n" \ 118 120 "661:\n\t" oldinstr "\n662:\n" \ 121 + "# ALT: padding2\n" \ 119 122 ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ 120 123 "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \ 124 + alt_end_marker ":\n" 125 + 126 + #define OLDINSTR_3(oldinsn, n1, n2, n3) \ 127 + "# ALT: oldinstr3\n" \ 128 + "661:\n\t" oldinsn "\n662:\n" \ 129 + "# ALT: padding3\n" \ 130 + ".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \ 131 + " - (" alt_slen ")) > 0) * " \ 132 + "(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \ 133 + " - (" alt_slen ")), 0x90\n" \ 121 134 alt_end_marker ":\n" 122 135 123 136 #define ALTINSTR_ENTRY(feature, num) \ ··· 140 129 " .byte " alt_rlen(num) "\n" /* replacement len */ \ 141 130 " .byte " alt_pad_len "\n" /* pad len */ 142 131 143 - #define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ 144 - b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" 132 + #define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ 133 + "# ALT: replacement " #num "\n" \ 134 + 
b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n" 145 135 146 136 /* alternative assembly primitive: */ 147 137 #define ALTERNATIVE(oldinstr, newinstr, feature) \ ··· 163 151 ".pushsection .altinstr_replacement, \"ax\"\n" \ 164 152 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ 165 153 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ 154 + ".popsection\n" 155 + 156 + #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \ 157 + OLDINSTR_3(oldinsn, 1, 2, 3) \ 158 + ".pushsection .altinstructions,\"a\"\n" \ 159 + ALTINSTR_ENTRY(feat1, 1) \ 160 + ALTINSTR_ENTRY(feat2, 2) \ 161 + ALTINSTR_ENTRY(feat3, 3) \ 162 + ".popsection\n" \ 163 + ".pushsection .altinstr_replacement, \"ax\"\n" \ 164 + ALTINSTR_REPLACEMENT(newinsn1, feat1, 1) \ 165 + ALTINSTR_REPLACEMENT(newinsn2, feat2, 2) \ 166 + ALTINSTR_REPLACEMENT(newinsn3, feat3, 3) \ 166 167 ".popsection\n" 167 168 168 169 /*
+14 -2
arch/x86/include/asm/msr.h
··· 217 217 */ 218 218 static __always_inline unsigned long long rdtsc_ordered(void) 219 219 { 220 + DECLARE_ARGS(val, low, high); 221 + 220 222 /* 221 223 * The RDTSC instruction is not ordered relative to memory 222 224 * access. The Intel SDM and the AMD APM are both vague on this ··· 229 227 * ordering guarantees as reading from a global memory location 230 228 * that some other imaginary CPU is updating continuously with a 231 229 * time stamp. 230 + * 231 + * Thus, use the preferred barrier on the respective CPU, aiming for 232 + * RDTSCP as the default. 232 233 */ 233 - barrier_nospec(); 234 - return rdtsc(); 234 + asm volatile(ALTERNATIVE_3("rdtsc", 235 + "mfence; rdtsc", X86_FEATURE_MFENCE_RDTSC, 236 + "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC, 237 + "rdtscp", X86_FEATURE_RDTSCP) 238 + : EAX_EDX_RET(val, low, high) 239 + /* RDTSCP clobbers ECX with MSR_TSC_AUX. */ 240 + :: "ecx"); 241 + 242 + return EAX_EDX_VAL(val, low, high); 235 243 } 236 244 237 245 static inline unsigned long long native_read_pmc(int counter)
+2 -2
arch/x86/kernel/alternative.c
··· 394 394 continue; 395 395 } 396 396 397 - DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d", 397 + DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d", 398 398 a->cpuid >> 5, 399 399 a->cpuid & 0x1f, 400 - instr, a->instrlen, 400 + instr, instr, a->instrlen, 401 401 replacement, a->replacementlen, a->padlen); 402 402 403 403 DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);