x86/asm: Fix inline asm call constraints for Clang

For inline asm statements which have a CALL instruction, we list the
stack pointer as a constraint to convince GCC to ensure the frame
pointer is set up first:

  static inline void foo()
  {
          register void *__sp asm(_ASM_SP);
          asm("call bar" : "+r" (__sp));
  }

Unfortunately, that pattern causes Clang to corrupt the stack pointer.

The fix is easy: convert the stack pointer register variable to a global
variable.
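
To illustrate, here is roughly what the fixed pattern looks like (a
sketch only, combining the new asm.h hunk below with the foo()/bar()
placeholders from the example above):

  /* arch/x86/include/asm/asm.h: one global register variable, shared
   * by every user of the constraint */
  register unsigned int __asm_call_sp asm("esp");
  #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)

  static inline void foo()
  {
          /* the stack pointer output operand keeps the call after the
           * frame pointer setup */
          asm("call bar" : ASM_CALL_CONSTRAINT);
  }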

It should be noted that the end result is different based on the GCC
version. With GCC 6.4, this patch has exactly the same result as
before:

           defconfig    defconfig-nofp    distro      distro-nofp
  before   9820389      9491555           8816046     8516940
  after    9820389      9491555           8816046     8516940

With GCC 7.2, however, GCC's behavior has changed: it now reacts to the
conversion of the register variable to a global.
That somehow convinces it to *always* set up the frame pointer before
inserting *any* inline asm. (Therefore, listing the variable as an
output constraint is a no-op and is no longer necessary.) It's a bit
overkill, but the performance impact should be negligible. And in fact,
there's a nice improvement with frame pointers disabled:

           defconfig    defconfig-nofp    distro      distro-nofp
  before   9796316      9468236           9076191     8790305
  after    9796957      9464267           9076381     8785949

So in summary, while listing the stack pointer as an output constraint
is no longer necessary for newer versions of GCC, it's still needed for
older versions.

Suggested-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Reported-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/3db862e970c432ae823cf515c52b54fec8270e0e.1505942196.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>


Changed files (+42 -45):
+1 -2
arch/x86/include/asm/alternative.h
···
 #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
                            output, input...) \
 { \
-        register void *__sp asm(_ASM_SP); \
         asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
                 "call %P[new2]", feature2) \
-                : output, "+r" (__sp) \
+                : output, ASM_CALL_CONSTRAINT \
                 : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
                   [new2] "i" (newfunc2), ## input); \
 }
+11
arch/x86/include/asm/asm.h
···
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
+#ifndef __ASSEMBLY__
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction. Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function. If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned int __asm_call_sp asm("esp");
+#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+#endif
+
 #endif /* _ASM_X86_ASM_H */
+4 -6
arch/x86/include/asm/mshyperv.h
···
         u64 input_address = input ? virt_to_phys(input) : 0;
         u64 output_address = output ? virt_to_phys(output) : 0;
         u64 hv_status;
-        register void *__sp asm(_ASM_SP);
 
 #ifdef CONFIG_X86_64
         if (!hv_hypercall_pg)
···
 
         __asm__ __volatile__("mov %4, %%r8\n"
                              "call *%5"
-                             : "=a" (hv_status), "+r" (__sp),
+                             : "=a" (hv_status), ASM_CALL_CONSTRAINT,
                                "+c" (control), "+d" (input_address)
                              : "r" (output_address), "m" (hv_hypercall_pg)
                              : "cc", "memory", "r8", "r9", "r10", "r11");
···
 
         __asm__ __volatile__("call *%7"
                              : "=A" (hv_status),
-                               "+c" (input_address_lo), "+r" (__sp)
+                               "+c" (input_address_lo), ASM_CALL_CONSTRAINT
                              : "A" (control),
                                "b" (input_address_hi),
                                "D"(output_address_hi), "S"(output_address_lo),
···
 static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 {
         u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
-        register void *__sp asm(_ASM_SP);
 
 #ifdef CONFIG_X86_64
         {
                 __asm__ __volatile__("call *%4"
-                                     : "=a" (hv_status), "+r" (__sp),
+                                     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
                                        "+c" (control), "+d" (input1)
                                      : "m" (hv_hypercall_pg)
                                      : "cc", "r8", "r9", "r10", "r11");
···
                 __asm__ __volatile__ ("call *%5"
                                       : "=A"(hv_status),
                                         "+c"(input1_lo),
-                                        "+r"(__sp)
+                                        ASM_CALL_CONSTRAINT
                                       : "A" (control),
                                         "b" (input1_hi),
                                         "m" (hv_hypercall_pg)
+7 -7
arch/x86/include/asm/paravirt_types.h
···
  */
 #ifdef CONFIG_X86_32
 #define PVOP_VCALL_ARGS \
-        unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; \
-        register void *__sp asm("esp")
+        unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
+
 #define PVOP_CALL_ARGS PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
···
 /* [re]ax isn't an arg, but the return val */
 #define PVOP_VCALL_ARGS \
         unsigned long __edi = __edi, __esi = __esi, \
-                __edx = __edx, __ecx = __ecx, __eax = __eax; \
-        register void *__sp asm("rsp")
+                __edx = __edx, __ecx = __ecx, __eax = __eax;
+
 #define PVOP_CALL_ARGS PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
···
         asm volatile(pre \
                      paravirt_alt(PARAVIRT_CALL) \
                      post \
-                     : call_clbr, "+r" (__sp) \
+                     : call_clbr, ASM_CALL_CONSTRAINT \
                      : paravirt_type(op), \
                        paravirt_clobber(clbr), \
                        ##__VA_ARGS__ \
···
         asm volatile(pre \
                      paravirt_alt(PARAVIRT_CALL) \
                      post \
-                     : call_clbr, "+r" (__sp) \
+                     : call_clbr, ASM_CALL_CONSTRAINT \
                      : paravirt_type(op), \
                        paravirt_clobber(clbr), \
                        ##__VA_ARGS__ \
···
         asm volatile(pre \
                      paravirt_alt(PARAVIRT_CALL) \
                      post \
-                     : call_clbr, "+r" (__sp) \
+                     : call_clbr, ASM_CALL_CONSTRAINT \
                      : paravirt_type(op), \
                        paravirt_clobber(clbr), \
                        ##__VA_ARGS__ \
+5 -10
arch/x86/include/asm/preempt.h
···
 
 #ifdef CONFIG_PREEMPT
   extern asmlinkage void ___preempt_schedule(void);
-# define __preempt_schedule() \
-({ \
-        register void *__sp asm(_ASM_SP); \
-        asm volatile ("call ___preempt_schedule" : "+r"(__sp)); \
-})
+# define __preempt_schedule() \
+        asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
 
   extern asmlinkage void preempt_schedule(void);
   extern asmlinkage void ___preempt_schedule_notrace(void);
-# define __preempt_schedule_notrace() \
-({ \
-        register void *__sp asm(_ASM_SP); \
-        asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp)); \
-})
+# define __preempt_schedule_notrace() \
+        asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
+
   extern asmlinkage void preempt_schedule_notrace(void);
 #endif
 
+2 -4
arch/x86/include/asm/processor.h
···
          * Like all of Linux's memory ordering operations, this is a
          * compiler barrier as well.
          */
-        register void *__sp asm(_ASM_SP);
-
 #ifdef CONFIG_X86_32
         asm volatile (
                 "pushfl\n\t"
···
                 "pushl $1f\n\t"
                 "iret\n\t"
                 "1:"
-                : "+r" (__sp) : : "memory");
+                : ASM_CALL_CONSTRAINT : : "memory");
 #else
         unsigned int tmp;
 
···
                 "iretq\n\t"
                 UNWIND_HINT_RESTORE
                 "1:"
-                : "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
+                : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
 #endif
 }
 
+2 -2
arch/x86/include/asm/rwsem.h
···
 ({ \
         long tmp; \
         struct rw_semaphore* ret; \
-        register void *__sp asm(_ASM_SP); \
         \
         asm volatile("# beginning down_write\n\t" \
                      LOCK_PREFIX " xadd %1,(%4)\n\t" \
···
                      " call " slow_path "\n" \
                      "1:\n" \
                      "# ending down_write" \
-                     : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
+                     : "+m" (sem->count), "=d" (tmp), \
+                       "=a" (ret), ASM_CALL_CONSTRAINT \
                      : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
                      : "memory", "cc"); \
         ret; \
+2 -2
arch/x86/include/asm/uaccess.h
···
 ({ \
         int __ret_gu; \
         register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
-        register void *__sp asm(_ASM_SP); \
         __chk_user_ptr(ptr); \
         might_fault(); \
         asm volatile("call __get_user_%P4" \
-                     : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp) \
+                     : "=a" (__ret_gu), "=r" (__val_gu), \
+                       ASM_CALL_CONSTRAINT \
                      : "0" (ptr), "i" (sizeof(*(ptr)))); \
         (x) = (__force __typeof__(*(ptr))) __val_gu; \
         __builtin_expect(__ret_gu, 0); \
+2 -3
arch/x86/include/asm/xen/hypercall.h
···
         register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
         register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
         register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
-        register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \
-        register void *__sp asm(_ASM_SP);
+        register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
 
-#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__sp)
+#define __HYPERCALL_0PARAM "=r" (__res), ASM_CALL_CONSTRAINT
 #define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1)
 #define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2)
 #define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3)
+1 -2
arch/x86/kvm/emulate.c
···
 
 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 {
-        register void *__sp asm(_ASM_SP);
         ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
 
         if (!(ctxt->d & ByteOp))
···
 
         asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
             : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
-              [fastop]"+S"(fop), "+r"(__sp)
+              [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
             : "c"(ctxt->src2.val));
 
         ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
+1 -2
arch/x86/kvm/vmx.c
···
 static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 {
         u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-        register void *__sp asm(_ASM_SP);
 
         if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
                         == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
···
 #ifdef CONFIG_X86_64
                         [sp]"=&r"(tmp),
 #endif
-                        "+r"(__sp)
+                        ASM_CALL_CONSTRAINT
                         :
                         [entry]"r"(entry),
                         [ss]"i"(__KERNEL_DS),
+1 -2
arch/x86/mm/fault.c
···
         if (is_vmalloc_addr((void *)address) &&
             (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
              address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
-                register void *__sp asm("rsp");
                 unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
                 /*
                  * We're likely to be running with very little stack space
···
                 asm volatile ("movq %[stack], %%rsp\n\t"
                               "call handle_stack_overflow\n\t"
                               "1: jmp 1b"
-                              : "+r" (__sp)
+                              : ASM_CALL_CONSTRAINT
                               : "D" ("kernel stack overflow (page fault)"),
                                 "S" (regs), "d" (address),
                                 [stack] "rm" (stack));
+3 -3
tools/objtool/Documentation/stack-validation.txt
···
    If it's a GCC-compiled .c file, the error may be because the function
    uses an inline asm() statement which has a "call" instruction. An
    asm() statement with a call instruction must declare the use of the
-   stack pointer in its output operand. For example, on x86_64:
+   stack pointer in its output operand. On x86_64, this means adding
+   the ASM_CALL_CONSTRAINT as an output constraint:
 
-     register void *__sp asm("rsp");
-     asm volatile("call func" : "+r" (__sp));
+     asm volatile("call func" : ASM_CALL_CONSTRAINT);
 
    Otherwise the stack frame may not get created before the call.
 