x86/xen: Get rid of paravirt op adjust_exception_frame

When running as Xen pv-guest the exception frame on the stack contains
%r11 and %rcx additional to the other data pushed by the processor.

Instead of having a paravirt op being called for each exception type
prepend the Xen specific code to each exception entry. When running as
Xen pv-guest just use the exception entry with prepended instructions,
otherwise use the entry without the Xen specific code.

[ tglx: Merged through tip to avoid ugly merge conflict ]

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: xen-devel@lists.xenproject.org
Cc: boris.ostrovsky@oracle.com
Cc: luto@amacapital.net
Link: http://lkml.kernel.org/r/20170831174249.26853-1-jgross@suse.com

Authored by Juergen Gross and committed by Thomas Gleixner.

Commit: 5878d5d6 (parent: ef1d4dea)

+133 -77
+4 -19
arch/x86/entry/entry_64.S
··· 821 821 .endif 822 822 823 823 ASM_CLAC 824 - PARAVIRT_ADJUST_EXCEPTION_FRAME 825 824 826 825 .ifeq \has_error_code 827 826 pushq $-1 /* ORIG_RAX: no syscall to restart */ ··· 966 967 ENDPROC(do_softirq_own_stack) 967 968 968 969 #ifdef CONFIG_XEN 969 - idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 970 + idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 970 971 971 972 /* 972 973 * A note on the "critical region" in our callback handler. ··· 1033 1034 movq 8(%rsp), %r11 1034 1035 addq $0x30, %rsp 1035 1036 pushq $0 /* RIP */ 1036 - pushq %r11 1037 - pushq %rcx 1038 1037 UNWIND_HINT_IRET_REGS offset=8 1039 1038 jmp general_protection 1040 1039 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ ··· 1063 1066 idtentry stack_segment do_stack_segment has_error_code=1 1064 1067 1065 1068 #ifdef CONFIG_XEN 1066 - idtentry xen_debug do_debug has_error_code=0 1067 - idtentry xen_int3 do_int3 has_error_code=0 1068 - idtentry xen_stack_segment do_stack_segment has_error_code=1 1069 + idtentry xendebug do_debug has_error_code=0 1070 + idtentry xenint3 do_int3 has_error_code=0 1069 1071 #endif 1070 1072 1071 1073 idtentry general_protection do_general_protection has_error_code=1 ··· 1228 1232 END(error_exit) 1229 1233 1230 1234 /* Runs on exception stack */ 1235 + /* XXX: broken on Xen PV */ 1231 1236 ENTRY(nmi) 1232 1237 UNWIND_HINT_IRET_REGS 1233 - /* 1234 - * Fix up the exception frame if we're on Xen. 1235 - * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most 1236 - * one value to the stack on native, so it may clobber the rdx 1237 - * scratch slot, but it won't clobber any of the important 1238 - * slots past it. 1239 - * 1240 - * Xen is a different story, because the Xen frame itself overlaps 1241 - * the "NMI executing" variable. 1242 - */ 1243 - PARAVIRT_ADJUST_EXCEPTION_FRAME 1244 - 1245 1238 /* 1246 1239 * We allow breakpoints in NMIs. 
If a breakpoint occurs, then 1247 1240 * the iretq it performs will take us out of NMI context.
-1
arch/x86/entry/entry_64_compat.S
··· 293 293 /* 294 294 * Interrupts are off on entry. 295 295 */ 296 - PARAVIRT_ADJUST_EXCEPTION_FRAME 297 296 ASM_CLAC /* Do this early to minimize exposure */ 298 297 SWAPGS 299 298
-5
arch/x86/include/asm/paravirt.h
··· 960 960 #define GET_CR2_INTO_RAX \ 961 961 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) 962 962 963 - #define PARAVIRT_ADJUST_EXCEPTION_FRAME \ 964 - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \ 965 - CLBR_NONE, \ 966 - call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame)) 967 - 968 963 #define USERGS_SYSRET64 \ 969 964 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ 970 965 CLBR_NONE, \
-3
arch/x86/include/asm/paravirt_types.h
··· 196 196 void (*safe_halt)(void); 197 197 void (*halt)(void); 198 198 199 - #ifdef CONFIG_X86_64 200 - void (*adjust_exception_frame)(void); 201 - #endif 202 199 } __no_randomize_layout; 203 200 204 201 struct pv_mmu_ops {
+3
arch/x86/include/asm/proto.h
··· 24 24 void __end_entry_SYSENTER_compat(void); 25 25 void entry_SYSCALL_compat(void); 26 26 void entry_INT80_compat(void); 27 + #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) 28 + void xen_entry_INT80_compat(void); 29 + #endif 27 30 #endif 28 31 29 32 void x86_configure_nx(void);
+25 -3
arch/x86/include/asm/traps.h
··· 13 13 asmlinkage void debug(void); 14 14 asmlinkage void nmi(void); 15 15 asmlinkage void int3(void); 16 - asmlinkage void xen_debug(void); 17 - asmlinkage void xen_int3(void); 18 - asmlinkage void xen_stack_segment(void); 19 16 asmlinkage void overflow(void); 20 17 asmlinkage void bounds(void); 21 18 asmlinkage void invalid_op(void); ··· 34 37 asmlinkage void machine_check(void); 35 38 #endif /* CONFIG_X86_MCE */ 36 39 asmlinkage void simd_coprocessor_error(void); 40 + 41 + #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) 42 + asmlinkage void xen_divide_error(void); 43 + asmlinkage void xen_xendebug(void); 44 + asmlinkage void xen_xenint3(void); 45 + asmlinkage void xen_nmi(void); 46 + asmlinkage void xen_overflow(void); 47 + asmlinkage void xen_bounds(void); 48 + asmlinkage void xen_invalid_op(void); 49 + asmlinkage void xen_device_not_available(void); 50 + asmlinkage void xen_double_fault(void); 51 + asmlinkage void xen_coprocessor_segment_overrun(void); 52 + asmlinkage void xen_invalid_TSS(void); 53 + asmlinkage void xen_segment_not_present(void); 54 + asmlinkage void xen_stack_segment(void); 55 + asmlinkage void xen_general_protection(void); 56 + asmlinkage void xen_page_fault(void); 57 + asmlinkage void xen_spurious_interrupt_bug(void); 58 + asmlinkage void xen_coprocessor_error(void); 59 + asmlinkage void xen_alignment_check(void); 60 + #ifdef CONFIG_X86_MCE 61 + asmlinkage void xen_machine_check(void); 62 + #endif /* CONFIG_X86_MCE */ 63 + asmlinkage void xen_simd_coprocessor_error(void); 64 + #endif 37 65 38 66 dotraplinkage void do_divide_error(struct pt_regs *, long); 39 67 dotraplinkage void do_debug(struct pt_regs *, long);
-1
arch/x86/kernel/asm-offsets_64.c
··· 20 20 int main(void) 21 21 { 22 22 #ifdef CONFIG_PARAVIRT 23 - OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); 24 23 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); 25 24 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); 26 25 BLANK();
-3
arch/x86/kernel/paravirt.c
··· 319 319 .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), 320 320 .safe_halt = native_safe_halt, 321 321 .halt = native_halt, 322 - #ifdef CONFIG_X86_64 323 - .adjust_exception_frame = paravirt_nop, 324 - #endif 325 322 }; 326 323 327 324 __visible struct pv_cpu_ops pv_cpu_ops = {
+65 -33
arch/x86/xen/enlighten_pv.c
··· 579 579 preempt_enable(); 580 580 } 581 581 582 + #ifdef CONFIG_X86_64 583 + struct trap_array_entry { 584 + void (*orig)(void); 585 + void (*xen)(void); 586 + bool ist_okay; 587 + }; 588 + 589 + static struct trap_array_entry trap_array[] = { 590 + { debug, xen_xendebug, true }, 591 + { int3, xen_xenint3, true }, 592 + { double_fault, xen_double_fault, true }, 593 + #ifdef CONFIG_X86_MCE 594 + { machine_check, xen_machine_check, true }, 595 + #endif 596 + { nmi, xen_nmi, true }, 597 + { overflow, xen_overflow, false }, 598 + #ifdef CONFIG_IA32_EMULATION 599 + { entry_INT80_compat, xen_entry_INT80_compat, false }, 600 + #endif 601 + { page_fault, xen_page_fault, false }, 602 + { divide_error, xen_divide_error, false }, 603 + { bounds, xen_bounds, false }, 604 + { invalid_op, xen_invalid_op, false }, 605 + { device_not_available, xen_device_not_available, false }, 606 + { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false }, 607 + { invalid_TSS, xen_invalid_TSS, false }, 608 + { segment_not_present, xen_segment_not_present, false }, 609 + { stack_segment, xen_stack_segment, false }, 610 + { general_protection, xen_general_protection, false }, 611 + { spurious_interrupt_bug, xen_spurious_interrupt_bug, false }, 612 + { coprocessor_error, xen_coprocessor_error, false }, 613 + { alignment_check, xen_alignment_check, false }, 614 + { simd_coprocessor_error, xen_simd_coprocessor_error, false }, 615 + }; 616 + 617 + static bool get_trap_addr(void **addr, unsigned int ist) 618 + { 619 + unsigned int nr; 620 + bool ist_okay = false; 621 + 622 + /* 623 + * Replace trap handler addresses by Xen specific ones. 624 + * Check for known traps using IST and whitelist them. 625 + * The debugger ones are the only ones we care about. 626 + * Xen will handle faults like double_fault, * so we should never see 627 + * them. Warn if there's an unexpected IST-using fault handler. 
628 + */ 629 + for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) { 630 + struct trap_array_entry *entry = trap_array + nr; 631 + 632 + if (*addr == entry->orig) { 633 + *addr = entry->xen; 634 + ist_okay = entry->ist_okay; 635 + break; 636 + } 637 + } 638 + 639 + if (WARN_ON(ist != 0 && !ist_okay)) 640 + return false; 641 + 642 + return true; 643 + } 644 + #endif 645 + 582 646 static int cvt_gate_to_trap(int vector, const gate_desc *val, 583 647 struct trap_info *info) 584 648 { ··· 655 591 656 592 addr = gate_offset(val); 657 593 #ifdef CONFIG_X86_64 658 - /* 659 - * Look for known traps using IST, and substitute them 660 - * appropriately. The debugger ones are the only ones we care 661 - * about. Xen will handle faults like double_fault, 662 - * so we should never see them. Warn if 663 - * there's an unexpected IST-using fault handler. 664 - */ 665 - if (addr == (unsigned long)debug) 666 - addr = (unsigned long)xen_debug; 667 - else if (addr == (unsigned long)int3) 668 - addr = (unsigned long)xen_int3; 669 - else if (addr == (unsigned long)stack_segment) 670 - addr = (unsigned long)xen_stack_segment; 671 - else if (addr == (unsigned long)double_fault) { 672 - /* Don't need to handle these */ 594 + if (!get_trap_addr((void **)&addr, val->bits.ist)) 673 595 return 0; 674 - #ifdef CONFIG_X86_MCE 675 - } else if (addr == (unsigned long)machine_check) { 676 - /* 677 - * when xen hypervisor inject vMCE to guest, 678 - * use native mce handler to handle it 679 - */ 680 - ; 681 - #endif 682 - } else if (addr == (unsigned long)nmi) 683 - /* 684 - * Use the native version as well. 685 - */ 686 - ; 687 - else { 688 - /* Some other trap using IST? */ 689 - if (WARN_ON(val->bits.ist != 0)) 690 - return 0; 691 - } 692 596 #endif /* CONFIG_X86_64 */ 693 597 info->address = addr; 694 598
-3
arch/x86/xen/irq.c
··· 123 123 124 124 .safe_halt = xen_safe_halt, 125 125 .halt = xen_halt, 126 - #ifdef CONFIG_X86_64 127 - .adjust_exception_frame = xen_adjust_exception_frame, 128 - #endif 129 126 }; 130 127 131 128 void __init xen_init_irq_ops(void)
+36 -5
arch/x86/xen/xen-asm_64.S
··· 16 16 17 17 #include <linux/linkage.h> 18 18 19 - ENTRY(xen_adjust_exception_frame) 20 - mov 8+0(%rsp), %rcx 21 - mov 8+8(%rsp), %r11 22 - ret $16 23 - ENDPROC(xen_adjust_exception_frame) 19 + .macro xen_pv_trap name 20 + ENTRY(xen_\name) 21 + pop %rcx 22 + pop %r11 23 + jmp \name 24 + END(xen_\name) 25 + .endm 26 + 27 + xen_pv_trap divide_error 28 + xen_pv_trap debug 29 + xen_pv_trap xendebug 30 + xen_pv_trap int3 31 + xen_pv_trap xenint3 32 + xen_pv_trap nmi 33 + xen_pv_trap overflow 34 + xen_pv_trap bounds 35 + xen_pv_trap invalid_op 36 + xen_pv_trap device_not_available 37 + xen_pv_trap double_fault 38 + xen_pv_trap coprocessor_segment_overrun 39 + xen_pv_trap invalid_TSS 40 + xen_pv_trap segment_not_present 41 + xen_pv_trap stack_segment 42 + xen_pv_trap general_protection 43 + xen_pv_trap page_fault 44 + xen_pv_trap spurious_interrupt_bug 45 + xen_pv_trap coprocessor_error 46 + xen_pv_trap alignment_check 47 + #ifdef CONFIG_X86_MCE 48 + xen_pv_trap machine_check 49 + #endif /* CONFIG_X86_MCE */ 50 + xen_pv_trap simd_coprocessor_error 51 + #ifdef CONFIG_IA32_EMULATION 52 + xen_pv_trap entry_INT80_compat 53 + #endif 54 + xen_pv_trap hypervisor_callback 24 55 25 56 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 26 57 /*
-1
arch/x86/xen/xen-ops.h
··· 138 138 __visible void xen_iret(void); 139 139 __visible void xen_sysret32(void); 140 140 __visible void xen_sysret64(void); 141 - __visible void xen_adjust_exception_frame(void); 142 141 143 142 extern int xen_panic_handler_init(void); 144 143