x86_64, traps: Fix the espfix64 #DF fixup and rewrite it in C

There's nothing special enough about the espfix64 double fault fixup to
justify writing it in assembly. Move it to C.

This also fixes a bug: if the double fault came from an IST stack, the
old asm code would return to a partially uninitialized stack frame.

Fixes: 3891a04aafd668686239349ea58f3314ea2af86b
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Andy Lutomirski and committed by Linus Torvalds
af726f21 fc14f9c1

+26 -32
+2 -32
arch/x86/kernel/entry_64.S
···
828 828 jnz native_irq_return_ldt
829 829 #endif
830 830
831 + .global native_irq_return_iret
831 832 native_irq_return_iret:
832 833 iretq
833 834 _ASM_EXTABLE(native_irq_return_iret, bad_iret)
···
922 921 #endif
923 922 CFI_ENDPROC
924 923 END(common_interrupt)
925 -
926 - /*
927 - * If IRET takes a fault on the espfix stack, then we
928 - * end up promoting it to a doublefault. In that case,
929 - * modify the stack to make it look like we just entered
930 - * the #GP handler from user space, similar to bad_iret.
931 - */
932 - #ifdef CONFIG_X86_ESPFIX64
933 - ALIGN
934 - __do_double_fault:
935 - XCPT_FRAME 1 RDI+8
936 - movq RSP(%rdi),%rax /* Trap on the espfix stack? */
937 - sarq $PGDIR_SHIFT,%rax
938 - cmpl $ESPFIX_PGD_ENTRY,%eax
939 - jne do_double_fault /* No, just deliver the fault */
940 - cmpl $__KERNEL_CS,CS(%rdi)
941 - jne do_double_fault
942 - movq RIP(%rdi),%rax
943 - cmpq $native_irq_return_iret,%rax
944 - jne do_double_fault /* This shouldn't happen... */
945 - movq PER_CPU_VAR(kernel_stack),%rax
946 - subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */
947 - movq %rax,RSP(%rdi)
948 - movq $0,(%rax) /* Missing (lost) #GP error code */
949 - movq $general_protection,RIP(%rdi)
950 - retq
951 - CFI_ENDPROC
952 - END(__do_double_fault)
953 - #else
954 - # define __do_double_fault do_double_fault
955 - #endif
956 924
957 925 /*
958 926 * APIC interrupts.
···
1094 1124 idtentry bounds do_bounds has_error_code=0
1095 1125 idtentry invalid_op do_invalid_op has_error_code=0
1096 1126 idtentry device_not_available do_device_not_available has_error_code=0
1097 - idtentry double_fault __do_double_fault has_error_code=1 paranoid=1
1127 + idtentry double_fault do_double_fault has_error_code=1 paranoid=1
1098 1128 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
1099 1129 idtentry invalid_TSS do_invalid_TSS has_error_code=1
1100 1130 idtentry segment_not_present do_segment_not_present has_error_code=1
+24
arch/x86/kernel/traps.c
···
259 259 static const char str[] = "double fault";
260 260 struct task_struct *tsk = current;
261 261
262 + #ifdef CONFIG_X86_ESPFIX64
263 + extern unsigned char native_irq_return_iret[];
264 +
265 + /*
266 + * If IRET takes a non-IST fault on the espfix64 stack, then we
267 + * end up promoting it to a doublefault. In that case, modify
268 + * the stack to make it look like we just entered the #GP
269 + * handler from user space, similar to bad_iret.
270 + */
271 + if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
272 + regs->cs == __KERNEL_CS &&
273 + regs->ip == (unsigned long)native_irq_return_iret)
274 + {
275 + struct pt_regs *normal_regs = task_pt_regs(current);
276 +
277 + /* Fake a #GP(0) from userspace. */
278 + memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
279 + normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
280 + regs->ip = (unsigned long)general_protection;
281 + regs->sp = (unsigned long)&normal_regs->orig_ax;
282 + return;
283 + }
284 + #endif
285 +
262 286 exception_enter();
263 287 /* Return not checked because double check cannot be ignored */
264 288 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);